aigetwey 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (216) hide show
  1. package/CHANGELOG.md +84 -0
  2. package/LICENSE +21 -0
  3. package/README.md +302 -0
  4. package/assets/logo.svg +8 -0
  5. package/assets/screenshot.png +0 -0
  6. package/assets/wordmark.svg +9 -0
  7. package/config.example.yaml +56 -0
  8. package/dashboard/.env.example +12 -0
  9. package/dashboard/next-env.d.ts +6 -0
  10. package/dashboard/next.config.ts +12 -0
  11. package/dashboard/package-lock.json +1771 -0
  12. package/dashboard/package.json +29 -0
  13. package/dashboard/postcss.config.mjs +5 -0
  14. package/dashboard/src/app/(console)/combos/page.tsx +10 -0
  15. package/dashboard/src/app/(console)/config/page.tsx +5 -0
  16. package/dashboard/src/app/(console)/console/page.tsx +92 -0
  17. package/dashboard/src/app/(console)/endpoint/page.tsx +5 -0
  18. package/dashboard/src/app/(console)/layout.tsx +17 -0
  19. package/dashboard/src/app/(console)/page.tsx +8 -0
  20. package/dashboard/src/app/(console)/providers/[id]/page.tsx +6 -0
  21. package/dashboard/src/app/(console)/providers/page.tsx +5 -0
  22. package/dashboard/src/app/(console)/quota/page.tsx +5 -0
  23. package/dashboard/src/app/(console)/tools/[id]/page.tsx +6 -0
  24. package/dashboard/src/app/(console)/tools/page.tsx +5 -0
  25. package/dashboard/src/app/(console)/usage/page.tsx +24 -0
  26. package/dashboard/src/app/api/cli-detect/[tool]/route.ts +253 -0
  27. package/dashboard/src/app/api/gw/[...path]/route.ts +89 -0
  28. package/dashboard/src/app/api/login/route.ts +30 -0
  29. package/dashboard/src/app/api/logout/route.ts +9 -0
  30. package/dashboard/src/app/api/password/route.ts +34 -0
  31. package/dashboard/src/app/globals.css +340 -0
  32. package/dashboard/src/app/icon.svg +8 -0
  33. package/dashboard/src/app/layout.tsx +28 -0
  34. package/dashboard/src/app/login/page.tsx +60 -0
  35. package/dashboard/src/components/AreaChart.tsx +115 -0
  36. package/dashboard/src/components/Badge.tsx +32 -0
  37. package/dashboard/src/components/Button.tsx +60 -0
  38. package/dashboard/src/components/CapacityBadges.tsx +40 -0
  39. package/dashboard/src/components/Checkbox.tsx +40 -0
  40. package/dashboard/src/components/CliToolConfig.tsx +63 -0
  41. package/dashboard/src/components/ConfigEditor.tsx +199 -0
  42. package/dashboard/src/components/ConfirmModal.tsx +36 -0
  43. package/dashboard/src/components/CooldownTimer.tsx +42 -0
  44. package/dashboard/src/components/EndpointView.tsx +439 -0
  45. package/dashboard/src/components/Icon.tsx +25 -0
  46. package/dashboard/src/components/KeyReveal.tsx +78 -0
  47. package/dashboard/src/components/Lamp.tsx +8 -0
  48. package/dashboard/src/components/LogTable.tsx +223 -0
  49. package/dashboard/src/components/LogoutButton.tsx +20 -0
  50. package/dashboard/src/components/ModelPicker.tsx +121 -0
  51. package/dashboard/src/components/ModelSelectModal.tsx +126 -0
  52. package/dashboard/src/components/PasswordEditor.tsx +86 -0
  53. package/dashboard/src/components/PricingEditor.tsx +171 -0
  54. package/dashboard/src/components/ProviderDetail.tsx +566 -0
  55. package/dashboard/src/components/ProviderManager.tsx +311 -0
  56. package/dashboard/src/components/QuotaView.tsx +78 -0
  57. package/dashboard/src/components/Rail.tsx +82 -0
  58. package/dashboard/src/components/RichCard.tsx +46 -0
  59. package/dashboard/src/components/RoutingView.tsx +329 -0
  60. package/dashboard/src/components/ThemeProvider.tsx +36 -0
  61. package/dashboard/src/components/ToastProvider.tsx +58 -0
  62. package/dashboard/src/components/ToolDetail.tsx +475 -0
  63. package/dashboard/src/components/TopBar.tsx +128 -0
  64. package/dashboard/src/components/UsageView.tsx +151 -0
  65. package/dashboard/src/components/ui.tsx +54 -0
  66. package/dashboard/src/lib/capabilities.ts +318 -0
  67. package/dashboard/src/lib/cliTools.ts +120 -0
  68. package/dashboard/src/lib/client.ts +190 -0
  69. package/dashboard/src/lib/gateway.ts +269 -0
  70. package/dashboard/src/lib/session.ts +71 -0
  71. package/dashboard/src/middleware.ts +37 -0
  72. package/dashboard/tsconfig.json +21 -0
  73. package/dist/adapters/anthropic.js +289 -0
  74. package/dist/adapters/anthropic.js.map +1 -0
  75. package/dist/adapters/gemini.js +268 -0
  76. package/dist/adapters/gemini.js.map +1 -0
  77. package/dist/adapters/index.js +8 -0
  78. package/dist/adapters/index.js.map +1 -0
  79. package/dist/adapters/openai.js +13 -0
  80. package/dist/adapters/openai.js.map +1 -0
  81. package/dist/cli/tray/autostart.js +152 -0
  82. package/dist/cli/tray/autostart.js.map +1 -0
  83. package/dist/cli/tray/icon.js +4 -0
  84. package/dist/cli/tray/icon.js.map +1 -0
  85. package/dist/cli/tray/tray.js +141 -0
  86. package/dist/cli/tray/tray.js.map +1 -0
  87. package/dist/cli/tray/trayRuntime.js +91 -0
  88. package/dist/cli/tray/trayRuntime.js.map +1 -0
  89. package/dist/cli.js +361 -0
  90. package/dist/cli.js.map +1 -0
  91. package/dist/config.js +728 -0
  92. package/dist/config.js.map +1 -0
  93. package/dist/core/authStore.js +78 -0
  94. package/dist/core/authStore.js.map +1 -0
  95. package/dist/core/canonical.js +9 -0
  96. package/dist/core/canonical.js.map +1 -0
  97. package/dist/core/console-buffer.js +25 -0
  98. package/dist/core/console-buffer.js.map +1 -0
  99. package/dist/core/fallback.js +62 -0
  100. package/dist/core/fallback.js.map +1 -0
  101. package/dist/core/handler.js +174 -0
  102. package/dist/core/handler.js.map +1 -0
  103. package/dist/core/keypool.js +105 -0
  104. package/dist/core/keypool.js.map +1 -0
  105. package/dist/core/quota.js +165 -0
  106. package/dist/core/quota.js.map +1 -0
  107. package/dist/core/state.js +52 -0
  108. package/dist/core/state.js.map +1 -0
  109. package/dist/db.js +193 -0
  110. package/dist/db.js.map +1 -0
  111. package/dist/headroom/compress.js +44 -0
  112. package/dist/headroom/compress.js.map +1 -0
  113. package/dist/headroom/detect.js +108 -0
  114. package/dist/headroom/detect.js.map +1 -0
  115. package/dist/headroom/process.js +158 -0
  116. package/dist/headroom/process.js.map +1 -0
  117. package/dist/inject/caveman.js +30 -0
  118. package/dist/inject/caveman.js.map +1 -0
  119. package/dist/inject/index.js +24 -0
  120. package/dist/inject/index.js.map +1 -0
  121. package/dist/inject/ponytail.js +19 -0
  122. package/dist/inject/ponytail.js.map +1 -0
  123. package/dist/middleware/auth.js +66 -0
  124. package/dist/middleware/auth.js.map +1 -0
  125. package/dist/providers/capabilities.js +246 -0
  126. package/dist/providers/capabilities.js.map +1 -0
  127. package/dist/providers/free.js +43 -0
  128. package/dist/providers/free.js.map +1 -0
  129. package/dist/providers/pricing.js +224 -0
  130. package/dist/providers/pricing.js.map +1 -0
  131. package/dist/providers/vertex.js +97 -0
  132. package/dist/providers/vertex.js.map +1 -0
  133. package/dist/routes/admin.js +622 -0
  134. package/dist/routes/admin.js.map +1 -0
  135. package/dist/routes/health.js +4 -0
  136. package/dist/routes/health.js.map +1 -0
  137. package/dist/routes/index.js +12 -0
  138. package/dist/routes/index.js.map +1 -0
  139. package/dist/routes/v1.js +75 -0
  140. package/dist/routes/v1.js.map +1 -0
  141. package/dist/rtk/detect.js +50 -0
  142. package/dist/rtk/detect.js.map +1 -0
  143. package/dist/rtk/filters.js +85 -0
  144. package/dist/rtk/filters.js.map +1 -0
  145. package/dist/rtk/index.js +39 -0
  146. package/dist/rtk/index.js.map +1 -0
  147. package/dist/server.js +100 -0
  148. package/dist/server.js.map +1 -0
  149. package/dist/stream/anthropic-stream.js +239 -0
  150. package/dist/stream/anthropic-stream.js.map +1 -0
  151. package/dist/stream/chunk.js +7 -0
  152. package/dist/stream/chunk.js.map +1 -0
  153. package/dist/stream/gemini-stream.js +135 -0
  154. package/dist/stream/gemini-stream.js.map +1 -0
  155. package/dist/stream/index.js +12 -0
  156. package/dist/stream/index.js.map +1 -0
  157. package/dist/stream/openai-stream.js +34 -0
  158. package/dist/stream/openai-stream.js.map +1 -0
  159. package/dist/stream/sse.js +64 -0
  160. package/dist/stream/sse.js.map +1 -0
  161. package/dist/translator/thinking.js +70 -0
  162. package/dist/translator/thinking.js.map +1 -0
  163. package/dist/translator/thinkingUnified.js +322 -0
  164. package/dist/translator/thinkingUnified.js.map +1 -0
  165. package/dist/upstream/client.js +120 -0
  166. package/dist/upstream/client.js.map +1 -0
  167. package/package.json +76 -0
  168. package/run.sh +27 -0
  169. package/src/adapters/anthropic.ts +377 -0
  170. package/src/adapters/gemini.ts +341 -0
  171. package/src/adapters/index.ts +17 -0
  172. package/src/adapters/openai.ts +22 -0
  173. package/src/cli/tray/autostart.ts +133 -0
  174. package/src/cli/tray/icon.ts +4 -0
  175. package/src/cli/tray/tray.ts +156 -0
  176. package/src/cli/tray/trayRuntime.ts +90 -0
  177. package/src/cli.ts +379 -0
  178. package/src/config.ts +777 -0
  179. package/src/core/authStore.ts +86 -0
  180. package/src/core/canonical.ts +93 -0
  181. package/src/core/console-buffer.ts +39 -0
  182. package/src/core/fallback.ts +116 -0
  183. package/src/core/handler.ts +236 -0
  184. package/src/core/keypool.ts +152 -0
  185. package/src/core/quota.ts +214 -0
  186. package/src/core/state.ts +65 -0
  187. package/src/db.ts +280 -0
  188. package/src/headroom/compress.ts +78 -0
  189. package/src/headroom/detect.ts +119 -0
  190. package/src/headroom/process.ts +166 -0
  191. package/src/inject/caveman.ts +35 -0
  192. package/src/inject/index.ts +46 -0
  193. package/src/inject/ponytail.ts +31 -0
  194. package/src/middleware/auth.ts +76 -0
  195. package/src/providers/capabilities.ts +297 -0
  196. package/src/providers/free.ts +53 -0
  197. package/src/providers/pricing.ts +261 -0
  198. package/src/providers/vertex.ts +117 -0
  199. package/src/routes/admin.ts +716 -0
  200. package/src/routes/health.ts +5 -0
  201. package/src/routes/index.ts +24 -0
  202. package/src/routes/v1.ts +87 -0
  203. package/src/rtk/detect.ts +55 -0
  204. package/src/rtk/filters.ts +94 -0
  205. package/src/rtk/index.ts +58 -0
  206. package/src/server.ts +108 -0
  207. package/src/stream/anthropic-stream.ts +310 -0
  208. package/src/stream/chunk.ts +46 -0
  209. package/src/stream/gemini-stream.ts +158 -0
  210. package/src/stream/index.ts +23 -0
  211. package/src/stream/openai-stream.ts +41 -0
  212. package/src/stream/sse.ts +72 -0
  213. package/src/translator/thinking.ts +64 -0
  214. package/src/translator/thinkingUnified.ts +319 -0
  215. package/src/upstream/client.ts +155 -0
  216. package/tsconfig.json +20 -0
@@ -0,0 +1,86 @@
1
+ /**
2
+ * Admin password store — the single source of truth for the admin password,
3
+ * persisted as a scrypt hash (no plaintext, no native deps). Seeded once from
4
+ * AIGETWEY_ADMIN_PASSWORD (default 123456 via the launcher); after that it is
5
+ * changed at runtime from the dashboard and the env var is only a fallback seed.
6
+ *
7
+ * File: <dataDir>/auth.json — { algo, salt, hash } (all hex). Absent → seeded.
8
+ */
9
+ import { scryptSync, randomBytes, timingSafeEqual } from "node:crypto";
10
+ import { readFileSync, writeFileSync, existsSync, mkdirSync } from "node:fs";
11
+ import { tmpdir } from "node:os";
12
+ import { dirname, join } from "node:path";
13
+
14
/**
 * On-disk shape of `auth.json`. Only the salt and the derived key are kept —
 * the plaintext password is never written.
 */
interface AuthRecord {
  /** Key-derivation function that produced `hash`. */
  algo: "scrypt";
  /** Hex-encoded salt. */
  salt: string;
  /** Hex-encoded derived key. */
  hash: string;
}
19
+
20
/**
 * Derive the scrypt key for `password` under `salt`.
 *
 * @returns a 64-byte derived key, computed with scrypt's default cost
 *   parameters (adequate for a local admin gate).
 */
function hashPassword(password: string, salt: Buffer): Buffer {
  const keyLength = 64;
  const derivedKey = scryptSync(password, salt, keyLength);
  return derivedKey;
}
24
+
25
/** Build a storable record for `password`, salted with 16 freshly generated random bytes. */
function makeRecord(password: string): AuthRecord {
  const salt = randomBytes(16);
  const derivedKey = hashPassword(password, salt);
  return {
    algo: "scrypt",
    salt: salt.toString("hex"),
    hash: derivedKey.toString("hex"),
  };
}
29
+
30
/**
 * File-backed admin password store. Holds at most one scrypt record in memory
 * and mirrors it to `file` whenever it is seeded or changed.
 */
export class AuthStore {
  private record: AuthRecord | null = null;

  constructor(private file: string) {}

  /**
   * Narrow parsed JSON to a usable record, or `null` when it is not one.
   *
   * The file is external input: valid JSON of the wrong shape (e.g. `{}`)
   * would otherwise mark the store as enabled and make `verify()` throw while
   * decoding a missing salt.
   */
  private static parseRecord(raw: unknown): AuthRecord | null {
    if (typeof raw !== "object" || raw === null) return null;
    const { algo, salt, hash } = raw as Record<string, unknown>;
    if (algo !== "scrypt") return null;
    const isHex = (value: unknown): value is string =>
      typeof value === "string" &&
      value.length > 0 &&
      value.length % 2 === 0 &&
      /^[0-9a-f]+$/i.test(value);
    if (!isHex(salt) || !isHex(hash)) return null;
    return { algo: "scrypt", salt, hash };
  }

  /**
   * Load the stored hash from `<dataDir>/auth.json`, seeding it from `seed`
   * (the env password) on first run.
   *
   * An unreadable, non-JSON, or malformed file is treated as absent, so a
   * provided seed rewrites it. Without a seed the store stays disabled.
   */
  static open(dataDir: string, seed: string | undefined): AuthStore {
    const store = new AuthStore(join(dataDir, "auth.json"));
    if (existsSync(store.file)) {
      try {
        store.record = AuthStore.parseRecord(JSON.parse(readFileSync(store.file, "utf8")));
      } catch {
        // unreadable or not JSON — fall through to seeding.
        store.record = null;
      }
    }
    // seed from the env password when there's nothing (usable) stored yet.
    if (!store.record && seed) store.persist(makeRecord(seed));
    return store;
  }

  /**
   * In-memory store seeded from a password — for tests. The record lives in
   * memory; the tmpdir path is only written if the password is later changed.
   */
  static memory(seed: string): AuthStore {
    const store = new AuthStore(join(tmpdir(), `aigetwey-auth-${randomBytes(4).toString("hex")}.json`));
    store.record = makeRecord(seed);
    return store;
  }

  /** True once a password is set (stored or seeded). */
  get enabled(): boolean {
    return this.record !== null;
  }

  /** Write `rec` to disk (creating the parent directory) and make it the active record. */
  private persist(rec: AuthRecord): void {
    mkdirSync(dirname(this.file), { recursive: true });
    // Owner-only permissions: the file holds a password hash. `mode` applies
    // only when the file is created; an existing file keeps its permissions.
    writeFileSync(this.file, JSON.stringify(rec), { mode: 0o600 });
    this.record = rec;
  }

  /**
   * Constant-time check of a presented password against the stored hash.
   *
   * @returns false when no password is set or `password` is not a string
   *   (e.g. a missing field in a request body) instead of throwing.
   */
  verify(password: string): boolean {
    if (!this.record || typeof password !== "string") return false;
    const salt = Buffer.from(this.record.salt, "hex");
    const expected = Buffer.from(this.record.hash, "hex");
    const got = hashPassword(password, salt);
    // timingSafeEqual throws on length mismatch, so compare lengths first.
    return got.length === expected.length && timingSafeEqual(got, expected);
  }

  /**
   * Change the password after verifying the current one.
   *
   * @returns `{ ok: true }` on success, otherwise `{ ok: false, error }` when
   *   `current` does not verify or `next` is shorter than 4 characters.
   */
  change(current: string, next: string): { ok: boolean; error?: string } {
    if (!this.verify(current)) return { ok: false, error: "current password is incorrect" };
    if (typeof next !== "string" || next.length < 4) {
      return { ok: false, error: "new password must be at least 4 characters" };
    }
    this.persist(makeRecord(next));
    return { ok: true };
  }
}
@@ -0,0 +1,93 @@
1
+ /**
2
+ * Canonical message format = OpenAI Chat Completions shape.
3
+ *
4
+ * Every ingress format is translated INTO this shape, and every provider format
5
+ * is translated OUT of it. Picking OpenAI as the pivot makes a new provider cost
6
+ * one adapter (N adapters) instead of N×N pairwise translators.
7
+ */
8
+
9
/** Who authored a message, using the OpenAI Chat Completions role names. */
export type Role =
  | "system"
  | "user"
  | "assistant"
  | "tool";

/** Text segment of a multi-part message. */
export interface CanonicalTextPart {
  type: "text";
  text: string;
}

/** Image segment of a multi-part message, carried as a `url` string. */
export interface CanonicalImagePart {
  type: "image_url";
  image_url: { url: string };
}

/** One segment of multi-part content, discriminated by `type`. */
export type CanonicalContentPart =
  | CanonicalTextPart
  | CanonicalImagePart;

/** A function invocation requested by an assistant turn. */
export interface CanonicalToolCall {
  id: string;
  type: "function";
  function: {
    name: string;
    /** Arguments as the raw JSON string, exactly as OpenAI emits it (not parsed). */
    arguments: string;
  };
}

/** A single conversation turn in canonical form. */
export interface CanonicalMessage {
  role: Role;
  /** A string for simple text, a part array for mixed content, or null. */
  content: string | CanonicalContentPart[] | null;
  /** Set on assistant turns that call tools. */
  tool_calls?: CanonicalToolCall[];
  /** Set on role="tool" messages; links back to the id of a tool call. */
  tool_call_id?: string;
  /** Tool/function name on tool messages. */
  name?: string;
}
44
+
45
/** Declaration of a callable function tool offered to the model. */
export interface CanonicalToolDef {
  type: "function";
  function: {
    name: string;
    description?: string;
    /** Parameter schema object, passed along as-is. */
    parameters?: Record<string, unknown>;
  };
}

/** A chat request in canonical (OpenAI Chat Completions) shape. */
export interface CanonicalRequest {
  model: string;
  messages: CanonicalMessage[];
  stream?: boolean;
  max_tokens?: number;
  temperature?: number;
  top_p?: number;
  stop?: string | string[];
  tools?: CanonicalToolDef[];
  tool_choice?: unknown;
  /** Any other field is carried through untouched. */
  [k: string]: unknown;
}
67
+
68
/** Token accounting for one response, in OpenAI field names. */
export interface CanonicalUsage {
  prompt_tokens: number;
  completion_tokens: number;
  total_tokens: number;
  /** Cached input tokens, normalized across providers. */
  cached_tokens?: number;
  cache_creation_tokens?: number;
  reasoning_tokens?: number;
}

/** Why generation stopped; null when no reason is reported. */
export type FinishReason =
  | "stop"
  | "length"
  | "tool_calls"
  | "content_filter"
  | null;

/** A non-streaming chat response in canonical shape. */
export interface CanonicalResponse {
  id: string;
  model: string;
  created: number;
  choices: {
    index: number;
    message: CanonicalMessage;
    finish_reason: FinishReason;
  }[];
  usage?: CanonicalUsage;
}

/** Wire format spoken by a provider or expected by a client. */
export type WireFormat =
  | "openai"
  | "anthropic"
  | "gemini";
@@ -0,0 +1,39 @@
1
/** Severity tag attached to a captured log line. */
type LogLevel = "LOG" | "INFO" | "WARN" | "ERROR" | "DEBUG";

/** One captured log line. */
interface LogEntry {
  /** Capture time in epoch milliseconds (`Date.now()`). */
  ts: number;
  level: LogLevel;
  message: string;
}

/** Callback invoked synchronously for each entry pushed while subscribed. */
type Listener = (entry: LogEntry) => void;

/** Maximum number of retained entries; the oldest entry is evicted first. */
const MAX_ENTRIES = 500;

/**
 * Bounded in-memory log: keeps the most recent `MAX_ENTRIES` entries and fans
 * each new entry out to live subscribers.
 */
class ConsoleBuffer {
  private entries: LogEntry[] = [];
  private readonly listeners = new Set<Listener>();

  /**
   * Append an entry stamped with the current time and notify every subscriber.
   *
   * A subscriber that throws is isolated: the error neither propagates to the
   * code that is logging nor prevents later subscribers from being notified.
   */
  push(level: LogLevel, message: string): void {
    const entry: LogEntry = { ts: Date.now(), level, message };
    this.entries.push(entry);
    if (this.entries.length > MAX_ENTRIES) this.entries.shift();
    for (const fn of this.listeners) {
      try {
        fn(entry);
      } catch {
        // Deliberately not reported: logging the failure could re-enter push()
        // if the console is mirrored into this buffer.
      }
    }
  }

  /** Snapshot of the retained entries, oldest first (a copy of the internal array). */
  recent(): LogEntry[] {
    return this.entries.slice();
  }

  /**
   * Register `fn` for future entries.
   *
   * @returns a function that removes the subscription.
   */
  subscribe(fn: Listener): () => void {
    this.listeners.add(fn);
    return () => { this.listeners.delete(fn); };
  }

  /** Drop all retained entries; subscriptions are left in place. */
  clear(): void {
    this.entries = [];
  }
}

/** Process-wide shared buffer instance. */
export const consoleBuffer = new ConsoleBuffer();
export type { LogEntry, LogLevel };
@@ -0,0 +1,116 @@
1
+ /**
2
+ * Fallback engine. Walks a prioritized chain of routes, rotating keys within
3
+ * each provider, until one succeeds or the chain is exhausted.
4
+ *
5
+ * Streaming note: callUpstream() throws BEFORE returning a stream when the
6
+ * upstream status is >= 400, so the commit point is a 200 response. A failure
7
+ * mid-stream surfaces later during body iteration (in the handler), which we
8
+ * deliberately do NOT retry — fail clean, no duplicate output.
9
+ */
10
+ import type { ResolvedRoute } from "../config.js";
11
+ import type { CanonicalRequest } from "./canonical.js";
12
+ import type { ThinkingConfig } from "../translator/thinkingUnified.js";
13
+ import type { KeyPool } from "./keypool.js";
14
+ import {
15
+ callUpstream,
16
+ type NonStreamResult,
17
+ type StreamResult,
18
+ type UpstreamError,
19
+ } from "../upstream/client.js";
20
+
21
/** One entry of the fallback trace, delivered through `FallbackOpts.onAttempt`. */
export interface AttemptLog {
  /** Id of the provider the entry refers to. */
  provider: string;
  /** Upstream model name of the route. */
  model: string;
  /** Status code of the attempt, when one is known. */
  status?: number;
  /** What happened: served, retried on the same provider, moved on, aborted, or skipped. */
  outcome:
    | "success"
    | "retry"
    | "fallback"
    | "fatal"
    | "skip";
  /** Free-text reason, e.g. why a provider was skipped. */
  detail?: string;
}
28
+
29
/** Per-request options for `executeWithFallback`. */
export interface FallbackOpts {
  /** Whether the upstream call should be made in streaming mode. */
  stream: boolean;
  /** Abort signal forwarded to each upstream call. */
  signal?: AbortSignal;
  /** Receives one trace entry per attempt or skipped provider. */
  onAttempt?: (log: AttemptLog) => void;
  /** Told which key the pool handed out for the winning attempt (handler uses it for usage). */
  onServed?: (route: ResolvedRoute, key: string) => void;
  /** When set, a provider for which this returns true is skipped (quota exhausted). */
  isExhausted?: (provider: ResolvedRoute["provider"]) => boolean;
  /** Captured client thinking intent, applied per attempt in the provider's format. */
  thinkingIntent?: ThinkingConfig | null;
}
40
+
41
/** Outcome of a successful walk of the fallback chain. */
export interface FallbackResult {
  /** The route that actually served the request (needed for response translation). */
  route: ResolvedRoute;
  /** The upstream reply from that route, streaming or not. */
  result: NonStreamResult | StreamResult;
}
46
+
47
/**
 * Try each route in priority order until one serves the request.
 *
 * For every route: skip it when its provider is reported exhausted, otherwise
 * make up to `max_retries + 1` attempts, asking the pool for a key each time.
 * A retryable failure penalizes the key and either retries on the same
 * provider (attempts and keys remaining) or moves to the next route.
 *
 * @returns the serving route together with its upstream result.
 * @throws the upstream error immediately when it is not retryable; once the
 *   chain is exhausted, the most recent upstream error, or a non-retryable
 *   503 error when no attempt was made at all.
 */
export async function executeWithFallback(
  routes: ResolvedRoute[],
  pool: KeyPool,
  req: CanonicalRequest,
  opts: FallbackOpts,
): Promise<FallbackResult> {
  const report = opts.onAttempt ?? (() => {});
  let mostRecentFailure: UpstreamError | undefined;

  for (const route of routes) {
    const provider = route.provider;
    // trace entries for this route all share the provider id and model
    const emit = (rest: Omit<AttemptLog, "provider" | "model">): void =>
      report({ provider: provider.id, model: route.model, ...rest });

    // a provider whose token budget is spent for this window is passed over
    // entirely — the provider-wide analogue of a key cooling down.
    if (opts.isExhausted?.(provider)) {
      emit({ outcome: "skip", detail: "quota exhausted" });
      continue;
    }

    let remaining = provider.max_retries + 1;
    while (remaining > 0) {
      remaining -= 1;

      const key = pool.pick(provider);
      if (key === null) {
        // no key of this provider is currently usable
        emit({ outcome: "skip", detail: "all keys cooling down" });
        break;
      }

      try {
        const result = await callUpstream(provider, req, route.model, {
          stream: opts.stream,
          key,
          signal: opts.signal,
          thinkingIntent: opts.thinkingIntent,
        });
        pool.success(provider, key);
        opts.onServed?.(route, key);
        emit({ status: 200, outcome: "success" });
        return { route, result };
      } catch (caught) {
        const failure = caught as UpstreamError;
        mostRecentFailure = failure;

        if (!failure.retryable) {
          // the request itself is at fault; another provider would not help
          emit({ status: failure.status, outcome: "fatal" });
          throw failure;
        }

        pool.penalize(provider, key, {
          message: failure.message ?? `HTTP ${failure.status}`,
          status: failure.status,
        });
        const canRetryHere = remaining > 0 && pool.hasAvailable(provider);
        emit({ status: failure.status, outcome: canRetryHere ? "retry" : "fallback" });
        if (!canRetryHere) break; // on to the next route in the chain
      }
    }
  }

  // every route was tried or skipped
  if (mostRecentFailure) throw mostRecentFailure;
  const unavailable = new Error("no available provider for this model") as UpstreamError;
  unavailable.status = 503;
  unavailable.retryable = false;
  throw unavailable;
}
@@ -0,0 +1,236 @@
1
+ /**
2
+ * Core request pipeline, independent of which client endpoint was hit.
3
+ *
4
+ * client body (clientFormat)
5
+ * -> ingress adapter -> canonical request
6
+ * -> config.resolve(model) -> prioritized provider chain + upstream model
7
+ * -> fallback engine -> rotate keys, walk the chain until one serves
8
+ * -> provider reply -> canonical -> egress adapter -> client body
9
+ *
10
+ * Streaming (Phase 3): provider SSE -> canonical chunks -> client SSE. Fallback
11
+ * + key rotation (Phase 4) run here. RTK compression + caveman/ponytail
12
+ * injection (Phase 6) transform the request before routing; usage logging
13
+ * (Phase 5) records each served request.
14
+ */
15
+ import type { GatewayConfig, ResolvedRoute } from "../config.js";
16
+ import type { WireFormat, CanonicalUsage } from "./canonical.js";
17
+ import { adapterFor } from "../adapters/index.js";
18
+ import type { UpstreamError } from "../upstream/client.js";
19
+ import { parseSSE, encodeSSE } from "../stream/sse.js";
20
+ import { streamAdapterFor } from "../stream/index.js";
21
+ import type { CanonicalChunk } from "../stream/chunk.js";
22
+ import type { KeyPool } from "./keypool.js";
23
+ import type { QuotaTracker } from "./quota.js";
24
+ import { executeWithFallback } from "./fallback.js";
25
+ import { type UsageDB, computeCost } from "../db.js";
26
+ import { compressMessages } from "../rtk/index.js";
27
+ import { injectInto } from "../inject/index.js";
28
+ import { parseSuffix, captureThinking, type ThinkingConfig } from "../translator/thinkingUnified.js";
29
+ import { compressWithHeadroom, formatHeadroomLog } from "../headroom/compress.js";
30
+ import { getPricingForModel } from "../providers/pricing.js";
31
+
32
/** What `handle` hands back to the HTTP layer: a status plus exactly one kind of body. */
export interface HandleResult {
  /** HTTP status to send. */
  status: number;
  /** Body of a non-streaming reply, to be sent as JSON. */
  json?: unknown;
  /** Body of a streaming reply: an async iterable of SSE bytes. */
  sse?: AsyncIterable<Uint8Array>;
}
39
+
40
/**
 * Error carrying the HTTP status and response payload the gateway should send
 * to the client.
 *
 * `message` is the payload itself when it is a string, otherwise its JSON
 * serialization. `name` is set to "GatewayError" so logs and stack traces
 * identify the error type.
 */
export class GatewayError extends Error {
  constructor(
    readonly status: number,
    readonly payload: unknown,
  ) {
    super(GatewayError.describe(payload));
    this.name = "GatewayError";
  }

  /**
   * Render `payload` as an error message without ever throwing: this runs
   * while an earlier failure is being reported, so a payload that cannot be
   * serialized (circular reference, BigInt) must not replace that failure.
   */
  private static describe(payload: unknown): string {
    if (typeof payload === "string") return payload;
    try {
      // JSON.stringify yields undefined for undefined/functions/symbols; keep
      // the resulting empty message rather than inventing text.
      return JSON.stringify(payload) ?? "";
    } catch {
      return "[unserializable payload]";
    }
  }
}
48
+
49
/** Collaborators `handle` needs; everything except `config` and `pool` is optional. */
export interface HandleDeps {
  /** Gateway configuration: model resolution and endpoint feature switches. */
  config: GatewayConfig;
  /** Key pool used by the fallback engine to pick and rotate provider keys. */
  pool: KeyPool;
  /** Usage database; when absent, served requests are not recorded. */
  db?: UsageDB;
  /** Quota tracker; when absent, no provider is skipped or charged for quota. */
  quota?: QuotaTracker;
  /** Sink for diagnostic lines emitted along the pipeline. */
  log?: (msg: string) => void;
  /** Clock used for latency measurement; `Date.now` is used when omitted. */
  now?: () => number;
}
57
+
58
/**
 * Account for one served request: charge the provider's quota window with the
 * prompt + completion tokens, then — when a usage database is configured —
 * store a usage row including the computed cost.
 *
 * Missing usage counts as zero tokens.
 */
function recordUsage(
  deps: HandleDeps,
  route: ResolvedRoute,
  usage: CanonicalUsage | undefined,
  status: number,
  latencyMs: number,
  stream: boolean,
): void {
  const promptTokens = usage?.prompt_tokens ?? 0;
  const completionTokens = usage?.completion_tokens ?? 0;

  // the whole request counts against the serving provider's window budget.
  deps.quota?.consume(route.provider, promptTokens + completionTokens);

  const { db } = deps;
  if (!db) return;

  // Prices set explicitly on the route win; otherwise the pricing table is
  // consulted so the cost resolves per model instead of showing $0.
  const { provider, model } = route;
  const tablePrice = getPricingForModel(provider.id, model);
  const inputPrice = route.price_in ?? tablePrice?.input;
  const outputPrice = route.price_out ?? tablePrice?.output;
  const cost = computeCost(promptTokens, completionTokens, inputPrice, outputPrice);

  db.record({
    alias: route.alias,
    provider: provider.id,
    model,
    tokens_in: promptTokens,
    tokens_out: completionTokens,
    cached_tokens: usage?.cached_tokens ?? 0,
    cost,
    status,
    latency_ms: latencyMs,
    stream: stream ? 1 : 0,
  });
}
89
+
90
/**
 * Record usage for a served request without letting a bookkeeping failure
 * (quota tracker or usage database) affect the reply: the error is reported
 * through `deps.log` and otherwise dropped.
 */
function recordUsageFailOpen(
  deps: HandleDeps,
  route: ResolvedRoute,
  usage: CanonicalUsage | undefined,
  latencyMs: number,
  stream: boolean,
): void {
  try {
    recordUsage(deps, route, usage, 200, latencyMs, stream);
  } catch (e) {
    deps.log?.(`[usage] record failed: ${e instanceof Error ? e.message : String(e)}`);
  }
}

/**
 * Run one client request through the gateway pipeline.
 *
 * Steps: translate the body to canonical form, strip/capture the thinking
 * suffix, resolve the model to a route chain, apply the request transforms
 * (RTK compression, prompt injection, Headroom), execute with fallback, then
 * translate the reply back into the client's wire format.
 *
 * @param deps collaborators (config, key pool, optional db/quota/log/clock)
 * @param clientFormat wire format of the incoming body and of the reply
 * @param body the parsed client request body
 * @param signal optional abort signal forwarded to upstream calls
 * @returns status 200 with either a JSON body or an SSE byte stream
 * @throws GatewayError 400 when the request has no model, 404 when the model
 *   resolves to no route, and the upstream status (502 when unknown) when the
 *   fallback chain fails
 */
export async function handle(
  deps: HandleDeps,
  clientFormat: WireFormat,
  body: unknown,
  signal?: AbortSignal,
): Promise<HandleResult> {
  const { config, pool } = deps;
  const now = deps.now ?? Date.now;
  const startedAt = now();
  const ingress = adapterFor(clientFormat);
  const canonical = ingress.requestToCanonical(body);

  if (!canonical.model) {
    throw new GatewayError(400, { error: "missing 'model' in request" });
  }

  // Thinking: a model-name suffix like "claude-opus-4-6(high)" or "alias(none)"
  // carries the client's thinking intent. Strip it so routing matches the clean
  // model, and capture the intent (suffix wins, else any reasoning param already
  // in the body). It's applied per-attempt in the served provider's native format
  // (upstream/client.ts), driven by the capabilities table — a no-op for models
  // that can't reason. Matches aigetwey's capture-before-translate flow.
  const { cleanModel, override } = parseSuffix(canonical.model);
  canonical.model = cleanModel;
  const thinkingIntent: ThinkingConfig | null =
    override ?? captureThinking(canonical as Record<string, unknown>);

  const routes = config.resolve(canonical.model);
  if (routes.length === 0) {
    throw new GatewayError(404, { error: `unknown model "${canonical.model}"` });
  }

  // Pipeline order matters: RTK compresses tool_result in the INPUT first, then
  // inject prepends the output-style system prompt. They touch different parts
  // of the request and stack cleanly. Both run before routing so every fallback
  // attempt sends the same transformed request.
  if (config.endpoint.rtk) {
    const stats = compressMessages(canonical.messages);
    if (stats.hits > 0) {
      // guard the ratio so a zero-byte input cannot print "NaN%".
      const pct = stats.bytesIn > 0 ? Math.round((1 - stats.bytesOut / stats.bytesIn) * 100) : 0;
      deps.log?.(
        `[rtk] compressed ${stats.hits} tool output(s): ${stats.bytesIn}B -> ${stats.bytesOut}B (${pct}%) via [${stats.shapes.join(",")}]`,
      );
    }
  }

  // fail-open: an injection error must never break the request.
  try {
    const injected = injectInto(canonical, {
      caveman: config.endpoint.caveman,
      ponytail: config.endpoint.ponytail,
    });
    if (injected) deps.log?.(`[inject] caveman=${config.endpoint.caveman} ponytail=${config.endpoint.ponytail}`);
  } catch (e) {
    deps.log?.(`[inject] skipped (error): ${(e as Error).message}`);
  }

  // Headroom: pipe the (OpenAI-shaped) messages through the external compression
  // proxy when enabled. Fail-open — on any error the original messages stand and
  // the request proceeds. Runs after RTK/inject so it compresses the final context.
  if (config.endpoint.headroom.enabled) {
    const hr = await compressWithHeadroom(canonical.messages, {
      url: config.endpoint.headroom.url,
      model: canonical.model,
      compressUserMessages: config.endpoint.headroom.compress_user_messages,
    });
    if (hr) {
      canonical.messages = hr.messages;
      const line = formatHeadroomLog(hr);
      if (line) deps.log?.(`[headroom] ${line}`);
    }
  }

  const wantStream = canonical.stream === true;
  const quota = deps.quota;

  let won;
  try {
    won = await executeWithFallback(routes, pool, canonical, {
      stream: wantStream,
      signal,
      thinkingIntent,
      isExhausted: quota ? (p) => quota.isExhausted(p) : undefined,
      onAttempt: (a) =>
        deps.log?.(`[fallback] ${a.provider}/${a.model} ${a.status ?? "-"} -> ${a.outcome}${a.detail ? ` (${a.detail})` : ""}`),
    });
  } catch (e) {
    // surface the upstream failure with its own status and, when it parses,
    // its own JSON body.
    const err = e as UpstreamError;
    const status = err.status ?? 502;
    let payload: unknown = { error: err.message };
    if (err.body) {
      try {
        payload = JSON.parse(err.body);
      } catch {
        payload = { error: err.body };
      }
    }
    throw new GatewayError(status, payload);
  }

  const { route, result } = won;

  if (!result.stream) {
    const clientBody = ingress.responseFromCanonical(result.response);
    // the reply is already in hand — a bookkeeping failure must not discard it.
    recordUsageFailOpen(deps, route, result.response.usage, now() - startedAt, false);
    return { status: 200, json: clientBody };
  }

  // streaming: provider SSE -> canonical chunks -> client SSE bytes. The
  // provider and client formats may differ (e.g. an Anthropic client talking to
  // an OpenAI provider), so both ends translate through the canonical chunk.
  const providerStream = streamAdapterFor(route.provider.format);
  const clientStream = streamAdapterFor(clientFormat);
  const canonicalChunks = providerStream.streamToCanonical(parseSSE(result.body));

  // tap the canonical chunk stream to capture usage from the final chunk(s),
  // which arrive as partial fields across multiple chunks.
  let lastUsage: CanonicalUsage | undefined;
  async function* tap(): AsyncGenerator<CanonicalChunk> {
    for await (const chunk of canonicalChunks) {
      if (chunk.usage) {
        const promptTokens = chunk.usage.prompt_tokens ?? lastUsage?.prompt_tokens ?? 0;
        const completionTokens = chunk.usage.completion_tokens ?? lastUsage?.completion_tokens ?? 0;
        lastUsage = {
          prompt_tokens: promptTokens,
          completion_tokens: completionTokens,
          // keep the merged record self-consistent instead of a hard-coded 0.
          total_tokens: promptTokens + completionTokens,
          cached_tokens: chunk.usage.cached_tokens ?? lastUsage?.cached_tokens,
        };
      }
      yield chunk;
    }
  }

  const clientEvents = clientStream.streamFromCanonical(tap());

  async function* toBytes(): AsyncGenerator<Uint8Array> {
    try {
      for await (const ev of clientEvents) {
        yield encodeSSE(ev);
      }
    } finally {
      // record once the stream drains (or the client disconnects) so usage is
      // captured even on early termination. Fail-open: an error thrown from a
      // finally block would replace the stream error that is already propagating.
      recordUsageFailOpen(deps, route, lastUsage, now() - startedAt, true);
    }
  }

  return { status: 200, sse: toBytes() };
}