@loreai/gateway 0.13.4 → 0.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/index.ts CHANGED
@@ -1,41 +1,41 @@
1
1
  /**
2
- * Lore Gateway — entry point.
2
+ * Lore Gateway — package entry point.
3
3
  *
4
- * Starts the HTTP proxy server that applies Lore's context management
5
- * pipeline to any AI coding client speaking the Anthropic or OpenAI
6
- * protocol.
4
+ * Library exports for programmatic use, plus `_cli()` for the CLI binary.
7
5
  *
8
- * Usage:
9
- * bun run packages/gateway/src/index.ts
10
- * ANTHROPIC_BASE_URL=http://127.0.0.1:6969 claude
6
+ * Library usage:
7
+ * import { startServer, loadConfig } from "@loreai/gateway";
8
+ *
9
+ * CLI usage (via bin wrapper):
10
+ * lore start
11
+ * lore run claude
11
12
  */
12
- import { loadConfig } from "./config";
13
- import { startServer } from "./server";
14
- import { resetPipelineState } from "./pipeline";
13
+ import "../instrument";
15
14
 
16
15
  // ---------------------------------------------------------------------------
17
- // Boot
16
+ // Library API
18
17
  // ---------------------------------------------------------------------------
19
18
 
20
- const config = loadConfig();
21
- const server = startServer(config);
19
+ export { loadConfig } from "./config";
20
+ export type { GatewayConfig } from "./config";
21
+ export { startServer } from "./server";
22
+ export { handleRequest, resetPipelineState } from "./pipeline";
22
23
 
23
- const addr = `http://${config.host}:${server.port}`;
24
- console.error(`[lore] Gateway listening on ${addr}`);
25
- console.error(`[lore] Model routing: claude-* → Anthropic, nvidia/* → Nvidia NIM, gpt-* → OpenAI, …`);
26
- console.error(`[lore] Plugin auto-detects gateway — just start OpenCode normally`);
24
+ // ---------------------------------------------------------------------------
25
+ // CLI entry — called by dist/bin.cjs or `bun run src/index.ts`
26
+ // ---------------------------------------------------------------------------
27
+
28
+ export { _cli } from "./cli/main";
27
29
 
28
30
  // ---------------------------------------------------------------------------
29
- // Graceful shutdown
31
+ // Direct execution — `bun run src/index.ts` still works as before
30
32
  // ---------------------------------------------------------------------------
31
33
 
32
- async function shutdown() {
33
- console.error("[lore] Shutting down…");
34
- server.stop();
35
- // Gracefully shut down the batch queue (flushes pending items synchronously)
36
- await resetPipelineState();
37
- process.exit(0);
34
+ if (typeof Bun !== "undefined" && Bun.main === import.meta.path) {
35
+ // Direct execution (e.g. `bun run src/index.ts` from the OpenCode plugin) defaults to
36
+ // server-only mode (`start`), not `run` — there's no TTY and no reason to auto-detect
37
+ // agents when launched as an embedded server. esbuild CJS output drops import.meta to
38
+ // `{}`, so this condition is always false in the npm bundle and the import() is dead code.
39
+ // NOTE(review): the promise is neither awaited nor caught — failures become unhandled rejections.
40
+ import("./cli/start").then(({ commandStart }) => commandStart({}));
38
41
  }
39
-
40
- process.on("SIGINT", () => shutdown());
41
- process.on("SIGTERM", () => shutdown());
@@ -16,6 +16,36 @@ import { authHeaders } from "./auth";
16
16
  /** Tracks worker session IDs so temporal capture can skip them. */
17
17
  export const activeWorkerCalls = new Set<string>();
18
18
 
19
+ // ---------------------------------------------------------------------------
20
+ // Retry helpers
21
+ // ---------------------------------------------------------------------------
22
+
23
/** Number of retries permitted after the initial attempt. */
const MAX_RETRIES = 3;

/** Upstream HTTP status codes considered transient, i.e. worth retrying. */
const TRANSIENT_CODES = new Set<number>([429, 500, 502, 503, 529]);
26
+
27
/**
 * Parse the `Retry-After` header of a response into a delay in milliseconds.
 *
 * Two header forms are understood: a number of seconds, and a date string
 * accepted by `Date.parse`. A date that is already in the past yields `0`.
 *
 * @param response - Response whose `retry-after` header is inspected.
 * @returns The delay in milliseconds, or `null` when the header is absent,
 *   blank, negative, infinite, or not parseable.
 */
function parseRetryAfter(response: Response): number | null {
  const header = response.headers.get("retry-after")?.trim();
  // Covers a missing header as well as an empty or whitespace-only value,
  // which `Number()` would otherwise coerce to 0 (an immediate retry).
  if (!header) return null;

  const seconds = Number(header);
  if (!Number.isNaN(seconds)) {
    // A numeric value is never re-interpreted as a date. Negative or infinite
    // delays are invalid, so report "no usable hint" instead of passing them on.
    return Number.isFinite(seconds) && seconds >= 0 ? seconds * 1000 : null;
  }

  const date = Date.parse(header);
  return Number.isNaN(date) ? null : Math.max(0, date - Date.now());
}
37
+
38
/**
 * Compute how long to wait before the next retry.
 *
 * @param attempt - Zero-based index of the attempt that just failed.
 * @param retryAfterMs - Server-provided delay in milliseconds, or `null` if none.
 * @returns Delay in milliseconds. On the first attempt a usable `retryAfterMs`
 *   is honoured, capped at 30s; otherwise the delay doubles per attempt
 *   (1s, 2s, 4s, ...) and is capped at 8s.
 */
function backoffMs(attempt: number, retryAfterMs: number | null): number {
  // A NaN or negative hint would turn into an immediate retry once handed to
  // a timer, so such values are ignored in favour of the exponential schedule.
  if (
    attempt === 0 &&
    retryAfterMs != null &&
    !Number.isNaN(retryAfterMs) &&
    retryAfterMs >= 0
  ) {
    return Math.min(retryAfterMs, 30_000); // cap Retry-After at 30s
  }
  return Math.min(1000 * 2 ** attempt, 8000); // 1s, 2s, 4s, capped at 8s
}
44
+
45
/** Return a promise that resolves (never rejects) once a `ms`-millisecond timer fires. */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
48
+
19
49
  // ---------------------------------------------------------------------------
20
50
  // LLMClient factory
21
51
  // ---------------------------------------------------------------------------
@@ -64,41 +94,83 @@ export function createGatewayLLMClient(
64
94
  ]
65
95
  : undefined;
66
96
 
67
- const response = await fetch(url, {
68
- method: "POST",
69
- headers: {
70
- "Content-Type": "application/json",
71
- "anthropic-version": "2023-06-01",
72
- ...authHeaders(cred),
73
- },
74
- // opts.thinking is intentionally not forwarded — this bare API
75
- // call never includes the `thinking` parameter so Anthropic
76
- // models won't produce thinking tokens regardless.
77
- body: JSON.stringify({
78
- model: model.modelID,
79
- max_tokens: 8192,
80
- system: systemPayload ?? system,
81
- messages: [{ role: "user", content: user }],
82
- }),
97
+ const body = JSON.stringify({
98
+ model: model.modelID,
99
+ max_tokens: 8192,
100
+ system: systemPayload ?? system,
101
+ messages: [{ role: "user", content: user }],
83
102
  });
84
103
 
85
- if (!response.ok) {
104
+ const headers = {
105
+ "Content-Type": "application/json",
106
+ "anthropic-version": "2023-06-01",
107
+ ...authHeaders(cred),
108
+ };
109
+
110
+ // Retry loop for transient errors (429, 5xx)
111
+ for (let attempt = 0; ; attempt++) {
112
+ let response: Response;
113
+ try {
114
+ response = await fetch(url, {
115
+ method: "POST",
116
+ headers,
117
+ // opts.thinking is intentionally not forwarded — this bare API
118
+ // call never includes the `thinking` parameter so Anthropic
119
+ // models won't produce thinking tokens regardless.
120
+ body,
121
+ });
122
+ } catch (e) {
123
+ // Network/fetch error — retry if attempts remain
124
+ if (attempt < MAX_RETRIES) {
125
+ const delay = backoffMs(attempt, null);
126
+ log.warn(
127
+ `worker request network error (attempt ${attempt + 1}/${MAX_RETRIES + 1}), retrying in ${delay}ms`,
128
+ );
129
+ await sleep(delay);
130
+ continue;
131
+ }
132
+ throw e; // exhausted retries — rethrow to outer catch
133
+ }
134
+
135
+ if (response.ok) {
136
+ const data = (await response.json()) as {
137
+ content?: Array<{ type: string; text?: string }>;
138
+ };
139
+
140
+ const textBlock = data.content?.find(
141
+ (b) => b.type === "text" && typeof b.text === "string",
142
+ );
143
+
144
+ return textBlock?.text ?? null;
145
+ }
146
+
147
+ // Non-transient error — fail immediately, no retry
148
+ if (!TRANSIENT_CODES.has(response.status)) {
149
+ const text = await response.text().catch(() => "(no body)");
150
+ log.error(
151
+ `worker upstream request failed: ${response.status} ${response.statusText} — ${text}`,
152
+ );
153
+ return null;
154
+ }
155
+
156
+ // Transient error — retry if attempts remain
157
+ if (attempt < MAX_RETRIES) {
158
+ const retryAfter = parseRetryAfter(response);
159
+ const delay = backoffMs(attempt, retryAfter);
160
+ log.warn(
161
+ `worker upstream ${response.status} (attempt ${attempt + 1}/${MAX_RETRIES + 1}), retrying in ${delay}ms`,
162
+ );
163
+ await sleep(delay);
164
+ continue;
165
+ }
166
+
167
+ // Exhausted retries
86
168
  const text = await response.text().catch(() => "(no body)");
87
169
  log.error(
88
- `worker upstream request failed: ${response.status} ${response.statusText} — ${text}`,
170
+ `worker upstream request failed after ${MAX_RETRIES + 1} attempts: ${response.status} ${response.statusText} — ${text}`,
89
171
  );
90
172
  return null;
91
173
  }
92
-
93
- const data = (await response.json()) as {
94
- content?: Array<{ type: string; text?: string }>;
95
- };
96
-
97
- const textBlock = data.content?.find(
98
- (b) => b.type === "text" && typeof b.text === "string",
99
- );
100
-
101
- return textBlock?.text ?? null;
102
174
  } catch (e) {
103
175
  log.error("worker prompt failed:", e);
104
176
  return null;