@loreai/gateway 0.13.3 → 0.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@loreai/gateway",
3
- "version": "0.13.3",
3
+ "version": "0.14.0",
4
4
  "type": "module",
5
5
  "license": "FSL-1.1-Apache-2.0",
6
6
  "description": "Lore as a transparent LLM proxy — context management for any AI coding client",
@@ -13,23 +13,26 @@
13
13
  }
14
14
  },
15
15
  "bin": {
16
- "lore-gateway": "./dist/index.js"
16
+ "lore": "./dist/bin.cjs",
17
+ "lore-gateway": "./dist/bin.cjs"
17
18
  },
18
19
  "scripts": {
19
20
  "typecheck": "tsc --noEmit",
20
21
  "build": "bun run script/build.ts",
22
+ "bundle": "bun run script/bundle.ts",
23
+ "build:binary": "bun run script/build.ts --binary",
21
24
  "start": "bun run src/index.ts"
22
25
  },
23
- "dependencies": {
24
- "@loreai/core": "0.13.3"
25
- },
26
+ "dependencies": {},
26
27
  "files": [
27
28
  "src/",
28
29
  "dist/",
30
+ "!dist/**/*.map",
29
31
  "README.md",
30
32
  "LICENSE"
31
33
  ],
32
34
  "engines": {
35
+ "node": ">=22.15",
33
36
  "bun": ">=1.2.0"
34
37
  },
35
38
  "repository": {
@@ -49,5 +52,10 @@
49
52
  "anthropic",
50
53
  "openai"
51
54
  ],
52
- "author": "BYK"
55
+ "author": "BYK",
56
+ "devDependencies": {
57
+ "@loreai/core": "0.14.0",
58
+ "@sentry/bun": "^10.52.0",
59
+ "binpunch": "^1.0.0"
60
+ }
53
61
  }
@@ -135,6 +135,9 @@ export function createBatchLLMClient(
135
135
  let flushTimer: ReturnType<typeof setInterval> | null = null;
136
136
  let shuttingDown = false;
137
137
 
138
+ /** Credentials whose batch API access has been permanently disabled (401/403). */
139
+ const disabledBatchAuth = new Set<string>();
140
+
138
141
  // Stats
139
142
  let totalQueued = 0;
140
143
  let totalBatched = 0;
@@ -169,7 +172,19 @@ export function createBatchLLMClient(
169
172
 
170
173
  if (!response.ok) {
171
174
  const text = await response.text().catch(() => "(no body)");
172
- log.error(`batch create failed: ${response.status} ${response.statusText} ${text}`);
175
+ // Permanent auth errors — disable batch API for this credential
176
+ if (response.status === 401 || response.status === 403) {
177
+ const key = authKey(auth);
178
+ if (!disabledBatchAuth.has(key)) {
179
+ disabledBatchAuth.add(key);
180
+ log.warn(
181
+ `batch API disabled for this credential (${response.status}): ${text}. ` +
182
+ `Future worker calls will use individual requests.`,
183
+ );
184
+ }
185
+ } else {
186
+ log.error(`batch create failed: ${response.status} ${response.statusText} — ${text}`);
187
+ }
173
188
  // Fall back to synchronous for all items
174
189
  await fallbackAll(items);
175
190
  return;
@@ -231,6 +246,11 @@ export function createBatchLLMClient(
231
246
  }
232
247
 
233
248
  for (const { auth, items } of byAuth.values()) {
249
+ // Skip batch API for credentials with permanent auth failures
250
+ if (disabledBatchAuth.has(authKey(auth))) {
251
+ await fallbackAll(items);
252
+ continue;
253
+ }
234
254
  await submitBatch(auth, items);
235
255
  }
236
256
  }
@@ -0,0 +1,344 @@
1
+ /**
2
+ * Cache analytics — deterministic cache-bust detection using API request
3
+ * body prefix comparison and Anthropic response cache fields.
4
+ *
5
+ * Instead of fingerprinting internal message representations, this module
6
+ * compares the actual serialized JSON request body byte-for-byte across
7
+ * turns. When the prefix diverges, it maps the byte offset back to a
8
+ * semantic location in the JSON structure (e.g. "messages[3].content[1]").
9
+ *
10
+ * The API response's `cache_read_input_tokens` and
11
+ * `cache_creation_input_tokens` provide ground-truth confirmation.
12
+ *
13
+ * Request bodies are stored zstd-compressed (~99.9% reduction on
14
+ * repetitive JSON) to keep per-session memory overhead low.
15
+ */
16
+
17
+ import type {
18
+ CacheAnalytics,
19
+ CacheTurnAnalysis,
20
+ GatewayUsage,
21
+ } from "./translate/types.ts";
22
+ import { log } from "@loreai/core";
23
+
24
+ // ---------------------------------------------------------------------------
25
+ // Compression helpers (Bun built-in zstd, zero dependencies)
26
+ // ---------------------------------------------------------------------------
27
+
28
/**
 * Compress a serialized request body with Bun's built-in zstd codec.
 *
 * @param body - Text to compress.
 * @returns The bytes produced by `Bun.zstdCompressSync`.
 */
export function compressBody(body: string): Uint8Array {
  const rawBytes = Buffer.from(body);
  return Bun.zstdCompressSync(rawBytes);
}
31
+
32
/**
 * Inverse of `compressBody`: inflate zstd-compressed bytes back into text.
 *
 * @param compressed - Bytes to hand to `Bun.zstdDecompressSync`.
 * @returns The decompressed bytes decoded with `Buffer`'s default encoding.
 */
export function decompressBody(compressed: Uint8Array): string {
  // The cast only adjusts the typed-array generic for Bun's signature.
  const inflated = Bun.zstdDecompressSync(
    compressed as Uint8Array<ArrayBuffer>,
  );
  return Buffer.from(inflated).toString();
}
37
+
38
+ // ---------------------------------------------------------------------------
39
+ // Byte-level prefix comparison
40
+ // ---------------------------------------------------------------------------
41
+
42
/**
 * Locate the first position at which two strings disagree.
 *
 * Positions are string indices (the unit used by `a[i]`), compared one at a
 * time from the start.
 *
 * @param a - First string.
 * @param b - Second string.
 * @returns The index of the first mismatch, or the length of the shorter
 *   string when one is a prefix of the other (including when they are equal).
 */
export function findDivergenceOffset(a: string, b: string): number {
  const limit = Math.min(a.length, b.length);
  let pos = 0;
  while (pos < limit && a[pos] === b[pos]) {
    pos++;
  }
  return pos;
}
53
+
54
+ // ---------------------------------------------------------------------------
55
+ // Semantic location mapping
56
+ // ---------------------------------------------------------------------------
57
+
58
/**
 * Map an offset in a serialized JSON string to a semantic JSON path.
 *
 * Scans `json` from index 0 up to (but not including) `offset`, maintaining a
 * stack of the containers that are still open at that point. Object frames
 * remember the key of the property currently being read; array frames
 * remember the index of the current element. The path is assembled from that
 * stack.
 *
 * An object frame's key is cleared when a `,` ends the property, so an offset
 * that falls inside the *next* key is reported against the enclosing
 * container rather than against the previous sibling's key.
 *
 * Offsets are string indices (the same unit `json[i]` uses).
 *
 * @param json - Serialized JSON text.
 * @param offset - Index to locate inside `json`.
 * @returns A path such as "messages[3].content[1].text", "system" or
 *   "tools[2]"; "<end>" when `offset` is at or beyond the end of `json`,
 *   "<start>" when it is 0, and "<root>" when no key or index applies.
 */
export function mapOffsetToJsonPath(json: string, offset: number): string {
  if (offset >= json.length) return "<end>";
  if (offset === 0) return "<start>";

  // One frame per open container, outermost first.
  type Frame =
    | { kind: "object"; key: string }
    | { kind: "array"; index: number };

  const stack: Frame[] = [];
  let inString = false;
  let escaped = false;
  let collectingKey = false;
  let currentKey = "";

  // `offset < json.length` is guaranteed by the guard above.
  for (let i = 0; i < offset; i++) {
    const ch = json.charAt(i);

    if (inString) {
      if (escaped) {
        // Character following a backslash: never terminates the string.
        escaped = false;
        if (collectingKey) currentKey += ch;
      } else if (ch === "\\") {
        escaped = true;
        if (collectingKey) currentKey += ch;
      } else if (ch === '"') {
        inString = false;
        collectingKey = false;
      } else if (collectingKey) {
        currentKey += ch;
      }
      continue;
    }

    const top = stack[stack.length - 1];

    switch (ch) {
      case '"':
        inString = true;
        // Only a string directly inside an object can be a key; a string
        // after ',' inside an array is an element.
        collectingKey =
          top?.kind === "object" && isObjectKeyPosition(json, i);
        if (collectingKey) currentKey = "";
        break;

      case "{":
        stack.push({ kind: "object", key: "" });
        break;

      case "[":
        stack.push({ kind: "array", index: 0 });
        break;

      case "}":
      case "]":
        stack.pop();
        break;

      case ":":
        // The key just collected names the property whose value follows.
        if (currentKey && top) {
          if (top.kind === "object") top.key = currentKey;
          currentKey = "";
        }
        break;

      case ",":
        if (top?.kind === "array") {
          top.index++;
        } else if (top?.kind === "object") {
          // Property finished: forget its key until the next ':' is seen.
          top.key = "";
        }
        break;
    }
  }

  // Assemble the path; object frames without a known key contribute nothing.
  let path = "";
  for (const frame of stack) {
    if (frame.kind === "array") {
      path += `[${frame.index}]`;
    } else if (frame.key) {
      path += path === "" ? frame.key : `.${frame.key}`;
    }
  }

  return path === "" ? "<root>" : path;
}

/**
 * Decide whether the '"' at `offset` could open an object key, judging only
 * by the nearest preceding non-whitespace character.
 *
 * Returns true when that character is '{' or ','. A ',' also precedes array
 * elements, so the caller must separately confirm that the enclosing
 * container is an object.
 */
function isObjectKeyPosition(json: string, offset: number): boolean {
  for (let i = offset - 1; i >= 0; i--) {
    const ch = json.charAt(i);
    const isWhitespace =
      ch === " " || ch === "\n" || ch === "\r" || ch === "\t";
    if (!isWhitespace) return ch === "{" || ch === ",";
  }
  return false;
}
178
+
179
+ // ---------------------------------------------------------------------------
180
+ // Divergence reason inference
181
+ // ---------------------------------------------------------------------------
182
+
183
/**
 * Turn a semantic JSON path into a short human-readable explanation of what
 * changed between two request bodies.
 *
 * Top-level keys are matched exactly or as the root of a longer path
 * (`system`, `system[0].text`, `tools[2].name`), so an unrelated key that
 * merely shares the prefix (e.g. `system_fingerprint`, `toolset`) falls
 * through to the generic message.
 *
 * @param path - Path as produced by `mapOffsetToJsonPath`, or one of the
 *   markers "<end>", "<start>", "<root>".
 * @param prevLength - Length of the previous body; only consulted for "<end>".
 * @param currLength - Length of the current body; only consulted for "<end>".
 * @returns A description; `changed at ${path}` when nothing more specific
 *   applies.
 */
export function inferDivergenceReason(
  path: string,
  prevLength: number,
  currLength: number,
): string {
  // True when `path` is `key` itself or descends from it via '.' or '['.
  const isAtOrUnder = (key: string): boolean =>
    path === key ||
    path.startsWith(`${key}.`) ||
    path.startsWith(`${key}[`);

  switch (path) {
    case "<end>":
      return currLength > prevLength
        ? "new content appended"
        : "content truncated";
    case "<start>":
      return "request structure changed from start";
    case "<root>":
      return "top-level structure changed";
    case "model":
      return "model changed";
    case "max_tokens":
      return "max_tokens changed";
  }

  if (isAtOrUnder("system")) return "system prompt changed";
  if (isAtOrUnder("tools")) return "tool definitions changed";

  // messages[N] patterns
  const msgMatch = /^messages\[(\d+)\]/.exec(path);
  if (msgMatch) {
    const idx = parseInt(msgMatch[1] ?? "", 10);
    const rest = path.slice(msgMatch[0].length);

    if (rest === "") return `message ${idx} structure changed`;
    if (rest === ".role") return `message ${idx} role changed`;
    if (rest.startsWith(".content")) return `message ${idx} content changed`;
  }

  return `changed at ${path}`;
}
220
+
221
+ // ---------------------------------------------------------------------------
222
+ // Main analytics function
223
+ // ---------------------------------------------------------------------------
224
+
225
/**
 * Analyze cache behaviour for one turn.
 *
 * Combines two signals: the cache token counts reported in `usage`, and a
 * prefix comparison between `currentBody` and the previous turn's body (kept
 * compressed in `analytics.lastRequestBody`).
 *
 * Mutates `analytics` in place: increments `turnCount`, increments
 * `bustCount` when a later-than-first turn reports zero cache reads together
 * with cache creation, and stores the current body (compressed), its length
 * and the cache counters for the next call. Every turn after the first is
 * also logged via `log.info`.
 *
 * @param analytics - Per-session state, updated in place.
 * @param currentBody - Serialized request body of this turn.
 * @param usage - Token usage of this turn; missing fields count as 0.
 * @param sessionID - Optional id; its first 16 characters appear in the log.
 * @returns The analysis for this turn.
 */
export function analyzeCacheTurn(
  analytics: CacheAnalytics,
  currentBody: string,
  usage: GatewayUsage,
  sessionID?: string,
): CacheTurnAnalysis {
  analytics.turnCount += 1;

  const cacheRead = usage.cacheReadInputTokens ?? 0;
  const cacheCreation = usage.cacheCreationInputTokens ?? 0;
  const inputTokens = usage.inputTokens ?? 0;

  const totalInput = cacheRead + cacheCreation + inputTokens;
  const cacheHitRate = totalInput > 0 ? cacheRead / totalInput : 0;

  // Nothing read from cache while new cache entries were written.
  const wroteWithoutHit = cacheRead === 0 && cacheCreation > 0;
  if (wroteWithoutHit && analytics.turnCount > 1) {
    analytics.bustCount += 1;
  }

  // First-turn defaults: there is no earlier body to compare against.
  let prefixMatchBytes = 0;
  let prefixMatchPercent = 0;
  let divergencePoint = "<first-turn>";
  let divergenceReason = "first turn — no previous request to compare";

  const storedBody = analytics.lastRequestBody;
  if (storedBody !== null) {
    const prevBody = decompressBody(storedBody);
    const prevLength = prevBody.length;
    const currLength = currentBody.length;
    const shorter = Math.min(prevLength, currLength);

    prefixMatchBytes = findDivergenceOffset(prevBody, currentBody);
    // Fraction (0..1) of the shorter body that matches as a prefix.
    prefixMatchPercent = shorter > 0 ? prefixMatchBytes / shorter : 0;

    if (prefixMatchBytes < shorter) {
      // The bodies disagree somewhere inside their common length.
      divergencePoint = mapOffsetToJsonPath(currentBody, prefixMatchBytes);
      divergenceReason = inferDivergenceReason(
        divergencePoint,
        prevLength,
        currLength,
      );
    } else if (prevLength === currLength) {
      divergencePoint = "<identical>";
      divergenceReason = "request bodies are identical";
    } else {
      // One body is a strict prefix of the other.
      divergencePoint = "<end>";
      divergenceReason =
        currLength > prevLength
          ? "new content appended (likely new messages)"
          : "content truncated (context window compressed)";
    }
  }

  // Remember this turn for the next comparison.
  analytics.lastRequestBody = compressBody(currentBody);
  analytics.lastRequestBodyLength = currentBody.length;
  analytics.lastCacheRead = cacheRead;
  analytics.lastCacheCreation = cacheCreation;

  const result: CacheTurnAnalysis = {
    turn: analytics.turnCount,
    cacheRead,
    cacheCreation,
    inputTokens,
    cacheHitRate,
    prefixMatchBytes,
    prefixMatchPercent,
    divergencePoint,
    divergenceReason,
  };

  if (analytics.turnCount > 1) {
    const sessionPart = sessionID ? ` session=${sessionID.slice(0, 16)}` : "";
    const hitPct = (result.cacheHitRate * 100).toFixed(0);
    const prefixPct = (result.prefixMatchPercent * 100).toFixed(1);
    // NOTE: the denominator printed here is the CURRENT body length (just
    // stored above), while the percentage is relative to the shorter body.
    const message = [
      `cache-analytics:${sessionPart} turn=${result.turn}`,
      ` hit=${hitPct}%`,
      ` read=${cacheRead} create=${cacheCreation} input=${inputTokens}`,
      ` prefixMatch=${prefixPct}%`,
      ` (${prefixMatchBytes}/${analytics.lastRequestBodyLength}B)`,
      ` divergence="${divergencePoint}" reason="${divergenceReason}"`,
      wroteWithoutHit ? " [BUST]" : "",
    ].join("");
    log.info(message);
  }

  return result;
}
323
+
324
/**
 * Emit a one-line summary of a session's cache statistics via `log.info`.
 *
 * Does nothing when no turn has been recorded. The bust rate is
 * `bustCount / (turnCount - 1)`, or 0 when only one turn was recorded.
 *
 * @param sessionID - Session id; only its first 16 characters are logged.
 * @param analytics - Accumulated per-session state (read only).
 */
export function logCacheAnalyticsSummary(
  sessionID: string,
  analytics: CacheAnalytics,
): void {
  const { turnCount, bustCount } = analytics;
  if (turnCount === 0) return;

  let bustRate = 0;
  if (turnCount > 1) {
    bustRate = bustCount / (turnCount - 1);
  }

  const summary = [
    `cache-analytics summary: session=${sessionID.slice(0, 16)}`,
    ` turns=${turnCount}`,
    ` busts=${bustCount}`,
    ` bustRate=${(bustRate * 100).toFixed(0)}%`,
  ].join("");
  log.info(summary);
}
@@ -0,0 +1,107 @@
1
+ /**
2
+ * Agent registry — known AI coding agents that can be launched through
3
+ * the gateway.
4
+ *
5
+ * Each agent defines:
6
+ * - How to detect it (binary name on PATH)
7
+ * - What env vars to set so it talks through the gateway
8
+ */
9
+
10
+ // ---------------------------------------------------------------------------
11
+ // which() — cross-runtime binary lookup
12
+ // ---------------------------------------------------------------------------
13
+
14
/**
 * Find a binary on PATH.
 *
 * Uses `Bun.which()` when the Bun global is present. Otherwise runs the
 * platform lookup command (`where` on win32, `which` elsewhere) through
 * `child_process.execFileSync` and returns the first path it prints.
 *
 * @param binary - Executable name to look up.
 * @returns The first matching path, or null when the lookup command fails,
 *   prints nothing, or cannot be run at all.
 */
function which(binary: string): string | null {
  // Bun runtime
  if (typeof Bun !== "undefined" && typeof Bun.which === "function") {
    return Bun.which(binary);
  }

  // Node.js runtime
  try {
    // NOTE(review): relies on `require` being defined; presumably this path
    // runs from the CommonJS bundle. If it is not, the ReferenceError is
    // swallowed below and the binary is reported as missing.
    const { execFileSync } = require("node:child_process") as typeof import("node:child_process");
    const cmd = process.platform === "win32" ? "where" : "which";
    const output = execFileSync(cmd, [binary], {
      encoding: "utf8",
      // Capture stderr as well so lookup failures print nothing.
      stdio: ["pipe", "pipe", "pipe"],
    });
    // `where` separates matches with CRLF; splitting on "\n" alone would
    // leave a trailing "\r" on the first of several results.
    const firstHit = output
      .split(/\r?\n/)
      .map((line) => line.trim())
      .find((line) => line.length > 0);
    return firstHit ?? null;
  } catch {
    // Non-zero exit (not found) or the lookup command could not be run.
    return null;
  }
}
38
+
39
+ // ---------------------------------------------------------------------------
40
+ // Agent definitions
41
+ // ---------------------------------------------------------------------------
42
+
43
/** Description of one AI coding agent that can be launched via the gateway. */
export interface AgentDef {
  /** Stable internal id, e.g. "claude-code". */
  name: string;
  /** Name shown to people, e.g. "Claude Code". */
  displayName: string;
  /** Executable name looked up on PATH. */
  binary: string;
  /** Looks the agent up; yields the binary's path, or null when not found. */
  detect: () => string | null;
  /**
   * Builds the environment variables to inject for a given gateway URL
   * (e.g. "http://127.0.0.1:6969").
   */
  envVars: (gatewayUrl: string) => Record<string, string>;
}
55
+
56
/**
 * Known agents, in the order they are listed here. Each entry's `detect`
 * looks up its own `binary` with `which`, and `envVars` yields the single
 * base-URL variable for that agent.
 */
export const AGENTS: AgentDef[] = (() => {
  const define = (
    name: string,
    displayName: string,
    binary: string,
    envVars: AgentDef["envVars"],
  ): AgentDef => ({
    name,
    displayName,
    binary,
    detect: () => which(binary),
    envVars,
  });

  return [
    define("claude-code", "Claude Code", "claude", (url) => ({
      ANTHROPIC_BASE_URL: url,
    })),
    define("codex", "Codex", "codex", (url) => ({
      OPENAI_BASE_URL: `${url}/v1`,
    })),
    define("pi", "Pi", "pi", (url) => ({
      ANTHROPIC_BASE_URL: url,
    })),
    define("opencode", "OpenCode", "opencode", (url) => ({
      OPENAI_BASE_URL: `${url}/v1`,
    })),
  ];
})();
86
+
87
+ // ---------------------------------------------------------------------------
88
+ // Detection
89
+ // ---------------------------------------------------------------------------
90
+
91
/** An agent definition paired with the path its `detect()` returned. */
export interface DetectedAgent {
  /** The matching registry entry. */
  def: AgentDef;
  /** Path reported for the agent's binary. */
  path: string;
}
95
+
96
/**
 * Run every registered agent's `detect()` and collect the ones that report a
 * path, preserving the order of `AGENTS`.
 *
 * @returns One entry per agent whose `detect()` returned a non-empty path.
 */
export function detectAgents(): DetectedAgent[] {
  return AGENTS.flatMap((def): DetectedAgent[] => {
    const path = def.detect();
    return path ? [{ def, path }] : [];
  });
}
package/src/cli/bin.ts ADDED
@@ -0,0 +1,11 @@
1
+ /**
2
+ * Binary entry point — called when running the standalone Bun binary
3
+ * or directly via `bun run src/cli/bin.ts`.
4
+ */
5
+ import "../../instrument";
6
+ import { _cli } from "./main";
7
+
8
// Start the CLI. If it rejects, print the rejection value when there is one
// and terminate the process with exit code 1.
_cli().catch((reason) => {
  if (reason) {
    console.error(reason);
  }
  process.exit(1);
});
@@ -0,0 +1,55 @@
1
+ /**
2
+ * CLI help text — printed by `lore help` and `lore --help`.
3
+ */
4
+ import { VERSION } from "./version";
5
+
6
+ const USAGE = `
7
+ lore v${VERSION} — context management proxy for AI coding agents
8
+
9
+ Usage:
10
+ lore [command] [options]
11
+
12
+ Commands:
13
+ run [command...] Start gateway and launch an AI agent (default)
14
+ start Start the gateway server only
15
+ upgrade [version] Update lore to the latest (or specified) version
16
+ Flags: --check, --force, --offline, --channel <ch>
17
+ help Show this help text
18
+
19
+ Options:
20
+ -p, --port <port> Gateway port (default: 6969, env: LORE_LISTEN_PORT)
21
+ -H, --host <host> Gateway host (default: 127.0.0.1, env: LORE_LISTEN_HOST)
22
+ -d, --debug Enable debug logging (env: LORE_DEBUG=1)
23
+ -v, --version Print version and exit
24
+ -h, --help Show this help text
25
+
26
+ Examples:
27
+ lore # Auto-detect agent and launch with gateway
28
+ lore run claude # Launch Claude Code through the gateway
29
+ lore run opencode # Launch OpenCode through the gateway
30
+ lore start # Start gateway only (set ANTHROPIC_BASE_URL yourself)
31
+ lore start -p 8080 # Start gateway on a custom port
32
+ lore upgrade # Upgrade to latest version
33
+ lore upgrade --check # Check for updates without installing
34
+ lore upgrade --force # Force re-download even if up to date
35
+ lore upgrade nightly # Switch to nightly channel and update
36
+ lore upgrade stable # Switch back to stable channel
37
+ lore upgrade 0.14.0 # Install a specific version
38
+ lore upgrade --offline # Upgrade from cached patches (no network)
39
+
40
+ Environment variables:
41
+ LORE_LISTEN_PORT Gateway port (overridden by --port)
42
+ LORE_LISTEN_HOST Gateway host (overridden by --host)
43
+ LORE_UPSTREAM_ANTHROPIC Upstream Anthropic API URL
44
+ LORE_UPSTREAM_OPENAI Upstream OpenAI API URL
45
+ LORE_DEBUG Enable debug logging (1 or true)
46
+ LORE_NO_UPDATE_CHECK Disable background update checks (set to 1)
47
+ `.trimStart();
48
+
49
/** Write the CLI usage text (`USAGE`) to standard output via `console.log`. */
export function printHelp(): void {
  console.log(USAGE);
}
52
+
53
/** Write the bare version string (`VERSION`) to standard output via `console.log`. */
export function printVersion(): void {
  console.log(VERSION);
}