@loreai/gateway 0.13.4 → 0.14.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js DELETED
@@ -1,3548 +0,0 @@
1
// Bundler runtime helpers.
var __getOwnPropNames = Object.getOwnPropertyNames;

/**
 * Wraps a one-entry map of CommonJS-style factories in a lazy loader.
 *
 * The returned `__require` function runs the first own property of `cb` as a
 * factory `(exports, module)` on its first call and hands back
 * `module.exports`; later calls reuse the same module record.
 *
 * @param {object} cb - Object whose first own property is the factory.
 * @param {{exports: any}} [mod] - Optional pre-built module record; when
 *   truthy the factory is never invoked.
 * @returns {Function} Loader returning the module's exports.
 */
var __commonJS = (cb, mod) => function __require() {
  if (mod) {
    return mod.exports;
  }
  // Resolve the factory before creating the record, so a lookup failure
  // leaves the loader un-initialised.
  const factory = cb[__getOwnPropNames(cb)[0]];
  mod = { exports: {} };
  factory(mod.exports, mod);
  return mod.exports;
};
5
-
6
- // package.json
7
// Lazily-evaluated copy of the package manifest embedded by the bundler.
var require_package = __commonJS({
  "package.json"(exports, module) {
    const entryPoint = "./dist/index.js";
    const manifest = {
      name: "@loreai/gateway",
      version: "0.13.4",
      type: "module",
      license: "FSL-1.1-Apache-2.0",
      description: "Lore as a transparent LLM proxy \u2014 context management for any AI coding client",
      main: entryPoint,
      types: "./dist/index.d.ts",
      exports: {
        ".": { bun: "./src/index.ts", default: entryPoint }
      },
      bin: { "lore-gateway": entryPoint },
      scripts: {
        typecheck: "tsc --noEmit",
        build: "bun run script/build.ts",
        start: "bun run src/index.ts"
      },
      dependencies: { "@loreai/core": "workspace:*" },
      files: ["src/", "dist/", "README.md", "LICENSE"],
      engines: { bun: ">=1.2.0" },
      repository: {
        type: "git",
        url: "git+https://github.com/BYK/loreai.git",
        directory: "packages/gateway"
      },
      publishConfig: { access: "public" },
      keywords: [
        "lore",
        "gateway",
        "proxy",
        "llm",
        "context-management",
        "anthropic",
        "openai"
      ],
      author: "BYK"
    };
    module.exports = manifest;
  }
});
64
-
65
- // src/config.ts
66
/**
 * Builds the gateway configuration from `process.env`, substituting defaults
 * for anything unset or unparsable.
 *
 * @returns {{port: number, host: string, upstreamAnthropic: string,
 *   upstreamOpenAI: string, idleTimeoutSeconds: number, debug: boolean}}
 */
function loadConfig() {
  const env = process.env;
  const port = parsePort(env.LORE_LISTEN_PORT, 6969);
  const host = env.LORE_LISTEN_HOST || "127.0.0.1";
  const anthropicBase = env.LORE_UPSTREAM_ANTHROPIC || "https://api.anthropic.com";
  const openAIBase = env.LORE_UPSTREAM_OPENAI || "https://api.openai.com";
  const upstreamAnthropic = trimTrailingSlash(anthropicBase);
  const upstreamOpenAI = trimTrailingSlash(openAIBase);
  const idleTimeoutSeconds = parsePositiveInt(env.LORE_IDLE_TIMEOUT, 60);
  const debug = isTruthy(env.LORE_DEBUG);
  return { port, host, upstreamAnthropic, upstreamOpenAI, idleTimeoutSeconds, debug };
}
81
// Model-id prefix -> upstream routing table, listed per provider and expanded
// into one `{ prefix, url, protocol }` row per prefix (order is preserved).
// Matching elsewhere is prefix-based, so an id must start with the full prefix
// including its trailing "-" or "/".
var UPSTREAM_ROUTES = [
  // Anthropic
  ["https://api.anthropic.com", "anthropic", ["claude-"]],
  // Nvidia NIM
  [
    "https://integrate.api.nvidia.com",
    "openai",
    ["nvidia/", "meta/", "mistralai/", "google/", "qwen/", "deepseek/"]
  ],
  // OpenAI
  ["https://api.openai.com", "openai", ["gpt-", "o1-", "o3-", "o4-"]],
  // xAI
  ["https://api.x.ai", "openai", ["grok-"]],
  // Mistral (direct)
  ["https://api.mistral.ai", "openai", ["mistral-", "codestral-"]],
  // Google (direct)
  ["https://generativelanguage.googleapis.com", "openai", ["gemini-"]]
].flatMap(([url, protocol, prefixes]) => prefixes.map((prefix) => ({ prefix, url, protocol })));
104
/**
 * Finds the first routing-table entry whose prefix starts the model id.
 *
 * @param {string} model - Model identifier from the request.
 * @returns {{url: string, protocol: string} | null} Upstream base URL and
 *   wire protocol, or `null` when no prefix matches.
 */
function resolveUpstreamRoute(model) {
  const hit = UPSTREAM_ROUTES.find(({ prefix }) => model.startsWith(prefix));
  if (hit === undefined) {
    return null;
  }
  return { url: hit.url, protocol: hit.protocol };
}
112
// Patterns tried in order to pull a project directory out of free text.
// Each one captures an absolute path rooted at /home/ or /Users/ in group 1.
var PROJECT_PATH_PATTERNS = [
  // 1. A `cwd` key/assignment, optionally quoted: "cwd": "/home/…/project"
  /["']?cwd["']?\s*[:=]\s*["']?(\/(?:home|Users)\/[^\s"',}]+)/,
  // 2. A "Working directory: /home/user/project" style line
  /[Ww]orking\s+directory[:=]\s*(\/(?:home|Users)\/[^\s"',]+)/,
  // 3. Path to CLAUDE.md / AGENTS.md / .lore.md — the capture is its directory
  /(\/(?:home|Users)\/[^\s"',]+)\/(?:CLAUDE|AGENTS|\.lore)\.md/,
  // 4. Fallback: first absolute /home/ or /Users/ path made of word chars,
  //    dots, slashes and dashes
  /(\/(?:home|Users)\/[\w./-]+)/
];
123
/**
 * Guesses the project directory mentioned in a system prompt.
 *
 * The patterns are tried in priority order; the first one that captures a
 * path decides the result, with trailing slashes removed.
 *
 * @param {string} systemPrompt - Text to scan.
 * @returns {string | null} The captured path, or `null` if nothing matched.
 */
function inferProjectPath(systemPrompt) {
  for (let i = 0; i < PROJECT_PATH_PATTERNS.length; i += 1) {
    const captured = PROJECT_PATH_PATTERNS[i].exec(systemPrompt)?.[1];
    if (!captured) {
      continue;
    }
    const trimmed = captured.replace(/\/+$/, "");
    return trimmed === "" ? null : trimmed;
  }
  return null;
}
132
/**
 * Picks the project path for a request.
 *
 * Precedence: the `x-lore-project` header, then a path inferred from the
 * system prompt, then the gateway process's current working directory.
 *
 * @param {string} systemPrompt - System prompt text to scan as a fallback.
 * @param {Record<string, string>} headers - Request headers.
 * @returns {string} The resolved project path.
 */
function getProjectPath(systemPrompt, headers) {
  return headers["x-lore-project"] || inferProjectPath(systemPrompt) || process.cwd();
}
139
/**
 * Parses a TCP port from an environment-style string.
 *
 * Only a plain decimal integer (optional leading "+", surrounding whitespace
 * allowed) in the range 0-65535 is accepted. Anything else, including values
 * with trailing garbage such as "8080abc" or "80.5", yields `fallback`
 * instead of being silently truncated.
 *
 * @param {string | undefined} value - Raw value to parse.
 * @param {number} fallback - Returned when `value` is missing or invalid.
 * @returns {number} The parsed port or `fallback`.
 */
function parsePort(value, fallback) {
  if (!value) return fallback;
  const text = String(value).trim();
  // Number.parseInt alone would accept "8080abc" as 8080; require all digits.
  if (!/^\+?\d+$/.test(text)) return fallback;
  const n = Number.parseInt(text, 10);
  if (n > 65535) return fallback;
  return n;
}
145
/**
 * Parses a strictly positive decimal integer from an environment-style string.
 *
 * Only digits (optional leading "+", surrounding whitespace allowed) are
 * accepted. Values with trailing garbage such as "2m" or "1.5" yield
 * `fallback` rather than being truncated to their numeric prefix.
 *
 * @param {string | undefined} value - Raw value to parse.
 * @param {number} fallback - Returned when `value` is missing, malformed or
 *   not greater than zero.
 * @returns {number} The parsed integer or `fallback`.
 */
function parsePositiveInt(value, fallback) {
  if (!value) return fallback;
  const text = String(value).trim();
  // Number.parseInt alone would accept "2m" as 2; require all digits.
  if (!/^\+?\d+$/.test(text)) return fallback;
  const n = Number.parseInt(text, 10);
  return n > 0 ? n : fallback;
}
151
/**
 * Interprets a flag-style string.
 *
 * @param {string | null | undefined} value - Raw flag value.
 * @returns {boolean} `true` for "1" or any casing of "true"; otherwise `false`.
 */
function isTruthy(value) {
  if (value === "1") {
    return true;
  }
  if (value == null) {
    return false;
  }
  return value.toLowerCase() === "true";
}
154
/**
 * Removes every trailing "/" from a URL string.
 *
 * @param {string} url - URL or path to trim.
 * @returns {string} `url` without trailing slashes.
 */
function trimTrailingSlash(url) {
  let end = url.length;
  while (end > 0 && url[end - 1] === "/") {
    end -= 1;
  }
  return url.slice(0, end);
}
157
-
158
- // src/auth.ts
159
/**
 * Extracts the caller's credential from request headers.
 *
 * A non-empty `x-api-key` header wins; otherwise an `Authorization: Bearer …`
 * header is used. Header names are matched case-insensitively (the exact
 * lower-case name is tried first), and surrounding whitespace on the
 * Authorization value is ignored.
 *
 * @param {Record<string, string>} headers - Request headers as a plain object.
 * @returns {{scheme: "api-key" | "bearer", value: string} | null} The
 *   credential, or `null` when none is present.
 */
function extractAuth(headers) {
  // HTTP header names are case-insensitive; do not rely on one spelling.
  const lookup = (name) => {
    const exact = headers[name];
    if (exact) return exact;
    for (const key of Object.keys(headers)) {
      if (key.toLowerCase() === name && headers[key]) return headers[key];
    }
    return undefined;
  };

  const apiKey = lookup("x-api-key");
  if (apiKey) return { scheme: "api-key", value: apiKey };

  const authHeader = lookup("authorization");
  if (authHeader) {
    const match = /^Bearer\s+(\S+)$/i.exec(String(authHeader).trim());
    if (match) return { scheme: "bearer", value: match[1] };
  }
  return null;
}
169
/**
 * Converts a credential into the header it should be sent upstream with.
 *
 * @param {{scheme: string, value: string}} cred - Credential to encode.
 * @returns {Record<string, string> | undefined} `x-api-key` for "api-key",
 *   `Authorization: Bearer …` for "bearer", `undefined` for any other scheme.
 */
function authHeaders(cred) {
  if (cred.scheme === "api-key") {
    return { "x-api-key": cred.value };
  }
  if (cred.scheme === "bearer") {
    return { Authorization: `Bearer ${cred.value}` };
  }
  return undefined;
}
177
/**
 * Returns a short identifier for a credential: its last eight characters
 * (or the whole value when shorter).
 *
 * NOTE(review): the result is a literal suffix of the secret, not a hash —
 * confirm callers do not log or expose it.
 *
 * @param {{value: string}} cred - Credential to fingerprint.
 * @returns {string} Up to eight trailing characters of `cred.value`.
 */
function authFingerprint(cred) {
  const secret = cred.value;
  const start = Math.max(secret.length - 8, 0);
  return secret.slice(start);
}
180
// In-memory credential store keyed by session id. Entries are only ever
// added or overwritten here; nothing in this section removes them.
var sessionAuth = /* @__PURE__ */ new Map();

/**
 * Remembers the credential used by a session.
 *
 * @param {string} sessionID - Session identifier.
 * @param {{scheme: string, value: string}} cred - Credential to store.
 */
function setSessionAuth(sessionID, cred) {
  sessionAuth.set(sessionID, cred);
}

/**
 * Looks up the credential stored for a session.
 *
 * @param {string} sessionID - Session identifier.
 * @returns {{scheme: string, value: string} | null} Stored credential, or
 *   `null` when none is recorded.
 */
function getSessionAuth(sessionID) {
  const stored = sessionAuth.get(sessionID);
  return stored == null ? null : stored;
}
187
// Most recently recorded credential, shared across all sessions; `null`
// until the first call to setLastSeenAuth.
var lastSeenAuth = null;

/**
 * Records `cred` as the most recently seen credential.
 *
 * @param {{scheme: string, value: string} | null} cred - Credential to keep.
 */
function setLastSeenAuth(cred) {
  lastSeenAuth = cred;
}

/**
 * @returns {{scheme: string, value: string} | null} The credential last
 *   passed to setLastSeenAuth, or `null` if there has been none.
 */
function getLastSeenAuth() {
  return lastSeenAuth;
}
194
/**
 * Chooses the credential for a session.
 *
 * @param {string | null | undefined} sessionID - Session to look up; may be
 *   empty.
 * @returns {{scheme: string, value: string} | null} The session's stored
 *   credential when there is one, otherwise the last credential seen.
 */
function resolveAuth(sessionID) {
  const scoped = sessionID ? getSessionAuth(sessionID) : null;
  return scoped ? scoped : getLastSeenAuth();
}
201
-
202
- // src/translate/anthropic.ts
203
// Value sent in the `anthropic-version` header of upstream requests.
var ANTHROPIC_VERSION = "2023-06-01";

// Request-body keys the Anthropic translator parses itself; every other key
// is carried through as opaque metadata.
var KNOWN_BODY_FIELDS = /* @__PURE__ */ new Set(
  ["model", "system", "messages", "tools", "max_tokens", "stream"]
);
212
/**
 * Converts one Anthropic content block into the gateway's internal shape.
 *
 * - `text`, `thinking` and `tool_use` blocks are copied field by field, with
 *   string fields coerced via `String`.
 * - `tool_result` content is flattened to a string: a string is used as is,
 *   an array keeps only its text parts joined with newlines, anything else
 *   becomes "".
 * - Any other block type is preserved as a text block holding its JSON.
 *
 * @param {object} block - Anthropic content block.
 * @returns {object} Gateway content block.
 */
function toGatewayBlock(block) {
  const kind = block.type;

  if (kind === "text") {
    return { type: "text", text: String(block.text ?? "") };
  }

  if (kind === "thinking") {
    const out = { type: "thinking", thinking: String(block.thinking ?? "") };
    if (block.signature != null) {
      out.signature = String(block.signature);
    }
    return out;
  }

  if (kind === "tool_use") {
    return {
      type: "tool_use",
      id: String(block.id ?? ""),
      name: String(block.name ?? ""),
      input: block.input
    };
  }

  if (kind === "tool_result") {
    const raw = block.content;
    let content = "";
    if (typeof raw === "string") {
      content = raw;
    } else if (Array.isArray(raw)) {
      const pieces = [];
      raw.forEach((part) => {
        if (part.type === "text") {
          pieces.push(String(part.text ?? ""));
        }
      });
      content = pieces.join("\n");
    }
    const out = {
      type: "tool_result",
      toolUseId: String(block.tool_use_id ?? ""),
      content
    };
    if (block.is_error) {
      out.isError = true;
    }
    return out;
  }

  // Unknown block type: keep the payload by serialising it into text.
  return { type: "text", text: JSON.stringify(block) };
}
247
/**
 * Normalises a message's `content` into a list of gateway blocks.
 *
 * @param {unknown} content - A string, an array of Anthropic blocks, or
 *   anything else.
 * @returns {object[]} One text block for a string, converted blocks for an
 *   array, and an empty list for every other input.
 */
function normalizeContent(content) {
  if (Array.isArray(content)) {
    return content.map((block) => toGatewayBlock(block));
  }
  return typeof content === "string" ? [{ type: "text", text: content }] : [];
}
258
/**
 * Flattens an Anthropic `system` field to a single string.
 *
 * @param {unknown} system - Missing, a string, an array of blocks, or another
 *   value.
 * @returns {string} "" when missing; the string itself; the text parts of an
 *   array joined with newlines; otherwise `String(system)`.
 */
function normalizeSystem(system) {
  if (system == null) {
    return "";
  }
  if (typeof system === "string") {
    return system;
  }
  if (!Array.isArray(system)) {
    return String(system);
  }
  const parts = [];
  system.forEach((block) => {
    if (block.type === "text") {
      parts.push(String(block.text ?? ""));
    }
  });
  return parts.join("\n");
}
266
/**
 * Converts a gateway content block back into Anthropic wire format.
 *
 * @param {object} block - Gateway block of type `text`, `thinking`,
 *   `tool_use` or `tool_result`.
 * @returns {object | undefined} The Anthropic block, or `undefined` for any
 *   other type.
 */
function toAnthropicBlock(block) {
  if (block.type === "text") {
    return { type: "text", text: block.text };
  }
  if (block.type === "thinking") {
    const out = { type: "thinking", thinking: block.thinking };
    if (block.signature != null) {
      out.signature = block.signature;
    }
    return out;
  }
  if (block.type === "tool_use") {
    return { type: "tool_use", id: block.id, name: block.name, input: block.input };
  }
  if (block.type === "tool_result") {
    const base = {
      type: "tool_result",
      tool_use_id: block.toolUseId,
      content: block.content
    };
    return block.isError ? { ...base, is_error: true } : base;
  }
  return undefined;
}
294
/**
 * Parses an Anthropic Messages request body into the gateway request shape.
 *
 * Defaults: `model` "" when absent, `stream` true only for a literal `true`,
 * `maxTokens` 4096 unless `max_tokens` is a number. Any message whose role is
 * not "assistant" is treated as "user". Body keys outside the known set are
 * copied verbatim into `metadata`.
 *
 * @param {object | null | undefined} body - Decoded JSON request body.
 * @param {Record<string, string>} headers - Incoming request headers, kept
 *   as `rawHeaders`.
 * @returns {object} Gateway request with `protocol: "anthropic"`.
 */
function parseAnthropicRequest(body, headers) {
  const raw = body ?? {};
  const model = String(raw.model ?? "");
  const system = normalizeSystem(raw.system);
  const stream = raw.stream === true;
  const maxTokens = typeof raw.max_tokens === "number" ? raw.max_tokens : 4096;

  const messages = [];
  for (const msg of Array.isArray(raw.messages) ? raw.messages : []) {
    const role = msg.role === "assistant" ? "assistant" : "user";
    messages.push({ role, content: normalizeContent(msg.content) });
  }

  const tools = [];
  for (const tool of Array.isArray(raw.tools) ? raw.tools : []) {
    tools.push({
      name: String(tool.name ?? ""),
      description: String(tool.description ?? ""),
      inputSchema: tool.input_schema ?? {}
    });
  }

  // Pass-through for every field this translator does not interpret.
  const metadata = {};
  Object.entries(raw).forEach(([key, value]) => {
    if (!KNOWN_BODY_FIELDS.has(key)) {
      metadata[key] = value;
    }
  });

  return {
    protocol: "anthropic",
    model,
    system,
    messages,
    tools,
    stream,
    maxTokens,
    metadata,
    rawHeaders: headers
  };
}
333
/**
 * Builds the upstream Anthropic `/v1/messages` request for a gateway request.
 *
 * Headers carry the content type, the pinned API version, the caller's
 * credential (when one can be extracted) and any `anthropic-beta` value.
 * When `cache.systemTTL` is set, the system prompt is sent as a single text
 * block with an ephemeral `cache_control` (with `ttl: "1h"` for "1h"). When
 * `cache.cacheConversation` is set, the final block of the final message is
 * marked ephemeral as well. Metadata keys are copied onto the body last.
 *
 * @param {object} req - Gateway request.
 * @param {{systemTTL?: string, cacheConversation?: boolean}} [cache] -
 *   Optional prompt-caching options.
 * @returns {{url: string, headers: Record<string, string>, body: object}}
 */
function buildAnthropicRequest(req, cache) {
  const headers = {
    "content-type": "application/json",
    "anthropic-version": ANTHROPIC_VERSION
  };
  const cred = extractAuth(req.rawHeaders);
  if (cred) {
    Object.assign(headers, authHeaders(cred));
  }
  const beta = req.rawHeaders["anthropic-beta"] || req.rawHeaders["Anthropic-Beta"] || "";
  if (beta) {
    headers["anthropic-beta"] = beta;
  }

  const body = {
    model: req.model,
    max_tokens: req.maxTokens,
    stream: req.stream
  };

  if (req.system) {
    const systemTTL = cache?.systemTTL;
    if (!systemTTL) {
      body.system = req.system;
    } else {
      const cacheControl = { type: "ephemeral" };
      if (systemTTL === "1h") {
        cacheControl.ttl = "1h";
      }
      body.system = [{ type: "text", text: req.system, cache_control: cacheControl }];
    }
  }

  const messages = req.messages.map((msg) => {
    return { role: msg.role, content: msg.content.map(toAnthropicBlock) };
  });
  if (cache?.cacheConversation && messages.length > 0) {
    const tailBlocks = messages[messages.length - 1].content;
    if (tailBlocks.length > 0) {
      // Cache breakpoint on the very last block of the conversation.
      tailBlocks[tailBlocks.length - 1].cache_control = { type: "ephemeral" };
    }
  }
  body.messages = messages;

  if (req.tools.length > 0) {
    body.tools = req.tools.map(({ name, description, inputSchema }) => ({
      name,
      description,
      input_schema: inputSchema
    }));
  }

  Object.entries(req.metadata).forEach(([key, value]) => {
    body[key] = value;
  });

  return { url: "/v1/messages", headers, body };
}
392
- function buildAnthropicNonStreamResponse(resp) {
393
- const usage = {
394
- input_tokens: resp.usage.inputTokens,
395
- output_tokens: resp.usage.outputTokens
396
- };
397
- if (resp.usage.cacheReadInputTokens != null) {
398
- usage.cache_read_input_tokens = resp.usage.cacheReadInputTokens;
399
- }
400
- if (resp.usage.cacheCreationInputTokens != null) {
401
- usage.cache_creation_input_tokens = resp.usage.cacheCreationInputTokens;
402
- }
403
- return {
404
- id: resp.id,
405
- type: "message",
406
- role: "assistant",
407
- model: resp.model,
408
- content: resp.content.map(toAnthropicBlock),
409
- stop_reason: resp.stopReason,
410
- stop_sequence: null,
411
- usage
412
- };
413
- }
414
-
415
- // src/translate/openai.ts
416
/**
 * Parses an OpenAI Chat Completions request body into the gateway request
 * shape.
 *
 * - `system` and `developer` messages are merged, in order, into one system
 *   prompt separated by blank lines. Their content may be a string or an
 *   array of text parts (parts are joined with newlines).
 * - `user` / `assistant` messages become gateway messages; `tool` messages
 *   become user messages carrying tool results. Other roles are ignored.
 * - `maxTokens` comes from `max_tokens`, else `max_completion_tokens`, else
 *   4096.
 * - A fixed set of sampling options is copied into `extras` when well-typed.
 *
 * @param {object | null | undefined} body - Decoded JSON request body.
 * @param {Record<string, string>} headers - Incoming request headers.
 * @returns {object} Gateway request with `protocol: "openai"`.
 */
function parseOpenAIRequest(body, headers) {
  const raw = body ?? {};
  const model = String(raw.model ?? "");
  const stream = raw.stream === true;

  let maxTokens = 4096;
  if (typeof raw.max_tokens === "number") {
    maxTokens = raw.max_tokens;
  } else if (typeof raw.max_completion_tokens === "number") {
    // Newer clients send max_completion_tokens instead of max_tokens.
    maxTokens = raw.max_completion_tokens;
  }

  const extras = {};
  for (const key of ["temperature", "top_p", "frequency_penalty", "presence_penalty"]) {
    if (typeof raw[key] === "number") {
      extras[key] = raw[key];
    }
  }
  if (typeof raw.user === "string") {
    extras.user = raw.user;
  }
  if (raw.logprobs === true || raw.logprobs === false) {
    extras.logprobs = raw.logprobs;
  }
  if (typeof raw.top_logprobs === "number") {
    extras.top_logprobs = raw.top_logprobs;
  }

  // Instruction content may be a plain string or an array of text parts.
  const instructionText = (content) => {
    if (typeof content === "string") return content;
    if (!Array.isArray(content)) return "";
    return content
      .filter((part) => part?.type === "text")
      .map((part) => String(part.text ?? ""))
      .join("\n");
  };

  const rawMessages = Array.isArray(raw.messages) ? raw.messages : [];
  let system = "";
  const messages = [];
  for (const msg of rawMessages) {
    const role = msg.role;
    const content = msg.content;
    if (role === "system" || role === "developer") {
      const text = instructionText(content);
      system = system ? `${system}\n\n${text}` : text;
      continue;
    }
    if (role === "user") {
      messages.push({ role: "user", content: parseUserContent(content, msg.tool_calls) });
      continue;
    }
    if (role === "assistant") {
      messages.push({ role: "assistant", content: parseAssistantContent(content, msg.tool_calls) });
      continue;
    }
    if (role === "tool") {
      const toolResultBlocks = parseToolResult(msg);
      if (toolResultBlocks.length > 0) {
        messages.push({ role: "user", content: toolResultBlocks });
      }
      continue;
    }
  }

  const rawTools = Array.isArray(raw.tools) ? raw.tools : [];
  const tools = rawTools.map((t) => {
    const func = t.function;
    return {
      name: String(func?.name ?? t.name ?? ""),
      description: String(func?.description ?? ""),
      inputSchema: func?.parameters ?? {}
    };
  });

  return {
    protocol: "openai",
    model,
    system,
    messages,
    tools,
    stream,
    maxTokens,
    metadata: {},
    rawHeaders: {
      ...headers,
      "x-api-key": headers["x-api-key"] ?? ""
    },
    extras
  };
}
506
/**
 * Converts the content (and any tool calls) of an OpenAI user message into
 * gateway blocks.
 *
 * Text and `tool_use` parts are kept; other content parts are skipped. Tool
 * call arguments are JSON-decoded; if they are not valid JSON the raw value
 * is kept as the input instead of throwing, mirroring how the stream
 * accumulator treats unparsable tool input.
 *
 * @param {unknown} content - String or array of content parts.
 * @param {object[] | undefined} toolCalls - OpenAI `tool_calls`, if any.
 * @returns {object[]} Gateway content blocks.
 */
function parseUserContent(content, toolCalls) {
  // Decode tool-call arguments without letting malformed JSON abort parsing.
  const decodeArguments = (args) => {
    if (!args) return {};
    if (typeof args !== "string") return args;
    try {
      return JSON.parse(args);
    } catch {
      return args;
    }
  };

  const blocks = [];
  if (typeof content === "string" && content) {
    blocks.push({ type: "text", text: content });
  } else if (Array.isArray(content)) {
    for (const item of content) {
      if (item.type === "text") {
        blocks.push({ type: "text", text: String(item.text ?? "") });
      } else if (item.type === "tool_use") {
        blocks.push({
          type: "tool_use",
          id: String(item.id ?? ""),
          name: String(item.name ?? ""),
          input: item.input ?? {}
        });
      }
    }
  }
  if (Array.isArray(toolCalls)) {
    for (const tc of toolCalls) {
      const fn = tc.function;
      blocks.push({
        type: "tool_use",
        id: String(tc.id ?? ""),
        name: String(fn?.name ?? ""),
        input: decodeArguments(fn?.arguments)
      });
    }
  }
  return blocks;
}
537
/**
 * Converts the content and tool calls of an OpenAI assistant message into
 * gateway blocks.
 *
 * Text and `tool_use` parts are kept; other content parts are skipped. Each
 * entry of `toolCalls` becomes a `tool_use` block whose input is the
 * JSON-decoded `function.arguments`. If the arguments are not valid JSON the
 * raw value is kept as the input instead of throwing, mirroring how the
 * stream accumulator treats unparsable tool input.
 *
 * @param {unknown} content - String or array of content parts.
 * @param {object[] | undefined} toolCalls - OpenAI `tool_calls`, if any.
 * @returns {object[]} Gateway content blocks.
 */
function parseAssistantContent(content, toolCalls) {
  // Decode tool-call arguments without letting malformed JSON abort parsing.
  const decodeArguments = (args) => {
    if (!args) return {};
    if (typeof args !== "string") return args;
    try {
      return JSON.parse(args);
    } catch {
      return args;
    }
  };

  const blocks = [];
  if (typeof content === "string" && content) {
    blocks.push({ type: "text", text: content });
  } else if (Array.isArray(content)) {
    for (const item of content) {
      if (item.type === "text") {
        blocks.push({ type: "text", text: String(item.text ?? "") });
      } else if (item.type === "tool_use") {
        blocks.push({
          type: "tool_use",
          id: String(item.id ?? ""),
          name: String(item.name ?? ""),
          input: item.input ?? {}
        });
      }
    }
  }
  if (Array.isArray(toolCalls)) {
    for (const tc of toolCalls) {
      const fn = tc.function;
      blocks.push({
        type: "tool_use",
        id: String(tc.id ?? ""),
        name: String(fn?.name ?? ""),
        input: decodeArguments(fn?.arguments)
      });
    }
  }
  return blocks;
}
568
/**
 * Converts an OpenAI `tool` message into a gateway `tool_result` block.
 *
 * Exactly one block is produced per message so that every tool call keeps a
 * matching result, even when the tool returned empty output. String content
 * is used as is; an array keeps only its text parts, joined with newlines;
 * any other content becomes "".
 *
 * @param {object} msg - OpenAI message with `tool_call_id` and `content`.
 * @returns {object[]} A one-element list holding the tool result block.
 */
function parseToolResult(msg) {
  const toolUseId = String(msg.tool_call_id ?? "");
  const raw = msg.content;

  let content = "";
  if (typeof raw === "string") {
    content = raw;
  } else if (Array.isArray(raw)) {
    content = raw
      .filter((item) => item?.type === "text")
      .map((item) => String(item.text ?? ""))
      .join("\n");
  }

  // An empty result is still a result: dropping it would orphan the call.
  return [{ type: "tool_result", toolUseId, content }];
}
591
/**
 * Renders a gateway response in OpenAI format.
 *
 * @param {object} resp - Gateway response.
 * @param {boolean} wasStreaming - Whether the client asked for a stream.
 * @returns {Response} An SSE response when `wasStreaming` is truthy,
 *   otherwise a JSON response.
 */
function buildOpenAIResponse(resp, wasStreaming) {
  return wasStreaming ? buildOpenAIStreamResponse(resp) : buildOpenAINonStreamResponse(resp);
}
597
/**
 * Renders a gateway response as an OpenAI `chat.completion` JSON response.
 *
 * Text blocks are concatenated into `message.content` (`null` when there is
 * no text); `tool_use` blocks become `tool_calls` with JSON-encoded
 * arguments; other block types are omitted. The id is prefixed with
 * `chatcmpl-` unless it already has that prefix.
 *
 * @param {object} resp - Gateway response with `id`, `model`, `content`,
 *   `stopReason` and `usage`.
 * @returns {Response} HTTP 200 response with a JSON body.
 */
function buildOpenAINonStreamResponse(resp) {
  let content = "";
  const toolCalls = [];
  for (const block of resp.content) {
    if (block.type === "text") {
      content += block.text;
    } else if (block.type === "tool_use") {
      toolCalls.push({
        id: block.id,
        type: "function",
        function: {
          name: block.name,
          arguments: JSON.stringify(block.input)
        }
      });
    }
  }

  const message = {
    role: "assistant",
    content: content || null
  };
  if (toolCalls.length > 0) {
    message.tool_calls = toolCalls;
  }

  const { inputTokens, outputTokens } = resp.usage;
  const response = {
    id: resp.id.startsWith("chatcmpl-") ? resp.id : `chatcmpl-${resp.id}`,
    object: "chat.completion",
    created: Math.floor(Date.now() / 1e3),
    model: resp.model,
    choices: [
      {
        index: 0,
        message,
        finish_reason: mapStopReason(resp.stopReason),
        logprobs: null
      }
    ],
    usage: {
      prompt_tokens: inputTokens,
      completion_tokens: outputTokens,
      total_tokens: inputTokens + outputTokens
    }
  };

  return new Response(JSON.stringify(response), {
    status: 200,
    headers: { "content-type": "application/json" }
  });
}
646
/**
 * Maps a gateway/Anthropic stop reason to an OpenAI `finish_reason`.
 *
 * @param {string} reason - Upstream stop reason.
 * @returns {"stop" | "length" | "tool_calls"} "tool_calls" for "tool_use",
 *   "length" for "max_tokens"/"length", and "stop" for everything else.
 */
function mapStopReason(reason) {
  if (reason === "tool_use") {
    return "tool_calls";
  }
  if (reason === "max_tokens" || reason === "length") {
    return "length";
  }
  // "end_turn", "stop", "stop_sequence" and any unrecognised value.
  return "stop";
}
661
/**
 * Replays a complete gateway response as an OpenAI `chat.completion.chunk`
 * SSE stream.
 *
 * Emits, in order: a role chunk, the text of each text block in pieces of
 * about ten UTF-16 code units, one chunk per tool call (indexed from 0), a
 * final chunk carrying the finish reason, and the `[DONE]` sentinel. A piece
 * is extended by one unit when it would otherwise end between the two halves
 * of a surrogate pair, so every chunk is well-formed text on its own.
 *
 * @param {object} resp - Gateway response with `id`, `model`, `content` and
 *   `stopReason`.
 * @returns {Response} HTTP 200 `text/event-stream` response.
 */
function buildOpenAIStreamResponse(resp) {
  const encoder = new TextEncoder();
  const pieceUnits = 10;

  const stream = new ReadableStream({
    start(controller) {
      const baseId = resp.id.startsWith("chatcmpl-") ? resp.id : `chatcmpl-${resp.id}`;
      const created = Math.floor(Date.now() / 1e3);
      let toolIndex = 0;

      const emitChunk = (delta, finishReason) => {
        const chunk = {
          id: baseId,
          object: "chat.completion.chunk",
          created,
          model: resp.model,
          choices: [
            {
              index: 0,
              delta,
              finish_reason: finishReason
            }
          ]
        };
        controller.enqueue(encoder.encode(`data: ${JSON.stringify(chunk)}\n\n`));
      };

      emitChunk({ role: "assistant" }, null);

      for (const block of resp.content) {
        if (block.type === "text") {
          const text = block.text;
          let pos = 0;
          while (pos < text.length) {
            let end = Math.min(pos + pieceUnits, text.length);
            if (end < text.length) {
              // Do not cut between a high surrogate and its low surrogate.
              const unit = text.charCodeAt(end - 1);
              if (unit >= 0xd800 && unit <= 0xdbff) {
                end += 1;
              }
            }
            emitChunk({ content: text.slice(pos, end) }, null);
            pos = end;
          }
        } else if (block.type === "tool_use") {
          emitChunk(
            {
              tool_calls: [
                {
                  index: toolIndex,
                  id: block.id,
                  type: "function",
                  function: {
                    name: block.name,
                    arguments: JSON.stringify(block.input)
                  }
                }
              ]
            },
            null
          );
          toolIndex += 1;
        }
      }

      emitChunk({}, mapStopReason(resp.stopReason));
      controller.enqueue(encoder.encode("data: [DONE]\n\n"));
      controller.close();
    }
  });

  return new Response(stream, {
    status: 200,
    headers: {
      "content-type": "text/event-stream",
      "cache-control": "no-cache",
      connection: "keep-alive"
    }
  });
}
732
/**
 * Builds the upstream OpenAI-compatible `/v1/chat/completions` request.
 *
 * The caller's credential, whichever scheme it arrived with, is forwarded as
 * a Bearer token. `max_tokens` is sent only when `req.maxTokens` is truthy,
 * `tools` only when there is at least one tool, and each known sampling
 * option only when it is defined in `req.extras`.
 *
 * @param {object} req - Gateway request.
 * @param {string} upstreamBase - Upstream base URL without trailing slash.
 * @returns {{url: string, headers: Record<string, string>, body: object}}
 */
function buildOpenAIUpstreamRequest(req, upstreamBase) {
  const headers = { "content-type": "application/json" };
  const cred = extractAuth(req.rawHeaders);
  if (cred) {
    headers["Authorization"] = `Bearer ${cred.value}`;
  }

  const body = {
    model: req.model,
    messages: buildOpenAIMessages(req.messages, req.system),
    stream: req.stream
  };
  if (req.maxTokens) {
    body.max_tokens = req.maxTokens;
  }
  if (req.tools.length > 0) {
    body.tools = req.tools.map(({ name, description, inputSchema }) => ({
      type: "function",
      function: { name, description, parameters: inputSchema }
    }));
  }

  if (req.extras) {
    const passthrough = [
      "temperature",
      "top_p",
      "frequency_penalty",
      "presence_penalty",
      "user",
      "logprobs",
      "top_logprobs"
    ];
    for (const key of passthrough) {
      if (req.extras[key] !== void 0) {
        body[key] = req.extras[key];
      }
    }
  }

  return { url: `${upstreamBase}/v1/chat/completions`, headers, body };
}
787
/**
 * Converts gateway messages into OpenAI chat messages.
 *
 * - A non-empty `system` becomes a leading `system` message.
 * - Text blocks of a message are concatenated into `content`.
 * - `tool_use` blocks become `tool_calls` on the message.
 * - `tool_result` blocks become separate `tool` messages (with
 *   `tool_call_id`), emitted before any remaining text of the same gateway
 *   message. A gateway message that held only tool results produces no
 *   additional, empty message.
 *
 * @param {object[]} messages - Gateway messages (`role` + content blocks).
 * @param {string} system - System prompt; skipped when empty.
 * @returns {object[]} OpenAI-format messages.
 */
function buildOpenAIMessages(messages, system) {
  const result = [];
  if (system) {
    result.push({ role: "system", content: system });
  }

  for (const msg of messages) {
    const textParts = [];
    const toolUses = [];
    const toolResults = [];

    for (const block of msg.content) {
      if (block.type === "text") {
        textParts.push(block.text);
      } else if (block.type === "tool_use") {
        toolUses.push({
          id: block.id,
          type: "function",
          function: {
            name: block.name,
            arguments: JSON.stringify(block.input)
          }
        });
      } else if (block.type === "tool_result") {
        toolResults.push({
          role: "tool",
          tool_call_id: block.toolUseId,
          content: block.content ?? ""
        });
      }
    }

    // Tool outputs must directly follow the assistant turn that requested them.
    result.push(...toolResults);

    const hasBody = textParts.length > 0 || toolUses.length > 0;
    if (!hasBody && toolResults.length > 0) {
      continue;
    }

    const msgRecord = { role: msg.role };
    if (textParts.length > 0) {
      msgRecord.content = textParts.join("");
    }
    if (toolUses.length > 0) {
      msgRecord.tool_calls = toolUses;
    }
    result.push(msgRecord);
  }
  return result;
}
823
-
824
- // src/stream/anthropic.ts
825
/**
 * Serialises one server-sent event.
 *
 * Each line of `data` gets its own `data:` field, so a payload containing
 * line breaks stays inside a single event instead of corrupting the stream
 * framing. Single-line payloads produce `event: <type>\ndata: <data>\n\n`.
 *
 * @param {string} eventType - Value of the `event:` field.
 * @param {string} data - Event payload.
 * @returns {string} The encoded event, terminated by a blank line.
 */
function formatSSEEvent(eventType, data) {
  const dataFields = String(data)
    .split(/\r\n|\r|\n/)
    .map((line) => `data: ${line}`)
    .join("\n");
  return `event: ${eventType}\n${dataFields}\n\n`;
}
831
/**
 * Incrementally parses a server-sent-event byte stream.
 *
 * Events are delimited by a blank line. Both LF and CRLF line endings are
 * understood, including a CRLF split across two reads. Within an event, the
 * last `event:` line sets the type (default "message") and all `data:` lines
 * are joined with newlines; blocks without any `data:` line are skipped. Any
 * unterminated event left when the stream ends is parsed the same way.
 *
 * @param {{read(): Promise<{done: boolean, value?: Uint8Array}>}} reader -
 *   Stream reader yielding UTF-8 bytes.
 * @yields {{event: string, data: string}} One object per event with data.
 */
async function* parseSSEStream(reader) {
  // Parses a single event block; returns null when it carries no data.
  const parseBlock = (block) => {
    let eventType = "message";
    const dataLines = [];
    for (const line of block.split("\n")) {
      if (line.startsWith("event:")) {
        eventType = line.slice(6).trim();
      } else if (line.startsWith("data:")) {
        dataLines.push(line.slice(5).trimStart());
      }
    }
    return dataLines.length > 0 ? { event: eventType, data: dataLines.join("\n") } : null;
  };

  const decoder = new TextDecoder();
  let buffer = "";
  for (; ; ) {
    const { done, value } = await reader.read();
    if (value) {
      buffer += decoder.decode(value, { stream: true });
    }
    if (done) {
      // Flush bytes the decoder was holding for an incomplete sequence.
      buffer += decoder.decode();
    }
    // Normalise CRLF so "\r\n\r\n" is seen as a blank-line boundary. A lone
    // trailing "\r" is left alone until its "\n" arrives with the next read.
    buffer = buffer.replace(/\r\n/g, "\n");

    let boundary;
    while ((boundary = buffer.indexOf("\n\n")) !== -1) {
      const block = buffer.slice(0, boundary);
      buffer = buffer.slice(boundary + 2);
      if (block.trim() === "") continue;
      const parsed = parseBlock(block);
      if (parsed) yield parsed;
    }

    if (done) {
      if (buffer.trim()) {
        const parsed = parseBlock(buffer);
        if (parsed) yield parsed;
      }
      break;
    }
  }
}
876
/**
 * Creates an accumulator that observes Anthropic streaming events and
 * rebuilds the complete response from them.
 *
 * `processEvent` returns every event re-encoded for forwarding and, when the
 * payload is valid JSON, folds it into internal state: message id/model/usage
 * (`message_start`), per-index content blocks (`content_block_*`), stop
 * reason and output tokens (`message_delta`), completion (`message_stop`).
 * `getResponse` finalises any block that never received a stop event and
 * returns the assembled response. Tool input that fails to parse as JSON is
 * kept as the raw string.
 *
 * @returns {{processEvent: (eventType: string, data: string) => string,
 *   getResponse: () => object, isDone: () => boolean}}
 */
function createStreamAccumulator() {
  let id = "";
  let model = "";
  let stopReason = "";
  let done = false;
  const usage = { inputTokens: 0, outputTokens: 0 };
  const blocks = /* @__PURE__ */ new Map();
  const content = [];
  const finalized = /* @__PURE__ */ new Set();

  // Moves an in-progress block into the final content list (once per index).
  const finalizeBlock = (index, block) => {
    finalized.add(index);
    if (block.type === "text") {
      content.push({ type: "text", text: block.text });
    } else if (block.type === "thinking") {
      const thinkingBlock = { type: "thinking", thinking: block.thinking };
      if (block.signature) {
        thinkingBlock.signature = block.signature;
      }
      content.push(thinkingBlock);
    } else if (block.type === "tool_use") {
      let input = {};
      if (block.partialJson) {
        try {
          input = JSON.parse(block.partialJson);
        } catch {
          input = block.partialJson;
        }
      }
      content.push({ type: "tool_use", id: block.id, name: block.name, input });
    }
  };

  // Copies a numeric wire counter into the usage record, if present.
  const copyCounter = (source, wireKey, localKey) => {
    if (typeof source[wireKey] === "number") {
      usage[localKey] = source[wireKey];
    }
  };

  const stringOrEmpty = (value) => (typeof value === "string" ? value : "");

  const handleMessageStart = (parsed) => {
    const message = parsed.message;
    if (!message) return;
    if (typeof message.id === "string") id = message.id;
    if (typeof message.model === "string") model = message.model;
    const msgUsage = message.usage;
    if (!msgUsage) return;
    copyCounter(msgUsage, "input_tokens", "inputTokens");
    copyCounter(msgUsage, "output_tokens", "outputTokens");
    copyCounter(msgUsage, "cache_read_input_tokens", "cacheReadInputTokens");
    copyCounter(msgUsage, "cache_creation_input_tokens", "cacheCreationInputTokens");
  };

  const handleContentBlockStart = (parsed) => {
    const index = parsed.index;
    if (typeof index !== "number") return;
    const block = parsed.content_block;
    if (!block || typeof block.type !== "string") return;
    if (block.type === "text") {
      blocks.set(index, { type: "text", text: stringOrEmpty(block.text) });
    } else if (block.type === "thinking") {
      blocks.set(index, {
        type: "thinking",
        thinking: stringOrEmpty(block.thinking),
        signature: ""
      });
    } else if (block.type === "tool_use") {
      blocks.set(index, {
        type: "tool_use",
        id: stringOrEmpty(block.id),
        name: stringOrEmpty(block.name),
        partialJson: ""
      });
    }
  };

  // delta type -> which block type it applies to and which fields to append.
  const deltaRules = new Map([
    ["text_delta", { blockType: "text", from: "text", to: "text" }],
    ["thinking_delta", { blockType: "thinking", from: "thinking", to: "thinking" }],
    ["signature_delta", { blockType: "thinking", from: "signature", to: "signature" }],
    ["input_json_delta", { blockType: "tool_use", from: "partial_json", to: "partialJson" }]
  ]);

  const handleContentBlockDelta = (parsed) => {
    const index = parsed.index;
    if (typeof index !== "number") return;
    const delta = parsed.delta;
    if (!delta || typeof delta.type !== "string") return;
    const block = blocks.get(index);
    if (!block) return;
    const rule = deltaRules.get(delta.type);
    if (rule && block.type === rule.blockType && typeof delta[rule.from] === "string") {
      block[rule.to] += delta[rule.from];
    }
  };

  const handleContentBlockStop = (parsed) => {
    const index = parsed.index;
    if (typeof index !== "number") return;
    const block = blocks.get(index);
    if (!block || finalized.has(index)) return;
    finalizeBlock(index, block);
  };

  const handleMessageDelta = (parsed) => {
    const delta = parsed.delta;
    if (delta && typeof delta.stop_reason === "string") {
      stopReason = delta.stop_reason;
    }
    const deltaUsage = parsed.usage;
    if (deltaUsage && typeof deltaUsage.output_tokens === "number") {
      usage.outputTokens = deltaUsage.output_tokens;
    }
  };

  const handlers = new Map([
    ["message_start", handleMessageStart],
    ["content_block_start", handleContentBlockStart],
    ["content_block_delta", handleContentBlockDelta],
    ["content_block_stop", handleContentBlockStop],
    ["message_delta", handleMessageDelta],
    ["message_stop", () => {
      done = true;
    }]
  ]);

  function processEvent(eventType, data) {
    const forwarded = formatSSEEvent(eventType, data);
    let parsed;
    try {
      parsed = JSON.parse(data);
    } catch {
      // Not JSON: forward untouched, nothing to accumulate.
      return forwarded;
    }
    handlers.get(eventType)?.(parsed);
    return forwarded;
  }

  function getResponse() {
    // Flush blocks whose stop event never arrived.
    for (const [index, block] of blocks) {
      if (!finalized.has(index)) {
        finalizeBlock(index, block);
      }
    }
    return { id, model, content, stopReason, usage: { ...usage } };
  }

  return {
    processEvent,
    getResponse,
    isDone: () => done
  };
}
1099
/**
 * Render a complete streaming response carrying a single text block.
 *
 * Six events are emitted in order: message_start, content_block_start,
 * content_block_delta (the whole text in one delta), content_block_stop,
 * message_delta (stop reason "end_turn" plus output tokens) and message_stop.
 *
 * @param {string} id - Message id placed in `message_start`.
 * @param {string} model - Model name placed in `message_start`.
 * @param {string} text - Text delivered in the single text delta.
 * @param {{inputTokens: number, outputTokens: number}} usage - Token counts.
 * @returns {string} All formatted events concatenated.
 */
function buildSSETextResponse(id, model, text, usage) {
  const frames = [
    [
      "message_start",
      {
        type: "message_start",
        message: {
          id,
          type: "message",
          role: "assistant",
          content: [],
          model,
          stop_reason: null,
          stop_sequence: null,
          usage: {
            input_tokens: usage.inputTokens,
            output_tokens: 1
          }
        }
      }
    ],
    [
      "content_block_start",
      {
        type: "content_block_start",
        index: 0,
        content_block: { type: "text", text: "" }
      }
    ],
    [
      "content_block_delta",
      {
        type: "content_block_delta",
        index: 0,
        delta: { type: "text_delta", text }
      }
    ],
    ["content_block_stop", { type: "content_block_stop", index: 0 }],
    [
      "message_delta",
      {
        type: "message_delta",
        delta: { stop_reason: "end_turn", stop_sequence: null },
        usage: { output_tokens: usage.outputTokens }
      }
    ],
    ["message_stop", { type: "message_stop" }]
  ];

  let stream = "";
  for (const [eventName, payload] of frames) {
    stream += formatSSEEvent(eventName, JSON.stringify(payload));
  }
  return stream;
}
1169
/**
 * Wrap a stream accumulator so that `tool_use` blocks for the recall tool are
 * hidden from the client stream while still being accumulated internally.
 *
 * Every event is forwarded to the inner accumulator. For the client-facing
 * output:
 * - events belonging to a recall `tool_use` block are dropped (empty string);
 * - remaining content-block events are re-indexed so the client sees
 *   contiguous indices. The shift for a block is the number of suppressed
 *   blocks with a SMALLER index, so events for blocks that precede a
 *   suppressed block keep their original index;
 * - once a recall block was seen, `message_delta` / `message_stop` are held
 *   back (available through `heldBackEvents()`) instead of being emitted.
 *
 * @param {string} [recallToolName="recall"] - Tool name to suppress.
 * @returns {object} `processEvent(eventType, data)` returning the text to
 *   send to the client, plus inspection helpers.
 */
function createRecallAwareAccumulator(recallToolName = "recall") {
  const inner = createStreamAccumulator();
  const suppressedIndices = /* @__PURE__ */ new Set();
  const otherToolIndices = /* @__PURE__ */ new Set();
  let firstSuppressedIndex = -1;
  let clientBlocks = 0;
  let heldBack = "";
  let recallDetected = false;

  /** Format an indexed event, shifting its index past hidden blocks before it. */
  function forwardWithClientIndex(eventType, data, parsed) {
    let hiddenBefore = 0;
    for (const hidden of suppressedIndices) {
      if (hidden < parsed.index) hiddenBefore++;
    }
    // Untouched indices are passed through verbatim (no re-serialization).
    if (hiddenBefore === 0) return formatSSEEvent(eventType, data);
    const adjusted = { ...parsed, index: parsed.index - hiddenBefore };
    return formatSSEEvent(eventType, JSON.stringify(adjusted));
  }

  function processEvent(eventType, data) {
    inner.processEvent(eventType, data);
    let parsed;
    try {
      parsed = JSON.parse(data);
    } catch {
      return formatSSEEvent(eventType, data);
    }
    // `null` and primitives are valid JSON but carry no index; pass through.
    const index = parsed?.index;
    switch (eventType) {
      case "content_block_start": {
        if (typeof index !== "number") break;
        const block = parsed.content_block;
        if (block?.type === "tool_use" && block.name === recallToolName) {
          // A Set keeps a repeated start event from being counted twice.
          suppressedIndices.add(index);
          recallDetected = true;
          if (firstSuppressedIndex < 0) firstSuppressedIndex = index;
          return "";
        }
        if (block?.type === "tool_use") {
          otherToolIndices.add(index);
        }
        clientBlocks++;
        return forwardWithClientIndex(eventType, data, parsed);
      }
      case "content_block_delta":
      case "content_block_stop": {
        if (typeof index !== "number") break;
        if (suppressedIndices.has(index)) return "";
        return forwardWithClientIndex(eventType, data, parsed);
      }
      case "message_delta":
      case "message_stop": {
        if (recallDetected) {
          heldBack += formatSSEEvent(eventType, data);
          return "";
        }
        break;
      }
    }
    return formatSSEEvent(eventType, data);
  }

  return {
    processEvent,
    getResponse: () => inner.getResponse(),
    isDone: () => inner.isDone(),
    hasRecall: () => recallDetected,
    hasOtherTools: () => otherToolIndices.size > 0,
    recallBlockIndex: () => firstSuppressedIndex,
    clientBlockCount: () => clientBlocks,
    heldBackEvents: () => heldBack
  };
}
1245
/**
 * Read an entire SSE response body and feed every event into a fresh stream
 * accumulator.
 *
 * Line endings are normalized first: the event-stream format allows LF, CR
 * and CRLF terminators, and without normalization a CRLF-delimited body
 * would never split on the blank line between events.
 *
 * Within an event block, `event:` sets the event type (default "message")
 * and all `data:` lines are joined with "\n". Blocks without data are skipped.
 *
 * @param {{text: function(): Promise<string>}} response - Response whose body is an SSE stream.
 * @returns {Promise<*>} Whatever the accumulator's `getResponse()` returns.
 */
async function accumulateSSEResponse(response) {
  const accumulator = createStreamAccumulator();
  const raw = await response.text();
  const text = raw.replace(/\r\n?/g, "\n");
  for (const block of text.split("\n\n")) {
    if (!block.trim()) continue;
    let eventType = "message";
    const dataLines = [];
    for (const line of block.split("\n")) {
      if (line.startsWith("event:")) {
        eventType = line.slice(6).trim();
      } else if (line.startsWith("data:")) {
        dataLines.push(line.slice(5).trimStart());
      }
    }
    if (dataLines.length > 0) {
      accumulator.processEvent(eventType, dataLines.join("\n"));
    }
  }
  return accumulator.getResponse();
}
1265
-
1266
- // src/pipeline.ts
1267
- import {
1268
- load,
1269
- config as loreConfig4,
1270
- ensureProject,
1271
- isFirstRun,
1272
- temporal as temporal3,
1273
- ltm as ltm2,
1274
- distillation as distillation2,
1275
- curator as curator2,
1276
- log as log6,
1277
- transform,
1278
- setModelLimits,
1279
- setLtmTokens,
1280
- getLtmBudget,
1281
- setMaxLayer0Tokens,
1282
- computeLayer0Cap,
1283
- calibrate,
1284
- getLastTransformedCount,
1285
- onIdleResume,
1286
- consumeCameOutOfIdle,
1287
- needsUrgentDistillation,
1288
- formatKnowledge,
1289
- buildCompactPrompt
1290
- } from "@loreai/core";
1291
-
1292
- // src/session.ts
1293
var BASE62_ALPHABET = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
var BASE = 62n;
/**
 * Encode bytes as a base-62 string, reading the bytes as one big-endian
 * unsigned integer.
 *
 * @param {Iterable<number>} bytes - Byte values, most significant first.
 * @param {number} [minLength=0] - Result is left-padded with the zero digit
 *   ("0") up to this length. An all-zero (or empty) input yields at least
 *   one zero digit.
 * @returns {string} Base-62 digits using 0-9, A-Z, a-z.
 */
function base62Encode(bytes, minLength = 0) {
  const zeroDigit = BASE62_ALPHABET[0];

  let value = 0n;
  for (const byte of bytes) {
    value = (value << 8n) | BigInt(byte);
  }
  if (value === 0n) {
    return zeroDigit.repeat(Math.max(1, minLength));
  }

  // Peel off least-significant digits, prepending so the result reads MSB-first.
  let encoded = "";
  for (; value > 0n; value /= BASE) {
    encoded = BASE62_ALPHABET[Number(value % BASE)] + encoded;
  }
  while (encoded.length < minLength) {
    encoded = zeroDigit + encoded;
  }
  return encoded;
}
1312
- var SESSION_ID_MIN_LENGTH = 17;
1313
- function generateSessionID() {
1314
- const buf = new Uint8Array(12);
1315
- crypto.getRandomValues(buf.subarray(0, 8));
1316
- const ts = Math.floor(Date.now() / 1e3);
1317
- const view = new DataView(buf.buffer, buf.byteOffset, buf.byteLength);
1318
- view.setUint32(8, ts >>> 0, false);
1319
- return base62Encode(buf, SESSION_ID_MIN_LENGTH);
1320
- }
1321
/**
 * Collect the text carried by a message `content` value.
 *
 * @param {*} content - A plain string, an array of content blocks, or anything else.
 * @returns {string[]} The string itself (wrapped), the `text` of every
 *   `{type: "text", text: string}` block in order, or an empty array for
 *   any other input.
 */
function extractTextParts(content) {
  if (typeof content === "string") return [content];
  if (!Array.isArray(content)) return [];

  const isTextBlock = (block) =>
    Boolean(block) &&
    typeof block === "object" &&
    "type" in block &&
    block.type === "text" &&
    "text" in block &&
    typeof block.text === "string";

  return content.filter(isTextBlock).map((block) => block.text);
}
1334
- async function fingerprintMessages(messages, extras) {
1335
- let firstUserContent = "";
1336
- for (const msg of messages) {
1337
- if (msg.role === "user") {
1338
- const texts = extractTextParts(msg.content);
1339
- firstUserContent = texts.join("");
1340
- break;
1341
- }
1342
- }
1343
- const material = firstUserContent + (extras?.model ?? "") + (extras?.authSuffix ?? "");
1344
- const encoded = new TextEncoder().encode(material);
1345
- const hash = await crypto.subtle.digest("SHA-256", encoded);
1346
- const bytes = new Uint8Array(hash);
1347
- let hex = "";
1348
- for (let i = 0; i < 8; i++) {
1349
- hex += bytes[i].toString(16).padStart(2, "0");
1350
- }
1351
- return hex;
1352
- }
1353
- var MESSAGE_COUNT_PROXIMITY_THRESHOLD = 20;
1354
-
1355
- // src/compaction.ts
1356
- var COMPACTION_SYSTEM_PATTERNS = [
1357
- "anchored context summarization assistant"
1358
- ];
1359
- var COMPACTION_USER_PATTERNS = [
1360
- "anchored summary from the conversation history above",
1361
- "Update the anchored summary below",
1362
- "<previous-summary>"
1363
- ];
1364
- var COMPACTION_TEMPLATE_SECTIONS = [
1365
- "## Goal",
1366
- "## Progress",
1367
- "## Key Decisions",
1368
- "## Next Steps",
1369
- "## Critical Context",
1370
- "## Relevant Files"
1371
- ];
1372
- var MIN_TEMPLATE_SECTION_MATCHES = 4;
1373
/**
 * Return the text of the most recent user message in a request.
 *
 * @param {{messages: Array<{role: string, content: Array}>}} req - Request
 *   whose message `content` values are arrays of blocks.
 * @returns {string} The `text` of that message's text blocks joined with
 *   newlines, or "" when the request has no user message.
 */
function lastUserText(req) {
  const { messages } = req;
  let position = messages.length;
  while (position-- > 0) {
    const message = messages[position];
    if (message.role !== "user") continue;

    const texts = [];
    for (const block of message.content) {
      if (block.type === "text") texts.push(block.text);
    }
    return texts.join("\n");
  }
  return "";
}
1382
/**
 * Rough token estimate: one token per four characters, rounded up.
 *
 * @param {string} text - Text to measure (by `length`).
 * @returns {number} Estimated token count.
 */
function estimateTokens(text) {
  const charsPerToken = 4;
  const { length } = text;
  return Math.ceil(length / charsPerToken);
}
1385
/**
 * Decide whether a request is a compaction (conversation summary) request.
 *
 * A request matches when any of the following holds:
 * 1. the system prompt contains a known compaction phrase (case-insensitive);
 * 2. the request has no tools and the last user text contains a known
 *    compaction phrase (case-sensitive);
 * 3. the last user text contains "<template>" together with at least
 *    MIN_TEMPLATE_SECTION_MATCHES of the known template section headings.
 *
 * @param {{system: string, tools: Array, messages: Array}} req - Request to inspect.
 * @returns {boolean} True when the request matches.
 */
function isCompactionRequest(req) {
  const loweredSystem = req.system.toLowerCase();
  const systemHit = COMPACTION_SYSTEM_PATTERNS.some(
    (phrase) => loweredSystem.includes(phrase.toLowerCase())
  );
  if (systemHit) return true;

  const userText = lastUserText(req);
  if (
    req.tools.length === 0 &&
    userText &&
    COMPACTION_USER_PATTERNS.some((phrase) => userText.includes(phrase))
  ) {
    return true;
  }

  if (!userText.includes("<template>")) return false;
  const sectionHits = COMPACTION_TEMPLATE_SECTIONS.filter(
    (heading) => userText.includes(heading)
  ).length;
  return sectionHits >= MIN_TEMPLATE_SECTION_MATCHES;
}
1405
var PREVIOUS_SUMMARY_RE = /<previous-summary>\n(.*?)\n<\/previous-summary>/s;
/**
 * Pull the body of the first `<previous-summary>` element out of the last
 * user message. The opening and closing tags must each sit on their own
 * line around the body.
 *
 * @param {object} req - Request passed through to `lastUserText`.
 * @returns {string|undefined} The enclosed text, or undefined when absent.
 */
function extractPreviousSummary(req) {
  const found = lastUserText(req).match(PREVIOUS_SUMMARY_RE);
  if (!found) return undefined;
  return found[1];
}
1411
var TITLE_SUMMARY_MAX_SYSTEM_LENGTH = 500;
var TITLE_SUMMARY_MAX_TOOLS = 2;
var TITLE_SUMMARY_MAX_MESSAGES = 2;
/**
 * Heuristic for small auxiliary requests: true when the request is not a
 * compaction request and has at most TITLE_SUMMARY_MAX_TOOLS tools, at most
 * TITLE_SUMMARY_MAX_MESSAGES messages, and a system prompt shorter than
 * TITLE_SUMMARY_MAX_SYSTEM_LENGTH characters.
 *
 * @param {{system: string, tools: Array, messages: Array}} req - Request to inspect.
 * @returns {boolean} True when every limit is satisfied.
 */
function isTitleOrSummaryRequest(req) {
  if (isCompactionRequest(req)) return false;

  const fewTools = req.tools.length <= TITLE_SUMMARY_MAX_TOOLS;
  if (!fewTools) return false;

  const fewMessages = req.messages.length <= TITLE_SUMMARY_MAX_MESSAGES;
  if (!fewMessages) return false;

  return req.system.length < TITLE_SUMMARY_MAX_SYSTEM_LENGTH;
}
1418
- function buildCompactionResponse(_sessionID, summary, model) {
1419
- return {
1420
- id: `msg_lore_compact_${crypto.randomUUID().slice(0, 8)}`,
1421
- model,
1422
- content: [{ type: "text", text: summary }],
1423
- stopReason: "end_turn",
1424
- usage: {
1425
- inputTokens: 0,
1426
- outputTokens: estimateTokens(summary)
1427
- }
1428
- };
1429
- }
1430
-
1431
- // src/temporal-adapter.ts
1432
- import { createHash, randomUUID } from "crypto";
1433
- import { isToolPart } from "@loreai/core";
1434
/**
 * Derive a stable message id from a message's role, position and content.
 *
 * The SHA-256 input is `role:index:` followed by one fragment per content
 * block, in order; blocks of unrecognized type contribute nothing.
 *
 * @param {string} role - Message role.
 * @param {number} index - Position of the message in the conversation.
 * @param {Iterable<object>} content - Content blocks (text, thinking, tool_use, tool_result).
 * @returns {string} First 32 hex characters of the digest.
 */
function deterministicID(role, index, content) {
  const hasher = createHash("sha256").update(`${role}:${index}:`);
  for (const block of content) {
    let fragment;
    if (block.type === "text") {
      fragment = `text:${block.text}`;
    } else if (block.type === "thinking") {
      fragment = `thinking:${block.thinking}`;
    } else if (block.type === "tool_use") {
      fragment = `tool_use:${block.id}:${block.name}:${JSON.stringify(block.input)}`;
    } else if (block.type === "tool_result") {
      fragment = `tool_result:${block.toolUseId}:${block.content}`;
    }
    if (fragment !== undefined) {
      hasher.update(fragment);
    }
  }
  return hasher.digest("hex").slice(0, 32);
}
1455
/**
 * Derive a stable part id from its owning message id and position.
 *
 * @param {string} messageID - Id of the owning message.
 * @param {number} partIndex - Position of the part within the message.
 * @returns {string} First 32 hex characters of SHA-256(`messageID:part:partIndex`).
 */
function deterministicPartID(messageID, partIndex) {
  const material = `${messageID}:part:${partIndex}`;
  return createHash("sha256").update(material).digest("hex").slice(0, 32);
}
1460
/**
 * Convert one gateway content block into a message part.
 *
 * Mapping:
 * - text        -> type "text" with a start/end time of "now"
 * - thinking    -> type "reasoning"
 * - tool_use    -> type "tool" in "pending" state carrying the tool input
 * - tool_result -> type "tool" named "result" in "completed" state
 *
 * @param {object} block - Content block to convert.
 * @param {string} sessionID - Session the part belongs to.
 * @param {string} messageID - Message the part belongs to.
 * @param {number} partIndex - Position of the block within its message.
 * @returns {object|undefined} The part, or undefined for other block types.
 */
function contentBlockToPart(block, sessionID, messageID, partIndex) {
  const now = Date.now();
  const common = {
    id: deterministicPartID(messageID, partIndex),
    sessionID,
    messageID
  };

  if (block.type === "text") {
    return {
      ...common,
      type: "text",
      text: block.text,
      time: { start: now, end: now }
    };
  }
  if (block.type === "thinking") {
    return { ...common, type: "reasoning", text: block.thinking };
  }
  if (block.type === "tool_use") {
    return {
      ...common,
      type: "tool",
      tool: block.name,
      callID: block.id,
      state: { status: "pending", input: block.input }
    };
  }
  if (block.type === "tool_result") {
    return {
      ...common,
      type: "tool",
      tool: "result",
      callID: block.toolUseId,
      state: {
        status: "completed",
        input: null,
        output: block.content,
        time: { start: now, end: now }
      }
    };
  }
  return undefined;
}
1508
/**
 * Convert gateway messages into `{ info, parts }` records.
 *
 * Every message gets a deterministic id derived from its role, position and
 * content; each content block becomes one part. Messages with role "user"
 * get user-shaped info; every other role gets assistant-shaped info with
 * zeroed cost and token counters and placeholder model/path values.
 *
 * @param {Array<{role: string, content: Array}>} messages - Gateway messages.
 * @param {string} sessionID - Session id stamped on every record and part.
 * @returns {Array<{info: object, parts: Array}>} One record per message, in order.
 */
function gatewayMessagesToLore(messages, sessionID) {
  const records = [];
  const created = Date.now();

  for (const [position, message] of messages.entries()) {
    const id = deterministicID(message.role, position, message.content);
    const parts = message.content.map(
      (block, partIndex) => contentBlockToPart(block, sessionID, id, partIndex)
    );

    const info =
      message.role === "user"
        ? {
            id,
            sessionID,
            role: "user",
            time: { created },
            agent: "gateway",
            model: { providerID: "anthropic", modelID: "unknown" }
          }
        : {
            id,
            sessionID,
            role: "assistant",
            time: { created },
            parentID: "",
            modelID: "unknown",
            providerID: "anthropic",
            mode: "gateway",
            path: { cwd: "", root: "" },
            cost: 0,
            tokens: {
              input: 0,
              output: 0,
              reasoning: 0,
              cache: { read: 0, write: 0 }
            }
          };

    records.push({ info, parts });
  }
  return records;
}
1551
/**
 * Copy token usage and the model id onto an assistant message record, in place.
 * Records whose role is not "assistant" are left untouched.
 *
 * @param {{info: object}} msg - Message record to update.
 * @param {object} usage - Usage with `inputTokens`, `outputTokens` and the
 *   optional `cacheReadInputTokens` / `cacheCreationInputTokens` (default 0).
 * @param {string} model - Model id to store as `info.modelID`.
 */
function updateAssistantMessageTokens(msg, usage, model) {
  const { info } = msg;
  if (info.role !== "assistant") return;

  const { tokens } = info;
  tokens.input = usage.inputTokens;
  tokens.output = usage.outputTokens;
  tokens.cache.read = usage.cacheReadInputTokens ?? 0;
  tokens.cache.write = usage.cacheCreationInputTokens ?? 0;
  info.modelID = model;
}
1560
/**
 * Fold tool results into the tool calls they answer, mutating `messages`.
 *
 * Three passes:
 * 1. index the output of every completed "result" tool part by call id
 *    (a later result for the same call id replaces an earlier one);
 * 2. mark every pending non-result tool part that has a matching result as
 *    completed, keeping its input and attaching the output;
 * 3. drop "result" parts from user messages; a user message left with no
 *    parts gets a single placeholder text part instead.
 *
 * @param {Array<{info: object, parts: Array}>} messages - Records to update in place.
 */
function resolveToolResults(messages) {
  const isResultPart = (part) => isToolPart(part) && part.tool === "result";

  const outputsByCall = /* @__PURE__ */ new Map();
  for (const { parts } of messages) {
    for (const part of parts) {
      if (isResultPart(part) && part.state.status === "completed") {
        outputsByCall.set(part.callID, { output: part.state.output, isError: false });
      }
    }
  }

  const stamp = Date.now();
  for (const { parts } of messages) {
    for (const part of parts) {
      if (!isToolPart(part)) continue;
      if (part.tool === "result" || part.state.status !== "pending") continue;
      const resolved = outputsByCall.get(part.callID);
      if (!resolved) continue;
      part.state = {
        status: "completed",
        input: part.state.input,
        output: resolved.output,
        time: { start: stamp, end: stamp }
      };
    }
  }

  for (const msg of messages) {
    if (msg.info.role !== "user") continue;
    const hadParts = msg.parts.length > 0;
    const remaining = msg.parts.filter((part) => !isResultPart(part));
    if (remaining.length > 0 || !hadParts) {
      msg.parts = remaining;
      continue;
    }
    // The message consisted solely of tool results: keep it non-empty.
    msg.parts = [
      {
        id: randomUUID(),
        sessionID: "",
        messageID: msg.info.id,
        type: "text",
        text: "[tool results provided]",
        time: { start: 0, end: 0 }
      }
    ];
  }
}
1608
-
1609
- // src/llm-adapter.ts
1610
- import { log } from "@loreai/core";
1611
var activeWorkerCalls = /* @__PURE__ */ new Set();
/**
 * Create an LLM client that sends single, non-streaming requests to the
 * upstream `/v1/messages` endpoint.
 *
 * @param {string} upstreamUrl - Upstream base URL (one trailing slash is tolerated).
 * @param {function(string=): object} getAuth - Returns the credential for a
 *   session id, or a falsy value when none is available.
 * @param {{modelID: string}} defaultModel - Model used when `opts.model` is absent.
 * @returns {{prompt: function}} Client whose `prompt(system, user, opts)`
 *   resolves to the first text block of the reply, or null on any failure.
 */
function createGatewayLLMClient(upstreamUrl, getAuth, defaultModel) {
  async function prompt(system, user, opts) {
    const cred = getAuth(opts?.sessionID);
    if (!cred) {
      log.warn("no auth credentials available for worker call");
      return null;
    }

    const model = opts?.model ?? defaultModel;
    const endpoint = `${upstreamUrl.replace(/\/$/, "")}/v1/messages`;
    // Each call is registered in the module-level set while it is running.
    const callID = `gw-worker-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
    activeWorkerCalls.add(callID);

    try {
      // A non-empty system prompt is sent as a cacheable text block; a falsy
      // one is forwarded unchanged.
      const systemField = system
        ? [
            {
              type: "text",
              text: system,
              cache_control: { type: "ephemeral", ttl: "1h" }
            }
          ]
        : system;
      const headers = {
        "Content-Type": "application/json",
        "anthropic-version": "2023-06-01",
        ...authHeaders(cred)
      };
      // opts.thinking is intentionally not forwarded: the request never
      // includes the `thinking` parameter.
      const payload = {
        model: model.modelID,
        max_tokens: 8192,
        system: systemField,
        messages: [{ role: "user", content: user }]
      };
      const response = await fetch(endpoint, {
        method: "POST",
        headers,
        body: JSON.stringify(payload)
      });

      if (!response.ok) {
        const text = await response.text().catch(() => "(no body)");
        log.error(
          `worker upstream request failed: ${response.status} ${response.statusText} \u2014 ${text}`
        );
        return null;
      }

      const data = await response.json();
      const firstText = data.content?.find(
        (b) => b.type === "text" && typeof b.text === "string"
      );
      return firstText?.text ?? null;
    } catch (e) {
      log.error("worker prompt failed:", e);
      return null;
    } finally {
      activeWorkerCalls.delete(callID);
    }
  }

  return { prompt };
}
1670
-
1671
- // src/batch-queue.ts
1672
- import { log as log2 } from "@loreai/core";
1673
- var DEFAULT_FLUSH_INTERVAL_MS = 3e4;
1674
- var DEFAULT_MAX_QUEUE_SIZE = 50;
1675
- var DEFAULT_POLL_INTERVAL_MS = 6e4;
1676
- var DEFAULT_MAX_BATCH_AGE_MS = 36e5;
1677
var idCounter = 0;
/**
 * Generate a `lore-<time>-<sequence>-<random>` id for one batch request.
 * All three components are base-36; the sequence is a module-wide counter
 * that increases by one per call.
 *
 * @returns {string} The generated id.
 */
function generateCustomId() {
  const components = [
    Date.now().toString(36),
    (idCounter++).toString(36),
    Math.random().toString(36).slice(2, 8)
  ];
  return `lore-${components.join("-")}`;
}
1684
/**
 * Build the grouping key for a credential: `<scheme>:<value>`.
 *
 * @param {{scheme: *, value: *}} cred - Credential to key.
 * @returns {string} The composite key.
 */
function authKey(cred) {
  const { scheme, value } = cred;
  return `${scheme}:${value}`;
}
1687
/**
 * Wrap a synchronous LLM client with a queue that submits non-urgent prompts
 * through the upstream Message Batches endpoints.
 *
 * Non-urgent prompts are queued and flushed on a timer or when the queue
 * reaches `maxQueueSize`; queued items are grouped per credential and each
 * group is submitted as one batch. Batches are polled until they end, then
 * their results resolve the pending promises. Whenever batching is not
 * possible (create failure, batch older than `maxBatchAgeMs`, shutdown) the
 * affected items are sent through `inner` instead, using the same options
 * (model, session) the caller originally passed.
 *
 * Pending promises are always resolved (with text or null), never rejected.
 *
 * @param {{prompt: function}} inner - Client used for urgent calls and fallbacks.
 * @param {string} upstreamUrl - Upstream base URL (one trailing slash is tolerated).
 * @param {function(string=): object} getAuth - Returns the credential for a session id, or a falsy value.
 * @param {{modelID: string}} defaultModel - Model used when `opts.model` is absent.
 * @param {object} [batchConfig] - Optional overrides: `flushIntervalMs`,
 *   `maxQueueSize`, `pollIntervalMs`, `maxBatchAgeMs`.
 * @returns {{prompt: function, shutdown: function, stats: function}}
 */
function createBatchLLMClient(inner, upstreamUrl, getAuth, defaultModel, batchConfig) {
  const flushIntervalMs = batchConfig?.flushIntervalMs ?? DEFAULT_FLUSH_INTERVAL_MS;
  const maxQueueSize = batchConfig?.maxQueueSize ?? DEFAULT_MAX_QUEUE_SIZE;
  const pollIntervalMs = batchConfig?.pollIntervalMs ?? DEFAULT_POLL_INTERVAL_MS;
  const maxBatchAgeMs = batchConfig?.maxBatchAgeMs ?? DEFAULT_MAX_BATCH_AGE_MS;
  const queue = [];
  const inflight = /* @__PURE__ */ new Map();
  let flushTimer = null;
  let shuttingDown = false;
  let totalQueued = 0;
  let totalBatched = 0;
  let totalUrgent = 0;
  let totalFallback = 0;
  let totalResolved = 0;
  let totalFailed = 0;

  /** Create one upstream batch for `items`; fall back to `inner` on failure. */
  async function submitBatch(auth, items) {
    const requests = items.map((item) => ({
      custom_id: item.customId,
      params: item.params
    }));
    log2.info(`batch flush: submitting ${items.length} requests`);
    try {
      const url = `${upstreamUrl.replace(/\/$/, "")}/v1/messages/batches`;
      const response = await fetch(url, {
        method: "POST",
        headers: {
          "Content-Type": "application/json",
          "anthropic-version": "2023-06-01",
          ...authHeaders(auth)
        },
        body: JSON.stringify({ requests })
      });
      if (!response.ok) {
        const text = await response.text().catch(() => "(no body)");
        log2.error(`batch create failed: ${response.status} ${response.statusText} \u2014 ${text}`);
        await fallbackAll(items);
        return;
      }
      const data = await response.json();
      totalBatched += items.length;
      const requestMap = /* @__PURE__ */ new Map();
      for (const item of items) {
        requestMap.set(item.customId, item);
      }
      const pollTimer = setInterval(
        () => pollBatch(data.id).catch((e) => log2.error("batch poll error:", e)),
        pollIntervalMs
      );
      inflight.set(data.id, {
        batchId: data.id,
        requests: requestMap,
        submittedAt: Date.now(),
        pollTimer,
        auth,
        polling: false
      });
      log2.info(`batch created: ${data.id} with ${items.length} requests`);
    } catch (e) {
      log2.error("batch create error:", e);
      await fallbackAll(items);
    }
  }

  /** Drain the queue and submit one batch per distinct credential. */
  async function flush() {
    if (queue.length === 0) return;
    const batch = queue.splice(0);
    const byAuth = /* @__PURE__ */ new Map();
    for (const item of batch) {
      const key = authKey(item.auth);
      let group = byAuth.get(key);
      if (!group) {
        group = { auth: item.auth, items: [] };
        byAuth.set(key, group);
      }
      group.items.push(item);
    }
    for (const { auth, items } of byAuth.values()) {
      await submitBatch(auth, items);
    }
  }

  /** Check one inflight batch; retrieve results once it has ended. */
  async function pollBatch(batchId) {
    const batch = inflight.get(batchId);
    if (!batch) return;
    // A previous tick is still polling / retrieving: skip this one so the
    // same results are never processed twice.
    if (batch.polling) return;
    if (Date.now() - batch.submittedAt > maxBatchAgeMs) {
      log2.warn(`batch ${batchId} exceeded max age \u2014 falling back to synchronous`);
      clearInterval(batch.pollTimer);
      inflight.delete(batchId);
      await fallbackAll([...batch.requests.values()]);
      return;
    }
    batch.polling = true;
    try {
      const url = `${upstreamUrl.replace(/\/$/, "")}/v1/messages/batches/${batchId}`;
      const response = await fetch(url, {
        headers: {
          "anthropic-version": "2023-06-01",
          ...authHeaders(batch.auth)
        }
      });
      if (!response.ok) {
        log2.error(`batch poll failed for ${batchId}: ${response.status}`);
        return;
      }
      const data = await response.json();
      if (data.processing_status !== "ended") return;
      log2.info(`batch ${batchId} ended \u2014 retrieving results`);
      if (data.results_url) {
        await retrieveResults(batchId, data.results_url);
      } else {
        await retrieveResults(
          batchId,
          `${upstreamUrl.replace(/\/$/, "")}/v1/messages/batches/${batchId}/results`
        );
      }
    } catch (e) {
      log2.error(`batch poll error for ${batchId}:`, e);
    } finally {
      batch.polling = false;
    }
  }

  /** Download the JSONL results of an ended batch and resolve its requests. */
  async function retrieveResults(batchId, resultsUrl) {
    const batch = inflight.get(batchId);
    if (!batch) return;
    try {
      const response = await fetch(resultsUrl, {
        headers: {
          "anthropic-version": "2023-06-01",
          ...authHeaders(batch.auth)
        }
      });
      if (!response.ok) {
        // The batch stays inflight, so a later poll retries the download.
        log2.error(`batch results fetch failed for ${batchId}: ${response.status}`);
        return;
      }
      const text = await response.text();
      const lines = text.split("\n").filter((l) => l.trim());
      for (const line of lines) {
        try {
          const result = JSON.parse(line);
          const pending = batch.requests.get(result.custom_id);
          if (!pending) continue;
          switch (result.result.type) {
            case "succeeded": {
              const textBlock = result.result.message?.content?.find(
                (b) => b.type === "text" && typeof b.text === "string"
              );
              pending.resolve(textBlock?.text ?? null);
              totalResolved++;
              break;
            }
            case "errored":
              pending.resolve(null);
              totalFailed++;
              log2.error(
                `batch item ${result.custom_id} errored: ${result.result.error?.type ?? "unknown"} \u2014 ${result.result.error?.message ?? JSON.stringify(result.result.error)}`
              );
              break;
            case "canceled":
            case "expired":
              pending.resolve(null);
              totalFailed++;
              log2.warn(`batch item ${result.custom_id} ${result.result.type}`);
              break;
            default:
              // Unknown outcome: still settle the caller's promise, because
              // the entry is removed from the pending map just below.
              pending.resolve(null);
              totalFailed++;
              log2.warn(`batch item ${result.custom_id} has unknown result type: ${result.result.type}`);
              break;
          }
          batch.requests.delete(result.custom_id);
        } catch {
          log2.error(`failed to parse batch result line: ${line.slice(0, 200)}`);
        }
      }
      // Anything the results file did not mention is resolved as failed.
      for (const [, pending] of batch.requests) {
        pending.resolve(null);
        totalFailed++;
      }
      clearInterval(batch.pollTimer);
      inflight.delete(batchId);
      log2.info(
        `batch ${batchId} fully resolved (${totalResolved} ok, ${totalFailed} failed total)`
      );
    } catch (e) {
      log2.error(`batch results retrieval error for ${batchId}:`, e);
    }
  }

  /** Send items through `inner`, at most five at a time. */
  async function fallbackAll(items) {
    totalFallback += items.length;
    log2.info(`batch fallback: processing ${items.length} items synchronously`);
    const CONCURRENCY = 5;
    for (let i = 0; i < items.length; i += CONCURRENCY) {
      const chunk = items.slice(i, i + CONCURRENCY);
      await Promise.all(
        chunk.map(async (item) => {
          try {
            // `params.system` is either the caller's falsy value or an array
            // of text blocks; recover the plain prompt in both cases.
            const rawSystem = item.params.system;
            const system = Array.isArray(rawSystem) ? rawSystem.map((b) => b.text).join("\n") : rawSystem;
            const user = item.params.messages[0]?.content ?? "";
            // Reuse the caller's options so the fallback runs with the same
            // model and session credential as the queued request.
            const result = await inner.prompt(system, user, { ...item.opts, urgent: true });
            item.resolve(result);
          } catch (e) {
            log2.error(`batch fallback error for ${item.customId}:`, e);
            item.resolve(null);
          }
        })
      );
    }
  }

  flushTimer = setInterval(() => {
    flush().catch((e) => log2.error("batch flush timer error:", e));
  }, flushIntervalMs);

  return {
    /**
     * Queue a prompt for batching. Urgent calls, calls made after shutdown
     * and calls without a credential go straight to `inner`.
     */
    async prompt(system, user, opts) {
      if (opts?.urgent || shuttingDown) {
        totalUrgent++;
        return inner.prompt(system, user, opts);
      }
      const cred = getAuth(opts?.sessionID);
      if (!cred) {
        totalUrgent++;
        return inner.prompt(system, user, opts);
      }
      totalQueued++;
      const model = opts?.model ?? defaultModel;
      const systemPayload = system ? [
        {
          type: "text",
          text: system,
          cache_control: { type: "ephemeral", ttl: "1h" }
        }
      ] : system;
      const customId = generateCustomId();
      const promise = new Promise((resolve, reject) => {
        queue.push({
          customId,
          params: {
            model: model.modelID,
            max_tokens: 8192,
            system: systemPayload,
            messages: [{ role: "user", content: user }]
          },
          opts,
          resolve,
          reject,
          enqueuedAt: Date.now(),
          auth: cred
        });
      });
      if (queue.length >= maxQueueSize) {
        flush().catch((e) => log2.error("batch auto-flush error:", e));
      }
      return promise;
    },
    /**
     * Shut down the batch queue:
     * 1. Switch to synchronous mode for future calls
     * 2. Stop the flush timer
     * 3. Process any remaining queued items synchronously through `inner`
     * 4. Abandon inflight batches: stop polling them and resolve their
     *    pending requests with null
     */
    async shutdown() {
      shuttingDown = true;
      if (flushTimer) {
        clearInterval(flushTimer);
        flushTimer = null;
      }
      if (queue.length > 0) {
        log2.info(`batch shutdown: processing ${queue.length} remaining items synchronously`);
        await fallbackAll(queue.splice(0));
      }
      for (const [batchId, batch] of inflight) {
        clearInterval(batch.pollTimer);
        for (const [, pending] of batch.requests) {
          pending.resolve(null);
        }
        log2.warn(`batch shutdown: abandoned inflight batch ${batchId}`);
      }
      inflight.clear();
    },
    /** Return current batch queue statistics. */
    stats() {
      return {
        queued: queue.length,
        inflightBatches: inflight.size,
        inflightRequests: [...inflight.values()].reduce(
          (sum, b) => sum + b.requests.size,
          0
        ),
        totalQueued,
        totalBatched,
        totalUrgent,
        totalFallback,
        totalResolved,
        totalFailed
      };
    }
  };
}
1974
-
1975
- // src/idle.ts
1976
- import { join } from "node:path";
1977
- import {
1978
- temporal as temporal2,
1979
- distillation,
1980
- curator,
1981
- ltm,
1982
- latReader,
1983
- log as log4,
1984
- config as loreConfig2,
1985
- exportToFile,
1986
- exportLoreFile
1987
- } from "@loreai/core";
1988
-
1989
- // src/worker-model.ts
1990
- import {
1991
- workerModel,
1992
- temporal,
1993
- distillation as distillationMod,
1994
- config as loreConfig,
1995
- log as log3
1996
- } from "@loreai/core";
1997
- var MODELS_DEV_API = "https://models.dev/api.json";
1998
- var cachedCostMap = null;
1999
- var cachedCostMapAt = 0;
2000
- var COST_CACHE_TTL_MS = 60 * 60 * 1e3;
2001
var FALLBACK_COSTS = [
  { prefix: "claude-opus-4", inputCostPerToken: 15 / 1e6 },
  { prefix: "claude-sonnet-4", inputCostPerToken: 3 / 1e6 },
  { prefix: "claude-haiku-4", inputCostPerToken: 1 / 1e6 },
  { prefix: "claude-haiku-3-5", inputCostPerToken: 0.8 / 1e6 },
  { prefix: "claude-sonnet-3-5", inputCostPerToken: 3 / 1e6 },
  { prefix: "claude-3-haiku", inputCostPerToken: 0.25 / 1e6 },
  { prefix: "claude-3-sonnet", inputCostPerToken: 3 / 1e6 },
  { prefix: "claude-3-opus", inputCostPerToken: 15 / 1e6 }
];
/**
 * Look up the built-in per-token input cost for a model id.
 *
 * @param {string} modelID - Model id; matched against the table by prefix,
 *   first matching entry wins.
 * @returns {number} The table cost, or 100 / 1e6 when no prefix matches.
 */
function fallbackCost(modelID) {
  const known = FALLBACK_COSTS.find((entry) => modelID.startsWith(entry.prefix));
  if (known) return known.inputCostPerToken;
  return 100 / 1e6;
}
2017
/**
 * Load the Anthropic model input costs published by models.dev.
 *
 * Results are cached at module level for COST_CACHE_TTL_MS. On any failure
 * (network error, timeout, bad status, missing provider) the previous cache
 * is returned when there is one, otherwise an empty map.
 *
 * The whole request, including reading the body, is bounded by a 10 second
 * abort timer, and the timer is always released, also when the request fails.
 *
 * @returns {Promise<Map<string, number>>} Model id -> `cost.input` exactly
 *   as reported by the API.
 */
async function fetchCostMap() {
  if (cachedCostMap && Date.now() - cachedCostMapAt < COST_CACHE_TTL_MS) {
    return cachedCostMap;
  }
  const controller = new AbortController();
  const timeout = setTimeout(() => controller.abort(), 1e4);
  try {
    const response = await fetch(MODELS_DEV_API, { signal: controller.signal });
    if (!response.ok) {
      log3.warn(`models.dev API failed: ${response.status} ${response.statusText}`);
      return cachedCostMap ?? /* @__PURE__ */ new Map();
    }
    const data = await response.json();
    const anthropic = data.anthropic?.models;
    if (!anthropic) {
      log3.warn("models.dev API: no anthropic provider found");
      return cachedCostMap ?? /* @__PURE__ */ new Map();
    }
    const costMap = /* @__PURE__ */ new Map();
    for (const [modelId, entry] of Object.entries(anthropic)) {
      if (entry.cost?.input != null) {
        costMap.set(modelId, entry.cost.input);
      }
    }
    cachedCostMap = costMap;
    cachedCostMapAt = Date.now();
    log3.info(`models.dev: loaded costs for ${costMap.size} anthropic models`);
    return costMap;
  } catch (e) {
    log3.warn("models.dev API error:", e);
    return cachedCostMap ?? /* @__PURE__ */ new Map();
  } finally {
    // Release the abort timer on every path so it cannot outlive the call.
    clearTimeout(timeout);
  }
}
2051
/**
 * Forget the cached cost map, so the next lookup fetches it again.
 */
function clearCostCache() {
  [cachedCostMap, cachedCostMapAt] = [null, 0];
}
2055
/**
 * Resolve a per-token input cost for each requested model id.
 *
 * Ids present in the fetched cost map use that value divided by 1e6; all
 * other ids use the built-in fallback cost.
 *
 * @param {Iterable<string>} modelIDs - Model ids to price.
 * @returns {Promise<Map<string, number>>} Model id -> cost per input token.
 */
async function fetchModelCosts(modelIDs) {
  const listed = await fetchCostMap();
  return new Map(
    Array.from(modelIDs, (id) => {
      const perMillion = listed.get(id);
      return [id, perMillion != null ? perMillion / 1e6 : fallbackCost(id)];
    })
  );
}
2068
var cachedModels = null;
var cachedModelsAt = 0;
var MODEL_CACHE_TTL_MS = 60 * 60 * 1e3;
/**
 * List the models offered by the upstream `/v1/models` endpoint, following
 * pagination, and attach an input cost to each.
 *
 * The result is cached at module level for MODEL_CACHE_TTL_MS. On any
 * failure the previous cache is returned when there is one, otherwise an
 * empty list.
 *
 * @param {string} upstreamUrl - Upstream base URL. One trailing slash is
 *   tolerated, matching the other upstream calls in this file.
 * @param {object} cred - Credential passed to `authHeaders`.
 * @returns {Promise<Array<object>>} Model descriptors (`id`, `providerID`,
 *   `cost.input`, `status`, `capabilities`).
 */
async function discoverModels(upstreamUrl, cred) {
  if (cachedModels && Date.now() - cachedModelsAt < MODEL_CACHE_TTL_MS) {
    return cachedModels;
  }
  try {
    // Strip a trailing slash so the request path is never "//v1/models".
    const endpoint = `${upstreamUrl.replace(/\/$/, "")}/v1/models`;
    const entries = [];
    let afterId;
    do {
      const url = new URL(endpoint);
      url.searchParams.set("limit", "1000");
      if (afterId) url.searchParams.set("after_id", afterId);
      const response = await fetch(url.toString(), {
        headers: {
          "content-type": "application/json",
          "anthropic-version": "2023-06-01",
          ...authHeaders(cred)
        }
      });
      if (!response.ok) {
        const text = await response.text().catch(() => "(no body)");
        log3.warn(
          `model discovery failed: ${response.status} ${response.statusText} \u2014 ${text}`
        );
        return cachedModels ?? [];
      }
      const data = await response.json();
      for (const entry of data.data) {
        entries.push(entry);
      }
      // Continue from the last id only while the API reports more pages.
      afterId = data.has_more ? data.last_id : void 0;
    } while (afterId);
    const modelIDs = entries.map((e) => e.id);
    const costs = await fetchModelCosts(modelIDs);
    const models = entries.map((entry) => ({
      id: entry.id,
      providerID: "anthropic",
      cost: { input: costs.get(entry.id) ?? fallbackCost(entry.id) },
      // Every listed model is recorded as active.
      status: "active",
      capabilities: {
        // Text input is recorded as supported for every model.
        input: { text: true },
        reasoning: entry.capabilities?.thinking?.supported ?? false
      }
    }));
    cachedModels = models;
    cachedModelsAt = Date.now();
    log3.info(
      `model discovery: found ${models.length} models (${models.map((m) => m.id).join(", ")})`
    );
    return models;
  } catch (e) {
    log3.warn("model discovery error:", e);
    return cachedModels ?? [];
  }
}
2127
/**
 * Forgets the memoized model-discovery result and its timestamp, so the next
 * `discoverModels()` call goes back to the upstream API.
 */
function clearModelCache() {
  cachedModelsAt = 0;
  cachedModels = null;
}
2131
// Re-entrancy guard: true while a validation pass (including its preparation) is running.
var validating = false;
/**
 * Runs worker-model validation for the session when it is needed and possible.
 *
 * The function is a no-op when a validation is already in flight, when a
 * worker model is explicitly configured, when no models / candidates are
 * available, when the stored validation is not stale, or when the session has
 * no generation-0 distillation with source messages to use as a reference.
 *
 * The guard flag is taken before the first `await`, so concurrent callers
 * cannot both slip past the check while model discovery is still pending.
 * Errors thrown by `workerModel.runValidation` are logged and swallowed;
 * errors from the preparation steps propagate to the caller.
 *
 * @param {string} sessionModel - Model ID the session is using.
 * @param {string} upstreamUrl - Upstream API base URL (for model discovery).
 * @param {*} cred - Credential forwarded to `discoverModels`.
 * @param {*} llm - LLM client forwarded to `workerModel.runValidation`.
 * @param {string} projectPath - Project whose stored session data is read.
 * @param {string} sessionID - Session whose distillations/messages are read.
 * @returns {Promise<void>}
 */
async function maybeValidateWorkerModel(sessionModel, upstreamUrl, cred, llm, projectPath, sessionID) {
  if (validating) return;
  const cfg = loreConfig();
  if (cfg.workerModel) return;
  validating = true;
  try {
    const models = await discoverModels(upstreamUrl, cred);
    if (models.length === 0) return;
    const discoveredModel = models.find((m) => m.id === sessionModel);
    const sessionModelInfo = {
      id: sessionModel,
      providerID: "anthropic",
      cost: { input: discoveredModel?.cost.input ?? fallbackCost(sessionModel) }
    };
    const candidates = workerModel.selectWorkerCandidates(sessionModelInfo, models);
    if (candidates.length === 0) return;
    // Nothing to validate when the only candidate is the session model itself.
    if (candidates.length === 1 && candidates[0].id === sessionModel) return;
    const fingerprint = workerModel.computeModelFingerprint(
      "anthropic",
      sessionModel,
      models.filter((m) => m.providerID === "anthropic").map((m) => m.id)
    );
    const stored = workerModel.getValidatedWorkerModel("anthropic");
    if (!workerModel.isValidationStale(stored, fingerprint)) return;
    const distillations = distillationMod.loadForSession(projectPath, sessionID, true);
    const gen0 = distillations.filter((d) => d.generation === 0);
    if (gen0.length === 0) return;
    // The most recent generation-0 distillation serves as the reference output.
    const reference = gen0[gen0.length - 1];
    const sourceIds = reference.source_ids;
    if (sourceIds.length === 0) return;
    const allMessages = temporal.bySession(projectPath, sessionID);
    const sourceSet = new Set(sourceIds);
    const sourceMessages = allMessages.filter((m) => sourceSet.has(m.id));
    if (sourceMessages.length === 0) return;
    const messagesText = sourceMessages.map((m) => m.content).join("\n");
    const date = new Date(sourceMessages[0].created_at).toLocaleDateString(
      "en-US",
      { year: "numeric", month: "long", day: "numeric" }
    );
    try {
      const result = await workerModel.runValidation({
        llm,
        providerID: "anthropic",
        sessionModelID: sessionModel,
        candidates,
        referenceObservations: reference.observations,
        sourceMessagesText: messagesText,
        date
      });
      if (result) {
        log3.info(
          `worker model validated: ${result.modelID} (judge=${result.judgeScore}) \u2014 saving 50%+ on worker calls`
        );
      }
    } catch (e) {
      log3.error("worker model validation error:", e);
    }
  } finally {
    validating = false;
  }
}
2191
/**
 * Returns whatever `workerModel.resolveWorkerModel` picks for the "anthropic"
 * provider given the configured worker model and the configured default model.
 */
function getWorkerModel() {
  const { workerModel: configuredWorker, model: configuredDefault } = loreConfig();
  return workerModel.resolveWorkerModel("anthropic", configuredWorker, configuredDefault);
}
2199
/**
 * Resets all worker-model module state: the validation guard flag, the model
 * discovery cache and the cost cache.
 */
function resetWorkerModelState() {
  validating = false;
  clearModelCache();
  clearCostCache();
}
2204
-
2205
- // src/idle.ts
2206
// How often the idle scheduler scans the session table (30 seconds).
var POLL_INTERVAL_MS = 3e4;
/**
 * Starts a periodic scan that invokes `doIdleWork` for every session whose
 * last request is at least `config2.idleTimeoutSeconds` old.
 *
 * A session is skipped while a previous `doIdleWork` call for it is still
 * pending. Rejections from `doIdleWork` are logged, never rethrown.
 *
 * @param {{ idleTimeoutSeconds: number }} config2 - Gateway configuration.
 * @param {Map<string, { lastRequestTime: number }>} sessions2 - Live session table.
 * @param {(sessionID: string, state: object) => Promise<void>} doIdleWork - Idle handler.
 * @returns {() => void} Function that stops the scheduler.
 */
function startIdleScheduler(config2, sessions2, doIdleWork) {
  const busySessions = /* @__PURE__ */ new Set();
  const scan = () => {
    const now = Date.now();
    const idleAfterMs = config2.idleTimeoutSeconds * 1e3;
    for (const [sessionID, state] of sessions2) {
      if (busySessions.has(sessionID)) continue;
      const recentlyActive = now - state.lastRequestTime < idleAfterMs;
      if (recentlyActive) continue;
      busySessions.add(sessionID);
      doIdleWork(sessionID, state)
        .catch((e) => log4.error(`idle work failed for session ${sessionID}:`, e))
        .finally(() => busySessions.delete(sessionID));
    }
  };
  const timer = setInterval(scan, POLL_INTERVAL_MS);
  return () => clearInterval(timer);
}
2221
/**
 * Builds the per-session idle handler used by the idle scheduler.
 *
 * The returned handler runs these steps in order, each one isolated so that a
 * failure is logged and the following steps still run:
 *   1. worker-model validation (only when `getAuth()` yields a credential)
 *   2. forced distillation when undistilled messages exist
 *   3. curation (knowledge enabled, `curator.onIdle`, enough turns)
 *   4. consolidation when the entry count exceeds `curator.maxEntries`
 *   5. temporal pruning
 *   6. knowledge export to file
 *   7. dead cross-reference cleanup
 *   8. lat-reader refresh
 *
 * @param {string} projectPath - Project the handler operates on.
 * @param {*} llm - LLM client passed to distillation / curation.
 * @param {string} upstreamUrl - Upstream base URL for model discovery.
 * @param {() => *} getAuth - Returns the credential to use, or a falsy value.
 * @param {string} sessionModel - Session model ID used for worker validation.
 * @param {(() => void)=} onLtmInvalidated - Called after knowledge entries changed.
 * @returns {(sessionID: string, state: object) => Promise<void>}
 */
function buildIdleWorkHandler(projectPath, llm, upstreamUrl, getAuth, sessionModel, onLtmInvalidated) {
  // Runs one step; any error is logged under `failureLabel` and swallowed.
  const guarded = async (failureLabel, step) => {
    try {
      await step();
    } catch (e) {
      log4.error(failureLabel, e);
    }
  };
  return async (sessionID, state) => {
    const cfg = loreConfig2();
    await guarded("idle worker model validation error:", async () => {
      const cred = getAuth();
      if (cred) {
        await maybeValidateWorkerModel(sessionModel, upstreamUrl, cred, llm, projectPath, sessionID);
      }
    });
    // Resolved after validation so a freshly validated worker model is picked up.
    const model = getWorkerModel();
    await guarded("idle distillation error:", async () => {
      const pending = temporal2.undistilledCount(projectPath, sessionID);
      if (pending > 0) {
        await distillation.run({ llm, projectPath, sessionID, model, force: true });
      }
    });
    if (cfg.knowledge.enabled && cfg.curator.onIdle) {
      await guarded("idle curation error:", async () => {
        if (state.turnsSinceCuration >= cfg.curator.afterTurns) {
          await curator.run({ llm, projectPath, sessionID, model });
          state.turnsSinceCuration = 0;
          onLtmInvalidated?.();
        }
      });
    }
    if (cfg.knowledge.enabled) {
      await guarded("idle consolidation error:", async () => {
        const entries = ltm.forProject(projectPath, false);
        if (entries.length > cfg.curator.maxEntries) {
          log4.info(
            `entry count ${entries.length} exceeds maxEntries ${cfg.curator.maxEntries} \u2014 running consolidation`
          );
          const { updated, deleted } = await curator.consolidate({
            llm,
            projectPath,
            sessionID,
            model
          });
          if (updated > 0 || deleted > 0) {
            log4.info(`consolidation: ${updated} updated, ${deleted} deleted`);
            onLtmInvalidated?.();
          }
        }
      });
    }
    await guarded("idle pruning error:", () => {
      const { ttlDeleted, capDeleted } = temporal2.prune({
        projectPath,
        retentionDays: cfg.pruning.retention,
        maxStorageMB: cfg.pruning.maxStorage
      });
      if (ttlDeleted > 0 || capDeleted > 0) {
        log4.info(
          `pruned temporal messages: ${ttlDeleted} by TTL, ${capDeleted} by size cap`
        );
      }
    });
    if (cfg.knowledge.enabled) {
      await guarded("idle knowledge export error:", () => {
        const entries = ltm.forProject(projectPath, false);
        if (entries.length > 0) {
          if (cfg.agentsFile.enabled) {
            const filePath = join(projectPath, cfg.agentsFile.path);
            exportToFile({ projectPath, filePath });
          } else {
            exportLoreFile(projectPath);
          }
        }
      });
    }
    if (cfg.knowledge.enabled) {
      await guarded("idle dead-ref cleanup error:", () => {
        const cleaned = ltm.cleanDeadRefs();
        if (cleaned > 0) {
          log4.info(`cleaned ${cleaned} dead knowledge cross-references`);
          onLtmInvalidated?.();
        }
      });
    }
    await guarded("idle lat-reader refresh error:", () => {
      latReader.refresh(projectPath);
    });
  };
}
2328
-
2329
- // src/recall.ts
2330
- import {
2331
- runRecall,
2332
- RECALL_TOOL_DESCRIPTION,
2333
- RECALL_PARAM_DESCRIPTIONS,
2334
- log as log5,
2335
- config as loreConfig3
2336
- } from "@loreai/core";
2337
/**
 * Tool definition for the gateway-provided "recall" tool.
 *
 * Descriptions come from `@loreai/core`; the schema requires a string `query`
 * and accepts an optional `scope` limited to the four listed values.
 */
var RECALL_GATEWAY_TOOL = {
  name: "recall",
  description: RECALL_TOOL_DESCRIPTION,
  inputSchema: {
    type: "object",
    properties: {
      query: { type: "string", description: RECALL_PARAM_DESCRIPTIONS.query },
      scope: {
        type: "string",
        enum: ["all", "session", "project", "knowledge"],
        description: RECALL_PARAM_DESCRIPTIONS.scope
      }
    },
    required: ["query"]
  }
};
2356
// Name under which the recall tool appears in tool lists and tool_use blocks.
var RECALL_TOOL_NAME = "recall";
// A stored pending recall result is usable for one minute.
var PENDING_RECALL_TTL_MS = 6e4;
/**
 * Tells whether a stored pending recall is still within its time-to-live.
 *
 * @param {{ timestamp: number }} pending - Pending recall record.
 * @returns {boolean} True when less than PENDING_RECALL_TTL_MS has elapsed.
 */
function isPendingRecallValid(pending) {
  const ageMs = Date.now() - pending.timestamp;
  return ageMs < PENDING_RECALL_TTL_MS;
}
2361
/**
 * Returns the first `tool_use` content block of `resp` that targets the
 * recall tool, or `undefined` when there is none.
 */
function findRecallToolUse(resp) {
  const isRecallCall = (block) => block.type === "tool_use" && block.name === RECALL_TOOL_NAME;
  return resp.content.find(isRecallCall);
}
2366
/** True when `resp` contains at least one recall `tool_use` block. */
function hasRecallToolUse(resp) {
  const recallBlock = findRecallToolUse(resp);
  return recallBlock !== void 0;
}
2369
/** True when `resp` contains a `tool_use` block for any tool other than recall. */
function hasOtherToolUse(resp) {
  const isForeignCall = (block) => block.type === "tool_use" && block.name !== RECALL_TOOL_NAME;
  return resp.content.some(isForeignCall);
}
2374
/** True when the given tool list already contains a tool named like the recall tool. */
function clientHasRecallTool(tools) {
  return tools.some(({ name }) => name === RECALL_TOOL_NAME);
}
2377
/**
 * Extracts the recall arguments from a `tool_use` block.
 *
 * The input is model-generated and therefore untrusted: a missing or null
 * `input` no longer throws, a non-string `query` becomes the empty string and
 * a missing or non-string `scope` falls back to "all".
 *
 * @param {{ input?: { query?: unknown, scope?: unknown } | null }} block - Tool-use block.
 * @returns {{ query: string, scope: string }} Normalized recall arguments.
 */
function parseRecallInput(block) {
  const input = block?.input ?? {};
  const { query, scope } = input;
  return {
    query: typeof query === "string" ? query : "",
    scope: typeof scope === "string" ? scope : "all"
  };
}
2384
/**
 * Executes the recall search requested by a `tool_use` block.
 *
 * Failures of `runRecall` are logged and turned into a fixed error text, so
 * the returned promise only rejects if parsing the block or reading the
 * configuration throws.
 *
 * @param {object} block - Recall `tool_use` block.
 * @param {string} projectPath - Project to search.
 * @param {string} sessionID - Session to search.
 * @returns {Promise<{ result: string, input: { query: string, scope: string } }>}
 */
async function executeRecall(block, projectPath, sessionID) {
  const { query, scope } = parseRecallInput(block);
  const cfg = loreConfig3();
  let result;
  try {
    result = await runRecall({
      query,
      scope,
      projectPath,
      sessionID,
      knowledgeEnabled: cfg.knowledge?.enabled ?? true,
      searchConfig: cfg.search
    });
  } catch (e) {
    log5.error("gateway recall execution failed:", e);
    result = "Recall search failed. The memory system encountered an error.";
  }
  return { result, input: { query, scope } };
}
2405
/**
 * Builds the follow-up request sent upstream after a recall tool call.
 *
 * The original request is copied with two messages appended (an assistant
 * message holding only the recall `tool_use` block, then a user message with
 * the matching `tool_result`) and with the recall tool removed from `tools`.
 * The original request object is not modified.
 *
 * @param {object} originalReq - Request that produced the recall call.
 * @param {object} resp - Accumulated response (currently unused).
 * @param {string} recallResult - Recall output; empty becomes "[No results found.]".
 * @param {{ id: string }} recallToolUseBlock - The recall `tool_use` block.
 * @returns {object} New request object.
 */
function buildRecallFollowUp(originalReq, resp, recallResult, recallToolUseBlock) {
  const recallCall = { role: "assistant", content: [recallToolUseBlock] };
  const recallAnswer = {
    role: "user",
    content: [
      {
        type: "tool_result",
        toolUseId: recallToolUseBlock.id,
        content: recallResult || "[No results found.]"
      }
    ]
  };
  const remainingTools = originalReq.tools.filter(({ name }) => name !== RECALL_TOOL_NAME);
  return {
    ...originalReq,
    messages: [...originalReq.messages, recallCall, recallAnswer],
    tools: remainingTools
  };
}
2433
/**
 * Re-inserts a previously executed recall call and its result into `req`.
 *
 * The last assistant message that is directly followed by a user message is
 * located; the recall `tool_use` block is spliced into the assistant content
 * at `pending.position` (clamped to the content length) and the matching
 * `tool_result` is prepended to the user content. The recall tool is removed
 * from `req.tools`. `req` and its messages are modified in place.
 *
 * @param {object} req - Incoming request to patch.
 * @param {{ toolUseId: string, input: object, position: number, result: string }} pending
 * @returns {boolean} False when no assistant->user pair exists (nothing changed).
 */
function injectPendingRecall(req, pending) {
  const { messages } = req;
  if (messages.length < 2) return false;
  // Scan backwards for the most recent assistant message followed by a user message.
  let assistantIdx = messages.length - 2;
  while (assistantIdx >= 0) {
    const isPair = messages[assistantIdx].role === "assistant" && messages[assistantIdx + 1]?.role === "user";
    if (isPair) break;
    assistantIdx--;
  }
  if (assistantIdx < 0) {
    log5.warn("injectPendingRecall: no assistant\u2192user pair found");
    return false;
  }
  const assistantContent = messages[assistantIdx].content;
  const userContent = messages[assistantIdx + 1].content;
  const insertAt = Math.min(pending.position, assistantContent.length);
  assistantContent.splice(insertAt, 0, {
    type: "tool_use",
    id: pending.toolUseId,
    name: RECALL_TOOL_NAME,
    input: pending.input
  });
  userContent.unshift({
    type: "tool_result",
    toolUseId: pending.toolUseId,
    content: pending.result
  });
  req.tools = req.tools.filter(({ name }) => name !== RECALL_TOOL_NAME);
  return true;
}
2465
/**
 * Returns a shallow copy of `resp` whose content no longer includes recall
 * `tool_use` blocks. The input response is left untouched.
 */
function stripRecallFromResponse(resp) {
  const visibleBlocks = resp.content.filter(
    (block) => block.type !== "tool_use" || block.name !== RECALL_TOOL_NAME
  );
  return { ...resp, content: visibleBlocks };
}
2473
-
2474
- // src/pipeline.ts
2475
// True once initIfNeeded() has completed for a project.
var initialized = false;
// Optional interceptor used by forwardToUpstream() when the caller passes none.
var activeInterceptor;
/**
 * Returns the pipeline module to its pristine state: clears the session
 * tables, shuts down and drops the LLM client (when it exposes `shutdown`),
 * removes the active interceptor, stops the idle scheduler and resets the
 * worker-model state.
 *
 * @returns {Promise<void>}
 */
async function resetPipelineState() {
  initialized = false;
  cachedProjectPath = null;
  sessions.clear();
  ltmSessionCache.clear();
  const canShutDown = llmClient && "shutdown" in llmClient;
  if (canShutDown) {
    await llmClient.shutdown();
  }
  llmClient = null;
  activeInterceptor = void 0;
  if (stopIdleScheduler) {
    const stop = stopIdleScheduler;
    stopIdleScheduler = null;
    stop();
  }
  lastSeenSessionModel = null;
  resetWorkerModelState();
}
2494
// Project path recorded by initIfNeeded(); reused by handleCompaction().
var cachedProjectPath = null;
// Live session table: sessionID -> session state (see getOrCreateSession()).
var sessions = /* @__PURE__ */ new Map();
// Per-session cache keyed by sessionID; cleared or pruned whenever knowledge
// entries change. NOTE(review): cached value shape is not visible here.
var ltmSessionCache = /* @__PURE__ */ new Map();
// Lazily created LLM client (see getLLMClient()).
var llmClient = null;
// Disposer returned by startIdleScheduler(), or null when no scheduler runs.
var stopIdleScheduler = null;
// Model ID of the most recent conversation turn, or null before the first one.
var lastSeenSessionModel = null;
2500
/**
 * Known model families, keyed by model-ID prefix.
 *
 * Pricing: https://docs.anthropic.com/en/docs/about-claude/models
 * Cache-read = input_price / 1_000_000 * 0.1 (10% of input for Anthropic)
 *
 * Anthropic's 3.5-generation IDs put the version before the family name
 * (e.g. "claude-3-5-sonnet-20241022"), so those prefixes are listed in that
 * form. The older "claude-sonnet-3-5" / "claude-haiku-3-5" spellings are kept
 * so any caller already passing them keeps resolving to the same spec.
 */
var MODEL_SPECS = {
  "claude-opus-4": { context: 2e5, output: 32e3, cacheReadCost: 15 / 1e6 * 0.1 },
  "claude-sonnet-4": { context: 2e5, output: 16e3, cacheReadCost: 3 / 1e6 * 0.1 },
  "claude-3-5-sonnet": { context: 2e5, output: 8192, cacheReadCost: 3 / 1e6 * 0.1 },
  "claude-3-5-haiku": { context: 2e5, output: 8192, cacheReadCost: 0.8 / 1e6 * 0.1 },
  "claude-sonnet-3-5": { context: 2e5, output: 8192, cacheReadCost: 3 / 1e6 * 0.1 },
  "claude-haiku-3-5": { context: 2e5, output: 8192, cacheReadCost: 0.8 / 1e6 * 0.1 }
};
// Spec used for models that match no known prefix (no cache-read cost known).
var DEFAULT_MODEL_SPEC = { context: 2e5, output: 8192 };
/**
 * Looks up context/output limits (and cache-read cost, when known) for a model.
 *
 * @param {string} model - Model ID, e.g. "claude-sonnet-4-20250514".
 * @returns {{ context: number, output: number, cacheReadCost?: number }}
 *   The spec of the first matching prefix, or DEFAULT_MODEL_SPEC when nothing
 *   matches or `model` is not a string.
 */
function getModelSpec(model) {
  if (typeof model !== "string") return DEFAULT_MODEL_SPEC;
  for (const [prefix, spec] of Object.entries(MODEL_SPECS)) {
    if (model.startsWith(prefix)) return spec;
  }
  return DEFAULT_MODEL_SPEC;
}
2515
/**
 * One-time pipeline initialization for a project.
 *
 * Loads and ensures the project, records its path, and (when a config is
 * given and no scheduler is running yet) starts the idle scheduler with an
 * idle handler bound to this project. Subsequent calls are no-ops once
 * `initialized` is set.
 *
 * @param {string} projectPath - Project to initialize.
 * @param {object=} config2 - Gateway configuration; required to start the scheduler.
 * @returns {Promise<void>}
 */
async function initIfNeeded(projectPath, config2) {
  if (initialized) return;
  await load(projectPath);
  ensureProject(projectPath);
  initialized = true;
  cachedProjectPath = projectPath;
  const shouldStartScheduler = config2 && !stopIdleScheduler;
  if (shouldStartScheduler) {
    const llm = getLLMClient(config2);
    // Prefer the model of the latest turn; otherwise the configured model,
    // otherwise a fixed default.
    const sessionModelID = lastSeenSessionModel ?? (loreConfig4().model?.modelID ?? "claude-sonnet-4-20250514");
    const invalidateLtmCache = () => ltmSessionCache.clear();
    const idleHandler = buildIdleWorkHandler(
      projectPath,
      llm,
      config2.upstreamAnthropic,
      () => resolveAuth(),
      sessionModelID,
      invalidateLtmCache
    );
    stopIdleScheduler = startIdleScheduler(config2, sessions, idleHandler);
  }
  log6.info(`gateway pipeline initialized: ${projectPath}`);
}
2537
/**
 * Returns the shared LLM client, creating it on first use.
 *
 * The client targets `config2.upstreamAnthropic` with the configured model
 * (or a fixed Anthropic default). Unless `LORE_BATCH_DISABLED` is "1", the
 * plain gateway client is wrapped by `createBatchLLMClient`.
 *
 * @param {{ upstreamAnthropic: string }} config2 - Gateway configuration.
 * @returns {object} The memoized LLM client.
 */
function getLLMClient(config2) {
  if (llmClient) return llmClient;
  const defaultModel = loreConfig4().model ?? {
    providerID: "anthropic",
    modelID: "claude-sonnet-4-20250514"
  };
  const upstream = config2.upstreamAnthropic;
  const plainClient = createGatewayLLMClient(upstream, resolveAuth, defaultModel);
  const batchDisabled = process.env.LORE_BATCH_DISABLED === "1";
  llmClient = batchDisabled ? plainClient : createBatchLLMClient(plainClient, upstream, resolveAuth, defaultModel);
  return llmClient;
}
2563
/**
 * Fetches the state for `sessionID`, creating and registering a fresh one
 * when the session is unknown.
 *
 * Every call refreshes `lastRequestTime` and drops a stored pending recall
 * that has outlived its TTL.
 *
 * @param {string} sessionID - Session identifier.
 * @param {string} projectPath - Project recorded on newly created sessions.
 * @returns {object} The (possibly new) session state.
 */
function getOrCreateSession(sessionID, projectPath) {
  const existing = sessions.get(sessionID);
  const state = existing || {
    sessionID,
    projectPath,
    fingerprint: "",
    lastRequestTime: Date.now(),
    messageCount: 0,
    turnsSinceCuration: 0
  };
  if (!existing) {
    sessions.set(sessionID, state);
  }
  state.lastRequestTime = Date.now();
  const hasExpiredRecall = state.pendingRecall && !isPendingRecallValid(state.pendingRecall);
  if (hasExpiredRecall) {
    log6.warn(
      `lazy cleanup: discarding expired pending recall for session ${sessionID.slice(0, 16)}`
    );
    state.pendingRecall = void 0;
  }
  return state;
}
2585
/**
 * Maps an incoming request onto a known session, or mints a new session ID.
 *
 * A fingerprint is computed from the request messages, the model and (when a
 * credential is present) an auth fingerprint. Among sessions with the same
 * fingerprint, those whose stored message count exceeds the request's by more
 * than MESSAGE_COUNT_PROXIMITY_THRESHOLD are ignored; of the rest, the one
 * with the closest message count wins (first one on ties).
 *
 * @param {object} req - Incoming gateway request.
 * @param {string} _projectPath - Unused.
 * @returns {Promise<{ sessionID: string, isNew: boolean }>}
 */
async function identifySession(req, _projectPath) {
  const rawMessages = req.messages.map(({ role, content }) => ({ role, content }));
  const cred = extractAuth(req.rawHeaders);
  const fingerprint = await fingerprintMessages(rawMessages, {
    model: req.model,
    authSuffix: cred ? authFingerprint(cred) : ""
  });
  const msgCount = req.messages.length;
  let closest = null;
  for (const [sid, state] of sessions) {
    if (state.fingerprint !== fingerprint) continue;
    const diff = msgCount - state.messageCount;
    if (diff < -MESSAGE_COUNT_PROXIMITY_THRESHOLD) continue;
    const countDiff = Math.abs(diff);
    if (!closest || countDiff < closest.countDiff) {
      closest = { sid, countDiff };
    }
  }
  if (!closest) {
    return { sessionID: generateSessionID(), isNew: true };
  }
  return { sessionID: closest.sid, isNew: false };
}
2612
/**
 * Sends a gateway request to the appropriate upstream API.
 *
 * Protocol and base URL come from `resolveUpstreamRoute(req.model)` when it
 * yields a route, otherwise from the request's own protocol and the matching
 * configured upstream. When an interceptor is available (argument first, then
 * the module-level one) it receives the body, model, stream flag and a thunk
 * performing the actual POST; otherwise the POST is issued directly.
 *
 * @param {object} req - Gateway request.
 * @param {{ upstreamOpenAI: string, upstreamAnthropic: string }} config2 - Gateway configuration.
 * @param {Function=} interceptor - Optional per-call interceptor.
 * @param {*=} cache - Cache options forwarded to `buildAnthropicRequest`.
 * @returns {Promise<Response>} Upstream (or interceptor-provided) response.
 */
async function forwardToUpstream(req, config2, interceptor, cache) {
  const route = resolveUpstreamRoute(req.model);
  const effectiveProtocol = route?.protocol ?? req.protocol;
  const isOpenAI = effectiveProtocol === "openai";
  const effectiveUpstreamBase = route?.url ?? (isOpenAI ? config2.upstreamOpenAI : config2.upstreamAnthropic);
  let url;
  let headers;
  let body;
  if (isOpenAI) {
    ({ url, headers, body } = buildOpenAIUpstreamRequest(req, effectiveUpstreamBase));
  } else {
    const built = buildAnthropicRequest(req, cache);
    // The Anthropic builder returns a path; prefix it with the chosen base URL.
    url = `${effectiveUpstreamBase}${built.url}`;
    headers = built.headers;
    body = built.body;
  }
  const send = () => fetch(url, {
    method: "POST",
    headers,
    body: JSON.stringify(body)
  });
  const effectiveInterceptor = interceptor ?? activeInterceptor;
  if (!effectiveInterceptor) {
    return send();
  }
  return effectiveInterceptor(body, req.model, req.stream, send);
}
2649
/**
 * Wraps an upstream SSE response in a new streaming `Response`, accumulating
 * the full message on the way and handling gateway-side recall tool calls.
 *
 * Without `recallContext`, events are passed through the plain accumulator
 * and `onComplete` receives the accumulated response.
 *
 * With `recallContext`, a recall-aware accumulator is used. If the finished
 * response contains a recall call, the recall is executed and then:
 *  - mixed case (other tool calls present): the result is stored on the
 *    session as `pendingRecall`, a "[Searching memory...]" text block and the
 *    held-back events are emitted, and `onComplete` gets the response with
 *    recall blocks stripped;
 *  - recall-only case: the "[Searching memory...]" block is emitted, a
 *    follow-up request is sent upstream and its events are appended to the
 *    same stream (skipping `message_start`, shifting content-block indexes);
 *    `onComplete` gets the original accumulated response. If the follow-up
 *    fails, the held-back events are emitted and `onComplete` gets the
 *    stripped response.
 *
 * Any error is logged and propagated to the stream via `controller.error`.
 *
 * @param {Response} upstreamResponse - Upstream SSE response.
 * @param {(resp: object) => void} onComplete - Called once with the final response.
 * @param {object=} recallContext - `{ sessionState, modifiedReq, config, cacheOptions }`.
 * @returns {Response} 200 `text/event-stream` response.
 */
function buildStreamingResponse(upstreamResponse, onComplete, recallContext) {
  const recallAccum = recallContext ? createRecallAwareAccumulator(RECALL_TOOL_NAME) : null;
  const accumulator = recallAccum ?? createStreamAccumulator();
  const encoder = new TextEncoder();
  // Events of the follow-up stream whose `index` must be shifted.
  const INDEXED_EVENTS = /* @__PURE__ */ new Set([
    "content_block_start",
    "content_block_delta",
    "content_block_stop"
  ]);
  // SSE text for a complete synthetic text block announcing the memory search.
  const searchingNotice = (index) => [
    formatSSEEvent("content_block_start", JSON.stringify({
      type: "content_block_start",
      index,
      content_block: { type: "text", text: "" }
    })),
    formatSSEEvent("content_block_delta", JSON.stringify({
      type: "content_block_delta",
      index,
      delta: { type: "text_delta", text: "\n\n[Searching memory...]" }
    })),
    formatSSEEvent("content_block_stop", JSON.stringify({
      type: "content_block_stop",
      index
    }))
  ].join("");
  const stream = new ReadableStream({
    async start(controller) {
      const emit = (sseText) => controller.enqueue(encoder.encode(sseText));
      // Flushes whatever the recall accumulator withheld, then ends the stream
      // and reports the response without its recall blocks.
      const finishStripped = (resp) => {
        const heldBack = recallAccum.heldBackEvents();
        if (heldBack) {
          emit(heldBack);
        }
        controller.close();
        onComplete(stripRecallFromResponse(resp));
      };
      try {
        const reader = upstreamResponse.body.getReader();
        for await (const { event, data } of parseSSEStream(reader)) {
          const forwarded = accumulator.processEvent(event, data);
          if (forwarded) {
            emit(forwarded);
          }
        }
        if (recallAccum?.hasRecall()) {
          const resp = recallAccum.getResponse();
          const recallBlock = findRecallToolUse(resp);
          if (recallBlock && recallContext) {
            const { sessionState } = recallContext;
            const { result, input } = await executeRecall(
              recallBlock,
              sessionState.projectPath,
              sessionState.sessionID
            );
            const mixedWithOtherTools = recallAccum.hasOtherTools();
            if (mixedWithOtherTools) {
              // The client must run its own tools first; keep the recall result
              // for injection into the next request of this session.
              sessionState.pendingRecall = {
                toolUseId: recallBlock.id,
                input,
                position: resp.content.indexOf(recallBlock),
                result,
                timestamp: Date.now()
              };
              log6.info(
                `recall (stream, mixed): stored pending result for session ${sessionState.sessionID.slice(0, 16)}`
              );
            } else {
              log6.info(
                `recall (stream, only): executing follow-up for session ${sessionState.sessionID.slice(0, 16)}`
              );
            }
            emit(searchingNotice(recallAccum.clientBlockCount()));
            if (mixedWithOtherTools) {
              finishStripped(resp);
              return;
            }
            const followUp = buildRecallFollowUp(
              recallContext.modifiedReq,
              resp,
              result,
              recallBlock
            );
            const followUpResponse = await forwardToUpstream(
              followUp,
              recallContext.config,
              void 0,
              recallContext.cacheOptions
            );
            if (!followUpResponse.ok) {
              const errorBody = await followUpResponse.text();
              log6.error(
                `recall follow-up upstream error: ${followUpResponse.status} ${errorBody.slice(0, 500)}`
              );
              finishStripped(resp);
              return;
            }
            // +1 accounts for the synthetic "searching" block emitted above.
            const blockOffset = recallAccum.clientBlockCount() + 1;
            const contReader = followUpResponse.body.getReader();
            for await (const { event: contEvent, data: contData } of parseSSEStream(contReader)) {
              // The client already received a message_start for this message.
              if (contEvent === "message_start") {
                continue;
              }
              if (INDEXED_EVENTS.has(contEvent)) {
                try {
                  const parsed = JSON.parse(contData);
                  if (typeof parsed.index === "number") {
                    parsed.index = parsed.index + blockOffset;
                    emit(formatSSEEvent(contEvent, JSON.stringify(parsed)));
                    continue;
                  }
                } catch {
                  // Fall through and forward the event unmodified.
                }
              }
              emit(formatSSEEvent(contEvent, contData));
            }
            controller.close();
            onComplete(resp);
            return;
          }
        }
        controller.close();
        onComplete(accumulator.getResponse());
      } catch (err) {
        log6.error("streaming pipeline error:", err);
        controller.error(err);
      }
    }
  });
  return new Response(stream, {
    status: 200,
    headers: {
      "content-type": "text/event-stream",
      "cache-control": "no-cache",
      connection: "keep-alive"
    }
  });
}
2805
/**
 * Converts a non-streaming Anthropic-style JSON response into the gateway's
 * internal response shape.
 *
 * Only `text`, `thinking` and `tool_use` content blocks are kept; blocks of
 * any other type are dropped. Missing scalar fields fall back to "" (or
 * "end_turn" for the stop reason) and missing token counts to 0.
 *
 * @param {{ json(): Promise<object> }} upstreamResponse - Upstream response.
 * @returns {Promise<object>} `{ id, model, content, stopReason, usage }`.
 */
async function accumulateNonStreamResponse(upstreamResponse) {
  const json = await upstreamResponse.json();
  // Per-type converters from wire blocks to internal blocks.
  const convert = (block) => {
    if (block.type === "text") {
      return { type: "text", text: String(block.text ?? "") };
    }
    if (block.type === "thinking") {
      const thinkingBlock = { type: "thinking", thinking: String(block.thinking ?? "") };
      if (block.signature) {
        thinkingBlock.signature = String(block.signature);
      }
      return thinkingBlock;
    }
    if (block.type === "tool_use") {
      return {
        type: "tool_use",
        id: String(block.id ?? ""),
        name: String(block.name ?? ""),
        input: block.input
      };
    }
    return null;
  };
  const content = [];
  if (json.content) {
    for (const block of json.content) {
      const converted = convert(block);
      if (converted) content.push(converted);
    }
  }
  const usage = json.usage;
  return {
    id: String(json.id ?? ""),
    model: String(json.model ?? ""),
    content,
    stopReason: String(json.stop_reason ?? "end_turn"),
    usage: {
      inputTokens: usage?.input_tokens ?? 0,
      outputTokens: usage?.output_tokens ?? 0,
      cacheReadInputTokens: usage?.cache_read_input_tokens,
      cacheCreationInputTokens: usage?.cache_creation_input_tokens
    }
  };
}
2849
/**
 * Serializes an internal response as a 200 JSON HTTP response, using
 * `buildAnthropicNonStreamResponse` to produce the body object.
 */
function nonStreamHttpResponse(resp) {
  const payload = JSON.stringify(buildAnthropicNonStreamResponse(resp));
  const headers = { "content-type": "application/json" };
  return new Response(payload, { status: 200, headers });
}
2856
/**
 * Serializes an internal response as a 200 SSE HTTP response.
 *
 * Only the text blocks are sent: their texts are concatenated and handed to
 * `buildSSETextResponse` together with the input/output token counts.
 */
function streamHttpResponse(resp) {
  const textPieces = [];
  for (const block of resp.content) {
    if (block.type === "text") textPieces.push(block.text);
  }
  const { inputTokens, outputTokens } = resp.usage;
  const sseBody = buildSSETextResponse(resp.id, resp.model, textPieces.join(""), {
    inputTokens,
    outputTokens
  });
  const headers = {
    "content-type": "text/event-stream",
    "cache-control": "no-cache",
    connection: "keep-alive"
  };
  return new Response(sseBody, { status: 200, headers });
}
2874
/**
 * Bookkeeping performed after a response has been produced for a turn.
 *
 * Calibrates token estimation with the actual input token total, stores the
 * most recent user message and the assistant reply in temporal storage,
 * increments the session's turn counter and schedules background work.
 * Any error is logged and swallowed.
 *
 * @param {object} req - The request that was answered.
 * @param {object} resp - The accumulated response.
 * @param {object} sessionState - State of the session the turn belongs to.
 * @param {object} config2 - Gateway configuration.
 */
function postResponse(req, resp, sessionState, config2) {
  const { sessionID, projectPath } = sessionState;
  try {
    const { usage } = resp;
    // Total prompt size = uncached + cache-read + cache-creation tokens.
    const actualInput = (usage.inputTokens ?? 0) + (usage.cacheReadInputTokens ?? 0) + (usage.cacheCreationInputTokens ?? 0);
    calibrate(actualInput, sessionID, getLastTransformedCount(sessionID));
    const loreMessages = gatewayMessagesToLore(req.messages, sessionID);
    resolveToolResults(loreMessages);
    // Persist only the latest user message of the request.
    let idx = loreMessages.length - 1;
    while (idx >= 0 && loreMessages[idx].info.role !== "user") {
      idx--;
    }
    if (idx >= 0) {
      const { info, parts } = loreMessages[idx];
      temporal3.store({ projectPath, info, parts });
    }
    const [assistantMsg] = gatewayMessagesToLore(
      [{ role: "assistant", content: resp.content }],
      sessionID
    );
    updateAssistantMessageTokens(assistantMsg, usage, resp.model);
    temporal3.store({
      projectPath,
      info: assistantMsg.info,
      parts: assistantMsg.parts
    });
    sessionState.turnsSinceCuration = (sessionState.turnsSinceCuration ?? 0) + 1;
    scheduleBackgroundWork(sessionState, config2);
  } catch (e) {
    log6.error("post-response processing failed:", e);
  }
}
2911
/**
 * Kicks off fire-and-forget background jobs for a session after a turn.
 *
 *  - urgent forced distillation when `needsUrgentDistillation()` says so;
 *  - incremental distillation once the undistilled message count reaches
 *    `distillation.maxSegment`;
 *  - curation when knowledge and `curator.onIdle` are enabled and enough
 *    turns have passed (resets the turn counter and drops the session's
 *    LTM cache entry on success).
 *
 * Job failures are logged; nothing is awaited.
 *
 * @param {object} sessionState - Session the jobs run for.
 * @param {object} config2 - Gateway configuration.
 */
function scheduleBackgroundWork(sessionState, config2) {
  const { sessionID, projectPath } = sessionState;
  const llm = getLLMClient(config2);
  const cfg = loreConfig4();
  const model = getWorkerModel();
  const onDistillFailure = (e) => log6.error("background distillation failed:", e);
  if (needsUrgentDistillation()) {
    distillation2
      .run({ llm, projectPath, sessionID, model, force: true, urgent: true })
      .catch(onDistillFailure);
  }
  const pending = temporal3.undistilledCount(projectPath, sessionID);
  if (pending >= cfg.distillation.maxSegment) {
    log6.info(
      `incremental distillation: ${pending} undistilled messages in ${sessionID.slice(0, 16)}`
    );
    distillation2.run({ llm, projectPath, sessionID, model }).catch(onDistillFailure);
  }
  const curationDue = cfg.knowledge.enabled && cfg.curator.onIdle && sessionState.turnsSinceCuration >= cfg.curator.afterTurns;
  if (curationDue) {
    curator2
      .run({ llm, projectPath, sessionID, model })
      .then(() => {
        sessionState.turnsSinceCuration = 0;
        ltmSessionCache.delete(sessionID);
      })
      .catch((e) => log6.error("background curation failed:", e));
  }
}
2940
/**
 * Answers a client compaction request with a summary generated by Lore.
 *
 * Steps: initialize the pipeline, resolve the session, force an urgent
 * distillation, load the session's distillations and (when knowledge is
 * enabled) the project's knowledge entries, build the compaction prompt, ask
 * the LLM for a summary and wrap it in a streaming or non-streaming response
 * matching `req.stream`.
 *
 * @param {object} req - Incoming compaction request.
 * @param {object} config2 - Gateway configuration.
 * @returns {Promise<Response>} HTTP response carrying the summary.
 */
async function handleCompaction(req, config2) {
  const projectPath = cachedProjectPath ?? getProjectPath(req.system, req.rawHeaders);
  await initIfNeeded(projectPath, config2);
  const { sessionID } = await identifySession(req, projectPath);
  // Called for its side effects: registers the session and refreshes its activity time.
  getOrCreateSession(sessionID, projectPath);
  const llm = getLLMClient(config2);
  log6.info(`compaction intercepted for session ${sessionID.slice(0, 16)}`);
  const model = getWorkerModel();
  await distillation2.run({
    llm,
    projectPath,
    sessionID,
    model,
    force: true,
    urgent: true
  });
  const distillations = distillation2.loadForSession(projectPath, sessionID);
  const previousSummary = extractPreviousSummary(req);
  const cfg = loreConfig4();
  const entries = cfg.knowledge.enabled ? ltm2.forProject(projectPath, cfg.crossProject) : [];
  let knowledge = "";
  if (entries.length) {
    knowledge = formatKnowledge(
      entries.map(({ category, title, content }) => ({ category, title, content }))
    );
  }
  const compactPrompt = buildCompactPrompt({
    hasDistillations: distillations.length > 0,
    knowledge,
    previousSummary
  });
  let context = "";
  if (distillations.length > 0) {
    const header = [
      "## Lore Pre-computed Session Summaries",
      "",
      `The following ${distillations.length} summary chunk(s) were pre-computed from the conversation history. Use these as the authoritative source.`,
      "",
      ""
    ].join("\n");
    const chunks = distillations.map((d, i) => {
      const suffix = d.generation > 0 ? " (consolidated)" : "";
      return `### Chunk ${i + 1}${suffix}\n${d.observations}`;
    });
    context = header + chunks.join("\n\n");
  }
  const userContent = context ? `${context}\n\n---\n\n${compactPrompt}` : compactPrompt;
  const summaryText = await llm.prompt(compactPrompt, userContent, {
    model: cfg.model,
    workerID: "lore-compact",
    urgent: true
    // Client is blocking on this response
  });
  const summary = summaryText ?? "(Compaction failed \u2014 no summary generated.)";
  const resp = buildCompactionResponse(sessionID, summary, req.model);
  return req.stream ? streamHttpResponse(resp) : nonStreamHttpResponse(resp);
}
3001
/**
 * Forwards a request upstream and relays the answer without Lore processing.
 *
 * Streaming requests with a response body are piped through as-is; everything
 * else is read fully and returned as text. In both cases the upstream status
 * and content-type are preserved, so a non-JSON upstream error body (for
 * example an HTML or plain-text error page) is no longer labelled as JSON.
 * When the upstream sends no content-type, streaming responses default to
 * "text/event-stream" and buffered responses to "application/json".
 *
 * @param {object} req - Incoming gateway request.
 * @param {object} config2 - Gateway configuration.
 * @returns {Promise<Response>} Relayed upstream response.
 */
async function handlePassthrough(req, config2) {
  const upstreamResponse = await forwardToUpstream(req, config2);
  const upstreamType = upstreamResponse.headers.get("content-type");
  if (req.stream && upstreamResponse.body) {
    return new Response(upstreamResponse.body, {
      status: upstreamResponse.status,
      headers: {
        "content-type": upstreamType ?? "text/event-stream"
      }
    });
  }
  const body = await upstreamResponse.text();
  return new Response(body, {
    status: upstreamResponse.status,
    headers: {
      "content-type": upstreamType ?? "application/json"
    }
  });
}
3019
/**
 * Handle a regular conversation turn.
 *
 * Steps, in order: resolve project and session, record credentials, inject
 * any stored recall result, configure model limits and budgets, append
 * long-term knowledge and Lore instructions to the system prompt, run the
 * message transform, forward upstream, and finally deal with the gateway
 * recall tool on non-streaming responses.
 *
 * @param req      Parsed gateway request.
 * @param config2  Gateway configuration.
 * @returns {Promise<Response>} HTTP response for the client.
 */
async function handleConversationTurn(req, config2) {
  const projectPath = getProjectPath(req.system, req.rawHeaders);
  await initIfNeeded(projectPath, config2);

  const cred = extractAuth(req.rawHeaders);
  if (cred) {
    setLastSeenAuth(cred);
  }
  const { sessionID, isNew } = await identifySession(req, projectPath);
  const sessionState = getOrCreateSession(sessionID, projectPath);
  if (cred) {
    setSessionAuth(sessionID, cred);
  }
  // Evaluated lazily so the slice happens exactly where a log line needs it.
  const shortSession = () => sessionID.slice(0, 16);

  if (isNew) {
    const plainMessages = req.messages.map((m) => ({ role: m.role, content: m.content }));
    const fingerprintOptions = {
      model: req.model,
      authSuffix: cred ? authFingerprint(cred) : ""
    };
    sessionState.fingerprint = await fingerprintMessages(plainMessages, fingerprintOptions);
  }
  sessionState.messageCount = req.messages.length;
  lastSeenSessionModel = req.model;

  // A recall result stored on an earlier turn is consumed (or dropped) here.
  const pending = sessionState.pendingRecall;
  if (pending) {
    if (!isPendingRecallValid(pending)) {
      log6.warn(
        `discarding expired pending recall for session ${shortSession()}`
      );
    } else if (injectPendingRecall(req, pending)) {
      log6.info(
        `injected pending recall result into request for session ${shortSession()}`
      );
    } else {
      log6.warn(
        `failed to inject pending recall \u2014 conversation structure mismatch`
      );
    }
    sessionState.pendingRecall = void 0;
  }

  log6.info(
    `turn: session=${shortSession()} messages=${req.messages.length} model=${req.model} stream=${req.stream} new=${isNew}`
  );

  // Model limits and the layer-0 token cap.
  const modelSpec = getModelSpec(req.model);
  setModelLimits({ context: modelSpec.context, output: modelSpec.output });
  const cfg = loreConfig4();
  if (cfg.budget.maxLayer0Tokens !== void 0) {
    setMaxLayer0Tokens(cfg.budget.maxLayer0Tokens);
  } else if (modelSpec.cacheReadCost && cfg.budget.targetCacheReadCostPerTurn > 0) {
    const cap = computeLayer0Cap(
      cfg.budget.targetCacheReadCostPerTurn,
      modelSpec.cacheReadCost
    );
    setMaxLayer0Tokens(cap);
  }

  // Drop the cached knowledge block when the session resumes after idling.
  const idleResult = onIdleResume(sessionID, cfg.idleResumeMinutes * 6e4);
  if (idleResult.triggered) {
    ltmSessionCache.delete(sessionID);
    log6.info(
      `session idle ${Math.round(idleResult.idleMs / 6e4)}min \u2014 refreshing caches`
    );
  }

  // System prompt: original text plus (optionally) formatted knowledge.
  let modifiedSystem = req.system;
  if (!cfg.knowledge.enabled) {
    setLtmTokens(0, sessionID);
    consumeCameOutOfIdle(sessionID);
  } else {
    try {
      let cached = ltmSessionCache.get(sessionID);
      if (!cached) {
        const ltmBudget = getLtmBudget(cfg.budget.ltm);
        const entries = ltm2.forSession(projectPath, sessionID, ltmBudget);
        if (entries.length) {
          const slim = entries.map((e) => ({
            category: e.category,
            title: e.title,
            content: e.content
          }));
          const formatted = formatKnowledge(slim, ltmBudget);
          if (formatted) {
            // Token count is estimated as one token per three characters.
            cached = { formatted, tokenCount: Math.ceil(formatted.length / 3) };
            ltmSessionCache.set(sessionID, cached);
          }
        }
      }
      if (cached) {
        setLtmTokens(cached.tokenCount, sessionID);
        modifiedSystem = `${req.system}\n\n${cached.formatted}`;
      } else {
        setLtmTokens(0, sessionID);
      }
    } catch (e) {
      log6.error("LTM injection failed:", e);
      setLtmTokens(0, sessionID);
    } finally {
      consumeCameOutOfIdle(sessionID);
    }
  }
  if (isFirstRun()) {
    modifiedSystem += "\n\n[Lore plugin] This is the first time Lore has been activated. Briefly let the user know that Lore is now active and their coding agent will get progressively smarter on this codebase over time as knowledge accumulates across sessions.";
  }
  if (cfg.knowledge.enabled) {
    const filesToTrack = [".lore.md"];
    if (cfg.agentsFile.enabled) {
      filesToTrack.push(cfg.agentsFile.path);
    }
    modifiedSystem += `\n\nWhen making git commits, always check if ${filesToTrack.join(" and ")} have unstaged changes and include them in the commit. These files contain shared project knowledge managed by lore and must be version-controlled.`;
  }

  // Message pipeline: gateway -> lore -> transform -> gateway.
  const loreMessages = gatewayMessagesToLore(req.messages, sessionID);
  resolveToolResults(loreMessages);
  const result = transform({
    messages: loreMessages,
    projectPath,
    sessionID
  });
  // Trim trailing non-user messages, but stop at one that carries tool parts.
  while (result.messages.length > 0) {
    const tail = result.messages.at(-1);
    if (tail.info.role === "user") break;
    if (tail.parts.some((p) => p.type === "tool")) break;
    result.messages.pop();
  }
  const transformedMessages = loreMessagesToGateway(result.messages);
  removeOrphanedToolResults(transformedMessages);

  const modifiedReq = {
    ...req,
    system: modifiedSystem,
    messages: transformedMessages
  };
  // The recall tool is only added when the client already sends tools
  // and does not provide a recall tool itself.
  if (modifiedReq.tools.length > 0 && !clientHasRecallTool(modifiedReq.tools)) {
    modifiedReq.tools = [...modifiedReq.tools, RECALL_GATEWAY_TOOL];
  }
  const cacheOptions = {
    systemTTL: "5m",
    cacheConversation: true
  };

  const upstreamResponse = await forwardToUpstream(
    modifiedReq,
    config2,
    void 0,
    cacheOptions
  );
  if (!upstreamResponse.ok) {
    const errorBody = await upstreamResponse.text();
    log6.error(
      `upstream error: ${upstreamResponse.status} ${errorBody.slice(0, 500)}`
    );
    return new Response(errorBody, {
      status: upstreamResponse.status,
      headers: { "content-type": "application/json" }
    });
  }

  if (req.stream && upstreamResponse.body) {
    const hasRecallTool = modifiedReq.tools.some(
      (t) => t.name === RECALL_TOOL_NAME
    );
    const recallContext = hasRecallTool
      ? { modifiedReq, config: config2, sessionState, cacheOptions }
      : void 0;
    return buildStreamingResponse(
      upstreamResponse,
      (resp2) => postResponse(req, resp2, sessionState, config2),
      recallContext
    );
  }

  const resp = await accumulateNonStreamResponse(upstreamResponse);
  if (!hasRecallToolUse(resp)) {
    postResponse(req, resp, sessionState, config2);
    return nonStreamHttpResponse(resp);
  }

  // Non-streaming recall handling.
  const respondWithoutRecall = () => {
    const cleanResp = stripRecallFromResponse(resp);
    postResponse(req, cleanResp, sessionState, config2);
    return nonStreamHttpResponse(cleanResp);
  };
  const recallBlock = findRecallToolUse(resp);
  const { result: recallResult, input } = await executeRecall(
    recallBlock,
    sessionState.projectPath,
    sessionState.sessionID
  );

  if (hasOtherToolUse(resp)) {
    // Other tool calls are present: store the recall result on the session
    // and return the response with the recall block stripped.
    sessionState.pendingRecall = {
      toolUseId: recallBlock.id,
      input,
      position: resp.content.indexOf(recallBlock),
      result: recallResult,
      timestamp: Date.now()
    };
    log6.info(
      `recall (non-stream, mixed): stored pending result for session ${sessionState.sessionID.slice(0, 16)}`
    );
    return respondWithoutRecall();
  }

  log6.info(
    `recall (non-stream, only): executing follow-up for session ${sessionState.sessionID.slice(0, 16)}`
  );
  const followUp = buildRecallFollowUp(modifiedReq, resp, recallResult, recallBlock);
  const followUpResponse = await forwardToUpstream(
    followUp,
    config2,
    void 0,
    cacheOptions
  );
  if (!followUpResponse.ok) {
    const errorBody = await followUpResponse.text();
    log6.error(
      `recall follow-up upstream error: ${followUpResponse.status} ${errorBody.slice(0, 500)}`
    );
    return respondWithoutRecall();
  }

  // Add the first call's usage onto the continuation's usage.
  const continuationResp = await accumulateNonStreamResponse(followUpResponse);
  const firstUsage = resp.usage;
  const totalUsage = continuationResp.usage;
  totalUsage.inputTokens += firstUsage.inputTokens;
  totalUsage.outputTokens += firstUsage.outputTokens;
  if (firstUsage.cacheReadInputTokens) {
    totalUsage.cacheReadInputTokens = (totalUsage.cacheReadInputTokens ?? 0) + firstUsage.cacheReadInputTokens;
  }
  if (firstUsage.cacheCreationInputTokens) {
    totalUsage.cacheCreationInputTokens = (totalUsage.cacheCreationInputTokens ?? 0) + firstUsage.cacheCreationInputTokens;
  }
  postResponse(req, continuationResp, sessionState, config2);
  return nonStreamHttpResponse(continuationResp);
}
3247
/**
 * Convert Lore-format messages (`{ info: { role }, parts }`) into gateway
 * messages (`{ role, content }`).
 *
 * Tool calls that finished ("completed" or "error") also produce a
 * `tool_result` block. That block is held back and prepended to the content
 * of the immediately following message when it is a user message; otherwise
 * it is discarded.
 *
 * @param {Array} messages Lore messages.
 * @returns {Array} Gateway messages, one per input message.
 */
function loreMessagesToGateway(messages) {
  const converted = [];
  let carriedResults = [];
  for (const message of messages) {
    const role = message.info.role;
    // Only a user message receives the results carried from the previous one.
    const blocks = role === "user" ? [...carriedResults] : [];
    carriedResults = [];

    for (const part of message.parts) {
      if (part.type === "text") {
        blocks.push({ type: "text", text: part.text });
      } else if (part.type === "reasoning") {
        blocks.push({ type: "thinking", thinking: part.text ?? "" });
      } else if (part.type === "tool") {
        if (part.tool === "result") {
          blocks.push({
            type: "tool_result",
            toolUseId: part.callID,
            content: part.state.output ?? ""
          });
          continue;
        }
        blocks.push({
          type: "tool_use",
          id: part.callID,
          name: part.tool,
          input: part.state.input ?? {}
        });
        const status = part.state.status;
        if (status === "completed") {
          carriedResults.push({
            type: "tool_result",
            toolUseId: part.callID,
            content: part.state.output ?? ""
          });
        } else if (status === "error") {
          carriedResults.push({
            type: "tool_result",
            toolUseId: part.callID,
            content: part.state.error ?? "[error]",
            isError: true
          });
        }
      } else if ("text" in part && typeof part.text === "string") {
        // Unknown part kinds survive only when they carry string text.
        blocks.push({ type: "text", text: part.text });
      }
    }
    converted.push({ role, content: blocks });
  }
  return converted;
}
3316
/**
 * Strip `tool_result` blocks that have no matching `tool_use` in the
 * directly preceding assistant message. Mutates `messages` in place.
 *
 * A user message left with no content gets a placeholder text block
 * in place of the empty content array.
 *
 * @param {Array} messages Gateway messages (`{ role, content }`).
 */
function removeOrphanedToolResults(messages) {
  for (const [index, message] of messages.entries()) {
    if (message.role !== "user") continue;
    const carriesResults = message.content.some((block) => block.type === "tool_result");
    if (!carriesResults) continue;

    const previous = index > 0 && messages[index - 1].role === "assistant" ? messages[index - 1] : null;
    const knownIds = new Set();
    for (const block of previous?.content ?? []) {
      if (block.type === "tool_use") knownIds.add(block.id);
    }

    const originalCount = message.content.length;
    message.content = message.content.filter(
      (block) => block.type !== "tool_result" || knownIds.has(block.toolUseId)
    );
    const removed = originalCount - message.content.length;
    if (removed > 0) {
      log6.warn(
        `removed ${removed} orphaned tool_result block(s) from message ${index}`
      );
    }
    if (message.content.length === 0) {
      message.content = [{ type: "text", text: "[tool results provided]" }];
    }
  }
}
3339
/**
 * Build a JSON error response of type `server_error`.
 *
 * @param {number} status  HTTP status code.
 * @param {string} message Human-readable error message.
 * @returns {Response} JSON response with body
 *   `{ type: "error", error: { type: "server_error", message } }`.
 */
function errorResponse(status, message) {
  const payload = {
    type: "error",
    error: { type: "server_error", message }
  };
  const init = {
    status,
    headers: { "content-type": "application/json" }
  };
  return new Response(JSON.stringify(payload), init);
}
3354
/**
 * Pipeline entry point: classify the request and dispatch it.
 *
 * Compaction requests are checked first, then title/summary requests
 * (passed through); everything else is a conversation turn. Any error is
 * logged and converted into a 502 error response.
 *
 * @param req      Parsed gateway request.
 * @param config2  Gateway configuration.
 * @returns {Promise<Response>}
 */
async function handleRequest(req, config2) {
  try {
    const earlyAuth = extractAuth(req.rawHeaders);
    if (earlyAuth) {
      setLastSeenAuth(earlyAuth);
    }
    const handler = isCompactionRequest(req)
      ? handleCompaction
      : isTitleOrSummaryRequest(req)
        ? handlePassthrough
        : handleConversationTurn;
    return await handler(req, config2);
  } catch (err) {
    log6.error("pipeline error:", err);
    const message = err instanceof Error ? err.message : "Unknown gateway error";
    return errorResponse(502, message);
  }
}
3373
-
3374
- // src/server.ts
3375
// Gateway version reported by the health endpoint. Stays "unknown" when the
// bundled package.json cannot be read or carries no version.
var version = "unknown";
try {
  const { version: packagedVersion } = require_package();
  if (packagedVersion) {
    version = packagedVersion;
  }
} catch {
}
3381
// CORS headers stamped onto every response produced by the server.
var CORS_HEADERS = {
  "access-control-allow-origin": "*",
  "access-control-allow-methods": "GET, POST, OPTIONS",
  "access-control-allow-headers": "*",
  "access-control-max-age": "86400"
};

/**
 * Set every CORS header on `response` (overwriting existing values) and
 * return the same response object.
 *
 * @param {Response} response Response whose headers are mutated.
 * @returns {Response} The response that was passed in.
 */
function withCors(response) {
  Object.entries(CORS_HEADERS).forEach(([name, headerValue]) => {
    response.headers.set(name, headerValue);
  });
  return response;
}
3393
/**
 * Flatten a `Headers` collection into a plain object keyed by header name.
 * When a name is yielded more than once, the last value wins.
 *
 * @param {Headers} headers Incoming request headers.
 * @returns {Object} Plain name -> value record.
 */
function headersToRecord(headers) {
  const flattened = {};
  for (const [name, headerValue] of headers) {
    flattened[name] = headerValue;
  }
  return flattened;
}
3400
/**
 * Serialize `body` as JSON and wrap it in a CORS-enabled response.
 *
 * @param body            Value passed to `JSON.stringify`.
 * @param {number} status HTTP status code (default 200).
 * @returns {Response}
 */
function jsonResponse(body, status = 200) {
  const serialized = JSON.stringify(body);
  const response = new Response(serialized, {
    status,
    headers: { "content-type": "application/json" }
  });
  return withCors(response);
}
3408
/**
 * Build a JSON error response with a caller-supplied error type.
 *
 * @param {number} status  HTTP status code.
 * @param {string} type    Error type identifier (e.g. "api_error").
 * @param {string} message Human-readable message.
 * @returns {Response} Result of `jsonResponse` for the error envelope.
 */
function errorResponse2(status, type, message) {
  const envelope = { type: "error", error: { type, message } };
  return jsonResponse(envelope, status);
}
3417
/**
 * Handle `POST /v1/messages`: read the JSON body, parse it into a gateway
 * request, run the pipeline, and attach CORS headers to the result.
 *
 * Body or parse failures yield a 400 `invalid_request_error`; a pipeline
 * exception yields a 502 `api_error`.
 *
 * @param {Request} req Incoming HTTP request.
 * @param config2       Gateway configuration.
 * @returns {Promise<Response>}
 */
async function handleAnthropicMessages(req, config2) {
  const describe = (error, fallback) => (error instanceof Error ? error.message : fallback);

  let payload;
  try {
    payload = await req.json();
  } catch {
    return errorResponse2(400, "invalid_request_error", "Invalid JSON body");
  }

  let gatewayReq;
  try {
    gatewayReq = parseAnthropicRequest(payload, headersToRecord(req.headers));
  } catch (parseError) {
    return errorResponse2(400, "invalid_request_error", describe(parseError, "Failed to parse request"));
  }

  try {
    return withCors(await handleRequest(gatewayReq, config2));
  } catch (pipelineError) {
    const msg = describe(pipelineError, "Pipeline error");
    console.error(`[lore] pipeline error: ${msg}`);
    return errorResponse2(502, "api_error", `Gateway pipeline error: ${msg}`);
  }
}
3440
/**
 * Proxy `GET /v1/models` to the configured Anthropic upstream.
 *
 * The upstream status, status text and headers are relayed, except for
 * headers that describe the wire encoding of the upstream body:
 * `fetch` hands back an already-decoded body, so forwarding
 * `content-encoding` / `content-length` (or the hop-by-hop
 * `transfer-encoding`) would advertise an encoding and size that no longer
 * match the bytes actually sent to the client.
 *
 * @param config2 Gateway configuration; `upstreamAnthropic` is the base URL.
 * @returns {Promise<Response>} CORS-enabled upstream response, or a 502
 *   `api_error` when the upstream cannot be reached.
 */
async function handleModelsPassthrough(config2) {
  try {
    const upstream = await fetch(`${config2.upstreamAnthropic}/v1/models`, {
      headers: { "content-type": "application/json" }
    });
    const headers = new Headers(upstream.headers);
    for (const stale of ["content-encoding", "content-length", "transfer-encoding"]) {
      headers.delete(stale);
    }
    const response = new Response(upstream.body, {
      status: upstream.status,
      statusText: upstream.statusText,
      headers
    });
    return withCors(response);
  } catch (e) {
    const msg = e instanceof Error ? e.message : "Upstream unreachable";
    return errorResponse2(502, "api_error", `Failed to fetch models: ${msg}`);
  }
}
3456
/**
 * Health probe: reports `status: "ok"` together with the gateway version.
 *
 * @returns {Response} Result of `jsonResponse` for the health payload.
 */
function handleHealth() {
  const payload = { status: "ok", version };
  return jsonResponse(payload);
}
3459
/**
 * Handle `POST /v1/chat/completions`: parse an OpenAI-style request, run it
 * through the pipeline, and convert the result into an OpenAI response.
 *
 * Non-OK pipeline responses are returned as-is (with CORS). An SSE pipeline
 * response is accumulated first and converted with the streaming flag set;
 * any other response is read as JSON and converted with the flag cleared.
 *
 * @param {Request} req Incoming HTTP request.
 * @param config2       Gateway configuration.
 * @returns {Promise<Response>}
 */
async function handleOpenAIChatCompletions(req, config2) {
  const describe = (error, fallback) => (error instanceof Error ? error.message : fallback);

  let payload;
  try {
    payload = await req.json();
  } catch {
    return errorResponse2(400, "invalid_request_error", "Invalid JSON body");
  }

  let gatewayReq;
  try {
    gatewayReq = parseOpenAIRequest(payload, headersToRecord(req.headers));
  } catch (parseError) {
    return errorResponse2(400, "invalid_request_error", describe(parseError, "Failed to parse request"));
  }

  let pipelineResp;
  try {
    pipelineResp = await handleRequest(gatewayReq, config2);
  } catch (pipelineError) {
    const msg = describe(pipelineError, "Pipeline error");
    console.error(`[lore] pipeline error: ${msg}`);
    return errorResponse2(502, "api_error", `Gateway pipeline error: ${msg}`);
  }

  if (!pipelineResp.ok) {
    return withCors(pipelineResp);
  }

  const contentType = pipelineResp.headers.get("content-type") ?? "";
  const isEventStream = contentType.includes("text/event-stream");
  if (!isEventStream) {
    const parsed = await pipelineResp.json();
    return withCors(buildOpenAIResponse(parsed, false));
  }
  const accumulated = await accumulateSSEResponse(pipelineResp);
  return withCors(buildOpenAIResponse(accumulated, true));
}
3492
/**
 * Start the HTTP server via `Bun.serve` and return a small control handle.
 *
 * Routing: every `OPTIONS` request gets an empty 204 with CORS headers;
 * four fixed method/path pairs map to their handlers; anything else is a
 * 404. Exceptions escaping a handler become a 500 `api_error`.
 *
 * @param config2 Gateway configuration (`port`, `host`, `debug`, ...).
 * @returns {{ stop: Function, port: number }} `stop` shuts the server down;
 *   `port` is the server's port, falling back to the configured one.
 */
function startServer(config2) {
  // [method, pathname, handler] triples, checked in order.
  const routes = [
    ["POST", "/v1/messages", (req) => handleAnthropicMessages(req, config2)],
    ["POST", "/v1/chat/completions", (req) => handleOpenAIChatCompletions(req, config2)],
    ["GET", "/v1/models", () => handleModelsPassthrough(config2)],
    ["GET", "/health", () => handleHealth()]
  ];

  const server2 = Bun.serve({
    port: config2.port,
    hostname: config2.host,
    async fetch(req) {
      const { pathname } = new URL(req.url);
      const method = req.method;
      if (method === "OPTIONS") {
        return withCors(new Response(null, { status: 204 }));
      }
      if (config2.debug) {
        console.error(`[lore] ${method} ${pathname}`);
      }
      try {
        const match = routes.find(
          ([routeMethod, routePath]) => routeMethod === method && routePath === pathname
        );
        if (!match) {
          return errorResponse2(404, "not_found", `No route for ${method} ${pathname}`);
        }
        const [, , handler] = match;
        return await handler(req);
      } catch (e) {
        const msg = e instanceof Error ? e.message : "Internal server error";
        console.error(`[lore] uncaught error: ${msg}`);
        return errorResponse2(500, "api_error", msg);
      }
    }
  });

  return {
    stop: () => server2.stop(),
    port: server2.port ?? config2.port
  };
}
3532
-
3533
- // src/index.ts
3534
// Entry point: load configuration, start the server, announce it on stderr.
var config = loadConfig();
var server = startServer(config);
var addr = `http://${config.host}:${server.port}`;
console.error(`[lore] Gateway listening on ${addr}`);
console.error(`[lore] Model routing: claude-* \u2192 Anthropic, nvidia/* \u2192 Nvidia NIM, gpt-* \u2192 OpenAI, \u2026`);
console.error(`[lore] Plugin auto-detects gateway \u2014 just start OpenCode normally`);

/**
 * Stop the server, reset pipeline state, and exit the process.
 *
 * Cleanup failures are logged instead of being left as an unhandled
 * rejection, and the process exits in every case: code 0 after a clean
 * shutdown, code 1 when cleanup threw.
 */
async function shutdown() {
  console.error("[lore] Shutting down\u2026");
  let exitCode = 0;
  try {
    server.stop();
    await resetPipelineState();
  } catch (err) {
    console.error("[lore] Error during shutdown:", err);
    exitCode = 1;
  } finally {
    // Without this, a failed cleanup would leave the process running.
    process.exit(exitCode);
  }
}
process.on("SIGINT", () => shutdown());
process.on("SIGTERM", () => shutdown());
3548
- //# sourceMappingURL=index.js.map