@loreai/gateway 0.13.4 → 0.14.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,110 +0,0 @@
1
- /**
2
- * Gateway LLM adapter: implements LLMClient via direct Anthropic API calls.
3
- * Used by Lore's background workers (distillation, curation, query expansion)
4
- * running inside the gateway process.
5
- */
6
-
7
- import type { LLMClient } from "@loreai/core";
8
- import { log } from "@loreai/core";
9
- import type { AuthCredential } from "./auth";
10
- import { authHeaders } from "./auth";
11
-
12
- // ---------------------------------------------------------------------------
13
- // Worker call tracking
14
- // ---------------------------------------------------------------------------
15
-
/**
 * IDs of gateway worker LLM calls that are currently in flight.
 *
 * `createGatewayLLMClient().prompt()` adds a generated `gw-worker-…` ID
 * before issuing its upstream request and removes it again once the request
 * settles. The set exists so that temporal capture can recognise worker
 * traffic and skip it.
 */
export const activeWorkerCalls: Set<string> = new Set();
18
-
19
- // ---------------------------------------------------------------------------
20
- // LLMClient factory
21
- // ---------------------------------------------------------------------------
22
-
/**
 * Create an LLMClient that sends single-turn prompts directly to Anthropic's
 * Messages endpoint (`<upstreamUrl>/v1/messages`).
 *
 * Every `prompt()` call registers a generated ID in `activeWorkerCalls` for
 * the duration of the upstream request and always removes it afterwards.
 *
 * `prompt()` resolves to `null` (after logging) when no credentials are
 * available, when the upstream answers with a non-2xx status, when the
 * request or response parsing throws, or when the response contains no text
 * content block. Otherwise it resolves to the concatenated text of all text
 * content blocks, in order.
 *
 * @param upstreamUrl  Base URL of the upstream Anthropic endpoint; trailing
 *                     slashes are ignored
 * @param getAuth      Callback to resolve auth credentials (per-session → global fallback)
 * @param defaultModel Default model to use when no override is specified
 * @returns An LLMClient whose `prompt()` performs one POST per call
 */
export function createGatewayLLMClient(
  upstreamUrl: string,
  getAuth: (sessionID?: string) => AuthCredential | null,
  defaultModel: { providerID: string; modelID: string },
): LLMClient {
  // The endpoint is fixed for the lifetime of the client, so build it once.
  // Strip *all* trailing slashes so a base URL such as "https://host//" does
  // not produce "https://host//v1/messages".
  const url = `${upstreamUrl.replace(/\/+$/, "")}/v1/messages`;

  return {
    async prompt(system, user, opts) {
      const cred = getAuth(opts?.sessionID);
      if (!cred) {
        log.warn("no auth credentials available for worker call");
        return null;
      }

      const model = opts?.model ?? defaultModel;

      // Track this call so temporal capture can skip it
      const callID = `gw-worker-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
      activeWorkerCalls.add(callID);

      try {
        // System prompt caching for workers: send as block array with 1h TTL.
        // Worker calls come in bursts (distillation, curation) separated by
        // minutes of user thinking — 5m TTL expires between bursts, but 1h
        // survives. The system prompt (DISTILLATION_SYSTEM, etc.) is static
        // across all calls → near-100% cache hit rate after the first write.
        // Cost (per Anthropic's prompt-caching pricing): 2× base input price
        // for a 1h cache write, 0.1× for subsequent reads.
        //
        // An empty/missing system prompt yields `undefined`, which
        // JSON.stringify drops — the request then carries no `system` key
        // instead of an empty string.
        const systemPayload = system
          ? [
              {
                type: "text",
                text: system,
                cache_control: { type: "ephemeral", ttl: "1h" },
              },
            ]
          : undefined;

        const response = await fetch(url, {
          method: "POST",
          headers: {
            "Content-Type": "application/json",
            "anthropic-version": "2023-06-01",
            ...authHeaders(cred),
          },
          // opts.thinking is intentionally not forwarded — this bare API
          // call never includes the `thinking` parameter so Anthropic
          // models won't produce thinking tokens regardless.
          body: JSON.stringify({
            model: model.modelID,
            max_tokens: 8192,
            system: systemPayload,
            messages: [{ role: "user", content: user }],
          }),
        });

        if (!response.ok) {
          const text = await response.text().catch(() => "(no body)");
          log.error(
            `worker upstream request failed: ${response.status} ${response.statusText} — ${text}`,
          );
          return null;
        }

        const data = (await response.json()) as {
          content?: Array<{ type: string; text?: string }>;
        };

        // A response may carry several content blocks; keep every text block
        // (in order) rather than only the first, so multi-block answers are
        // not truncated. Non-text blocks are ignored.
        const texts = (data.content ?? []).flatMap((block) =>
          block.type === "text" && typeof block.text === "string"
            ? [block.text]
            : [],
        );

        return texts.length > 0 ? texts.join("") : null;
      } catch (e) {
        log.error("worker prompt failed:", e);
        return null;
      } finally {
        // Always unregister, whether the call succeeded, failed or threw.
        activeWorkerCalls.delete(callID);
      }
    },
  };
}