@loreai/gateway 0.14.0 → 0.14.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/bin.cjs +27 -0
- package/dist/index.cjs +1042 -0
- package/dist/index.d.cts +21 -0
- package/package.json +10 -10
- package/dist/index.js +0 -50087
- package/src/auth.ts +0 -133
- package/src/batch-queue.ts +0 -575
- package/src/cache-analytics.ts +0 -344
- package/src/cli/agents.ts +0 -107
- package/src/cli/bin.ts +0 -11
- package/src/cli/help.ts +0 -55
- package/src/cli/lib/binary.ts +0 -353
- package/src/cli/lib/bspatch.ts +0 -306
- package/src/cli/lib/delta-upgrade.ts +0 -790
- package/src/cli/lib/errors.ts +0 -48
- package/src/cli/lib/ghcr.ts +0 -389
- package/src/cli/lib/patch-cache.ts +0 -342
- package/src/cli/lib/upgrade.ts +0 -454
- package/src/cli/lib/version-check.ts +0 -385
- package/src/cli/main.ts +0 -152
- package/src/cli/run.ts +0 -181
- package/src/cli/start.ts +0 -82
- package/src/cli/upgrade.ts +0 -311
- package/src/cli/version.ts +0 -22
- package/src/compaction.ts +0 -195
- package/src/config.ts +0 -199
- package/src/idle.ts +0 -240
- package/src/index.ts +0 -41
- package/src/llm-adapter.ts +0 -182
- package/src/pipeline.ts +0 -1681
- package/src/recall.ts +0 -433
- package/src/recorder.ts +0 -192
- package/src/server.ts +0 -250
- package/src/session.ts +0 -207
- package/src/stream/anthropic.ts +0 -708
- package/src/temporal-adapter.ts +0 -310
- package/src/translate/anthropic.ts +0 -469
- package/src/translate/openai.ts +0 -536
- package/src/translate/types.ts +0 -222
- package/src/worker-model.ts +0 -408
package/src/llm-adapter.ts
DELETED
|
@@ -1,182 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Gateway LLM adapter: implements LLMClient via direct Anthropic API calls.
|
|
3
|
-
* Used by Lore's background workers (distillation, curation, query expansion)
|
|
4
|
-
* running inside the gateway process.
|
|
5
|
-
*/
|
|
6
|
-
|
|
7
|
-
import type { LLMClient } from "@loreai/core";
|
|
8
|
-
import { log } from "@loreai/core";
|
|
9
|
-
import type { AuthCredential } from "./auth";
|
|
10
|
-
import { authHeaders } from "./auth";
|
|
11
|
-
|
|
12
|
-
// ---------------------------------------------------------------------------
|
|
13
|
-
// Worker call tracking
|
|
14
|
-
// ---------------------------------------------------------------------------
|
|
15
|
-
|
|
16
|
-
/**
 * IDs of gateway worker calls that are currently in flight.
 *
 * Kept so that temporal capture can recognise worker traffic and skip it.
 */
export const activeWorkerCalls: Set<string> = new Set();
|
|
18
|
-
|
|
19
|
-
// ---------------------------------------------------------------------------
|
|
20
|
-
// Retry helpers
|
|
21
|
-
// ---------------------------------------------------------------------------
|
|
22
|
-
|
|
23
|
-
/**
 * HTTP statuses treated as transient: a response carrying one of these is
 * worth retrying, while any other non-OK status is not.
 */
const TRANSIENT_CODES: ReadonlySet<number> = new Set<number>([
  429, 500, 502, 503, 529,
]);

/** Number of retries allowed after the initial attempt. */
const MAX_RETRIES = 3;
|
|
26
|
-
|
|
27
|
-
/**
 * Parse the `Retry-After` header of a response into a delay in milliseconds.
 *
 * The value is first read as a number of seconds. If it is not numeric it is
 * read as a date via `Date.parse`, and the delay is the time remaining until
 * that date.
 *
 * @param response Response whose headers are inspected.
 * @returns A non-negative delay in milliseconds, or `null` when the header is
 *   absent or cannot be parsed in either form.
 */
function parseRetryAfter(response: Response): number | null {
  const header = response.headers.get("retry-after");
  if (!header) return null;

  const seconds = Number(header);
  // Clamp at zero so a negative value never produces a negative delay,
  // matching the clamp already applied to the date form below.
  if (!Number.isNaN(seconds)) return Math.max(0, seconds * 1000);

  const date = Date.parse(header);
  if (!Number.isNaN(date)) return Math.max(0, date - Date.now());

  return null;
}
|
|
37
|
-
|
|
38
|
-
/**
 * Choose how long to wait before the next retry.
 *
 * A server-supplied Retry-After delay is honoured only on the first attempt
 * (`attempt === 0`) and is capped at 30s. Every other case uses exponential
 * backoff: 1s, 2s, 4s, capped at 8s.
 *
 * @param attempt Zero-based index of the attempt that just failed.
 * @param retryAfterMs Delay requested by the server in milliseconds, or
 *   `null` when none was given.
 * @returns The delay to wait, in milliseconds.
 */
function backoffMs(attempt: number, retryAfterMs: number | null): number {
  const isFirstAttempt = attempt === 0;

  if (!isFirstAttempt || retryAfterMs == null) {
    const exponentialMs = Math.pow(2, attempt) * 1000;
    return Math.min(exponentialMs, 8000);
  }

  return Math.min(retryAfterMs, 30_000);
}
|
|
44
|
-
|
|
45
|
-
/** Return a promise that resolves once a `setTimeout` of `ms` milliseconds fires. */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((wake) => {
    setTimeout(wake, ms);
  });
}
|
|
48
|
-
|
|
49
|
-
// ---------------------------------------------------------------------------
|
|
50
|
-
// LLMClient factory
|
|
51
|
-
// ---------------------------------------------------------------------------
|
|
52
|
-
|
|
53
|
-
/**
 * Create an LLMClient that sends single-turn prompts directly to Anthropic.
 *
 * Each `prompt` call POSTs to `<upstreamUrl>/v1/messages` and resolves to the
 * text of the first text block in the reply. Network failures and transient
 * HTTP statuses (see `TRANSIENT_CODES`) are retried up to `MAX_RETRIES` times
 * with backoff. Missing credentials, non-transient statuses, exhausted retries
 * and any error raised while talking to upstream are logged and resolve to
 * `null`.
 *
 * @param upstreamUrl Base URL of the upstream Anthropic endpoint
 * @param getAuth Callback to resolve auth credentials (per-session → global fallback)
 * @param defaultModel Default model to use when no override is specified
 * @returns An LLMClient whose `prompt` resolves to the reply text or `null`.
 */
export function createGatewayLLMClient(
  upstreamUrl: string,
  getAuth: (sessionID?: string) => AuthCredential | null,
  defaultModel: { providerID: string; modelID: string },
): LLMClient {
  return {
    async prompt(system, user, opts) {
      const cred = getAuth(opts?.sessionID);
      if (!cred) {
        log.warn("no auth credentials available for worker call");
        return null;
      }

      const model = opts?.model ?? defaultModel;
      // Drop a single trailing slash so the joined URL has no double slash.
      const url = `${upstreamUrl.replace(/\/$/, "")}/v1/messages`;

      // Track this call so temporal capture can skip it
      const callID = `gw-worker-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
      activeWorkerCalls.add(callID);

      try {
        // System prompt caching for workers: send as block array with 1h TTL.
        // Worker calls come in bursts (distillation, curation) separated by
        // minutes of user thinking — 5m TTL expires between bursts, but 1h
        // survives. The system prompt (DISTILLATION_SYSTEM, etc.) is static
        // across all calls, so reads after the first write should hit the cache.
        // NOTE(review): confirm the write/read cost multipliers for the 1h TTL
        // against Anthropic's prompt-caching pricing before relying on them.
        const systemPayload = system
          ? [
              {
                type: "text",
                text: system,
                cache_control: { type: "ephemeral", ttl: "1h" },
              },
            ]
          : undefined;

        const body = JSON.stringify({
          model: model.modelID,
          max_tokens: 8192,
          // Falls back to the raw (falsy) `system` value when no block was built.
          system: systemPayload ?? system,
          messages: [{ role: "user", content: user }],
        });

        const headers = {
          "Content-Type": "application/json",
          "anthropic-version": "2023-06-01",
          ...authHeaders(cred),
        };

        // Retry loop for transient errors (429, 5xx)
        for (let attempt = 0; ; attempt++) {
          let response: Response;
          try {
            response = await fetch(url, {
              method: "POST",
              headers,
              // opts.thinking is intentionally not forwarded — this bare API
              // call never includes the `thinking` parameter so Anthropic
              // models won't produce thinking tokens regardless.
              body,
            });
          } catch (e) {
            // Network/fetch error — retry if attempts remain
            if (attempt < MAX_RETRIES) {
              const delay = backoffMs(attempt, null);
              log.warn(
                `worker request network error (attempt ${attempt + 1}/${MAX_RETRIES + 1}), retrying in ${delay}ms`,
              );
              await sleep(delay);
              continue;
            }
            throw e; // exhausted retries — rethrow to outer catch
          }

          if (response.ok) {
            const data = (await response.json()) as {
              content?: Array<{ type: string; text?: string }>;
            };

            // Only the first text block is returned; later blocks are ignored.
            const textBlock = data.content?.find(
              (b) => b.type === "text" && typeof b.text === "string",
            );

            return textBlock?.text ?? null;
          }

          // Non-transient error — fail immediately, no retry
          if (!TRANSIENT_CODES.has(response.status)) {
            const text = await response.text().catch(() => "(no body)");
            log.error(
              `worker upstream request failed: ${response.status} ${response.statusText} — ${text}`,
            );
            return null;
          }

          // Transient error — retry if attempts remain
          if (attempt < MAX_RETRIES) {
            const retryAfter = parseRetryAfter(response);
            const delay = backoffMs(attempt, retryAfter);
            log.warn(
              `worker upstream ${response.status} (attempt ${attempt + 1}/${MAX_RETRIES + 1}), retrying in ${delay}ms`,
            );
            // Discard the unread error body before backing off. Node's fetch
            // can keep the connection tied up until the body is read,
            // cancelled, or garbage-collected.
            await response.body?.cancel().catch(() => undefined);
            await sleep(delay);
            continue;
          }

          // Exhausted retries
          const text = await response.text().catch(() => "(no body)");
          log.error(
            `worker upstream request failed after ${MAX_RETRIES + 1} attempts: ${response.status} ${response.statusText} — ${text}`,
          );
          return null;
        }
      } catch (e) {
        log.error("worker prompt failed:", e);
        return null;
      } finally {
        // Always untrack the call, whichever way it ended.
        activeWorkerCalls.delete(callID);
      }
    },
  };
}
|