@loreai/gateway 0.13.4 → 0.14.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/bin.cjs +27 -0
- package/dist/index.cjs +1042 -0
- package/dist/index.d.cts +21 -0
- package/package.json +21 -13
- package/dist/index.js +0 -3548
- package/dist/index.js.map +0 -7
- package/src/auth.ts +0 -133
- package/src/batch-queue.ts +0 -555
- package/src/compaction.ts +0 -195
- package/src/config.ts +0 -199
- package/src/idle.ts +0 -246
- package/src/index.ts +0 -41
- package/src/llm-adapter.ts +0 -110
- package/src/pipeline.ts +0 -1604
- package/src/recall.ts +0 -301
- package/src/recorder.ts +0 -192
- package/src/server.ts +0 -250
- package/src/session.ts +0 -207
- package/src/stream/anthropic.ts +0 -708
- package/src/temporal-adapter.ts +0 -307
- package/src/translate/anthropic.ts +0 -425
- package/src/translate/openai.ts +0 -536
- package/src/translate/types.ts +0 -177
- package/src/worker-model.ts +0 -408
package/src/worker-model.ts
DELETED
|
@@ -1,408 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Gateway worker model discovery and resolution.
|
|
3
|
-
*
|
|
4
|
-
* Discovers available models from the upstream Anthropic `/v1/models` API,
|
|
5
|
-
* fetches per-model pricing from models.dev (open-source model database),
|
|
6
|
-
* and integrates with core's worker model validation/resolution pipeline.
|
|
7
|
-
*
|
|
8
|
-
* This replaces the OpenCode adapter's `getProviderModels()` +
|
|
9
|
-
* `maybeValidateWorkerModel()` — the gateway is the universal path and
|
|
10
|
-
* doesn't depend on the OpenCode SDK's model listing (which can report
|
|
11
|
-
* deprecated models as "active").
|
|
12
|
-
*/
|
|
13
|
-
|
|
14
|
-
import {
|
|
15
|
-
workerModel,
|
|
16
|
-
temporal,
|
|
17
|
-
distillation as distillationMod,
|
|
18
|
-
config as loreConfig,
|
|
19
|
-
log,
|
|
20
|
-
} from "@loreai/core";
|
|
21
|
-
import type { LLMClient } from "@loreai/core";
|
|
22
|
-
import type { AuthCredential } from "./auth";
|
|
23
|
-
import { authHeaders } from "./auth";
|
|
24
|
-
|
|
25
|
-
// ---------------------------------------------------------------------------
|
|
26
|
-
// Cost lookup — models.dev with hardcoded fallback
|
|
27
|
-
// ---------------------------------------------------------------------------
|
|
28
|
-
|
|
29
|
-
/**
 * URL of the models.dev JSON API, which lists every provider and model
 * together with its pricing in one document.
 *
 * One request here stands in for N individual TOML fetches. Response shape:
 *   { anthropic: { models: { "claude-sonnet-4-20250514": { cost: { input: 3 }, ... }, ... } } }
 * Cost values are USD per million tokens.
 */
const MODELS_DEV_API: string = "https://models.dev/api.json";

/** How long a fetched cost map is considered fresh: 1 hour, in milliseconds. */
const COST_CACHE_TTL_MS: number = 1000 * 60 * 60;

/**
 * Most recently fetched models.dev cost data (modelID → per-million-token
 * input cost), or `null` while nothing has been fetched successfully.
 */
let cachedCostMap: Map<string, number> | null = null;

/** `Date.now()` value recorded when `cachedCostMap` was last stored (0 = never). */
let cachedCostMapAt: number = 0;
|
|
42
|
-
|
|
43
|
-
/**
 * Hardcoded fallback costs (USD per input token), used for any model that has
 * no pricing entry from models.dev — including when that API is unreachable.
 * Entries are prefix-matched against model IDs in array order; the first
 * matching prefix wins.
 *
 * Anthropic model IDs come in two layouts: the 3.x generation puts the
 * version first (`claude-3-5-haiku-20241022`, `claude-3-7-sonnet-20250219`),
 * while the 4.x generation puts the family first (`claude-haiku-4-5`).
 * Both layouts are listed so that 3.5/3.7 models are not misclassified as
 * "unknown" and priced out of worker selection.
 *
 * These only serve as a safety net — runtime pricing from models.dev is
 * preferred.
 */
const FALLBACK_COSTS: Array<{ prefix: string; inputCostPerToken: number }> = [
  // 4.x generation: family-first IDs.
  { prefix: "claude-opus-4", inputCostPerToken: 15 / 1_000_000 },
  { prefix: "claude-sonnet-4", inputCostPerToken: 3 / 1_000_000 },
  { prefix: "claude-haiku-4", inputCostPerToken: 1 / 1_000_000 },
  // 3.5 / 3.7 generation: version-first IDs as published by Anthropic.
  { prefix: "claude-3-7-sonnet", inputCostPerToken: 3 / 1_000_000 },
  { prefix: "claude-3-5-sonnet", inputCostPerToken: 3 / 1_000_000 },
  { prefix: "claude-3-5-haiku", inputCostPerToken: 0.8 / 1_000_000 },
  // Family-first spellings of the 3.5 models, kept for backward compatibility
  // with any alias that uses them.
  { prefix: "claude-haiku-3-5", inputCostPerToken: 0.8 / 1_000_000 },
  { prefix: "claude-sonnet-3-5", inputCostPerToken: 3 / 1_000_000 },
  // 3.0 generation.
  { prefix: "claude-3-haiku", inputCostPerToken: 0.25 / 1_000_000 },
  { prefix: "claude-3-sonnet", inputCostPerToken: 3 / 1_000_000 },
  { prefix: "claude-3-opus", inputCostPerToken: 15 / 1_000_000 },
];

/**
 * Look up the hardcoded per-input-token cost (USD) for a model ID.
 *
 * @param modelID Model identifier to price.
 * @returns The cost of the first `FALLBACK_COSTS` entry whose prefix matches,
 *   or a deliberately high sentinel (100 USD per million tokens) when nothing
 *   matches.
 */
function fallbackCost(modelID: string): number {
  const match = FALLBACK_COSTS.find(({ prefix }) => modelID.startsWith(prefix));
  // Unknown model — assume expensive so it doesn't get picked as a worker
  return match?.inputCostPerToken ?? 100 / 1_000_000;
}
|
|
68
|
-
|
|
69
|
-
/** Shape of a model entry in the models.dev JSON API (subset we read). */
type ModelsDevEntry = {
  id: string;
  cost?: { input?: number };
};

/** Shape of the models.dev JSON API response (subset we care about). */
type ModelsDevResponse = {
  [provider: string]: {
    models?: { [modelId: string]: ModelsDevEntry };
  };
};

/**
 * Fetch the models.dev cost map for Anthropic models.
 *
 * Issues at most one HTTP request; a successful result is cached for
 * `COST_CACHE_TTL_MS`. The request (headers and body) is aborted after
 * 10 seconds.
 *
 * This function never rejects. On HTTP failure, an unexpected payload, or a
 * network/abort error it logs a warning and returns the previous cached map
 * if there is one, otherwise an empty map.
 *
 * @returns Map of modelID → per-million-token input cost (USD). Entries whose
 *   `cost.input` is missing or not a finite number are omitted.
 */
export async function fetchCostMap(): Promise<Map<string, number>> {
  // Return cache if fresh
  if (cachedCostMap && Date.now() - cachedCostMapAt < COST_CACHE_TTL_MS) {
    return cachedCostMap;
  }

  const controller = new AbortController();
  const timeout = setTimeout(() => controller.abort(), 10_000);

  try {
    const response = await fetch(MODELS_DEV_API, { signal: controller.signal });

    if (!response.ok) {
      log.warn(`models.dev API failed: ${response.status} ${response.statusText}`);
      return cachedCostMap ?? new Map();
    }

    // The payload is external input: treat every level as possibly absent.
    const data = (await response.json()) as ModelsDevResponse | null;
    const anthropic = data?.anthropic?.models;
    if (!anthropic || typeof anthropic !== "object") {
      log.warn("models.dev API: no anthropic provider found");
      return cachedCostMap ?? new Map();
    }

    const costMap = new Map<string, number>();
    for (const [modelId, entry] of Object.entries(anthropic)) {
      // Skip malformed entries individually so one bad record cannot discard
      // the pricing for every other model.
      const input: unknown = entry?.cost?.input;
      if (typeof input === "number" && Number.isFinite(input)) {
        costMap.set(modelId, input);
      }
    }

    cachedCostMap = costMap;
    cachedCostMapAt = Date.now();

    log.info(`models.dev: loaded costs for ${costMap.size} anthropic models`);
    return costMap;
  } catch (e) {
    log.warn("models.dev API error:", e);
    return cachedCostMap ?? new Map();
  } finally {
    // Always release the timer: it must not outlive the request when fetch
    // rejects, and it must stay armed while the body is being read.
    clearTimeout(timeout);
  }
}
|
|
130
|
-
|
|
131
|
-
/**
 * Forget the cached models.dev cost map and its timestamp, so the next
 * `fetchCostMap()` call goes back to the network (for testing).
 */
export function clearCostCache(): void {
  cachedCostMapAt = 0;
  cachedCostMap = null;
}
|
|
136
|
-
|
|
137
|
-
/**
 * Resolve a per-token input cost (USD) for each requested model.
 *
 * Pricing comes from `fetchCostMap()`, whose per-million-token figures are
 * divided by 1,000,000 here. Any model absent from that map is priced with
 * `fallbackCost()` instead.
 *
 * @param modelIDs Model identifiers to price.
 * @returns Map of modelID → per-token input cost, in the order the IDs were given.
 */
export async function fetchModelCosts(
  modelIDs: string[],
): Promise<Map<string, number>> {
  const perMillion = await fetchCostMap();

  return new Map(
    modelIDs.map((id): [string, number] => {
      const listed = perMillion.get(id);
      return [id, listed != null ? listed / 1_000_000 : fallbackCost(id)];
    }),
  );
}
|
|
160
|
-
|
|
161
|
-
// ---------------------------------------------------------------------------
|
|
162
|
-
// Anthropic /v1/models API types (subset we care about)
|
|
163
|
-
// ---------------------------------------------------------------------------
|
|
164
|
-
|
|
165
|
-
/** One model record from the upstream `/v1/models` listing (only the fields read in this file). */
type AnthropicModelEntry = {
  id: string;
  display_name: string;
  created_at: string;
  capabilities?: { thinking?: { supported: boolean } };
};

/** One page of the upstream `/v1/models` listing. */
type AnthropicModelsResponse = {
  data: Array<AnthropicModelEntry>;
  /** When true, `last_id` is used as the cursor for the next page. */
  has_more: boolean;
  last_id?: string;
};
|
|
179
|
-
|
|
180
|
-
// ---------------------------------------------------------------------------
|
|
181
|
-
// Model discovery — fetch from upstream /v1/models
|
|
182
|
-
// ---------------------------------------------------------------------------
|
|
183
|
-
|
|
184
|
-
/** Cached model list with TTL. */
let cachedModels: workerModel.ModelInfo[] | null = null;
let cachedModelsAt = 0;
const MODEL_CACHE_TTL_MS = 60 * 60 * 1000; // 1 hour

/** Per-request time budget for a `/v1/models` call, in milliseconds. */
const MODEL_DISCOVERY_TIMEOUT_MS = 10_000;

/** Upper bound on pagination requests, so a misbehaving upstream cannot loop forever. */
const MODEL_DISCOVERY_MAX_PAGES = 20;

/**
 * Fetch the available models from the upstream `/v1/models` endpoint.
 *
 * A successful result is cached for `MODEL_CACHE_TTL_MS` so the API is not
 * hit on every call. Every model returned is recorded with provider
 * `"anthropic"`, status `"active"`, text input support, and a `reasoning`
 * flag taken from `capabilities.thinking.supported` (false when absent).
 * Input cost comes from `fetchModelCosts()`.
 *
 * NOTE(review): the original author relies on this endpoint omitting
 * deprecated models (unlike the OpenCode SDK's `provider.list()`); that is
 * not verifiable from this file.
 *
 * This function never rejects. On a non-OK response or any thrown error
 * (network failure, timeout, malformed payload) it logs a warning and
 * returns the previously cached list if there is one, otherwise `[]`.
 *
 * @param upstreamUrl Base URL of the upstream API; trailing slashes are ignored.
 * @param cred        Credential turned into request headers via `authHeaders`.
 * @returns The discovered (or cached) model list.
 */
export async function discoverModels(
  upstreamUrl: string,
  cred: AuthCredential,
): Promise<workerModel.ModelInfo[]> {
  // Return cache if fresh
  if (cachedModels && Date.now() - cachedModelsAt < MODEL_CACHE_TTL_MS) {
    return cachedModels;
  }

  try {
    // Avoid "//v1/models" when the configured base URL ends in a slash.
    const baseUrl = upstreamUrl.replace(/\/+$/, "");
    const entries: AnthropicModelEntry[] = [];
    let afterId: string | undefined;
    let pages = 0;

    // Paginate through all models
    do {
      const url = new URL(`${baseUrl}/v1/models`);
      url.searchParams.set("limit", "1000");
      if (afterId) url.searchParams.set("after_id", afterId);

      const controller = new AbortController();
      const timeout = setTimeout(() => controller.abort(), MODEL_DISCOVERY_TIMEOUT_MS);

      let data: AnthropicModelsResponse;
      try {
        const response = await fetch(url.toString(), {
          headers: {
            "content-type": "application/json",
            "anthropic-version": "2023-06-01",
            ...authHeaders(cred),
          },
          signal: controller.signal,
        });

        if (!response.ok) {
          const text = await response.text().catch(() => "(no body)");
          log.warn(
            `model discovery failed: ${response.status} ${response.statusText} — ${text}`,
          );
          return cachedModels ?? [];
        }

        data = (await response.json()) as AnthropicModelsResponse;
      } finally {
        // Covers both the header wait and the body read; never leaks the timer.
        clearTimeout(timeout);
      }

      for (const entry of data.data) {
        entries.push(entry);
      }
      pages += 1;

      const nextId = data.has_more ? data.last_id : undefined;
      if (nextId !== undefined && nextId === afterId) {
        // Cursor did not advance: asking again would return the same page forever.
        log.warn(`model discovery: pagination cursor stuck at ${nextId}, stopping`);
        break;
      }
      afterId = nextId;

      if (afterId && pages >= MODEL_DISCOVERY_MAX_PAGES) {
        log.warn(
          `model discovery: stopping after ${pages} pages; model list may be incomplete`,
        );
        break;
      }
    } while (afterId);

    // Fetch costs from models.dev (with fallback to hardcoded)
    const modelIDs = entries.map((e) => e.id);
    const costs = await fetchModelCosts(modelIDs);

    const models: workerModel.ModelInfo[] = entries.map((entry) => ({
      id: entry.id,
      providerID: "anthropic",
      cost: { input: costs.get(entry.id) ?? fallbackCost(entry.id) },
      status: "active", // Every listed model is treated as active
      capabilities: {
        input: { text: true }, // All Anthropic models accept text
        reasoning: entry.capabilities?.thinking?.supported ?? false,
      },
    }));

    cachedModels = models;
    cachedModelsAt = Date.now();

    log.info(
      `model discovery: found ${models.length} models (${models.map((m) => m.id).join(", ")})`,
    );

    return models;
  } catch (e) {
    log.warn("model discovery error:", e);
    return cachedModels ?? [];
  }
}
|
|
271
|
-
|
|
272
|
-
/**
 * Forget the cached model list and its timestamp, so the next
 * `discoverModels()` call queries the upstream API again (for testing).
 */
export function clearModelCache(): void {
  cachedModelsAt = 0;
  cachedModels = null;
}
|
|
277
|
-
|
|
278
|
-
// ---------------------------------------------------------------------------
|
|
279
|
-
// Worker model validation — gateway version of maybeValidateWorkerModel
|
|
280
|
-
// ---------------------------------------------------------------------------
|
|
281
|
-
|
|
282
|
-
/** Guard against concurrent validation runs. */
let validating = false;

/**
 * Run worker model validation if needed.
 *
 * Steps, each of which may end the call early without doing anything:
 *  1. Skip if a run is already in progress or `workerModel` is set
 *     explicitly in the lore config.
 *  2. Discover upstream models and select worker candidates relative to the
 *     session model.
 *  3. Skip if the stored validation is not stale for the current model
 *     fingerprint.
 *  4. Load the most recent generation-0 distillation of the session and the
 *     temporal messages it was built from.
 *  5. Hand everything to core's `workerModel.runValidation`, which performs
 *     the actual comparison.
 *
 * The in-progress guard is claimed before the first `await` and released in
 * a `finally`, so overlapping calls cannot both reach step 5.
 *
 * Errors thrown by `runValidation` are logged and swallowed; errors thrown
 * by the preparatory steps propagate to the caller.
 *
 * NOTE(review): the original author intends this to be called on session
 * idle; the call site is not visible from this file.
 *
 * @param sessionModel The model ID being used for conversation (frontier)
 * @param upstreamUrl  Anthropic API base URL
 * @param cred         Auth credential for API calls
 * @param llm          LLM client for validation prompts
 * @param projectPath  Project directory path
 * @param sessionID    Session ID for loading reference distillation data
 */
export async function maybeValidateWorkerModel(
  sessionModel: string,
  upstreamUrl: string,
  cred: AuthCredential,
  llm: LLMClient,
  projectPath: string,
  sessionID: string,
): Promise<void> {
  if (validating) return;

  const cfg = loreConfig();
  if (cfg.workerModel) return; // explicit override — skip auto-selection

  // Claim the guard *before* awaiting anything. Checking it here but setting
  // it only after `discoverModels` resolves would let two overlapping calls
  // both pass the check and both run a validation.
  validating = true;
  try {
    const models = await discoverModels(upstreamUrl, cred);
    if (models.length === 0) return;

    // Build the session model info for candidate selection.
    // Use cost from discovered models if available, otherwise fallback.
    const discoveredModel = models.find((m) => m.id === sessionModel);
    const sessionModelInfo: Parameters<typeof workerModel.selectWorkerCandidates>[0] = {
      id: sessionModel,
      providerID: "anthropic",
      cost: { input: discoveredModel?.cost.input ?? fallbackCost(sessionModel) },
    };

    const candidates = workerModel.selectWorkerCandidates(sessionModelInfo, models);
    if (candidates.length === 0) return;
    // If session model is already the cheapest, no comparison needed
    if (candidates.length === 1 && candidates[0].id === sessionModel) return;

    const fingerprint = workerModel.computeModelFingerprint(
      "anthropic",
      sessionModel,
      models.filter((m) => m.providerID === "anthropic").map((m) => m.id),
    );

    const stored = workerModel.getValidatedWorkerModel("anthropic");
    if (!workerModel.isValidationStale(stored, fingerprint)) return;

    // Need reference distillation data
    const distillations = distillationMod.loadForSession(projectPath, sessionID, true);
    const gen0 = distillations.filter((d) => d.generation === 0);
    if (gen0.length === 0) return;

    const reference = gen0[gen0.length - 1]; // most recent gen-0
    const sourceIds = reference.source_ids;
    if (sourceIds.length === 0) return;

    // Load source temporal messages
    const allMessages = temporal.bySession(projectPath, sessionID);
    const sourceSet = new Set(sourceIds);
    const sourceMessages = allMessages.filter((m) => sourceSet.has(m.id));
    if (sourceMessages.length === 0) return;

    const messagesText = sourceMessages.map((m) => m.content).join("\n");
    const date = new Date(sourceMessages[0].created_at).toLocaleDateString(
      "en-US",
      { year: "numeric", month: "long", day: "numeric" },
    );

    // Only the validation call itself is best-effort; failures in the
    // preparation above still surface to the caller as before.
    try {
      const result = await workerModel.runValidation({
        llm,
        providerID: "anthropic",
        sessionModelID: sessionModel,
        candidates,
        referenceObservations: reference.observations,
        sourceMessagesText: messagesText,
        date,
      });
      if (result) {
        log.info(
          `worker model validated: ${result.modelID} (judge=${result.judgeScore}) — saving 50%+ on worker calls`,
        );
      }
    } catch (e) {
      log.error("worker model validation error:", e);
    }
  } finally {
    validating = false;
  }
}
|
|
381
|
-
|
|
382
|
-
// ---------------------------------------------------------------------------
|
|
383
|
-
// Resolution — wrapper around core's resolveWorkerModel
|
|
384
|
-
// ---------------------------------------------------------------------------
|
|
385
|
-
|
|
386
|
-
/**
 * Resolve the effective worker model for background calls.
 *
 * Reads `workerModel` (explicit override) and `model` (configured
 * conversation model) from the lore config and passes them, together with
 * the `"anthropic"` provider ID, to core's `workerModel.resolveWorkerModel`,
 * which decides the precedence between them.
 *
 * NOTE(review): the original author describes the order as explicit
 * override, then validated auto-selection from kv_meta (24h TTL), then the
 * configured model — confirm against core.
 *
 * @returns The provider/model pair to use, or `undefined` when core cannot resolve one.
 */
export function getWorkerModel(): { providerID: string; modelID: string } | undefined {
  const { workerModel: explicitWorker, model: configuredModel } = loreConfig();
  return workerModel.resolveWorkerModel("anthropic", explicitWorker, configuredModel);
}
|
|
402
|
-
|
|
403
|
-
/**
 * Return this module to its initial state (for testing): releases the
 * validation guard and empties both the cost cache and the model cache.
 */
export function resetWorkerModelState(): void {
  validating = false;
  clearCostCache();
  clearModelCache();
}
|