@loreai/core 0.15.0 → 0.17.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +11 -0
- package/dist/bun/agents-file.d.ts +13 -1
- package/dist/bun/agents-file.d.ts.map +1 -1
- package/dist/bun/config.d.ts +20 -1
- package/dist/bun/config.d.ts.map +1 -1
- package/dist/bun/data.d.ts +174 -0
- package/dist/bun/data.d.ts.map +1 -0
- package/dist/bun/db.d.ts +65 -0
- package/dist/bun/db.d.ts.map +1 -1
- package/dist/bun/distillation.d.ts +49 -6
- package/dist/bun/distillation.d.ts.map +1 -1
- package/dist/bun/embedding-vendor.d.ts +66 -0
- package/dist/bun/embedding-vendor.d.ts.map +1 -0
- package/dist/bun/embedding-worker-types.d.ts +66 -0
- package/dist/bun/embedding-worker-types.d.ts.map +1 -0
- package/dist/bun/embedding-worker.d.ts +16 -0
- package/dist/bun/embedding-worker.d.ts.map +1 -0
- package/dist/bun/embedding-worker.js +100 -0
- package/dist/bun/embedding-worker.js.map +7 -0
- package/dist/bun/embedding.d.ts +91 -8
- package/dist/bun/embedding.d.ts.map +1 -1
- package/dist/bun/git.d.ts +47 -0
- package/dist/bun/git.d.ts.map +1 -0
- package/dist/bun/gradient.d.ts +19 -1
- package/dist/bun/gradient.d.ts.map +1 -1
- package/dist/bun/index.d.ts +9 -6
- package/dist/bun/index.d.ts.map +1 -1
- package/dist/bun/index.js +13205 -11259
- package/dist/bun/index.js.map +4 -4
- package/dist/bun/lat-reader.d.ts +1 -1
- package/dist/bun/lat-reader.d.ts.map +1 -1
- package/dist/bun/ltm.d.ts.map +1 -1
- package/dist/bun/markdown.d.ts +11 -0
- package/dist/bun/markdown.d.ts.map +1 -1
- package/dist/bun/prompt.d.ts +1 -1
- package/dist/bun/prompt.d.ts.map +1 -1
- package/dist/bun/recall.d.ts +53 -0
- package/dist/bun/recall.d.ts.map +1 -1
- package/dist/bun/search.d.ts +29 -0
- package/dist/bun/search.d.ts.map +1 -1
- package/dist/bun/temporal.d.ts +2 -0
- package/dist/bun/temporal.d.ts.map +1 -1
- package/dist/bun/types.d.ts +15 -0
- package/dist/bun/types.d.ts.map +1 -1
- package/dist/bun/worker-model.d.ts +15 -80
- package/dist/bun/worker-model.d.ts.map +1 -1
- package/dist/node/agents-file.d.ts +13 -1
- package/dist/node/agents-file.d.ts.map +1 -1
- package/dist/node/config.d.ts +20 -1
- package/dist/node/config.d.ts.map +1 -1
- package/dist/node/data.d.ts +174 -0
- package/dist/node/data.d.ts.map +1 -0
- package/dist/node/db.d.ts +65 -0
- package/dist/node/db.d.ts.map +1 -1
- package/dist/node/distillation.d.ts +49 -6
- package/dist/node/distillation.d.ts.map +1 -1
- package/dist/node/embedding-vendor.d.ts +66 -0
- package/dist/node/embedding-vendor.d.ts.map +1 -0
- package/dist/node/embedding-worker-types.d.ts +66 -0
- package/dist/node/embedding-worker-types.d.ts.map +1 -0
- package/dist/node/embedding-worker.d.ts +16 -0
- package/dist/node/embedding-worker.d.ts.map +1 -0
- package/dist/node/embedding-worker.js +100 -0
- package/dist/node/embedding-worker.js.map +7 -0
- package/dist/node/embedding.d.ts +91 -8
- package/dist/node/embedding.d.ts.map +1 -1
- package/dist/node/git.d.ts +47 -0
- package/dist/node/git.d.ts.map +1 -0
- package/dist/node/gradient.d.ts +19 -1
- package/dist/node/gradient.d.ts.map +1 -1
- package/dist/node/index.d.ts +9 -6
- package/dist/node/index.d.ts.map +1 -1
- package/dist/node/index.js +13205 -11259
- package/dist/node/index.js.map +4 -4
- package/dist/node/lat-reader.d.ts +1 -1
- package/dist/node/lat-reader.d.ts.map +1 -1
- package/dist/node/ltm.d.ts.map +1 -1
- package/dist/node/markdown.d.ts +11 -0
- package/dist/node/markdown.d.ts.map +1 -1
- package/dist/node/prompt.d.ts +1 -1
- package/dist/node/prompt.d.ts.map +1 -1
- package/dist/node/recall.d.ts +53 -0
- package/dist/node/recall.d.ts.map +1 -1
- package/dist/node/search.d.ts +29 -0
- package/dist/node/search.d.ts.map +1 -1
- package/dist/node/temporal.d.ts +2 -0
- package/dist/node/temporal.d.ts.map +1 -1
- package/dist/node/types.d.ts +15 -0
- package/dist/node/types.d.ts.map +1 -1
- package/dist/node/worker-model.d.ts +15 -80
- package/dist/node/worker-model.d.ts.map +1 -1
- package/dist/types/agents-file.d.ts +13 -1
- package/dist/types/agents-file.d.ts.map +1 -1
- package/dist/types/config.d.ts +20 -1
- package/dist/types/config.d.ts.map +1 -1
- package/dist/types/data.d.ts +174 -0
- package/dist/types/data.d.ts.map +1 -0
- package/dist/types/db.d.ts +65 -0
- package/dist/types/db.d.ts.map +1 -1
- package/dist/types/distillation.d.ts +49 -6
- package/dist/types/distillation.d.ts.map +1 -1
- package/dist/types/embedding-vendor.d.ts +66 -0
- package/dist/types/embedding-vendor.d.ts.map +1 -0
- package/dist/types/embedding-worker-types.d.ts +66 -0
- package/dist/types/embedding-worker-types.d.ts.map +1 -0
- package/dist/types/embedding-worker.d.ts +16 -0
- package/dist/types/embedding-worker.d.ts.map +1 -0
- package/dist/types/embedding.d.ts +91 -8
- package/dist/types/embedding.d.ts.map +1 -1
- package/dist/types/git.d.ts +47 -0
- package/dist/types/git.d.ts.map +1 -0
- package/dist/types/gradient.d.ts +19 -1
- package/dist/types/gradient.d.ts.map +1 -1
- package/dist/types/index.d.ts +9 -6
- package/dist/types/index.d.ts.map +1 -1
- package/dist/types/lat-reader.d.ts +1 -1
- package/dist/types/lat-reader.d.ts.map +1 -1
- package/dist/types/ltm.d.ts.map +1 -1
- package/dist/types/markdown.d.ts +11 -0
- package/dist/types/markdown.d.ts.map +1 -1
- package/dist/types/prompt.d.ts +1 -1
- package/dist/types/prompt.d.ts.map +1 -1
- package/dist/types/recall.d.ts +53 -0
- package/dist/types/recall.d.ts.map +1 -1
- package/dist/types/search.d.ts +29 -0
- package/dist/types/search.d.ts.map +1 -1
- package/dist/types/temporal.d.ts +2 -0
- package/dist/types/temporal.d.ts.map +1 -1
- package/dist/types/types.d.ts +15 -0
- package/dist/types/types.d.ts.map +1 -1
- package/dist/types/worker-model.d.ts +15 -80
- package/dist/types/worker-model.d.ts.map +1 -1
- package/package.json +5 -2
- package/src/agents-file.ts +87 -4
- package/src/config.ts +68 -5
- package/src/curator.ts +2 -2
- package/src/data.ts +768 -0
- package/src/db.ts +386 -7
- package/src/distillation.ts +178 -35
- package/src/embedding-vendor.ts +102 -0
- package/src/embedding-worker-types.ts +82 -0
- package/src/embedding-worker.ts +185 -0
- package/src/embedding.ts +607 -61
- package/src/git.ts +144 -0
- package/src/gradient.ts +174 -17
- package/src/index.ts +20 -0
- package/src/lat-reader.ts +5 -11
- package/src/ltm.ts +17 -44
- package/src/markdown.ts +15 -0
- package/src/prompt.ts +1 -2
- package/src/recall.ts +401 -70
- package/src/search.ts +71 -1
- package/src/temporal.ts +42 -35
- package/src/types.ts +15 -0
- package/src/worker-model.ts +17 -363
package/src/temporal.ts
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import { db, ensureProject } from "./db";
|
|
2
|
-
import { ftsQuery,
|
|
2
|
+
import { ftsQuery, EMPTY_QUERY, runRelaxedSearch } from "./search";
|
|
3
3
|
import { sanitizeSurrogates } from "./markdown";
|
|
4
|
+
import * as embedding from "./embedding";
|
|
4
5
|
import type { LoreMessage, LorePart } from "./types";
|
|
5
6
|
import { isTextPart, isReasoningPart, isToolPart } from "./types";
|
|
6
7
|
|
|
@@ -93,6 +94,10 @@ export function store(input: {
|
|
|
93
94
|
messageMetadata(input.info, input.parts),
|
|
94
95
|
input.info.id,
|
|
95
96
|
);
|
|
97
|
+
// Re-embed on content update (fire-and-forget)
|
|
98
|
+
if (embedding.isAvailable()) {
|
|
99
|
+
embedding.embedTemporalMessage(input.info.id, content);
|
|
100
|
+
}
|
|
96
101
|
return;
|
|
97
102
|
}
|
|
98
103
|
|
|
@@ -111,6 +116,11 @@ export function store(input: {
|
|
|
111
116
|
input.info.time.created,
|
|
112
117
|
messageMetadata(input.info, input.parts),
|
|
113
118
|
);
|
|
119
|
+
|
|
120
|
+
// Embed new message for vector search (fire-and-forget)
|
|
121
|
+
if (embedding.isAvailable()) {
|
|
122
|
+
embedding.embedTemporalMessage(input.info.id, content);
|
|
123
|
+
}
|
|
114
124
|
}
|
|
115
125
|
|
|
116
126
|
export type TemporalMessage = {
|
|
@@ -156,7 +166,7 @@ export function markDistilled(ids: string[]) {
|
|
|
156
166
|
const placeholders = ids.map(() => "?").join(",");
|
|
157
167
|
db()
|
|
158
168
|
.query(
|
|
159
|
-
`UPDATE temporal_messages SET distilled = 1 WHERE id IN (${placeholders})`,
|
|
169
|
+
`UPDATE temporal_messages SET distilled = 1, embedding = NULL WHERE id IN (${placeholders})`,
|
|
160
170
|
)
|
|
161
171
|
.run(...ids);
|
|
162
172
|
}
|
|
@@ -194,8 +204,6 @@ export function search(input: {
|
|
|
194
204
|
}): TemporalMessage[] {
|
|
195
205
|
const pid = ensureProject(input.projectPath);
|
|
196
206
|
const limit = input.limit ?? 20;
|
|
197
|
-
const q = ftsQuery(input.query);
|
|
198
|
-
if (q === EMPTY_QUERY) return [];
|
|
199
207
|
|
|
200
208
|
const ftsSQL = input.sessionID
|
|
201
209
|
? `SELECT m.* FROM temporal_fts f
|
|
@@ -206,24 +214,14 @@ export function search(input: {
|
|
|
206
214
|
CROSS JOIN temporal_messages m ON m.rowid = f.rowid
|
|
207
215
|
WHERE f.content MATCH ? AND m.project_id = ?
|
|
208
216
|
ORDER BY rank LIMIT ?`;
|
|
209
|
-
|
|
210
|
-
? [q, pid, input.sessionID, limit]
|
|
211
|
-
: [q, pid, limit];
|
|
217
|
+
|
|
212
218
|
try {
|
|
213
|
-
|
|
214
|
-
.
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
const qOr = ftsQueryOr(input.query);
|
|
220
|
-
if (qOr === EMPTY_QUERY) return [];
|
|
221
|
-
const paramsOr = input.sessionID
|
|
222
|
-
? [qOr, pid, input.sessionID, limit]
|
|
223
|
-
: [qOr, pid, limit];
|
|
224
|
-
return db()
|
|
225
|
-
.query(ftsSQL)
|
|
226
|
-
.all(...paramsOr) as TemporalMessage[];
|
|
219
|
+
return runRelaxedSearch(input.query, (matchExpr) => {
|
|
220
|
+
const params = input.sessionID
|
|
221
|
+
? [matchExpr, pid, input.sessionID, limit]
|
|
222
|
+
: [matchExpr, pid, limit];
|
|
223
|
+
return db().query(ftsSQL).all(...params) as TemporalMessage[];
|
|
224
|
+
});
|
|
227
225
|
} catch {
|
|
228
226
|
// FTS5 still choked (edge case) — fall back to LIKE search
|
|
229
227
|
return searchLike({
|
|
@@ -249,8 +247,6 @@ export function searchScored(input: {
|
|
|
249
247
|
}): ScoredTemporalMessage[] {
|
|
250
248
|
const pid = ensureProject(input.projectPath);
|
|
251
249
|
const limit = input.limit ?? 20;
|
|
252
|
-
const q = ftsQuery(input.query);
|
|
253
|
-
if (q === EMPTY_QUERY) return [];
|
|
254
250
|
|
|
255
251
|
const ftsSQL = input.sessionID
|
|
256
252
|
? `SELECT m.*, rank FROM temporal_fts f
|
|
@@ -261,20 +257,14 @@ export function searchScored(input: {
|
|
|
261
257
|
CROSS JOIN temporal_messages m ON m.rowid = f.rowid
|
|
262
258
|
WHERE f.content MATCH ? AND m.project_id = ?
|
|
263
259
|
ORDER BY rank LIMIT ?`;
|
|
264
|
-
const params = input.sessionID
|
|
265
|
-
? [q, pid, input.sessionID, limit]
|
|
266
|
-
: [q, pid, limit];
|
|
267
260
|
|
|
268
261
|
try {
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
? [qOr, pid, input.sessionID, limit]
|
|
276
|
-
: [qOr, pid, limit];
|
|
277
|
-
return db().query(ftsSQL).all(...paramsOr) as ScoredTemporalMessage[];
|
|
262
|
+
return runRelaxedSearch(input.query, (matchExpr) => {
|
|
263
|
+
const params = input.sessionID
|
|
264
|
+
? [matchExpr, pid, input.sessionID, limit]
|
|
265
|
+
: [matchExpr, pid, limit];
|
|
266
|
+
return db().query(ftsSQL).all(...params) as ScoredTemporalMessage[];
|
|
267
|
+
});
|
|
278
268
|
} catch {
|
|
279
269
|
return [];
|
|
280
270
|
}
|
|
@@ -348,6 +338,23 @@ export function undistilledCount(
|
|
|
348
338
|
).count;
|
|
349
339
|
}
|
|
350
340
|
|
|
341
|
+
/** Sum of estimated tokens across undistilled messages for a project/session. */
|
|
342
|
+
export function undistilledTokens(
|
|
343
|
+
projectPath: string,
|
|
344
|
+
sessionID?: string,
|
|
345
|
+
): number {
|
|
346
|
+
const pid = ensureProject(projectPath);
|
|
347
|
+
const query = sessionID
|
|
348
|
+
? "SELECT COALESCE(SUM(tokens), 0) as total FROM temporal_messages WHERE project_id = ? AND session_id = ? AND distilled = 0"
|
|
349
|
+
: "SELECT COALESCE(SUM(tokens), 0) as total FROM temporal_messages WHERE project_id = ? AND distilled = 0";
|
|
350
|
+
const params = sessionID ? [pid, sessionID] : [pid];
|
|
351
|
+
return (
|
|
352
|
+
db()
|
|
353
|
+
.query(query)
|
|
354
|
+
.get(...params) as { total: number }
|
|
355
|
+
).total;
|
|
356
|
+
}
|
|
357
|
+
|
|
351
358
|
export type PruneResult = {
|
|
352
359
|
/** Rows deleted by the TTL pass (distilled=1 AND older than retention period). */
|
|
353
360
|
ttlDeleted: number;
|
package/src/types.ts
CHANGED
|
@@ -245,6 +245,21 @@ export interface LLMClient {
|
|
|
245
245
|
* auth through their own mechanisms.
|
|
246
246
|
*/
|
|
247
247
|
sessionID?: string;
|
|
248
|
+
/**
|
|
249
|
+
* Maximum output tokens for this call. When absent, the adapter
|
|
250
|
+
* uses its built-in default (typically 8192).
|
|
251
|
+
*
|
|
252
|
+
* Worker call sites should set this explicitly based on expected
|
|
253
|
+
* output size to avoid wasting tokens on unnecessarily large
|
|
254
|
+
* output budgets.
|
|
255
|
+
*
|
|
256
|
+
* Adapter behavior:
|
|
257
|
+
* - Gateway: uses as `max_tokens` in the API request body
|
|
258
|
+
* - Pi: passes as `maxTokens` to `complete()`
|
|
259
|
+
* - OpenCode: cannot honor — SDK has no maxTokens on session.prompt();
|
|
260
|
+
* the field is silently ignored
|
|
261
|
+
*/
|
|
262
|
+
maxTokens?: number;
|
|
248
263
|
},
|
|
249
264
|
): Promise<string | null>;
|
|
250
265
|
}
|
package/src/worker-model.ts
CHANGED
|
@@ -1,25 +1,22 @@
|
|
|
1
1
|
/**
|
|
2
|
-
*
|
|
2
|
+
* Worker model resolution.
|
|
3
3
|
*
|
|
4
|
-
* Background workers (distillation, curation, query expansion)
|
|
5
|
-
*
|
|
6
|
-
*
|
|
7
|
-
*
|
|
8
|
-
* Phase 2: LLM judge (session model rates candidate output vs reference)
|
|
4
|
+
* Background workers (distillation, curation, query expansion) default to
|
|
5
|
+
* sonnet-4-6 when the session model is more expensive ($1.50+/M input).
|
|
6
|
+
* Sonnet-4-6 produces equivalent-quality distillations at lower cost.
|
|
7
|
+
* An explicit `workerModel` config override takes priority over this default.
|
|
9
8
|
*
|
|
10
|
-
*
|
|
11
|
-
*
|
|
9
|
+
* Resolution order:
|
|
10
|
+
* 1. Explicit config override (`workerModel`)
|
|
11
|
+
* 2. Cost-aware default (sonnet-4 for expensive session models)
|
|
12
|
+
* 3. Session model fallback (same model as the conversation)
|
|
12
13
|
*/
|
|
13
14
|
|
|
14
|
-
import { db } from "./db";
|
|
15
|
-
import { sha256 } from "#db/driver";
|
|
16
|
-
import * as log from "./log";
|
|
17
|
-
|
|
18
15
|
// ---------------------------------------------------------------------------
|
|
19
|
-
// Types
|
|
16
|
+
// Types (kept for config compatibility)
|
|
20
17
|
// ---------------------------------------------------------------------------
|
|
21
18
|
|
|
22
|
-
/** Minimal model info
|
|
19
|
+
/** Minimal model info — kept for downstream consumers. */
|
|
23
20
|
export type ModelInfo = {
|
|
24
21
|
id: string;
|
|
25
22
|
providerID: string;
|
|
@@ -32,369 +29,26 @@ export type ModelInfo = {
|
|
|
32
29
|
};
|
|
33
30
|
};
|
|
34
31
|
|
|
35
|
-
/** Result of a worker model validation stored in kv_meta. */
|
|
36
|
-
export type WorkerModelResult = {
|
|
37
|
-
modelID: string;
|
|
38
|
-
providerID: string;
|
|
39
|
-
fingerprint: string;
|
|
40
|
-
validatedAt: number;
|
|
41
|
-
judgeScore: number | null; // null = structural-only (no judge run yet)
|
|
42
|
-
};
|
|
43
|
-
|
|
44
|
-
const KV_PREFIX = "lore:worker_model:";
|
|
45
|
-
|
|
46
|
-
// ---------------------------------------------------------------------------
|
|
47
|
-
// Candidate selection
|
|
48
|
-
// ---------------------------------------------------------------------------
|
|
49
|
-
|
|
50
|
-
/**
|
|
51
|
-
* Select worker model candidates from the available models.
|
|
52
|
-
*
|
|
53
|
-
* Returns up to 2 candidates: cheapest overall + one tier below the session
|
|
54
|
-
* model. The session model itself is included (if it's the cheapest, the list
|
|
55
|
-
* has 1 entry and no comparison is needed).
|
|
56
|
-
*/
|
|
57
|
-
export function selectWorkerCandidates(
|
|
58
|
-
sessionModel: { id: string; providerID: string; cost: { input: number } },
|
|
59
|
-
providerModels: ModelInfo[],
|
|
60
|
-
): ModelInfo[] {
|
|
61
|
-
// Filter: same provider, active, text-capable
|
|
62
|
-
const eligible = providerModels.filter(
|
|
63
|
-
(m) =>
|
|
64
|
-
m.providerID === sessionModel.providerID &&
|
|
65
|
-
m.status === "active" &&
|
|
66
|
-
m.capabilities.input.text,
|
|
67
|
-
);
|
|
68
|
-
|
|
69
|
-
if (eligible.length === 0) return [];
|
|
70
|
-
|
|
71
|
-
// Sort by cost ascending, then prefer non-reasoning models at equal cost.
|
|
72
|
-
// Non-reasoning models don't produce thinking tokens, avoiding wasted spend
|
|
73
|
-
// on tokens that background workers discard.
|
|
74
|
-
const sorted = [...eligible].sort((a, b) => {
|
|
75
|
-
const costDiff = a.cost.input - b.cost.input;
|
|
76
|
-
if (costDiff !== 0) return costDiff;
|
|
77
|
-
// At equal cost, non-reasoning (0) sorts before reasoning (1)
|
|
78
|
-
const aReasoning = a.capabilities.reasoning ? 1 : 0;
|
|
79
|
-
const bReasoning = b.capabilities.reasoning ? 1 : 0;
|
|
80
|
-
return aReasoning - bReasoning;
|
|
81
|
-
});
|
|
82
|
-
|
|
83
|
-
// Cheapest overall
|
|
84
|
-
const cheapest = sorted[0];
|
|
85
|
-
|
|
86
|
-
// One tier below session model: the most expensive model that's still
|
|
87
|
-
// cheaper than the session model. If session IS cheapest, this is undefined.
|
|
88
|
-
const belowSession = sorted
|
|
89
|
-
.filter((m) => m.cost.input < sessionModel.cost.input)
|
|
90
|
-
.pop(); // last = most expensive among cheaper ones
|
|
91
|
-
|
|
92
|
-
// Deduplicate
|
|
93
|
-
const candidates = new Map<string, ModelInfo>();
|
|
94
|
-
candidates.set(cheapest.id, cheapest);
|
|
95
|
-
if (belowSession && belowSession.id !== cheapest.id) {
|
|
96
|
-
candidates.set(belowSession.id, belowSession);
|
|
97
|
-
}
|
|
98
|
-
|
|
99
|
-
// If session model is the cheapest, return just it
|
|
100
|
-
if (cheapest.id === sessionModel.id || cheapest.cost.input >= sessionModel.cost.input) {
|
|
101
|
-
return [cheapest];
|
|
102
|
-
}
|
|
103
|
-
|
|
104
|
-
return [...candidates.values()];
|
|
105
|
-
}
|
|
106
|
-
|
|
107
|
-
// ---------------------------------------------------------------------------
|
|
108
|
-
// Fingerprinting
|
|
109
|
-
// ---------------------------------------------------------------------------
|
|
110
|
-
|
|
111
|
-
/**
|
|
112
|
-
* Compute a fingerprint from the model landscape. Changes when:
|
|
113
|
-
* - Models are added or removed from the provider
|
|
114
|
-
* - The session model changes
|
|
115
|
-
*/
|
|
116
|
-
export function computeModelFingerprint(
|
|
117
|
-
providerID: string,
|
|
118
|
-
sessionModelID: string,
|
|
119
|
-
activeModelIDs: string[],
|
|
120
|
-
): string {
|
|
121
|
-
const sorted = [...activeModelIDs].sort();
|
|
122
|
-
return sha256(
|
|
123
|
-
JSON.stringify({ providerID, sessionModelID, modelIDs: sorted }),
|
|
124
|
-
);
|
|
125
|
-
}
|
|
126
|
-
|
|
127
|
-
// ---------------------------------------------------------------------------
|
|
128
|
-
// Persistence
|
|
129
|
-
// ---------------------------------------------------------------------------
|
|
130
|
-
|
|
131
|
-
export function getValidatedWorkerModel(
|
|
132
|
-
providerID: string,
|
|
133
|
-
): WorkerModelResult | null {
|
|
134
|
-
const row = db()
|
|
135
|
-
.query("SELECT value FROM kv_meta WHERE key = ?")
|
|
136
|
-
.get(`${KV_PREFIX}${providerID}`) as { value: string } | null;
|
|
137
|
-
if (!row) return null;
|
|
138
|
-
try {
|
|
139
|
-
return JSON.parse(row.value) as WorkerModelResult;
|
|
140
|
-
} catch {
|
|
141
|
-
return null;
|
|
142
|
-
}
|
|
143
|
-
}
|
|
144
|
-
|
|
145
|
-
export function storeValidatedWorkerModel(result: WorkerModelResult): void {
|
|
146
|
-
const key = `${KV_PREFIX}${result.providerID}`;
|
|
147
|
-
const value = JSON.stringify(result);
|
|
148
|
-
db()
|
|
149
|
-
.query(
|
|
150
|
-
"INSERT INTO kv_meta (key, value) VALUES (?, ?) ON CONFLICT(key) DO UPDATE SET value = ?",
|
|
151
|
-
)
|
|
152
|
-
.run(key, value, value);
|
|
153
|
-
}
|
|
154
|
-
|
|
155
|
-
/** Clear a stored worker model validation (e.g. when the model is deprecated). */
|
|
156
|
-
export function clearValidatedWorkerModel(providerID: string): void {
|
|
157
|
-
db().query("DELETE FROM kv_meta WHERE key = ?").run(`${KV_PREFIX}${providerID}`);
|
|
158
|
-
}
|
|
159
|
-
|
|
160
|
-
/**
|
|
161
|
-
* Check whether the stored validation is stale (fingerprint mismatch).
|
|
162
|
-
*/
|
|
163
|
-
export function isValidationStale(
|
|
164
|
-
stored: WorkerModelResult | null,
|
|
165
|
-
currentFingerprint: string,
|
|
166
|
-
): boolean {
|
|
167
|
-
if (!stored) return true;
|
|
168
|
-
return stored.fingerprint !== currentFingerprint;
|
|
169
|
-
}
|
|
170
|
-
|
|
171
|
-
// ---------------------------------------------------------------------------
|
|
172
|
-
// Structural validation
|
|
173
|
-
// ---------------------------------------------------------------------------
|
|
174
|
-
|
|
175
|
-
export type StructuralCheckResult = {
|
|
176
|
-
passed: boolean;
|
|
177
|
-
observationCount: number;
|
|
178
|
-
tokenCount: number;
|
|
179
|
-
reason?: string;
|
|
180
|
-
};
|
|
181
|
-
|
|
182
|
-
/**
|
|
183
|
-
* Structural quality check: does the candidate distillation output meet
|
|
184
|
-
* minimum quality thresholds relative to the reference?
|
|
185
|
-
*/
|
|
186
|
-
export function structuralCheck(
|
|
187
|
-
candidateObservations: string | null,
|
|
188
|
-
referenceObservations: string,
|
|
189
|
-
): StructuralCheckResult {
|
|
190
|
-
if (candidateObservations == null || candidateObservations.length === 0) {
|
|
191
|
-
return { passed: false, observationCount: 0, tokenCount: 0, reason: candidateObservations === null ? "parse_failed" : "empty" };
|
|
192
|
-
}
|
|
193
|
-
|
|
194
|
-
// Count observation lines (non-empty lines starting with common markers)
|
|
195
|
-
const countObs = (text: string) =>
|
|
196
|
-
text.split("\n").filter((l) => l.trim().length > 0).length;
|
|
197
|
-
|
|
198
|
-
const refCount = countObs(referenceObservations);
|
|
199
|
-
const candCount = countObs(candidateObservations);
|
|
200
|
-
const candTokens = Math.ceil(candidateObservations.length / 3);
|
|
201
|
-
|
|
202
|
-
// Observation count within ±50% of reference
|
|
203
|
-
if (refCount > 0 && (candCount < refCount * 0.5 || candCount > refCount * 1.5)) {
|
|
204
|
-
return {
|
|
205
|
-
passed: false,
|
|
206
|
-
observationCount: candCount,
|
|
207
|
-
tokenCount: candTokens,
|
|
208
|
-
reason: `observation_count_${candCount}_vs_ref_${refCount}`,
|
|
209
|
-
};
|
|
210
|
-
}
|
|
211
|
-
|
|
212
|
-
// Not degenerate: not empty, not >3x reference size
|
|
213
|
-
const refTokens = Math.ceil(referenceObservations.length / 3);
|
|
214
|
-
if (candTokens === 0) {
|
|
215
|
-
return { passed: false, observationCount: candCount, tokenCount: candTokens, reason: "empty" };
|
|
216
|
-
}
|
|
217
|
-
if (refTokens > 0 && candTokens > refTokens * 3) {
|
|
218
|
-
return {
|
|
219
|
-
passed: false,
|
|
220
|
-
observationCount: candCount,
|
|
221
|
-
tokenCount: candTokens,
|
|
222
|
-
reason: `token_count_${candTokens}_vs_ref_${refTokens}_3x`,
|
|
223
|
-
};
|
|
224
|
-
}
|
|
225
|
-
|
|
226
|
-
return { passed: true, observationCount: candCount, tokenCount: candTokens };
|
|
227
|
-
}
|
|
228
|
-
|
|
229
|
-
// ---------------------------------------------------------------------------
|
|
230
|
-
// Judge prompt
|
|
231
|
-
// ---------------------------------------------------------------------------
|
|
232
|
-
|
|
233
|
-
export const WORKER_JUDGE_SYSTEM = `You are evaluating distillation quality. You will be given a REFERENCE distillation (produced by a capable model) and a CANDIDATE distillation (produced by a cheaper model) of the same conversation segment.
|
|
234
|
-
|
|
235
|
-
Rate the candidate on a scale of 1-5:
|
|
236
|
-
5 = Captures all key facts and decisions, equivalent to reference
|
|
237
|
-
4 = Captures most facts, minor omissions
|
|
238
|
-
3 = Captures the essential facts, some detail loss acceptable
|
|
239
|
-
2 = Missing important facts or technical details
|
|
240
|
-
1 = Significantly incomplete or inaccurate
|
|
241
|
-
|
|
242
|
-
Respond with ONLY a single digit (1-5).`;
|
|
243
|
-
|
|
244
|
-
export function workerJudgeUser(
|
|
245
|
-
reference: string,
|
|
246
|
-
candidate: string,
|
|
247
|
-
): string {
|
|
248
|
-
return `<reference>\n${reference}\n</reference>\n\n<candidate>\n${candidate}\n</candidate>`;
|
|
249
|
-
}
|
|
250
|
-
|
|
251
|
-
/** Parse the judge's score from a response. Returns null on parse failure. */
|
|
252
|
-
export function parseJudgeScore(response: string): number | null {
|
|
253
|
-
const match = response.trim().match(/^([1-5])/);
|
|
254
|
-
if (!match) return null;
|
|
255
|
-
return parseInt(match[1], 10);
|
|
256
|
-
}
|
|
257
|
-
|
|
258
|
-
// ---------------------------------------------------------------------------
|
|
259
|
-
// Validation orchestration
|
|
260
|
-
// ---------------------------------------------------------------------------
|
|
261
|
-
|
|
262
|
-
import { DISTILLATION_SYSTEM, distillationUser } from "./prompt";
|
|
263
|
-
import type { LLMClient } from "./types";
|
|
264
|
-
|
|
265
|
-
export type ValidationInput = {
|
|
266
|
-
llm: LLMClient;
|
|
267
|
-
providerID: string;
|
|
268
|
-
sessionModelID: string;
|
|
269
|
-
candidates: ModelInfo[];
|
|
270
|
-
/** Recent gen-0 distillation to use as reference (observations text). */
|
|
271
|
-
referenceObservations: string;
|
|
272
|
-
/** Source messages text for re-running distillation with candidates. */
|
|
273
|
-
sourceMessagesText: string;
|
|
274
|
-
/** Date string for the distillation prompt. */
|
|
275
|
-
date: string;
|
|
276
|
-
};
|
|
277
|
-
|
|
278
|
-
/**
|
|
279
|
-
* Run the two-phase quality validation for worker model candidates.
|
|
280
|
-
* Returns the cheapest passing candidate, or null if none pass.
|
|
281
|
-
*/
|
|
282
|
-
export async function runValidation(
|
|
283
|
-
input: ValidationInput,
|
|
284
|
-
): Promise<WorkerModelResult | null> {
|
|
285
|
-
const { llm, candidates, referenceObservations, sourceMessagesText, date } = input;
|
|
286
|
-
|
|
287
|
-
const userPrompt = distillationUser({
|
|
288
|
-
messages: sourceMessagesText,
|
|
289
|
-
date,
|
|
290
|
-
});
|
|
291
|
-
|
|
292
|
-
for (const candidate of candidates) {
|
|
293
|
-
// Skip the session model — it produced the reference, no need to test
|
|
294
|
-
if (candidate.id === input.sessionModelID) continue;
|
|
295
|
-
|
|
296
|
-
// Phase 1: run distillation with candidate model
|
|
297
|
-
let candidateObservations: string | null = null;
|
|
298
|
-
try {
|
|
299
|
-
const raw = await llm.prompt(DISTILLATION_SYSTEM, userPrompt, {
|
|
300
|
-
model: { providerID: candidate.providerID, modelID: candidate.id },
|
|
301
|
-
workerID: "lore-distill",
|
|
302
|
-
thinking: false,
|
|
303
|
-
});
|
|
304
|
-
if (raw) {
|
|
305
|
-
// Parse <observations>...</observations> block
|
|
306
|
-
const match = raw.match(/<observations>([\s\S]*?)<\/observations>/);
|
|
307
|
-
candidateObservations = match ? match[1].trim() : raw.trim();
|
|
308
|
-
}
|
|
309
|
-
} catch (e) {
|
|
310
|
-
log.warn(`worker model validation: candidate ${candidate.id} failed:`, e);
|
|
311
|
-
continue;
|
|
312
|
-
}
|
|
313
|
-
|
|
314
|
-
const structural = structuralCheck(candidateObservations, referenceObservations);
|
|
315
|
-
if (!structural.passed) {
|
|
316
|
-
log.info(
|
|
317
|
-
`worker model validation: ${candidate.id} failed structural check: ${structural.reason}`,
|
|
318
|
-
);
|
|
319
|
-
continue;
|
|
320
|
-
}
|
|
321
|
-
|
|
322
|
-
// Phase 2: LLM judge (using session model)
|
|
323
|
-
let judgeScore: number | null = null;
|
|
324
|
-
try {
|
|
325
|
-
const judgeResponse = await llm.prompt(
|
|
326
|
-
WORKER_JUDGE_SYSTEM,
|
|
327
|
-
workerJudgeUser(referenceObservations, candidateObservations!),
|
|
328
|
-
{ workerID: "lore-distill", thinking: false }, // use session model (no model override)
|
|
329
|
-
);
|
|
330
|
-
if (judgeResponse) {
|
|
331
|
-
judgeScore = parseJudgeScore(judgeResponse);
|
|
332
|
-
}
|
|
333
|
-
} catch (e) {
|
|
334
|
-
log.warn(`worker model validation: judge call failed for ${candidate.id}:`, e);
|
|
335
|
-
}
|
|
336
|
-
|
|
337
|
-
if (judgeScore !== null && judgeScore < 3) {
|
|
338
|
-
log.info(
|
|
339
|
-
`worker model validation: ${candidate.id} failed judge (score=${judgeScore})`,
|
|
340
|
-
);
|
|
341
|
-
continue;
|
|
342
|
-
}
|
|
343
|
-
|
|
344
|
-
// Candidate passed both phases
|
|
345
|
-
const fingerprint = computeModelFingerprint(
|
|
346
|
-
input.providerID,
|
|
347
|
-
input.sessionModelID,
|
|
348
|
-
candidates.map((c) => c.id),
|
|
349
|
-
);
|
|
350
|
-
|
|
351
|
-
const result: WorkerModelResult = {
|
|
352
|
-
modelID: candidate.id,
|
|
353
|
-
providerID: candidate.providerID,
|
|
354
|
-
fingerprint,
|
|
355
|
-
validatedAt: Date.now(),
|
|
356
|
-
judgeScore,
|
|
357
|
-
};
|
|
358
|
-
storeValidatedWorkerModel(result);
|
|
359
|
-
log.info(
|
|
360
|
-
`worker model validated: ${candidate.id} (judge=${judgeScore}) for provider ${input.providerID}`,
|
|
361
|
-
);
|
|
362
|
-
return result;
|
|
363
|
-
}
|
|
364
|
-
|
|
365
|
-
// No candidate passed — clear any stale stored result so we don't keep
|
|
366
|
-
// routing worker calls to a potentially-deprecated model.
|
|
367
|
-
clearValidatedWorkerModel(input.providerID);
|
|
368
|
-
log.info(
|
|
369
|
-
`worker model validation: no candidate passed for ${input.providerID} — cleared stale entry`,
|
|
370
|
-
);
|
|
371
|
-
return null;
|
|
372
|
-
}
|
|
373
|
-
|
|
374
32
|
// ---------------------------------------------------------------------------
|
|
375
33
|
// Effective worker model resolution
|
|
376
34
|
// ---------------------------------------------------------------------------
|
|
377
35
|
|
|
378
36
|
/**
|
|
379
37
|
* Resolve the effective worker model for a given provider.
|
|
380
|
-
* Priority: explicit config >
|
|
38
|
+
* Priority: explicit config override > cost-aware default > session model.
|
|
381
39
|
*/
|
|
382
40
|
export function resolveWorkerModel(
|
|
383
|
-
|
|
41
|
+
_providerID: string,
|
|
384
42
|
configWorkerModel?: { providerID: string; modelID: string },
|
|
385
43
|
configModel?: { providerID: string; modelID: string },
|
|
44
|
+
costAwareDefault?: { providerID: string; modelID: string },
|
|
386
45
|
): { providerID: string; modelID: string } | undefined {
|
|
387
46
|
// Explicit override wins
|
|
388
47
|
if (configWorkerModel) return configWorkerModel;
|
|
389
48
|
|
|
390
|
-
//
|
|
391
|
-
//
|
|
392
|
-
|
|
393
|
-
const validated = getValidatedWorkerModel(providerID);
|
|
394
|
-
const MAX_AGE_MS = 24 * 60 * 60 * 1000;
|
|
395
|
-
if (validated && Date.now() - validated.validatedAt <= MAX_AGE_MS) {
|
|
396
|
-
return { providerID: validated.providerID, modelID: validated.modelID };
|
|
397
|
-
}
|
|
49
|
+
// Cost-aware default: cheaper model for background work when the session
|
|
50
|
+
// model is expensive. Caller determines when this applies based on pricing.
|
|
51
|
+
if (costAwareDefault) return costAwareDefault;
|
|
398
52
|
|
|
399
53
|
// Fall back to the session model config (or undefined = host default)
|
|
400
54
|
return configModel;
|