@agenr/agenr-plugin 2.0.0 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-MEHOGUZE.js → chunk-6T5RXGIR.js} +989 -70
- package/dist/{chunk-Y2BC7RCE.js → chunk-7TDALVPY.js} +1434 -305
- package/dist/{chunk-XD3446YW.js → chunk-DGV6D6Q3.js} +2 -21
- package/dist/chunk-IMQIJPIP.js +886 -0
- package/dist/chunk-MJIB6J5S.js +3059 -0
- package/dist/index.js +1466 -124
- package/openclaw.plugin.json +86 -2
- package/package.json +1 -1
|
@@ -0,0 +1,3059 @@
|
|
|
1
|
+
import {
|
|
2
|
+
composeProcedureRecallText,
|
|
3
|
+
computeProcedureRevisionHash,
|
|
4
|
+
computeProcedureSourceHash
|
|
5
|
+
} from "./chunk-ZYADFKX3.js";
|
|
6
|
+
import {
|
|
7
|
+
BEFORE_TURN_DEBUG_ARTIFACT_DEFAULT_TOP_K,
|
|
8
|
+
BEFORE_TURN_DEBUG_ARTIFACT_MAX_TOP_K,
|
|
9
|
+
RECALL_DEBUG_ARTIFACT_DEFAULT_TOP_K,
|
|
10
|
+
RECALL_DEBUG_ARTIFACT_MAX_TOP_K,
|
|
11
|
+
formatAgenrBeforeTurnRecall,
|
|
12
|
+
runBeforeTurn
|
|
13
|
+
} from "./chunk-IMQIJPIP.js";
|
|
14
|
+
import {
|
|
15
|
+
CLAIM_KEY_SOURCES,
|
|
16
|
+
CLAIM_KEY_STATUSES,
|
|
17
|
+
CLAIM_SUPPORT_MODES,
|
|
18
|
+
ENTRY_TYPES,
|
|
19
|
+
EXPIRY_LEVELS,
|
|
20
|
+
attachCrossEncoderPort,
|
|
21
|
+
composeEmbeddingText,
|
|
22
|
+
createDatabase,
|
|
23
|
+
createEmbeddingClient,
|
|
24
|
+
createOpenAICrossEncoder,
|
|
25
|
+
createRecallAdapter,
|
|
26
|
+
isRecord,
|
|
27
|
+
normalizeProcedureDefinition,
|
|
28
|
+
parseOptionalBoolean,
|
|
29
|
+
parseOptionalIntegerInRange,
|
|
30
|
+
parseOptionalTimestampString,
|
|
31
|
+
parseOptionalTrimmedString,
|
|
32
|
+
parseRequiredTrimmedString,
|
|
33
|
+
projectClaimCentricRecallEntry,
|
|
34
|
+
pushIssue,
|
|
35
|
+
pushUnexpectedFields,
|
|
36
|
+
readConfig,
|
|
37
|
+
resolveCrossEncoderApiKey,
|
|
38
|
+
resolveEmbeddingApiKey,
|
|
39
|
+
resolveEmbeddingModel,
|
|
40
|
+
resolveModel,
|
|
41
|
+
runUnifiedRecall
|
|
42
|
+
} from "./chunk-7TDALVPY.js";
|
|
43
|
+
import {
|
|
44
|
+
recall
|
|
45
|
+
} from "./chunk-6T5RXGIR.js";
|
|
46
|
+
|
|
47
|
+
// src/internal-eval-server.ts
|
|
48
|
+
import process from "process";
|
|
49
|
+
|
|
50
|
+
// src/adapters/api/internal-eval-server.ts
|
|
51
|
+
import { createServer } from "http";
|
|
52
|
+
|
|
53
|
+
// src/app/evals/recall/provision-fixtures.ts
|
|
54
|
+
import { createHash } from "crypto";
|
|
55
|
+
// Importance given to a seeded fixture entry when the fixture does not set `importance`.
var DEFAULT_IMPORTANCE = 6;
|
|
56
|
+
// Expiry level given to a seeded fixture entry when the fixture does not set `expiry`.
var DEFAULT_EXPIRY = "permanent";
|
|
57
|
+
// Quality score stamped on every seeded fixture entry; buildEntry never reads one from the fixture.
var DEFAULT_QUALITY_SCORE = 0.5;
|
|
58
|
+
/**
 * Seeds a recall-eval memory pool into the supplied fixture store.
 *
 * The pool is prepared by `prepareFixtures`, all embedding texts are embedded
 * in one `embed` call, and every entry is inserted inside a single store
 * transaction, following the prepared insertion order.
 *
 * @param {object} params
 * @param {string} params.caseId - Eval case id (used when fixture ids are generated).
 * @param {Array<object>} params.memoryPool - Raw fixture definitions.
 * @param {string} params.provisionedAt - Timestamp used for fixtures without `created_at`.
 * @param {{ embed: Function }} params.embedding - Port whose `embed(texts)` resolves to one vector per text.
 * @param {{ withTransaction: Function }} params.store - Fixture store that receives the inserts.
 * @returns {Promise<object>} Provisioning counters plus the `seededEntries` summaries.
 * @throws {Error} When the number of returned embeddings differs from the number of fixtures.
 */
async function provisionRecallEvalFixtures(params) {
  const { caseId, memoryPool, provisionedAt } = params;
  const batch = prepareFixtures(caseId, memoryPool, provisionedAt);
  const ordered = batch.insertionOrder;
  const total = ordered.length;

  // Nothing to seed: skip the embedding call and the transaction entirely.
  if (total === 0) {
    return {
      provisionedCount: 0,
      providedIdCount: 0,
      generatedIdCount: 0,
      retiredCount: 0,
      supersededCount: 0,
      createdAtDefaultedCount: 0,
      updatedAtDefaultedCount: 0,
      seededEntries: []
    };
  }

  const texts = ordered.map((fixture) => fixture.embeddingText);
  const vectors = await params.embedding.embed(texts);
  if (vectors.length !== total) {
    throw new Error(`Fixture embedding count mismatch: expected ${total}, received ${vectors.length}.`);
  }

  await params.store.withTransaction(async (tx) => {
    let position = 0;
    for (const fixture of ordered) {
      // Vectors line up with `ordered` because the texts were mapped from it.
      await tx.insertEntry(fixture.entry, vectors[position] ?? [], fixture.contentHash);
      position += 1;
    }
  });

  return {
    provisionedCount: total,
    providedIdCount: batch.providedIdCount,
    generatedIdCount: batch.generatedIdCount,
    retiredCount: batch.retiredCount,
    supersededCount: batch.supersededCount,
    createdAtDefaultedCount: batch.createdAtDefaultedCount,
    updatedAtDefaultedCount: batch.updatedAtDefaultedCount,
    seededEntries: batch.seededEntries
  };
}
|
|
92
|
+
/**
 * Turns raw memory-pool fixtures into insert-ready records plus summary counters.
 *
 * Each fixture keeps its own `id` or receives a generated one; ids must be
 * unique and every `superseded_by` must name an id from the same pool.
 *
 * @param {string} caseId - Eval case id, mixed into generated fixture ids.
 * @param {Array<object>} fixtures - Raw fixture definitions.
 * @param {string} provisionedAt - Fallback timestamp handed to `buildEntry`.
 * @returns {object} `insertionOrder` (supersession-sorted prepared fixtures), counters, and `seededEntries`.
 * @throws {Error} On duplicate ids, on an unknown `superseded_by`, or on a supersession cycle.
 */
function prepareFixtures(caseId, fixtures, provisionedAt) {
  const ids = fixtures.map((fixture, position) => fixture.id ?? createFixtureId(caseId, position, fixture));
  const repeatedIds = findDuplicateIds(ids);
  if (repeatedIds.length > 0) {
    throw new Error(`Fixture IDs must be unique. Duplicate IDs: ${repeatedIds.join(", ")}.`);
  }

  const idLookup = new Set(ids);
  const prepareOne = (fixture, position) => {
    const successorId = fixture.superseded_by;
    if (successorId && !idLookup.has(successorId)) {
      throw new Error(`memoryPool[${position}].superseded_by references unknown fixture id "${successorId}".`);
    }
    const entry = buildEntry(fixture, ids[position] ?? "", provisionedAt);
    // The content hash covers type, subject and content, joined by newlines.
    const contentHash = hashText(`${entry.type}\n${entry.subject}\n${entry.content}`);
    const embeddingText = composeEmbeddingText(entry);
    return { fixtureIndex: position, entry, contentHash, embeddingText };
  };
  const prepared = fixtures.map(prepareOne);

  const insertionOrder = topologicallySortFixtures(prepared);
  const tally = (items, matches) => items.filter(matches).length;
  return {
    insertionOrder,
    providedIdCount: tally(fixtures, (fixture) => fixture.id !== void 0),
    generatedIdCount: tally(fixtures, (fixture) => fixture.id === void 0),
    retiredCount: tally(prepared, (item) => item.entry.retired),
    supersededCount: tally(prepared, (item) => item.entry.superseded_by !== void 0),
    createdAtDefaultedCount: tally(fixtures, (fixture) => fixture.created_at === void 0),
    updatedAtDefaultedCount: tally(fixtures, (fixture) => fixture.updated_at === void 0),
    seededEntries: prepared.map((item) => summarizePreparedFixture(item.entry))
  };
}
|
|
125
|
+
/**
 * Builds the entry record that is inserted for one fixture.
 *
 * Most fields are copied from the fixture unchanged (missing ones stay
 * `undefined`). `importance`, `expiry`, `tags` and `retired` fall back to
 * defaults, `quality_score` and `recall_count` are always fixed values, and
 * the timestamps default to `provisionedAt` / `created_at`.
 *
 * @param {object} fixture - Raw fixture definition.
 * @param {string} id - Resolved id for the entry.
 * @param {string} provisionedAt - Used as `created_at` when the fixture has none.
 * @returns {object} The entry, with keys in a fixed order.
 */
function buildEntry(fixture, id, provisionedAt) {
  const created_at = fixture.created_at ?? provisionedAt;
  const updated_at = fixture.updated_at ?? created_at;
  // Copies the named fields verbatim, keeping the key even when the value is undefined.
  const copy = (...fields) => Object.fromEntries(fields.map((field) => [field, fixture[field]]));

  return {
    id,
    ...copy("type", "subject", "content"),
    importance: fixture.importance ?? DEFAULT_IMPORTANCE,
    expiry: fixture.expiry ?? DEFAULT_EXPIRY,
    tags: fixture.tags ?? [],
    ...copy("source_file", "source_context"),
    quality_score: DEFAULT_QUALITY_SCORE,
    recall_count: 0,
    ...copy(
      "superseded_by",
      "claim_key",
      "claim_key_status",
      "claim_key_source",
      "claim_support_source_kind",
      "claim_support_locator",
      "claim_support_observed_at",
      "claim_support_mode",
      "valid_from",
      "valid_to",
      "supersession_kind",
      "supersession_reason"
    ),
    retired: fixture.retired ?? false,
    ...copy("retired_at", "retired_reason"),
    created_at,
    updated_at
  };
}
|
|
159
|
+
/**
 * Projects a seeded entry down to the fields reported in `seededEntries`.
 *
 * @param {object} entry - Entry produced for a fixture.
 * @returns {object} Object with exactly the summary keys, in a fixed order;
 *   keys whose value is missing on the entry are present with `undefined`.
 */
function summarizePreparedFixture(entry) {
  const summaryFields = [
    "id",
    "created_at",
    "updated_at",
    "retired",
    "superseded_by",
    "claim_key",
    "claim_key_status",
    "valid_from",
    "valid_to"
  ];
  return Object.fromEntries(summaryFields.map((field) => [field, entry[field]]));
}
|
|
172
|
+
/**
 * Derives a deterministic id for a fixture that did not supply one.
 *
 * The id is `eval-` followed by the first 24 hex characters of the SHA-256 of
 * `caseId:index:type:subject:content`.
 *
 * @param {string} caseId - Eval case id.
 * @param {number} index - Position of the fixture in its pool.
 * @param {object} fixture - Fixture providing `type`, `subject` and `content`.
 * @returns {string} Generated fixture id.
 */
function createFixtureId(caseId, index, fixture) {
  const hasher = createHash("sha256");
  const parts = [caseId, String(index), fixture.type, fixture.subject, fixture.content];
  parts.forEach((part, position) => {
    if (position > 0) {
      hasher.update(":");
    }
    hasher.update(part);
  });
  const digest = hasher.digest("hex");
  return `eval-${digest.slice(0, 24)}`;
}
|
|
176
|
+
/**
 * Returns every id that occurs more than once, each listed a single time.
 *
 * Duplicates are reported in the order in which their first repeat is
 * encountered. A Set tracks the reported ids so the scan stays linear even
 * when the input contains many repeats.
 *
 * @param {Iterable<string>} ids - Ids to inspect.
 * @returns {string[]} Distinct ids that appear at least twice.
 */
function findDuplicateIds(ids) {
  const seen = new Set();
  const duplicates = new Set();
  for (const id of ids) {
    if (seen.has(id)) {
      duplicates.add(id);
    } else {
      seen.add(id);
    }
  }
  return [...duplicates];
}
|
|
190
|
+
/**
 * Orders prepared fixtures so that a fixture's successor (`entry.superseded_by`)
 * always comes before the fixture itself.
 *
 * Among fixtures that are ready at the same time, the one with the lowest
 * `fixtureIndex` is emitted first. The ready queue is re-sorted once per
 * released batch rather than after every push, and the cycle report uses a Set
 * for membership checks.
 *
 * @param {Array<{ fixtureIndex: number, entry: { id: string, superseded_by?: string } }>} fixtures
 * @returns {Array<object>} The same fixture objects in insertion order.
 * @throws {Error} When some fixtures can never be released (reported as a supersession cycle).
 */
function topologicallySortFixtures(fixtures) {
  const byFixtureIndex = (left, right) => left.fixtureIndex - right.fixtureIndex;

  // indegree: how many successors a fixture still waits for.
  // dependents: successor id -> fixtures that wait for it.
  const indegree = new Map(fixtures.map((fixture) => [fixture.entry.id, 0]));
  const dependents = new Map();
  for (const fixture of fixtures) {
    const successorId = fixture.entry.superseded_by;
    if (!successorId) {
      continue;
    }
    indegree.set(fixture.entry.id, (indegree.get(fixture.entry.id) ?? 0) + 1);
    const waiting = dependents.get(successorId);
    if (waiting) {
      waiting.push(fixture);
    } else {
      dependents.set(successorId, [fixture]);
    }
  }

  const ready = fixtures.filter((fixture) => (indegree.get(fixture.entry.id) ?? 0) === 0).sort(byFixtureIndex);
  const sorted = [];
  while (ready.length > 0) {
    const current = ready.shift();
    sorted.push(current);

    let released = false;
    const waiting = [...(dependents.get(current.entry.id) ?? [])].sort(byFixtureIndex);
    for (const dependent of waiting) {
      const remaining = (indegree.get(dependent.entry.id) ?? 0) - 1;
      indegree.set(dependent.entry.id, remaining);
      if (remaining === 0) {
        ready.push(dependent);
        released = true;
      }
    }
    // Nothing reads `ready` until the next iteration, so one sort per batch is enough.
    if (released) {
      ready.sort(byFixtureIndex);
    }
  }

  if (sorted.length !== fixtures.length) {
    const placed = new Set(sorted);
    const unresolved = fixtures.filter((fixture) => !placed.has(fixture)).map((fixture) => fixture.entry.id);
    throw new Error(`Fixture supersession metadata contains a cycle: ${unresolved.join(", ")}.`);
  }
  return sorted;
}
|
|
227
|
+
/**
 * Computes the SHA-256 digest of `value`.
 *
 * @param {string} value - Text to hash.
 * @returns {string} Lower-case hex digest.
 */
function hashText(value) {
  const hasher = createHash("sha256");
  hasher.update(value);
  return hasher.digest("hex");
}
|
|
230
|
+
|
|
231
|
+
// src/app/evals/recall/provision-procedure-fixtures.ts
|
|
232
|
+
import { createHash as createHash2 } from "crypto";
|
|
233
|
+
/**
 * Seeds a recall-eval procedure pool into the supplied fixture store.
 *
 * Procedures are prepared by `prepareProcedures` and inserted one after the
 * other inside a single store transaction. An empty pool opens no transaction.
 *
 * @param {object} params
 * @param {string} params.caseId - Eval case id.
 * @param {Array<object>} params.procedurePool - Raw procedure fixtures.
 * @param {string} params.provisionedAt - Fallback timestamp for `created_at`.
 * @param {{ withTransaction: Function }} params.store - Fixture store that receives the inserts.
 * @returns {Promise<{ provisionedCount: number }>} Number of procedures inserted.
 */
async function provisionRecallEvalProcedureFixtures(params) {
  const prepared = prepareProcedures(params.caseId, params.procedurePool, params.provisionedAt);
  const provisionedCount = prepared.length;
  if (provisionedCount > 0) {
    await params.store.withTransaction(async (tx) => {
      for (const item of prepared) {
        await tx.insertProcedure(item);
      }
    });
  }
  return { provisionedCount };
}
|
|
249
|
+
/**
 * Turns raw procedure fixtures into insert-ready procedure records.
 *
 * Each fixture keeps its own `id` or receives a generated one; ids must be
 * unique and every `superseded_by` must name an id from the same pool. The
 * procedure body is passed through the imported `normalizeProcedureDefinition`
 * and the recall text and hashes are derived from that normalized body.
 *
 * @param {string} caseId - Eval case id, mixed into generated ids.
 * @param {Array<object>} fixtures - Raw procedure fixtures.
 * @param {string} provisionedAt - Used as `created_at` when a fixture has none.
 * @returns {Array<object>} Procedure records in the same order as `fixtures`.
 * @throws {Error} On duplicate ids or on an unknown `superseded_by`.
 */
function prepareProcedures(caseId, fixtures, provisionedAt) {
  const ids = fixtures.map((fixture, position) => fixture.id ?? createFixtureId2(caseId, position, fixture));
  const repeatedIds = findDuplicateIds2(ids);
  if (repeatedIds.length > 0) {
    throw new Error(`Procedure fixture IDs must be unique. Duplicate IDs: ${repeatedIds.join(", ")}.`);
  }

  const idLookup = new Set(ids);
  const prepareOne = (fixture, position) => {
    const successorId = fixture.superseded_by;
    if (successorId && !idLookup.has(successorId)) {
      throw new Error(`procedurePool[${position}].superseded_by references unknown fixture id "${successorId}".`);
    }

    const definition = {
      procedure_key: fixture.procedure_key,
      title: fixture.title,
      goal: fixture.goal,
      when_to_use: fixture.when_to_use ?? [],
      when_not_to_use: fixture.when_not_to_use ?? [],
      prerequisites: fixture.prerequisites ?? [],
      steps: fixture.steps,
      verification: fixture.verification ?? [],
      failure_modes: fixture.failure_modes ?? [],
      sources: fixture.sources ?? [{ kind: "manual", label: "recall eval fixture" }]
    };
    const body = normalizeProcedureDefinition(definition, `procedurePool[${position}]`);

    const created_at = fixture.created_at ?? provisionedAt;
    const updated_at = fixture.updated_at ?? created_at;
    return {
      id: ids[position] ?? "",
      ...body,
      source_file: fixture.source_file,
      recall_text: composeProcedureRecallText(body),
      revision_hash: computeProcedureRevisionHash(body),
      source_hash: computeProcedureSourceHash(JSON.stringify(body)),
      retired: fixture.retired ?? false,
      retired_at: fixture.retired_at,
      retired_reason: fixture.retired_reason,
      superseded_by: fixture.superseded_by,
      created_at,
      updated_at
    };
  };
  return fixtures.map(prepareOne);
}
|
|
293
|
+
/**
 * Derives a deterministic id for a procedure fixture that did not supply one.
 *
 * The id is `eval-procedure-` followed by the first 24 hex characters of the
 * SHA-256 of `caseId:index:procedure_key:title:goal`.
 *
 * @param {string} caseId - Eval case id.
 * @param {number} index - Position of the fixture in its pool.
 * @param {object} fixture - Fixture providing `procedure_key`, `title` and `goal`.
 * @returns {string} Generated procedure fixture id.
 */
function createFixtureId2(caseId, index, fixture) {
  const hasher = createHash2("sha256");
  const parts = [caseId, String(index), fixture.procedure_key, fixture.title, fixture.goal];
  parts.forEach((part, position) => {
    if (position > 0) {
      hasher.update(":");
    }
    hasher.update(part);
  });
  const digest = hasher.digest("hex");
  return `eval-procedure-${digest.slice(0, 24)}`;
}
|
|
297
|
+
/**
 * Returns every procedure fixture id that occurs more than once, each listed
 * a single time.
 *
 * Duplicates are reported in the order in which their first repeat is
 * encountered. A Set tracks the reported ids so the scan stays linear even
 * when the input contains many repeats.
 *
 * @param {Iterable<string>} ids - Ids to inspect.
 * @returns {string[]} Distinct ids that appear at least twice.
 */
function findDuplicateIds2(ids) {
  const seen = new Set();
  const duplicates = new Set();
  for (const id of ids) {
    if (seen.has(id)) {
      duplicates.add(id);
    } else {
      seen.add(id);
    }
  }
  return [...duplicates];
}
|
|
311
|
+
|
|
312
|
+
// src/app/evals/recall/sandbox.ts
|
|
313
|
+
import { access, copyFile, mkdir, mkdtemp, rm } from "fs/promises";
|
|
314
|
+
import { tmpdir } from "os";
|
|
315
|
+
import path from "path";
|
|
316
|
+
|
|
317
|
+
// src/adapters/db/eval-fixture-store.ts
|
|
318
|
+
/**
 * Wraps a database handle in the fixture-store interface used by the
 * provisioning helpers.
 *
 * @param {object} database - Handle exposing `insertEntry`, `upsertProcedure` and `withTransaction`.
 * @returns {{ insertEntry: Function, insertProcedure: Function, withTransaction: Function }}
 *   Store whose methods all return promises. `insertProcedure` forwards to
 *   `upsertProcedure`; `withTransaction` hands the callback a store wrapped
 *   around the transaction handle.
 */
function createRecallEvalFixtureStore(database) {
  return {
    async insertEntry(entry, embedding, contentHash) {
      return database.insertEntry(entry, embedding, contentHash);
    },
    async insertProcedure(procedure) {
      return database.upsertProcedure(procedure);
    },
    async withTransaction(fn) {
      return database.withTransaction(async (transaction) => {
        const transactionalStore = createRecallEvalFixtureStore(transaction);
        return fn(transactionalStore);
      });
    }
  };
}
|
|
325
|
+
|
|
326
|
+
// src/app/evals/recall/sandbox.ts
|
|
327
|
+
// File name of the database created directly inside each eval sandbox root.
var SANDBOX_DB_FILENAME = "knowledge.db";
|
|
328
|
+
// Prefix handed to mkdtemp when the sandbox root is auto-created under the OS temp directory.
var SANDBOX_DIR_PREFIX = "agenr-recall-eval-";
|
|
329
|
+
/**
 * Creates an isolated database sandbox for one eval case.
 *
 * The root is either the caller-supplied `request.root` (created if needed) or
 * a fresh temp directory. Any existing database files at the sandbox path are
 * removed, an optional snapshot is copied in, and the database is opened.
 *
 * @param {object} [request]
 * @param {string} [request.root] - Directory to use instead of a temp directory.
 * @param {boolean} [request.preserve] - When exactly `true`, nothing is deleted on cleanup or failure.
 * @param {object} [request.corpusSeed] - Only used when `mode === "snapshot_copy"`.
 * @returns {Promise<object>} Sandbox handle: paths, `preserved`, the fixture store, the database
 *   (exposed as both `episodeDatabase` and `procedureDatabase`), optional `snapshot` metadata,
 *   `createRecallPorts(embedding)` and `cleanup()`.
 * @throws Re-throws any setup error after closing the database and, unless preserved, removing what was created.
 */
async function setupRecallEvalSandbox(request) {
  const suppliedRoot = request?.root !== void 0;
  const preserved = request?.preserve === true;
  const root = suppliedRoot ? path.resolve(request.root ?? "") : await mkdtemp(path.join(tmpdir(), SANDBOX_DIR_PREFIX));
  const snapshotSeed = request?.corpusSeed?.mode === "snapshot_copy" ? request.corpusSeed : void 0;
  const dbPath = path.join(root, SANDBOX_DB_FILENAME);

  // A caller-supplied root is never deleted, only the database files inside it.
  const discardSandbox = () => (suppliedRoot ? removeDatabaseFiles(dbPath) : rm(root, { recursive: true, force: true }));

  let database;
  try {
    if (suppliedRoot) {
      await mkdir(root, { recursive: true });
    }
    await removeDatabaseFiles(dbPath);
    if (snapshotSeed !== void 0) {
      await seedSandboxFromSnapshot(snapshotSeed, dbPath);
    }
    database = await createDatabase(dbPath);
    const openDatabase = database;
    const snapshot = snapshotSeed ? buildSnapshotMetadata(snapshotSeed) : void 0;

    const cleanup = async () => {
      await openDatabase.close().catch(() => void 0);
      if (!preserved) {
        await discardSandbox();
      }
    };

    return {
      root,
      dbPath,
      preserved,
      fixtureStore: createRecallEvalFixtureStore(openDatabase),
      episodeDatabase: openDatabase,
      procedureDatabase: openDatabase,
      ...(snapshot ? { snapshot } : {}),
      createRecallPorts: (embedding) => createRecallAdapter(openDatabase, embedding),
      cleanup
    };
  } catch (error) {
    // Best-effort teardown: failures here must not mask the original error.
    await database?.close().catch(() => void 0);
    if (!preserved) {
      await discardSandbox().catch(() => void 0);
    }
    throw error;
  }
}
|
|
380
|
+
/**
 * Copies a snapshot database file to the sandbox database path.
 *
 * @param {{ snapshotDbPath: string }} seed - Snapshot seed; the path is trimmed and resolved before use.
 * @param {string} dbPath - Destination path of the sandbox database.
 * @returns {Promise<void>}
 * @throws {Error} When the snapshot path is blank, resolves to the sandbox database path,
 *   or is not accessible (the underlying error is attached as `cause`).
 */
async function seedSandboxFromSnapshot(seed, dbPath) {
  const trimmed = seed.snapshotDbPath.trim();
  if (trimmed.length === 0) {
    throw new Error("Snapshot database path must not be empty.");
  }

  const source = path.resolve(trimmed);
  if (path.resolve(dbPath) === source) {
    throw new Error("Snapshot database path must not point at the sandbox database path.");
  }

  await access(source).catch((error) => {
    const cause = error instanceof Error ? error.message : String(error);
    throw new Error(`Snapshot database file is not accessible at ${source}: ${cause}`, {
      cause: error
    });
  });
  await copyFile(source, dbPath);
}
|
|
400
|
+
/**
 * Builds the snapshot metadata reported on a sandbox seeded from a snapshot.
 *
 * The basename is taken from the trimmed path, matching the path that
 * `seedSandboxFromSnapshot` actually copies from, so surrounding whitespace in
 * `snapshotDbPath` never leaks into `dbPathBasename`.
 *
 * @param {object} seed
 * @param {string} seed.snapshotDbPath - Path of the snapshot database file.
 * @param {string} [seed.snapshotId] - Copied to `id` when defined.
 * @param {string} [seed.snapshotLabel] - Copied to `label` when defined.
 * @param {boolean} [seed.allowTelemetryWrites] - Only exactly `true` enables telemetry writes.
 * @returns {{ id?: string, label?: string, dbPathBasename: string, allowedTelemetryWrites: boolean }}
 */
function buildSnapshotMetadata(seed) {
  return {
    ...(seed.snapshotId !== void 0 ? { id: seed.snapshotId } : {}),
    ...(seed.snapshotLabel !== void 0 ? { label: seed.snapshotLabel } : {}),
    dbPathBasename: path.basename(seed.snapshotDbPath.trim()),
    allowedTelemetryWrites: seed.allowTelemetryWrites === true
  };
}
|
|
408
|
+
/**
 * Deletes the database file at `dbPath` together with its `-wal` and `-shm`
 * companion files. Files that do not exist are ignored (`force: true`).
 *
 * @param {string} dbPath - Path of the main database file.
 * @returns {Promise<void>}
 */
async function removeDatabaseFiles(dbPath) {
  const suffixes = ["", "-wal", "-shm"];
  const removals = suffixes.map((suffix) => rm(`${dbPath}${suffix}`, { force: true }));
  await Promise.all(removals);
}
|
|
411
|
+
|
|
412
|
+
// src/app/evals/recall/telemetry-write-gate.ts
|
|
413
|
+
/**
 * Disables recall-event recording for sandboxes seeded from a snapshot that
 * does not allow telemetry writes.
 *
 * Without a snapshot, or when `snapshot.allowedTelemetryWrites` is truthy, the
 * original ports object is returned untouched. Otherwise a new ports object is
 * returned that forwards every read operation and turns `recordRecallEvents`
 * into a no-op.
 *
 * @param {object} ports - Recall ports (`embed`, `vectorSearch`, `ftsSearch`, `hydrateEntries`,
 *   `recordRecallEvents`, optional `expandNeighborhood` and `crossEncoder`).
 * @param {{ snapshot?: { allowedTelemetryWrites: boolean } }} sandbox - Sandbox handle.
 * @returns {object} Either `ports` itself or the gated wrapper.
 */
function applyTelemetryWriteGate(ports, sandbox) {
  const { snapshot } = sandbox;
  if (snapshot === void 0 || snapshot.allowedTelemetryWrites) {
    return ports;
  }

  // Keys are added in a fixed order; optional ports only appear when present.
  const gated = {
    embed: async (text) => ports.embed(text),
    vectorSearch: async (params) => ports.vectorSearch(params),
    ftsSearch: async (params) => ports.ftsSearch(params)
  };
  if (ports.expandNeighborhood) {
    gated.expandNeighborhood = async (request) => ports.expandNeighborhood(request);
  }
  if (ports.crossEncoder) {
    gated.crossEncoder = ports.crossEncoder;
  }
  gated.hydrateEntries = async (ids) => ports.hydrateEntries(ids);
  // Recall telemetry is dropped so the copied snapshot is not written to.
  gated.recordRecallEvents = async () => void 0;
  return gated;
}
|
|
444
|
+
|
|
445
|
+
// src/app/evals/before-turn/build-debug-artifact.ts
|
|
446
|
+
/**
 * Assembles the `before-turn-debug-artifact.v1` payload for one eval case.
 *
 * Optional sections (`snapshot`, `queryPolicy`, `queryVariants`,
 * `abstentionReasons` and both candidate lists) are only present when they
 * have content.
 *
 * @param {object} params
 * @param {object} params.request - Eval request (`caseId`, `beforeTurnInput`, optional `options.topKCandidates`).
 * @param {object} params.patch - Before-turn patch with `diagnostics`, `durableMemory` and optional `procedure`.
 * @param {object} params.sandbox - Sandbox handle; only `snapshot` is read.
 * @returns {object} Debug artifact with keys in a fixed order.
 */
function buildBeforeTurnDebugArtifact(params) {
  const { request, patch, sandbox } = params;
  const { diagnostics } = patch;
  const topK = resolveTopK(request.options?.topKCandidates);
  const durableCandidates = buildDurableCandidates(patch, topK);
  const procedureCandidates = buildProcedureCandidates(patch, topK);
  const trigger = request.beforeTurnInput.trigger ?? "unspecified";

  const artifact = {
    schemaVersion: "before-turn-debug-artifact.v1",
    caseId: request.caseId
  };
  if (sandbox.snapshot) {
    artifact.snapshot = buildSnapshot(sandbox.snapshot);
  }
  artifact.input = {
    trigger,
    currentTurnText: request.beforeTurnInput.currentTurnText
  };
  if (diagnostics.queryPolicy) {
    artifact.queryPolicy = diagnostics.queryPolicy;
  }
  if (diagnostics.queryVariants.length > 0) {
    artifact.queryVariants = [...diagnostics.queryVariants];
  }
  if (diagnostics.abstentionReasons.length > 0) {
    artifact.abstentionReasons = [...diagnostics.abstentionReasons];
  }
  artifact.selectedEntryIds = patch.durableMemory.map((item) => item.entry.id);
  artifact.selectedProcedureKey = patch.procedure?.procedure.procedure_key ?? null;
  if (durableCandidates.length > 0) {
    artifact.durableRecallTopCandidates = durableCandidates;
  }
  if (procedureCandidates.length > 0) {
    artifact.procedureTopCandidates = procedureCandidates;
  }
  return artifact;
}
|
|
470
|
+
/**
 * Reduces sandbox snapshot metadata to the fields exposed in the debug artifact.
 *
 * @param {{ id?: string, label?: string, dbPathBasename: string }} snapshot
 * @returns {{ id?: string, label?: string, dbPathBasename: string }} `id` and `label` only when defined.
 */
function buildSnapshot(snapshot) {
  const result = {};
  if (snapshot.id !== void 0) {
    result.id = snapshot.id;
  }
  if (snapshot.label !== void 0) {
    result.label = snapshot.label;
  }
  result.dbPathBasename = snapshot.dbPathBasename;
  return result;
}
|
|
477
|
+
/**
 * Resolves how many top candidates the debug artifact should list.
 *
 * @param {number} [requested] - Caller-requested candidate count.
 * @returns {number} The default when `requested` is missing or not an integer;
 *   otherwise `requested`, raised to at least 1 and capped at the configured maximum.
 */
function resolveTopK(requested) {
  // Number.isInteger is false for undefined, NaN, infinities and non-numbers.
  if (!Number.isInteger(requested)) {
    return BEFORE_TURN_DEBUG_ARTIFACT_DEFAULT_TOP_K;
  }
  if (requested < 1) {
    return 1;
  }
  return requested > BEFORE_TURN_DEBUG_ARTIFACT_MAX_TOP_K ? BEFORE_TURN_DEBUG_ARTIFACT_MAX_TOP_K : requested;
}
|
|
492
|
+
/**
 * Lists the first `topK` durable-memory items of a patch as debug candidates.
 *
 * @param {{ durableMemory: Array<object> }} patch - Before-turn patch.
 * @param {number} topK - Maximum number of candidates.
 * @returns {Array<{ id: string, score: number, reasons?: string[] }>} `reasons` is a copy
 *   and only present when the item has at least one surfacing reason.
 */
function buildDurableCandidates(patch, topK) {
  const toCandidate = (item) => {
    const surfacedReasons = item.whySurfaced.reasons;
    const candidate = { id: item.entry.id, score: item.score };
    if (surfacedReasons.length > 0) {
      candidate.reasons = [...surfacedReasons];
    }
    return candidate;
  };
  return patch.durableMemory.slice(0, topK).map(toCandidate);
}
|
|
502
|
+
/**
 * Describes the selected procedure of a patch as a one-element candidate list.
 *
 * @param {{ procedure?: object }} patch - Before-turn patch.
 * @param {number} topK - Candidate limit; values below 1 yield an empty list.
 * @returns {Array<{ procedureKey: string, score: number, reasons?: string[] }>} Empty when the
 *   patch has no procedure; `reasons` is a copy and only present when non-empty.
 */
function buildProcedureCandidates(patch, topK) {
  const selected = patch.procedure;
  if (!selected || topK < 1) {
    return [];
  }
  const surfacedReasons = selected.whySurfaced.reasons;
  const candidate = {
    procedureKey: selected.procedure.procedure_key,
    score: selected.score
  };
  if (surfacedReasons.length > 0) {
    candidate.reasons = [...surfacedReasons];
  }
  return [candidate];
}
|
|
518
|
+
|
|
519
|
+
// src/app/evals/before-turn/normalize-response.ts
|
|
520
|
+
/**
 * Builds the `status: "ok"` response of a before-turn eval case.
 *
 * @param {object} params
 * @param {object} params.request - Eval request; `options.includeDiagnostics` and
 *   `options.includeDebugArtifact` must be exactly `true` to include those sections.
 * @param {object} params.patch - Before-turn patch that was produced.
 * @param {string} [params.renderedPatchText] - Rendered patch text, if rendering was requested.
 * @param {object} [params.timings] - Timings to report (already gated by the caller).
 * @param {object} params.sandbox - Sandbox handle.
 * @returns {object} Response; `diagnostics` is always a key (possibly `undefined`),
 *   `debugArtifact` only exists when requested.
 */
function buildBeforeTurnEvalSuccessResponse(params) {
  const { request, patch, sandbox } = params;
  const options = request.options;
  const output = buildOutput(patch, params.renderedPatchText);
  const debugArtifact = options?.includeDebugArtifact === true ? buildBeforeTurnDebugArtifact({ request, patch, sandbox }) : void 0;

  const response = {
    status: "ok",
    caseId: request.caseId,
    output,
    diagnostics: options?.includeDiagnostics === true ? patch.diagnostics : void 0,
    timings: params.timings,
    sandbox: buildSandboxResult(sandbox)
  };
  if (debugArtifact) {
    response.debugArtifact = debugArtifact;
  }
  return response;
}
|
|
533
|
+
/**
 * Builds the `status: "error"` response of a before-turn eval case.
 *
 * @param {object} params
 * @param {object} params.request - Eval request; only `caseId` is read.
 * @param {string} params.code - Machine-readable error code.
 * @param {string} params.message - Human-readable error message.
 * @param {object} [params.details] - Extra error details.
 * @param {object} [params.timings] - Timings to report.
 * @param {object} [params.sandbox] - Sandbox handle; summarized when provided.
 * @returns {object} Response with `status`, `caseId`, `error`, `timings` and `sandbox`
 *   (`sandbox` is `undefined` when none was provided).
 */
function buildBeforeTurnEvalErrorResponse(params) {
  const { code, message, details } = params;
  const sandboxSummary = params.sandbox ? buildSandboxResult(params.sandbox) : void 0;
  return {
    status: "error",
    caseId: params.request.caseId,
    error: { code, message, details },
    timings: params.timings,
    sandbox: sandboxSummary
  };
}
|
|
546
|
+
/**
 * Renders the patch as text when the request asks for it.
 *
 * @param {object} request - Eval request; rendering requires `options.includeRenderedPatch === true`.
 * @param {object} patch - Before-turn patch to render.
 * @returns {string|undefined} Output of `formatAgenrBeforeTurnRecall(patch)`, or `undefined` when not requested.
 */
function maybeRenderBeforeTurnPatch(request, patch) {
  const renderRequested = request.options?.includeRenderedPatch === true;
  return renderRequested ? formatAgenrBeforeTurnRecall(patch) : void 0;
}
|
|
552
|
+
/**
 * Summarizes a sandbox handle for inclusion in an eval response.
 *
 * @param {{ root: string, dbPath: string, preserved: boolean, snapshot?: object }} sandbox
 * @returns {{ root: string, dbPath: string, preserved: boolean, snapshot?: object }}
 *   `snapshot` is passed through unchanged and only present when the sandbox has one.
 */
function buildSandboxResult(sandbox) {
  const { root, dbPath, preserved, snapshot } = sandbox;
  const summary = { root, dbPath, preserved };
  if (snapshot) {
    summary.snapshot = snapshot;
  }
  return summary;
}
|
|
560
|
+
/**
 * Builds the `output` section of a successful before-turn eval response.
 *
 * @param {object} patch - Before-turn patch (`diagnostics`, `durableMemory`, optional `procedure`).
 * @param {string} [renderedPatchText] - Rendered patch text; the key is omitted when `undefined`.
 * @returns {object} `abstained`, `selectedEntryIds`, `selectedProcedureKey` (or `null`),
 *   the normalized `patch`, and optionally `renderedPatchText`.
 */
function buildOutput(patch, renderedPatchText) {
  const output = {
    abstained: patch.diagnostics.abstained,
    selectedEntryIds: patch.durableMemory.map((item) => item.entry.id),
    selectedProcedureKey: patch.procedure?.procedure.procedure_key ?? null,
    patch: normalizePatchForEvalOutput(patch)
  };
  if (renderedPatchText !== void 0) {
    output.renderedPatchText = renderedPatchText;
  }
  return output;
}
|
|
569
|
+
/**
 * Returns a copy of the patch in which every durable-memory entry that has a
 * string `claim_key` also carries it under the camel-case alias `claimKey`.
 *
 * The patch, its items and their entries are shallow-copied; the input is not modified.
 *
 * @param {{ durableMemory: Array<{ entry: object }> }} patch - Before-turn patch.
 * @returns {object} Patch copy with the rewritten `durableMemory` list.
 */
function normalizePatchForEvalOutput(patch) {
  const withClaimKeyAlias = (item) => {
    const entry = { ...item.entry };
    if (typeof item.entry.claim_key === "string") {
      entry.claimKey = item.entry.claim_key;
    }
    return { ...item, entry };
  };
  return {
    ...patch,
    durableMemory: patch.durableMemory.map(withClaimKeyAlias)
  };
}
|
|
581
|
+
|
|
582
|
+
// src/app/evals/before-turn/run-before-turn-eval-case.ts
|
|
583
|
+
/**
 * Runs one before-turn eval case end to end inside an isolated sandbox.
 *
 * Stages: create the sandbox, seed memory and procedure fixtures, run the
 * before-turn selection, optionally render the patch. Each stage failure is
 * turned into an error response with its own code; any other exception becomes
 * `internal_error`. The sandbox is always cleaned up, and cleanup failures are ignored.
 *
 * @param {object} request - Eval request (`caseId`, `sandbox`, `memoryPool`, optional
 *   `procedurePool`, `beforeTurnInput`, `options`).
 * @param {object} [dependencies] - Optional overrides; only `crossEncoder` is read.
 * @returns {Promise<object>} Success or error response. Timings are only included
 *   when `options.includeTimings === true`.
 */
async function runBeforeTurnEvalCase(request, dependencies = {}) {
  const startedAt = Date.now();
  const provisionedAt = new Date(startedAt).toISOString();
  let sandbox;
  let timings;
  let cachedEmbeddingPort;
  let cachedEmbeddingError;

  // Creates the embedding client at most once and remembers a creation failure.
  const getEmbeddingSupport = () => {
    if (cachedEmbeddingPort) {
      return { available: true, port: cachedEmbeddingPort };
    }
    if (cachedEmbeddingError) {
      return { available: false, error: cachedEmbeddingError };
    }
    const config = readConfig();
    try {
      cachedEmbeddingPort = createEmbeddingClient(resolveEmbeddingApiKey(config), resolveEmbeddingModel(config));
    } catch (error) {
      cachedEmbeddingError = error instanceof Error ? error.message : String(error);
      return { available: false, error: cachedEmbeddingError };
    }
    return { available: true, port: cachedEmbeddingPort };
  };
  // Falls back to a placeholder port built from the failure message.
  const portOrUnavailable = (support) => support.port ?? createUnavailableEmbeddingPort(support.error ?? "Embeddings are unavailable.");
  const reportedTimings = () => (request.options?.includeTimings === true ? timings : void 0);

  try {
    // Stage 1: sandbox setup.
    const sandboxStartedAt = Date.now();
    try {
      sandbox = await setupRecallEvalSandbox(request.sandbox);
      timings = { ...timings, sandboxSetupMs: elapsedMs(sandboxStartedAt) };
    } catch (error) {
      timings = {
        ...timings,
        totalMs: elapsedMs(startedAt),
        sandboxSetupMs: elapsedMs(sandboxStartedAt)
      };
      return buildBeforeTurnEvalErrorResponse({
        request,
        code: "sandbox_setup_failed",
        message: "Failed to create isolated before-turn eval sandbox.",
        details: toErrorDetails(error),
        timings: reportedTimings()
      });
    }

    // Stage 2: fixture provisioning (skipped when both pools are empty).
    const hasMemoryFixtures = request.memoryPool.length > 0;
    const hasProcedureFixtures = (request.procedurePool?.length ?? 0) > 0;
    if (hasMemoryFixtures || hasProcedureFixtures) {
      const provisionStartedAt = Date.now();
      try {
        if (hasMemoryFixtures) {
          const embedding = portOrUnavailable(getEmbeddingSupport());
          await provisionRecallEvalFixtures({
            caseId: request.caseId,
            memoryPool: request.memoryPool,
            store: sandbox.fixtureStore,
            embedding,
            provisionedAt
          });
        }
        if (hasProcedureFixtures) {
          await provisionRecallEvalProcedureFixtures({
            caseId: request.caseId,
            procedurePool: request.procedurePool ?? [],
            store: sandbox.fixtureStore,
            provisionedAt
          });
        }
        timings = { ...timings, fixtureProvisionMs: elapsedMs(provisionStartedAt) };
      } catch (error) {
        timings = {
          ...timings,
          totalMs: elapsedMs(startedAt),
          fixtureProvisionMs: elapsedMs(provisionStartedAt)
        };
        return buildBeforeTurnEvalErrorResponse({
          request,
          code: "fixture_provision_failed",
          message: "Failed to provision before-turn eval fixtures into isolated storage.",
          details: toErrorDetails(error),
          timings: reportedTimings(),
          sandbox
        });
      }
    }

    // Stage 3: before-turn selection and optional rendering.
    const beforeTurnStartedAt = Date.now();
    try {
      const support = getEmbeddingSupport();
      const sandboxPorts = sandbox.createRecallPorts(portOrUnavailable(support));
      const recallPorts = applyTelemetryWriteGate(attachCrossEncoderPort(sandboxPorts, dependencies.crossEncoder), sandbox);
      const embedQuery = support.port
        ? async (text) => {
            const vectors = await support.port.embed([text]);
            return vectors[0] ?? [];
          }
        : void 0;
      const patch = await runBeforeTurn(request.beforeTurnInput, {
        recall: recallPorts,
        procedures: sandbox.procedureDatabase,
        embedQuery
      });
      timings = { ...timings, beforeTurnMs: elapsedMs(beforeTurnStartedAt) };

      let renderedPatchText;
      if (request.options?.includeRenderedPatch === true) {
        const renderStartedAt = Date.now();
        renderedPatchText = maybeRenderBeforeTurnPatch(request, patch);
        timings = { ...timings, renderPatchMs: elapsedMs(renderStartedAt) };
      }

      timings = { ...timings, totalMs: elapsedMs(startedAt) };
      return buildBeforeTurnEvalSuccessResponse({
        request,
        patch,
        renderedPatchText,
        timings: reportedTimings(),
        sandbox
      });
    } catch (error) {
      timings = {
        ...timings,
        totalMs: elapsedMs(startedAt),
        beforeTurnMs: elapsedMs(beforeTurnStartedAt)
      };
      return buildBeforeTurnEvalErrorResponse({
        request,
        code: "before_turn_execution_failed",
        message: "Failed to execute real before-turn selection against isolated eval state.",
        details: toErrorDetails(error),
        timings: reportedTimings(),
        sandbox
      });
    }
  } catch (error) {
    // Anything not attributed to a stage above, e.g. a malformed request.
    return buildBeforeTurnEvalErrorResponse({
      request,
      code: "internal_error",
      message: "Before-turn eval execution failed unexpectedly.",
      details: toErrorDetails(error),
      timings: request.options?.includeTimings === true ? { ...timings, totalMs: elapsedMs(startedAt) } : void 0,
      sandbox
    });
  } finally {
    await sandbox?.cleanup().catch(() => void 0);
  }
}
|
|
749
|
+
/**
 * Converts an arbitrary caught value into the `details` payload used by eval error responses.
 *
 * @param {unknown} error - Value received by a `catch` clause.
 * @returns {{ cause: string }} The `message` of an `Error` instance, otherwise the value coerced with `String()`.
 */
function toErrorDetails(error) {
  const cause = error instanceof Error ? error.message : String(error);
  return { cause };
}
|
|
759
|
+
/**
 * Builds a stand-in embedding port whose `embed` call always rejects.
 *
 * @param {string} message - Text used for the `Error` raised by every `embed` call.
 * @returns {{ embed: () => Promise<never> }} Port object exposing only a rejecting `embed`.
 */
function createUnavailableEmbeddingPort(message) {
  const embed = async () => {
    throw new Error(message);
  };
  return { embed };
}
|
|
766
|
+
/**
 * Computes how many milliseconds have passed since a `Date.now()` reading.
 *
 * @param {number} startedAt - Earlier timestamp in epoch milliseconds.
 * @returns {number} Difference between the current `Date.now()` value and `startedAt`.
 */
function elapsedMs(startedAt) {
  const now = Date.now();
  return now - startedAt;
}
|
|
769
|
+
|
|
770
|
+
// src/adapters/api/validation/internal-eval-shared.ts
|
|
771
|
+
/** Keys checked against the request's `sandbox` object. */
var SANDBOX_REQUEST_KEYS = /* @__PURE__ */ new Set(["root", "preserve", "corpusSeed"]);

/** Values accepted for `sandbox.corpusSeed.mode`. */
var CORPUS_SEED_MODES = ["fixture", "snapshot_copy"];

/** Keys checked against `sandbox.corpusSeed` when its mode is "fixture". */
var FIXTURE_CORPUS_SEED_KEYS = /* @__PURE__ */ new Set(["mode"]);

/** Keys checked against `sandbox.corpusSeed` when its mode is "snapshot_copy". */
var SNAPSHOT_COPY_CORPUS_SEED_KEYS = /* @__PURE__ */ new Set([
  "mode",
  "snapshotDbPath",
  "snapshotId",
  "snapshotLabel",
  "allowTelemetryWrites"
]);

/** Keys checked against each `memoryPool` fixture entry. */
var FIXTURE_ENTRY_KEYS = /* @__PURE__ */ new Set([
  // Core entry content.
  "id", "type", "subject", "content", "importance", "expiry", "tags",
  // Provenance and timestamps.
  "source_file", "source_context", "created_at", "updated_at",
  // Retirement state.
  "retired", "retired_at", "retired_reason", "superseded_by",
  // Claim key and claim support metadata.
  "claim_key", "claim_key_status", "claim_key_source",
  "claim_support_source_kind", "claim_support_locator", "claim_support_observed_at", "claim_support_mode",
  // Validity window and supersession metadata.
  "valid_from", "valid_to", "supersession_kind", "supersession_reason"
]);

/** Keys checked against each `procedurePool` fixture procedure. */
var FIXTURE_PROCEDURE_KEYS = /* @__PURE__ */ new Set([
  // Procedure definition.
  "id", "procedure_key", "title", "goal",
  "when_to_use", "when_not_to_use", "prerequisites", "steps", "verification", "failure_modes",
  // Provenance.
  "sources", "source_file",
  // Retirement state and timestamps.
  "retired", "retired_at", "retired_reason", "superseded_by", "created_at", "updated_at"
]);
|
|
823
|
+
/**
 * Pulls a usable case identifier out of an unvalidated request body.
 *
 * @param {unknown} value - Raw request body.
 * @returns {string|undefined} The trimmed `caseId` when `value` is a record holding a non-blank
 *   string `caseId`; otherwise `undefined`.
 */
function extractParseableCaseId(value) {
  const rawCaseId = isRecord(value) ? value.caseId : void 0;
  if (typeof rawCaseId !== "string") {
    return void 0;
  }
  const trimmed = rawCaseId.trim();
  return trimmed.length === 0 ? void 0 : trimmed;
}
|
|
830
|
+
/**
 * Accepts a value only when `isRecord` recognizes it, recording an issue otherwise.
 *
 * @param {unknown} value - Candidate value.
 * @param {string} path2 - Request path reported with the issue.
 * @param {Array} issues - Collector that validation issues are appended to.
 * @returns {object|undefined} The same `value` when it is a record, else `undefined`.
 */
function parseObject(value, path2, issues) {
  if (isRecord(value)) {
    return value;
  }
  pushIssue(issues, path2, "Expected an object.");
  return void 0;
}
|
|
837
|
+
/**
 * Validates the optional `sandbox` section of an eval request.
 *
 * @param {unknown} value - Raw `sandbox` value; `undefined` means the section was omitted.
 * @param {Array} issues - Collector that validation issues are appended to.
 * @returns {{ root: *, preserve: *, corpusSeed: * }|undefined} Parsed sandbox settings, or
 *   `undefined` when the section is absent or is not an object.
 */
function parseSandbox(value, issues) {
  // An omitted section is valid and produces no issue; anything else must be an object.
  const sandbox = value === void 0 ? void 0 : parseObject(value, "sandbox", issues);
  if (sandbox === void 0) {
    return void 0;
  }
  pushUnexpectedFields(sandbox, SANDBOX_REQUEST_KEYS, "sandbox", issues);
  const root = parseOptionalTrimmedString(sandbox.root, "sandbox.root", issues);
  const preserve = parseOptionalBoolean(sandbox.preserve, "sandbox.preserve", issues);
  const corpusSeed = parseCorpusSeed(sandbox.corpusSeed, issues);
  return { root, preserve, corpusSeed };
}
|
|
852
|
+
/**
 * Validates the optional `sandbox.corpusSeed` section.
 *
 * A "fixture" seed carries only its mode. A "snapshot_copy" seed requires `snapshotDbPath` and
 * includes `snapshotId`, `snapshotLabel` and `allowTelemetryWrites` only when they parsed to a value.
 *
 * @param {unknown} value - Raw `corpusSeed` value; `undefined` means it was omitted.
 * @param {Array} issues - Collector that validation issues are appended to.
 * @returns {object|undefined} Parsed seed, or `undefined` when absent or invalid.
 */
function parseCorpusSeed(value, issues) {
  if (value === void 0) {
    return void 0;
  }
  const seedPath = "sandbox.corpusSeed";
  const seed = parseObject(value, seedPath, issues);
  if (seed === void 0) {
    return void 0;
  }
  // The mode decides which key set applies, so it has to be resolved before anything else.
  const mode = parseCorpusSeedMode(seed.mode, `${seedPath}.mode`, issues);
  if (mode === void 0) {
    return void 0;
  }
  const allowedKeys = mode === "fixture" ? FIXTURE_CORPUS_SEED_KEYS : SNAPSHOT_COPY_CORPUS_SEED_KEYS;
  pushUnexpectedFields(seed, allowedKeys, seedPath, issues);
  if (mode === "fixture") {
    return { mode: "fixture" };
  }
  // All snapshot fields are validated before bailing out so every issue is reported together.
  const snapshotDbPath = parseRequiredTrimmedString(seed.snapshotDbPath, `${seedPath}.snapshotDbPath`, issues);
  const snapshotId = parseOptionalTrimmedString(seed.snapshotId, `${seedPath}.snapshotId`, issues);
  const snapshotLabel = parseOptionalTrimmedString(seed.snapshotLabel, `${seedPath}.snapshotLabel`, issues);
  const allowTelemetryWrites = parseOptionalBoolean(seed.allowTelemetryWrites, `${seedPath}.allowTelemetryWrites`, issues);
  if (snapshotDbPath === void 0) {
    return void 0;
  }
  const parsedSeed = { mode: "snapshot_copy", snapshotDbPath };
  if (snapshotId !== void 0) {
    parsedSeed.snapshotId = snapshotId;
  }
  if (snapshotLabel !== void 0) {
    parsedSeed.snapshotLabel = snapshotLabel;
  }
  if (allowTelemetryWrites !== void 0) {
    parsedSeed.allowTelemetryWrites = allowTelemetryWrites;
  }
  return parsedSeed;
}
|
|
884
|
+
/**
 * Validates that a value is one of the entries in `CORPUS_SEED_MODES`.
 *
 * @param {unknown} value - Candidate mode.
 * @param {string} path2 - Request path reported with the issue.
 * @param {Array} issues - Collector that validation issues are appended to.
 * @returns {string|undefined} The mode when recognized, else `undefined`.
 */
function parseCorpusSeedMode(value, path2, issues) {
  const isKnownMode = typeof value === "string" && CORPUS_SEED_MODES.includes(value);
  if (isKnownMode) {
    return value;
  }
  pushIssue(issues, path2, `Expected one of: ${CORPUS_SEED_MODES.join(", ")}.`);
  return void 0;
}
|
|
891
|
+
/**
 * Validates the required `memoryPool` array, keeping only the entries that parsed successfully.
 *
 * @param {unknown} value - Raw `memoryPool` value.
 * @param {Array} issues - Collector that validation issues are appended to.
 * @returns {Array|undefined} Parsed fixture entries, or `undefined` when `value` is not an array.
 */
function parseMemoryPool(value, issues) {
  if (!Array.isArray(value)) {
    pushIssue(issues, "memoryPool", "Expected an array of fixture entries.");
    return void 0;
  }
  const entries = [];
  value.forEach((candidate, index) => {
    const parsed = parseFixtureEntry(candidate, index, issues);
    if (parsed) {
      entries.push(parsed);
    }
  });
  return entries;
}
|
|
901
|
+
/**
 * Validates the optional `procedurePool` array, keeping only the procedures that parsed successfully.
 *
 * @param {unknown} value - Raw `procedurePool` value; `undefined` means it was omitted.
 * @param {Array} issues - Collector that validation issues are appended to.
 * @returns {Array|undefined} Parsed fixture procedures, or `undefined` when absent or not an array.
 */
function parseProcedurePool(value, issues) {
  if (value === void 0) {
    return void 0;
  }
  if (!Array.isArray(value)) {
    pushIssue(issues, "procedurePool", "Expected an array of fixture procedures.");
    return void 0;
  }
  return value.reduce((procedures, candidate, index) => {
    const parsed = parseFixtureProcedure(candidate, index, issues);
    if (parsed) {
      procedures.push(parsed);
    }
    return procedures;
  }, []);
}
|
|
914
|
+
/**
 * Copies the sandbox DTO into a fresh object holding exactly `root`, `preserve` and `corpusSeed`.
 *
 * @param {object|undefined} dto - Validated sandbox DTO, or `undefined` when omitted.
 * @returns {object|undefined} Shallow copy of the three fields, or `undefined` for an omitted DTO.
 */
function mapSandboxRequestDto(dto) {
  if (dto === void 0) {
    return void 0;
  }
  const { root, preserve, corpusSeed } = dto;
  return { root, preserve, corpusSeed };
}
|
|
924
|
+
/**
 * Copies a fixture entry DTO into a fresh object containing every known entry field.
 *
 * Each listed field is always present on the result, with `undefined` for fields the DTO lacks;
 * any other property on the DTO is dropped.
 *
 * @param {object} dto - Validated fixture entry DTO.
 * @returns {object} Shallow copy limited to the known entry fields.
 */
function mapFixtureEntryDto(dto) {
  const fieldNames = [
    "id",
    "type",
    "subject",
    "content",
    "importance",
    "expiry",
    "tags",
    "source_file",
    "source_context",
    "created_at",
    "updated_at",
    "retired",
    "retired_at",
    "retired_reason",
    "superseded_by",
    "claim_key",
    "claim_key_status",
    "claim_key_source",
    "claim_support_source_kind",
    "claim_support_locator",
    "claim_support_observed_at",
    "claim_support_mode",
    "valid_from",
    "valid_to",
    "supersession_kind",
    "supersession_reason"
  ];
  const entry = {};
  for (const fieldName of fieldNames) {
    entry[fieldName] = dto[fieldName];
  }
  return entry;
}
|
|
954
|
+
/**
 * Copies a fixture procedure DTO into a fresh object containing every known procedure field.
 *
 * Each listed field is always present on the result, with `undefined` for fields the DTO lacks;
 * any other property on the DTO is dropped.
 *
 * @param {object} dto - Validated fixture procedure DTO.
 * @returns {object} Shallow copy limited to the known procedure fields.
 */
function mapFixtureProcedureDto(dto) {
  const fieldNames = [
    "id",
    "procedure_key",
    "title",
    "goal",
    "when_to_use",
    "when_not_to_use",
    "prerequisites",
    "steps",
    "verification",
    "failure_modes",
    "sources",
    "source_file",
    "retired",
    "retired_at",
    "retired_reason",
    "superseded_by",
    "created_at",
    "updated_at"
  ];
  return Object.fromEntries(fieldNames.map((fieldName) => [fieldName, dto[fieldName]]));
}
|
|
976
|
+
/**
 * Validates an optional array of strings, trimming each item and dropping blank ones.
 *
 * @param {unknown} value - Candidate value; `undefined` means the field was omitted.
 * @param {string} path2 - Request path reported with the issue.
 * @param {Array} issues - Collector that validation issues are appended to.
 * @returns {string[]|undefined} Trimmed non-empty strings, or `undefined` when absent or invalid.
 */
function parseOptionalStringArray(value, path2, issues) {
  if (value === void 0) {
    return void 0;
  }
  const isStringArray = Array.isArray(value) && value.every((item) => typeof item === "string");
  if (!isStringArray) {
    pushIssue(issues, path2, "Expected an array of strings.");
    return void 0;
  }
  return value.flatMap((item) => {
    const trimmed = item.trim();
    return trimmed.length > 0 ? [trimmed] : [];
  });
}
|
|
986
|
+
/**
 * Validates an optional threshold: a number within the closed interval [0, 1].
 *
 * @param {unknown} value - Candidate value; `undefined` means the field was omitted.
 * @param {string} path2 - Request path reported with the issue.
 * @param {Array} issues - Collector that validation issues are appended to.
 * @returns {number|undefined} The threshold when valid, else `undefined`.
 */
function parseOptionalThreshold(value, path2, issues) {
  if (value === void 0) {
    return void 0;
  }
  // NaN fails both comparisons, so it is rejected without a separate check.
  const isThreshold = typeof value === "number" && value >= 0 && value <= 1;
  if (isThreshold) {
    return value;
  }
  pushIssue(issues, path2, "Expected a number from 0 to 1.");
  return void 0;
}
|
|
996
|
+
/**
 * Validates that a value is a string, returning it untouched (no trimming, empty strings allowed).
 *
 * @param {unknown} value - Candidate value.
 * @param {string} path2 - Request path reported with the issue.
 * @param {Array} issues - Collector that validation issues are appended to.
 * @returns {string|undefined} The string as given, else `undefined`.
 */
function parseRequiredString(value, path2, issues) {
  if (typeof value === "string") {
    return value;
  }
  pushIssue(issues, path2, "Expected a string.");
  return void 0;
}
|
|
1003
|
+
/**
 * Validates a recent-turn role, accepting "user" or "assistant" after trimming whitespace.
 *
 * @param {unknown} value - Candidate role.
 * @param {string} path2 - Request path reported with the issue.
 * @param {Array} issues - Collector that validation issues are appended to.
 * @returns {"user"|"assistant"|undefined} The trimmed role when valid, else `undefined`.
 */
function parseRecentTurnRole(value, path2, issues) {
  const role = typeof value === "string" ? value.trim() : void 0;
  if (role === "user" || role === "assistant") {
    return role;
  }
  pushIssue(issues, path2, 'Expected "user" or "assistant".');
  return void 0;
}
|
|
1015
|
+
/**
 * Validates one `memoryPool` element and returns the parsed fixture entry.
 *
 * Every field is validated before deciding whether the entry is usable, so a single pass reports
 * all problems with the entry instead of stopping at the first invalid required field.
 *
 * @param {unknown} value - Raw `memoryPool[index]` value.
 * @param {number} index - Position within `memoryPool`, used to build issue paths.
 * @param {Array} issues - Collector that validation issues are appended to.
 * @returns {object|undefined} Parsed entry, or `undefined` when `value` is not an object or when
 *   `type`, `subject` or `content` is invalid.
 */
function parseFixtureEntry(value, index, issues) {
  const basePath = `memoryPool[${index}]`;
  const fixture = parseObject(value, basePath, issues);
  if (fixture === void 0) {
    return void 0;
  }
  pushUnexpectedFields(fixture, FIXTURE_ENTRY_KEYS, basePath, issues);
  const type = parseEntryType(fixture.type, `${basePath}.type`, issues);
  const subject = parseRequiredTrimmedString(fixture.subject, `${basePath}.subject`, issues);
  const content = parseRequiredTrimmedString(fixture.content, `${basePath}.content`, issues);
  // Optional fields are parsed before the required-field check so that their issues are still
  // collected when a required field is missing or invalid.
  const entry = {
    id: parseOptionalTrimmedString(fixture.id, `${basePath}.id`, issues),
    type,
    subject,
    content,
    importance: parseOptionalIntegerInRange(fixture.importance, `${basePath}.importance`, issues, {
      min: 1,
      max: 10
    }),
    expiry: parseOptionalExpiry(fixture.expiry, `${basePath}.expiry`, issues),
    tags: parseOptionalStringArray(fixture.tags, `${basePath}.tags`, issues),
    source_file: parseOptionalTrimmedString(fixture.source_file, `${basePath}.source_file`, issues),
    source_context: parseOptionalTrimmedString(fixture.source_context, `${basePath}.source_context`, issues),
    created_at: parseOptionalTimestampString(fixture.created_at, `${basePath}.created_at`, issues),
    updated_at: parseOptionalTimestampString(fixture.updated_at, `${basePath}.updated_at`, issues),
    retired: parseOptionalBoolean(fixture.retired, `${basePath}.retired`, issues),
    retired_at: parseOptionalTimestampString(fixture.retired_at, `${basePath}.retired_at`, issues),
    retired_reason: parseOptionalTrimmedString(fixture.retired_reason, `${basePath}.retired_reason`, issues),
    superseded_by: parseOptionalTrimmedString(fixture.superseded_by, `${basePath}.superseded_by`, issues),
    claim_key: parseOptionalTrimmedString(fixture.claim_key, `${basePath}.claim_key`, issues),
    claim_key_status: parseOptionalClaimKeyStatus(fixture.claim_key_status, `${basePath}.claim_key_status`, issues),
    claim_key_source: parseOptionalClaimKeySource(fixture.claim_key_source, `${basePath}.claim_key_source`, issues),
    claim_support_source_kind: parseOptionalTrimmedString(fixture.claim_support_source_kind, `${basePath}.claim_support_source_kind`, issues),
    claim_support_locator: parseOptionalTrimmedString(fixture.claim_support_locator, `${basePath}.claim_support_locator`, issues),
    claim_support_observed_at: parseOptionalTimestampString(fixture.claim_support_observed_at, `${basePath}.claim_support_observed_at`, issues),
    claim_support_mode: parseOptionalClaimSupportMode(fixture.claim_support_mode, `${basePath}.claim_support_mode`, issues),
    valid_from: parseOptionalTimestampString(fixture.valid_from, `${basePath}.valid_from`, issues),
    valid_to: parseOptionalTimestampString(fixture.valid_to, `${basePath}.valid_to`, issues),
    supersession_kind: parseOptionalTrimmedString(fixture.supersession_kind, `${basePath}.supersession_kind`, issues),
    supersession_reason: parseOptionalTrimmedString(fixture.supersession_reason, `${basePath}.supersession_reason`, issues)
  };
  if (type === void 0 || subject === void 0 || content === void 0) {
    return void 0;
  }
  return entry;
}
|
|
1060
|
+
/**
 * Validates one `procedurePool` element and returns the parsed fixture procedure.
 *
 * The definition fields are passed through `normalizeProcedureDefinition`; an error thrown by it
 * is recorded as an issue at the element's path. `id` and the lifecycle metadata are validated up
 * front so that their issues are reported even when a required field is missing or
 * normalization fails.
 *
 * @param {unknown} value - Raw `procedurePool[index]` value.
 * @param {number} index - Position within `procedurePool`, used to build issue paths.
 * @param {Array} issues - Collector that validation issues are appended to.
 * @returns {object|undefined} Parsed procedure, or `undefined` when `value` is not an object, a
 *   required field (`procedure_key`, `title`, `goal`) is invalid, or normalization throws.
 */
function parseFixtureProcedure(value, index, issues) {
  const basePath = `procedurePool[${index}]`;
  const fixture = parseObject(value, basePath, issues);
  if (fixture === void 0) {
    return void 0;
  }
  pushUnexpectedFields(fixture, FIXTURE_PROCEDURE_KEYS, basePath, issues);
  const procedureKey = parseRequiredTrimmedString(fixture.procedure_key, `${basePath}.procedure_key`, issues);
  const title = parseRequiredTrimmedString(fixture.title, `${basePath}.title`, issues);
  const goal = parseRequiredTrimmedString(fixture.goal, `${basePath}.goal`, issues);
  const whenToUse = parseOptionalStringArray(fixture.when_to_use, `${basePath}.when_to_use`, issues);
  const whenNotToUse = parseOptionalStringArray(fixture.when_not_to_use, `${basePath}.when_not_to_use`, issues);
  const prerequisites = parseOptionalStringArray(fixture.prerequisites, `${basePath}.prerequisites`, issues);
  const verification = parseOptionalStringArray(fixture.verification, `${basePath}.verification`, issues);
  const failureModes = parseOptionalStringArray(fixture.failure_modes, `${basePath}.failure_modes`, issues);
  // Parsed before the required-field check (and before normalization) so these issues are
  // never hidden by an earlier failure.
  const id = parseOptionalTrimmedString(fixture.id, `${basePath}.id`, issues);
  const metadata = {
    source_file: parseOptionalTrimmedString(fixture.source_file, `${basePath}.source_file`, issues),
    retired: parseOptionalBoolean(fixture.retired, `${basePath}.retired`, issues),
    retired_at: parseOptionalTimestampString(fixture.retired_at, `${basePath}.retired_at`, issues),
    retired_reason: parseOptionalTrimmedString(fixture.retired_reason, `${basePath}.retired_reason`, issues),
    superseded_by: parseOptionalTrimmedString(fixture.superseded_by, `${basePath}.superseded_by`, issues),
    created_at: parseOptionalTimestampString(fixture.created_at, `${basePath}.created_at`, issues),
    updated_at: parseOptionalTimestampString(fixture.updated_at, `${basePath}.updated_at`, issues)
  };
  if (procedureKey === void 0 || title === void 0 || goal === void 0) {
    return void 0;
  }
  try {
    const normalized = normalizeProcedureDefinition(
      {
        procedure_key: procedureKey,
        title,
        goal,
        when_to_use: whenToUse ?? [],
        when_not_to_use: whenNotToUse ?? [],
        prerequisites: prerequisites ?? [],
        // `steps` is handed over unvalidated; checking it is left to the normalizer.
        steps: fixture.steps,
        verification: verification ?? [],
        failure_modes: failureModes ?? [],
        // Fixtures without explicit sources get a manual placeholder source.
        sources: fixture.sources ?? [{ kind: "manual", label: "recall eval fixture" }]
      },
      basePath
    );
    return {
      id,
      procedure_key: normalized.procedure_key,
      title: normalized.title,
      goal: normalized.goal,
      when_to_use: normalized.when_to_use,
      when_not_to_use: normalized.when_not_to_use,
      prerequisites: normalized.prerequisites,
      steps: normalized.steps,
      verification: normalized.verification,
      failure_modes: normalized.failure_modes,
      sources: normalized.sources,
      ...metadata
    };
  } catch (error) {
    pushIssue(issues, basePath, error instanceof Error ? error.message : String(error));
    return void 0;
  }
}
|
|
1119
|
+
/**
 * Validates a required entry type against `ENTRY_TYPES`.
 *
 * @param {unknown} value - Candidate entry type.
 * @param {string} path2 - Request path reported with the issue.
 * @param {Array} issues - Collector that validation issues are appended to.
 * @returns {string|undefined} The entry type when listed in `ENTRY_TYPES`, else `undefined`.
 */
function parseEntryType(value, path2, issues) {
  const isEntryType = typeof value === "string" && ENTRY_TYPES.includes(value);
  if (isEntryType) {
    return value;
  }
  pushIssue(issues, path2, `Expected one of: ${ENTRY_TYPES.join(", ")}.`);
  return void 0;
}
|
|
1126
|
+
/**
 * Validates an optional expiry level against `EXPIRY_LEVELS`.
 *
 * @param {unknown} value - Candidate expiry; `undefined` means the field was omitted.
 * @param {string} path2 - Request path reported with the issue.
 * @param {Array} issues - Collector that validation issues are appended to.
 * @returns {string|undefined} The expiry when listed in `EXPIRY_LEVELS`, else `undefined`.
 */
function parseOptionalExpiry(value, path2, issues) {
  if (value === void 0) {
    return void 0;
  }
  if (typeof value === "string" && EXPIRY_LEVELS.includes(value)) {
    return value;
  }
  pushIssue(issues, path2, `Expected one of: ${EXPIRY_LEVELS.join(", ")}.`);
  return void 0;
}
|
|
1136
|
+
/**
 * Validates an optional claim key status against `CLAIM_KEY_STATUSES`.
 *
 * @param {unknown} value - Candidate status; `undefined` means the field was omitted.
 * @param {string} path2 - Request path reported with the issue.
 * @param {Array} issues - Collector that validation issues are appended to.
 * @returns {string|undefined} The status when listed in `CLAIM_KEY_STATUSES`, else `undefined`.
 */
function parseOptionalClaimKeyStatus(value, path2, issues) {
  if (value === void 0) {
    return void 0;
  }
  const isKnownStatus = typeof value === "string" && CLAIM_KEY_STATUSES.includes(value);
  if (!isKnownStatus) {
    pushIssue(issues, path2, `Expected one of: ${CLAIM_KEY_STATUSES.join(", ")}.`);
  }
  return isKnownStatus ? value : void 0;
}
|
|
1146
|
+
/**
 * Validates an optional claim key source against `CLAIM_KEY_SOURCES`.
 *
 * @param {unknown} value - Candidate source; `undefined` means the field was omitted.
 * @param {string} path2 - Request path reported with the issue.
 * @param {Array} issues - Collector that validation issues are appended to.
 * @returns {string|undefined} The source when listed in `CLAIM_KEY_SOURCES`, else `undefined`.
 */
function parseOptionalClaimKeySource(value, path2, issues) {
  if (value === void 0) {
    return void 0;
  }
  if (typeof value === "string" && CLAIM_KEY_SOURCES.includes(value)) {
    return value;
  }
  const allowed = CLAIM_KEY_SOURCES.join(", ");
  pushIssue(issues, path2, `Expected one of: ${allowed}.`);
  return void 0;
}
|
|
1156
|
+
/**
 * Validates an optional claim support mode against `CLAIM_SUPPORT_MODES`.
 *
 * @param {unknown} value - Candidate mode; `undefined` means the field was omitted.
 * @param {string} path2 - Request path reported with the issue.
 * @param {Array} issues - Collector that validation issues are appended to.
 * @returns {string|undefined} The mode when listed in `CLAIM_SUPPORT_MODES`, else `undefined`.
 */
function parseOptionalClaimSupportMode(value, path2, issues) {
  if (value === void 0) {
    return void 0;
  }
  const isKnownMode = typeof value === "string" && CLAIM_SUPPORT_MODES.includes(value);
  if (isKnownMode) {
    return value;
  }
  pushIssue(issues, path2, `Expected one of: ${CLAIM_SUPPORT_MODES.join(", ")}.`);
  return void 0;
}
|
|
1166
|
+
|
|
1167
|
+
// src/adapters/api/validation/before-turn-eval-request.ts
|
|
1168
|
+
/** Keys checked against the top level of a before-turn eval request body. */
var ROOT_REQUEST_KEYS = /* @__PURE__ */ new Set([
  "caseId",
  "description",
  "sandbox",
  "memoryPool",
  "procedurePool",
  "beforeTurnInput",
  "options"
]);

/** Keys checked against the request's `beforeTurnInput` object. */
var BEFORE_TURN_INPUT_KEYS = /* @__PURE__ */ new Set([
  "sessionKey",
  "currentTurnText",
  "recentTurns",
  "trigger",
  "policy"
]);

/** Keys checked against each `beforeTurnInput.recentTurns` element. */
var BEFORE_TURN_RECENT_TURN_KEYS = /* @__PURE__ */ new Set(["role", "text"]);

/** Keys checked against `beforeTurnInput.policy`. */
var BEFORE_TURN_POLICY_KEYS = /* @__PURE__ */ new Set([
  // Feature switches.
  "enableDurableRecall", "enableProcedureSuggestion",
  // Size limits.
  "maxRecentTurns", "maxQueryChars", "maxDurableEntries", "maxHighConfidenceDurableEntries", "maxProcedureCandidates",
  // Score thresholds.
  "recallThreshold", "highConfidenceRecallThreshold", "procedureThreshold",
  // Turn gating.
  "skipTrivialTurns", "requireTurnSignal"
]);

/** Keys checked against the request's `options` object. */
var OPTIONS_KEYS = /* @__PURE__ */ new Set([
  "includeDiagnostics",
  "includeRenderedPatch",
  "includeTimings",
  "includeDebugArtifact",
  "topKCandidates"
]);
|
|
1186
|
+
/**
 * Error thrown when a before-turn eval request body fails validation.
 */
var BeforeTurnEvalRequestValidationError = class extends Error {
  /** Case identifier recovered from the invalid request, when one could be parsed. */
  caseId;
  /** Validation issues gathered while parsing the request. */
  issues;
  /** Error name reported for instances of this class. */
  name = "BeforeTurnEvalRequestValidationError";
  /**
   * Builds the error with a fixed message and the collected validation details.
   *
   * @param issues - Validation issues gathered while parsing the request.
   * @param caseId - Case identifier recovered from the request, when available.
   */
  constructor(issues, caseId) {
    super("Invalid before-turn eval request.");
    this.caseId = caseId;
    this.issues = issues;
  }
};
|
|
1204
|
+
/**
 * Validates a raw before-turn eval request body.
 *
 * All sections are parsed before the outcome is decided, so the thrown error carries every issue
 * found in one pass.
 *
 * @param {unknown} input - Raw request body.
 * @returns {object} Parsed request with `caseId`, `description`, `sandbox`, `memoryPool`,
 *   `procedurePool`, `beforeTurnInput` and `options`.
 * @throws {BeforeTurnEvalRequestValidationError} When the body is not an object, any issue was
 *   recorded, or `caseId`, `memoryPool` or `beforeTurnInput` could not be parsed.
 */
function parseBeforeTurnEvalCaseRequest(input) {
  // Recovered first so that even a rejected request can be correlated by its case id.
  const caseId = extractParseableCaseId(input);
  if (!isRecord(input)) {
    const bodyIssue = { path: "$", message: "Request body must be a JSON object." };
    throw new BeforeTurnEvalRequestValidationError([bodyIssue], caseId);
  }
  const issues = [];
  pushUnexpectedFields(input, ROOT_REQUEST_KEYS, "", issues);
  const request = {
    caseId: parseRequiredTrimmedString(input.caseId, "caseId", issues),
    description: parseOptionalTrimmedString(input.description, "description", issues),
    sandbox: parseSandbox(input.sandbox, issues),
    memoryPool: parseMemoryPool(input.memoryPool, issues),
    procedurePool: parseProcedurePool(input.procedurePool, issues),
    beforeTurnInput: parseBeforeTurnInput(input.beforeTurnInput, issues),
    options: parseOptions(input.options, issues)
  };
  const isMissingRequiredSection = request.caseId === void 0 || request.memoryPool === void 0 || request.beforeTurnInput === void 0;
  if (issues.length > 0 || isMissingRequiredSection) {
    throw new BeforeTurnEvalRequestValidationError(issues, caseId);
  }
  return request;
}
|
|
1239
|
+
/**
 * Maps a validated request DTO into the eval case request by delegating each section to its mapper.
 *
 * @param {object} dto - Validated request DTO; `memoryPool` must be an array and `procedurePool`
 *   may be absent.
 * @returns {object} Request with mapped `sandbox`, `memoryPool`, `procedurePool`,
 *   `beforeTurnInput` and `options`, plus `caseId` and `description` copied as-is.
 */
function mapBeforeTurnEvalCaseRequestDto(dto) {
  const { caseId, description } = dto;
  const sandbox = mapSandboxRequestDto(dto.sandbox);
  // Arrow wrappers keep the mappers from receiving the index/array arguments `map` passes along.
  const memoryPool = dto.memoryPool.map((entry) => mapFixtureEntryDto(entry));
  const procedurePool = dto.procedurePool?.map((procedure) => mapFixtureProcedureDto(procedure));
  const beforeTurnInput = mapBeforeTurnInputDto(dto.beforeTurnInput);
  const options = mapCaseOptionsDto(dto.options);
  return { caseId, description, sandbox, memoryPool, procedurePool, beforeTurnInput, options };
}
|
|
1250
|
+
/**
 * Validates the required `beforeTurnInput` section of an eval request.
 *
 * Every field is validated before deciding whether the section is usable, so an invalid
 * `currentTurnText` no longer hides problems in `sessionKey`, `recentTurns`, `trigger` or `policy`.
 *
 * @param {unknown} value - Raw `beforeTurnInput` value.
 * @param {Array} issues - Collector that validation issues are appended to.
 * @returns {object|undefined} Parsed input with `sessionKey`, `currentTurnText`, `recentTurns`,
 *   `trigger` and `policy`, or `undefined` when `value` is not an object or `currentTurnText`
 *   is not a string.
 */
function parseBeforeTurnInput(value, issues) {
  const beforeTurnInput = parseObject(value, "beforeTurnInput", issues);
  if (beforeTurnInput === void 0) {
    return void 0;
  }
  pushUnexpectedFields(beforeTurnInput, BEFORE_TURN_INPUT_KEYS, "beforeTurnInput", issues);
  // `currentTurnText` is taken verbatim: it is not trimmed and an empty string is accepted.
  const currentTurnText = parseRequiredString(beforeTurnInput.currentTurnText, "beforeTurnInput.currentTurnText", issues);
  // The remaining fields are parsed before the required-field check so their issues are
  // collected even when `currentTurnText` is invalid.
  const sessionKey = parseOptionalTrimmedString(beforeTurnInput.sessionKey, "beforeTurnInput.sessionKey", issues);
  const recentTurns = parseRecentTurns(beforeTurnInput.recentTurns, issues);
  const trigger = parseOptionalTrimmedString(beforeTurnInput.trigger, "beforeTurnInput.trigger", issues);
  const policy = parseBeforeTurnPolicy(beforeTurnInput.policy, issues);
  if (currentTurnText === void 0) {
    return void 0;
  }
  return {
    sessionKey,
    currentTurnText,
    recentTurns,
    trigger,
    policy
  };
}
|
|
1268
|
+
/**
 * Validates the optional `beforeTurnInput.recentTurns` array, keeping only turns whose `role` and
 * `text` both parsed successfully.
 *
 * @param {unknown} value - Raw `recentTurns` value; `undefined` means it was omitted.
 * @param {Array} issues - Collector that validation issues are appended to.
 * @returns {Array<{ role: string, text: string }>|undefined} Parsed turns, or `undefined` when
 *   absent or not an array.
 */
function parseRecentTurns(value, issues) {
  if (value === void 0) {
    return void 0;
  }
  if (!Array.isArray(value)) {
    issues.push({
      path: "beforeTurnInput.recentTurns",
      message: "Expected an array of recent turns."
    });
    return void 0;
  }
  const turns = [];
  value.forEach((candidate, index) => {
    const turnPath = `beforeTurnInput.recentTurns[${index}]`;
    const record = parseObject(candidate, turnPath, issues);
    if (record === void 0) {
      return;
    }
    pushUnexpectedFields(record, BEFORE_TURN_RECENT_TURN_KEYS, turnPath, issues);
    const role = parseRecentTurnRole(record.role, `${turnPath}.role`, issues);
    const text = parseRequiredString(record.text, `${turnPath}.text`, issues);
    if (role !== void 0 && text !== void 0) {
      turns.push({ role, text });
    }
  });
  return turns;
}
|
|
1294
|
+
/**
 * Validates the optional `beforeTurnInput.policy` section.
 *
 * Switches are parsed as optional booleans, limits as optional integers with a minimum of 0, and
 * thresholds with `parseOptionalThreshold`.
 *
 * @param {unknown} value - Raw `policy` value; `undefined` means it was omitted.
 * @param {Array} issues - Collector that validation issues are appended to.
 * @returns {object|undefined} Parsed policy, or `undefined` when absent or not an object.
 */
function parseBeforeTurnPolicy(value, issues) {
  if (value === void 0) {
    return void 0;
  }
  const policyPath = "beforeTurnInput.policy";
  const policy = parseObject(value, policyPath, issues);
  if (policy === void 0) {
    return void 0;
  }
  pushUnexpectedFields(policy, BEFORE_TURN_POLICY_KEYS, policyPath, issues);
  // Per-kind readers; each derives the issue path from the field name.
  const readFlag = (field) => parseOptionalBoolean(policy[field], `${policyPath}.${field}`, issues);
  const readLimit = (field) => parseOptionalIntegerInRange(policy[field], `${policyPath}.${field}`, issues, { min: 0 });
  const readThreshold = (field) => parseOptionalThreshold(policy[field], `${policyPath}.${field}`, issues);
  return {
    enableDurableRecall: readFlag("enableDurableRecall"),
    enableProcedureSuggestion: readFlag("enableProcedureSuggestion"),
    maxRecentTurns: readLimit("maxRecentTurns"),
    maxQueryChars: readLimit("maxQueryChars"),
    maxDurableEntries: readLimit("maxDurableEntries"),
    maxHighConfidenceDurableEntries: readLimit("maxHighConfidenceDurableEntries"),
    maxProcedureCandidates: readLimit("maxProcedureCandidates"),
    recallThreshold: readThreshold("recallThreshold"),
    highConfidenceRecallThreshold: readThreshold("highConfidenceRecallThreshold"),
    procedureThreshold: readThreshold("procedureThreshold"),
    skipTrivialTurns: readFlag("skipTrivialTurns"),
    requireTurnSignal: readFlag("requireTurnSignal")
  };
}
|
|
1333
|
+
/**
 * Validates the optional `options` section of a before-turn eval request.
 *
 * @param {unknown} value - Raw `options` value; `undefined` means it was omitted.
 * @param {Array} issues - Collector that validation issues are appended to.
 * @returns {object|undefined} Parsed options, or `undefined` when absent or not an object.
 */
function parseOptions(value, issues) {
  // An omitted section is valid and produces no issue; anything else must be an object.
  const options = value === void 0 ? void 0 : parseObject(value, "options", issues);
  if (options === void 0) {
    return void 0;
  }
  pushUnexpectedFields(options, OPTIONS_KEYS, "options", issues);
  const readFlag = (field) => parseOptionalBoolean(options[field], `options.${field}`, issues);
  return {
    includeDiagnostics: readFlag("includeDiagnostics"),
    includeRenderedPatch: readFlag("includeRenderedPatch"),
    includeTimings: readFlag("includeTimings"),
    includeDebugArtifact: readFlag("includeDebugArtifact"),
    // Bounded from 1 up to the before-turn debug artifact maximum.
    topKCandidates: parseOptionalIntegerInRange(options.topKCandidates, "options.topKCandidates", issues, {
      min: 1,
      max: BEFORE_TURN_DEBUG_ARTIFACT_MAX_TOP_K
    })
  };
}
|
|
1353
|
+
/**
 * Maps the validated `beforeTurnInput` DTO into a fresh object.
 *
 * Each recent turn is copied down to its `role` and `text`, and the policy goes through
 * `mapBeforeTurnPolicyDto`.
 *
 * @param {object} dto - Validated before-turn input DTO.
 * @returns {object} Copy holding `sessionKey`, `currentTurnText`, `recentTurns`, `trigger` and `policy`.
 */
function mapBeforeTurnInputDto(dto) {
  const { sessionKey, currentTurnText, recentTurns, trigger, policy } = dto;
  return {
    sessionKey,
    currentTurnText,
    recentTurns: recentTurns?.map(({ role, text }) => ({ role, text })),
    trigger,
    policy: mapBeforeTurnPolicyDto(policy)
  };
}
|
|
1365
|
+
/**
 * Copies the validated policy DTO into a fresh object holding exactly the known policy fields.
 *
 * @param {object|undefined} dto - Validated policy DTO, or `undefined` when omitted.
 * @returns {object|undefined} Shallow copy of the policy fields, or `undefined` for an omitted DTO.
 */
function mapBeforeTurnPolicyDto(dto) {
  if (dto === void 0) {
    return void 0;
  }
  const {
    enableDurableRecall,
    enableProcedureSuggestion,
    maxRecentTurns,
    maxQueryChars,
    maxDurableEntries,
    maxHighConfidenceDurableEntries,
    maxProcedureCandidates,
    recallThreshold,
    highConfidenceRecallThreshold,
    procedureThreshold,
    skipTrivialTurns,
    requireTurnSignal
  } = dto;
  return {
    enableDurableRecall,
    enableProcedureSuggestion,
    maxRecentTurns,
    maxQueryChars,
    maxDurableEntries,
    maxHighConfidenceDurableEntries,
    maxProcedureCandidates,
    recallThreshold,
    highConfidenceRecallThreshold,
    procedureThreshold,
    skipTrivialTurns,
    requireTurnSignal
  };
}
|
|
1384
|
+
/**
 * Copies the known eval case option fields from a DTO into a new object.
 *
 * @param {object|undefined} dto - Case options DTO, or `undefined` when none was supplied.
 * @returns {object|undefined} `undefined` when `dto` is `undefined`; otherwise a
 *   new object with `includeDiagnostics`, `includeRenderedPatch`,
 *   `includeTimings`, `includeDebugArtifact` and `topKCandidates`.
 */
function mapCaseOptionsDto(dto) {
  if (dto === undefined) {
    return undefined;
  }
  const {
    includeDiagnostics,
    includeRenderedPatch,
    includeTimings,
    includeDebugArtifact,
    topKCandidates
  } = dto;
  return {
    includeDiagnostics,
    includeRenderedPatch,
    includeTimings,
    includeDebugArtifact,
    topKCandidates
  };
}
|
|
1396
|
+
|
|
1397
|
+
// src/adapters/api/routes/internal-before-turn-eval.ts
|
|
1398
|
+
var INTERNAL_BEFORE_TURN_EVAL_ROUTE_PATH = "/internal/evals/before-turn/run";
/**
 * Builds the route descriptor for the internal before-turn eval endpoint.
 *
 * @param {Function|object} [optionsOrRunner={}] - Either a runner function, or an
 *   options object with optional `runner` and `crossEncoder`. When no runner is
 *   given, `runBeforeTurnEvalCase` is called with `{ crossEncoder }`.
 * @returns {{method: string, path: string, handler: Function}} Descriptor whose
 *   async `handler(request)` responds with:
 *   - 200 and the runner result on success;
 *   - 400 and an `invalid_request` payload when a
 *     `BeforeTurnEvalRequestValidationError` is thrown (by parsing or the runner);
 *   - 500 and a generic `internal_error` payload for any other error.
 */
function createInternalBeforeTurnEvalRoute(optionsOrRunner = {}) {
  // A bare function is shorthand for `{ runner }`.
  const options = typeof optionsOrRunner === "function" ? { runner: optionsOrRunner } : optionsOrRunner;
  const { crossEncoder } = options;
  const runner = options.runner ?? ((request) => runBeforeTurnEvalCase(request, { crossEncoder }));

  const handler = async (request) => {
    // Declared outside the try so the 500 payload can echo the case id
    // whenever validation had already succeeded.
    let validatedRequest;
    try {
      validatedRequest = await parseValidatedRequest(request);
      const outcome = await runner(validatedRequest);
      return jsonResponse(outcome, 200);
    } catch (error) {
      const isValidationFailure = error instanceof BeforeTurnEvalRequestValidationError;
      const body = isValidationFailure
        ? {
            status: "error",
            caseId: error.caseId,
            error: {
              code: "invalid_request",
              message: error.message,
              details: error.issues
            }
          }
        : {
            status: "error",
            caseId: validatedRequest?.caseId,
            error: {
              code: "internal_error",
              message: "Internal before-turn eval adapter error."
            }
          };
      return jsonResponse(body, isValidationFailure ? 400 : 500);
    }
  };

  return {
    method: "POST",
    path: INTERNAL_BEFORE_TURN_EVAL_ROUTE_PATH,
    handler
  };
}
|
|
1442
|
+
/**
 * Reads the request body as JSON.
 *
 * @param {{json: Function}} request - Object exposing a `json()` method.
 * @returns {Promise<*>} The value produced by `request.json()`.
 * @throws {BeforeTurnEvalRequestValidationError} With a single `$`-path issue
 *   when `request.json()` throws or rejects for any reason.
 */
var parseJsonBody = async (request) => {
  let payload;
  try {
    payload = await request.json();
  } catch {
    const issues = [
      {
        path: "$",
        message: "Request body must be valid JSON."
      }
    ];
    throw new BeforeTurnEvalRequestValidationError(issues);
  }
  return payload;
};
|
|
1454
|
+
/**
 * Parses a request into the mapped before-turn eval case request.
 *
 * Runs three steps in order: `parseJsonBody`, `parseBeforeTurnEvalCaseRequest`
 * and `mapBeforeTurnEvalCaseRequestDto`. Errors from any step propagate.
 *
 * @param {object} request - Incoming request passed to `parseJsonBody`.
 * @returns {Promise<*>} Result of `mapBeforeTurnEvalCaseRequestDto`.
 */
var parseValidatedRequest = async (request) => {
  const body = await parseJsonBody(request);
  return mapBeforeTurnEvalCaseRequestDto(parseBeforeTurnEvalCaseRequest(body));
};
|
|
1459
|
+
/**
 * Wraps a value in a JSON `Response`.
 *
 * @param {*} body - Value serialized with `JSON.stringify`.
 * @param {number} status - HTTP status code for the response.
 * @returns {Response} Response with a UTF-8 JSON `content-type` header.
 */
var jsonResponse = (body, status) => {
  const headers = {
    "content-type": "application/json; charset=utf-8"
  };
  const serialized = JSON.stringify(body);
  return new Response(serialized, { status, headers });
};
|
|
1465
|
+
|
|
1466
|
+
// src/app/evals/recall/collect-diagnostics.ts
|
|
1467
|
+
/**
 * Creates the diagnostics collector for a single recall eval case.
 *
 * The collector accumulates execution facts, per-stage timings, retrieval
 * limits and candidate counts through its `record*` methods and through
 * `traceSink.reportSummary`, and exposes them via `buildDiagnostics`,
 * `buildTimings` and `buildObservedArtifactFacts`.
 *
 * @param {object} request - Recall eval case request. Reads `options`,
 *   `recallPath` and `memoryPool.length`.
 * @returns {object} Collector with `traceSink`, `isObservationEnabled`,
 *   `buildObservedArtifactFacts`, the `record*` methods, `buildDiagnostics`
 *   (returns `undefined` unless diagnostics or candidates were requested) and
 *   `buildTimings` (returns `undefined` unless timings were requested).
 */
function createRecallEvalDiagnosticsCollector(request) {
  const options = request.options;
  const diagnosticsRequested = wantsRecallEvalDiagnostics(request);
  const timingsRequested = options?.includeTimings === true;
  const debugArtifactRequested = options?.includeDebugArtifact === true;
  const observationEnabled = diagnosticsRequested || timingsRequested || debugArtifactRequested;

  // Builds an object whose listed keys all start at zero, in the given order.
  const zeroed = (keys) => Object.fromEntries(keys.map((key) => [key, 0]));

  const execution = {
    mode: "isolated-case",
    provisioning: "exact-fixture-seed",
    recallPath: request.recallPath ?? "core",
    memoryPoolCount: request.memoryPool.length,
    provisionedCount: 0,
    requestedDiagnostics: options?.includeDiagnostics === true,
    requestedCandidates: options?.includeCandidates === true
  };
  const stageTimings = zeroed([
    "sandboxSetupMs",
    "fixtureProvisionMs",
    "recallMs",
    "queryEmbeddingMs",
    "vectorSearchMs",
    "lexicalSearchMs",
    "mergeCandidatesMs",
    "scoreCandidatesMs",
    "thresholdMs",
    "budgetMs",
    "hydrateEntriesMs",
    "shapeResultsMs",
    "recordRecallEventsMs"
  ]);
  const retrieval = zeroed(["queryEmbeddingDimensions", "vectorSearchLimit", "lexicalSearchLimit"]);
  const candidateCounts = zeroed([
    "vectorRetrieved",
    "lexicalRetrieved",
    "merged",
    "thresholdQualified",
    "budgetAccepted",
    "finalRanked",
    "hydrated",
    "returned",
    "telemetryAttempted"
  ]);

  // Snapshot helpers: each copies only the known fields of one trace section,
  // cloning arrays so later changes to the summary do not leak in.
  const copyRanking = ({ limit, threshold, budget, noResultReason }) => ({
    limit,
    threshold,
    budget,
    noResultReason
  });
  const copyAround = ({ source, anchor, radiusDays }) => ({ source, anchor, radiusDays });
  const copyFiltering = ({ types, tags, since, until, around }) => ({
    types: [...types],
    tags: [...tags],
    since,
    until,
    around: around ? copyAround(around) : undefined
  });
  const copyClaimKey = ({ historicalBoosted, tentativeLineageSuppressed, trustPenalized, redundancyPenalized }) => ({
    historicalBoosted,
    tentativeLineageSuppressed,
    trustPenalized,
    redundancyPenalized
  });
  const copyRrf = ({ applied, channelCount, rankConstant, fusedCandidateCount, maxFusedScore }) => ({
    applied,
    channelCount,
    rankConstant,
    fusedCandidateCount,
    maxFusedScore
  });
  const copyNeighborhood = (trace) => ({
    expansionRequested: trace.expansionRequested,
    expansionAvailable: trace.expansionAvailable,
    familiesRequested: [...trace.familiesRequested],
    includeRetired: trace.includeRetired,
    seedIds: [...trace.seedIds],
    expansionCandidates: trace.expansionCandidates,
    strongSeedIds: [...trace.strongSeedIds],
    rerankBoostedIds: [...trace.rerankBoostedIds]
  });
  const copyMmr = ({ applied, lambda, droppedDuplicateCount, reorderedIds }) => ({
    applied,
    lambda,
    droppedDuplicateCount,
    reorderedIds: [...reorderedIds]
  });
  const copyCrossEncoder = ({ applied, k, alpha, latencyMs, rescoredIds, degradedReason }) => {
    const copy = { applied, k, alpha, latencyMs, rescoredIds: [...rescoredIds] };
    // `degradedReason` is only carried over when it is truthy.
    if (degradedReason) {
      copy.degradedReason = degradedReason;
    }
    return copy;
  };
  const copyDegraded = ({ active, reasons, lexicalOnly, notices }) => ({
    active,
    reasons: [...reasons],
    lexicalOnly,
    notices: [...notices]
  });

  let provision;
  let ranking;
  let filtering;
  let claimKey;
  let rrf;
  let neighborhood;
  let mmr;
  let crossEncoder;
  let degraded;
  let provisionObserved = false;
  let retrievalObserved = false;
  let traceObserved = false;

  const traceSink = {
    /** Captures one recall trace summary into the collector state. */
    reportSummary(summary) {
      // The flag is raised before any section is copied.
      traceObserved = true;
      ranking = copyRanking(summary.ranking);
      filtering = copyFiltering(summary.filtering);
      const { merged, thresholdQualified, budgetAccepted, finalRanked, returned } = summary.candidateCounts;
      Object.assign(candidateCounts, { merged, thresholdQualified, budgetAccepted, finalRanked, returned });
      claimKey = copyClaimKey(summary.claimKey);
      rrf = copyRrf(summary.rrf);
      neighborhood = copyNeighborhood(summary.neighborhood);
      mmr = copyMmr(summary.mmr);
      crossEncoder = copyCrossEncoder(summary.crossEncoder);
      degraded = copyDegraded(summary.degraded);
      const { mergeCandidatesMs, scoreCandidatesMs, thresholdMs, budgetMs, shapeResultsMs } = summary.timings;
      Object.assign(stageTimings, { mergeCandidatesMs, scoreCandidatesMs, thresholdMs, budgetMs, shapeResultsMs });
    }
  };

  return {
    traceSink,
    isObservationEnabled: () => observationEnabled,
    /** Returns copies of the facts consumed by the debug artifact builder. */
    buildObservedArtifactFacts() {
      const sawTrace = traceObserved;
      return {
        candidateCounts: { ...candidateCounts },
        ranking: sawTrace && ranking ? { ...ranking } : undefined,
        degraded: sawTrace && degraded ? copyDegraded(degraded) : undefined,
        traceObserved: sawTrace
      };
    },
    recordSandboxSetup(durationMs) {
      stageTimings.sandboxSetupMs = durationMs;
    },
    recordFixtureProvisionTiming(durationMs) {
      stageTimings.fixtureProvisionMs = durationMs;
    },
    /** Records the fixture provisioning result together with its duration. */
    recordProvision(result, durationMs) {
      provisionObserved = true;
      execution.provisionedCount = result.provisionedCount;
      stageTimings.fixtureProvisionMs = durationMs;
      const seededEntries = result.seededEntries.map((seed) => ({
        id: seed.id,
        created_at: seed.created_at,
        updated_at: seed.updated_at,
        retired: seed.retired,
        superseded_by: seed.superseded_by,
        claim_key: seed.claim_key,
        claim_key_status: seed.claim_key_status,
        valid_from: seed.valid_from,
        valid_to: seed.valid_to
      }));
      provision = {
        requestedCount: request.memoryPool.length,
        provisionedCount: result.provisionedCount,
        providedIdCount: result.providedIdCount,
        generatedIdCount: result.generatedIdCount,
        retiredCount: result.retiredCount,
        supersededCount: result.supersededCount,
        createdAtDefaultedCount: result.createdAtDefaultedCount,
        updatedAtDefaultedCount: result.updatedAtDefaultedCount,
        seededEntries
      };
    },
    recordRecall(durationMs) {
      stageTimings.recallMs = durationMs;
    },
    recordQueryEmbedding({ durationMs, dimensions }) {
      retrievalObserved = true;
      stageTimings.queryEmbeddingMs = durationMs;
      retrieval.queryEmbeddingDimensions = dimensions;
    },
    recordVectorSearch({ durationMs, limit, count }) {
      retrievalObserved = true;
      stageTimings.vectorSearchMs = durationMs;
      retrieval.vectorSearchLimit = limit;
      candidateCounts.vectorRetrieved = count;
    },
    recordLexicalSearch({ durationMs, limit, count }) {
      retrievalObserved = true;
      stageTimings.lexicalSearchMs = durationMs;
      retrieval.lexicalSearchLimit = limit;
      candidateCounts.lexicalRetrieved = count;
    },
    recordHydrateEntries({ durationMs, count }) {
      retrievalObserved = true;
      stageTimings.hydrateEntriesMs = durationMs;
      candidateCounts.hydrated = count;
    },
    recordRecallTelemetry({ durationMs, entryCount }) {
      retrievalObserved = true;
      stageTimings.recordRecallEventsMs = durationMs;
      candidateCounts.telemetryAttempted = entryCount;
    },
    /**
     * Returns the diagnostics payload, or `undefined` when not requested.
     * Sections that were never observed are reported as `undefined`.
     */
    buildDiagnostics() {
      if (!diagnosticsRequested) {
        return undefined;
      }
      const whenTraced = (section) => (traceObserved ? section : undefined);
      return {
        execution,
        provision: provisionObserved ? provision : undefined,
        retrieval: retrievalObserved ? retrieval : undefined,
        ranking: whenTraced(ranking),
        filtering: whenTraced(filtering),
        claimKey: whenTraced(claimKey),
        rrf: whenTraced(rrf),
        neighborhood: whenTraced(neighborhood),
        mmr: whenTraced(mmr),
        crossEncoder: whenTraced(crossEncoder),
        degraded: whenTraced(degraded),
        candidateCounts
      };
    },
    /** Returns `totalMs` plus every stage timing, or `undefined` when not requested. */
    buildTimings(totalMs) {
      if (!timingsRequested) {
        return undefined;
      }
      return { totalMs, ...stageTimings };
    }
  };
}
|
|
1721
|
+
/**
 * Tells whether the request asks for diagnostics output.
 *
 * @param {object} request - Recall eval case request; `options` may be absent.
 * @returns {boolean} `true` when `options.includeDiagnostics` or
 *   `options.includeCandidates` is strictly `true`.
 */
function wantsRecallEvalDiagnostics(request) {
  const options = request.options;
  if (options?.includeDiagnostics === true) {
    return true;
  }
  return options?.includeCandidates === true;
}
|
|
1724
|
+
|
|
1725
|
+
// src/app/evals/recall/instrumented-recall-ports.ts
|
|
1726
|
+
/**
 * Wraps recall ports so that each call reports its duration and result size
 * to the given observer.
 *
 * For `embed`, `vectorSearch`, `ftsSearch` and `hydrateEntries` the observer is
 * notified after a successful call with the real size, and after a failed call
 * with a size of zero before the error is rethrown. `recordRecallEvents`
 * always notifies the observer, whether the underlying call succeeds or fails.
 * `expandNeighborhood` and `crossEncoder` are only present on the result when
 * the wrapped ports provide them, and are passed through without instrumentation.
 *
 * @param {object} ports - Recall ports to wrap.
 * @param {object} observer - Receiver of `recordQueryEmbedding`,
 *   `recordVectorSearch`, `recordLexicalSearch`, `recordHydrateEntries` and
 *   `recordRecallTelemetry` notifications.
 * @returns {object} Ports object with the same call surface as `ports`.
 */
function createInstrumentedRecallPorts(ports, observer) {
  // Times `invoke`, then reports `{ durationMs, ...facts }`. The fact builders
  // are thunks so they are evaluated at report time, inside the try/catch.
  const observe = async (invoke, report, successFacts, failureFacts) => {
    const startedAt = Date.now();
    try {
      const value = await invoke();
      report({ durationMs: elapsedMs2(startedAt), ...successFacts(value) });
      return value;
    } catch (error) {
      report({ durationMs: elapsedMs2(startedAt), ...failureFacts() });
      throw error;
    }
  };

  const instrumented = {
    async embed(text) {
      return observe(
        () => ports.embed(text),
        (facts) => observer.recordQueryEmbedding(facts),
        (embedding) => ({ dimensions: embedding.length }),
        () => ({ dimensions: 0 })
      );
    },
    async vectorSearch(params) {
      return observe(
        () => ports.vectorSearch(params),
        (facts) => observer.recordVectorSearch(facts),
        (results) => ({ count: results.length, limit: params.limit }),
        () => ({ count: 0, limit: params.limit })
      );
    },
    async ftsSearch(params) {
      return observe(
        () => ports.ftsSearch(params),
        (facts) => observer.recordLexicalSearch(facts),
        (results) => ({ count: results.length, limit: params.limit }),
        () => ({ count: 0, limit: params.limit })
      );
    }
  };

  if (ports.expandNeighborhood) {
    instrumented.expandNeighborhood = async function expandNeighborhood(request) {
      return ports.expandNeighborhood(request);
    };
  }
  // Cross-encoder is an optional port: proxy it when available so the
  // core recall pipeline sees the same rerank surface as in production,
  // and the diagnostics collector can report the `crossEncoder` trace
  // branch exactly as the core emits it. Dropping the proxy here would
  // silently convert rerank-aware eval cases into rerank-disabled runs.
  if (ports.crossEncoder) {
    instrumented.crossEncoder = ports.crossEncoder;
  }

  instrumented.hydrateEntries = async function hydrateEntries(ids) {
    return observe(
      () => ports.hydrateEntries(ids),
      (facts) => observer.recordHydrateEntries(facts),
      (entries) => ({ count: entries.length }),
      () => ({ count: 0 })
    );
  };
  instrumented.recordRecallEvents = async function recordRecallEvents(params) {
    const startedAt = Date.now();
    try {
      await ports.recordRecallEvents(params);
    } finally {
      // Telemetry timing is reported on success and on failure alike.
      observer.recordRecallTelemetry({
        durationMs: elapsedMs2(startedAt),
        entryCount: params.entryIds.length
      });
    }
  };

  return instrumented;
}
|
|
1826
|
+
/**
 * Returns the milliseconds elapsed since `startedAt`, never below zero.
 *
 * @param {number} startedAt - Earlier `Date.now()` reading.
 * @returns {number} `Date.now() - startedAt`, floored at 0 (so a start time in
 *   the future yields 0 rather than a negative duration).
 */
function elapsedMs2(startedAt) {
  const delta = Date.now() - startedAt;
  return Math.max(0, delta);
}
|
|
1829
|
+
|
|
1830
|
+
// src/app/evals/recall/build-debug-artifact.ts
|
|
1831
|
+
/**
 * Assembles the `recall-debug-artifact.v1` payload for one eval case.
 *
 * @param {object} params
 * @param {object} params.request - Eval case request (`caseId`, `recallPath`,
 *   `recallRequest.text`, `options.topKCandidates`).
 * @param {Array|object} params.results - Either an array of entry results, or an
 *   object carrying `entries` and `routing`.
 * @param {Iterable<object>} params.projectedEntries - Projected entries providing
 *   `entryId` and `whySurfaced.reasons`.
 * @param {object} params.sandbox - Sandbox descriptor; `snapshot` is optional.
 * @param {object} params.observed - Observed facts (`traceObserved`,
 *   `candidateCounts`, `ranking`, `degraded`).
 * @returns {object} Artifact object. `snapshot`, `routing`, `candidateCounts`,
 *   `ranking`, `degraded` and `topCandidates` are only present when available.
 */
function buildRecallDebugArtifact(params) {
  const { request, results, projectedEntries, sandbox, observed } = params;
  const isEntryList = Array.isArray(results);
  const entryResults = isEntryList ? results : results.entries;

  // Index surfacing reasons by entry id; entries without reasons are left out.
  const reasonsByEntryId = /* @__PURE__ */ new Map();
  for (const { entryId, whySurfaced } of projectedEntries) {
    if (whySurfaced.reasons.length > 0) {
      reasonsByEntryId.set(entryId, [...whySurfaced.reasons]);
    }
  }

  const topK = resolveTopK2(request.options?.topKCandidates);
  const topCandidates = buildTopCandidates(entryResults, reasonsByEntryId, topK);

  // Keys are added in a fixed order so the serialized artifact stays stable.
  const artifact = {
    schemaVersion: "recall-debug-artifact.v1",
    caseId: request.caseId
  };
  if (sandbox.snapshot) {
    artifact.snapshot = buildSnapshot2(sandbox.snapshot);
  }
  artifact.request = {
    recallPath: request.recallPath ?? "core",
    query: request.recallRequest.text
  };
  if (!isEntryList) {
    artifact.routing = results.routing;
  }
  if (observed.traceObserved) {
    artifact.candidateCounts = observed.candidateCounts;
  }
  if (observed.ranking) {
    artifact.ranking = observed.ranking;
  }
  if (observed.degraded) {
    artifact.degraded = observed.degraded;
  }
  artifact.selectedEntryIds = entryResults.map((result) => result.entry.id);
  if (topCandidates.length > 0) {
    artifact.topCandidates = topCandidates;
  }
  return artifact;
}
|
|
1861
|
+
/**
 * Reduces a sandbox snapshot descriptor to the fields exposed in the artifact.
 *
 * @param {object} snapshot - Snapshot descriptor.
 * @returns {object} Object with `dbPathBasename`, plus `id` and `label` when
 *   they are not `undefined` on the input.
 */
function buildSnapshot2(snapshot) {
  const summary = {};
  if (snapshot.id !== undefined) {
    summary.id = snapshot.id;
  }
  if (snapshot.label !== undefined) {
    summary.label = snapshot.label;
  }
  summary.dbPathBasename = snapshot.dbPathBasename;
  return summary;
}
|
|
1868
|
+
/**
 * Resolves the number of top candidates to include in the debug artifact.
 *
 * @param {*} requested - Requested count; may be `undefined` or invalid.
 * @returns {number} `RECALL_DEBUG_ARTIFACT_DEFAULT_TOP_K` when `requested` is
 *   not an integer; otherwise `requested` raised to at least 1 and capped at
 *   `RECALL_DEBUG_ARTIFACT_MAX_TOP_K`.
 */
function resolveTopK2(requested) {
  // `Number.isInteger` is false for `undefined`, non-numbers, NaN and
  // infinities, so this single guard covers every fallback case.
  if (!Number.isInteger(requested)) {
    return RECALL_DEBUG_ARTIFACT_DEFAULT_TOP_K;
  }
  if (requested < 1) {
    return 1;
  }
  return requested > RECALL_DEBUG_ARTIFACT_MAX_TOP_K ? RECALL_DEBUG_ARTIFACT_MAX_TOP_K : requested;
}
|
|
1883
|
+
/**
 * Builds the `topCandidates` list for the debug artifact.
 *
 * @param {Array<object>} entryResults - Entry results, each with `entry.id`,
 *   `score` and `scores` (`lexical`, `vector`, `recency`, `importance`).
 * @param {Map<*, Array>} reasonsByEntryId - Surfacing reasons keyed by entry id.
 * @param {number} topK - Maximum number of leading results to include.
 * @returns {Array<object>} One candidate per included result; `reasons` is only
 *   present when a non-empty list is registered for the entry.
 */
function buildTopCandidates(entryResults, reasonsByEntryId, topK) {
  return entryResults.slice(0, topK).map(({ entry, score, scores }) => {
    const candidate = {
      id: entry.id,
      score,
      lexicalScore: scores.lexical,
      vectorScore: scores.vector,
      recencyScore: scores.recency,
      importanceScore: scores.importance
    };
    const reasons = reasonsByEntryId.get(entry.id);
    if (reasons && reasons.length > 0) {
      candidate.reasons = reasons;
    }
    return candidate;
  });
}
|
|
1898
|
+
|
|
1899
|
+
// src/app/evals/recall/normalize-response.ts
|
|
1900
|
+
/**
 * Builds the `status: "ok"` response for a recall eval case.
 *
 * @param {object} params
 * @param {object} params.request - Eval case request.
 * @param {Array|object} params.results - Either an array of entry results (each
 *   is projected with `projectClaimCentricRecallEntry`), or an object that
 *   already carries `entries` and `projectedEntries`.
 * @param {object} params.sandbox - Sandbox descriptor.
 * @param {object} [params.diagnostics] - Diagnostics payload, passed through.
 * @param {object} [params.timings] - Timings payload, passed through.
 * @param {object} [params.observedArtifactFacts] - Observed facts; a debug
 *   artifact is built only when this is set and
 *   `request.options.includeDebugArtifact` is `true`.
 * @returns {object} Response with `result.entries`, `result.entryIds`,
 *   `metadata`, `diagnostics`, `timings`, `sandbox` and optionally `debugArtifact`.
 */
function buildRecallEvalSuccessResponse(params) {
  const { request, results } = params;
  const isEntryList = Array.isArray(results);
  const entryResults = isEntryList ? results : results.entries;
  const projectedEntries = isEntryList
    ? entryResults.map((result) => projectClaimCentricRecallEntry(result, { asOf: request.recallRequest.asOf }))
    : results.projectedEntries;
  const metadata = buildMetadata(request, results, projectedEntries);

  const artifactWanted = request.options?.includeDebugArtifact === true && params.observedArtifactFacts;
  const debugArtifact = artifactWanted
    ? buildRecallDebugArtifact({
        request,
        results,
        projectedEntries,
        sandbox: params.sandbox,
        observed: params.observedArtifactFacts
      })
    : undefined;

  // Projections are paired with results by position; every claim field falls
  // back to a default when the projection (or that field) is missing.
  const describeEntry = (result, index) => {
    const { entry } = result;
    const projection = projectedEntries[index];
    return {
      id: entry.id,
      subject: entry.subject,
      content: entry.content,
      type: entry.type,
      importance: entry.importance,
      expiry: entry.expiry,
      tags: entry.tags,
      created_at: entry.created_at,
      score: result.score,
      scores: result.scores,
      claim: {
        familyKey: projection?.familyKey ?? `entry:${entry.id}`,
        claimKey: projection?.claimKey,
        slotPolicy: projection?.slotPolicy ?? "exclusive",
        memoryState: projection?.memoryState ?? "current",
        claimStatus: projection?.claimStatus ?? "no_key",
        freshness: projection?.freshness ?? {
          createdAt: entry.created_at,
          isCurrent: true,
          label: `created ${entry.created_at} | current state`
        },
        provenance: projection?.provenance ?? {},
        whySurfaced: projection?.whySurfaced ?? {
          summary: `ranked score ${result.score.toFixed(2)}`,
          reasons: []
        }
      }
    };
  };

  const response = {
    status: "ok",
    caseId: request.caseId,
    result: {
      entries: entryResults.map(describeEntry),
      entryIds: entryResults.map((result) => result.entry.id)
    },
    metadata,
    diagnostics: params.diagnostics,
    timings: params.timings,
    sandbox: buildSandboxResult2(params.sandbox)
  };
  if (debugArtifact) {
    response.debugArtifact = debugArtifact;
  }
  return response;
}
|
|
1953
|
+
/**
 * Builds the `status: "error"` response for a recall eval case.
 *
 * @param {object} params
 * @param {object} params.request - Eval case request; supplies `caseId`.
 * @param {string} params.code - Error code.
 * @param {string} params.message - Error message.
 * @param {*} [params.details] - Extra error details.
 * @param {object} [params.diagnostics] - Diagnostics payload, passed through.
 * @param {object} [params.timings] - Timings payload, passed through.
 * @param {object} [params.sandbox] - Sandbox descriptor; summarized with
 *   `buildSandboxResult2` when truthy, otherwise reported as `undefined`.
 * @returns {object} Error response object.
 */
function buildRecallEvalErrorResponse(params) {
  const { request, code, message, details, diagnostics, timings, sandbox } = params;
  return {
    status: "error",
    caseId: request.caseId,
    error: { code, message, details },
    diagnostics,
    timings,
    sandbox: sandbox ? buildSandboxResult2(sandbox) : undefined
  };
}
|
|
1967
|
+
/**
 * Summarizes a sandbox descriptor for inclusion in an eval response.
 *
 * @param {object} sandbox - Sandbox descriptor.
 * @returns {object} Object with `root`, `dbPath` and `preserved`, plus
 *   `snapshot` when the sandbox has a truthy one.
 */
function buildSandboxResult2(sandbox) {
  const { root, dbPath, preserved } = sandbox;
  const summary = { root, dbPath, preserved };
  if (sandbox.snapshot) {
    summary.snapshot = sandbox.snapshot;
  }
  return summary;
}
|
|
1975
|
+
/**
 * Builds the `metadata` section of a successful recall eval response.
 *
 * @param {object} request - Eval case request; `recallPath` is used for
 *   array results (defaulting to "core").
 * @param {Array|object} results - Either an array of entry results, or an object
 *   carrying entry families, claim transitions, routing, procedure data,
 *   notices and episodes.
 * @param {Array<object>} projectedEntries - Projected entries to describe.
 * @returns {object} For array results: `{ path, claim: { projectedEntries } }`.
 *   Otherwise `path` is "unified", `claim` also carries `entryFamilies` and
 *   `transitions`, and a `unified` section is added.
 */
function buildMetadata(request, results, projectedEntries) {
  const describedEntries = projectedEntries.map(buildProjectedEntryMetadata);

  if (Array.isArray(results)) {
    return {
      path: request.recallPath ?? "core",
      claim: { projectedEntries: describedEntries }
    };
  }

  const claim = {
    projectedEntries: describedEntries,
    entryFamilies: results.entryFamilies.map(buildClaimFamilyMetadata),
    transitions: results.claimTransitions
  };

  const selected = results.procedure;
  const procedure = selected
    ? {
        id: selected.id,
        procedureKey: selected.procedure_key,
        title: selected.title,
        goal: selected.goal
      }
    : undefined;
  const procedureCandidates = results.procedureCandidates.map(({ procedure: candidate, score, scores }) => ({
    id: candidate.id,
    procedureKey: candidate.procedure_key,
    title: candidate.title,
    score,
    lexicalScore: scores.lexical,
    vectorScore: scores.vector
  }));

  return {
    path: "unified",
    claim,
    unified: {
      routing: results.routing,
      timeWindow: results.timeWindow,
      asOf: results.asOf,
      procedure,
      procedureCandidates,
      procedureNotices: results.procedureNotices,
      notices: results.notices,
      // Only the number of episodes is reported, not their content.
      episodeCount: results.episodes.length
    }
  };
}
|
|
2015
|
+
/**
 * Extracts the metadata fields of a projected recall entry.
 *
 * @param {object} entry - Projected entry.
 * @returns {object} New object with `entryId`, `familyKey`, `claimKey`,
 *   `slotPolicy`, `memoryState`, `claimStatus`, `freshness`, `provenance` and
 *   `whySurfaced` (nested values are shared with the input, not cloned).
 */
function buildProjectedEntryMetadata(entry) {
  const {
    entryId,
    familyKey,
    claimKey,
    slotPolicy,
    memoryState,
    claimStatus,
    freshness,
    provenance,
    whySurfaced
  } = entry;
  return {
    entryId,
    familyKey,
    claimKey,
    slotPolicy,
    memoryState,
    claimStatus,
    freshness,
    provenance,
    whySurfaced
  };
}
|
|
2028
|
+
/**
 * Summarizes a claim family for the response metadata.
 *
 * @param {object} family - Claim family with `primary.entryId` and an
 *   `entries` array.
 * @returns {object} Object with `familyKey`, `claimKey`, `slotPolicy`,
 *   `subject`, `primaryEntryId`, and `entries` reduced to
 *   `{ id, memoryState, claimStatus }` each.
 */
function buildClaimFamilyMetadata(family) {
  const { familyKey, claimKey, slotPolicy, subject } = family;
  const primaryEntryId = family.primary.entryId;
  const entries = family.entries.map(({ entryId, memoryState, claimStatus }) => ({
    id: entryId,
    memoryState,
    claimStatus
  }));
  return { familyKey, claimKey, slotPolicy, subject, primaryEntryId, entries };
}
|
|
2042
|
+
|
|
2043
|
+
// src/app/evals/recall/run-recall-eval-case.ts
|
|
2044
|
+
async function runRecallEvalCase(request, dependencies = {}) {
|
|
2045
|
+
const startedAt = Date.now();
|
|
2046
|
+
const provisionedAt = new Date(startedAt).toISOString();
|
|
2047
|
+
const diagnostics = createRecallEvalDiagnosticsCollector(request);
|
|
2048
|
+
const recallPath = request.recallPath ?? "core";
|
|
2049
|
+
let sandbox;
|
|
2050
|
+
let sharedEmbeddingPort;
|
|
2051
|
+
let sharedEmbeddingError;
|
|
2052
|
+
const getEmbeddingSupport = () => {
|
|
2053
|
+
if (sharedEmbeddingPort) {
|
|
2054
|
+
return {
|
|
2055
|
+
available: true,
|
|
2056
|
+
port: sharedEmbeddingPort
|
|
2057
|
+
};
|
|
2058
|
+
}
|
|
2059
|
+
if (sharedEmbeddingError) {
|
|
2060
|
+
return {
|
|
2061
|
+
available: false,
|
|
2062
|
+
error: sharedEmbeddingError
|
|
2063
|
+
};
|
|
2064
|
+
}
|
|
2065
|
+
const config = readConfig();
|
|
2066
|
+
try {
|
|
2067
|
+
sharedEmbeddingPort = createEmbeddingClient(resolveEmbeddingApiKey(config), resolveEmbeddingModel(config));
|
|
2068
|
+
return {
|
|
2069
|
+
available: true,
|
|
2070
|
+
port: sharedEmbeddingPort
|
|
2071
|
+
};
|
|
2072
|
+
} catch (error) {
|
|
2073
|
+
sharedEmbeddingError = error instanceof Error ? error.message : String(error);
|
|
2074
|
+
return {
|
|
2075
|
+
available: false,
|
|
2076
|
+
error: sharedEmbeddingError
|
|
2077
|
+
};
|
|
2078
|
+
}
|
|
2079
|
+
};
|
|
2080
|
+
const getEmbeddingPort = () => {
|
|
2081
|
+
const support = getEmbeddingSupport();
|
|
2082
|
+
if (!support.port) {
|
|
2083
|
+
throw new Error(support.error ?? "Embeddings are unavailable.");
|
|
2084
|
+
}
|
|
2085
|
+
return support.port;
|
|
2086
|
+
};
|
|
2087
|
+
try {
|
|
2088
|
+
const sandboxStartedAt = Date.now();
|
|
2089
|
+
try {
|
|
2090
|
+
sandbox = await setupRecallEvalSandbox(request.sandbox);
|
|
2091
|
+
diagnostics.recordSandboxSetup(elapsedMs3(sandboxStartedAt));
|
|
2092
|
+
} catch (error) {
|
|
2093
|
+
diagnostics.recordSandboxSetup(elapsedMs3(sandboxStartedAt));
|
|
2094
|
+
return buildRecallEvalErrorResponse({
|
|
2095
|
+
request,
|
|
2096
|
+
code: "sandbox_setup_failed",
|
|
2097
|
+
message: "Failed to create isolated recall eval sandbox.",
|
|
2098
|
+
details: toErrorDetails2(error),
|
|
2099
|
+
diagnostics: diagnostics.buildDiagnostics(),
|
|
2100
|
+
timings: diagnostics.buildTimings(elapsedMs3(startedAt))
|
|
2101
|
+
});
|
|
2102
|
+
}
|
|
2103
|
+
if (request.memoryPool.length > 0 || (request.procedurePool?.length ?? 0) > 0) {
|
|
2104
|
+
const provisionStartedAt = Date.now();
|
|
2105
|
+
try {
|
|
2106
|
+
let entryProvisionResult;
|
|
2107
|
+
if (request.memoryPool.length > 0) {
|
|
2108
|
+
entryProvisionResult = await provisionRecallEvalFixtures({
|
|
2109
|
+
caseId: request.caseId,
|
|
2110
|
+
memoryPool: request.memoryPool,
|
|
2111
|
+
store: sandbox.fixtureStore,
|
|
2112
|
+
embedding: getEmbeddingPort(),
|
|
2113
|
+
provisionedAt
|
|
2114
|
+
});
|
|
2115
|
+
}
|
|
2116
|
+
if ((request.procedurePool?.length ?? 0) > 0) {
|
|
2117
|
+
await provisionRecallEvalProcedureFixtures({
|
|
2118
|
+
caseId: request.caseId,
|
|
2119
|
+
procedurePool: request.procedurePool ?? [],
|
|
2120
|
+
store: sandbox.fixtureStore,
|
|
2121
|
+
provisionedAt
|
|
2122
|
+
});
|
|
2123
|
+
}
|
|
2124
|
+
if (entryProvisionResult) {
|
|
2125
|
+
diagnostics.recordProvision(entryProvisionResult, elapsedMs3(provisionStartedAt));
|
|
2126
|
+
} else {
|
|
2127
|
+
diagnostics.recordFixtureProvisionTiming(elapsedMs3(provisionStartedAt));
|
|
2128
|
+
}
|
|
2129
|
+
} catch (error) {
|
|
2130
|
+
diagnostics.recordFixtureProvisionTiming(elapsedMs3(provisionStartedAt));
|
|
2131
|
+
return buildRecallEvalErrorResponse({
|
|
2132
|
+
request,
|
|
2133
|
+
code: "fixture_provision_failed",
|
|
2134
|
+
message: "Failed to provision recall eval fixtures into isolated storage.",
|
|
2135
|
+
details: toErrorDetails2(error),
|
|
2136
|
+
diagnostics: diagnostics.buildDiagnostics(),
|
|
2137
|
+
timings: diagnostics.buildTimings(elapsedMs3(startedAt)),
|
|
2138
|
+
sandbox
|
|
2139
|
+
});
|
|
2140
|
+
}
|
|
2141
|
+
}
|
|
2142
|
+
const recallStartedAt = Date.now();
|
|
2143
|
+
try {
|
|
2144
|
+
const embeddingSupport = getEmbeddingSupport();
|
|
2145
|
+
const recallEmbeddingPort = request.options?.faultInjection?.queryEmbeddingFailure === true ? createUnavailableEmbeddingPort2("Injected recall eval query embedding failure.") : embeddingSupport.port ?? createUnavailableEmbeddingPort2(embeddingSupport.error ?? "Embeddings are unavailable.");
|
|
2146
|
+
const sandboxPorts = sandbox.createRecallPorts(recallEmbeddingPort);
|
|
2147
|
+
const portsWithCrossEncoder = attachCrossEncoderPort(sandboxPorts, dependencies.crossEncoder);
|
|
2148
|
+
const telemetryGatedPorts = applyTelemetryWriteGate(portsWithCrossEncoder, sandbox);
|
|
2149
|
+
const basePorts = applyRecallEvalFaultInjection(telemetryGatedPorts, request);
|
|
2150
|
+
const recallPorts = diagnostics.isObservationEnabled() ? createInstrumentedRecallPorts(basePorts, diagnostics) : basePorts;
|
|
2151
|
+
const slotPolicyConfig = request.unified?.memoryPolicy?.slotPolicies;
|
|
2152
|
+
const rankingPolicy = request.recallRequest.rankingPolicy;
|
|
2153
|
+
const unifiedRecallOptions = {
|
|
2154
|
+
...slotPolicyConfig ? { slotPolicyConfig } : {},
|
|
2155
|
+
...rankingPolicy ? { rankingPolicy } : {},
|
|
2156
|
+
...diagnostics.isObservationEnabled() ? { trace: diagnostics.traceSink } : {}
|
|
2157
|
+
};
|
|
2158
|
+
const coreRecallOptions = {
|
|
2159
|
+
...rankingPolicy ? { rankingPolicy } : {},
|
|
2160
|
+
...diagnostics.isObservationEnabled() ? { trace: diagnostics.traceSink } : {}
|
|
2161
|
+
};
|
|
2162
|
+
const results = recallPath === "unified" ? await runUnifiedRecall(
|
|
2163
|
+
{
|
|
2164
|
+
text: request.recallRequest.text,
|
|
2165
|
+
...request.unified?.mode ? { mode: request.unified.mode } : {},
|
|
2166
|
+
...request.recallRequest.limit !== void 0 ? { limit: request.recallRequest.limit } : {},
|
|
2167
|
+
...request.recallRequest.threshold !== void 0 ? { threshold: request.recallRequest.threshold } : {},
|
|
2168
|
+
...request.recallRequest.types && request.recallRequest.types.length > 0 ? { types: request.recallRequest.types } : {},
|
|
2169
|
+
...request.recallRequest.tags && request.recallRequest.tags.length > 0 ? { tags: request.recallRequest.tags } : {},
|
|
2170
|
+
...request.recallRequest.asOf ? { asOf: request.recallRequest.asOf } : {},
|
|
2171
|
+
...request.unified?.sessionKey ? { sessionKey: request.unified.sessionKey } : {}
|
|
2172
|
+
},
|
|
2173
|
+
{
|
|
2174
|
+
database: sandbox.episodeDatabase,
|
|
2175
|
+
procedures: sandbox.procedureDatabase,
|
|
2176
|
+
recall: recallPorts,
|
|
2177
|
+
embeddingAvailable: embeddingSupport.available,
|
|
2178
|
+
...embeddingSupport.error ? { embeddingError: embeddingSupport.error } : {},
|
|
2179
|
+
...slotPolicyConfig ? { claimSlotPolicyConfig: slotPolicyConfig } : {},
|
|
2180
|
+
...embeddingSupport.available ? {
|
|
2181
|
+
embedQuery: async (text) => {
|
|
2182
|
+
const vectors = await recallEmbeddingPort.embed([text]);
|
|
2183
|
+
return vectors[0] ?? [];
|
|
2184
|
+
}
|
|
2185
|
+
} : {},
|
|
2186
|
+
...Object.keys(unifiedRecallOptions).length > 0 ? { recallOptions: unifiedRecallOptions } : {}
|
|
2187
|
+
}
|
|
2188
|
+
) : await recall(request.recallRequest, recallPorts, Object.keys(coreRecallOptions).length > 0 ? coreRecallOptions : void 0);
|
|
2189
|
+
diagnostics.recordRecall(elapsedMs3(recallStartedAt));
|
|
2190
|
+
return buildRecallEvalSuccessResponse({
|
|
2191
|
+
request,
|
|
2192
|
+
results,
|
|
2193
|
+
diagnostics: diagnostics.buildDiagnostics(),
|
|
2194
|
+
timings: diagnostics.buildTimings(elapsedMs3(startedAt)),
|
|
2195
|
+
sandbox,
|
|
2196
|
+
observedArtifactFacts: request.options?.includeDebugArtifact === true ? diagnostics.buildObservedArtifactFacts() : void 0
|
|
2197
|
+
});
|
|
2198
|
+
} catch (error) {
|
|
2199
|
+
diagnostics.recordRecall(elapsedMs3(recallStartedAt));
|
|
2200
|
+
return buildRecallEvalErrorResponse({
|
|
2201
|
+
request,
|
|
2202
|
+
code: "recall_execution_failed",
|
|
2203
|
+
message: "Failed to execute real recall against isolated eval state.",
|
|
2204
|
+
details: toErrorDetails2(error),
|
|
2205
|
+
diagnostics: diagnostics.buildDiagnostics(),
|
|
2206
|
+
timings: diagnostics.buildTimings(elapsedMs3(startedAt)),
|
|
2207
|
+
sandbox
|
|
2208
|
+
});
|
|
2209
|
+
}
|
|
2210
|
+
} catch (error) {
|
|
2211
|
+
return buildRecallEvalErrorResponse({
|
|
2212
|
+
request,
|
|
2213
|
+
code: "internal_error",
|
|
2214
|
+
message: "Recall eval execution failed unexpectedly.",
|
|
2215
|
+
details: toErrorDetails2(error),
|
|
2216
|
+
diagnostics: diagnostics.buildDiagnostics(),
|
|
2217
|
+
timings: diagnostics.buildTimings(elapsedMs3(startedAt)),
|
|
2218
|
+
sandbox
|
|
2219
|
+
});
|
|
2220
|
+
} finally {
|
|
2221
|
+
await sandbox?.cleanup().catch(() => void 0);
|
|
2222
|
+
}
|
|
2223
|
+
}
|
|
2224
|
+
/**
 * Converts a caught value into the `details` payload used by error responses.
 *
 * @param error - Any thrown value.
 * @returns An object whose `cause` is the Error message, or the stringified
 *   value when the input is not an Error instance.
 */
function toErrorDetails2(error) {
  const cause = error instanceof Error ? error.message : String(error);
  return { cause };
}
|
|
2234
|
+
/**
 * Builds an embedding port whose `embed` call always fails.
 *
 * @param message - Message carried by the Error that `embed` rejects with.
 * @returns An object exposing a single async `embed` method that rejects.
 */
function createUnavailableEmbeddingPort2(message) {
  const embed = async () => {
    throw new Error(message);
  };
  return { embed };
}
|
|
2241
|
+
/**
 * Wraps recall ports so that vector search fails when the request asks for it.
 *
 * When `request.options.faultInjection.vectorSearchFailure` is not exactly
 * `true`, the given ports object is returned untouched. Otherwise a new object
 * is returned whose `vectorSearch` always rejects while every other port
 * delegates to the original.
 *
 * @param ports - Recall ports to wrap.
 * @param request - Recall eval request carrying the optional fault flags.
 * @returns The original ports, or a wrapper with a failing `vectorSearch`.
 */
function applyRecallEvalFaultInjection(ports, request) {
  const injectVectorFailure = request.options?.faultInjection?.vectorSearchFailure === true;
  if (!injectVectorFailure) {
    return ports;
  }
  // Optional ports are only forwarded when the wrapped ports provide them.
  const optionalStages = {};
  if (ports.expandNeighborhood) {
    optionalStages.expandNeighborhood = async (request2) => ports.expandNeighborhood(request2);
  }
  if (ports.crossEncoder) {
    // The cross-encoder stays wired during fault injection so rerank-aware
    // cases can still observe the rerank stage after the core pipeline falls
    // back to lexical-only retrieval. Keeping the port is safe because the
    // core helper fails closed on adapter errors and honors the ranking
    // policy kill switch.
    optionalStages.crossEncoder = ports.crossEncoder;
  }
  return {
    embed: async (text) => ports.embed(text),
    vectorSearch: async () => {
      throw new Error("Injected recall eval vector search failure.");
    },
    ftsSearch: async (params) => ports.ftsSearch(params),
    ...optionalStages,
    hydrateEntries: async (ids) => ports.hydrateEntries(ids),
    recordRecallEvents: async (params) => ports.recordRecallEvents(params)
  };
}
|
|
2276
|
+
/**
 * Returns the milliseconds elapsed since `startedAt`, never below zero.
 *
 * @param startedAt - Epoch-millisecond timestamp, as produced by `Date.now()`.
 * @returns Non-negative elapsed time in milliseconds.
 */
function elapsedMs3(startedAt) {
  const elapsed = Date.now() - startedAt;
  // Clamp so a start time in the future cannot produce a negative duration.
  return Math.max(0, elapsed);
}
|
|
2279
|
+
|
|
2280
|
+
// src/adapters/api/validation/recall-eval-request.ts
|
|
2281
|
+
// Allow-lists of field names accepted at each level of the recall eval
// request body; used with pushUnexpectedFields to report unknown keys.
var ROOT_REQUEST_KEYS2 = /* @__PURE__ */ new Set([
  "caseId",
  "description",
  "recallPath",
  "sandbox",
  "memoryPool",
  "recallRequest",
  "unified",
  "options"
]);
var RECALL_REQUEST_KEYS = /* @__PURE__ */ new Set(["text", "limit", "threshold", "budget", "types", "tags", "since", "until", "around", "aroundRadius", "asOf", "rankingProfile", "rankingPolicy"]);
var RANKING_POLICY_KEYS = /* @__PURE__ */ new Set(["rrf", "rrfRankConstant", "rrfSmallPoolRankConstant", "neighborhood", "mmr", "mmrLambda", "mmrMinPoolSize", "crossEncoder", "crossEncoderTopK", "crossEncoderAlpha"]);
var UNIFIED_REQUEST_KEYS = /* @__PURE__ */ new Set([
  "mode",
  "sessionKey",
  "memoryPolicy"
]);
var UNIFIED_MEMORY_POLICY_KEYS = /* @__PURE__ */ new Set([
  "slotPolicies"
]);
var SLOT_POLICY_KEYS = /* @__PURE__ */ new Set([
  "attributeHeads"
]);
var OPTIONS_KEYS2 = /* @__PURE__ */ new Set([
  "includeDiagnostics",
  "includeCandidates",
  "includeTimings",
  "includeDebugArtifact",
  "topKCandidates",
  "faultInjection"
]);
var FAULT_INJECTION_KEYS = /* @__PURE__ */ new Set([
  "queryEmbeddingFailure",
  "vectorSearchFailure"
]);

// Closed sets of string values accepted by the enum-like request fields.
var RECALL_PATHS = [
  "core",
  "unified"
];
var RECALL_RANKING_PROFILES = [
  "historical_state"
];
var RANKING_POLICY_TOGGLES = [
  "enabled",
  "disabled"
];
var UNIFIED_RECALL_MODES = [
  "auto",
  "entries",
  "episodes"
];
var CLAIM_SLOT_POLICIES = [
  "exclusive",
  "multivalued"
];
|
|
2319
|
+
/**
 * Error thrown when a recall eval request body fails validation.
 * Always carries the fixed message "Invalid recall eval request.".
 */
var RecallEvalRequestValidationError = class extends Error {
  /** Case identifier recovered from the request, if one could be parsed. */
  caseId;
  /** Validation issues collected while parsing the request. */
  issues;
  /**
   * Builds the error from the collected issues.
   *
   * @param issues - Structured validation issues collected during parsing.
   * @param caseId - Parseable request case identifier when available.
   */
  constructor(issues, caseId) {
    const message = "Invalid recall eval request.";
    super(message);
    this.name = "RecallEvalRequestValidationError";
    this.issues = issues;
    this.caseId = caseId;
  }
};
|
|
2337
|
+
/**
 * Validates a raw recall eval request body and returns its parsed DTO.
 *
 * Every section is parsed even after an earlier one has reported a problem,
 * so the thrown error lists all issues found in one pass.
 *
 * @param input - Decoded JSON request body.
 * @returns The parsed request with caseId, description, recallPath, sandbox,
 *   memoryPool, recallRequest, unified and options.
 * @throws {RecallEvalRequestValidationError} When the body is not an object,
 *   any issue was recorded, or caseId/memoryPool/recallRequest is missing.
 */
function parseRecallEvalCaseRequest(input) {
  // Extracted up front so even a rejected body can echo a case identifier.
  const caseId = extractParseableCaseId(input);
  if (!isRecord(input)) {
    const rootIssue = {
      path: "$",
      message: "Request body must be a JSON object."
    };
    throw new RecallEvalRequestValidationError([rootIssue], caseId);
  }
  const issues = [];
  pushUnexpectedFields(input, ROOT_REQUEST_KEYS2, "", issues);
  // Property initializers run top to bottom, which fixes the issue order.
  const parsedRequest = {
    caseId: parseRequiredTrimmedString(input.caseId, "caseId", issues),
    description: parseOptionalTrimmedString(input.description, "description", issues),
    recallPath: parseOptionalRecallPath(input.recallPath, "recallPath", issues),
    sandbox: parseSandbox(input.sandbox, issues),
    memoryPool: parseMemoryPool(input.memoryPool, issues),
    recallRequest: parseRecallRequest(input.recallRequest, issues),
    unified: parseUnifiedRequest(input.unified, issues),
    options: parseOptions2(input.options, issues)
  };
  validatePathSpecificRequest(parsedRequest.recallPath, parsedRequest.recallRequest, parsedRequest.unified, issues);
  const requiredSections = [parsedRequest.caseId, parsedRequest.memoryPool, parsedRequest.recallRequest];
  const isMissingRequiredSection = requiredSections.includes(void 0);
  if (issues.length > 0 || isMissingRequiredSection) {
    throw new RecallEvalRequestValidationError(issues, caseId);
  }
  return parsedRequest;
}
|
|
2375
|
+
/**
 * Maps a validated recall eval request DTO onto the runner's request shape.
 *
 * @param dto - DTO as returned by parseRecallEvalCaseRequest.
 * @returns A new request object; nested sections go through their own mappers.
 */
function mapRecallEvalCaseRequestDto(dto) {
  const { caseId, description, recallPath } = dto;
  const sandbox = mapSandboxRequestDto2(dto.sandbox);
  const memoryPool = dto.memoryPool.map((entry) => mapFixtureEntryDto2(entry));
  const recallRequest = mapRecallRequestDto(dto.recallRequest);
  const unified = mapUnifiedRequestDto(dto.unified);
  const options = mapCaseOptionsDto2(dto.options);
  return {
    caseId,
    description,
    recallPath,
    sandbox,
    memoryPool,
    recallRequest,
    unified,
    options
  };
}
|
|
2387
|
+
/**
 * Parses the required `recallRequest` section of a recall eval request.
 *
 * @param value - Raw `recallRequest` value from the request body.
 * @param issues - Accumulator that receives validation issues.
 * @returns The parsed recall request, or undefined when the section is not a
 *   usable object or its required `text` could not be parsed.
 */
function parseRecallRequest(value, issues) {
  const section = "recallRequest";
  const raw = parseObject(value, section, issues);
  if (raw === void 0) {
    return void 0;
  }
  pushUnexpectedFields(raw, RECALL_REQUEST_KEYS, section, issues);
  const text = parseRequiredTrimmedString(raw.text, "recallRequest.text", issues);
  if (text === void 0) {
    // Without query text the remaining fields are not parsed at all.
    return void 0;
  }
  const limit = parseOptionalIntegerInRange(raw.limit, "recallRequest.limit", issues, { min: 0 });
  const threshold = parseOptionalThreshold(raw.threshold, "recallRequest.threshold", issues);
  const budget = parseOptionalIntegerInRange(raw.budget, "recallRequest.budget", issues, { min: 0 });
  const types = parseOptionalEntryTypeArray(raw.types, "recallRequest.types", issues);
  const tags = parseOptionalStringArray(raw.tags, "recallRequest.tags", issues);
  const since = parseOptionalTrimmedString(raw.since, "recallRequest.since", issues);
  const until = parseOptionalTrimmedString(raw.until, "recallRequest.until", issues);
  const around = parseOptionalTrimmedString(raw.around, "recallRequest.around", issues);
  const aroundRadius = parseOptionalIntegerInRange(raw.aroundRadius, "recallRequest.aroundRadius", issues, { min: 1 });
  const asOf = parseOptionalTrimmedString(raw.asOf, "recallRequest.asOf", issues);
  const rankingProfile = parseOptionalRankingProfile(raw.rankingProfile, "recallRequest.rankingProfile", issues);
  const rankingPolicy = parseOptionalRankingPolicy(raw.rankingPolicy, "recallRequest.rankingPolicy", issues);
  return {
    text,
    limit,
    threshold,
    budget,
    types,
    tags,
    since,
    until,
    around,
    aroundRadius,
    asOf,
    rankingProfile,
    rankingPolicy
  };
}
|
|
2419
|
+
/**
 * Parses the optional ranking policy overrides of a recall request.
 *
 * Only fields that parse to a defined value are copied to the result, in the
 * order listed in the field table below.
 *
 * @param value - Raw ranking policy value.
 * @param path2 - Issue path prefix for this object.
 * @param issues - Accumulator that receives validation issues.
 * @returns The parsed overrides, or undefined when the value is absent, is not
 *   a usable object, or yields no defined field.
 */
function parseOptionalRankingPolicy(value, path2, issues) {
  if (value === void 0) {
    return void 0;
  }
  const policy = parseObject(value, path2, issues);
  if (policy === void 0) {
    return void 0;
  }
  pushUnexpectedFields(policy, RANKING_POLICY_KEYS, path2, issues);
  const asToggle = (raw, fieldPath) => parseOptionalStageToggle(raw, fieldPath, issues);
  const asUnitInterval = (raw, fieldPath) => parseOptionalUnitInterval(raw, fieldPath, issues);
  const asIntegerFrom = (min) => (raw, fieldPath) => parseOptionalIntegerInRange(raw, fieldPath, issues, { min });
  // Table order fixes both the issue order and the key order of the result.
  const fieldParsers = [
    ["rrf", asToggle],
    ["rrfRankConstant", asIntegerFrom(1)],
    ["rrfSmallPoolRankConstant", asIntegerFrom(1)],
    ["neighborhood", asToggle],
    ["mmr", asToggle],
    ["mmrLambda", asUnitInterval],
    ["mmrMinPoolSize", asIntegerFrom(0)],
    ["crossEncoder", asToggle],
    ["crossEncoderTopK", asIntegerFrom(1)],
    ["crossEncoderAlpha", asUnitInterval]
  ];
  const parsed = {};
  for (const [field, parseField] of fieldParsers) {
    const fieldValue = parseField(policy[field], `${path2}.${field}`);
    if (fieldValue !== void 0) {
      parsed[field] = fieldValue;
    }
  }
  return Object.keys(parsed).length > 0 ? parsed : void 0;
}
|
|
2477
|
+
/**
 * Parses an optional ranking-stage toggle.
 *
 * @param value - Raw toggle value.
 * @param path2 - Issue path for this field.
 * @param issues - Accumulator that receives validation issues.
 * @returns The value when it is one of RANKING_POLICY_TOGGLES; undefined when
 *   absent or invalid (an issue is recorded only for the invalid case).
 */
function parseOptionalStageToggle(value, path2, issues) {
  if (value === void 0) {
    return void 0;
  }
  const isKnownToggle = typeof value === "string" && RANKING_POLICY_TOGGLES.includes(value);
  if (isKnownToggle) {
    return value;
  }
  pushIssue(issues, path2, `Expected one of: ${RANKING_POLICY_TOGGLES.join(", ")}.`);
  return void 0;
}
|
|
2487
|
+
/**
 * Parses an optional number restricted to the closed interval [0, 1].
 *
 * @param value - Raw value.
 * @param path2 - Issue path for this field.
 * @param issues - Accumulator that receives validation issues.
 * @returns The number when it is finite and within [0, 1]; undefined when
 *   absent or invalid (an issue is recorded only for the invalid case).
 */
function parseOptionalUnitInterval(value, path2, issues) {
  if (value === void 0) {
    return void 0;
  }
  const isUnitNumber = typeof value === "number" && Number.isFinite(value) && value >= 0 && value <= 1;
  if (isUnitNumber) {
    return value;
  }
  pushIssue(issues, path2, "Expected a number from 0 to 1.");
  return void 0;
}
|
|
2497
|
+
/**
 * Parses the optional `unified` section of a recall eval request.
 *
 * @param value - Raw `unified` value.
 * @param issues - Accumulator that receives validation issues.
 * @returns An object with mode, sessionKey and memoryPolicy, or undefined when
 *   the section is absent or is not a usable object.
 */
function parseUnifiedRequest(value, issues) {
  const unified = value === void 0 ? void 0 : parseObject(value, "unified", issues);
  if (unified === void 0) {
    return void 0;
  }
  pushUnexpectedFields(unified, UNIFIED_REQUEST_KEYS, "unified", issues);
  const mode = parseOptionalUnifiedRecallMode(unified.mode, "unified.mode", issues);
  const sessionKey = parseOptionalTrimmedString(unified.sessionKey, "unified.sessionKey", issues);
  const memoryPolicy = parseUnifiedMemoryPolicy(unified.memoryPolicy, issues);
  return { mode, sessionKey, memoryPolicy };
}
|
|
2512
|
+
/**
 * Parses the optional `options` section of a recall eval request.
 *
 * @param value - Raw `options` value.
 * @param issues - Accumulator that receives validation issues.
 * @returns The parsed option flags, or undefined when the section is absent
 *   or is not a usable object.
 */
function parseOptions2(value, issues) {
  const options = value === void 0 ? void 0 : parseObject(value, "options", issues);
  if (options === void 0) {
    return void 0;
  }
  pushUnexpectedFields(options, OPTIONS_KEYS2, "options", issues);
  const includeDiagnostics = parseOptionalBoolean(options.includeDiagnostics, "options.includeDiagnostics", issues);
  const includeCandidates = parseOptionalBoolean(options.includeCandidates, "options.includeCandidates", issues);
  const includeTimings = parseOptionalBoolean(options.includeTimings, "options.includeTimings", issues);
  const includeDebugArtifact = parseOptionalBoolean(options.includeDebugArtifact, "options.includeDebugArtifact", issues);
  // topKCandidates is bounded above by the debug-artifact maximum.
  const topKRange = { min: 1, max: RECALL_DEBUG_ARTIFACT_MAX_TOP_K };
  const topKCandidates = parseOptionalIntegerInRange(options.topKCandidates, "options.topKCandidates", issues, topKRange);
  const faultInjection = parseFaultInjection(options.faultInjection, issues);
  return {
    includeDiagnostics,
    includeCandidates,
    includeTimings,
    includeDebugArtifact,
    topKCandidates,
    faultInjection
  };
}
|
|
2533
|
+
/**
 * Parses the optional `options.faultInjection` section.
 *
 * @param value - Raw fault-injection value.
 * @param issues - Accumulator that receives validation issues.
 * @returns An object with the two optional boolean fault flags, or undefined
 *   when the section is absent or is not a usable object.
 */
function parseFaultInjection(value, issues) {
  const sectionPath = "options.faultInjection";
  const faultInjection = value === void 0 ? void 0 : parseObject(value, sectionPath, issues);
  if (faultInjection === void 0) {
    return void 0;
  }
  pushUnexpectedFields(faultInjection, FAULT_INJECTION_KEYS, sectionPath, issues);
  const queryEmbeddingFailure = parseOptionalBoolean(faultInjection.queryEmbeddingFailure, "options.faultInjection.queryEmbeddingFailure", issues);
  const vectorSearchFailure = parseOptionalBoolean(faultInjection.vectorSearchFailure, "options.faultInjection.vectorSearchFailure", issues);
  return { queryEmbeddingFailure, vectorSearchFailure };
}
|
|
2547
|
+
/**
 * Parses the optional `unified.memoryPolicy` section.
 *
 * @param value - Raw memory policy value.
 * @param issues - Accumulator that receives validation issues.
 * @returns An object with the parsed `slotPolicies`, or undefined when the
 *   section is absent or is not a usable object.
 */
function parseUnifiedMemoryPolicy(value, issues) {
  const sectionPath = "unified.memoryPolicy";
  const memoryPolicy = value === void 0 ? void 0 : parseObject(value, sectionPath, issues);
  if (memoryPolicy === void 0) {
    return void 0;
  }
  pushUnexpectedFields(memoryPolicy, UNIFIED_MEMORY_POLICY_KEYS, sectionPath, issues);
  const slotPolicies = parseClaimSlotPolicyConfig(memoryPolicy.slotPolicies, "unified.memoryPolicy.slotPolicies", issues);
  return { slotPolicies };
}
|
|
2560
|
+
/**
 * Parses a required entry type.
 *
 * @param value - Raw value.
 * @param path2 - Issue path for this value.
 * @param issues - Accumulator that receives validation issues.
 * @returns The value when it is one of ENTRY_TYPES; otherwise undefined after
 *   recording an issue.
 */
function parseEntryType2(value, path2, issues) {
  const isKnownType = typeof value === "string" && ENTRY_TYPES.includes(value);
  if (isKnownType) {
    return value;
  }
  pushIssue(issues, path2, `Expected one of: ${ENTRY_TYPES.join(", ")}.`);
  return void 0;
}
|
|
2567
|
+
/**
 * Parses the optional `recallPath` selector.
 *
 * @param value - Raw value.
 * @param path2 - Issue path for this field.
 * @param issues - Accumulator that receives validation issues.
 * @returns The value when it is one of RECALL_PATHS; undefined when absent or
 *   invalid (an issue is recorded only for the invalid case).
 */
function parseOptionalRecallPath(value, path2, issues) {
  if (value === void 0) {
    return void 0;
  }
  const isKnownPath = typeof value === "string" && RECALL_PATHS.includes(value);
  if (isKnownPath) {
    return value;
  }
  pushIssue(issues, path2, `Expected one of: ${RECALL_PATHS.join(", ")}.`);
  return void 0;
}
|
|
2577
|
+
/**
 * Parses the optional unified recall mode.
 *
 * @param value - Raw value.
 * @param path2 - Issue path for this field.
 * @param issues - Accumulator that receives validation issues.
 * @returns The value when it is one of UNIFIED_RECALL_MODES; undefined when
 *   absent or invalid (an issue is recorded only for the invalid case).
 */
function parseOptionalUnifiedRecallMode(value, path2, issues) {
  if (value === void 0) {
    return void 0;
  }
  const isKnownMode = typeof value === "string" && UNIFIED_RECALL_MODES.includes(value);
  if (isKnownMode) {
    return value;
  }
  pushIssue(issues, path2, `Expected one of: ${UNIFIED_RECALL_MODES.join(", ")}.`);
  return void 0;
}
|
|
2587
|
+
/**
 * Parses the optional ranking profile of a recall request.
 *
 * @param value - Raw value.
 * @param path2 - Issue path for this field.
 * @param issues - Accumulator that receives validation issues.
 * @returns The value when it is one of RECALL_RANKING_PROFILES; undefined when
 *   absent or invalid (an issue is recorded only for the invalid case).
 */
function parseOptionalRankingProfile(value, path2, issues) {
  if (value === void 0) {
    return void 0;
  }
  const isKnownProfile = typeof value === "string" && RECALL_RANKING_PROFILES.includes(value);
  if (isKnownProfile) {
    return value;
  }
  pushIssue(issues, path2, `Expected one of: ${RECALL_RANKING_PROFILES.join(", ")}.`);
  return void 0;
}
|
|
2597
|
+
/**
 * Parses an optional array of entry types.
 *
 * Invalid items are reported under `path[index]` and left out of the result;
 * valid items keep their relative order.
 *
 * @param value - Raw value.
 * @param path2 - Issue path for the array.
 * @param issues - Accumulator that receives validation issues.
 * @returns The valid entry types, or undefined when the value is absent or is
 *   not an array.
 */
function parseOptionalEntryTypeArray(value, path2, issues) {
  if (value === void 0) {
    return void 0;
  }
  if (!Array.isArray(value)) {
    pushIssue(issues, path2, "Expected an array.");
    return void 0;
  }
  const accepted = [];
  // Index loop so every slot is visited, including holes in a sparse array.
  for (let index = 0; index < value.length; index += 1) {
    const entryType = parseEntryType2(value[index], `${path2}[${index}]`, issues);
    if (entryType === void 0) {
      continue;
    }
    accepted.push(entryType);
  }
  return accepted;
}
|
|
2614
|
+
/**
 * Parses the optional claim slot policy configuration.
 *
 * @param value - Raw configuration value.
 * @param path2 - Issue path for this object.
 * @param issues - Accumulator that receives validation issues.
 * @returns `{ attributeHeads }` when attribute heads were parsed; undefined
 *   when the value is absent, is not a usable object, or has no attribute heads.
 */
function parseClaimSlotPolicyConfig(value, path2, issues) {
  const config = value === void 0 ? void 0 : parseObject(value, path2, issues);
  if (config === void 0) {
    return void 0;
  }
  pushUnexpectedFields(config, SLOT_POLICY_KEYS, path2, issues);
  const attributeHeads = parseClaimSlotPolicyAttributeHeads(config.attributeHeads, `${path2}.attributeHeads`, issues);
  if (!attributeHeads) {
    return void 0;
  }
  return { attributeHeads };
}
|
|
2626
|
+
/**
 * Parses the map from attribute-head label to claim slot policy.
 *
 * Keys are trimmed and lower-cased before validation. An invalid label is
 * reported under the raw key; an invalid policy value is reported under the
 * normalized key. Entries with an issue are skipped.
 *
 * @param value - Raw attribute-head map.
 * @param path2 - Issue path for the map.
 * @param issues - Accumulator that receives validation issues.
 * @returns The normalized map, or undefined when the value is absent, is not
 *   a record, or no entry is valid.
 */
function parseClaimSlotPolicyAttributeHeads(value, path2, issues) {
  if (value === void 0) {
    return void 0;
  }
  if (!isRecord(value)) {
    pushIssue(issues, path2, "Expected an object.");
    return void 0;
  }
  const canonicalLabelPattern = /^[a-z0-9][a-z0-9_-]*$/;
  const normalized = {};
  Object.entries(value).forEach(([rawKey, rawValue]) => {
    const attributeHead = rawKey.trim().toLowerCase();
    if (!canonicalLabelPattern.test(attributeHead)) {
      pushIssue(issues, `${path2}.${rawKey}`, "Expected a canonical attribute-head label.");
      return;
    }
    const isKnownPolicy = typeof rawValue === "string" && CLAIM_SLOT_POLICIES.includes(rawValue);
    if (!isKnownPolicy) {
      pushIssue(issues, `${path2}.${attributeHead}`, `Expected one of: ${CLAIM_SLOT_POLICIES.join(", ")}.`);
      return;
    }
    normalized[attributeHead] = rawValue;
  });
  return Object.keys(normalized).length > 0 ? normalized : void 0;
}
|
|
2649
|
+
/**
 * Records issues for fields that do not fit the selected recall path.
 *
 * A missing `recallPath` is treated as "core". On the core path only the
 * presence of a `unified` block is rejected. On the unified path the
 * core-only recall fields and `rankingProfile` are rejected.
 *
 * @param recallPath - Parsed recall path, possibly undefined.
 * @param recallRequest - Parsed recall request, possibly undefined.
 * @param unified - Parsed unified block, possibly undefined.
 * @param issues - Accumulator that receives validation issues.
 */
function validatePathSpecificRequest(recallPath, recallRequest, unified, issues) {
  const isUnifiedPath = (recallPath ?? "core") === "unified";
  if (!isUnifiedPath) {
    if (unified !== void 0) {
      pushIssue(issues, "unified", 'The "unified" block is only allowed when recallPath is "unified".');
    }
    return;
  }
  if (recallRequest === void 0) {
    return;
  }
  const coreOnlyFields = ["budget", "since", "until", "around", "aroundRadius"];
  for (const field of coreOnlyFields) {
    if (recallRequest[field] !== void 0) {
      pushIssue(issues, `recallRequest.${field}`, 'This field is only supported when recallPath is "core".');
    }
  }
  if (recallRequest.rankingProfile !== void 0) {
    pushIssue(issues, "recallRequest.rankingProfile", 'This field is derived by unified recall and cannot be supplied when recallPath is "unified".');
  }
}
|
|
2679
|
+
/**
 * Copies the sandbox section of the request DTO.
 *
 * @param dto - Sandbox DTO, or undefined when the section was omitted.
 * @returns A new object with root, preserve and corpusSeed, or undefined.
 */
function mapSandboxRequestDto2(dto) {
  if (dto === void 0) {
    return void 0;
  }
  const { root, preserve, corpusSeed } = dto;
  return { root, preserve, corpusSeed };
}
|
|
2689
|
+
/**
 * Maps one memory-pool fixture entry by delegating to mapFixtureEntryDto.
 *
 * @param dto - Fixture entry DTO.
 * @returns Whatever mapFixtureEntryDto returns for the entry.
 */
function mapFixtureEntryDto2(dto) {
  const mappedEntry = mapFixtureEntryDto(dto);
  return mappedEntry;
}
|
|
2692
|
+
/**
 * Copies the recall request DTO field by field.
 *
 * @param dto - Parsed recall request DTO.
 * @returns A new object holding exactly the thirteen recall request fields.
 */
function mapRecallRequestDto(dto) {
  const {
    text,
    limit,
    threshold,
    budget,
    types,
    tags,
    since,
    until,
    around,
    aroundRadius,
    asOf,
    rankingProfile,
    rankingPolicy
  } = dto;
  return {
    text,
    limit,
    threshold,
    budget,
    types,
    tags,
    since,
    until,
    around,
    aroundRadius,
    asOf,
    rankingProfile,
    rankingPolicy
  };
}
|
|
2709
|
+
/**
 * Copies the unified section of the request DTO.
 *
 * @param dto - Unified DTO, or undefined when the section was omitted.
 * @returns A new object with mode, sessionKey and memoryPolicy (itself reduced
 *   to `slotPolicies` when present), or undefined.
 */
function mapUnifiedRequestDto(dto) {
  if (dto === void 0) {
    return void 0;
  }
  const { mode, sessionKey } = dto;
  let memoryPolicy;
  if (dto.memoryPolicy !== void 0) {
    memoryPolicy = { slotPolicies: dto.memoryPolicy.slotPolicies };
  }
  return { mode, sessionKey, memoryPolicy };
}
|
|
2721
|
+
/**
 * Copies the options section of the request DTO.
 *
 * @param dto - Options DTO, or undefined when the section was omitted.
 * @returns A new object holding the six option fields, or undefined.
 */
function mapCaseOptionsDto2(dto) {
  if (dto === void 0) {
    return void 0;
  }
  const {
    includeDiagnostics,
    includeCandidates,
    includeTimings,
    includeDebugArtifact,
    topKCandidates,
    faultInjection
  } = dto;
  return {
    includeDiagnostics,
    includeCandidates,
    includeTimings,
    includeDebugArtifact,
    topKCandidates,
    faultInjection
  };
}
|
|
2734
|
+
|
|
2735
|
+
// src/adapters/api/routes/internal-recall-eval.ts
|
|
2736
|
+
// Path of the internal recall eval endpoint.
var INTERNAL_RECALL_EVAL_ROUTE_PATH = "/internal/evals/recall/run";
// Method/path pair that createInternalRecallEvalRoute spreads into its route.
var INTERNAL_RECALL_EVAL_ROUTE = { method: "POST", path: INTERNAL_RECALL_EVAL_ROUTE_PATH };
|
|
2741
|
+
/**
 * Creates the route definition for the internal recall eval endpoint.
 *
 * @param optionsOrRunner - Either a runner function, or an options object
 *   with optional `runner` and `crossEncoder`. Without a runner, requests are
 *   executed through runRecallEvalCase with the given cross-encoder.
 * @returns The route's method and path plus an async `handler` that answers
 *   200 with the runner result, 400 with validation issues, or 500 with a
 *   generic error body for any other failure.
 */
function createInternalRecallEvalRoute(optionsOrRunner = {}) {
  const options = typeof optionsOrRunner === "function" ? { runner: optionsOrRunner } : optionsOrRunner;
  const crossEncoder = options.crossEncoder;
  const defaultRunner = (request) => runRecallEvalCase(request, { crossEncoder });
  const runner = options.runner ?? defaultRunner;

  // Translates a failure into the JSON error response for this endpoint.
  const toErrorResponse = (error, fallbackCaseId) => {
    if (error instanceof RecallEvalRequestValidationError) {
      const invalidRequestBody = {
        status: "error",
        caseId: error.caseId,
        error: {
          code: "invalid_request",
          message: error.message,
          details: error.issues
        }
      };
      return jsonResponse2(invalidRequestBody, 400);
    }
    // Other failures are reported without any detail from the caught error.
    const internalErrorBody = {
      status: "error",
      caseId: fallbackCaseId,
      error: {
        code: "internal_error",
        message: "Internal recall eval adapter error."
      }
    };
    return jsonResponse2(internalErrorBody, 500);
  };

  const handler = async (request) => {
    let validatedRequest;
    try {
      validatedRequest = await parseValidatedRequest2(request);
      const result = await runner(validatedRequest);
      return jsonResponse2(result, 200);
    } catch (error) {
      // validatedRequest is still undefined when parsing itself failed.
      return toErrorResponse(error, validatedRequest?.caseId);
    }
  };

  return {
    ...INTERNAL_RECALL_EVAL_ROUTE,
    handler
  };
}
|
|
2783
|
+
/**
 * Reads the request body as JSON.
 *
 * @param request - Object exposing an async `json()` method.
 * @returns The decoded body.
 * @throws {RecallEvalRequestValidationError} When `request.json()` fails; the
 *   error carries a single root-level issue and no case identifier.
 */
var parseJsonBody2 = async (request) => {
  let payload;
  try {
    payload = await request.json();
  } catch {
    const invalidJsonIssue = {
      path: "$",
      message: "Request body must be valid JSON."
    };
    throw new RecallEvalRequestValidationError([invalidJsonIssue]);
  }
  return payload;
};
|
|
2795
|
+
/**
 * Decodes, validates and maps an incoming recall eval request.
 *
 * @param request - Incoming request whose body is read as JSON.
 * @returns The mapped recall eval case request.
 */
var parseValidatedRequest2 = async (request) => {
  const validatedDto = parseRecallEvalCaseRequest(await parseJsonBody2(request));
  return mapRecallEvalCaseRequestDto(validatedDto);
};
|
|
2800
|
+
/**
 * Builds a JSON `Response`.
 *
 * @param body - Value serialized with JSON.stringify.
 * @param status - HTTP status code of the response.
 * @returns A Response whose content type is UTF-8 JSON.
 */
var jsonResponse2 = (body, status) => {
  const headers = {
    "content-type": "application/json; charset=utf-8"
  };
  return new Response(JSON.stringify(body), { status, headers });
};
|
|
2806
|
+
|
|
2807
|
+
// src/adapters/api/internal-eval-routes.ts
|
|
2808
|
+
/**
 * Creates the internal eval routes: recall eval first, before-turn eval second.
 *
 * @param options - Optional settings; `crossEncoder` is passed to both routes.
 * @returns A two-element array of route definitions.
 */
function createInternalEvalRoutes(options = {}) {
  const { crossEncoder } = options;
  const recallRoute = createInternalRecallEvalRoute({ crossEncoder });
  const beforeTurnRoute = createInternalBeforeTurnEvalRoute({ crossEncoder });
  return [recallRoute, beforeTurnRoute];
}
|
|
2811
|
+
|
|
2812
|
+
// src/adapters/api/internal-eval-server.ts
|
|
2813
|
+
// Host and port used by startInternalEvalServer when the caller gives none.
var DEFAULT_INTERNAL_EVAL_HOST = "127.0.0.1";
var DEFAULT_INTERNAL_EVAL_PORT = 4010;
|
|
2815
|
+
/**
 * Starts the internal eval HTTP server and waits until it is listening.
 *
 * @param options - Optional `host`, `port`, `routes`, and the cross-encoder
 *   settings consumed by resolveCrossEncoderPort. When `routes` is omitted the
 *   default internal eval routes are created with the resolved cross-encoder.
 * @returns A handle with host, bound port, routePaths, baseUrl, the
 *   cross-encoder resolution result, and an async `close()`.
 * @throws {Error} When the listening server does not report a TCP address;
 *   the server is closed before the error is thrown.
 */
async function startInternalEvalServer(options = {}) {
  const host2 = options.host ?? DEFAULT_INTERNAL_EVAL_HOST;
  const port2 = options.port ?? DEFAULT_INTERNAL_EVAL_PORT;
  const crossEncoderResolution = resolveCrossEncoderPort(options);
  const routes = options.routes ?? createInternalEvalRoutes({ crossEncoder: crossEncoderResolution.port });
  const server2 = createServer((request, response) => {
    void handleRequest(request, response, routes, host2, port2).catch(() => {
      if (response.headersSent === true) {
        // A response is already under way, so drop it instead of writing 500.
        response.destroy();
        return;
      }
      writeTextResponse(response, 500, "Internal server error.\n");
    });
  });
  await listen(server2, port2, host2);
  const address = server2.address();
  const hasTcpAddress = address !== null && typeof address !== "string";
  if (!hasTcpAddress) {
    await closeServer(server2);
    throw new Error("Internal eval server did not expose a TCP address.");
  }
  // The port is read back from the bound address rather than from the request.
  const boundPort = address.port;
  const close = async () => {
    await closeServer(server2);
  };
  return {
    host: host2,
    port: boundPort,
    routePaths: routes.map((route) => route.path),
    baseUrl: `http://${formatHostForUrl(host2)}:${boundPort}`,
    crossEncoder: crossEncoderResolution.result,
    close
  };
}
|
|
2846
|
+
/**
 * Decides which cross-encoder (if any) the server should use and reports how
 * that decision was reached.
 *
 * Resolution order:
 *   1. `options.crossEncoder`, when truthy, is used as-is ("configured").
 *   2. `options.autoResolveCrossEncoder === false` disables lookup ("not_configured").
 *   3. Otherwise the config is read, an API key is resolved from it, and an
 *      OpenAI cross-encoder is constructed for the "cross_encoder" model.
 *
 * No step throws: a failure reading the config or constructing the adapter is
 * reported as status "error", and a failure resolving the API key as
 * "not_configured", each with a `reason`.
 *
 * @param {object} options - Server options (see the steps above).
 * @returns {{ port: *, result: { status: string, reason?: string } }}
 */
function resolveCrossEncoderPort(options) {
  const configured = (port) => ({ port, result: { status: "configured" } });
  const unavailable = (status, reason) => ({ port: void 0, result: { status, reason } });

  if (options.crossEncoder) {
    return configured(options.crossEncoder);
  }
  if (options.autoResolveCrossEncoder === false) {
    return unavailable("not_configured", "Auto-resolution disabled by caller.");
  }

  let config;
  try {
    config = readConfig();
  } catch (error) {
    return unavailable("error", toReason(error, "Failed to read agenr config for cross-encoder resolution."));
  }

  let apiKey;
  try {
    apiKey = resolveCrossEncoderApiKey(config);
  } catch (error) {
    return unavailable("not_configured", toReason(error, "OPENAI_API_KEY not configured."));
  }

  try {
    const { modelId } = resolveModel(config, "cross_encoder");
    return configured(createOpenAICrossEncoder({ apiKey, model: modelId }));
  } catch (error) {
    return unavailable("error", toReason(error, "Failed to construct OpenAI cross-encoder adapter."));
  }
}
|
|
2902
|
+
/**
 * Picks a human-readable reason for a caught value.
 *
 * @param {*} error - The caught value.
 * @param {string} fallback - Text to use when `error` is not an `Error` or its
 *   message is empty.
 * @returns {string} `error.message` when available and non-empty, else `fallback`.
 */
function toReason(error, fallback) {
  const message = error instanceof Error ? error.message : "";
  return message.length > 0 ? message : fallback;
}
|
|
2908
|
+
/**
 * Dispatches one incoming HTTP request to the route whose `path` equals the
 * request pathname.
 *
 * - No matching route: plain-text 404.
 * - Matching route but different method: 405 with an `allow` header.
 * - Otherwise the body is read, wrapped in a `Request`, passed to
 *   `route.handler`, and the returned response is written back.
 *
 * @param {import("node:http").IncomingMessage} request
 * @param {import("node:http").ServerResponse} response
 * @param {Array<{ path: string, method: string, handler: Function }>} routes
 * @param {string} fallbackHost - Host used to build the absolute request URL.
 * @param {number} fallbackPort - Port used when the socket reports no local port.
 * @returns {Promise<void>}
 */
var handleRequest = async (request, response, routes, fallbackHost, fallbackPort) => {
  // request.url is only a path, so an absolute base is needed to parse it.
  const target = request.url ?? "/";
  const base = `http://${formatHostForUrl(fallbackHost)}:${request.socket.localPort ?? fallbackPort}`;
  const requestUrl = new URL(target, base);

  const route = routes.find((candidate) => candidate.path === requestUrl.pathname);
  if (!route) {
    writeTextResponse(response, 404, "Not found.\n");
    return;
  }

  const methodMatches = request.method === route.method;
  if (!methodMatches) {
    response.statusCode = 405;
    response.setHeader("allow", route.method);
    response.end();
    return;
  }

  const payload = await readBody(request);
  const init = {
    method: route.method,
    headers: toHeaders(request),
    // An empty body is passed as undefined rather than a zero-length buffer.
    body: payload.length > 0 ? payload : void 0
  };
  const routeResponse = await route.handler(new Request(requestUrl, init));
  await writeRouteResponse(response, routeResponse);
};
|
|
2930
|
+
/**
 * Collects every chunk of an async-iterable request into a single `Buffer`.
 * String chunks are converted with `Buffer.from`; other chunks are used as-is.
 *
 * @param {AsyncIterable<string | Buffer>} request - Source of body chunks.
 * @returns {Promise<Buffer>} The concatenated body (empty when there were no chunks).
 */
var readBody = async (request) => {
  const parts = [];
  for await (const piece of request) {
    const bytes = typeof piece === "string" ? Buffer.from(piece) : piece;
    parts.push(bytes);
  }
  return Buffer.concat(parts);
};
|
|
2937
|
+
/**
 * Copies the header map of a request into a `Headers` instance.
 *
 * Entries whose value is `undefined` are skipped, array values are appended
 * one item at a time, and single values are set directly.
 *
 * @param {{ headers: Record<string, string | string[] | undefined> }} request
 * @returns {Headers}
 */
var toHeaders = (request) => {
  const result = new Headers();
  Object.entries(request.headers).forEach(([name, value]) => {
    if (value === void 0) {
      return;
    }
    if (!Array.isArray(value)) {
      result.set(name, value);
      return;
    }
    for (const item of value) {
      result.append(name, item);
    }
  });
  return result;
};
|
|
2953
|
+
/**
 * Copies a route's `Response` onto the Node HTTP response: status, headers,
 * then the fully buffered body.
 *
 * Header names that occur more than once while iterating (`set-cookie` is the
 * case where `Headers` iteration yields one entry per value) are collected and
 * passed to `setHeader` as an array. Calling `setHeader` once per entry would
 * overwrite the earlier values and only the last one would be sent.
 *
 * @param {import("node:http").ServerResponse} response - Outgoing response.
 * @param {Response} routeResponse - Response returned by the route handler.
 * @returns {Promise<void>}
 */
var writeRouteResponse = async (response, routeResponse) => {
  response.statusCode = routeResponse.status;

  // Group values by name, preserving first-seen order of the names.
  const valuesByName = new Map();
  for (const [name, value] of routeResponse.headers) {
    const existing = valuesByName.get(name);
    if (existing === void 0) {
      valuesByName.set(name, [value]);
    } else {
      existing.push(value);
    }
  }
  for (const [name, values] of valuesByName) {
    response.setHeader(name, values.length === 1 ? values[0] : values);
  }

  const body = Buffer.from(await routeResponse.arrayBuffer());
  response.end(body);
};
|
|
2961
|
+
/**
 * Sends a plain-text response: sets the status code, marks the content type as
 * UTF-8 text, and ends the response with `body`.
 *
 * @param {import("node:http").ServerResponse} response - Outgoing response.
 * @param {number} status - HTTP status code.
 * @param {string} body - Text to send.
 * @returns {void}
 */
var writeTextResponse = (response, status, body) => {
  const contentType = "text/plain; charset=utf-8";
  response.statusCode = status;
  response.setHeader("content-type", contentType);
  response.end(body);
};
|
|
2966
|
+
/**
 * Promise wrapper around `server.listen(port, host)`.
 *
 * Resolves on the first "listening" event and rejects with the emitted error
 * on the first "error" event. Whichever fires first removes the listener
 * registered for the other event.
 *
 * @param {import("node:http").Server} server2 - Server to start.
 * @param {number} port2 - Port to bind.
 * @param {string} host2 - Host to bind.
 * @returns {Promise<void>}
 */
var listen = async (server2, port2, host2) => {
  await new Promise((resolve, reject) => {
    function handleError(error) {
      server2.off("listening", handleListening);
      reject(error);
    }
    function handleListening() {
      server2.off("error", handleError);
      resolve();
    }
    server2.once("error", handleError);
    server2.once("listening", handleListening);
    server2.listen(port2, host2);
  });
};
|
|
2981
|
+
/**
 * Closes a server and waits for the close callback.
 *
 * Does nothing when `server2.listening` is not `true`. Rejects with the error
 * passed to the close callback, if any.
 *
 * @param {import("node:http").Server} server2 - Server to close.
 * @returns {Promise<void>}
 */
var closeServer = async (server2) => {
  if (server2.listening === true) {
    await new Promise((resolve, reject) => {
      server2.close((error) => {
        if (!error) {
          resolve();
          return;
        }
        reject(error);
      });
    });
  }
};
|
|
2995
|
+
/**
 * Makes a host safe to embed in a URL authority: a host containing ":" that is
 * not already bracketed (e.g. an IPv6 literal) is wrapped in "[...]"; any
 * other host is returned unchanged.
 *
 * @param {string} host2 - Host name or IP literal.
 * @returns {string}
 */
var formatHostForUrl = (host2) => {
  const needsBrackets = host2.includes(":") && !host2.startsWith("[");
  return needsBrackets ? `[${host2}]` : host2;
};
|
|
3001
|
+
|
|
3002
|
+
// src/internal-eval-server.ts
|
|
3003
|
+
// Environment variables that configure the bind address. The LEGACY_* names
// are consulted only when the primary variable is unset or blank.
var HOST_ENV_NAME = "AGENR_INTERNAL_EVAL_HOST";
var PORT_ENV_NAME = "AGENR_INTERNAL_EVAL_PORT";
var LEGACY_HOST_ENV_NAME = "AGENR_INTERNAL_RECALL_EVAL_HOST";
var LEGACY_PORT_ENV_NAME = "AGENR_INTERNAL_RECALL_EVAL_PORT";

// Resolve the bind address, start the server, and report what is being served.
var host = resolveHost(process.env[HOST_ENV_NAME], process.env[LEGACY_HOST_ENV_NAME]);
var port = resolvePort(process.env[PORT_ENV_NAME], process.env[LEGACY_PORT_ENV_NAME]);
var server = await startInternalEvalServer({ host, port });
console.log(`Internal eval dev server listening at ${server.baseUrl}`);
console.log(`Serving routes: ${server.routePaths.join(", ")}`);

// Report how cross-encoder resolution went at startup.
switch (server.crossEncoder.status) {
  case "configured":
    console.log("Cross-encoder enabled: OpenAI credential resolved at startup.");
    break;
  case "not_configured":
    console.log("Cross-encoder disabled: OPENAI_API_KEY not configured.");
    break;
  default:
    console.warn(`Cross-encoder unavailable: ${server.crossEncoder.reason ?? "construction failed"}.`);
    break;
}

// Shut down on the usual termination signals.
for (const signalName of ["SIGINT", "SIGTERM"]) {
  installSignalHandler(signalName);
}
|
|
3021
|
+
/**
 * Registers a one-shot listener for `signal` that starts `shutdown(signal)`.
 * The promise returned by `shutdown` is intentionally not awaited.
 *
 * @param {string} signal - Signal name, e.g. "SIGINT".
 * @returns {void}
 */
function installSignalHandler(signal) {
  const onSignal = () => {
    void shutdown(signal);
  };
  process.once(signal, onSignal);
}
|
|
3026
|
+
// Set once the first shutdown begins so later signals are ignored.
var shuttingDown = false;

/**
 * Closes the dev server and exits the process. Only the first call does any
 * work; later calls return immediately.
 *
 * Exits with code 0 after a clean close. If closing fails, the error is logged
 * and the exit code is 1, so the failure is neither discarded nor reported as
 * success.
 *
 * @param {string} signal - Name of the signal that triggered the shutdown.
 * @returns {Promise<void>}
 */
async function shutdown(signal) {
  if (shuttingDown === true) {
    return;
  }
  shuttingDown = true;
  console.log(`Received ${signal}. Shutting down internal eval dev server.`);
  let exitCode = 0;
  try {
    await server.close();
  } catch (error) {
    exitCode = 1;
    console.error("Failed to close internal eval dev server cleanly.", error);
  } finally {
    process.exit(exitCode);
  }
}
|
|
3039
|
+
/**
 * Chooses the bind host from two optional strings.
 *
 * The trimmed `value` wins when non-empty, then the trimmed `fallbackValue`;
 * if both are missing or blank, DEFAULT_INTERNAL_EVAL_HOST is returned.
 *
 * @param {string | undefined} value - Preferred host setting.
 * @param {string | undefined} fallbackValue - Secondary host setting.
 * @returns {string}
 */
function resolveHost(value, fallbackValue) {
  const candidate = value?.trim() || fallbackValue?.trim();
  return candidate ? candidate : DEFAULT_INTERNAL_EVAL_HOST;
}
|
|
3046
|
+
/**
 * Chooses and validates the bind port from two optional strings.
 *
 * The trimmed `value` wins when non-empty, then the trimmed `fallbackValue`;
 * if both are missing or blank, DEFAULT_INTERNAL_EVAL_PORT is returned. The
 * chosen text must consist solely of decimal digits and parse to 0..65535.
 *
 * @param {string | undefined} value - Preferred port setting.
 * @param {string | undefined} fallbackValue - Secondary port setting.
 * @returns {number} The parsed port.
 * @throws {Error} When the chosen text is not an integer between 0 and 65535.
 */
function resolvePort(value, fallbackValue) {
  const raw = value?.trim() || fallbackValue?.trim();
  if (!raw) {
    return DEFAULT_INTERNAL_EVAL_PORT;
  }
  // Digits only: signs, decimals, exponents, and hex prefixes are rejected.
  const isDigitsOnly = /^\d+$/u.test(raw) === true;
  const parsed = isDigitsOnly ? Number.parseInt(raw, 10) : Number.NaN;
  const inRange = Number.isInteger(parsed) && parsed >= 0 && parsed <= 65535;
  if (!inRange) {
    throw new Error(`${PORT_ENV_NAME} must be an integer between 0 and 65535.`);
  }
  return parsed;
}
|