@claritylabs/cl-sdk 0.6.0 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.mts +372 -67
- package/dist/index.d.ts +372 -67
- package/dist/index.js +526 -219
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +523 -219
- package/dist/index.mjs.map +1 -1
- package/dist/storage-sqlite.d.mts +52 -10
- package/dist/storage-sqlite.d.ts +52 -10
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -54,6 +54,7 @@ __export(index_exports, {
|
|
|
54
54
|
CommercialAutoDeclarationsSchema: () => CommercialAutoDeclarationsSchema,
|
|
55
55
|
CommercialPropertyDeclarationsSchema: () => CommercialPropertyDeclarationsSchema,
|
|
56
56
|
CommunicationIntentSchema: () => CommunicationIntentSchema,
|
|
57
|
+
ConditionKeyValueSchema: () => ConditionKeyValueSchema,
|
|
57
58
|
ConditionTypeSchema: () => ConditionTypeSchema,
|
|
58
59
|
ConstructionTypeSchema: () => ConstructionTypeSchema,
|
|
59
60
|
ContactSchema: () => ContactSchema,
|
|
@@ -220,6 +221,7 @@ __export(index_exports, {
|
|
|
220
221
|
chunkDocument: () => chunkDocument,
|
|
221
222
|
createApplicationPipeline: () => createApplicationPipeline,
|
|
222
223
|
createExtractor: () => createExtractor,
|
|
224
|
+
createPipelineContext: () => createPipelineContext,
|
|
223
225
|
createQueryAgent: () => createQueryAgent,
|
|
224
226
|
extractPageRange: () => extractPageRange,
|
|
225
227
|
fillAcroForm: () => fillAcroForm,
|
|
@@ -229,6 +231,7 @@ __export(index_exports, {
|
|
|
229
231
|
getTemplate: () => getTemplate,
|
|
230
232
|
overlayTextOnPdf: () => overlayTextOnPdf,
|
|
231
233
|
pLimit: () => pLimit,
|
|
234
|
+
safeGenerateObject: () => safeGenerateObject,
|
|
232
235
|
sanitizeNulls: () => sanitizeNulls,
|
|
233
236
|
stripFences: () => stripFences,
|
|
234
237
|
withRetry: () => withRetry
|
|
@@ -308,6 +311,69 @@ function sanitizeNulls(obj) {
|
|
|
308
311
|
return obj;
|
|
309
312
|
}
|
|
310
313
|
|
|
314
|
+
// src/core/safe-generate.ts
|
|
315
|
+
/**
 * Runs `generateObject(params)` through `withRetry`, repeating the whole call
 * after a failure, and optionally degrades to a caller-supplied fallback
 * instead of throwing once every attempt has failed.
 *
 * @param {Function} generateObject - Invoked as `generateObject(params)`.
 * @param {object} params - Passed through unchanged to `generateObject`.
 * @param {object} [options]
 * @param {number} [options.maxRetries=1] - Extra attempts after the first one.
 *   Negative or NaN values are treated as 0 so at least one attempt is made.
 * @param {number} [options.retryDelayMs=1000] - Pause between two attempts.
 * @param {*} [options.fallback] - When not `undefined`, returned wrapped as
 *   `{ object: fallback }` after the last attempt failed.
 * @param {Function} [options.log] - Receives progress messages; also handed to
 *   `withRetry` as its second argument.
 * @param {Function} [options.onError] - Called with `(error, attempt)` after
 *   each failed attempt; `attempt` is zero-based.
 * @returns {Promise<*>} The value `generateObject` resolved with, or
 *   `{ object: options.fallback }`.
 * @throws The error of the last attempt when no fallback was provided.
 */
async function safeGenerateObject(generateObject, params, options) {
  const requestedRetries = options?.maxRetries ?? 1;
  // Guard against negative/NaN input: the loop below must run at least once,
  // otherwise the function would end in `throw undefined`.
  const maxRetries = requestedRetries > 0 ? requestedRetries : 0;
  const retryDelayMs = options?.retryDelayMs ?? 1e3;
  let lastError;
  for (let attempt = 0; attempt <= maxRetries; attempt++) {
    try {
      return await withRetry(
        () => generateObject(params),
        options?.log
      );
    } catch (error) {
      lastError = error;
      // Awaited because callers may hand in an async observer (for example one
      // that returns the promise of a log call); without the await a rejection
      // would surface as an unhandled rejection instead of reaching the caller.
      await options?.onError?.(error, attempt);
      await options?.log?.(
        `safeGenerateObject attempt ${attempt + 1}/${maxRetries + 1} failed: ${error instanceof Error ? error.message : String(error)}`
      );
      if (attempt < maxRetries) {
        await new Promise((resolve) => setTimeout(resolve, retryDelayMs));
      }
    }
  }
  // `null` is a legitimate fallback value, so only `undefined` means "none".
  if (options?.fallback !== void 0) {
    await options?.log?.(
      `safeGenerateObject: all retries exhausted, returning fallback`
    );
    return { object: options.fallback };
  }
  throw lastError;
}
|
|
344
|
+
|
|
345
|
+
// src/core/pipeline.ts
|
|
346
|
+
/**
 * Creates an in-memory checkpoint tracker for a multi-phase pipeline run.
 *
 * @param {object} opts
 * @param {*} opts.id - Exposed unchanged as `id` on the returned context.
 * @param {Function} [opts.onSave] - Awaited with every checkpoint passed to `save`.
 * @param {object} [opts.resumeFrom] - Earlier checkpoint (`{ phase, state, timestamp }`)
 *   to start from; its phase is reported as complete.
 * @param {string[]} [opts.phaseOrder] - Phases in execution order. When given
 *   together with `resumeFrom`, every phase listed before the resumed one is
 *   reported as complete as well, so a caller gating work on `isPhaseComplete`
 *   does not repeat phases that ran before the checkpoint. Without it only the
 *   checkpoint's own phase is marked complete.
 * @returns {{ id: *, save: Function, getCheckpoint: Function, isPhaseComplete: Function, clear: Function }}
 */
function createPipelineContext(opts) {
  const { resumeFrom, phaseOrder } = opts;
  let latest = resumeFrom;
  const completedPhases = new Set();
  if (resumeFrom) {
    completedPhases.add(resumeFrom.phase);
    if (Array.isArray(phaseOrder)) {
      // indexOf yields -1 for an unknown phase, in which case slice(0, 0)
      // adds nothing and behaviour matches the no-phaseOrder case.
      const resumedIndex = Math.max(0, phaseOrder.indexOf(resumeFrom.phase));
      for (const earlierPhase of phaseOrder.slice(0, resumedIndex)) {
        completedPhases.add(earlierPhase);
      }
    }
  }
  return {
    id: opts.id,
    /** Records `state` as the newest checkpoint for `phase`, then awaits `opts.onSave`. */
    async save(phase, state) {
      const checkpoint = { phase, state, timestamp: Date.now() };
      // In-memory bookkeeping is updated before the callback runs, so
      // getCheckpoint() already returns the new checkpoint while onSave executes.
      latest = checkpoint;
      completedPhases.add(phase);
      await opts.onSave?.(checkpoint);
    },
    /** Returns the most recent checkpoint, or `undefined` when there is none. */
    getCheckpoint() {
      return latest;
    },
    /** Tells whether `phase` was saved in this context or covered by the resumed checkpoint. */
    isPhaseComplete(phase) {
      return completedPhases.has(phase);
    },
    /** Forgets the latest checkpoint and all completed phases. */
    clear() {
      latest = void 0;
      completedPhases.clear();
    }
  };
}
|
|
376
|
+
|
|
311
377
|
// src/schemas/enums.ts
|
|
312
378
|
var import_zod = require("zod");
|
|
313
379
|
var PolicyTypeSchema = import_zod.z.enum([
|
|
@@ -708,11 +774,15 @@ var ExclusionSchema = import_zod5.z.object({
|
|
|
708
774
|
|
|
709
775
|
// src/schemas/condition.ts
|
|
710
776
|
var import_zod6 = require("zod");
|
|
777
|
+
var ConditionKeyValueSchema = import_zod6.z.object({
|
|
778
|
+
key: import_zod6.z.string(),
|
|
779
|
+
value: import_zod6.z.string()
|
|
780
|
+
});
|
|
711
781
|
var PolicyConditionSchema = import_zod6.z.object({
|
|
712
782
|
name: import_zod6.z.string(),
|
|
713
783
|
conditionType: ConditionTypeSchema,
|
|
714
784
|
content: import_zod6.z.string(),
|
|
715
|
-
keyValues: import_zod6.z.
|
|
785
|
+
keyValues: import_zod6.z.array(ConditionKeyValueSchema).optional(),
|
|
716
786
|
pageNumber: import_zod6.z.number().optional()
|
|
717
787
|
});
|
|
718
788
|
|
|
@@ -1881,21 +1951,33 @@ async function formatDocumentContent(doc, generateText, options) {
|
|
|
1881
1951
|
for (let i = 0; i < entries.length; i += MAX_ENTRIES_PER_BATCH) {
|
|
1882
1952
|
batches.push(entries.slice(i, i + MAX_ENTRIES_PER_BATCH));
|
|
1883
1953
|
}
|
|
1884
|
-
for (
|
|
1885
|
-
const
|
|
1886
|
-
|
|
1887
|
-
() =>
|
|
1888
|
-
|
|
1889
|
-
|
|
1890
|
-
|
|
1891
|
-
|
|
1892
|
-
|
|
1893
|
-
|
|
1894
|
-
|
|
1895
|
-
|
|
1954
|
+
for (let batchIdx = 0; batchIdx < batches.length; batchIdx++) {
|
|
1955
|
+
const batch = batches[batchIdx];
|
|
1956
|
+
try {
|
|
1957
|
+
const prompt = buildFormatPrompt(batch.map((e) => ({ id: e.id, text: e.text })));
|
|
1958
|
+
const result = await withRetry(
|
|
1959
|
+
() => generateText({
|
|
1960
|
+
prompt,
|
|
1961
|
+
maxTokens: 16384,
|
|
1962
|
+
providerOptions: options?.providerOptions
|
|
1963
|
+
})
|
|
1964
|
+
);
|
|
1965
|
+
if (result.usage) {
|
|
1966
|
+
totalUsage.inputTokens += result.usage.inputTokens;
|
|
1967
|
+
totalUsage.outputTokens += result.usage.outputTokens;
|
|
1968
|
+
}
|
|
1969
|
+
const formatted = parseFormatResponse(result.text);
|
|
1970
|
+
if (formatted.size < batch.length) {
|
|
1971
|
+
await options?.log?.(
|
|
1972
|
+
`Format batch ${batchIdx + 1}/${batches.length}: model returned ${formatted.size}/${batch.length} entries \u2014 unformatted entries will keep original content`
|
|
1973
|
+
);
|
|
1974
|
+
}
|
|
1975
|
+
applyFormattedContent(doc, batch, formatted);
|
|
1976
|
+
} catch (error) {
|
|
1977
|
+
await options?.log?.(
|
|
1978
|
+
`Format batch ${batchIdx + 1}/${batches.length} failed, keeping original content: ${error instanceof Error ? error.message : String(error)}`
|
|
1979
|
+
);
|
|
1896
1980
|
}
|
|
1897
|
-
const formatted = parseFormatResponse(result.text);
|
|
1898
|
-
applyFormattedContent(doc, batch, formatted);
|
|
1899
1981
|
}
|
|
1900
1982
|
return { document: doc, usage: totalUsage };
|
|
1901
1983
|
}
|
|
@@ -2736,9 +2818,13 @@ var ExtractionTaskSchema = import_zod18.z.object({
|
|
|
2736
2818
|
endPage: import_zod18.z.number(),
|
|
2737
2819
|
description: import_zod18.z.string()
|
|
2738
2820
|
});
|
|
2821
|
+
var PageMapEntrySchema = import_zod18.z.object({
|
|
2822
|
+
section: import_zod18.z.string(),
|
|
2823
|
+
pages: import_zod18.z.string()
|
|
2824
|
+
});
|
|
2739
2825
|
var ExtractionPlanSchema = import_zod18.z.object({
|
|
2740
2826
|
tasks: import_zod18.z.array(ExtractionTaskSchema),
|
|
2741
|
-
pageMap: import_zod18.z.
|
|
2827
|
+
pageMap: import_zod18.z.array(PageMapEntrySchema).optional()
|
|
2742
2828
|
});
|
|
2743
2829
|
function buildPlanPrompt(templateHints) {
|
|
2744
2830
|
return `You are planning the extraction of an insurance document. You have already classified this document. Now scan the full document and create a page map + extraction plan.
|
|
@@ -2767,7 +2853,10 @@ Return JSON:
|
|
|
2767
2853
|
{ "extractorName": "carrier_info", "startPage": 1, "endPage": 2, "description": "Extract carrier details from declarations page" },
|
|
2768
2854
|
...
|
|
2769
2855
|
],
|
|
2770
|
-
"pageMap":
|
|
2856
|
+
"pageMap": [
|
|
2857
|
+
{ "section": "declarations", "pages": "pages 1-3" },
|
|
2858
|
+
{ "section": "endorsements", "pages": "pages 15-22" }
|
|
2859
|
+
]
|
|
2771
2860
|
}
|
|
2772
2861
|
|
|
2773
2862
|
Create tasks that cover the entire document. Prefer specific extractors over generic "sections" where possible. Keep page ranges tight \u2014 only include pages relevant to each extractor.
|
|
@@ -3260,7 +3349,8 @@ function createExtractor(config) {
|
|
|
3260
3349
|
onTokenUsage,
|
|
3261
3350
|
onProgress,
|
|
3262
3351
|
log,
|
|
3263
|
-
providerOptions
|
|
3352
|
+
providerOptions,
|
|
3353
|
+
onCheckpointSave
|
|
3264
3354
|
} = config;
|
|
3265
3355
|
const limit = pLimit(concurrency);
|
|
3266
3356
|
let totalUsage = { inputTokens: 0, outputTokens: 0 };
|
|
@@ -3271,100 +3361,106 @@ function createExtractor(config) {
|
|
|
3271
3361
|
onTokenUsage?.(usage);
|
|
3272
3362
|
}
|
|
3273
3363
|
}
|
|
3274
|
-
async function extract(pdfBase64, documentId) {
|
|
3364
|
+
async function extract(pdfBase64, documentId, options) {
|
|
3275
3365
|
const id = documentId ?? `doc-${Date.now()}`;
|
|
3276
3366
|
const memory = /* @__PURE__ */ new Map();
|
|
3277
3367
|
totalUsage = { inputTokens: 0, outputTokens: 0 };
|
|
3278
|
-
|
|
3279
|
-
|
|
3280
|
-
|
|
3281
|
-
|
|
3282
|
-
|
|
3283
|
-
|
|
3284
|
-
|
|
3285
|
-
|
|
3286
|
-
|
|
3287
|
-
);
|
|
3288
|
-
trackUsage(classifyResult.usage);
|
|
3289
|
-
memory.set("classify", classifyResult.object);
|
|
3290
|
-
const { documentType, policyTypes } = classifyResult.object;
|
|
3291
|
-
const primaryType = policyTypes[0] ?? "other";
|
|
3292
|
-
const template = getTemplate(primaryType);
|
|
3293
|
-
onProgress?.(`Planning extraction for ${primaryType} ${documentType}...`);
|
|
3294
|
-
const templateHints = [
|
|
3295
|
-
`Document type: ${primaryType} ${documentType}`,
|
|
3296
|
-
`Expected sections: ${template.expectedSections.join(", ")}`,
|
|
3297
|
-
`Page hints: ${Object.entries(template.pageHints).map(([k, v]) => `${k}: ${v}`).join("; ")}`,
|
|
3298
|
-
`Total pages: ${pageCount}`
|
|
3299
|
-
].join("\n");
|
|
3300
|
-
const planResult = await withRetry(
|
|
3301
|
-
() => generateObject({
|
|
3302
|
-
prompt: buildPlanPrompt(templateHints),
|
|
3303
|
-
schema: ExtractionPlanSchema,
|
|
3304
|
-
maxTokens: 2048,
|
|
3305
|
-
providerOptions
|
|
3306
|
-
})
|
|
3307
|
-
);
|
|
3308
|
-
trackUsage(planResult.usage);
|
|
3309
|
-
const tasks = planResult.object.tasks;
|
|
3310
|
-
onProgress?.(`Dispatching ${tasks.length} extractors...`);
|
|
3311
|
-
const extractorResults = await Promise.all(
|
|
3312
|
-
tasks.map(
|
|
3313
|
-
(task) => limit(async () => {
|
|
3314
|
-
const ext = getExtractor(task.extractorName);
|
|
3315
|
-
if (!ext) {
|
|
3316
|
-
await log?.(`Unknown extractor: ${task.extractorName}, skipping`);
|
|
3317
|
-
return null;
|
|
3318
|
-
}
|
|
3319
|
-
onProgress?.(`Extracting ${task.extractorName} (pages ${task.startPage}-${task.endPage})...`);
|
|
3320
|
-
try {
|
|
3321
|
-
const result = await runExtractor({
|
|
3322
|
-
name: task.extractorName,
|
|
3323
|
-
prompt: ext.buildPrompt(),
|
|
3324
|
-
schema: ext.schema,
|
|
3325
|
-
pdfBase64,
|
|
3326
|
-
startPage: task.startPage,
|
|
3327
|
-
endPage: task.endPage,
|
|
3328
|
-
generateObject,
|
|
3329
|
-
convertPdfToImages,
|
|
3330
|
-
maxTokens: ext.maxTokens ?? 4096,
|
|
3331
|
-
providerOptions
|
|
3332
|
-
});
|
|
3333
|
-
trackUsage(result.usage);
|
|
3334
|
-
return result;
|
|
3335
|
-
} catch (error) {
|
|
3336
|
-
await log?.(`Extractor ${task.extractorName} failed: ${error}`);
|
|
3337
|
-
return null;
|
|
3338
|
-
}
|
|
3339
|
-
})
|
|
3340
|
-
)
|
|
3341
|
-
);
|
|
3342
|
-
for (const result of extractorResults) {
|
|
3343
|
-
if (result) {
|
|
3344
|
-
memory.set(result.name, result.data);
|
|
3368
|
+
const pipelineCtx = createPipelineContext({
|
|
3369
|
+
id,
|
|
3370
|
+
onSave: onCheckpointSave,
|
|
3371
|
+
resumeFrom: options?.resumeFrom
|
|
3372
|
+
});
|
|
3373
|
+
const resumed = pipelineCtx.getCheckpoint()?.state;
|
|
3374
|
+
if (resumed?.memory) {
|
|
3375
|
+
for (const [k, v] of Object.entries(resumed.memory)) {
|
|
3376
|
+
memory.set(k, v);
|
|
3345
3377
|
}
|
|
3346
3378
|
}
|
|
3347
|
-
|
|
3348
|
-
|
|
3349
|
-
|
|
3350
|
-
|
|
3351
|
-
|
|
3352
|
-
|
|
3353
|
-
|
|
3379
|
+
let classifyResult;
|
|
3380
|
+
if (resumed?.classifyResult && pipelineCtx.isPhaseComplete("classify")) {
|
|
3381
|
+
classifyResult = resumed.classifyResult;
|
|
3382
|
+
onProgress?.("Resuming from checkpoint (classify complete)...");
|
|
3383
|
+
} else {
|
|
3384
|
+
onProgress?.("Classifying document...");
|
|
3385
|
+
const pageCount2 = await getPdfPageCount(pdfBase64);
|
|
3386
|
+
const classifyResponse = await safeGenerateObject(
|
|
3387
|
+
generateObject,
|
|
3388
|
+
{
|
|
3389
|
+
prompt: buildClassifyPrompt(),
|
|
3390
|
+
schema: ClassifyResultSchema,
|
|
3391
|
+
maxTokens: 512,
|
|
3354
3392
|
providerOptions
|
|
3355
|
-
}
|
|
3393
|
+
},
|
|
3394
|
+
{
|
|
3395
|
+
fallback: { documentType: "policy", policyTypes: ["other"], confidence: 0 },
|
|
3396
|
+
log,
|
|
3397
|
+
onError: (err, attempt) => log?.(`Classify attempt ${attempt + 1} failed: ${err}`)
|
|
3398
|
+
}
|
|
3356
3399
|
);
|
|
3357
|
-
trackUsage(
|
|
3358
|
-
|
|
3359
|
-
|
|
3360
|
-
|
|
3361
|
-
|
|
3362
|
-
|
|
3363
|
-
|
|
3364
|
-
|
|
3400
|
+
trackUsage(classifyResponse.usage);
|
|
3401
|
+
classifyResult = classifyResponse.object;
|
|
3402
|
+
memory.set("classify", classifyResult);
|
|
3403
|
+
await pipelineCtx.save("classify", {
|
|
3404
|
+
id,
|
|
3405
|
+
pageCount: pageCount2,
|
|
3406
|
+
classifyResult,
|
|
3407
|
+
memory: Object.fromEntries(memory)
|
|
3408
|
+
});
|
|
3409
|
+
}
|
|
3410
|
+
const { documentType, policyTypes } = classifyResult;
|
|
3411
|
+
const primaryType = policyTypes[0] ?? "other";
|
|
3412
|
+
const template = getTemplate(primaryType);
|
|
3413
|
+
const pageCount = resumed?.pageCount ?? await getPdfPageCount(pdfBase64);
|
|
3414
|
+
let plan;
|
|
3415
|
+
if (resumed?.plan && pipelineCtx.isPhaseComplete("plan")) {
|
|
3416
|
+
plan = resumed.plan;
|
|
3417
|
+
onProgress?.("Resuming from checkpoint (plan complete)...");
|
|
3418
|
+
} else {
|
|
3419
|
+
onProgress?.(`Planning extraction for ${primaryType} ${documentType}...`);
|
|
3420
|
+
const templateHints = [
|
|
3421
|
+
`Document type: ${primaryType} ${documentType}`,
|
|
3422
|
+
`Expected sections: ${template.expectedSections.join(", ")}`,
|
|
3423
|
+
`Page hints: ${Object.entries(template.pageHints).map(([k, v]) => `${k}: ${v}`).join("; ")}`,
|
|
3424
|
+
`Total pages: ${pageCount}`
|
|
3425
|
+
].join("\n");
|
|
3426
|
+
const planResponse = await safeGenerateObject(
|
|
3427
|
+
generateObject,
|
|
3428
|
+
{
|
|
3429
|
+
prompt: buildPlanPrompt(templateHints),
|
|
3430
|
+
schema: ExtractionPlanSchema,
|
|
3431
|
+
maxTokens: 2048,
|
|
3432
|
+
providerOptions
|
|
3433
|
+
},
|
|
3434
|
+
{
|
|
3435
|
+
fallback: {
|
|
3436
|
+
tasks: [{ extractorName: "sections", startPage: 1, endPage: pageCount, description: "Full document fallback extraction" }]
|
|
3437
|
+
},
|
|
3438
|
+
log,
|
|
3439
|
+
onError: (err, attempt) => log?.(`Plan attempt ${attempt + 1} failed: ${err}`)
|
|
3440
|
+
}
|
|
3441
|
+
);
|
|
3442
|
+
trackUsage(planResponse.usage);
|
|
3443
|
+
plan = planResponse.object;
|
|
3444
|
+
await pipelineCtx.save("plan", {
|
|
3445
|
+
id,
|
|
3446
|
+
pageCount,
|
|
3447
|
+
classifyResult,
|
|
3448
|
+
plan,
|
|
3449
|
+
memory: Object.fromEntries(memory)
|
|
3450
|
+
});
|
|
3451
|
+
}
|
|
3452
|
+
if (!pipelineCtx.isPhaseComplete("extract")) {
|
|
3453
|
+
const tasks = plan.tasks;
|
|
3454
|
+
onProgress?.(`Dispatching ${tasks.length} extractors...`);
|
|
3455
|
+
const extractorResults = await Promise.all(
|
|
3456
|
+
tasks.map(
|
|
3365
3457
|
(task) => limit(async () => {
|
|
3366
3458
|
const ext = getExtractor(task.extractorName);
|
|
3367
|
-
if (!ext)
|
|
3459
|
+
if (!ext) {
|
|
3460
|
+
await log?.(`Unknown extractor: ${task.extractorName}, skipping`);
|
|
3461
|
+
return null;
|
|
3462
|
+
}
|
|
3463
|
+
onProgress?.(`Extracting ${task.extractorName} (pages ${task.startPage}-${task.endPage})...`);
|
|
3368
3464
|
try {
|
|
3369
3465
|
const result = await runExtractor({
|
|
3370
3466
|
name: task.extractorName,
|
|
@@ -3381,28 +3477,114 @@ function createExtractor(config) {
|
|
|
3381
3477
|
trackUsage(result.usage);
|
|
3382
3478
|
return result;
|
|
3383
3479
|
} catch (error) {
|
|
3384
|
-
await log?.(`
|
|
3480
|
+
await log?.(`Extractor ${task.extractorName} failed: ${error}`);
|
|
3385
3481
|
return null;
|
|
3386
3482
|
}
|
|
3387
3483
|
})
|
|
3388
3484
|
)
|
|
3389
3485
|
);
|
|
3390
|
-
for (const result of
|
|
3486
|
+
for (const result of extractorResults) {
|
|
3391
3487
|
if (result) {
|
|
3392
3488
|
memory.set(result.name, result.data);
|
|
3393
3489
|
}
|
|
3394
3490
|
}
|
|
3491
|
+
await pipelineCtx.save("extract", {
|
|
3492
|
+
id,
|
|
3493
|
+
pageCount,
|
|
3494
|
+
classifyResult,
|
|
3495
|
+
plan,
|
|
3496
|
+
memory: Object.fromEntries(memory)
|
|
3497
|
+
});
|
|
3498
|
+
}
|
|
3499
|
+
if (!pipelineCtx.isPhaseComplete("review")) {
|
|
3500
|
+
for (let round = 0; round < maxReviewRounds; round++) {
|
|
3501
|
+
const extractedKeys = [...memory.keys()].filter((k) => k !== "classify");
|
|
3502
|
+
const reviewResponse = await safeGenerateObject(
|
|
3503
|
+
generateObject,
|
|
3504
|
+
{
|
|
3505
|
+
prompt: buildReviewPrompt(template.required, extractedKeys),
|
|
3506
|
+
schema: ReviewResultSchema,
|
|
3507
|
+
maxTokens: 1024,
|
|
3508
|
+
providerOptions
|
|
3509
|
+
},
|
|
3510
|
+
{
|
|
3511
|
+
fallback: { complete: true, missingFields: [], additionalTasks: [] },
|
|
3512
|
+
log,
|
|
3513
|
+
onError: (err, attempt) => log?.(`Review round ${round + 1} attempt ${attempt + 1} failed: ${err}`)
|
|
3514
|
+
}
|
|
3515
|
+
);
|
|
3516
|
+
trackUsage(reviewResponse.usage);
|
|
3517
|
+
if (reviewResponse.object.complete || reviewResponse.object.additionalTasks.length === 0) {
|
|
3518
|
+
onProgress?.("Extraction complete.");
|
|
3519
|
+
break;
|
|
3520
|
+
}
|
|
3521
|
+
onProgress?.(`Review round ${round + 1}: dispatching ${reviewResponse.object.additionalTasks.length} follow-up extractors...`);
|
|
3522
|
+
const followUpResults = await Promise.all(
|
|
3523
|
+
reviewResponse.object.additionalTasks.map(
|
|
3524
|
+
(task) => limit(async () => {
|
|
3525
|
+
const ext = getExtractor(task.extractorName);
|
|
3526
|
+
if (!ext) return null;
|
|
3527
|
+
try {
|
|
3528
|
+
const result = await runExtractor({
|
|
3529
|
+
name: task.extractorName,
|
|
3530
|
+
prompt: ext.buildPrompt(),
|
|
3531
|
+
schema: ext.schema,
|
|
3532
|
+
pdfBase64,
|
|
3533
|
+
startPage: task.startPage,
|
|
3534
|
+
endPage: task.endPage,
|
|
3535
|
+
generateObject,
|
|
3536
|
+
convertPdfToImages,
|
|
3537
|
+
maxTokens: ext.maxTokens ?? 4096,
|
|
3538
|
+
providerOptions
|
|
3539
|
+
});
|
|
3540
|
+
trackUsage(result.usage);
|
|
3541
|
+
return result;
|
|
3542
|
+
} catch (error) {
|
|
3543
|
+
await log?.(`Follow-up extractor ${task.extractorName} failed: ${error}`);
|
|
3544
|
+
return null;
|
|
3545
|
+
}
|
|
3546
|
+
})
|
|
3547
|
+
)
|
|
3548
|
+
);
|
|
3549
|
+
for (const result of followUpResults) {
|
|
3550
|
+
if (result) {
|
|
3551
|
+
memory.set(result.name, result.data);
|
|
3552
|
+
}
|
|
3553
|
+
}
|
|
3554
|
+
}
|
|
3555
|
+
await pipelineCtx.save("review", {
|
|
3556
|
+
id,
|
|
3557
|
+
pageCount,
|
|
3558
|
+
classifyResult,
|
|
3559
|
+
plan,
|
|
3560
|
+
memory: Object.fromEntries(memory)
|
|
3561
|
+
});
|
|
3395
3562
|
}
|
|
3396
3563
|
onProgress?.("Assembling document...");
|
|
3397
3564
|
const document = assembleDocument(id, documentType, memory);
|
|
3565
|
+
await pipelineCtx.save("assemble", {
|
|
3566
|
+
id,
|
|
3567
|
+
pageCount,
|
|
3568
|
+
classifyResult,
|
|
3569
|
+
plan,
|
|
3570
|
+
memory: Object.fromEntries(memory),
|
|
3571
|
+
document
|
|
3572
|
+
});
|
|
3398
3573
|
onProgress?.("Formatting extracted content...");
|
|
3399
3574
|
const formatResult = await formatDocumentContent(document, generateText, {
|
|
3400
3575
|
providerOptions,
|
|
3401
|
-
onProgress
|
|
3576
|
+
onProgress,
|
|
3577
|
+
log
|
|
3402
3578
|
});
|
|
3403
3579
|
trackUsage(formatResult.usage);
|
|
3404
3580
|
const chunks = chunkDocument(formatResult.document);
|
|
3405
|
-
|
|
3581
|
+
const finalCheckpoint = pipelineCtx.getCheckpoint();
|
|
3582
|
+
return {
|
|
3583
|
+
document: formatResult.document,
|
|
3584
|
+
chunks,
|
|
3585
|
+
tokenUsage: totalUsage,
|
|
3586
|
+
checkpoint: finalCheckpoint
|
|
3587
|
+
};
|
|
3406
3588
|
}
|
|
3407
3589
|
return { extract };
|
|
3408
3590
|
}
|
|
@@ -4265,7 +4447,6 @@ function createApplicationPipeline(config) {
|
|
|
4265
4447
|
let state = {
|
|
4266
4448
|
id,
|
|
4267
4449
|
pdfBase64: void 0,
|
|
4268
|
-
// Don't persist the full PDF in state
|
|
4269
4450
|
title: void 0,
|
|
4270
4451
|
applicationType: null,
|
|
4271
4452
|
fields: [],
|
|
@@ -4276,13 +4457,20 @@ function createApplicationPipeline(config) {
|
|
|
4276
4457
|
updatedAt: now
|
|
4277
4458
|
};
|
|
4278
4459
|
onProgress?.("Classifying document...");
|
|
4279
|
-
|
|
4280
|
-
|
|
4281
|
-
|
|
4282
|
-
|
|
4283
|
-
|
|
4284
|
-
|
|
4285
|
-
|
|
4460
|
+
await applicationStore?.save(state);
|
|
4461
|
+
let classifyResult;
|
|
4462
|
+
try {
|
|
4463
|
+
const { result, usage: classifyUsage } = await classifyApplication(
|
|
4464
|
+
pdfBase64.slice(0, 2e3),
|
|
4465
|
+
generateObject,
|
|
4466
|
+
providerOptions
|
|
4467
|
+
);
|
|
4468
|
+
trackUsage(classifyUsage);
|
|
4469
|
+
classifyResult = result;
|
|
4470
|
+
} catch (error) {
|
|
4471
|
+
await log?.(`Classification failed, treating as non-application: ${error instanceof Error ? error.message : String(error)}`);
|
|
4472
|
+
classifyResult = { isApplication: false, confidence: 0, applicationType: null };
|
|
4473
|
+
}
|
|
4286
4474
|
if (!classifyResult.isApplication) {
|
|
4287
4475
|
state.status = "complete";
|
|
4288
4476
|
state.updatedAt = Date.now();
|
|
@@ -4292,13 +4480,28 @@ function createApplicationPipeline(config) {
|
|
|
4292
4480
|
state.applicationType = classifyResult.applicationType;
|
|
4293
4481
|
state.status = "extracting";
|
|
4294
4482
|
state.updatedAt = Date.now();
|
|
4483
|
+
await applicationStore?.save(state);
|
|
4295
4484
|
onProgress?.("Extracting form fields...");
|
|
4296
|
-
|
|
4297
|
-
|
|
4298
|
-
|
|
4299
|
-
|
|
4300
|
-
|
|
4301
|
-
|
|
4485
|
+
let fields;
|
|
4486
|
+
try {
|
|
4487
|
+
const { fields: extractedFields, usage: extractUsage } = await extractFields(
|
|
4488
|
+
pdfBase64,
|
|
4489
|
+
generateObject,
|
|
4490
|
+
providerOptions
|
|
4491
|
+
);
|
|
4492
|
+
trackUsage(extractUsage);
|
|
4493
|
+
fields = extractedFields;
|
|
4494
|
+
} catch (error) {
|
|
4495
|
+
await log?.(`Field extraction failed: ${error instanceof Error ? error.message : String(error)}`);
|
|
4496
|
+
fields = [];
|
|
4497
|
+
}
|
|
4498
|
+
if (fields.length === 0) {
|
|
4499
|
+
await log?.("No fields extracted, completing pipeline with empty result");
|
|
4500
|
+
state.status = "complete";
|
|
4501
|
+
state.updatedAt = Date.now();
|
|
4502
|
+
await applicationStore?.save(state);
|
|
4503
|
+
return { state, tokenUsage: totalUsage };
|
|
4504
|
+
}
|
|
4302
4505
|
state.fields = fields;
|
|
4303
4506
|
state.title = classifyResult.applicationType ?? void 0;
|
|
4304
4507
|
state.status = "auto_filling";
|
|
@@ -4330,20 +4533,24 @@ function createApplicationPipeline(config) {
|
|
|
4330
4533
|
limit(async () => {
|
|
4331
4534
|
const unfilledFields2 = state.fields.filter((f) => !f.value);
|
|
4332
4535
|
if (unfilledFields2.length === 0) return;
|
|
4333
|
-
|
|
4334
|
-
|
|
4335
|
-
|
|
4336
|
-
|
|
4337
|
-
|
|
4338
|
-
|
|
4339
|
-
|
|
4340
|
-
|
|
4341
|
-
const
|
|
4342
|
-
|
|
4343
|
-
field
|
|
4344
|
-
|
|
4345
|
-
|
|
4536
|
+
try {
|
|
4537
|
+
const { result: autoFillResult, usage: afUsage } = await autoFillFromContext(
|
|
4538
|
+
unfilledFields2,
|
|
4539
|
+
orgContext,
|
|
4540
|
+
generateObject,
|
|
4541
|
+
providerOptions
|
|
4542
|
+
);
|
|
4543
|
+
trackUsage(afUsage);
|
|
4544
|
+
for (const match of autoFillResult.matches) {
|
|
4545
|
+
const field = state.fields.find((f) => f.id === match.fieldId);
|
|
4546
|
+
if (field && !field.value) {
|
|
4547
|
+
field.value = match.value;
|
|
4548
|
+
field.source = `auto-fill: ${match.contextKey}`;
|
|
4549
|
+
field.confidence = match.confidence;
|
|
4550
|
+
}
|
|
4346
4551
|
}
|
|
4552
|
+
} catch (e) {
|
|
4553
|
+
await log?.(`Auto-fill from context failed: ${e instanceof Error ? e.message : String(e)}`);
|
|
4347
4554
|
}
|
|
4348
4555
|
})
|
|
4349
4556
|
);
|
|
@@ -4376,13 +4583,18 @@ function createApplicationPipeline(config) {
|
|
|
4376
4583
|
if (unfilledFields.length > 0) {
|
|
4377
4584
|
onProgress?.(`Batching ${unfilledFields.length} remaining questions...`);
|
|
4378
4585
|
state.status = "batching";
|
|
4379
|
-
|
|
4380
|
-
|
|
4381
|
-
|
|
4382
|
-
|
|
4383
|
-
|
|
4384
|
-
|
|
4385
|
-
|
|
4586
|
+
try {
|
|
4587
|
+
const { result: batchResult, usage: batchUsage } = await batchQuestions(
|
|
4588
|
+
unfilledFields,
|
|
4589
|
+
generateObject,
|
|
4590
|
+
providerOptions
|
|
4591
|
+
);
|
|
4592
|
+
trackUsage(batchUsage);
|
|
4593
|
+
state.batches = batchResult.batches;
|
|
4594
|
+
} catch (error) {
|
|
4595
|
+
await log?.(`Batching failed, using single-batch fallback: ${error instanceof Error ? error.message : String(error)}`);
|
|
4596
|
+
state.batches = [unfilledFields.map((f) => f.id)];
|
|
4597
|
+
}
|
|
4386
4598
|
state.currentBatchIndex = 0;
|
|
4387
4599
|
state.status = "collecting";
|
|
4388
4600
|
} else {
|
|
@@ -4409,32 +4621,49 @@ function createApplicationPipeline(config) {
|
|
|
4409
4621
|
(f) => currentBatchFieldIds.includes(f.id)
|
|
4410
4622
|
);
|
|
4411
4623
|
onProgress?.("Classifying reply...");
|
|
4412
|
-
|
|
4413
|
-
|
|
4414
|
-
|
|
4415
|
-
generateObject,
|
|
4416
|
-
providerOptions
|
|
4417
|
-
);
|
|
4418
|
-
trackUsage(intentUsage);
|
|
4419
|
-
let fieldsFilled = 0;
|
|
4420
|
-
let responseText;
|
|
4421
|
-
if (intent.hasAnswers) {
|
|
4422
|
-
onProgress?.("Parsing answers...");
|
|
4423
|
-
const { result: parseResult, usage: parseUsage } = await parseAnswers(
|
|
4624
|
+
let intent;
|
|
4625
|
+
try {
|
|
4626
|
+
const { intent: classifiedIntent, usage: intentUsage } = await classifyReplyIntent(
|
|
4424
4627
|
currentBatchFields,
|
|
4425
4628
|
replyText,
|
|
4426
4629
|
generateObject,
|
|
4427
4630
|
providerOptions
|
|
4428
4631
|
);
|
|
4429
|
-
trackUsage(
|
|
4430
|
-
|
|
4431
|
-
|
|
4432
|
-
|
|
4433
|
-
|
|
4434
|
-
|
|
4435
|
-
|
|
4436
|
-
|
|
4632
|
+
trackUsage(intentUsage);
|
|
4633
|
+
intent = classifiedIntent;
|
|
4634
|
+
} catch (error) {
|
|
4635
|
+
await log?.(`Reply intent classification failed, defaulting to answers_only: ${error instanceof Error ? error.message : String(error)}`);
|
|
4636
|
+
intent = {
|
|
4637
|
+
primaryIntent: "answers_only",
|
|
4638
|
+
hasAnswers: true,
|
|
4639
|
+
questionText: void 0,
|
|
4640
|
+
questionFieldIds: void 0,
|
|
4641
|
+
lookupRequests: void 0
|
|
4642
|
+
};
|
|
4643
|
+
}
|
|
4644
|
+
let fieldsFilled = 0;
|
|
4645
|
+
let responseText;
|
|
4646
|
+
if (intent.hasAnswers) {
|
|
4647
|
+
onProgress?.("Parsing answers...");
|
|
4648
|
+
try {
|
|
4649
|
+
const { result: parseResult, usage: parseUsage } = await parseAnswers(
|
|
4650
|
+
currentBatchFields,
|
|
4651
|
+
replyText,
|
|
4652
|
+
generateObject,
|
|
4653
|
+
providerOptions
|
|
4654
|
+
);
|
|
4655
|
+
trackUsage(parseUsage);
|
|
4656
|
+
for (const answer of parseResult.answers) {
|
|
4657
|
+
const field = state.fields.find((f) => f.id === answer.fieldId);
|
|
4658
|
+
if (field) {
|
|
4659
|
+
field.value = answer.value;
|
|
4660
|
+
field.source = "user";
|
|
4661
|
+
field.confidence = "confirmed";
|
|
4662
|
+
fieldsFilled++;
|
|
4663
|
+
}
|
|
4437
4664
|
}
|
|
4665
|
+
} catch (error) {
|
|
4666
|
+
await log?.(`Answer parsing failed: ${error instanceof Error ? error.message : String(error)}`);
|
|
4438
4667
|
}
|
|
4439
4668
|
}
|
|
4440
4669
|
if (intent.lookupRequests?.length) {
|
|
@@ -4455,36 +4684,45 @@ function createApplicationPipeline(config) {
|
|
|
4455
4684
|
const targetFields = state.fields.filter(
|
|
4456
4685
|
(f) => intent.lookupRequests.some((lr) => lr.targetFieldIds.includes(f.id))
|
|
4457
4686
|
);
|
|
4458
|
-
|
|
4459
|
-
|
|
4460
|
-
|
|
4461
|
-
|
|
4462
|
-
|
|
4463
|
-
|
|
4464
|
-
|
|
4465
|
-
|
|
4466
|
-
|
|
4467
|
-
const
|
|
4468
|
-
|
|
4469
|
-
field
|
|
4470
|
-
|
|
4471
|
-
|
|
4472
|
-
|
|
4687
|
+
try {
|
|
4688
|
+
const { result: lookupResult, usage: lookupUsage } = await fillFromLookup(
|
|
4689
|
+
intent.lookupRequests,
|
|
4690
|
+
targetFields,
|
|
4691
|
+
availableData,
|
|
4692
|
+
generateObject,
|
|
4693
|
+
providerOptions
|
|
4694
|
+
);
|
|
4695
|
+
trackUsage(lookupUsage);
|
|
4696
|
+
for (const fill of lookupResult.fills) {
|
|
4697
|
+
const field = state.fields.find((f) => f.id === fill.fieldId);
|
|
4698
|
+
if (field) {
|
|
4699
|
+
field.value = fill.value;
|
|
4700
|
+
field.source = `lookup: ${fill.source}`;
|
|
4701
|
+
field.confidence = "high";
|
|
4702
|
+
fieldsFilled++;
|
|
4703
|
+
}
|
|
4473
4704
|
}
|
|
4705
|
+
} catch (error) {
|
|
4706
|
+
await log?.(`Lookup fill failed: ${error instanceof Error ? error.message : String(error)}`);
|
|
4474
4707
|
}
|
|
4475
4708
|
}
|
|
4476
4709
|
}
|
|
4477
4710
|
if (intent.primaryIntent === "question" || intent.primaryIntent === "mixed") {
|
|
4478
4711
|
if (intent.questionText) {
|
|
4479
|
-
|
|
4480
|
-
|
|
4712
|
+
try {
|
|
4713
|
+
const { text, usage } = await generateText({
|
|
4714
|
+
prompt: `The user is filling out an insurance application and asked: "${intent.questionText}"
|
|
4481
4715
|
|
|
4482
4716
|
Provide a brief, helpful explanation (2-3 sentences). End with "Just reply with the answer when you're ready and I'll fill it in."`,
|
|
4483
|
-
|
|
4484
|
-
|
|
4485
|
-
|
|
4486
|
-
|
|
4487
|
-
|
|
4717
|
+
maxTokens: 512,
|
|
4718
|
+
providerOptions
|
|
4719
|
+
});
|
|
4720
|
+
trackUsage(usage);
|
|
4721
|
+
responseText = text;
|
|
4722
|
+
} catch (error) {
|
|
4723
|
+
await log?.(`Question response generation failed: ${error instanceof Error ? error.message : String(error)}`);
|
|
4724
|
+
responseText = `I wasn't able to generate an explanation for your question. Could you rephrase it, or just provide the answer directly?`;
|
|
4725
|
+
}
|
|
4488
4726
|
}
|
|
4489
4727
|
}
|
|
4490
4728
|
const currentBatchComplete = currentBatchFieldIds.every(
|
|
@@ -4498,26 +4736,30 @@ Provide a brief, helpful explanation (2-3 sentences). End with "Just reply with
|
|
|
4498
4736
|
(f) => nextBatchFieldIds.includes(f.id)
|
|
4499
4737
|
);
|
|
4500
4738
|
const filledCount = state.fields.filter((f) => f.value).length;
|
|
4501
|
-
|
|
4502
|
-
|
|
4503
|
-
|
|
4504
|
-
|
|
4505
|
-
|
|
4506
|
-
|
|
4507
|
-
|
|
4508
|
-
|
|
4509
|
-
|
|
4510
|
-
|
|
4511
|
-
|
|
4512
|
-
|
|
4513
|
-
|
|
4514
|
-
|
|
4515
|
-
|
|
4516
|
-
responseText
|
|
4517
|
-
|
|
4518
|
-
|
|
4739
|
+
try {
|
|
4740
|
+
const { text: emailText, usage: emailUsage } = await generateBatchEmail(
|
|
4741
|
+
nextBatchFields,
|
|
4742
|
+
state.currentBatchIndex,
|
|
4743
|
+
state.batches.length,
|
|
4744
|
+
{
|
|
4745
|
+
appTitle: state.title,
|
|
4746
|
+
totalFieldCount: state.fields.length,
|
|
4747
|
+
filledFieldCount: filledCount,
|
|
4748
|
+
companyName: context?.companyName
|
|
4749
|
+
},
|
|
4750
|
+
generateText,
|
|
4751
|
+
providerOptions
|
|
4752
|
+
);
|
|
4753
|
+
trackUsage(emailUsage);
|
|
4754
|
+
if (!responseText) {
|
|
4755
|
+
responseText = emailText;
|
|
4756
|
+
} else {
|
|
4757
|
+
responseText += `
|
|
4519
4758
|
|
|
4520
4759
|
${emailText}`;
|
|
4760
|
+
}
|
|
4761
|
+
} catch (error) {
|
|
4762
|
+
await log?.(`Batch email generation failed: ${error instanceof Error ? error.message : String(error)}`);
|
|
4521
4763
|
}
|
|
4522
4764
|
} else {
|
|
4523
4765
|
state.status = "confirming";
|
|
@@ -4726,7 +4968,7 @@ var EvidenceItemSchema = import_zod32.z.object({
|
|
|
4726
4968
|
turnId: import_zod32.z.string().optional(),
|
|
4727
4969
|
text: import_zod32.z.string().describe("Text excerpt from the source"),
|
|
4728
4970
|
relevance: import_zod32.z.number().min(0).max(1),
|
|
4729
|
-
metadata: import_zod32.z.
|
|
4971
|
+
metadata: import_zod32.z.array(import_zod32.z.object({ key: import_zod32.z.string(), value: import_zod32.z.string() })).optional()
|
|
4730
4972
|
});
|
|
4731
4973
|
var RetrievalResultSchema = import_zod32.z.object({
|
|
4732
4974
|
subQuestion: import_zod32.z.string(),
|
|
@@ -4762,6 +5004,9 @@ var QueryResultSchema = import_zod32.z.object({
|
|
|
4762
5004
|
});
|
|
4763
5005
|
|
|
4764
5006
|
// src/query/retriever.ts
|
|
5007
|
+
/**
 * Convert a plain key/value record into an array of `{ key, value }` pairs.
 *
 * Entries appear in `Object.entries` order and values are passed through
 * unchanged.
 *
 * @param {Record<string, unknown> | null | undefined} record - Source record;
 *   may be absent (e.g. a chunk stored without metadata).
 * @returns {Array<{ key: string, value: unknown }>} One pair per own enumerable
 *   property, or an empty array when `record` is `null`/`undefined`.
 */
function recordToKVArray(record) {
  // Object.entries(undefined | null) throws a TypeError; a missing record
  // must not abort the caller, so treat it as "no entries".
  if (record == null) {
    return [];
  }
  return Object.entries(record).map(([key, value]) => ({ key, value }));
}
|
|
4765
5010
|
async function retrieve(subQuestion, conversationId, config) {
|
|
4766
5011
|
const { documentStore, memoryStore, retrievalLimit, log } = config;
|
|
4767
5012
|
const evidence = [];
|
|
@@ -4788,7 +5033,7 @@ async function retrieve(subQuestion, conversationId, config) {
|
|
|
4788
5033
|
text: chunk.text,
|
|
4789
5034
|
relevance: 0.8,
|
|
4790
5035
|
// Default — store doesn't expose scores directly
|
|
4791
|
-
metadata: chunk.metadata
|
|
5036
|
+
metadata: recordToKVArray(chunk.metadata)
|
|
4792
5037
|
});
|
|
4793
5038
|
}
|
|
4794
5039
|
}
|
|
@@ -4803,7 +5048,7 @@ async function retrieve(subQuestion, conversationId, config) {
|
|
|
4803
5048
|
documentId: chunk.documentId,
|
|
4804
5049
|
text: chunk.text,
|
|
4805
5050
|
relevance: 0.8,
|
|
4806
|
-
metadata: chunk.metadata
|
|
5051
|
+
metadata: recordToKVArray(chunk.metadata)
|
|
4807
5052
|
});
|
|
4808
5053
|
}
|
|
4809
5054
|
}
|
|
@@ -4831,11 +5076,11 @@ async function retrieve(subQuestion, conversationId, config) {
|
|
|
4831
5076
|
text: summary,
|
|
4832
5077
|
relevance: 0.9,
|
|
4833
5078
|
// Direct lookup is high relevance
|
|
4834
|
-
metadata:
|
|
4835
|
-
type: doc.type,
|
|
4836
|
-
carrier: doc.carrier ?? "",
|
|
4837
|
-
insuredName: doc.insuredName ?? ""
|
|
4838
|
-
|
|
5079
|
+
metadata: [
|
|
5080
|
+
{ key: "type", value: doc.type },
|
|
5081
|
+
{ key: "carrier", value: doc.carrier ?? "" },
|
|
5082
|
+
{ key: "insuredName", value: doc.insuredName ?? "" }
|
|
5083
|
+
]
|
|
4839
5084
|
});
|
|
4840
5085
|
}
|
|
4841
5086
|
} catch (e) {
|
|
@@ -5070,8 +5315,12 @@ function createQueryAgent(config) {
|
|
|
5070
5315
|
async function query(input) {
|
|
5071
5316
|
totalUsage = { inputTokens: 0, outputTokens: 0 };
|
|
5072
5317
|
const { question, conversationId, context } = input;
|
|
5318
|
+
const pipelineCtx = createPipelineContext({
|
|
5319
|
+
id: `query-${Date.now()}`
|
|
5320
|
+
});
|
|
5073
5321
|
onProgress?.("Classifying query...");
|
|
5074
5322
|
const classification = await classify(question, conversationId);
|
|
5323
|
+
await pipelineCtx.save("classify", { classification });
|
|
5075
5324
|
onProgress?.(`Retrieving evidence for ${classification.subQuestions.length} sub-question(s)...`);
|
|
5076
5325
|
const retrieverConfig = {
|
|
5077
5326
|
documentStore,
|
|
@@ -5085,9 +5334,10 @@ function createQueryAgent(config) {
|
|
|
5085
5334
|
)
|
|
5086
5335
|
);
|
|
5087
5336
|
const allEvidence = retrievalResults.flatMap((r) => r.evidence);
|
|
5337
|
+
await pipelineCtx.save("retrieve", { classification, evidence: allEvidence });
|
|
5088
5338
|
onProgress?.("Reasoning over evidence...");
|
|
5089
5339
|
const reasonerConfig = { generateObject, providerOptions };
|
|
5090
|
-
|
|
5340
|
+
const reasonResults = await Promise.allSettled(
|
|
5091
5341
|
classification.subQuestions.map(
|
|
5092
5342
|
(sq, i) => limit(async () => {
|
|
5093
5343
|
const { subAnswer, usage } = await reason(
|
|
@@ -5101,10 +5351,27 @@ function createQueryAgent(config) {
|
|
|
5101
5351
|
})
|
|
5102
5352
|
)
|
|
5103
5353
|
);
|
|
5354
|
+
let subAnswers = [];
|
|
5355
|
+
for (let i = 0; i < reasonResults.length; i++) {
|
|
5356
|
+
const result = reasonResults[i];
|
|
5357
|
+
if (result.status === "fulfilled") {
|
|
5358
|
+
subAnswers.push(result.value);
|
|
5359
|
+
} else {
|
|
5360
|
+
await log?.(`Reasoner failed for sub-question "${classification.subQuestions[i].question}": ${result.reason}`);
|
|
5361
|
+
subAnswers.push({
|
|
5362
|
+
subQuestion: classification.subQuestions[i].question,
|
|
5363
|
+
answer: "Unable to answer this part of the question due to a processing error.",
|
|
5364
|
+
citations: [],
|
|
5365
|
+
confidence: 0,
|
|
5366
|
+
needsMoreContext: true
|
|
5367
|
+
});
|
|
5368
|
+
}
|
|
5369
|
+
}
|
|
5370
|
+
await pipelineCtx.save("reason", { classification, evidence: allEvidence, subAnswers });
|
|
5104
5371
|
onProgress?.("Verifying answer grounding...");
|
|
5105
5372
|
const verifierConfig = { generateObject, providerOptions };
|
|
5106
5373
|
for (let round = 0; round < maxVerifyRounds; round++) {
|
|
5107
|
-
const { result: verifyResult, usage } = await
|
|
5374
|
+
const { result: verifyResult, usage } = await safeVerify(
|
|
5108
5375
|
question,
|
|
5109
5376
|
subAnswers,
|
|
5110
5377
|
allEvidence,
|
|
@@ -5128,7 +5395,6 @@ function createQueryAgent(config) {
|
|
|
5128
5395
|
() => retrieve(sq, conversationId, {
|
|
5129
5396
|
...retrieverConfig,
|
|
5130
5397
|
retrievalLimit: retrievalLimit * 2
|
|
5131
|
-
// Broader retrieval on retry
|
|
5132
5398
|
})
|
|
5133
5399
|
)
|
|
5134
5400
|
)
|
|
@@ -5136,7 +5402,7 @@ function createQueryAgent(config) {
|
|
|
5136
5402
|
for (const r of retryRetrievals) {
|
|
5137
5403
|
allEvidence.push(...r.evidence);
|
|
5138
5404
|
}
|
|
5139
|
-
const
|
|
5405
|
+
const retrySettled = await Promise.allSettled(
|
|
5140
5406
|
retryQuestions.map(
|
|
5141
5407
|
(sq, i) => limit(async () => {
|
|
5142
5408
|
const { subAnswer, usage: u } = await reason(
|
|
@@ -5150,6 +5416,7 @@ function createQueryAgent(config) {
|
|
|
5150
5416
|
})
|
|
5151
5417
|
)
|
|
5152
5418
|
);
|
|
5419
|
+
const retrySubAnswers = retrySettled.filter((r) => r.status === "fulfilled").map((r) => r.value);
|
|
5153
5420
|
const retryQSet = new Set(retryQuestions.map((sq) => sq.question));
|
|
5154
5421
|
subAnswers = subAnswers.map((sa) => {
|
|
5155
5422
|
if (retryQSet.has(sa.subQuestion)) {
|
|
@@ -5202,17 +5469,42 @@ function createQueryAgent(config) {
|
|
|
5202
5469
|
}
|
|
5203
5470
|
}
|
|
5204
5471
|
const prompt = buildQueryClassifyPrompt(question, conversationContext);
|
|
5205
|
-
const { object, usage } = await
|
|
5206
|
-
|
|
5472
|
+
const { object, usage } = await safeGenerateObject(
|
|
5473
|
+
generateObject,
|
|
5474
|
+
{
|
|
5207
5475
|
prompt,
|
|
5208
5476
|
schema: QueryClassifyResultSchema,
|
|
5209
5477
|
maxTokens: 2048,
|
|
5210
5478
|
providerOptions
|
|
5211
|
-
}
|
|
5479
|
+
},
|
|
5480
|
+
{
|
|
5481
|
+
fallback: {
|
|
5482
|
+
intent: "general_knowledge",
|
|
5483
|
+
subQuestions: [
|
|
5484
|
+
{
|
|
5485
|
+
question,
|
|
5486
|
+
intent: "general_knowledge"
|
|
5487
|
+
}
|
|
5488
|
+
],
|
|
5489
|
+
requiresDocumentLookup: true,
|
|
5490
|
+
requiresChunkSearch: true,
|
|
5491
|
+
requiresConversationHistory: !!conversationId
|
|
5492
|
+
},
|
|
5493
|
+
log,
|
|
5494
|
+
onError: (err, attempt) => log?.(`Query classify attempt ${attempt + 1} failed: ${err}`)
|
|
5495
|
+
}
|
|
5212
5496
|
);
|
|
5213
5497
|
trackUsage(usage);
|
|
5214
5498
|
return object;
|
|
5215
5499
|
}
|
|
5500
|
+
/**
 * Run `verify` without letting a failure propagate to the caller.
 *
 * On success the value resolved by `verify` is returned untouched. If `verify`
 * throws or rejects, the failure is reported through the optional `log`
 * callback and an "approved, no issues" result is returned instead. Note that
 * this fallback object carries no `usage` property.
 *
 * @param originalQuestion - Forwarded to `verify` as its first argument.
 * @param subAnswers - Forwarded to `verify` as its second argument.
 * @param allEvidence - Forwarded to `verify` as its third argument.
 * @param verifierConfig - Forwarded to `verify` as its fourth argument.
 * @returns The `verify` result, or `{ result: { approved: true, issues: [] } }`
 *   when verification failed.
 */
async function safeVerify(originalQuestion, subAnswers, allEvidence, verifierConfig) {
  let outcome;
  try {
    outcome = await verify(originalQuestion, subAnswers, allEvidence, verifierConfig);
  } catch (failure) {
    // The message is built inside the call so it is only evaluated when a logger exists.
    await log?.(
      `Verification failed, approving by default: ${failure instanceof Error ? failure.message : String(failure)}`
    );
    outcome = { result: { approved: true, issues: [] } };
  }
  return outcome;
}
|
|
5216
5508
|
async function respond(originalQuestion, subAnswers, classification, platform) {
|
|
5217
5509
|
const subAnswersJson = JSON.stringify(
|
|
5218
5510
|
subAnswers.map((sa) => ({
|
|
@@ -5226,13 +5518,25 @@ function createQueryAgent(config) {
|
|
|
5226
5518
|
2
|
|
5227
5519
|
);
|
|
5228
5520
|
const prompt = buildRespondPrompt(originalQuestion, subAnswersJson, platform);
|
|
5229
|
-
const { object, usage } = await
|
|
5230
|
-
|
|
5521
|
+
const { object, usage } = await safeGenerateObject(
|
|
5522
|
+
generateObject,
|
|
5523
|
+
{
|
|
5231
5524
|
prompt,
|
|
5232
5525
|
schema: QueryResultSchema,
|
|
5233
5526
|
maxTokens: 4096,
|
|
5234
5527
|
providerOptions
|
|
5235
|
-
}
|
|
5528
|
+
},
|
|
5529
|
+
{
|
|
5530
|
+
fallback: {
|
|
5531
|
+
answer: subAnswers.map((sa) => `**${sa.subQuestion}**
|
|
5532
|
+
${sa.answer}`).join("\n\n"),
|
|
5533
|
+
citations: subAnswers.flatMap((sa) => sa.citations),
|
|
5534
|
+
intent: classification.intent,
|
|
5535
|
+
confidence: Math.min(...subAnswers.map((sa) => sa.confidence), 1)
|
|
5536
|
+
},
|
|
5537
|
+
log,
|
|
5538
|
+
onError: (err, attempt) => log?.(`Respond attempt ${attempt + 1} failed: ${err}`)
|
|
5539
|
+
}
|
|
5236
5540
|
);
|
|
5237
5541
|
trackUsage(usage);
|
|
5238
5542
|
const result = object;
|
|
@@ -5398,6 +5702,7 @@ var AGENT_TOOLS = [
|
|
|
5398
5702
|
CommercialAutoDeclarationsSchema,
|
|
5399
5703
|
CommercialPropertyDeclarationsSchema,
|
|
5400
5704
|
CommunicationIntentSchema,
|
|
5705
|
+
ConditionKeyValueSchema,
|
|
5401
5706
|
ConditionTypeSchema,
|
|
5402
5707
|
ConstructionTypeSchema,
|
|
5403
5708
|
ContactSchema,
|
|
@@ -5564,6 +5869,7 @@ var AGENT_TOOLS = [
|
|
|
5564
5869
|
chunkDocument,
|
|
5565
5870
|
createApplicationPipeline,
|
|
5566
5871
|
createExtractor,
|
|
5872
|
+
createPipelineContext,
|
|
5567
5873
|
createQueryAgent,
|
|
5568
5874
|
extractPageRange,
|
|
5569
5875
|
fillAcroForm,
|
|
@@ -5573,6 +5879,7 @@ var AGENT_TOOLS = [
|
|
|
5573
5879
|
getTemplate,
|
|
5574
5880
|
overlayTextOnPdf,
|
|
5575
5881
|
pLimit,
|
|
5882
|
+
safeGenerateObject,
|
|
5576
5883
|
sanitizeNulls,
|
|
5577
5884
|
stripFences,
|
|
5578
5885
|
withRetry
|