@claritylabs/cl-sdk 0.6.0 → 0.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.mts +372 -67
- package/dist/index.d.ts +372 -67
- package/dist/index.js +545 -224
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +542 -224
- package/dist/index.mjs.map +1 -1
- package/dist/storage-sqlite.d.mts +52 -10
- package/dist/storage-sqlite.d.ts +52 -10
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -54,6 +54,7 @@ __export(index_exports, {
|
|
|
54
54
|
CommercialAutoDeclarationsSchema: () => CommercialAutoDeclarationsSchema,
|
|
55
55
|
CommercialPropertyDeclarationsSchema: () => CommercialPropertyDeclarationsSchema,
|
|
56
56
|
CommunicationIntentSchema: () => CommunicationIntentSchema,
|
|
57
|
+
ConditionKeyValueSchema: () => ConditionKeyValueSchema,
|
|
57
58
|
ConditionTypeSchema: () => ConditionTypeSchema,
|
|
58
59
|
ConstructionTypeSchema: () => ConstructionTypeSchema,
|
|
59
60
|
ContactSchema: () => ContactSchema,
|
|
@@ -220,6 +221,7 @@ __export(index_exports, {
|
|
|
220
221
|
chunkDocument: () => chunkDocument,
|
|
221
222
|
createApplicationPipeline: () => createApplicationPipeline,
|
|
222
223
|
createExtractor: () => createExtractor,
|
|
224
|
+
createPipelineContext: () => createPipelineContext,
|
|
223
225
|
createQueryAgent: () => createQueryAgent,
|
|
224
226
|
extractPageRange: () => extractPageRange,
|
|
225
227
|
fillAcroForm: () => fillAcroForm,
|
|
@@ -229,6 +231,7 @@ __export(index_exports, {
|
|
|
229
231
|
getTemplate: () => getTemplate,
|
|
230
232
|
overlayTextOnPdf: () => overlayTextOnPdf,
|
|
231
233
|
pLimit: () => pLimit,
|
|
234
|
+
safeGenerateObject: () => safeGenerateObject,
|
|
232
235
|
sanitizeNulls: () => sanitizeNulls,
|
|
233
236
|
stripFences: () => stripFences,
|
|
234
237
|
withRetry: () => withRetry
|
|
@@ -308,6 +311,69 @@ function sanitizeNulls(obj) {
|
|
|
308
311
|
return obj;
|
|
309
312
|
}
|
|
310
313
|
|
|
314
|
+
// src/core/safe-generate.ts
|
|
315
|
+
/**
 * Runs `generateObject(params)` through `withRetry`, repeating the whole call
 * when it fails and optionally degrading to a caller-supplied fallback.
 *
 * @param {Function} generateObject - Async generator callback; invoked as `generateObject(params)`.
 * @param {object} params - Argument forwarded unchanged to `generateObject` on every attempt.
 * @param {object} [options]
 * @param {number} [options.maxRetries=1] - Extra attempts after the first one. Negative or NaN
 *   values are treated as 0 so that at least one attempt is always made.
 * @param {*} [options.fallback] - When not `undefined`, returned as `{ object: fallback }`
 *   after every attempt has failed instead of throwing.
 * @param {Function} [options.log] - Optional (possibly async) logger; also handed to `withRetry`.
 * @param {Function} [options.onError] - Optional (possibly async) observer called with
 *   `(error, attempt)` after each failed attempt.
 * @returns {Promise<*>} Whatever `generateObject` resolved with, or `{ object: options.fallback }`.
 * @throws The error of the last failed attempt when no fallback is configured. An error thrown
 *   or rejected by `onError`/`log` also propagates to the caller.
 */
async function safeGenerateObject(generateObject, params, options) {
  const requestedRetries = options?.maxRetries ?? 1;
  // Guard against a retry budget that would skip the loop entirely and end in `throw undefined`.
  const maxRetries = Number.isNaN(requestedRetries) || requestedRetries < 0 ? 0 : requestedRetries;
  let lastError;
  for (let attempt = 0; attempt <= maxRetries; attempt++) {
    try {
      const result = await withRetry(
        () => generateObject(params),
        options?.log
      );
      return result;
    } catch (error) {
      lastError = error;
      // Await the observer: callers pass async handlers, and an un-awaited rejection
      // would surface as an unhandled rejection instead of reaching the caller.
      await options?.onError?.(error, attempt);
      await options?.log?.(
        `safeGenerateObject attempt ${attempt + 1}/${maxRetries + 1} failed: ${error instanceof Error ? error.message : String(error)}`
      );
      if (attempt < maxRetries) {
        // Fixed one-second pause between attempts.
        await new Promise((resolve) => setTimeout(resolve, 1e3));
      }
    }
  }
  if (options?.fallback !== void 0) {
    await options?.log?.(
      `safeGenerateObject: all retries exhausted, returning fallback`
    );
    // NOTE: the fallback result carries no `usage` field.
    return { object: options.fallback };
  }
  throw lastError;
}
|
|
344
|
+
|
|
345
|
+
// src/core/pipeline.ts
|
|
346
|
+
/**
 * Creates an in-memory checkpoint tracker for a multi-phase pipeline run.
 *
 * @param {object} opts
 * @param {*} opts.id - Identifier exposed unchanged as `id` on the returned context.
 * @param {Function} [opts.onSave] - Optional (possibly async) callback invoked with every
 *   checkpoint produced by `save`.
 * @param {object} [opts.resumeFrom] - Previously saved checkpoint to resume from. Its `phase`
 *   is marked complete; when it carries a `completedPhases` array (written by `save` below),
 *   those phases are marked complete as well.
 * @returns {{id: *, save: Function, getCheckpoint: Function, isPhaseComplete: Function, clear: Function}}
 */
function createPipelineContext(opts) {
  let latest = opts.resumeFrom;
  const completedPhases = /* @__PURE__ */ new Set();
  if (opts.resumeFrom) {
    // Restore every phase finished before the checkpoint was taken, so that guards such as
    // isPhaseComplete("classify") hold when resuming from a later phase's checkpoint.
    if (Array.isArray(opts.resumeFrom.completedPhases)) {
      for (const donePhase of opts.resumeFrom.completedPhases) {
        completedPhases.add(donePhase);
      }
    }
    completedPhases.add(opts.resumeFrom.phase);
  }
  return {
    id: opts.id,
    /** Records `state` as the latest checkpoint for `phase` and forwards it to `opts.onSave`. */
    async save(phase, state) {
      completedPhases.add(phase);
      const checkpoint = {
        phase,
        state,
        timestamp: Date.now(),
        // Snapshot (not the live Set) so the checkpoint stays plain, serializable data.
        completedPhases: [...completedPhases]
      };
      latest = checkpoint;
      await opts.onSave?.(checkpoint);
    },
    /** Returns the most recent checkpoint, or `undefined` if none exists or it was cleared. */
    getCheckpoint() {
      return latest;
    },
    /** Reports whether `phase` was saved in this run or restored from `resumeFrom`. */
    isPhaseComplete(phase) {
      return completedPhases.has(phase);
    },
    /** Forgets the latest checkpoint and all completed phases. */
    clear() {
      latest = void 0;
      completedPhases.clear();
    }
  };
}
|
|
376
|
+
|
|
311
377
|
// src/schemas/enums.ts
|
|
312
378
|
var import_zod = require("zod");
|
|
313
379
|
var PolicyTypeSchema = import_zod.z.enum([
|
|
@@ -708,11 +774,15 @@ var ExclusionSchema = import_zod5.z.object({
|
|
|
708
774
|
|
|
709
775
|
// src/schemas/condition.ts
|
|
710
776
|
var import_zod6 = require("zod");
|
|
777
|
+
var ConditionKeyValueSchema = import_zod6.z.object({
|
|
778
|
+
key: import_zod6.z.string(),
|
|
779
|
+
value: import_zod6.z.string()
|
|
780
|
+
});
|
|
711
781
|
var PolicyConditionSchema = import_zod6.z.object({
|
|
712
782
|
name: import_zod6.z.string(),
|
|
713
783
|
conditionType: ConditionTypeSchema,
|
|
714
784
|
content: import_zod6.z.string(),
|
|
715
|
-
keyValues: import_zod6.z.
|
|
785
|
+
keyValues: import_zod6.z.array(ConditionKeyValueSchema).optional(),
|
|
716
786
|
pageNumber: import_zod6.z.number().optional()
|
|
717
787
|
});
|
|
718
788
|
|
|
@@ -1881,21 +1951,33 @@ async function formatDocumentContent(doc, generateText, options) {
|
|
|
1881
1951
|
for (let i = 0; i < entries.length; i += MAX_ENTRIES_PER_BATCH) {
|
|
1882
1952
|
batches.push(entries.slice(i, i + MAX_ENTRIES_PER_BATCH));
|
|
1883
1953
|
}
|
|
1884
|
-
for (
|
|
1885
|
-
const
|
|
1886
|
-
|
|
1887
|
-
() =>
|
|
1888
|
-
|
|
1889
|
-
|
|
1890
|
-
|
|
1891
|
-
|
|
1892
|
-
|
|
1893
|
-
|
|
1894
|
-
|
|
1895
|
-
|
|
1954
|
+
for (let batchIdx = 0; batchIdx < batches.length; batchIdx++) {
|
|
1955
|
+
const batch = batches[batchIdx];
|
|
1956
|
+
try {
|
|
1957
|
+
const prompt = buildFormatPrompt(batch.map((e) => ({ id: e.id, text: e.text })));
|
|
1958
|
+
const result = await withRetry(
|
|
1959
|
+
() => generateText({
|
|
1960
|
+
prompt,
|
|
1961
|
+
maxTokens: 16384,
|
|
1962
|
+
providerOptions: options?.providerOptions
|
|
1963
|
+
})
|
|
1964
|
+
);
|
|
1965
|
+
if (result.usage) {
|
|
1966
|
+
totalUsage.inputTokens += result.usage.inputTokens;
|
|
1967
|
+
totalUsage.outputTokens += result.usage.outputTokens;
|
|
1968
|
+
}
|
|
1969
|
+
const formatted = parseFormatResponse(result.text);
|
|
1970
|
+
if (formatted.size < batch.length) {
|
|
1971
|
+
await options?.log?.(
|
|
1972
|
+
`Format batch ${batchIdx + 1}/${batches.length}: model returned ${formatted.size}/${batch.length} entries \u2014 unformatted entries will keep original content`
|
|
1973
|
+
);
|
|
1974
|
+
}
|
|
1975
|
+
applyFormattedContent(doc, batch, formatted);
|
|
1976
|
+
} catch (error) {
|
|
1977
|
+
await options?.log?.(
|
|
1978
|
+
`Format batch ${batchIdx + 1}/${batches.length} failed, keeping original content: ${error instanceof Error ? error.message : String(error)}`
|
|
1979
|
+
);
|
|
1896
1980
|
}
|
|
1897
|
-
const formatted = parseFormatResponse(result.text);
|
|
1898
|
-
applyFormattedContent(doc, batch, formatted);
|
|
1899
1981
|
}
|
|
1900
1982
|
return { document: doc, usage: totalUsage };
|
|
1901
1983
|
}
|
|
@@ -2736,9 +2818,13 @@ var ExtractionTaskSchema = import_zod18.z.object({
|
|
|
2736
2818
|
endPage: import_zod18.z.number(),
|
|
2737
2819
|
description: import_zod18.z.string()
|
|
2738
2820
|
});
|
|
2821
|
+
var PageMapEntrySchema = import_zod18.z.object({
|
|
2822
|
+
section: import_zod18.z.string(),
|
|
2823
|
+
pages: import_zod18.z.string()
|
|
2824
|
+
});
|
|
2739
2825
|
var ExtractionPlanSchema = import_zod18.z.object({
|
|
2740
2826
|
tasks: import_zod18.z.array(ExtractionTaskSchema),
|
|
2741
|
-
pageMap: import_zod18.z.
|
|
2827
|
+
pageMap: import_zod18.z.array(PageMapEntrySchema).optional()
|
|
2742
2828
|
});
|
|
2743
2829
|
function buildPlanPrompt(templateHints) {
|
|
2744
2830
|
return `You are planning the extraction of an insurance document. You have already classified this document. Now scan the full document and create a page map + extraction plan.
|
|
@@ -2767,7 +2853,10 @@ Return JSON:
|
|
|
2767
2853
|
{ "extractorName": "carrier_info", "startPage": 1, "endPage": 2, "description": "Extract carrier details from declarations page" },
|
|
2768
2854
|
...
|
|
2769
2855
|
],
|
|
2770
|
-
"pageMap":
|
|
2856
|
+
"pageMap": [
|
|
2857
|
+
{ "section": "declarations", "pages": "pages 1-3" },
|
|
2858
|
+
{ "section": "endorsements", "pages": "pages 15-22" }
|
|
2859
|
+
]
|
|
2771
2860
|
}
|
|
2772
2861
|
|
|
2773
2862
|
Create tasks that cover the entire document. Prefer specific extractors over generic "sections" where possible. Keep page ranges tight \u2014 only include pages relevant to each extractor.
|
|
@@ -3084,9 +3173,14 @@ Return JSON only.`;
|
|
|
3084
3173
|
|
|
3085
3174
|
// src/prompts/extractors/declarations.ts
|
|
3086
3175
|
var import_zod27 = require("zod");
|
|
3087
|
-
var
|
|
3088
|
-
"
|
|
3089
|
-
)
|
|
3176
|
+
var DeclarationsFieldSchema = import_zod27.z.object({
|
|
3177
|
+
field: import_zod27.z.string().describe("Descriptive field name (e.g. 'policyNumber', 'effectiveDate', 'coverageALimit')"),
|
|
3178
|
+
value: import_zod27.z.string().describe("Extracted value exactly as it appears in the document"),
|
|
3179
|
+
section: import_zod27.z.string().optional().describe("Section or grouping this field belongs to (e.g. 'Coverage Limits', 'Vehicle Schedule')")
|
|
3180
|
+
});
|
|
3181
|
+
var DeclarationsExtractSchema = import_zod27.z.object({
|
|
3182
|
+
fields: import_zod27.z.array(DeclarationsFieldSchema).describe("All declarations page fields extracted as key-value pairs. Structure varies by line of business.")
|
|
3183
|
+
});
|
|
3090
3184
|
function buildDeclarationsPrompt() {
|
|
3091
3185
|
return `You are an expert insurance document analyst. Extract all declarations page data from this document into a flexible key-value structure.
|
|
3092
3186
|
|
|
@@ -3110,9 +3204,18 @@ For PERSONAL LINES declarations:
|
|
|
3110
3204
|
- Flood (NFIP): flood zone, community number, building/contents coverage
|
|
3111
3205
|
- Personal Articles: scheduled items list with appraised values
|
|
3112
3206
|
|
|
3113
|
-
|
|
3207
|
+
Return each field as an object with "field" (descriptive name), "value" (exact text from document), and optional "section" (grouping).
|
|
3114
3208
|
|
|
3115
|
-
|
|
3209
|
+
Example output:
|
|
3210
|
+
{
|
|
3211
|
+
"fields": [
|
|
3212
|
+
{ "field": "policyNumber", "value": "GL-2025-78432", "section": "Policy Info" },
|
|
3213
|
+
{ "field": "effectiveDate", "value": "04/10/2025", "section": "Policy Info" },
|
|
3214
|
+
{ "field": "eachOccurrenceLimit", "value": "$1,000,000", "section": "Coverage Limits" }
|
|
3215
|
+
]
|
|
3216
|
+
}
|
|
3217
|
+
|
|
3218
|
+
Preserve original values exactly as they appear. Return JSON only.`;
|
|
3116
3219
|
}
|
|
3117
3220
|
|
|
3118
3221
|
// src/prompts/extractors/loss-history.ts
|
|
@@ -3260,7 +3363,8 @@ function createExtractor(config) {
|
|
|
3260
3363
|
onTokenUsage,
|
|
3261
3364
|
onProgress,
|
|
3262
3365
|
log,
|
|
3263
|
-
providerOptions
|
|
3366
|
+
providerOptions,
|
|
3367
|
+
onCheckpointSave
|
|
3264
3368
|
} = config;
|
|
3265
3369
|
const limit = pLimit(concurrency);
|
|
3266
3370
|
let totalUsage = { inputTokens: 0, outputTokens: 0 };
|
|
@@ -3271,100 +3375,106 @@ function createExtractor(config) {
|
|
|
3271
3375
|
onTokenUsage?.(usage);
|
|
3272
3376
|
}
|
|
3273
3377
|
}
|
|
3274
|
-
async function extract(pdfBase64, documentId) {
|
|
3378
|
+
async function extract(pdfBase64, documentId, options) {
|
|
3275
3379
|
const id = documentId ?? `doc-${Date.now()}`;
|
|
3276
3380
|
const memory = /* @__PURE__ */ new Map();
|
|
3277
3381
|
totalUsage = { inputTokens: 0, outputTokens: 0 };
|
|
3278
|
-
|
|
3279
|
-
|
|
3280
|
-
|
|
3281
|
-
|
|
3282
|
-
|
|
3283
|
-
|
|
3284
|
-
|
|
3285
|
-
|
|
3286
|
-
|
|
3287
|
-
);
|
|
3288
|
-
trackUsage(classifyResult.usage);
|
|
3289
|
-
memory.set("classify", classifyResult.object);
|
|
3290
|
-
const { documentType, policyTypes } = classifyResult.object;
|
|
3291
|
-
const primaryType = policyTypes[0] ?? "other";
|
|
3292
|
-
const template = getTemplate(primaryType);
|
|
3293
|
-
onProgress?.(`Planning extraction for ${primaryType} ${documentType}...`);
|
|
3294
|
-
const templateHints = [
|
|
3295
|
-
`Document type: ${primaryType} ${documentType}`,
|
|
3296
|
-
`Expected sections: ${template.expectedSections.join(", ")}`,
|
|
3297
|
-
`Page hints: ${Object.entries(template.pageHints).map(([k, v]) => `${k}: ${v}`).join("; ")}`,
|
|
3298
|
-
`Total pages: ${pageCount}`
|
|
3299
|
-
].join("\n");
|
|
3300
|
-
const planResult = await withRetry(
|
|
3301
|
-
() => generateObject({
|
|
3302
|
-
prompt: buildPlanPrompt(templateHints),
|
|
3303
|
-
schema: ExtractionPlanSchema,
|
|
3304
|
-
maxTokens: 2048,
|
|
3305
|
-
providerOptions
|
|
3306
|
-
})
|
|
3307
|
-
);
|
|
3308
|
-
trackUsage(planResult.usage);
|
|
3309
|
-
const tasks = planResult.object.tasks;
|
|
3310
|
-
onProgress?.(`Dispatching ${tasks.length} extractors...`);
|
|
3311
|
-
const extractorResults = await Promise.all(
|
|
3312
|
-
tasks.map(
|
|
3313
|
-
(task) => limit(async () => {
|
|
3314
|
-
const ext = getExtractor(task.extractorName);
|
|
3315
|
-
if (!ext) {
|
|
3316
|
-
await log?.(`Unknown extractor: ${task.extractorName}, skipping`);
|
|
3317
|
-
return null;
|
|
3318
|
-
}
|
|
3319
|
-
onProgress?.(`Extracting ${task.extractorName} (pages ${task.startPage}-${task.endPage})...`);
|
|
3320
|
-
try {
|
|
3321
|
-
const result = await runExtractor({
|
|
3322
|
-
name: task.extractorName,
|
|
3323
|
-
prompt: ext.buildPrompt(),
|
|
3324
|
-
schema: ext.schema,
|
|
3325
|
-
pdfBase64,
|
|
3326
|
-
startPage: task.startPage,
|
|
3327
|
-
endPage: task.endPage,
|
|
3328
|
-
generateObject,
|
|
3329
|
-
convertPdfToImages,
|
|
3330
|
-
maxTokens: ext.maxTokens ?? 4096,
|
|
3331
|
-
providerOptions
|
|
3332
|
-
});
|
|
3333
|
-
trackUsage(result.usage);
|
|
3334
|
-
return result;
|
|
3335
|
-
} catch (error) {
|
|
3336
|
-
await log?.(`Extractor ${task.extractorName} failed: ${error}`);
|
|
3337
|
-
return null;
|
|
3338
|
-
}
|
|
3339
|
-
})
|
|
3340
|
-
)
|
|
3341
|
-
);
|
|
3342
|
-
for (const result of extractorResults) {
|
|
3343
|
-
if (result) {
|
|
3344
|
-
memory.set(result.name, result.data);
|
|
3382
|
+
const pipelineCtx = createPipelineContext({
|
|
3383
|
+
id,
|
|
3384
|
+
onSave: onCheckpointSave,
|
|
3385
|
+
resumeFrom: options?.resumeFrom
|
|
3386
|
+
});
|
|
3387
|
+
const resumed = pipelineCtx.getCheckpoint()?.state;
|
|
3388
|
+
if (resumed?.memory) {
|
|
3389
|
+
for (const [k, v] of Object.entries(resumed.memory)) {
|
|
3390
|
+
memory.set(k, v);
|
|
3345
3391
|
}
|
|
3346
3392
|
}
|
|
3347
|
-
|
|
3348
|
-
|
|
3349
|
-
|
|
3350
|
-
|
|
3351
|
-
|
|
3352
|
-
|
|
3353
|
-
|
|
3393
|
+
let classifyResult;
|
|
3394
|
+
if (resumed?.classifyResult && pipelineCtx.isPhaseComplete("classify")) {
|
|
3395
|
+
classifyResult = resumed.classifyResult;
|
|
3396
|
+
onProgress?.("Resuming from checkpoint (classify complete)...");
|
|
3397
|
+
} else {
|
|
3398
|
+
onProgress?.("Classifying document...");
|
|
3399
|
+
const pageCount2 = await getPdfPageCount(pdfBase64);
|
|
3400
|
+
const classifyResponse = await safeGenerateObject(
|
|
3401
|
+
generateObject,
|
|
3402
|
+
{
|
|
3403
|
+
prompt: buildClassifyPrompt(),
|
|
3404
|
+
schema: ClassifyResultSchema,
|
|
3405
|
+
maxTokens: 512,
|
|
3354
3406
|
providerOptions
|
|
3355
|
-
}
|
|
3407
|
+
},
|
|
3408
|
+
{
|
|
3409
|
+
fallback: { documentType: "policy", policyTypes: ["other"], confidence: 0 },
|
|
3410
|
+
log,
|
|
3411
|
+
onError: (err, attempt) => log?.(`Classify attempt ${attempt + 1} failed: ${err}`)
|
|
3412
|
+
}
|
|
3356
3413
|
);
|
|
3357
|
-
trackUsage(
|
|
3358
|
-
|
|
3359
|
-
|
|
3360
|
-
|
|
3361
|
-
|
|
3362
|
-
|
|
3363
|
-
|
|
3364
|
-
|
|
3414
|
+
trackUsage(classifyResponse.usage);
|
|
3415
|
+
classifyResult = classifyResponse.object;
|
|
3416
|
+
memory.set("classify", classifyResult);
|
|
3417
|
+
await pipelineCtx.save("classify", {
|
|
3418
|
+
id,
|
|
3419
|
+
pageCount: pageCount2,
|
|
3420
|
+
classifyResult,
|
|
3421
|
+
memory: Object.fromEntries(memory)
|
|
3422
|
+
});
|
|
3423
|
+
}
|
|
3424
|
+
const { documentType, policyTypes } = classifyResult;
|
|
3425
|
+
const primaryType = policyTypes[0] ?? "other";
|
|
3426
|
+
const template = getTemplate(primaryType);
|
|
3427
|
+
const pageCount = resumed?.pageCount ?? await getPdfPageCount(pdfBase64);
|
|
3428
|
+
let plan;
|
|
3429
|
+
if (resumed?.plan && pipelineCtx.isPhaseComplete("plan")) {
|
|
3430
|
+
plan = resumed.plan;
|
|
3431
|
+
onProgress?.("Resuming from checkpoint (plan complete)...");
|
|
3432
|
+
} else {
|
|
3433
|
+
onProgress?.(`Planning extraction for ${primaryType} ${documentType}...`);
|
|
3434
|
+
const templateHints = [
|
|
3435
|
+
`Document type: ${primaryType} ${documentType}`,
|
|
3436
|
+
`Expected sections: ${template.expectedSections.join(", ")}`,
|
|
3437
|
+
`Page hints: ${Object.entries(template.pageHints).map(([k, v]) => `${k}: ${v}`).join("; ")}`,
|
|
3438
|
+
`Total pages: ${pageCount}`
|
|
3439
|
+
].join("\n");
|
|
3440
|
+
const planResponse = await safeGenerateObject(
|
|
3441
|
+
generateObject,
|
|
3442
|
+
{
|
|
3443
|
+
prompt: buildPlanPrompt(templateHints),
|
|
3444
|
+
schema: ExtractionPlanSchema,
|
|
3445
|
+
maxTokens: 2048,
|
|
3446
|
+
providerOptions
|
|
3447
|
+
},
|
|
3448
|
+
{
|
|
3449
|
+
fallback: {
|
|
3450
|
+
tasks: [{ extractorName: "sections", startPage: 1, endPage: pageCount, description: "Full document fallback extraction" }]
|
|
3451
|
+
},
|
|
3452
|
+
log,
|
|
3453
|
+
onError: (err, attempt) => log?.(`Plan attempt ${attempt + 1} failed: ${err}`)
|
|
3454
|
+
}
|
|
3455
|
+
);
|
|
3456
|
+
trackUsage(planResponse.usage);
|
|
3457
|
+
plan = planResponse.object;
|
|
3458
|
+
await pipelineCtx.save("plan", {
|
|
3459
|
+
id,
|
|
3460
|
+
pageCount,
|
|
3461
|
+
classifyResult,
|
|
3462
|
+
plan,
|
|
3463
|
+
memory: Object.fromEntries(memory)
|
|
3464
|
+
});
|
|
3465
|
+
}
|
|
3466
|
+
if (!pipelineCtx.isPhaseComplete("extract")) {
|
|
3467
|
+
const tasks = plan.tasks;
|
|
3468
|
+
onProgress?.(`Dispatching ${tasks.length} extractors...`);
|
|
3469
|
+
const extractorResults = await Promise.all(
|
|
3470
|
+
tasks.map(
|
|
3365
3471
|
(task) => limit(async () => {
|
|
3366
3472
|
const ext = getExtractor(task.extractorName);
|
|
3367
|
-
if (!ext)
|
|
3473
|
+
if (!ext) {
|
|
3474
|
+
await log?.(`Unknown extractor: ${task.extractorName}, skipping`);
|
|
3475
|
+
return null;
|
|
3476
|
+
}
|
|
3477
|
+
onProgress?.(`Extracting ${task.extractorName} (pages ${task.startPage}-${task.endPage})...`);
|
|
3368
3478
|
try {
|
|
3369
3479
|
const result = await runExtractor({
|
|
3370
3480
|
name: task.extractorName,
|
|
@@ -3381,28 +3491,114 @@ function createExtractor(config) {
|
|
|
3381
3491
|
trackUsage(result.usage);
|
|
3382
3492
|
return result;
|
|
3383
3493
|
} catch (error) {
|
|
3384
|
-
await log?.(`
|
|
3494
|
+
await log?.(`Extractor ${task.extractorName} failed: ${error}`);
|
|
3385
3495
|
return null;
|
|
3386
3496
|
}
|
|
3387
3497
|
})
|
|
3388
3498
|
)
|
|
3389
3499
|
);
|
|
3390
|
-
for (const result of
|
|
3500
|
+
for (const result of extractorResults) {
|
|
3391
3501
|
if (result) {
|
|
3392
3502
|
memory.set(result.name, result.data);
|
|
3393
3503
|
}
|
|
3394
3504
|
}
|
|
3505
|
+
await pipelineCtx.save("extract", {
|
|
3506
|
+
id,
|
|
3507
|
+
pageCount,
|
|
3508
|
+
classifyResult,
|
|
3509
|
+
plan,
|
|
3510
|
+
memory: Object.fromEntries(memory)
|
|
3511
|
+
});
|
|
3512
|
+
}
|
|
3513
|
+
if (!pipelineCtx.isPhaseComplete("review")) {
|
|
3514
|
+
for (let round = 0; round < maxReviewRounds; round++) {
|
|
3515
|
+
const extractedKeys = [...memory.keys()].filter((k) => k !== "classify");
|
|
3516
|
+
const reviewResponse = await safeGenerateObject(
|
|
3517
|
+
generateObject,
|
|
3518
|
+
{
|
|
3519
|
+
prompt: buildReviewPrompt(template.required, extractedKeys),
|
|
3520
|
+
schema: ReviewResultSchema,
|
|
3521
|
+
maxTokens: 1024,
|
|
3522
|
+
providerOptions
|
|
3523
|
+
},
|
|
3524
|
+
{
|
|
3525
|
+
fallback: { complete: true, missingFields: [], additionalTasks: [] },
|
|
3526
|
+
log,
|
|
3527
|
+
onError: (err, attempt) => log?.(`Review round ${round + 1} attempt ${attempt + 1} failed: ${err}`)
|
|
3528
|
+
}
|
|
3529
|
+
);
|
|
3530
|
+
trackUsage(reviewResponse.usage);
|
|
3531
|
+
if (reviewResponse.object.complete || reviewResponse.object.additionalTasks.length === 0) {
|
|
3532
|
+
onProgress?.("Extraction complete.");
|
|
3533
|
+
break;
|
|
3534
|
+
}
|
|
3535
|
+
onProgress?.(`Review round ${round + 1}: dispatching ${reviewResponse.object.additionalTasks.length} follow-up extractors...`);
|
|
3536
|
+
const followUpResults = await Promise.all(
|
|
3537
|
+
reviewResponse.object.additionalTasks.map(
|
|
3538
|
+
(task) => limit(async () => {
|
|
3539
|
+
const ext = getExtractor(task.extractorName);
|
|
3540
|
+
if (!ext) return null;
|
|
3541
|
+
try {
|
|
3542
|
+
const result = await runExtractor({
|
|
3543
|
+
name: task.extractorName,
|
|
3544
|
+
prompt: ext.buildPrompt(),
|
|
3545
|
+
schema: ext.schema,
|
|
3546
|
+
pdfBase64,
|
|
3547
|
+
startPage: task.startPage,
|
|
3548
|
+
endPage: task.endPage,
|
|
3549
|
+
generateObject,
|
|
3550
|
+
convertPdfToImages,
|
|
3551
|
+
maxTokens: ext.maxTokens ?? 4096,
|
|
3552
|
+
providerOptions
|
|
3553
|
+
});
|
|
3554
|
+
trackUsage(result.usage);
|
|
3555
|
+
return result;
|
|
3556
|
+
} catch (error) {
|
|
3557
|
+
await log?.(`Follow-up extractor ${task.extractorName} failed: ${error}`);
|
|
3558
|
+
return null;
|
|
3559
|
+
}
|
|
3560
|
+
})
|
|
3561
|
+
)
|
|
3562
|
+
);
|
|
3563
|
+
for (const result of followUpResults) {
|
|
3564
|
+
if (result) {
|
|
3565
|
+
memory.set(result.name, result.data);
|
|
3566
|
+
}
|
|
3567
|
+
}
|
|
3568
|
+
}
|
|
3569
|
+
await pipelineCtx.save("review", {
|
|
3570
|
+
id,
|
|
3571
|
+
pageCount,
|
|
3572
|
+
classifyResult,
|
|
3573
|
+
plan,
|
|
3574
|
+
memory: Object.fromEntries(memory)
|
|
3575
|
+
});
|
|
3395
3576
|
}
|
|
3396
3577
|
onProgress?.("Assembling document...");
|
|
3397
3578
|
const document = assembleDocument(id, documentType, memory);
|
|
3579
|
+
await pipelineCtx.save("assemble", {
|
|
3580
|
+
id,
|
|
3581
|
+
pageCount,
|
|
3582
|
+
classifyResult,
|
|
3583
|
+
plan,
|
|
3584
|
+
memory: Object.fromEntries(memory),
|
|
3585
|
+
document
|
|
3586
|
+
});
|
|
3398
3587
|
onProgress?.("Formatting extracted content...");
|
|
3399
3588
|
const formatResult = await formatDocumentContent(document, generateText, {
|
|
3400
3589
|
providerOptions,
|
|
3401
|
-
onProgress
|
|
3590
|
+
onProgress,
|
|
3591
|
+
log
|
|
3402
3592
|
});
|
|
3403
3593
|
trackUsage(formatResult.usage);
|
|
3404
3594
|
const chunks = chunkDocument(formatResult.document);
|
|
3405
|
-
|
|
3595
|
+
const finalCheckpoint = pipelineCtx.getCheckpoint();
|
|
3596
|
+
return {
|
|
3597
|
+
document: formatResult.document,
|
|
3598
|
+
chunks,
|
|
3599
|
+
tokenUsage: totalUsage,
|
|
3600
|
+
checkpoint: finalCheckpoint
|
|
3601
|
+
};
|
|
3406
3602
|
}
|
|
3407
3603
|
return { extract };
|
|
3408
3604
|
}
|
|
@@ -4265,7 +4461,6 @@ function createApplicationPipeline(config) {
|
|
|
4265
4461
|
let state = {
|
|
4266
4462
|
id,
|
|
4267
4463
|
pdfBase64: void 0,
|
|
4268
|
-
// Don't persist the full PDF in state
|
|
4269
4464
|
title: void 0,
|
|
4270
4465
|
applicationType: null,
|
|
4271
4466
|
fields: [],
|
|
@@ -4276,13 +4471,20 @@ function createApplicationPipeline(config) {
|
|
|
4276
4471
|
updatedAt: now
|
|
4277
4472
|
};
|
|
4278
4473
|
onProgress?.("Classifying document...");
|
|
4279
|
-
|
|
4280
|
-
|
|
4281
|
-
|
|
4282
|
-
|
|
4283
|
-
|
|
4284
|
-
|
|
4285
|
-
|
|
4474
|
+
await applicationStore?.save(state);
|
|
4475
|
+
let classifyResult;
|
|
4476
|
+
try {
|
|
4477
|
+
const { result, usage: classifyUsage } = await classifyApplication(
|
|
4478
|
+
pdfBase64.slice(0, 2e3),
|
|
4479
|
+
generateObject,
|
|
4480
|
+
providerOptions
|
|
4481
|
+
);
|
|
4482
|
+
trackUsage(classifyUsage);
|
|
4483
|
+
classifyResult = result;
|
|
4484
|
+
} catch (error) {
|
|
4485
|
+
await log?.(`Classification failed, treating as non-application: ${error instanceof Error ? error.message : String(error)}`);
|
|
4486
|
+
classifyResult = { isApplication: false, confidence: 0, applicationType: null };
|
|
4487
|
+
}
|
|
4286
4488
|
if (!classifyResult.isApplication) {
|
|
4287
4489
|
state.status = "complete";
|
|
4288
4490
|
state.updatedAt = Date.now();
|
|
@@ -4292,13 +4494,28 @@ function createApplicationPipeline(config) {
|
|
|
4292
4494
|
state.applicationType = classifyResult.applicationType;
|
|
4293
4495
|
state.status = "extracting";
|
|
4294
4496
|
state.updatedAt = Date.now();
|
|
4497
|
+
await applicationStore?.save(state);
|
|
4295
4498
|
onProgress?.("Extracting form fields...");
|
|
4296
|
-
|
|
4297
|
-
|
|
4298
|
-
|
|
4299
|
-
|
|
4300
|
-
|
|
4301
|
-
|
|
4499
|
+
let fields;
|
|
4500
|
+
try {
|
|
4501
|
+
const { fields: extractedFields, usage: extractUsage } = await extractFields(
|
|
4502
|
+
pdfBase64,
|
|
4503
|
+
generateObject,
|
|
4504
|
+
providerOptions
|
|
4505
|
+
);
|
|
4506
|
+
trackUsage(extractUsage);
|
|
4507
|
+
fields = extractedFields;
|
|
4508
|
+
} catch (error) {
|
|
4509
|
+
await log?.(`Field extraction failed: ${error instanceof Error ? error.message : String(error)}`);
|
|
4510
|
+
fields = [];
|
|
4511
|
+
}
|
|
4512
|
+
if (fields.length === 0) {
|
|
4513
|
+
await log?.("No fields extracted, completing pipeline with empty result");
|
|
4514
|
+
state.status = "complete";
|
|
4515
|
+
state.updatedAt = Date.now();
|
|
4516
|
+
await applicationStore?.save(state);
|
|
4517
|
+
return { state, tokenUsage: totalUsage };
|
|
4518
|
+
}
|
|
4302
4519
|
state.fields = fields;
|
|
4303
4520
|
state.title = classifyResult.applicationType ?? void 0;
|
|
4304
4521
|
state.status = "auto_filling";
|
|
@@ -4330,20 +4547,24 @@ function createApplicationPipeline(config) {
|
|
|
4330
4547
|
limit(async () => {
|
|
4331
4548
|
const unfilledFields2 = state.fields.filter((f) => !f.value);
|
|
4332
4549
|
if (unfilledFields2.length === 0) return;
|
|
4333
|
-
|
|
4334
|
-
|
|
4335
|
-
|
|
4336
|
-
|
|
4337
|
-
|
|
4338
|
-
|
|
4339
|
-
|
|
4340
|
-
|
|
4341
|
-
const
|
|
4342
|
-
|
|
4343
|
-
field
|
|
4344
|
-
|
|
4345
|
-
|
|
4550
|
+
try {
|
|
4551
|
+
const { result: autoFillResult, usage: afUsage } = await autoFillFromContext(
|
|
4552
|
+
unfilledFields2,
|
|
4553
|
+
orgContext,
|
|
4554
|
+
generateObject,
|
|
4555
|
+
providerOptions
|
|
4556
|
+
);
|
|
4557
|
+
trackUsage(afUsage);
|
|
4558
|
+
for (const match of autoFillResult.matches) {
|
|
4559
|
+
const field = state.fields.find((f) => f.id === match.fieldId);
|
|
4560
|
+
if (field && !field.value) {
|
|
4561
|
+
field.value = match.value;
|
|
4562
|
+
field.source = `auto-fill: ${match.contextKey}`;
|
|
4563
|
+
field.confidence = match.confidence;
|
|
4564
|
+
}
|
|
4346
4565
|
}
|
|
4566
|
+
} catch (e) {
|
|
4567
|
+
await log?.(`Auto-fill from context failed: ${e instanceof Error ? e.message : String(e)}`);
|
|
4347
4568
|
}
|
|
4348
4569
|
})
|
|
4349
4570
|
);
|
|
@@ -4376,13 +4597,18 @@ function createApplicationPipeline(config) {
|
|
|
4376
4597
|
if (unfilledFields.length > 0) {
|
|
4377
4598
|
onProgress?.(`Batching ${unfilledFields.length} remaining questions...`);
|
|
4378
4599
|
state.status = "batching";
|
|
4379
|
-
|
|
4380
|
-
|
|
4381
|
-
|
|
4382
|
-
|
|
4383
|
-
|
|
4384
|
-
|
|
4385
|
-
|
|
4600
|
+
try {
|
|
4601
|
+
const { result: batchResult, usage: batchUsage } = await batchQuestions(
|
|
4602
|
+
unfilledFields,
|
|
4603
|
+
generateObject,
|
|
4604
|
+
providerOptions
|
|
4605
|
+
);
|
|
4606
|
+
trackUsage(batchUsage);
|
|
4607
|
+
state.batches = batchResult.batches;
|
|
4608
|
+
} catch (error) {
|
|
4609
|
+
await log?.(`Batching failed, using single-batch fallback: ${error instanceof Error ? error.message : String(error)}`);
|
|
4610
|
+
state.batches = [unfilledFields.map((f) => f.id)];
|
|
4611
|
+
}
|
|
4386
4612
|
state.currentBatchIndex = 0;
|
|
4387
4613
|
state.status = "collecting";
|
|
4388
4614
|
} else {
|
|
@@ -4409,32 +4635,49 @@ function createApplicationPipeline(config) {
|
|
|
4409
4635
|
(f) => currentBatchFieldIds.includes(f.id)
|
|
4410
4636
|
);
|
|
4411
4637
|
onProgress?.("Classifying reply...");
|
|
4412
|
-
|
|
4413
|
-
|
|
4414
|
-
|
|
4415
|
-
generateObject,
|
|
4416
|
-
providerOptions
|
|
4417
|
-
);
|
|
4418
|
-
trackUsage(intentUsage);
|
|
4419
|
-
let fieldsFilled = 0;
|
|
4420
|
-
let responseText;
|
|
4421
|
-
if (intent.hasAnswers) {
|
|
4422
|
-
onProgress?.("Parsing answers...");
|
|
4423
|
-
const { result: parseResult, usage: parseUsage } = await parseAnswers(
|
|
4638
|
+
let intent;
|
|
4639
|
+
try {
|
|
4640
|
+
const { intent: classifiedIntent, usage: intentUsage } = await classifyReplyIntent(
|
|
4424
4641
|
currentBatchFields,
|
|
4425
4642
|
replyText,
|
|
4426
4643
|
generateObject,
|
|
4427
4644
|
providerOptions
|
|
4428
4645
|
);
|
|
4429
|
-
trackUsage(
|
|
4430
|
-
|
|
4431
|
-
|
|
4432
|
-
|
|
4433
|
-
|
|
4434
|
-
|
|
4435
|
-
|
|
4436
|
-
|
|
4646
|
+
trackUsage(intentUsage);
|
|
4647
|
+
intent = classifiedIntent;
|
|
4648
|
+
} catch (error) {
|
|
4649
|
+
await log?.(`Reply intent classification failed, defaulting to answers_only: ${error instanceof Error ? error.message : String(error)}`);
|
|
4650
|
+
intent = {
|
|
4651
|
+
primaryIntent: "answers_only",
|
|
4652
|
+
hasAnswers: true,
|
|
4653
|
+
questionText: void 0,
|
|
4654
|
+
questionFieldIds: void 0,
|
|
4655
|
+
lookupRequests: void 0
|
|
4656
|
+
};
|
|
4657
|
+
}
|
|
4658
|
+
let fieldsFilled = 0;
|
|
4659
|
+
let responseText;
|
|
4660
|
+
if (intent.hasAnswers) {
|
|
4661
|
+
onProgress?.("Parsing answers...");
|
|
4662
|
+
try {
|
|
4663
|
+
const { result: parseResult, usage: parseUsage } = await parseAnswers(
|
|
4664
|
+
currentBatchFields,
|
|
4665
|
+
replyText,
|
|
4666
|
+
generateObject,
|
|
4667
|
+
providerOptions
|
|
4668
|
+
);
|
|
4669
|
+
trackUsage(parseUsage);
|
|
4670
|
+
for (const answer of parseResult.answers) {
|
|
4671
|
+
const field = state.fields.find((f) => f.id === answer.fieldId);
|
|
4672
|
+
if (field) {
|
|
4673
|
+
field.value = answer.value;
|
|
4674
|
+
field.source = "user";
|
|
4675
|
+
field.confidence = "confirmed";
|
|
4676
|
+
fieldsFilled++;
|
|
4677
|
+
}
|
|
4437
4678
|
}
|
|
4679
|
+
} catch (error) {
|
|
4680
|
+
await log?.(`Answer parsing failed: ${error instanceof Error ? error.message : String(error)}`);
|
|
4438
4681
|
}
|
|
4439
4682
|
}
|
|
4440
4683
|
if (intent.lookupRequests?.length) {
|
|
@@ -4455,36 +4698,45 @@ function createApplicationPipeline(config) {
|
|
|
4455
4698
|
const targetFields = state.fields.filter(
|
|
4456
4699
|
(f) => intent.lookupRequests.some((lr) => lr.targetFieldIds.includes(f.id))
|
|
4457
4700
|
);
|
|
4458
|
-
|
|
4459
|
-
|
|
4460
|
-
|
|
4461
|
-
|
|
4462
|
-
|
|
4463
|
-
|
|
4464
|
-
|
|
4465
|
-
|
|
4466
|
-
|
|
4467
|
-
const
|
|
4468
|
-
|
|
4469
|
-
field
|
|
4470
|
-
|
|
4471
|
-
|
|
4472
|
-
|
|
4701
|
+
try {
|
|
4702
|
+
const { result: lookupResult, usage: lookupUsage } = await fillFromLookup(
|
|
4703
|
+
intent.lookupRequests,
|
|
4704
|
+
targetFields,
|
|
4705
|
+
availableData,
|
|
4706
|
+
generateObject,
|
|
4707
|
+
providerOptions
|
|
4708
|
+
);
|
|
4709
|
+
trackUsage(lookupUsage);
|
|
4710
|
+
for (const fill of lookupResult.fills) {
|
|
4711
|
+
const field = state.fields.find((f) => f.id === fill.fieldId);
|
|
4712
|
+
if (field) {
|
|
4713
|
+
field.value = fill.value;
|
|
4714
|
+
field.source = `lookup: ${fill.source}`;
|
|
4715
|
+
field.confidence = "high";
|
|
4716
|
+
fieldsFilled++;
|
|
4717
|
+
}
|
|
4473
4718
|
}
|
|
4719
|
+
} catch (error) {
|
|
4720
|
+
await log?.(`Lookup fill failed: ${error instanceof Error ? error.message : String(error)}`);
|
|
4474
4721
|
}
|
|
4475
4722
|
}
|
|
4476
4723
|
}
|
|
4477
4724
|
if (intent.primaryIntent === "question" || intent.primaryIntent === "mixed") {
|
|
4478
4725
|
if (intent.questionText) {
|
|
4479
|
-
|
|
4480
|
-
|
|
4726
|
+
try {
|
|
4727
|
+
const { text, usage } = await generateText({
|
|
4728
|
+
prompt: `The user is filling out an insurance application and asked: "${intent.questionText}"
|
|
4481
4729
|
|
|
4482
4730
|
Provide a brief, helpful explanation (2-3 sentences). End with "Just reply with the answer when you're ready and I'll fill it in."`,
|
|
4483
|
-
|
|
4484
|
-
|
|
4485
|
-
|
|
4486
|
-
|
|
4487
|
-
|
|
4731
|
+
maxTokens: 512,
|
|
4732
|
+
providerOptions
|
|
4733
|
+
});
|
|
4734
|
+
trackUsage(usage);
|
|
4735
|
+
responseText = text;
|
|
4736
|
+
} catch (error) {
|
|
4737
|
+
await log?.(`Question response generation failed: ${error instanceof Error ? error.message : String(error)}`);
|
|
4738
|
+
responseText = `I wasn't able to generate an explanation for your question. Could you rephrase it, or just provide the answer directly?`;
|
|
4739
|
+
}
|
|
4488
4740
|
}
|
|
4489
4741
|
}
|
|
4490
4742
|
const currentBatchComplete = currentBatchFieldIds.every(
|
|
@@ -4498,26 +4750,30 @@ Provide a brief, helpful explanation (2-3 sentences). End with "Just reply with
|
|
|
4498
4750
|
(f) => nextBatchFieldIds.includes(f.id)
|
|
4499
4751
|
);
|
|
4500
4752
|
const filledCount = state.fields.filter((f) => f.value).length;
|
|
4501
|
-
|
|
4502
|
-
|
|
4503
|
-
|
|
4504
|
-
|
|
4505
|
-
|
|
4506
|
-
|
|
4507
|
-
|
|
4508
|
-
|
|
4509
|
-
|
|
4510
|
-
|
|
4511
|
-
|
|
4512
|
-
|
|
4513
|
-
|
|
4514
|
-
|
|
4515
|
-
|
|
4516
|
-
responseText
|
|
4517
|
-
|
|
4518
|
-
|
|
4753
|
+
try {
|
|
4754
|
+
const { text: emailText, usage: emailUsage } = await generateBatchEmail(
|
|
4755
|
+
nextBatchFields,
|
|
4756
|
+
state.currentBatchIndex,
|
|
4757
|
+
state.batches.length,
|
|
4758
|
+
{
|
|
4759
|
+
appTitle: state.title,
|
|
4760
|
+
totalFieldCount: state.fields.length,
|
|
4761
|
+
filledFieldCount: filledCount,
|
|
4762
|
+
companyName: context?.companyName
|
|
4763
|
+
},
|
|
4764
|
+
generateText,
|
|
4765
|
+
providerOptions
|
|
4766
|
+
);
|
|
4767
|
+
trackUsage(emailUsage);
|
|
4768
|
+
if (!responseText) {
|
|
4769
|
+
responseText = emailText;
|
|
4770
|
+
} else {
|
|
4771
|
+
responseText += `
|
|
4519
4772
|
|
|
4520
4773
|
${emailText}`;
|
|
4774
|
+
}
|
|
4775
|
+
} catch (error) {
|
|
4776
|
+
await log?.(`Batch email generation failed: ${error instanceof Error ? error.message : String(error)}`);
|
|
4521
4777
|
}
|
|
4522
4778
|
} else {
|
|
4523
4779
|
state.status = "confirming";
|
|
@@ -4726,7 +4982,7 @@ var EvidenceItemSchema = import_zod32.z.object({
|
|
|
4726
4982
|
turnId: import_zod32.z.string().optional(),
|
|
4727
4983
|
text: import_zod32.z.string().describe("Text excerpt from the source"),
|
|
4728
4984
|
relevance: import_zod32.z.number().min(0).max(1),
|
|
4729
|
-
metadata: import_zod32.z.
|
|
4985
|
+
metadata: import_zod32.z.array(import_zod32.z.object({ key: import_zod32.z.string(), value: import_zod32.z.string() })).optional()
|
|
4730
4986
|
});
|
|
4731
4987
|
var RetrievalResultSchema = import_zod32.z.object({
|
|
4732
4988
|
subQuestion: import_zod32.z.string(),
|
|
@@ -4762,6 +5018,9 @@ var QueryResultSchema = import_zod32.z.object({
|
|
|
4762
5018
|
});
|
|
4763
5019
|
|
|
4764
5020
|
// src/query/retriever.ts
|
|
5021
|
+
function recordToKVArray(record) {
|
|
5022
|
+
return Object.entries(record).map(([key, value]) => ({ key, value }));
|
|
5023
|
+
}
|
|
4765
5024
|
async function retrieve(subQuestion, conversationId, config) {
|
|
4766
5025
|
const { documentStore, memoryStore, retrievalLimit, log } = config;
|
|
4767
5026
|
const evidence = [];
|
|
@@ -4788,7 +5047,7 @@ async function retrieve(subQuestion, conversationId, config) {
|
|
|
4788
5047
|
text: chunk.text,
|
|
4789
5048
|
relevance: 0.8,
|
|
4790
5049
|
// Default — store doesn't expose scores directly
|
|
4791
|
-
metadata: chunk.metadata
|
|
5050
|
+
metadata: recordToKVArray(chunk.metadata)
|
|
4792
5051
|
});
|
|
4793
5052
|
}
|
|
4794
5053
|
}
|
|
@@ -4803,7 +5062,7 @@ async function retrieve(subQuestion, conversationId, config) {
|
|
|
4803
5062
|
documentId: chunk.documentId,
|
|
4804
5063
|
text: chunk.text,
|
|
4805
5064
|
relevance: 0.8,
|
|
4806
|
-
metadata: chunk.metadata
|
|
5065
|
+
metadata: recordToKVArray(chunk.metadata)
|
|
4807
5066
|
});
|
|
4808
5067
|
}
|
|
4809
5068
|
}
|
|
@@ -4831,11 +5090,11 @@ async function retrieve(subQuestion, conversationId, config) {
|
|
|
4831
5090
|
text: summary,
|
|
4832
5091
|
relevance: 0.9,
|
|
4833
5092
|
// Direct lookup is high relevance
|
|
4834
|
-
metadata:
|
|
4835
|
-
type: doc.type,
|
|
4836
|
-
carrier: doc.carrier ?? "",
|
|
4837
|
-
insuredName: doc.insuredName ?? ""
|
|
4838
|
-
|
|
5093
|
+
metadata: [
|
|
5094
|
+
{ key: "type", value: doc.type },
|
|
5095
|
+
{ key: "carrier", value: doc.carrier ?? "" },
|
|
5096
|
+
{ key: "insuredName", value: doc.insuredName ?? "" }
|
|
5097
|
+
]
|
|
4839
5098
|
});
|
|
4840
5099
|
}
|
|
4841
5100
|
} catch (e) {
|
|
@@ -5070,8 +5329,12 @@ function createQueryAgent(config) {
|
|
|
5070
5329
|
async function query(input) {
|
|
5071
5330
|
totalUsage = { inputTokens: 0, outputTokens: 0 };
|
|
5072
5331
|
const { question, conversationId, context } = input;
|
|
5332
|
+
const pipelineCtx = createPipelineContext({
|
|
5333
|
+
id: `query-${Date.now()}`
|
|
5334
|
+
});
|
|
5073
5335
|
onProgress?.("Classifying query...");
|
|
5074
5336
|
const classification = await classify(question, conversationId);
|
|
5337
|
+
await pipelineCtx.save("classify", { classification });
|
|
5075
5338
|
onProgress?.(`Retrieving evidence for ${classification.subQuestions.length} sub-question(s)...`);
|
|
5076
5339
|
const retrieverConfig = {
|
|
5077
5340
|
documentStore,
|
|
@@ -5085,9 +5348,10 @@ function createQueryAgent(config) {
|
|
|
5085
5348
|
)
|
|
5086
5349
|
);
|
|
5087
5350
|
const allEvidence = retrievalResults.flatMap((r) => r.evidence);
|
|
5351
|
+
await pipelineCtx.save("retrieve", { classification, evidence: allEvidence });
|
|
5088
5352
|
onProgress?.("Reasoning over evidence...");
|
|
5089
5353
|
const reasonerConfig = { generateObject, providerOptions };
|
|
5090
|
-
|
|
5354
|
+
const reasonResults = await Promise.allSettled(
|
|
5091
5355
|
classification.subQuestions.map(
|
|
5092
5356
|
(sq, i) => limit(async () => {
|
|
5093
5357
|
const { subAnswer, usage } = await reason(
|
|
@@ -5101,10 +5365,27 @@ function createQueryAgent(config) {
|
|
|
5101
5365
|
})
|
|
5102
5366
|
)
|
|
5103
5367
|
);
|
|
5368
|
+
let subAnswers = [];
|
|
5369
|
+
for (let i = 0; i < reasonResults.length; i++) {
|
|
5370
|
+
const result = reasonResults[i];
|
|
5371
|
+
if (result.status === "fulfilled") {
|
|
5372
|
+
subAnswers.push(result.value);
|
|
5373
|
+
} else {
|
|
5374
|
+
await log?.(`Reasoner failed for sub-question "${classification.subQuestions[i].question}": ${result.reason}`);
|
|
5375
|
+
subAnswers.push({
|
|
5376
|
+
subQuestion: classification.subQuestions[i].question,
|
|
5377
|
+
answer: "Unable to answer this part of the question due to a processing error.",
|
|
5378
|
+
citations: [],
|
|
5379
|
+
confidence: 0,
|
|
5380
|
+
needsMoreContext: true
|
|
5381
|
+
});
|
|
5382
|
+
}
|
|
5383
|
+
}
|
|
5384
|
+
await pipelineCtx.save("reason", { classification, evidence: allEvidence, subAnswers });
|
|
5104
5385
|
onProgress?.("Verifying answer grounding...");
|
|
5105
5386
|
const verifierConfig = { generateObject, providerOptions };
|
|
5106
5387
|
for (let round = 0; round < maxVerifyRounds; round++) {
|
|
5107
|
-
const { result: verifyResult, usage } = await
|
|
5388
|
+
const { result: verifyResult, usage } = await safeVerify(
|
|
5108
5389
|
question,
|
|
5109
5390
|
subAnswers,
|
|
5110
5391
|
allEvidence,
|
|
@@ -5128,7 +5409,6 @@ function createQueryAgent(config) {
|
|
|
5128
5409
|
() => retrieve(sq, conversationId, {
|
|
5129
5410
|
...retrieverConfig,
|
|
5130
5411
|
retrievalLimit: retrievalLimit * 2
|
|
5131
|
-
// Broader retrieval on retry
|
|
5132
5412
|
})
|
|
5133
5413
|
)
|
|
5134
5414
|
)
|
|
@@ -5136,7 +5416,7 @@ function createQueryAgent(config) {
|
|
|
5136
5416
|
for (const r of retryRetrievals) {
|
|
5137
5417
|
allEvidence.push(...r.evidence);
|
|
5138
5418
|
}
|
|
5139
|
-
const
|
|
5419
|
+
const retrySettled = await Promise.allSettled(
|
|
5140
5420
|
retryQuestions.map(
|
|
5141
5421
|
(sq, i) => limit(async () => {
|
|
5142
5422
|
const { subAnswer, usage: u } = await reason(
|
|
@@ -5150,6 +5430,7 @@ function createQueryAgent(config) {
|
|
|
5150
5430
|
})
|
|
5151
5431
|
)
|
|
5152
5432
|
);
|
|
5433
|
+
const retrySubAnswers = retrySettled.filter((r) => r.status === "fulfilled").map((r) => r.value);
|
|
5153
5434
|
const retryQSet = new Set(retryQuestions.map((sq) => sq.question));
|
|
5154
5435
|
subAnswers = subAnswers.map((sa) => {
|
|
5155
5436
|
if (retryQSet.has(sa.subQuestion)) {
|
|
@@ -5202,17 +5483,42 @@ function createQueryAgent(config) {
|
|
|
5202
5483
|
}
|
|
5203
5484
|
}
|
|
5204
5485
|
const prompt = buildQueryClassifyPrompt(question, conversationContext);
|
|
5205
|
-
const { object, usage } = await
|
|
5206
|
-
|
|
5486
|
+
const { object, usage } = await safeGenerateObject(
|
|
5487
|
+
generateObject,
|
|
5488
|
+
{
|
|
5207
5489
|
prompt,
|
|
5208
5490
|
schema: QueryClassifyResultSchema,
|
|
5209
5491
|
maxTokens: 2048,
|
|
5210
5492
|
providerOptions
|
|
5211
|
-
}
|
|
5493
|
+
},
|
|
5494
|
+
{
|
|
5495
|
+
fallback: {
|
|
5496
|
+
intent: "general_knowledge",
|
|
5497
|
+
subQuestions: [
|
|
5498
|
+
{
|
|
5499
|
+
question,
|
|
5500
|
+
intent: "general_knowledge"
|
|
5501
|
+
}
|
|
5502
|
+
],
|
|
5503
|
+
requiresDocumentLookup: true,
|
|
5504
|
+
requiresChunkSearch: true,
|
|
5505
|
+
requiresConversationHistory: !!conversationId
|
|
5506
|
+
},
|
|
5507
|
+
log,
|
|
5508
|
+
onError: (err, attempt) => log?.(`Query classify attempt ${attempt + 1} failed: ${err}`)
|
|
5509
|
+
}
|
|
5212
5510
|
);
|
|
5213
5511
|
trackUsage(usage);
|
|
5214
5512
|
return object;
|
|
5215
5513
|
}
|
|
5514
|
+
async function safeVerify(originalQuestion, subAnswers, allEvidence, verifierConfig) {
|
|
5515
|
+
try {
|
|
5516
|
+
return await verify(originalQuestion, subAnswers, allEvidence, verifierConfig);
|
|
5517
|
+
} catch (error) {
|
|
5518
|
+
await log?.(`Verification failed, approving by default: ${error instanceof Error ? error.message : String(error)}`);
|
|
5519
|
+
return { result: { approved: true, issues: [] } };
|
|
5520
|
+
}
|
|
5521
|
+
}
|
|
5216
5522
|
async function respond(originalQuestion, subAnswers, classification, platform) {
|
|
5217
5523
|
const subAnswersJson = JSON.stringify(
|
|
5218
5524
|
subAnswers.map((sa) => ({
|
|
@@ -5226,13 +5532,25 @@ function createQueryAgent(config) {
|
|
|
5226
5532
|
2
|
|
5227
5533
|
);
|
|
5228
5534
|
const prompt = buildRespondPrompt(originalQuestion, subAnswersJson, platform);
|
|
5229
|
-
const { object, usage } = await
|
|
5230
|
-
|
|
5535
|
+
const { object, usage } = await safeGenerateObject(
|
|
5536
|
+
generateObject,
|
|
5537
|
+
{
|
|
5231
5538
|
prompt,
|
|
5232
5539
|
schema: QueryResultSchema,
|
|
5233
5540
|
maxTokens: 4096,
|
|
5234
5541
|
providerOptions
|
|
5235
|
-
}
|
|
5542
|
+
},
|
|
5543
|
+
{
|
|
5544
|
+
fallback: {
|
|
5545
|
+
answer: subAnswers.map((sa) => `**${sa.subQuestion}**
|
|
5546
|
+
${sa.answer}`).join("\n\n"),
|
|
5547
|
+
citations: subAnswers.flatMap((sa) => sa.citations),
|
|
5548
|
+
intent: classification.intent,
|
|
5549
|
+
confidence: Math.min(...subAnswers.map((sa) => sa.confidence), 1)
|
|
5550
|
+
},
|
|
5551
|
+
log,
|
|
5552
|
+
onError: (err, attempt) => log?.(`Respond attempt ${attempt + 1} failed: ${err}`)
|
|
5553
|
+
}
|
|
5236
5554
|
);
|
|
5237
5555
|
trackUsage(usage);
|
|
5238
5556
|
const result = object;
|
|
@@ -5398,6 +5716,7 @@ var AGENT_TOOLS = [
|
|
|
5398
5716
|
CommercialAutoDeclarationsSchema,
|
|
5399
5717
|
CommercialPropertyDeclarationsSchema,
|
|
5400
5718
|
CommunicationIntentSchema,
|
|
5719
|
+
ConditionKeyValueSchema,
|
|
5401
5720
|
ConditionTypeSchema,
|
|
5402
5721
|
ConstructionTypeSchema,
|
|
5403
5722
|
ContactSchema,
|
|
@@ -5564,6 +5883,7 @@ var AGENT_TOOLS = [
|
|
|
5564
5883
|
chunkDocument,
|
|
5565
5884
|
createApplicationPipeline,
|
|
5566
5885
|
createExtractor,
|
|
5886
|
+
createPipelineContext,
|
|
5567
5887
|
createQueryAgent,
|
|
5568
5888
|
extractPageRange,
|
|
5569
5889
|
fillAcroForm,
|
|
@@ -5573,6 +5893,7 @@ var AGENT_TOOLS = [
|
|
|
5573
5893
|
getTemplate,
|
|
5574
5894
|
overlayTextOnPdf,
|
|
5575
5895
|
pLimit,
|
|
5896
|
+
safeGenerateObject,
|
|
5576
5897
|
sanitizeNulls,
|
|
5577
5898
|
stripFences,
|
|
5578
5899
|
withRetry
|