@claritylabs/cl-sdk 0.6.0 → 0.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.mts +372 -67
- package/dist/index.d.ts +372 -67
- package/dist/index.js +545 -224
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +542 -224
- package/dist/index.mjs.map +1 -1
- package/dist/storage-sqlite.d.mts +52 -10
- package/dist/storage-sqlite.d.ts +52 -10
- package/package.json +1 -1
package/dist/index.mjs
CHANGED
|
@@ -71,6 +71,69 @@ function sanitizeNulls(obj) {
|
|
|
71
71
|
return obj;
|
|
72
72
|
}
|
|
73
73
|
|
|
74
|
+
// src/core/safe-generate.ts
|
|
75
|
+
/**
 * Calls `generateObject(params)` through `withRetry`, repeating the whole call
 * a bounded number of times and optionally degrading to a fallback object.
 *
 * Failure reporting (`onError`, `log`) is best-effort: a reporter that throws
 * or rejects never prevents the next attempt, the fallback, or the final
 * rethrow of the underlying generation error.
 *
 * @param {Function} generateObject - Invoked as `generateObject(params)`; its
 *   resolved value is returned unchanged on success.
 * @param {object} params - Passed through to `generateObject` untouched.
 * @param {object} [options]
 * @param {number} [options.maxRetries=1] - Extra attempts after the first.
 *   Negative or NaN values are treated as 0 so at least one attempt is made.
 * @param {number} [options.retryDelayMs=1000] - Pause between attempts, in ms.
 * @param {*} [options.fallback] - When not `undefined`, returned as
 *   `{ object: fallback }` once every attempt has failed.
 * @param {Function} [options.log] - Receives human-readable progress messages;
 *   also forwarded to `withRetry` as its second argument.
 * @param {Function} [options.onError] - Called with `(error, attempt)` after
 *   each failed attempt; `attempt` is zero-based.
 * @returns {Promise<*>} The `generateObject` result, or `{ object: fallback }`.
 * @throws The error from the last attempt when no fallback was supplied.
 */
async function safeGenerateObject(generateObject, params, options) {
  const requestedRetries = options?.maxRetries ?? 1;
  // `NaN >= 0` is false, so NaN and negatives both collapse to "no retries"
  // while still guaranteeing the first attempt runs.
  const maxRetries = requestedRetries >= 0 ? requestedRetries : 0;
  const retryDelayMs = options?.retryDelayMs ?? 1e3;

  // Reporting must never mask the real outcome: await the reporter so async
  // rejections are contained here, and drop any error it raises because
  // there is no further channel to report a broken reporter on.
  const report = async (reporter, ...args) => {
    try {
      await reporter?.(...args);
    } catch {
      // Intentionally ignored; see comment above.
    }
  };

  let lastError;
  for (let attempt = 0; attempt <= maxRetries; attempt++) {
    try {
      return await withRetry(
        () => generateObject(params),
        options?.log
      );
    } catch (error) {
      lastError = error;
      await report(options?.onError, error, attempt);
      await report(
        options?.log,
        `safeGenerateObject attempt ${attempt + 1}/${maxRetries + 1} failed: ${error instanceof Error ? error.message : String(error)}`
      );
      if (attempt < maxRetries) {
        await new Promise((resolve) => setTimeout(resolve, retryDelayMs));
      }
    }
  }

  // `undefined` means "no fallback"; any other value (including null) is used.
  if (options?.fallback !== void 0) {
    await report(
      options?.log,
      `safeGenerateObject: all retries exhausted, returning fallback`
    );
    return { object: options.fallback };
  }
  throw lastError;
}
|
|
104
|
+
|
|
105
|
+
// src/core/pipeline.ts
|
|
106
|
+
/**
 * Creates an in-memory checkpoint tracker for one multi-phase pipeline run.
 *
 * Every checkpoint records the list of phases completed so far
 * (`completedPhases`). When such a checkpoint is later passed back as
 * `opts.resumeFrom`, all of those phases are reported complete, not just
 * the last one. Checkpoints without that list still work: only their own
 * `phase` is treated as complete.
 *
 * @param {object} opts
 * @param {*} opts.id - Exposed unchanged as `id` on the returned context.
 * @param {Function} [opts.onSave] - Awaited with each new checkpoint.
 * @param {object} [opts.resumeFrom] - A previously saved checkpoint
 *   (`{ phase, state, timestamp, completedPhases? }`) to start from.
 * @returns {{id: *, save: Function, getCheckpoint: Function,
 *   isPhaseComplete: Function, clear: Function}}
 */
function createPipelineContext(opts) {
  let latest = opts.resumeFrom;
  const completedPhases = /* @__PURE__ */ new Set();
  if (latest) {
    // Restore the full history first so earlier phases are not re-run.
    if (Array.isArray(latest.completedPhases)) {
      for (const phase of latest.completedPhases) {
        completedPhases.add(phase);
      }
    }
    completedPhases.add(latest.phase);
  }
  return {
    id: opts.id,
    /** Records `phase` as complete, stores the checkpoint, then awaits `onSave`. */
    async save(phase, state) {
      completedPhases.add(phase);
      const checkpoint = {
        phase,
        state,
        timestamp: Date.now(),
        // Snapshot (not the live Set) so later saves do not mutate it.
        completedPhases: [...completedPhases]
      };
      latest = checkpoint;
      await opts.onSave?.(checkpoint);
    },
    /** Returns the most recent checkpoint, or `undefined` if there is none. */
    getCheckpoint() {
      return latest;
    },
    /** True when `phase` was saved in this run or restored from `resumeFrom`. */
    isPhaseComplete(phase) {
      return completedPhases.has(phase);
    },
    /** Forgets the latest checkpoint and all completed phases. */
    clear() {
      latest = void 0;
      completedPhases.clear();
    }
  };
}
|
|
136
|
+
|
|
74
137
|
// src/schemas/enums.ts
|
|
75
138
|
import { z } from "zod";
|
|
76
139
|
var PolicyTypeSchema = z.enum([
|
|
@@ -471,11 +534,15 @@ var ExclusionSchema = z5.object({
|
|
|
471
534
|
|
|
472
535
|
// src/schemas/condition.ts
|
|
473
536
|
import { z as z6 } from "zod";
|
|
537
|
+
// One key/value pair attached to a policy condition; both `key` and `value`
// are strings. PolicyConditionSchema.keyValues is an optional array of these.
var ConditionKeyValueSchema = z6.object({
|
|
538
|
+
key: z6.string(),
|
|
539
|
+
value: z6.string()
|
|
540
|
+
});
|
|
474
541
|
var PolicyConditionSchema = z6.object({
|
|
475
542
|
name: z6.string(),
|
|
476
543
|
conditionType: ConditionTypeSchema,
|
|
477
544
|
content: z6.string(),
|
|
478
|
-
keyValues: z6.
|
|
545
|
+
keyValues: z6.array(ConditionKeyValueSchema).optional(),
|
|
479
546
|
pageNumber: z6.number().optional()
|
|
480
547
|
});
|
|
481
548
|
|
|
@@ -1652,21 +1719,33 @@ async function formatDocumentContent(doc, generateText, options) {
|
|
|
1652
1719
|
for (let i = 0; i < entries.length; i += MAX_ENTRIES_PER_BATCH) {
|
|
1653
1720
|
batches.push(entries.slice(i, i + MAX_ENTRIES_PER_BATCH));
|
|
1654
1721
|
}
|
|
1655
|
-
for (
|
|
1656
|
-
const
|
|
1657
|
-
|
|
1658
|
-
() =>
|
|
1659
|
-
|
|
1660
|
-
|
|
1661
|
-
|
|
1662
|
-
|
|
1663
|
-
|
|
1664
|
-
|
|
1665
|
-
|
|
1666
|
-
|
|
1722
|
+
for (let batchIdx = 0; batchIdx < batches.length; batchIdx++) {
|
|
1723
|
+
const batch = batches[batchIdx];
|
|
1724
|
+
try {
|
|
1725
|
+
const prompt = buildFormatPrompt(batch.map((e) => ({ id: e.id, text: e.text })));
|
|
1726
|
+
const result = await withRetry(
|
|
1727
|
+
() => generateText({
|
|
1728
|
+
prompt,
|
|
1729
|
+
maxTokens: 16384,
|
|
1730
|
+
providerOptions: options?.providerOptions
|
|
1731
|
+
})
|
|
1732
|
+
);
|
|
1733
|
+
if (result.usage) {
|
|
1734
|
+
totalUsage.inputTokens += result.usage.inputTokens;
|
|
1735
|
+
totalUsage.outputTokens += result.usage.outputTokens;
|
|
1736
|
+
}
|
|
1737
|
+
const formatted = parseFormatResponse(result.text);
|
|
1738
|
+
if (formatted.size < batch.length) {
|
|
1739
|
+
await options?.log?.(
|
|
1740
|
+
`Format batch ${batchIdx + 1}/${batches.length}: model returned ${formatted.size}/${batch.length} entries \u2014 unformatted entries will keep original content`
|
|
1741
|
+
);
|
|
1742
|
+
}
|
|
1743
|
+
applyFormattedContent(doc, batch, formatted);
|
|
1744
|
+
} catch (error) {
|
|
1745
|
+
await options?.log?.(
|
|
1746
|
+
`Format batch ${batchIdx + 1}/${batches.length} failed, keeping original content: ${error instanceof Error ? error.message : String(error)}`
|
|
1747
|
+
);
|
|
1667
1748
|
}
|
|
1668
|
-
const formatted = parseFormatResponse(result.text);
|
|
1669
|
-
applyFormattedContent(doc, batch, formatted);
|
|
1670
1749
|
}
|
|
1671
1750
|
return { document: doc, usage: totalUsage };
|
|
1672
1751
|
}
|
|
@@ -2507,9 +2586,13 @@ var ExtractionTaskSchema = z18.object({
|
|
|
2507
2586
|
endPage: z18.number(),
|
|
2508
2587
|
description: z18.string()
|
|
2509
2588
|
});
|
|
2589
|
+
// One page-map entry: a section name plus a free-text page range string
// (the plan prompt's example output uses values such as "pages 1-3").
// ExtractionPlanSchema.pageMap is an optional array of these.
var PageMapEntrySchema = z18.object({
|
|
2590
|
+
section: z18.string(),
|
|
2591
|
+
pages: z18.string()
|
|
2592
|
+
});
|
|
2510
2593
|
var ExtractionPlanSchema = z18.object({
|
|
2511
2594
|
tasks: z18.array(ExtractionTaskSchema),
|
|
2512
|
-
pageMap: z18.
|
|
2595
|
+
pageMap: z18.array(PageMapEntrySchema).optional()
|
|
2513
2596
|
});
|
|
2514
2597
|
function buildPlanPrompt(templateHints) {
|
|
2515
2598
|
return `You are planning the extraction of an insurance document. You have already classified this document. Now scan the full document and create a page map + extraction plan.
|
|
@@ -2538,7 +2621,10 @@ Return JSON:
|
|
|
2538
2621
|
{ "extractorName": "carrier_info", "startPage": 1, "endPage": 2, "description": "Extract carrier details from declarations page" },
|
|
2539
2622
|
...
|
|
2540
2623
|
],
|
|
2541
|
-
"pageMap":
|
|
2624
|
+
"pageMap": [
|
|
2625
|
+
{ "section": "declarations", "pages": "pages 1-3" },
|
|
2626
|
+
{ "section": "endorsements", "pages": "pages 15-22" }
|
|
2627
|
+
]
|
|
2542
2628
|
}
|
|
2543
2629
|
|
|
2544
2630
|
Create tasks that cover the entire document. Prefer specific extractors over generic "sections" where possible. Keep page ranges tight \u2014 only include pages relevant to each extractor.
|
|
@@ -2855,9 +2941,14 @@ Return JSON only.`;
|
|
|
2855
2941
|
|
|
2856
2942
|
// src/prompts/extractors/declarations.ts
|
|
2857
2943
|
import { z as z27 } from "zod";
|
|
2858
|
-
var
|
|
2859
|
-
"
|
|
2860
|
-
)
|
|
2944
|
+
// A single declarations-page field: required `field` name and `value` strings
// plus an optional `section` grouping label. The `.describe(...)` texts are
// part of the schema definition, not comments.
var DeclarationsFieldSchema = z27.object({
|
|
2945
|
+
field: z27.string().describe("Descriptive field name (e.g. 'policyNumber', 'effectiveDate', 'coverageALimit')"),
|
|
2946
|
+
value: z27.string().describe("Extracted value exactly as it appears in the document"),
|
|
2947
|
+
section: z27.string().optional().describe("Section or grouping this field belongs to (e.g. 'Coverage Limits', 'Vehicle Schedule')")
|
|
2948
|
+
});
|
|
2949
|
+
// Result shape for the declarations extractor: an object whose `fields`
// property is an array of DeclarationsFieldSchema entries.
var DeclarationsExtractSchema = z27.object({
|
|
2950
|
+
fields: z27.array(DeclarationsFieldSchema).describe("All declarations page fields extracted as key-value pairs. Structure varies by line of business.")
|
|
2951
|
+
});
|
|
2861
2952
|
function buildDeclarationsPrompt() {
|
|
2862
2953
|
return `You are an expert insurance document analyst. Extract all declarations page data from this document into a flexible key-value structure.
|
|
2863
2954
|
|
|
@@ -2881,9 +2972,18 @@ For PERSONAL LINES declarations:
|
|
|
2881
2972
|
- Flood (NFIP): flood zone, community number, building/contents coverage
|
|
2882
2973
|
- Personal Articles: scheduled items list with appraised values
|
|
2883
2974
|
|
|
2884
|
-
|
|
2975
|
+
Return each field as an object with "field" (descriptive name), "value" (exact text from document), and optional "section" (grouping).
|
|
2885
2976
|
|
|
2886
|
-
|
|
2977
|
+
Example output:
|
|
2978
|
+
{
|
|
2979
|
+
"fields": [
|
|
2980
|
+
{ "field": "policyNumber", "value": "GL-2025-78432", "section": "Policy Info" },
|
|
2981
|
+
{ "field": "effectiveDate", "value": "04/10/2025", "section": "Policy Info" },
|
|
2982
|
+
{ "field": "eachOccurrenceLimit", "value": "$1,000,000", "section": "Coverage Limits" }
|
|
2983
|
+
]
|
|
2984
|
+
}
|
|
2985
|
+
|
|
2986
|
+
Preserve original values exactly as they appear. Return JSON only.`;
|
|
2887
2987
|
}
|
|
2888
2988
|
|
|
2889
2989
|
// src/prompts/extractors/loss-history.ts
|
|
@@ -3031,7 +3131,8 @@ function createExtractor(config) {
|
|
|
3031
3131
|
onTokenUsage,
|
|
3032
3132
|
onProgress,
|
|
3033
3133
|
log,
|
|
3034
|
-
providerOptions
|
|
3134
|
+
providerOptions,
|
|
3135
|
+
onCheckpointSave
|
|
3035
3136
|
} = config;
|
|
3036
3137
|
const limit = pLimit(concurrency);
|
|
3037
3138
|
let totalUsage = { inputTokens: 0, outputTokens: 0 };
|
|
@@ -3042,100 +3143,106 @@ function createExtractor(config) {
|
|
|
3042
3143
|
onTokenUsage?.(usage);
|
|
3043
3144
|
}
|
|
3044
3145
|
}
|
|
3045
|
-
async function extract(pdfBase64, documentId) {
|
|
3146
|
+
async function extract(pdfBase64, documentId, options) {
|
|
3046
3147
|
const id = documentId ?? `doc-${Date.now()}`;
|
|
3047
3148
|
const memory = /* @__PURE__ */ new Map();
|
|
3048
3149
|
totalUsage = { inputTokens: 0, outputTokens: 0 };
|
|
3049
|
-
|
|
3050
|
-
|
|
3051
|
-
|
|
3052
|
-
|
|
3053
|
-
|
|
3054
|
-
|
|
3055
|
-
|
|
3056
|
-
|
|
3057
|
-
|
|
3058
|
-
);
|
|
3059
|
-
trackUsage(classifyResult.usage);
|
|
3060
|
-
memory.set("classify", classifyResult.object);
|
|
3061
|
-
const { documentType, policyTypes } = classifyResult.object;
|
|
3062
|
-
const primaryType = policyTypes[0] ?? "other";
|
|
3063
|
-
const template = getTemplate(primaryType);
|
|
3064
|
-
onProgress?.(`Planning extraction for ${primaryType} ${documentType}...`);
|
|
3065
|
-
const templateHints = [
|
|
3066
|
-
`Document type: ${primaryType} ${documentType}`,
|
|
3067
|
-
`Expected sections: ${template.expectedSections.join(", ")}`,
|
|
3068
|
-
`Page hints: ${Object.entries(template.pageHints).map(([k, v]) => `${k}: ${v}`).join("; ")}`,
|
|
3069
|
-
`Total pages: ${pageCount}`
|
|
3070
|
-
].join("\n");
|
|
3071
|
-
const planResult = await withRetry(
|
|
3072
|
-
() => generateObject({
|
|
3073
|
-
prompt: buildPlanPrompt(templateHints),
|
|
3074
|
-
schema: ExtractionPlanSchema,
|
|
3075
|
-
maxTokens: 2048,
|
|
3076
|
-
providerOptions
|
|
3077
|
-
})
|
|
3078
|
-
);
|
|
3079
|
-
trackUsage(planResult.usage);
|
|
3080
|
-
const tasks = planResult.object.tasks;
|
|
3081
|
-
onProgress?.(`Dispatching ${tasks.length} extractors...`);
|
|
3082
|
-
const extractorResults = await Promise.all(
|
|
3083
|
-
tasks.map(
|
|
3084
|
-
(task) => limit(async () => {
|
|
3085
|
-
const ext = getExtractor(task.extractorName);
|
|
3086
|
-
if (!ext) {
|
|
3087
|
-
await log?.(`Unknown extractor: ${task.extractorName}, skipping`);
|
|
3088
|
-
return null;
|
|
3089
|
-
}
|
|
3090
|
-
onProgress?.(`Extracting ${task.extractorName} (pages ${task.startPage}-${task.endPage})...`);
|
|
3091
|
-
try {
|
|
3092
|
-
const result = await runExtractor({
|
|
3093
|
-
name: task.extractorName,
|
|
3094
|
-
prompt: ext.buildPrompt(),
|
|
3095
|
-
schema: ext.schema,
|
|
3096
|
-
pdfBase64,
|
|
3097
|
-
startPage: task.startPage,
|
|
3098
|
-
endPage: task.endPage,
|
|
3099
|
-
generateObject,
|
|
3100
|
-
convertPdfToImages,
|
|
3101
|
-
maxTokens: ext.maxTokens ?? 4096,
|
|
3102
|
-
providerOptions
|
|
3103
|
-
});
|
|
3104
|
-
trackUsage(result.usage);
|
|
3105
|
-
return result;
|
|
3106
|
-
} catch (error) {
|
|
3107
|
-
await log?.(`Extractor ${task.extractorName} failed: ${error}`);
|
|
3108
|
-
return null;
|
|
3109
|
-
}
|
|
3110
|
-
})
|
|
3111
|
-
)
|
|
3112
|
-
);
|
|
3113
|
-
for (const result of extractorResults) {
|
|
3114
|
-
if (result) {
|
|
3115
|
-
memory.set(result.name, result.data);
|
|
3150
|
+
const pipelineCtx = createPipelineContext({
|
|
3151
|
+
id,
|
|
3152
|
+
onSave: onCheckpointSave,
|
|
3153
|
+
resumeFrom: options?.resumeFrom
|
|
3154
|
+
});
|
|
3155
|
+
const resumed = pipelineCtx.getCheckpoint()?.state;
|
|
3156
|
+
if (resumed?.memory) {
|
|
3157
|
+
for (const [k, v] of Object.entries(resumed.memory)) {
|
|
3158
|
+
memory.set(k, v);
|
|
3116
3159
|
}
|
|
3117
3160
|
}
|
|
3118
|
-
|
|
3119
|
-
|
|
3120
|
-
|
|
3121
|
-
|
|
3122
|
-
|
|
3123
|
-
|
|
3124
|
-
|
|
3161
|
+
let classifyResult;
|
|
3162
|
+
if (resumed?.classifyResult && pipelineCtx.isPhaseComplete("classify")) {
|
|
3163
|
+
classifyResult = resumed.classifyResult;
|
|
3164
|
+
onProgress?.("Resuming from checkpoint (classify complete)...");
|
|
3165
|
+
} else {
|
|
3166
|
+
onProgress?.("Classifying document...");
|
|
3167
|
+
const pageCount2 = await getPdfPageCount(pdfBase64);
|
|
3168
|
+
const classifyResponse = await safeGenerateObject(
|
|
3169
|
+
generateObject,
|
|
3170
|
+
{
|
|
3171
|
+
prompt: buildClassifyPrompt(),
|
|
3172
|
+
schema: ClassifyResultSchema,
|
|
3173
|
+
maxTokens: 512,
|
|
3125
3174
|
providerOptions
|
|
3126
|
-
}
|
|
3175
|
+
},
|
|
3176
|
+
{
|
|
3177
|
+
fallback: { documentType: "policy", policyTypes: ["other"], confidence: 0 },
|
|
3178
|
+
log,
|
|
3179
|
+
onError: (err, attempt) => log?.(`Classify attempt ${attempt + 1} failed: ${err}`)
|
|
3180
|
+
}
|
|
3127
3181
|
);
|
|
3128
|
-
trackUsage(
|
|
3129
|
-
|
|
3130
|
-
|
|
3131
|
-
|
|
3132
|
-
|
|
3133
|
-
|
|
3134
|
-
|
|
3135
|
-
|
|
3182
|
+
trackUsage(classifyResponse.usage);
|
|
3183
|
+
classifyResult = classifyResponse.object;
|
|
3184
|
+
memory.set("classify", classifyResult);
|
|
3185
|
+
await pipelineCtx.save("classify", {
|
|
3186
|
+
id,
|
|
3187
|
+
pageCount: pageCount2,
|
|
3188
|
+
classifyResult,
|
|
3189
|
+
memory: Object.fromEntries(memory)
|
|
3190
|
+
});
|
|
3191
|
+
}
|
|
3192
|
+
const { documentType, policyTypes } = classifyResult;
|
|
3193
|
+
const primaryType = policyTypes[0] ?? "other";
|
|
3194
|
+
const template = getTemplate(primaryType);
|
|
3195
|
+
const pageCount = resumed?.pageCount ?? await getPdfPageCount(pdfBase64);
|
|
3196
|
+
let plan;
|
|
3197
|
+
if (resumed?.plan && pipelineCtx.isPhaseComplete("plan")) {
|
|
3198
|
+
plan = resumed.plan;
|
|
3199
|
+
onProgress?.("Resuming from checkpoint (plan complete)...");
|
|
3200
|
+
} else {
|
|
3201
|
+
onProgress?.(`Planning extraction for ${primaryType} ${documentType}...`);
|
|
3202
|
+
const templateHints = [
|
|
3203
|
+
`Document type: ${primaryType} ${documentType}`,
|
|
3204
|
+
`Expected sections: ${template.expectedSections.join(", ")}`,
|
|
3205
|
+
`Page hints: ${Object.entries(template.pageHints).map(([k, v]) => `${k}: ${v}`).join("; ")}`,
|
|
3206
|
+
`Total pages: ${pageCount}`
|
|
3207
|
+
].join("\n");
|
|
3208
|
+
const planResponse = await safeGenerateObject(
|
|
3209
|
+
generateObject,
|
|
3210
|
+
{
|
|
3211
|
+
prompt: buildPlanPrompt(templateHints),
|
|
3212
|
+
schema: ExtractionPlanSchema,
|
|
3213
|
+
maxTokens: 2048,
|
|
3214
|
+
providerOptions
|
|
3215
|
+
},
|
|
3216
|
+
{
|
|
3217
|
+
fallback: {
|
|
3218
|
+
tasks: [{ extractorName: "sections", startPage: 1, endPage: pageCount, description: "Full document fallback extraction" }]
|
|
3219
|
+
},
|
|
3220
|
+
log,
|
|
3221
|
+
onError: (err, attempt) => log?.(`Plan attempt ${attempt + 1} failed: ${err}`)
|
|
3222
|
+
}
|
|
3223
|
+
);
|
|
3224
|
+
trackUsage(planResponse.usage);
|
|
3225
|
+
plan = planResponse.object;
|
|
3226
|
+
await pipelineCtx.save("plan", {
|
|
3227
|
+
id,
|
|
3228
|
+
pageCount,
|
|
3229
|
+
classifyResult,
|
|
3230
|
+
plan,
|
|
3231
|
+
memory: Object.fromEntries(memory)
|
|
3232
|
+
});
|
|
3233
|
+
}
|
|
3234
|
+
if (!pipelineCtx.isPhaseComplete("extract")) {
|
|
3235
|
+
const tasks = plan.tasks;
|
|
3236
|
+
onProgress?.(`Dispatching ${tasks.length} extractors...`);
|
|
3237
|
+
const extractorResults = await Promise.all(
|
|
3238
|
+
tasks.map(
|
|
3136
3239
|
(task) => limit(async () => {
|
|
3137
3240
|
const ext = getExtractor(task.extractorName);
|
|
3138
|
-
if (!ext)
|
|
3241
|
+
if (!ext) {
|
|
3242
|
+
await log?.(`Unknown extractor: ${task.extractorName}, skipping`);
|
|
3243
|
+
return null;
|
|
3244
|
+
}
|
|
3245
|
+
onProgress?.(`Extracting ${task.extractorName} (pages ${task.startPage}-${task.endPage})...`);
|
|
3139
3246
|
try {
|
|
3140
3247
|
const result = await runExtractor({
|
|
3141
3248
|
name: task.extractorName,
|
|
@@ -3152,28 +3259,114 @@ function createExtractor(config) {
|
|
|
3152
3259
|
trackUsage(result.usage);
|
|
3153
3260
|
return result;
|
|
3154
3261
|
} catch (error) {
|
|
3155
|
-
await log?.(`
|
|
3262
|
+
await log?.(`Extractor ${task.extractorName} failed: ${error}`);
|
|
3156
3263
|
return null;
|
|
3157
3264
|
}
|
|
3158
3265
|
})
|
|
3159
3266
|
)
|
|
3160
3267
|
);
|
|
3161
|
-
for (const result of
|
|
3268
|
+
for (const result of extractorResults) {
|
|
3162
3269
|
if (result) {
|
|
3163
3270
|
memory.set(result.name, result.data);
|
|
3164
3271
|
}
|
|
3165
3272
|
}
|
|
3273
|
+
await pipelineCtx.save("extract", {
|
|
3274
|
+
id,
|
|
3275
|
+
pageCount,
|
|
3276
|
+
classifyResult,
|
|
3277
|
+
plan,
|
|
3278
|
+
memory: Object.fromEntries(memory)
|
|
3279
|
+
});
|
|
3280
|
+
}
|
|
3281
|
+
if (!pipelineCtx.isPhaseComplete("review")) {
|
|
3282
|
+
for (let round = 0; round < maxReviewRounds; round++) {
|
|
3283
|
+
const extractedKeys = [...memory.keys()].filter((k) => k !== "classify");
|
|
3284
|
+
const reviewResponse = await safeGenerateObject(
|
|
3285
|
+
generateObject,
|
|
3286
|
+
{
|
|
3287
|
+
prompt: buildReviewPrompt(template.required, extractedKeys),
|
|
3288
|
+
schema: ReviewResultSchema,
|
|
3289
|
+
maxTokens: 1024,
|
|
3290
|
+
providerOptions
|
|
3291
|
+
},
|
|
3292
|
+
{
|
|
3293
|
+
fallback: { complete: true, missingFields: [], additionalTasks: [] },
|
|
3294
|
+
log,
|
|
3295
|
+
onError: (err, attempt) => log?.(`Review round ${round + 1} attempt ${attempt + 1} failed: ${err}`)
|
|
3296
|
+
}
|
|
3297
|
+
);
|
|
3298
|
+
trackUsage(reviewResponse.usage);
|
|
3299
|
+
if (reviewResponse.object.complete || reviewResponse.object.additionalTasks.length === 0) {
|
|
3300
|
+
onProgress?.("Extraction complete.");
|
|
3301
|
+
break;
|
|
3302
|
+
}
|
|
3303
|
+
onProgress?.(`Review round ${round + 1}: dispatching ${reviewResponse.object.additionalTasks.length} follow-up extractors...`);
|
|
3304
|
+
const followUpResults = await Promise.all(
|
|
3305
|
+
reviewResponse.object.additionalTasks.map(
|
|
3306
|
+
(task) => limit(async () => {
|
|
3307
|
+
const ext = getExtractor(task.extractorName);
|
|
3308
|
+
if (!ext) return null;
|
|
3309
|
+
try {
|
|
3310
|
+
const result = await runExtractor({
|
|
3311
|
+
name: task.extractorName,
|
|
3312
|
+
prompt: ext.buildPrompt(),
|
|
3313
|
+
schema: ext.schema,
|
|
3314
|
+
pdfBase64,
|
|
3315
|
+
startPage: task.startPage,
|
|
3316
|
+
endPage: task.endPage,
|
|
3317
|
+
generateObject,
|
|
3318
|
+
convertPdfToImages,
|
|
3319
|
+
maxTokens: ext.maxTokens ?? 4096,
|
|
3320
|
+
providerOptions
|
|
3321
|
+
});
|
|
3322
|
+
trackUsage(result.usage);
|
|
3323
|
+
return result;
|
|
3324
|
+
} catch (error) {
|
|
3325
|
+
await log?.(`Follow-up extractor ${task.extractorName} failed: ${error}`);
|
|
3326
|
+
return null;
|
|
3327
|
+
}
|
|
3328
|
+
})
|
|
3329
|
+
)
|
|
3330
|
+
);
|
|
3331
|
+
for (const result of followUpResults) {
|
|
3332
|
+
if (result) {
|
|
3333
|
+
memory.set(result.name, result.data);
|
|
3334
|
+
}
|
|
3335
|
+
}
|
|
3336
|
+
}
|
|
3337
|
+
await pipelineCtx.save("review", {
|
|
3338
|
+
id,
|
|
3339
|
+
pageCount,
|
|
3340
|
+
classifyResult,
|
|
3341
|
+
plan,
|
|
3342
|
+
memory: Object.fromEntries(memory)
|
|
3343
|
+
});
|
|
3166
3344
|
}
|
|
3167
3345
|
onProgress?.("Assembling document...");
|
|
3168
3346
|
const document = assembleDocument(id, documentType, memory);
|
|
3347
|
+
await pipelineCtx.save("assemble", {
|
|
3348
|
+
id,
|
|
3349
|
+
pageCount,
|
|
3350
|
+
classifyResult,
|
|
3351
|
+
plan,
|
|
3352
|
+
memory: Object.fromEntries(memory),
|
|
3353
|
+
document
|
|
3354
|
+
});
|
|
3169
3355
|
onProgress?.("Formatting extracted content...");
|
|
3170
3356
|
const formatResult = await formatDocumentContent(document, generateText, {
|
|
3171
3357
|
providerOptions,
|
|
3172
|
-
onProgress
|
|
3358
|
+
onProgress,
|
|
3359
|
+
log
|
|
3173
3360
|
});
|
|
3174
3361
|
trackUsage(formatResult.usage);
|
|
3175
3362
|
const chunks = chunkDocument(formatResult.document);
|
|
3176
|
-
|
|
3363
|
+
const finalCheckpoint = pipelineCtx.getCheckpoint();
|
|
3364
|
+
return {
|
|
3365
|
+
document: formatResult.document,
|
|
3366
|
+
chunks,
|
|
3367
|
+
tokenUsage: totalUsage,
|
|
3368
|
+
checkpoint: finalCheckpoint
|
|
3369
|
+
};
|
|
3177
3370
|
}
|
|
3178
3371
|
return { extract };
|
|
3179
3372
|
}
|
|
@@ -4036,7 +4229,6 @@ function createApplicationPipeline(config) {
|
|
|
4036
4229
|
let state = {
|
|
4037
4230
|
id,
|
|
4038
4231
|
pdfBase64: void 0,
|
|
4039
|
-
// Don't persist the full PDF in state
|
|
4040
4232
|
title: void 0,
|
|
4041
4233
|
applicationType: null,
|
|
4042
4234
|
fields: [],
|
|
@@ -4047,13 +4239,20 @@ function createApplicationPipeline(config) {
|
|
|
4047
4239
|
updatedAt: now
|
|
4048
4240
|
};
|
|
4049
4241
|
onProgress?.("Classifying document...");
|
|
4050
|
-
|
|
4051
|
-
|
|
4052
|
-
|
|
4053
|
-
|
|
4054
|
-
|
|
4055
|
-
|
|
4056
|
-
|
|
4242
|
+
await applicationStore?.save(state);
|
|
4243
|
+
let classifyResult;
|
|
4244
|
+
try {
|
|
4245
|
+
const { result, usage: classifyUsage } = await classifyApplication(
|
|
4246
|
+
pdfBase64.slice(0, 2e3),
|
|
4247
|
+
generateObject,
|
|
4248
|
+
providerOptions
|
|
4249
|
+
);
|
|
4250
|
+
trackUsage(classifyUsage);
|
|
4251
|
+
classifyResult = result;
|
|
4252
|
+
} catch (error) {
|
|
4253
|
+
await log?.(`Classification failed, treating as non-application: ${error instanceof Error ? error.message : String(error)}`);
|
|
4254
|
+
classifyResult = { isApplication: false, confidence: 0, applicationType: null };
|
|
4255
|
+
}
|
|
4057
4256
|
if (!classifyResult.isApplication) {
|
|
4058
4257
|
state.status = "complete";
|
|
4059
4258
|
state.updatedAt = Date.now();
|
|
@@ -4063,13 +4262,28 @@ function createApplicationPipeline(config) {
|
|
|
4063
4262
|
state.applicationType = classifyResult.applicationType;
|
|
4064
4263
|
state.status = "extracting";
|
|
4065
4264
|
state.updatedAt = Date.now();
|
|
4265
|
+
await applicationStore?.save(state);
|
|
4066
4266
|
onProgress?.("Extracting form fields...");
|
|
4067
|
-
|
|
4068
|
-
|
|
4069
|
-
|
|
4070
|
-
|
|
4071
|
-
|
|
4072
|
-
|
|
4267
|
+
let fields;
|
|
4268
|
+
try {
|
|
4269
|
+
const { fields: extractedFields, usage: extractUsage } = await extractFields(
|
|
4270
|
+
pdfBase64,
|
|
4271
|
+
generateObject,
|
|
4272
|
+
providerOptions
|
|
4273
|
+
);
|
|
4274
|
+
trackUsage(extractUsage);
|
|
4275
|
+
fields = extractedFields;
|
|
4276
|
+
} catch (error) {
|
|
4277
|
+
await log?.(`Field extraction failed: ${error instanceof Error ? error.message : String(error)}`);
|
|
4278
|
+
fields = [];
|
|
4279
|
+
}
|
|
4280
|
+
if (fields.length === 0) {
|
|
4281
|
+
await log?.("No fields extracted, completing pipeline with empty result");
|
|
4282
|
+
state.status = "complete";
|
|
4283
|
+
state.updatedAt = Date.now();
|
|
4284
|
+
await applicationStore?.save(state);
|
|
4285
|
+
return { state, tokenUsage: totalUsage };
|
|
4286
|
+
}
|
|
4073
4287
|
state.fields = fields;
|
|
4074
4288
|
state.title = classifyResult.applicationType ?? void 0;
|
|
4075
4289
|
state.status = "auto_filling";
|
|
@@ -4101,20 +4315,24 @@ function createApplicationPipeline(config) {
|
|
|
4101
4315
|
limit(async () => {
|
|
4102
4316
|
const unfilledFields2 = state.fields.filter((f) => !f.value);
|
|
4103
4317
|
if (unfilledFields2.length === 0) return;
|
|
4104
|
-
|
|
4105
|
-
|
|
4106
|
-
|
|
4107
|
-
|
|
4108
|
-
|
|
4109
|
-
|
|
4110
|
-
|
|
4111
|
-
|
|
4112
|
-
const
|
|
4113
|
-
|
|
4114
|
-
field
|
|
4115
|
-
|
|
4116
|
-
|
|
4318
|
+
try {
|
|
4319
|
+
const { result: autoFillResult, usage: afUsage } = await autoFillFromContext(
|
|
4320
|
+
unfilledFields2,
|
|
4321
|
+
orgContext,
|
|
4322
|
+
generateObject,
|
|
4323
|
+
providerOptions
|
|
4324
|
+
);
|
|
4325
|
+
trackUsage(afUsage);
|
|
4326
|
+
for (const match of autoFillResult.matches) {
|
|
4327
|
+
const field = state.fields.find((f) => f.id === match.fieldId);
|
|
4328
|
+
if (field && !field.value) {
|
|
4329
|
+
field.value = match.value;
|
|
4330
|
+
field.source = `auto-fill: ${match.contextKey}`;
|
|
4331
|
+
field.confidence = match.confidence;
|
|
4332
|
+
}
|
|
4117
4333
|
}
|
|
4334
|
+
} catch (e) {
|
|
4335
|
+
await log?.(`Auto-fill from context failed: ${e instanceof Error ? e.message : String(e)}`);
|
|
4118
4336
|
}
|
|
4119
4337
|
})
|
|
4120
4338
|
);
|
|
@@ -4147,13 +4365,18 @@ function createApplicationPipeline(config) {
|
|
|
4147
4365
|
if (unfilledFields.length > 0) {
|
|
4148
4366
|
onProgress?.(`Batching ${unfilledFields.length} remaining questions...`);
|
|
4149
4367
|
state.status = "batching";
|
|
4150
|
-
|
|
4151
|
-
|
|
4152
|
-
|
|
4153
|
-
|
|
4154
|
-
|
|
4155
|
-
|
|
4156
|
-
|
|
4368
|
+
try {
|
|
4369
|
+
const { result: batchResult, usage: batchUsage } = await batchQuestions(
|
|
4370
|
+
unfilledFields,
|
|
4371
|
+
generateObject,
|
|
4372
|
+
providerOptions
|
|
4373
|
+
);
|
|
4374
|
+
trackUsage(batchUsage);
|
|
4375
|
+
state.batches = batchResult.batches;
|
|
4376
|
+
} catch (error) {
|
|
4377
|
+
await log?.(`Batching failed, using single-batch fallback: ${error instanceof Error ? error.message : String(error)}`);
|
|
4378
|
+
state.batches = [unfilledFields.map((f) => f.id)];
|
|
4379
|
+
}
|
|
4157
4380
|
state.currentBatchIndex = 0;
|
|
4158
4381
|
state.status = "collecting";
|
|
4159
4382
|
} else {
|
|
@@ -4180,32 +4403,49 @@ function createApplicationPipeline(config) {
|
|
|
4180
4403
|
(f) => currentBatchFieldIds.includes(f.id)
|
|
4181
4404
|
);
|
|
4182
4405
|
onProgress?.("Classifying reply...");
|
|
4183
|
-
|
|
4184
|
-
|
|
4185
|
-
|
|
4186
|
-
generateObject,
|
|
4187
|
-
providerOptions
|
|
4188
|
-
);
|
|
4189
|
-
trackUsage(intentUsage);
|
|
4190
|
-
let fieldsFilled = 0;
|
|
4191
|
-
let responseText;
|
|
4192
|
-
if (intent.hasAnswers) {
|
|
4193
|
-
onProgress?.("Parsing answers...");
|
|
4194
|
-
const { result: parseResult, usage: parseUsage } = await parseAnswers(
|
|
4406
|
+
let intent;
|
|
4407
|
+
try {
|
|
4408
|
+
const { intent: classifiedIntent, usage: intentUsage } = await classifyReplyIntent(
|
|
4195
4409
|
currentBatchFields,
|
|
4196
4410
|
replyText,
|
|
4197
4411
|
generateObject,
|
|
4198
4412
|
providerOptions
|
|
4199
4413
|
);
|
|
4200
|
-
trackUsage(
|
|
4201
|
-
|
|
4202
|
-
|
|
4203
|
-
|
|
4204
|
-
|
|
4205
|
-
|
|
4206
|
-
|
|
4207
|
-
|
|
4414
|
+
trackUsage(intentUsage);
|
|
4415
|
+
intent = classifiedIntent;
|
|
4416
|
+
} catch (error) {
|
|
4417
|
+
await log?.(`Reply intent classification failed, defaulting to answers_only: ${error instanceof Error ? error.message : String(error)}`);
|
|
4418
|
+
intent = {
|
|
4419
|
+
primaryIntent: "answers_only",
|
|
4420
|
+
hasAnswers: true,
|
|
4421
|
+
questionText: void 0,
|
|
4422
|
+
questionFieldIds: void 0,
|
|
4423
|
+
lookupRequests: void 0
|
|
4424
|
+
};
|
|
4425
|
+
}
|
|
4426
|
+
let fieldsFilled = 0;
|
|
4427
|
+
let responseText;
|
|
4428
|
+
if (intent.hasAnswers) {
|
|
4429
|
+
onProgress?.("Parsing answers...");
|
|
4430
|
+
try {
|
|
4431
|
+
const { result: parseResult, usage: parseUsage } = await parseAnswers(
|
|
4432
|
+
currentBatchFields,
|
|
4433
|
+
replyText,
|
|
4434
|
+
generateObject,
|
|
4435
|
+
providerOptions
|
|
4436
|
+
);
|
|
4437
|
+
trackUsage(parseUsage);
|
|
4438
|
+
for (const answer of parseResult.answers) {
|
|
4439
|
+
const field = state.fields.find((f) => f.id === answer.fieldId);
|
|
4440
|
+
if (field) {
|
|
4441
|
+
field.value = answer.value;
|
|
4442
|
+
field.source = "user";
|
|
4443
|
+
field.confidence = "confirmed";
|
|
4444
|
+
fieldsFilled++;
|
|
4445
|
+
}
|
|
4208
4446
|
}
|
|
4447
|
+
} catch (error) {
|
|
4448
|
+
await log?.(`Answer parsing failed: ${error instanceof Error ? error.message : String(error)}`);
|
|
4209
4449
|
}
|
|
4210
4450
|
}
|
|
4211
4451
|
if (intent.lookupRequests?.length) {
|
|
@@ -4226,36 +4466,45 @@ function createApplicationPipeline(config) {
|
|
|
4226
4466
|
const targetFields = state.fields.filter(
|
|
4227
4467
|
(f) => intent.lookupRequests.some((lr) => lr.targetFieldIds.includes(f.id))
|
|
4228
4468
|
);
|
|
4229
|
-
|
|
4230
|
-
|
|
4231
|
-
|
|
4232
|
-
|
|
4233
|
-
|
|
4234
|
-
|
|
4235
|
-
|
|
4236
|
-
|
|
4237
|
-
|
|
4238
|
-
const
|
|
4239
|
-
|
|
4240
|
-
field
|
|
4241
|
-
|
|
4242
|
-
|
|
4243
|
-
|
|
4469
|
+
try {
|
|
4470
|
+
const { result: lookupResult, usage: lookupUsage } = await fillFromLookup(
|
|
4471
|
+
intent.lookupRequests,
|
|
4472
|
+
targetFields,
|
|
4473
|
+
availableData,
|
|
4474
|
+
generateObject,
|
|
4475
|
+
providerOptions
|
|
4476
|
+
);
|
|
4477
|
+
trackUsage(lookupUsage);
|
|
4478
|
+
for (const fill of lookupResult.fills) {
|
|
4479
|
+
const field = state.fields.find((f) => f.id === fill.fieldId);
|
|
4480
|
+
if (field) {
|
|
4481
|
+
field.value = fill.value;
|
|
4482
|
+
field.source = `lookup: ${fill.source}`;
|
|
4483
|
+
field.confidence = "high";
|
|
4484
|
+
fieldsFilled++;
|
|
4485
|
+
}
|
|
4244
4486
|
}
|
|
4487
|
+
} catch (error) {
|
|
4488
|
+
await log?.(`Lookup fill failed: ${error instanceof Error ? error.message : String(error)}`);
|
|
4245
4489
|
}
|
|
4246
4490
|
}
|
|
4247
4491
|
}
|
|
4248
4492
|
if (intent.primaryIntent === "question" || intent.primaryIntent === "mixed") {
|
|
4249
4493
|
if (intent.questionText) {
|
|
4250
|
-
|
|
4251
|
-
|
|
4494
|
+
try {
|
|
4495
|
+
const { text, usage } = await generateText({
|
|
4496
|
+
prompt: `The user is filling out an insurance application and asked: "${intent.questionText}"
|
|
4252
4497
|
|
|
4253
4498
|
Provide a brief, helpful explanation (2-3 sentences). End with "Just reply with the answer when you're ready and I'll fill it in."`,
|
|
4254
|
-
|
|
4255
|
-
|
|
4256
|
-
|
|
4257
|
-
|
|
4258
|
-
|
|
4499
|
+
maxTokens: 512,
|
|
4500
|
+
providerOptions
|
|
4501
|
+
});
|
|
4502
|
+
trackUsage(usage);
|
|
4503
|
+
responseText = text;
|
|
4504
|
+
} catch (error) {
|
|
4505
|
+
await log?.(`Question response generation failed: ${error instanceof Error ? error.message : String(error)}`);
|
|
4506
|
+
responseText = `I wasn't able to generate an explanation for your question. Could you rephrase it, or just provide the answer directly?`;
|
|
4507
|
+
}
|
|
4259
4508
|
}
|
|
4260
4509
|
}
|
|
4261
4510
|
const currentBatchComplete = currentBatchFieldIds.every(
|
|
@@ -4269,26 +4518,30 @@ Provide a brief, helpful explanation (2-3 sentences). End with "Just reply with
|
|
|
4269
4518
|
(f) => nextBatchFieldIds.includes(f.id)
|
|
4270
4519
|
);
|
|
4271
4520
|
const filledCount = state.fields.filter((f) => f.value).length;
|
|
4272
|
-
|
|
4273
|
-
|
|
4274
|
-
|
|
4275
|
-
|
|
4276
|
-
|
|
4277
|
-
|
|
4278
|
-
|
|
4279
|
-
|
|
4280
|
-
|
|
4281
|
-
|
|
4282
|
-
|
|
4283
|
-
|
|
4284
|
-
|
|
4285
|
-
|
|
4286
|
-
|
|
4287
|
-
responseText
|
|
4288
|
-
|
|
4289
|
-
|
|
4521
|
+
try {
|
|
4522
|
+
const { text: emailText, usage: emailUsage } = await generateBatchEmail(
|
|
4523
|
+
nextBatchFields,
|
|
4524
|
+
state.currentBatchIndex,
|
|
4525
|
+
state.batches.length,
|
|
4526
|
+
{
|
|
4527
|
+
appTitle: state.title,
|
|
4528
|
+
totalFieldCount: state.fields.length,
|
|
4529
|
+
filledFieldCount: filledCount,
|
|
4530
|
+
companyName: context?.companyName
|
|
4531
|
+
},
|
|
4532
|
+
generateText,
|
|
4533
|
+
providerOptions
|
|
4534
|
+
);
|
|
4535
|
+
trackUsage(emailUsage);
|
|
4536
|
+
if (!responseText) {
|
|
4537
|
+
responseText = emailText;
|
|
4538
|
+
} else {
|
|
4539
|
+
responseText += `
|
|
4290
4540
|
|
|
4291
4541
|
${emailText}`;
|
|
4542
|
+
}
|
|
4543
|
+
} catch (error) {
|
|
4544
|
+
await log?.(`Batch email generation failed: ${error instanceof Error ? error.message : String(error)}`);
|
|
4292
4545
|
}
|
|
4293
4546
|
} else {
|
|
4294
4547
|
state.status = "confirming";
|
|
@@ -4497,7 +4750,7 @@ var EvidenceItemSchema = z32.object({
|
|
|
4497
4750
|
turnId: z32.string().optional(),
|
|
4498
4751
|
text: z32.string().describe("Text excerpt from the source"),
|
|
4499
4752
|
relevance: z32.number().min(0).max(1),
|
|
4500
|
-
metadata: z32.
|
|
4753
|
+
metadata: z32.array(z32.object({ key: z32.string(), value: z32.string() })).optional()
|
|
4501
4754
|
});
|
|
4502
4755
|
var RetrievalResultSchema = z32.object({
|
|
4503
4756
|
subQuestion: z32.string(),
|
|
@@ -4533,6 +4786,9 @@ var QueryResultSchema = z32.object({
|
|
|
4533
4786
|
});
|
|
4534
4787
|
|
|
4535
4788
|
// src/query/retriever.ts
|
|
4789
|
+
function recordToKVArray(record) {
|
|
4790
|
+
return Object.entries(record).map(([key, value]) => ({ key, value }));
|
|
4791
|
+
}
|
|
4536
4792
|
async function retrieve(subQuestion, conversationId, config) {
|
|
4537
4793
|
const { documentStore, memoryStore, retrievalLimit, log } = config;
|
|
4538
4794
|
const evidence = [];
|
|
@@ -4559,7 +4815,7 @@ async function retrieve(subQuestion, conversationId, config) {
|
|
|
4559
4815
|
text: chunk.text,
|
|
4560
4816
|
relevance: 0.8,
|
|
4561
4817
|
// Default — store doesn't expose scores directly
|
|
4562
|
-
metadata: chunk.metadata
|
|
4818
|
+
metadata: recordToKVArray(chunk.metadata)
|
|
4563
4819
|
});
|
|
4564
4820
|
}
|
|
4565
4821
|
}
|
|
@@ -4574,7 +4830,7 @@ async function retrieve(subQuestion, conversationId, config) {
|
|
|
4574
4830
|
documentId: chunk.documentId,
|
|
4575
4831
|
text: chunk.text,
|
|
4576
4832
|
relevance: 0.8,
|
|
4577
|
-
metadata: chunk.metadata
|
|
4833
|
+
metadata: recordToKVArray(chunk.metadata)
|
|
4578
4834
|
});
|
|
4579
4835
|
}
|
|
4580
4836
|
}
|
|
@@ -4602,11 +4858,11 @@ async function retrieve(subQuestion, conversationId, config) {
|
|
|
4602
4858
|
text: summary,
|
|
4603
4859
|
relevance: 0.9,
|
|
4604
4860
|
// Direct lookup is high relevance
|
|
4605
|
-
metadata:
|
|
4606
|
-
type: doc.type,
|
|
4607
|
-
carrier: doc.carrier ?? "",
|
|
4608
|
-
insuredName: doc.insuredName ?? ""
|
|
4609
|
-
|
|
4861
|
+
metadata: [
|
|
4862
|
+
{ key: "type", value: doc.type },
|
|
4863
|
+
{ key: "carrier", value: doc.carrier ?? "" },
|
|
4864
|
+
{ key: "insuredName", value: doc.insuredName ?? "" }
|
|
4865
|
+
]
|
|
4610
4866
|
});
|
|
4611
4867
|
}
|
|
4612
4868
|
} catch (e) {
|
|
@@ -4841,8 +5097,12 @@ function createQueryAgent(config) {
|
|
|
4841
5097
|
async function query(input) {
|
|
4842
5098
|
totalUsage = { inputTokens: 0, outputTokens: 0 };
|
|
4843
5099
|
const { question, conversationId, context } = input;
|
|
5100
|
+
const pipelineCtx = createPipelineContext({
|
|
5101
|
+
id: `query-${Date.now()}`
|
|
5102
|
+
});
|
|
4844
5103
|
onProgress?.("Classifying query...");
|
|
4845
5104
|
const classification = await classify(question, conversationId);
|
|
5105
|
+
await pipelineCtx.save("classify", { classification });
|
|
4846
5106
|
onProgress?.(`Retrieving evidence for ${classification.subQuestions.length} sub-question(s)...`);
|
|
4847
5107
|
const retrieverConfig = {
|
|
4848
5108
|
documentStore,
|
|
@@ -4856,9 +5116,10 @@ function createQueryAgent(config) {
|
|
|
4856
5116
|
)
|
|
4857
5117
|
);
|
|
4858
5118
|
const allEvidence = retrievalResults.flatMap((r) => r.evidence);
|
|
5119
|
+
await pipelineCtx.save("retrieve", { classification, evidence: allEvidence });
|
|
4859
5120
|
onProgress?.("Reasoning over evidence...");
|
|
4860
5121
|
const reasonerConfig = { generateObject, providerOptions };
|
|
4861
|
-
|
|
5122
|
+
const reasonResults = await Promise.allSettled(
|
|
4862
5123
|
classification.subQuestions.map(
|
|
4863
5124
|
(sq, i) => limit(async () => {
|
|
4864
5125
|
const { subAnswer, usage } = await reason(
|
|
@@ -4872,10 +5133,27 @@ function createQueryAgent(config) {
|
|
|
4872
5133
|
})
|
|
4873
5134
|
)
|
|
4874
5135
|
);
|
|
5136
|
+
let subAnswers = [];
|
|
5137
|
+
for (let i = 0; i < reasonResults.length; i++) {
|
|
5138
|
+
const result = reasonResults[i];
|
|
5139
|
+
if (result.status === "fulfilled") {
|
|
5140
|
+
subAnswers.push(result.value);
|
|
5141
|
+
} else {
|
|
5142
|
+
await log?.(`Reasoner failed for sub-question "${classification.subQuestions[i].question}": ${result.reason}`);
|
|
5143
|
+
subAnswers.push({
|
|
5144
|
+
subQuestion: classification.subQuestions[i].question,
|
|
5145
|
+
answer: "Unable to answer this part of the question due to a processing error.",
|
|
5146
|
+
citations: [],
|
|
5147
|
+
confidence: 0,
|
|
5148
|
+
needsMoreContext: true
|
|
5149
|
+
});
|
|
5150
|
+
}
|
|
5151
|
+
}
|
|
5152
|
+
await pipelineCtx.save("reason", { classification, evidence: allEvidence, subAnswers });
|
|
4875
5153
|
onProgress?.("Verifying answer grounding...");
|
|
4876
5154
|
const verifierConfig = { generateObject, providerOptions };
|
|
4877
5155
|
for (let round = 0; round < maxVerifyRounds; round++) {
|
|
4878
|
-
const { result: verifyResult, usage } = await
|
|
5156
|
+
const { result: verifyResult, usage } = await safeVerify(
|
|
4879
5157
|
question,
|
|
4880
5158
|
subAnswers,
|
|
4881
5159
|
allEvidence,
|
|
@@ -4899,7 +5177,6 @@ function createQueryAgent(config) {
|
|
|
4899
5177
|
() => retrieve(sq, conversationId, {
|
|
4900
5178
|
...retrieverConfig,
|
|
4901
5179
|
retrievalLimit: retrievalLimit * 2
|
|
4902
|
-
// Broader retrieval on retry
|
|
4903
5180
|
})
|
|
4904
5181
|
)
|
|
4905
5182
|
)
|
|
@@ -4907,7 +5184,7 @@ function createQueryAgent(config) {
|
|
|
4907
5184
|
for (const r of retryRetrievals) {
|
|
4908
5185
|
allEvidence.push(...r.evidence);
|
|
4909
5186
|
}
|
|
4910
|
-
const
|
|
5187
|
+
const retrySettled = await Promise.allSettled(
|
|
4911
5188
|
retryQuestions.map(
|
|
4912
5189
|
(sq, i) => limit(async () => {
|
|
4913
5190
|
const { subAnswer, usage: u } = await reason(
|
|
@@ -4921,6 +5198,7 @@ function createQueryAgent(config) {
|
|
|
4921
5198
|
})
|
|
4922
5199
|
)
|
|
4923
5200
|
);
|
|
5201
|
+
const retrySubAnswers = retrySettled.filter((r) => r.status === "fulfilled").map((r) => r.value);
|
|
4924
5202
|
const retryQSet = new Set(retryQuestions.map((sq) => sq.question));
|
|
4925
5203
|
subAnswers = subAnswers.map((sa) => {
|
|
4926
5204
|
if (retryQSet.has(sa.subQuestion)) {
|
|
@@ -4973,17 +5251,42 @@ function createQueryAgent(config) {
|
|
|
4973
5251
|
}
|
|
4974
5252
|
}
|
|
4975
5253
|
const prompt = buildQueryClassifyPrompt(question, conversationContext);
|
|
4976
|
-
const { object, usage } = await
|
|
4977
|
-
|
|
5254
|
+
const { object, usage } = await safeGenerateObject(
|
|
5255
|
+
generateObject,
|
|
5256
|
+
{
|
|
4978
5257
|
prompt,
|
|
4979
5258
|
schema: QueryClassifyResultSchema,
|
|
4980
5259
|
maxTokens: 2048,
|
|
4981
5260
|
providerOptions
|
|
4982
|
-
}
|
|
5261
|
+
},
|
|
5262
|
+
{
|
|
5263
|
+
fallback: {
|
|
5264
|
+
intent: "general_knowledge",
|
|
5265
|
+
subQuestions: [
|
|
5266
|
+
{
|
|
5267
|
+
question,
|
|
5268
|
+
intent: "general_knowledge"
|
|
5269
|
+
}
|
|
5270
|
+
],
|
|
5271
|
+
requiresDocumentLookup: true,
|
|
5272
|
+
requiresChunkSearch: true,
|
|
5273
|
+
requiresConversationHistory: !!conversationId
|
|
5274
|
+
},
|
|
5275
|
+
log,
|
|
5276
|
+
onError: (err, attempt) => log?.(`Query classify attempt ${attempt + 1} failed: ${err}`)
|
|
5277
|
+
}
|
|
4983
5278
|
);
|
|
4984
5279
|
trackUsage(usage);
|
|
4985
5280
|
return object;
|
|
4986
5281
|
}
|
|
5282
|
+
async function safeVerify(originalQuestion, subAnswers, allEvidence, verifierConfig) {
|
|
5283
|
+
try {
|
|
5284
|
+
return await verify(originalQuestion, subAnswers, allEvidence, verifierConfig);
|
|
5285
|
+
} catch (error) {
|
|
5286
|
+
await log?.(`Verification failed, approving by default: ${error instanceof Error ? error.message : String(error)}`);
|
|
5287
|
+
return { result: { approved: true, issues: [] } };
|
|
5288
|
+
}
|
|
5289
|
+
}
|
|
4987
5290
|
async function respond(originalQuestion, subAnswers, classification, platform) {
|
|
4988
5291
|
const subAnswersJson = JSON.stringify(
|
|
4989
5292
|
subAnswers.map((sa) => ({
|
|
@@ -4997,13 +5300,25 @@ function createQueryAgent(config) {
|
|
|
4997
5300
|
2
|
|
4998
5301
|
);
|
|
4999
5302
|
const prompt = buildRespondPrompt(originalQuestion, subAnswersJson, platform);
|
|
5000
|
-
const { object, usage } = await
|
|
5001
|
-
|
|
5303
|
+
const { object, usage } = await safeGenerateObject(
|
|
5304
|
+
generateObject,
|
|
5305
|
+
{
|
|
5002
5306
|
prompt,
|
|
5003
5307
|
schema: QueryResultSchema,
|
|
5004
5308
|
maxTokens: 4096,
|
|
5005
5309
|
providerOptions
|
|
5006
|
-
}
|
|
5310
|
+
},
|
|
5311
|
+
{
|
|
5312
|
+
fallback: {
|
|
5313
|
+
answer: subAnswers.map((sa) => `**${sa.subQuestion}**
|
|
5314
|
+
${sa.answer}`).join("\n\n"),
|
|
5315
|
+
citations: subAnswers.flatMap((sa) => sa.citations),
|
|
5316
|
+
intent: classification.intent,
|
|
5317
|
+
confidence: Math.min(...subAnswers.map((sa) => sa.confidence), 1)
|
|
5318
|
+
},
|
|
5319
|
+
log,
|
|
5320
|
+
onError: (err, attempt) => log?.(`Respond attempt ${attempt + 1} failed: ${err}`)
|
|
5321
|
+
}
|
|
5007
5322
|
);
|
|
5008
5323
|
trackUsage(usage);
|
|
5009
5324
|
const result = object;
|
|
@@ -5168,6 +5483,7 @@ export {
|
|
|
5168
5483
|
CommercialAutoDeclarationsSchema,
|
|
5169
5484
|
CommercialPropertyDeclarationsSchema,
|
|
5170
5485
|
CommunicationIntentSchema,
|
|
5486
|
+
ConditionKeyValueSchema,
|
|
5171
5487
|
ConditionTypeSchema,
|
|
5172
5488
|
ConstructionTypeSchema,
|
|
5173
5489
|
ContactSchema,
|
|
@@ -5334,6 +5650,7 @@ export {
|
|
|
5334
5650
|
chunkDocument,
|
|
5335
5651
|
createApplicationPipeline,
|
|
5336
5652
|
createExtractor,
|
|
5653
|
+
createPipelineContext,
|
|
5337
5654
|
createQueryAgent,
|
|
5338
5655
|
extractPageRange,
|
|
5339
5656
|
fillAcroForm,
|
|
@@ -5343,6 +5660,7 @@ export {
|
|
|
5343
5660
|
getTemplate,
|
|
5344
5661
|
overlayTextOnPdf,
|
|
5345
5662
|
pLimit,
|
|
5663
|
+
safeGenerateObject,
|
|
5346
5664
|
sanitizeNulls,
|
|
5347
5665
|
stripFences,
|
|
5348
5666
|
withRetry
|