@claritylabs/cl-sdk 0.6.0 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.mts +372 -67
- package/dist/index.d.ts +372 -67
- package/dist/index.js +526 -219
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +523 -219
- package/dist/index.mjs.map +1 -1
- package/dist/storage-sqlite.d.mts +52 -10
- package/dist/storage-sqlite.d.ts +52 -10
- package/package.json +1 -1
package/dist/index.mjs
CHANGED
|
@@ -71,6 +71,69 @@ function sanitizeNulls(obj) {
|
|
|
71
71
|
return obj;
|
|
72
72
|
}
|
|
73
73
|
|
|
74
|
+
// src/core/safe-generate.ts
|
|
75
|
+
/**
 * Calls `generateObject(params)` through `withRetry`, repeating the whole call
 * when it fails and optionally degrading to a caller-supplied fallback.
 *
 * @param {Function} generateObject - Invoked as `generateObject(params)`.
 * @param {object} params - Passed unchanged to `generateObject` on every attempt.
 * @param {object} [options]
 * @param {number} [options.maxRetries=1] - Extra attempts after the first one.
 *   Negative or NaN values are treated as 0 so at least one attempt is made.
 * @param {*} [options.fallback] - When not `undefined`, returned (wrapped as
 *   `{ object: fallback }`) after every attempt has failed instead of throwing.
 * @param {Function} [options.log] - Optional logger; also forwarded to `withRetry`.
 * @param {Function} [options.onError] - Called as `onError(error, attempt)` after
 *   each failed attempt (`attempt` is zero-based). May be async.
 * @returns {Promise<*>} Whatever `generateObject` resolved with, or
 *   `{ object: options.fallback }`. The fallback result has no `usage` field.
 * @throws The error from the last attempt when no fallback was supplied.
 */
async function safeGenerateObject(generateObject, params, options) {
  const requestedRetries = options?.maxRetries ?? 1;
  // Clamp so the loop always runs once; otherwise `lastError` would still be
  // undefined when we reach the final `throw`.
  const maxRetries = Math.max(0, Math.trunc(requestedRetries) || 0);
  let lastError;
  for (let attempt = 0; attempt <= maxRetries; attempt++) {
    try {
      return await withRetry(
        () => generateObject(params),
        options?.log
      );
    } catch (error) {
      lastError = error;
      // Awaited on purpose: callers pass async observers (e.g. a logger), and a
      // floating promise here would surface as an unhandled rejection and could
      // complete after this function has already returned.
      await options?.onError?.(error, attempt);
      await options?.log?.(
        `safeGenerateObject attempt ${attempt + 1}/${maxRetries + 1} failed: ${error instanceof Error ? error.message : String(error)}`
      );
      if (attempt < maxRetries) {
        // Fixed one-second pause between attempts.
        await new Promise((resolve) => setTimeout(resolve, 1e3));
      }
    }
  }
  if (options?.fallback !== void 0) {
    await options?.log?.(
      `safeGenerateObject: all retries exhausted, returning fallback`
    );
    return { object: options.fallback };
  }
  throw lastError;
}
|
|
104
|
+
|
|
105
|
+
// src/core/pipeline.ts
|
|
106
|
+
/**
 * Creates an in-memory checkpoint tracker for a multi-phase pipeline run.
 *
 * @param {object} opts
 * @param {string} opts.id - Identifier exposed as `context.id`.
 * @param {Function} [opts.onSave] - Called (and awaited) with each new checkpoint.
 * @param {object} [opts.resumeFrom] - A previously saved checkpoint
 *   (`{ phase, state, timestamp, completedPhases? }`) to resume from.
 * @returns {{id: string, save: Function, getCheckpoint: Function,
 *   isPhaseComplete: Function, clear: Function}}
 *
 * Each checkpoint records the list of phases completed so far. On resume that
 * list is restored, so phases that ran before the checkpointed phase are also
 * reported as complete. Checkpoints without `completedPhases` mark only their
 * own `phase` as complete.
 */
function createPipelineContext(opts) {
  let latest = opts.resumeFrom;
  const completedPhases = /* @__PURE__ */ new Set();
  if (opts.resumeFrom) {
    const recorded = opts.resumeFrom.completedPhases;
    if (Array.isArray(recorded)) {
      for (const phase of recorded) {
        completedPhases.add(phase);
      }
    }
    completedPhases.add(opts.resumeFrom.phase);
  }
  return {
    id: opts.id,
    /** Records `phase` as complete, stores the checkpoint, and notifies `opts.onSave`. */
    async save(phase, state) {
      completedPhases.add(phase);
      const checkpoint = {
        phase,
        state,
        timestamp: Date.now(),
        // Snapshot (not the live Set) so the checkpoint stays serializable and stable.
        completedPhases: [...completedPhases]
      };
      latest = checkpoint;
      await opts.onSave?.(checkpoint);
    },
    /** Returns the most recent checkpoint, or `undefined` if none exists. */
    getCheckpoint() {
      return latest;
    },
    /** Reports whether `phase` was saved in this run or restored from `resumeFrom`. */
    isPhaseComplete(phase) {
      return completedPhases.has(phase);
    },
    /** Forgets the latest checkpoint and all completed phases. */
    clear() {
      latest = void 0;
      completedPhases.clear();
    }
  };
}
|
|
136
|
+
|
|
74
137
|
// src/schemas/enums.ts
|
|
75
138
|
import { z } from "zod";
|
|
76
139
|
var PolicyTypeSchema = z.enum([
|
|
@@ -471,11 +534,15 @@ var ExclusionSchema = z5.object({
|
|
|
471
534
|
|
|
472
535
|
// src/schemas/condition.ts
|
|
473
536
|
import { z as z6 } from "zod";
|
|
537
|
+
// One key/value pair (both strings); element type of PolicyConditionSchema.keyValues.
var ConditionKeyValueSchema = z6.object({
|
|
538
|
+
key: z6.string(),
|
|
539
|
+
value: z6.string()
|
|
540
|
+
});
|
|
474
541
|
var PolicyConditionSchema = z6.object({
|
|
475
542
|
name: z6.string(),
|
|
476
543
|
conditionType: ConditionTypeSchema,
|
|
477
544
|
content: z6.string(),
|
|
478
|
-
keyValues: z6.
|
|
545
|
+
keyValues: z6.array(ConditionKeyValueSchema).optional(),
|
|
479
546
|
pageNumber: z6.number().optional()
|
|
480
547
|
});
|
|
481
548
|
|
|
@@ -1652,21 +1719,33 @@ async function formatDocumentContent(doc, generateText, options) {
|
|
|
1652
1719
|
for (let i = 0; i < entries.length; i += MAX_ENTRIES_PER_BATCH) {
|
|
1653
1720
|
batches.push(entries.slice(i, i + MAX_ENTRIES_PER_BATCH));
|
|
1654
1721
|
}
|
|
1655
|
-
for (
|
|
1656
|
-
const
|
|
1657
|
-
|
|
1658
|
-
() =>
|
|
1659
|
-
|
|
1660
|
-
|
|
1661
|
-
|
|
1662
|
-
|
|
1663
|
-
|
|
1664
|
-
|
|
1665
|
-
|
|
1666
|
-
|
|
1722
|
+
for (let batchIdx = 0; batchIdx < batches.length; batchIdx++) {
|
|
1723
|
+
const batch = batches[batchIdx];
|
|
1724
|
+
try {
|
|
1725
|
+
const prompt = buildFormatPrompt(batch.map((e) => ({ id: e.id, text: e.text })));
|
|
1726
|
+
const result = await withRetry(
|
|
1727
|
+
() => generateText({
|
|
1728
|
+
prompt,
|
|
1729
|
+
maxTokens: 16384,
|
|
1730
|
+
providerOptions: options?.providerOptions
|
|
1731
|
+
})
|
|
1732
|
+
);
|
|
1733
|
+
if (result.usage) {
|
|
1734
|
+
totalUsage.inputTokens += result.usage.inputTokens;
|
|
1735
|
+
totalUsage.outputTokens += result.usage.outputTokens;
|
|
1736
|
+
}
|
|
1737
|
+
const formatted = parseFormatResponse(result.text);
|
|
1738
|
+
if (formatted.size < batch.length) {
|
|
1739
|
+
await options?.log?.(
|
|
1740
|
+
`Format batch ${batchIdx + 1}/${batches.length}: model returned ${formatted.size}/${batch.length} entries \u2014 unformatted entries will keep original content`
|
|
1741
|
+
);
|
|
1742
|
+
}
|
|
1743
|
+
applyFormattedContent(doc, batch, formatted);
|
|
1744
|
+
} catch (error) {
|
|
1745
|
+
await options?.log?.(
|
|
1746
|
+
`Format batch ${batchIdx + 1}/${batches.length} failed, keeping original content: ${error instanceof Error ? error.message : String(error)}`
|
|
1747
|
+
);
|
|
1667
1748
|
}
|
|
1668
|
-
const formatted = parseFormatResponse(result.text);
|
|
1669
|
-
applyFormattedContent(doc, batch, formatted);
|
|
1670
1749
|
}
|
|
1671
1750
|
return { document: doc, usage: totalUsage };
|
|
1672
1751
|
}
|
|
@@ -2507,9 +2586,13 @@ var ExtractionTaskSchema = z18.object({
|
|
|
2507
2586
|
endPage: z18.number(),
|
|
2508
2587
|
description: z18.string()
|
|
2509
2588
|
});
|
|
2589
|
+
// One page-map row: a section name and its page range as free text (the plan prompt
// shows e.g. "pages 1-3"); element type of ExtractionPlanSchema.pageMap.
var PageMapEntrySchema = z18.object({
|
|
2590
|
+
section: z18.string(),
|
|
2591
|
+
pages: z18.string()
|
|
2592
|
+
});
|
|
2510
2593
|
var ExtractionPlanSchema = z18.object({
|
|
2511
2594
|
tasks: z18.array(ExtractionTaskSchema),
|
|
2512
|
-
pageMap: z18.
|
|
2595
|
+
pageMap: z18.array(PageMapEntrySchema).optional()
|
|
2513
2596
|
});
|
|
2514
2597
|
function buildPlanPrompt(templateHints) {
|
|
2515
2598
|
return `You are planning the extraction of an insurance document. You have already classified this document. Now scan the full document and create a page map + extraction plan.
|
|
@@ -2538,7 +2621,10 @@ Return JSON:
|
|
|
2538
2621
|
{ "extractorName": "carrier_info", "startPage": 1, "endPage": 2, "description": "Extract carrier details from declarations page" },
|
|
2539
2622
|
...
|
|
2540
2623
|
],
|
|
2541
|
-
"pageMap":
|
|
2624
|
+
"pageMap": [
|
|
2625
|
+
{ "section": "declarations", "pages": "pages 1-3" },
|
|
2626
|
+
{ "section": "endorsements", "pages": "pages 15-22" }
|
|
2627
|
+
]
|
|
2542
2628
|
}
|
|
2543
2629
|
|
|
2544
2630
|
Create tasks that cover the entire document. Prefer specific extractors over generic "sections" where possible. Keep page ranges tight \u2014 only include pages relevant to each extractor.
|
|
@@ -3031,7 +3117,8 @@ function createExtractor(config) {
|
|
|
3031
3117
|
onTokenUsage,
|
|
3032
3118
|
onProgress,
|
|
3033
3119
|
log,
|
|
3034
|
-
providerOptions
|
|
3120
|
+
providerOptions,
|
|
3121
|
+
onCheckpointSave
|
|
3035
3122
|
} = config;
|
|
3036
3123
|
const limit = pLimit(concurrency);
|
|
3037
3124
|
let totalUsage = { inputTokens: 0, outputTokens: 0 };
|
|
@@ -3042,100 +3129,106 @@ function createExtractor(config) {
|
|
|
3042
3129
|
onTokenUsage?.(usage);
|
|
3043
3130
|
}
|
|
3044
3131
|
}
|
|
3045
|
-
async function extract(pdfBase64, documentId) {
|
|
3132
|
+
async function extract(pdfBase64, documentId, options) {
|
|
3046
3133
|
const id = documentId ?? `doc-${Date.now()}`;
|
|
3047
3134
|
const memory = /* @__PURE__ */ new Map();
|
|
3048
3135
|
totalUsage = { inputTokens: 0, outputTokens: 0 };
|
|
3049
|
-
|
|
3050
|
-
|
|
3051
|
-
|
|
3052
|
-
|
|
3053
|
-
|
|
3054
|
-
|
|
3055
|
-
|
|
3056
|
-
|
|
3057
|
-
|
|
3058
|
-
);
|
|
3059
|
-
trackUsage(classifyResult.usage);
|
|
3060
|
-
memory.set("classify", classifyResult.object);
|
|
3061
|
-
const { documentType, policyTypes } = classifyResult.object;
|
|
3062
|
-
const primaryType = policyTypes[0] ?? "other";
|
|
3063
|
-
const template = getTemplate(primaryType);
|
|
3064
|
-
onProgress?.(`Planning extraction for ${primaryType} ${documentType}...`);
|
|
3065
|
-
const templateHints = [
|
|
3066
|
-
`Document type: ${primaryType} ${documentType}`,
|
|
3067
|
-
`Expected sections: ${template.expectedSections.join(", ")}`,
|
|
3068
|
-
`Page hints: ${Object.entries(template.pageHints).map(([k, v]) => `${k}: ${v}`).join("; ")}`,
|
|
3069
|
-
`Total pages: ${pageCount}`
|
|
3070
|
-
].join("\n");
|
|
3071
|
-
const planResult = await withRetry(
|
|
3072
|
-
() => generateObject({
|
|
3073
|
-
prompt: buildPlanPrompt(templateHints),
|
|
3074
|
-
schema: ExtractionPlanSchema,
|
|
3075
|
-
maxTokens: 2048,
|
|
3076
|
-
providerOptions
|
|
3077
|
-
})
|
|
3078
|
-
);
|
|
3079
|
-
trackUsage(planResult.usage);
|
|
3080
|
-
const tasks = planResult.object.tasks;
|
|
3081
|
-
onProgress?.(`Dispatching ${tasks.length} extractors...`);
|
|
3082
|
-
const extractorResults = await Promise.all(
|
|
3083
|
-
tasks.map(
|
|
3084
|
-
(task) => limit(async () => {
|
|
3085
|
-
const ext = getExtractor(task.extractorName);
|
|
3086
|
-
if (!ext) {
|
|
3087
|
-
await log?.(`Unknown extractor: ${task.extractorName}, skipping`);
|
|
3088
|
-
return null;
|
|
3089
|
-
}
|
|
3090
|
-
onProgress?.(`Extracting ${task.extractorName} (pages ${task.startPage}-${task.endPage})...`);
|
|
3091
|
-
try {
|
|
3092
|
-
const result = await runExtractor({
|
|
3093
|
-
name: task.extractorName,
|
|
3094
|
-
prompt: ext.buildPrompt(),
|
|
3095
|
-
schema: ext.schema,
|
|
3096
|
-
pdfBase64,
|
|
3097
|
-
startPage: task.startPage,
|
|
3098
|
-
endPage: task.endPage,
|
|
3099
|
-
generateObject,
|
|
3100
|
-
convertPdfToImages,
|
|
3101
|
-
maxTokens: ext.maxTokens ?? 4096,
|
|
3102
|
-
providerOptions
|
|
3103
|
-
});
|
|
3104
|
-
trackUsage(result.usage);
|
|
3105
|
-
return result;
|
|
3106
|
-
} catch (error) {
|
|
3107
|
-
await log?.(`Extractor ${task.extractorName} failed: ${error}`);
|
|
3108
|
-
return null;
|
|
3109
|
-
}
|
|
3110
|
-
})
|
|
3111
|
-
)
|
|
3112
|
-
);
|
|
3113
|
-
for (const result of extractorResults) {
|
|
3114
|
-
if (result) {
|
|
3115
|
-
memory.set(result.name, result.data);
|
|
3136
|
+
const pipelineCtx = createPipelineContext({
|
|
3137
|
+
id,
|
|
3138
|
+
onSave: onCheckpointSave,
|
|
3139
|
+
resumeFrom: options?.resumeFrom
|
|
3140
|
+
});
|
|
3141
|
+
const resumed = pipelineCtx.getCheckpoint()?.state;
|
|
3142
|
+
if (resumed?.memory) {
|
|
3143
|
+
for (const [k, v] of Object.entries(resumed.memory)) {
|
|
3144
|
+
memory.set(k, v);
|
|
3116
3145
|
}
|
|
3117
3146
|
}
|
|
3118
|
-
|
|
3119
|
-
|
|
3120
|
-
|
|
3121
|
-
|
|
3122
|
-
|
|
3123
|
-
|
|
3124
|
-
|
|
3147
|
+
let classifyResult;
|
|
3148
|
+
if (resumed?.classifyResult && pipelineCtx.isPhaseComplete("classify")) {
|
|
3149
|
+
classifyResult = resumed.classifyResult;
|
|
3150
|
+
onProgress?.("Resuming from checkpoint (classify complete)...");
|
|
3151
|
+
} else {
|
|
3152
|
+
onProgress?.("Classifying document...");
|
|
3153
|
+
const pageCount2 = await getPdfPageCount(pdfBase64);
|
|
3154
|
+
const classifyResponse = await safeGenerateObject(
|
|
3155
|
+
generateObject,
|
|
3156
|
+
{
|
|
3157
|
+
prompt: buildClassifyPrompt(),
|
|
3158
|
+
schema: ClassifyResultSchema,
|
|
3159
|
+
maxTokens: 512,
|
|
3125
3160
|
providerOptions
|
|
3126
|
-
}
|
|
3161
|
+
},
|
|
3162
|
+
{
|
|
3163
|
+
fallback: { documentType: "policy", policyTypes: ["other"], confidence: 0 },
|
|
3164
|
+
log,
|
|
3165
|
+
onError: (err, attempt) => log?.(`Classify attempt ${attempt + 1} failed: ${err}`)
|
|
3166
|
+
}
|
|
3127
3167
|
);
|
|
3128
|
-
trackUsage(
|
|
3129
|
-
|
|
3130
|
-
|
|
3131
|
-
|
|
3132
|
-
|
|
3133
|
-
|
|
3134
|
-
|
|
3135
|
-
|
|
3168
|
+
trackUsage(classifyResponse.usage);
|
|
3169
|
+
classifyResult = classifyResponse.object;
|
|
3170
|
+
memory.set("classify", classifyResult);
|
|
3171
|
+
await pipelineCtx.save("classify", {
|
|
3172
|
+
id,
|
|
3173
|
+
pageCount: pageCount2,
|
|
3174
|
+
classifyResult,
|
|
3175
|
+
memory: Object.fromEntries(memory)
|
|
3176
|
+
});
|
|
3177
|
+
}
|
|
3178
|
+
const { documentType, policyTypes } = classifyResult;
|
|
3179
|
+
const primaryType = policyTypes[0] ?? "other";
|
|
3180
|
+
const template = getTemplate(primaryType);
|
|
3181
|
+
const pageCount = resumed?.pageCount ?? await getPdfPageCount(pdfBase64);
|
|
3182
|
+
let plan;
|
|
3183
|
+
if (resumed?.plan && pipelineCtx.isPhaseComplete("plan")) {
|
|
3184
|
+
plan = resumed.plan;
|
|
3185
|
+
onProgress?.("Resuming from checkpoint (plan complete)...");
|
|
3186
|
+
} else {
|
|
3187
|
+
onProgress?.(`Planning extraction for ${primaryType} ${documentType}...`);
|
|
3188
|
+
const templateHints = [
|
|
3189
|
+
`Document type: ${primaryType} ${documentType}`,
|
|
3190
|
+
`Expected sections: ${template.expectedSections.join(", ")}`,
|
|
3191
|
+
`Page hints: ${Object.entries(template.pageHints).map(([k, v]) => `${k}: ${v}`).join("; ")}`,
|
|
3192
|
+
`Total pages: ${pageCount}`
|
|
3193
|
+
].join("\n");
|
|
3194
|
+
const planResponse = await safeGenerateObject(
|
|
3195
|
+
generateObject,
|
|
3196
|
+
{
|
|
3197
|
+
prompt: buildPlanPrompt(templateHints),
|
|
3198
|
+
schema: ExtractionPlanSchema,
|
|
3199
|
+
maxTokens: 2048,
|
|
3200
|
+
providerOptions
|
|
3201
|
+
},
|
|
3202
|
+
{
|
|
3203
|
+
fallback: {
|
|
3204
|
+
tasks: [{ extractorName: "sections", startPage: 1, endPage: pageCount, description: "Full document fallback extraction" }]
|
|
3205
|
+
},
|
|
3206
|
+
log,
|
|
3207
|
+
onError: (err, attempt) => log?.(`Plan attempt ${attempt + 1} failed: ${err}`)
|
|
3208
|
+
}
|
|
3209
|
+
);
|
|
3210
|
+
trackUsage(planResponse.usage);
|
|
3211
|
+
plan = planResponse.object;
|
|
3212
|
+
await pipelineCtx.save("plan", {
|
|
3213
|
+
id,
|
|
3214
|
+
pageCount,
|
|
3215
|
+
classifyResult,
|
|
3216
|
+
plan,
|
|
3217
|
+
memory: Object.fromEntries(memory)
|
|
3218
|
+
});
|
|
3219
|
+
}
|
|
3220
|
+
if (!pipelineCtx.isPhaseComplete("extract")) {
|
|
3221
|
+
const tasks = plan.tasks;
|
|
3222
|
+
onProgress?.(`Dispatching ${tasks.length} extractors...`);
|
|
3223
|
+
const extractorResults = await Promise.all(
|
|
3224
|
+
tasks.map(
|
|
3136
3225
|
(task) => limit(async () => {
|
|
3137
3226
|
const ext = getExtractor(task.extractorName);
|
|
3138
|
-
if (!ext)
|
|
3227
|
+
if (!ext) {
|
|
3228
|
+
await log?.(`Unknown extractor: ${task.extractorName}, skipping`);
|
|
3229
|
+
return null;
|
|
3230
|
+
}
|
|
3231
|
+
onProgress?.(`Extracting ${task.extractorName} (pages ${task.startPage}-${task.endPage})...`);
|
|
3139
3232
|
try {
|
|
3140
3233
|
const result = await runExtractor({
|
|
3141
3234
|
name: task.extractorName,
|
|
@@ -3152,28 +3245,114 @@ function createExtractor(config) {
|
|
|
3152
3245
|
trackUsage(result.usage);
|
|
3153
3246
|
return result;
|
|
3154
3247
|
} catch (error) {
|
|
3155
|
-
await log?.(`
|
|
3248
|
+
await log?.(`Extractor ${task.extractorName} failed: ${error}`);
|
|
3156
3249
|
return null;
|
|
3157
3250
|
}
|
|
3158
3251
|
})
|
|
3159
3252
|
)
|
|
3160
3253
|
);
|
|
3161
|
-
for (const result of
|
|
3254
|
+
for (const result of extractorResults) {
|
|
3162
3255
|
if (result) {
|
|
3163
3256
|
memory.set(result.name, result.data);
|
|
3164
3257
|
}
|
|
3165
3258
|
}
|
|
3259
|
+
await pipelineCtx.save("extract", {
|
|
3260
|
+
id,
|
|
3261
|
+
pageCount,
|
|
3262
|
+
classifyResult,
|
|
3263
|
+
plan,
|
|
3264
|
+
memory: Object.fromEntries(memory)
|
|
3265
|
+
});
|
|
3266
|
+
}
|
|
3267
|
+
if (!pipelineCtx.isPhaseComplete("review")) {
|
|
3268
|
+
for (let round = 0; round < maxReviewRounds; round++) {
|
|
3269
|
+
const extractedKeys = [...memory.keys()].filter((k) => k !== "classify");
|
|
3270
|
+
const reviewResponse = await safeGenerateObject(
|
|
3271
|
+
generateObject,
|
|
3272
|
+
{
|
|
3273
|
+
prompt: buildReviewPrompt(template.required, extractedKeys),
|
|
3274
|
+
schema: ReviewResultSchema,
|
|
3275
|
+
maxTokens: 1024,
|
|
3276
|
+
providerOptions
|
|
3277
|
+
},
|
|
3278
|
+
{
|
|
3279
|
+
fallback: { complete: true, missingFields: [], additionalTasks: [] },
|
|
3280
|
+
log,
|
|
3281
|
+
onError: (err, attempt) => log?.(`Review round ${round + 1} attempt ${attempt + 1} failed: ${err}`)
|
|
3282
|
+
}
|
|
3283
|
+
);
|
|
3284
|
+
trackUsage(reviewResponse.usage);
|
|
3285
|
+
if (reviewResponse.object.complete || reviewResponse.object.additionalTasks.length === 0) {
|
|
3286
|
+
onProgress?.("Extraction complete.");
|
|
3287
|
+
break;
|
|
3288
|
+
}
|
|
3289
|
+
onProgress?.(`Review round ${round + 1}: dispatching ${reviewResponse.object.additionalTasks.length} follow-up extractors...`);
|
|
3290
|
+
const followUpResults = await Promise.all(
|
|
3291
|
+
reviewResponse.object.additionalTasks.map(
|
|
3292
|
+
(task) => limit(async () => {
|
|
3293
|
+
const ext = getExtractor(task.extractorName);
|
|
3294
|
+
if (!ext) return null;
|
|
3295
|
+
try {
|
|
3296
|
+
const result = await runExtractor({
|
|
3297
|
+
name: task.extractorName,
|
|
3298
|
+
prompt: ext.buildPrompt(),
|
|
3299
|
+
schema: ext.schema,
|
|
3300
|
+
pdfBase64,
|
|
3301
|
+
startPage: task.startPage,
|
|
3302
|
+
endPage: task.endPage,
|
|
3303
|
+
generateObject,
|
|
3304
|
+
convertPdfToImages,
|
|
3305
|
+
maxTokens: ext.maxTokens ?? 4096,
|
|
3306
|
+
providerOptions
|
|
3307
|
+
});
|
|
3308
|
+
trackUsage(result.usage);
|
|
3309
|
+
return result;
|
|
3310
|
+
} catch (error) {
|
|
3311
|
+
await log?.(`Follow-up extractor ${task.extractorName} failed: ${error}`);
|
|
3312
|
+
return null;
|
|
3313
|
+
}
|
|
3314
|
+
})
|
|
3315
|
+
)
|
|
3316
|
+
);
|
|
3317
|
+
for (const result of followUpResults) {
|
|
3318
|
+
if (result) {
|
|
3319
|
+
memory.set(result.name, result.data);
|
|
3320
|
+
}
|
|
3321
|
+
}
|
|
3322
|
+
}
|
|
3323
|
+
await pipelineCtx.save("review", {
|
|
3324
|
+
id,
|
|
3325
|
+
pageCount,
|
|
3326
|
+
classifyResult,
|
|
3327
|
+
plan,
|
|
3328
|
+
memory: Object.fromEntries(memory)
|
|
3329
|
+
});
|
|
3166
3330
|
}
|
|
3167
3331
|
onProgress?.("Assembling document...");
|
|
3168
3332
|
const document = assembleDocument(id, documentType, memory);
|
|
3333
|
+
await pipelineCtx.save("assemble", {
|
|
3334
|
+
id,
|
|
3335
|
+
pageCount,
|
|
3336
|
+
classifyResult,
|
|
3337
|
+
plan,
|
|
3338
|
+
memory: Object.fromEntries(memory),
|
|
3339
|
+
document
|
|
3340
|
+
});
|
|
3169
3341
|
onProgress?.("Formatting extracted content...");
|
|
3170
3342
|
const formatResult = await formatDocumentContent(document, generateText, {
|
|
3171
3343
|
providerOptions,
|
|
3172
|
-
onProgress
|
|
3344
|
+
onProgress,
|
|
3345
|
+
log
|
|
3173
3346
|
});
|
|
3174
3347
|
trackUsage(formatResult.usage);
|
|
3175
3348
|
const chunks = chunkDocument(formatResult.document);
|
|
3176
|
-
|
|
3349
|
+
const finalCheckpoint = pipelineCtx.getCheckpoint();
|
|
3350
|
+
return {
|
|
3351
|
+
document: formatResult.document,
|
|
3352
|
+
chunks,
|
|
3353
|
+
tokenUsage: totalUsage,
|
|
3354
|
+
checkpoint: finalCheckpoint
|
|
3355
|
+
};
|
|
3177
3356
|
}
|
|
3178
3357
|
return { extract };
|
|
3179
3358
|
}
|
|
@@ -4036,7 +4215,6 @@ function createApplicationPipeline(config) {
|
|
|
4036
4215
|
let state = {
|
|
4037
4216
|
id,
|
|
4038
4217
|
pdfBase64: void 0,
|
|
4039
|
-
// Don't persist the full PDF in state
|
|
4040
4218
|
title: void 0,
|
|
4041
4219
|
applicationType: null,
|
|
4042
4220
|
fields: [],
|
|
@@ -4047,13 +4225,20 @@ function createApplicationPipeline(config) {
|
|
|
4047
4225
|
updatedAt: now
|
|
4048
4226
|
};
|
|
4049
4227
|
onProgress?.("Classifying document...");
|
|
4050
|
-
|
|
4051
|
-
|
|
4052
|
-
|
|
4053
|
-
|
|
4054
|
-
|
|
4055
|
-
|
|
4056
|
-
|
|
4228
|
+
await applicationStore?.save(state);
|
|
4229
|
+
let classifyResult;
|
|
4230
|
+
try {
|
|
4231
|
+
const { result, usage: classifyUsage } = await classifyApplication(
|
|
4232
|
+
pdfBase64.slice(0, 2e3),
|
|
4233
|
+
generateObject,
|
|
4234
|
+
providerOptions
|
|
4235
|
+
);
|
|
4236
|
+
trackUsage(classifyUsage);
|
|
4237
|
+
classifyResult = result;
|
|
4238
|
+
} catch (error) {
|
|
4239
|
+
await log?.(`Classification failed, treating as non-application: ${error instanceof Error ? error.message : String(error)}`);
|
|
4240
|
+
classifyResult = { isApplication: false, confidence: 0, applicationType: null };
|
|
4241
|
+
}
|
|
4057
4242
|
if (!classifyResult.isApplication) {
|
|
4058
4243
|
state.status = "complete";
|
|
4059
4244
|
state.updatedAt = Date.now();
|
|
@@ -4063,13 +4248,28 @@ function createApplicationPipeline(config) {
|
|
|
4063
4248
|
state.applicationType = classifyResult.applicationType;
|
|
4064
4249
|
state.status = "extracting";
|
|
4065
4250
|
state.updatedAt = Date.now();
|
|
4251
|
+
await applicationStore?.save(state);
|
|
4066
4252
|
onProgress?.("Extracting form fields...");
|
|
4067
|
-
|
|
4068
|
-
|
|
4069
|
-
|
|
4070
|
-
|
|
4071
|
-
|
|
4072
|
-
|
|
4253
|
+
let fields;
|
|
4254
|
+
try {
|
|
4255
|
+
const { fields: extractedFields, usage: extractUsage } = await extractFields(
|
|
4256
|
+
pdfBase64,
|
|
4257
|
+
generateObject,
|
|
4258
|
+
providerOptions
|
|
4259
|
+
);
|
|
4260
|
+
trackUsage(extractUsage);
|
|
4261
|
+
fields = extractedFields;
|
|
4262
|
+
} catch (error) {
|
|
4263
|
+
await log?.(`Field extraction failed: ${error instanceof Error ? error.message : String(error)}`);
|
|
4264
|
+
fields = [];
|
|
4265
|
+
}
|
|
4266
|
+
if (fields.length === 0) {
|
|
4267
|
+
await log?.("No fields extracted, completing pipeline with empty result");
|
|
4268
|
+
state.status = "complete";
|
|
4269
|
+
state.updatedAt = Date.now();
|
|
4270
|
+
await applicationStore?.save(state);
|
|
4271
|
+
return { state, tokenUsage: totalUsage };
|
|
4272
|
+
}
|
|
4073
4273
|
state.fields = fields;
|
|
4074
4274
|
state.title = classifyResult.applicationType ?? void 0;
|
|
4075
4275
|
state.status = "auto_filling";
|
|
@@ -4101,20 +4301,24 @@ function createApplicationPipeline(config) {
|
|
|
4101
4301
|
limit(async () => {
|
|
4102
4302
|
const unfilledFields2 = state.fields.filter((f) => !f.value);
|
|
4103
4303
|
if (unfilledFields2.length === 0) return;
|
|
4104
|
-
|
|
4105
|
-
|
|
4106
|
-
|
|
4107
|
-
|
|
4108
|
-
|
|
4109
|
-
|
|
4110
|
-
|
|
4111
|
-
|
|
4112
|
-
const
|
|
4113
|
-
|
|
4114
|
-
field
|
|
4115
|
-
|
|
4116
|
-
|
|
4304
|
+
try {
|
|
4305
|
+
const { result: autoFillResult, usage: afUsage } = await autoFillFromContext(
|
|
4306
|
+
unfilledFields2,
|
|
4307
|
+
orgContext,
|
|
4308
|
+
generateObject,
|
|
4309
|
+
providerOptions
|
|
4310
|
+
);
|
|
4311
|
+
trackUsage(afUsage);
|
|
4312
|
+
for (const match of autoFillResult.matches) {
|
|
4313
|
+
const field = state.fields.find((f) => f.id === match.fieldId);
|
|
4314
|
+
if (field && !field.value) {
|
|
4315
|
+
field.value = match.value;
|
|
4316
|
+
field.source = `auto-fill: ${match.contextKey}`;
|
|
4317
|
+
field.confidence = match.confidence;
|
|
4318
|
+
}
|
|
4117
4319
|
}
|
|
4320
|
+
} catch (e) {
|
|
4321
|
+
await log?.(`Auto-fill from context failed: ${e instanceof Error ? e.message : String(e)}`);
|
|
4118
4322
|
}
|
|
4119
4323
|
})
|
|
4120
4324
|
);
|
|
@@ -4147,13 +4351,18 @@ function createApplicationPipeline(config) {
|
|
|
4147
4351
|
if (unfilledFields.length > 0) {
|
|
4148
4352
|
onProgress?.(`Batching ${unfilledFields.length} remaining questions...`);
|
|
4149
4353
|
state.status = "batching";
|
|
4150
|
-
|
|
4151
|
-
|
|
4152
|
-
|
|
4153
|
-
|
|
4154
|
-
|
|
4155
|
-
|
|
4156
|
-
|
|
4354
|
+
try {
|
|
4355
|
+
const { result: batchResult, usage: batchUsage } = await batchQuestions(
|
|
4356
|
+
unfilledFields,
|
|
4357
|
+
generateObject,
|
|
4358
|
+
providerOptions
|
|
4359
|
+
);
|
|
4360
|
+
trackUsage(batchUsage);
|
|
4361
|
+
state.batches = batchResult.batches;
|
|
4362
|
+
} catch (error) {
|
|
4363
|
+
await log?.(`Batching failed, using single-batch fallback: ${error instanceof Error ? error.message : String(error)}`);
|
|
4364
|
+
state.batches = [unfilledFields.map((f) => f.id)];
|
|
4365
|
+
}
|
|
4157
4366
|
state.currentBatchIndex = 0;
|
|
4158
4367
|
state.status = "collecting";
|
|
4159
4368
|
} else {
|
|
@@ -4180,32 +4389,49 @@ function createApplicationPipeline(config) {
|
|
|
4180
4389
|
(f) => currentBatchFieldIds.includes(f.id)
|
|
4181
4390
|
);
|
|
4182
4391
|
onProgress?.("Classifying reply...");
|
|
4183
|
-
|
|
4184
|
-
|
|
4185
|
-
|
|
4186
|
-
generateObject,
|
|
4187
|
-
providerOptions
|
|
4188
|
-
);
|
|
4189
|
-
trackUsage(intentUsage);
|
|
4190
|
-
let fieldsFilled = 0;
|
|
4191
|
-
let responseText;
|
|
4192
|
-
if (intent.hasAnswers) {
|
|
4193
|
-
onProgress?.("Parsing answers...");
|
|
4194
|
-
const { result: parseResult, usage: parseUsage } = await parseAnswers(
|
|
4392
|
+
let intent;
|
|
4393
|
+
try {
|
|
4394
|
+
const { intent: classifiedIntent, usage: intentUsage } = await classifyReplyIntent(
|
|
4195
4395
|
currentBatchFields,
|
|
4196
4396
|
replyText,
|
|
4197
4397
|
generateObject,
|
|
4198
4398
|
providerOptions
|
|
4199
4399
|
);
|
|
4200
|
-
trackUsage(
|
|
4201
|
-
|
|
4202
|
-
|
|
4203
|
-
|
|
4204
|
-
|
|
4205
|
-
|
|
4206
|
-
|
|
4207
|
-
|
|
4400
|
+
trackUsage(intentUsage);
|
|
4401
|
+
intent = classifiedIntent;
|
|
4402
|
+
} catch (error) {
|
|
4403
|
+
await log?.(`Reply intent classification failed, defaulting to answers_only: ${error instanceof Error ? error.message : String(error)}`);
|
|
4404
|
+
intent = {
|
|
4405
|
+
primaryIntent: "answers_only",
|
|
4406
|
+
hasAnswers: true,
|
|
4407
|
+
questionText: void 0,
|
|
4408
|
+
questionFieldIds: void 0,
|
|
4409
|
+
lookupRequests: void 0
|
|
4410
|
+
};
|
|
4411
|
+
}
|
|
4412
|
+
let fieldsFilled = 0;
|
|
4413
|
+
let responseText;
|
|
4414
|
+
if (intent.hasAnswers) {
|
|
4415
|
+
onProgress?.("Parsing answers...");
|
|
4416
|
+
try {
|
|
4417
|
+
const { result: parseResult, usage: parseUsage } = await parseAnswers(
|
|
4418
|
+
currentBatchFields,
|
|
4419
|
+
replyText,
|
|
4420
|
+
generateObject,
|
|
4421
|
+
providerOptions
|
|
4422
|
+
);
|
|
4423
|
+
trackUsage(parseUsage);
|
|
4424
|
+
for (const answer of parseResult.answers) {
|
|
4425
|
+
const field = state.fields.find((f) => f.id === answer.fieldId);
|
|
4426
|
+
if (field) {
|
|
4427
|
+
field.value = answer.value;
|
|
4428
|
+
field.source = "user";
|
|
4429
|
+
field.confidence = "confirmed";
|
|
4430
|
+
fieldsFilled++;
|
|
4431
|
+
}
|
|
4208
4432
|
}
|
|
4433
|
+
} catch (error) {
|
|
4434
|
+
await log?.(`Answer parsing failed: ${error instanceof Error ? error.message : String(error)}`);
|
|
4209
4435
|
}
|
|
4210
4436
|
}
|
|
4211
4437
|
if (intent.lookupRequests?.length) {
|
|
@@ -4226,36 +4452,45 @@ function createApplicationPipeline(config) {
|
|
|
4226
4452
|
const targetFields = state.fields.filter(
|
|
4227
4453
|
(f) => intent.lookupRequests.some((lr) => lr.targetFieldIds.includes(f.id))
|
|
4228
4454
|
);
|
|
4229
|
-
|
|
4230
|
-
|
|
4231
|
-
|
|
4232
|
-
|
|
4233
|
-
|
|
4234
|
-
|
|
4235
|
-
|
|
4236
|
-
|
|
4237
|
-
|
|
4238
|
-
const
|
|
4239
|
-
|
|
4240
|
-
field
|
|
4241
|
-
|
|
4242
|
-
|
|
4243
|
-
|
|
4455
|
+
try {
|
|
4456
|
+
const { result: lookupResult, usage: lookupUsage } = await fillFromLookup(
|
|
4457
|
+
intent.lookupRequests,
|
|
4458
|
+
targetFields,
|
|
4459
|
+
availableData,
|
|
4460
|
+
generateObject,
|
|
4461
|
+
providerOptions
|
|
4462
|
+
);
|
|
4463
|
+
trackUsage(lookupUsage);
|
|
4464
|
+
for (const fill of lookupResult.fills) {
|
|
4465
|
+
const field = state.fields.find((f) => f.id === fill.fieldId);
|
|
4466
|
+
if (field) {
|
|
4467
|
+
field.value = fill.value;
|
|
4468
|
+
field.source = `lookup: ${fill.source}`;
|
|
4469
|
+
field.confidence = "high";
|
|
4470
|
+
fieldsFilled++;
|
|
4471
|
+
}
|
|
4244
4472
|
}
|
|
4473
|
+
} catch (error) {
|
|
4474
|
+
await log?.(`Lookup fill failed: ${error instanceof Error ? error.message : String(error)}`);
|
|
4245
4475
|
}
|
|
4246
4476
|
}
|
|
4247
4477
|
}
|
|
4248
4478
|
if (intent.primaryIntent === "question" || intent.primaryIntent === "mixed") {
|
|
4249
4479
|
if (intent.questionText) {
|
|
4250
|
-
|
|
4251
|
-
|
|
4480
|
+
try {
|
|
4481
|
+
const { text, usage } = await generateText({
|
|
4482
|
+
prompt: `The user is filling out an insurance application and asked: "${intent.questionText}"
|
|
4252
4483
|
|
|
4253
4484
|
Provide a brief, helpful explanation (2-3 sentences). End with "Just reply with the answer when you're ready and I'll fill it in."`,
|
|
4254
|
-
|
|
4255
|
-
|
|
4256
|
-
|
|
4257
|
-
|
|
4258
|
-
|
|
4485
|
+
maxTokens: 512,
|
|
4486
|
+
providerOptions
|
|
4487
|
+
});
|
|
4488
|
+
trackUsage(usage);
|
|
4489
|
+
responseText = text;
|
|
4490
|
+
} catch (error) {
|
|
4491
|
+
await log?.(`Question response generation failed: ${error instanceof Error ? error.message : String(error)}`);
|
|
4492
|
+
responseText = `I wasn't able to generate an explanation for your question. Could you rephrase it, or just provide the answer directly?`;
|
|
4493
|
+
}
|
|
4259
4494
|
}
|
|
4260
4495
|
}
|
|
4261
4496
|
const currentBatchComplete = currentBatchFieldIds.every(
|
|
@@ -4269,26 +4504,30 @@ Provide a brief, helpful explanation (2-3 sentences). End with "Just reply with
|
|
|
4269
4504
|
(f) => nextBatchFieldIds.includes(f.id)
|
|
4270
4505
|
);
|
|
4271
4506
|
const filledCount = state.fields.filter((f) => f.value).length;
|
|
4272
|
-
|
|
4273
|
-
|
|
4274
|
-
|
|
4275
|
-
|
|
4276
|
-
|
|
4277
|
-
|
|
4278
|
-
|
|
4279
|
-
|
|
4280
|
-
|
|
4281
|
-
|
|
4282
|
-
|
|
4283
|
-
|
|
4284
|
-
|
|
4285
|
-
|
|
4286
|
-
|
|
4287
|
-
responseText
|
|
4288
|
-
|
|
4289
|
-
|
|
4507
|
+
try {
|
|
4508
|
+
const { text: emailText, usage: emailUsage } = await generateBatchEmail(
|
|
4509
|
+
nextBatchFields,
|
|
4510
|
+
state.currentBatchIndex,
|
|
4511
|
+
state.batches.length,
|
|
4512
|
+
{
|
|
4513
|
+
appTitle: state.title,
|
|
4514
|
+
totalFieldCount: state.fields.length,
|
|
4515
|
+
filledFieldCount: filledCount,
|
|
4516
|
+
companyName: context?.companyName
|
|
4517
|
+
},
|
|
4518
|
+
generateText,
|
|
4519
|
+
providerOptions
|
|
4520
|
+
);
|
|
4521
|
+
trackUsage(emailUsage);
|
|
4522
|
+
if (!responseText) {
|
|
4523
|
+
responseText = emailText;
|
|
4524
|
+
} else {
|
|
4525
|
+
responseText += `
|
|
4290
4526
|
|
|
4291
4527
|
${emailText}`;
|
|
4528
|
+
}
|
|
4529
|
+
} catch (error) {
|
|
4530
|
+
await log?.(`Batch email generation failed: ${error instanceof Error ? error.message : String(error)}`);
|
|
4292
4531
|
}
|
|
4293
4532
|
} else {
|
|
4294
4533
|
state.status = "confirming";
|
|
@@ -4497,7 +4736,7 @@ var EvidenceItemSchema = z32.object({
|
|
|
4497
4736
|
turnId: z32.string().optional(),
|
|
4498
4737
|
text: z32.string().describe("Text excerpt from the source"),
|
|
4499
4738
|
relevance: z32.number().min(0).max(1),
|
|
4500
|
-
metadata: z32.
|
|
4739
|
+
metadata: z32.array(z32.object({ key: z32.string(), value: z32.string() })).optional()
|
|
4501
4740
|
});
|
|
4502
4741
|
var RetrievalResultSchema = z32.object({
|
|
4503
4742
|
subQuestion: z32.string(),
|
|
@@ -4533,6 +4772,9 @@ var QueryResultSchema = z32.object({
|
|
|
4533
4772
|
});
|
|
4534
4773
|
|
|
4535
4774
|
// src/query/retriever.ts
|
|
4775
|
+
/**
 * Converts a plain record into an array of `{ key, value }` pairs, in
 * `Object.entries` order.
 *
 * @param {object|null|undefined} record - Source record; may be absent.
 * @returns {Array<{key: string, value: *}>} One entry per own enumerable
 *   property, or an empty array when `record` is `null`/`undefined`.
 */
function recordToKVArray(record) {
  // Object.entries throws on null/undefined; a missing record yields no pairs
  // instead of throwing.
  if (record == null) {
    return [];
  }
  return Object.entries(record).map(([key, value]) => ({ key, value }));
}
|
|
4536
4778
|
async function retrieve(subQuestion, conversationId, config) {
|
|
4537
4779
|
const { documentStore, memoryStore, retrievalLimit, log } = config;
|
|
4538
4780
|
const evidence = [];
|
|
@@ -4559,7 +4801,7 @@ async function retrieve(subQuestion, conversationId, config) {
|
|
|
4559
4801
|
text: chunk.text,
|
|
4560
4802
|
relevance: 0.8,
|
|
4561
4803
|
// Default — store doesn't expose scores directly
|
|
4562
|
-
metadata: chunk.metadata
|
|
4804
|
+
metadata: recordToKVArray(chunk.metadata)
|
|
4563
4805
|
});
|
|
4564
4806
|
}
|
|
4565
4807
|
}
|
|
@@ -4574,7 +4816,7 @@ async function retrieve(subQuestion, conversationId, config) {
|
|
|
4574
4816
|
documentId: chunk.documentId,
|
|
4575
4817
|
text: chunk.text,
|
|
4576
4818
|
relevance: 0.8,
|
|
4577
|
-
metadata: chunk.metadata
|
|
4819
|
+
metadata: recordToKVArray(chunk.metadata)
|
|
4578
4820
|
});
|
|
4579
4821
|
}
|
|
4580
4822
|
}
|
|
@@ -4602,11 +4844,11 @@ async function retrieve(subQuestion, conversationId, config) {
|
|
|
4602
4844
|
text: summary,
|
|
4603
4845
|
relevance: 0.9,
|
|
4604
4846
|
// Direct lookup is high relevance
|
|
4605
|
-
metadata:
|
|
4606
|
-
type: doc.type,
|
|
4607
|
-
carrier: doc.carrier ?? "",
|
|
4608
|
-
insuredName: doc.insuredName ?? ""
|
|
4609
|
-
|
|
4847
|
+
metadata: [
|
|
4848
|
+
{ key: "type", value: doc.type },
|
|
4849
|
+
{ key: "carrier", value: doc.carrier ?? "" },
|
|
4850
|
+
{ key: "insuredName", value: doc.insuredName ?? "" }
|
|
4851
|
+
]
|
|
4610
4852
|
});
|
|
4611
4853
|
}
|
|
4612
4854
|
} catch (e) {
|
|
@@ -4841,8 +5083,12 @@ function createQueryAgent(config) {
|
|
|
4841
5083
|
async function query(input) {
|
|
4842
5084
|
totalUsage = { inputTokens: 0, outputTokens: 0 };
|
|
4843
5085
|
const { question, conversationId, context } = input;
|
|
5086
|
+
const pipelineCtx = createPipelineContext({
|
|
5087
|
+
id: `query-${Date.now()}`
|
|
5088
|
+
});
|
|
4844
5089
|
onProgress?.("Classifying query...");
|
|
4845
5090
|
const classification = await classify(question, conversationId);
|
|
5091
|
+
await pipelineCtx.save("classify", { classification });
|
|
4846
5092
|
onProgress?.(`Retrieving evidence for ${classification.subQuestions.length} sub-question(s)...`);
|
|
4847
5093
|
const retrieverConfig = {
|
|
4848
5094
|
documentStore,
|
|
@@ -4856,9 +5102,10 @@ function createQueryAgent(config) {
|
|
|
4856
5102
|
)
|
|
4857
5103
|
);
|
|
4858
5104
|
const allEvidence = retrievalResults.flatMap((r) => r.evidence);
|
|
5105
|
+
await pipelineCtx.save("retrieve", { classification, evidence: allEvidence });
|
|
4859
5106
|
onProgress?.("Reasoning over evidence...");
|
|
4860
5107
|
const reasonerConfig = { generateObject, providerOptions };
|
|
4861
|
-
|
|
5108
|
+
const reasonResults = await Promise.allSettled(
|
|
4862
5109
|
classification.subQuestions.map(
|
|
4863
5110
|
(sq, i) => limit(async () => {
|
|
4864
5111
|
const { subAnswer, usage } = await reason(
|
|
@@ -4872,10 +5119,27 @@ function createQueryAgent(config) {
|
|
|
4872
5119
|
})
|
|
4873
5120
|
)
|
|
4874
5121
|
);
|
|
5122
|
+
let subAnswers = [];
|
|
5123
|
+
for (let i = 0; i < reasonResults.length; i++) {
|
|
5124
|
+
const result = reasonResults[i];
|
|
5125
|
+
if (result.status === "fulfilled") {
|
|
5126
|
+
subAnswers.push(result.value);
|
|
5127
|
+
} else {
|
|
5128
|
+
await log?.(`Reasoner failed for sub-question "${classification.subQuestions[i].question}": ${result.reason}`);
|
|
5129
|
+
subAnswers.push({
|
|
5130
|
+
subQuestion: classification.subQuestions[i].question,
|
|
5131
|
+
answer: "Unable to answer this part of the question due to a processing error.",
|
|
5132
|
+
citations: [],
|
|
5133
|
+
confidence: 0,
|
|
5134
|
+
needsMoreContext: true
|
|
5135
|
+
});
|
|
5136
|
+
}
|
|
5137
|
+
}
|
|
5138
|
+
await pipelineCtx.save("reason", { classification, evidence: allEvidence, subAnswers });
|
|
4875
5139
|
onProgress?.("Verifying answer grounding...");
|
|
4876
5140
|
const verifierConfig = { generateObject, providerOptions };
|
|
4877
5141
|
for (let round = 0; round < maxVerifyRounds; round++) {
|
|
4878
|
-
const { result: verifyResult, usage } = await
|
|
5142
|
+
const { result: verifyResult, usage } = await safeVerify(
|
|
4879
5143
|
question,
|
|
4880
5144
|
subAnswers,
|
|
4881
5145
|
allEvidence,
|
|
@@ -4899,7 +5163,6 @@ function createQueryAgent(config) {
|
|
|
4899
5163
|
() => retrieve(sq, conversationId, {
|
|
4900
5164
|
...retrieverConfig,
|
|
4901
5165
|
retrievalLimit: retrievalLimit * 2
|
|
4902
|
-
// Broader retrieval on retry
|
|
4903
5166
|
})
|
|
4904
5167
|
)
|
|
4905
5168
|
)
|
|
@@ -4907,7 +5170,7 @@ function createQueryAgent(config) {
|
|
|
4907
5170
|
for (const r of retryRetrievals) {
|
|
4908
5171
|
allEvidence.push(...r.evidence);
|
|
4909
5172
|
}
|
|
4910
|
-
const
|
|
5173
|
+
const retrySettled = await Promise.allSettled(
|
|
4911
5174
|
retryQuestions.map(
|
|
4912
5175
|
(sq, i) => limit(async () => {
|
|
4913
5176
|
const { subAnswer, usage: u } = await reason(
|
|
@@ -4921,6 +5184,7 @@ function createQueryAgent(config) {
|
|
|
4921
5184
|
})
|
|
4922
5185
|
)
|
|
4923
5186
|
);
|
|
5187
|
+
const retrySubAnswers = retrySettled.filter((r) => r.status === "fulfilled").map((r) => r.value);
|
|
4924
5188
|
const retryQSet = new Set(retryQuestions.map((sq) => sq.question));
|
|
4925
5189
|
subAnswers = subAnswers.map((sa) => {
|
|
4926
5190
|
if (retryQSet.has(sa.subQuestion)) {
|
|
@@ -4973,17 +5237,42 @@ function createQueryAgent(config) {
|
|
|
4973
5237
|
}
|
|
4974
5238
|
}
|
|
4975
5239
|
const prompt = buildQueryClassifyPrompt(question, conversationContext);
|
|
4976
|
-
const { object, usage } = await
|
|
4977
|
-
|
|
5240
|
+
const { object, usage } = await safeGenerateObject(
|
|
5241
|
+
generateObject,
|
|
5242
|
+
{
|
|
4978
5243
|
prompt,
|
|
4979
5244
|
schema: QueryClassifyResultSchema,
|
|
4980
5245
|
maxTokens: 2048,
|
|
4981
5246
|
providerOptions
|
|
4982
|
-
}
|
|
5247
|
+
},
|
|
5248
|
+
{
|
|
5249
|
+
fallback: {
|
|
5250
|
+
intent: "general_knowledge",
|
|
5251
|
+
subQuestions: [
|
|
5252
|
+
{
|
|
5253
|
+
question,
|
|
5254
|
+
intent: "general_knowledge"
|
|
5255
|
+
}
|
|
5256
|
+
],
|
|
5257
|
+
requiresDocumentLookup: true,
|
|
5258
|
+
requiresChunkSearch: true,
|
|
5259
|
+
requiresConversationHistory: !!conversationId
|
|
5260
|
+
},
|
|
5261
|
+
log,
|
|
5262
|
+
onError: (err, attempt) => log?.(`Query classify attempt ${attempt + 1} failed: ${err}`)
|
|
5263
|
+
}
|
|
4983
5264
|
);
|
|
4984
5265
|
trackUsage(usage);
|
|
4985
5266
|
return object;
|
|
4986
5267
|
}
|
|
5268
|
+
/**
 * Best-effort wrapper around `verify`.
 *
 * Forwards all arguments to `verify` and returns its result. If `verify`
 * throws or rejects, the failure is reported through the optional `log`
 * callback and an "approved, no issues" result is returned instead, so a
 * failing verifier does not propagate its error to the caller.
 *
 * Note that the fallback object has no `usage` field.
 *
 * @param {string} originalQuestion - Forwarded to `verify` unchanged.
 * @param {Array<object>} subAnswers - Forwarded to `verify` unchanged.
 * @param {Array<object>} allEvidence - Forwarded to `verify` unchanged.
 * @param {object} verifierConfig - Forwarded to `verify` unchanged.
 * @returns {Promise<object>} Whatever `verify` resolves to, or
 *   `{ result: { approved: true, issues: [] } }` when it fails.
 */
async function safeVerify(originalQuestion, subAnswers, allEvidence, verifierConfig) {
  let outcome;
  try {
    outcome = await verify(originalQuestion, subAnswers, allEvidence, verifierConfig);
  } catch (error) {
    // Verification is advisory: log the failure and approve by default.
    await log?.(`Verification failed, approving by default: ${error instanceof Error ? error.message : String(error)}`);
    outcome = { result: { approved: true, issues: [] } };
  }
  return outcome;
}
|
|
4987
5276
|
async function respond(originalQuestion, subAnswers, classification, platform) {
|
|
4988
5277
|
const subAnswersJson = JSON.stringify(
|
|
4989
5278
|
subAnswers.map((sa) => ({
|
|
@@ -4997,13 +5286,25 @@ function createQueryAgent(config) {
|
|
|
4997
5286
|
2
|
|
4998
5287
|
);
|
|
4999
5288
|
const prompt = buildRespondPrompt(originalQuestion, subAnswersJson, platform);
|
|
5000
|
-
const { object, usage } = await
|
|
5001
|
-
|
|
5289
|
+
const { object, usage } = await safeGenerateObject(
|
|
5290
|
+
generateObject,
|
|
5291
|
+
{
|
|
5002
5292
|
prompt,
|
|
5003
5293
|
schema: QueryResultSchema,
|
|
5004
5294
|
maxTokens: 4096,
|
|
5005
5295
|
providerOptions
|
|
5006
|
-
}
|
|
5296
|
+
},
|
|
5297
|
+
{
|
|
5298
|
+
fallback: {
|
|
5299
|
+
answer: subAnswers.map((sa) => `**${sa.subQuestion}**
|
|
5300
|
+
${sa.answer}`).join("\n\n"),
|
|
5301
|
+
citations: subAnswers.flatMap((sa) => sa.citations),
|
|
5302
|
+
intent: classification.intent,
|
|
5303
|
+
confidence: Math.min(...subAnswers.map((sa) => sa.confidence), 1)
|
|
5304
|
+
},
|
|
5305
|
+
log,
|
|
5306
|
+
onError: (err, attempt) => log?.(`Respond attempt ${attempt + 1} failed: ${err}`)
|
|
5307
|
+
}
|
|
5007
5308
|
);
|
|
5008
5309
|
trackUsage(usage);
|
|
5009
5310
|
const result = object;
|
|
@@ -5168,6 +5469,7 @@ export {
|
|
|
5168
5469
|
CommercialAutoDeclarationsSchema,
|
|
5169
5470
|
CommercialPropertyDeclarationsSchema,
|
|
5170
5471
|
CommunicationIntentSchema,
|
|
5472
|
+
ConditionKeyValueSchema,
|
|
5171
5473
|
ConditionTypeSchema,
|
|
5172
5474
|
ConstructionTypeSchema,
|
|
5173
5475
|
ContactSchema,
|
|
@@ -5334,6 +5636,7 @@ export {
|
|
|
5334
5636
|
chunkDocument,
|
|
5335
5637
|
createApplicationPipeline,
|
|
5336
5638
|
createExtractor,
|
|
5639
|
+
createPipelineContext,
|
|
5337
5640
|
createQueryAgent,
|
|
5338
5641
|
extractPageRange,
|
|
5339
5642
|
fillAcroForm,
|
|
@@ -5343,6 +5646,7 @@ export {
|
|
|
5343
5646
|
getTemplate,
|
|
5344
5647
|
overlayTextOnPdf,
|
|
5345
5648
|
pLimit,
|
|
5649
|
+
safeGenerateObject,
|
|
5346
5650
|
sanitizeNulls,
|
|
5347
5651
|
stripFences,
|
|
5348
5652
|
withRetry
|