@claritylabs/cl-sdk 0.5.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -71,6 +71,69 @@ function sanitizeNulls(obj) {
71
71
  return obj;
72
72
  }
73
73
 
74
+ // src/core/safe-generate.ts
75
/**
 * Runs `generateObject(params)` through `withRetry`, repeating the whole call
 * when it fails and optionally degrading to a caller-supplied fallback.
 *
 * `withRetry` is defined elsewhere in this file; its own retry policy is not
 * visible from here, so the attempts counted below are in addition to it.
 *
 * @param {Function} generateObject - Invoked as `generateObject(params)`.
 * @param {object} params - Passed to `generateObject` unchanged.
 * @param {object} [options]
 * @param {number} [options.maxRetries=1] - Extra attempts after the first one.
 *   Negative or NaN values are treated as 0 so at least one attempt is made.
 * @param {number} [options.retryDelayMs=1000] - Pause between attempts.
 * @param {*} [options.fallback] - When not `undefined`, returned as
 *   `{ object: fallback }` once every attempt has failed.
 * @param {Function} [options.log] - Receives a message per failed attempt.
 * @param {Function} [options.onError] - Called as `(error, attempt)` with a
 *   zero-based attempt index; may be async.
 * @returns {Promise<*>} Whatever `generateObject` resolved to, or
 *   `{ object: options.fallback }`.
 * @throws The last error seen when no fallback was provided.
 */
async function safeGenerateObject(generateObject, params, options) {
  const requestedRetries = options?.maxRetries ?? 1;
  // `NaN >= 0` is false, so NaN and negatives both collapse to "no retries".
  const maxRetries = requestedRetries >= 0 ? requestedRetries : 0;
  const retryDelayMs = options?.retryDelayMs ?? 1e3;
  let lastError;
  for (let attempt = 0; attempt <= maxRetries; attempt++) {
    try {
      return await withRetry(
        () => generateObject(params),
        options?.log
      );
    } catch (error) {
      lastError = error;
      // Awaited so an async handler finishes (and any rejection surfaces here)
      // instead of being left as a floating promise.
      await options?.onError?.(error, attempt);
      await options?.log?.(
        `safeGenerateObject attempt ${attempt + 1}/${maxRetries + 1} failed: ${error instanceof Error ? error.message : String(error)}`
      );
      if (attempt < maxRetries) {
        await new Promise((resolve) => setTimeout(resolve, retryDelayMs));
      }
    }
  }
  if (options?.fallback !== void 0) {
    await options?.log?.(
      `safeGenerateObject: all retries exhausted, returning fallback`
    );
    return { object: options.fallback };
  }
  throw lastError;
}
104
+
105
+ // src/core/pipeline.ts
106
/**
 * Creates an in-memory checkpoint tracker for a multi-phase pipeline run.
 *
 * Each saved checkpoint records the phases completed so far in
 * `completedPhases`, so a run resumed from that checkpoint treats every
 * earlier phase as done rather than only the checkpoint's own phase.
 * Checkpoints without that list still resume with just their own `phase`
 * marked complete.
 *
 * @param {object} opts
 * @param {*} opts.id - Exposed unchanged as `id` on the returned context.
 * @param {Function} [opts.onSave] - Awaited with each new checkpoint.
 * @param {object} [opts.resumeFrom] - A previously saved checkpoint
 *   (`{ phase, state, timestamp, completedPhases? }`).
 * @returns {{id: *, save: Function, getCheckpoint: Function, isPhaseComplete: Function, clear: Function}}
 */
function createPipelineContext(opts) {
  let latest = opts.resumeFrom;
  const completedPhases = /* @__PURE__ */ new Set();
  if (opts.resumeFrom) {
    if (Array.isArray(opts.resumeFrom.completedPhases)) {
      for (const donePhase of opts.resumeFrom.completedPhases) {
        completedPhases.add(donePhase);
      }
    }
    completedPhases.add(opts.resumeFrom.phase);
  }
  return {
    id: opts.id,
    /** Records `phase` as complete, stores the checkpoint and notifies `onSave`. */
    async save(phase, state) {
      completedPhases.add(phase);
      const checkpoint = {
        phase,
        state,
        timestamp: Date.now(),
        // Plain array so the checkpoint stays JSON-serialisable.
        completedPhases: [...completedPhases]
      };
      latest = checkpoint;
      await opts.onSave?.(checkpoint);
    },
    /** Returns the most recent checkpoint, or the `resumeFrom` value if none was saved. */
    getCheckpoint() {
      return latest;
    },
    /** True when `phase` was saved in this run or restored from `resumeFrom`. */
    isPhaseComplete(phase) {
      return completedPhases.has(phase);
    },
    /** Forgets the latest checkpoint and all completed phases. */
    clear() {
      latest = void 0;
      completedPhases.clear();
    }
  };
}
136
+
74
137
  // src/schemas/enums.ts
75
138
  import { z } from "zod";
76
139
  var PolicyTypeSchema = z.enum([
@@ -471,11 +534,15 @@ var ExclusionSchema = z5.object({
471
534
 
472
535
  // src/schemas/condition.ts
473
536
  import { z as z6 } from "zod";
537
// One key/value pair, both strings. PolicyConditionSchema.keyValues is an
// optional array of these pairs.
+ var ConditionKeyValueSchema = z6.object({
538
+ key: z6.string(),
539
+ value: z6.string()
540
+ });
474
541
  var PolicyConditionSchema = z6.object({
475
542
  name: z6.string(),
476
543
  conditionType: ConditionTypeSchema,
477
544
  content: z6.string(),
478
- keyValues: z6.record(z6.string(), z6.string()).optional(),
545
+ keyValues: z6.array(ConditionKeyValueSchema).optional(),
479
546
  pageNumber: z6.number().optional()
480
547
  });
481
548
 
@@ -1471,6 +1538,218 @@ function assembleDocument(documentId, documentType, memory) {
1471
1538
  };
1472
1539
  }
1473
1540
 
1541
+ // src/prompts/coordinator/format.ts
1542
// Builds the markdown-cleanup prompt for a batch of content entries.
// Each entry is rendered as an `===ENTRY <id>===` marker line followed by its
// `text`; the rendered entries are joined with blank lines and appended after
// the fixed instruction text. The same marker format is what the prompt asks
// the model to return.
// @param entries  Array of objects exposing `id` and `text`.
// @returns        The complete prompt string.
+ function buildFormatPrompt(entries) {
1543
+ const block = entries.map((e) => `===ENTRY ${e.id}===
1544
+ ${e.text}`).join("\n\n");
1545
+ return `You are a markdown formatting specialist for insurance document content. You will receive numbered content entries extracted from insurance policies, quotes, and endorsements. Your job is to clean up the formatting so every entry renders correctly as standard markdown.
1546
+
1547
+ ## Primary issues to fix
1548
+
1549
+ ### 1. Pipe-delimited data missing table syntax
1550
+ The most common issue. Content uses pipe characters as column separators but is missing the separator row required for markdown table rendering.
1551
+
1552
+ Before (broken \u2014 won't render as a table):
1553
+ COVERAGE | FORM # | LIMIT | DEDUCTIBLE
1554
+ Employee Theft | | $10,000 | $1,000
1555
+
1556
+ After (valid markdown table):
1557
+ | COVERAGE | FORM # | LIMIT | DEDUCTIBLE |
1558
+ | --- | --- | --- | --- |
1559
+ | Employee Theft | | $10,000 | $1,000 |
1560
+
1561
+ Rules for pipe tables:
1562
+ - Add leading and trailing pipes to every row
1563
+ - Add the separator row (| --- | --- |) after the header row
1564
+ - Every row must have the same number of pipe-separated columns as the header
1565
+ - Empty cells are fine \u2014 just keep the pipes: | | $10,000 |
1566
+
1567
+ ### 2. Sub-items indented within pipe tables
1568
+ Insurance schedules often have indented sub-items that belong to the previous coverage line. These break table column counts.
1569
+
1570
+ Before (broken):
1571
+ COVERAGE | LIMIT | DEDUCTIBLE
1572
+ Causes Of Loss - Equipment Breakdown | PR650END
1573
+ Described Premises Limit | | $350,804 |
1574
+ Diagnostic Equipment | | $100,000 |
1575
+ Deductible Type - Business Income: Waiting Period - Hours
1576
+ Waiting Period (Hours): 24
1577
+
1578
+ After: Pull sub-items out of the table. End the table before the sub-items, show them as an indented list, then start a new table if tabular data resumes:
1579
+ | COVERAGE | LIMIT | DEDUCTIBLE |
1580
+ | --- | --- | --- |
1581
+ | Causes Of Loss - Equipment Breakdown | PR650END | |
1582
+
1583
+ - Described Premises Limit: $350,804
1584
+ - Diagnostic Equipment: $100,000
1585
+ - Deductible Type - Business Income: Waiting Period - Hours
1586
+ - Waiting Period (Hours): 24
1587
+
1588
+ ### 3. Space-aligned tables
1589
+ Declarations often align columns with spaces instead of pipes. These render as plain monospace text and lose structure.
1590
+
1591
+ Before:
1592
+ Coverage Limit of Liability Retention
1593
+ A. Network Security Liability $500,000 $10,000
1594
+ B. Privacy Liability $500,000 $10,000
1595
+
1596
+ After (convert to proper markdown table):
1597
+ | Coverage | Limit of Liability | Retention |
1598
+ | --- | --- | --- |
1599
+ | A. Network Security Liability | $500,000 | $10,000 |
1600
+ | B. Privacy Liability | $500,000 | $10,000 |
1601
+
1602
+ ### 4. Mixed table/prose content
1603
+ A single entry often contains prose paragraphs followed by tabular data followed by more prose. Handle each segment independently \u2014 don't try to force everything into one table.
1604
+
1605
+ ### 5. General markdown cleanup
1606
+ - **Line spacing**: Remove excessive blank lines (3+ consecutive newlines \u2192 2). Ensure one blank line before and after tables and headings.
1607
+ - **Trailing whitespace**: Remove trailing spaces on all lines.
1608
+ - **Broken lists**: Ensure list items use consistent markers (-, *, or 1.) with proper nesting indentation.
1609
+ - **Orphaned formatting**: Close any unclosed bold (**), italic (*), or code (\`) markers.
1610
+ - **Heading levels**: Ensure heading markers (##) have a space after the hashes.
1611
+
1612
+ ## Rules
1613
+ - Do NOT change the meaning or substance of any content. Only fix formatting.
1614
+ - Do NOT add new information, headers, or commentary.
1615
+ - Do NOT wrap entries in code fences.
1616
+ - Preserve all dollar amounts, dates, policy numbers, form numbers, and technical terms exactly as they appear.
1617
+ - If an entry is already well-formatted, return it unchanged.
1618
+ - When in doubt about whether something is a table, prefer table formatting for structured data with multiple columns.
1619
+
1620
+ Return your output in this exact format \u2014 one block per entry, in the same order:
1621
+
1622
+ ===ENTRY 0===
1623
+ (cleaned content for entry 0)
1624
+
1625
+ ===ENTRY 1===
1626
+ (cleaned content for entry 1)
1627
+
1628
+ ...and so on for each entry.
1629
+
1630
+ Here are the entries to format:
1631
+
1632
+ ${block}`;
1633
+ }
1634
+
1635
+ // src/extraction/formatter.ts
1636
/**
 * Gathers the free-text fields of an extracted document that are long enough
 * to be worth reformatting.
 *
 * Visits, in order: `summary`, each section's `content` followed by its
 * subsections' `content`, then the `content` of endorsements, exclusions and
 * conditions. Only string values longer than 20 characters are collected;
 * missing collections, null elements and non-string values are skipped.
 *
 * @param {object} doc - Document to scan; it is not modified.
 * @returns {Array<{id: number, path: string, text: string}>} Entries with
 *   sequential ids starting at 0 and a path such as
 *   `sections[2].subsections[0].content`.
 */
function collectContentFields(doc) {
  const entries = [];
  const add = (path, text) => {
    // Require a real string: arrays also expose `.length` and would otherwise
    // be collected as if they were text.
    if (typeof text === "string" && text.length > 20) {
      entries.push({ id: entries.length, path, text });
    }
  };
  // Calls `visit(item, index)` for every non-null element when `list` is an array.
  const eachItem = (list, visit) => {
    if (!Array.isArray(list)) return;
    list.forEach((item, index) => {
      if (item != null) visit(item, index);
    });
  };

  add("summary", doc.summary);
  eachItem(doc.sections, (section, i) => {
    add(`sections[${i}].content`, section.content);
    eachItem(section.subsections, (subsection, j) => {
      add(`sections[${i}].subsections[${j}].content`, subsection.content);
    });
  });
  for (const field of ["endorsements", "exclusions", "conditions"]) {
    eachItem(doc[field], (item, i) => add(`${field}[${i}].content`, item.content));
  }
  return entries;
}
1673
/**
 * Splits a model reply into per-entry content keyed by entry id.
 *
 * The reply is expected to contain `===ENTRY <n>===` marker lines, each
 * followed by that entry's content. Text before the first marker is ignored,
 * content is trimmed, and a repeated id keeps the last occurrence.
 *
 * @param {string} response - Raw model output.
 * @returns {Map<number, string>} Entry id to trimmed content; empty when
 *   `response` is not a string or contains no markers.
 */
function parseFormatResponse(response) {
  const results = /* @__PURE__ */ new Map();
  // A missing or non-string reply yields "nothing formatted" rather than a TypeError.
  if (typeof response !== "string") {
    return results;
  }
  // Splitting on a capturing group leaves ids at odd indexes and the content
  // that follows each id at the next even index.
  const parts = response.split(/===ENTRY (\d+)===/);
  for (let i = 1; i < parts.length; i += 2) {
    const entryId = Number.parseInt(parts[i], 10);
    const content = parts[i + 1]?.trim();
    if (!Number.isNaN(entryId) && content !== void 0) {
      results.set(entryId, content);
    }
  }
  return results;
}
1685
/**
 * Writes formatted text back into `doc` at each entry's recorded path.
 *
 * Supported path shapes are `field`, `field[i]`, `field[i].sub`,
 * `field[i].sub[j]` and `field[i].sub[j].leaf`. Entries with no formatted
 * text (missing or empty), an unparseable path, or a path whose target does
 * not exist are left untouched.
 *
 * @param {object} doc - Document to update; mutated in place.
 * @param {Array<{id: number, path: string}>} entries - Entries to write back.
 * @param {Map<number, string>} formatted - Formatted text keyed by entry id.
 * @returns {void}
 */
function applyFormattedContent(doc, entries, formatted) {
  // Hoisted out of the loop; it has no `g`/`y` flag, so reuse carries no state.
  const pathPattern = /^(\w+)(?:\[(\d+)\])?(?:\.(\w+)(?:\[(\d+)\])?(?:\.(\w+))?)?$/;
  // Replaces an existing array slot; never turns the array itself into a string.
  const setIndexed = (list, rawIndex, value) => {
    const index = Number(rawIndex);
    if (Array.isArray(list) && index < list.length) {
      list[index] = value;
    }
  };
  for (const entry of entries) {
    const cleaned = formatted.get(entry.id);
    // Empty output is skipped as well, so original content is never blanked.
    if (!cleaned) continue;
    const segments = entry.path.match(pathPattern);
    if (!segments) continue;
    const [, field, idx1, sub1, idx2, sub2] = segments;
    if (!sub1) {
      if (idx1 === void 0) {
        doc[field] = cleaned;
      } else {
        setIndexed(doc[field], idx1, cleaned);
      }
      continue;
    }
    const parent = doc[field]?.[Number(idx1)];
    if (!parent) continue;
    if (!sub2) {
      if (idx2 === void 0) {
        parent[sub1] = cleaned;
      } else {
        setIndexed(parent[sub1], idx2, cleaned);
      }
      continue;
    }
    const child = parent[sub1]?.[Number(idx2)];
    if (child) {
      child[sub2] = cleaned;
    }
  }
}
1710
// Upper bound on how many content entries are sent to the model in one prompt.
var MAX_ENTRIES_PER_BATCH = 20;
/**
 * Sends the document's long text fields to the model for markdown cleanup and
 * writes the cleaned text back into `doc` (the document is mutated in place).
 *
 * Entries are processed in batches of `MAX_ENTRIES_PER_BATCH`. A failed batch
 * is logged and skipped, so its fields keep their original content; entries
 * the model omits from its reply are likewise left unchanged.
 *
 * @param {object} doc - Extracted document.
 * @param {Function} generateText - Called with `{ prompt, maxTokens, providerOptions }`;
 *   expected to resolve to `{ text, usage? }`.
 * @param {object} [options]
 * @param {*} [options.providerOptions] - Forwarded to `generateText`.
 * @param {Function} [options.onProgress] - Receives a progress message.
 * @param {Function} [options.log] - Receives warnings about partial or failed batches.
 * @returns {Promise<{document: object, usage: {inputTokens: number, outputTokens: number}}>}
 */
async function formatDocumentContent(doc, generateText, options) {
  const entries = collectContentFields(doc);
  const totalUsage = { inputTokens: 0, outputTokens: 0 };
  if (entries.length === 0) {
    return { document: doc, usage: totalUsage };
  }
  options?.onProgress?.(`Formatting ${entries.length} content fields...`);
  const batches = [];
  for (let start = 0; start < entries.length; start += MAX_ENTRIES_PER_BATCH) {
    batches.push(entries.slice(start, start + MAX_ENTRIES_PER_BATCH));
  }
  for (const [batchIdx, batch] of batches.entries()) {
    try {
      const prompt = buildFormatPrompt(batch.map((e) => ({ id: e.id, text: e.text })));
      const result = await withRetry(
        () => generateText({
          prompt,
          maxTokens: 16384,
          providerOptions: options?.providerOptions
        })
      );
      // A provider may report only part of the usage; a missing counter must
      // not turn the running totals into NaN.
      totalUsage.inputTokens += result.usage?.inputTokens ?? 0;
      totalUsage.outputTokens += result.usage?.outputTokens ?? 0;
      const formatted = parseFormatResponse(result.text);
      if (formatted.size < batch.length) {
        await options?.log?.(
          `Format batch ${batchIdx + 1}/${batches.length}: model returned ${formatted.size}/${batch.length} entries \u2014 unformatted entries will keep original content`
        );
      }
      applyFormattedContent(doc, batch, formatted);
    } catch (error) {
      // Best effort: formatting is cosmetic, so a failed batch never aborts the run.
      await options?.log?.(
        `Format batch ${batchIdx + 1}/${batches.length} failed, keeping original content: ${error instanceof Error ? error.message : String(error)}`
      );
    }
  }
  return { document: doc, usage: totalUsage };
}
1752
+
1474
1753
  // src/extraction/chunking.ts
1475
1754
  function chunkDocument(doc) {
1476
1755
  const chunks = [];
@@ -2307,9 +2586,13 @@ var ExtractionTaskSchema = z18.object({
2307
2586
  endPage: z18.number(),
2308
2587
  description: z18.string()
2309
2588
  });
2589
// One page-map row: a section name plus its page range as free text, e.g.
// { section: "declarations", pages: "pages 1-3" }. ExtractionPlanSchema.pageMap
// is an optional array of these rows.
+ var PageMapEntrySchema = z18.object({
2590
+ section: z18.string(),
2591
+ pages: z18.string()
2592
+ });
2310
2593
  var ExtractionPlanSchema = z18.object({
2311
2594
  tasks: z18.array(ExtractionTaskSchema),
2312
- pageMap: z18.record(z18.string(), z18.string()).optional()
2595
+ pageMap: z18.array(PageMapEntrySchema).optional()
2313
2596
  });
2314
2597
  function buildPlanPrompt(templateHints) {
2315
2598
  return `You are planning the extraction of an insurance document. You have already classified this document. Now scan the full document and create a page map + extraction plan.
@@ -2338,7 +2621,10 @@ Return JSON:
2338
2621
  { "extractorName": "carrier_info", "startPage": 1, "endPage": 2, "description": "Extract carrier details from declarations page" },
2339
2622
  ...
2340
2623
  ],
2341
- "pageMap": { "declarations": "pages 1-3", "endorsements": "pages 15-22", ... }
2624
+ "pageMap": [
2625
+ { "section": "declarations", "pages": "pages 1-3" },
2626
+ { "section": "endorsements", "pages": "pages 15-22" }
2627
+ ]
2342
2628
  }
2343
2629
 
2344
2630
  Create tasks that cover the entire document. Prefer specific extractors over generic "sections" where possible. Keep page ranges tight \u2014 only include pages relevant to each extractor.
@@ -2831,7 +3117,8 @@ function createExtractor(config) {
2831
3117
  onTokenUsage,
2832
3118
  onProgress,
2833
3119
  log,
2834
- providerOptions
3120
+ providerOptions,
3121
+ onCheckpointSave
2835
3122
  } = config;
2836
3123
  const limit = pLimit(concurrency);
2837
3124
  let totalUsage = { inputTokens: 0, outputTokens: 0 };
@@ -2842,100 +3129,106 @@ function createExtractor(config) {
2842
3129
  onTokenUsage?.(usage);
2843
3130
  }
2844
3131
  }
2845
- async function extract(pdfBase64, documentId) {
3132
+ async function extract(pdfBase64, documentId, options) {
2846
3133
  const id = documentId ?? `doc-${Date.now()}`;
2847
3134
  const memory = /* @__PURE__ */ new Map();
2848
3135
  totalUsage = { inputTokens: 0, outputTokens: 0 };
2849
- onProgress?.("Classifying document...");
2850
- const pageCount = await getPdfPageCount(pdfBase64);
2851
- const classifyResult = await withRetry(
2852
- () => generateObject({
2853
- prompt: buildClassifyPrompt(),
2854
- schema: ClassifyResultSchema,
2855
- maxTokens: 512,
2856
- providerOptions
2857
- })
2858
- );
2859
- trackUsage(classifyResult.usage);
2860
- memory.set("classify", classifyResult.object);
2861
- const { documentType, policyTypes } = classifyResult.object;
2862
- const primaryType = policyTypes[0] ?? "other";
2863
- const template = getTemplate(primaryType);
2864
- onProgress?.(`Planning extraction for ${primaryType} ${documentType}...`);
2865
- const templateHints = [
2866
- `Document type: ${primaryType} ${documentType}`,
2867
- `Expected sections: ${template.expectedSections.join(", ")}`,
2868
- `Page hints: ${Object.entries(template.pageHints).map(([k, v]) => `${k}: ${v}`).join("; ")}`,
2869
- `Total pages: ${pageCount}`
2870
- ].join("\n");
2871
- const planResult = await withRetry(
2872
- () => generateObject({
2873
- prompt: buildPlanPrompt(templateHints),
2874
- schema: ExtractionPlanSchema,
2875
- maxTokens: 2048,
2876
- providerOptions
2877
- })
2878
- );
2879
- trackUsage(planResult.usage);
2880
- const tasks = planResult.object.tasks;
2881
- onProgress?.(`Dispatching ${tasks.length} extractors...`);
2882
- const extractorResults = await Promise.all(
2883
- tasks.map(
2884
- (task) => limit(async () => {
2885
- const ext = getExtractor(task.extractorName);
2886
- if (!ext) {
2887
- await log?.(`Unknown extractor: ${task.extractorName}, skipping`);
2888
- return null;
2889
- }
2890
- onProgress?.(`Extracting ${task.extractorName} (pages ${task.startPage}-${task.endPage})...`);
2891
- try {
2892
- const result = await runExtractor({
2893
- name: task.extractorName,
2894
- prompt: ext.buildPrompt(),
2895
- schema: ext.schema,
2896
- pdfBase64,
2897
- startPage: task.startPage,
2898
- endPage: task.endPage,
2899
- generateObject,
2900
- convertPdfToImages,
2901
- maxTokens: ext.maxTokens ?? 4096,
2902
- providerOptions
2903
- });
2904
- trackUsage(result.usage);
2905
- return result;
2906
- } catch (error) {
2907
- await log?.(`Extractor ${task.extractorName} failed: ${error}`);
2908
- return null;
2909
- }
2910
- })
2911
- )
2912
- );
2913
- for (const result of extractorResults) {
2914
- if (result) {
2915
- memory.set(result.name, result.data);
3136
+ const pipelineCtx = createPipelineContext({
3137
+ id,
3138
+ onSave: onCheckpointSave,
3139
+ resumeFrom: options?.resumeFrom
3140
+ });
3141
+ const resumed = pipelineCtx.getCheckpoint()?.state;
3142
+ if (resumed?.memory) {
3143
+ for (const [k, v] of Object.entries(resumed.memory)) {
3144
+ memory.set(k, v);
2916
3145
  }
2917
3146
  }
2918
- for (let round = 0; round < maxReviewRounds; round++) {
2919
- const extractedKeys = [...memory.keys()].filter((k) => k !== "classify");
2920
- const reviewResult = await withRetry(
2921
- () => generateObject({
2922
- prompt: buildReviewPrompt(template.required, extractedKeys),
2923
- schema: ReviewResultSchema,
2924
- maxTokens: 1024,
3147
+ let classifyResult;
3148
+ if (resumed?.classifyResult && pipelineCtx.isPhaseComplete("classify")) {
3149
+ classifyResult = resumed.classifyResult;
3150
+ onProgress?.("Resuming from checkpoint (classify complete)...");
3151
+ } else {
3152
+ onProgress?.("Classifying document...");
3153
+ const pageCount2 = await getPdfPageCount(pdfBase64);
3154
+ const classifyResponse = await safeGenerateObject(
3155
+ generateObject,
3156
+ {
3157
+ prompt: buildClassifyPrompt(),
3158
+ schema: ClassifyResultSchema,
3159
+ maxTokens: 512,
2925
3160
  providerOptions
2926
- })
3161
+ },
3162
+ {
3163
+ fallback: { documentType: "policy", policyTypes: ["other"], confidence: 0 },
3164
+ log,
3165
+ onError: (err, attempt) => log?.(`Classify attempt ${attempt + 1} failed: ${err}`)
3166
+ }
2927
3167
  );
2928
- trackUsage(reviewResult.usage);
2929
- if (reviewResult.object.complete || reviewResult.object.additionalTasks.length === 0) {
2930
- onProgress?.("Extraction complete.");
2931
- break;
2932
- }
2933
- onProgress?.(`Review round ${round + 1}: dispatching ${reviewResult.object.additionalTasks.length} follow-up extractors...`);
2934
- const followUpResults = await Promise.all(
2935
- reviewResult.object.additionalTasks.map(
3168
+ trackUsage(classifyResponse.usage);
3169
+ classifyResult = classifyResponse.object;
3170
+ memory.set("classify", classifyResult);
3171
+ await pipelineCtx.save("classify", {
3172
+ id,
3173
+ pageCount: pageCount2,
3174
+ classifyResult,
3175
+ memory: Object.fromEntries(memory)
3176
+ });
3177
+ }
3178
+ const { documentType, policyTypes } = classifyResult;
3179
+ const primaryType = policyTypes[0] ?? "other";
3180
+ const template = getTemplate(primaryType);
3181
+ const pageCount = resumed?.pageCount ?? await getPdfPageCount(pdfBase64);
3182
+ let plan;
3183
+ if (resumed?.plan && pipelineCtx.isPhaseComplete("plan")) {
3184
+ plan = resumed.plan;
3185
+ onProgress?.("Resuming from checkpoint (plan complete)...");
3186
+ } else {
3187
+ onProgress?.(`Planning extraction for ${primaryType} ${documentType}...`);
3188
+ const templateHints = [
3189
+ `Document type: ${primaryType} ${documentType}`,
3190
+ `Expected sections: ${template.expectedSections.join(", ")}`,
3191
+ `Page hints: ${Object.entries(template.pageHints).map(([k, v]) => `${k}: ${v}`).join("; ")}`,
3192
+ `Total pages: ${pageCount}`
3193
+ ].join("\n");
3194
+ const planResponse = await safeGenerateObject(
3195
+ generateObject,
3196
+ {
3197
+ prompt: buildPlanPrompt(templateHints),
3198
+ schema: ExtractionPlanSchema,
3199
+ maxTokens: 2048,
3200
+ providerOptions
3201
+ },
3202
+ {
3203
+ fallback: {
3204
+ tasks: [{ extractorName: "sections", startPage: 1, endPage: pageCount, description: "Full document fallback extraction" }]
3205
+ },
3206
+ log,
3207
+ onError: (err, attempt) => log?.(`Plan attempt ${attempt + 1} failed: ${err}`)
3208
+ }
3209
+ );
3210
+ trackUsage(planResponse.usage);
3211
+ plan = planResponse.object;
3212
+ await pipelineCtx.save("plan", {
3213
+ id,
3214
+ pageCount,
3215
+ classifyResult,
3216
+ plan,
3217
+ memory: Object.fromEntries(memory)
3218
+ });
3219
+ }
3220
+ if (!pipelineCtx.isPhaseComplete("extract")) {
3221
+ const tasks = plan.tasks;
3222
+ onProgress?.(`Dispatching ${tasks.length} extractors...`);
3223
+ const extractorResults = await Promise.all(
3224
+ tasks.map(
2936
3225
  (task) => limit(async () => {
2937
3226
  const ext = getExtractor(task.extractorName);
2938
- if (!ext) return null;
3227
+ if (!ext) {
3228
+ await log?.(`Unknown extractor: ${task.extractorName}, skipping`);
3229
+ return null;
3230
+ }
3231
+ onProgress?.(`Extracting ${task.extractorName} (pages ${task.startPage}-${task.endPage})...`);
2939
3232
  try {
2940
3233
  const result = await runExtractor({
2941
3234
  name: task.extractorName,
@@ -2952,22 +3245,114 @@ function createExtractor(config) {
2952
3245
  trackUsage(result.usage);
2953
3246
  return result;
2954
3247
  } catch (error) {
2955
- await log?.(`Follow-up extractor ${task.extractorName} failed: ${error}`);
3248
+ await log?.(`Extractor ${task.extractorName} failed: ${error}`);
2956
3249
  return null;
2957
3250
  }
2958
3251
  })
2959
3252
  )
2960
3253
  );
2961
- for (const result of followUpResults) {
3254
+ for (const result of extractorResults) {
2962
3255
  if (result) {
2963
3256
  memory.set(result.name, result.data);
2964
3257
  }
2965
3258
  }
3259
+ await pipelineCtx.save("extract", {
3260
+ id,
3261
+ pageCount,
3262
+ classifyResult,
3263
+ plan,
3264
+ memory: Object.fromEntries(memory)
3265
+ });
3266
+ }
3267
+ if (!pipelineCtx.isPhaseComplete("review")) {
3268
+ for (let round = 0; round < maxReviewRounds; round++) {
3269
+ const extractedKeys = [...memory.keys()].filter((k) => k !== "classify");
3270
+ const reviewResponse = await safeGenerateObject(
3271
+ generateObject,
3272
+ {
3273
+ prompt: buildReviewPrompt(template.required, extractedKeys),
3274
+ schema: ReviewResultSchema,
3275
+ maxTokens: 1024,
3276
+ providerOptions
3277
+ },
3278
+ {
3279
+ fallback: { complete: true, missingFields: [], additionalTasks: [] },
3280
+ log,
3281
+ onError: (err, attempt) => log?.(`Review round ${round + 1} attempt ${attempt + 1} failed: ${err}`)
3282
+ }
3283
+ );
3284
+ trackUsage(reviewResponse.usage);
3285
+ if (reviewResponse.object.complete || reviewResponse.object.additionalTasks.length === 0) {
3286
+ onProgress?.("Extraction complete.");
3287
+ break;
3288
+ }
3289
+ onProgress?.(`Review round ${round + 1}: dispatching ${reviewResponse.object.additionalTasks.length} follow-up extractors...`);
3290
+ const followUpResults = await Promise.all(
3291
+ reviewResponse.object.additionalTasks.map(
3292
+ (task) => limit(async () => {
3293
+ const ext = getExtractor(task.extractorName);
3294
+ if (!ext) return null;
3295
+ try {
3296
+ const result = await runExtractor({
3297
+ name: task.extractorName,
3298
+ prompt: ext.buildPrompt(),
3299
+ schema: ext.schema,
3300
+ pdfBase64,
3301
+ startPage: task.startPage,
3302
+ endPage: task.endPage,
3303
+ generateObject,
3304
+ convertPdfToImages,
3305
+ maxTokens: ext.maxTokens ?? 4096,
3306
+ providerOptions
3307
+ });
3308
+ trackUsage(result.usage);
3309
+ return result;
3310
+ } catch (error) {
3311
+ await log?.(`Follow-up extractor ${task.extractorName} failed: ${error}`);
3312
+ return null;
3313
+ }
3314
+ })
3315
+ )
3316
+ );
3317
+ for (const result of followUpResults) {
3318
+ if (result) {
3319
+ memory.set(result.name, result.data);
3320
+ }
3321
+ }
3322
+ }
3323
+ await pipelineCtx.save("review", {
3324
+ id,
3325
+ pageCount,
3326
+ classifyResult,
3327
+ plan,
3328
+ memory: Object.fromEntries(memory)
3329
+ });
2966
3330
  }
2967
3331
  onProgress?.("Assembling document...");
2968
3332
  const document = assembleDocument(id, documentType, memory);
2969
- const chunks = chunkDocument(document);
2970
- return { document, chunks, tokenUsage: totalUsage };
3333
+ await pipelineCtx.save("assemble", {
3334
+ id,
3335
+ pageCount,
3336
+ classifyResult,
3337
+ plan,
3338
+ memory: Object.fromEntries(memory),
3339
+ document
3340
+ });
3341
+ onProgress?.("Formatting extracted content...");
3342
+ const formatResult = await formatDocumentContent(document, generateText, {
3343
+ providerOptions,
3344
+ onProgress,
3345
+ log
3346
+ });
3347
+ trackUsage(formatResult.usage);
3348
+ const chunks = chunkDocument(formatResult.document);
3349
+ const finalCheckpoint = pipelineCtx.getCheckpoint();
3350
+ return {
3351
+ document: formatResult.document,
3352
+ chunks,
3353
+ tokenUsage: totalUsage,
3354
+ checkpoint: finalCheckpoint
3355
+ };
2971
3356
  }
2972
3357
  return { extract };
2973
3358
  }
@@ -3830,7 +4215,6 @@ function createApplicationPipeline(config) {
3830
4215
  let state = {
3831
4216
  id,
3832
4217
  pdfBase64: void 0,
3833
- // Don't persist the full PDF in state
3834
4218
  title: void 0,
3835
4219
  applicationType: null,
3836
4220
  fields: [],
@@ -3841,13 +4225,20 @@ function createApplicationPipeline(config) {
3841
4225
  updatedAt: now
3842
4226
  };
3843
4227
  onProgress?.("Classifying document...");
3844
- const { result: classifyResult, usage: classifyUsage } = await classifyApplication(
3845
- pdfBase64.slice(0, 2e3),
3846
- // Send truncated content for classification
3847
- generateObject,
3848
- providerOptions
3849
- );
3850
- trackUsage(classifyUsage);
4228
+ await applicationStore?.save(state);
4229
+ let classifyResult;
4230
+ try {
4231
+ const { result, usage: classifyUsage } = await classifyApplication(
4232
+ pdfBase64.slice(0, 2e3),
4233
+ generateObject,
4234
+ providerOptions
4235
+ );
4236
+ trackUsage(classifyUsage);
4237
+ classifyResult = result;
4238
+ } catch (error) {
4239
+ await log?.(`Classification failed, treating as non-application: ${error instanceof Error ? error.message : String(error)}`);
4240
+ classifyResult = { isApplication: false, confidence: 0, applicationType: null };
4241
+ }
3851
4242
  if (!classifyResult.isApplication) {
3852
4243
  state.status = "complete";
3853
4244
  state.updatedAt = Date.now();
@@ -3857,13 +4248,28 @@ function createApplicationPipeline(config) {
3857
4248
  state.applicationType = classifyResult.applicationType;
3858
4249
  state.status = "extracting";
3859
4250
  state.updatedAt = Date.now();
4251
+ await applicationStore?.save(state);
3860
4252
  onProgress?.("Extracting form fields...");
3861
- const { fields, usage: extractUsage } = await extractFields(
3862
- pdfBase64,
3863
- generateObject,
3864
- providerOptions
3865
- );
3866
- trackUsage(extractUsage);
4253
+ let fields;
4254
+ try {
4255
+ const { fields: extractedFields, usage: extractUsage } = await extractFields(
4256
+ pdfBase64,
4257
+ generateObject,
4258
+ providerOptions
4259
+ );
4260
+ trackUsage(extractUsage);
4261
+ fields = extractedFields;
4262
+ } catch (error) {
4263
+ await log?.(`Field extraction failed: ${error instanceof Error ? error.message : String(error)}`);
4264
+ fields = [];
4265
+ }
4266
+ if (fields.length === 0) {
4267
+ await log?.("No fields extracted, completing pipeline with empty result");
4268
+ state.status = "complete";
4269
+ state.updatedAt = Date.now();
4270
+ await applicationStore?.save(state);
4271
+ return { state, tokenUsage: totalUsage };
4272
+ }
3867
4273
  state.fields = fields;
3868
4274
  state.title = classifyResult.applicationType ?? void 0;
3869
4275
  state.status = "auto_filling";
@@ -3895,20 +4301,24 @@ function createApplicationPipeline(config) {
3895
4301
  limit(async () => {
3896
4302
  const unfilledFields2 = state.fields.filter((f) => !f.value);
3897
4303
  if (unfilledFields2.length === 0) return;
3898
- const { result: autoFillResult, usage: afUsage } = await autoFillFromContext(
3899
- unfilledFields2,
3900
- orgContext,
3901
- generateObject,
3902
- providerOptions
3903
- );
3904
- trackUsage(afUsage);
3905
- for (const match of autoFillResult.matches) {
3906
- const field = state.fields.find((f) => f.id === match.fieldId);
3907
- if (field && !field.value) {
3908
- field.value = match.value;
3909
- field.source = `auto-fill: ${match.contextKey}`;
3910
- field.confidence = match.confidence;
4304
+ try {
4305
+ const { result: autoFillResult, usage: afUsage } = await autoFillFromContext(
4306
+ unfilledFields2,
4307
+ orgContext,
4308
+ generateObject,
4309
+ providerOptions
4310
+ );
4311
+ trackUsage(afUsage);
4312
+ for (const match of autoFillResult.matches) {
4313
+ const field = state.fields.find((f) => f.id === match.fieldId);
4314
+ if (field && !field.value) {
4315
+ field.value = match.value;
4316
+ field.source = `auto-fill: ${match.contextKey}`;
4317
+ field.confidence = match.confidence;
4318
+ }
3911
4319
  }
4320
+ } catch (e) {
4321
+ await log?.(`Auto-fill from context failed: ${e instanceof Error ? e.message : String(e)}`);
3912
4322
  }
3913
4323
  })
3914
4324
  );
@@ -3941,13 +4351,18 @@ function createApplicationPipeline(config) {
3941
4351
  if (unfilledFields.length > 0) {
3942
4352
  onProgress?.(`Batching ${unfilledFields.length} remaining questions...`);
3943
4353
  state.status = "batching";
3944
- const { result: batchResult, usage: batchUsage } = await batchQuestions(
3945
- unfilledFields,
3946
- generateObject,
3947
- providerOptions
3948
- );
3949
- trackUsage(batchUsage);
3950
- state.batches = batchResult.batches;
4354
+ try {
4355
+ const { result: batchResult, usage: batchUsage } = await batchQuestions(
4356
+ unfilledFields,
4357
+ generateObject,
4358
+ providerOptions
4359
+ );
4360
+ trackUsage(batchUsage);
4361
+ state.batches = batchResult.batches;
4362
+ } catch (error) {
4363
+ await log?.(`Batching failed, using single-batch fallback: ${error instanceof Error ? error.message : String(error)}`);
4364
+ state.batches = [unfilledFields.map((f) => f.id)];
4365
+ }
3951
4366
  state.currentBatchIndex = 0;
3952
4367
  state.status = "collecting";
3953
4368
  } else {
@@ -3974,32 +4389,49 @@ function createApplicationPipeline(config) {
3974
4389
  (f) => currentBatchFieldIds.includes(f.id)
3975
4390
  );
3976
4391
  onProgress?.("Classifying reply...");
3977
- const { intent, usage: intentUsage } = await classifyReplyIntent(
3978
- currentBatchFields,
3979
- replyText,
3980
- generateObject,
3981
- providerOptions
3982
- );
3983
- trackUsage(intentUsage);
3984
- let fieldsFilled = 0;
3985
- let responseText;
3986
- if (intent.hasAnswers) {
3987
- onProgress?.("Parsing answers...");
3988
- const { result: parseResult, usage: parseUsage } = await parseAnswers(
4392
+ let intent;
4393
+ try {
4394
+ const { intent: classifiedIntent, usage: intentUsage } = await classifyReplyIntent(
3989
4395
  currentBatchFields,
3990
4396
  replyText,
3991
4397
  generateObject,
3992
4398
  providerOptions
3993
4399
  );
3994
- trackUsage(parseUsage);
3995
- for (const answer of parseResult.answers) {
3996
- const field = state.fields.find((f) => f.id === answer.fieldId);
3997
- if (field) {
3998
- field.value = answer.value;
3999
- field.source = "user";
4000
- field.confidence = "confirmed";
4001
- fieldsFilled++;
4400
+ trackUsage(intentUsage);
4401
+ intent = classifiedIntent;
4402
+ } catch (error) {
4403
+ await log?.(`Reply intent classification failed, defaulting to answers_only: ${error instanceof Error ? error.message : String(error)}`);
4404
+ intent = {
4405
+ primaryIntent: "answers_only",
4406
+ hasAnswers: true,
4407
+ questionText: void 0,
4408
+ questionFieldIds: void 0,
4409
+ lookupRequests: void 0
4410
+ };
4411
+ }
4412
+ let fieldsFilled = 0;
4413
+ let responseText;
4414
+ if (intent.hasAnswers) {
4415
+ onProgress?.("Parsing answers...");
4416
+ try {
4417
+ const { result: parseResult, usage: parseUsage } = await parseAnswers(
4418
+ currentBatchFields,
4419
+ replyText,
4420
+ generateObject,
4421
+ providerOptions
4422
+ );
4423
+ trackUsage(parseUsage);
4424
+ for (const answer of parseResult.answers) {
4425
+ const field = state.fields.find((f) => f.id === answer.fieldId);
4426
+ if (field) {
4427
+ field.value = answer.value;
4428
+ field.source = "user";
4429
+ field.confidence = "confirmed";
4430
+ fieldsFilled++;
4431
+ }
4002
4432
  }
4433
+ } catch (error) {
4434
+ await log?.(`Answer parsing failed: ${error instanceof Error ? error.message : String(error)}`);
4003
4435
  }
4004
4436
  }
4005
4437
  if (intent.lookupRequests?.length) {
@@ -4020,36 +4452,45 @@ function createApplicationPipeline(config) {
4020
4452
  const targetFields = state.fields.filter(
4021
4453
  (f) => intent.lookupRequests.some((lr) => lr.targetFieldIds.includes(f.id))
4022
4454
  );
4023
- const { result: lookupResult, usage: lookupUsage } = await fillFromLookup(
4024
- intent.lookupRequests,
4025
- targetFields,
4026
- availableData,
4027
- generateObject,
4028
- providerOptions
4029
- );
4030
- trackUsage(lookupUsage);
4031
- for (const fill of lookupResult.fills) {
4032
- const field = state.fields.find((f) => f.id === fill.fieldId);
4033
- if (field) {
4034
- field.value = fill.value;
4035
- field.source = `lookup: ${fill.source}`;
4036
- field.confidence = "high";
4037
- fieldsFilled++;
4455
+ try {
4456
+ const { result: lookupResult, usage: lookupUsage } = await fillFromLookup(
4457
+ intent.lookupRequests,
4458
+ targetFields,
4459
+ availableData,
4460
+ generateObject,
4461
+ providerOptions
4462
+ );
4463
+ trackUsage(lookupUsage);
4464
+ for (const fill of lookupResult.fills) {
4465
+ const field = state.fields.find((f) => f.id === fill.fieldId);
4466
+ if (field) {
4467
+ field.value = fill.value;
4468
+ field.source = `lookup: ${fill.source}`;
4469
+ field.confidence = "high";
4470
+ fieldsFilled++;
4471
+ }
4038
4472
  }
4473
+ } catch (error) {
4474
+ await log?.(`Lookup fill failed: ${error instanceof Error ? error.message : String(error)}`);
4039
4475
  }
4040
4476
  }
4041
4477
  }
4042
4478
  if (intent.primaryIntent === "question" || intent.primaryIntent === "mixed") {
4043
4479
  if (intent.questionText) {
4044
- const { text, usage } = await generateText({
4045
- prompt: `The user is filling out an insurance application and asked: "${intent.questionText}"
4480
+ try {
4481
+ const { text, usage } = await generateText({
4482
+ prompt: `The user is filling out an insurance application and asked: "${intent.questionText}"
4046
4483
 
4047
4484
  Provide a brief, helpful explanation (2-3 sentences). End with "Just reply with the answer when you're ready and I'll fill it in."`,
4048
- maxTokens: 512,
4049
- providerOptions
4050
- });
4051
- trackUsage(usage);
4052
- responseText = text;
4485
+ maxTokens: 512,
4486
+ providerOptions
4487
+ });
4488
+ trackUsage(usage);
4489
+ responseText = text;
4490
+ } catch (error) {
4491
+ await log?.(`Question response generation failed: ${error instanceof Error ? error.message : String(error)}`);
4492
+ responseText = `I wasn't able to generate an explanation for your question. Could you rephrase it, or just provide the answer directly?`;
4493
+ }
4053
4494
  }
4054
4495
  }
4055
4496
  const currentBatchComplete = currentBatchFieldIds.every(
@@ -4063,26 +4504,30 @@ Provide a brief, helpful explanation (2-3 sentences). End with "Just reply with
4063
4504
  (f) => nextBatchFieldIds.includes(f.id)
4064
4505
  );
4065
4506
  const filledCount = state.fields.filter((f) => f.value).length;
4066
- const { text: emailText, usage: emailUsage } = await generateBatchEmail(
4067
- nextBatchFields,
4068
- state.currentBatchIndex,
4069
- state.batches.length,
4070
- {
4071
- appTitle: state.title,
4072
- totalFieldCount: state.fields.length,
4073
- filledFieldCount: filledCount,
4074
- companyName: context?.companyName
4075
- },
4076
- generateText,
4077
- providerOptions
4078
- );
4079
- trackUsage(emailUsage);
4080
- if (!responseText) {
4081
- responseText = emailText;
4082
- } else {
4083
- responseText += `
4507
+ try {
4508
+ const { text: emailText, usage: emailUsage } = await generateBatchEmail(
4509
+ nextBatchFields,
4510
+ state.currentBatchIndex,
4511
+ state.batches.length,
4512
+ {
4513
+ appTitle: state.title,
4514
+ totalFieldCount: state.fields.length,
4515
+ filledFieldCount: filledCount,
4516
+ companyName: context?.companyName
4517
+ },
4518
+ generateText,
4519
+ providerOptions
4520
+ );
4521
+ trackUsage(emailUsage);
4522
+ if (!responseText) {
4523
+ responseText = emailText;
4524
+ } else {
4525
+ responseText += `
4084
4526
 
4085
4527
  ${emailText}`;
4528
+ }
4529
+ } catch (error) {
4530
+ await log?.(`Batch email generation failed: ${error instanceof Error ? error.message : String(error)}`);
4086
4531
  }
4087
4532
  } else {
4088
4533
  state.status = "confirming";
@@ -4291,7 +4736,7 @@ var EvidenceItemSchema = z32.object({
4291
4736
  turnId: z32.string().optional(),
4292
4737
  text: z32.string().describe("Text excerpt from the source"),
4293
4738
  relevance: z32.number().min(0).max(1),
4294
- metadata: z32.record(z32.string(), z32.string()).optional()
4739
+ metadata: z32.array(z32.object({ key: z32.string(), value: z32.string() })).optional()
4295
4740
  });
4296
4741
  var RetrievalResultSchema = z32.object({
4297
4742
  subQuestion: z32.string(),
@@ -4327,6 +4772,9 @@ var QueryResultSchema = z32.object({
4327
4772
  });
4328
4773
 
4329
4774
  // src/query/retriever.ts
4775
+ function recordToKVArray(record) {
4776
+ return Object.entries(record).map(([key, value]) => ({ key, value }));
4777
+ }
4330
4778
  async function retrieve(subQuestion, conversationId, config) {
4331
4779
  const { documentStore, memoryStore, retrievalLimit, log } = config;
4332
4780
  const evidence = [];
@@ -4353,7 +4801,7 @@ async function retrieve(subQuestion, conversationId, config) {
4353
4801
  text: chunk.text,
4354
4802
  relevance: 0.8,
4355
4803
  // Default — store doesn't expose scores directly
4356
- metadata: chunk.metadata
4804
+ metadata: recordToKVArray(chunk.metadata)
4357
4805
  });
4358
4806
  }
4359
4807
  }
@@ -4368,7 +4816,7 @@ async function retrieve(subQuestion, conversationId, config) {
4368
4816
  documentId: chunk.documentId,
4369
4817
  text: chunk.text,
4370
4818
  relevance: 0.8,
4371
- metadata: chunk.metadata
4819
+ metadata: recordToKVArray(chunk.metadata)
4372
4820
  });
4373
4821
  }
4374
4822
  }
@@ -4396,11 +4844,11 @@ async function retrieve(subQuestion, conversationId, config) {
4396
4844
  text: summary,
4397
4845
  relevance: 0.9,
4398
4846
  // Direct lookup is high relevance
4399
- metadata: {
4400
- type: doc.type,
4401
- carrier: doc.carrier ?? "",
4402
- insuredName: doc.insuredName ?? ""
4403
- }
4847
+ metadata: [
4848
+ { key: "type", value: doc.type },
4849
+ { key: "carrier", value: doc.carrier ?? "" },
4850
+ { key: "insuredName", value: doc.insuredName ?? "" }
4851
+ ]
4404
4852
  });
4405
4853
  }
4406
4854
  } catch (e) {
@@ -4635,8 +5083,12 @@ function createQueryAgent(config) {
4635
5083
  async function query(input) {
4636
5084
  totalUsage = { inputTokens: 0, outputTokens: 0 };
4637
5085
  const { question, conversationId, context } = input;
5086
+ const pipelineCtx = createPipelineContext({
5087
+ id: `query-${Date.now()}`
5088
+ });
4638
5089
  onProgress?.("Classifying query...");
4639
5090
  const classification = await classify(question, conversationId);
5091
+ await pipelineCtx.save("classify", { classification });
4640
5092
  onProgress?.(`Retrieving evidence for ${classification.subQuestions.length} sub-question(s)...`);
4641
5093
  const retrieverConfig = {
4642
5094
  documentStore,
@@ -4650,9 +5102,10 @@ function createQueryAgent(config) {
4650
5102
  )
4651
5103
  );
4652
5104
  const allEvidence = retrievalResults.flatMap((r) => r.evidence);
5105
+ await pipelineCtx.save("retrieve", { classification, evidence: allEvidence });
4653
5106
  onProgress?.("Reasoning over evidence...");
4654
5107
  const reasonerConfig = { generateObject, providerOptions };
4655
- let subAnswers = await Promise.all(
5108
+ const reasonResults = await Promise.allSettled(
4656
5109
  classification.subQuestions.map(
4657
5110
  (sq, i) => limit(async () => {
4658
5111
  const { subAnswer, usage } = await reason(
@@ -4666,10 +5119,27 @@ function createQueryAgent(config) {
4666
5119
  })
4667
5120
  )
4668
5121
  );
5122
+ let subAnswers = [];
5123
+ for (let i = 0; i < reasonResults.length; i++) {
5124
+ const result = reasonResults[i];
5125
+ if (result.status === "fulfilled") {
5126
+ subAnswers.push(result.value);
5127
+ } else {
5128
+ await log?.(`Reasoner failed for sub-question "${classification.subQuestions[i].question}": ${result.reason}`);
5129
+ subAnswers.push({
5130
+ subQuestion: classification.subQuestions[i].question,
5131
+ answer: "Unable to answer this part of the question due to a processing error.",
5132
+ citations: [],
5133
+ confidence: 0,
5134
+ needsMoreContext: true
5135
+ });
5136
+ }
5137
+ }
5138
+ await pipelineCtx.save("reason", { classification, evidence: allEvidence, subAnswers });
4669
5139
  onProgress?.("Verifying answer grounding...");
4670
5140
  const verifierConfig = { generateObject, providerOptions };
4671
5141
  for (let round = 0; round < maxVerifyRounds; round++) {
4672
- const { result: verifyResult, usage } = await verify(
5142
+ const { result: verifyResult, usage } = await safeVerify(
4673
5143
  question,
4674
5144
  subAnswers,
4675
5145
  allEvidence,
@@ -4693,7 +5163,6 @@ function createQueryAgent(config) {
4693
5163
  () => retrieve(sq, conversationId, {
4694
5164
  ...retrieverConfig,
4695
5165
  retrievalLimit: retrievalLimit * 2
4696
- // Broader retrieval on retry
4697
5166
  })
4698
5167
  )
4699
5168
  )
@@ -4701,7 +5170,7 @@ function createQueryAgent(config) {
4701
5170
  for (const r of retryRetrievals) {
4702
5171
  allEvidence.push(...r.evidence);
4703
5172
  }
4704
- const retrySubAnswers = await Promise.all(
5173
+ const retrySettled = await Promise.allSettled(
4705
5174
  retryQuestions.map(
4706
5175
  (sq, i) => limit(async () => {
4707
5176
  const { subAnswer, usage: u } = await reason(
@@ -4715,6 +5184,7 @@ function createQueryAgent(config) {
4715
5184
  })
4716
5185
  )
4717
5186
  );
5187
+ const retrySubAnswers = retrySettled.filter((r) => r.status === "fulfilled").map((r) => r.value);
4718
5188
  const retryQSet = new Set(retryQuestions.map((sq) => sq.question));
4719
5189
  subAnswers = subAnswers.map((sa) => {
4720
5190
  if (retryQSet.has(sa.subQuestion)) {
@@ -4767,17 +5237,42 @@ function createQueryAgent(config) {
4767
5237
  }
4768
5238
  }
4769
5239
  const prompt = buildQueryClassifyPrompt(question, conversationContext);
4770
- const { object, usage } = await withRetry(
4771
- () => generateObject({
5240
+ const { object, usage } = await safeGenerateObject(
5241
+ generateObject,
5242
+ {
4772
5243
  prompt,
4773
5244
  schema: QueryClassifyResultSchema,
4774
5245
  maxTokens: 2048,
4775
5246
  providerOptions
4776
- })
5247
+ },
5248
+ {
5249
+ fallback: {
5250
+ intent: "general_knowledge",
5251
+ subQuestions: [
5252
+ {
5253
+ question,
5254
+ intent: "general_knowledge"
5255
+ }
5256
+ ],
5257
+ requiresDocumentLookup: true,
5258
+ requiresChunkSearch: true,
5259
+ requiresConversationHistory: !!conversationId
5260
+ },
5261
+ log,
5262
+ onError: (err, attempt) => log?.(`Query classify attempt ${attempt + 1} failed: ${err}`)
5263
+ }
4777
5264
  );
4778
5265
  trackUsage(usage);
4779
5266
  return object;
4780
5267
  }
5268
+ async function safeVerify(originalQuestion, subAnswers, allEvidence, verifierConfig) {
5269
+ try {
5270
+ return await verify(originalQuestion, subAnswers, allEvidence, verifierConfig);
5271
+ } catch (error) {
5272
+ await log?.(`Verification failed, approving by default: ${error instanceof Error ? error.message : String(error)}`);
5273
+ return { result: { approved: true, issues: [] } };
5274
+ }
5275
+ }
4781
5276
  async function respond(originalQuestion, subAnswers, classification, platform) {
4782
5277
  const subAnswersJson = JSON.stringify(
4783
5278
  subAnswers.map((sa) => ({
@@ -4791,13 +5286,25 @@ function createQueryAgent(config) {
4791
5286
  2
4792
5287
  );
4793
5288
  const prompt = buildRespondPrompt(originalQuestion, subAnswersJson, platform);
4794
- const { object, usage } = await withRetry(
4795
- () => generateObject({
5289
+ const { object, usage } = await safeGenerateObject(
5290
+ generateObject,
5291
+ {
4796
5292
  prompt,
4797
5293
  schema: QueryResultSchema,
4798
5294
  maxTokens: 4096,
4799
5295
  providerOptions
4800
- })
5296
+ },
5297
+ {
5298
+ fallback: {
5299
+ answer: subAnswers.map((sa) => `**${sa.subQuestion}**
5300
+ ${sa.answer}`).join("\n\n"),
5301
+ citations: subAnswers.flatMap((sa) => sa.citations),
5302
+ intent: classification.intent,
5303
+ confidence: Math.min(...subAnswers.map((sa) => sa.confidence), 1)
5304
+ },
5305
+ log,
5306
+ onError: (err, attempt) => log?.(`Respond attempt ${attempt + 1} failed: ${err}`)
5307
+ }
4801
5308
  );
4802
5309
  trackUsage(usage);
4803
5310
  const result = object;
@@ -4962,6 +5469,7 @@ export {
4962
5469
  CommercialAutoDeclarationsSchema,
4963
5470
  CommercialPropertyDeclarationsSchema,
4964
5471
  CommunicationIntentSchema,
5472
+ ConditionKeyValueSchema,
4965
5473
  ConditionTypeSchema,
4966
5474
  ConstructionTypeSchema,
4967
5475
  ContactSchema,
@@ -5128,6 +5636,7 @@ export {
5128
5636
  chunkDocument,
5129
5637
  createApplicationPipeline,
5130
5638
  createExtractor,
5639
+ createPipelineContext,
5131
5640
  createQueryAgent,
5132
5641
  extractPageRange,
5133
5642
  fillAcroForm,
@@ -5137,6 +5646,7 @@ export {
5137
5646
  getTemplate,
5138
5647
  overlayTextOnPdf,
5139
5648
  pLimit,
5649
+ safeGenerateObject,
5140
5650
  sanitizeNulls,
5141
5651
  stripFences,
5142
5652
  withRetry