@claritylabs/cl-sdk 0.6.0 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -71,6 +71,69 @@ function sanitizeNulls(obj) {
71
71
  return obj;
72
72
  }
73
73
 
74
+ // src/core/safe-generate.ts
75
/**
 * Calls `generateObject(params)` (via `withRetry`) and repeats the whole call
 * when it fails, optionally falling back to a caller-supplied default.
 *
 * @param {Function} generateObject - Function invoked as `generateObject(params)`.
 * @param {object} params - Argument forwarded unchanged to `generateObject`.
 * @param {object} [options]
 * @param {number} [options.maxRetries=1] - Extra attempts after the first one.
 *   Negative values are treated as 0 and NaN as the default, so at least one
 *   attempt is always made.
 * @param {number} [options.retryDelayMs=1000] - Pause between failed attempts.
 * @param {*} [options.fallback] - When not `undefined`, returned as
 *   `{ object: fallback }` once every attempt has failed.
 * @param {Function} [options.log] - Logger; also handed to `withRetry`.
 * @param {Function} [options.onError] - Called as `onError(error, attempt)`
 *   after each failed attempt; may be async.
 * @returns {Promise<*>} Whatever `withRetry` resolves to, or
 *   `{ object: options.fallback }` when all attempts failed and a fallback exists.
 * @throws The error of the last failed attempt when no fallback is configured.
 */
async function safeGenerateObject(generateObject, params, options) {
  const requestedRetries = Number(options?.maxRetries ?? 1);
  // Guarantee at least one attempt: a negative/NaN value used to skip the loop
  // entirely and end in `throw undefined`.
  const maxRetries = Number.isNaN(requestedRetries)
    ? 1
    : Math.max(0, Math.floor(requestedRetries));
  const retryDelayMs = options?.retryDelayMs ?? 1e3;
  let lastError;

  for (let attempt = 0; attempt <= maxRetries; attempt++) {
    try {
      return await withRetry(
        () => generateObject(params),
        options?.log
      );
    } catch (error) {
      lastError = error;
      try {
        // Awaited so an async hook cannot leave a floating (unhandled) rejection.
        await options?.onError?.(error, attempt);
      } catch (hookError) {
        // A broken observer must not mask the real failure or defeat the fallback.
        await options?.log?.(
          `safeGenerateObject onError hook failed: ${hookError instanceof Error ? hookError.message : String(hookError)}`
        );
      }
      await options?.log?.(
        `safeGenerateObject attempt ${attempt + 1}/${maxRetries + 1} failed: ${error instanceof Error ? error.message : String(error)}`
      );
      if (attempt < maxRetries) {
        await new Promise((resolve) => setTimeout(resolve, retryDelayMs));
      }
    }
  }

  if (options?.fallback !== void 0) {
    await options?.log?.(
      `safeGenerateObject: all retries exhausted, returning fallback`
    );
    return { object: options.fallback };
  }
  throw lastError;
}
104
+
105
+ // src/core/pipeline.ts
106
/**
 * Creates an in-memory checkpoint tracker for a multi-phase pipeline run.
 *
 * @param {object} opts
 * @param {*} opts.id - Identifier exposed unchanged as `context.id`.
 * @param {Function} [opts.onSave] - Called (and awaited) with every new checkpoint.
 * @param {object} [opts.resumeFrom] - Previously saved checkpoint to resume from.
 *   Its `phase` is marked complete; if it carries a `completedPhases` array
 *   (written by `save` below), those phases are marked complete as well.
 * @returns {{id: *, save: Function, getCheckpoint: Function, isPhaseComplete: Function, clear: Function}}
 */
function createPipelineContext(opts) {
  let latest = opts.resumeFrom;
  const completedPhases = /* @__PURE__ */ new Set();
  if (opts.resumeFrom) {
    // Restore every phase finished before the checkpoint was taken, so a resume
    // from a late phase does not report earlier phases as still pending.
    // Checkpoints without the list fall back to marking only their own phase.
    const priorPhases = opts.resumeFrom.completedPhases;
    if (Array.isArray(priorPhases)) {
      for (const phase of priorPhases) {
        completedPhases.add(phase);
      }
    }
    completedPhases.add(opts.resumeFrom.phase);
  }
  return {
    id: opts.id,
    /** Records `phase` as complete, stores the checkpoint and notifies `opts.onSave`. */
    async save(phase, state) {
      completedPhases.add(phase);
      const checkpoint = {
        phase,
        state,
        timestamp: Date.now(),
        // Snapshot (not the live Set) so the checkpoint stays serializable and stable.
        completedPhases: [...completedPhases]
      };
      latest = checkpoint;
      await opts.onSave?.(checkpoint);
    },
    /** Returns the most recent checkpoint (initially `opts.resumeFrom`), or `undefined`. */
    getCheckpoint() {
      return latest;
    },
    /** Reports whether `phase` was saved in this context or restored from `resumeFrom`. */
    isPhaseComplete(phase) {
      return completedPhases.has(phase);
    },
    /** Forgets the latest checkpoint and all completed phases. */
    clear() {
      latest = void 0;
      completedPhases.clear();
    }
  };
}
136
+
74
137
  // src/schemas/enums.ts
75
138
  import { z } from "zod";
76
139
  var PolicyTypeSchema = z.enum([
@@ -471,11 +534,15 @@ var ExclusionSchema = z5.object({
471
534
 
472
535
  // src/schemas/condition.ts
473
536
  import { z as z6 } from "zod";
537
+ var ConditionKeyValueSchema = z6.object({
538
+ key: z6.string(),
539
+ value: z6.string()
540
+ });
474
541
  var PolicyConditionSchema = z6.object({
475
542
  name: z6.string(),
476
543
  conditionType: ConditionTypeSchema,
477
544
  content: z6.string(),
478
- keyValues: z6.record(z6.string(), z6.string()).optional(),
545
+ keyValues: z6.array(ConditionKeyValueSchema).optional(),
479
546
  pageNumber: z6.number().optional()
480
547
  });
481
548
 
@@ -1652,21 +1719,33 @@ async function formatDocumentContent(doc, generateText, options) {
1652
1719
  for (let i = 0; i < entries.length; i += MAX_ENTRIES_PER_BATCH) {
1653
1720
  batches.push(entries.slice(i, i + MAX_ENTRIES_PER_BATCH));
1654
1721
  }
1655
- for (const batch of batches) {
1656
- const prompt = buildFormatPrompt(batch.map((e) => ({ id: e.id, text: e.text })));
1657
- const result = await withRetry(
1658
- () => generateText({
1659
- prompt,
1660
- maxTokens: 16384,
1661
- providerOptions: options?.providerOptions
1662
- })
1663
- );
1664
- if (result.usage) {
1665
- totalUsage.inputTokens += result.usage.inputTokens;
1666
- totalUsage.outputTokens += result.usage.outputTokens;
1722
+ for (let batchIdx = 0; batchIdx < batches.length; batchIdx++) {
1723
+ const batch = batches[batchIdx];
1724
+ try {
1725
+ const prompt = buildFormatPrompt(batch.map((e) => ({ id: e.id, text: e.text })));
1726
+ const result = await withRetry(
1727
+ () => generateText({
1728
+ prompt,
1729
+ maxTokens: 16384,
1730
+ providerOptions: options?.providerOptions
1731
+ })
1732
+ );
1733
+ if (result.usage) {
1734
+ totalUsage.inputTokens += result.usage.inputTokens;
1735
+ totalUsage.outputTokens += result.usage.outputTokens;
1736
+ }
1737
+ const formatted = parseFormatResponse(result.text);
1738
+ if (formatted.size < batch.length) {
1739
+ await options?.log?.(
1740
+ `Format batch ${batchIdx + 1}/${batches.length}: model returned ${formatted.size}/${batch.length} entries \u2014 unformatted entries will keep original content`
1741
+ );
1742
+ }
1743
+ applyFormattedContent(doc, batch, formatted);
1744
+ } catch (error) {
1745
+ await options?.log?.(
1746
+ `Format batch ${batchIdx + 1}/${batches.length} failed, keeping original content: ${error instanceof Error ? error.message : String(error)}`
1747
+ );
1667
1748
  }
1668
- const formatted = parseFormatResponse(result.text);
1669
- applyFormattedContent(doc, batch, formatted);
1670
1749
  }
1671
1750
  return { document: doc, usage: totalUsage };
1672
1751
  }
@@ -2507,9 +2586,13 @@ var ExtractionTaskSchema = z18.object({
2507
2586
  endPage: z18.number(),
2508
2587
  description: z18.string()
2509
2588
  });
2589
+ var PageMapEntrySchema = z18.object({
2590
+ section: z18.string(),
2591
+ pages: z18.string()
2592
+ });
2510
2593
  var ExtractionPlanSchema = z18.object({
2511
2594
  tasks: z18.array(ExtractionTaskSchema),
2512
- pageMap: z18.record(z18.string(), z18.string()).optional()
2595
+ pageMap: z18.array(PageMapEntrySchema).optional()
2513
2596
  });
2514
2597
  function buildPlanPrompt(templateHints) {
2515
2598
  return `You are planning the extraction of an insurance document. You have already classified this document. Now scan the full document and create a page map + extraction plan.
@@ -2538,7 +2621,10 @@ Return JSON:
2538
2621
  { "extractorName": "carrier_info", "startPage": 1, "endPage": 2, "description": "Extract carrier details from declarations page" },
2539
2622
  ...
2540
2623
  ],
2541
- "pageMap": { "declarations": "pages 1-3", "endorsements": "pages 15-22", ... }
2624
+ "pageMap": [
2625
+ { "section": "declarations", "pages": "pages 1-3" },
2626
+ { "section": "endorsements", "pages": "pages 15-22" }
2627
+ ]
2542
2628
  }
2543
2629
 
2544
2630
  Create tasks that cover the entire document. Prefer specific extractors over generic "sections" where possible. Keep page ranges tight \u2014 only include pages relevant to each extractor.
@@ -2855,9 +2941,14 @@ Return JSON only.`;
2855
2941
 
2856
2942
  // src/prompts/extractors/declarations.ts
2857
2943
  import { z as z27 } from "zod";
2858
- var DeclarationsExtractSchema = z27.record(z27.string(), z27.unknown()).describe(
2859
- "Flexible declarations data \u2014 structure varies by line of business. Keys are descriptive field names, values are the extracted data."
2860
- );
2944
+ var DeclarationsFieldSchema = z27.object({
2945
+ field: z27.string().describe("Descriptive field name (e.g. 'policyNumber', 'effectiveDate', 'coverageALimit')"),
2946
+ value: z27.string().describe("Extracted value exactly as it appears in the document"),
2947
+ section: z27.string().optional().describe("Section or grouping this field belongs to (e.g. 'Coverage Limits', 'Vehicle Schedule')")
2948
+ });
2949
+ var DeclarationsExtractSchema = z27.object({
2950
+ fields: z27.array(DeclarationsFieldSchema).describe("All declarations page fields extracted as key-value pairs. Structure varies by line of business.")
2951
+ });
2861
2952
  function buildDeclarationsPrompt() {
2862
2953
  return `You are an expert insurance document analyst. Extract all declarations page data from this document into a flexible key-value structure.
2863
2954
 
@@ -2881,9 +2972,18 @@ For PERSONAL LINES declarations:
2881
2972
  - Flood (NFIP): flood zone, community number, building/contents coverage
2882
2973
  - Personal Articles: scheduled items list with appraised values
2883
2974
 
2884
- Use descriptive field names as keys. Preserve original values exactly as they appear.
2975
+ Return each field as an object with "field" (descriptive name), "value" (exact text from document), and optional "section" (grouping).
2885
2976
 
2886
- Return JSON only.`;
2977
+ Example output:
2978
+ {
2979
+ "fields": [
2980
+ { "field": "policyNumber", "value": "GL-2025-78432", "section": "Policy Info" },
2981
+ { "field": "effectiveDate", "value": "04/10/2025", "section": "Policy Info" },
2982
+ { "field": "eachOccurrenceLimit", "value": "$1,000,000", "section": "Coverage Limits" }
2983
+ ]
2984
+ }
2985
+
2986
+ Preserve original values exactly as they appear. Return JSON only.`;
2887
2987
  }
2888
2988
 
2889
2989
  // src/prompts/extractors/loss-history.ts
@@ -3031,7 +3131,8 @@ function createExtractor(config) {
3031
3131
  onTokenUsage,
3032
3132
  onProgress,
3033
3133
  log,
3034
- providerOptions
3134
+ providerOptions,
3135
+ onCheckpointSave
3035
3136
  } = config;
3036
3137
  const limit = pLimit(concurrency);
3037
3138
  let totalUsage = { inputTokens: 0, outputTokens: 0 };
@@ -3042,100 +3143,106 @@ function createExtractor(config) {
3042
3143
  onTokenUsage?.(usage);
3043
3144
  }
3044
3145
  }
3045
- async function extract(pdfBase64, documentId) {
3146
+ async function extract(pdfBase64, documentId, options) {
3046
3147
  const id = documentId ?? `doc-${Date.now()}`;
3047
3148
  const memory = /* @__PURE__ */ new Map();
3048
3149
  totalUsage = { inputTokens: 0, outputTokens: 0 };
3049
- onProgress?.("Classifying document...");
3050
- const pageCount = await getPdfPageCount(pdfBase64);
3051
- const classifyResult = await withRetry(
3052
- () => generateObject({
3053
- prompt: buildClassifyPrompt(),
3054
- schema: ClassifyResultSchema,
3055
- maxTokens: 512,
3056
- providerOptions
3057
- })
3058
- );
3059
- trackUsage(classifyResult.usage);
3060
- memory.set("classify", classifyResult.object);
3061
- const { documentType, policyTypes } = classifyResult.object;
3062
- const primaryType = policyTypes[0] ?? "other";
3063
- const template = getTemplate(primaryType);
3064
- onProgress?.(`Planning extraction for ${primaryType} ${documentType}...`);
3065
- const templateHints = [
3066
- `Document type: ${primaryType} ${documentType}`,
3067
- `Expected sections: ${template.expectedSections.join(", ")}`,
3068
- `Page hints: ${Object.entries(template.pageHints).map(([k, v]) => `${k}: ${v}`).join("; ")}`,
3069
- `Total pages: ${pageCount}`
3070
- ].join("\n");
3071
- const planResult = await withRetry(
3072
- () => generateObject({
3073
- prompt: buildPlanPrompt(templateHints),
3074
- schema: ExtractionPlanSchema,
3075
- maxTokens: 2048,
3076
- providerOptions
3077
- })
3078
- );
3079
- trackUsage(planResult.usage);
3080
- const tasks = planResult.object.tasks;
3081
- onProgress?.(`Dispatching ${tasks.length} extractors...`);
3082
- const extractorResults = await Promise.all(
3083
- tasks.map(
3084
- (task) => limit(async () => {
3085
- const ext = getExtractor(task.extractorName);
3086
- if (!ext) {
3087
- await log?.(`Unknown extractor: ${task.extractorName}, skipping`);
3088
- return null;
3089
- }
3090
- onProgress?.(`Extracting ${task.extractorName} (pages ${task.startPage}-${task.endPage})...`);
3091
- try {
3092
- const result = await runExtractor({
3093
- name: task.extractorName,
3094
- prompt: ext.buildPrompt(),
3095
- schema: ext.schema,
3096
- pdfBase64,
3097
- startPage: task.startPage,
3098
- endPage: task.endPage,
3099
- generateObject,
3100
- convertPdfToImages,
3101
- maxTokens: ext.maxTokens ?? 4096,
3102
- providerOptions
3103
- });
3104
- trackUsage(result.usage);
3105
- return result;
3106
- } catch (error) {
3107
- await log?.(`Extractor ${task.extractorName} failed: ${error}`);
3108
- return null;
3109
- }
3110
- })
3111
- )
3112
- );
3113
- for (const result of extractorResults) {
3114
- if (result) {
3115
- memory.set(result.name, result.data);
3150
+ const pipelineCtx = createPipelineContext({
3151
+ id,
3152
+ onSave: onCheckpointSave,
3153
+ resumeFrom: options?.resumeFrom
3154
+ });
3155
+ const resumed = pipelineCtx.getCheckpoint()?.state;
3156
+ if (resumed?.memory) {
3157
+ for (const [k, v] of Object.entries(resumed.memory)) {
3158
+ memory.set(k, v);
3116
3159
  }
3117
3160
  }
3118
- for (let round = 0; round < maxReviewRounds; round++) {
3119
- const extractedKeys = [...memory.keys()].filter((k) => k !== "classify");
3120
- const reviewResult = await withRetry(
3121
- () => generateObject({
3122
- prompt: buildReviewPrompt(template.required, extractedKeys),
3123
- schema: ReviewResultSchema,
3124
- maxTokens: 1024,
3161
+ let classifyResult;
3162
+ if (resumed?.classifyResult && pipelineCtx.isPhaseComplete("classify")) {
3163
+ classifyResult = resumed.classifyResult;
3164
+ onProgress?.("Resuming from checkpoint (classify complete)...");
3165
+ } else {
3166
+ onProgress?.("Classifying document...");
3167
+ const pageCount2 = await getPdfPageCount(pdfBase64);
3168
+ const classifyResponse = await safeGenerateObject(
3169
+ generateObject,
3170
+ {
3171
+ prompt: buildClassifyPrompt(),
3172
+ schema: ClassifyResultSchema,
3173
+ maxTokens: 512,
3125
3174
  providerOptions
3126
- })
3175
+ },
3176
+ {
3177
+ fallback: { documentType: "policy", policyTypes: ["other"], confidence: 0 },
3178
+ log,
3179
+ onError: (err, attempt) => log?.(`Classify attempt ${attempt + 1} failed: ${err}`)
3180
+ }
3127
3181
  );
3128
- trackUsage(reviewResult.usage);
3129
- if (reviewResult.object.complete || reviewResult.object.additionalTasks.length === 0) {
3130
- onProgress?.("Extraction complete.");
3131
- break;
3132
- }
3133
- onProgress?.(`Review round ${round + 1}: dispatching ${reviewResult.object.additionalTasks.length} follow-up extractors...`);
3134
- const followUpResults = await Promise.all(
3135
- reviewResult.object.additionalTasks.map(
3182
+ trackUsage(classifyResponse.usage);
3183
+ classifyResult = classifyResponse.object;
3184
+ memory.set("classify", classifyResult);
3185
+ await pipelineCtx.save("classify", {
3186
+ id,
3187
+ pageCount: pageCount2,
3188
+ classifyResult,
3189
+ memory: Object.fromEntries(memory)
3190
+ });
3191
+ }
3192
+ const { documentType, policyTypes } = classifyResult;
3193
+ const primaryType = policyTypes[0] ?? "other";
3194
+ const template = getTemplate(primaryType);
3195
+ const pageCount = resumed?.pageCount ?? await getPdfPageCount(pdfBase64);
3196
+ let plan;
3197
+ if (resumed?.plan && pipelineCtx.isPhaseComplete("plan")) {
3198
+ plan = resumed.plan;
3199
+ onProgress?.("Resuming from checkpoint (plan complete)...");
3200
+ } else {
3201
+ onProgress?.(`Planning extraction for ${primaryType} ${documentType}...`);
3202
+ const templateHints = [
3203
+ `Document type: ${primaryType} ${documentType}`,
3204
+ `Expected sections: ${template.expectedSections.join(", ")}`,
3205
+ `Page hints: ${Object.entries(template.pageHints).map(([k, v]) => `${k}: ${v}`).join("; ")}`,
3206
+ `Total pages: ${pageCount}`
3207
+ ].join("\n");
3208
+ const planResponse = await safeGenerateObject(
3209
+ generateObject,
3210
+ {
3211
+ prompt: buildPlanPrompt(templateHints),
3212
+ schema: ExtractionPlanSchema,
3213
+ maxTokens: 2048,
3214
+ providerOptions
3215
+ },
3216
+ {
3217
+ fallback: {
3218
+ tasks: [{ extractorName: "sections", startPage: 1, endPage: pageCount, description: "Full document fallback extraction" }]
3219
+ },
3220
+ log,
3221
+ onError: (err, attempt) => log?.(`Plan attempt ${attempt + 1} failed: ${err}`)
3222
+ }
3223
+ );
3224
+ trackUsage(planResponse.usage);
3225
+ plan = planResponse.object;
3226
+ await pipelineCtx.save("plan", {
3227
+ id,
3228
+ pageCount,
3229
+ classifyResult,
3230
+ plan,
3231
+ memory: Object.fromEntries(memory)
3232
+ });
3233
+ }
3234
+ if (!pipelineCtx.isPhaseComplete("extract")) {
3235
+ const tasks = plan.tasks;
3236
+ onProgress?.(`Dispatching ${tasks.length} extractors...`);
3237
+ const extractorResults = await Promise.all(
3238
+ tasks.map(
3136
3239
  (task) => limit(async () => {
3137
3240
  const ext = getExtractor(task.extractorName);
3138
- if (!ext) return null;
3241
+ if (!ext) {
3242
+ await log?.(`Unknown extractor: ${task.extractorName}, skipping`);
3243
+ return null;
3244
+ }
3245
+ onProgress?.(`Extracting ${task.extractorName} (pages ${task.startPage}-${task.endPage})...`);
3139
3246
  try {
3140
3247
  const result = await runExtractor({
3141
3248
  name: task.extractorName,
@@ -3152,28 +3259,114 @@ function createExtractor(config) {
3152
3259
  trackUsage(result.usage);
3153
3260
  return result;
3154
3261
  } catch (error) {
3155
- await log?.(`Follow-up extractor ${task.extractorName} failed: ${error}`);
3262
+ await log?.(`Extractor ${task.extractorName} failed: ${error}`);
3156
3263
  return null;
3157
3264
  }
3158
3265
  })
3159
3266
  )
3160
3267
  );
3161
- for (const result of followUpResults) {
3268
+ for (const result of extractorResults) {
3162
3269
  if (result) {
3163
3270
  memory.set(result.name, result.data);
3164
3271
  }
3165
3272
  }
3273
+ await pipelineCtx.save("extract", {
3274
+ id,
3275
+ pageCount,
3276
+ classifyResult,
3277
+ plan,
3278
+ memory: Object.fromEntries(memory)
3279
+ });
3280
+ }
3281
+ if (!pipelineCtx.isPhaseComplete("review")) {
3282
+ for (let round = 0; round < maxReviewRounds; round++) {
3283
+ const extractedKeys = [...memory.keys()].filter((k) => k !== "classify");
3284
+ const reviewResponse = await safeGenerateObject(
3285
+ generateObject,
3286
+ {
3287
+ prompt: buildReviewPrompt(template.required, extractedKeys),
3288
+ schema: ReviewResultSchema,
3289
+ maxTokens: 1024,
3290
+ providerOptions
3291
+ },
3292
+ {
3293
+ fallback: { complete: true, missingFields: [], additionalTasks: [] },
3294
+ log,
3295
+ onError: (err, attempt) => log?.(`Review round ${round + 1} attempt ${attempt + 1} failed: ${err}`)
3296
+ }
3297
+ );
3298
+ trackUsage(reviewResponse.usage);
3299
+ if (reviewResponse.object.complete || reviewResponse.object.additionalTasks.length === 0) {
3300
+ onProgress?.("Extraction complete.");
3301
+ break;
3302
+ }
3303
+ onProgress?.(`Review round ${round + 1}: dispatching ${reviewResponse.object.additionalTasks.length} follow-up extractors...`);
3304
+ const followUpResults = await Promise.all(
3305
+ reviewResponse.object.additionalTasks.map(
3306
+ (task) => limit(async () => {
3307
+ const ext = getExtractor(task.extractorName);
3308
+ if (!ext) return null;
3309
+ try {
3310
+ const result = await runExtractor({
3311
+ name: task.extractorName,
3312
+ prompt: ext.buildPrompt(),
3313
+ schema: ext.schema,
3314
+ pdfBase64,
3315
+ startPage: task.startPage,
3316
+ endPage: task.endPage,
3317
+ generateObject,
3318
+ convertPdfToImages,
3319
+ maxTokens: ext.maxTokens ?? 4096,
3320
+ providerOptions
3321
+ });
3322
+ trackUsage(result.usage);
3323
+ return result;
3324
+ } catch (error) {
3325
+ await log?.(`Follow-up extractor ${task.extractorName} failed: ${error}`);
3326
+ return null;
3327
+ }
3328
+ })
3329
+ )
3330
+ );
3331
+ for (const result of followUpResults) {
3332
+ if (result) {
3333
+ memory.set(result.name, result.data);
3334
+ }
3335
+ }
3336
+ }
3337
+ await pipelineCtx.save("review", {
3338
+ id,
3339
+ pageCount,
3340
+ classifyResult,
3341
+ plan,
3342
+ memory: Object.fromEntries(memory)
3343
+ });
3166
3344
  }
3167
3345
  onProgress?.("Assembling document...");
3168
3346
  const document = assembleDocument(id, documentType, memory);
3347
+ await pipelineCtx.save("assemble", {
3348
+ id,
3349
+ pageCount,
3350
+ classifyResult,
3351
+ plan,
3352
+ memory: Object.fromEntries(memory),
3353
+ document
3354
+ });
3169
3355
  onProgress?.("Formatting extracted content...");
3170
3356
  const formatResult = await formatDocumentContent(document, generateText, {
3171
3357
  providerOptions,
3172
- onProgress
3358
+ onProgress,
3359
+ log
3173
3360
  });
3174
3361
  trackUsage(formatResult.usage);
3175
3362
  const chunks = chunkDocument(formatResult.document);
3176
- return { document: formatResult.document, chunks, tokenUsage: totalUsage };
3363
+ const finalCheckpoint = pipelineCtx.getCheckpoint();
3364
+ return {
3365
+ document: formatResult.document,
3366
+ chunks,
3367
+ tokenUsage: totalUsage,
3368
+ checkpoint: finalCheckpoint
3369
+ };
3177
3370
  }
3178
3371
  return { extract };
3179
3372
  }
@@ -4036,7 +4229,6 @@ function createApplicationPipeline(config) {
4036
4229
  let state = {
4037
4230
  id,
4038
4231
  pdfBase64: void 0,
4039
- // Don't persist the full PDF in state
4040
4232
  title: void 0,
4041
4233
  applicationType: null,
4042
4234
  fields: [],
@@ -4047,13 +4239,20 @@ function createApplicationPipeline(config) {
4047
4239
  updatedAt: now
4048
4240
  };
4049
4241
  onProgress?.("Classifying document...");
4050
- const { result: classifyResult, usage: classifyUsage } = await classifyApplication(
4051
- pdfBase64.slice(0, 2e3),
4052
- // Send truncated content for classification
4053
- generateObject,
4054
- providerOptions
4055
- );
4056
- trackUsage(classifyUsage);
4242
+ await applicationStore?.save(state);
4243
+ let classifyResult;
4244
+ try {
4245
+ const { result, usage: classifyUsage } = await classifyApplication(
4246
+ pdfBase64.slice(0, 2e3),
4247
+ generateObject,
4248
+ providerOptions
4249
+ );
4250
+ trackUsage(classifyUsage);
4251
+ classifyResult = result;
4252
+ } catch (error) {
4253
+ await log?.(`Classification failed, treating as non-application: ${error instanceof Error ? error.message : String(error)}`);
4254
+ classifyResult = { isApplication: false, confidence: 0, applicationType: null };
4255
+ }
4057
4256
  if (!classifyResult.isApplication) {
4058
4257
  state.status = "complete";
4059
4258
  state.updatedAt = Date.now();
@@ -4063,13 +4262,28 @@ function createApplicationPipeline(config) {
4063
4262
  state.applicationType = classifyResult.applicationType;
4064
4263
  state.status = "extracting";
4065
4264
  state.updatedAt = Date.now();
4265
+ await applicationStore?.save(state);
4066
4266
  onProgress?.("Extracting form fields...");
4067
- const { fields, usage: extractUsage } = await extractFields(
4068
- pdfBase64,
4069
- generateObject,
4070
- providerOptions
4071
- );
4072
- trackUsage(extractUsage);
4267
+ let fields;
4268
+ try {
4269
+ const { fields: extractedFields, usage: extractUsage } = await extractFields(
4270
+ pdfBase64,
4271
+ generateObject,
4272
+ providerOptions
4273
+ );
4274
+ trackUsage(extractUsage);
4275
+ fields = extractedFields;
4276
+ } catch (error) {
4277
+ await log?.(`Field extraction failed: ${error instanceof Error ? error.message : String(error)}`);
4278
+ fields = [];
4279
+ }
4280
+ if (fields.length === 0) {
4281
+ await log?.("No fields extracted, completing pipeline with empty result");
4282
+ state.status = "complete";
4283
+ state.updatedAt = Date.now();
4284
+ await applicationStore?.save(state);
4285
+ return { state, tokenUsage: totalUsage };
4286
+ }
4073
4287
  state.fields = fields;
4074
4288
  state.title = classifyResult.applicationType ?? void 0;
4075
4289
  state.status = "auto_filling";
@@ -4101,20 +4315,24 @@ function createApplicationPipeline(config) {
4101
4315
  limit(async () => {
4102
4316
  const unfilledFields2 = state.fields.filter((f) => !f.value);
4103
4317
  if (unfilledFields2.length === 0) return;
4104
- const { result: autoFillResult, usage: afUsage } = await autoFillFromContext(
4105
- unfilledFields2,
4106
- orgContext,
4107
- generateObject,
4108
- providerOptions
4109
- );
4110
- trackUsage(afUsage);
4111
- for (const match of autoFillResult.matches) {
4112
- const field = state.fields.find((f) => f.id === match.fieldId);
4113
- if (field && !field.value) {
4114
- field.value = match.value;
4115
- field.source = `auto-fill: ${match.contextKey}`;
4116
- field.confidence = match.confidence;
4318
+ try {
4319
+ const { result: autoFillResult, usage: afUsage } = await autoFillFromContext(
4320
+ unfilledFields2,
4321
+ orgContext,
4322
+ generateObject,
4323
+ providerOptions
4324
+ );
4325
+ trackUsage(afUsage);
4326
+ for (const match of autoFillResult.matches) {
4327
+ const field = state.fields.find((f) => f.id === match.fieldId);
4328
+ if (field && !field.value) {
4329
+ field.value = match.value;
4330
+ field.source = `auto-fill: ${match.contextKey}`;
4331
+ field.confidence = match.confidence;
4332
+ }
4117
4333
  }
4334
+ } catch (e) {
4335
+ await log?.(`Auto-fill from context failed: ${e instanceof Error ? e.message : String(e)}`);
4118
4336
  }
4119
4337
  })
4120
4338
  );
@@ -4147,13 +4365,18 @@ function createApplicationPipeline(config) {
4147
4365
  if (unfilledFields.length > 0) {
4148
4366
  onProgress?.(`Batching ${unfilledFields.length} remaining questions...`);
4149
4367
  state.status = "batching";
4150
- const { result: batchResult, usage: batchUsage } = await batchQuestions(
4151
- unfilledFields,
4152
- generateObject,
4153
- providerOptions
4154
- );
4155
- trackUsage(batchUsage);
4156
- state.batches = batchResult.batches;
4368
+ try {
4369
+ const { result: batchResult, usage: batchUsage } = await batchQuestions(
4370
+ unfilledFields,
4371
+ generateObject,
4372
+ providerOptions
4373
+ );
4374
+ trackUsage(batchUsage);
4375
+ state.batches = batchResult.batches;
4376
+ } catch (error) {
4377
+ await log?.(`Batching failed, using single-batch fallback: ${error instanceof Error ? error.message : String(error)}`);
4378
+ state.batches = [unfilledFields.map((f) => f.id)];
4379
+ }
4157
4380
  state.currentBatchIndex = 0;
4158
4381
  state.status = "collecting";
4159
4382
  } else {
@@ -4180,32 +4403,49 @@ function createApplicationPipeline(config) {
4180
4403
  (f) => currentBatchFieldIds.includes(f.id)
4181
4404
  );
4182
4405
  onProgress?.("Classifying reply...");
4183
- const { intent, usage: intentUsage } = await classifyReplyIntent(
4184
- currentBatchFields,
4185
- replyText,
4186
- generateObject,
4187
- providerOptions
4188
- );
4189
- trackUsage(intentUsage);
4190
- let fieldsFilled = 0;
4191
- let responseText;
4192
- if (intent.hasAnswers) {
4193
- onProgress?.("Parsing answers...");
4194
- const { result: parseResult, usage: parseUsage } = await parseAnswers(
4406
+ let intent;
4407
+ try {
4408
+ const { intent: classifiedIntent, usage: intentUsage } = await classifyReplyIntent(
4195
4409
  currentBatchFields,
4196
4410
  replyText,
4197
4411
  generateObject,
4198
4412
  providerOptions
4199
4413
  );
4200
- trackUsage(parseUsage);
4201
- for (const answer of parseResult.answers) {
4202
- const field = state.fields.find((f) => f.id === answer.fieldId);
4203
- if (field) {
4204
- field.value = answer.value;
4205
- field.source = "user";
4206
- field.confidence = "confirmed";
4207
- fieldsFilled++;
4414
+ trackUsage(intentUsage);
4415
+ intent = classifiedIntent;
4416
+ } catch (error) {
4417
+ await log?.(`Reply intent classification failed, defaulting to answers_only: ${error instanceof Error ? error.message : String(error)}`);
4418
+ intent = {
4419
+ primaryIntent: "answers_only",
4420
+ hasAnswers: true,
4421
+ questionText: void 0,
4422
+ questionFieldIds: void 0,
4423
+ lookupRequests: void 0
4424
+ };
4425
+ }
4426
+ let fieldsFilled = 0;
4427
+ let responseText;
4428
+ if (intent.hasAnswers) {
4429
+ onProgress?.("Parsing answers...");
4430
+ try {
4431
+ const { result: parseResult, usage: parseUsage } = await parseAnswers(
4432
+ currentBatchFields,
4433
+ replyText,
4434
+ generateObject,
4435
+ providerOptions
4436
+ );
4437
+ trackUsage(parseUsage);
4438
+ for (const answer of parseResult.answers) {
4439
+ const field = state.fields.find((f) => f.id === answer.fieldId);
4440
+ if (field) {
4441
+ field.value = answer.value;
4442
+ field.source = "user";
4443
+ field.confidence = "confirmed";
4444
+ fieldsFilled++;
4445
+ }
4208
4446
  }
4447
+ } catch (error) {
4448
+ await log?.(`Answer parsing failed: ${error instanceof Error ? error.message : String(error)}`);
4209
4449
  }
4210
4450
  }
4211
4451
  if (intent.lookupRequests?.length) {
@@ -4226,36 +4466,45 @@ function createApplicationPipeline(config) {
4226
4466
  const targetFields = state.fields.filter(
4227
4467
  (f) => intent.lookupRequests.some((lr) => lr.targetFieldIds.includes(f.id))
4228
4468
  );
4229
- const { result: lookupResult, usage: lookupUsage } = await fillFromLookup(
4230
- intent.lookupRequests,
4231
- targetFields,
4232
- availableData,
4233
- generateObject,
4234
- providerOptions
4235
- );
4236
- trackUsage(lookupUsage);
4237
- for (const fill of lookupResult.fills) {
4238
- const field = state.fields.find((f) => f.id === fill.fieldId);
4239
- if (field) {
4240
- field.value = fill.value;
4241
- field.source = `lookup: ${fill.source}`;
4242
- field.confidence = "high";
4243
- fieldsFilled++;
4469
+ try {
4470
+ const { result: lookupResult, usage: lookupUsage } = await fillFromLookup(
4471
+ intent.lookupRequests,
4472
+ targetFields,
4473
+ availableData,
4474
+ generateObject,
4475
+ providerOptions
4476
+ );
4477
+ trackUsage(lookupUsage);
4478
+ for (const fill of lookupResult.fills) {
4479
+ const field = state.fields.find((f) => f.id === fill.fieldId);
4480
+ if (field) {
4481
+ field.value = fill.value;
4482
+ field.source = `lookup: ${fill.source}`;
4483
+ field.confidence = "high";
4484
+ fieldsFilled++;
4485
+ }
4244
4486
  }
4487
+ } catch (error) {
4488
+ await log?.(`Lookup fill failed: ${error instanceof Error ? error.message : String(error)}`);
4245
4489
  }
4246
4490
  }
4247
4491
  }
4248
4492
  if (intent.primaryIntent === "question" || intent.primaryIntent === "mixed") {
4249
4493
  if (intent.questionText) {
4250
- const { text, usage } = await generateText({
4251
- prompt: `The user is filling out an insurance application and asked: "${intent.questionText}"
4494
+ try {
4495
+ const { text, usage } = await generateText({
4496
+ prompt: `The user is filling out an insurance application and asked: "${intent.questionText}"
4252
4497
 
4253
4498
  Provide a brief, helpful explanation (2-3 sentences). End with "Just reply with the answer when you're ready and I'll fill it in."`,
4254
- maxTokens: 512,
4255
- providerOptions
4256
- });
4257
- trackUsage(usage);
4258
- responseText = text;
4499
+ maxTokens: 512,
4500
+ providerOptions
4501
+ });
4502
+ trackUsage(usage);
4503
+ responseText = text;
4504
+ } catch (error) {
4505
+ await log?.(`Question response generation failed: ${error instanceof Error ? error.message : String(error)}`);
4506
+ responseText = `I wasn't able to generate an explanation for your question. Could you rephrase it, or just provide the answer directly?`;
4507
+ }
4259
4508
  }
4260
4509
  }
4261
4510
  const currentBatchComplete = currentBatchFieldIds.every(
@@ -4269,26 +4518,30 @@ Provide a brief, helpful explanation (2-3 sentences). End with "Just reply with
4269
4518
  (f) => nextBatchFieldIds.includes(f.id)
4270
4519
  );
4271
4520
  const filledCount = state.fields.filter((f) => f.value).length;
4272
- const { text: emailText, usage: emailUsage } = await generateBatchEmail(
4273
- nextBatchFields,
4274
- state.currentBatchIndex,
4275
- state.batches.length,
4276
- {
4277
- appTitle: state.title,
4278
- totalFieldCount: state.fields.length,
4279
- filledFieldCount: filledCount,
4280
- companyName: context?.companyName
4281
- },
4282
- generateText,
4283
- providerOptions
4284
- );
4285
- trackUsage(emailUsage);
4286
- if (!responseText) {
4287
- responseText = emailText;
4288
- } else {
4289
- responseText += `
4521
+ try {
4522
+ const { text: emailText, usage: emailUsage } = await generateBatchEmail(
4523
+ nextBatchFields,
4524
+ state.currentBatchIndex,
4525
+ state.batches.length,
4526
+ {
4527
+ appTitle: state.title,
4528
+ totalFieldCount: state.fields.length,
4529
+ filledFieldCount: filledCount,
4530
+ companyName: context?.companyName
4531
+ },
4532
+ generateText,
4533
+ providerOptions
4534
+ );
4535
+ trackUsage(emailUsage);
4536
+ if (!responseText) {
4537
+ responseText = emailText;
4538
+ } else {
4539
+ responseText += `
4290
4540
 
4291
4541
  ${emailText}`;
4542
+ }
4543
+ } catch (error) {
4544
+ await log?.(`Batch email generation failed: ${error instanceof Error ? error.message : String(error)}`);
4292
4545
  }
4293
4546
  } else {
4294
4547
  state.status = "confirming";
@@ -4497,7 +4750,7 @@ var EvidenceItemSchema = z32.object({
4497
4750
  turnId: z32.string().optional(),
4498
4751
  text: z32.string().describe("Text excerpt from the source"),
4499
4752
  relevance: z32.number().min(0).max(1),
4500
- metadata: z32.record(z32.string(), z32.string()).optional()
4753
+ metadata: z32.array(z32.object({ key: z32.string(), value: z32.string() })).optional()
4501
4754
  });
4502
4755
  var RetrievalResultSchema = z32.object({
4503
4756
  subQuestion: z32.string(),
@@ -4533,6 +4786,9 @@ var QueryResultSchema = z32.object({
4533
4786
  });
4534
4787
 
4535
4788
  // src/query/retriever.ts
4789
/**
 * Converts a plain key/value record into a list of `{ key, value }` pairs,
 * preserving the record's own enumerable property order.
 *
 * @param {Record<string, string> | null | undefined} record - Metadata record to convert.
 * @returns {Array<{ key: string, value: string }>} One entry per own enumerable
 *   property of `record`; an empty array when `record` is null or undefined.
 */
function recordToKVArray(record) {
  // Object.entries throws a TypeError on null/undefined, so a source item
  // without metadata would otherwise abort the caller instead of simply
  // yielding no metadata entries.
  if (record == null) {
    return [];
  }
  return Object.entries(record).map(([key, value]) => ({ key, value }));
}
4536
4792
  async function retrieve(subQuestion, conversationId, config) {
4537
4793
  const { documentStore, memoryStore, retrievalLimit, log } = config;
4538
4794
  const evidence = [];
@@ -4559,7 +4815,7 @@ async function retrieve(subQuestion, conversationId, config) {
4559
4815
  text: chunk.text,
4560
4816
  relevance: 0.8,
4561
4817
  // Default — store doesn't expose scores directly
4562
- metadata: chunk.metadata
4818
+ metadata: recordToKVArray(chunk.metadata)
4563
4819
  });
4564
4820
  }
4565
4821
  }
@@ -4574,7 +4830,7 @@ async function retrieve(subQuestion, conversationId, config) {
4574
4830
  documentId: chunk.documentId,
4575
4831
  text: chunk.text,
4576
4832
  relevance: 0.8,
4577
- metadata: chunk.metadata
4833
+ metadata: recordToKVArray(chunk.metadata)
4578
4834
  });
4579
4835
  }
4580
4836
  }
@@ -4602,11 +4858,11 @@ async function retrieve(subQuestion, conversationId, config) {
4602
4858
  text: summary,
4603
4859
  relevance: 0.9,
4604
4860
  // Direct lookup is high relevance
4605
- metadata: {
4606
- type: doc.type,
4607
- carrier: doc.carrier ?? "",
4608
- insuredName: doc.insuredName ?? ""
4609
- }
4861
+ metadata: [
4862
+ { key: "type", value: doc.type },
4863
+ { key: "carrier", value: doc.carrier ?? "" },
4864
+ { key: "insuredName", value: doc.insuredName ?? "" }
4865
+ ]
4610
4866
  });
4611
4867
  }
4612
4868
  } catch (e) {
@@ -4841,8 +5097,12 @@ function createQueryAgent(config) {
4841
5097
  async function query(input) {
4842
5098
  totalUsage = { inputTokens: 0, outputTokens: 0 };
4843
5099
  const { question, conversationId, context } = input;
5100
+ const pipelineCtx = createPipelineContext({
5101
+ id: `query-${Date.now()}`
5102
+ });
4844
5103
  onProgress?.("Classifying query...");
4845
5104
  const classification = await classify(question, conversationId);
5105
+ await pipelineCtx.save("classify", { classification });
4846
5106
  onProgress?.(`Retrieving evidence for ${classification.subQuestions.length} sub-question(s)...`);
4847
5107
  const retrieverConfig = {
4848
5108
  documentStore,
@@ -4856,9 +5116,10 @@ function createQueryAgent(config) {
4856
5116
  )
4857
5117
  );
4858
5118
  const allEvidence = retrievalResults.flatMap((r) => r.evidence);
5119
+ await pipelineCtx.save("retrieve", { classification, evidence: allEvidence });
4859
5120
  onProgress?.("Reasoning over evidence...");
4860
5121
  const reasonerConfig = { generateObject, providerOptions };
4861
- let subAnswers = await Promise.all(
5122
+ const reasonResults = await Promise.allSettled(
4862
5123
  classification.subQuestions.map(
4863
5124
  (sq, i) => limit(async () => {
4864
5125
  const { subAnswer, usage } = await reason(
@@ -4872,10 +5133,27 @@ function createQueryAgent(config) {
4872
5133
  })
4873
5134
  )
4874
5135
  );
5136
+ let subAnswers = [];
5137
+ for (let i = 0; i < reasonResults.length; i++) {
5138
+ const result = reasonResults[i];
5139
+ if (result.status === "fulfilled") {
5140
+ subAnswers.push(result.value);
5141
+ } else {
5142
+ await log?.(`Reasoner failed for sub-question "${classification.subQuestions[i].question}": ${result.reason}`);
5143
+ subAnswers.push({
5144
+ subQuestion: classification.subQuestions[i].question,
5145
+ answer: "Unable to answer this part of the question due to a processing error.",
5146
+ citations: [],
5147
+ confidence: 0,
5148
+ needsMoreContext: true
5149
+ });
5150
+ }
5151
+ }
5152
+ await pipelineCtx.save("reason", { classification, evidence: allEvidence, subAnswers });
4875
5153
  onProgress?.("Verifying answer grounding...");
4876
5154
  const verifierConfig = { generateObject, providerOptions };
4877
5155
  for (let round = 0; round < maxVerifyRounds; round++) {
4878
- const { result: verifyResult, usage } = await verify(
5156
+ const { result: verifyResult, usage } = await safeVerify(
4879
5157
  question,
4880
5158
  subAnswers,
4881
5159
  allEvidence,
@@ -4899,7 +5177,6 @@ function createQueryAgent(config) {
4899
5177
  () => retrieve(sq, conversationId, {
4900
5178
  ...retrieverConfig,
4901
5179
  retrievalLimit: retrievalLimit * 2
4902
- // Broader retrieval on retry
4903
5180
  })
4904
5181
  )
4905
5182
  )
@@ -4907,7 +5184,7 @@ function createQueryAgent(config) {
4907
5184
  for (const r of retryRetrievals) {
4908
5185
  allEvidence.push(...r.evidence);
4909
5186
  }
4910
- const retrySubAnswers = await Promise.all(
5187
+ const retrySettled = await Promise.allSettled(
4911
5188
  retryQuestions.map(
4912
5189
  (sq, i) => limit(async () => {
4913
5190
  const { subAnswer, usage: u } = await reason(
@@ -4921,6 +5198,7 @@ function createQueryAgent(config) {
4921
5198
  })
4922
5199
  )
4923
5200
  );
5201
+ const retrySubAnswers = retrySettled.filter((r) => r.status === "fulfilled").map((r) => r.value);
4924
5202
  const retryQSet = new Set(retryQuestions.map((sq) => sq.question));
4925
5203
  subAnswers = subAnswers.map((sa) => {
4926
5204
  if (retryQSet.has(sa.subQuestion)) {
@@ -4973,17 +5251,42 @@ function createQueryAgent(config) {
4973
5251
  }
4974
5252
  }
4975
5253
  const prompt = buildQueryClassifyPrompt(question, conversationContext);
4976
- const { object, usage } = await withRetry(
4977
- () => generateObject({
5254
+ const { object, usage } = await safeGenerateObject(
5255
+ generateObject,
5256
+ {
4978
5257
  prompt,
4979
5258
  schema: QueryClassifyResultSchema,
4980
5259
  maxTokens: 2048,
4981
5260
  providerOptions
4982
- })
5261
+ },
5262
+ {
5263
+ fallback: {
5264
+ intent: "general_knowledge",
5265
+ subQuestions: [
5266
+ {
5267
+ question,
5268
+ intent: "general_knowledge"
5269
+ }
5270
+ ],
5271
+ requiresDocumentLookup: true,
5272
+ requiresChunkSearch: true,
5273
+ requiresConversationHistory: !!conversationId
5274
+ },
5275
+ log,
5276
+ onError: (err, attempt) => log?.(`Query classify attempt ${attempt + 1} failed: ${err}`)
5277
+ }
4983
5278
  );
4984
5279
  trackUsage(usage);
4985
5280
  return object;
4986
5281
  }
5282
/**
 * Runs `verify` and never rejects because of a verification failure.
 *
 * If `verify` throws or rejects, the failure is reported through the optional
 * `log` callback and an approving result with no issues is returned instead,
 * so the caller proceeds as though verification had passed. Note that this
 * fallback object carries only `result` (no `usage` field).
 *
 * @param originalQuestion - The user's original question, forwarded to `verify`.
 * @param subAnswers - Sub-answers to check, forwarded to `verify`.
 * @param allEvidence - Collected evidence, forwarded to `verify`.
 * @param verifierConfig - Verifier configuration, forwarded to `verify`.
 * @returns Whatever `verify` resolves to, or `{ result: { approved: true, issues: [] } }` on failure.
 */
async function safeVerify(originalQuestion, subAnswers, allEvidence, verifierConfig) {
  let outcome;
  try {
    outcome = await verify(originalQuestion, subAnswers, allEvidence, verifierConfig);
  } catch (failure) {
    const reason = failure instanceof Error ? failure.message : String(failure);
    await log?.(`Verification failed, approving by default: ${reason}`);
    // Fail open: a broken verifier must not block the answer from being returned.
    outcome = { result: { approved: true, issues: [] } };
  }
  return outcome;
}
4987
5290
  async function respond(originalQuestion, subAnswers, classification, platform) {
4988
5291
  const subAnswersJson = JSON.stringify(
4989
5292
  subAnswers.map((sa) => ({
@@ -4997,13 +5300,25 @@ function createQueryAgent(config) {
4997
5300
  2
4998
5301
  );
4999
5302
  const prompt = buildRespondPrompt(originalQuestion, subAnswersJson, platform);
5000
- const { object, usage } = await withRetry(
5001
- () => generateObject({
5303
+ const { object, usage } = await safeGenerateObject(
5304
+ generateObject,
5305
+ {
5002
5306
  prompt,
5003
5307
  schema: QueryResultSchema,
5004
5308
  maxTokens: 4096,
5005
5309
  providerOptions
5006
- })
5310
+ },
5311
+ {
5312
+ fallback: {
5313
+ answer: subAnswers.map((sa) => `**${sa.subQuestion}**
5314
+ ${sa.answer}`).join("\n\n"),
5315
+ citations: subAnswers.flatMap((sa) => sa.citations),
5316
+ intent: classification.intent,
5317
+ confidence: Math.min(...subAnswers.map((sa) => sa.confidence), 1)
5318
+ },
5319
+ log,
5320
+ onError: (err, attempt) => log?.(`Respond attempt ${attempt + 1} failed: ${err}`)
5321
+ }
5007
5322
  );
5008
5323
  trackUsage(usage);
5009
5324
  const result = object;
@@ -5168,6 +5483,7 @@ export {
5168
5483
  CommercialAutoDeclarationsSchema,
5169
5484
  CommercialPropertyDeclarationsSchema,
5170
5485
  CommunicationIntentSchema,
5486
+ ConditionKeyValueSchema,
5171
5487
  ConditionTypeSchema,
5172
5488
  ConstructionTypeSchema,
5173
5489
  ContactSchema,
@@ -5334,6 +5650,7 @@ export {
5334
5650
  chunkDocument,
5335
5651
  createApplicationPipeline,
5336
5652
  createExtractor,
5653
+ createPipelineContext,
5337
5654
  createQueryAgent,
5338
5655
  extractPageRange,
5339
5656
  fillAcroForm,
@@ -5343,6 +5660,7 @@ export {
5343
5660
  getTemplate,
5344
5661
  overlayTextOnPdf,
5345
5662
  pLimit,
5663
+ safeGenerateObject,
5346
5664
  sanitizeNulls,
5347
5665
  stripFences,
5348
5666
  withRetry