@claritylabs/cl-sdk 0.6.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -54,6 +54,7 @@ __export(index_exports, {
54
54
  CommercialAutoDeclarationsSchema: () => CommercialAutoDeclarationsSchema,
55
55
  CommercialPropertyDeclarationsSchema: () => CommercialPropertyDeclarationsSchema,
56
56
  CommunicationIntentSchema: () => CommunicationIntentSchema,
57
+ ConditionKeyValueSchema: () => ConditionKeyValueSchema,
57
58
  ConditionTypeSchema: () => ConditionTypeSchema,
58
59
  ConstructionTypeSchema: () => ConstructionTypeSchema,
59
60
  ContactSchema: () => ContactSchema,
@@ -220,6 +221,7 @@ __export(index_exports, {
220
221
  chunkDocument: () => chunkDocument,
221
222
  createApplicationPipeline: () => createApplicationPipeline,
222
223
  createExtractor: () => createExtractor,
224
+ createPipelineContext: () => createPipelineContext,
223
225
  createQueryAgent: () => createQueryAgent,
224
226
  extractPageRange: () => extractPageRange,
225
227
  fillAcroForm: () => fillAcroForm,
@@ -229,6 +231,7 @@ __export(index_exports, {
229
231
  getTemplate: () => getTemplate,
230
232
  overlayTextOnPdf: () => overlayTextOnPdf,
231
233
  pLimit: () => pLimit,
234
+ safeGenerateObject: () => safeGenerateObject,
232
235
  sanitizeNulls: () => sanitizeNulls,
233
236
  stripFences: () => stripFences,
234
237
  withRetry: () => withRetry
@@ -308,6 +311,69 @@ function sanitizeNulls(obj) {
308
311
  return obj;
309
312
  }
310
313
 
314
+ // src/core/safe-generate.ts
315
/**
 * Runs `generateObject(params)` through `withRetry`, repeating the whole call
 * after a failure and optionally degrading to a caller-supplied fallback.
 *
 * @param {Function} generateObject - Async model call; invoked as `generateObject(params)`.
 * @param {object} params - Passed unchanged to `generateObject` on every attempt.
 * @param {object} [options]
 * @param {number} [options.maxRetries=1] - Extra attempts after the first one.
 *   Negative values are treated as 0 and NaN as the default, so at least one
 *   attempt is always made.
 * @param {*} [options.fallback] - When not `undefined`, returned as
 *   `{ object: fallback }` once every attempt has failed. Note that this
 *   fallback result carries no `usage` property.
 * @param {Function} [options.log] - Optional logger; also forwarded to `withRetry`.
 * @param {Function} [options.onError] - Called as `onError(error, attempt)` after
 *   each failed attempt (`attempt` is zero-based). May be async.
 * @returns {Promise<object>} Whatever `generateObject` resolved with, or
 *   `{ object: options.fallback }`.
 * @throws The error from the last failed attempt when no fallback was supplied.
 */
async function safeGenerateObject(generateObject, params, options) {
  const requestedRetries = Number(options?.maxRetries ?? 1);
  // Guard the loop bound: a negative or NaN bound would skip every attempt and
  // end in `throw undefined`.
  const maxRetries = Number.isNaN(requestedRetries)
    ? 1
    : Math.max(0, Math.floor(requestedRetries));
  let lastError;
  for (let attempt = 0; attempt <= maxRetries; attempt++) {
    try {
      return await withRetry(() => generateObject(params), options?.log);
    } catch (error) {
      lastError = error;
      // The hook is diagnostics only. Await it so async hooks do not float, and
      // keep a failing hook from aborting the remaining attempts or the fallback.
      try {
        await options?.onError?.(error, attempt);
      } catch (hookError) {
        await options?.log?.(
          `safeGenerateObject onError hook failed: ${hookError instanceof Error ? hookError.message : String(hookError)}`
        );
      }
      await options?.log?.(
        `safeGenerateObject attempt ${attempt + 1}/${maxRetries + 1} failed: ${error instanceof Error ? error.message : String(error)}`
      );
      if (attempt < maxRetries) {
        // Fixed one-second pause before the next attempt.
        await new Promise((resolve) => setTimeout(resolve, 1e3));
      }
    }
  }
  if (options?.fallback !== void 0) {
    await options?.log?.(
      `safeGenerateObject: all retries exhausted, returning fallback`
    );
    return { object: options.fallback };
  }
  throw lastError;
}
344
+
345
+ // src/core/pipeline.ts
346
/**
 * Creates an in-memory checkpoint tracker for one pipeline run.
 *
 * Each checkpoint records the phase name, the caller-supplied state, a
 * timestamp and the list of phases completed so far. Storing that list lets a
 * run resumed from a later checkpoint report every earlier phase as complete,
 * instead of only the single phase named in the checkpoint.
 *
 * @param {object} opts
 * @param {string} opts.id - Identifier exposed as `id` on the returned context.
 * @param {Function} [opts.onSave] - Awaited with each new checkpoint.
 * @param {object} [opts.resumeFrom] - Previously saved checkpoint. Its `phase`
 *   is marked complete, as is every entry of its `completedPhases` array if
 *   one is present (checkpoints without it still work as before).
 * @returns {{id: string, save: Function, getCheckpoint: Function, isPhaseComplete: Function, clear: Function}}
 */
function createPipelineContext(opts) {
  let latest = opts.resumeFrom;
  const completedPhases = /* @__PURE__ */ new Set();
  if (opts.resumeFrom) {
    if (Array.isArray(opts.resumeFrom.completedPhases)) {
      for (const earlierPhase of opts.resumeFrom.completedPhases) {
        completedPhases.add(earlierPhase);
      }
    }
    completedPhases.add(opts.resumeFrom.phase);
  }
  return {
    id: opts.id,
    /** Records `phase` as complete, stores the checkpoint and notifies `onSave`. */
    async save(phase, state) {
      completedPhases.add(phase);
      const checkpoint = {
        phase,
        state,
        timestamp: Date.now(),
        // Snapshot, so later saves do not mutate checkpoints already handed out.
        completedPhases: [...completedPhases]
      };
      latest = checkpoint;
      await opts.onSave?.(checkpoint);
    },
    /** Returns the most recent checkpoint (or `resumeFrom`), `undefined` if none. */
    getCheckpoint() {
      return latest;
    },
    /** True when `phase` was saved in this run or restored from `resumeFrom`. */
    isPhaseComplete(phase) {
      return completedPhases.has(phase);
    },
    /** Forgets the latest checkpoint and all completed phases. */
    clear() {
      latest = void 0;
      completedPhases.clear();
    }
  };
}
376
+
311
377
  // src/schemas/enums.ts
312
378
  var import_zod = require("zod");
313
379
  var PolicyTypeSchema = import_zod.z.enum([
@@ -708,11 +774,15 @@ var ExclusionSchema = import_zod5.z.object({
708
774
 
709
775
  // src/schemas/condition.ts
710
776
  var import_zod6 = require("zod");
777
+ var ConditionKeyValueSchema = import_zod6.z.object({
778
+ key: import_zod6.z.string(),
779
+ value: import_zod6.z.string()
780
+ });
711
781
  var PolicyConditionSchema = import_zod6.z.object({
712
782
  name: import_zod6.z.string(),
713
783
  conditionType: ConditionTypeSchema,
714
784
  content: import_zod6.z.string(),
715
- keyValues: import_zod6.z.record(import_zod6.z.string(), import_zod6.z.string()).optional(),
785
+ keyValues: import_zod6.z.array(ConditionKeyValueSchema).optional(),
716
786
  pageNumber: import_zod6.z.number().optional()
717
787
  });
718
788
 
@@ -1881,21 +1951,33 @@ async function formatDocumentContent(doc, generateText, options) {
1881
1951
  for (let i = 0; i < entries.length; i += MAX_ENTRIES_PER_BATCH) {
1882
1952
  batches.push(entries.slice(i, i + MAX_ENTRIES_PER_BATCH));
1883
1953
  }
1884
- for (const batch of batches) {
1885
- const prompt = buildFormatPrompt(batch.map((e) => ({ id: e.id, text: e.text })));
1886
- const result = await withRetry(
1887
- () => generateText({
1888
- prompt,
1889
- maxTokens: 16384,
1890
- providerOptions: options?.providerOptions
1891
- })
1892
- );
1893
- if (result.usage) {
1894
- totalUsage.inputTokens += result.usage.inputTokens;
1895
- totalUsage.outputTokens += result.usage.outputTokens;
1954
+ for (let batchIdx = 0; batchIdx < batches.length; batchIdx++) {
1955
+ const batch = batches[batchIdx];
1956
+ try {
1957
+ const prompt = buildFormatPrompt(batch.map((e) => ({ id: e.id, text: e.text })));
1958
+ const result = await withRetry(
1959
+ () => generateText({
1960
+ prompt,
1961
+ maxTokens: 16384,
1962
+ providerOptions: options?.providerOptions
1963
+ })
1964
+ );
1965
+ if (result.usage) {
1966
+ totalUsage.inputTokens += result.usage.inputTokens;
1967
+ totalUsage.outputTokens += result.usage.outputTokens;
1968
+ }
1969
+ const formatted = parseFormatResponse(result.text);
1970
+ if (formatted.size < batch.length) {
1971
+ await options?.log?.(
1972
+ `Format batch ${batchIdx + 1}/${batches.length}: model returned ${formatted.size}/${batch.length} entries \u2014 unformatted entries will keep original content`
1973
+ );
1974
+ }
1975
+ applyFormattedContent(doc, batch, formatted);
1976
+ } catch (error) {
1977
+ await options?.log?.(
1978
+ `Format batch ${batchIdx + 1}/${batches.length} failed, keeping original content: ${error instanceof Error ? error.message : String(error)}`
1979
+ );
1896
1980
  }
1897
- const formatted = parseFormatResponse(result.text);
1898
- applyFormattedContent(doc, batch, formatted);
1899
1981
  }
1900
1982
  return { document: doc, usage: totalUsage };
1901
1983
  }
@@ -2736,9 +2818,13 @@ var ExtractionTaskSchema = import_zod18.z.object({
2736
2818
  endPage: import_zod18.z.number(),
2737
2819
  description: import_zod18.z.string()
2738
2820
  });
2821
+ var PageMapEntrySchema = import_zod18.z.object({
2822
+ section: import_zod18.z.string(),
2823
+ pages: import_zod18.z.string()
2824
+ });
2739
2825
  var ExtractionPlanSchema = import_zod18.z.object({
2740
2826
  tasks: import_zod18.z.array(ExtractionTaskSchema),
2741
- pageMap: import_zod18.z.record(import_zod18.z.string(), import_zod18.z.string()).optional()
2827
+ pageMap: import_zod18.z.array(PageMapEntrySchema).optional()
2742
2828
  });
2743
2829
  function buildPlanPrompt(templateHints) {
2744
2830
  return `You are planning the extraction of an insurance document. You have already classified this document. Now scan the full document and create a page map + extraction plan.
@@ -2767,7 +2853,10 @@ Return JSON:
2767
2853
  { "extractorName": "carrier_info", "startPage": 1, "endPage": 2, "description": "Extract carrier details from declarations page" },
2768
2854
  ...
2769
2855
  ],
2770
- "pageMap": { "declarations": "pages 1-3", "endorsements": "pages 15-22", ... }
2856
+ "pageMap": [
2857
+ { "section": "declarations", "pages": "pages 1-3" },
2858
+ { "section": "endorsements", "pages": "pages 15-22" }
2859
+ ]
2771
2860
  }
2772
2861
 
2773
2862
  Create tasks that cover the entire document. Prefer specific extractors over generic "sections" where possible. Keep page ranges tight \u2014 only include pages relevant to each extractor.
@@ -3260,7 +3349,8 @@ function createExtractor(config) {
3260
3349
  onTokenUsage,
3261
3350
  onProgress,
3262
3351
  log,
3263
- providerOptions
3352
+ providerOptions,
3353
+ onCheckpointSave
3264
3354
  } = config;
3265
3355
  const limit = pLimit(concurrency);
3266
3356
  let totalUsage = { inputTokens: 0, outputTokens: 0 };
@@ -3271,100 +3361,106 @@ function createExtractor(config) {
3271
3361
  onTokenUsage?.(usage);
3272
3362
  }
3273
3363
  }
3274
- async function extract(pdfBase64, documentId) {
3364
+ async function extract(pdfBase64, documentId, options) {
3275
3365
  const id = documentId ?? `doc-${Date.now()}`;
3276
3366
  const memory = /* @__PURE__ */ new Map();
3277
3367
  totalUsage = { inputTokens: 0, outputTokens: 0 };
3278
- onProgress?.("Classifying document...");
3279
- const pageCount = await getPdfPageCount(pdfBase64);
3280
- const classifyResult = await withRetry(
3281
- () => generateObject({
3282
- prompt: buildClassifyPrompt(),
3283
- schema: ClassifyResultSchema,
3284
- maxTokens: 512,
3285
- providerOptions
3286
- })
3287
- );
3288
- trackUsage(classifyResult.usage);
3289
- memory.set("classify", classifyResult.object);
3290
- const { documentType, policyTypes } = classifyResult.object;
3291
- const primaryType = policyTypes[0] ?? "other";
3292
- const template = getTemplate(primaryType);
3293
- onProgress?.(`Planning extraction for ${primaryType} ${documentType}...`);
3294
- const templateHints = [
3295
- `Document type: ${primaryType} ${documentType}`,
3296
- `Expected sections: ${template.expectedSections.join(", ")}`,
3297
- `Page hints: ${Object.entries(template.pageHints).map(([k, v]) => `${k}: ${v}`).join("; ")}`,
3298
- `Total pages: ${pageCount}`
3299
- ].join("\n");
3300
- const planResult = await withRetry(
3301
- () => generateObject({
3302
- prompt: buildPlanPrompt(templateHints),
3303
- schema: ExtractionPlanSchema,
3304
- maxTokens: 2048,
3305
- providerOptions
3306
- })
3307
- );
3308
- trackUsage(planResult.usage);
3309
- const tasks = planResult.object.tasks;
3310
- onProgress?.(`Dispatching ${tasks.length} extractors...`);
3311
- const extractorResults = await Promise.all(
3312
- tasks.map(
3313
- (task) => limit(async () => {
3314
- const ext = getExtractor(task.extractorName);
3315
- if (!ext) {
3316
- await log?.(`Unknown extractor: ${task.extractorName}, skipping`);
3317
- return null;
3318
- }
3319
- onProgress?.(`Extracting ${task.extractorName} (pages ${task.startPage}-${task.endPage})...`);
3320
- try {
3321
- const result = await runExtractor({
3322
- name: task.extractorName,
3323
- prompt: ext.buildPrompt(),
3324
- schema: ext.schema,
3325
- pdfBase64,
3326
- startPage: task.startPage,
3327
- endPage: task.endPage,
3328
- generateObject,
3329
- convertPdfToImages,
3330
- maxTokens: ext.maxTokens ?? 4096,
3331
- providerOptions
3332
- });
3333
- trackUsage(result.usage);
3334
- return result;
3335
- } catch (error) {
3336
- await log?.(`Extractor ${task.extractorName} failed: ${error}`);
3337
- return null;
3338
- }
3339
- })
3340
- )
3341
- );
3342
- for (const result of extractorResults) {
3343
- if (result) {
3344
- memory.set(result.name, result.data);
3368
+ const pipelineCtx = createPipelineContext({
3369
+ id,
3370
+ onSave: onCheckpointSave,
3371
+ resumeFrom: options?.resumeFrom
3372
+ });
3373
+ const resumed = pipelineCtx.getCheckpoint()?.state;
3374
+ if (resumed?.memory) {
3375
+ for (const [k, v] of Object.entries(resumed.memory)) {
3376
+ memory.set(k, v);
3345
3377
  }
3346
3378
  }
3347
- for (let round = 0; round < maxReviewRounds; round++) {
3348
- const extractedKeys = [...memory.keys()].filter((k) => k !== "classify");
3349
- const reviewResult = await withRetry(
3350
- () => generateObject({
3351
- prompt: buildReviewPrompt(template.required, extractedKeys),
3352
- schema: ReviewResultSchema,
3353
- maxTokens: 1024,
3379
+ let classifyResult;
3380
+ if (resumed?.classifyResult && pipelineCtx.isPhaseComplete("classify")) {
3381
+ classifyResult = resumed.classifyResult;
3382
+ onProgress?.("Resuming from checkpoint (classify complete)...");
3383
+ } else {
3384
+ onProgress?.("Classifying document...");
3385
+ const pageCount2 = await getPdfPageCount(pdfBase64);
3386
+ const classifyResponse = await safeGenerateObject(
3387
+ generateObject,
3388
+ {
3389
+ prompt: buildClassifyPrompt(),
3390
+ schema: ClassifyResultSchema,
3391
+ maxTokens: 512,
3354
3392
  providerOptions
3355
- })
3393
+ },
3394
+ {
3395
+ fallback: { documentType: "policy", policyTypes: ["other"], confidence: 0 },
3396
+ log,
3397
+ onError: (err, attempt) => log?.(`Classify attempt ${attempt + 1} failed: ${err}`)
3398
+ }
3356
3399
  );
3357
- trackUsage(reviewResult.usage);
3358
- if (reviewResult.object.complete || reviewResult.object.additionalTasks.length === 0) {
3359
- onProgress?.("Extraction complete.");
3360
- break;
3361
- }
3362
- onProgress?.(`Review round ${round + 1}: dispatching ${reviewResult.object.additionalTasks.length} follow-up extractors...`);
3363
- const followUpResults = await Promise.all(
3364
- reviewResult.object.additionalTasks.map(
3400
+ trackUsage(classifyResponse.usage);
3401
+ classifyResult = classifyResponse.object;
3402
+ memory.set("classify", classifyResult);
3403
+ await pipelineCtx.save("classify", {
3404
+ id,
3405
+ pageCount: pageCount2,
3406
+ classifyResult,
3407
+ memory: Object.fromEntries(memory)
3408
+ });
3409
+ }
3410
+ const { documentType, policyTypes } = classifyResult;
3411
+ const primaryType = policyTypes[0] ?? "other";
3412
+ const template = getTemplate(primaryType);
3413
+ const pageCount = resumed?.pageCount ?? await getPdfPageCount(pdfBase64);
3414
+ let plan;
3415
+ if (resumed?.plan && pipelineCtx.isPhaseComplete("plan")) {
3416
+ plan = resumed.plan;
3417
+ onProgress?.("Resuming from checkpoint (plan complete)...");
3418
+ } else {
3419
+ onProgress?.(`Planning extraction for ${primaryType} ${documentType}...`);
3420
+ const templateHints = [
3421
+ `Document type: ${primaryType} ${documentType}`,
3422
+ `Expected sections: ${template.expectedSections.join(", ")}`,
3423
+ `Page hints: ${Object.entries(template.pageHints).map(([k, v]) => `${k}: ${v}`).join("; ")}`,
3424
+ `Total pages: ${pageCount}`
3425
+ ].join("\n");
3426
+ const planResponse = await safeGenerateObject(
3427
+ generateObject,
3428
+ {
3429
+ prompt: buildPlanPrompt(templateHints),
3430
+ schema: ExtractionPlanSchema,
3431
+ maxTokens: 2048,
3432
+ providerOptions
3433
+ },
3434
+ {
3435
+ fallback: {
3436
+ tasks: [{ extractorName: "sections", startPage: 1, endPage: pageCount, description: "Full document fallback extraction" }]
3437
+ },
3438
+ log,
3439
+ onError: (err, attempt) => log?.(`Plan attempt ${attempt + 1} failed: ${err}`)
3440
+ }
3441
+ );
3442
+ trackUsage(planResponse.usage);
3443
+ plan = planResponse.object;
3444
+ await pipelineCtx.save("plan", {
3445
+ id,
3446
+ pageCount,
3447
+ classifyResult,
3448
+ plan,
3449
+ memory: Object.fromEntries(memory)
3450
+ });
3451
+ }
3452
+ if (!pipelineCtx.isPhaseComplete("extract")) {
3453
+ const tasks = plan.tasks;
3454
+ onProgress?.(`Dispatching ${tasks.length} extractors...`);
3455
+ const extractorResults = await Promise.all(
3456
+ tasks.map(
3365
3457
  (task) => limit(async () => {
3366
3458
  const ext = getExtractor(task.extractorName);
3367
- if (!ext) return null;
3459
+ if (!ext) {
3460
+ await log?.(`Unknown extractor: ${task.extractorName}, skipping`);
3461
+ return null;
3462
+ }
3463
+ onProgress?.(`Extracting ${task.extractorName} (pages ${task.startPage}-${task.endPage})...`);
3368
3464
  try {
3369
3465
  const result = await runExtractor({
3370
3466
  name: task.extractorName,
@@ -3381,28 +3477,114 @@ function createExtractor(config) {
3381
3477
  trackUsage(result.usage);
3382
3478
  return result;
3383
3479
  } catch (error) {
3384
- await log?.(`Follow-up extractor ${task.extractorName} failed: ${error}`);
3480
+ await log?.(`Extractor ${task.extractorName} failed: ${error}`);
3385
3481
  return null;
3386
3482
  }
3387
3483
  })
3388
3484
  )
3389
3485
  );
3390
- for (const result of followUpResults) {
3486
+ for (const result of extractorResults) {
3391
3487
  if (result) {
3392
3488
  memory.set(result.name, result.data);
3393
3489
  }
3394
3490
  }
3491
+ await pipelineCtx.save("extract", {
3492
+ id,
3493
+ pageCount,
3494
+ classifyResult,
3495
+ plan,
3496
+ memory: Object.fromEntries(memory)
3497
+ });
3498
+ }
3499
+ if (!pipelineCtx.isPhaseComplete("review")) {
3500
+ for (let round = 0; round < maxReviewRounds; round++) {
3501
+ const extractedKeys = [...memory.keys()].filter((k) => k !== "classify");
3502
+ const reviewResponse = await safeGenerateObject(
3503
+ generateObject,
3504
+ {
3505
+ prompt: buildReviewPrompt(template.required, extractedKeys),
3506
+ schema: ReviewResultSchema,
3507
+ maxTokens: 1024,
3508
+ providerOptions
3509
+ },
3510
+ {
3511
+ fallback: { complete: true, missingFields: [], additionalTasks: [] },
3512
+ log,
3513
+ onError: (err, attempt) => log?.(`Review round ${round + 1} attempt ${attempt + 1} failed: ${err}`)
3514
+ }
3515
+ );
3516
+ trackUsage(reviewResponse.usage);
3517
+ if (reviewResponse.object.complete || reviewResponse.object.additionalTasks.length === 0) {
3518
+ onProgress?.("Extraction complete.");
3519
+ break;
3520
+ }
3521
+ onProgress?.(`Review round ${round + 1}: dispatching ${reviewResponse.object.additionalTasks.length} follow-up extractors...`);
3522
+ const followUpResults = await Promise.all(
3523
+ reviewResponse.object.additionalTasks.map(
3524
+ (task) => limit(async () => {
3525
+ const ext = getExtractor(task.extractorName);
3526
+ if (!ext) return null;
3527
+ try {
3528
+ const result = await runExtractor({
3529
+ name: task.extractorName,
3530
+ prompt: ext.buildPrompt(),
3531
+ schema: ext.schema,
3532
+ pdfBase64,
3533
+ startPage: task.startPage,
3534
+ endPage: task.endPage,
3535
+ generateObject,
3536
+ convertPdfToImages,
3537
+ maxTokens: ext.maxTokens ?? 4096,
3538
+ providerOptions
3539
+ });
3540
+ trackUsage(result.usage);
3541
+ return result;
3542
+ } catch (error) {
3543
+ await log?.(`Follow-up extractor ${task.extractorName} failed: ${error}`);
3544
+ return null;
3545
+ }
3546
+ })
3547
+ )
3548
+ );
3549
+ for (const result of followUpResults) {
3550
+ if (result) {
3551
+ memory.set(result.name, result.data);
3552
+ }
3553
+ }
3554
+ }
3555
+ await pipelineCtx.save("review", {
3556
+ id,
3557
+ pageCount,
3558
+ classifyResult,
3559
+ plan,
3560
+ memory: Object.fromEntries(memory)
3561
+ });
3395
3562
  }
3396
3563
  onProgress?.("Assembling document...");
3397
3564
  const document = assembleDocument(id, documentType, memory);
3565
+ await pipelineCtx.save("assemble", {
3566
+ id,
3567
+ pageCount,
3568
+ classifyResult,
3569
+ plan,
3570
+ memory: Object.fromEntries(memory),
3571
+ document
3572
+ });
3398
3573
  onProgress?.("Formatting extracted content...");
3399
3574
  const formatResult = await formatDocumentContent(document, generateText, {
3400
3575
  providerOptions,
3401
- onProgress
3576
+ onProgress,
3577
+ log
3402
3578
  });
3403
3579
  trackUsage(formatResult.usage);
3404
3580
  const chunks = chunkDocument(formatResult.document);
3405
- return { document: formatResult.document, chunks, tokenUsage: totalUsage };
3581
+ const finalCheckpoint = pipelineCtx.getCheckpoint();
3582
+ return {
3583
+ document: formatResult.document,
3584
+ chunks,
3585
+ tokenUsage: totalUsage,
3586
+ checkpoint: finalCheckpoint
3587
+ };
3406
3588
  }
3407
3589
  return { extract };
3408
3590
  }
@@ -4265,7 +4447,6 @@ function createApplicationPipeline(config) {
4265
4447
  let state = {
4266
4448
  id,
4267
4449
  pdfBase64: void 0,
4268
- // Don't persist the full PDF in state
4269
4450
  title: void 0,
4270
4451
  applicationType: null,
4271
4452
  fields: [],
@@ -4276,13 +4457,20 @@ function createApplicationPipeline(config) {
4276
4457
  updatedAt: now
4277
4458
  };
4278
4459
  onProgress?.("Classifying document...");
4279
- const { result: classifyResult, usage: classifyUsage } = await classifyApplication(
4280
- pdfBase64.slice(0, 2e3),
4281
- // Send truncated content for classification
4282
- generateObject,
4283
- providerOptions
4284
- );
4285
- trackUsage(classifyUsage);
4460
+ await applicationStore?.save(state);
4461
+ let classifyResult;
4462
+ try {
4463
+ const { result, usage: classifyUsage } = await classifyApplication(
4464
+ pdfBase64.slice(0, 2e3),
4465
+ generateObject,
4466
+ providerOptions
4467
+ );
4468
+ trackUsage(classifyUsage);
4469
+ classifyResult = result;
4470
+ } catch (error) {
4471
+ await log?.(`Classification failed, treating as non-application: ${error instanceof Error ? error.message : String(error)}`);
4472
+ classifyResult = { isApplication: false, confidence: 0, applicationType: null };
4473
+ }
4286
4474
  if (!classifyResult.isApplication) {
4287
4475
  state.status = "complete";
4288
4476
  state.updatedAt = Date.now();
@@ -4292,13 +4480,28 @@ function createApplicationPipeline(config) {
4292
4480
  state.applicationType = classifyResult.applicationType;
4293
4481
  state.status = "extracting";
4294
4482
  state.updatedAt = Date.now();
4483
+ await applicationStore?.save(state);
4295
4484
  onProgress?.("Extracting form fields...");
4296
- const { fields, usage: extractUsage } = await extractFields(
4297
- pdfBase64,
4298
- generateObject,
4299
- providerOptions
4300
- );
4301
- trackUsage(extractUsage);
4485
+ let fields;
4486
+ try {
4487
+ const { fields: extractedFields, usage: extractUsage } = await extractFields(
4488
+ pdfBase64,
4489
+ generateObject,
4490
+ providerOptions
4491
+ );
4492
+ trackUsage(extractUsage);
4493
+ fields = extractedFields;
4494
+ } catch (error) {
4495
+ await log?.(`Field extraction failed: ${error instanceof Error ? error.message : String(error)}`);
4496
+ fields = [];
4497
+ }
4498
+ if (fields.length === 0) {
4499
+ await log?.("No fields extracted, completing pipeline with empty result");
4500
+ state.status = "complete";
4501
+ state.updatedAt = Date.now();
4502
+ await applicationStore?.save(state);
4503
+ return { state, tokenUsage: totalUsage };
4504
+ }
4302
4505
  state.fields = fields;
4303
4506
  state.title = classifyResult.applicationType ?? void 0;
4304
4507
  state.status = "auto_filling";
@@ -4330,20 +4533,24 @@ function createApplicationPipeline(config) {
4330
4533
  limit(async () => {
4331
4534
  const unfilledFields2 = state.fields.filter((f) => !f.value);
4332
4535
  if (unfilledFields2.length === 0) return;
4333
- const { result: autoFillResult, usage: afUsage } = await autoFillFromContext(
4334
- unfilledFields2,
4335
- orgContext,
4336
- generateObject,
4337
- providerOptions
4338
- );
4339
- trackUsage(afUsage);
4340
- for (const match of autoFillResult.matches) {
4341
- const field = state.fields.find((f) => f.id === match.fieldId);
4342
- if (field && !field.value) {
4343
- field.value = match.value;
4344
- field.source = `auto-fill: ${match.contextKey}`;
4345
- field.confidence = match.confidence;
4536
+ try {
4537
+ const { result: autoFillResult, usage: afUsage } = await autoFillFromContext(
4538
+ unfilledFields2,
4539
+ orgContext,
4540
+ generateObject,
4541
+ providerOptions
4542
+ );
4543
+ trackUsage(afUsage);
4544
+ for (const match of autoFillResult.matches) {
4545
+ const field = state.fields.find((f) => f.id === match.fieldId);
4546
+ if (field && !field.value) {
4547
+ field.value = match.value;
4548
+ field.source = `auto-fill: ${match.contextKey}`;
4549
+ field.confidence = match.confidence;
4550
+ }
4346
4551
  }
4552
+ } catch (e) {
4553
+ await log?.(`Auto-fill from context failed: ${e instanceof Error ? e.message : String(e)}`);
4347
4554
  }
4348
4555
  })
4349
4556
  );
@@ -4376,13 +4583,18 @@ function createApplicationPipeline(config) {
4376
4583
  if (unfilledFields.length > 0) {
4377
4584
  onProgress?.(`Batching ${unfilledFields.length} remaining questions...`);
4378
4585
  state.status = "batching";
4379
- const { result: batchResult, usage: batchUsage } = await batchQuestions(
4380
- unfilledFields,
4381
- generateObject,
4382
- providerOptions
4383
- );
4384
- trackUsage(batchUsage);
4385
- state.batches = batchResult.batches;
4586
+ try {
4587
+ const { result: batchResult, usage: batchUsage } = await batchQuestions(
4588
+ unfilledFields,
4589
+ generateObject,
4590
+ providerOptions
4591
+ );
4592
+ trackUsage(batchUsage);
4593
+ state.batches = batchResult.batches;
4594
+ } catch (error) {
4595
+ await log?.(`Batching failed, using single-batch fallback: ${error instanceof Error ? error.message : String(error)}`);
4596
+ state.batches = [unfilledFields.map((f) => f.id)];
4597
+ }
4386
4598
  state.currentBatchIndex = 0;
4387
4599
  state.status = "collecting";
4388
4600
  } else {
@@ -4409,32 +4621,49 @@ function createApplicationPipeline(config) {
4409
4621
  (f) => currentBatchFieldIds.includes(f.id)
4410
4622
  );
4411
4623
  onProgress?.("Classifying reply...");
4412
- const { intent, usage: intentUsage } = await classifyReplyIntent(
4413
- currentBatchFields,
4414
- replyText,
4415
- generateObject,
4416
- providerOptions
4417
- );
4418
- trackUsage(intentUsage);
4419
- let fieldsFilled = 0;
4420
- let responseText;
4421
- if (intent.hasAnswers) {
4422
- onProgress?.("Parsing answers...");
4423
- const { result: parseResult, usage: parseUsage } = await parseAnswers(
4624
+ let intent;
4625
+ try {
4626
+ const { intent: classifiedIntent, usage: intentUsage } = await classifyReplyIntent(
4424
4627
  currentBatchFields,
4425
4628
  replyText,
4426
4629
  generateObject,
4427
4630
  providerOptions
4428
4631
  );
4429
- trackUsage(parseUsage);
4430
- for (const answer of parseResult.answers) {
4431
- const field = state.fields.find((f) => f.id === answer.fieldId);
4432
- if (field) {
4433
- field.value = answer.value;
4434
- field.source = "user";
4435
- field.confidence = "confirmed";
4436
- fieldsFilled++;
4632
+ trackUsage(intentUsage);
4633
+ intent = classifiedIntent;
4634
+ } catch (error) {
4635
+ await log?.(`Reply intent classification failed, defaulting to answers_only: ${error instanceof Error ? error.message : String(error)}`);
4636
+ intent = {
4637
+ primaryIntent: "answers_only",
4638
+ hasAnswers: true,
4639
+ questionText: void 0,
4640
+ questionFieldIds: void 0,
4641
+ lookupRequests: void 0
4642
+ };
4643
+ }
4644
+ let fieldsFilled = 0;
4645
+ let responseText;
4646
+ if (intent.hasAnswers) {
4647
+ onProgress?.("Parsing answers...");
4648
+ try {
4649
+ const { result: parseResult, usage: parseUsage } = await parseAnswers(
4650
+ currentBatchFields,
4651
+ replyText,
4652
+ generateObject,
4653
+ providerOptions
4654
+ );
4655
+ trackUsage(parseUsage);
4656
+ for (const answer of parseResult.answers) {
4657
+ const field = state.fields.find((f) => f.id === answer.fieldId);
4658
+ if (field) {
4659
+ field.value = answer.value;
4660
+ field.source = "user";
4661
+ field.confidence = "confirmed";
4662
+ fieldsFilled++;
4663
+ }
4437
4664
  }
4665
+ } catch (error) {
4666
+ await log?.(`Answer parsing failed: ${error instanceof Error ? error.message : String(error)}`);
4438
4667
  }
4439
4668
  }
4440
4669
  if (intent.lookupRequests?.length) {
@@ -4455,36 +4684,45 @@ function createApplicationPipeline(config) {
4455
4684
  const targetFields = state.fields.filter(
4456
4685
  (f) => intent.lookupRequests.some((lr) => lr.targetFieldIds.includes(f.id))
4457
4686
  );
4458
- const { result: lookupResult, usage: lookupUsage } = await fillFromLookup(
4459
- intent.lookupRequests,
4460
- targetFields,
4461
- availableData,
4462
- generateObject,
4463
- providerOptions
4464
- );
4465
- trackUsage(lookupUsage);
4466
- for (const fill of lookupResult.fills) {
4467
- const field = state.fields.find((f) => f.id === fill.fieldId);
4468
- if (field) {
4469
- field.value = fill.value;
4470
- field.source = `lookup: ${fill.source}`;
4471
- field.confidence = "high";
4472
- fieldsFilled++;
4687
+ try {
4688
+ const { result: lookupResult, usage: lookupUsage } = await fillFromLookup(
4689
+ intent.lookupRequests,
4690
+ targetFields,
4691
+ availableData,
4692
+ generateObject,
4693
+ providerOptions
4694
+ );
4695
+ trackUsage(lookupUsage);
4696
+ for (const fill of lookupResult.fills) {
4697
+ const field = state.fields.find((f) => f.id === fill.fieldId);
4698
+ if (field) {
4699
+ field.value = fill.value;
4700
+ field.source = `lookup: ${fill.source}`;
4701
+ field.confidence = "high";
4702
+ fieldsFilled++;
4703
+ }
4473
4704
  }
4705
+ } catch (error) {
4706
+ await log?.(`Lookup fill failed: ${error instanceof Error ? error.message : String(error)}`);
4474
4707
  }
4475
4708
  }
4476
4709
  }
4477
4710
  if (intent.primaryIntent === "question" || intent.primaryIntent === "mixed") {
4478
4711
  if (intent.questionText) {
4479
- const { text, usage } = await generateText({
4480
- prompt: `The user is filling out an insurance application and asked: "${intent.questionText}"
4712
+ try {
4713
+ const { text, usage } = await generateText({
4714
+ prompt: `The user is filling out an insurance application and asked: "${intent.questionText}"
4481
4715
 
4482
4716
  Provide a brief, helpful explanation (2-3 sentences). End with "Just reply with the answer when you're ready and I'll fill it in."`,
4483
- maxTokens: 512,
4484
- providerOptions
4485
- });
4486
- trackUsage(usage);
4487
- responseText = text;
4717
+ maxTokens: 512,
4718
+ providerOptions
4719
+ });
4720
+ trackUsage(usage);
4721
+ responseText = text;
4722
+ } catch (error) {
4723
+ await log?.(`Question response generation failed: ${error instanceof Error ? error.message : String(error)}`);
4724
+ responseText = `I wasn't able to generate an explanation for your question. Could you rephrase it, or just provide the answer directly?`;
4725
+ }
4488
4726
  }
4489
4727
  }
4490
4728
  const currentBatchComplete = currentBatchFieldIds.every(
@@ -4498,26 +4736,30 @@ Provide a brief, helpful explanation (2-3 sentences). End with "Just reply with
4498
4736
  (f) => nextBatchFieldIds.includes(f.id)
4499
4737
  );
4500
4738
  const filledCount = state.fields.filter((f) => f.value).length;
4501
- const { text: emailText, usage: emailUsage } = await generateBatchEmail(
4502
- nextBatchFields,
4503
- state.currentBatchIndex,
4504
- state.batches.length,
4505
- {
4506
- appTitle: state.title,
4507
- totalFieldCount: state.fields.length,
4508
- filledFieldCount: filledCount,
4509
- companyName: context?.companyName
4510
- },
4511
- generateText,
4512
- providerOptions
4513
- );
4514
- trackUsage(emailUsage);
4515
- if (!responseText) {
4516
- responseText = emailText;
4517
- } else {
4518
- responseText += `
4739
+ try {
4740
+ const { text: emailText, usage: emailUsage } = await generateBatchEmail(
4741
+ nextBatchFields,
4742
+ state.currentBatchIndex,
4743
+ state.batches.length,
4744
+ {
4745
+ appTitle: state.title,
4746
+ totalFieldCount: state.fields.length,
4747
+ filledFieldCount: filledCount,
4748
+ companyName: context?.companyName
4749
+ },
4750
+ generateText,
4751
+ providerOptions
4752
+ );
4753
+ trackUsage(emailUsage);
4754
+ if (!responseText) {
4755
+ responseText = emailText;
4756
+ } else {
4757
+ responseText += `
4519
4758
 
4520
4759
  ${emailText}`;
4760
+ }
4761
+ } catch (error) {
4762
+ await log?.(`Batch email generation failed: ${error instanceof Error ? error.message : String(error)}`);
4521
4763
  }
4522
4764
  } else {
4523
4765
  state.status = "confirming";
@@ -4726,7 +4968,7 @@ var EvidenceItemSchema = import_zod32.z.object({
4726
4968
  turnId: import_zod32.z.string().optional(),
4727
4969
  text: import_zod32.z.string().describe("Text excerpt from the source"),
4728
4970
  relevance: import_zod32.z.number().min(0).max(1),
4729
- metadata: import_zod32.z.record(import_zod32.z.string(), import_zod32.z.string()).optional()
4971
+ metadata: import_zod32.z.array(import_zod32.z.object({ key: import_zod32.z.string(), value: import_zod32.z.string() })).optional()
4730
4972
  });
4731
4973
  var RetrievalResultSchema = import_zod32.z.object({
4732
4974
  subQuestion: import_zod32.z.string(),
@@ -4762,6 +5004,9 @@ var QueryResultSchema = import_zod32.z.object({
4762
5004
  });
4763
5005
 
4764
5006
  // src/query/retriever.ts
5007
/**
 * Convert a plain key/value record into an array of `{ key, value }` pairs.
 *
 * Entries are emitted in `Object.entries` order and values are passed through
 * unchanged.
 *
 * @param {Record<string, unknown> | null | undefined} record - Source record;
 *   may be missing.
 * @returns {Array<{ key: string, value: unknown }>} One pair per own enumerable
 *   property, or an empty array when `record` is `null`/`undefined`.
 */
function recordToKVArray(record) {
  // Object.entries throws a TypeError on null/undefined; a missing record
  // simply has no entries.
  if (record == null) {
    return [];
  }
  return Object.entries(record).map(([key, value]) => ({ key, value }));
}
4765
5010
  async function retrieve(subQuestion, conversationId, config) {
4766
5011
  const { documentStore, memoryStore, retrievalLimit, log } = config;
4767
5012
  const evidence = [];
@@ -4788,7 +5033,7 @@ async function retrieve(subQuestion, conversationId, config) {
4788
5033
  text: chunk.text,
4789
5034
  relevance: 0.8,
4790
5035
  // Default — store doesn't expose scores directly
4791
- metadata: chunk.metadata
5036
+ metadata: recordToKVArray(chunk.metadata)
4792
5037
  });
4793
5038
  }
4794
5039
  }
@@ -4803,7 +5048,7 @@ async function retrieve(subQuestion, conversationId, config) {
4803
5048
  documentId: chunk.documentId,
4804
5049
  text: chunk.text,
4805
5050
  relevance: 0.8,
4806
- metadata: chunk.metadata
5051
+ metadata: recordToKVArray(chunk.metadata)
4807
5052
  });
4808
5053
  }
4809
5054
  }
@@ -4831,11 +5076,11 @@ async function retrieve(subQuestion, conversationId, config) {
4831
5076
  text: summary,
4832
5077
  relevance: 0.9,
4833
5078
  // Direct lookup is high relevance
4834
- metadata: {
4835
- type: doc.type,
4836
- carrier: doc.carrier ?? "",
4837
- insuredName: doc.insuredName ?? ""
4838
- }
5079
+ metadata: [
5080
+ { key: "type", value: doc.type },
5081
+ { key: "carrier", value: doc.carrier ?? "" },
5082
+ { key: "insuredName", value: doc.insuredName ?? "" }
5083
+ ]
4839
5084
  });
4840
5085
  }
4841
5086
  } catch (e) {
@@ -5070,8 +5315,12 @@ function createQueryAgent(config) {
5070
5315
  async function query(input) {
5071
5316
  totalUsage = { inputTokens: 0, outputTokens: 0 };
5072
5317
  const { question, conversationId, context } = input;
5318
+ const pipelineCtx = createPipelineContext({
5319
+ id: `query-${Date.now()}`
5320
+ });
5073
5321
  onProgress?.("Classifying query...");
5074
5322
  const classification = await classify(question, conversationId);
5323
+ await pipelineCtx.save("classify", { classification });
5075
5324
  onProgress?.(`Retrieving evidence for ${classification.subQuestions.length} sub-question(s)...`);
5076
5325
  const retrieverConfig = {
5077
5326
  documentStore,
@@ -5085,9 +5334,10 @@ function createQueryAgent(config) {
5085
5334
  )
5086
5335
  );
5087
5336
  const allEvidence = retrievalResults.flatMap((r) => r.evidence);
5337
+ await pipelineCtx.save("retrieve", { classification, evidence: allEvidence });
5088
5338
  onProgress?.("Reasoning over evidence...");
5089
5339
  const reasonerConfig = { generateObject, providerOptions };
5090
- let subAnswers = await Promise.all(
5340
+ const reasonResults = await Promise.allSettled(
5091
5341
  classification.subQuestions.map(
5092
5342
  (sq, i) => limit(async () => {
5093
5343
  const { subAnswer, usage } = await reason(
@@ -5101,10 +5351,27 @@ function createQueryAgent(config) {
5101
5351
  })
5102
5352
  )
5103
5353
  );
5354
+ let subAnswers = [];
5355
+ for (let i = 0; i < reasonResults.length; i++) {
5356
+ const result = reasonResults[i];
5357
+ if (result.status === "fulfilled") {
5358
+ subAnswers.push(result.value);
5359
+ } else {
5360
+ await log?.(`Reasoner failed for sub-question "${classification.subQuestions[i].question}": ${result.reason}`);
5361
+ subAnswers.push({
5362
+ subQuestion: classification.subQuestions[i].question,
5363
+ answer: "Unable to answer this part of the question due to a processing error.",
5364
+ citations: [],
5365
+ confidence: 0,
5366
+ needsMoreContext: true
5367
+ });
5368
+ }
5369
+ }
5370
+ await pipelineCtx.save("reason", { classification, evidence: allEvidence, subAnswers });
5104
5371
  onProgress?.("Verifying answer grounding...");
5105
5372
  const verifierConfig = { generateObject, providerOptions };
5106
5373
  for (let round = 0; round < maxVerifyRounds; round++) {
5107
- const { result: verifyResult, usage } = await verify(
5374
+ const { result: verifyResult, usage } = await safeVerify(
5108
5375
  question,
5109
5376
  subAnswers,
5110
5377
  allEvidence,
@@ -5128,7 +5395,6 @@ function createQueryAgent(config) {
5128
5395
  () => retrieve(sq, conversationId, {
5129
5396
  ...retrieverConfig,
5130
5397
  retrievalLimit: retrievalLimit * 2
5131
- // Broader retrieval on retry
5132
5398
  })
5133
5399
  )
5134
5400
  )
@@ -5136,7 +5402,7 @@ function createQueryAgent(config) {
5136
5402
  for (const r of retryRetrievals) {
5137
5403
  allEvidence.push(...r.evidence);
5138
5404
  }
5139
- const retrySubAnswers = await Promise.all(
5405
+ const retrySettled = await Promise.allSettled(
5140
5406
  retryQuestions.map(
5141
5407
  (sq, i) => limit(async () => {
5142
5408
  const { subAnswer, usage: u } = await reason(
@@ -5150,6 +5416,7 @@ function createQueryAgent(config) {
5150
5416
  })
5151
5417
  )
5152
5418
  );
5419
+ const retrySubAnswers = retrySettled.filter((r) => r.status === "fulfilled").map((r) => r.value);
5153
5420
  const retryQSet = new Set(retryQuestions.map((sq) => sq.question));
5154
5421
  subAnswers = subAnswers.map((sa) => {
5155
5422
  if (retryQSet.has(sa.subQuestion)) {
@@ -5202,17 +5469,42 @@ function createQueryAgent(config) {
5202
5469
  }
5203
5470
  }
5204
5471
  const prompt = buildQueryClassifyPrompt(question, conversationContext);
5205
- const { object, usage } = await withRetry(
5206
- () => generateObject({
5472
+ const { object, usage } = await safeGenerateObject(
5473
+ generateObject,
5474
+ {
5207
5475
  prompt,
5208
5476
  schema: QueryClassifyResultSchema,
5209
5477
  maxTokens: 2048,
5210
5478
  providerOptions
5211
- })
5479
+ },
5480
+ {
5481
+ fallback: {
5482
+ intent: "general_knowledge",
5483
+ subQuestions: [
5484
+ {
5485
+ question,
5486
+ intent: "general_knowledge"
5487
+ }
5488
+ ],
5489
+ requiresDocumentLookup: true,
5490
+ requiresChunkSearch: true,
5491
+ requiresConversationHistory: !!conversationId
5492
+ },
5493
+ log,
5494
+ onError: (err, attempt) => log?.(`Query classify attempt ${attempt + 1} failed: ${err}`)
5495
+ }
5212
5496
  );
5213
5497
  trackUsage(usage);
5214
5498
  return object;
5215
5499
  }
5500
/**
 * Run `verify` and never let a verification failure propagate.
 *
 * The four arguments are forwarded to `verify` unchanged. If `verify` throws,
 * the failure is reported through the optional `log` callback and an
 * "approved, no issues" outcome is returned instead. Note that this fallback
 * object carries no `usage` property.
 *
 * @param originalQuestion - Forwarded to `verify` as its first argument.
 * @param subAnswers - Forwarded to `verify` as its second argument.
 * @param allEvidence - Forwarded to `verify` as its third argument.
 * @param verifierConfig - Forwarded to `verify` as its fourth argument.
 * @returns Whatever `verify` resolves to, or
 *   `{ result: { approved: true, issues: [] } }` when it throws.
 */
async function safeVerify(originalQuestion, subAnswers, allEvidence, verifierConfig) {
  let outcome;
  try {
    outcome = await verify(originalQuestion, subAnswers, allEvidence, verifierConfig);
  } catch (error) {
    // Non-Error throwables are stringified so the log line is always text.
    const detail = error instanceof Error ? error.message : String(error);
    await log?.(`Verification failed, approving by default: ${detail}`);
    outcome = { result: { approved: true, issues: [] } };
  }
  return outcome;
}
5216
5508
  async function respond(originalQuestion, subAnswers, classification, platform) {
5217
5509
  const subAnswersJson = JSON.stringify(
5218
5510
  subAnswers.map((sa) => ({
@@ -5226,13 +5518,25 @@ function createQueryAgent(config) {
5226
5518
  2
5227
5519
  );
5228
5520
  const prompt = buildRespondPrompt(originalQuestion, subAnswersJson, platform);
5229
- const { object, usage } = await withRetry(
5230
- () => generateObject({
5521
+ const { object, usage } = await safeGenerateObject(
5522
+ generateObject,
5523
+ {
5231
5524
  prompt,
5232
5525
  schema: QueryResultSchema,
5233
5526
  maxTokens: 4096,
5234
5527
  providerOptions
5235
- })
5528
+ },
5529
+ {
5530
+ fallback: {
5531
+ answer: subAnswers.map((sa) => `**${sa.subQuestion}**
5532
+ ${sa.answer}`).join("\n\n"),
5533
+ citations: subAnswers.flatMap((sa) => sa.citations),
5534
+ intent: classification.intent,
5535
+ confidence: Math.min(...subAnswers.map((sa) => sa.confidence), 1)
5536
+ },
5537
+ log,
5538
+ onError: (err, attempt) => log?.(`Respond attempt ${attempt + 1} failed: ${err}`)
5539
+ }
5236
5540
  );
5237
5541
  trackUsage(usage);
5238
5542
  const result = object;
@@ -5398,6 +5702,7 @@ var AGENT_TOOLS = [
5398
5702
  CommercialAutoDeclarationsSchema,
5399
5703
  CommercialPropertyDeclarationsSchema,
5400
5704
  CommunicationIntentSchema,
5705
+ ConditionKeyValueSchema,
5401
5706
  ConditionTypeSchema,
5402
5707
  ConstructionTypeSchema,
5403
5708
  ContactSchema,
@@ -5564,6 +5869,7 @@ var AGENT_TOOLS = [
5564
5869
  chunkDocument,
5565
5870
  createApplicationPipeline,
5566
5871
  createExtractor,
5872
+ createPipelineContext,
5567
5873
  createQueryAgent,
5568
5874
  extractPageRange,
5569
5875
  fillAcroForm,
@@ -5573,6 +5879,7 @@ var AGENT_TOOLS = [
5573
5879
  getTemplate,
5574
5880
  overlayTextOnPdf,
5575
5881
  pLimit,
5882
+ safeGenerateObject,
5576
5883
  sanitizeNulls,
5577
5884
  stripFences,
5578
5885
  withRetry