npm - @claritylabs/cl-sdk - Versions diffs - 0.5.0 → 0.7.0 - Mend

@claritylabs/cl-sdk 0.5.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

package/README.md +33 -9
package/dist/index.d.mts +382 -77
package/dist/index.d.ts +382 -77
package/dist/index.js +718 -205
package/dist/index.js.map +1 -1
package/dist/index.mjs +715 -205
package/dist/index.mjs.map +1 -1
package/dist/storage-sqlite.d.mts +52 -10
package/dist/storage-sqlite.d.ts +52 -10
package/package.json +1 -1

package/dist/index.js CHANGED Viewed

@@ -54,6 +54,7 @@ __export(index_exports, {
   CommercialAutoDeclarationsSchema: () => CommercialAutoDeclarationsSchema,
   CommercialPropertyDeclarationsSchema: () => CommercialPropertyDeclarationsSchema,
   CommunicationIntentSchema: () => CommunicationIntentSchema,
+  ConditionKeyValueSchema: () => ConditionKeyValueSchema,
   ConditionTypeSchema: () => ConditionTypeSchema,
   ConstructionTypeSchema: () => ConstructionTypeSchema,
   ContactSchema: () => ContactSchema,
@@ -220,6 +221,7 @@ __export(index_exports, {
   chunkDocument: () => chunkDocument,
   createApplicationPipeline: () => createApplicationPipeline,
   createExtractor: () => createExtractor,
+  createPipelineContext: () => createPipelineContext,
   createQueryAgent: () => createQueryAgent,
   extractPageRange: () => extractPageRange,
   fillAcroForm: () => fillAcroForm,
@@ -229,6 +231,7 @@ __export(index_exports, {
   getTemplate: () => getTemplate,
   overlayTextOnPdf: () => overlayTextOnPdf,
   pLimit: () => pLimit,
+  safeGenerateObject: () => safeGenerateObject,
   sanitizeNulls: () => sanitizeNulls,
   stripFences: () => stripFences,
   withRetry: () => withRetry
@@ -308,6 +311,69 @@ function sanitizeNulls(obj) {
   return obj;
 }
+// src/core/safe-generate.ts
+async function safeGenerateObject(generateObject, params, options) {
+  const maxRetries = options?.maxRetries ?? 1;
+  let lastError;
+  for (let attempt = 0; attempt <= maxRetries; attempt++) {
+    try {
+      const result = await withRetry(
+        () => generateObject(params),
+        options?.log
+      );
+      return result;
+    } catch (error) {
+      lastError = error;
+      options?.onError?.(error, attempt);
+      await options?.log?.(
+        `safeGenerateObject attempt ${attempt + 1}/${maxRetries + 1} failed: ${error instanceof Error ? error.message : String(error)}`
+      );
+      if (attempt < maxRetries) {
+        await new Promise((resolve) => setTimeout(resolve, 1e3));
+      }
+    }
+  }
+  if (options?.fallback !== void 0) {
+    await options?.log?.(
+      `safeGenerateObject: all retries exhausted, returning fallback`
+    );
+    return { object: options.fallback };
+  }
+  throw lastError;
+}
+// src/core/pipeline.ts
+function createPipelineContext(opts) {
+  let latest = opts.resumeFrom;
+  const completedPhases = /* @__PURE__ */ new Set();
+  if (opts.resumeFrom) {
+    completedPhases.add(opts.resumeFrom.phase);
+  }
+  return {
+    id: opts.id,
+    async save(phase, state) {
+      const checkpoint = {
+        phase,
+        state,
+        timestamp: Date.now()
+      };
+      latest = checkpoint;
+      completedPhases.add(phase);
+      await opts.onSave?.(checkpoint);
+    },
+    getCheckpoint() {
+      return latest;
+    },
+    isPhaseComplete(phase) {
+      return completedPhases.has(phase);
+    },
+    clear() {
+      latest = void 0;
+      completedPhases.clear();
+    }
+  };
+}
 // src/schemas/enums.ts
 var import_zod = require("zod");
 var PolicyTypeSchema = import_zod.z.enum([
@@ -708,11 +774,15 @@ var ExclusionSchema = import_zod5.z.object({
 // src/schemas/condition.ts
 var import_zod6 = require("zod");
+var ConditionKeyValueSchema = import_zod6.z.object({
+  key: import_zod6.z.string(),
+  value: import_zod6.z.string()
+});
 var PolicyConditionSchema = import_zod6.z.object({
   name: import_zod6.z.string(),
   conditionType: ConditionTypeSchema,
   content: import_zod6.z.string(),
-  keyValues: import_zod6.z.record(import_zod6.z.string(), import_zod6.z.string()).optional(),
+  keyValues: import_zod6.z.array(ConditionKeyValueSchema).optional(),
   pageNumber: import_zod6.z.number().optional()
 });
@@ -1700,6 +1770,218 @@ function assembleDocument(documentId, documentType, memory) {
   };
 }
+// src/prompts/coordinator/format.ts
+function buildFormatPrompt(entries) {
+  const block = entries.map((e) => `===ENTRY ${e.id}===
+${e.text}`).join("\n\n");
+  return `You are a markdown formatting specialist for insurance document content. You will receive numbered content entries extracted from insurance policies, quotes, and endorsements. Your job is to clean up the formatting so every entry renders correctly as standard markdown.
+## Primary issues to fix
+### 1. Pipe-delimited data missing table syntax
+The most common issue. Content uses pipe characters as column separators but is missing the separator row required for markdown table rendering.
+Before (broken \u2014 won't render as a table):
+COVERAGE | FORM # | LIMIT | DEDUCTIBLE
+Employee Theft | | $10,000 | $1,000
+After (valid markdown table):
+| COVERAGE | FORM # | LIMIT | DEDUCTIBLE |
+| --- | --- | --- | --- |
+| Employee Theft | | $10,000 | $1,000 |
+Rules for pipe tables:
+- Add leading and trailing pipes to every row
+- Add the separator row (| --- | --- |) after the header row
+- Every row must have the same number of pipe-separated columns as the header
+- Empty cells are fine \u2014 just keep the pipes: | | $10,000 |
+### 2. Sub-items indented within pipe tables
+Insurance schedules often have indented sub-items that belong to the previous coverage line. These break table column counts.
+Before (broken):
+COVERAGE | LIMIT | DEDUCTIBLE
+Causes Of Loss - Equipment Breakdown | PR650END
+  Described Premises Limit | | $350,804 |
+  Diagnostic Equipment | | $100,000 |
+  Deductible Type - Business Income: Waiting Period - Hours
+  Waiting Period (Hours): 24
+After: Pull sub-items out of the table. End the table before the sub-items, show them as an indented list, then start a new table if tabular data resumes:
+| COVERAGE | LIMIT | DEDUCTIBLE |
+| --- | --- | --- |
+| Causes Of Loss - Equipment Breakdown | PR650END | |
+- Described Premises Limit: $350,804
+- Diagnostic Equipment: $100,000
+- Deductible Type - Business Income: Waiting Period - Hours
+- Waiting Period (Hours): 24
+### 3. Space-aligned tables
+Declarations often align columns with spaces instead of pipes. These render as plain monospace text and lose structure.
+Before:
+Coverage                               Limit of Liability    Retention
+A. Network Security Liability          $500,000              $10,000
+B. Privacy Liability                   $500,000              $10,000
+After (convert to proper markdown table):
+| Coverage | Limit of Liability | Retention |
+| --- | --- | --- |
+| A. Network Security Liability | $500,000 | $10,000 |
+| B. Privacy Liability | $500,000 | $10,000 |
+### 4. Mixed table/prose content
+A single entry often contains prose paragraphs followed by tabular data followed by more prose. Handle each segment independently \u2014 don't try to force everything into one table.
+### 5. General markdown cleanup
+- **Line spacing**: Remove excessive blank lines (3+ consecutive newlines \u2192 2). Ensure one blank line before and after tables and headings.
+- **Trailing whitespace**: Remove trailing spaces on all lines.
+- **Broken lists**: Ensure list items use consistent markers (-, *, or 1.) with proper nesting indentation.
+- **Orphaned formatting**: Close any unclosed bold (**), italic (*), or code (\`) markers.
+- **Heading levels**: Ensure heading markers (##) have a space after the hashes.
+## Rules
+- Do NOT change the meaning or substance of any content. Only fix formatting.
+- Do NOT add new information, headers, or commentary.
+- Do NOT wrap entries in code fences.
+- Preserve all dollar amounts, dates, policy numbers, form numbers, and technical terms exactly as they appear.
+- If an entry is already well-formatted, return it unchanged.
+- When in doubt about whether something is a table, prefer table formatting for structured data with multiple columns.
+Return your output in this exact format \u2014 one block per entry, in the same order:
+===ENTRY 0===
+(cleaned content for entry 0)
+===ENTRY 1===
+(cleaned content for entry 1)
+...and so on for each entry.
+Here are the entries to format:
+${block}`;
+}
+// src/extraction/formatter.ts
+function collectContentFields(doc) {
+  const entries = [];
+  let id = 0;
+  function add(path, text) {
+    if (text && text.length > 20) {
+      entries.push({ id: id++, path, text });
+    }
+  }
+  add("summary", doc.summary);
+  if (doc.sections) {
+    for (let i = 0; i < doc.sections.length; i++) {
+      const s = doc.sections[i];
+      add(`sections[${i}].content`, s.content);
+      if (s.subsections) {
+        for (let j = 0; j < s.subsections.length; j++) {
+          add(`sections[${i}].subsections[${j}].content`, s.subsections[j].content);
+        }
+      }
+    }
+  }
+  if (doc.endorsements) {
+    for (let i = 0; i < doc.endorsements.length; i++) {
+      add(`endorsements[${i}].content`, doc.endorsements[i].content);
+    }
+  }
+  if (doc.exclusions) {
+    for (let i = 0; i < doc.exclusions.length; i++) {
+      add(`exclusions[${i}].content`, doc.exclusions[i].content);
+    }
+  }
+  if (doc.conditions) {
+    for (let i = 0; i < doc.conditions.length; i++) {
+      add(`conditions[${i}].content`, doc.conditions[i].content);
+    }
+  }
+  return entries;
+}
+function parseFormatResponse(response) {
+  const results = /* @__PURE__ */ new Map();
+  const parts = response.split(/===ENTRY (\d+)===/);
+  for (let i = 1; i < parts.length; i += 2) {
+    const entryId = parseInt(parts[i], 10);
+    const content = parts[i + 1]?.trim();
+    if (!isNaN(entryId) && content !== void 0) {
+      results.set(entryId, content);
+    }
+  }
+  return results;
+}
+function applyFormattedContent(doc, entries, formatted) {
+  for (const entry of entries) {
+    const cleaned = formatted.get(entry.id);
+    if (!cleaned) continue;
+    const segments = entry.path.match(/^(\w+)(?:\[(\d+)\])?(?:\.(\w+)(?:\[(\d+)\])?(?:\.(\w+))?)?$/);
+    if (!segments) continue;
+    const [, field, idx1, sub1, idx2, sub2] = segments;
+    if (!sub1) {
+      doc[field] = cleaned;
+    } else if (!sub2) {
+      const arr = doc[field];
+      if (arr && arr[Number(idx1)]) {
+        arr[Number(idx1)][sub1] = cleaned;
+      }
+    } else {
+      const arr = doc[field];
+      if (arr && arr[Number(idx1)]) {
+        const nested = arr[Number(idx1)][sub1];
+        if (nested && nested[Number(idx2)]) {
+          nested[Number(idx2)][sub2] = cleaned;
+        }
+      }
+    }
+  }
+}
+var MAX_ENTRIES_PER_BATCH = 20;
+async function formatDocumentContent(doc, generateText, options) {
+  const entries = collectContentFields(doc);
+  const totalUsage = { inputTokens: 0, outputTokens: 0 };
+  if (entries.length === 0) {
+    return { document: doc, usage: totalUsage };
+  }
+  options?.onProgress?.(`Formatting ${entries.length} content fields...`);
+  const batches = [];
+  for (let i = 0; i < entries.length; i += MAX_ENTRIES_PER_BATCH) {
+    batches.push(entries.slice(i, i + MAX_ENTRIES_PER_BATCH));
+  }
+  for (let batchIdx = 0; batchIdx < batches.length; batchIdx++) {
+    const batch = batches[batchIdx];
+    try {
+      const prompt = buildFormatPrompt(batch.map((e) => ({ id: e.id, text: e.text })));
+      const result = await withRetry(
+        () => generateText({
+          prompt,
+          maxTokens: 16384,
+          providerOptions: options?.providerOptions
+        })
+      );
+      if (result.usage) {
+        totalUsage.inputTokens += result.usage.inputTokens;
+        totalUsage.outputTokens += result.usage.outputTokens;
+      }
+      const formatted = parseFormatResponse(result.text);
+      if (formatted.size < batch.length) {
+        await options?.log?.(
+          `Format batch ${batchIdx + 1}/${batches.length}: model returned ${formatted.size}/${batch.length} entries \u2014 unformatted entries will keep original content`
+        );
+      }
+      applyFormattedContent(doc, batch, formatted);
+    } catch (error) {
+      await options?.log?.(
+        `Format batch ${batchIdx + 1}/${batches.length} failed, keeping original content: ${error instanceof Error ? error.message : String(error)}`
+      );
+    }
+  }
+  return { document: doc, usage: totalUsage };
+}
 // src/extraction/chunking.ts
 function chunkDocument(doc) {
   const chunks = [];
@@ -2536,9 +2818,13 @@ var ExtractionTaskSchema = import_zod18.z.object({
   endPage: import_zod18.z.number(),
   description: import_zod18.z.string()
 });
+var PageMapEntrySchema = import_zod18.z.object({
+  section: import_zod18.z.string(),
+  pages: import_zod18.z.string()
+});
 var ExtractionPlanSchema = import_zod18.z.object({
   tasks: import_zod18.z.array(ExtractionTaskSchema),
-  pageMap: import_zod18.z.record(import_zod18.z.string(), import_zod18.z.string()).optional()
+  pageMap: import_zod18.z.array(PageMapEntrySchema).optional()
 });
 function buildPlanPrompt(templateHints) {
   return `You are planning the extraction of an insurance document. You have already classified this document. Now scan the full document and create a page map + extraction plan.
@@ -2567,7 +2853,10 @@ Return JSON:
     { "extractorName": "carrier_info", "startPage": 1, "endPage": 2, "description": "Extract carrier details from declarations page" },
     ...
   ],
-  "pageMap": { "declarations": "pages 1-3", "endorsements": "pages 15-22", ... }
+  "pageMap": [
+    { "section": "declarations", "pages": "pages 1-3" },
+    { "section": "endorsements", "pages": "pages 15-22" }
+  ]
 }
 Create tasks that cover the entire document. Prefer specific extractors over generic "sections" where possible. Keep page ranges tight \u2014 only include pages relevant to each extractor.
@@ -3060,7 +3349,8 @@ function createExtractor(config) {
     onTokenUsage,
     onProgress,
     log,
-    providerOptions
+    providerOptions,
+    onCheckpointSave
   } = config;
   const limit = pLimit(concurrency);
   let totalUsage = { inputTokens: 0, outputTokens: 0 };
@@ -3071,100 +3361,106 @@ function createExtractor(config) {
       onTokenUsage?.(usage);
     }
   }
-  async function extract(pdfBase64, documentId) {
+  async function extract(pdfBase64, documentId, options) {
     const id = documentId ?? `doc-${Date.now()}`;
     const memory = /* @__PURE__ */ new Map();
     totalUsage = { inputTokens: 0, outputTokens: 0 };
-    onProgress?.("Classifying document...");
-    const pageCount = await getPdfPageCount(pdfBase64);
-    const classifyResult = await withRetry(
-      () => generateObject({
-        prompt: buildClassifyPrompt(),
-        schema: ClassifyResultSchema,
-        maxTokens: 512,
-        providerOptions
-      })
-    );
-    trackUsage(classifyResult.usage);
-    memory.set("classify", classifyResult.object);
-    const { documentType, policyTypes } = classifyResult.object;
-    const primaryType = policyTypes[0] ?? "other";
-    const template = getTemplate(primaryType);
-    onProgress?.(`Planning extraction for ${primaryType} ${documentType}...`);
-    const templateHints = [
-      `Document type: ${primaryType} ${documentType}`,
-      `Expected sections: ${template.expectedSections.join(", ")}`,
-      `Page hints: ${Object.entries(template.pageHints).map(([k, v]) => `${k}: ${v}`).join("; ")}`,
-      `Total pages: ${pageCount}`
-    ].join("\n");
-    const planResult = await withRetry(
-      () => generateObject({
-        prompt: buildPlanPrompt(templateHints),
-        schema: ExtractionPlanSchema,
-        maxTokens: 2048,
-        providerOptions
-      })
-    );
-    trackUsage(planResult.usage);
-    const tasks = planResult.object.tasks;
-    onProgress?.(`Dispatching ${tasks.length} extractors...`);
-    const extractorResults = await Promise.all(
-      tasks.map(
-        (task) => limit(async () => {
-          const ext = getExtractor(task.extractorName);
-          if (!ext) {
-            await log?.(`Unknown extractor: ${task.extractorName}, skipping`);
-            return null;
-          }
-          onProgress?.(`Extracting ${task.extractorName} (pages ${task.startPage}-${task.endPage})...`);
-          try {
-            const result = await runExtractor({
-              name: task.extractorName,
-              prompt: ext.buildPrompt(),
-              schema: ext.schema,
-              pdfBase64,
-              startPage: task.startPage,
-              endPage: task.endPage,
-              generateObject,
-              convertPdfToImages,
-              maxTokens: ext.maxTokens ?? 4096,
-              providerOptions
-            });
-            trackUsage(result.usage);
-            return result;
-          } catch (error) {
-            await log?.(`Extractor ${task.extractorName} failed: ${error}`);
-            return null;
-          }
-        })
-      )
-    );
-    for (const result of extractorResults) {
-      if (result) {
-        memory.set(result.name, result.data);
+    const pipelineCtx = createPipelineContext({
+      id,
+      onSave: onCheckpointSave,
+      resumeFrom: options?.resumeFrom
+    });
+    const resumed = pipelineCtx.getCheckpoint()?.state;
+    if (resumed?.memory) {
+      for (const [k, v] of Object.entries(resumed.memory)) {
+        memory.set(k, v);
       }
     }
-    for (let round = 0; round < maxReviewRounds; round++) {
-      const extractedKeys = [...memory.keys()].filter((k) => k !== "classify");
-      const reviewResult = await withRetry(
-        () => generateObject({
-          prompt: buildReviewPrompt(template.required, extractedKeys),
-          schema: ReviewResultSchema,
-          maxTokens: 1024,
+    let classifyResult;
+    if (resumed?.classifyResult && pipelineCtx.isPhaseComplete("classify")) {
+      classifyResult = resumed.classifyResult;
+      onProgress?.("Resuming from checkpoint (classify complete)...");
+    } else {
+      onProgress?.("Classifying document...");
+      const pageCount2 = await getPdfPageCount(pdfBase64);
+      const classifyResponse = await safeGenerateObject(
+        generateObject,
+        {
+          prompt: buildClassifyPrompt(),
+          schema: ClassifyResultSchema,
+          maxTokens: 512,
           providerOptions
-        })
+        },
+        {
+          fallback: { documentType: "policy", policyTypes: ["other"], confidence: 0 },
+          log,
+          onError: (err, attempt) => log?.(`Classify attempt ${attempt + 1} failed: ${err}`)
+        }
       );
-      trackUsage(reviewResult.usage);
-      if (reviewResult.object.complete || reviewResult.object.additionalTasks.length === 0) {
-        onProgress?.("Extraction complete.");
-        break;
-      }
-      onProgress?.(`Review round ${round + 1}: dispatching ${reviewResult.object.additionalTasks.length} follow-up extractors...`);
-      const followUpResults = await Promise.all(
-        reviewResult.object.additionalTasks.map(
+      trackUsage(classifyResponse.usage);
+      classifyResult = classifyResponse.object;
+      memory.set("classify", classifyResult);
+      await pipelineCtx.save("classify", {
+        id,
+        pageCount: pageCount2,
+        classifyResult,
+        memory: Object.fromEntries(memory)
+      });
+    }
+    const { documentType, policyTypes } = classifyResult;
+    const primaryType = policyTypes[0] ?? "other";
+    const template = getTemplate(primaryType);
+    const pageCount = resumed?.pageCount ?? await getPdfPageCount(pdfBase64);
+    let plan;
+    if (resumed?.plan && pipelineCtx.isPhaseComplete("plan")) {
+      plan = resumed.plan;
+      onProgress?.("Resuming from checkpoint (plan complete)...");
+    } else {
+      onProgress?.(`Planning extraction for ${primaryType} ${documentType}...`);
+      const templateHints = [
+        `Document type: ${primaryType} ${documentType}`,
+        `Expected sections: ${template.expectedSections.join(", ")}`,
+        `Page hints: ${Object.entries(template.pageHints).map(([k, v]) => `${k}: ${v}`).join("; ")}`,
+        `Total pages: ${pageCount}`
+      ].join("\n");
+      const planResponse = await safeGenerateObject(
+        generateObject,
+        {
+          prompt: buildPlanPrompt(templateHints),
+          schema: ExtractionPlanSchema,
+          maxTokens: 2048,
+          providerOptions
+        },
+        {
+          fallback: {
+            tasks: [{ extractorName: "sections", startPage: 1, endPage: pageCount, description: "Full document fallback extraction" }]
+          },
+          log,
+          onError: (err, attempt) => log?.(`Plan attempt ${attempt + 1} failed: ${err}`)
+        }
+      );
+      trackUsage(planResponse.usage);
+      plan = planResponse.object;
+      await pipelineCtx.save("plan", {
+        id,
+        pageCount,
+        classifyResult,
+        plan,
+        memory: Object.fromEntries(memory)
+      });
+    }
+    if (!pipelineCtx.isPhaseComplete("extract")) {
+      const tasks = plan.tasks;
+      onProgress?.(`Dispatching ${tasks.length} extractors...`);
+      const extractorResults = await Promise.all(
+        tasks.map(
           (task) => limit(async () => {
             const ext = getExtractor(task.extractorName);
-            if (!ext) return null;
+            if (!ext) {
+              await log?.(`Unknown extractor: ${task.extractorName}, skipping`);
+              return null;
+            }
+            onProgress?.(`Extracting ${task.extractorName} (pages ${task.startPage}-${task.endPage})...`);
             try {
               const result = await runExtractor({
                 name: task.extractorName,
@@ -3181,22 +3477,114 @@ function createExtractor(config) {
               trackUsage(result.usage);
               return result;
             } catch (error) {
-              await log?.(`Follow-up extractor ${task.extractorName} failed: ${error}`);
+              await log?.(`Extractor ${task.extractorName} failed: ${error}`);
               return null;
             }
           })
         )
       );
-      for (const result of followUpResults) {
+      for (const result of extractorResults) {
         if (result) {
           memory.set(result.name, result.data);
         }
       }
+      await pipelineCtx.save("extract", {
+        id,
+        pageCount,
+        classifyResult,
+        plan,
+        memory: Object.fromEntries(memory)
+      });
+    }
+    if (!pipelineCtx.isPhaseComplete("review")) {
+      for (let round = 0; round < maxReviewRounds; round++) {
+        const extractedKeys = [...memory.keys()].filter((k) => k !== "classify");
+        const reviewResponse = await safeGenerateObject(
+          generateObject,
+          {
+            prompt: buildReviewPrompt(template.required, extractedKeys),
+            schema: ReviewResultSchema,
+            maxTokens: 1024,
+            providerOptions
+          },
+          {
+            fallback: { complete: true, missingFields: [], additionalTasks: [] },
+            log,
+            onError: (err, attempt) => log?.(`Review round ${round + 1} attempt ${attempt + 1} failed: ${err}`)
+          }
+        );
+        trackUsage(reviewResponse.usage);
+        if (reviewResponse.object.complete || reviewResponse.object.additionalTasks.length === 0) {
+          onProgress?.("Extraction complete.");
+          break;
+        }
+        onProgress?.(`Review round ${round + 1}: dispatching ${reviewResponse.object.additionalTasks.length} follow-up extractors...`);
+        const followUpResults = await Promise.all(
+          reviewResponse.object.additionalTasks.map(
+            (task) => limit(async () => {
+              const ext = getExtractor(task.extractorName);
+              if (!ext) return null;
+              try {
+                const result = await runExtractor({
+                  name: task.extractorName,
+                  prompt: ext.buildPrompt(),
+                  schema: ext.schema,
+                  pdfBase64,
+                  startPage: task.startPage,
+                  endPage: task.endPage,
+                  generateObject,
+                  convertPdfToImages,
+                  maxTokens: ext.maxTokens ?? 4096,
+                  providerOptions
+                });
+                trackUsage(result.usage);
+                return result;
+              } catch (error) {
+                await log?.(`Follow-up extractor ${task.extractorName} failed: ${error}`);
+                return null;
+              }
+            })
+          )
+        );
+        for (const result of followUpResults) {
+          if (result) {
+            memory.set(result.name, result.data);
+          }
+        }
+      }
+      await pipelineCtx.save("review", {
+        id,
+        pageCount,
+        classifyResult,
+        plan,
+        memory: Object.fromEntries(memory)
+      });
     }
     onProgress?.("Assembling document...");
     const document = assembleDocument(id, documentType, memory);
-    const chunks = chunkDocument(document);
-    return { document, chunks, tokenUsage: totalUsage };
+    await pipelineCtx.save("assemble", {
+      id,
+      pageCount,
+      classifyResult,
+      plan,
+      memory: Object.fromEntries(memory),
+      document
+    });
+    onProgress?.("Formatting extracted content...");
+    const formatResult = await formatDocumentContent(document, generateText, {
+      providerOptions,
+      onProgress,
+      log
+    });
+    trackUsage(formatResult.usage);
+    const chunks = chunkDocument(formatResult.document);
+    const finalCheckpoint = pipelineCtx.getCheckpoint();
+    return {
+      document: formatResult.document,
+      chunks,
+      tokenUsage: totalUsage,
+      checkpoint: finalCheckpoint
+    };
   }
   return { extract };
 }
@@ -4059,7 +4447,6 @@ function createApplicationPipeline(config) {
     let state = {
       id,
       pdfBase64: void 0,
-      // Don't persist the full PDF in state
       title: void 0,
       applicationType: null,
       fields: [],
@@ -4070,13 +4457,20 @@ function createApplicationPipeline(config) {
       updatedAt: now
     };
     onProgress?.("Classifying document...");
-    const { result: classifyResult, usage: classifyUsage } = await classifyApplication(
-      pdfBase64.slice(0, 2e3),
-      // Send truncated content for classification
-      generateObject,
-      providerOptions
-    );
-    trackUsage(classifyUsage);
+    await applicationStore?.save(state);
+    let classifyResult;
+    try {
+      const { result, usage: classifyUsage } = await classifyApplication(
+        pdfBase64.slice(0, 2e3),
+        generateObject,
+        providerOptions
+      );
+      trackUsage(classifyUsage);
+      classifyResult = result;
+    } catch (error) {
+      await log?.(`Classification failed, treating as non-application: ${error instanceof Error ? error.message : String(error)}`);
+      classifyResult = { isApplication: false, confidence: 0, applicationType: null };
+    }
     if (!classifyResult.isApplication) {
       state.status = "complete";
       state.updatedAt = Date.now();
@@ -4086,13 +4480,28 @@ function createApplicationPipeline(config) {
     state.applicationType = classifyResult.applicationType;
     state.status = "extracting";
     state.updatedAt = Date.now();
+    await applicationStore?.save(state);
     onProgress?.("Extracting form fields...");
-    const { fields, usage: extractUsage } = await extractFields(
-      pdfBase64,
-      generateObject,
-      providerOptions
-    );
-    trackUsage(extractUsage);
+    let fields;
+    try {
+      const { fields: extractedFields, usage: extractUsage } = await extractFields(
+        pdfBase64,
+        generateObject,
+        providerOptions
+      );
+      trackUsage(extractUsage);
+      fields = extractedFields;
+    } catch (error) {
+      await log?.(`Field extraction failed: ${error instanceof Error ? error.message : String(error)}`);
+      fields = [];
+    }
+    if (fields.length === 0) {
+      await log?.("No fields extracted, completing pipeline with empty result");
+      state.status = "complete";
+      state.updatedAt = Date.now();
+      await applicationStore?.save(state);
+      return { state, tokenUsage: totalUsage };
+    }
     state.fields = fields;
     state.title = classifyResult.applicationType ?? void 0;
     state.status = "auto_filling";
@@ -4124,20 +4533,24 @@ function createApplicationPipeline(config) {
         limit(async () => {
           const unfilledFields2 = state.fields.filter((f) => !f.value);
           if (unfilledFields2.length === 0) return;
-          const { result: autoFillResult, usage: afUsage } = await autoFillFromContext(
-            unfilledFields2,
-            orgContext,
-            generateObject,
-            providerOptions
-          );
-          trackUsage(afUsage);
-          for (const match of autoFillResult.matches) {
-            const field = state.fields.find((f) => f.id === match.fieldId);
-            if (field && !field.value) {
-              field.value = match.value;
-              field.source = `auto-fill: ${match.contextKey}`;
-              field.confidence = match.confidence;
+          try {
+            const { result: autoFillResult, usage: afUsage } = await autoFillFromContext(
+              unfilledFields2,
+              orgContext,
+              generateObject,
+              providerOptions
+            );
+            trackUsage(afUsage);
+            for (const match of autoFillResult.matches) {
+              const field = state.fields.find((f) => f.id === match.fieldId);
+              if (field && !field.value) {
+                field.value = match.value;
+                field.source = `auto-fill: ${match.contextKey}`;
+                field.confidence = match.confidence;
+              }
             }
+          } catch (e) {
+            await log?.(`Auto-fill from context failed: ${e instanceof Error ? e.message : String(e)}`);
           }
         })
       );
@@ -4170,13 +4583,18 @@ function createApplicationPipeline(config) {
     if (unfilledFields.length > 0) {
       onProgress?.(`Batching ${unfilledFields.length} remaining questions...`);
       state.status = "batching";
-      const { result: batchResult, usage: batchUsage } = await batchQuestions(
-        unfilledFields,
-        generateObject,
-        providerOptions
-      );
-      trackUsage(batchUsage);
-      state.batches = batchResult.batches;
+      try {
+        const { result: batchResult, usage: batchUsage } = await batchQuestions(
+          unfilledFields,
+          generateObject,
+          providerOptions
+        );
+        trackUsage(batchUsage);
+        state.batches = batchResult.batches;
+      } catch (error) {
+        await log?.(`Batching failed, using single-batch fallback: ${error instanceof Error ? error.message : String(error)}`);
+        state.batches = [unfilledFields.map((f) => f.id)];
+      }
       state.currentBatchIndex = 0;
       state.status = "collecting";
     } else {
@@ -4203,32 +4621,49 @@ function createApplicationPipeline(config) {
       (f) => currentBatchFieldIds.includes(f.id)
     );
     onProgress?.("Classifying reply...");
-    const { intent, usage: intentUsage } = await classifyReplyIntent(
-      currentBatchFields,
-      replyText,
-      generateObject,
-      providerOptions
-    );
-    trackUsage(intentUsage);
-    let fieldsFilled = 0;
-    let responseText;
-    if (intent.hasAnswers) {
-      onProgress?.("Parsing answers...");
-      const { result: parseResult, usage: parseUsage } = await parseAnswers(
+    let intent;
+    try {
+      const { intent: classifiedIntent, usage: intentUsage } = await classifyReplyIntent(
         currentBatchFields,
         replyText,
         generateObject,
         providerOptions
       );
-      trackUsage(parseUsage);
-      for (const answer of parseResult.answers) {
-        const field = state.fields.find((f) => f.id === answer.fieldId);
-        if (field) {
-          field.value = answer.value;
-          field.source = "user";
-          field.confidence = "confirmed";
-          fieldsFilled++;
+      trackUsage(intentUsage);
+      intent = classifiedIntent;
+    } catch (error) {
+      await log?.(`Reply intent classification failed, defaulting to answers_only: ${error instanceof Error ? error.message : String(error)}`);
+      intent = {
+        primaryIntent: "answers_only",
+        hasAnswers: true,
+        questionText: void 0,
+        questionFieldIds: void 0,
+        lookupRequests: void 0
+      };
+    }
+    let fieldsFilled = 0;
+    let responseText;
+    if (intent.hasAnswers) {
+      onProgress?.("Parsing answers...");
+      try {
+        const { result: parseResult, usage: parseUsage } = await parseAnswers(
+          currentBatchFields,
+          replyText,
+          generateObject,
+          providerOptions
+        );
+        trackUsage(parseUsage);
+        for (const answer of parseResult.answers) {
+          const field = state.fields.find((f) => f.id === answer.fieldId);
+          if (field) {
+            field.value = answer.value;
+            field.source = "user";
+            field.confidence = "confirmed";
+            fieldsFilled++;
+          }
         }
+      } catch (error) {
+        await log?.(`Answer parsing failed: ${error instanceof Error ? error.message : String(error)}`);
       }
     }
     if (intent.lookupRequests?.length) {
@@ -4249,36 +4684,45 @@ function createApplicationPipeline(config) {
         const targetFields = state.fields.filter(
           (f) => intent.lookupRequests.some((lr) => lr.targetFieldIds.includes(f.id))
         );
-        const { result: lookupResult, usage: lookupUsage } = await fillFromLookup(
-          intent.lookupRequests,
-          targetFields,
-          availableData,
-          generateObject,
-          providerOptions
-        );
-        trackUsage(lookupUsage);
-        for (const fill of lookupResult.fills) {
-          const field = state.fields.find((f) => f.id === fill.fieldId);
-          if (field) {
-            field.value = fill.value;
-            field.source = `lookup: ${fill.source}`;
-            field.confidence = "high";
-            fieldsFilled++;
+        try {
+          const { result: lookupResult, usage: lookupUsage } = await fillFromLookup(
+            intent.lookupRequests,
+            targetFields,
+            availableData,
+            generateObject,
+            providerOptions
+          );
+          trackUsage(lookupUsage);
+          for (const fill of lookupResult.fills) {
+            const field = state.fields.find((f) => f.id === fill.fieldId);
+            if (field) {
+              field.value = fill.value;
+              field.source = `lookup: ${fill.source}`;
+              field.confidence = "high";
+              fieldsFilled++;
+            }
           }
+        } catch (error) {
+          await log?.(`Lookup fill failed: ${error instanceof Error ? error.message : String(error)}`);
         }
       }
     }
     if (intent.primaryIntent === "question" || intent.primaryIntent === "mixed") {
       if (intent.questionText) {
-        const { text, usage } = await generateText({
-          prompt: `The user is filling out an insurance application and asked: "${intent.questionText}"
+        try {
+          const { text, usage } = await generateText({
+            prompt: `The user is filling out an insurance application and asked: "${intent.questionText}"
 Provide a brief, helpful explanation (2-3 sentences). End with "Just reply with the answer when you're ready and I'll fill it in."`,
-          maxTokens: 512,
-          providerOptions
-        });
-        trackUsage(usage);
-        responseText = text;
+            maxTokens: 512,
+            providerOptions
+          });
+          trackUsage(usage);
+          responseText = text;
+        } catch (error) {
+          await log?.(`Question response generation failed: ${error instanceof Error ? error.message : String(error)}`);
+          responseText = `I wasn't able to generate an explanation for your question. Could you rephrase it, or just provide the answer directly?`;
+        }
       }
     }
     const currentBatchComplete = currentBatchFieldIds.every(
@@ -4292,26 +4736,30 @@ Provide a brief, helpful explanation (2-3 sentences). End with "Just reply with
           (f) => nextBatchFieldIds.includes(f.id)
         );
         const filledCount = state.fields.filter((f) => f.value).length;
-        const { text: emailText, usage: emailUsage } = await generateBatchEmail(
-          nextBatchFields,
-          state.currentBatchIndex,
-          state.batches.length,
-          {
-            appTitle: state.title,
-            totalFieldCount: state.fields.length,
-            filledFieldCount: filledCount,
-            companyName: context?.companyName
-          },
-          generateText,
-          providerOptions
-        );
-        trackUsage(emailUsage);
-        if (!responseText) {
-          responseText = emailText;
-        } else {
-          responseText += `
+        try {
+          const { text: emailText, usage: emailUsage } = await generateBatchEmail(
+            nextBatchFields,
+            state.currentBatchIndex,
+            state.batches.length,
+            {
+              appTitle: state.title,
+              totalFieldCount: state.fields.length,
+              filledFieldCount: filledCount,
+              companyName: context?.companyName
+            },
+            generateText,
+            providerOptions
+          );
+          trackUsage(emailUsage);
+          if (!responseText) {
+            responseText = emailText;
+          } else {
+            responseText += `
 ${emailText}`;
+          }
+        } catch (error) {
+          await log?.(`Batch email generation failed: ${error instanceof Error ? error.message : String(error)}`);
         }
       } else {
         state.status = "confirming";
@@ -4520,7 +4968,7 @@ var EvidenceItemSchema = import_zod32.z.object({
   turnId: import_zod32.z.string().optional(),
   text: import_zod32.z.string().describe("Text excerpt from the source"),
   relevance: import_zod32.z.number().min(0).max(1),
-  metadata: import_zod32.z.record(import_zod32.z.string(), import_zod32.z.string()).optional()
+  metadata: import_zod32.z.array(import_zod32.z.object({ key: import_zod32.z.string(), value: import_zod32.z.string() })).optional()
 });
 var RetrievalResultSchema = import_zod32.z.object({
   subQuestion: import_zod32.z.string(),
@@ -4556,6 +5004,9 @@ var QueryResultSchema = import_zod32.z.object({
 });
 // src/query/retriever.ts
+function recordToKVArray(record) { // Converts a plain object into an array of { key, value } pairs, one per Object.entries() entry, in the same order.
+  return Object.entries(record).map(([key, value]) => ({ key, value })); // NOTE(review): Object.entries throws on null/undefined; callers pass chunk.metadata, so confirm it is always set.
+}
 async function retrieve(subQuestion, conversationId, config) {
   const { documentStore, memoryStore, retrievalLimit, log } = config;
   const evidence = [];
@@ -4582,7 +5033,7 @@ async function retrieve(subQuestion, conversationId, config) {
                 text: chunk.text,
                 relevance: 0.8,
                 // Default — store doesn't expose scores directly
-                metadata: chunk.metadata
+                metadata: recordToKVArray(chunk.metadata)
               });
             }
           }
@@ -4597,7 +5048,7 @@ async function retrieve(subQuestion, conversationId, config) {
               documentId: chunk.documentId,
               text: chunk.text,
               relevance: 0.8,
-              metadata: chunk.metadata
+              metadata: recordToKVArray(chunk.metadata)
             });
           }
         }
@@ -4625,11 +5076,11 @@ async function retrieve(subQuestion, conversationId, config) {
               text: summary,
               relevance: 0.9,
               // Direct lookup is high relevance
-              metadata: {
-                type: doc.type,
-                carrier: doc.carrier ?? "",
-                insuredName: doc.insuredName ?? ""
-              }
+              metadata: [
+                { key: "type", value: doc.type },
+                { key: "carrier", value: doc.carrier ?? "" },
+                { key: "insuredName", value: doc.insuredName ?? "" }
+              ]
             });
           }
         } catch (e) {
@@ -4864,8 +5315,12 @@ function createQueryAgent(config) {
   async function query(input) {
     totalUsage = { inputTokens: 0, outputTokens: 0 };
     const { question, conversationId, context } = input;
+    const pipelineCtx = createPipelineContext({
+      id: `query-${Date.now()}`
+    });
     onProgress?.("Classifying query...");
     const classification = await classify(question, conversationId);
+    await pipelineCtx.save("classify", { classification });
     onProgress?.(`Retrieving evidence for ${classification.subQuestions.length} sub-question(s)...`);
     const retrieverConfig = {
       documentStore,
@@ -4879,9 +5334,10 @@ function createQueryAgent(config) {
       )
     );
     const allEvidence = retrievalResults.flatMap((r) => r.evidence);
+    await pipelineCtx.save("retrieve", { classification, evidence: allEvidence });
     onProgress?.("Reasoning over evidence...");
     const reasonerConfig = { generateObject, providerOptions };
-    let subAnswers = await Promise.all(
+    const reasonResults = await Promise.allSettled(
       classification.subQuestions.map(
         (sq, i) => limit(async () => {
           const { subAnswer, usage } = await reason(
@@ -4895,10 +5351,27 @@ function createQueryAgent(config) {
         })
       )
     );
+    let subAnswers = [];
+    for (let i = 0; i < reasonResults.length; i++) {
+      const result = reasonResults[i];
+      if (result.status === "fulfilled") {
+        subAnswers.push(result.value);
+      } else {
+        await log?.(`Reasoner failed for sub-question "${classification.subQuestions[i].question}": ${result.reason}`);
+        subAnswers.push({
+          subQuestion: classification.subQuestions[i].question,
+          answer: "Unable to answer this part of the question due to a processing error.",
+          citations: [],
+          confidence: 0,
+          needsMoreContext: true
+        });
+      }
+    }
+    await pipelineCtx.save("reason", { classification, evidence: allEvidence, subAnswers });
     onProgress?.("Verifying answer grounding...");
     const verifierConfig = { generateObject, providerOptions };
     for (let round = 0; round < maxVerifyRounds; round++) {
-      const { result: verifyResult, usage } = await verify(
+      const { result: verifyResult, usage } = await safeVerify(
         question,
         subAnswers,
         allEvidence,
@@ -4922,7 +5395,6 @@ function createQueryAgent(config) {
                 () => retrieve(sq, conversationId, {
                   ...retrieverConfig,
                   retrievalLimit: retrievalLimit * 2
-                  // Broader retrieval on retry
                 })
               )
             )
@@ -4930,7 +5402,7 @@ function createQueryAgent(config) {
           for (const r of retryRetrievals) {
             allEvidence.push(...r.evidence);
           }
-          const retrySubAnswers = await Promise.all(
+          const retrySettled = await Promise.allSettled(
             retryQuestions.map(
               (sq, i) => limit(async () => {
                 const { subAnswer, usage: u } = await reason(
@@ -4944,6 +5416,7 @@ function createQueryAgent(config) {
               })
             )
           );
+          const retrySubAnswers = retrySettled.filter((r) => r.status === "fulfilled").map((r) => r.value);
           const retryQSet = new Set(retryQuestions.map((sq) => sq.question));
           subAnswers = subAnswers.map((sa) => {
             if (retryQSet.has(sa.subQuestion)) {
@@ -4996,17 +5469,42 @@ function createQueryAgent(config) {
       }
     }
     const prompt = buildQueryClassifyPrompt(question, conversationContext);
-    const { object, usage } = await withRetry(
-      () => generateObject({
+    const { object, usage } = await safeGenerateObject(
+      generateObject,
+      {
         prompt,
         schema: QueryClassifyResultSchema,
         maxTokens: 2048,
         providerOptions
-      })
+      },
+      {
+        fallback: {
+          intent: "general_knowledge",
+          subQuestions: [
+            {
+              question,
+              intent: "general_knowledge"
+            }
+          ],
+          requiresDocumentLookup: true,
+          requiresChunkSearch: true,
+          requiresConversationHistory: !!conversationId
+        },
+        log,
+        onError: (err, attempt) => log?.(`Query classify attempt ${attempt + 1} failed: ${err}`)
+      }
     );
     trackUsage(usage);
     return object;
   }
+  async function safeVerify(originalQuestion, subAnswers, allEvidence, verifierConfig) { // Forwards all four arguments to verify(); a verify() failure is logged and replaced by the fallback below.
+    try {
+      return await verify(originalQuestion, subAnswers, allEvidence, verifierConfig);
+    } catch (error) {
+      await log?.(`Verification failed, approving by default: ${error instanceof Error ? error.message : String(error)}`);
+      return { result: { approved: true, issues: [] } }; // Fail-open fallback: approved with no issues. No `usage` key is returned, so a caller that destructures `usage` receives undefined. NOTE(review): confirm trackUsage tolerates that.
+    }
+  }
   async function respond(originalQuestion, subAnswers, classification, platform) {
     const subAnswersJson = JSON.stringify(
       subAnswers.map((sa) => ({
@@ -5020,13 +5518,25 @@ function createQueryAgent(config) {
       2
     );
     const prompt = buildRespondPrompt(originalQuestion, subAnswersJson, platform);
-    const { object, usage } = await withRetry(
-      () => generateObject({
+    const { object, usage } = await safeGenerateObject(
+      generateObject,
+      {
         prompt,
         schema: QueryResultSchema,
         maxTokens: 4096,
         providerOptions
-      })
+      },
+      {
+        fallback: {
+          answer: subAnswers.map((sa) => `**${sa.subQuestion}**
+${sa.answer}`).join("\n\n"),
+          citations: subAnswers.flatMap((sa) => sa.citations),
+          intent: classification.intent,
+          confidence: Math.min(...subAnswers.map((sa) => sa.confidence), 1)
+        },
+        log,
+        onError: (err, attempt) => log?.(`Respond attempt ${attempt + 1} failed: ${err}`)
+      }
     );
     trackUsage(usage);
     const result = object;
@@ -5192,6 +5702,7 @@ var AGENT_TOOLS = [
   CommercialAutoDeclarationsSchema,
   CommercialPropertyDeclarationsSchema,
   CommunicationIntentSchema,
+  ConditionKeyValueSchema,
   ConditionTypeSchema,
   ConstructionTypeSchema,
   ContactSchema,
@@ -5358,6 +5869,7 @@ var AGENT_TOOLS = [
   chunkDocument,
   createApplicationPipeline,
   createExtractor,
+  createPipelineContext,
   createQueryAgent,
   extractPageRange,
   fillAcroForm,
@@ -5367,6 +5879,7 @@ var AGENT_TOOLS = [
   getTemplate,
   overlayTextOnPdf,
   pLimit,
+  safeGenerateObject,
   sanitizeNulls,
   stripFences,
   withRetry