@claritylabs/cl-sdk 0.17.0 → 0.17.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -169,7 +169,14 @@ function createPipelineContext(opts) {
169
169
  let latest = opts.resumeFrom;
170
170
  const completedPhases = /* @__PURE__ */ new Set();
171
171
  if (opts.resumeFrom) {
172
- completedPhases.add(opts.resumeFrom.phase);
172
+ const phaseIndex = opts.phaseOrder?.indexOf(opts.resumeFrom.phase) ?? -1;
173
+ if (phaseIndex >= 0 && opts.phaseOrder) {
174
+ for (const phase of opts.phaseOrder.slice(0, phaseIndex + 1)) {
175
+ completedPhases.add(phase);
176
+ }
177
+ } else {
178
+ completedPhases.add(opts.resumeFrom.phase);
179
+ }
173
180
  }
174
181
  return {
175
182
  id: opts.id,
@@ -1708,6 +1715,53 @@ async function runExtractor(params) {
1708
1715
  };
1709
1716
  }
1710
1717
 
1718
+ // src/extraction/memory.ts
1719
+ function isMemoryRecord(value) {
1720
+ return typeof value === "object" && value !== null && !Array.isArray(value);
1721
+ }
1722
+ function readMemoryRecord(memory, key) {
1723
+ const value = memory.get(key);
1724
+ return isMemoryRecord(value) ? value : void 0;
1725
+ }
1726
+ function readRecordValue(record, key) {
1727
+ return record?.[key];
1728
+ }
1729
+ function readRecordArray(record, key) {
1730
+ const value = readRecordValue(record, key);
1731
+ return Array.isArray(value) ? value : void 0;
1732
+ }
1733
+ function getCarrierInfo(memory) {
1734
+ return readMemoryRecord(memory, "carrier_info");
1735
+ }
1736
+ function getNamedInsured(memory) {
1737
+ return readMemoryRecord(memory, "named_insured");
1738
+ }
1739
+ function getCoverageLimits(memory) {
1740
+ return readMemoryRecord(memory, "coverage_limits");
1741
+ }
1742
+ function getCoverageLimitCoverages(memory) {
1743
+ return readRecordArray(getCoverageLimits(memory), "coverages") ?? [];
1744
+ }
1745
+ function getSectionsPayload(memory) {
1746
+ return readMemoryRecord(memory, "sections");
1747
+ }
1748
+ function getSections(memory) {
1749
+ return readRecordArray(getSectionsPayload(memory), "sections");
1750
+ }
1751
+ function getDefinitionsPayload(memory) {
1752
+ return readMemoryRecord(memory, "definitions");
1753
+ }
1754
+ function getDefinitions(memory) {
1755
+ return readRecordArray(getDefinitionsPayload(memory), "definitions");
1756
+ }
1757
+ function getCoveredReasonsPayload(memory) {
1758
+ return readMemoryRecord(memory, "covered_reasons");
1759
+ }
1760
+ function getCoveredReasons(memory) {
1761
+ const payload = getCoveredReasonsPayload(memory);
1762
+ return readRecordArray(payload, "coveredReasons") ?? readRecordArray(payload, "covered_reasons");
1763
+ }
1764
+
1711
1765
  // src/extraction/promote.ts
1712
1766
  function getDeclarationFields(doc) {
1713
1767
  const decl = doc.declarations;
@@ -1734,20 +1788,29 @@ function findRawString(raw, keys) {
1734
1788
  }
1735
1789
  return void 0;
1736
1790
  }
1791
+ function promoteRawFields(raw, mappings) {
1792
+ for (const { from, to } of mappings) {
1793
+ if (!raw[to] && raw[from]) {
1794
+ raw[to] = raw[from];
1795
+ }
1796
+ delete raw[from];
1797
+ }
1798
+ }
1799
+ function findRawOrDeclarationValue(raw, fields, lookup) {
1800
+ return (lookup.rawKey ? raw[lookup.rawKey] : void 0) || findFieldValue(fields, lookup.patterns, lookup.reject);
1801
+ }
1802
+ function promoteRawOrDeclarationString(raw, fields, targetKey, rawKeys, lookup) {
1803
+ if (raw[targetKey]) return;
1804
+ const value = findRawString(raw, rawKeys) ?? findFieldValue(fields, lookup.patterns, lookup.reject);
1805
+ if (value) raw[targetKey] = value;
1806
+ }
1737
1807
  function promoteCarrierFields(doc) {
1738
1808
  const raw = doc;
1739
- if (!raw.carrierNaicNumber && raw.naicNumber) {
1740
- raw.carrierNaicNumber = raw.naicNumber;
1741
- }
1742
- if (!raw.carrierAmBestRating && raw.amBestRating) {
1743
- raw.carrierAmBestRating = raw.amBestRating;
1744
- }
1745
- if (!raw.carrierAdmittedStatus && raw.admittedStatus) {
1746
- raw.carrierAdmittedStatus = raw.admittedStatus;
1747
- }
1748
- delete raw.naicNumber;
1749
- delete raw.amBestRating;
1750
- delete raw.admittedStatus;
1809
+ promoteRawFields(raw, [
1810
+ { from: "naicNumber", to: "carrierNaicNumber" },
1811
+ { from: "amBestRating", to: "carrierAmBestRating" },
1812
+ { from: "admittedStatus", to: "carrierAdmittedStatus" }
1813
+ ]);
1751
1814
  if (!raw.insurer && raw.carrierLegalName) {
1752
1815
  raw.insurer = {
1753
1816
  legalName: raw.carrierLegalName,
@@ -1788,12 +1851,21 @@ var BROKER_ADDRESS_PATTERNS = ["brokerAddress", "agentAddress", "producerAddress
1788
1851
  function promoteBroker(doc) {
1789
1852
  const raw = doc;
1790
1853
  const fields = getDeclarationFields(doc);
1791
- const brokerAgency = raw.brokerAgency || findFieldValue(fields, BROKER_NAME_PATTERNS);
1792
- const brokerContact = raw.brokerContactName || findFieldValue(fields, BROKER_CONTACT_PATTERNS);
1793
- const brokerLicense = raw.brokerLicenseNumber || findFieldValue(fields, BROKER_LICENSE_PATTERNS);
1794
- const brokerPhone = findFieldValue(fields, BROKER_PHONE_PATTERNS);
1795
- const brokerEmail = findFieldValue(fields, BROKER_EMAIL_PATTERNS);
1796
- const brokerAddress = findFieldValue(fields, BROKER_ADDRESS_PATTERNS);
1854
+ const brokerAgency = findRawOrDeclarationValue(raw, fields, {
1855
+ rawKey: "brokerAgency",
1856
+ patterns: BROKER_NAME_PATTERNS
1857
+ });
1858
+ const brokerContact = findRawOrDeclarationValue(raw, fields, {
1859
+ rawKey: "brokerContactName",
1860
+ patterns: BROKER_CONTACT_PATTERNS
1861
+ });
1862
+ const brokerLicense = findRawOrDeclarationValue(raw, fields, {
1863
+ rawKey: "brokerLicenseNumber",
1864
+ patterns: BROKER_LICENSE_PATTERNS
1865
+ });
1866
+ const brokerPhone = findRawOrDeclarationValue(raw, fields, { patterns: BROKER_PHONE_PATTERNS });
1867
+ const brokerEmail = findRawOrDeclarationValue(raw, fields, { patterns: BROKER_EMAIL_PATTERNS });
1868
+ const brokerAddress = findRawOrDeclarationValue(raw, fields, { patterns: BROKER_ADDRESS_PATTERNS });
1797
1869
  if (brokerAgency) raw.brokerAgency = brokerAgency;
1798
1870
  if (brokerContact) raw.brokerContactName = brokerContact;
1799
1871
  if (brokerLicense) raw.brokerLicenseNumber = brokerLicense;
@@ -2169,20 +2241,27 @@ function taxFeeKey(item) {
2169
2241
  item.type ?? ""
2170
2242
  ].join("|");
2171
2243
  }
2244
+ function taxFeeItemFromField(field) {
2245
+ const type = taxFeeType(field.field);
2246
+ return {
2247
+ name: titleizeFieldName(field.field),
2248
+ amount: absorbNegative(field.value),
2249
+ ...type ? { type } : {}
2250
+ };
2251
+ }
2172
2252
  function absorbNegative(value) {
2173
2253
  return value.replace(/^-\s*/, "").replace(/^\(\s*(.*?)\s*\)$/, "$1");
2174
2254
  }
2175
2255
  function promotePremium(doc) {
2176
2256
  const raw = doc;
2177
2257
  const fields = getDeclarationFields(doc);
2178
- if (!raw.premium) {
2179
- const premium = findRawString(raw, PREMIUM_RAW_KEYS) ?? findFieldValue(fields, PREMIUM_PATTERNS, (field) => isTaxOrFeeField(field.field));
2180
- if (premium) raw.premium = premium;
2181
- }
2182
- if (!raw.totalCost) {
2183
- const totalCost = findRawString(raw, TOTAL_COST_RAW_KEYS) ?? findFieldValue(fields, TOTAL_COST_PATTERNS);
2184
- if (totalCost) raw.totalCost = totalCost;
2185
- }
2258
+ promoteRawOrDeclarationString(raw, fields, "premium", PREMIUM_RAW_KEYS, {
2259
+ patterns: PREMIUM_PATTERNS,
2260
+ reject: (field) => isTaxOrFeeField(field.field)
2261
+ });
2262
+ promoteRawOrDeclarationString(raw, fields, "totalCost", TOTAL_COST_RAW_KEYS, {
2263
+ patterns: TOTAL_COST_PATTERNS
2264
+ });
2186
2265
  if (typeof raw.premium === "string") raw.premium = absorbNegative(raw.premium);
2187
2266
  if (typeof raw.totalCost === "string") raw.totalCost = absorbNegative(raw.totalCost);
2188
2267
  }
@@ -2200,11 +2279,7 @@ function synthesizeTaxesAndFees(doc) {
2200
2279
  if (!field.value?.trim()) continue;
2201
2280
  if (!isTaxOrFeeField(field.field)) continue;
2202
2281
  if (isTotalCostField(field.field)) continue;
2203
- const item = {
2204
- name: titleizeFieldName(field.field),
2205
- amount: absorbNegative(field.value),
2206
- ...taxFeeType(field.field) ? { type: taxFeeType(field.field) } : {}
2207
- };
2282
+ const item = taxFeeItemFromField(field);
2208
2283
  byKey.set(taxFeeKey(item), item);
2209
2284
  }
2210
2285
  if (byKey.size > 0) {
@@ -2224,43 +2299,47 @@ function promoteExtractedFields(doc) {
2224
2299
 
2225
2300
  // src/extraction/assembler.ts
2226
2301
  function assembleDocument(documentId, documentType, memory) {
2227
- const carrier = memory.get("carrier_info");
2228
- const insured = memory.get("named_insured");
2229
- const coverages = memory.get("coverage_limits");
2230
- const endorsements = memory.get("endorsements");
2231
- const exclusions = memory.get("exclusions");
2232
- const conditions = memory.get("conditions");
2233
- const premium = memory.get("premium_breakdown");
2234
- const declarations = memory.get("declarations");
2235
- const lossHistory = memory.get("loss_history");
2236
- const sections = memory.get("sections");
2237
- const supplementary = memory.get("supplementary");
2238
- const formInventory = memory.get("form_inventory");
2239
- const definitions = memory.get("definitions");
2240
- const coveredReasons = memory.get("covered_reasons");
2241
- const classify = memory.get("classify");
2302
+ const carrier = getCarrierInfo(memory);
2303
+ const insured = getNamedInsured(memory);
2304
+ const coverages = getCoverageLimits(memory);
2305
+ const endorsements = readMemoryRecord(memory, "endorsements");
2306
+ const exclusions = readMemoryRecord(memory, "exclusions");
2307
+ const conditions = readMemoryRecord(memory, "conditions");
2308
+ const premium = readMemoryRecord(memory, "premium_breakdown");
2309
+ const declarations = readMemoryRecord(memory, "declarations");
2310
+ const lossHistory = readMemoryRecord(memory, "loss_history");
2311
+ const supplementary = readMemoryRecord(memory, "supplementary");
2312
+ const formInventory = readMemoryRecord(memory, "form_inventory");
2313
+ const classify = readMemoryRecord(memory, "classify");
2314
+ const lossPayees = readRecordArray(insured, "lossPayees");
2315
+ const mortgageHolders = readRecordArray(insured, "mortgageHolders");
2242
2316
  const base = {
2243
2317
  id: documentId,
2244
- carrier: carrier?.carrierName ?? "Unknown",
2245
- insuredName: insured?.insuredName ?? "Unknown",
2246
- coverages: coverages?.coverages ?? [],
2247
- policyTypes: classify?.policyTypes,
2318
+ carrier: readRecordValue(carrier, "carrierName") ?? "Unknown",
2319
+ insuredName: readRecordValue(insured, "insuredName") ?? "Unknown",
2320
+ coverages: getCoverageLimitCoverages(memory),
2321
+ policyTypes: readRecordValue(classify, "policyTypes"),
2248
2322
  ...sanitizeNulls(carrier ?? {}),
2249
2323
  ...sanitizeNulls(insured ?? {}),
2250
2324
  // Map named_insured extractor's loss payees/mortgage holders to EndorsementParty shape
2251
- ...Array.isArray(insured?.lossPayees) && insured.lossPayees.length > 0 ? { lossPayees: insured.lossPayees.map((lp) => ({ ...lp, role: "loss_payee" })) } : {},
2252
- ...Array.isArray(insured?.mortgageHolders) && insured.mortgageHolders.length > 0 ? { mortgageHolders: insured.mortgageHolders.map((mh) => ({ ...mh, role: "mortgage_holder" })) } : {},
2325
+ ...lossPayees && lossPayees.length > 0 ? { lossPayees: lossPayees.map((lp) => ({ ...lp, role: "loss_payee" })) } : {},
2326
+ ...mortgageHolders && mortgageHolders.length > 0 ? {
2327
+ mortgageHolders: mortgageHolders.map((mh) => ({
2328
+ ...mh,
2329
+ role: "mortgage_holder"
2330
+ }))
2331
+ } : {},
2253
2332
  ...sanitizeNulls(coverages ?? {}),
2254
2333
  ...sanitizeNulls(premium ?? {}),
2255
2334
  ...sanitizeNulls(supplementary ?? {}),
2256
- supplementaryFacts: supplementary?.auxiliaryFacts,
2257
- endorsements: endorsements?.endorsements,
2258
- exclusions: exclusions?.exclusions,
2259
- conditions: conditions?.conditions,
2260
- sections: sections?.sections,
2261
- formInventory: formInventory?.forms,
2262
- definitions: definitions?.definitions,
2263
- coveredReasons: coveredReasons?.coveredReasons ?? coveredReasons?.covered_reasons,
2335
+ supplementaryFacts: readRecordValue(supplementary, "auxiliaryFacts"),
2336
+ endorsements: readRecordValue(endorsements, "endorsements"),
2337
+ exclusions: readRecordValue(exclusions, "exclusions"),
2338
+ conditions: readRecordValue(conditions, "conditions"),
2339
+ sections: getSections(memory),
2340
+ formInventory: readRecordValue(formInventory, "forms"),
2341
+ definitions: getDefinitions(memory),
2342
+ coveredReasons: getCoveredReasons(memory),
2264
2343
  declarations: declarations ? sanitizeNulls(declarations) : void 0,
2265
2344
  ...sanitizeNulls(lossHistory ?? {})
2266
2345
  };
@@ -2269,21 +2348,21 @@ function assembleDocument(documentId, documentType, memory) {
2269
2348
  doc = {
2270
2349
  ...base,
2271
2350
  type: "policy",
2272
- policyNumber: carrier?.policyNumber ?? insured?.policyNumber ?? "Unknown",
2273
- effectiveDate: carrier?.effectiveDate ?? insured?.effectiveDate ?? "Unknown",
2274
- expirationDate: carrier?.expirationDate,
2275
- policyTermType: carrier?.policyTermType
2351
+ policyNumber: readRecordValue(carrier, "policyNumber") ?? readRecordValue(insured, "policyNumber") ?? "Unknown",
2352
+ effectiveDate: readRecordValue(carrier, "effectiveDate") ?? readRecordValue(insured, "effectiveDate") ?? "Unknown",
2353
+ expirationDate: readRecordValue(carrier, "expirationDate"),
2354
+ policyTermType: readRecordValue(carrier, "policyTermType")
2276
2355
  };
2277
2356
  } else {
2278
2357
  doc = {
2279
2358
  ...base,
2280
2359
  type: "quote",
2281
- quoteNumber: carrier?.quoteNumber ?? "Unknown",
2282
- proposedEffectiveDate: carrier?.proposedEffectiveDate,
2283
- proposedExpirationDate: carrier?.proposedExpirationDate,
2284
- subjectivities: coverages?.subjectivities,
2285
- underwritingConditions: coverages?.underwritingConditions,
2286
- premiumBreakdown: premium?.premiumBreakdown
2360
+ quoteNumber: readRecordValue(carrier, "quoteNumber") ?? "Unknown",
2361
+ proposedEffectiveDate: readRecordValue(carrier, "proposedEffectiveDate"),
2362
+ proposedExpirationDate: readRecordValue(carrier, "proposedExpirationDate"),
2363
+ subjectivities: readRecordValue(coverages, "subjectivities"),
2364
+ underwritingConditions: readRecordValue(coverages, "underwritingConditions"),
2365
+ premiumBreakdown: readRecordValue(premium, "premiumBreakdown")
2287
2366
  };
2288
2367
  }
2289
2368
  promoteExtractedFields(doc);
@@ -2385,6 +2464,23 @@ ${block}`;
2385
2464
  }
2386
2465
 
2387
2466
  // src/extraction/formatter.ts
2467
+ var LONG_CONTENT_THRESHOLD = 1200;
2468
+ function shouldFormatContent(text) {
2469
+ const trimmed = text.trim();
2470
+ if (trimmed.length === 0) return false;
2471
+ if (trimmed.length >= LONG_CONTENT_THRESHOLD) return true;
2472
+ if (/```|~~~|<br\s*\/?>/i.test(trimmed)) return true;
2473
+ if (/(^|\s)(\*\*|__|`)/.test(trimmed)) return true;
2474
+ if (/!?\[[^\]]+\]\([^)]+\)/.test(trimmed)) return true;
2475
+ if (/^\s{0,3}#{1,6}\s*\S/m.test(trimmed)) return true;
2476
+ if (/^\s{0,6}(?:[-*+]|\d+[.)])\s+\S/m.test(trimmed)) return true;
2477
+ if (/\t|[^\S\r\n]{3,}|\n{3,}|[ \t]+$/m.test(text)) return true;
2478
+ const lines = trimmed.split(/\r?\n/).map((line) => line.trim()).filter(Boolean);
2479
+ if (lines.some((line) => (line.match(/\|/g)?.length ?? 0) >= 2)) return true;
2480
+ const spaceAlignedRows = lines.filter((line) => /\S\s{2,}\S\s{2,}\S/.test(line));
2481
+ if (spaceAlignedRows.length >= 2) return true;
2482
+ return false;
2483
+ }
2388
2484
  function collectContentFields(doc) {
2389
2485
  const entries = [];
2390
2486
  let id = 0;
@@ -2488,7 +2584,7 @@ function applyFormattedContent(doc, entries, formatted) {
2488
2584
  }
2489
2585
  var MAX_ENTRIES_PER_BATCH = 20;
2490
2586
  async function formatDocumentContent(doc, generateText, options) {
2491
- const entries = collectContentFields(doc);
2587
+ const entries = collectContentFields(doc).filter((entry) => shouldFormatContent(entry.text));
2492
2588
  const totalUsage = { inputTokens: 0, outputTokens: 0 };
2493
2589
  if (entries.length === 0) {
2494
2590
  return { document: doc, usage: totalUsage };
@@ -2565,11 +2661,22 @@ function chunkDocument(doc) {
2565
2661
  if (policyTypesStr) base.policyTypes = policyTypesStr;
2566
2662
  return base;
2567
2663
  }
2568
- chunks.push({
2569
- id: `${docId}:carrier_info:0`,
2570
- documentId: docId,
2571
- type: "carrier_info",
2572
- text: [
2664
+ function lines(values) {
2665
+ return values.filter(Boolean).join("\n");
2666
+ }
2667
+ function pushChunk(idSuffix, type, text, metadata) {
2668
+ chunks.push({
2669
+ id: `${docId}:${idSuffix}`,
2670
+ documentId: docId,
2671
+ type,
2672
+ text,
2673
+ metadata: stringMetadata(metadata)
2674
+ });
2675
+ }
2676
+ pushChunk(
2677
+ "carrier_info:0",
2678
+ "carrier_info",
2679
+ lines([
2573
2680
  `Carrier: ${doc.carrier}`,
2574
2681
  doc.carrierLegalName ? `Legal Name: ${doc.carrierLegalName}` : null,
2575
2682
  doc.carrierNaicNumber ? `NAIC: ${doc.carrierNaicNumber}` : null,
@@ -2586,94 +2693,83 @@ function chunkDocument(doc) {
2586
2693
  doc.isPackage != null ? `Package: ${doc.isPackage ? "Yes" : "No"}` : null,
2587
2694
  doc.security ? `Security: ${doc.security}` : null,
2588
2695
  doc.policyTypes?.length ? `Policy Types: ${doc.policyTypes.join(", ")}` : null
2589
- ].filter(Boolean).join("\n"),
2590
- metadata: stringMetadata({ carrier: doc.carrier, documentType: doc.type })
2591
- });
2696
+ ]),
2697
+ { carrier: doc.carrier, documentType: doc.type }
2698
+ );
2592
2699
  if (doc.summary) {
2593
- chunks.push({
2594
- id: `${docId}:declaration:summary`,
2595
- documentId: docId,
2596
- type: "declaration",
2597
- text: `Policy Summary: ${doc.summary}`,
2598
- metadata: stringMetadata({ documentType: doc.type })
2599
- });
2700
+ pushChunk("declaration:summary", "declaration", `Policy Summary: ${doc.summary}`, { documentType: doc.type });
2600
2701
  }
2601
2702
  if (doc.type === "policy") {
2602
2703
  const pol = doc;
2603
- chunks.push({
2604
- id: `${docId}:declaration:policy_details`,
2605
- documentId: docId,
2606
- type: "declaration",
2607
- text: [
2704
+ pushChunk(
2705
+ "declaration:policy_details",
2706
+ "declaration",
2707
+ lines([
2608
2708
  `Policy Number: ${pol.policyNumber}`,
2609
2709
  `Effective Date: ${pol.effectiveDate}`,
2610
2710
  pol.expirationDate ? `Expiration Date: ${pol.expirationDate}` : null,
2611
2711
  pol.policyTermType ? `Term Type: ${pol.policyTermType}` : null,
2612
2712
  pol.effectiveTime ? `Effective Time: ${pol.effectiveTime}` : null,
2613
2713
  pol.nextReviewDate ? `Next Review Date: ${pol.nextReviewDate}` : null
2614
- ].filter(Boolean).join("\n"),
2615
- metadata: stringMetadata({
2714
+ ]),
2715
+ {
2616
2716
  policyNumber: pol.policyNumber,
2617
2717
  effectiveDate: pol.effectiveDate,
2618
2718
  expirationDate: pol.expirationDate,
2619
2719
  documentType: doc.type
2620
- })
2621
- });
2720
+ }
2721
+ );
2622
2722
  } else {
2623
2723
  const quote = doc;
2624
- chunks.push({
2625
- id: `${docId}:declaration:quote_details`,
2626
- documentId: docId,
2627
- type: "declaration",
2628
- text: [
2724
+ pushChunk(
2725
+ "declaration:quote_details",
2726
+ "declaration",
2727
+ lines([
2629
2728
  `Quote Number: ${quote.quoteNumber}`,
2630
2729
  quote.proposedEffectiveDate ? `Proposed Effective Date: ${quote.proposedEffectiveDate}` : null,
2631
2730
  quote.proposedExpirationDate ? `Proposed Expiration Date: ${quote.proposedExpirationDate}` : null,
2632
2731
  quote.quoteExpirationDate ? `Quote Expiration Date: ${quote.quoteExpirationDate}` : null
2633
- ].filter(Boolean).join("\n"),
2634
- metadata: stringMetadata({
2732
+ ]),
2733
+ {
2635
2734
  quoteNumber: quote.quoteNumber,
2636
2735
  documentType: doc.type
2637
- })
2638
- });
2736
+ }
2737
+ );
2639
2738
  }
2640
2739
  if (doc.insurer) {
2641
- chunks.push({
2642
- id: `${docId}:party:insurer`,
2643
- documentId: docId,
2644
- type: "party",
2645
- text: [
2740
+ pushChunk(
2741
+ "party:insurer",
2742
+ "party",
2743
+ lines([
2646
2744
  `Insurer: ${doc.insurer.legalName}`,
2647
2745
  doc.insurer.naicNumber ? `NAIC: ${doc.insurer.naicNumber}` : null,
2648
2746
  doc.insurer.amBestRating ? `AM Best Rating: ${doc.insurer.amBestRating}` : null,
2649
2747
  doc.insurer.amBestNumber ? `AM Best Number: ${doc.insurer.amBestNumber}` : null,
2650
2748
  doc.insurer.admittedStatus ? `Admitted Status: ${doc.insurer.admittedStatus}` : null,
2651
2749
  doc.insurer.stateOfDomicile ? `State of Domicile: ${doc.insurer.stateOfDomicile}` : null
2652
- ].filter(Boolean).join("\n"),
2653
- metadata: stringMetadata({ partyRole: "insurer", partyName: doc.insurer.legalName, documentType: doc.type })
2654
- });
2750
+ ]),
2751
+ { partyRole: "insurer", partyName: doc.insurer.legalName, documentType: doc.type }
2752
+ );
2655
2753
  }
2656
2754
  if (doc.producer) {
2657
- chunks.push({
2658
- id: `${docId}:party:producer`,
2659
- documentId: docId,
2660
- type: "party",
2661
- text: [
2755
+ pushChunk(
2756
+ "party:producer",
2757
+ "party",
2758
+ lines([
2662
2759
  `Producer/Broker: ${doc.producer.agencyName}`,
2663
2760
  doc.producer.contactName ? `Contact: ${doc.producer.contactName}` : null,
2664
2761
  doc.producer.licenseNumber ? `License: ${doc.producer.licenseNumber}` : null,
2665
2762
  doc.producer.phone ? `Phone: ${doc.producer.phone}` : null,
2666
2763
  doc.producer.email ? `Email: ${doc.producer.email}` : null,
2667
2764
  doc.producer.address ? `Address: ${formatAddress(doc.producer.address)}` : null
2668
- ].filter(Boolean).join("\n"),
2669
- metadata: stringMetadata({ partyRole: "producer", partyName: doc.producer.agencyName, documentType: doc.type })
2670
- });
2765
+ ]),
2766
+ { partyRole: "producer", partyName: doc.producer.agencyName, documentType: doc.type }
2767
+ );
2671
2768
  }
2672
- chunks.push({
2673
- id: `${docId}:named_insured:0`,
2674
- documentId: docId,
2675
- type: "named_insured",
2676
- text: [
2769
+ pushChunk(
2770
+ "named_insured:0",
2771
+ "named_insured",
2772
+ lines([
2677
2773
  `Insured: ${doc.insuredName}`,
2678
2774
  doc.insuredDba ? `DBA: ${doc.insuredDba}` : null,
2679
2775
  doc.insuredEntityType ? `Entity Type: ${doc.insuredEntityType}` : null,
@@ -2681,36 +2777,34 @@ function chunkDocument(doc) {
2681
2777
  doc.insuredSicCode ? `SIC: ${doc.insuredSicCode}` : null,
2682
2778
  doc.insuredNaicsCode ? `NAICS: ${doc.insuredNaicsCode}` : null,
2683
2779
  doc.insuredAddress ? `Address: ${formatAddress(doc.insuredAddress)}` : null
2684
- ].filter(Boolean).join("\n"),
2685
- metadata: stringMetadata({ insuredName: doc.insuredName, documentType: doc.type })
2686
- });
2780
+ ]),
2781
+ { insuredName: doc.insuredName, documentType: doc.type }
2782
+ );
2687
2783
  doc.additionalNamedInsureds?.forEach((insured, i) => {
2688
- chunks.push({
2689
- id: `${docId}:named_insured:${i + 1}`,
2690
- documentId: docId,
2691
- type: "named_insured",
2692
- text: [
2784
+ pushChunk(
2785
+ `named_insured:${i + 1}`,
2786
+ "named_insured",
2787
+ lines([
2693
2788
  `Additional Named Insured: ${insured.name}`,
2694
2789
  insured.address ? `Address: ${formatAddress(insured.address)}` : null,
2695
2790
  insured.relationship ? `Relationship: ${insured.relationship}` : null
2696
- ].filter(Boolean).join("\n"),
2697
- metadata: stringMetadata({ insuredName: insured.name, role: "additional_named_insured", documentType: doc.type })
2698
- });
2791
+ ]),
2792
+ { insuredName: insured.name, role: "additional_named_insured", documentType: doc.type }
2793
+ );
2699
2794
  });
2700
2795
  doc.coverages.forEach((cov, i) => {
2701
- chunks.push({
2702
- id: `${docId}:coverage:${i}`,
2703
- documentId: docId,
2704
- type: "coverage",
2705
- text: [
2796
+ pushChunk(
2797
+ `coverage:${i}`,
2798
+ "coverage",
2799
+ lines([
2706
2800
  `Coverage: ${cov.name}`,
2707
2801
  `Limit: ${cov.limit}`,
2708
2802
  cov.limitValueType ? `Limit Type: ${cov.limitValueType}` : null,
2709
2803
  cov.deductible ? `Deductible: ${cov.deductible}` : null,
2710
2804
  cov.deductibleValueType ? `Deductible Type: ${cov.deductibleValueType}` : null,
2711
2805
  cov.originalContent ? `Source: ${cov.originalContent}` : null
2712
- ].filter(Boolean).join("\n"),
2713
- metadata: stringMetadata({
2806
+ ]),
2807
+ {
2714
2808
  coverageName: cov.name,
2715
2809
  limit: cov.limit,
2716
2810
  limitValueType: cov.limitValueType,
@@ -2720,15 +2814,14 @@ function chunkDocument(doc) {
2720
2814
  pageNumber: cov.pageNumber,
2721
2815
  sectionRef: cov.sectionRef,
2722
2816
  documentType: doc.type
2723
- })
2724
- });
2817
+ }
2818
+ );
2725
2819
  });
2726
2820
  doc.enrichedCoverages?.forEach((cov, i) => {
2727
- chunks.push({
2728
- id: `${docId}:coverage:enriched:${i}`,
2729
- documentId: docId,
2730
- type: "coverage",
2731
- text: [
2821
+ pushChunk(
2822
+ `coverage:enriched:${i}`,
2823
+ "coverage",
2824
+ lines([
2732
2825
  `Coverage: ${cov.name}`,
2733
2826
  cov.coverageCode ? `Code: ${cov.coverageCode}` : null,
2734
2827
  `Limit: ${cov.limit}`,
@@ -2745,8 +2838,8 @@ function chunkDocument(doc) {
2745
2838
  `Included: ${cov.included ? "Yes" : "No"}`,
2746
2839
  cov.premium ? `Premium: ${cov.premium}` : null,
2747
2840
  cov.originalContent ? `Source: ${cov.originalContent}` : null
2748
- ].filter(Boolean).join("\n"),
2749
- metadata: stringMetadata({
2841
+ ]),
2842
+ {
2750
2843
  coverageName: cov.name,
2751
2844
  coverageCode: cov.coverageCode,
2752
2845
  limit: cov.limit,
@@ -2755,8 +2848,8 @@ function chunkDocument(doc) {
2755
2848
  pageNumber: cov.pageNumber,
2756
2849
  included: cov.included,
2757
2850
  documentType: doc.type
2758
- })
2759
- });
2851
+ }
2852
+ );
2760
2853
  });
2761
2854
  if (doc.limits) {
2762
2855
  const limitLines = ["Limit Schedule"];
@@ -2780,39 +2873,31 @@ function chunkDocument(doc) {
2780
2873
  limitLines.push(`Employers Liability \u2014 Each Accident: ${lim.employersLiability.eachAccident}, Disease Policy Limit: ${lim.employersLiability.diseasePolicyLimit}, Disease Each Employee: ${lim.employersLiability.diseaseEachEmployee}`);
2781
2874
  }
2782
2875
  if (lim.defenseCostTreatment) limitLines.push(`Defense Cost Treatment: ${lim.defenseCostTreatment}`);
2783
- chunks.push({
2784
- id: `${docId}:coverage:limit_schedule`,
2785
- documentId: docId,
2786
- type: "coverage",
2787
- text: limitLines.join("\n"),
2788
- metadata: stringMetadata({ coverageName: "limit_schedule", documentType: doc.type })
2789
- });
2876
+ pushChunk("coverage:limit_schedule", "coverage", limitLines.join("\n"), { coverageName: "limit_schedule", documentType: doc.type });
2790
2877
  lim.sublimits?.forEach((sub, i) => {
2791
- chunks.push({
2792
- id: `${docId}:coverage:sublimit:${i}`,
2793
- documentId: docId,
2794
- type: "coverage",
2795
- text: [
2878
+ pushChunk(
2879
+ `coverage:sublimit:${i}`,
2880
+ "coverage",
2881
+ lines([
2796
2882
  `Sublimit: ${sub.name}`,
2797
2883
  `Limit: ${sub.limit}`,
2798
2884
  sub.appliesTo ? `Applies To: ${sub.appliesTo}` : null,
2799
2885
  sub.deductible ? `Deductible: ${sub.deductible}` : null
2800
- ].filter(Boolean).join("\n"),
2801
- metadata: stringMetadata({ coverageName: sub.name, limit: sub.limit, documentType: doc.type })
2802
- });
2886
+ ]),
2887
+ { coverageName: sub.name, limit: sub.limit, documentType: doc.type }
2888
+ );
2803
2889
  });
2804
2890
  lim.sharedLimits?.forEach((sl, i) => {
2805
- chunks.push({
2806
- id: `${docId}:coverage:shared_limit:${i}`,
2807
- documentId: docId,
2808
- type: "coverage",
2809
- text: [
2891
+ pushChunk(
2892
+ `coverage:shared_limit:${i}`,
2893
+ "coverage",
2894
+ [
2810
2895
  `Shared Limit: ${sl.description}`,
2811
2896
  `Limit: ${sl.limit}`,
2812
2897
  `Coverage Parts: ${sl.coverageParts.join(", ")}`
2813
2898
  ].join("\n"),
2814
- metadata: stringMetadata({ coverageName: sl.description, limit: sl.limit, documentType: doc.type })
2815
- });
2899
+ { coverageName: sl.description, limit: sl.limit, documentType: doc.type }
2900
+ );
2816
2901
  });
2817
2902
  }
2818
2903
  if (doc.deductibles) {
@@ -2826,12 +2911,9 @@ function chunkDocument(doc) {
2826
2911
  if (ded.waitingPeriod) dedLines.push(`Waiting Period: ${ded.waitingPeriod}`);
2827
2912
  if (ded.appliesTo) dedLines.push(`Applies To: ${ded.appliesTo}`);
2828
2913
  if (dedLines.length > 1) {
2829
- chunks.push({
2830
- id: `${docId}:coverage:deductible_schedule`,
2831
- documentId: docId,
2832
- type: "coverage",
2833
- text: dedLines.join("\n"),
2834
- metadata: stringMetadata({ coverageName: "deductible_schedule", documentType: doc.type })
2914
+ pushChunk("coverage:deductible_schedule", "coverage", dedLines.join("\n"), {
2915
+ coverageName: "deductible_schedule",
2916
+ documentType: doc.type
2835
2917
  });
2836
2918
  }
2837
2919
  }
@@ -2843,99 +2925,90 @@ function chunkDocument(doc) {
2843
2925
  doc.extendedReportingPeriod?.supplementalPremium ? `Extended Reporting Period Premium: ${doc.extendedReportingPeriod.supplementalPremium}` : null
2844
2926
  ].filter(Boolean);
2845
2927
  if (claimsMadeLines.length > 0) {
2846
- chunks.push({
2847
- id: `${docId}:coverage:claims_made_details`,
2848
- documentId: docId,
2849
- type: "coverage",
2850
- text: claimsMadeLines.join("\n"),
2851
- metadata: stringMetadata({ coverageName: "claims_made_details", documentType: doc.type })
2928
+ pushChunk("coverage:claims_made_details", "coverage", claimsMadeLines.join("\n"), {
2929
+ coverageName: "claims_made_details",
2930
+ documentType: doc.type
2852
2931
  });
2853
2932
  }
2854
2933
  doc.formInventory?.forEach((form, i) => {
2855
- chunks.push({
2856
- id: `${docId}:declaration:form:${i}`,
2857
- documentId: docId,
2858
- type: "declaration",
2859
- text: [
2934
+ pushChunk(
2935
+ `declaration:form:${i}`,
2936
+ "declaration",
2937
+ lines([
2860
2938
  `Form: ${form.formNumber}`,
2861
2939
  form.title ? `Title: ${form.title}` : null,
2862
2940
  `Type: ${form.formType}`,
2863
2941
  form.editionDate ? `Edition: ${form.editionDate}` : null,
2864
2942
  form.pageStart ? `Pages: ${form.pageStart}${form.pageEnd ? `-${form.pageEnd}` : ""}` : null
2865
- ].filter(Boolean).join("\n"),
2866
- metadata: stringMetadata({
2943
+ ]),
2944
+ {
2867
2945
  formNumber: form.formNumber,
2868
2946
  formType: form.formType,
2869
2947
  documentType: doc.type
2870
- })
2871
- });
2948
+ }
2949
+ );
2872
2950
  });
2873
2951
  doc.endorsements?.forEach((end, i) => {
2874
- chunks.push({
2875
- id: `${docId}:endorsement:${i}`,
2876
- documentId: docId,
2877
- type: "endorsement",
2878
- text: `Endorsement: ${end.title}
2952
+ pushChunk(
2953
+ `endorsement:${i}`,
2954
+ "endorsement",
2955
+ `Endorsement: ${end.title}
2879
2956
  ${end.content}`.trim(),
2880
- metadata: stringMetadata({
2957
+ {
2881
2958
  endorsementType: end.endorsementType,
2882
2959
  formNumber: end.formNumber,
2883
2960
  pageStart: end.pageStart,
2884
2961
  pageEnd: end.pageEnd,
2885
2962
  documentType: doc.type
2886
- })
2887
- });
2963
+ }
2964
+ );
2888
2965
  });
2889
2966
  doc.exclusions?.forEach((exc, i) => {
2890
- chunks.push({
2891
- id: `${docId}:exclusion:${i}`,
2892
- documentId: docId,
2893
- type: "exclusion",
2894
- text: `Exclusion: ${exc.name}
2895
- ${exc.content}`.trim(),
2896
- metadata: stringMetadata({ formNumber: exc.formNumber, pageNumber: exc.pageNumber, documentType: doc.type })
2967
+ pushChunk(`exclusion:${i}`, "exclusion", `Exclusion: ${exc.name}
2968
+ ${exc.content}`.trim(), {
2969
+ formNumber: exc.formNumber,
2970
+ pageNumber: exc.pageNumber,
2971
+ documentType: doc.type
2897
2972
  });
2898
2973
  });
2899
2974
  doc.conditions?.forEach((cond, i) => {
2900
- chunks.push({
2901
- id: `${docId}:condition:${i}`,
2902
- documentId: docId,
2903
- type: "condition",
2904
- text: [
2975
+ pushChunk(
2976
+ `condition:${i}`,
2977
+ "condition",
2978
+ [
2905
2979
  `Condition: ${cond.name}`,
2906
2980
  `Type: ${cond.conditionType}`,
2907
2981
  cond.content,
2908
2982
  ...cond.keyValues?.map((kv) => `${kv.key}: ${kv.value}`) ?? []
2909
2983
  ].join("\n"),
2910
- metadata: stringMetadata({
2984
+ {
2911
2985
  conditionName: cond.name,
2912
2986
  conditionType: cond.conditionType,
2913
2987
  pageNumber: cond.pageNumber,
2914
2988
  documentType: doc.type
2915
- })
2916
- });
2989
+ }
2990
+ );
2917
2991
  });
2918
2992
  asRecordArray(extendedDoc.definitions).forEach((definition, i) => {
2919
2993
  const term = firstString(definition, ["term", "name", "title"]) ?? `Definition ${i + 1}`;
2920
2994
  const body = firstString(definition, ["definition", "content", "text", "meaning"]);
2921
- chunks.push({
2922
- id: `${docId}:definition:${i}`,
2923
- documentId: docId,
2924
- type: "definition",
2925
- text: [
2995
+ pushChunk(
2996
+ `definition:${i}`,
2997
+ "definition",
2998
+ lines([
2926
2999
  `Definition: ${term}`,
2927
3000
  body,
2928
3001
  firstString(definition, ["originalContent", "source"]) ? `Source: ${firstString(definition, ["originalContent", "source"])}` : null
2929
- ].filter(Boolean).join("\n"),
2930
- metadata: stringMetadata({
3002
+ ]),
3003
+ {
2931
3004
  term,
2932
3005
  formNumber: firstString(definition, ["formNumber"]),
2933
3006
  formTitle: firstString(definition, ["formTitle"]),
2934
3007
  pageNumber: typeof definition.pageNumber === "number" ? definition.pageNumber : void 0,
2935
3008
  sectionRef: firstString(definition, ["sectionRef", "sectionTitle"]),
2936
3009
  documentType: doc.type
2937
- })
2938
- });
3010
+ }
3011
+ );
2939
3012
  });
2940
3013
  const coveredReasons = asRecordArray(extendedDoc.coveredReasons ?? extendedDoc.covered_reasons);
2941
3014
  coveredReasons.forEach((coveredReason, i) => {
@@ -2943,18 +3016,17 @@ ${exc.content}`.trim(),
2943
3016
  const coverageName = firstString(coveredReason, ["coverageName", "coverage", "coveragePart"]);
2944
3017
  const reasonNumber = firstString(coveredReason, ["reasonNumber", "number"]);
2945
3018
  const body = firstString(coveredReason, ["content", "description", "text", "coverageGrant"]);
2946
- chunks.push({
2947
- id: `${docId}:covered_reason:${i}`,
2948
- documentId: docId,
2949
- type: "covered_reason",
2950
- text: [
3019
+ pushChunk(
3020
+ `covered_reason:${i}`,
3021
+ "covered_reason",
3022
+ lines([
2951
3023
  coverageName ? `Coverage: ${coverageName}` : null,
2952
3024
  reasonNumber ? `Reason Number: ${reasonNumber}` : null,
2953
3025
  `Covered Reason: ${title}`,
2954
3026
  body,
2955
3027
  firstString(coveredReason, ["originalContent", "source"]) ? `Source: ${firstString(coveredReason, ["originalContent", "source"])}` : null
2956
- ].filter(Boolean).join("\n"),
2957
- metadata: stringMetadata({
3028
+ ]),
3029
+ {
2958
3030
  coverageName,
2959
3031
  reasonNumber,
2960
3032
  title,
@@ -2963,21 +3035,20 @@ ${exc.content}`.trim(),
2963
3035
  pageNumber: typeof coveredReason.pageNumber === "number" ? coveredReason.pageNumber : void 0,
2964
3036
  sectionRef: firstString(coveredReason, ["sectionRef", "sectionTitle"]),
2965
3037
  documentType: doc.type
2966
- })
2967
- });
3038
+ }
3039
+ );
2968
3040
  const conditions = Array.isArray(coveredReason.conditions) ? coveredReason.conditions.filter((condition) => typeof condition === "string" && condition.trim().length > 0) : [];
2969
3041
  conditions.forEach((condition, conditionIndex) => {
2970
- chunks.push({
2971
- id: `${docId}:covered_reason:${i}:condition:${conditionIndex}`,
2972
- documentId: docId,
2973
- type: "covered_reason",
2974
- text: [
3042
+ pushChunk(
3043
+ `covered_reason:${i}:condition:${conditionIndex}`,
3044
+ "covered_reason",
3045
+ lines([
2975
3046
  coverageName ? `Coverage: ${coverageName}` : null,
2976
3047
  reasonNumber ? `Reason Number: ${reasonNumber}` : null,
2977
3048
  `Covered Reason Condition: ${title}`,
2978
3049
  condition
2979
- ].filter(Boolean).join("\n"),
2980
- metadata: stringMetadata({
3050
+ ]),
3051
+ {
2981
3052
  coverageName,
2982
3053
  reasonNumber,
2983
3054
  title,
@@ -2987,8 +3058,8 @@ ${exc.content}`.trim(),
2987
3058
  pageNumber: typeof coveredReason.pageNumber === "number" ? coveredReason.pageNumber : void 0,
2988
3059
  sectionRef: firstString(coveredReason, ["sectionRef", "sectionTitle"]),
2989
3060
  documentType: doc.type
2990
- })
2991
- });
3061
+ }
3062
+ );
2992
3063
  });
2993
3064
  });
2994
3065
  if (doc.declarations) {
@@ -3003,50 +3074,42 @@ ${exc.content}`.trim(),
3003
3074
  const declMeta = { documentType: doc.type };
3004
3075
  if (typeof decl.formType === "string") declMeta.formType = decl.formType;
3005
3076
  if (typeof decl.line === "string") declMeta.declarationLine = decl.line;
3006
- chunks.push({
3007
- id: `${docId}:declaration:0`,
3008
- documentId: docId,
3009
- type: "declaration",
3010
- text: `Declarations
3011
- ${declLines.join("\n")}`,
3012
- metadata: stringMetadata(declMeta)
3013
- });
3077
+ pushChunk("declaration:0", "declaration", `Declarations
3078
+ ${declLines.join("\n")}`, declMeta);
3014
3079
  }
3015
3080
  }
3016
3081
  doc.sections?.forEach((sec, i) => {
3017
3082
  const hasSubsections = sec.subsections && sec.subsections.length > 0;
3018
3083
  const contentLength = sec.content.length;
3019
3084
  if (hasSubsections) {
3020
- chunks.push({
3021
- id: `${docId}:section:${i}`,
3022
- documentId: docId,
3023
- type: "section",
3024
- text: `Section: ${sec.title}
3085
+ pushChunk(
3086
+ `section:${i}`,
3087
+ "section",
3088
+ `Section: ${sec.title}
3025
3089
  ${sec.content}`,
3026
- metadata: stringMetadata({
3090
+ {
3027
3091
  sectionType: sec.type,
3028
3092
  sectionNumber: sec.sectionNumber,
3029
3093
  pageStart: sec.pageStart,
3030
3094
  pageEnd: sec.pageEnd,
3031
3095
  documentType: doc.type,
3032
3096
  hasSubsections: "true"
3033
- })
3034
- });
3097
+ }
3098
+ );
3035
3099
  sec.subsections.forEach((sub, j) => {
3036
- chunks.push({
3037
- id: `${docId}:section:${i}:sub:${j}`,
3038
- documentId: docId,
3039
- type: "section",
3040
- text: `${sec.title} > ${sub.title}
3100
+ pushChunk(
3101
+ `section:${i}:sub:${j}`,
3102
+ "section",
3103
+ `${sec.title} > ${sub.title}
3041
3104
  ${sub.content}`,
3042
- metadata: stringMetadata({
3105
+ {
3043
3106
  sectionType: sec.type,
3044
3107
  parentSection: sec.title,
3045
3108
  sectionNumber: sub.sectionNumber,
3046
3109
  pageNumber: sub.pageNumber,
3047
3110
  documentType: doc.type
3048
- })
3049
- });
3111
+ }
3112
+ );
3050
3113
  });
3051
3114
  } else if (contentLength > 2e3) {
3052
3115
  const paragraphs = sec.content.split(/\n\n+/);
@@ -3054,58 +3117,55 @@ ${sub.content}`,
3054
3117
  let chunkIndex = 0;
3055
3118
  for (const para of paragraphs) {
3056
3119
  if (currentChunk.length + para.length > 1e3 && currentChunk.length > 0) {
3057
- chunks.push({
3058
- id: `${docId}:section:${i}:part:${chunkIndex}`,
3059
- documentId: docId,
3060
- type: "section",
3061
- text: `Section: ${sec.title} (part ${chunkIndex + 1})
3120
+ pushChunk(
3121
+ `section:${i}:part:${chunkIndex}`,
3122
+ "section",
3123
+ `Section: ${sec.title} (part ${chunkIndex + 1})
3062
3124
  ${currentChunk.trim()}`,
3063
- metadata: stringMetadata({
3125
+ {
3064
3126
  sectionType: sec.type,
3065
3127
  sectionNumber: sec.sectionNumber,
3066
3128
  pageStart: sec.pageStart,
3067
3129
  pageEnd: sec.pageEnd,
3068
3130
  documentType: doc.type,
3069
3131
  partIndex: chunkIndex
3070
- })
3071
- });
3132
+ }
3133
+ );
3072
3134
  currentChunk = "";
3073
3135
  chunkIndex++;
3074
3136
  }
3075
3137
  currentChunk += (currentChunk ? "\n\n" : "") + para;
3076
3138
  }
3077
3139
  if (currentChunk.trim()) {
3078
- chunks.push({
3079
- id: `${docId}:section:${i}:part:${chunkIndex}`,
3080
- documentId: docId,
3081
- type: "section",
3082
- text: `Section: ${sec.title} (part ${chunkIndex + 1})
3140
+ pushChunk(
3141
+ `section:${i}:part:${chunkIndex}`,
3142
+ "section",
3143
+ `Section: ${sec.title} (part ${chunkIndex + 1})
3083
3144
  ${currentChunk.trim()}`,
3084
- metadata: stringMetadata({
3145
+ {
3085
3146
  sectionType: sec.type,
3086
3147
  sectionNumber: sec.sectionNumber,
3087
3148
  pageStart: sec.pageStart,
3088
3149
  pageEnd: sec.pageEnd,
3089
3150
  documentType: doc.type,
3090
3151
  partIndex: chunkIndex
3091
- })
3092
- });
3152
+ }
3153
+ );
3093
3154
  }
3094
3155
  } else {
3095
- chunks.push({
3096
- id: `${docId}:section:${i}`,
3097
- documentId: docId,
3098
- type: "section",
3099
- text: `Section: ${sec.title}
3156
+ pushChunk(
3157
+ `section:${i}`,
3158
+ "section",
3159
+ `Section: ${sec.title}
3100
3160
  ${sec.content}`,
3101
- metadata: stringMetadata({
3161
+ {
3102
3162
  sectionType: sec.type,
3103
3163
  sectionNumber: sec.sectionNumber,
3104
3164
  pageStart: sec.pageStart,
3105
3165
  pageEnd: sec.pageEnd,
3106
3166
  documentType: doc.type
3107
- })
3108
- });
3167
+ }
3168
+ );
3109
3169
  }
3110
3170
  });
3111
3171
  doc.locations?.forEach((loc, i) => {
@@ -4683,12 +4743,15 @@ var ReviewResultSchema = z21.object({
4683
4743
  description: z21.string()
4684
4744
  }))
4685
4745
  });
4686
- function buildReviewPrompt(templateExpected, extractedKeys, extractionSummary, pageMapSummary) {
4687
- return `You are reviewing an extraction for completeness and quality. Compare what was expected vs what was found.
4746
+ function buildReviewPrompt(templateExpected, extractedKeys, extractionSummary, pageMapSummary, extractorCatalog) {
4747
+ return `You are the extraction coordinator for an insurance-document agent system. Review the current extraction state, decide whether the result is complete enough, and choose any follow-up extractor tasks needed to improve it.
4688
4748
 
4689
4749
  EXPECTED FIELDS (from document type template):
4690
4750
  ${templateExpected.map((f) => `- ${f}`).join("\n")}
4691
4751
 
4752
+ AVAILABLE FOLLOW-UP EXTRACTORS:
4753
+ ${extractorCatalog}
4754
+
4692
4755
  FIELDS ALREADY EXTRACTED:
4693
4756
  ${extractedKeys.map((f) => `- ${f}`).join("\n")}
4694
4757
 
@@ -4702,7 +4765,7 @@ Determine:
4702
4765
  1. Is the extraction complete enough?
4703
4766
  2. What fields are missing?
4704
4767
  3. What quality issues are present?
4705
- 4. Should any additional extraction tasks be dispatched?
4768
+ 4. Which follow-up extraction tasks, if any, should be dispatched?
4706
4769
 
4707
4770
  Mark the extraction as NOT complete if any of these are true:
4708
4771
  - required fields are missing
@@ -4713,7 +4776,9 @@ Mark the extraction as NOT complete if any of these are true:
4713
4776
  - page assignments suggest declaration, schedule, endorsement, exclusion, or condition pages were not actually extracted with the matching focused extractor
4714
4777
  - a focused extractor exists but returned too little substance for the relevant pages
4715
4778
 
4716
- When reviewing CURRENT EXTRACTION SUMMARY, compare the page-map counts to extracted counts. For definitions and covered_reasons, missing extraction should produce a quality issue and a narrow follow-up task over the mapped page range.
4779
+ When reviewing CURRENT EXTRACTION SUMMARY, compare the page-map counts to extracted counts. If an assigned extractor produced no useful records, produce a quality issue and a narrow follow-up task over the mapped page range.
4780
+
4781
+ Choose follow-up tasks from AVAILABLE FOLLOW-UP EXTRACTORS. You may dispatch any listed extractor when the page map, current extraction summary, or quality evidence shows that the focused extraction is missing, generic, referential, or too thin. Do not invent extractor names.
4717
4782
 
4718
4783
  Return JSON:
4719
4784
  {
@@ -4725,7 +4790,7 @@ Return JSON:
4725
4790
  ]
4726
4791
  }
4727
4792
 
4728
- Use the page map to target follow-up extraction pages precisely. Prefer narrow, declaration/schedule-focused follow-up tasks over broad page ranges.
4793
+ Use the page map to target follow-up extraction pages precisely. Prefer narrow, declaration/schedule-focused follow-up tasks over broad page ranges. If no additional model work is likely to improve the extraction, return an empty additionalTasks array.
4729
4794
 
4730
4795
  Respond with JSON only.`;
4731
4796
  }
@@ -5260,6 +5325,7 @@ var SectionsSchema = z32.object({
5260
5325
  "policy_form",
5261
5326
  "endorsement",
5262
5327
  "application",
5328
+ "covered_reason",
5263
5329
  "exclusion",
5264
5330
  "condition",
5265
5331
  "definition",
@@ -5283,6 +5349,7 @@ For each section, classify its type:
5283
5349
  - "policy_form" \u2014 named ISO or proprietary forms (e.g. CG 00 01, IL 00 17). All sections within a named form should be typed as "policy_form"
5284
5350
  - "endorsement" \u2014 standalone endorsements modifying the base policy
5285
5351
  - "application" \u2014 the insurance application or supplemental application
5352
+ - "covered_reason" \u2014 affirmative grants of coverage, covered causes of loss, covered perils, or named covered events
5286
5353
  - "insuring_agreement" \u2014 the insuring agreement clause (only if standalone, not inside a policy_form)
5287
5354
  - "exclusion", "condition", "definition" \u2014 for standalone sections only
5288
5355
  - "schedule" \u2014 coverage or rating schedules
@@ -5460,6 +5527,53 @@ Return JSON only.`;
5460
5527
  }
5461
5528
 
5462
5529
  // src/prompts/extractors/index.ts
5530
/**
 * Narrow an arbitrary value to something that can be read like a record.
 *
 * @param {unknown} data - Any value.
 * @returns {object | undefined} `data` itself when it is truthy and
 *   `typeof data === "object"` (arrays pass this check too); otherwise
 *   `undefined`.
 */
function asRecord(data) {
  if (!data || typeof data !== "object") {
    return void 0;
  }
  return data;
}
5533
/**
 * Read the `sections` array off an extractor payload.
 *
 * @param {unknown} data - Extractor output of unknown shape.
 * @returns {Array} The `sections` property when it is an array, otherwise a
 *   fresh empty array.
 */
function getSections2(data) {
  const record = asRecord(data);
  const candidate = record?.sections;
  if (!Array.isArray(candidate)) {
    return [];
  }
  return candidate;
}
5537
/**
 * Report whether a covered-reasons payload carries no records.
 *
 * Looks at `coveredReasons` first and `covered_reasons` second; the first of
 * the two that is an array decides the answer.
 *
 * @param {unknown} data - Extractor output of unknown shape.
 * @returns {boolean} `true` when `data` is not object-like, when neither key
 *   holds an array, or when the chosen array has length 0.
 */
function isCoveredReasonsEmpty(data) {
  const record = asRecord(data);
  if (!record) return true;
  for (const key of ["coveredReasons", "covered_reasons"]) {
    const candidate = record[key];
    if (Array.isArray(candidate)) {
      return candidate.length === 0;
    }
  }
  return true;
}
5543
/**
 * Report whether a definitions payload carries no records.
 *
 * @param {unknown} data - Extractor output of unknown shape.
 * @returns {boolean} `false` only when `data.definitions` is a non-empty
 *   array; `true` in every other case.
 */
function isDefinitionsEmpty(data) {
  const candidate = asRecord(data)?.definitions;
  if (Array.isArray(candidate)) {
    return candidate.length === 0;
  }
  return true;
}
5547
/**
 * Decide whether a section record should be treated as a covered reason when
 * deriving focused results from the sections extractor.
 *
 * @param {{ type?: unknown, title?: unknown }} section - Section record.
 * @returns {boolean} `true` when the lowercased type is exactly
 *   "covered_reason", or the lowercased title contains one of the marker
 *   phrases listed below.
 */
function sectionLooksLikeCoveredReason(section) {
  const normalizedType = String(section.type ?? "").toLowerCase();
  const normalizedTitle = String(section.title ?? "").toLowerCase();
  if (normalizedType === "covered_reason") {
    return true;
  }
  const titleMarkers = [
    "covered cause",
    "covered reason",
    "covered peril",
    "named peril",
    "insuring agreement"
  ];
  return titleMarkers.some((marker) => normalizedTitle.includes(marker));
}
5552
/**
 * Build a covered-reasons payload out of a sections-extractor payload.
 *
 * Every section accepted by `sectionLooksLikeCoveredReason` is mapped to a
 * covered-reason record; records whose content is blank after trimming are
 * dropped.
 *
 * @param {unknown} data - Output of the sections extractor.
 * @returns {{ coveredReasons: object[] } | undefined} The derived payload, or
 *   `undefined` when no usable record was produced.
 */
function deriveCoveredReasonsFromSections(data) {
  const stringOrUndefined = (value) => (typeof value === "string" ? value : void 0);
  const coveredReasons = getSections2(data).flatMap((section) => {
    if (!sectionLooksLikeCoveredReason(section)) {
      return [];
    }
    const content = String(section.content ?? "");
    if (content.trim().length === 0) {
      return [];
    }
    return [{
      coverageName: String(section.coverageName ?? section.formTitle ?? section.title ?? "Covered Reasons"),
      title: stringOrUndefined(section.title),
      content,
      // The section's first page stands in for the record's page number.
      pageNumber: typeof section.pageStart === "number" ? section.pageStart : void 0,
      formNumber: stringOrUndefined(section.formNumber),
      formTitle: stringOrUndefined(section.formTitle),
      sectionRef: stringOrUndefined(section.sectionNumber),
      // Only the first 500 characters are kept as the source excerpt.
      originalContent: typeof section.content === "string" ? section.content.slice(0, 500) : void 0
    }];
  });
  if (coveredReasons.length === 0) {
    return void 0;
  }
  return { coveredReasons };
}
5565
/**
 * Build a definitions payload out of a sections-extractor payload.
 *
 * Sections whose lowercased `type` is "definition" become definition records;
 * records whose definition text is blank after trimming are dropped.
 *
 * @param {unknown} data - Output of the sections extractor.
 * @returns {{ definitions: object[] } | undefined} The derived payload, or
 *   `undefined` when no usable record was produced.
 */
function deriveDefinitionsFromSections(data) {
  const stringOrUndefined = (value) => (typeof value === "string" ? value : void 0);
  const definitions = getSections2(data).flatMap((section) => {
    if (String(section.type ?? "").toLowerCase() !== "definition") {
      return [];
    }
    const definition = String(section.content ?? "");
    if (definition.trim().length === 0) {
      return [];
    }
    return [{
      term: String(section.title ?? "Definitions"),
      definition,
      // The section's first page stands in for the record's page number.
      pageNumber: typeof section.pageStart === "number" ? section.pageStart : void 0,
      formNumber: stringOrUndefined(section.formNumber),
      formTitle: stringOrUndefined(section.formTitle),
      sectionRef: stringOrUndefined(section.sectionNumber),
      // Only the first 500 characters are kept as the source excerpt.
      originalContent: typeof section.content === "string" ? section.content.slice(0, 500) : void 0
    }];
  });
  if (definitions.length === 0) {
    return void 0;
  }
  return { definitions };
}
5463
5577
  var EXTRACTORS = {
5464
5578
  carrier_info: { buildPrompt: buildCarrierInfoPrompt, schema: CarrierInfoSchema, maxTokens: 2048 },
5465
5579
  named_insured: { buildPrompt: buildNamedInsuredPrompt, schema: NamedInsuredSchema2, maxTokens: 2048 },
@@ -5472,12 +5586,36 @@ var EXTRACTORS = {
5472
5586
  loss_history: { buildPrompt: buildLossHistoryPrompt, schema: LossHistorySchema, maxTokens: 4096 },
5473
5587
  sections: { buildPrompt: buildSectionsPrompt, schema: SectionsSchema, maxTokens: 8192 },
5474
5588
  supplementary: { buildPrompt: buildSupplementaryPrompt, schema: SupplementarySchema, maxTokens: 2048 },
5475
- definitions: { buildPrompt: buildDefinitionsPrompt, schema: DefinitionsSchema, maxTokens: 8192 },
5476
- covered_reasons: { buildPrompt: buildCoveredReasonsPrompt, schema: CoveredReasonsSchema, maxTokens: 8192 }
5589
+ definitions: {
5590
+ buildPrompt: buildDefinitionsPrompt,
5591
+ schema: DefinitionsSchema,
5592
+ maxTokens: 8192,
5593
+ fallback: {
5594
+ extractorName: "sections",
5595
+ isEmpty: isDefinitionsEmpty,
5596
+ deriveFocusedResult: deriveDefinitionsFromSections
5597
+ }
5598
+ },
5599
+ covered_reasons: {
5600
+ buildPrompt: buildCoveredReasonsPrompt,
5601
+ schema: CoveredReasonsSchema,
5602
+ maxTokens: 8192,
5603
+ fallback: {
5604
+ extractorName: "sections",
5605
+ isEmpty: isCoveredReasonsEmpty,
5606
+ deriveFocusedResult: deriveCoveredReasonsFromSections
5607
+ }
5608
+ }
5477
5609
  };
5478
5610
  function getExtractor(name) {
5479
5611
  return EXTRACTORS[name];
5480
5612
  }
5613
/**
 * Render the extractor registry as a bullet list for use in a prompt.
 *
 * Each registered extractor yields one line of the form
 * `- name (maxTokens: N)`, with `; fallback: other` appended inside the
 * parentheses when the extractor declares a fallback. A missing `maxTokens`
 * is shown as 4096.
 *
 * @returns {string} Newline-joined catalog, one line per extractor.
 */
function formatExtractorCatalogForPrompt() {
  const catalogLines = [];
  for (const [name, extractor] of Object.entries(EXTRACTORS)) {
    const tokenBudget = extractor.maxTokens ?? 4096;
    const fallbackNote = extractor.fallback ? `; fallback: ${extractor.fallback.extractorName}` : "";
    catalogLines.push(`- ${name} (maxTokens: ${tokenBudget}${fallbackNote})`);
  }
  return catalogLines.join("\n");
}
5481
5619
 
5482
5620
  // src/extraction/resolve-referential.ts
5483
5621
  import { z as z37 } from "zod";
@@ -5529,18 +5667,124 @@ Your task:
5529
5667
  Return JSON only.`;
5530
5668
  }
5531
5669
 
5532
- // src/extraction/resolve-referential.ts
5670
+ // src/extraction/heuristics.ts
5533
5671
  function looksReferential(value) {
5534
5672
  if (typeof value !== "string") return false;
5535
5673
  const normalized = value.toLowerCase();
5536
5674
  return normalized.includes("shown in the declarations") || normalized.includes("shown in declarations") || normalized.includes("shown in the schedule") || normalized.includes("as stated") || normalized.includes("if applicable");
5537
5675
  }
5676
/**
 * Heuristic: does this section record describe covered reasons?
 *
 * @param {{ type?: unknown, title?: unknown }} section - Section record.
 * @returns {boolean} `true` when the lowercased type is exactly
 *   "covered_reason", or the lowercased title contains "covered cause",
 *   "covered reason" or "covered peril".
 */
function looksCoveredReasonSection(section) {
  const normalizedTitle = String(section.title ?? "").toLowerCase();
  const normalizedType = String(section.type ?? "").toLowerCase();
  if (normalizedType === "covered_reason") {
    return true;
  }
  const titleMarkers = ["covered cause", "covered reason", "covered peril"];
  return titleMarkers.some((marker) => normalizedTitle.includes(marker));
}
5681
+
5682
+ // src/extraction/referential-workflow.ts
5683
/**
 * Normalize a value for case-insensitive text comparison.
 *
 * @param {unknown} value - Candidate text.
 * @returns {string} The trimmed, lowercased string, or "" when `value` is not
 *   a string.
 */
function normalizeText(value) {
  if (typeof value !== "string") {
    return "";
  }
  return value.trim().toLowerCase();
}
5686
/**
 * Test whether a value, once normalized, contains the target text.
 *
 * Only `value` is normalized here; `target` is used as passed in.
 *
 * @param {unknown} value - Text to search in.
 * @param {string} target - Text to search for.
 * @returns {boolean} `false` when either the normalized value or the target
 *   is empty; otherwise whether the normalized value includes the target.
 */
function containsTarget(value, target) {
  const haystack = normalizeText(value);
  if (!haystack || !target) {
    return false;
  }
  return haystack.includes(target);
}
5690
/**
 * Build a page range from possibly missing or malformed page numbers.
 *
 * @param {unknown} startPage - First page; must be a finite number above 0.
 * @param {unknown} endPage - Last page; used only when it is a finite number
 *   that is not below `startPage`.
 * @returns {{ startPage: number, endPage: number } | undefined} The range, or
 *   `undefined` when `startPage` is unusable. An unusable `endPage` collapses
 *   the range to the single start page.
 */
function pageRangeFrom(startPage, endPage) {
  const hasUsableStart = typeof startPage === "number" && Number.isFinite(startPage) && startPage > 0;
  if (!hasUsableStart) {
    return void 0;
  }
  let normalizedEnd = startPage;
  if (typeof endPage === "number" && Number.isFinite(endPage) && endPage >= startPage) {
    normalizedEnd = endPage;
  }
  return { startPage, endPage: normalizedEnd };
}
5697
/**
 * Classify a free-text reference target into a coarse kind.
 *
 * The rules are tried in the order listed and the first matching pattern
 * wins, so a target mentioning both declarations and a schedule is classified
 * as "declarations".
 *
 * @param {string | null | undefined} rawTarget - Reference target text.
 * @returns {{ raw: string, normalized: string, kind: string }} `raw` is the
 *   trimmed input (or "unknown" when blank or missing), `normalized` is `raw`
 *   lowercased, and `kind` is one of "declarations", "schedule", "item",
 *   "premises", "section", "policy" or "unknown".
 */
function parseReferentialTarget(rawTarget) {
  const raw = rawTarget?.trim() || "unknown";
  const normalized = raw.toLowerCase();
  const kindRules = [
    [/declarations?|dec\b|decs\b/, "declarations"],
    [/schedule|scheduled/, "schedule"],
    [/\bitem\b/, "item"],
    [/premises?|location|building/, "premises"],
    [/\bsection\b/, "section"],
    [/policy|coverage\s+part|coverage\s+form/, "policy"]
  ];
  let kind = "unknown";
  if (normalized !== "unknown") {
    const matchedRule = kindRules.find(([pattern]) => pattern.test(normalized));
    if (matchedRule) {
      kind = matchedRule[1];
    }
  }
  return { raw, normalized, kind };
}
5709
/**
 * Resolve a reference target to pages by literal text match against already
 * extracted sections and the form inventory.
 *
 * Sections are searched first (by title), then forms (by title, form type or
 * form number). A matching entry that lacks a usable page range is passed
 * over and the search continues with the next entry.
 *
 * @param {{ referenceTarget: string, sections: object[], formInventory: object[] }} params
 * @returns {{ startPage: number, endPage: number } | undefined} Page range of
 *   the first usable match, or `undefined` when nothing matches.
 */
function findLocalReferentialPages(params) {
  const needle = params.referenceTarget.toLowerCase();
  const firstUsableRange = (entries, isMatch) => {
    for (const entry of entries) {
      if (!isMatch(entry)) continue;
      const range = pageRangeFrom(entry.pageStart, entry.pageEnd);
      if (range) return range;
    }
    return void 0;
  };
  const fromSections = firstUsableRange(
    params.sections,
    (section) => containsTarget(section.title, needle)
  );
  if (fromSections) {
    return fromSections;
  }
  return firstUsableRange(
    params.formInventory,
    (form) => [form.title, form.formType, form.formNumber].some((field) => containsTarget(field, needle))
  );
}
5728
/**
 * Pick the first declarations- or schedule-like form in the inventory that
 * suits the parsed target kind and has a usable page range.
 *
 * Acceptance by kind:
 *  - "declarations": declarations forms only
 *  - "schedule", "item", "premises", "policy": schedule or declarations forms
 *  - any other kind: nothing is accepted
 *
 * @param {{ kind: string }} parsedTarget - Result of parseReferentialTarget.
 * @param {object[]} formInventory - Form inventory entries.
 * @returns {{ startPage: number, endPage: number } | undefined}
 */
function findDeclarationsSchedulePages(parsedTarget, formInventory) {
  const accepts = (isDeclarations, isSchedule) => {
    switch (parsedTarget.kind) {
      case "declarations":
        return isDeclarations;
      case "schedule":
      case "item":
      case "premises":
        return isSchedule || isDeclarations;
      case "policy":
        return isDeclarations || isSchedule;
      default:
        return false;
    }
  };
  for (const form of formInventory) {
    const formType = normalizeText(form.formType);
    const title = normalizeText(form.title);
    const isDeclarations = formType === "declarations" || /declarations?|dec\b|decs\b/.test(title);
    const isSchedule = /schedule|scheduled|coverage/.test(title) || formType === "coverage";
    if (!accepts(isDeclarations, isSchedule)) continue;
    const range = pageRangeFrom(form.pageStart, form.pageEnd);
    if (range) return range;
  }
  return void 0;
}
5742
/**
 * Pick the first extracted section that suits the parsed target kind and has
 * a usable page range.
 *
 * Matching by kind:
 *  - "declarations": type is "declarations" or the title mentions declarations
 *  - "schedule": type is "schedule" or the title mentions a schedule
 *  - "premises": the title mentions premises, location or building
 *  - "item": the title contains the word "item" or mentions a schedule
 *  - "section": the title contains the normalized target text
 *  - any other kind: nothing matches
 *
 * @param {{ kind: string, normalized: string }} parsedTarget - Result of
 *   parseReferentialTarget.
 * @param {object[]} sections - Extracted section records.
 * @returns {{ startPage: number, endPage: number } | undefined}
 */
function findSectionPages(parsedTarget, sections) {
  const matchesKind = (title, type) => {
    switch (parsedTarget.kind) {
      case "declarations":
        return type === "declarations" || /declarations?/.test(title);
      case "schedule":
        return type === "schedule" || /schedule|scheduled/.test(title);
      case "premises":
        return /premises?|location|building/.test(title);
      case "item":
        return /\bitem\b|schedule|scheduled/.test(title);
      case "section":
        return containsTarget(title, parsedTarget.normalized);
      default:
        return false;
    }
  };
  for (const section of sections) {
    const title = normalizeText(section.title);
    const type = normalizeText(section.type);
    if (!matchesKind(title, type)) continue;
    const range = pageRangeFrom(section.pageStart, section.pageEnd);
    if (range) return range;
  }
  return void 0;
}
5754
/**
 * Choose how a referential target should be resolved.
 *
 * Sources are tried in this order, and the first one with a page range wins:
 *  1. a page range the caller already found locally,
 *  2. a declarations/schedule form from the inventory,
 *  3. an extracted section matching the target kind.
 * When none applies, a target whose kind is "unknown" is skipped; any other
 * kind yields a "page_location" action.
 *
 * @param {{ referenceTarget: string, sections: object[], formInventory: object[], localPageRange?: object }} params
 * @returns {object} One of
 *   `{ kind: "lookup_pages", source, pageRange }`,
 *   `{ kind: "skip", reason }` or `{ kind: "page_location" }`.
 */
function decideReferentialResolutionAction(params) {
  const lookup = (source, pageRange) => ({ kind: "lookup_pages", source, pageRange });
  if (params.localPageRange) {
    return lookup("local", params.localPageRange);
  }
  const parsedTarget = parseReferentialTarget(params.referenceTarget);
  const formRange = findDeclarationsSchedulePages(parsedTarget, params.formInventory);
  if (formRange) {
    return lookup("declarations_schedule", formRange);
  }
  const sectionRange = findSectionPages(parsedTarget, params.sections);
  if (sectionRange) {
    return lookup("sections", sectionRange);
  }
  return parsedTarget.kind === "unknown"
    ? { kind: "skip", reason: "no concrete reference target" }
    : { kind: "page_location" };
}
5776
+
5777
+ // src/extraction/resolve-referential.ts
5538
5778
  function parseReferenceTarget(text) {
5539
5779
  if (typeof text !== "string") return void 0;
5540
5780
  const normalized = text.trim();
5541
5781
  if (!normalized) return void 0;
5542
5782
  const sectionMatch = normalized.match(/\b(Section\s+\d+[A-Za-z]?)/i);
5543
5783
  if (sectionMatch) return sectionMatch[1];
5784
+ const itemMatch = normalized.match(/\b(Item\s+\d+[A-Za-z]?)/i);
5785
+ if (itemMatch) return itemMatch[1];
5786
+ const premisesMatch = normalized.match(/\b(Premises?(?:\s+No\.?\s*\d+[A-Za-z]?|\s+\d+[A-Za-z]?)?)/i);
5787
+ if (premisesMatch) return premisesMatch[1].trim();
5544
5788
  if (/declarations/i.test(normalized)) return "Declarations";
5545
5789
  const scheduleMatch = normalized.match(/\b(Schedule(?:\s+of\s+[A-Za-z ]+)?)/i);
5546
5790
  if (scheduleMatch) return scheduleMatch[1].trim();
@@ -5566,26 +5810,31 @@ async function findReferencedPages(params) {
5566
5810
  pageCount,
5567
5811
  generateObject,
5568
5812
  providerOptions,
5813
+ trackUsage,
5569
5814
  log
5570
5815
  } = params;
5571
- const targetLower = referenceTarget.toLowerCase();
5572
- for (const section of sections) {
5573
- if (section.title && section.pageStart != null && section.title.toLowerCase().includes(targetLower)) {
5574
- return {
5575
- startPage: section.pageStart,
5576
- endPage: section.pageEnd ?? section.pageStart
5577
- };
5578
- }
5816
+ const localPageRange = findLocalReferentialPages({
5817
+ referenceTarget,
5818
+ sections,
5819
+ formInventory
5820
+ });
5821
+ const action = decideReferentialResolutionAction({
5822
+ referenceTarget,
5823
+ sections,
5824
+ formInventory,
5825
+ localPageRange
5826
+ });
5827
+ if (action.kind === "lookup_pages") {
5828
+ await log?.(
5829
+ `Referential target "${referenceTarget}" resolved to pages ${action.pageRange.startPage}-${action.pageRange.endPage} via ${action.source}.`
5830
+ );
5831
+ return action.pageRange;
5579
5832
  }
5580
- for (const form of formInventory) {
5581
- const titleMatch = form.title && form.title.toLowerCase().includes(targetLower);
5582
- const typeMatch = form.formType && form.formType.toLowerCase().includes(targetLower);
5583
- if ((titleMatch || typeMatch) && form.pageStart != null) {
5584
- return {
5585
- startPage: form.pageStart,
5586
- endPage: form.pageEnd ?? form.pageStart
5587
- };
5588
- }
5833
+ if (action.kind === "skip") {
5834
+ await log?.(
5835
+ `Skipping referential target "${referenceTarget}": ${action.reason}.`
5836
+ );
5837
+ return void 0;
5589
5838
  }
5590
5839
  try {
5591
5840
  const result = await safeGenerateObject(
@@ -5613,6 +5862,7 @@ Return JSON only.`,
5613
5862
  )
5614
5863
  }
5615
5864
  );
5865
+ trackUsage?.(result.usage);
5616
5866
  if (result.object.startPage > 0 && result.object.endPage > 0) {
5617
5867
  return {
5618
5868
  startPage: result.object.startPage,
@@ -5670,7 +5920,9 @@ async function resolveReferentialCoverages(params) {
5670
5920
  for (let i = 0; i < referentialCoverages.length; i++) {
5671
5921
  const cov = referentialCoverages[i];
5672
5922
  const refString = (looksReferential(cov.limit) ? cov.limit : void 0) ?? (looksReferential(cov.deductible) ? cov.deductible : void 0) ?? cov.limit ?? "";
5673
- const target = parseReferenceTarget(refString) ?? "unknown";
5923
+ const sectionRef = typeof cov.sectionRef === "string" ? cov.sectionRef : "";
5924
+ const parsedTarget = parseReferenceTarget(refString) ?? parseReferenceTarget(sectionRef) ?? sectionRef;
5925
+ const target = parsedTarget || "unknown";
5674
5926
  const group = targetGroups.get(target) ?? [];
5675
5927
  group.push({ coverage: cov, index: i });
5676
5928
  targetGroups.set(target, group);
@@ -5694,6 +5946,7 @@ async function resolveReferentialCoverages(params) {
5694
5946
  pageCount,
5695
5947
  generateObject,
5696
5948
  providerOptions,
5949
+ trackUsage,
5697
5950
  log
5698
5951
  });
5699
5952
  if (!pageRange) {
@@ -5811,6 +6064,78 @@ async function resolveReferentialCoverages(params) {
5811
6064
  };
5812
6065
  }
5813
6066
 
6067
+ // src/extraction/focused-dispatch.ts
6068
/**
 * Run a focused extractor for a task and, when that extractor declares a
 * fallback, retry with the fallback extractor if the focused run failed or
 * produced no usable records.
 *
 * Outcomes:
 *  - unknown extractor name: logs and resolves to `null`
 *  - focused run succeeds and is non-empty, or has no fallback: resolves to
 *    the focused result
 *  - focused run throws and there is no fallback: logs and resolves to `null`
 *  - fallback runs: resolves to `[fallbackResult, derivedFocusedResult]` when
 *    focused data could be derived from the fallback output, otherwise to
 *    `fallbackResult` alone
 *  - fallback extractor missing or throwing: resolves to `null`
 *
 * @param {object} params
 * @param {{ extractorName: string, startPage: number, endPage: number }} params.task
 * @param {*} params.pdfInput - Passed through to `runExtractor`.
 * @param {Function} params.generateObject - Passed through to `runExtractor`.
 * @param {Function} [params.convertPdfToImages] - Passed through to `runExtractor`.
 * @param {object} [params.providerOptions] - Passed through to `runExtractor`.
 * @param {Function} [params.trackUsage] - Receives the usage of each extractor run.
 * @param {Function} [params.log] - Optional async logger.
 * @returns {Promise<object | object[] | null>}
 */
async function runFocusedExtractorWithFallback(params) {
  const {
    task,
    pdfInput,
    generateObject,
    convertPdfToImages,
    providerOptions,
    trackUsage,
    log
  } = params;
  const ext = getExtractor(task.extractorName);
  if (!ext) {
    await log?.(`Unknown extractor: ${task.extractorName}, skipping`);
    return null;
  }
  const { fallback } = ext;
  try {
    const result = await runExtractor({
      name: task.extractorName,
      prompt: ext.buildPrompt(),
      schema: ext.schema,
      pdfInput,
      startPage: task.startPage,
      endPage: task.endPage,
      generateObject,
      convertPdfToImages,
      maxTokens: ext.maxTokens ?? 4096,
      providerOptions
    });
    // Optional call, matching findReferencedPages: a missing tracker must not
    // throw here, or the catch below would discard a successful result as an
    // "extractor failed" case.
    trackUsage?.(result.usage);
    if (!fallback || !fallback.isEmpty(result.data)) {
      return result;
    }
  } catch (error) {
    await log?.(`Extractor ${task.extractorName} failed: ${error}`);
    if (!fallback) {
      return null;
    }
  }
  // From here on the extractor has a fallback and the focused run either
  // failed or came back empty.
  const fallbackExt = getExtractor(fallback.extractorName);
  if (!fallbackExt) return null;
  await log?.(
    `Extractor ${task.extractorName} produced no usable records; trying ${fallback.extractorName} fallback for pages ${task.startPage}-${task.endPage}`
  );
  try {
    const fallbackResult = await runExtractor({
      name: fallback.extractorName,
      prompt: fallbackExt.buildPrompt(),
      schema: fallbackExt.schema,
      pdfInput,
      startPage: task.startPage,
      endPage: task.endPage,
      generateObject,
      convertPdfToImages,
      maxTokens: fallbackExt.maxTokens ?? 4096,
      providerOptions
    });
    trackUsage?.(fallbackResult.usage);
    const focusedData = fallback.deriveFocusedResult(fallbackResult.data);
    // The derived record carries no usage of its own; the model call was
    // already accounted for through fallbackResult.
    return focusedData ? [
      fallbackResult,
      { name: task.extractorName, data: focusedData, usage: void 0 }
    ] : fallbackResult;
  } catch (fallbackError) {
    await log?.(`${fallback.extractorName} fallback for ${task.extractorName} failed: ${fallbackError}`);
    return null;
  }
}
6138
+
5814
6139
  // src/core/quality.ts
5815
6140
  function evaluateQualityGate(params) {
5816
6141
  const { issues, hasRoundWarnings = false } = params;
@@ -5847,11 +6172,6 @@ function addFormEntry(inventory, formNumber, source, extra) {
5847
6172
  sources: [source]
5848
6173
  });
5849
6174
  }
5850
- function looksReferential2(value) {
5851
- if (typeof value !== "string") return false;
5852
- const normalized = value.toLowerCase();
5853
- return normalized.includes("shown in the declarations") || normalized.includes("shown in declarations") || normalized.includes("shown in the schedule") || normalized.includes("as stated") || normalized.includes("if applicable");
5854
- }
5855
6175
  function looksTocArtifact(value) {
5856
6176
  if (typeof value !== "string") return false;
5857
6177
  return /\.{4,}\d{1,3}$/.test(value.trim()) || /^\d+\.\s+[A-Z][\s\S]*\.{3,}\d{1,3}$/.test(value.trim());
@@ -5878,11 +6198,7 @@ function buildExtractionReviewReport(params) {
5878
6198
  const definitionsResult = memory.get("definitions");
5879
6199
  const coveredReasonsResult = memory.get("covered_reasons");
5880
6200
  const definitions = Array.isArray(definitionsResult?.definitions) ? definitionsResult.definitions : sections.filter((section) => section.type === "definition");
5881
- const coveredReasons = Array.isArray(coveredReasonsResult?.coveredReasons) ? coveredReasonsResult.coveredReasons : Array.isArray(coveredReasonsResult?.covered_reasons) ? coveredReasonsResult.covered_reasons : sections.filter((section) => {
5882
- const title = String(section.title ?? "").toLowerCase();
5883
- const type = String(section.type ?? "").toLowerCase();
5884
- return type === "covered_reason" || title.includes("covered cause") || title.includes("covered reason") || title.includes("covered peril");
5885
- });
6201
+ const coveredReasons = Array.isArray(coveredReasonsResult?.coveredReasons) ? coveredReasonsResult.coveredReasons : Array.isArray(coveredReasonsResult?.covered_reasons) ? coveredReasonsResult.covered_reasons : sections.filter(looksCoveredReasonSection);
5886
6202
  const mappedDefinitions = params.pageAssignments.some((assignment) => assignment.extractorNames.includes("definitions"));
5887
6203
  const mappedCoveredReasons = params.pageAssignments.some((assignment) => assignment.extractorNames.includes("covered_reasons"));
5888
6204
  if (mappedDefinitions && definitions.length === 0) {
@@ -5997,7 +6313,7 @@ function buildExtractionReviewReport(params) {
5997
6313
  itemName: typeof coverage.name === "string" ? coverage.name : void 0
5998
6314
  });
5999
6315
  }
6000
- if (looksReferential2(coverage.limit) || looksReferential2(coverage.deductible)) {
6316
+ if (looksReferential(coverage.limit) || looksReferential(coverage.deductible)) {
6001
6317
  deterministicIssues.push({
6002
6318
  code: "coverage_referential_value",
6003
6319
  severity: "warning",
@@ -6147,7 +6463,7 @@ function buildExtractionReviewReport(params) {
6147
6463
  itemName
6148
6464
  });
6149
6465
  }
6150
- if (looksReferential2(content) || looksReferential2(coveredReason.reason)) {
6466
+ if (looksReferential(content) || looksReferential(coveredReason.reason)) {
6151
6467
  deterministicIssues.push({
6152
6468
  code: "covered_reason_referential_value",
6153
6469
  severity: "warning",
@@ -6208,6 +6524,134 @@ function toReviewRoundRecord(round, review) {
6208
6524
  };
6209
6525
  }
6210
6526
 
6527
+ // src/extraction/planning.ts
6528
+ function normalizePageAssignments(pageAssignments, formInventory) {
6529
+ const pageFormTypes = /* @__PURE__ */ new Map();
6530
+ if (formInventory) {
6531
+ for (const form of formInventory.forms) {
6532
+ if (form.pageStart != null) {
6533
+ const end = form.pageEnd ?? form.pageStart;
6534
+ for (let p = form.pageStart; p <= end; p += 1) {
6535
+ const types = pageFormTypes.get(p) ?? /* @__PURE__ */ new Set();
6536
+ types.add(form.formType);
6537
+ pageFormTypes.set(p, types);
6538
+ }
6539
+ }
6540
+ }
6541
+ }
6542
+ return pageAssignments.map((assignment) => {
6543
+ let extractorNames = [...new Set(
6544
+ (assignment.extractorNames.length > 0 ? assignment.extractorNames : ["sections"]).filter(Boolean)
6545
+ )];
6546
+ const hasDeclarations = extractorNames.includes("declarations");
6547
+ const hasConditions = extractorNames.includes("conditions");
6548
+ const hasExclusions = extractorNames.includes("exclusions");
6549
+ const hasEndorsements = extractorNames.includes("endorsements");
6550
+ const looksLikeScheduleValues = assignment.hasScheduleValues === true;
6551
+ const roleBlocksCoverageLimits = assignment.pageRole === "policy_form" || assignment.pageRole === "condition_exclusion_form" || assignment.pageRole === "endorsement_form";
6552
+ const inventoryTypes = pageFormTypes.get(assignment.localPageNumber);
6553
+ const inventoryBlocksCoverageLimits = inventoryTypes != null && !looksLikeScheduleValues && !hasDeclarations && (inventoryTypes.has("endorsement") || inventoryTypes.has("notice") || inventoryTypes.has("application"));
6554
+ if (extractorNames.includes("coverage_limits")) {
6555
+ const shouldDropCoverageLimits = inventoryBlocksCoverageLimits || !looksLikeScheduleValues && roleBlocksCoverageLimits || !hasDeclarations && !looksLikeScheduleValues && (hasConditions || hasExclusions) || !hasDeclarations && !looksLikeScheduleValues && hasEndorsements;
6556
+ if (shouldDropCoverageLimits) {
6557
+ extractorNames = extractorNames.filter((name) => name !== "coverage_limits");
6558
+ }
6559
+ }
6560
+ if (inventoryTypes?.has("endorsement") && !extractorNames.includes("endorsements")) {
6561
+ extractorNames = [...extractorNames, "endorsements"];
6562
+ }
6563
+ if (extractorNames.length === 0) {
6564
+ extractorNames = ["sections"];
6565
+ }
6566
+ return {
6567
+ ...assignment,
6568
+ extractorNames
6569
+ };
6570
+ });
6571
+ }
6572
+ function buildTemplateHints(primaryType, documentType, pageCount, template) {
6573
+ return [
6574
+ `Document type: ${primaryType} ${documentType}`,
6575
+ `Expected sections: ${template.expectedSections.join(", ")}`,
6576
+ `Page hints: ${Object.entries(template.pageHints).map(([k, v]) => `${k}: ${v}`).join("; ")}`,
6577
+ `Total pages: ${pageCount}`
6578
+ ].join("\n");
6579
+ }
6580
+ function groupContiguousPages(pages) {
6581
+ if (pages.length === 0) return [];
6582
+ const sorted = [...new Set(pages)].sort((a, b) => a - b);
6583
+ const ranges = [];
6584
+ let start = sorted[0];
6585
+ let previous = sorted[0];
6586
+ for (let i = 1; i < sorted.length; i += 1) {
6587
+ const current = sorted[i];
6588
+ if (current === previous + 1) {
6589
+ previous = current;
6590
+ continue;
6591
+ }
6592
+ ranges.push({ startPage: start, endPage: previous });
6593
+ start = current;
6594
+ previous = current;
6595
+ }
6596
+ ranges.push({ startPage: start, endPage: previous });
6597
+ return ranges;
6598
+ }
6599
+ function buildPlanFromPageAssignments(pageAssignments, pageCount, formInventory) {
6600
+ const extractorPages = /* @__PURE__ */ new Map();
6601
+ for (const assignment of pageAssignments) {
6602
+ const extractors = assignment.extractorNames.length > 0 ? assignment.extractorNames : ["sections"];
6603
+ for (const extractorName of extractors) {
6604
+ extractorPages.set(extractorName, [...extractorPages.get(extractorName) ?? [], assignment.localPageNumber]);
6605
+ }
6606
+ }
6607
+ const coveredPages = /* @__PURE__ */ new Set();
6608
+ for (const pages of extractorPages.values()) {
6609
+ for (const page of pages) coveredPages.add(page);
6610
+ }
6611
+ for (let page = 1; page <= pageCount; page += 1) {
6612
+ if (!coveredPages.has(page)) {
6613
+ extractorPages.set("sections", [...extractorPages.get("sections") ?? [], page]);
6614
+ }
6615
+ }
6616
+ const contextualExtractors = /* @__PURE__ */ new Set(["conditions", "covered_reasons", "definitions", "exclusions", "endorsements"]);
6617
+ const contextualForms = (formInventory?.forms ?? []).filter(
6618
+ (form) => form.pageStart != null && (form.pageEnd ?? form.pageStart) != null
6619
+ );
6620
+ const expandPagesToFormRanges = (extractorName, pages) => {
6621
+ if (!contextualExtractors.has(extractorName)) return pages;
6622
+ const expanded = new Set(pages);
6623
+ for (const page of pages) {
6624
+ for (const form of contextualForms) {
6625
+ const pageStart = form.pageStart;
6626
+ const pageEnd = form.pageEnd ?? form.pageStart;
6627
+ const formType = form.formType;
6628
+ const supportsContextualExpansion = extractorName === "endorsements" ? formType === "endorsement" : formType === "coverage" || formType === "endorsement";
6629
+ if (!supportsContextualExpansion) continue;
6630
+ if (page < pageStart || page > pageEnd) continue;
6631
+ for (let current = pageStart; current <= pageEnd; current += 1) {
6632
+ expanded.add(current);
6633
+ }
6634
+ }
6635
+ }
6636
+ return [...expanded].sort((a, b) => a - b);
6637
+ };
6638
+ const tasks = [...extractorPages.entries()].flatMap(
6639
+ ([extractorName, pages]) => groupContiguousPages(expandPagesToFormRanges(extractorName, pages)).map(({ startPage, endPage }) => ({
6640
+ extractorName,
6641
+ startPage,
6642
+ endPage,
6643
+ description: `Page-mapped ${extractorName} extraction for pages ${startPage}-${endPage}`
6644
+ }))
6645
+ ).sort((a, b) => a.startPage - b.startPage || a.extractorName.localeCompare(b.extractorName));
6646
+ return {
6647
+ tasks,
6648
+ pageMap: [...extractorPages.entries()].map(([section, pages]) => ({
6649
+ section,
6650
+ pages: `pages ${[...new Set(pages)].sort((a, b) => a - b).join(", ")}`
6651
+ }))
6652
+ };
6653
+ }
6654
+
6211
6655
  // src/extraction/coordinator.ts
6212
6656
  function createExtractor(config) {
6213
6657
  const {
@@ -6224,6 +6668,7 @@ function createExtractor(config) {
6224
6668
  onCheckpointSave
6225
6669
  } = config;
6226
6670
  const limit = pLimit(concurrency);
6671
+ const extractorCatalog = formatExtractorCatalogForPrompt();
6227
6672
  let totalUsage = { inputTokens: 0, outputTokens: 0 };
6228
6673
  let modelCalls = 0;
6229
6674
  let callsWithUsage = 0;
@@ -6244,43 +6689,56 @@ function createExtractor(config) {
6244
6689
  memory.set(name, mergeExtractorResult(name, existing, data));
6245
6690
  }
6246
6691
  function summarizeExtraction(memory) {
6247
- const coverageResult = memory.get("coverage_limits");
6248
- const declarationResult = memory.get("declarations");
6249
- const endorsementResult = memory.get("endorsements");
6250
- const exclusionResult = memory.get("exclusions");
6251
- const conditionResult = memory.get("conditions");
6252
- const sectionResult = memory.get("sections");
6253
- const definitionsResult = memory.get("definitions");
6254
- const coveredReasonsResult = memory.get("covered_reasons");
6255
- const sections = Array.isArray(sectionResult?.sections) ? sectionResult.sections : [];
6256
- const definitionCount = Array.isArray(definitionsResult?.definitions) ? definitionsResult.definitions.length : sections.filter((section) => section.type === "definition").length;
6257
- const coveredReasonCount = Array.isArray(coveredReasonsResult?.coveredReasons) ? coveredReasonsResult.coveredReasons.length : Array.isArray(coveredReasonsResult?.covered_reasons) ? coveredReasonsResult.covered_reasons.length : sections.filter((section) => {
6258
- const title = String(section.title ?? "").toLowerCase();
6259
- const type = String(section.type ?? "").toLowerCase();
6260
- return type === "covered_reason" || title.includes("covered cause") || title.includes("covered reason") || title.includes("covered peril");
6261
- }).length;
6262
- const coverageSummary = Array.isArray(coverageResult?.coverages) ? coverageResult.coverages.slice(0, 12).map((coverage) => ({
6692
+ const declarationResult = readMemoryRecord(memory, "declarations");
6693
+ const endorsements = readRecordArray(readMemoryRecord(memory, "endorsements"), "endorsements") ?? [];
6694
+ const exclusions = readRecordArray(readMemoryRecord(memory, "exclusions"), "exclusions") ?? [];
6695
+ const conditions = readRecordArray(readMemoryRecord(memory, "conditions"), "conditions") ?? [];
6696
+ const sections = getSections(memory) ?? [];
6697
+ const definitions = getDefinitions(memory) ?? sections.filter((section) => section.type === "definition");
6698
+ const coveredReasons = getCoveredReasons(memory) ?? sections.filter(looksCoveredReasonSection);
6699
+ const coverages = getCoverageLimitCoverages(memory);
6700
+ const coverageSummary = coverages.slice(0, 12).map((coverage) => ({
6263
6701
  name: coverage.name,
6264
6702
  limit: coverage.limit,
6265
6703
  deductible: coverage.deductible,
6266
6704
  formNumber: coverage.formNumber
6267
- })) : [];
6705
+ }));
6268
6706
  return JSON.stringify({
6269
6707
  extractedKeys: [...memory.keys()].filter((key) => key !== "classify"),
6270
6708
  declarationFieldCount: Array.isArray(declarationResult?.fields) ? declarationResult.fields.length : 0,
6271
- coverageCount: Array.isArray(coverageResult?.coverages) ? coverageResult.coverages.length : 0,
6709
+ coverageCount: coverages.length,
6272
6710
  coverageSamples: coverageSummary,
6273
- endorsementCount: Array.isArray(endorsementResult?.endorsements) ? endorsementResult.endorsements.length : 0,
6274
- exclusionCount: Array.isArray(exclusionResult?.exclusions) ? exclusionResult.exclusions.length : 0,
6275
- conditionCount: Array.isArray(conditionResult?.conditions) ? conditionResult.conditions.length : 0,
6276
- definitionCount,
6277
- coveredReasonCount,
6711
+ endorsementCount: endorsements.length,
6712
+ exclusionCount: exclusions.length,
6713
+ conditionCount: conditions.length,
6714
+ definitionCount: definitions.length,
6715
+ coveredReasonCount: coveredReasons.length,
6278
6716
  sectionCount: sections.length
6279
6717
  }, null, 2);
6280
6718
  }
6719
+ function textIncludesSupplementarySignal(value) {
6720
+ if (typeof value !== "string") return false;
6721
+ return /\b(supplementary|regulatory|department of insurance|ombudsman|complaint|claim|claims|contact|phone|email|cancellation|cancelled|nonrenewal|non-renewal|non renew|notice|governing law|jurisdiction|third[- ]party administrator|tpa)\b/i.test(value);
6722
+ }
6723
+ function hasSupplementaryExtractionSignal(pageAssignments, formInventory, memory) {
6724
+ const hasPageSignal = pageAssignments.some(
6725
+ (assignment) => assignment.pageRole === "supplementary" || assignment.extractorNames.includes("supplementary") || textIncludesSupplementarySignal(assignment.notes)
6726
+ );
6727
+ if (hasPageSignal) return true;
6728
+ const hasFormSignal = (formInventory?.forms ?? []).some(
6729
+ (form) => form.formType === "notice" || textIncludesSupplementarySignal(form.title) || textIncludesSupplementarySignal(form.formNumber)
6730
+ );
6731
+ if (hasFormSignal) return true;
6732
+ const likelySupplementaryKeys = ["sections", "conditions", "endorsements", "exclusions"];
6733
+ return likelySupplementaryKeys.some((key) => {
6734
+ const value = memory.get(key);
6735
+ if (!value) return false;
6736
+ return textIncludesSupplementarySignal(JSON.stringify(value));
6737
+ });
6738
+ }
6281
6739
  function buildAlreadyExtractedSummary(memory) {
6282
6740
  const lines = [];
6283
- const declarationResult = memory.get("declarations");
6741
+ const declarationResult = readMemoryRecord(memory, "declarations");
6284
6742
  if (Array.isArray(declarationResult?.fields)) {
6285
6743
  for (const field of declarationResult.fields) {
6286
6744
  if (field.key && field.value) {
@@ -6289,20 +6747,17 @@ function createExtractor(config) {
6289
6747
  }
6290
6748
  }
6291
6749
  }
6292
- const coverageResult = memory.get("coverage_limits");
6293
- if (Array.isArray(coverageResult?.coverages)) {
6294
- for (const cov of coverageResult.coverages) {
6295
- const parts = [cov.name, cov.limit && `limit=${cov.limit}`, cov.deductible && `deductible=${cov.deductible}`].filter(Boolean);
6296
- if (parts.length > 0) lines.push(`- coverage: ${parts.join(", ")}`);
6297
- }
6750
+ for (const cov of getCoverageLimitCoverages(memory)) {
6751
+ const parts = [cov.name, cov.limit && `limit=${cov.limit}`, cov.deductible && `deductible=${cov.deductible}`].filter(Boolean);
6752
+ if (parts.length > 0) lines.push(`- coverage: ${parts.join(", ")}`);
6298
6753
  }
6299
- const namedInsured = memory.get("named_insured");
6754
+ const namedInsured = getNamedInsured(memory);
6300
6755
  if (namedInsured) {
6301
6756
  for (const [key, value] of Object.entries(namedInsured)) {
6302
6757
  if (value && typeof value === "string") lines.push(`- ${key}: ${value}`);
6303
6758
  }
6304
6759
  }
6305
- const carrierInfo = memory.get("carrier_info");
6760
+ const carrierInfo = getCarrierInfo(memory);
6306
6761
  if (carrierInfo) {
6307
6762
  for (const [key, value] of Object.entries(carrierInfo)) {
6308
6763
  if (value && typeof value === "string") lines.push(`- ${key}: ${value}`);
@@ -6310,6 +6765,34 @@ function createExtractor(config) {
6310
6765
  }
6311
6766
  return lines.length > 0 ? lines.join("\n") : "";
6312
6767
  }
6768
+ async function runFocusedExtractorTask(task, pdfInput, memory) {
6769
+ if (task.extractorName === "supplementary") {
6770
+ const alreadyExtractedSummary = buildAlreadyExtractedSummary(memory);
6771
+ const result = await runExtractor({
6772
+ name: "supplementary",
6773
+ prompt: buildSupplementaryPrompt(alreadyExtractedSummary),
6774
+ schema: SupplementarySchema,
6775
+ pdfInput,
6776
+ startPage: task.startPage,
6777
+ endPage: task.endPage,
6778
+ generateObject,
6779
+ convertPdfToImages,
6780
+ maxTokens: 4096,
6781
+ providerOptions
6782
+ });
6783
+ trackUsage(result.usage);
6784
+ return result;
6785
+ }
6786
+ return runFocusedExtractorWithFallback({
6787
+ task,
6788
+ pdfInput,
6789
+ generateObject,
6790
+ convertPdfToImages,
6791
+ providerOptions,
6792
+ trackUsage,
6793
+ log
6794
+ });
6795
+ }
6313
6796
  function formatPageMapSummary(pageAssignments) {
6314
6797
  const extractorPages = /* @__PURE__ */ new Map();
6315
6798
  for (const assignment of pageAssignments) {
@@ -6320,132 +6803,6 @@ function createExtractor(config) {
6320
6803
  if (extractorPages.size === 0) return "No page assignments available.";
6321
6804
  return [...extractorPages.entries()].map(([extractorName, pages]) => `${extractorName}: ${pages.length} page(s), pages ${pages.join(", ")}`).join("\n");
6322
6805
  }
6323
- function normalizePageAssignments(pageAssignments, formInventory) {
6324
- const pageFormTypes = /* @__PURE__ */ new Map();
6325
- if (formInventory) {
6326
- for (const form of formInventory.forms) {
6327
- if (form.pageStart != null) {
6328
- const end = form.pageEnd ?? form.pageStart;
6329
- for (let p = form.pageStart; p <= end; p++) {
6330
- const types = pageFormTypes.get(p) ?? /* @__PURE__ */ new Set();
6331
- types.add(form.formType);
6332
- pageFormTypes.set(p, types);
6333
- }
6334
- }
6335
- }
6336
- }
6337
- return pageAssignments.map((assignment) => {
6338
- let extractorNames = [...new Set(
6339
- (assignment.extractorNames.length > 0 ? assignment.extractorNames : ["sections"]).filter(Boolean)
6340
- )];
6341
- const hasDeclarations = extractorNames.includes("declarations");
6342
- const hasConditions = extractorNames.includes("conditions");
6343
- const hasExclusions = extractorNames.includes("exclusions");
6344
- const hasEndorsements = extractorNames.includes("endorsements");
6345
- const looksLikeScheduleValues = assignment.hasScheduleValues === true;
6346
- const roleBlocksCoverageLimits = assignment.pageRole === "policy_form" || assignment.pageRole === "condition_exclusion_form" || assignment.pageRole === "endorsement_form";
6347
- const inventoryTypes = pageFormTypes.get(assignment.localPageNumber);
6348
- const inventoryBlocksCoverageLimits = inventoryTypes != null && !looksLikeScheduleValues && !hasDeclarations && (inventoryTypes.has("endorsement") || inventoryTypes.has("notice") || inventoryTypes.has("application"));
6349
- if (extractorNames.includes("coverage_limits")) {
6350
- const shouldDropCoverageLimits = inventoryBlocksCoverageLimits || !looksLikeScheduleValues && roleBlocksCoverageLimits || !hasDeclarations && !looksLikeScheduleValues && (hasConditions || hasExclusions) || !hasDeclarations && !looksLikeScheduleValues && hasEndorsements;
6351
- if (shouldDropCoverageLimits) {
6352
- extractorNames = extractorNames.filter((name) => name !== "coverage_limits");
6353
- }
6354
- }
6355
- if (inventoryTypes?.has("endorsement") && !extractorNames.includes("endorsements")) {
6356
- extractorNames = [...extractorNames, "endorsements"];
6357
- }
6358
- if (extractorNames.length === 0) {
6359
- extractorNames = ["sections"];
6360
- }
6361
- return {
6362
- ...assignment,
6363
- extractorNames
6364
- };
6365
- });
6366
- }
6367
- function buildTemplateHints(primaryType, documentType, pageCount, template) {
6368
- return [
6369
- `Document type: ${primaryType} ${documentType}`,
6370
- `Expected sections: ${template.expectedSections.join(", ")}`,
6371
- `Page hints: ${Object.entries(template.pageHints).map(([k, v]) => `${k}: ${v}`).join("; ")}`,
6372
- `Total pages: ${pageCount}`
6373
- ].join("\n");
6374
- }
6375
- function groupContiguousPages(pages) {
6376
- if (pages.length === 0) return [];
6377
- const sorted = [...new Set(pages)].sort((a, b) => a - b);
6378
- const ranges = [];
6379
- let start = sorted[0];
6380
- let previous = sorted[0];
6381
- for (let i = 1; i < sorted.length; i += 1) {
6382
- const current = sorted[i];
6383
- if (current === previous + 1) {
6384
- previous = current;
6385
- continue;
6386
- }
6387
- ranges.push({ startPage: start, endPage: previous });
6388
- start = current;
6389
- previous = current;
6390
- }
6391
- ranges.push({ startPage: start, endPage: previous });
6392
- return ranges;
6393
- }
6394
- function buildPlanFromPageAssignments(pageAssignments, pageCount, formInventory) {
6395
- const extractorPages = /* @__PURE__ */ new Map();
6396
- for (const assignment of pageAssignments) {
6397
- const extractors = assignment.extractorNames.length > 0 ? assignment.extractorNames : ["sections"];
6398
- for (const extractorName of extractors) {
6399
- extractorPages.set(extractorName, [...extractorPages.get(extractorName) ?? [], assignment.localPageNumber]);
6400
- }
6401
- }
6402
- const coveredPages = /* @__PURE__ */ new Set();
6403
- for (const pages of extractorPages.values()) {
6404
- for (const page of pages) coveredPages.add(page);
6405
- }
6406
- for (let page = 1; page <= pageCount; page += 1) {
6407
- if (!coveredPages.has(page)) {
6408
- extractorPages.set("sections", [...extractorPages.get("sections") ?? [], page]);
6409
- }
6410
- }
6411
- const contextualExtractors = /* @__PURE__ */ new Set(["conditions", "covered_reasons", "definitions", "exclusions", "endorsements"]);
6412
- const contextualForms = (formInventory?.forms ?? []).filter(
6413
- (form) => form.pageStart != null && (form.pageEnd ?? form.pageStart) != null
6414
- );
6415
- const expandPagesToFormRanges = (extractorName, pages) => {
6416
- if (!contextualExtractors.has(extractorName)) return pages;
6417
- const expanded = new Set(pages);
6418
- for (const page of pages) {
6419
- for (const form of contextualForms) {
6420
- const pageStart = form.pageStart;
6421
- const pageEnd = form.pageEnd ?? form.pageStart;
6422
- const formType = form.formType;
6423
- const supportsContextualExpansion = extractorName === "endorsements" ? formType === "endorsement" : formType === "coverage" || formType === "endorsement";
6424
- if (!supportsContextualExpansion) continue;
6425
- if (page < pageStart || page > pageEnd) continue;
6426
- for (let current = pageStart; current <= pageEnd; current += 1) {
6427
- expanded.add(current);
6428
- }
6429
- }
6430
- }
6431
- return [...expanded].sort((a, b) => a - b);
6432
- };
6433
- const tasks = [...extractorPages.entries()].flatMap(
6434
- ([extractorName, pages]) => groupContiguousPages(expandPagesToFormRanges(extractorName, pages)).map(({ startPage, endPage }) => ({
6435
- extractorName,
6436
- startPage,
6437
- endPage,
6438
- description: `Page-mapped ${extractorName} extraction for pages ${startPage}-${endPage}`
6439
- }))
6440
- ).sort((a, b) => a.startPage - b.startPage || a.extractorName.localeCompare(b.extractorName));
6441
- return {
6442
- tasks,
6443
- pageMap: [...extractorPages.entries()].map(([section, pages]) => ({
6444
- section,
6445
- pages: `pages ${[...new Set(pages)].sort((a, b) => a - b).join(", ")}`
6446
- }))
6447
- };
6448
- }
6449
6806
  async function extract(pdfInput, documentId, options) {
6450
6807
  const id = documentId ?? `doc-${Date.now()}`;
6451
6808
  const memory = /* @__PURE__ */ new Map();
@@ -6456,7 +6813,8 @@ function createExtractor(config) {
6456
6813
  const pipelineCtx = createPipelineContext({
6457
6814
  id,
6458
6815
  onSave: onCheckpointSave,
6459
- resumeFrom: options?.resumeFrom
6816
+ resumeFrom: options?.resumeFrom,
6817
+ phaseOrder: ["classify", "form_inventory", "page_map", "plan", "extract", "resolve_referential", "review", "assemble"]
6460
6818
  });
6461
6819
  const resumed = pipelineCtx.getCheckpoint()?.state;
6462
6820
  if (resumed?.memory) {
@@ -6624,40 +6982,18 @@ function createExtractor(config) {
6624
6982
  const extractorResults = await Promise.all(
6625
6983
  tasks.map(
6626
6984
  (task) => limit(async () => {
6627
- const ext = getExtractor(task.extractorName) ?? (task.extractorName === "definitions" || task.extractorName === "covered_reasons" ? getExtractor("sections") : void 0);
6628
- if (!ext) {
6629
- await log?.(`Unknown extractor: ${task.extractorName}, skipping`);
6630
- return null;
6631
- }
6632
6985
  onProgress?.(`Extracting ${task.extractorName} (pages ${task.startPage}-${task.endPage})...`);
6633
- try {
6634
- const result = await runExtractor({
6635
- name: task.extractorName,
6636
- prompt: ext.buildPrompt(),
6637
- schema: ext.schema,
6638
- pdfInput,
6639
- startPage: task.startPage,
6640
- endPage: task.endPage,
6641
- generateObject,
6642
- convertPdfToImages,
6643
- maxTokens: ext.maxTokens ?? 4096,
6644
- providerOptions
6645
- });
6646
- trackUsage(result.usage);
6647
- return result;
6648
- } catch (error) {
6649
- await log?.(`Extractor ${task.extractorName} failed: ${error}`);
6650
- return null;
6651
- }
6986
+ return runFocusedExtractorTask(task, pdfInput, memory);
6652
6987
  })
6653
6988
  )
6654
6989
  );
6655
- for (const result of extractorResults) {
6990
+ for (const result of extractorResults.flatMap((item) => Array.isArray(item) ? item : item ? [item] : [])) {
6656
6991
  if (result) {
6657
6992
  mergeMemoryResult(result.name, result.data, memory);
6658
6993
  }
6659
6994
  }
6660
- {
6995
+ const planIncludesSupplementary = tasks.some((task) => task.extractorName === "supplementary");
6996
+ if (!planIncludesSupplementary && hasSupplementaryExtractionSignal(pageAssignments, formInventory, memory)) {
6661
6997
  onProgress?.("Extracting supplementary retrieval facts...");
6662
6998
  try {
6663
6999
  const alreadyExtractedSummary = buildAlreadyExtractedSummary(memory);
@@ -6731,7 +7067,7 @@ function createExtractor(config) {
6731
7067
  const reviewResponse = await safeGenerateObject(
6732
7068
  generateObject,
6733
7069
  {
6734
- prompt: buildReviewPrompt(template.required, extractedKeys, extractionSummary, pageMapSummary),
7070
+ prompt: buildReviewPrompt(template.required, extractedKeys, extractionSummary, pageMapSummary, extractorCatalog),
6735
7071
  schema: ReviewResultSchema,
6736
7072
  maxTokens: 1536,
6737
7073
  providerOptions: await buildPdfProviderOptions(pdfInput, providerOptions)
@@ -6755,31 +7091,11 @@ function createExtractor(config) {
6755
7091
  const followUpResults = await Promise.all(
6756
7092
  reviewResponse.object.additionalTasks.map(
6757
7093
  (task) => limit(async () => {
6758
- const ext = getExtractor(task.extractorName) ?? (task.extractorName === "definitions" || task.extractorName === "covered_reasons" ? getExtractor("sections") : void 0);
6759
- if (!ext) return null;
6760
- try {
6761
- const result = await runExtractor({
6762
- name: task.extractorName,
6763
- prompt: ext.buildPrompt(),
6764
- schema: ext.schema,
6765
- pdfInput,
6766
- startPage: task.startPage,
6767
- endPage: task.endPage,
6768
- generateObject,
6769
- convertPdfToImages,
6770
- maxTokens: ext.maxTokens ?? 4096,
6771
- providerOptions
6772
- });
6773
- trackUsage(result.usage);
6774
- return result;
6775
- } catch (error) {
6776
- await log?.(`Follow-up extractor ${task.extractorName} failed: ${error}`);
6777
- return null;
6778
- }
7094
+ return runFocusedExtractorTask(task, pdfInput, memory);
6779
7095
  })
6780
7096
  )
6781
7097
  );
6782
- for (const result of followUpResults) {
7098
+ for (const result of followUpResults.flatMap((item) => Array.isArray(item) ? item : item ? [item] : [])) {
6783
7099
  if (result) {
6784
7100
  mergeMemoryResult(result.name, result.data, memory);
6785
7101
  }
@@ -7818,6 +8134,70 @@ function reviewBatchEmail(text, batchFields) {
7818
8134
  };
7819
8135
  }
7820
8136
 
8137
+ // src/application/workflow.ts
8138
+ var MAX_DOCUMENT_SEARCH_FIELDS = 5;
8139
+ var LOW_VALUE_FIELD_RATIO_LIMIT = 0.6;
8140
+ function planApplicationWorkflow(input) {
8141
+ const unfilledFields = input.fields.filter(isUnfilled);
8142
+ const documentSearchFields = planDocumentSearchFields(
8143
+ unfilledFields,
8144
+ input.hasDocumentStore && input.hasMemoryStore
8145
+ );
8146
+ return {
8147
+ runBackfill: input.hasBackfillProvider && unfilledFields.length > 0,
8148
+ runContextAutoFill: input.orgContextCount > 0 && unfilledFields.length > 0,
8149
+ documentSearchFields,
8150
+ runBatching: unfilledFields.length > 0,
8151
+ unfilledFields
8152
+ };
8153
+ }
8154
+ function planReplyActions(input) {
8155
+ const hasCurrentFields = input.currentBatchFields.length > 0;
8156
+ const nextBatchNeedsAnswers = (input.nextBatchFields ?? []).some(isUnfilled);
8157
+ const hasLookupRequests = (input.intent.lookupRequests?.length ?? 0) > 0;
8158
+ return {
8159
+ parseAnswers: input.intent.hasAnswers && hasCurrentFields,
8160
+ runLookup: hasLookupRequests && input.hasDocumentStore,
8161
+ answerQuestion: Boolean(input.intent.questionText) && (input.intent.primaryIntent === "question" || input.intent.primaryIntent === "mixed"),
8162
+ advanceBatch: hasCurrentFields && input.currentBatchFields.every((field) => !isUnfilled(field)),
8163
+ generateNextEmail: nextBatchNeedsAnswers
8164
+ };
8165
+ }
8166
+ function planDocumentSearchFields(unfilledFields, hasStores) {
8167
+ if (!hasStores || unfilledFields.length === 0) return [];
8168
+ const searchableFields = unfilledFields.filter(isHighValueLookupField);
8169
+ if (searchableFields.length === 0) return [];
8170
+ const lowValueRatio = 1 - searchableFields.length / unfilledFields.length;
8171
+ if (unfilledFields.length > MAX_DOCUMENT_SEARCH_FIELDS && lowValueRatio > LOW_VALUE_FIELD_RATIO_LIMIT) {
8172
+ return [];
8173
+ }
8174
+ return searchableFields.slice(0, MAX_DOCUMENT_SEARCH_FIELDS);
8175
+ }
8176
+ function isUnfilled(field) {
8177
+ return field.value === void 0 || field.value.trim() === "";
8178
+ }
8179
+ function isHighValueLookupField(field) {
8180
+ const text = `${field.section} ${field.label}`.toLowerCase();
8181
+ if (field.required) return true;
8182
+ return [
8183
+ "carrier",
8184
+ "policy",
8185
+ "premium",
8186
+ "limit",
8187
+ "deductible",
8188
+ "insured",
8189
+ "address",
8190
+ "revenue",
8191
+ "payroll",
8192
+ "effective",
8193
+ "expiration",
8194
+ "coverage",
8195
+ "class code",
8196
+ "fein",
8197
+ "entity"
8198
+ ].some((term) => text.includes(term));
8199
+ }
8200
+
7821
8201
  // src/application/coordinator.ts
7822
8202
  function createApplicationPipeline(config) {
7823
8203
  const {
@@ -7916,27 +8296,37 @@ function createApplicationPipeline(config) {
7916
8296
  state.updatedAt = Date.now();
7917
8297
  await applicationStore?.save(state);
7918
8298
  onProgress?.(`Auto-filling ${fields.length} fields...`);
7919
- const fillTasks = [];
7920
- if (backfillProvider) {
7921
- fillTasks.push(
7922
- (async () => {
7923
- try {
7924
- const priorAnswers = await backfillFromPriorAnswers(fields, backfillProvider);
7925
- for (const pa of priorAnswers) {
7926
- const field = state.fields.find((f) => f.id === pa.fieldId);
7927
- if (field && !field.value && pa.relevance > 0.8) {
7928
- field.value = pa.value;
7929
- field.source = `backfill: ${pa.source}`;
7930
- field.confidence = "high";
7931
- }
7932
- }
7933
- } catch (e) {
7934
- await log?.(`Backfill failed: ${e}`);
8299
+ let workflowPlan = planApplicationWorkflow({
8300
+ fields: state.fields,
8301
+ hasBackfillProvider: Boolean(backfillProvider),
8302
+ orgContextCount: orgContext.length,
8303
+ hasDocumentStore: Boolean(documentStore),
8304
+ hasMemoryStore: Boolean(memoryStore)
8305
+ });
8306
+ if (workflowPlan.runBackfill && backfillProvider) {
8307
+ try {
8308
+ const priorAnswers = await backfillFromPriorAnswers(state.fields, backfillProvider);
8309
+ for (const pa of priorAnswers) {
8310
+ const field = state.fields.find((f) => f.id === pa.fieldId);
8311
+ if (field && !field.value && pa.relevance > 0.8) {
8312
+ field.value = pa.value;
8313
+ field.source = `backfill: ${pa.source}`;
8314
+ field.confidence = "high";
7935
8315
  }
7936
- })()
7937
- );
8316
+ }
8317
+ } catch (e) {
8318
+ await log?.(`Backfill failed: ${e}`);
8319
+ }
7938
8320
  }
7939
- if (orgContext.length > 0) {
8321
+ workflowPlan = planApplicationWorkflow({
8322
+ fields: state.fields,
8323
+ hasBackfillProvider: false,
8324
+ orgContextCount: orgContext.length,
8325
+ hasDocumentStore: Boolean(documentStore),
8326
+ hasMemoryStore: Boolean(memoryStore)
8327
+ });
8328
+ const fillTasks = [];
8329
+ if (workflowPlan.runContextAutoFill) {
7940
8330
  fillTasks.push(
7941
8331
  limit(async () => {
7942
8332
  const unfilledFields2 = state.fields.filter((f) => !f.value);
@@ -7963,18 +8353,13 @@ function createApplicationPipeline(config) {
7963
8353
  })
7964
8354
  );
7965
8355
  }
7966
- if (documentStore && memoryStore) {
8356
+ if (workflowPlan.documentSearchFields.length > 0 && memoryStore) {
7967
8357
  fillTasks.push(
7968
8358
  (async () => {
7969
8359
  try {
7970
- const unfilledFields2 = state.fields.filter((f) => !f.value);
7971
- const searchPromises = unfilledFields2.slice(0, 10).map(
8360
+ const searchPromises = workflowPlan.documentSearchFields.map(
7972
8361
  (f) => limit(async () => {
7973
- const chunks = await memoryStore.search(f.label, { limit: 3 });
7974
- for (const chunk of chunks) {
7975
- if (!state.fields.find((sf) => sf.id === f.id)?.value) {
7976
- }
7977
- }
8362
+ await memoryStore.search(f.label, { limit: 3 });
7978
8363
  })
7979
8364
  );
7980
8365
  await Promise.all(searchPromises);
@@ -7987,8 +8372,15 @@ function createApplicationPipeline(config) {
7987
8372
  await Promise.all(fillTasks);
7988
8373
  state.updatedAt = Date.now();
7989
8374
  await applicationStore?.save(state);
7990
- const unfilledFields = state.fields.filter((f) => !f.value);
7991
- if (unfilledFields.length > 0) {
8375
+ workflowPlan = planApplicationWorkflow({
8376
+ fields: state.fields,
8377
+ hasBackfillProvider: false,
8378
+ orgContextCount: 0,
8379
+ hasDocumentStore: false,
8380
+ hasMemoryStore: false
8381
+ });
8382
+ const unfilledFields = workflowPlan.unfilledFields;
8383
+ if (workflowPlan.runBatching) {
7992
8384
  onProgress?.(`Batching ${unfilledFields.length} remaining questions...`);
7993
8385
  state.status = "batching";
7994
8386
  try {
@@ -8055,7 +8447,12 @@ function createApplicationPipeline(config) {
8055
8447
  }
8056
8448
  let fieldsFilled = 0;
8057
8449
  let responseText;
8058
- if (intent.hasAnswers) {
8450
+ let replyPlan = planReplyActions({
8451
+ intent,
8452
+ currentBatchFields,
8453
+ hasDocumentStore: Boolean(documentStore)
8454
+ });
8455
+ if (replyPlan.parseAnswers) {
8059
8456
  onProgress?.("Parsing answers...");
8060
8457
  try {
8061
8458
  const { result: parseResult, usage: parseUsage } = await parseAnswers(
@@ -8078,7 +8475,7 @@ function createApplicationPipeline(config) {
8078
8475
  await log?.(`Answer parsing failed: ${error instanceof Error ? error.message : String(error)}`);
8079
8476
  }
8080
8477
  }
8081
- if (intent.lookupRequests?.length) {
8478
+ if (replyPlan.runLookup && intent.lookupRequests?.length) {
8082
8479
  onProgress?.("Processing lookup requests...");
8083
8480
  let availableData = "";
8084
8481
  if (documentStore) {
@@ -8119,64 +8516,78 @@ function createApplicationPipeline(config) {
8119
8516
  }
8120
8517
  }
8121
8518
  }
8122
- if (intent.primaryIntent === "question" || intent.primaryIntent === "mixed") {
8123
- if (intent.questionText) {
8124
- try {
8125
- const { text, usage } = await generateText({
8126
- prompt: `The user is filling out an insurance application and asked: "${intent.questionText}"
8519
+ if (replyPlan.answerQuestion && intent.questionText) {
8520
+ try {
8521
+ const { text, usage } = await generateText({
8522
+ prompt: `The user is filling out an insurance application and asked: "${intent.questionText}"
8127
8523
 
8128
8524
  Provide a brief, helpful explanation (2-3 sentences). End with "Just reply with the answer when you're ready and I'll fill it in."`,
8129
- maxTokens: 512,
8130
- providerOptions
8131
- });
8132
- trackUsage(usage);
8133
- responseText = text;
8134
- } catch (error) {
8135
- await log?.(`Question response generation failed: ${error instanceof Error ? error.message : String(error)}`);
8136
- responseText = `I wasn't able to generate an explanation for your question. Could you rephrase it, or just provide the answer directly?`;
8137
- }
8525
+ maxTokens: 512,
8526
+ providerOptions
8527
+ });
8528
+ trackUsage(usage);
8529
+ responseText = text;
8530
+ } catch (error) {
8531
+ await log?.(`Question response generation failed: ${error instanceof Error ? error.message : String(error)}`);
8532
+ responseText = `I wasn't able to generate an explanation for your question. Could you rephrase it, or just provide the answer directly?`;
8138
8533
  }
8139
8534
  }
8140
8535
  const currentBatchComplete = currentBatchFieldIds.every(
8141
8536
  (fid) => state.fields.find((f) => f.id === fid)?.value
8142
8537
  );
8143
- if (currentBatchComplete && state.batches) {
8144
- if (state.currentBatchIndex < state.batches.length - 1) {
8145
- state.currentBatchIndex++;
8146
- const nextBatchFieldIds = state.batches[state.currentBatchIndex];
8147
- const nextBatchFields = state.fields.filter(
8148
- (f) => nextBatchFieldIds.includes(f.id)
8149
- );
8538
+ let nextBatchIndex;
8539
+ let nextBatchFields;
8540
+ if (state.batches) {
8541
+ for (let index = state.currentBatchIndex + 1; index < state.batches.length; index++) {
8542
+ const candidateFields = state.fields.filter((f) => state.batches[index].includes(f.id));
8543
+ if (candidateFields.some((f) => !f.value)) {
8544
+ nextBatchIndex = index;
8545
+ nextBatchFields = candidateFields;
8546
+ break;
8547
+ }
8548
+ }
8549
+ }
8550
+ replyPlan = planReplyActions({
8551
+ intent,
8552
+ currentBatchFields,
8553
+ nextBatchFields,
8554
+ hasDocumentStore: Boolean(documentStore)
8555
+ });
8556
+ if (currentBatchComplete && replyPlan.advanceBatch && state.batches) {
8557
+ if (nextBatchIndex !== void 0 && nextBatchFields) {
8558
+ state.currentBatchIndex = nextBatchIndex;
8150
8559
  const filledCount = state.fields.filter((f) => f.value).length;
8151
- try {
8152
- const { text: emailText, usage: emailUsage } = await generateBatchEmail(
8153
- nextBatchFields,
8154
- state.currentBatchIndex,
8155
- state.batches.length,
8156
- {
8157
- appTitle: state.title,
8158
- totalFieldCount: state.fields.length,
8159
- filledFieldCount: filledCount,
8160
- companyName: context?.companyName
8161
- },
8162
- generateText,
8163
- providerOptions
8164
- );
8165
- trackUsage(emailUsage);
8166
- const emailReview = reviewBatchEmail(emailText, nextBatchFields);
8167
- state.qualityReport = {
8168
- ...buildApplicationQualityReport(state),
8169
- emailReview
8170
- };
8171
- if (!responseText) {
8172
- responseText = emailText;
8173
- } else {
8174
- responseText += `
8560
+ if (replyPlan.generateNextEmail) {
8561
+ try {
8562
+ const { text: emailText, usage: emailUsage } = await generateBatchEmail(
8563
+ nextBatchFields,
8564
+ state.currentBatchIndex,
8565
+ state.batches.length,
8566
+ {
8567
+ appTitle: state.title,
8568
+ totalFieldCount: state.fields.length,
8569
+ filledFieldCount: filledCount,
8570
+ companyName: context?.companyName
8571
+ },
8572
+ generateText,
8573
+ providerOptions
8574
+ );
8575
+ trackUsage(emailUsage);
8576
+ const emailReview = reviewBatchEmail(emailText, nextBatchFields);
8577
+ state.qualityReport = {
8578
+ ...buildApplicationQualityReport(state),
8579
+ emailReview
8580
+ };
8581
+ if (!responseText) {
8582
+ responseText = emailText;
8583
+ } else {
8584
+ responseText += `
8175
8585
 
8176
8586
  ${emailText}`;
8587
+ }
8588
+ } catch (error) {
8589
+ await log?.(`Batch email generation failed: ${error instanceof Error ? error.message : String(error)}`);
8177
8590
  }
8178
- } catch (error) {
8179
- await log?.(`Batch email generation failed: ${error instanceof Error ? error.message : String(error)}`);
8180
8591
  }
8181
8592
  } else {
8182
8593
  state.status = "confirming";
@@ -8990,6 +9401,42 @@ ${item.text}`).join("\n\n");
8990
9401
  return { evidence, contextSummary };
8991
9402
  }
8992
9403
 
9404
+ // src/query/workflow.ts
9405
/**
 * Decide whether a classified query needs a retrieval pass before reasoning.
 *
 * @param {object} classification - Query classification; only the
 *   `requiresDocumentLookup` and `requiresChunkSearch` flags are read.
 * @returns {boolean} `true` when either flag is truthy, otherwise `false`.
 */
function shouldRetrieveForClassification(classification) {
  // Coerce explicitly so the result is a strict boolean even when both flags
  // are absent; a bare `||` would hand back `undefined` (or any other raw
  // operand), which is then stored on the workflow plan.
  return Boolean(
    classification.requiresDocumentLookup || classification.requiresChunkSearch
  );
}
9408
/**
 * Build the ordered list of workflow actions for a freshly classified query.
 *
 * The plan always ends with `reason`, `verify` and `respond` (in that order)
 * and is prefixed with a `retrieve` action when
 * `shouldRetrieveForClassification` reports that lookup is needed.
 *
 * @param {object} params
 * @param {object} params.classification - Classification result. Its
 *   `subQuestions` value is attached as-is (same reference) to the `retrieve`
 *   and `reason` actions.
 * @param {Array} [params.attachmentEvidence] - Evidence gathered from
 *   attachments. Only its length is inspected; a missing or null value is
 *   treated as "no attachment evidence".
 * @returns {{ actions: object[], shouldRetrieve: * }} The planned actions and
 *   the retrieval decision exactly as returned by
 *   `shouldRetrieveForClassification`.
 */
function buildInitialQueryWorkflowPlan(params) {
  const { classification, attachmentEvidence } = params;
  const shouldRetrieve = shouldRetrieveForClassification(classification);
  // Tolerate an omitted evidence list on every path; reading `.length`
  // directly would only fail when retrieval is NOT requested, which made the
  // function's input contract depend on the classification.
  const hasAttachmentEvidence = (attachmentEvidence?.length ?? 0) > 0;

  let reasonNote;
  if (shouldRetrieve) {
    reasonNote = "answer with retrieved evidence and any attachment evidence";
  } else if (hasAttachmentEvidence) {
    reasonNote = "answer with attachment evidence only";
  } else {
    reasonNote = "answer without document retrieval";
  }

  const actions = [];
  if (shouldRetrieve) {
    actions.push({
      type: "retrieve",
      subQuestions: classification.subQuestions,
      reason: "classification requested document or chunk lookup"
    });
  }
  actions.push(
    {
      type: "reason",
      subQuestions: classification.subQuestions,
      reason: reasonNote
    },
    {
      type: "verify",
      reason: "check grounding and request targeted retries when needed"
    },
    {
      type: "respond",
      reason: "compose final response"
    }
  );
  return { actions, shouldRetrieve };
}
9436
/**
 * Look up the first action of a given type in a workflow plan.
 *
 * @param {{ actions: Array<{ type: string }> }} plan - Plan whose `actions`
 *   array is searched in order.
 * @param {string} type - Action type to match with strict equality.
 * @returns {object|undefined} The first matching action, or `undefined` when
 *   the plan holds no action of that type.
 */
function getWorkflowAction(plan, type) {
  const { actions } = plan;
  const hasRequestedType = ({ type: actionType }) => actionType === type;
  return actions.find(hasRequestedType);
}
9439
+
8993
9440
  // src/query/coordinator.ts
8994
9441
  function createQueryAgent(config) {
8995
9442
  const {
@@ -9034,29 +9481,37 @@ function createQueryAgent(config) {
9034
9481
  onProgress?.("Classifying query...");
9035
9482
  const classification = await classify(question, conversationId, attachmentContext);
9036
9483
  await pipelineCtx.save("classify", { classification, attachmentEvidence });
9037
- onProgress?.(`Retrieving evidence for ${classification.subQuestions.length} sub-question(s)...`);
9038
9484
  const retrieverConfig = {
9039
9485
  documentStore,
9040
9486
  memoryStore,
9041
9487
  retrievalLimit,
9042
9488
  log
9043
9489
  };
9044
- const retrievalResults = await Promise.all(
9045
- classification.subQuestions.map(
9046
- (sq) => limit(() => retrieve(sq, conversationId, retrieverConfig))
9047
- )
9048
- );
9490
+ const workflowPlan = buildInitialQueryWorkflowPlan({ classification, attachmentEvidence });
9491
+ const retrieveAction = getWorkflowAction(workflowPlan, "retrieve");
9492
+ const reasonAction = getWorkflowAction(workflowPlan, "reason");
9493
+ await pipelineCtx.save("workflow", { classification, attachmentEvidence, workflowPlan });
9494
+ const retrievalResults = retrieveAction ? await (async () => {
9495
+ onProgress?.(`Retrieving evidence for ${retrieveAction.subQuestions.length} sub-question(s)...`);
9496
+ return Promise.all(
9497
+ retrieveAction.subQuestions.map(
9498
+ (sq) => limit(() => retrieve(sq, conversationId, retrieverConfig))
9499
+ )
9500
+ );
9501
+ })() : [];
9049
9502
  const allEvidence = [...attachmentEvidence, ...retrievalResults.flatMap((r) => r.evidence)];
9050
9503
  await pipelineCtx.save("retrieve", { classification, attachmentEvidence, evidence: allEvidence });
9051
9504
  onProgress?.("Reasoning over evidence...");
9052
9505
  const reasonerConfig = { generateObject, providerOptions };
9506
+ const subQuestionsToReason = reasonAction?.subQuestions ?? classification.subQuestions;
9053
9507
  const reasonResults = await Promise.allSettled(
9054
- classification.subQuestions.map(
9055
- (sq, i) => limit(async () => {
9508
+ subQuestionsToReason.map(
9509
+ (sq) => limit(async () => {
9510
+ const retrievedEvidence = retrievalResults.find((r) => r.subQuestion === sq.question)?.evidence ?? [];
9056
9511
  const { subAnswer, usage } = await reason(
9057
9512
  sq.question,
9058
9513
  sq.intent,
9059
- [...attachmentEvidence, ...retrievalResults[i].evidence],
9514
+ [...attachmentEvidence, ...retrievedEvidence],
9060
9515
  reasonerConfig
9061
9516
  );
9062
9517
  trackUsage(usage);
@@ -9070,9 +9525,9 @@ function createQueryAgent(config) {
9070
9525
  if (result.status === "fulfilled") {
9071
9526
  subAnswers.push(result.value);
9072
9527
  } else {
9073
- await log?.(`Reasoner failed for sub-question "${classification.subQuestions[i].question}": ${result.reason}`);
9528
+ await log?.(`Reasoner failed for sub-question "${subQuestionsToReason[i].question}": ${result.reason}`);
9074
9529
  subAnswers.push({
9075
- subQuestion: classification.subQuestions[i].question,
9530
+ subQuestion: subQuestionsToReason[i].question,
9076
9531
  answer: "Unable to answer this part of the question due to a processing error.",
9077
9532
  citations: [],
9078
9533
  confidence: 0,