@claritylabs/cl-sdk 0.17.0 → 0.17.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -438,7 +438,14 @@ function createPipelineContext(opts) {
438
438
  let latest = opts.resumeFrom;
439
439
  const completedPhases = /* @__PURE__ */ new Set();
440
440
  if (opts.resumeFrom) {
441
- completedPhases.add(opts.resumeFrom.phase);
441
+ const phaseIndex = opts.phaseOrder?.indexOf(opts.resumeFrom.phase) ?? -1;
442
+ if (phaseIndex >= 0 && opts.phaseOrder) {
443
+ for (const phase of opts.phaseOrder.slice(0, phaseIndex + 1)) {
444
+ completedPhases.add(phase);
445
+ }
446
+ } else {
447
+ completedPhases.add(opts.resumeFrom.phase);
448
+ }
442
449
  }
443
450
  return {
444
451
  id: opts.id,
@@ -1969,6 +1976,53 @@ async function runExtractor(params) {
1969
1976
  };
1970
1977
  }
1971
1978
 
// src/extraction/memory.ts

/**
 * Reports whether `value` is a non-null object that is not an array.
 * @param {unknown} value - Candidate value.
 * @returns {boolean} `true` only for non-array, non-null objects.
 */
function isMemoryRecord(value) {
  if (value === null || Array.isArray(value)) return false;
  return typeof value === "object";
}

/**
 * Looks up `key` through `memory.get` and keeps the result only when it passes
 * `isMemoryRecord`.
 * @param {{ get(key: string): unknown }} memory - Store exposing a `get` method.
 * @param {string} key - Entry name.
 * @returns {object | undefined} The stored record, or `undefined` for anything else.
 */
function readMemoryRecord(memory, key) {
  const entry = memory.get(key);
  if (!isMemoryRecord(entry)) return void 0;
  return entry;
}

/**
 * Null-safe property read.
 * @param {object | null | undefined} record - Record to read from.
 * @param {string} key - Property name.
 * @returns {unknown} `record[key]`, or `undefined` when `record` is nullish.
 */
function readRecordValue(record, key) {
  if (record === null || record === void 0) return void 0;
  return record[key];
}

/**
 * Null-safe property read that only accepts arrays.
 * @param {object | null | undefined} record - Record to read from.
 * @param {string} key - Property name.
 * @returns {unknown[] | undefined} The array stored under `key`, else `undefined`.
 */
function readRecordArray(record, key) {
  const candidate = readRecordValue(record, key);
  if (Array.isArray(candidate)) return candidate;
  return void 0;
}

/** Returns the "carrier_info" record from `memory`, if it is a record. */
function getCarrierInfo(memory) {
  const entry = readMemoryRecord(memory, "carrier_info");
  return entry;
}

/** Returns the "named_insured" record from `memory`, if it is a record. */
function getNamedInsured(memory) {
  const entry = readMemoryRecord(memory, "named_insured");
  return entry;
}

/** Returns the "coverage_limits" record from `memory`, if it is a record. */
function getCoverageLimits(memory) {
  const entry = readMemoryRecord(memory, "coverage_limits");
  return entry;
}

/**
 * Returns the `coverages` array of the "coverage_limits" record.
 * Unlike the other array getters here, this one falls back to an empty array.
 */
function getCoverageLimitCoverages(memory) {
  const coverages = readRecordArray(getCoverageLimits(memory), "coverages");
  return coverages ?? [];
}

/** Returns the "sections" record from `memory`, if it is a record. */
function getSectionsPayload(memory) {
  const entry = readMemoryRecord(memory, "sections");
  return entry;
}

/** Returns the `sections` array of the "sections" record, or `undefined`. */
function getSections(memory) {
  const payload = getSectionsPayload(memory);
  return readRecordArray(payload, "sections");
}

/** Returns the "definitions" record from `memory`, if it is a record. */
function getDefinitionsPayload(memory) {
  const entry = readMemoryRecord(memory, "definitions");
  return entry;
}

/** Returns the `definitions` array of the "definitions" record, or `undefined`. */
function getDefinitions(memory) {
  const payload = getDefinitionsPayload(memory);
  return readRecordArray(payload, "definitions");
}

/** Returns the "covered_reasons" record from `memory`, if it is a record. */
function getCoveredReasonsPayload(memory) {
  const entry = readMemoryRecord(memory, "covered_reasons");
  return entry;
}

/**
 * Returns the covered-reasons array, accepting either the `coveredReasons`
 * or the `covered_reasons` property name (the camelCase one wins).
 */
function getCoveredReasons(memory) {
  const payload = getCoveredReasonsPayload(memory);
  const camelCased = readRecordArray(payload, "coveredReasons");
  // readRecordArray yields an array or undefined, never null.
  if (camelCased !== void 0) return camelCased;
  return readRecordArray(payload, "covered_reasons");
}
2025
+
1972
2026
  // src/extraction/promote.ts
1973
2027
  function getDeclarationFields(doc) {
1974
2028
  const decl = doc.declarations;
@@ -1995,20 +2049,29 @@ function findRawString(raw, keys) {
1995
2049
  }
1996
2050
  return void 0;
1997
2051
  }
/**
 * Moves values from source keys onto target keys of `raw`, in place.
 *
 * For each mapping, the source value is copied to the target only when the
 * target is currently falsy and the source is truthy. The source key is
 * deleted afterwards in every case.
 *
 * @param {object} raw - Object mutated in place.
 * @param {Iterable<{from: string, to: string}>} mappings - Source/target key pairs.
 * @returns {void}
 */
function promoteRawFields(raw, mappings) {
  for (const mapping of mappings) {
    const sourceKey = mapping.from;
    const targetKey = mapping.to;
    const targetIsEmpty = !raw[targetKey];
    if (targetIsEmpty && raw[sourceKey]) {
      raw[targetKey] = raw[sourceKey];
    }
    // The source key is dropped whether or not it was copied.
    delete raw[sourceKey];
  }
}
/**
 * Resolves a value from `raw` first and from the declaration fields second.
 *
 * When `lookup.rawKey` is set and `raw[lookup.rawKey]` is truthy, that value is
 * returned and the field search is not run. Otherwise the result of
 * `findFieldValue(fields, lookup.patterns, lookup.reject)` is returned.
 *
 * @param {object} raw - Object holding already-extracted values.
 * @param {unknown} fields - Passed through to `findFieldValue`.
 * @param {{rawKey?: string, patterns: unknown, reject?: Function}} lookup - Search description.
 * @returns {unknown} The raw value, or whatever `findFieldValue` returns.
 */
function findRawOrDeclarationValue(raw, fields, lookup) {
  const rawValue = lookup.rawKey ? raw[lookup.rawKey] : void 0;
  if (rawValue) {
    return rawValue;
  }
  return findFieldValue(fields, lookup.patterns, lookup.reject);
}
/**
 * Fills `raw[targetKey]` when it is not already truthy.
 *
 * The candidate comes from `findRawString(raw, rawKeys)`. Only when that result
 * is `null`/`undefined` is `findFieldValue(fields, lookup.patterns, lookup.reject)`
 * consulted. A falsy candidate is not written.
 *
 * @param {object} raw - Object mutated in place.
 * @param {unknown} fields - Passed through to `findFieldValue`.
 * @param {string} targetKey - Key on `raw` to populate.
 * @param {unknown} rawKeys - Passed through to `findRawString`.
 * @param {{patterns: unknown, reject?: Function}} lookup - Field search description.
 * @returns {void}
 */
function promoteRawOrDeclarationString(raw, fields, targetKey, rawKeys, lookup) {
  if (!raw[targetKey]) {
    let resolved = findRawString(raw, rawKeys);
    if (resolved === null || resolved === void 0) {
      resolved = findFieldValue(fields, lookup.patterns, lookup.reject);
    }
    if (resolved) {
      raw[targetKey] = resolved;
    }
  }
}
1998
2068
  function promoteCarrierFields(doc) {
1999
2069
  const raw = doc;
2000
- if (!raw.carrierNaicNumber && raw.naicNumber) {
2001
- raw.carrierNaicNumber = raw.naicNumber;
2002
- }
2003
- if (!raw.carrierAmBestRating && raw.amBestRating) {
2004
- raw.carrierAmBestRating = raw.amBestRating;
2005
- }
2006
- if (!raw.carrierAdmittedStatus && raw.admittedStatus) {
2007
- raw.carrierAdmittedStatus = raw.admittedStatus;
2008
- }
2009
- delete raw.naicNumber;
2010
- delete raw.amBestRating;
2011
- delete raw.admittedStatus;
2070
+ promoteRawFields(raw, [
2071
+ { from: "naicNumber", to: "carrierNaicNumber" },
2072
+ { from: "amBestRating", to: "carrierAmBestRating" },
2073
+ { from: "admittedStatus", to: "carrierAdmittedStatus" }
2074
+ ]);
2012
2075
  if (!raw.insurer && raw.carrierLegalName) {
2013
2076
  raw.insurer = {
2014
2077
  legalName: raw.carrierLegalName,
@@ -2049,12 +2112,21 @@ var BROKER_ADDRESS_PATTERNS = ["brokerAddress", "agentAddress", "producerAddress
2049
2112
  function promoteBroker(doc) {
2050
2113
  const raw = doc;
2051
2114
  const fields = getDeclarationFields(doc);
2052
- const brokerAgency = raw.brokerAgency || findFieldValue(fields, BROKER_NAME_PATTERNS);
2053
- const brokerContact = raw.brokerContactName || findFieldValue(fields, BROKER_CONTACT_PATTERNS);
2054
- const brokerLicense = raw.brokerLicenseNumber || findFieldValue(fields, BROKER_LICENSE_PATTERNS);
2055
- const brokerPhone = findFieldValue(fields, BROKER_PHONE_PATTERNS);
2056
- const brokerEmail = findFieldValue(fields, BROKER_EMAIL_PATTERNS);
2057
- const brokerAddress = findFieldValue(fields, BROKER_ADDRESS_PATTERNS);
2115
+ const brokerAgency = findRawOrDeclarationValue(raw, fields, {
2116
+ rawKey: "brokerAgency",
2117
+ patterns: BROKER_NAME_PATTERNS
2118
+ });
2119
+ const brokerContact = findRawOrDeclarationValue(raw, fields, {
2120
+ rawKey: "brokerContactName",
2121
+ patterns: BROKER_CONTACT_PATTERNS
2122
+ });
2123
+ const brokerLicense = findRawOrDeclarationValue(raw, fields, {
2124
+ rawKey: "brokerLicenseNumber",
2125
+ patterns: BROKER_LICENSE_PATTERNS
2126
+ });
2127
+ const brokerPhone = findRawOrDeclarationValue(raw, fields, { patterns: BROKER_PHONE_PATTERNS });
2128
+ const brokerEmail = findRawOrDeclarationValue(raw, fields, { patterns: BROKER_EMAIL_PATTERNS });
2129
+ const brokerAddress = findRawOrDeclarationValue(raw, fields, { patterns: BROKER_ADDRESS_PATTERNS });
2058
2130
  if (brokerAgency) raw.brokerAgency = brokerAgency;
2059
2131
  if (brokerContact) raw.brokerContactName = brokerContact;
2060
2132
  if (brokerLicense) raw.brokerLicenseNumber = brokerLicense;
@@ -2430,20 +2502,27 @@ function taxFeeKey(item) {
2430
2502
  item.type ?? ""
2431
2503
  ].join("|");
2432
2504
  }
/**
 * Builds a tax/fee item from a declaration field.
 *
 * `name` is `titleizeFieldName(field.field)` and `amount` is
 * `absorbNegative(field.value)`. A `type` property is added only when
 * `taxFeeType(field.field)` returns a truthy value.
 *
 * @param {{field: string, value: string}} field - Field entry to convert.
 * @returns {{name: unknown, amount: string, type?: unknown}} The new item.
 */
function taxFeeItemFromField(field) {
  const type = taxFeeType(field.field);
  const item = {
    name: titleizeFieldName(field.field),
    amount: absorbNegative(field.value)
  };
  if (type) {
    item.type = type;
  }
  return item;
}
/**
 * Strips negative-amount notation from a string: a leading minus sign and/or
 * one or more pairs of wrapping parentheses.
 *
 * Both strips are repeated until the string stops changing, so nested notation
 * such as "(-100)" or "-(100)" is fully removed regardless of order. Whitespace
 * around a marker is tolerated and removed together with it. A value with no
 * marker is returned unchanged.
 *
 * @param {string} value - Amount text.
 * @returns {string} The text without negative markers.
 */
function absorbNegative(value) {
  let current = value;
  let previous;
  // Each pass can only shorten the string, so the loop always terminates.
  do {
    previous = current;
    current = current.replace(/^\s*-\s*/, "").replace(/^\s*\(\s*(.*?)\s*\)\s*$/, "$1");
  } while (current !== previous);
  return current;
}
/**
 * Populates `premium` and `totalCost` on the document, then strips negative
 * notation from whichever of the two ends up holding a string.
 *
 * Each value is filled through `promoteRawOrDeclarationString` with its own
 * raw keys and field patterns. The premium lookup rejects fields for which
 * `isTaxOrFeeField(field.field)` is true. Both promotions run before either
 * value is passed through `absorbNegative`.
 *
 * @param {object} doc - Document mutated in place.
 * @returns {void}
 */
function promotePremium(doc) {
  const raw = doc;
  const declarationFields = getDeclarationFields(doc);
  const promotions = [
    {
      targetKey: "premium",
      rawKeys: PREMIUM_RAW_KEYS,
      lookup: {
        patterns: PREMIUM_PATTERNS,
        reject: (field) => isTaxOrFeeField(field.field)
      }
    },
    {
      targetKey: "totalCost",
      rawKeys: TOTAL_COST_RAW_KEYS,
      lookup: { patterns: TOTAL_COST_PATTERNS }
    }
  ];
  for (const { targetKey, rawKeys, lookup } of promotions) {
    promoteRawOrDeclarationString(raw, declarationFields, targetKey, rawKeys, lookup);
  }
  // Normalisation is a separate pass so it happens after both promotions.
  for (const { targetKey } of promotions) {
    if (typeof raw[targetKey] === "string") {
      raw[targetKey] = absorbNegative(raw[targetKey]);
    }
  }
}
@@ -2461,11 +2540,7 @@ function synthesizeTaxesAndFees(doc) {
2461
2540
  if (!field.value?.trim()) continue;
2462
2541
  if (!isTaxOrFeeField(field.field)) continue;
2463
2542
  if (isTotalCostField(field.field)) continue;
2464
- const item = {
2465
- name: titleizeFieldName(field.field),
2466
- amount: absorbNegative(field.value),
2467
- ...taxFeeType(field.field) ? { type: taxFeeType(field.field) } : {}
2468
- };
2543
+ const item = taxFeeItemFromField(field);
2469
2544
  byKey.set(taxFeeKey(item), item);
2470
2545
  }
2471
2546
  if (byKey.size > 0) {
@@ -2485,43 +2560,47 @@ function promoteExtractedFields(doc) {
2485
2560
 
2486
2561
  // src/extraction/assembler.ts
2487
2562
  function assembleDocument(documentId, documentType, memory) {
2488
- const carrier = memory.get("carrier_info");
2489
- const insured = memory.get("named_insured");
2490
- const coverages = memory.get("coverage_limits");
2491
- const endorsements = memory.get("endorsements");
2492
- const exclusions = memory.get("exclusions");
2493
- const conditions = memory.get("conditions");
2494
- const premium = memory.get("premium_breakdown");
2495
- const declarations = memory.get("declarations");
2496
- const lossHistory = memory.get("loss_history");
2497
- const sections = memory.get("sections");
2498
- const supplementary = memory.get("supplementary");
2499
- const formInventory = memory.get("form_inventory");
2500
- const definitions = memory.get("definitions");
2501
- const coveredReasons = memory.get("covered_reasons");
2502
- const classify = memory.get("classify");
2563
+ const carrier = getCarrierInfo(memory);
2564
+ const insured = getNamedInsured(memory);
2565
+ const coverages = getCoverageLimits(memory);
2566
+ const endorsements = readMemoryRecord(memory, "endorsements");
2567
+ const exclusions = readMemoryRecord(memory, "exclusions");
2568
+ const conditions = readMemoryRecord(memory, "conditions");
2569
+ const premium = readMemoryRecord(memory, "premium_breakdown");
2570
+ const declarations = readMemoryRecord(memory, "declarations");
2571
+ const lossHistory = readMemoryRecord(memory, "loss_history");
2572
+ const supplementary = readMemoryRecord(memory, "supplementary");
2573
+ const formInventory = readMemoryRecord(memory, "form_inventory");
2574
+ const classify = readMemoryRecord(memory, "classify");
2575
+ const lossPayees = readRecordArray(insured, "lossPayees");
2576
+ const mortgageHolders = readRecordArray(insured, "mortgageHolders");
2503
2577
  const base = {
2504
2578
  id: documentId,
2505
- carrier: carrier?.carrierName ?? "Unknown",
2506
- insuredName: insured?.insuredName ?? "Unknown",
2507
- coverages: coverages?.coverages ?? [],
2508
- policyTypes: classify?.policyTypes,
2579
+ carrier: readRecordValue(carrier, "carrierName") ?? "Unknown",
2580
+ insuredName: readRecordValue(insured, "insuredName") ?? "Unknown",
2581
+ coverages: getCoverageLimitCoverages(memory),
2582
+ policyTypes: readRecordValue(classify, "policyTypes"),
2509
2583
  ...sanitizeNulls(carrier ?? {}),
2510
2584
  ...sanitizeNulls(insured ?? {}),
2511
2585
  // Map named_insured extractor's loss payees/mortgage holders to EndorsementParty shape
2512
- ...Array.isArray(insured?.lossPayees) && insured.lossPayees.length > 0 ? { lossPayees: insured.lossPayees.map((lp) => ({ ...lp, role: "loss_payee" })) } : {},
2513
- ...Array.isArray(insured?.mortgageHolders) && insured.mortgageHolders.length > 0 ? { mortgageHolders: insured.mortgageHolders.map((mh) => ({ ...mh, role: "mortgage_holder" })) } : {},
2586
+ ...lossPayees && lossPayees.length > 0 ? { lossPayees: lossPayees.map((lp) => ({ ...lp, role: "loss_payee" })) } : {},
2587
+ ...mortgageHolders && mortgageHolders.length > 0 ? {
2588
+ mortgageHolders: mortgageHolders.map((mh) => ({
2589
+ ...mh,
2590
+ role: "mortgage_holder"
2591
+ }))
2592
+ } : {},
2514
2593
  ...sanitizeNulls(coverages ?? {}),
2515
2594
  ...sanitizeNulls(premium ?? {}),
2516
2595
  ...sanitizeNulls(supplementary ?? {}),
2517
- supplementaryFacts: supplementary?.auxiliaryFacts,
2518
- endorsements: endorsements?.endorsements,
2519
- exclusions: exclusions?.exclusions,
2520
- conditions: conditions?.conditions,
2521
- sections: sections?.sections,
2522
- formInventory: formInventory?.forms,
2523
- definitions: definitions?.definitions,
2524
- coveredReasons: coveredReasons?.coveredReasons ?? coveredReasons?.covered_reasons,
2596
+ supplementaryFacts: readRecordValue(supplementary, "auxiliaryFacts"),
2597
+ endorsements: readRecordValue(endorsements, "endorsements"),
2598
+ exclusions: readRecordValue(exclusions, "exclusions"),
2599
+ conditions: readRecordValue(conditions, "conditions"),
2600
+ sections: getSections(memory),
2601
+ formInventory: readRecordValue(formInventory, "forms"),
2602
+ definitions: getDefinitions(memory),
2603
+ coveredReasons: getCoveredReasons(memory),
2525
2604
  declarations: declarations ? sanitizeNulls(declarations) : void 0,
2526
2605
  ...sanitizeNulls(lossHistory ?? {})
2527
2606
  };
@@ -2530,21 +2609,21 @@ function assembleDocument(documentId, documentType, memory) {
2530
2609
  doc = {
2531
2610
  ...base,
2532
2611
  type: "policy",
2533
- policyNumber: carrier?.policyNumber ?? insured?.policyNumber ?? "Unknown",
2534
- effectiveDate: carrier?.effectiveDate ?? insured?.effectiveDate ?? "Unknown",
2535
- expirationDate: carrier?.expirationDate,
2536
- policyTermType: carrier?.policyTermType
2612
+ policyNumber: readRecordValue(carrier, "policyNumber") ?? readRecordValue(insured, "policyNumber") ?? "Unknown",
2613
+ effectiveDate: readRecordValue(carrier, "effectiveDate") ?? readRecordValue(insured, "effectiveDate") ?? "Unknown",
2614
+ expirationDate: readRecordValue(carrier, "expirationDate"),
2615
+ policyTermType: readRecordValue(carrier, "policyTermType")
2537
2616
  };
2538
2617
  } else {
2539
2618
  doc = {
2540
2619
  ...base,
2541
2620
  type: "quote",
2542
- quoteNumber: carrier?.quoteNumber ?? "Unknown",
2543
- proposedEffectiveDate: carrier?.proposedEffectiveDate,
2544
- proposedExpirationDate: carrier?.proposedExpirationDate,
2545
- subjectivities: coverages?.subjectivities,
2546
- underwritingConditions: coverages?.underwritingConditions,
2547
- premiumBreakdown: premium?.premiumBreakdown
2621
+ quoteNumber: readRecordValue(carrier, "quoteNumber") ?? "Unknown",
2622
+ proposedEffectiveDate: readRecordValue(carrier, "proposedEffectiveDate"),
2623
+ proposedExpirationDate: readRecordValue(carrier, "proposedExpirationDate"),
2624
+ subjectivities: readRecordValue(coverages, "subjectivities"),
2625
+ underwritingConditions: readRecordValue(coverages, "underwritingConditions"),
2626
+ premiumBreakdown: readRecordValue(premium, "premiumBreakdown")
2548
2627
  };
2549
2628
  }
2550
2629
  promoteExtractedFields(doc);
@@ -2646,6 +2725,23 @@ ${block}`;
2646
2725
  }
2647
2726
 
2648
2727
  // src/extraction/formatter.ts
var LONG_CONTENT_THRESHOLD = 1200;

/**
 * Decides whether a content string is worth sending through the formatter.
 *
 * Returns `true` when the trimmed text is at least `LONG_CONTENT_THRESHOLD`
 * characters long, or when the text shows signs of markup or layout: code
 * fences or `<br>` tags, bold/underscore/backtick markers, links or images,
 * headings, list items, tabs, runs of three or more horizontal spaces, three
 * or more consecutive line breaks, trailing whitespace on a line, a line with
 * two or more pipes, or at least two lines that look space-aligned into
 * columns. Empty or whitespace-only text, and non-string input, return `false`.
 *
 * @param {string} text - Candidate content.
 * @returns {boolean} Whether the content should be formatted.
 */
function shouldFormatContent(text) {
  // Non-string input has nothing to format; avoid throwing on `.trim()`.
  if (typeof text !== "string") return false;
  const trimmed = text.trim();
  if (trimmed.length === 0) return false;
  if (trimmed.length >= LONG_CONTENT_THRESHOLD) return true;
  if (/```|~~~|<br\s*\/?>/i.test(trimmed)) return true;
  if (/(^|\s)(\*\*|__|`)/.test(trimmed)) return true;
  if (/!?\[[^\]]+\]\([^)]+\)/.test(trimmed)) return true;
  if (/^\s{0,3}#{1,6}\s*\S/m.test(trimmed)) return true;
  if (/^\s{0,6}(?:[-*+]|\d+[.)])\s+\S/m.test(trimmed)) return true;
  // Runs against the untrimmed text so trailing whitespace is still visible.
  // Line-break runs accept CRLF as well as LF, matching the split below.
  if (/\t|[^\S\r\n]{3,}|(?:\r?\n){3,}|[ \t]+$/m.test(text)) return true;
  const lines = trimmed.split(/\r?\n/).map((line) => line.trim()).filter(Boolean);
  if (lines.some((line) => (line.match(/\|/g)?.length ?? 0) >= 2)) return true;
  const spaceAlignedRows = lines.filter((line) => /\S\s{2,}\S\s{2,}\S/.test(line));
  if (spaceAlignedRows.length >= 2) return true;
  return false;
}
2649
2745
  function collectContentFields(doc) {
2650
2746
  const entries = [];
2651
2747
  let id = 0;
@@ -2749,7 +2845,7 @@ function applyFormattedContent(doc, entries, formatted) {
2749
2845
  }
2750
2846
  var MAX_ENTRIES_PER_BATCH = 20;
2751
2847
  async function formatDocumentContent(doc, generateText, options) {
2752
- const entries = collectContentFields(doc);
2848
+ const entries = collectContentFields(doc).filter((entry) => shouldFormatContent(entry.text));
2753
2849
  const totalUsage = { inputTokens: 0, outputTokens: 0 };
2754
2850
  if (entries.length === 0) {
2755
2851
  return { document: doc, usage: totalUsage };
@@ -2826,11 +2922,22 @@ function chunkDocument(doc) {
2826
2922
  if (policyTypesStr) base.policyTypes = policyTypesStr;
2827
2923
  return base;
2828
2924
  }
2829
- chunks.push({
2830
- id: `${docId}:carrier_info:0`,
2831
- documentId: docId,
2832
- type: "carrier_info",
2833
- text: [
/**
 * Joins the truthy entries of `values` with newline characters.
 * @param {Array<unknown>} values - Candidate lines; falsy entries are dropped.
 * @returns {string} The kept entries separated by "\n" ("" when none remain).
 */
function lines(values) {
  const kept = [];
  for (const value of values) {
    if (value) kept.push(value);
  }
  return kept.join("\n");
}
/**
 * Appends one chunk to the enclosing `chunks` collection.
 *
 * The chunk id is `docId` and `idSuffix` joined by a colon, `documentId` is
 * `docId`, and `metadata` is passed through `stringMetadata` before storing.
 *
 * @param {string} idSuffix - Part of the id after the document id.
 * @param {string} type - Chunk type.
 * @param {string} text - Chunk text.
 * @param {object} metadata - Raw metadata handed to `stringMetadata`.
 * @returns {void}
 */
function pushChunk(idSuffix, type, text, metadata) {
  const chunk = {
    id: `${docId}:${idSuffix}`,
    documentId: docId,
    type,
    text,
    metadata: stringMetadata(metadata)
  };
  chunks.push(chunk);
}
2937
+ pushChunk(
2938
+ "carrier_info:0",
2939
+ "carrier_info",
2940
+ lines([
2834
2941
  `Carrier: ${doc.carrier}`,
2835
2942
  doc.carrierLegalName ? `Legal Name: ${doc.carrierLegalName}` : null,
2836
2943
  doc.carrierNaicNumber ? `NAIC: ${doc.carrierNaicNumber}` : null,
@@ -2847,94 +2954,83 @@ function chunkDocument(doc) {
2847
2954
  doc.isPackage != null ? `Package: ${doc.isPackage ? "Yes" : "No"}` : null,
2848
2955
  doc.security ? `Security: ${doc.security}` : null,
2849
2956
  doc.policyTypes?.length ? `Policy Types: ${doc.policyTypes.join(", ")}` : null
2850
- ].filter(Boolean).join("\n"),
2851
- metadata: stringMetadata({ carrier: doc.carrier, documentType: doc.type })
2852
- });
2957
+ ]),
2958
+ { carrier: doc.carrier, documentType: doc.type }
2959
+ );
2853
2960
  if (doc.summary) {
2854
- chunks.push({
2855
- id: `${docId}:declaration:summary`,
2856
- documentId: docId,
2857
- type: "declaration",
2858
- text: `Policy Summary: ${doc.summary}`,
2859
- metadata: stringMetadata({ documentType: doc.type })
2860
- });
2961
+ pushChunk("declaration:summary", "declaration", `Policy Summary: ${doc.summary}`, { documentType: doc.type });
2861
2962
  }
2862
2963
  if (doc.type === "policy") {
2863
2964
  const pol = doc;
2864
- chunks.push({
2865
- id: `${docId}:declaration:policy_details`,
2866
- documentId: docId,
2867
- type: "declaration",
2868
- text: [
2965
+ pushChunk(
2966
+ "declaration:policy_details",
2967
+ "declaration",
2968
+ lines([
2869
2969
  `Policy Number: ${pol.policyNumber}`,
2870
2970
  `Effective Date: ${pol.effectiveDate}`,
2871
2971
  pol.expirationDate ? `Expiration Date: ${pol.expirationDate}` : null,
2872
2972
  pol.policyTermType ? `Term Type: ${pol.policyTermType}` : null,
2873
2973
  pol.effectiveTime ? `Effective Time: ${pol.effectiveTime}` : null,
2874
2974
  pol.nextReviewDate ? `Next Review Date: ${pol.nextReviewDate}` : null
2875
- ].filter(Boolean).join("\n"),
2876
- metadata: stringMetadata({
2975
+ ]),
2976
+ {
2877
2977
  policyNumber: pol.policyNumber,
2878
2978
  effectiveDate: pol.effectiveDate,
2879
2979
  expirationDate: pol.expirationDate,
2880
2980
  documentType: doc.type
2881
- })
2882
- });
2981
+ }
2982
+ );
2883
2983
  } else {
2884
2984
  const quote = doc;
2885
- chunks.push({
2886
- id: `${docId}:declaration:quote_details`,
2887
- documentId: docId,
2888
- type: "declaration",
2889
- text: [
2985
+ pushChunk(
2986
+ "declaration:quote_details",
2987
+ "declaration",
2988
+ lines([
2890
2989
  `Quote Number: ${quote.quoteNumber}`,
2891
2990
  quote.proposedEffectiveDate ? `Proposed Effective Date: ${quote.proposedEffectiveDate}` : null,
2892
2991
  quote.proposedExpirationDate ? `Proposed Expiration Date: ${quote.proposedExpirationDate}` : null,
2893
2992
  quote.quoteExpirationDate ? `Quote Expiration Date: ${quote.quoteExpirationDate}` : null
2894
- ].filter(Boolean).join("\n"),
2895
- metadata: stringMetadata({
2993
+ ]),
2994
+ {
2896
2995
  quoteNumber: quote.quoteNumber,
2897
2996
  documentType: doc.type
2898
- })
2899
- });
2997
+ }
2998
+ );
2900
2999
  }
2901
3000
  if (doc.insurer) {
2902
- chunks.push({
2903
- id: `${docId}:party:insurer`,
2904
- documentId: docId,
2905
- type: "party",
2906
- text: [
3001
+ pushChunk(
3002
+ "party:insurer",
3003
+ "party",
3004
+ lines([
2907
3005
  `Insurer: ${doc.insurer.legalName}`,
2908
3006
  doc.insurer.naicNumber ? `NAIC: ${doc.insurer.naicNumber}` : null,
2909
3007
  doc.insurer.amBestRating ? `AM Best Rating: ${doc.insurer.amBestRating}` : null,
2910
3008
  doc.insurer.amBestNumber ? `AM Best Number: ${doc.insurer.amBestNumber}` : null,
2911
3009
  doc.insurer.admittedStatus ? `Admitted Status: ${doc.insurer.admittedStatus}` : null,
2912
3010
  doc.insurer.stateOfDomicile ? `State of Domicile: ${doc.insurer.stateOfDomicile}` : null
2913
- ].filter(Boolean).join("\n"),
2914
- metadata: stringMetadata({ partyRole: "insurer", partyName: doc.insurer.legalName, documentType: doc.type })
2915
- });
3011
+ ]),
3012
+ { partyRole: "insurer", partyName: doc.insurer.legalName, documentType: doc.type }
3013
+ );
2916
3014
  }
2917
3015
  if (doc.producer) {
2918
- chunks.push({
2919
- id: `${docId}:party:producer`,
2920
- documentId: docId,
2921
- type: "party",
2922
- text: [
3016
+ pushChunk(
3017
+ "party:producer",
3018
+ "party",
3019
+ lines([
2923
3020
  `Producer/Broker: ${doc.producer.agencyName}`,
2924
3021
  doc.producer.contactName ? `Contact: ${doc.producer.contactName}` : null,
2925
3022
  doc.producer.licenseNumber ? `License: ${doc.producer.licenseNumber}` : null,
2926
3023
  doc.producer.phone ? `Phone: ${doc.producer.phone}` : null,
2927
3024
  doc.producer.email ? `Email: ${doc.producer.email}` : null,
2928
3025
  doc.producer.address ? `Address: ${formatAddress(doc.producer.address)}` : null
2929
- ].filter(Boolean).join("\n"),
2930
- metadata: stringMetadata({ partyRole: "producer", partyName: doc.producer.agencyName, documentType: doc.type })
2931
- });
3026
+ ]),
3027
+ { partyRole: "producer", partyName: doc.producer.agencyName, documentType: doc.type }
3028
+ );
2932
3029
  }
2933
- chunks.push({
2934
- id: `${docId}:named_insured:0`,
2935
- documentId: docId,
2936
- type: "named_insured",
2937
- text: [
3030
+ pushChunk(
3031
+ "named_insured:0",
3032
+ "named_insured",
3033
+ lines([
2938
3034
  `Insured: ${doc.insuredName}`,
2939
3035
  doc.insuredDba ? `DBA: ${doc.insuredDba}` : null,
2940
3036
  doc.insuredEntityType ? `Entity Type: ${doc.insuredEntityType}` : null,
@@ -2942,36 +3038,34 @@ function chunkDocument(doc) {
2942
3038
  doc.insuredSicCode ? `SIC: ${doc.insuredSicCode}` : null,
2943
3039
  doc.insuredNaicsCode ? `NAICS: ${doc.insuredNaicsCode}` : null,
2944
3040
  doc.insuredAddress ? `Address: ${formatAddress(doc.insuredAddress)}` : null
2945
- ].filter(Boolean).join("\n"),
2946
- metadata: stringMetadata({ insuredName: doc.insuredName, documentType: doc.type })
2947
- });
3041
+ ]),
3042
+ { insuredName: doc.insuredName, documentType: doc.type }
3043
+ );
2948
3044
  doc.additionalNamedInsureds?.forEach((insured, i) => {
2949
- chunks.push({
2950
- id: `${docId}:named_insured:${i + 1}`,
2951
- documentId: docId,
2952
- type: "named_insured",
2953
- text: [
3045
+ pushChunk(
3046
+ `named_insured:${i + 1}`,
3047
+ "named_insured",
3048
+ lines([
2954
3049
  `Additional Named Insured: ${insured.name}`,
2955
3050
  insured.address ? `Address: ${formatAddress(insured.address)}` : null,
2956
3051
  insured.relationship ? `Relationship: ${insured.relationship}` : null
2957
- ].filter(Boolean).join("\n"),
2958
- metadata: stringMetadata({ insuredName: insured.name, role: "additional_named_insured", documentType: doc.type })
2959
- });
3052
+ ]),
3053
+ { insuredName: insured.name, role: "additional_named_insured", documentType: doc.type }
3054
+ );
2960
3055
  });
2961
3056
  doc.coverages.forEach((cov, i) => {
2962
- chunks.push({
2963
- id: `${docId}:coverage:${i}`,
2964
- documentId: docId,
2965
- type: "coverage",
2966
- text: [
3057
+ pushChunk(
3058
+ `coverage:${i}`,
3059
+ "coverage",
3060
+ lines([
2967
3061
  `Coverage: ${cov.name}`,
2968
3062
  `Limit: ${cov.limit}`,
2969
3063
  cov.limitValueType ? `Limit Type: ${cov.limitValueType}` : null,
2970
3064
  cov.deductible ? `Deductible: ${cov.deductible}` : null,
2971
3065
  cov.deductibleValueType ? `Deductible Type: ${cov.deductibleValueType}` : null,
2972
3066
  cov.originalContent ? `Source: ${cov.originalContent}` : null
2973
- ].filter(Boolean).join("\n"),
2974
- metadata: stringMetadata({
3067
+ ]),
3068
+ {
2975
3069
  coverageName: cov.name,
2976
3070
  limit: cov.limit,
2977
3071
  limitValueType: cov.limitValueType,
@@ -2981,15 +3075,14 @@ function chunkDocument(doc) {
2981
3075
  pageNumber: cov.pageNumber,
2982
3076
  sectionRef: cov.sectionRef,
2983
3077
  documentType: doc.type
2984
- })
2985
- });
3078
+ }
3079
+ );
2986
3080
  });
2987
3081
  doc.enrichedCoverages?.forEach((cov, i) => {
2988
- chunks.push({
2989
- id: `${docId}:coverage:enriched:${i}`,
2990
- documentId: docId,
2991
- type: "coverage",
2992
- text: [
3082
+ pushChunk(
3083
+ `coverage:enriched:${i}`,
3084
+ "coverage",
3085
+ lines([
2993
3086
  `Coverage: ${cov.name}`,
2994
3087
  cov.coverageCode ? `Code: ${cov.coverageCode}` : null,
2995
3088
  `Limit: ${cov.limit}`,
@@ -3006,8 +3099,8 @@ function chunkDocument(doc) {
3006
3099
  `Included: ${cov.included ? "Yes" : "No"}`,
3007
3100
  cov.premium ? `Premium: ${cov.premium}` : null,
3008
3101
  cov.originalContent ? `Source: ${cov.originalContent}` : null
3009
- ].filter(Boolean).join("\n"),
3010
- metadata: stringMetadata({
3102
+ ]),
3103
+ {
3011
3104
  coverageName: cov.name,
3012
3105
  coverageCode: cov.coverageCode,
3013
3106
  limit: cov.limit,
@@ -3016,8 +3109,8 @@ function chunkDocument(doc) {
3016
3109
  pageNumber: cov.pageNumber,
3017
3110
  included: cov.included,
3018
3111
  documentType: doc.type
3019
- })
3020
- });
3112
+ }
3113
+ );
3021
3114
  });
3022
3115
  if (doc.limits) {
3023
3116
  const limitLines = ["Limit Schedule"];
@@ -3041,39 +3134,31 @@ function chunkDocument(doc) {
3041
3134
  limitLines.push(`Employers Liability \u2014 Each Accident: ${lim.employersLiability.eachAccident}, Disease Policy Limit: ${lim.employersLiability.diseasePolicyLimit}, Disease Each Employee: ${lim.employersLiability.diseaseEachEmployee}`);
3042
3135
  }
3043
3136
  if (lim.defenseCostTreatment) limitLines.push(`Defense Cost Treatment: ${lim.defenseCostTreatment}`);
3044
- chunks.push({
3045
- id: `${docId}:coverage:limit_schedule`,
3046
- documentId: docId,
3047
- type: "coverage",
3048
- text: limitLines.join("\n"),
3049
- metadata: stringMetadata({ coverageName: "limit_schedule", documentType: doc.type })
3050
- });
3137
+ pushChunk("coverage:limit_schedule", "coverage", limitLines.join("\n"), { coverageName: "limit_schedule", documentType: doc.type });
3051
3138
  lim.sublimits?.forEach((sub, i) => {
3052
- chunks.push({
3053
- id: `${docId}:coverage:sublimit:${i}`,
3054
- documentId: docId,
3055
- type: "coverage",
3056
- text: [
3139
+ pushChunk(
3140
+ `coverage:sublimit:${i}`,
3141
+ "coverage",
3142
+ lines([
3057
3143
  `Sublimit: ${sub.name}`,
3058
3144
  `Limit: ${sub.limit}`,
3059
3145
  sub.appliesTo ? `Applies To: ${sub.appliesTo}` : null,
3060
3146
  sub.deductible ? `Deductible: ${sub.deductible}` : null
3061
- ].filter(Boolean).join("\n"),
3062
- metadata: stringMetadata({ coverageName: sub.name, limit: sub.limit, documentType: doc.type })
3063
- });
3147
+ ]),
3148
+ { coverageName: sub.name, limit: sub.limit, documentType: doc.type }
3149
+ );
3064
3150
  });
3065
3151
  lim.sharedLimits?.forEach((sl, i) => {
3066
- chunks.push({
3067
- id: `${docId}:coverage:shared_limit:${i}`,
3068
- documentId: docId,
3069
- type: "coverage",
3070
- text: [
3152
+ pushChunk(
3153
+ `coverage:shared_limit:${i}`,
3154
+ "coverage",
3155
+ [
3071
3156
  `Shared Limit: ${sl.description}`,
3072
3157
  `Limit: ${sl.limit}`,
3073
3158
  `Coverage Parts: ${sl.coverageParts.join(", ")}`
3074
3159
  ].join("\n"),
3075
- metadata: stringMetadata({ coverageName: sl.description, limit: sl.limit, documentType: doc.type })
3076
- });
3160
+ { coverageName: sl.description, limit: sl.limit, documentType: doc.type }
3161
+ );
3077
3162
  });
3078
3163
  }
3079
3164
  if (doc.deductibles) {
@@ -3087,12 +3172,9 @@ function chunkDocument(doc) {
3087
3172
  if (ded.waitingPeriod) dedLines.push(`Waiting Period: ${ded.waitingPeriod}`);
3088
3173
  if (ded.appliesTo) dedLines.push(`Applies To: ${ded.appliesTo}`);
3089
3174
  if (dedLines.length > 1) {
3090
- chunks.push({
3091
- id: `${docId}:coverage:deductible_schedule`,
3092
- documentId: docId,
3093
- type: "coverage",
3094
- text: dedLines.join("\n"),
3095
- metadata: stringMetadata({ coverageName: "deductible_schedule", documentType: doc.type })
3175
+ pushChunk("coverage:deductible_schedule", "coverage", dedLines.join("\n"), {
3176
+ coverageName: "deductible_schedule",
3177
+ documentType: doc.type
3096
3178
  });
3097
3179
  }
3098
3180
  }
@@ -3104,99 +3186,90 @@ function chunkDocument(doc) {
3104
3186
  doc.extendedReportingPeriod?.supplementalPremium ? `Extended Reporting Period Premium: ${doc.extendedReportingPeriod.supplementalPremium}` : null
3105
3187
  ].filter(Boolean);
3106
3188
  if (claimsMadeLines.length > 0) {
3107
- chunks.push({
3108
- id: `${docId}:coverage:claims_made_details`,
3109
- documentId: docId,
3110
- type: "coverage",
3111
- text: claimsMadeLines.join("\n"),
3112
- metadata: stringMetadata({ coverageName: "claims_made_details", documentType: doc.type })
3189
+ pushChunk("coverage:claims_made_details", "coverage", claimsMadeLines.join("\n"), {
3190
+ coverageName: "claims_made_details",
3191
+ documentType: doc.type
3113
3192
  });
3114
3193
  }
3115
3194
  doc.formInventory?.forEach((form, i) => {
3116
- chunks.push({
3117
- id: `${docId}:declaration:form:${i}`,
3118
- documentId: docId,
3119
- type: "declaration",
3120
- text: [
3195
+ pushChunk(
3196
+ `declaration:form:${i}`,
3197
+ "declaration",
3198
+ lines([
3121
3199
  `Form: ${form.formNumber}`,
3122
3200
  form.title ? `Title: ${form.title}` : null,
3123
3201
  `Type: ${form.formType}`,
3124
3202
  form.editionDate ? `Edition: ${form.editionDate}` : null,
3125
3203
  form.pageStart ? `Pages: ${form.pageStart}${form.pageEnd ? `-${form.pageEnd}` : ""}` : null
3126
- ].filter(Boolean).join("\n"),
3127
- metadata: stringMetadata({
3204
+ ]),
3205
+ {
3128
3206
  formNumber: form.formNumber,
3129
3207
  formType: form.formType,
3130
3208
  documentType: doc.type
3131
- })
3132
- });
3209
+ }
3210
+ );
3133
3211
  });
3134
3212
  doc.endorsements?.forEach((end, i) => {
3135
- chunks.push({
3136
- id: `${docId}:endorsement:${i}`,
3137
- documentId: docId,
3138
- type: "endorsement",
3139
- text: `Endorsement: ${end.title}
3213
+ pushChunk(
3214
+ `endorsement:${i}`,
3215
+ "endorsement",
3216
+ `Endorsement: ${end.title}
3140
3217
  ${end.content}`.trim(),
3141
- metadata: stringMetadata({
3218
+ {
3142
3219
  endorsementType: end.endorsementType,
3143
3220
  formNumber: end.formNumber,
3144
3221
  pageStart: end.pageStart,
3145
3222
  pageEnd: end.pageEnd,
3146
3223
  documentType: doc.type
3147
- })
3148
- });
3224
+ }
3225
+ );
3149
3226
  });
3150
3227
  doc.exclusions?.forEach((exc, i) => {
3151
- chunks.push({
3152
- id: `${docId}:exclusion:${i}`,
3153
- documentId: docId,
3154
- type: "exclusion",
3155
- text: `Exclusion: ${exc.name}
3156
- ${exc.content}`.trim(),
3157
- metadata: stringMetadata({ formNumber: exc.formNumber, pageNumber: exc.pageNumber, documentType: doc.type })
3228
+ pushChunk(`exclusion:${i}`, "exclusion", `Exclusion: ${exc.name}
3229
+ ${exc.content}`.trim(), {
3230
+ formNumber: exc.formNumber,
3231
+ pageNumber: exc.pageNumber,
3232
+ documentType: doc.type
3158
3233
  });
3159
3234
  });
3160
3235
  doc.conditions?.forEach((cond, i) => {
3161
- chunks.push({
3162
- id: `${docId}:condition:${i}`,
3163
- documentId: docId,
3164
- type: "condition",
3165
- text: [
3236
+ pushChunk(
3237
+ `condition:${i}`,
3238
+ "condition",
3239
+ [
3166
3240
  `Condition: ${cond.name}`,
3167
3241
  `Type: ${cond.conditionType}`,
3168
3242
  cond.content,
3169
3243
  ...cond.keyValues?.map((kv) => `${kv.key}: ${kv.value}`) ?? []
3170
3244
  ].join("\n"),
3171
- metadata: stringMetadata({
3245
+ {
3172
3246
  conditionName: cond.name,
3173
3247
  conditionType: cond.conditionType,
3174
3248
  pageNumber: cond.pageNumber,
3175
3249
  documentType: doc.type
3176
- })
3177
- });
3250
+ }
3251
+ );
3178
3252
  });
3179
3253
  asRecordArray(extendedDoc.definitions).forEach((definition, i) => {
3180
3254
  const term = firstString(definition, ["term", "name", "title"]) ?? `Definition ${i + 1}`;
3181
3255
  const body = firstString(definition, ["definition", "content", "text", "meaning"]);
3182
- chunks.push({
3183
- id: `${docId}:definition:${i}`,
3184
- documentId: docId,
3185
- type: "definition",
3186
- text: [
3256
+ pushChunk(
3257
+ `definition:${i}`,
3258
+ "definition",
3259
+ lines([
3187
3260
  `Definition: ${term}`,
3188
3261
  body,
3189
3262
  firstString(definition, ["originalContent", "source"]) ? `Source: ${firstString(definition, ["originalContent", "source"])}` : null
3190
- ].filter(Boolean).join("\n"),
3191
- metadata: stringMetadata({
3263
+ ]),
3264
+ {
3192
3265
  term,
3193
3266
  formNumber: firstString(definition, ["formNumber"]),
3194
3267
  formTitle: firstString(definition, ["formTitle"]),
3195
3268
  pageNumber: typeof definition.pageNumber === "number" ? definition.pageNumber : void 0,
3196
3269
  sectionRef: firstString(definition, ["sectionRef", "sectionTitle"]),
3197
3270
  documentType: doc.type
3198
- })
3199
- });
3271
+ }
3272
+ );
3200
3273
  });
3201
3274
  const coveredReasons = asRecordArray(extendedDoc.coveredReasons ?? extendedDoc.covered_reasons);
3202
3275
  coveredReasons.forEach((coveredReason, i) => {
@@ -3204,18 +3277,17 @@ ${exc.content}`.trim(),
3204
3277
  const coverageName = firstString(coveredReason, ["coverageName", "coverage", "coveragePart"]);
3205
3278
  const reasonNumber = firstString(coveredReason, ["reasonNumber", "number"]);
3206
3279
  const body = firstString(coveredReason, ["content", "description", "text", "coverageGrant"]);
3207
- chunks.push({
3208
- id: `${docId}:covered_reason:${i}`,
3209
- documentId: docId,
3210
- type: "covered_reason",
3211
- text: [
3280
+ pushChunk(
3281
+ `covered_reason:${i}`,
3282
+ "covered_reason",
3283
+ lines([
3212
3284
  coverageName ? `Coverage: ${coverageName}` : null,
3213
3285
  reasonNumber ? `Reason Number: ${reasonNumber}` : null,
3214
3286
  `Covered Reason: ${title}`,
3215
3287
  body,
3216
3288
  firstString(coveredReason, ["originalContent", "source"]) ? `Source: ${firstString(coveredReason, ["originalContent", "source"])}` : null
3217
- ].filter(Boolean).join("\n"),
3218
- metadata: stringMetadata({
3289
+ ]),
3290
+ {
3219
3291
  coverageName,
3220
3292
  reasonNumber,
3221
3293
  title,
@@ -3224,21 +3296,20 @@ ${exc.content}`.trim(),
3224
3296
  pageNumber: typeof coveredReason.pageNumber === "number" ? coveredReason.pageNumber : void 0,
3225
3297
  sectionRef: firstString(coveredReason, ["sectionRef", "sectionTitle"]),
3226
3298
  documentType: doc.type
3227
- })
3228
- });
3299
+ }
3300
+ );
3229
3301
  const conditions = Array.isArray(coveredReason.conditions) ? coveredReason.conditions.filter((condition) => typeof condition === "string" && condition.trim().length > 0) : [];
3230
3302
  conditions.forEach((condition, conditionIndex) => {
3231
- chunks.push({
3232
- id: `${docId}:covered_reason:${i}:condition:${conditionIndex}`,
3233
- documentId: docId,
3234
- type: "covered_reason",
3235
- text: [
3303
+ pushChunk(
3304
+ `covered_reason:${i}:condition:${conditionIndex}`,
3305
+ "covered_reason",
3306
+ lines([
3236
3307
  coverageName ? `Coverage: ${coverageName}` : null,
3237
3308
  reasonNumber ? `Reason Number: ${reasonNumber}` : null,
3238
3309
  `Covered Reason Condition: ${title}`,
3239
3310
  condition
3240
- ].filter(Boolean).join("\n"),
3241
- metadata: stringMetadata({
3311
+ ]),
3312
+ {
3242
3313
  coverageName,
3243
3314
  reasonNumber,
3244
3315
  title,
@@ -3248,8 +3319,8 @@ ${exc.content}`.trim(),
3248
3319
  pageNumber: typeof coveredReason.pageNumber === "number" ? coveredReason.pageNumber : void 0,
3249
3320
  sectionRef: firstString(coveredReason, ["sectionRef", "sectionTitle"]),
3250
3321
  documentType: doc.type
3251
- })
3252
- });
3322
+ }
3323
+ );
3253
3324
  });
3254
3325
  });
3255
3326
  if (doc.declarations) {
@@ -3264,50 +3335,42 @@ ${exc.content}`.trim(),
3264
3335
  const declMeta = { documentType: doc.type };
3265
3336
  if (typeof decl.formType === "string") declMeta.formType = decl.formType;
3266
3337
  if (typeof decl.line === "string") declMeta.declarationLine = decl.line;
3267
- chunks.push({
3268
- id: `${docId}:declaration:0`,
3269
- documentId: docId,
3270
- type: "declaration",
3271
- text: `Declarations
3272
- ${declLines.join("\n")}`,
3273
- metadata: stringMetadata(declMeta)
3274
- });
3338
+ pushChunk("declaration:0", "declaration", `Declarations
3339
+ ${declLines.join("\n")}`, declMeta);
3275
3340
  }
3276
3341
  }
3277
3342
  doc.sections?.forEach((sec, i) => {
3278
3343
  const hasSubsections = sec.subsections && sec.subsections.length > 0;
3279
3344
  const contentLength = sec.content.length;
3280
3345
  if (hasSubsections) {
3281
- chunks.push({
3282
- id: `${docId}:section:${i}`,
3283
- documentId: docId,
3284
- type: "section",
3285
- text: `Section: ${sec.title}
3346
+ pushChunk(
3347
+ `section:${i}`,
3348
+ "section",
3349
+ `Section: ${sec.title}
3286
3350
  ${sec.content}`,
3287
- metadata: stringMetadata({
3351
+ {
3288
3352
  sectionType: sec.type,
3289
3353
  sectionNumber: sec.sectionNumber,
3290
3354
  pageStart: sec.pageStart,
3291
3355
  pageEnd: sec.pageEnd,
3292
3356
  documentType: doc.type,
3293
3357
  hasSubsections: "true"
3294
- })
3295
- });
3358
+ }
3359
+ );
3296
3360
  sec.subsections.forEach((sub, j) => {
3297
- chunks.push({
3298
- id: `${docId}:section:${i}:sub:${j}`,
3299
- documentId: docId,
3300
- type: "section",
3301
- text: `${sec.title} > ${sub.title}
3361
+ pushChunk(
3362
+ `section:${i}:sub:${j}`,
3363
+ "section",
3364
+ `${sec.title} > ${sub.title}
3302
3365
  ${sub.content}`,
3303
- metadata: stringMetadata({
3366
+ {
3304
3367
  sectionType: sec.type,
3305
3368
  parentSection: sec.title,
3306
3369
  sectionNumber: sub.sectionNumber,
3307
3370
  pageNumber: sub.pageNumber,
3308
3371
  documentType: doc.type
3309
- })
3310
- });
3372
+ }
3373
+ );
3311
3374
  });
3312
3375
  } else if (contentLength > 2e3) {
3313
3376
  const paragraphs = sec.content.split(/\n\n+/);
@@ -3315,58 +3378,55 @@ ${sub.content}`,
3315
3378
  let chunkIndex = 0;
3316
3379
  for (const para of paragraphs) {
3317
3380
  if (currentChunk.length + para.length > 1e3 && currentChunk.length > 0) {
3318
- chunks.push({
3319
- id: `${docId}:section:${i}:part:${chunkIndex}`,
3320
- documentId: docId,
3321
- type: "section",
3322
- text: `Section: ${sec.title} (part ${chunkIndex + 1})
3381
+ pushChunk(
3382
+ `section:${i}:part:${chunkIndex}`,
3383
+ "section",
3384
+ `Section: ${sec.title} (part ${chunkIndex + 1})
3323
3385
  ${currentChunk.trim()}`,
3324
- metadata: stringMetadata({
3386
+ {
3325
3387
  sectionType: sec.type,
3326
3388
  sectionNumber: sec.sectionNumber,
3327
3389
  pageStart: sec.pageStart,
3328
3390
  pageEnd: sec.pageEnd,
3329
3391
  documentType: doc.type,
3330
3392
  partIndex: chunkIndex
3331
- })
3332
- });
3393
+ }
3394
+ );
3333
3395
  currentChunk = "";
3334
3396
  chunkIndex++;
3335
3397
  }
3336
3398
  currentChunk += (currentChunk ? "\n\n" : "") + para;
3337
3399
  }
3338
3400
  if (currentChunk.trim()) {
3339
- chunks.push({
3340
- id: `${docId}:section:${i}:part:${chunkIndex}`,
3341
- documentId: docId,
3342
- type: "section",
3343
- text: `Section: ${sec.title} (part ${chunkIndex + 1})
3401
+ pushChunk(
3402
+ `section:${i}:part:${chunkIndex}`,
3403
+ "section",
3404
+ `Section: ${sec.title} (part ${chunkIndex + 1})
3344
3405
  ${currentChunk.trim()}`,
3345
- metadata: stringMetadata({
3406
+ {
3346
3407
  sectionType: sec.type,
3347
3408
  sectionNumber: sec.sectionNumber,
3348
3409
  pageStart: sec.pageStart,
3349
3410
  pageEnd: sec.pageEnd,
3350
3411
  documentType: doc.type,
3351
3412
  partIndex: chunkIndex
3352
- })
3353
- });
3413
+ }
3414
+ );
3354
3415
  }
3355
3416
  } else {
3356
- chunks.push({
3357
- id: `${docId}:section:${i}`,
3358
- documentId: docId,
3359
- type: "section",
3360
- text: `Section: ${sec.title}
3417
+ pushChunk(
3418
+ `section:${i}`,
3419
+ "section",
3420
+ `Section: ${sec.title}
3361
3421
  ${sec.content}`,
3362
- metadata: stringMetadata({
3422
+ {
3363
3423
  sectionType: sec.type,
3364
3424
  sectionNumber: sec.sectionNumber,
3365
3425
  pageStart: sec.pageStart,
3366
3426
  pageEnd: sec.pageEnd,
3367
3427
  documentType: doc.type
3368
- })
3369
- });
3428
+ }
3429
+ );
3370
3430
  }
3371
3431
  });
3372
3432
  doc.locations?.forEach((loc, i) => {
@@ -4944,12 +5004,15 @@ var ReviewResultSchema = import_zod21.z.object({
4944
5004
  description: import_zod21.z.string()
4945
5005
  }))
4946
5006
  });
4947
- function buildReviewPrompt(templateExpected, extractedKeys, extractionSummary, pageMapSummary) {
4948
- return `You are reviewing an extraction for completeness and quality. Compare what was expected vs what was found.
5007
+ function buildReviewPrompt(templateExpected, extractedKeys, extractionSummary, pageMapSummary, extractorCatalog) {
5008
+ return `You are the extraction coordinator for an insurance-document agent system. Review the current extraction state, decide whether the result is complete enough, and choose any follow-up extractor tasks needed to improve it.
4949
5009
 
4950
5010
  EXPECTED FIELDS (from document type template):
4951
5011
  ${templateExpected.map((f) => `- ${f}`).join("\n")}
4952
5012
 
5013
+ AVAILABLE FOLLOW-UP EXTRACTORS:
5014
+ ${extractorCatalog}
5015
+
4953
5016
  FIELDS ALREADY EXTRACTED:
4954
5017
  ${extractedKeys.map((f) => `- ${f}`).join("\n")}
4955
5018
 
@@ -4963,7 +5026,7 @@ Determine:
4963
5026
  1. Is the extraction complete enough?
4964
5027
  2. What fields are missing?
4965
5028
  3. What quality issues are present?
4966
- 4. Should any additional extraction tasks be dispatched?
5029
+ 4. Which follow-up extraction tasks, if any, should be dispatched?
4967
5030
 
4968
5031
  Mark the extraction as NOT complete if any of these are true:
4969
5032
  - required fields are missing
@@ -4974,7 +5037,9 @@ Mark the extraction as NOT complete if any of these are true:
4974
5037
  - page assignments suggest declaration, schedule, endorsement, exclusion, or condition pages were not actually extracted with the matching focused extractor
4975
5038
  - a focused extractor exists but returned too little substance for the relevant pages
4976
5039
 
4977
- When reviewing CURRENT EXTRACTION SUMMARY, compare the page-map counts to extracted counts. For definitions and covered_reasons, missing extraction should produce a quality issue and a narrow follow-up task over the mapped page range.
5040
+ When reviewing CURRENT EXTRACTION SUMMARY, compare the page-map counts to extracted counts. If an assigned extractor produced no useful records, produce a quality issue and a narrow follow-up task over the mapped page range.
5041
+
5042
+ Choose follow-up tasks from AVAILABLE FOLLOW-UP EXTRACTORS. You may dispatch any listed extractor when the page map, current extraction summary, or quality evidence shows that the focused extraction is missing, generic, referential, or too thin. Do not invent extractor names.
4978
5043
 
4979
5044
  Return JSON:
4980
5045
  {
@@ -4986,7 +5051,7 @@ Return JSON:
4986
5051
  ]
4987
5052
  }
4988
5053
 
4989
- Use the page map to target follow-up extraction pages precisely. Prefer narrow, declaration/schedule-focused follow-up tasks over broad page ranges.
5054
+ Use the page map to target follow-up extraction pages precisely. Prefer narrow, declaration/schedule-focused follow-up tasks over broad page ranges. If no additional model work is likely to improve the extraction, return an empty additionalTasks array.
4990
5055
 
4991
5056
  Respond with JSON only.`;
4992
5057
  }
@@ -5521,6 +5586,7 @@ var SectionsSchema = import_zod32.z.object({
5521
5586
  "policy_form",
5522
5587
  "endorsement",
5523
5588
  "application",
5589
+ "covered_reason",
5524
5590
  "exclusion",
5525
5591
  "condition",
5526
5592
  "definition",
@@ -5544,6 +5610,7 @@ For each section, classify its type:
5544
5610
  - "policy_form" \u2014 named ISO or proprietary forms (e.g. CG 00 01, IL 00 17). All sections within a named form should be typed as "policy_form"
5545
5611
  - "endorsement" \u2014 standalone endorsements modifying the base policy
5546
5612
  - "application" \u2014 the insurance application or supplemental application
5613
+ - "covered_reason" \u2014 affirmative grants of coverage, covered causes of loss, covered perils, or named covered events
5547
5614
  - "insuring_agreement" \u2014 the insuring agreement clause (only if standalone, not inside a policy_form)
5548
5615
  - "exclusion", "condition", "definition" \u2014 for standalone sections only
5549
5616
  - "schedule" \u2014 coverage or rating schedules
@@ -5721,6 +5788,53 @@ Return JSON only.`;
5721
5788
  }
5722
5789
 
5723
5790
  // src/prompts/extractors/index.ts
5791
+ function asRecord(data) {
5792
+ return data && typeof data === "object" ? data : void 0;
5793
+ }
5794
+ function getSections2(data) {
5795
+ const sections = asRecord(data)?.sections;
5796
+ return Array.isArray(sections) ? sections : [];
5797
+ }
5798
+ function isCoveredReasonsEmpty(data) {
5799
+ const record = asRecord(data);
5800
+ if (!record) return true;
5801
+ const coveredReasons = Array.isArray(record.coveredReasons) ? record.coveredReasons : Array.isArray(record.covered_reasons) ? record.covered_reasons : [];
5802
+ return coveredReasons.length === 0;
5803
+ }
5804
+ function isDefinitionsEmpty(data) {
5805
+ const definitions = asRecord(data)?.definitions;
5806
+ return !Array.isArray(definitions) || definitions.length === 0;
5807
+ }
5808
+ function sectionLooksLikeCoveredReason(section) {
5809
+ const type = String(section.type ?? "").toLowerCase();
5810
+ const title = String(section.title ?? "").toLowerCase();
5811
+ return type === "covered_reason" || title.includes("covered cause") || title.includes("covered reason") || title.includes("covered peril") || title.includes("named peril") || title.includes("insuring agreement");
5812
+ }
5813
+ function deriveCoveredReasonsFromSections(data) {
5814
+ const coveredReasons = getSections2(data).filter(sectionLooksLikeCoveredReason).map((section) => ({
5815
+ coverageName: String(section.coverageName ?? section.formTitle ?? section.title ?? "Covered Reasons"),
5816
+ title: typeof section.title === "string" ? section.title : void 0,
5817
+ content: String(section.content ?? ""),
5818
+ pageNumber: typeof section.pageStart === "number" ? section.pageStart : void 0,
5819
+ formNumber: typeof section.formNumber === "string" ? section.formNumber : void 0,
5820
+ formTitle: typeof section.formTitle === "string" ? section.formTitle : void 0,
5821
+ sectionRef: typeof section.sectionNumber === "string" ? section.sectionNumber : void 0,
5822
+ originalContent: typeof section.content === "string" ? section.content.slice(0, 500) : void 0
5823
+ })).filter((coveredReason) => coveredReason.content.trim().length > 0);
5824
+ return coveredReasons.length > 0 ? { coveredReasons } : void 0;
5825
+ }
5826
+ function deriveDefinitionsFromSections(data) {
5827
+ const definitions = getSections2(data).filter((section) => String(section.type ?? "").toLowerCase() === "definition").map((section) => ({
5828
+ term: String(section.title ?? "Definitions"),
5829
+ definition: String(section.content ?? ""),
5830
+ pageNumber: typeof section.pageStart === "number" ? section.pageStart : void 0,
5831
+ formNumber: typeof section.formNumber === "string" ? section.formNumber : void 0,
5832
+ formTitle: typeof section.formTitle === "string" ? section.formTitle : void 0,
5833
+ sectionRef: typeof section.sectionNumber === "string" ? section.sectionNumber : void 0,
5834
+ originalContent: typeof section.content === "string" ? section.content.slice(0, 500) : void 0
5835
+ })).filter((definition) => definition.definition.trim().length > 0);
5836
+ return definitions.length > 0 ? { definitions } : void 0;
5837
+ }
5724
5838
  var EXTRACTORS = {
5725
5839
  carrier_info: { buildPrompt: buildCarrierInfoPrompt, schema: CarrierInfoSchema, maxTokens: 2048 },
5726
5840
  named_insured: { buildPrompt: buildNamedInsuredPrompt, schema: NamedInsuredSchema2, maxTokens: 2048 },
@@ -5733,12 +5847,36 @@ var EXTRACTORS = {
5733
5847
  loss_history: { buildPrompt: buildLossHistoryPrompt, schema: LossHistorySchema, maxTokens: 4096 },
5734
5848
  sections: { buildPrompt: buildSectionsPrompt, schema: SectionsSchema, maxTokens: 8192 },
5735
5849
  supplementary: { buildPrompt: buildSupplementaryPrompt, schema: SupplementarySchema, maxTokens: 2048 },
5736
- definitions: { buildPrompt: buildDefinitionsPrompt, schema: DefinitionsSchema, maxTokens: 8192 },
5737
- covered_reasons: { buildPrompt: buildCoveredReasonsPrompt, schema: CoveredReasonsSchema, maxTokens: 8192 }
5850
+ definitions: {
5851
+ buildPrompt: buildDefinitionsPrompt,
5852
+ schema: DefinitionsSchema,
5853
+ maxTokens: 8192,
5854
+ fallback: {
5855
+ extractorName: "sections",
5856
+ isEmpty: isDefinitionsEmpty,
5857
+ deriveFocusedResult: deriveDefinitionsFromSections
5858
+ }
5859
+ },
5860
+ covered_reasons: {
5861
+ buildPrompt: buildCoveredReasonsPrompt,
5862
+ schema: CoveredReasonsSchema,
5863
+ maxTokens: 8192,
5864
+ fallback: {
5865
+ extractorName: "sections",
5866
+ isEmpty: isCoveredReasonsEmpty,
5867
+ deriveFocusedResult: deriveCoveredReasonsFromSections
5868
+ }
5869
+ }
5738
5870
  };
5739
5871
  function getExtractor(name) {
5740
5872
  return EXTRACTORS[name];
5741
5873
  }
5874
+ function formatExtractorCatalogForPrompt() {
5875
+ return Object.entries(EXTRACTORS).map(([name, extractor]) => {
5876
+ const fallback = extractor.fallback ? `; fallback: ${extractor.fallback.extractorName}` : "";
5877
+ return `- ${name} (maxTokens: ${extractor.maxTokens ?? 4096}${fallback})`;
5878
+ }).join("\n");
5879
+ }
5742
5880
 
5743
5881
  // src/extraction/resolve-referential.ts
5744
5882
  var import_zod37 = require("zod");
@@ -5790,18 +5928,124 @@ Your task:
5790
5928
  Return JSON only.`;
5791
5929
  }
5792
5930
 
5793
- // src/extraction/resolve-referential.ts
5931
+ // src/extraction/heuristics.ts
5794
5932
  function looksReferential(value) {
5795
5933
  if (typeof value !== "string") return false;
5796
5934
  const normalized = value.toLowerCase();
5797
5935
  return normalized.includes("shown in the declarations") || normalized.includes("shown in declarations") || normalized.includes("shown in the schedule") || normalized.includes("as stated") || normalized.includes("if applicable");
5798
5936
  }
5937
+ function looksCoveredReasonSection(section) {
5938
+ const title = String(section.title ?? "").toLowerCase();
5939
+ const type = String(section.type ?? "").toLowerCase();
5940
+ return type === "covered_reason" || title.includes("covered cause") || title.includes("covered reason") || title.includes("covered peril");
5941
+ }
5942
+
5943
+ // src/extraction/referential-workflow.ts
5944
+ function normalizeText(value) {
5945
+ return typeof value === "string" ? value.trim().toLowerCase() : "";
5946
+ }
5947
+ function containsTarget(value, target) {
5948
+ const normalizedValue = normalizeText(value);
5949
+ return Boolean(normalizedValue && target && normalizedValue.includes(target));
5950
+ }
5951
+ function pageRangeFrom(startPage, endPage) {
5952
+ if (typeof startPage !== "number" || !Number.isFinite(startPage) || startPage <= 0) {
5953
+ return void 0;
5954
+ }
5955
+ const normalizedEnd = typeof endPage === "number" && Number.isFinite(endPage) && endPage >= startPage ? endPage : startPage;
5956
+ return { startPage, endPage: normalizedEnd };
5957
+ }
5958
+ function parseReferentialTarget(rawTarget) {
5959
+ const raw = rawTarget?.trim() || "unknown";
5960
+ const normalized = raw.toLowerCase();
5961
+ if (normalized === "unknown") return { raw, normalized, kind: "unknown" };
5962
+ if (/declarations?|dec\b|decs\b/.test(normalized)) return { raw, normalized, kind: "declarations" };
5963
+ if (/schedule|scheduled/.test(normalized)) return { raw, normalized, kind: "schedule" };
5964
+ if (/\bitem\b/.test(normalized)) return { raw, normalized, kind: "item" };
5965
+ if (/premises?|location|building/.test(normalized)) return { raw, normalized, kind: "premises" };
5966
+ if (/\bsection\b/.test(normalized)) return { raw, normalized, kind: "section" };
5967
+ if (/policy|coverage\s+part|coverage\s+form/.test(normalized)) return { raw, normalized, kind: "policy" };
5968
+ return { raw, normalized, kind: "unknown" };
5969
+ }
5970
+ function findLocalReferentialPages(params) {
5971
+ const targetLower = params.referenceTarget.toLowerCase();
5972
+ for (const section of params.sections) {
5973
+ if (containsTarget(section.title, targetLower)) {
5974
+ const range = pageRangeFrom(section.pageStart, section.pageEnd);
5975
+ if (range) return range;
5976
+ }
5977
+ }
5978
+ for (const form of params.formInventory) {
5979
+ const titleMatch = containsTarget(form.title, targetLower);
5980
+ const typeMatch = containsTarget(form.formType, targetLower);
5981
+ const numberMatch = containsTarget(form.formNumber, targetLower);
5982
+ if (titleMatch || typeMatch || numberMatch) {
5983
+ const range = pageRangeFrom(form.pageStart, form.pageEnd);
5984
+ if (range) return range;
5985
+ }
5986
+ }
5987
+ return void 0;
5988
+ }
5989
+ function findDeclarationsSchedulePages(parsedTarget, formInventory) {
5990
+ for (const form of formInventory) {
5991
+ const formType = normalizeText(form.formType);
5992
+ const title = normalizeText(form.title);
5993
+ const matchesDeclarations = formType === "declarations" || /declarations?|dec\b|decs\b/.test(title);
5994
+ const matchesSchedule = /schedule|scheduled|coverage/.test(title) || formType === "coverage";
5995
+ const shouldUse = parsedTarget.kind === "declarations" ? matchesDeclarations : parsedTarget.kind === "schedule" || parsedTarget.kind === "item" || parsedTarget.kind === "premises" ? matchesSchedule || matchesDeclarations : parsedTarget.kind === "policy" ? matchesDeclarations || matchesSchedule : false;
5996
+ if (shouldUse) {
5997
+ const range = pageRangeFrom(form.pageStart, form.pageEnd);
5998
+ if (range) return range;
5999
+ }
6000
+ }
6001
+ return void 0;
6002
+ }
6003
+ function findSectionPages(parsedTarget, sections) {
6004
+ for (const section of sections) {
6005
+ const title = normalizeText(section.title);
6006
+ const type = normalizeText(section.type);
6007
+ const matchesKind = parsedTarget.kind === "declarations" && (type === "declarations" || /declarations?/.test(title)) || parsedTarget.kind === "schedule" && (type === "schedule" || /schedule|scheduled/.test(title)) || parsedTarget.kind === "premises" && /premises?|location|building/.test(title) || parsedTarget.kind === "item" && /\bitem\b|schedule|scheduled/.test(title) || parsedTarget.kind === "section" && containsTarget(title, parsedTarget.normalized);
6008
+ if (matchesKind) {
6009
+ const range = pageRangeFrom(section.pageStart, section.pageEnd);
6010
+ if (range) return range;
6011
+ }
6012
+ }
6013
+ return void 0;
6014
+ }
6015
+ function decideReferentialResolutionAction(params) {
6016
+ if (params.localPageRange) {
6017
+ return { kind: "lookup_pages", source: "local", pageRange: params.localPageRange };
6018
+ }
6019
+ const parsedTarget = parseReferentialTarget(params.referenceTarget);
6020
+ const declarationsScheduleRange = findDeclarationsSchedulePages(parsedTarget, params.formInventory);
6021
+ if (declarationsScheduleRange) {
6022
+ return {
6023
+ kind: "lookup_pages",
6024
+ source: "declarations_schedule",
6025
+ pageRange: declarationsScheduleRange
6026
+ };
6027
+ }
6028
+ const sectionRange = findSectionPages(parsedTarget, params.sections);
6029
+ if (sectionRange) {
6030
+ return { kind: "lookup_pages", source: "sections", pageRange: sectionRange };
6031
+ }
6032
+ if (parsedTarget.kind === "unknown") {
6033
+ return { kind: "skip", reason: "no concrete reference target" };
6034
+ }
6035
+ return { kind: "page_location" };
6036
+ }
6037
+
6038
+ // src/extraction/resolve-referential.ts
5799
6039
  function parseReferenceTarget(text) {
5800
6040
  if (typeof text !== "string") return void 0;
5801
6041
  const normalized = text.trim();
5802
6042
  if (!normalized) return void 0;
5803
6043
  const sectionMatch = normalized.match(/\b(Section\s+\d+[A-Za-z]?)/i);
5804
6044
  if (sectionMatch) return sectionMatch[1];
6045
+ const itemMatch = normalized.match(/\b(Item\s+\d+[A-Za-z]?)/i);
6046
+ if (itemMatch) return itemMatch[1];
6047
+ const premisesMatch = normalized.match(/\b(Premises?(?:\s+No\.?\s*\d+[A-Za-z]?|\s+\d+[A-Za-z]?)?)/i);
6048
+ if (premisesMatch) return premisesMatch[1].trim();
5805
6049
  if (/declarations/i.test(normalized)) return "Declarations";
5806
6050
  const scheduleMatch = normalized.match(/\b(Schedule(?:\s+of\s+[A-Za-z ]+)?)/i);
5807
6051
  if (scheduleMatch) return scheduleMatch[1].trim();
@@ -5827,26 +6071,31 @@ async function findReferencedPages(params) {
5827
6071
  pageCount,
5828
6072
  generateObject,
5829
6073
  providerOptions,
6074
+ trackUsage,
5830
6075
  log
5831
6076
  } = params;
5832
- const targetLower = referenceTarget.toLowerCase();
5833
- for (const section of sections) {
5834
- if (section.title && section.pageStart != null && section.title.toLowerCase().includes(targetLower)) {
5835
- return {
5836
- startPage: section.pageStart,
5837
- endPage: section.pageEnd ?? section.pageStart
5838
- };
5839
- }
6077
+ const localPageRange = findLocalReferentialPages({
6078
+ referenceTarget,
6079
+ sections,
6080
+ formInventory
6081
+ });
6082
+ const action = decideReferentialResolutionAction({
6083
+ referenceTarget,
6084
+ sections,
6085
+ formInventory,
6086
+ localPageRange
6087
+ });
6088
+ if (action.kind === "lookup_pages") {
6089
+ await log?.(
6090
+ `Referential target "${referenceTarget}" resolved to pages ${action.pageRange.startPage}-${action.pageRange.endPage} via ${action.source}.`
6091
+ );
6092
+ return action.pageRange;
5840
6093
  }
5841
- for (const form of formInventory) {
5842
- const titleMatch = form.title && form.title.toLowerCase().includes(targetLower);
5843
- const typeMatch = form.formType && form.formType.toLowerCase().includes(targetLower);
5844
- if ((titleMatch || typeMatch) && form.pageStart != null) {
5845
- return {
5846
- startPage: form.pageStart,
5847
- endPage: form.pageEnd ?? form.pageStart
5848
- };
5849
- }
6094
+ if (action.kind === "skip") {
6095
+ await log?.(
6096
+ `Skipping referential target "${referenceTarget}": ${action.reason}.`
6097
+ );
6098
+ return void 0;
5850
6099
  }
5851
6100
  try {
5852
6101
  const result = await safeGenerateObject(
@@ -5874,6 +6123,7 @@ Return JSON only.`,
5874
6123
  )
5875
6124
  }
5876
6125
  );
6126
+ trackUsage?.(result.usage);
5877
6127
  if (result.object.startPage > 0 && result.object.endPage > 0) {
5878
6128
  return {
5879
6129
  startPage: result.object.startPage,
@@ -5931,7 +6181,9 @@ async function resolveReferentialCoverages(params) {
5931
6181
  for (let i = 0; i < referentialCoverages.length; i++) {
5932
6182
  const cov = referentialCoverages[i];
5933
6183
  const refString = (looksReferential(cov.limit) ? cov.limit : void 0) ?? (looksReferential(cov.deductible) ? cov.deductible : void 0) ?? cov.limit ?? "";
5934
- const target = parseReferenceTarget(refString) ?? "unknown";
6184
+ const sectionRef = typeof cov.sectionRef === "string" ? cov.sectionRef : "";
6185
+ const parsedTarget = parseReferenceTarget(refString) ?? parseReferenceTarget(sectionRef) ?? sectionRef;
6186
+ const target = parsedTarget || "unknown";
5935
6187
  const group = targetGroups.get(target) ?? [];
5936
6188
  group.push({ coverage: cov, index: i });
5937
6189
  targetGroups.set(target, group);
@@ -5955,6 +6207,7 @@ async function resolveReferentialCoverages(params) {
5955
6207
  pageCount,
5956
6208
  generateObject,
5957
6209
  providerOptions,
6210
+ trackUsage,
5958
6211
  log
5959
6212
  });
5960
6213
  if (!pageRange) {
@@ -6072,6 +6325,78 @@ async function resolveReferentialCoverages(params) {
6072
6325
  };
6073
6326
  }
6074
6327
 
6328
+ // src/extraction/focused-dispatch.ts
6329
+ async function runFocusedExtractorWithFallback(params) {
6330
+ const {
6331
+ task,
6332
+ pdfInput,
6333
+ generateObject,
6334
+ convertPdfToImages,
6335
+ providerOptions,
6336
+ trackUsage,
6337
+ log
6338
+ } = params;
6339
+ const ext = getExtractor(task.extractorName);
6340
+ if (!ext) {
6341
+ await log?.(`Unknown extractor: ${task.extractorName}, skipping`);
6342
+ return null;
6343
+ }
6344
+ try {
6345
+ const result = await runExtractor({
6346
+ name: task.extractorName,
6347
+ prompt: ext.buildPrompt(),
6348
+ schema: ext.schema,
6349
+ pdfInput,
6350
+ startPage: task.startPage,
6351
+ endPage: task.endPage,
6352
+ generateObject,
6353
+ convertPdfToImages,
6354
+ maxTokens: ext.maxTokens ?? 4096,
6355
+ providerOptions
6356
+ });
6357
+ trackUsage(result.usage);
6358
+ if (!ext.fallback?.isEmpty(result.data)) {
6359
+ return result;
6360
+ }
6361
+ if (!ext.fallback) {
6362
+ return result;
6363
+ }
6364
+ } catch (error) {
6365
+ await log?.(`Extractor ${task.extractorName} failed: ${error}`);
6366
+ if (!ext.fallback) {
6367
+ return null;
6368
+ }
6369
+ }
6370
+ const fallbackExt = getExtractor(ext.fallback.extractorName);
6371
+ if (!fallbackExt) return null;
6372
+ await log?.(
6373
+ `Extractor ${task.extractorName} produced no usable records; trying ${ext.fallback.extractorName} fallback for pages ${task.startPage}-${task.endPage}`
6374
+ );
6375
+ try {
6376
+ const fallbackResult = await runExtractor({
6377
+ name: ext.fallback.extractorName,
6378
+ prompt: fallbackExt.buildPrompt(),
6379
+ schema: fallbackExt.schema,
6380
+ pdfInput,
6381
+ startPage: task.startPage,
6382
+ endPage: task.endPage,
6383
+ generateObject,
6384
+ convertPdfToImages,
6385
+ maxTokens: fallbackExt.maxTokens ?? 4096,
6386
+ providerOptions
6387
+ });
6388
+ trackUsage(fallbackResult.usage);
6389
+ const focusedData = ext.fallback.deriveFocusedResult(fallbackResult.data);
6390
+ return focusedData ? [
6391
+ fallbackResult,
6392
+ { name: task.extractorName, data: focusedData, usage: void 0 }
6393
+ ] : fallbackResult;
6394
+ } catch (fallbackError) {
6395
+ await log?.(`${ext.fallback.extractorName} fallback for ${task.extractorName} failed: ${fallbackError}`);
6396
+ return null;
6397
+ }
6398
+ }
6399
+
6075
6400
  // src/core/quality.ts
6076
6401
  function evaluateQualityGate(params) {
6077
6402
  const { issues, hasRoundWarnings = false } = params;
@@ -6108,11 +6433,6 @@ function addFormEntry(inventory, formNumber, source, extra) {
6108
6433
  sources: [source]
6109
6434
  });
6110
6435
  }
6111
- function looksReferential2(value) {
6112
- if (typeof value !== "string") return false;
6113
- const normalized = value.toLowerCase();
6114
- return normalized.includes("shown in the declarations") || normalized.includes("shown in declarations") || normalized.includes("shown in the schedule") || normalized.includes("as stated") || normalized.includes("if applicable");
6115
- }
6116
6436
  function looksTocArtifact(value) {
6117
6437
  if (typeof value !== "string") return false;
6118
6438
  return /\.{4,}\d{1,3}$/.test(value.trim()) || /^\d+\.\s+[A-Z][\s\S]*\.{3,}\d{1,3}$/.test(value.trim());
@@ -6139,11 +6459,7 @@ function buildExtractionReviewReport(params) {
6139
6459
  const definitionsResult = memory.get("definitions");
6140
6460
  const coveredReasonsResult = memory.get("covered_reasons");
6141
6461
  const definitions = Array.isArray(definitionsResult?.definitions) ? definitionsResult.definitions : sections.filter((section) => section.type === "definition");
6142
- const coveredReasons = Array.isArray(coveredReasonsResult?.coveredReasons) ? coveredReasonsResult.coveredReasons : Array.isArray(coveredReasonsResult?.covered_reasons) ? coveredReasonsResult.covered_reasons : sections.filter((section) => {
6143
- const title = String(section.title ?? "").toLowerCase();
6144
- const type = String(section.type ?? "").toLowerCase();
6145
- return type === "covered_reason" || title.includes("covered cause") || title.includes("covered reason") || title.includes("covered peril");
6146
- });
6462
+ const coveredReasons = Array.isArray(coveredReasonsResult?.coveredReasons) ? coveredReasonsResult.coveredReasons : Array.isArray(coveredReasonsResult?.covered_reasons) ? coveredReasonsResult.covered_reasons : sections.filter(looksCoveredReasonSection);
6147
6463
  const mappedDefinitions = params.pageAssignments.some((assignment) => assignment.extractorNames.includes("definitions"));
6148
6464
  const mappedCoveredReasons = params.pageAssignments.some((assignment) => assignment.extractorNames.includes("covered_reasons"));
6149
6465
  if (mappedDefinitions && definitions.length === 0) {
@@ -6258,7 +6574,7 @@ function buildExtractionReviewReport(params) {
6258
6574
  itemName: typeof coverage.name === "string" ? coverage.name : void 0
6259
6575
  });
6260
6576
  }
6261
- if (looksReferential2(coverage.limit) || looksReferential2(coverage.deductible)) {
6577
+ if (looksReferential(coverage.limit) || looksReferential(coverage.deductible)) {
6262
6578
  deterministicIssues.push({
6263
6579
  code: "coverage_referential_value",
6264
6580
  severity: "warning",
@@ -6408,7 +6724,7 @@ function buildExtractionReviewReport(params) {
6408
6724
  itemName
6409
6725
  });
6410
6726
  }
6411
- if (looksReferential2(content) || looksReferential2(coveredReason.reason)) {
6727
+ if (looksReferential(content) || looksReferential(coveredReason.reason)) {
6412
6728
  deterministicIssues.push({
6413
6729
  code: "covered_reason_referential_value",
6414
6730
  severity: "warning",
@@ -6469,6 +6785,134 @@ function toReviewRoundRecord(round, review) {
6469
6785
  };
6470
6786
  }
6471
6787
 
6788
+ // src/extraction/planning.ts
6789
/**
 * Normalizes the extractor list attached to each page assignment.
 *
 * For every assignment this de-duplicates the extractor names (defaulting to
 * "sections"), removes "coverage_limits" when the page does not carry schedule
 * values and looks like form wording, and adds "endorsements" when the form
 * inventory places the page inside an endorsement form.
 *
 * @param {Array<object>} pageAssignments - Entries read for `localPageNumber`,
 *   `extractorNames`, `pageRole` and `hasScheduleValues`.
 * @param {object} [formInventory] - Optional inventory whose `forms` entries
 *   carry `formType`, `pageStart` and `pageEnd`.
 * @returns {Array<object>} Shallow copies of the assignments with
 *   `extractorNames` replaced by the normalized list.
 */
function normalizePageAssignments(pageAssignments, formInventory) {
  // Index the form types present on each page covered by an inventoried form.
  const formTypesByPage = /* @__PURE__ */ new Map();
  const inventoriedForms = formInventory ? formInventory.forms : [];
  for (const form of inventoriedForms) {
    if (form.pageStart == null) continue;
    const lastPage = form.pageEnd ?? form.pageStart;
    for (let page = form.pageStart; page <= lastPage; page += 1) {
      let typesOnPage = formTypesByPage.get(page);
      if (typesOnPage === undefined) {
        typesOnPage = /* @__PURE__ */ new Set();
        formTypesByPage.set(page, typesOnPage);
      }
      typesOnPage.add(form.formType);
    }
  }

  const formWordingRoles = ["policy_form", "condition_exclusion_form", "endorsement_form"];
  const nonScheduleFormTypes = ["endorsement", "notice", "application"];
  const clauseExtractors = ["conditions", "exclusions", "endorsements"];

  return pageAssignments.map((assignment) => {
    const requested = assignment.extractorNames.length > 0 ? assignment.extractorNames : ["sections"];
    let names = Array.from(new Set(requested.filter(Boolean)));
    const typesOnPage = formTypesByPage.get(assignment.localPageNumber);

    // Every drop rule requires that the page is NOT flagged as holding schedule values.
    if (names.includes("coverage_limits") && assignment.hasScheduleValues !== true) {
      const isFormWordingPage = formWordingRoles.includes(assignment.pageRole);
      const lacksDeclarations = !names.includes("declarations");
      const inventoryBlocks = typesOnPage != null && nonScheduleFormTypes.some((type) => typesOnPage.has(type));
      const hasClauseExtractor = clauseExtractors.some((name) => names.includes(name));
      if (isFormWordingPage || (lacksDeclarations && (inventoryBlocks || hasClauseExtractor))) {
        names = names.filter((name) => name !== "coverage_limits");
      }
    }

    if (typesOnPage?.has("endorsement") && !names.includes("endorsements")) {
      names.push("endorsements");
    }
    if (names.length === 0) {
      names = ["sections"];
    }
    return { ...assignment, extractorNames: names };
  });
}
6833
/**
 * Renders a four-line, newline-separated hint block describing the document
 * type, the template's expected sections, its page hints and the page count.
 *
 * @param {string} primaryType - First half of the "Document type" line.
 * @param {string} documentType - Second half of the "Document type" line.
 * @param {number} pageCount - Value printed on the "Total pages" line.
 * @param {object} template - Read for `expectedSections` (array) and
 *   `pageHints` (object of key/value pairs).
 * @returns {string} The hint text.
 */
function buildTemplateHints(primaryType, documentType, pageCount, template) {
  const documentLine = `Document type: ${primaryType} ${documentType}`;
  const sectionList = template.expectedSections.join(", ");
  const hintList = Object.entries(template.pageHints)
    .map(([k, v]) => `${k}: ${v}`)
    .join("; ");
  const lines = [
    documentLine,
    `Expected sections: ${sectionList}`,
    `Page hints: ${hintList}`,
    `Total pages: ${pageCount}`
  ];
  return lines.join("\n");
}
6841
/**
 * Collapses a list of page numbers into inclusive ranges of consecutive pages.
 *
 * Duplicates are ignored and input order does not matter; the ranges come back
 * in ascending order.
 *
 * @param {number[]} pages - Page numbers, possibly unsorted or repeated.
 * @returns {Array<{startPage: number, endPage: number}>} One entry per run of
 *   consecutive pages; empty when `pages` is empty.
 */
function groupContiguousPages(pages) {
  const ranges = [];
  const ascending = Array.from(new Set(pages)).sort((a, b) => a - b);
  for (const page of ascending) {
    const open = ranges[ranges.length - 1];
    if (open !== undefined && page === open.endPage + 1) {
      // Still inside the current run: stretch it by one page.
      open.endPage = page;
    } else {
      ranges.push({ startPage: page, endPage: page });
    }
  }
  return ranges;
}
6860
/**
 * Turns per-page extractor assignments into an extraction plan.
 *
 * Pages are bucketed per extractor, pages from 1..pageCount that no assignment
 * covers are given to "sections", and each bucket is split into contiguous
 * page ranges that become tasks. For the clause-style extractors listed in
 * `contextualExtractors`, a bucket is first widened to the full range of any
 * eligible inventoried form that contains one of its pages.
 *
 * @param {Array<object>} pageAssignments - Entries read for `extractorNames`
 *   and `localPageNumber`.
 * @param {number} pageCount - Highest page number checked for coverage.
 * @param {object} [formInventory] - Optional inventory whose `forms` entries
 *   carry `formType`, `pageStart` and `pageEnd`.
 * @returns {{tasks: Array<object>, pageMap: Array<object>}} Tasks sorted by
 *   start page then extractor name, plus a per-extractor page listing built
 *   from the un-widened buckets.
 */
function buildPlanFromPageAssignments(pageAssignments, pageCount, formInventory) {
  const pagesByExtractor = /* @__PURE__ */ new Map();
  const assignedPages = /* @__PURE__ */ new Set();
  const recordPage = (extractorName, page) => {
    const bucket = pagesByExtractor.get(extractorName);
    if (bucket) {
      bucket.push(page);
    } else {
      pagesByExtractor.set(extractorName, [page]);
    }
  };

  for (const assignment of pageAssignments) {
    const names = assignment.extractorNames.length > 0 ? assignment.extractorNames : ["sections"];
    for (const name of names) {
      recordPage(name, assignment.localPageNumber);
      assignedPages.add(assignment.localPageNumber);
    }
  }
  // Any page nobody claimed falls back to the generic sections extractor.
  for (let page = 1; page <= pageCount; page += 1) {
    if (!assignedPages.has(page)) recordPage("sections", page);
  }

  const contextualExtractors = /* @__PURE__ */ new Set(["conditions", "covered_reasons", "definitions", "exclusions", "endorsements"]);
  // A form with a start page always has a usable end (pageEnd falls back to pageStart).
  const spanningForms = (formInventory?.forms ?? []).filter((form) => form.pageStart != null);

  const widenToFormRanges = (extractorName, pages) => {
    if (!contextualExtractors.has(extractorName)) return pages;
    const widened = new Set(pages);
    for (const form of spanningForms) {
      const isEndorsementForm = form.formType === "endorsement";
      const eligible = extractorName === "endorsements" ? isEndorsementForm : isEndorsementForm || form.formType === "coverage";
      if (!eligible) continue;
      const firstPage = form.pageStart;
      const lastPage = form.pageEnd ?? form.pageStart;
      const touchesForm = pages.some((page) => !(page < firstPage || page > lastPage));
      if (!touchesForm) continue;
      for (let current = firstPage; current <= lastPage; current += 1) {
        widened.add(current);
      }
    }
    return Array.from(widened).sort((a, b) => a - b);
  };

  const tasks = [];
  const pageMap = [];
  for (const [extractorName, pages] of pagesByExtractor) {
    for (const { startPage, endPage } of groupContiguousPages(widenToFormRanges(extractorName, pages))) {
      tasks.push({
        extractorName,
        startPage,
        endPage,
        description: `Page-mapped ${extractorName} extraction for pages ${startPage}-${endPage}`
      });
    }
    const listedPages = Array.from(new Set(pages)).sort((a, b) => a - b);
    pageMap.push({ section: extractorName, pages: `pages ${listedPages.join(", ")}` });
  }
  tasks.sort((a, b) => a.startPage - b.startPage || a.extractorName.localeCompare(b.extractorName));

  return { tasks, pageMap };
}
6915
+
6472
6916
  // src/extraction/coordinator.ts
6473
6917
  function createExtractor(config) {
6474
6918
  const {
@@ -6485,6 +6929,7 @@ function createExtractor(config) {
6485
6929
  onCheckpointSave
6486
6930
  } = config;
6487
6931
  const limit = pLimit(concurrency);
6932
+ const extractorCatalog = formatExtractorCatalogForPrompt();
6488
6933
  let totalUsage = { inputTokens: 0, outputTokens: 0 };
6489
6934
  let modelCalls = 0;
6490
6935
  let callsWithUsage = 0;
@@ -6505,43 +6950,56 @@ function createExtractor(config) {
6505
6950
  memory.set(name, mergeExtractorResult(name, existing, data));
6506
6951
  }
6507
6952
  function summarizeExtraction(memory) {
6508
- const coverageResult = memory.get("coverage_limits");
6509
- const declarationResult = memory.get("declarations");
6510
- const endorsementResult = memory.get("endorsements");
6511
- const exclusionResult = memory.get("exclusions");
6512
- const conditionResult = memory.get("conditions");
6513
- const sectionResult = memory.get("sections");
6514
- const definitionsResult = memory.get("definitions");
6515
- const coveredReasonsResult = memory.get("covered_reasons");
6516
- const sections = Array.isArray(sectionResult?.sections) ? sectionResult.sections : [];
6517
- const definitionCount = Array.isArray(definitionsResult?.definitions) ? definitionsResult.definitions.length : sections.filter((section) => section.type === "definition").length;
6518
- const coveredReasonCount = Array.isArray(coveredReasonsResult?.coveredReasons) ? coveredReasonsResult.coveredReasons.length : Array.isArray(coveredReasonsResult?.covered_reasons) ? coveredReasonsResult.covered_reasons.length : sections.filter((section) => {
6519
- const title = String(section.title ?? "").toLowerCase();
6520
- const type = String(section.type ?? "").toLowerCase();
6521
- return type === "covered_reason" || title.includes("covered cause") || title.includes("covered reason") || title.includes("covered peril");
6522
- }).length;
6523
- const coverageSummary = Array.isArray(coverageResult?.coverages) ? coverageResult.coverages.slice(0, 12).map((coverage) => ({
6953
+ const declarationResult = readMemoryRecord(memory, "declarations");
6954
+ const endorsements = readRecordArray(readMemoryRecord(memory, "endorsements"), "endorsements") ?? [];
6955
+ const exclusions = readRecordArray(readMemoryRecord(memory, "exclusions"), "exclusions") ?? [];
6956
+ const conditions = readRecordArray(readMemoryRecord(memory, "conditions"), "conditions") ?? [];
6957
+ const sections = getSections(memory) ?? [];
6958
+ const definitions = getDefinitions(memory) ?? sections.filter((section) => section.type === "definition");
6959
+ const coveredReasons = getCoveredReasons(memory) ?? sections.filter(looksCoveredReasonSection);
6960
+ const coverages = getCoverageLimitCoverages(memory);
6961
+ const coverageSummary = coverages.slice(0, 12).map((coverage) => ({
6524
6962
  name: coverage.name,
6525
6963
  limit: coverage.limit,
6526
6964
  deductible: coverage.deductible,
6527
6965
  formNumber: coverage.formNumber
6528
- })) : [];
6966
+ }));
6529
6967
  return JSON.stringify({
6530
6968
  extractedKeys: [...memory.keys()].filter((key) => key !== "classify"),
6531
6969
  declarationFieldCount: Array.isArray(declarationResult?.fields) ? declarationResult.fields.length : 0,
6532
- coverageCount: Array.isArray(coverageResult?.coverages) ? coverageResult.coverages.length : 0,
6970
+ coverageCount: coverages.length,
6533
6971
  coverageSamples: coverageSummary,
6534
- endorsementCount: Array.isArray(endorsementResult?.endorsements) ? endorsementResult.endorsements.length : 0,
6535
- exclusionCount: Array.isArray(exclusionResult?.exclusions) ? exclusionResult.exclusions.length : 0,
6536
- conditionCount: Array.isArray(conditionResult?.conditions) ? conditionResult.conditions.length : 0,
6537
- definitionCount,
6538
- coveredReasonCount,
6972
+ endorsementCount: endorsements.length,
6973
+ exclusionCount: exclusions.length,
6974
+ conditionCount: conditions.length,
6975
+ definitionCount: definitions.length,
6976
+ coveredReasonCount: coveredReasons.length,
6539
6977
  sectionCount: sections.length
6540
6978
  }, null, 2);
6541
6979
  }
6980
/**
 * Reports whether a piece of text mentions any keyword that hints at
 * supplementary content (regulatory contacts, claims, cancellation,
 * non-renewal, notices, governing law, third-party administrators, ...).
 *
 * The whole alternation is wrapped in word boundaries, so each alternative
 * must cover the complete word. The cancellation and non-renewal alternatives
 * therefore spell out their variants ("canceled"/"cancelled",
 * "nonrenewal"/"non-renewal"/"non renewal"/"non renewed") instead of relying
 * on a prefix such as "non renew", which the trailing boundary would stop
 * from matching "non renewal".
 *
 * @param {unknown} value - Candidate text; non-strings never match.
 * @returns {boolean} True when `value` is a string containing a keyword.
 */
function textIncludesSupplementarySignal(value) {
  if (typeof value !== "string") return false;
  return /\b(supplementary|regulatory|department of insurance|ombudsman|complaint|claim|claims|contact|phone|email|cancell?ation|cancell?ed|non[- ]?renew(?:al|als|ed|ing)?|notice|governing law|jurisdiction|third[- ]party administrator|tpa)\b/i.test(value);
}
6984
/**
 * Decides whether anything seen so far suggests supplementary content exists.
 *
 * Checks, in order: the page assignments (role, extractor list, notes), the
 * form inventory (notice forms, title, form number), and finally the
 * serialized extraction results stored under a few memory keys.
 *
 * @param {Array<object>} pageAssignments - Read for `pageRole`,
 *   `extractorNames` and `notes`.
 * @param {object} [formInventory] - Optional; `forms` entries are read for
 *   `formType`, `title` and `formNumber`.
 * @param {Map<string, unknown>} memory - Extraction results keyed by extractor name.
 * @returns {boolean} True as soon as any source shows a signal.
 */
function hasSupplementaryExtractionSignal(pageAssignments, formInventory, memory) {
  for (const assignment of pageAssignments) {
    if (
      assignment.pageRole === "supplementary" ||
      assignment.extractorNames.includes("supplementary") ||
      textIncludesSupplementarySignal(assignment.notes)
    ) {
      return true;
    }
  }

  for (const form of formInventory?.forms ?? []) {
    if (
      form.formType === "notice" ||
      textIncludesSupplementarySignal(form.title) ||
      textIncludesSupplementarySignal(form.formNumber)
    ) {
      return true;
    }
  }

  // Last resort: scan the JSON form of already-extracted content for keywords.
  for (const key of ["sections", "conditions", "endorsements", "exclusions"]) {
    const stored = memory.get(key);
    if (stored && textIncludesSupplementarySignal(JSON.stringify(stored))) {
      return true;
    }
  }
  return false;
}
6542
7000
  function buildAlreadyExtractedSummary(memory) {
6543
7001
  const lines = [];
6544
- const declarationResult = memory.get("declarations");
7002
+ const declarationResult = readMemoryRecord(memory, "declarations");
6545
7003
  if (Array.isArray(declarationResult?.fields)) {
6546
7004
  for (const field of declarationResult.fields) {
6547
7005
  if (field.key && field.value) {
@@ -6550,20 +7008,17 @@ function createExtractor(config) {
6550
7008
  }
6551
7009
  }
6552
7010
  }
6553
- const coverageResult = memory.get("coverage_limits");
6554
- if (Array.isArray(coverageResult?.coverages)) {
6555
- for (const cov of coverageResult.coverages) {
6556
- const parts = [cov.name, cov.limit && `limit=${cov.limit}`, cov.deductible && `deductible=${cov.deductible}`].filter(Boolean);
6557
- if (parts.length > 0) lines.push(`- coverage: ${parts.join(", ")}`);
6558
- }
7011
+ for (const cov of getCoverageLimitCoverages(memory)) {
7012
+ const parts = [cov.name, cov.limit && `limit=${cov.limit}`, cov.deductible && `deductible=${cov.deductible}`].filter(Boolean);
7013
+ if (parts.length > 0) lines.push(`- coverage: ${parts.join(", ")}`);
6559
7014
  }
6560
- const namedInsured = memory.get("named_insured");
7015
+ const namedInsured = getNamedInsured(memory);
6561
7016
  if (namedInsured) {
6562
7017
  for (const [key, value] of Object.entries(namedInsured)) {
6563
7018
  if (value && typeof value === "string") lines.push(`- ${key}: ${value}`);
6564
7019
  }
6565
7020
  }
6566
- const carrierInfo = memory.get("carrier_info");
7021
+ const carrierInfo = getCarrierInfo(memory);
6567
7022
  if (carrierInfo) {
6568
7023
  for (const [key, value] of Object.entries(carrierInfo)) {
6569
7024
  if (value && typeof value === "string") lines.push(`- ${key}: ${value}`);
@@ -6571,6 +7026,34 @@ function createExtractor(config) {
6571
7026
  }
6572
7027
  return lines.length > 0 ? lines.join("\n") : "";
6573
7028
  }
7029
/**
 * Runs a single planned extraction task.
 *
 * "supplementary" tasks are run directly through `runExtractor`, with a prompt
 * built from a summary of what is already in `memory`. Every other task is
 * delegated to `runFocusedExtractorWithFallback`.
 *
 * Callers run many tasks under `Promise.all` and skip falsy results, so a
 * failing supplementary extraction is logged and reported as `null` rather
 * than rejecting and taking the whole batch down with it.
 *
 * @param {object} task - Read for `extractorName`, `startPage` and `endPage`.
 * @param {unknown} pdfInput - Passed through to the extractor unchanged.
 * @param {Map<string, unknown>} memory - Results extracted so far.
 * @returns {Promise<unknown>} The extractor result, or `null` when the
 *   supplementary extraction failed.
 */
async function runFocusedExtractorTask(task, pdfInput, memory) {
  if (task.extractorName !== "supplementary") {
    return runFocusedExtractorWithFallback({
      task,
      pdfInput,
      generateObject,
      convertPdfToImages,
      providerOptions,
      trackUsage,
      log
    });
  }
  try {
    const alreadyExtractedSummary = buildAlreadyExtractedSummary(memory);
    const result = await runExtractor({
      name: "supplementary",
      prompt: buildSupplementaryPrompt(alreadyExtractedSummary),
      schema: SupplementarySchema,
      pdfInput,
      startPage: task.startPage,
      endPage: task.endPage,
      generateObject,
      convertPdfToImages,
      maxTokens: 4096,
      providerOptions
    });
    trackUsage(result.usage);
    return result;
  } catch (error) {
    // Best effort: one failed task must not reject the surrounding Promise.all.
    await log?.(`Extractor supplementary failed: ${error}`);
    return null;
  }
}
6574
7057
  function formatPageMapSummary(pageAssignments) {
6575
7058
  const extractorPages = /* @__PURE__ */ new Map();
6576
7059
  for (const assignment of pageAssignments) {
@@ -6581,132 +7064,6 @@ function createExtractor(config) {
6581
7064
  if (extractorPages.size === 0) return "No page assignments available.";
6582
7065
  return [...extractorPages.entries()].map(([extractorName, pages]) => `${extractorName}: ${pages.length} page(s), pages ${pages.join(", ")}`).join("\n");
6583
7066
  }
6584
- function normalizePageAssignments(pageAssignments, formInventory) {
6585
- const pageFormTypes = /* @__PURE__ */ new Map();
6586
- if (formInventory) {
6587
- for (const form of formInventory.forms) {
6588
- if (form.pageStart != null) {
6589
- const end = form.pageEnd ?? form.pageStart;
6590
- for (let p = form.pageStart; p <= end; p++) {
6591
- const types = pageFormTypes.get(p) ?? /* @__PURE__ */ new Set();
6592
- types.add(form.formType);
6593
- pageFormTypes.set(p, types);
6594
- }
6595
- }
6596
- }
6597
- }
6598
- return pageAssignments.map((assignment) => {
6599
- let extractorNames = [...new Set(
6600
- (assignment.extractorNames.length > 0 ? assignment.extractorNames : ["sections"]).filter(Boolean)
6601
- )];
6602
- const hasDeclarations = extractorNames.includes("declarations");
6603
- const hasConditions = extractorNames.includes("conditions");
6604
- const hasExclusions = extractorNames.includes("exclusions");
6605
- const hasEndorsements = extractorNames.includes("endorsements");
6606
- const looksLikeScheduleValues = assignment.hasScheduleValues === true;
6607
- const roleBlocksCoverageLimits = assignment.pageRole === "policy_form" || assignment.pageRole === "condition_exclusion_form" || assignment.pageRole === "endorsement_form";
6608
- const inventoryTypes = pageFormTypes.get(assignment.localPageNumber);
6609
- const inventoryBlocksCoverageLimits = inventoryTypes != null && !looksLikeScheduleValues && !hasDeclarations && (inventoryTypes.has("endorsement") || inventoryTypes.has("notice") || inventoryTypes.has("application"));
6610
- if (extractorNames.includes("coverage_limits")) {
6611
- const shouldDropCoverageLimits = inventoryBlocksCoverageLimits || !looksLikeScheduleValues && roleBlocksCoverageLimits || !hasDeclarations && !looksLikeScheduleValues && (hasConditions || hasExclusions) || !hasDeclarations && !looksLikeScheduleValues && hasEndorsements;
6612
- if (shouldDropCoverageLimits) {
6613
- extractorNames = extractorNames.filter((name) => name !== "coverage_limits");
6614
- }
6615
- }
6616
- if (inventoryTypes?.has("endorsement") && !extractorNames.includes("endorsements")) {
6617
- extractorNames = [...extractorNames, "endorsements"];
6618
- }
6619
- if (extractorNames.length === 0) {
6620
- extractorNames = ["sections"];
6621
- }
6622
- return {
6623
- ...assignment,
6624
- extractorNames
6625
- };
6626
- });
6627
- }
6628
- function buildTemplateHints(primaryType, documentType, pageCount, template) {
6629
- return [
6630
- `Document type: ${primaryType} ${documentType}`,
6631
- `Expected sections: ${template.expectedSections.join(", ")}`,
6632
- `Page hints: ${Object.entries(template.pageHints).map(([k, v]) => `${k}: ${v}`).join("; ")}`,
6633
- `Total pages: ${pageCount}`
6634
- ].join("\n");
6635
- }
6636
- function groupContiguousPages(pages) {
6637
- if (pages.length === 0) return [];
6638
- const sorted = [...new Set(pages)].sort((a, b) => a - b);
6639
- const ranges = [];
6640
- let start = sorted[0];
6641
- let previous = sorted[0];
6642
- for (let i = 1; i < sorted.length; i += 1) {
6643
- const current = sorted[i];
6644
- if (current === previous + 1) {
6645
- previous = current;
6646
- continue;
6647
- }
6648
- ranges.push({ startPage: start, endPage: previous });
6649
- start = current;
6650
- previous = current;
6651
- }
6652
- ranges.push({ startPage: start, endPage: previous });
6653
- return ranges;
6654
- }
6655
- function buildPlanFromPageAssignments(pageAssignments, pageCount, formInventory) {
6656
- const extractorPages = /* @__PURE__ */ new Map();
6657
- for (const assignment of pageAssignments) {
6658
- const extractors = assignment.extractorNames.length > 0 ? assignment.extractorNames : ["sections"];
6659
- for (const extractorName of extractors) {
6660
- extractorPages.set(extractorName, [...extractorPages.get(extractorName) ?? [], assignment.localPageNumber]);
6661
- }
6662
- }
6663
- const coveredPages = /* @__PURE__ */ new Set();
6664
- for (const pages of extractorPages.values()) {
6665
- for (const page of pages) coveredPages.add(page);
6666
- }
6667
- for (let page = 1; page <= pageCount; page += 1) {
6668
- if (!coveredPages.has(page)) {
6669
- extractorPages.set("sections", [...extractorPages.get("sections") ?? [], page]);
6670
- }
6671
- }
6672
- const contextualExtractors = /* @__PURE__ */ new Set(["conditions", "covered_reasons", "definitions", "exclusions", "endorsements"]);
6673
- const contextualForms = (formInventory?.forms ?? []).filter(
6674
- (form) => form.pageStart != null && (form.pageEnd ?? form.pageStart) != null
6675
- );
6676
- const expandPagesToFormRanges = (extractorName, pages) => {
6677
- if (!contextualExtractors.has(extractorName)) return pages;
6678
- const expanded = new Set(pages);
6679
- for (const page of pages) {
6680
- for (const form of contextualForms) {
6681
- const pageStart = form.pageStart;
6682
- const pageEnd = form.pageEnd ?? form.pageStart;
6683
- const formType = form.formType;
6684
- const supportsContextualExpansion = extractorName === "endorsements" ? formType === "endorsement" : formType === "coverage" || formType === "endorsement";
6685
- if (!supportsContextualExpansion) continue;
6686
- if (page < pageStart || page > pageEnd) continue;
6687
- for (let current = pageStart; current <= pageEnd; current += 1) {
6688
- expanded.add(current);
6689
- }
6690
- }
6691
- }
6692
- return [...expanded].sort((a, b) => a - b);
6693
- };
6694
- const tasks = [...extractorPages.entries()].flatMap(
6695
- ([extractorName, pages]) => groupContiguousPages(expandPagesToFormRanges(extractorName, pages)).map(({ startPage, endPage }) => ({
6696
- extractorName,
6697
- startPage,
6698
- endPage,
6699
- description: `Page-mapped ${extractorName} extraction for pages ${startPage}-${endPage}`
6700
- }))
6701
- ).sort((a, b) => a.startPage - b.startPage || a.extractorName.localeCompare(b.extractorName));
6702
- return {
6703
- tasks,
6704
- pageMap: [...extractorPages.entries()].map(([section, pages]) => ({
6705
- section,
6706
- pages: `pages ${[...new Set(pages)].sort((a, b) => a - b).join(", ")}`
6707
- }))
6708
- };
6709
- }
6710
7067
  async function extract(pdfInput, documentId, options) {
6711
7068
  const id = documentId ?? `doc-${Date.now()}`;
6712
7069
  const memory = /* @__PURE__ */ new Map();
@@ -6717,7 +7074,8 @@ function createExtractor(config) {
6717
7074
  const pipelineCtx = createPipelineContext({
6718
7075
  id,
6719
7076
  onSave: onCheckpointSave,
6720
- resumeFrom: options?.resumeFrom
7077
+ resumeFrom: options?.resumeFrom,
7078
+ phaseOrder: ["classify", "form_inventory", "page_map", "plan", "extract", "resolve_referential", "review", "assemble"]
6721
7079
  });
6722
7080
  const resumed = pipelineCtx.getCheckpoint()?.state;
6723
7081
  if (resumed?.memory) {
@@ -6885,40 +7243,18 @@ function createExtractor(config) {
6885
7243
  const extractorResults = await Promise.all(
6886
7244
  tasks.map(
6887
7245
  (task) => limit(async () => {
6888
- const ext = getExtractor(task.extractorName) ?? (task.extractorName === "definitions" || task.extractorName === "covered_reasons" ? getExtractor("sections") : void 0);
6889
- if (!ext) {
6890
- await log?.(`Unknown extractor: ${task.extractorName}, skipping`);
6891
- return null;
6892
- }
6893
7246
  onProgress?.(`Extracting ${task.extractorName} (pages ${task.startPage}-${task.endPage})...`);
6894
- try {
6895
- const result = await runExtractor({
6896
- name: task.extractorName,
6897
- prompt: ext.buildPrompt(),
6898
- schema: ext.schema,
6899
- pdfInput,
6900
- startPage: task.startPage,
6901
- endPage: task.endPage,
6902
- generateObject,
6903
- convertPdfToImages,
6904
- maxTokens: ext.maxTokens ?? 4096,
6905
- providerOptions
6906
- });
6907
- trackUsage(result.usage);
6908
- return result;
6909
- } catch (error) {
6910
- await log?.(`Extractor ${task.extractorName} failed: ${error}`);
6911
- return null;
6912
- }
7247
+ return runFocusedExtractorTask(task, pdfInput, memory);
6913
7248
  })
6914
7249
  )
6915
7250
  );
6916
- for (const result of extractorResults) {
7251
+ for (const result of extractorResults.flatMap((item) => Array.isArray(item) ? item : item ? [item] : [])) {
6917
7252
  if (result) {
6918
7253
  mergeMemoryResult(result.name, result.data, memory);
6919
7254
  }
6920
7255
  }
6921
- {
7256
+ const planIncludesSupplementary = tasks.some((task) => task.extractorName === "supplementary");
7257
+ if (!planIncludesSupplementary && hasSupplementaryExtractionSignal(pageAssignments, formInventory, memory)) {
6922
7258
  onProgress?.("Extracting supplementary retrieval facts...");
6923
7259
  try {
6924
7260
  const alreadyExtractedSummary = buildAlreadyExtractedSummary(memory);
@@ -6992,7 +7328,7 @@ function createExtractor(config) {
6992
7328
  const reviewResponse = await safeGenerateObject(
6993
7329
  generateObject,
6994
7330
  {
6995
- prompt: buildReviewPrompt(template.required, extractedKeys, extractionSummary, pageMapSummary),
7331
+ prompt: buildReviewPrompt(template.required, extractedKeys, extractionSummary, pageMapSummary, extractorCatalog),
6996
7332
  schema: ReviewResultSchema,
6997
7333
  maxTokens: 1536,
6998
7334
  providerOptions: await buildPdfProviderOptions(pdfInput, providerOptions)
@@ -7016,31 +7352,11 @@ function createExtractor(config) {
7016
7352
  const followUpResults = await Promise.all(
7017
7353
  reviewResponse.object.additionalTasks.map(
7018
7354
  (task) => limit(async () => {
7019
- const ext = getExtractor(task.extractorName) ?? (task.extractorName === "definitions" || task.extractorName === "covered_reasons" ? getExtractor("sections") : void 0);
7020
- if (!ext) return null;
7021
- try {
7022
- const result = await runExtractor({
7023
- name: task.extractorName,
7024
- prompt: ext.buildPrompt(),
7025
- schema: ext.schema,
7026
- pdfInput,
7027
- startPage: task.startPage,
7028
- endPage: task.endPage,
7029
- generateObject,
7030
- convertPdfToImages,
7031
- maxTokens: ext.maxTokens ?? 4096,
7032
- providerOptions
7033
- });
7034
- trackUsage(result.usage);
7035
- return result;
7036
- } catch (error) {
7037
- await log?.(`Follow-up extractor ${task.extractorName} failed: ${error}`);
7038
- return null;
7039
- }
7355
+ return runFocusedExtractorTask(task, pdfInput, memory);
7040
7356
  })
7041
7357
  )
7042
7358
  );
7043
- for (const result of followUpResults) {
7359
+ for (const result of followUpResults.flatMap((item) => Array.isArray(item) ? item : item ? [item] : [])) {
7044
7360
  if (result) {
7045
7361
  mergeMemoryResult(result.name, result.data, memory);
7046
7362
  }
@@ -8079,6 +8395,70 @@ function reviewBatchEmail(text, batchFields) {
8079
8395
  };
8080
8396
  }
8081
8397
 
8398
+ // src/application/workflow.ts
8399
// Maximum number of fields returned for document search; planDocumentSearchFields
// also uses it as the unfilled-field count above which the low-value ratio check applies.
var MAX_DOCUMENT_SEARCH_FIELDS = 5;
// Share of unfilled fields that may be non-searchable before planDocumentSearchFields
// skips document search altogether (only applied above the count threshold).
var LOW_VALUE_FIELD_RATIO_LIMIT = 0.6;
8401
/**
 * Decides which auto-fill steps are worth running for an application.
 *
 * @param {object} input - Read for `fields`, `hasBackfillProvider`,
 *   `orgContextCount`, `hasDocumentStore` and `hasMemoryStore`.
 * @returns {object} Flags `runBackfill`, `runContextAutoFill` and
 *   `runBatching`, the `documentSearchFields` to look up, and the list of
 *   `unfilledFields` the decisions were based on.
 */
function planApplicationWorkflow(input) {
  const unfilledFields = input.fields.filter(isUnfilled);
  const hasUnfilled = unfilledFields.length > 0;
  // Document search needs both stores to be present.
  const storesAvailable = input.hasDocumentStore && input.hasMemoryStore;
  const documentSearchFields = planDocumentSearchFields(unfilledFields, storesAvailable);
  return {
    runBackfill: input.hasBackfillProvider && hasUnfilled,
    runContextAutoFill: input.orgContextCount > 0 && hasUnfilled,
    documentSearchFields,
    runBatching: hasUnfilled,
    unfilledFields
  };
}
8415
/**
 * Decides which actions to take in response to a classified reply.
 *
 * @param {object} input - Read for `intent` (`hasAnswers`, `lookupRequests`,
 *   `questionText`, `primaryIntent`), `currentBatchFields`, optional
 *   `nextBatchFields`, and `hasDocumentStore`.
 * @returns {object} Flags `parseAnswers`, `runLookup`, `answerQuestion`,
 *   `advanceBatch` and `generateNextEmail`.
 */
function planReplyActions(input) {
  const { intent, currentBatchFields } = input;
  const hasCurrentFields = currentBatchFields.length > 0;
  const nextBatchNeedsAnswers = (input.nextBatchFields ?? []).some(isUnfilled);
  const hasLookupRequests = (intent.lookupRequests?.length ?? 0) > 0;
  const asksQuestion = ["question", "mixed"].includes(intent.primaryIntent);
  return {
    parseAnswers: intent.hasAnswers && hasCurrentFields,
    runLookup: hasLookupRequests && input.hasDocumentStore,
    answerQuestion: Boolean(intent.questionText) && asksQuestion,
    // Only advance once the current batch has no unfilled field left.
    advanceBatch: hasCurrentFields && !currentBatchFields.some((field) => isUnfilled(field)),
    generateNextEmail: nextBatchNeedsAnswers
  };
}
8427
/**
 * Picks the unfilled fields worth looking up in stored documents.
 *
 * Returns nothing when the stores are unavailable, when no field is
 * high-value, or when there are more than MAX_DOCUMENT_SEARCH_FIELDS unfilled
 * fields and the share of non-searchable ones exceeds
 * LOW_VALUE_FIELD_RATIO_LIMIT. Otherwise returns at most
 * MAX_DOCUMENT_SEARCH_FIELDS high-value fields, in their original order.
 *
 * @param {Array<object>} unfilledFields - Fields still lacking a value.
 * @param {boolean} hasStores - Whether document lookup is possible at all.
 * @returns {Array<object>} The fields to search for.
 */
function planDocumentSearchFields(unfilledFields, hasStores) {
  if (!hasStores) return [];
  if (unfilledFields.length === 0) return [];

  const candidates = unfilledFields.filter(isHighValueLookupField);
  if (candidates.length === 0) return [];

  const lowValueShare = 1 - candidates.length / unfilledFields.length;
  const mostlyLowValue =
    unfilledFields.length > MAX_DOCUMENT_SEARCH_FIELDS && lowValueShare > LOW_VALUE_FIELD_RATIO_LIMIT;
  return mostlyLowValue ? [] : candidates.slice(0, MAX_DOCUMENT_SEARCH_FIELDS);
}
8437
/**
 * Reports whether a field still lacks a usable value.
 *
 * A field is unfilled when its value is `undefined`, `null`, or a string that
 * is empty after trimming. Any other non-string value counts as filled; this
 * avoids calling `.trim()` on something that is not a string.
 *
 * @param {{value?: unknown}} field - The field to inspect.
 * @returns {boolean} True when the field has no usable value.
 */
function isUnfilled(field) {
  const { value } = field;
  if (value == null) return true;
  return typeof value === "string" && value.trim() === "";
}
8440
/**
 * Reports whether a field is worth a document lookup.
 *
 * Required fields always qualify. Otherwise the lower-cased
 * "<section> <label>" text must contain one of the keywords below.
 *
 * @param {{section?: string, label?: string, required?: boolean}} field
 * @returns {boolean} True for required fields or keyword matches.
 */
function isHighValueLookupField(field) {
  const haystack = `${field.section} ${field.label}`.toLowerCase();
  if (field.required) return true;
  const keywords = [
    "carrier",
    "policy",
    "premium",
    "limit",
    "deductible",
    "insured",
    "address",
    "revenue",
    "payroll",
    "effective",
    "expiration",
    "coverage",
    "class code",
    "fein",
    "entity"
  ];
  for (const keyword of keywords) {
    if (haystack.includes(keyword)) return true;
  }
  return false;
}
8461
+
8082
8462
  // src/application/coordinator.ts
8083
8463
  function createApplicationPipeline(config) {
8084
8464
  const {
@@ -8177,27 +8557,37 @@ function createApplicationPipeline(config) {
8177
8557
  state.updatedAt = Date.now();
8178
8558
  await applicationStore?.save(state);
8179
8559
  onProgress?.(`Auto-filling ${fields.length} fields...`);
8180
- const fillTasks = [];
8181
- if (backfillProvider) {
8182
- fillTasks.push(
8183
- (async () => {
8184
- try {
8185
- const priorAnswers = await backfillFromPriorAnswers(fields, backfillProvider);
8186
- for (const pa of priorAnswers) {
8187
- const field = state.fields.find((f) => f.id === pa.fieldId);
8188
- if (field && !field.value && pa.relevance > 0.8) {
8189
- field.value = pa.value;
8190
- field.source = `backfill: ${pa.source}`;
8191
- field.confidence = "high";
8192
- }
8193
- }
8194
- } catch (e) {
8195
- await log?.(`Backfill failed: ${e}`);
8560
+ let workflowPlan = planApplicationWorkflow({
8561
+ fields: state.fields,
8562
+ hasBackfillProvider: Boolean(backfillProvider),
8563
+ orgContextCount: orgContext.length,
8564
+ hasDocumentStore: Boolean(documentStore),
8565
+ hasMemoryStore: Boolean(memoryStore)
8566
+ });
8567
+ if (workflowPlan.runBackfill && backfillProvider) {
8568
+ try {
8569
+ const priorAnswers = await backfillFromPriorAnswers(state.fields, backfillProvider);
8570
+ for (const pa of priorAnswers) {
8571
+ const field = state.fields.find((f) => f.id === pa.fieldId);
8572
+ if (field && !field.value && pa.relevance > 0.8) {
8573
+ field.value = pa.value;
8574
+ field.source = `backfill: ${pa.source}`;
8575
+ field.confidence = "high";
8196
8576
  }
8197
- })()
8198
- );
8577
+ }
8578
+ } catch (e) {
8579
+ await log?.(`Backfill failed: ${e}`);
8580
+ }
8199
8581
  }
8200
- if (orgContext.length > 0) {
8582
+ workflowPlan = planApplicationWorkflow({
8583
+ fields: state.fields,
8584
+ hasBackfillProvider: false,
8585
+ orgContextCount: orgContext.length,
8586
+ hasDocumentStore: Boolean(documentStore),
8587
+ hasMemoryStore: Boolean(memoryStore)
8588
+ });
8589
+ const fillTasks = [];
8590
+ if (workflowPlan.runContextAutoFill) {
8201
8591
  fillTasks.push(
8202
8592
  limit(async () => {
8203
8593
  const unfilledFields2 = state.fields.filter((f) => !f.value);
@@ -8224,18 +8614,13 @@ function createApplicationPipeline(config) {
8224
8614
  })
8225
8615
  );
8226
8616
  }
8227
- if (documentStore && memoryStore) {
8617
+ if (workflowPlan.documentSearchFields.length > 0 && memoryStore) {
8228
8618
  fillTasks.push(
8229
8619
  (async () => {
8230
8620
  try {
8231
- const unfilledFields2 = state.fields.filter((f) => !f.value);
8232
- const searchPromises = unfilledFields2.slice(0, 10).map(
8621
+ const searchPromises = workflowPlan.documentSearchFields.map(
8233
8622
  (f) => limit(async () => {
8234
- const chunks = await memoryStore.search(f.label, { limit: 3 });
8235
- for (const chunk of chunks) {
8236
- if (!state.fields.find((sf) => sf.id === f.id)?.value) {
8237
- }
8238
- }
8623
+ await memoryStore.search(f.label, { limit: 3 });
8239
8624
  })
8240
8625
  );
8241
8626
  await Promise.all(searchPromises);
@@ -8248,8 +8633,15 @@ function createApplicationPipeline(config) {
8248
8633
  await Promise.all(fillTasks);
8249
8634
  state.updatedAt = Date.now();
8250
8635
  await applicationStore?.save(state);
8251
- const unfilledFields = state.fields.filter((f) => !f.value);
8252
- if (unfilledFields.length > 0) {
8636
+ workflowPlan = planApplicationWorkflow({
8637
+ fields: state.fields,
8638
+ hasBackfillProvider: false,
8639
+ orgContextCount: 0,
8640
+ hasDocumentStore: false,
8641
+ hasMemoryStore: false
8642
+ });
8643
+ const unfilledFields = workflowPlan.unfilledFields;
8644
+ if (workflowPlan.runBatching) {
8253
8645
  onProgress?.(`Batching ${unfilledFields.length} remaining questions...`);
8254
8646
  state.status = "batching";
8255
8647
  try {
@@ -8316,7 +8708,12 @@ function createApplicationPipeline(config) {
8316
8708
  }
8317
8709
  let fieldsFilled = 0;
8318
8710
  let responseText;
8319
- if (intent.hasAnswers) {
8711
+ let replyPlan = planReplyActions({
8712
+ intent,
8713
+ currentBatchFields,
8714
+ hasDocumentStore: Boolean(documentStore)
8715
+ });
8716
+ if (replyPlan.parseAnswers) {
8320
8717
  onProgress?.("Parsing answers...");
8321
8718
  try {
8322
8719
  const { result: parseResult, usage: parseUsage } = await parseAnswers(
@@ -8339,7 +8736,7 @@ function createApplicationPipeline(config) {
8339
8736
  await log?.(`Answer parsing failed: ${error instanceof Error ? error.message : String(error)}`);
8340
8737
  }
8341
8738
  }
8342
- if (intent.lookupRequests?.length) {
8739
+ if (replyPlan.runLookup && intent.lookupRequests?.length) {
8343
8740
  onProgress?.("Processing lookup requests...");
8344
8741
  let availableData = "";
8345
8742
  if (documentStore) {
@@ -8380,64 +8777,78 @@ function createApplicationPipeline(config) {
8380
8777
  }
8381
8778
  }
8382
8779
  }
8383
- if (intent.primaryIntent === "question" || intent.primaryIntent === "mixed") {
8384
- if (intent.questionText) {
8385
- try {
8386
- const { text, usage } = await generateText({
8387
- prompt: `The user is filling out an insurance application and asked: "${intent.questionText}"
8780
+ if (replyPlan.answerQuestion && intent.questionText) {
8781
+ try {
8782
+ const { text, usage } = await generateText({
8783
+ prompt: `The user is filling out an insurance application and asked: "${intent.questionText}"
8388
8784
 
8389
8785
  Provide a brief, helpful explanation (2-3 sentences). End with "Just reply with the answer when you're ready and I'll fill it in."`,
8390
- maxTokens: 512,
8391
- providerOptions
8392
- });
8393
- trackUsage(usage);
8394
- responseText = text;
8395
- } catch (error) {
8396
- await log?.(`Question response generation failed: ${error instanceof Error ? error.message : String(error)}`);
8397
- responseText = `I wasn't able to generate an explanation for your question. Could you rephrase it, or just provide the answer directly?`;
8398
- }
8786
+ maxTokens: 512,
8787
+ providerOptions
8788
+ });
8789
+ trackUsage(usage);
8790
+ responseText = text;
8791
+ } catch (error) {
8792
+ await log?.(`Question response generation failed: ${error instanceof Error ? error.message : String(error)}`);
8793
+ responseText = `I wasn't able to generate an explanation for your question. Could you rephrase it, or just provide the answer directly?`;
8399
8794
  }
8400
8795
  }
8401
8796
  const currentBatchComplete = currentBatchFieldIds.every(
8402
8797
  (fid) => state.fields.find((f) => f.id === fid)?.value
8403
8798
  );
8404
- if (currentBatchComplete && state.batches) {
8405
- if (state.currentBatchIndex < state.batches.length - 1) {
8406
- state.currentBatchIndex++;
8407
- const nextBatchFieldIds = state.batches[state.currentBatchIndex];
8408
- const nextBatchFields = state.fields.filter(
8409
- (f) => nextBatchFieldIds.includes(f.id)
8410
- );
8799
+ let nextBatchIndex;
8800
+ let nextBatchFields;
8801
+ if (state.batches) {
8802
+ for (let index = state.currentBatchIndex + 1; index < state.batches.length; index++) {
8803
+ const candidateFields = state.fields.filter((f) => state.batches[index].includes(f.id));
8804
+ if (candidateFields.some((f) => !f.value)) {
8805
+ nextBatchIndex = index;
8806
+ nextBatchFields = candidateFields;
8807
+ break;
8808
+ }
8809
+ }
8810
+ }
8811
+ replyPlan = planReplyActions({
8812
+ intent,
8813
+ currentBatchFields,
8814
+ nextBatchFields,
8815
+ hasDocumentStore: Boolean(documentStore)
8816
+ });
8817
+ if (currentBatchComplete && replyPlan.advanceBatch && state.batches) {
8818
+ if (nextBatchIndex !== void 0 && nextBatchFields) {
8819
+ state.currentBatchIndex = nextBatchIndex;
8411
8820
  const filledCount = state.fields.filter((f) => f.value).length;
8412
- try {
8413
- const { text: emailText, usage: emailUsage } = await generateBatchEmail(
8414
- nextBatchFields,
8415
- state.currentBatchIndex,
8416
- state.batches.length,
8417
- {
8418
- appTitle: state.title,
8419
- totalFieldCount: state.fields.length,
8420
- filledFieldCount: filledCount,
8421
- companyName: context?.companyName
8422
- },
8423
- generateText,
8424
- providerOptions
8425
- );
8426
- trackUsage(emailUsage);
8427
- const emailReview = reviewBatchEmail(emailText, nextBatchFields);
8428
- state.qualityReport = {
8429
- ...buildApplicationQualityReport(state),
8430
- emailReview
8431
- };
8432
- if (!responseText) {
8433
- responseText = emailText;
8434
- } else {
8435
- responseText += `
8821
+ if (replyPlan.generateNextEmail) {
8822
+ try {
8823
+ const { text: emailText, usage: emailUsage } = await generateBatchEmail(
8824
+ nextBatchFields,
8825
+ state.currentBatchIndex,
8826
+ state.batches.length,
8827
+ {
8828
+ appTitle: state.title,
8829
+ totalFieldCount: state.fields.length,
8830
+ filledFieldCount: filledCount,
8831
+ companyName: context?.companyName
8832
+ },
8833
+ generateText,
8834
+ providerOptions
8835
+ );
8836
+ trackUsage(emailUsage);
8837
+ const emailReview = reviewBatchEmail(emailText, nextBatchFields);
8838
+ state.qualityReport = {
8839
+ ...buildApplicationQualityReport(state),
8840
+ emailReview
8841
+ };
8842
+ if (!responseText) {
8843
+ responseText = emailText;
8844
+ } else {
8845
+ responseText += `
8436
8846
 
8437
8847
  ${emailText}`;
8848
+ }
8849
+ } catch (error) {
8850
+ await log?.(`Batch email generation failed: ${error instanceof Error ? error.message : String(error)}`);
8438
8851
  }
8439
- } catch (error) {
8440
- await log?.(`Batch email generation failed: ${error instanceof Error ? error.message : String(error)}`);
8441
8852
  }
8442
8853
  } else {
8443
8854
  state.status = "confirming";
@@ -9251,6 +9662,42 @@ ${item.text}`).join("\n\n");
9251
9662
  return { evidence, contextSummary };
9252
9663
  }
9253
9664
 
9665
+ // src/query/workflow.ts
9666
/**
 * Decide whether a classified query needs a retrieval pass.
 *
 * @param {object} classification - Classification result for the query.
 * @param {*} [classification.requiresDocumentLookup] - Lookup flag read from the classification.
 * @param {*} [classification.requiresChunkSearch] - Chunk-search flag read from the classification.
 * @returns {boolean} `true` when either flag is truthy, `false` otherwise.
 */
function shouldRetrieveForClassification(classification) {
  // Coerce explicitly: a bare `||` hands back whichever operand it stopped on,
  // so a classification with neither flag set would yield `undefined` instead
  // of `false`. The result is stored as `shouldRetrieve` on the workflow plan,
  // where a real boolean is the least surprising value.
  return Boolean(
    classification.requiresDocumentLookup || classification.requiresChunkSearch
  );
}
9669
/**
 * Build the initial, ordered action plan for answering a classified query.
 *
 * The plan always ends with `reason`, `verify` and `respond` actions (in that
 * order). A leading `retrieve` action is added only when
 * `shouldRetrieveForClassification` reports that the classification asks for a
 * document or chunk lookup.
 *
 * @param {object} params
 * @param {object} params.classification - Classification result. Its
 *   `subQuestions` value is attached, by reference, to the `retrieve` and
 *   `reason` actions.
 * @param {Array} [params.attachmentEvidence] - Evidence gathered from
 *   attachments. Only its length is inspected; a missing or null value is
 *   treated as "no attachment evidence".
 * @returns {{ actions: object[], shouldRetrieve: boolean }} The ordered
 *   actions plus the retrieval decision.
 */
function buildInitialQueryWorkflowPlan(params) {
  const { classification } = params;
  // Callers that have no attachments may omit the field entirely; fall back to
  // an empty list instead of throwing on `.length` below.
  const attachmentEvidence = params.attachmentEvidence ?? [];
  // Normalise to a strict boolean so the returned plan has a stable shape
  // regardless of what the helper hands back.
  const shouldRetrieve = Boolean(shouldRetrieveForClassification(classification));

  const actions = [];

  if (shouldRetrieve) {
    actions.push({
      type: "retrieve",
      subQuestions: classification.subQuestions,
      reason: "classification requested document or chunk lookup"
    });
  }

  // Explain which evidence the reasoning step will be working from.
  let reasonExplanation;
  if (shouldRetrieve) {
    reasonExplanation = "answer with retrieved evidence and any attachment evidence";
  } else if (attachmentEvidence.length > 0) {
    reasonExplanation = "answer with attachment evidence only";
  } else {
    reasonExplanation = "answer without document retrieval";
  }

  actions.push(
    {
      type: "reason",
      subQuestions: classification.subQuestions,
      reason: reasonExplanation
    },
    {
      type: "verify",
      reason: "check grounding and request targeted retries when needed"
    },
    {
      type: "respond",
      reason: "compose final response"
    }
  );

  return { actions, shouldRetrieve };
}
9697
/**
 * Look up the first action of the requested type in a workflow plan.
 *
 * @param {{ actions: Array<{ type: string }> }} plan - Plan whose `actions` list is scanned in order.
 * @param {string} type - Action type to match with strict equality.
 * @returns {object | undefined} The first matching action, or `undefined` when none matches.
 */
function getWorkflowAction(plan, type) {
  for (const candidate of plan.actions) {
    if (candidate.type === type) {
      return candidate;
    }
  }
  return undefined;
}
9700
+
9254
9701
  // src/query/coordinator.ts
9255
9702
  function createQueryAgent(config) {
9256
9703
  const {
@@ -9295,29 +9742,37 @@ function createQueryAgent(config) {
9295
9742
  onProgress?.("Classifying query...");
9296
9743
  const classification = await classify(question, conversationId, attachmentContext);
9297
9744
  await pipelineCtx.save("classify", { classification, attachmentEvidence });
9298
- onProgress?.(`Retrieving evidence for ${classification.subQuestions.length} sub-question(s)...`);
9299
9745
  const retrieverConfig = {
9300
9746
  documentStore,
9301
9747
  memoryStore,
9302
9748
  retrievalLimit,
9303
9749
  log
9304
9750
  };
9305
- const retrievalResults = await Promise.all(
9306
- classification.subQuestions.map(
9307
- (sq) => limit(() => retrieve(sq, conversationId, retrieverConfig))
9308
- )
9309
- );
9751
+ const workflowPlan = buildInitialQueryWorkflowPlan({ classification, attachmentEvidence });
9752
+ const retrieveAction = getWorkflowAction(workflowPlan, "retrieve");
9753
+ const reasonAction = getWorkflowAction(workflowPlan, "reason");
9754
+ await pipelineCtx.save("workflow", { classification, attachmentEvidence, workflowPlan });
9755
+ const retrievalResults = retrieveAction ? await (async () => {
9756
+ onProgress?.(`Retrieving evidence for ${retrieveAction.subQuestions.length} sub-question(s)...`);
9757
+ return Promise.all(
9758
+ retrieveAction.subQuestions.map(
9759
+ (sq) => limit(() => retrieve(sq, conversationId, retrieverConfig))
9760
+ )
9761
+ );
9762
+ })() : [];
9310
9763
  const allEvidence = [...attachmentEvidence, ...retrievalResults.flatMap((r) => r.evidence)];
9311
9764
  await pipelineCtx.save("retrieve", { classification, attachmentEvidence, evidence: allEvidence });
9312
9765
  onProgress?.("Reasoning over evidence...");
9313
9766
  const reasonerConfig = { generateObject, providerOptions };
9767
+ const subQuestionsToReason = reasonAction?.subQuestions ?? classification.subQuestions;
9314
9768
  const reasonResults = await Promise.allSettled(
9315
- classification.subQuestions.map(
9316
- (sq, i) => limit(async () => {
9769
+ subQuestionsToReason.map(
9770
+ (sq) => limit(async () => {
9771
+ const retrievedEvidence = retrievalResults.find((r) => r.subQuestion === sq.question)?.evidence ?? [];
9317
9772
  const { subAnswer, usage } = await reason(
9318
9773
  sq.question,
9319
9774
  sq.intent,
9320
- [...attachmentEvidence, ...retrievalResults[i].evidence],
9775
+ [...attachmentEvidence, ...retrievedEvidence],
9321
9776
  reasonerConfig
9322
9777
  );
9323
9778
  trackUsage(usage);
@@ -9331,9 +9786,9 @@ function createQueryAgent(config) {
9331
9786
  if (result.status === "fulfilled") {
9332
9787
  subAnswers.push(result.value);
9333
9788
  } else {
9334
- await log?.(`Reasoner failed for sub-question "${classification.subQuestions[i].question}": ${result.reason}`);
9789
+ await log?.(`Reasoner failed for sub-question "${subQuestionsToReason[i].question}": ${result.reason}`);
9335
9790
  subAnswers.push({
9336
- subQuestion: classification.subQuestions[i].question,
9791
+ subQuestion: subQuestionsToReason[i].question,
9337
9792
  answer: "Unable to answer this part of the question due to a processing error.",
9338
9793
  citations: [],
9339
9794
  confidence: 0,