@claritylabs/cl-sdk 0.16.2 → 0.17.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -79,6 +79,7 @@ __export(index_exports, {
79
79
  CoverageSchema: () => CoverageSchema,
80
80
  CoverageTriggerSchema: () => CoverageTriggerSchema,
81
81
  CoverageValueTypeSchema: () => CoverageValueTypeSchema,
82
+ CoveredReasonSchema: () => CoveredReasonSchema,
82
83
  CrimeDeclarationsSchema: () => CrimeDeclarationsSchema,
83
84
  CyberDeclarationsSchema: () => CyberDeclarationsSchema,
84
85
  DEDUCTIBLE_TYPES: () => DEDUCTIBLE_TYPES,
@@ -91,6 +92,7 @@ __export(index_exports, {
91
92
  DeductibleScheduleSchema: () => DeductibleScheduleSchema,
92
93
  DeductibleTypeSchema: () => DeductibleTypeSchema,
93
94
  DefenseCostTreatmentSchema: () => DefenseCostTreatmentSchema,
95
+ DefinitionSchema: () => DefinitionSchema,
94
96
  DocumentTypeSchema: () => DocumentTypeSchema,
95
97
  DriverRecordSchema: () => DriverRecordSchema,
96
98
  DwellingDetailsSchema: () => DwellingDetailsSchema,
@@ -436,7 +438,14 @@ function createPipelineContext(opts) {
436
438
  let latest = opts.resumeFrom;
437
439
  const completedPhases = /* @__PURE__ */ new Set();
438
440
  if (opts.resumeFrom) {
439
- completedPhases.add(opts.resumeFrom.phase);
441
+ const phaseIndex = opts.phaseOrder?.indexOf(opts.resumeFrom.phase) ?? -1;
442
+ if (phaseIndex >= 0 && opts.phaseOrder) {
443
+ for (const phase of opts.phaseOrder.slice(0, phaseIndex + 1)) {
444
+ completedPhases.add(phase);
445
+ }
446
+ } else {
447
+ completedPhases.add(opts.resumeFrom.phase);
448
+ }
440
449
  }
441
450
  return {
442
451
  id: opts.id,
@@ -1493,6 +1502,29 @@ var AuxiliaryFactSchema = import_zod16.z.object({
1493
1502
  subject: import_zod16.z.string().optional(),
1494
1503
  context: import_zod16.z.string().optional()
1495
1504
  });
1505
+ var DefinitionSchema = import_zod16.z.object({
1506
+ term: import_zod16.z.string(),
1507
+ definition: import_zod16.z.string(),
1508
+ pageNumber: import_zod16.z.number().optional(),
1509
+ formNumber: import_zod16.z.string().optional(),
1510
+ formTitle: import_zod16.z.string().optional(),
1511
+ sectionRef: import_zod16.z.string().optional(),
1512
+ originalContent: import_zod16.z.string().optional()
1513
+ });
1514
+ var CoveredReasonSchema = import_zod16.z.object({
1515
+ coverageName: import_zod16.z.string(),
1516
+ reasonNumber: import_zod16.z.string().optional(),
1517
+ title: import_zod16.z.string().optional(),
1518
+ content: import_zod16.z.string(),
1519
+ conditions: import_zod16.z.array(import_zod16.z.string()).optional(),
1520
+ exceptions: import_zod16.z.array(import_zod16.z.string()).optional(),
1521
+ appliesTo: import_zod16.z.array(import_zod16.z.string()).optional(),
1522
+ pageNumber: import_zod16.z.number().optional(),
1523
+ formNumber: import_zod16.z.string().optional(),
1524
+ formTitle: import_zod16.z.string().optional(),
1525
+ sectionRef: import_zod16.z.string().optional(),
1526
+ originalContent: import_zod16.z.string().optional()
1527
+ });
1496
1528
  var BaseDocumentFields = {
1497
1529
  id: import_zod16.z.string(),
1498
1530
  carrier: import_zod16.z.string(),
@@ -1503,6 +1535,8 @@ var BaseDocumentFields = {
1503
1535
  policyTypes: import_zod16.z.array(import_zod16.z.string()).optional(),
1504
1536
  coverages: import_zod16.z.array(CoverageSchema),
1505
1537
  sections: import_zod16.z.array(SectionSchema).optional(),
1538
+ definitions: import_zod16.z.array(DefinitionSchema).optional(),
1539
+ coveredReasons: import_zod16.z.array(CoveredReasonSchema).optional(),
1506
1540
  // Enriched fields (v1.2+)
1507
1541
  carrierLegalName: import_zod16.z.string().optional(),
1508
1542
  carrierNaicNumber: import_zod16.z.string().optional(),
@@ -1942,33 +1976,102 @@ async function runExtractor(params) {
1942
1976
  };
1943
1977
  }
1944
1978
 
1979
+ // src/extraction/memory.ts
1980
+ function isMemoryRecord(value) {
1981
+ return typeof value === "object" && value !== null && !Array.isArray(value);
1982
+ }
1983
+ function readMemoryRecord(memory, key) {
1984
+ const value = memory.get(key);
1985
+ return isMemoryRecord(value) ? value : void 0;
1986
+ }
1987
+ function readRecordValue(record, key) {
1988
+ return record?.[key];
1989
+ }
1990
+ function readRecordArray(record, key) {
1991
+ const value = readRecordValue(record, key);
1992
+ return Array.isArray(value) ? value : void 0;
1993
+ }
1994
+ function getCarrierInfo(memory) {
1995
+ return readMemoryRecord(memory, "carrier_info");
1996
+ }
1997
+ function getNamedInsured(memory) {
1998
+ return readMemoryRecord(memory, "named_insured");
1999
+ }
2000
+ function getCoverageLimits(memory) {
2001
+ return readMemoryRecord(memory, "coverage_limits");
2002
+ }
2003
+ function getCoverageLimitCoverages(memory) {
2004
+ return readRecordArray(getCoverageLimits(memory), "coverages") ?? [];
2005
+ }
2006
+ function getSectionsPayload(memory) {
2007
+ return readMemoryRecord(memory, "sections");
2008
+ }
2009
+ function getSections(memory) {
2010
+ return readRecordArray(getSectionsPayload(memory), "sections");
2011
+ }
2012
+ function getDefinitionsPayload(memory) {
2013
+ return readMemoryRecord(memory, "definitions");
2014
+ }
2015
+ function getDefinitions(memory) {
2016
+ return readRecordArray(getDefinitionsPayload(memory), "definitions");
2017
+ }
2018
+ function getCoveredReasonsPayload(memory) {
2019
+ return readMemoryRecord(memory, "covered_reasons");
2020
+ }
2021
+ function getCoveredReasons(memory) {
2022
+ const payload = getCoveredReasonsPayload(memory);
2023
+ return readRecordArray(payload, "coveredReasons") ?? readRecordArray(payload, "covered_reasons");
2024
+ }
2025
+
1945
2026
  // src/extraction/promote.ts
1946
2027
  function getDeclarationFields(doc) {
1947
2028
  const decl = doc.declarations;
1948
2029
  return Array.isArray(decl?.fields) ? decl.fields : [];
1949
2030
  }
1950
2031
  function fieldMatches(fieldName, patterns) {
1951
- const lower = fieldName.toLowerCase().replace(/[\s_-]/g, "");
1952
- return patterns.some((p) => lower === p.toLowerCase().replace(/[\s_-]/g, ""));
2032
+ const lower = normalizeFieldName(fieldName);
2033
+ return patterns.some((p) => lower === normalizeFieldName(p));
2034
+ }
2035
+ function normalizeFieldName(fieldName) {
2036
+ return fieldName.toLowerCase().replace(/[^a-z0-9]/g, "");
1953
2037
  }
1954
- function findFieldValue(fields, patterns) {
1955
- const match = fields.find((f) => fieldMatches(f.field, patterns));
2038
+ function findFieldValue(fields, patterns, reject) {
2039
+ const match = fields.find((f) => fieldMatches(f.field, patterns) && !reject?.(f));
1956
2040
  return match?.value;
1957
2041
  }
1958
- function promoteCarrierFields(doc) {
1959
- const raw = doc;
1960
- if (!raw.carrierNaicNumber && raw.naicNumber) {
1961
- raw.carrierNaicNumber = raw.naicNumber;
1962
- }
1963
- if (!raw.carrierAmBestRating && raw.amBestRating) {
1964
- raw.carrierAmBestRating = raw.amBestRating;
2042
+ function stringValue(value) {
2043
+ return typeof value === "string" && value.trim() ? value : void 0;
2044
+ }
2045
+ function findRawString(raw, keys) {
2046
+ for (const key of keys) {
2047
+ const value = stringValue(raw[key]);
2048
+ if (value) return value;
1965
2049
  }
1966
- if (!raw.carrierAdmittedStatus && raw.admittedStatus) {
1967
- raw.carrierAdmittedStatus = raw.admittedStatus;
2050
+ return void 0;
2051
+ }
2052
+ function promoteRawFields(raw, mappings) {
2053
+ for (const { from, to } of mappings) {
2054
+ if (!raw[to] && raw[from]) {
2055
+ raw[to] = raw[from];
2056
+ }
2057
+ delete raw[from];
1968
2058
  }
1969
- delete raw.naicNumber;
1970
- delete raw.amBestRating;
1971
- delete raw.admittedStatus;
2059
+ }
2060
+ function findRawOrDeclarationValue(raw, fields, lookup) {
2061
+ return (lookup.rawKey ? raw[lookup.rawKey] : void 0) || findFieldValue(fields, lookup.patterns, lookup.reject);
2062
+ }
2063
+ function promoteRawOrDeclarationString(raw, fields, targetKey, rawKeys, lookup) {
2064
+ if (raw[targetKey]) return;
2065
+ const value = findRawString(raw, rawKeys) ?? findFieldValue(fields, lookup.patterns, lookup.reject);
2066
+ if (value) raw[targetKey] = value;
2067
+ }
2068
+ function promoteCarrierFields(doc) {
2069
+ const raw = doc;
2070
+ promoteRawFields(raw, [
2071
+ { from: "naicNumber", to: "carrierNaicNumber" },
2072
+ { from: "amBestRating", to: "carrierAmBestRating" },
2073
+ { from: "admittedStatus", to: "carrierAdmittedStatus" }
2074
+ ]);
1972
2075
  if (!raw.insurer && raw.carrierLegalName) {
1973
2076
  raw.insurer = {
1974
2077
  legalName: raw.carrierLegalName,
@@ -2009,12 +2112,21 @@ var BROKER_ADDRESS_PATTERNS = ["brokerAddress", "agentAddress", "producerAddress
2009
2112
  function promoteBroker(doc) {
2010
2113
  const raw = doc;
2011
2114
  const fields = getDeclarationFields(doc);
2012
- const brokerAgency = raw.brokerAgency || findFieldValue(fields, BROKER_NAME_PATTERNS);
2013
- const brokerContact = raw.brokerContactName || findFieldValue(fields, BROKER_CONTACT_PATTERNS);
2014
- const brokerLicense = raw.brokerLicenseNumber || findFieldValue(fields, BROKER_LICENSE_PATTERNS);
2015
- const brokerPhone = findFieldValue(fields, BROKER_PHONE_PATTERNS);
2016
- const brokerEmail = findFieldValue(fields, BROKER_EMAIL_PATTERNS);
2017
- const brokerAddress = findFieldValue(fields, BROKER_ADDRESS_PATTERNS);
2115
+ const brokerAgency = findRawOrDeclarationValue(raw, fields, {
2116
+ rawKey: "brokerAgency",
2117
+ patterns: BROKER_NAME_PATTERNS
2118
+ });
2119
+ const brokerContact = findRawOrDeclarationValue(raw, fields, {
2120
+ rawKey: "brokerContactName",
2121
+ patterns: BROKER_CONTACT_PATTERNS
2122
+ });
2123
+ const brokerLicense = findRawOrDeclarationValue(raw, fields, {
2124
+ rawKey: "brokerLicenseNumber",
2125
+ patterns: BROKER_LICENSE_PATTERNS
2126
+ });
2127
+ const brokerPhone = findRawOrDeclarationValue(raw, fields, { patterns: BROKER_PHONE_PATTERNS });
2128
+ const brokerEmail = findRawOrDeclarationValue(raw, fields, { patterns: BROKER_EMAIL_PATTERNS });
2129
+ const brokerAddress = findRawOrDeclarationValue(raw, fields, { patterns: BROKER_ADDRESS_PATTERNS });
2018
2130
  if (brokerAgency) raw.brokerAgency = brokerAgency;
2019
2131
  if (brokerContact) raw.brokerContactName = brokerContact;
2020
2132
  if (brokerLicense) raw.brokerLicenseNumber = brokerLicense;
@@ -2270,25 +2382,171 @@ function synthesizeDeductibles(doc) {
2270
2382
  raw.deductibles = deductibles;
2271
2383
  }
2272
2384
  }
2273
- var PREMIUM_PATTERNS = ["premium", "totalPremium", "annualPremium", "policyPremium", "basePremium"];
2274
- var TOTAL_COST_PATTERNS = ["totalCost", "totalDue", "totalAmount", "totalPolicyPremium"];
2385
+ var PREMIUM_PATTERNS = [
2386
+ "premium",
2387
+ "premiumAmount",
2388
+ "premium amount",
2389
+ "totalPremium",
2390
+ "total premium",
2391
+ "totalPolicyPremium",
2392
+ "total policy premium",
2393
+ "annualPremium",
2394
+ "annual premium",
2395
+ "estimatedAnnualPremium",
2396
+ "estimated annual premium",
2397
+ "policyPremium",
2398
+ "policy premium",
2399
+ "basePremium",
2400
+ "base premium",
2401
+ "planCost",
2402
+ "plan cost",
2403
+ "policyCost",
2404
+ "policy cost",
2405
+ "premiumSubtotal",
2406
+ "premium subtotal",
2407
+ "subtotalPremium",
2408
+ "subtotal premium",
2409
+ "quotedPremium",
2410
+ "quoted premium"
2411
+ ];
2412
+ var TOTAL_COST_PATTERNS = [
2413
+ "totalCost",
2414
+ "total cost",
2415
+ "total",
2416
+ "totalDue",
2417
+ "total due",
2418
+ "amountPaid",
2419
+ "amount paid",
2420
+ "totalPaid",
2421
+ "total paid",
2422
+ "totalPrice",
2423
+ "total price",
2424
+ "totalTripCost",
2425
+ "total trip cost",
2426
+ "amountCharged",
2427
+ "amount charged",
2428
+ "amountDue",
2429
+ "amount due",
2430
+ "totalAmountDue",
2431
+ "total amount due",
2432
+ "totalAmount",
2433
+ "total amount",
2434
+ "grandTotal",
2435
+ "grand total",
2436
+ "totalPayable",
2437
+ "total payable",
2438
+ "totalCharges",
2439
+ "total charges",
2440
+ "totalPolicyCost",
2441
+ "total policy cost"
2442
+ ];
2443
+ var PREMIUM_RAW_KEYS = [
2444
+ "premium",
2445
+ "premiumAmount",
2446
+ "premium_amount",
2447
+ "totalPremium",
2448
+ "totalPolicyPremium",
2449
+ "annualPremium",
2450
+ "estimatedAnnualPremium",
2451
+ "policyPremium",
2452
+ "basePremium",
2453
+ "planCost",
2454
+ "policyCost",
2455
+ "premiumSubtotal",
2456
+ "subtotalPremium",
2457
+ "quotedPremium"
2458
+ ];
2459
+ var TOTAL_COST_RAW_KEYS = [
2460
+ "totalCost",
2461
+ "total_cost",
2462
+ "total",
2463
+ "totalDue",
2464
+ "amountPaid",
2465
+ "amount_paid",
2466
+ "totalPaid",
2467
+ "total_paid",
2468
+ "totalPrice",
2469
+ "totalTripCost",
2470
+ "amountCharged",
2471
+ "amountDue",
2472
+ "totalAmountDue",
2473
+ "totalAmount",
2474
+ "grandTotal",
2475
+ "totalPayable",
2476
+ "totalCharges",
2477
+ "totalPolicyCost"
2478
+ ];
2479
+ function isTaxOrFeeField(fieldName) {
2480
+ const normalized = normalizeFieldName(fieldName);
2481
+ return /tax|gst|hst|pst|qst|fee|surcharge|assessment|stamp|filing|inspection/.test(normalized);
2482
+ }
2483
+ function isTotalCostField(fieldName) {
2484
+ return fieldMatches(fieldName, TOTAL_COST_PATTERNS);
2485
+ }
2486
+ function taxFeeType(fieldName) {
2487
+ const normalized = normalizeFieldName(fieldName);
2488
+ if (normalized.includes("tax") || ["gst", "hst", "pst", "qst"].some((token) => normalized.includes(token))) return "tax";
2489
+ if (normalized.includes("surcharge")) return "surcharge";
2490
+ if (normalized.includes("assessment")) return "assessment";
2491
+ if (normalized.includes("fee") || normalized.includes("stamp") || normalized.includes("filing")) return "fee";
2492
+ return void 0;
2493
+ }
2494
+ function titleizeFieldName(fieldName) {
2495
+ const spaced = fieldName.replace(/([a-z0-9])([A-Z])/g, "$1 $2").replace(/[_-]+/g, " ").replace(/\s+/g, " ").trim();
2496
+ return spaced.replace(/\b\w/g, (letter) => letter.toUpperCase());
2497
+ }
2498
+ function taxFeeKey(item) {
2499
+ return [
2500
+ normalizeFieldName(item.name),
2501
+ normalizeFieldName(item.amount),
2502
+ item.type ?? ""
2503
+ ].join("|");
2504
+ }
2505
+ function taxFeeItemFromField(field) {
2506
+ const type = taxFeeType(field.field);
2507
+ return {
2508
+ name: titleizeFieldName(field.field),
2509
+ amount: absorbNegative(field.value),
2510
+ ...type ? { type } : {}
2511
+ };
2512
+ }
2275
2513
  function absorbNegative(value) {
2276
2514
  return value.replace(/^-\s*/, "").replace(/^\(\s*(.*?)\s*\)$/, "$1");
2277
2515
  }
2278
2516
  function promotePremium(doc) {
2279
2517
  const raw = doc;
2280
2518
  const fields = getDeclarationFields(doc);
2281
- if (!raw.premium) {
2282
- const premium = findFieldValue(fields, PREMIUM_PATTERNS);
2283
- if (premium) raw.premium = premium;
2284
- }
2285
- if (!raw.totalCost) {
2286
- const totalCost = findFieldValue(fields, TOTAL_COST_PATTERNS);
2287
- if (totalCost) raw.totalCost = totalCost;
2288
- }
2519
+ promoteRawOrDeclarationString(raw, fields, "premium", PREMIUM_RAW_KEYS, {
2520
+ patterns: PREMIUM_PATTERNS,
2521
+ reject: (field) => isTaxOrFeeField(field.field)
2522
+ });
2523
+ promoteRawOrDeclarationString(raw, fields, "totalCost", TOTAL_COST_RAW_KEYS, {
2524
+ patterns: TOTAL_COST_PATTERNS
2525
+ });
2289
2526
  if (typeof raw.premium === "string") raw.premium = absorbNegative(raw.premium);
2290
2527
  if (typeof raw.totalCost === "string") raw.totalCost = absorbNegative(raw.totalCost);
2291
2528
  }
2529
+ function synthesizeTaxesAndFees(doc) {
2530
+ const raw = doc;
2531
+ const fields = getDeclarationFields(doc);
2532
+ if (fields.length === 0) return;
2533
+ const existing = Array.isArray(raw.taxesAndFees) ? raw.taxesAndFees : [];
2534
+ const byKey = /* @__PURE__ */ new Map();
2535
+ for (const item of existing) {
2536
+ if (!item?.name || !item?.amount) continue;
2537
+ byKey.set(taxFeeKey(item), item);
2538
+ }
2539
+ for (const field of fields) {
2540
+ if (!field.value?.trim()) continue;
2541
+ if (!isTaxOrFeeField(field.field)) continue;
2542
+ if (isTotalCostField(field.field)) continue;
2543
+ const item = taxFeeItemFromField(field);
2544
+ byKey.set(taxFeeKey(item), item);
2545
+ }
2546
+ if (byKey.size > 0) {
2547
+ raw.taxesAndFees = [...byKey.values()];
2548
+ }
2549
+ }
2292
2550
  function promoteExtractedFields(doc) {
2293
2551
  promoteCarrierFields(doc);
2294
2552
  promoteBroker(doc);
@@ -2296,44 +2554,53 @@ function promoteExtractedFields(doc) {
2296
2554
  promoteLocations(doc);
2297
2555
  synthesizeLimits(doc);
2298
2556
  synthesizeDeductibles(doc);
2557
+ synthesizeTaxesAndFees(doc);
2299
2558
  promotePremium(doc);
2300
2559
  }
2301
2560
 
2302
2561
  // src/extraction/assembler.ts
2303
2562
  function assembleDocument(documentId, documentType, memory) {
2304
- const carrier = memory.get("carrier_info");
2305
- const insured = memory.get("named_insured");
2306
- const coverages = memory.get("coverage_limits");
2307
- const endorsements = memory.get("endorsements");
2308
- const exclusions = memory.get("exclusions");
2309
- const conditions = memory.get("conditions");
2310
- const premium = memory.get("premium_breakdown");
2311
- const declarations = memory.get("declarations");
2312
- const lossHistory = memory.get("loss_history");
2313
- const sections = memory.get("sections");
2314
- const supplementary = memory.get("supplementary");
2315
- const formInventory = memory.get("form_inventory");
2316
- const classify = memory.get("classify");
2563
+ const carrier = getCarrierInfo(memory);
2564
+ const insured = getNamedInsured(memory);
2565
+ const coverages = getCoverageLimits(memory);
2566
+ const endorsements = readMemoryRecord(memory, "endorsements");
2567
+ const exclusions = readMemoryRecord(memory, "exclusions");
2568
+ const conditions = readMemoryRecord(memory, "conditions");
2569
+ const premium = readMemoryRecord(memory, "premium_breakdown");
2570
+ const declarations = readMemoryRecord(memory, "declarations");
2571
+ const lossHistory = readMemoryRecord(memory, "loss_history");
2572
+ const supplementary = readMemoryRecord(memory, "supplementary");
2573
+ const formInventory = readMemoryRecord(memory, "form_inventory");
2574
+ const classify = readMemoryRecord(memory, "classify");
2575
+ const lossPayees = readRecordArray(insured, "lossPayees");
2576
+ const mortgageHolders = readRecordArray(insured, "mortgageHolders");
2317
2577
  const base = {
2318
2578
  id: documentId,
2319
- carrier: carrier?.carrierName ?? "Unknown",
2320
- insuredName: insured?.insuredName ?? "Unknown",
2321
- coverages: coverages?.coverages ?? [],
2322
- policyTypes: classify?.policyTypes,
2579
+ carrier: readRecordValue(carrier, "carrierName") ?? "Unknown",
2580
+ insuredName: readRecordValue(insured, "insuredName") ?? "Unknown",
2581
+ coverages: getCoverageLimitCoverages(memory),
2582
+ policyTypes: readRecordValue(classify, "policyTypes"),
2323
2583
  ...sanitizeNulls(carrier ?? {}),
2324
2584
  ...sanitizeNulls(insured ?? {}),
2325
2585
  // Map named_insured extractor's loss payees/mortgage holders to EndorsementParty shape
2326
- ...Array.isArray(insured?.lossPayees) && insured.lossPayees.length > 0 ? { lossPayees: insured.lossPayees.map((lp) => ({ ...lp, role: "loss_payee" })) } : {},
2327
- ...Array.isArray(insured?.mortgageHolders) && insured.mortgageHolders.length > 0 ? { mortgageHolders: insured.mortgageHolders.map((mh) => ({ ...mh, role: "mortgage_holder" })) } : {},
2586
+ ...lossPayees && lossPayees.length > 0 ? { lossPayees: lossPayees.map((lp) => ({ ...lp, role: "loss_payee" })) } : {},
2587
+ ...mortgageHolders && mortgageHolders.length > 0 ? {
2588
+ mortgageHolders: mortgageHolders.map((mh) => ({
2589
+ ...mh,
2590
+ role: "mortgage_holder"
2591
+ }))
2592
+ } : {},
2328
2593
  ...sanitizeNulls(coverages ?? {}),
2329
2594
  ...sanitizeNulls(premium ?? {}),
2330
2595
  ...sanitizeNulls(supplementary ?? {}),
2331
- supplementaryFacts: supplementary?.auxiliaryFacts,
2332
- endorsements: endorsements?.endorsements,
2333
- exclusions: exclusions?.exclusions,
2334
- conditions: conditions?.conditions,
2335
- sections: sections?.sections,
2336
- formInventory: formInventory?.forms,
2596
+ supplementaryFacts: readRecordValue(supplementary, "auxiliaryFacts"),
2597
+ endorsements: readRecordValue(endorsements, "endorsements"),
2598
+ exclusions: readRecordValue(exclusions, "exclusions"),
2599
+ conditions: readRecordValue(conditions, "conditions"),
2600
+ sections: getSections(memory),
2601
+ formInventory: readRecordValue(formInventory, "forms"),
2602
+ definitions: getDefinitions(memory),
2603
+ coveredReasons: getCoveredReasons(memory),
2337
2604
  declarations: declarations ? sanitizeNulls(declarations) : void 0,
2338
2605
  ...sanitizeNulls(lossHistory ?? {})
2339
2606
  };
@@ -2342,21 +2609,21 @@ function assembleDocument(documentId, documentType, memory) {
2342
2609
  doc = {
2343
2610
  ...base,
2344
2611
  type: "policy",
2345
- policyNumber: carrier?.policyNumber ?? insured?.policyNumber ?? "Unknown",
2346
- effectiveDate: carrier?.effectiveDate ?? insured?.effectiveDate ?? "Unknown",
2347
- expirationDate: carrier?.expirationDate,
2348
- policyTermType: carrier?.policyTermType
2612
+ policyNumber: readRecordValue(carrier, "policyNumber") ?? readRecordValue(insured, "policyNumber") ?? "Unknown",
2613
+ effectiveDate: readRecordValue(carrier, "effectiveDate") ?? readRecordValue(insured, "effectiveDate") ?? "Unknown",
2614
+ expirationDate: readRecordValue(carrier, "expirationDate"),
2615
+ policyTermType: readRecordValue(carrier, "policyTermType")
2349
2616
  };
2350
2617
  } else {
2351
2618
  doc = {
2352
2619
  ...base,
2353
2620
  type: "quote",
2354
- quoteNumber: carrier?.quoteNumber ?? "Unknown",
2355
- proposedEffectiveDate: carrier?.proposedEffectiveDate,
2356
- proposedExpirationDate: carrier?.proposedExpirationDate,
2357
- subjectivities: coverages?.subjectivities,
2358
- underwritingConditions: coverages?.underwritingConditions,
2359
- premiumBreakdown: premium?.premiumBreakdown
2621
+ quoteNumber: readRecordValue(carrier, "quoteNumber") ?? "Unknown",
2622
+ proposedEffectiveDate: readRecordValue(carrier, "proposedEffectiveDate"),
2623
+ proposedExpirationDate: readRecordValue(carrier, "proposedExpirationDate"),
2624
+ subjectivities: readRecordValue(coverages, "subjectivities"),
2625
+ underwritingConditions: readRecordValue(coverages, "underwritingConditions"),
2626
+ premiumBreakdown: readRecordValue(premium, "premiumBreakdown")
2360
2627
  };
2361
2628
  }
2362
2629
  promoteExtractedFields(doc);
@@ -2458,6 +2725,23 @@ ${block}`;
2458
2725
  }
2459
2726
 
2460
2727
  // src/extraction/formatter.ts
2728
+ var LONG_CONTENT_THRESHOLD = 1200;
2729
+ function shouldFormatContent(text) {
2730
+ const trimmed = text.trim();
2731
+ if (trimmed.length === 0) return false;
2732
+ if (trimmed.length >= LONG_CONTENT_THRESHOLD) return true;
2733
+ if (/```|~~~|<br\s*\/?>/i.test(trimmed)) return true;
2734
+ if (/(^|\s)(\*\*|__|`)/.test(trimmed)) return true;
2735
+ if (/!?\[[^\]]+\]\([^)]+\)/.test(trimmed)) return true;
2736
+ if (/^\s{0,3}#{1,6}\s*\S/m.test(trimmed)) return true;
2737
+ if (/^\s{0,6}(?:[-*+]|\d+[.)])\s+\S/m.test(trimmed)) return true;
2738
+ if (/\t|[^\S\r\n]{3,}|\n{3,}|[ \t]+$/m.test(text)) return true;
2739
+ const lines = trimmed.split(/\r?\n/).map((line) => line.trim()).filter(Boolean);
2740
+ if (lines.some((line) => (line.match(/\|/g)?.length ?? 0) >= 2)) return true;
2741
+ const spaceAlignedRows = lines.filter((line) => /\S\s{2,}\S\s{2,}\S/.test(line));
2742
+ if (spaceAlignedRows.length >= 2) return true;
2743
+ return false;
2744
+ }
2461
2745
  function collectContentFields(doc) {
2462
2746
  const entries = [];
2463
2747
  let id = 0;
@@ -2493,6 +2777,21 @@ function collectContentFields(doc) {
2493
2777
  add(`conditions[${i}].content`, doc.conditions[i].content);
2494
2778
  }
2495
2779
  }
2780
+ const extendedDoc = doc;
2781
+ if (extendedDoc.definitions) {
2782
+ for (let i = 0; i < extendedDoc.definitions.length; i++) {
2783
+ add(`definitions[${i}].definition`, extendedDoc.definitions[i].definition);
2784
+ }
2785
+ }
2786
+ const coveredReasons = extendedDoc.coveredReasons ?? extendedDoc.covered_reasons;
2787
+ if (coveredReasons) {
2788
+ for (let i = 0; i < coveredReasons.length; i++) {
2789
+ add(`coveredReasons[${i}].content`, coveredReasons[i].content);
2790
+ coveredReasons[i].conditions?.forEach((condition, j) => {
2791
+ add(`coveredReasons[${i}].conditions[${j}]`, condition);
2792
+ });
2793
+ }
2794
+ }
2496
2795
  return entries;
2497
2796
  }
2498
2797
  function parseFormatResponse(response) {
@@ -2508,6 +2807,10 @@ function parseFormatResponse(response) {
2508
2807
  return results;
2509
2808
  }
2510
2809
  function applyFormattedContent(doc, entries, formatted) {
2810
+ const docRecord = doc;
2811
+ if (!docRecord.coveredReasons && docRecord.covered_reasons) {
2812
+ docRecord.coveredReasons = docRecord.covered_reasons;
2813
+ }
2511
2814
  for (const entry of entries) {
2512
2815
  const cleaned = formatted.get(entry.id);
2513
2816
  if (!cleaned) continue;
@@ -2516,6 +2819,14 @@ function applyFormattedContent(doc, entries, formatted) {
2516
2819
  const [, field, idx1, sub1, idx2, sub2] = segments;
2517
2820
  if (!sub1) {
2518
2821
  doc[field] = cleaned;
2822
+ } else if (idx2 && !sub2) {
2823
+ const arr = doc[field];
2824
+ if (arr && arr[Number(idx1)]) {
2825
+ const nested = arr[Number(idx1)][sub1];
2826
+ if (Array.isArray(nested)) {
2827
+ nested[Number(idx2)] = cleaned;
2828
+ }
2829
+ }
2519
2830
  } else if (!sub2) {
2520
2831
  const arr = doc[field];
2521
2832
  if (arr && arr[Number(idx1)]) {
@@ -2534,7 +2845,7 @@ function applyFormattedContent(doc, entries, formatted) {
2534
2845
  }
2535
2846
  var MAX_ENTRIES_PER_BATCH = 20;
2536
2847
  async function formatDocumentContent(doc, generateText, options) {
2537
- const entries = collectContentFields(doc);
2848
+ const entries = collectContentFields(doc).filter((entry) => shouldFormatContent(entry.text));
2538
2849
  const totalUsage = { inputTokens: 0, outputTokens: 0 };
2539
2850
  if (entries.length === 0) {
2540
2851
  return { document: doc, usage: totalUsage };
@@ -2580,6 +2891,16 @@ function formatAddress(addr) {
2580
2891
  const parts = [addr.street1, addr.street2, addr.city, addr.state, addr.zip, addr.country].filter(Boolean);
2581
2892
  return parts.join(", ");
2582
2893
  }
2894
+ function asRecordArray(value) {
2895
+ return Array.isArray(value) ? value.filter((item) => Boolean(item) && typeof item === "object" && !Array.isArray(item)) : [];
2896
+ }
2897
+ function firstString(item, keys) {
2898
+ for (const key of keys) {
2899
+ const value = item[key];
2900
+ if (typeof value === "string" && value.trim()) return value;
2901
+ }
2902
+ return void 0;
2903
+ }
2583
2904
  function chunkDocument(doc) {
2584
2905
  const ensureArray = (v) => Array.isArray(v) ? v : [];
2585
2906
  doc = {
@@ -2593,6 +2914,7 @@ function chunkDocument(doc) {
2593
2914
  const chunks = [];
2594
2915
  const docId = doc.id;
2595
2916
  const policyTypesStr = doc.policyTypes?.length ? doc.policyTypes.join(",") : void 0;
2917
+ const extendedDoc = doc;
2596
2918
  function stringMetadata(entries) {
2597
2919
  const base = Object.fromEntries(
2598
2920
  Object.entries(entries).filter(([, value]) => value !== void 0 && value !== null && String(value).length > 0).map(([key, value]) => [key, String(value)])
@@ -2600,11 +2922,22 @@ function chunkDocument(doc) {
2600
2922
  if (policyTypesStr) base.policyTypes = policyTypesStr;
2601
2923
  return base;
2602
2924
  }
2603
- chunks.push({
2604
- id: `${docId}:carrier_info:0`,
2605
- documentId: docId,
2606
- type: "carrier_info",
2607
- text: [
2925
+ function lines(values) {
2926
+ return values.filter(Boolean).join("\n");
2927
+ }
2928
+ function pushChunk(idSuffix, type, text, metadata) {
2929
+ chunks.push({
2930
+ id: `${docId}:${idSuffix}`,
2931
+ documentId: docId,
2932
+ type,
2933
+ text,
2934
+ metadata: stringMetadata(metadata)
2935
+ });
2936
+ }
2937
+ pushChunk(
2938
+ "carrier_info:0",
2939
+ "carrier_info",
2940
+ lines([
2608
2941
  `Carrier: ${doc.carrier}`,
2609
2942
  doc.carrierLegalName ? `Legal Name: ${doc.carrierLegalName}` : null,
2610
2943
  doc.carrierNaicNumber ? `NAIC: ${doc.carrierNaicNumber}` : null,
@@ -2621,94 +2954,83 @@ function chunkDocument(doc) {
2621
2954
  doc.isPackage != null ? `Package: ${doc.isPackage ? "Yes" : "No"}` : null,
2622
2955
  doc.security ? `Security: ${doc.security}` : null,
2623
2956
  doc.policyTypes?.length ? `Policy Types: ${doc.policyTypes.join(", ")}` : null
2624
- ].filter(Boolean).join("\n"),
2625
- metadata: stringMetadata({ carrier: doc.carrier, documentType: doc.type })
2626
- });
2957
+ ]),
2958
+ { carrier: doc.carrier, documentType: doc.type }
2959
+ );
2627
2960
  if (doc.summary) {
2628
- chunks.push({
2629
- id: `${docId}:declaration:summary`,
2630
- documentId: docId,
2631
- type: "declaration",
2632
- text: `Policy Summary: ${doc.summary}`,
2633
- metadata: stringMetadata({ documentType: doc.type })
2634
- });
2961
+ pushChunk("declaration:summary", "declaration", `Policy Summary: ${doc.summary}`, { documentType: doc.type });
2635
2962
  }
2636
2963
  if (doc.type === "policy") {
2637
2964
  const pol = doc;
2638
- chunks.push({
2639
- id: `${docId}:declaration:policy_details`,
2640
- documentId: docId,
2641
- type: "declaration",
2642
- text: [
2965
+ pushChunk(
2966
+ "declaration:policy_details",
2967
+ "declaration",
2968
+ lines([
2643
2969
  `Policy Number: ${pol.policyNumber}`,
2644
2970
  `Effective Date: ${pol.effectiveDate}`,
2645
2971
  pol.expirationDate ? `Expiration Date: ${pol.expirationDate}` : null,
2646
2972
  pol.policyTermType ? `Term Type: ${pol.policyTermType}` : null,
2647
2973
  pol.effectiveTime ? `Effective Time: ${pol.effectiveTime}` : null,
2648
2974
  pol.nextReviewDate ? `Next Review Date: ${pol.nextReviewDate}` : null
2649
- ].filter(Boolean).join("\n"),
2650
- metadata: stringMetadata({
2975
+ ]),
2976
+ {
2651
2977
  policyNumber: pol.policyNumber,
2652
2978
  effectiveDate: pol.effectiveDate,
2653
2979
  expirationDate: pol.expirationDate,
2654
2980
  documentType: doc.type
2655
- })
2656
- });
2981
+ }
2982
+ );
2657
2983
  } else {
2658
2984
  const quote = doc;
2659
- chunks.push({
2660
- id: `${docId}:declaration:quote_details`,
2661
- documentId: docId,
2662
- type: "declaration",
2663
- text: [
2985
+ pushChunk(
2986
+ "declaration:quote_details",
2987
+ "declaration",
2988
+ lines([
2664
2989
  `Quote Number: ${quote.quoteNumber}`,
2665
2990
  quote.proposedEffectiveDate ? `Proposed Effective Date: ${quote.proposedEffectiveDate}` : null,
2666
2991
  quote.proposedExpirationDate ? `Proposed Expiration Date: ${quote.proposedExpirationDate}` : null,
2667
2992
  quote.quoteExpirationDate ? `Quote Expiration Date: ${quote.quoteExpirationDate}` : null
2668
- ].filter(Boolean).join("\n"),
2669
- metadata: stringMetadata({
2993
+ ]),
2994
+ {
2670
2995
  quoteNumber: quote.quoteNumber,
2671
2996
  documentType: doc.type
2672
- })
2673
- });
2997
+ }
2998
+ );
2674
2999
  }
2675
3000
  if (doc.insurer) {
2676
- chunks.push({
2677
- id: `${docId}:party:insurer`,
2678
- documentId: docId,
2679
- type: "party",
2680
- text: [
3001
+ pushChunk(
3002
+ "party:insurer",
3003
+ "party",
3004
+ lines([
2681
3005
  `Insurer: ${doc.insurer.legalName}`,
2682
3006
  doc.insurer.naicNumber ? `NAIC: ${doc.insurer.naicNumber}` : null,
2683
3007
  doc.insurer.amBestRating ? `AM Best Rating: ${doc.insurer.amBestRating}` : null,
2684
3008
  doc.insurer.amBestNumber ? `AM Best Number: ${doc.insurer.amBestNumber}` : null,
2685
3009
  doc.insurer.admittedStatus ? `Admitted Status: ${doc.insurer.admittedStatus}` : null,
2686
3010
  doc.insurer.stateOfDomicile ? `State of Domicile: ${doc.insurer.stateOfDomicile}` : null
2687
- ].filter(Boolean).join("\n"),
2688
- metadata: stringMetadata({ partyRole: "insurer", partyName: doc.insurer.legalName, documentType: doc.type })
2689
- });
3011
+ ]),
3012
+ { partyRole: "insurer", partyName: doc.insurer.legalName, documentType: doc.type }
3013
+ );
2690
3014
  }
2691
3015
  if (doc.producer) {
2692
- chunks.push({
2693
- id: `${docId}:party:producer`,
2694
- documentId: docId,
2695
- type: "party",
2696
- text: [
3016
+ pushChunk(
3017
+ "party:producer",
3018
+ "party",
3019
+ lines([
2697
3020
  `Producer/Broker: ${doc.producer.agencyName}`,
2698
3021
  doc.producer.contactName ? `Contact: ${doc.producer.contactName}` : null,
2699
3022
  doc.producer.licenseNumber ? `License: ${doc.producer.licenseNumber}` : null,
2700
3023
  doc.producer.phone ? `Phone: ${doc.producer.phone}` : null,
2701
3024
  doc.producer.email ? `Email: ${doc.producer.email}` : null,
2702
3025
  doc.producer.address ? `Address: ${formatAddress(doc.producer.address)}` : null
2703
- ].filter(Boolean).join("\n"),
2704
- metadata: stringMetadata({ partyRole: "producer", partyName: doc.producer.agencyName, documentType: doc.type })
2705
- });
3026
+ ]),
3027
+ { partyRole: "producer", partyName: doc.producer.agencyName, documentType: doc.type }
3028
+ );
2706
3029
  }
2707
- chunks.push({
2708
- id: `${docId}:named_insured:0`,
2709
- documentId: docId,
2710
- type: "named_insured",
2711
- text: [
3030
+ pushChunk(
3031
+ "named_insured:0",
3032
+ "named_insured",
3033
+ lines([
2712
3034
  `Insured: ${doc.insuredName}`,
2713
3035
  doc.insuredDba ? `DBA: ${doc.insuredDba}` : null,
2714
3036
  doc.insuredEntityType ? `Entity Type: ${doc.insuredEntityType}` : null,
@@ -2716,36 +3038,34 @@ function chunkDocument(doc) {
2716
3038
  doc.insuredSicCode ? `SIC: ${doc.insuredSicCode}` : null,
2717
3039
  doc.insuredNaicsCode ? `NAICS: ${doc.insuredNaicsCode}` : null,
2718
3040
  doc.insuredAddress ? `Address: ${formatAddress(doc.insuredAddress)}` : null
2719
- ].filter(Boolean).join("\n"),
2720
- metadata: stringMetadata({ insuredName: doc.insuredName, documentType: doc.type })
2721
- });
3041
+ ]),
3042
+ { insuredName: doc.insuredName, documentType: doc.type }
3043
+ );
2722
3044
  doc.additionalNamedInsureds?.forEach((insured, i) => {
2723
- chunks.push({
2724
- id: `${docId}:named_insured:${i + 1}`,
2725
- documentId: docId,
2726
- type: "named_insured",
2727
- text: [
3045
+ pushChunk(
3046
+ `named_insured:${i + 1}`,
3047
+ "named_insured",
3048
+ lines([
2728
3049
  `Additional Named Insured: ${insured.name}`,
2729
3050
  insured.address ? `Address: ${formatAddress(insured.address)}` : null,
2730
3051
  insured.relationship ? `Relationship: ${insured.relationship}` : null
2731
- ].filter(Boolean).join("\n"),
2732
- metadata: stringMetadata({ insuredName: insured.name, role: "additional_named_insured", documentType: doc.type })
2733
- });
3052
+ ]),
3053
+ { insuredName: insured.name, role: "additional_named_insured", documentType: doc.type }
3054
+ );
2734
3055
  });
2735
3056
  doc.coverages.forEach((cov, i) => {
2736
- chunks.push({
2737
- id: `${docId}:coverage:${i}`,
2738
- documentId: docId,
2739
- type: "coverage",
2740
- text: [
3057
+ pushChunk(
3058
+ `coverage:${i}`,
3059
+ "coverage",
3060
+ lines([
2741
3061
  `Coverage: ${cov.name}`,
2742
3062
  `Limit: ${cov.limit}`,
2743
3063
  cov.limitValueType ? `Limit Type: ${cov.limitValueType}` : null,
2744
3064
  cov.deductible ? `Deductible: ${cov.deductible}` : null,
2745
3065
  cov.deductibleValueType ? `Deductible Type: ${cov.deductibleValueType}` : null,
2746
3066
  cov.originalContent ? `Source: ${cov.originalContent}` : null
2747
- ].filter(Boolean).join("\n"),
2748
- metadata: stringMetadata({
3067
+ ]),
3068
+ {
2749
3069
  coverageName: cov.name,
2750
3070
  limit: cov.limit,
2751
3071
  limitValueType: cov.limitValueType,
@@ -2755,15 +3075,14 @@ function chunkDocument(doc) {
2755
3075
  pageNumber: cov.pageNumber,
2756
3076
  sectionRef: cov.sectionRef,
2757
3077
  documentType: doc.type
2758
- })
2759
- });
3078
+ }
3079
+ );
2760
3080
  });
2761
3081
  doc.enrichedCoverages?.forEach((cov, i) => {
2762
- chunks.push({
2763
- id: `${docId}:coverage:enriched:${i}`,
2764
- documentId: docId,
2765
- type: "coverage",
2766
- text: [
3082
+ pushChunk(
3083
+ `coverage:enriched:${i}`,
3084
+ "coverage",
3085
+ lines([
2767
3086
  `Coverage: ${cov.name}`,
2768
3087
  cov.coverageCode ? `Code: ${cov.coverageCode}` : null,
2769
3088
  `Limit: ${cov.limit}`,
@@ -2780,8 +3099,8 @@ function chunkDocument(doc) {
2780
3099
  `Included: ${cov.included ? "Yes" : "No"}`,
2781
3100
  cov.premium ? `Premium: ${cov.premium}` : null,
2782
3101
  cov.originalContent ? `Source: ${cov.originalContent}` : null
2783
- ].filter(Boolean).join("\n"),
2784
- metadata: stringMetadata({
3102
+ ]),
3103
+ {
2785
3104
  coverageName: cov.name,
2786
3105
  coverageCode: cov.coverageCode,
2787
3106
  limit: cov.limit,
@@ -2790,8 +3109,8 @@ function chunkDocument(doc) {
2790
3109
  pageNumber: cov.pageNumber,
2791
3110
  included: cov.included,
2792
3111
  documentType: doc.type
2793
- })
2794
- });
3112
+ }
3113
+ );
2795
3114
  });
2796
3115
  if (doc.limits) {
2797
3116
  const limitLines = ["Limit Schedule"];
@@ -2815,39 +3134,31 @@ function chunkDocument(doc) {
2815
3134
  limitLines.push(`Employers Liability \u2014 Each Accident: ${lim.employersLiability.eachAccident}, Disease Policy Limit: ${lim.employersLiability.diseasePolicyLimit}, Disease Each Employee: ${lim.employersLiability.diseaseEachEmployee}`);
2816
3135
  }
2817
3136
  if (lim.defenseCostTreatment) limitLines.push(`Defense Cost Treatment: ${lim.defenseCostTreatment}`);
2818
- chunks.push({
2819
- id: `${docId}:coverage:limit_schedule`,
2820
- documentId: docId,
2821
- type: "coverage",
2822
- text: limitLines.join("\n"),
2823
- metadata: stringMetadata({ coverageName: "limit_schedule", documentType: doc.type })
2824
- });
3137
+ pushChunk("coverage:limit_schedule", "coverage", limitLines.join("\n"), { coverageName: "limit_schedule", documentType: doc.type });
2825
3138
  lim.sublimits?.forEach((sub, i) => {
2826
- chunks.push({
2827
- id: `${docId}:coverage:sublimit:${i}`,
2828
- documentId: docId,
2829
- type: "coverage",
2830
- text: [
3139
+ pushChunk(
3140
+ `coverage:sublimit:${i}`,
3141
+ "coverage",
3142
+ lines([
2831
3143
  `Sublimit: ${sub.name}`,
2832
3144
  `Limit: ${sub.limit}`,
2833
3145
  sub.appliesTo ? `Applies To: ${sub.appliesTo}` : null,
2834
3146
  sub.deductible ? `Deductible: ${sub.deductible}` : null
2835
- ].filter(Boolean).join("\n"),
2836
- metadata: stringMetadata({ coverageName: sub.name, limit: sub.limit, documentType: doc.type })
2837
- });
3147
+ ]),
3148
+ { coverageName: sub.name, limit: sub.limit, documentType: doc.type }
3149
+ );
2838
3150
  });
2839
3151
  lim.sharedLimits?.forEach((sl, i) => {
2840
- chunks.push({
2841
- id: `${docId}:coverage:shared_limit:${i}`,
2842
- documentId: docId,
2843
- type: "coverage",
2844
- text: [
3152
+ pushChunk(
3153
+ `coverage:shared_limit:${i}`,
3154
+ "coverage",
3155
+ [
2845
3156
  `Shared Limit: ${sl.description}`,
2846
3157
  `Limit: ${sl.limit}`,
2847
3158
  `Coverage Parts: ${sl.coverageParts.join(", ")}`
2848
3159
  ].join("\n"),
2849
- metadata: stringMetadata({ coverageName: sl.description, limit: sl.limit, documentType: doc.type })
2850
- });
3160
+ { coverageName: sl.description, limit: sl.limit, documentType: doc.type }
3161
+ );
2851
3162
  });
2852
3163
  }
2853
3164
  if (doc.deductibles) {
@@ -2861,12 +3172,9 @@ function chunkDocument(doc) {
2861
3172
  if (ded.waitingPeriod) dedLines.push(`Waiting Period: ${ded.waitingPeriod}`);
2862
3173
  if (ded.appliesTo) dedLines.push(`Applies To: ${ded.appliesTo}`);
2863
3174
  if (dedLines.length > 1) {
2864
- chunks.push({
2865
- id: `${docId}:coverage:deductible_schedule`,
2866
- documentId: docId,
2867
- type: "coverage",
2868
- text: dedLines.join("\n"),
2869
- metadata: stringMetadata({ coverageName: "deductible_schedule", documentType: doc.type })
3175
+ pushChunk("coverage:deductible_schedule", "coverage", dedLines.join("\n"), {
3176
+ coverageName: "deductible_schedule",
3177
+ documentType: doc.type
2870
3178
  });
2871
3179
  }
2872
3180
  }
@@ -2878,76 +3186,141 @@ function chunkDocument(doc) {
2878
3186
  doc.extendedReportingPeriod?.supplementalPremium ? `Extended Reporting Period Premium: ${doc.extendedReportingPeriod.supplementalPremium}` : null
2879
3187
  ].filter(Boolean);
2880
3188
  if (claimsMadeLines.length > 0) {
2881
- chunks.push({
2882
- id: `${docId}:coverage:claims_made_details`,
2883
- documentId: docId,
2884
- type: "coverage",
2885
- text: claimsMadeLines.join("\n"),
2886
- metadata: stringMetadata({ coverageName: "claims_made_details", documentType: doc.type })
3189
+ pushChunk("coverage:claims_made_details", "coverage", claimsMadeLines.join("\n"), {
3190
+ coverageName: "claims_made_details",
3191
+ documentType: doc.type
2887
3192
  });
2888
3193
  }
2889
3194
  doc.formInventory?.forEach((form, i) => {
2890
- chunks.push({
2891
- id: `${docId}:declaration:form:${i}`,
2892
- documentId: docId,
2893
- type: "declaration",
2894
- text: [
3195
+ pushChunk(
3196
+ `declaration:form:${i}`,
3197
+ "declaration",
3198
+ lines([
2895
3199
  `Form: ${form.formNumber}`,
2896
3200
  form.title ? `Title: ${form.title}` : null,
2897
3201
  `Type: ${form.formType}`,
2898
3202
  form.editionDate ? `Edition: ${form.editionDate}` : null,
2899
3203
  form.pageStart ? `Pages: ${form.pageStart}${form.pageEnd ? `-${form.pageEnd}` : ""}` : null
2900
- ].filter(Boolean).join("\n"),
2901
- metadata: stringMetadata({
3204
+ ]),
3205
+ {
2902
3206
  formNumber: form.formNumber,
2903
3207
  formType: form.formType,
2904
3208
  documentType: doc.type
2905
- })
2906
- });
3209
+ }
3210
+ );
2907
3211
  });
2908
3212
  doc.endorsements?.forEach((end, i) => {
2909
- chunks.push({
2910
- id: `${docId}:endorsement:${i}`,
2911
- documentId: docId,
2912
- type: "endorsement",
2913
- text: `Endorsement: ${end.title}
3213
+ pushChunk(
3214
+ `endorsement:${i}`,
3215
+ "endorsement",
3216
+ `Endorsement: ${end.title}
2914
3217
  ${end.content}`.trim(),
2915
- metadata: stringMetadata({
3218
+ {
2916
3219
  endorsementType: end.endorsementType,
2917
3220
  formNumber: end.formNumber,
2918
3221
  pageStart: end.pageStart,
2919
3222
  pageEnd: end.pageEnd,
2920
3223
  documentType: doc.type
2921
- })
2922
- });
3224
+ }
3225
+ );
2923
3226
  });
2924
3227
  doc.exclusions?.forEach((exc, i) => {
2925
- chunks.push({
2926
- id: `${docId}:exclusion:${i}`,
2927
- documentId: docId,
2928
- type: "exclusion",
2929
- text: `Exclusion: ${exc.name}
2930
- ${exc.content}`.trim(),
2931
- metadata: stringMetadata({ formNumber: exc.formNumber, pageNumber: exc.pageNumber, documentType: doc.type })
3228
+ pushChunk(`exclusion:${i}`, "exclusion", `Exclusion: ${exc.name}
3229
+ ${exc.content}`.trim(), {
3230
+ formNumber: exc.formNumber,
3231
+ pageNumber: exc.pageNumber,
3232
+ documentType: doc.type
2932
3233
  });
2933
3234
  });
2934
3235
  doc.conditions?.forEach((cond, i) => {
2935
- chunks.push({
2936
- id: `${docId}:condition:${i}`,
2937
- documentId: docId,
2938
- type: "condition",
2939
- text: [
3236
+ pushChunk(
3237
+ `condition:${i}`,
3238
+ "condition",
3239
+ [
2940
3240
  `Condition: ${cond.name}`,
2941
3241
  `Type: ${cond.conditionType}`,
2942
3242
  cond.content,
2943
3243
  ...cond.keyValues?.map((kv) => `${kv.key}: ${kv.value}`) ?? []
2944
3244
  ].join("\n"),
2945
- metadata: stringMetadata({
3245
+ {
2946
3246
  conditionName: cond.name,
2947
3247
  conditionType: cond.conditionType,
2948
3248
  pageNumber: cond.pageNumber,
2949
3249
  documentType: doc.type
2950
- })
3250
+ }
3251
+ );
3252
+ });
3253
+ asRecordArray(extendedDoc.definitions).forEach((definition, i) => {
3254
+ const term = firstString(definition, ["term", "name", "title"]) ?? `Definition ${i + 1}`;
3255
+ const body = firstString(definition, ["definition", "content", "text", "meaning"]);
3256
+ pushChunk(
3257
+ `definition:${i}`,
3258
+ "definition",
3259
+ lines([
3260
+ `Definition: ${term}`,
3261
+ body,
3262
+ firstString(definition, ["originalContent", "source"]) ? `Source: ${firstString(definition, ["originalContent", "source"])}` : null
3263
+ ]),
3264
+ {
3265
+ term,
3266
+ formNumber: firstString(definition, ["formNumber"]),
3267
+ formTitle: firstString(definition, ["formTitle"]),
3268
+ pageNumber: typeof definition.pageNumber === "number" ? definition.pageNumber : void 0,
3269
+ sectionRef: firstString(definition, ["sectionRef", "sectionTitle"]),
3270
+ documentType: doc.type
3271
+ }
3272
+ );
3273
+ });
3274
+ const coveredReasons = asRecordArray(extendedDoc.coveredReasons ?? extendedDoc.covered_reasons);
3275
+ coveredReasons.forEach((coveredReason, i) => {
3276
+ const title = firstString(coveredReason, ["title", "name", "reason", "peril", "cause"]) ?? `Covered Reason ${i + 1}`;
3277
+ const coverageName = firstString(coveredReason, ["coverageName", "coverage", "coveragePart"]);
3278
+ const reasonNumber = firstString(coveredReason, ["reasonNumber", "number"]);
3279
+ const body = firstString(coveredReason, ["content", "description", "text", "coverageGrant"]);
3280
+ pushChunk(
3281
+ `covered_reason:${i}`,
3282
+ "covered_reason",
3283
+ lines([
3284
+ coverageName ? `Coverage: ${coverageName}` : null,
3285
+ reasonNumber ? `Reason Number: ${reasonNumber}` : null,
3286
+ `Covered Reason: ${title}`,
3287
+ body,
3288
+ firstString(coveredReason, ["originalContent", "source"]) ? `Source: ${firstString(coveredReason, ["originalContent", "source"])}` : null
3289
+ ]),
3290
+ {
3291
+ coverageName,
3292
+ reasonNumber,
3293
+ title,
3294
+ formNumber: firstString(coveredReason, ["formNumber"]),
3295
+ formTitle: firstString(coveredReason, ["formTitle"]),
3296
+ pageNumber: typeof coveredReason.pageNumber === "number" ? coveredReason.pageNumber : void 0,
3297
+ sectionRef: firstString(coveredReason, ["sectionRef", "sectionTitle"]),
3298
+ documentType: doc.type
3299
+ }
3300
+ );
3301
+ const conditions = Array.isArray(coveredReason.conditions) ? coveredReason.conditions.filter((condition) => typeof condition === "string" && condition.trim().length > 0) : [];
3302
+ conditions.forEach((condition, conditionIndex) => {
3303
+ pushChunk(
3304
+ `covered_reason:${i}:condition:${conditionIndex}`,
3305
+ "covered_reason",
3306
+ lines([
3307
+ coverageName ? `Coverage: ${coverageName}` : null,
3308
+ reasonNumber ? `Reason Number: ${reasonNumber}` : null,
3309
+ `Covered Reason Condition: ${title}`,
3310
+ condition
3311
+ ]),
3312
+ {
3313
+ coverageName,
3314
+ reasonNumber,
3315
+ title,
3316
+ conditionIndex,
3317
+ formNumber: firstString(coveredReason, ["formNumber"]),
3318
+ formTitle: firstString(coveredReason, ["formTitle"]),
3319
+ pageNumber: typeof coveredReason.pageNumber === "number" ? coveredReason.pageNumber : void 0,
3320
+ sectionRef: firstString(coveredReason, ["sectionRef", "sectionTitle"]),
3321
+ documentType: doc.type
3322
+ }
3323
+ );
2951
3324
  });
2952
3325
  });
2953
3326
  if (doc.declarations) {
@@ -2962,50 +3335,42 @@ ${exc.content}`.trim(),
2962
3335
  const declMeta = { documentType: doc.type };
2963
3336
  if (typeof decl.formType === "string") declMeta.formType = decl.formType;
2964
3337
  if (typeof decl.line === "string") declMeta.declarationLine = decl.line;
2965
- chunks.push({
2966
- id: `${docId}:declaration:0`,
2967
- documentId: docId,
2968
- type: "declaration",
2969
- text: `Declarations
2970
- ${declLines.join("\n")}`,
2971
- metadata: stringMetadata(declMeta)
2972
- });
3338
+ pushChunk("declaration:0", "declaration", `Declarations
3339
+ ${declLines.join("\n")}`, declMeta);
2973
3340
  }
2974
3341
  }
2975
3342
  doc.sections?.forEach((sec, i) => {
2976
3343
  const hasSubsections = sec.subsections && sec.subsections.length > 0;
2977
3344
  const contentLength = sec.content.length;
2978
3345
  if (hasSubsections) {
2979
- chunks.push({
2980
- id: `${docId}:section:${i}`,
2981
- documentId: docId,
2982
- type: "section",
2983
- text: `Section: ${sec.title}
3346
+ pushChunk(
3347
+ `section:${i}`,
3348
+ "section",
3349
+ `Section: ${sec.title}
2984
3350
  ${sec.content}`,
2985
- metadata: stringMetadata({
3351
+ {
2986
3352
  sectionType: sec.type,
2987
3353
  sectionNumber: sec.sectionNumber,
2988
3354
  pageStart: sec.pageStart,
2989
3355
  pageEnd: sec.pageEnd,
2990
3356
  documentType: doc.type,
2991
3357
  hasSubsections: "true"
2992
- })
2993
- });
3358
+ }
3359
+ );
2994
3360
  sec.subsections.forEach((sub, j) => {
2995
- chunks.push({
2996
- id: `${docId}:section:${i}:sub:${j}`,
2997
- documentId: docId,
2998
- type: "section",
2999
- text: `${sec.title} > ${sub.title}
3361
+ pushChunk(
3362
+ `section:${i}:sub:${j}`,
3363
+ "section",
3364
+ `${sec.title} > ${sub.title}
3000
3365
  ${sub.content}`,
3001
- metadata: stringMetadata({
3366
+ {
3002
3367
  sectionType: sec.type,
3003
3368
  parentSection: sec.title,
3004
3369
  sectionNumber: sub.sectionNumber,
3005
3370
  pageNumber: sub.pageNumber,
3006
3371
  documentType: doc.type
3007
- })
3008
- });
3372
+ }
3373
+ );
3009
3374
  });
3010
3375
  } else if (contentLength > 2e3) {
3011
3376
  const paragraphs = sec.content.split(/\n\n+/);
@@ -3013,58 +3378,55 @@ ${sub.content}`,
3013
3378
  let chunkIndex = 0;
3014
3379
  for (const para of paragraphs) {
3015
3380
  if (currentChunk.length + para.length > 1e3 && currentChunk.length > 0) {
3016
- chunks.push({
3017
- id: `${docId}:section:${i}:part:${chunkIndex}`,
3018
- documentId: docId,
3019
- type: "section",
3020
- text: `Section: ${sec.title} (part ${chunkIndex + 1})
3381
+ pushChunk(
3382
+ `section:${i}:part:${chunkIndex}`,
3383
+ "section",
3384
+ `Section: ${sec.title} (part ${chunkIndex + 1})
3021
3385
  ${currentChunk.trim()}`,
3022
- metadata: stringMetadata({
3386
+ {
3023
3387
  sectionType: sec.type,
3024
3388
  sectionNumber: sec.sectionNumber,
3025
3389
  pageStart: sec.pageStart,
3026
3390
  pageEnd: sec.pageEnd,
3027
3391
  documentType: doc.type,
3028
3392
  partIndex: chunkIndex
3029
- })
3030
- });
3393
+ }
3394
+ );
3031
3395
  currentChunk = "";
3032
3396
  chunkIndex++;
3033
3397
  }
3034
3398
  currentChunk += (currentChunk ? "\n\n" : "") + para;
3035
3399
  }
3036
3400
  if (currentChunk.trim()) {
3037
- chunks.push({
3038
- id: `${docId}:section:${i}:part:${chunkIndex}`,
3039
- documentId: docId,
3040
- type: "section",
3041
- text: `Section: ${sec.title} (part ${chunkIndex + 1})
3401
+ pushChunk(
3402
+ `section:${i}:part:${chunkIndex}`,
3403
+ "section",
3404
+ `Section: ${sec.title} (part ${chunkIndex + 1})
3042
3405
  ${currentChunk.trim()}`,
3043
- metadata: stringMetadata({
3406
+ {
3044
3407
  sectionType: sec.type,
3045
3408
  sectionNumber: sec.sectionNumber,
3046
3409
  pageStart: sec.pageStart,
3047
3410
  pageEnd: sec.pageEnd,
3048
3411
  documentType: doc.type,
3049
3412
  partIndex: chunkIndex
3050
- })
3051
- });
3413
+ }
3414
+ );
3052
3415
  }
3053
3416
  } else {
3054
- chunks.push({
3055
- id: `${docId}:section:${i}`,
3056
- documentId: docId,
3057
- type: "section",
3058
- text: `Section: ${sec.title}
3417
+ pushChunk(
3418
+ `section:${i}`,
3419
+ "section",
3420
+ `Section: ${sec.title}
3059
3421
  ${sec.content}`,
3060
- metadata: stringMetadata({
3422
+ {
3061
3423
  sectionType: sec.type,
3062
3424
  sectionNumber: sec.sectionNumber,
3063
3425
  pageStart: sec.pageStart,
3064
3426
  pageEnd: sec.pageEnd,
3065
3427
  documentType: doc.type
3066
- })
3067
- });
3428
+ }
3429
+ );
3068
3430
  }
3069
3431
  });
3070
3432
  doc.locations?.forEach((loc, i) => {
@@ -3495,6 +3857,13 @@ function dedupeByKey(items, keyFn) {
3495
3857
  }
3496
3858
  return merged;
3497
3859
  }
3860
/**
 * Normalize a single component of a dedupe key.
 *
 * Nullish values become the empty string. Everything else is stringified,
 * Unicode-normalized (NFKC), lower-cased, has "&" spelled out as "and", and
 * is then stripped of every character that is not a letter, digit, or
 * combining mark.
 *
 * For plain ASCII input the result is the same as an ASCII-only
 * `[^a-z0-9]` filter. The Unicode-aware filter additionally keeps non-Latin
 * text, so two different non-ASCII values no longer collapse to the same
 * empty key, and NFKC folds compatibility forms (e.g. the "ﬁ" ligature or
 * full-width digits) into their plain equivalents.
 *
 * @param {unknown} value - Raw value taken from an extracted record.
 * @returns {string} Normalized key fragment ("" for null/undefined).
 */
function normalizeKeyPart(value) {
  if (value === void 0 || value === null) return "";
  return String(value)
    // Fold compatibility characters and compose accents before comparing.
    .normalize("NFKC")
    .toLowerCase()
    .replace(/&/g, "and")
    // Keep letters, numbers and combining marks from any script.
    .replace(/[^\p{L}\p{N}\p{M}]+/gu, "");
}
3864
/**
 * Build a composite dedupe key from any number of raw values.
 *
 * Each part is passed through `normalizeKeyPart` and the normalized
 * fragments are joined with "|" in argument order.
 *
 * @param {...unknown} parts - Raw values that together identify a record.
 * @returns {string} The "|"-joined normalized key.
 */
function keyFromParts(...parts) {
  const fragments = [];
  for (const part of parts) {
    fragments.push(normalizeKeyPart(part));
  }
  return fragments.join("|");
}
3498
3867
  function mergeUniqueObjects(existing, incoming, keyFn) {
3499
3868
  return dedupeByKey([...existing, ...incoming], keyFn);
3500
3869
  }
@@ -3523,13 +3892,13 @@ function mergeCoverageLimits(existing, incoming) {
3523
3892
  const merged = mergeShallowPreferPresent(existing, incoming);
3524
3893
  const existingCoverages = Array.isArray(existing.coverages) ? existing.coverages : [];
3525
3894
  const incomingCoverages = Array.isArray(incoming.coverages) ? incoming.coverages : [];
3526
- const coverageKey = (coverage) => [
3527
- String(coverage.name ?? "").toLowerCase(),
3528
- String(coverage.limitType ?? "").toLowerCase(),
3529
- String(coverage.limit ?? "").toLowerCase(),
3530
- String(coverage.deductible ?? "").toLowerCase(),
3531
- String(coverage.formNumber ?? "").toLowerCase()
3532
- ].join("|");
3895
+ const coverageKey = (coverage) => keyFromParts(
3896
+ coverage.name,
3897
+ coverage.limitType,
3898
+ coverage.limit,
3899
+ coverage.deductible,
3900
+ coverage.formNumber
3901
+ );
3533
3902
  const byKey = /* @__PURE__ */ new Map();
3534
3903
  for (const coverage of [...existingCoverages, ...incomingCoverages]) {
3535
3904
  const key = coverageKey(coverage);
@@ -3543,11 +3912,11 @@ function mergeDeclarations(existing, incoming) {
3543
3912
  const merged = mergeShallowPreferPresent(existing, incoming);
3544
3913
  const existingFields = Array.isArray(existing.fields) ? existing.fields : [];
3545
3914
  const incomingFields = Array.isArray(incoming.fields) ? incoming.fields : [];
3546
- merged.fields = mergeUniqueObjects(existingFields, incomingFields, (field) => [
3547
- String(field.field ?? "").toLowerCase(),
3548
- String(field.value ?? "").toLowerCase(),
3549
- String(field.section ?? "").toLowerCase()
3550
- ].join("|"));
3915
+ merged.fields = mergeUniqueObjects(existingFields, incomingFields, (field) => keyFromParts(
3916
+ field.field,
3917
+ field.value,
3918
+ field.section
3919
+ ));
3551
3920
  return merged;
3552
3921
  }
3553
3922
  function mergeArrayPayload(existing, incoming, arrayKey, keyFn) {
@@ -3557,30 +3926,53 @@ function mergeArrayPayload(existing, incoming, arrayKey, keyFn) {
3557
3926
  merged[arrayKey] = mergeUniqueObjects(existingItems, incomingItems, keyFn);
3558
3927
  return merged;
3559
3928
  }
3929
/**
 * Return the first array found on `record` under any of the given keys.
 *
 * Keys are checked in argument order; the array itself (not a copy) is
 * returned. A fresh empty array is returned when no key holds an array or
 * when `record` is null/undefined, so callers can always spread the result.
 *
 * @param {object | null | undefined} record - Object to read from.
 * @param {...string} keys - Candidate property names, in priority order.
 * @returns {unknown[]} The first matching array, or [].
 */
function readArray(record, ...keys) {
  // Guard against a missing payload instead of throwing on property access.
  if (record === void 0 || record === null) return [];
  for (const key of keys) {
    const candidate = record[key];
    if (Array.isArray(candidate)) return candidate;
  }
  return [];
}
3935
/**
 * Merge two extractor payloads whose item array may live under a canonical
 * key (`outputKey`) or under one of several alias keys (`inputKeys`).
 *
 * The top-level objects are combined with `mergeShallowPreferPresent`
 * (defined elsewhere in this file). Items are then gathered from every
 * candidate key of `existing` followed by every candidate key of `incoming`,
 * grouped by `keyFn`, and items sharing a key are folded together with
 * `mergeShallowPreferPresent`. The result is stored under `outputKey` and
 * the alias keys are removed from the merged payload.
 *
 * Arrays are collected from ALL candidate keys rather than only the first
 * one that holds an array: because alias keys are deleted afterwards, a
 * record carrying both the canonical key and an alias would otherwise lose
 * the alias items silently.
 *
 * @param {object} existing - Previously accumulated payload.
 * @param {object} incoming - Newly extracted payload.
 * @param {string} outputKey - Canonical array key written on the result.
 * @param {string[]} inputKeys - Alias keys to read from and then drop.
 * @param {(item: object) => string} keyFn - Produces the dedupe key for an item.
 * @returns {object} Merged payload with the deduped array under `outputKey`.
 */
function mergeAliasedArrayPayload(existing, incoming, outputKey, inputKeys, keyFn) {
  const merged = mergeShallowPreferPresent(existing, incoming);
  // Canonical key first, then aliases; Set drops a repeated outputKey.
  const sourceKeys = [...new Set([outputKey, ...inputKeys])];
  const collectItems = (record) => sourceKeys.flatMap(
    (key) => Array.isArray(record[key]) ? record[key] : []
  );
  const byKey = /* @__PURE__ */ new Map();
  for (const item of [...collectItems(existing), ...collectItems(incoming)]) {
    const key = keyFn(item);
    const current = byKey.get(key);
    byKey.set(key, current ? mergeShallowPreferPresent(current, item) : item);
  }
  merged[outputKey] = [...byKey.values()];
  for (const key of inputKeys) {
    if (key !== outputKey) delete merged[key];
  }
  return merged;
}
3560
3952
  function mergeSupplementary(existing, incoming) {
3561
3953
  const merged = mergeShallowPreferPresent(existing, incoming);
3562
3954
  const mergeContactArray = (arrayKey) => {
3563
3955
  const existingItems = Array.isArray(existing[arrayKey]) ? existing[arrayKey] : [];
3564
3956
  const incomingItems = Array.isArray(incoming[arrayKey]) ? incoming[arrayKey] : [];
3565
- merged[arrayKey] = mergeUniqueObjects(existingItems, incomingItems, (item) => [
3566
- String(item.name ?? "").toLowerCase(),
3567
- String(item.phone ?? "").toLowerCase(),
3568
- String(item.email ?? "").toLowerCase(),
3569
- String(item.address ?? "").toLowerCase(),
3570
- String(item.type ?? "").toLowerCase()
3571
- ].join("|"));
3957
+ merged[arrayKey] = mergeUniqueObjects(existingItems, incomingItems, (item) => keyFromParts(
3958
+ item.name,
3959
+ item.phone,
3960
+ item.email,
3961
+ item.address,
3962
+ item.type
3963
+ ));
3572
3964
  };
3573
3965
  mergeContactArray("regulatoryContacts");
3574
3966
  mergeContactArray("claimsContacts");
3575
3967
  mergeContactArray("thirdPartyAdministrators");
3576
3968
  const existingFacts = Array.isArray(existing.auxiliaryFacts) ? existing.auxiliaryFacts : [];
3577
3969
  const incomingFacts = Array.isArray(incoming.auxiliaryFacts) ? incoming.auxiliaryFacts : [];
3578
- merged.auxiliaryFacts = mergeUniqueObjects(existingFacts, incomingFacts, (item) => [
3579
- String(item.key ?? "").toLowerCase(),
3580
- String(item.value ?? "").toLowerCase(),
3581
- String(item.subject ?? "").toLowerCase(),
3582
- String(item.context ?? "").toLowerCase()
3583
- ].join("|"));
3970
+ merged.auxiliaryFacts = mergeUniqueObjects(existingFacts, incomingFacts, (item) => keyFromParts(
3971
+ item.key,
3972
+ item.value,
3973
+ item.subject,
3974
+ item.context
3975
+ ));
3584
3976
  return merged;
3585
3977
  }
3586
3978
  function mergeExtractorResult(extractorName, existing, incoming) {
@@ -3601,31 +3993,43 @@ function mergeExtractorResult(extractorName, existing, incoming) {
3601
3993
  return mergeCoverageLimits(current, next);
3602
3994
  case "declarations":
3603
3995
  return mergeDeclarations(current, next);
3996
+ case "definitions":
3997
+ return mergeArrayPayload(current, next, "definitions", (item) => keyFromParts(
3998
+ item.term ?? item.name ?? item.key,
3999
+ item.pageNumber ?? item.pageStart
4000
+ ));
4001
+ case "covered_reasons":
4002
+ return mergeAliasedArrayPayload(current, next, "coveredReasons", ["covered_reasons"], (item) => keyFromParts(
4003
+ item.coverageName ?? item.coverage,
4004
+ item.reasonNumber ?? item.number,
4005
+ item.title ?? item.reason ?? item.name ?? item.cause,
4006
+ item.pageNumber ?? item.pageStart
4007
+ ));
3604
4008
  case "endorsements":
3605
- return mergeArrayPayload(current, next, "endorsements", (item) => [
3606
- String(item.formNumber ?? "").toLowerCase(),
3607
- String(item.title ?? "").toLowerCase(),
3608
- String(item.pageStart ?? "")
3609
- ].join("|"));
4009
+ return mergeArrayPayload(current, next, "endorsements", (item) => keyFromParts(
4010
+ item.formNumber,
4011
+ item.title,
4012
+ item.pageStart
4013
+ ));
3610
4014
  case "exclusions":
3611
- return mergeArrayPayload(current, next, "exclusions", (item) => [
3612
- String(item.name ?? "").toLowerCase(),
3613
- String(item.formNumber ?? "").toLowerCase(),
3614
- String(item.pageNumber ?? "")
3615
- ].join("|"));
4015
+ return mergeArrayPayload(current, next, "exclusions", (item) => keyFromParts(
4016
+ item.name,
4017
+ item.formNumber,
4018
+ item.pageNumber
4019
+ ));
3616
4020
  case "conditions":
3617
- return mergeArrayPayload(current, next, "conditions", (item) => [
3618
- String(item.name ?? "").toLowerCase(),
3619
- String(item.conditionType ?? "").toLowerCase(),
3620
- String(item.pageNumber ?? "")
3621
- ].join("|"));
4021
+ return mergeArrayPayload(current, next, "conditions", (item) => keyFromParts(
4022
+ item.name,
4023
+ item.conditionType,
4024
+ item.pageNumber
4025
+ ));
3622
4026
  case "sections":
3623
- return mergeArrayPayload(current, next, "sections", (item) => [
3624
- String(item.title ?? "").toLowerCase(),
3625
- String(item.type ?? "").toLowerCase(),
3626
- String(item.pageStart ?? ""),
3627
- String(item.pageEnd ?? "")
3628
- ].join("|"));
4027
+ return mergeArrayPayload(current, next, "sections", (item) => keyFromParts(
4028
+ item.title,
4029
+ item.type,
4030
+ item.pageStart,
4031
+ item.pageEnd
4032
+ ));
3629
4033
  default:
3630
4034
  return mergeShallowPreferPresent(current, next);
3631
4035
  }
@@ -4484,6 +4888,8 @@ var PageExtractorSchema = import_zod20.z.enum([
4484
4888
  "carrier_info",
4485
4889
  "named_insured",
4486
4890
  "coverage_limits",
4891
+ "covered_reasons",
4892
+ "definitions",
4487
4893
  "endorsements",
4488
4894
  "exclusions",
4489
4895
  "conditions",
@@ -4530,6 +4936,8 @@ Available extractors:
4530
4936
  - carrier_info
4531
4937
  - named_insured
4532
4938
  - coverage_limits
4939
+ - covered_reasons
4940
+ - definitions
4533
4941
  - endorsements
4534
4942
  - exclusions
4535
4943
  - conditions
@@ -4543,6 +4951,8 @@ Rules:
4543
4951
  - Identify the broad section or form context first, then assign focused extractors within that context.
4544
4952
  - Use specific extractors for declarations, schedules, endorsements, exclusions, conditions, premium pages, and loss runs.
4545
4953
  - Use "sections" for pages that contain substantive policy text or mixed content that should still be preserved as raw sections.
4954
+ - Use "definitions" for policy-form pages containing defined terms, definitions sections, or term meaning clauses.
4955
+ - Use "covered_reasons" for pages listing covered causes of loss, covered reasons, covered perils, named perils, covered events, or covered loss triggers.
4546
4956
  - Avoid assigning broad ranges mentally; decide page by page.
4547
4957
  - A page may map to multiple extractors if it legitimately contains multiple relevant sections.
4548
4958
  - Prefer declarations and schedules for numeric limits/deductibles over later generic form wording.
@@ -4550,6 +4960,7 @@ Rules:
4550
4960
  - Do NOT assign "coverage_limits" for generic policy-form or endorsement text that merely explains how limits, deductibles, waiting periods, or coinsurance work, or that says values are "shown in the declarations", "shown in the schedule", "as stated", or "if applicable".
4551
4961
  - Headings like "Limits of Insurance", "Deductible", "Coinsurance", "Loss Conditions", or "Definitions" inside a policy form usually indicate form language, not declarations or schedules.
4552
4962
  - Continuation pages near the end of a form should stay mapped to "sections" plus "conditions"/"exclusions" when applicable, even if they mention limits or deductibles.
4963
+ - Covered causes/reasons and definitions often span a whole form section; tag every substantive page in that section, not just the heading page.
4553
4964
  - When a form inventory entry identifies a page range as a specific form type (e.g., endorsement, coverage, application), use that classification to guide your extractor choice. Do not assign "coverage_limits" to pages the inventory identifies as endorsement or condition/exclusion forms unless the page contains actual schedule values.
4554
4965
  - Do not tag a page with "exclusions" or "conditions" if it only contains a table of contents, page-number reference, running header/footer, or a heading that points to another page without substantive wording.
4555
4966
  - If a page appears to be part of a larger exclusion, conditions, or endorsement section within the same form, keep the assignment consistent across nearby pages in that section rather than isolating a single page fragment.
@@ -4593,12 +5004,15 @@ var ReviewResultSchema = import_zod21.z.object({
4593
5004
  description: import_zod21.z.string()
4594
5005
  }))
4595
5006
  });
4596
- function buildReviewPrompt(templateExpected, extractedKeys, extractionSummary, pageMapSummary) {
4597
- return `You are reviewing an extraction for completeness and quality. Compare what was expected vs what was found.
5007
+ function buildReviewPrompt(templateExpected, extractedKeys, extractionSummary, pageMapSummary, extractorCatalog) {
5008
+ return `You are the extraction coordinator for an insurance-document agent system. Review the current extraction state, decide whether the result is complete enough, and choose any follow-up extractor tasks needed to improve it.
4598
5009
 
4599
5010
  EXPECTED FIELDS (from document type template):
4600
5011
  ${templateExpected.map((f) => `- ${f}`).join("\n")}
4601
5012
 
5013
+ AVAILABLE FOLLOW-UP EXTRACTORS:
5014
+ ${extractorCatalog}
5015
+
4602
5016
  FIELDS ALREADY EXTRACTED:
4603
5017
  ${extractedKeys.map((f) => `- ${f}`).join("\n")}
4604
5018
 
@@ -4612,15 +5026,21 @@ Determine:
4612
5026
  1. Is the extraction complete enough?
4613
5027
  2. What fields are missing?
4614
5028
  3. What quality issues are present?
4615
- 4. Should any additional extraction tasks be dispatched?
5029
+ 4. Which follow-up extraction tasks, if any, should be dispatched?
4616
5030
 
4617
5031
  Mark the extraction as NOT complete if any of these are true:
4618
5032
  - required fields are missing
4619
5033
  - extracted values are generic placeholders like "shown in declarations", "per schedule", "if applicable", "as stated"
4620
5034
  - coverage limits or deductibles appear to come from generic form language instead of declaration/schedule-specific values
5035
+ - definitions pages were mapped but no definition records or definition-type sections were extracted
5036
+ - covered causes/reasons pages were mapped but no covered reason, covered peril, covered cause, or matching section records were extracted
4621
5037
  - page assignments suggest declaration, schedule, endorsement, exclusion, or condition pages were not actually extracted with the matching focused extractor
4622
5038
  - a focused extractor exists but returned too little substance for the relevant pages
4623
5039
 
5040
+ When reviewing CURRENT EXTRACTION SUMMARY, compare the page-map counts to extracted counts. If an assigned extractor produced no useful records, produce a quality issue and a narrow follow-up task over the mapped page range.
5041
+
5042
+ Choose follow-up tasks from AVAILABLE FOLLOW-UP EXTRACTORS. You may dispatch any listed extractor when the page map, current extraction summary, or quality evidence shows that the focused extraction is missing, generic, referential, or too thin. Do not invent extractor names.
5043
+
4624
5044
  Return JSON:
4625
5045
  {
4626
5046
  "complete": boolean,
@@ -4631,7 +5051,7 @@ Return JSON:
4631
5051
  ]
4632
5052
  }
4633
5053
 
4634
- Use the page map to target follow-up extraction pages precisely. Prefer narrow, declaration/schedule-focused follow-up tasks over broad page ranges.
5054
+ Use the page map to target follow-up extraction pages precisely. Prefer narrow, declaration/schedule-focused follow-up tasks over broad page ranges. If no additional model work is likely to improve the extraction, return an empty additionalTasks array.
4635
5055
 
4636
5056
  Respond with JSON only.`;
4637
5057
  }
@@ -5166,6 +5586,7 @@ var SectionsSchema = import_zod32.z.object({
5166
5586
  "policy_form",
5167
5587
  "endorsement",
5168
5588
  "application",
5589
+ "covered_reason",
5169
5590
  "exclusion",
5170
5591
  "condition",
5171
5592
  "definition",
@@ -5189,6 +5610,7 @@ For each section, classify its type:
5189
5610
  - "policy_form" \u2014 named ISO or proprietary forms (e.g. CG 00 01, IL 00 17). All sections within a named form should be typed as "policy_form"
5190
5611
  - "endorsement" \u2014 standalone endorsements modifying the base policy
5191
5612
  - "application" \u2014 the insurance application or supplemental application
5613
+ - "covered_reason" \u2014 affirmative grants of coverage, covered causes of loss, covered perils, or named covered events
5192
5614
  - "insuring_agreement" \u2014 the insuring agreement clause (only if standalone, not inside a policy_form)
5193
5615
  - "exclusion", "condition", "definition" \u2014 for standalone sections only
5194
5616
  - "schedule" \u2014 coverage or rating schedules
@@ -5264,7 +5686,155 @@ For auxiliaryFacts:
5264
5686
  Return JSON only.`;
5265
5687
  }
5266
5688
 
5689
+ // src/prompts/extractors/definitions.ts
5690
+ var import_zod34 = require("zod");
5691
// Zod schema for the output of the focused "definitions" extractor: an object
// holding a `definitions` array. Only `term` and `definition` are required on
// each entry; the remaining provenance fields are optional.
var DefinitionsSchema = (() => {
  const { z } = import_zod34;
  const definitionEntry = z.object({
    term: z.string().describe("Defined term exactly as shown in the document"),
    definition: z.string().describe("Full verbatim definition text, preserving original wording"),
    pageNumber: z.number().optional().describe("Original document page number"),
    formNumber: z.string().optional().describe("Form number where this definition appears"),
    formTitle: z.string().optional().describe("Form title where this definition appears"),
    sectionRef: z.string().optional().describe("Definition section heading or subsection reference"),
    originalContent: z.string().optional().describe("Short verbatim source snippet containing the term and definition")
  });
  return z.object({
    definitions: z.array(definitionEntry).describe("All substantive insurance definitions found in the document")
  });
})();
5704
/**
 * Builds the instruction prompt for the focused "definitions" extractor.
 *
 * The prompt is assembled from one entry per output line (empty entries are
 * the blank separator lines) and joined with newlines.
 *
 * @returns {string} The complete prompt text.
 */
function buildDefinitionsPrompt() {
  const promptLines = [
    `You are an expert insurance document analyst. Extract ALL substantive defined terms from this document. Preserve original wording verbatim.`,
    ``,
    `For EACH definition, extract:`,
    `- term: defined term exactly as shown \u2014 REQUIRED`,
    `- definition: full verbatim definition text including all included subparts \u2014 REQUIRED`,
    `- pageNumber: original document page number where the definition appears`,
    `- formNumber: form number where the definition appears, if shown`,
    `- formTitle: form title where the definition appears, if shown`,
    `- sectionRef: heading such as "Definitions", "Words and Phrases Defined", or coverage-specific definition section`,
    `- originalContent: short verbatim source snippet containing the term and definition`,
    ``,
    `Focus on:`,
    `- Terms in sections titled Definitions, Words and Phrases Defined, Glossary, or similar`,
    `- Coverage-specific defined terms embedded in insuring agreements, endorsements, exclusions, or conditions`,
    `- Multi-part definitions with numbered, lettered, or bulleted clauses`,
    `- Definitions that affect coverage triggers, covered property, insured status, exclusions, limits, or duties`,
    ``,
    `Critical rules:`,
    `- Preserve the original content. Do not paraphrase content.`,
    `- Keep all subparts of a definition together in one item when they define the same term.`,
    `- Ignore table-of-contents entries, running headers/footers, indexes, and cross-references that do not include substantive definition text.`,
    `- Do not emit generic headings like "Definitions" as a term unless the page defines an actual term.`,
    `- Always include pageNumber when the definition appears on a specific page in the supplied document chunk.`,
    `- Use definition as the canonical full text. Do not return a separate content field.`,
    ``,
    `Return JSON only.`
  ];
  return promptLines.join("\n");
}
5732
+
5733
+ // src/prompts/extractors/covered-reasons.ts
5734
+ var import_zod35 = require("zod");
5735
// Zod schema for the output of the focused "covered_reasons" extractor: an
// object holding a `coveredReasons` array. Only `coverageName` and `content`
// are required on each entry; every other field is optional.
var CoveredReasonsSchema = (() => {
  const { z } = import_zod35;
  const optionalText = (description) => z.string().optional().describe(description);
  const optionalTextList = (description) => z.array(z.string()).optional().describe(description);
  const coveredReasonEntry = z.object({
    coverageName: z.string().describe("Coverage, coverage part, or form this covered reason belongs to"),
    reasonNumber: optionalText("Source number or letter for the covered reason, if shown"),
    title: optionalText("Covered reason title, peril, cause of loss, trigger, or short name"),
    content: z.string().describe("Full verbatim covered-reason or insuring-agreement text"),
    conditions: optionalTextList("Conditions, timing rules, documentation requirements, or prerequisites attached to this covered reason"),
    exceptions: optionalTextList("Exceptions or limitations attached to this covered reason"),
    appliesTo: optionalTextList("Covered property, persons, autos, locations, operations, or coverage parts this reason applies to"),
    pageNumber: z.number().optional().describe("Original document page number"),
    formNumber: optionalText("Form number where this covered reason appears"),
    formTitle: optionalText("Form title where this covered reason appears"),
    sectionRef: optionalText("Section heading where this covered reason appears"),
    originalContent: optionalText("Short verbatim source snippet used for this covered reason")
  });
  return z.object({
    coveredReasons: z.array(coveredReasonEntry).describe("Covered causes, perils, triggers, or reasons that affirmatively grant coverage")
  });
})();
5753
/**
 * Builds the instruction prompt for the focused "covered_reasons" extractor.
 *
 * The prompt is assembled from one entry per output line (empty entries are
 * the blank separator lines) and joined with newlines.
 *
 * @returns {string} The complete prompt text.
 */
function buildCoveredReasonsPrompt() {
  const promptLines = [
    `You are an expert insurance document analyst. Extract ALL covered reasons from this document. Preserve original wording verbatim.`,
    ``,
    `A covered reason is affirmative coverage language explaining why, when, or for what cause the insurer will pay. This may be called a covered peril, covered cause of loss, accident, occurrence, loss trigger, additional coverage, expense, or insuring agreement grant.`,
    ``,
    `For EACH covered reason, extract:`,
    `- coverageName: coverage, coverage part, or form this covered reason belongs to \u2014 REQUIRED`,
    `- reasonNumber: source number or letter for the covered reason, if shown`,
    `- title: covered peril, cause of loss, trigger, or short name`,
    `- content: full verbatim covered-reason or insuring-agreement text \u2014 REQUIRED`,
    `- conditions: conditions, timing rules, documentation requirements, or prerequisites attached to this covered reason`,
    `- exceptions: exceptions or limitations attached to this covered reason`,
    `- appliesTo: covered property, persons, autos, locations, operations, or coverage parts this reason applies to`,
    `- pageNumber: original document page number where this covered reason appears`,
    `- formNumber: form number where this covered reason appears, if shown`,
    `- formTitle: form title where this covered reason appears, if shown`,
    `- sectionRef: heading where this covered reason appears`,
    `- originalContent: short verbatim source snippet used for this covered reason`,
    ``,
    `Focus on:`,
    `- Named perils and covered causes of loss`,
    `- Insuring agreement grants and coverage triggers`,
    `- Additional coverages and coverage extensions that state when payment applies`,
    `- Personal lines phrases such as fire, lightning, windstorm, hail, theft, collision, comprehensive, or accidental direct physical loss`,
    `- Commercial lines phrases such as bodily injury, property damage, personal and advertising injury, employee dishonesty, computer fraud, equipment breakdown, or professional services acts`,
    ``,
    `Critical rules:`,
    `- Preserve the original content. Do not paraphrase content.`,
    `- Extract affirmative coverage grants, not exclusions, conditions, or declarations-only limit rows.`,
    `- Do not emit a covered reason from a table-of-contents entry, running header/footer, or reference that only points elsewhere.`,
    `- If a covered reason includes exceptions or limitations in the same clause, keep them in content and also list them in exceptions when they can be separated cleanly.`,
    `- Always include pageNumber when the covered reason appears on a specific page in the supplied document chunk.`,
    `- Preserve coverage grouping. Do not merge separate coverage parts into one generic list.`,
    ``,
    `Return JSON only.`
  ];
  return promptLines.join("\n");
}
5789
+
5267
5790
  // src/prompts/extractors/index.ts
5791
/**
 * Narrows an arbitrary value to an object.
 *
 * @param {unknown} data - Value to inspect.
 * @returns {object|undefined} `data` itself when it is a non-null object
 *   (arrays included), otherwise `undefined`.
 */
function asRecord(data) {
  if (!data || typeof data !== "object") {
    return void 0;
  }
  return data;
}
5794
/**
 * Reads the `sections` list from an extractor result.
 *
 * Entries that are null or not objects are dropped, so callers can read
 * properties such as `type` and `title` from every returned element without
 * throwing.
 *
 * @param {unknown} data - Extractor result, expected to carry a `sections` array.
 * @returns {object[]} The object entries of `data.sections`, or an empty array
 *   when `data` is not an object or `sections` is not an array.
 */
function getSections2(data) {
  const sections = asRecord(data)?.sections;
  if (!Array.isArray(sections)) {
    return [];
  }
  return sections.filter((section) => section !== null && typeof section === "object");
}
5798
/**
 * Reports whether an extractor result carries no covered-reason records.
 *
 * The records are read from `coveredReasons` when that is an array, otherwise
 * from `covered_reasons` when that is an array.
 *
 * @param {unknown} data - Extractor result to inspect.
 * @returns {boolean} `true` when `data` is not an object, neither key holds an
 *   array, or the selected array is empty.
 */
function isCoveredReasonsEmpty(data) {
  const record = asRecord(data);
  if (!record) {
    return true;
  }
  let records = [];
  if (Array.isArray(record.coveredReasons)) {
    records = record.coveredReasons;
  } else if (Array.isArray(record.covered_reasons)) {
    records = record.covered_reasons;
  }
  return records.length === 0;
}
5804
/**
 * Reports whether an extractor result carries no definition records.
 *
 * @param {unknown} data - Extractor result to inspect.
 * @returns {boolean} `true` unless `data.definitions` is a non-empty array.
 */
function isDefinitionsEmpty(data) {
  const record = asRecord(data);
  const records = record ? record.definitions : void 0;
  if (!Array.isArray(records)) {
    return true;
  }
  return records.length === 0;
}
5808
/**
 * Decides whether a section record should be treated as a covered reason.
 *
 * A section qualifies when its lowercased `type` is "covered_reason" or its
 * lowercased `title` contains one of the marker phrases below.
 *
 * @param {object} section - Section record with optional `type` and `title`.
 * @returns {boolean} Whether the section looks like a covered reason.
 */
function sectionLooksLikeCoveredReason(section) {
  const sectionType = String(section.type ?? "").toLowerCase();
  const heading = String(section.title ?? "").toLowerCase();
  if (sectionType === "covered_reason") {
    return true;
  }
  const titleMarkers = [
    "covered cause",
    "covered reason",
    "covered peril",
    "named peril",
    "insuring agreement"
  ];
  return titleMarkers.some((marker) => heading.includes(marker));
}
5813
/**
 * Converts covered-reason-like sections of a sections-extractor result into
 * covered-reason records.
 *
 * Sections are selected with `sectionLooksLikeCoveredReason`; sections whose
 * content is empty or whitespace-only are dropped.
 *
 * @param {unknown} data - Result of the sections extractor.
 * @returns {{coveredReasons: object[]}|undefined} The derived records, or
 *   `undefined` when none could be derived.
 */
function deriveCoveredReasonsFromSections(data) {
  const stringOrUndefined = (value) => (typeof value === "string" ? value : void 0);
  const coveredReasons = [];
  for (const section of getSections2(data)) {
    if (!sectionLooksLikeCoveredReason(section)) {
      continue;
    }
    const content = String(section.content ?? "");
    if (content.trim().length === 0) {
      continue;
    }
    coveredReasons.push({
      coverageName: String(section.coverageName ?? section.formTitle ?? section.title ?? "Covered Reasons"),
      title: stringOrUndefined(section.title),
      content,
      pageNumber: typeof section.pageStart === "number" ? section.pageStart : void 0,
      formNumber: stringOrUndefined(section.formNumber),
      formTitle: stringOrUndefined(section.formTitle),
      sectionRef: stringOrUndefined(section.sectionNumber),
      // Snippet is capped at the first 500 characters of the section text.
      originalContent: typeof section.content === "string" ? section.content.slice(0, 500) : void 0
    });
  }
  if (coveredReasons.length === 0) {
    return void 0;
  }
  return { coveredReasons };
}
5826
/**
 * Converts "definition"-typed sections of a sections-extractor result into
 * definition records.
 *
 * The section title becomes the term (falling back to "Definitions") and the
 * section content becomes the definition text; sections whose content is empty
 * or whitespace-only are dropped.
 *
 * @param {unknown} data - Result of the sections extractor.
 * @returns {{definitions: object[]}|undefined} The derived records, or
 *   `undefined` when none could be derived.
 */
function deriveDefinitionsFromSections(data) {
  const stringOrUndefined = (value) => (typeof value === "string" ? value : void 0);
  const definitions = [];
  for (const section of getSections2(data)) {
    if (String(section.type ?? "").toLowerCase() !== "definition") {
      continue;
    }
    const text = String(section.content ?? "");
    if (text.trim().length === 0) {
      continue;
    }
    definitions.push({
      term: String(section.title ?? "Definitions"),
      definition: text,
      pageNumber: typeof section.pageStart === "number" ? section.pageStart : void 0,
      formNumber: stringOrUndefined(section.formNumber),
      formTitle: stringOrUndefined(section.formTitle),
      sectionRef: stringOrUndefined(section.sectionNumber),
      // Snippet is capped at the first 500 characters of the section text.
      originalContent: typeof section.content === "string" ? section.content.slice(0, 500) : void 0
    });
  }
  if (definitions.length === 0) {
    return void 0;
  }
  return { definitions };
}
5268
5838
  var EXTRACTORS = {
5269
5839
  carrier_info: { buildPrompt: buildCarrierInfoPrompt, schema: CarrierInfoSchema, maxTokens: 2048 },
5270
5840
  named_insured: { buildPrompt: buildNamedInsuredPrompt, schema: NamedInsuredSchema2, maxTokens: 2048 },
@@ -5276,28 +5846,54 @@ var EXTRACTORS = {
5276
5846
  declarations: { buildPrompt: buildDeclarationsPrompt, schema: DeclarationsExtractSchema, maxTokens: 8192 },
5277
5847
  loss_history: { buildPrompt: buildLossHistoryPrompt, schema: LossHistorySchema, maxTokens: 4096 },
5278
5848
  sections: { buildPrompt: buildSectionsPrompt, schema: SectionsSchema, maxTokens: 8192 },
5279
- supplementary: { buildPrompt: buildSupplementaryPrompt, schema: SupplementarySchema, maxTokens: 2048 }
5849
+ supplementary: { buildPrompt: buildSupplementaryPrompt, schema: SupplementarySchema, maxTokens: 2048 },
5850
+ definitions: {
5851
+ buildPrompt: buildDefinitionsPrompt,
5852
+ schema: DefinitionsSchema,
5853
+ maxTokens: 8192,
5854
+ fallback: {
5855
+ extractorName: "sections",
5856
+ isEmpty: isDefinitionsEmpty,
5857
+ deriveFocusedResult: deriveDefinitionsFromSections
5858
+ }
5859
+ },
5860
+ covered_reasons: {
5861
+ buildPrompt: buildCoveredReasonsPrompt,
5862
+ schema: CoveredReasonsSchema,
5863
+ maxTokens: 8192,
5864
+ fallback: {
5865
+ extractorName: "sections",
5866
+ isEmpty: isCoveredReasonsEmpty,
5867
+ deriveFocusedResult: deriveCoveredReasonsFromSections
5868
+ }
5869
+ }
5280
5870
  };
5281
5871
/**
 * Looks up a registered extractor by name.
 *
 * Only own keys of EXTRACTORS are honoured, so inherited names such as
 * "constructor" or "toString" are reported as unknown instead of resolving to
 * Object.prototype members.
 *
 * @param {string} name - Extractor name.
 * @returns {object|undefined} The extractor entry, or `undefined` when no
 *   extractor is registered under `name`.
 */
function getExtractor(name) {
  if (!Object.prototype.hasOwnProperty.call(EXTRACTORS, name)) {
    return void 0;
  }
  return EXTRACTORS[name];
}
5874
/**
 * Renders the EXTRACTORS registry as a bulleted list for use in a prompt.
 *
 * Each line shows the extractor name, its token budget (4096 when the entry
 * does not set `maxTokens`), and the fallback extractor name when one is
 * configured.
 *
 * @returns {string} One "- name (maxTokens: N[; fallback: other])" line per
 *   extractor, joined with newlines.
 */
function formatExtractorCatalogForPrompt() {
  const catalogLines = [];
  for (const name of Object.keys(EXTRACTORS)) {
    const { maxTokens, fallback } = EXTRACTORS[name];
    const tokenBudget = maxTokens ?? 4096;
    const fallbackNote = fallback ? `; fallback: ${fallback.extractorName}` : "";
    catalogLines.push(`- ${name} (maxTokens: ${tokenBudget}${fallbackNote})`);
  }
  return catalogLines.join("\n");
}
5284
5880
 
5285
5881
  // src/extraction/resolve-referential.ts
5286
- var import_zod35 = require("zod");
5882
+ var import_zod37 = require("zod");
5287
5883
 
5288
5884
  // src/prompts/extractors/referential-lookup.ts
5289
- var import_zod34 = require("zod");
5290
- var ReferentialLookupSchema = import_zod34.z.object({
5291
- resolvedCoverages: import_zod34.z.array(
5292
- import_zod34.z.object({
5293
- coverageName: import_zod34.z.string().describe("The coverage name that was referenced"),
5294
- resolvedLimit: import_zod34.z.string().optional().describe("The concrete limit value found, if any"),
5885
+ var import_zod36 = require("zod");
5886
// Zod schema for the referential-lookup result: a `resolvedCoverages` array in
// which `coverageName` and `confidence` are required and every resolved value
// or provenance field is optional.
var ReferentialLookupSchema = (() => {
  const { z } = import_zod36;
  const resolvedCoverage = z.object({
    coverageName: z.string().describe("The coverage name that was referenced"),
    resolvedLimit: z.string().optional().describe("The concrete limit value found, if any"),
    resolvedLimitValueType: CoverageValueTypeSchema.optional(),
    resolvedDeductible: z.string().optional().describe("The concrete deductible value found, if any"),
    resolvedDeductibleValueType: CoverageValueTypeSchema.optional(),
    pageNumber: z.number().optional().describe("Page where the resolved value was found"),
    originalContent: z.string().optional().describe("Verbatim source text for the resolved value"),
    confidence: z.enum(["high", "medium", "low"]).describe("Confidence in the resolution")
  });
  return z.object({
    resolvedCoverages: z.array(resolvedCoverage)
  });
})();
@@ -5332,18 +5928,124 @@ Your task:
5332
5928
  Return JSON only.`;
5333
5929
  }
5334
5930
 
5335
- // src/extraction/resolve-referential.ts
5931
+ // src/extraction/heuristics.ts
5336
5932
/**
 * Detects placeholder values that point at another part of the document
 * instead of stating a concrete value.
 *
 * @param {unknown} value - Extracted value to inspect.
 * @returns {boolean} `true` when `value` is a string that contains, ignoring
 *   case, one of the referential phrases below.
 */
function looksReferential(value) {
  if (typeof value !== "string") {
    return false;
  }
  const lowered = value.toLowerCase();
  const referentialPhrases = [
    "shown in the declarations",
    "shown in declarations",
    "shown in the schedule",
    "as stated",
    "if applicable"
  ];
  return referentialPhrases.some((phrase) => lowered.includes(phrase));
}
5937
/**
 * Decides whether a section record counts as a covered-reason section.
 *
 * A section qualifies when its lowercased `type` is "covered_reason" or its
 * lowercased `title` contains "covered cause", "covered reason", or
 * "covered peril".
 *
 * @param {object} section - Section record with optional `type` and `title`.
 * @returns {boolean} Whether the section looks like a covered reason.
 */
function looksCoveredReasonSection(section) {
  const heading = String(section.title ?? "").toLowerCase();
  const sectionType = String(section.type ?? "").toLowerCase();
  if (sectionType === "covered_reason") {
    return true;
  }
  return ["covered cause", "covered reason", "covered peril"].some((marker) => heading.includes(marker));
}
5942
+
5943
+ // src/extraction/referential-workflow.ts
5944
/**
 * Normalizes a value for case-insensitive text comparison.
 *
 * @param {unknown} value - Value to normalize.
 * @returns {string} The trimmed, lowercased string, or "" when `value` is not
 *   a string.
 */
function normalizeText(value) {
  if (typeof value !== "string") {
    return "";
  }
  return value.trim().toLowerCase();
}
5947
/**
 * Checks whether a value, once normalized, contains the target text.
 *
 * `target` is used as given (callers pass it already lowercased).
 *
 * @param {unknown} value - Candidate text; normalized with `normalizeText`.
 * @param {string} target - Text to search for.
 * @returns {boolean} `true` only when both the normalized value and the
 *   target are non-empty and the value contains the target.
 */
function containsTarget(value, target) {
  const haystack = normalizeText(value);
  if (!haystack || !target) {
    return false;
  }
  return Boolean(haystack.includes(target));
}
5951
/**
 * Builds a page range from a start page and an optional end page.
 *
 * @param {unknown} startPage - First page; must be a finite number above 0.
 * @param {unknown} endPage - Last page; used only when it is a finite number
 *   not below `startPage`.
 * @returns {{startPage: number, endPage: number}|undefined} The range, with
 *   `endPage` defaulting to `startPage`, or `undefined` when `startPage` is
 *   not usable.
 */
function pageRangeFrom(startPage, endPage) {
  const startIsUsable = typeof startPage === "number" && Number.isFinite(startPage) && startPage > 0;
  if (!startIsUsable) {
    return void 0;
  }
  let normalizedEnd = startPage;
  if (typeof endPage === "number" && Number.isFinite(endPage) && endPage >= startPage) {
    normalizedEnd = endPage;
  }
  return { startPage, endPage: normalizedEnd };
}
5958
/**
 * Classifies a referential target string (for example "Declarations",
 * "Item 4", "Premises No. 1") into a coarse kind.
 *
 * Rules are tried in order and the first match wins. The "dec"/"decs"
 * abbreviation is matched only as a whole word, so words that merely end in
 * those letters are not mistaken for declarations.
 *
 * @param {unknown} rawTarget - Target text; a missing, blank, or non-string
 *   value is treated as "unknown".
 * @returns {{raw: string, normalized: string, kind: string}} The trimmed
 *   target, its lowercased form, and one of "declarations", "schedule",
 *   "item", "premises", "section", "policy", or "unknown".
 */
function parseReferentialTarget(rawTarget) {
  const trimmed = typeof rawTarget === "string" ? rawTarget.trim() : "";
  const raw = trimmed || "unknown";
  const normalized = raw.toLowerCase();
  if (normalized === "unknown") {
    return { raw, normalized, kind: "unknown" };
  }
  // Order matters: earlier rules take precedence when several would match.
  const kindRules = [
    ["declarations", /declarations?|\bdecs?\b/],
    ["schedule", /schedule/],
    ["item", /\bitem\b/],
    ["premises", /premises?|location|building/],
    ["section", /\bsection\b/],
    ["policy", /policy|coverage\s+part|coverage\s+form/]
  ];
  const matchedRule = kindRules.find(([, pattern]) => pattern.test(normalized));
  return { raw, normalized, kind: matchedRule ? matchedRule[0] : "unknown" };
}
5970
/**
 * Looks for the reference target by name in already-extracted sections and
 * then in the form inventory.
 *
 * Sections are matched on `title`; forms on `title`, `formType`, or
 * `formNumber`. A match is used only when it yields a valid page range;
 * otherwise the search continues.
 *
 * @param {{referenceTarget: string, sections: object[], formInventory: object[]}} params
 * @returns {{startPage: number, endPage: number}|undefined} Page range of the
 *   first usable match, or `undefined` when nothing matches.
 */
function findLocalReferentialPages(params) {
  const needle = params.referenceTarget.toLowerCase();
  for (const section of params.sections) {
    if (!containsTarget(section.title, needle)) {
      continue;
    }
    const sectionRange = pageRangeFrom(section.pageStart, section.pageEnd);
    if (sectionRange) {
      return sectionRange;
    }
  }
  for (const form of params.formInventory) {
    const candidateFields = [form.title, form.formType, form.formNumber];
    if (!candidateFields.some((field) => containsTarget(field, needle))) {
      continue;
    }
    const formRange = pageRangeFrom(form.pageStart, form.pageEnd);
    if (formRange) {
      return formRange;
    }
  }
  return void 0;
}
5989
/**
 * Finds the first declarations or schedule form in the inventory that suits
 * the parsed reference target.
 *
 * - kind "declarations": only declarations forms qualify.
 * - kinds "schedule", "item", "premises", "policy": declarations forms and
 *   schedule/coverage forms both qualify.
 * - any other kind: nothing qualifies.
 *
 * A form counts as declarations when its `formType` is "declarations" or its
 * title mentions "declaration(s)" or the whole-word abbreviation "dec"/"decs".
 * It counts as a schedule when its title mentions "schedule" or "coverage" or
 * its `formType` is "coverage".
 *
 * @param {{kind: string}} parsedTarget - Result of parseReferentialTarget.
 * @param {object[]} formInventory - Forms with `formType`, `title`,
 *   `pageStart`, and `pageEnd`.
 * @returns {{startPage: number, endPage: number}|undefined} Page range of the
 *   first qualifying form with a valid range, or `undefined`.
 */
function findDeclarationsSchedulePages(parsedTarget, formInventory) {
  const { kind } = parsedTarget;
  const acceptsSchedule = kind === "schedule" || kind === "item" || kind === "premises" || kind === "policy";
  const acceptsDeclarations = kind === "declarations" || acceptsSchedule;
  if (!acceptsDeclarations) {
    // No form can qualify for this kind, so skip the scan entirely.
    return void 0;
  }
  for (const form of formInventory) {
    const formType = normalizeText(form.formType);
    const title = normalizeText(form.title);
    const matchesDeclarations = formType === "declarations" || /declarations?|\bdecs?\b/.test(title);
    const matchesSchedule = acceptsSchedule && (/schedule|coverage/.test(title) || formType === "coverage");
    if (!matchesDeclarations && !matchesSchedule) {
      continue;
    }
    const range = pageRangeFrom(form.pageStart, form.pageEnd);
    if (range) {
      return range;
    }
  }
  return void 0;
}
6003
/**
 * Finds the first extracted section whose type or title suits the parsed
 * reference target and that has a valid page range.
 *
 * Only the kinds "declarations", "schedule", "premises", "item", and
 * "section" can match; any other kind yields `undefined`.
 *
 * @param {{kind: string, normalized: string}} parsedTarget - Result of
 *   parseReferentialTarget.
 * @param {object[]} sections - Sections with `title`, `type`, `pageStart`,
 *   and `pageEnd`.
 * @returns {{startPage: number, endPage: number}|undefined} Page range of the
 *   first usable match, or `undefined`.
 */
function findSectionPages(parsedTarget, sections) {
  const matchesKind = (title, type) => {
    switch (parsedTarget.kind) {
      case "declarations":
        return type === "declarations" || /declarations?/.test(title);
      case "schedule":
        return type === "schedule" || /schedule|scheduled/.test(title);
      case "premises":
        return /premises?|location|building/.test(title);
      case "item":
        return /\bitem\b|schedule|scheduled/.test(title);
      case "section":
        return containsTarget(title, parsedTarget.normalized);
      default:
        return false;
    }
  };
  for (const section of sections) {
    if (!matchesKind(normalizeText(section.title), normalizeText(section.type))) {
      continue;
    }
    const range = pageRangeFrom(section.pageStart, section.pageEnd);
    if (range) {
      return range;
    }
  }
  return void 0;
}
6015
/**
 * Chooses how a referential target should be resolved.
 *
 * Preference order: an already-found local page range, then a matching
 * declarations/schedule form, then a matching extracted section. When none of
 * these yields pages, an "unknown" target is skipped and any other target is
 * handed to page location.
 *
 * @param {{referenceTarget: string, sections: object[], formInventory: object[], localPageRange?: {startPage: number, endPage: number}}} params
 * @returns {object} One of
 *   `{ kind: "lookup_pages", source, pageRange }`,
 *   `{ kind: "skip", reason }`, or `{ kind: "page_location" }`.
 */
function decideReferentialResolutionAction(params) {
  const lookupPages = (source, pageRange) => ({ kind: "lookup_pages", source, pageRange });
  if (params.localPageRange) {
    return lookupPages("local", params.localPageRange);
  }
  const parsedTarget = parseReferentialTarget(params.referenceTarget);
  const formRange = findDeclarationsSchedulePages(parsedTarget, params.formInventory);
  if (formRange) {
    return lookupPages("declarations_schedule", formRange);
  }
  const sectionRange = findSectionPages(parsedTarget, params.sections);
  if (sectionRange) {
    return lookupPages("sections", sectionRange);
  }
  return parsedTarget.kind === "unknown"
    ? { kind: "skip", reason: "no concrete reference target" }
    : { kind: "page_location" };
}
6037
+
6038
+ // src/extraction/resolve-referential.ts
5341
6039
  function parseReferenceTarget(text) {
5342
6040
  if (typeof text !== "string") return void 0;
5343
6041
  const normalized = text.trim();
5344
6042
  if (!normalized) return void 0;
5345
6043
  const sectionMatch = normalized.match(/\b(Section\s+\d+[A-Za-z]?)/i);
5346
6044
  if (sectionMatch) return sectionMatch[1];
6045
+ const itemMatch = normalized.match(/\b(Item\s+\d+[A-Za-z]?)/i);
6046
+ if (itemMatch) return itemMatch[1];
6047
+ const premisesMatch = normalized.match(/\b(Premises?(?:\s+No\.?\s*\d+[A-Za-z]?|\s+\d+[A-Za-z]?)?)/i);
6048
+ if (premisesMatch) return premisesMatch[1].trim();
5347
6049
  if (/declarations/i.test(normalized)) return "Declarations";
5348
6050
  const scheduleMatch = normalized.match(/\b(Schedule(?:\s+of\s+[A-Za-z ]+)?)/i);
5349
6051
  if (scheduleMatch) return scheduleMatch[1].trim();
@@ -5356,9 +6058,9 @@ function parseReferenceTarget(text) {
5356
6058
  if (/if applicable/i.test(normalized)) return void 0;
5357
6059
  return void 0;
5358
6060
  }
5359
- var PageLocationSchema = import_zod35.z.object({
5360
- startPage: import_zod35.z.number(),
5361
- endPage: import_zod35.z.number()
6061
// Zod schema for a located page range: numeric `startPage` and `endPage`.
var PageLocationSchema = (() => {
  const { z } = import_zod37;
  return z.object({
    startPage: z.number(),
    endPage: z.number()
  });
})();
5363
6065
  async function findReferencedPages(params) {
5364
6066
  const {
@@ -5369,26 +6071,31 @@ async function findReferencedPages(params) {
5369
6071
  pageCount,
5370
6072
  generateObject,
5371
6073
  providerOptions,
6074
+ trackUsage,
5372
6075
  log
5373
6076
  } = params;
5374
- const targetLower = referenceTarget.toLowerCase();
5375
- for (const section of sections) {
5376
- if (section.title && section.pageStart != null && section.title.toLowerCase().includes(targetLower)) {
5377
- return {
5378
- startPage: section.pageStart,
5379
- endPage: section.pageEnd ?? section.pageStart
5380
- };
5381
- }
6077
+ const localPageRange = findLocalReferentialPages({
6078
+ referenceTarget,
6079
+ sections,
6080
+ formInventory
6081
+ });
6082
+ const action = decideReferentialResolutionAction({
6083
+ referenceTarget,
6084
+ sections,
6085
+ formInventory,
6086
+ localPageRange
6087
+ });
6088
+ if (action.kind === "lookup_pages") {
6089
+ await log?.(
6090
+ `Referential target "${referenceTarget}" resolved to pages ${action.pageRange.startPage}-${action.pageRange.endPage} via ${action.source}.`
6091
+ );
6092
+ return action.pageRange;
5382
6093
  }
5383
- for (const form of formInventory) {
5384
- const titleMatch = form.title && form.title.toLowerCase().includes(targetLower);
5385
- const typeMatch = form.formType && form.formType.toLowerCase().includes(targetLower);
5386
- if ((titleMatch || typeMatch) && form.pageStart != null) {
5387
- return {
5388
- startPage: form.pageStart,
5389
- endPage: form.pageEnd ?? form.pageStart
5390
- };
5391
- }
6094
+ if (action.kind === "skip") {
6095
+ await log?.(
6096
+ `Skipping referential target "${referenceTarget}": ${action.reason}.`
6097
+ );
6098
+ return void 0;
5392
6099
  }
5393
6100
  try {
5394
6101
  const result = await safeGenerateObject(
@@ -5416,6 +6123,7 @@ Return JSON only.`,
5416
6123
  )
5417
6124
  }
5418
6125
  );
6126
+ trackUsage?.(result.usage);
5419
6127
  if (result.object.startPage > 0 && result.object.endPage > 0) {
5420
6128
  return {
5421
6129
  startPage: result.object.startPage,
@@ -5473,7 +6181,9 @@ async function resolveReferentialCoverages(params) {
5473
6181
  for (let i = 0; i < referentialCoverages.length; i++) {
5474
6182
  const cov = referentialCoverages[i];
5475
6183
  const refString = (looksReferential(cov.limit) ? cov.limit : void 0) ?? (looksReferential(cov.deductible) ? cov.deductible : void 0) ?? cov.limit ?? "";
5476
- const target = parseReferenceTarget(refString) ?? "unknown";
6184
+ const sectionRef = typeof cov.sectionRef === "string" ? cov.sectionRef : "";
6185
+ const parsedTarget = parseReferenceTarget(refString) ?? parseReferenceTarget(sectionRef) ?? sectionRef;
6186
+ const target = parsedTarget || "unknown";
5477
6187
  const group = targetGroups.get(target) ?? [];
5478
6188
  group.push({ coverage: cov, index: i });
5479
6189
  targetGroups.set(target, group);
@@ -5497,6 +6207,7 @@ async function resolveReferentialCoverages(params) {
5497
6207
  pageCount,
5498
6208
  generateObject,
5499
6209
  providerOptions,
6210
+ trackUsage,
5500
6211
  log
5501
6212
  });
5502
6213
  if (!pageRange) {
@@ -5614,6 +6325,78 @@ async function resolveReferentialCoverages(params) {
5614
6325
  };
5615
6326
  }
5616
6327
 
6328
+ // src/extraction/focused-dispatch.ts
6329
/**
 * Runs one focused extraction task and, when the extractor declares a
 * fallback, retries the same pages with the fallback extractor if the focused
 * run failed or produced no usable records.
 *
 * @param {object} params
 * @param {{extractorName: string, startPage: number, endPage: number}} params.task
 *   Extractor to run and the page range to run it on.
 * @param {*} params.pdfInput - Passed through to runExtractor.
 * @param {*} params.generateObject - Passed through to runExtractor.
 * @param {*} [params.convertPdfToImages] - Passed through to runExtractor.
 * @param {*} [params.providerOptions] - Passed through to runExtractor.
 * @param {Function} [params.trackUsage] - Receives the `usage` of every
 *   completed extractor run; optional.
 * @param {Function} [params.log] - Async logger; optional.
 * @returns {Promise<object|object[]|null>}
 *   - the focused result when it is usable or no fallback is configured;
 *   - `[fallbackResult, { name, data, usage: undefined }]` when the fallback
 *     ran and focused records could be derived from it;
 *   - the fallback result alone when nothing could be derived;
 *   - `null` when the extractor is unknown, the focused run failed without a
 *     fallback, the fallback extractor is not registered, or the fallback run
 *     failed.
 */
async function runFocusedExtractorWithFallback(params) {
  const {
    task,
    pdfInput,
    generateObject,
    convertPdfToImages,
    providerOptions,
    trackUsage,
    log
  } = params;
  const ext = getExtractor(task.extractorName);
  if (!ext) {
    await log?.(`Unknown extractor: ${task.extractorName}, skipping`);
    return null;
  }
  const { fallback } = ext;
  // Both attempts share everything except the extractor itself.
  const runOnTaskPages = (name, extractor) => runExtractor({
    name,
    prompt: extractor.buildPrompt(),
    schema: extractor.schema,
    pdfInput,
    startPage: task.startPage,
    endPage: task.endPage,
    generateObject,
    convertPdfToImages,
    maxTokens: extractor.maxTokens ?? 4096,
    providerOptions
  });
  try {
    const result = await runOnTaskPages(task.extractorName, ext);
    // trackUsage is optional; an unguarded call here would throw inside this
    // try block and be misreported below as an extractor failure.
    trackUsage?.(result.usage);
    if (!fallback || !fallback.isEmpty(result.data)) {
      return result;
    }
  } catch (error) {
    await log?.(`Extractor ${task.extractorName} failed: ${error}`);
    if (!fallback) {
      return null;
    }
  }
  const fallbackExt = getExtractor(fallback.extractorName);
  if (!fallbackExt) {
    await log?.(`Fallback extractor ${fallback.extractorName} for ${task.extractorName} is not registered, skipping`);
    return null;
  }
  await log?.(
    `Extractor ${task.extractorName} produced no usable records; trying ${fallback.extractorName} fallback for pages ${task.startPage}-${task.endPage}`
  );
  try {
    const fallbackResult = await runOnTaskPages(fallback.extractorName, fallbackExt);
    trackUsage?.(fallbackResult.usage);
    const focusedData = fallback.deriveFocusedResult(fallbackResult.data);
    if (!focusedData) {
      return fallbackResult;
    }
    // The derived record carries no usage: the model call is already
    // accounted for on the fallback result.
    return [
      fallbackResult,
      { name: task.extractorName, data: focusedData, usage: void 0 }
    ];
  } catch (fallbackError) {
    await log?.(`${fallback.extractorName} fallback for ${task.extractorName} failed: ${fallbackError}`);
    return null;
  }
}
+ }
6399
+
5617
6400
  // src/core/quality.ts
5618
6401
  function evaluateQualityGate(params) {
5619
6402
  const { issues, hasRoundWarnings = false } = params;
@@ -5650,11 +6433,6 @@ function addFormEntry(inventory, formNumber, source, extra) {
5650
6433
  sources: [source]
5651
6434
  });
5652
6435
  }
5653
- function looksReferential2(value) {
5654
- if (typeof value !== "string") return false;
5655
- const normalized = value.toLowerCase();
5656
- return normalized.includes("shown in the declarations") || normalized.includes("shown in declarations") || normalized.includes("shown in the schedule") || normalized.includes("as stated") || normalized.includes("if applicable");
5657
- }
5658
6436
  function looksTocArtifact(value) {
5659
6437
  if (typeof value !== "string") return false;
5660
6438
  return /\.{4,}\d{1,3}$/.test(value.trim()) || /^\d+\.\s+[A-Z][\s\S]*\.{3,}\d{1,3}$/.test(value.trim());
@@ -5678,6 +6456,28 @@ function buildExtractionReviewReport(params) {
5678
6456
  const exclusions = memory.get("exclusions")?.exclusions ?? [];
5679
6457
  const conditions = memory.get("conditions")?.conditions ?? [];
5680
6458
  const sections = memory.get("sections")?.sections ?? [];
6459
+ const definitionsResult = memory.get("definitions");
6460
+ const coveredReasonsResult = memory.get("covered_reasons");
6461
+ const definitions = Array.isArray(definitionsResult?.definitions) ? definitionsResult.definitions : sections.filter((section) => section.type === "definition");
6462
+ const coveredReasons = Array.isArray(coveredReasonsResult?.coveredReasons) ? coveredReasonsResult.coveredReasons : Array.isArray(coveredReasonsResult?.covered_reasons) ? coveredReasonsResult.covered_reasons : sections.filter(looksCoveredReasonSection);
6463
+ const mappedDefinitions = params.pageAssignments.some((assignment) => assignment.extractorNames.includes("definitions"));
6464
+ const mappedCoveredReasons = params.pageAssignments.some((assignment) => assignment.extractorNames.includes("covered_reasons"));
6465
+ if (mappedDefinitions && definitions.length === 0) {
6466
+ deterministicIssues.push({
6467
+ code: "definitions_mapped_but_empty",
6468
+ severity: "warning",
6469
+ message: "Page map assigned definitions extraction, but no definition records were extracted.",
6470
+ extractorName: "definitions"
6471
+ });
6472
+ }
6473
+ if (mappedCoveredReasons && coveredReasons.length === 0) {
6474
+ deterministicIssues.push({
6475
+ code: "covered_reasons_mapped_but_empty",
6476
+ severity: "warning",
6477
+ message: "Page map assigned covered reasons extraction, but no covered reason records were extracted.",
6478
+ extractorName: "covered_reasons"
6479
+ });
6480
+ }
5681
6481
  for (const form of extractedFormInventory) {
5682
6482
  addFormEntry(
5683
6483
  inventory,
@@ -5774,7 +6574,7 @@ function buildExtractionReviewReport(params) {
5774
6574
  itemName: typeof coverage.name === "string" ? coverage.name : void 0
5775
6575
  });
5776
6576
  }
5777
- if (looksReferential2(coverage.limit) || looksReferential2(coverage.deductible)) {
6577
+ if (looksReferential(coverage.limit) || looksReferential(coverage.deductible)) {
5778
6578
  deterministicIssues.push({
5779
6579
  code: "coverage_referential_value",
5780
6580
  severity: "warning",
@@ -5875,6 +6675,67 @@ function buildExtractionReviewReport(params) {
5875
6675
  });
5876
6676
  }
5877
6677
  }
6678
+ for (const definition of definitions) {
6679
+ const term = typeof definition.term === "string" ? definition.term : typeof definition.title === "string" ? definition.title : "unknown";
6680
+ const content = typeof definition.definition === "string" ? definition.definition : typeof definition.content === "string" ? definition.content : "";
6681
+ if (!content.trim()) {
6682
+ deterministicIssues.push({
6683
+ code: "definition_missing_content",
6684
+ severity: "warning",
6685
+ message: `Definition "${term}" is missing definition text.`,
6686
+ extractorName: "definitions",
6687
+ formNumber: normalizeFormNumber(definition.formNumber),
6688
+ pageNumber: typeof definition.pageNumber === "number" ? definition.pageNumber : typeof definition.pageStart === "number" ? definition.pageStart : void 0,
6689
+ itemName: term
6690
+ });
6691
+ }
6692
+ if (typeof definition.pageNumber !== "number" && typeof definition.pageStart !== "number") {
6693
+ deterministicIssues.push({
6694
+ code: "definition_missing_page_number",
6695
+ severity: "warning",
6696
+ message: `Definition "${term}" is missing page provenance.`,
6697
+ extractorName: "definitions",
6698
+ formNumber: normalizeFormNumber(definition.formNumber),
6699
+ itemName: term
6700
+ });
6701
+ }
6702
+ }
6703
+ for (const coveredReason of coveredReasons) {
6704
+ const itemName = typeof coveredReason.name === "string" ? coveredReason.name : typeof coveredReason.reason === "string" ? coveredReason.reason : typeof coveredReason.title === "string" ? coveredReason.title : "unknown";
6705
+ const content = typeof coveredReason.content === "string" ? coveredReason.content : typeof coveredReason.description === "string" ? coveredReason.description : "";
6706
+ if (!content.trim()) {
6707
+ deterministicIssues.push({
6708
+ code: "covered_reason_missing_content",
6709
+ severity: "warning",
6710
+ message: `Covered reason "${itemName}" is missing substantive text.`,
6711
+ extractorName: "covered_reasons",
6712
+ formNumber: normalizeFormNumber(coveredReason.formNumber),
6713
+ pageNumber: typeof coveredReason.pageNumber === "number" ? coveredReason.pageNumber : typeof coveredReason.pageStart === "number" ? coveredReason.pageStart : void 0,
6714
+ itemName
6715
+ });
6716
+ }
6717
+ if (typeof coveredReason.pageNumber !== "number" && typeof coveredReason.pageStart !== "number") {
6718
+ deterministicIssues.push({
6719
+ code: "covered_reason_missing_page_number",
6720
+ severity: "warning",
6721
+ message: `Covered reason "${itemName}" is missing page provenance.`,
6722
+ extractorName: "covered_reasons",
6723
+ formNumber: normalizeFormNumber(coveredReason.formNumber),
6724
+ itemName
6725
+ });
6726
+ }
6727
+ if (looksReferential(content) || looksReferential(coveredReason.reason)) {
6728
+ deterministicIssues.push({
6729
+ code: "covered_reason_referential_value",
6730
+ severity: "warning",
6731
+ message: `Covered reason "${itemName}" contains referential language instead of the extracted covered cause wording.`,
6732
+ extractorName: "covered_reasons",
6733
+ formNumber: normalizeFormNumber(coveredReason.formNumber),
6734
+ pageNumber: typeof coveredReason.pageNumber === "number" ? coveredReason.pageNumber : typeof coveredReason.pageStart === "number" ? coveredReason.pageStart : void 0,
6735
+ itemName
6736
+ });
6737
+ }
6738
+ }
5878
6739
  for (const section of sections) {
5879
6740
  if (typeof section.content === "string" && section.content.trim().length < 120 && typeof section.pageStart === "number" && (!("pageEnd" in section) || section.pageEnd === section.pageStart || section.pageEnd === void 0)) {
5880
6741
  deterministicIssues.push({
@@ -5897,6 +6758,8 @@ function buildExtractionReviewReport(params) {
5897
6758
  const artifacts = [
5898
6759
  { kind: "form_inventory", label: "Form Inventory", itemCount: formInventory.length },
5899
6760
  { kind: "page_map", label: "Page Map", itemCount: params.pageAssignments.length },
6761
+ { kind: "definitions", label: "Definitions", itemCount: definitions.length },
6762
+ { kind: "covered_reasons", label: "Covered Reasons", itemCount: coveredReasons.length },
5900
6763
  { kind: "referential_resolution", label: "Referential Resolution", itemCount: coverages.filter((c) => c.limitValueType === "referential" || c.limitValueType === "as_stated" || c.deductibleValueType === "referential" || c.deductibleValueType === "as_stated").length }
5901
6764
  ];
5902
6765
  const qualityGateStatus = evaluateQualityGate({
@@ -5922,6 +6785,134 @@ function toReviewRoundRecord(round, review) {
5922
6785
  };
5923
6786
  }
5924
6787
 
6788
+ // src/extraction/planning.ts
6789
+ function normalizePageAssignments(pageAssignments, formInventory) {
6790
+ const pageFormTypes = /* @__PURE__ */ new Map();
6791
+ if (formInventory) {
6792
+ for (const form of formInventory.forms) {
6793
+ if (form.pageStart != null) {
6794
+ const end = form.pageEnd ?? form.pageStart;
6795
+ for (let p = form.pageStart; p <= end; p += 1) {
6796
+ const types = pageFormTypes.get(p) ?? /* @__PURE__ */ new Set();
6797
+ types.add(form.formType);
6798
+ pageFormTypes.set(p, types);
6799
+ }
6800
+ }
6801
+ }
6802
+ }
6803
+ return pageAssignments.map((assignment) => {
6804
+ let extractorNames = [...new Set(
6805
+ (assignment.extractorNames.length > 0 ? assignment.extractorNames : ["sections"]).filter(Boolean)
6806
+ )];
6807
+ const hasDeclarations = extractorNames.includes("declarations");
6808
+ const hasConditions = extractorNames.includes("conditions");
6809
+ const hasExclusions = extractorNames.includes("exclusions");
6810
+ const hasEndorsements = extractorNames.includes("endorsements");
6811
+ const looksLikeScheduleValues = assignment.hasScheduleValues === true;
6812
+ const roleBlocksCoverageLimits = assignment.pageRole === "policy_form" || assignment.pageRole === "condition_exclusion_form" || assignment.pageRole === "endorsement_form";
6813
+ const inventoryTypes = pageFormTypes.get(assignment.localPageNumber);
6814
+ const inventoryBlocksCoverageLimits = inventoryTypes != null && !looksLikeScheduleValues && !hasDeclarations && (inventoryTypes.has("endorsement") || inventoryTypes.has("notice") || inventoryTypes.has("application"));
6815
+ if (extractorNames.includes("coverage_limits")) {
6816
+ const shouldDropCoverageLimits = inventoryBlocksCoverageLimits || !looksLikeScheduleValues && roleBlocksCoverageLimits || !hasDeclarations && !looksLikeScheduleValues && (hasConditions || hasExclusions) || !hasDeclarations && !looksLikeScheduleValues && hasEndorsements;
6817
+ if (shouldDropCoverageLimits) {
6818
+ extractorNames = extractorNames.filter((name) => name !== "coverage_limits");
6819
+ }
6820
+ }
6821
+ if (inventoryTypes?.has("endorsement") && !extractorNames.includes("endorsements")) {
6822
+ extractorNames = [...extractorNames, "endorsements"];
6823
+ }
6824
+ if (extractorNames.length === 0) {
6825
+ extractorNames = ["sections"];
6826
+ }
6827
+ return {
6828
+ ...assignment,
6829
+ extractorNames
6830
+ };
6831
+ });
6832
+ }
6833
+ function buildTemplateHints(primaryType, documentType, pageCount, template) {
6834
+ return [
6835
+ `Document type: ${primaryType} ${documentType}`,
6836
+ `Expected sections: ${template.expectedSections.join(", ")}`,
6837
+ `Page hints: ${Object.entries(template.pageHints).map(([k, v]) => `${k}: ${v}`).join("; ")}`,
6838
+ `Total pages: ${pageCount}`
6839
+ ].join("\n");
6840
+ }
6841
+ function groupContiguousPages(pages) {
6842
+ if (pages.length === 0) return [];
6843
+ const sorted = [...new Set(pages)].sort((a, b) => a - b);
6844
+ const ranges = [];
6845
+ let start = sorted[0];
6846
+ let previous = sorted[0];
6847
+ for (let i = 1; i < sorted.length; i += 1) {
6848
+ const current = sorted[i];
6849
+ if (current === previous + 1) {
6850
+ previous = current;
6851
+ continue;
6852
+ }
6853
+ ranges.push({ startPage: start, endPage: previous });
6854
+ start = current;
6855
+ previous = current;
6856
+ }
6857
+ ranges.push({ startPage: start, endPage: previous });
6858
+ return ranges;
6859
+ }
6860
+ function buildPlanFromPageAssignments(pageAssignments, pageCount, formInventory) {
6861
+ const extractorPages = /* @__PURE__ */ new Map();
6862
+ for (const assignment of pageAssignments) {
6863
+ const extractors = assignment.extractorNames.length > 0 ? assignment.extractorNames : ["sections"];
6864
+ for (const extractorName of extractors) {
6865
+ extractorPages.set(extractorName, [...extractorPages.get(extractorName) ?? [], assignment.localPageNumber]);
6866
+ }
6867
+ }
6868
+ const coveredPages = /* @__PURE__ */ new Set();
6869
+ for (const pages of extractorPages.values()) {
6870
+ for (const page of pages) coveredPages.add(page);
6871
+ }
6872
+ for (let page = 1; page <= pageCount; page += 1) {
6873
+ if (!coveredPages.has(page)) {
6874
+ extractorPages.set("sections", [...extractorPages.get("sections") ?? [], page]);
6875
+ }
6876
+ }
6877
+ const contextualExtractors = /* @__PURE__ */ new Set(["conditions", "covered_reasons", "definitions", "exclusions", "endorsements"]);
6878
+ const contextualForms = (formInventory?.forms ?? []).filter(
6879
+ (form) => form.pageStart != null && (form.pageEnd ?? form.pageStart) != null
6880
+ );
6881
+ const expandPagesToFormRanges = (extractorName, pages) => {
6882
+ if (!contextualExtractors.has(extractorName)) return pages;
6883
+ const expanded = new Set(pages);
6884
+ for (const page of pages) {
6885
+ for (const form of contextualForms) {
6886
+ const pageStart = form.pageStart;
6887
+ const pageEnd = form.pageEnd ?? form.pageStart;
6888
+ const formType = form.formType;
6889
+ const supportsContextualExpansion = extractorName === "endorsements" ? formType === "endorsement" : formType === "coverage" || formType === "endorsement";
6890
+ if (!supportsContextualExpansion) continue;
6891
+ if (page < pageStart || page > pageEnd) continue;
6892
+ for (let current = pageStart; current <= pageEnd; current += 1) {
6893
+ expanded.add(current);
6894
+ }
6895
+ }
6896
+ }
6897
+ return [...expanded].sort((a, b) => a - b);
6898
+ };
6899
+ const tasks = [...extractorPages.entries()].flatMap(
6900
+ ([extractorName, pages]) => groupContiguousPages(expandPagesToFormRanges(extractorName, pages)).map(({ startPage, endPage }) => ({
6901
+ extractorName,
6902
+ startPage,
6903
+ endPage,
6904
+ description: `Page-mapped ${extractorName} extraction for pages ${startPage}-${endPage}`
6905
+ }))
6906
+ ).sort((a, b) => a.startPage - b.startPage || a.extractorName.localeCompare(b.extractorName));
6907
+ return {
6908
+ tasks,
6909
+ pageMap: [...extractorPages.entries()].map(([section, pages]) => ({
6910
+ section,
6911
+ pages: `pages ${[...new Set(pages)].sort((a, b) => a - b).join(", ")}`
6912
+ }))
6913
+ };
6914
+ }
6915
+
5925
6916
  // src/extraction/coordinator.ts
5926
6917
  function createExtractor(config) {
5927
6918
  const {
@@ -5938,6 +6929,7 @@ function createExtractor(config) {
5938
6929
  onCheckpointSave
5939
6930
  } = config;
5940
6931
  const limit = pLimit(concurrency);
6932
+ const extractorCatalog = formatExtractorCatalogForPrompt();
5941
6933
  let totalUsage = { inputTokens: 0, outputTokens: 0 };
5942
6934
  let modelCalls = 0;
5943
6935
  let callsWithUsage = 0;
@@ -5958,32 +6950,56 @@ function createExtractor(config) {
5958
6950
  memory.set(name, mergeExtractorResult(name, existing, data));
5959
6951
  }
5960
6952
  function summarizeExtraction(memory) {
5961
- const coverageResult = memory.get("coverage_limits");
5962
- const declarationResult = memory.get("declarations");
5963
- const endorsementResult = memory.get("endorsements");
5964
- const exclusionResult = memory.get("exclusions");
5965
- const conditionResult = memory.get("conditions");
5966
- const sectionResult = memory.get("sections");
5967
- const coverageSummary = Array.isArray(coverageResult?.coverages) ? coverageResult.coverages.slice(0, 12).map((coverage) => ({
6953
+ const declarationResult = readMemoryRecord(memory, "declarations");
6954
+ const endorsements = readRecordArray(readMemoryRecord(memory, "endorsements"), "endorsements") ?? [];
6955
+ const exclusions = readRecordArray(readMemoryRecord(memory, "exclusions"), "exclusions") ?? [];
6956
+ const conditions = readRecordArray(readMemoryRecord(memory, "conditions"), "conditions") ?? [];
6957
+ const sections = getSections(memory) ?? [];
6958
+ const definitions = getDefinitions(memory) ?? sections.filter((section) => section.type === "definition");
6959
+ const coveredReasons = getCoveredReasons(memory) ?? sections.filter(looksCoveredReasonSection);
6960
+ const coverages = getCoverageLimitCoverages(memory);
6961
+ const coverageSummary = coverages.slice(0, 12).map((coverage) => ({
5968
6962
  name: coverage.name,
5969
6963
  limit: coverage.limit,
5970
6964
  deductible: coverage.deductible,
5971
6965
  formNumber: coverage.formNumber
5972
- })) : [];
6966
+ }));
5973
6967
  return JSON.stringify({
5974
6968
  extractedKeys: [...memory.keys()].filter((key) => key !== "classify"),
5975
6969
  declarationFieldCount: Array.isArray(declarationResult?.fields) ? declarationResult.fields.length : 0,
5976
- coverageCount: Array.isArray(coverageResult?.coverages) ? coverageResult.coverages.length : 0,
6970
+ coverageCount: coverages.length,
5977
6971
  coverageSamples: coverageSummary,
5978
- endorsementCount: Array.isArray(endorsementResult?.endorsements) ? endorsementResult.endorsements.length : 0,
5979
- exclusionCount: Array.isArray(exclusionResult?.exclusions) ? exclusionResult.exclusions.length : 0,
5980
- conditionCount: Array.isArray(conditionResult?.conditions) ? conditionResult.conditions.length : 0,
5981
- sectionCount: Array.isArray(sectionResult?.sections) ? sectionResult.sections.length : 0
6972
+ endorsementCount: endorsements.length,
6973
+ exclusionCount: exclusions.length,
6974
+ conditionCount: conditions.length,
6975
+ definitionCount: definitions.length,
6976
+ coveredReasonCount: coveredReasons.length,
6977
+ sectionCount: sections.length
5982
6978
  }, null, 2);
5983
6979
  }
6980
+ function textIncludesSupplementarySignal(value) {
6981
+ if (typeof value !== "string") return false;
6982
+ return /\b(supplementary|regulatory|department of insurance|ombudsman|complaint|claim|claims|contact|phone|email|cancellation|cancelled|nonrenewal|non-renewal|non renew|notice|governing law|jurisdiction|third[- ]party administrator|tpa)\b/i.test(value);
6983
+ }
6984
+ function hasSupplementaryExtractionSignal(pageAssignments, formInventory, memory) {
6985
+ const hasPageSignal = pageAssignments.some(
6986
+ (assignment) => assignment.pageRole === "supplementary" || assignment.extractorNames.includes("supplementary") || textIncludesSupplementarySignal(assignment.notes)
6987
+ );
6988
+ if (hasPageSignal) return true;
6989
+ const hasFormSignal = (formInventory?.forms ?? []).some(
6990
+ (form) => form.formType === "notice" || textIncludesSupplementarySignal(form.title) || textIncludesSupplementarySignal(form.formNumber)
6991
+ );
6992
+ if (hasFormSignal) return true;
6993
+ const likelySupplementaryKeys = ["sections", "conditions", "endorsements", "exclusions"];
6994
+ return likelySupplementaryKeys.some((key) => {
6995
+ const value = memory.get(key);
6996
+ if (!value) return false;
6997
+ return textIncludesSupplementarySignal(JSON.stringify(value));
6998
+ });
6999
+ }
5984
7000
  function buildAlreadyExtractedSummary(memory) {
5985
7001
  const lines = [];
5986
- const declarationResult = memory.get("declarations");
7002
+ const declarationResult = readMemoryRecord(memory, "declarations");
5987
7003
  if (Array.isArray(declarationResult?.fields)) {
5988
7004
  for (const field of declarationResult.fields) {
5989
7005
  if (field.key && field.value) {
@@ -5992,20 +7008,17 @@ function createExtractor(config) {
5992
7008
  }
5993
7009
  }
5994
7010
  }
5995
- const coverageResult = memory.get("coverage_limits");
5996
- if (Array.isArray(coverageResult?.coverages)) {
5997
- for (const cov of coverageResult.coverages) {
5998
- const parts = [cov.name, cov.limit && `limit=${cov.limit}`, cov.deductible && `deductible=${cov.deductible}`].filter(Boolean);
5999
- if (parts.length > 0) lines.push(`- coverage: ${parts.join(", ")}`);
6000
- }
7011
+ for (const cov of getCoverageLimitCoverages(memory)) {
7012
+ const parts = [cov.name, cov.limit && `limit=${cov.limit}`, cov.deductible && `deductible=${cov.deductible}`].filter(Boolean);
7013
+ if (parts.length > 0) lines.push(`- coverage: ${parts.join(", ")}`);
6001
7014
  }
6002
- const namedInsured = memory.get("named_insured");
7015
+ const namedInsured = getNamedInsured(memory);
6003
7016
  if (namedInsured) {
6004
7017
  for (const [key, value] of Object.entries(namedInsured)) {
6005
7018
  if (value && typeof value === "string") lines.push(`- ${key}: ${value}`);
6006
7019
  }
6007
7020
  }
6008
- const carrierInfo = memory.get("carrier_info");
7021
+ const carrierInfo = getCarrierInfo(memory);
6009
7022
  if (carrierInfo) {
6010
7023
  for (const [key, value] of Object.entries(carrierInfo)) {
6011
7024
  if (value && typeof value === "string") lines.push(`- ${key}: ${value}`);
@@ -6013,141 +7026,43 @@ function createExtractor(config) {
6013
7026
  }
6014
7027
  return lines.length > 0 ? lines.join("\n") : "";
6015
7028
  }
6016
- function formatPageMapSummary(pageAssignments) {
6017
- const extractorPages = /* @__PURE__ */ new Map();
6018
- for (const assignment of pageAssignments) {
6019
- for (const extractorName of assignment.extractorNames) {
6020
- extractorPages.set(extractorName, [...extractorPages.get(extractorName) ?? [], assignment.localPageNumber]);
6021
- }
6022
- }
6023
- if (extractorPages.size === 0) return "No page assignments available.";
6024
- return [...extractorPages.entries()].map(([extractorName, pages]) => `${extractorName}: pages ${pages.join(", ")}`).join("\n");
6025
- }
6026
- function normalizePageAssignments(pageAssignments, formInventory) {
6027
- const pageFormTypes = /* @__PURE__ */ new Map();
6028
- if (formInventory) {
6029
- for (const form of formInventory.forms) {
6030
- if (form.pageStart != null) {
6031
- const end = form.pageEnd ?? form.pageStart;
6032
- for (let p = form.pageStart; p <= end; p++) {
6033
- const types = pageFormTypes.get(p) ?? /* @__PURE__ */ new Set();
6034
- types.add(form.formType);
6035
- pageFormTypes.set(p, types);
6036
- }
6037
- }
6038
- }
7029
+ async function runFocusedExtractorTask(task, pdfInput, memory) {
7030
+ if (task.extractorName === "supplementary") {
7031
+ const alreadyExtractedSummary = buildAlreadyExtractedSummary(memory);
7032
+ const result = await runExtractor({
7033
+ name: "supplementary",
7034
+ prompt: buildSupplementaryPrompt(alreadyExtractedSummary),
7035
+ schema: SupplementarySchema,
7036
+ pdfInput,
7037
+ startPage: task.startPage,
7038
+ endPage: task.endPage,
7039
+ generateObject,
7040
+ convertPdfToImages,
7041
+ maxTokens: 4096,
7042
+ providerOptions
7043
+ });
7044
+ trackUsage(result.usage);
7045
+ return result;
6039
7046
  }
6040
- return pageAssignments.map((assignment) => {
6041
- let extractorNames = [...new Set(
6042
- (assignment.extractorNames.length > 0 ? assignment.extractorNames : ["sections"]).filter(Boolean)
6043
- )];
6044
- const hasDeclarations = extractorNames.includes("declarations");
6045
- const hasConditions = extractorNames.includes("conditions");
6046
- const hasExclusions = extractorNames.includes("exclusions");
6047
- const hasEndorsements = extractorNames.includes("endorsements");
6048
- const looksLikeScheduleValues = assignment.hasScheduleValues === true;
6049
- const roleBlocksCoverageLimits = assignment.pageRole === "policy_form" || assignment.pageRole === "condition_exclusion_form" || assignment.pageRole === "endorsement_form";
6050
- const inventoryTypes = pageFormTypes.get(assignment.localPageNumber);
6051
- const inventoryBlocksCoverageLimits = inventoryTypes != null && !looksLikeScheduleValues && !hasDeclarations && (inventoryTypes.has("endorsement") || inventoryTypes.has("notice") || inventoryTypes.has("application"));
6052
- if (extractorNames.includes("coverage_limits")) {
6053
- const shouldDropCoverageLimits = inventoryBlocksCoverageLimits || !looksLikeScheduleValues && roleBlocksCoverageLimits || !hasDeclarations && !looksLikeScheduleValues && (hasConditions || hasExclusions) || !hasDeclarations && !looksLikeScheduleValues && hasEndorsements;
6054
- if (shouldDropCoverageLimits) {
6055
- extractorNames = extractorNames.filter((name) => name !== "coverage_limits");
6056
- }
6057
- }
6058
- if (inventoryTypes?.has("endorsement") && !extractorNames.includes("endorsements")) {
6059
- extractorNames = [...extractorNames, "endorsements"];
6060
- }
6061
- if (extractorNames.length === 0) {
6062
- extractorNames = ["sections"];
6063
- }
6064
- return {
6065
- ...assignment,
6066
- extractorNames
6067
- };
7047
+ return runFocusedExtractorWithFallback({
7048
+ task,
7049
+ pdfInput,
7050
+ generateObject,
7051
+ convertPdfToImages,
7052
+ providerOptions,
7053
+ trackUsage,
7054
+ log
6068
7055
  });
6069
7056
  }
6070
- function buildTemplateHints(primaryType, documentType, pageCount, template) {
6071
- return [
6072
- `Document type: ${primaryType} ${documentType}`,
6073
- `Expected sections: ${template.expectedSections.join(", ")}`,
6074
- `Page hints: ${Object.entries(template.pageHints).map(([k, v]) => `${k}: ${v}`).join("; ")}`,
6075
- `Total pages: ${pageCount}`
6076
- ].join("\n");
6077
- }
6078
- function groupContiguousPages(pages) {
6079
- if (pages.length === 0) return [];
6080
- const sorted = [...new Set(pages)].sort((a, b) => a - b);
6081
- const ranges = [];
6082
- let start = sorted[0];
6083
- let previous = sorted[0];
6084
- for (let i = 1; i < sorted.length; i += 1) {
6085
- const current = sorted[i];
6086
- if (current === previous + 1) {
6087
- previous = current;
6088
- continue;
6089
- }
6090
- ranges.push({ startPage: start, endPage: previous });
6091
- start = current;
6092
- previous = current;
6093
- }
6094
- ranges.push({ startPage: start, endPage: previous });
6095
- return ranges;
6096
- }
6097
- function buildPlanFromPageAssignments(pageAssignments, pageCount, formInventory) {
7057
+ function formatPageMapSummary(pageAssignments) {
6098
7058
  const extractorPages = /* @__PURE__ */ new Map();
6099
7059
  for (const assignment of pageAssignments) {
6100
- const extractors = assignment.extractorNames.length > 0 ? assignment.extractorNames : ["sections"];
6101
- for (const extractorName of extractors) {
7060
+ for (const extractorName of assignment.extractorNames) {
6102
7061
  extractorPages.set(extractorName, [...extractorPages.get(extractorName) ?? [], assignment.localPageNumber]);
6103
7062
  }
6104
7063
  }
6105
- const coveredPages = /* @__PURE__ */ new Set();
6106
- for (const pages of extractorPages.values()) {
6107
- for (const page of pages) coveredPages.add(page);
6108
- }
6109
- for (let page = 1; page <= pageCount; page += 1) {
6110
- if (!coveredPages.has(page)) {
6111
- extractorPages.set("sections", [...extractorPages.get("sections") ?? [], page]);
6112
- }
6113
- }
6114
- const contextualExtractors = /* @__PURE__ */ new Set(["conditions", "exclusions", "endorsements"]);
6115
- const contextualForms = (formInventory?.forms ?? []).filter(
6116
- (form) => form.pageStart != null && (form.pageEnd ?? form.pageStart) != null
6117
- );
6118
- const expandPagesToFormRanges = (extractorName, pages) => {
6119
- if (!contextualExtractors.has(extractorName)) return pages;
6120
- const expanded = new Set(pages);
6121
- for (const page of pages) {
6122
- for (const form of contextualForms) {
6123
- const pageStart = form.pageStart;
6124
- const pageEnd = form.pageEnd ?? form.pageStart;
6125
- const formType = form.formType;
6126
- const supportsContextualExpansion = extractorName === "endorsements" ? formType === "endorsement" : formType === "coverage" || formType === "endorsement";
6127
- if (!supportsContextualExpansion) continue;
6128
- if (page < pageStart || page > pageEnd) continue;
6129
- for (let current = pageStart; current <= pageEnd; current += 1) {
6130
- expanded.add(current);
6131
- }
6132
- }
6133
- }
6134
- return [...expanded].sort((a, b) => a - b);
6135
- };
6136
- const tasks = [...extractorPages.entries()].flatMap(
6137
- ([extractorName, pages]) => groupContiguousPages(expandPagesToFormRanges(extractorName, pages)).map(({ startPage, endPage }) => ({
6138
- extractorName,
6139
- startPage,
6140
- endPage,
6141
- description: `Page-mapped ${extractorName} extraction for pages ${startPage}-${endPage}`
6142
- }))
6143
- ).sort((a, b) => a.startPage - b.startPage || a.extractorName.localeCompare(b.extractorName));
6144
- return {
6145
- tasks,
6146
- pageMap: [...extractorPages.entries()].map(([section, pages]) => ({
6147
- section,
6148
- pages: `pages ${[...new Set(pages)].sort((a, b) => a - b).join(", ")}`
6149
- }))
6150
- };
7064
+ if (extractorPages.size === 0) return "No page assignments available.";
7065
+ return [...extractorPages.entries()].map(([extractorName, pages]) => `${extractorName}: ${pages.length} page(s), pages ${pages.join(", ")}`).join("\n");
6151
7066
  }
6152
7067
  async function extract(pdfInput, documentId, options) {
6153
7068
  const id = documentId ?? `doc-${Date.now()}`;
@@ -6159,7 +7074,8 @@ function createExtractor(config) {
6159
7074
  const pipelineCtx = createPipelineContext({
6160
7075
  id,
6161
7076
  onSave: onCheckpointSave,
6162
- resumeFrom: options?.resumeFrom
7077
+ resumeFrom: options?.resumeFrom,
7078
+ phaseOrder: ["classify", "form_inventory", "page_map", "plan", "extract", "resolve_referential", "review", "assemble"]
6163
7079
  });
6164
7080
  const resumed = pipelineCtx.getCheckpoint()?.state;
6165
7081
  if (resumed?.memory) {
@@ -6327,40 +7243,18 @@ function createExtractor(config) {
6327
7243
  const extractorResults = await Promise.all(
6328
7244
  tasks.map(
6329
7245
  (task) => limit(async () => {
6330
- const ext = getExtractor(task.extractorName);
6331
- if (!ext) {
6332
- await log?.(`Unknown extractor: ${task.extractorName}, skipping`);
6333
- return null;
6334
- }
6335
7246
  onProgress?.(`Extracting ${task.extractorName} (pages ${task.startPage}-${task.endPage})...`);
6336
- try {
6337
- const result = await runExtractor({
6338
- name: task.extractorName,
6339
- prompt: ext.buildPrompt(),
6340
- schema: ext.schema,
6341
- pdfInput,
6342
- startPage: task.startPage,
6343
- endPage: task.endPage,
6344
- generateObject,
6345
- convertPdfToImages,
6346
- maxTokens: ext.maxTokens ?? 4096,
6347
- providerOptions
6348
- });
6349
- trackUsage(result.usage);
6350
- return result;
6351
- } catch (error) {
6352
- await log?.(`Extractor ${task.extractorName} failed: ${error}`);
6353
- return null;
6354
- }
7247
+ return runFocusedExtractorTask(task, pdfInput, memory);
6355
7248
  })
6356
7249
  )
6357
7250
  );
6358
- for (const result of extractorResults) {
7251
+ for (const result of extractorResults.flatMap((item) => Array.isArray(item) ? item : item ? [item] : [])) {
6359
7252
  if (result) {
6360
7253
  mergeMemoryResult(result.name, result.data, memory);
6361
7254
  }
6362
7255
  }
6363
- {
7256
+ const planIncludesSupplementary = tasks.some((task) => task.extractorName === "supplementary");
7257
+ if (!planIncludesSupplementary && hasSupplementaryExtractionSignal(pageAssignments, formInventory, memory)) {
6364
7258
  onProgress?.("Extracting supplementary retrieval facts...");
6365
7259
  try {
6366
7260
  const alreadyExtractedSummary = buildAlreadyExtractedSummary(memory);
@@ -6434,7 +7328,7 @@ function createExtractor(config) {
6434
7328
  const reviewResponse = await safeGenerateObject(
6435
7329
  generateObject,
6436
7330
  {
6437
- prompt: buildReviewPrompt(template.required, extractedKeys, extractionSummary, pageMapSummary),
7331
+ prompt: buildReviewPrompt(template.required, extractedKeys, extractionSummary, pageMapSummary, extractorCatalog),
6438
7332
  schema: ReviewResultSchema,
6439
7333
  maxTokens: 1536,
6440
7334
  providerOptions: await buildPdfProviderOptions(pdfInput, providerOptions)
@@ -6458,31 +7352,11 @@ function createExtractor(config) {
6458
7352
  const followUpResults = await Promise.all(
6459
7353
  reviewResponse.object.additionalTasks.map(
6460
7354
  (task) => limit(async () => {
6461
- const ext = getExtractor(task.extractorName);
6462
- if (!ext) return null;
6463
- try {
6464
- const result = await runExtractor({
6465
- name: task.extractorName,
6466
- prompt: ext.buildPrompt(),
6467
- schema: ext.schema,
6468
- pdfInput,
6469
- startPage: task.startPage,
6470
- endPage: task.endPage,
6471
- generateObject,
6472
- convertPdfToImages,
6473
- maxTokens: ext.maxTokens ?? 4096,
6474
- providerOptions
6475
- });
6476
- trackUsage(result.usage);
6477
- return result;
6478
- } catch (error) {
6479
- await log?.(`Follow-up extractor ${task.extractorName} failed: ${error}`);
6480
- return null;
6481
- }
7355
+ return runFocusedExtractorTask(task, pdfInput, memory);
6482
7356
  })
6483
7357
  )
6484
7358
  );
6485
- for (const result of followUpResults) {
7359
+ for (const result of followUpResults.flatMap((item) => Array.isArray(item) ? item : item ? [item] : [])) {
6486
7360
  if (result) {
6487
7361
  mergeMemoryResult(result.name, result.data, memory);
6488
7362
  }
@@ -6798,8 +7672,8 @@ Respond with JSON only:
6798
7672
  }`;
6799
7673
 
6800
7674
  // src/schemas/application.ts
6801
- var import_zod36 = require("zod");
6802
- var FieldTypeSchema = import_zod36.z.enum([
7675
+ var import_zod38 = require("zod");
7676
+ var FieldTypeSchema = import_zod38.z.enum([
6803
7677
  "text",
6804
7678
  "numeric",
6805
7679
  "currency",
@@ -6808,131 +7682,131 @@ var FieldTypeSchema = import_zod36.z.enum([
6808
7682
  "table",
6809
7683
  "declaration"
6810
7684
  ]);
6811
- var ApplicationFieldSchema = import_zod36.z.object({
6812
- id: import_zod36.z.string(),
6813
- label: import_zod36.z.string(),
6814
- section: import_zod36.z.string(),
7685
+ var ApplicationFieldSchema = import_zod38.z.object({
7686
+ id: import_zod38.z.string(),
7687
+ label: import_zod38.z.string(),
7688
+ section: import_zod38.z.string(),
6815
7689
  fieldType: FieldTypeSchema,
6816
- required: import_zod36.z.boolean(),
6817
- options: import_zod36.z.array(import_zod36.z.string()).optional(),
6818
- columns: import_zod36.z.array(import_zod36.z.string()).optional(),
6819
- requiresExplanationIfYes: import_zod36.z.boolean().optional(),
6820
- condition: import_zod36.z.object({
6821
- dependsOn: import_zod36.z.string(),
6822
- whenValue: import_zod36.z.string()
7690
+ required: import_zod38.z.boolean(),
7691
+ options: import_zod38.z.array(import_zod38.z.string()).optional(),
7692
+ columns: import_zod38.z.array(import_zod38.z.string()).optional(),
7693
+ requiresExplanationIfYes: import_zod38.z.boolean().optional(),
7694
+ condition: import_zod38.z.object({
7695
+ dependsOn: import_zod38.z.string(),
7696
+ whenValue: import_zod38.z.string()
6823
7697
  }).optional(),
6824
- value: import_zod36.z.string().optional(),
6825
- source: import_zod36.z.string().optional().describe("Where the value came from: auto-fill, user, lookup"),
6826
- confidence: import_zod36.z.enum(["confirmed", "high", "medium", "low"]).optional()
6827
- });
6828
- var ApplicationClassifyResultSchema = import_zod36.z.object({
6829
- isApplication: import_zod36.z.boolean(),
6830
- confidence: import_zod36.z.number().min(0).max(1),
6831
- applicationType: import_zod36.z.string().nullable()
6832
- });
6833
- var FieldExtractionResultSchema = import_zod36.z.object({
6834
- fields: import_zod36.z.array(ApplicationFieldSchema)
6835
- });
6836
- var AutoFillMatchSchema = import_zod36.z.object({
6837
- fieldId: import_zod36.z.string(),
6838
- value: import_zod36.z.string(),
6839
- confidence: import_zod36.z.enum(["confirmed"]),
6840
- contextKey: import_zod36.z.string()
6841
- });
6842
- var AutoFillResultSchema = import_zod36.z.object({
6843
- matches: import_zod36.z.array(AutoFillMatchSchema)
6844
- });
6845
- var QuestionBatchResultSchema = import_zod36.z.object({
6846
- batches: import_zod36.z.array(import_zod36.z.array(import_zod36.z.string()).describe("Array of field IDs in this batch"))
6847
- });
6848
- var LookupRequestSchema = import_zod36.z.object({
6849
- type: import_zod36.z.string().describe("Type of lookup: 'records', 'website', 'policy'"),
6850
- description: import_zod36.z.string(),
6851
- url: import_zod36.z.string().optional(),
6852
- targetFieldIds: import_zod36.z.array(import_zod36.z.string())
6853
- });
6854
- var ReplyIntentSchema = import_zod36.z.object({
6855
- primaryIntent: import_zod36.z.enum(["answers_only", "question", "lookup_request", "mixed"]),
6856
- hasAnswers: import_zod36.z.boolean(),
6857
- questionText: import_zod36.z.string().optional(),
6858
- questionFieldIds: import_zod36.z.array(import_zod36.z.string()).optional(),
6859
- lookupRequests: import_zod36.z.array(LookupRequestSchema).optional()
6860
- });
6861
- var ParsedAnswerSchema = import_zod36.z.object({
6862
- fieldId: import_zod36.z.string(),
6863
- value: import_zod36.z.string(),
6864
- explanation: import_zod36.z.string().optional()
6865
- });
6866
- var AnswerParsingResultSchema = import_zod36.z.object({
6867
- answers: import_zod36.z.array(ParsedAnswerSchema),
6868
- unanswered: import_zod36.z.array(import_zod36.z.string()).describe("Field IDs that were not answered")
6869
- });
6870
- var LookupFillSchema = import_zod36.z.object({
6871
- fieldId: import_zod36.z.string(),
6872
- value: import_zod36.z.string(),
6873
- source: import_zod36.z.string().describe("Specific citable reference, e.g. 'GL Policy #POL-12345 (Hartford)'")
6874
- });
6875
- var LookupFillResultSchema = import_zod36.z.object({
6876
- fills: import_zod36.z.array(LookupFillSchema),
6877
- unfillable: import_zod36.z.array(import_zod36.z.string()),
6878
- explanation: import_zod36.z.string().optional()
6879
- });
6880
- var FlatPdfPlacementSchema = import_zod36.z.object({
6881
- fieldId: import_zod36.z.string(),
6882
- page: import_zod36.z.number(),
6883
- x: import_zod36.z.number().describe("Percentage from left edge (0-100)"),
6884
- y: import_zod36.z.number().describe("Percentage from top edge (0-100)"),
6885
- text: import_zod36.z.string(),
6886
- fontSize: import_zod36.z.number().optional(),
6887
- isCheckmark: import_zod36.z.boolean().optional()
6888
- });
6889
- var AcroFormMappingSchema = import_zod36.z.object({
6890
- fieldId: import_zod36.z.string(),
6891
- acroFormName: import_zod36.z.string(),
6892
- value: import_zod36.z.string()
6893
- });
6894
- var QualityGateStatusSchema = import_zod36.z.enum(["passed", "warning", "failed"]);
6895
- var QualitySeveritySchema = import_zod36.z.enum(["info", "warning", "blocking"]);
6896
- var ApplicationQualityIssueSchema = import_zod36.z.object({
6897
- code: import_zod36.z.string(),
7698
+ value: import_zod38.z.string().optional(),
7699
+ source: import_zod38.z.string().optional().describe("Where the value came from: auto-fill, user, lookup"),
7700
+ confidence: import_zod38.z.enum(["confirmed", "high", "medium", "low"]).optional()
7701
+ });
7702
+ var ApplicationClassifyResultSchema = import_zod38.z.object({
7703
+ isApplication: import_zod38.z.boolean(),
7704
+ confidence: import_zod38.z.number().min(0).max(1),
7705
+ applicationType: import_zod38.z.string().nullable()
7706
+ });
7707
+ var FieldExtractionResultSchema = import_zod38.z.object({
7708
+ fields: import_zod38.z.array(ApplicationFieldSchema)
7709
+ });
7710
+ var AutoFillMatchSchema = import_zod38.z.object({
7711
+ fieldId: import_zod38.z.string(),
7712
+ value: import_zod38.z.string(),
7713
+ confidence: import_zod38.z.enum(["confirmed"]),
7714
+ contextKey: import_zod38.z.string()
7715
+ });
7716
+ var AutoFillResultSchema = import_zod38.z.object({
7717
+ matches: import_zod38.z.array(AutoFillMatchSchema)
7718
+ });
7719
+ var QuestionBatchResultSchema = import_zod38.z.object({
7720
+ batches: import_zod38.z.array(import_zod38.z.array(import_zod38.z.string()).describe("Array of field IDs in this batch"))
7721
+ });
7722
+ var LookupRequestSchema = import_zod38.z.object({
7723
+ type: import_zod38.z.string().describe("Type of lookup: 'records', 'website', 'policy'"),
7724
+ description: import_zod38.z.string(),
7725
+ url: import_zod38.z.string().optional(),
7726
+ targetFieldIds: import_zod38.z.array(import_zod38.z.string())
7727
+ });
7728
+ var ReplyIntentSchema = import_zod38.z.object({
7729
+ primaryIntent: import_zod38.z.enum(["answers_only", "question", "lookup_request", "mixed"]),
7730
+ hasAnswers: import_zod38.z.boolean(),
7731
+ questionText: import_zod38.z.string().optional(),
7732
+ questionFieldIds: import_zod38.z.array(import_zod38.z.string()).optional(),
7733
+ lookupRequests: import_zod38.z.array(LookupRequestSchema).optional()
7734
+ });
7735
+ var ParsedAnswerSchema = import_zod38.z.object({
7736
+ fieldId: import_zod38.z.string(),
7737
+ value: import_zod38.z.string(),
7738
+ explanation: import_zod38.z.string().optional()
7739
+ });
7740
+ var AnswerParsingResultSchema = import_zod38.z.object({
7741
+ answers: import_zod38.z.array(ParsedAnswerSchema),
7742
+ unanswered: import_zod38.z.array(import_zod38.z.string()).describe("Field IDs that were not answered")
7743
+ });
7744
+ var LookupFillSchema = import_zod38.z.object({
7745
+ fieldId: import_zod38.z.string(),
7746
+ value: import_zod38.z.string(),
7747
+ source: import_zod38.z.string().describe("Specific citable reference, e.g. 'GL Policy #POL-12345 (Hartford)'")
7748
+ });
7749
+ var LookupFillResultSchema = import_zod38.z.object({
7750
+ fills: import_zod38.z.array(LookupFillSchema),
7751
+ unfillable: import_zod38.z.array(import_zod38.z.string()),
7752
+ explanation: import_zod38.z.string().optional()
7753
+ });
7754
+ var FlatPdfPlacementSchema = import_zod38.z.object({
7755
+ fieldId: import_zod38.z.string(),
7756
+ page: import_zod38.z.number(),
7757
+ x: import_zod38.z.number().describe("Percentage from left edge (0-100)"),
7758
+ y: import_zod38.z.number().describe("Percentage from top edge (0-100)"),
7759
+ text: import_zod38.z.string(),
7760
+ fontSize: import_zod38.z.number().optional(),
7761
+ isCheckmark: import_zod38.z.boolean().optional()
7762
+ });
7763
+ var AcroFormMappingSchema = import_zod38.z.object({
7764
+ fieldId: import_zod38.z.string(),
7765
+ acroFormName: import_zod38.z.string(),
7766
+ value: import_zod38.z.string()
7767
+ });
7768
+ var QualityGateStatusSchema = import_zod38.z.enum(["passed", "warning", "failed"]);
7769
+ var QualitySeveritySchema = import_zod38.z.enum(["info", "warning", "blocking"]);
7770
+ var ApplicationQualityIssueSchema = import_zod38.z.object({
7771
+ code: import_zod38.z.string(),
6898
7772
  severity: QualitySeveritySchema,
6899
- message: import_zod36.z.string(),
6900
- fieldId: import_zod36.z.string().optional()
7773
+ message: import_zod38.z.string(),
7774
+ fieldId: import_zod38.z.string().optional()
6901
7775
  });
6902
- var ApplicationQualityRoundSchema = import_zod36.z.object({
6903
- round: import_zod36.z.number(),
6904
- kind: import_zod36.z.string(),
7776
+ var ApplicationQualityRoundSchema = import_zod38.z.object({
7777
+ round: import_zod38.z.number(),
7778
+ kind: import_zod38.z.string(),
6905
7779
  status: QualityGateStatusSchema,
6906
- summary: import_zod36.z.string().optional()
7780
+ summary: import_zod38.z.string().optional()
6907
7781
  });
6908
- var ApplicationQualityArtifactSchema = import_zod36.z.object({
6909
- kind: import_zod36.z.string(),
6910
- label: import_zod36.z.string().optional(),
6911
- itemCount: import_zod36.z.number().optional()
7782
+ var ApplicationQualityArtifactSchema = import_zod38.z.object({
7783
+ kind: import_zod38.z.string(),
7784
+ label: import_zod38.z.string().optional(),
7785
+ itemCount: import_zod38.z.number().optional()
6912
7786
  });
6913
- var ApplicationEmailReviewSchema = import_zod36.z.object({
6914
- issues: import_zod36.z.array(ApplicationQualityIssueSchema),
7787
+ var ApplicationEmailReviewSchema = import_zod38.z.object({
7788
+ issues: import_zod38.z.array(ApplicationQualityIssueSchema),
6915
7789
  qualityGateStatus: QualityGateStatusSchema
6916
7790
  });
6917
- var ApplicationQualityReportSchema = import_zod36.z.object({
6918
- issues: import_zod36.z.array(ApplicationQualityIssueSchema),
6919
- rounds: import_zod36.z.array(ApplicationQualityRoundSchema).optional(),
6920
- artifacts: import_zod36.z.array(ApplicationQualityArtifactSchema).optional(),
7791
+ var ApplicationQualityReportSchema = import_zod38.z.object({
7792
+ issues: import_zod38.z.array(ApplicationQualityIssueSchema),
7793
+ rounds: import_zod38.z.array(ApplicationQualityRoundSchema).optional(),
7794
+ artifacts: import_zod38.z.array(ApplicationQualityArtifactSchema).optional(),
6921
7795
  emailReview: ApplicationEmailReviewSchema.optional(),
6922
7796
  qualityGateStatus: QualityGateStatusSchema
6923
7797
  });
6924
- var ApplicationStateSchema = import_zod36.z.object({
6925
- id: import_zod36.z.string(),
6926
- pdfBase64: import_zod36.z.string().optional().describe("Original PDF, omitted after extraction"),
6927
- title: import_zod36.z.string().optional(),
6928
- applicationType: import_zod36.z.string().nullable().optional(),
6929
- fields: import_zod36.z.array(ApplicationFieldSchema),
6930
- batches: import_zod36.z.array(import_zod36.z.array(import_zod36.z.string())).optional(),
6931
- currentBatchIndex: import_zod36.z.number().default(0),
7798
+ var ApplicationStateSchema = import_zod38.z.object({
7799
+ id: import_zod38.z.string(),
7800
+ pdfBase64: import_zod38.z.string().optional().describe("Original PDF, omitted after extraction"),
7801
+ title: import_zod38.z.string().optional(),
7802
+ applicationType: import_zod38.z.string().nullable().optional(),
7803
+ fields: import_zod38.z.array(ApplicationFieldSchema),
7804
+ batches: import_zod38.z.array(import_zod38.z.array(import_zod38.z.string())).optional(),
7805
+ currentBatchIndex: import_zod38.z.number().default(0),
6932
7806
  qualityReport: ApplicationQualityReportSchema.optional(),
6933
- status: import_zod36.z.enum(["classifying", "extracting", "auto_filling", "batching", "collecting", "confirming", "mapping", "complete"]),
6934
- createdAt: import_zod36.z.number(),
6935
- updatedAt: import_zod36.z.number()
7807
+ status: import_zod38.z.enum(["classifying", "extracting", "auto_filling", "batching", "collecting", "confirming", "mapping", "complete"]),
7808
+ createdAt: import_zod38.z.number(),
7809
+ updatedAt: import_zod38.z.number()
6936
7810
  });
6937
7811
 
6938
7812
  // src/application/agents/classifier.ts
@@ -7521,6 +8395,70 @@ function reviewBatchEmail(text, batchFields) {
7521
8395
  };
7522
8396
  }
7523
8397
 
8398
+ // src/application/workflow.ts
8399
+ var MAX_DOCUMENT_SEARCH_FIELDS = 5;
8400
+ var LOW_VALUE_FIELD_RATIO_LIMIT = 0.6;
8401
+ function planApplicationWorkflow(input) {
8402
+ const unfilledFields = input.fields.filter(isUnfilled);
8403
+ const documentSearchFields = planDocumentSearchFields(
8404
+ unfilledFields,
8405
+ input.hasDocumentStore && input.hasMemoryStore
8406
+ );
8407
+ return {
8408
+ runBackfill: input.hasBackfillProvider && unfilledFields.length > 0,
8409
+ runContextAutoFill: input.orgContextCount > 0 && unfilledFields.length > 0,
8410
+ documentSearchFields,
8411
+ runBatching: unfilledFields.length > 0,
8412
+ unfilledFields
8413
+ };
8414
+ }
8415
+ function planReplyActions(input) {
8416
+ const hasCurrentFields = input.currentBatchFields.length > 0;
8417
+ const nextBatchNeedsAnswers = (input.nextBatchFields ?? []).some(isUnfilled);
8418
+ const hasLookupRequests = (input.intent.lookupRequests?.length ?? 0) > 0;
8419
+ return {
8420
+ parseAnswers: input.intent.hasAnswers && hasCurrentFields,
8421
+ runLookup: hasLookupRequests && input.hasDocumentStore,
8422
+ answerQuestion: Boolean(input.intent.questionText) && (input.intent.primaryIntent === "question" || input.intent.primaryIntent === "mixed"),
8423
+ advanceBatch: hasCurrentFields && input.currentBatchFields.every((field) => !isUnfilled(field)),
8424
+ generateNextEmail: nextBatchNeedsAnswers
8425
+ };
8426
+ }
8427
+ function planDocumentSearchFields(unfilledFields, hasStores) {
8428
+ if (!hasStores || unfilledFields.length === 0) return [];
8429
+ const searchableFields = unfilledFields.filter(isHighValueLookupField);
8430
+ if (searchableFields.length === 0) return [];
8431
+ const lowValueRatio = 1 - searchableFields.length / unfilledFields.length;
8432
+ if (unfilledFields.length > MAX_DOCUMENT_SEARCH_FIELDS && lowValueRatio > LOW_VALUE_FIELD_RATIO_LIMIT) {
8433
+ return [];
8434
+ }
8435
+ return searchableFields.slice(0, MAX_DOCUMENT_SEARCH_FIELDS);
8436
+ }
8437
+ function isUnfilled(field) {
8438
+ return field.value === void 0 || field.value.trim() === "";
8439
+ }
8440
+ function isHighValueLookupField(field) {
8441
+ const text = `${field.section} ${field.label}`.toLowerCase();
8442
+ if (field.required) return true;
8443
+ return [
8444
+ "carrier",
8445
+ "policy",
8446
+ "premium",
8447
+ "limit",
8448
+ "deductible",
8449
+ "insured",
8450
+ "address",
8451
+ "revenue",
8452
+ "payroll",
8453
+ "effective",
8454
+ "expiration",
8455
+ "coverage",
8456
+ "class code",
8457
+ "fein",
8458
+ "entity"
8459
+ ].some((term) => text.includes(term));
8460
+ }
8461
+
7524
8462
  // src/application/coordinator.ts
7525
8463
  function createApplicationPipeline(config) {
7526
8464
  const {
@@ -7619,27 +8557,37 @@ function createApplicationPipeline(config) {
7619
8557
  state.updatedAt = Date.now();
7620
8558
  await applicationStore?.save(state);
7621
8559
  onProgress?.(`Auto-filling ${fields.length} fields...`);
7622
- const fillTasks = [];
7623
- if (backfillProvider) {
7624
- fillTasks.push(
7625
- (async () => {
7626
- try {
7627
- const priorAnswers = await backfillFromPriorAnswers(fields, backfillProvider);
7628
- for (const pa of priorAnswers) {
7629
- const field = state.fields.find((f) => f.id === pa.fieldId);
7630
- if (field && !field.value && pa.relevance > 0.8) {
7631
- field.value = pa.value;
7632
- field.source = `backfill: ${pa.source}`;
7633
- field.confidence = "high";
7634
- }
7635
- }
7636
- } catch (e) {
7637
- await log?.(`Backfill failed: ${e}`);
8560
+ let workflowPlan = planApplicationWorkflow({
8561
+ fields: state.fields,
8562
+ hasBackfillProvider: Boolean(backfillProvider),
8563
+ orgContextCount: orgContext.length,
8564
+ hasDocumentStore: Boolean(documentStore),
8565
+ hasMemoryStore: Boolean(memoryStore)
8566
+ });
8567
+ if (workflowPlan.runBackfill && backfillProvider) {
8568
+ try {
8569
+ const priorAnswers = await backfillFromPriorAnswers(state.fields, backfillProvider);
8570
+ for (const pa of priorAnswers) {
8571
+ const field = state.fields.find((f) => f.id === pa.fieldId);
8572
+ if (field && !field.value && pa.relevance > 0.8) {
8573
+ field.value = pa.value;
8574
+ field.source = `backfill: ${pa.source}`;
8575
+ field.confidence = "high";
7638
8576
  }
7639
- })()
7640
- );
8577
+ }
8578
+ } catch (e) {
8579
+ await log?.(`Backfill failed: ${e}`);
8580
+ }
7641
8581
  }
7642
- if (orgContext.length > 0) {
8582
+ workflowPlan = planApplicationWorkflow({
8583
+ fields: state.fields,
8584
+ hasBackfillProvider: false,
8585
+ orgContextCount: orgContext.length,
8586
+ hasDocumentStore: Boolean(documentStore),
8587
+ hasMemoryStore: Boolean(memoryStore)
8588
+ });
8589
+ const fillTasks = [];
8590
+ if (workflowPlan.runContextAutoFill) {
7643
8591
  fillTasks.push(
7644
8592
  limit(async () => {
7645
8593
  const unfilledFields2 = state.fields.filter((f) => !f.value);
@@ -7666,18 +8614,13 @@ function createApplicationPipeline(config) {
7666
8614
  })
7667
8615
  );
7668
8616
  }
7669
- if (documentStore && memoryStore) {
8617
+ if (workflowPlan.documentSearchFields.length > 0 && memoryStore) {
7670
8618
  fillTasks.push(
7671
8619
  (async () => {
7672
8620
  try {
7673
- const unfilledFields2 = state.fields.filter((f) => !f.value);
7674
- const searchPromises = unfilledFields2.slice(0, 10).map(
8621
+ const searchPromises = workflowPlan.documentSearchFields.map(
7675
8622
  (f) => limit(async () => {
7676
- const chunks = await memoryStore.search(f.label, { limit: 3 });
7677
- for (const chunk of chunks) {
7678
- if (!state.fields.find((sf) => sf.id === f.id)?.value) {
7679
- }
7680
- }
8623
+ await memoryStore.search(f.label, { limit: 3 });
7681
8624
  })
7682
8625
  );
7683
8626
  await Promise.all(searchPromises);
@@ -7690,8 +8633,15 @@ function createApplicationPipeline(config) {
7690
8633
  await Promise.all(fillTasks);
7691
8634
  state.updatedAt = Date.now();
7692
8635
  await applicationStore?.save(state);
7693
- const unfilledFields = state.fields.filter((f) => !f.value);
7694
- if (unfilledFields.length > 0) {
8636
+ workflowPlan = planApplicationWorkflow({
8637
+ fields: state.fields,
8638
+ hasBackfillProvider: false,
8639
+ orgContextCount: 0,
8640
+ hasDocumentStore: false,
8641
+ hasMemoryStore: false
8642
+ });
8643
+ const unfilledFields = workflowPlan.unfilledFields;
8644
+ if (workflowPlan.runBatching) {
7695
8645
  onProgress?.(`Batching ${unfilledFields.length} remaining questions...`);
7696
8646
  state.status = "batching";
7697
8647
  try {
@@ -7758,7 +8708,12 @@ function createApplicationPipeline(config) {
7758
8708
  }
7759
8709
  let fieldsFilled = 0;
7760
8710
  let responseText;
7761
- if (intent.hasAnswers) {
8711
+ let replyPlan = planReplyActions({
8712
+ intent,
8713
+ currentBatchFields,
8714
+ hasDocumentStore: Boolean(documentStore)
8715
+ });
8716
+ if (replyPlan.parseAnswers) {
7762
8717
  onProgress?.("Parsing answers...");
7763
8718
  try {
7764
8719
  const { result: parseResult, usage: parseUsage } = await parseAnswers(
@@ -7781,7 +8736,7 @@ function createApplicationPipeline(config) {
7781
8736
  await log?.(`Answer parsing failed: ${error instanceof Error ? error.message : String(error)}`);
7782
8737
  }
7783
8738
  }
7784
- if (intent.lookupRequests?.length) {
8739
+ if (replyPlan.runLookup && intent.lookupRequests?.length) {
7785
8740
  onProgress?.("Processing lookup requests...");
7786
8741
  let availableData = "";
7787
8742
  if (documentStore) {
@@ -7822,64 +8777,78 @@ function createApplicationPipeline(config) {
7822
8777
  }
7823
8778
  }
7824
8779
  }
7825
- if (intent.primaryIntent === "question" || intent.primaryIntent === "mixed") {
7826
- if (intent.questionText) {
7827
- try {
7828
- const { text, usage } = await generateText({
7829
- prompt: `The user is filling out an insurance application and asked: "${intent.questionText}"
8780
+ if (replyPlan.answerQuestion && intent.questionText) {
8781
+ try {
8782
+ const { text, usage } = await generateText({
8783
+ prompt: `The user is filling out an insurance application and asked: "${intent.questionText}"
7830
8784
 
7831
8785
  Provide a brief, helpful explanation (2-3 sentences). End with "Just reply with the answer when you're ready and I'll fill it in."`,
7832
- maxTokens: 512,
7833
- providerOptions
7834
- });
7835
- trackUsage(usage);
7836
- responseText = text;
7837
- } catch (error) {
7838
- await log?.(`Question response generation failed: ${error instanceof Error ? error.message : String(error)}`);
7839
- responseText = `I wasn't able to generate an explanation for your question. Could you rephrase it, or just provide the answer directly?`;
7840
- }
8786
+ maxTokens: 512,
8787
+ providerOptions
8788
+ });
8789
+ trackUsage(usage);
8790
+ responseText = text;
8791
+ } catch (error) {
8792
+ await log?.(`Question response generation failed: ${error instanceof Error ? error.message : String(error)}`);
8793
+ responseText = `I wasn't able to generate an explanation for your question. Could you rephrase it, or just provide the answer directly?`;
7841
8794
  }
7842
8795
  }
7843
8796
  const currentBatchComplete = currentBatchFieldIds.every(
7844
8797
  (fid) => state.fields.find((f) => f.id === fid)?.value
7845
8798
  );
7846
- if (currentBatchComplete && state.batches) {
7847
- if (state.currentBatchIndex < state.batches.length - 1) {
7848
- state.currentBatchIndex++;
7849
- const nextBatchFieldIds = state.batches[state.currentBatchIndex];
7850
- const nextBatchFields = state.fields.filter(
7851
- (f) => nextBatchFieldIds.includes(f.id)
7852
- );
8799
+ let nextBatchIndex;
8800
+ let nextBatchFields;
8801
+ if (state.batches) {
8802
+ for (let index = state.currentBatchIndex + 1; index < state.batches.length; index++) {
8803
+ const candidateFields = state.fields.filter((f) => state.batches[index].includes(f.id));
8804
+ if (candidateFields.some((f) => !f.value)) {
8805
+ nextBatchIndex = index;
8806
+ nextBatchFields = candidateFields;
8807
+ break;
8808
+ }
8809
+ }
8810
+ }
8811
+ replyPlan = planReplyActions({
8812
+ intent,
8813
+ currentBatchFields,
8814
+ nextBatchFields,
8815
+ hasDocumentStore: Boolean(documentStore)
8816
+ });
8817
+ if (currentBatchComplete && replyPlan.advanceBatch && state.batches) {
8818
+ if (nextBatchIndex !== void 0 && nextBatchFields) {
8819
+ state.currentBatchIndex = nextBatchIndex;
7853
8820
  const filledCount = state.fields.filter((f) => f.value).length;
7854
- try {
7855
- const { text: emailText, usage: emailUsage } = await generateBatchEmail(
7856
- nextBatchFields,
7857
- state.currentBatchIndex,
7858
- state.batches.length,
7859
- {
7860
- appTitle: state.title,
7861
- totalFieldCount: state.fields.length,
7862
- filledFieldCount: filledCount,
7863
- companyName: context?.companyName
7864
- },
7865
- generateText,
7866
- providerOptions
7867
- );
7868
- trackUsage(emailUsage);
7869
- const emailReview = reviewBatchEmail(emailText, nextBatchFields);
7870
- state.qualityReport = {
7871
- ...buildApplicationQualityReport(state),
7872
- emailReview
7873
- };
7874
- if (!responseText) {
7875
- responseText = emailText;
7876
- } else {
7877
- responseText += `
8821
+ if (replyPlan.generateNextEmail) {
8822
+ try {
8823
+ const { text: emailText, usage: emailUsage } = await generateBatchEmail(
8824
+ nextBatchFields,
8825
+ state.currentBatchIndex,
8826
+ state.batches.length,
8827
+ {
8828
+ appTitle: state.title,
8829
+ totalFieldCount: state.fields.length,
8830
+ filledFieldCount: filledCount,
8831
+ companyName: context?.companyName
8832
+ },
8833
+ generateText,
8834
+ providerOptions
8835
+ );
8836
+ trackUsage(emailUsage);
8837
+ const emailReview = reviewBatchEmail(emailText, nextBatchFields);
8838
+ state.qualityReport = {
8839
+ ...buildApplicationQualityReport(state),
8840
+ emailReview
8841
+ };
8842
+ if (!responseText) {
8843
+ responseText = emailText;
8844
+ } else {
8845
+ responseText += `
7878
8846
 
7879
8847
  ${emailText}`;
8848
+ }
8849
+ } catch (error) {
8850
+ await log?.(`Batch email generation failed: ${error instanceof Error ? error.message : String(error)}`);
7880
8851
  }
7881
- } catch (error) {
7882
- await log?.(`Batch email generation failed: ${error instanceof Error ? error.message : String(error)}`);
7883
8852
  }
7884
8853
  } else {
7885
8854
  state.status = "confirming";
@@ -8038,7 +9007,7 @@ INSTRUCTIONS:
8038
9007
  - If the user's attachment already contains critical facts, still request chunk/document lookup when policy or quote details should be cross-checked against stored records
8039
9008
 
8040
9009
  CHUNK TYPES (for chunkTypes filter):
8041
- carrier_info, named_insured, coverage, endorsement, exclusion, condition, section, declaration, loss_history, premium, supplementary
9010
+ carrier_info, named_insured, coverage, covered_reason, definition, endorsement, exclusion, condition, section, declaration, loss_history, premium, supplementary
8042
9011
 
8043
9012
  Respond with the structured classification.`;
8044
9013
  }
@@ -8069,91 +9038,91 @@ Respond with the final answer, deduplicated citations array, overall confidence
8069
9038
  }
8070
9039
 
8071
9040
  // src/schemas/query.ts
8072
- var import_zod37 = require("zod");
8073
- var QueryIntentSchema = import_zod37.z.enum([
9041
+ var import_zod39 = require("zod");
9042
+ var QueryIntentSchema = import_zod39.z.enum([
8074
9043
  "policy_question",
8075
9044
  "coverage_comparison",
8076
9045
  "document_search",
8077
9046
  "claims_inquiry",
8078
9047
  "general_knowledge"
8079
9048
  ]);
8080
- var QueryAttachmentKindSchema = import_zod37.z.enum(["image", "pdf", "text"]);
8081
- var QueryAttachmentSchema = import_zod37.z.object({
8082
- id: import_zod37.z.string().optional().describe("Optional stable attachment ID from the caller"),
9049
+ var QueryAttachmentKindSchema = import_zod39.z.enum(["image", "pdf", "text"]);
9050
+ var QueryAttachmentSchema = import_zod39.z.object({
9051
+ id: import_zod39.z.string().optional().describe("Optional stable attachment ID from the caller"),
8083
9052
  kind: QueryAttachmentKindSchema,
8084
- name: import_zod37.z.string().optional().describe("Original filename or user-facing label"),
8085
- mimeType: import_zod37.z.string().optional().describe("MIME type such as image/jpeg or application/pdf"),
8086
- base64: import_zod37.z.string().optional().describe("Base64-encoded file content for image/pdf attachments"),
8087
- text: import_zod37.z.string().optional().describe("Plain-text attachment content when available"),
8088
- description: import_zod37.z.string().optional().describe("Caller-provided description of the attachment")
8089
- });
8090
- var SubQuestionSchema = import_zod37.z.object({
8091
- question: import_zod37.z.string().describe("Atomic sub-question to retrieve and answer independently"),
9053
+ name: import_zod39.z.string().optional().describe("Original filename or user-facing label"),
9054
+ mimeType: import_zod39.z.string().optional().describe("MIME type such as image/jpeg or application/pdf"),
9055
+ base64: import_zod39.z.string().optional().describe("Base64-encoded file content for image/pdf attachments"),
9056
+ text: import_zod39.z.string().optional().describe("Plain-text attachment content when available"),
9057
+ description: import_zod39.z.string().optional().describe("Caller-provided description of the attachment")
9058
+ });
9059
+ var SubQuestionSchema = import_zod39.z.object({
9060
+ question: import_zod39.z.string().describe("Atomic sub-question to retrieve and answer independently"),
8092
9061
  intent: QueryIntentSchema,
8093
- chunkTypes: import_zod37.z.array(import_zod37.z.string()).optional().describe("Chunk types to filter retrieval (e.g. coverage, endorsement, declaration)"),
8094
- documentFilters: import_zod37.z.object({
8095
- type: import_zod37.z.enum(["policy", "quote"]).optional(),
8096
- carrier: import_zod37.z.string().optional(),
8097
- insuredName: import_zod37.z.string().optional(),
8098
- policyNumber: import_zod37.z.string().optional(),
8099
- quoteNumber: import_zod37.z.string().optional(),
8100
- policyTypes: import_zod37.z.array(PolicyTypeSchema).optional().describe("Filter by policy type (e.g. homeowners_ho3, renters_ho4, pet) to avoid mixing up similar policies")
9062
+ chunkTypes: import_zod39.z.array(import_zod39.z.string()).optional().describe("Chunk types to filter retrieval (e.g. coverage, endorsement, declaration)"),
9063
+ documentFilters: import_zod39.z.object({
9064
+ type: import_zod39.z.enum(["policy", "quote"]).optional(),
9065
+ carrier: import_zod39.z.string().optional(),
9066
+ insuredName: import_zod39.z.string().optional(),
9067
+ policyNumber: import_zod39.z.string().optional(),
9068
+ quoteNumber: import_zod39.z.string().optional(),
9069
+ policyTypes: import_zod39.z.array(PolicyTypeSchema).optional().describe("Filter by policy type (e.g. homeowners_ho3, renters_ho4, pet) to avoid mixing up similar policies")
8101
9070
  }).optional().describe("Structured filters to narrow document lookup")
8102
9071
  });
8103
- var QueryClassifyResultSchema = import_zod37.z.object({
9072
+ var QueryClassifyResultSchema = import_zod39.z.object({
8104
9073
  intent: QueryIntentSchema,
8105
- subQuestions: import_zod37.z.array(SubQuestionSchema).min(1).describe("Decomposed atomic sub-questions"),
8106
- requiresDocumentLookup: import_zod37.z.boolean().describe("Whether structured document lookup is needed"),
8107
- requiresChunkSearch: import_zod37.z.boolean().describe("Whether semantic chunk search is needed"),
8108
- requiresConversationHistory: import_zod37.z.boolean().describe("Whether conversation history is relevant")
8109
- });
8110
- var EvidenceItemSchema = import_zod37.z.object({
8111
- source: import_zod37.z.enum(["chunk", "document", "conversation", "attachment"]),
8112
- chunkId: import_zod37.z.string().optional(),
8113
- documentId: import_zod37.z.string().optional(),
8114
- turnId: import_zod37.z.string().optional(),
8115
- attachmentId: import_zod37.z.string().optional(),
8116
- text: import_zod37.z.string().describe("Text excerpt from the source"),
8117
- relevance: import_zod37.z.number().min(0).max(1),
8118
- metadata: import_zod37.z.array(import_zod37.z.object({ key: import_zod37.z.string(), value: import_zod37.z.string() })).optional()
8119
- });
8120
- var AttachmentInterpretationSchema = import_zod37.z.object({
8121
- summary: import_zod37.z.string().describe("Concise summary of what the attachment shows or contains"),
8122
- extractedFacts: import_zod37.z.array(import_zod37.z.string()).describe("Specific observable or document facts grounded in the attachment"),
8123
- recommendedFocus: import_zod37.z.array(import_zod37.z.string()).describe("Important details to incorporate when answering follow-up questions"),
8124
- confidence: import_zod37.z.number().min(0).max(1)
8125
- });
8126
- var RetrievalResultSchema = import_zod37.z.object({
8127
- subQuestion: import_zod37.z.string(),
8128
- evidence: import_zod37.z.array(EvidenceItemSchema)
8129
- });
8130
- var CitationSchema = import_zod37.z.object({
8131
- index: import_zod37.z.number().describe("Citation number [1], [2], etc."),
8132
- chunkId: import_zod37.z.string().describe("Source chunk ID, e.g. doc-123:coverage:2"),
8133
- documentId: import_zod37.z.string(),
8134
- documentType: import_zod37.z.enum(["policy", "quote"]).optional(),
8135
- field: import_zod37.z.string().optional().describe("Specific field path, e.g. coverages[0].deductible"),
8136
- quote: import_zod37.z.string().describe("Exact text from source that supports the claim"),
8137
- relevance: import_zod37.z.number().min(0).max(1)
8138
- });
8139
- var SubAnswerSchema = import_zod37.z.object({
8140
- subQuestion: import_zod37.z.string(),
8141
- answer: import_zod37.z.string(),
8142
- citations: import_zod37.z.array(CitationSchema),
8143
- confidence: import_zod37.z.number().min(0).max(1),
8144
- needsMoreContext: import_zod37.z.boolean().describe("True if evidence was insufficient to answer fully")
8145
- });
8146
- var VerifyResultSchema = import_zod37.z.object({
8147
- approved: import_zod37.z.boolean().describe("Whether all sub-answers are adequately grounded"),
8148
- issues: import_zod37.z.array(import_zod37.z.string()).describe("Specific grounding or consistency issues found"),
8149
- retrySubQuestions: import_zod37.z.array(import_zod37.z.string()).optional().describe("Sub-questions that need additional retrieval or re-reasoning")
8150
- });
8151
- var QueryResultSchema = import_zod37.z.object({
8152
- answer: import_zod37.z.string(),
8153
- citations: import_zod37.z.array(CitationSchema),
9074
+ subQuestions: import_zod39.z.array(SubQuestionSchema).min(1).describe("Decomposed atomic sub-questions"),
9075
+ requiresDocumentLookup: import_zod39.z.boolean().describe("Whether structured document lookup is needed"),
9076
+ requiresChunkSearch: import_zod39.z.boolean().describe("Whether semantic chunk search is needed"),
9077
+ requiresConversationHistory: import_zod39.z.boolean().describe("Whether conversation history is relevant")
9078
+ });
9079
+ var EvidenceItemSchema = import_zod39.z.object({
9080
+ source: import_zod39.z.enum(["chunk", "document", "conversation", "attachment"]),
9081
+ chunkId: import_zod39.z.string().optional(),
9082
+ documentId: import_zod39.z.string().optional(),
9083
+ turnId: import_zod39.z.string().optional(),
9084
+ attachmentId: import_zod39.z.string().optional(),
9085
+ text: import_zod39.z.string().describe("Text excerpt from the source"),
9086
+ relevance: import_zod39.z.number().min(0).max(1),
9087
+ metadata: import_zod39.z.array(import_zod39.z.object({ key: import_zod39.z.string(), value: import_zod39.z.string() })).optional()
9088
+ });
9089
+ var AttachmentInterpretationSchema = import_zod39.z.object({
9090
+ summary: import_zod39.z.string().describe("Concise summary of what the attachment shows or contains"),
9091
+ extractedFacts: import_zod39.z.array(import_zod39.z.string()).describe("Specific observable or document facts grounded in the attachment"),
9092
+ recommendedFocus: import_zod39.z.array(import_zod39.z.string()).describe("Important details to incorporate when answering follow-up questions"),
9093
+ confidence: import_zod39.z.number().min(0).max(1)
9094
+ });
9095
+ var RetrievalResultSchema = import_zod39.z.object({
9096
+ subQuestion: import_zod39.z.string(),
9097
+ evidence: import_zod39.z.array(EvidenceItemSchema)
9098
+ });
9099
+ var CitationSchema = import_zod39.z.object({
9100
+ index: import_zod39.z.number().describe("Citation number [1], [2], etc."),
9101
+ chunkId: import_zod39.z.string().describe("Source chunk ID, e.g. doc-123:coverage:2"),
9102
+ documentId: import_zod39.z.string(),
9103
+ documentType: import_zod39.z.enum(["policy", "quote"]).optional(),
9104
+ field: import_zod39.z.string().optional().describe("Specific field path, e.g. coverages[0].deductible"),
9105
+ quote: import_zod39.z.string().describe("Exact text from source that supports the claim"),
9106
+ relevance: import_zod39.z.number().min(0).max(1)
9107
+ });
9108
+ var SubAnswerSchema = import_zod39.z.object({
9109
+ subQuestion: import_zod39.z.string(),
9110
+ answer: import_zod39.z.string(),
9111
+ citations: import_zod39.z.array(CitationSchema),
9112
+ confidence: import_zod39.z.number().min(0).max(1),
9113
+ needsMoreContext: import_zod39.z.boolean().describe("True if evidence was insufficient to answer fully")
9114
+ });
9115
+ var VerifyResultSchema = import_zod39.z.object({
9116
+ approved: import_zod39.z.boolean().describe("Whether all sub-answers are adequately grounded"),
9117
+ issues: import_zod39.z.array(import_zod39.z.string()).describe("Specific grounding or consistency issues found"),
9118
+ retrySubQuestions: import_zod39.z.array(import_zod39.z.string()).optional().describe("Sub-questions that need additional retrieval or re-reasoning")
9119
+ });
9120
+ var QueryResultSchema = import_zod39.z.object({
9121
+ answer: import_zod39.z.string(),
9122
+ citations: import_zod39.z.array(CitationSchema),
8154
9123
  intent: QueryIntentSchema,
8155
- confidence: import_zod37.z.number().min(0).max(1),
8156
- followUp: import_zod37.z.string().optional().describe("Suggested follow-up question if applicable")
9124
+ confidence: import_zod39.z.number().min(0).max(1),
9125
+ followUp: import_zod39.z.string().optional().describe("Suggested follow-up question if applicable")
8157
9126
  });
8158
9127
 
8159
9128
  // src/query/retriever.ts
@@ -8693,6 +9662,42 @@ ${item.text}`).join("\n\n");
8693
9662
  return { evidence, contextSummary };
8694
9663
  }
8695
9664
 
9665
+ // src/query/workflow.ts
9666
+ function shouldRetrieveForClassification(classification) {
9667
+ return classification.requiresDocumentLookup || classification.requiresChunkSearch;
9668
+ }
9669
+ function buildInitialQueryWorkflowPlan(params) {
9670
+ const { classification, attachmentEvidence } = params;
9671
+ const actions = [];
9672
+ const shouldRetrieve = shouldRetrieveForClassification(classification);
9673
+ if (shouldRetrieve) {
9674
+ actions.push({
9675
+ type: "retrieve",
9676
+ subQuestions: classification.subQuestions,
9677
+ reason: "classification requested document or chunk lookup"
9678
+ });
9679
+ }
9680
+ actions.push({
9681
+ type: "reason",
9682
+ subQuestions: classification.subQuestions,
9683
+ reason: shouldRetrieve ? "answer with retrieved evidence and any attachment evidence" : attachmentEvidence.length > 0 ? "answer with attachment evidence only" : "answer without document retrieval"
9684
+ });
9685
+ actions.push(
9686
+ {
9687
+ type: "verify",
9688
+ reason: "check grounding and request targeted retries when needed"
9689
+ },
9690
+ {
9691
+ type: "respond",
9692
+ reason: "compose final response"
9693
+ }
9694
+ );
9695
+ return { actions, shouldRetrieve };
9696
+ }
9697
+ function getWorkflowAction(plan, type) {
9698
+ return plan.actions.find((action) => action.type === type);
9699
+ }
9700
+
8696
9701
  // src/query/coordinator.ts
8697
9702
  function createQueryAgent(config) {
8698
9703
  const {
@@ -8737,29 +9742,37 @@ function createQueryAgent(config) {
8737
9742
  onProgress?.("Classifying query...");
8738
9743
  const classification = await classify(question, conversationId, attachmentContext);
8739
9744
  await pipelineCtx.save("classify", { classification, attachmentEvidence });
8740
- onProgress?.(`Retrieving evidence for ${classification.subQuestions.length} sub-question(s)...`);
8741
9745
  const retrieverConfig = {
8742
9746
  documentStore,
8743
9747
  memoryStore,
8744
9748
  retrievalLimit,
8745
9749
  log
8746
9750
  };
8747
- const retrievalResults = await Promise.all(
8748
- classification.subQuestions.map(
8749
- (sq) => limit(() => retrieve(sq, conversationId, retrieverConfig))
8750
- )
8751
- );
9751
+ const workflowPlan = buildInitialQueryWorkflowPlan({ classification, attachmentEvidence });
9752
+ const retrieveAction = getWorkflowAction(workflowPlan, "retrieve");
9753
+ const reasonAction = getWorkflowAction(workflowPlan, "reason");
9754
+ await pipelineCtx.save("workflow", { classification, attachmentEvidence, workflowPlan });
9755
+ const retrievalResults = retrieveAction ? await (async () => {
9756
+ onProgress?.(`Retrieving evidence for ${retrieveAction.subQuestions.length} sub-question(s)...`);
9757
+ return Promise.all(
9758
+ retrieveAction.subQuestions.map(
9759
+ (sq) => limit(() => retrieve(sq, conversationId, retrieverConfig))
9760
+ )
9761
+ );
9762
+ })() : [];
8752
9763
  const allEvidence = [...attachmentEvidence, ...retrievalResults.flatMap((r) => r.evidence)];
8753
9764
  await pipelineCtx.save("retrieve", { classification, attachmentEvidence, evidence: allEvidence });
8754
9765
  onProgress?.("Reasoning over evidence...");
8755
9766
  const reasonerConfig = { generateObject, providerOptions };
9767
+ const subQuestionsToReason = reasonAction?.subQuestions ?? classification.subQuestions;
8756
9768
  const reasonResults = await Promise.allSettled(
8757
- classification.subQuestions.map(
8758
- (sq, i) => limit(async () => {
9769
+ subQuestionsToReason.map(
9770
+ (sq) => limit(async () => {
9771
+ const retrievedEvidence = retrievalResults.find((r) => r.subQuestion === sq.question)?.evidence ?? [];
8759
9772
  const { subAnswer, usage } = await reason(
8760
9773
  sq.question,
8761
9774
  sq.intent,
8762
- [...attachmentEvidence, ...retrievalResults[i].evidence],
9775
+ [...attachmentEvidence, ...retrievedEvidence],
8763
9776
  reasonerConfig
8764
9777
  );
8765
9778
  trackUsage(usage);
@@ -8773,9 +9786,9 @@ function createQueryAgent(config) {
8773
9786
  if (result.status === "fulfilled") {
8774
9787
  subAnswers.push(result.value);
8775
9788
  } else {
8776
- await log?.(`Reasoner failed for sub-question "${classification.subQuestions[i].question}": ${result.reason}`);
9789
+ await log?.(`Reasoner failed for sub-question "${subQuestionsToReason[i].question}": ${result.reason}`);
8777
9790
  subAnswers.push({
8778
- subQuestion: classification.subQuestions[i].question,
9791
+ subQuestion: subQuestionsToReason[i].question,
8779
9792
  answer: "Unable to answer this part of the question due to a processing error.",
8780
9793
  citations: [],
8781
9794
  confidence: 0,
@@ -9159,6 +10172,7 @@ var AGENT_TOOLS = [
9159
10172
  CoverageSchema,
9160
10173
  CoverageTriggerSchema,
9161
10174
  CoverageValueTypeSchema,
10175
+ CoveredReasonSchema,
9162
10176
  CrimeDeclarationsSchema,
9163
10177
  CyberDeclarationsSchema,
9164
10178
  DEDUCTIBLE_TYPES,
@@ -9171,6 +10185,7 @@ var AGENT_TOOLS = [
9171
10185
  DeductibleScheduleSchema,
9172
10186
  DeductibleTypeSchema,
9173
10187
  DefenseCostTreatmentSchema,
10188
+ DefinitionSchema,
9174
10189
  DocumentTypeSchema,
9175
10190
  DriverRecordSchema,
9176
10191
  DwellingDetailsSchema,