@claritylabs/cl-sdk 0.16.2 → 0.17.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -169,7 +169,14 @@ function createPipelineContext(opts) {
169
169
  let latest = opts.resumeFrom;
170
170
  const completedPhases = /* @__PURE__ */ new Set();
171
171
  if (opts.resumeFrom) {
172
- completedPhases.add(opts.resumeFrom.phase);
172
+ const phaseIndex = opts.phaseOrder?.indexOf(opts.resumeFrom.phase) ?? -1;
173
+ if (phaseIndex >= 0 && opts.phaseOrder) {
174
+ for (const phase of opts.phaseOrder.slice(0, phaseIndex + 1)) {
175
+ completedPhases.add(phase);
176
+ }
177
+ } else {
178
+ completedPhases.add(opts.resumeFrom.phase);
179
+ }
173
180
  }
174
181
  return {
175
182
  id: opts.id,
@@ -1226,6 +1233,29 @@ var AuxiliaryFactSchema = z16.object({
1226
1233
  subject: z16.string().optional(),
1227
1234
  context: z16.string().optional()
1228
1235
  });
1236
// Schema for one defined term captured from a document.
// `term` and `definition` are required strings; the page, form, section, and
// original-content fields are all optional.
// NOTE(review): `z16` is presumably the bundler's alias for zod's `z` — confirm against the imports.
+ var DefinitionSchema = z16.object({
1237
+ term: z16.string(),
1238
+ definition: z16.string(),
1239
+ pageNumber: z16.number().optional(),
1240
+ formNumber: z16.string().optional(),
1241
+ formTitle: z16.string().optional(),
1242
+ sectionRef: z16.string().optional(),
1243
+ originalContent: z16.string().optional()
1244
+ });
1245
// Schema for one covered reason attached to a named coverage.
// `coverageName` and `content` are required strings. `conditions`, `exceptions`,
// and `appliesTo` are optional string arrays; the remaining fields are optional scalars.
// NOTE(review): `z16` is presumably the bundler's alias for zod's `z` — confirm against the imports.
+ var CoveredReasonSchema = z16.object({
1246
+ coverageName: z16.string(),
1247
+ reasonNumber: z16.string().optional(),
1248
+ title: z16.string().optional(),
1249
+ content: z16.string(),
1250
+ conditions: z16.array(z16.string()).optional(),
1251
+ exceptions: z16.array(z16.string()).optional(),
1252
+ appliesTo: z16.array(z16.string()).optional(),
1253
+ pageNumber: z16.number().optional(),
1254
+ formNumber: z16.string().optional(),
1255
+ formTitle: z16.string().optional(),
1256
+ sectionRef: z16.string().optional(),
1257
+ originalContent: z16.string().optional()
1258
+ });
1229
1259
  var BaseDocumentFields = {
1230
1260
  id: z16.string(),
1231
1261
  carrier: z16.string(),
@@ -1236,6 +1266,8 @@ var BaseDocumentFields = {
1236
1266
  policyTypes: z16.array(z16.string()).optional(),
1237
1267
  coverages: z16.array(CoverageSchema),
1238
1268
  sections: z16.array(SectionSchema).optional(),
1269
+ definitions: z16.array(DefinitionSchema).optional(),
1270
+ coveredReasons: z16.array(CoveredReasonSchema).optional(),
1239
1271
  // Enriched fields (v1.2+)
1240
1272
  carrierLegalName: z16.string().optional(),
1241
1273
  carrierNaicNumber: z16.string().optional(),
@@ -1683,33 +1715,102 @@ async function runExtractor(params) {
1683
1715
  };
1684
1716
  }
1685
1717
 
1718
+ // src/extraction/memory.ts
1719
/**
 * Report whether a value is a record-like object.
 *
 * @param {unknown} value - Candidate value.
 * @returns {boolean} `true` for any non-null object that is not an array.
 */
function isMemoryRecord(value) {
  if (value === null || Array.isArray(value)) {
    return false;
  }
  return typeof value === "object";
}
1722
/**
 * Look up `key` in the memory store and return the entry only when it is a
 * record-like object.
 *
 * @param {{ get(key: string): unknown }} memory - Store exposing `get`.
 * @param {string} key - Entry to read.
 * @returns {object | undefined} The stored record, or `undefined` when the
 *   entry is missing or is not a record.
 */
function readMemoryRecord(memory, key) {
  const stored = memory.get(key);
  if (!isMemoryRecord(stored)) {
    return void 0;
  }
  return stored;
}
1726
/**
 * Read a single property from a record that may be absent.
 *
 * @param {object | null | undefined} record - Source record.
 * @param {string} key - Property name.
 * @returns {unknown} The property value, or `undefined` when `record` is
 *   null or undefined.
 */
function readRecordValue(record, key) {
  if (record == null) {
    return void 0;
  }
  return record[key];
}
1729
/**
 * Read a property from a record and return it only when it is an array.
 *
 * @param {object | null | undefined} record - Source record.
 * @param {string} key - Property name.
 * @returns {unknown[] | undefined} The array stored under `key`, otherwise
 *   `undefined`.
 */
function readRecordArray(record, key) {
  const candidate = readRecordValue(record, key);
  if (Array.isArray(candidate)) {
    return candidate;
  }
  return void 0;
}
1733
/**
 * Fetch the record stored in memory under "carrier_info".
 *
 * @param {{ get(key: string): unknown }} memory - Memory store.
 * @returns {object | undefined} The record, or `undefined` when absent or not a record.
 */
function getCarrierInfo(memory) {
  const carrierInfo = readMemoryRecord(memory, "carrier_info");
  return carrierInfo;
}
1736
/**
 * Fetch the record stored in memory under "named_insured".
 *
 * @param {{ get(key: string): unknown }} memory - Memory store.
 * @returns {object | undefined} The record, or `undefined` when absent or not a record.
 */
function getNamedInsured(memory) {
  const namedInsured = readMemoryRecord(memory, "named_insured");
  return namedInsured;
}
1739
/**
 * Fetch the record stored in memory under "coverage_limits".
 *
 * @param {{ get(key: string): unknown }} memory - Memory store.
 * @returns {object | undefined} The record, or `undefined` when absent or not a record.
 */
function getCoverageLimits(memory) {
  const coverageLimits = readMemoryRecord(memory, "coverage_limits");
  return coverageLimits;
}
1742
/**
 * Return the `coverages` array from the "coverage_limits" memory record.
 *
 * @param {{ get(key: string): unknown }} memory - Memory store.
 * @returns {unknown[]} The stored array, or a new empty array when the record
 *   or its `coverages` array is missing.
 */
function getCoverageLimitCoverages(memory) {
  const limits = getCoverageLimits(memory);
  const coverages = readRecordArray(limits, "coverages");
  return coverages ?? [];
}
1745
/**
 * Fetch the record stored in memory under "sections".
 *
 * @param {{ get(key: string): unknown }} memory - Memory store.
 * @returns {object | undefined} The record, or `undefined` when absent or not a record.
 */
function getSectionsPayload(memory) {
  const payload = readMemoryRecord(memory, "sections");
  return payload;
}
1748
/**
 * Return the `sections` array from the "sections" memory record.
 *
 * @param {{ get(key: string): unknown }} memory - Memory store.
 * @returns {unknown[] | undefined} The array, or `undefined` when unavailable.
 */
function getSections(memory) {
  const payload = getSectionsPayload(memory);
  const sections = readRecordArray(payload, "sections");
  return sections;
}
1751
/**
 * Fetch the record stored in memory under "definitions".
 *
 * @param {{ get(key: string): unknown }} memory - Memory store.
 * @returns {object | undefined} The record, or `undefined` when absent or not a record.
 */
function getDefinitionsPayload(memory) {
  const payload = readMemoryRecord(memory, "definitions");
  return payload;
}
1754
/**
 * Return the `definitions` array from the "definitions" memory record.
 *
 * @param {{ get(key: string): unknown }} memory - Memory store.
 * @returns {unknown[] | undefined} The array, or `undefined` when unavailable.
 */
function getDefinitions(memory) {
  const payload = getDefinitionsPayload(memory);
  const definitions = readRecordArray(payload, "definitions");
  return definitions;
}
1757
/**
 * Fetch the record stored in memory under "covered_reasons".
 *
 * @param {{ get(key: string): unknown }} memory - Memory store.
 * @returns {object | undefined} The record, or `undefined` when absent or not a record.
 */
function getCoveredReasonsPayload(memory) {
  const payload = readMemoryRecord(memory, "covered_reasons");
  return payload;
}
1760
/**
 * Return the covered-reasons array from the "covered_reasons" memory record.
 *
 * The camelCase key `coveredReasons` is tried first; the snake_case key
 * `covered_reasons` is the fallback.
 *
 * @param {{ get(key: string): unknown }} memory - Memory store.
 * @returns {unknown[] | undefined} The array, or `undefined` when neither key holds one.
 */
function getCoveredReasons(memory) {
  const payload = getCoveredReasonsPayload(memory);
  const camelCased = readRecordArray(payload, "coveredReasons");
  if (camelCased != null) {
    return camelCased;
  }
  return readRecordArray(payload, "covered_reasons");
}
1764
+
1686
1765
  // src/extraction/promote.ts
1687
1766
  function getDeclarationFields(doc) {
1688
1767
  const decl = doc.declarations;
1689
1768
  return Array.isArray(decl?.fields) ? decl.fields : [];
1690
1769
  }
1691
1770
  function fieldMatches(fieldName, patterns) {
1692
- const lower = fieldName.toLowerCase().replace(/[\s_-]/g, "");
1693
- return patterns.some((p) => lower === p.toLowerCase().replace(/[\s_-]/g, ""));
1771
+ const lower = normalizeFieldName(fieldName);
1772
+ return patterns.some((p) => lower === normalizeFieldName(p));
1773
+ }
1774
/**
 * Reduce a field name to a comparison key: lowercase, with every character
 * other than `a-z` and `0-9` removed.
 *
 * Null and undefined yield an empty string and other non-string values are
 * stringified first, so one malformed entry (for example a declaration field
 * without a name, or a numeric value) cannot abort processing with a TypeError.
 *
 * @param {unknown} fieldName - Name to normalize; normally a string.
 * @returns {string} The normalized key (possibly empty).
 */
function normalizeFieldName(fieldName) {
  if (fieldName === null || fieldName === void 0) {
    return "";
  }
  return String(fieldName).toLowerCase().replace(/[^a-z0-9]/g, "");
}
1695
/**
 * Return the value of the first field whose name matches one of `patterns`
 * and that is not vetoed by `reject`.
 *
 * @param {Array<{ field: string, value: unknown }>} fields - Fields to scan in order.
 * @param {string[]} patterns - Accepted field names (compared via `fieldMatches`).
 * @param {(field: object) => unknown} [reject] - Optional veto; a truthy result skips the field.
 * @returns {unknown} The matching field's `value`, or `undefined` when nothing matches.
 */
function findFieldValue(fields, patterns, reject) {
  for (const candidate of fields) {
    if (!fieldMatches(candidate.field, patterns)) {
      continue;
    }
    if (reject?.(candidate)) {
      continue;
    }
    return candidate.value;
  }
  return void 0;
}
1699
- function promoteCarrierFields(doc) {
1700
- const raw = doc;
1701
- if (!raw.carrierNaicNumber && raw.naicNumber) {
1702
- raw.carrierNaicNumber = raw.naicNumber;
1703
- }
1704
- if (!raw.carrierAmBestRating && raw.amBestRating) {
1705
- raw.carrierAmBestRating = raw.amBestRating;
1781
/**
 * Pass through a string that contains at least one non-whitespace character.
 *
 * @param {unknown} value - Candidate value.
 * @returns {string | undefined} The original (untrimmed) string, or
 *   `undefined` for non-strings and blank strings.
 */
function stringValue(value) {
  if (typeof value !== "string") {
    return void 0;
  }
  return value.trim() ? value : void 0;
}
1784
/**
 * Return the first non-blank string found on `raw` under any of `keys`,
 * checking the keys in order.
 *
 * @param {object} raw - Object to read from.
 * @param {string[]} keys - Property names to try.
 * @returns {string | undefined} The first usable string, or `undefined`.
 */
function findRawString(raw, keys) {
  const matchedKey = keys.find((key) => stringValue(raw[key]) !== void 0);
  if (matchedKey === void 0) {
    return void 0;
  }
  return stringValue(raw[matchedKey]);
}
1791
/**
 * Move legacy properties onto their canonical names, mutating `raw`.
 *
 * For each mapping the `from` value is copied to `to` only when `to` is
 * currently falsy and `from` is truthy; the `from` property is always deleted.
 *
 * @param {object} raw - Object to mutate.
 * @param {Array<{ from: string, to: string }>} mappings - Source/target pairs.
 * @returns {void}
 */
function promoteRawFields(raw, mappings) {
  mappings.forEach((mapping) => {
    const source = mapping.from;
    const target = mapping.to;
    const shouldCopy = Boolean(raw[source]) && !raw[target];
    if (shouldCopy) {
      raw[target] = raw[source];
    }
    delete raw[source];
  });
}
1799
/**
 * Resolve a value from the raw document first and the declaration fields second.
 *
 * @param {object} raw - Raw document.
 * @param {object[]} fields - Declaration fields.
 * @param {{ rawKey?: string, patterns: string[], reject?: Function }} lookup -
 *   Optional raw property to prefer, plus the patterns/veto passed to `findFieldValue`.
 * @returns {unknown} A truthy `raw[lookup.rawKey]` when available, otherwise
 *   the result of `findFieldValue`.
 */
function findRawOrDeclarationValue(raw, fields, lookup) {
  if (lookup.rawKey) {
    const rawValue = raw[lookup.rawKey];
    if (rawValue) {
      return rawValue;
    }
  }
  return findFieldValue(fields, lookup.patterns, lookup.reject);
}
1802
/**
 * Fill `raw[targetKey]` when it is currently falsy.
 *
 * The first non-blank string under `rawKeys` wins; otherwise the declaration
 * fields are searched with `lookup`. Nothing is written when no truthy value
 * is found.
 *
 * @param {object} raw - Raw document, mutated in place.
 * @param {object[]} fields - Declaration fields.
 * @param {string} targetKey - Property to populate.
 * @param {string[]} rawKeys - Alternate raw property names to try.
 * @param {{ patterns: string[], reject?: Function }} lookup - Declaration lookup.
 * @returns {void}
 */
function promoteRawOrDeclarationString(raw, fields, targetKey, rawKeys, lookup) {
  if (!raw[targetKey]) {
    const fromRaw = findRawString(raw, rawKeys);
    const resolved = fromRaw ?? findFieldValue(fields, lookup.patterns, lookup.reject);
    if (resolved) {
      raw[targetKey] = resolved;
    }
  }
}
1807
+ function promoteCarrierFields(doc) {
1808
+ const raw = doc;
1809
+ promoteRawFields(raw, [
1810
+ { from: "naicNumber", to: "carrierNaicNumber" },
1811
+ { from: "amBestRating", to: "carrierAmBestRating" },
1812
+ { from: "admittedStatus", to: "carrierAdmittedStatus" }
1813
+ ]);
1713
1814
  if (!raw.insurer && raw.carrierLegalName) {
1714
1815
  raw.insurer = {
1715
1816
  legalName: raw.carrierLegalName,
@@ -1750,12 +1851,21 @@ var BROKER_ADDRESS_PATTERNS = ["brokerAddress", "agentAddress", "producerAddress
1750
1851
  function promoteBroker(doc) {
1751
1852
  const raw = doc;
1752
1853
  const fields = getDeclarationFields(doc);
1753
- const brokerAgency = raw.brokerAgency || findFieldValue(fields, BROKER_NAME_PATTERNS);
1754
- const brokerContact = raw.brokerContactName || findFieldValue(fields, BROKER_CONTACT_PATTERNS);
1755
- const brokerLicense = raw.brokerLicenseNumber || findFieldValue(fields, BROKER_LICENSE_PATTERNS);
1756
- const brokerPhone = findFieldValue(fields, BROKER_PHONE_PATTERNS);
1757
- const brokerEmail = findFieldValue(fields, BROKER_EMAIL_PATTERNS);
1758
- const brokerAddress = findFieldValue(fields, BROKER_ADDRESS_PATTERNS);
1854
+ const brokerAgency = findRawOrDeclarationValue(raw, fields, {
1855
+ rawKey: "brokerAgency",
1856
+ patterns: BROKER_NAME_PATTERNS
1857
+ });
1858
+ const brokerContact = findRawOrDeclarationValue(raw, fields, {
1859
+ rawKey: "brokerContactName",
1860
+ patterns: BROKER_CONTACT_PATTERNS
1861
+ });
1862
+ const brokerLicense = findRawOrDeclarationValue(raw, fields, {
1863
+ rawKey: "brokerLicenseNumber",
1864
+ patterns: BROKER_LICENSE_PATTERNS
1865
+ });
1866
+ const brokerPhone = findRawOrDeclarationValue(raw, fields, { patterns: BROKER_PHONE_PATTERNS });
1867
+ const brokerEmail = findRawOrDeclarationValue(raw, fields, { patterns: BROKER_EMAIL_PATTERNS });
1868
+ const brokerAddress = findRawOrDeclarationValue(raw, fields, { patterns: BROKER_ADDRESS_PATTERNS });
1759
1869
  if (brokerAgency) raw.brokerAgency = brokerAgency;
1760
1870
  if (brokerContact) raw.brokerContactName = brokerContact;
1761
1871
  if (brokerLicense) raw.brokerLicenseNumber = brokerLicense;
@@ -2011,25 +2121,171 @@ function synthesizeDeductibles(doc) {
2011
2121
  raw.deductibles = deductibles;
2012
2122
  }
2013
2123
  }
2014
- var PREMIUM_PATTERNS = ["premium", "totalPremium", "annualPremium", "policyPremium", "basePremium"];
2015
- var TOTAL_COST_PATTERNS = ["totalCost", "totalDue", "totalAmount", "totalPolicyPremium"];
2124
// Declaration field names accepted as the policy premium.
// Most names are listed in both camelCase and spaced spelling.
var PREMIUM_PATTERNS = [
  "premium",
  "premiumAmount", "premium amount",
  "totalPremium", "total premium",
  "totalPolicyPremium", "total policy premium",
  "annualPremium", "annual premium",
  "estimatedAnnualPremium", "estimated annual premium",
  "policyPremium", "policy premium",
  "basePremium", "base premium",
  "planCost", "plan cost",
  "policyCost", "policy cost",
  "premiumSubtotal", "premium subtotal",
  "subtotalPremium", "subtotal premium",
  "quotedPremium", "quoted premium"
];
2151
// Declaration field names accepted as the total cost / amount due.
// Most names are listed in both camelCase and spaced spelling.
var TOTAL_COST_PATTERNS = [
  "totalCost", "total cost",
  "total",
  "totalDue", "total due",
  "amountPaid", "amount paid",
  "totalPaid", "total paid",
  "totalPrice", "total price",
  "totalTripCost", "total trip cost",
  "amountCharged", "amount charged",
  "amountDue", "amount due",
  "totalAmountDue", "total amount due",
  "totalAmount", "total amount",
  "grandTotal", "grand total",
  "totalPayable", "total payable",
  "totalCharges", "total charges",
  "totalPolicyCost", "total policy cost"
];
2182
// Property names that may carry the premium on a raw document, in lookup order.
var PREMIUM_RAW_KEYS = [
  "premium", "premiumAmount", "premium_amount",
  "totalPremium", "totalPolicyPremium",
  "annualPremium", "estimatedAnnualPremium",
  "policyPremium", "basePremium",
  "planCost", "policyCost",
  "premiumSubtotal", "subtotalPremium",
  "quotedPremium"
];
2198
// Property names that may carry the total cost on a raw document, in lookup order.
var TOTAL_COST_RAW_KEYS = [
  "totalCost", "total_cost", "total",
  "totalDue",
  "amountPaid", "amount_paid",
  "totalPaid", "total_paid",
  "totalPrice", "totalTripCost",
  "amountCharged", "amountDue",
  "totalAmountDue", "totalAmount",
  "grandTotal", "totalPayable",
  "totalCharges", "totalPolicyCost"
];
2218
/**
 * Decide whether a declaration field name denotes a tax, fee, surcharge,
 * assessment, or similar charge.
 *
 * Matching is by substring on the normalized name. Identifier and date fields
 * (for example "Federal Tax ID", "Taxpayer Number", "Inspection Date",
 * "Filing Date") contain the same keywords but do not carry an amount, so
 * they are excluded before the keyword test.
 *
 * @param {string} fieldName - Field name as extracted.
 * @returns {boolean} `true` when the name looks like a tax/fee line item.
 */
function isTaxOrFeeField(fieldName) {
  const normalized = normalizeFieldName(fieldName);
  // Tax identifiers and dates are not monetary line items.
  if (/taxid|taxpayer|date$/.test(normalized)) {
    return false;
  }
  return /tax|gst|hst|pst|qst|fee|surcharge|assessment|stamp|filing|inspection/.test(normalized);
}
2222
/**
 * Report whether a field name is one of the recognized total-cost names.
 *
 * @param {string} fieldName - Field name to test.
 * @returns {boolean} Result of matching against `TOTAL_COST_PATTERNS`.
 */
function isTotalCostField(fieldName) {
  const matchesTotal = fieldMatches(fieldName, TOTAL_COST_PATTERNS);
  return matchesTotal;
}
2225
/**
 * Classify a tax/fee field name into a line-item type.
 *
 * Rules are checked in order and the first match wins: tax keywords, then
 * "surcharge", then "assessment", then fee keywords.
 *
 * @param {string} fieldName - Field name as extracted.
 * @returns {"tax" | "surcharge" | "assessment" | "fee" | undefined} The type,
 *   or `undefined` when no keyword is present.
 */
function taxFeeType(fieldName) {
  const normalized = normalizeFieldName(fieldName);
  const rules = [
    ["tax", ["tax", "gst", "hst", "pst", "qst"]],
    ["surcharge", ["surcharge"]],
    ["assessment", ["assessment"]],
    ["fee", ["fee", "stamp", "filing"]]
  ];
  const matched = rules.find(([, tokens]) => tokens.some((token) => normalized.includes(token)));
  return matched ? matched[0] : void 0;
}
2233
/**
 * Turn a field identifier into a human-readable, title-cased label.
 *
 * camelCase boundaries, underscores, and hyphens become single spaces, then
 * the first letter of each word is uppercased. A letter counts as word-initial
 * only when it is not preceded by a letter, digit, or apostrophe, so
 * possessives stay intact ("Broker's Fee", not "Broker'S Fee") and accented
 * initials are capitalized correctly ("Émission", not "éMission").
 *
 * @param {string} fieldName - Field identifier such as "stateSalesTax".
 * @returns {string} Title-cased label such as "State Sales Tax".
 */
function titleizeFieldName(fieldName) {
  const spaced = fieldName
    .replace(/([a-z0-9])([A-Z])/g, "$1 $2")
    .replace(/[_-]+/g, " ")
    .replace(/\s+/g, " ")
    .trim();
  return spaced.replace(
    /(^|[^\p{L}\p{N}'\u2019])(\p{L})/gu,
    (match, lead, letter) => lead + letter.toUpperCase()
  );
}
2237
/**
 * Build the de-duplication key for a tax/fee line item.
 *
 * The key joins the normalized name, the normalized amount, and the type with
 * "|". The amount keeps its decimal point and percent sign: stripping them
 * would make "$12.50" and "$1,250" (or "5%" and "$5") collide and silently
 * overwrite one another. Currency symbols, separators, signs, and whitespace
 * are still ignored, so "$1,250.00" and "1250.00" share a key. Non-string
 * names and amounts are stringified rather than throwing.
 *
 * @param {{ name: unknown, amount: unknown, type?: string }} item - Line item.
 * @returns {string} Key of the form "name|amount|type".
 */
function taxFeeKey(item) {
  const normalizedAmount = String(item.amount ?? "")
    .toLowerCase()
    .replace(/[^a-z0-9.%]/g, "");
  return [
    normalizeFieldName(String(item.name ?? "")),
    normalizedAmount,
    item.type ?? ""
  ].join("|");
}
2244
/**
 * Convert a declaration field into a tax/fee line item.
 *
 * @param {{ field: string, value: string }} field - Declaration field.
 * @returns {{ name: string, amount: string, type?: string }} Item with a
 *   title-cased name, the amount passed through `absorbNegative`, and a `type`
 *   property only when `taxFeeType` returns one.
 */
function taxFeeItemFromField(field) {
  const item = {
    name: titleizeFieldName(field.field),
    amount: absorbNegative(field.value)
  };
  const type = taxFeeType(field.field);
  if (type) {
    item.type = type;
  }
  return item;
}
2016
2252
  function absorbNegative(value) {
2017
2253
  return value.replace(/^-\s*/, "").replace(/^\(\s*(.*?)\s*\)$/, "$1");
2018
2254
  }
2019
2255
  function promotePremium(doc) {
2020
2256
  const raw = doc;
2021
2257
  const fields = getDeclarationFields(doc);
2022
- if (!raw.premium) {
2023
- const premium = findFieldValue(fields, PREMIUM_PATTERNS);
2024
- if (premium) raw.premium = premium;
2025
- }
2026
- if (!raw.totalCost) {
2027
- const totalCost = findFieldValue(fields, TOTAL_COST_PATTERNS);
2028
- if (totalCost) raw.totalCost = totalCost;
2029
- }
2258
+ promoteRawOrDeclarationString(raw, fields, "premium", PREMIUM_RAW_KEYS, {
2259
+ patterns: PREMIUM_PATTERNS,
2260
+ reject: (field) => isTaxOrFeeField(field.field)
2261
+ });
2262
+ promoteRawOrDeclarationString(raw, fields, "totalCost", TOTAL_COST_RAW_KEYS, {
2263
+ patterns: TOTAL_COST_PATTERNS
2264
+ });
2030
2265
  if (typeof raw.premium === "string") raw.premium = absorbNegative(raw.premium);
2031
2266
  if (typeof raw.totalCost === "string") raw.totalCost = absorbNegative(raw.totalCost);
2032
2267
  }
2268
/**
 * Build `taxesAndFees` on the document from tax/fee-looking declaration
 * fields, merged with any items already present.
 *
 * Existing items that have both a name and an amount are kept, keyed by
 * `taxFeeKey`; a declaration-derived item with the same key replaces the
 * existing one. Total-cost fields are never treated as line items. The
 * property is written only when at least one item results, and nothing
 * happens when the document has no declaration fields.
 *
 * Declaration entries that are null, lack a string name, or carry a
 * non-string value are skipped: the helpers used below call string methods
 * and would otherwise throw on one malformed entry.
 *
 * @param {object} doc - Document, mutated in place.
 * @returns {void}
 */
function synthesizeTaxesAndFees(doc) {
  const raw = doc;
  const fields = getDeclarationFields(doc);
  if (fields.length === 0) return;
  const existing = Array.isArray(raw.taxesAndFees) ? raw.taxesAndFees : [];
  const byKey = /* @__PURE__ */ new Map();
  for (const item of existing) {
    if (!item?.name || !item?.amount) continue;
    byKey.set(taxFeeKey(item), item);
  }
  for (const field of fields) {
    // Guard against malformed extractor output before touching string methods.
    if (typeof field?.field !== "string" || typeof field.value !== "string") continue;
    if (!field.value.trim()) continue;
    if (!isTaxOrFeeField(field.field)) continue;
    if (isTotalCostField(field.field)) continue;
    const item = taxFeeItemFromField(field);
    byKey.set(taxFeeKey(item), item);
  }
  if (byKey.size > 0) {
    raw.taxesAndFees = [...byKey.values()];
  }
}
2033
2289
  function promoteExtractedFields(doc) {
2034
2290
  promoteCarrierFields(doc);
2035
2291
  promoteBroker(doc);
@@ -2037,44 +2293,53 @@ function promoteExtractedFields(doc) {
2037
2293
  promoteLocations(doc);
2038
2294
  synthesizeLimits(doc);
2039
2295
  synthesizeDeductibles(doc);
2296
+ synthesizeTaxesAndFees(doc);
2040
2297
  promotePremium(doc);
2041
2298
  }
2042
2299
 
2043
2300
  // src/extraction/assembler.ts
2044
2301
  function assembleDocument(documentId, documentType, memory) {
2045
- const carrier = memory.get("carrier_info");
2046
- const insured = memory.get("named_insured");
2047
- const coverages = memory.get("coverage_limits");
2048
- const endorsements = memory.get("endorsements");
2049
- const exclusions = memory.get("exclusions");
2050
- const conditions = memory.get("conditions");
2051
- const premium = memory.get("premium_breakdown");
2052
- const declarations = memory.get("declarations");
2053
- const lossHistory = memory.get("loss_history");
2054
- const sections = memory.get("sections");
2055
- const supplementary = memory.get("supplementary");
2056
- const formInventory = memory.get("form_inventory");
2057
- const classify = memory.get("classify");
2302
+ const carrier = getCarrierInfo(memory);
2303
+ const insured = getNamedInsured(memory);
2304
+ const coverages = getCoverageLimits(memory);
2305
+ const endorsements = readMemoryRecord(memory, "endorsements");
2306
+ const exclusions = readMemoryRecord(memory, "exclusions");
2307
+ const conditions = readMemoryRecord(memory, "conditions");
2308
+ const premium = readMemoryRecord(memory, "premium_breakdown");
2309
+ const declarations = readMemoryRecord(memory, "declarations");
2310
+ const lossHistory = readMemoryRecord(memory, "loss_history");
2311
+ const supplementary = readMemoryRecord(memory, "supplementary");
2312
+ const formInventory = readMemoryRecord(memory, "form_inventory");
2313
+ const classify = readMemoryRecord(memory, "classify");
2314
+ const lossPayees = readRecordArray(insured, "lossPayees");
2315
+ const mortgageHolders = readRecordArray(insured, "mortgageHolders");
2058
2316
  const base = {
2059
2317
  id: documentId,
2060
- carrier: carrier?.carrierName ?? "Unknown",
2061
- insuredName: insured?.insuredName ?? "Unknown",
2062
- coverages: coverages?.coverages ?? [],
2063
- policyTypes: classify?.policyTypes,
2318
+ carrier: readRecordValue(carrier, "carrierName") ?? "Unknown",
2319
+ insuredName: readRecordValue(insured, "insuredName") ?? "Unknown",
2320
+ coverages: getCoverageLimitCoverages(memory),
2321
+ policyTypes: readRecordValue(classify, "policyTypes"),
2064
2322
  ...sanitizeNulls(carrier ?? {}),
2065
2323
  ...sanitizeNulls(insured ?? {}),
2066
2324
  // Map named_insured extractor's loss payees/mortgage holders to EndorsementParty shape
2067
- ...Array.isArray(insured?.lossPayees) && insured.lossPayees.length > 0 ? { lossPayees: insured.lossPayees.map((lp) => ({ ...lp, role: "loss_payee" })) } : {},
2068
- ...Array.isArray(insured?.mortgageHolders) && insured.mortgageHolders.length > 0 ? { mortgageHolders: insured.mortgageHolders.map((mh) => ({ ...mh, role: "mortgage_holder" })) } : {},
2325
+ ...lossPayees && lossPayees.length > 0 ? { lossPayees: lossPayees.map((lp) => ({ ...lp, role: "loss_payee" })) } : {},
2326
+ ...mortgageHolders && mortgageHolders.length > 0 ? {
2327
+ mortgageHolders: mortgageHolders.map((mh) => ({
2328
+ ...mh,
2329
+ role: "mortgage_holder"
2330
+ }))
2331
+ } : {},
2069
2332
  ...sanitizeNulls(coverages ?? {}),
2070
2333
  ...sanitizeNulls(premium ?? {}),
2071
2334
  ...sanitizeNulls(supplementary ?? {}),
2072
- supplementaryFacts: supplementary?.auxiliaryFacts,
2073
- endorsements: endorsements?.endorsements,
2074
- exclusions: exclusions?.exclusions,
2075
- conditions: conditions?.conditions,
2076
- sections: sections?.sections,
2077
- formInventory: formInventory?.forms,
2335
+ supplementaryFacts: readRecordValue(supplementary, "auxiliaryFacts"),
2336
+ endorsements: readRecordValue(endorsements, "endorsements"),
2337
+ exclusions: readRecordValue(exclusions, "exclusions"),
2338
+ conditions: readRecordValue(conditions, "conditions"),
2339
+ sections: getSections(memory),
2340
+ formInventory: readRecordValue(formInventory, "forms"),
2341
+ definitions: getDefinitions(memory),
2342
+ coveredReasons: getCoveredReasons(memory),
2078
2343
  declarations: declarations ? sanitizeNulls(declarations) : void 0,
2079
2344
  ...sanitizeNulls(lossHistory ?? {})
2080
2345
  };
@@ -2083,21 +2348,21 @@ function assembleDocument(documentId, documentType, memory) {
2083
2348
  doc = {
2084
2349
  ...base,
2085
2350
  type: "policy",
2086
- policyNumber: carrier?.policyNumber ?? insured?.policyNumber ?? "Unknown",
2087
- effectiveDate: carrier?.effectiveDate ?? insured?.effectiveDate ?? "Unknown",
2088
- expirationDate: carrier?.expirationDate,
2089
- policyTermType: carrier?.policyTermType
2351
+ policyNumber: readRecordValue(carrier, "policyNumber") ?? readRecordValue(insured, "policyNumber") ?? "Unknown",
2352
+ effectiveDate: readRecordValue(carrier, "effectiveDate") ?? readRecordValue(insured, "effectiveDate") ?? "Unknown",
2353
+ expirationDate: readRecordValue(carrier, "expirationDate"),
2354
+ policyTermType: readRecordValue(carrier, "policyTermType")
2090
2355
  };
2091
2356
  } else {
2092
2357
  doc = {
2093
2358
  ...base,
2094
2359
  type: "quote",
2095
- quoteNumber: carrier?.quoteNumber ?? "Unknown",
2096
- proposedEffectiveDate: carrier?.proposedEffectiveDate,
2097
- proposedExpirationDate: carrier?.proposedExpirationDate,
2098
- subjectivities: coverages?.subjectivities,
2099
- underwritingConditions: coverages?.underwritingConditions,
2100
- premiumBreakdown: premium?.premiumBreakdown
2360
+ quoteNumber: readRecordValue(carrier, "quoteNumber") ?? "Unknown",
2361
+ proposedEffectiveDate: readRecordValue(carrier, "proposedEffectiveDate"),
2362
+ proposedExpirationDate: readRecordValue(carrier, "proposedExpirationDate"),
2363
+ subjectivities: readRecordValue(coverages, "subjectivities"),
2364
+ underwritingConditions: readRecordValue(coverages, "underwritingConditions"),
2365
+ premiumBreakdown: readRecordValue(premium, "premiumBreakdown")
2101
2366
  };
2102
2367
  }
2103
2368
  promoteExtractedFields(doc);
@@ -2199,6 +2464,23 @@ ${block}`;
2199
2464
  }
2200
2465
 
2201
2466
  // src/extraction/formatter.ts
2467
// Trimmed content at or above this many characters is always formatted.
var LONG_CONTENT_THRESHOLD = 1200;
/**
 * Decide whether a piece of extracted text is worth sending through the
 * formatter.
 *
 * Blank text is never formatted. Text is formatted when it is long, contains
 * markdown/HTML-like markup (code fences, `<br>`, bold/underscore/backtick
 * markers, links or images, headings, list items), has whitespace artifacts
 * (tabs, runs of three or more spaces, three or more consecutive newlines,
 * trailing spaces on a line), or looks tabular (a line with two or more pipes,
 * or at least two space-aligned rows).
 *
 * A space-aligned row is a trimmed line containing at least two runs of two
 * or more whitespace characters, i.e. three or more columns. Columns may be
 * any width; the earlier single-character middle-column requirement missed
 * ordinary rows such as "Coverage A  $500,000  $1,000".
 *
 * @param {string} text - Extracted content.
 * @returns {boolean} `true` when the text should be formatted.
 */
function shouldFormatContent(text) {
  const trimmed = text.trim();
  if (trimmed.length === 0) return false;
  if (trimmed.length >= LONG_CONTENT_THRESHOLD) return true;
  if (/```|~~~|<br\s*\/?>/i.test(trimmed)) return true;
  if (/(^|\s)(\*\*|__|`)/.test(trimmed)) return true;
  if (/!?\[[^\]]+\]\([^)]+\)/.test(trimmed)) return true;
  if (/^\s{0,3}#{1,6}\s*\S/m.test(trimmed)) return true;
  if (/^\s{0,6}(?:[-*+]|\d+[.)])\s+\S/m.test(trimmed)) return true;
  // Tested against the untrimmed text so trailing whitespace is still seen.
  if (/\t|[^\S\r\n]{3,}|\n{3,}|[ \t]+$/m.test(text)) return true;
  const lines = trimmed.split(/\r?\n/).map((line) => line.trim()).filter(Boolean);
  if (lines.some((line) => (line.match(/\|/g)?.length ?? 0) >= 2)) return true;
  // Lines are trimmed, so every whitespace run sits between two columns.
  const spaceAlignedRows = lines.filter((line) => (line.match(/\s{2,}/g)?.length ?? 0) >= 2);
  return spaceAlignedRows.length >= 2;
}
2202
2484
  function collectContentFields(doc) {
2203
2485
  const entries = [];
2204
2486
  let id = 0;
@@ -2234,6 +2516,21 @@ function collectContentFields(doc) {
2234
2516
  add(`conditions[${i}].content`, doc.conditions[i].content);
2235
2517
  }
2236
2518
  }
2519
+ const extendedDoc = doc;
2520
+ if (extendedDoc.definitions) {
2521
+ for (let i = 0; i < extendedDoc.definitions.length; i++) {
2522
+ add(`definitions[${i}].definition`, extendedDoc.definitions[i].definition);
2523
+ }
2524
+ }
2525
+ const coveredReasons = extendedDoc.coveredReasons ?? extendedDoc.covered_reasons;
2526
+ if (coveredReasons) {
2527
+ for (let i = 0; i < coveredReasons.length; i++) {
2528
+ add(`coveredReasons[${i}].content`, coveredReasons[i].content);
2529
+ coveredReasons[i].conditions?.forEach((condition, j) => {
2530
+ add(`coveredReasons[${i}].conditions[${j}]`, condition);
2531
+ });
2532
+ }
2533
+ }
2237
2534
  return entries;
2238
2535
  }
2239
2536
  function parseFormatResponse(response) {
@@ -2249,6 +2546,10 @@ function parseFormatResponse(response) {
2249
2546
  return results;
2250
2547
  }
2251
2548
  function applyFormattedContent(doc, entries, formatted) {
2549
+ const docRecord = doc;
2550
+ if (!docRecord.coveredReasons && docRecord.covered_reasons) {
2551
+ docRecord.coveredReasons = docRecord.covered_reasons;
2552
+ }
2252
2553
  for (const entry of entries) {
2253
2554
  const cleaned = formatted.get(entry.id);
2254
2555
  if (!cleaned) continue;
@@ -2257,6 +2558,14 @@ function applyFormattedContent(doc, entries, formatted) {
2257
2558
  const [, field, idx1, sub1, idx2, sub2] = segments;
2258
2559
  if (!sub1) {
2259
2560
  doc[field] = cleaned;
2561
+ } else if (idx2 && !sub2) {
2562
+ const arr = doc[field];
2563
+ if (arr && arr[Number(idx1)]) {
2564
+ const nested = arr[Number(idx1)][sub1];
2565
+ if (Array.isArray(nested)) {
2566
+ nested[Number(idx2)] = cleaned;
2567
+ }
2568
+ }
2260
2569
  } else if (!sub2) {
2261
2570
  const arr = doc[field];
2262
2571
  if (arr && arr[Number(idx1)]) {
@@ -2275,7 +2584,7 @@ function applyFormattedContent(doc, entries, formatted) {
2275
2584
  }
2276
2585
  var MAX_ENTRIES_PER_BATCH = 20;
2277
2586
  async function formatDocumentContent(doc, generateText, options) {
2278
- const entries = collectContentFields(doc);
2587
+ const entries = collectContentFields(doc).filter((entry) => shouldFormatContent(entry.text));
2279
2588
  const totalUsage = { inputTokens: 0, outputTokens: 0 };
2280
2589
  if (entries.length === 0) {
2281
2590
  return { document: doc, usage: totalUsage };
@@ -2321,6 +2630,16 @@ function formatAddress(addr) {
2321
2630
  const parts = [addr.street1, addr.street2, addr.city, addr.state, addr.zip, addr.country].filter(Boolean);
2322
2631
  return parts.join(", ");
2323
2632
  }
2633
/**
 * Coerce a value into an array of record-like objects.
 *
 * @param {unknown} value - Candidate array.
 * @returns {object[]} A new array holding only the non-null, non-array object
 *   elements of `value`; empty when `value` is not an array.
 */
function asRecordArray(value) {
  if (!Array.isArray(value)) {
    return [];
  }
  return value.filter((item) => typeof item === "object" && item !== null && !Array.isArray(item));
}
2636
/**
 * Return the first non-blank string property of `item`, trying `keys` in order.
 *
 * @param {object} item - Object to read from.
 * @param {string[]} keys - Property names to try.
 * @returns {string | undefined} The first usable (untrimmed) string, or `undefined`.
 */
function firstString(item, keys) {
  const matchedKey = keys.find((key) => {
    const candidate = item[key];
    return typeof candidate === "string" && candidate.trim() !== "";
  });
  return matchedKey === void 0 ? void 0 : item[matchedKey];
}
2324
2643
  function chunkDocument(doc) {
2325
2644
  const ensureArray = (v) => Array.isArray(v) ? v : [];
2326
2645
  doc = {
@@ -2334,6 +2653,7 @@ function chunkDocument(doc) {
2334
2653
  const chunks = [];
2335
2654
  const docId = doc.id;
2336
2655
  const policyTypesStr = doc.policyTypes?.length ? doc.policyTypes.join(",") : void 0;
2656
+ const extendedDoc = doc;
2337
2657
  function stringMetadata(entries) {
2338
2658
  const base = Object.fromEntries(
2339
2659
  Object.entries(entries).filter(([, value]) => value !== void 0 && value !== null && String(value).length > 0).map(([key, value]) => [key, String(value)])
@@ -2341,11 +2661,22 @@ function chunkDocument(doc) {
2341
2661
  if (policyTypesStr) base.policyTypes = policyTypesStr;
2342
2662
  return base;
2343
2663
  }
2344
- chunks.push({
2345
- id: `${docId}:carrier_info:0`,
2346
- documentId: docId,
2347
- type: "carrier_info",
2348
- text: [
2664
+ function lines(values) {
2665
+ return values.filter(Boolean).join("\n");
2666
+ }
2667
+ function pushChunk(idSuffix, type, text, metadata) {
2668
+ chunks.push({
2669
+ id: `${docId}:${idSuffix}`,
2670
+ documentId: docId,
2671
+ type,
2672
+ text,
2673
+ metadata: stringMetadata(metadata)
2674
+ });
2675
+ }
2676
+ pushChunk(
2677
+ "carrier_info:0",
2678
+ "carrier_info",
2679
+ lines([
2349
2680
  `Carrier: ${doc.carrier}`,
2350
2681
  doc.carrierLegalName ? `Legal Name: ${doc.carrierLegalName}` : null,
2351
2682
  doc.carrierNaicNumber ? `NAIC: ${doc.carrierNaicNumber}` : null,
@@ -2362,94 +2693,83 @@ function chunkDocument(doc) {
2362
2693
  doc.isPackage != null ? `Package: ${doc.isPackage ? "Yes" : "No"}` : null,
2363
2694
  doc.security ? `Security: ${doc.security}` : null,
2364
2695
  doc.policyTypes?.length ? `Policy Types: ${doc.policyTypes.join(", ")}` : null
2365
- ].filter(Boolean).join("\n"),
2366
- metadata: stringMetadata({ carrier: doc.carrier, documentType: doc.type })
2367
- });
2696
+ ]),
2697
+ { carrier: doc.carrier, documentType: doc.type }
2698
+ );
2368
2699
  if (doc.summary) {
2369
- chunks.push({
2370
- id: `${docId}:declaration:summary`,
2371
- documentId: docId,
2372
- type: "declaration",
2373
- text: `Policy Summary: ${doc.summary}`,
2374
- metadata: stringMetadata({ documentType: doc.type })
2375
- });
2700
+ pushChunk("declaration:summary", "declaration", `Policy Summary: ${doc.summary}`, { documentType: doc.type });
2376
2701
  }
2377
2702
  if (doc.type === "policy") {
2378
2703
  const pol = doc;
2379
- chunks.push({
2380
- id: `${docId}:declaration:policy_details`,
2381
- documentId: docId,
2382
- type: "declaration",
2383
- text: [
2704
+ pushChunk(
2705
+ "declaration:policy_details",
2706
+ "declaration",
2707
+ lines([
2384
2708
  `Policy Number: ${pol.policyNumber}`,
2385
2709
  `Effective Date: ${pol.effectiveDate}`,
2386
2710
  pol.expirationDate ? `Expiration Date: ${pol.expirationDate}` : null,
2387
2711
  pol.policyTermType ? `Term Type: ${pol.policyTermType}` : null,
2388
2712
  pol.effectiveTime ? `Effective Time: ${pol.effectiveTime}` : null,
2389
2713
  pol.nextReviewDate ? `Next Review Date: ${pol.nextReviewDate}` : null
2390
- ].filter(Boolean).join("\n"),
2391
- metadata: stringMetadata({
2714
+ ]),
2715
+ {
2392
2716
  policyNumber: pol.policyNumber,
2393
2717
  effectiveDate: pol.effectiveDate,
2394
2718
  expirationDate: pol.expirationDate,
2395
2719
  documentType: doc.type
2396
- })
2397
- });
2720
+ }
2721
+ );
2398
2722
  } else {
2399
2723
  const quote = doc;
2400
- chunks.push({
2401
- id: `${docId}:declaration:quote_details`,
2402
- documentId: docId,
2403
- type: "declaration",
2404
- text: [
2724
+ pushChunk(
2725
+ "declaration:quote_details",
2726
+ "declaration",
2727
+ lines([
2405
2728
  `Quote Number: ${quote.quoteNumber}`,
2406
2729
  quote.proposedEffectiveDate ? `Proposed Effective Date: ${quote.proposedEffectiveDate}` : null,
2407
2730
  quote.proposedExpirationDate ? `Proposed Expiration Date: ${quote.proposedExpirationDate}` : null,
2408
2731
  quote.quoteExpirationDate ? `Quote Expiration Date: ${quote.quoteExpirationDate}` : null
2409
- ].filter(Boolean).join("\n"),
2410
- metadata: stringMetadata({
2732
+ ]),
2733
+ {
2411
2734
  quoteNumber: quote.quoteNumber,
2412
2735
  documentType: doc.type
2413
- })
2414
- });
2736
+ }
2737
+ );
2415
2738
  }
2416
2739
  if (doc.insurer) {
2417
- chunks.push({
2418
- id: `${docId}:party:insurer`,
2419
- documentId: docId,
2420
- type: "party",
2421
- text: [
2740
+ pushChunk(
2741
+ "party:insurer",
2742
+ "party",
2743
+ lines([
2422
2744
  `Insurer: ${doc.insurer.legalName}`,
2423
2745
  doc.insurer.naicNumber ? `NAIC: ${doc.insurer.naicNumber}` : null,
2424
2746
  doc.insurer.amBestRating ? `AM Best Rating: ${doc.insurer.amBestRating}` : null,
2425
2747
  doc.insurer.amBestNumber ? `AM Best Number: ${doc.insurer.amBestNumber}` : null,
2426
2748
  doc.insurer.admittedStatus ? `Admitted Status: ${doc.insurer.admittedStatus}` : null,
2427
2749
  doc.insurer.stateOfDomicile ? `State of Domicile: ${doc.insurer.stateOfDomicile}` : null
2428
- ].filter(Boolean).join("\n"),
2429
- metadata: stringMetadata({ partyRole: "insurer", partyName: doc.insurer.legalName, documentType: doc.type })
2430
- });
2750
+ ]),
2751
+ { partyRole: "insurer", partyName: doc.insurer.legalName, documentType: doc.type }
2752
+ );
2431
2753
  }
2432
2754
  if (doc.producer) {
2433
- chunks.push({
2434
- id: `${docId}:party:producer`,
2435
- documentId: docId,
2436
- type: "party",
2437
- text: [
2755
+ pushChunk(
2756
+ "party:producer",
2757
+ "party",
2758
+ lines([
2438
2759
  `Producer/Broker: ${doc.producer.agencyName}`,
2439
2760
  doc.producer.contactName ? `Contact: ${doc.producer.contactName}` : null,
2440
2761
  doc.producer.licenseNumber ? `License: ${doc.producer.licenseNumber}` : null,
2441
2762
  doc.producer.phone ? `Phone: ${doc.producer.phone}` : null,
2442
2763
  doc.producer.email ? `Email: ${doc.producer.email}` : null,
2443
2764
  doc.producer.address ? `Address: ${formatAddress(doc.producer.address)}` : null
2444
- ].filter(Boolean).join("\n"),
2445
- metadata: stringMetadata({ partyRole: "producer", partyName: doc.producer.agencyName, documentType: doc.type })
2446
- });
2765
+ ]),
2766
+ { partyRole: "producer", partyName: doc.producer.agencyName, documentType: doc.type }
2767
+ );
2447
2768
  }
2448
- chunks.push({
2449
- id: `${docId}:named_insured:0`,
2450
- documentId: docId,
2451
- type: "named_insured",
2452
- text: [
2769
+ pushChunk(
2770
+ "named_insured:0",
2771
+ "named_insured",
2772
+ lines([
2453
2773
  `Insured: ${doc.insuredName}`,
2454
2774
  doc.insuredDba ? `DBA: ${doc.insuredDba}` : null,
2455
2775
  doc.insuredEntityType ? `Entity Type: ${doc.insuredEntityType}` : null,
@@ -2457,36 +2777,34 @@ function chunkDocument(doc) {
2457
2777
  doc.insuredSicCode ? `SIC: ${doc.insuredSicCode}` : null,
2458
2778
  doc.insuredNaicsCode ? `NAICS: ${doc.insuredNaicsCode}` : null,
2459
2779
  doc.insuredAddress ? `Address: ${formatAddress(doc.insuredAddress)}` : null
2460
- ].filter(Boolean).join("\n"),
2461
- metadata: stringMetadata({ insuredName: doc.insuredName, documentType: doc.type })
2462
- });
2780
+ ]),
2781
+ { insuredName: doc.insuredName, documentType: doc.type }
2782
+ );
2463
2783
  doc.additionalNamedInsureds?.forEach((insured, i) => {
2464
- chunks.push({
2465
- id: `${docId}:named_insured:${i + 1}`,
2466
- documentId: docId,
2467
- type: "named_insured",
2468
- text: [
2784
+ pushChunk(
2785
+ `named_insured:${i + 1}`,
2786
+ "named_insured",
2787
+ lines([
2469
2788
  `Additional Named Insured: ${insured.name}`,
2470
2789
  insured.address ? `Address: ${formatAddress(insured.address)}` : null,
2471
2790
  insured.relationship ? `Relationship: ${insured.relationship}` : null
2472
- ].filter(Boolean).join("\n"),
2473
- metadata: stringMetadata({ insuredName: insured.name, role: "additional_named_insured", documentType: doc.type })
2474
- });
2791
+ ]),
2792
+ { insuredName: insured.name, role: "additional_named_insured", documentType: doc.type }
2793
+ );
2475
2794
  });
2476
2795
  doc.coverages.forEach((cov, i) => {
2477
- chunks.push({
2478
- id: `${docId}:coverage:${i}`,
2479
- documentId: docId,
2480
- type: "coverage",
2481
- text: [
2796
+ pushChunk(
2797
+ `coverage:${i}`,
2798
+ "coverage",
2799
+ lines([
2482
2800
  `Coverage: ${cov.name}`,
2483
2801
  `Limit: ${cov.limit}`,
2484
2802
  cov.limitValueType ? `Limit Type: ${cov.limitValueType}` : null,
2485
2803
  cov.deductible ? `Deductible: ${cov.deductible}` : null,
2486
2804
  cov.deductibleValueType ? `Deductible Type: ${cov.deductibleValueType}` : null,
2487
2805
  cov.originalContent ? `Source: ${cov.originalContent}` : null
2488
- ].filter(Boolean).join("\n"),
2489
- metadata: stringMetadata({
2806
+ ]),
2807
+ {
2490
2808
  coverageName: cov.name,
2491
2809
  limit: cov.limit,
2492
2810
  limitValueType: cov.limitValueType,
@@ -2496,15 +2814,14 @@ function chunkDocument(doc) {
2496
2814
  pageNumber: cov.pageNumber,
2497
2815
  sectionRef: cov.sectionRef,
2498
2816
  documentType: doc.type
2499
- })
2500
- });
2817
+ }
2818
+ );
2501
2819
  });
2502
2820
  doc.enrichedCoverages?.forEach((cov, i) => {
2503
- chunks.push({
2504
- id: `${docId}:coverage:enriched:${i}`,
2505
- documentId: docId,
2506
- type: "coverage",
2507
- text: [
2821
+ pushChunk(
2822
+ `coverage:enriched:${i}`,
2823
+ "coverage",
2824
+ lines([
2508
2825
  `Coverage: ${cov.name}`,
2509
2826
  cov.coverageCode ? `Code: ${cov.coverageCode}` : null,
2510
2827
  `Limit: ${cov.limit}`,
@@ -2521,8 +2838,8 @@ function chunkDocument(doc) {
2521
2838
  `Included: ${cov.included ? "Yes" : "No"}`,
2522
2839
  cov.premium ? `Premium: ${cov.premium}` : null,
2523
2840
  cov.originalContent ? `Source: ${cov.originalContent}` : null
2524
- ].filter(Boolean).join("\n"),
2525
- metadata: stringMetadata({
2841
+ ]),
2842
+ {
2526
2843
  coverageName: cov.name,
2527
2844
  coverageCode: cov.coverageCode,
2528
2845
  limit: cov.limit,
@@ -2531,8 +2848,8 @@ function chunkDocument(doc) {
2531
2848
  pageNumber: cov.pageNumber,
2532
2849
  included: cov.included,
2533
2850
  documentType: doc.type
2534
- })
2535
- });
2851
+ }
2852
+ );
2536
2853
  });
2537
2854
  if (doc.limits) {
2538
2855
  const limitLines = ["Limit Schedule"];
@@ -2556,39 +2873,31 @@ function chunkDocument(doc) {
2556
2873
  limitLines.push(`Employers Liability \u2014 Each Accident: ${lim.employersLiability.eachAccident}, Disease Policy Limit: ${lim.employersLiability.diseasePolicyLimit}, Disease Each Employee: ${lim.employersLiability.diseaseEachEmployee}`);
2557
2874
  }
2558
2875
  if (lim.defenseCostTreatment) limitLines.push(`Defense Cost Treatment: ${lim.defenseCostTreatment}`);
2559
- chunks.push({
2560
- id: `${docId}:coverage:limit_schedule`,
2561
- documentId: docId,
2562
- type: "coverage",
2563
- text: limitLines.join("\n"),
2564
- metadata: stringMetadata({ coverageName: "limit_schedule", documentType: doc.type })
2565
- });
2876
+ pushChunk("coverage:limit_schedule", "coverage", limitLines.join("\n"), { coverageName: "limit_schedule", documentType: doc.type });
2566
2877
  lim.sublimits?.forEach((sub, i) => {
2567
- chunks.push({
2568
- id: `${docId}:coverage:sublimit:${i}`,
2569
- documentId: docId,
2570
- type: "coverage",
2571
- text: [
2878
+ pushChunk(
2879
+ `coverage:sublimit:${i}`,
2880
+ "coverage",
2881
+ lines([
2572
2882
  `Sublimit: ${sub.name}`,
2573
2883
  `Limit: ${sub.limit}`,
2574
2884
  sub.appliesTo ? `Applies To: ${sub.appliesTo}` : null,
2575
2885
  sub.deductible ? `Deductible: ${sub.deductible}` : null
2576
- ].filter(Boolean).join("\n"),
2577
- metadata: stringMetadata({ coverageName: sub.name, limit: sub.limit, documentType: doc.type })
2578
- });
2886
+ ]),
2887
+ { coverageName: sub.name, limit: sub.limit, documentType: doc.type }
2888
+ );
2579
2889
  });
2580
2890
  lim.sharedLimits?.forEach((sl, i) => {
2581
- chunks.push({
2582
- id: `${docId}:coverage:shared_limit:${i}`,
2583
- documentId: docId,
2584
- type: "coverage",
2585
- text: [
2891
+ pushChunk(
2892
+ `coverage:shared_limit:${i}`,
2893
+ "coverage",
2894
+ [
2586
2895
  `Shared Limit: ${sl.description}`,
2587
2896
  `Limit: ${sl.limit}`,
2588
2897
  `Coverage Parts: ${sl.coverageParts.join(", ")}`
2589
2898
  ].join("\n"),
2590
- metadata: stringMetadata({ coverageName: sl.description, limit: sl.limit, documentType: doc.type })
2591
- });
2899
+ { coverageName: sl.description, limit: sl.limit, documentType: doc.type }
2900
+ );
2592
2901
  });
2593
2902
  }
2594
2903
  if (doc.deductibles) {
@@ -2602,12 +2911,9 @@ function chunkDocument(doc) {
2602
2911
  if (ded.waitingPeriod) dedLines.push(`Waiting Period: ${ded.waitingPeriod}`);
2603
2912
  if (ded.appliesTo) dedLines.push(`Applies To: ${ded.appliesTo}`);
2604
2913
  if (dedLines.length > 1) {
2605
- chunks.push({
2606
- id: `${docId}:coverage:deductible_schedule`,
2607
- documentId: docId,
2608
- type: "coverage",
2609
- text: dedLines.join("\n"),
2610
- metadata: stringMetadata({ coverageName: "deductible_schedule", documentType: doc.type })
2914
+ pushChunk("coverage:deductible_schedule", "coverage", dedLines.join("\n"), {
2915
+ coverageName: "deductible_schedule",
2916
+ documentType: doc.type
2611
2917
  });
2612
2918
  }
2613
2919
  }
@@ -2619,76 +2925,141 @@ function chunkDocument(doc) {
2619
2925
  doc.extendedReportingPeriod?.supplementalPremium ? `Extended Reporting Period Premium: ${doc.extendedReportingPeriod.supplementalPremium}` : null
2620
2926
  ].filter(Boolean);
2621
2927
  if (claimsMadeLines.length > 0) {
2622
- chunks.push({
2623
- id: `${docId}:coverage:claims_made_details`,
2624
- documentId: docId,
2625
- type: "coverage",
2626
- text: claimsMadeLines.join("\n"),
2627
- metadata: stringMetadata({ coverageName: "claims_made_details", documentType: doc.type })
2928
+ pushChunk("coverage:claims_made_details", "coverage", claimsMadeLines.join("\n"), {
2929
+ coverageName: "claims_made_details",
2930
+ documentType: doc.type
2628
2931
  });
2629
2932
  }
2630
2933
  doc.formInventory?.forEach((form, i) => {
2631
- chunks.push({
2632
- id: `${docId}:declaration:form:${i}`,
2633
- documentId: docId,
2634
- type: "declaration",
2635
- text: [
2934
+ pushChunk(
2935
+ `declaration:form:${i}`,
2936
+ "declaration",
2937
+ lines([
2636
2938
  `Form: ${form.formNumber}`,
2637
2939
  form.title ? `Title: ${form.title}` : null,
2638
2940
  `Type: ${form.formType}`,
2639
2941
  form.editionDate ? `Edition: ${form.editionDate}` : null,
2640
2942
  form.pageStart ? `Pages: ${form.pageStart}${form.pageEnd ? `-${form.pageEnd}` : ""}` : null
2641
- ].filter(Boolean).join("\n"),
2642
- metadata: stringMetadata({
2943
+ ]),
2944
+ {
2643
2945
  formNumber: form.formNumber,
2644
2946
  formType: form.formType,
2645
2947
  documentType: doc.type
2646
- })
2647
- });
2948
+ }
2949
+ );
2648
2950
  });
2649
2951
  doc.endorsements?.forEach((end, i) => {
2650
- chunks.push({
2651
- id: `${docId}:endorsement:${i}`,
2652
- documentId: docId,
2653
- type: "endorsement",
2654
- text: `Endorsement: ${end.title}
2952
+ pushChunk(
2953
+ `endorsement:${i}`,
2954
+ "endorsement",
2955
+ `Endorsement: ${end.title}
2655
2956
  ${end.content}`.trim(),
2656
- metadata: stringMetadata({
2957
+ {
2657
2958
  endorsementType: end.endorsementType,
2658
2959
  formNumber: end.formNumber,
2659
2960
  pageStart: end.pageStart,
2660
2961
  pageEnd: end.pageEnd,
2661
2962
  documentType: doc.type
2662
- })
2663
- });
2963
+ }
2964
+ );
2664
2965
  });
2665
2966
  doc.exclusions?.forEach((exc, i) => {
2666
- chunks.push({
2667
- id: `${docId}:exclusion:${i}`,
2668
- documentId: docId,
2669
- type: "exclusion",
2670
- text: `Exclusion: ${exc.name}
2671
- ${exc.content}`.trim(),
2672
- metadata: stringMetadata({ formNumber: exc.formNumber, pageNumber: exc.pageNumber, documentType: doc.type })
2967
+ pushChunk(`exclusion:${i}`, "exclusion", `Exclusion: ${exc.name}
2968
+ ${exc.content}`.trim(), {
2969
+ formNumber: exc.formNumber,
2970
+ pageNumber: exc.pageNumber,
2971
+ documentType: doc.type
2673
2972
  });
2674
2973
  });
2675
2974
  doc.conditions?.forEach((cond, i) => {
2676
- chunks.push({
2677
- id: `${docId}:condition:${i}`,
2678
- documentId: docId,
2679
- type: "condition",
2680
- text: [
2975
+ pushChunk(
2976
+ `condition:${i}`,
2977
+ "condition",
2978
+ [
2681
2979
  `Condition: ${cond.name}`,
2682
2980
  `Type: ${cond.conditionType}`,
2683
2981
  cond.content,
2684
2982
  ...cond.keyValues?.map((kv) => `${kv.key}: ${kv.value}`) ?? []
2685
2983
  ].join("\n"),
2686
- metadata: stringMetadata({
2984
+ {
2687
2985
  conditionName: cond.name,
2688
2986
  conditionType: cond.conditionType,
2689
2987
  pageNumber: cond.pageNumber,
2690
2988
  documentType: doc.type
2691
- })
2989
+ }
2990
+ );
2991
+ });
2992
+ asRecordArray(extendedDoc.definitions).forEach((definition, i) => {
2993
+ const term = firstString(definition, ["term", "name", "title"]) ?? `Definition ${i + 1}`;
2994
+ const body = firstString(definition, ["definition", "content", "text", "meaning"]);
2995
+ pushChunk(
2996
+ `definition:${i}`,
2997
+ "definition",
2998
+ lines([
2999
+ `Definition: ${term}`,
3000
+ body,
3001
+ firstString(definition, ["originalContent", "source"]) ? `Source: ${firstString(definition, ["originalContent", "source"])}` : null
3002
+ ]),
3003
+ {
3004
+ term,
3005
+ formNumber: firstString(definition, ["formNumber"]),
3006
+ formTitle: firstString(definition, ["formTitle"]),
3007
+ pageNumber: typeof definition.pageNumber === "number" ? definition.pageNumber : void 0,
3008
+ sectionRef: firstString(definition, ["sectionRef", "sectionTitle"]),
3009
+ documentType: doc.type
3010
+ }
3011
+ );
3012
+ });
3013
+ const coveredReasons = asRecordArray(extendedDoc.coveredReasons ?? extendedDoc.covered_reasons);
3014
+ coveredReasons.forEach((coveredReason, i) => {
3015
+ const title = firstString(coveredReason, ["title", "name", "reason", "peril", "cause"]) ?? `Covered Reason ${i + 1}`;
3016
+ const coverageName = firstString(coveredReason, ["coverageName", "coverage", "coveragePart"]);
3017
+ const reasonNumber = firstString(coveredReason, ["reasonNumber", "number"]);
3018
+ const body = firstString(coveredReason, ["content", "description", "text", "coverageGrant"]);
3019
+ pushChunk(
3020
+ `covered_reason:${i}`,
3021
+ "covered_reason",
3022
+ lines([
3023
+ coverageName ? `Coverage: ${coverageName}` : null,
3024
+ reasonNumber ? `Reason Number: ${reasonNumber}` : null,
3025
+ `Covered Reason: ${title}`,
3026
+ body,
3027
+ firstString(coveredReason, ["originalContent", "source"]) ? `Source: ${firstString(coveredReason, ["originalContent", "source"])}` : null
3028
+ ]),
3029
+ {
3030
+ coverageName,
3031
+ reasonNumber,
3032
+ title,
3033
+ formNumber: firstString(coveredReason, ["formNumber"]),
3034
+ formTitle: firstString(coveredReason, ["formTitle"]),
3035
+ pageNumber: typeof coveredReason.pageNumber === "number" ? coveredReason.pageNumber : void 0,
3036
+ sectionRef: firstString(coveredReason, ["sectionRef", "sectionTitle"]),
3037
+ documentType: doc.type
3038
+ }
3039
+ );
3040
+ const conditions = Array.isArray(coveredReason.conditions) ? coveredReason.conditions.filter((condition) => typeof condition === "string" && condition.trim().length > 0) : [];
3041
+ conditions.forEach((condition, conditionIndex) => {
3042
+ pushChunk(
3043
+ `covered_reason:${i}:condition:${conditionIndex}`,
3044
+ "covered_reason",
3045
+ lines([
3046
+ coverageName ? `Coverage: ${coverageName}` : null,
3047
+ reasonNumber ? `Reason Number: ${reasonNumber}` : null,
3048
+ `Covered Reason Condition: ${title}`,
3049
+ condition
3050
+ ]),
3051
+ {
3052
+ coverageName,
3053
+ reasonNumber,
3054
+ title,
3055
+ conditionIndex,
3056
+ formNumber: firstString(coveredReason, ["formNumber"]),
3057
+ formTitle: firstString(coveredReason, ["formTitle"]),
3058
+ pageNumber: typeof coveredReason.pageNumber === "number" ? coveredReason.pageNumber : void 0,
3059
+ sectionRef: firstString(coveredReason, ["sectionRef", "sectionTitle"]),
3060
+ documentType: doc.type
3061
+ }
3062
+ );
2692
3063
  });
2693
3064
  });
2694
3065
  if (doc.declarations) {
@@ -2703,50 +3074,42 @@ ${exc.content}`.trim(),
2703
3074
  const declMeta = { documentType: doc.type };
2704
3075
  if (typeof decl.formType === "string") declMeta.formType = decl.formType;
2705
3076
  if (typeof decl.line === "string") declMeta.declarationLine = decl.line;
2706
- chunks.push({
2707
- id: `${docId}:declaration:0`,
2708
- documentId: docId,
2709
- type: "declaration",
2710
- text: `Declarations
2711
- ${declLines.join("\n")}`,
2712
- metadata: stringMetadata(declMeta)
2713
- });
3077
+ pushChunk("declaration:0", "declaration", `Declarations
3078
+ ${declLines.join("\n")}`, declMeta);
2714
3079
  }
2715
3080
  }
2716
3081
  doc.sections?.forEach((sec, i) => {
2717
3082
  const hasSubsections = sec.subsections && sec.subsections.length > 0;
2718
3083
  const contentLength = sec.content.length;
2719
3084
  if (hasSubsections) {
2720
- chunks.push({
2721
- id: `${docId}:section:${i}`,
2722
- documentId: docId,
2723
- type: "section",
2724
- text: `Section: ${sec.title}
3085
+ pushChunk(
3086
+ `section:${i}`,
3087
+ "section",
3088
+ `Section: ${sec.title}
2725
3089
  ${sec.content}`,
2726
- metadata: stringMetadata({
3090
+ {
2727
3091
  sectionType: sec.type,
2728
3092
  sectionNumber: sec.sectionNumber,
2729
3093
  pageStart: sec.pageStart,
2730
3094
  pageEnd: sec.pageEnd,
2731
3095
  documentType: doc.type,
2732
3096
  hasSubsections: "true"
2733
- })
2734
- });
3097
+ }
3098
+ );
2735
3099
  sec.subsections.forEach((sub, j) => {
2736
- chunks.push({
2737
- id: `${docId}:section:${i}:sub:${j}`,
2738
- documentId: docId,
2739
- type: "section",
2740
- text: `${sec.title} > ${sub.title}
3100
+ pushChunk(
3101
+ `section:${i}:sub:${j}`,
3102
+ "section",
3103
+ `${sec.title} > ${sub.title}
2741
3104
  ${sub.content}`,
2742
- metadata: stringMetadata({
3105
+ {
2743
3106
  sectionType: sec.type,
2744
3107
  parentSection: sec.title,
2745
3108
  sectionNumber: sub.sectionNumber,
2746
3109
  pageNumber: sub.pageNumber,
2747
3110
  documentType: doc.type
2748
- })
2749
- });
3111
+ }
3112
+ );
2750
3113
  });
2751
3114
  } else if (contentLength > 2e3) {
2752
3115
  const paragraphs = sec.content.split(/\n\n+/);
@@ -2754,58 +3117,55 @@ ${sub.content}`,
2754
3117
  let chunkIndex = 0;
2755
3118
  for (const para of paragraphs) {
2756
3119
  if (currentChunk.length + para.length > 1e3 && currentChunk.length > 0) {
2757
- chunks.push({
2758
- id: `${docId}:section:${i}:part:${chunkIndex}`,
2759
- documentId: docId,
2760
- type: "section",
2761
- text: `Section: ${sec.title} (part ${chunkIndex + 1})
3120
+ pushChunk(
3121
+ `section:${i}:part:${chunkIndex}`,
3122
+ "section",
3123
+ `Section: ${sec.title} (part ${chunkIndex + 1})
2762
3124
  ${currentChunk.trim()}`,
2763
- metadata: stringMetadata({
3125
+ {
2764
3126
  sectionType: sec.type,
2765
3127
  sectionNumber: sec.sectionNumber,
2766
3128
  pageStart: sec.pageStart,
2767
3129
  pageEnd: sec.pageEnd,
2768
3130
  documentType: doc.type,
2769
3131
  partIndex: chunkIndex
2770
- })
2771
- });
3132
+ }
3133
+ );
2772
3134
  currentChunk = "";
2773
3135
  chunkIndex++;
2774
3136
  }
2775
3137
  currentChunk += (currentChunk ? "\n\n" : "") + para;
2776
3138
  }
2777
3139
  if (currentChunk.trim()) {
2778
- chunks.push({
2779
- id: `${docId}:section:${i}:part:${chunkIndex}`,
2780
- documentId: docId,
2781
- type: "section",
2782
- text: `Section: ${sec.title} (part ${chunkIndex + 1})
3140
+ pushChunk(
3141
+ `section:${i}:part:${chunkIndex}`,
3142
+ "section",
3143
+ `Section: ${sec.title} (part ${chunkIndex + 1})
2783
3144
  ${currentChunk.trim()}`,
2784
- metadata: stringMetadata({
3145
+ {
2785
3146
  sectionType: sec.type,
2786
3147
  sectionNumber: sec.sectionNumber,
2787
3148
  pageStart: sec.pageStart,
2788
3149
  pageEnd: sec.pageEnd,
2789
3150
  documentType: doc.type,
2790
3151
  partIndex: chunkIndex
2791
- })
2792
- });
3152
+ }
3153
+ );
2793
3154
  }
2794
3155
  } else {
2795
- chunks.push({
2796
- id: `${docId}:section:${i}`,
2797
- documentId: docId,
2798
- type: "section",
2799
- text: `Section: ${sec.title}
3156
+ pushChunk(
3157
+ `section:${i}`,
3158
+ "section",
3159
+ `Section: ${sec.title}
2800
3160
  ${sec.content}`,
2801
- metadata: stringMetadata({
3161
+ {
2802
3162
  sectionType: sec.type,
2803
3163
  sectionNumber: sec.sectionNumber,
2804
3164
  pageStart: sec.pageStart,
2805
3165
  pageEnd: sec.pageEnd,
2806
3166
  documentType: doc.type
2807
- })
2808
- });
3167
+ }
3168
+ );
2809
3169
  }
2810
3170
  });
2811
3171
  doc.locations?.forEach((loc, i) => {
@@ -3236,6 +3596,13 @@ function dedupeByKey(items, keyFn) {
3236
3596
  }
3237
3597
  return merged;
3238
3598
  }
3599
/**
 * Normalizes one component of a dedupe key so cosmetic differences
 * (letter case, punctuation, whitespace, "&" vs "and") do not yield
 * distinct keys.
 *
 * Letters, combining marks and digits of any script are kept. An
 * ASCII-only filter would erase non-Latin text and strip accented letters,
 * making unrelated values collapse onto the same (often empty) key.
 * For pure-ASCII input the result is the same as with an ASCII-only filter.
 *
 * @param {unknown} value - Raw key component; null and undefined yield "".
 * @returns {string} Lowercased text containing only letters, marks and digits.
 */
function normalizeKeyPart(value) {
  if (value === void 0 || value === null) return "";
  return String(value)
    // NFC so composed and decomposed spellings of the same text compare equal.
    .normalize("NFC")
    .toLowerCase()
    .replace(/&/g, "and")
    .replace(/[^\p{L}\p{M}\p{N}]+/gu, "");
}
3603
/**
 * Builds a composite dedupe key from any number of components.
 * Each component is passed through normalizeKeyPart and the results are
 * joined with "|" in the order given.
 *
 * @param {...unknown} parts - Key components, in significance order.
 * @returns {string} The "|"-delimited key.
 */
function keyFromParts(...parts) {
  const normalized = parts.map(normalizeKeyPart);
  return normalized.join("|");
}
3239
3606
  function mergeUniqueObjects(existing, incoming, keyFn) {
3240
3607
  return dedupeByKey([...existing, ...incoming], keyFn);
3241
3608
  }
@@ -3264,13 +3631,13 @@ function mergeCoverageLimits(existing, incoming) {
3264
3631
  const merged = mergeShallowPreferPresent(existing, incoming);
3265
3632
  const existingCoverages = Array.isArray(existing.coverages) ? existing.coverages : [];
3266
3633
  const incomingCoverages = Array.isArray(incoming.coverages) ? incoming.coverages : [];
3267
- const coverageKey = (coverage) => [
3268
- String(coverage.name ?? "").toLowerCase(),
3269
- String(coverage.limitType ?? "").toLowerCase(),
3270
- String(coverage.limit ?? "").toLowerCase(),
3271
- String(coverage.deductible ?? "").toLowerCase(),
3272
- String(coverage.formNumber ?? "").toLowerCase()
3273
- ].join("|");
3634
+ const coverageKey = (coverage) => keyFromParts(
3635
+ coverage.name,
3636
+ coverage.limitType,
3637
+ coverage.limit,
3638
+ coverage.deductible,
3639
+ coverage.formNumber
3640
+ );
3274
3641
  const byKey = /* @__PURE__ */ new Map();
3275
3642
  for (const coverage of [...existingCoverages, ...incomingCoverages]) {
3276
3643
  const key = coverageKey(coverage);
@@ -3284,11 +3651,11 @@ function mergeDeclarations(existing, incoming) {
3284
3651
  const merged = mergeShallowPreferPresent(existing, incoming);
3285
3652
  const existingFields = Array.isArray(existing.fields) ? existing.fields : [];
3286
3653
  const incomingFields = Array.isArray(incoming.fields) ? incoming.fields : [];
3287
- merged.fields = mergeUniqueObjects(existingFields, incomingFields, (field) => [
3288
- String(field.field ?? "").toLowerCase(),
3289
- String(field.value ?? "").toLowerCase(),
3290
- String(field.section ?? "").toLowerCase()
3291
- ].join("|"));
3654
+ merged.fields = mergeUniqueObjects(existingFields, incomingFields, (field) => keyFromParts(
3655
+ field.field,
3656
+ field.value,
3657
+ field.section
3658
+ ));
3292
3659
  return merged;
3293
3660
  }
3294
3661
  function mergeArrayPayload(existing, incoming, arrayKey, keyFn) {
@@ -3298,30 +3665,53 @@ function mergeArrayPayload(existing, incoming, arrayKey, keyFn) {
3298
3665
  merged[arrayKey] = mergeUniqueObjects(existingItems, incomingItems, keyFn);
3299
3666
  return merged;
3300
3667
  }
3668
/**
 * Returns the first array-valued property of `record` among `keys`,
 * checked in the order given.
 *
 * @param {object|null|undefined} record - Object to read from; a missing
 *   record is treated like an object with none of the keys.
 * @param {...string} keys - Property names to try, highest priority first.
 * @returns {Array} The matching array itself (not a copy), or a new empty
 *   array when no key holds an array.
 */
function readArray(record, ...keys) {
  // A null/undefined payload has nothing to read; avoid a TypeError on record[key].
  if (record === null || record === void 0) return [];
  for (const key of keys) {
    const candidate = record[key];
    if (Array.isArray(candidate)) return candidate;
  }
  return [];
}
3674
/**
 * Merges two extractor payloads whose item list may appear under several
 * property names (a canonical `outputKey` plus alias `inputKeys`).
 *
 * Items are gathered from every candidate key of both payloads, existing
 * payload first, then deduplicated by `keyFn`. Items sharing a key are
 * combined with mergeShallowPreferPresent. The result stores the list under
 * `outputKey` only; alias keys are removed from the merged object.
 *
 * Gathering from all candidate keys matters because the aliases are deleted
 * afterwards: reading only the first array found would silently discard a
 * second list when a payload carries both spellings.
 *
 * @param {object} existing - Previously accumulated payload.
 * @param {object} incoming - Newly extracted payload.
 * @param {string} outputKey - Canonical property name for the merged list.
 * @param {string[]} inputKeys - Alias property names to read and then drop.
 * @param {Function} keyFn - Maps an item to its dedupe key.
 * @returns {object} The merged payload.
 */
function mergeAliasedArrayPayload(existing, incoming, outputKey, inputKeys, keyFn) {
  const merged = mergeShallowPreferPresent(existing, incoming);
  // Canonical key first, then aliases; the Set guards against an alias equal to outputKey.
  const candidateKeys = [...new Set([outputKey, ...inputKeys])];
  const collect = (record) => candidateKeys.flatMap((key) => readArray(record, key));
  const byKey = /* @__PURE__ */ new Map();
  for (const item of [...collect(existing), ...collect(incoming)]) {
    const key = keyFn(item);
    const current = byKey.get(key);
    byKey.set(key, current ? mergeShallowPreferPresent(current, item) : item);
  }
  merged[outputKey] = [...byKey.values()];
  for (const key of inputKeys) {
    if (key !== outputKey) delete merged[key];
  }
  return merged;
}
3301
3691
  function mergeSupplementary(existing, incoming) {
3302
3692
  const merged = mergeShallowPreferPresent(existing, incoming);
3303
3693
  const mergeContactArray = (arrayKey) => {
3304
3694
  const existingItems = Array.isArray(existing[arrayKey]) ? existing[arrayKey] : [];
3305
3695
  const incomingItems = Array.isArray(incoming[arrayKey]) ? incoming[arrayKey] : [];
3306
- merged[arrayKey] = mergeUniqueObjects(existingItems, incomingItems, (item) => [
3307
- String(item.name ?? "").toLowerCase(),
3308
- String(item.phone ?? "").toLowerCase(),
3309
- String(item.email ?? "").toLowerCase(),
3310
- String(item.address ?? "").toLowerCase(),
3311
- String(item.type ?? "").toLowerCase()
3312
- ].join("|"));
3696
+ merged[arrayKey] = mergeUniqueObjects(existingItems, incomingItems, (item) => keyFromParts(
3697
+ item.name,
3698
+ item.phone,
3699
+ item.email,
3700
+ item.address,
3701
+ item.type
3702
+ ));
3313
3703
  };
3314
3704
  mergeContactArray("regulatoryContacts");
3315
3705
  mergeContactArray("claimsContacts");
3316
3706
  mergeContactArray("thirdPartyAdministrators");
3317
3707
  const existingFacts = Array.isArray(existing.auxiliaryFacts) ? existing.auxiliaryFacts : [];
3318
3708
  const incomingFacts = Array.isArray(incoming.auxiliaryFacts) ? incoming.auxiliaryFacts : [];
3319
- merged.auxiliaryFacts = mergeUniqueObjects(existingFacts, incomingFacts, (item) => [
3320
- String(item.key ?? "").toLowerCase(),
3321
- String(item.value ?? "").toLowerCase(),
3322
- String(item.subject ?? "").toLowerCase(),
3323
- String(item.context ?? "").toLowerCase()
3324
- ].join("|"));
3709
+ merged.auxiliaryFacts = mergeUniqueObjects(existingFacts, incomingFacts, (item) => keyFromParts(
3710
+ item.key,
3711
+ item.value,
3712
+ item.subject,
3713
+ item.context
3714
+ ));
3325
3715
  return merged;
3326
3716
  }
3327
3717
  function mergeExtractorResult(extractorName, existing, incoming) {
@@ -3342,31 +3732,43 @@ function mergeExtractorResult(extractorName, existing, incoming) {
3342
3732
  return mergeCoverageLimits(current, next);
3343
3733
  case "declarations":
3344
3734
  return mergeDeclarations(current, next);
3735
+ case "definitions":
3736
+ return mergeArrayPayload(current, next, "definitions", (item) => keyFromParts(
3737
+ item.term ?? item.name ?? item.key,
3738
+ item.pageNumber ?? item.pageStart
3739
+ ));
3740
+ case "covered_reasons":
3741
+ return mergeAliasedArrayPayload(current, next, "coveredReasons", ["covered_reasons"], (item) => keyFromParts(
3742
+ item.coverageName ?? item.coverage,
3743
+ item.reasonNumber ?? item.number,
3744
+ item.title ?? item.reason ?? item.name ?? item.cause,
3745
+ item.pageNumber ?? item.pageStart
3746
+ ));
3345
3747
  case "endorsements":
3346
- return mergeArrayPayload(current, next, "endorsements", (item) => [
3347
- String(item.formNumber ?? "").toLowerCase(),
3348
- String(item.title ?? "").toLowerCase(),
3349
- String(item.pageStart ?? "")
3350
- ].join("|"));
3748
+ return mergeArrayPayload(current, next, "endorsements", (item) => keyFromParts(
3749
+ item.formNumber,
3750
+ item.title,
3751
+ item.pageStart
3752
+ ));
3351
3753
  case "exclusions":
3352
- return mergeArrayPayload(current, next, "exclusions", (item) => [
3353
- String(item.name ?? "").toLowerCase(),
3354
- String(item.formNumber ?? "").toLowerCase(),
3355
- String(item.pageNumber ?? "")
3356
- ].join("|"));
3754
+ return mergeArrayPayload(current, next, "exclusions", (item) => keyFromParts(
3755
+ item.name,
3756
+ item.formNumber,
3757
+ item.pageNumber
3758
+ ));
3357
3759
  case "conditions":
3358
- return mergeArrayPayload(current, next, "conditions", (item) => [
3359
- String(item.name ?? "").toLowerCase(),
3360
- String(item.conditionType ?? "").toLowerCase(),
3361
- String(item.pageNumber ?? "")
3362
- ].join("|"));
3760
+ return mergeArrayPayload(current, next, "conditions", (item) => keyFromParts(
3761
+ item.name,
3762
+ item.conditionType,
3763
+ item.pageNumber
3764
+ ));
3363
3765
  case "sections":
3364
- return mergeArrayPayload(current, next, "sections", (item) => [
3365
- String(item.title ?? "").toLowerCase(),
3366
- String(item.type ?? "").toLowerCase(),
3367
- String(item.pageStart ?? ""),
3368
- String(item.pageEnd ?? "")
3369
- ].join("|"));
3766
+ return mergeArrayPayload(current, next, "sections", (item) => keyFromParts(
3767
+ item.title,
3768
+ item.type,
3769
+ item.pageStart,
3770
+ item.pageEnd
3771
+ ));
3370
3772
  default:
3371
3773
  return mergeShallowPreferPresent(current, next);
3372
3774
  }
@@ -4225,6 +4627,8 @@ var PageExtractorSchema = z20.enum([
4225
4627
  "carrier_info",
4226
4628
  "named_insured",
4227
4629
  "coverage_limits",
4630
+ "covered_reasons",
4631
+ "definitions",
4228
4632
  "endorsements",
4229
4633
  "exclusions",
4230
4634
  "conditions",
@@ -4271,6 +4675,8 @@ Available extractors:
4271
4675
  - carrier_info
4272
4676
  - named_insured
4273
4677
  - coverage_limits
4678
+ - covered_reasons
4679
+ - definitions
4274
4680
  - endorsements
4275
4681
  - exclusions
4276
4682
  - conditions
@@ -4284,6 +4690,8 @@ Rules:
4284
4690
  - Identify the broad section or form context first, then assign focused extractors within that context.
4285
4691
  - Use specific extractors for declarations, schedules, endorsements, exclusions, conditions, premium pages, and loss runs.
4286
4692
  - Use "sections" for pages that contain substantive policy text or mixed content that should still be preserved as raw sections.
4693
+ - Use "definitions" for policy-form pages containing defined terms, definitions sections, or term meaning clauses.
4694
+ - Use "covered_reasons" for pages listing covered causes of loss, covered reasons, covered perils, named perils, covered events, or covered loss triggers.
4287
4695
  - Avoid assigning broad ranges mentally; decide page by page.
4288
4696
  - A page may map to multiple extractors if it legitimately contains multiple relevant sections.
4289
4697
  - Prefer declarations and schedules for numeric limits/deductibles over later generic form wording.
@@ -4291,6 +4699,7 @@ Rules:
4291
4699
  - Do NOT assign "coverage_limits" for generic policy-form or endorsement text that merely explains how limits, deductibles, waiting periods, or coinsurance work, or that says values are "shown in the declarations", "shown in the schedule", "as stated", or "if applicable".
4292
4700
  - Headings like "Limits of Insurance", "Deductible", "Coinsurance", "Loss Conditions", or "Definitions" inside a policy form usually indicate form language, not declarations or schedules.
4293
4701
  - Continuation pages near the end of a form should stay mapped to "sections" plus "conditions"/"exclusions" when applicable, even if they mention limits or deductibles.
4702
+ - Covered causes/reasons and definitions often span a whole form section; tag every substantive page in that section, not just the heading page.
4294
4703
  - When a form inventory entry identifies a page range as a specific form type (e.g., endorsement, coverage, application), use that classification to guide your extractor choice. Do not assign "coverage_limits" to pages the inventory identifies as endorsement or condition/exclusion forms unless the page contains actual schedule values.
4295
4704
  - Do not tag a page with "exclusions" or "conditions" if it only contains a table of contents, page-number reference, running header/footer, or a heading that points to another page without substantive wording.
4296
4705
  - If a page appears to be part of a larger exclusion, conditions, or endorsement section within the same form, keep the assignment consistent across nearby pages in that section rather than isolating a single page fragment.
@@ -4334,12 +4743,15 @@ var ReviewResultSchema = z21.object({
4334
4743
  description: z21.string()
4335
4744
  }))
4336
4745
  });
4337
- function buildReviewPrompt(templateExpected, extractedKeys, extractionSummary, pageMapSummary) {
4338
- return `You are reviewing an extraction for completeness and quality. Compare what was expected vs what was found.
4746
+ function buildReviewPrompt(templateExpected, extractedKeys, extractionSummary, pageMapSummary, extractorCatalog) {
4747
+ return `You are the extraction coordinator for an insurance-document agent system. Review the current extraction state, decide whether the result is complete enough, and choose any follow-up extractor tasks needed to improve it.
4339
4748
 
4340
4749
  EXPECTED FIELDS (from document type template):
4341
4750
  ${templateExpected.map((f) => `- ${f}`).join("\n")}
4342
4751
 
4752
+ AVAILABLE FOLLOW-UP EXTRACTORS:
4753
+ ${extractorCatalog}
4754
+
4343
4755
  FIELDS ALREADY EXTRACTED:
4344
4756
  ${extractedKeys.map((f) => `- ${f}`).join("\n")}
4345
4757
 
@@ -4353,15 +4765,21 @@ Determine:
4353
4765
  1. Is the extraction complete enough?
4354
4766
  2. What fields are missing?
4355
4767
  3. What quality issues are present?
4356
- 4. Should any additional extraction tasks be dispatched?
4768
+ 4. Which follow-up extraction tasks, if any, should be dispatched?
4357
4769
 
4358
4770
  Mark the extraction as NOT complete if any of these are true:
4359
4771
  - required fields are missing
4360
4772
  - extracted values are generic placeholders like "shown in declarations", "per schedule", "if applicable", "as stated"
4361
4773
  - coverage limits or deductibles appear to come from generic form language instead of declaration/schedule-specific values
4774
+ - definitions pages were mapped but no definition records or definition-type sections were extracted
4775
+ - covered causes/reasons pages were mapped but no covered reason, covered peril, covered cause, or matching section records were extracted
4362
4776
  - page assignments suggest declaration, schedule, endorsement, exclusion, or condition pages were not actually extracted with the matching focused extractor
4363
4777
  - a focused extractor exists but returned too little substance for the relevant pages
4364
4778
 
4779
+ When reviewing CURRENT EXTRACTION SUMMARY, compare the page-map counts to extracted counts. If an assigned extractor produced no useful records, produce a quality issue and a narrow follow-up task over the mapped page range.
4780
+
4781
+ Choose follow-up tasks from AVAILABLE FOLLOW-UP EXTRACTORS. You may dispatch any listed extractor when the page map, current extraction summary, or quality evidence shows that the focused extraction is missing, generic, referential, or too thin. Do not invent extractor names.
4782
+
4365
4783
  Return JSON:
4366
4784
  {
4367
4785
  "complete": boolean,
@@ -4372,7 +4790,7 @@ Return JSON:
4372
4790
  ]
4373
4791
  }
4374
4792
 
4375
- Use the page map to target follow-up extraction pages precisely. Prefer narrow, declaration/schedule-focused follow-up tasks over broad page ranges.
4793
+ Use the page map to target follow-up extraction pages precisely. Prefer narrow, declaration/schedule-focused follow-up tasks over broad page ranges. If no additional model work is likely to improve the extraction, return an empty additionalTasks array.
4376
4794
 
4377
4795
  Respond with JSON only.`;
4378
4796
  }
@@ -4907,6 +5325,7 @@ var SectionsSchema = z32.object({
4907
5325
  "policy_form",
4908
5326
  "endorsement",
4909
5327
  "application",
5328
+ "covered_reason",
4910
5329
  "exclusion",
4911
5330
  "condition",
4912
5331
  "definition",
@@ -4930,6 +5349,7 @@ For each section, classify its type:
4930
5349
  - "policy_form" \u2014 named ISO or proprietary forms (e.g. CG 00 01, IL 00 17). All sections within a named form should be typed as "policy_form"
4931
5350
  - "endorsement" \u2014 standalone endorsements modifying the base policy
4932
5351
  - "application" \u2014 the insurance application or supplemental application
5352
+ - "covered_reason" \u2014 affirmative grants of coverage, covered causes of loss, covered perils, or named covered events
4933
5353
  - "insuring_agreement" \u2014 the insuring agreement clause (only if standalone, not inside a policy_form)
4934
5354
  - "exclusion", "condition", "definition" \u2014 for standalone sections only
4935
5355
  - "schedule" \u2014 coverage or rating schedules
@@ -5005,7 +5425,155 @@ For auxiliaryFacts:
5005
5425
  Return JSON only.`;
5006
5426
  }
5007
5427
 
5428
+ // src/prompts/extractors/definitions.ts
5429
+ import { z as z34 } from "zod";
5430
/**
 * Zod schema for the output of the focused "definitions" extractor.
 * The payload is an object with a single `definitions` array. Each entry
 * requires `term` and `definition` strings; page, form and section
 * provenance plus the `originalContent` snippet are optional.
 */
+ var DefinitionsSchema = z34.object({
5431
+ definitions: z34.array(
5432
+ z34.object({
5433
+ term: z34.string().describe("Defined term exactly as shown in the document"),
5434
+ definition: z34.string().describe("Full verbatim definition text, preserving original wording"),
5435
+ pageNumber: z34.number().optional().describe("Original document page number"),
5436
+ formNumber: z34.string().optional().describe("Form number where this definition appears"),
5437
+ formTitle: z34.string().optional().describe("Form title where this definition appears"),
5438
+ sectionRef: z34.string().optional().describe("Definition section heading or subsection reference"),
5439
+ originalContent: z34.string().optional().describe("Short verbatim source snippet containing the term and definition")
5440
+ })
5441
+ ).describe("All substantive insurance definitions found in the document")
5442
+ });
5443
/**
 * Returns the fixed instruction prompt for the "definitions" extractor.
 * Takes no arguments and performs no interpolation; the text lists the
 * per-definition fields to extract and the rules for what to skip.
 * @returns {string} The prompt text, ending with "Return JSON only."
 */
+ function buildDefinitionsPrompt() {
5444
+ return `You are an expert insurance document analyst. Extract ALL substantive defined terms from this document. Preserve original wording verbatim.
5445
+
5446
+ For EACH definition, extract:
5447
+ - term: defined term exactly as shown \u2014 REQUIRED
5448
+ - definition: full verbatim definition text including all included subparts \u2014 REQUIRED
5449
+ - pageNumber: original document page number where the definition appears
5450
+ - formNumber: form number where the definition appears, if shown
5451
+ - formTitle: form title where the definition appears, if shown
5452
+ - sectionRef: heading such as "Definitions", "Words and Phrases Defined", or coverage-specific definition section
5453
+ - originalContent: short verbatim source snippet containing the term and definition
5454
+
5455
+ Focus on:
5456
+ - Terms in sections titled Definitions, Words and Phrases Defined, Glossary, or similar
5457
+ - Coverage-specific defined terms embedded in insuring agreements, endorsements, exclusions, or conditions
5458
+ - Multi-part definitions with numbered, lettered, or bulleted clauses
5459
+ - Definitions that affect coverage triggers, covered property, insured status, exclusions, limits, or duties
5460
+
5461
+ Critical rules:
5462
+ - Preserve the original content. Do not paraphrase content.
5463
+ - Keep all subparts of a definition together in one item when they define the same term.
5464
+ - Ignore table-of-contents entries, running headers/footers, indexes, and cross-references that do not include substantive definition text.
5465
+ - Do not emit generic headings like "Definitions" as a term unless the page defines an actual term.
5466
+ - Always include pageNumber when the definition appears on a specific page in the supplied document chunk.
5467
+ - Use definition as the canonical full text. Do not return a separate content field.
5468
+
5469
+ Return JSON only.`;
5470
+ }
5471
+
5472
+ // src/prompts/extractors/covered-reasons.ts
5473
+ import { z as z35 } from "zod";
5474
/**
 * Zod schema for the output of the focused "covered_reasons" extractor.
 * The payload is an object with a single `coveredReasons` array. Each entry
 * requires `coverageName` and `content` strings; numbering, title,
 * conditions/exceptions/appliesTo lists and provenance fields are optional.
 */
+ var CoveredReasonsSchema = z35.object({
5475
+ coveredReasons: z35.array(
5476
+ z35.object({
5477
+ coverageName: z35.string().describe("Coverage, coverage part, or form this covered reason belongs to"),
5478
+ reasonNumber: z35.string().optional().describe("Source number or letter for the covered reason, if shown"),
5479
+ title: z35.string().optional().describe("Covered reason title, peril, cause of loss, trigger, or short name"),
5480
+ content: z35.string().describe("Full verbatim covered-reason or insuring-agreement text"),
5481
+ conditions: z35.array(z35.string()).optional().describe("Conditions, timing rules, documentation requirements, or prerequisites attached to this covered reason"),
5482
+ exceptions: z35.array(z35.string()).optional().describe("Exceptions or limitations attached to this covered reason"),
5483
+ appliesTo: z35.array(z35.string()).optional().describe("Covered property, persons, autos, locations, operations, or coverage parts this reason applies to"),
5484
+ pageNumber: z35.number().optional().describe("Original document page number"),
5485
+ formNumber: z35.string().optional().describe("Form number where this covered reason appears"),
5486
+ formTitle: z35.string().optional().describe("Form title where this covered reason appears"),
5487
+ sectionRef: z35.string().optional().describe("Section heading where this covered reason appears"),
5488
+ originalContent: z35.string().optional().describe("Short verbatim source snippet used for this covered reason")
5489
+ })
5490
+ ).describe("Covered causes, perils, triggers, or reasons that affirmatively grant coverage")
5491
+ });
5492
/**
 * Returns the fixed instruction prompt for the "covered_reasons" extractor.
 * Takes no arguments and performs no interpolation; the text defines what a
 * covered reason is, lists the fields to extract, and states the rules for
 * excluding non-grant material.
 * @returns {string} The prompt text, ending with "Return JSON only."
 */
+ function buildCoveredReasonsPrompt() {
5493
+ return `You are an expert insurance document analyst. Extract ALL covered reasons from this document. Preserve original wording verbatim.
5494
+
5495
+ A covered reason is affirmative coverage language explaining why, when, or for what cause the insurer will pay. This may be called a covered peril, covered cause of loss, accident, occurrence, loss trigger, additional coverage, expense, or insuring agreement grant.
5496
+
5497
+ For EACH covered reason, extract:
5498
+ - coverageName: coverage, coverage part, or form this covered reason belongs to \u2014 REQUIRED
5499
+ - reasonNumber: source number or letter for the covered reason, if shown
5500
+ - title: covered peril, cause of loss, trigger, or short name
5501
+ - content: full verbatim covered-reason or insuring-agreement text \u2014 REQUIRED
5502
+ - conditions: conditions, timing rules, documentation requirements, or prerequisites attached to this covered reason
5503
+ - exceptions: exceptions or limitations attached to this covered reason
5504
+ - appliesTo: covered property, persons, autos, locations, operations, or coverage parts this reason applies to
5505
+ - pageNumber: original document page number where this covered reason appears
5506
+ - formNumber: form number where this covered reason appears, if shown
5507
+ - formTitle: form title where this covered reason appears, if shown
5508
+ - sectionRef: heading where this covered reason appears
5509
+ - originalContent: short verbatim source snippet used for this covered reason
5510
+
5511
+ Focus on:
5512
+ - Named perils and covered causes of loss
5513
+ - Insuring agreement grants and coverage triggers
5514
+ - Additional coverages and coverage extensions that state when payment applies
5515
+ - Personal lines phrases such as fire, lightning, windstorm, hail, theft, collision, comprehensive, or accidental direct physical loss
5516
+ - Commercial lines phrases such as bodily injury, property damage, personal and advertising injury, employee dishonesty, computer fraud, equipment breakdown, or professional services acts
5517
+
5518
+ Critical rules:
5519
+ - Preserve the original content. Do not paraphrase content.
5520
+ - Extract affirmative coverage grants, not exclusions, conditions, or declarations-only limit rows.
5521
+ - Do not emit a covered reason from a table-of-contents entry, running header/footer, or reference that only points elsewhere.
5522
+ - If a covered reason includes exceptions or limitations in the same clause, keep them in content and also list them in exceptions when they can be separated cleanly.
5523
+ - Always include pageNumber when the covered reason appears on a specific page in the supplied document chunk.
5524
+ - Preserve coverage grouping. Do not merge separate coverage parts into one generic list.
5525
+
5526
+ Return JSON only.`;
5527
+ }
5528
+
5008
5529
  // src/prompts/extractors/index.ts
5530
/**
 * Narrows an arbitrary value to something whose properties can be read.
 * @param {unknown} data - Any value.
 * @returns {object|undefined} `data` itself when it is a non-null object
 *   (arrays included); `undefined` for primitives, functions and null.
 */
function asRecord(data) {
  if (data === null || typeof data !== "object") {
    return void 0;
  }
  return data;
}
5533
/**
 * Reads the `sections` list off an extractor result.
 * @param {unknown} data - Extractor payload of unknown shape.
 * @returns {Array} `data.sections` when it is an array, otherwise a new
 *   empty array.
 */
function getSections2(data) {
  const record = asRecord(data);
  if (record === void 0) {
    return [];
  }
  const candidate = record.sections;
  return Array.isArray(candidate) ? candidate : [];
}
5537
/**
 * Reports whether an extractor result carries no covered-reason records.
 * The camelCase `coveredReasons` key is consulted first; the snake_case
 * `covered_reasons` key is only used when the former is not an array.
 * @param {unknown} data - Extractor payload of unknown shape.
 * @returns {boolean} True when `data` is not an object or the selected
 *   list is missing or has no entries.
 */
function isCoveredReasonsEmpty(data) {
  const record = asRecord(data);
  if (!record) {
    return true;
  }
  let records = [];
  if (Array.isArray(record.coveredReasons)) {
    records = record.coveredReasons;
  } else if (Array.isArray(record.covered_reasons)) {
    records = record.covered_reasons;
  }
  return records.length === 0;
}
5543
/**
 * Reports whether an extractor result carries no definition records.
 * @param {unknown} data - Extractor payload of unknown shape.
 * @returns {boolean} True unless `data.definitions` is a non-empty array.
 */
function isDefinitionsEmpty(data) {
  const record = asRecord(data);
  const list = record === void 0 ? void 0 : record.definitions;
  if (Array.isArray(list)) {
    return list.length === 0;
  }
  return true;
}
5547
/**
 * Decides whether a generic section record reads like a coverage grant.
 * A section qualifies when its type is "covered_reason" or its title
 * contains one of the known marker phrases (case-insensitive).
 * @param {{type?: unknown, title?: unknown}} section - Section record.
 * @returns {boolean} True when the section should be treated as a covered reason.
 */
function sectionLooksLikeCoveredReason(section) {
  const normalizedType = String(section.type ?? "").toLowerCase();
  const normalizedTitle = String(section.title ?? "").toLowerCase();
  if (normalizedType === "covered_reason") {
    return true;
  }
  const titleMarkers = [
    "covered cause",
    "covered reason",
    "covered peril",
    "named peril",
    "insuring agreement"
  ];
  return titleMarkers.some((marker) => normalizedTitle.includes(marker));
}
5552
/**
 * Converts a generic `sections` extraction result into the payload shape
 * of the covered-reasons extractor.
 * Only sections accepted by `sectionLooksLikeCoveredReason` and having
 * non-blank content are kept.
 * @param {unknown} data - Result payload of the sections extractor.
 * @returns {{coveredReasons: object[]}|undefined} The derived payload, or
 *   `undefined` when no section qualifies.
 */
function deriveCoveredReasonsFromSections(data) {
  const optionalString = (value) => (typeof value === "string" ? value : void 0);
  const coveredReasons = [];
  for (const section of getSections2(data)) {
    if (!sectionLooksLikeCoveredReason(section)) {
      continue;
    }
    const content = String(section.content ?? "");
    // A section with no body text has nothing to report as a coverage grant.
    if (content.trim().length === 0) {
      continue;
    }
    coveredReasons.push({
      coverageName: String(section.coverageName ?? section.formTitle ?? section.title ?? "Covered Reasons"),
      title: optionalString(section.title),
      content,
      pageNumber: typeof section.pageStart === "number" ? section.pageStart : void 0,
      formNumber: optionalString(section.formNumber),
      formTitle: optionalString(section.formTitle),
      sectionRef: optionalString(section.sectionNumber),
      // Snippet is capped at the first 500 characters of the section text.
      originalContent: typeof section.content === "string" ? section.content.slice(0, 500) : void 0
    });
  }
  if (coveredReasons.length === 0) {
    return void 0;
  }
  return { coveredReasons };
}
5565
/**
 * Converts a generic `sections` extraction result into the payload shape
 * of the definitions extractor.
 * Only sections whose type is "definition" (case-insensitive) and whose
 * content is non-blank are kept; the section title becomes the term.
 * @param {unknown} data - Result payload of the sections extractor.
 * @returns {{definitions: object[]}|undefined} The derived payload, or
 *   `undefined` when no section qualifies.
 */
function deriveDefinitionsFromSections(data) {
  const optionalString = (value) => (typeof value === "string" ? value : void 0);
  const definitions = [];
  for (const section of getSections2(data)) {
    if (String(section.type ?? "").toLowerCase() !== "definition") {
      continue;
    }
    const definitionText = String(section.content ?? "");
    if (definitionText.trim().length === 0) {
      continue;
    }
    definitions.push({
      term: String(section.title ?? "Definitions"),
      definition: definitionText,
      pageNumber: typeof section.pageStart === "number" ? section.pageStart : void 0,
      formNumber: optionalString(section.formNumber),
      formTitle: optionalString(section.formTitle),
      sectionRef: optionalString(section.sectionNumber),
      // Snippet is capped at the first 500 characters of the section text.
      originalContent: typeof section.content === "string" ? section.content.slice(0, 500) : void 0
    });
  }
  if (definitions.length === 0) {
    return void 0;
  }
  return { definitions };
}
5009
5577
  var EXTRACTORS = {
5010
5578
  carrier_info: { buildPrompt: buildCarrierInfoPrompt, schema: CarrierInfoSchema, maxTokens: 2048 },
5011
5579
  named_insured: { buildPrompt: buildNamedInsuredPrompt, schema: NamedInsuredSchema2, maxTokens: 2048 },
@@ -5017,28 +5585,54 @@ var EXTRACTORS = {
5017
5585
  declarations: { buildPrompt: buildDeclarationsPrompt, schema: DeclarationsExtractSchema, maxTokens: 8192 },
5018
5586
  loss_history: { buildPrompt: buildLossHistoryPrompt, schema: LossHistorySchema, maxTokens: 4096 },
5019
5587
  sections: { buildPrompt: buildSectionsPrompt, schema: SectionsSchema, maxTokens: 8192 },
5020
- supplementary: { buildPrompt: buildSupplementaryPrompt, schema: SupplementarySchema, maxTokens: 2048 }
5588
+ supplementary: { buildPrompt: buildSupplementaryPrompt, schema: SupplementarySchema, maxTokens: 2048 },
5589
+ definitions: {
5590
+ buildPrompt: buildDefinitionsPrompt,
5591
+ schema: DefinitionsSchema,
5592
+ maxTokens: 8192,
5593
+ fallback: {
5594
+ extractorName: "sections",
5595
+ isEmpty: isDefinitionsEmpty,
5596
+ deriveFocusedResult: deriveDefinitionsFromSections
5597
+ }
5598
+ },
5599
+ covered_reasons: {
5600
+ buildPrompt: buildCoveredReasonsPrompt,
5601
+ schema: CoveredReasonsSchema,
5602
+ maxTokens: 8192,
5603
+ fallback: {
5604
+ extractorName: "sections",
5605
+ isEmpty: isCoveredReasonsEmpty,
5606
+ deriveFocusedResult: deriveCoveredReasonsFromSections
5607
+ }
5608
+ }
5021
5609
  };
5022
5610
  function getExtractor(name) {
5023
5611
  return EXTRACTORS[name];
5024
5612
  }
5613
/**
 * Renders the extractor registry as a bullet list for inclusion in a prompt.
 * Each line shows the extractor name, its token budget (4096 when the entry
 * declares none) and, when present, the name of its fallback extractor.
 * @returns {string} One "- name (...)" line per extractor, newline-joined.
 */
function formatExtractorCatalogForPrompt() {
  const lines = [];
  for (const [name, extractor] of Object.entries(EXTRACTORS)) {
    const tokenBudget = extractor.maxTokens ?? 4096;
    let fallbackNote = "";
    if (extractor.fallback) {
      fallbackNote = `; fallback: ${extractor.fallback.extractorName}`;
    }
    lines.push(`- ${name} (maxTokens: ${tokenBudget}${fallbackNote})`);
  }
  return lines.join("\n");
}
5025
5619
 
5026
5620
  // src/extraction/resolve-referential.ts
5027
- import { z as z35 } from "zod";
5621
+ import { z as z37 } from "zod";
5028
5622
 
5029
5623
  // src/prompts/extractors/referential-lookup.ts
5030
- import { z as z34 } from "zod";
5031
- var ReferentialLookupSchema = z34.object({
5032
- resolvedCoverages: z34.array(
5033
- z34.object({
5034
- coverageName: z34.string().describe("The coverage name that was referenced"),
5035
- resolvedLimit: z34.string().optional().describe("The concrete limit value found, if any"),
5624
+ import { z as z36 } from "zod";
5625
+ var ReferentialLookupSchema = z36.object({
5626
+ resolvedCoverages: z36.array(
5627
+ z36.object({
5628
+ coverageName: z36.string().describe("The coverage name that was referenced"),
5629
+ resolvedLimit: z36.string().optional().describe("The concrete limit value found, if any"),
5036
5630
  resolvedLimitValueType: CoverageValueTypeSchema.optional(),
5037
- resolvedDeductible: z34.string().optional().describe("The concrete deductible value found, if any"),
5631
+ resolvedDeductible: z36.string().optional().describe("The concrete deductible value found, if any"),
5038
5632
  resolvedDeductibleValueType: CoverageValueTypeSchema.optional(),
5039
- pageNumber: z34.number().optional().describe("Page where the resolved value was found"),
5040
- originalContent: z34.string().optional().describe("Verbatim source text for the resolved value"),
5041
- confidence: z34.enum(["high", "medium", "low"]).describe("Confidence in the resolution")
5633
+ pageNumber: z36.number().optional().describe("Page where the resolved value was found"),
5634
+ originalContent: z36.string().optional().describe("Verbatim source text for the resolved value"),
5635
+ confidence: z36.enum(["high", "medium", "low"]).describe("Confidence in the resolution")
5042
5636
  })
5043
5637
  )
5044
5638
  });
@@ -5073,18 +5667,124 @@ Your task:
5073
5667
  Return JSON only.`;
5074
5668
  }
5075
5669
 
5076
- // src/extraction/resolve-referential.ts
5670
+ // src/extraction/heuristics.ts
5077
5671
  function looksReferential(value) {
5078
5672
  if (typeof value !== "string") return false;
5079
5673
  const normalized = value.toLowerCase();
5080
5674
  return normalized.includes("shown in the declarations") || normalized.includes("shown in declarations") || normalized.includes("shown in the schedule") || normalized.includes("as stated") || normalized.includes("if applicable");
5081
5675
  }
5676
/**
 * Review-time check for sections that represent covered reasons.
 * Matches a "covered_reason" type or a title containing "covered cause",
 * "covered reason" or "covered peril" (case-insensitive).
 * @param {{type?: unknown, title?: unknown}} section - Section record.
 * @returns {boolean} True when the section counts as a covered reason.
 */
function looksCoveredReasonSection(section) {
  const loweredTitle = String(section.title ?? "").toLowerCase();
  const loweredType = String(section.type ?? "").toLowerCase();
  if (loweredType === "covered_reason") {
    return true;
  }
  const titleMarkers = ["covered cause", "covered reason", "covered peril"];
  return titleMarkers.some((marker) => loweredTitle.includes(marker));
}
5681
+
5682
+ // src/extraction/referential-workflow.ts
5683
/**
 * Produces a trimmed, lower-cased form of a string for comparisons.
 * @param {unknown} value - Candidate text.
 * @returns {string} The normalized text, or "" when `value` is not a string.
 */
function normalizeText(value) {
  if (typeof value !== "string") {
    return "";
  }
  return value.trim().toLowerCase();
}
5686
/**
 * Case-insensitive substring test against an already lower-cased target.
 * @param {unknown} value - Text to search; normalized before comparison.
 * @param {string} target - Needle, used as given (callers pass it lower-cased).
 * @returns {boolean} True when both sides are non-empty and the normalized
 *   value contains the target.
 */
function containsTarget(value, target) {
  const haystack = normalizeText(value);
  if (!haystack || !target) {
    return false;
  }
  return haystack.includes(target);
}
5690
/**
 * Builds a page range from possibly missing or invalid page numbers.
 * @param {unknown} startPage - First page; must be a finite number above 0.
 * @param {unknown} endPage - Last page; ignored unless it is a finite number
 *   not smaller than `startPage`.
 * @returns {{startPage: number, endPage: number}|undefined} The range, with
 *   `endPage` collapsed to `startPage` when unusable, or `undefined` when
 *   `startPage` itself is unusable.
 */
function pageRangeFrom(startPage, endPage) {
  const isFiniteNumber = (candidate) => typeof candidate === "number" && Number.isFinite(candidate);
  if (!isFiniteNumber(startPage) || startPage <= 0) {
    return void 0;
  }
  const endIsUsable = isFiniteNumber(endPage) && endPage >= startPage;
  return { startPage, endPage: endIsUsable ? endPage : startPage };
}
5697
/**
 * Classifies a free-text reference target into a coarse kind.
 * Rules are tried in a fixed order and the first matching pattern wins, so
 * e.g. "Schedule of Locations" is a "schedule" rather than "premises".
 * @param {string|null|undefined} rawTarget - Reference text; blank or
 *   missing input is treated as "unknown".
 * @returns {{raw: string, normalized: string, kind: string}} The trimmed
 *   text, its lower-cased form, and one of "declarations", "schedule",
 *   "item", "premises", "section", "policy" or "unknown".
 */
function parseReferentialTarget(rawTarget) {
  const raw = rawTarget?.trim() || "unknown";
  const normalized = raw.toLowerCase();
  const orderedRules = [
    ["declarations", /declarations?|dec\b|decs\b/],
    ["schedule", /schedule|scheduled/],
    ["item", /\bitem\b/],
    ["premises", /premises?|location|building/],
    ["section", /\bsection\b/],
    ["policy", /policy|coverage\s+part|coverage\s+form/]
  ];
  let kind = "unknown";
  if (normalized !== "unknown") {
    const matchedRule = orderedRules.find(([, pattern]) => pattern.test(normalized));
    if (matchedRule) {
      kind = matchedRule[0];
    }
  }
  return { raw, normalized, kind };
}
5709
/**
 * Resolves a reference target to pages using already-extracted metadata only.
 * Sections are searched first (by title), then the form inventory (by title,
 * form type or form number). A match is skipped when it has no usable page
 * range, and the search continues with the next candidate.
 * @param {{referenceTarget: string, sections: object[], formInventory: object[]}} params
 * @returns {{startPage: number, endPage: number}|undefined} The first usable
 *   page range, or `undefined` when nothing matches.
 */
function findLocalReferentialPages(params) {
  const needle = params.referenceTarget.toLowerCase();
  for (const section of params.sections) {
    if (!containsTarget(section.title, needle)) {
      continue;
    }
    const sectionRange = pageRangeFrom(section.pageStart, section.pageEnd);
    if (sectionRange) {
      return sectionRange;
    }
  }
  for (const form of params.formInventory) {
    const searchableFields = [form.title, form.formType, form.formNumber];
    if (!searchableFields.some((field) => containsTarget(field, needle))) {
      continue;
    }
    const formRange = pageRangeFrom(form.pageStart, form.pageEnd);
    if (formRange) {
      return formRange;
    }
  }
  return void 0;
}
5728
/**
 * Finds the first declarations- or schedule-like form that suits the parsed
 * target kind and has a usable page range.
 * - "declarations" accepts declarations forms only.
 * - "schedule", "item", "premises" and "policy" accept either kind of form.
 * - Any other kind accepts nothing.
 * @param {{kind: string}} parsedTarget - Result of `parseReferentialTarget`.
 * @param {object[]} formInventory - Forms with `formType`, `title`,
 *   `pageStart` and `pageEnd`.
 * @returns {{startPage: number, endPage: number}|undefined}
 */
function findDeclarationsSchedulePages(parsedTarget, formInventory) {
  for (const form of formInventory) {
    const formType = normalizeText(form.formType);
    const title = normalizeText(form.title);
    const matchesDeclarations = formType === "declarations" || /declarations?|dec\b|decs\b/.test(title);
    const matchesSchedule = /schedule|scheduled|coverage/.test(title) || formType === "coverage";
    let shouldUse;
    switch (parsedTarget.kind) {
      case "declarations":
        shouldUse = matchesDeclarations;
        break;
      case "schedule":
      case "item":
      case "premises":
      case "policy":
        shouldUse = matchesSchedule || matchesDeclarations;
        break;
      default:
        shouldUse = false;
    }
    if (!shouldUse) {
      continue;
    }
    const range = pageRangeFrom(form.pageStart, form.pageEnd);
    if (range) {
      return range;
    }
  }
  return void 0;
}
5742
/**
 * Finds the first extracted section that suits the parsed target kind and
 * has a usable page range.
 * Matching is by section type and/or title depending on the kind; the
 * "section" kind matches when the title contains the normalized target text.
 * Kinds "policy" and "unknown" never match.
 * @param {{kind: string, normalized: string}} parsedTarget - Result of
 *   `parseReferentialTarget`.
 * @param {object[]} sections - Sections with `title`, `type`, `pageStart`
 *   and `pageEnd`.
 * @returns {{startPage: number, endPage: number}|undefined}
 */
function findSectionPages(parsedTarget, sections) {
  const matchesKind = (title, type) => {
    switch (parsedTarget.kind) {
      case "declarations":
        return type === "declarations" || /declarations?/.test(title);
      case "schedule":
        return type === "schedule" || /schedule|scheduled/.test(title);
      case "premises":
        return /premises?|location|building/.test(title);
      case "item":
        return /\bitem\b|schedule|scheduled/.test(title);
      case "section":
        return containsTarget(title, parsedTarget.normalized);
      default:
        return false;
    }
  };
  for (const section of sections) {
    const title = normalizeText(section.title);
    const type = normalizeText(section.type);
    if (!matchesKind(title, type)) {
      continue;
    }
    const range = pageRangeFrom(section.pageStart, section.pageEnd);
    if (range) {
      return range;
    }
  }
  return void 0;
}
5754
/**
 * Chooses how a referential target should be resolved.
 * Preference order: a locally matched page range, then a declarations or
 * schedule form, then a matching section. When none applies, an unparseable
 * target is skipped and any other target is handed to page location.
 * @param {{referenceTarget: string, sections: object[], formInventory: object[], localPageRange?: object}} params
 * @returns {object} One of
 *   `{ kind: "lookup_pages", source, pageRange }`,
 *   `{ kind: "skip", reason }` or `{ kind: "page_location" }`.
 */
function decideReferentialResolutionAction(params) {
  const lookup = (source, pageRange) => ({ kind: "lookup_pages", source, pageRange });
  if (params.localPageRange) {
    return lookup("local", params.localPageRange);
  }
  const parsedTarget = parseReferentialTarget(params.referenceTarget);
  const formRange = findDeclarationsSchedulePages(parsedTarget, params.formInventory);
  if (formRange) {
    return lookup("declarations_schedule", formRange);
  }
  const sectionRange = findSectionPages(parsedTarget, params.sections);
  if (sectionRange) {
    return lookup("sections", sectionRange);
  }
  return parsedTarget.kind === "unknown"
    ? { kind: "skip", reason: "no concrete reference target" }
    : { kind: "page_location" };
}
5776
+
5777
+ // src/extraction/resolve-referential.ts
5082
5778
  function parseReferenceTarget(text) {
5083
5779
  if (typeof text !== "string") return void 0;
5084
5780
  const normalized = text.trim();
5085
5781
  if (!normalized) return void 0;
5086
5782
  const sectionMatch = normalized.match(/\b(Section\s+\d+[A-Za-z]?)/i);
5087
5783
  if (sectionMatch) return sectionMatch[1];
5784
+ const itemMatch = normalized.match(/\b(Item\s+\d+[A-Za-z]?)/i);
5785
+ if (itemMatch) return itemMatch[1];
5786
+ const premisesMatch = normalized.match(/\b(Premises?(?:\s+No\.?\s*\d+[A-Za-z]?|\s+\d+[A-Za-z]?)?)/i);
5787
+ if (premisesMatch) return premisesMatch[1].trim();
5088
5788
  if (/declarations/i.test(normalized)) return "Declarations";
5089
5789
  const scheduleMatch = normalized.match(/\b(Schedule(?:\s+of\s+[A-Za-z ]+)?)/i);
5090
5790
  if (scheduleMatch) return scheduleMatch[1].trim();
@@ -5097,9 +5797,9 @@ function parseReferenceTarget(text) {
5097
5797
  if (/if applicable/i.test(normalized)) return void 0;
5098
5798
  return void 0;
5099
5799
  }
5100
- var PageLocationSchema = z35.object({
5101
- startPage: z35.number(),
5102
- endPage: z35.number()
5800
+ var PageLocationSchema = z37.object({
5801
+ startPage: z37.number(),
5802
+ endPage: z37.number()
5103
5803
  });
5104
5804
  async function findReferencedPages(params) {
5105
5805
  const {
@@ -5110,26 +5810,31 @@ async function findReferencedPages(params) {
5110
5810
  pageCount,
5111
5811
  generateObject,
5112
5812
  providerOptions,
5813
+ trackUsage,
5113
5814
  log
5114
5815
  } = params;
5115
- const targetLower = referenceTarget.toLowerCase();
5116
- for (const section of sections) {
5117
- if (section.title && section.pageStart != null && section.title.toLowerCase().includes(targetLower)) {
5118
- return {
5119
- startPage: section.pageStart,
5120
- endPage: section.pageEnd ?? section.pageStart
5121
- };
5122
- }
5816
+ const localPageRange = findLocalReferentialPages({
5817
+ referenceTarget,
5818
+ sections,
5819
+ formInventory
5820
+ });
5821
+ const action = decideReferentialResolutionAction({
5822
+ referenceTarget,
5823
+ sections,
5824
+ formInventory,
5825
+ localPageRange
5826
+ });
5827
+ if (action.kind === "lookup_pages") {
5828
+ await log?.(
5829
+ `Referential target "${referenceTarget}" resolved to pages ${action.pageRange.startPage}-${action.pageRange.endPage} via ${action.source}.`
5830
+ );
5831
+ return action.pageRange;
5123
5832
  }
5124
- for (const form of formInventory) {
5125
- const titleMatch = form.title && form.title.toLowerCase().includes(targetLower);
5126
- const typeMatch = form.formType && form.formType.toLowerCase().includes(targetLower);
5127
- if ((titleMatch || typeMatch) && form.pageStart != null) {
5128
- return {
5129
- startPage: form.pageStart,
5130
- endPage: form.pageEnd ?? form.pageStart
5131
- };
5132
- }
5833
+ if (action.kind === "skip") {
5834
+ await log?.(
5835
+ `Skipping referential target "${referenceTarget}": ${action.reason}.`
5836
+ );
5837
+ return void 0;
5133
5838
  }
5134
5839
  try {
5135
5840
  const result = await safeGenerateObject(
@@ -5157,6 +5862,7 @@ Return JSON only.`,
5157
5862
  )
5158
5863
  }
5159
5864
  );
5865
+ trackUsage?.(result.usage);
5160
5866
  if (result.object.startPage > 0 && result.object.endPage > 0) {
5161
5867
  return {
5162
5868
  startPage: result.object.startPage,
@@ -5214,7 +5920,9 @@ async function resolveReferentialCoverages(params) {
5214
5920
  for (let i = 0; i < referentialCoverages.length; i++) {
5215
5921
  const cov = referentialCoverages[i];
5216
5922
  const refString = (looksReferential(cov.limit) ? cov.limit : void 0) ?? (looksReferential(cov.deductible) ? cov.deductible : void 0) ?? cov.limit ?? "";
5217
- const target = parseReferenceTarget(refString) ?? "unknown";
5923
+ const sectionRef = typeof cov.sectionRef === "string" ? cov.sectionRef : "";
5924
+ const parsedTarget = parseReferenceTarget(refString) ?? parseReferenceTarget(sectionRef) ?? sectionRef;
5925
+ const target = parsedTarget || "unknown";
5218
5926
  const group = targetGroups.get(target) ?? [];
5219
5927
  group.push({ coverage: cov, index: i });
5220
5928
  targetGroups.set(target, group);
@@ -5238,6 +5946,7 @@ async function resolveReferentialCoverages(params) {
5238
5946
  pageCount,
5239
5947
  generateObject,
5240
5948
  providerOptions,
5949
+ trackUsage,
5241
5950
  log
5242
5951
  });
5243
5952
  if (!pageRange) {
@@ -5355,6 +6064,78 @@ async function resolveReferentialCoverages(params) {
5355
6064
  };
5356
6065
  }
5357
6066
 
6067
+ // src/extraction/focused-dispatch.ts
6068
/**
 * Runs a focused extractor for a task and, when that extractor declares a
 * fallback, retries with the fallback extractor if the focused run throws
 * or its result is judged empty.
 *
 * @param {object} params
 * @param {{extractorName: string, startPage: number, endPage: number}} params.task
 *   Extractor to run and the page range to run it over.
 * @param {*} params.pdfInput - Passed through to `runExtractor`.
 * @param {Function} params.generateObject - Passed through to `runExtractor`.
 * @param {Function} [params.convertPdfToImages] - Passed through to `runExtractor`.
 * @param {*} [params.providerOptions] - Passed through to `runExtractor`.
 * @param {Function} [params.trackUsage] - Optional usage callback; invoked
 *   with the `usage` of every extractor run that completes.
 * @param {Function} [params.log] - Optional async logger.
 * @returns {Promise<object|object[]|null>}
 *   - the focused result when it is usable or no fallback is configured;
 *   - `[fallbackResult, derivedFocusedResult]` when the fallback produced
 *     data from which focused records could be derived;
 *   - the fallback result alone when nothing could be derived from it;
 *   - `null` when the extractor is unknown, or when the needed run (focused
 *     without fallback, or the fallback itself) failed or was unavailable.
 */
async function runFocusedExtractorWithFallback(params) {
  const {
    task,
    pdfInput,
    generateObject,
    convertPdfToImages,
    providerOptions,
    trackUsage,
    log
  } = params;
  const ext = getExtractor(task.extractorName);
  if (!ext) {
    await log?.(`Unknown extractor: ${task.extractorName}, skipping`);
    return null;
  }
  // Shared invocation for the focused and the fallback extractor.
  const invoke = (name, extractor) => runExtractor({
    name,
    prompt: extractor.buildPrompt(),
    schema: extractor.schema,
    pdfInput,
    startPage: task.startPage,
    endPage: task.endPage,
    generateObject,
    convertPdfToImages,
    maxTokens: extractor.maxTokens ?? 4096,
    providerOptions
  });
  try {
    const result = await invoke(task.extractorName, ext);
    // trackUsage is optional. Calling it unguarded would throw inside this
    // try block and be misreported as an extractor failure, discarding a
    // successful extraction.
    trackUsage?.(result.usage);
    if (!ext.fallback || !ext.fallback.isEmpty(result.data)) {
      return result;
    }
  } catch (error) {
    await log?.(`Extractor ${task.extractorName} failed: ${error}`);
    if (!ext.fallback) {
      return null;
    }
  }
  // Reaching this point means a fallback is configured and the focused run
  // either threw or produced no records.
  const fallbackExt = getExtractor(ext.fallback.extractorName);
  if (!fallbackExt) return null;
  await log?.(
    `Extractor ${task.extractorName} produced no usable records; trying ${ext.fallback.extractorName} fallback for pages ${task.startPage}-${task.endPage}`
  );
  try {
    const fallbackResult = await invoke(ext.fallback.extractorName, fallbackExt);
    trackUsage?.(fallbackResult.usage);
    const focusedData = ext.fallback.deriveFocusedResult(fallbackResult.data);
    // The derived entry has no usage of its own; the fallback run above
    // already reported it.
    return focusedData ? [
      fallbackResult,
      { name: task.extractorName, data: focusedData, usage: void 0 }
    ] : fallbackResult;
  } catch (fallbackError) {
    await log?.(`${ext.fallback.extractorName} fallback for ${task.extractorName} failed: ${fallbackError}`);
    return null;
  }
}
6138
+
5358
6139
  // src/core/quality.ts
5359
6140
  function evaluateQualityGate(params) {
5360
6141
  const { issues, hasRoundWarnings = false } = params;
@@ -5391,11 +6172,6 @@ function addFormEntry(inventory, formNumber, source, extra) {
5391
6172
  sources: [source]
5392
6173
  });
5393
6174
  }
5394
- function looksReferential2(value) {
5395
- if (typeof value !== "string") return false;
5396
- const normalized = value.toLowerCase();
5397
- return normalized.includes("shown in the declarations") || normalized.includes("shown in declarations") || normalized.includes("shown in the schedule") || normalized.includes("as stated") || normalized.includes("if applicable");
5398
- }
5399
6175
  function looksTocArtifact(value) {
5400
6176
  if (typeof value !== "string") return false;
5401
6177
  return /\.{4,}\d{1,3}$/.test(value.trim()) || /^\d+\.\s+[A-Z][\s\S]*\.{3,}\d{1,3}$/.test(value.trim());
@@ -5419,6 +6195,28 @@ function buildExtractionReviewReport(params) {
5419
6195
  const exclusions = memory.get("exclusions")?.exclusions ?? [];
5420
6196
  const conditions = memory.get("conditions")?.conditions ?? [];
5421
6197
  const sections = memory.get("sections")?.sections ?? [];
6198
+ const definitionsResult = memory.get("definitions");
6199
+ const coveredReasonsResult = memory.get("covered_reasons");
6200
+ const definitions = Array.isArray(definitionsResult?.definitions) ? definitionsResult.definitions : sections.filter((section) => section.type === "definition");
6201
+ const coveredReasons = Array.isArray(coveredReasonsResult?.coveredReasons) ? coveredReasonsResult.coveredReasons : Array.isArray(coveredReasonsResult?.covered_reasons) ? coveredReasonsResult.covered_reasons : sections.filter(looksCoveredReasonSection);
6202
+ const mappedDefinitions = params.pageAssignments.some((assignment) => assignment.extractorNames.includes("definitions"));
6203
+ const mappedCoveredReasons = params.pageAssignments.some((assignment) => assignment.extractorNames.includes("covered_reasons"));
6204
+ if (mappedDefinitions && definitions.length === 0) {
6205
+ deterministicIssues.push({
6206
+ code: "definitions_mapped_but_empty",
6207
+ severity: "warning",
6208
+ message: "Page map assigned definitions extraction, but no definition records were extracted.",
6209
+ extractorName: "definitions"
6210
+ });
6211
+ }
6212
+ if (mappedCoveredReasons && coveredReasons.length === 0) {
6213
+ deterministicIssues.push({
6214
+ code: "covered_reasons_mapped_but_empty",
6215
+ severity: "warning",
6216
+ message: "Page map assigned covered reasons extraction, but no covered reason records were extracted.",
6217
+ extractorName: "covered_reasons"
6218
+ });
6219
+ }
5422
6220
  for (const form of extractedFormInventory) {
5423
6221
  addFormEntry(
5424
6222
  inventory,
@@ -5515,7 +6313,7 @@ function buildExtractionReviewReport(params) {
5515
6313
  itemName: typeof coverage.name === "string" ? coverage.name : void 0
5516
6314
  });
5517
6315
  }
5518
- if (looksReferential2(coverage.limit) || looksReferential2(coverage.deductible)) {
6316
+ if (looksReferential(coverage.limit) || looksReferential(coverage.deductible)) {
5519
6317
  deterministicIssues.push({
5520
6318
  code: "coverage_referential_value",
5521
6319
  severity: "warning",
@@ -5616,6 +6414,67 @@ function buildExtractionReviewReport(params) {
5616
6414
  });
5617
6415
  }
5618
6416
  }
6417
+ for (const definition of definitions) {
6418
+ const term = typeof definition.term === "string" ? definition.term : typeof definition.title === "string" ? definition.title : "unknown";
6419
+ const content = typeof definition.definition === "string" ? definition.definition : typeof definition.content === "string" ? definition.content : "";
6420
+ if (!content.trim()) {
6421
+ deterministicIssues.push({
6422
+ code: "definition_missing_content",
6423
+ severity: "warning",
6424
+ message: `Definition "${term}" is missing definition text.`,
6425
+ extractorName: "definitions",
6426
+ formNumber: normalizeFormNumber(definition.formNumber),
6427
+ pageNumber: typeof definition.pageNumber === "number" ? definition.pageNumber : typeof definition.pageStart === "number" ? definition.pageStart : void 0,
6428
+ itemName: term
6429
+ });
6430
+ }
6431
+ if (typeof definition.pageNumber !== "number" && typeof definition.pageStart !== "number") {
6432
+ deterministicIssues.push({
6433
+ code: "definition_missing_page_number",
6434
+ severity: "warning",
6435
+ message: `Definition "${term}" is missing page provenance.`,
6436
+ extractorName: "definitions",
6437
+ formNumber: normalizeFormNumber(definition.formNumber),
6438
+ itemName: term
6439
+ });
6440
+ }
6441
+ }
6442
+ for (const coveredReason of coveredReasons) {
6443
+ const itemName = typeof coveredReason.name === "string" ? coveredReason.name : typeof coveredReason.reason === "string" ? coveredReason.reason : typeof coveredReason.title === "string" ? coveredReason.title : "unknown";
6444
+ const content = typeof coveredReason.content === "string" ? coveredReason.content : typeof coveredReason.description === "string" ? coveredReason.description : "";
6445
+ if (!content.trim()) {
6446
+ deterministicIssues.push({
6447
+ code: "covered_reason_missing_content",
6448
+ severity: "warning",
6449
+ message: `Covered reason "${itemName}" is missing substantive text.`,
6450
+ extractorName: "covered_reasons",
6451
+ formNumber: normalizeFormNumber(coveredReason.formNumber),
6452
+ pageNumber: typeof coveredReason.pageNumber === "number" ? coveredReason.pageNumber : typeof coveredReason.pageStart === "number" ? coveredReason.pageStart : void 0,
6453
+ itemName
6454
+ });
6455
+ }
6456
+ if (typeof coveredReason.pageNumber !== "number" && typeof coveredReason.pageStart !== "number") {
6457
+ deterministicIssues.push({
6458
+ code: "covered_reason_missing_page_number",
6459
+ severity: "warning",
6460
+ message: `Covered reason "${itemName}" is missing page provenance.`,
6461
+ extractorName: "covered_reasons",
6462
+ formNumber: normalizeFormNumber(coveredReason.formNumber),
6463
+ itemName
6464
+ });
6465
+ }
6466
+ if (looksReferential(content) || looksReferential(coveredReason.reason)) {
6467
+ deterministicIssues.push({
6468
+ code: "covered_reason_referential_value",
6469
+ severity: "warning",
6470
+ message: `Covered reason "${itemName}" contains referential language instead of the extracted covered cause wording.`,
6471
+ extractorName: "covered_reasons",
6472
+ formNumber: normalizeFormNumber(coveredReason.formNumber),
6473
+ pageNumber: typeof coveredReason.pageNumber === "number" ? coveredReason.pageNumber : typeof coveredReason.pageStart === "number" ? coveredReason.pageStart : void 0,
6474
+ itemName
6475
+ });
6476
+ }
6477
+ }
5619
6478
  for (const section of sections) {
5620
6479
  if (typeof section.content === "string" && section.content.trim().length < 120 && typeof section.pageStart === "number" && (!("pageEnd" in section) || section.pageEnd === section.pageStart || section.pageEnd === void 0)) {
5621
6480
  deterministicIssues.push({
@@ -5638,6 +6497,8 @@ function buildExtractionReviewReport(params) {
5638
6497
  const artifacts = [
5639
6498
  { kind: "form_inventory", label: "Form Inventory", itemCount: formInventory.length },
5640
6499
  { kind: "page_map", label: "Page Map", itemCount: params.pageAssignments.length },
6500
+ { kind: "definitions", label: "Definitions", itemCount: definitions.length },
6501
+ { kind: "covered_reasons", label: "Covered Reasons", itemCount: coveredReasons.length },
5641
6502
  { kind: "referential_resolution", label: "Referential Resolution", itemCount: coverages.filter((c) => c.limitValueType === "referential" || c.limitValueType === "as_stated" || c.deductibleValueType === "referential" || c.deductibleValueType === "as_stated").length }
5642
6503
  ];
5643
6504
  const qualityGateStatus = evaluateQualityGate({
@@ -5663,6 +6524,134 @@ function toReviewRoundRecord(round, review) {
5663
6524
  };
5664
6525
  }
5665
6526
 
6527
+ // src/extraction/planning.ts
6528
+ function normalizePageAssignments(pageAssignments, formInventory) {
6529
+ const pageFormTypes = /* @__PURE__ */ new Map();
6530
+ if (formInventory) {
6531
+ for (const form of formInventory.forms) {
6532
+ if (form.pageStart != null) {
6533
+ const end = form.pageEnd ?? form.pageStart;
6534
+ for (let p = form.pageStart; p <= end; p += 1) {
6535
+ const types = pageFormTypes.get(p) ?? /* @__PURE__ */ new Set();
6536
+ types.add(form.formType);
6537
+ pageFormTypes.set(p, types);
6538
+ }
6539
+ }
6540
+ }
6541
+ }
6542
+ return pageAssignments.map((assignment) => {
6543
+ let extractorNames = [...new Set(
6544
+ (assignment.extractorNames.length > 0 ? assignment.extractorNames : ["sections"]).filter(Boolean)
6545
+ )];
6546
+ const hasDeclarations = extractorNames.includes("declarations");
6547
+ const hasConditions = extractorNames.includes("conditions");
6548
+ const hasExclusions = extractorNames.includes("exclusions");
6549
+ const hasEndorsements = extractorNames.includes("endorsements");
6550
+ const looksLikeScheduleValues = assignment.hasScheduleValues === true;
6551
+ const roleBlocksCoverageLimits = assignment.pageRole === "policy_form" || assignment.pageRole === "condition_exclusion_form" || assignment.pageRole === "endorsement_form";
6552
+ const inventoryTypes = pageFormTypes.get(assignment.localPageNumber);
6553
+ const inventoryBlocksCoverageLimits = inventoryTypes != null && !looksLikeScheduleValues && !hasDeclarations && (inventoryTypes.has("endorsement") || inventoryTypes.has("notice") || inventoryTypes.has("application"));
6554
+ if (extractorNames.includes("coverage_limits")) {
6555
+ const shouldDropCoverageLimits = inventoryBlocksCoverageLimits || !looksLikeScheduleValues && roleBlocksCoverageLimits || !hasDeclarations && !looksLikeScheduleValues && (hasConditions || hasExclusions) || !hasDeclarations && !looksLikeScheduleValues && hasEndorsements;
6556
+ if (shouldDropCoverageLimits) {
6557
+ extractorNames = extractorNames.filter((name) => name !== "coverage_limits");
6558
+ }
6559
+ }
6560
+ if (inventoryTypes?.has("endorsement") && !extractorNames.includes("endorsements")) {
6561
+ extractorNames = [...extractorNames, "endorsements"];
6562
+ }
6563
+ if (extractorNames.length === 0) {
6564
+ extractorNames = ["sections"];
6565
+ }
6566
+ return {
6567
+ ...assignment,
6568
+ extractorNames
6569
+ };
6570
+ });
6571
+ }
6572
+ function buildTemplateHints(primaryType, documentType, pageCount, template) {
6573
+ return [
6574
+ `Document type: ${primaryType} ${documentType}`,
6575
+ `Expected sections: ${template.expectedSections.join(", ")}`,
6576
+ `Page hints: ${Object.entries(template.pageHints).map(([k, v]) => `${k}: ${v}`).join("; ")}`,
6577
+ `Total pages: ${pageCount}`
6578
+ ].join("\n");
6579
+ }
6580
+ function groupContiguousPages(pages) {
6581
+ if (pages.length === 0) return [];
6582
+ const sorted = [...new Set(pages)].sort((a, b) => a - b);
6583
+ const ranges = [];
6584
+ let start = sorted[0];
6585
+ let previous = sorted[0];
6586
+ for (let i = 1; i < sorted.length; i += 1) {
6587
+ const current = sorted[i];
6588
+ if (current === previous + 1) {
6589
+ previous = current;
6590
+ continue;
6591
+ }
6592
+ ranges.push({ startPage: start, endPage: previous });
6593
+ start = current;
6594
+ previous = current;
6595
+ }
6596
+ ranges.push({ startPage: start, endPage: previous });
6597
+ return ranges;
6598
+ }
6599
+ function buildPlanFromPageAssignments(pageAssignments, pageCount, formInventory) {
6600
+ const extractorPages = /* @__PURE__ */ new Map();
6601
+ for (const assignment of pageAssignments) {
6602
+ const extractors = assignment.extractorNames.length > 0 ? assignment.extractorNames : ["sections"];
6603
+ for (const extractorName of extractors) {
6604
+ extractorPages.set(extractorName, [...extractorPages.get(extractorName) ?? [], assignment.localPageNumber]);
6605
+ }
6606
+ }
6607
+ const coveredPages = /* @__PURE__ */ new Set();
6608
+ for (const pages of extractorPages.values()) {
6609
+ for (const page of pages) coveredPages.add(page);
6610
+ }
6611
+ for (let page = 1; page <= pageCount; page += 1) {
6612
+ if (!coveredPages.has(page)) {
6613
+ extractorPages.set("sections", [...extractorPages.get("sections") ?? [], page]);
6614
+ }
6615
+ }
6616
+ const contextualExtractors = /* @__PURE__ */ new Set(["conditions", "covered_reasons", "definitions", "exclusions", "endorsements"]);
6617
+ const contextualForms = (formInventory?.forms ?? []).filter(
6618
+ (form) => form.pageStart != null && (form.pageEnd ?? form.pageStart) != null
6619
+ );
6620
+ const expandPagesToFormRanges = (extractorName, pages) => {
6621
+ if (!contextualExtractors.has(extractorName)) return pages;
6622
+ const expanded = new Set(pages);
6623
+ for (const page of pages) {
6624
+ for (const form of contextualForms) {
6625
+ const pageStart = form.pageStart;
6626
+ const pageEnd = form.pageEnd ?? form.pageStart;
6627
+ const formType = form.formType;
6628
+ const supportsContextualExpansion = extractorName === "endorsements" ? formType === "endorsement" : formType === "coverage" || formType === "endorsement";
6629
+ if (!supportsContextualExpansion) continue;
6630
+ if (page < pageStart || page > pageEnd) continue;
6631
+ for (let current = pageStart; current <= pageEnd; current += 1) {
6632
+ expanded.add(current);
6633
+ }
6634
+ }
6635
+ }
6636
+ return [...expanded].sort((a, b) => a - b);
6637
+ };
6638
+ const tasks = [...extractorPages.entries()].flatMap(
6639
+ ([extractorName, pages]) => groupContiguousPages(expandPagesToFormRanges(extractorName, pages)).map(({ startPage, endPage }) => ({
6640
+ extractorName,
6641
+ startPage,
6642
+ endPage,
6643
+ description: `Page-mapped ${extractorName} extraction for pages ${startPage}-${endPage}`
6644
+ }))
6645
+ ).sort((a, b) => a.startPage - b.startPage || a.extractorName.localeCompare(b.extractorName));
6646
+ return {
6647
+ tasks,
6648
+ pageMap: [...extractorPages.entries()].map(([section, pages]) => ({
6649
+ section,
6650
+ pages: `pages ${[...new Set(pages)].sort((a, b) => a - b).join(", ")}`
6651
+ }))
6652
+ };
6653
+ }
6654
+
5666
6655
  // src/extraction/coordinator.ts
5667
6656
  function createExtractor(config) {
5668
6657
  const {
@@ -5679,6 +6668,7 @@ function createExtractor(config) {
5679
6668
  onCheckpointSave
5680
6669
  } = config;
5681
6670
  const limit = pLimit(concurrency);
6671
+ const extractorCatalog = formatExtractorCatalogForPrompt();
5682
6672
  let totalUsage = { inputTokens: 0, outputTokens: 0 };
5683
6673
  let modelCalls = 0;
5684
6674
  let callsWithUsage = 0;
@@ -5699,32 +6689,56 @@ function createExtractor(config) {
5699
6689
  memory.set(name, mergeExtractorResult(name, existing, data));
5700
6690
  }
5701
6691
  function summarizeExtraction(memory) {
5702
- const coverageResult = memory.get("coverage_limits");
5703
- const declarationResult = memory.get("declarations");
5704
- const endorsementResult = memory.get("endorsements");
5705
- const exclusionResult = memory.get("exclusions");
5706
- const conditionResult = memory.get("conditions");
5707
- const sectionResult = memory.get("sections");
5708
- const coverageSummary = Array.isArray(coverageResult?.coverages) ? coverageResult.coverages.slice(0, 12).map((coverage) => ({
6692
+ const declarationResult = readMemoryRecord(memory, "declarations");
6693
+ const endorsements = readRecordArray(readMemoryRecord(memory, "endorsements"), "endorsements") ?? [];
6694
+ const exclusions = readRecordArray(readMemoryRecord(memory, "exclusions"), "exclusions") ?? [];
6695
+ const conditions = readRecordArray(readMemoryRecord(memory, "conditions"), "conditions") ?? [];
6696
+ const sections = getSections(memory) ?? [];
6697
+ const definitions = getDefinitions(memory) ?? sections.filter((section) => section.type === "definition");
6698
+ const coveredReasons = getCoveredReasons(memory) ?? sections.filter(looksCoveredReasonSection);
6699
+ const coverages = getCoverageLimitCoverages(memory);
6700
+ const coverageSummary = coverages.slice(0, 12).map((coverage) => ({
5709
6701
  name: coverage.name,
5710
6702
  limit: coverage.limit,
5711
6703
  deductible: coverage.deductible,
5712
6704
  formNumber: coverage.formNumber
5713
- })) : [];
6705
+ }));
5714
6706
  return JSON.stringify({
5715
6707
  extractedKeys: [...memory.keys()].filter((key) => key !== "classify"),
5716
6708
  declarationFieldCount: Array.isArray(declarationResult?.fields) ? declarationResult.fields.length : 0,
5717
- coverageCount: Array.isArray(coverageResult?.coverages) ? coverageResult.coverages.length : 0,
6709
+ coverageCount: coverages.length,
5718
6710
  coverageSamples: coverageSummary,
5719
- endorsementCount: Array.isArray(endorsementResult?.endorsements) ? endorsementResult.endorsements.length : 0,
5720
- exclusionCount: Array.isArray(exclusionResult?.exclusions) ? exclusionResult.exclusions.length : 0,
5721
- conditionCount: Array.isArray(conditionResult?.conditions) ? conditionResult.conditions.length : 0,
5722
- sectionCount: Array.isArray(sectionResult?.sections) ? sectionResult.sections.length : 0
6711
+ endorsementCount: endorsements.length,
6712
+ exclusionCount: exclusions.length,
6713
+ conditionCount: conditions.length,
6714
+ definitionCount: definitions.length,
6715
+ coveredReasonCount: coveredReasons.length,
6716
+ sectionCount: sections.length
5723
6717
  }, null, 2);
5724
6718
  }
6719
+ function textIncludesSupplementarySignal(value) {
6720
+ if (typeof value !== "string") return false;
6721
+ return /\b(supplementary|regulatory|department of insurance|ombudsman|complaint|claim|claims|contact|phone|email|cancellation|cancelled|nonrenewal|non-renewal|non renew|notice|governing law|jurisdiction|third[- ]party administrator|tpa)\b/i.test(value);
6722
+ }
6723
+ function hasSupplementaryExtractionSignal(pageAssignments, formInventory, memory) {
6724
+ const hasPageSignal = pageAssignments.some(
6725
+ (assignment) => assignment.pageRole === "supplementary" || assignment.extractorNames.includes("supplementary") || textIncludesSupplementarySignal(assignment.notes)
6726
+ );
6727
+ if (hasPageSignal) return true;
6728
+ const hasFormSignal = (formInventory?.forms ?? []).some(
6729
+ (form) => form.formType === "notice" || textIncludesSupplementarySignal(form.title) || textIncludesSupplementarySignal(form.formNumber)
6730
+ );
6731
+ if (hasFormSignal) return true;
6732
+ const likelySupplementaryKeys = ["sections", "conditions", "endorsements", "exclusions"];
6733
+ return likelySupplementaryKeys.some((key) => {
6734
+ const value = memory.get(key);
6735
+ if (!value) return false;
6736
+ return textIncludesSupplementarySignal(JSON.stringify(value));
6737
+ });
6738
+ }
5725
6739
  function buildAlreadyExtractedSummary(memory) {
5726
6740
  const lines = [];
5727
- const declarationResult = memory.get("declarations");
6741
+ const declarationResult = readMemoryRecord(memory, "declarations");
5728
6742
  if (Array.isArray(declarationResult?.fields)) {
5729
6743
  for (const field of declarationResult.fields) {
5730
6744
  if (field.key && field.value) {
@@ -5733,20 +6747,17 @@ function createExtractor(config) {
5733
6747
  }
5734
6748
  }
5735
6749
  }
5736
- const coverageResult = memory.get("coverage_limits");
5737
- if (Array.isArray(coverageResult?.coverages)) {
5738
- for (const cov of coverageResult.coverages) {
5739
- const parts = [cov.name, cov.limit && `limit=${cov.limit}`, cov.deductible && `deductible=${cov.deductible}`].filter(Boolean);
5740
- if (parts.length > 0) lines.push(`- coverage: ${parts.join(", ")}`);
5741
- }
6750
+ for (const cov of getCoverageLimitCoverages(memory)) {
6751
+ const parts = [cov.name, cov.limit && `limit=${cov.limit}`, cov.deductible && `deductible=${cov.deductible}`].filter(Boolean);
6752
+ if (parts.length > 0) lines.push(`- coverage: ${parts.join(", ")}`);
5742
6753
  }
5743
- const namedInsured = memory.get("named_insured");
6754
+ const namedInsured = getNamedInsured(memory);
5744
6755
  if (namedInsured) {
5745
6756
  for (const [key, value] of Object.entries(namedInsured)) {
5746
6757
  if (value && typeof value === "string") lines.push(`- ${key}: ${value}`);
5747
6758
  }
5748
6759
  }
5749
- const carrierInfo = memory.get("carrier_info");
6760
+ const carrierInfo = getCarrierInfo(memory);
5750
6761
  if (carrierInfo) {
5751
6762
  for (const [key, value] of Object.entries(carrierInfo)) {
5752
6763
  if (value && typeof value === "string") lines.push(`- ${key}: ${value}`);
@@ -5754,141 +6765,43 @@ function createExtractor(config) {
5754
6765
  }
5755
6766
  return lines.length > 0 ? lines.join("\n") : "";
5756
6767
  }
5757
- function formatPageMapSummary(pageAssignments) {
5758
- const extractorPages = /* @__PURE__ */ new Map();
5759
- for (const assignment of pageAssignments) {
5760
- for (const extractorName of assignment.extractorNames) {
5761
- extractorPages.set(extractorName, [...extractorPages.get(extractorName) ?? [], assignment.localPageNumber]);
5762
- }
5763
- }
5764
- if (extractorPages.size === 0) return "No page assignments available.";
5765
- return [...extractorPages.entries()].map(([extractorName, pages]) => `${extractorName}: pages ${pages.join(", ")}`).join("\n");
5766
- }
5767
- function normalizePageAssignments(pageAssignments, formInventory) {
5768
- const pageFormTypes = /* @__PURE__ */ new Map();
5769
- if (formInventory) {
5770
- for (const form of formInventory.forms) {
5771
- if (form.pageStart != null) {
5772
- const end = form.pageEnd ?? form.pageStart;
5773
- for (let p = form.pageStart; p <= end; p++) {
5774
- const types = pageFormTypes.get(p) ?? /* @__PURE__ */ new Set();
5775
- types.add(form.formType);
5776
- pageFormTypes.set(p, types);
5777
- }
5778
- }
5779
- }
6768
+ async function runFocusedExtractorTask(task, pdfInput, memory) {
6769
+ if (task.extractorName === "supplementary") {
6770
+ const alreadyExtractedSummary = buildAlreadyExtractedSummary(memory);
6771
+ const result = await runExtractor({
6772
+ name: "supplementary",
6773
+ prompt: buildSupplementaryPrompt(alreadyExtractedSummary),
6774
+ schema: SupplementarySchema,
6775
+ pdfInput,
6776
+ startPage: task.startPage,
6777
+ endPage: task.endPage,
6778
+ generateObject,
6779
+ convertPdfToImages,
6780
+ maxTokens: 4096,
6781
+ providerOptions
6782
+ });
6783
+ trackUsage(result.usage);
6784
+ return result;
5780
6785
  }
5781
- return pageAssignments.map((assignment) => {
5782
- let extractorNames = [...new Set(
5783
- (assignment.extractorNames.length > 0 ? assignment.extractorNames : ["sections"]).filter(Boolean)
5784
- )];
5785
- const hasDeclarations = extractorNames.includes("declarations");
5786
- const hasConditions = extractorNames.includes("conditions");
5787
- const hasExclusions = extractorNames.includes("exclusions");
5788
- const hasEndorsements = extractorNames.includes("endorsements");
5789
- const looksLikeScheduleValues = assignment.hasScheduleValues === true;
5790
- const roleBlocksCoverageLimits = assignment.pageRole === "policy_form" || assignment.pageRole === "condition_exclusion_form" || assignment.pageRole === "endorsement_form";
5791
- const inventoryTypes = pageFormTypes.get(assignment.localPageNumber);
5792
- const inventoryBlocksCoverageLimits = inventoryTypes != null && !looksLikeScheduleValues && !hasDeclarations && (inventoryTypes.has("endorsement") || inventoryTypes.has("notice") || inventoryTypes.has("application"));
5793
- if (extractorNames.includes("coverage_limits")) {
5794
- const shouldDropCoverageLimits = inventoryBlocksCoverageLimits || !looksLikeScheduleValues && roleBlocksCoverageLimits || !hasDeclarations && !looksLikeScheduleValues && (hasConditions || hasExclusions) || !hasDeclarations && !looksLikeScheduleValues && hasEndorsements;
5795
- if (shouldDropCoverageLimits) {
5796
- extractorNames = extractorNames.filter((name) => name !== "coverage_limits");
5797
- }
5798
- }
5799
- if (inventoryTypes?.has("endorsement") && !extractorNames.includes("endorsements")) {
5800
- extractorNames = [...extractorNames, "endorsements"];
5801
- }
5802
- if (extractorNames.length === 0) {
5803
- extractorNames = ["sections"];
5804
- }
5805
- return {
5806
- ...assignment,
5807
- extractorNames
5808
- };
6786
+ return runFocusedExtractorWithFallback({
6787
+ task,
6788
+ pdfInput,
6789
+ generateObject,
6790
+ convertPdfToImages,
6791
+ providerOptions,
6792
+ trackUsage,
6793
+ log
5809
6794
  });
5810
6795
  }
5811
- function buildTemplateHints(primaryType, documentType, pageCount, template) {
5812
- return [
5813
- `Document type: ${primaryType} ${documentType}`,
5814
- `Expected sections: ${template.expectedSections.join(", ")}`,
5815
- `Page hints: ${Object.entries(template.pageHints).map(([k, v]) => `${k}: ${v}`).join("; ")}`,
5816
- `Total pages: ${pageCount}`
5817
- ].join("\n");
5818
- }
5819
- function groupContiguousPages(pages) {
5820
- if (pages.length === 0) return [];
5821
- const sorted = [...new Set(pages)].sort((a, b) => a - b);
5822
- const ranges = [];
5823
- let start = sorted[0];
5824
- let previous = sorted[0];
5825
- for (let i = 1; i < sorted.length; i += 1) {
5826
- const current = sorted[i];
5827
- if (current === previous + 1) {
5828
- previous = current;
5829
- continue;
5830
- }
5831
- ranges.push({ startPage: start, endPage: previous });
5832
- start = current;
5833
- previous = current;
5834
- }
5835
- ranges.push({ startPage: start, endPage: previous });
5836
- return ranges;
5837
- }
5838
- function buildPlanFromPageAssignments(pageAssignments, pageCount, formInventory) {
6796
+ function formatPageMapSummary(pageAssignments) {
5839
6797
  const extractorPages = /* @__PURE__ */ new Map();
5840
6798
  for (const assignment of pageAssignments) {
5841
- const extractors = assignment.extractorNames.length > 0 ? assignment.extractorNames : ["sections"];
5842
- for (const extractorName of extractors) {
6799
+ for (const extractorName of assignment.extractorNames) {
5843
6800
  extractorPages.set(extractorName, [...extractorPages.get(extractorName) ?? [], assignment.localPageNumber]);
5844
6801
  }
5845
6802
  }
5846
- const coveredPages = /* @__PURE__ */ new Set();
5847
- for (const pages of extractorPages.values()) {
5848
- for (const page of pages) coveredPages.add(page);
5849
- }
5850
- for (let page = 1; page <= pageCount; page += 1) {
5851
- if (!coveredPages.has(page)) {
5852
- extractorPages.set("sections", [...extractorPages.get("sections") ?? [], page]);
5853
- }
5854
- }
5855
- const contextualExtractors = /* @__PURE__ */ new Set(["conditions", "exclusions", "endorsements"]);
5856
- const contextualForms = (formInventory?.forms ?? []).filter(
5857
- (form) => form.pageStart != null && (form.pageEnd ?? form.pageStart) != null
5858
- );
5859
- const expandPagesToFormRanges = (extractorName, pages) => {
5860
- if (!contextualExtractors.has(extractorName)) return pages;
5861
- const expanded = new Set(pages);
5862
- for (const page of pages) {
5863
- for (const form of contextualForms) {
5864
- const pageStart = form.pageStart;
5865
- const pageEnd = form.pageEnd ?? form.pageStart;
5866
- const formType = form.formType;
5867
- const supportsContextualExpansion = extractorName === "endorsements" ? formType === "endorsement" : formType === "coverage" || formType === "endorsement";
5868
- if (!supportsContextualExpansion) continue;
5869
- if (page < pageStart || page > pageEnd) continue;
5870
- for (let current = pageStart; current <= pageEnd; current += 1) {
5871
- expanded.add(current);
5872
- }
5873
- }
5874
- }
5875
- return [...expanded].sort((a, b) => a - b);
5876
- };
5877
- const tasks = [...extractorPages.entries()].flatMap(
5878
- ([extractorName, pages]) => groupContiguousPages(expandPagesToFormRanges(extractorName, pages)).map(({ startPage, endPage }) => ({
5879
- extractorName,
5880
- startPage,
5881
- endPage,
5882
- description: `Page-mapped ${extractorName} extraction for pages ${startPage}-${endPage}`
5883
- }))
5884
- ).sort((a, b) => a.startPage - b.startPage || a.extractorName.localeCompare(b.extractorName));
5885
- return {
5886
- tasks,
5887
- pageMap: [...extractorPages.entries()].map(([section, pages]) => ({
5888
- section,
5889
- pages: `pages ${[...new Set(pages)].sort((a, b) => a - b).join(", ")}`
5890
- }))
5891
- };
6803
+ if (extractorPages.size === 0) return "No page assignments available.";
6804
+ return [...extractorPages.entries()].map(([extractorName, pages]) => `${extractorName}: ${pages.length} page(s), pages ${pages.join(", ")}`).join("\n");
5892
6805
  }
5893
6806
  async function extract(pdfInput, documentId, options) {
5894
6807
  const id = documentId ?? `doc-${Date.now()}`;
@@ -5900,7 +6813,8 @@ function createExtractor(config) {
5900
6813
  const pipelineCtx = createPipelineContext({
5901
6814
  id,
5902
6815
  onSave: onCheckpointSave,
5903
- resumeFrom: options?.resumeFrom
6816
+ resumeFrom: options?.resumeFrom,
6817
+ phaseOrder: ["classify", "form_inventory", "page_map", "plan", "extract", "resolve_referential", "review", "assemble"]
5904
6818
  });
5905
6819
  const resumed = pipelineCtx.getCheckpoint()?.state;
5906
6820
  if (resumed?.memory) {
@@ -6068,40 +6982,18 @@ function createExtractor(config) {
6068
6982
  const extractorResults = await Promise.all(
6069
6983
  tasks.map(
6070
6984
  (task) => limit(async () => {
6071
- const ext = getExtractor(task.extractorName);
6072
- if (!ext) {
6073
- await log?.(`Unknown extractor: ${task.extractorName}, skipping`);
6074
- return null;
6075
- }
6076
6985
  onProgress?.(`Extracting ${task.extractorName} (pages ${task.startPage}-${task.endPage})...`);
6077
- try {
6078
- const result = await runExtractor({
6079
- name: task.extractorName,
6080
- prompt: ext.buildPrompt(),
6081
- schema: ext.schema,
6082
- pdfInput,
6083
- startPage: task.startPage,
6084
- endPage: task.endPage,
6085
- generateObject,
6086
- convertPdfToImages,
6087
- maxTokens: ext.maxTokens ?? 4096,
6088
- providerOptions
6089
- });
6090
- trackUsage(result.usage);
6091
- return result;
6092
- } catch (error) {
6093
- await log?.(`Extractor ${task.extractorName} failed: ${error}`);
6094
- return null;
6095
- }
6986
+ return runFocusedExtractorTask(task, pdfInput, memory);
6096
6987
  })
6097
6988
  )
6098
6989
  );
6099
- for (const result of extractorResults) {
6990
+ for (const result of extractorResults.flatMap((item) => Array.isArray(item) ? item : item ? [item] : [])) {
6100
6991
  if (result) {
6101
6992
  mergeMemoryResult(result.name, result.data, memory);
6102
6993
  }
6103
6994
  }
6104
- {
6995
+ const planIncludesSupplementary = tasks.some((task) => task.extractorName === "supplementary");
6996
+ if (!planIncludesSupplementary && hasSupplementaryExtractionSignal(pageAssignments, formInventory, memory)) {
6105
6997
  onProgress?.("Extracting supplementary retrieval facts...");
6106
6998
  try {
6107
6999
  const alreadyExtractedSummary = buildAlreadyExtractedSummary(memory);
@@ -6175,7 +7067,7 @@ function createExtractor(config) {
6175
7067
  const reviewResponse = await safeGenerateObject(
6176
7068
  generateObject,
6177
7069
  {
6178
- prompt: buildReviewPrompt(template.required, extractedKeys, extractionSummary, pageMapSummary),
7070
+ prompt: buildReviewPrompt(template.required, extractedKeys, extractionSummary, pageMapSummary, extractorCatalog),
6179
7071
  schema: ReviewResultSchema,
6180
7072
  maxTokens: 1536,
6181
7073
  providerOptions: await buildPdfProviderOptions(pdfInput, providerOptions)
@@ -6199,31 +7091,11 @@ function createExtractor(config) {
6199
7091
  const followUpResults = await Promise.all(
6200
7092
  reviewResponse.object.additionalTasks.map(
6201
7093
  (task) => limit(async () => {
6202
- const ext = getExtractor(task.extractorName);
6203
- if (!ext) return null;
6204
- try {
6205
- const result = await runExtractor({
6206
- name: task.extractorName,
6207
- prompt: ext.buildPrompt(),
6208
- schema: ext.schema,
6209
- pdfInput,
6210
- startPage: task.startPage,
6211
- endPage: task.endPage,
6212
- generateObject,
6213
- convertPdfToImages,
6214
- maxTokens: ext.maxTokens ?? 4096,
6215
- providerOptions
6216
- });
6217
- trackUsage(result.usage);
6218
- return result;
6219
- } catch (error) {
6220
- await log?.(`Follow-up extractor ${task.extractorName} failed: ${error}`);
6221
- return null;
6222
- }
7094
+ return runFocusedExtractorTask(task, pdfInput, memory);
6223
7095
  })
6224
7096
  )
6225
7097
  );
6226
- for (const result of followUpResults) {
7098
+ for (const result of followUpResults.flatMap((item) => Array.isArray(item) ? item : item ? [item] : [])) {
6227
7099
  if (result) {
6228
7100
  mergeMemoryResult(result.name, result.data, memory);
6229
7101
  }
@@ -6539,8 +7411,8 @@ Respond with JSON only:
6539
7411
  }`;
6540
7412
 
6541
7413
  // src/schemas/application.ts
6542
- import { z as z36 } from "zod";
6543
- var FieldTypeSchema = z36.enum([
7414
+ import { z as z38 } from "zod";
7415
+ var FieldTypeSchema = z38.enum([
6544
7416
  "text",
6545
7417
  "numeric",
6546
7418
  "currency",
@@ -6549,131 +7421,131 @@ var FieldTypeSchema = z36.enum([
6549
7421
  "table",
6550
7422
  "declaration"
6551
7423
  ]);
6552
- var ApplicationFieldSchema = z36.object({
6553
- id: z36.string(),
6554
- label: z36.string(),
6555
- section: z36.string(),
7424
+ var ApplicationFieldSchema = z38.object({
7425
+ id: z38.string(),
7426
+ label: z38.string(),
7427
+ section: z38.string(),
6556
7428
  fieldType: FieldTypeSchema,
6557
- required: z36.boolean(),
6558
- options: z36.array(z36.string()).optional(),
6559
- columns: z36.array(z36.string()).optional(),
6560
- requiresExplanationIfYes: z36.boolean().optional(),
6561
- condition: z36.object({
6562
- dependsOn: z36.string(),
6563
- whenValue: z36.string()
7429
+ required: z38.boolean(),
7430
+ options: z38.array(z38.string()).optional(),
7431
+ columns: z38.array(z38.string()).optional(),
7432
+ requiresExplanationIfYes: z38.boolean().optional(),
7433
+ condition: z38.object({
7434
+ dependsOn: z38.string(),
7435
+ whenValue: z38.string()
6564
7436
  }).optional(),
6565
- value: z36.string().optional(),
6566
- source: z36.string().optional().describe("Where the value came from: auto-fill, user, lookup"),
6567
- confidence: z36.enum(["confirmed", "high", "medium", "low"]).optional()
6568
- });
6569
- var ApplicationClassifyResultSchema = z36.object({
6570
- isApplication: z36.boolean(),
6571
- confidence: z36.number().min(0).max(1),
6572
- applicationType: z36.string().nullable()
6573
- });
6574
- var FieldExtractionResultSchema = z36.object({
6575
- fields: z36.array(ApplicationFieldSchema)
6576
- });
6577
- var AutoFillMatchSchema = z36.object({
6578
- fieldId: z36.string(),
6579
- value: z36.string(),
6580
- confidence: z36.enum(["confirmed"]),
6581
- contextKey: z36.string()
6582
- });
6583
- var AutoFillResultSchema = z36.object({
6584
- matches: z36.array(AutoFillMatchSchema)
6585
- });
6586
- var QuestionBatchResultSchema = z36.object({
6587
- batches: z36.array(z36.array(z36.string()).describe("Array of field IDs in this batch"))
6588
- });
6589
- var LookupRequestSchema = z36.object({
6590
- type: z36.string().describe("Type of lookup: 'records', 'website', 'policy'"),
6591
- description: z36.string(),
6592
- url: z36.string().optional(),
6593
- targetFieldIds: z36.array(z36.string())
6594
- });
6595
- var ReplyIntentSchema = z36.object({
6596
- primaryIntent: z36.enum(["answers_only", "question", "lookup_request", "mixed"]),
6597
- hasAnswers: z36.boolean(),
6598
- questionText: z36.string().optional(),
6599
- questionFieldIds: z36.array(z36.string()).optional(),
6600
- lookupRequests: z36.array(LookupRequestSchema).optional()
6601
- });
6602
- var ParsedAnswerSchema = z36.object({
6603
- fieldId: z36.string(),
6604
- value: z36.string(),
6605
- explanation: z36.string().optional()
6606
- });
6607
- var AnswerParsingResultSchema = z36.object({
6608
- answers: z36.array(ParsedAnswerSchema),
6609
- unanswered: z36.array(z36.string()).describe("Field IDs that were not answered")
6610
- });
6611
- var LookupFillSchema = z36.object({
6612
- fieldId: z36.string(),
6613
- value: z36.string(),
6614
- source: z36.string().describe("Specific citable reference, e.g. 'GL Policy #POL-12345 (Hartford)'")
6615
- });
6616
- var LookupFillResultSchema = z36.object({
6617
- fills: z36.array(LookupFillSchema),
6618
- unfillable: z36.array(z36.string()),
6619
- explanation: z36.string().optional()
6620
- });
6621
- var FlatPdfPlacementSchema = z36.object({
6622
- fieldId: z36.string(),
6623
- page: z36.number(),
6624
- x: z36.number().describe("Percentage from left edge (0-100)"),
6625
- y: z36.number().describe("Percentage from top edge (0-100)"),
6626
- text: z36.string(),
6627
- fontSize: z36.number().optional(),
6628
- isCheckmark: z36.boolean().optional()
6629
- });
6630
- var AcroFormMappingSchema = z36.object({
6631
- fieldId: z36.string(),
6632
- acroFormName: z36.string(),
6633
- value: z36.string()
6634
- });
6635
- var QualityGateStatusSchema = z36.enum(["passed", "warning", "failed"]);
6636
- var QualitySeveritySchema = z36.enum(["info", "warning", "blocking"]);
6637
- var ApplicationQualityIssueSchema = z36.object({
6638
- code: z36.string(),
7437
+ value: z38.string().optional(),
7438
+ source: z38.string().optional().describe("Where the value came from: auto-fill, user, lookup"),
7439
+ confidence: z38.enum(["confirmed", "high", "medium", "low"]).optional()
7440
+ });
7441
+ var ApplicationClassifyResultSchema = z38.object({
7442
+ isApplication: z38.boolean(),
7443
+ confidence: z38.number().min(0).max(1),
7444
+ applicationType: z38.string().nullable()
7445
+ });
7446
+ var FieldExtractionResultSchema = z38.object({
7447
+ fields: z38.array(ApplicationFieldSchema)
7448
+ });
7449
+ var AutoFillMatchSchema = z38.object({
7450
+ fieldId: z38.string(),
7451
+ value: z38.string(),
7452
+ confidence: z38.enum(["confirmed"]),
7453
+ contextKey: z38.string()
7454
+ });
7455
+ var AutoFillResultSchema = z38.object({
7456
+ matches: z38.array(AutoFillMatchSchema)
7457
+ });
7458
+ var QuestionBatchResultSchema = z38.object({
7459
+ batches: z38.array(z38.array(z38.string()).describe("Array of field IDs in this batch"))
7460
+ });
7461
+ var LookupRequestSchema = z38.object({
7462
+ type: z38.string().describe("Type of lookup: 'records', 'website', 'policy'"),
7463
+ description: z38.string(),
7464
+ url: z38.string().optional(),
7465
+ targetFieldIds: z38.array(z38.string())
7466
+ });
7467
+ var ReplyIntentSchema = z38.object({
7468
+ primaryIntent: z38.enum(["answers_only", "question", "lookup_request", "mixed"]),
7469
+ hasAnswers: z38.boolean(),
7470
+ questionText: z38.string().optional(),
7471
+ questionFieldIds: z38.array(z38.string()).optional(),
7472
+ lookupRequests: z38.array(LookupRequestSchema).optional()
7473
+ });
7474
+ var ParsedAnswerSchema = z38.object({
7475
+ fieldId: z38.string(),
7476
+ value: z38.string(),
7477
+ explanation: z38.string().optional()
7478
+ });
7479
+ var AnswerParsingResultSchema = z38.object({
7480
+ answers: z38.array(ParsedAnswerSchema),
7481
+ unanswered: z38.array(z38.string()).describe("Field IDs that were not answered")
7482
+ });
7483
+ var LookupFillSchema = z38.object({
7484
+ fieldId: z38.string(),
7485
+ value: z38.string(),
7486
+ source: z38.string().describe("Specific citable reference, e.g. 'GL Policy #POL-12345 (Hartford)'")
7487
+ });
7488
+ var LookupFillResultSchema = z38.object({
7489
+ fills: z38.array(LookupFillSchema),
7490
+ unfillable: z38.array(z38.string()),
7491
+ explanation: z38.string().optional()
7492
+ });
7493
+ var FlatPdfPlacementSchema = z38.object({
7494
+ fieldId: z38.string(),
7495
+ page: z38.number(),
7496
+ x: z38.number().describe("Percentage from left edge (0-100)"),
7497
+ y: z38.number().describe("Percentage from top edge (0-100)"),
7498
+ text: z38.string(),
7499
+ fontSize: z38.number().optional(),
7500
+ isCheckmark: z38.boolean().optional()
7501
+ });
7502
+ var AcroFormMappingSchema = z38.object({
7503
+ fieldId: z38.string(),
7504
+ acroFormName: z38.string(),
7505
+ value: z38.string()
7506
+ });
7507
+ var QualityGateStatusSchema = z38.enum(["passed", "warning", "failed"]);
7508
+ var QualitySeveritySchema = z38.enum(["info", "warning", "blocking"]);
7509
+ var ApplicationQualityIssueSchema = z38.object({
7510
+ code: z38.string(),
6639
7511
  severity: QualitySeveritySchema,
6640
- message: z36.string(),
6641
- fieldId: z36.string().optional()
7512
+ message: z38.string(),
7513
+ fieldId: z38.string().optional()
6642
7514
  });
6643
- var ApplicationQualityRoundSchema = z36.object({
6644
- round: z36.number(),
6645
- kind: z36.string(),
7515
+ var ApplicationQualityRoundSchema = z38.object({
7516
+ round: z38.number(),
7517
+ kind: z38.string(),
6646
7518
  status: QualityGateStatusSchema,
6647
- summary: z36.string().optional()
7519
+ summary: z38.string().optional()
6648
7520
  });
6649
- var ApplicationQualityArtifactSchema = z36.object({
6650
- kind: z36.string(),
6651
- label: z36.string().optional(),
6652
- itemCount: z36.number().optional()
7521
+ var ApplicationQualityArtifactSchema = z38.object({
7522
+ kind: z38.string(),
7523
+ label: z38.string().optional(),
7524
+ itemCount: z38.number().optional()
6653
7525
  });
6654
- var ApplicationEmailReviewSchema = z36.object({
6655
- issues: z36.array(ApplicationQualityIssueSchema),
7526
+ var ApplicationEmailReviewSchema = z38.object({
7527
+ issues: z38.array(ApplicationQualityIssueSchema),
6656
7528
  qualityGateStatus: QualityGateStatusSchema
6657
7529
  });
6658
- var ApplicationQualityReportSchema = z36.object({
6659
- issues: z36.array(ApplicationQualityIssueSchema),
6660
- rounds: z36.array(ApplicationQualityRoundSchema).optional(),
6661
- artifacts: z36.array(ApplicationQualityArtifactSchema).optional(),
7530
+ var ApplicationQualityReportSchema = z38.object({
7531
+ issues: z38.array(ApplicationQualityIssueSchema),
7532
+ rounds: z38.array(ApplicationQualityRoundSchema).optional(),
7533
+ artifacts: z38.array(ApplicationQualityArtifactSchema).optional(),
6662
7534
  emailReview: ApplicationEmailReviewSchema.optional(),
6663
7535
  qualityGateStatus: QualityGateStatusSchema
6664
7536
  });
6665
- var ApplicationStateSchema = z36.object({
6666
- id: z36.string(),
6667
- pdfBase64: z36.string().optional().describe("Original PDF, omitted after extraction"),
6668
- title: z36.string().optional(),
6669
- applicationType: z36.string().nullable().optional(),
6670
- fields: z36.array(ApplicationFieldSchema),
6671
- batches: z36.array(z36.array(z36.string())).optional(),
6672
- currentBatchIndex: z36.number().default(0),
7537
+ var ApplicationStateSchema = z38.object({
7538
+ id: z38.string(),
7539
+ pdfBase64: z38.string().optional().describe("Original PDF, omitted after extraction"),
7540
+ title: z38.string().optional(),
7541
+ applicationType: z38.string().nullable().optional(),
7542
+ fields: z38.array(ApplicationFieldSchema),
7543
+ batches: z38.array(z38.array(z38.string())).optional(),
7544
+ currentBatchIndex: z38.number().default(0),
6673
7545
  qualityReport: ApplicationQualityReportSchema.optional(),
6674
- status: z36.enum(["classifying", "extracting", "auto_filling", "batching", "collecting", "confirming", "mapping", "complete"]),
6675
- createdAt: z36.number(),
6676
- updatedAt: z36.number()
7546
+ status: z38.enum(["classifying", "extracting", "auto_filling", "batching", "collecting", "confirming", "mapping", "complete"]),
7547
+ createdAt: z38.number(),
7548
+ updatedAt: z38.number()
6677
7549
  });
6678
7550
 
6679
7551
  // src/application/agents/classifier.ts
@@ -7262,6 +8134,70 @@ function reviewBatchEmail(text, batchFields) {
7262
8134
  };
7263
8135
  }
7264
8136
 
8137
+ // src/application/workflow.ts
8138
// Upper bound on how many unfilled fields planDocumentSearchFields() will
// hand back for document search in a single planning pass.
var MAX_DOCUMENT_SEARCH_FIELDS = 5;
// When more than MAX_DOCUMENT_SEARCH_FIELDS fields are unfilled and the share
// of non-searchable ("low value") fields exceeds this ratio,
// planDocumentSearchFields() skips document search entirely.
var LOW_VALUE_FIELD_RATIO_LIMIT = 0.6;
8140
/**
 * Decide which auto-fill stages are worth running for the current field state.
 *
 * @param {object} input
 * @param {Array<object>} input.fields - Application fields; each is checked with isUnfilled().
 * @param {boolean} input.hasBackfillProvider - Whether a backfill provider is configured.
 * @param {number} input.orgContextCount - Number of org-context entries available.
 * @param {boolean} input.hasDocumentStore - Whether a document store is configured.
 * @param {boolean} input.hasMemoryStore - Whether a memory store is configured.
 * @returns {{runBackfill: *, runContextAutoFill: boolean, documentSearchFields: Array<object>, runBatching: boolean, unfilledFields: Array<object>}}
 *   Flags per stage, plus the unfilled fields the decision was based on.
 */
function planApplicationWorkflow(input) {
  const { fields, hasBackfillProvider, orgContextCount, hasDocumentStore, hasMemoryStore } = input;
  const unfilledFields = fields.filter((field) => isUnfilled(field));
  const hasUnfilled = unfilledFields.length > 0;
  // Document search needs both stores; the helper returns [] otherwise.
  const documentSearchFields = planDocumentSearchFields(
    unfilledFields,
    hasDocumentStore && hasMemoryStore
  );
  const runBackfill = hasBackfillProvider && hasUnfilled;
  const runContextAutoFill = orgContextCount > 0 && hasUnfilled;
  return {
    runBackfill,
    runContextAutoFill,
    documentSearchFields,
    runBatching: hasUnfilled,
    unfilledFields
  };
}
8154
/**
 * Decide which follow-up actions a user reply should trigger.
 *
 * @param {object} input
 * @param {object} input.intent - Classified reply intent (hasAnswers, questionText,
 *   primaryIntent, optional lookupRequests).
 * @param {Array<object>} input.currentBatchFields - Fields of the batch being answered.
 * @param {Array<object>} [input.nextBatchFields] - Fields of the candidate next batch, if any.
 * @param {boolean} input.hasDocumentStore - Whether a document store is configured.
 * @returns {{parseAnswers: *, runLookup: *, answerQuestion: boolean, advanceBatch: boolean, generateNextEmail: boolean}}
 */
function planReplyActions(input) {
  const { intent, currentBatchFields } = input;
  const upcomingFields = input.nextBatchFields ?? [];
  const lookupCount = intent.lookupRequests?.length ?? 0;
  const hasCurrentFields = currentBatchFields.length > 0;
  const isQuestionIntent = intent.primaryIntent === "question" || intent.primaryIntent === "mixed";
  // The batch may advance only when it is non-empty and nothing in it is still unfilled.
  const advanceBatch = hasCurrentFields && !currentBatchFields.some((field) => isUnfilled(field));
  return {
    parseAnswers: intent.hasAnswers && hasCurrentFields,
    runLookup: lookupCount > 0 && input.hasDocumentStore,
    answerQuestion: Boolean(intent.questionText) && isQuestionIntent,
    advanceBatch,
    generateNextEmail: upcomingFields.some((field) => isUnfilled(field))
  };
}
8166
/**
 * Pick the unfilled fields that should be looked up in the document store.
 *
 * Returns an empty list when stores are unavailable, when nothing is unfilled,
 * when no field qualifies via isHighValueLookupField(), or when the unfilled
 * set is large and dominated by non-qualifying fields.
 *
 * @param {Array<object>} unfilledFields - Fields that still need a value.
 * @param {*} hasStores - Truthy when the required stores are available.
 * @returns {Array<object>} At most MAX_DOCUMENT_SEARCH_FIELDS fields, in input order.
 */
function planDocumentSearchFields(unfilledFields, hasStores) {
  if (!hasStores) return [];
  const totalUnfilled = unfilledFields.length;
  if (totalUnfilled === 0) return [];
  const candidates = unfilledFields.filter((field) => isHighValueLookupField(field));
  const candidateCount = candidates.length;
  if (candidateCount === 0) return [];
  // Share of unfilled fields that are NOT worth searching for.
  const lowValueRatio = 1 - candidateCount / totalUnfilled;
  const mostlyLowValue = totalUnfilled > MAX_DOCUMENT_SEARCH_FIELDS && lowValueRatio > LOW_VALUE_FIELD_RATIO_LIMIT;
  return mostlyLowValue ? [] : candidates.slice(0, MAX_DOCUMENT_SEARCH_FIELDS);
}
8176
/**
 * Report whether a field still lacks a usable value.
 *
 * A field is unfilled when its value is undefined, null, or blank once
 * trimmed. Non-string values are coerced with String() before trimming, so a
 * null or numeric value no longer throws a TypeError.
 *
 * @param {{value?: *}} field - Field whose `value` is inspected.
 * @returns {boolean} True when the field has no non-blank value.
 */
function isUnfilled(field) {
  const { value } = field;
  // `== null` deliberately matches both undefined and null.
  if (value == null) return true;
  return String(value).trim() === "";
}
8179
// Lower-case keywords that mark a field as worth a document lookup when they
// appear in its section or label. Defined once rather than on every call.
var HIGH_VALUE_LOOKUP_TERMS = [
  "carrier",
  "policy",
  "premium",
  "limit",
  "deductible",
  "insured",
  "address",
  "revenue",
  "payroll",
  "effective",
  "expiration",
  "coverage",
  "class code",
  "fein",
  "entity"
];
/**
 * Report whether a field qualifies for document-store lookup.
 *
 * Required fields always qualify. Otherwise the field qualifies when its
 * section or label contains one of HIGH_VALUE_LOOKUP_TERMS as a
 * case-insensitive substring.
 *
 * @param {{required?: boolean, section?: string, label?: string}} field
 * @returns {boolean} True when the field qualifies.
 */
function isHighValueLookupField(field) {
  // Short-circuit before building the search text.
  if (field.required) return true;
  // Nullish fallbacks keep the literal "undefined"/"null" out of the text.
  const text = `${field.section ?? ""} ${field.label ?? ""}`.toLowerCase();
  return HIGH_VALUE_LOOKUP_TERMS.some((term) => text.includes(term));
}
8200
+
7265
8201
  // src/application/coordinator.ts
7266
8202
  function createApplicationPipeline(config) {
7267
8203
  const {
@@ -7360,27 +8296,37 @@ function createApplicationPipeline(config) {
7360
8296
  state.updatedAt = Date.now();
7361
8297
  await applicationStore?.save(state);
7362
8298
  onProgress?.(`Auto-filling ${fields.length} fields...`);
7363
- const fillTasks = [];
7364
- if (backfillProvider) {
7365
- fillTasks.push(
7366
- (async () => {
7367
- try {
7368
- const priorAnswers = await backfillFromPriorAnswers(fields, backfillProvider);
7369
- for (const pa of priorAnswers) {
7370
- const field = state.fields.find((f) => f.id === pa.fieldId);
7371
- if (field && !field.value && pa.relevance > 0.8) {
7372
- field.value = pa.value;
7373
- field.source = `backfill: ${pa.source}`;
7374
- field.confidence = "high";
7375
- }
7376
- }
7377
- } catch (e) {
7378
- await log?.(`Backfill failed: ${e}`);
8299
+ let workflowPlan = planApplicationWorkflow({
8300
+ fields: state.fields,
8301
+ hasBackfillProvider: Boolean(backfillProvider),
8302
+ orgContextCount: orgContext.length,
8303
+ hasDocumentStore: Boolean(documentStore),
8304
+ hasMemoryStore: Boolean(memoryStore)
8305
+ });
8306
+ if (workflowPlan.runBackfill && backfillProvider) {
8307
+ try {
8308
+ const priorAnswers = await backfillFromPriorAnswers(state.fields, backfillProvider);
8309
+ for (const pa of priorAnswers) {
8310
+ const field = state.fields.find((f) => f.id === pa.fieldId);
8311
+ if (field && !field.value && pa.relevance > 0.8) {
8312
+ field.value = pa.value;
8313
+ field.source = `backfill: ${pa.source}`;
8314
+ field.confidence = "high";
7379
8315
  }
7380
- })()
7381
- );
8316
+ }
8317
+ } catch (e) {
8318
+ await log?.(`Backfill failed: ${e}`);
8319
+ }
7382
8320
  }
7383
- if (orgContext.length > 0) {
8321
+ workflowPlan = planApplicationWorkflow({
8322
+ fields: state.fields,
8323
+ hasBackfillProvider: false,
8324
+ orgContextCount: orgContext.length,
8325
+ hasDocumentStore: Boolean(documentStore),
8326
+ hasMemoryStore: Boolean(memoryStore)
8327
+ });
8328
+ const fillTasks = [];
8329
+ if (workflowPlan.runContextAutoFill) {
7384
8330
  fillTasks.push(
7385
8331
  limit(async () => {
7386
8332
  const unfilledFields2 = state.fields.filter((f) => !f.value);
@@ -7407,18 +8353,13 @@ function createApplicationPipeline(config) {
7407
8353
  })
7408
8354
  );
7409
8355
  }
7410
- if (documentStore && memoryStore) {
8356
+ if (workflowPlan.documentSearchFields.length > 0 && memoryStore) {
7411
8357
  fillTasks.push(
7412
8358
  (async () => {
7413
8359
  try {
7414
- const unfilledFields2 = state.fields.filter((f) => !f.value);
7415
- const searchPromises = unfilledFields2.slice(0, 10).map(
8360
+ const searchPromises = workflowPlan.documentSearchFields.map(
7416
8361
  (f) => limit(async () => {
7417
- const chunks = await memoryStore.search(f.label, { limit: 3 });
7418
- for (const chunk of chunks) {
7419
- if (!state.fields.find((sf) => sf.id === f.id)?.value) {
7420
- }
7421
- }
8362
+ await memoryStore.search(f.label, { limit: 3 });
7422
8363
  })
7423
8364
  );
7424
8365
  await Promise.all(searchPromises);
@@ -7431,8 +8372,15 @@ function createApplicationPipeline(config) {
7431
8372
  await Promise.all(fillTasks);
7432
8373
  state.updatedAt = Date.now();
7433
8374
  await applicationStore?.save(state);
7434
- const unfilledFields = state.fields.filter((f) => !f.value);
7435
- if (unfilledFields.length > 0) {
8375
+ workflowPlan = planApplicationWorkflow({
8376
+ fields: state.fields,
8377
+ hasBackfillProvider: false,
8378
+ orgContextCount: 0,
8379
+ hasDocumentStore: false,
8380
+ hasMemoryStore: false
8381
+ });
8382
+ const unfilledFields = workflowPlan.unfilledFields;
8383
+ if (workflowPlan.runBatching) {
7436
8384
  onProgress?.(`Batching ${unfilledFields.length} remaining questions...`);
7437
8385
  state.status = "batching";
7438
8386
  try {
@@ -7499,7 +8447,12 @@ function createApplicationPipeline(config) {
7499
8447
  }
7500
8448
  let fieldsFilled = 0;
7501
8449
  let responseText;
7502
- if (intent.hasAnswers) {
8450
+ let replyPlan = planReplyActions({
8451
+ intent,
8452
+ currentBatchFields,
8453
+ hasDocumentStore: Boolean(documentStore)
8454
+ });
8455
+ if (replyPlan.parseAnswers) {
7503
8456
  onProgress?.("Parsing answers...");
7504
8457
  try {
7505
8458
  const { result: parseResult, usage: parseUsage } = await parseAnswers(
@@ -7522,7 +8475,7 @@ function createApplicationPipeline(config) {
7522
8475
  await log?.(`Answer parsing failed: ${error instanceof Error ? error.message : String(error)}`);
7523
8476
  }
7524
8477
  }
7525
- if (intent.lookupRequests?.length) {
8478
+ if (replyPlan.runLookup && intent.lookupRequests?.length) {
7526
8479
  onProgress?.("Processing lookup requests...");
7527
8480
  let availableData = "";
7528
8481
  if (documentStore) {
@@ -7563,64 +8516,78 @@ function createApplicationPipeline(config) {
7563
8516
  }
7564
8517
  }
7565
8518
  }
7566
- if (intent.primaryIntent === "question" || intent.primaryIntent === "mixed") {
7567
- if (intent.questionText) {
7568
- try {
7569
- const { text, usage } = await generateText({
7570
- prompt: `The user is filling out an insurance application and asked: "${intent.questionText}"
8519
+ if (replyPlan.answerQuestion && intent.questionText) {
8520
+ try {
8521
+ const { text, usage } = await generateText({
8522
+ prompt: `The user is filling out an insurance application and asked: "${intent.questionText}"
7571
8523
 
7572
8524
  Provide a brief, helpful explanation (2-3 sentences). End with "Just reply with the answer when you're ready and I'll fill it in."`,
7573
- maxTokens: 512,
7574
- providerOptions
7575
- });
7576
- trackUsage(usage);
7577
- responseText = text;
7578
- } catch (error) {
7579
- await log?.(`Question response generation failed: ${error instanceof Error ? error.message : String(error)}`);
7580
- responseText = `I wasn't able to generate an explanation for your question. Could you rephrase it, or just provide the answer directly?`;
7581
- }
8525
+ maxTokens: 512,
8526
+ providerOptions
8527
+ });
8528
+ trackUsage(usage);
8529
+ responseText = text;
8530
+ } catch (error) {
8531
+ await log?.(`Question response generation failed: ${error instanceof Error ? error.message : String(error)}`);
8532
+ responseText = `I wasn't able to generate an explanation for your question. Could you rephrase it, or just provide the answer directly?`;
7582
8533
  }
7583
8534
  }
7584
8535
  const currentBatchComplete = currentBatchFieldIds.every(
7585
8536
  (fid) => state.fields.find((f) => f.id === fid)?.value
7586
8537
  );
7587
- if (currentBatchComplete && state.batches) {
7588
- if (state.currentBatchIndex < state.batches.length - 1) {
7589
- state.currentBatchIndex++;
7590
- const nextBatchFieldIds = state.batches[state.currentBatchIndex];
7591
- const nextBatchFields = state.fields.filter(
7592
- (f) => nextBatchFieldIds.includes(f.id)
7593
- );
8538
+ let nextBatchIndex;
8539
+ let nextBatchFields;
8540
+ if (state.batches) {
8541
+ for (let index = state.currentBatchIndex + 1; index < state.batches.length; index++) {
8542
+ const candidateFields = state.fields.filter((f) => state.batches[index].includes(f.id));
8543
+ if (candidateFields.some((f) => !f.value)) {
8544
+ nextBatchIndex = index;
8545
+ nextBatchFields = candidateFields;
8546
+ break;
8547
+ }
8548
+ }
8549
+ }
8550
+ replyPlan = planReplyActions({
8551
+ intent,
8552
+ currentBatchFields,
8553
+ nextBatchFields,
8554
+ hasDocumentStore: Boolean(documentStore)
8555
+ });
8556
+ if (currentBatchComplete && replyPlan.advanceBatch && state.batches) {
8557
+ if (nextBatchIndex !== void 0 && nextBatchFields) {
8558
+ state.currentBatchIndex = nextBatchIndex;
7594
8559
  const filledCount = state.fields.filter((f) => f.value).length;
7595
- try {
7596
- const { text: emailText, usage: emailUsage } = await generateBatchEmail(
7597
- nextBatchFields,
7598
- state.currentBatchIndex,
7599
- state.batches.length,
7600
- {
7601
- appTitle: state.title,
7602
- totalFieldCount: state.fields.length,
7603
- filledFieldCount: filledCount,
7604
- companyName: context?.companyName
7605
- },
7606
- generateText,
7607
- providerOptions
7608
- );
7609
- trackUsage(emailUsage);
7610
- const emailReview = reviewBatchEmail(emailText, nextBatchFields);
7611
- state.qualityReport = {
7612
- ...buildApplicationQualityReport(state),
7613
- emailReview
7614
- };
7615
- if (!responseText) {
7616
- responseText = emailText;
7617
- } else {
7618
- responseText += `
8560
+ if (replyPlan.generateNextEmail) {
8561
+ try {
8562
+ const { text: emailText, usage: emailUsage } = await generateBatchEmail(
8563
+ nextBatchFields,
8564
+ state.currentBatchIndex,
8565
+ state.batches.length,
8566
+ {
8567
+ appTitle: state.title,
8568
+ totalFieldCount: state.fields.length,
8569
+ filledFieldCount: filledCount,
8570
+ companyName: context?.companyName
8571
+ },
8572
+ generateText,
8573
+ providerOptions
8574
+ );
8575
+ trackUsage(emailUsage);
8576
+ const emailReview = reviewBatchEmail(emailText, nextBatchFields);
8577
+ state.qualityReport = {
8578
+ ...buildApplicationQualityReport(state),
8579
+ emailReview
8580
+ };
8581
+ if (!responseText) {
8582
+ responseText = emailText;
8583
+ } else {
8584
+ responseText += `
7619
8585
 
7620
8586
  ${emailText}`;
8587
+ }
8588
+ } catch (error) {
8589
+ await log?.(`Batch email generation failed: ${error instanceof Error ? error.message : String(error)}`);
7621
8590
  }
7622
- } catch (error) {
7623
- await log?.(`Batch email generation failed: ${error instanceof Error ? error.message : String(error)}`);
7624
8591
  }
7625
8592
  } else {
7626
8593
  state.status = "confirming";
@@ -7779,7 +8746,7 @@ INSTRUCTIONS:
7779
8746
  - If the user's attachment already contains critical facts, still request chunk/document lookup when policy or quote details should be cross-checked against stored records
7780
8747
 
7781
8748
  CHUNK TYPES (for chunkTypes filter):
7782
- carrier_info, named_insured, coverage, endorsement, exclusion, condition, section, declaration, loss_history, premium, supplementary
8749
+ carrier_info, named_insured, coverage, covered_reason, definition, endorsement, exclusion, condition, section, declaration, loss_history, premium, supplementary
7783
8750
 
7784
8751
  Respond with the structured classification.`;
7785
8752
  }
@@ -7810,91 +8777,91 @@ Respond with the final answer, deduplicated citations array, overall confidence
7810
8777
  }
7811
8778
 
7812
8779
  // src/schemas/query.ts
7813
- import { z as z37 } from "zod";
7814
- var QueryIntentSchema = z37.enum([
8780
+ import { z as z39 } from "zod";
8781
+ var QueryIntentSchema = z39.enum([
7815
8782
  "policy_question",
7816
8783
  "coverage_comparison",
7817
8784
  "document_search",
7818
8785
  "claims_inquiry",
7819
8786
  "general_knowledge"
7820
8787
  ]);
7821
- var QueryAttachmentKindSchema = z37.enum(["image", "pdf", "text"]);
7822
- var QueryAttachmentSchema = z37.object({
7823
- id: z37.string().optional().describe("Optional stable attachment ID from the caller"),
8788
+ var QueryAttachmentKindSchema = z39.enum(["image", "pdf", "text"]);
8789
+ var QueryAttachmentSchema = z39.object({
8790
+ id: z39.string().optional().describe("Optional stable attachment ID from the caller"),
7824
8791
  kind: QueryAttachmentKindSchema,
7825
- name: z37.string().optional().describe("Original filename or user-facing label"),
7826
- mimeType: z37.string().optional().describe("MIME type such as image/jpeg or application/pdf"),
7827
- base64: z37.string().optional().describe("Base64-encoded file content for image/pdf attachments"),
7828
- text: z37.string().optional().describe("Plain-text attachment content when available"),
7829
- description: z37.string().optional().describe("Caller-provided description of the attachment")
7830
- });
7831
- var SubQuestionSchema = z37.object({
7832
- question: z37.string().describe("Atomic sub-question to retrieve and answer independently"),
8792
+ name: z39.string().optional().describe("Original filename or user-facing label"),
8793
+ mimeType: z39.string().optional().describe("MIME type such as image/jpeg or application/pdf"),
8794
+ base64: z39.string().optional().describe("Base64-encoded file content for image/pdf attachments"),
8795
+ text: z39.string().optional().describe("Plain-text attachment content when available"),
8796
+ description: z39.string().optional().describe("Caller-provided description of the attachment")
8797
+ });
8798
+ var SubQuestionSchema = z39.object({
8799
+ question: z39.string().describe("Atomic sub-question to retrieve and answer independently"),
7833
8800
  intent: QueryIntentSchema,
7834
- chunkTypes: z37.array(z37.string()).optional().describe("Chunk types to filter retrieval (e.g. coverage, endorsement, declaration)"),
7835
- documentFilters: z37.object({
7836
- type: z37.enum(["policy", "quote"]).optional(),
7837
- carrier: z37.string().optional(),
7838
- insuredName: z37.string().optional(),
7839
- policyNumber: z37.string().optional(),
7840
- quoteNumber: z37.string().optional(),
7841
- policyTypes: z37.array(PolicyTypeSchema).optional().describe("Filter by policy type (e.g. homeowners_ho3, renters_ho4, pet) to avoid mixing up similar policies")
8801
+ chunkTypes: z39.array(z39.string()).optional().describe("Chunk types to filter retrieval (e.g. coverage, endorsement, declaration)"),
8802
+ documentFilters: z39.object({
8803
+ type: z39.enum(["policy", "quote"]).optional(),
8804
+ carrier: z39.string().optional(),
8805
+ insuredName: z39.string().optional(),
8806
+ policyNumber: z39.string().optional(),
8807
+ quoteNumber: z39.string().optional(),
8808
+ policyTypes: z39.array(PolicyTypeSchema).optional().describe("Filter by policy type (e.g. homeowners_ho3, renters_ho4, pet) to avoid mixing up similar policies")
7842
8809
  }).optional().describe("Structured filters to narrow document lookup")
7843
8810
  });
7844
- var QueryClassifyResultSchema = z37.object({
8811
+ var QueryClassifyResultSchema = z39.object({
7845
8812
  intent: QueryIntentSchema,
7846
- subQuestions: z37.array(SubQuestionSchema).min(1).describe("Decomposed atomic sub-questions"),
7847
- requiresDocumentLookup: z37.boolean().describe("Whether structured document lookup is needed"),
7848
- requiresChunkSearch: z37.boolean().describe("Whether semantic chunk search is needed"),
7849
- requiresConversationHistory: z37.boolean().describe("Whether conversation history is relevant")
7850
- });
7851
- var EvidenceItemSchema = z37.object({
7852
- source: z37.enum(["chunk", "document", "conversation", "attachment"]),
7853
- chunkId: z37.string().optional(),
7854
- documentId: z37.string().optional(),
7855
- turnId: z37.string().optional(),
7856
- attachmentId: z37.string().optional(),
7857
- text: z37.string().describe("Text excerpt from the source"),
7858
- relevance: z37.number().min(0).max(1),
7859
- metadata: z37.array(z37.object({ key: z37.string(), value: z37.string() })).optional()
7860
- });
7861
- var AttachmentInterpretationSchema = z37.object({
7862
- summary: z37.string().describe("Concise summary of what the attachment shows or contains"),
7863
- extractedFacts: z37.array(z37.string()).describe("Specific observable or document facts grounded in the attachment"),
7864
- recommendedFocus: z37.array(z37.string()).describe("Important details to incorporate when answering follow-up questions"),
7865
- confidence: z37.number().min(0).max(1)
7866
- });
7867
- var RetrievalResultSchema = z37.object({
7868
- subQuestion: z37.string(),
7869
- evidence: z37.array(EvidenceItemSchema)
7870
- });
7871
- var CitationSchema = z37.object({
7872
- index: z37.number().describe("Citation number [1], [2], etc."),
7873
- chunkId: z37.string().describe("Source chunk ID, e.g. doc-123:coverage:2"),
7874
- documentId: z37.string(),
7875
- documentType: z37.enum(["policy", "quote"]).optional(),
7876
- field: z37.string().optional().describe("Specific field path, e.g. coverages[0].deductible"),
7877
- quote: z37.string().describe("Exact text from source that supports the claim"),
7878
- relevance: z37.number().min(0).max(1)
7879
- });
7880
- var SubAnswerSchema = z37.object({
7881
- subQuestion: z37.string(),
7882
- answer: z37.string(),
7883
- citations: z37.array(CitationSchema),
7884
- confidence: z37.number().min(0).max(1),
7885
- needsMoreContext: z37.boolean().describe("True if evidence was insufficient to answer fully")
7886
- });
7887
- var VerifyResultSchema = z37.object({
7888
- approved: z37.boolean().describe("Whether all sub-answers are adequately grounded"),
7889
- issues: z37.array(z37.string()).describe("Specific grounding or consistency issues found"),
7890
- retrySubQuestions: z37.array(z37.string()).optional().describe("Sub-questions that need additional retrieval or re-reasoning")
7891
- });
7892
- var QueryResultSchema = z37.object({
7893
- answer: z37.string(),
7894
- citations: z37.array(CitationSchema),
8813
+ subQuestions: z39.array(SubQuestionSchema).min(1).describe("Decomposed atomic sub-questions"),
8814
+ requiresDocumentLookup: z39.boolean().describe("Whether structured document lookup is needed"),
8815
+ requiresChunkSearch: z39.boolean().describe("Whether semantic chunk search is needed"),
8816
+ requiresConversationHistory: z39.boolean().describe("Whether conversation history is relevant")
8817
+ });
8818
+ var EvidenceItemSchema = z39.object({
8819
+ source: z39.enum(["chunk", "document", "conversation", "attachment"]),
8820
+ chunkId: z39.string().optional(),
8821
+ documentId: z39.string().optional(),
8822
+ turnId: z39.string().optional(),
8823
+ attachmentId: z39.string().optional(),
8824
+ text: z39.string().describe("Text excerpt from the source"),
8825
+ relevance: z39.number().min(0).max(1),
8826
+ metadata: z39.array(z39.object({ key: z39.string(), value: z39.string() })).optional()
8827
+ });
8828
+ var AttachmentInterpretationSchema = z39.object({
8829
+ summary: z39.string().describe("Concise summary of what the attachment shows or contains"),
8830
+ extractedFacts: z39.array(z39.string()).describe("Specific observable or document facts grounded in the attachment"),
8831
+ recommendedFocus: z39.array(z39.string()).describe("Important details to incorporate when answering follow-up questions"),
8832
+ confidence: z39.number().min(0).max(1)
8833
+ });
8834
+ var RetrievalResultSchema = z39.object({
8835
+ subQuestion: z39.string(),
8836
+ evidence: z39.array(EvidenceItemSchema)
8837
+ });
8838
+ var CitationSchema = z39.object({
8839
+ index: z39.number().describe("Citation number [1], [2], etc."),
8840
+ chunkId: z39.string().describe("Source chunk ID, e.g. doc-123:coverage:2"),
8841
+ documentId: z39.string(),
8842
+ documentType: z39.enum(["policy", "quote"]).optional(),
8843
+ field: z39.string().optional().describe("Specific field path, e.g. coverages[0].deductible"),
8844
+ quote: z39.string().describe("Exact text from source that supports the claim"),
8845
+ relevance: z39.number().min(0).max(1)
8846
+ });
8847
+ var SubAnswerSchema = z39.object({
8848
+ subQuestion: z39.string(),
8849
+ answer: z39.string(),
8850
+ citations: z39.array(CitationSchema),
8851
+ confidence: z39.number().min(0).max(1),
8852
+ needsMoreContext: z39.boolean().describe("True if evidence was insufficient to answer fully")
8853
+ });
8854
+ var VerifyResultSchema = z39.object({
8855
+ approved: z39.boolean().describe("Whether all sub-answers are adequately grounded"),
8856
+ issues: z39.array(z39.string()).describe("Specific grounding or consistency issues found"),
8857
+ retrySubQuestions: z39.array(z39.string()).optional().describe("Sub-questions that need additional retrieval or re-reasoning")
8858
+ });
8859
+ var QueryResultSchema = z39.object({
8860
+ answer: z39.string(),
8861
+ citations: z39.array(CitationSchema),
7895
8862
  intent: QueryIntentSchema,
7896
- confidence: z37.number().min(0).max(1),
7897
- followUp: z37.string().optional().describe("Suggested follow-up question if applicable")
8863
+ confidence: z39.number().min(0).max(1),
8864
+ followUp: z39.string().optional().describe("Suggested follow-up question if applicable")
7898
8865
  });
7899
8866
 
7900
8867
  // src/query/retriever.ts
@@ -8434,6 +9401,42 @@ ${item.text}`).join("\n\n");
8434
9401
  return { evidence, contextSummary };
8435
9402
  }
8436
9403
 
9404
+ // src/query/workflow.ts
9405
/**
 * Report whether a classified query asks for any retrieval step.
 *
 * @param {object} classification - Classifier output; only the
 *   `requiresDocumentLookup` and `requiresChunkSearch` flags are read.
 * @returns {boolean} `true` when either flag is truthy, otherwise `false`.
 */
function shouldRetrieveForClassification(classification) {
  // Coerce to a real boolean: the raw `||` result would leak `undefined`
  // (or any other falsy/truthy operand) into the plan returned to callers.
  return Boolean(classification.requiresDocumentLookup || classification.requiresChunkSearch);
}

/**
 * Build the initial ordered list of workflow actions for a query.
 *
 * The plan always contains `reason`, `verify` and `respond` (in that order)
 * and is prefixed with a `retrieve` action when the classification asks for
 * a document or chunk lookup.
 *
 * @param {object} params
 * @param {object} params.classification - Classifier output; its
 *   `subQuestions` value is passed through unchanged to the `retrieve` and
 *   `reason` actions.
 * @param {Array} [params.attachmentEvidence] - Evidence extracted from
 *   attachments; a missing or nullish value is treated as empty.
 * @returns {{ actions: object[], shouldRetrieve: boolean }} The ordered
 *   actions and whether a retrieval step was scheduled.
 */
function buildInitialQueryWorkflowPlan(params) {
  const { classification } = params;
  // Tolerate callers that omit attachment evidence instead of throwing on `.length`.
  const attachmentEvidence = params.attachmentEvidence ?? [];
  const { subQuestions } = classification;
  const shouldRetrieve = shouldRetrieveForClassification(classification);

  let reasonText;
  if (shouldRetrieve) {
    reasonText = "answer with retrieved evidence and any attachment evidence";
  } else if (attachmentEvidence.length > 0) {
    reasonText = "answer with attachment evidence only";
  } else {
    reasonText = "answer without document retrieval";
  }

  const actions = [];
  if (shouldRetrieve) {
    actions.push({
      type: "retrieve",
      subQuestions,
      reason: "classification requested document or chunk lookup",
    });
  }
  actions.push(
    {
      type: "reason",
      subQuestions,
      reason: reasonText,
    },
    {
      type: "verify",
      reason: "check grounding and request targeted retries when needed",
    },
    {
      type: "respond",
      reason: "compose final response",
    },
  );
  return { actions, shouldRetrieve };
}

/**
 * Look up the first action of a given type in a workflow plan.
 *
 * @param {{ actions: object[] }} plan - A plan as returned by
 *   `buildInitialQueryWorkflowPlan`.
 * @param {string} type - Action type to look for (e.g. "retrieve").
 * @returns {object|undefined} The first matching action, or `undefined`
 *   when the plan has no action of that type.
 */
function getWorkflowAction(plan, type) {
  return plan.actions.find((action) => action.type === type);
}
9439
+
8437
9440
  // src/query/coordinator.ts
8438
9441
  function createQueryAgent(config) {
8439
9442
  const {
@@ -8478,29 +9481,37 @@ function createQueryAgent(config) {
8478
9481
  onProgress?.("Classifying query...");
8479
9482
  const classification = await classify(question, conversationId, attachmentContext);
8480
9483
  await pipelineCtx.save("classify", { classification, attachmentEvidence });
8481
- onProgress?.(`Retrieving evidence for ${classification.subQuestions.length} sub-question(s)...`);
8482
9484
  const retrieverConfig = {
8483
9485
  documentStore,
8484
9486
  memoryStore,
8485
9487
  retrievalLimit,
8486
9488
  log
8487
9489
  };
8488
- const retrievalResults = await Promise.all(
8489
- classification.subQuestions.map(
8490
- (sq) => limit(() => retrieve(sq, conversationId, retrieverConfig))
8491
- )
8492
- );
9490
+ const workflowPlan = buildInitialQueryWorkflowPlan({ classification, attachmentEvidence });
9491
+ const retrieveAction = getWorkflowAction(workflowPlan, "retrieve");
9492
+ const reasonAction = getWorkflowAction(workflowPlan, "reason");
9493
+ await pipelineCtx.save("workflow", { classification, attachmentEvidence, workflowPlan });
9494
+ const retrievalResults = retrieveAction ? await (async () => {
9495
+ onProgress?.(`Retrieving evidence for ${retrieveAction.subQuestions.length} sub-question(s)...`);
9496
+ return Promise.all(
9497
+ retrieveAction.subQuestions.map(
9498
+ (sq) => limit(() => retrieve(sq, conversationId, retrieverConfig))
9499
+ )
9500
+ );
9501
+ })() : [];
8493
9502
  const allEvidence = [...attachmentEvidence, ...retrievalResults.flatMap((r) => r.evidence)];
8494
9503
  await pipelineCtx.save("retrieve", { classification, attachmentEvidence, evidence: allEvidence });
8495
9504
  onProgress?.("Reasoning over evidence...");
8496
9505
  const reasonerConfig = { generateObject, providerOptions };
9506
+ const subQuestionsToReason = reasonAction?.subQuestions ?? classification.subQuestions;
8497
9507
  const reasonResults = await Promise.allSettled(
8498
- classification.subQuestions.map(
8499
- (sq, i) => limit(async () => {
9508
+ subQuestionsToReason.map(
9509
+ (sq) => limit(async () => {
9510
+ const retrievedEvidence = retrievalResults.find((r) => r.subQuestion === sq.question)?.evidence ?? [];
8500
9511
  const { subAnswer, usage } = await reason(
8501
9512
  sq.question,
8502
9513
  sq.intent,
8503
- [...attachmentEvidence, ...retrievalResults[i].evidence],
9514
+ [...attachmentEvidence, ...retrievedEvidence],
8504
9515
  reasonerConfig
8505
9516
  );
8506
9517
  trackUsage(usage);
@@ -8514,9 +9525,9 @@ function createQueryAgent(config) {
8514
9525
  if (result.status === "fulfilled") {
8515
9526
  subAnswers.push(result.value);
8516
9527
  } else {
8517
- await log?.(`Reasoner failed for sub-question "${classification.subQuestions[i].question}": ${result.reason}`);
9528
+ await log?.(`Reasoner failed for sub-question "${subQuestionsToReason[i].question}": ${result.reason}`);
8518
9529
  subAnswers.push({
8519
- subQuestion: classification.subQuestions[i].question,
9530
+ subQuestion: subQuestionsToReason[i].question,
8520
9531
  answer: "Unable to answer this part of the question due to a processing error.",
8521
9532
  citations: [],
8522
9533
  confidence: 0,
@@ -8899,6 +9910,7 @@ export {
8899
9910
  CoverageSchema,
8900
9911
  CoverageTriggerSchema,
8901
9912
  CoverageValueTypeSchema,
9913
+ CoveredReasonSchema,
8902
9914
  CrimeDeclarationsSchema,
8903
9915
  CyberDeclarationsSchema,
8904
9916
  DEDUCTIBLE_TYPES,
@@ -8911,6 +9923,7 @@ export {
8911
9923
  DeductibleScheduleSchema,
8912
9924
  DeductibleTypeSchema,
8913
9925
  DefenseCostTreatmentSchema,
9926
+ DefinitionSchema,
8914
9927
  DocumentTypeSchema,
8915
9928
  DriverRecordSchema,
8916
9929
  DwellingDetailsSchema,