@claritylabs/cl-sdk 0.16.2 → 0.17.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +16 -4
- package/dist/index.d.mts +534 -15
- package/dist/index.d.ts +534 -15
- package/dist/index.js +1846 -831
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +1844 -831
- package/dist/index.mjs.map +1 -1
- package/dist/storage-sqlite.d.mts +223 -1
- package/dist/storage-sqlite.d.ts +223 -1
- package/package.json +1 -1
package/dist/index.mjs
CHANGED
|
@@ -169,7 +169,14 @@ function createPipelineContext(opts) {
|
|
|
169
169
|
let latest = opts.resumeFrom;
|
|
170
170
|
const completedPhases = /* @__PURE__ */ new Set();
|
|
171
171
|
if (opts.resumeFrom) {
|
|
172
|
-
|
|
172
|
+
const phaseIndex = opts.phaseOrder?.indexOf(opts.resumeFrom.phase) ?? -1;
|
|
173
|
+
if (phaseIndex >= 0 && opts.phaseOrder) {
|
|
174
|
+
for (const phase of opts.phaseOrder.slice(0, phaseIndex + 1)) {
|
|
175
|
+
completedPhases.add(phase);
|
|
176
|
+
}
|
|
177
|
+
} else {
|
|
178
|
+
completedPhases.add(opts.resumeFrom.phase);
|
|
179
|
+
}
|
|
173
180
|
}
|
|
174
181
|
return {
|
|
175
182
|
id: opts.id,
|
|
@@ -1226,6 +1233,29 @@ var AuxiliaryFactSchema = z16.object({
|
|
|
1226
1233
|
subject: z16.string().optional(),
|
|
1227
1234
|
context: z16.string().optional()
|
|
1228
1235
|
});
|
|
1236
|
+
var DefinitionSchema = z16.object({
|
|
1237
|
+
term: z16.string(),
|
|
1238
|
+
definition: z16.string(),
|
|
1239
|
+
pageNumber: z16.number().optional(),
|
|
1240
|
+
formNumber: z16.string().optional(),
|
|
1241
|
+
formTitle: z16.string().optional(),
|
|
1242
|
+
sectionRef: z16.string().optional(),
|
|
1243
|
+
originalContent: z16.string().optional()
|
|
1244
|
+
});
|
|
1245
|
+
var CoveredReasonSchema = z16.object({
|
|
1246
|
+
coverageName: z16.string(),
|
|
1247
|
+
reasonNumber: z16.string().optional(),
|
|
1248
|
+
title: z16.string().optional(),
|
|
1249
|
+
content: z16.string(),
|
|
1250
|
+
conditions: z16.array(z16.string()).optional(),
|
|
1251
|
+
exceptions: z16.array(z16.string()).optional(),
|
|
1252
|
+
appliesTo: z16.array(z16.string()).optional(),
|
|
1253
|
+
pageNumber: z16.number().optional(),
|
|
1254
|
+
formNumber: z16.string().optional(),
|
|
1255
|
+
formTitle: z16.string().optional(),
|
|
1256
|
+
sectionRef: z16.string().optional(),
|
|
1257
|
+
originalContent: z16.string().optional()
|
|
1258
|
+
});
|
|
1229
1259
|
var BaseDocumentFields = {
|
|
1230
1260
|
id: z16.string(),
|
|
1231
1261
|
carrier: z16.string(),
|
|
@@ -1236,6 +1266,8 @@ var BaseDocumentFields = {
|
|
|
1236
1266
|
policyTypes: z16.array(z16.string()).optional(),
|
|
1237
1267
|
coverages: z16.array(CoverageSchema),
|
|
1238
1268
|
sections: z16.array(SectionSchema).optional(),
|
|
1269
|
+
definitions: z16.array(DefinitionSchema).optional(),
|
|
1270
|
+
coveredReasons: z16.array(CoveredReasonSchema).optional(),
|
|
1239
1271
|
// Enriched fields (v1.2+)
|
|
1240
1272
|
carrierLegalName: z16.string().optional(),
|
|
1241
1273
|
carrierNaicNumber: z16.string().optional(),
|
|
@@ -1683,33 +1715,102 @@ async function runExtractor(params) {
|
|
|
1683
1715
|
};
|
|
1684
1716
|
}
|
|
1685
1717
|
|
|
1718
|
+
// src/extraction/memory.ts
|
|
1719
|
+
/**
 * Checks whether a value read from extraction memory is a record-like object.
 *
 * @param {unknown} value - Candidate value.
 * @returns {boolean} `true` for non-null objects that are not arrays.
 */
function isMemoryRecord(value) {
  if (value === null || typeof value !== "object") {
    return false;
  }
  return !Array.isArray(value);
}
|
|
1722
|
+
/**
 * Looks up `key` in the memory store and returns the entry only when it is
 * a record-like object (see `isMemoryRecord`).
 *
 * @param {{ get(key: string): unknown }} memory - Store exposing `get`.
 * @param {string} key - Memory key to read.
 * @returns {object | undefined} The stored record, or `undefined` when the
 *   entry is missing, an array, or not an object.
 */
function readMemoryRecord(memory, key) {
  const entry = memory.get(key);
  if (!isMemoryRecord(entry)) {
    return void 0;
  }
  return entry;
}
|
|
1726
|
+
/**
 * Null-safe property read.
 *
 * @param {object | null | undefined} record - Record to read from.
 * @param {string} key - Property name.
 * @returns {unknown} `record[key]`, or `undefined` when `record` is nullish.
 */
function readRecordValue(record, key) {
  if (record === null || record === void 0) {
    return void 0;
  }
  return record[key];
}
|
|
1729
|
+
/**
 * Reads `record[key]` and returns it only when it is an array.
 *
 * @param {object | null | undefined} record - Record to read from.
 * @param {string} key - Property name.
 * @returns {unknown[] | undefined} The array, or `undefined` for any other value.
 */
function readRecordArray(record, key) {
  const candidate = readRecordValue(record, key);
  if (Array.isArray(candidate)) {
    return candidate;
  }
  return void 0;
}
|
|
1733
|
+
/**
 * Returns the record stored under the "carrier_info" memory key.
 *
 * @param {{ get(key: string): unknown }} memory - Extraction memory store.
 * @returns {object | undefined} The record, or `undefined` when absent or not a record.
 */
function getCarrierInfo(memory) {
  const carrierInfo = readMemoryRecord(memory, "carrier_info");
  return carrierInfo;
}
|
|
1736
|
+
/**
 * Returns the record stored under the "named_insured" memory key.
 *
 * @param {{ get(key: string): unknown }} memory - Extraction memory store.
 * @returns {object | undefined} The record, or `undefined` when absent or not a record.
 */
function getNamedInsured(memory) {
  const namedInsured = readMemoryRecord(memory, "named_insured");
  return namedInsured;
}
|
|
1739
|
+
/**
 * Returns the record stored under the "coverage_limits" memory key.
 *
 * @param {{ get(key: string): unknown }} memory - Extraction memory store.
 * @returns {object | undefined} The record, or `undefined` when absent or not a record.
 */
function getCoverageLimits(memory) {
  const coverageLimits = readMemoryRecord(memory, "coverage_limits");
  return coverageLimits;
}
|
|
1742
|
+
/**
 * Returns the `coverages` array of the "coverage_limits" record.
 *
 * @param {{ get(key: string): unknown }} memory - Extraction memory store.
 * @returns {unknown[]} The coverages array, or an empty array when the record
 *   or its `coverages` array is missing.
 */
function getCoverageLimitCoverages(memory) {
  const limits = getCoverageLimits(memory);
  const coverages = readRecordArray(limits, "coverages");
  return coverages ?? [];
}
|
|
1745
|
+
/**
 * Returns the record stored under the "sections" memory key.
 *
 * @param {{ get(key: string): unknown }} memory - Extraction memory store.
 * @returns {object | undefined} The record, or `undefined` when absent or not a record.
 */
function getSectionsPayload(memory) {
  const payload = readMemoryRecord(memory, "sections");
  return payload;
}
|
|
1748
|
+
/**
 * Returns the `sections` array nested inside the "sections" memory record.
 *
 * @param {{ get(key: string): unknown }} memory - Extraction memory store.
 * @returns {unknown[] | undefined} The array, or `undefined` when missing.
 */
function getSections(memory) {
  const payload = getSectionsPayload(memory);
  return readRecordArray(payload, "sections");
}
|
|
1751
|
+
/**
 * Returns the record stored under the "definitions" memory key.
 *
 * @param {{ get(key: string): unknown }} memory - Extraction memory store.
 * @returns {object | undefined} The record, or `undefined` when absent or not a record.
 */
function getDefinitionsPayload(memory) {
  const payload = readMemoryRecord(memory, "definitions");
  return payload;
}
|
|
1754
|
+
/**
 * Returns the `definitions` array nested inside the "definitions" memory record.
 *
 * @param {{ get(key: string): unknown }} memory - Extraction memory store.
 * @returns {unknown[] | undefined} The array, or `undefined` when missing.
 */
function getDefinitions(memory) {
  const payload = getDefinitionsPayload(memory);
  return readRecordArray(payload, "definitions");
}
|
|
1757
|
+
/**
 * Returns the record stored under the "covered_reasons" memory key.
 *
 * @param {{ get(key: string): unknown }} memory - Extraction memory store.
 * @returns {object | undefined} The record, or `undefined` when absent or not a record.
 */
function getCoveredReasonsPayload(memory) {
  const payload = readMemoryRecord(memory, "covered_reasons");
  return payload;
}
|
|
1760
|
+
/**
 * Returns the covered-reasons array from the "covered_reasons" memory record.
 * The camelCase property `coveredReasons` is tried first, then the
 * snake_case property `covered_reasons`.
 *
 * @param {{ get(key: string): unknown }} memory - Extraction memory store.
 * @returns {unknown[] | undefined} The array, or `undefined` when neither
 *   property holds an array.
 */
function getCoveredReasons(memory) {
  const payload = getCoveredReasonsPayload(memory);
  const camelCased = readRecordArray(payload, "coveredReasons");
  if (camelCased !== null && camelCased !== void 0) {
    return camelCased;
  }
  return readRecordArray(payload, "covered_reasons");
}
|
|
1764
|
+
|
|
1686
1765
|
// src/extraction/promote.ts
|
|
1687
1766
|
/**
 * Returns the `fields` array of `doc.declarations`.
 *
 * @param {object} doc - Document that may carry a `declarations` object.
 * @returns {unknown[]} `doc.declarations.fields` when it is an array,
 *   otherwise an empty array.
 */
function getDeclarationFields(doc) {
  const candidate = doc.declarations?.fields;
  if (!Array.isArray(candidate)) {
    return [];
  }
  return candidate;
}
|
|
1691
1770
|
/**
 * Tells whether a field name equals any of the given patterns once both
 * sides have been passed through `normalizeFieldName`.
 *
 * @param {string} fieldName - Field name to test.
 * @param {string[]} patterns - Accepted names.
 * @returns {boolean} `true` when at least one normalized pattern equals the
 *   normalized field name.
 */
function fieldMatches(fieldName, patterns) {
  const target = normalizeFieldName(fieldName);
  for (const pattern of patterns) {
    if (normalizeFieldName(pattern) === target) {
      return true;
    }
  }
  return false;
}
|
|
1774
|
+
/**
 * Canonicalizes a field name for comparison: lower-cases it and drops every
 * character that is not an ASCII letter or digit, so "Total Premium",
 * "total_premium" and "totalPremium" all compare equal.
 *
 * Input is coerced instead of assumed to be a string. This function is also
 * called with values that are not guaranteed to be strings (for example an
 * item's `amount`, or a declarations entry with no `field` property), and
 * calling `.toLowerCase()` on those would throw a TypeError.
 *
 * @param {unknown} fieldName - Name to normalize; non-strings are stringified.
 * @returns {string} Normalized name; an empty string for `null`/`undefined`.
 */
function normalizeFieldName(fieldName) {
  if (fieldName === null || fieldName === void 0) {
    return "";
  }
  return String(fieldName).toLowerCase().replace(/[^a-z0-9]/g, "");
}
|
|
1695
|
-
/**
 * Returns the `value` of the first entry in `fields` whose `field` name
 * matches one of `patterns` (via `fieldMatches`) and is not vetoed by `reject`.
 *
 * @param {Array<{ field: string, value?: unknown }>} fields - Entries to scan, in order.
 * @param {string[]} patterns - Accepted field names.
 * @param {(field: object) => unknown} [reject] - Optional veto; a truthy
 *   result skips the entry.
 * @returns {unknown} The matching entry's `value`, or `undefined` when no
 *   entry qualifies.
 */
function findFieldValue(fields, patterns, reject) {
  for (const candidate of fields) {
    if (!fieldMatches(candidate.field, patterns)) {
      continue;
    }
    if (reject?.(candidate)) {
      continue;
    }
    return candidate.value;
  }
  return void 0;
}
|
|
1699
|
-
function
|
|
1700
|
-
|
|
1701
|
-
|
|
1702
|
-
|
|
1703
|
-
|
|
1704
|
-
|
|
1705
|
-
|
|
1781
|
+
/**
 * Passes through strings that contain non-whitespace characters.
 *
 * @param {unknown} value - Candidate value.
 * @returns {string | undefined} The original (untrimmed) string, or
 *   `undefined` for non-strings and blank strings.
 */
function stringValue(value) {
  if (typeof value !== "string") {
    return void 0;
  }
  return value.trim() ? value : void 0;
}
|
|
1784
|
+
/**
 * Returns the first non-blank string found on `raw` under any of `keys`,
 * checked in the order given.
 *
 * @param {object} raw - Object to read from.
 * @param {string[]} keys - Property names to try.
 * @returns {string | undefined} The first non-blank string value, or
 *   `undefined` when none of the keys holds one.
 */
function findRawString(raw, keys) {
  for (const candidateKey of keys) {
    const text = stringValue(raw[candidateKey]);
    if (text !== void 0) {
      return text;
    }
  }
  return void 0;
}
|
|
1791
|
+
/**
 * Renames properties on `raw` in place. For each mapping, the `from` value is
 * copied to `to` only when `to` is currently falsy and `from` is truthy; the
 * `from` property is deleted in every case.
 *
 * @param {object} raw - Object mutated in place.
 * @param {Array<{ from: string, to: string }>} mappings - Rename pairs.
 * @returns {void}
 */
function promoteRawFields(raw, mappings) {
  mappings.forEach(({ from, to }) => {
    const targetIsEmpty = !raw[to];
    if (targetIsEmpty && raw[from]) {
      raw[to] = raw[from];
    }
    delete raw[from];
  });
}
|
|
1799
|
+
/**
 * Resolves a value from `raw[lookup.rawKey]` when that is truthy, otherwise
 * from the declarations fields via `findFieldValue`.
 *
 * @param {object} raw - Object holding directly extracted properties.
 * @param {Array<{ field: string, value?: unknown }>} fields - Declarations entries.
 * @param {{ rawKey?: string, patterns: string[], reject?: Function }} lookup -
 *   Where to look: optional raw property name, field-name patterns, optional veto.
 * @returns {unknown} The truthy raw value, or whatever `findFieldValue` returns.
 */
function findRawOrDeclarationValue(raw, fields, lookup) {
  if (lookup.rawKey) {
    const direct = raw[lookup.rawKey];
    if (direct) {
      return direct;
    }
  }
  return findFieldValue(fields, lookup.patterns, lookup.reject);
}
|
|
1802
|
+
/**
 * Fills `raw[targetKey]` when it is currently falsy. The value comes from the
 * first non-blank string under `rawKeys` (via `findRawString`), falling back
 * to a declarations-field lookup (via `findFieldValue`). Nothing is written
 * when the resolved value is falsy.
 *
 * @param {object} raw - Object mutated in place.
 * @param {Array<{ field: string, value?: unknown }>} fields - Declarations entries.
 * @param {string} targetKey - Property to populate.
 * @param {string[]} rawKeys - Alternative property names to read from `raw`.
 * @param {{ patterns: string[], reject?: Function }} lookup - Declarations lookup settings.
 * @returns {void}
 */
function promoteRawOrDeclarationString(raw, fields, targetKey, rawKeys, lookup) {
  if (!raw[targetKey]) {
    const fromRaw = findRawString(raw, rawKeys);
    const resolved = fromRaw ?? findFieldValue(fields, lookup.patterns, lookup.reject);
    if (resolved) {
      raw[targetKey] = resolved;
    }
  }
}
|
|
1807
|
+
function promoteCarrierFields(doc) {
|
|
1808
|
+
const raw = doc;
|
|
1809
|
+
promoteRawFields(raw, [
|
|
1810
|
+
{ from: "naicNumber", to: "carrierNaicNumber" },
|
|
1811
|
+
{ from: "amBestRating", to: "carrierAmBestRating" },
|
|
1812
|
+
{ from: "admittedStatus", to: "carrierAdmittedStatus" }
|
|
1813
|
+
]);
|
|
1713
1814
|
if (!raw.insurer && raw.carrierLegalName) {
|
|
1714
1815
|
raw.insurer = {
|
|
1715
1816
|
legalName: raw.carrierLegalName,
|
|
@@ -1750,12 +1851,21 @@ var BROKER_ADDRESS_PATTERNS = ["brokerAddress", "agentAddress", "producerAddress
|
|
|
1750
1851
|
function promoteBroker(doc) {
|
|
1751
1852
|
const raw = doc;
|
|
1752
1853
|
const fields = getDeclarationFields(doc);
|
|
1753
|
-
const brokerAgency = raw
|
|
1754
|
-
|
|
1755
|
-
|
|
1756
|
-
|
|
1757
|
-
const
|
|
1758
|
-
|
|
1854
|
+
const brokerAgency = findRawOrDeclarationValue(raw, fields, {
|
|
1855
|
+
rawKey: "brokerAgency",
|
|
1856
|
+
patterns: BROKER_NAME_PATTERNS
|
|
1857
|
+
});
|
|
1858
|
+
const brokerContact = findRawOrDeclarationValue(raw, fields, {
|
|
1859
|
+
rawKey: "brokerContactName",
|
|
1860
|
+
patterns: BROKER_CONTACT_PATTERNS
|
|
1861
|
+
});
|
|
1862
|
+
const brokerLicense = findRawOrDeclarationValue(raw, fields, {
|
|
1863
|
+
rawKey: "brokerLicenseNumber",
|
|
1864
|
+
patterns: BROKER_LICENSE_PATTERNS
|
|
1865
|
+
});
|
|
1866
|
+
const brokerPhone = findRawOrDeclarationValue(raw, fields, { patterns: BROKER_PHONE_PATTERNS });
|
|
1867
|
+
const brokerEmail = findRawOrDeclarationValue(raw, fields, { patterns: BROKER_EMAIL_PATTERNS });
|
|
1868
|
+
const brokerAddress = findRawOrDeclarationValue(raw, fields, { patterns: BROKER_ADDRESS_PATTERNS });
|
|
1759
1869
|
if (brokerAgency) raw.brokerAgency = brokerAgency;
|
|
1760
1870
|
if (brokerContact) raw.brokerContactName = brokerContact;
|
|
1761
1871
|
if (brokerLicense) raw.brokerLicenseNumber = brokerLicense;
|
|
@@ -2011,25 +2121,171 @@ function synthesizeDeductibles(doc) {
|
|
|
2011
2121
|
raw.deductibles = deductibles;
|
|
2012
2122
|
}
|
|
2013
2123
|
}
|
|
2014
|
-
var PREMIUM_PATTERNS = [
|
|
2015
|
-
|
|
2124
|
+
var PREMIUM_PATTERNS = [
|
|
2125
|
+
"premium",
|
|
2126
|
+
"premiumAmount",
|
|
2127
|
+
"premium amount",
|
|
2128
|
+
"totalPremium",
|
|
2129
|
+
"total premium",
|
|
2130
|
+
"totalPolicyPremium",
|
|
2131
|
+
"total policy premium",
|
|
2132
|
+
"annualPremium",
|
|
2133
|
+
"annual premium",
|
|
2134
|
+
"estimatedAnnualPremium",
|
|
2135
|
+
"estimated annual premium",
|
|
2136
|
+
"policyPremium",
|
|
2137
|
+
"policy premium",
|
|
2138
|
+
"basePremium",
|
|
2139
|
+
"base premium",
|
|
2140
|
+
"planCost",
|
|
2141
|
+
"plan cost",
|
|
2142
|
+
"policyCost",
|
|
2143
|
+
"policy cost",
|
|
2144
|
+
"premiumSubtotal",
|
|
2145
|
+
"premium subtotal",
|
|
2146
|
+
"subtotalPremium",
|
|
2147
|
+
"subtotal premium",
|
|
2148
|
+
"quotedPremium",
|
|
2149
|
+
"quoted premium"
|
|
2150
|
+
];
|
|
2151
|
+
var TOTAL_COST_PATTERNS = [
|
|
2152
|
+
"totalCost",
|
|
2153
|
+
"total cost",
|
|
2154
|
+
"total",
|
|
2155
|
+
"totalDue",
|
|
2156
|
+
"total due",
|
|
2157
|
+
"amountPaid",
|
|
2158
|
+
"amount paid",
|
|
2159
|
+
"totalPaid",
|
|
2160
|
+
"total paid",
|
|
2161
|
+
"totalPrice",
|
|
2162
|
+
"total price",
|
|
2163
|
+
"totalTripCost",
|
|
2164
|
+
"total trip cost",
|
|
2165
|
+
"amountCharged",
|
|
2166
|
+
"amount charged",
|
|
2167
|
+
"amountDue",
|
|
2168
|
+
"amount due",
|
|
2169
|
+
"totalAmountDue",
|
|
2170
|
+
"total amount due",
|
|
2171
|
+
"totalAmount",
|
|
2172
|
+
"total amount",
|
|
2173
|
+
"grandTotal",
|
|
2174
|
+
"grand total",
|
|
2175
|
+
"totalPayable",
|
|
2176
|
+
"total payable",
|
|
2177
|
+
"totalCharges",
|
|
2178
|
+
"total charges",
|
|
2179
|
+
"totalPolicyCost",
|
|
2180
|
+
"total policy cost"
|
|
2181
|
+
];
|
|
2182
|
+
var PREMIUM_RAW_KEYS = [
|
|
2183
|
+
"premium",
|
|
2184
|
+
"premiumAmount",
|
|
2185
|
+
"premium_amount",
|
|
2186
|
+
"totalPremium",
|
|
2187
|
+
"totalPolicyPremium",
|
|
2188
|
+
"annualPremium",
|
|
2189
|
+
"estimatedAnnualPremium",
|
|
2190
|
+
"policyPremium",
|
|
2191
|
+
"basePremium",
|
|
2192
|
+
"planCost",
|
|
2193
|
+
"policyCost",
|
|
2194
|
+
"premiumSubtotal",
|
|
2195
|
+
"subtotalPremium",
|
|
2196
|
+
"quotedPremium"
|
|
2197
|
+
];
|
|
2198
|
+
var TOTAL_COST_RAW_KEYS = [
|
|
2199
|
+
"totalCost",
|
|
2200
|
+
"total_cost",
|
|
2201
|
+
"total",
|
|
2202
|
+
"totalDue",
|
|
2203
|
+
"amountPaid",
|
|
2204
|
+
"amount_paid",
|
|
2205
|
+
"totalPaid",
|
|
2206
|
+
"total_paid",
|
|
2207
|
+
"totalPrice",
|
|
2208
|
+
"totalTripCost",
|
|
2209
|
+
"amountCharged",
|
|
2210
|
+
"amountDue",
|
|
2211
|
+
"totalAmountDue",
|
|
2212
|
+
"totalAmount",
|
|
2213
|
+
"grandTotal",
|
|
2214
|
+
"totalPayable",
|
|
2215
|
+
"totalCharges",
|
|
2216
|
+
"totalPolicyCost"
|
|
2217
|
+
];
|
|
2218
|
+
/**
 * Tells whether a field name looks like a tax, fee, surcharge or similar
 * charge. The check is a substring match on the normalized name.
 *
 * @param {string} fieldName - Field name to classify.
 * @returns {boolean} `true` when the normalized name contains one of the
 *   tax/fee keywords.
 */
function isTaxOrFeeField(fieldName) {
  const taxOrFeeKeywords = /tax|gst|hst|pst|qst|fee|surcharge|assessment|stamp|filing|inspection/;
  return taxOrFeeKeywords.test(normalizeFieldName(fieldName));
}
|
|
2222
|
+
/**
 * Tells whether a field name is one of the `TOTAL_COST_PATTERNS`.
 *
 * @param {string} fieldName - Field name to test.
 * @returns {boolean} Result of `fieldMatches` against `TOTAL_COST_PATTERNS`.
 */
function isTotalCostField(fieldName) {
  const matchesTotalCost = fieldMatches(fieldName, TOTAL_COST_PATTERNS);
  return matchesTotalCost;
}
|
|
2225
|
+
/**
 * Classifies a tax/fee field name by keyword. Checks run in a fixed order
 * and the first hit wins: tax keywords, then "surcharge", then "assessment",
 * then fee keywords.
 *
 * @param {string} fieldName - Field name to classify.
 * @returns {"tax" | "surcharge" | "assessment" | "fee" | undefined} The
 *   category, or `undefined` when no keyword is present.
 */
function taxFeeType(fieldName) {
  const key = normalizeFieldName(fieldName);
  const has = (token) => key.includes(token);
  if (["tax", "gst", "hst", "pst", "qst"].some(has)) {
    return "tax";
  }
  if (has("surcharge")) {
    return "surcharge";
  }
  if (has("assessment")) {
    return "assessment";
  }
  if (["fee", "stamp", "filing"].some(has)) {
    return "fee";
  }
  return void 0;
}
|
|
2233
|
+
/**
 * Turns a camelCase, snake_case or kebab-case field name into a
 * space-separated label with each word capitalized.
 *
 * @param {string} fieldName - Field name to convert.
 * @returns {string} Label such as "State Tax" for "stateTax".
 */
function titleizeFieldName(fieldName) {
  // Insert a space at every lower/digit -> upper boundary.
  const camelSplit = fieldName.replace(/([a-z0-9])([A-Z])/g, "$1 $2");
  const separatorsAsSpaces = camelSplit.replace(/[_-]+/g, " ");
  const collapsed = separatorsAsSpaces.replace(/\s+/g, " ").trim();
  return collapsed.replace(/\b\w/g, (initial) => initial.toUpperCase());
}
|
|
2237
|
+
/**
 * Builds the de-duplication key for a tax/fee item: normalized name,
 * normalized amount and type (empty when nullish), joined with "|".
 *
 * @param {{ name: string, amount: string, type?: string }} item - Tax/fee item.
 * @returns {string} Key of the form "name|amount|type".
 */
function taxFeeKey(item) {
  const namePart = normalizeFieldName(item.name);
  const amountPart = normalizeFieldName(item.amount);
  const typePart = item.type ?? "";
  return `${namePart}|${amountPart}|${typePart}`;
}
|
|
2244
|
+
/**
 * Converts a declarations entry into a tax/fee item.
 *
 * @param {{ field: string, value: string }} field - Declarations entry.
 * @returns {{ name: string, amount: string, type?: string }} Item whose name
 *   is the titleized field name and whose amount has negative notation
 *   stripped; `type` is present only when `taxFeeType` returns a category.
 */
function taxFeeItemFromField(field) {
  const type = taxFeeType(field.field);
  const item = {
    name: titleizeFieldName(field.field),
    amount: absorbNegative(field.value)
  };
  if (type) {
    item.type = type;
  }
  return item;
}
|
|
2016
2252
|
/**
 * Strips negative-amount notation from a string: a leading "-" (with any
 * following whitespace) and, after that, one pair of parentheses wrapping
 * the whole value.
 *
 * @param {string} value - Amount text.
 * @returns {string} The text without the leading minus or wrapping parentheses.
 */
function absorbNegative(value) {
  const withoutLeadingMinus = value.replace(/^-\s*/, "");
  return withoutLeadingMinus.replace(/^\(\s*(.*?)\s*\)$/, "$1");
}
|
|
2019
2255
|
/**
 * Populates `premium` and `totalCost` on the document when they are empty,
 * reading first from alternative raw keys and then from declarations fields.
 * Any string result then has negative notation stripped.
 *
 * @param {object} doc - Document mutated in place.
 * @returns {void}
 */
function promotePremium(doc) {
  const raw = doc;
  const fields = getDeclarationFields(doc);
  promoteRawOrDeclarationString(raw, fields, "premium", PREMIUM_RAW_KEYS, {
    patterns: PREMIUM_PATTERNS,
    // Never take a tax or fee line as the premium.
    reject: (field) => isTaxOrFeeField(field.field)
  });
  promoteRawOrDeclarationString(raw, fields, "totalCost", TOTAL_COST_RAW_KEYS, {
    patterns: TOTAL_COST_PATTERNS
  });
  for (const key of ["premium", "totalCost"]) {
    if (typeof raw[key] === "string") {
      raw[key] = absorbNegative(raw[key]);
    }
  }
}
|
|
2268
|
+
/**
 * Merges tax/fee items derived from declarations fields into
 * `doc.taxesAndFees`. Existing items that have both `name` and `amount` are
 * kept; an item derived from a field replaces an existing one with the same
 * `taxFeeKey`. Fields with a blank value, fields that are not tax/fee fields
 * and fields that name a total-cost amount are ignored. The document is
 * left untouched when there are no declarations fields or nothing to store.
 *
 * @param {object} doc - Document mutated in place.
 * @returns {void}
 */
function synthesizeTaxesAndFees(doc) {
  const raw = doc;
  const fields = getDeclarationFields(doc);
  if (fields.length === 0) {
    return;
  }
  const merged = /* @__PURE__ */ new Map();
  const existing = Array.isArray(raw.taxesAndFees) ? raw.taxesAndFees : [];
  for (const item of existing) {
    if (item?.name && item?.amount) {
      merged.set(taxFeeKey(item), item);
    }
  }
  for (const field of fields) {
    const isCandidate = Boolean(field.value?.trim()) && isTaxOrFeeField(field.field) && !isTotalCostField(field.field);
    if (!isCandidate) {
      continue;
    }
    const derived = taxFeeItemFromField(field);
    merged.set(taxFeeKey(derived), derived);
  }
  if (merged.size > 0) {
    raw.taxesAndFees = [...merged.values()];
  }
}
|
|
2033
2289
|
function promoteExtractedFields(doc) {
|
|
2034
2290
|
promoteCarrierFields(doc);
|
|
2035
2291
|
promoteBroker(doc);
|
|
@@ -2037,44 +2293,53 @@ function promoteExtractedFields(doc) {
|
|
|
2037
2293
|
promoteLocations(doc);
|
|
2038
2294
|
synthesizeLimits(doc);
|
|
2039
2295
|
synthesizeDeductibles(doc);
|
|
2296
|
+
synthesizeTaxesAndFees(doc);
|
|
2040
2297
|
promotePremium(doc);
|
|
2041
2298
|
}
|
|
2042
2299
|
|
|
2043
2300
|
// src/extraction/assembler.ts
|
|
2044
2301
|
function assembleDocument(documentId, documentType, memory) {
|
|
2045
|
-
const carrier = memory
|
|
2046
|
-
const insured = memory
|
|
2047
|
-
const coverages = memory
|
|
2048
|
-
const endorsements = memory
|
|
2049
|
-
const exclusions = memory
|
|
2050
|
-
const conditions = memory
|
|
2051
|
-
const premium = memory
|
|
2052
|
-
const declarations = memory
|
|
2053
|
-
const lossHistory = memory
|
|
2054
|
-
const
|
|
2055
|
-
const
|
|
2056
|
-
const
|
|
2057
|
-
const
|
|
2302
|
+
const carrier = getCarrierInfo(memory);
|
|
2303
|
+
const insured = getNamedInsured(memory);
|
|
2304
|
+
const coverages = getCoverageLimits(memory);
|
|
2305
|
+
const endorsements = readMemoryRecord(memory, "endorsements");
|
|
2306
|
+
const exclusions = readMemoryRecord(memory, "exclusions");
|
|
2307
|
+
const conditions = readMemoryRecord(memory, "conditions");
|
|
2308
|
+
const premium = readMemoryRecord(memory, "premium_breakdown");
|
|
2309
|
+
const declarations = readMemoryRecord(memory, "declarations");
|
|
2310
|
+
const lossHistory = readMemoryRecord(memory, "loss_history");
|
|
2311
|
+
const supplementary = readMemoryRecord(memory, "supplementary");
|
|
2312
|
+
const formInventory = readMemoryRecord(memory, "form_inventory");
|
|
2313
|
+
const classify = readMemoryRecord(memory, "classify");
|
|
2314
|
+
const lossPayees = readRecordArray(insured, "lossPayees");
|
|
2315
|
+
const mortgageHolders = readRecordArray(insured, "mortgageHolders");
|
|
2058
2316
|
const base = {
|
|
2059
2317
|
id: documentId,
|
|
2060
|
-
carrier: carrier
|
|
2061
|
-
insuredName: insured
|
|
2062
|
-
coverages:
|
|
2063
|
-
policyTypes: classify
|
|
2318
|
+
carrier: readRecordValue(carrier, "carrierName") ?? "Unknown",
|
|
2319
|
+
insuredName: readRecordValue(insured, "insuredName") ?? "Unknown",
|
|
2320
|
+
coverages: getCoverageLimitCoverages(memory),
|
|
2321
|
+
policyTypes: readRecordValue(classify, "policyTypes"),
|
|
2064
2322
|
...sanitizeNulls(carrier ?? {}),
|
|
2065
2323
|
...sanitizeNulls(insured ?? {}),
|
|
2066
2324
|
// Map named_insured extractor's loss payees/mortgage holders to EndorsementParty shape
|
|
2067
|
-
...
|
|
2068
|
-
...
|
|
2325
|
+
...lossPayees && lossPayees.length > 0 ? { lossPayees: lossPayees.map((lp) => ({ ...lp, role: "loss_payee" })) } : {},
|
|
2326
|
+
...mortgageHolders && mortgageHolders.length > 0 ? {
|
|
2327
|
+
mortgageHolders: mortgageHolders.map((mh) => ({
|
|
2328
|
+
...mh,
|
|
2329
|
+
role: "mortgage_holder"
|
|
2330
|
+
}))
|
|
2331
|
+
} : {},
|
|
2069
2332
|
...sanitizeNulls(coverages ?? {}),
|
|
2070
2333
|
...sanitizeNulls(premium ?? {}),
|
|
2071
2334
|
...sanitizeNulls(supplementary ?? {}),
|
|
2072
|
-
supplementaryFacts: supplementary
|
|
2073
|
-
endorsements: endorsements
|
|
2074
|
-
exclusions: exclusions
|
|
2075
|
-
conditions: conditions
|
|
2076
|
-
sections:
|
|
2077
|
-
formInventory: formInventory
|
|
2335
|
+
supplementaryFacts: readRecordValue(supplementary, "auxiliaryFacts"),
|
|
2336
|
+
endorsements: readRecordValue(endorsements, "endorsements"),
|
|
2337
|
+
exclusions: readRecordValue(exclusions, "exclusions"),
|
|
2338
|
+
conditions: readRecordValue(conditions, "conditions"),
|
|
2339
|
+
sections: getSections(memory),
|
|
2340
|
+
formInventory: readRecordValue(formInventory, "forms"),
|
|
2341
|
+
definitions: getDefinitions(memory),
|
|
2342
|
+
coveredReasons: getCoveredReasons(memory),
|
|
2078
2343
|
declarations: declarations ? sanitizeNulls(declarations) : void 0,
|
|
2079
2344
|
...sanitizeNulls(lossHistory ?? {})
|
|
2080
2345
|
};
|
|
@@ -2083,21 +2348,21 @@ function assembleDocument(documentId, documentType, memory) {
|
|
|
2083
2348
|
doc = {
|
|
2084
2349
|
...base,
|
|
2085
2350
|
type: "policy",
|
|
2086
|
-
policyNumber: carrier
|
|
2087
|
-
effectiveDate: carrier
|
|
2088
|
-
expirationDate: carrier
|
|
2089
|
-
policyTermType: carrier
|
|
2351
|
+
policyNumber: readRecordValue(carrier, "policyNumber") ?? readRecordValue(insured, "policyNumber") ?? "Unknown",
|
|
2352
|
+
effectiveDate: readRecordValue(carrier, "effectiveDate") ?? readRecordValue(insured, "effectiveDate") ?? "Unknown",
|
|
2353
|
+
expirationDate: readRecordValue(carrier, "expirationDate"),
|
|
2354
|
+
policyTermType: readRecordValue(carrier, "policyTermType")
|
|
2090
2355
|
};
|
|
2091
2356
|
} else {
|
|
2092
2357
|
doc = {
|
|
2093
2358
|
...base,
|
|
2094
2359
|
type: "quote",
|
|
2095
|
-
quoteNumber: carrier
|
|
2096
|
-
proposedEffectiveDate: carrier
|
|
2097
|
-
proposedExpirationDate: carrier
|
|
2098
|
-
subjectivities: coverages
|
|
2099
|
-
underwritingConditions: coverages
|
|
2100
|
-
premiumBreakdown: premium
|
|
2360
|
+
quoteNumber: readRecordValue(carrier, "quoteNumber") ?? "Unknown",
|
|
2361
|
+
proposedEffectiveDate: readRecordValue(carrier, "proposedEffectiveDate"),
|
|
2362
|
+
proposedExpirationDate: readRecordValue(carrier, "proposedExpirationDate"),
|
|
2363
|
+
subjectivities: readRecordValue(coverages, "subjectivities"),
|
|
2364
|
+
underwritingConditions: readRecordValue(coverages, "underwritingConditions"),
|
|
2365
|
+
premiumBreakdown: readRecordValue(premium, "premiumBreakdown")
|
|
2101
2366
|
};
|
|
2102
2367
|
}
|
|
2103
2368
|
promoteExtractedFields(doc);
|
|
@@ -2199,6 +2464,23 @@ ${block}`;
|
|
|
2199
2464
|
}
|
|
2200
2465
|
|
|
2201
2466
|
// src/extraction/formatter.ts
|
|
2467
|
+
// Trimmed text at or above this many characters is always sent for formatting.
var LONG_CONTENT_THRESHOLD = 1200;
/**
 * Decides whether a piece of extracted text needs the formatting pass.
 * Blank text never does. Otherwise it does when the text is long, contains
 * markdown/HTML-like markup, has irregular whitespace, or looks tabular.
 *
 * @param {string} text - Text to inspect.
 * @returns {boolean} `true` when the text should be formatted.
 */
function shouldFormatContent(text) {
  const trimmed = text.trim();
  if (trimmed.length === 0) {
    return false;
  }
  if (trimmed.length >= LONG_CONTENT_THRESHOLD) {
    return true;
  }
  const markupSignals = [
    /```|~~~|<br\s*\/?>/i, // code fences or <br> tags
    /(^|\s)(\*\*|__|`)/, // bold / inline-code markers
    /!?\[[^\]]+\]\([^)]+\)/, // links or images
    /^\s{0,3}#{1,6}\s*\S/m, // headings
    /^\s{0,6}(?:[-*+]|\d+[.)])\s+\S/m // bullet or numbered list items
  ];
  if (markupSignals.some((signal) => signal.test(trimmed))) {
    return true;
  }
  // Checked on the untrimmed text so trailing whitespace is detected too.
  if (/\t|[^\S\r\n]{3,}|\n{3,}|[ \t]+$/m.test(text)) {
    return true;
  }
  const rows = trimmed.split(/\r?\n/).map((row) => row.trim()).filter((row) => row.length > 0);
  // Two or more pipes on one line suggests a table row.
  if (rows.some((row) => row.split("|").length - 1 >= 2)) {
    return true;
  }
  let spaceAlignedRowCount = 0;
  for (const row of rows) {
    if (/\S\s{2,}\S\s{2,}\S/.test(row)) {
      spaceAlignedRowCount += 1;
    }
  }
  return spaceAlignedRowCount >= 2;
}
|
|
2202
2484
|
function collectContentFields(doc) {
|
|
2203
2485
|
const entries = [];
|
|
2204
2486
|
let id = 0;
|
|
@@ -2234,6 +2516,21 @@ function collectContentFields(doc) {
|
|
|
2234
2516
|
add(`conditions[${i}].content`, doc.conditions[i].content);
|
|
2235
2517
|
}
|
|
2236
2518
|
}
|
|
2519
|
+
const extendedDoc = doc;
|
|
2520
|
+
if (extendedDoc.definitions) {
|
|
2521
|
+
for (let i = 0; i < extendedDoc.definitions.length; i++) {
|
|
2522
|
+
add(`definitions[${i}].definition`, extendedDoc.definitions[i].definition);
|
|
2523
|
+
}
|
|
2524
|
+
}
|
|
2525
|
+
const coveredReasons = extendedDoc.coveredReasons ?? extendedDoc.covered_reasons;
|
|
2526
|
+
if (coveredReasons) {
|
|
2527
|
+
for (let i = 0; i < coveredReasons.length; i++) {
|
|
2528
|
+
add(`coveredReasons[${i}].content`, coveredReasons[i].content);
|
|
2529
|
+
coveredReasons[i].conditions?.forEach((condition, j) => {
|
|
2530
|
+
add(`coveredReasons[${i}].conditions[${j}]`, condition);
|
|
2531
|
+
});
|
|
2532
|
+
}
|
|
2533
|
+
}
|
|
2237
2534
|
return entries;
|
|
2238
2535
|
}
|
|
2239
2536
|
function parseFormatResponse(response) {
|
|
@@ -2249,6 +2546,10 @@ function parseFormatResponse(response) {
|
|
|
2249
2546
|
return results;
|
|
2250
2547
|
}
|
|
2251
2548
|
function applyFormattedContent(doc, entries, formatted) {
|
|
2549
|
+
const docRecord = doc;
|
|
2550
|
+
if (!docRecord.coveredReasons && docRecord.covered_reasons) {
|
|
2551
|
+
docRecord.coveredReasons = docRecord.covered_reasons;
|
|
2552
|
+
}
|
|
2252
2553
|
for (const entry of entries) {
|
|
2253
2554
|
const cleaned = formatted.get(entry.id);
|
|
2254
2555
|
if (!cleaned) continue;
|
|
@@ -2257,6 +2558,14 @@ function applyFormattedContent(doc, entries, formatted) {
|
|
|
2257
2558
|
const [, field, idx1, sub1, idx2, sub2] = segments;
|
|
2258
2559
|
if (!sub1) {
|
|
2259
2560
|
doc[field] = cleaned;
|
|
2561
|
+
} else if (idx2 && !sub2) {
|
|
2562
|
+
const arr = doc[field];
|
|
2563
|
+
if (arr && arr[Number(idx1)]) {
|
|
2564
|
+
const nested = arr[Number(idx1)][sub1];
|
|
2565
|
+
if (Array.isArray(nested)) {
|
|
2566
|
+
nested[Number(idx2)] = cleaned;
|
|
2567
|
+
}
|
|
2568
|
+
}
|
|
2260
2569
|
} else if (!sub2) {
|
|
2261
2570
|
const arr = doc[field];
|
|
2262
2571
|
if (arr && arr[Number(idx1)]) {
|
|
@@ -2275,7 +2584,7 @@ function applyFormattedContent(doc, entries, formatted) {
|
|
|
2275
2584
|
}
|
|
2276
2585
|
var MAX_ENTRIES_PER_BATCH = 20;
|
|
2277
2586
|
async function formatDocumentContent(doc, generateText, options) {
|
|
2278
|
-
const entries = collectContentFields(doc);
|
|
2587
|
+
const entries = collectContentFields(doc).filter((entry) => shouldFormatContent(entry.text));
|
|
2279
2588
|
const totalUsage = { inputTokens: 0, outputTokens: 0 };
|
|
2280
2589
|
if (entries.length === 0) {
|
|
2281
2590
|
return { document: doc, usage: totalUsage };
|
|
@@ -2321,6 +2630,16 @@ function formatAddress(addr) {
|
|
|
2321
2630
|
const parts = [addr.street1, addr.street2, addr.city, addr.state, addr.zip, addr.country].filter(Boolean);
|
|
2322
2631
|
return parts.join(", ");
|
|
2323
2632
|
}
|
|
2633
|
+
/**
 * Narrows an unknown value to an array of record-like objects.
 *
 * @param {unknown} value - Candidate value.
 * @returns {object[]} A new array holding only the non-null, non-array
 *   object elements of `value`; an empty array when `value` is not an array.
 */
function asRecordArray(value) {
  if (!Array.isArray(value)) {
    return [];
  }
  return value.filter((item) => item !== null && typeof item === "object" && !Array.isArray(item));
}
|
|
2636
|
+
/**
 * Returns the first non-blank string found on `item` under any of `keys`,
 * checked in the order given.
 *
 * @param {object} item - Object to read from.
 * @param {string[]} keys - Property names to try.
 * @returns {string | undefined} The first non-blank string (untrimmed), or
 *   `undefined` when none of the keys holds one.
 */
function firstString(item, keys) {
  for (const key of keys) {
    const candidate = item[key];
    if (typeof candidate !== "string") {
      continue;
    }
    if (candidate.trim()) {
      return candidate;
    }
  }
  return void 0;
}
|
|
2324
2643
|
function chunkDocument(doc) {
|
|
2325
2644
|
const ensureArray = (v) => Array.isArray(v) ? v : [];
|
|
2326
2645
|
doc = {
|
|
@@ -2334,6 +2653,7 @@ function chunkDocument(doc) {
|
|
|
2334
2653
|
const chunks = [];
|
|
2335
2654
|
const docId = doc.id;
|
|
2336
2655
|
const policyTypesStr = doc.policyTypes?.length ? doc.policyTypes.join(",") : void 0;
|
|
2656
|
+
const extendedDoc = doc;
|
|
2337
2657
|
function stringMetadata(entries) {
|
|
2338
2658
|
const base = Object.fromEntries(
|
|
2339
2659
|
Object.entries(entries).filter(([, value]) => value !== void 0 && value !== null && String(value).length > 0).map(([key, value]) => [key, String(value)])
|
|
@@ -2341,11 +2661,22 @@ function chunkDocument(doc) {
|
|
|
2341
2661
|
if (policyTypesStr) base.policyTypes = policyTypesStr;
|
|
2342
2662
|
return base;
|
|
2343
2663
|
}
|
|
2344
|
-
|
|
2345
|
-
|
|
2346
|
-
|
|
2347
|
-
|
|
2348
|
-
|
|
2664
|
+
function lines(values) {
|
|
2665
|
+
return values.filter(Boolean).join("\n");
|
|
2666
|
+
}
|
|
2667
|
+
function pushChunk(idSuffix, type, text, metadata) {
|
|
2668
|
+
chunks.push({
|
|
2669
|
+
id: `${docId}:${idSuffix}`,
|
|
2670
|
+
documentId: docId,
|
|
2671
|
+
type,
|
|
2672
|
+
text,
|
|
2673
|
+
metadata: stringMetadata(metadata)
|
|
2674
|
+
});
|
|
2675
|
+
}
|
|
2676
|
+
pushChunk(
|
|
2677
|
+
"carrier_info:0",
|
|
2678
|
+
"carrier_info",
|
|
2679
|
+
lines([
|
|
2349
2680
|
`Carrier: ${doc.carrier}`,
|
|
2350
2681
|
doc.carrierLegalName ? `Legal Name: ${doc.carrierLegalName}` : null,
|
|
2351
2682
|
doc.carrierNaicNumber ? `NAIC: ${doc.carrierNaicNumber}` : null,
|
|
@@ -2362,94 +2693,83 @@ function chunkDocument(doc) {
|
|
|
2362
2693
|
doc.isPackage != null ? `Package: ${doc.isPackage ? "Yes" : "No"}` : null,
|
|
2363
2694
|
doc.security ? `Security: ${doc.security}` : null,
|
|
2364
2695
|
doc.policyTypes?.length ? `Policy Types: ${doc.policyTypes.join(", ")}` : null
|
|
2365
|
-
]
|
|
2366
|
-
|
|
2367
|
-
|
|
2696
|
+
]),
|
|
2697
|
+
{ carrier: doc.carrier, documentType: doc.type }
|
|
2698
|
+
);
|
|
2368
2699
|
if (doc.summary) {
|
|
2369
|
-
|
|
2370
|
-
id: `${docId}:declaration:summary`,
|
|
2371
|
-
documentId: docId,
|
|
2372
|
-
type: "declaration",
|
|
2373
|
-
text: `Policy Summary: ${doc.summary}`,
|
|
2374
|
-
metadata: stringMetadata({ documentType: doc.type })
|
|
2375
|
-
});
|
|
2700
|
+
pushChunk("declaration:summary", "declaration", `Policy Summary: ${doc.summary}`, { documentType: doc.type });
|
|
2376
2701
|
}
|
|
2377
2702
|
if (doc.type === "policy") {
|
|
2378
2703
|
const pol = doc;
|
|
2379
|
-
|
|
2380
|
-
|
|
2381
|
-
|
|
2382
|
-
|
|
2383
|
-
text: [
|
|
2704
|
+
pushChunk(
|
|
2705
|
+
"declaration:policy_details",
|
|
2706
|
+
"declaration",
|
|
2707
|
+
lines([
|
|
2384
2708
|
`Policy Number: ${pol.policyNumber}`,
|
|
2385
2709
|
`Effective Date: ${pol.effectiveDate}`,
|
|
2386
2710
|
pol.expirationDate ? `Expiration Date: ${pol.expirationDate}` : null,
|
|
2387
2711
|
pol.policyTermType ? `Term Type: ${pol.policyTermType}` : null,
|
|
2388
2712
|
pol.effectiveTime ? `Effective Time: ${pol.effectiveTime}` : null,
|
|
2389
2713
|
pol.nextReviewDate ? `Next Review Date: ${pol.nextReviewDate}` : null
|
|
2390
|
-
]
|
|
2391
|
-
|
|
2714
|
+
]),
|
|
2715
|
+
{
|
|
2392
2716
|
policyNumber: pol.policyNumber,
|
|
2393
2717
|
effectiveDate: pol.effectiveDate,
|
|
2394
2718
|
expirationDate: pol.expirationDate,
|
|
2395
2719
|
documentType: doc.type
|
|
2396
|
-
}
|
|
2397
|
-
|
|
2720
|
+
}
|
|
2721
|
+
);
|
|
2398
2722
|
} else {
|
|
2399
2723
|
const quote = doc;
|
|
2400
|
-
|
|
2401
|
-
|
|
2402
|
-
|
|
2403
|
-
|
|
2404
|
-
text: [
|
|
2724
|
+
pushChunk(
|
|
2725
|
+
"declaration:quote_details",
|
|
2726
|
+
"declaration",
|
|
2727
|
+
lines([
|
|
2405
2728
|
`Quote Number: ${quote.quoteNumber}`,
|
|
2406
2729
|
quote.proposedEffectiveDate ? `Proposed Effective Date: ${quote.proposedEffectiveDate}` : null,
|
|
2407
2730
|
quote.proposedExpirationDate ? `Proposed Expiration Date: ${quote.proposedExpirationDate}` : null,
|
|
2408
2731
|
quote.quoteExpirationDate ? `Quote Expiration Date: ${quote.quoteExpirationDate}` : null
|
|
2409
|
-
]
|
|
2410
|
-
|
|
2732
|
+
]),
|
|
2733
|
+
{
|
|
2411
2734
|
quoteNumber: quote.quoteNumber,
|
|
2412
2735
|
documentType: doc.type
|
|
2413
|
-
}
|
|
2414
|
-
|
|
2736
|
+
}
|
|
2737
|
+
);
|
|
2415
2738
|
}
|
|
2416
2739
|
if (doc.insurer) {
|
|
2417
|
-
|
|
2418
|
-
|
|
2419
|
-
|
|
2420
|
-
|
|
2421
|
-
text: [
|
|
2740
|
+
pushChunk(
|
|
2741
|
+
"party:insurer",
|
|
2742
|
+
"party",
|
|
2743
|
+
lines([
|
|
2422
2744
|
`Insurer: ${doc.insurer.legalName}`,
|
|
2423
2745
|
doc.insurer.naicNumber ? `NAIC: ${doc.insurer.naicNumber}` : null,
|
|
2424
2746
|
doc.insurer.amBestRating ? `AM Best Rating: ${doc.insurer.amBestRating}` : null,
|
|
2425
2747
|
doc.insurer.amBestNumber ? `AM Best Number: ${doc.insurer.amBestNumber}` : null,
|
|
2426
2748
|
doc.insurer.admittedStatus ? `Admitted Status: ${doc.insurer.admittedStatus}` : null,
|
|
2427
2749
|
doc.insurer.stateOfDomicile ? `State of Domicile: ${doc.insurer.stateOfDomicile}` : null
|
|
2428
|
-
]
|
|
2429
|
-
|
|
2430
|
-
|
|
2750
|
+
]),
|
|
2751
|
+
{ partyRole: "insurer", partyName: doc.insurer.legalName, documentType: doc.type }
|
|
2752
|
+
);
|
|
2431
2753
|
}
|
|
2432
2754
|
if (doc.producer) {
|
|
2433
|
-
|
|
2434
|
-
|
|
2435
|
-
|
|
2436
|
-
|
|
2437
|
-
text: [
|
|
2755
|
+
pushChunk(
|
|
2756
|
+
"party:producer",
|
|
2757
|
+
"party",
|
|
2758
|
+
lines([
|
|
2438
2759
|
`Producer/Broker: ${doc.producer.agencyName}`,
|
|
2439
2760
|
doc.producer.contactName ? `Contact: ${doc.producer.contactName}` : null,
|
|
2440
2761
|
doc.producer.licenseNumber ? `License: ${doc.producer.licenseNumber}` : null,
|
|
2441
2762
|
doc.producer.phone ? `Phone: ${doc.producer.phone}` : null,
|
|
2442
2763
|
doc.producer.email ? `Email: ${doc.producer.email}` : null,
|
|
2443
2764
|
doc.producer.address ? `Address: ${formatAddress(doc.producer.address)}` : null
|
|
2444
|
-
]
|
|
2445
|
-
|
|
2446
|
-
|
|
2765
|
+
]),
|
|
2766
|
+
{ partyRole: "producer", partyName: doc.producer.agencyName, documentType: doc.type }
|
|
2767
|
+
);
|
|
2447
2768
|
}
|
|
2448
|
-
|
|
2449
|
-
|
|
2450
|
-
|
|
2451
|
-
|
|
2452
|
-
text: [
|
|
2769
|
+
pushChunk(
|
|
2770
|
+
"named_insured:0",
|
|
2771
|
+
"named_insured",
|
|
2772
|
+
lines([
|
|
2453
2773
|
`Insured: ${doc.insuredName}`,
|
|
2454
2774
|
doc.insuredDba ? `DBA: ${doc.insuredDba}` : null,
|
|
2455
2775
|
doc.insuredEntityType ? `Entity Type: ${doc.insuredEntityType}` : null,
|
|
@@ -2457,36 +2777,34 @@ function chunkDocument(doc) {
|
|
|
2457
2777
|
doc.insuredSicCode ? `SIC: ${doc.insuredSicCode}` : null,
|
|
2458
2778
|
doc.insuredNaicsCode ? `NAICS: ${doc.insuredNaicsCode}` : null,
|
|
2459
2779
|
doc.insuredAddress ? `Address: ${formatAddress(doc.insuredAddress)}` : null
|
|
2460
|
-
]
|
|
2461
|
-
|
|
2462
|
-
|
|
2780
|
+
]),
|
|
2781
|
+
{ insuredName: doc.insuredName, documentType: doc.type }
|
|
2782
|
+
);
|
|
2463
2783
|
doc.additionalNamedInsureds?.forEach((insured, i) => {
|
|
2464
|
-
|
|
2465
|
-
|
|
2466
|
-
|
|
2467
|
-
|
|
2468
|
-
text: [
|
|
2784
|
+
pushChunk(
|
|
2785
|
+
`named_insured:${i + 1}`,
|
|
2786
|
+
"named_insured",
|
|
2787
|
+
lines([
|
|
2469
2788
|
`Additional Named Insured: ${insured.name}`,
|
|
2470
2789
|
insured.address ? `Address: ${formatAddress(insured.address)}` : null,
|
|
2471
2790
|
insured.relationship ? `Relationship: ${insured.relationship}` : null
|
|
2472
|
-
]
|
|
2473
|
-
|
|
2474
|
-
|
|
2791
|
+
]),
|
|
2792
|
+
{ insuredName: insured.name, role: "additional_named_insured", documentType: doc.type }
|
|
2793
|
+
);
|
|
2475
2794
|
});
|
|
2476
2795
|
doc.coverages.forEach((cov, i) => {
|
|
2477
|
-
|
|
2478
|
-
|
|
2479
|
-
|
|
2480
|
-
|
|
2481
|
-
text: [
|
|
2796
|
+
pushChunk(
|
|
2797
|
+
`coverage:${i}`,
|
|
2798
|
+
"coverage",
|
|
2799
|
+
lines([
|
|
2482
2800
|
`Coverage: ${cov.name}`,
|
|
2483
2801
|
`Limit: ${cov.limit}`,
|
|
2484
2802
|
cov.limitValueType ? `Limit Type: ${cov.limitValueType}` : null,
|
|
2485
2803
|
cov.deductible ? `Deductible: ${cov.deductible}` : null,
|
|
2486
2804
|
cov.deductibleValueType ? `Deductible Type: ${cov.deductibleValueType}` : null,
|
|
2487
2805
|
cov.originalContent ? `Source: ${cov.originalContent}` : null
|
|
2488
|
-
]
|
|
2489
|
-
|
|
2806
|
+
]),
|
|
2807
|
+
{
|
|
2490
2808
|
coverageName: cov.name,
|
|
2491
2809
|
limit: cov.limit,
|
|
2492
2810
|
limitValueType: cov.limitValueType,
|
|
@@ -2496,15 +2814,14 @@ function chunkDocument(doc) {
|
|
|
2496
2814
|
pageNumber: cov.pageNumber,
|
|
2497
2815
|
sectionRef: cov.sectionRef,
|
|
2498
2816
|
documentType: doc.type
|
|
2499
|
-
}
|
|
2500
|
-
|
|
2817
|
+
}
|
|
2818
|
+
);
|
|
2501
2819
|
});
|
|
2502
2820
|
doc.enrichedCoverages?.forEach((cov, i) => {
|
|
2503
|
-
|
|
2504
|
-
|
|
2505
|
-
|
|
2506
|
-
|
|
2507
|
-
text: [
|
|
2821
|
+
pushChunk(
|
|
2822
|
+
`coverage:enriched:${i}`,
|
|
2823
|
+
"coverage",
|
|
2824
|
+
lines([
|
|
2508
2825
|
`Coverage: ${cov.name}`,
|
|
2509
2826
|
cov.coverageCode ? `Code: ${cov.coverageCode}` : null,
|
|
2510
2827
|
`Limit: ${cov.limit}`,
|
|
@@ -2521,8 +2838,8 @@ function chunkDocument(doc) {
|
|
|
2521
2838
|
`Included: ${cov.included ? "Yes" : "No"}`,
|
|
2522
2839
|
cov.premium ? `Premium: ${cov.premium}` : null,
|
|
2523
2840
|
cov.originalContent ? `Source: ${cov.originalContent}` : null
|
|
2524
|
-
]
|
|
2525
|
-
|
|
2841
|
+
]),
|
|
2842
|
+
{
|
|
2526
2843
|
coverageName: cov.name,
|
|
2527
2844
|
coverageCode: cov.coverageCode,
|
|
2528
2845
|
limit: cov.limit,
|
|
@@ -2531,8 +2848,8 @@ function chunkDocument(doc) {
|
|
|
2531
2848
|
pageNumber: cov.pageNumber,
|
|
2532
2849
|
included: cov.included,
|
|
2533
2850
|
documentType: doc.type
|
|
2534
|
-
}
|
|
2535
|
-
|
|
2851
|
+
}
|
|
2852
|
+
);
|
|
2536
2853
|
});
|
|
2537
2854
|
if (doc.limits) {
|
|
2538
2855
|
const limitLines = ["Limit Schedule"];
|
|
@@ -2556,39 +2873,31 @@ function chunkDocument(doc) {
|
|
|
2556
2873
|
limitLines.push(`Employers Liability \u2014 Each Accident: ${lim.employersLiability.eachAccident}, Disease Policy Limit: ${lim.employersLiability.diseasePolicyLimit}, Disease Each Employee: ${lim.employersLiability.diseaseEachEmployee}`);
|
|
2557
2874
|
}
|
|
2558
2875
|
if (lim.defenseCostTreatment) limitLines.push(`Defense Cost Treatment: ${lim.defenseCostTreatment}`);
|
|
2559
|
-
|
|
2560
|
-
id: `${docId}:coverage:limit_schedule`,
|
|
2561
|
-
documentId: docId,
|
|
2562
|
-
type: "coverage",
|
|
2563
|
-
text: limitLines.join("\n"),
|
|
2564
|
-
metadata: stringMetadata({ coverageName: "limit_schedule", documentType: doc.type })
|
|
2565
|
-
});
|
|
2876
|
+
pushChunk("coverage:limit_schedule", "coverage", limitLines.join("\n"), { coverageName: "limit_schedule", documentType: doc.type });
|
|
2566
2877
|
lim.sublimits?.forEach((sub, i) => {
|
|
2567
|
-
|
|
2568
|
-
|
|
2569
|
-
|
|
2570
|
-
|
|
2571
|
-
text: [
|
|
2878
|
+
pushChunk(
|
|
2879
|
+
`coverage:sublimit:${i}`,
|
|
2880
|
+
"coverage",
|
|
2881
|
+
lines([
|
|
2572
2882
|
`Sublimit: ${sub.name}`,
|
|
2573
2883
|
`Limit: ${sub.limit}`,
|
|
2574
2884
|
sub.appliesTo ? `Applies To: ${sub.appliesTo}` : null,
|
|
2575
2885
|
sub.deductible ? `Deductible: ${sub.deductible}` : null
|
|
2576
|
-
]
|
|
2577
|
-
|
|
2578
|
-
|
|
2886
|
+
]),
|
|
2887
|
+
{ coverageName: sub.name, limit: sub.limit, documentType: doc.type }
|
|
2888
|
+
);
|
|
2579
2889
|
});
|
|
2580
2890
|
lim.sharedLimits?.forEach((sl, i) => {
|
|
2581
|
-
|
|
2582
|
-
|
|
2583
|
-
|
|
2584
|
-
|
|
2585
|
-
text: [
|
|
2891
|
+
pushChunk(
|
|
2892
|
+
`coverage:shared_limit:${i}`,
|
|
2893
|
+
"coverage",
|
|
2894
|
+
[
|
|
2586
2895
|
`Shared Limit: ${sl.description}`,
|
|
2587
2896
|
`Limit: ${sl.limit}`,
|
|
2588
2897
|
`Coverage Parts: ${sl.coverageParts.join(", ")}`
|
|
2589
2898
|
].join("\n"),
|
|
2590
|
-
|
|
2591
|
-
|
|
2899
|
+
{ coverageName: sl.description, limit: sl.limit, documentType: doc.type }
|
|
2900
|
+
);
|
|
2592
2901
|
});
|
|
2593
2902
|
}
|
|
2594
2903
|
if (doc.deductibles) {
|
|
@@ -2602,12 +2911,9 @@ function chunkDocument(doc) {
|
|
|
2602
2911
|
if (ded.waitingPeriod) dedLines.push(`Waiting Period: ${ded.waitingPeriod}`);
|
|
2603
2912
|
if (ded.appliesTo) dedLines.push(`Applies To: ${ded.appliesTo}`);
|
|
2604
2913
|
if (dedLines.length > 1) {
|
|
2605
|
-
|
|
2606
|
-
|
|
2607
|
-
|
|
2608
|
-
type: "coverage",
|
|
2609
|
-
text: dedLines.join("\n"),
|
|
2610
|
-
metadata: stringMetadata({ coverageName: "deductible_schedule", documentType: doc.type })
|
|
2914
|
+
pushChunk("coverage:deductible_schedule", "coverage", dedLines.join("\n"), {
|
|
2915
|
+
coverageName: "deductible_schedule",
|
|
2916
|
+
documentType: doc.type
|
|
2611
2917
|
});
|
|
2612
2918
|
}
|
|
2613
2919
|
}
|
|
@@ -2619,76 +2925,141 @@ function chunkDocument(doc) {
|
|
|
2619
2925
|
doc.extendedReportingPeriod?.supplementalPremium ? `Extended Reporting Period Premium: ${doc.extendedReportingPeriod.supplementalPremium}` : null
|
|
2620
2926
|
].filter(Boolean);
|
|
2621
2927
|
if (claimsMadeLines.length > 0) {
|
|
2622
|
-
|
|
2623
|
-
|
|
2624
|
-
|
|
2625
|
-
type: "coverage",
|
|
2626
|
-
text: claimsMadeLines.join("\n"),
|
|
2627
|
-
metadata: stringMetadata({ coverageName: "claims_made_details", documentType: doc.type })
|
|
2928
|
+
pushChunk("coverage:claims_made_details", "coverage", claimsMadeLines.join("\n"), {
|
|
2929
|
+
coverageName: "claims_made_details",
|
|
2930
|
+
documentType: doc.type
|
|
2628
2931
|
});
|
|
2629
2932
|
}
|
|
2630
2933
|
doc.formInventory?.forEach((form, i) => {
|
|
2631
|
-
|
|
2632
|
-
|
|
2633
|
-
|
|
2634
|
-
|
|
2635
|
-
text: [
|
|
2934
|
+
pushChunk(
|
|
2935
|
+
`declaration:form:${i}`,
|
|
2936
|
+
"declaration",
|
|
2937
|
+
lines([
|
|
2636
2938
|
`Form: ${form.formNumber}`,
|
|
2637
2939
|
form.title ? `Title: ${form.title}` : null,
|
|
2638
2940
|
`Type: ${form.formType}`,
|
|
2639
2941
|
form.editionDate ? `Edition: ${form.editionDate}` : null,
|
|
2640
2942
|
form.pageStart ? `Pages: ${form.pageStart}${form.pageEnd ? `-${form.pageEnd}` : ""}` : null
|
|
2641
|
-
]
|
|
2642
|
-
|
|
2943
|
+
]),
|
|
2944
|
+
{
|
|
2643
2945
|
formNumber: form.formNumber,
|
|
2644
2946
|
formType: form.formType,
|
|
2645
2947
|
documentType: doc.type
|
|
2646
|
-
}
|
|
2647
|
-
|
|
2948
|
+
}
|
|
2949
|
+
);
|
|
2648
2950
|
});
|
|
2649
2951
|
doc.endorsements?.forEach((end, i) => {
|
|
2650
|
-
|
|
2651
|
-
|
|
2652
|
-
|
|
2653
|
-
|
|
2654
|
-
text: `Endorsement: ${end.title}
|
|
2952
|
+
pushChunk(
|
|
2953
|
+
`endorsement:${i}`,
|
|
2954
|
+
"endorsement",
|
|
2955
|
+
`Endorsement: ${end.title}
|
|
2655
2956
|
${end.content}`.trim(),
|
|
2656
|
-
|
|
2957
|
+
{
|
|
2657
2958
|
endorsementType: end.endorsementType,
|
|
2658
2959
|
formNumber: end.formNumber,
|
|
2659
2960
|
pageStart: end.pageStart,
|
|
2660
2961
|
pageEnd: end.pageEnd,
|
|
2661
2962
|
documentType: doc.type
|
|
2662
|
-
}
|
|
2663
|
-
|
|
2963
|
+
}
|
|
2964
|
+
);
|
|
2664
2965
|
});
|
|
2665
2966
|
doc.exclusions?.forEach((exc, i) => {
|
|
2666
|
-
|
|
2667
|
-
|
|
2668
|
-
|
|
2669
|
-
|
|
2670
|
-
|
|
2671
|
-
${exc.content}`.trim(),
|
|
2672
|
-
metadata: stringMetadata({ formNumber: exc.formNumber, pageNumber: exc.pageNumber, documentType: doc.type })
|
|
2967
|
+
pushChunk(`exclusion:${i}`, "exclusion", `Exclusion: ${exc.name}
|
|
2968
|
+
${exc.content}`.trim(), {
|
|
2969
|
+
formNumber: exc.formNumber,
|
|
2970
|
+
pageNumber: exc.pageNumber,
|
|
2971
|
+
documentType: doc.type
|
|
2673
2972
|
});
|
|
2674
2973
|
});
|
|
2675
2974
|
doc.conditions?.forEach((cond, i) => {
|
|
2676
|
-
|
|
2677
|
-
|
|
2678
|
-
|
|
2679
|
-
|
|
2680
|
-
text: [
|
|
2975
|
+
pushChunk(
|
|
2976
|
+
`condition:${i}`,
|
|
2977
|
+
"condition",
|
|
2978
|
+
[
|
|
2681
2979
|
`Condition: ${cond.name}`,
|
|
2682
2980
|
`Type: ${cond.conditionType}`,
|
|
2683
2981
|
cond.content,
|
|
2684
2982
|
...cond.keyValues?.map((kv) => `${kv.key}: ${kv.value}`) ?? []
|
|
2685
2983
|
].join("\n"),
|
|
2686
|
-
|
|
2984
|
+
{
|
|
2687
2985
|
conditionName: cond.name,
|
|
2688
2986
|
conditionType: cond.conditionType,
|
|
2689
2987
|
pageNumber: cond.pageNumber,
|
|
2690
2988
|
documentType: doc.type
|
|
2691
|
-
}
|
|
2989
|
+
}
|
|
2990
|
+
);
|
|
2991
|
+
});
|
|
2992
|
+
asRecordArray(extendedDoc.definitions).forEach((definition, i) => {
|
|
2993
|
+
const term = firstString(definition, ["term", "name", "title"]) ?? `Definition ${i + 1}`;
|
|
2994
|
+
const body = firstString(definition, ["definition", "content", "text", "meaning"]);
|
|
2995
|
+
pushChunk(
|
|
2996
|
+
`definition:${i}`,
|
|
2997
|
+
"definition",
|
|
2998
|
+
lines([
|
|
2999
|
+
`Definition: ${term}`,
|
|
3000
|
+
body,
|
|
3001
|
+
firstString(definition, ["originalContent", "source"]) ? `Source: ${firstString(definition, ["originalContent", "source"])}` : null
|
|
3002
|
+
]),
|
|
3003
|
+
{
|
|
3004
|
+
term,
|
|
3005
|
+
formNumber: firstString(definition, ["formNumber"]),
|
|
3006
|
+
formTitle: firstString(definition, ["formTitle"]),
|
|
3007
|
+
pageNumber: typeof definition.pageNumber === "number" ? definition.pageNumber : void 0,
|
|
3008
|
+
sectionRef: firstString(definition, ["sectionRef", "sectionTitle"]),
|
|
3009
|
+
documentType: doc.type
|
|
3010
|
+
}
|
|
3011
|
+
);
|
|
3012
|
+
});
|
|
3013
|
+
const coveredReasons = asRecordArray(extendedDoc.coveredReasons ?? extendedDoc.covered_reasons);
|
|
3014
|
+
coveredReasons.forEach((coveredReason, i) => {
|
|
3015
|
+
const title = firstString(coveredReason, ["title", "name", "reason", "peril", "cause"]) ?? `Covered Reason ${i + 1}`;
|
|
3016
|
+
const coverageName = firstString(coveredReason, ["coverageName", "coverage", "coveragePart"]);
|
|
3017
|
+
const reasonNumber = firstString(coveredReason, ["reasonNumber", "number"]);
|
|
3018
|
+
const body = firstString(coveredReason, ["content", "description", "text", "coverageGrant"]);
|
|
3019
|
+
pushChunk(
|
|
3020
|
+
`covered_reason:${i}`,
|
|
3021
|
+
"covered_reason",
|
|
3022
|
+
lines([
|
|
3023
|
+
coverageName ? `Coverage: ${coverageName}` : null,
|
|
3024
|
+
reasonNumber ? `Reason Number: ${reasonNumber}` : null,
|
|
3025
|
+
`Covered Reason: ${title}`,
|
|
3026
|
+
body,
|
|
3027
|
+
firstString(coveredReason, ["originalContent", "source"]) ? `Source: ${firstString(coveredReason, ["originalContent", "source"])}` : null
|
|
3028
|
+
]),
|
|
3029
|
+
{
|
|
3030
|
+
coverageName,
|
|
3031
|
+
reasonNumber,
|
|
3032
|
+
title,
|
|
3033
|
+
formNumber: firstString(coveredReason, ["formNumber"]),
|
|
3034
|
+
formTitle: firstString(coveredReason, ["formTitle"]),
|
|
3035
|
+
pageNumber: typeof coveredReason.pageNumber === "number" ? coveredReason.pageNumber : void 0,
|
|
3036
|
+
sectionRef: firstString(coveredReason, ["sectionRef", "sectionTitle"]),
|
|
3037
|
+
documentType: doc.type
|
|
3038
|
+
}
|
|
3039
|
+
);
|
|
3040
|
+
const conditions = Array.isArray(coveredReason.conditions) ? coveredReason.conditions.filter((condition) => typeof condition === "string" && condition.trim().length > 0) : [];
|
|
3041
|
+
conditions.forEach((condition, conditionIndex) => {
|
|
3042
|
+
pushChunk(
|
|
3043
|
+
`covered_reason:${i}:condition:${conditionIndex}`,
|
|
3044
|
+
"covered_reason",
|
|
3045
|
+
lines([
|
|
3046
|
+
coverageName ? `Coverage: ${coverageName}` : null,
|
|
3047
|
+
reasonNumber ? `Reason Number: ${reasonNumber}` : null,
|
|
3048
|
+
`Covered Reason Condition: ${title}`,
|
|
3049
|
+
condition
|
|
3050
|
+
]),
|
|
3051
|
+
{
|
|
3052
|
+
coverageName,
|
|
3053
|
+
reasonNumber,
|
|
3054
|
+
title,
|
|
3055
|
+
conditionIndex,
|
|
3056
|
+
formNumber: firstString(coveredReason, ["formNumber"]),
|
|
3057
|
+
formTitle: firstString(coveredReason, ["formTitle"]),
|
|
3058
|
+
pageNumber: typeof coveredReason.pageNumber === "number" ? coveredReason.pageNumber : void 0,
|
|
3059
|
+
sectionRef: firstString(coveredReason, ["sectionRef", "sectionTitle"]),
|
|
3060
|
+
documentType: doc.type
|
|
3061
|
+
}
|
|
3062
|
+
);
|
|
2692
3063
|
});
|
|
2693
3064
|
});
|
|
2694
3065
|
if (doc.declarations) {
|
|
@@ -2703,50 +3074,42 @@ ${exc.content}`.trim(),
|
|
|
2703
3074
|
const declMeta = { documentType: doc.type };
|
|
2704
3075
|
if (typeof decl.formType === "string") declMeta.formType = decl.formType;
|
|
2705
3076
|
if (typeof decl.line === "string") declMeta.declarationLine = decl.line;
|
|
2706
|
-
|
|
2707
|
-
|
|
2708
|
-
documentId: docId,
|
|
2709
|
-
type: "declaration",
|
|
2710
|
-
text: `Declarations
|
|
2711
|
-
${declLines.join("\n")}`,
|
|
2712
|
-
metadata: stringMetadata(declMeta)
|
|
2713
|
-
});
|
|
3077
|
+
pushChunk("declaration:0", "declaration", `Declarations
|
|
3078
|
+
${declLines.join("\n")}`, declMeta);
|
|
2714
3079
|
}
|
|
2715
3080
|
}
|
|
2716
3081
|
doc.sections?.forEach((sec, i) => {
|
|
2717
3082
|
const hasSubsections = sec.subsections && sec.subsections.length > 0;
|
|
2718
3083
|
const contentLength = sec.content.length;
|
|
2719
3084
|
if (hasSubsections) {
|
|
2720
|
-
|
|
2721
|
-
|
|
2722
|
-
|
|
2723
|
-
|
|
2724
|
-
text: `Section: ${sec.title}
|
|
3085
|
+
pushChunk(
|
|
3086
|
+
`section:${i}`,
|
|
3087
|
+
"section",
|
|
3088
|
+
`Section: ${sec.title}
|
|
2725
3089
|
${sec.content}`,
|
|
2726
|
-
|
|
3090
|
+
{
|
|
2727
3091
|
sectionType: sec.type,
|
|
2728
3092
|
sectionNumber: sec.sectionNumber,
|
|
2729
3093
|
pageStart: sec.pageStart,
|
|
2730
3094
|
pageEnd: sec.pageEnd,
|
|
2731
3095
|
documentType: doc.type,
|
|
2732
3096
|
hasSubsections: "true"
|
|
2733
|
-
}
|
|
2734
|
-
|
|
3097
|
+
}
|
|
3098
|
+
);
|
|
2735
3099
|
sec.subsections.forEach((sub, j) => {
|
|
2736
|
-
|
|
2737
|
-
|
|
2738
|
-
|
|
2739
|
-
|
|
2740
|
-
text: `${sec.title} > ${sub.title}
|
|
3100
|
+
pushChunk(
|
|
3101
|
+
`section:${i}:sub:${j}`,
|
|
3102
|
+
"section",
|
|
3103
|
+
`${sec.title} > ${sub.title}
|
|
2741
3104
|
${sub.content}`,
|
|
2742
|
-
|
|
3105
|
+
{
|
|
2743
3106
|
sectionType: sec.type,
|
|
2744
3107
|
parentSection: sec.title,
|
|
2745
3108
|
sectionNumber: sub.sectionNumber,
|
|
2746
3109
|
pageNumber: sub.pageNumber,
|
|
2747
3110
|
documentType: doc.type
|
|
2748
|
-
}
|
|
2749
|
-
|
|
3111
|
+
}
|
|
3112
|
+
);
|
|
2750
3113
|
});
|
|
2751
3114
|
} else if (contentLength > 2e3) {
|
|
2752
3115
|
const paragraphs = sec.content.split(/\n\n+/);
|
|
@@ -2754,58 +3117,55 @@ ${sub.content}`,
|
|
|
2754
3117
|
let chunkIndex = 0;
|
|
2755
3118
|
for (const para of paragraphs) {
|
|
2756
3119
|
if (currentChunk.length + para.length > 1e3 && currentChunk.length > 0) {
|
|
2757
|
-
|
|
2758
|
-
|
|
2759
|
-
|
|
2760
|
-
|
|
2761
|
-
text: `Section: ${sec.title} (part ${chunkIndex + 1})
|
|
3120
|
+
pushChunk(
|
|
3121
|
+
`section:${i}:part:${chunkIndex}`,
|
|
3122
|
+
"section",
|
|
3123
|
+
`Section: ${sec.title} (part ${chunkIndex + 1})
|
|
2762
3124
|
${currentChunk.trim()}`,
|
|
2763
|
-
|
|
3125
|
+
{
|
|
2764
3126
|
sectionType: sec.type,
|
|
2765
3127
|
sectionNumber: sec.sectionNumber,
|
|
2766
3128
|
pageStart: sec.pageStart,
|
|
2767
3129
|
pageEnd: sec.pageEnd,
|
|
2768
3130
|
documentType: doc.type,
|
|
2769
3131
|
partIndex: chunkIndex
|
|
2770
|
-
}
|
|
2771
|
-
|
|
3132
|
+
}
|
|
3133
|
+
);
|
|
2772
3134
|
currentChunk = "";
|
|
2773
3135
|
chunkIndex++;
|
|
2774
3136
|
}
|
|
2775
3137
|
currentChunk += (currentChunk ? "\n\n" : "") + para;
|
|
2776
3138
|
}
|
|
2777
3139
|
if (currentChunk.trim()) {
|
|
2778
|
-
|
|
2779
|
-
|
|
2780
|
-
|
|
2781
|
-
|
|
2782
|
-
text: `Section: ${sec.title} (part ${chunkIndex + 1})
|
|
3140
|
+
pushChunk(
|
|
3141
|
+
`section:${i}:part:${chunkIndex}`,
|
|
3142
|
+
"section",
|
|
3143
|
+
`Section: ${sec.title} (part ${chunkIndex + 1})
|
|
2783
3144
|
${currentChunk.trim()}`,
|
|
2784
|
-
|
|
3145
|
+
{
|
|
2785
3146
|
sectionType: sec.type,
|
|
2786
3147
|
sectionNumber: sec.sectionNumber,
|
|
2787
3148
|
pageStart: sec.pageStart,
|
|
2788
3149
|
pageEnd: sec.pageEnd,
|
|
2789
3150
|
documentType: doc.type,
|
|
2790
3151
|
partIndex: chunkIndex
|
|
2791
|
-
}
|
|
2792
|
-
|
|
3152
|
+
}
|
|
3153
|
+
);
|
|
2793
3154
|
}
|
|
2794
3155
|
} else {
|
|
2795
|
-
|
|
2796
|
-
|
|
2797
|
-
|
|
2798
|
-
|
|
2799
|
-
text: `Section: ${sec.title}
|
|
3156
|
+
pushChunk(
|
|
3157
|
+
`section:${i}`,
|
|
3158
|
+
"section",
|
|
3159
|
+
`Section: ${sec.title}
|
|
2800
3160
|
${sec.content}`,
|
|
2801
|
-
|
|
3161
|
+
{
|
|
2802
3162
|
sectionType: sec.type,
|
|
2803
3163
|
sectionNumber: sec.sectionNumber,
|
|
2804
3164
|
pageStart: sec.pageStart,
|
|
2805
3165
|
pageEnd: sec.pageEnd,
|
|
2806
3166
|
documentType: doc.type
|
|
2807
|
-
}
|
|
2808
|
-
|
|
3167
|
+
}
|
|
3168
|
+
);
|
|
2809
3169
|
}
|
|
2810
3170
|
});
|
|
2811
3171
|
doc.locations?.forEach((loc, i) => {
|
|
@@ -3236,6 +3596,13 @@ function dedupeByKey(items, keyFn) {
|
|
|
3236
3596
|
}
|
|
3237
3597
|
return merged;
|
|
3238
3598
|
}
|
|
3599
|
+
/**
 * Normalize a single component of a dedupe key so that cosmetic differences
 * (case, punctuation, whitespace, "&" vs "and") do not produce distinct keys.
 *
 * @param {unknown} value - Raw key component; may be a string, number, boolean, object, null or undefined.
 * @returns {string} Lower-cased text containing only letters and digits; "" for null/undefined.
 */
function normalizeKeyPart(value) {
  if (value === void 0 || value === null) return "";
  let text;
  if (typeof value === "object") {
    // String(obj) is "[object Object]" for every plain object, which would make all
    // structured values collide. Serialize with sorted keys so equal content yields
    // equal text regardless of property order.
    const sortKeys = (_key, inner) => {
      if (inner === null || typeof inner !== "object" || Array.isArray(inner)) return inner;
      return Object.fromEntries(
        Object.entries(inner).sort(([a], [b]) => (a < b ? -1 : a > b ? 1 : 0))
      );
    };
    try {
      text = JSON.stringify(value, sortKeys) ?? String(value);
    } catch {
      // Cyclic structures or BigInt members cannot be serialized; fall back to the plain string form.
      text = String(value);
    }
  } else {
    text = String(value);
  }
  // Keep Unicode letters/digits (not just a-z0-9) so non-Latin text does not normalize to "".
  // NFKC makes precomposed and decomposed forms of the same character compare equal.
  return text.normalize("NFKC").toLowerCase().replace(/&/g, "and").replace(/[^\p{L}\p{N}]+/gu, "");
}
|
|
3603
|
+
/**
 * Build a composite dedupe key from any number of components.
 * Each component is passed through normalizeKeyPart and the results are joined with "|".
 *
 * @param {...unknown} parts - Key components in significance order.
 * @returns {string} The "|"-delimited normalized key.
 */
function keyFromParts(...parts) {
  const normalized = [];
  for (const part of parts) {
    normalized.push(normalizeKeyPart(part));
  }
  return normalized.join("|");
}
|
|
3239
3606
|
/**
 * Concatenate two item lists (existing first, then incoming) and hand the
 * combined list to dedupeByKey together with the key function.
 *
 * @param {Iterable<object>} existing - Items already collected.
 * @param {Iterable<object>} incoming - Newly extracted items.
 * @param {(item: object) => string} keyFn - Produces the identity key for an item.
 * @returns {object[]} Whatever dedupeByKey returns for the combined list.
 */
function mergeUniqueObjects(existing, incoming, keyFn) {
  const combined = [...existing, ...incoming];
  return dedupeByKey(combined, keyFn);
}
|
|
@@ -3264,13 +3631,13 @@ function mergeCoverageLimits(existing, incoming) {
|
|
|
3264
3631
|
const merged = mergeShallowPreferPresent(existing, incoming);
|
|
3265
3632
|
const existingCoverages = Array.isArray(existing.coverages) ? existing.coverages : [];
|
|
3266
3633
|
const incomingCoverages = Array.isArray(incoming.coverages) ? incoming.coverages : [];
|
|
3267
|
-
const coverageKey = (coverage) =>
|
|
3268
|
-
|
|
3269
|
-
|
|
3270
|
-
|
|
3271
|
-
|
|
3272
|
-
|
|
3273
|
-
|
|
3634
|
+
const coverageKey = (coverage) => keyFromParts(
|
|
3635
|
+
coverage.name,
|
|
3636
|
+
coverage.limitType,
|
|
3637
|
+
coverage.limit,
|
|
3638
|
+
coverage.deductible,
|
|
3639
|
+
coverage.formNumber
|
|
3640
|
+
);
|
|
3274
3641
|
const byKey = /* @__PURE__ */ new Map();
|
|
3275
3642
|
for (const coverage of [...existingCoverages, ...incomingCoverages]) {
|
|
3276
3643
|
const key = coverageKey(coverage);
|
|
@@ -3284,11 +3651,11 @@ function mergeDeclarations(existing, incoming) {
|
|
|
3284
3651
|
const merged = mergeShallowPreferPresent(existing, incoming);
|
|
3285
3652
|
const existingFields = Array.isArray(existing.fields) ? existing.fields : [];
|
|
3286
3653
|
const incomingFields = Array.isArray(incoming.fields) ? incoming.fields : [];
|
|
3287
|
-
merged.fields = mergeUniqueObjects(existingFields, incomingFields, (field) =>
|
|
3288
|
-
|
|
3289
|
-
|
|
3290
|
-
|
|
3291
|
-
|
|
3654
|
+
merged.fields = mergeUniqueObjects(existingFields, incomingFields, (field) => keyFromParts(
|
|
3655
|
+
field.field,
|
|
3656
|
+
field.value,
|
|
3657
|
+
field.section
|
|
3658
|
+
));
|
|
3292
3659
|
return merged;
|
|
3293
3660
|
}
|
|
3294
3661
|
function mergeArrayPayload(existing, incoming, arrayKey, keyFn) {
|
|
@@ -3298,30 +3665,53 @@ function mergeArrayPayload(existing, incoming, arrayKey, keyFn) {
|
|
|
3298
3665
|
merged[arrayKey] = mergeUniqueObjects(existingItems, incomingItems, keyFn);
|
|
3299
3666
|
return merged;
|
|
3300
3667
|
}
|
|
3668
|
+
/**
 * Return the first property of `record`, among `keys` in order, whose value is an array.
 *
 * @param {object} record - Object to read from.
 * @param {...string} keys - Candidate property names, highest priority first.
 * @returns {Array} The matching array itself (not a copy), or a new empty array when none match.
 */
function readArray(record, ...keys) {
  const position = keys.findIndex((candidate) => Array.isArray(record[candidate]));
  if (position === -1) {
    return [];
  }
  return record[keys[position]];
}
|
|
3674
|
+
/**
 * Merge two extractor payloads whose item list may live under a canonical key
 * or under one of several alias keys, producing a payload that carries the
 * deduplicated list under the canonical key only.
 *
 * @param {object} existing - Previously accumulated payload.
 * @param {object} incoming - Newly extracted payload.
 * @param {string} outputKey - Canonical property name for the merged list.
 * @param {string[]} inputKeys - Alias property names that may also hold the list.
 * @param {(item: object) => string} keyFn - Produces the identity key for an item.
 * @returns {object} The merged payload; alias properties other than outputKey are removed.
 */
function mergeAliasedArrayPayload(existing, incoming, outputKey, inputKeys, keyFn) {
  // Non-list fields are combined by the shared shallow-merge helper (defined elsewhere in this file).
  const result = mergeShallowPreferPresent(existing, incoming);
  const seen = /* @__PURE__ */ new Map();
  const candidates = [
    ...readArray(existing, outputKey, ...inputKeys),
    ...readArray(incoming, outputKey, ...inputKeys)
  ];
  candidates.forEach((item) => {
    const id = keyFn(item);
    const prior = seen.get(id);
    if (prior) {
      // Same identity seen before: fold the later item into the earlier one.
      seen.set(id, mergeShallowPreferPresent(prior, item));
    } else {
      seen.set(id, item);
    }
  });
  result[outputKey] = Array.from(seen.values());
  // Drop alias properties so the list is exposed under the canonical key only.
  inputKeys.filter((alias) => alias !== outputKey).forEach((alias) => {
    delete result[alias];
  });
  return result;
}
|
|
3301
3691
|
/**
 * Merge two "supplementary" extractor payloads.
 * Scalar fields go through mergeShallowPreferPresent; the three contact lists and
 * the auxiliary-facts list are concatenated (existing first) and deduplicated by
 * a normalized composite key.
 *
 * @param {object} existing - Previously accumulated supplementary payload.
 * @param {object} incoming - Newly extracted supplementary payload.
 * @returns {object} The merged payload.
 */
function mergeSupplementary(existing, incoming) {
  const result = mergeShallowPreferPresent(existing, incoming);
  // Treat a missing or non-array property as an empty list.
  const listOf = (source, field) => (Array.isArray(source[field]) ? source[field] : []);
  const contactKey = (item) => keyFromParts(
    item.name,
    item.phone,
    item.email,
    item.address,
    item.type
  );
  for (const field of ["regulatoryContacts", "claimsContacts", "thirdPartyAdministrators"]) {
    result[field] = mergeUniqueObjects(listOf(existing, field), listOf(incoming, field), contactKey);
  }
  const factKey = (item) => keyFromParts(
    item.key,
    item.value,
    item.subject,
    item.context
  );
  result.auxiliaryFacts = mergeUniqueObjects(
    listOf(existing, "auxiliaryFacts"),
    listOf(incoming, "auxiliaryFacts"),
    factKey
  );
  return result;
}
|
|
3327
3717
|
function mergeExtractorResult(extractorName, existing, incoming) {
|
|
@@ -3342,31 +3732,43 @@ function mergeExtractorResult(extractorName, existing, incoming) {
|
|
|
3342
3732
|
return mergeCoverageLimits(current, next);
|
|
3343
3733
|
case "declarations":
|
|
3344
3734
|
return mergeDeclarations(current, next);
|
|
3735
|
+
case "definitions":
|
|
3736
|
+
return mergeArrayPayload(current, next, "definitions", (item) => keyFromParts(
|
|
3737
|
+
item.term ?? item.name ?? item.key,
|
|
3738
|
+
item.pageNumber ?? item.pageStart
|
|
3739
|
+
));
|
|
3740
|
+
case "covered_reasons":
|
|
3741
|
+
return mergeAliasedArrayPayload(current, next, "coveredReasons", ["covered_reasons"], (item) => keyFromParts(
|
|
3742
|
+
item.coverageName ?? item.coverage,
|
|
3743
|
+
item.reasonNumber ?? item.number,
|
|
3744
|
+
item.title ?? item.reason ?? item.name ?? item.cause,
|
|
3745
|
+
item.pageNumber ?? item.pageStart
|
|
3746
|
+
));
|
|
3345
3747
|
case "endorsements":
|
|
3346
|
-
return mergeArrayPayload(current, next, "endorsements", (item) =>
|
|
3347
|
-
|
|
3348
|
-
|
|
3349
|
-
|
|
3350
|
-
|
|
3748
|
+
return mergeArrayPayload(current, next, "endorsements", (item) => keyFromParts(
|
|
3749
|
+
item.formNumber,
|
|
3750
|
+
item.title,
|
|
3751
|
+
item.pageStart
|
|
3752
|
+
));
|
|
3351
3753
|
case "exclusions":
|
|
3352
|
-
return mergeArrayPayload(current, next, "exclusions", (item) =>
|
|
3353
|
-
|
|
3354
|
-
|
|
3355
|
-
|
|
3356
|
-
|
|
3754
|
+
return mergeArrayPayload(current, next, "exclusions", (item) => keyFromParts(
|
|
3755
|
+
item.name,
|
|
3756
|
+
item.formNumber,
|
|
3757
|
+
item.pageNumber
|
|
3758
|
+
));
|
|
3357
3759
|
case "conditions":
|
|
3358
|
-
return mergeArrayPayload(current, next, "conditions", (item) =>
|
|
3359
|
-
|
|
3360
|
-
|
|
3361
|
-
|
|
3362
|
-
|
|
3760
|
+
return mergeArrayPayload(current, next, "conditions", (item) => keyFromParts(
|
|
3761
|
+
item.name,
|
|
3762
|
+
item.conditionType,
|
|
3763
|
+
item.pageNumber
|
|
3764
|
+
));
|
|
3363
3765
|
case "sections":
|
|
3364
|
-
return mergeArrayPayload(current, next, "sections", (item) =>
|
|
3365
|
-
|
|
3366
|
-
|
|
3367
|
-
|
|
3368
|
-
|
|
3369
|
-
|
|
3766
|
+
return mergeArrayPayload(current, next, "sections", (item) => keyFromParts(
|
|
3767
|
+
item.title,
|
|
3768
|
+
item.type,
|
|
3769
|
+
item.pageStart,
|
|
3770
|
+
item.pageEnd
|
|
3771
|
+
));
|
|
3370
3772
|
default:
|
|
3371
3773
|
return mergeShallowPreferPresent(current, next);
|
|
3372
3774
|
}
|
|
@@ -4225,6 +4627,8 @@ var PageExtractorSchema = z20.enum([
|
|
|
4225
4627
|
"carrier_info",
|
|
4226
4628
|
"named_insured",
|
|
4227
4629
|
"coverage_limits",
|
|
4630
|
+
"covered_reasons",
|
|
4631
|
+
"definitions",
|
|
4228
4632
|
"endorsements",
|
|
4229
4633
|
"exclusions",
|
|
4230
4634
|
"conditions",
|
|
@@ -4271,6 +4675,8 @@ Available extractors:
|
|
|
4271
4675
|
- carrier_info
|
|
4272
4676
|
- named_insured
|
|
4273
4677
|
- coverage_limits
|
|
4678
|
+
- covered_reasons
|
|
4679
|
+
- definitions
|
|
4274
4680
|
- endorsements
|
|
4275
4681
|
- exclusions
|
|
4276
4682
|
- conditions
|
|
@@ -4284,6 +4690,8 @@ Rules:
|
|
|
4284
4690
|
- Identify the broad section or form context first, then assign focused extractors within that context.
|
|
4285
4691
|
- Use specific extractors for declarations, schedules, endorsements, exclusions, conditions, premium pages, and loss runs.
|
|
4286
4692
|
- Use "sections" for pages that contain substantive policy text or mixed content that should still be preserved as raw sections.
|
|
4693
|
+
- Use "definitions" for policy-form pages containing defined terms, definitions sections, or term meaning clauses.
|
|
4694
|
+
- Use "covered_reasons" for pages listing covered causes of loss, covered reasons, covered perils, named perils, covered events, or covered loss triggers.
|
|
4287
4695
|
- Avoid assigning broad ranges mentally; decide page by page.
|
|
4288
4696
|
- A page may map to multiple extractors if it legitimately contains multiple relevant sections.
|
|
4289
4697
|
- Prefer declarations and schedules for numeric limits/deductibles over later generic form wording.
|
|
@@ -4291,6 +4699,7 @@ Rules:
|
|
|
4291
4699
|
- Do NOT assign "coverage_limits" for generic policy-form or endorsement text that merely explains how limits, deductibles, waiting periods, or coinsurance work, or that says values are "shown in the declarations", "shown in the schedule", "as stated", or "if applicable".
|
|
4292
4700
|
- Headings like "Limits of Insurance", "Deductible", "Coinsurance", "Loss Conditions", or "Definitions" inside a policy form usually indicate form language, not declarations or schedules.
|
|
4293
4701
|
- Continuation pages near the end of a form should stay mapped to "sections" plus "conditions"/"exclusions" when applicable, even if they mention limits or deductibles.
|
|
4702
|
+
- Covered causes/reasons and definitions often span a whole form section; tag every substantive page in that section, not just the heading page.
|
|
4294
4703
|
- When a form inventory entry identifies a page range as a specific form type (e.g., endorsement, coverage, application), use that classification to guide your extractor choice. Do not assign "coverage_limits" to pages the inventory identifies as endorsement or condition/exclusion forms unless the page contains actual schedule values.
|
|
4295
4704
|
- Do not tag a page with "exclusions" or "conditions" if it only contains a table of contents, page-number reference, running header/footer, or a heading that points to another page without substantive wording.
|
|
4296
4705
|
- If a page appears to be part of a larger exclusion, conditions, or endorsement section within the same form, keep the assignment consistent across nearby pages in that section rather than isolating a single page fragment.
|
|
@@ -4334,12 +4743,15 @@ var ReviewResultSchema = z21.object({
|
|
|
4334
4743
|
description: z21.string()
|
|
4335
4744
|
}))
|
|
4336
4745
|
});
|
|
4337
|
-
function buildReviewPrompt(templateExpected, extractedKeys, extractionSummary, pageMapSummary) {
|
|
4338
|
-
return `You are
|
|
4746
|
+
function buildReviewPrompt(templateExpected, extractedKeys, extractionSummary, pageMapSummary, extractorCatalog) {
|
|
4747
|
+
return `You are the extraction coordinator for an insurance-document agent system. Review the current extraction state, decide whether the result is complete enough, and choose any follow-up extractor tasks needed to improve it.
|
|
4339
4748
|
|
|
4340
4749
|
EXPECTED FIELDS (from document type template):
|
|
4341
4750
|
${templateExpected.map((f) => `- ${f}`).join("\n")}
|
|
4342
4751
|
|
|
4752
|
+
AVAILABLE FOLLOW-UP EXTRACTORS:
|
|
4753
|
+
${extractorCatalog}
|
|
4754
|
+
|
|
4343
4755
|
FIELDS ALREADY EXTRACTED:
|
|
4344
4756
|
${extractedKeys.map((f) => `- ${f}`).join("\n")}
|
|
4345
4757
|
|
|
@@ -4353,15 +4765,21 @@ Determine:
|
|
|
4353
4765
|
1. Is the extraction complete enough?
|
|
4354
4766
|
2. What fields are missing?
|
|
4355
4767
|
3. What quality issues are present?
|
|
4356
|
-
4.
|
|
4768
|
+
4. Which follow-up extraction tasks, if any, should be dispatched?
|
|
4357
4769
|
|
|
4358
4770
|
Mark the extraction as NOT complete if any of these are true:
|
|
4359
4771
|
- required fields are missing
|
|
4360
4772
|
- extracted values are generic placeholders like "shown in declarations", "per schedule", "if applicable", "as stated"
|
|
4361
4773
|
- coverage limits or deductibles appear to come from generic form language instead of declaration/schedule-specific values
|
|
4774
|
+
- definitions pages were mapped but no definition records or definition-type sections were extracted
|
|
4775
|
+
- covered causes/reasons pages were mapped but no covered reason, covered peril, covered cause, or matching section records were extracted
|
|
4362
4776
|
- page assignments suggest declaration, schedule, endorsement, exclusion, or condition pages were not actually extracted with the matching focused extractor
|
|
4363
4777
|
- a focused extractor exists but returned too little substance for the relevant pages
|
|
4364
4778
|
|
|
4779
|
+
When reviewing CURRENT EXTRACTION SUMMARY, compare the page-map counts to extracted counts. If an assigned extractor produced no useful records, produce a quality issue and a narrow follow-up task over the mapped page range.
|
|
4780
|
+
|
|
4781
|
+
Choose follow-up tasks from AVAILABLE FOLLOW-UP EXTRACTORS. You may dispatch any listed extractor when the page map, current extraction summary, or quality evidence shows that the focused extraction is missing, generic, referential, or too thin. Do not invent extractor names.
|
|
4782
|
+
|
|
4365
4783
|
Return JSON:
|
|
4366
4784
|
{
|
|
4367
4785
|
"complete": boolean,
|
|
@@ -4372,7 +4790,7 @@ Return JSON:
|
|
|
4372
4790
|
]
|
|
4373
4791
|
}
|
|
4374
4792
|
|
|
4375
|
-
Use the page map to target follow-up extraction pages precisely. Prefer narrow, declaration/schedule-focused follow-up tasks over broad page ranges.
|
|
4793
|
+
Use the page map to target follow-up extraction pages precisely. Prefer narrow, declaration/schedule-focused follow-up tasks over broad page ranges. If no additional model work is likely to improve the extraction, return an empty additionalTasks array.
|
|
4376
4794
|
|
|
4377
4795
|
Respond with JSON only.`;
|
|
4378
4796
|
}
|
|
@@ -4907,6 +5325,7 @@ var SectionsSchema = z32.object({
|
|
|
4907
5325
|
"policy_form",
|
|
4908
5326
|
"endorsement",
|
|
4909
5327
|
"application",
|
|
5328
|
+
"covered_reason",
|
|
4910
5329
|
"exclusion",
|
|
4911
5330
|
"condition",
|
|
4912
5331
|
"definition",
|
|
@@ -4930,6 +5349,7 @@ For each section, classify its type:
|
|
|
4930
5349
|
- "policy_form" \u2014 named ISO or proprietary forms (e.g. CG 00 01, IL 00 17). All sections within a named form should be typed as "policy_form"
|
|
4931
5350
|
- "endorsement" \u2014 standalone endorsements modifying the base policy
|
|
4932
5351
|
- "application" \u2014 the insurance application or supplemental application
|
|
5352
|
+
- "covered_reason" \u2014 affirmative grants of coverage, covered causes of loss, covered perils, or named covered events
|
|
4933
5353
|
- "insuring_agreement" \u2014 the insuring agreement clause (only if standalone, not inside a policy_form)
|
|
4934
5354
|
- "exclusion", "condition", "definition" \u2014 for standalone sections only
|
|
4935
5355
|
- "schedule" \u2014 coverage or rating schedules
|
|
@@ -5005,7 +5425,155 @@ For auxiliaryFacts:
|
|
|
5005
5425
|
Return JSON only.`;
|
|
5006
5426
|
}
|
|
5007
5427
|
|
|
5428
|
+
// src/prompts/extractors/definitions.ts
|
|
5429
|
+
import { z as z34 } from "zod";
|
|
5430
|
+
var DefinitionsSchema = z34.object({
|
|
5431
|
+
definitions: z34.array(
|
|
5432
|
+
z34.object({
|
|
5433
|
+
term: z34.string().describe("Defined term exactly as shown in the document"),
|
|
5434
|
+
definition: z34.string().describe("Full verbatim definition text, preserving original wording"),
|
|
5435
|
+
pageNumber: z34.number().optional().describe("Original document page number"),
|
|
5436
|
+
formNumber: z34.string().optional().describe("Form number where this definition appears"),
|
|
5437
|
+
formTitle: z34.string().optional().describe("Form title where this definition appears"),
|
|
5438
|
+
sectionRef: z34.string().optional().describe("Definition section heading or subsection reference"),
|
|
5439
|
+
originalContent: z34.string().optional().describe("Short verbatim source snippet containing the term and definition")
|
|
5440
|
+
})
|
|
5441
|
+
).describe("All substantive insurance definitions found in the document")
|
|
5442
|
+
});
|
|
5443
|
+
function buildDefinitionsPrompt() {
|
|
5444
|
+
return `You are an expert insurance document analyst. Extract ALL substantive defined terms from this document. Preserve original wording verbatim.
|
|
5445
|
+
|
|
5446
|
+
For EACH definition, extract:
|
|
5447
|
+
- term: defined term exactly as shown \u2014 REQUIRED
|
|
5448
|
+
- definition: full verbatim definition text including all included subparts \u2014 REQUIRED
|
|
5449
|
+
- pageNumber: original document page number where the definition appears
|
|
5450
|
+
- formNumber: form number where the definition appears, if shown
|
|
5451
|
+
- formTitle: form title where the definition appears, if shown
|
|
5452
|
+
- sectionRef: heading such as "Definitions", "Words and Phrases Defined", or coverage-specific definition section
|
|
5453
|
+
- originalContent: short verbatim source snippet containing the term and definition
|
|
5454
|
+
|
|
5455
|
+
Focus on:
|
|
5456
|
+
- Terms in sections titled Definitions, Words and Phrases Defined, Glossary, or similar
|
|
5457
|
+
- Coverage-specific defined terms embedded in insuring agreements, endorsements, exclusions, or conditions
|
|
5458
|
+
- Multi-part definitions with numbered, lettered, or bulleted clauses
|
|
5459
|
+
- Definitions that affect coverage triggers, covered property, insured status, exclusions, limits, or duties
|
|
5460
|
+
|
|
5461
|
+
Critical rules:
|
|
5462
|
+
- Preserve the original content. Do not paraphrase content.
|
|
5463
|
+
- Keep all subparts of a definition together in one item when they define the same term.
|
|
5464
|
+
- Ignore table-of-contents entries, running headers/footers, indexes, and cross-references that do not include substantive definition text.
|
|
5465
|
+
- Do not emit generic headings like "Definitions" as a term unless the page defines an actual term.
|
|
5466
|
+
- Always include pageNumber when the definition appears on a specific page in the supplied document chunk.
|
|
5467
|
+
- Use definition as the canonical full text. Do not return a separate content field.
|
|
5468
|
+
|
|
5469
|
+
Return JSON only.`;
|
|
5470
|
+
}
|
|
5471
|
+
|
|
5472
|
+
// src/prompts/extractors/covered-reasons.ts
|
|
5473
|
+
import { z as z35 } from "zod";
|
|
5474
|
+
var CoveredReasonsSchema = z35.object({
|
|
5475
|
+
coveredReasons: z35.array(
|
|
5476
|
+
z35.object({
|
|
5477
|
+
coverageName: z35.string().describe("Coverage, coverage part, or form this covered reason belongs to"),
|
|
5478
|
+
reasonNumber: z35.string().optional().describe("Source number or letter for the covered reason, if shown"),
|
|
5479
|
+
title: z35.string().optional().describe("Covered reason title, peril, cause of loss, trigger, or short name"),
|
|
5480
|
+
content: z35.string().describe("Full verbatim covered-reason or insuring-agreement text"),
|
|
5481
|
+
conditions: z35.array(z35.string()).optional().describe("Conditions, timing rules, documentation requirements, or prerequisites attached to this covered reason"),
|
|
5482
|
+
exceptions: z35.array(z35.string()).optional().describe("Exceptions or limitations attached to this covered reason"),
|
|
5483
|
+
appliesTo: z35.array(z35.string()).optional().describe("Covered property, persons, autos, locations, operations, or coverage parts this reason applies to"),
|
|
5484
|
+
pageNumber: z35.number().optional().describe("Original document page number"),
|
|
5485
|
+
formNumber: z35.string().optional().describe("Form number where this covered reason appears"),
|
|
5486
|
+
formTitle: z35.string().optional().describe("Form title where this covered reason appears"),
|
|
5487
|
+
sectionRef: z35.string().optional().describe("Section heading where this covered reason appears"),
|
|
5488
|
+
originalContent: z35.string().optional().describe("Short verbatim source snippet used for this covered reason")
|
|
5489
|
+
})
|
|
5490
|
+
).describe("Covered causes, perils, triggers, or reasons that affirmatively grant coverage")
|
|
5491
|
+
});
|
|
5492
|
+
function buildCoveredReasonsPrompt() {
|
|
5493
|
+
return `You are an expert insurance document analyst. Extract ALL covered reasons from this document. Preserve original wording verbatim.
|
|
5494
|
+
|
|
5495
|
+
A covered reason is affirmative coverage language explaining why, when, or for what cause the insurer will pay. This may be called a covered peril, covered cause of loss, accident, occurrence, loss trigger, additional coverage, expense, or insuring agreement grant.
|
|
5496
|
+
|
|
5497
|
+
For EACH covered reason, extract:
|
|
5498
|
+
- coverageName: coverage, coverage part, or form this covered reason belongs to \u2014 REQUIRED
|
|
5499
|
+
- reasonNumber: source number or letter for the covered reason, if shown
|
|
5500
|
+
- title: covered peril, cause of loss, trigger, or short name
|
|
5501
|
+
- content: full verbatim covered-reason or insuring-agreement text \u2014 REQUIRED
|
|
5502
|
+
- conditions: conditions, timing rules, documentation requirements, or prerequisites attached to this covered reason
|
|
5503
|
+
- exceptions: exceptions or limitations attached to this covered reason
|
|
5504
|
+
- appliesTo: covered property, persons, autos, locations, operations, or coverage parts this reason applies to
|
|
5505
|
+
- pageNumber: original document page number where this covered reason appears
|
|
5506
|
+
- formNumber: form number where this covered reason appears, if shown
|
|
5507
|
+
- formTitle: form title where this covered reason appears, if shown
|
|
5508
|
+
- sectionRef: heading where this covered reason appears
|
|
5509
|
+
- originalContent: short verbatim source snippet used for this covered reason
|
|
5510
|
+
|
|
5511
|
+
Focus on:
|
|
5512
|
+
- Named perils and covered causes of loss
|
|
5513
|
+
- Insuring agreement grants and coverage triggers
|
|
5514
|
+
- Additional coverages and coverage extensions that state when payment applies
|
|
5515
|
+
- Personal lines phrases such as fire, lightning, windstorm, hail, theft, collision, comprehensive, or accidental direct physical loss
|
|
5516
|
+
- Commercial lines phrases such as bodily injury, property damage, personal and advertising injury, employee dishonesty, computer fraud, equipment breakdown, or professional services acts
|
|
5517
|
+
|
|
5518
|
+
Critical rules:
|
|
5519
|
+
- Preserve the original content. Do not paraphrase content.
|
|
5520
|
+
- Extract affirmative coverage grants, not exclusions, conditions, or declarations-only limit rows.
|
|
5521
|
+
- Do not emit a covered reason from a table-of-contents entry, running header/footer, or reference that only points elsewhere.
|
|
5522
|
+
- If a covered reason includes exceptions or limitations in the same clause, keep them in content and also list them in exceptions when they can be separated cleanly.
|
|
5523
|
+
- Always include pageNumber when the covered reason appears on a specific page in the supplied document chunk.
|
|
5524
|
+
- Preserve coverage grouping. Do not merge separate coverage parts into one generic list.
|
|
5525
|
+
|
|
5526
|
+
Return JSON only.`;
|
|
5527
|
+
}
|
|
5528
|
+
|
|
5008
5529
|
// src/prompts/extractors/index.ts
|
|
5530
|
+
function asRecord(data) {
|
|
5531
|
+
return data && typeof data === "object" ? data : void 0;
|
|
5532
|
+
}
|
|
5533
|
+
function getSections2(data) {
|
|
5534
|
+
const sections = asRecord(data)?.sections;
|
|
5535
|
+
return Array.isArray(sections) ? sections : [];
|
|
5536
|
+
}
|
|
5537
|
+
function isCoveredReasonsEmpty(data) {
|
|
5538
|
+
const record = asRecord(data);
|
|
5539
|
+
if (!record) return true;
|
|
5540
|
+
const coveredReasons = Array.isArray(record.coveredReasons) ? record.coveredReasons : Array.isArray(record.covered_reasons) ? record.covered_reasons : [];
|
|
5541
|
+
return coveredReasons.length === 0;
|
|
5542
|
+
}
|
|
5543
|
+
function isDefinitionsEmpty(data) {
|
|
5544
|
+
const definitions = asRecord(data)?.definitions;
|
|
5545
|
+
return !Array.isArray(definitions) || definitions.length === 0;
|
|
5546
|
+
}
|
|
5547
|
+
function sectionLooksLikeCoveredReason(section) {
|
|
5548
|
+
const type = String(section.type ?? "").toLowerCase();
|
|
5549
|
+
const title = String(section.title ?? "").toLowerCase();
|
|
5550
|
+
return type === "covered_reason" || title.includes("covered cause") || title.includes("covered reason") || title.includes("covered peril") || title.includes("named peril") || title.includes("insuring agreement");
|
|
5551
|
+
}
|
|
5552
|
+
function deriveCoveredReasonsFromSections(data) {
|
|
5553
|
+
const coveredReasons = getSections2(data).filter(sectionLooksLikeCoveredReason).map((section) => ({
|
|
5554
|
+
coverageName: String(section.coverageName ?? section.formTitle ?? section.title ?? "Covered Reasons"),
|
|
5555
|
+
title: typeof section.title === "string" ? section.title : void 0,
|
|
5556
|
+
content: String(section.content ?? ""),
|
|
5557
|
+
pageNumber: typeof section.pageStart === "number" ? section.pageStart : void 0,
|
|
5558
|
+
formNumber: typeof section.formNumber === "string" ? section.formNumber : void 0,
|
|
5559
|
+
formTitle: typeof section.formTitle === "string" ? section.formTitle : void 0,
|
|
5560
|
+
sectionRef: typeof section.sectionNumber === "string" ? section.sectionNumber : void 0,
|
|
5561
|
+
originalContent: typeof section.content === "string" ? section.content.slice(0, 500) : void 0
|
|
5562
|
+
})).filter((coveredReason) => coveredReason.content.trim().length > 0);
|
|
5563
|
+
return coveredReasons.length > 0 ? { coveredReasons } : void 0;
|
|
5564
|
+
}
|
|
5565
|
+
function deriveDefinitionsFromSections(data) {
|
|
5566
|
+
const definitions = getSections2(data).filter((section) => String(section.type ?? "").toLowerCase() === "definition").map((section) => ({
|
|
5567
|
+
term: String(section.title ?? "Definitions"),
|
|
5568
|
+
definition: String(section.content ?? ""),
|
|
5569
|
+
pageNumber: typeof section.pageStart === "number" ? section.pageStart : void 0,
|
|
5570
|
+
formNumber: typeof section.formNumber === "string" ? section.formNumber : void 0,
|
|
5571
|
+
formTitle: typeof section.formTitle === "string" ? section.formTitle : void 0,
|
|
5572
|
+
sectionRef: typeof section.sectionNumber === "string" ? section.sectionNumber : void 0,
|
|
5573
|
+
originalContent: typeof section.content === "string" ? section.content.slice(0, 500) : void 0
|
|
5574
|
+
})).filter((definition) => definition.definition.trim().length > 0);
|
|
5575
|
+
return definitions.length > 0 ? { definitions } : void 0;
|
|
5576
|
+
}
|
|
5009
5577
|
var EXTRACTORS = {
|
|
5010
5578
|
carrier_info: { buildPrompt: buildCarrierInfoPrompt, schema: CarrierInfoSchema, maxTokens: 2048 },
|
|
5011
5579
|
named_insured: { buildPrompt: buildNamedInsuredPrompt, schema: NamedInsuredSchema2, maxTokens: 2048 },
|
|
@@ -5017,28 +5585,54 @@ var EXTRACTORS = {
|
|
|
5017
5585
|
declarations: { buildPrompt: buildDeclarationsPrompt, schema: DeclarationsExtractSchema, maxTokens: 8192 },
|
|
5018
5586
|
loss_history: { buildPrompt: buildLossHistoryPrompt, schema: LossHistorySchema, maxTokens: 4096 },
|
|
5019
5587
|
sections: { buildPrompt: buildSectionsPrompt, schema: SectionsSchema, maxTokens: 8192 },
|
|
5020
|
-
supplementary: { buildPrompt: buildSupplementaryPrompt, schema: SupplementarySchema, maxTokens: 2048 }
|
|
5588
|
+
supplementary: { buildPrompt: buildSupplementaryPrompt, schema: SupplementarySchema, maxTokens: 2048 },
|
|
5589
|
+
definitions: {
|
|
5590
|
+
buildPrompt: buildDefinitionsPrompt,
|
|
5591
|
+
schema: DefinitionsSchema,
|
|
5592
|
+
maxTokens: 8192,
|
|
5593
|
+
fallback: {
|
|
5594
|
+
extractorName: "sections",
|
|
5595
|
+
isEmpty: isDefinitionsEmpty,
|
|
5596
|
+
deriveFocusedResult: deriveDefinitionsFromSections
|
|
5597
|
+
}
|
|
5598
|
+
},
|
|
5599
|
+
covered_reasons: {
|
|
5600
|
+
buildPrompt: buildCoveredReasonsPrompt,
|
|
5601
|
+
schema: CoveredReasonsSchema,
|
|
5602
|
+
maxTokens: 8192,
|
|
5603
|
+
fallback: {
|
|
5604
|
+
extractorName: "sections",
|
|
5605
|
+
isEmpty: isCoveredReasonsEmpty,
|
|
5606
|
+
deriveFocusedResult: deriveCoveredReasonsFromSections
|
|
5607
|
+
}
|
|
5608
|
+
}
|
|
5021
5609
|
};
|
|
5022
5610
|
function getExtractor(name) {
|
|
5023
5611
|
return EXTRACTORS[name];
|
|
5024
5612
|
}
|
|
5613
|
+
function formatExtractorCatalogForPrompt() {
|
|
5614
|
+
return Object.entries(EXTRACTORS).map(([name, extractor]) => {
|
|
5615
|
+
const fallback = extractor.fallback ? `; fallback: ${extractor.fallback.extractorName}` : "";
|
|
5616
|
+
return `- ${name} (maxTokens: ${extractor.maxTokens ?? 4096}${fallback})`;
|
|
5617
|
+
}).join("\n");
|
|
5618
|
+
}
|
|
5025
5619
|
|
|
5026
5620
|
// src/extraction/resolve-referential.ts
|
|
5027
|
-
import { z as
|
|
5621
|
+
import { z as z37 } from "zod";
|
|
5028
5622
|
|
|
5029
5623
|
// src/prompts/extractors/referential-lookup.ts
|
|
5030
|
-
import { z as
|
|
5031
|
-
var ReferentialLookupSchema =
|
|
5032
|
-
resolvedCoverages:
|
|
5033
|
-
|
|
5034
|
-
coverageName:
|
|
5035
|
-
resolvedLimit:
|
|
5624
|
+
import { z as z36 } from "zod";
|
|
5625
|
+
var ReferentialLookupSchema = z36.object({
|
|
5626
|
+
resolvedCoverages: z36.array(
|
|
5627
|
+
z36.object({
|
|
5628
|
+
coverageName: z36.string().describe("The coverage name that was referenced"),
|
|
5629
|
+
resolvedLimit: z36.string().optional().describe("The concrete limit value found, if any"),
|
|
5036
5630
|
resolvedLimitValueType: CoverageValueTypeSchema.optional(),
|
|
5037
|
-
resolvedDeductible:
|
|
5631
|
+
resolvedDeductible: z36.string().optional().describe("The concrete deductible value found, if any"),
|
|
5038
5632
|
resolvedDeductibleValueType: CoverageValueTypeSchema.optional(),
|
|
5039
|
-
pageNumber:
|
|
5040
|
-
originalContent:
|
|
5041
|
-
confidence:
|
|
5633
|
+
pageNumber: z36.number().optional().describe("Page where the resolved value was found"),
|
|
5634
|
+
originalContent: z36.string().optional().describe("Verbatim source text for the resolved value"),
|
|
5635
|
+
confidence: z36.enum(["high", "medium", "low"]).describe("Confidence in the resolution")
|
|
5042
5636
|
})
|
|
5043
5637
|
)
|
|
5044
5638
|
});
|
|
@@ -5073,18 +5667,124 @@ Your task:
|
|
|
5073
5667
|
Return JSON only.`;
|
|
5074
5668
|
}
|
|
5075
5669
|
|
|
5076
|
-
// src/extraction/
|
|
5670
|
+
// src/extraction/heuristics.ts
|
|
5077
5671
|
function looksReferential(value) {
|
|
5078
5672
|
if (typeof value !== "string") return false;
|
|
5079
5673
|
const normalized = value.toLowerCase();
|
|
5080
5674
|
return normalized.includes("shown in the declarations") || normalized.includes("shown in declarations") || normalized.includes("shown in the schedule") || normalized.includes("as stated") || normalized.includes("if applicable");
|
|
5081
5675
|
}
|
|
5676
|
+
function looksCoveredReasonSection(section) {
|
|
5677
|
+
const title = String(section.title ?? "").toLowerCase();
|
|
5678
|
+
const type = String(section.type ?? "").toLowerCase();
|
|
5679
|
+
return type === "covered_reason" || title.includes("covered cause") || title.includes("covered reason") || title.includes("covered peril");
|
|
5680
|
+
}
|
|
5681
|
+
|
|
5682
|
+
// src/extraction/referential-workflow.ts
|
|
5683
|
+
function normalizeText(value) {
|
|
5684
|
+
return typeof value === "string" ? value.trim().toLowerCase() : "";
|
|
5685
|
+
}
|
|
5686
|
+
function containsTarget(value, target) {
|
|
5687
|
+
const normalizedValue = normalizeText(value);
|
|
5688
|
+
return Boolean(normalizedValue && target && normalizedValue.includes(target));
|
|
5689
|
+
}
|
|
5690
|
+
function pageRangeFrom(startPage, endPage) {
|
|
5691
|
+
if (typeof startPage !== "number" || !Number.isFinite(startPage) || startPage <= 0) {
|
|
5692
|
+
return void 0;
|
|
5693
|
+
}
|
|
5694
|
+
const normalizedEnd = typeof endPage === "number" && Number.isFinite(endPage) && endPage >= startPage ? endPage : startPage;
|
|
5695
|
+
return { startPage, endPage: normalizedEnd };
|
|
5696
|
+
}
|
|
5697
|
+
function parseReferentialTarget(rawTarget) {
|
|
5698
|
+
const raw = rawTarget?.trim() || "unknown";
|
|
5699
|
+
const normalized = raw.toLowerCase();
|
|
5700
|
+
if (normalized === "unknown") return { raw, normalized, kind: "unknown" };
|
|
5701
|
+
if (/declarations?|dec\b|decs\b/.test(normalized)) return { raw, normalized, kind: "declarations" };
|
|
5702
|
+
if (/schedule|scheduled/.test(normalized)) return { raw, normalized, kind: "schedule" };
|
|
5703
|
+
if (/\bitem\b/.test(normalized)) return { raw, normalized, kind: "item" };
|
|
5704
|
+
if (/premises?|location|building/.test(normalized)) return { raw, normalized, kind: "premises" };
|
|
5705
|
+
if (/\bsection\b/.test(normalized)) return { raw, normalized, kind: "section" };
|
|
5706
|
+
if (/policy|coverage\s+part|coverage\s+form/.test(normalized)) return { raw, normalized, kind: "policy" };
|
|
5707
|
+
return { raw, normalized, kind: "unknown" };
|
|
5708
|
+
}
|
|
5709
|
+
function findLocalReferentialPages(params) {
|
|
5710
|
+
const targetLower = params.referenceTarget.toLowerCase();
|
|
5711
|
+
for (const section of params.sections) {
|
|
5712
|
+
if (containsTarget(section.title, targetLower)) {
|
|
5713
|
+
const range = pageRangeFrom(section.pageStart, section.pageEnd);
|
|
5714
|
+
if (range) return range;
|
|
5715
|
+
}
|
|
5716
|
+
}
|
|
5717
|
+
for (const form of params.formInventory) {
|
|
5718
|
+
const titleMatch = containsTarget(form.title, targetLower);
|
|
5719
|
+
const typeMatch = containsTarget(form.formType, targetLower);
|
|
5720
|
+
const numberMatch = containsTarget(form.formNumber, targetLower);
|
|
5721
|
+
if (titleMatch || typeMatch || numberMatch) {
|
|
5722
|
+
const range = pageRangeFrom(form.pageStart, form.pageEnd);
|
|
5723
|
+
if (range) return range;
|
|
5724
|
+
}
|
|
5725
|
+
}
|
|
5726
|
+
return void 0;
|
|
5727
|
+
}
|
|
5728
|
+
function findDeclarationsSchedulePages(parsedTarget, formInventory) {
|
|
5729
|
+
for (const form of formInventory) {
|
|
5730
|
+
const formType = normalizeText(form.formType);
|
|
5731
|
+
const title = normalizeText(form.title);
|
|
5732
|
+
const matchesDeclarations = formType === "declarations" || /declarations?|dec\b|decs\b/.test(title);
|
|
5733
|
+
const matchesSchedule = /schedule|scheduled|coverage/.test(title) || formType === "coverage";
|
|
5734
|
+
const shouldUse = parsedTarget.kind === "declarations" ? matchesDeclarations : parsedTarget.kind === "schedule" || parsedTarget.kind === "item" || parsedTarget.kind === "premises" ? matchesSchedule || matchesDeclarations : parsedTarget.kind === "policy" ? matchesDeclarations || matchesSchedule : false;
|
|
5735
|
+
if (shouldUse) {
|
|
5736
|
+
const range = pageRangeFrom(form.pageStart, form.pageEnd);
|
|
5737
|
+
if (range) return range;
|
|
5738
|
+
}
|
|
5739
|
+
}
|
|
5740
|
+
return void 0;
|
|
5741
|
+
}
|
|
5742
|
+
function findSectionPages(parsedTarget, sections) {
|
|
5743
|
+
for (const section of sections) {
|
|
5744
|
+
const title = normalizeText(section.title);
|
|
5745
|
+
const type = normalizeText(section.type);
|
|
5746
|
+
const matchesKind = parsedTarget.kind === "declarations" && (type === "declarations" || /declarations?/.test(title)) || parsedTarget.kind === "schedule" && (type === "schedule" || /schedule|scheduled/.test(title)) || parsedTarget.kind === "premises" && /premises?|location|building/.test(title) || parsedTarget.kind === "item" && /\bitem\b|schedule|scheduled/.test(title) || parsedTarget.kind === "section" && containsTarget(title, parsedTarget.normalized);
|
|
5747
|
+
if (matchesKind) {
|
|
5748
|
+
const range = pageRangeFrom(section.pageStart, section.pageEnd);
|
|
5749
|
+
if (range) return range;
|
|
5750
|
+
}
|
|
5751
|
+
}
|
|
5752
|
+
return void 0;
|
|
5753
|
+
}
|
|
5754
|
+
function decideReferentialResolutionAction(params) {
|
|
5755
|
+
if (params.localPageRange) {
|
|
5756
|
+
return { kind: "lookup_pages", source: "local", pageRange: params.localPageRange };
|
|
5757
|
+
}
|
|
5758
|
+
const parsedTarget = parseReferentialTarget(params.referenceTarget);
|
|
5759
|
+
const declarationsScheduleRange = findDeclarationsSchedulePages(parsedTarget, params.formInventory);
|
|
5760
|
+
if (declarationsScheduleRange) {
|
|
5761
|
+
return {
|
|
5762
|
+
kind: "lookup_pages",
|
|
5763
|
+
source: "declarations_schedule",
|
|
5764
|
+
pageRange: declarationsScheduleRange
|
|
5765
|
+
};
|
|
5766
|
+
}
|
|
5767
|
+
const sectionRange = findSectionPages(parsedTarget, params.sections);
|
|
5768
|
+
if (sectionRange) {
|
|
5769
|
+
return { kind: "lookup_pages", source: "sections", pageRange: sectionRange };
|
|
5770
|
+
}
|
|
5771
|
+
if (parsedTarget.kind === "unknown") {
|
|
5772
|
+
return { kind: "skip", reason: "no concrete reference target" };
|
|
5773
|
+
}
|
|
5774
|
+
return { kind: "page_location" };
|
|
5775
|
+
}
|
|
5776
|
+
|
|
5777
|
+
// src/extraction/resolve-referential.ts
|
|
5082
5778
|
function parseReferenceTarget(text) {
|
|
5083
5779
|
if (typeof text !== "string") return void 0;
|
|
5084
5780
|
const normalized = text.trim();
|
|
5085
5781
|
if (!normalized) return void 0;
|
|
5086
5782
|
const sectionMatch = normalized.match(/\b(Section\s+\d+[A-Za-z]?)/i);
|
|
5087
5783
|
if (sectionMatch) return sectionMatch[1];
|
|
5784
|
+
const itemMatch = normalized.match(/\b(Item\s+\d+[A-Za-z]?)/i);
|
|
5785
|
+
if (itemMatch) return itemMatch[1];
|
|
5786
|
+
const premisesMatch = normalized.match(/\b(Premises?(?:\s+No\.?\s*\d+[A-Za-z]?|\s+\d+[A-Za-z]?)?)/i);
|
|
5787
|
+
if (premisesMatch) return premisesMatch[1].trim();
|
|
5088
5788
|
if (/declarations/i.test(normalized)) return "Declarations";
|
|
5089
5789
|
const scheduleMatch = normalized.match(/\b(Schedule(?:\s+of\s+[A-Za-z ]+)?)/i);
|
|
5090
5790
|
if (scheduleMatch) return scheduleMatch[1].trim();
|
|
@@ -5097,9 +5797,9 @@ function parseReferenceTarget(text) {
|
|
|
5097
5797
|
if (/if applicable/i.test(normalized)) return void 0;
|
|
5098
5798
|
return void 0;
|
|
5099
5799
|
}
|
|
5100
|
-
var PageLocationSchema =
|
|
5101
|
-
startPage:
|
|
5102
|
-
endPage:
|
|
5800
|
+
var PageLocationSchema = z37.object({
|
|
5801
|
+
startPage: z37.number(),
|
|
5802
|
+
endPage: z37.number()
|
|
5103
5803
|
});
|
|
5104
5804
|
async function findReferencedPages(params) {
|
|
5105
5805
|
const {
|
|
@@ -5110,26 +5810,31 @@ async function findReferencedPages(params) {
|
|
|
5110
5810
|
pageCount,
|
|
5111
5811
|
generateObject,
|
|
5112
5812
|
providerOptions,
|
|
5813
|
+
trackUsage,
|
|
5113
5814
|
log
|
|
5114
5815
|
} = params;
|
|
5115
|
-
const
|
|
5116
|
-
|
|
5117
|
-
|
|
5118
|
-
|
|
5119
|
-
|
|
5120
|
-
|
|
5121
|
-
|
|
5122
|
-
|
|
5816
|
+
const localPageRange = findLocalReferentialPages({
|
|
5817
|
+
referenceTarget,
|
|
5818
|
+
sections,
|
|
5819
|
+
formInventory
|
|
5820
|
+
});
|
|
5821
|
+
const action = decideReferentialResolutionAction({
|
|
5822
|
+
referenceTarget,
|
|
5823
|
+
sections,
|
|
5824
|
+
formInventory,
|
|
5825
|
+
localPageRange
|
|
5826
|
+
});
|
|
5827
|
+
if (action.kind === "lookup_pages") {
|
|
5828
|
+
await log?.(
|
|
5829
|
+
`Referential target "${referenceTarget}" resolved to pages ${action.pageRange.startPage}-${action.pageRange.endPage} via ${action.source}.`
|
|
5830
|
+
);
|
|
5831
|
+
return action.pageRange;
|
|
5123
5832
|
}
|
|
5124
|
-
|
|
5125
|
-
|
|
5126
|
-
|
|
5127
|
-
|
|
5128
|
-
|
|
5129
|
-
startPage: form.pageStart,
|
|
5130
|
-
endPage: form.pageEnd ?? form.pageStart
|
|
5131
|
-
};
|
|
5132
|
-
}
|
|
5833
|
+
if (action.kind === "skip") {
|
|
5834
|
+
await log?.(
|
|
5835
|
+
`Skipping referential target "${referenceTarget}": ${action.reason}.`
|
|
5836
|
+
);
|
|
5837
|
+
return void 0;
|
|
5133
5838
|
}
|
|
5134
5839
|
try {
|
|
5135
5840
|
const result = await safeGenerateObject(
|
|
@@ -5157,6 +5862,7 @@ Return JSON only.`,
|
|
|
5157
5862
|
)
|
|
5158
5863
|
}
|
|
5159
5864
|
);
|
|
5865
|
+
trackUsage?.(result.usage);
|
|
5160
5866
|
if (result.object.startPage > 0 && result.object.endPage > 0) {
|
|
5161
5867
|
return {
|
|
5162
5868
|
startPage: result.object.startPage,
|
|
@@ -5214,7 +5920,9 @@ async function resolveReferentialCoverages(params) {
|
|
|
5214
5920
|
for (let i = 0; i < referentialCoverages.length; i++) {
|
|
5215
5921
|
const cov = referentialCoverages[i];
|
|
5216
5922
|
const refString = (looksReferential(cov.limit) ? cov.limit : void 0) ?? (looksReferential(cov.deductible) ? cov.deductible : void 0) ?? cov.limit ?? "";
|
|
5217
|
-
const
|
|
5923
|
+
const sectionRef = typeof cov.sectionRef === "string" ? cov.sectionRef : "";
|
|
5924
|
+
const parsedTarget = parseReferenceTarget(refString) ?? parseReferenceTarget(sectionRef) ?? sectionRef;
|
|
5925
|
+
const target = parsedTarget || "unknown";
|
|
5218
5926
|
const group = targetGroups.get(target) ?? [];
|
|
5219
5927
|
group.push({ coverage: cov, index: i });
|
|
5220
5928
|
targetGroups.set(target, group);
|
|
@@ -5238,6 +5946,7 @@ async function resolveReferentialCoverages(params) {
|
|
|
5238
5946
|
pageCount,
|
|
5239
5947
|
generateObject,
|
|
5240
5948
|
providerOptions,
|
|
5949
|
+
trackUsage,
|
|
5241
5950
|
log
|
|
5242
5951
|
});
|
|
5243
5952
|
if (!pageRange) {
|
|
@@ -5355,6 +6064,78 @@ async function resolveReferentialCoverages(params) {
|
|
|
5355
6064
|
};
|
|
5356
6065
|
}
|
|
5357
6066
|
|
|
6067
|
+
// src/extraction/focused-dispatch.ts
|
|
6068
|
+
async function runFocusedExtractorWithFallback(params) {
|
|
6069
|
+
const {
|
|
6070
|
+
task,
|
|
6071
|
+
pdfInput,
|
|
6072
|
+
generateObject,
|
|
6073
|
+
convertPdfToImages,
|
|
6074
|
+
providerOptions,
|
|
6075
|
+
trackUsage,
|
|
6076
|
+
log
|
|
6077
|
+
} = params;
|
|
6078
|
+
const ext = getExtractor(task.extractorName);
|
|
6079
|
+
if (!ext) {
|
|
6080
|
+
await log?.(`Unknown extractor: ${task.extractorName}, skipping`);
|
|
6081
|
+
return null;
|
|
6082
|
+
}
|
|
6083
|
+
try {
|
|
6084
|
+
const result = await runExtractor({
|
|
6085
|
+
name: task.extractorName,
|
|
6086
|
+
prompt: ext.buildPrompt(),
|
|
6087
|
+
schema: ext.schema,
|
|
6088
|
+
pdfInput,
|
|
6089
|
+
startPage: task.startPage,
|
|
6090
|
+
endPage: task.endPage,
|
|
6091
|
+
generateObject,
|
|
6092
|
+
convertPdfToImages,
|
|
6093
|
+
maxTokens: ext.maxTokens ?? 4096,
|
|
6094
|
+
providerOptions
|
|
6095
|
+
});
|
|
6096
|
+
trackUsage(result.usage);
|
|
6097
|
+
if (!ext.fallback?.isEmpty(result.data)) {
|
|
6098
|
+
return result;
|
|
6099
|
+
}
|
|
6100
|
+
if (!ext.fallback) {
|
|
6101
|
+
return result;
|
|
6102
|
+
}
|
|
6103
|
+
} catch (error) {
|
|
6104
|
+
await log?.(`Extractor ${task.extractorName} failed: ${error}`);
|
|
6105
|
+
if (!ext.fallback) {
|
|
6106
|
+
return null;
|
|
6107
|
+
}
|
|
6108
|
+
}
|
|
6109
|
+
const fallbackExt = getExtractor(ext.fallback.extractorName);
|
|
6110
|
+
if (!fallbackExt) return null;
|
|
6111
|
+
await log?.(
|
|
6112
|
+
`Extractor ${task.extractorName} produced no usable records; trying ${ext.fallback.extractorName} fallback for pages ${task.startPage}-${task.endPage}`
|
|
6113
|
+
);
|
|
6114
|
+
try {
|
|
6115
|
+
const fallbackResult = await runExtractor({
|
|
6116
|
+
name: ext.fallback.extractorName,
|
|
6117
|
+
prompt: fallbackExt.buildPrompt(),
|
|
6118
|
+
schema: fallbackExt.schema,
|
|
6119
|
+
pdfInput,
|
|
6120
|
+
startPage: task.startPage,
|
|
6121
|
+
endPage: task.endPage,
|
|
6122
|
+
generateObject,
|
|
6123
|
+
convertPdfToImages,
|
|
6124
|
+
maxTokens: fallbackExt.maxTokens ?? 4096,
|
|
6125
|
+
providerOptions
|
|
6126
|
+
});
|
|
6127
|
+
trackUsage(fallbackResult.usage);
|
|
6128
|
+
const focusedData = ext.fallback.deriveFocusedResult(fallbackResult.data);
|
|
6129
|
+
return focusedData ? [
|
|
6130
|
+
fallbackResult,
|
|
6131
|
+
{ name: task.extractorName, data: focusedData, usage: void 0 }
|
|
6132
|
+
] : fallbackResult;
|
|
6133
|
+
} catch (fallbackError) {
|
|
6134
|
+
await log?.(`${ext.fallback.extractorName} fallback for ${task.extractorName} failed: ${fallbackError}`);
|
|
6135
|
+
return null;
|
|
6136
|
+
}
|
|
6137
|
+
}
|
|
6138
|
+
|
|
5358
6139
|
// src/core/quality.ts
|
|
5359
6140
|
function evaluateQualityGate(params) {
|
|
5360
6141
|
const { issues, hasRoundWarnings = false } = params;
|
|
@@ -5391,11 +6172,6 @@ function addFormEntry(inventory, formNumber, source, extra) {
|
|
|
5391
6172
|
sources: [source]
|
|
5392
6173
|
});
|
|
5393
6174
|
}
|
|
5394
|
-
function looksReferential2(value) {
|
|
5395
|
-
if (typeof value !== "string") return false;
|
|
5396
|
-
const normalized = value.toLowerCase();
|
|
5397
|
-
return normalized.includes("shown in the declarations") || normalized.includes("shown in declarations") || normalized.includes("shown in the schedule") || normalized.includes("as stated") || normalized.includes("if applicable");
|
|
5398
|
-
}
|
|
5399
6175
|
function looksTocArtifact(value) {
|
|
5400
6176
|
if (typeof value !== "string") return false;
|
|
5401
6177
|
return /\.{4,}\d{1,3}$/.test(value.trim()) || /^\d+\.\s+[A-Z][\s\S]*\.{3,}\d{1,3}$/.test(value.trim());
|
|
@@ -5419,6 +6195,28 @@ function buildExtractionReviewReport(params) {
|
|
|
5419
6195
|
const exclusions = memory.get("exclusions")?.exclusions ?? [];
|
|
5420
6196
|
const conditions = memory.get("conditions")?.conditions ?? [];
|
|
5421
6197
|
const sections = memory.get("sections")?.sections ?? [];
|
|
6198
|
+
const definitionsResult = memory.get("definitions");
|
|
6199
|
+
const coveredReasonsResult = memory.get("covered_reasons");
|
|
6200
|
+
const definitions = Array.isArray(definitionsResult?.definitions) ? definitionsResult.definitions : sections.filter((section) => section.type === "definition");
|
|
6201
|
+
const coveredReasons = Array.isArray(coveredReasonsResult?.coveredReasons) ? coveredReasonsResult.coveredReasons : Array.isArray(coveredReasonsResult?.covered_reasons) ? coveredReasonsResult.covered_reasons : sections.filter(looksCoveredReasonSection);
|
|
6202
|
+
const mappedDefinitions = params.pageAssignments.some((assignment) => assignment.extractorNames.includes("definitions"));
|
|
6203
|
+
const mappedCoveredReasons = params.pageAssignments.some((assignment) => assignment.extractorNames.includes("covered_reasons"));
|
|
6204
|
+
if (mappedDefinitions && definitions.length === 0) {
|
|
6205
|
+
deterministicIssues.push({
|
|
6206
|
+
code: "definitions_mapped_but_empty",
|
|
6207
|
+
severity: "warning",
|
|
6208
|
+
message: "Page map assigned definitions extraction, but no definition records were extracted.",
|
|
6209
|
+
extractorName: "definitions"
|
|
6210
|
+
});
|
|
6211
|
+
}
|
|
6212
|
+
if (mappedCoveredReasons && coveredReasons.length === 0) {
|
|
6213
|
+
deterministicIssues.push({
|
|
6214
|
+
code: "covered_reasons_mapped_but_empty",
|
|
6215
|
+
severity: "warning",
|
|
6216
|
+
message: "Page map assigned covered reasons extraction, but no covered reason records were extracted.",
|
|
6217
|
+
extractorName: "covered_reasons"
|
|
6218
|
+
});
|
|
6219
|
+
}
|
|
5422
6220
|
for (const form of extractedFormInventory) {
|
|
5423
6221
|
addFormEntry(
|
|
5424
6222
|
inventory,
|
|
@@ -5515,7 +6313,7 @@ function buildExtractionReviewReport(params) {
|
|
|
5515
6313
|
itemName: typeof coverage.name === "string" ? coverage.name : void 0
|
|
5516
6314
|
});
|
|
5517
6315
|
}
|
|
5518
|
-
if (
|
|
6316
|
+
if (looksReferential(coverage.limit) || looksReferential(coverage.deductible)) {
|
|
5519
6317
|
deterministicIssues.push({
|
|
5520
6318
|
code: "coverage_referential_value",
|
|
5521
6319
|
severity: "warning",
|
|
@@ -5616,6 +6414,67 @@ function buildExtractionReviewReport(params) {
|
|
|
5616
6414
|
});
|
|
5617
6415
|
}
|
|
5618
6416
|
}
|
|
6417
|
+
for (const definition of definitions) {
|
|
6418
|
+
const term = typeof definition.term === "string" ? definition.term : typeof definition.title === "string" ? definition.title : "unknown";
|
|
6419
|
+
const content = typeof definition.definition === "string" ? definition.definition : typeof definition.content === "string" ? definition.content : "";
|
|
6420
|
+
if (!content.trim()) {
|
|
6421
|
+
deterministicIssues.push({
|
|
6422
|
+
code: "definition_missing_content",
|
|
6423
|
+
severity: "warning",
|
|
6424
|
+
message: `Definition "${term}" is missing definition text.`,
|
|
6425
|
+
extractorName: "definitions",
|
|
6426
|
+
formNumber: normalizeFormNumber(definition.formNumber),
|
|
6427
|
+
pageNumber: typeof definition.pageNumber === "number" ? definition.pageNumber : typeof definition.pageStart === "number" ? definition.pageStart : void 0,
|
|
6428
|
+
itemName: term
|
|
6429
|
+
});
|
|
6430
|
+
}
|
|
6431
|
+
if (typeof definition.pageNumber !== "number" && typeof definition.pageStart !== "number") {
|
|
6432
|
+
deterministicIssues.push({
|
|
6433
|
+
code: "definition_missing_page_number",
|
|
6434
|
+
severity: "warning",
|
|
6435
|
+
message: `Definition "${term}" is missing page provenance.`,
|
|
6436
|
+
extractorName: "definitions",
|
|
6437
|
+
formNumber: normalizeFormNumber(definition.formNumber),
|
|
6438
|
+
itemName: term
|
|
6439
|
+
});
|
|
6440
|
+
}
|
|
6441
|
+
}
|
|
6442
|
+
for (const coveredReason of coveredReasons) {
|
|
6443
|
+
const itemName = typeof coveredReason.name === "string" ? coveredReason.name : typeof coveredReason.reason === "string" ? coveredReason.reason : typeof coveredReason.title === "string" ? coveredReason.title : "unknown";
|
|
6444
|
+
const content = typeof coveredReason.content === "string" ? coveredReason.content : typeof coveredReason.description === "string" ? coveredReason.description : "";
|
|
6445
|
+
if (!content.trim()) {
|
|
6446
|
+
deterministicIssues.push({
|
|
6447
|
+
code: "covered_reason_missing_content",
|
|
6448
|
+
severity: "warning",
|
|
6449
|
+
message: `Covered reason "${itemName}" is missing substantive text.`,
|
|
6450
|
+
extractorName: "covered_reasons",
|
|
6451
|
+
formNumber: normalizeFormNumber(coveredReason.formNumber),
|
|
6452
|
+
pageNumber: typeof coveredReason.pageNumber === "number" ? coveredReason.pageNumber : typeof coveredReason.pageStart === "number" ? coveredReason.pageStart : void 0,
|
|
6453
|
+
itemName
|
|
6454
|
+
});
|
|
6455
|
+
}
|
|
6456
|
+
if (typeof coveredReason.pageNumber !== "number" && typeof coveredReason.pageStart !== "number") {
|
|
6457
|
+
deterministicIssues.push({
|
|
6458
|
+
code: "covered_reason_missing_page_number",
|
|
6459
|
+
severity: "warning",
|
|
6460
|
+
message: `Covered reason "${itemName}" is missing page provenance.`,
|
|
6461
|
+
extractorName: "covered_reasons",
|
|
6462
|
+
formNumber: normalizeFormNumber(coveredReason.formNumber),
|
|
6463
|
+
itemName
|
|
6464
|
+
});
|
|
6465
|
+
}
|
|
6466
|
+
if (looksReferential(content) || looksReferential(coveredReason.reason)) {
|
|
6467
|
+
deterministicIssues.push({
|
|
6468
|
+
code: "covered_reason_referential_value",
|
|
6469
|
+
severity: "warning",
|
|
6470
|
+
message: `Covered reason "${itemName}" contains referential language instead of the extracted covered cause wording.`,
|
|
6471
|
+
extractorName: "covered_reasons",
|
|
6472
|
+
formNumber: normalizeFormNumber(coveredReason.formNumber),
|
|
6473
|
+
pageNumber: typeof coveredReason.pageNumber === "number" ? coveredReason.pageNumber : typeof coveredReason.pageStart === "number" ? coveredReason.pageStart : void 0,
|
|
6474
|
+
itemName
|
|
6475
|
+
});
|
|
6476
|
+
}
|
|
6477
|
+
}
|
|
5619
6478
|
for (const section of sections) {
|
|
5620
6479
|
if (typeof section.content === "string" && section.content.trim().length < 120 && typeof section.pageStart === "number" && (!("pageEnd" in section) || section.pageEnd === section.pageStart || section.pageEnd === void 0)) {
|
|
5621
6480
|
deterministicIssues.push({
|
|
@@ -5638,6 +6497,8 @@ function buildExtractionReviewReport(params) {
|
|
|
5638
6497
|
const artifacts = [
|
|
5639
6498
|
{ kind: "form_inventory", label: "Form Inventory", itemCount: formInventory.length },
|
|
5640
6499
|
{ kind: "page_map", label: "Page Map", itemCount: params.pageAssignments.length },
|
|
6500
|
+
{ kind: "definitions", label: "Definitions", itemCount: definitions.length },
|
|
6501
|
+
{ kind: "covered_reasons", label: "Covered Reasons", itemCount: coveredReasons.length },
|
|
5641
6502
|
{ kind: "referential_resolution", label: "Referential Resolution", itemCount: coverages.filter((c) => c.limitValueType === "referential" || c.limitValueType === "as_stated" || c.deductibleValueType === "referential" || c.deductibleValueType === "as_stated").length }
|
|
5642
6503
|
];
|
|
5643
6504
|
const qualityGateStatus = evaluateQualityGate({
|
|
@@ -5663,6 +6524,134 @@ function toReviewRoundRecord(round, review) {
|
|
|
5663
6524
|
};
|
|
5664
6525
|
}
|
|
5665
6526
|
|
|
6527
|
+
// src/extraction/planning.ts
|
|
6528
|
+
function normalizePageAssignments(pageAssignments, formInventory) {
|
|
6529
|
+
const pageFormTypes = /* @__PURE__ */ new Map();
|
|
6530
|
+
if (formInventory) {
|
|
6531
|
+
for (const form of formInventory.forms) {
|
|
6532
|
+
if (form.pageStart != null) {
|
|
6533
|
+
const end = form.pageEnd ?? form.pageStart;
|
|
6534
|
+
for (let p = form.pageStart; p <= end; p += 1) {
|
|
6535
|
+
const types = pageFormTypes.get(p) ?? /* @__PURE__ */ new Set();
|
|
6536
|
+
types.add(form.formType);
|
|
6537
|
+
pageFormTypes.set(p, types);
|
|
6538
|
+
}
|
|
6539
|
+
}
|
|
6540
|
+
}
|
|
6541
|
+
}
|
|
6542
|
+
return pageAssignments.map((assignment) => {
|
|
6543
|
+
let extractorNames = [...new Set(
|
|
6544
|
+
(assignment.extractorNames.length > 0 ? assignment.extractorNames : ["sections"]).filter(Boolean)
|
|
6545
|
+
)];
|
|
6546
|
+
const hasDeclarations = extractorNames.includes("declarations");
|
|
6547
|
+
const hasConditions = extractorNames.includes("conditions");
|
|
6548
|
+
const hasExclusions = extractorNames.includes("exclusions");
|
|
6549
|
+
const hasEndorsements = extractorNames.includes("endorsements");
|
|
6550
|
+
const looksLikeScheduleValues = assignment.hasScheduleValues === true;
|
|
6551
|
+
const roleBlocksCoverageLimits = assignment.pageRole === "policy_form" || assignment.pageRole === "condition_exclusion_form" || assignment.pageRole === "endorsement_form";
|
|
6552
|
+
const inventoryTypes = pageFormTypes.get(assignment.localPageNumber);
|
|
6553
|
+
const inventoryBlocksCoverageLimits = inventoryTypes != null && !looksLikeScheduleValues && !hasDeclarations && (inventoryTypes.has("endorsement") || inventoryTypes.has("notice") || inventoryTypes.has("application"));
|
|
6554
|
+
if (extractorNames.includes("coverage_limits")) {
|
|
6555
|
+
const shouldDropCoverageLimits = inventoryBlocksCoverageLimits || !looksLikeScheduleValues && roleBlocksCoverageLimits || !hasDeclarations && !looksLikeScheduleValues && (hasConditions || hasExclusions) || !hasDeclarations && !looksLikeScheduleValues && hasEndorsements;
|
|
6556
|
+
if (shouldDropCoverageLimits) {
|
|
6557
|
+
extractorNames = extractorNames.filter((name) => name !== "coverage_limits");
|
|
6558
|
+
}
|
|
6559
|
+
}
|
|
6560
|
+
if (inventoryTypes?.has("endorsement") && !extractorNames.includes("endorsements")) {
|
|
6561
|
+
extractorNames = [...extractorNames, "endorsements"];
|
|
6562
|
+
}
|
|
6563
|
+
if (extractorNames.length === 0) {
|
|
6564
|
+
extractorNames = ["sections"];
|
|
6565
|
+
}
|
|
6566
|
+
return {
|
|
6567
|
+
...assignment,
|
|
6568
|
+
extractorNames
|
|
6569
|
+
};
|
|
6570
|
+
});
|
|
6571
|
+
}
|
|
6572
|
+
function buildTemplateHints(primaryType, documentType, pageCount, template) {
|
|
6573
|
+
return [
|
|
6574
|
+
`Document type: ${primaryType} ${documentType}`,
|
|
6575
|
+
`Expected sections: ${template.expectedSections.join(", ")}`,
|
|
6576
|
+
`Page hints: ${Object.entries(template.pageHints).map(([k, v]) => `${k}: ${v}`).join("; ")}`,
|
|
6577
|
+
`Total pages: ${pageCount}`
|
|
6578
|
+
].join("\n");
|
|
6579
|
+
}
|
|
6580
|
+
function groupContiguousPages(pages) {
|
|
6581
|
+
if (pages.length === 0) return [];
|
|
6582
|
+
const sorted = [...new Set(pages)].sort((a, b) => a - b);
|
|
6583
|
+
const ranges = [];
|
|
6584
|
+
let start = sorted[0];
|
|
6585
|
+
let previous = sorted[0];
|
|
6586
|
+
for (let i = 1; i < sorted.length; i += 1) {
|
|
6587
|
+
const current = sorted[i];
|
|
6588
|
+
if (current === previous + 1) {
|
|
6589
|
+
previous = current;
|
|
6590
|
+
continue;
|
|
6591
|
+
}
|
|
6592
|
+
ranges.push({ startPage: start, endPage: previous });
|
|
6593
|
+
start = current;
|
|
6594
|
+
previous = current;
|
|
6595
|
+
}
|
|
6596
|
+
ranges.push({ startPage: start, endPage: previous });
|
|
6597
|
+
return ranges;
|
|
6598
|
+
}
|
|
6599
|
+
function buildPlanFromPageAssignments(pageAssignments, pageCount, formInventory) {
|
|
6600
|
+
const extractorPages = /* @__PURE__ */ new Map();
|
|
6601
|
+
for (const assignment of pageAssignments) {
|
|
6602
|
+
const extractors = assignment.extractorNames.length > 0 ? assignment.extractorNames : ["sections"];
|
|
6603
|
+
for (const extractorName of extractors) {
|
|
6604
|
+
extractorPages.set(extractorName, [...extractorPages.get(extractorName) ?? [], assignment.localPageNumber]);
|
|
6605
|
+
}
|
|
6606
|
+
}
|
|
6607
|
+
const coveredPages = /* @__PURE__ */ new Set();
|
|
6608
|
+
for (const pages of extractorPages.values()) {
|
|
6609
|
+
for (const page of pages) coveredPages.add(page);
|
|
6610
|
+
}
|
|
6611
|
+
for (let page = 1; page <= pageCount; page += 1) {
|
|
6612
|
+
if (!coveredPages.has(page)) {
|
|
6613
|
+
extractorPages.set("sections", [...extractorPages.get("sections") ?? [], page]);
|
|
6614
|
+
}
|
|
6615
|
+
}
|
|
6616
|
+
const contextualExtractors = /* @__PURE__ */ new Set(["conditions", "covered_reasons", "definitions", "exclusions", "endorsements"]);
|
|
6617
|
+
const contextualForms = (formInventory?.forms ?? []).filter(
|
|
6618
|
+
(form) => form.pageStart != null && (form.pageEnd ?? form.pageStart) != null
|
|
6619
|
+
);
|
|
6620
|
+
const expandPagesToFormRanges = (extractorName, pages) => {
|
|
6621
|
+
if (!contextualExtractors.has(extractorName)) return pages;
|
|
6622
|
+
const expanded = new Set(pages);
|
|
6623
|
+
for (const page of pages) {
|
|
6624
|
+
for (const form of contextualForms) {
|
|
6625
|
+
const pageStart = form.pageStart;
|
|
6626
|
+
const pageEnd = form.pageEnd ?? form.pageStart;
|
|
6627
|
+
const formType = form.formType;
|
|
6628
|
+
const supportsContextualExpansion = extractorName === "endorsements" ? formType === "endorsement" : formType === "coverage" || formType === "endorsement";
|
|
6629
|
+
if (!supportsContextualExpansion) continue;
|
|
6630
|
+
if (page < pageStart || page > pageEnd) continue;
|
|
6631
|
+
for (let current = pageStart; current <= pageEnd; current += 1) {
|
|
6632
|
+
expanded.add(current);
|
|
6633
|
+
}
|
|
6634
|
+
}
|
|
6635
|
+
}
|
|
6636
|
+
return [...expanded].sort((a, b) => a - b);
|
|
6637
|
+
};
|
|
6638
|
+
const tasks = [...extractorPages.entries()].flatMap(
|
|
6639
|
+
([extractorName, pages]) => groupContiguousPages(expandPagesToFormRanges(extractorName, pages)).map(({ startPage, endPage }) => ({
|
|
6640
|
+
extractorName,
|
|
6641
|
+
startPage,
|
|
6642
|
+
endPage,
|
|
6643
|
+
description: `Page-mapped ${extractorName} extraction for pages ${startPage}-${endPage}`
|
|
6644
|
+
}))
|
|
6645
|
+
).sort((a, b) => a.startPage - b.startPage || a.extractorName.localeCompare(b.extractorName));
|
|
6646
|
+
return {
|
|
6647
|
+
tasks,
|
|
6648
|
+
pageMap: [...extractorPages.entries()].map(([section, pages]) => ({
|
|
6649
|
+
section,
|
|
6650
|
+
pages: `pages ${[...new Set(pages)].sort((a, b) => a - b).join(", ")}`
|
|
6651
|
+
}))
|
|
6652
|
+
};
|
|
6653
|
+
}
|
|
6654
|
+
|
|
5666
6655
|
// src/extraction/coordinator.ts
|
|
5667
6656
|
function createExtractor(config) {
|
|
5668
6657
|
const {
|
|
@@ -5679,6 +6668,7 @@ function createExtractor(config) {
|
|
|
5679
6668
|
onCheckpointSave
|
|
5680
6669
|
} = config;
|
|
5681
6670
|
const limit = pLimit(concurrency);
|
|
6671
|
+
const extractorCatalog = formatExtractorCatalogForPrompt();
|
|
5682
6672
|
let totalUsage = { inputTokens: 0, outputTokens: 0 };
|
|
5683
6673
|
let modelCalls = 0;
|
|
5684
6674
|
let callsWithUsage = 0;
|
|
@@ -5699,32 +6689,56 @@ function createExtractor(config) {
|
|
|
5699
6689
|
memory.set(name, mergeExtractorResult(name, existing, data));
|
|
5700
6690
|
}
|
|
5701
6691
|
function summarizeExtraction(memory) {
|
|
5702
|
-
const
|
|
5703
|
-
const
|
|
5704
|
-
const
|
|
5705
|
-
const
|
|
5706
|
-
const
|
|
5707
|
-
const
|
|
5708
|
-
const
|
|
6692
|
+
const declarationResult = readMemoryRecord(memory, "declarations");
|
|
6693
|
+
const endorsements = readRecordArray(readMemoryRecord(memory, "endorsements"), "endorsements") ?? [];
|
|
6694
|
+
const exclusions = readRecordArray(readMemoryRecord(memory, "exclusions"), "exclusions") ?? [];
|
|
6695
|
+
const conditions = readRecordArray(readMemoryRecord(memory, "conditions"), "conditions") ?? [];
|
|
6696
|
+
const sections = getSections(memory) ?? [];
|
|
6697
|
+
const definitions = getDefinitions(memory) ?? sections.filter((section) => section.type === "definition");
|
|
6698
|
+
const coveredReasons = getCoveredReasons(memory) ?? sections.filter(looksCoveredReasonSection);
|
|
6699
|
+
const coverages = getCoverageLimitCoverages(memory);
|
|
6700
|
+
const coverageSummary = coverages.slice(0, 12).map((coverage) => ({
|
|
5709
6701
|
name: coverage.name,
|
|
5710
6702
|
limit: coverage.limit,
|
|
5711
6703
|
deductible: coverage.deductible,
|
|
5712
6704
|
formNumber: coverage.formNumber
|
|
5713
|
-
}))
|
|
6705
|
+
}));
|
|
5714
6706
|
return JSON.stringify({
|
|
5715
6707
|
extractedKeys: [...memory.keys()].filter((key) => key !== "classify"),
|
|
5716
6708
|
declarationFieldCount: Array.isArray(declarationResult?.fields) ? declarationResult.fields.length : 0,
|
|
5717
|
-
coverageCount:
|
|
6709
|
+
coverageCount: coverages.length,
|
|
5718
6710
|
coverageSamples: coverageSummary,
|
|
5719
|
-
endorsementCount:
|
|
5720
|
-
exclusionCount:
|
|
5721
|
-
conditionCount:
|
|
5722
|
-
|
|
6711
|
+
endorsementCount: endorsements.length,
|
|
6712
|
+
exclusionCount: exclusions.length,
|
|
6713
|
+
conditionCount: conditions.length,
|
|
6714
|
+
definitionCount: definitions.length,
|
|
6715
|
+
coveredReasonCount: coveredReasons.length,
|
|
6716
|
+
sectionCount: sections.length
|
|
5723
6717
|
}, null, 2);
|
|
5724
6718
|
}
|
|
6719
|
+
function textIncludesSupplementarySignal(value) {
|
|
6720
|
+
if (typeof value !== "string") return false;
|
|
6721
|
+
return /\b(supplementary|regulatory|department of insurance|ombudsman|complaint|claim|claims|contact|phone|email|cancellation|cancelled|nonrenewal|non-renewal|non renew|notice|governing law|jurisdiction|third[- ]party administrator|tpa)\b/i.test(value);
|
|
6722
|
+
}
|
|
6723
|
+
function hasSupplementaryExtractionSignal(pageAssignments, formInventory, memory) {
|
|
6724
|
+
const hasPageSignal = pageAssignments.some(
|
|
6725
|
+
(assignment) => assignment.pageRole === "supplementary" || assignment.extractorNames.includes("supplementary") || textIncludesSupplementarySignal(assignment.notes)
|
|
6726
|
+
);
|
|
6727
|
+
if (hasPageSignal) return true;
|
|
6728
|
+
const hasFormSignal = (formInventory?.forms ?? []).some(
|
|
6729
|
+
(form) => form.formType === "notice" || textIncludesSupplementarySignal(form.title) || textIncludesSupplementarySignal(form.formNumber)
|
|
6730
|
+
);
|
|
6731
|
+
if (hasFormSignal) return true;
|
|
6732
|
+
const likelySupplementaryKeys = ["sections", "conditions", "endorsements", "exclusions"];
|
|
6733
|
+
return likelySupplementaryKeys.some((key) => {
|
|
6734
|
+
const value = memory.get(key);
|
|
6735
|
+
if (!value) return false;
|
|
6736
|
+
return textIncludesSupplementarySignal(JSON.stringify(value));
|
|
6737
|
+
});
|
|
6738
|
+
}
|
|
5725
6739
|
function buildAlreadyExtractedSummary(memory) {
|
|
5726
6740
|
const lines = [];
|
|
5727
|
-
const declarationResult = memory
|
|
6741
|
+
const declarationResult = readMemoryRecord(memory, "declarations");
|
|
5728
6742
|
if (Array.isArray(declarationResult?.fields)) {
|
|
5729
6743
|
for (const field of declarationResult.fields) {
|
|
5730
6744
|
if (field.key && field.value) {
|
|
@@ -5733,20 +6747,17 @@ function createExtractor(config) {
|
|
|
5733
6747
|
}
|
|
5734
6748
|
}
|
|
5735
6749
|
}
|
|
5736
|
-
const
|
|
5737
|
-
|
|
5738
|
-
|
|
5739
|
-
const parts = [cov.name, cov.limit && `limit=${cov.limit}`, cov.deductible && `deductible=${cov.deductible}`].filter(Boolean);
|
|
5740
|
-
if (parts.length > 0) lines.push(`- coverage: ${parts.join(", ")}`);
|
|
5741
|
-
}
|
|
6750
|
+
for (const cov of getCoverageLimitCoverages(memory)) {
|
|
6751
|
+
const parts = [cov.name, cov.limit && `limit=${cov.limit}`, cov.deductible && `deductible=${cov.deductible}`].filter(Boolean);
|
|
6752
|
+
if (parts.length > 0) lines.push(`- coverage: ${parts.join(", ")}`);
|
|
5742
6753
|
}
|
|
5743
|
-
const namedInsured = memory
|
|
6754
|
+
const namedInsured = getNamedInsured(memory);
|
|
5744
6755
|
if (namedInsured) {
|
|
5745
6756
|
for (const [key, value] of Object.entries(namedInsured)) {
|
|
5746
6757
|
if (value && typeof value === "string") lines.push(`- ${key}: ${value}`);
|
|
5747
6758
|
}
|
|
5748
6759
|
}
|
|
5749
|
-
const carrierInfo = memory
|
|
6760
|
+
const carrierInfo = getCarrierInfo(memory);
|
|
5750
6761
|
if (carrierInfo) {
|
|
5751
6762
|
for (const [key, value] of Object.entries(carrierInfo)) {
|
|
5752
6763
|
if (value && typeof value === "string") lines.push(`- ${key}: ${value}`);
|
|
@@ -5754,141 +6765,43 @@ function createExtractor(config) {
|
|
|
5754
6765
|
}
|
|
5755
6766
|
return lines.length > 0 ? lines.join("\n") : "";
|
|
5756
6767
|
}
|
|
5757
|
-
function
|
|
5758
|
-
|
|
5759
|
-
|
|
5760
|
-
|
|
5761
|
-
|
|
5762
|
-
|
|
5763
|
-
|
|
5764
|
-
|
|
5765
|
-
|
|
5766
|
-
|
|
5767
|
-
|
|
5768
|
-
|
|
5769
|
-
|
|
5770
|
-
|
|
5771
|
-
|
|
5772
|
-
|
|
5773
|
-
|
|
5774
|
-
const types = pageFormTypes.get(p) ?? /* @__PURE__ */ new Set();
|
|
5775
|
-
types.add(form.formType);
|
|
5776
|
-
pageFormTypes.set(p, types);
|
|
5777
|
-
}
|
|
5778
|
-
}
|
|
5779
|
-
}
|
|
6768
|
+
async function runFocusedExtractorTask(task, pdfInput, memory) {
|
|
6769
|
+
if (task.extractorName === "supplementary") {
|
|
6770
|
+
const alreadyExtractedSummary = buildAlreadyExtractedSummary(memory);
|
|
6771
|
+
const result = await runExtractor({
|
|
6772
|
+
name: "supplementary",
|
|
6773
|
+
prompt: buildSupplementaryPrompt(alreadyExtractedSummary),
|
|
6774
|
+
schema: SupplementarySchema,
|
|
6775
|
+
pdfInput,
|
|
6776
|
+
startPage: task.startPage,
|
|
6777
|
+
endPage: task.endPage,
|
|
6778
|
+
generateObject,
|
|
6779
|
+
convertPdfToImages,
|
|
6780
|
+
maxTokens: 4096,
|
|
6781
|
+
providerOptions
|
|
6782
|
+
});
|
|
6783
|
+
trackUsage(result.usage);
|
|
6784
|
+
return result;
|
|
5780
6785
|
}
|
|
5781
|
-
return
|
|
5782
|
-
|
|
5783
|
-
|
|
5784
|
-
|
|
5785
|
-
|
|
5786
|
-
|
|
5787
|
-
|
|
5788
|
-
|
|
5789
|
-
const looksLikeScheduleValues = assignment.hasScheduleValues === true;
|
|
5790
|
-
const roleBlocksCoverageLimits = assignment.pageRole === "policy_form" || assignment.pageRole === "condition_exclusion_form" || assignment.pageRole === "endorsement_form";
|
|
5791
|
-
const inventoryTypes = pageFormTypes.get(assignment.localPageNumber);
|
|
5792
|
-
const inventoryBlocksCoverageLimits = inventoryTypes != null && !looksLikeScheduleValues && !hasDeclarations && (inventoryTypes.has("endorsement") || inventoryTypes.has("notice") || inventoryTypes.has("application"));
|
|
5793
|
-
if (extractorNames.includes("coverage_limits")) {
|
|
5794
|
-
const shouldDropCoverageLimits = inventoryBlocksCoverageLimits || !looksLikeScheduleValues && roleBlocksCoverageLimits || !hasDeclarations && !looksLikeScheduleValues && (hasConditions || hasExclusions) || !hasDeclarations && !looksLikeScheduleValues && hasEndorsements;
|
|
5795
|
-
if (shouldDropCoverageLimits) {
|
|
5796
|
-
extractorNames = extractorNames.filter((name) => name !== "coverage_limits");
|
|
5797
|
-
}
|
|
5798
|
-
}
|
|
5799
|
-
if (inventoryTypes?.has("endorsement") && !extractorNames.includes("endorsements")) {
|
|
5800
|
-
extractorNames = [...extractorNames, "endorsements"];
|
|
5801
|
-
}
|
|
5802
|
-
if (extractorNames.length === 0) {
|
|
5803
|
-
extractorNames = ["sections"];
|
|
5804
|
-
}
|
|
5805
|
-
return {
|
|
5806
|
-
...assignment,
|
|
5807
|
-
extractorNames
|
|
5808
|
-
};
|
|
6786
|
+
return runFocusedExtractorWithFallback({
|
|
6787
|
+
task,
|
|
6788
|
+
pdfInput,
|
|
6789
|
+
generateObject,
|
|
6790
|
+
convertPdfToImages,
|
|
6791
|
+
providerOptions,
|
|
6792
|
+
trackUsage,
|
|
6793
|
+
log
|
|
5809
6794
|
});
|
|
5810
6795
|
}
|
|
5811
|
-
function
|
|
5812
|
-
return [
|
|
5813
|
-
`Document type: ${primaryType} ${documentType}`,
|
|
5814
|
-
`Expected sections: ${template.expectedSections.join(", ")}`,
|
|
5815
|
-
`Page hints: ${Object.entries(template.pageHints).map(([k, v]) => `${k}: ${v}`).join("; ")}`,
|
|
5816
|
-
`Total pages: ${pageCount}`
|
|
5817
|
-
].join("\n");
|
|
5818
|
-
}
|
|
5819
|
-
function groupContiguousPages(pages) {
|
|
5820
|
-
if (pages.length === 0) return [];
|
|
5821
|
-
const sorted = [...new Set(pages)].sort((a, b) => a - b);
|
|
5822
|
-
const ranges = [];
|
|
5823
|
-
let start = sorted[0];
|
|
5824
|
-
let previous = sorted[0];
|
|
5825
|
-
for (let i = 1; i < sorted.length; i += 1) {
|
|
5826
|
-
const current = sorted[i];
|
|
5827
|
-
if (current === previous + 1) {
|
|
5828
|
-
previous = current;
|
|
5829
|
-
continue;
|
|
5830
|
-
}
|
|
5831
|
-
ranges.push({ startPage: start, endPage: previous });
|
|
5832
|
-
start = current;
|
|
5833
|
-
previous = current;
|
|
5834
|
-
}
|
|
5835
|
-
ranges.push({ startPage: start, endPage: previous });
|
|
5836
|
-
return ranges;
|
|
5837
|
-
}
|
|
5838
|
-
function buildPlanFromPageAssignments(pageAssignments, pageCount, formInventory) {
|
|
6796
|
+
function formatPageMapSummary(pageAssignments) {
|
|
5839
6797
|
const extractorPages = /* @__PURE__ */ new Map();
|
|
5840
6798
|
for (const assignment of pageAssignments) {
|
|
5841
|
-
const
|
|
5842
|
-
for (const extractorName of extractors) {
|
|
6799
|
+
for (const extractorName of assignment.extractorNames) {
|
|
5843
6800
|
extractorPages.set(extractorName, [...extractorPages.get(extractorName) ?? [], assignment.localPageNumber]);
|
|
5844
6801
|
}
|
|
5845
6802
|
}
|
|
5846
|
-
|
|
5847
|
-
|
|
5848
|
-
for (const page of pages) coveredPages.add(page);
|
|
5849
|
-
}
|
|
5850
|
-
for (let page = 1; page <= pageCount; page += 1) {
|
|
5851
|
-
if (!coveredPages.has(page)) {
|
|
5852
|
-
extractorPages.set("sections", [...extractorPages.get("sections") ?? [], page]);
|
|
5853
|
-
}
|
|
5854
|
-
}
|
|
5855
|
-
const contextualExtractors = /* @__PURE__ */ new Set(["conditions", "exclusions", "endorsements"]);
|
|
5856
|
-
const contextualForms = (formInventory?.forms ?? []).filter(
|
|
5857
|
-
(form) => form.pageStart != null && (form.pageEnd ?? form.pageStart) != null
|
|
5858
|
-
);
|
|
5859
|
-
const expandPagesToFormRanges = (extractorName, pages) => {
|
|
5860
|
-
if (!contextualExtractors.has(extractorName)) return pages;
|
|
5861
|
-
const expanded = new Set(pages);
|
|
5862
|
-
for (const page of pages) {
|
|
5863
|
-
for (const form of contextualForms) {
|
|
5864
|
-
const pageStart = form.pageStart;
|
|
5865
|
-
const pageEnd = form.pageEnd ?? form.pageStart;
|
|
5866
|
-
const formType = form.formType;
|
|
5867
|
-
const supportsContextualExpansion = extractorName === "endorsements" ? formType === "endorsement" : formType === "coverage" || formType === "endorsement";
|
|
5868
|
-
if (!supportsContextualExpansion) continue;
|
|
5869
|
-
if (page < pageStart || page > pageEnd) continue;
|
|
5870
|
-
for (let current = pageStart; current <= pageEnd; current += 1) {
|
|
5871
|
-
expanded.add(current);
|
|
5872
|
-
}
|
|
5873
|
-
}
|
|
5874
|
-
}
|
|
5875
|
-
return [...expanded].sort((a, b) => a - b);
|
|
5876
|
-
};
|
|
5877
|
-
const tasks = [...extractorPages.entries()].flatMap(
|
|
5878
|
-
([extractorName, pages]) => groupContiguousPages(expandPagesToFormRanges(extractorName, pages)).map(({ startPage, endPage }) => ({
|
|
5879
|
-
extractorName,
|
|
5880
|
-
startPage,
|
|
5881
|
-
endPage,
|
|
5882
|
-
description: `Page-mapped ${extractorName} extraction for pages ${startPage}-${endPage}`
|
|
5883
|
-
}))
|
|
5884
|
-
).sort((a, b) => a.startPage - b.startPage || a.extractorName.localeCompare(b.extractorName));
|
|
5885
|
-
return {
|
|
5886
|
-
tasks,
|
|
5887
|
-
pageMap: [...extractorPages.entries()].map(([section, pages]) => ({
|
|
5888
|
-
section,
|
|
5889
|
-
pages: `pages ${[...new Set(pages)].sort((a, b) => a - b).join(", ")}`
|
|
5890
|
-
}))
|
|
5891
|
-
};
|
|
6803
|
+
if (extractorPages.size === 0) return "No page assignments available.";
|
|
6804
|
+
return [...extractorPages.entries()].map(([extractorName, pages]) => `${extractorName}: ${pages.length} page(s), pages ${pages.join(", ")}`).join("\n");
|
|
5892
6805
|
}
|
|
5893
6806
|
async function extract(pdfInput, documentId, options) {
|
|
5894
6807
|
const id = documentId ?? `doc-${Date.now()}`;
|
|
@@ -5900,7 +6813,8 @@ function createExtractor(config) {
|
|
|
5900
6813
|
const pipelineCtx = createPipelineContext({
|
|
5901
6814
|
id,
|
|
5902
6815
|
onSave: onCheckpointSave,
|
|
5903
|
-
resumeFrom: options?.resumeFrom
|
|
6816
|
+
resumeFrom: options?.resumeFrom,
|
|
6817
|
+
phaseOrder: ["classify", "form_inventory", "page_map", "plan", "extract", "resolve_referential", "review", "assemble"]
|
|
5904
6818
|
});
|
|
5905
6819
|
const resumed = pipelineCtx.getCheckpoint()?.state;
|
|
5906
6820
|
if (resumed?.memory) {
|
|
@@ -6068,40 +6982,18 @@ function createExtractor(config) {
|
|
|
6068
6982
|
const extractorResults = await Promise.all(
|
|
6069
6983
|
tasks.map(
|
|
6070
6984
|
(task) => limit(async () => {
|
|
6071
|
-
const ext = getExtractor(task.extractorName);
|
|
6072
|
-
if (!ext) {
|
|
6073
|
-
await log?.(`Unknown extractor: ${task.extractorName}, skipping`);
|
|
6074
|
-
return null;
|
|
6075
|
-
}
|
|
6076
6985
|
onProgress?.(`Extracting ${task.extractorName} (pages ${task.startPage}-${task.endPage})...`);
|
|
6077
|
-
|
|
6078
|
-
const result = await runExtractor({
|
|
6079
|
-
name: task.extractorName,
|
|
6080
|
-
prompt: ext.buildPrompt(),
|
|
6081
|
-
schema: ext.schema,
|
|
6082
|
-
pdfInput,
|
|
6083
|
-
startPage: task.startPage,
|
|
6084
|
-
endPage: task.endPage,
|
|
6085
|
-
generateObject,
|
|
6086
|
-
convertPdfToImages,
|
|
6087
|
-
maxTokens: ext.maxTokens ?? 4096,
|
|
6088
|
-
providerOptions
|
|
6089
|
-
});
|
|
6090
|
-
trackUsage(result.usage);
|
|
6091
|
-
return result;
|
|
6092
|
-
} catch (error) {
|
|
6093
|
-
await log?.(`Extractor ${task.extractorName} failed: ${error}`);
|
|
6094
|
-
return null;
|
|
6095
|
-
}
|
|
6986
|
+
return runFocusedExtractorTask(task, pdfInput, memory);
|
|
6096
6987
|
})
|
|
6097
6988
|
)
|
|
6098
6989
|
);
|
|
6099
|
-
for (const result of extractorResults) {
|
|
6990
|
+
for (const result of extractorResults.flatMap((item) => Array.isArray(item) ? item : item ? [item] : [])) {
|
|
6100
6991
|
if (result) {
|
|
6101
6992
|
mergeMemoryResult(result.name, result.data, memory);
|
|
6102
6993
|
}
|
|
6103
6994
|
}
|
|
6104
|
-
|
|
6995
|
+
const planIncludesSupplementary = tasks.some((task) => task.extractorName === "supplementary");
|
|
6996
|
+
if (!planIncludesSupplementary && hasSupplementaryExtractionSignal(pageAssignments, formInventory, memory)) {
|
|
6105
6997
|
onProgress?.("Extracting supplementary retrieval facts...");
|
|
6106
6998
|
try {
|
|
6107
6999
|
const alreadyExtractedSummary = buildAlreadyExtractedSummary(memory);
|
|
@@ -6175,7 +7067,7 @@ function createExtractor(config) {
|
|
|
6175
7067
|
const reviewResponse = await safeGenerateObject(
|
|
6176
7068
|
generateObject,
|
|
6177
7069
|
{
|
|
6178
|
-
prompt: buildReviewPrompt(template.required, extractedKeys, extractionSummary, pageMapSummary),
|
|
7070
|
+
prompt: buildReviewPrompt(template.required, extractedKeys, extractionSummary, pageMapSummary, extractorCatalog),
|
|
6179
7071
|
schema: ReviewResultSchema,
|
|
6180
7072
|
maxTokens: 1536,
|
|
6181
7073
|
providerOptions: await buildPdfProviderOptions(pdfInput, providerOptions)
|
|
@@ -6199,31 +7091,11 @@ function createExtractor(config) {
|
|
|
6199
7091
|
const followUpResults = await Promise.all(
|
|
6200
7092
|
reviewResponse.object.additionalTasks.map(
|
|
6201
7093
|
(task) => limit(async () => {
|
|
6202
|
-
|
|
6203
|
-
if (!ext) return null;
|
|
6204
|
-
try {
|
|
6205
|
-
const result = await runExtractor({
|
|
6206
|
-
name: task.extractorName,
|
|
6207
|
-
prompt: ext.buildPrompt(),
|
|
6208
|
-
schema: ext.schema,
|
|
6209
|
-
pdfInput,
|
|
6210
|
-
startPage: task.startPage,
|
|
6211
|
-
endPage: task.endPage,
|
|
6212
|
-
generateObject,
|
|
6213
|
-
convertPdfToImages,
|
|
6214
|
-
maxTokens: ext.maxTokens ?? 4096,
|
|
6215
|
-
providerOptions
|
|
6216
|
-
});
|
|
6217
|
-
trackUsage(result.usage);
|
|
6218
|
-
return result;
|
|
6219
|
-
} catch (error) {
|
|
6220
|
-
await log?.(`Follow-up extractor ${task.extractorName} failed: ${error}`);
|
|
6221
|
-
return null;
|
|
6222
|
-
}
|
|
7094
|
+
return runFocusedExtractorTask(task, pdfInput, memory);
|
|
6223
7095
|
})
|
|
6224
7096
|
)
|
|
6225
7097
|
);
|
|
6226
|
-
for (const result of followUpResults) {
|
|
7098
|
+
for (const result of followUpResults.flatMap((item) => Array.isArray(item) ? item : item ? [item] : [])) {
|
|
6227
7099
|
if (result) {
|
|
6228
7100
|
mergeMemoryResult(result.name, result.data, memory);
|
|
6229
7101
|
}
|
|
@@ -6539,8 +7411,8 @@ Respond with JSON only:
|
|
|
6539
7411
|
}`;
|
|
6540
7412
|
|
|
6541
7413
|
// src/schemas/application.ts
|
|
6542
|
-
import { z as
|
|
6543
|
-
var FieldTypeSchema =
|
|
7414
|
+
import { z as z38 } from "zod";
|
|
7415
|
+
var FieldTypeSchema = z38.enum([
|
|
6544
7416
|
"text",
|
|
6545
7417
|
"numeric",
|
|
6546
7418
|
"currency",
|
|
@@ -6549,131 +7421,131 @@ var FieldTypeSchema = z36.enum([
|
|
|
6549
7421
|
"table",
|
|
6550
7422
|
"declaration"
|
|
6551
7423
|
]);
|
|
6552
|
-
var ApplicationFieldSchema =
|
|
6553
|
-
id:
|
|
6554
|
-
label:
|
|
6555
|
-
section:
|
|
7424
|
+
var ApplicationFieldSchema = z38.object({
|
|
7425
|
+
id: z38.string(),
|
|
7426
|
+
label: z38.string(),
|
|
7427
|
+
section: z38.string(),
|
|
6556
7428
|
fieldType: FieldTypeSchema,
|
|
6557
|
-
required:
|
|
6558
|
-
options:
|
|
6559
|
-
columns:
|
|
6560
|
-
requiresExplanationIfYes:
|
|
6561
|
-
condition:
|
|
6562
|
-
dependsOn:
|
|
6563
|
-
whenValue:
|
|
7429
|
+
required: z38.boolean(),
|
|
7430
|
+
options: z38.array(z38.string()).optional(),
|
|
7431
|
+
columns: z38.array(z38.string()).optional(),
|
|
7432
|
+
requiresExplanationIfYes: z38.boolean().optional(),
|
|
7433
|
+
condition: z38.object({
|
|
7434
|
+
dependsOn: z38.string(),
|
|
7435
|
+
whenValue: z38.string()
|
|
6564
7436
|
}).optional(),
|
|
6565
|
-
value:
|
|
6566
|
-
source:
|
|
6567
|
-
confidence:
|
|
6568
|
-
});
|
|
6569
|
-
var ApplicationClassifyResultSchema =
|
|
6570
|
-
isApplication:
|
|
6571
|
-
confidence:
|
|
6572
|
-
applicationType:
|
|
6573
|
-
});
|
|
6574
|
-
var FieldExtractionResultSchema =
|
|
6575
|
-
fields:
|
|
6576
|
-
});
|
|
6577
|
-
var AutoFillMatchSchema =
|
|
6578
|
-
fieldId:
|
|
6579
|
-
value:
|
|
6580
|
-
confidence:
|
|
6581
|
-
contextKey:
|
|
6582
|
-
});
|
|
6583
|
-
var AutoFillResultSchema =
|
|
6584
|
-
matches:
|
|
6585
|
-
});
|
|
6586
|
-
var QuestionBatchResultSchema =
|
|
6587
|
-
batches:
|
|
6588
|
-
});
|
|
6589
|
-
var LookupRequestSchema =
|
|
6590
|
-
type:
|
|
6591
|
-
description:
|
|
6592
|
-
url:
|
|
6593
|
-
targetFieldIds:
|
|
6594
|
-
});
|
|
6595
|
-
var ReplyIntentSchema =
|
|
6596
|
-
primaryIntent:
|
|
6597
|
-
hasAnswers:
|
|
6598
|
-
questionText:
|
|
6599
|
-
questionFieldIds:
|
|
6600
|
-
lookupRequests:
|
|
6601
|
-
});
|
|
6602
|
-
var ParsedAnswerSchema =
|
|
6603
|
-
fieldId:
|
|
6604
|
-
value:
|
|
6605
|
-
explanation:
|
|
6606
|
-
});
|
|
6607
|
-
var AnswerParsingResultSchema =
|
|
6608
|
-
answers:
|
|
6609
|
-
unanswered:
|
|
6610
|
-
});
|
|
6611
|
-
var LookupFillSchema =
|
|
6612
|
-
fieldId:
|
|
6613
|
-
value:
|
|
6614
|
-
source:
|
|
6615
|
-
});
|
|
6616
|
-
var LookupFillResultSchema =
|
|
6617
|
-
fills:
|
|
6618
|
-
unfillable:
|
|
6619
|
-
explanation:
|
|
6620
|
-
});
|
|
6621
|
-
var FlatPdfPlacementSchema =
|
|
6622
|
-
fieldId:
|
|
6623
|
-
page:
|
|
6624
|
-
x:
|
|
6625
|
-
y:
|
|
6626
|
-
text:
|
|
6627
|
-
fontSize:
|
|
6628
|
-
isCheckmark:
|
|
6629
|
-
});
|
|
6630
|
-
var AcroFormMappingSchema =
|
|
6631
|
-
fieldId:
|
|
6632
|
-
acroFormName:
|
|
6633
|
-
value:
|
|
6634
|
-
});
|
|
6635
|
-
var QualityGateStatusSchema =
|
|
6636
|
-
var QualitySeveritySchema =
|
|
6637
|
-
var ApplicationQualityIssueSchema =
|
|
6638
|
-
code:
|
|
7437
|
+
value: z38.string().optional(),
|
|
7438
|
+
source: z38.string().optional().describe("Where the value came from: auto-fill, user, lookup"),
|
|
7439
|
+
confidence: z38.enum(["confirmed", "high", "medium", "low"]).optional()
|
|
7440
|
+
});
|
|
7441
|
+
var ApplicationClassifyResultSchema = z38.object({
|
|
7442
|
+
isApplication: z38.boolean(),
|
|
7443
|
+
confidence: z38.number().min(0).max(1),
|
|
7444
|
+
applicationType: z38.string().nullable()
|
|
7445
|
+
});
|
|
7446
|
+
var FieldExtractionResultSchema = z38.object({
|
|
7447
|
+
fields: z38.array(ApplicationFieldSchema)
|
|
7448
|
+
});
|
|
7449
|
+
var AutoFillMatchSchema = z38.object({
|
|
7450
|
+
fieldId: z38.string(),
|
|
7451
|
+
value: z38.string(),
|
|
7452
|
+
confidence: z38.enum(["confirmed"]),
|
|
7453
|
+
contextKey: z38.string()
|
|
7454
|
+
});
|
|
7455
|
+
var AutoFillResultSchema = z38.object({
|
|
7456
|
+
matches: z38.array(AutoFillMatchSchema)
|
|
7457
|
+
});
|
|
7458
|
+
var QuestionBatchResultSchema = z38.object({
|
|
7459
|
+
batches: z38.array(z38.array(z38.string()).describe("Array of field IDs in this batch"))
|
|
7460
|
+
});
|
|
7461
|
+
var LookupRequestSchema = z38.object({
|
|
7462
|
+
type: z38.string().describe("Type of lookup: 'records', 'website', 'policy'"),
|
|
7463
|
+
description: z38.string(),
|
|
7464
|
+
url: z38.string().optional(),
|
|
7465
|
+
targetFieldIds: z38.array(z38.string())
|
|
7466
|
+
});
|
|
7467
|
+
var ReplyIntentSchema = z38.object({
|
|
7468
|
+
primaryIntent: z38.enum(["answers_only", "question", "lookup_request", "mixed"]),
|
|
7469
|
+
hasAnswers: z38.boolean(),
|
|
7470
|
+
questionText: z38.string().optional(),
|
|
7471
|
+
questionFieldIds: z38.array(z38.string()).optional(),
|
|
7472
|
+
lookupRequests: z38.array(LookupRequestSchema).optional()
|
|
7473
|
+
});
|
|
7474
|
+
var ParsedAnswerSchema = z38.object({
|
|
7475
|
+
fieldId: z38.string(),
|
|
7476
|
+
value: z38.string(),
|
|
7477
|
+
explanation: z38.string().optional()
|
|
7478
|
+
});
|
|
7479
|
+
var AnswerParsingResultSchema = z38.object({
|
|
7480
|
+
answers: z38.array(ParsedAnswerSchema),
|
|
7481
|
+
unanswered: z38.array(z38.string()).describe("Field IDs that were not answered")
|
|
7482
|
+
});
|
|
7483
|
+
var LookupFillSchema = z38.object({
|
|
7484
|
+
fieldId: z38.string(),
|
|
7485
|
+
value: z38.string(),
|
|
7486
|
+
source: z38.string().describe("Specific citable reference, e.g. 'GL Policy #POL-12345 (Hartford)'")
|
|
7487
|
+
});
|
|
7488
|
+
var LookupFillResultSchema = z38.object({
|
|
7489
|
+
fills: z38.array(LookupFillSchema),
|
|
7490
|
+
unfillable: z38.array(z38.string()),
|
|
7491
|
+
explanation: z38.string().optional()
|
|
7492
|
+
});
|
|
7493
|
+
var FlatPdfPlacementSchema = z38.object({
|
|
7494
|
+
fieldId: z38.string(),
|
|
7495
|
+
page: z38.number(),
|
|
7496
|
+
x: z38.number().describe("Percentage from left edge (0-100)"),
|
|
7497
|
+
y: z38.number().describe("Percentage from top edge (0-100)"),
|
|
7498
|
+
text: z38.string(),
|
|
7499
|
+
fontSize: z38.number().optional(),
|
|
7500
|
+
isCheckmark: z38.boolean().optional()
|
|
7501
|
+
});
|
|
7502
|
+
var AcroFormMappingSchema = z38.object({
|
|
7503
|
+
fieldId: z38.string(),
|
|
7504
|
+
acroFormName: z38.string(),
|
|
7505
|
+
value: z38.string()
|
|
7506
|
+
});
|
|
7507
|
+
var QualityGateStatusSchema = z38.enum(["passed", "warning", "failed"]);
|
|
7508
|
+
var QualitySeveritySchema = z38.enum(["info", "warning", "blocking"]);
|
|
7509
|
+
var ApplicationQualityIssueSchema = z38.object({
|
|
7510
|
+
code: z38.string(),
|
|
6639
7511
|
severity: QualitySeveritySchema,
|
|
6640
|
-
message:
|
|
6641
|
-
fieldId:
|
|
7512
|
+
message: z38.string(),
|
|
7513
|
+
fieldId: z38.string().optional()
|
|
6642
7514
|
});
|
|
6643
|
-
var ApplicationQualityRoundSchema =
|
|
6644
|
-
round:
|
|
6645
|
-
kind:
|
|
7515
|
+
var ApplicationQualityRoundSchema = z38.object({
|
|
7516
|
+
round: z38.number(),
|
|
7517
|
+
kind: z38.string(),
|
|
6646
7518
|
status: QualityGateStatusSchema,
|
|
6647
|
-
summary:
|
|
7519
|
+
summary: z38.string().optional()
|
|
6648
7520
|
});
|
|
6649
|
-
var ApplicationQualityArtifactSchema =
|
|
6650
|
-
kind:
|
|
6651
|
-
label:
|
|
6652
|
-
itemCount:
|
|
7521
|
+
var ApplicationQualityArtifactSchema = z38.object({
|
|
7522
|
+
kind: z38.string(),
|
|
7523
|
+
label: z38.string().optional(),
|
|
7524
|
+
itemCount: z38.number().optional()
|
|
6653
7525
|
});
|
|
6654
|
-
var ApplicationEmailReviewSchema =
|
|
6655
|
-
issues:
|
|
7526
|
+
var ApplicationEmailReviewSchema = z38.object({
|
|
7527
|
+
issues: z38.array(ApplicationQualityIssueSchema),
|
|
6656
7528
|
qualityGateStatus: QualityGateStatusSchema
|
|
6657
7529
|
});
|
|
6658
|
-
var ApplicationQualityReportSchema =
|
|
6659
|
-
issues:
|
|
6660
|
-
rounds:
|
|
6661
|
-
artifacts:
|
|
7530
|
+
var ApplicationQualityReportSchema = z38.object({
|
|
7531
|
+
issues: z38.array(ApplicationQualityIssueSchema),
|
|
7532
|
+
rounds: z38.array(ApplicationQualityRoundSchema).optional(),
|
|
7533
|
+
artifacts: z38.array(ApplicationQualityArtifactSchema).optional(),
|
|
6662
7534
|
emailReview: ApplicationEmailReviewSchema.optional(),
|
|
6663
7535
|
qualityGateStatus: QualityGateStatusSchema
|
|
6664
7536
|
});
|
|
6665
|
-
var ApplicationStateSchema =
|
|
6666
|
-
id:
|
|
6667
|
-
pdfBase64:
|
|
6668
|
-
title:
|
|
6669
|
-
applicationType:
|
|
6670
|
-
fields:
|
|
6671
|
-
batches:
|
|
6672
|
-
currentBatchIndex:
|
|
7537
|
+
var ApplicationStateSchema = z38.object({
|
|
7538
|
+
id: z38.string(),
|
|
7539
|
+
pdfBase64: z38.string().optional().describe("Original PDF, omitted after extraction"),
|
|
7540
|
+
title: z38.string().optional(),
|
|
7541
|
+
applicationType: z38.string().nullable().optional(),
|
|
7542
|
+
fields: z38.array(ApplicationFieldSchema),
|
|
7543
|
+
batches: z38.array(z38.array(z38.string())).optional(),
|
|
7544
|
+
currentBatchIndex: z38.number().default(0),
|
|
6673
7545
|
qualityReport: ApplicationQualityReportSchema.optional(),
|
|
6674
|
-
status:
|
|
6675
|
-
createdAt:
|
|
6676
|
-
updatedAt:
|
|
7546
|
+
status: z38.enum(["classifying", "extracting", "auto_filling", "batching", "collecting", "confirming", "mapping", "complete"]),
|
|
7547
|
+
createdAt: z38.number(),
|
|
7548
|
+
updatedAt: z38.number()
|
|
6677
7549
|
});
|
|
6678
7550
|
|
|
6679
7551
|
// src/application/agents/classifier.ts
|
|
@@ -7262,6 +8134,70 @@ function reviewBatchEmail(text, batchFields) {
|
|
|
7262
8134
|
};
|
|
7263
8135
|
}
|
|
7264
8136
|
|
|
8137
|
+
// src/application/workflow.ts
|
|
8138
|
+
var MAX_DOCUMENT_SEARCH_FIELDS = 5;
|
|
8139
|
+
var LOW_VALUE_FIELD_RATIO_LIMIT = 0.6;
|
|
8140
|
+
/**
 * Decide which auto-fill stages should run for an application.
 *
 * @param {object} input
 * @param {Array<object>} input.fields - All application fields; only those
 *   reported as unfilled by `isUnfilled` take part in planning.
 * @param {boolean} input.hasBackfillProvider - Whether a backfill provider is configured.
 * @param {number} input.orgContextCount - Number of org-context entries available.
 * @param {boolean} input.hasDocumentStore - Whether a document store is configured.
 * @param {boolean} input.hasMemoryStore - Whether a memory store is configured.
 * @returns {{runBackfill: *, runContextAutoFill: boolean, documentSearchFields: Array<object>, runBatching: boolean, unfilledFields: Array<object>}}
 *   The stage flags plus the list of fields that still need a value.
 */
function planApplicationWorkflow(input) {
  const unfilledFields = input.fields.filter(isUnfilled);
  // Document search is only planned when both stores are present.
  const storesAvailable = input.hasDocumentStore && input.hasMemoryStore;
  const documentSearchFields = planDocumentSearchFields(unfilledFields, storesAvailable);
  // Every stage is pointless once nothing is left to fill.
  const hasUnfilledFields = unfilledFields.length > 0;
  const plan = {
    runBackfill: input.hasBackfillProvider && hasUnfilledFields,
    runContextAutoFill: input.orgContextCount > 0 && hasUnfilledFields,
    documentSearchFields,
    runBatching: hasUnfilledFields,
    unfilledFields
  };
  return plan;
}
|
|
8154
|
+
/**
 * Decide which follow-up actions a user reply should trigger.
 *
 * @param {object} input
 * @param {object} input.intent - Classified reply intent (`hasAnswers`,
 *   `lookupRequests`, `questionText`, `primaryIntent` are read).
 * @param {Array<object>} input.currentBatchFields - Fields of the batch being answered.
 * @param {Array<object>} [input.nextBatchFields] - Fields of the next batch, if any.
 * @param {boolean} input.hasDocumentStore - Whether a document store is configured.
 * @returns {{parseAnswers: *, runLookup: *, answerQuestion: boolean, advanceBatch: boolean, generateNextEmail: boolean}}
 */
function planReplyActions(input) {
  const { intent, currentBatchFields } = input;
  const hasCurrentFields = currentBatchFields.length > 0;
  const upcomingFields = input.nextBatchFields ?? [];
  const nextBatchNeedsAnswers = upcomingFields.some(isUnfilled);
  const lookupRequestCount = intent.lookupRequests?.length ?? 0;
  const hasLookupRequests = lookupRequestCount > 0;
  // A question is only answered when the reply was classified as (partly) a question.
  const isQuestionIntent = intent.primaryIntent === "question" || intent.primaryIntent === "mixed";
  // The batch may advance only when it is non-empty and no field is still unfilled.
  const currentBatchFilled = !currentBatchFields.some((field) => isUnfilled(field));
  return {
    parseAnswers: intent.hasAnswers && hasCurrentFields,
    runLookup: hasLookupRequests && input.hasDocumentStore,
    answerQuestion: Boolean(intent.questionText) && isQuestionIntent,
    advanceBatch: hasCurrentFields && currentBatchFilled,
    generateNextEmail: nextBatchNeedsAnswers
  };
}
|
|
8166
|
+
/**
 * Pick the unfilled fields worth looking up in stored documents.
 *
 * @param {Array<object>} unfilledFields - Fields that still have no value.
 * @param {boolean} hasStores - Whether the stores needed for search are available.
 * @returns {Array<object>} At most `MAX_DOCUMENT_SEARCH_FIELDS` high-value
 *   fields, or an empty array when search is skipped.
 */
function planDocumentSearchFields(unfilledFields, hasStores) {
  const nothingToSearch = !hasStores || unfilledFields.length === 0;
  if (nothingToSearch) return [];
  const candidates = unfilledFields.filter(isHighValueLookupField);
  if (candidates.length === 0) return [];
  // Skip search when the form is large and mostly made of low-value fields.
  const exceedsFieldBudget = unfilledFields.length > MAX_DOCUMENT_SEARCH_FIELDS;
  const lowValueShare = 1 - candidates.length / unfilledFields.length;
  const mostlyLowValue = lowValueShare > LOW_VALUE_FIELD_RATIO_LIMIT;
  return exceedsFieldBudget && mostlyLowValue ? [] : candidates.slice(0, MAX_DOCUMENT_SEARCH_FIELDS);
}
|
|
8176
|
+
/**
 * Report whether a field still needs a value.
 *
 * A field is unfilled when its value is `undefined`, `null`, or a string
 * that is empty after trimming. Any other non-string value counts as filled.
 * Handling `null` and non-strings here avoids a TypeError from calling
 * `.trim()` on values assigned from outside sources.
 *
 * @param {{value?: unknown}} field - Field whose `value` is inspected.
 * @returns {boolean} `true` when the field has no usable value.
 */
function isUnfilled(field) {
  const { value } = field;
  // `== null` deliberately matches both null and undefined.
  if (value == null) return true;
  if (typeof value !== "string") return false;
  return value.trim() === "";
}
|
|
8179
|
+
// Keywords matched (case-insensitively, as substrings) against a field's
// section and label. Hoisted so the list is not re-allocated on every call.
var HIGH_VALUE_LOOKUP_TERMS = Object.freeze([
  "carrier",
  "policy",
  "premium",
  "limit",
  "deductible",
  "insured",
  "address",
  "revenue",
  "payroll",
  "effective",
  "expiration",
  "coverage",
  "class code",
  "fein",
  "entity"
]);

/**
 * Report whether a field is worth a document lookup.
 *
 * Required fields always qualify. Optional fields qualify when their
 * section or label contains one of `HIGH_VALUE_LOOKUP_TERMS`.
 *
 * @param {{required: boolean, section: string, label: string}} field
 * @returns {boolean} `true` when the field should be searched for.
 */
function isHighValueLookupField(field) {
  // Short-circuit before building the search text; required fields need no keyword scan.
  if (field.required) return true;
  const text = `${field.section} ${field.label}`.toLowerCase();
  return HIGH_VALUE_LOOKUP_TERMS.some((term) => text.includes(term));
}
|
|
8200
|
+
|
|
7265
8201
|
// src/application/coordinator.ts
|
|
7266
8202
|
function createApplicationPipeline(config) {
|
|
7267
8203
|
const {
|
|
@@ -7360,27 +8296,37 @@ function createApplicationPipeline(config) {
|
|
|
7360
8296
|
state.updatedAt = Date.now();
|
|
7361
8297
|
await applicationStore?.save(state);
|
|
7362
8298
|
onProgress?.(`Auto-filling ${fields.length} fields...`);
|
|
7363
|
-
|
|
7364
|
-
|
|
7365
|
-
|
|
7366
|
-
|
|
7367
|
-
|
|
7368
|
-
|
|
7369
|
-
|
|
7370
|
-
|
|
7371
|
-
|
|
7372
|
-
|
|
7373
|
-
|
|
7374
|
-
|
|
7375
|
-
|
|
7376
|
-
|
|
7377
|
-
|
|
7378
|
-
|
|
8299
|
+
let workflowPlan = planApplicationWorkflow({
|
|
8300
|
+
fields: state.fields,
|
|
8301
|
+
hasBackfillProvider: Boolean(backfillProvider),
|
|
8302
|
+
orgContextCount: orgContext.length,
|
|
8303
|
+
hasDocumentStore: Boolean(documentStore),
|
|
8304
|
+
hasMemoryStore: Boolean(memoryStore)
|
|
8305
|
+
});
|
|
8306
|
+
if (workflowPlan.runBackfill && backfillProvider) {
|
|
8307
|
+
try {
|
|
8308
|
+
const priorAnswers = await backfillFromPriorAnswers(state.fields, backfillProvider);
|
|
8309
|
+
for (const pa of priorAnswers) {
|
|
8310
|
+
const field = state.fields.find((f) => f.id === pa.fieldId);
|
|
8311
|
+
if (field && !field.value && pa.relevance > 0.8) {
|
|
8312
|
+
field.value = pa.value;
|
|
8313
|
+
field.source = `backfill: ${pa.source}`;
|
|
8314
|
+
field.confidence = "high";
|
|
7379
8315
|
}
|
|
7380
|
-
}
|
|
7381
|
-
)
|
|
8316
|
+
}
|
|
8317
|
+
} catch (e) {
|
|
8318
|
+
await log?.(`Backfill failed: ${e}`);
|
|
8319
|
+
}
|
|
7382
8320
|
}
|
|
7383
|
-
|
|
8321
|
+
workflowPlan = planApplicationWorkflow({
|
|
8322
|
+
fields: state.fields,
|
|
8323
|
+
hasBackfillProvider: false,
|
|
8324
|
+
orgContextCount: orgContext.length,
|
|
8325
|
+
hasDocumentStore: Boolean(documentStore),
|
|
8326
|
+
hasMemoryStore: Boolean(memoryStore)
|
|
8327
|
+
});
|
|
8328
|
+
const fillTasks = [];
|
|
8329
|
+
if (workflowPlan.runContextAutoFill) {
|
|
7384
8330
|
fillTasks.push(
|
|
7385
8331
|
limit(async () => {
|
|
7386
8332
|
const unfilledFields2 = state.fields.filter((f) => !f.value);
|
|
@@ -7407,18 +8353,13 @@ function createApplicationPipeline(config) {
|
|
|
7407
8353
|
})
|
|
7408
8354
|
);
|
|
7409
8355
|
}
|
|
7410
|
-
if (
|
|
8356
|
+
if (workflowPlan.documentSearchFields.length > 0 && memoryStore) {
|
|
7411
8357
|
fillTasks.push(
|
|
7412
8358
|
(async () => {
|
|
7413
8359
|
try {
|
|
7414
|
-
const
|
|
7415
|
-
const searchPromises = unfilledFields2.slice(0, 10).map(
|
|
8360
|
+
const searchPromises = workflowPlan.documentSearchFields.map(
|
|
7416
8361
|
(f) => limit(async () => {
|
|
7417
|
-
|
|
7418
|
-
for (const chunk of chunks) {
|
|
7419
|
-
if (!state.fields.find((sf) => sf.id === f.id)?.value) {
|
|
7420
|
-
}
|
|
7421
|
-
}
|
|
8362
|
+
await memoryStore.search(f.label, { limit: 3 });
|
|
7422
8363
|
})
|
|
7423
8364
|
);
|
|
7424
8365
|
await Promise.all(searchPromises);
|
|
@@ -7431,8 +8372,15 @@ function createApplicationPipeline(config) {
|
|
|
7431
8372
|
await Promise.all(fillTasks);
|
|
7432
8373
|
state.updatedAt = Date.now();
|
|
7433
8374
|
await applicationStore?.save(state);
|
|
7434
|
-
|
|
7435
|
-
|
|
8375
|
+
workflowPlan = planApplicationWorkflow({
|
|
8376
|
+
fields: state.fields,
|
|
8377
|
+
hasBackfillProvider: false,
|
|
8378
|
+
orgContextCount: 0,
|
|
8379
|
+
hasDocumentStore: false,
|
|
8380
|
+
hasMemoryStore: false
|
|
8381
|
+
});
|
|
8382
|
+
const unfilledFields = workflowPlan.unfilledFields;
|
|
8383
|
+
if (workflowPlan.runBatching) {
|
|
7436
8384
|
onProgress?.(`Batching ${unfilledFields.length} remaining questions...`);
|
|
7437
8385
|
state.status = "batching";
|
|
7438
8386
|
try {
|
|
@@ -7499,7 +8447,12 @@ function createApplicationPipeline(config) {
|
|
|
7499
8447
|
}
|
|
7500
8448
|
let fieldsFilled = 0;
|
|
7501
8449
|
let responseText;
|
|
7502
|
-
|
|
8450
|
+
let replyPlan = planReplyActions({
|
|
8451
|
+
intent,
|
|
8452
|
+
currentBatchFields,
|
|
8453
|
+
hasDocumentStore: Boolean(documentStore)
|
|
8454
|
+
});
|
|
8455
|
+
if (replyPlan.parseAnswers) {
|
|
7503
8456
|
onProgress?.("Parsing answers...");
|
|
7504
8457
|
try {
|
|
7505
8458
|
const { result: parseResult, usage: parseUsage } = await parseAnswers(
|
|
@@ -7522,7 +8475,7 @@ function createApplicationPipeline(config) {
|
|
|
7522
8475
|
await log?.(`Answer parsing failed: ${error instanceof Error ? error.message : String(error)}`);
|
|
7523
8476
|
}
|
|
7524
8477
|
}
|
|
7525
|
-
if (intent.lookupRequests?.length) {
|
|
8478
|
+
if (replyPlan.runLookup && intent.lookupRequests?.length) {
|
|
7526
8479
|
onProgress?.("Processing lookup requests...");
|
|
7527
8480
|
let availableData = "";
|
|
7528
8481
|
if (documentStore) {
|
|
@@ -7563,64 +8516,78 @@ function createApplicationPipeline(config) {
|
|
|
7563
8516
|
}
|
|
7564
8517
|
}
|
|
7565
8518
|
}
|
|
7566
|
-
if (
|
|
7567
|
-
|
|
7568
|
-
|
|
7569
|
-
|
|
7570
|
-
prompt: `The user is filling out an insurance application and asked: "${intent.questionText}"
|
|
8519
|
+
if (replyPlan.answerQuestion && intent.questionText) {
|
|
8520
|
+
try {
|
|
8521
|
+
const { text, usage } = await generateText({
|
|
8522
|
+
prompt: `The user is filling out an insurance application and asked: "${intent.questionText}"
|
|
7571
8523
|
|
|
7572
8524
|
Provide a brief, helpful explanation (2-3 sentences). End with "Just reply with the answer when you're ready and I'll fill it in."`,
|
|
7573
|
-
|
|
7574
|
-
|
|
7575
|
-
|
|
7576
|
-
|
|
7577
|
-
|
|
7578
|
-
|
|
7579
|
-
|
|
7580
|
-
|
|
7581
|
-
}
|
|
8525
|
+
maxTokens: 512,
|
|
8526
|
+
providerOptions
|
|
8527
|
+
});
|
|
8528
|
+
trackUsage(usage);
|
|
8529
|
+
responseText = text;
|
|
8530
|
+
} catch (error) {
|
|
8531
|
+
await log?.(`Question response generation failed: ${error instanceof Error ? error.message : String(error)}`);
|
|
8532
|
+
responseText = `I wasn't able to generate an explanation for your question. Could you rephrase it, or just provide the answer directly?`;
|
|
7582
8533
|
}
|
|
7583
8534
|
}
|
|
7584
8535
|
const currentBatchComplete = currentBatchFieldIds.every(
|
|
7585
8536
|
(fid) => state.fields.find((f) => f.id === fid)?.value
|
|
7586
8537
|
);
|
|
7587
|
-
|
|
7588
|
-
|
|
7589
|
-
|
|
7590
|
-
|
|
7591
|
-
const
|
|
7592
|
-
|
|
7593
|
-
|
|
8538
|
+
let nextBatchIndex;
|
|
8539
|
+
let nextBatchFields;
|
|
8540
|
+
if (state.batches) {
|
|
8541
|
+
for (let index = state.currentBatchIndex + 1; index < state.batches.length; index++) {
|
|
8542
|
+
const candidateFields = state.fields.filter((f) => state.batches[index].includes(f.id));
|
|
8543
|
+
if (candidateFields.some((f) => !f.value)) {
|
|
8544
|
+
nextBatchIndex = index;
|
|
8545
|
+
nextBatchFields = candidateFields;
|
|
8546
|
+
break;
|
|
8547
|
+
}
|
|
8548
|
+
}
|
|
8549
|
+
}
|
|
8550
|
+
replyPlan = planReplyActions({
|
|
8551
|
+
intent,
|
|
8552
|
+
currentBatchFields,
|
|
8553
|
+
nextBatchFields,
|
|
8554
|
+
hasDocumentStore: Boolean(documentStore)
|
|
8555
|
+
});
|
|
8556
|
+
if (currentBatchComplete && replyPlan.advanceBatch && state.batches) {
|
|
8557
|
+
if (nextBatchIndex !== void 0 && nextBatchFields) {
|
|
8558
|
+
state.currentBatchIndex = nextBatchIndex;
|
|
7594
8559
|
const filledCount = state.fields.filter((f) => f.value).length;
|
|
7595
|
-
|
|
7596
|
-
|
|
7597
|
-
|
|
7598
|
-
|
|
7599
|
-
|
|
7600
|
-
|
|
7601
|
-
|
|
7602
|
-
|
|
7603
|
-
|
|
7604
|
-
|
|
7605
|
-
|
|
7606
|
-
|
|
7607
|
-
|
|
7608
|
-
|
|
7609
|
-
|
|
7610
|
-
|
|
7611
|
-
|
|
7612
|
-
|
|
7613
|
-
|
|
7614
|
-
|
|
7615
|
-
|
|
7616
|
-
responseText
|
|
7617
|
-
|
|
7618
|
-
|
|
8560
|
+
if (replyPlan.generateNextEmail) {
|
|
8561
|
+
try {
|
|
8562
|
+
const { text: emailText, usage: emailUsage } = await generateBatchEmail(
|
|
8563
|
+
nextBatchFields,
|
|
8564
|
+
state.currentBatchIndex,
|
|
8565
|
+
state.batches.length,
|
|
8566
|
+
{
|
|
8567
|
+
appTitle: state.title,
|
|
8568
|
+
totalFieldCount: state.fields.length,
|
|
8569
|
+
filledFieldCount: filledCount,
|
|
8570
|
+
companyName: context?.companyName
|
|
8571
|
+
},
|
|
8572
|
+
generateText,
|
|
8573
|
+
providerOptions
|
|
8574
|
+
);
|
|
8575
|
+
trackUsage(emailUsage);
|
|
8576
|
+
const emailReview = reviewBatchEmail(emailText, nextBatchFields);
|
|
8577
|
+
state.qualityReport = {
|
|
8578
|
+
...buildApplicationQualityReport(state),
|
|
8579
|
+
emailReview
|
|
8580
|
+
};
|
|
8581
|
+
if (!responseText) {
|
|
8582
|
+
responseText = emailText;
|
|
8583
|
+
} else {
|
|
8584
|
+
responseText += `
|
|
7619
8585
|
|
|
7620
8586
|
${emailText}`;
|
|
8587
|
+
}
|
|
8588
|
+
} catch (error) {
|
|
8589
|
+
await log?.(`Batch email generation failed: ${error instanceof Error ? error.message : String(error)}`);
|
|
7621
8590
|
}
|
|
7622
|
-
} catch (error) {
|
|
7623
|
-
await log?.(`Batch email generation failed: ${error instanceof Error ? error.message : String(error)}`);
|
|
7624
8591
|
}
|
|
7625
8592
|
} else {
|
|
7626
8593
|
state.status = "confirming";
|
|
@@ -7779,7 +8746,7 @@ INSTRUCTIONS:
|
|
|
7779
8746
|
- If the user's attachment already contains critical facts, still request chunk/document lookup when policy or quote details should be cross-checked against stored records
|
|
7780
8747
|
|
|
7781
8748
|
CHUNK TYPES (for chunkTypes filter):
|
|
7782
|
-
carrier_info, named_insured, coverage, endorsement, exclusion, condition, section, declaration, loss_history, premium, supplementary
|
|
8749
|
+
carrier_info, named_insured, coverage, covered_reason, definition, endorsement, exclusion, condition, section, declaration, loss_history, premium, supplementary
|
|
7783
8750
|
|
|
7784
8751
|
Respond with the structured classification.`;
|
|
7785
8752
|
}
|
|
@@ -7810,91 +8777,91 @@ Respond with the final answer, deduplicated citations array, overall confidence
|
|
|
7810
8777
|
}
|
|
7811
8778
|
|
|
7812
8779
|
// src/schemas/query.ts
|
|
7813
|
-
import { z as
|
|
7814
|
-
var QueryIntentSchema =
|
|
8780
|
+
import { z as z39 } from "zod";
|
|
8781
|
+
var QueryIntentSchema = z39.enum([
|
|
7815
8782
|
"policy_question",
|
|
7816
8783
|
"coverage_comparison",
|
|
7817
8784
|
"document_search",
|
|
7818
8785
|
"claims_inquiry",
|
|
7819
8786
|
"general_knowledge"
|
|
7820
8787
|
]);
|
|
7821
|
-
var QueryAttachmentKindSchema =
|
|
7822
|
-
var QueryAttachmentSchema =
|
|
7823
|
-
id:
|
|
8788
|
+
var QueryAttachmentKindSchema = z39.enum(["image", "pdf", "text"]);
|
|
8789
|
+
var QueryAttachmentSchema = z39.object({
|
|
8790
|
+
id: z39.string().optional().describe("Optional stable attachment ID from the caller"),
|
|
7824
8791
|
kind: QueryAttachmentKindSchema,
|
|
7825
|
-
name:
|
|
7826
|
-
mimeType:
|
|
7827
|
-
base64:
|
|
7828
|
-
text:
|
|
7829
|
-
description:
|
|
7830
|
-
});
|
|
7831
|
-
var SubQuestionSchema =
|
|
7832
|
-
question:
|
|
8792
|
+
name: z39.string().optional().describe("Original filename or user-facing label"),
|
|
8793
|
+
mimeType: z39.string().optional().describe("MIME type such as image/jpeg or application/pdf"),
|
|
8794
|
+
base64: z39.string().optional().describe("Base64-encoded file content for image/pdf attachments"),
|
|
8795
|
+
text: z39.string().optional().describe("Plain-text attachment content when available"),
|
|
8796
|
+
description: z39.string().optional().describe("Caller-provided description of the attachment")
|
|
8797
|
+
});
|
|
8798
|
+
var SubQuestionSchema = z39.object({
|
|
8799
|
+
question: z39.string().describe("Atomic sub-question to retrieve and answer independently"),
|
|
7833
8800
|
intent: QueryIntentSchema,
|
|
7834
|
-
chunkTypes:
|
|
7835
|
-
documentFilters:
|
|
7836
|
-
type:
|
|
7837
|
-
carrier:
|
|
7838
|
-
insuredName:
|
|
7839
|
-
policyNumber:
|
|
7840
|
-
quoteNumber:
|
|
7841
|
-
policyTypes:
|
|
8801
|
+
chunkTypes: z39.array(z39.string()).optional().describe("Chunk types to filter retrieval (e.g. coverage, endorsement, declaration)"),
|
|
8802
|
+
documentFilters: z39.object({
|
|
8803
|
+
type: z39.enum(["policy", "quote"]).optional(),
|
|
8804
|
+
carrier: z39.string().optional(),
|
|
8805
|
+
insuredName: z39.string().optional(),
|
|
8806
|
+
policyNumber: z39.string().optional(),
|
|
8807
|
+
quoteNumber: z39.string().optional(),
|
|
8808
|
+
policyTypes: z39.array(PolicyTypeSchema).optional().describe("Filter by policy type (e.g. homeowners_ho3, renters_ho4, pet) to avoid mixing up similar policies")
|
|
7842
8809
|
}).optional().describe("Structured filters to narrow document lookup")
|
|
7843
8810
|
});
|
|
7844
|
-
var QueryClassifyResultSchema =
|
|
8811
|
+
var QueryClassifyResultSchema = z39.object({
|
|
7845
8812
|
intent: QueryIntentSchema,
|
|
7846
|
-
subQuestions:
|
|
7847
|
-
requiresDocumentLookup:
|
|
7848
|
-
requiresChunkSearch:
|
|
7849
|
-
requiresConversationHistory:
|
|
7850
|
-
});
|
|
7851
|
-
var EvidenceItemSchema =
|
|
7852
|
-
source:
|
|
7853
|
-
chunkId:
|
|
7854
|
-
documentId:
|
|
7855
|
-
turnId:
|
|
7856
|
-
attachmentId:
|
|
7857
|
-
text:
|
|
7858
|
-
relevance:
|
|
7859
|
-
metadata:
|
|
7860
|
-
});
|
|
7861
|
-
var AttachmentInterpretationSchema =
|
|
7862
|
-
summary:
|
|
7863
|
-
extractedFacts:
|
|
7864
|
-
recommendedFocus:
|
|
7865
|
-
confidence:
|
|
7866
|
-
});
|
|
7867
|
-
var RetrievalResultSchema =
|
|
7868
|
-
subQuestion:
|
|
7869
|
-
evidence:
|
|
7870
|
-
});
|
|
7871
|
-
var CitationSchema =
|
|
7872
|
-
index:
|
|
7873
|
-
chunkId:
|
|
7874
|
-
documentId:
|
|
7875
|
-
documentType:
|
|
7876
|
-
field:
|
|
7877
|
-
quote:
|
|
7878
|
-
relevance:
|
|
7879
|
-
});
|
|
7880
|
-
var SubAnswerSchema =
|
|
7881
|
-
subQuestion:
|
|
7882
|
-
answer:
|
|
7883
|
-
citations:
|
|
7884
|
-
confidence:
|
|
7885
|
-
needsMoreContext:
|
|
7886
|
-
});
|
|
7887
|
-
var VerifyResultSchema =
|
|
7888
|
-
approved:
|
|
7889
|
-
issues:
|
|
7890
|
-
retrySubQuestions:
|
|
7891
|
-
});
|
|
7892
|
-
var QueryResultSchema =
|
|
7893
|
-
answer:
|
|
7894
|
-
citations:
|
|
8813
|
+
subQuestions: z39.array(SubQuestionSchema).min(1).describe("Decomposed atomic sub-questions"),
|
|
8814
|
+
requiresDocumentLookup: z39.boolean().describe("Whether structured document lookup is needed"),
|
|
8815
|
+
requiresChunkSearch: z39.boolean().describe("Whether semantic chunk search is needed"),
|
|
8816
|
+
requiresConversationHistory: z39.boolean().describe("Whether conversation history is relevant")
|
|
8817
|
+
});
|
|
8818
|
+
var EvidenceItemSchema = z39.object({
|
|
8819
|
+
source: z39.enum(["chunk", "document", "conversation", "attachment"]),
|
|
8820
|
+
chunkId: z39.string().optional(),
|
|
8821
|
+
documentId: z39.string().optional(),
|
|
8822
|
+
turnId: z39.string().optional(),
|
|
8823
|
+
attachmentId: z39.string().optional(),
|
|
8824
|
+
text: z39.string().describe("Text excerpt from the source"),
|
|
8825
|
+
relevance: z39.number().min(0).max(1),
|
|
8826
|
+
metadata: z39.array(z39.object({ key: z39.string(), value: z39.string() })).optional()
|
|
8827
|
+
});
|
|
8828
|
+
var AttachmentInterpretationSchema = z39.object({
|
|
8829
|
+
summary: z39.string().describe("Concise summary of what the attachment shows or contains"),
|
|
8830
|
+
extractedFacts: z39.array(z39.string()).describe("Specific observable or document facts grounded in the attachment"),
|
|
8831
|
+
recommendedFocus: z39.array(z39.string()).describe("Important details to incorporate when answering follow-up questions"),
|
|
8832
|
+
confidence: z39.number().min(0).max(1)
|
|
8833
|
+
});
|
|
8834
|
+
var RetrievalResultSchema = z39.object({
|
|
8835
|
+
subQuestion: z39.string(),
|
|
8836
|
+
evidence: z39.array(EvidenceItemSchema)
|
|
8837
|
+
});
|
|
8838
|
+
var CitationSchema = z39.object({
|
|
8839
|
+
index: z39.number().describe("Citation number [1], [2], etc."),
|
|
8840
|
+
chunkId: z39.string().describe("Source chunk ID, e.g. doc-123:coverage:2"),
|
|
8841
|
+
documentId: z39.string(),
|
|
8842
|
+
documentType: z39.enum(["policy", "quote"]).optional(),
|
|
8843
|
+
field: z39.string().optional().describe("Specific field path, e.g. coverages[0].deductible"),
|
|
8844
|
+
quote: z39.string().describe("Exact text from source that supports the claim"),
|
|
8845
|
+
relevance: z39.number().min(0).max(1)
|
|
8846
|
+
});
|
|
8847
|
+
var SubAnswerSchema = z39.object({
|
|
8848
|
+
subQuestion: z39.string(),
|
|
8849
|
+
answer: z39.string(),
|
|
8850
|
+
citations: z39.array(CitationSchema),
|
|
8851
|
+
confidence: z39.number().min(0).max(1),
|
|
8852
|
+
needsMoreContext: z39.boolean().describe("True if evidence was insufficient to answer fully")
|
|
8853
|
+
});
|
|
8854
|
+
var VerifyResultSchema = z39.object({
|
|
8855
|
+
approved: z39.boolean().describe("Whether all sub-answers are adequately grounded"),
|
|
8856
|
+
issues: z39.array(z39.string()).describe("Specific grounding or consistency issues found"),
|
|
8857
|
+
retrySubQuestions: z39.array(z39.string()).optional().describe("Sub-questions that need additional retrieval or re-reasoning")
|
|
8858
|
+
});
|
|
8859
|
+
var QueryResultSchema = z39.object({
|
|
8860
|
+
answer: z39.string(),
|
|
8861
|
+
citations: z39.array(CitationSchema),
|
|
7895
8862
|
intent: QueryIntentSchema,
|
|
7896
|
-
confidence:
|
|
7897
|
-
followUp:
|
|
8863
|
+
confidence: z39.number().min(0).max(1),
|
|
8864
|
+
followUp: z39.string().optional().describe("Suggested follow-up question if applicable")
|
|
7898
8865
|
});
|
|
7899
8866
|
|
|
7900
8867
|
// src/query/retriever.ts
|
|
@@ -8434,6 +9401,42 @@ ${item.text}`).join("\n\n");
|
|
|
8434
9401
|
return { evidence, contextSummary };
|
|
8435
9402
|
}
|
|
8436
9403
|
|
|
9404
|
+
// src/query/workflow.ts
|
|
9405
|
+
function shouldRetrieveForClassification(classification) {
|
|
9406
|
+
return classification.requiresDocumentLookup || classification.requiresChunkSearch;
|
|
9407
|
+
}
|
|
9408
|
+
function buildInitialQueryWorkflowPlan(params) {
|
|
9409
|
+
const { classification, attachmentEvidence } = params;
|
|
9410
|
+
const actions = [];
|
|
9411
|
+
const shouldRetrieve = shouldRetrieveForClassification(classification);
|
|
9412
|
+
if (shouldRetrieve) {
|
|
9413
|
+
actions.push({
|
|
9414
|
+
type: "retrieve",
|
|
9415
|
+
subQuestions: classification.subQuestions,
|
|
9416
|
+
reason: "classification requested document or chunk lookup"
|
|
9417
|
+
});
|
|
9418
|
+
}
|
|
9419
|
+
actions.push({
|
|
9420
|
+
type: "reason",
|
|
9421
|
+
subQuestions: classification.subQuestions,
|
|
9422
|
+
reason: shouldRetrieve ? "answer with retrieved evidence and any attachment evidence" : attachmentEvidence.length > 0 ? "answer with attachment evidence only" : "answer without document retrieval"
|
|
9423
|
+
});
|
|
9424
|
+
actions.push(
|
|
9425
|
+
{
|
|
9426
|
+
type: "verify",
|
|
9427
|
+
reason: "check grounding and request targeted retries when needed"
|
|
9428
|
+
},
|
|
9429
|
+
{
|
|
9430
|
+
type: "respond",
|
|
9431
|
+
reason: "compose final response"
|
|
9432
|
+
}
|
|
9433
|
+
);
|
|
9434
|
+
return { actions, shouldRetrieve };
|
|
9435
|
+
}
|
|
9436
|
+
function getWorkflowAction(plan, type) {
|
|
9437
|
+
return plan.actions.find((action) => action.type === type);
|
|
9438
|
+
}
|
|
9439
|
+
|
|
8437
9440
|
// src/query/coordinator.ts
|
|
8438
9441
|
function createQueryAgent(config) {
|
|
8439
9442
|
const {
|
|
@@ -8478,29 +9481,37 @@ function createQueryAgent(config) {
|
|
|
8478
9481
|
onProgress?.("Classifying query...");
|
|
8479
9482
|
const classification = await classify(question, conversationId, attachmentContext);
|
|
8480
9483
|
await pipelineCtx.save("classify", { classification, attachmentEvidence });
|
|
8481
|
-
onProgress?.(`Retrieving evidence for ${classification.subQuestions.length} sub-question(s)...`);
|
|
8482
9484
|
const retrieverConfig = {
|
|
8483
9485
|
documentStore,
|
|
8484
9486
|
memoryStore,
|
|
8485
9487
|
retrievalLimit,
|
|
8486
9488
|
log
|
|
8487
9489
|
};
|
|
8488
|
-
const
|
|
8489
|
-
|
|
8490
|
-
|
|
8491
|
-
|
|
8492
|
-
)
|
|
9490
|
+
const workflowPlan = buildInitialQueryWorkflowPlan({ classification, attachmentEvidence });
|
|
9491
|
+
const retrieveAction = getWorkflowAction(workflowPlan, "retrieve");
|
|
9492
|
+
const reasonAction = getWorkflowAction(workflowPlan, "reason");
|
|
9493
|
+
await pipelineCtx.save("workflow", { classification, attachmentEvidence, workflowPlan });
|
|
9494
|
+
const retrievalResults = retrieveAction ? await (async () => {
|
|
9495
|
+
onProgress?.(`Retrieving evidence for ${retrieveAction.subQuestions.length} sub-question(s)...`);
|
|
9496
|
+
return Promise.all(
|
|
9497
|
+
retrieveAction.subQuestions.map(
|
|
9498
|
+
(sq) => limit(() => retrieve(sq, conversationId, retrieverConfig))
|
|
9499
|
+
)
|
|
9500
|
+
);
|
|
9501
|
+
})() : [];
|
|
8493
9502
|
const allEvidence = [...attachmentEvidence, ...retrievalResults.flatMap((r) => r.evidence)];
|
|
8494
9503
|
await pipelineCtx.save("retrieve", { classification, attachmentEvidence, evidence: allEvidence });
|
|
8495
9504
|
onProgress?.("Reasoning over evidence...");
|
|
8496
9505
|
const reasonerConfig = { generateObject, providerOptions };
|
|
9506
|
+
const subQuestionsToReason = reasonAction?.subQuestions ?? classification.subQuestions;
|
|
8497
9507
|
const reasonResults = await Promise.allSettled(
|
|
8498
|
-
|
|
8499
|
-
(sq
|
|
9508
|
+
subQuestionsToReason.map(
|
|
9509
|
+
(sq) => limit(async () => {
|
|
9510
|
+
const retrievedEvidence = retrievalResults.find((r) => r.subQuestion === sq.question)?.evidence ?? [];
|
|
8500
9511
|
const { subAnswer, usage } = await reason(
|
|
8501
9512
|
sq.question,
|
|
8502
9513
|
sq.intent,
|
|
8503
|
-
[...attachmentEvidence, ...
|
|
9514
|
+
[...attachmentEvidence, ...retrievedEvidence],
|
|
8504
9515
|
reasonerConfig
|
|
8505
9516
|
);
|
|
8506
9517
|
trackUsage(usage);
|
|
@@ -8514,9 +9525,9 @@ function createQueryAgent(config) {
|
|
|
8514
9525
|
if (result.status === "fulfilled") {
|
|
8515
9526
|
subAnswers.push(result.value);
|
|
8516
9527
|
} else {
|
|
8517
|
-
await log?.(`Reasoner failed for sub-question "${
|
|
9528
|
+
await log?.(`Reasoner failed for sub-question "${subQuestionsToReason[i].question}": ${result.reason}`);
|
|
8518
9529
|
subAnswers.push({
|
|
8519
|
-
subQuestion:
|
|
9530
|
+
subQuestion: subQuestionsToReason[i].question,
|
|
8520
9531
|
answer: "Unable to answer this part of the question due to a processing error.",
|
|
8521
9532
|
citations: [],
|
|
8522
9533
|
confidence: 0,
|
|
@@ -8899,6 +9910,7 @@ export {
|
|
|
8899
9910
|
CoverageSchema,
|
|
8900
9911
|
CoverageTriggerSchema,
|
|
8901
9912
|
CoverageValueTypeSchema,
|
|
9913
|
+
CoveredReasonSchema,
|
|
8902
9914
|
CrimeDeclarationsSchema,
|
|
8903
9915
|
CyberDeclarationsSchema,
|
|
8904
9916
|
DEDUCTIBLE_TYPES,
|
|
@@ -8911,6 +9923,7 @@ export {
|
|
|
8911
9923
|
DeductibleScheduleSchema,
|
|
8912
9924
|
DeductibleTypeSchema,
|
|
8913
9925
|
DefenseCostTreatmentSchema,
|
|
9926
|
+
DefinitionSchema,
|
|
8914
9927
|
DocumentTypeSchema,
|
|
8915
9928
|
DriverRecordSchema,
|
|
8916
9929
|
DwellingDetailsSchema,
|