@claritylabs/cl-sdk 0.17.0 → 0.17.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +16 -4
- package/dist/index.d.mts +10 -2
- package/dist/index.d.ts +10 -2
- package/dist/index.js +1086 -631
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +1086 -631
- package/dist/index.mjs.map +1 -1
- package/package.json +1 -1
package/dist/index.mjs
CHANGED
|
@@ -169,7 +169,14 @@ function createPipelineContext(opts) {
|
|
|
169
169
|
let latest = opts.resumeFrom;
|
|
170
170
|
const completedPhases = /* @__PURE__ */ new Set();
|
|
171
171
|
if (opts.resumeFrom) {
|
|
172
|
-
|
|
172
|
+
const phaseIndex = opts.phaseOrder?.indexOf(opts.resumeFrom.phase) ?? -1;
|
|
173
|
+
if (phaseIndex >= 0 && opts.phaseOrder) {
|
|
174
|
+
for (const phase of opts.phaseOrder.slice(0, phaseIndex + 1)) {
|
|
175
|
+
completedPhases.add(phase);
|
|
176
|
+
}
|
|
177
|
+
} else {
|
|
178
|
+
completedPhases.add(opts.resumeFrom.phase);
|
|
179
|
+
}
|
|
173
180
|
}
|
|
174
181
|
return {
|
|
175
182
|
id: opts.id,
|
|
@@ -1708,6 +1715,53 @@ async function runExtractor(params) {
|
|
|
1708
1715
|
};
|
|
1709
1716
|
}
|
|
1710
1717
|
|
|
1718
|
+
// src/extraction/memory.ts
|
|
1719
|
+
function isMemoryRecord(value) {
|
|
1720
|
+
return typeof value === "object" && value !== null && !Array.isArray(value);
|
|
1721
|
+
}
|
|
1722
|
+
function readMemoryRecord(memory, key) {
|
|
1723
|
+
const value = memory.get(key);
|
|
1724
|
+
return isMemoryRecord(value) ? value : void 0;
|
|
1725
|
+
}
|
|
1726
|
+
function readRecordValue(record, key) {
|
|
1727
|
+
return record?.[key];
|
|
1728
|
+
}
|
|
1729
|
+
function readRecordArray(record, key) {
|
|
1730
|
+
const value = readRecordValue(record, key);
|
|
1731
|
+
return Array.isArray(value) ? value : void 0;
|
|
1732
|
+
}
|
|
1733
|
+
function getCarrierInfo(memory) {
|
|
1734
|
+
return readMemoryRecord(memory, "carrier_info");
|
|
1735
|
+
}
|
|
1736
|
+
function getNamedInsured(memory) {
|
|
1737
|
+
return readMemoryRecord(memory, "named_insured");
|
|
1738
|
+
}
|
|
1739
|
+
function getCoverageLimits(memory) {
|
|
1740
|
+
return readMemoryRecord(memory, "coverage_limits");
|
|
1741
|
+
}
|
|
1742
|
+
function getCoverageLimitCoverages(memory) {
|
|
1743
|
+
return readRecordArray(getCoverageLimits(memory), "coverages") ?? [];
|
|
1744
|
+
}
|
|
1745
|
+
function getSectionsPayload(memory) {
|
|
1746
|
+
return readMemoryRecord(memory, "sections");
|
|
1747
|
+
}
|
|
1748
|
+
function getSections(memory) {
|
|
1749
|
+
return readRecordArray(getSectionsPayload(memory), "sections");
|
|
1750
|
+
}
|
|
1751
|
+
function getDefinitionsPayload(memory) {
|
|
1752
|
+
return readMemoryRecord(memory, "definitions");
|
|
1753
|
+
}
|
|
1754
|
+
function getDefinitions(memory) {
|
|
1755
|
+
return readRecordArray(getDefinitionsPayload(memory), "definitions");
|
|
1756
|
+
}
|
|
1757
|
+
function getCoveredReasonsPayload(memory) {
|
|
1758
|
+
return readMemoryRecord(memory, "covered_reasons");
|
|
1759
|
+
}
|
|
1760
|
+
function getCoveredReasons(memory) {
|
|
1761
|
+
const payload = getCoveredReasonsPayload(memory);
|
|
1762
|
+
return readRecordArray(payload, "coveredReasons") ?? readRecordArray(payload, "covered_reasons");
|
|
1763
|
+
}
|
|
1764
|
+
|
|
1711
1765
|
// src/extraction/promote.ts
|
|
1712
1766
|
function getDeclarationFields(doc) {
|
|
1713
1767
|
const decl = doc.declarations;
|
|
@@ -1734,20 +1788,29 @@ function findRawString(raw, keys) {
|
|
|
1734
1788
|
}
|
|
1735
1789
|
return void 0;
|
|
1736
1790
|
}
|
|
1791
|
+
function promoteRawFields(raw, mappings) {
|
|
1792
|
+
for (const { from, to } of mappings) {
|
|
1793
|
+
if (!raw[to] && raw[from]) {
|
|
1794
|
+
raw[to] = raw[from];
|
|
1795
|
+
}
|
|
1796
|
+
delete raw[from];
|
|
1797
|
+
}
|
|
1798
|
+
}
|
|
1799
|
+
function findRawOrDeclarationValue(raw, fields, lookup) {
|
|
1800
|
+
return (lookup.rawKey ? raw[lookup.rawKey] : void 0) || findFieldValue(fields, lookup.patterns, lookup.reject);
|
|
1801
|
+
}
|
|
1802
|
+
function promoteRawOrDeclarationString(raw, fields, targetKey, rawKeys, lookup) {
|
|
1803
|
+
if (raw[targetKey]) return;
|
|
1804
|
+
const value = findRawString(raw, rawKeys) ?? findFieldValue(fields, lookup.patterns, lookup.reject);
|
|
1805
|
+
if (value) raw[targetKey] = value;
|
|
1806
|
+
}
|
|
1737
1807
|
function promoteCarrierFields(doc) {
|
|
1738
1808
|
const raw = doc;
|
|
1739
|
-
|
|
1740
|
-
|
|
1741
|
-
|
|
1742
|
-
|
|
1743
|
-
|
|
1744
|
-
}
|
|
1745
|
-
if (!raw.carrierAdmittedStatus && raw.admittedStatus) {
|
|
1746
|
-
raw.carrierAdmittedStatus = raw.admittedStatus;
|
|
1747
|
-
}
|
|
1748
|
-
delete raw.naicNumber;
|
|
1749
|
-
delete raw.amBestRating;
|
|
1750
|
-
delete raw.admittedStatus;
|
|
1809
|
+
promoteRawFields(raw, [
|
|
1810
|
+
{ from: "naicNumber", to: "carrierNaicNumber" },
|
|
1811
|
+
{ from: "amBestRating", to: "carrierAmBestRating" },
|
|
1812
|
+
{ from: "admittedStatus", to: "carrierAdmittedStatus" }
|
|
1813
|
+
]);
|
|
1751
1814
|
if (!raw.insurer && raw.carrierLegalName) {
|
|
1752
1815
|
raw.insurer = {
|
|
1753
1816
|
legalName: raw.carrierLegalName,
|
|
@@ -1788,12 +1851,21 @@ var BROKER_ADDRESS_PATTERNS = ["brokerAddress", "agentAddress", "producerAddress
|
|
|
1788
1851
|
function promoteBroker(doc) {
|
|
1789
1852
|
const raw = doc;
|
|
1790
1853
|
const fields = getDeclarationFields(doc);
|
|
1791
|
-
const brokerAgency = raw
|
|
1792
|
-
|
|
1793
|
-
|
|
1794
|
-
|
|
1795
|
-
const
|
|
1796
|
-
|
|
1854
|
+
const brokerAgency = findRawOrDeclarationValue(raw, fields, {
|
|
1855
|
+
rawKey: "brokerAgency",
|
|
1856
|
+
patterns: BROKER_NAME_PATTERNS
|
|
1857
|
+
});
|
|
1858
|
+
const brokerContact = findRawOrDeclarationValue(raw, fields, {
|
|
1859
|
+
rawKey: "brokerContactName",
|
|
1860
|
+
patterns: BROKER_CONTACT_PATTERNS
|
|
1861
|
+
});
|
|
1862
|
+
const brokerLicense = findRawOrDeclarationValue(raw, fields, {
|
|
1863
|
+
rawKey: "brokerLicenseNumber",
|
|
1864
|
+
patterns: BROKER_LICENSE_PATTERNS
|
|
1865
|
+
});
|
|
1866
|
+
const brokerPhone = findRawOrDeclarationValue(raw, fields, { patterns: BROKER_PHONE_PATTERNS });
|
|
1867
|
+
const brokerEmail = findRawOrDeclarationValue(raw, fields, { patterns: BROKER_EMAIL_PATTERNS });
|
|
1868
|
+
const brokerAddress = findRawOrDeclarationValue(raw, fields, { patterns: BROKER_ADDRESS_PATTERNS });
|
|
1797
1869
|
if (brokerAgency) raw.brokerAgency = brokerAgency;
|
|
1798
1870
|
if (brokerContact) raw.brokerContactName = brokerContact;
|
|
1799
1871
|
if (brokerLicense) raw.brokerLicenseNumber = brokerLicense;
|
|
@@ -2169,20 +2241,27 @@ function taxFeeKey(item) {
|
|
|
2169
2241
|
item.type ?? ""
|
|
2170
2242
|
].join("|");
|
|
2171
2243
|
}
|
|
2244
|
+
function taxFeeItemFromField(field) {
|
|
2245
|
+
const type = taxFeeType(field.field);
|
|
2246
|
+
return {
|
|
2247
|
+
name: titleizeFieldName(field.field),
|
|
2248
|
+
amount: absorbNegative(field.value),
|
|
2249
|
+
...type ? { type } : {}
|
|
2250
|
+
};
|
|
2251
|
+
}
|
|
2172
2252
|
function absorbNegative(value) {
|
|
2173
2253
|
return value.replace(/^-\s*/, "").replace(/^\(\s*(.*?)\s*\)$/, "$1");
|
|
2174
2254
|
}
|
|
2175
2255
|
function promotePremium(doc) {
|
|
2176
2256
|
const raw = doc;
|
|
2177
2257
|
const fields = getDeclarationFields(doc);
|
|
2178
|
-
|
|
2179
|
-
|
|
2180
|
-
|
|
2181
|
-
}
|
|
2182
|
-
|
|
2183
|
-
|
|
2184
|
-
|
|
2185
|
-
}
|
|
2258
|
+
promoteRawOrDeclarationString(raw, fields, "premium", PREMIUM_RAW_KEYS, {
|
|
2259
|
+
patterns: PREMIUM_PATTERNS,
|
|
2260
|
+
reject: (field) => isTaxOrFeeField(field.field)
|
|
2261
|
+
});
|
|
2262
|
+
promoteRawOrDeclarationString(raw, fields, "totalCost", TOTAL_COST_RAW_KEYS, {
|
|
2263
|
+
patterns: TOTAL_COST_PATTERNS
|
|
2264
|
+
});
|
|
2186
2265
|
if (typeof raw.premium === "string") raw.premium = absorbNegative(raw.premium);
|
|
2187
2266
|
if (typeof raw.totalCost === "string") raw.totalCost = absorbNegative(raw.totalCost);
|
|
2188
2267
|
}
|
|
@@ -2200,11 +2279,7 @@ function synthesizeTaxesAndFees(doc) {
|
|
|
2200
2279
|
if (!field.value?.trim()) continue;
|
|
2201
2280
|
if (!isTaxOrFeeField(field.field)) continue;
|
|
2202
2281
|
if (isTotalCostField(field.field)) continue;
|
|
2203
|
-
const item =
|
|
2204
|
-
name: titleizeFieldName(field.field),
|
|
2205
|
-
amount: absorbNegative(field.value),
|
|
2206
|
-
...taxFeeType(field.field) ? { type: taxFeeType(field.field) } : {}
|
|
2207
|
-
};
|
|
2282
|
+
const item = taxFeeItemFromField(field);
|
|
2208
2283
|
byKey.set(taxFeeKey(item), item);
|
|
2209
2284
|
}
|
|
2210
2285
|
if (byKey.size > 0) {
|
|
@@ -2224,43 +2299,47 @@ function promoteExtractedFields(doc) {
|
|
|
2224
2299
|
|
|
2225
2300
|
// src/extraction/assembler.ts
|
|
2226
2301
|
function assembleDocument(documentId, documentType, memory) {
|
|
2227
|
-
const carrier = memory
|
|
2228
|
-
const insured = memory
|
|
2229
|
-
const coverages = memory
|
|
2230
|
-
const endorsements = memory
|
|
2231
|
-
const exclusions = memory
|
|
2232
|
-
const conditions = memory
|
|
2233
|
-
const premium = memory
|
|
2234
|
-
const declarations = memory
|
|
2235
|
-
const lossHistory = memory
|
|
2236
|
-
const
|
|
2237
|
-
const
|
|
2238
|
-
const
|
|
2239
|
-
const
|
|
2240
|
-
const
|
|
2241
|
-
const classify = memory.get("classify");
|
|
2302
|
+
const carrier = getCarrierInfo(memory);
|
|
2303
|
+
const insured = getNamedInsured(memory);
|
|
2304
|
+
const coverages = getCoverageLimits(memory);
|
|
2305
|
+
const endorsements = readMemoryRecord(memory, "endorsements");
|
|
2306
|
+
const exclusions = readMemoryRecord(memory, "exclusions");
|
|
2307
|
+
const conditions = readMemoryRecord(memory, "conditions");
|
|
2308
|
+
const premium = readMemoryRecord(memory, "premium_breakdown");
|
|
2309
|
+
const declarations = readMemoryRecord(memory, "declarations");
|
|
2310
|
+
const lossHistory = readMemoryRecord(memory, "loss_history");
|
|
2311
|
+
const supplementary = readMemoryRecord(memory, "supplementary");
|
|
2312
|
+
const formInventory = readMemoryRecord(memory, "form_inventory");
|
|
2313
|
+
const classify = readMemoryRecord(memory, "classify");
|
|
2314
|
+
const lossPayees = readRecordArray(insured, "lossPayees");
|
|
2315
|
+
const mortgageHolders = readRecordArray(insured, "mortgageHolders");
|
|
2242
2316
|
const base = {
|
|
2243
2317
|
id: documentId,
|
|
2244
|
-
carrier: carrier
|
|
2245
|
-
insuredName: insured
|
|
2246
|
-
coverages:
|
|
2247
|
-
policyTypes: classify
|
|
2318
|
+
carrier: readRecordValue(carrier, "carrierName") ?? "Unknown",
|
|
2319
|
+
insuredName: readRecordValue(insured, "insuredName") ?? "Unknown",
|
|
2320
|
+
coverages: getCoverageLimitCoverages(memory),
|
|
2321
|
+
policyTypes: readRecordValue(classify, "policyTypes"),
|
|
2248
2322
|
...sanitizeNulls(carrier ?? {}),
|
|
2249
2323
|
...sanitizeNulls(insured ?? {}),
|
|
2250
2324
|
// Map named_insured extractor's loss payees/mortgage holders to EndorsementParty shape
|
|
2251
|
-
...
|
|
2252
|
-
...
|
|
2325
|
+
...lossPayees && lossPayees.length > 0 ? { lossPayees: lossPayees.map((lp) => ({ ...lp, role: "loss_payee" })) } : {},
|
|
2326
|
+
...mortgageHolders && mortgageHolders.length > 0 ? {
|
|
2327
|
+
mortgageHolders: mortgageHolders.map((mh) => ({
|
|
2328
|
+
...mh,
|
|
2329
|
+
role: "mortgage_holder"
|
|
2330
|
+
}))
|
|
2331
|
+
} : {},
|
|
2253
2332
|
...sanitizeNulls(coverages ?? {}),
|
|
2254
2333
|
...sanitizeNulls(premium ?? {}),
|
|
2255
2334
|
...sanitizeNulls(supplementary ?? {}),
|
|
2256
|
-
supplementaryFacts: supplementary
|
|
2257
|
-
endorsements: endorsements
|
|
2258
|
-
exclusions: exclusions
|
|
2259
|
-
conditions: conditions
|
|
2260
|
-
sections:
|
|
2261
|
-
formInventory: formInventory
|
|
2262
|
-
definitions:
|
|
2263
|
-
coveredReasons:
|
|
2335
|
+
supplementaryFacts: readRecordValue(supplementary, "auxiliaryFacts"),
|
|
2336
|
+
endorsements: readRecordValue(endorsements, "endorsements"),
|
|
2337
|
+
exclusions: readRecordValue(exclusions, "exclusions"),
|
|
2338
|
+
conditions: readRecordValue(conditions, "conditions"),
|
|
2339
|
+
sections: getSections(memory),
|
|
2340
|
+
formInventory: readRecordValue(formInventory, "forms"),
|
|
2341
|
+
definitions: getDefinitions(memory),
|
|
2342
|
+
coveredReasons: getCoveredReasons(memory),
|
|
2264
2343
|
declarations: declarations ? sanitizeNulls(declarations) : void 0,
|
|
2265
2344
|
...sanitizeNulls(lossHistory ?? {})
|
|
2266
2345
|
};
|
|
@@ -2269,21 +2348,21 @@ function assembleDocument(documentId, documentType, memory) {
|
|
|
2269
2348
|
doc = {
|
|
2270
2349
|
...base,
|
|
2271
2350
|
type: "policy",
|
|
2272
|
-
policyNumber: carrier
|
|
2273
|
-
effectiveDate: carrier
|
|
2274
|
-
expirationDate: carrier
|
|
2275
|
-
policyTermType: carrier
|
|
2351
|
+
policyNumber: readRecordValue(carrier, "policyNumber") ?? readRecordValue(insured, "policyNumber") ?? "Unknown",
|
|
2352
|
+
effectiveDate: readRecordValue(carrier, "effectiveDate") ?? readRecordValue(insured, "effectiveDate") ?? "Unknown",
|
|
2353
|
+
expirationDate: readRecordValue(carrier, "expirationDate"),
|
|
2354
|
+
policyTermType: readRecordValue(carrier, "policyTermType")
|
|
2276
2355
|
};
|
|
2277
2356
|
} else {
|
|
2278
2357
|
doc = {
|
|
2279
2358
|
...base,
|
|
2280
2359
|
type: "quote",
|
|
2281
|
-
quoteNumber: carrier
|
|
2282
|
-
proposedEffectiveDate: carrier
|
|
2283
|
-
proposedExpirationDate: carrier
|
|
2284
|
-
subjectivities: coverages
|
|
2285
|
-
underwritingConditions: coverages
|
|
2286
|
-
premiumBreakdown: premium
|
|
2360
|
+
quoteNumber: readRecordValue(carrier, "quoteNumber") ?? "Unknown",
|
|
2361
|
+
proposedEffectiveDate: readRecordValue(carrier, "proposedEffectiveDate"),
|
|
2362
|
+
proposedExpirationDate: readRecordValue(carrier, "proposedExpirationDate"),
|
|
2363
|
+
subjectivities: readRecordValue(coverages, "subjectivities"),
|
|
2364
|
+
underwritingConditions: readRecordValue(coverages, "underwritingConditions"),
|
|
2365
|
+
premiumBreakdown: readRecordValue(premium, "premiumBreakdown")
|
|
2287
2366
|
};
|
|
2288
2367
|
}
|
|
2289
2368
|
promoteExtractedFields(doc);
|
|
@@ -2385,6 +2464,23 @@ ${block}`;
|
|
|
2385
2464
|
}
|
|
2386
2465
|
|
|
2387
2466
|
// src/extraction/formatter.ts
|
|
2467
|
+
var LONG_CONTENT_THRESHOLD = 1200;
|
|
2468
|
+
function shouldFormatContent(text) {
|
|
2469
|
+
const trimmed = text.trim();
|
|
2470
|
+
if (trimmed.length === 0) return false;
|
|
2471
|
+
if (trimmed.length >= LONG_CONTENT_THRESHOLD) return true;
|
|
2472
|
+
if (/```|~~~|<br\s*\/?>/i.test(trimmed)) return true;
|
|
2473
|
+
if (/(^|\s)(\*\*|__|`)/.test(trimmed)) return true;
|
|
2474
|
+
if (/!?\[[^\]]+\]\([^)]+\)/.test(trimmed)) return true;
|
|
2475
|
+
if (/^\s{0,3}#{1,6}\s*\S/m.test(trimmed)) return true;
|
|
2476
|
+
if (/^\s{0,6}(?:[-*+]|\d+[.)])\s+\S/m.test(trimmed)) return true;
|
|
2477
|
+
if (/\t|[^\S\r\n]{3,}|\n{3,}|[ \t]+$/m.test(text)) return true;
|
|
2478
|
+
const lines = trimmed.split(/\r?\n/).map((line) => line.trim()).filter(Boolean);
|
|
2479
|
+
if (lines.some((line) => (line.match(/\|/g)?.length ?? 0) >= 2)) return true;
|
|
2480
|
+
const spaceAlignedRows = lines.filter((line) => /\S\s{2,}\S\s{2,}\S/.test(line));
|
|
2481
|
+
if (spaceAlignedRows.length >= 2) return true;
|
|
2482
|
+
return false;
|
|
2483
|
+
}
|
|
2388
2484
|
function collectContentFields(doc) {
|
|
2389
2485
|
const entries = [];
|
|
2390
2486
|
let id = 0;
|
|
@@ -2488,7 +2584,7 @@ function applyFormattedContent(doc, entries, formatted) {
|
|
|
2488
2584
|
}
|
|
2489
2585
|
var MAX_ENTRIES_PER_BATCH = 20;
|
|
2490
2586
|
async function formatDocumentContent(doc, generateText, options) {
|
|
2491
|
-
const entries = collectContentFields(doc);
|
|
2587
|
+
const entries = collectContentFields(doc).filter((entry) => shouldFormatContent(entry.text));
|
|
2492
2588
|
const totalUsage = { inputTokens: 0, outputTokens: 0 };
|
|
2493
2589
|
if (entries.length === 0) {
|
|
2494
2590
|
return { document: doc, usage: totalUsage };
|
|
@@ -2565,11 +2661,22 @@ function chunkDocument(doc) {
|
|
|
2565
2661
|
if (policyTypesStr) base.policyTypes = policyTypesStr;
|
|
2566
2662
|
return base;
|
|
2567
2663
|
}
|
|
2568
|
-
|
|
2569
|
-
|
|
2570
|
-
|
|
2571
|
-
|
|
2572
|
-
|
|
2664
|
+
function lines(values) {
|
|
2665
|
+
return values.filter(Boolean).join("\n");
|
|
2666
|
+
}
|
|
2667
|
+
function pushChunk(idSuffix, type, text, metadata) {
|
|
2668
|
+
chunks.push({
|
|
2669
|
+
id: `${docId}:${idSuffix}`,
|
|
2670
|
+
documentId: docId,
|
|
2671
|
+
type,
|
|
2672
|
+
text,
|
|
2673
|
+
metadata: stringMetadata(metadata)
|
|
2674
|
+
});
|
|
2675
|
+
}
|
|
2676
|
+
pushChunk(
|
|
2677
|
+
"carrier_info:0",
|
|
2678
|
+
"carrier_info",
|
|
2679
|
+
lines([
|
|
2573
2680
|
`Carrier: ${doc.carrier}`,
|
|
2574
2681
|
doc.carrierLegalName ? `Legal Name: ${doc.carrierLegalName}` : null,
|
|
2575
2682
|
doc.carrierNaicNumber ? `NAIC: ${doc.carrierNaicNumber}` : null,
|
|
@@ -2586,94 +2693,83 @@ function chunkDocument(doc) {
|
|
|
2586
2693
|
doc.isPackage != null ? `Package: ${doc.isPackage ? "Yes" : "No"}` : null,
|
|
2587
2694
|
doc.security ? `Security: ${doc.security}` : null,
|
|
2588
2695
|
doc.policyTypes?.length ? `Policy Types: ${doc.policyTypes.join(", ")}` : null
|
|
2589
|
-
]
|
|
2590
|
-
|
|
2591
|
-
|
|
2696
|
+
]),
|
|
2697
|
+
{ carrier: doc.carrier, documentType: doc.type }
|
|
2698
|
+
);
|
|
2592
2699
|
if (doc.summary) {
|
|
2593
|
-
|
|
2594
|
-
id: `${docId}:declaration:summary`,
|
|
2595
|
-
documentId: docId,
|
|
2596
|
-
type: "declaration",
|
|
2597
|
-
text: `Policy Summary: ${doc.summary}`,
|
|
2598
|
-
metadata: stringMetadata({ documentType: doc.type })
|
|
2599
|
-
});
|
|
2700
|
+
pushChunk("declaration:summary", "declaration", `Policy Summary: ${doc.summary}`, { documentType: doc.type });
|
|
2600
2701
|
}
|
|
2601
2702
|
if (doc.type === "policy") {
|
|
2602
2703
|
const pol = doc;
|
|
2603
|
-
|
|
2604
|
-
|
|
2605
|
-
|
|
2606
|
-
|
|
2607
|
-
text: [
|
|
2704
|
+
pushChunk(
|
|
2705
|
+
"declaration:policy_details",
|
|
2706
|
+
"declaration",
|
|
2707
|
+
lines([
|
|
2608
2708
|
`Policy Number: ${pol.policyNumber}`,
|
|
2609
2709
|
`Effective Date: ${pol.effectiveDate}`,
|
|
2610
2710
|
pol.expirationDate ? `Expiration Date: ${pol.expirationDate}` : null,
|
|
2611
2711
|
pol.policyTermType ? `Term Type: ${pol.policyTermType}` : null,
|
|
2612
2712
|
pol.effectiveTime ? `Effective Time: ${pol.effectiveTime}` : null,
|
|
2613
2713
|
pol.nextReviewDate ? `Next Review Date: ${pol.nextReviewDate}` : null
|
|
2614
|
-
]
|
|
2615
|
-
|
|
2714
|
+
]),
|
|
2715
|
+
{
|
|
2616
2716
|
policyNumber: pol.policyNumber,
|
|
2617
2717
|
effectiveDate: pol.effectiveDate,
|
|
2618
2718
|
expirationDate: pol.expirationDate,
|
|
2619
2719
|
documentType: doc.type
|
|
2620
|
-
}
|
|
2621
|
-
|
|
2720
|
+
}
|
|
2721
|
+
);
|
|
2622
2722
|
} else {
|
|
2623
2723
|
const quote = doc;
|
|
2624
|
-
|
|
2625
|
-
|
|
2626
|
-
|
|
2627
|
-
|
|
2628
|
-
text: [
|
|
2724
|
+
pushChunk(
|
|
2725
|
+
"declaration:quote_details",
|
|
2726
|
+
"declaration",
|
|
2727
|
+
lines([
|
|
2629
2728
|
`Quote Number: ${quote.quoteNumber}`,
|
|
2630
2729
|
quote.proposedEffectiveDate ? `Proposed Effective Date: ${quote.proposedEffectiveDate}` : null,
|
|
2631
2730
|
quote.proposedExpirationDate ? `Proposed Expiration Date: ${quote.proposedExpirationDate}` : null,
|
|
2632
2731
|
quote.quoteExpirationDate ? `Quote Expiration Date: ${quote.quoteExpirationDate}` : null
|
|
2633
|
-
]
|
|
2634
|
-
|
|
2732
|
+
]),
|
|
2733
|
+
{
|
|
2635
2734
|
quoteNumber: quote.quoteNumber,
|
|
2636
2735
|
documentType: doc.type
|
|
2637
|
-
}
|
|
2638
|
-
|
|
2736
|
+
}
|
|
2737
|
+
);
|
|
2639
2738
|
}
|
|
2640
2739
|
if (doc.insurer) {
|
|
2641
|
-
|
|
2642
|
-
|
|
2643
|
-
|
|
2644
|
-
|
|
2645
|
-
text: [
|
|
2740
|
+
pushChunk(
|
|
2741
|
+
"party:insurer",
|
|
2742
|
+
"party",
|
|
2743
|
+
lines([
|
|
2646
2744
|
`Insurer: ${doc.insurer.legalName}`,
|
|
2647
2745
|
doc.insurer.naicNumber ? `NAIC: ${doc.insurer.naicNumber}` : null,
|
|
2648
2746
|
doc.insurer.amBestRating ? `AM Best Rating: ${doc.insurer.amBestRating}` : null,
|
|
2649
2747
|
doc.insurer.amBestNumber ? `AM Best Number: ${doc.insurer.amBestNumber}` : null,
|
|
2650
2748
|
doc.insurer.admittedStatus ? `Admitted Status: ${doc.insurer.admittedStatus}` : null,
|
|
2651
2749
|
doc.insurer.stateOfDomicile ? `State of Domicile: ${doc.insurer.stateOfDomicile}` : null
|
|
2652
|
-
]
|
|
2653
|
-
|
|
2654
|
-
|
|
2750
|
+
]),
|
|
2751
|
+
{ partyRole: "insurer", partyName: doc.insurer.legalName, documentType: doc.type }
|
|
2752
|
+
);
|
|
2655
2753
|
}
|
|
2656
2754
|
if (doc.producer) {
|
|
2657
|
-
|
|
2658
|
-
|
|
2659
|
-
|
|
2660
|
-
|
|
2661
|
-
text: [
|
|
2755
|
+
pushChunk(
|
|
2756
|
+
"party:producer",
|
|
2757
|
+
"party",
|
|
2758
|
+
lines([
|
|
2662
2759
|
`Producer/Broker: ${doc.producer.agencyName}`,
|
|
2663
2760
|
doc.producer.contactName ? `Contact: ${doc.producer.contactName}` : null,
|
|
2664
2761
|
doc.producer.licenseNumber ? `License: ${doc.producer.licenseNumber}` : null,
|
|
2665
2762
|
doc.producer.phone ? `Phone: ${doc.producer.phone}` : null,
|
|
2666
2763
|
doc.producer.email ? `Email: ${doc.producer.email}` : null,
|
|
2667
2764
|
doc.producer.address ? `Address: ${formatAddress(doc.producer.address)}` : null
|
|
2668
|
-
]
|
|
2669
|
-
|
|
2670
|
-
|
|
2765
|
+
]),
|
|
2766
|
+
{ partyRole: "producer", partyName: doc.producer.agencyName, documentType: doc.type }
|
|
2767
|
+
);
|
|
2671
2768
|
}
|
|
2672
|
-
|
|
2673
|
-
|
|
2674
|
-
|
|
2675
|
-
|
|
2676
|
-
text: [
|
|
2769
|
+
pushChunk(
|
|
2770
|
+
"named_insured:0",
|
|
2771
|
+
"named_insured",
|
|
2772
|
+
lines([
|
|
2677
2773
|
`Insured: ${doc.insuredName}`,
|
|
2678
2774
|
doc.insuredDba ? `DBA: ${doc.insuredDba}` : null,
|
|
2679
2775
|
doc.insuredEntityType ? `Entity Type: ${doc.insuredEntityType}` : null,
|
|
@@ -2681,36 +2777,34 @@ function chunkDocument(doc) {
|
|
|
2681
2777
|
doc.insuredSicCode ? `SIC: ${doc.insuredSicCode}` : null,
|
|
2682
2778
|
doc.insuredNaicsCode ? `NAICS: ${doc.insuredNaicsCode}` : null,
|
|
2683
2779
|
doc.insuredAddress ? `Address: ${formatAddress(doc.insuredAddress)}` : null
|
|
2684
|
-
]
|
|
2685
|
-
|
|
2686
|
-
|
|
2780
|
+
]),
|
|
2781
|
+
{ insuredName: doc.insuredName, documentType: doc.type }
|
|
2782
|
+
);
|
|
2687
2783
|
doc.additionalNamedInsureds?.forEach((insured, i) => {
|
|
2688
|
-
|
|
2689
|
-
|
|
2690
|
-
|
|
2691
|
-
|
|
2692
|
-
text: [
|
|
2784
|
+
pushChunk(
|
|
2785
|
+
`named_insured:${i + 1}`,
|
|
2786
|
+
"named_insured",
|
|
2787
|
+
lines([
|
|
2693
2788
|
`Additional Named Insured: ${insured.name}`,
|
|
2694
2789
|
insured.address ? `Address: ${formatAddress(insured.address)}` : null,
|
|
2695
2790
|
insured.relationship ? `Relationship: ${insured.relationship}` : null
|
|
2696
|
-
]
|
|
2697
|
-
|
|
2698
|
-
|
|
2791
|
+
]),
|
|
2792
|
+
{ insuredName: insured.name, role: "additional_named_insured", documentType: doc.type }
|
|
2793
|
+
);
|
|
2699
2794
|
});
|
|
2700
2795
|
doc.coverages.forEach((cov, i) => {
|
|
2701
|
-
|
|
2702
|
-
|
|
2703
|
-
|
|
2704
|
-
|
|
2705
|
-
text: [
|
|
2796
|
+
pushChunk(
|
|
2797
|
+
`coverage:${i}`,
|
|
2798
|
+
"coverage",
|
|
2799
|
+
lines([
|
|
2706
2800
|
`Coverage: ${cov.name}`,
|
|
2707
2801
|
`Limit: ${cov.limit}`,
|
|
2708
2802
|
cov.limitValueType ? `Limit Type: ${cov.limitValueType}` : null,
|
|
2709
2803
|
cov.deductible ? `Deductible: ${cov.deductible}` : null,
|
|
2710
2804
|
cov.deductibleValueType ? `Deductible Type: ${cov.deductibleValueType}` : null,
|
|
2711
2805
|
cov.originalContent ? `Source: ${cov.originalContent}` : null
|
|
2712
|
-
]
|
|
2713
|
-
|
|
2806
|
+
]),
|
|
2807
|
+
{
|
|
2714
2808
|
coverageName: cov.name,
|
|
2715
2809
|
limit: cov.limit,
|
|
2716
2810
|
limitValueType: cov.limitValueType,
|
|
@@ -2720,15 +2814,14 @@ function chunkDocument(doc) {
|
|
|
2720
2814
|
pageNumber: cov.pageNumber,
|
|
2721
2815
|
sectionRef: cov.sectionRef,
|
|
2722
2816
|
documentType: doc.type
|
|
2723
|
-
}
|
|
2724
|
-
|
|
2817
|
+
}
|
|
2818
|
+
);
|
|
2725
2819
|
});
|
|
2726
2820
|
doc.enrichedCoverages?.forEach((cov, i) => {
|
|
2727
|
-
|
|
2728
|
-
|
|
2729
|
-
|
|
2730
|
-
|
|
2731
|
-
text: [
|
|
2821
|
+
pushChunk(
|
|
2822
|
+
`coverage:enriched:${i}`,
|
|
2823
|
+
"coverage",
|
|
2824
|
+
lines([
|
|
2732
2825
|
`Coverage: ${cov.name}`,
|
|
2733
2826
|
cov.coverageCode ? `Code: ${cov.coverageCode}` : null,
|
|
2734
2827
|
`Limit: ${cov.limit}`,
|
|
@@ -2745,8 +2838,8 @@ function chunkDocument(doc) {
|
|
|
2745
2838
|
`Included: ${cov.included ? "Yes" : "No"}`,
|
|
2746
2839
|
cov.premium ? `Premium: ${cov.premium}` : null,
|
|
2747
2840
|
cov.originalContent ? `Source: ${cov.originalContent}` : null
|
|
2748
|
-
]
|
|
2749
|
-
|
|
2841
|
+
]),
|
|
2842
|
+
{
|
|
2750
2843
|
coverageName: cov.name,
|
|
2751
2844
|
coverageCode: cov.coverageCode,
|
|
2752
2845
|
limit: cov.limit,
|
|
@@ -2755,8 +2848,8 @@ function chunkDocument(doc) {
|
|
|
2755
2848
|
pageNumber: cov.pageNumber,
|
|
2756
2849
|
included: cov.included,
|
|
2757
2850
|
documentType: doc.type
|
|
2758
|
-
}
|
|
2759
|
-
|
|
2851
|
+
}
|
|
2852
|
+
);
|
|
2760
2853
|
});
|
|
2761
2854
|
if (doc.limits) {
|
|
2762
2855
|
const limitLines = ["Limit Schedule"];
|
|
@@ -2780,39 +2873,31 @@ function chunkDocument(doc) {
|
|
|
2780
2873
|
limitLines.push(`Employers Liability \u2014 Each Accident: ${lim.employersLiability.eachAccident}, Disease Policy Limit: ${lim.employersLiability.diseasePolicyLimit}, Disease Each Employee: ${lim.employersLiability.diseaseEachEmployee}`);
|
|
2781
2874
|
}
|
|
2782
2875
|
if (lim.defenseCostTreatment) limitLines.push(`Defense Cost Treatment: ${lim.defenseCostTreatment}`);
|
|
2783
|
-
|
|
2784
|
-
id: `${docId}:coverage:limit_schedule`,
|
|
2785
|
-
documentId: docId,
|
|
2786
|
-
type: "coverage",
|
|
2787
|
-
text: limitLines.join("\n"),
|
|
2788
|
-
metadata: stringMetadata({ coverageName: "limit_schedule", documentType: doc.type })
|
|
2789
|
-
});
|
|
2876
|
+
pushChunk("coverage:limit_schedule", "coverage", limitLines.join("\n"), { coverageName: "limit_schedule", documentType: doc.type });
|
|
2790
2877
|
lim.sublimits?.forEach((sub, i) => {
|
|
2791
|
-
|
|
2792
|
-
|
|
2793
|
-
|
|
2794
|
-
|
|
2795
|
-
text: [
|
|
2878
|
+
pushChunk(
|
|
2879
|
+
`coverage:sublimit:${i}`,
|
|
2880
|
+
"coverage",
|
|
2881
|
+
lines([
|
|
2796
2882
|
`Sublimit: ${sub.name}`,
|
|
2797
2883
|
`Limit: ${sub.limit}`,
|
|
2798
2884
|
sub.appliesTo ? `Applies To: ${sub.appliesTo}` : null,
|
|
2799
2885
|
sub.deductible ? `Deductible: ${sub.deductible}` : null
|
|
2800
|
-
]
|
|
2801
|
-
|
|
2802
|
-
|
|
2886
|
+
]),
|
|
2887
|
+
{ coverageName: sub.name, limit: sub.limit, documentType: doc.type }
|
|
2888
|
+
);
|
|
2803
2889
|
});
|
|
2804
2890
|
lim.sharedLimits?.forEach((sl, i) => {
|
|
2805
|
-
|
|
2806
|
-
|
|
2807
|
-
|
|
2808
|
-
|
|
2809
|
-
text: [
|
|
2891
|
+
pushChunk(
|
|
2892
|
+
`coverage:shared_limit:${i}`,
|
|
2893
|
+
"coverage",
|
|
2894
|
+
[
|
|
2810
2895
|
`Shared Limit: ${sl.description}`,
|
|
2811
2896
|
`Limit: ${sl.limit}`,
|
|
2812
2897
|
`Coverage Parts: ${sl.coverageParts.join(", ")}`
|
|
2813
2898
|
].join("\n"),
|
|
2814
|
-
|
|
2815
|
-
|
|
2899
|
+
{ coverageName: sl.description, limit: sl.limit, documentType: doc.type }
|
|
2900
|
+
);
|
|
2816
2901
|
});
|
|
2817
2902
|
}
|
|
2818
2903
|
if (doc.deductibles) {
|
|
@@ -2826,12 +2911,9 @@ function chunkDocument(doc) {
|
|
|
2826
2911
|
if (ded.waitingPeriod) dedLines.push(`Waiting Period: ${ded.waitingPeriod}`);
|
|
2827
2912
|
if (ded.appliesTo) dedLines.push(`Applies To: ${ded.appliesTo}`);
|
|
2828
2913
|
if (dedLines.length > 1) {
|
|
2829
|
-
|
|
2830
|
-
|
|
2831
|
-
|
|
2832
|
-
type: "coverage",
|
|
2833
|
-
text: dedLines.join("\n"),
|
|
2834
|
-
metadata: stringMetadata({ coverageName: "deductible_schedule", documentType: doc.type })
|
|
2914
|
+
pushChunk("coverage:deductible_schedule", "coverage", dedLines.join("\n"), {
|
|
2915
|
+
coverageName: "deductible_schedule",
|
|
2916
|
+
documentType: doc.type
|
|
2835
2917
|
});
|
|
2836
2918
|
}
|
|
2837
2919
|
}
|
|
@@ -2843,99 +2925,90 @@ function chunkDocument(doc) {
|
|
|
2843
2925
|
doc.extendedReportingPeriod?.supplementalPremium ? `Extended Reporting Period Premium: ${doc.extendedReportingPeriod.supplementalPremium}` : null
|
|
2844
2926
|
].filter(Boolean);
|
|
2845
2927
|
if (claimsMadeLines.length > 0) {
|
|
2846
|
-
|
|
2847
|
-
|
|
2848
|
-
|
|
2849
|
-
type: "coverage",
|
|
2850
|
-
text: claimsMadeLines.join("\n"),
|
|
2851
|
-
metadata: stringMetadata({ coverageName: "claims_made_details", documentType: doc.type })
|
|
2928
|
+
pushChunk("coverage:claims_made_details", "coverage", claimsMadeLines.join("\n"), {
|
|
2929
|
+
coverageName: "claims_made_details",
|
|
2930
|
+
documentType: doc.type
|
|
2852
2931
|
});
|
|
2853
2932
|
}
|
|
2854
2933
|
doc.formInventory?.forEach((form, i) => {
|
|
2855
|
-
|
|
2856
|
-
|
|
2857
|
-
|
|
2858
|
-
|
|
2859
|
-
text: [
|
|
2934
|
+
pushChunk(
|
|
2935
|
+
`declaration:form:${i}`,
|
|
2936
|
+
"declaration",
|
|
2937
|
+
lines([
|
|
2860
2938
|
`Form: ${form.formNumber}`,
|
|
2861
2939
|
form.title ? `Title: ${form.title}` : null,
|
|
2862
2940
|
`Type: ${form.formType}`,
|
|
2863
2941
|
form.editionDate ? `Edition: ${form.editionDate}` : null,
|
|
2864
2942
|
form.pageStart ? `Pages: ${form.pageStart}${form.pageEnd ? `-${form.pageEnd}` : ""}` : null
|
|
2865
|
-
]
|
|
2866
|
-
|
|
2943
|
+
]),
|
|
2944
|
+
{
|
|
2867
2945
|
formNumber: form.formNumber,
|
|
2868
2946
|
formType: form.formType,
|
|
2869
2947
|
documentType: doc.type
|
|
2870
|
-
}
|
|
2871
|
-
|
|
2948
|
+
}
|
|
2949
|
+
);
|
|
2872
2950
|
});
|
|
2873
2951
|
doc.endorsements?.forEach((end, i) => {
|
|
2874
|
-
|
|
2875
|
-
|
|
2876
|
-
|
|
2877
|
-
|
|
2878
|
-
text: `Endorsement: ${end.title}
|
|
2952
|
+
pushChunk(
|
|
2953
|
+
`endorsement:${i}`,
|
|
2954
|
+
"endorsement",
|
|
2955
|
+
`Endorsement: ${end.title}
|
|
2879
2956
|
${end.content}`.trim(),
|
|
2880
|
-
|
|
2957
|
+
{
|
|
2881
2958
|
endorsementType: end.endorsementType,
|
|
2882
2959
|
formNumber: end.formNumber,
|
|
2883
2960
|
pageStart: end.pageStart,
|
|
2884
2961
|
pageEnd: end.pageEnd,
|
|
2885
2962
|
documentType: doc.type
|
|
2886
|
-
}
|
|
2887
|
-
|
|
2963
|
+
}
|
|
2964
|
+
);
|
|
2888
2965
|
});
|
|
2889
2966
|
doc.exclusions?.forEach((exc, i) => {
|
|
2890
|
-
|
|
2891
|
-
|
|
2892
|
-
|
|
2893
|
-
|
|
2894
|
-
|
|
2895
|
-
${exc.content}`.trim(),
|
|
2896
|
-
metadata: stringMetadata({ formNumber: exc.formNumber, pageNumber: exc.pageNumber, documentType: doc.type })
|
|
2967
|
+
pushChunk(`exclusion:${i}`, "exclusion", `Exclusion: ${exc.name}
|
|
2968
|
+
${exc.content}`.trim(), {
|
|
2969
|
+
formNumber: exc.formNumber,
|
|
2970
|
+
pageNumber: exc.pageNumber,
|
|
2971
|
+
documentType: doc.type
|
|
2897
2972
|
});
|
|
2898
2973
|
});
|
|
2899
2974
|
doc.conditions?.forEach((cond, i) => {
|
|
2900
|
-
|
|
2901
|
-
|
|
2902
|
-
|
|
2903
|
-
|
|
2904
|
-
text: [
|
|
2975
|
+
pushChunk(
|
|
2976
|
+
`condition:${i}`,
|
|
2977
|
+
"condition",
|
|
2978
|
+
[
|
|
2905
2979
|
`Condition: ${cond.name}`,
|
|
2906
2980
|
`Type: ${cond.conditionType}`,
|
|
2907
2981
|
cond.content,
|
|
2908
2982
|
...cond.keyValues?.map((kv) => `${kv.key}: ${kv.value}`) ?? []
|
|
2909
2983
|
].join("\n"),
|
|
2910
|
-
|
|
2984
|
+
{
|
|
2911
2985
|
conditionName: cond.name,
|
|
2912
2986
|
conditionType: cond.conditionType,
|
|
2913
2987
|
pageNumber: cond.pageNumber,
|
|
2914
2988
|
documentType: doc.type
|
|
2915
|
-
}
|
|
2916
|
-
|
|
2989
|
+
}
|
|
2990
|
+
);
|
|
2917
2991
|
});
|
|
2918
2992
|
asRecordArray(extendedDoc.definitions).forEach((definition, i) => {
|
|
2919
2993
|
const term = firstString(definition, ["term", "name", "title"]) ?? `Definition ${i + 1}`;
|
|
2920
2994
|
const body = firstString(definition, ["definition", "content", "text", "meaning"]);
|
|
2921
|
-
|
|
2922
|
-
|
|
2923
|
-
|
|
2924
|
-
|
|
2925
|
-
text: [
|
|
2995
|
+
pushChunk(
|
|
2996
|
+
`definition:${i}`,
|
|
2997
|
+
"definition",
|
|
2998
|
+
lines([
|
|
2926
2999
|
`Definition: ${term}`,
|
|
2927
3000
|
body,
|
|
2928
3001
|
firstString(definition, ["originalContent", "source"]) ? `Source: ${firstString(definition, ["originalContent", "source"])}` : null
|
|
2929
|
-
]
|
|
2930
|
-
|
|
3002
|
+
]),
|
|
3003
|
+
{
|
|
2931
3004
|
term,
|
|
2932
3005
|
formNumber: firstString(definition, ["formNumber"]),
|
|
2933
3006
|
formTitle: firstString(definition, ["formTitle"]),
|
|
2934
3007
|
pageNumber: typeof definition.pageNumber === "number" ? definition.pageNumber : void 0,
|
|
2935
3008
|
sectionRef: firstString(definition, ["sectionRef", "sectionTitle"]),
|
|
2936
3009
|
documentType: doc.type
|
|
2937
|
-
}
|
|
2938
|
-
|
|
3010
|
+
}
|
|
3011
|
+
);
|
|
2939
3012
|
});
|
|
2940
3013
|
const coveredReasons = asRecordArray(extendedDoc.coveredReasons ?? extendedDoc.covered_reasons);
|
|
2941
3014
|
coveredReasons.forEach((coveredReason, i) => {
|
|
@@ -2943,18 +3016,17 @@ ${exc.content}`.trim(),
|
|
|
2943
3016
|
const coverageName = firstString(coveredReason, ["coverageName", "coverage", "coveragePart"]);
|
|
2944
3017
|
const reasonNumber = firstString(coveredReason, ["reasonNumber", "number"]);
|
|
2945
3018
|
const body = firstString(coveredReason, ["content", "description", "text", "coverageGrant"]);
|
|
2946
|
-
|
|
2947
|
-
|
|
2948
|
-
|
|
2949
|
-
|
|
2950
|
-
text: [
|
|
3019
|
+
pushChunk(
|
|
3020
|
+
`covered_reason:${i}`,
|
|
3021
|
+
"covered_reason",
|
|
3022
|
+
lines([
|
|
2951
3023
|
coverageName ? `Coverage: ${coverageName}` : null,
|
|
2952
3024
|
reasonNumber ? `Reason Number: ${reasonNumber}` : null,
|
|
2953
3025
|
`Covered Reason: ${title}`,
|
|
2954
3026
|
body,
|
|
2955
3027
|
firstString(coveredReason, ["originalContent", "source"]) ? `Source: ${firstString(coveredReason, ["originalContent", "source"])}` : null
|
|
2956
|
-
]
|
|
2957
|
-
|
|
3028
|
+
]),
|
|
3029
|
+
{
|
|
2958
3030
|
coverageName,
|
|
2959
3031
|
reasonNumber,
|
|
2960
3032
|
title,
|
|
@@ -2963,21 +3035,20 @@ ${exc.content}`.trim(),
|
|
|
2963
3035
|
pageNumber: typeof coveredReason.pageNumber === "number" ? coveredReason.pageNumber : void 0,
|
|
2964
3036
|
sectionRef: firstString(coveredReason, ["sectionRef", "sectionTitle"]),
|
|
2965
3037
|
documentType: doc.type
|
|
2966
|
-
}
|
|
2967
|
-
|
|
3038
|
+
}
|
|
3039
|
+
);
|
|
2968
3040
|
const conditions = Array.isArray(coveredReason.conditions) ? coveredReason.conditions.filter((condition) => typeof condition === "string" && condition.trim().length > 0) : [];
|
|
2969
3041
|
conditions.forEach((condition, conditionIndex) => {
|
|
2970
|
-
|
|
2971
|
-
|
|
2972
|
-
|
|
2973
|
-
|
|
2974
|
-
text: [
|
|
3042
|
+
pushChunk(
|
|
3043
|
+
`covered_reason:${i}:condition:${conditionIndex}`,
|
|
3044
|
+
"covered_reason",
|
|
3045
|
+
lines([
|
|
2975
3046
|
coverageName ? `Coverage: ${coverageName}` : null,
|
|
2976
3047
|
reasonNumber ? `Reason Number: ${reasonNumber}` : null,
|
|
2977
3048
|
`Covered Reason Condition: ${title}`,
|
|
2978
3049
|
condition
|
|
2979
|
-
]
|
|
2980
|
-
|
|
3050
|
+
]),
|
|
3051
|
+
{
|
|
2981
3052
|
coverageName,
|
|
2982
3053
|
reasonNumber,
|
|
2983
3054
|
title,
|
|
@@ -2987,8 +3058,8 @@ ${exc.content}`.trim(),
|
|
|
2987
3058
|
pageNumber: typeof coveredReason.pageNumber === "number" ? coveredReason.pageNumber : void 0,
|
|
2988
3059
|
sectionRef: firstString(coveredReason, ["sectionRef", "sectionTitle"]),
|
|
2989
3060
|
documentType: doc.type
|
|
2990
|
-
}
|
|
2991
|
-
|
|
3061
|
+
}
|
|
3062
|
+
);
|
|
2992
3063
|
});
|
|
2993
3064
|
});
|
|
2994
3065
|
if (doc.declarations) {
|
|
@@ -3003,50 +3074,42 @@ ${exc.content}`.trim(),
|
|
|
3003
3074
|
const declMeta = { documentType: doc.type };
|
|
3004
3075
|
if (typeof decl.formType === "string") declMeta.formType = decl.formType;
|
|
3005
3076
|
if (typeof decl.line === "string") declMeta.declarationLine = decl.line;
|
|
3006
|
-
|
|
3007
|
-
|
|
3008
|
-
documentId: docId,
|
|
3009
|
-
type: "declaration",
|
|
3010
|
-
text: `Declarations
|
|
3011
|
-
${declLines.join("\n")}`,
|
|
3012
|
-
metadata: stringMetadata(declMeta)
|
|
3013
|
-
});
|
|
3077
|
+
pushChunk("declaration:0", "declaration", `Declarations
|
|
3078
|
+
${declLines.join("\n")}`, declMeta);
|
|
3014
3079
|
}
|
|
3015
3080
|
}
|
|
3016
3081
|
doc.sections?.forEach((sec, i) => {
|
|
3017
3082
|
const hasSubsections = sec.subsections && sec.subsections.length > 0;
|
|
3018
3083
|
const contentLength = sec.content.length;
|
|
3019
3084
|
if (hasSubsections) {
|
|
3020
|
-
|
|
3021
|
-
|
|
3022
|
-
|
|
3023
|
-
|
|
3024
|
-
text: `Section: ${sec.title}
|
|
3085
|
+
pushChunk(
|
|
3086
|
+
`section:${i}`,
|
|
3087
|
+
"section",
|
|
3088
|
+
`Section: ${sec.title}
|
|
3025
3089
|
${sec.content}`,
|
|
3026
|
-
|
|
3090
|
+
{
|
|
3027
3091
|
sectionType: sec.type,
|
|
3028
3092
|
sectionNumber: sec.sectionNumber,
|
|
3029
3093
|
pageStart: sec.pageStart,
|
|
3030
3094
|
pageEnd: sec.pageEnd,
|
|
3031
3095
|
documentType: doc.type,
|
|
3032
3096
|
hasSubsections: "true"
|
|
3033
|
-
}
|
|
3034
|
-
|
|
3097
|
+
}
|
|
3098
|
+
);
|
|
3035
3099
|
sec.subsections.forEach((sub, j) => {
|
|
3036
|
-
|
|
3037
|
-
|
|
3038
|
-
|
|
3039
|
-
|
|
3040
|
-
text: `${sec.title} > ${sub.title}
|
|
3100
|
+
pushChunk(
|
|
3101
|
+
`section:${i}:sub:${j}`,
|
|
3102
|
+
"section",
|
|
3103
|
+
`${sec.title} > ${sub.title}
|
|
3041
3104
|
${sub.content}`,
|
|
3042
|
-
|
|
3105
|
+
{
|
|
3043
3106
|
sectionType: sec.type,
|
|
3044
3107
|
parentSection: sec.title,
|
|
3045
3108
|
sectionNumber: sub.sectionNumber,
|
|
3046
3109
|
pageNumber: sub.pageNumber,
|
|
3047
3110
|
documentType: doc.type
|
|
3048
|
-
}
|
|
3049
|
-
|
|
3111
|
+
}
|
|
3112
|
+
);
|
|
3050
3113
|
});
|
|
3051
3114
|
} else if (contentLength > 2e3) {
|
|
3052
3115
|
const paragraphs = sec.content.split(/\n\n+/);
|
|
@@ -3054,58 +3117,55 @@ ${sub.content}`,
|
|
|
3054
3117
|
let chunkIndex = 0;
|
|
3055
3118
|
for (const para of paragraphs) {
|
|
3056
3119
|
if (currentChunk.length + para.length > 1e3 && currentChunk.length > 0) {
|
|
3057
|
-
|
|
3058
|
-
|
|
3059
|
-
|
|
3060
|
-
|
|
3061
|
-
text: `Section: ${sec.title} (part ${chunkIndex + 1})
|
|
3120
|
+
pushChunk(
|
|
3121
|
+
`section:${i}:part:${chunkIndex}`,
|
|
3122
|
+
"section",
|
|
3123
|
+
`Section: ${sec.title} (part ${chunkIndex + 1})
|
|
3062
3124
|
${currentChunk.trim()}`,
|
|
3063
|
-
|
|
3125
|
+
{
|
|
3064
3126
|
sectionType: sec.type,
|
|
3065
3127
|
sectionNumber: sec.sectionNumber,
|
|
3066
3128
|
pageStart: sec.pageStart,
|
|
3067
3129
|
pageEnd: sec.pageEnd,
|
|
3068
3130
|
documentType: doc.type,
|
|
3069
3131
|
partIndex: chunkIndex
|
|
3070
|
-
}
|
|
3071
|
-
|
|
3132
|
+
}
|
|
3133
|
+
);
|
|
3072
3134
|
currentChunk = "";
|
|
3073
3135
|
chunkIndex++;
|
|
3074
3136
|
}
|
|
3075
3137
|
currentChunk += (currentChunk ? "\n\n" : "") + para;
|
|
3076
3138
|
}
|
|
3077
3139
|
if (currentChunk.trim()) {
|
|
3078
|
-
|
|
3079
|
-
|
|
3080
|
-
|
|
3081
|
-
|
|
3082
|
-
text: `Section: ${sec.title} (part ${chunkIndex + 1})
|
|
3140
|
+
pushChunk(
|
|
3141
|
+
`section:${i}:part:${chunkIndex}`,
|
|
3142
|
+
"section",
|
|
3143
|
+
`Section: ${sec.title} (part ${chunkIndex + 1})
|
|
3083
3144
|
${currentChunk.trim()}`,
|
|
3084
|
-
|
|
3145
|
+
{
|
|
3085
3146
|
sectionType: sec.type,
|
|
3086
3147
|
sectionNumber: sec.sectionNumber,
|
|
3087
3148
|
pageStart: sec.pageStart,
|
|
3088
3149
|
pageEnd: sec.pageEnd,
|
|
3089
3150
|
documentType: doc.type,
|
|
3090
3151
|
partIndex: chunkIndex
|
|
3091
|
-
}
|
|
3092
|
-
|
|
3152
|
+
}
|
|
3153
|
+
);
|
|
3093
3154
|
}
|
|
3094
3155
|
} else {
|
|
3095
|
-
|
|
3096
|
-
|
|
3097
|
-
|
|
3098
|
-
|
|
3099
|
-
text: `Section: ${sec.title}
|
|
3156
|
+
pushChunk(
|
|
3157
|
+
`section:${i}`,
|
|
3158
|
+
"section",
|
|
3159
|
+
`Section: ${sec.title}
|
|
3100
3160
|
${sec.content}`,
|
|
3101
|
-
|
|
3161
|
+
{
|
|
3102
3162
|
sectionType: sec.type,
|
|
3103
3163
|
sectionNumber: sec.sectionNumber,
|
|
3104
3164
|
pageStart: sec.pageStart,
|
|
3105
3165
|
pageEnd: sec.pageEnd,
|
|
3106
3166
|
documentType: doc.type
|
|
3107
|
-
}
|
|
3108
|
-
|
|
3167
|
+
}
|
|
3168
|
+
);
|
|
3109
3169
|
}
|
|
3110
3170
|
});
|
|
3111
3171
|
doc.locations?.forEach((loc, i) => {
|
|
@@ -4683,12 +4743,15 @@ var ReviewResultSchema = z21.object({
|
|
|
4683
4743
|
description: z21.string()
|
|
4684
4744
|
}))
|
|
4685
4745
|
});
|
|
4686
|
-
function buildReviewPrompt(templateExpected, extractedKeys, extractionSummary, pageMapSummary) {
|
|
4687
|
-
return `You are
|
|
4746
|
+
function buildReviewPrompt(templateExpected, extractedKeys, extractionSummary, pageMapSummary, extractorCatalog) {
|
|
4747
|
+
return `You are the extraction coordinator for an insurance-document agent system. Review the current extraction state, decide whether the result is complete enough, and choose any follow-up extractor tasks needed to improve it.
|
|
4688
4748
|
|
|
4689
4749
|
EXPECTED FIELDS (from document type template):
|
|
4690
4750
|
${templateExpected.map((f) => `- ${f}`).join("\n")}
|
|
4691
4751
|
|
|
4752
|
+
AVAILABLE FOLLOW-UP EXTRACTORS:
|
|
4753
|
+
${extractorCatalog}
|
|
4754
|
+
|
|
4692
4755
|
FIELDS ALREADY EXTRACTED:
|
|
4693
4756
|
${extractedKeys.map((f) => `- ${f}`).join("\n")}
|
|
4694
4757
|
|
|
@@ -4702,7 +4765,7 @@ Determine:
|
|
|
4702
4765
|
1. Is the extraction complete enough?
|
|
4703
4766
|
2. What fields are missing?
|
|
4704
4767
|
3. What quality issues are present?
|
|
4705
|
-
4.
|
|
4768
|
+
4. Which follow-up extraction tasks, if any, should be dispatched?
|
|
4706
4769
|
|
|
4707
4770
|
Mark the extraction as NOT complete if any of these are true:
|
|
4708
4771
|
- required fields are missing
|
|
@@ -4713,7 +4776,9 @@ Mark the extraction as NOT complete if any of these are true:
|
|
|
4713
4776
|
- page assignments suggest declaration, schedule, endorsement, exclusion, or condition pages were not actually extracted with the matching focused extractor
|
|
4714
4777
|
- a focused extractor exists but returned too little substance for the relevant pages
|
|
4715
4778
|
|
|
4716
|
-
When reviewing CURRENT EXTRACTION SUMMARY, compare the page-map counts to extracted counts.
|
|
4779
|
+
When reviewing CURRENT EXTRACTION SUMMARY, compare the page-map counts to extracted counts. If an assigned extractor produced no useful records, produce a quality issue and a narrow follow-up task over the mapped page range.
|
|
4780
|
+
|
|
4781
|
+
Choose follow-up tasks from AVAILABLE FOLLOW-UP EXTRACTORS. You may dispatch any listed extractor when the page map, current extraction summary, or quality evidence shows that the focused extraction is missing, generic, referential, or too thin. Do not invent extractor names.
|
|
4717
4782
|
|
|
4718
4783
|
Return JSON:
|
|
4719
4784
|
{
|
|
@@ -4725,7 +4790,7 @@ Return JSON:
|
|
|
4725
4790
|
]
|
|
4726
4791
|
}
|
|
4727
4792
|
|
|
4728
|
-
Use the page map to target follow-up extraction pages precisely. Prefer narrow, declaration/schedule-focused follow-up tasks over broad page ranges.
|
|
4793
|
+
Use the page map to target follow-up extraction pages precisely. Prefer narrow, declaration/schedule-focused follow-up tasks over broad page ranges. If no additional model work is likely to improve the extraction, return an empty additionalTasks array.
|
|
4729
4794
|
|
|
4730
4795
|
Respond with JSON only.`;
|
|
4731
4796
|
}
|
|
@@ -5260,6 +5325,7 @@ var SectionsSchema = z32.object({
|
|
|
5260
5325
|
"policy_form",
|
|
5261
5326
|
"endorsement",
|
|
5262
5327
|
"application",
|
|
5328
|
+
"covered_reason",
|
|
5263
5329
|
"exclusion",
|
|
5264
5330
|
"condition",
|
|
5265
5331
|
"definition",
|
|
@@ -5283,6 +5349,7 @@ For each section, classify its type:
|
|
|
5283
5349
|
- "policy_form" \u2014 named ISO or proprietary forms (e.g. CG 00 01, IL 00 17). All sections within a named form should be typed as "policy_form"
|
|
5284
5350
|
- "endorsement" \u2014 standalone endorsements modifying the base policy
|
|
5285
5351
|
- "application" \u2014 the insurance application or supplemental application
|
|
5352
|
+
- "covered_reason" \u2014 affirmative grants of coverage, covered causes of loss, covered perils, or named covered events
|
|
5286
5353
|
- "insuring_agreement" \u2014 the insuring agreement clause (only if standalone, not inside a policy_form)
|
|
5287
5354
|
- "exclusion", "condition", "definition" \u2014 for standalone sections only
|
|
5288
5355
|
- "schedule" \u2014 coverage or rating schedules
|
|
@@ -5460,6 +5527,53 @@ Return JSON only.`;
|
|
|
5460
5527
|
}
|
|
5461
5528
|
|
|
5462
5529
|
// src/prompts/extractors/index.ts
|
|
5530
|
+
function asRecord(data) {
|
|
5531
|
+
return data && typeof data === "object" ? data : void 0;
|
|
5532
|
+
}
|
|
5533
|
+
function getSections2(data) {
|
|
5534
|
+
const sections = asRecord(data)?.sections;
|
|
5535
|
+
return Array.isArray(sections) ? sections : [];
|
|
5536
|
+
}
|
|
5537
|
+
function isCoveredReasonsEmpty(data) {
|
|
5538
|
+
const record = asRecord(data);
|
|
5539
|
+
if (!record) return true;
|
|
5540
|
+
const coveredReasons = Array.isArray(record.coveredReasons) ? record.coveredReasons : Array.isArray(record.covered_reasons) ? record.covered_reasons : [];
|
|
5541
|
+
return coveredReasons.length === 0;
|
|
5542
|
+
}
|
|
5543
|
+
function isDefinitionsEmpty(data) {
|
|
5544
|
+
const definitions = asRecord(data)?.definitions;
|
|
5545
|
+
return !Array.isArray(definitions) || definitions.length === 0;
|
|
5546
|
+
}
|
|
5547
|
+
function sectionLooksLikeCoveredReason(section) {
|
|
5548
|
+
const type = String(section.type ?? "").toLowerCase();
|
|
5549
|
+
const title = String(section.title ?? "").toLowerCase();
|
|
5550
|
+
return type === "covered_reason" || title.includes("covered cause") || title.includes("covered reason") || title.includes("covered peril") || title.includes("named peril") || title.includes("insuring agreement");
|
|
5551
|
+
}
|
|
5552
|
+
function deriveCoveredReasonsFromSections(data) {
|
|
5553
|
+
const coveredReasons = getSections2(data).filter(sectionLooksLikeCoveredReason).map((section) => ({
|
|
5554
|
+
coverageName: String(section.coverageName ?? section.formTitle ?? section.title ?? "Covered Reasons"),
|
|
5555
|
+
title: typeof section.title === "string" ? section.title : void 0,
|
|
5556
|
+
content: String(section.content ?? ""),
|
|
5557
|
+
pageNumber: typeof section.pageStart === "number" ? section.pageStart : void 0,
|
|
5558
|
+
formNumber: typeof section.formNumber === "string" ? section.formNumber : void 0,
|
|
5559
|
+
formTitle: typeof section.formTitle === "string" ? section.formTitle : void 0,
|
|
5560
|
+
sectionRef: typeof section.sectionNumber === "string" ? section.sectionNumber : void 0,
|
|
5561
|
+
originalContent: typeof section.content === "string" ? section.content.slice(0, 500) : void 0
|
|
5562
|
+
})).filter((coveredReason) => coveredReason.content.trim().length > 0);
|
|
5563
|
+
return coveredReasons.length > 0 ? { coveredReasons } : void 0;
|
|
5564
|
+
}
|
|
5565
|
+
function deriveDefinitionsFromSections(data) {
|
|
5566
|
+
const definitions = getSections2(data).filter((section) => String(section.type ?? "").toLowerCase() === "definition").map((section) => ({
|
|
5567
|
+
term: String(section.title ?? "Definitions"),
|
|
5568
|
+
definition: String(section.content ?? ""),
|
|
5569
|
+
pageNumber: typeof section.pageStart === "number" ? section.pageStart : void 0,
|
|
5570
|
+
formNumber: typeof section.formNumber === "string" ? section.formNumber : void 0,
|
|
5571
|
+
formTitle: typeof section.formTitle === "string" ? section.formTitle : void 0,
|
|
5572
|
+
sectionRef: typeof section.sectionNumber === "string" ? section.sectionNumber : void 0,
|
|
5573
|
+
originalContent: typeof section.content === "string" ? section.content.slice(0, 500) : void 0
|
|
5574
|
+
})).filter((definition) => definition.definition.trim().length > 0);
|
|
5575
|
+
return definitions.length > 0 ? { definitions } : void 0;
|
|
5576
|
+
}
|
|
5463
5577
|
var EXTRACTORS = {
|
|
5464
5578
|
carrier_info: { buildPrompt: buildCarrierInfoPrompt, schema: CarrierInfoSchema, maxTokens: 2048 },
|
|
5465
5579
|
named_insured: { buildPrompt: buildNamedInsuredPrompt, schema: NamedInsuredSchema2, maxTokens: 2048 },
|
|
@@ -5472,12 +5586,36 @@ var EXTRACTORS = {
|
|
|
5472
5586
|
loss_history: { buildPrompt: buildLossHistoryPrompt, schema: LossHistorySchema, maxTokens: 4096 },
|
|
5473
5587
|
sections: { buildPrompt: buildSectionsPrompt, schema: SectionsSchema, maxTokens: 8192 },
|
|
5474
5588
|
supplementary: { buildPrompt: buildSupplementaryPrompt, schema: SupplementarySchema, maxTokens: 2048 },
|
|
5475
|
-
definitions: {
|
|
5476
|
-
|
|
5589
|
+
definitions: {
|
|
5590
|
+
buildPrompt: buildDefinitionsPrompt,
|
|
5591
|
+
schema: DefinitionsSchema,
|
|
5592
|
+
maxTokens: 8192,
|
|
5593
|
+
fallback: {
|
|
5594
|
+
extractorName: "sections",
|
|
5595
|
+
isEmpty: isDefinitionsEmpty,
|
|
5596
|
+
deriveFocusedResult: deriveDefinitionsFromSections
|
|
5597
|
+
}
|
|
5598
|
+
},
|
|
5599
|
+
covered_reasons: {
|
|
5600
|
+
buildPrompt: buildCoveredReasonsPrompt,
|
|
5601
|
+
schema: CoveredReasonsSchema,
|
|
5602
|
+
maxTokens: 8192,
|
|
5603
|
+
fallback: {
|
|
5604
|
+
extractorName: "sections",
|
|
5605
|
+
isEmpty: isCoveredReasonsEmpty,
|
|
5606
|
+
deriveFocusedResult: deriveCoveredReasonsFromSections
|
|
5607
|
+
}
|
|
5608
|
+
}
|
|
5477
5609
|
};
|
|
5478
5610
|
function getExtractor(name) {
|
|
5479
5611
|
return EXTRACTORS[name];
|
|
5480
5612
|
}
|
|
5613
|
+
function formatExtractorCatalogForPrompt() {
|
|
5614
|
+
return Object.entries(EXTRACTORS).map(([name, extractor]) => {
|
|
5615
|
+
const fallback = extractor.fallback ? `; fallback: ${extractor.fallback.extractorName}` : "";
|
|
5616
|
+
return `- ${name} (maxTokens: ${extractor.maxTokens ?? 4096}${fallback})`;
|
|
5617
|
+
}).join("\n");
|
|
5618
|
+
}
|
|
5481
5619
|
|
|
5482
5620
|
// src/extraction/resolve-referential.ts
|
|
5483
5621
|
import { z as z37 } from "zod";
|
|
@@ -5529,18 +5667,124 @@ Your task:
|
|
|
5529
5667
|
Return JSON only.`;
|
|
5530
5668
|
}
|
|
5531
5669
|
|
|
5532
|
-
// src/extraction/
|
|
5670
|
+
// src/extraction/heuristics.ts
|
|
5533
5671
|
function looksReferential(value) {
|
|
5534
5672
|
if (typeof value !== "string") return false;
|
|
5535
5673
|
const normalized = value.toLowerCase();
|
|
5536
5674
|
return normalized.includes("shown in the declarations") || normalized.includes("shown in declarations") || normalized.includes("shown in the schedule") || normalized.includes("as stated") || normalized.includes("if applicable");
|
|
5537
5675
|
}
|
|
5676
|
+
function looksCoveredReasonSection(section) {
|
|
5677
|
+
const title = String(section.title ?? "").toLowerCase();
|
|
5678
|
+
const type = String(section.type ?? "").toLowerCase();
|
|
5679
|
+
return type === "covered_reason" || title.includes("covered cause") || title.includes("covered reason") || title.includes("covered peril");
|
|
5680
|
+
}
|
|
5681
|
+
|
|
5682
|
+
// src/extraction/referential-workflow.ts
|
|
5683
|
+
function normalizeText(value) {
|
|
5684
|
+
return typeof value === "string" ? value.trim().toLowerCase() : "";
|
|
5685
|
+
}
|
|
5686
|
+
function containsTarget(value, target) {
|
|
5687
|
+
const normalizedValue = normalizeText(value);
|
|
5688
|
+
return Boolean(normalizedValue && target && normalizedValue.includes(target));
|
|
5689
|
+
}
|
|
5690
|
+
function pageRangeFrom(startPage, endPage) {
|
|
5691
|
+
if (typeof startPage !== "number" || !Number.isFinite(startPage) || startPage <= 0) {
|
|
5692
|
+
return void 0;
|
|
5693
|
+
}
|
|
5694
|
+
const normalizedEnd = typeof endPage === "number" && Number.isFinite(endPage) && endPage >= startPage ? endPage : startPage;
|
|
5695
|
+
return { startPage, endPage: normalizedEnd };
|
|
5696
|
+
}
|
|
5697
|
+
function parseReferentialTarget(rawTarget) {
|
|
5698
|
+
const raw = rawTarget?.trim() || "unknown";
|
|
5699
|
+
const normalized = raw.toLowerCase();
|
|
5700
|
+
if (normalized === "unknown") return { raw, normalized, kind: "unknown" };
|
|
5701
|
+
if (/declarations?|dec\b|decs\b/.test(normalized)) return { raw, normalized, kind: "declarations" };
|
|
5702
|
+
if (/schedule|scheduled/.test(normalized)) return { raw, normalized, kind: "schedule" };
|
|
5703
|
+
if (/\bitem\b/.test(normalized)) return { raw, normalized, kind: "item" };
|
|
5704
|
+
if (/premises?|location|building/.test(normalized)) return { raw, normalized, kind: "premises" };
|
|
5705
|
+
if (/\bsection\b/.test(normalized)) return { raw, normalized, kind: "section" };
|
|
5706
|
+
if (/policy|coverage\s+part|coverage\s+form/.test(normalized)) return { raw, normalized, kind: "policy" };
|
|
5707
|
+
return { raw, normalized, kind: "unknown" };
|
|
5708
|
+
}
|
|
5709
|
+
function findLocalReferentialPages(params) {
|
|
5710
|
+
const targetLower = params.referenceTarget.toLowerCase();
|
|
5711
|
+
for (const section of params.sections) {
|
|
5712
|
+
if (containsTarget(section.title, targetLower)) {
|
|
5713
|
+
const range = pageRangeFrom(section.pageStart, section.pageEnd);
|
|
5714
|
+
if (range) return range;
|
|
5715
|
+
}
|
|
5716
|
+
}
|
|
5717
|
+
for (const form of params.formInventory) {
|
|
5718
|
+
const titleMatch = containsTarget(form.title, targetLower);
|
|
5719
|
+
const typeMatch = containsTarget(form.formType, targetLower);
|
|
5720
|
+
const numberMatch = containsTarget(form.formNumber, targetLower);
|
|
5721
|
+
if (titleMatch || typeMatch || numberMatch) {
|
|
5722
|
+
const range = pageRangeFrom(form.pageStart, form.pageEnd);
|
|
5723
|
+
if (range) return range;
|
|
5724
|
+
}
|
|
5725
|
+
}
|
|
5726
|
+
return void 0;
|
|
5727
|
+
}
|
|
5728
|
+
function findDeclarationsSchedulePages(parsedTarget, formInventory) {
|
|
5729
|
+
for (const form of formInventory) {
|
|
5730
|
+
const formType = normalizeText(form.formType);
|
|
5731
|
+
const title = normalizeText(form.title);
|
|
5732
|
+
const matchesDeclarations = formType === "declarations" || /declarations?|dec\b|decs\b/.test(title);
|
|
5733
|
+
const matchesSchedule = /schedule|scheduled|coverage/.test(title) || formType === "coverage";
|
|
5734
|
+
const shouldUse = parsedTarget.kind === "declarations" ? matchesDeclarations : parsedTarget.kind === "schedule" || parsedTarget.kind === "item" || parsedTarget.kind === "premises" ? matchesSchedule || matchesDeclarations : parsedTarget.kind === "policy" ? matchesDeclarations || matchesSchedule : false;
|
|
5735
|
+
if (shouldUse) {
|
|
5736
|
+
const range = pageRangeFrom(form.pageStart, form.pageEnd);
|
|
5737
|
+
if (range) return range;
|
|
5738
|
+
}
|
|
5739
|
+
}
|
|
5740
|
+
return void 0;
|
|
5741
|
+
}
|
|
5742
|
+
function findSectionPages(parsedTarget, sections) {
|
|
5743
|
+
for (const section of sections) {
|
|
5744
|
+
const title = normalizeText(section.title);
|
|
5745
|
+
const type = normalizeText(section.type);
|
|
5746
|
+
const matchesKind = parsedTarget.kind === "declarations" && (type === "declarations" || /declarations?/.test(title)) || parsedTarget.kind === "schedule" && (type === "schedule" || /schedule|scheduled/.test(title)) || parsedTarget.kind === "premises" && /premises?|location|building/.test(title) || parsedTarget.kind === "item" && /\bitem\b|schedule|scheduled/.test(title) || parsedTarget.kind === "section" && containsTarget(title, parsedTarget.normalized);
|
|
5747
|
+
if (matchesKind) {
|
|
5748
|
+
const range = pageRangeFrom(section.pageStart, section.pageEnd);
|
|
5749
|
+
if (range) return range;
|
|
5750
|
+
}
|
|
5751
|
+
}
|
|
5752
|
+
return void 0;
|
|
5753
|
+
}
|
|
5754
|
+
function decideReferentialResolutionAction(params) {
|
|
5755
|
+
if (params.localPageRange) {
|
|
5756
|
+
return { kind: "lookup_pages", source: "local", pageRange: params.localPageRange };
|
|
5757
|
+
}
|
|
5758
|
+
const parsedTarget = parseReferentialTarget(params.referenceTarget);
|
|
5759
|
+
const declarationsScheduleRange = findDeclarationsSchedulePages(parsedTarget, params.formInventory);
|
|
5760
|
+
if (declarationsScheduleRange) {
|
|
5761
|
+
return {
|
|
5762
|
+
kind: "lookup_pages",
|
|
5763
|
+
source: "declarations_schedule",
|
|
5764
|
+
pageRange: declarationsScheduleRange
|
|
5765
|
+
};
|
|
5766
|
+
}
|
|
5767
|
+
const sectionRange = findSectionPages(parsedTarget, params.sections);
|
|
5768
|
+
if (sectionRange) {
|
|
5769
|
+
return { kind: "lookup_pages", source: "sections", pageRange: sectionRange };
|
|
5770
|
+
}
|
|
5771
|
+
if (parsedTarget.kind === "unknown") {
|
|
5772
|
+
return { kind: "skip", reason: "no concrete reference target" };
|
|
5773
|
+
}
|
|
5774
|
+
return { kind: "page_location" };
|
|
5775
|
+
}
|
|
5776
|
+
|
|
5777
|
+
// src/extraction/resolve-referential.ts
|
|
5538
5778
|
function parseReferenceTarget(text) {
|
|
5539
5779
|
if (typeof text !== "string") return void 0;
|
|
5540
5780
|
const normalized = text.trim();
|
|
5541
5781
|
if (!normalized) return void 0;
|
|
5542
5782
|
const sectionMatch = normalized.match(/\b(Section\s+\d+[A-Za-z]?)/i);
|
|
5543
5783
|
if (sectionMatch) return sectionMatch[1];
|
|
5784
|
+
const itemMatch = normalized.match(/\b(Item\s+\d+[A-Za-z]?)/i);
|
|
5785
|
+
if (itemMatch) return itemMatch[1];
|
|
5786
|
+
const premisesMatch = normalized.match(/\b(Premises?(?:\s+No\.?\s*\d+[A-Za-z]?|\s+\d+[A-Za-z]?)?)/i);
|
|
5787
|
+
if (premisesMatch) return premisesMatch[1].trim();
|
|
5544
5788
|
if (/declarations/i.test(normalized)) return "Declarations";
|
|
5545
5789
|
const scheduleMatch = normalized.match(/\b(Schedule(?:\s+of\s+[A-Za-z ]+)?)/i);
|
|
5546
5790
|
if (scheduleMatch) return scheduleMatch[1].trim();
|
|
@@ -5566,26 +5810,31 @@ async function findReferencedPages(params) {
|
|
|
5566
5810
|
pageCount,
|
|
5567
5811
|
generateObject,
|
|
5568
5812
|
providerOptions,
|
|
5813
|
+
trackUsage,
|
|
5569
5814
|
log
|
|
5570
5815
|
} = params;
|
|
5571
|
-
const
|
|
5572
|
-
|
|
5573
|
-
|
|
5574
|
-
|
|
5575
|
-
|
|
5576
|
-
|
|
5577
|
-
|
|
5578
|
-
|
|
5816
|
+
const localPageRange = findLocalReferentialPages({
|
|
5817
|
+
referenceTarget,
|
|
5818
|
+
sections,
|
|
5819
|
+
formInventory
|
|
5820
|
+
});
|
|
5821
|
+
const action = decideReferentialResolutionAction({
|
|
5822
|
+
referenceTarget,
|
|
5823
|
+
sections,
|
|
5824
|
+
formInventory,
|
|
5825
|
+
localPageRange
|
|
5826
|
+
});
|
|
5827
|
+
if (action.kind === "lookup_pages") {
|
|
5828
|
+
await log?.(
|
|
5829
|
+
`Referential target "${referenceTarget}" resolved to pages ${action.pageRange.startPage}-${action.pageRange.endPage} via ${action.source}.`
|
|
5830
|
+
);
|
|
5831
|
+
return action.pageRange;
|
|
5579
5832
|
}
|
|
5580
|
-
|
|
5581
|
-
|
|
5582
|
-
|
|
5583
|
-
|
|
5584
|
-
|
|
5585
|
-
startPage: form.pageStart,
|
|
5586
|
-
endPage: form.pageEnd ?? form.pageStart
|
|
5587
|
-
};
|
|
5588
|
-
}
|
|
5833
|
+
if (action.kind === "skip") {
|
|
5834
|
+
await log?.(
|
|
5835
|
+
`Skipping referential target "${referenceTarget}": ${action.reason}.`
|
|
5836
|
+
);
|
|
5837
|
+
return void 0;
|
|
5589
5838
|
}
|
|
5590
5839
|
try {
|
|
5591
5840
|
const result = await safeGenerateObject(
|
|
@@ -5613,6 +5862,7 @@ Return JSON only.`,
|
|
|
5613
5862
|
)
|
|
5614
5863
|
}
|
|
5615
5864
|
);
|
|
5865
|
+
trackUsage?.(result.usage);
|
|
5616
5866
|
if (result.object.startPage > 0 && result.object.endPage > 0) {
|
|
5617
5867
|
return {
|
|
5618
5868
|
startPage: result.object.startPage,
|
|
@@ -5670,7 +5920,9 @@ async function resolveReferentialCoverages(params) {
|
|
|
5670
5920
|
for (let i = 0; i < referentialCoverages.length; i++) {
|
|
5671
5921
|
const cov = referentialCoverages[i];
|
|
5672
5922
|
const refString = (looksReferential(cov.limit) ? cov.limit : void 0) ?? (looksReferential(cov.deductible) ? cov.deductible : void 0) ?? cov.limit ?? "";
|
|
5673
|
-
const
|
|
5923
|
+
const sectionRef = typeof cov.sectionRef === "string" ? cov.sectionRef : "";
|
|
5924
|
+
const parsedTarget = parseReferenceTarget(refString) ?? parseReferenceTarget(sectionRef) ?? sectionRef;
|
|
5925
|
+
const target = parsedTarget || "unknown";
|
|
5674
5926
|
const group = targetGroups.get(target) ?? [];
|
|
5675
5927
|
group.push({ coverage: cov, index: i });
|
|
5676
5928
|
targetGroups.set(target, group);
|
|
@@ -5694,6 +5946,7 @@ async function resolveReferentialCoverages(params) {
|
|
|
5694
5946
|
pageCount,
|
|
5695
5947
|
generateObject,
|
|
5696
5948
|
providerOptions,
|
|
5949
|
+
trackUsage,
|
|
5697
5950
|
log
|
|
5698
5951
|
});
|
|
5699
5952
|
if (!pageRange) {
|
|
@@ -5811,6 +6064,78 @@ async function resolveReferentialCoverages(params) {
|
|
|
5811
6064
|
};
|
|
5812
6065
|
}
|
|
5813
6066
|
|
|
6067
|
+
// src/extraction/focused-dispatch.ts
|
|
6068
|
+
async function runFocusedExtractorWithFallback(params) {
|
|
6069
|
+
const {
|
|
6070
|
+
task,
|
|
6071
|
+
pdfInput,
|
|
6072
|
+
generateObject,
|
|
6073
|
+
convertPdfToImages,
|
|
6074
|
+
providerOptions,
|
|
6075
|
+
trackUsage,
|
|
6076
|
+
log
|
|
6077
|
+
} = params;
|
|
6078
|
+
const ext = getExtractor(task.extractorName);
|
|
6079
|
+
if (!ext) {
|
|
6080
|
+
await log?.(`Unknown extractor: ${task.extractorName}, skipping`);
|
|
6081
|
+
return null;
|
|
6082
|
+
}
|
|
6083
|
+
try {
|
|
6084
|
+
const result = await runExtractor({
|
|
6085
|
+
name: task.extractorName,
|
|
6086
|
+
prompt: ext.buildPrompt(),
|
|
6087
|
+
schema: ext.schema,
|
|
6088
|
+
pdfInput,
|
|
6089
|
+
startPage: task.startPage,
|
|
6090
|
+
endPage: task.endPage,
|
|
6091
|
+
generateObject,
|
|
6092
|
+
convertPdfToImages,
|
|
6093
|
+
maxTokens: ext.maxTokens ?? 4096,
|
|
6094
|
+
providerOptions
|
|
6095
|
+
});
|
|
6096
|
+
trackUsage(result.usage);
|
|
6097
|
+
if (!ext.fallback?.isEmpty(result.data)) {
|
|
6098
|
+
return result;
|
|
6099
|
+
}
|
|
6100
|
+
if (!ext.fallback) {
|
|
6101
|
+
return result;
|
|
6102
|
+
}
|
|
6103
|
+
} catch (error) {
|
|
6104
|
+
await log?.(`Extractor ${task.extractorName} failed: ${error}`);
|
|
6105
|
+
if (!ext.fallback) {
|
|
6106
|
+
return null;
|
|
6107
|
+
}
|
|
6108
|
+
}
|
|
6109
|
+
const fallbackExt = getExtractor(ext.fallback.extractorName);
|
|
6110
|
+
if (!fallbackExt) return null;
|
|
6111
|
+
await log?.(
|
|
6112
|
+
`Extractor ${task.extractorName} produced no usable records; trying ${ext.fallback.extractorName} fallback for pages ${task.startPage}-${task.endPage}`
|
|
6113
|
+
);
|
|
6114
|
+
try {
|
|
6115
|
+
const fallbackResult = await runExtractor({
|
|
6116
|
+
name: ext.fallback.extractorName,
|
|
6117
|
+
prompt: fallbackExt.buildPrompt(),
|
|
6118
|
+
schema: fallbackExt.schema,
|
|
6119
|
+
pdfInput,
|
|
6120
|
+
startPage: task.startPage,
|
|
6121
|
+
endPage: task.endPage,
|
|
6122
|
+
generateObject,
|
|
6123
|
+
convertPdfToImages,
|
|
6124
|
+
maxTokens: fallbackExt.maxTokens ?? 4096,
|
|
6125
|
+
providerOptions
|
|
6126
|
+
});
|
|
6127
|
+
trackUsage(fallbackResult.usage);
|
|
6128
|
+
const focusedData = ext.fallback.deriveFocusedResult(fallbackResult.data);
|
|
6129
|
+
return focusedData ? [
|
|
6130
|
+
fallbackResult,
|
|
6131
|
+
{ name: task.extractorName, data: focusedData, usage: void 0 }
|
|
6132
|
+
] : fallbackResult;
|
|
6133
|
+
} catch (fallbackError) {
|
|
6134
|
+
await log?.(`${ext.fallback.extractorName} fallback for ${task.extractorName} failed: ${fallbackError}`);
|
|
6135
|
+
return null;
|
|
6136
|
+
}
|
|
6137
|
+
}
|
|
6138
|
+
|
|
5814
6139
|
// src/core/quality.ts
|
|
5815
6140
|
function evaluateQualityGate(params) {
|
|
5816
6141
|
const { issues, hasRoundWarnings = false } = params;
|
|
@@ -5847,11 +6172,6 @@ function addFormEntry(inventory, formNumber, source, extra) {
|
|
|
5847
6172
|
sources: [source]
|
|
5848
6173
|
});
|
|
5849
6174
|
}
|
|
5850
|
-
function looksReferential2(value) {
|
|
5851
|
-
if (typeof value !== "string") return false;
|
|
5852
|
-
const normalized = value.toLowerCase();
|
|
5853
|
-
return normalized.includes("shown in the declarations") || normalized.includes("shown in declarations") || normalized.includes("shown in the schedule") || normalized.includes("as stated") || normalized.includes("if applicable");
|
|
5854
|
-
}
|
|
5855
6175
|
function looksTocArtifact(value) {
|
|
5856
6176
|
if (typeof value !== "string") return false;
|
|
5857
6177
|
return /\.{4,}\d{1,3}$/.test(value.trim()) || /^\d+\.\s+[A-Z][\s\S]*\.{3,}\d{1,3}$/.test(value.trim());
|
|
@@ -5878,11 +6198,7 @@ function buildExtractionReviewReport(params) {
|
|
|
5878
6198
|
const definitionsResult = memory.get("definitions");
|
|
5879
6199
|
const coveredReasonsResult = memory.get("covered_reasons");
|
|
5880
6200
|
const definitions = Array.isArray(definitionsResult?.definitions) ? definitionsResult.definitions : sections.filter((section) => section.type === "definition");
|
|
5881
|
-
const coveredReasons = Array.isArray(coveredReasonsResult?.coveredReasons) ? coveredReasonsResult.coveredReasons : Array.isArray(coveredReasonsResult?.covered_reasons) ? coveredReasonsResult.covered_reasons : sections.filter(
|
|
5882
|
-
const title = String(section.title ?? "").toLowerCase();
|
|
5883
|
-
const type = String(section.type ?? "").toLowerCase();
|
|
5884
|
-
return type === "covered_reason" || title.includes("covered cause") || title.includes("covered reason") || title.includes("covered peril");
|
|
5885
|
-
});
|
|
6201
|
+
const coveredReasons = Array.isArray(coveredReasonsResult?.coveredReasons) ? coveredReasonsResult.coveredReasons : Array.isArray(coveredReasonsResult?.covered_reasons) ? coveredReasonsResult.covered_reasons : sections.filter(looksCoveredReasonSection);
|
|
5886
6202
|
const mappedDefinitions = params.pageAssignments.some((assignment) => assignment.extractorNames.includes("definitions"));
|
|
5887
6203
|
const mappedCoveredReasons = params.pageAssignments.some((assignment) => assignment.extractorNames.includes("covered_reasons"));
|
|
5888
6204
|
if (mappedDefinitions && definitions.length === 0) {
|
|
@@ -5997,7 +6313,7 @@ function buildExtractionReviewReport(params) {
|
|
|
5997
6313
|
itemName: typeof coverage.name === "string" ? coverage.name : void 0
|
|
5998
6314
|
});
|
|
5999
6315
|
}
|
|
6000
|
-
if (
|
|
6316
|
+
if (looksReferential(coverage.limit) || looksReferential(coverage.deductible)) {
|
|
6001
6317
|
deterministicIssues.push({
|
|
6002
6318
|
code: "coverage_referential_value",
|
|
6003
6319
|
severity: "warning",
|
|
@@ -6147,7 +6463,7 @@ function buildExtractionReviewReport(params) {
|
|
|
6147
6463
|
itemName
|
|
6148
6464
|
});
|
|
6149
6465
|
}
|
|
6150
|
-
if (
|
|
6466
|
+
if (looksReferential(content) || looksReferential(coveredReason.reason)) {
|
|
6151
6467
|
deterministicIssues.push({
|
|
6152
6468
|
code: "covered_reason_referential_value",
|
|
6153
6469
|
severity: "warning",
|
|
@@ -6208,6 +6524,134 @@ function toReviewRoundRecord(round, review) {
|
|
|
6208
6524
|
};
|
|
6209
6525
|
}
|
|
6210
6526
|
|
|
6527
|
+
// src/extraction/planning.ts
|
|
6528
|
+
function normalizePageAssignments(pageAssignments, formInventory) {
|
|
6529
|
+
const pageFormTypes = /* @__PURE__ */ new Map();
|
|
6530
|
+
if (formInventory) {
|
|
6531
|
+
for (const form of formInventory.forms) {
|
|
6532
|
+
if (form.pageStart != null) {
|
|
6533
|
+
const end = form.pageEnd ?? form.pageStart;
|
|
6534
|
+
for (let p = form.pageStart; p <= end; p += 1) {
|
|
6535
|
+
const types = pageFormTypes.get(p) ?? /* @__PURE__ */ new Set();
|
|
6536
|
+
types.add(form.formType);
|
|
6537
|
+
pageFormTypes.set(p, types);
|
|
6538
|
+
}
|
|
6539
|
+
}
|
|
6540
|
+
}
|
|
6541
|
+
}
|
|
6542
|
+
return pageAssignments.map((assignment) => {
|
|
6543
|
+
let extractorNames = [...new Set(
|
|
6544
|
+
(assignment.extractorNames.length > 0 ? assignment.extractorNames : ["sections"]).filter(Boolean)
|
|
6545
|
+
)];
|
|
6546
|
+
const hasDeclarations = extractorNames.includes("declarations");
|
|
6547
|
+
const hasConditions = extractorNames.includes("conditions");
|
|
6548
|
+
const hasExclusions = extractorNames.includes("exclusions");
|
|
6549
|
+
const hasEndorsements = extractorNames.includes("endorsements");
|
|
6550
|
+
const looksLikeScheduleValues = assignment.hasScheduleValues === true;
|
|
6551
|
+
const roleBlocksCoverageLimits = assignment.pageRole === "policy_form" || assignment.pageRole === "condition_exclusion_form" || assignment.pageRole === "endorsement_form";
|
|
6552
|
+
const inventoryTypes = pageFormTypes.get(assignment.localPageNumber);
|
|
6553
|
+
const inventoryBlocksCoverageLimits = inventoryTypes != null && !looksLikeScheduleValues && !hasDeclarations && (inventoryTypes.has("endorsement") || inventoryTypes.has("notice") || inventoryTypes.has("application"));
|
|
6554
|
+
if (extractorNames.includes("coverage_limits")) {
|
|
6555
|
+
const shouldDropCoverageLimits = inventoryBlocksCoverageLimits || !looksLikeScheduleValues && roleBlocksCoverageLimits || !hasDeclarations && !looksLikeScheduleValues && (hasConditions || hasExclusions) || !hasDeclarations && !looksLikeScheduleValues && hasEndorsements;
|
|
6556
|
+
if (shouldDropCoverageLimits) {
|
|
6557
|
+
extractorNames = extractorNames.filter((name) => name !== "coverage_limits");
|
|
6558
|
+
}
|
|
6559
|
+
}
|
|
6560
|
+
if (inventoryTypes?.has("endorsement") && !extractorNames.includes("endorsements")) {
|
|
6561
|
+
extractorNames = [...extractorNames, "endorsements"];
|
|
6562
|
+
}
|
|
6563
|
+
if (extractorNames.length === 0) {
|
|
6564
|
+
extractorNames = ["sections"];
|
|
6565
|
+
}
|
|
6566
|
+
return {
|
|
6567
|
+
...assignment,
|
|
6568
|
+
extractorNames
|
|
6569
|
+
};
|
|
6570
|
+
});
|
|
6571
|
+
}
|
|
6572
|
+
function buildTemplateHints(primaryType, documentType, pageCount, template) {
|
|
6573
|
+
return [
|
|
6574
|
+
`Document type: ${primaryType} ${documentType}`,
|
|
6575
|
+
`Expected sections: ${template.expectedSections.join(", ")}`,
|
|
6576
|
+
`Page hints: ${Object.entries(template.pageHints).map(([k, v]) => `${k}: ${v}`).join("; ")}`,
|
|
6577
|
+
`Total pages: ${pageCount}`
|
|
6578
|
+
].join("\n");
|
|
6579
|
+
}
|
|
6580
|
+
function groupContiguousPages(pages) {
|
|
6581
|
+
if (pages.length === 0) return [];
|
|
6582
|
+
const sorted = [...new Set(pages)].sort((a, b) => a - b);
|
|
6583
|
+
const ranges = [];
|
|
6584
|
+
let start = sorted[0];
|
|
6585
|
+
let previous = sorted[0];
|
|
6586
|
+
for (let i = 1; i < sorted.length; i += 1) {
|
|
6587
|
+
const current = sorted[i];
|
|
6588
|
+
if (current === previous + 1) {
|
|
6589
|
+
previous = current;
|
|
6590
|
+
continue;
|
|
6591
|
+
}
|
|
6592
|
+
ranges.push({ startPage: start, endPage: previous });
|
|
6593
|
+
start = current;
|
|
6594
|
+
previous = current;
|
|
6595
|
+
}
|
|
6596
|
+
ranges.push({ startPage: start, endPage: previous });
|
|
6597
|
+
return ranges;
|
|
6598
|
+
}
|
|
6599
|
+
function buildPlanFromPageAssignments(pageAssignments, pageCount, formInventory) {
|
|
6600
|
+
const extractorPages = /* @__PURE__ */ new Map();
|
|
6601
|
+
for (const assignment of pageAssignments) {
|
|
6602
|
+
const extractors = assignment.extractorNames.length > 0 ? assignment.extractorNames : ["sections"];
|
|
6603
|
+
for (const extractorName of extractors) {
|
|
6604
|
+
extractorPages.set(extractorName, [...extractorPages.get(extractorName) ?? [], assignment.localPageNumber]);
|
|
6605
|
+
}
|
|
6606
|
+
}
|
|
6607
|
+
const coveredPages = /* @__PURE__ */ new Set();
|
|
6608
|
+
for (const pages of extractorPages.values()) {
|
|
6609
|
+
for (const page of pages) coveredPages.add(page);
|
|
6610
|
+
}
|
|
6611
|
+
for (let page = 1; page <= pageCount; page += 1) {
|
|
6612
|
+
if (!coveredPages.has(page)) {
|
|
6613
|
+
extractorPages.set("sections", [...extractorPages.get("sections") ?? [], page]);
|
|
6614
|
+
}
|
|
6615
|
+
}
|
|
6616
|
+
const contextualExtractors = /* @__PURE__ */ new Set(["conditions", "covered_reasons", "definitions", "exclusions", "endorsements"]);
|
|
6617
|
+
const contextualForms = (formInventory?.forms ?? []).filter(
|
|
6618
|
+
(form) => form.pageStart != null && (form.pageEnd ?? form.pageStart) != null
|
|
6619
|
+
);
|
|
6620
|
+
const expandPagesToFormRanges = (extractorName, pages) => {
|
|
6621
|
+
if (!contextualExtractors.has(extractorName)) return pages;
|
|
6622
|
+
const expanded = new Set(pages);
|
|
6623
|
+
for (const page of pages) {
|
|
6624
|
+
for (const form of contextualForms) {
|
|
6625
|
+
const pageStart = form.pageStart;
|
|
6626
|
+
const pageEnd = form.pageEnd ?? form.pageStart;
|
|
6627
|
+
const formType = form.formType;
|
|
6628
|
+
const supportsContextualExpansion = extractorName === "endorsements" ? formType === "endorsement" : formType === "coverage" || formType === "endorsement";
|
|
6629
|
+
if (!supportsContextualExpansion) continue;
|
|
6630
|
+
if (page < pageStart || page > pageEnd) continue;
|
|
6631
|
+
for (let current = pageStart; current <= pageEnd; current += 1) {
|
|
6632
|
+
expanded.add(current);
|
|
6633
|
+
}
|
|
6634
|
+
}
|
|
6635
|
+
}
|
|
6636
|
+
return [...expanded].sort((a, b) => a - b);
|
|
6637
|
+
};
|
|
6638
|
+
const tasks = [...extractorPages.entries()].flatMap(
|
|
6639
|
+
([extractorName, pages]) => groupContiguousPages(expandPagesToFormRanges(extractorName, pages)).map(({ startPage, endPage }) => ({
|
|
6640
|
+
extractorName,
|
|
6641
|
+
startPage,
|
|
6642
|
+
endPage,
|
|
6643
|
+
description: `Page-mapped ${extractorName} extraction for pages ${startPage}-${endPage}`
|
|
6644
|
+
}))
|
|
6645
|
+
).sort((a, b) => a.startPage - b.startPage || a.extractorName.localeCompare(b.extractorName));
|
|
6646
|
+
return {
|
|
6647
|
+
tasks,
|
|
6648
|
+
pageMap: [...extractorPages.entries()].map(([section, pages]) => ({
|
|
6649
|
+
section,
|
|
6650
|
+
pages: `pages ${[...new Set(pages)].sort((a, b) => a - b).join(", ")}`
|
|
6651
|
+
}))
|
|
6652
|
+
};
|
|
6653
|
+
}
|
|
6654
|
+
|
|
6211
6655
|
// src/extraction/coordinator.ts
|
|
6212
6656
|
function createExtractor(config) {
|
|
6213
6657
|
const {
|
|
@@ -6224,6 +6668,7 @@ function createExtractor(config) {
|
|
|
6224
6668
|
onCheckpointSave
|
|
6225
6669
|
} = config;
|
|
6226
6670
|
const limit = pLimit(concurrency);
|
|
6671
|
+
const extractorCatalog = formatExtractorCatalogForPrompt();
|
|
6227
6672
|
let totalUsage = { inputTokens: 0, outputTokens: 0 };
|
|
6228
6673
|
let modelCalls = 0;
|
|
6229
6674
|
let callsWithUsage = 0;
|
|
@@ -6244,43 +6689,56 @@ function createExtractor(config) {
|
|
|
6244
6689
|
memory.set(name, mergeExtractorResult(name, existing, data));
|
|
6245
6690
|
}
|
|
6246
6691
|
function summarizeExtraction(memory) {
|
|
6247
|
-
const
|
|
6248
|
-
const
|
|
6249
|
-
const
|
|
6250
|
-
const
|
|
6251
|
-
const
|
|
6252
|
-
const
|
|
6253
|
-
const
|
|
6254
|
-
const
|
|
6255
|
-
const
|
|
6256
|
-
const definitionCount = Array.isArray(definitionsResult?.definitions) ? definitionsResult.definitions.length : sections.filter((section) => section.type === "definition").length;
|
|
6257
|
-
const coveredReasonCount = Array.isArray(coveredReasonsResult?.coveredReasons) ? coveredReasonsResult.coveredReasons.length : Array.isArray(coveredReasonsResult?.covered_reasons) ? coveredReasonsResult.covered_reasons.length : sections.filter((section) => {
|
|
6258
|
-
const title = String(section.title ?? "").toLowerCase();
|
|
6259
|
-
const type = String(section.type ?? "").toLowerCase();
|
|
6260
|
-
return type === "covered_reason" || title.includes("covered cause") || title.includes("covered reason") || title.includes("covered peril");
|
|
6261
|
-
}).length;
|
|
6262
|
-
const coverageSummary = Array.isArray(coverageResult?.coverages) ? coverageResult.coverages.slice(0, 12).map((coverage) => ({
|
|
6692
|
+
const declarationResult = readMemoryRecord(memory, "declarations");
|
|
6693
|
+
const endorsements = readRecordArray(readMemoryRecord(memory, "endorsements"), "endorsements") ?? [];
|
|
6694
|
+
const exclusions = readRecordArray(readMemoryRecord(memory, "exclusions"), "exclusions") ?? [];
|
|
6695
|
+
const conditions = readRecordArray(readMemoryRecord(memory, "conditions"), "conditions") ?? [];
|
|
6696
|
+
const sections = getSections(memory) ?? [];
|
|
6697
|
+
const definitions = getDefinitions(memory) ?? sections.filter((section) => section.type === "definition");
|
|
6698
|
+
const coveredReasons = getCoveredReasons(memory) ?? sections.filter(looksCoveredReasonSection);
|
|
6699
|
+
const coverages = getCoverageLimitCoverages(memory);
|
|
6700
|
+
const coverageSummary = coverages.slice(0, 12).map((coverage) => ({
|
|
6263
6701
|
name: coverage.name,
|
|
6264
6702
|
limit: coverage.limit,
|
|
6265
6703
|
deductible: coverage.deductible,
|
|
6266
6704
|
formNumber: coverage.formNumber
|
|
6267
|
-
}))
|
|
6705
|
+
}));
|
|
6268
6706
|
return JSON.stringify({
|
|
6269
6707
|
extractedKeys: [...memory.keys()].filter((key) => key !== "classify"),
|
|
6270
6708
|
declarationFieldCount: Array.isArray(declarationResult?.fields) ? declarationResult.fields.length : 0,
|
|
6271
|
-
coverageCount:
|
|
6709
|
+
coverageCount: coverages.length,
|
|
6272
6710
|
coverageSamples: coverageSummary,
|
|
6273
|
-
endorsementCount:
|
|
6274
|
-
exclusionCount:
|
|
6275
|
-
conditionCount:
|
|
6276
|
-
definitionCount,
|
|
6277
|
-
coveredReasonCount,
|
|
6711
|
+
endorsementCount: endorsements.length,
|
|
6712
|
+
exclusionCount: exclusions.length,
|
|
6713
|
+
conditionCount: conditions.length,
|
|
6714
|
+
definitionCount: definitions.length,
|
|
6715
|
+
coveredReasonCount: coveredReasons.length,
|
|
6278
6716
|
sectionCount: sections.length
|
|
6279
6717
|
}, null, 2);
|
|
6280
6718
|
}
|
|
6719
|
+
function textIncludesSupplementarySignal(value) {
|
|
6720
|
+
if (typeof value !== "string") return false;
|
|
6721
|
+
return /\b(supplementary|regulatory|department of insurance|ombudsman|complaint|claim|claims|contact|phone|email|cancellation|cancelled|nonrenewal|non-renewal|non renew|notice|governing law|jurisdiction|third[- ]party administrator|tpa)\b/i.test(value);
|
|
6722
|
+
}
|
|
6723
|
+
function hasSupplementaryExtractionSignal(pageAssignments, formInventory, memory) {
|
|
6724
|
+
const hasPageSignal = pageAssignments.some(
|
|
6725
|
+
(assignment) => assignment.pageRole === "supplementary" || assignment.extractorNames.includes("supplementary") || textIncludesSupplementarySignal(assignment.notes)
|
|
6726
|
+
);
|
|
6727
|
+
if (hasPageSignal) return true;
|
|
6728
|
+
const hasFormSignal = (formInventory?.forms ?? []).some(
|
|
6729
|
+
(form) => form.formType === "notice" || textIncludesSupplementarySignal(form.title) || textIncludesSupplementarySignal(form.formNumber)
|
|
6730
|
+
);
|
|
6731
|
+
if (hasFormSignal) return true;
|
|
6732
|
+
const likelySupplementaryKeys = ["sections", "conditions", "endorsements", "exclusions"];
|
|
6733
|
+
return likelySupplementaryKeys.some((key) => {
|
|
6734
|
+
const value = memory.get(key);
|
|
6735
|
+
if (!value) return false;
|
|
6736
|
+
return textIncludesSupplementarySignal(JSON.stringify(value));
|
|
6737
|
+
});
|
|
6738
|
+
}
|
|
6281
6739
|
function buildAlreadyExtractedSummary(memory) {
|
|
6282
6740
|
const lines = [];
|
|
6283
|
-
const declarationResult = memory
|
|
6741
|
+
const declarationResult = readMemoryRecord(memory, "declarations");
|
|
6284
6742
|
if (Array.isArray(declarationResult?.fields)) {
|
|
6285
6743
|
for (const field of declarationResult.fields) {
|
|
6286
6744
|
if (field.key && field.value) {
|
|
@@ -6289,20 +6747,17 @@ function createExtractor(config) {
|
|
|
6289
6747
|
}
|
|
6290
6748
|
}
|
|
6291
6749
|
}
|
|
6292
|
-
const
|
|
6293
|
-
|
|
6294
|
-
|
|
6295
|
-
const parts = [cov.name, cov.limit && `limit=${cov.limit}`, cov.deductible && `deductible=${cov.deductible}`].filter(Boolean);
|
|
6296
|
-
if (parts.length > 0) lines.push(`- coverage: ${parts.join(", ")}`);
|
|
6297
|
-
}
|
|
6750
|
+
for (const cov of getCoverageLimitCoverages(memory)) {
|
|
6751
|
+
const parts = [cov.name, cov.limit && `limit=${cov.limit}`, cov.deductible && `deductible=${cov.deductible}`].filter(Boolean);
|
|
6752
|
+
if (parts.length > 0) lines.push(`- coverage: ${parts.join(", ")}`);
|
|
6298
6753
|
}
|
|
6299
|
-
const namedInsured = memory
|
|
6754
|
+
const namedInsured = getNamedInsured(memory);
|
|
6300
6755
|
if (namedInsured) {
|
|
6301
6756
|
for (const [key, value] of Object.entries(namedInsured)) {
|
|
6302
6757
|
if (value && typeof value === "string") lines.push(`- ${key}: ${value}`);
|
|
6303
6758
|
}
|
|
6304
6759
|
}
|
|
6305
|
-
const carrierInfo = memory
|
|
6760
|
+
const carrierInfo = getCarrierInfo(memory);
|
|
6306
6761
|
if (carrierInfo) {
|
|
6307
6762
|
for (const [key, value] of Object.entries(carrierInfo)) {
|
|
6308
6763
|
if (value && typeof value === "string") lines.push(`- ${key}: ${value}`);
|
|
@@ -6310,6 +6765,34 @@ function createExtractor(config) {
|
|
|
6310
6765
|
}
|
|
6311
6766
|
return lines.length > 0 ? lines.join("\n") : "";
|
|
6312
6767
|
}
|
|
6768
|
+
async function runFocusedExtractorTask(task, pdfInput, memory) {
|
|
6769
|
+
if (task.extractorName === "supplementary") {
|
|
6770
|
+
const alreadyExtractedSummary = buildAlreadyExtractedSummary(memory);
|
|
6771
|
+
const result = await runExtractor({
|
|
6772
|
+
name: "supplementary",
|
|
6773
|
+
prompt: buildSupplementaryPrompt(alreadyExtractedSummary),
|
|
6774
|
+
schema: SupplementarySchema,
|
|
6775
|
+
pdfInput,
|
|
6776
|
+
startPage: task.startPage,
|
|
6777
|
+
endPage: task.endPage,
|
|
6778
|
+
generateObject,
|
|
6779
|
+
convertPdfToImages,
|
|
6780
|
+
maxTokens: 4096,
|
|
6781
|
+
providerOptions
|
|
6782
|
+
});
|
|
6783
|
+
trackUsage(result.usage);
|
|
6784
|
+
return result;
|
|
6785
|
+
}
|
|
6786
|
+
return runFocusedExtractorWithFallback({
|
|
6787
|
+
task,
|
|
6788
|
+
pdfInput,
|
|
6789
|
+
generateObject,
|
|
6790
|
+
convertPdfToImages,
|
|
6791
|
+
providerOptions,
|
|
6792
|
+
trackUsage,
|
|
6793
|
+
log
|
|
6794
|
+
});
|
|
6795
|
+
}
|
|
6313
6796
|
function formatPageMapSummary(pageAssignments) {
|
|
6314
6797
|
const extractorPages = /* @__PURE__ */ new Map();
|
|
6315
6798
|
for (const assignment of pageAssignments) {
|
|
@@ -6320,132 +6803,6 @@ function createExtractor(config) {
|
|
|
6320
6803
|
if (extractorPages.size === 0) return "No page assignments available.";
|
|
6321
6804
|
return [...extractorPages.entries()].map(([extractorName, pages]) => `${extractorName}: ${pages.length} page(s), pages ${pages.join(", ")}`).join("\n");
|
|
6322
6805
|
}
|
|
6323
|
-
function normalizePageAssignments(pageAssignments, formInventory) {
|
|
6324
|
-
const pageFormTypes = /* @__PURE__ */ new Map();
|
|
6325
|
-
if (formInventory) {
|
|
6326
|
-
for (const form of formInventory.forms) {
|
|
6327
|
-
if (form.pageStart != null) {
|
|
6328
|
-
const end = form.pageEnd ?? form.pageStart;
|
|
6329
|
-
for (let p = form.pageStart; p <= end; p++) {
|
|
6330
|
-
const types = pageFormTypes.get(p) ?? /* @__PURE__ */ new Set();
|
|
6331
|
-
types.add(form.formType);
|
|
6332
|
-
pageFormTypes.set(p, types);
|
|
6333
|
-
}
|
|
6334
|
-
}
|
|
6335
|
-
}
|
|
6336
|
-
}
|
|
6337
|
-
return pageAssignments.map((assignment) => {
|
|
6338
|
-
let extractorNames = [...new Set(
|
|
6339
|
-
(assignment.extractorNames.length > 0 ? assignment.extractorNames : ["sections"]).filter(Boolean)
|
|
6340
|
-
)];
|
|
6341
|
-
const hasDeclarations = extractorNames.includes("declarations");
|
|
6342
|
-
const hasConditions = extractorNames.includes("conditions");
|
|
6343
|
-
const hasExclusions = extractorNames.includes("exclusions");
|
|
6344
|
-
const hasEndorsements = extractorNames.includes("endorsements");
|
|
6345
|
-
const looksLikeScheduleValues = assignment.hasScheduleValues === true;
|
|
6346
|
-
const roleBlocksCoverageLimits = assignment.pageRole === "policy_form" || assignment.pageRole === "condition_exclusion_form" || assignment.pageRole === "endorsement_form";
|
|
6347
|
-
const inventoryTypes = pageFormTypes.get(assignment.localPageNumber);
|
|
6348
|
-
const inventoryBlocksCoverageLimits = inventoryTypes != null && !looksLikeScheduleValues && !hasDeclarations && (inventoryTypes.has("endorsement") || inventoryTypes.has("notice") || inventoryTypes.has("application"));
|
|
6349
|
-
if (extractorNames.includes("coverage_limits")) {
|
|
6350
|
-
const shouldDropCoverageLimits = inventoryBlocksCoverageLimits || !looksLikeScheduleValues && roleBlocksCoverageLimits || !hasDeclarations && !looksLikeScheduleValues && (hasConditions || hasExclusions) || !hasDeclarations && !looksLikeScheduleValues && hasEndorsements;
|
|
6351
|
-
if (shouldDropCoverageLimits) {
|
|
6352
|
-
extractorNames = extractorNames.filter((name) => name !== "coverage_limits");
|
|
6353
|
-
}
|
|
6354
|
-
}
|
|
6355
|
-
if (inventoryTypes?.has("endorsement") && !extractorNames.includes("endorsements")) {
|
|
6356
|
-
extractorNames = [...extractorNames, "endorsements"];
|
|
6357
|
-
}
|
|
6358
|
-
if (extractorNames.length === 0) {
|
|
6359
|
-
extractorNames = ["sections"];
|
|
6360
|
-
}
|
|
6361
|
-
return {
|
|
6362
|
-
...assignment,
|
|
6363
|
-
extractorNames
|
|
6364
|
-
};
|
|
6365
|
-
});
|
|
6366
|
-
}
|
|
6367
|
-
function buildTemplateHints(primaryType, documentType, pageCount, template) {
|
|
6368
|
-
return [
|
|
6369
|
-
`Document type: ${primaryType} ${documentType}`,
|
|
6370
|
-
`Expected sections: ${template.expectedSections.join(", ")}`,
|
|
6371
|
-
`Page hints: ${Object.entries(template.pageHints).map(([k, v]) => `${k}: ${v}`).join("; ")}`,
|
|
6372
|
-
`Total pages: ${pageCount}`
|
|
6373
|
-
].join("\n");
|
|
6374
|
-
}
|
|
6375
|
-
function groupContiguousPages(pages) {
|
|
6376
|
-
if (pages.length === 0) return [];
|
|
6377
|
-
const sorted = [...new Set(pages)].sort((a, b) => a - b);
|
|
6378
|
-
const ranges = [];
|
|
6379
|
-
let start = sorted[0];
|
|
6380
|
-
let previous = sorted[0];
|
|
6381
|
-
for (let i = 1; i < sorted.length; i += 1) {
|
|
6382
|
-
const current = sorted[i];
|
|
6383
|
-
if (current === previous + 1) {
|
|
6384
|
-
previous = current;
|
|
6385
|
-
continue;
|
|
6386
|
-
}
|
|
6387
|
-
ranges.push({ startPage: start, endPage: previous });
|
|
6388
|
-
start = current;
|
|
6389
|
-
previous = current;
|
|
6390
|
-
}
|
|
6391
|
-
ranges.push({ startPage: start, endPage: previous });
|
|
6392
|
-
return ranges;
|
|
6393
|
-
}
|
|
6394
|
-
function buildPlanFromPageAssignments(pageAssignments, pageCount, formInventory) {
|
|
6395
|
-
const extractorPages = /* @__PURE__ */ new Map();
|
|
6396
|
-
for (const assignment of pageAssignments) {
|
|
6397
|
-
const extractors = assignment.extractorNames.length > 0 ? assignment.extractorNames : ["sections"];
|
|
6398
|
-
for (const extractorName of extractors) {
|
|
6399
|
-
extractorPages.set(extractorName, [...extractorPages.get(extractorName) ?? [], assignment.localPageNumber]);
|
|
6400
|
-
}
|
|
6401
|
-
}
|
|
6402
|
-
const coveredPages = /* @__PURE__ */ new Set();
|
|
6403
|
-
for (const pages of extractorPages.values()) {
|
|
6404
|
-
for (const page of pages) coveredPages.add(page);
|
|
6405
|
-
}
|
|
6406
|
-
for (let page = 1; page <= pageCount; page += 1) {
|
|
6407
|
-
if (!coveredPages.has(page)) {
|
|
6408
|
-
extractorPages.set("sections", [...extractorPages.get("sections") ?? [], page]);
|
|
6409
|
-
}
|
|
6410
|
-
}
|
|
6411
|
-
const contextualExtractors = /* @__PURE__ */ new Set(["conditions", "covered_reasons", "definitions", "exclusions", "endorsements"]);
|
|
6412
|
-
const contextualForms = (formInventory?.forms ?? []).filter(
|
|
6413
|
-
(form) => form.pageStart != null && (form.pageEnd ?? form.pageStart) != null
|
|
6414
|
-
);
|
|
6415
|
-
const expandPagesToFormRanges = (extractorName, pages) => {
|
|
6416
|
-
if (!contextualExtractors.has(extractorName)) return pages;
|
|
6417
|
-
const expanded = new Set(pages);
|
|
6418
|
-
for (const page of pages) {
|
|
6419
|
-
for (const form of contextualForms) {
|
|
6420
|
-
const pageStart = form.pageStart;
|
|
6421
|
-
const pageEnd = form.pageEnd ?? form.pageStart;
|
|
6422
|
-
const formType = form.formType;
|
|
6423
|
-
const supportsContextualExpansion = extractorName === "endorsements" ? formType === "endorsement" : formType === "coverage" || formType === "endorsement";
|
|
6424
|
-
if (!supportsContextualExpansion) continue;
|
|
6425
|
-
if (page < pageStart || page > pageEnd) continue;
|
|
6426
|
-
for (let current = pageStart; current <= pageEnd; current += 1) {
|
|
6427
|
-
expanded.add(current);
|
|
6428
|
-
}
|
|
6429
|
-
}
|
|
6430
|
-
}
|
|
6431
|
-
return [...expanded].sort((a, b) => a - b);
|
|
6432
|
-
};
|
|
6433
|
-
const tasks = [...extractorPages.entries()].flatMap(
|
|
6434
|
-
([extractorName, pages]) => groupContiguousPages(expandPagesToFormRanges(extractorName, pages)).map(({ startPage, endPage }) => ({
|
|
6435
|
-
extractorName,
|
|
6436
|
-
startPage,
|
|
6437
|
-
endPage,
|
|
6438
|
-
description: `Page-mapped ${extractorName} extraction for pages ${startPage}-${endPage}`
|
|
6439
|
-
}))
|
|
6440
|
-
).sort((a, b) => a.startPage - b.startPage || a.extractorName.localeCompare(b.extractorName));
|
|
6441
|
-
return {
|
|
6442
|
-
tasks,
|
|
6443
|
-
pageMap: [...extractorPages.entries()].map(([section, pages]) => ({
|
|
6444
|
-
section,
|
|
6445
|
-
pages: `pages ${[...new Set(pages)].sort((a, b) => a - b).join(", ")}`
|
|
6446
|
-
}))
|
|
6447
|
-
};
|
|
6448
|
-
}
|
|
6449
6806
|
async function extract(pdfInput, documentId, options) {
|
|
6450
6807
|
const id = documentId ?? `doc-${Date.now()}`;
|
|
6451
6808
|
const memory = /* @__PURE__ */ new Map();
|
|
@@ -6456,7 +6813,8 @@ function createExtractor(config) {
|
|
|
6456
6813
|
const pipelineCtx = createPipelineContext({
|
|
6457
6814
|
id,
|
|
6458
6815
|
onSave: onCheckpointSave,
|
|
6459
|
-
resumeFrom: options?.resumeFrom
|
|
6816
|
+
resumeFrom: options?.resumeFrom,
|
|
6817
|
+
phaseOrder: ["classify", "form_inventory", "page_map", "plan", "extract", "resolve_referential", "review", "assemble"]
|
|
6460
6818
|
});
|
|
6461
6819
|
const resumed = pipelineCtx.getCheckpoint()?.state;
|
|
6462
6820
|
if (resumed?.memory) {
|
|
@@ -6624,40 +6982,18 @@ function createExtractor(config) {
|
|
|
6624
6982
|
const extractorResults = await Promise.all(
|
|
6625
6983
|
tasks.map(
|
|
6626
6984
|
(task) => limit(async () => {
|
|
6627
|
-
const ext = getExtractor(task.extractorName) ?? (task.extractorName === "definitions" || task.extractorName === "covered_reasons" ? getExtractor("sections") : void 0);
|
|
6628
|
-
if (!ext) {
|
|
6629
|
-
await log?.(`Unknown extractor: ${task.extractorName}, skipping`);
|
|
6630
|
-
return null;
|
|
6631
|
-
}
|
|
6632
6985
|
onProgress?.(`Extracting ${task.extractorName} (pages ${task.startPage}-${task.endPage})...`);
|
|
6633
|
-
|
|
6634
|
-
const result = await runExtractor({
|
|
6635
|
-
name: task.extractorName,
|
|
6636
|
-
prompt: ext.buildPrompt(),
|
|
6637
|
-
schema: ext.schema,
|
|
6638
|
-
pdfInput,
|
|
6639
|
-
startPage: task.startPage,
|
|
6640
|
-
endPage: task.endPage,
|
|
6641
|
-
generateObject,
|
|
6642
|
-
convertPdfToImages,
|
|
6643
|
-
maxTokens: ext.maxTokens ?? 4096,
|
|
6644
|
-
providerOptions
|
|
6645
|
-
});
|
|
6646
|
-
trackUsage(result.usage);
|
|
6647
|
-
return result;
|
|
6648
|
-
} catch (error) {
|
|
6649
|
-
await log?.(`Extractor ${task.extractorName} failed: ${error}`);
|
|
6650
|
-
return null;
|
|
6651
|
-
}
|
|
6986
|
+
return runFocusedExtractorTask(task, pdfInput, memory);
|
|
6652
6987
|
})
|
|
6653
6988
|
)
|
|
6654
6989
|
);
|
|
6655
|
-
for (const result of extractorResults) {
|
|
6990
|
+
for (const result of extractorResults.flatMap((item) => Array.isArray(item) ? item : item ? [item] : [])) {
|
|
6656
6991
|
if (result) {
|
|
6657
6992
|
mergeMemoryResult(result.name, result.data, memory);
|
|
6658
6993
|
}
|
|
6659
6994
|
}
|
|
6660
|
-
|
|
6995
|
+
const planIncludesSupplementary = tasks.some((task) => task.extractorName === "supplementary");
|
|
6996
|
+
if (!planIncludesSupplementary && hasSupplementaryExtractionSignal(pageAssignments, formInventory, memory)) {
|
|
6661
6997
|
onProgress?.("Extracting supplementary retrieval facts...");
|
|
6662
6998
|
try {
|
|
6663
6999
|
const alreadyExtractedSummary = buildAlreadyExtractedSummary(memory);
|
|
@@ -6731,7 +7067,7 @@ function createExtractor(config) {
|
|
|
6731
7067
|
const reviewResponse = await safeGenerateObject(
|
|
6732
7068
|
generateObject,
|
|
6733
7069
|
{
|
|
6734
|
-
prompt: buildReviewPrompt(template.required, extractedKeys, extractionSummary, pageMapSummary),
|
|
7070
|
+
prompt: buildReviewPrompt(template.required, extractedKeys, extractionSummary, pageMapSummary, extractorCatalog),
|
|
6735
7071
|
schema: ReviewResultSchema,
|
|
6736
7072
|
maxTokens: 1536,
|
|
6737
7073
|
providerOptions: await buildPdfProviderOptions(pdfInput, providerOptions)
|
|
@@ -6755,31 +7091,11 @@ function createExtractor(config) {
|
|
|
6755
7091
|
const followUpResults = await Promise.all(
|
|
6756
7092
|
reviewResponse.object.additionalTasks.map(
|
|
6757
7093
|
(task) => limit(async () => {
|
|
6758
|
-
|
|
6759
|
-
if (!ext) return null;
|
|
6760
|
-
try {
|
|
6761
|
-
const result = await runExtractor({
|
|
6762
|
-
name: task.extractorName,
|
|
6763
|
-
prompt: ext.buildPrompt(),
|
|
6764
|
-
schema: ext.schema,
|
|
6765
|
-
pdfInput,
|
|
6766
|
-
startPage: task.startPage,
|
|
6767
|
-
endPage: task.endPage,
|
|
6768
|
-
generateObject,
|
|
6769
|
-
convertPdfToImages,
|
|
6770
|
-
maxTokens: ext.maxTokens ?? 4096,
|
|
6771
|
-
providerOptions
|
|
6772
|
-
});
|
|
6773
|
-
trackUsage(result.usage);
|
|
6774
|
-
return result;
|
|
6775
|
-
} catch (error) {
|
|
6776
|
-
await log?.(`Follow-up extractor ${task.extractorName} failed: ${error}`);
|
|
6777
|
-
return null;
|
|
6778
|
-
}
|
|
7094
|
+
return runFocusedExtractorTask(task, pdfInput, memory);
|
|
6779
7095
|
})
|
|
6780
7096
|
)
|
|
6781
7097
|
);
|
|
6782
|
-
for (const result of followUpResults) {
|
|
7098
|
+
for (const result of followUpResults.flatMap((item) => Array.isArray(item) ? item : item ? [item] : [])) {
|
|
6783
7099
|
if (result) {
|
|
6784
7100
|
mergeMemoryResult(result.name, result.data, memory);
|
|
6785
7101
|
}
|
|
@@ -7818,6 +8134,70 @@ function reviewBatchEmail(text, batchFields) {
|
|
|
7818
8134
|
};
|
|
7819
8135
|
}
|
|
7820
8136
|
|
|
8137
|
+
// src/application/workflow.ts
|
|
8138
|
+
var MAX_DOCUMENT_SEARCH_FIELDS = 5;
|
|
8139
|
+
var LOW_VALUE_FIELD_RATIO_LIMIT = 0.6;
|
|
8140
|
+
function planApplicationWorkflow(input) {
|
|
8141
|
+
const unfilledFields = input.fields.filter(isUnfilled);
|
|
8142
|
+
const documentSearchFields = planDocumentSearchFields(
|
|
8143
|
+
unfilledFields,
|
|
8144
|
+
input.hasDocumentStore && input.hasMemoryStore
|
|
8145
|
+
);
|
|
8146
|
+
return {
|
|
8147
|
+
runBackfill: input.hasBackfillProvider && unfilledFields.length > 0,
|
|
8148
|
+
runContextAutoFill: input.orgContextCount > 0 && unfilledFields.length > 0,
|
|
8149
|
+
documentSearchFields,
|
|
8150
|
+
runBatching: unfilledFields.length > 0,
|
|
8151
|
+
unfilledFields
|
|
8152
|
+
};
|
|
8153
|
+
}
|
|
8154
|
+
/**
 * Decides which actions to take in response to a user's reply.
 *
 * @param {object} input
 * @param {object} input.intent - Classified reply intent; `hasAnswers`, `lookupRequests`,
 *   `questionText` and `primaryIntent` are read from it.
 * @param {Array<object>} input.currentBatchFields - Fields belonging to the batch being answered.
 * @param {Array<object>} [input.nextBatchFields] - Fields of the following batch, if any.
 * @param {boolean} input.hasDocumentStore - Whether a document store is configured.
 * @returns {{parseAnswers: *, runLookup: *, answerQuestion: boolean, advanceBatch: boolean, generateNextEmail: boolean}}
 */
function planReplyActions(input) {
  const { intent, currentBatchFields, nextBatchFields, hasDocumentStore } = input;

  const hasCurrentFields = currentBatchFields.length > 0;
  const upcomingFields = nextBatchFields ?? [];
  const nextBatchNeedsAnswers = upcomingFields.some((field) => isUnfilled(field));
  const lookupRequestCount = intent.lookupRequests?.length ?? 0;
  const hasLookupRequests = lookupRequestCount > 0;

  // Only "question" and "mixed" replies warrant an explanation.
  const asksQuestion = intent.primaryIntent === "question" || intent.primaryIntent === "mixed";

  return {
    parseAnswers: intent.hasAnswers && hasCurrentFields,
    runLookup: hasLookupRequests && hasDocumentStore,
    answerQuestion: Boolean(intent.questionText) && asksQuestion,
    // Advance only when the batch is non-empty and none of its fields is still unfilled.
    advanceBatch: hasCurrentFields && !currentBatchFields.some((field) => isUnfilled(field)),
    generateNextEmail: nextBatchNeedsAnswers
  };
}
|
|
8166
|
+
/**
 * Chooses which unfilled fields should be looked up via document search.
 *
 * Returns an empty list when the stores are unavailable, when nothing is
 * unfilled, when no field passes `isHighValueLookupField`, or when there are
 * more than MAX_DOCUMENT_SEARCH_FIELDS unfilled fields and the share of
 * non-searchable ones exceeds LOW_VALUE_FIELD_RATIO_LIMIT. Otherwise returns
 * at most MAX_DOCUMENT_SEARCH_FIELDS searchable fields, in their original order.
 *
 * @param {Array<object>} unfilledFields - Fields that still need a value.
 * @param {*} hasStores - Truthy when the required stores are available.
 * @returns {Array<object>} Fields to search for.
 */
function planDocumentSearchFields(unfilledFields, hasStores) {
  if (!hasStores) {
    return [];
  }
  const totalUnfilled = unfilledFields.length;
  if (totalUnfilled === 0) {
    return [];
  }

  const candidates = unfilledFields.filter((field) => isHighValueLookupField(field));
  const candidateCount = candidates.length;
  if (candidateCount === 0) {
    return [];
  }

  const exceedsFieldBudget = totalUnfilled > MAX_DOCUMENT_SEARCH_FIELDS;
  const lowValueRatio = 1 - candidateCount / totalUnfilled;
  const mostlyLowValue = lowValueRatio > LOW_VALUE_FIELD_RATIO_LIMIT;

  return exceedsFieldBudget && mostlyLowValue
    ? []
    : candidates.slice(0, MAX_DOCUMENT_SEARCH_FIELDS);
}
|
|
8176
|
+
/**
 * Reports whether a field still needs a value.
 *
 * A field is unfilled when its `value` is `undefined`, `null`, or a string
 * that is empty after trimming whitespace. Any other non-string value
 * (e.g. a number or boolean) counts as filled.
 *
 * @param {{value?: *}} field - Field to inspect.
 * @returns {boolean} `true` when the field has no usable value.
 */
function isUnfilled(field) {
  const value = field.value;
  // `null` can appear after a JSON round-trip; calling `.trim()` on it would throw.
  if (value === void 0 || value === null) {
    return true;
  }
  // Only strings can be blank; guard so non-string values never hit `.trim()`.
  if (typeof value !== "string") {
    return false;
  }
  return value.trim() === "";
}
|
|
8179
|
+
/**
 * Reports whether a field is worth a document-store lookup.
 *
 * A field qualifies when it is marked `required`, or when its lower-cased
 * "<section> <label>" text contains one of the listed terms as a substring.
 *
 * @param {{section?: *, label?: *, required?: *}} field - Field to classify.
 * @returns {boolean} `true` when the field should be searched for.
 */
function isHighValueLookupField(field) {
  const highValueTerms = [
    "carrier",
    "policy",
    "premium",
    "limit",
    "deductible",
    "insured",
    "address",
    "revenue",
    "payroll",
    "effective",
    "expiration",
    "coverage",
    "class code",
    "fein",
    "entity"
  ];
  const searchableText = `${field.section} ${field.label}`.toLowerCase();

  if (field.required) {
    return true;
  }
  for (const term of highValueTerms) {
    if (searchableText.includes(term)) {
      return true;
    }
  }
  return false;
}
|
|
8200
|
+
|
|
7821
8201
|
// src/application/coordinator.ts
|
|
7822
8202
|
function createApplicationPipeline(config) {
|
|
7823
8203
|
const {
|
|
@@ -7916,27 +8296,37 @@ function createApplicationPipeline(config) {
|
|
|
7916
8296
|
state.updatedAt = Date.now();
|
|
7917
8297
|
await applicationStore?.save(state);
|
|
7918
8298
|
onProgress?.(`Auto-filling ${fields.length} fields...`);
|
|
7919
|
-
|
|
7920
|
-
|
|
7921
|
-
|
|
7922
|
-
|
|
7923
|
-
|
|
7924
|
-
|
|
7925
|
-
|
|
7926
|
-
|
|
7927
|
-
|
|
7928
|
-
|
|
7929
|
-
|
|
7930
|
-
|
|
7931
|
-
|
|
7932
|
-
|
|
7933
|
-
|
|
7934
|
-
|
|
8299
|
+
let workflowPlan = planApplicationWorkflow({
|
|
8300
|
+
fields: state.fields,
|
|
8301
|
+
hasBackfillProvider: Boolean(backfillProvider),
|
|
8302
|
+
orgContextCount: orgContext.length,
|
|
8303
|
+
hasDocumentStore: Boolean(documentStore),
|
|
8304
|
+
hasMemoryStore: Boolean(memoryStore)
|
|
8305
|
+
});
|
|
8306
|
+
if (workflowPlan.runBackfill && backfillProvider) {
|
|
8307
|
+
try {
|
|
8308
|
+
const priorAnswers = await backfillFromPriorAnswers(state.fields, backfillProvider);
|
|
8309
|
+
for (const pa of priorAnswers) {
|
|
8310
|
+
const field = state.fields.find((f) => f.id === pa.fieldId);
|
|
8311
|
+
if (field && !field.value && pa.relevance > 0.8) {
|
|
8312
|
+
field.value = pa.value;
|
|
8313
|
+
field.source = `backfill: ${pa.source}`;
|
|
8314
|
+
field.confidence = "high";
|
|
7935
8315
|
}
|
|
7936
|
-
}
|
|
7937
|
-
)
|
|
8316
|
+
}
|
|
8317
|
+
} catch (e) {
|
|
8318
|
+
await log?.(`Backfill failed: ${e}`);
|
|
8319
|
+
}
|
|
7938
8320
|
}
|
|
7939
|
-
|
|
8321
|
+
workflowPlan = planApplicationWorkflow({
|
|
8322
|
+
fields: state.fields,
|
|
8323
|
+
hasBackfillProvider: false,
|
|
8324
|
+
orgContextCount: orgContext.length,
|
|
8325
|
+
hasDocumentStore: Boolean(documentStore),
|
|
8326
|
+
hasMemoryStore: Boolean(memoryStore)
|
|
8327
|
+
});
|
|
8328
|
+
const fillTasks = [];
|
|
8329
|
+
if (workflowPlan.runContextAutoFill) {
|
|
7940
8330
|
fillTasks.push(
|
|
7941
8331
|
limit(async () => {
|
|
7942
8332
|
const unfilledFields2 = state.fields.filter((f) => !f.value);
|
|
@@ -7963,18 +8353,13 @@ function createApplicationPipeline(config) {
|
|
|
7963
8353
|
})
|
|
7964
8354
|
);
|
|
7965
8355
|
}
|
|
7966
|
-
if (
|
|
8356
|
+
if (workflowPlan.documentSearchFields.length > 0 && memoryStore) {
|
|
7967
8357
|
fillTasks.push(
|
|
7968
8358
|
(async () => {
|
|
7969
8359
|
try {
|
|
7970
|
-
const
|
|
7971
|
-
const searchPromises = unfilledFields2.slice(0, 10).map(
|
|
8360
|
+
const searchPromises = workflowPlan.documentSearchFields.map(
|
|
7972
8361
|
(f) => limit(async () => {
|
|
7973
|
-
|
|
7974
|
-
for (const chunk of chunks) {
|
|
7975
|
-
if (!state.fields.find((sf) => sf.id === f.id)?.value) {
|
|
7976
|
-
}
|
|
7977
|
-
}
|
|
8362
|
+
await memoryStore.search(f.label, { limit: 3 });
|
|
7978
8363
|
})
|
|
7979
8364
|
);
|
|
7980
8365
|
await Promise.all(searchPromises);
|
|
@@ -7987,8 +8372,15 @@ function createApplicationPipeline(config) {
|
|
|
7987
8372
|
await Promise.all(fillTasks);
|
|
7988
8373
|
state.updatedAt = Date.now();
|
|
7989
8374
|
await applicationStore?.save(state);
|
|
7990
|
-
|
|
7991
|
-
|
|
8375
|
+
workflowPlan = planApplicationWorkflow({
|
|
8376
|
+
fields: state.fields,
|
|
8377
|
+
hasBackfillProvider: false,
|
|
8378
|
+
orgContextCount: 0,
|
|
8379
|
+
hasDocumentStore: false,
|
|
8380
|
+
hasMemoryStore: false
|
|
8381
|
+
});
|
|
8382
|
+
const unfilledFields = workflowPlan.unfilledFields;
|
|
8383
|
+
if (workflowPlan.runBatching) {
|
|
7992
8384
|
onProgress?.(`Batching ${unfilledFields.length} remaining questions...`);
|
|
7993
8385
|
state.status = "batching";
|
|
7994
8386
|
try {
|
|
@@ -8055,7 +8447,12 @@ function createApplicationPipeline(config) {
|
|
|
8055
8447
|
}
|
|
8056
8448
|
let fieldsFilled = 0;
|
|
8057
8449
|
let responseText;
|
|
8058
|
-
|
|
8450
|
+
let replyPlan = planReplyActions({
|
|
8451
|
+
intent,
|
|
8452
|
+
currentBatchFields,
|
|
8453
|
+
hasDocumentStore: Boolean(documentStore)
|
|
8454
|
+
});
|
|
8455
|
+
if (replyPlan.parseAnswers) {
|
|
8059
8456
|
onProgress?.("Parsing answers...");
|
|
8060
8457
|
try {
|
|
8061
8458
|
const { result: parseResult, usage: parseUsage } = await parseAnswers(
|
|
@@ -8078,7 +8475,7 @@ function createApplicationPipeline(config) {
|
|
|
8078
8475
|
await log?.(`Answer parsing failed: ${error instanceof Error ? error.message : String(error)}`);
|
|
8079
8476
|
}
|
|
8080
8477
|
}
|
|
8081
|
-
if (intent.lookupRequests?.length) {
|
|
8478
|
+
if (replyPlan.runLookup && intent.lookupRequests?.length) {
|
|
8082
8479
|
onProgress?.("Processing lookup requests...");
|
|
8083
8480
|
let availableData = "";
|
|
8084
8481
|
if (documentStore) {
|
|
@@ -8119,64 +8516,78 @@ function createApplicationPipeline(config) {
|
|
|
8119
8516
|
}
|
|
8120
8517
|
}
|
|
8121
8518
|
}
|
|
8122
|
-
if (
|
|
8123
|
-
|
|
8124
|
-
|
|
8125
|
-
|
|
8126
|
-
prompt: `The user is filling out an insurance application and asked: "${intent.questionText}"
|
|
8519
|
+
if (replyPlan.answerQuestion && intent.questionText) {
|
|
8520
|
+
try {
|
|
8521
|
+
const { text, usage } = await generateText({
|
|
8522
|
+
prompt: `The user is filling out an insurance application and asked: "${intent.questionText}"
|
|
8127
8523
|
|
|
8128
8524
|
Provide a brief, helpful explanation (2-3 sentences). End with "Just reply with the answer when you're ready and I'll fill it in."`,
|
|
8129
|
-
|
|
8130
|
-
|
|
8131
|
-
|
|
8132
|
-
|
|
8133
|
-
|
|
8134
|
-
|
|
8135
|
-
|
|
8136
|
-
|
|
8137
|
-
}
|
|
8525
|
+
maxTokens: 512,
|
|
8526
|
+
providerOptions
|
|
8527
|
+
});
|
|
8528
|
+
trackUsage(usage);
|
|
8529
|
+
responseText = text;
|
|
8530
|
+
} catch (error) {
|
|
8531
|
+
await log?.(`Question response generation failed: ${error instanceof Error ? error.message : String(error)}`);
|
|
8532
|
+
responseText = `I wasn't able to generate an explanation for your question. Could you rephrase it, or just provide the answer directly?`;
|
|
8138
8533
|
}
|
|
8139
8534
|
}
|
|
8140
8535
|
const currentBatchComplete = currentBatchFieldIds.every(
|
|
8141
8536
|
(fid) => state.fields.find((f) => f.id === fid)?.value
|
|
8142
8537
|
);
|
|
8143
|
-
|
|
8144
|
-
|
|
8145
|
-
|
|
8146
|
-
|
|
8147
|
-
const
|
|
8148
|
-
|
|
8149
|
-
|
|
8538
|
+
let nextBatchIndex;
|
|
8539
|
+
let nextBatchFields;
|
|
8540
|
+
if (state.batches) {
|
|
8541
|
+
for (let index = state.currentBatchIndex + 1; index < state.batches.length; index++) {
|
|
8542
|
+
const candidateFields = state.fields.filter((f) => state.batches[index].includes(f.id));
|
|
8543
|
+
if (candidateFields.some((f) => !f.value)) {
|
|
8544
|
+
nextBatchIndex = index;
|
|
8545
|
+
nextBatchFields = candidateFields;
|
|
8546
|
+
break;
|
|
8547
|
+
}
|
|
8548
|
+
}
|
|
8549
|
+
}
|
|
8550
|
+
replyPlan = planReplyActions({
|
|
8551
|
+
intent,
|
|
8552
|
+
currentBatchFields,
|
|
8553
|
+
nextBatchFields,
|
|
8554
|
+
hasDocumentStore: Boolean(documentStore)
|
|
8555
|
+
});
|
|
8556
|
+
if (currentBatchComplete && replyPlan.advanceBatch && state.batches) {
|
|
8557
|
+
if (nextBatchIndex !== void 0 && nextBatchFields) {
|
|
8558
|
+
state.currentBatchIndex = nextBatchIndex;
|
|
8150
8559
|
const filledCount = state.fields.filter((f) => f.value).length;
|
|
8151
|
-
|
|
8152
|
-
|
|
8153
|
-
|
|
8154
|
-
|
|
8155
|
-
|
|
8156
|
-
|
|
8157
|
-
|
|
8158
|
-
|
|
8159
|
-
|
|
8160
|
-
|
|
8161
|
-
|
|
8162
|
-
|
|
8163
|
-
|
|
8164
|
-
|
|
8165
|
-
|
|
8166
|
-
|
|
8167
|
-
|
|
8168
|
-
|
|
8169
|
-
|
|
8170
|
-
|
|
8171
|
-
|
|
8172
|
-
responseText
|
|
8173
|
-
|
|
8174
|
-
|
|
8560
|
+
if (replyPlan.generateNextEmail) {
|
|
8561
|
+
try {
|
|
8562
|
+
const { text: emailText, usage: emailUsage } = await generateBatchEmail(
|
|
8563
|
+
nextBatchFields,
|
|
8564
|
+
state.currentBatchIndex,
|
|
8565
|
+
state.batches.length,
|
|
8566
|
+
{
|
|
8567
|
+
appTitle: state.title,
|
|
8568
|
+
totalFieldCount: state.fields.length,
|
|
8569
|
+
filledFieldCount: filledCount,
|
|
8570
|
+
companyName: context?.companyName
|
|
8571
|
+
},
|
|
8572
|
+
generateText,
|
|
8573
|
+
providerOptions
|
|
8574
|
+
);
|
|
8575
|
+
trackUsage(emailUsage);
|
|
8576
|
+
const emailReview = reviewBatchEmail(emailText, nextBatchFields);
|
|
8577
|
+
state.qualityReport = {
|
|
8578
|
+
...buildApplicationQualityReport(state),
|
|
8579
|
+
emailReview
|
|
8580
|
+
};
|
|
8581
|
+
if (!responseText) {
|
|
8582
|
+
responseText = emailText;
|
|
8583
|
+
} else {
|
|
8584
|
+
responseText += `
|
|
8175
8585
|
|
|
8176
8586
|
${emailText}`;
|
|
8587
|
+
}
|
|
8588
|
+
} catch (error) {
|
|
8589
|
+
await log?.(`Batch email generation failed: ${error instanceof Error ? error.message : String(error)}`);
|
|
8177
8590
|
}
|
|
8178
|
-
} catch (error) {
|
|
8179
|
-
await log?.(`Batch email generation failed: ${error instanceof Error ? error.message : String(error)}`);
|
|
8180
8591
|
}
|
|
8181
8592
|
} else {
|
|
8182
8593
|
state.status = "confirming";
|
|
@@ -8990,6 +9401,42 @@ ${item.text}`).join("\n\n");
|
|
|
8990
9401
|
return { evidence, contextSummary };
|
|
8991
9402
|
}
|
|
8992
9403
|
|
|
9404
|
+
// src/query/workflow.ts
|
|
9405
|
+
/**
 * Tells whether a query classification calls for evidence retrieval.
 *
 * @param {{requiresDocumentLookup?: *, requiresChunkSearch?: *}} classification
 * @returns {*} `requiresDocumentLookup` when it is truthy, otherwise `requiresChunkSearch`.
 */
function shouldRetrieveForClassification(classification) {
  const needsDocumentLookup = classification.requiresDocumentLookup;
  return needsDocumentLookup ? needsDocumentLookup : classification.requiresChunkSearch;
}
|
|
9408
|
+
/**
 * Builds the ordered list of workflow actions for answering a query.
 *
 * The plan always contains "reason", "verify" and "respond" actions, in that
 * order. A "retrieve" action is placed first when
 * `shouldRetrieveForClassification` returns a truthy value.
 *
 * @param {object} params
 * @param {object} params.classification - Query classification; its `subQuestions`
 *   are attached to the retrieve and reason actions.
 * @param {Array<object>} params.attachmentEvidence - Evidence taken from attachments;
 *   only its length is used, to word the reason action.
 * @returns {{actions: Array<object>, shouldRetrieve: *}} The action list and the retrieval decision.
 */
function buildInitialQueryWorkflowPlan(params) {
  const { classification, attachmentEvidence } = params;
  const shouldRetrieve = shouldRetrieveForClassification(classification);
  const { subQuestions } = classification;

  // Attachment evidence is only consulted when no retrieval is planned.
  let reasoningNote;
  if (shouldRetrieve) {
    reasoningNote = "answer with retrieved evidence and any attachment evidence";
  } else if (attachmentEvidence.length > 0) {
    reasoningNote = "answer with attachment evidence only";
  } else {
    reasoningNote = "answer without document retrieval";
  }

  const retrievalSteps = shouldRetrieve
    ? [
        {
          type: "retrieve",
          subQuestions,
          reason: "classification requested document or chunk lookup"
        }
      ]
    : [];

  const actions = [
    ...retrievalSteps,
    {
      type: "reason",
      subQuestions,
      reason: reasoningNote
    },
    {
      type: "verify",
      reason: "check grounding and request targeted retries when needed"
    },
    {
      type: "respond",
      reason: "compose final response"
    }
  ];

  return { actions, shouldRetrieve };
}
|
|
9436
|
+
/**
 * Looks up the first action of a given type in a workflow plan.
 *
 * @param {{actions: Array<{type: string}>}} plan - Workflow plan to search.
 * @param {string} type - Action type to match with strict equality.
 * @returns {object|undefined} The first matching action, or `undefined` when none matches.
 */
function getWorkflowAction(plan, type) {
  for (const candidate of plan.actions) {
    if (candidate.type === type) {
      return candidate;
    }
  }
  return undefined;
}
|
|
9439
|
+
|
|
8993
9440
|
// src/query/coordinator.ts
|
|
8994
9441
|
function createQueryAgent(config) {
|
|
8995
9442
|
const {
|
|
@@ -9034,29 +9481,37 @@ function createQueryAgent(config) {
|
|
|
9034
9481
|
onProgress?.("Classifying query...");
|
|
9035
9482
|
const classification = await classify(question, conversationId, attachmentContext);
|
|
9036
9483
|
await pipelineCtx.save("classify", { classification, attachmentEvidence });
|
|
9037
|
-
onProgress?.(`Retrieving evidence for ${classification.subQuestions.length} sub-question(s)...`);
|
|
9038
9484
|
const retrieverConfig = {
|
|
9039
9485
|
documentStore,
|
|
9040
9486
|
memoryStore,
|
|
9041
9487
|
retrievalLimit,
|
|
9042
9488
|
log
|
|
9043
9489
|
};
|
|
9044
|
-
const
|
|
9045
|
-
|
|
9046
|
-
|
|
9047
|
-
|
|
9048
|
-
)
|
|
9490
|
+
const workflowPlan = buildInitialQueryWorkflowPlan({ classification, attachmentEvidence });
|
|
9491
|
+
const retrieveAction = getWorkflowAction(workflowPlan, "retrieve");
|
|
9492
|
+
const reasonAction = getWorkflowAction(workflowPlan, "reason");
|
|
9493
|
+
await pipelineCtx.save("workflow", { classification, attachmentEvidence, workflowPlan });
|
|
9494
|
+
const retrievalResults = retrieveAction ? await (async () => {
|
|
9495
|
+
onProgress?.(`Retrieving evidence for ${retrieveAction.subQuestions.length} sub-question(s)...`);
|
|
9496
|
+
return Promise.all(
|
|
9497
|
+
retrieveAction.subQuestions.map(
|
|
9498
|
+
(sq) => limit(() => retrieve(sq, conversationId, retrieverConfig))
|
|
9499
|
+
)
|
|
9500
|
+
);
|
|
9501
|
+
})() : [];
|
|
9049
9502
|
const allEvidence = [...attachmentEvidence, ...retrievalResults.flatMap((r) => r.evidence)];
|
|
9050
9503
|
await pipelineCtx.save("retrieve", { classification, attachmentEvidence, evidence: allEvidence });
|
|
9051
9504
|
onProgress?.("Reasoning over evidence...");
|
|
9052
9505
|
const reasonerConfig = { generateObject, providerOptions };
|
|
9506
|
+
const subQuestionsToReason = reasonAction?.subQuestions ?? classification.subQuestions;
|
|
9053
9507
|
const reasonResults = await Promise.allSettled(
|
|
9054
|
-
|
|
9055
|
-
(sq
|
|
9508
|
+
subQuestionsToReason.map(
|
|
9509
|
+
(sq) => limit(async () => {
|
|
9510
|
+
const retrievedEvidence = retrievalResults.find((r) => r.subQuestion === sq.question)?.evidence ?? [];
|
|
9056
9511
|
const { subAnswer, usage } = await reason(
|
|
9057
9512
|
sq.question,
|
|
9058
9513
|
sq.intent,
|
|
9059
|
-
[...attachmentEvidence, ...
|
|
9514
|
+
[...attachmentEvidence, ...retrievedEvidence],
|
|
9060
9515
|
reasonerConfig
|
|
9061
9516
|
);
|
|
9062
9517
|
trackUsage(usage);
|
|
@@ -9070,9 +9525,9 @@ function createQueryAgent(config) {
|
|
|
9070
9525
|
if (result.status === "fulfilled") {
|
|
9071
9526
|
subAnswers.push(result.value);
|
|
9072
9527
|
} else {
|
|
9073
|
-
await log?.(`Reasoner failed for sub-question "${
|
|
9528
|
+
await log?.(`Reasoner failed for sub-question "${subQuestionsToReason[i].question}": ${result.reason}`);
|
|
9074
9529
|
subAnswers.push({
|
|
9075
|
-
subQuestion:
|
|
9530
|
+
subQuestion: subQuestionsToReason[i].question,
|
|
9076
9531
|
answer: "Unable to answer this part of the question due to a processing error.",
|
|
9077
9532
|
citations: [],
|
|
9078
9533
|
confidence: 0,
|