@claritylabs/cl-sdk 0.17.0 → 0.17.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +16 -4
- package/dist/index.d.mts +10 -2
- package/dist/index.d.ts +10 -2
- package/dist/index.js +1086 -631
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +1086 -631
- package/dist/index.mjs.map +1 -1
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -438,7 +438,14 @@ function createPipelineContext(opts) {
|
|
|
438
438
|
let latest = opts.resumeFrom;
|
|
439
439
|
const completedPhases = /* @__PURE__ */ new Set();
|
|
440
440
|
if (opts.resumeFrom) {
|
|
441
|
-
|
|
441
|
+
const phaseIndex = opts.phaseOrder?.indexOf(opts.resumeFrom.phase) ?? -1;
|
|
442
|
+
if (phaseIndex >= 0 && opts.phaseOrder) {
|
|
443
|
+
for (const phase of opts.phaseOrder.slice(0, phaseIndex + 1)) {
|
|
444
|
+
completedPhases.add(phase);
|
|
445
|
+
}
|
|
446
|
+
} else {
|
|
447
|
+
completedPhases.add(opts.resumeFrom.phase);
|
|
448
|
+
}
|
|
442
449
|
}
|
|
443
450
|
return {
|
|
444
451
|
id: opts.id,
|
|
@@ -1969,6 +1976,53 @@ async function runExtractor(params) {
|
|
|
1969
1976
|
};
|
|
1970
1977
|
}
|
|
1971
1978
|
|
|
1979
|
+
// src/extraction/memory.ts
|
|
1980
|
+
function isMemoryRecord(value) {
|
|
1981
|
+
return typeof value === "object" && value !== null && !Array.isArray(value);
|
|
1982
|
+
}
|
|
1983
|
+
function readMemoryRecord(memory, key) {
|
|
1984
|
+
const value = memory.get(key);
|
|
1985
|
+
return isMemoryRecord(value) ? value : void 0;
|
|
1986
|
+
}
|
|
1987
|
+
function readRecordValue(record, key) {
|
|
1988
|
+
return record?.[key];
|
|
1989
|
+
}
|
|
1990
|
+
function readRecordArray(record, key) {
|
|
1991
|
+
const value = readRecordValue(record, key);
|
|
1992
|
+
return Array.isArray(value) ? value : void 0;
|
|
1993
|
+
}
|
|
1994
|
+
function getCarrierInfo(memory) {
|
|
1995
|
+
return readMemoryRecord(memory, "carrier_info");
|
|
1996
|
+
}
|
|
1997
|
+
function getNamedInsured(memory) {
|
|
1998
|
+
return readMemoryRecord(memory, "named_insured");
|
|
1999
|
+
}
|
|
2000
|
+
function getCoverageLimits(memory) {
|
|
2001
|
+
return readMemoryRecord(memory, "coverage_limits");
|
|
2002
|
+
}
|
|
2003
|
+
function getCoverageLimitCoverages(memory) {
|
|
2004
|
+
return readRecordArray(getCoverageLimits(memory), "coverages") ?? [];
|
|
2005
|
+
}
|
|
2006
|
+
function getSectionsPayload(memory) {
|
|
2007
|
+
return readMemoryRecord(memory, "sections");
|
|
2008
|
+
}
|
|
2009
|
+
function getSections(memory) {
|
|
2010
|
+
return readRecordArray(getSectionsPayload(memory), "sections");
|
|
2011
|
+
}
|
|
2012
|
+
function getDefinitionsPayload(memory) {
|
|
2013
|
+
return readMemoryRecord(memory, "definitions");
|
|
2014
|
+
}
|
|
2015
|
+
function getDefinitions(memory) {
|
|
2016
|
+
return readRecordArray(getDefinitionsPayload(memory), "definitions");
|
|
2017
|
+
}
|
|
2018
|
+
function getCoveredReasonsPayload(memory) {
|
|
2019
|
+
return readMemoryRecord(memory, "covered_reasons");
|
|
2020
|
+
}
|
|
2021
|
+
function getCoveredReasons(memory) {
|
|
2022
|
+
const payload = getCoveredReasonsPayload(memory);
|
|
2023
|
+
return readRecordArray(payload, "coveredReasons") ?? readRecordArray(payload, "covered_reasons");
|
|
2024
|
+
}
|
|
2025
|
+
|
|
1972
2026
|
// src/extraction/promote.ts
|
|
1973
2027
|
function getDeclarationFields(doc) {
|
|
1974
2028
|
const decl = doc.declarations;
|
|
@@ -1995,20 +2049,29 @@ function findRawString(raw, keys) {
|
|
|
1995
2049
|
}
|
|
1996
2050
|
return void 0;
|
|
1997
2051
|
}
|
|
2052
|
+
function promoteRawFields(raw, mappings) {
|
|
2053
|
+
for (const { from, to } of mappings) {
|
|
2054
|
+
if (!raw[to] && raw[from]) {
|
|
2055
|
+
raw[to] = raw[from];
|
|
2056
|
+
}
|
|
2057
|
+
delete raw[from];
|
|
2058
|
+
}
|
|
2059
|
+
}
|
|
2060
|
+
function findRawOrDeclarationValue(raw, fields, lookup) {
|
|
2061
|
+
return (lookup.rawKey ? raw[lookup.rawKey] : void 0) || findFieldValue(fields, lookup.patterns, lookup.reject);
|
|
2062
|
+
}
|
|
2063
|
+
function promoteRawOrDeclarationString(raw, fields, targetKey, rawKeys, lookup) {
|
|
2064
|
+
if (raw[targetKey]) return;
|
|
2065
|
+
const value = findRawString(raw, rawKeys) ?? findFieldValue(fields, lookup.patterns, lookup.reject);
|
|
2066
|
+
if (value) raw[targetKey] = value;
|
|
2067
|
+
}
|
|
1998
2068
|
function promoteCarrierFields(doc) {
|
|
1999
2069
|
const raw = doc;
|
|
2000
|
-
|
|
2001
|
-
|
|
2002
|
-
|
|
2003
|
-
|
|
2004
|
-
|
|
2005
|
-
}
|
|
2006
|
-
if (!raw.carrierAdmittedStatus && raw.admittedStatus) {
|
|
2007
|
-
raw.carrierAdmittedStatus = raw.admittedStatus;
|
|
2008
|
-
}
|
|
2009
|
-
delete raw.naicNumber;
|
|
2010
|
-
delete raw.amBestRating;
|
|
2011
|
-
delete raw.admittedStatus;
|
|
2070
|
+
promoteRawFields(raw, [
|
|
2071
|
+
{ from: "naicNumber", to: "carrierNaicNumber" },
|
|
2072
|
+
{ from: "amBestRating", to: "carrierAmBestRating" },
|
|
2073
|
+
{ from: "admittedStatus", to: "carrierAdmittedStatus" }
|
|
2074
|
+
]);
|
|
2012
2075
|
if (!raw.insurer && raw.carrierLegalName) {
|
|
2013
2076
|
raw.insurer = {
|
|
2014
2077
|
legalName: raw.carrierLegalName,
|
|
@@ -2049,12 +2112,21 @@ var BROKER_ADDRESS_PATTERNS = ["brokerAddress", "agentAddress", "producerAddress
|
|
|
2049
2112
|
function promoteBroker(doc) {
|
|
2050
2113
|
const raw = doc;
|
|
2051
2114
|
const fields = getDeclarationFields(doc);
|
|
2052
|
-
const brokerAgency = raw
|
|
2053
|
-
|
|
2054
|
-
|
|
2055
|
-
|
|
2056
|
-
const
|
|
2057
|
-
|
|
2115
|
+
const brokerAgency = findRawOrDeclarationValue(raw, fields, {
|
|
2116
|
+
rawKey: "brokerAgency",
|
|
2117
|
+
patterns: BROKER_NAME_PATTERNS
|
|
2118
|
+
});
|
|
2119
|
+
const brokerContact = findRawOrDeclarationValue(raw, fields, {
|
|
2120
|
+
rawKey: "brokerContactName",
|
|
2121
|
+
patterns: BROKER_CONTACT_PATTERNS
|
|
2122
|
+
});
|
|
2123
|
+
const brokerLicense = findRawOrDeclarationValue(raw, fields, {
|
|
2124
|
+
rawKey: "brokerLicenseNumber",
|
|
2125
|
+
patterns: BROKER_LICENSE_PATTERNS
|
|
2126
|
+
});
|
|
2127
|
+
const brokerPhone = findRawOrDeclarationValue(raw, fields, { patterns: BROKER_PHONE_PATTERNS });
|
|
2128
|
+
const brokerEmail = findRawOrDeclarationValue(raw, fields, { patterns: BROKER_EMAIL_PATTERNS });
|
|
2129
|
+
const brokerAddress = findRawOrDeclarationValue(raw, fields, { patterns: BROKER_ADDRESS_PATTERNS });
|
|
2058
2130
|
if (brokerAgency) raw.brokerAgency = brokerAgency;
|
|
2059
2131
|
if (brokerContact) raw.brokerContactName = brokerContact;
|
|
2060
2132
|
if (brokerLicense) raw.brokerLicenseNumber = brokerLicense;
|
|
@@ -2430,20 +2502,27 @@ function taxFeeKey(item) {
|
|
|
2430
2502
|
item.type ?? ""
|
|
2431
2503
|
].join("|");
|
|
2432
2504
|
}
|
|
2505
|
+
function taxFeeItemFromField(field) {
|
|
2506
|
+
const type = taxFeeType(field.field);
|
|
2507
|
+
return {
|
|
2508
|
+
name: titleizeFieldName(field.field),
|
|
2509
|
+
amount: absorbNegative(field.value),
|
|
2510
|
+
...type ? { type } : {}
|
|
2511
|
+
};
|
|
2512
|
+
}
|
|
2433
2513
|
function absorbNegative(value) {
|
|
2434
2514
|
return value.replace(/^-\s*/, "").replace(/^\(\s*(.*?)\s*\)$/, "$1");
|
|
2435
2515
|
}
|
|
2436
2516
|
function promotePremium(doc) {
|
|
2437
2517
|
const raw = doc;
|
|
2438
2518
|
const fields = getDeclarationFields(doc);
|
|
2439
|
-
|
|
2440
|
-
|
|
2441
|
-
|
|
2442
|
-
}
|
|
2443
|
-
|
|
2444
|
-
|
|
2445
|
-
|
|
2446
|
-
}
|
|
2519
|
+
promoteRawOrDeclarationString(raw, fields, "premium", PREMIUM_RAW_KEYS, {
|
|
2520
|
+
patterns: PREMIUM_PATTERNS,
|
|
2521
|
+
reject: (field) => isTaxOrFeeField(field.field)
|
|
2522
|
+
});
|
|
2523
|
+
promoteRawOrDeclarationString(raw, fields, "totalCost", TOTAL_COST_RAW_KEYS, {
|
|
2524
|
+
patterns: TOTAL_COST_PATTERNS
|
|
2525
|
+
});
|
|
2447
2526
|
if (typeof raw.premium === "string") raw.premium = absorbNegative(raw.premium);
|
|
2448
2527
|
if (typeof raw.totalCost === "string") raw.totalCost = absorbNegative(raw.totalCost);
|
|
2449
2528
|
}
|
|
@@ -2461,11 +2540,7 @@ function synthesizeTaxesAndFees(doc) {
|
|
|
2461
2540
|
if (!field.value?.trim()) continue;
|
|
2462
2541
|
if (!isTaxOrFeeField(field.field)) continue;
|
|
2463
2542
|
if (isTotalCostField(field.field)) continue;
|
|
2464
|
-
const item =
|
|
2465
|
-
name: titleizeFieldName(field.field),
|
|
2466
|
-
amount: absorbNegative(field.value),
|
|
2467
|
-
...taxFeeType(field.field) ? { type: taxFeeType(field.field) } : {}
|
|
2468
|
-
};
|
|
2543
|
+
const item = taxFeeItemFromField(field);
|
|
2469
2544
|
byKey.set(taxFeeKey(item), item);
|
|
2470
2545
|
}
|
|
2471
2546
|
if (byKey.size > 0) {
|
|
@@ -2485,43 +2560,47 @@ function promoteExtractedFields(doc) {
|
|
|
2485
2560
|
|
|
2486
2561
|
// src/extraction/assembler.ts
|
|
2487
2562
|
function assembleDocument(documentId, documentType, memory) {
|
|
2488
|
-
const carrier = memory
|
|
2489
|
-
const insured = memory
|
|
2490
|
-
const coverages = memory
|
|
2491
|
-
const endorsements = memory
|
|
2492
|
-
const exclusions = memory
|
|
2493
|
-
const conditions = memory
|
|
2494
|
-
const premium = memory
|
|
2495
|
-
const declarations = memory
|
|
2496
|
-
const lossHistory = memory
|
|
2497
|
-
const
|
|
2498
|
-
const
|
|
2499
|
-
const
|
|
2500
|
-
const
|
|
2501
|
-
const
|
|
2502
|
-
const classify = memory.get("classify");
|
|
2563
|
+
const carrier = getCarrierInfo(memory);
|
|
2564
|
+
const insured = getNamedInsured(memory);
|
|
2565
|
+
const coverages = getCoverageLimits(memory);
|
|
2566
|
+
const endorsements = readMemoryRecord(memory, "endorsements");
|
|
2567
|
+
const exclusions = readMemoryRecord(memory, "exclusions");
|
|
2568
|
+
const conditions = readMemoryRecord(memory, "conditions");
|
|
2569
|
+
const premium = readMemoryRecord(memory, "premium_breakdown");
|
|
2570
|
+
const declarations = readMemoryRecord(memory, "declarations");
|
|
2571
|
+
const lossHistory = readMemoryRecord(memory, "loss_history");
|
|
2572
|
+
const supplementary = readMemoryRecord(memory, "supplementary");
|
|
2573
|
+
const formInventory = readMemoryRecord(memory, "form_inventory");
|
|
2574
|
+
const classify = readMemoryRecord(memory, "classify");
|
|
2575
|
+
const lossPayees = readRecordArray(insured, "lossPayees");
|
|
2576
|
+
const mortgageHolders = readRecordArray(insured, "mortgageHolders");
|
|
2503
2577
|
const base = {
|
|
2504
2578
|
id: documentId,
|
|
2505
|
-
carrier: carrier
|
|
2506
|
-
insuredName: insured
|
|
2507
|
-
coverages:
|
|
2508
|
-
policyTypes: classify
|
|
2579
|
+
carrier: readRecordValue(carrier, "carrierName") ?? "Unknown",
|
|
2580
|
+
insuredName: readRecordValue(insured, "insuredName") ?? "Unknown",
|
|
2581
|
+
coverages: getCoverageLimitCoverages(memory),
|
|
2582
|
+
policyTypes: readRecordValue(classify, "policyTypes"),
|
|
2509
2583
|
...sanitizeNulls(carrier ?? {}),
|
|
2510
2584
|
...sanitizeNulls(insured ?? {}),
|
|
2511
2585
|
// Map named_insured extractor's loss payees/mortgage holders to EndorsementParty shape
|
|
2512
|
-
...
|
|
2513
|
-
...
|
|
2586
|
+
...lossPayees && lossPayees.length > 0 ? { lossPayees: lossPayees.map((lp) => ({ ...lp, role: "loss_payee" })) } : {},
|
|
2587
|
+
...mortgageHolders && mortgageHolders.length > 0 ? {
|
|
2588
|
+
mortgageHolders: mortgageHolders.map((mh) => ({
|
|
2589
|
+
...mh,
|
|
2590
|
+
role: "mortgage_holder"
|
|
2591
|
+
}))
|
|
2592
|
+
} : {},
|
|
2514
2593
|
...sanitizeNulls(coverages ?? {}),
|
|
2515
2594
|
...sanitizeNulls(premium ?? {}),
|
|
2516
2595
|
...sanitizeNulls(supplementary ?? {}),
|
|
2517
|
-
supplementaryFacts: supplementary
|
|
2518
|
-
endorsements: endorsements
|
|
2519
|
-
exclusions: exclusions
|
|
2520
|
-
conditions: conditions
|
|
2521
|
-
sections:
|
|
2522
|
-
formInventory: formInventory
|
|
2523
|
-
definitions:
|
|
2524
|
-
coveredReasons:
|
|
2596
|
+
supplementaryFacts: readRecordValue(supplementary, "auxiliaryFacts"),
|
|
2597
|
+
endorsements: readRecordValue(endorsements, "endorsements"),
|
|
2598
|
+
exclusions: readRecordValue(exclusions, "exclusions"),
|
|
2599
|
+
conditions: readRecordValue(conditions, "conditions"),
|
|
2600
|
+
sections: getSections(memory),
|
|
2601
|
+
formInventory: readRecordValue(formInventory, "forms"),
|
|
2602
|
+
definitions: getDefinitions(memory),
|
|
2603
|
+
coveredReasons: getCoveredReasons(memory),
|
|
2525
2604
|
declarations: declarations ? sanitizeNulls(declarations) : void 0,
|
|
2526
2605
|
...sanitizeNulls(lossHistory ?? {})
|
|
2527
2606
|
};
|
|
@@ -2530,21 +2609,21 @@ function assembleDocument(documentId, documentType, memory) {
|
|
|
2530
2609
|
doc = {
|
|
2531
2610
|
...base,
|
|
2532
2611
|
type: "policy",
|
|
2533
|
-
policyNumber: carrier
|
|
2534
|
-
effectiveDate: carrier
|
|
2535
|
-
expirationDate: carrier
|
|
2536
|
-
policyTermType: carrier
|
|
2612
|
+
policyNumber: readRecordValue(carrier, "policyNumber") ?? readRecordValue(insured, "policyNumber") ?? "Unknown",
|
|
2613
|
+
effectiveDate: readRecordValue(carrier, "effectiveDate") ?? readRecordValue(insured, "effectiveDate") ?? "Unknown",
|
|
2614
|
+
expirationDate: readRecordValue(carrier, "expirationDate"),
|
|
2615
|
+
policyTermType: readRecordValue(carrier, "policyTermType")
|
|
2537
2616
|
};
|
|
2538
2617
|
} else {
|
|
2539
2618
|
doc = {
|
|
2540
2619
|
...base,
|
|
2541
2620
|
type: "quote",
|
|
2542
|
-
quoteNumber: carrier
|
|
2543
|
-
proposedEffectiveDate: carrier
|
|
2544
|
-
proposedExpirationDate: carrier
|
|
2545
|
-
subjectivities: coverages
|
|
2546
|
-
underwritingConditions: coverages
|
|
2547
|
-
premiumBreakdown: premium
|
|
2621
|
+
quoteNumber: readRecordValue(carrier, "quoteNumber") ?? "Unknown",
|
|
2622
|
+
proposedEffectiveDate: readRecordValue(carrier, "proposedEffectiveDate"),
|
|
2623
|
+
proposedExpirationDate: readRecordValue(carrier, "proposedExpirationDate"),
|
|
2624
|
+
subjectivities: readRecordValue(coverages, "subjectivities"),
|
|
2625
|
+
underwritingConditions: readRecordValue(coverages, "underwritingConditions"),
|
|
2626
|
+
premiumBreakdown: readRecordValue(premium, "premiumBreakdown")
|
|
2548
2627
|
};
|
|
2549
2628
|
}
|
|
2550
2629
|
promoteExtractedFields(doc);
|
|
@@ -2646,6 +2725,23 @@ ${block}`;
|
|
|
2646
2725
|
}
|
|
2647
2726
|
|
|
2648
2727
|
// src/extraction/formatter.ts
|
|
2728
|
+
var LONG_CONTENT_THRESHOLD = 1200;
|
|
2729
|
+
function shouldFormatContent(text) {
|
|
2730
|
+
const trimmed = text.trim();
|
|
2731
|
+
if (trimmed.length === 0) return false;
|
|
2732
|
+
if (trimmed.length >= LONG_CONTENT_THRESHOLD) return true;
|
|
2733
|
+
if (/```|~~~|<br\s*\/?>/i.test(trimmed)) return true;
|
|
2734
|
+
if (/(^|\s)(\*\*|__|`)/.test(trimmed)) return true;
|
|
2735
|
+
if (/!?\[[^\]]+\]\([^)]+\)/.test(trimmed)) return true;
|
|
2736
|
+
if (/^\s{0,3}#{1,6}\s*\S/m.test(trimmed)) return true;
|
|
2737
|
+
if (/^\s{0,6}(?:[-*+]|\d+[.)])\s+\S/m.test(trimmed)) return true;
|
|
2738
|
+
if (/\t|[^\S\r\n]{3,}|\n{3,}|[ \t]+$/m.test(text)) return true;
|
|
2739
|
+
const lines = trimmed.split(/\r?\n/).map((line) => line.trim()).filter(Boolean);
|
|
2740
|
+
if (lines.some((line) => (line.match(/\|/g)?.length ?? 0) >= 2)) return true;
|
|
2741
|
+
const spaceAlignedRows = lines.filter((line) => /\S\s{2,}\S\s{2,}\S/.test(line));
|
|
2742
|
+
if (spaceAlignedRows.length >= 2) return true;
|
|
2743
|
+
return false;
|
|
2744
|
+
}
|
|
2649
2745
|
function collectContentFields(doc) {
|
|
2650
2746
|
const entries = [];
|
|
2651
2747
|
let id = 0;
|
|
@@ -2749,7 +2845,7 @@ function applyFormattedContent(doc, entries, formatted) {
|
|
|
2749
2845
|
}
|
|
2750
2846
|
var MAX_ENTRIES_PER_BATCH = 20;
|
|
2751
2847
|
async function formatDocumentContent(doc, generateText, options) {
|
|
2752
|
-
const entries = collectContentFields(doc);
|
|
2848
|
+
const entries = collectContentFields(doc).filter((entry) => shouldFormatContent(entry.text));
|
|
2753
2849
|
const totalUsage = { inputTokens: 0, outputTokens: 0 };
|
|
2754
2850
|
if (entries.length === 0) {
|
|
2755
2851
|
return { document: doc, usage: totalUsage };
|
|
@@ -2826,11 +2922,22 @@ function chunkDocument(doc) {
|
|
|
2826
2922
|
if (policyTypesStr) base.policyTypes = policyTypesStr;
|
|
2827
2923
|
return base;
|
|
2828
2924
|
}
|
|
2829
|
-
|
|
2830
|
-
|
|
2831
|
-
|
|
2832
|
-
|
|
2833
|
-
|
|
2925
|
+
function lines(values) {
|
|
2926
|
+
return values.filter(Boolean).join("\n");
|
|
2927
|
+
}
|
|
2928
|
+
function pushChunk(idSuffix, type, text, metadata) {
|
|
2929
|
+
chunks.push({
|
|
2930
|
+
id: `${docId}:${idSuffix}`,
|
|
2931
|
+
documentId: docId,
|
|
2932
|
+
type,
|
|
2933
|
+
text,
|
|
2934
|
+
metadata: stringMetadata(metadata)
|
|
2935
|
+
});
|
|
2936
|
+
}
|
|
2937
|
+
pushChunk(
|
|
2938
|
+
"carrier_info:0",
|
|
2939
|
+
"carrier_info",
|
|
2940
|
+
lines([
|
|
2834
2941
|
`Carrier: ${doc.carrier}`,
|
|
2835
2942
|
doc.carrierLegalName ? `Legal Name: ${doc.carrierLegalName}` : null,
|
|
2836
2943
|
doc.carrierNaicNumber ? `NAIC: ${doc.carrierNaicNumber}` : null,
|
|
@@ -2847,94 +2954,83 @@ function chunkDocument(doc) {
|
|
|
2847
2954
|
doc.isPackage != null ? `Package: ${doc.isPackage ? "Yes" : "No"}` : null,
|
|
2848
2955
|
doc.security ? `Security: ${doc.security}` : null,
|
|
2849
2956
|
doc.policyTypes?.length ? `Policy Types: ${doc.policyTypes.join(", ")}` : null
|
|
2850
|
-
]
|
|
2851
|
-
|
|
2852
|
-
|
|
2957
|
+
]),
|
|
2958
|
+
{ carrier: doc.carrier, documentType: doc.type }
|
|
2959
|
+
);
|
|
2853
2960
|
if (doc.summary) {
|
|
2854
|
-
|
|
2855
|
-
id: `${docId}:declaration:summary`,
|
|
2856
|
-
documentId: docId,
|
|
2857
|
-
type: "declaration",
|
|
2858
|
-
text: `Policy Summary: ${doc.summary}`,
|
|
2859
|
-
metadata: stringMetadata({ documentType: doc.type })
|
|
2860
|
-
});
|
|
2961
|
+
pushChunk("declaration:summary", "declaration", `Policy Summary: ${doc.summary}`, { documentType: doc.type });
|
|
2861
2962
|
}
|
|
2862
2963
|
if (doc.type === "policy") {
|
|
2863
2964
|
const pol = doc;
|
|
2864
|
-
|
|
2865
|
-
|
|
2866
|
-
|
|
2867
|
-
|
|
2868
|
-
text: [
|
|
2965
|
+
pushChunk(
|
|
2966
|
+
"declaration:policy_details",
|
|
2967
|
+
"declaration",
|
|
2968
|
+
lines([
|
|
2869
2969
|
`Policy Number: ${pol.policyNumber}`,
|
|
2870
2970
|
`Effective Date: ${pol.effectiveDate}`,
|
|
2871
2971
|
pol.expirationDate ? `Expiration Date: ${pol.expirationDate}` : null,
|
|
2872
2972
|
pol.policyTermType ? `Term Type: ${pol.policyTermType}` : null,
|
|
2873
2973
|
pol.effectiveTime ? `Effective Time: ${pol.effectiveTime}` : null,
|
|
2874
2974
|
pol.nextReviewDate ? `Next Review Date: ${pol.nextReviewDate}` : null
|
|
2875
|
-
]
|
|
2876
|
-
|
|
2975
|
+
]),
|
|
2976
|
+
{
|
|
2877
2977
|
policyNumber: pol.policyNumber,
|
|
2878
2978
|
effectiveDate: pol.effectiveDate,
|
|
2879
2979
|
expirationDate: pol.expirationDate,
|
|
2880
2980
|
documentType: doc.type
|
|
2881
|
-
}
|
|
2882
|
-
|
|
2981
|
+
}
|
|
2982
|
+
);
|
|
2883
2983
|
} else {
|
|
2884
2984
|
const quote = doc;
|
|
2885
|
-
|
|
2886
|
-
|
|
2887
|
-
|
|
2888
|
-
|
|
2889
|
-
text: [
|
|
2985
|
+
pushChunk(
|
|
2986
|
+
"declaration:quote_details",
|
|
2987
|
+
"declaration",
|
|
2988
|
+
lines([
|
|
2890
2989
|
`Quote Number: ${quote.quoteNumber}`,
|
|
2891
2990
|
quote.proposedEffectiveDate ? `Proposed Effective Date: ${quote.proposedEffectiveDate}` : null,
|
|
2892
2991
|
quote.proposedExpirationDate ? `Proposed Expiration Date: ${quote.proposedExpirationDate}` : null,
|
|
2893
2992
|
quote.quoteExpirationDate ? `Quote Expiration Date: ${quote.quoteExpirationDate}` : null
|
|
2894
|
-
]
|
|
2895
|
-
|
|
2993
|
+
]),
|
|
2994
|
+
{
|
|
2896
2995
|
quoteNumber: quote.quoteNumber,
|
|
2897
2996
|
documentType: doc.type
|
|
2898
|
-
}
|
|
2899
|
-
|
|
2997
|
+
}
|
|
2998
|
+
);
|
|
2900
2999
|
}
|
|
2901
3000
|
if (doc.insurer) {
|
|
2902
|
-
|
|
2903
|
-
|
|
2904
|
-
|
|
2905
|
-
|
|
2906
|
-
text: [
|
|
3001
|
+
pushChunk(
|
|
3002
|
+
"party:insurer",
|
|
3003
|
+
"party",
|
|
3004
|
+
lines([
|
|
2907
3005
|
`Insurer: ${doc.insurer.legalName}`,
|
|
2908
3006
|
doc.insurer.naicNumber ? `NAIC: ${doc.insurer.naicNumber}` : null,
|
|
2909
3007
|
doc.insurer.amBestRating ? `AM Best Rating: ${doc.insurer.amBestRating}` : null,
|
|
2910
3008
|
doc.insurer.amBestNumber ? `AM Best Number: ${doc.insurer.amBestNumber}` : null,
|
|
2911
3009
|
doc.insurer.admittedStatus ? `Admitted Status: ${doc.insurer.admittedStatus}` : null,
|
|
2912
3010
|
doc.insurer.stateOfDomicile ? `State of Domicile: ${doc.insurer.stateOfDomicile}` : null
|
|
2913
|
-
]
|
|
2914
|
-
|
|
2915
|
-
|
|
3011
|
+
]),
|
|
3012
|
+
{ partyRole: "insurer", partyName: doc.insurer.legalName, documentType: doc.type }
|
|
3013
|
+
);
|
|
2916
3014
|
}
|
|
2917
3015
|
if (doc.producer) {
|
|
2918
|
-
|
|
2919
|
-
|
|
2920
|
-
|
|
2921
|
-
|
|
2922
|
-
text: [
|
|
3016
|
+
pushChunk(
|
|
3017
|
+
"party:producer",
|
|
3018
|
+
"party",
|
|
3019
|
+
lines([
|
|
2923
3020
|
`Producer/Broker: ${doc.producer.agencyName}`,
|
|
2924
3021
|
doc.producer.contactName ? `Contact: ${doc.producer.contactName}` : null,
|
|
2925
3022
|
doc.producer.licenseNumber ? `License: ${doc.producer.licenseNumber}` : null,
|
|
2926
3023
|
doc.producer.phone ? `Phone: ${doc.producer.phone}` : null,
|
|
2927
3024
|
doc.producer.email ? `Email: ${doc.producer.email}` : null,
|
|
2928
3025
|
doc.producer.address ? `Address: ${formatAddress(doc.producer.address)}` : null
|
|
2929
|
-
]
|
|
2930
|
-
|
|
2931
|
-
|
|
3026
|
+
]),
|
|
3027
|
+
{ partyRole: "producer", partyName: doc.producer.agencyName, documentType: doc.type }
|
|
3028
|
+
);
|
|
2932
3029
|
}
|
|
2933
|
-
|
|
2934
|
-
|
|
2935
|
-
|
|
2936
|
-
|
|
2937
|
-
text: [
|
|
3030
|
+
pushChunk(
|
|
3031
|
+
"named_insured:0",
|
|
3032
|
+
"named_insured",
|
|
3033
|
+
lines([
|
|
2938
3034
|
`Insured: ${doc.insuredName}`,
|
|
2939
3035
|
doc.insuredDba ? `DBA: ${doc.insuredDba}` : null,
|
|
2940
3036
|
doc.insuredEntityType ? `Entity Type: ${doc.insuredEntityType}` : null,
|
|
@@ -2942,36 +3038,34 @@ function chunkDocument(doc) {
|
|
|
2942
3038
|
doc.insuredSicCode ? `SIC: ${doc.insuredSicCode}` : null,
|
|
2943
3039
|
doc.insuredNaicsCode ? `NAICS: ${doc.insuredNaicsCode}` : null,
|
|
2944
3040
|
doc.insuredAddress ? `Address: ${formatAddress(doc.insuredAddress)}` : null
|
|
2945
|
-
]
|
|
2946
|
-
|
|
2947
|
-
|
|
3041
|
+
]),
|
|
3042
|
+
{ insuredName: doc.insuredName, documentType: doc.type }
|
|
3043
|
+
);
|
|
2948
3044
|
doc.additionalNamedInsureds?.forEach((insured, i) => {
|
|
2949
|
-
|
|
2950
|
-
|
|
2951
|
-
|
|
2952
|
-
|
|
2953
|
-
text: [
|
|
3045
|
+
pushChunk(
|
|
3046
|
+
`named_insured:${i + 1}`,
|
|
3047
|
+
"named_insured",
|
|
3048
|
+
lines([
|
|
2954
3049
|
`Additional Named Insured: ${insured.name}`,
|
|
2955
3050
|
insured.address ? `Address: ${formatAddress(insured.address)}` : null,
|
|
2956
3051
|
insured.relationship ? `Relationship: ${insured.relationship}` : null
|
|
2957
|
-
]
|
|
2958
|
-
|
|
2959
|
-
|
|
3052
|
+
]),
|
|
3053
|
+
{ insuredName: insured.name, role: "additional_named_insured", documentType: doc.type }
|
|
3054
|
+
);
|
|
2960
3055
|
});
|
|
2961
3056
|
doc.coverages.forEach((cov, i) => {
|
|
2962
|
-
|
|
2963
|
-
|
|
2964
|
-
|
|
2965
|
-
|
|
2966
|
-
text: [
|
|
3057
|
+
pushChunk(
|
|
3058
|
+
`coverage:${i}`,
|
|
3059
|
+
"coverage",
|
|
3060
|
+
lines([
|
|
2967
3061
|
`Coverage: ${cov.name}`,
|
|
2968
3062
|
`Limit: ${cov.limit}`,
|
|
2969
3063
|
cov.limitValueType ? `Limit Type: ${cov.limitValueType}` : null,
|
|
2970
3064
|
cov.deductible ? `Deductible: ${cov.deductible}` : null,
|
|
2971
3065
|
cov.deductibleValueType ? `Deductible Type: ${cov.deductibleValueType}` : null,
|
|
2972
3066
|
cov.originalContent ? `Source: ${cov.originalContent}` : null
|
|
2973
|
-
]
|
|
2974
|
-
|
|
3067
|
+
]),
|
|
3068
|
+
{
|
|
2975
3069
|
coverageName: cov.name,
|
|
2976
3070
|
limit: cov.limit,
|
|
2977
3071
|
limitValueType: cov.limitValueType,
|
|
@@ -2981,15 +3075,14 @@ function chunkDocument(doc) {
|
|
|
2981
3075
|
pageNumber: cov.pageNumber,
|
|
2982
3076
|
sectionRef: cov.sectionRef,
|
|
2983
3077
|
documentType: doc.type
|
|
2984
|
-
}
|
|
2985
|
-
|
|
3078
|
+
}
|
|
3079
|
+
);
|
|
2986
3080
|
});
|
|
2987
3081
|
doc.enrichedCoverages?.forEach((cov, i) => {
|
|
2988
|
-
|
|
2989
|
-
|
|
2990
|
-
|
|
2991
|
-
|
|
2992
|
-
text: [
|
|
3082
|
+
pushChunk(
|
|
3083
|
+
`coverage:enriched:${i}`,
|
|
3084
|
+
"coverage",
|
|
3085
|
+
lines([
|
|
2993
3086
|
`Coverage: ${cov.name}`,
|
|
2994
3087
|
cov.coverageCode ? `Code: ${cov.coverageCode}` : null,
|
|
2995
3088
|
`Limit: ${cov.limit}`,
|
|
@@ -3006,8 +3099,8 @@ function chunkDocument(doc) {
|
|
|
3006
3099
|
`Included: ${cov.included ? "Yes" : "No"}`,
|
|
3007
3100
|
cov.premium ? `Premium: ${cov.premium}` : null,
|
|
3008
3101
|
cov.originalContent ? `Source: ${cov.originalContent}` : null
|
|
3009
|
-
]
|
|
3010
|
-
|
|
3102
|
+
]),
|
|
3103
|
+
{
|
|
3011
3104
|
coverageName: cov.name,
|
|
3012
3105
|
coverageCode: cov.coverageCode,
|
|
3013
3106
|
limit: cov.limit,
|
|
@@ -3016,8 +3109,8 @@ function chunkDocument(doc) {
|
|
|
3016
3109
|
pageNumber: cov.pageNumber,
|
|
3017
3110
|
included: cov.included,
|
|
3018
3111
|
documentType: doc.type
|
|
3019
|
-
}
|
|
3020
|
-
|
|
3112
|
+
}
|
|
3113
|
+
);
|
|
3021
3114
|
});
|
|
3022
3115
|
if (doc.limits) {
|
|
3023
3116
|
const limitLines = ["Limit Schedule"];
|
|
@@ -3041,39 +3134,31 @@ function chunkDocument(doc) {
|
|
|
3041
3134
|
limitLines.push(`Employers Liability \u2014 Each Accident: ${lim.employersLiability.eachAccident}, Disease Policy Limit: ${lim.employersLiability.diseasePolicyLimit}, Disease Each Employee: ${lim.employersLiability.diseaseEachEmployee}`);
|
|
3042
3135
|
}
|
|
3043
3136
|
if (lim.defenseCostTreatment) limitLines.push(`Defense Cost Treatment: ${lim.defenseCostTreatment}`);
|
|
3044
|
-
|
|
3045
|
-
id: `${docId}:coverage:limit_schedule`,
|
|
3046
|
-
documentId: docId,
|
|
3047
|
-
type: "coverage",
|
|
3048
|
-
text: limitLines.join("\n"),
|
|
3049
|
-
metadata: stringMetadata({ coverageName: "limit_schedule", documentType: doc.type })
|
|
3050
|
-
});
|
|
3137
|
+
pushChunk("coverage:limit_schedule", "coverage", limitLines.join("\n"), { coverageName: "limit_schedule", documentType: doc.type });
|
|
3051
3138
|
lim.sublimits?.forEach((sub, i) => {
|
|
3052
|
-
|
|
3053
|
-
|
|
3054
|
-
|
|
3055
|
-
|
|
3056
|
-
text: [
|
|
3139
|
+
pushChunk(
|
|
3140
|
+
`coverage:sublimit:${i}`,
|
|
3141
|
+
"coverage",
|
|
3142
|
+
lines([
|
|
3057
3143
|
`Sublimit: ${sub.name}`,
|
|
3058
3144
|
`Limit: ${sub.limit}`,
|
|
3059
3145
|
sub.appliesTo ? `Applies To: ${sub.appliesTo}` : null,
|
|
3060
3146
|
sub.deductible ? `Deductible: ${sub.deductible}` : null
|
|
3061
|
-
]
|
|
3062
|
-
|
|
3063
|
-
|
|
3147
|
+
]),
|
|
3148
|
+
{ coverageName: sub.name, limit: sub.limit, documentType: doc.type }
|
|
3149
|
+
);
|
|
3064
3150
|
});
|
|
3065
3151
|
lim.sharedLimits?.forEach((sl, i) => {
|
|
3066
|
-
|
|
3067
|
-
|
|
3068
|
-
|
|
3069
|
-
|
|
3070
|
-
text: [
|
|
3152
|
+
pushChunk(
|
|
3153
|
+
`coverage:shared_limit:${i}`,
|
|
3154
|
+
"coverage",
|
|
3155
|
+
[
|
|
3071
3156
|
`Shared Limit: ${sl.description}`,
|
|
3072
3157
|
`Limit: ${sl.limit}`,
|
|
3073
3158
|
`Coverage Parts: ${sl.coverageParts.join(", ")}`
|
|
3074
3159
|
].join("\n"),
|
|
3075
|
-
|
|
3076
|
-
|
|
3160
|
+
{ coverageName: sl.description, limit: sl.limit, documentType: doc.type }
|
|
3161
|
+
);
|
|
3077
3162
|
});
|
|
3078
3163
|
}
|
|
3079
3164
|
if (doc.deductibles) {
|
|
@@ -3087,12 +3172,9 @@ function chunkDocument(doc) {
|
|
|
3087
3172
|
if (ded.waitingPeriod) dedLines.push(`Waiting Period: ${ded.waitingPeriod}`);
|
|
3088
3173
|
if (ded.appliesTo) dedLines.push(`Applies To: ${ded.appliesTo}`);
|
|
3089
3174
|
if (dedLines.length > 1) {
|
|
3090
|
-
|
|
3091
|
-
|
|
3092
|
-
|
|
3093
|
-
type: "coverage",
|
|
3094
|
-
text: dedLines.join("\n"),
|
|
3095
|
-
metadata: stringMetadata({ coverageName: "deductible_schedule", documentType: doc.type })
|
|
3175
|
+
pushChunk("coverage:deductible_schedule", "coverage", dedLines.join("\n"), {
|
|
3176
|
+
coverageName: "deductible_schedule",
|
|
3177
|
+
documentType: doc.type
|
|
3096
3178
|
});
|
|
3097
3179
|
}
|
|
3098
3180
|
}
|
|
@@ -3104,99 +3186,90 @@ function chunkDocument(doc) {
|
|
|
3104
3186
|
doc.extendedReportingPeriod?.supplementalPremium ? `Extended Reporting Period Premium: ${doc.extendedReportingPeriod.supplementalPremium}` : null
|
|
3105
3187
|
].filter(Boolean);
|
|
3106
3188
|
if (claimsMadeLines.length > 0) {
|
|
3107
|
-
|
|
3108
|
-
|
|
3109
|
-
|
|
3110
|
-
type: "coverage",
|
|
3111
|
-
text: claimsMadeLines.join("\n"),
|
|
3112
|
-
metadata: stringMetadata({ coverageName: "claims_made_details", documentType: doc.type })
|
|
3189
|
+
pushChunk("coverage:claims_made_details", "coverage", claimsMadeLines.join("\n"), {
|
|
3190
|
+
coverageName: "claims_made_details",
|
|
3191
|
+
documentType: doc.type
|
|
3113
3192
|
});
|
|
3114
3193
|
}
|
|
3115
3194
|
doc.formInventory?.forEach((form, i) => {
|
|
3116
|
-
|
|
3117
|
-
|
|
3118
|
-
|
|
3119
|
-
|
|
3120
|
-
text: [
|
|
3195
|
+
pushChunk(
|
|
3196
|
+
`declaration:form:${i}`,
|
|
3197
|
+
"declaration",
|
|
3198
|
+
lines([
|
|
3121
3199
|
`Form: ${form.formNumber}`,
|
|
3122
3200
|
form.title ? `Title: ${form.title}` : null,
|
|
3123
3201
|
`Type: ${form.formType}`,
|
|
3124
3202
|
form.editionDate ? `Edition: ${form.editionDate}` : null,
|
|
3125
3203
|
form.pageStart ? `Pages: ${form.pageStart}${form.pageEnd ? `-${form.pageEnd}` : ""}` : null
|
|
3126
|
-
]
|
|
3127
|
-
|
|
3204
|
+
]),
|
|
3205
|
+
{
|
|
3128
3206
|
formNumber: form.formNumber,
|
|
3129
3207
|
formType: form.formType,
|
|
3130
3208
|
documentType: doc.type
|
|
3131
|
-
}
|
|
3132
|
-
|
|
3209
|
+
}
|
|
3210
|
+
);
|
|
3133
3211
|
});
|
|
3134
3212
|
doc.endorsements?.forEach((end, i) => {
|
|
3135
|
-
|
|
3136
|
-
|
|
3137
|
-
|
|
3138
|
-
|
|
3139
|
-
text: `Endorsement: ${end.title}
|
|
3213
|
+
pushChunk(
|
|
3214
|
+
`endorsement:${i}`,
|
|
3215
|
+
"endorsement",
|
|
3216
|
+
`Endorsement: ${end.title}
|
|
3140
3217
|
${end.content}`.trim(),
|
|
3141
|
-
|
|
3218
|
+
{
|
|
3142
3219
|
endorsementType: end.endorsementType,
|
|
3143
3220
|
formNumber: end.formNumber,
|
|
3144
3221
|
pageStart: end.pageStart,
|
|
3145
3222
|
pageEnd: end.pageEnd,
|
|
3146
3223
|
documentType: doc.type
|
|
3147
|
-
}
|
|
3148
|
-
|
|
3224
|
+
}
|
|
3225
|
+
);
|
|
3149
3226
|
});
|
|
3150
3227
|
doc.exclusions?.forEach((exc, i) => {
|
|
3151
|
-
|
|
3152
|
-
|
|
3153
|
-
|
|
3154
|
-
|
|
3155
|
-
|
|
3156
|
-
${exc.content}`.trim(),
|
|
3157
|
-
metadata: stringMetadata({ formNumber: exc.formNumber, pageNumber: exc.pageNumber, documentType: doc.type })
|
|
3228
|
+
pushChunk(`exclusion:${i}`, "exclusion", `Exclusion: ${exc.name}
|
|
3229
|
+
${exc.content}`.trim(), {
|
|
3230
|
+
formNumber: exc.formNumber,
|
|
3231
|
+
pageNumber: exc.pageNumber,
|
|
3232
|
+
documentType: doc.type
|
|
3158
3233
|
});
|
|
3159
3234
|
});
|
|
3160
3235
|
doc.conditions?.forEach((cond, i) => {
|
|
3161
|
-
|
|
3162
|
-
|
|
3163
|
-
|
|
3164
|
-
|
|
3165
|
-
text: [
|
|
3236
|
+
pushChunk(
|
|
3237
|
+
`condition:${i}`,
|
|
3238
|
+
"condition",
|
|
3239
|
+
[
|
|
3166
3240
|
`Condition: ${cond.name}`,
|
|
3167
3241
|
`Type: ${cond.conditionType}`,
|
|
3168
3242
|
cond.content,
|
|
3169
3243
|
...cond.keyValues?.map((kv) => `${kv.key}: ${kv.value}`) ?? []
|
|
3170
3244
|
].join("\n"),
|
|
3171
|
-
|
|
3245
|
+
{
|
|
3172
3246
|
conditionName: cond.name,
|
|
3173
3247
|
conditionType: cond.conditionType,
|
|
3174
3248
|
pageNumber: cond.pageNumber,
|
|
3175
3249
|
documentType: doc.type
|
|
3176
|
-
}
|
|
3177
|
-
|
|
3250
|
+
}
|
|
3251
|
+
);
|
|
3178
3252
|
});
|
|
3179
3253
|
asRecordArray(extendedDoc.definitions).forEach((definition, i) => {
|
|
3180
3254
|
const term = firstString(definition, ["term", "name", "title"]) ?? `Definition ${i + 1}`;
|
|
3181
3255
|
const body = firstString(definition, ["definition", "content", "text", "meaning"]);
|
|
3182
|
-
|
|
3183
|
-
|
|
3184
|
-
|
|
3185
|
-
|
|
3186
|
-
text: [
|
|
3256
|
+
pushChunk(
|
|
3257
|
+
`definition:${i}`,
|
|
3258
|
+
"definition",
|
|
3259
|
+
lines([
|
|
3187
3260
|
`Definition: ${term}`,
|
|
3188
3261
|
body,
|
|
3189
3262
|
firstString(definition, ["originalContent", "source"]) ? `Source: ${firstString(definition, ["originalContent", "source"])}` : null
|
|
3190
|
-
]
|
|
3191
|
-
|
|
3263
|
+
]),
|
|
3264
|
+
{
|
|
3192
3265
|
term,
|
|
3193
3266
|
formNumber: firstString(definition, ["formNumber"]),
|
|
3194
3267
|
formTitle: firstString(definition, ["formTitle"]),
|
|
3195
3268
|
pageNumber: typeof definition.pageNumber === "number" ? definition.pageNumber : void 0,
|
|
3196
3269
|
sectionRef: firstString(definition, ["sectionRef", "sectionTitle"]),
|
|
3197
3270
|
documentType: doc.type
|
|
3198
|
-
}
|
|
3199
|
-
|
|
3271
|
+
}
|
|
3272
|
+
);
|
|
3200
3273
|
});
|
|
3201
3274
|
const coveredReasons = asRecordArray(extendedDoc.coveredReasons ?? extendedDoc.covered_reasons);
|
|
3202
3275
|
coveredReasons.forEach((coveredReason, i) => {
|
|
@@ -3204,18 +3277,17 @@ ${exc.content}`.trim(),
|
|
|
3204
3277
|
const coverageName = firstString(coveredReason, ["coverageName", "coverage", "coveragePart"]);
|
|
3205
3278
|
const reasonNumber = firstString(coveredReason, ["reasonNumber", "number"]);
|
|
3206
3279
|
const body = firstString(coveredReason, ["content", "description", "text", "coverageGrant"]);
|
|
3207
|
-
|
|
3208
|
-
|
|
3209
|
-
|
|
3210
|
-
|
|
3211
|
-
text: [
|
|
3280
|
+
pushChunk(
|
|
3281
|
+
`covered_reason:${i}`,
|
|
3282
|
+
"covered_reason",
|
|
3283
|
+
lines([
|
|
3212
3284
|
coverageName ? `Coverage: ${coverageName}` : null,
|
|
3213
3285
|
reasonNumber ? `Reason Number: ${reasonNumber}` : null,
|
|
3214
3286
|
`Covered Reason: ${title}`,
|
|
3215
3287
|
body,
|
|
3216
3288
|
firstString(coveredReason, ["originalContent", "source"]) ? `Source: ${firstString(coveredReason, ["originalContent", "source"])}` : null
|
|
3217
|
-
]
|
|
3218
|
-
|
|
3289
|
+
]),
|
|
3290
|
+
{
|
|
3219
3291
|
coverageName,
|
|
3220
3292
|
reasonNumber,
|
|
3221
3293
|
title,
|
|
@@ -3224,21 +3296,20 @@ ${exc.content}`.trim(),
|
|
|
3224
3296
|
pageNumber: typeof coveredReason.pageNumber === "number" ? coveredReason.pageNumber : void 0,
|
|
3225
3297
|
sectionRef: firstString(coveredReason, ["sectionRef", "sectionTitle"]),
|
|
3226
3298
|
documentType: doc.type
|
|
3227
|
-
}
|
|
3228
|
-
|
|
3299
|
+
}
|
|
3300
|
+
);
|
|
3229
3301
|
const conditions = Array.isArray(coveredReason.conditions) ? coveredReason.conditions.filter((condition) => typeof condition === "string" && condition.trim().length > 0) : [];
|
|
3230
3302
|
conditions.forEach((condition, conditionIndex) => {
|
|
3231
|
-
|
|
3232
|
-
|
|
3233
|
-
|
|
3234
|
-
|
|
3235
|
-
text: [
|
|
3303
|
+
pushChunk(
|
|
3304
|
+
`covered_reason:${i}:condition:${conditionIndex}`,
|
|
3305
|
+
"covered_reason",
|
|
3306
|
+
lines([
|
|
3236
3307
|
coverageName ? `Coverage: ${coverageName}` : null,
|
|
3237
3308
|
reasonNumber ? `Reason Number: ${reasonNumber}` : null,
|
|
3238
3309
|
`Covered Reason Condition: ${title}`,
|
|
3239
3310
|
condition
|
|
3240
|
-
]
|
|
3241
|
-
|
|
3311
|
+
]),
|
|
3312
|
+
{
|
|
3242
3313
|
coverageName,
|
|
3243
3314
|
reasonNumber,
|
|
3244
3315
|
title,
|
|
@@ -3248,8 +3319,8 @@ ${exc.content}`.trim(),
|
|
|
3248
3319
|
pageNumber: typeof coveredReason.pageNumber === "number" ? coveredReason.pageNumber : void 0,
|
|
3249
3320
|
sectionRef: firstString(coveredReason, ["sectionRef", "sectionTitle"]),
|
|
3250
3321
|
documentType: doc.type
|
|
3251
|
-
}
|
|
3252
|
-
|
|
3322
|
+
}
|
|
3323
|
+
);
|
|
3253
3324
|
});
|
|
3254
3325
|
});
|
|
3255
3326
|
if (doc.declarations) {
|
|
@@ -3264,50 +3335,42 @@ ${exc.content}`.trim(),
|
|
|
3264
3335
|
const declMeta = { documentType: doc.type };
|
|
3265
3336
|
if (typeof decl.formType === "string") declMeta.formType = decl.formType;
|
|
3266
3337
|
if (typeof decl.line === "string") declMeta.declarationLine = decl.line;
|
|
3267
|
-
|
|
3268
|
-
|
|
3269
|
-
documentId: docId,
|
|
3270
|
-
type: "declaration",
|
|
3271
|
-
text: `Declarations
|
|
3272
|
-
${declLines.join("\n")}`,
|
|
3273
|
-
metadata: stringMetadata(declMeta)
|
|
3274
|
-
});
|
|
3338
|
+
pushChunk("declaration:0", "declaration", `Declarations
|
|
3339
|
+
${declLines.join("\n")}`, declMeta);
|
|
3275
3340
|
}
|
|
3276
3341
|
}
|
|
3277
3342
|
doc.sections?.forEach((sec, i) => {
|
|
3278
3343
|
const hasSubsections = sec.subsections && sec.subsections.length > 0;
|
|
3279
3344
|
const contentLength = sec.content.length;
|
|
3280
3345
|
if (hasSubsections) {
|
|
3281
|
-
|
|
3282
|
-
|
|
3283
|
-
|
|
3284
|
-
|
|
3285
|
-
text: `Section: ${sec.title}
|
|
3346
|
+
pushChunk(
|
|
3347
|
+
`section:${i}`,
|
|
3348
|
+
"section",
|
|
3349
|
+
`Section: ${sec.title}
|
|
3286
3350
|
${sec.content}`,
|
|
3287
|
-
|
|
3351
|
+
{
|
|
3288
3352
|
sectionType: sec.type,
|
|
3289
3353
|
sectionNumber: sec.sectionNumber,
|
|
3290
3354
|
pageStart: sec.pageStart,
|
|
3291
3355
|
pageEnd: sec.pageEnd,
|
|
3292
3356
|
documentType: doc.type,
|
|
3293
3357
|
hasSubsections: "true"
|
|
3294
|
-
}
|
|
3295
|
-
|
|
3358
|
+
}
|
|
3359
|
+
);
|
|
3296
3360
|
sec.subsections.forEach((sub, j) => {
|
|
3297
|
-
|
|
3298
|
-
|
|
3299
|
-
|
|
3300
|
-
|
|
3301
|
-
text: `${sec.title} > ${sub.title}
|
|
3361
|
+
pushChunk(
|
|
3362
|
+
`section:${i}:sub:${j}`,
|
|
3363
|
+
"section",
|
|
3364
|
+
`${sec.title} > ${sub.title}
|
|
3302
3365
|
${sub.content}`,
|
|
3303
|
-
|
|
3366
|
+
{
|
|
3304
3367
|
sectionType: sec.type,
|
|
3305
3368
|
parentSection: sec.title,
|
|
3306
3369
|
sectionNumber: sub.sectionNumber,
|
|
3307
3370
|
pageNumber: sub.pageNumber,
|
|
3308
3371
|
documentType: doc.type
|
|
3309
|
-
}
|
|
3310
|
-
|
|
3372
|
+
}
|
|
3373
|
+
);
|
|
3311
3374
|
});
|
|
3312
3375
|
} else if (contentLength > 2e3) {
|
|
3313
3376
|
const paragraphs = sec.content.split(/\n\n+/);
|
|
@@ -3315,58 +3378,55 @@ ${sub.content}`,
|
|
|
3315
3378
|
let chunkIndex = 0;
|
|
3316
3379
|
for (const para of paragraphs) {
|
|
3317
3380
|
if (currentChunk.length + para.length > 1e3 && currentChunk.length > 0) {
|
|
3318
|
-
|
|
3319
|
-
|
|
3320
|
-
|
|
3321
|
-
|
|
3322
|
-
text: `Section: ${sec.title} (part ${chunkIndex + 1})
|
|
3381
|
+
pushChunk(
|
|
3382
|
+
`section:${i}:part:${chunkIndex}`,
|
|
3383
|
+
"section",
|
|
3384
|
+
`Section: ${sec.title} (part ${chunkIndex + 1})
|
|
3323
3385
|
${currentChunk.trim()}`,
|
|
3324
|
-
|
|
3386
|
+
{
|
|
3325
3387
|
sectionType: sec.type,
|
|
3326
3388
|
sectionNumber: sec.sectionNumber,
|
|
3327
3389
|
pageStart: sec.pageStart,
|
|
3328
3390
|
pageEnd: sec.pageEnd,
|
|
3329
3391
|
documentType: doc.type,
|
|
3330
3392
|
partIndex: chunkIndex
|
|
3331
|
-
}
|
|
3332
|
-
|
|
3393
|
+
}
|
|
3394
|
+
);
|
|
3333
3395
|
currentChunk = "";
|
|
3334
3396
|
chunkIndex++;
|
|
3335
3397
|
}
|
|
3336
3398
|
currentChunk += (currentChunk ? "\n\n" : "") + para;
|
|
3337
3399
|
}
|
|
3338
3400
|
if (currentChunk.trim()) {
|
|
3339
|
-
|
|
3340
|
-
|
|
3341
|
-
|
|
3342
|
-
|
|
3343
|
-
text: `Section: ${sec.title} (part ${chunkIndex + 1})
|
|
3401
|
+
pushChunk(
|
|
3402
|
+
`section:${i}:part:${chunkIndex}`,
|
|
3403
|
+
"section",
|
|
3404
|
+
`Section: ${sec.title} (part ${chunkIndex + 1})
|
|
3344
3405
|
${currentChunk.trim()}`,
|
|
3345
|
-
|
|
3406
|
+
{
|
|
3346
3407
|
sectionType: sec.type,
|
|
3347
3408
|
sectionNumber: sec.sectionNumber,
|
|
3348
3409
|
pageStart: sec.pageStart,
|
|
3349
3410
|
pageEnd: sec.pageEnd,
|
|
3350
3411
|
documentType: doc.type,
|
|
3351
3412
|
partIndex: chunkIndex
|
|
3352
|
-
}
|
|
3353
|
-
|
|
3413
|
+
}
|
|
3414
|
+
);
|
|
3354
3415
|
}
|
|
3355
3416
|
} else {
|
|
3356
|
-
|
|
3357
|
-
|
|
3358
|
-
|
|
3359
|
-
|
|
3360
|
-
text: `Section: ${sec.title}
|
|
3417
|
+
pushChunk(
|
|
3418
|
+
`section:${i}`,
|
|
3419
|
+
"section",
|
|
3420
|
+
`Section: ${sec.title}
|
|
3361
3421
|
${sec.content}`,
|
|
3362
|
-
|
|
3422
|
+
{
|
|
3363
3423
|
sectionType: sec.type,
|
|
3364
3424
|
sectionNumber: sec.sectionNumber,
|
|
3365
3425
|
pageStart: sec.pageStart,
|
|
3366
3426
|
pageEnd: sec.pageEnd,
|
|
3367
3427
|
documentType: doc.type
|
|
3368
|
-
}
|
|
3369
|
-
|
|
3428
|
+
}
|
|
3429
|
+
);
|
|
3370
3430
|
}
|
|
3371
3431
|
});
|
|
3372
3432
|
doc.locations?.forEach((loc, i) => {
|
|
@@ -4944,12 +5004,15 @@ var ReviewResultSchema = import_zod21.z.object({
|
|
|
4944
5004
|
description: import_zod21.z.string()
|
|
4945
5005
|
}))
|
|
4946
5006
|
});
|
|
4947
|
-
function buildReviewPrompt(templateExpected, extractedKeys, extractionSummary, pageMapSummary) {
|
|
4948
|
-
return `You are
|
|
5007
|
+
function buildReviewPrompt(templateExpected, extractedKeys, extractionSummary, pageMapSummary, extractorCatalog) {
|
|
5008
|
+
return `You are the extraction coordinator for an insurance-document agent system. Review the current extraction state, decide whether the result is complete enough, and choose any follow-up extractor tasks needed to improve it.
|
|
4949
5009
|
|
|
4950
5010
|
EXPECTED FIELDS (from document type template):
|
|
4951
5011
|
${templateExpected.map((f) => `- ${f}`).join("\n")}
|
|
4952
5012
|
|
|
5013
|
+
AVAILABLE FOLLOW-UP EXTRACTORS:
|
|
5014
|
+
${extractorCatalog}
|
|
5015
|
+
|
|
4953
5016
|
FIELDS ALREADY EXTRACTED:
|
|
4954
5017
|
${extractedKeys.map((f) => `- ${f}`).join("\n")}
|
|
4955
5018
|
|
|
@@ -4963,7 +5026,7 @@ Determine:
|
|
|
4963
5026
|
1. Is the extraction complete enough?
|
|
4964
5027
|
2. What fields are missing?
|
|
4965
5028
|
3. What quality issues are present?
|
|
4966
|
-
4.
|
|
5029
|
+
4. Which follow-up extraction tasks, if any, should be dispatched?
|
|
4967
5030
|
|
|
4968
5031
|
Mark the extraction as NOT complete if any of these are true:
|
|
4969
5032
|
- required fields are missing
|
|
@@ -4974,7 +5037,9 @@ Mark the extraction as NOT complete if any of these are true:
|
|
|
4974
5037
|
- page assignments suggest declaration, schedule, endorsement, exclusion, or condition pages were not actually extracted with the matching focused extractor
|
|
4975
5038
|
- a focused extractor exists but returned too little substance for the relevant pages
|
|
4976
5039
|
|
|
4977
|
-
When reviewing CURRENT EXTRACTION SUMMARY, compare the page-map counts to extracted counts.
|
|
5040
|
+
When reviewing CURRENT EXTRACTION SUMMARY, compare the page-map counts to extracted counts. If an assigned extractor produced no useful records, produce a quality issue and a narrow follow-up task over the mapped page range.
|
|
5041
|
+
|
|
5042
|
+
Choose follow-up tasks from AVAILABLE FOLLOW-UP EXTRACTORS. You may dispatch any listed extractor when the page map, current extraction summary, or quality evidence shows that the focused extraction is missing, generic, referential, or too thin. Do not invent extractor names.
|
|
4978
5043
|
|
|
4979
5044
|
Return JSON:
|
|
4980
5045
|
{
|
|
@@ -4986,7 +5051,7 @@ Return JSON:
|
|
|
4986
5051
|
]
|
|
4987
5052
|
}
|
|
4988
5053
|
|
|
4989
|
-
Use the page map to target follow-up extraction pages precisely. Prefer narrow, declaration/schedule-focused follow-up tasks over broad page ranges.
|
|
5054
|
+
Use the page map to target follow-up extraction pages precisely. Prefer narrow, declaration/schedule-focused follow-up tasks over broad page ranges. If no additional model work is likely to improve the extraction, return an empty additionalTasks array.
|
|
4990
5055
|
|
|
4991
5056
|
Respond with JSON only.`;
|
|
4992
5057
|
}
|
|
@@ -5521,6 +5586,7 @@ var SectionsSchema = import_zod32.z.object({
|
|
|
5521
5586
|
"policy_form",
|
|
5522
5587
|
"endorsement",
|
|
5523
5588
|
"application",
|
|
5589
|
+
"covered_reason",
|
|
5524
5590
|
"exclusion",
|
|
5525
5591
|
"condition",
|
|
5526
5592
|
"definition",
|
|
@@ -5544,6 +5610,7 @@ For each section, classify its type:
|
|
|
5544
5610
|
- "policy_form" \u2014 named ISO or proprietary forms (e.g. CG 00 01, IL 00 17). All sections within a named form should be typed as "policy_form"
|
|
5545
5611
|
- "endorsement" \u2014 standalone endorsements modifying the base policy
|
|
5546
5612
|
- "application" \u2014 the insurance application or supplemental application
|
|
5613
|
+
- "covered_reason" \u2014 affirmative grants of coverage, covered causes of loss, covered perils, or named covered events
|
|
5547
5614
|
- "insuring_agreement" \u2014 the insuring agreement clause (only if standalone, not inside a policy_form)
|
|
5548
5615
|
- "exclusion", "condition", "definition" \u2014 for standalone sections only
|
|
5549
5616
|
- "schedule" \u2014 coverage or rating schedules
|
|
@@ -5721,6 +5788,53 @@ Return JSON only.`;
|
|
|
5721
5788
|
}
|
|
5722
5789
|
|
|
5723
5790
|
// src/prompts/extractors/index.ts
|
|
5791
|
+
function asRecord(data) {
|
|
5792
|
+
return data && typeof data === "object" ? data : void 0;
|
|
5793
|
+
}
|
|
5794
|
+
function getSections2(data) {
|
|
5795
|
+
const sections = asRecord(data)?.sections;
|
|
5796
|
+
return Array.isArray(sections) ? sections : [];
|
|
5797
|
+
}
|
|
5798
|
+
function isCoveredReasonsEmpty(data) {
|
|
5799
|
+
const record = asRecord(data);
|
|
5800
|
+
if (!record) return true;
|
|
5801
|
+
const coveredReasons = Array.isArray(record.coveredReasons) ? record.coveredReasons : Array.isArray(record.covered_reasons) ? record.covered_reasons : [];
|
|
5802
|
+
return coveredReasons.length === 0;
|
|
5803
|
+
}
|
|
5804
|
+
function isDefinitionsEmpty(data) {
|
|
5805
|
+
const definitions = asRecord(data)?.definitions;
|
|
5806
|
+
return !Array.isArray(definitions) || definitions.length === 0;
|
|
5807
|
+
}
|
|
5808
|
+
function sectionLooksLikeCoveredReason(section) {
|
|
5809
|
+
const type = String(section.type ?? "").toLowerCase();
|
|
5810
|
+
const title = String(section.title ?? "").toLowerCase();
|
|
5811
|
+
return type === "covered_reason" || title.includes("covered cause") || title.includes("covered reason") || title.includes("covered peril") || title.includes("named peril") || title.includes("insuring agreement");
|
|
5812
|
+
}
|
|
5813
|
+
function deriveCoveredReasonsFromSections(data) {
|
|
5814
|
+
const coveredReasons = getSections2(data).filter(sectionLooksLikeCoveredReason).map((section) => ({
|
|
5815
|
+
coverageName: String(section.coverageName ?? section.formTitle ?? section.title ?? "Covered Reasons"),
|
|
5816
|
+
title: typeof section.title === "string" ? section.title : void 0,
|
|
5817
|
+
content: String(section.content ?? ""),
|
|
5818
|
+
pageNumber: typeof section.pageStart === "number" ? section.pageStart : void 0,
|
|
5819
|
+
formNumber: typeof section.formNumber === "string" ? section.formNumber : void 0,
|
|
5820
|
+
formTitle: typeof section.formTitle === "string" ? section.formTitle : void 0,
|
|
5821
|
+
sectionRef: typeof section.sectionNumber === "string" ? section.sectionNumber : void 0,
|
|
5822
|
+
originalContent: typeof section.content === "string" ? section.content.slice(0, 500) : void 0
|
|
5823
|
+
})).filter((coveredReason) => coveredReason.content.trim().length > 0);
|
|
5824
|
+
return coveredReasons.length > 0 ? { coveredReasons } : void 0;
|
|
5825
|
+
}
|
|
5826
|
+
function deriveDefinitionsFromSections(data) {
|
|
5827
|
+
const definitions = getSections2(data).filter((section) => String(section.type ?? "").toLowerCase() === "definition").map((section) => ({
|
|
5828
|
+
term: String(section.title ?? "Definitions"),
|
|
5829
|
+
definition: String(section.content ?? ""),
|
|
5830
|
+
pageNumber: typeof section.pageStart === "number" ? section.pageStart : void 0,
|
|
5831
|
+
formNumber: typeof section.formNumber === "string" ? section.formNumber : void 0,
|
|
5832
|
+
formTitle: typeof section.formTitle === "string" ? section.formTitle : void 0,
|
|
5833
|
+
sectionRef: typeof section.sectionNumber === "string" ? section.sectionNumber : void 0,
|
|
5834
|
+
originalContent: typeof section.content === "string" ? section.content.slice(0, 500) : void 0
|
|
5835
|
+
})).filter((definition) => definition.definition.trim().length > 0);
|
|
5836
|
+
return definitions.length > 0 ? { definitions } : void 0;
|
|
5837
|
+
}
|
|
5724
5838
|
var EXTRACTORS = {
|
|
5725
5839
|
carrier_info: { buildPrompt: buildCarrierInfoPrompt, schema: CarrierInfoSchema, maxTokens: 2048 },
|
|
5726
5840
|
named_insured: { buildPrompt: buildNamedInsuredPrompt, schema: NamedInsuredSchema2, maxTokens: 2048 },
|
|
@@ -5733,12 +5847,36 @@ var EXTRACTORS = {
|
|
|
5733
5847
|
loss_history: { buildPrompt: buildLossHistoryPrompt, schema: LossHistorySchema, maxTokens: 4096 },
|
|
5734
5848
|
sections: { buildPrompt: buildSectionsPrompt, schema: SectionsSchema, maxTokens: 8192 },
|
|
5735
5849
|
supplementary: { buildPrompt: buildSupplementaryPrompt, schema: SupplementarySchema, maxTokens: 2048 },
|
|
5736
|
-
definitions: {
|
|
5737
|
-
|
|
5850
|
+
definitions: {
|
|
5851
|
+
buildPrompt: buildDefinitionsPrompt,
|
|
5852
|
+
schema: DefinitionsSchema,
|
|
5853
|
+
maxTokens: 8192,
|
|
5854
|
+
fallback: {
|
|
5855
|
+
extractorName: "sections",
|
|
5856
|
+
isEmpty: isDefinitionsEmpty,
|
|
5857
|
+
deriveFocusedResult: deriveDefinitionsFromSections
|
|
5858
|
+
}
|
|
5859
|
+
},
|
|
5860
|
+
covered_reasons: {
|
|
5861
|
+
buildPrompt: buildCoveredReasonsPrompt,
|
|
5862
|
+
schema: CoveredReasonsSchema,
|
|
5863
|
+
maxTokens: 8192,
|
|
5864
|
+
fallback: {
|
|
5865
|
+
extractorName: "sections",
|
|
5866
|
+
isEmpty: isCoveredReasonsEmpty,
|
|
5867
|
+
deriveFocusedResult: deriveCoveredReasonsFromSections
|
|
5868
|
+
}
|
|
5869
|
+
}
|
|
5738
5870
|
};
|
|
5739
5871
|
function getExtractor(name) {
|
|
5740
5872
|
return EXTRACTORS[name];
|
|
5741
5873
|
}
|
|
5874
|
+
function formatExtractorCatalogForPrompt() {
|
|
5875
|
+
return Object.entries(EXTRACTORS).map(([name, extractor]) => {
|
|
5876
|
+
const fallback = extractor.fallback ? `; fallback: ${extractor.fallback.extractorName}` : "";
|
|
5877
|
+
return `- ${name} (maxTokens: ${extractor.maxTokens ?? 4096}${fallback})`;
|
|
5878
|
+
}).join("\n");
|
|
5879
|
+
}
|
|
5742
5880
|
|
|
5743
5881
|
// src/extraction/resolve-referential.ts
|
|
5744
5882
|
var import_zod37 = require("zod");
|
|
@@ -5790,18 +5928,124 @@ Your task:
|
|
|
5790
5928
|
Return JSON only.`;
|
|
5791
5929
|
}
|
|
5792
5930
|
|
|
5793
|
-
// src/extraction/
|
|
5931
|
+
// src/extraction/heuristics.ts
|
|
5794
5932
|
function looksReferential(value) {
|
|
5795
5933
|
if (typeof value !== "string") return false;
|
|
5796
5934
|
const normalized = value.toLowerCase();
|
|
5797
5935
|
return normalized.includes("shown in the declarations") || normalized.includes("shown in declarations") || normalized.includes("shown in the schedule") || normalized.includes("as stated") || normalized.includes("if applicable");
|
|
5798
5936
|
}
|
|
5937
|
+
function looksCoveredReasonSection(section) {
|
|
5938
|
+
const title = String(section.title ?? "").toLowerCase();
|
|
5939
|
+
const type = String(section.type ?? "").toLowerCase();
|
|
5940
|
+
return type === "covered_reason" || title.includes("covered cause") || title.includes("covered reason") || title.includes("covered peril");
|
|
5941
|
+
}
|
|
5942
|
+
|
|
5943
|
+
// src/extraction/referential-workflow.ts
|
|
5944
|
+
function normalizeText(value) {
|
|
5945
|
+
return typeof value === "string" ? value.trim().toLowerCase() : "";
|
|
5946
|
+
}
|
|
5947
|
+
function containsTarget(value, target) {
|
|
5948
|
+
const normalizedValue = normalizeText(value);
|
|
5949
|
+
return Boolean(normalizedValue && target && normalizedValue.includes(target));
|
|
5950
|
+
}
|
|
5951
|
+
function pageRangeFrom(startPage, endPage) {
|
|
5952
|
+
if (typeof startPage !== "number" || !Number.isFinite(startPage) || startPage <= 0) {
|
|
5953
|
+
return void 0;
|
|
5954
|
+
}
|
|
5955
|
+
const normalizedEnd = typeof endPage === "number" && Number.isFinite(endPage) && endPage >= startPage ? endPage : startPage;
|
|
5956
|
+
return { startPage, endPage: normalizedEnd };
|
|
5957
|
+
}
|
|
5958
|
+
function parseReferentialTarget(rawTarget) {
|
|
5959
|
+
const raw = rawTarget?.trim() || "unknown";
|
|
5960
|
+
const normalized = raw.toLowerCase();
|
|
5961
|
+
if (normalized === "unknown") return { raw, normalized, kind: "unknown" };
|
|
5962
|
+
if (/declarations?|dec\b|decs\b/.test(normalized)) return { raw, normalized, kind: "declarations" };
|
|
5963
|
+
if (/schedule|scheduled/.test(normalized)) return { raw, normalized, kind: "schedule" };
|
|
5964
|
+
if (/\bitem\b/.test(normalized)) return { raw, normalized, kind: "item" };
|
|
5965
|
+
if (/premises?|location|building/.test(normalized)) return { raw, normalized, kind: "premises" };
|
|
5966
|
+
if (/\bsection\b/.test(normalized)) return { raw, normalized, kind: "section" };
|
|
5967
|
+
if (/policy|coverage\s+part|coverage\s+form/.test(normalized)) return { raw, normalized, kind: "policy" };
|
|
5968
|
+
return { raw, normalized, kind: "unknown" };
|
|
5969
|
+
}
|
|
5970
|
+
function findLocalReferentialPages(params) {
|
|
5971
|
+
const targetLower = params.referenceTarget.toLowerCase();
|
|
5972
|
+
for (const section of params.sections) {
|
|
5973
|
+
if (containsTarget(section.title, targetLower)) {
|
|
5974
|
+
const range = pageRangeFrom(section.pageStart, section.pageEnd);
|
|
5975
|
+
if (range) return range;
|
|
5976
|
+
}
|
|
5977
|
+
}
|
|
5978
|
+
for (const form of params.formInventory) {
|
|
5979
|
+
const titleMatch = containsTarget(form.title, targetLower);
|
|
5980
|
+
const typeMatch = containsTarget(form.formType, targetLower);
|
|
5981
|
+
const numberMatch = containsTarget(form.formNumber, targetLower);
|
|
5982
|
+
if (titleMatch || typeMatch || numberMatch) {
|
|
5983
|
+
const range = pageRangeFrom(form.pageStart, form.pageEnd);
|
|
5984
|
+
if (range) return range;
|
|
5985
|
+
}
|
|
5986
|
+
}
|
|
5987
|
+
return void 0;
|
|
5988
|
+
}
|
|
5989
|
+
function findDeclarationsSchedulePages(parsedTarget, formInventory) {
|
|
5990
|
+
for (const form of formInventory) {
|
|
5991
|
+
const formType = normalizeText(form.formType);
|
|
5992
|
+
const title = normalizeText(form.title);
|
|
5993
|
+
const matchesDeclarations = formType === "declarations" || /declarations?|dec\b|decs\b/.test(title);
|
|
5994
|
+
const matchesSchedule = /schedule|scheduled|coverage/.test(title) || formType === "coverage";
|
|
5995
|
+
const shouldUse = parsedTarget.kind === "declarations" ? matchesDeclarations : parsedTarget.kind === "schedule" || parsedTarget.kind === "item" || parsedTarget.kind === "premises" ? matchesSchedule || matchesDeclarations : parsedTarget.kind === "policy" ? matchesDeclarations || matchesSchedule : false;
|
|
5996
|
+
if (shouldUse) {
|
|
5997
|
+
const range = pageRangeFrom(form.pageStart, form.pageEnd);
|
|
5998
|
+
if (range) return range;
|
|
5999
|
+
}
|
|
6000
|
+
}
|
|
6001
|
+
return void 0;
|
|
6002
|
+
}
|
|
6003
|
+
function findSectionPages(parsedTarget, sections) {
|
|
6004
|
+
for (const section of sections) {
|
|
6005
|
+
const title = normalizeText(section.title);
|
|
6006
|
+
const type = normalizeText(section.type);
|
|
6007
|
+
const matchesKind = parsedTarget.kind === "declarations" && (type === "declarations" || /declarations?/.test(title)) || parsedTarget.kind === "schedule" && (type === "schedule" || /schedule|scheduled/.test(title)) || parsedTarget.kind === "premises" && /premises?|location|building/.test(title) || parsedTarget.kind === "item" && /\bitem\b|schedule|scheduled/.test(title) || parsedTarget.kind === "section" && containsTarget(title, parsedTarget.normalized);
|
|
6008
|
+
if (matchesKind) {
|
|
6009
|
+
const range = pageRangeFrom(section.pageStart, section.pageEnd);
|
|
6010
|
+
if (range) return range;
|
|
6011
|
+
}
|
|
6012
|
+
}
|
|
6013
|
+
return void 0;
|
|
6014
|
+
}
|
|
6015
|
+
function decideReferentialResolutionAction(params) {
|
|
6016
|
+
if (params.localPageRange) {
|
|
6017
|
+
return { kind: "lookup_pages", source: "local", pageRange: params.localPageRange };
|
|
6018
|
+
}
|
|
6019
|
+
const parsedTarget = parseReferentialTarget(params.referenceTarget);
|
|
6020
|
+
const declarationsScheduleRange = findDeclarationsSchedulePages(parsedTarget, params.formInventory);
|
|
6021
|
+
if (declarationsScheduleRange) {
|
|
6022
|
+
return {
|
|
6023
|
+
kind: "lookup_pages",
|
|
6024
|
+
source: "declarations_schedule",
|
|
6025
|
+
pageRange: declarationsScheduleRange
|
|
6026
|
+
};
|
|
6027
|
+
}
|
|
6028
|
+
const sectionRange = findSectionPages(parsedTarget, params.sections);
|
|
6029
|
+
if (sectionRange) {
|
|
6030
|
+
return { kind: "lookup_pages", source: "sections", pageRange: sectionRange };
|
|
6031
|
+
}
|
|
6032
|
+
if (parsedTarget.kind === "unknown") {
|
|
6033
|
+
return { kind: "skip", reason: "no concrete reference target" };
|
|
6034
|
+
}
|
|
6035
|
+
return { kind: "page_location" };
|
|
6036
|
+
}
|
|
6037
|
+
|
|
6038
|
+
// src/extraction/resolve-referential.ts
|
|
5799
6039
|
function parseReferenceTarget(text) {
|
|
5800
6040
|
if (typeof text !== "string") return void 0;
|
|
5801
6041
|
const normalized = text.trim();
|
|
5802
6042
|
if (!normalized) return void 0;
|
|
5803
6043
|
const sectionMatch = normalized.match(/\b(Section\s+\d+[A-Za-z]?)/i);
|
|
5804
6044
|
if (sectionMatch) return sectionMatch[1];
|
|
6045
|
+
const itemMatch = normalized.match(/\b(Item\s+\d+[A-Za-z]?)/i);
|
|
6046
|
+
if (itemMatch) return itemMatch[1];
|
|
6047
|
+
const premisesMatch = normalized.match(/\b(Premises?(?:\s+No\.?\s*\d+[A-Za-z]?|\s+\d+[A-Za-z]?)?)/i);
|
|
6048
|
+
if (premisesMatch) return premisesMatch[1].trim();
|
|
5805
6049
|
if (/declarations/i.test(normalized)) return "Declarations";
|
|
5806
6050
|
const scheduleMatch = normalized.match(/\b(Schedule(?:\s+of\s+[A-Za-z ]+)?)/i);
|
|
5807
6051
|
if (scheduleMatch) return scheduleMatch[1].trim();
|
|
@@ -5827,26 +6071,31 @@ async function findReferencedPages(params) {
|
|
|
5827
6071
|
pageCount,
|
|
5828
6072
|
generateObject,
|
|
5829
6073
|
providerOptions,
|
|
6074
|
+
trackUsage,
|
|
5830
6075
|
log
|
|
5831
6076
|
} = params;
|
|
5832
|
-
const
|
|
5833
|
-
|
|
5834
|
-
|
|
5835
|
-
|
|
5836
|
-
|
|
5837
|
-
|
|
5838
|
-
|
|
5839
|
-
|
|
6077
|
+
const localPageRange = findLocalReferentialPages({
|
|
6078
|
+
referenceTarget,
|
|
6079
|
+
sections,
|
|
6080
|
+
formInventory
|
|
6081
|
+
});
|
|
6082
|
+
const action = decideReferentialResolutionAction({
|
|
6083
|
+
referenceTarget,
|
|
6084
|
+
sections,
|
|
6085
|
+
formInventory,
|
|
6086
|
+
localPageRange
|
|
6087
|
+
});
|
|
6088
|
+
if (action.kind === "lookup_pages") {
|
|
6089
|
+
await log?.(
|
|
6090
|
+
`Referential target "${referenceTarget}" resolved to pages ${action.pageRange.startPage}-${action.pageRange.endPage} via ${action.source}.`
|
|
6091
|
+
);
|
|
6092
|
+
return action.pageRange;
|
|
5840
6093
|
}
|
|
5841
|
-
|
|
5842
|
-
|
|
5843
|
-
|
|
5844
|
-
|
|
5845
|
-
|
|
5846
|
-
startPage: form.pageStart,
|
|
5847
|
-
endPage: form.pageEnd ?? form.pageStart
|
|
5848
|
-
};
|
|
5849
|
-
}
|
|
6094
|
+
if (action.kind === "skip") {
|
|
6095
|
+
await log?.(
|
|
6096
|
+
`Skipping referential target "${referenceTarget}": ${action.reason}.`
|
|
6097
|
+
);
|
|
6098
|
+
return void 0;
|
|
5850
6099
|
}
|
|
5851
6100
|
try {
|
|
5852
6101
|
const result = await safeGenerateObject(
|
|
@@ -5874,6 +6123,7 @@ Return JSON only.`,
|
|
|
5874
6123
|
)
|
|
5875
6124
|
}
|
|
5876
6125
|
);
|
|
6126
|
+
trackUsage?.(result.usage);
|
|
5877
6127
|
if (result.object.startPage > 0 && result.object.endPage > 0) {
|
|
5878
6128
|
return {
|
|
5879
6129
|
startPage: result.object.startPage,
|
|
@@ -5931,7 +6181,9 @@ async function resolveReferentialCoverages(params) {
|
|
|
5931
6181
|
for (let i = 0; i < referentialCoverages.length; i++) {
|
|
5932
6182
|
const cov = referentialCoverages[i];
|
|
5933
6183
|
const refString = (looksReferential(cov.limit) ? cov.limit : void 0) ?? (looksReferential(cov.deductible) ? cov.deductible : void 0) ?? cov.limit ?? "";
|
|
5934
|
-
const
|
|
6184
|
+
const sectionRef = typeof cov.sectionRef === "string" ? cov.sectionRef : "";
|
|
6185
|
+
const parsedTarget = parseReferenceTarget(refString) ?? parseReferenceTarget(sectionRef) ?? sectionRef;
|
|
6186
|
+
const target = parsedTarget || "unknown";
|
|
5935
6187
|
const group = targetGroups.get(target) ?? [];
|
|
5936
6188
|
group.push({ coverage: cov, index: i });
|
|
5937
6189
|
targetGroups.set(target, group);
|
|
@@ -5955,6 +6207,7 @@ async function resolveReferentialCoverages(params) {
|
|
|
5955
6207
|
pageCount,
|
|
5956
6208
|
generateObject,
|
|
5957
6209
|
providerOptions,
|
|
6210
|
+
trackUsage,
|
|
5958
6211
|
log
|
|
5959
6212
|
});
|
|
5960
6213
|
if (!pageRange) {
|
|
@@ -6072,6 +6325,78 @@ async function resolveReferentialCoverages(params) {
|
|
|
6072
6325
|
};
|
|
6073
6326
|
}
|
|
6074
6327
|
|
|
6328
|
+
// src/extraction/focused-dispatch.ts
|
|
6329
|
+
async function runFocusedExtractorWithFallback(params) {
|
|
6330
|
+
const {
|
|
6331
|
+
task,
|
|
6332
|
+
pdfInput,
|
|
6333
|
+
generateObject,
|
|
6334
|
+
convertPdfToImages,
|
|
6335
|
+
providerOptions,
|
|
6336
|
+
trackUsage,
|
|
6337
|
+
log
|
|
6338
|
+
} = params;
|
|
6339
|
+
const ext = getExtractor(task.extractorName);
|
|
6340
|
+
if (!ext) {
|
|
6341
|
+
await log?.(`Unknown extractor: ${task.extractorName}, skipping`);
|
|
6342
|
+
return null;
|
|
6343
|
+
}
|
|
6344
|
+
try {
|
|
6345
|
+
const result = await runExtractor({
|
|
6346
|
+
name: task.extractorName,
|
|
6347
|
+
prompt: ext.buildPrompt(),
|
|
6348
|
+
schema: ext.schema,
|
|
6349
|
+
pdfInput,
|
|
6350
|
+
startPage: task.startPage,
|
|
6351
|
+
endPage: task.endPage,
|
|
6352
|
+
generateObject,
|
|
6353
|
+
convertPdfToImages,
|
|
6354
|
+
maxTokens: ext.maxTokens ?? 4096,
|
|
6355
|
+
providerOptions
|
|
6356
|
+
});
|
|
6357
|
+
trackUsage(result.usage);
|
|
6358
|
+
if (!ext.fallback?.isEmpty(result.data)) {
|
|
6359
|
+
return result;
|
|
6360
|
+
}
|
|
6361
|
+
if (!ext.fallback) {
|
|
6362
|
+
return result;
|
|
6363
|
+
}
|
|
6364
|
+
} catch (error) {
|
|
6365
|
+
await log?.(`Extractor ${task.extractorName} failed: ${error}`);
|
|
6366
|
+
if (!ext.fallback) {
|
|
6367
|
+
return null;
|
|
6368
|
+
}
|
|
6369
|
+
}
|
|
6370
|
+
const fallbackExt = getExtractor(ext.fallback.extractorName);
|
|
6371
|
+
if (!fallbackExt) return null;
|
|
6372
|
+
await log?.(
|
|
6373
|
+
`Extractor ${task.extractorName} produced no usable records; trying ${ext.fallback.extractorName} fallback for pages ${task.startPage}-${task.endPage}`
|
|
6374
|
+
);
|
|
6375
|
+
try {
|
|
6376
|
+
const fallbackResult = await runExtractor({
|
|
6377
|
+
name: ext.fallback.extractorName,
|
|
6378
|
+
prompt: fallbackExt.buildPrompt(),
|
|
6379
|
+
schema: fallbackExt.schema,
|
|
6380
|
+
pdfInput,
|
|
6381
|
+
startPage: task.startPage,
|
|
6382
|
+
endPage: task.endPage,
|
|
6383
|
+
generateObject,
|
|
6384
|
+
convertPdfToImages,
|
|
6385
|
+
maxTokens: fallbackExt.maxTokens ?? 4096,
|
|
6386
|
+
providerOptions
|
|
6387
|
+
});
|
|
6388
|
+
trackUsage(fallbackResult.usage);
|
|
6389
|
+
const focusedData = ext.fallback.deriveFocusedResult(fallbackResult.data);
|
|
6390
|
+
return focusedData ? [
|
|
6391
|
+
fallbackResult,
|
|
6392
|
+
{ name: task.extractorName, data: focusedData, usage: void 0 }
|
|
6393
|
+
] : fallbackResult;
|
|
6394
|
+
} catch (fallbackError) {
|
|
6395
|
+
await log?.(`${ext.fallback.extractorName} fallback for ${task.extractorName} failed: ${fallbackError}`);
|
|
6396
|
+
return null;
|
|
6397
|
+
}
|
|
6398
|
+
}
|
|
6399
|
+
|
|
6075
6400
|
// src/core/quality.ts
|
|
6076
6401
|
function evaluateQualityGate(params) {
|
|
6077
6402
|
const { issues, hasRoundWarnings = false } = params;
|
|
@@ -6108,11 +6433,6 @@ function addFormEntry(inventory, formNumber, source, extra) {
|
|
|
6108
6433
|
sources: [source]
|
|
6109
6434
|
});
|
|
6110
6435
|
}
|
|
6111
|
-
function looksReferential2(value) {
|
|
6112
|
-
if (typeof value !== "string") return false;
|
|
6113
|
-
const normalized = value.toLowerCase();
|
|
6114
|
-
return normalized.includes("shown in the declarations") || normalized.includes("shown in declarations") || normalized.includes("shown in the schedule") || normalized.includes("as stated") || normalized.includes("if applicable");
|
|
6115
|
-
}
|
|
6116
6436
|
function looksTocArtifact(value) {
|
|
6117
6437
|
if (typeof value !== "string") return false;
|
|
6118
6438
|
return /\.{4,}\d{1,3}$/.test(value.trim()) || /^\d+\.\s+[A-Z][\s\S]*\.{3,}\d{1,3}$/.test(value.trim());
|
|
@@ -6139,11 +6459,7 @@ function buildExtractionReviewReport(params) {
|
|
|
6139
6459
|
const definitionsResult = memory.get("definitions");
|
|
6140
6460
|
const coveredReasonsResult = memory.get("covered_reasons");
|
|
6141
6461
|
const definitions = Array.isArray(definitionsResult?.definitions) ? definitionsResult.definitions : sections.filter((section) => section.type === "definition");
|
|
6142
|
-
const coveredReasons = Array.isArray(coveredReasonsResult?.coveredReasons) ? coveredReasonsResult.coveredReasons : Array.isArray(coveredReasonsResult?.covered_reasons) ? coveredReasonsResult.covered_reasons : sections.filter(
|
|
6143
|
-
const title = String(section.title ?? "").toLowerCase();
|
|
6144
|
-
const type = String(section.type ?? "").toLowerCase();
|
|
6145
|
-
return type === "covered_reason" || title.includes("covered cause") || title.includes("covered reason") || title.includes("covered peril");
|
|
6146
|
-
});
|
|
6462
|
+
const coveredReasons = Array.isArray(coveredReasonsResult?.coveredReasons) ? coveredReasonsResult.coveredReasons : Array.isArray(coveredReasonsResult?.covered_reasons) ? coveredReasonsResult.covered_reasons : sections.filter(looksCoveredReasonSection);
|
|
6147
6463
|
const mappedDefinitions = params.pageAssignments.some((assignment) => assignment.extractorNames.includes("definitions"));
|
|
6148
6464
|
const mappedCoveredReasons = params.pageAssignments.some((assignment) => assignment.extractorNames.includes("covered_reasons"));
|
|
6149
6465
|
if (mappedDefinitions && definitions.length === 0) {
|
|
@@ -6258,7 +6574,7 @@ function buildExtractionReviewReport(params) {
|
|
|
6258
6574
|
itemName: typeof coverage.name === "string" ? coverage.name : void 0
|
|
6259
6575
|
});
|
|
6260
6576
|
}
|
|
6261
|
-
if (
|
|
6577
|
+
if (looksReferential(coverage.limit) || looksReferential(coverage.deductible)) {
|
|
6262
6578
|
deterministicIssues.push({
|
|
6263
6579
|
code: "coverage_referential_value",
|
|
6264
6580
|
severity: "warning",
|
|
@@ -6408,7 +6724,7 @@ function buildExtractionReviewReport(params) {
|
|
|
6408
6724
|
itemName
|
|
6409
6725
|
});
|
|
6410
6726
|
}
|
|
6411
|
-
if (
|
|
6727
|
+
if (looksReferential(content) || looksReferential(coveredReason.reason)) {
|
|
6412
6728
|
deterministicIssues.push({
|
|
6413
6729
|
code: "covered_reason_referential_value",
|
|
6414
6730
|
severity: "warning",
|
|
@@ -6469,6 +6785,134 @@ function toReviewRoundRecord(round, review) {
|
|
|
6469
6785
|
};
|
|
6470
6786
|
}
|
|
6471
6787
|
|
|
6788
|
+
// src/extraction/planning.ts
|
|
6789
|
+
function normalizePageAssignments(pageAssignments, formInventory) {
|
|
6790
|
+
const pageFormTypes = /* @__PURE__ */ new Map();
|
|
6791
|
+
if (formInventory) {
|
|
6792
|
+
for (const form of formInventory.forms) {
|
|
6793
|
+
if (form.pageStart != null) {
|
|
6794
|
+
const end = form.pageEnd ?? form.pageStart;
|
|
6795
|
+
for (let p = form.pageStart; p <= end; p += 1) {
|
|
6796
|
+
const types = pageFormTypes.get(p) ?? /* @__PURE__ */ new Set();
|
|
6797
|
+
types.add(form.formType);
|
|
6798
|
+
pageFormTypes.set(p, types);
|
|
6799
|
+
}
|
|
6800
|
+
}
|
|
6801
|
+
}
|
|
6802
|
+
}
|
|
6803
|
+
return pageAssignments.map((assignment) => {
|
|
6804
|
+
let extractorNames = [...new Set(
|
|
6805
|
+
(assignment.extractorNames.length > 0 ? assignment.extractorNames : ["sections"]).filter(Boolean)
|
|
6806
|
+
)];
|
|
6807
|
+
const hasDeclarations = extractorNames.includes("declarations");
|
|
6808
|
+
const hasConditions = extractorNames.includes("conditions");
|
|
6809
|
+
const hasExclusions = extractorNames.includes("exclusions");
|
|
6810
|
+
const hasEndorsements = extractorNames.includes("endorsements");
|
|
6811
|
+
const looksLikeScheduleValues = assignment.hasScheduleValues === true;
|
|
6812
|
+
const roleBlocksCoverageLimits = assignment.pageRole === "policy_form" || assignment.pageRole === "condition_exclusion_form" || assignment.pageRole === "endorsement_form";
|
|
6813
|
+
const inventoryTypes = pageFormTypes.get(assignment.localPageNumber);
|
|
6814
|
+
const inventoryBlocksCoverageLimits = inventoryTypes != null && !looksLikeScheduleValues && !hasDeclarations && (inventoryTypes.has("endorsement") || inventoryTypes.has("notice") || inventoryTypes.has("application"));
|
|
6815
|
+
if (extractorNames.includes("coverage_limits")) {
|
|
6816
|
+
const shouldDropCoverageLimits = inventoryBlocksCoverageLimits || !looksLikeScheduleValues && roleBlocksCoverageLimits || !hasDeclarations && !looksLikeScheduleValues && (hasConditions || hasExclusions) || !hasDeclarations && !looksLikeScheduleValues && hasEndorsements;
|
|
6817
|
+
if (shouldDropCoverageLimits) {
|
|
6818
|
+
extractorNames = extractorNames.filter((name) => name !== "coverage_limits");
|
|
6819
|
+
}
|
|
6820
|
+
}
|
|
6821
|
+
if (inventoryTypes?.has("endorsement") && !extractorNames.includes("endorsements")) {
|
|
6822
|
+
extractorNames = [...extractorNames, "endorsements"];
|
|
6823
|
+
}
|
|
6824
|
+
if (extractorNames.length === 0) {
|
|
6825
|
+
extractorNames = ["sections"];
|
|
6826
|
+
}
|
|
6827
|
+
return {
|
|
6828
|
+
...assignment,
|
|
6829
|
+
extractorNames
|
|
6830
|
+
};
|
|
6831
|
+
});
|
|
6832
|
+
}
|
|
6833
|
+
function buildTemplateHints(primaryType, documentType, pageCount, template) {
|
|
6834
|
+
return [
|
|
6835
|
+
`Document type: ${primaryType} ${documentType}`,
|
|
6836
|
+
`Expected sections: ${template.expectedSections.join(", ")}`,
|
|
6837
|
+
`Page hints: ${Object.entries(template.pageHints).map(([k, v]) => `${k}: ${v}`).join("; ")}`,
|
|
6838
|
+
`Total pages: ${pageCount}`
|
|
6839
|
+
].join("\n");
|
|
6840
|
+
}
|
|
6841
|
+
function groupContiguousPages(pages) {
|
|
6842
|
+
if (pages.length === 0) return [];
|
|
6843
|
+
const sorted = [...new Set(pages)].sort((a, b) => a - b);
|
|
6844
|
+
const ranges = [];
|
|
6845
|
+
let start = sorted[0];
|
|
6846
|
+
let previous = sorted[0];
|
|
6847
|
+
for (let i = 1; i < sorted.length; i += 1) {
|
|
6848
|
+
const current = sorted[i];
|
|
6849
|
+
if (current === previous + 1) {
|
|
6850
|
+
previous = current;
|
|
6851
|
+
continue;
|
|
6852
|
+
}
|
|
6853
|
+
ranges.push({ startPage: start, endPage: previous });
|
|
6854
|
+
start = current;
|
|
6855
|
+
previous = current;
|
|
6856
|
+
}
|
|
6857
|
+
ranges.push({ startPage: start, endPage: previous });
|
|
6858
|
+
return ranges;
|
|
6859
|
+
}
|
|
6860
|
+
function buildPlanFromPageAssignments(pageAssignments, pageCount, formInventory) {
|
|
6861
|
+
const extractorPages = /* @__PURE__ */ new Map();
|
|
6862
|
+
for (const assignment of pageAssignments) {
|
|
6863
|
+
const extractors = assignment.extractorNames.length > 0 ? assignment.extractorNames : ["sections"];
|
|
6864
|
+
for (const extractorName of extractors) {
|
|
6865
|
+
extractorPages.set(extractorName, [...extractorPages.get(extractorName) ?? [], assignment.localPageNumber]);
|
|
6866
|
+
}
|
|
6867
|
+
}
|
|
6868
|
+
const coveredPages = /* @__PURE__ */ new Set();
|
|
6869
|
+
for (const pages of extractorPages.values()) {
|
|
6870
|
+
for (const page of pages) coveredPages.add(page);
|
|
6871
|
+
}
|
|
6872
|
+
for (let page = 1; page <= pageCount; page += 1) {
|
|
6873
|
+
if (!coveredPages.has(page)) {
|
|
6874
|
+
extractorPages.set("sections", [...extractorPages.get("sections") ?? [], page]);
|
|
6875
|
+
}
|
|
6876
|
+
}
|
|
6877
|
+
const contextualExtractors = /* @__PURE__ */ new Set(["conditions", "covered_reasons", "definitions", "exclusions", "endorsements"]);
|
|
6878
|
+
const contextualForms = (formInventory?.forms ?? []).filter(
|
|
6879
|
+
(form) => form.pageStart != null && (form.pageEnd ?? form.pageStart) != null
|
|
6880
|
+
);
|
|
6881
|
+
const expandPagesToFormRanges = (extractorName, pages) => {
|
|
6882
|
+
if (!contextualExtractors.has(extractorName)) return pages;
|
|
6883
|
+
const expanded = new Set(pages);
|
|
6884
|
+
for (const page of pages) {
|
|
6885
|
+
for (const form of contextualForms) {
|
|
6886
|
+
const pageStart = form.pageStart;
|
|
6887
|
+
const pageEnd = form.pageEnd ?? form.pageStart;
|
|
6888
|
+
const formType = form.formType;
|
|
6889
|
+
const supportsContextualExpansion = extractorName === "endorsements" ? formType === "endorsement" : formType === "coverage" || formType === "endorsement";
|
|
6890
|
+
if (!supportsContextualExpansion) continue;
|
|
6891
|
+
if (page < pageStart || page > pageEnd) continue;
|
|
6892
|
+
for (let current = pageStart; current <= pageEnd; current += 1) {
|
|
6893
|
+
expanded.add(current);
|
|
6894
|
+
}
|
|
6895
|
+
}
|
|
6896
|
+
}
|
|
6897
|
+
return [...expanded].sort((a, b) => a - b);
|
|
6898
|
+
};
|
|
6899
|
+
const tasks = [...extractorPages.entries()].flatMap(
|
|
6900
|
+
([extractorName, pages]) => groupContiguousPages(expandPagesToFormRanges(extractorName, pages)).map(({ startPage, endPage }) => ({
|
|
6901
|
+
extractorName,
|
|
6902
|
+
startPage,
|
|
6903
|
+
endPage,
|
|
6904
|
+
description: `Page-mapped ${extractorName} extraction for pages ${startPage}-${endPage}`
|
|
6905
|
+
}))
|
|
6906
|
+
).sort((a, b) => a.startPage - b.startPage || a.extractorName.localeCompare(b.extractorName));
|
|
6907
|
+
return {
|
|
6908
|
+
tasks,
|
|
6909
|
+
pageMap: [...extractorPages.entries()].map(([section, pages]) => ({
|
|
6910
|
+
section,
|
|
6911
|
+
pages: `pages ${[...new Set(pages)].sort((a, b) => a - b).join(", ")}`
|
|
6912
|
+
}))
|
|
6913
|
+
};
|
|
6914
|
+
}
|
|
6915
|
+
|
|
6472
6916
|
// src/extraction/coordinator.ts
|
|
6473
6917
|
function createExtractor(config) {
|
|
6474
6918
|
const {
|
|
@@ -6485,6 +6929,7 @@ function createExtractor(config) {
|
|
|
6485
6929
|
onCheckpointSave
|
|
6486
6930
|
} = config;
|
|
6487
6931
|
const limit = pLimit(concurrency);
|
|
6932
|
+
const extractorCatalog = formatExtractorCatalogForPrompt();
|
|
6488
6933
|
let totalUsage = { inputTokens: 0, outputTokens: 0 };
|
|
6489
6934
|
let modelCalls = 0;
|
|
6490
6935
|
let callsWithUsage = 0;
|
|
@@ -6505,43 +6950,56 @@ function createExtractor(config) {
|
|
|
6505
6950
|
memory.set(name, mergeExtractorResult(name, existing, data));
|
|
6506
6951
|
}
|
|
6507
6952
|
function summarizeExtraction(memory) {
|
|
6508
|
-
const
|
|
6509
|
-
const
|
|
6510
|
-
const
|
|
6511
|
-
const
|
|
6512
|
-
const
|
|
6513
|
-
const
|
|
6514
|
-
const
|
|
6515
|
-
const
|
|
6516
|
-
const
|
|
6517
|
-
const definitionCount = Array.isArray(definitionsResult?.definitions) ? definitionsResult.definitions.length : sections.filter((section) => section.type === "definition").length;
|
|
6518
|
-
const coveredReasonCount = Array.isArray(coveredReasonsResult?.coveredReasons) ? coveredReasonsResult.coveredReasons.length : Array.isArray(coveredReasonsResult?.covered_reasons) ? coveredReasonsResult.covered_reasons.length : sections.filter((section) => {
|
|
6519
|
-
const title = String(section.title ?? "").toLowerCase();
|
|
6520
|
-
const type = String(section.type ?? "").toLowerCase();
|
|
6521
|
-
return type === "covered_reason" || title.includes("covered cause") || title.includes("covered reason") || title.includes("covered peril");
|
|
6522
|
-
}).length;
|
|
6523
|
-
const coverageSummary = Array.isArray(coverageResult?.coverages) ? coverageResult.coverages.slice(0, 12).map((coverage) => ({
|
|
6953
|
+
const declarationResult = readMemoryRecord(memory, "declarations");
|
|
6954
|
+
const endorsements = readRecordArray(readMemoryRecord(memory, "endorsements"), "endorsements") ?? [];
|
|
6955
|
+
const exclusions = readRecordArray(readMemoryRecord(memory, "exclusions"), "exclusions") ?? [];
|
|
6956
|
+
const conditions = readRecordArray(readMemoryRecord(memory, "conditions"), "conditions") ?? [];
|
|
6957
|
+
const sections = getSections(memory) ?? [];
|
|
6958
|
+
const definitions = getDefinitions(memory) ?? sections.filter((section) => section.type === "definition");
|
|
6959
|
+
const coveredReasons = getCoveredReasons(memory) ?? sections.filter(looksCoveredReasonSection);
|
|
6960
|
+
const coverages = getCoverageLimitCoverages(memory);
|
|
6961
|
+
const coverageSummary = coverages.slice(0, 12).map((coverage) => ({
|
|
6524
6962
|
name: coverage.name,
|
|
6525
6963
|
limit: coverage.limit,
|
|
6526
6964
|
deductible: coverage.deductible,
|
|
6527
6965
|
formNumber: coverage.formNumber
|
|
6528
|
-
}))
|
|
6966
|
+
}));
|
|
6529
6967
|
return JSON.stringify({
|
|
6530
6968
|
extractedKeys: [...memory.keys()].filter((key) => key !== "classify"),
|
|
6531
6969
|
declarationFieldCount: Array.isArray(declarationResult?.fields) ? declarationResult.fields.length : 0,
|
|
6532
|
-
coverageCount:
|
|
6970
|
+
coverageCount: coverages.length,
|
|
6533
6971
|
coverageSamples: coverageSummary,
|
|
6534
|
-
endorsementCount:
|
|
6535
|
-
exclusionCount:
|
|
6536
|
-
conditionCount:
|
|
6537
|
-
definitionCount,
|
|
6538
|
-
coveredReasonCount,
|
|
6972
|
+
endorsementCount: endorsements.length,
|
|
6973
|
+
exclusionCount: exclusions.length,
|
|
6974
|
+
conditionCount: conditions.length,
|
|
6975
|
+
definitionCount: definitions.length,
|
|
6976
|
+
coveredReasonCount: coveredReasons.length,
|
|
6539
6977
|
sectionCount: sections.length
|
|
6540
6978
|
}, null, 2);
|
|
6541
6979
|
}
|
|
6980
|
+
function textIncludesSupplementarySignal(value) {
|
|
6981
|
+
if (typeof value !== "string") return false;
|
|
6982
|
+
return /\b(supplementary|regulatory|department of insurance|ombudsman|complaint|claim|claims|contact|phone|email|cancellation|cancelled|nonrenewal|non-renewal|non renew|notice|governing law|jurisdiction|third[- ]party administrator|tpa)\b/i.test(value);
|
|
6983
|
+
}
|
|
6984
|
+
function hasSupplementaryExtractionSignal(pageAssignments, formInventory, memory) {
|
|
6985
|
+
const hasPageSignal = pageAssignments.some(
|
|
6986
|
+
(assignment) => assignment.pageRole === "supplementary" || assignment.extractorNames.includes("supplementary") || textIncludesSupplementarySignal(assignment.notes)
|
|
6987
|
+
);
|
|
6988
|
+
if (hasPageSignal) return true;
|
|
6989
|
+
const hasFormSignal = (formInventory?.forms ?? []).some(
|
|
6990
|
+
(form) => form.formType === "notice" || textIncludesSupplementarySignal(form.title) || textIncludesSupplementarySignal(form.formNumber)
|
|
6991
|
+
);
|
|
6992
|
+
if (hasFormSignal) return true;
|
|
6993
|
+
const likelySupplementaryKeys = ["sections", "conditions", "endorsements", "exclusions"];
|
|
6994
|
+
return likelySupplementaryKeys.some((key) => {
|
|
6995
|
+
const value = memory.get(key);
|
|
6996
|
+
if (!value) return false;
|
|
6997
|
+
return textIncludesSupplementarySignal(JSON.stringify(value));
|
|
6998
|
+
});
|
|
6999
|
+
}
|
|
6542
7000
|
function buildAlreadyExtractedSummary(memory) {
|
|
6543
7001
|
const lines = [];
|
|
6544
|
-
const declarationResult = memory
|
|
7002
|
+
const declarationResult = readMemoryRecord(memory, "declarations");
|
|
6545
7003
|
if (Array.isArray(declarationResult?.fields)) {
|
|
6546
7004
|
for (const field of declarationResult.fields) {
|
|
6547
7005
|
if (field.key && field.value) {
|
|
@@ -6550,20 +7008,17 @@ function createExtractor(config) {
|
|
|
6550
7008
|
}
|
|
6551
7009
|
}
|
|
6552
7010
|
}
|
|
6553
|
-
const
|
|
6554
|
-
|
|
6555
|
-
|
|
6556
|
-
const parts = [cov.name, cov.limit && `limit=${cov.limit}`, cov.deductible && `deductible=${cov.deductible}`].filter(Boolean);
|
|
6557
|
-
if (parts.length > 0) lines.push(`- coverage: ${parts.join(", ")}`);
|
|
6558
|
-
}
|
|
7011
|
+
for (const cov of getCoverageLimitCoverages(memory)) {
|
|
7012
|
+
const parts = [cov.name, cov.limit && `limit=${cov.limit}`, cov.deductible && `deductible=${cov.deductible}`].filter(Boolean);
|
|
7013
|
+
if (parts.length > 0) lines.push(`- coverage: ${parts.join(", ")}`);
|
|
6559
7014
|
}
|
|
6560
|
-
const namedInsured = memory
|
|
7015
|
+
const namedInsured = getNamedInsured(memory);
|
|
6561
7016
|
if (namedInsured) {
|
|
6562
7017
|
for (const [key, value] of Object.entries(namedInsured)) {
|
|
6563
7018
|
if (value && typeof value === "string") lines.push(`- ${key}: ${value}`);
|
|
6564
7019
|
}
|
|
6565
7020
|
}
|
|
6566
|
-
const carrierInfo = memory
|
|
7021
|
+
const carrierInfo = getCarrierInfo(memory);
|
|
6567
7022
|
if (carrierInfo) {
|
|
6568
7023
|
for (const [key, value] of Object.entries(carrierInfo)) {
|
|
6569
7024
|
if (value && typeof value === "string") lines.push(`- ${key}: ${value}`);
|
|
@@ -6571,6 +7026,34 @@ function createExtractor(config) {
|
|
|
6571
7026
|
}
|
|
6572
7027
|
return lines.length > 0 ? lines.join("\n") : "";
|
|
6573
7028
|
}
|
|
7029
|
+
async function runFocusedExtractorTask(task, pdfInput, memory) {
|
|
7030
|
+
if (task.extractorName === "supplementary") {
|
|
7031
|
+
const alreadyExtractedSummary = buildAlreadyExtractedSummary(memory);
|
|
7032
|
+
const result = await runExtractor({
|
|
7033
|
+
name: "supplementary",
|
|
7034
|
+
prompt: buildSupplementaryPrompt(alreadyExtractedSummary),
|
|
7035
|
+
schema: SupplementarySchema,
|
|
7036
|
+
pdfInput,
|
|
7037
|
+
startPage: task.startPage,
|
|
7038
|
+
endPage: task.endPage,
|
|
7039
|
+
generateObject,
|
|
7040
|
+
convertPdfToImages,
|
|
7041
|
+
maxTokens: 4096,
|
|
7042
|
+
providerOptions
|
|
7043
|
+
});
|
|
7044
|
+
trackUsage(result.usage);
|
|
7045
|
+
return result;
|
|
7046
|
+
}
|
|
7047
|
+
return runFocusedExtractorWithFallback({
|
|
7048
|
+
task,
|
|
7049
|
+
pdfInput,
|
|
7050
|
+
generateObject,
|
|
7051
|
+
convertPdfToImages,
|
|
7052
|
+
providerOptions,
|
|
7053
|
+
trackUsage,
|
|
7054
|
+
log
|
|
7055
|
+
});
|
|
7056
|
+
}
|
|
6574
7057
|
function formatPageMapSummary(pageAssignments) {
|
|
6575
7058
|
const extractorPages = /* @__PURE__ */ new Map();
|
|
6576
7059
|
for (const assignment of pageAssignments) {
|
|
@@ -6581,132 +7064,6 @@ function createExtractor(config) {
|
|
|
6581
7064
|
if (extractorPages.size === 0) return "No page assignments available.";
|
|
6582
7065
|
return [...extractorPages.entries()].map(([extractorName, pages]) => `${extractorName}: ${pages.length} page(s), pages ${pages.join(", ")}`).join("\n");
|
|
6583
7066
|
}
|
|
6584
|
-
function normalizePageAssignments(pageAssignments, formInventory) {
|
|
6585
|
-
const pageFormTypes = /* @__PURE__ */ new Map();
|
|
6586
|
-
if (formInventory) {
|
|
6587
|
-
for (const form of formInventory.forms) {
|
|
6588
|
-
if (form.pageStart != null) {
|
|
6589
|
-
const end = form.pageEnd ?? form.pageStart;
|
|
6590
|
-
for (let p = form.pageStart; p <= end; p++) {
|
|
6591
|
-
const types = pageFormTypes.get(p) ?? /* @__PURE__ */ new Set();
|
|
6592
|
-
types.add(form.formType);
|
|
6593
|
-
pageFormTypes.set(p, types);
|
|
6594
|
-
}
|
|
6595
|
-
}
|
|
6596
|
-
}
|
|
6597
|
-
}
|
|
6598
|
-
return pageAssignments.map((assignment) => {
|
|
6599
|
-
let extractorNames = [...new Set(
|
|
6600
|
-
(assignment.extractorNames.length > 0 ? assignment.extractorNames : ["sections"]).filter(Boolean)
|
|
6601
|
-
)];
|
|
6602
|
-
const hasDeclarations = extractorNames.includes("declarations");
|
|
6603
|
-
const hasConditions = extractorNames.includes("conditions");
|
|
6604
|
-
const hasExclusions = extractorNames.includes("exclusions");
|
|
6605
|
-
const hasEndorsements = extractorNames.includes("endorsements");
|
|
6606
|
-
const looksLikeScheduleValues = assignment.hasScheduleValues === true;
|
|
6607
|
-
const roleBlocksCoverageLimits = assignment.pageRole === "policy_form" || assignment.pageRole === "condition_exclusion_form" || assignment.pageRole === "endorsement_form";
|
|
6608
|
-
const inventoryTypes = pageFormTypes.get(assignment.localPageNumber);
|
|
6609
|
-
const inventoryBlocksCoverageLimits = inventoryTypes != null && !looksLikeScheduleValues && !hasDeclarations && (inventoryTypes.has("endorsement") || inventoryTypes.has("notice") || inventoryTypes.has("application"));
|
|
6610
|
-
if (extractorNames.includes("coverage_limits")) {
|
|
6611
|
-
const shouldDropCoverageLimits = inventoryBlocksCoverageLimits || !looksLikeScheduleValues && roleBlocksCoverageLimits || !hasDeclarations && !looksLikeScheduleValues && (hasConditions || hasExclusions) || !hasDeclarations && !looksLikeScheduleValues && hasEndorsements;
|
|
6612
|
-
if (shouldDropCoverageLimits) {
|
|
6613
|
-
extractorNames = extractorNames.filter((name) => name !== "coverage_limits");
|
|
6614
|
-
}
|
|
6615
|
-
}
|
|
6616
|
-
if (inventoryTypes?.has("endorsement") && !extractorNames.includes("endorsements")) {
|
|
6617
|
-
extractorNames = [...extractorNames, "endorsements"];
|
|
6618
|
-
}
|
|
6619
|
-
if (extractorNames.length === 0) {
|
|
6620
|
-
extractorNames = ["sections"];
|
|
6621
|
-
}
|
|
6622
|
-
return {
|
|
6623
|
-
...assignment,
|
|
6624
|
-
extractorNames
|
|
6625
|
-
};
|
|
6626
|
-
});
|
|
6627
|
-
}
|
|
6628
|
-
function buildTemplateHints(primaryType, documentType, pageCount, template) {
|
|
6629
|
-
return [
|
|
6630
|
-
`Document type: ${primaryType} ${documentType}`,
|
|
6631
|
-
`Expected sections: ${template.expectedSections.join(", ")}`,
|
|
6632
|
-
`Page hints: ${Object.entries(template.pageHints).map(([k, v]) => `${k}: ${v}`).join("; ")}`,
|
|
6633
|
-
`Total pages: ${pageCount}`
|
|
6634
|
-
].join("\n");
|
|
6635
|
-
}
|
|
6636
|
-
function groupContiguousPages(pages) {
|
|
6637
|
-
if (pages.length === 0) return [];
|
|
6638
|
-
const sorted = [...new Set(pages)].sort((a, b) => a - b);
|
|
6639
|
-
const ranges = [];
|
|
6640
|
-
let start = sorted[0];
|
|
6641
|
-
let previous = sorted[0];
|
|
6642
|
-
for (let i = 1; i < sorted.length; i += 1) {
|
|
6643
|
-
const current = sorted[i];
|
|
6644
|
-
if (current === previous + 1) {
|
|
6645
|
-
previous = current;
|
|
6646
|
-
continue;
|
|
6647
|
-
}
|
|
6648
|
-
ranges.push({ startPage: start, endPage: previous });
|
|
6649
|
-
start = current;
|
|
6650
|
-
previous = current;
|
|
6651
|
-
}
|
|
6652
|
-
ranges.push({ startPage: start, endPage: previous });
|
|
6653
|
-
return ranges;
|
|
6654
|
-
}
|
|
6655
|
-
function buildPlanFromPageAssignments(pageAssignments, pageCount, formInventory) {
|
|
6656
|
-
const extractorPages = /* @__PURE__ */ new Map();
|
|
6657
|
-
for (const assignment of pageAssignments) {
|
|
6658
|
-
const extractors = assignment.extractorNames.length > 0 ? assignment.extractorNames : ["sections"];
|
|
6659
|
-
for (const extractorName of extractors) {
|
|
6660
|
-
extractorPages.set(extractorName, [...extractorPages.get(extractorName) ?? [], assignment.localPageNumber]);
|
|
6661
|
-
}
|
|
6662
|
-
}
|
|
6663
|
-
const coveredPages = /* @__PURE__ */ new Set();
|
|
6664
|
-
for (const pages of extractorPages.values()) {
|
|
6665
|
-
for (const page of pages) coveredPages.add(page);
|
|
6666
|
-
}
|
|
6667
|
-
for (let page = 1; page <= pageCount; page += 1) {
|
|
6668
|
-
if (!coveredPages.has(page)) {
|
|
6669
|
-
extractorPages.set("sections", [...extractorPages.get("sections") ?? [], page]);
|
|
6670
|
-
}
|
|
6671
|
-
}
|
|
6672
|
-
const contextualExtractors = /* @__PURE__ */ new Set(["conditions", "covered_reasons", "definitions", "exclusions", "endorsements"]);
|
|
6673
|
-
const contextualForms = (formInventory?.forms ?? []).filter(
|
|
6674
|
-
(form) => form.pageStart != null && (form.pageEnd ?? form.pageStart) != null
|
|
6675
|
-
);
|
|
6676
|
-
const expandPagesToFormRanges = (extractorName, pages) => {
|
|
6677
|
-
if (!contextualExtractors.has(extractorName)) return pages;
|
|
6678
|
-
const expanded = new Set(pages);
|
|
6679
|
-
for (const page of pages) {
|
|
6680
|
-
for (const form of contextualForms) {
|
|
6681
|
-
const pageStart = form.pageStart;
|
|
6682
|
-
const pageEnd = form.pageEnd ?? form.pageStart;
|
|
6683
|
-
const formType = form.formType;
|
|
6684
|
-
const supportsContextualExpansion = extractorName === "endorsements" ? formType === "endorsement" : formType === "coverage" || formType === "endorsement";
|
|
6685
|
-
if (!supportsContextualExpansion) continue;
|
|
6686
|
-
if (page < pageStart || page > pageEnd) continue;
|
|
6687
|
-
for (let current = pageStart; current <= pageEnd; current += 1) {
|
|
6688
|
-
expanded.add(current);
|
|
6689
|
-
}
|
|
6690
|
-
}
|
|
6691
|
-
}
|
|
6692
|
-
return [...expanded].sort((a, b) => a - b);
|
|
6693
|
-
};
|
|
6694
|
-
const tasks = [...extractorPages.entries()].flatMap(
|
|
6695
|
-
([extractorName, pages]) => groupContiguousPages(expandPagesToFormRanges(extractorName, pages)).map(({ startPage, endPage }) => ({
|
|
6696
|
-
extractorName,
|
|
6697
|
-
startPage,
|
|
6698
|
-
endPage,
|
|
6699
|
-
description: `Page-mapped ${extractorName} extraction for pages ${startPage}-${endPage}`
|
|
6700
|
-
}))
|
|
6701
|
-
).sort((a, b) => a.startPage - b.startPage || a.extractorName.localeCompare(b.extractorName));
|
|
6702
|
-
return {
|
|
6703
|
-
tasks,
|
|
6704
|
-
pageMap: [...extractorPages.entries()].map(([section, pages]) => ({
|
|
6705
|
-
section,
|
|
6706
|
-
pages: `pages ${[...new Set(pages)].sort((a, b) => a - b).join(", ")}`
|
|
6707
|
-
}))
|
|
6708
|
-
};
|
|
6709
|
-
}
|
|
6710
7067
|
async function extract(pdfInput, documentId, options) {
|
|
6711
7068
|
const id = documentId ?? `doc-${Date.now()}`;
|
|
6712
7069
|
const memory = /* @__PURE__ */ new Map();
|
|
@@ -6717,7 +7074,8 @@ function createExtractor(config) {
|
|
|
6717
7074
|
const pipelineCtx = createPipelineContext({
|
|
6718
7075
|
id,
|
|
6719
7076
|
onSave: onCheckpointSave,
|
|
6720
|
-
resumeFrom: options?.resumeFrom
|
|
7077
|
+
resumeFrom: options?.resumeFrom,
|
|
7078
|
+
phaseOrder: ["classify", "form_inventory", "page_map", "plan", "extract", "resolve_referential", "review", "assemble"]
|
|
6721
7079
|
});
|
|
6722
7080
|
const resumed = pipelineCtx.getCheckpoint()?.state;
|
|
6723
7081
|
if (resumed?.memory) {
|
|
@@ -6885,40 +7243,18 @@ function createExtractor(config) {
|
|
|
6885
7243
|
const extractorResults = await Promise.all(
|
|
6886
7244
|
tasks.map(
|
|
6887
7245
|
(task) => limit(async () => {
|
|
6888
|
-
const ext = getExtractor(task.extractorName) ?? (task.extractorName === "definitions" || task.extractorName === "covered_reasons" ? getExtractor("sections") : void 0);
|
|
6889
|
-
if (!ext) {
|
|
6890
|
-
await log?.(`Unknown extractor: ${task.extractorName}, skipping`);
|
|
6891
|
-
return null;
|
|
6892
|
-
}
|
|
6893
7246
|
onProgress?.(`Extracting ${task.extractorName} (pages ${task.startPage}-${task.endPage})...`);
|
|
6894
|
-
|
|
6895
|
-
const result = await runExtractor({
|
|
6896
|
-
name: task.extractorName,
|
|
6897
|
-
prompt: ext.buildPrompt(),
|
|
6898
|
-
schema: ext.schema,
|
|
6899
|
-
pdfInput,
|
|
6900
|
-
startPage: task.startPage,
|
|
6901
|
-
endPage: task.endPage,
|
|
6902
|
-
generateObject,
|
|
6903
|
-
convertPdfToImages,
|
|
6904
|
-
maxTokens: ext.maxTokens ?? 4096,
|
|
6905
|
-
providerOptions
|
|
6906
|
-
});
|
|
6907
|
-
trackUsage(result.usage);
|
|
6908
|
-
return result;
|
|
6909
|
-
} catch (error) {
|
|
6910
|
-
await log?.(`Extractor ${task.extractorName} failed: ${error}`);
|
|
6911
|
-
return null;
|
|
6912
|
-
}
|
|
7247
|
+
return runFocusedExtractorTask(task, pdfInput, memory);
|
|
6913
7248
|
})
|
|
6914
7249
|
)
|
|
6915
7250
|
);
|
|
6916
|
-
for (const result of extractorResults) {
|
|
7251
|
+
for (const result of extractorResults.flatMap((item) => Array.isArray(item) ? item : item ? [item] : [])) {
|
|
6917
7252
|
if (result) {
|
|
6918
7253
|
mergeMemoryResult(result.name, result.data, memory);
|
|
6919
7254
|
}
|
|
6920
7255
|
}
|
|
6921
|
-
|
|
7256
|
+
const planIncludesSupplementary = tasks.some((task) => task.extractorName === "supplementary");
|
|
7257
|
+
if (!planIncludesSupplementary && hasSupplementaryExtractionSignal(pageAssignments, formInventory, memory)) {
|
|
6922
7258
|
onProgress?.("Extracting supplementary retrieval facts...");
|
|
6923
7259
|
try {
|
|
6924
7260
|
const alreadyExtractedSummary = buildAlreadyExtractedSummary(memory);
|
|
@@ -6992,7 +7328,7 @@ function createExtractor(config) {
|
|
|
6992
7328
|
const reviewResponse = await safeGenerateObject(
|
|
6993
7329
|
generateObject,
|
|
6994
7330
|
{
|
|
6995
|
-
prompt: buildReviewPrompt(template.required, extractedKeys, extractionSummary, pageMapSummary),
|
|
7331
|
+
prompt: buildReviewPrompt(template.required, extractedKeys, extractionSummary, pageMapSummary, extractorCatalog),
|
|
6996
7332
|
schema: ReviewResultSchema,
|
|
6997
7333
|
maxTokens: 1536,
|
|
6998
7334
|
providerOptions: await buildPdfProviderOptions(pdfInput, providerOptions)
|
|
@@ -7016,31 +7352,11 @@ function createExtractor(config) {
|
|
|
7016
7352
|
const followUpResults = await Promise.all(
|
|
7017
7353
|
reviewResponse.object.additionalTasks.map(
|
|
7018
7354
|
(task) => limit(async () => {
|
|
7019
|
-
|
|
7020
|
-
if (!ext) return null;
|
|
7021
|
-
try {
|
|
7022
|
-
const result = await runExtractor({
|
|
7023
|
-
name: task.extractorName,
|
|
7024
|
-
prompt: ext.buildPrompt(),
|
|
7025
|
-
schema: ext.schema,
|
|
7026
|
-
pdfInput,
|
|
7027
|
-
startPage: task.startPage,
|
|
7028
|
-
endPage: task.endPage,
|
|
7029
|
-
generateObject,
|
|
7030
|
-
convertPdfToImages,
|
|
7031
|
-
maxTokens: ext.maxTokens ?? 4096,
|
|
7032
|
-
providerOptions
|
|
7033
|
-
});
|
|
7034
|
-
trackUsage(result.usage);
|
|
7035
|
-
return result;
|
|
7036
|
-
} catch (error) {
|
|
7037
|
-
await log?.(`Follow-up extractor ${task.extractorName} failed: ${error}`);
|
|
7038
|
-
return null;
|
|
7039
|
-
}
|
|
7355
|
+
return runFocusedExtractorTask(task, pdfInput, memory);
|
|
7040
7356
|
})
|
|
7041
7357
|
)
|
|
7042
7358
|
);
|
|
7043
|
-
for (const result of followUpResults) {
|
|
7359
|
+
for (const result of followUpResults.flatMap((item) => Array.isArray(item) ? item : item ? [item] : [])) {
|
|
7044
7360
|
if (result) {
|
|
7045
7361
|
mergeMemoryResult(result.name, result.data, memory);
|
|
7046
7362
|
}
|
|
@@ -8079,6 +8395,70 @@ function reviewBatchEmail(text, batchFields) {
|
|
|
8079
8395
|
};
|
|
8080
8396
|
}
|
|
8081
8397
|
|
|
8398
|
+
// src/application/workflow.ts
|
|
8399
|
+
var MAX_DOCUMENT_SEARCH_FIELDS = 5;
|
|
8400
|
+
var LOW_VALUE_FIELD_RATIO_LIMIT = 0.6;
|
|
8401
|
+
function planApplicationWorkflow(input) {
|
|
8402
|
+
const unfilledFields = input.fields.filter(isUnfilled);
|
|
8403
|
+
const documentSearchFields = planDocumentSearchFields(
|
|
8404
|
+
unfilledFields,
|
|
8405
|
+
input.hasDocumentStore && input.hasMemoryStore
|
|
8406
|
+
);
|
|
8407
|
+
return {
|
|
8408
|
+
runBackfill: input.hasBackfillProvider && unfilledFields.length > 0,
|
|
8409
|
+
runContextAutoFill: input.orgContextCount > 0 && unfilledFields.length > 0,
|
|
8410
|
+
documentSearchFields,
|
|
8411
|
+
runBatching: unfilledFields.length > 0,
|
|
8412
|
+
unfilledFields
|
|
8413
|
+
};
|
|
8414
|
+
}
|
|
8415
|
+
function planReplyActions(input) {
|
|
8416
|
+
const hasCurrentFields = input.currentBatchFields.length > 0;
|
|
8417
|
+
const nextBatchNeedsAnswers = (input.nextBatchFields ?? []).some(isUnfilled);
|
|
8418
|
+
const hasLookupRequests = (input.intent.lookupRequests?.length ?? 0) > 0;
|
|
8419
|
+
return {
|
|
8420
|
+
parseAnswers: input.intent.hasAnswers && hasCurrentFields,
|
|
8421
|
+
runLookup: hasLookupRequests && input.hasDocumentStore,
|
|
8422
|
+
answerQuestion: Boolean(input.intent.questionText) && (input.intent.primaryIntent === "question" || input.intent.primaryIntent === "mixed"),
|
|
8423
|
+
advanceBatch: hasCurrentFields && input.currentBatchFields.every((field) => !isUnfilled(field)),
|
|
8424
|
+
generateNextEmail: nextBatchNeedsAnswers
|
|
8425
|
+
};
|
|
8426
|
+
}
|
|
8427
|
+
function planDocumentSearchFields(unfilledFields, hasStores) {
|
|
8428
|
+
if (!hasStores || unfilledFields.length === 0) return [];
|
|
8429
|
+
const searchableFields = unfilledFields.filter(isHighValueLookupField);
|
|
8430
|
+
if (searchableFields.length === 0) return [];
|
|
8431
|
+
const lowValueRatio = 1 - searchableFields.length / unfilledFields.length;
|
|
8432
|
+
if (unfilledFields.length > MAX_DOCUMENT_SEARCH_FIELDS && lowValueRatio > LOW_VALUE_FIELD_RATIO_LIMIT) {
|
|
8433
|
+
return [];
|
|
8434
|
+
}
|
|
8435
|
+
return searchableFields.slice(0, MAX_DOCUMENT_SEARCH_FIELDS);
|
|
8436
|
+
}
|
|
8437
|
+
function isUnfilled(field) {
|
|
8438
|
+
return field.value === void 0 || field.value.trim() === "";
|
|
8439
|
+
}
|
|
8440
|
+
function isHighValueLookupField(field) {
|
|
8441
|
+
const text = `${field.section} ${field.label}`.toLowerCase();
|
|
8442
|
+
if (field.required) return true;
|
|
8443
|
+
return [
|
|
8444
|
+
"carrier",
|
|
8445
|
+
"policy",
|
|
8446
|
+
"premium",
|
|
8447
|
+
"limit",
|
|
8448
|
+
"deductible",
|
|
8449
|
+
"insured",
|
|
8450
|
+
"address",
|
|
8451
|
+
"revenue",
|
|
8452
|
+
"payroll",
|
|
8453
|
+
"effective",
|
|
8454
|
+
"expiration",
|
|
8455
|
+
"coverage",
|
|
8456
|
+
"class code",
|
|
8457
|
+
"fein",
|
|
8458
|
+
"entity"
|
|
8459
|
+
].some((term) => text.includes(term));
|
|
8460
|
+
}
|
|
8461
|
+
|
|
8082
8462
|
// src/application/coordinator.ts
|
|
8083
8463
|
function createApplicationPipeline(config) {
|
|
8084
8464
|
const {
|
|
@@ -8177,27 +8557,37 @@ function createApplicationPipeline(config) {
|
|
|
8177
8557
|
state.updatedAt = Date.now();
|
|
8178
8558
|
await applicationStore?.save(state);
|
|
8179
8559
|
onProgress?.(`Auto-filling ${fields.length} fields...`);
|
|
8180
|
-
|
|
8181
|
-
|
|
8182
|
-
|
|
8183
|
-
|
|
8184
|
-
|
|
8185
|
-
|
|
8186
|
-
|
|
8187
|
-
|
|
8188
|
-
|
|
8189
|
-
|
|
8190
|
-
|
|
8191
|
-
|
|
8192
|
-
|
|
8193
|
-
|
|
8194
|
-
|
|
8195
|
-
|
|
8560
|
+
let workflowPlan = planApplicationWorkflow({
|
|
8561
|
+
fields: state.fields,
|
|
8562
|
+
hasBackfillProvider: Boolean(backfillProvider),
|
|
8563
|
+
orgContextCount: orgContext.length,
|
|
8564
|
+
hasDocumentStore: Boolean(documentStore),
|
|
8565
|
+
hasMemoryStore: Boolean(memoryStore)
|
|
8566
|
+
});
|
|
8567
|
+
if (workflowPlan.runBackfill && backfillProvider) {
|
|
8568
|
+
try {
|
|
8569
|
+
const priorAnswers = await backfillFromPriorAnswers(state.fields, backfillProvider);
|
|
8570
|
+
for (const pa of priorAnswers) {
|
|
8571
|
+
const field = state.fields.find((f) => f.id === pa.fieldId);
|
|
8572
|
+
if (field && !field.value && pa.relevance > 0.8) {
|
|
8573
|
+
field.value = pa.value;
|
|
8574
|
+
field.source = `backfill: ${pa.source}`;
|
|
8575
|
+
field.confidence = "high";
|
|
8196
8576
|
}
|
|
8197
|
-
}
|
|
8198
|
-
)
|
|
8577
|
+
}
|
|
8578
|
+
} catch (e) {
|
|
8579
|
+
await log?.(`Backfill failed: ${e}`);
|
|
8580
|
+
}
|
|
8199
8581
|
}
|
|
8200
|
-
|
|
8582
|
+
workflowPlan = planApplicationWorkflow({
|
|
8583
|
+
fields: state.fields,
|
|
8584
|
+
hasBackfillProvider: false,
|
|
8585
|
+
orgContextCount: orgContext.length,
|
|
8586
|
+
hasDocumentStore: Boolean(documentStore),
|
|
8587
|
+
hasMemoryStore: Boolean(memoryStore)
|
|
8588
|
+
});
|
|
8589
|
+
const fillTasks = [];
|
|
8590
|
+
if (workflowPlan.runContextAutoFill) {
|
|
8201
8591
|
fillTasks.push(
|
|
8202
8592
|
limit(async () => {
|
|
8203
8593
|
const unfilledFields2 = state.fields.filter((f) => !f.value);
|
|
@@ -8224,18 +8614,13 @@ function createApplicationPipeline(config) {
|
|
|
8224
8614
|
})
|
|
8225
8615
|
);
|
|
8226
8616
|
}
|
|
8227
|
-
if (
|
|
8617
|
+
if (workflowPlan.documentSearchFields.length > 0 && memoryStore) {
|
|
8228
8618
|
fillTasks.push(
|
|
8229
8619
|
(async () => {
|
|
8230
8620
|
try {
|
|
8231
|
-
const
|
|
8232
|
-
const searchPromises = unfilledFields2.slice(0, 10).map(
|
|
8621
|
+
const searchPromises = workflowPlan.documentSearchFields.map(
|
|
8233
8622
|
(f) => limit(async () => {
|
|
8234
|
-
|
|
8235
|
-
for (const chunk of chunks) {
|
|
8236
|
-
if (!state.fields.find((sf) => sf.id === f.id)?.value) {
|
|
8237
|
-
}
|
|
8238
|
-
}
|
|
8623
|
+
await memoryStore.search(f.label, { limit: 3 });
|
|
8239
8624
|
})
|
|
8240
8625
|
);
|
|
8241
8626
|
await Promise.all(searchPromises);
|
|
@@ -8248,8 +8633,15 @@ function createApplicationPipeline(config) {
|
|
|
8248
8633
|
await Promise.all(fillTasks);
|
|
8249
8634
|
state.updatedAt = Date.now();
|
|
8250
8635
|
await applicationStore?.save(state);
|
|
8251
|
-
|
|
8252
|
-
|
|
8636
|
+
workflowPlan = planApplicationWorkflow({
|
|
8637
|
+
fields: state.fields,
|
|
8638
|
+
hasBackfillProvider: false,
|
|
8639
|
+
orgContextCount: 0,
|
|
8640
|
+
hasDocumentStore: false,
|
|
8641
|
+
hasMemoryStore: false
|
|
8642
|
+
});
|
|
8643
|
+
const unfilledFields = workflowPlan.unfilledFields;
|
|
8644
|
+
if (workflowPlan.runBatching) {
|
|
8253
8645
|
onProgress?.(`Batching ${unfilledFields.length} remaining questions...`);
|
|
8254
8646
|
state.status = "batching";
|
|
8255
8647
|
try {
|
|
@@ -8316,7 +8708,12 @@ function createApplicationPipeline(config) {
|
|
|
8316
8708
|
}
|
|
8317
8709
|
let fieldsFilled = 0;
|
|
8318
8710
|
let responseText;
|
|
8319
|
-
|
|
8711
|
+
let replyPlan = planReplyActions({
|
|
8712
|
+
intent,
|
|
8713
|
+
currentBatchFields,
|
|
8714
|
+
hasDocumentStore: Boolean(documentStore)
|
|
8715
|
+
});
|
|
8716
|
+
if (replyPlan.parseAnswers) {
|
|
8320
8717
|
onProgress?.("Parsing answers...");
|
|
8321
8718
|
try {
|
|
8322
8719
|
const { result: parseResult, usage: parseUsage } = await parseAnswers(
|
|
@@ -8339,7 +8736,7 @@ function createApplicationPipeline(config) {
|
|
|
8339
8736
|
await log?.(`Answer parsing failed: ${error instanceof Error ? error.message : String(error)}`);
|
|
8340
8737
|
}
|
|
8341
8738
|
}
|
|
8342
|
-
if (intent.lookupRequests?.length) {
|
|
8739
|
+
if (replyPlan.runLookup && intent.lookupRequests?.length) {
|
|
8343
8740
|
onProgress?.("Processing lookup requests...");
|
|
8344
8741
|
let availableData = "";
|
|
8345
8742
|
if (documentStore) {
|
|
@@ -8380,64 +8777,78 @@ function createApplicationPipeline(config) {
|
|
|
8380
8777
|
}
|
|
8381
8778
|
}
|
|
8382
8779
|
}
|
|
8383
|
-
if (
|
|
8384
|
-
|
|
8385
|
-
|
|
8386
|
-
|
|
8387
|
-
prompt: `The user is filling out an insurance application and asked: "${intent.questionText}"
|
|
8780
|
+
if (replyPlan.answerQuestion && intent.questionText) {
|
|
8781
|
+
try {
|
|
8782
|
+
const { text, usage } = await generateText({
|
|
8783
|
+
prompt: `The user is filling out an insurance application and asked: "${intent.questionText}"
|
|
8388
8784
|
|
|
8389
8785
|
Provide a brief, helpful explanation (2-3 sentences). End with "Just reply with the answer when you're ready and I'll fill it in."`,
|
|
8390
|
-
|
|
8391
|
-
|
|
8392
|
-
|
|
8393
|
-
|
|
8394
|
-
|
|
8395
|
-
|
|
8396
|
-
|
|
8397
|
-
|
|
8398
|
-
}
|
|
8786
|
+
maxTokens: 512,
|
|
8787
|
+
providerOptions
|
|
8788
|
+
});
|
|
8789
|
+
trackUsage(usage);
|
|
8790
|
+
responseText = text;
|
|
8791
|
+
} catch (error) {
|
|
8792
|
+
await log?.(`Question response generation failed: ${error instanceof Error ? error.message : String(error)}`);
|
|
8793
|
+
responseText = `I wasn't able to generate an explanation for your question. Could you rephrase it, or just provide the answer directly?`;
|
|
8399
8794
|
}
|
|
8400
8795
|
}
|
|
8401
8796
|
const currentBatchComplete = currentBatchFieldIds.every(
|
|
8402
8797
|
(fid) => state.fields.find((f) => f.id === fid)?.value
|
|
8403
8798
|
);
|
|
8404
|
-
|
|
8405
|
-
|
|
8406
|
-
|
|
8407
|
-
|
|
8408
|
-
const
|
|
8409
|
-
|
|
8410
|
-
|
|
8799
|
+
let nextBatchIndex;
|
|
8800
|
+
let nextBatchFields;
|
|
8801
|
+
if (state.batches) {
|
|
8802
|
+
for (let index = state.currentBatchIndex + 1; index < state.batches.length; index++) {
|
|
8803
|
+
const candidateFields = state.fields.filter((f) => state.batches[index].includes(f.id));
|
|
8804
|
+
if (candidateFields.some((f) => !f.value)) {
|
|
8805
|
+
nextBatchIndex = index;
|
|
8806
|
+
nextBatchFields = candidateFields;
|
|
8807
|
+
break;
|
|
8808
|
+
}
|
|
8809
|
+
}
|
|
8810
|
+
}
|
|
8811
|
+
replyPlan = planReplyActions({
|
|
8812
|
+
intent,
|
|
8813
|
+
currentBatchFields,
|
|
8814
|
+
nextBatchFields,
|
|
8815
|
+
hasDocumentStore: Boolean(documentStore)
|
|
8816
|
+
});
|
|
8817
|
+
if (currentBatchComplete && replyPlan.advanceBatch && state.batches) {
|
|
8818
|
+
if (nextBatchIndex !== void 0 && nextBatchFields) {
|
|
8819
|
+
state.currentBatchIndex = nextBatchIndex;
|
|
8411
8820
|
const filledCount = state.fields.filter((f) => f.value).length;
|
|
8412
|
-
|
|
8413
|
-
|
|
8414
|
-
|
|
8415
|
-
|
|
8416
|
-
|
|
8417
|
-
|
|
8418
|
-
|
|
8419
|
-
|
|
8420
|
-
|
|
8421
|
-
|
|
8422
|
-
|
|
8423
|
-
|
|
8424
|
-
|
|
8425
|
-
|
|
8426
|
-
|
|
8427
|
-
|
|
8428
|
-
|
|
8429
|
-
|
|
8430
|
-
|
|
8431
|
-
|
|
8432
|
-
|
|
8433
|
-
responseText
|
|
8434
|
-
|
|
8435
|
-
|
|
8821
|
+
if (replyPlan.generateNextEmail) {
|
|
8822
|
+
try {
|
|
8823
|
+
const { text: emailText, usage: emailUsage } = await generateBatchEmail(
|
|
8824
|
+
nextBatchFields,
|
|
8825
|
+
state.currentBatchIndex,
|
|
8826
|
+
state.batches.length,
|
|
8827
|
+
{
|
|
8828
|
+
appTitle: state.title,
|
|
8829
|
+
totalFieldCount: state.fields.length,
|
|
8830
|
+
filledFieldCount: filledCount,
|
|
8831
|
+
companyName: context?.companyName
|
|
8832
|
+
},
|
|
8833
|
+
generateText,
|
|
8834
|
+
providerOptions
|
|
8835
|
+
);
|
|
8836
|
+
trackUsage(emailUsage);
|
|
8837
|
+
const emailReview = reviewBatchEmail(emailText, nextBatchFields);
|
|
8838
|
+
state.qualityReport = {
|
|
8839
|
+
...buildApplicationQualityReport(state),
|
|
8840
|
+
emailReview
|
|
8841
|
+
};
|
|
8842
|
+
if (!responseText) {
|
|
8843
|
+
responseText = emailText;
|
|
8844
|
+
} else {
|
|
8845
|
+
responseText += `
|
|
8436
8846
|
|
|
8437
8847
|
${emailText}`;
|
|
8848
|
+
}
|
|
8849
|
+
} catch (error) {
|
|
8850
|
+
await log?.(`Batch email generation failed: ${error instanceof Error ? error.message : String(error)}`);
|
|
8438
8851
|
}
|
|
8439
|
-
} catch (error) {
|
|
8440
|
-
await log?.(`Batch email generation failed: ${error instanceof Error ? error.message : String(error)}`);
|
|
8441
8852
|
}
|
|
8442
8853
|
} else {
|
|
8443
8854
|
state.status = "confirming";
|
|
@@ -9251,6 +9662,42 @@ ${item.text}`).join("\n\n");
|
|
|
9251
9662
|
return { evidence, contextSummary };
|
|
9252
9663
|
}
|
|
9253
9664
|
|
|
9665
|
+
// src/query/workflow.ts
|
|
9666
|
+
function shouldRetrieveForClassification(classification) {
|
|
9667
|
+
return classification.requiresDocumentLookup || classification.requiresChunkSearch;
|
|
9668
|
+
}
|
|
9669
|
+
function buildInitialQueryWorkflowPlan(params) {
|
|
9670
|
+
const { classification, attachmentEvidence } = params;
|
|
9671
|
+
const actions = [];
|
|
9672
|
+
const shouldRetrieve = shouldRetrieveForClassification(classification);
|
|
9673
|
+
if (shouldRetrieve) {
|
|
9674
|
+
actions.push({
|
|
9675
|
+
type: "retrieve",
|
|
9676
|
+
subQuestions: classification.subQuestions,
|
|
9677
|
+
reason: "classification requested document or chunk lookup"
|
|
9678
|
+
});
|
|
9679
|
+
}
|
|
9680
|
+
actions.push({
|
|
9681
|
+
type: "reason",
|
|
9682
|
+
subQuestions: classification.subQuestions,
|
|
9683
|
+
reason: shouldRetrieve ? "answer with retrieved evidence and any attachment evidence" : attachmentEvidence.length > 0 ? "answer with attachment evidence only" : "answer without document retrieval"
|
|
9684
|
+
});
|
|
9685
|
+
actions.push(
|
|
9686
|
+
{
|
|
9687
|
+
type: "verify",
|
|
9688
|
+
reason: "check grounding and request targeted retries when needed"
|
|
9689
|
+
},
|
|
9690
|
+
{
|
|
9691
|
+
type: "respond",
|
|
9692
|
+
reason: "compose final response"
|
|
9693
|
+
}
|
|
9694
|
+
);
|
|
9695
|
+
return { actions, shouldRetrieve };
|
|
9696
|
+
}
|
|
9697
|
+
function getWorkflowAction(plan, type) {
|
|
9698
|
+
return plan.actions.find((action) => action.type === type);
|
|
9699
|
+
}
|
|
9700
|
+
|
|
9254
9701
|
// src/query/coordinator.ts
|
|
9255
9702
|
function createQueryAgent(config) {
|
|
9256
9703
|
const {
|
|
@@ -9295,29 +9742,37 @@ function createQueryAgent(config) {
|
|
|
9295
9742
|
onProgress?.("Classifying query...");
|
|
9296
9743
|
const classification = await classify(question, conversationId, attachmentContext);
|
|
9297
9744
|
await pipelineCtx.save("classify", { classification, attachmentEvidence });
|
|
9298
|
-
onProgress?.(`Retrieving evidence for ${classification.subQuestions.length} sub-question(s)...`);
|
|
9299
9745
|
const retrieverConfig = {
|
|
9300
9746
|
documentStore,
|
|
9301
9747
|
memoryStore,
|
|
9302
9748
|
retrievalLimit,
|
|
9303
9749
|
log
|
|
9304
9750
|
};
|
|
9305
|
-
const
|
|
9306
|
-
|
|
9307
|
-
|
|
9308
|
-
|
|
9309
|
-
)
|
|
9751
|
+
const workflowPlan = buildInitialQueryWorkflowPlan({ classification, attachmentEvidence });
|
|
9752
|
+
const retrieveAction = getWorkflowAction(workflowPlan, "retrieve");
|
|
9753
|
+
const reasonAction = getWorkflowAction(workflowPlan, "reason");
|
|
9754
|
+
await pipelineCtx.save("workflow", { classification, attachmentEvidence, workflowPlan });
|
|
9755
|
+
const retrievalResults = retrieveAction ? await (async () => {
|
|
9756
|
+
onProgress?.(`Retrieving evidence for ${retrieveAction.subQuestions.length} sub-question(s)...`);
|
|
9757
|
+
return Promise.all(
|
|
9758
|
+
retrieveAction.subQuestions.map(
|
|
9759
|
+
(sq) => limit(() => retrieve(sq, conversationId, retrieverConfig))
|
|
9760
|
+
)
|
|
9761
|
+
);
|
|
9762
|
+
})() : [];
|
|
9310
9763
|
const allEvidence = [...attachmentEvidence, ...retrievalResults.flatMap((r) => r.evidence)];
|
|
9311
9764
|
await pipelineCtx.save("retrieve", { classification, attachmentEvidence, evidence: allEvidence });
|
|
9312
9765
|
onProgress?.("Reasoning over evidence...");
|
|
9313
9766
|
const reasonerConfig = { generateObject, providerOptions };
|
|
9767
|
+
const subQuestionsToReason = reasonAction?.subQuestions ?? classification.subQuestions;
|
|
9314
9768
|
const reasonResults = await Promise.allSettled(
|
|
9315
|
-
|
|
9316
|
-
(sq
|
|
9769
|
+
subQuestionsToReason.map(
|
|
9770
|
+
(sq) => limit(async () => {
|
|
9771
|
+
const retrievedEvidence = retrievalResults.find((r) => r.subQuestion === sq.question)?.evidence ?? [];
|
|
9317
9772
|
const { subAnswer, usage } = await reason(
|
|
9318
9773
|
sq.question,
|
|
9319
9774
|
sq.intent,
|
|
9320
|
-
[...attachmentEvidence, ...
|
|
9775
|
+
[...attachmentEvidence, ...retrievedEvidence],
|
|
9321
9776
|
reasonerConfig
|
|
9322
9777
|
);
|
|
9323
9778
|
trackUsage(usage);
|
|
@@ -9331,9 +9786,9 @@ function createQueryAgent(config) {
|
|
|
9331
9786
|
if (result.status === "fulfilled") {
|
|
9332
9787
|
subAnswers.push(result.value);
|
|
9333
9788
|
} else {
|
|
9334
|
-
await log?.(`Reasoner failed for sub-question "${
|
|
9789
|
+
await log?.(`Reasoner failed for sub-question "${subQuestionsToReason[i].question}": ${result.reason}`);
|
|
9335
9790
|
subAnswers.push({
|
|
9336
|
-
subQuestion:
|
|
9791
|
+
subQuestion: subQuestionsToReason[i].question,
|
|
9337
9792
|
answer: "Unable to answer this part of the question due to a processing error.",
|
|
9338
9793
|
citations: [],
|
|
9339
9794
|
confidence: 0,
|