@claritylabs/cl-sdk 0.8.1 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +18 -623
- package/dist/index.d.mts +865 -65
- package/dist/index.d.ts +865 -65
- package/dist/index.js +1536 -386
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +1530 -386
- package/dist/index.mjs.map +1 -1
- package/dist/storage-sqlite.d.mts +114 -24
- package/dist/storage-sqlite.d.ts +114 -24
- package/package.json +1 -1
package/dist/index.mjs
CHANGED
|
@@ -489,7 +489,9 @@ var FormReferenceSchema = z3.object({
|
|
|
489
489
|
formNumber: z3.string(),
|
|
490
490
|
editionDate: z3.string().optional(),
|
|
491
491
|
title: z3.string().optional(),
|
|
492
|
-
formType: z3.enum(["coverage", "endorsement", "declarations", "application", "notice", "other"])
|
|
492
|
+
formType: z3.enum(["coverage", "endorsement", "declarations", "application", "notice", "other"]),
|
|
493
|
+
pageStart: z3.number().optional(),
|
|
494
|
+
pageEnd: z3.number().optional()
|
|
493
495
|
});
|
|
494
496
|
var TaxFeeItemSchema = z3.object({
|
|
495
497
|
name: z3.string(),
|
|
@@ -526,12 +528,25 @@ var NamedInsuredSchema = z3.object({
|
|
|
526
528
|
|
|
527
529
|
// src/schemas/coverage.ts
|
|
528
530
|
import { z as z4 } from "zod";
|
|
531
|
+
var CoverageValueTypeSchema = z4.enum([
|
|
532
|
+
"numeric",
|
|
533
|
+
"included",
|
|
534
|
+
"not_included",
|
|
535
|
+
"as_stated",
|
|
536
|
+
"waiting_period",
|
|
537
|
+
"referential",
|
|
538
|
+
"other"
|
|
539
|
+
]);
|
|
529
540
|
var CoverageSchema = z4.object({
|
|
530
541
|
name: z4.string(),
|
|
531
542
|
limit: z4.string(),
|
|
543
|
+
limitValueType: CoverageValueTypeSchema.optional(),
|
|
532
544
|
deductible: z4.string().optional(),
|
|
545
|
+
deductibleValueType: CoverageValueTypeSchema.optional(),
|
|
546
|
+
formNumber: z4.string().optional(),
|
|
533
547
|
pageNumber: z4.number().optional(),
|
|
534
|
-
sectionRef: z4.string().optional()
|
|
548
|
+
sectionRef: z4.string().optional(),
|
|
549
|
+
originalContent: z4.string().optional()
|
|
535
550
|
});
|
|
536
551
|
var EnrichedCoverageSchema = z4.object({
|
|
537
552
|
name: z4.string(),
|
|
@@ -540,8 +555,10 @@ var EnrichedCoverageSchema = z4.object({
|
|
|
540
555
|
formEditionDate: z4.string().optional(),
|
|
541
556
|
limit: z4.string(),
|
|
542
557
|
limitType: LimitTypeSchema.optional(),
|
|
558
|
+
limitValueType: CoverageValueTypeSchema.optional(),
|
|
543
559
|
deductible: z4.string().optional(),
|
|
544
560
|
deductibleType: DeductibleTypeSchema.optional(),
|
|
561
|
+
deductibleValueType: CoverageValueTypeSchema.optional(),
|
|
545
562
|
sir: z4.string().optional(),
|
|
546
563
|
sublimit: z4.string().optional(),
|
|
547
564
|
coinsurance: z4.string().optional(),
|
|
@@ -552,7 +569,8 @@ var EnrichedCoverageSchema = z4.object({
|
|
|
552
569
|
included: z4.boolean(),
|
|
553
570
|
premium: z4.string().optional(),
|
|
554
571
|
pageNumber: z4.number().optional(),
|
|
555
|
-
sectionRef: z4.string().optional()
|
|
572
|
+
sectionRef: z4.string().optional(),
|
|
573
|
+
originalContent: z4.string().optional()
|
|
556
574
|
});
|
|
557
575
|
|
|
558
576
|
// src/schemas/endorsement.ts
|
|
@@ -1569,6 +1587,7 @@ function assembleDocument(documentId, documentType, memory) {
|
|
|
1569
1587
|
const lossHistory = memory.get("loss_history");
|
|
1570
1588
|
const sections = memory.get("sections");
|
|
1571
1589
|
const supplementary = memory.get("supplementary");
|
|
1590
|
+
const formInventory = memory.get("form_inventory");
|
|
1572
1591
|
const classify = memory.get("classify");
|
|
1573
1592
|
const base = {
|
|
1574
1593
|
id: documentId,
|
|
@@ -1585,6 +1604,7 @@ function assembleDocument(documentId, documentType, memory) {
|
|
|
1585
1604
|
exclusions: exclusions?.exclusions,
|
|
1586
1605
|
conditions: conditions?.conditions,
|
|
1587
1606
|
sections: sections?.sections,
|
|
1607
|
+
formInventory: formInventory?.forms,
|
|
1588
1608
|
declarations: declarations ? sanitizeNulls(declarations) : void 0,
|
|
1589
1609
|
...sanitizeNulls(lossHistory ?? {})
|
|
1590
1610
|
};
|
|
@@ -1826,6 +1846,11 @@ async function formatDocumentContent(doc, generateText, options) {
|
|
|
1826
1846
|
function chunkDocument(doc) {
|
|
1827
1847
|
const chunks = [];
|
|
1828
1848
|
const docId = doc.id;
|
|
1849
|
+
function stringMetadata(entries) {
|
|
1850
|
+
return Object.fromEntries(
|
|
1851
|
+
Object.entries(entries).filter(([, value]) => value !== void 0 && value !== null && String(value).length > 0).map(([key, value]) => [key, String(value)])
|
|
1852
|
+
);
|
|
1853
|
+
}
|
|
1829
1854
|
chunks.push({
|
|
1830
1855
|
id: `${docId}:carrier_info:0`,
|
|
1831
1856
|
documentId: docId,
|
|
@@ -1837,7 +1862,7 @@ function chunkDocument(doc) {
|
|
|
1837
1862
|
doc.carrierAmBestRating ? `AM Best: ${doc.carrierAmBestRating}` : null,
|
|
1838
1863
|
doc.mga ? `MGA: ${doc.mga}` : null
|
|
1839
1864
|
].filter(Boolean).join("\n"),
|
|
1840
|
-
metadata: { carrier: doc.carrier, documentType: doc.type }
|
|
1865
|
+
metadata: stringMetadata({ carrier: doc.carrier, documentType: doc.type })
|
|
1841
1866
|
});
|
|
1842
1867
|
chunks.push({
|
|
1843
1868
|
id: `${docId}:named_insured:0`,
|
|
@@ -1849,17 +1874,32 @@ function chunkDocument(doc) {
|
|
|
1849
1874
|
doc.insuredFein ? `FEIN: ${doc.insuredFein}` : null,
|
|
1850
1875
|
doc.insuredAddress ? `Address: ${doc.insuredAddress.street1}, ${doc.insuredAddress.city}, ${doc.insuredAddress.state} ${doc.insuredAddress.zip}` : null
|
|
1851
1876
|
].filter(Boolean).join("\n"),
|
|
1852
|
-
metadata: { insuredName: doc.insuredName, documentType: doc.type }
|
|
1877
|
+
metadata: stringMetadata({ insuredName: doc.insuredName, documentType: doc.type })
|
|
1853
1878
|
});
|
|
1854
1879
|
doc.coverages.forEach((cov, i) => {
|
|
1855
1880
|
chunks.push({
|
|
1856
1881
|
id: `${docId}:coverage:${i}`,
|
|
1857
1882
|
documentId: docId,
|
|
1858
1883
|
type: "coverage",
|
|
1859
|
-
text:
|
|
1860
|
-
|
|
1861
|
-
|
|
1862
|
-
|
|
1884
|
+
text: [
|
|
1885
|
+
`Coverage: ${cov.name}`,
|
|
1886
|
+
`Limit: ${cov.limit}`,
|
|
1887
|
+
cov.limitValueType ? `Limit Type: ${cov.limitValueType}` : null,
|
|
1888
|
+
cov.deductible ? `Deductible: ${cov.deductible}` : null,
|
|
1889
|
+
cov.deductibleValueType ? `Deductible Type: ${cov.deductibleValueType}` : null,
|
|
1890
|
+
cov.originalContent ? `Source: ${cov.originalContent}` : null
|
|
1891
|
+
].filter(Boolean).join("\n"),
|
|
1892
|
+
metadata: stringMetadata({
|
|
1893
|
+
coverageName: cov.name,
|
|
1894
|
+
limit: cov.limit,
|
|
1895
|
+
limitValueType: cov.limitValueType,
|
|
1896
|
+
deductible: cov.deductible,
|
|
1897
|
+
deductibleValueType: cov.deductibleValueType,
|
|
1898
|
+
formNumber: cov.formNumber,
|
|
1899
|
+
pageNumber: cov.pageNumber,
|
|
1900
|
+
sectionRef: cov.sectionRef,
|
|
1901
|
+
documentType: doc.type
|
|
1902
|
+
})
|
|
1863
1903
|
});
|
|
1864
1904
|
});
|
|
1865
1905
|
doc.endorsements?.forEach((end, i) => {
|
|
@@ -1869,7 +1909,13 @@ Deductible: ${cov.deductible}` : ""}`,
|
|
|
1869
1909
|
type: "endorsement",
|
|
1870
1910
|
text: `Endorsement: ${end.title}
|
|
1871
1911
|
${end.content}`.trim(),
|
|
1872
|
-
metadata: {
|
|
1912
|
+
metadata: stringMetadata({
|
|
1913
|
+
endorsementType: end.endorsementType,
|
|
1914
|
+
formNumber: end.formNumber,
|
|
1915
|
+
pageStart: end.pageStart,
|
|
1916
|
+
pageEnd: end.pageEnd,
|
|
1917
|
+
documentType: doc.type
|
|
1918
|
+
})
|
|
1873
1919
|
});
|
|
1874
1920
|
});
|
|
1875
1921
|
doc.exclusions?.forEach((exc, i) => {
|
|
@@ -1879,7 +1925,7 @@ ${end.content}`.trim(),
|
|
|
1879
1925
|
type: "exclusion",
|
|
1880
1926
|
text: `Exclusion: ${exc.name}
|
|
1881
1927
|
${exc.content}`.trim(),
|
|
1882
|
-
metadata: { documentType: doc.type }
|
|
1928
|
+
metadata: stringMetadata({ formNumber: exc.formNumber, pageNumber: exc.pageNumber, documentType: doc.type })
|
|
1883
1929
|
});
|
|
1884
1930
|
});
|
|
1885
1931
|
doc.sections?.forEach((sec, i) => {
|
|
@@ -1889,7 +1935,7 @@ ${exc.content}`.trim(),
|
|
|
1889
1935
|
type: "section",
|
|
1890
1936
|
text: `Section: ${sec.title}
|
|
1891
1937
|
${sec.content}`,
|
|
1892
|
-
metadata: { sectionType: sec.type, documentType: doc.type }
|
|
1938
|
+
metadata: stringMetadata({ sectionType: sec.type, pageStart: sec.pageStart, pageEnd: sec.pageEnd, documentType: doc.type })
|
|
1893
1939
|
});
|
|
1894
1940
|
});
|
|
1895
1941
|
if (doc.premium) {
|
|
@@ -1899,12 +1945,138 @@ ${sec.content}`,
|
|
|
1899
1945
|
type: "premium",
|
|
1900
1946
|
text: `Premium: ${doc.premium}${doc.totalCost ? `
|
|
1901
1947
|
Total Cost: ${doc.totalCost}` : ""}`,
|
|
1902
|
-
metadata: { premium: doc.premium, documentType: doc.type }
|
|
1948
|
+
metadata: stringMetadata({ premium: doc.premium, documentType: doc.type })
|
|
1903
1949
|
});
|
|
1904
1950
|
}
|
|
1905
1951
|
return chunks;
|
|
1906
1952
|
}
|
|
1907
1953
|
|
|
1954
|
+
// src/extraction/merge.ts
|
|
1955
|
+
function isPresent(value) {
|
|
1956
|
+
if (value === void 0 || value === null) return false;
|
|
1957
|
+
if (typeof value === "string") return value.trim().length > 0;
|
|
1958
|
+
if (Array.isArray(value)) return value.length > 0;
|
|
1959
|
+
return true;
|
|
1960
|
+
}
|
|
1961
|
+
function dedupeByKey(items, keyFn) {
|
|
1962
|
+
const seen = /* @__PURE__ */ new Set();
|
|
1963
|
+
const merged = [];
|
|
1964
|
+
for (const item of items) {
|
|
1965
|
+
const key = keyFn(item);
|
|
1966
|
+
if (seen.has(key)) continue;
|
|
1967
|
+
seen.add(key);
|
|
1968
|
+
merged.push(item);
|
|
1969
|
+
}
|
|
1970
|
+
return merged;
|
|
1971
|
+
}
|
|
1972
|
+
function mergeUniqueObjects(existing, incoming, keyFn) {
|
|
1973
|
+
return dedupeByKey([...existing, ...incoming], keyFn);
|
|
1974
|
+
}
|
|
1975
|
+
function mergeShallowPreferPresent(existing, incoming) {
|
|
1976
|
+
const merged = { ...existing };
|
|
1977
|
+
for (const [key, value] of Object.entries(incoming)) {
|
|
1978
|
+
const current = merged[key];
|
|
1979
|
+
if (Array.isArray(current) && Array.isArray(value)) {
|
|
1980
|
+
merged[key] = [...current, ...value];
|
|
1981
|
+
continue;
|
|
1982
|
+
}
|
|
1983
|
+
if (current && value && typeof current === "object" && typeof value === "object" && !Array.isArray(current) && !Array.isArray(value)) {
|
|
1984
|
+
merged[key] = mergeShallowPreferPresent(
|
|
1985
|
+
current,
|
|
1986
|
+
value
|
|
1987
|
+
);
|
|
1988
|
+
continue;
|
|
1989
|
+
}
|
|
1990
|
+
if (!isPresent(current) && isPresent(value)) {
|
|
1991
|
+
merged[key] = value;
|
|
1992
|
+
}
|
|
1993
|
+
}
|
|
1994
|
+
return merged;
|
|
1995
|
+
}
|
|
1996
|
+
function mergeCoverageLimits(existing, incoming) {
|
|
1997
|
+
const merged = mergeShallowPreferPresent(existing, incoming);
|
|
1998
|
+
const existingCoverages = Array.isArray(existing.coverages) ? existing.coverages : [];
|
|
1999
|
+
const incomingCoverages = Array.isArray(incoming.coverages) ? incoming.coverages : [];
|
|
2000
|
+
const coverageKey = (coverage) => [
|
|
2001
|
+
String(coverage.name ?? "").toLowerCase(),
|
|
2002
|
+
String(coverage.limit ?? "").toLowerCase(),
|
|
2003
|
+
String(coverage.deductible ?? "").toLowerCase(),
|
|
2004
|
+
String(coverage.formNumber ?? "").toLowerCase()
|
|
2005
|
+
].join("|");
|
|
2006
|
+
const byKey = /* @__PURE__ */ new Map();
|
|
2007
|
+
for (const coverage of [...existingCoverages, ...incomingCoverages]) {
|
|
2008
|
+
const key = coverageKey(coverage);
|
|
2009
|
+
const current = byKey.get(key);
|
|
2010
|
+
byKey.set(key, current ? mergeShallowPreferPresent(current, coverage) : coverage);
|
|
2011
|
+
}
|
|
2012
|
+
merged.coverages = [...byKey.values()];
|
|
2013
|
+
return merged;
|
|
2014
|
+
}
|
|
2015
|
+
function mergeDeclarations(existing, incoming) {
|
|
2016
|
+
const merged = mergeShallowPreferPresent(existing, incoming);
|
|
2017
|
+
const existingFields = Array.isArray(existing.fields) ? existing.fields : [];
|
|
2018
|
+
const incomingFields = Array.isArray(incoming.fields) ? incoming.fields : [];
|
|
2019
|
+
merged.fields = mergeUniqueObjects(existingFields, incomingFields, (field) => [
|
|
2020
|
+
String(field.field ?? "").toLowerCase(),
|
|
2021
|
+
String(field.value ?? "").toLowerCase(),
|
|
2022
|
+
String(field.section ?? "").toLowerCase()
|
|
2023
|
+
].join("|"));
|
|
2024
|
+
return merged;
|
|
2025
|
+
}
|
|
2026
|
+
function mergeArrayPayload(existing, incoming, arrayKey, keyFn) {
|
|
2027
|
+
const merged = mergeShallowPreferPresent(existing, incoming);
|
|
2028
|
+
const existingItems = Array.isArray(existing[arrayKey]) ? existing[arrayKey] : [];
|
|
2029
|
+
const incomingItems = Array.isArray(incoming[arrayKey]) ? incoming[arrayKey] : [];
|
|
2030
|
+
merged[arrayKey] = mergeUniqueObjects(existingItems, incomingItems, keyFn);
|
|
2031
|
+
return merged;
|
|
2032
|
+
}
|
|
2033
|
+
function mergeExtractorResult(extractorName, existing, incoming) {
|
|
2034
|
+
if (!existing) return incoming;
|
|
2035
|
+
if (!incoming) return existing;
|
|
2036
|
+
if (typeof existing !== "object" || typeof incoming !== "object") return incoming;
|
|
2037
|
+
const current = existing;
|
|
2038
|
+
const next = incoming;
|
|
2039
|
+
switch (extractorName) {
|
|
2040
|
+
case "carrier_info":
|
|
2041
|
+
case "named_insured":
|
|
2042
|
+
case "loss_history":
|
|
2043
|
+
case "supplementary":
|
|
2044
|
+
case "premium_breakdown":
|
|
2045
|
+
return mergeShallowPreferPresent(current, next);
|
|
2046
|
+
case "coverage_limits":
|
|
2047
|
+
return mergeCoverageLimits(current, next);
|
|
2048
|
+
case "declarations":
|
|
2049
|
+
return mergeDeclarations(current, next);
|
|
2050
|
+
case "endorsements":
|
|
2051
|
+
return mergeArrayPayload(current, next, "endorsements", (item) => [
|
|
2052
|
+
String(item.formNumber ?? "").toLowerCase(),
|
|
2053
|
+
String(item.title ?? "").toLowerCase(),
|
|
2054
|
+
String(item.pageStart ?? "")
|
|
2055
|
+
].join("|"));
|
|
2056
|
+
case "exclusions":
|
|
2057
|
+
return mergeArrayPayload(current, next, "exclusions", (item) => [
|
|
2058
|
+
String(item.name ?? "").toLowerCase(),
|
|
2059
|
+
String(item.formNumber ?? "").toLowerCase(),
|
|
2060
|
+
String(item.pageNumber ?? "")
|
|
2061
|
+
].join("|"));
|
|
2062
|
+
case "conditions":
|
|
2063
|
+
return mergeArrayPayload(current, next, "conditions", (item) => [
|
|
2064
|
+
String(item.name ?? "").toLowerCase(),
|
|
2065
|
+
String(item.conditionType ?? "").toLowerCase(),
|
|
2066
|
+
String(item.pageNumber ?? "")
|
|
2067
|
+
].join("|"));
|
|
2068
|
+
case "sections":
|
|
2069
|
+
return mergeArrayPayload(current, next, "sections", (item) => [
|
|
2070
|
+
String(item.title ?? "").toLowerCase(),
|
|
2071
|
+
String(item.type ?? "").toLowerCase(),
|
|
2072
|
+
String(item.pageStart ?? ""),
|
|
2073
|
+
String(item.pageEnd ?? "")
|
|
2074
|
+
].join("|"));
|
|
2075
|
+
default:
|
|
2076
|
+
return mergeShallowPreferPresent(current, next);
|
|
2077
|
+
}
|
|
2078
|
+
}
|
|
2079
|
+
|
|
1908
2080
|
// src/prompts/templates/homeowners.ts
|
|
1909
2081
|
var HOMEOWNERS_TEMPLATE = {
|
|
1910
2082
|
type: "homeowners",
|
|
@@ -2694,74 +2866,156 @@ Return JSON only:
|
|
|
2694
2866
|
}`;
|
|
2695
2867
|
}
|
|
2696
2868
|
|
|
2697
|
-
// src/prompts/coordinator/
|
|
2869
|
+
// src/prompts/coordinator/form-inventory.ts
|
|
2698
2870
|
import { z as z19 } from "zod";
|
|
2699
|
-
var
|
|
2700
|
-
|
|
2701
|
-
|
|
2702
|
-
|
|
2703
|
-
description: z19.string()
|
|
2871
|
+
var FormInventoryEntrySchema = FormReferenceSchema.extend({
|
|
2872
|
+
formNumber: FormReferenceSchema.shape.formNumber.describe("Form number or identifier, e.g. PR5070CF"),
|
|
2873
|
+
pageStart: FormReferenceSchema.shape.pageStart.describe("Original document page where the form begins"),
|
|
2874
|
+
pageEnd: FormReferenceSchema.shape.pageEnd.describe("Original document page where the form ends")
|
|
2704
2875
|
});
|
|
2705
|
-
var
|
|
2706
|
-
|
|
2707
|
-
pages: z19.string()
|
|
2876
|
+
var FormInventorySchema = z19.object({
|
|
2877
|
+
forms: z19.array(FormInventoryEntrySchema)
|
|
2708
2878
|
});
|
|
2709
|
-
|
|
2710
|
-
|
|
2711
|
-
pageMap: z19.array(PageMapEntrySchema).optional()
|
|
2712
|
-
});
|
|
2713
|
-
function buildPlanPrompt(templateHints) {
|
|
2714
|
-
return `You are planning the extraction of an insurance document. You have already classified this document. Now scan the full document and create a page map + extraction plan.
|
|
2879
|
+
function buildFormInventoryPrompt(templateHints) {
|
|
2880
|
+
return `You are building a form inventory for an insurance document.
|
|
2715
2881
|
|
|
2716
2882
|
DOCUMENT TYPE HINTS:
|
|
2717
2883
|
${templateHints}
|
|
2718
2884
|
|
|
2719
|
-
|
|
2885
|
+
Extract every distinct declarations page set, policy form, coverage form, endorsement, application form, and notice form that appears in the document.
|
|
2886
|
+
|
|
2887
|
+
For EACH form, extract:
|
|
2888
|
+
- formNumber: REQUIRED when present
|
|
2889
|
+
- editionDate: if shown
|
|
2890
|
+
- title: if shown
|
|
2891
|
+
- formType: one of coverage, endorsement, declarations, application, notice, other
|
|
2892
|
+
- pageStart: original page where the form begins
|
|
2893
|
+
- pageEnd: original page where the form ends
|
|
2894
|
+
|
|
2895
|
+
Critical rules:
|
|
2896
|
+
- Include declarations page sets even if they do not show a standard form number.
|
|
2897
|
+
- Use original document page numbers, not local chunk page numbers.
|
|
2898
|
+
- Do not emit duplicate entries for repeated headers/footers.
|
|
2899
|
+
- Multi-page forms should be represented once with pageStart/pageEnd covering the full span when visible.
|
|
2900
|
+
- If a form number is visible in endorsements, schedules, or form headers, include it even if the full form title is partial.
|
|
2901
|
+
|
|
2902
|
+
Respond with JSON only.`;
|
|
2903
|
+
}
|
|
2904
|
+
|
|
2905
|
+
// src/prompts/coordinator/page-map.ts
|
|
2906
|
+
import { z as z20 } from "zod";
|
|
2907
|
+
var PageExtractorSchema = z20.enum([
|
|
2908
|
+
"carrier_info",
|
|
2909
|
+
"named_insured",
|
|
2910
|
+
"coverage_limits",
|
|
2911
|
+
"endorsements",
|
|
2912
|
+
"exclusions",
|
|
2913
|
+
"conditions",
|
|
2914
|
+
"premium_breakdown",
|
|
2915
|
+
"declarations",
|
|
2916
|
+
"loss_history",
|
|
2917
|
+
"sections",
|
|
2918
|
+
"supplementary"
|
|
2919
|
+
]);
|
|
2920
|
+
var PageAssignmentSchema = z20.object({
|
|
2921
|
+
localPageNumber: z20.number().int().positive().describe("1-based page number within this supplied PDF chunk"),
|
|
2922
|
+
extractorNames: z20.array(PageExtractorSchema).describe("Focused extractors that should inspect this page"),
|
|
2923
|
+
pageRole: z20.enum([
|
|
2924
|
+
"declarations_schedule",
|
|
2925
|
+
"endorsement_schedule",
|
|
2926
|
+
"policy_form",
|
|
2927
|
+
"endorsement_form",
|
|
2928
|
+
"condition_exclusion_form",
|
|
2929
|
+
"supplementary",
|
|
2930
|
+
"other"
|
|
2931
|
+
]).optional().describe("Primary role of the page"),
|
|
2932
|
+
hasScheduleValues: z20.boolean().optional().describe("True only when the page contains insured-specific declaration or schedule values, tables, or rows to extract"),
|
|
2933
|
+
confidence: z20.number().min(0).max(1).optional().describe("Confidence in the page assignment"),
|
|
2934
|
+
notes: z20.string().optional().describe("Short explanation of what appears on the page")
|
|
2935
|
+
});
|
|
2936
|
+
var PageMapChunkSchema = z20.object({
|
|
2937
|
+
pages: z20.array(PageAssignmentSchema)
|
|
2938
|
+
});
|
|
2939
|
+
function buildPageMapPrompt(templateHints, startPage, endPage, formInventoryHint) {
|
|
2940
|
+
const inventoryBlock = formInventoryHint ? `
|
|
2941
|
+
FORM INVENTORY (already identified \u2014 use this to constrain your assignments):
|
|
2942
|
+
${formInventoryHint}
|
|
2943
|
+
` : "";
|
|
2944
|
+
return `You are mapping insurance document pages to focused extractors.
|
|
2945
|
+
|
|
2946
|
+
These supplied pages are ORIGINAL DOCUMENT PAGES ${startPage}-${endPage}.
|
|
2947
|
+
|
|
2948
|
+
DOCUMENT TYPE HINTS:
|
|
2949
|
+
${templateHints}
|
|
2950
|
+
${inventoryBlock}
|
|
2951
|
+
For each page in this supplied PDF chunk, decide which extractor(s) should inspect it.
|
|
2720
2952
|
|
|
2721
2953
|
Available extractors:
|
|
2722
|
-
- carrier_info
|
|
2723
|
-
- named_insured
|
|
2724
|
-
- coverage_limits
|
|
2725
|
-
- endorsements
|
|
2726
|
-
- exclusions
|
|
2727
|
-
- conditions
|
|
2728
|
-
- premium_breakdown
|
|
2729
|
-
- declarations
|
|
2730
|
-
- loss_history
|
|
2731
|
-
- sections
|
|
2732
|
-
- supplementary
|
|
2954
|
+
- carrier_info
|
|
2955
|
+
- named_insured
|
|
2956
|
+
- coverage_limits
|
|
2957
|
+
- endorsements
|
|
2958
|
+
- exclusions
|
|
2959
|
+
- conditions
|
|
2960
|
+
- premium_breakdown
|
|
2961
|
+
- declarations
|
|
2962
|
+
- loss_history
|
|
2963
|
+
- sections
|
|
2964
|
+
- supplementary
|
|
2965
|
+
|
|
2966
|
+
Rules:
|
|
2967
|
+
- Use specific extractors for declarations, schedules, endorsements, exclusions, conditions, premium pages, and loss runs.
|
|
2968
|
+
- Use "sections" for pages that contain substantive policy text or mixed content that should still be preserved as raw sections.
|
|
2969
|
+
- Avoid assigning broad ranges mentally; decide page by page.
|
|
2970
|
+
- A page may map to multiple extractors if it legitimately contains multiple relevant sections.
|
|
2971
|
+
- Prefer declarations and schedules for numeric limits/deductibles over later generic form wording.
|
|
2972
|
+
- Assign "coverage_limits" only when the page itself contains insured-specific declaration or schedule values to capture, such as location/building rows, coverage tables, limits, deductibles, coinsurance percentages, or scheduled amounts tied to this policy.
|
|
2973
|
+
- Do NOT assign "coverage_limits" for generic policy-form or endorsement text that merely explains how limits, deductibles, waiting periods, or coinsurance work, or that says values are "shown in the declarations", "shown in the schedule", "as stated", or "if applicable".
|
|
2974
|
+
- Headings like "Limits of Insurance", "Deductible", "Coinsurance", "Loss Conditions", or "Definitions" inside a policy form usually indicate form language, not declarations or schedules.
|
|
2975
|
+
- Continuation pages near the end of a form should stay mapped to "sections" plus "conditions"/"exclusions" when applicable, even if they mention limits or deductibles.
|
|
2976
|
+
- When a form inventory entry identifies a page range as a specific form type (e.g., endorsement, coverage, application), use that classification to guide your extractor choice. Do not assign "coverage_limits" to pages the inventory identifies as endorsement or condition/exclusion forms unless the page contains actual schedule values.
|
|
2977
|
+
- Return every page in the supplied chunk exactly once.
|
|
2733
2978
|
|
|
2734
2979
|
Return JSON:
|
|
2735
2980
|
{
|
|
2736
|
-
"
|
|
2737
|
-
{
|
|
2738
|
-
|
|
2739
|
-
|
|
2740
|
-
|
|
2741
|
-
|
|
2742
|
-
|
|
2981
|
+
"pages": [
|
|
2982
|
+
{
|
|
2983
|
+
"localPageNumber": 1,
|
|
2984
|
+
"extractorNames": ["declarations", "carrier_info", "named_insured", "coverage_limits"],
|
|
2985
|
+
"pageRole": "declarations_schedule",
|
|
2986
|
+
"hasScheduleValues": true,
|
|
2987
|
+
"confidence": 0.96,
|
|
2988
|
+
"notes": "Declarations page with insured, policy period, and scheduled limits"
|
|
2989
|
+
}
|
|
2743
2990
|
]
|
|
2744
2991
|
}
|
|
2745
2992
|
|
|
2746
|
-
Create tasks that cover the entire document. Prefer specific extractors over generic "sections" where possible. Keep page ranges tight \u2014 only include pages relevant to each extractor.
|
|
2747
|
-
|
|
2748
2993
|
Respond with JSON only.`;
|
|
2749
2994
|
}
|
|
2995
|
+
function formatFormInventoryForPageMap(forms) {
|
|
2996
|
+
if (forms.length === 0) return "";
|
|
2997
|
+
return forms.filter((f) => f.pageStart != null).map((f) => {
|
|
2998
|
+
const range = f.pageEnd && f.pageEnd !== f.pageStart ? `pages ${f.pageStart}-${f.pageEnd}` : `page ${f.pageStart}`;
|
|
2999
|
+
const title = f.title ? ` "${f.title}"` : "";
|
|
3000
|
+
return `- ${f.formNumber}${title} [${f.formType}] \u2192 ${range}`;
|
|
3001
|
+
}).join("\n");
|
|
3002
|
+
}
|
|
2750
3003
|
|
|
2751
3004
|
// src/prompts/coordinator/review.ts
|
|
2752
|
-
import { z as
|
|
2753
|
-
var ReviewResultSchema =
|
|
2754
|
-
complete:
|
|
2755
|
-
missingFields:
|
|
2756
|
-
|
|
2757
|
-
|
|
2758
|
-
|
|
2759
|
-
|
|
2760
|
-
|
|
3005
|
+
import { z as z21 } from "zod";
|
|
3006
|
+
var ReviewResultSchema = z21.object({
|
|
3007
|
+
complete: z21.boolean(),
|
|
3008
|
+
missingFields: z21.array(z21.string()),
|
|
3009
|
+
qualityIssues: z21.array(z21.string()).optional(),
|
|
3010
|
+
additionalTasks: z21.array(z21.object({
|
|
3011
|
+
extractorName: z21.string(),
|
|
3012
|
+
startPage: z21.number(),
|
|
3013
|
+
endPage: z21.number(),
|
|
3014
|
+
description: z21.string()
|
|
2761
3015
|
}))
|
|
2762
3016
|
});
|
|
2763
|
-
function buildReviewPrompt(templateExpected, extractedKeys) {
|
|
2764
|
-
return `You are reviewing an extraction for completeness. Compare what was expected vs what was found.
|
|
3017
|
+
function buildReviewPrompt(templateExpected, extractedKeys, extractionSummary, pageMapSummary) {
|
|
3018
|
+
return `You are reviewing an extraction for completeness and quality. Compare what was expected vs what was found.
|
|
2765
3019
|
|
|
2766
3020
|
EXPECTED FIELDS (from document type template):
|
|
2767
3021
|
${templateExpected.map((f) => `- ${f}`).join("\n")}
|
|
@@ -2769,40 +3023,55 @@ ${templateExpected.map((f) => `- ${f}`).join("\n")}
|
|
|
2769
3023
|
FIELDS ALREADY EXTRACTED:
|
|
2770
3024
|
${extractedKeys.map((f) => `- ${f}`).join("\n")}
|
|
2771
3025
|
|
|
3026
|
+
PAGE MAP SUMMARY:
|
|
3027
|
+
${pageMapSummary}
|
|
3028
|
+
|
|
3029
|
+
CURRENT EXTRACTION SUMMARY:
|
|
3030
|
+
${extractionSummary}
|
|
3031
|
+
|
|
2772
3032
|
Determine:
|
|
2773
|
-
1. Is the extraction complete enough?
|
|
3033
|
+
1. Is the extraction complete enough?
|
|
2774
3034
|
2. What fields are missing?
|
|
2775
|
-
3.
|
|
3035
|
+
3. What quality issues are present?
|
|
3036
|
+
4. Should any additional extraction tasks be dispatched?
|
|
3037
|
+
|
|
3038
|
+
Mark the extraction as NOT complete if any of these are true:
|
|
3039
|
+
- required fields are missing
|
|
3040
|
+
- extracted values are generic placeholders like "shown in declarations", "per schedule", "if applicable", "as stated"
|
|
3041
|
+
- coverage limits or deductibles appear to come from generic form language instead of declaration/schedule-specific values
|
|
3042
|
+
- page assignments suggest declaration, schedule, endorsement, exclusion, or condition pages were not actually extracted with the matching focused extractor
|
|
3043
|
+
- a focused extractor exists but returned too little substance for the relevant pages
|
|
2776
3044
|
|
|
2777
3045
|
Return JSON:
|
|
2778
3046
|
{
|
|
2779
3047
|
"complete": boolean,
|
|
2780
3048
|
"missingFields": ["field1", "field2"],
|
|
3049
|
+
"qualityIssues": ["issue 1", "issue 2"],
|
|
2781
3050
|
"additionalTasks": [
|
|
2782
3051
|
{ "extractorName": "...", "startPage": N, "endPage": N, "description": "..." }
|
|
2783
3052
|
]
|
|
2784
3053
|
}
|
|
2785
3054
|
|
|
2786
|
-
|
|
3055
|
+
Use the page map to target follow-up extraction pages precisely. Prefer narrow, declaration/schedule-focused follow-up tasks over broad page ranges.
|
|
2787
3056
|
|
|
2788
3057
|
Respond with JSON only.`;
|
|
2789
3058
|
}
|
|
2790
3059
|
|
|
2791
3060
|
// src/prompts/extractors/carrier-info.ts
|
|
2792
|
-
import { z as
|
|
2793
|
-
var CarrierInfoSchema =
|
|
2794
|
-
carrierName:
|
|
2795
|
-
carrierLegalName:
|
|
2796
|
-
naicNumber:
|
|
2797
|
-
amBestRating:
|
|
2798
|
-
admittedStatus:
|
|
2799
|
-
mga:
|
|
2800
|
-
underwriter:
|
|
2801
|
-
policyNumber:
|
|
2802
|
-
effectiveDate:
|
|
2803
|
-
expirationDate:
|
|
2804
|
-
quoteNumber:
|
|
2805
|
-
proposedEffectiveDate:
|
|
3061
|
+
import { z as z22 } from "zod";
|
|
3062
|
+
var CarrierInfoSchema = z22.object({
|
|
3063
|
+
carrierName: z22.string().describe("Primary insurance company name for display"),
|
|
3064
|
+
carrierLegalName: z22.string().optional().describe("Legal entity name of insurer"),
|
|
3065
|
+
naicNumber: z22.string().optional().describe("NAIC company code"),
|
|
3066
|
+
amBestRating: z22.string().optional().describe("AM Best rating, e.g. 'A+ XV'"),
|
|
3067
|
+
admittedStatus: z22.enum(["admitted", "non_admitted", "surplus_lines"]).optional().describe("Admitted status of the carrier"),
|
|
3068
|
+
mga: z22.string().optional().describe("Managing General Agent or Program Administrator name"),
|
|
3069
|
+
underwriter: z22.string().optional().describe("Named individual underwriter"),
|
|
3070
|
+
policyNumber: z22.string().optional().describe("Policy or quote reference number"),
|
|
3071
|
+
effectiveDate: z22.string().optional().describe("Policy effective date (MM/DD/YYYY)"),
|
|
3072
|
+
expirationDate: z22.string().optional().describe("Policy expiration date (MM/DD/YYYY)"),
|
|
3073
|
+
quoteNumber: z22.string().optional().describe("Quote or proposal reference number"),
|
|
3074
|
+
proposedEffectiveDate: z22.string().optional().describe("Proposed effective date for quotes (MM/DD/YYYY)")
|
|
2806
3075
|
});
|
|
2807
3076
|
function buildCarrierInfoPrompt() {
|
|
2808
3077
|
return `You are an expert insurance document analyst. Extract carrier and policy identification information from this document.
|
|
@@ -2822,18 +3091,18 @@ Return JSON only.`;
|
|
|
2822
3091
|
}
|
|
2823
3092
|
|
|
2824
3093
|
// src/prompts/extractors/named-insured.ts
|
|
2825
|
-
import { z as
|
|
2826
|
-
var AddressSchema2 =
|
|
2827
|
-
street1:
|
|
2828
|
-
city:
|
|
2829
|
-
state:
|
|
2830
|
-
zip:
|
|
3094
|
+
import { z as z23 } from "zod";
|
|
3095
|
+
var AddressSchema2 = z23.object({
|
|
3096
|
+
street1: z23.string(),
|
|
3097
|
+
city: z23.string(),
|
|
3098
|
+
state: z23.string(),
|
|
3099
|
+
zip: z23.string()
|
|
2831
3100
|
});
|
|
2832
|
-
var NamedInsuredSchema2 =
|
|
2833
|
-
insuredName:
|
|
2834
|
-
insuredDba:
|
|
3101
|
+
var NamedInsuredSchema2 = z23.object({
|
|
3102
|
+
insuredName: z23.string().describe("Name of primary named insured"),
|
|
3103
|
+
insuredDba: z23.string().optional().describe("Doing-business-as name"),
|
|
2835
3104
|
insuredAddress: AddressSchema2.optional().describe("Primary insured mailing address"),
|
|
2836
|
-
insuredEntityType:
|
|
3105
|
+
insuredEntityType: z23.enum([
|
|
2837
3106
|
"corporation",
|
|
2838
3107
|
"llc",
|
|
2839
3108
|
"partnership",
|
|
@@ -2846,13 +3115,13 @@ var NamedInsuredSchema2 = z22.object({
|
|
|
2846
3115
|
"married_couple",
|
|
2847
3116
|
"other"
|
|
2848
3117
|
]).optional().describe("Legal entity type of the insured"),
|
|
2849
|
-
insuredFein:
|
|
2850
|
-
insuredSicCode:
|
|
2851
|
-
insuredNaicsCode:
|
|
2852
|
-
additionalNamedInsureds:
|
|
2853
|
-
|
|
2854
|
-
name:
|
|
2855
|
-
relationship:
|
|
3118
|
+
insuredFein: z23.string().optional().describe("Federal Employer Identification Number"),
|
|
3119
|
+
insuredSicCode: z23.string().optional().describe("SIC code"),
|
|
3120
|
+
insuredNaicsCode: z23.string().optional().describe("NAICS code"),
|
|
3121
|
+
additionalNamedInsureds: z23.array(
|
|
3122
|
+
z23.object({
|
|
3123
|
+
name: z23.string(),
|
|
3124
|
+
relationship: z23.string().optional().describe("e.g. subsidiary, affiliate"),
|
|
2856
3125
|
address: AddressSchema2.optional()
|
|
2857
3126
|
})
|
|
2858
3127
|
).optional().describe("Additional named insureds listed on the policy")
|
|
@@ -2873,23 +3142,20 @@ Return JSON only.`;
|
|
|
2873
3142
|
}
|
|
2874
3143
|
|
|
2875
3144
|
// src/prompts/extractors/coverage-limits.ts
|
|
2876
|
-
import { z as
|
|
2877
|
-
var
|
|
2878
|
-
|
|
2879
|
-
|
|
2880
|
-
|
|
2881
|
-
|
|
2882
|
-
|
|
2883
|
-
|
|
2884
|
-
formNumber: z23.string().optional().describe("Associated form number, e.g. 'CG 00 01'")
|
|
2885
|
-
})
|
|
2886
|
-
).describe("All coverages with their limits"),
|
|
2887
|
-
coverageForm: z23.enum(["occurrence", "claims_made", "accident"]).optional().describe("Primary coverage trigger type"),
|
|
2888
|
-
retroactiveDate: z23.string().optional().describe("Retroactive date for claims-made policies (MM/DD/YYYY)")
|
|
3145
|
+
import { z as z24 } from "zod";
|
|
3146
|
+
var ExtractorCoverageSchema = CoverageSchema.extend({
|
|
3147
|
+
coverageCode: z24.string().optional().describe("Coverage code or class code")
|
|
3148
|
+
});
|
|
3149
|
+
var CoverageLimitsSchema = z24.object({
|
|
3150
|
+
coverages: z24.array(ExtractorCoverageSchema).describe("All coverages with their limits"),
|
|
3151
|
+
coverageForm: z24.enum(["occurrence", "claims_made", "accident"]).optional().describe("Primary coverage trigger type"),
|
|
3152
|
+
retroactiveDate: z24.string().optional().describe("Retroactive date for claims-made policies (MM/DD/YYYY)")
|
|
2889
3153
|
});
|
|
2890
3154
|
function buildCoverageLimitsPrompt() {
|
|
2891
3155
|
return `You are an expert insurance document analyst. Extract all coverage limits and deductibles from this document.
|
|
2892
3156
|
|
|
3157
|
+
Extract only insured-specific declaration, schedule, or endorsement entries that state actual coverage terms for this policy.
|
|
3158
|
+
|
|
2893
3159
|
Focus on:
|
|
2894
3160
|
- Every coverage listed on the declarations page or coverage schedule
|
|
2895
3161
|
- Per-occurrence, aggregate, and sub-limits for each coverage
|
|
@@ -2900,20 +3166,34 @@ Focus on:
|
|
|
2900
3166
|
- Standard limit fields: per occurrence, general aggregate, products/completed ops aggregate, personal & advertising injury, fire damage, medical expense, combined single limit, BI/PD splits, umbrella each occurrence/aggregate/retention, statutory (WC), employers liability
|
|
2901
3167
|
- Defense cost treatment: inside limits, outside limits, or supplementary
|
|
2902
3168
|
|
|
2903
|
-
|
|
3169
|
+
For EACH coverage, also extract:
|
|
3170
|
+
- pageNumber: the original page number where the coverage row/value appears
|
|
3171
|
+
- sectionRef: the declarations/schedule/endorsement section heading where it appears
|
|
3172
|
+
- originalContent: the verbatim row or short source snippet used for this coverage
|
|
3173
|
+
- limitValueType: classify the limit as numeric, included, not_included, as_stated, waiting_period, referential, or other
|
|
3174
|
+
- deductibleValueType: classify the deductible/value term similarly when deductible is present
|
|
3175
|
+
|
|
3176
|
+
Critical rules:
|
|
3177
|
+
- Do not extract table-of-contents lines, index entries, headers, footers, page labels, or cross-references as coverages.
|
|
3178
|
+
- Do not create a coverage entry from generic policy-form text that only says a limit/deductible is "shown in the declarations", "shown in the Business Income Declarations", "as stated", "if applicable", or similar referential wording.
|
|
3179
|
+
- Do not treat a generic waiting period, deductible explanation, limits clause, coinsurance clause, or definitions text as a standalone coverage unless the page contains an actual policy-specific schedule row or declaration entry.
|
|
3180
|
+
- Values like "Included" or "Not Included" are valid only when they appear as an explicit declarations/schedule/endorsement entry for a named coverage. Do not infer them from narrative form language.
|
|
3181
|
+
- If a waiting period or hour deductible is shown as part of a specific declarations/schedule row, it may be captured in deductible. Otherwise omit it.
|
|
3182
|
+
- Use limitValueType or deductibleValueType to preserve non-numeric terms precisely instead of forcing them into numeric semantics.
|
|
3183
|
+
- Preserve one row per real coverage entry. Do not merge adjacent schedule rows into malformed names.
|
|
2904
3184
|
|
|
2905
3185
|
Return JSON only.`;
|
|
2906
3186
|
}
|
|
2907
3187
|
|
|
2908
3188
|
// src/prompts/extractors/endorsements.ts
|
|
2909
|
-
import { z as
|
|
2910
|
-
var EndorsementsSchema =
|
|
2911
|
-
endorsements:
|
|
2912
|
-
|
|
2913
|
-
formNumber:
|
|
2914
|
-
editionDate:
|
|
2915
|
-
title:
|
|
2916
|
-
endorsementType:
|
|
3189
|
+
import { z as z25 } from "zod";
|
|
3190
|
+
var EndorsementsSchema = z25.object({
|
|
3191
|
+
endorsements: z25.array(
|
|
3192
|
+
z25.object({
|
|
3193
|
+
formNumber: z25.string().describe("Form number, e.g. 'CG 21 47'"),
|
|
3194
|
+
editionDate: z25.string().optional().describe("Edition date, e.g. '12 07'"),
|
|
3195
|
+
title: z25.string().describe("Endorsement title"),
|
|
3196
|
+
endorsementType: z25.enum([
|
|
2917
3197
|
"additional_insured",
|
|
2918
3198
|
"waiver_of_subrogation",
|
|
2919
3199
|
"primary_noncontributory",
|
|
@@ -2933,12 +3213,12 @@ var EndorsementsSchema = z24.object({
|
|
|
2933
3213
|
"territorial_extension",
|
|
2934
3214
|
"other"
|
|
2935
3215
|
]).describe("Endorsement type classification"),
|
|
2936
|
-
effectiveDate:
|
|
2937
|
-
affectedCoverageParts:
|
|
2938
|
-
namedParties:
|
|
2939
|
-
|
|
2940
|
-
name:
|
|
2941
|
-
role:
|
|
3216
|
+
effectiveDate: z25.string().optional().describe("Endorsement effective date"),
|
|
3217
|
+
affectedCoverageParts: z25.array(z25.string()).optional().describe("Coverage parts affected by this endorsement"),
|
|
3218
|
+
namedParties: z25.array(
|
|
3219
|
+
z25.object({
|
|
3220
|
+
name: z25.string().describe("Party name"),
|
|
3221
|
+
role: z25.enum([
|
|
2942
3222
|
"additional_insured",
|
|
2943
3223
|
"loss_payee",
|
|
2944
3224
|
"mortgage_holder",
|
|
@@ -2947,15 +3227,15 @@ var EndorsementsSchema = z24.object({
|
|
|
2947
3227
|
"designated_person",
|
|
2948
3228
|
"other"
|
|
2949
3229
|
]).describe("Party role"),
|
|
2950
|
-
relationship:
|
|
2951
|
-
scope:
|
|
3230
|
+
relationship: z25.string().optional().describe("Relationship to insured"),
|
|
3231
|
+
scope: z25.string().optional().describe("Scope of coverage for this party")
|
|
2952
3232
|
})
|
|
2953
3233
|
).optional().describe("Named parties (additional insureds, loss payees, etc.)"),
|
|
2954
|
-
keyTerms:
|
|
2955
|
-
premiumImpact:
|
|
2956
|
-
content:
|
|
2957
|
-
pageStart:
|
|
2958
|
-
pageEnd:
|
|
3234
|
+
keyTerms: z25.array(z25.string()).optional().describe("Key terms or notable provisions in the endorsement"),
|
|
3235
|
+
premiumImpact: z25.string().optional().describe("Additional premium or credit"),
|
|
3236
|
+
content: z25.string().describe("Full verbatim text of the endorsement"),
|
|
3237
|
+
pageStart: z25.number().describe("Starting page number of this endorsement"),
|
|
3238
|
+
pageEnd: z25.number().optional().describe("Ending page number of this endorsement")
|
|
2959
3239
|
})
|
|
2960
3240
|
).describe("All endorsements found in the document")
|
|
2961
3241
|
});
|
|
@@ -2986,20 +3266,20 @@ Return JSON only.`;
|
|
|
2986
3266
|
}
|
|
2987
3267
|
|
|
2988
3268
|
// src/prompts/extractors/exclusions.ts
|
|
2989
|
-
import { z as
|
|
2990
|
-
var ExclusionsSchema =
|
|
2991
|
-
exclusions:
|
|
2992
|
-
|
|
2993
|
-
name:
|
|
2994
|
-
formNumber:
|
|
2995
|
-
excludedPerils:
|
|
2996
|
-
isAbsolute:
|
|
2997
|
-
exceptions:
|
|
2998
|
-
buybackAvailable:
|
|
2999
|
-
buybackEndorsement:
|
|
3000
|
-
appliesTo:
|
|
3001
|
-
content:
|
|
3002
|
-
pageNumber:
|
|
3269
|
+
import { z as z26 } from "zod";
|
|
3270
|
+
var ExclusionsSchema = z26.object({
|
|
3271
|
+
exclusions: z26.array(
|
|
3272
|
+
z26.object({
|
|
3273
|
+
name: z26.string().describe("Exclusion title or short description"),
|
|
3274
|
+
formNumber: z26.string().optional().describe("Form number if part of a named endorsement"),
|
|
3275
|
+
excludedPerils: z26.array(z26.string()).optional().describe("Specific perils excluded"),
|
|
3276
|
+
isAbsolute: z26.boolean().optional().describe("Whether the exclusion is absolute (no exceptions)"),
|
|
3277
|
+
exceptions: z26.array(z26.string()).optional().describe("Exceptions to the exclusion, if any"),
|
|
3278
|
+
buybackAvailable: z26.boolean().optional().describe("Whether coverage can be bought back via endorsement"),
|
|
3279
|
+
buybackEndorsement: z26.string().optional().describe("Form number of the buyback endorsement if available"),
|
|
3280
|
+
appliesTo: z26.array(z26.string()).optional().describe("Coverage types this exclusion applies to"),
|
|
3281
|
+
content: z26.string().describe("Full verbatim exclusion text"),
|
|
3282
|
+
pageNumber: z26.number().optional().describe("Page number where exclusion appears")
|
|
3003
3283
|
})
|
|
3004
3284
|
).describe("All exclusions found in the document")
|
|
3005
3285
|
});
|
|
@@ -3024,18 +3304,23 @@ Focus on:
|
|
|
3024
3304
|
- Exclusions within insuring agreements or conditions if clearly labeled
|
|
3025
3305
|
- Full verbatim exclusion text \u2014 do not summarize
|
|
3026
3306
|
|
|
3307
|
+
Critical rules:
|
|
3308
|
+
- Ignore table-of-contents entries, running headers/footers, and references that only point to another page or section.
|
|
3309
|
+
- Do not emit a standalone exclusion from a fragment unless the fragment itself contains substantive exclusion wording.
|
|
3310
|
+
- Always include pageNumber when the exclusion appears on a specific page in the supplied document chunk.
|
|
3311
|
+
|
|
3027
3312
|
Common personal lines exclusion patterns: animal liability, business pursuits, home daycare, watercraft, aircraft.
|
|
3028
3313
|
|
|
3029
3314
|
Return JSON only.`;
|
|
3030
3315
|
}
|
|
3031
3316
|
|
|
3032
3317
|
// src/prompts/extractors/conditions.ts
|
|
3033
|
-
import { z as
|
|
3034
|
-
var ConditionsSchema =
|
|
3035
|
-
conditions:
|
|
3036
|
-
|
|
3037
|
-
name:
|
|
3038
|
-
conditionType:
|
|
3318
|
+
import { z as z27 } from "zod";
|
|
3319
|
+
var ConditionsSchema = z27.object({
|
|
3320
|
+
conditions: z27.array(
|
|
3321
|
+
z27.object({
|
|
3322
|
+
name: z27.string().describe("Condition title"),
|
|
3323
|
+
conditionType: z27.enum([
|
|
3039
3324
|
"duties_after_loss",
|
|
3040
3325
|
"notice_requirements",
|
|
3041
3326
|
"other_insurance",
|
|
@@ -3054,14 +3339,14 @@ var ConditionsSchema = z26.object({
|
|
|
3054
3339
|
"separation_of_insureds",
|
|
3055
3340
|
"other"
|
|
3056
3341
|
]).describe("Condition category"),
|
|
3057
|
-
content:
|
|
3058
|
-
keyValues:
|
|
3059
|
-
|
|
3060
|
-
key:
|
|
3061
|
-
value:
|
|
3342
|
+
content: z27.string().describe("Full verbatim condition text"),
|
|
3343
|
+
keyValues: z27.array(
|
|
3344
|
+
z27.object({
|
|
3345
|
+
key: z27.string().describe("Key name (e.g. 'noticePeriod', 'suitDeadline')"),
|
|
3346
|
+
value: z27.string().describe("Value (e.g. '30 days', '2 years')")
|
|
3062
3347
|
})
|
|
3063
3348
|
).optional().describe("Key values extracted from the condition (notice periods, deadlines, etc.)"),
|
|
3064
|
-
pageNumber:
|
|
3349
|
+
pageNumber: z27.number().optional().describe("Page number where condition appears")
|
|
3065
3350
|
})
|
|
3066
3351
|
).describe("All policy conditions found in the document")
|
|
3067
3352
|
});
|
|
@@ -3073,7 +3358,7 @@ For EACH condition, extract:
|
|
|
3073
3358
|
- conditionType: classify as one of: duties_after_loss, notice_requirements, other_insurance, cancellation, nonrenewal, transfer_of_rights, liberalization, arbitration, concealment_fraud, examination_under_oath, legal_action, loss_payment, appraisal, mortgage_holders, policy_territory, separation_of_insureds, other \u2014 REQUIRED
|
|
3074
3359
|
- content: full verbatim condition text \u2014 REQUIRED
|
|
3075
3360
|
- keyValues: extract specific values as key-value pairs (e.g. noticePeriod: "30 days", suitDeadline: "2 years")
|
|
3076
|
-
- pageNumber: page number where the condition appears
|
|
3361
|
+
- pageNumber: original document page number where the substantive condition text appears
|
|
3077
3362
|
|
|
3078
3363
|
Focus on:
|
|
3079
3364
|
- Duties after loss / notice of occurrence conditions
|
|
@@ -3090,32 +3375,37 @@ Focus on:
|
|
|
3090
3375
|
- Mortgage holders clause
|
|
3091
3376
|
- Any other named conditions
|
|
3092
3377
|
|
|
3378
|
+
Critical rules:
|
|
3379
|
+
- Ignore table-of-contents entries, section indexes, running headers/footers, and page references such as "Appraisal ..... 19".
|
|
3380
|
+
- Do not emit a condition unless the page contains substantive condition text, not just a heading or reference.
|
|
3381
|
+
- If a condition continues from a prior page, keep the substantive text together and use the page where the condition text appears in this extracted chunk.
|
|
3382
|
+
|
|
3093
3383
|
Return JSON only.`;
|
|
3094
3384
|
}
|
|
3095
3385
|
|
|
3096
3386
|
// src/prompts/extractors/premium-breakdown.ts
|
|
3097
|
-
import { z as
|
|
3098
|
-
var PremiumBreakdownSchema =
|
|
3099
|
-
premium:
|
|
3100
|
-
totalCost:
|
|
3101
|
-
premiumBreakdown:
|
|
3102
|
-
|
|
3103
|
-
line:
|
|
3104
|
-
amount:
|
|
3387
|
+
import { z as z28 } from "zod";
|
|
3388
|
+
var PremiumBreakdownSchema = z28.object({
|
|
3389
|
+
premium: z28.string().optional().describe("Total premium amount, e.g. '$5,000'"),
|
|
3390
|
+
totalCost: z28.string().optional().describe("Total cost including taxes and fees, e.g. '$5,250'"),
|
|
3391
|
+
premiumBreakdown: z28.array(
|
|
3392
|
+
z28.object({
|
|
3393
|
+
line: z28.string().describe("Coverage line name"),
|
|
3394
|
+
amount: z28.string().describe("Premium amount for this line")
|
|
3105
3395
|
})
|
|
3106
3396
|
).optional().describe("Per-coverage-line premium breakdown"),
|
|
3107
|
-
taxesAndFees:
|
|
3108
|
-
|
|
3109
|
-
name:
|
|
3110
|
-
amount:
|
|
3111
|
-
type:
|
|
3397
|
+
taxesAndFees: z28.array(
|
|
3398
|
+
z28.object({
|
|
3399
|
+
name: z28.string().describe("Fee or tax name"),
|
|
3400
|
+
amount: z28.string().describe("Dollar amount"),
|
|
3401
|
+
type: z28.enum(["tax", "fee", "surcharge", "assessment"]).optional().describe("Fee category")
|
|
3112
3402
|
})
|
|
3113
3403
|
).optional().describe("Taxes, fees, surcharges, and assessments"),
|
|
3114
|
-
minimumPremium:
|
|
3115
|
-
depositPremium:
|
|
3116
|
-
paymentPlan:
|
|
3117
|
-
auditType:
|
|
3118
|
-
ratingBasis:
|
|
3404
|
+
minimumPremium: z28.string().optional().describe("Minimum premium if stated"),
|
|
3405
|
+
depositPremium: z28.string().optional().describe("Deposit premium if stated"),
|
|
3406
|
+
paymentPlan: z28.string().optional().describe("Payment plan description"),
|
|
3407
|
+
auditType: z28.enum(["annual", "semi_annual", "quarterly", "monthly", "final", "self"]).optional().describe("Premium audit type"),
|
|
3408
|
+
ratingBasis: z28.string().optional().describe("Rating basis, e.g. payroll, revenue, area, units")
|
|
3119
3409
|
});
|
|
3120
3410
|
function buildPremiumBreakdownPrompt() {
|
|
3121
3411
|
return `You are an expert insurance document analyst. Extract all premium and cost information from this document.
|
|
@@ -3135,14 +3425,14 @@ Return JSON only.`;
|
|
|
3135
3425
|
}
|
|
3136
3426
|
|
|
3137
3427
|
// src/prompts/extractors/declarations.ts
|
|
3138
|
-
import { z as
|
|
3139
|
-
var DeclarationsFieldSchema =
|
|
3140
|
-
field:
|
|
3141
|
-
value:
|
|
3142
|
-
section:
|
|
3428
|
+
import { z as z29 } from "zod";
|
|
3429
|
+
var DeclarationsFieldSchema = z29.object({
|
|
3430
|
+
field: z29.string().describe("Descriptive field name (e.g. 'policyNumber', 'effectiveDate', 'coverageALimit')"),
|
|
3431
|
+
value: z29.string().describe("Extracted value exactly as it appears in the document"),
|
|
3432
|
+
section: z29.string().optional().describe("Section or grouping this field belongs to (e.g. 'Coverage Limits', 'Vehicle Schedule')")
|
|
3143
3433
|
});
|
|
3144
|
-
var DeclarationsExtractSchema =
|
|
3145
|
-
fields:
|
|
3434
|
+
var DeclarationsExtractSchema = z29.object({
|
|
3435
|
+
fields: z29.array(DeclarationsFieldSchema).describe("All declarations page fields extracted as key-value pairs. Structure varies by line of business.")
|
|
3146
3436
|
});
|
|
3147
3437
|
function buildDeclarationsPrompt() {
|
|
3148
3438
|
return `You are an expert insurance document analyst. Extract all declarations page data from this document into a flexible key-value structure.
|
|
@@ -3182,21 +3472,21 @@ Preserve original values exactly as they appear. Return JSON only.`;
|
|
|
3182
3472
|
}
|
|
3183
3473
|
|
|
3184
3474
|
// src/prompts/extractors/loss-history.ts
|
|
3185
|
-
import { z as
|
|
3186
|
-
var LossHistorySchema =
|
|
3187
|
-
lossSummary:
|
|
3188
|
-
individualClaims:
|
|
3189
|
-
|
|
3190
|
-
date:
|
|
3191
|
-
type:
|
|
3192
|
-
description:
|
|
3193
|
-
amountPaid:
|
|
3194
|
-
amountReserved:
|
|
3195
|
-
status:
|
|
3196
|
-
claimNumber:
|
|
3475
|
+
import { z as z30 } from "zod";
|
|
3476
|
+
var LossHistorySchema = z30.object({
|
|
3477
|
+
lossSummary: z30.string().optional().describe("Summary of loss history, e.g. '3 claims in past 5 years totaling $125,000'"),
|
|
3478
|
+
individualClaims: z30.array(
|
|
3479
|
+
z30.object({
|
|
3480
|
+
date: z30.string().optional().describe("Date of loss or claim"),
|
|
3481
|
+
type: z30.string().optional().describe("Type of claim, e.g. 'property damage', 'bodily injury'"),
|
|
3482
|
+
description: z30.string().optional().describe("Brief description of the claim"),
|
|
3483
|
+
amountPaid: z30.string().optional().describe("Amount paid"),
|
|
3484
|
+
amountReserved: z30.string().optional().describe("Amount reserved"),
|
|
3485
|
+
status: z30.enum(["open", "closed", "reopened"]).optional().describe("Claim status"),
|
|
3486
|
+
claimNumber: z30.string().optional().describe("Claim reference number")
|
|
3197
3487
|
})
|
|
3198
3488
|
).optional().describe("Individual claim records"),
|
|
3199
|
-
experienceMod:
|
|
3489
|
+
experienceMod: z30.string().optional().describe("Experience modification factor for workers comp, e.g. '0.85'")
|
|
3200
3490
|
});
|
|
3201
3491
|
function buildLossHistoryPrompt() {
|
|
3202
3492
|
return `You are an expert insurance document analyst. Extract all loss history and claims information from this document.
|
|
@@ -3213,18 +3503,18 @@ Return JSON only.`;
|
|
|
3213
3503
|
}
|
|
3214
3504
|
|
|
3215
3505
|
// src/prompts/extractors/sections.ts
|
|
3216
|
-
import { z as
|
|
3217
|
-
var SubsectionSchema2 =
|
|
3218
|
-
title:
|
|
3219
|
-
sectionNumber:
|
|
3220
|
-
pageNumber:
|
|
3221
|
-
content:
|
|
3506
|
+
import { z as z31 } from "zod";
|
|
3507
|
+
var SubsectionSchema2 = z31.object({
|
|
3508
|
+
title: z31.string().describe("Subsection title"),
|
|
3509
|
+
sectionNumber: z31.string().optional().describe("Subsection number"),
|
|
3510
|
+
pageNumber: z31.number().optional().describe("Page number"),
|
|
3511
|
+
content: z31.string().describe("Full verbatim text")
|
|
3222
3512
|
});
|
|
3223
|
-
var SectionsSchema =
|
|
3224
|
-
sections:
|
|
3225
|
-
|
|
3226
|
-
title:
|
|
3227
|
-
type:
|
|
3513
|
+
var SectionsSchema = z31.object({
|
|
3514
|
+
sections: z31.array(
|
|
3515
|
+
z31.object({
|
|
3516
|
+
title: z31.string().describe("Section title"),
|
|
3517
|
+
type: z31.enum([
|
|
3228
3518
|
"declarations",
|
|
3229
3519
|
"insuring_agreement",
|
|
3230
3520
|
"policy_form",
|
|
@@ -3238,10 +3528,10 @@ var SectionsSchema = z30.object({
|
|
|
3238
3528
|
"regulatory",
|
|
3239
3529
|
"other"
|
|
3240
3530
|
]).describe("Section type classification"),
|
|
3241
|
-
content:
|
|
3242
|
-
pageStart:
|
|
3243
|
-
pageEnd:
|
|
3244
|
-
subsections:
|
|
3531
|
+
content: z31.string().describe("Full verbatim text of the section"),
|
|
3532
|
+
pageStart: z31.number().describe("Starting page number"),
|
|
3533
|
+
pageEnd: z31.number().optional().describe("Ending page number"),
|
|
3534
|
+
subsections: z31.array(SubsectionSchema2).optional().describe("Subsections within this section")
|
|
3245
3535
|
})
|
|
3246
3536
|
).describe("All document sections")
|
|
3247
3537
|
});
|
|
@@ -3260,25 +3550,31 @@ For each section, classify its type:
|
|
|
3260
3550
|
- "other" \u2014 anything that doesn't fit the above categories
|
|
3261
3551
|
|
|
3262
3552
|
Include accurate page numbers for every section. Include subsections only if the section has clearly defined subsections with their own titles.
|
|
3553
|
+
If a page begins or ends in the middle of a section, treat it as a continuation of the existing section instead of creating a new orphan section from the fragment.
|
|
3554
|
+
|
|
3555
|
+
Critical rules:
|
|
3556
|
+
- Ignore table-of-contents entries, page-number references, repeating headers/footers, and other navigational artifacts.
|
|
3557
|
+
- Do not create a new section from a lone continuation fragment such as a single paragraph tail or list item that clearly belongs to the previous page's section.
|
|
3558
|
+
- When a section spans multiple pages, keep it as one section with pageStart/pageEnd covering the full span represented in this extraction.
|
|
3263
3559
|
|
|
3264
3560
|
Return JSON only.`;
|
|
3265
3561
|
}
|
|
3266
3562
|
|
|
3267
3563
|
// src/prompts/extractors/supplementary.ts
|
|
3268
|
-
import { z as
|
|
3269
|
-
var ContactSchema2 =
|
|
3270
|
-
name:
|
|
3271
|
-
phone:
|
|
3272
|
-
email:
|
|
3273
|
-
address:
|
|
3274
|
-
type:
|
|
3564
|
+
import { z as z32 } from "zod";
|
|
3565
|
+
var ContactSchema2 = z32.object({
|
|
3566
|
+
name: z32.string().optional().describe("Organization or person name"),
|
|
3567
|
+
phone: z32.string().optional().describe("Phone number"),
|
|
3568
|
+
email: z32.string().optional().describe("Email address"),
|
|
3569
|
+
address: z32.string().optional().describe("Mailing address"),
|
|
3570
|
+
type: z32.string().optional().describe("Contact type, e.g. 'State Department of Insurance'")
|
|
3275
3571
|
});
|
|
3276
|
-
var SupplementarySchema =
|
|
3277
|
-
regulatoryContacts:
|
|
3278
|
-
claimsContacts:
|
|
3279
|
-
thirdPartyAdministrators:
|
|
3280
|
-
cancellationNoticeDays:
|
|
3281
|
-
nonrenewalNoticeDays:
|
|
3572
|
+
var SupplementarySchema = z32.object({
|
|
3573
|
+
regulatoryContacts: z32.array(ContactSchema2).optional().describe("Regulatory body contacts (state department of insurance, ombudsman)"),
|
|
3574
|
+
claimsContacts: z32.array(ContactSchema2).optional().describe("Claims reporting contacts and instructions"),
|
|
3575
|
+
thirdPartyAdministrators: z32.array(ContactSchema2).optional().describe("Third-party administrators for claims handling"),
|
|
3576
|
+
cancellationNoticeDays: z32.number().optional().describe("Required notice period for cancellation in days"),
|
|
3577
|
+
nonrenewalNoticeDays: z32.number().optional().describe("Required notice period for nonrenewal in days")
|
|
3282
3578
|
});
|
|
3283
3579
|
function buildSupplementaryPrompt() {
|
|
3284
3580
|
return `You are an expert insurance document analyst. Extract supplementary and regulatory information from this document.
|
|
@@ -3315,6 +3611,313 @@ function getExtractor(name) {
|
|
|
3315
3611
|
return EXTRACTORS[name];
|
|
3316
3612
|
}
|
|
3317
3613
|
|
|
3614
|
+
// src/core/quality.ts
|
|
3615
|
+
/**
 * Collapse a list of review issues into a single quality-gate status.
 *
 * A single "blocking" issue yields "failed". Otherwise any "warning" issue,
 * or the `hasRoundWarnings` flag, yields "warning". Anything else is "passed".
 *
 * @param {{ issues: Array<{ severity: string }>, hasRoundWarnings?: boolean }} params
 * @returns {"failed" | "warning" | "passed"} The resulting gate status.
 */
function evaluateQualityGate(params) {
  const { issues, hasRoundWarnings = false } = params;
  const blockingFound = issues.some(({ severity }) => severity === "blocking");
  const warningFound = issues.some(({ severity }) => severity === "warning") || hasRoundWarnings;
  if (blockingFound) return "failed";
  if (warningFound) return "warning";
  return "passed";
}
|
|
3621
|
+
/**
 * Decide whether a quality-gate status should abort the run.
 *
 * Only the "strict" mode turns a "failed" status into a hard failure;
 * every other mode/status combination returns false.
 *
 * @param {string} mode - Quality-gate mode.
 * @param {string} status - Status as returned by the quality gate.
 * @returns {boolean} True when mode is "strict" and status is "failed".
 */
function shouldFailQualityGate(mode, status) {
  if (mode !== "strict") return false;
  return status === "failed";
}
|
|
3624
|
+
|
|
3625
|
+
// src/extraction/quality.ts
|
|
3626
|
+
/**
 * Return a trimmed form number, or undefined when none is usable.
 *
 * @param {unknown} value - Candidate form number.
 * @returns {string | undefined} The trimmed string, or undefined if `value`
 *   is not a string or is empty/whitespace-only.
 */
function normalizeFormNumber(value) {
  const trimmed = typeof value === "string" ? value.trim() : "";
  return trimmed === "" ? undefined : trimmed;
}
|
|
3632
|
+
/**
 * Record a form in the inventory, merging with any entry already stored
 * under the same form number.
 *
 * On first sight the entry is created from `extra`. On later calls only
 * fields that are still missing are filled in, and `source` is appended to
 * the entry's `sources` list if not already present.
 *
 * @param {Map<string, object>} inventory - Inventory keyed by form number; mutated in place.
 * @param {string | undefined} formNumber - Form number; a falsy value makes this a no-op.
 * @param {string} source - Name of the extractor that reported the form.
 * @param {{ title?: string, pageStart?: number, pageEnd?: number }} [extra] - Optional metadata.
 * @returns {void}
 */
function addFormEntry(inventory, formNumber, source, extra) {
  if (!formNumber) return;
  const existing = inventory.get(formNumber);
  if (!existing) {
    inventory.set(formNumber, {
      formNumber,
      title: extra?.title,
      pageStart: extra?.pageStart,
      pageEnd: extra?.pageEnd,
      sources: [source]
    });
    return;
  }
  // An empty title carries no information, so truthiness is the right test here.
  if (!existing.title && extra?.title) existing.title = extra.title;
  // Page numbers are compared against null/undefined rather than by truthiness
  // so that a page value of 0 is handled the same way on merge as on first insert.
  if (existing.pageStart == null && extra?.pageStart != null) existing.pageStart = extra.pageStart;
  if (existing.pageEnd == null && extra?.pageEnd != null) existing.pageEnd = extra.pageEnd;
  if (!existing.sources.includes(source)) existing.sources.push(source);
}
|
|
3650
|
+
/**
 * Detect wording that points elsewhere in the policy instead of stating a
 * concrete term (for example "shown in the declarations").
 *
 * Matching is case-insensitive substring search.
 *
 * @param {unknown} value - Extracted limit/deductible text.
 * @returns {boolean} True if `value` is a string containing one of the
 *   referential phrases; false otherwise (including non-strings).
 */
function looksReferential(value) {
  if (typeof value !== "string") return false;
  const lowered = value.toLowerCase();
  const referentialPhrases = [
    "shown in the declarations",
    "shown in declarations",
    "shown in the schedule",
    "as stated",
    "if applicable"
  ];
  return referentialPhrases.some((phrase) => lowered.includes(phrase));
}
|
|
3655
|
+
/**
 * Detect table-of-contents style lines: text followed by a dot leader and a
 * trailing one-to-three digit page number, e.g. "Appraisal ..... 19" or
 * "3. Conditions...7".
 *
 * Whitespace between the dot leader and the page number is allowed, because
 * TOC lines commonly separate the two with a space.
 *
 * @param {unknown} value - Extracted text to inspect.
 * @returns {boolean} True if the trimmed string matches either TOC pattern;
 *   false otherwise (including non-strings).
 */
function looksTocArtifact(value) {
  if (typeof value !== "string") return false;
  const text = value.trim();
  // Pattern 1: four or more leader dots, then the page number at end of string.
  const hasDotLeader = /\.{4,}\s*\d{1,3}$/.test(text);
  // Pattern 2: numbered heading ("3. Title") ending in a shorter leader plus page number.
  const isNumberedEntry = /^\d+\.\s+[A-Z][\s\S]*\.{3,}\s*\d{1,3}$/.test(text);
  return hasDotLeader || isNumberedEntry;
}
|
|
3659
|
+
/**
 * Rank a coverage's section reference by how authoritative its source is.
 *
 * Tiers are checked from highest to lowest using case-insensitive substring
 * matching, and the first tier with a match wins:
 *   4 - declarations / schedules
 *   3 - endorsements
 *   2 - additional coverages
 *   1 - coverage or policy forms
 *   0 - anything else, or a non-string reference
 *
 * @param {unknown} sectionRef - Section heading the coverage was extracted from.
 * @returns {number} Precedence rank from 0 to 4.
 */
function sourcePrecedence(sectionRef) {
  if (typeof sectionRef !== "string") return 0;
  const ref = sectionRef.toLowerCase();
  const tiers = [
    [4, ["declaration", "scheduled coverages", "schedule"]],
    [3, ["endorsement"]],
    [2, ["additional coverages"]],
    [1, ["coverage form", "policy form"]]
  ];
  for (const [rank, keywords] of tiers) {
    if (keywords.some((keyword) => ref.includes(keyword))) return rank;
  }
  return 0;
}
|
|
3668
|
+
function buildExtractionReviewReport(params) {
|
|
3669
|
+
const { memory, reviewRounds } = params;
|
|
3670
|
+
const deterministicIssues = [];
|
|
3671
|
+
const inventory = /* @__PURE__ */ new Map();
|
|
3672
|
+
const extractedFormInventory = memory.get("form_inventory")?.forms ?? [];
|
|
3673
|
+
const coverages = memory.get("coverage_limits")?.coverages ?? [];
|
|
3674
|
+
const endorsements = memory.get("endorsements")?.endorsements ?? [];
|
|
3675
|
+
const exclusions = memory.get("exclusions")?.exclusions ?? [];
|
|
3676
|
+
const conditions = memory.get("conditions")?.conditions ?? [];
|
|
3677
|
+
const sections = memory.get("sections")?.sections ?? [];
|
|
3678
|
+
for (const form of extractedFormInventory) {
|
|
3679
|
+
addFormEntry(
|
|
3680
|
+
inventory,
|
|
3681
|
+
normalizeFormNumber(form.formNumber),
|
|
3682
|
+
"form_inventory",
|
|
3683
|
+
{
|
|
3684
|
+
title: form.title,
|
|
3685
|
+
pageStart: form.pageStart,
|
|
3686
|
+
pageEnd: form.pageEnd
|
|
3687
|
+
}
|
|
3688
|
+
);
|
|
3689
|
+
}
|
|
3690
|
+
for (const endorsement of endorsements) {
|
|
3691
|
+
addFormEntry(
|
|
3692
|
+
inventory,
|
|
3693
|
+
normalizeFormNumber(endorsement.formNumber),
|
|
3694
|
+
"endorsements",
|
|
3695
|
+
{
|
|
3696
|
+
title: typeof endorsement.title === "string" ? endorsement.title : void 0,
|
|
3697
|
+
pageStart: typeof endorsement.pageStart === "number" ? endorsement.pageStart : void 0,
|
|
3698
|
+
pageEnd: typeof endorsement.pageEnd === "number" ? endorsement.pageEnd : void 0
|
|
3699
|
+
}
|
|
3700
|
+
);
|
|
3701
|
+
if (typeof endorsement.formNumber !== "string" || !endorsement.formNumber.trim()) {
|
|
3702
|
+
deterministicIssues.push({
|
|
3703
|
+
code: "endorsement_missing_form_number",
|
|
3704
|
+
severity: "blocking",
|
|
3705
|
+
message: "Endorsement is missing formNumber.",
|
|
3706
|
+
extractorName: "endorsements",
|
|
3707
|
+
pageNumber: typeof endorsement.pageStart === "number" ? endorsement.pageStart : void 0,
|
|
3708
|
+
itemName: typeof endorsement.title === "string" ? endorsement.title : void 0
|
|
3709
|
+
});
|
|
3710
|
+
}
|
|
3711
|
+
const endorsementFormNumber = normalizeFormNumber(endorsement.formNumber);
|
|
3712
|
+
if (endorsementFormNumber && !inventory.has(endorsementFormNumber)) {
|
|
3713
|
+
deterministicIssues.push({
|
|
3714
|
+
code: "endorsement_form_missing_from_inventory",
|
|
3715
|
+
severity: "warning",
|
|
3716
|
+
message: `Endorsement "${String(endorsement.title ?? endorsementFormNumber)}" is not present in form inventory.`,
|
|
3717
|
+
extractorName: "endorsements",
|
|
3718
|
+
formNumber: endorsementFormNumber,
|
|
3719
|
+
pageNumber: typeof endorsement.pageStart === "number" ? endorsement.pageStart : void 0,
|
|
3720
|
+
itemName: typeof endorsement.title === "string" ? endorsement.title : void 0
|
|
3721
|
+
});
|
|
3722
|
+
}
|
|
3723
|
+
}
|
|
3724
|
+
for (const coverage of coverages) {
|
|
3725
|
+
const formNumber = normalizeFormNumber(coverage.formNumber);
|
|
3726
|
+
addFormEntry(inventory, formNumber, "coverage_limits", {
|
|
3727
|
+
title: typeof coverage.name === "string" ? coverage.name : void 0,
|
|
3728
|
+
pageStart: typeof coverage.pageNumber === "number" ? coverage.pageNumber : void 0,
|
|
3729
|
+
pageEnd: typeof coverage.pageNumber === "number" ? coverage.pageNumber : void 0
|
|
3730
|
+
});
|
|
3731
|
+
if (typeof coverage.name === "string" && /coverage form$/i.test(coverage.name.trim())) {
|
|
3732
|
+
deterministicIssues.push({
|
|
3733
|
+
code: "generic_form_row_as_coverage",
|
|
3734
|
+
severity: "blocking",
|
|
3735
|
+
message: `Coverage "${coverage.name}" looks like a form header rather than a real coverage row.`,
|
|
3736
|
+
extractorName: "coverage_limits",
|
|
3737
|
+
formNumber,
|
|
3738
|
+
pageNumber: typeof coverage.pageNumber === "number" ? coverage.pageNumber : void 0,
|
|
3739
|
+
itemName: coverage.name
|
|
3740
|
+
});
|
|
3741
|
+
}
|
|
3742
|
+
if (typeof coverage.pageNumber !== "number") {
|
|
3743
|
+
deterministicIssues.push({
|
|
3744
|
+
code: "coverage_missing_page_number",
|
|
3745
|
+
severity: "warning",
|
|
3746
|
+
message: `Coverage "${String(coverage.name ?? "unknown")}" is missing pageNumber provenance.`,
|
|
3747
|
+
extractorName: "coverage_limits",
|
|
3748
|
+
formNumber,
|
|
3749
|
+
itemName: typeof coverage.name === "string" ? coverage.name : void 0
|
|
3750
|
+
});
|
|
3751
|
+
}
|
|
3752
|
+
if (typeof coverage.sectionRef !== "string" || !coverage.sectionRef.trim()) {
|
|
3753
|
+
deterministicIssues.push({
|
|
3754
|
+
code: "coverage_missing_section_ref",
|
|
3755
|
+
severity: "warning",
|
|
3756
|
+
message: `Coverage "${String(coverage.name ?? "unknown")}" is missing sectionRef provenance.`,
|
|
3757
|
+
extractorName: "coverage_limits",
|
|
3758
|
+
formNumber,
|
|
3759
|
+
pageNumber: typeof coverage.pageNumber === "number" ? coverage.pageNumber : void 0,
|
|
3760
|
+
itemName: typeof coverage.name === "string" ? coverage.name : void 0
|
|
3761
|
+
});
|
|
3762
|
+
}
|
|
3763
|
+
if (typeof coverage.originalContent !== "string" || !coverage.originalContent.trim()) {
|
|
3764
|
+
deterministicIssues.push({
|
|
3765
|
+
code: "coverage_missing_original_content",
|
|
3766
|
+
severity: "warning",
|
|
3767
|
+
message: `Coverage "${String(coverage.name ?? "unknown")}" is missing originalContent source text.`,
|
|
3768
|
+
extractorName: "coverage_limits",
|
|
3769
|
+
formNumber,
|
|
3770
|
+
pageNumber: typeof coverage.pageNumber === "number" ? coverage.pageNumber : void 0,
|
|
3771
|
+
itemName: typeof coverage.name === "string" ? coverage.name : void 0
|
|
3772
|
+
});
|
|
3773
|
+
}
|
|
3774
|
+
if (looksReferential(coverage.limit) || looksReferential(coverage.deductible)) {
|
|
3775
|
+
deterministicIssues.push({
|
|
3776
|
+
code: "coverage_referential_value",
|
|
3777
|
+
severity: "warning",
|
|
3778
|
+
message: `Coverage "${String(coverage.name ?? "unknown")}" contains referential language instead of a concrete scheduled term.`,
|
|
3779
|
+
extractorName: "coverage_limits",
|
|
3780
|
+
formNumber,
|
|
3781
|
+
pageNumber: typeof coverage.pageNumber === "number" ? coverage.pageNumber : void 0,
|
|
3782
|
+
itemName: typeof coverage.name === "string" ? coverage.name : void 0
|
|
3783
|
+
});
|
|
3784
|
+
}
|
|
3785
|
+
if (formNumber && !inventory.has(formNumber)) {
|
|
3786
|
+
deterministicIssues.push({
|
|
3787
|
+
code: "coverage_form_missing_from_inventory",
|
|
3788
|
+
severity: "warning",
|
|
3789
|
+
message: `Coverage "${String(coverage.name ?? "unknown")}" references form "${formNumber}" that is missing from form inventory.`,
|
|
3790
|
+
extractorName: "coverage_limits",
|
|
3791
|
+
formNumber,
|
|
3792
|
+
pageNumber: typeof coverage.pageNumber === "number" ? coverage.pageNumber : void 0,
|
|
3793
|
+
itemName: typeof coverage.name === "string" ? coverage.name : void 0
|
|
3794
|
+
});
|
|
3795
|
+
}
|
|
3796
|
+
}
|
|
3797
|
+
const coverageGroups = /* @__PURE__ */ new Map();
|
|
3798
|
+
for (const coverage of coverages) {
|
|
3799
|
+
const key = [
|
|
3800
|
+
String(coverage.name ?? "").toLowerCase(),
|
|
3801
|
+
String(coverage.formNumber ?? "").toLowerCase()
|
|
3802
|
+
].join("|");
|
|
3803
|
+
coverageGroups.set(key, [...coverageGroups.get(key) ?? [], coverage]);
|
|
3804
|
+
}
|
|
3805
|
+
for (const [key, groupedCoverages] of coverageGroups.entries()) {
|
|
3806
|
+
if (groupedCoverages.length < 2) continue;
|
|
3807
|
+
const sorted = [...groupedCoverages].sort((a, b) => sourcePrecedence(b.sectionRef) - sourcePrecedence(a.sectionRef));
|
|
3808
|
+
const highest = sorted[0];
|
|
3809
|
+
for (const lower of sorted.slice(1)) {
|
|
3810
|
+
const highestLimit = String(highest.limit ?? "").trim();
|
|
3811
|
+
const lowerLimit = String(lower.limit ?? "").trim();
|
|
3812
|
+
const highestDeductible = String(highest.deductible ?? "").trim();
|
|
3813
|
+
const lowerDeductible = String(lower.deductible ?? "").trim();
|
|
3814
|
+
if (highestLimit && lowerLimit && highestLimit !== lowerLimit || highestDeductible && lowerDeductible && highestDeductible !== lowerDeductible) {
|
|
3815
|
+
deterministicIssues.push({
|
|
3816
|
+
code: "coverage_precedence_conflict",
|
|
3817
|
+
severity: "warning",
|
|
3818
|
+
message: `Coverage "${String(highest.name ?? key)}" has conflicting extracted terms across sources with different precedence.`,
|
|
3819
|
+
extractorName: "coverage_limits",
|
|
3820
|
+
formNumber: normalizeFormNumber(highest.formNumber) ?? normalizeFormNumber(lower.formNumber),
|
|
3821
|
+
pageNumber: typeof lower.pageNumber === "number" ? lower.pageNumber : void 0,
|
|
3822
|
+
itemName: typeof highest.name === "string" ? highest.name : void 0
|
|
3823
|
+
});
|
|
3824
|
+
}
|
|
3825
|
+
}
|
|
3826
|
+
}
|
|
3827
|
+
for (const exclusion of exclusions) {
|
|
3828
|
+
addFormEntry(inventory, normalizeFormNumber(exclusion.formNumber), "exclusions", {
|
|
3829
|
+
title: typeof exclusion.name === "string" ? exclusion.name : void 0,
|
|
3830
|
+
pageStart: typeof exclusion.pageNumber === "number" ? exclusion.pageNumber : void 0,
|
|
3831
|
+
pageEnd: typeof exclusion.pageNumber === "number" ? exclusion.pageNumber : void 0
|
|
3832
|
+
});
|
|
3833
|
+
if (typeof exclusion.pageNumber !== "number") {
|
|
3834
|
+
deterministicIssues.push({
|
|
3835
|
+
code: "exclusion_missing_page_number",
|
|
3836
|
+
severity: "warning",
|
|
3837
|
+
message: `Exclusion "${String(exclusion.name ?? "unknown")}" is missing pageNumber provenance.`,
|
|
3838
|
+
extractorName: "exclusions",
|
|
3839
|
+
formNumber: normalizeFormNumber(exclusion.formNumber),
|
|
3840
|
+
itemName: typeof exclusion.name === "string" ? exclusion.name : void 0
|
|
3841
|
+
});
|
|
3842
|
+
}
|
|
3843
|
+
if (looksTocArtifact(exclusion.content)) {
|
|
3844
|
+
deterministicIssues.push({
|
|
3845
|
+
code: "exclusion_toc_artifact",
|
|
3846
|
+
severity: "blocking",
|
|
3847
|
+
message: `Exclusion "${String(exclusion.name ?? "unknown")}" appears to be a table-of-contents artifact.`,
|
|
3848
|
+
extractorName: "exclusions",
|
|
3849
|
+
pageNumber: typeof exclusion.pageNumber === "number" ? exclusion.pageNumber : void 0,
|
|
3850
|
+
itemName: typeof exclusion.name === "string" ? exclusion.name : void 0
|
|
3851
|
+
});
|
|
3852
|
+
}
|
|
3853
|
+
}
|
|
3854
|
+
for (const condition of conditions) {
|
|
3855
|
+
if (typeof condition.pageNumber !== "number") {
|
|
3856
|
+
deterministicIssues.push({
|
|
3857
|
+
code: "condition_missing_page_number",
|
|
3858
|
+
severity: "warning",
|
|
3859
|
+
message: `Condition "${String(condition.name ?? "unknown")}" is missing pageNumber provenance.`,
|
|
3860
|
+
extractorName: "conditions",
|
|
3861
|
+
itemName: typeof condition.name === "string" ? condition.name : void 0
|
|
3862
|
+
});
|
|
3863
|
+
}
|
|
3864
|
+
if (looksTocArtifact(condition.content)) {
|
|
3865
|
+
deterministicIssues.push({
|
|
3866
|
+
code: "condition_toc_artifact",
|
|
3867
|
+
severity: "blocking",
|
|
3868
|
+
message: `Condition "${String(condition.name ?? "unknown")}" appears to be a table-of-contents artifact.`,
|
|
3869
|
+
extractorName: "conditions",
|
|
3870
|
+
pageNumber: typeof condition.pageNumber === "number" ? condition.pageNumber : void 0,
|
|
3871
|
+
itemName: typeof condition.name === "string" ? condition.name : void 0
|
|
3872
|
+
});
|
|
3873
|
+
}
|
|
3874
|
+
}
|
|
3875
|
+
for (const section of sections) {
|
|
3876
|
+
if (typeof section.content === "string" && section.content.trim().length < 120 && typeof section.pageStart === "number" && (!("pageEnd" in section) || section.pageEnd === section.pageStart || section.pageEnd === void 0)) {
|
|
3877
|
+
deterministicIssues.push({
|
|
3878
|
+
code: "section_short_fragment",
|
|
3879
|
+
severity: "warning",
|
|
3880
|
+
message: `Section "${String(section.title ?? "unknown")}" may be an orphan continuation fragment.`,
|
|
3881
|
+
extractorName: "sections",
|
|
3882
|
+
pageNumber: typeof section.pageStart === "number" ? section.pageStart : void 0,
|
|
3883
|
+
itemName: typeof section.title === "string" ? section.title : void 0
|
|
3884
|
+
});
|
|
3885
|
+
}
|
|
3886
|
+
}
|
|
3887
|
+
const formInventory = [...inventory.values()].sort((a, b) => a.formNumber.localeCompare(b.formNumber));
|
|
3888
|
+
const rounds = reviewRounds.map((round) => ({
|
|
3889
|
+
round: round.round,
|
|
3890
|
+
kind: "llm_review",
|
|
3891
|
+
status: round.complete && round.qualityIssues.length === 0 ? "passed" : "warning",
|
|
3892
|
+
summary: round.qualityIssues[0] ?? (round.complete ? "Review passed." : "Review requested follow-up extraction.")
|
|
3893
|
+
}));
|
|
3894
|
+
const artifacts = [
|
|
3895
|
+
{ kind: "form_inventory", label: "Form Inventory", itemCount: formInventory.length },
|
|
3896
|
+
{ kind: "page_map", label: "Page Map", itemCount: params.pageAssignments.length }
|
|
3897
|
+
];
|
|
3898
|
+
const qualityGateStatus = evaluateQualityGate({
|
|
3899
|
+
issues: deterministicIssues,
|
|
3900
|
+
hasRoundWarnings: reviewRounds.some((round) => round.qualityIssues.length > 0 || !round.complete)
|
|
3901
|
+
});
|
|
3902
|
+
return {
|
|
3903
|
+
issues: deterministicIssues,
|
|
3904
|
+
rounds,
|
|
3905
|
+
artifacts,
|
|
3906
|
+
reviewRoundRecords: reviewRounds,
|
|
3907
|
+
formInventory,
|
|
3908
|
+
qualityGateStatus
|
|
3909
|
+
};
|
|
3910
|
+
}
|
|
3911
|
+
function toReviewRoundRecord(round, review) {
|
|
3912
|
+
return {
|
|
3913
|
+
round,
|
|
3914
|
+
complete: review.complete,
|
|
3915
|
+
missingFields: review.missingFields,
|
|
3916
|
+
qualityIssues: review.qualityIssues ?? [],
|
|
3917
|
+
additionalTasks: review.additionalTasks
|
|
3918
|
+
};
|
|
3919
|
+
}
|
|
3920
|
+
|
|
3318
3921
|
// src/extraction/coordinator.ts
|
|
3319
3922
|
function createExtractor(config) {
|
|
3320
3923
|
const {
|
|
@@ -3327,21 +3930,174 @@ function createExtractor(config) {
|
|
|
3327
3930
|
onProgress,
|
|
3328
3931
|
log,
|
|
3329
3932
|
providerOptions,
|
|
3933
|
+
qualityGate = "warn",
|
|
3330
3934
|
onCheckpointSave
|
|
3331
3935
|
} = config;
|
|
3332
3936
|
const limit = pLimit(concurrency);
|
|
3333
3937
|
let totalUsage = { inputTokens: 0, outputTokens: 0 };
|
|
3938
|
+
let modelCalls = 0;
|
|
3939
|
+
let callsWithUsage = 0;
|
|
3940
|
+
let callsMissingUsage = 0;
|
|
3334
3941
|
function trackUsage(usage) {
|
|
3942
|
+
modelCalls += 1;
|
|
3335
3943
|
if (usage) {
|
|
3944
|
+
callsWithUsage += 1;
|
|
3336
3945
|
totalUsage.inputTokens += usage.inputTokens;
|
|
3337
3946
|
totalUsage.outputTokens += usage.outputTokens;
|
|
3338
3947
|
onTokenUsage?.(usage);
|
|
3948
|
+
} else {
|
|
3949
|
+
callsMissingUsage += 1;
|
|
3950
|
+
}
|
|
3951
|
+
}
|
|
3952
|
+
function mergeMemoryResult(name, data, memory) {
|
|
3953
|
+
const existing = memory.get(name);
|
|
3954
|
+
memory.set(name, mergeExtractorResult(name, existing, data));
|
|
3955
|
+
}
|
|
3956
|
+
function summarizeExtraction(memory) {
|
|
3957
|
+
const coverageResult = memory.get("coverage_limits");
|
|
3958
|
+
const declarationResult = memory.get("declarations");
|
|
3959
|
+
const endorsementResult = memory.get("endorsements");
|
|
3960
|
+
const exclusionResult = memory.get("exclusions");
|
|
3961
|
+
const conditionResult = memory.get("conditions");
|
|
3962
|
+
const sectionResult = memory.get("sections");
|
|
3963
|
+
const coverageSummary = Array.isArray(coverageResult?.coverages) ? coverageResult.coverages.slice(0, 12).map((coverage) => ({
|
|
3964
|
+
name: coverage.name,
|
|
3965
|
+
limit: coverage.limit,
|
|
3966
|
+
deductible: coverage.deductible,
|
|
3967
|
+
formNumber: coverage.formNumber
|
|
3968
|
+
})) : [];
|
|
3969
|
+
return JSON.stringify({
|
|
3970
|
+
extractedKeys: [...memory.keys()].filter((key) => key !== "classify"),
|
|
3971
|
+
declarationFieldCount: Array.isArray(declarationResult?.fields) ? declarationResult.fields.length : 0,
|
|
3972
|
+
coverageCount: Array.isArray(coverageResult?.coverages) ? coverageResult.coverages.length : 0,
|
|
3973
|
+
coverageSamples: coverageSummary,
|
|
3974
|
+
endorsementCount: Array.isArray(endorsementResult?.endorsements) ? endorsementResult.endorsements.length : 0,
|
|
3975
|
+
exclusionCount: Array.isArray(exclusionResult?.exclusions) ? exclusionResult.exclusions.length : 0,
|
|
3976
|
+
conditionCount: Array.isArray(conditionResult?.conditions) ? conditionResult.conditions.length : 0,
|
|
3977
|
+
sectionCount: Array.isArray(sectionResult?.sections) ? sectionResult.sections.length : 0
|
|
3978
|
+
}, null, 2);
|
|
3979
|
+
}
|
|
3980
|
+
function formatPageMapSummary(pageAssignments) {
|
|
3981
|
+
const extractorPages = /* @__PURE__ */ new Map();
|
|
3982
|
+
for (const assignment of pageAssignments) {
|
|
3983
|
+
for (const extractorName of assignment.extractorNames) {
|
|
3984
|
+
extractorPages.set(extractorName, [...extractorPages.get(extractorName) ?? [], assignment.localPageNumber]);
|
|
3985
|
+
}
|
|
3339
3986
|
}
|
|
3987
|
+
if (extractorPages.size === 0) return "No page assignments available.";
|
|
3988
|
+
return [...extractorPages.entries()].map(([extractorName, pages]) => `${extractorName}: pages ${pages.join(", ")}`).join("\n");
|
|
3989
|
+
}
|
|
3990
|
+
function normalizePageAssignments(pageAssignments, formInventory) {
|
|
3991
|
+
const pageFormTypes = /* @__PURE__ */ new Map();
|
|
3992
|
+
if (formInventory) {
|
|
3993
|
+
for (const form of formInventory.forms) {
|
|
3994
|
+
if (form.pageStart != null) {
|
|
3995
|
+
const end = form.pageEnd ?? form.pageStart;
|
|
3996
|
+
for (let p = form.pageStart; p <= end; p++) {
|
|
3997
|
+
const types = pageFormTypes.get(p) ?? /* @__PURE__ */ new Set();
|
|
3998
|
+
types.add(form.formType);
|
|
3999
|
+
pageFormTypes.set(p, types);
|
|
4000
|
+
}
|
|
4001
|
+
}
|
|
4002
|
+
}
|
|
4003
|
+
}
|
|
4004
|
+
return pageAssignments.map((assignment) => {
|
|
4005
|
+
let extractorNames = [...new Set(
|
|
4006
|
+
(assignment.extractorNames.length > 0 ? assignment.extractorNames : ["sections"]).filter(Boolean)
|
|
4007
|
+
)];
|
|
4008
|
+
const hasDeclarations = extractorNames.includes("declarations");
|
|
4009
|
+
const hasConditions = extractorNames.includes("conditions");
|
|
4010
|
+
const hasExclusions = extractorNames.includes("exclusions");
|
|
4011
|
+
const hasEndorsements = extractorNames.includes("endorsements");
|
|
4012
|
+
const looksLikeScheduleValues = assignment.hasScheduleValues === true;
|
|
4013
|
+
const roleBlocksCoverageLimits = assignment.pageRole === "policy_form" || assignment.pageRole === "condition_exclusion_form" || assignment.pageRole === "endorsement_form";
|
|
4014
|
+
const inventoryTypes = pageFormTypes.get(assignment.localPageNumber);
|
|
4015
|
+
const inventoryBlocksCoverageLimits = inventoryTypes != null && !looksLikeScheduleValues && !hasDeclarations && (inventoryTypes.has("endorsement") || inventoryTypes.has("notice") || inventoryTypes.has("application"));
|
|
4016
|
+
if (extractorNames.includes("coverage_limits")) {
|
|
4017
|
+
const shouldDropCoverageLimits = inventoryBlocksCoverageLimits || !looksLikeScheduleValues && roleBlocksCoverageLimits || !hasDeclarations && !looksLikeScheduleValues && (hasConditions || hasExclusions) || !hasDeclarations && !looksLikeScheduleValues && hasEndorsements;
|
|
4018
|
+
if (shouldDropCoverageLimits) {
|
|
4019
|
+
extractorNames = extractorNames.filter((name) => name !== "coverage_limits");
|
|
4020
|
+
}
|
|
4021
|
+
}
|
|
4022
|
+
if (inventoryTypes?.has("endorsement") && !extractorNames.includes("endorsements")) {
|
|
4023
|
+
extractorNames = [...extractorNames, "endorsements"];
|
|
4024
|
+
}
|
|
4025
|
+
if (extractorNames.length === 0) {
|
|
4026
|
+
extractorNames = ["sections"];
|
|
4027
|
+
}
|
|
4028
|
+
return {
|
|
4029
|
+
...assignment,
|
|
4030
|
+
extractorNames
|
|
4031
|
+
};
|
|
4032
|
+
});
|
|
4033
|
+
}
|
|
4034
|
+
function buildTemplateHints(primaryType, documentType, pageCount, template) {
|
|
4035
|
+
return [
|
|
4036
|
+
`Document type: ${primaryType} ${documentType}`,
|
|
4037
|
+
`Expected sections: ${template.expectedSections.join(", ")}`,
|
|
4038
|
+
`Page hints: ${Object.entries(template.pageHints).map(([k, v]) => `${k}: ${v}`).join("; ")}`,
|
|
4039
|
+
`Total pages: ${pageCount}`
|
|
4040
|
+
].join("\n");
|
|
4041
|
+
}
|
|
4042
|
+
function groupContiguousPages(pages) {
|
|
4043
|
+
if (pages.length === 0) return [];
|
|
4044
|
+
const sorted = [...new Set(pages)].sort((a, b) => a - b);
|
|
4045
|
+
const ranges = [];
|
|
4046
|
+
let start = sorted[0];
|
|
4047
|
+
let previous = sorted[0];
|
|
4048
|
+
for (let i = 1; i < sorted.length; i += 1) {
|
|
4049
|
+
const current = sorted[i];
|
|
4050
|
+
if (current === previous + 1) {
|
|
4051
|
+
previous = current;
|
|
4052
|
+
continue;
|
|
4053
|
+
}
|
|
4054
|
+
ranges.push({ startPage: start, endPage: previous });
|
|
4055
|
+
start = current;
|
|
4056
|
+
previous = current;
|
|
4057
|
+
}
|
|
4058
|
+
ranges.push({ startPage: start, endPage: previous });
|
|
4059
|
+
return ranges;
|
|
4060
|
+
}
|
|
4061
|
+
function buildPlanFromPageAssignments(pageAssignments, pageCount) {
|
|
4062
|
+
const extractorPages = /* @__PURE__ */ new Map();
|
|
4063
|
+
for (const assignment of pageAssignments) {
|
|
4064
|
+
const extractors = assignment.extractorNames.length > 0 ? assignment.extractorNames : ["sections"];
|
|
4065
|
+
for (const extractorName of extractors) {
|
|
4066
|
+
extractorPages.set(extractorName, [...extractorPages.get(extractorName) ?? [], assignment.localPageNumber]);
|
|
4067
|
+
}
|
|
4068
|
+
}
|
|
4069
|
+
const coveredPages = /* @__PURE__ */ new Set();
|
|
4070
|
+
for (const pages of extractorPages.values()) {
|
|
4071
|
+
for (const page of pages) coveredPages.add(page);
|
|
4072
|
+
}
|
|
4073
|
+
for (let page = 1; page <= pageCount; page += 1) {
|
|
4074
|
+
if (!coveredPages.has(page)) {
|
|
4075
|
+
extractorPages.set("sections", [...extractorPages.get("sections") ?? [], page]);
|
|
4076
|
+
}
|
|
4077
|
+
}
|
|
4078
|
+
const tasks = [...extractorPages.entries()].flatMap(
|
|
4079
|
+
([extractorName, pages]) => groupContiguousPages(pages).map(({ startPage, endPage }) => ({
|
|
4080
|
+
extractorName,
|
|
4081
|
+
startPage,
|
|
4082
|
+
endPage,
|
|
4083
|
+
description: `Page-mapped ${extractorName} extraction for pages ${startPage}-${endPage}`
|
|
4084
|
+
}))
|
|
4085
|
+
).sort((a, b) => a.startPage - b.startPage || a.extractorName.localeCompare(b.extractorName));
|
|
4086
|
+
return {
|
|
4087
|
+
tasks,
|
|
4088
|
+
pageMap: [...extractorPages.entries()].map(([section, pages]) => ({
|
|
4089
|
+
section,
|
|
4090
|
+
pages: `pages ${[...new Set(pages)].sort((a, b) => a - b).join(", ")}`
|
|
4091
|
+
}))
|
|
4092
|
+
};
|
|
3340
4093
|
}
|
|
3341
4094
|
async function extract(pdfBase64, documentId, options) {
|
|
3342
4095
|
const id = documentId ?? `doc-${Date.now()}`;
|
|
3343
4096
|
const memory = /* @__PURE__ */ new Map();
|
|
3344
4097
|
totalUsage = { inputTokens: 0, outputTokens: 0 };
|
|
4098
|
+
modelCalls = 0;
|
|
4099
|
+
callsWithUsage = 0;
|
|
4100
|
+
callsMissingUsage = 0;
|
|
3345
4101
|
const pipelineCtx = createPipelineContext({
|
|
3346
4102
|
id,
|
|
3347
4103
|
onSave: onCheckpointSave,
|
|
@@ -3392,40 +4148,109 @@ function createExtractor(config) {
|
|
|
3392
4148
|
const primaryType = policyTypes[0] ?? "other";
|
|
3393
4149
|
const template = getTemplate(primaryType);
|
|
3394
4150
|
const pageCount = resumed?.pageCount ?? await getPdfPageCount(pdfBase64);
|
|
3395
|
-
|
|
3396
|
-
|
|
3397
|
-
|
|
3398
|
-
|
|
4151
|
+
const templateHints = buildTemplateHints(primaryType, documentType, pageCount, template);
|
|
4152
|
+
let formInventory;
|
|
4153
|
+
if (resumed?.formInventory && pipelineCtx.isPhaseComplete("form_inventory")) {
|
|
4154
|
+
formInventory = resumed.formInventory;
|
|
4155
|
+
memory.set("form_inventory", formInventory);
|
|
4156
|
+
onProgress?.("Resuming from checkpoint (form inventory complete)...");
|
|
3399
4157
|
} else {
|
|
3400
|
-
onProgress?.(`
|
|
3401
|
-
const
|
|
3402
|
-
`Document type: ${primaryType} ${documentType}`,
|
|
3403
|
-
`Expected sections: ${template.expectedSections.join(", ")}`,
|
|
3404
|
-
`Page hints: ${Object.entries(template.pageHints).map(([k, v]) => `${k}: ${v}`).join("; ")}`,
|
|
3405
|
-
`Total pages: ${pageCount}`
|
|
3406
|
-
].join("\n");
|
|
3407
|
-
const planResponse = await safeGenerateObject(
|
|
4158
|
+
onProgress?.(`Building form inventory for ${primaryType} ${documentType}...`);
|
|
4159
|
+
const formInventoryResponse = await safeGenerateObject(
|
|
3408
4160
|
generateObject,
|
|
3409
4161
|
{
|
|
3410
|
-
prompt:
|
|
3411
|
-
schema:
|
|
4162
|
+
prompt: buildFormInventoryPrompt(templateHints),
|
|
4163
|
+
schema: FormInventorySchema,
|
|
3412
4164
|
maxTokens: 2048,
|
|
3413
4165
|
providerOptions: { ...providerOptions, pdfBase64 }
|
|
3414
4166
|
},
|
|
3415
4167
|
{
|
|
3416
|
-
fallback: {
|
|
3417
|
-
tasks: [{ extractorName: "sections", startPage: 1, endPage: pageCount, description: "Full document fallback extraction" }]
|
|
3418
|
-
},
|
|
4168
|
+
fallback: { forms: [] },
|
|
3419
4169
|
log,
|
|
3420
|
-
onError: (err, attempt) => log?.(`
|
|
4170
|
+
onError: (err, attempt) => log?.(`Form inventory attempt ${attempt + 1} failed: ${err instanceof Error ? err.message : String(err)}`)
|
|
3421
4171
|
}
|
|
3422
4172
|
);
|
|
3423
|
-
trackUsage(
|
|
3424
|
-
|
|
4173
|
+
trackUsage(formInventoryResponse.usage);
|
|
4174
|
+
formInventory = formInventoryResponse.object;
|
|
4175
|
+
memory.set("form_inventory", formInventory);
|
|
4176
|
+
await pipelineCtx.save("form_inventory", {
|
|
4177
|
+
id,
|
|
4178
|
+
pageCount,
|
|
4179
|
+
classifyResult,
|
|
4180
|
+
formInventory,
|
|
4181
|
+
memory: Object.fromEntries(memory)
|
|
4182
|
+
});
|
|
4183
|
+
}
|
|
4184
|
+
let pageAssignments;
|
|
4185
|
+
if (resumed?.pageAssignments && pipelineCtx.isPhaseComplete("page_map")) {
|
|
4186
|
+
pageAssignments = resumed.pageAssignments;
|
|
4187
|
+
onProgress?.("Resuming from checkpoint (page map complete)...");
|
|
4188
|
+
} else {
|
|
4189
|
+
onProgress?.(`Mapping document pages for ${primaryType} ${documentType}...`);
|
|
4190
|
+
const chunkSize = 8;
|
|
4191
|
+
const collectedAssignments = [];
|
|
4192
|
+
const formInventoryHint = formInventory?.forms.length ? formatFormInventoryForPageMap(formInventory.forms) : void 0;
|
|
4193
|
+
for (let startPage = 1; startPage <= pageCount; startPage += chunkSize) {
|
|
4194
|
+
const endPage = Math.min(pageCount, startPage + chunkSize - 1);
|
|
4195
|
+
const pagesPdf = await extractPageRange(pdfBase64, startPage, endPage);
|
|
4196
|
+
const mapResponse = await safeGenerateObject(
|
|
4197
|
+
generateObject,
|
|
4198
|
+
{
|
|
4199
|
+
prompt: buildPageMapPrompt(templateHints, startPage, endPage, formInventoryHint),
|
|
4200
|
+
schema: PageMapChunkSchema,
|
|
4201
|
+
maxTokens: 2048,
|
|
4202
|
+
providerOptions: { ...providerOptions, pdfBase64: pagesPdf }
|
|
4203
|
+
},
|
|
4204
|
+
{
|
|
4205
|
+
fallback: {
|
|
4206
|
+
pages: Array.from({ length: endPage - startPage + 1 }, (_, index) => ({
|
|
4207
|
+
localPageNumber: index + 1,
|
|
4208
|
+
extractorNames: index === 0 && startPage === 1 ? ["carrier_info", "named_insured", "declarations", "coverage_limits"] : ["sections"],
|
|
4209
|
+
confidence: 0,
|
|
4210
|
+
notes: "Fallback page assignment"
|
|
4211
|
+
}))
|
|
4212
|
+
},
|
|
4213
|
+
log,
|
|
4214
|
+
onError: (err, attempt) => log?.(`Page map attempt ${attempt + 1} failed for pages ${startPage}-${endPage}: ${err}`)
|
|
4215
|
+
}
|
|
4216
|
+
);
|
|
4217
|
+
trackUsage(mapResponse.usage);
|
|
4218
|
+
for (const assignment of mapResponse.object.pages) {
|
|
4219
|
+
collectedAssignments.push({
|
|
4220
|
+
...assignment,
|
|
4221
|
+
localPageNumber: startPage + assignment.localPageNumber - 1
|
|
4222
|
+
});
|
|
4223
|
+
}
|
|
4224
|
+
}
|
|
4225
|
+
pageAssignments = collectedAssignments.length > 0 ? collectedAssignments : Array.from({ length: pageCount }, (_, index) => ({
|
|
4226
|
+
localPageNumber: index + 1,
|
|
4227
|
+
extractorNames: index === 0 ? ["carrier_info", "named_insured", "declarations", "coverage_limits"] : ["sections"],
|
|
4228
|
+
confidence: 0,
|
|
4229
|
+
notes: "Full-document fallback page assignment"
|
|
4230
|
+
}));
|
|
4231
|
+
pageAssignments = normalizePageAssignments(pageAssignments, formInventory);
|
|
4232
|
+
await pipelineCtx.save("page_map", {
|
|
4233
|
+
id,
|
|
4234
|
+
pageCount,
|
|
4235
|
+
classifyResult,
|
|
4236
|
+
formInventory,
|
|
4237
|
+
pageAssignments,
|
|
4238
|
+
memory: Object.fromEntries(memory)
|
|
4239
|
+
});
|
|
4240
|
+
}
|
|
4241
|
+
let plan;
|
|
4242
|
+
if (resumed?.plan && pipelineCtx.isPhaseComplete("plan")) {
|
|
4243
|
+
plan = resumed.plan;
|
|
4244
|
+
onProgress?.("Resuming from checkpoint (plan complete)...");
|
|
4245
|
+
} else {
|
|
4246
|
+
onProgress?.(`Building extraction plan from page map for ${primaryType} ${documentType}...`);
|
|
4247
|
+
plan = buildPlanFromPageAssignments(pageAssignments, pageCount);
|
|
3425
4248
|
await pipelineCtx.save("plan", {
|
|
3426
4249
|
id,
|
|
3427
4250
|
pageCount,
|
|
3428
4251
|
classifyResult,
|
|
4252
|
+
formInventory,
|
|
4253
|
+
pageAssignments,
|
|
3429
4254
|
plan,
|
|
3430
4255
|
memory: Object.fromEntries(memory)
|
|
3431
4256
|
});
|
|
@@ -3466,35 +4291,46 @@ function createExtractor(config) {
|
|
|
3466
4291
|
);
|
|
3467
4292
|
for (const result of extractorResults) {
|
|
3468
4293
|
if (result) {
|
|
3469
|
-
|
|
4294
|
+
mergeMemoryResult(result.name, result.data, memory);
|
|
3470
4295
|
}
|
|
3471
4296
|
}
|
|
3472
4297
|
await pipelineCtx.save("extract", {
|
|
3473
4298
|
id,
|
|
3474
4299
|
pageCount,
|
|
3475
4300
|
classifyResult,
|
|
4301
|
+
formInventory,
|
|
4302
|
+
pageAssignments,
|
|
3476
4303
|
plan,
|
|
3477
4304
|
memory: Object.fromEntries(memory)
|
|
3478
4305
|
});
|
|
3479
4306
|
}
|
|
4307
|
+
let reviewRounds = resumed?.reviewReport?.reviewRoundRecords ?? [];
|
|
4308
|
+
let reviewReport = resumed?.reviewReport;
|
|
3480
4309
|
if (!pipelineCtx.isPhaseComplete("review")) {
|
|
4310
|
+
reviewRounds = [];
|
|
3481
4311
|
for (let round = 0; round < maxReviewRounds; round++) {
|
|
3482
4312
|
const extractedKeys = [...memory.keys()].filter((k) => k !== "classify");
|
|
4313
|
+
const extractionSummary = summarizeExtraction(memory);
|
|
4314
|
+
const pageMapSummary = formatPageMapSummary(pageAssignments);
|
|
3483
4315
|
const reviewResponse = await safeGenerateObject(
|
|
3484
4316
|
generateObject,
|
|
3485
4317
|
{
|
|
3486
|
-
prompt: buildReviewPrompt(template.required, extractedKeys),
|
|
4318
|
+
prompt: buildReviewPrompt(template.required, extractedKeys, extractionSummary, pageMapSummary),
|
|
3487
4319
|
schema: ReviewResultSchema,
|
|
3488
|
-
maxTokens:
|
|
3489
|
-
providerOptions
|
|
4320
|
+
maxTokens: 1536,
|
|
4321
|
+
providerOptions: { ...providerOptions, pdfBase64 }
|
|
3490
4322
|
},
|
|
3491
4323
|
{
|
|
3492
|
-
fallback: { complete: true, missingFields: [], additionalTasks: [] },
|
|
4324
|
+
fallback: { complete: true, missingFields: [], qualityIssues: [], additionalTasks: [] },
|
|
3493
4325
|
log,
|
|
3494
4326
|
onError: (err, attempt) => log?.(`Review round ${round + 1} attempt ${attempt + 1} failed: ${err}`)
|
|
3495
4327
|
}
|
|
3496
4328
|
);
|
|
3497
4329
|
trackUsage(reviewResponse.usage);
|
|
4330
|
+
reviewRounds.push(toReviewRoundRecord(round + 1, reviewResponse.object));
|
|
4331
|
+
if (reviewResponse.object.qualityIssues?.length) {
|
|
4332
|
+
await log?.(`Review round ${round + 1} quality issues: ${reviewResponse.object.qualityIssues.join("; ")}`);
|
|
4333
|
+
}
|
|
3498
4334
|
if (reviewResponse.object.complete || reviewResponse.object.additionalTasks.length === 0) {
|
|
3499
4335
|
onProgress?.("Extraction complete.");
|
|
3500
4336
|
break;
|
|
@@ -3529,25 +4365,49 @@ function createExtractor(config) {
|
|
|
3529
4365
|
);
|
|
3530
4366
|
for (const result of followUpResults) {
|
|
3531
4367
|
if (result) {
|
|
3532
|
-
|
|
4368
|
+
mergeMemoryResult(result.name, result.data, memory);
|
|
3533
4369
|
}
|
|
3534
4370
|
}
|
|
3535
4371
|
}
|
|
4372
|
+
reviewReport = buildExtractionReviewReport({
|
|
4373
|
+
memory,
|
|
4374
|
+
pageAssignments,
|
|
4375
|
+
reviewRounds
|
|
4376
|
+
});
|
|
4377
|
+
if (reviewReport.issues.length > 0) {
|
|
4378
|
+
await log?.(
|
|
4379
|
+
`Deterministic review issues: ${reviewReport.issues.map((issue) => issue.message).join("; ")}`
|
|
4380
|
+
);
|
|
4381
|
+
}
|
|
4382
|
+
if (shouldFailQualityGate(qualityGate, reviewReport.qualityGateStatus)) {
|
|
4383
|
+
throw new Error("Extraction quality gate failed. See reviewReport for blocking issues.");
|
|
4384
|
+
}
|
|
3536
4385
|
await pipelineCtx.save("review", {
|
|
3537
4386
|
id,
|
|
3538
4387
|
pageCount,
|
|
3539
4388
|
classifyResult,
|
|
4389
|
+
formInventory,
|
|
4390
|
+
pageAssignments,
|
|
3540
4391
|
plan,
|
|
4392
|
+
reviewReport,
|
|
3541
4393
|
memory: Object.fromEntries(memory)
|
|
3542
4394
|
});
|
|
3543
4395
|
}
|
|
4396
|
+
reviewReport ?? (reviewReport = buildExtractionReviewReport({
|
|
4397
|
+
memory,
|
|
4398
|
+
pageAssignments,
|
|
4399
|
+
reviewRounds
|
|
4400
|
+
}));
|
|
3544
4401
|
onProgress?.("Assembling document...");
|
|
3545
4402
|
const document = assembleDocument(id, documentType, memory);
|
|
3546
4403
|
await pipelineCtx.save("assemble", {
|
|
3547
4404
|
id,
|
|
3548
4405
|
pageCount,
|
|
3549
4406
|
classifyResult,
|
|
4407
|
+
formInventory,
|
|
4408
|
+
pageAssignments,
|
|
3550
4409
|
plan,
|
|
4410
|
+
reviewReport,
|
|
3551
4411
|
memory: Object.fromEntries(memory),
|
|
3552
4412
|
document
|
|
3553
4413
|
});
|
|
@@ -3560,11 +4420,21 @@ function createExtractor(config) {
|
|
|
3560
4420
|
trackUsage(formatResult.usage);
|
|
3561
4421
|
const chunks = chunkDocument(formatResult.document);
|
|
3562
4422
|
const finalCheckpoint = pipelineCtx.getCheckpoint();
|
|
4423
|
+
if (callsMissingUsage > 0) {
|
|
4424
|
+
await log?.(`Token usage was unavailable for ${callsMissingUsage}/${modelCalls} model calls. Check that your provider callbacks return usage.`);
|
|
4425
|
+
onProgress?.(`Token usage unavailable for ${callsMissingUsage}/${modelCalls} model calls.`);
|
|
4426
|
+
}
|
|
3563
4427
|
return {
|
|
3564
4428
|
document: formatResult.document,
|
|
3565
4429
|
chunks,
|
|
3566
4430
|
tokenUsage: totalUsage,
|
|
3567
|
-
|
|
4431
|
+
usageReporting: {
|
|
4432
|
+
modelCalls,
|
|
4433
|
+
callsWithUsage,
|
|
4434
|
+
callsMissingUsage
|
|
4435
|
+
},
|
|
4436
|
+
checkpoint: finalCheckpoint,
|
|
4437
|
+
reviewReport
|
|
3568
4438
|
};
|
|
3569
4439
|
}
|
|
3570
4440
|
return { extract };
|
|
@@ -3784,8 +4654,8 @@ Respond with JSON only:
|
|
|
3784
4654
|
}`;
|
|
3785
4655
|
|
|
3786
4656
|
// src/schemas/application.ts
|
|
3787
|
-
import { z as
|
|
3788
|
-
var FieldTypeSchema =
|
|
4657
|
+
import { z as z33 } from "zod";
|
|
4658
|
+
var FieldTypeSchema = z33.enum([
|
|
3789
4659
|
"text",
|
|
3790
4660
|
"numeric",
|
|
3791
4661
|
"currency",
|
|
@@ -3794,100 +4664,131 @@ var FieldTypeSchema = z32.enum([
|
|
|
3794
4664
|
"table",
|
|
3795
4665
|
"declaration"
|
|
3796
4666
|
]);
|
|
3797
|
-
var ApplicationFieldSchema =
|
|
3798
|
-
id:
|
|
3799
|
-
label:
|
|
3800
|
-
section:
|
|
4667
|
+
var ApplicationFieldSchema = z33.object({
|
|
4668
|
+
id: z33.string(),
|
|
4669
|
+
label: z33.string(),
|
|
4670
|
+
section: z33.string(),
|
|
3801
4671
|
fieldType: FieldTypeSchema,
|
|
3802
|
-
required:
|
|
3803
|
-
options:
|
|
3804
|
-
columns:
|
|
3805
|
-
requiresExplanationIfYes:
|
|
3806
|
-
condition:
|
|
3807
|
-
dependsOn:
|
|
3808
|
-
whenValue:
|
|
4672
|
+
required: z33.boolean(),
|
|
4673
|
+
options: z33.array(z33.string()).optional(),
|
|
4674
|
+
columns: z33.array(z33.string()).optional(),
|
|
4675
|
+
requiresExplanationIfYes: z33.boolean().optional(),
|
|
4676
|
+
condition: z33.object({
|
|
4677
|
+
dependsOn: z33.string(),
|
|
4678
|
+
whenValue: z33.string()
|
|
3809
4679
|
}).optional(),
|
|
3810
|
-
value:
|
|
3811
|
-
source:
|
|
3812
|
-
confidence:
|
|
4680
|
+
value: z33.string().optional(),
|
|
4681
|
+
source: z33.string().optional().describe("Where the value came from: auto-fill, user, lookup"),
|
|
4682
|
+
confidence: z33.enum(["confirmed", "high", "medium", "low"]).optional()
|
|
4683
|
+
});
|
|
4684
|
+
var ApplicationClassifyResultSchema = z33.object({
|
|
4685
|
+
isApplication: z33.boolean(),
|
|
4686
|
+
confidence: z33.number().min(0).max(1),
|
|
4687
|
+
applicationType: z33.string().nullable()
|
|
4688
|
+
});
|
|
4689
|
+
var FieldExtractionResultSchema = z33.object({
|
|
4690
|
+
fields: z33.array(ApplicationFieldSchema)
|
|
4691
|
+
});
|
|
4692
|
+
var AutoFillMatchSchema = z33.object({
|
|
4693
|
+
fieldId: z33.string(),
|
|
4694
|
+
value: z33.string(),
|
|
4695
|
+
confidence: z33.enum(["confirmed"]),
|
|
4696
|
+
contextKey: z33.string()
|
|
3813
4697
|
});
|
|
3814
|
-
var
|
|
3815
|
-
|
|
3816
|
-
confidence: z32.number().min(0).max(1),
|
|
3817
|
-
applicationType: z32.string().nullable()
|
|
4698
|
+
var AutoFillResultSchema = z33.object({
|
|
4699
|
+
matches: z33.array(AutoFillMatchSchema)
|
|
3818
4700
|
});
|
|
3819
|
-
var
|
|
3820
|
-
|
|
4701
|
+
var QuestionBatchResultSchema = z33.object({
|
|
4702
|
+
batches: z33.array(z33.array(z33.string()).describe("Array of field IDs in this batch"))
|
|
3821
4703
|
});
|
|
3822
|
-
var
|
|
3823
|
-
|
|
3824
|
-
|
|
3825
|
-
|
|
3826
|
-
|
|
4704
|
+
var LookupRequestSchema = z33.object({
|
|
4705
|
+
type: z33.string().describe("Type of lookup: 'records', 'website', 'policy'"),
|
|
4706
|
+
description: z33.string(),
|
|
4707
|
+
url: z33.string().optional(),
|
|
4708
|
+
targetFieldIds: z33.array(z33.string())
|
|
3827
4709
|
});
|
|
3828
|
-
var
|
|
3829
|
-
|
|
4710
|
+
var ReplyIntentSchema = z33.object({
|
|
4711
|
+
primaryIntent: z33.enum(["answers_only", "question", "lookup_request", "mixed"]),
|
|
4712
|
+
hasAnswers: z33.boolean(),
|
|
4713
|
+
questionText: z33.string().optional(),
|
|
4714
|
+
questionFieldIds: z33.array(z33.string()).optional(),
|
|
4715
|
+
lookupRequests: z33.array(LookupRequestSchema).optional()
|
|
3830
4716
|
});
|
|
3831
|
-
var
|
|
3832
|
-
|
|
4717
|
+
var ParsedAnswerSchema = z33.object({
|
|
4718
|
+
fieldId: z33.string(),
|
|
4719
|
+
value: z33.string(),
|
|
4720
|
+
explanation: z33.string().optional()
|
|
3833
4721
|
});
|
|
3834
|
-
var
|
|
3835
|
-
|
|
3836
|
-
|
|
3837
|
-
url: z32.string().optional(),
|
|
3838
|
-
targetFieldIds: z32.array(z32.string())
|
|
4722
|
+
var AnswerParsingResultSchema = z33.object({
|
|
4723
|
+
answers: z33.array(ParsedAnswerSchema),
|
|
4724
|
+
unanswered: z33.array(z33.string()).describe("Field IDs that were not answered")
|
|
3839
4725
|
});
|
|
3840
|
-
var
|
|
3841
|
-
|
|
3842
|
-
|
|
3843
|
-
|
|
3844
|
-
questionFieldIds: z32.array(z32.string()).optional(),
|
|
3845
|
-
lookupRequests: z32.array(LookupRequestSchema).optional()
|
|
4726
|
+
// Zod object schema for one lookup-derived fill: required string `fieldId`,
// `value` and `source`. `source` is described as a specific citable reference;
// the schema itself only checks that it is a string.
var LookupFillSchema = z33.object({
|
|
4727
|
+
fieldId: z33.string(),
|
|
4728
|
+
value: z33.string(),
|
|
4729
|
+
source: z33.string().describe("Specific citable reference, e.g. 'GL Policy #POL-12345 (Hartford)'")
|
|
3846
4730
|
});
|
|
3847
|
-
var
|
|
3848
|
-
|
|
3849
|
-
|
|
3850
|
-
explanation:
|
|
4731
|
+
// Zod object schema for a lookup-fill result: `fills` (LookupFillSchema items) and
// `unfillable` (string array) are required; `explanation` is an optional string.
var LookupFillResultSchema = z33.object({
|
|
4732
|
+
fills: z33.array(LookupFillSchema),
|
|
4733
|
+
unfillable: z33.array(z33.string()),
|
|
4734
|
+
explanation: z33.string().optional()
|
|
3851
4735
|
});
|
|
3852
|
-
var
|
|
3853
|
-
|
|
3854
|
-
|
|
4736
|
+
// Zod object schema for placing one piece of text on a PDF page.
// `x` and `y` are described as percentages (0-100) from the left and top edges;
// the schema validates them only as numbers, with no range check.
// `fontSize` and `isCheckmark` are optional.
var FlatPdfPlacementSchema = z33.object({
|
|
4737
|
+
fieldId: z33.string(),
|
|
4738
|
+
page: z33.number(),
|
|
4739
|
+
x: z33.number().describe("Percentage from left edge (0-100)"),
|
|
4740
|
+
y: z33.number().describe("Percentage from top edge (0-100)"),
|
|
4741
|
+
text: z33.string(),
|
|
4742
|
+
fontSize: z33.number().optional(),
|
|
4743
|
+
isCheckmark: z33.boolean().optional()
|
|
3855
4744
|
});
|
|
3856
|
-
var
|
|
3857
|
-
fieldId:
|
|
3858
|
-
|
|
3859
|
-
|
|
4745
|
+
// Zod object schema with three required strings: `fieldId`, `acroFormName` and `value`.
var AcroFormMappingSchema = z33.object({
|
|
4746
|
+
fieldId: z33.string(),
|
|
4747
|
+
acroFormName: z33.string(),
|
|
4748
|
+
value: z33.string()
|
|
3860
4749
|
});
|
|
3861
|
-
var
|
|
3862
|
-
|
|
3863
|
-
|
|
3864
|
-
|
|
4750
|
+
// Zod enum of the three quality-gate outcomes: "passed", "warning", "failed".
var QualityGateStatusSchema = z33.enum(["passed", "warning", "failed"]);
|
|
4751
|
+
// Zod enum of issue severities: "info", "warning", "blocking".
var QualitySeveritySchema = z33.enum(["info", "warning", "blocking"]);
|
|
4752
|
+
// Zod object schema for one quality issue: string `code`, `severity` validated by
// QualitySeveritySchema, string `message`, and an optional string `fieldId`.
var ApplicationQualityIssueSchema = z33.object({
|
|
4753
|
+
code: z33.string(),
|
|
4754
|
+
severity: QualitySeveritySchema,
|
|
4755
|
+
message: z33.string(),
|
|
4756
|
+
fieldId: z33.string().optional()
|
|
3865
4757
|
});
|
|
3866
|
-
var
|
|
3867
|
-
|
|
3868
|
-
|
|
3869
|
-
|
|
3870
|
-
|
|
3871
|
-
text: z32.string(),
|
|
3872
|
-
fontSize: z32.number().optional(),
|
|
3873
|
-
isCheckmark: z32.boolean().optional()
|
|
4758
|
+
// Zod object schema for one review round: numeric `round`, string `kind`,
// `status` validated by QualityGateStatusSchema, and an optional string `summary`.
var ApplicationQualityRoundSchema = z33.object({
|
|
4759
|
+
round: z33.number(),
|
|
4760
|
+
kind: z33.string(),
|
|
4761
|
+
status: QualityGateStatusSchema,
|
|
4762
|
+
summary: z33.string().optional()
|
|
3874
4763
|
});
|
|
3875
|
-
var
|
|
3876
|
-
|
|
3877
|
-
|
|
3878
|
-
|
|
4764
|
+
// Zod object schema for one report artifact: required string `kind`, with an
// optional string `label` and an optional numeric `itemCount`.
var ApplicationQualityArtifactSchema = z33.object({
|
|
4765
|
+
kind: z33.string(),
|
|
4766
|
+
label: z33.string().optional(),
|
|
4767
|
+
itemCount: z33.number().optional()
|
|
3879
4768
|
});
|
|
3880
|
-
var
|
|
3881
|
-
|
|
3882
|
-
|
|
3883
|
-
|
|
3884
|
-
|
|
3885
|
-
|
|
3886
|
-
|
|
3887
|
-
|
|
3888
|
-
|
|
3889
|
-
|
|
3890
|
-
|
|
4769
|
+
// Zod object schema for an email review: `issues` is an array of
// ApplicationQualityIssueSchema items and `qualityGateStatus` is validated by
// QualityGateStatusSchema. Both keys are required.
var ApplicationEmailReviewSchema = z33.object({
|
|
4770
|
+
issues: z33.array(ApplicationQualityIssueSchema),
|
|
4771
|
+
qualityGateStatus: QualityGateStatusSchema
|
|
4772
|
+
});
|
|
4773
|
+
// Zod object schema for the application quality report.
// `issues` and `qualityGateStatus` are required; `rounds`, `artifacts` and
// `emailReview` (ApplicationEmailReviewSchema) are optional.
var ApplicationQualityReportSchema = z33.object({
|
|
4774
|
+
issues: z33.array(ApplicationQualityIssueSchema),
|
|
4775
|
+
rounds: z33.array(ApplicationQualityRoundSchema).optional(),
|
|
4776
|
+
artifacts: z33.array(ApplicationQualityArtifactSchema).optional(),
|
|
4777
|
+
emailReview: ApplicationEmailReviewSchema.optional(),
|
|
4778
|
+
qualityGateStatus: QualityGateStatusSchema
|
|
4779
|
+
});
|
|
4780
|
+
// Zod object schema for the application state record.
// Required keys: `id`, `fields` (ApplicationFieldSchema items), `status` (one of
// eight string literals), `createdAt` and `updatedAt` (numbers).
// `currentBatchIndex` falls back to 0 when absent; `applicationType` may be a
// string, null, or omitted; `pdfBase64`, `title`, `batches` and `qualityReport`
// are optional.
var ApplicationStateSchema = z33.object({
|
|
4781
|
+
id: z33.string(),
|
|
4782
|
+
pdfBase64: z33.string().optional().describe("Original PDF, omitted after extraction"),
|
|
4783
|
+
title: z33.string().optional(),
|
|
4784
|
+
applicationType: z33.string().nullable().optional(),
|
|
4785
|
+
fields: z33.array(ApplicationFieldSchema),
|
|
4786
|
+
batches: z33.array(z33.array(z33.string())).optional(),
|
|
4787
|
+
currentBatchIndex: z33.number().default(0),
|
|
4788
|
+
qualityReport: ApplicationQualityReportSchema.optional(),
|
|
4789
|
+
status: z33.enum(["classifying", "extracting", "auto_filling", "batching", "collecting", "confirming", "mapping", "complete"]),
|
|
4790
|
+
createdAt: z33.number(),
|
|
4791
|
+
updatedAt: z33.number()
|
|
3891
4792
|
});
|
|
3892
4793
|
|
|
3893
4794
|
// src/application/agents/classifier.ts
|
|
@@ -4395,6 +5296,87 @@ async function generateBatchEmail(batchFields, batchIndex, totalBatches, opts, g
|
|
|
4395
5296
|
return { text, usage };
|
|
4396
5297
|
}
|
|
4397
5298
|
|
|
5299
|
+
// src/application/quality.ts
|
|
5300
|
+
/**
 * Reports whether a field's source attribution is too vague to cite.
 *
 * A source is vague when it is missing, blank after trimming, exactly one of
 * the generic placeholders ("unknown", "context", "user provided"), or contains
 * one of the generic phrases ("existing records", "available data").
 * Matching is case-insensitive and ignores surrounding whitespace.
 *
 * @param {string | null | undefined} source - Provenance text recorded for a field.
 * @returns {boolean} `true` when the source is missing or vague, otherwise `false`.
 */
function isVagueSource(source) {
  if (!source) return true;
  const normalized = source.trim().toLowerCase();
  // A whitespace-only source carries no provenance, so treat it like a missing one.
  if (normalized === "") return true;
  const exactPlaceholders = ["unknown", "context", "user provided"];
  const vagueFragments = ["existing records", "available data"];
  return exactPlaceholders.includes(normalized) || vagueFragments.some((fragment) => normalized.includes(fragment));
}
|
|
5305
|
+
/**
 * Runs deterministic quality checks over an application's fields and bundles
 * the findings into a quality report.
 *
 * Per-field checks, in the order issues are emitted:
 *  - `duplicate_field_id` (blocking): the id was already seen on an earlier field.
 *  - `required_field_unfilled` (warning): required field with a falsy value.
 *  - `filled_field_missing_source` (blocking): filled field with no source.
 *  - `filled_field_vague_source` (warning): filled field whose source exists but
 *    is judged vague by `isVagueSource`.
 *  - `filled_field_low_confidence` (warning): filled field whose confidence is
 *    missing or `"low"`.
 *
 * @param {{ fields: Array<object> }} state - Application state; only `fields` is read.
 * @returns {{ issues: Array<object>, rounds: Array, artifacts: Array<object>, qualityGateStatus: * }}
 *   The report. `rounds` is always an empty array here, `artifacts` holds one
 *   entry counting the fields, and `qualityGateStatus` is whatever
 *   `evaluateQualityGate({ issues })` returns.
 */
function buildApplicationQualityReport(state) {
  const issues = [];
  const seenIds = /* @__PURE__ */ new Set();
  for (const field of state.fields) {
    const flag = (code, severity, message) => {
      issues.push({ code, severity, message, fieldId: field.id });
    };

    if (seenIds.has(field.id)) {
      flag("duplicate_field_id", "blocking", `Field "${field.label}" has a duplicate id "${field.id}".`);
    }
    seenIds.add(field.id);

    if (!field.value) {
      if (field.required) {
        flag("required_field_unfilled", "warning", `Required field "${field.label}" is still unfilled.`);
      }
      // The remaining checks only apply to filled fields.
      continue;
    }

    if (!field.source) {
      flag("filled_field_missing_source", "blocking", `Filled field "${field.label}" is missing source provenance.`);
    } else if (isVagueSource(field.source)) {
      // Only reached when a source exists: a missing source is already reported
      // above as blocking, so it must not also be reported as "vague".
      flag("filled_field_vague_source", "warning", `Filled field "${field.label}" has a vague or non-citable source.`);
    }

    if (!field.confidence || field.confidence === "low") {
      flag("filled_field_low_confidence", "warning", `Filled field "${field.label}" has low or missing confidence.`);
    }
  }
  return {
    issues,
    rounds: [],
    artifacts: [
      { kind: "application_fields", label: "Application Fields", itemCount: state.fields.length }
    ],
    qualityGateStatus: evaluateQualityGate({ issues })
  };
}
|
|
5360
|
+
/**
 * Checks that a generated batch email mentions each field it is meant to ask about.
 *
 * Each field's label is trimmed and lower-cased, then searched for as a
 * substring of the lower-cased email text. Labels shorter than six characters
 * after trimming are not checked. Every label that is not found produces an
 * `email_missing_field_prompt` warning.
 *
 * @param {string} text - The generated email body.
 * @param {Iterable<{ id: string, label: string }>} batchFields - Fields covered by the email.
 * @returns {{ issues: Array<object>, qualityGateStatus: * }} The collected
 *   issues plus the result of `evaluateQualityGate({ issues })`.
 */
function reviewBatchEmail(text, batchFields) {
  const haystack = text.toLowerCase();
  const issues = [];
  for (const field of batchFields) {
    const needle = field.label.trim().toLowerCase();
    // Short labels are skipped rather than matched.
    if (needle.length < 6) continue;
    if (haystack.includes(needle)) continue;
    issues.push({
      code: "email_missing_field_prompt",
      severity: "warning",
      message: `Generated email does not clearly mention field "${field.label}".`,
      fieldId: field.id
    });
  }
  const qualityGateStatus = evaluateQualityGate({ issues });
  return { issues, qualityGateStatus };
}
|
|
5379
|
+
|
|
4398
5380
|
// src/application/coordinator.ts
|
|
4399
5381
|
function createApplicationPipeline(config) {
|
|
4400
5382
|
const {
|
|
@@ -4409,7 +5391,8 @@ function createApplicationPipeline(config) {
|
|
|
4409
5391
|
onTokenUsage,
|
|
4410
5392
|
onProgress,
|
|
4411
5393
|
log,
|
|
4412
|
-
providerOptions
|
|
5394
|
+
providerOptions,
|
|
5395
|
+
qualityGate = "warn"
|
|
4413
5396
|
} = config;
|
|
4414
5397
|
const limit = pLimit(concurrency);
|
|
4415
5398
|
let totalUsage = { inputTokens: 0, outputTokens: 0 };
|
|
@@ -4431,6 +5414,7 @@ function createApplicationPipeline(config) {
|
|
|
4431
5414
|
title: void 0,
|
|
4432
5415
|
applicationType: null,
|
|
4433
5416
|
fields: [],
|
|
5417
|
+
qualityReport: void 0,
|
|
4434
5418
|
batches: void 0,
|
|
4435
5419
|
currentBatchIndex: 0,
|
|
4436
5420
|
status: "classifying",
|
|
@@ -4455,8 +5439,9 @@ function createApplicationPipeline(config) {
|
|
|
4455
5439
|
if (!classifyResult.isApplication) {
|
|
4456
5440
|
state.status = "complete";
|
|
4457
5441
|
state.updatedAt = Date.now();
|
|
5442
|
+
state.qualityReport = buildApplicationQualityReport(state);
|
|
4458
5443
|
await applicationStore?.save(state);
|
|
4459
|
-
return { state, tokenUsage: totalUsage };
|
|
5444
|
+
return { state, tokenUsage: totalUsage, reviewReport: state.qualityReport };
|
|
4460
5445
|
}
|
|
4461
5446
|
state.applicationType = classifyResult.applicationType;
|
|
4462
5447
|
state.status = "extracting";
|
|
@@ -4480,8 +5465,9 @@ function createApplicationPipeline(config) {
|
|
|
4480
5465
|
await log?.("No fields extracted, completing pipeline with empty result");
|
|
4481
5466
|
state.status = "complete";
|
|
4482
5467
|
state.updatedAt = Date.now();
|
|
5468
|
+
state.qualityReport = buildApplicationQualityReport(state);
|
|
4483
5469
|
await applicationStore?.save(state);
|
|
4484
|
-
return { state, tokenUsage: totalUsage };
|
|
5470
|
+
return { state, tokenUsage: totalUsage, reviewReport: state.qualityReport };
|
|
4485
5471
|
}
|
|
4486
5472
|
state.fields = fields;
|
|
4487
5473
|
state.title = classifyResult.applicationType ?? void 0;
|
|
@@ -4581,11 +5567,15 @@ function createApplicationPipeline(config) {
|
|
|
4581
5567
|
} else {
|
|
4582
5568
|
state.status = "confirming";
|
|
4583
5569
|
}
|
|
5570
|
+
state.qualityReport = buildApplicationQualityReport(state);
|
|
4584
5571
|
state.updatedAt = Date.now();
|
|
4585
5572
|
await applicationStore?.save(state);
|
|
5573
|
+
if (shouldFailQualityGate(qualityGate, state.qualityReport.qualityGateStatus)) {
|
|
5574
|
+
throw new Error("Application quality gate failed. See state.qualityReport for blocking issues.");
|
|
5575
|
+
}
|
|
4586
5576
|
const filledCount = state.fields.filter((f) => f.value).length;
|
|
4587
5577
|
onProgress?.(`Application processed: ${filledCount}/${state.fields.length} fields filled, ${state.batches?.length ?? 0} batches to collect.`);
|
|
4588
|
-
return { state, tokenUsage: totalUsage };
|
|
5578
|
+
return { state, tokenUsage: totalUsage, reviewReport: state.qualityReport };
|
|
4589
5579
|
}
|
|
4590
5580
|
async function processReply(input) {
|
|
4591
5581
|
totalUsage = { inputTokens: 0, outputTokens: 0 };
|
|
@@ -4732,6 +5722,11 @@ Provide a brief, helpful explanation (2-3 sentences). End with "Just reply with
|
|
|
4732
5722
|
providerOptions
|
|
4733
5723
|
);
|
|
4734
5724
|
trackUsage(emailUsage);
|
|
5725
|
+
const emailReview = reviewBatchEmail(emailText, nextBatchFields);
|
|
5726
|
+
state.qualityReport = {
|
|
5727
|
+
...buildApplicationQualityReport(state),
|
|
5728
|
+
emailReview
|
|
5729
|
+
};
|
|
4735
5730
|
if (!responseText) {
|
|
4736
5731
|
responseText = emailText;
|
|
4737
5732
|
} else {
|
|
@@ -4747,13 +5742,18 @@ ${emailText}`;
|
|
|
4747
5742
|
}
|
|
4748
5743
|
}
|
|
4749
5744
|
state.updatedAt = Date.now();
|
|
5745
|
+
state.qualityReport = state.qualityReport ?? buildApplicationQualityReport(state);
|
|
4750
5746
|
await applicationStore?.save(state);
|
|
5747
|
+
if (shouldFailQualityGate(qualityGate, state.qualityReport.qualityGateStatus)) {
|
|
5748
|
+
throw new Error("Application quality gate failed. See state.qualityReport for blocking issues.");
|
|
5749
|
+
}
|
|
4751
5750
|
return {
|
|
4752
5751
|
state,
|
|
4753
5752
|
intent: intent.primaryIntent,
|
|
4754
5753
|
fieldsFilled,
|
|
4755
5754
|
responseText,
|
|
4756
|
-
tokenUsage: totalUsage
|
|
5755
|
+
tokenUsage: totalUsage,
|
|
5756
|
+
reviewReport: state.qualityReport
|
|
4757
5757
|
};
|
|
4758
5758
|
}
|
|
4759
5759
|
async function generateCurrentBatchEmail(applicationId, opts) {
|
|
@@ -4779,6 +5779,12 @@ ${emailText}`;
|
|
|
4779
5779
|
providerOptions
|
|
4780
5780
|
);
|
|
4781
5781
|
trackUsage(usage);
|
|
5782
|
+
const emailReview = reviewBatchEmail(text, batchFields);
|
|
5783
|
+
state.qualityReport = {
|
|
5784
|
+
...buildApplicationQualityReport(state),
|
|
5785
|
+
emailReview
|
|
5786
|
+
};
|
|
5787
|
+
await applicationStore?.save(state);
|
|
4782
5788
|
return { text, tokenUsage: totalUsage };
|
|
4783
5789
|
}
|
|
4784
5790
|
async function getConfirmationSummary(applicationId) {
|
|
@@ -4915,73 +5921,73 @@ Respond with the final answer, deduplicated citations array, overall confidence
|
|
|
4915
5921
|
}
|
|
4916
5922
|
|
|
4917
5923
|
// src/schemas/query.ts
|
|
4918
|
-
import { z as
|
|
4919
|
-
var QueryIntentSchema =
|
|
5924
|
+
import { z as z34 } from "zod";
|
|
5925
|
+
// Zod enum of the five query intent literals listed below.
var QueryIntentSchema = z34.enum([
|
|
4920
5926
|
"policy_question",
|
|
4921
5927
|
"coverage_comparison",
|
|
4922
5928
|
"document_search",
|
|
4923
5929
|
"claims_inquiry",
|
|
4924
5930
|
"general_knowledge"
|
|
4925
5931
|
]);
|
|
4926
|
-
var SubQuestionSchema =
|
|
4927
|
-
question:
|
|
5932
|
+
// Zod object schema for one decomposed sub-question (added side of the diff).
// `question` and `intent` are required; `chunkTypes` (string array) and
// `documentFilters` are optional, and every key inside `documentFilters` is
// itself optional. The bare `name:` lines marked `-` are truncated removed-side
// diff residue, not part of the new definition.
var SubQuestionSchema = z34.object({
|
|
5933
|
+
question: z34.string().describe("Atomic sub-question to retrieve and answer independently"),
|
|
4928
5934
|
intent: QueryIntentSchema,
|
|
4929
|
-
chunkTypes:
|
|
4930
|
-
documentFilters:
|
|
4931
|
-
type:
|
|
4932
|
-
carrier:
|
|
4933
|
-
insuredName:
|
|
4934
|
-
policyNumber:
|
|
4935
|
-
quoteNumber:
|
|
5935
|
+
chunkTypes: z34.array(z34.string()).optional().describe("Chunk types to filter retrieval (e.g. coverage, endorsement, declaration)"),
|
|
5936
|
+
documentFilters: z34.object({
|
|
5937
|
+
type: z34.enum(["policy", "quote"]).optional(),
|
|
5938
|
+
carrier: z34.string().optional(),
|
|
5939
|
+
insuredName: z34.string().optional(),
|
|
5940
|
+
policyNumber: z34.string().optional(),
|
|
5941
|
+
quoteNumber: z34.string().optional()
|
|
4936
5942
|
}).optional().describe("Structured filters to narrow document lookup")
|
|
4937
5943
|
});
|
|
4938
|
-
var QueryClassifyResultSchema =
|
|
5944
|
+
// Zod object schema for a query classification (added side of the diff).
// `subQuestions` must contain at least one SubQuestionSchema item; the three
// `requires*` keys are required booleans. The bare `name:` lines marked `-` are
// truncated removed-side diff residue.
var QueryClassifyResultSchema = z34.object({
|
|
4939
5945
|
intent: QueryIntentSchema,
|
|
4940
|
-
subQuestions:
|
|
4941
|
-
requiresDocumentLookup:
|
|
4942
|
-
requiresChunkSearch:
|
|
4943
|
-
requiresConversationHistory:
|
|
5946
|
+
subQuestions: z34.array(SubQuestionSchema).min(1).describe("Decomposed atomic sub-questions"),
|
|
5947
|
+
requiresDocumentLookup: z34.boolean().describe("Whether structured document lookup is needed"),
|
|
5948
|
+
requiresChunkSearch: z34.boolean().describe("Whether semantic chunk search is needed"),
|
|
5949
|
+
requiresConversationHistory: z34.boolean().describe("Whether conversation history is relevant")
|
|
4944
5950
|
});
|
|
4945
|
-
var EvidenceItemSchema =
|
|
4946
|
-
source:
|
|
4947
|
-
chunkId:
|
|
4948
|
-
documentId:
|
|
4949
|
-
turnId:
|
|
4950
|
-
text:
|
|
4951
|
-
relevance:
|
|
4952
|
-
metadata:
|
|
5951
|
+
// Zod object schema for one retrieved evidence item.
// `source` is "chunk", "document" or "conversation"; `chunkId`, `documentId` and
// `turnId` are each optional; `text` is required; `relevance` is a number in
// [0, 1]; `metadata` is an optional array of { key, value } string pairs.
var EvidenceItemSchema = z34.object({
|
|
5952
|
+
source: z34.enum(["chunk", "document", "conversation"]),
|
|
5953
|
+
chunkId: z34.string().optional(),
|
|
5954
|
+
documentId: z34.string().optional(),
|
|
5955
|
+
turnId: z34.string().optional(),
|
|
5956
|
+
text: z34.string().describe("Text excerpt from the source"),
|
|
5957
|
+
relevance: z34.number().min(0).max(1),
|
|
5958
|
+
metadata: z34.array(z34.object({ key: z34.string(), value: z34.string() })).optional()
|
|
4953
5959
|
});
|
|
4954
|
-
var RetrievalResultSchema =
|
|
4955
|
-
subQuestion:
|
|
4956
|
-
evidence:
|
|
5960
|
+
// Zod object schema pairing a `subQuestion` string with its `evidence`
// (an array of EvidenceItemSchema items). Both keys are required.
var RetrievalResultSchema = z34.object({
|
|
5961
|
+
subQuestion: z34.string(),
|
|
5962
|
+
evidence: z34.array(EvidenceItemSchema)
|
|
4957
5963
|
});
|
|
4958
|
-
var CitationSchema =
|
|
4959
|
-
index:
|
|
4960
|
-
chunkId:
|
|
4961
|
-
documentId:
|
|
4962
|
-
documentType:
|
|
4963
|
-
field:
|
|
4964
|
-
quote:
|
|
4965
|
-
relevance:
|
|
5964
|
+
// Zod object schema for one citation.
// `index`, `chunkId`, `documentId`, `quote` and `relevance` (number in [0, 1])
// are required; `documentType` ("policy" | "quote") and `field` are optional.
var CitationSchema = z34.object({
|
|
5965
|
+
index: z34.number().describe("Citation number [1], [2], etc."),
|
|
5966
|
+
chunkId: z34.string().describe("Source chunk ID, e.g. doc-123:coverage:2"),
|
|
5967
|
+
documentId: z34.string(),
|
|
5968
|
+
documentType: z34.enum(["policy", "quote"]).optional(),
|
|
5969
|
+
field: z34.string().optional().describe("Specific field path, e.g. coverages[0].deductible"),
|
|
5970
|
+
quote: z34.string().describe("Exact text from source that supports the claim"),
|
|
5971
|
+
relevance: z34.number().min(0).max(1)
|
|
4966
5972
|
});
|
|
4967
|
-
var SubAnswerSchema =
|
|
4968
|
-
subQuestion:
|
|
4969
|
-
answer:
|
|
4970
|
-
citations:
|
|
4971
|
-
confidence:
|
|
4972
|
-
needsMoreContext:
|
|
5973
|
+
// Zod object schema for the answer to one sub-question. All five keys are
// required: `subQuestion`, `answer`, `citations` (CitationSchema items),
// `confidence` (number in [0, 1]) and the boolean `needsMoreContext`.
var SubAnswerSchema = z34.object({
|
|
5974
|
+
subQuestion: z34.string(),
|
|
5975
|
+
answer: z34.string(),
|
|
5976
|
+
citations: z34.array(CitationSchema),
|
|
5977
|
+
confidence: z34.number().min(0).max(1),
|
|
5978
|
+
needsMoreContext: z34.boolean().describe("True if evidence was insufficient to answer fully")
|
|
4973
5979
|
});
|
|
4974
|
-
var VerifyResultSchema =
|
|
4975
|
-
approved:
|
|
4976
|
-
issues:
|
|
4977
|
-
retrySubQuestions:
|
|
5980
|
+
// Zod object schema for a verification result: required boolean `approved`,
// required string array `issues`, and an optional string array `retrySubQuestions`.
var VerifyResultSchema = z34.object({
|
|
5981
|
+
approved: z34.boolean().describe("Whether all sub-answers are adequately grounded"),
|
|
5982
|
+
issues: z34.array(z34.string()).describe("Specific grounding or consistency issues found"),
|
|
5983
|
+
retrySubQuestions: z34.array(z34.string()).optional().describe("Sub-questions that need additional retrieval or re-reasoning")
|
|
4978
5984
|
});
|
|
4979
|
-
var QueryResultSchema =
|
|
4980
|
-
answer:
|
|
4981
|
-
citations:
|
|
5985
|
+
// Zod object schema for the final query result (added side of the diff).
// `answer`, `citations`, `intent` and `confidence` (number in [0, 1]) are
// required; `followUp` is optional. The bare `name:` lines marked `-` are
// truncated removed-side diff residue.
var QueryResultSchema = z34.object({
|
|
5986
|
+
answer: z34.string(),
|
|
5987
|
+
citations: z34.array(CitationSchema),
|
|
4982
5988
|
intent: QueryIntentSchema,
|
|
4983
|
-
confidence:
|
|
4984
|
-
followUp:
|
|
5989
|
+
confidence: z34.number().min(0).max(1),
|
|
5990
|
+
followUp: z34.string().optional().describe("Suggested follow-up question if applicable")
|
|
4985
5991
|
});
|
|
4986
5992
|
|
|
4987
5993
|
// src/query/retriever.ts
|
|
@@ -5269,6 +6275,112 @@ async function verify(originalQuestion, subAnswers, allEvidence, config) {
|
|
|
5269
6275
|
return { result: object, usage };
|
|
5270
6276
|
}
|
|
5271
6277
|
|
|
6278
|
+
// src/query/quality.ts
|
|
6279
|
+
/**
 * Picks the identifier under which an evidence item is indexed.
 *
 * Preference order is `chunkId`, then `documentId`, then `turnId`. The first
 * truthy value wins, so an empty-string id falls through to the next candidate
 * instead of shadowing it. This is the same truthiness rule `citationSourceId`
 * applies to citations, so both sides of the lookup agree.
 *
 * @param {{ chunkId?: string, documentId?: string, turnId?: string }} evidence
 * @returns {string | undefined} The chosen id, or a falsy value when none is usable.
 */
function sourceIdForEvidence(evidence) {
  return evidence.chunkId || evidence.documentId || evidence.turnId;
}
|
|
6282
|
+
/**
 * Picks the identifier used to look a citation up in retrieved evidence:
 * the citation's `chunkId` when it is truthy, otherwise its `documentId`.
 *
 * @param {{ chunkId?: string, documentId?: string }} citation
 * @returns {string | undefined} The chosen id.
 */
function citationSourceId(citation) {
  const { chunkId, documentId } = citation;
  return chunkId ? chunkId : documentId;
}
|
|
6285
|
+
/**
 * Builds a deterministic review report for a query run by cross-checking
 * sub-answers, their citations, the retrieved evidence and the final result.
 *
 * Issues emitted:
 *  - `subanswer_missing_citations` (blocking): no citations and `needsMoreContext` is falsy.
 *  - `subanswer_high_confidence_without_citations` (blocking): no citations and confidence >= 0.85.
 *  - `citation_missing_from_evidence` (blocking): the citation's source id is
 *    falsy or has no retrieved evidence.
 *  - `citation_quote_not_in_evidence` (warning): the quote is not a verbatim
 *    substring of any evidence text for that source.
 *  - `final_answer_missing_citations` (blocking): non-blank final answer, no
 *    citations, confidence > 0.4.
 *  - `final_answer_unknown_citation` (warning): a final citation whose
 *    index/chunkId/documentId triple appears in no sub-answer.
 *
 * @param {object} params
 * @param {Array<object>} params.subAnswers - Sub-answers with `subQuestion`, `citations`, `confidence`, `needsMoreContext`.
 * @param {Array<object>} params.evidence - Retrieved evidence items.
 * @param {object} [params.finalResult] - Final result; its checks are skipped when falsy.
 * @param {Array<object>} params.verifyRounds - Verifier rounds with `round`, `approved`, `issues`.
 * @returns {object} `{ issues, rounds, artifacts, verifyRounds, qualityGateStatus }`,
 *   where `qualityGateStatus` is the result of `evaluateQualityGate`.
 */
function buildQueryReviewReport(params) {
  const { subAnswers, evidence, finalResult, verifyRounds } = params;
  const issues = [];
  const citationKey = (c) => `${c.index}|${c.chunkId}|${c.documentId}`;

  // Group evidence by source id so each citation lookup is a single map read.
  const evidenceBySource = /* @__PURE__ */ new Map();
  for (const entry of evidence) {
    const id = sourceIdForEvidence(entry);
    if (!id) continue;
    const bucket = evidenceBySource.get(id);
    if (bucket) {
      bucket.push(entry);
    } else {
      evidenceBySource.set(id, [entry]);
    }
  }

  for (const subAnswer of subAnswers) {
    const { subQuestion } = subAnswer;

    if (!subAnswer.needsMoreContext && subAnswer.citations.length === 0) {
      issues.push({
        code: "subanswer_missing_citations",
        severity: "blocking",
        message: `Sub-answer "${subQuestion}" has no citations despite claiming an answer.`,
        subQuestion
      });
    }
    if (subAnswer.confidence >= 0.85 && subAnswer.citations.length === 0) {
      issues.push({
        code: "subanswer_high_confidence_without_citations",
        severity: "blocking",
        message: `Sub-answer "${subQuestion}" has high confidence without citations.`,
        subQuestion
      });
    }

    for (const citation of subAnswer.citations) {
      const sourceId = citationSourceId(citation);
      // A falsy source id yields no candidates, so one emptiness test covers both cases.
      const candidates = (sourceId && evidenceBySource.get(sourceId)) || [];
      if (candidates.length === 0) {
        issues.push({
          code: "citation_missing_from_evidence",
          severity: "blocking",
          message: `Citation [${citation.index}] in "${subQuestion}" does not map to retrieved evidence.`,
          subQuestion,
          citationIndex: citation.index,
          sourceId
        });
        continue;
      }
      const quoteMissing = candidates.every((item) => !item.text.includes(citation.quote));
      if (quoteMissing) {
        issues.push({
          code: "citation_quote_not_in_evidence",
          severity: "warning",
          message: `Citation [${citation.index}] quote in "${subQuestion}" was not found verbatim in retrieved evidence.`,
          subQuestion,
          citationIndex: citation.index,
          sourceId
        });
      }
    }
  }

  if (finalResult) {
    const answered = finalResult.answer.trim().length > 0;
    if (answered && finalResult.citations.length === 0 && finalResult.confidence > 0.4) {
      issues.push({
        code: "final_answer_missing_citations",
        severity: "blocking",
        message: "Final answer has non-trivial confidence but no citations."
      });
    }

    const verifiedKeys = new Set();
    for (const sa of subAnswers) {
      for (const citation of sa.citations) {
        verifiedKeys.add(citationKey(citation));
      }
    }
    for (const citation of finalResult.citations) {
      if (verifiedKeys.has(citationKey(citation))) continue;
      issues.push({
        code: "final_answer_unknown_citation",
        severity: "warning",
        message: `Final answer citation [${citation.index}] was not present in verified sub-answers.`,
        citationIndex: citation.index,
        sourceId: citationSourceId(citation)
      });
    }
  }

  const rounds = verifyRounds.map((round) => {
    const clean = round.approved && round.issues.length === 0;
    const fallbackSummary = round.approved ? "Verification passed." : "Verification requested retry.";
    return {
      round: round.round,
      kind: "verification",
      status: clean ? "passed" : "warning",
      summary: round.issues[0] ?? fallbackSummary
    };
  });

  const artifacts = [
    { kind: "evidence", label: "Retrieved Evidence", itemCount: evidence.length },
    { kind: "sub_answers", label: "Sub Answers", itemCount: subAnswers.length }
  ];

  const hasRoundWarnings = verifyRounds.some((round) => !round.approved || round.issues.length > 0);
  return {
    issues,
    rounds,
    artifacts,
    verifyRounds,
    qualityGateStatus: evaluateQualityGate({ issues, hasRoundWarnings })
  };
}
|
|
6383
|
+
|
|
5272
6384
|
// src/query/coordinator.ts
|
|
5273
6385
|
function createQueryAgent(config) {
|
|
5274
6386
|
const {
|
|
@@ -5282,7 +6394,8 @@ function createQueryAgent(config) {
|
|
|
5282
6394
|
onTokenUsage,
|
|
5283
6395
|
onProgress,
|
|
5284
6396
|
log,
|
|
5285
|
-
providerOptions
|
|
6397
|
+
providerOptions,
|
|
6398
|
+
qualityGate = "warn"
|
|
5286
6399
|
} = config;
|
|
5287
6400
|
const limit = pLimit(concurrency);
|
|
5288
6401
|
let totalUsage = { inputTokens: 0, outputTokens: 0 };
|
|
@@ -5351,6 +6464,7 @@ function createQueryAgent(config) {
|
|
|
5351
6464
|
await pipelineCtx.save("reason", { classification, evidence: allEvidence, subAnswers });
|
|
5352
6465
|
onProgress?.("Verifying answer grounding...");
|
|
5353
6466
|
const verifierConfig = { generateObject, providerOptions };
|
|
6467
|
+
const verifyRounds = [];
|
|
5354
6468
|
for (let round = 0; round < maxVerifyRounds; round++) {
|
|
5355
6469
|
const { result: verifyResult, usage } = await safeVerify(
|
|
5356
6470
|
question,
|
|
@@ -5359,6 +6473,12 @@ function createQueryAgent(config) {
|
|
|
5359
6473
|
verifierConfig
|
|
5360
6474
|
);
|
|
5361
6475
|
trackUsage(usage);
|
|
6476
|
+
verifyRounds.push({
|
|
6477
|
+
round: round + 1,
|
|
6478
|
+
approved: verifyResult.approved,
|
|
6479
|
+
issues: verifyResult.issues,
|
|
6480
|
+
retrySubQuestions: verifyResult.retrySubQuestions
|
|
6481
|
+
});
|
|
5362
6482
|
if (verifyResult.approved) {
|
|
5363
6483
|
onProgress?.("Verification passed.");
|
|
5364
6484
|
break;
|
|
@@ -5416,6 +6536,24 @@ function createQueryAgent(config) {
|
|
|
5416
6536
|
classification,
|
|
5417
6537
|
context?.platform
|
|
5418
6538
|
);
|
|
6539
|
+
const reviewReport = buildQueryReviewReport({
|
|
6540
|
+
subAnswers,
|
|
6541
|
+
evidence: allEvidence,
|
|
6542
|
+
finalResult: queryResult,
|
|
6543
|
+
verifyRounds
|
|
6544
|
+
});
|
|
6545
|
+
await pipelineCtx.save("review", {
|
|
6546
|
+
classification,
|
|
6547
|
+
evidence: allEvidence,
|
|
6548
|
+
subAnswers,
|
|
6549
|
+
reviewReport
|
|
6550
|
+
});
|
|
6551
|
+
if (reviewReport.issues.length > 0) {
|
|
6552
|
+
await log?.(`Query deterministic review issues: ${reviewReport.issues.map((issue) => issue.message).join("; ")}`);
|
|
6553
|
+
}
|
|
6554
|
+
if (shouldFailQualityGate(qualityGate, reviewReport.qualityGateStatus)) {
|
|
6555
|
+
throw new Error("Query quality gate failed. See reviewReport for blocking issues.");
|
|
6556
|
+
}
|
|
5419
6557
|
if (conversationId) {
|
|
5420
6558
|
try {
|
|
5421
6559
|
await memoryStore.addTurn({
|
|
@@ -5436,7 +6574,7 @@ function createQueryAgent(config) {
|
|
|
5436
6574
|
await log?.(`Failed to store conversation turn: ${e}`);
|
|
5437
6575
|
}
|
|
5438
6576
|
}
|
|
5439
|
-
return { ...queryResult, tokenUsage: totalUsage };
|
|
6577
|
+
return { ...queryResult, tokenUsage: totalUsage, reviewReport };
|
|
5440
6578
|
}
|
|
5441
6579
|
async function classify(question, conversationId) {
|
|
5442
6580
|
let conversationContext;
|
|
@@ -5657,7 +6795,12 @@ export {
|
|
|
5657
6795
|
AdmittedStatusSchema,
|
|
5658
6796
|
AnswerParsingResultSchema,
|
|
5659
6797
|
ApplicationClassifyResultSchema,
|
|
6798
|
+
ApplicationEmailReviewSchema,
|
|
5660
6799
|
ApplicationFieldSchema,
|
|
6800
|
+
ApplicationQualityArtifactSchema,
|
|
6801
|
+
ApplicationQualityIssueSchema,
|
|
6802
|
+
ApplicationQualityReportSchema,
|
|
6803
|
+
ApplicationQualityRoundSchema,
|
|
5661
6804
|
ApplicationStateSchema,
|
|
5662
6805
|
AuditTypeSchema,
|
|
5663
6806
|
AutoFillMatchSchema,
|
|
@@ -5689,6 +6832,7 @@ export {
|
|
|
5689
6832
|
CoverageFormSchema,
|
|
5690
6833
|
CoverageSchema,
|
|
5691
6834
|
CoverageTriggerSchema,
|
|
6835
|
+
CoverageValueTypeSchema,
|
|
5692
6836
|
CrimeDeclarationsSchema,
|
|
5693
6837
|
CyberDeclarationsSchema,
|
|
5694
6838
|
DEDUCTIBLE_TYPES,
|