@claritylabs/cl-sdk 0.9.0 → 0.10.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +18 -636
- package/dist/index.d.mts +841 -65
- package/dist/index.d.ts +841 -65
- package/dist/index.js +1175 -335
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +1169 -335
- package/dist/index.mjs.map +1 -1
- package/dist/storage-sqlite.d.mts +114 -24
- package/dist/storage-sqlite.d.ts +114 -24
- package/package.json +1 -1
package/dist/index.mjs
CHANGED
|
@@ -489,7 +489,9 @@ var FormReferenceSchema = z3.object({
|
|
|
489
489
|
formNumber: z3.string(),
|
|
490
490
|
editionDate: z3.string().optional(),
|
|
491
491
|
title: z3.string().optional(),
|
|
492
|
-
formType: z3.enum(["coverage", "endorsement", "declarations", "application", "notice", "other"])
|
|
492
|
+
formType: z3.enum(["coverage", "endorsement", "declarations", "application", "notice", "other"]),
|
|
493
|
+
pageStart: z3.number().optional(),
|
|
494
|
+
pageEnd: z3.number().optional()
|
|
493
495
|
});
|
|
494
496
|
var TaxFeeItemSchema = z3.object({
|
|
495
497
|
name: z3.string(),
|
|
@@ -526,12 +528,25 @@ var NamedInsuredSchema = z3.object({
|
|
|
526
528
|
|
|
527
529
|
// src/schemas/coverage.ts
|
|
528
530
|
import { z as z4 } from "zod";
|
|
531
|
+
/**
 * Classification of how a coverage limit or deductible value is expressed.
 * Referenced by the optional `limitValueType` / `deductibleValueType` fields
 * of the coverage schemas in this module.
 */
var CoverageValueTypeSchema = z4.enum([
  "numeric", "included", "not_included", "as_stated",
  "waiting_period", "referential", "other"
]);
|
|
529
540
|
/**
 * Base coverage record: a named coverage with its limit, plus optional
 * deductible, value-type classifications, and source-location fields.
 * Only `name` and `limit` are required.
 */
var CoverageSchema = z4.object({
  // Required identification of the coverage and its limit text.
  name: z4.string(),
  limit: z4.string(),
  limitValueType: CoverageValueTypeSchema.optional(),

  // Deductible text and its classification.
  deductible: z4.string().optional(),
  deductibleValueType: CoverageValueTypeSchema.optional(),

  // Where the coverage was found in the source document.
  formNumber: z4.string().optional(),
  pageNumber: z4.number().optional(),
  sectionRef: z4.string().optional(),
  originalContent: z4.string().optional()
});
|
|
536
551
|
var EnrichedCoverageSchema = z4.object({
|
|
537
552
|
name: z4.string(),
|
|
@@ -540,8 +555,10 @@ var EnrichedCoverageSchema = z4.object({
|
|
|
540
555
|
formEditionDate: z4.string().optional(),
|
|
541
556
|
limit: z4.string(),
|
|
542
557
|
limitType: LimitTypeSchema.optional(),
|
|
558
|
+
limitValueType: CoverageValueTypeSchema.optional(),
|
|
543
559
|
deductible: z4.string().optional(),
|
|
544
560
|
deductibleType: DeductibleTypeSchema.optional(),
|
|
561
|
+
deductibleValueType: CoverageValueTypeSchema.optional(),
|
|
545
562
|
sir: z4.string().optional(),
|
|
546
563
|
sublimit: z4.string().optional(),
|
|
547
564
|
coinsurance: z4.string().optional(),
|
|
@@ -552,7 +569,8 @@ var EnrichedCoverageSchema = z4.object({
|
|
|
552
569
|
included: z4.boolean(),
|
|
553
570
|
premium: z4.string().optional(),
|
|
554
571
|
pageNumber: z4.number().optional(),
|
|
555
|
-
sectionRef: z4.string().optional()
|
|
572
|
+
sectionRef: z4.string().optional(),
|
|
573
|
+
originalContent: z4.string().optional()
|
|
556
574
|
});
|
|
557
575
|
|
|
558
576
|
// src/schemas/endorsement.ts
|
|
@@ -1569,6 +1587,7 @@ function assembleDocument(documentId, documentType, memory) {
|
|
|
1569
1587
|
const lossHistory = memory.get("loss_history");
|
|
1570
1588
|
const sections = memory.get("sections");
|
|
1571
1589
|
const supplementary = memory.get("supplementary");
|
|
1590
|
+
const formInventory = memory.get("form_inventory");
|
|
1572
1591
|
const classify = memory.get("classify");
|
|
1573
1592
|
const base = {
|
|
1574
1593
|
id: documentId,
|
|
@@ -1585,6 +1604,7 @@ function assembleDocument(documentId, documentType, memory) {
|
|
|
1585
1604
|
exclusions: exclusions?.exclusions,
|
|
1586
1605
|
conditions: conditions?.conditions,
|
|
1587
1606
|
sections: sections?.sections,
|
|
1607
|
+
formInventory: formInventory?.forms,
|
|
1588
1608
|
declarations: declarations ? sanitizeNulls(declarations) : void 0,
|
|
1589
1609
|
...sanitizeNulls(lossHistory ?? {})
|
|
1590
1610
|
};
|
|
@@ -1826,6 +1846,11 @@ async function formatDocumentContent(doc, generateText, options) {
|
|
|
1826
1846
|
function chunkDocument(doc) {
|
|
1827
1847
|
const chunks = [];
|
|
1828
1848
|
const docId = doc.id;
|
|
1849
|
+
/**
 * Normalizes a metadata record so that every retained value is a non-empty string.
 *
 * Entries whose value is `undefined`, `null`, or stringifies to "" are dropped;
 * all other values are converted with `String(value)`.
 *
 * @param {Record<string, unknown> | null | undefined} entries - Raw metadata; a
 *   nullish argument is treated as an empty record instead of throwing.
 * @returns {Record<string, string>} A new object containing only the non-empty string values.
 */
function stringMetadata(entries) {
  return Object.fromEntries(
    Object.entries(entries ?? {}).flatMap(([key, value]) => {
      if (value == null) return [];
      // Stringify once and reuse the result for both the emptiness check and the output.
      const text = String(value);
      return text.length > 0 ? [[key, text]] : [];
    })
  );
}
|
|
1829
1854
|
chunks.push({
|
|
1830
1855
|
id: `${docId}:carrier_info:0`,
|
|
1831
1856
|
documentId: docId,
|
|
@@ -1837,7 +1862,7 @@ function chunkDocument(doc) {
|
|
|
1837
1862
|
doc.carrierAmBestRating ? `AM Best: ${doc.carrierAmBestRating}` : null,
|
|
1838
1863
|
doc.mga ? `MGA: ${doc.mga}` : null
|
|
1839
1864
|
].filter(Boolean).join("\n"),
|
|
1840
|
-
metadata: { carrier: doc.carrier, documentType: doc.type }
|
|
1865
|
+
metadata: stringMetadata({ carrier: doc.carrier, documentType: doc.type })
|
|
1841
1866
|
});
|
|
1842
1867
|
chunks.push({
|
|
1843
1868
|
id: `${docId}:named_insured:0`,
|
|
@@ -1849,17 +1874,32 @@ function chunkDocument(doc) {
|
|
|
1849
1874
|
doc.insuredFein ? `FEIN: ${doc.insuredFein}` : null,
|
|
1850
1875
|
doc.insuredAddress ? `Address: ${doc.insuredAddress.street1}, ${doc.insuredAddress.city}, ${doc.insuredAddress.state} ${doc.insuredAddress.zip}` : null
|
|
1851
1876
|
].filter(Boolean).join("\n"),
|
|
1852
|
-
metadata: { insuredName: doc.insuredName, documentType: doc.type }
|
|
1877
|
+
metadata: stringMetadata({ insuredName: doc.insuredName, documentType: doc.type })
|
|
1853
1878
|
});
|
|
1854
1879
|
doc.coverages.forEach((cov, i) => {
|
|
1855
1880
|
chunks.push({
|
|
1856
1881
|
id: `${docId}:coverage:${i}`,
|
|
1857
1882
|
documentId: docId,
|
|
1858
1883
|
type: "coverage",
|
|
1859
|
-
text:
|
|
1860
|
-
|
|
1861
|
-
|
|
1862
|
-
|
|
1884
|
+
text: [
|
|
1885
|
+
`Coverage: ${cov.name}`,
|
|
1886
|
+
`Limit: ${cov.limit}`,
|
|
1887
|
+
cov.limitValueType ? `Limit Type: ${cov.limitValueType}` : null,
|
|
1888
|
+
cov.deductible ? `Deductible: ${cov.deductible}` : null,
|
|
1889
|
+
cov.deductibleValueType ? `Deductible Type: ${cov.deductibleValueType}` : null,
|
|
1890
|
+
cov.originalContent ? `Source: ${cov.originalContent}` : null
|
|
1891
|
+
].filter(Boolean).join("\n"),
|
|
1892
|
+
metadata: stringMetadata({
|
|
1893
|
+
coverageName: cov.name,
|
|
1894
|
+
limit: cov.limit,
|
|
1895
|
+
limitValueType: cov.limitValueType,
|
|
1896
|
+
deductible: cov.deductible,
|
|
1897
|
+
deductibleValueType: cov.deductibleValueType,
|
|
1898
|
+
formNumber: cov.formNumber,
|
|
1899
|
+
pageNumber: cov.pageNumber,
|
|
1900
|
+
sectionRef: cov.sectionRef,
|
|
1901
|
+
documentType: doc.type
|
|
1902
|
+
})
|
|
1863
1903
|
});
|
|
1864
1904
|
});
|
|
1865
1905
|
doc.endorsements?.forEach((end, i) => {
|
|
@@ -1869,7 +1909,13 @@ Deductible: ${cov.deductible}` : ""}`,
|
|
|
1869
1909
|
type: "endorsement",
|
|
1870
1910
|
text: `Endorsement: ${end.title}
|
|
1871
1911
|
${end.content}`.trim(),
|
|
1872
|
-
metadata: {
|
|
1912
|
+
metadata: stringMetadata({
|
|
1913
|
+
endorsementType: end.endorsementType,
|
|
1914
|
+
formNumber: end.formNumber,
|
|
1915
|
+
pageStart: end.pageStart,
|
|
1916
|
+
pageEnd: end.pageEnd,
|
|
1917
|
+
documentType: doc.type
|
|
1918
|
+
})
|
|
1873
1919
|
});
|
|
1874
1920
|
});
|
|
1875
1921
|
doc.exclusions?.forEach((exc, i) => {
|
|
@@ -1879,7 +1925,7 @@ ${end.content}`.trim(),
|
|
|
1879
1925
|
type: "exclusion",
|
|
1880
1926
|
text: `Exclusion: ${exc.name}
|
|
1881
1927
|
${exc.content}`.trim(),
|
|
1882
|
-
metadata: { documentType: doc.type }
|
|
1928
|
+
metadata: stringMetadata({ formNumber: exc.formNumber, pageNumber: exc.pageNumber, documentType: doc.type })
|
|
1883
1929
|
});
|
|
1884
1930
|
});
|
|
1885
1931
|
doc.sections?.forEach((sec, i) => {
|
|
@@ -1889,7 +1935,7 @@ ${exc.content}`.trim(),
|
|
|
1889
1935
|
type: "section",
|
|
1890
1936
|
text: `Section: ${sec.title}
|
|
1891
1937
|
${sec.content}`,
|
|
1892
|
-
metadata: { sectionType: sec.type, documentType: doc.type }
|
|
1938
|
+
metadata: stringMetadata({ sectionType: sec.type, pageStart: sec.pageStart, pageEnd: sec.pageEnd, documentType: doc.type })
|
|
1893
1939
|
});
|
|
1894
1940
|
});
|
|
1895
1941
|
if (doc.premium) {
|
|
@@ -1899,7 +1945,7 @@ ${sec.content}`,
|
|
|
1899
1945
|
type: "premium",
|
|
1900
1946
|
text: `Premium: ${doc.premium}${doc.totalCost ? `
|
|
1901
1947
|
Total Cost: ${doc.totalCost}` : ""}`,
|
|
1902
|
-
metadata: { premium: doc.premium, documentType: doc.type }
|
|
1948
|
+
metadata: stringMetadata({ premium: doc.premium, documentType: doc.type })
|
|
1903
1949
|
});
|
|
1904
1950
|
}
|
|
1905
1951
|
return chunks;
|
|
@@ -1951,12 +1997,19 @@ function mergeCoverageLimits(existing, incoming) {
|
|
|
1951
1997
|
const merged = mergeShallowPreferPresent(existing, incoming);
|
|
1952
1998
|
const existingCoverages = Array.isArray(existing.coverages) ? existing.coverages : [];
|
|
1953
1999
|
const incomingCoverages = Array.isArray(incoming.coverages) ? incoming.coverages : [];
|
|
1954
|
-
|
|
2000
|
+
const coverageKey = (coverage) => [
|
|
1955
2001
|
String(coverage.name ?? "").toLowerCase(),
|
|
1956
2002
|
String(coverage.limit ?? "").toLowerCase(),
|
|
1957
2003
|
String(coverage.deductible ?? "").toLowerCase(),
|
|
1958
2004
|
String(coverage.formNumber ?? "").toLowerCase()
|
|
1959
|
-
].join("|")
|
|
2005
|
+
].join("|");
|
|
2006
|
+
const byKey = /* @__PURE__ */ new Map();
|
|
2007
|
+
for (const coverage of [...existingCoverages, ...incomingCoverages]) {
|
|
2008
|
+
const key = coverageKey(coverage);
|
|
2009
|
+
const current = byKey.get(key);
|
|
2010
|
+
byKey.set(key, current ? mergeShallowPreferPresent(current, coverage) : coverage);
|
|
2011
|
+
}
|
|
2012
|
+
merged.coverages = [...byKey.values()];
|
|
1960
2013
|
return merged;
|
|
1961
2014
|
}
|
|
1962
2015
|
function mergeDeclarations(existing, incoming) {
|
|
@@ -2813,9 +2866,45 @@ Return JSON only:
|
|
|
2813
2866
|
}`;
|
|
2814
2867
|
}
|
|
2815
2868
|
|
|
2816
|
-
// src/prompts/coordinator/
|
|
2869
|
+
// src/prompts/coordinator/form-inventory.ts
|
|
2817
2870
|
import { z as z19 } from "zod";
|
|
2818
|
-
var
|
|
2871
|
+
/**
 * One form-inventory entry. Extends FormReferenceSchema, re-declaring
 * `formNumber`, `pageStart`, and `pageEnd` from the base shape with
 * descriptions attached.
 */
var FormInventoryEntrySchema = FormReferenceSchema.extend({
  formNumber: FormReferenceSchema.shape.formNumber
    .describe("Form number or identifier, e.g. PR5070CF"),
  pageStart: FormReferenceSchema.shape.pageStart
    .describe("Original document page where the form begins"),
  pageEnd: FormReferenceSchema.shape.pageEnd
    .describe("Original document page where the form ends")
});
|
|
2876
|
+
/** Form-inventory result: an object with a `forms` array of inventory entries. */
var FormInventorySchema = z19.object({ forms: z19.array(FormInventoryEntrySchema) });
|
|
2879
|
+
/**
 * Builds the prompt that asks the model to list the forms in an insurance document.
 *
 * @param {string | null | undefined} templateHints - Document-type hint text placed
 *   under the "DOCUMENT TYPE HINTS" heading. A nullish value renders as an empty
 *   section rather than the literal text "undefined"/"null".
 * @returns {string} The complete prompt text.
 */
function buildFormInventoryPrompt(templateHints) {
  const hints = templateHints ?? "";
  return `You are building a form inventory for an insurance document.

DOCUMENT TYPE HINTS:
${hints}

Extract every distinct declarations page set, policy form, coverage form, endorsement, application form, and notice form that appears in the document.

For EACH form, extract:
- formNumber: REQUIRED when present
- editionDate: if shown
- title: if shown
- formType: one of coverage, endorsement, declarations, application, notice, other
- pageStart: original page where the form begins
- pageEnd: original page where the form ends

Critical rules:
- Include declarations page sets even if they do not show a standard form number.
- Use original document page numbers, not local chunk page numbers.
- Do not emit duplicate entries for repeated headers/footers.
- Multi-page forms should be represented once with pageStart/pageEnd covering the full span when visible.
- If a form number is visible in endorsements, schedules, or form headers, include it even if the full form title is partial.

Respond with JSON only.`;
}
|
|
2904
|
+
|
|
2905
|
+
// src/prompts/coordinator/page-map.ts
|
|
2906
|
+
import { z as z20 } from "zod";
|
|
2907
|
+
var PageExtractorSchema = z20.enum([
|
|
2819
2908
|
"carrier_info",
|
|
2820
2909
|
"named_insured",
|
|
2821
2910
|
"coverage_limits",
|
|
@@ -2828,23 +2917,37 @@ var PageExtractorSchema = z19.enum([
|
|
|
2828
2917
|
"sections",
|
|
2829
2918
|
"supplementary"
|
|
2830
2919
|
]);
|
|
2831
|
-
var PageAssignmentSchema =
|
|
2832
|
-
localPageNumber:
|
|
2833
|
-
extractorNames:
|
|
2834
|
-
|
|
2835
|
-
|
|
2920
|
+
/**
 * Assignment of a single page (numbered locally within the supplied chunk)
 * to the extractors that should inspect it, with optional role,
 * schedule-value flag, confidence, and notes.
 */
var PageAssignmentSchema = z20.object({
  localPageNumber: z20.number().int().positive()
    .describe("1-based page number within this supplied PDF chunk"),
  extractorNames: z20.array(PageExtractorSchema)
    .describe("Focused extractors that should inspect this page"),
  pageRole: z20.enum([
    "declarations_schedule",
    "endorsement_schedule",
    "policy_form",
    "endorsement_form",
    "condition_exclusion_form",
    "supplementary",
    "other"
  ]).optional()
    .describe("Primary role of the page"),
  hasScheduleValues: z20.boolean().optional()
    .describe("True only when the page contains insured-specific declaration or schedule values, tables, or rows to extract"),
  // Confidence is constrained to the closed interval [0, 1].
  confidence: z20.number().min(0).max(1).optional()
    .describe("Confidence in the page assignment"),
  notes: z20.string().optional()
    .describe("Short explanation of what appears on the page")
});
|
|
2837
|
-
var PageMapChunkSchema =
|
|
2838
|
-
pages:
|
|
2936
|
+
/** Page-map result for one chunk: an object with a `pages` array of page assignments. */
var PageMapChunkSchema = z20.object({ pages: z20.array(PageAssignmentSchema) });
|
|
2840
|
-
function buildPageMapPrompt(templateHints, startPage, endPage) {
|
|
2939
|
+
function buildPageMapPrompt(templateHints, startPage, endPage, formInventoryHint) {
|
|
2940
|
+
const inventoryBlock = formInventoryHint ? `
|
|
2941
|
+
FORM INVENTORY (already identified \u2014 use this to constrain your assignments):
|
|
2942
|
+
${formInventoryHint}
|
|
2943
|
+
` : "";
|
|
2841
2944
|
return `You are mapping insurance document pages to focused extractors.
|
|
2842
2945
|
|
|
2843
2946
|
These supplied pages are ORIGINAL DOCUMENT PAGES ${startPage}-${endPage}.
|
|
2844
2947
|
|
|
2845
2948
|
DOCUMENT TYPE HINTS:
|
|
2846
2949
|
${templateHints}
|
|
2847
|
-
|
|
2950
|
+
${inventoryBlock}
|
|
2848
2951
|
For each page in this supplied PDF chunk, decide which extractor(s) should inspect it.
|
|
2849
2952
|
|
|
2850
2953
|
Available extractors:
|
|
@@ -2866,7 +2969,11 @@ Rules:
|
|
|
2866
2969
|
- Avoid assigning broad ranges mentally; decide page by page.
|
|
2867
2970
|
- A page may map to multiple extractors if it legitimately contains multiple relevant sections.
|
|
2868
2971
|
- Prefer declarations and schedules for numeric limits/deductibles over later generic form wording.
|
|
2869
|
-
-
|
|
2972
|
+
- Assign "coverage_limits" only when the page itself contains insured-specific declaration or schedule values to capture, such as location/building rows, coverage tables, limits, deductibles, coinsurance percentages, or scheduled amounts tied to this policy.
|
|
2973
|
+
- Do NOT assign "coverage_limits" for generic policy-form or endorsement text that merely explains how limits, deductibles, waiting periods, or coinsurance work, or that says values are "shown in the declarations", "shown in the schedule", "as stated", or "if applicable".
|
|
2974
|
+
- Headings like "Limits of Insurance", "Deductible", "Coinsurance", "Loss Conditions", or "Definitions" inside a policy form usually indicate form language, not declarations or schedules.
|
|
2975
|
+
- Continuation pages near the end of a form should stay mapped to "sections" plus "conditions"/"exclusions" when applicable, even if they mention limits or deductibles.
|
|
2976
|
+
- When a form inventory entry identifies a page range as a specific form type (e.g., endorsement, coverage, application), use that classification to guide your extractor choice. Do not assign "coverage_limits" to pages the inventory identifies as endorsement or condition/exclusion forms unless the page contains actual schedule values.
|
|
2870
2977
|
- Return every page in the supplied chunk exactly once.
|
|
2871
2978
|
|
|
2872
2979
|
Return JSON:
|
|
@@ -2875,6 +2982,8 @@ Return JSON:
|
|
|
2875
2982
|
{
|
|
2876
2983
|
"localPageNumber": 1,
|
|
2877
2984
|
"extractorNames": ["declarations", "carrier_info", "named_insured", "coverage_limits"],
|
|
2985
|
+
"pageRole": "declarations_schedule",
|
|
2986
|
+
"hasScheduleValues": true,
|
|
2878
2987
|
"confidence": 0.96,
|
|
2879
2988
|
"notes": "Declarations page with insured, policy period, and scheduled limits"
|
|
2880
2989
|
}
|
|
@@ -2883,18 +2992,26 @@ Return JSON:
|
|
|
2883
2992
|
|
|
2884
2993
|
Respond with JSON only.`;
|
|
2885
2994
|
}
|
|
2995
|
+
/**
 * Renders a form inventory as a bullet list for use as the page-map prompt hint.
 *
 * Each form with a known `pageStart` becomes one line of the shape
 * `- <formNumber> "<title>" [<formType>] → page N` (or `pages N-M` when a
 * truthy `pageEnd` differs from `pageStart`). Forms without `pageStart`
 * are omitted.
 *
 * @param {Array<object> | null | undefined} forms - Inventory entries. A nullish
 *   or non-array argument yields "" instead of throwing, and nullish entries
 *   inside the array are skipped.
 * @returns {string} Newline-joined bullet lines, or "" when nothing can be listed.
 */
function formatFormInventoryForPageMap(forms) {
  if (!Array.isArray(forms) || forms.length === 0) return "";
  return forms
    .filter((form) => form?.pageStart != null)
    .map((form) => {
      const { formNumber, formType, pageStart, pageEnd, title } = form;
      // A missing (or zero) pageEnd, or one equal to pageStart, means a single page.
      const range = pageEnd && pageEnd !== pageStart
        ? `pages ${pageStart}-${pageEnd}`
        : `page ${pageStart}`;
      const quotedTitle = title ? ` "${title}"` : "";
      return `- ${formNumber}${quotedTitle} [${formType}] \u2192 ${range}`;
    })
    .join("\n");
}
|
|
2886
3003
|
|
|
2887
3004
|
// src/prompts/coordinator/review.ts
|
|
2888
|
-
import { z as
|
|
2889
|
-
var ReviewResultSchema =
|
|
2890
|
-
complete:
|
|
2891
|
-
missingFields:
|
|
2892
|
-
qualityIssues:
|
|
2893
|
-
additionalTasks:
|
|
2894
|
-
extractorName:
|
|
2895
|
-
startPage:
|
|
2896
|
-
endPage:
|
|
2897
|
-
description:
|
|
3005
|
+
import { z as z21 } from "zod";
|
|
3006
|
+
/**
 * Review result: a completeness flag, the list of missing fields,
 * optional quality issues, and follow-up extraction tasks, each naming
 * an extractor, a page range, and a description.
 */
var ReviewResultSchema = z21.object({
  complete: z21.boolean(),
  missingFields: z21.array(z21.string()),
  qualityIssues: z21.array(z21.string()).optional(),
  additionalTasks: z21.array(
    z21.object({
      extractorName: z21.string(),
      startPage: z21.number(),
      endPage: z21.number(),
      description: z21.string()
    })
  )
});
|
|
2900
3017
|
function buildReviewPrompt(templateExpected, extractedKeys, extractionSummary, pageMapSummary) {
|
|
@@ -2941,20 +3058,20 @@ Respond with JSON only.`;
|
|
|
2941
3058
|
}
|
|
2942
3059
|
|
|
2943
3060
|
// src/prompts/extractors/carrier-info.ts
|
|
2944
|
-
import { z as
|
|
2945
|
-
var CarrierInfoSchema =
|
|
2946
|
-
carrierName:
|
|
2947
|
-
carrierLegalName:
|
|
2948
|
-
naicNumber:
|
|
2949
|
-
amBestRating:
|
|
2950
|
-
admittedStatus:
|
|
2951
|
-
mga:
|
|
2952
|
-
underwriter:
|
|
2953
|
-
policyNumber:
|
|
2954
|
-
effectiveDate:
|
|
2955
|
-
expirationDate:
|
|
2956
|
-
quoteNumber:
|
|
2957
|
-
proposedEffectiveDate:
|
|
3061
|
+
import { z as z22 } from "zod";
|
|
3062
|
+
/**
 * Carrier and policy identification fields. Only `carrierName` is required;
 * every other field is optional.
 */
var CarrierInfoSchema = z22.object({
  // Carrier identity
  carrierName: z22.string()
    .describe("Primary insurance company name for display"),
  carrierLegalName: z22.string().optional()
    .describe("Legal entity name of insurer"),
  naicNumber: z22.string().optional()
    .describe("NAIC company code"),
  amBestRating: z22.string().optional()
    .describe("AM Best rating, e.g. 'A+ XV'"),
  admittedStatus: z22.enum(["admitted", "non_admitted", "surplus_lines"]).optional()
    .describe("Admitted status of the carrier"),
  mga: z22.string().optional()
    .describe("Managing General Agent or Program Administrator name"),
  underwriter: z22.string().optional()
    .describe("Named individual underwriter"),

  // Policy / quote identification and dates
  policyNumber: z22.string().optional()
    .describe("Policy or quote reference number"),
  effectiveDate: z22.string().optional()
    .describe("Policy effective date (MM/DD/YYYY)"),
  expirationDate: z22.string().optional()
    .describe("Policy expiration date (MM/DD/YYYY)"),
  quoteNumber: z22.string().optional()
    .describe("Quote or proposal reference number"),
  proposedEffectiveDate: z22.string().optional()
    .describe("Proposed effective date for quotes (MM/DD/YYYY)")
});
|
|
2959
3076
|
function buildCarrierInfoPrompt() {
|
|
2960
3077
|
return `You are an expert insurance document analyst. Extract carrier and policy identification information from this document.
|
|
@@ -2974,18 +3091,18 @@ Return JSON only.`;
|
|
|
2974
3091
|
}
|
|
2975
3092
|
|
|
2976
3093
|
// src/prompts/extractors/named-insured.ts
|
|
2977
|
-
import { z as
|
|
2978
|
-
var AddressSchema2 =
|
|
2979
|
-
street1:
|
|
2980
|
-
city:
|
|
2981
|
-
state:
|
|
2982
|
-
zip:
|
|
3094
|
+
import { z as z23 } from "zod";
|
|
3095
|
+
/** Postal address with four required string parts: street1, city, state, zip. */
var AddressSchema2 = z23.object({
  street1: z23.string(),
  city: z23.string(),
  state: z23.string(),
  zip: z23.string()
});
|
|
2984
|
-
var NamedInsuredSchema2 =
|
|
2985
|
-
insuredName:
|
|
2986
|
-
insuredDba:
|
|
3101
|
+
var NamedInsuredSchema2 = z23.object({
|
|
3102
|
+
insuredName: z23.string().describe("Name of primary named insured"),
|
|
3103
|
+
insuredDba: z23.string().optional().describe("Doing-business-as name"),
|
|
2987
3104
|
insuredAddress: AddressSchema2.optional().describe("Primary insured mailing address"),
|
|
2988
|
-
insuredEntityType:
|
|
3105
|
+
insuredEntityType: z23.enum([
|
|
2989
3106
|
"corporation",
|
|
2990
3107
|
"llc",
|
|
2991
3108
|
"partnership",
|
|
@@ -2998,13 +3115,13 @@ var NamedInsuredSchema2 = z22.object({
|
|
|
2998
3115
|
"married_couple",
|
|
2999
3116
|
"other"
|
|
3000
3117
|
]).optional().describe("Legal entity type of the insured"),
|
|
3001
|
-
insuredFein:
|
|
3002
|
-
insuredSicCode:
|
|
3003
|
-
insuredNaicsCode:
|
|
3004
|
-
additionalNamedInsureds:
|
|
3005
|
-
|
|
3006
|
-
name:
|
|
3007
|
-
relationship:
|
|
3118
|
+
insuredFein: z23.string().optional().describe("Federal Employer Identification Number"),
|
|
3119
|
+
insuredSicCode: z23.string().optional().describe("SIC code"),
|
|
3120
|
+
insuredNaicsCode: z23.string().optional().describe("NAICS code"),
|
|
3121
|
+
additionalNamedInsureds: z23.array(
|
|
3122
|
+
z23.object({
|
|
3123
|
+
name: z23.string(),
|
|
3124
|
+
relationship: z23.string().optional().describe("e.g. subsidiary, affiliate"),
|
|
3008
3125
|
address: AddressSchema2.optional()
|
|
3009
3126
|
})
|
|
3010
3127
|
).optional().describe("Additional named insureds listed on the policy")
|
|
@@ -3025,23 +3142,20 @@ Return JSON only.`;
|
|
|
3025
3142
|
}
|
|
3026
3143
|
|
|
3027
3144
|
// src/prompts/extractors/coverage-limits.ts
|
|
3028
|
-
import { z as
|
|
3029
|
-
var
|
|
3030
|
-
|
|
3031
|
-
|
|
3032
|
-
|
|
3033
|
-
|
|
3034
|
-
|
|
3035
|
-
|
|
3036
|
-
formNumber: z23.string().optional().describe("Associated form number, e.g. 'CG 00 01'")
|
|
3037
|
-
})
|
|
3038
|
-
).describe("All coverages with their limits"),
|
|
3039
|
-
coverageForm: z23.enum(["occurrence", "claims_made", "accident"]).optional().describe("Primary coverage trigger type"),
|
|
3040
|
-
retroactiveDate: z23.string().optional().describe("Retroactive date for claims-made policies (MM/DD/YYYY)")
|
|
3145
|
+
import { z as z24 } from "zod";
|
|
3146
|
+
/** CoverageSchema extended with an optional `coverageCode` field. */
var ExtractorCoverageSchema = CoverageSchema.extend({
  coverageCode: z24.string().optional()
    .describe("Coverage code or class code")
});
|
|
3149
|
+
/**
 * Coverage-limits result: the required list of coverages plus optional
 * coverage trigger type and retroactive date.
 */
var CoverageLimitsSchema = z24.object({
  coverages: z24.array(ExtractorCoverageSchema)
    .describe("All coverages with their limits"),
  coverageForm: z24.enum(["occurrence", "claims_made", "accident"]).optional()
    .describe("Primary coverage trigger type"),
  retroactiveDate: z24.string().optional()
    .describe("Retroactive date for claims-made policies (MM/DD/YYYY)")
});
|
|
3042
3154
|
function buildCoverageLimitsPrompt() {
|
|
3043
3155
|
return `You are an expert insurance document analyst. Extract all coverage limits and deductibles from this document.
|
|
3044
3156
|
|
|
3157
|
+
Extract only insured-specific declaration, schedule, or endorsement entries that state actual coverage terms for this policy.
|
|
3158
|
+
|
|
3045
3159
|
Focus on:
|
|
3046
3160
|
- Every coverage listed on the declarations page or coverage schedule
|
|
3047
3161
|
- Per-occurrence, aggregate, and sub-limits for each coverage
|
|
@@ -3052,20 +3166,34 @@ Focus on:
|
|
|
3052
3166
|
- Standard limit fields: per occurrence, general aggregate, products/completed ops aggregate, personal & advertising injury, fire damage, medical expense, combined single limit, BI/PD splits, umbrella each occurrence/aggregate/retention, statutory (WC), employers liability
|
|
3053
3167
|
- Defense cost treatment: inside limits, outside limits, or supplementary
|
|
3054
3168
|
|
|
3055
|
-
|
|
3169
|
+
For EACH coverage, also extract:
|
|
3170
|
+
- pageNumber: the original page number where the coverage row/value appears
|
|
3171
|
+
- sectionRef: the declarations/schedule/endorsement section heading where it appears
|
|
3172
|
+
- originalContent: the verbatim row or short source snippet used for this coverage
|
|
3173
|
+
- limitValueType: classify the limit as numeric, included, not_included, as_stated, waiting_period, referential, or other
|
|
3174
|
+
- deductibleValueType: classify the deductible/value term similarly when deductible is present
|
|
3175
|
+
|
|
3176
|
+
Critical rules:
|
|
3177
|
+
- Do not extract table-of-contents lines, index entries, headers, footers, page labels, or cross-references as coverages.
|
|
3178
|
+
- Do not create a coverage entry from generic policy-form text that only says a limit/deductible is "shown in the declarations", "shown in the Business Income Declarations", "as stated", "if applicable", or similar referential wording.
|
|
3179
|
+
- Do not treat a generic waiting period, deductible explanation, limits clause, coinsurance clause, or definitions text as a standalone coverage unless the page contains an actual policy-specific schedule row or declaration entry.
|
|
3180
|
+
- Values like "Included" or "Not Included" are valid only when they appear as an explicit declarations/schedule/endorsement entry for a named coverage. Do not infer them from narrative form language.
|
|
3181
|
+
- If a waiting period or hour deductible is shown as part of a specific declarations/schedule row, it may be captured in deductible. Otherwise omit it.
|
|
3182
|
+
- Use limitValueType or deductibleValueType to preserve non-numeric terms precisely instead of forcing them into numeric semantics.
|
|
3183
|
+
- Preserve one row per real coverage entry. Do not merge adjacent schedule rows into malformed names.
|
|
3056
3184
|
|
|
3057
3185
|
Return JSON only.`;
|
|
3058
3186
|
}
|
|
3059
3187
|
|
|
3060
3188
|
// src/prompts/extractors/endorsements.ts
|
|
3061
|
-
import { z as
|
|
3062
|
-
var EndorsementsSchema =
|
|
3063
|
-
endorsements:
|
|
3064
|
-
|
|
3065
|
-
formNumber:
|
|
3066
|
-
editionDate:
|
|
3067
|
-
title:
|
|
3068
|
-
endorsementType:
|
|
3189
|
+
import { z as z25 } from "zod";
|
|
3190
|
+
var EndorsementsSchema = z25.object({
|
|
3191
|
+
endorsements: z25.array(
|
|
3192
|
+
z25.object({
|
|
3193
|
+
formNumber: z25.string().describe("Form number, e.g. 'CG 21 47'"),
|
|
3194
|
+
editionDate: z25.string().optional().describe("Edition date, e.g. '12 07'"),
|
|
3195
|
+
title: z25.string().describe("Endorsement title"),
|
|
3196
|
+
endorsementType: z25.enum([
|
|
3069
3197
|
"additional_insured",
|
|
3070
3198
|
"waiver_of_subrogation",
|
|
3071
3199
|
"primary_noncontributory",
|
|
@@ -3085,12 +3213,12 @@ var EndorsementsSchema = z24.object({
|
|
|
3085
3213
|
"territorial_extension",
|
|
3086
3214
|
"other"
|
|
3087
3215
|
]).describe("Endorsement type classification"),
|
|
3088
|
-
effectiveDate:
|
|
3089
|
-
affectedCoverageParts:
|
|
3090
|
-
namedParties:
|
|
3091
|
-
|
|
3092
|
-
name:
|
|
3093
|
-
role:
|
|
3216
|
+
effectiveDate: z25.string().optional().describe("Endorsement effective date"),
|
|
3217
|
+
affectedCoverageParts: z25.array(z25.string()).optional().describe("Coverage parts affected by this endorsement"),
|
|
3218
|
+
namedParties: z25.array(
|
|
3219
|
+
z25.object({
|
|
3220
|
+
name: z25.string().describe("Party name"),
|
|
3221
|
+
role: z25.enum([
|
|
3094
3222
|
"additional_insured",
|
|
3095
3223
|
"loss_payee",
|
|
3096
3224
|
"mortgage_holder",
|
|
@@ -3099,15 +3227,15 @@ var EndorsementsSchema = z24.object({
|
|
|
3099
3227
|
"designated_person",
|
|
3100
3228
|
"other"
|
|
3101
3229
|
]).describe("Party role"),
|
|
3102
|
-
relationship:
|
|
3103
|
-
scope:
|
|
3230
|
+
relationship: z25.string().optional().describe("Relationship to insured"),
|
|
3231
|
+
scope: z25.string().optional().describe("Scope of coverage for this party")
|
|
3104
3232
|
})
|
|
3105
3233
|
).optional().describe("Named parties (additional insureds, loss payees, etc.)"),
|
|
3106
|
-
keyTerms:
|
|
3107
|
-
premiumImpact:
|
|
3108
|
-
content:
|
|
3109
|
-
pageStart:
|
|
3110
|
-
pageEnd:
|
|
3234
|
+
keyTerms: z25.array(z25.string()).optional().describe("Key terms or notable provisions in the endorsement"),
|
|
3235
|
+
premiumImpact: z25.string().optional().describe("Additional premium or credit"),
|
|
3236
|
+
content: z25.string().describe("Full verbatim text of the endorsement"),
|
|
3237
|
+
pageStart: z25.number().describe("Starting page number of this endorsement"),
|
|
3238
|
+
pageEnd: z25.number().optional().describe("Ending page number of this endorsement")
|
|
3111
3239
|
})
|
|
3112
3240
|
).describe("All endorsements found in the document")
|
|
3113
3241
|
});
|
|
@@ -3138,20 +3266,20 @@ Return JSON only.`;
|
|
|
3138
3266
|
}
|
|
3139
3267
|
|
|
3140
3268
|
// src/prompts/extractors/exclusions.ts
|
|
3141
|
-
import { z as
|
|
3142
|
-
var ExclusionsSchema =
|
|
3143
|
-
exclusions:
|
|
3144
|
-
|
|
3145
|
-
name:
|
|
3146
|
-
formNumber:
|
|
3147
|
-
excludedPerils:
|
|
3148
|
-
isAbsolute:
|
|
3149
|
-
exceptions:
|
|
3150
|
-
buybackAvailable:
|
|
3151
|
-
buybackEndorsement:
|
|
3152
|
-
appliesTo:
|
|
3153
|
-
content:
|
|
3154
|
-
pageNumber:
|
|
3269
|
+
import { z as z26 } from "zod";
|
|
3270
|
+
/**
 * Exclusions result: an `exclusions` array in which each item requires
 * `name` and `content`; all other per-exclusion fields are optional.
 */
var ExclusionsSchema = z26.object({
  exclusions: z26.array(
    z26.object({
      name: z26.string()
        .describe("Exclusion title or short description"),
      formNumber: z26.string().optional()
        .describe("Form number if part of a named endorsement"),
      excludedPerils: z26.array(z26.string()).optional()
        .describe("Specific perils excluded"),
      isAbsolute: z26.boolean().optional()
        .describe("Whether the exclusion is absolute (no exceptions)"),
      exceptions: z26.array(z26.string()).optional()
        .describe("Exceptions to the exclusion, if any"),
      buybackAvailable: z26.boolean().optional()
        .describe("Whether coverage can be bought back via endorsement"),
      buybackEndorsement: z26.string().optional()
        .describe("Form number of the buyback endorsement if available"),
      appliesTo: z26.array(z26.string()).optional()
        .describe("Coverage types this exclusion applies to"),
      content: z26.string()
        .describe("Full verbatim exclusion text"),
      pageNumber: z26.number().optional()
        .describe("Page number where exclusion appears")
    })
  ).describe("All exclusions found in the document")
});
|
|
@@ -3176,18 +3304,23 @@ Focus on:
|
|
|
3176
3304
|
- Exclusions within insuring agreements or conditions if clearly labeled
|
|
3177
3305
|
- Full verbatim exclusion text \u2014 do not summarize
|
|
3178
3306
|
|
|
3307
|
+
Critical rules:
|
|
3308
|
+
- Ignore table-of-contents entries, running headers/footers, and references that only point to another page or section.
|
|
3309
|
+
- Do not emit a standalone exclusion from a fragment unless the fragment itself contains substantive exclusion wording.
|
|
3310
|
+
- Always include pageNumber when the exclusion appears on a specific page in the supplied document chunk.
|
|
3311
|
+
|
|
3179
3312
|
Common personal lines exclusion patterns: animal liability, business pursuits, home daycare, watercraft, aircraft.
|
|
3180
3313
|
|
|
3181
3314
|
Return JSON only.`;
|
|
3182
3315
|
}
|
|
3183
3316
|
|
|
3184
3317
|
// src/prompts/extractors/conditions.ts
|
|
3185
|
-
import { z as
|
|
3186
|
-
var ConditionsSchema =
|
|
3187
|
-
conditions:
|
|
3188
|
-
|
|
3189
|
-
name:
|
|
3190
|
-
conditionType:
|
|
3318
|
+
import { z as z27 } from "zod";
|
|
3319
|
+
var ConditionsSchema = z27.object({
|
|
3320
|
+
conditions: z27.array(
|
|
3321
|
+
z27.object({
|
|
3322
|
+
name: z27.string().describe("Condition title"),
|
|
3323
|
+
conditionType: z27.enum([
|
|
3191
3324
|
"duties_after_loss",
|
|
3192
3325
|
"notice_requirements",
|
|
3193
3326
|
"other_insurance",
|
|
@@ -3206,14 +3339,14 @@ var ConditionsSchema = z26.object({
|
|
|
3206
3339
|
"separation_of_insureds",
|
|
3207
3340
|
"other"
|
|
3208
3341
|
]).describe("Condition category"),
|
|
3209
|
-
content:
|
|
3210
|
-
keyValues:
|
|
3211
|
-
|
|
3212
|
-
key:
|
|
3213
|
-
value:
|
|
3342
|
+
content: z27.string().describe("Full verbatim condition text"),
|
|
3343
|
+
keyValues: z27.array(
|
|
3344
|
+
z27.object({
|
|
3345
|
+
key: z27.string().describe("Key name (e.g. 'noticePeriod', 'suitDeadline')"),
|
|
3346
|
+
value: z27.string().describe("Value (e.g. '30 days', '2 years')")
|
|
3214
3347
|
})
|
|
3215
3348
|
).optional().describe("Key values extracted from the condition (notice periods, deadlines, etc.)"),
|
|
3216
|
-
pageNumber:
|
|
3349
|
+
pageNumber: z27.number().optional().describe("Page number where condition appears")
|
|
3217
3350
|
})
|
|
3218
3351
|
).describe("All policy conditions found in the document")
|
|
3219
3352
|
});
|
|
@@ -3225,7 +3358,7 @@ For EACH condition, extract:
|
|
|
3225
3358
|
- conditionType: classify as one of: duties_after_loss, notice_requirements, other_insurance, cancellation, nonrenewal, transfer_of_rights, liberalization, arbitration, concealment_fraud, examination_under_oath, legal_action, loss_payment, appraisal, mortgage_holders, policy_territory, separation_of_insureds, other \u2014 REQUIRED
|
|
3226
3359
|
- content: full verbatim condition text \u2014 REQUIRED
|
|
3227
3360
|
- keyValues: extract specific values as key-value pairs (e.g. noticePeriod: "30 days", suitDeadline: "2 years")
|
|
3228
|
-
- pageNumber: page number where the condition appears
|
|
3361
|
+
- pageNumber: original document page number where the substantive condition text appears
|
|
3229
3362
|
|
|
3230
3363
|
Focus on:
|
|
3231
3364
|
- Duties after loss / notice of occurrence conditions
|
|
@@ -3242,32 +3375,37 @@ Focus on:
|
|
|
3242
3375
|
- Mortgage holders clause
|
|
3243
3376
|
- Any other named conditions
|
|
3244
3377
|
|
|
3378
|
+
Critical rules:
|
|
3379
|
+
- Ignore table-of-contents entries, section indexes, running headers/footers, and page references such as "Appraisal ..... 19".
|
|
3380
|
+
- Do not emit a condition unless the page contains substantive condition text, not just a heading or reference.
|
|
3381
|
+
- If a condition continues from a prior page, keep the substantive text together and use the page where the condition text appears in this extracted chunk.
|
|
3382
|
+
|
|
3245
3383
|
Return JSON only.`;
|
|
3246
3384
|
}
|
|
3247
3385
|
|
|
3248
3386
|
// src/prompts/extractors/premium-breakdown.ts
|
|
3249
|
-
import { z as
|
|
3250
|
-
var PremiumBreakdownSchema =
|
|
3251
|
-
premium:
|
|
3252
|
-
totalCost:
|
|
3253
|
-
premiumBreakdown:
|
|
3254
|
-
|
|
3255
|
-
line:
|
|
3256
|
-
amount:
|
|
3387
|
+
import { z as z28 } from "zod";
|
|
3388
|
+
var PremiumBreakdownSchema = z28.object({
|
|
3389
|
+
premium: z28.string().optional().describe("Total premium amount, e.g. '$5,000'"),
|
|
3390
|
+
totalCost: z28.string().optional().describe("Total cost including taxes and fees, e.g. '$5,250'"),
|
|
3391
|
+
premiumBreakdown: z28.array(
|
|
3392
|
+
z28.object({
|
|
3393
|
+
line: z28.string().describe("Coverage line name"),
|
|
3394
|
+
amount: z28.string().describe("Premium amount for this line")
|
|
3257
3395
|
})
|
|
3258
3396
|
).optional().describe("Per-coverage-line premium breakdown"),
|
|
3259
|
-
taxesAndFees:
|
|
3260
|
-
|
|
3261
|
-
name:
|
|
3262
|
-
amount:
|
|
3263
|
-
type:
|
|
3397
|
+
taxesAndFees: z28.array(
|
|
3398
|
+
z28.object({
|
|
3399
|
+
name: z28.string().describe("Fee or tax name"),
|
|
3400
|
+
amount: z28.string().describe("Dollar amount"),
|
|
3401
|
+
type: z28.enum(["tax", "fee", "surcharge", "assessment"]).optional().describe("Fee category")
|
|
3264
3402
|
})
|
|
3265
3403
|
).optional().describe("Taxes, fees, surcharges, and assessments"),
|
|
3266
|
-
minimumPremium:
|
|
3267
|
-
depositPremium:
|
|
3268
|
-
paymentPlan:
|
|
3269
|
-
auditType:
|
|
3270
|
-
ratingBasis:
|
|
3404
|
+
minimumPremium: z28.string().optional().describe("Minimum premium if stated"),
|
|
3405
|
+
depositPremium: z28.string().optional().describe("Deposit premium if stated"),
|
|
3406
|
+
paymentPlan: z28.string().optional().describe("Payment plan description"),
|
|
3407
|
+
auditType: z28.enum(["annual", "semi_annual", "quarterly", "monthly", "final", "self"]).optional().describe("Premium audit type"),
|
|
3408
|
+
ratingBasis: z28.string().optional().describe("Rating basis, e.g. payroll, revenue, area, units")
|
|
3271
3409
|
});
|
|
3272
3410
|
function buildPremiumBreakdownPrompt() {
|
|
3273
3411
|
return `You are an expert insurance document analyst. Extract all premium and cost information from this document.
|
|
@@ -3287,14 +3425,14 @@ Return JSON only.`;
|
|
|
3287
3425
|
}
|
|
3288
3426
|
|
|
3289
3427
|
// src/prompts/extractors/declarations.ts
|
|
3290
|
-
import { z as
|
|
3291
|
-
var DeclarationsFieldSchema =
|
|
3292
|
-
field:
|
|
3293
|
-
value:
|
|
3294
|
-
section:
|
|
3428
|
+
import { z as z29 } from "zod";
|
|
3429
|
+
var DeclarationsFieldSchema = z29.object({
|
|
3430
|
+
field: z29.string().describe("Descriptive field name (e.g. 'policyNumber', 'effectiveDate', 'coverageALimit')"),
|
|
3431
|
+
value: z29.string().describe("Extracted value exactly as it appears in the document"),
|
|
3432
|
+
section: z29.string().optional().describe("Section or grouping this field belongs to (e.g. 'Coverage Limits', 'Vehicle Schedule')")
|
|
3295
3433
|
});
|
|
3296
|
-
var DeclarationsExtractSchema =
|
|
3297
|
-
fields:
|
|
3434
|
+
var DeclarationsExtractSchema = z29.object({
|
|
3435
|
+
fields: z29.array(DeclarationsFieldSchema).describe("All declarations page fields extracted as key-value pairs. Structure varies by line of business.")
|
|
3298
3436
|
});
|
|
3299
3437
|
function buildDeclarationsPrompt() {
|
|
3300
3438
|
return `You are an expert insurance document analyst. Extract all declarations page data from this document into a flexible key-value structure.
|
|
@@ -3334,21 +3472,21 @@ Preserve original values exactly as they appear. Return JSON only.`;
|
|
|
3334
3472
|
}
|
|
3335
3473
|
|
|
3336
3474
|
// src/prompts/extractors/loss-history.ts
|
|
3337
|
-
import { z as
|
|
3338
|
-
var LossHistorySchema =
|
|
3339
|
-
lossSummary:
|
|
3340
|
-
individualClaims:
|
|
3341
|
-
|
|
3342
|
-
date:
|
|
3343
|
-
type:
|
|
3344
|
-
description:
|
|
3345
|
-
amountPaid:
|
|
3346
|
-
amountReserved:
|
|
3347
|
-
status:
|
|
3348
|
-
claimNumber:
|
|
3475
|
+
import { z as z30 } from "zod";
|
|
3476
|
+
var LossHistorySchema = z30.object({
|
|
3477
|
+
lossSummary: z30.string().optional().describe("Summary of loss history, e.g. '3 claims in past 5 years totaling $125,000'"),
|
|
3478
|
+
individualClaims: z30.array(
|
|
3479
|
+
z30.object({
|
|
3480
|
+
date: z30.string().optional().describe("Date of loss or claim"),
|
|
3481
|
+
type: z30.string().optional().describe("Type of claim, e.g. 'property damage', 'bodily injury'"),
|
|
3482
|
+
description: z30.string().optional().describe("Brief description of the claim"),
|
|
3483
|
+
amountPaid: z30.string().optional().describe("Amount paid"),
|
|
3484
|
+
amountReserved: z30.string().optional().describe("Amount reserved"),
|
|
3485
|
+
status: z30.enum(["open", "closed", "reopened"]).optional().describe("Claim status"),
|
|
3486
|
+
claimNumber: z30.string().optional().describe("Claim reference number")
|
|
3349
3487
|
})
|
|
3350
3488
|
).optional().describe("Individual claim records"),
|
|
3351
|
-
experienceMod:
|
|
3489
|
+
experienceMod: z30.string().optional().describe("Experience modification factor for workers comp, e.g. '0.85'")
|
|
3352
3490
|
});
|
|
3353
3491
|
function buildLossHistoryPrompt() {
|
|
3354
3492
|
return `You are an expert insurance document analyst. Extract all loss history and claims information from this document.
|
|
@@ -3365,18 +3503,18 @@ Return JSON only.`;
|
|
|
3365
3503
|
}
|
|
3366
3504
|
|
|
3367
3505
|
// src/prompts/extractors/sections.ts
|
|
3368
|
-
import { z as
|
|
3369
|
-
var SubsectionSchema2 =
|
|
3370
|
-
title:
|
|
3371
|
-
sectionNumber:
|
|
3372
|
-
pageNumber:
|
|
3373
|
-
content:
|
|
3506
|
+
import { z as z31 } from "zod";
|
|
3507
|
+
var SubsectionSchema2 = z31.object({
|
|
3508
|
+
title: z31.string().describe("Subsection title"),
|
|
3509
|
+
sectionNumber: z31.string().optional().describe("Subsection number"),
|
|
3510
|
+
pageNumber: z31.number().optional().describe("Page number"),
|
|
3511
|
+
content: z31.string().describe("Full verbatim text")
|
|
3374
3512
|
});
|
|
3375
|
-
var SectionsSchema =
|
|
3376
|
-
sections:
|
|
3377
|
-
|
|
3378
|
-
title:
|
|
3379
|
-
type:
|
|
3513
|
+
var SectionsSchema = z31.object({
|
|
3514
|
+
sections: z31.array(
|
|
3515
|
+
z31.object({
|
|
3516
|
+
title: z31.string().describe("Section title"),
|
|
3517
|
+
type: z31.enum([
|
|
3380
3518
|
"declarations",
|
|
3381
3519
|
"insuring_agreement",
|
|
3382
3520
|
"policy_form",
|
|
@@ -3390,10 +3528,10 @@ var SectionsSchema = z30.object({
|
|
|
3390
3528
|
"regulatory",
|
|
3391
3529
|
"other"
|
|
3392
3530
|
]).describe("Section type classification"),
|
|
3393
|
-
content:
|
|
3394
|
-
pageStart:
|
|
3395
|
-
pageEnd:
|
|
3396
|
-
subsections:
|
|
3531
|
+
content: z31.string().describe("Full verbatim text of the section"),
|
|
3532
|
+
pageStart: z31.number().describe("Starting page number"),
|
|
3533
|
+
pageEnd: z31.number().optional().describe("Ending page number"),
|
|
3534
|
+
subsections: z31.array(SubsectionSchema2).optional().describe("Subsections within this section")
|
|
3397
3535
|
})
|
|
3398
3536
|
).describe("All document sections")
|
|
3399
3537
|
});
|
|
@@ -3412,25 +3550,31 @@ For each section, classify its type:
|
|
|
3412
3550
|
- "other" \u2014 anything that doesn't fit the above categories
|
|
3413
3551
|
|
|
3414
3552
|
Include accurate page numbers for every section. Include subsections only if the section has clearly defined subsections with their own titles.
|
|
3553
|
+
If a page begins or ends in the middle of a section, treat it as a continuation of the existing section instead of creating a new orphan section from the fragment.
|
|
3554
|
+
|
|
3555
|
+
Critical rules:
|
|
3556
|
+
- Ignore table-of-contents entries, page-number references, repeating headers/footers, and other navigational artifacts.
|
|
3557
|
+
- Do not create a new section from a lone continuation fragment such as a single paragraph tail or list item that clearly belongs to the previous page's section.
|
|
3558
|
+
- When a section spans multiple pages, keep it as one section with pageStart/pageEnd covering the full span represented in this extraction.
|
|
3415
3559
|
|
|
3416
3560
|
Return JSON only.`;
|
|
3417
3561
|
}
|
|
3418
3562
|
|
|
3419
3563
|
// src/prompts/extractors/supplementary.ts
|
|
3420
|
-
import { z as
|
|
3421
|
-
var ContactSchema2 =
|
|
3422
|
-
name:
|
|
3423
|
-
phone:
|
|
3424
|
-
email:
|
|
3425
|
-
address:
|
|
3426
|
-
type:
|
|
3564
|
+
import { z as z32 } from "zod";
|
|
3565
|
+
var ContactSchema2 = z32.object({
|
|
3566
|
+
name: z32.string().optional().describe("Organization or person name"),
|
|
3567
|
+
phone: z32.string().optional().describe("Phone number"),
|
|
3568
|
+
email: z32.string().optional().describe("Email address"),
|
|
3569
|
+
address: z32.string().optional().describe("Mailing address"),
|
|
3570
|
+
type: z32.string().optional().describe("Contact type, e.g. 'State Department of Insurance'")
|
|
3427
3571
|
});
|
|
3428
|
-
var SupplementarySchema =
|
|
3429
|
-
regulatoryContacts:
|
|
3430
|
-
claimsContacts:
|
|
3431
|
-
thirdPartyAdministrators:
|
|
3432
|
-
cancellationNoticeDays:
|
|
3433
|
-
nonrenewalNoticeDays:
|
|
3572
|
+
var SupplementarySchema = z32.object({
|
|
3573
|
+
regulatoryContacts: z32.array(ContactSchema2).optional().describe("Regulatory body contacts (state department of insurance, ombudsman)"),
|
|
3574
|
+
claimsContacts: z32.array(ContactSchema2).optional().describe("Claims reporting contacts and instructions"),
|
|
3575
|
+
thirdPartyAdministrators: z32.array(ContactSchema2).optional().describe("Third-party administrators for claims handling"),
|
|
3576
|
+
cancellationNoticeDays: z32.number().optional().describe("Required notice period for cancellation in days"),
|
|
3577
|
+
nonrenewalNoticeDays: z32.number().optional().describe("Required notice period for nonrenewal in days")
|
|
3434
3578
|
});
|
|
3435
3579
|
function buildSupplementaryPrompt() {
|
|
3436
3580
|
return `You are an expert insurance document analyst. Extract supplementary and regulatory information from this document.
|
|
@@ -3467,6 +3611,313 @@ function getExtractor(name) {
|
|
|
3467
3611
|
return EXTRACTORS[name];
|
|
3468
3612
|
}
|
|
3469
3613
|
|
|
3614
|
+
// src/core/quality.ts
|
|
3615
|
+
/**
 * Reduce a set of review issues to a single quality-gate status.
 *
 * @param {{ issues: Array<{ severity: string }>, hasRoundWarnings?: boolean }} params
 *   `issues` are inspected only for their `severity`; `hasRoundWarnings`
 *   (default `false`) forces at least a "warning" outcome.
 * @returns {"failed" | "warning" | "passed"} "failed" when any issue is
 *   blocking, otherwise "warning" when any issue is a warning or
 *   `hasRoundWarnings` is set, otherwise "passed".
 */
function evaluateQualityGate(params) {
  const { issues, hasRoundWarnings = false } = params;
  const hasSeverity = (level) => issues.some((entry) => entry.severity === level);

  const blocked = hasSeverity("blocking");
  const warned = hasSeverity("warning") || hasRoundWarnings;

  // Blocking issues always outrank warnings.
  if (blocked) {
    return "failed";
  }
  if (warned) {
    return "warning";
  }
  return "passed";
}
|
|
3621
|
+
/**
 * Decide whether a quality-gate status should abort the run.
 *
 * @param {string} mode - Quality-gate mode; only "strict" can fail a run.
 * @param {string} status - Status as produced by the quality gate.
 * @returns {boolean} `true` only for mode "strict" combined with status "failed".
 */
function shouldFailQualityGate(mode, status) {
  if (mode !== "strict") {
    return false;
  }
  return status === "failed";
}
|
|
3624
|
+
|
|
3625
|
+
// src/extraction/quality.ts
|
|
3626
|
+
/**
 * Normalize a raw form number for use as an inventory key.
 *
 * @param {unknown} value - Candidate form number.
 * @returns {string | undefined} The trimmed string, or `undefined` when the
 *   input is not a string or is empty after trimming.
 */
function normalizeFormNumber(value) {
  const cleaned = typeof value === "string" ? value.trim() : "";
  return cleaned.length > 0 ? cleaned : void 0;
}
|
|
3632
|
+
/**
 * Record that `source` referenced `formNumber`, creating or enriching the
 * inventory entry for that form.
 *
 * An existing entry keeps the values it already has; only missing fields are
 * filled in from `extra`, and `source` is appended to `sources` once.
 *
 * @param {Map<string, object>} inventory - Entries keyed by form number; mutated in place.
 * @param {string | undefined} formNumber - Inventory key; nothing happens when falsy.
 * @param {string} source - Name of the extractor that referenced the form.
 * @param {{ title?: string, pageStart?: number, pageEnd?: number }} [extra]
 *   Optional details used to fill gaps in the entry.
 * @returns {void}
 */
function addFormEntry(inventory, formNumber, source, extra) {
  if (!formNumber) return;

  const existing = inventory.get(formNumber);
  if (!existing) {
    inventory.set(formNumber, {
      formNumber,
      title: extra?.title,
      pageStart: extra?.pageStart,
      pageEnd: extra?.pageEnd,
      sources: [source]
    });
    return;
  }

  // An empty title counts as "no title yet", so truthiness is intended here.
  if (!existing.title && extra?.title) existing.title = extra.title;
  // Page numbers are compared against null/undefined rather than by
  // truthiness so that a recorded page 0 is neither overwritten nor ignored.
  if (existing.pageStart == null && extra?.pageStart != null) existing.pageStart = extra.pageStart;
  if (existing.pageEnd == null && extra?.pageEnd != null) existing.pageEnd = extra.pageEnd;
  if (!existing.sources.includes(source)) existing.sources.push(source);
}
|
|
3650
|
+
/**
 * Detect values that point elsewhere in the document instead of stating a
 * concrete term.
 *
 * @param {unknown} value - Candidate limit/deductible text.
 * @returns {boolean} `true` when `value` is a string containing one of the
 *   known referential phrases (case-insensitive); `false` otherwise.
 */
function looksReferential(value) {
  if (typeof value !== "string") {
    return false;
  }
  const haystack = value.toLowerCase();
  const markers = [
    "shown in the declarations",
    "shown in declarations",
    "shown in the schedule",
    "as stated",
    "if applicable"
  ];
  return markers.some((marker) => haystack.includes(marker));
}
|
|
3655
|
+
/**
 * Detect text that is a table-of-contents line rather than real content.
 *
 * Two shapes are recognised after trimming:
 *  - text ending in four or more leader dots followed by a 1-3 digit page
 *    number, e.g. "Appraisal ..... 19";
 *  - a numbered, capitalised heading ending in three or more leader dots and
 *    a 1-3 digit page number, e.g. "1. Definitions ... 12".
 *
 * Whitespace between the leader dots and the page number is allowed, since
 * extracted text commonly separates the two with a space.
 *
 * @param {unknown} value - Candidate content text.
 * @returns {boolean} `true` when `value` is a string matching either shape.
 */
function looksTocArtifact(value) {
  if (typeof value !== "string") return false;
  const text = value.trim();
  return /\.{4,}\s*\d{1,3}$/.test(text) || /^\d+\.\s+[A-Z][\s\S]*\.{3,}\s*\d{1,3}$/.test(text);
}
|
|
3659
|
+
/**
 * Rank a coverage's `sectionRef` by how authoritative its source section is.
 *
 * The first matching tier wins, checked from highest to lowest:
 * 4 for declarations/schedules, 3 for endorsements, 2 for additional
 * coverages, 1 for coverage/policy forms.
 *
 * @param {unknown} sectionRef - Section reference text.
 * @returns {number} Rank from 4 (highest) down to 0 (non-string or no match).
 */
function sourcePrecedence(sectionRef) {
  if (typeof sectionRef !== "string") {
    return 0;
  }
  const ref = sectionRef.toLowerCase();
  const tiers = [
    [4, ["declaration", "scheduled coverages", "schedule"]],
    [3, ["endorsement"]],
    [2, ["additional coverages"]],
    [1, ["coverage form", "policy form"]]
  ];
  for (const [rank, keywords] of tiers) {
    if (keywords.some((keyword) => ref.includes(keyword))) {
      return rank;
    }
  }
  return 0;
}
|
|
3668
|
+
/**
 * Build the deterministic review report for one extraction run.
 *
 * Reads extractor results from `memory`, merges every referenced form into a
 * single form inventory, runs rule-based checks over endorsements, coverages,
 * exclusions, conditions and sections, and summarises the LLM review rounds.
 *
 * @param {object} params
 * @param {{ get(name: string): any }} params.memory - Extractor results keyed
 *   by extractor name ("form_inventory", "coverage_limits", "endorsements",
 *   "exclusions", "conditions", "sections").
 * @param {Array<{ round: number, complete: boolean, qualityIssues: string[] }>} [params.reviewRounds]
 *   LLM review round records; defaults to an empty list.
 * @param {Array<unknown>} [params.pageAssignments] - Page assignments; only
 *   the count is used. Defaults to an empty list.
 * @returns {{
 *   issues: object[],
 *   rounds: object[],
 *   artifacts: object[],
 *   reviewRoundRecords: object[],
 *   formInventory: object[],
 *   qualityGateStatus: string
 * }} The issues found, per-round summaries, artifact counts, the original
 *   round records, the merged inventory sorted by form number, and the
 *   overall gate status.
 */
function buildExtractionReviewReport(params) {
  const { memory, reviewRounds = [], pageAssignments = [] } = params;

  // Extracted records are untyped, so fields are narrowed before reporting.
  const asString = (value) => (typeof value === "string" ? value : void 0);
  const asNumber = (value) => (typeof value === "number" ? value : void 0);
  const labelOf = (value) => String(value ?? "unknown");

  const deterministicIssues = [];
  const inventory = /* @__PURE__ */ new Map();
  const extractedFormInventory = memory.get("form_inventory")?.forms ?? [];
  const coverages = memory.get("coverage_limits")?.coverages ?? [];
  const endorsements = memory.get("endorsements")?.endorsements ?? [];
  const exclusions = memory.get("exclusions")?.exclusions ?? [];
  const conditions = memory.get("conditions")?.conditions ?? [];
  const sections = memory.get("sections")?.sections ?? [];

  // Form numbers reported by the form_inventory extractor itself. The merged
  // `inventory` cannot be used for the "missing from inventory" checks below:
  // every endorsement/coverage form is added to it before being checked, so a
  // lookup there always succeeds.
  const declaredForms = /* @__PURE__ */ new Set();
  for (const form of extractedFormInventory) {
    const formNumber = normalizeFormNumber(form.formNumber);
    if (formNumber) declaredForms.add(formNumber);
    addFormEntry(inventory, formNumber, "form_inventory", {
      title: form.title,
      pageStart: form.pageStart,
      pageEnd: form.pageEnd
    });
  }
  // With no extracted form inventory there is nothing to cross-check against,
  // so the "missing from inventory" warnings are skipped rather than raised
  // for every referenced form.
  const canCrossCheckForms = declaredForms.size > 0;

  for (const endorsement of endorsements) {
    const formNumber = normalizeFormNumber(endorsement.formNumber);
    const title = asString(endorsement.title);
    const pageStart = asNumber(endorsement.pageStart);
    addFormEntry(inventory, formNumber, "endorsements", {
      title,
      pageStart,
      pageEnd: asNumber(endorsement.pageEnd)
    });
    if (!formNumber) {
      deterministicIssues.push({
        code: "endorsement_missing_form_number",
        severity: "blocking",
        message: "Endorsement is missing formNumber.",
        extractorName: "endorsements",
        pageNumber: pageStart,
        itemName: title
      });
    }
    if (formNumber && canCrossCheckForms && !declaredForms.has(formNumber)) {
      deterministicIssues.push({
        code: "endorsement_form_missing_from_inventory",
        severity: "warning",
        message: `Endorsement "${String(endorsement.title ?? formNumber)}" is not present in form inventory.`,
        extractorName: "endorsements",
        formNumber,
        pageNumber: pageStart,
        itemName: title
      });
    }
  }

  for (const coverage of coverages) {
    const formNumber = normalizeFormNumber(coverage.formNumber);
    const itemName = asString(coverage.name);
    const pageNumber = asNumber(coverage.pageNumber);
    const label = labelOf(coverage.name);
    addFormEntry(inventory, formNumber, "coverage_limits", {
      title: itemName,
      pageStart: pageNumber,
      pageEnd: pageNumber
    });
    if (typeof coverage.name === "string" && /coverage form$/i.test(coverage.name.trim())) {
      deterministicIssues.push({
        code: "generic_form_row_as_coverage",
        severity: "blocking",
        message: `Coverage "${coverage.name}" looks like a form header rather than a real coverage row.`,
        extractorName: "coverage_limits",
        formNumber,
        pageNumber,
        itemName: coverage.name
      });
    }
    if (typeof coverage.pageNumber !== "number") {
      deterministicIssues.push({
        code: "coverage_missing_page_number",
        severity: "warning",
        message: `Coverage "${label}" is missing pageNumber provenance.`,
        extractorName: "coverage_limits",
        formNumber,
        itemName
      });
    }
    if (typeof coverage.sectionRef !== "string" || !coverage.sectionRef.trim()) {
      deterministicIssues.push({
        code: "coverage_missing_section_ref",
        severity: "warning",
        message: `Coverage "${label}" is missing sectionRef provenance.`,
        extractorName: "coverage_limits",
        formNumber,
        pageNumber,
        itemName
      });
    }
    if (typeof coverage.originalContent !== "string" || !coverage.originalContent.trim()) {
      deterministicIssues.push({
        code: "coverage_missing_original_content",
        severity: "warning",
        message: `Coverage "${label}" is missing originalContent source text.`,
        extractorName: "coverage_limits",
        formNumber,
        pageNumber,
        itemName
      });
    }
    if (looksReferential(coverage.limit) || looksReferential(coverage.deductible)) {
      deterministicIssues.push({
        code: "coverage_referential_value",
        severity: "warning",
        message: `Coverage "${label}" contains referential language instead of a concrete scheduled term.`,
        extractorName: "coverage_limits",
        formNumber,
        pageNumber,
        itemName
      });
    }
    if (formNumber && canCrossCheckForms && !declaredForms.has(formNumber)) {
      deterministicIssues.push({
        code: "coverage_form_missing_from_inventory",
        severity: "warning",
        message: `Coverage "${label}" references form "${formNumber}" that is missing from form inventory.`,
        extractorName: "coverage_limits",
        formNumber,
        pageNumber,
        itemName
      });
    }
  }

  // Group coverages that share a name and form number (case-insensitive) so
  // that disagreeing limits/deductibles between sources can be reported.
  const coverageGroups = /* @__PURE__ */ new Map();
  for (const coverage of coverages) {
    const key = [
      String(coverage.name ?? "").toLowerCase(),
      String(coverage.formNumber ?? "").toLowerCase()
    ].join("|");
    coverageGroups.set(key, [...coverageGroups.get(key) ?? [], coverage]);
  }
  for (const [key, groupedCoverages] of coverageGroups.entries()) {
    if (groupedCoverages.length < 2) continue;
    const sorted = [...groupedCoverages].sort((a, b) => sourcePrecedence(b.sectionRef) - sourcePrecedence(a.sectionRef));
    const highest = sorted[0];
    const highestLimit = String(highest.limit ?? "").trim();
    const highestDeductible = String(highest.deductible ?? "").trim();
    for (const lower of sorted.slice(1)) {
      const lowerLimit = String(lower.limit ?? "").trim();
      const lowerDeductible = String(lower.deductible ?? "").trim();
      // Only values present on both sides can conflict.
      const limitConflict = highestLimit && lowerLimit && highestLimit !== lowerLimit;
      const deductibleConflict = highestDeductible && lowerDeductible && highestDeductible !== lowerDeductible;
      if (limitConflict || deductibleConflict) {
        deterministicIssues.push({
          code: "coverage_precedence_conflict",
          severity: "warning",
          message: `Coverage "${String(highest.name ?? key)}" has conflicting extracted terms across sources with different precedence.`,
          extractorName: "coverage_limits",
          formNumber: normalizeFormNumber(highest.formNumber) ?? normalizeFormNumber(lower.formNumber),
          pageNumber: asNumber(lower.pageNumber),
          itemName: asString(highest.name)
        });
      }
    }
  }

  for (const exclusion of exclusions) {
    const formNumber = normalizeFormNumber(exclusion.formNumber);
    const itemName = asString(exclusion.name);
    const pageNumber = asNumber(exclusion.pageNumber);
    addFormEntry(inventory, formNumber, "exclusions", {
      title: itemName,
      pageStart: pageNumber,
      pageEnd: pageNumber
    });
    if (typeof exclusion.pageNumber !== "number") {
      deterministicIssues.push({
        code: "exclusion_missing_page_number",
        severity: "warning",
        message: `Exclusion "${labelOf(exclusion.name)}" is missing pageNumber provenance.`,
        extractorName: "exclusions",
        formNumber,
        itemName
      });
    }
    if (looksTocArtifact(exclusion.content)) {
      deterministicIssues.push({
        code: "exclusion_toc_artifact",
        severity: "blocking",
        message: `Exclusion "${labelOf(exclusion.name)}" appears to be a table-of-contents artifact.`,
        extractorName: "exclusions",
        pageNumber,
        itemName
      });
    }
  }

  for (const condition of conditions) {
    const itemName = asString(condition.name);
    if (typeof condition.pageNumber !== "number") {
      deterministicIssues.push({
        code: "condition_missing_page_number",
        severity: "warning",
        message: `Condition "${labelOf(condition.name)}" is missing pageNumber provenance.`,
        extractorName: "conditions",
        itemName
      });
    }
    if (looksTocArtifact(condition.content)) {
      deterministicIssues.push({
        code: "condition_toc_artifact",
        severity: "blocking",
        message: `Condition "${labelOf(condition.name)}" appears to be a table-of-contents artifact.`,
        extractorName: "conditions",
        pageNumber: asNumber(condition.pageNumber),
        itemName
      });
    }
  }

  for (const section of sections) {
    // A short section confined to a single page is flagged as a possible
    // fragment split off from a neighbouring section.
    const isShort = typeof section.content === "string" && section.content.trim().length < 120;
    const isSinglePage = typeof section.pageStart === "number" && (!("pageEnd" in section) || section.pageEnd === section.pageStart || section.pageEnd === void 0);
    if (isShort && isSinglePage) {
      deterministicIssues.push({
        code: "section_short_fragment",
        severity: "warning",
        message: `Section "${labelOf(section.title)}" may be an orphan continuation fragment.`,
        extractorName: "sections",
        pageNumber: section.pageStart,
        itemName: asString(section.title)
      });
    }
  }

  const formInventory = [...inventory.values()].sort((a, b) => a.formNumber.localeCompare(b.formNumber));
  const rounds = reviewRounds.map((round) => ({
    round: round.round,
    kind: "llm_review",
    status: round.complete && round.qualityIssues.length === 0 ? "passed" : "warning",
    summary: round.qualityIssues[0] ?? (round.complete ? "Review passed." : "Review requested follow-up extraction.")
  }));
  const artifacts = [
    { kind: "form_inventory", label: "Form Inventory", itemCount: formInventory.length },
    { kind: "page_map", label: "Page Map", itemCount: pageAssignments.length }
  ];
  const qualityGateStatus = evaluateQualityGate({
    issues: deterministicIssues,
    hasRoundWarnings: reviewRounds.some((round) => round.qualityIssues.length > 0 || !round.complete)
  });

  return {
    issues: deterministicIssues,
    rounds,
    artifacts,
    reviewRoundRecords: reviewRounds,
    formInventory,
    qualityGateStatus
  };
}
|
|
3911
|
+
/**
 * Convert an LLM review result into the record stored for a review round.
 *
 * @param {number} round - Round identifier, stored as-is.
 * @param {{ complete: boolean, missingFields: unknown, qualityIssues?: unknown[] | null, additionalTasks: unknown }} review
 *   Review result; a missing or null `qualityIssues` becomes an empty array.
 * @returns {{ round: number, complete: boolean, missingFields: unknown, qualityIssues: unknown[], additionalTasks: unknown }}
 */
function toReviewRoundRecord(round, review) {
  const { complete, missingFields, qualityIssues, additionalTasks } = review;
  return {
    round,
    complete,
    missingFields,
    qualityIssues: qualityIssues ?? [],
    additionalTasks
  };
}
|
|
3920
|
+
|
|
3470
3921
|
// src/extraction/coordinator.ts
|
|
3471
3922
|
function createExtractor(config) {
|
|
3472
3923
|
const {
|
|
@@ -3479,6 +3930,7 @@ function createExtractor(config) {
|
|
|
3479
3930
|
onProgress,
|
|
3480
3931
|
log,
|
|
3481
3932
|
providerOptions,
|
|
3933
|
+
qualityGate = "warn",
|
|
3482
3934
|
onCheckpointSave
|
|
3483
3935
|
} = config;
|
|
3484
3936
|
const limit = pLimit(concurrency);
|
|
@@ -3535,6 +3987,50 @@ function createExtractor(config) {
|
|
|
3535
3987
|
if (extractorPages.size === 0) return "No page assignments available.";
|
|
3536
3988
|
return [...extractorPages.entries()].map(([extractorName, pages]) => `${extractorName}: pages ${pages.join(", ")}`).join("\n");
|
|
3537
3989
|
}
|
|
3990
|
+
/**
 * Cleans up the extractor list of every page assignment, optionally using the
 * form inventory to learn which form types cover each page.
 *
 * For each assignment:
 *  - an empty extractor list becomes ["sections"]; falsy names and duplicates are removed;
 *  - "coverage_limits" is removed when the page has no schedule values and either
 *    its pageRole is a policy/condition-exclusion/endorsement form, or (absent a
 *    "declarations" extractor) the page is covered by an endorsement/notice/application
 *    form or is also assigned conditions/exclusions/endorsements;
 *  - "endorsements" is appended when the inventory says an endorsement covers the page;
 *  - if nothing is left, the list falls back to ["sections"].
 *
 * @param {Array<object>} pageAssignments - Assignments with `extractorNames`,
 *   `localPageNumber`, and optional `pageRole` / `hasScheduleValues`.
 * @param {{forms: Array<object>}} [formInventory] - Optional inventory; forms may
 *   carry `formType`, `pageStart` and `pageEnd`.
 * @returns {Array<object>} New assignment objects (inputs are not mutated) with
 *   a normalized `extractorNames`.
 */
function normalizePageAssignments(pageAssignments, formInventory) {
  // Forms without a start page cannot be tied to pages; a missing pageEnd means a one-page form.
  const pagedForms = [];
  if (formInventory) {
    for (const form of formInventory.forms) {
      if (form.pageStart != null) {
        pagedForms.push({
          formType: form.formType,
          start: form.pageStart,
          end: form.pageEnd ?? form.pageStart
        });
      }
    }
  }
  // Test each page against the form ranges instead of expanding every range into a
  // per-page map: page numbers come from model output, and an oversized pageEnd
  // would otherwise make the expansion loop run (and allocate) without bound.
  const formTypesOnPage = (pageNumber) => {
    const types = /* @__PURE__ */ new Set();
    if (typeof pageNumber !== "number") return types;
    for (const form of pagedForms) {
      if (pageNumber >= form.start && pageNumber <= form.end) {
        types.add(form.formType);
      }
    }
    return types;
  };
  const rolesBlockingCoverageLimits = ["policy_form", "condition_exclusion_form", "endorsement_form"];
  const formTypesBlockingCoverageLimits = ["endorsement", "notice", "application"];
  return pageAssignments.map((assignment) => {
    const requested = assignment.extractorNames.length > 0 ? assignment.extractorNames : ["sections"];
    let extractorNames = [...new Set(requested.filter(Boolean))];
    const inventoryTypes = formTypesOnPage(assignment.localPageNumber);
    const hasDeclarations = extractorNames.includes("declarations");
    const looksLikeScheduleValues = assignment.hasScheduleValues === true;
    // Pages flagged as carrying schedule values always keep coverage_limits.
    if (extractorNames.includes("coverage_limits") && !looksLikeScheduleValues) {
      const roleBlocksCoverageLimits = rolesBlockingCoverageLimits.includes(assignment.pageRole);
      const inventoryBlocksCoverageLimits = formTypesBlockingCoverageLimits.some((type) => inventoryTypes.has(type));
      const extractorsBlockCoverageLimits = extractorNames.includes("conditions") || extractorNames.includes("exclusions") || extractorNames.includes("endorsements");
      // A declarations extractor overrides the inventory/extractor signals, but not the page role.
      const shouldDropCoverageLimits = roleBlocksCoverageLimits || !hasDeclarations && (inventoryBlocksCoverageLimits || extractorsBlockCoverageLimits);
      if (shouldDropCoverageLimits) {
        extractorNames = extractorNames.filter((name) => name !== "coverage_limits");
      }
    }
    if (inventoryTypes.has("endorsement") && !extractorNames.includes("endorsements")) {
      extractorNames = [...extractorNames, "endorsements"];
    }
    if (extractorNames.length === 0) {
      extractorNames = ["sections"];
    }
    return {
      ...assignment,
      extractorNames
    };
  });
}
|
|
3538
4034
|
function buildTemplateHints(primaryType, documentType, pageCount, template) {
|
|
3539
4035
|
return [
|
|
3540
4036
|
`Document type: ${primaryType} ${documentType}`,
|
|
@@ -3653,6 +4149,38 @@ function createExtractor(config) {
|
|
|
3653
4149
|
const template = getTemplate(primaryType);
|
|
3654
4150
|
const pageCount = resumed?.pageCount ?? await getPdfPageCount(pdfBase64);
|
|
3655
4151
|
const templateHints = buildTemplateHints(primaryType, documentType, pageCount, template);
|
|
4152
|
+
let formInventory;
|
|
4153
|
+
if (resumed?.formInventory && pipelineCtx.isPhaseComplete("form_inventory")) {
|
|
4154
|
+
formInventory = resumed.formInventory;
|
|
4155
|
+
memory.set("form_inventory", formInventory);
|
|
4156
|
+
onProgress?.("Resuming from checkpoint (form inventory complete)...");
|
|
4157
|
+
} else {
|
|
4158
|
+
onProgress?.(`Building form inventory for ${primaryType} ${documentType}...`);
|
|
4159
|
+
const formInventoryResponse = await safeGenerateObject(
|
|
4160
|
+
generateObject,
|
|
4161
|
+
{
|
|
4162
|
+
prompt: buildFormInventoryPrompt(templateHints),
|
|
4163
|
+
schema: FormInventorySchema,
|
|
4164
|
+
maxTokens: 2048,
|
|
4165
|
+
providerOptions: { ...providerOptions, pdfBase64 }
|
|
4166
|
+
},
|
|
4167
|
+
{
|
|
4168
|
+
fallback: { forms: [] },
|
|
4169
|
+
log,
|
|
4170
|
+
onError: (err, attempt) => log?.(`Form inventory attempt ${attempt + 1} failed: ${err instanceof Error ? err.message : String(err)}`)
|
|
4171
|
+
}
|
|
4172
|
+
);
|
|
4173
|
+
trackUsage(formInventoryResponse.usage);
|
|
4174
|
+
formInventory = formInventoryResponse.object;
|
|
4175
|
+
memory.set("form_inventory", formInventory);
|
|
4176
|
+
await pipelineCtx.save("form_inventory", {
|
|
4177
|
+
id,
|
|
4178
|
+
pageCount,
|
|
4179
|
+
classifyResult,
|
|
4180
|
+
formInventory,
|
|
4181
|
+
memory: Object.fromEntries(memory)
|
|
4182
|
+
});
|
|
4183
|
+
}
|
|
3656
4184
|
let pageAssignments;
|
|
3657
4185
|
if (resumed?.pageAssignments && pipelineCtx.isPhaseComplete("page_map")) {
|
|
3658
4186
|
pageAssignments = resumed.pageAssignments;
|
|
@@ -3661,13 +4189,14 @@ function createExtractor(config) {
|
|
|
3661
4189
|
onProgress?.(`Mapping document pages for ${primaryType} ${documentType}...`);
|
|
3662
4190
|
const chunkSize = 8;
|
|
3663
4191
|
const collectedAssignments = [];
|
|
4192
|
+
const formInventoryHint = formInventory?.forms.length ? formatFormInventoryForPageMap(formInventory.forms) : void 0;
|
|
3664
4193
|
for (let startPage = 1; startPage <= pageCount; startPage += chunkSize) {
|
|
3665
4194
|
const endPage = Math.min(pageCount, startPage + chunkSize - 1);
|
|
3666
4195
|
const pagesPdf = await extractPageRange(pdfBase64, startPage, endPage);
|
|
3667
4196
|
const mapResponse = await safeGenerateObject(
|
|
3668
4197
|
generateObject,
|
|
3669
4198
|
{
|
|
3670
|
-
prompt: buildPageMapPrompt(templateHints, startPage, endPage),
|
|
4199
|
+
prompt: buildPageMapPrompt(templateHints, startPage, endPage, formInventoryHint),
|
|
3671
4200
|
schema: PageMapChunkSchema,
|
|
3672
4201
|
maxTokens: 2048,
|
|
3673
4202
|
providerOptions: { ...providerOptions, pdfBase64: pagesPdf }
|
|
@@ -3699,10 +4228,12 @@ function createExtractor(config) {
|
|
|
3699
4228
|
confidence: 0,
|
|
3700
4229
|
notes: "Full-document fallback page assignment"
|
|
3701
4230
|
}));
|
|
4231
|
+
pageAssignments = normalizePageAssignments(pageAssignments, formInventory);
|
|
3702
4232
|
await pipelineCtx.save("page_map", {
|
|
3703
4233
|
id,
|
|
3704
4234
|
pageCount,
|
|
3705
4235
|
classifyResult,
|
|
4236
|
+
formInventory,
|
|
3706
4237
|
pageAssignments,
|
|
3707
4238
|
memory: Object.fromEntries(memory)
|
|
3708
4239
|
});
|
|
@@ -3718,6 +4249,7 @@ function createExtractor(config) {
|
|
|
3718
4249
|
id,
|
|
3719
4250
|
pageCount,
|
|
3720
4251
|
classifyResult,
|
|
4252
|
+
formInventory,
|
|
3721
4253
|
pageAssignments,
|
|
3722
4254
|
plan,
|
|
3723
4255
|
memory: Object.fromEntries(memory)
|
|
@@ -3766,12 +4298,16 @@ function createExtractor(config) {
|
|
|
3766
4298
|
id,
|
|
3767
4299
|
pageCount,
|
|
3768
4300
|
classifyResult,
|
|
4301
|
+
formInventory,
|
|
3769
4302
|
pageAssignments,
|
|
3770
4303
|
plan,
|
|
3771
4304
|
memory: Object.fromEntries(memory)
|
|
3772
4305
|
});
|
|
3773
4306
|
}
|
|
4307
|
+
let reviewRounds = resumed?.reviewReport?.reviewRoundRecords ?? [];
|
|
4308
|
+
let reviewReport = resumed?.reviewReport;
|
|
3774
4309
|
if (!pipelineCtx.isPhaseComplete("review")) {
|
|
4310
|
+
reviewRounds = [];
|
|
3775
4311
|
for (let round = 0; round < maxReviewRounds; round++) {
|
|
3776
4312
|
const extractedKeys = [...memory.keys()].filter((k) => k !== "classify");
|
|
3777
4313
|
const extractionSummary = summarizeExtraction(memory);
|
|
@@ -3791,6 +4327,7 @@ function createExtractor(config) {
|
|
|
3791
4327
|
}
|
|
3792
4328
|
);
|
|
3793
4329
|
trackUsage(reviewResponse.usage);
|
|
4330
|
+
reviewRounds.push(toReviewRoundRecord(round + 1, reviewResponse.object));
|
|
3794
4331
|
if (reviewResponse.object.qualityIssues?.length) {
|
|
3795
4332
|
await log?.(`Review round ${round + 1} quality issues: ${reviewResponse.object.qualityIssues.join("; ")}`);
|
|
3796
4333
|
}
|
|
@@ -3832,23 +4369,45 @@ function createExtractor(config) {
|
|
|
3832
4369
|
}
|
|
3833
4370
|
}
|
|
3834
4371
|
}
|
|
4372
|
+
reviewReport = buildExtractionReviewReport({
|
|
4373
|
+
memory,
|
|
4374
|
+
pageAssignments,
|
|
4375
|
+
reviewRounds
|
|
4376
|
+
});
|
|
4377
|
+
if (reviewReport.issues.length > 0) {
|
|
4378
|
+
await log?.(
|
|
4379
|
+
`Deterministic review issues: ${reviewReport.issues.map((issue) => issue.message).join("; ")}`
|
|
4380
|
+
);
|
|
4381
|
+
}
|
|
4382
|
+
if (shouldFailQualityGate(qualityGate, reviewReport.qualityGateStatus)) {
|
|
4383
|
+
throw new Error("Extraction quality gate failed. See reviewReport for blocking issues.");
|
|
4384
|
+
}
|
|
3835
4385
|
await pipelineCtx.save("review", {
|
|
3836
4386
|
id,
|
|
3837
4387
|
pageCount,
|
|
3838
4388
|
classifyResult,
|
|
4389
|
+
formInventory,
|
|
3839
4390
|
pageAssignments,
|
|
3840
4391
|
plan,
|
|
4392
|
+
reviewReport,
|
|
3841
4393
|
memory: Object.fromEntries(memory)
|
|
3842
4394
|
});
|
|
3843
4395
|
}
|
|
4396
|
+
reviewReport ?? (reviewReport = buildExtractionReviewReport({
|
|
4397
|
+
memory,
|
|
4398
|
+
pageAssignments,
|
|
4399
|
+
reviewRounds
|
|
4400
|
+
}));
|
|
3844
4401
|
onProgress?.("Assembling document...");
|
|
3845
4402
|
const document = assembleDocument(id, documentType, memory);
|
|
3846
4403
|
await pipelineCtx.save("assemble", {
|
|
3847
4404
|
id,
|
|
3848
4405
|
pageCount,
|
|
3849
4406
|
classifyResult,
|
|
4407
|
+
formInventory,
|
|
3850
4408
|
pageAssignments,
|
|
3851
4409
|
plan,
|
|
4410
|
+
reviewReport,
|
|
3852
4411
|
memory: Object.fromEntries(memory),
|
|
3853
4412
|
document
|
|
3854
4413
|
});
|
|
@@ -3874,7 +4433,8 @@ function createExtractor(config) {
|
|
|
3874
4433
|
callsWithUsage,
|
|
3875
4434
|
callsMissingUsage
|
|
3876
4435
|
},
|
|
3877
|
-
checkpoint: finalCheckpoint
|
|
4436
|
+
checkpoint: finalCheckpoint,
|
|
4437
|
+
reviewReport
|
|
3878
4438
|
};
|
|
3879
4439
|
}
|
|
3880
4440
|
return { extract };
|
|
@@ -4094,8 +4654,8 @@ Respond with JSON only:
|
|
|
4094
4654
|
}`;
|
|
4095
4655
|
|
|
4096
4656
|
// src/schemas/application.ts
|
|
4097
|
-
import { z as
|
|
4098
|
-
var FieldTypeSchema =
|
|
4657
|
+
import { z as z33 } from "zod";
|
|
4658
|
+
var FieldTypeSchema = z33.enum([
|
|
4099
4659
|
"text",
|
|
4100
4660
|
"numeric",
|
|
4101
4661
|
"currency",
|
|
@@ -4104,100 +4664,131 @@ var FieldTypeSchema = z32.enum([
|
|
|
4104
4664
|
"table",
|
|
4105
4665
|
"declaration"
|
|
4106
4666
|
]);
|
|
4107
|
-
var ApplicationFieldSchema =
|
|
4108
|
-
id:
|
|
4109
|
-
label:
|
|
4110
|
-
section:
|
|
4667
|
+
// One field of an application form. `value`, `source` and `confidence` are
// optional, so the same shape describes a field before and after it is filled.
var ApplicationFieldSchema = z33.object({
  id: z33.string(),
  label: z33.string(),
  section: z33.string(),
  fieldType: FieldTypeSchema,
  required: z33.boolean(),
  options: z33.array(z33.string()).optional(),
  columns: z33.array(z33.string()).optional(),
  requiresExplanationIfYes: z33.boolean().optional(),
  // Names another field (`dependsOn`) and a value of it (`whenValue`);
  // presumably the field only applies when they match — confirm against callers.
  condition: z33.object({
    dependsOn: z33.string(),
    whenValue: z33.string()
  }).optional(),
  value: z33.string().optional(),
  source: z33.string().optional().describe("Where the value came from: auto-fill, user, lookup"),
  confidence: z33.enum(["confirmed", "high", "medium", "low"]).optional()
});
// Result of classifying a document as an application; confidence is bounded to [0, 1].
var ApplicationClassifyResultSchema = z33.object({
  isApplication: z33.boolean(),
  confidence: z33.number().min(0).max(1),
  applicationType: z33.string().nullable()
});
// Wrapper around the list of extracted fields.
var FieldExtractionResultSchema = z33.object({
  fields: z33.array(ApplicationFieldSchema)
});
// A single auto-fill match; the only confidence value accepted here is "confirmed".
var AutoFillMatchSchema = z33.object({
  fieldId: z33.string(),
  value: z33.string(),
  confidence: z33.enum(["confirmed"]),
  contextKey: z33.string()
});
var AutoFillResultSchema = z33.object({
  matches: z33.array(AutoFillMatchSchema)
});
// Batches are arrays of field IDs.
var QuestionBatchResultSchema = z33.object({
  batches: z33.array(z33.array(z33.string()).describe("Array of field IDs in this batch"))
});
// A lookup request tied to the field IDs it is meant to fill.
var LookupRequestSchema = z33.object({
  type: z33.string().describe("Type of lookup: 'records', 'website', 'policy'"),
  description: z33.string(),
  url: z33.string().optional(),
  targetFieldIds: z33.array(z33.string())
});
// Classified intent of a reply, with optional question text and lookup requests.
var ReplyIntentSchema = z33.object({
  primaryIntent: z33.enum(["answers_only", "question", "lookup_request", "mixed"]),
  hasAnswers: z33.boolean(),
  questionText: z33.string().optional(),
  questionFieldIds: z33.array(z33.string()).optional(),
  lookupRequests: z33.array(LookupRequestSchema).optional()
});
var ParsedAnswerSchema = z33.object({
  fieldId: z33.string(),
  value: z33.string(),
  explanation: z33.string().optional()
});
// Parsed answers plus the IDs of fields that were left unanswered.
var AnswerParsingResultSchema = z33.object({
  answers: z33.array(ParsedAnswerSchema),
  unanswered: z33.array(z33.string()).describe("Field IDs that were not answered")
});
// A value filled from a lookup; unlike ApplicationFieldSchema, `source` is required here.
var LookupFillSchema = z33.object({
  fieldId: z33.string(),
  value: z33.string(),
  source: z33.string().describe("Specific citable reference, e.g. 'GL Policy #POL-12345 (Hartford)'")
});
var LookupFillResultSchema = z33.object({
  fills: z33.array(LookupFillSchema),
  unfillable: z33.array(z33.string()),
  explanation: z33.string().optional()
});
// Placement of text on a PDF page; x/y are percentages of the page, not absolute units.
var FlatPdfPlacementSchema = z33.object({
  fieldId: z33.string(),
  page: z33.number(),
  x: z33.number().describe("Percentage from left edge (0-100)"),
  y: z33.number().describe("Percentage from top edge (0-100)"),
  text: z33.string(),
  fontSize: z33.number().optional(),
  isCheckmark: z33.boolean().optional()
});
// Maps an application field ID to an AcroForm field name and the value to write.
var AcroFormMappingSchema = z33.object({
  fieldId: z33.string(),
  acroFormName: z33.string(),
  value: z33.string()
});
// Quality-gate vocabulary reused by the quality schemas below.
var QualityGateStatusSchema = z33.enum(["passed", "warning", "failed"]);
var QualitySeveritySchema = z33.enum(["info", "warning", "blocking"]);
// One quality finding; `fieldId` is optional.
var ApplicationQualityIssueSchema = z33.object({
  code: z33.string(),
  severity: QualitySeveritySchema,
  message: z33.string(),
  fieldId: z33.string().optional()
});
var ApplicationQualityRoundSchema = z33.object({
  round: z33.number(),
  kind: z33.string(),
  status: QualityGateStatusSchema,
  summary: z33.string().optional()
});
var ApplicationQualityArtifactSchema = z33.object({
  kind: z33.string(),
  label: z33.string().optional(),
  itemCount: z33.number().optional()
});
// Review result for an email: its issues and the resulting gate status.
var ApplicationEmailReviewSchema = z33.object({
  issues: z33.array(ApplicationQualityIssueSchema),
  qualityGateStatus: QualityGateStatusSchema
});
// Full quality report; only `issues` and `qualityGateStatus` are required.
var ApplicationQualityReportSchema = z33.object({
  issues: z33.array(ApplicationQualityIssueSchema),
  rounds: z33.array(ApplicationQualityRoundSchema).optional(),
  artifacts: z33.array(ApplicationQualityArtifactSchema).optional(),
  emailReview: ApplicationEmailReviewSchema.optional(),
  qualityGateStatus: QualityGateStatusSchema
});
// State of an application: its fields, batches, optional quality report and status.
var ApplicationStateSchema = z33.object({
  id: z33.string(),
  pdfBase64: z33.string().optional().describe("Original PDF, omitted after extraction"),
  title: z33.string().optional(),
  applicationType: z33.string().nullable().optional(),
  fields: z33.array(ApplicationFieldSchema),
  batches: z33.array(z33.array(z33.string())).optional(),
  // Defaults to 0 when absent from the parsed input.
  currentBatchIndex: z33.number().default(0),
  qualityReport: ApplicationQualityReportSchema.optional(),
  status: z33.enum(["classifying", "extracting", "auto_filling", "batching", "collecting", "confirming", "mapping", "complete"]),
  // NOTE(review): createdAt/updatedAt look like epoch milliseconds (the pipeline assigns Date.now() to updatedAt) — confirm for createdAt.
  createdAt: z33.number(),
  updatedAt: z33.number()
});
|
|
4202
4793
|
|
|
4203
4794
|
// src/application/agents/classifier.ts
|
|
@@ -4705,6 +5296,87 @@ async function generateBatchEmail(batchFields, batchIndex, totalBatches, opts, g
|
|
|
4705
5296
|
return { text, usage };
|
|
4706
5297
|
}
|
|
4707
5298
|
|
|
5299
|
+
// src/application/quality.ts
|
|
5300
|
+
/**
 * Tells whether a field's source string is too vague to cite.
 *
 * A source is vague when it is missing, blank, or (after trimming and
 * lower-casing) equals "unknown", "context" or "user provided", or contains
 * "existing records" or "available data".
 *
 * @param {string|null|undefined} source - Provenance text recorded on a field.
 * @returns {boolean} True when the source is missing, blank or non-specific.
 */
function isVagueSource(source) {
  if (!source) return true;
  const normalized = source.trim().toLowerCase();
  // A whitespace-only source carries no provenance; treat it the same as a missing one.
  if (normalized === "") return true;
  return normalized === "unknown" || normalized.includes("existing records") || normalized.includes("available data") || normalized === "context" || normalized === "user provided";
}
|
|
5305
|
+
/**
 * Builds the quality report for an application from its current fields.
 *
 * Per field, in order, it reports:
 *  - `duplicate_field_id` (blocking) when the id was already seen;
 *  - `required_field_unfilled` (warning) when a required field has no value;
 *  - for filled fields, `filled_field_missing_source` (blocking) when there is
 *    no source, otherwise `filled_field_vague_source` (warning) when the source
 *    is vague;
 *  - `filled_field_low_confidence` (warning) when a filled field has low or
 *    missing confidence.
 *
 * @param {{fields: Array<object>}} state - Application state; only `fields` is read.
 * @returns {object} Report with `issues`, empty `rounds`, one `artifacts` entry
 *   counting the fields, and `qualityGateStatus` from `evaluateQualityGate`.
 */
function buildApplicationQualityReport(state) {
  const issues = [];
  const seenIds = /* @__PURE__ */ new Set();
  for (const field of state.fields) {
    if (seenIds.has(field.id)) {
      issues.push({
        code: "duplicate_field_id",
        severity: "blocking",
        message: `Field "${field.label}" has a duplicate id "${field.id}".`,
        fieldId: field.id
      });
    }
    seenIds.add(field.id);
    if (field.required && !field.value) {
      issues.push({
        code: "required_field_unfilled",
        severity: "warning",
        message: `Required field "${field.label}" is still unfilled.`,
        fieldId: field.id
      });
    }
    if (field.value && !field.source) {
      issues.push({
        code: "filled_field_missing_source",
        severity: "blocking",
        message: `Filled field "${field.label}" is missing source provenance.`,
        fieldId: field.id
      });
    } else if (field.value && isVagueSource(field.source)) {
      // Only reached when a source exists: a missing source is already reported
      // above as blocking, so it must not also be reported as merely vague.
      issues.push({
        code: "filled_field_vague_source",
        severity: "warning",
        message: `Filled field "${field.label}" has a vague or non-citable source.`,
        fieldId: field.id
      });
    }
    if (field.value && (!field.confidence || field.confidence === "low")) {
      issues.push({
        code: "filled_field_low_confidence",
        severity: "warning",
        message: `Filled field "${field.label}" has low or missing confidence.`,
        fieldId: field.id
      });
    }
  }
  return {
    issues,
    rounds: [],
    artifacts: [
      { kind: "application_fields", label: "Application Fields", itemCount: state.fields.length }
    ],
    qualityGateStatus: evaluateQualityGate({ issues })
  };
}
|
|
5360
|
+
/**
 * Checks that a generated batch email mentions each field it asks about.
 *
 * The comparison is case-insensitive and uses the trimmed field label. Labels
 * shorter than six characters are not checked.
 *
 * @param {string} text - Generated email body.
 * @param {Iterable<{id: string, label: string}>} batchFields - Fields the email should cover.
 * @returns {{issues: Array<object>, qualityGateStatus: *}} One warning per
 *   label not found in the email, plus the status from `evaluateQualityGate`.
 */
function reviewBatchEmail(text, batchFields) {
  const haystack = text.toLowerCase();
  const issues = [];
  for (const { id, label } of batchFields) {
    const needle = label.trim().toLowerCase();
    // Short labels are skipped rather than matched.
    if (needle.length < 6) continue;
    if (haystack.includes(needle)) continue;
    issues.push({
      code: "email_missing_field_prompt",
      severity: "warning",
      message: `Generated email does not clearly mention field "${label}".`,
      fieldId: id
    });
  }
  const qualityGateStatus = evaluateQualityGate({ issues });
  return { issues, qualityGateStatus };
}
|
|
5379
|
+
|
|
4708
5380
|
// src/application/coordinator.ts
|
|
4709
5381
|
function createApplicationPipeline(config) {
|
|
4710
5382
|
const {
|
|
@@ -4719,7 +5391,8 @@ function createApplicationPipeline(config) {
|
|
|
4719
5391
|
onTokenUsage,
|
|
4720
5392
|
onProgress,
|
|
4721
5393
|
log,
|
|
4722
|
-
providerOptions
|
|
5394
|
+
providerOptions,
|
|
5395
|
+
qualityGate = "warn"
|
|
4723
5396
|
} = config;
|
|
4724
5397
|
const limit = pLimit(concurrency);
|
|
4725
5398
|
let totalUsage = { inputTokens: 0, outputTokens: 0 };
|
|
@@ -4741,6 +5414,7 @@ function createApplicationPipeline(config) {
|
|
|
4741
5414
|
title: void 0,
|
|
4742
5415
|
applicationType: null,
|
|
4743
5416
|
fields: [],
|
|
5417
|
+
qualityReport: void 0,
|
|
4744
5418
|
batches: void 0,
|
|
4745
5419
|
currentBatchIndex: 0,
|
|
4746
5420
|
status: "classifying",
|
|
@@ -4765,8 +5439,9 @@ function createApplicationPipeline(config) {
|
|
|
4765
5439
|
if (!classifyResult.isApplication) {
|
|
4766
5440
|
state.status = "complete";
|
|
4767
5441
|
state.updatedAt = Date.now();
|
|
5442
|
+
state.qualityReport = buildApplicationQualityReport(state);
|
|
4768
5443
|
await applicationStore?.save(state);
|
|
4769
|
-
return { state, tokenUsage: totalUsage };
|
|
5444
|
+
return { state, tokenUsage: totalUsage, reviewReport: state.qualityReport };
|
|
4770
5445
|
}
|
|
4771
5446
|
state.applicationType = classifyResult.applicationType;
|
|
4772
5447
|
state.status = "extracting";
|
|
@@ -4790,8 +5465,9 @@ function createApplicationPipeline(config) {
|
|
|
4790
5465
|
await log?.("No fields extracted, completing pipeline with empty result");
|
|
4791
5466
|
state.status = "complete";
|
|
4792
5467
|
state.updatedAt = Date.now();
|
|
5468
|
+
state.qualityReport = buildApplicationQualityReport(state);
|
|
4793
5469
|
await applicationStore?.save(state);
|
|
4794
|
-
return { state, tokenUsage: totalUsage };
|
|
5470
|
+
return { state, tokenUsage: totalUsage, reviewReport: state.qualityReport };
|
|
4795
5471
|
}
|
|
4796
5472
|
state.fields = fields;
|
|
4797
5473
|
state.title = classifyResult.applicationType ?? void 0;
|
|
@@ -4891,11 +5567,15 @@ function createApplicationPipeline(config) {
|
|
|
4891
5567
|
} else {
|
|
4892
5568
|
state.status = "confirming";
|
|
4893
5569
|
}
|
|
5570
|
+
state.qualityReport = buildApplicationQualityReport(state);
|
|
4894
5571
|
state.updatedAt = Date.now();
|
|
4895
5572
|
await applicationStore?.save(state);
|
|
5573
|
+
if (shouldFailQualityGate(qualityGate, state.qualityReport.qualityGateStatus)) {
|
|
5574
|
+
throw new Error("Application quality gate failed. See state.qualityReport for blocking issues.");
|
|
5575
|
+
}
|
|
4896
5576
|
const filledCount = state.fields.filter((f) => f.value).length;
|
|
4897
5577
|
onProgress?.(`Application processed: ${filledCount}/${state.fields.length} fields filled, ${state.batches?.length ?? 0} batches to collect.`);
|
|
4898
|
-
return { state, tokenUsage: totalUsage };
|
|
5578
|
+
return { state, tokenUsage: totalUsage, reviewReport: state.qualityReport };
|
|
4899
5579
|
}
|
|
4900
5580
|
async function processReply(input) {
|
|
4901
5581
|
totalUsage = { inputTokens: 0, outputTokens: 0 };
|
|
@@ -5042,6 +5722,11 @@ Provide a brief, helpful explanation (2-3 sentences). End with "Just reply with
|
|
|
5042
5722
|
providerOptions
|
|
5043
5723
|
);
|
|
5044
5724
|
trackUsage(emailUsage);
|
|
5725
|
+
const emailReview = reviewBatchEmail(emailText, nextBatchFields);
|
|
5726
|
+
state.qualityReport = {
|
|
5727
|
+
...buildApplicationQualityReport(state),
|
|
5728
|
+
emailReview
|
|
5729
|
+
};
|
|
5045
5730
|
if (!responseText) {
|
|
5046
5731
|
responseText = emailText;
|
|
5047
5732
|
} else {
|
|
@@ -5057,13 +5742,18 @@ ${emailText}`;
|
|
|
5057
5742
|
}
|
|
5058
5743
|
}
|
|
5059
5744
|
state.updatedAt = Date.now();
|
|
5745
|
+
state.qualityReport = state.qualityReport ?? buildApplicationQualityReport(state);
|
|
5060
5746
|
await applicationStore?.save(state);
|
|
5747
|
+
if (shouldFailQualityGate(qualityGate, state.qualityReport.qualityGateStatus)) {
|
|
5748
|
+
throw new Error("Application quality gate failed. See state.qualityReport for blocking issues.");
|
|
5749
|
+
}
|
|
5061
5750
|
return {
|
|
5062
5751
|
state,
|
|
5063
5752
|
intent: intent.primaryIntent,
|
|
5064
5753
|
fieldsFilled,
|
|
5065
5754
|
responseText,
|
|
5066
|
-
tokenUsage: totalUsage
|
|
5755
|
+
tokenUsage: totalUsage,
|
|
5756
|
+
reviewReport: state.qualityReport
|
|
5067
5757
|
};
|
|
5068
5758
|
}
|
|
5069
5759
|
async function generateCurrentBatchEmail(applicationId, opts) {
|
|
@@ -5089,6 +5779,12 @@ ${emailText}`;
|
|
|
5089
5779
|
providerOptions
|
|
5090
5780
|
);
|
|
5091
5781
|
trackUsage(usage);
|
|
5782
|
+
const emailReview = reviewBatchEmail(text, batchFields);
|
|
5783
|
+
state.qualityReport = {
|
|
5784
|
+
...buildApplicationQualityReport(state),
|
|
5785
|
+
emailReview
|
|
5786
|
+
};
|
|
5787
|
+
await applicationStore?.save(state);
|
|
5092
5788
|
return { text, tokenUsage: totalUsage };
|
|
5093
5789
|
}
|
|
5094
5790
|
async function getConfirmationSummary(applicationId) {
|
|
@@ -5225,73 +5921,73 @@ Respond with the final answer, deduplicated citations array, overall confidence
|
|
|
5225
5921
|
}
|
|
5226
5922
|
|
|
5227
5923
|
// src/schemas/query.ts
|
|
5228
|
-
import { z as
|
|
5229
|
-
var QueryIntentSchema =
|
|
5924
|
+
import { z as z34 } from "zod";
|
|
5925
|
+
// Closed set of intents a query (or sub-question) can be classified as.
var QueryIntentSchema = z34.enum([
  "policy_question",
  "coverage_comparison",
  "document_search",
  "claims_inquiry",
  "general_knowledge"
]);
// One decomposed sub-question, with optional chunk-type and document filters.
var SubQuestionSchema = z34.object({
  question: z34.string().describe("Atomic sub-question to retrieve and answer independently"),
  intent: QueryIntentSchema,
  chunkTypes: z34.array(z34.string()).optional().describe("Chunk types to filter retrieval (e.g. coverage, endorsement, declaration)"),
  documentFilters: z34.object({
    type: z34.enum(["policy", "quote"]).optional(),
    carrier: z34.string().optional(),
    insuredName: z34.string().optional(),
    policyNumber: z34.string().optional(),
    quoteNumber: z34.string().optional()
  }).optional().describe("Structured filters to narrow document lookup")
});
// Classification of a query; at least one sub-question is required (`min(1)`).
var QueryClassifyResultSchema = z34.object({
  intent: QueryIntentSchema,
  subQuestions: z34.array(SubQuestionSchema).min(1).describe("Decomposed atomic sub-questions"),
  requiresDocumentLookup: z34.boolean().describe("Whether structured document lookup is needed"),
  requiresChunkSearch: z34.boolean().describe("Whether semantic chunk search is needed"),
  requiresConversationHistory: z34.boolean().describe("Whether conversation history is relevant")
});
// A piece of evidence. All three ids are optional; this schema does not
// enforce that the id matching `source` is present.
var EvidenceItemSchema = z34.object({
  source: z34.enum(["chunk", "document", "conversation"]),
  chunkId: z34.string().optional(),
  documentId: z34.string().optional(),
  turnId: z34.string().optional(),
  text: z34.string().describe("Text excerpt from the source"),
  relevance: z34.number().min(0).max(1),
  // Metadata is a list of key/value string pairs rather than a free-form object.
  metadata: z34.array(z34.object({ key: z34.string(), value: z34.string() })).optional()
});
// Evidence gathered for a single sub-question.
var RetrievalResultSchema = z34.object({
  subQuestion: z34.string(),
  evidence: z34.array(EvidenceItemSchema)
});
// A citation backing an answer. Unlike EvidenceItemSchema, `chunkId` and
// `documentId` are both required strings here.
var CitationSchema = z34.object({
  index: z34.number().describe("Citation number [1], [2], etc."),
  chunkId: z34.string().describe("Source chunk ID, e.g. doc-123:coverage:2"),
  documentId: z34.string(),
  documentType: z34.enum(["policy", "quote"]).optional(),
  field: z34.string().optional().describe("Specific field path, e.g. coverages[0].deductible"),
  quote: z34.string().describe("Exact text from source that supports the claim"),
  relevance: z34.number().min(0).max(1)
});
// Answer to one sub-question, with its citations and a confidence in [0, 1].
var SubAnswerSchema = z34.object({
  subQuestion: z34.string(),
  answer: z34.string(),
  citations: z34.array(CitationSchema),
  confidence: z34.number().min(0).max(1),
  needsMoreContext: z34.boolean().describe("True if evidence was insufficient to answer fully")
});
// Verification outcome over the sub-answers.
var VerifyResultSchema = z34.object({
  approved: z34.boolean().describe("Whether all sub-answers are adequately grounded"),
  issues: z34.array(z34.string()).describe("Specific grounding or consistency issues found"),
  retrySubQuestions: z34.array(z34.string()).optional().describe("Sub-questions that need additional retrieval or re-reasoning")
});
// Final query result: answer, citations, intent, confidence and optional follow-up.
var QueryResultSchema = z34.object({
  answer: z34.string(),
  citations: z34.array(CitationSchema),
  intent: QueryIntentSchema,
  confidence: z34.number().min(0).max(1),
  followUp: z34.string().optional().describe("Suggested follow-up question if applicable")
});
|
|
5296
5992
|
|
|
5297
5993
|
// src/query/retriever.ts
|
|
@@ -5579,6 +6275,112 @@ async function verify(originalQuestion, subAnswers, allEvidence, config) {
|
|
|
5579
6275
|
return { result: object, usage };
|
|
5580
6276
|
}
|
|
5581
6277
|
|
|
6278
|
+
// src/query/quality.ts
|
|
6279
|
+
/**
 * Resolves the identifier under which an evidence item is indexed.
 *
 * Candidates are tried in order: `chunkId`, `documentId`, `turnId`. An id that
 * is null, undefined, or the empty string is treated as absent, so an item
 * with a blank `chunkId` still resolves to its `documentId`. This matches how
 * citation ids are resolved, where an empty `chunkId` also falls through.
 *
 * @param {{ chunkId?: string | null, documentId?: string | null, turnId?: string | null }} evidence
 * @returns {string | undefined} The first non-empty id, or `undefined` when none is present.
 */
function sourceIdForEvidence(evidence) {
  const candidates = [evidence.chunkId, evidence.documentId, evidence.turnId];
  return candidates.find((id) => id != null && id !== "");
}
|
|
6282
|
+
/**
 * Resolves the source identifier a citation points at.
 *
 * @param {{ chunkId?: string, documentId?: string }} citation
 * @returns {string | undefined} `chunkId` when it is truthy, otherwise `documentId`.
 */
function citationSourceId(citation) {
  const { chunkId, documentId } = citation;
  return chunkId ? chunkId : documentId;
}
|
|
6285
|
+
/**
 * Builds the rule-based review report for one query run.
 *
 * Checks performed:
 *  - every sub-answer that claims an answer (or reports confidence >= 0.85)
 *    carries at least one citation;
 *  - every sub-answer citation resolves to retrieved evidence, and its quote
 *    appears verbatim in that evidence's text;
 *  - a final answer with confidence > 0.4 carries citations, and each of its
 *    citations also appears in some sub-answer.
 *
 * @param {object} params
 * @param {Array<object>} [params.subAnswers] Sub-answers with `subQuestion`,
 *   `citations`, `confidence` and `needsMoreContext`. Defaults to `[]`.
 * @param {Array<object>} [params.evidence] Retrieved evidence items. Defaults to `[]`.
 * @param {object} [params.finalResult] Final result with `answer`, `citations`
 *   and `confidence`; final-answer checks are skipped when it is absent.
 * @param {Array<object>} [params.verifyRounds] Verification rounds with
 *   `round`, `approved` and `issues`. Defaults to `[]`.
 * @returns {{ issues: object[], rounds: object[], artifacts: object[], verifyRounds: object[], qualityGateStatus: * }}
 *   `qualityGateStatus` is whatever `evaluateQualityGate` returns.
 */
function buildQueryReviewReport(params) {
  const { subAnswers = [], evidence = [], finalResult, verifyRounds = [] } = params;
  const issues = [];

  // Index evidence by source id. Buckets are appended to in place rather than
  // being rebuilt with spread on every insert.
  const evidenceBySource = new Map();
  for (const item of evidence) {
    const sourceId = sourceIdForEvidence(item);
    if (!sourceId) continue;
    const bucket = evidenceBySource.get(sourceId);
    if (bucket) {
      bucket.push(item);
    } else {
      evidenceBySource.set(sourceId, [item]);
    }
  }

  for (const subAnswer of subAnswers) {
    const hasCitations = subAnswer.citations.length > 0;

    if (!subAnswer.needsMoreContext && !hasCitations) {
      issues.push({
        code: "subanswer_missing_citations",
        severity: "blocking",
        message: `Sub-answer "${subAnswer.subQuestion}" has no citations despite claiming an answer.`,
        subQuestion: subAnswer.subQuestion
      });
    }
    if (subAnswer.confidence >= 0.85 && !hasCitations) {
      issues.push({
        code: "subanswer_high_confidence_without_citations",
        severity: "blocking",
        message: `Sub-answer "${subAnswer.subQuestion}" has high confidence without citations.`,
        subQuestion: subAnswer.subQuestion
      });
    }

    for (const citation of subAnswer.citations) {
      const sourceId = citationSourceId(citation);
      const supportedEvidence = sourceId ? evidenceBySource.get(sourceId) ?? [] : [];
      if (!sourceId || supportedEvidence.length === 0) {
        issues.push({
          code: "citation_missing_from_evidence",
          severity: "blocking",
          message: `Citation [${citation.index}] in "${subAnswer.subQuestion}" does not map to retrieved evidence.`,
          subQuestion: subAnswer.subQuestion,
          citationIndex: citation.index,
          sourceId
        });
        continue;
      }

      // Evidence without a string `text` cannot contain the quote. It is
      // treated as a non-match instead of throwing and aborting the review.
      const quoteFound = supportedEvidence.some(
        (item) => typeof item.text === "string" && item.text.includes(citation.quote)
      );
      if (!quoteFound) {
        issues.push({
          code: "citation_quote_not_in_evidence",
          severity: "warning",
          message: `Citation [${citation.index}] quote in "${subAnswer.subQuestion}" was not found verbatim in retrieved evidence.`,
          subQuestion: subAnswer.subQuestion,
          citationIndex: citation.index,
          sourceId
        });
      }
    }
  }

  if (finalResult) {
    const answersWithoutCitations =
      finalResult.answer.trim().length > 0 &&
      finalResult.citations.length === 0 &&
      finalResult.confidence > 0.4;
    if (answersWithoutCitations) {
      issues.push({
        code: "final_answer_missing_citations",
        severity: "blocking",
        message: "Final answer has non-trivial confidence but no citations."
      });
    }

    // A final-answer citation is "known" only if a sub-answer carried the same
    // (index, chunkId, documentId) triple.
    const citationKey = (citation) => `${citation.index}|${citation.chunkId}|${citation.documentId}`;
    const knownCitationIds = new Set(
      subAnswers.flatMap((sa) => sa.citations.map(citationKey))
    );
    for (const citation of finalResult.citations) {
      if (!knownCitationIds.has(citationKey(citation))) {
        issues.push({
          code: "final_answer_unknown_citation",
          severity: "warning",
          message: `Final answer citation [${citation.index}] was not present in verified sub-answers.`,
          citationIndex: citation.index,
          sourceId: citationSourceId(citation)
        });
      }
    }
  }

  // A round counts as "passed" only when it was approved with zero issues.
  const rounds = verifyRounds.map((round) => ({
    round: round.round,
    kind: "verification",
    status: round.approved && round.issues.length === 0 ? "passed" : "warning",
    summary: round.issues[0] ?? (round.approved ? "Verification passed." : "Verification requested retry.")
  }));

  const artifacts = [
    { kind: "evidence", label: "Retrieved Evidence", itemCount: evidence.length },
    { kind: "sub_answers", label: "Sub Answers", itemCount: subAnswers.length }
  ];

  return {
    issues,
    rounds,
    artifacts,
    verifyRounds,
    qualityGateStatus: evaluateQualityGate({
      issues,
      hasRoundWarnings: verifyRounds.some((round) => !round.approved || round.issues.length > 0)
    })
  };
}
|
|
6383
|
+
|
|
5582
6384
|
// src/query/coordinator.ts
|
|
5583
6385
|
function createQueryAgent(config) {
|
|
5584
6386
|
const {
|
|
@@ -5592,7 +6394,8 @@ function createQueryAgent(config) {
|
|
|
5592
6394
|
onTokenUsage,
|
|
5593
6395
|
onProgress,
|
|
5594
6396
|
log,
|
|
5595
|
-
providerOptions
|
|
6397
|
+
providerOptions,
|
|
6398
|
+
qualityGate = "warn"
|
|
5596
6399
|
} = config;
|
|
5597
6400
|
const limit = pLimit(concurrency);
|
|
5598
6401
|
let totalUsage = { inputTokens: 0, outputTokens: 0 };
|
|
@@ -5661,6 +6464,7 @@ function createQueryAgent(config) {
|
|
|
5661
6464
|
await pipelineCtx.save("reason", { classification, evidence: allEvidence, subAnswers });
|
|
5662
6465
|
onProgress?.("Verifying answer grounding...");
|
|
5663
6466
|
const verifierConfig = { generateObject, providerOptions };
|
|
6467
|
+
const verifyRounds = [];
|
|
5664
6468
|
for (let round = 0; round < maxVerifyRounds; round++) {
|
|
5665
6469
|
const { result: verifyResult, usage } = await safeVerify(
|
|
5666
6470
|
question,
|
|
@@ -5669,6 +6473,12 @@ function createQueryAgent(config) {
|
|
|
5669
6473
|
verifierConfig
|
|
5670
6474
|
);
|
|
5671
6475
|
trackUsage(usage);
|
|
6476
|
+
verifyRounds.push({
|
|
6477
|
+
round: round + 1,
|
|
6478
|
+
approved: verifyResult.approved,
|
|
6479
|
+
issues: verifyResult.issues,
|
|
6480
|
+
retrySubQuestions: verifyResult.retrySubQuestions
|
|
6481
|
+
});
|
|
5672
6482
|
if (verifyResult.approved) {
|
|
5673
6483
|
onProgress?.("Verification passed.");
|
|
5674
6484
|
break;
|
|
@@ -5726,6 +6536,24 @@ function createQueryAgent(config) {
|
|
|
5726
6536
|
classification,
|
|
5727
6537
|
context?.platform
|
|
5728
6538
|
);
|
|
6539
|
+
const reviewReport = buildQueryReviewReport({
|
|
6540
|
+
subAnswers,
|
|
6541
|
+
evidence: allEvidence,
|
|
6542
|
+
finalResult: queryResult,
|
|
6543
|
+
verifyRounds
|
|
6544
|
+
});
|
|
6545
|
+
await pipelineCtx.save("review", {
|
|
6546
|
+
classification,
|
|
6547
|
+
evidence: allEvidence,
|
|
6548
|
+
subAnswers,
|
|
6549
|
+
reviewReport
|
|
6550
|
+
});
|
|
6551
|
+
if (reviewReport.issues.length > 0) {
|
|
6552
|
+
await log?.(`Query deterministic review issues: ${reviewReport.issues.map((issue) => issue.message).join("; ")}`);
|
|
6553
|
+
}
|
|
6554
|
+
if (shouldFailQualityGate(qualityGate, reviewReport.qualityGateStatus)) {
|
|
6555
|
+
throw new Error("Query quality gate failed. See reviewReport for blocking issues.");
|
|
6556
|
+
}
|
|
5729
6557
|
if (conversationId) {
|
|
5730
6558
|
try {
|
|
5731
6559
|
await memoryStore.addTurn({
|
|
@@ -5746,7 +6574,7 @@ function createQueryAgent(config) {
|
|
|
5746
6574
|
await log?.(`Failed to store conversation turn: ${e}`);
|
|
5747
6575
|
}
|
|
5748
6576
|
}
|
|
5749
|
-
return { ...queryResult, tokenUsage: totalUsage };
|
|
6577
|
+
return { ...queryResult, tokenUsage: totalUsage, reviewReport };
|
|
5750
6578
|
}
|
|
5751
6579
|
async function classify(question, conversationId) {
|
|
5752
6580
|
let conversationContext;
|
|
@@ -5967,7 +6795,12 @@ export {
|
|
|
5967
6795
|
AdmittedStatusSchema,
|
|
5968
6796
|
AnswerParsingResultSchema,
|
|
5969
6797
|
ApplicationClassifyResultSchema,
|
|
6798
|
+
ApplicationEmailReviewSchema,
|
|
5970
6799
|
ApplicationFieldSchema,
|
|
6800
|
+
ApplicationQualityArtifactSchema,
|
|
6801
|
+
ApplicationQualityIssueSchema,
|
|
6802
|
+
ApplicationQualityReportSchema,
|
|
6803
|
+
ApplicationQualityRoundSchema,
|
|
5971
6804
|
ApplicationStateSchema,
|
|
5972
6805
|
AuditTypeSchema,
|
|
5973
6806
|
AutoFillMatchSchema,
|
|
@@ -5999,6 +6832,7 @@ export {
|
|
|
5999
6832
|
CoverageFormSchema,
|
|
6000
6833
|
CoverageSchema,
|
|
6001
6834
|
CoverageTriggerSchema,
|
|
6835
|
+
CoverageValueTypeSchema,
|
|
6002
6836
|
CrimeDeclarationsSchema,
|
|
6003
6837
|
CyberDeclarationsSchema,
|
|
6004
6838
|
DEDUCTIBLE_TYPES,
|