@claritylabs/cl-sdk 0.8.1 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -489,7 +489,9 @@ var FormReferenceSchema = z3.object({
489
489
  formNumber: z3.string(),
490
490
  editionDate: z3.string().optional(),
491
491
  title: z3.string().optional(),
492
- formType: z3.enum(["coverage", "endorsement", "declarations", "application", "notice", "other"])
492
+ formType: z3.enum(["coverage", "endorsement", "declarations", "application", "notice", "other"]),
493
+ pageStart: z3.number().optional(),
494
+ pageEnd: z3.number().optional()
493
495
  });
494
496
  var TaxFeeItemSchema = z3.object({
495
497
  name: z3.string(),
@@ -526,12 +528,25 @@ var NamedInsuredSchema = z3.object({
526
528
 
527
529
  // src/schemas/coverage.ts
528
530
  import { z as z4 } from "zod";
531
+ var CoverageValueTypeSchema = z4.enum([
532
+ "numeric",
533
+ "included",
534
+ "not_included",
535
+ "as_stated",
536
+ "waiting_period",
537
+ "referential",
538
+ "other"
539
+ ]);
529
540
  var CoverageSchema = z4.object({
530
541
  name: z4.string(),
531
542
  limit: z4.string(),
543
+ limitValueType: CoverageValueTypeSchema.optional(),
532
544
  deductible: z4.string().optional(),
545
+ deductibleValueType: CoverageValueTypeSchema.optional(),
546
+ formNumber: z4.string().optional(),
533
547
  pageNumber: z4.number().optional(),
534
- sectionRef: z4.string().optional()
548
+ sectionRef: z4.string().optional(),
549
+ originalContent: z4.string().optional()
535
550
  });
536
551
  var EnrichedCoverageSchema = z4.object({
537
552
  name: z4.string(),
@@ -540,8 +555,10 @@ var EnrichedCoverageSchema = z4.object({
540
555
  formEditionDate: z4.string().optional(),
541
556
  limit: z4.string(),
542
557
  limitType: LimitTypeSchema.optional(),
558
+ limitValueType: CoverageValueTypeSchema.optional(),
543
559
  deductible: z4.string().optional(),
544
560
  deductibleType: DeductibleTypeSchema.optional(),
561
+ deductibleValueType: CoverageValueTypeSchema.optional(),
545
562
  sir: z4.string().optional(),
546
563
  sublimit: z4.string().optional(),
547
564
  coinsurance: z4.string().optional(),
@@ -552,7 +569,8 @@ var EnrichedCoverageSchema = z4.object({
552
569
  included: z4.boolean(),
553
570
  premium: z4.string().optional(),
554
571
  pageNumber: z4.number().optional(),
555
- sectionRef: z4.string().optional()
572
+ sectionRef: z4.string().optional(),
573
+ originalContent: z4.string().optional()
556
574
  });
557
575
 
558
576
  // src/schemas/endorsement.ts
@@ -1569,6 +1587,7 @@ function assembleDocument(documentId, documentType, memory) {
1569
1587
  const lossHistory = memory.get("loss_history");
1570
1588
  const sections = memory.get("sections");
1571
1589
  const supplementary = memory.get("supplementary");
1590
+ const formInventory = memory.get("form_inventory");
1572
1591
  const classify = memory.get("classify");
1573
1592
  const base = {
1574
1593
  id: documentId,
@@ -1585,6 +1604,7 @@ function assembleDocument(documentId, documentType, memory) {
1585
1604
  exclusions: exclusions?.exclusions,
1586
1605
  conditions: conditions?.conditions,
1587
1606
  sections: sections?.sections,
1607
+ formInventory: formInventory?.forms,
1588
1608
  declarations: declarations ? sanitizeNulls(declarations) : void 0,
1589
1609
  ...sanitizeNulls(lossHistory ?? {})
1590
1610
  };
@@ -1826,6 +1846,11 @@ async function formatDocumentContent(doc, generateText, options) {
1826
1846
  function chunkDocument(doc) {
1827
1847
  const chunks = [];
1828
1848
  const docId = doc.id;
1849
/**
 * Build a metadata record whose values are all strings.
 *
 * Entries whose value is `undefined`, `null`, or stringifies to an empty
 * string are left out; every remaining value is converted with `String()`.
 *
 * @param {Record<string, unknown>} entries - Raw metadata candidates.
 * @returns {Record<string, string>} The surviving entries, stringified.
 */
function stringMetadata(entries) {
  const pairs = [];
  for (const [name, raw] of Object.entries(entries)) {
    if (raw === void 0 || raw === null) continue;
    const text = String(raw);
    if (text.length === 0) continue;
    pairs.push([name, text]);
  }
  return Object.fromEntries(pairs);
}
1829
1854
  chunks.push({
1830
1855
  id: `${docId}:carrier_info:0`,
1831
1856
  documentId: docId,
@@ -1837,7 +1862,7 @@ function chunkDocument(doc) {
1837
1862
  doc.carrierAmBestRating ? `AM Best: ${doc.carrierAmBestRating}` : null,
1838
1863
  doc.mga ? `MGA: ${doc.mga}` : null
1839
1864
  ].filter(Boolean).join("\n"),
1840
- metadata: { carrier: doc.carrier, documentType: doc.type }
1865
+ metadata: stringMetadata({ carrier: doc.carrier, documentType: doc.type })
1841
1866
  });
1842
1867
  chunks.push({
1843
1868
  id: `${docId}:named_insured:0`,
@@ -1849,17 +1874,32 @@ function chunkDocument(doc) {
1849
1874
  doc.insuredFein ? `FEIN: ${doc.insuredFein}` : null,
1850
1875
  doc.insuredAddress ? `Address: ${doc.insuredAddress.street1}, ${doc.insuredAddress.city}, ${doc.insuredAddress.state} ${doc.insuredAddress.zip}` : null
1851
1876
  ].filter(Boolean).join("\n"),
1852
- metadata: { insuredName: doc.insuredName, documentType: doc.type }
1877
+ metadata: stringMetadata({ insuredName: doc.insuredName, documentType: doc.type })
1853
1878
  });
1854
1879
  doc.coverages.forEach((cov, i) => {
1855
1880
  chunks.push({
1856
1881
  id: `${docId}:coverage:${i}`,
1857
1882
  documentId: docId,
1858
1883
  type: "coverage",
1859
- text: `Coverage: ${cov.name}
1860
- Limit: ${cov.limit}${cov.deductible ? `
1861
- Deductible: ${cov.deductible}` : ""}`,
1862
- metadata: { coverageName: cov.name, limit: cov.limit, documentType: doc.type }
1884
+ text: [
1885
+ `Coverage: ${cov.name}`,
1886
+ `Limit: ${cov.limit}`,
1887
+ cov.limitValueType ? `Limit Type: ${cov.limitValueType}` : null,
1888
+ cov.deductible ? `Deductible: ${cov.deductible}` : null,
1889
+ cov.deductibleValueType ? `Deductible Type: ${cov.deductibleValueType}` : null,
1890
+ cov.originalContent ? `Source: ${cov.originalContent}` : null
1891
+ ].filter(Boolean).join("\n"),
1892
+ metadata: stringMetadata({
1893
+ coverageName: cov.name,
1894
+ limit: cov.limit,
1895
+ limitValueType: cov.limitValueType,
1896
+ deductible: cov.deductible,
1897
+ deductibleValueType: cov.deductibleValueType,
1898
+ formNumber: cov.formNumber,
1899
+ pageNumber: cov.pageNumber,
1900
+ sectionRef: cov.sectionRef,
1901
+ documentType: doc.type
1902
+ })
1863
1903
  });
1864
1904
  });
1865
1905
  doc.endorsements?.forEach((end, i) => {
@@ -1869,7 +1909,13 @@ Deductible: ${cov.deductible}` : ""}`,
1869
1909
  type: "endorsement",
1870
1910
  text: `Endorsement: ${end.title}
1871
1911
  ${end.content}`.trim(),
1872
- metadata: { endorsementType: end.endorsementType, formNumber: end.formNumber, documentType: doc.type }
1912
+ metadata: stringMetadata({
1913
+ endorsementType: end.endorsementType,
1914
+ formNumber: end.formNumber,
1915
+ pageStart: end.pageStart,
1916
+ pageEnd: end.pageEnd,
1917
+ documentType: doc.type
1918
+ })
1873
1919
  });
1874
1920
  });
1875
1921
  doc.exclusions?.forEach((exc, i) => {
@@ -1879,7 +1925,7 @@ ${end.content}`.trim(),
1879
1925
  type: "exclusion",
1880
1926
  text: `Exclusion: ${exc.name}
1881
1927
  ${exc.content}`.trim(),
1882
- metadata: { documentType: doc.type }
1928
+ metadata: stringMetadata({ formNumber: exc.formNumber, pageNumber: exc.pageNumber, documentType: doc.type })
1883
1929
  });
1884
1930
  });
1885
1931
  doc.sections?.forEach((sec, i) => {
@@ -1889,7 +1935,7 @@ ${exc.content}`.trim(),
1889
1935
  type: "section",
1890
1936
  text: `Section: ${sec.title}
1891
1937
  ${sec.content}`,
1892
- metadata: { sectionType: sec.type, documentType: doc.type }
1938
+ metadata: stringMetadata({ sectionType: sec.type, pageStart: sec.pageStart, pageEnd: sec.pageEnd, documentType: doc.type })
1893
1939
  });
1894
1940
  });
1895
1941
  if (doc.premium) {
@@ -1899,12 +1945,138 @@ ${sec.content}`,
1899
1945
  type: "premium",
1900
1946
  text: `Premium: ${doc.premium}${doc.totalCost ? `
1901
1947
  Total Cost: ${doc.totalCost}` : ""}`,
1902
- metadata: { premium: doc.premium, documentType: doc.type }
1948
+ metadata: stringMetadata({ premium: doc.premium, documentType: doc.type })
1903
1949
  });
1904
1950
  }
1905
1951
  return chunks;
1906
1952
  }
1907
1953
 
1954
+ // src/extraction/merge.ts
1955
/**
 * Tell whether a value carries usable content.
 *
 * `undefined`, `null`, whitespace-only strings, and empty arrays count as
 * absent; every other value (including `0` and `false`) counts as present.
 *
 * @param {unknown} value - Candidate value.
 * @returns {boolean} `true` when the value is considered present.
 */
function isPresent(value) {
  if (value === null || value === void 0) {
    return false;
  }
  if (typeof value === "string") {
    return value.trim() !== "";
  }
  return !Array.isArray(value) || value.length !== 0;
}
1961
/**
 * Keep the first item seen for each key, preserving input order.
 *
 * @param {Iterable<unknown>} items - Items to de-duplicate.
 * @param {(item: unknown) => unknown} keyFn - Computes the identity key of an item.
 * @returns {unknown[]} A new array holding the first occurrence per key.
 */
function dedupeByKey(items, keyFn) {
  const knownKeys = /* @__PURE__ */ new Set();
  return Array.from(items).filter((candidate) => {
    const identity = keyFn(candidate);
    if (knownKeys.has(identity)) {
      return false;
    }
    knownKeys.add(identity);
    return true;
  });
}
1972
/**
 * Concatenate two lists and drop later items whose key was already seen.
 *
 * Items from `existing` come first, so they win over `incoming` items that
 * share the same key.
 *
 * @param {unknown[]} existing - Items already collected.
 * @param {unknown[]} incoming - Newly extracted items.
 * @param {(item: unknown) => unknown} keyFn - Computes the identity key of an item.
 * @returns {unknown[]} The combined, de-duplicated list.
 */
function mergeUniqueObjects(existing, incoming, keyFn) {
  const combined = [...existing, ...incoming];
  return dedupeByKey(combined, keyFn);
}
1975
/**
 * Merge `incoming` into a copy of `existing`, keeping values that are already
 * present and filling in only the gaps.
 *
 * Per key of `incoming`:
 * - two arrays are concatenated (existing items first);
 * - two non-array objects are merged recursively with this same function;
 * - otherwise the incoming value is used only when the current value is not
 *   present (per `isPresent`) and the incoming value is.
 *
 * Only own properties of the merged copy count as "current" values. Inherited
 * members such as `constructor` or `toString` therefore no longer mask an
 * incoming field of the same name. An own `__proto__` key in `incoming`
 * (possible after `JSON.parse`) is skipped so it cannot swap the prototype of
 * the result.
 *
 * @param {object} existing - Previously stored payload; not mutated.
 * @param {object} incoming - Newly extracted payload; not mutated.
 * @returns {object} A new object holding the merged result.
 */
function mergeShallowPreferPresent(existing, incoming) {
  const merged = { ...existing };
  const ownValue = (key) => Object.prototype.hasOwnProperty.call(merged, key) ? merged[key] : void 0;
  for (const [key, value] of Object.entries(incoming)) {
    // Assigning to "__proto__" would replace the prototype instead of storing data.
    if (key === "__proto__") continue;
    const current = ownValue(key);
    if (Array.isArray(current) && Array.isArray(value)) {
      merged[key] = [...current, ...value];
      continue;
    }
    if (current && value && typeof current === "object" && typeof value === "object" && !Array.isArray(current) && !Array.isArray(value)) {
      merged[key] = mergeShallowPreferPresent(current, value);
      continue;
    }
    if (!isPresent(current) && isPresent(value)) {
      merged[key] = value;
    }
  }
  return merged;
}
1996
/**
 * Merge two `coverage_limits` payloads.
 *
 * Top-level fields are merged with `mergeShallowPreferPresent`. The
 * `coverages` list is then rebuilt: coverages sharing the same
 * case-insensitive name, limit, deductible and form number are folded into a
 * single entry, with earlier entries taking precedence for present fields.
 *
 * @param {object} existing - Previously stored payload.
 * @param {object} incoming - Newly extracted payload.
 * @returns {object} The merged payload with a de-duplicated `coverages` array.
 */
function mergeCoverageLimits(existing, incoming) {
  const result = mergeShallowPreferPresent(existing, incoming);
  const listOf = (value) => (Array.isArray(value) ? value : []);
  const identityOf = (coverage) =>
    [coverage.name, coverage.limit, coverage.deductible, coverage.formNumber]
      .map((part) => String(part ?? "").toLowerCase())
      .join("|");

  const grouped = /* @__PURE__ */ new Map();
  const allCoverages = [...listOf(existing.coverages), ...listOf(incoming.coverages)];
  for (const coverage of allCoverages) {
    const identity = identityOf(coverage);
    const earlier = grouped.get(identity);
    if (earlier) {
      grouped.set(identity, mergeShallowPreferPresent(earlier, coverage));
    } else {
      grouped.set(identity, coverage);
    }
  }

  result.coverages = Array.from(grouped.values());
  return result;
}
2015
/**
 * Merge two `declarations` payloads.
 *
 * Top-level fields are merged with `mergeShallowPreferPresent`; the `fields`
 * list is replaced by the de-duplicated union of both lists, where two
 * entries are the same when their field, value and section match
 * case-insensitively.
 *
 * @param {object} existing - Previously stored payload.
 * @param {object} incoming - Newly extracted payload.
 * @returns {object} The merged payload with a de-duplicated `fields` array.
 */
function mergeDeclarations(existing, incoming) {
  const result = mergeShallowPreferPresent(existing, incoming);
  const priorFields = Array.isArray(existing.fields) ? existing.fields : [];
  const newFields = Array.isArray(incoming.fields) ? incoming.fields : [];
  const identityOf = (entry) =>
    [entry.field, entry.value, entry.section]
      .map((part) => String(part ?? "").toLowerCase())
      .join("|");
  result.fields = mergeUniqueObjects(priorFields, newFields, identityOf);
  return result;
}
2026
/**
 * Merge two payloads whose main content is one array property.
 *
 * Top-level fields are merged with `mergeShallowPreferPresent`; the array at
 * `arrayKey` is then replaced by the de-duplicated union of both arrays
 * (a non-array value on either side is treated as an empty list).
 *
 * @param {object} existing - Previously stored payload.
 * @param {object} incoming - Newly extracted payload.
 * @param {string} arrayKey - Name of the array property to de-duplicate.
 * @param {(item: object) => unknown} keyFn - Computes the identity key of an item.
 * @returns {object} The merged payload.
 */
function mergeArrayPayload(existing, incoming, arrayKey, keyFn) {
  const result = mergeShallowPreferPresent(existing, incoming);
  const listOf = (payload) => {
    const candidate = payload[arrayKey];
    return Array.isArray(candidate) ? candidate : [];
  };
  result[arrayKey] = mergeUniqueObjects(listOf(existing), listOf(incoming), keyFn);
  return result;
}
2033
/**
 * Combine a stored extractor result with a newly produced one, using the
 * merge strategy that belongs to the extractor.
 *
 * - A falsy side yields the other side unchanged.
 * - When either side is not an object, the incoming value is returned.
 * - `coverage_limits` and `declarations` use their dedicated merge functions.
 * - `endorsements`, `exclusions`, `conditions` and `sections` merge their
 *   same-named array with an extractor-specific identity key.
 * - Every other extractor name uses `mergeShallowPreferPresent`.
 *
 * @param {string} extractorName - Name of the extractor that produced the payloads.
 * @param {unknown} existing - Previously stored result, if any.
 * @param {unknown} incoming - Newly produced result, if any.
 * @returns {unknown} The merged result.
 */
function mergeExtractorResult(extractorName, existing, incoming) {
  if (!existing) return incoming;
  if (!incoming) return existing;
  const bothObjects = typeof existing === "object" && typeof incoming === "object";
  if (!bothObjects) return incoming;

  if (extractorName === "coverage_limits") {
    return mergeCoverageLimits(existing, incoming);
  }
  if (extractorName === "declarations") {
    return mergeDeclarations(existing, incoming);
  }

  // Text parts compare case-insensitively; page numbers are compared as written.
  const text = (value) => String(value ?? "").toLowerCase();
  const page = (value) => String(value ?? "");
  const identityByArray = /* @__PURE__ */ new Map([
    ["endorsements", (item) => [text(item.formNumber), text(item.title), page(item.pageStart)].join("|")],
    ["exclusions", (item) => [text(item.name), text(item.formNumber), page(item.pageNumber)].join("|")],
    ["conditions", (item) => [text(item.name), text(item.conditionType), page(item.pageNumber)].join("|")],
    ["sections", (item) => [text(item.title), text(item.type), page(item.pageStart), page(item.pageEnd)].join("|")]
  ]);

  const identityOf = identityByArray.get(extractorName);
  if (identityOf) {
    // For these extractors the array property has the same name as the extractor.
    return mergeArrayPayload(existing, incoming, extractorName, identityOf);
  }
  return mergeShallowPreferPresent(existing, incoming);
}
2079
+
1908
2080
  // src/prompts/templates/homeowners.ts
1909
2081
  var HOMEOWNERS_TEMPLATE = {
1910
2082
  type: "homeowners",
@@ -2694,74 +2866,156 @@ Return JSON only:
2694
2866
  }`;
2695
2867
  }
2696
2868
 
2697
- // src/prompts/coordinator/plan.ts
2869
+ // src/prompts/coordinator/form-inventory.ts
2698
2870
  import { z as z19 } from "zod";
2699
- var ExtractionTaskSchema = z19.object({
2700
- extractorName: z19.string(),
2701
- startPage: z19.number(),
2702
- endPage: z19.number(),
2703
- description: z19.string()
2871
+ var FormInventoryEntrySchema = FormReferenceSchema.extend({
2872
+ formNumber: FormReferenceSchema.shape.formNumber.describe("Form number or identifier, e.g. PR5070CF"),
2873
+ pageStart: FormReferenceSchema.shape.pageStart.describe("Original document page where the form begins"),
2874
+ pageEnd: FormReferenceSchema.shape.pageEnd.describe("Original document page where the form ends")
2704
2875
  });
2705
- var PageMapEntrySchema = z19.object({
2706
- section: z19.string(),
2707
- pages: z19.string()
2876
+ var FormInventorySchema = z19.object({
2877
+ forms: z19.array(FormInventoryEntrySchema)
2708
2878
  });
2709
- var ExtractionPlanSchema = z19.object({
2710
- tasks: z19.array(ExtractionTaskSchema),
2711
- pageMap: z19.array(PageMapEntrySchema).optional()
2712
- });
2713
- function buildPlanPrompt(templateHints) {
2714
- return `You are planning the extraction of an insurance document. You have already classified this document. Now scan the full document and create a page map + extraction plan.
2879
+ function buildFormInventoryPrompt(templateHints) {
2880
+ return `You are building a form inventory for an insurance document.
2715
2881
 
2716
2882
  DOCUMENT TYPE HINTS:
2717
2883
  ${templateHints}
2718
2884
 
2719
- For each section of the document, decide which extractor should handle it and which pages to send.
2885
+ Extract every distinct declarations page set, policy form, coverage form, endorsement, application form, and notice form that appears in the document.
2886
+
2887
+ For EACH form, extract:
2888
+ - formNumber: REQUIRED when present
2889
+ - editionDate: if shown
2890
+ - title: if shown
2891
+ - formType: one of coverage, endorsement, declarations, application, notice, other
2892
+ - pageStart: original page where the form begins
2893
+ - pageEnd: original page where the form ends
2894
+
2895
+ Critical rules:
2896
+ - Include declarations page sets even if they do not show a standard form number.
2897
+ - Use original document page numbers, not local chunk page numbers.
2898
+ - Do not emit duplicate entries for repeated headers/footers.
2899
+ - Multi-page forms should be represented once with pageStart/pageEnd covering the full span when visible.
2900
+ - If a form number is visible in endorsements, schedules, or form headers, include it even if the full form title is partial.
2901
+
2902
+ Respond with JSON only.`;
2903
+ }
2904
+
2905
+ // src/prompts/coordinator/page-map.ts
2906
+ import { z as z20 } from "zod";
2907
+ var PageExtractorSchema = z20.enum([
2908
+ "carrier_info",
2909
+ "named_insured",
2910
+ "coverage_limits",
2911
+ "endorsements",
2912
+ "exclusions",
2913
+ "conditions",
2914
+ "premium_breakdown",
2915
+ "declarations",
2916
+ "loss_history",
2917
+ "sections",
2918
+ "supplementary"
2919
+ ]);
2920
+ var PageAssignmentSchema = z20.object({
2921
+ localPageNumber: z20.number().int().positive().describe("1-based page number within this supplied PDF chunk"),
2922
+ extractorNames: z20.array(PageExtractorSchema).describe("Focused extractors that should inspect this page"),
2923
+ pageRole: z20.enum([
2924
+ "declarations_schedule",
2925
+ "endorsement_schedule",
2926
+ "policy_form",
2927
+ "endorsement_form",
2928
+ "condition_exclusion_form",
2929
+ "supplementary",
2930
+ "other"
2931
+ ]).optional().describe("Primary role of the page"),
2932
+ hasScheduleValues: z20.boolean().optional().describe("True only when the page contains insured-specific declaration or schedule values, tables, or rows to extract"),
2933
+ confidence: z20.number().min(0).max(1).optional().describe("Confidence in the page assignment"),
2934
+ notes: z20.string().optional().describe("Short explanation of what appears on the page")
2935
+ });
2936
+ var PageMapChunkSchema = z20.object({
2937
+ pages: z20.array(PageAssignmentSchema)
2938
+ });
2939
+ function buildPageMapPrompt(templateHints, startPage, endPage, formInventoryHint) {
2940
+ const inventoryBlock = formInventoryHint ? `
2941
+ FORM INVENTORY (already identified \u2014 use this to constrain your assignments):
2942
+ ${formInventoryHint}
2943
+ ` : "";
2944
+ return `You are mapping insurance document pages to focused extractors.
2945
+
2946
+ These supplied pages are ORIGINAL DOCUMENT PAGES ${startPage}-${endPage}.
2947
+
2948
+ DOCUMENT TYPE HINTS:
2949
+ ${templateHints}
2950
+ ${inventoryBlock}
2951
+ For each page in this supplied PDF chunk, decide which extractor(s) should inspect it.
2720
2952
 
2721
2953
  Available extractors:
2722
- - carrier_info: Carrier name, legal name, NAIC, AM Best rating, admitted status, MGA, underwriter
2723
- - named_insured: Insured name, DBA, address, entity type, FEIN, SIC/NAICS codes, additional named insureds
2724
- - coverage_limits: Coverage names, limits, deductibles, coverage form, triggers
2725
- - endorsements: Endorsement forms, titles, types, content, affected parties
2726
- - exclusions: Exclusion titles, content, applicability
2727
- - conditions: Policy conditions (duties after loss, cancellation, etc.)
2728
- - premium_breakdown: Premium amounts, taxes, fees, payment plans, rating basis
2729
- - declarations: Line-specific structured declarations data (varies by policy type)
2730
- - loss_history: Loss runs, claim records, experience modification
2731
- - sections: Raw section content (for sections that don't fit other extractors)
2732
- - supplementary: Regulatory context, contacts, claims contacts, third-party administrators
2954
+ - carrier_info
2955
+ - named_insured
2956
+ - coverage_limits
2957
+ - endorsements
2958
+ - exclusions
2959
+ - conditions
2960
+ - premium_breakdown
2961
+ - declarations
2962
+ - loss_history
2963
+ - sections
2964
+ - supplementary
2965
+
2966
+ Rules:
2967
+ - Use specific extractors for declarations, schedules, endorsements, exclusions, conditions, premium pages, and loss runs.
2968
+ - Use "sections" for pages that contain substantive policy text or mixed content that should still be preserved as raw sections.
2969
+ - Avoid assigning broad ranges mentally; decide page by page.
2970
+ - A page may map to multiple extractors if it legitimately contains multiple relevant sections.
2971
+ - Prefer declarations and schedules for numeric limits/deductibles over later generic form wording.
2972
+ - Assign "coverage_limits" only when the page itself contains insured-specific declaration or schedule values to capture, such as location/building rows, coverage tables, limits, deductibles, coinsurance percentages, or scheduled amounts tied to this policy.
2973
+ - Do NOT assign "coverage_limits" for generic policy-form or endorsement text that merely explains how limits, deductibles, waiting periods, or coinsurance work, or that says values are "shown in the declarations", "shown in the schedule", "as stated", or "if applicable".
2974
+ - Headings like "Limits of Insurance", "Deductible", "Coinsurance", "Loss Conditions", or "Definitions" inside a policy form usually indicate form language, not declarations or schedules.
2975
+ - Continuation pages near the end of a form should stay mapped to "sections" plus "conditions"/"exclusions" when applicable, even if they mention limits or deductibles.
2976
+ - When a form inventory entry identifies a page range as a specific form type (e.g., endorsement, coverage, application), use that classification to guide your extractor choice. Do not assign "coverage_limits" to pages the inventory identifies as endorsement or condition/exclusion forms unless the page contains actual schedule values.
2977
+ - Return every page in the supplied chunk exactly once.
2733
2978
 
2734
2979
  Return JSON:
2735
2980
  {
2736
- "tasks": [
2737
- { "extractorName": "carrier_info", "startPage": 1, "endPage": 2, "description": "Extract carrier details from declarations page" },
2738
- ...
2739
- ],
2740
- "pageMap": [
2741
- { "section": "declarations", "pages": "pages 1-3" },
2742
- { "section": "endorsements", "pages": "pages 15-22" }
2981
+ "pages": [
2982
+ {
2983
+ "localPageNumber": 1,
2984
+ "extractorNames": ["declarations", "carrier_info", "named_insured", "coverage_limits"],
2985
+ "pageRole": "declarations_schedule",
2986
+ "hasScheduleValues": true,
2987
+ "confidence": 0.96,
2988
+ "notes": "Declarations page with insured, policy period, and scheduled limits"
2989
+ }
2743
2990
  ]
2744
2991
  }
2745
2992
 
2746
- Create tasks that cover the entire document. Prefer specific extractors over generic "sections" where possible. Keep page ranges tight \u2014 only include pages relevant to each extractor.
2747
-
2748
2993
  Respond with JSON only.`;
2749
2994
  }
2995
/**
 * Render a form inventory as a bullet list for the page-map prompt.
 *
 * Each form with a known `pageStart` becomes one line of the shape
 * `- <formNumber> "<title>" [<formType>] → page N` (or `pages N-M` when
 * `pageEnd` is set and differs from `pageStart`). Forms without a
 * `pageStart` are omitted.
 *
 * Robustness: a missing or non-array `forms` value yields an empty string
 * instead of throwing, null entries are ignored, and a form with a blank or
 * missing number is labelled "(no form number)" rather than rendering as an
 * empty label or the literal "undefined".
 *
 * @param {Array<{formNumber?: string, title?: string, formType?: string, pageStart?: number, pageEnd?: number}>} forms
 *   Form inventory entries.
 * @returns {string} Newline-separated bullet list, or "" when there is nothing to list.
 */
function formatFormInventoryForPageMap(forms) {
  if (!Array.isArray(forms) || forms.length === 0) return "";
  return forms.filter((f) => f != null && f.pageStart != null).map((f) => {
    const range = f.pageEnd && f.pageEnd !== f.pageStart ? `pages ${f.pageStart}-${f.pageEnd}` : `page ${f.pageStart}`;
    const title = f.title ? ` "${f.title}"` : "";
    const hasNumber = f.formNumber != null && String(f.formNumber).trim().length > 0;
    const label = hasNumber ? f.formNumber : "(no form number)";
    return `- ${label}${title} [${f.formType}] \u2192 ${range}`;
  }).join("\n");
}
2750
3003
 
2751
3004
  // src/prompts/coordinator/review.ts
2752
- import { z as z20 } from "zod";
2753
- var ReviewResultSchema = z20.object({
2754
- complete: z20.boolean(),
2755
- missingFields: z20.array(z20.string()),
2756
- additionalTasks: z20.array(z20.object({
2757
- extractorName: z20.string(),
2758
- startPage: z20.number(),
2759
- endPage: z20.number(),
2760
- description: z20.string()
3005
+ import { z as z21 } from "zod";
3006
+ var ReviewResultSchema = z21.object({
3007
+ complete: z21.boolean(),
3008
+ missingFields: z21.array(z21.string()),
3009
+ qualityIssues: z21.array(z21.string()).optional(),
3010
+ additionalTasks: z21.array(z21.object({
3011
+ extractorName: z21.string(),
3012
+ startPage: z21.number(),
3013
+ endPage: z21.number(),
3014
+ description: z21.string()
2761
3015
  }))
2762
3016
  });
2763
- function buildReviewPrompt(templateExpected, extractedKeys) {
2764
- return `You are reviewing an extraction for completeness. Compare what was expected vs what was found.
3017
+ function buildReviewPrompt(templateExpected, extractedKeys, extractionSummary, pageMapSummary) {
3018
+ return `You are reviewing an extraction for completeness and quality. Compare what was expected vs what was found.
2765
3019
 
2766
3020
  EXPECTED FIELDS (from document type template):
2767
3021
  ${templateExpected.map((f) => `- ${f}`).join("\n")}
@@ -2769,40 +3023,55 @@ ${templateExpected.map((f) => `- ${f}`).join("\n")}
2769
3023
  FIELDS ALREADY EXTRACTED:
2770
3024
  ${extractedKeys.map((f) => `- ${f}`).join("\n")}
2771
3025
 
3026
+ PAGE MAP SUMMARY:
3027
+ ${pageMapSummary}
3028
+
3029
+ CURRENT EXTRACTION SUMMARY:
3030
+ ${extractionSummary}
3031
+
2772
3032
  Determine:
2773
- 1. Is the extraction complete enough? (required fields present = complete)
3033
+ 1. Is the extraction complete enough?
2774
3034
  2. What fields are missing?
2775
- 3. Should any additional extraction tasks be dispatched?
3035
+ 3. What quality issues are present?
3036
+ 4. Should any additional extraction tasks be dispatched?
3037
+
3038
+ Mark the extraction as NOT complete if any of these are true:
3039
+ - required fields are missing
3040
+ - extracted values are generic placeholders like "shown in declarations", "per schedule", "if applicable", "as stated"
3041
+ - coverage limits or deductibles appear to come from generic form language instead of declaration/schedule-specific values
3042
+ - page assignments suggest declaration, schedule, endorsement, exclusion, or condition pages were not actually extracted with the matching focused extractor
3043
+ - a focused extractor exists but returned too little substance for the relevant pages
2776
3044
 
2777
3045
  Return JSON:
2778
3046
  {
2779
3047
  "complete": boolean,
2780
3048
  "missingFields": ["field1", "field2"],
3049
+ "qualityIssues": ["issue 1", "issue 2"],
2781
3050
  "additionalTasks": [
2782
3051
  { "extractorName": "...", "startPage": N, "endPage": N, "description": "..." }
2783
3052
  ]
2784
3053
  }
2785
3054
 
2786
- If all required fields are present, set complete=true even if some optional fields are missing.
3055
+ Use the page map to target follow-up extraction pages precisely. Prefer narrow, declaration/schedule-focused follow-up tasks over broad page ranges.
2787
3056
 
2788
3057
  Respond with JSON only.`;
2789
3058
  }
2790
3059
 
2791
3060
  // src/prompts/extractors/carrier-info.ts
2792
- import { z as z21 } from "zod";
2793
- var CarrierInfoSchema = z21.object({
2794
- carrierName: z21.string().describe("Primary insurance company name for display"),
2795
- carrierLegalName: z21.string().optional().describe("Legal entity name of insurer"),
2796
- naicNumber: z21.string().optional().describe("NAIC company code"),
2797
- amBestRating: z21.string().optional().describe("AM Best rating, e.g. 'A+ XV'"),
2798
- admittedStatus: z21.enum(["admitted", "non_admitted", "surplus_lines"]).optional().describe("Admitted status of the carrier"),
2799
- mga: z21.string().optional().describe("Managing General Agent or Program Administrator name"),
2800
- underwriter: z21.string().optional().describe("Named individual underwriter"),
2801
- policyNumber: z21.string().optional().describe("Policy or quote reference number"),
2802
- effectiveDate: z21.string().optional().describe("Policy effective date (MM/DD/YYYY)"),
2803
- expirationDate: z21.string().optional().describe("Policy expiration date (MM/DD/YYYY)"),
2804
- quoteNumber: z21.string().optional().describe("Quote or proposal reference number"),
2805
- proposedEffectiveDate: z21.string().optional().describe("Proposed effective date for quotes (MM/DD/YYYY)")
3061
+ import { z as z22 } from "zod";
3062
+ var CarrierInfoSchema = z22.object({
3063
+ carrierName: z22.string().describe("Primary insurance company name for display"),
3064
+ carrierLegalName: z22.string().optional().describe("Legal entity name of insurer"),
3065
+ naicNumber: z22.string().optional().describe("NAIC company code"),
3066
+ amBestRating: z22.string().optional().describe("AM Best rating, e.g. 'A+ XV'"),
3067
+ admittedStatus: z22.enum(["admitted", "non_admitted", "surplus_lines"]).optional().describe("Admitted status of the carrier"),
3068
+ mga: z22.string().optional().describe("Managing General Agent or Program Administrator name"),
3069
+ underwriter: z22.string().optional().describe("Named individual underwriter"),
3070
+ policyNumber: z22.string().optional().describe("Policy or quote reference number"),
3071
+ effectiveDate: z22.string().optional().describe("Policy effective date (MM/DD/YYYY)"),
3072
+ expirationDate: z22.string().optional().describe("Policy expiration date (MM/DD/YYYY)"),
3073
+ quoteNumber: z22.string().optional().describe("Quote or proposal reference number"),
3074
+ proposedEffectiveDate: z22.string().optional().describe("Proposed effective date for quotes (MM/DD/YYYY)")
2806
3075
  });
2807
3076
  function buildCarrierInfoPrompt() {
2808
3077
  return `You are an expert insurance document analyst. Extract carrier and policy identification information from this document.
@@ -2822,18 +3091,18 @@ Return JSON only.`;
2822
3091
  }
2823
3092
 
2824
3093
  // src/prompts/extractors/named-insured.ts
2825
- import { z as z22 } from "zod";
2826
- var AddressSchema2 = z22.object({
2827
- street1: z22.string(),
2828
- city: z22.string(),
2829
- state: z22.string(),
2830
- zip: z22.string()
3094
+ import { z as z23 } from "zod";
3095
+ var AddressSchema2 = z23.object({
3096
+ street1: z23.string(),
3097
+ city: z23.string(),
3098
+ state: z23.string(),
3099
+ zip: z23.string()
2831
3100
  });
2832
- var NamedInsuredSchema2 = z22.object({
2833
- insuredName: z22.string().describe("Name of primary named insured"),
2834
- insuredDba: z22.string().optional().describe("Doing-business-as name"),
3101
+ var NamedInsuredSchema2 = z23.object({
3102
+ insuredName: z23.string().describe("Name of primary named insured"),
3103
+ insuredDba: z23.string().optional().describe("Doing-business-as name"),
2835
3104
  insuredAddress: AddressSchema2.optional().describe("Primary insured mailing address"),
2836
- insuredEntityType: z22.enum([
3105
+ insuredEntityType: z23.enum([
2837
3106
  "corporation",
2838
3107
  "llc",
2839
3108
  "partnership",
@@ -2846,13 +3115,13 @@ var NamedInsuredSchema2 = z22.object({
2846
3115
  "married_couple",
2847
3116
  "other"
2848
3117
  ]).optional().describe("Legal entity type of the insured"),
2849
- insuredFein: z22.string().optional().describe("Federal Employer Identification Number"),
2850
- insuredSicCode: z22.string().optional().describe("SIC code"),
2851
- insuredNaicsCode: z22.string().optional().describe("NAICS code"),
2852
- additionalNamedInsureds: z22.array(
2853
- z22.object({
2854
- name: z22.string(),
2855
- relationship: z22.string().optional().describe("e.g. subsidiary, affiliate"),
3118
+ insuredFein: z23.string().optional().describe("Federal Employer Identification Number"),
3119
+ insuredSicCode: z23.string().optional().describe("SIC code"),
3120
+ insuredNaicsCode: z23.string().optional().describe("NAICS code"),
3121
+ additionalNamedInsureds: z23.array(
3122
+ z23.object({
3123
+ name: z23.string(),
3124
+ relationship: z23.string().optional().describe("e.g. subsidiary, affiliate"),
2856
3125
  address: AddressSchema2.optional()
2857
3126
  })
2858
3127
  ).optional().describe("Additional named insureds listed on the policy")
@@ -2873,23 +3142,20 @@ Return JSON only.`;
2873
3142
  }
2874
3143
 
2875
3144
  // src/prompts/extractors/coverage-limits.ts
2876
- import { z as z23 } from "zod";
2877
- var CoverageLimitsSchema = z23.object({
2878
- coverages: z23.array(
2879
- z23.object({
2880
- name: z23.string().describe("Coverage name"),
2881
- limit: z23.string().describe("Coverage limit, e.g. '$1,000,000'"),
2882
- deductible: z23.string().optional().describe("Deductible amount"),
2883
- coverageCode: z23.string().optional().describe("Coverage code or class code"),
2884
- formNumber: z23.string().optional().describe("Associated form number, e.g. 'CG 00 01'")
2885
- })
2886
- ).describe("All coverages with their limits"),
2887
- coverageForm: z23.enum(["occurrence", "claims_made", "accident"]).optional().describe("Primary coverage trigger type"),
2888
- retroactiveDate: z23.string().optional().describe("Retroactive date for claims-made policies (MM/DD/YYYY)")
3145
+ import { z as z24 } from "zod";
3146
+ var ExtractorCoverageSchema = CoverageSchema.extend({
3147
+ coverageCode: z24.string().optional().describe("Coverage code or class code")
3148
+ });
3149
+ var CoverageLimitsSchema = z24.object({
3150
+ coverages: z24.array(ExtractorCoverageSchema).describe("All coverages with their limits"),
3151
+ coverageForm: z24.enum(["occurrence", "claims_made", "accident"]).optional().describe("Primary coverage trigger type"),
3152
+ retroactiveDate: z24.string().optional().describe("Retroactive date for claims-made policies (MM/DD/YYYY)")
2889
3153
  });
2890
3154
  function buildCoverageLimitsPrompt() {
2891
3155
  return `You are an expert insurance document analyst. Extract all coverage limits and deductibles from this document.
2892
3156
 
3157
+ Extract only insured-specific declaration, schedule, or endorsement entries that state actual coverage terms for this policy.
3158
+
2893
3159
  Focus on:
2894
3160
  - Every coverage listed on the declarations page or coverage schedule
2895
3161
  - Per-occurrence, aggregate, and sub-limits for each coverage
@@ -2900,20 +3166,34 @@ Focus on:
2900
3166
  - Standard limit fields: per occurrence, general aggregate, products/completed ops aggregate, personal & advertising injury, fire damage, medical expense, combined single limit, BI/PD splits, umbrella each occurrence/aggregate/retention, statutory (WC), employers liability
2901
3167
  - Defense cost treatment: inside limits, outside limits, or supplementary
2902
3168
 
2903
- Extract ALL coverages \u2014 do not omit any coverage line that appears in the document.
3169
+ For EACH coverage, also extract:
3170
+ - pageNumber: the original page number where the coverage row/value appears
3171
+ - sectionRef: the declarations/schedule/endorsement section heading where it appears
3172
+ - originalContent: the verbatim row or short source snippet used for this coverage
3173
+ - limitValueType: classify the limit as numeric, included, not_included, as_stated, waiting_period, referential, or other
3174
+ - deductibleValueType: classify the deductible/value term similarly when deductible is present
3175
+
3176
+ Critical rules:
3177
+ - Do not extract table-of-contents lines, index entries, headers, footers, page labels, or cross-references as coverages.
3178
+ - Do not create a coverage entry from generic policy-form text that only says a limit/deductible is "shown in the declarations", "shown in the Business Income Declarations", "as stated", "if applicable", or similar referential wording.
3179
+ - Do not treat a generic waiting period, deductible explanation, limits clause, coinsurance clause, or definitions text as a standalone coverage unless the page contains an actual policy-specific schedule row or declaration entry.
3180
+ - Values like "Included" or "Not Included" are valid only when they appear as an explicit declarations/schedule/endorsement entry for a named coverage. Do not infer them from narrative form language.
3181
+ - If a waiting period or hour deductible is shown as part of a specific declarations/schedule row, it may be captured in deductible. Otherwise omit it.
3182
+ - Use limitValueType or deductibleValueType to preserve non-numeric terms precisely instead of forcing them into numeric semantics.
3183
+ - Preserve one row per real coverage entry. Do not merge adjacent schedule rows into malformed names.
2904
3184
 
2905
3185
  Return JSON only.`;
2906
3186
  }
2907
3187
 
2908
3188
  // src/prompts/extractors/endorsements.ts
2909
- import { z as z24 } from "zod";
2910
- var EndorsementsSchema = z24.object({
2911
- endorsements: z24.array(
2912
- z24.object({
2913
- formNumber: z24.string().describe("Form number, e.g. 'CG 21 47'"),
2914
- editionDate: z24.string().optional().describe("Edition date, e.g. '12 07'"),
2915
- title: z24.string().describe("Endorsement title"),
2916
- endorsementType: z24.enum([
3189
+ import { z as z25 } from "zod";
3190
+ var EndorsementsSchema = z25.object({
3191
+ endorsements: z25.array(
3192
+ z25.object({
3193
+ formNumber: z25.string().describe("Form number, e.g. 'CG 21 47'"),
3194
+ editionDate: z25.string().optional().describe("Edition date, e.g. '12 07'"),
3195
+ title: z25.string().describe("Endorsement title"),
3196
+ endorsementType: z25.enum([
2917
3197
  "additional_insured",
2918
3198
  "waiver_of_subrogation",
2919
3199
  "primary_noncontributory",
@@ -2933,12 +3213,12 @@ var EndorsementsSchema = z24.object({
2933
3213
  "territorial_extension",
2934
3214
  "other"
2935
3215
  ]).describe("Endorsement type classification"),
2936
- effectiveDate: z24.string().optional().describe("Endorsement effective date"),
2937
- affectedCoverageParts: z24.array(z24.string()).optional().describe("Coverage parts affected by this endorsement"),
2938
- namedParties: z24.array(
2939
- z24.object({
2940
- name: z24.string().describe("Party name"),
2941
- role: z24.enum([
3216
+ effectiveDate: z25.string().optional().describe("Endorsement effective date"),
3217
+ affectedCoverageParts: z25.array(z25.string()).optional().describe("Coverage parts affected by this endorsement"),
3218
+ namedParties: z25.array(
3219
+ z25.object({
3220
+ name: z25.string().describe("Party name"),
3221
+ role: z25.enum([
2942
3222
  "additional_insured",
2943
3223
  "loss_payee",
2944
3224
  "mortgage_holder",
@@ -2947,15 +3227,15 @@ var EndorsementsSchema = z24.object({
2947
3227
  "designated_person",
2948
3228
  "other"
2949
3229
  ]).describe("Party role"),
2950
- relationship: z24.string().optional().describe("Relationship to insured"),
2951
- scope: z24.string().optional().describe("Scope of coverage for this party")
3230
+ relationship: z25.string().optional().describe("Relationship to insured"),
3231
+ scope: z25.string().optional().describe("Scope of coverage for this party")
2952
3232
  })
2953
3233
  ).optional().describe("Named parties (additional insureds, loss payees, etc.)"),
2954
- keyTerms: z24.array(z24.string()).optional().describe("Key terms or notable provisions in the endorsement"),
2955
- premiumImpact: z24.string().optional().describe("Additional premium or credit"),
2956
- content: z24.string().describe("Full verbatim text of the endorsement"),
2957
- pageStart: z24.number().describe("Starting page number of this endorsement"),
2958
- pageEnd: z24.number().optional().describe("Ending page number of this endorsement")
3234
+ keyTerms: z25.array(z25.string()).optional().describe("Key terms or notable provisions in the endorsement"),
3235
+ premiumImpact: z25.string().optional().describe("Additional premium or credit"),
3236
+ content: z25.string().describe("Full verbatim text of the endorsement"),
3237
+ pageStart: z25.number().describe("Starting page number of this endorsement"),
3238
+ pageEnd: z25.number().optional().describe("Ending page number of this endorsement")
2959
3239
  })
2960
3240
  ).describe("All endorsements found in the document")
2961
3241
  });
@@ -2986,20 +3266,20 @@ Return JSON only.`;
2986
3266
  }
2987
3267
 
2988
3268
  // src/prompts/extractors/exclusions.ts
2989
- import { z as z25 } from "zod";
2990
- var ExclusionsSchema = z25.object({
2991
- exclusions: z25.array(
2992
- z25.object({
2993
- name: z25.string().describe("Exclusion title or short description"),
2994
- formNumber: z25.string().optional().describe("Form number if part of a named endorsement"),
2995
- excludedPerils: z25.array(z25.string()).optional().describe("Specific perils excluded"),
2996
- isAbsolute: z25.boolean().optional().describe("Whether the exclusion is absolute (no exceptions)"),
2997
- exceptions: z25.array(z25.string()).optional().describe("Exceptions to the exclusion, if any"),
2998
- buybackAvailable: z25.boolean().optional().describe("Whether coverage can be bought back via endorsement"),
2999
- buybackEndorsement: z25.string().optional().describe("Form number of the buyback endorsement if available"),
3000
- appliesTo: z25.array(z25.string()).optional().describe("Coverage types this exclusion applies to"),
3001
- content: z25.string().describe("Full verbatim exclusion text"),
3002
- pageNumber: z25.number().optional().describe("Page number where exclusion appears")
3269
+ import { z as z26 } from "zod";
3270
+ var ExclusionsSchema = z26.object({
3271
+ exclusions: z26.array(
3272
+ z26.object({
3273
+ name: z26.string().describe("Exclusion title or short description"),
3274
+ formNumber: z26.string().optional().describe("Form number if part of a named endorsement"),
3275
+ excludedPerils: z26.array(z26.string()).optional().describe("Specific perils excluded"),
3276
+ isAbsolute: z26.boolean().optional().describe("Whether the exclusion is absolute (no exceptions)"),
3277
+ exceptions: z26.array(z26.string()).optional().describe("Exceptions to the exclusion, if any"),
3278
+ buybackAvailable: z26.boolean().optional().describe("Whether coverage can be bought back via endorsement"),
3279
+ buybackEndorsement: z26.string().optional().describe("Form number of the buyback endorsement if available"),
3280
+ appliesTo: z26.array(z26.string()).optional().describe("Coverage types this exclusion applies to"),
3281
+ content: z26.string().describe("Full verbatim exclusion text"),
3282
+ pageNumber: z26.number().optional().describe("Page number where exclusion appears")
3003
3283
  })
3004
3284
  ).describe("All exclusions found in the document")
3005
3285
  });
@@ -3024,18 +3304,23 @@ Focus on:
3024
3304
  - Exclusions within insuring agreements or conditions if clearly labeled
3025
3305
  - Full verbatim exclusion text \u2014 do not summarize
3026
3306
 
3307
+ Critical rules:
3308
+ - Ignore table-of-contents entries, running headers/footers, and references that only point to another page or section.
3309
+ - Do not emit a standalone exclusion from a fragment unless the fragment itself contains substantive exclusion wording.
3310
+ - Always include pageNumber when the exclusion appears on a specific page in the supplied document chunk.
3311
+
3027
3312
  Common personal lines exclusion patterns: animal liability, business pursuits, home daycare, watercraft, aircraft.
3028
3313
 
3029
3314
  Return JSON only.`;
3030
3315
  }
3031
3316
 
3032
3317
  // src/prompts/extractors/conditions.ts
3033
- import { z as z26 } from "zod";
3034
- var ConditionsSchema = z26.object({
3035
- conditions: z26.array(
3036
- z26.object({
3037
- name: z26.string().describe("Condition title"),
3038
- conditionType: z26.enum([
3318
+ import { z as z27 } from "zod";
3319
+ var ConditionsSchema = z27.object({
3320
+ conditions: z27.array(
3321
+ z27.object({
3322
+ name: z27.string().describe("Condition title"),
3323
+ conditionType: z27.enum([
3039
3324
  "duties_after_loss",
3040
3325
  "notice_requirements",
3041
3326
  "other_insurance",
@@ -3054,14 +3339,14 @@ var ConditionsSchema = z26.object({
3054
3339
  "separation_of_insureds",
3055
3340
  "other"
3056
3341
  ]).describe("Condition category"),
3057
- content: z26.string().describe("Full verbatim condition text"),
3058
- keyValues: z26.array(
3059
- z26.object({
3060
- key: z26.string().describe("Key name (e.g. 'noticePeriod', 'suitDeadline')"),
3061
- value: z26.string().describe("Value (e.g. '30 days', '2 years')")
3342
+ content: z27.string().describe("Full verbatim condition text"),
3343
+ keyValues: z27.array(
3344
+ z27.object({
3345
+ key: z27.string().describe("Key name (e.g. 'noticePeriod', 'suitDeadline')"),
3346
+ value: z27.string().describe("Value (e.g. '30 days', '2 years')")
3062
3347
  })
3063
3348
  ).optional().describe("Key values extracted from the condition (notice periods, deadlines, etc.)"),
3064
- pageNumber: z26.number().optional().describe("Page number where condition appears")
3349
+ pageNumber: z27.number().optional().describe("Page number where condition appears")
3065
3350
  })
3066
3351
  ).describe("All policy conditions found in the document")
3067
3352
  });
@@ -3073,7 +3358,7 @@ For EACH condition, extract:
3073
3358
  - conditionType: classify as one of: duties_after_loss, notice_requirements, other_insurance, cancellation, nonrenewal, transfer_of_rights, liberalization, arbitration, concealment_fraud, examination_under_oath, legal_action, loss_payment, appraisal, mortgage_holders, policy_territory, separation_of_insureds, other \u2014 REQUIRED
3074
3359
  - content: full verbatim condition text \u2014 REQUIRED
3075
3360
  - keyValues: extract specific values as key-value pairs (e.g. noticePeriod: "30 days", suitDeadline: "2 years")
3076
- - pageNumber: page number where the condition appears
3361
+ - pageNumber: original document page number where the substantive condition text appears
3077
3362
 
3078
3363
  Focus on:
3079
3364
  - Duties after loss / notice of occurrence conditions
@@ -3090,32 +3375,37 @@ Focus on:
3090
3375
  - Mortgage holders clause
3091
3376
  - Any other named conditions
3092
3377
 
3378
+ Critical rules:
3379
+ - Ignore table-of-contents entries, section indexes, running headers/footers, and page references such as "Appraisal ..... 19".
3380
+ - Do not emit a condition unless the page contains substantive condition text, not just a heading or reference.
3381
+ - If a condition continues from a prior page, keep the substantive text together and use the page where the condition text appears in this extracted chunk.
3382
+
3093
3383
  Return JSON only.`;
3094
3384
  }
3095
3385
 
3096
3386
  // src/prompts/extractors/premium-breakdown.ts
3097
- import { z as z27 } from "zod";
3098
- var PremiumBreakdownSchema = z27.object({
3099
- premium: z27.string().optional().describe("Total premium amount, e.g. '$5,000'"),
3100
- totalCost: z27.string().optional().describe("Total cost including taxes and fees, e.g. '$5,250'"),
3101
- premiumBreakdown: z27.array(
3102
- z27.object({
3103
- line: z27.string().describe("Coverage line name"),
3104
- amount: z27.string().describe("Premium amount for this line")
3387
+ import { z as z28 } from "zod";
3388
+ var PremiumBreakdownSchema = z28.object({
3389
+ premium: z28.string().optional().describe("Total premium amount, e.g. '$5,000'"),
3390
+ totalCost: z28.string().optional().describe("Total cost including taxes and fees, e.g. '$5,250'"),
3391
+ premiumBreakdown: z28.array(
3392
+ z28.object({
3393
+ line: z28.string().describe("Coverage line name"),
3394
+ amount: z28.string().describe("Premium amount for this line")
3105
3395
  })
3106
3396
  ).optional().describe("Per-coverage-line premium breakdown"),
3107
- taxesAndFees: z27.array(
3108
- z27.object({
3109
- name: z27.string().describe("Fee or tax name"),
3110
- amount: z27.string().describe("Dollar amount"),
3111
- type: z27.enum(["tax", "fee", "surcharge", "assessment"]).optional().describe("Fee category")
3397
+ taxesAndFees: z28.array(
3398
+ z28.object({
3399
+ name: z28.string().describe("Fee or tax name"),
3400
+ amount: z28.string().describe("Dollar amount"),
3401
+ type: z28.enum(["tax", "fee", "surcharge", "assessment"]).optional().describe("Fee category")
3112
3402
  })
3113
3403
  ).optional().describe("Taxes, fees, surcharges, and assessments"),
3114
- minimumPremium: z27.string().optional().describe("Minimum premium if stated"),
3115
- depositPremium: z27.string().optional().describe("Deposit premium if stated"),
3116
- paymentPlan: z27.string().optional().describe("Payment plan description"),
3117
- auditType: z27.enum(["annual", "semi_annual", "quarterly", "monthly", "final", "self"]).optional().describe("Premium audit type"),
3118
- ratingBasis: z27.string().optional().describe("Rating basis, e.g. payroll, revenue, area, units")
3404
+ minimumPremium: z28.string().optional().describe("Minimum premium if stated"),
3405
+ depositPremium: z28.string().optional().describe("Deposit premium if stated"),
3406
+ paymentPlan: z28.string().optional().describe("Payment plan description"),
3407
+ auditType: z28.enum(["annual", "semi_annual", "quarterly", "monthly", "final", "self"]).optional().describe("Premium audit type"),
3408
+ ratingBasis: z28.string().optional().describe("Rating basis, e.g. payroll, revenue, area, units")
3119
3409
  });
3120
3410
  function buildPremiumBreakdownPrompt() {
3121
3411
  return `You are an expert insurance document analyst. Extract all premium and cost information from this document.
@@ -3135,14 +3425,14 @@ Return JSON only.`;
3135
3425
  }
3136
3426
 
3137
3427
  // src/prompts/extractors/declarations.ts
3138
- import { z as z28 } from "zod";
3139
- var DeclarationsFieldSchema = z28.object({
3140
- field: z28.string().describe("Descriptive field name (e.g. 'policyNumber', 'effectiveDate', 'coverageALimit')"),
3141
- value: z28.string().describe("Extracted value exactly as it appears in the document"),
3142
- section: z28.string().optional().describe("Section or grouping this field belongs to (e.g. 'Coverage Limits', 'Vehicle Schedule')")
3428
+ import { z as z29 } from "zod";
3429
+ var DeclarationsFieldSchema = z29.object({
3430
+ field: z29.string().describe("Descriptive field name (e.g. 'policyNumber', 'effectiveDate', 'coverageALimit')"),
3431
+ value: z29.string().describe("Extracted value exactly as it appears in the document"),
3432
+ section: z29.string().optional().describe("Section or grouping this field belongs to (e.g. 'Coverage Limits', 'Vehicle Schedule')")
3143
3433
  });
3144
- var DeclarationsExtractSchema = z28.object({
3145
- fields: z28.array(DeclarationsFieldSchema).describe("All declarations page fields extracted as key-value pairs. Structure varies by line of business.")
3434
+ var DeclarationsExtractSchema = z29.object({
3435
+ fields: z29.array(DeclarationsFieldSchema).describe("All declarations page fields extracted as key-value pairs. Structure varies by line of business.")
3146
3436
  });
3147
3437
  function buildDeclarationsPrompt() {
3148
3438
  return `You are an expert insurance document analyst. Extract all declarations page data from this document into a flexible key-value structure.
@@ -3182,21 +3472,21 @@ Preserve original values exactly as they appear. Return JSON only.`;
3182
3472
  }
3183
3473
 
3184
3474
  // src/prompts/extractors/loss-history.ts
3185
- import { z as z29 } from "zod";
3186
- var LossHistorySchema = z29.object({
3187
- lossSummary: z29.string().optional().describe("Summary of loss history, e.g. '3 claims in past 5 years totaling $125,000'"),
3188
- individualClaims: z29.array(
3189
- z29.object({
3190
- date: z29.string().optional().describe("Date of loss or claim"),
3191
- type: z29.string().optional().describe("Type of claim, e.g. 'property damage', 'bodily injury'"),
3192
- description: z29.string().optional().describe("Brief description of the claim"),
3193
- amountPaid: z29.string().optional().describe("Amount paid"),
3194
- amountReserved: z29.string().optional().describe("Amount reserved"),
3195
- status: z29.enum(["open", "closed", "reopened"]).optional().describe("Claim status"),
3196
- claimNumber: z29.string().optional().describe("Claim reference number")
3475
+ import { z as z30 } from "zod";
3476
+ var LossHistorySchema = z30.object({
3477
+ lossSummary: z30.string().optional().describe("Summary of loss history, e.g. '3 claims in past 5 years totaling $125,000'"),
3478
+ individualClaims: z30.array(
3479
+ z30.object({
3480
+ date: z30.string().optional().describe("Date of loss or claim"),
3481
+ type: z30.string().optional().describe("Type of claim, e.g. 'property damage', 'bodily injury'"),
3482
+ description: z30.string().optional().describe("Brief description of the claim"),
3483
+ amountPaid: z30.string().optional().describe("Amount paid"),
3484
+ amountReserved: z30.string().optional().describe("Amount reserved"),
3485
+ status: z30.enum(["open", "closed", "reopened"]).optional().describe("Claim status"),
3486
+ claimNumber: z30.string().optional().describe("Claim reference number")
3197
3487
  })
3198
3488
  ).optional().describe("Individual claim records"),
3199
- experienceMod: z29.string().optional().describe("Experience modification factor for workers comp, e.g. '0.85'")
3489
+ experienceMod: z30.string().optional().describe("Experience modification factor for workers comp, e.g. '0.85'")
3200
3490
  });
3201
3491
  function buildLossHistoryPrompt() {
3202
3492
  return `You are an expert insurance document analyst. Extract all loss history and claims information from this document.
@@ -3213,18 +3503,18 @@ Return JSON only.`;
3213
3503
  }
3214
3504
 
3215
3505
  // src/prompts/extractors/sections.ts
3216
- import { z as z30 } from "zod";
3217
- var SubsectionSchema2 = z30.object({
3218
- title: z30.string().describe("Subsection title"),
3219
- sectionNumber: z30.string().optional().describe("Subsection number"),
3220
- pageNumber: z30.number().optional().describe("Page number"),
3221
- content: z30.string().describe("Full verbatim text")
3506
+ import { z as z31 } from "zod";
3507
+ var SubsectionSchema2 = z31.object({
3508
+ title: z31.string().describe("Subsection title"),
3509
+ sectionNumber: z31.string().optional().describe("Subsection number"),
3510
+ pageNumber: z31.number().optional().describe("Page number"),
3511
+ content: z31.string().describe("Full verbatim text")
3222
3512
  });
3223
- var SectionsSchema = z30.object({
3224
- sections: z30.array(
3225
- z30.object({
3226
- title: z30.string().describe("Section title"),
3227
- type: z30.enum([
3513
+ var SectionsSchema = z31.object({
3514
+ sections: z31.array(
3515
+ z31.object({
3516
+ title: z31.string().describe("Section title"),
3517
+ type: z31.enum([
3228
3518
  "declarations",
3229
3519
  "insuring_agreement",
3230
3520
  "policy_form",
@@ -3238,10 +3528,10 @@ var SectionsSchema = z30.object({
3238
3528
  "regulatory",
3239
3529
  "other"
3240
3530
  ]).describe("Section type classification"),
3241
- content: z30.string().describe("Full verbatim text of the section"),
3242
- pageStart: z30.number().describe("Starting page number"),
3243
- pageEnd: z30.number().optional().describe("Ending page number"),
3244
- subsections: z30.array(SubsectionSchema2).optional().describe("Subsections within this section")
3531
+ content: z31.string().describe("Full verbatim text of the section"),
3532
+ pageStart: z31.number().describe("Starting page number"),
3533
+ pageEnd: z31.number().optional().describe("Ending page number"),
3534
+ subsections: z31.array(SubsectionSchema2).optional().describe("Subsections within this section")
3245
3535
  })
3246
3536
  ).describe("All document sections")
3247
3537
  });
@@ -3260,25 +3550,31 @@ For each section, classify its type:
3260
3550
  - "other" \u2014 anything that doesn't fit the above categories
3261
3551
 
3262
3552
  Include accurate page numbers for every section. Include subsections only if the section has clearly defined subsections with their own titles.
3553
+ If a page begins or ends in the middle of a section, treat it as a continuation of the existing section instead of creating a new orphan section from the fragment.
3554
+
3555
+ Critical rules:
3556
+ - Ignore table-of-contents entries, page-number references, repeating headers/footers, and other navigational artifacts.
3557
+ - Do not create a new section from a lone continuation fragment such as a single paragraph tail or list item that clearly belongs to the previous page's section.
3558
+ - When a section spans multiple pages, keep it as one section with pageStart/pageEnd covering the full span represented in this extraction.
3263
3559
 
3264
3560
  Return JSON only.`;
3265
3561
  }
3266
3562
 
3267
3563
  // src/prompts/extractors/supplementary.ts
3268
- import { z as z31 } from "zod";
3269
- var ContactSchema2 = z31.object({
3270
- name: z31.string().optional().describe("Organization or person name"),
3271
- phone: z31.string().optional().describe("Phone number"),
3272
- email: z31.string().optional().describe("Email address"),
3273
- address: z31.string().optional().describe("Mailing address"),
3274
- type: z31.string().optional().describe("Contact type, e.g. 'State Department of Insurance'")
3564
+ import { z as z32 } from "zod";
3565
+ var ContactSchema2 = z32.object({
3566
+ name: z32.string().optional().describe("Organization or person name"),
3567
+ phone: z32.string().optional().describe("Phone number"),
3568
+ email: z32.string().optional().describe("Email address"),
3569
+ address: z32.string().optional().describe("Mailing address"),
3570
+ type: z32.string().optional().describe("Contact type, e.g. 'State Department of Insurance'")
3275
3571
  });
3276
- var SupplementarySchema = z31.object({
3277
- regulatoryContacts: z31.array(ContactSchema2).optional().describe("Regulatory body contacts (state department of insurance, ombudsman)"),
3278
- claimsContacts: z31.array(ContactSchema2).optional().describe("Claims reporting contacts and instructions"),
3279
- thirdPartyAdministrators: z31.array(ContactSchema2).optional().describe("Third-party administrators for claims handling"),
3280
- cancellationNoticeDays: z31.number().optional().describe("Required notice period for cancellation in days"),
3281
- nonrenewalNoticeDays: z31.number().optional().describe("Required notice period for nonrenewal in days")
3572
+ var SupplementarySchema = z32.object({
3573
+ regulatoryContacts: z32.array(ContactSchema2).optional().describe("Regulatory body contacts (state department of insurance, ombudsman)"),
3574
+ claimsContacts: z32.array(ContactSchema2).optional().describe("Claims reporting contacts and instructions"),
3575
+ thirdPartyAdministrators: z32.array(ContactSchema2).optional().describe("Third-party administrators for claims handling"),
3576
+ cancellationNoticeDays: z32.number().optional().describe("Required notice period for cancellation in days"),
3577
+ nonrenewalNoticeDays: z32.number().optional().describe("Required notice period for nonrenewal in days")
3282
3578
  });
3283
3579
  function buildSupplementaryPrompt() {
3284
3580
  return `You are an expert insurance document analyst. Extract supplementary and regulatory information from this document.
@@ -3315,6 +3611,313 @@ function getExtractor(name) {
3315
3611
  return EXTRACTORS[name];
3316
3612
  }
3317
3613
 
3614
+ // src/core/quality.ts
3615
/**
 * Collapse a list of review issues into a single gate status.
 *
 * @param {{ issues: Array<{ severity: string }>, hasRoundWarnings?: boolean }} params
 * @returns {"failed" | "warning" | "passed"} "failed" when any issue is
 *   blocking; otherwise "warning" when any issue is a warning or
 *   hasRoundWarnings is truthy; otherwise "passed".
 */
function evaluateQualityGate(params) {
  const { issues, hasRoundWarnings = false } = params;
  const severities = new Set(issues.map((issue) => issue.severity));
  if (severities.has("blocking")) return "failed";
  if (severities.has("warning") || hasRoundWarnings) return "warning";
  return "passed";
}
3621
/**
 * Decide whether a gate status should abort extraction.
 *
 * @param {string} mode - Quality gate mode; only "strict" can fail.
 * @param {string} status - Status produced by evaluateQualityGate.
 * @returns {boolean} true only for mode "strict" with status "failed".
 */
function shouldFailQualityGate(mode, status) {
  if (mode !== "strict") return false;
  return status === "failed";
}
3624
+
3625
+ // src/extraction/quality.ts
3626
/**
 * Trim a form number and treat non-strings and blank strings as absent.
 *
 * @param {unknown} value - Raw form number from an extractor result.
 * @returns {string | undefined} The trimmed string, or undefined when the
 *   value is not a string or is empty after trimming.
 */
function normalizeFormNumber(value) {
  const cleaned = typeof value === "string" ? value.trim() : "";
  return cleaned === "" ? void 0 : cleaned;
}
3632
/**
 * Record that `source` mentioned `formNumber`, creating or enriching the
 * inventory entry for that form.
 *
 * An existing entry keeps the values it already has; only falsy title /
 * pageStart / pageEnd fields are filled from `extra`. Each source name is
 * listed at most once per entry.
 *
 * @param {Map<string, object>} inventory - Mutated in place, keyed by form number.
 * @param {string | undefined} formNumber - Falsy values are ignored entirely.
 * @param {string} source - Name of the extractor that referenced the form.
 * @param {{ title?: string, pageStart?: number, pageEnd?: number }} [extra]
 * @returns {void}
 */
function addFormEntry(inventory, formNumber, source, extra) {
  if (!formNumber) return;
  const entry = inventory.get(formNumber);
  if (!entry) {
    inventory.set(formNumber, {
      formNumber,
      title: extra?.title,
      pageStart: extra?.pageStart,
      pageEnd: extra?.pageEnd,
      sources: [source]
    });
    return;
  }
  for (const field of ["title", "pageStart", "pageEnd"]) {
    if (!entry[field] && extra?.[field]) entry[field] = extra[field];
  }
  if (!entry.sources.includes(source)) entry.sources.push(source);
}
3650
/**
 * Detect limit/deductible text that points elsewhere instead of stating a
 * concrete value.
 *
 * @param {unknown} value - Candidate text; non-strings never match.
 * @returns {boolean} true when the lower-cased text contains one of the
 *   known referential phrases.
 */
function looksReferential(value) {
  if (typeof value !== "string") return false;
  const lowered = value.toLowerCase();
  const phrases = [
    "shown in the declarations",
    "shown in declarations",
    "shown in the schedule",
    "as stated",
    "if applicable"
  ];
  return phrases.some((phrase) => lowered.includes(phrase));
}
3655
/**
 * Detect text that is a table-of-contents line rather than real content.
 *
 * Two shapes are recognised, both anchored at the end of the trimmed text:
 *  - four or more leader dots followed by a 1-3 digit page number;
 *  - a numbered heading ("3. Title") with three or more leader dots and a
 *    1-3 digit page number.
 * Spaces or tabs between the leader dots and the page number are tolerated,
 * so "Exclusions ........ 12" is caught as well as "Exclusions........12".
 *
 * @param {unknown} value - Candidate content; non-strings never match.
 * @returns {boolean} true when the text looks like a table-of-contents entry.
 */
function looksTocArtifact(value) {
  if (typeof value !== "string") return false;
  const text = value.trim();
  const leaderThenPage = /\.{4,}[ \t]*\d{1,3}$/;
  const numberedHeading = /^\d+\.\s+[A-Z][\s\S]*\.{3,}[ \t]*\d{1,3}$/;
  return leaderThenPage.test(text) || numberedHeading.test(text);
}
3659
/**
 * Rank a coverage's section reference by how authoritative its source is.
 *
 * Tiers are checked from highest to lowest and the first keyword hit wins,
 * so a reference mentioning both "endorsement" and "schedule" ranks 4.
 *
 * @param {unknown} sectionRef - Section label; non-strings rank 0.
 * @returns {number} 4 for declarations/schedules, 3 for endorsements, 2 for
 *   additional coverages, 1 for coverage/policy forms, 0 otherwise.
 */
function sourcePrecedence(sectionRef) {
  if (typeof sectionRef !== "string") return 0;
  const ref = sectionRef.toLowerCase();
  const tiers = [
    [4, ["declaration", "scheduled coverages", "schedule"]],
    [3, ["endorsement"]],
    [2, ["additional coverages"]],
    [1, ["coverage form", "policy form"]]
  ];
  for (const [rank, keywords] of tiers) {
    if (keywords.some((keyword) => ref.includes(keyword))) return rank;
  }
  return 0;
}
3668
/**
 * Run the deterministic (non-LLM) quality checks over the extracted data and
 * combine them with the LLM review rounds into a single review report.
 *
 * Checks performed:
 *  - endorsements: missing form number (blocking), form not listed in the
 *    extracted form inventory (warning);
 *  - coverages: form-header rows, missing page/section/original-content
 *    provenance, referential values, form not listed in the extracted form
 *    inventory, and conflicting limit/deductible between rows that share a
 *    name and form number;
 *  - exclusions and conditions: missing page number, table-of-contents
 *    artifacts (blocking);
 *  - sections: short single-page fragments.
 *
 * @param {object} params
 * @param {Map<string, object>} params.memory - Extractor results keyed by
 *   extractor name ("form_inventory", "coverage_limits", "endorsements",
 *   "exclusions", "conditions", "sections").
 * @param {Array<object>} [params.reviewRounds] - Review round records
 *   (round, complete, qualityIssues, ...). Defaults to an empty list.
 * @param {Array<object>} [params.pageAssignments] - Page map entries; only
 *   their count is reported. Defaults to an empty list.
 * @returns {{ issues: object[], rounds: object[], artifacts: object[],
 *   reviewRoundRecords: object[], formInventory: object[],
 *   qualityGateStatus: string }}
 */
function buildExtractionReviewReport(params) {
  const { memory, reviewRounds = [], pageAssignments = [] } = params;
  const textOrUndefined = (value) => typeof value === "string" ? value : void 0;
  const numberOrUndefined = (value) => typeof value === "number" ? value : void 0;
  const isBlank = (value) => typeof value !== "string" || !value.trim();
  const label = (value) => String(value ?? "unknown");

  const deterministicIssues = [];
  const report = (code, severity, message, details) => {
    deterministicIssues.push({ code, severity, message, ...details });
  };

  const inventory = /* @__PURE__ */ new Map();
  const extractedFormInventory = memory.get("form_inventory")?.forms ?? [];
  const coverages = memory.get("coverage_limits")?.coverages ?? [];
  const endorsements = memory.get("endorsements")?.endorsements ?? [];
  const exclusions = memory.get("exclusions")?.exclusions ?? [];
  const conditions = memory.get("conditions")?.conditions ?? [];
  const sections = memory.get("sections")?.sections ?? [];

  // Form numbers reported by the form_inventory extractor itself. The merged
  // `inventory` map cannot be used for the "missing from inventory" checks:
  // every referencing item is added to it before being checked, so a lookup
  // there always succeeds.
  const declaredForms = /* @__PURE__ */ new Set();
  for (const form of extractedFormInventory) {
    const formNumber = normalizeFormNumber(form.formNumber);
    if (formNumber) declaredForms.add(formNumber);
    addFormEntry(inventory, formNumber, "form_inventory", {
      title: form.title,
      pageStart: form.pageStart,
      pageEnd: form.pageEnd
    });
  }
  // With an empty extracted inventory there is no baseline to compare
  // against, so no "missing from inventory" warnings are raised.
  const isUnlistedForm = (formNumber) => Boolean(formNumber) && declaredForms.size > 0 && !declaredForms.has(formNumber);

  for (const endorsement of endorsements) {
    const endorsementFormNumber = normalizeFormNumber(endorsement.formNumber);
    const itemName = textOrUndefined(endorsement.title);
    const pageNumber = numberOrUndefined(endorsement.pageStart);
    addFormEntry(inventory, endorsementFormNumber, "endorsements", {
      title: itemName,
      pageStart: pageNumber,
      pageEnd: numberOrUndefined(endorsement.pageEnd)
    });
    if (!endorsementFormNumber) {
      report("endorsement_missing_form_number", "blocking", "Endorsement is missing formNumber.", {
        extractorName: "endorsements",
        pageNumber,
        itemName
      });
    }
    if (isUnlistedForm(endorsementFormNumber)) {
      report(
        "endorsement_form_missing_from_inventory",
        "warning",
        `Endorsement "${String(endorsement.title ?? endorsementFormNumber)}" is not present in form inventory.`,
        { extractorName: "endorsements", formNumber: endorsementFormNumber, pageNumber, itemName }
      );
    }
  }

  for (const coverage of coverages) {
    const formNumber = normalizeFormNumber(coverage.formNumber);
    const itemName = textOrUndefined(coverage.name);
    const pageNumber = numberOrUndefined(coverage.pageNumber);
    const shownName = label(coverage.name);
    const located = { extractorName: "coverage_limits", formNumber, pageNumber, itemName };
    addFormEntry(inventory, formNumber, "coverage_limits", {
      title: itemName,
      pageStart: pageNumber,
      pageEnd: pageNumber
    });
    if (itemName !== void 0 && /coverage form$/i.test(itemName.trim())) {
      report(
        "generic_form_row_as_coverage",
        "blocking",
        `Coverage "${itemName}" looks like a form header rather than a real coverage row.`,
        located
      );
    }
    if (pageNumber === void 0) {
      report(
        "coverage_missing_page_number",
        "warning",
        `Coverage "${shownName}" is missing pageNumber provenance.`,
        { extractorName: "coverage_limits", formNumber, itemName }
      );
    }
    if (isBlank(coverage.sectionRef)) {
      report(
        "coverage_missing_section_ref",
        "warning",
        `Coverage "${shownName}" is missing sectionRef provenance.`,
        located
      );
    }
    if (isBlank(coverage.originalContent)) {
      report(
        "coverage_missing_original_content",
        "warning",
        `Coverage "${shownName}" is missing originalContent source text.`,
        located
      );
    }
    if (looksReferential(coverage.limit) || looksReferential(coverage.deductible)) {
      report(
        "coverage_referential_value",
        "warning",
        `Coverage "${shownName}" contains referential language instead of a concrete scheduled term.`,
        located
      );
    }
    if (isUnlistedForm(formNumber)) {
      report(
        "coverage_form_missing_from_inventory",
        "warning",
        `Coverage "${shownName}" references form "${formNumber}" that is missing from form inventory.`,
        located
      );
    }
  }

  // Rows sharing a (name, formNumber) pair are compared against the row with
  // the highest source precedence.
  const coverageGroups = /* @__PURE__ */ new Map();
  for (const coverage of coverages) {
    const key = [
      String(coverage.name ?? "").toLowerCase(),
      String(coverage.formNumber ?? "").toLowerCase()
    ].join("|");
    const group = coverageGroups.get(key);
    if (group) group.push(coverage);
    else coverageGroups.set(key, [coverage]);
  }
  const term = (value) => String(value ?? "").trim();
  const conflicts = (a, b) => Boolean(a && b && a !== b);
  for (const [key, groupedCoverages] of coverageGroups.entries()) {
    if (groupedCoverages.length < 2) continue;
    const [highest, ...others] = [...groupedCoverages].sort(
      (a, b) => sourcePrecedence(b.sectionRef) - sourcePrecedence(a.sectionRef)
    );
    for (const lower of others) {
      const limitConflict = conflicts(term(highest.limit), term(lower.limit));
      const deductibleConflict = conflicts(term(highest.deductible), term(lower.deductible));
      if (!limitConflict && !deductibleConflict) continue;
      report(
        "coverage_precedence_conflict",
        "warning",
        `Coverage "${String(highest.name ?? key)}" has conflicting extracted terms across sources with different precedence.`,
        {
          extractorName: "coverage_limits",
          formNumber: normalizeFormNumber(highest.formNumber) ?? normalizeFormNumber(lower.formNumber),
          pageNumber: numberOrUndefined(lower.pageNumber),
          itemName: textOrUndefined(highest.name)
        }
      );
    }
  }

  for (const exclusion of exclusions) {
    const formNumber = normalizeFormNumber(exclusion.formNumber);
    const itemName = textOrUndefined(exclusion.name);
    const pageNumber = numberOrUndefined(exclusion.pageNumber);
    addFormEntry(inventory, formNumber, "exclusions", {
      title: itemName,
      pageStart: pageNumber,
      pageEnd: pageNumber
    });
    if (pageNumber === void 0) {
      report(
        "exclusion_missing_page_number",
        "warning",
        `Exclusion "${label(exclusion.name)}" is missing pageNumber provenance.`,
        { extractorName: "exclusions", formNumber, itemName }
      );
    }
    if (looksTocArtifact(exclusion.content)) {
      report(
        "exclusion_toc_artifact",
        "blocking",
        `Exclusion "${label(exclusion.name)}" appears to be a table-of-contents artifact.`,
        { extractorName: "exclusions", pageNumber, itemName }
      );
    }
  }

  for (const condition of conditions) {
    const itemName = textOrUndefined(condition.name);
    const pageNumber = numberOrUndefined(condition.pageNumber);
    if (pageNumber === void 0) {
      report(
        "condition_missing_page_number",
        "warning",
        `Condition "${label(condition.name)}" is missing pageNumber provenance.`,
        { extractorName: "conditions", itemName }
      );
    }
    if (looksTocArtifact(condition.content)) {
      report(
        "condition_toc_artifact",
        "blocking",
        `Condition "${label(condition.name)}" appears to be a table-of-contents artifact.`,
        { extractorName: "conditions", pageNumber, itemName }
      );
    }
  }

  for (const section of sections) {
    const isShort = typeof section.content === "string" && section.content.trim().length < 120;
    // A section counts as single-page when pageEnd is absent/undefined or
    // equal to pageStart.
    const isSinglePage = typeof section.pageStart === "number" && (section.pageEnd === void 0 || section.pageEnd === section.pageStart);
    if (isShort && isSinglePage) {
      report(
        "section_short_fragment",
        "warning",
        `Section "${label(section.title)}" may be an orphan continuation fragment.`,
        {
          extractorName: "sections",
          pageNumber: section.pageStart,
          itemName: textOrUndefined(section.title)
        }
      );
    }
  }

  const formInventory = [...inventory.values()].sort((a, b) => a.formNumber.localeCompare(b.formNumber));
  const rounds = reviewRounds.map((round) => ({
    round: round.round,
    kind: "llm_review",
    status: round.complete && round.qualityIssues.length === 0 ? "passed" : "warning",
    summary: round.qualityIssues[0] ?? (round.complete ? "Review passed." : "Review requested follow-up extraction.")
  }));
  const artifacts = [
    { kind: "form_inventory", label: "Form Inventory", itemCount: formInventory.length },
    { kind: "page_map", label: "Page Map", itemCount: pageAssignments.length }
  ];
  const qualityGateStatus = evaluateQualityGate({
    issues: deterministicIssues,
    hasRoundWarnings: reviewRounds.some((round) => round.qualityIssues.length > 0 || !round.complete)
  });
  return {
    issues: deterministicIssues,
    rounds,
    artifacts,
    reviewRoundRecords: reviewRounds,
    formInventory,
    qualityGateStatus
  };
}
3911
/**
 * Convert one LLM review result into the record stored in the review report.
 *
 * @param {number} round - Round number to store on the record.
 * @param {{ complete: boolean, missingFields: unknown[], qualityIssues?: string[] | null, additionalTasks: unknown[] }} review
 * @returns {object} Record with round, complete, missingFields, qualityIssues
 *   (an empty list when the review has none) and additionalTasks.
 */
function toReviewRoundRecord(round, review) {
  const { complete, missingFields, additionalTasks } = review;
  const qualityIssues = review.qualityIssues ?? [];
  return { round, complete, missingFields, qualityIssues, additionalTasks };
}
3920
+
3318
3921
  // src/extraction/coordinator.ts
3319
3922
  function createExtractor(config) {
3320
3923
  const {
@@ -3327,21 +3930,174 @@ function createExtractor(config) {
3327
3930
  onProgress,
3328
3931
  log,
3329
3932
  providerOptions,
3933
+ qualityGate = "warn",
3330
3934
  onCheckpointSave
3331
3935
  } = config;
3332
3936
  const limit = pLimit(concurrency);
3333
3937
  let totalUsage = { inputTokens: 0, outputTokens: 0 };
3938
+ let modelCalls = 0;
3939
+ let callsWithUsage = 0;
3940
+ let callsMissingUsage = 0;
3334
3941
  function trackUsage(usage) {
3942
+ modelCalls += 1;
3335
3943
  if (usage) {
3944
+ callsWithUsage += 1;
3336
3945
  totalUsage.inputTokens += usage.inputTokens;
3337
3946
  totalUsage.outputTokens += usage.outputTokens;
3338
3947
  onTokenUsage?.(usage);
3948
+ } else {
3949
+ callsMissingUsage += 1;
3950
+ }
3951
+ }
3952
/**
 * Store an extractor result in memory, merging it with whatever that
 * extractor already stored there via mergeExtractorResult.
 *
 * @param {string} name - Extractor name used as the memory key.
 * @param {object} data - Newly extracted result.
 * @param {Map<string, object>} memory - Mutated in place.
 * @returns {void}
 */
function mergeMemoryResult(name, data, memory) {
  const merged = mergeExtractorResult(name, memory.get(name), data);
  memory.set(name, merged);
}
3956
/**
 * Produce a compact JSON summary of what has been extracted so far.
 *
 * @param {Map<string, object>} memory - Extractor results keyed by name.
 * @returns {string} Pretty-printed JSON (2-space indent) with the extracted
 *   keys (excluding "classify"), per-extractor item counts, and up to 12
 *   coverage samples reduced to name/limit/deductible/formNumber. A result
 *   whose list field is not an array counts as zero items.
 */
function summarizeExtraction(memory) {
  const listOf = (extractorName, field) => {
    const items = memory.get(extractorName)?.[field];
    return Array.isArray(items) ? items : [];
  };
  const coverages = listOf("coverage_limits", "coverages");
  const coverageSamples = coverages.slice(0, 12).map((coverage) => {
    const { name, limit, deductible, formNumber } = coverage;
    return { name, limit, deductible, formNumber };
  });
  const summary = {
    extractedKeys: [...memory.keys()].filter((key) => key !== "classify"),
    declarationFieldCount: listOf("declarations", "fields").length,
    coverageCount: coverages.length,
    coverageSamples,
    endorsementCount: listOf("endorsements", "endorsements").length,
    exclusionCount: listOf("exclusions", "exclusions").length,
    conditionCount: listOf("conditions", "conditions").length,
    sectionCount: listOf("sections", "sections").length
  };
  return JSON.stringify(summary, null, 2);
}
3980
/**
 * Render the page map as one line per extractor listing its pages.
 *
 * @param {Array<{ extractorNames: string[], localPageNumber: number }>} pageAssignments
 * @returns {string} Lines of the form "<extractor>: pages 1, 2", in the
 *   order extractors are first seen, or a fixed placeholder sentence when no
 *   extractor has any page.
 */
function formatPageMapSummary(pageAssignments) {
  const pagesByExtractor = /* @__PURE__ */ new Map();
  for (const { extractorNames, localPageNumber } of pageAssignments) {
    for (const name of extractorNames) {
      const pages = pagesByExtractor.get(name);
      if (pages) pages.push(localPageNumber);
      else pagesByExtractor.set(name, [localPageNumber]);
    }
  }
  if (pagesByExtractor.size === 0) return "No page assignments available.";
  const lines = [];
  for (const [name, pages] of pagesByExtractor) {
    lines.push(`${name}: pages ${pages.join(", ")}`);
  }
  return lines.join("\n");
}
3990
/**
 * Clean up the per-page extractor assignments using page roles and the
 * form inventory.
 *
 * For each page:
 *  - extractor names are de-duplicated, falsy names dropped, and an empty
 *    list replaced by ["sections"];
 *  - "coverage_limits" is removed unless the page has schedule values, when
 *    the page role is a policy / condition-exclusion / endorsement form, or
 *    when the page has no "declarations" extractor and either the inventory
 *    marks it as an endorsement / notice / application page or it is also
 *    assigned conditions, exclusions or endorsements;
 *  - "endorsements" is added when the inventory places an endorsement form
 *    on the page.
 *
 * @param {Array<object>} pageAssignments - Entries with localPageNumber,
 *   extractorNames and optional pageRole / hasScheduleValues.
 * @param {{ forms: Array<{ formType: string, pageStart?: number, pageEnd?: number }> } | undefined} formInventory
 * @returns {Array<object>} New assignment objects with normalized extractorNames.
 */
function normalizePageAssignments(pageAssignments, formInventory) {
  const formTypesByPage = /* @__PURE__ */ new Map();
  if (formInventory) {
    for (const form of formInventory.forms) {
      if (form.pageStart == null) continue;
      const lastPage = form.pageEnd ?? form.pageStart;
      for (let page = form.pageStart; page <= lastPage; page++) {
        if (!formTypesByPage.has(page)) formTypesByPage.set(page, /* @__PURE__ */ new Set());
        formTypesByPage.get(page).add(form.formType);
      }
    }
  }
  const rolesWithoutLimits = ["policy_form", "condition_exclusion_form", "endorsement_form"];
  const formTypesWithoutLimits = ["endorsement", "notice", "application"];
  const competingExtractors = ["conditions", "exclusions", "endorsements"];
  return pageAssignments.map((assignment) => {
    const requested = assignment.extractorNames.length > 0 ? assignment.extractorNames : ["sections"];
    let extractorNames = [...new Set(requested.filter(Boolean))];
    const formTypes = formTypesByPage.get(assignment.localPageNumber);
    // Pages flagged as carrying schedule values always keep coverage_limits.
    if (extractorNames.includes("coverage_limits") && assignment.hasScheduleValues !== true) {
      const roleBlocks = rolesWithoutLimits.includes(assignment.pageRole);
      let contextBlocks = false;
      if (!extractorNames.includes("declarations")) {
        const inventoryBlocks = formTypes != null && formTypesWithoutLimits.some((type) => formTypes.has(type));
        contextBlocks = inventoryBlocks || competingExtractors.some((name) => extractorNames.includes(name));
      }
      if (roleBlocks || contextBlocks) {
        extractorNames = extractorNames.filter((name) => name !== "coverage_limits");
      }
    }
    if (formTypes?.has("endorsement") && !extractorNames.includes("endorsements")) {
      extractorNames = [...extractorNames, "endorsements"];
    }
    if (extractorNames.length === 0) {
      extractorNames = ["sections"];
    }
    return { ...assignment, extractorNames };
  });
}
4034
/**
 * Build the newline-separated template hint text passed to the prompts.
 *
 * @param {string} primaryType - Primary policy type.
 * @param {string} documentType - Document type.
 * @param {number} pageCount - Total number of pages in the document.
 * @param {{ expectedSections: string[], pageHints: Record<string, string> }} template
 * @returns {string} Four lines: document type, expected sections, page
 *   hints ("key: value" pairs joined by "; ") and total pages.
 */
function buildTemplateHints(primaryType, documentType, pageCount, template) {
  const lines = [];
  lines.push(`Document type: ${primaryType} ${documentType}`);
  lines.push(`Expected sections: ${template.expectedSections.join(", ")}`);
  const pageHintText = Object.entries(template.pageHints).map(([k, v]) => `${k}: ${v}`).join("; ");
  lines.push(`Page hints: ${pageHintText}`);
  lines.push(`Total pages: ${pageCount}`);
  return lines.join("\n");
}
4042
/**
 * Collapse a list of page numbers into ascending runs of consecutive pages.
 *
 * @param {number[]} pages - Page numbers in any order; duplicates are ignored.
 * @returns {Array<{ startPage: number, endPage: number }>} One range per run
 *   of consecutive pages, ordered by startPage; empty for empty input.
 */
function groupContiguousPages(pages) {
  if (pages.length === 0) return [];
  const ordered = [...new Set(pages)].sort((a, b) => a - b);
  const ranges = [];
  for (const page of ordered) {
    const open = ranges[ranges.length - 1];
    if (open && page === open.endPage + 1) {
      // Extends the run currently being built.
      open.endPage = page;
    } else {
      ranges.push({ startPage: page, endPage: page });
    }
  }
  return ranges;
}
4061
/**
 * Turn the page map into an extraction plan.
 *
 * Pages are bucketed per extractor (pages without extractors go to
 * "sections"), any page in 1..pageCount not claimed by an extractor is also
 * given to "sections", and each bucket is split into contiguous page ranges
 * that become individual tasks.
 *
 * @param {Array<{ extractorNames: string[], localPageNumber: number }>} pageAssignments
 * @param {number} pageCount - Total number of pages in the document.
 * @returns {{ tasks: Array<{ extractorName: string, startPage: number, endPage: number, description: string }>,
 *   pageMap: Array<{ section: string, pages: string }> }} Tasks sorted by
 *   start page then extractor name, plus a per-extractor page listing.
 */
function buildPlanFromPageAssignments(pageAssignments, pageCount) {
  const pagesByExtractor = /* @__PURE__ */ new Map();
  const assignPage = (extractorName, page) => {
    const pages = pagesByExtractor.get(extractorName);
    if (pages) pages.push(page);
    else pagesByExtractor.set(extractorName, [page]);
  };
  for (const { extractorNames, localPageNumber } of pageAssignments) {
    const targets = extractorNames.length > 0 ? extractorNames : ["sections"];
    for (const extractorName of targets) assignPage(extractorName, localPageNumber);
  }
  // Snapshot of claimed pages taken before the gap fill below.
  const claimedPages = new Set([...pagesByExtractor.values()].flat());
  for (let page = 1; page <= pageCount; page += 1) {
    if (!claimedPages.has(page)) assignPage("sections", page);
  }
  const tasks = [];
  for (const [extractorName, pages] of pagesByExtractor) {
    for (const { startPage, endPage } of groupContiguousPages(pages)) {
      tasks.push({
        extractorName,
        startPage,
        endPage,
        description: `Page-mapped ${extractorName} extraction for pages ${startPage}-${endPage}`
      });
    }
  }
  tasks.sort((a, b) => a.startPage - b.startPage || a.extractorName.localeCompare(b.extractorName));
  const pageMap = [];
  for (const [section, pages] of pagesByExtractor) {
    const uniqueSorted = [...new Set(pages)].sort((a, b) => a - b);
    pageMap.push({ section, pages: `pages ${uniqueSorted.join(", ")}` });
  }
  return { tasks, pageMap };
}
3341
4094
  async function extract(pdfBase64, documentId, options) {
3342
4095
  const id = documentId ?? `doc-${Date.now()}`;
3343
4096
  const memory = /* @__PURE__ */ new Map();
3344
4097
  totalUsage = { inputTokens: 0, outputTokens: 0 };
4098
+ modelCalls = 0;
4099
+ callsWithUsage = 0;
4100
+ callsMissingUsage = 0;
3345
4101
  const pipelineCtx = createPipelineContext({
3346
4102
  id,
3347
4103
  onSave: onCheckpointSave,
@@ -3392,40 +4148,109 @@ function createExtractor(config) {
3392
4148
  const primaryType = policyTypes[0] ?? "other";
3393
4149
  const template = getTemplate(primaryType);
3394
4150
  const pageCount = resumed?.pageCount ?? await getPdfPageCount(pdfBase64);
3395
- let plan;
3396
- if (resumed?.plan && pipelineCtx.isPhaseComplete("plan")) {
3397
- plan = resumed.plan;
3398
- onProgress?.("Resuming from checkpoint (plan complete)...");
4151
+ const templateHints = buildTemplateHints(primaryType, documentType, pageCount, template);
4152
+ let formInventory;
4153
+ if (resumed?.formInventory && pipelineCtx.isPhaseComplete("form_inventory")) {
4154
+ formInventory = resumed.formInventory;
4155
+ memory.set("form_inventory", formInventory);
4156
+ onProgress?.("Resuming from checkpoint (form inventory complete)...");
3399
4157
  } else {
3400
- onProgress?.(`Planning extraction for ${primaryType} ${documentType}...`);
3401
- const templateHints = [
3402
- `Document type: ${primaryType} ${documentType}`,
3403
- `Expected sections: ${template.expectedSections.join(", ")}`,
3404
- `Page hints: ${Object.entries(template.pageHints).map(([k, v]) => `${k}: ${v}`).join("; ")}`,
3405
- `Total pages: ${pageCount}`
3406
- ].join("\n");
3407
- const planResponse = await safeGenerateObject(
4158
+ onProgress?.(`Building form inventory for ${primaryType} ${documentType}...`);
4159
+ const formInventoryResponse = await safeGenerateObject(
3408
4160
  generateObject,
3409
4161
  {
3410
- prompt: buildPlanPrompt(templateHints),
3411
- schema: ExtractionPlanSchema,
4162
+ prompt: buildFormInventoryPrompt(templateHints),
4163
+ schema: FormInventorySchema,
3412
4164
  maxTokens: 2048,
3413
4165
  providerOptions: { ...providerOptions, pdfBase64 }
3414
4166
  },
3415
4167
  {
3416
- fallback: {
3417
- tasks: [{ extractorName: "sections", startPage: 1, endPage: pageCount, description: "Full document fallback extraction" }]
3418
- },
4168
+ fallback: { forms: [] },
3419
4169
  log,
3420
- onError: (err, attempt) => log?.(`Plan attempt ${attempt + 1} failed: ${err}`)
4170
+ onError: (err, attempt) => log?.(`Form inventory attempt ${attempt + 1} failed: ${err instanceof Error ? err.message : String(err)}`)
3421
4171
  }
3422
4172
  );
3423
- trackUsage(planResponse.usage);
3424
- plan = planResponse.object;
4173
+ trackUsage(formInventoryResponse.usage);
4174
+ formInventory = formInventoryResponse.object;
4175
+ memory.set("form_inventory", formInventory);
4176
+ await pipelineCtx.save("form_inventory", {
4177
+ id,
4178
+ pageCount,
4179
+ classifyResult,
4180
+ formInventory,
4181
+ memory: Object.fromEntries(memory)
4182
+ });
4183
+ }
4184
+ let pageAssignments;
4185
+ if (resumed?.pageAssignments && pipelineCtx.isPhaseComplete("page_map")) {
4186
+ pageAssignments = resumed.pageAssignments;
4187
+ onProgress?.("Resuming from checkpoint (page map complete)...");
4188
+ } else {
4189
+ onProgress?.(`Mapping document pages for ${primaryType} ${documentType}...`);
4190
+ const chunkSize = 8;
4191
+ const collectedAssignments = [];
4192
+ const formInventoryHint = formInventory?.forms.length ? formatFormInventoryForPageMap(formInventory.forms) : void 0;
4193
+ for (let startPage = 1; startPage <= pageCount; startPage += chunkSize) {
4194
+ const endPage = Math.min(pageCount, startPage + chunkSize - 1);
4195
+ const pagesPdf = await extractPageRange(pdfBase64, startPage, endPage);
4196
+ const mapResponse = await safeGenerateObject(
4197
+ generateObject,
4198
+ {
4199
+ prompt: buildPageMapPrompt(templateHints, startPage, endPage, formInventoryHint),
4200
+ schema: PageMapChunkSchema,
4201
+ maxTokens: 2048,
4202
+ providerOptions: { ...providerOptions, pdfBase64: pagesPdf }
4203
+ },
4204
+ {
4205
+ fallback: {
4206
+ pages: Array.from({ length: endPage - startPage + 1 }, (_, index) => ({
4207
+ localPageNumber: index + 1,
4208
+ extractorNames: index === 0 && startPage === 1 ? ["carrier_info", "named_insured", "declarations", "coverage_limits"] : ["sections"],
4209
+ confidence: 0,
4210
+ notes: "Fallback page assignment"
4211
+ }))
4212
+ },
4213
+ log,
4214
+ onError: (err, attempt) => log?.(`Page map attempt ${attempt + 1} failed for pages ${startPage}-${endPage}: ${err}`)
4215
+ }
4216
+ );
4217
+ trackUsage(mapResponse.usage);
4218
+ for (const assignment of mapResponse.object.pages) {
4219
+ collectedAssignments.push({
4220
+ ...assignment,
4221
+ localPageNumber: startPage + assignment.localPageNumber - 1
4222
+ });
4223
+ }
4224
+ }
4225
+ pageAssignments = collectedAssignments.length > 0 ? collectedAssignments : Array.from({ length: pageCount }, (_, index) => ({
4226
+ localPageNumber: index + 1,
4227
+ extractorNames: index === 0 ? ["carrier_info", "named_insured", "declarations", "coverage_limits"] : ["sections"],
4228
+ confidence: 0,
4229
+ notes: "Full-document fallback page assignment"
4230
+ }));
4231
+ pageAssignments = normalizePageAssignments(pageAssignments, formInventory);
4232
+ await pipelineCtx.save("page_map", {
4233
+ id,
4234
+ pageCount,
4235
+ classifyResult,
4236
+ formInventory,
4237
+ pageAssignments,
4238
+ memory: Object.fromEntries(memory)
4239
+ });
4240
+ }
4241
+ let plan;
4242
+ if (resumed?.plan && pipelineCtx.isPhaseComplete("plan")) {
4243
+ plan = resumed.plan;
4244
+ onProgress?.("Resuming from checkpoint (plan complete)...");
4245
+ } else {
4246
+ onProgress?.(`Building extraction plan from page map for ${primaryType} ${documentType}...`);
4247
+ plan = buildPlanFromPageAssignments(pageAssignments, pageCount);
3425
4248
  await pipelineCtx.save("plan", {
3426
4249
  id,
3427
4250
  pageCount,
3428
4251
  classifyResult,
4252
+ formInventory,
4253
+ pageAssignments,
3429
4254
  plan,
3430
4255
  memory: Object.fromEntries(memory)
3431
4256
  });
@@ -3466,35 +4291,46 @@ function createExtractor(config) {
3466
4291
  );
3467
4292
  for (const result of extractorResults) {
3468
4293
  if (result) {
3469
- memory.set(result.name, result.data);
4294
+ mergeMemoryResult(result.name, result.data, memory);
3470
4295
  }
3471
4296
  }
3472
4297
  await pipelineCtx.save("extract", {
3473
4298
  id,
3474
4299
  pageCount,
3475
4300
  classifyResult,
4301
+ formInventory,
4302
+ pageAssignments,
3476
4303
  plan,
3477
4304
  memory: Object.fromEntries(memory)
3478
4305
  });
3479
4306
  }
4307
+ let reviewRounds = resumed?.reviewReport?.reviewRoundRecords ?? [];
4308
+ let reviewReport = resumed?.reviewReport;
3480
4309
  if (!pipelineCtx.isPhaseComplete("review")) {
4310
+ reviewRounds = [];
3481
4311
  for (let round = 0; round < maxReviewRounds; round++) {
3482
4312
  const extractedKeys = [...memory.keys()].filter((k) => k !== "classify");
4313
+ const extractionSummary = summarizeExtraction(memory);
4314
+ const pageMapSummary = formatPageMapSummary(pageAssignments);
3483
4315
  const reviewResponse = await safeGenerateObject(
3484
4316
  generateObject,
3485
4317
  {
3486
- prompt: buildReviewPrompt(template.required, extractedKeys),
4318
+ prompt: buildReviewPrompt(template.required, extractedKeys, extractionSummary, pageMapSummary),
3487
4319
  schema: ReviewResultSchema,
3488
- maxTokens: 1024,
3489
- providerOptions
4320
+ maxTokens: 1536,
4321
+ providerOptions: { ...providerOptions, pdfBase64 }
3490
4322
  },
3491
4323
  {
3492
- fallback: { complete: true, missingFields: [], additionalTasks: [] },
4324
+ fallback: { complete: true, missingFields: [], qualityIssues: [], additionalTasks: [] },
3493
4325
  log,
3494
4326
  onError: (err, attempt) => log?.(`Review round ${round + 1} attempt ${attempt + 1} failed: ${err}`)
3495
4327
  }
3496
4328
  );
3497
4329
  trackUsage(reviewResponse.usage);
4330
+ reviewRounds.push(toReviewRoundRecord(round + 1, reviewResponse.object));
4331
+ if (reviewResponse.object.qualityIssues?.length) {
4332
+ await log?.(`Review round ${round + 1} quality issues: ${reviewResponse.object.qualityIssues.join("; ")}`);
4333
+ }
3498
4334
  if (reviewResponse.object.complete || reviewResponse.object.additionalTasks.length === 0) {
3499
4335
  onProgress?.("Extraction complete.");
3500
4336
  break;
@@ -3529,25 +4365,49 @@ function createExtractor(config) {
3529
4365
  );
3530
4366
  for (const result of followUpResults) {
3531
4367
  if (result) {
3532
- memory.set(result.name, result.data);
4368
+ mergeMemoryResult(result.name, result.data, memory);
3533
4369
  }
3534
4370
  }
3535
4371
  }
4372
+ reviewReport = buildExtractionReviewReport({
4373
+ memory,
4374
+ pageAssignments,
4375
+ reviewRounds
4376
+ });
4377
+ if (reviewReport.issues.length > 0) {
4378
+ await log?.(
4379
+ `Deterministic review issues: ${reviewReport.issues.map((issue) => issue.message).join("; ")}`
4380
+ );
4381
+ }
4382
+ if (shouldFailQualityGate(qualityGate, reviewReport.qualityGateStatus)) {
4383
+ throw new Error("Extraction quality gate failed. See reviewReport for blocking issues.");
4384
+ }
3536
4385
  await pipelineCtx.save("review", {
3537
4386
  id,
3538
4387
  pageCount,
3539
4388
  classifyResult,
4389
+ formInventory,
4390
+ pageAssignments,
3540
4391
  plan,
4392
+ reviewReport,
3541
4393
  memory: Object.fromEntries(memory)
3542
4394
  });
3543
4395
  }
4396
+ reviewReport ?? (reviewReport = buildExtractionReviewReport({
4397
+ memory,
4398
+ pageAssignments,
4399
+ reviewRounds
4400
+ }));
3544
4401
  onProgress?.("Assembling document...");
3545
4402
  const document = assembleDocument(id, documentType, memory);
3546
4403
  await pipelineCtx.save("assemble", {
3547
4404
  id,
3548
4405
  pageCount,
3549
4406
  classifyResult,
4407
+ formInventory,
4408
+ pageAssignments,
3550
4409
  plan,
4410
+ reviewReport,
3551
4411
  memory: Object.fromEntries(memory),
3552
4412
  document
3553
4413
  });
@@ -3560,11 +4420,21 @@ function createExtractor(config) {
3560
4420
  trackUsage(formatResult.usage);
3561
4421
  const chunks = chunkDocument(formatResult.document);
3562
4422
  const finalCheckpoint = pipelineCtx.getCheckpoint();
4423
+ if (callsMissingUsage > 0) {
4424
+ await log?.(`Token usage was unavailable for ${callsMissingUsage}/${modelCalls} model calls. Check that your provider callbacks return usage.`);
4425
+ onProgress?.(`Token usage unavailable for ${callsMissingUsage}/${modelCalls} model calls.`);
4426
+ }
3563
4427
  return {
3564
4428
  document: formatResult.document,
3565
4429
  chunks,
3566
4430
  tokenUsage: totalUsage,
3567
- checkpoint: finalCheckpoint
4431
+ usageReporting: {
4432
+ modelCalls,
4433
+ callsWithUsage,
4434
+ callsMissingUsage
4435
+ },
4436
+ checkpoint: finalCheckpoint,
4437
+ reviewReport
3568
4438
  };
3569
4439
  }
3570
4440
  return { extract };
@@ -3784,8 +4654,8 @@ Respond with JSON only:
3784
4654
  }`;
3785
4655
 
3786
4656
  // src/schemas/application.ts
3787
- import { z as z32 } from "zod";
3788
- var FieldTypeSchema = z32.enum([
4657
+ import { z as z33 } from "zod";
4658
+ var FieldTypeSchema = z33.enum([
3789
4659
  "text",
3790
4660
  "numeric",
3791
4661
  "currency",
@@ -3794,100 +4664,131 @@ var FieldTypeSchema = z32.enum([
3794
4664
  "table",
3795
4665
  "declaration"
3796
4666
  ]);
3797
- var ApplicationFieldSchema = z32.object({
3798
- id: z32.string(),
3799
- label: z32.string(),
3800
- section: z32.string(),
4667
+ var ApplicationFieldSchema = z33.object({
4668
+ id: z33.string(),
4669
+ label: z33.string(),
4670
+ section: z33.string(),
3801
4671
  fieldType: FieldTypeSchema,
3802
- required: z32.boolean(),
3803
- options: z32.array(z32.string()).optional(),
3804
- columns: z32.array(z32.string()).optional(),
3805
- requiresExplanationIfYes: z32.boolean().optional(),
3806
- condition: z32.object({
3807
- dependsOn: z32.string(),
3808
- whenValue: z32.string()
4672
+ required: z33.boolean(),
4673
+ options: z33.array(z33.string()).optional(),
4674
+ columns: z33.array(z33.string()).optional(),
4675
+ requiresExplanationIfYes: z33.boolean().optional(),
4676
+ condition: z33.object({
4677
+ dependsOn: z33.string(),
4678
+ whenValue: z33.string()
3809
4679
  }).optional(),
3810
- value: z32.string().optional(),
3811
- source: z32.string().optional().describe("Where the value came from: auto-fill, user, lookup"),
3812
- confidence: z32.enum(["confirmed", "high", "medium", "low"]).optional()
4680
+ value: z33.string().optional(),
4681
+ source: z33.string().optional().describe("Where the value came from: auto-fill, user, lookup"),
4682
+ confidence: z33.enum(["confirmed", "high", "medium", "low"]).optional()
4683
+ });
4684
+ var ApplicationClassifyResultSchema = z33.object({
4685
+ isApplication: z33.boolean(),
4686
+ confidence: z33.number().min(0).max(1),
4687
+ applicationType: z33.string().nullable()
4688
+ });
4689
+ var FieldExtractionResultSchema = z33.object({
4690
+ fields: z33.array(ApplicationFieldSchema)
4691
+ });
4692
+ var AutoFillMatchSchema = z33.object({
4693
+ fieldId: z33.string(),
4694
+ value: z33.string(),
4695
+ confidence: z33.enum(["confirmed"]),
4696
+ contextKey: z33.string()
3813
4697
  });
3814
- var ApplicationClassifyResultSchema = z32.object({
3815
- isApplication: z32.boolean(),
3816
- confidence: z32.number().min(0).max(1),
3817
- applicationType: z32.string().nullable()
4698
+ var AutoFillResultSchema = z33.object({
4699
+ matches: z33.array(AutoFillMatchSchema)
3818
4700
  });
3819
- var FieldExtractionResultSchema = z32.object({
3820
- fields: z32.array(ApplicationFieldSchema)
4701
+ var QuestionBatchResultSchema = z33.object({
4702
+ batches: z33.array(z33.array(z33.string()).describe("Array of field IDs in this batch"))
3821
4703
  });
3822
- var AutoFillMatchSchema = z32.object({
3823
- fieldId: z32.string(),
3824
- value: z32.string(),
3825
- confidence: z32.enum(["confirmed"]),
3826
- contextKey: z32.string()
4704
+ var LookupRequestSchema = z33.object({
4705
+ type: z33.string().describe("Type of lookup: 'records', 'website', 'policy'"),
4706
+ description: z33.string(),
4707
+ url: z33.string().optional(),
4708
+ targetFieldIds: z33.array(z33.string())
3827
4709
  });
3828
- var AutoFillResultSchema = z32.object({
3829
- matches: z32.array(AutoFillMatchSchema)
4710
+ var ReplyIntentSchema = z33.object({
4711
+ primaryIntent: z33.enum(["answers_only", "question", "lookup_request", "mixed"]),
4712
+ hasAnswers: z33.boolean(),
4713
+ questionText: z33.string().optional(),
4714
+ questionFieldIds: z33.array(z33.string()).optional(),
4715
+ lookupRequests: z33.array(LookupRequestSchema).optional()
3830
4716
  });
3831
- var QuestionBatchResultSchema = z32.object({
3832
- batches: z32.array(z32.array(z32.string()).describe("Array of field IDs in this batch"))
4717
+ var ParsedAnswerSchema = z33.object({
4718
+ fieldId: z33.string(),
4719
+ value: z33.string(),
4720
+ explanation: z33.string().optional()
3833
4721
  });
3834
- var LookupRequestSchema = z32.object({
3835
- type: z32.string().describe("Type of lookup: 'records', 'website', 'policy'"),
3836
- description: z32.string(),
3837
- url: z32.string().optional(),
3838
- targetFieldIds: z32.array(z32.string())
4722
+ var AnswerParsingResultSchema = z33.object({
4723
+ answers: z33.array(ParsedAnswerSchema),
4724
+ unanswered: z33.array(z33.string()).describe("Field IDs that were not answered")
3839
4725
  });
3840
- var ReplyIntentSchema = z32.object({
3841
- primaryIntent: z32.enum(["answers_only", "question", "lookup_request", "mixed"]),
3842
- hasAnswers: z32.boolean(),
3843
- questionText: z32.string().optional(),
3844
- questionFieldIds: z32.array(z32.string()).optional(),
3845
- lookupRequests: z32.array(LookupRequestSchema).optional()
4726
+ var LookupFillSchema = z33.object({
4727
+ fieldId: z33.string(),
4728
+ value: z33.string(),
4729
+ source: z33.string().describe("Specific citable reference, e.g. 'GL Policy #POL-12345 (Hartford)'")
3846
4730
  });
3847
- var ParsedAnswerSchema = z32.object({
3848
- fieldId: z32.string(),
3849
- value: z32.string(),
3850
- explanation: z32.string().optional()
4731
+ var LookupFillResultSchema = z33.object({
4732
+ fills: z33.array(LookupFillSchema),
4733
+ unfillable: z33.array(z33.string()),
4734
+ explanation: z33.string().optional()
3851
4735
  });
3852
- var AnswerParsingResultSchema = z32.object({
3853
- answers: z32.array(ParsedAnswerSchema),
3854
- unanswered: z32.array(z32.string()).describe("Field IDs that were not answered")
4736
+ var FlatPdfPlacementSchema = z33.object({
4737
+ fieldId: z33.string(),
4738
+ page: z33.number(),
4739
+ x: z33.number().describe("Percentage from left edge (0-100)"),
4740
+ y: z33.number().describe("Percentage from top edge (0-100)"),
4741
+ text: z33.string(),
4742
+ fontSize: z33.number().optional(),
4743
+ isCheckmark: z33.boolean().optional()
3855
4744
  });
3856
- var LookupFillSchema = z32.object({
3857
- fieldId: z32.string(),
3858
- value: z32.string(),
3859
- source: z32.string().describe("Specific citable reference, e.g. 'GL Policy #POL-12345 (Hartford)'")
4745
+ var AcroFormMappingSchema = z33.object({
4746
+ fieldId: z33.string(),
4747
+ acroFormName: z33.string(),
4748
+ value: z33.string()
3860
4749
  });
3861
- var LookupFillResultSchema = z32.object({
3862
- fills: z32.array(LookupFillSchema),
3863
- unfillable: z32.array(z32.string()),
3864
- explanation: z32.string().optional()
4750
+ var QualityGateStatusSchema = z33.enum(["passed", "warning", "failed"]);
4751
+ var QualitySeveritySchema = z33.enum(["info", "warning", "blocking"]);
4752
+ var ApplicationQualityIssueSchema = z33.object({
4753
+ code: z33.string(),
4754
+ severity: QualitySeveritySchema,
4755
+ message: z33.string(),
4756
+ fieldId: z33.string().optional()
3865
4757
  });
3866
- var FlatPdfPlacementSchema = z32.object({
3867
- fieldId: z32.string(),
3868
- page: z32.number(),
3869
- x: z32.number().describe("Percentage from left edge (0-100)"),
3870
- y: z32.number().describe("Percentage from top edge (0-100)"),
3871
- text: z32.string(),
3872
- fontSize: z32.number().optional(),
3873
- isCheckmark: z32.boolean().optional()
4758
+ var ApplicationQualityRoundSchema = z33.object({
4759
+ round: z33.number(),
4760
+ kind: z33.string(),
4761
+ status: QualityGateStatusSchema,
4762
+ summary: z33.string().optional()
3874
4763
  });
3875
- var AcroFormMappingSchema = z32.object({
3876
- fieldId: z32.string(),
3877
- acroFormName: z32.string(),
3878
- value: z32.string()
4764
+ var ApplicationQualityArtifactSchema = z33.object({
4765
+ kind: z33.string(),
4766
+ label: z33.string().optional(),
4767
+ itemCount: z33.number().optional()
3879
4768
  });
3880
- var ApplicationStateSchema = z32.object({
3881
- id: z32.string(),
3882
- pdfBase64: z32.string().optional().describe("Original PDF, omitted after extraction"),
3883
- title: z32.string().optional(),
3884
- applicationType: z32.string().nullable().optional(),
3885
- fields: z32.array(ApplicationFieldSchema),
3886
- batches: z32.array(z32.array(z32.string())).optional(),
3887
- currentBatchIndex: z32.number().default(0),
3888
- status: z32.enum(["classifying", "extracting", "auto_filling", "batching", "collecting", "confirming", "mapping", "complete"]),
3889
- createdAt: z32.number(),
3890
- updatedAt: z32.number()
4769
+ var ApplicationEmailReviewSchema = z33.object({
4770
+ issues: z33.array(ApplicationQualityIssueSchema),
4771
+ qualityGateStatus: QualityGateStatusSchema
4772
+ });
4773
+ var ApplicationQualityReportSchema = z33.object({
4774
+ issues: z33.array(ApplicationQualityIssueSchema),
4775
+ rounds: z33.array(ApplicationQualityRoundSchema).optional(),
4776
+ artifacts: z33.array(ApplicationQualityArtifactSchema).optional(),
4777
+ emailReview: ApplicationEmailReviewSchema.optional(),
4778
+ qualityGateStatus: QualityGateStatusSchema
4779
+ });
4780
+ var ApplicationStateSchema = z33.object({
4781
+ id: z33.string(),
4782
+ pdfBase64: z33.string().optional().describe("Original PDF, omitted after extraction"),
4783
+ title: z33.string().optional(),
4784
+ applicationType: z33.string().nullable().optional(),
4785
+ fields: z33.array(ApplicationFieldSchema),
4786
+ batches: z33.array(z33.array(z33.string())).optional(),
4787
+ currentBatchIndex: z33.number().default(0),
4788
+ qualityReport: ApplicationQualityReportSchema.optional(),
4789
+ status: z33.enum(["classifying", "extracting", "auto_filling", "batching", "collecting", "confirming", "mapping", "complete"]),
4790
+ createdAt: z33.number(),
4791
+ updatedAt: z33.number()
3891
4792
  });
3892
4793
 
3893
4794
  // src/application/agents/classifier.ts
@@ -4395,6 +5296,87 @@ async function generateBatchEmail(batchFields, batchIndex, totalBatches, opts, g
4395
5296
  return { text, usage };
4396
5297
  }
4397
5298
 
5299
+ // src/application/quality.ts
5300
/**
 * Decide whether a field's provenance string is too vague to be cited.
 *
 * A source is considered vague when it is missing, is not a string, is blank
 * after trimming, is exactly "unknown", "context" or "user provided"
 * (case-insensitive), or mentions "existing records" / "available data"
 * anywhere in the text.
 *
 * @param {string | null | undefined} source - Provenance text stored on a field.
 * @returns {boolean} `true` when the source cannot serve as a citable reference.
 */
function isVagueSource(source) {
  // Missing or non-string provenance can never be cited (and must not reach .trim()).
  if (typeof source !== "string") return true;
  const normalized = source.trim().toLowerCase();
  // Whitespace-only text is truthy but carries no provenance at all.
  if (normalized === "") return true;
  return (
    normalized === "unknown" ||
    normalized === "context" ||
    normalized === "user provided" ||
    normalized.includes("existing records") ||
    normalized.includes("available data")
  );
}
5305
/**
 * Run deterministic quality checks over an application's fields and build
 * the resulting quality report.
 *
 * Checks performed per field, in order:
 *  - duplicate field id                      -> "duplicate_field_id" (blocking)
 *  - required field without a value          -> "required_field_unfilled" (warning)
 *  - filled field without any source         -> "filled_field_missing_source" (blocking)
 *  - filled field whose source is vague      -> "filled_field_vague_source" (warning)
 *  - filled field with low/missing confidence -> "filled_field_low_confidence" (warning)
 *
 * @param {{ fields: Array<object> }} state - Application state; only `fields` is read.
 * @returns {{ issues: Array<object>, rounds: Array, artifacts: Array<object>, qualityGateStatus: * }}
 *   The report; `qualityGateStatus` is whatever `evaluateQualityGate({ issues })` returns.
 */
function buildApplicationQualityReport(state) {
  const issues = [];
  const seenIds = /* @__PURE__ */ new Set();
  const report = (code, severity, message, field) => {
    issues.push({ code, severity, message, fieldId: field.id });
  };
  for (const field of state.fields) {
    if (seenIds.has(field.id)) {
      report("duplicate_field_id", "blocking", `Field "${field.label}" has a duplicate id "${field.id}".`, field);
    }
    seenIds.add(field.id);
    if (field.required && !field.value) {
      report("required_field_unfilled", "warning", `Required field "${field.label}" is still unfilled.`, field);
    }
    // The remaining checks only apply to fields that actually carry a value.
    if (!field.value) continue;
    if (!field.source) {
      report("filled_field_missing_source", "blocking", `Filled field "${field.label}" is missing source provenance.`, field);
    } else if (isVagueSource(field.source)) {
      // Only reached when a source exists: a missing source is already reported
      // above as blocking, so it must not also be reported as "vague".
      report("filled_field_vague_source", "warning", `Filled field "${field.label}" has a vague or non-citable source.`, field);
    }
    if (!field.confidence || field.confidence === "low") {
      report("filled_field_low_confidence", "warning", `Filled field "${field.label}" has low or missing confidence.`, field);
    }
  }
  return {
    issues,
    rounds: [],
    artifacts: [
      { kind: "application_fields", label: "Application Fields", itemCount: state.fields.length }
    ],
    qualityGateStatus: evaluateQualityGate({ issues })
  };
}
5360
/**
 * Check that a generated batch email mentions every field it is meant to ask about.
 *
 * A field is reported with "email_missing_field_prompt" (warning) when its label,
 * after normalization, is at least 6 characters long and does not occur in the
 * normalized email text. Shorter labels are skipped. Normalization lower-cases,
 * trims, and collapses every whitespace run to a single space so that a label
 * wrapped across lines in the email still counts as mentioned.
 *
 * @param {string | null | undefined} text - Generated email body; missing text is treated as empty.
 * @param {Array<{ id: string, label: string }>} batchFields - Fields the email should prompt for.
 * @returns {{ issues: Array<object>, qualityGateStatus: * }}
 *   Issues found plus the result of `evaluateQualityGate({ issues })`.
 */
function reviewBatchEmail(text, batchFields) {
  const normalize = (value) => String(value ?? "").replace(/\s+/g, " ").trim().toLowerCase();
  const issues = [];
  const normalized = normalize(text);
  for (const field of batchFields) {
    const label = normalize(field.label);
    if (label.length >= 6 && !normalized.includes(label)) {
      issues.push({
        code: "email_missing_field_prompt",
        severity: "warning",
        message: `Generated email does not clearly mention field "${field.label}".`,
        fieldId: field.id
      });
    }
  }
  return {
    issues,
    qualityGateStatus: evaluateQualityGate({ issues })
  };
}
5379
+
4398
5380
  // src/application/coordinator.ts
4399
5381
  function createApplicationPipeline(config) {
4400
5382
  const {
@@ -4409,7 +5391,8 @@ function createApplicationPipeline(config) {
4409
5391
  onTokenUsage,
4410
5392
  onProgress,
4411
5393
  log,
4412
- providerOptions
5394
+ providerOptions,
5395
+ qualityGate = "warn"
4413
5396
  } = config;
4414
5397
  const limit = pLimit(concurrency);
4415
5398
  let totalUsage = { inputTokens: 0, outputTokens: 0 };
@@ -4431,6 +5414,7 @@ function createApplicationPipeline(config) {
4431
5414
  title: void 0,
4432
5415
  applicationType: null,
4433
5416
  fields: [],
5417
+ qualityReport: void 0,
4434
5418
  batches: void 0,
4435
5419
  currentBatchIndex: 0,
4436
5420
  status: "classifying",
@@ -4455,8 +5439,9 @@ function createApplicationPipeline(config) {
4455
5439
  if (!classifyResult.isApplication) {
4456
5440
  state.status = "complete";
4457
5441
  state.updatedAt = Date.now();
5442
+ state.qualityReport = buildApplicationQualityReport(state);
4458
5443
  await applicationStore?.save(state);
4459
- return { state, tokenUsage: totalUsage };
5444
+ return { state, tokenUsage: totalUsage, reviewReport: state.qualityReport };
4460
5445
  }
4461
5446
  state.applicationType = classifyResult.applicationType;
4462
5447
  state.status = "extracting";
@@ -4480,8 +5465,9 @@ function createApplicationPipeline(config) {
4480
5465
  await log?.("No fields extracted, completing pipeline with empty result");
4481
5466
  state.status = "complete";
4482
5467
  state.updatedAt = Date.now();
5468
+ state.qualityReport = buildApplicationQualityReport(state);
4483
5469
  await applicationStore?.save(state);
4484
- return { state, tokenUsage: totalUsage };
5470
+ return { state, tokenUsage: totalUsage, reviewReport: state.qualityReport };
4485
5471
  }
4486
5472
  state.fields = fields;
4487
5473
  state.title = classifyResult.applicationType ?? void 0;
@@ -4581,11 +5567,15 @@ function createApplicationPipeline(config) {
4581
5567
  } else {
4582
5568
  state.status = "confirming";
4583
5569
  }
5570
+ state.qualityReport = buildApplicationQualityReport(state);
4584
5571
  state.updatedAt = Date.now();
4585
5572
  await applicationStore?.save(state);
5573
+ if (shouldFailQualityGate(qualityGate, state.qualityReport.qualityGateStatus)) {
5574
+ throw new Error("Application quality gate failed. See state.qualityReport for blocking issues.");
5575
+ }
4586
5576
  const filledCount = state.fields.filter((f) => f.value).length;
4587
5577
  onProgress?.(`Application processed: ${filledCount}/${state.fields.length} fields filled, ${state.batches?.length ?? 0} batches to collect.`);
4588
- return { state, tokenUsage: totalUsage };
5578
+ return { state, tokenUsage: totalUsage, reviewReport: state.qualityReport };
4589
5579
  }
4590
5580
  async function processReply(input) {
4591
5581
  totalUsage = { inputTokens: 0, outputTokens: 0 };
@@ -4732,6 +5722,11 @@ Provide a brief, helpful explanation (2-3 sentences). End with "Just reply with
4732
5722
  providerOptions
4733
5723
  );
4734
5724
  trackUsage(emailUsage);
5725
+ const emailReview = reviewBatchEmail(emailText, nextBatchFields);
5726
+ state.qualityReport = {
5727
+ ...buildApplicationQualityReport(state),
5728
+ emailReview
5729
+ };
4735
5730
  if (!responseText) {
4736
5731
  responseText = emailText;
4737
5732
  } else {
@@ -4747,13 +5742,18 @@ ${emailText}`;
4747
5742
  }
4748
5743
  }
4749
5744
  state.updatedAt = Date.now();
5745
+ state.qualityReport = state.qualityReport ?? buildApplicationQualityReport(state);
4750
5746
  await applicationStore?.save(state);
5747
+ if (shouldFailQualityGate(qualityGate, state.qualityReport.qualityGateStatus)) {
5748
+ throw new Error("Application quality gate failed. See state.qualityReport for blocking issues.");
5749
+ }
4751
5750
  return {
4752
5751
  state,
4753
5752
  intent: intent.primaryIntent,
4754
5753
  fieldsFilled,
4755
5754
  responseText,
4756
- tokenUsage: totalUsage
5755
+ tokenUsage: totalUsage,
5756
+ reviewReport: state.qualityReport
4757
5757
  };
4758
5758
  }
4759
5759
  async function generateCurrentBatchEmail(applicationId, opts) {
@@ -4779,6 +5779,12 @@ ${emailText}`;
4779
5779
  providerOptions
4780
5780
  );
4781
5781
  trackUsage(usage);
5782
+ const emailReview = reviewBatchEmail(text, batchFields);
5783
+ state.qualityReport = {
5784
+ ...buildApplicationQualityReport(state),
5785
+ emailReview
5786
+ };
5787
+ await applicationStore?.save(state);
4782
5788
  return { text, tokenUsage: totalUsage };
4783
5789
  }
4784
5790
  async function getConfirmationSummary(applicationId) {
@@ -4915,73 +5921,73 @@ Respond with the final answer, deduplicated citations array, overall confidence
4915
5921
  }
4916
5922
 
4917
5923
  // src/schemas/query.ts
4918
- import { z as z33 } from "zod";
4919
- var QueryIntentSchema = z33.enum([
5924
+ import { z as z34 } from "zod";
5925
+ var QueryIntentSchema = z34.enum([
4920
5926
  "policy_question",
4921
5927
  "coverage_comparison",
4922
5928
  "document_search",
4923
5929
  "claims_inquiry",
4924
5930
  "general_knowledge"
4925
5931
  ]);
4926
- var SubQuestionSchema = z33.object({
4927
- question: z33.string().describe("Atomic sub-question to retrieve and answer independently"),
5932
+ var SubQuestionSchema = z34.object({
5933
+ question: z34.string().describe("Atomic sub-question to retrieve and answer independently"),
4928
5934
  intent: QueryIntentSchema,
4929
- chunkTypes: z33.array(z33.string()).optional().describe("Chunk types to filter retrieval (e.g. coverage, endorsement, declaration)"),
4930
- documentFilters: z33.object({
4931
- type: z33.enum(["policy", "quote"]).optional(),
4932
- carrier: z33.string().optional(),
4933
- insuredName: z33.string().optional(),
4934
- policyNumber: z33.string().optional(),
4935
- quoteNumber: z33.string().optional()
5935
+ chunkTypes: z34.array(z34.string()).optional().describe("Chunk types to filter retrieval (e.g. coverage, endorsement, declaration)"),
5936
+ documentFilters: z34.object({
5937
+ type: z34.enum(["policy", "quote"]).optional(),
5938
+ carrier: z34.string().optional(),
5939
+ insuredName: z34.string().optional(),
5940
+ policyNumber: z34.string().optional(),
5941
+ quoteNumber: z34.string().optional()
4936
5942
  }).optional().describe("Structured filters to narrow document lookup")
4937
5943
  });
4938
- var QueryClassifyResultSchema = z33.object({
5944
+ var QueryClassifyResultSchema = z34.object({
4939
5945
  intent: QueryIntentSchema,
4940
- subQuestions: z33.array(SubQuestionSchema).min(1).describe("Decomposed atomic sub-questions"),
4941
- requiresDocumentLookup: z33.boolean().describe("Whether structured document lookup is needed"),
4942
- requiresChunkSearch: z33.boolean().describe("Whether semantic chunk search is needed"),
4943
- requiresConversationHistory: z33.boolean().describe("Whether conversation history is relevant")
5946
+ subQuestions: z34.array(SubQuestionSchema).min(1).describe("Decomposed atomic sub-questions"),
5947
+ requiresDocumentLookup: z34.boolean().describe("Whether structured document lookup is needed"),
5948
+ requiresChunkSearch: z34.boolean().describe("Whether semantic chunk search is needed"),
5949
+ requiresConversationHistory: z34.boolean().describe("Whether conversation history is relevant")
4944
5950
  });
4945
- var EvidenceItemSchema = z33.object({
4946
- source: z33.enum(["chunk", "document", "conversation"]),
4947
- chunkId: z33.string().optional(),
4948
- documentId: z33.string().optional(),
4949
- turnId: z33.string().optional(),
4950
- text: z33.string().describe("Text excerpt from the source"),
4951
- relevance: z33.number().min(0).max(1),
4952
- metadata: z33.array(z33.object({ key: z33.string(), value: z33.string() })).optional()
5951
+ var EvidenceItemSchema = z34.object({
5952
+ source: z34.enum(["chunk", "document", "conversation"]),
5953
+ chunkId: z34.string().optional(),
5954
+ documentId: z34.string().optional(),
5955
+ turnId: z34.string().optional(),
5956
+ text: z34.string().describe("Text excerpt from the source"),
5957
+ relevance: z34.number().min(0).max(1),
5958
+ metadata: z34.array(z34.object({ key: z34.string(), value: z34.string() })).optional()
4953
5959
  });
4954
- var RetrievalResultSchema = z33.object({
4955
- subQuestion: z33.string(),
4956
- evidence: z33.array(EvidenceItemSchema)
5960
+ var RetrievalResultSchema = z34.object({
5961
+ subQuestion: z34.string(),
5962
+ evidence: z34.array(EvidenceItemSchema)
4957
5963
  });
4958
- var CitationSchema = z33.object({
4959
- index: z33.number().describe("Citation number [1], [2], etc."),
4960
- chunkId: z33.string().describe("Source chunk ID, e.g. doc-123:coverage:2"),
4961
- documentId: z33.string(),
4962
- documentType: z33.enum(["policy", "quote"]).optional(),
4963
- field: z33.string().optional().describe("Specific field path, e.g. coverages[0].deductible"),
4964
- quote: z33.string().describe("Exact text from source that supports the claim"),
4965
- relevance: z33.number().min(0).max(1)
5964
+ var CitationSchema = z34.object({
5965
+ index: z34.number().describe("Citation number [1], [2], etc."),
5966
+ chunkId: z34.string().describe("Source chunk ID, e.g. doc-123:coverage:2"),
5967
+ documentId: z34.string(),
5968
+ documentType: z34.enum(["policy", "quote"]).optional(),
5969
+ field: z34.string().optional().describe("Specific field path, e.g. coverages[0].deductible"),
5970
+ quote: z34.string().describe("Exact text from source that supports the claim"),
5971
+ relevance: z34.number().min(0).max(1)
4966
5972
  });
4967
- var SubAnswerSchema = z33.object({
4968
- subQuestion: z33.string(),
4969
- answer: z33.string(),
4970
- citations: z33.array(CitationSchema),
4971
- confidence: z33.number().min(0).max(1),
4972
- needsMoreContext: z33.boolean().describe("True if evidence was insufficient to answer fully")
5973
+ var SubAnswerSchema = z34.object({
5974
+ subQuestion: z34.string(),
5975
+ answer: z34.string(),
5976
+ citations: z34.array(CitationSchema),
5977
+ confidence: z34.number().min(0).max(1),
5978
+ needsMoreContext: z34.boolean().describe("True if evidence was insufficient to answer fully")
4973
5979
  });
4974
- var VerifyResultSchema = z33.object({
4975
- approved: z33.boolean().describe("Whether all sub-answers are adequately grounded"),
4976
- issues: z33.array(z33.string()).describe("Specific grounding or consistency issues found"),
4977
- retrySubQuestions: z33.array(z33.string()).optional().describe("Sub-questions that need additional retrieval or re-reasoning")
5980
+ var VerifyResultSchema = z34.object({
5981
+ approved: z34.boolean().describe("Whether all sub-answers are adequately grounded"),
5982
+ issues: z34.array(z34.string()).describe("Specific grounding or consistency issues found"),
5983
+ retrySubQuestions: z34.array(z34.string()).optional().describe("Sub-questions that need additional retrieval or re-reasoning")
4978
5984
  });
4979
- var QueryResultSchema = z33.object({
4980
- answer: z33.string(),
4981
- citations: z33.array(CitationSchema),
5985
+ var QueryResultSchema = z34.object({
5986
+ answer: z34.string(),
5987
+ citations: z34.array(CitationSchema),
4982
5988
  intent: QueryIntentSchema,
4983
- confidence: z33.number().min(0).max(1),
4984
- followUp: z33.string().optional().describe("Suggested follow-up question if applicable")
5989
+ confidence: z34.number().min(0).max(1),
5990
+ followUp: z34.string().optional().describe("Suggested follow-up question if applicable")
4985
5991
  });
4986
5992
 
4987
5993
  // src/query/retriever.ts
@@ -5269,6 +6275,112 @@ async function verify(originalQuestion, subAnswers, allEvidence, config) {
5269
6275
  return { result: object, usage };
5270
6276
  }
5271
6277
 
6278
+ // src/query/quality.ts
6279
/**
 * Pick the identifier under which an evidence item is indexed.
 *
 * Preference order is chunk id, then document id, then conversation turn id.
 * Empty identifiers are skipped (not just null/undefined ones), so a blank
 * `chunkId` does not hide a usable `documentId`; this matches how
 * `citationSourceId` resolves the id on the citation side.
 *
 * @param {{ chunkId?: string, documentId?: string, turnId?: string }} evidence
 * @returns {string | undefined} First non-empty id, or a falsy value when none is usable.
 */
function sourceIdForEvidence(evidence) {
  return evidence.chunkId || evidence.documentId || evidence.turnId;
}
6282
/**
 * Resolve the identifier a citation points at.
 *
 * @param {{ chunkId?: string, documentId?: string }} citation
 * @returns {string | undefined} The chunk id when it is truthy, otherwise the document id.
 */
function citationSourceId(citation) {
  const { chunkId, documentId } = citation;
  if (chunkId) {
    return chunkId;
  }
  return documentId;
}
6285
/**
 * Build the deterministic review report for a query run.
 *
 * Checks performed:
 *  - sub-answer that claims an answer (needsMoreContext is false) without citations (blocking)
 *  - sub-answer with confidence >= 0.85 without citations (blocking)
 *  - citation whose source id has no retrieved evidence (blocking)
 *  - citation whose quote is not a verbatim substring of that evidence (warning)
 *  - final answer with confidence > 0.4, non-blank text and no citations (blocking)
 *  - final-answer citation (index + chunkId + documentId) absent from all sub-answers (warning)
 *
 * @param {object} params
 * @param {Array<object>} [params.subAnswers=[]] - Sub-answers with `subQuestion`, `citations`, `confidence`, `needsMoreContext`.
 * @param {Array<object>} [params.evidence=[]] - Retrieved evidence items (`text` plus chunk/document/turn ids).
 * @param {object} [params.finalResult] - Final answer (`answer`, `citations`, `confidence`); final checks are skipped when absent.
 * @param {Array<object>} [params.verifyRounds=[]] - Verification round records (`round`, `approved`, `issues`).
 * @returns {{ issues: Array<object>, rounds: Array<object>, artifacts: Array<object>, verifyRounds: Array<object>, qualityGateStatus: * }}
 *   `qualityGateStatus` is whatever `evaluateQualityGate` returns for the issues and round warnings.
 */
function buildQueryReviewReport(params) {
  // Default the collections so a caller that omits one gets an empty report section
  // instead of a TypeError.
  const { subAnswers = [], evidence = [], finalResult, verifyRounds = [] } = params;
  const issues = [];

  // Group evidence by source id, appending in place rather than re-copying the
  // bucket on every insert.
  const evidenceBySource = /* @__PURE__ */ new Map();
  for (const item of evidence) {
    const sourceId = sourceIdForEvidence(item);
    if (!sourceId) continue;
    const bucket = evidenceBySource.get(sourceId);
    if (bucket) {
      bucket.push(item);
    } else {
      evidenceBySource.set(sourceId, [item]);
    }
  }

  for (const subAnswer of subAnswers) {
    const uncited = subAnswer.citations.length === 0;
    if (!subAnswer.needsMoreContext && uncited) {
      issues.push({
        code: "subanswer_missing_citations",
        severity: "blocking",
        message: `Sub-answer "${subAnswer.subQuestion}" has no citations despite claiming an answer.`,
        subQuestion: subAnswer.subQuestion
      });
    }
    if (subAnswer.confidence >= 0.85 && uncited) {
      issues.push({
        code: "subanswer_high_confidence_without_citations",
        severity: "blocking",
        message: `Sub-answer "${subAnswer.subQuestion}" has high confidence without citations.`,
        subQuestion: subAnswer.subQuestion
      });
    }
    for (const citation of subAnswer.citations) {
      const sourceId = citationSourceId(citation);
      const supportedEvidence = sourceId ? evidenceBySource.get(sourceId) ?? [] : [];
      if (!sourceId || supportedEvidence.length === 0) {
        issues.push({
          code: "citation_missing_from_evidence",
          severity: "blocking",
          message: `Citation [${citation.index}] in "${subAnswer.subQuestion}" does not map to retrieved evidence.`,
          subQuestion: subAnswer.subQuestion,
          citationIndex: citation.index,
          sourceId
        });
        // No evidence to compare the quote against.
        continue;
      }
      const quoteFound = supportedEvidence.some((item) => item.text.includes(citation.quote));
      if (!quoteFound) {
        issues.push({
          code: "citation_quote_not_in_evidence",
          severity: "warning",
          message: `Citation [${citation.index}] quote in "${subAnswer.subQuestion}" was not found verbatim in retrieved evidence.`,
          subQuestion: subAnswer.subQuestion,
          citationIndex: citation.index,
          sourceId
        });
      }
    }
  }

  if (finalResult) {
    if (finalResult.answer.trim().length > 0 && finalResult.citations.length === 0 && finalResult.confidence > 0.4) {
      issues.push({
        code: "final_answer_missing_citations",
        severity: "blocking",
        message: "Final answer has non-trivial confidence but no citations."
      });
    }
    const citationKey = (citation) => `${citation.index}|${citation.chunkId}|${citation.documentId}`;
    const knownCitationIds = new Set(
      subAnswers.flatMap((sa) => sa.citations.map(citationKey))
    );
    for (const citation of finalResult.citations) {
      if (!knownCitationIds.has(citationKey(citation))) {
        issues.push({
          code: "final_answer_unknown_citation",
          severity: "warning",
          message: `Final answer citation [${citation.index}] was not present in verified sub-answers.`,
          citationIndex: citation.index,
          sourceId: citationSourceId(citation)
        });
      }
    }
  }

  // A round only counts as passed when it was approved and raised no issues.
  const rounds = verifyRounds.map((round) => ({
    round: round.round,
    kind: "verification",
    status: round.approved && round.issues.length === 0 ? "passed" : "warning",
    summary: round.issues[0] ?? (round.approved ? "Verification passed." : "Verification requested retry.")
  }));
  const artifacts = [
    { kind: "evidence", label: "Retrieved Evidence", itemCount: evidence.length },
    { kind: "sub_answers", label: "Sub Answers", itemCount: subAnswers.length }
  ];
  return {
    issues,
    rounds,
    artifacts,
    verifyRounds,
    qualityGateStatus: evaluateQualityGate({
      issues,
      hasRoundWarnings: verifyRounds.some((round) => !round.approved || round.issues.length > 0)
    })
  };
}
6383
+
5272
6384
  // src/query/coordinator.ts
5273
6385
  function createQueryAgent(config) {
5274
6386
  const {
@@ -5282,7 +6394,8 @@ function createQueryAgent(config) {
5282
6394
  onTokenUsage,
5283
6395
  onProgress,
5284
6396
  log,
5285
- providerOptions
6397
+ providerOptions,
6398
+ qualityGate = "warn"
5286
6399
  } = config;
5287
6400
  const limit = pLimit(concurrency);
5288
6401
  let totalUsage = { inputTokens: 0, outputTokens: 0 };
@@ -5351,6 +6464,7 @@ function createQueryAgent(config) {
5351
6464
  await pipelineCtx.save("reason", { classification, evidence: allEvidence, subAnswers });
5352
6465
  onProgress?.("Verifying answer grounding...");
5353
6466
  const verifierConfig = { generateObject, providerOptions };
6467
+ const verifyRounds = [];
5354
6468
  for (let round = 0; round < maxVerifyRounds; round++) {
5355
6469
  const { result: verifyResult, usage } = await safeVerify(
5356
6470
  question,
@@ -5359,6 +6473,12 @@ function createQueryAgent(config) {
5359
6473
  verifierConfig
5360
6474
  );
5361
6475
  trackUsage(usage);
6476
+ verifyRounds.push({
6477
+ round: round + 1,
6478
+ approved: verifyResult.approved,
6479
+ issues: verifyResult.issues,
6480
+ retrySubQuestions: verifyResult.retrySubQuestions
6481
+ });
5362
6482
  if (verifyResult.approved) {
5363
6483
  onProgress?.("Verification passed.");
5364
6484
  break;
@@ -5416,6 +6536,24 @@ function createQueryAgent(config) {
5416
6536
  classification,
5417
6537
  context?.platform
5418
6538
  );
6539
+ const reviewReport = buildQueryReviewReport({
6540
+ subAnswers,
6541
+ evidence: allEvidence,
6542
+ finalResult: queryResult,
6543
+ verifyRounds
6544
+ });
6545
+ await pipelineCtx.save("review", {
6546
+ classification,
6547
+ evidence: allEvidence,
6548
+ subAnswers,
6549
+ reviewReport
6550
+ });
6551
+ if (reviewReport.issues.length > 0) {
6552
+ await log?.(`Query deterministic review issues: ${reviewReport.issues.map((issue) => issue.message).join("; ")}`);
6553
+ }
6554
+ if (shouldFailQualityGate(qualityGate, reviewReport.qualityGateStatus)) {
6555
+ throw new Error("Query quality gate failed. See reviewReport for blocking issues.");
6556
+ }
5419
6557
  if (conversationId) {
5420
6558
  try {
5421
6559
  await memoryStore.addTurn({
@@ -5436,7 +6574,7 @@ function createQueryAgent(config) {
5436
6574
  await log?.(`Failed to store conversation turn: ${e}`);
5437
6575
  }
5438
6576
  }
5439
- return { ...queryResult, tokenUsage: totalUsage };
6577
+ return { ...queryResult, tokenUsage: totalUsage, reviewReport };
5440
6578
  }
5441
6579
  async function classify(question, conversationId) {
5442
6580
  let conversationContext;
@@ -5657,7 +6795,12 @@ export {
5657
6795
  AdmittedStatusSchema,
5658
6796
  AnswerParsingResultSchema,
5659
6797
  ApplicationClassifyResultSchema,
6798
+ ApplicationEmailReviewSchema,
5660
6799
  ApplicationFieldSchema,
6800
+ ApplicationQualityArtifactSchema,
6801
+ ApplicationQualityIssueSchema,
6802
+ ApplicationQualityReportSchema,
6803
+ ApplicationQualityRoundSchema,
5661
6804
  ApplicationStateSchema,
5662
6805
  AuditTypeSchema,
5663
6806
  AutoFillMatchSchema,
@@ -5689,6 +6832,7 @@ export {
5689
6832
  CoverageFormSchema,
5690
6833
  CoverageSchema,
5691
6834
  CoverageTriggerSchema,
6835
+ CoverageValueTypeSchema,
5692
6836
  CrimeDeclarationsSchema,
5693
6837
  CyberDeclarationsSchema,
5694
6838
  DEDUCTIBLE_TYPES,