@claritylabs/cl-sdk 0.9.0 → 0.10.1

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -489,7 +489,9 @@ var FormReferenceSchema = z3.object({
489
489
  formNumber: z3.string(),
490
490
  editionDate: z3.string().optional(),
491
491
  title: z3.string().optional(),
492
- formType: z3.enum(["coverage", "endorsement", "declarations", "application", "notice", "other"])
492
+ formType: z3.enum(["coverage", "endorsement", "declarations", "application", "notice", "other"]),
493
+ pageStart: z3.number().optional(),
494
+ pageEnd: z3.number().optional()
493
495
  });
494
496
  var TaxFeeItemSchema = z3.object({
495
497
  name: z3.string(),
@@ -526,12 +528,25 @@ var NamedInsuredSchema = z3.object({
526
528
 
527
529
  // src/schemas/coverage.ts
528
530
  import { z as z4 } from "zod";
531
+ var CoverageValueTypeSchema = z4.enum([
532
+ "numeric",
533
+ "included",
534
+ "not_included",
535
+ "as_stated",
536
+ "waiting_period",
537
+ "referential",
538
+ "other"
539
+ ]);
529
540
  var CoverageSchema = z4.object({
530
541
  name: z4.string(),
531
542
  limit: z4.string(),
543
+ limitValueType: CoverageValueTypeSchema.optional(),
532
544
  deductible: z4.string().optional(),
545
+ deductibleValueType: CoverageValueTypeSchema.optional(),
546
+ formNumber: z4.string().optional(),
533
547
  pageNumber: z4.number().optional(),
534
- sectionRef: z4.string().optional()
548
+ sectionRef: z4.string().optional(),
549
+ originalContent: z4.string().optional()
535
550
  });
536
551
  var EnrichedCoverageSchema = z4.object({
537
552
  name: z4.string(),
@@ -540,8 +555,10 @@ var EnrichedCoverageSchema = z4.object({
540
555
  formEditionDate: z4.string().optional(),
541
556
  limit: z4.string(),
542
557
  limitType: LimitTypeSchema.optional(),
558
+ limitValueType: CoverageValueTypeSchema.optional(),
543
559
  deductible: z4.string().optional(),
544
560
  deductibleType: DeductibleTypeSchema.optional(),
561
+ deductibleValueType: CoverageValueTypeSchema.optional(),
545
562
  sir: z4.string().optional(),
546
563
  sublimit: z4.string().optional(),
547
564
  coinsurance: z4.string().optional(),
@@ -552,7 +569,8 @@ var EnrichedCoverageSchema = z4.object({
552
569
  included: z4.boolean(),
553
570
  premium: z4.string().optional(),
554
571
  pageNumber: z4.number().optional(),
555
- sectionRef: z4.string().optional()
572
+ sectionRef: z4.string().optional(),
573
+ originalContent: z4.string().optional()
556
574
  });
557
575
 
558
576
  // src/schemas/endorsement.ts
@@ -1569,6 +1587,7 @@ function assembleDocument(documentId, documentType, memory) {
1569
1587
  const lossHistory = memory.get("loss_history");
1570
1588
  const sections = memory.get("sections");
1571
1589
  const supplementary = memory.get("supplementary");
1590
+ const formInventory = memory.get("form_inventory");
1572
1591
  const classify = memory.get("classify");
1573
1592
  const base = {
1574
1593
  id: documentId,
@@ -1585,6 +1604,7 @@ function assembleDocument(documentId, documentType, memory) {
1585
1604
  exclusions: exclusions?.exclusions,
1586
1605
  conditions: conditions?.conditions,
1587
1606
  sections: sections?.sections,
1607
+ formInventory: formInventory?.forms,
1588
1608
  declarations: declarations ? sanitizeNulls(declarations) : void 0,
1589
1609
  ...sanitizeNulls(lossHistory ?? {})
1590
1610
  };
@@ -1826,6 +1846,11 @@ async function formatDocumentContent(doc, generateText, options) {
1826
1846
  function chunkDocument(doc) {
1827
1847
  const chunks = [];
1828
1848
  const docId = doc.id;
1849
+ function stringMetadata(entries) {
1850
+ return Object.fromEntries(
1851
+ Object.entries(entries).filter(([, value]) => value !== void 0 && value !== null && String(value).length > 0).map(([key, value]) => [key, String(value)])
1852
+ );
1853
+ }
1829
1854
  chunks.push({
1830
1855
  id: `${docId}:carrier_info:0`,
1831
1856
  documentId: docId,
@@ -1837,7 +1862,7 @@ function chunkDocument(doc) {
1837
1862
  doc.carrierAmBestRating ? `AM Best: ${doc.carrierAmBestRating}` : null,
1838
1863
  doc.mga ? `MGA: ${doc.mga}` : null
1839
1864
  ].filter(Boolean).join("\n"),
1840
- metadata: { carrier: doc.carrier, documentType: doc.type }
1865
+ metadata: stringMetadata({ carrier: doc.carrier, documentType: doc.type })
1841
1866
  });
1842
1867
  chunks.push({
1843
1868
  id: `${docId}:named_insured:0`,
@@ -1849,17 +1874,32 @@ function chunkDocument(doc) {
1849
1874
  doc.insuredFein ? `FEIN: ${doc.insuredFein}` : null,
1850
1875
  doc.insuredAddress ? `Address: ${doc.insuredAddress.street1}, ${doc.insuredAddress.city}, ${doc.insuredAddress.state} ${doc.insuredAddress.zip}` : null
1851
1876
  ].filter(Boolean).join("\n"),
1852
- metadata: { insuredName: doc.insuredName, documentType: doc.type }
1877
+ metadata: stringMetadata({ insuredName: doc.insuredName, documentType: doc.type })
1853
1878
  });
1854
1879
  doc.coverages.forEach((cov, i) => {
1855
1880
  chunks.push({
1856
1881
  id: `${docId}:coverage:${i}`,
1857
1882
  documentId: docId,
1858
1883
  type: "coverage",
1859
- text: `Coverage: ${cov.name}
1860
- Limit: ${cov.limit}${cov.deductible ? `
1861
- Deductible: ${cov.deductible}` : ""}`,
1862
- metadata: { coverageName: cov.name, limit: cov.limit, documentType: doc.type }
1884
+ text: [
1885
+ `Coverage: ${cov.name}`,
1886
+ `Limit: ${cov.limit}`,
1887
+ cov.limitValueType ? `Limit Type: ${cov.limitValueType}` : null,
1888
+ cov.deductible ? `Deductible: ${cov.deductible}` : null,
1889
+ cov.deductibleValueType ? `Deductible Type: ${cov.deductibleValueType}` : null,
1890
+ cov.originalContent ? `Source: ${cov.originalContent}` : null
1891
+ ].filter(Boolean).join("\n"),
1892
+ metadata: stringMetadata({
1893
+ coverageName: cov.name,
1894
+ limit: cov.limit,
1895
+ limitValueType: cov.limitValueType,
1896
+ deductible: cov.deductible,
1897
+ deductibleValueType: cov.deductibleValueType,
1898
+ formNumber: cov.formNumber,
1899
+ pageNumber: cov.pageNumber,
1900
+ sectionRef: cov.sectionRef,
1901
+ documentType: doc.type
1902
+ })
1863
1903
  });
1864
1904
  });
1865
1905
  doc.endorsements?.forEach((end, i) => {
@@ -1869,7 +1909,13 @@ Deductible: ${cov.deductible}` : ""}`,
1869
1909
  type: "endorsement",
1870
1910
  text: `Endorsement: ${end.title}
1871
1911
  ${end.content}`.trim(),
1872
- metadata: { endorsementType: end.endorsementType, formNumber: end.formNumber, documentType: doc.type }
1912
+ metadata: stringMetadata({
1913
+ endorsementType: end.endorsementType,
1914
+ formNumber: end.formNumber,
1915
+ pageStart: end.pageStart,
1916
+ pageEnd: end.pageEnd,
1917
+ documentType: doc.type
1918
+ })
1873
1919
  });
1874
1920
  });
1875
1921
  doc.exclusions?.forEach((exc, i) => {
@@ -1879,7 +1925,7 @@ ${end.content}`.trim(),
1879
1925
  type: "exclusion",
1880
1926
  text: `Exclusion: ${exc.name}
1881
1927
  ${exc.content}`.trim(),
1882
- metadata: { documentType: doc.type }
1928
+ metadata: stringMetadata({ formNumber: exc.formNumber, pageNumber: exc.pageNumber, documentType: doc.type })
1883
1929
  });
1884
1930
  });
1885
1931
  doc.sections?.forEach((sec, i) => {
@@ -1889,7 +1935,7 @@ ${exc.content}`.trim(),
1889
1935
  type: "section",
1890
1936
  text: `Section: ${sec.title}
1891
1937
  ${sec.content}`,
1892
- metadata: { sectionType: sec.type, documentType: doc.type }
1938
+ metadata: stringMetadata({ sectionType: sec.type, pageStart: sec.pageStart, pageEnd: sec.pageEnd, documentType: doc.type })
1893
1939
  });
1894
1940
  });
1895
1941
  if (doc.premium) {
@@ -1899,7 +1945,7 @@ ${sec.content}`,
1899
1945
  type: "premium",
1900
1946
  text: `Premium: ${doc.premium}${doc.totalCost ? `
1901
1947
  Total Cost: ${doc.totalCost}` : ""}`,
1902
- metadata: { premium: doc.premium, documentType: doc.type }
1948
+ metadata: stringMetadata({ premium: doc.premium, documentType: doc.type })
1903
1949
  });
1904
1950
  }
1905
1951
  return chunks;
@@ -1951,12 +1997,19 @@ function mergeCoverageLimits(existing, incoming) {
1951
1997
  const merged = mergeShallowPreferPresent(existing, incoming);
1952
1998
  const existingCoverages = Array.isArray(existing.coverages) ? existing.coverages : [];
1953
1999
  const incomingCoverages = Array.isArray(incoming.coverages) ? incoming.coverages : [];
1954
- merged.coverages = mergeUniqueObjects(existingCoverages, incomingCoverages, (coverage) => [
2000
+ const coverageKey = (coverage) => [
1955
2001
  String(coverage.name ?? "").toLowerCase(),
1956
2002
  String(coverage.limit ?? "").toLowerCase(),
1957
2003
  String(coverage.deductible ?? "").toLowerCase(),
1958
2004
  String(coverage.formNumber ?? "").toLowerCase()
1959
- ].join("|"));
2005
+ ].join("|");
2006
+ const byKey = /* @__PURE__ */ new Map();
2007
+ for (const coverage of [...existingCoverages, ...incomingCoverages]) {
2008
+ const key = coverageKey(coverage);
2009
+ const current = byKey.get(key);
2010
+ byKey.set(key, current ? mergeShallowPreferPresent(current, coverage) : coverage);
2011
+ }
2012
+ merged.coverages = [...byKey.values()];
1960
2013
  return merged;
1961
2014
  }
1962
2015
  function mergeDeclarations(existing, incoming) {
@@ -2813,9 +2866,45 @@ Return JSON only:
2813
2866
  }`;
2814
2867
  }
2815
2868
 
2816
- // src/prompts/coordinator/page-map.ts
2869
+ // src/prompts/coordinator/form-inventory.ts
2817
2870
  import { z as z19 } from "zod";
2818
- var PageExtractorSchema = z19.enum([
2871
+ var FormInventoryEntrySchema = FormReferenceSchema.extend({
2872
+ formNumber: FormReferenceSchema.shape.formNumber.describe("Form number or identifier, e.g. PR5070CF"),
2873
+ pageStart: FormReferenceSchema.shape.pageStart.describe("Original document page where the form begins"),
2874
+ pageEnd: FormReferenceSchema.shape.pageEnd.describe("Original document page where the form ends")
2875
+ });
2876
+ var FormInventorySchema = z19.object({
2877
+ forms: z19.array(FormInventoryEntrySchema)
2878
+ });
2879
+ function buildFormInventoryPrompt(templateHints) {
2880
+ return `You are building a form inventory for an insurance document.
2881
+
2882
+ DOCUMENT TYPE HINTS:
2883
+ ${templateHints}
2884
+
2885
+ Extract every distinct declarations page set, policy form, coverage form, endorsement, application form, and notice form that appears in the document.
2886
+
2887
+ For EACH form, extract:
2888
+ - formNumber: REQUIRED when present
2889
+ - editionDate: if shown
2890
+ - title: if shown
2891
+ - formType: one of coverage, endorsement, declarations, application, notice, other
2892
+ - pageStart: original page where the form begins
2893
+ - pageEnd: original page where the form ends
2894
+
2895
+ Critical rules:
2896
+ - Include declarations page sets even if they do not show a standard form number.
2897
+ - Use original document page numbers, not local chunk page numbers.
2898
+ - Do not emit duplicate entries for repeated headers/footers.
2899
+ - Multi-page forms should be represented once with pageStart/pageEnd covering the full span when visible.
2900
+ - If a form number is visible in endorsements, schedules, or form headers, include it even if the full form title is partial.
2901
+
2902
+ Respond with JSON only.`;
2903
+ }
2904
+
2905
+ // src/prompts/coordinator/page-map.ts
2906
+ import { z as z20 } from "zod";
2907
+ var PageExtractorSchema = z20.enum([
2819
2908
  "carrier_info",
2820
2909
  "named_insured",
2821
2910
  "coverage_limits",
@@ -2828,23 +2917,37 @@ var PageExtractorSchema = z19.enum([
2828
2917
  "sections",
2829
2918
  "supplementary"
2830
2919
  ]);
2831
- var PageAssignmentSchema = z19.object({
2832
- localPageNumber: z19.number().int().positive().describe("1-based page number within this supplied PDF chunk"),
2833
- extractorNames: z19.array(PageExtractorSchema).describe("Focused extractors that should inspect this page"),
2834
- confidence: z19.number().min(0).max(1).optional().describe("Confidence in the page assignment"),
2835
- notes: z19.string().optional().describe("Short explanation of what appears on the page")
2920
+ var PageAssignmentSchema = z20.object({
2921
+ localPageNumber: z20.number().int().positive().describe("1-based page number within this supplied PDF chunk"),
2922
+ extractorNames: z20.array(PageExtractorSchema).describe("Focused extractors that should inspect this page"),
2923
+ pageRole: z20.enum([
2924
+ "declarations_schedule",
2925
+ "endorsement_schedule",
2926
+ "policy_form",
2927
+ "endorsement_form",
2928
+ "condition_exclusion_form",
2929
+ "supplementary",
2930
+ "other"
2931
+ ]).optional().describe("Primary role of the page"),
2932
+ hasScheduleValues: z20.boolean().optional().describe("True only when the page contains insured-specific declaration or schedule values, tables, or rows to extract"),
2933
+ confidence: z20.number().min(0).max(1).optional().describe("Confidence in the page assignment"),
2934
+ notes: z20.string().optional().describe("Short explanation of what appears on the page")
2836
2935
  });
2837
- var PageMapChunkSchema = z19.object({
2838
- pages: z19.array(PageAssignmentSchema)
2936
+ var PageMapChunkSchema = z20.object({
2937
+ pages: z20.array(PageAssignmentSchema)
2839
2938
  });
2840
- function buildPageMapPrompt(templateHints, startPage, endPage) {
2939
+ function buildPageMapPrompt(templateHints, startPage, endPage, formInventoryHint) {
2940
+ const inventoryBlock = formInventoryHint ? `
2941
+ FORM INVENTORY (already identified \u2014 use this to constrain your assignments):
2942
+ ${formInventoryHint}
2943
+ ` : "";
2841
2944
  return `You are mapping insurance document pages to focused extractors.
2842
2945
 
2843
2946
  These supplied pages are ORIGINAL DOCUMENT PAGES ${startPage}-${endPage}.
2844
2947
 
2845
2948
  DOCUMENT TYPE HINTS:
2846
2949
  ${templateHints}
2847
-
2950
+ ${inventoryBlock}
2848
2951
  For each page in this supplied PDF chunk, decide which extractor(s) should inspect it.
2849
2952
 
2850
2953
  Available extractors:
@@ -2866,7 +2969,11 @@ Rules:
2866
2969
  - Avoid assigning broad ranges mentally; decide page by page.
2867
2970
  - A page may map to multiple extractors if it legitimately contains multiple relevant sections.
2868
2971
  - Prefer declarations and schedules for numeric limits/deductibles over later generic form wording.
2869
- - If a page is mostly generic form language with no declaration-specific values, do not assign "coverage_limits" unless it clearly contains schedule-specific limits.
2972
+ - Assign "coverage_limits" only when the page itself contains insured-specific declaration or schedule values to capture, such as location/building rows, coverage tables, limits, deductibles, coinsurance percentages, or scheduled amounts tied to this policy.
2973
+ - Do NOT assign "coverage_limits" for generic policy-form or endorsement text that merely explains how limits, deductibles, waiting periods, or coinsurance work, or that says values are "shown in the declarations", "shown in the schedule", "as stated", or "if applicable".
2974
+ - Headings like "Limits of Insurance", "Deductible", "Coinsurance", "Loss Conditions", or "Definitions" inside a policy form usually indicate form language, not declarations or schedules.
2975
+ - Continuation pages near the end of a form should stay mapped to "sections" plus "conditions"/"exclusions" when applicable, even if they mention limits or deductibles.
2976
+ - When a form inventory entry identifies a page range as a specific form type (e.g., endorsement, coverage, application), use that classification to guide your extractor choice. Do not assign "coverage_limits" to pages the inventory identifies as endorsement or condition/exclusion forms unless the page contains actual schedule values.
2870
2977
  - Return every page in the supplied chunk exactly once.
2871
2978
 
2872
2979
  Return JSON:
@@ -2875,6 +2982,8 @@ Return JSON:
2875
2982
  {
2876
2983
  "localPageNumber": 1,
2877
2984
  "extractorNames": ["declarations", "carrier_info", "named_insured", "coverage_limits"],
2985
+ "pageRole": "declarations_schedule",
2986
+ "hasScheduleValues": true,
2878
2987
  "confidence": 0.96,
2879
2988
  "notes": "Declarations page with insured, policy period, and scheduled limits"
2880
2989
  }
@@ -2883,18 +2992,26 @@ Return JSON:
2883
2992
 
2884
2993
  Respond with JSON only.`;
2885
2994
  }
2995
+ function formatFormInventoryForPageMap(forms) {
2996
+ if (forms.length === 0) return "";
2997
+ return forms.filter((f) => f.pageStart != null).map((f) => {
2998
+ const range = f.pageEnd && f.pageEnd !== f.pageStart ? `pages ${f.pageStart}-${f.pageEnd}` : `page ${f.pageStart}`;
2999
+ const title = f.title ? ` "${f.title}"` : "";
3000
+ return `- ${f.formNumber}${title} [${f.formType}] \u2192 ${range}`;
3001
+ }).join("\n");
3002
+ }
2886
3003
 
2887
3004
  // src/prompts/coordinator/review.ts
2888
- import { z as z20 } from "zod";
2889
- var ReviewResultSchema = z20.object({
2890
- complete: z20.boolean(),
2891
- missingFields: z20.array(z20.string()),
2892
- qualityIssues: z20.array(z20.string()).optional(),
2893
- additionalTasks: z20.array(z20.object({
2894
- extractorName: z20.string(),
2895
- startPage: z20.number(),
2896
- endPage: z20.number(),
2897
- description: z20.string()
3005
+ import { z as z21 } from "zod";
3006
+ var ReviewResultSchema = z21.object({
3007
+ complete: z21.boolean(),
3008
+ missingFields: z21.array(z21.string()),
3009
+ qualityIssues: z21.array(z21.string()).optional(),
3010
+ additionalTasks: z21.array(z21.object({
3011
+ extractorName: z21.string(),
3012
+ startPage: z21.number(),
3013
+ endPage: z21.number(),
3014
+ description: z21.string()
2898
3015
  }))
2899
3016
  });
2900
3017
  function buildReviewPrompt(templateExpected, extractedKeys, extractionSummary, pageMapSummary) {
@@ -2941,20 +3058,20 @@ Respond with JSON only.`;
2941
3058
  }
2942
3059
 
2943
3060
  // src/prompts/extractors/carrier-info.ts
2944
- import { z as z21 } from "zod";
2945
- var CarrierInfoSchema = z21.object({
2946
- carrierName: z21.string().describe("Primary insurance company name for display"),
2947
- carrierLegalName: z21.string().optional().describe("Legal entity name of insurer"),
2948
- naicNumber: z21.string().optional().describe("NAIC company code"),
2949
- amBestRating: z21.string().optional().describe("AM Best rating, e.g. 'A+ XV'"),
2950
- admittedStatus: z21.enum(["admitted", "non_admitted", "surplus_lines"]).optional().describe("Admitted status of the carrier"),
2951
- mga: z21.string().optional().describe("Managing General Agent or Program Administrator name"),
2952
- underwriter: z21.string().optional().describe("Named individual underwriter"),
2953
- policyNumber: z21.string().optional().describe("Policy or quote reference number"),
2954
- effectiveDate: z21.string().optional().describe("Policy effective date (MM/DD/YYYY)"),
2955
- expirationDate: z21.string().optional().describe("Policy expiration date (MM/DD/YYYY)"),
2956
- quoteNumber: z21.string().optional().describe("Quote or proposal reference number"),
2957
- proposedEffectiveDate: z21.string().optional().describe("Proposed effective date for quotes (MM/DD/YYYY)")
3061
+ import { z as z22 } from "zod";
3062
+ var CarrierInfoSchema = z22.object({
3063
+ carrierName: z22.string().describe("Primary insurance company name for display"),
3064
+ carrierLegalName: z22.string().optional().describe("Legal entity name of insurer"),
3065
+ naicNumber: z22.string().optional().describe("NAIC company code"),
3066
+ amBestRating: z22.string().optional().describe("AM Best rating, e.g. 'A+ XV'"),
3067
+ admittedStatus: z22.enum(["admitted", "non_admitted", "surplus_lines"]).optional().describe("Admitted status of the carrier"),
3068
+ mga: z22.string().optional().describe("Managing General Agent or Program Administrator name"),
3069
+ underwriter: z22.string().optional().describe("Named individual underwriter"),
3070
+ policyNumber: z22.string().optional().describe("Policy or quote reference number"),
3071
+ effectiveDate: z22.string().optional().describe("Policy effective date (MM/DD/YYYY)"),
3072
+ expirationDate: z22.string().optional().describe("Policy expiration date (MM/DD/YYYY)"),
3073
+ quoteNumber: z22.string().optional().describe("Quote or proposal reference number"),
3074
+ proposedEffectiveDate: z22.string().optional().describe("Proposed effective date for quotes (MM/DD/YYYY)")
2958
3075
  });
2959
3076
  function buildCarrierInfoPrompt() {
2960
3077
  return `You are an expert insurance document analyst. Extract carrier and policy identification information from this document.
@@ -2974,18 +3091,18 @@ Return JSON only.`;
2974
3091
  }
2975
3092
 
2976
3093
  // src/prompts/extractors/named-insured.ts
2977
- import { z as z22 } from "zod";
2978
- var AddressSchema2 = z22.object({
2979
- street1: z22.string(),
2980
- city: z22.string(),
2981
- state: z22.string(),
2982
- zip: z22.string()
3094
+ import { z as z23 } from "zod";
3095
+ var AddressSchema2 = z23.object({
3096
+ street1: z23.string(),
3097
+ city: z23.string(),
3098
+ state: z23.string(),
3099
+ zip: z23.string()
2983
3100
  });
2984
- var NamedInsuredSchema2 = z22.object({
2985
- insuredName: z22.string().describe("Name of primary named insured"),
2986
- insuredDba: z22.string().optional().describe("Doing-business-as name"),
3101
+ var NamedInsuredSchema2 = z23.object({
3102
+ insuredName: z23.string().describe("Name of primary named insured"),
3103
+ insuredDba: z23.string().optional().describe("Doing-business-as name"),
2987
3104
  insuredAddress: AddressSchema2.optional().describe("Primary insured mailing address"),
2988
- insuredEntityType: z22.enum([
3105
+ insuredEntityType: z23.enum([
2989
3106
  "corporation",
2990
3107
  "llc",
2991
3108
  "partnership",
@@ -2998,13 +3115,13 @@ var NamedInsuredSchema2 = z22.object({
2998
3115
  "married_couple",
2999
3116
  "other"
3000
3117
  ]).optional().describe("Legal entity type of the insured"),
3001
- insuredFein: z22.string().optional().describe("Federal Employer Identification Number"),
3002
- insuredSicCode: z22.string().optional().describe("SIC code"),
3003
- insuredNaicsCode: z22.string().optional().describe("NAICS code"),
3004
- additionalNamedInsureds: z22.array(
3005
- z22.object({
3006
- name: z22.string(),
3007
- relationship: z22.string().optional().describe("e.g. subsidiary, affiliate"),
3118
+ insuredFein: z23.string().optional().describe("Federal Employer Identification Number"),
3119
+ insuredSicCode: z23.string().optional().describe("SIC code"),
3120
+ insuredNaicsCode: z23.string().optional().describe("NAICS code"),
3121
+ additionalNamedInsureds: z23.array(
3122
+ z23.object({
3123
+ name: z23.string(),
3124
+ relationship: z23.string().optional().describe("e.g. subsidiary, affiliate"),
3008
3125
  address: AddressSchema2.optional()
3009
3126
  })
3010
3127
  ).optional().describe("Additional named insureds listed on the policy")
@@ -3025,23 +3142,20 @@ Return JSON only.`;
3025
3142
  }
3026
3143
 
3027
3144
  // src/prompts/extractors/coverage-limits.ts
3028
- import { z as z23 } from "zod";
3029
- var CoverageLimitsSchema = z23.object({
3030
- coverages: z23.array(
3031
- z23.object({
3032
- name: z23.string().describe("Coverage name"),
3033
- limit: z23.string().describe("Coverage limit, e.g. '$1,000,000'"),
3034
- deductible: z23.string().optional().describe("Deductible amount"),
3035
- coverageCode: z23.string().optional().describe("Coverage code or class code"),
3036
- formNumber: z23.string().optional().describe("Associated form number, e.g. 'CG 00 01'")
3037
- })
3038
- ).describe("All coverages with their limits"),
3039
- coverageForm: z23.enum(["occurrence", "claims_made", "accident"]).optional().describe("Primary coverage trigger type"),
3040
- retroactiveDate: z23.string().optional().describe("Retroactive date for claims-made policies (MM/DD/YYYY)")
3145
+ import { z as z24 } from "zod";
3146
+ var ExtractorCoverageSchema = CoverageSchema.extend({
3147
+ coverageCode: z24.string().optional().describe("Coverage code or class code")
3148
+ });
3149
+ var CoverageLimitsSchema = z24.object({
3150
+ coverages: z24.array(ExtractorCoverageSchema).describe("All coverages with their limits"),
3151
+ coverageForm: z24.enum(["occurrence", "claims_made", "accident"]).optional().describe("Primary coverage trigger type"),
3152
+ retroactiveDate: z24.string().optional().describe("Retroactive date for claims-made policies (MM/DD/YYYY)")
3041
3153
  });
3042
3154
  function buildCoverageLimitsPrompt() {
3043
3155
  return `You are an expert insurance document analyst. Extract all coverage limits and deductibles from this document.
3044
3156
 
3157
+ Extract only insured-specific declaration, schedule, or endorsement entries that state actual coverage terms for this policy.
3158
+
3045
3159
  Focus on:
3046
3160
  - Every coverage listed on the declarations page or coverage schedule
3047
3161
  - Per-occurrence, aggregate, and sub-limits for each coverage
@@ -3052,20 +3166,34 @@ Focus on:
3052
3166
  - Standard limit fields: per occurrence, general aggregate, products/completed ops aggregate, personal & advertising injury, fire damage, medical expense, combined single limit, BI/PD splits, umbrella each occurrence/aggregate/retention, statutory (WC), employers liability
3053
3167
  - Defense cost treatment: inside limits, outside limits, or supplementary
3054
3168
 
3055
- Extract ALL coverages \u2014 do not omit any coverage line that appears in the document.
3169
+ For EACH coverage, also extract:
3170
+ - pageNumber: the original page number where the coverage row/value appears
3171
+ - sectionRef: the declarations/schedule/endorsement section heading where it appears
3172
+ - originalContent: the verbatim row or short source snippet used for this coverage
3173
+ - limitValueType: classify the limit as numeric, included, not_included, as_stated, waiting_period, referential, or other
3174
+ - deductibleValueType: classify the deductible/value term similarly when deductible is present
3175
+
3176
+ Critical rules:
3177
+ - Do not extract table-of-contents lines, index entries, headers, footers, page labels, or cross-references as coverages.
3178
+ - Do not create a coverage entry from generic policy-form text that only says a limit/deductible is "shown in the declarations", "shown in the Business Income Declarations", "as stated", "if applicable", or similar referential wording.
3179
+ - Do not treat a generic waiting period, deductible explanation, limits clause, coinsurance clause, or definitions text as a standalone coverage unless the page contains an actual policy-specific schedule row or declaration entry.
3180
+ - Values like "Included" or "Not Included" are valid only when they appear as an explicit declarations/schedule/endorsement entry for a named coverage. Do not infer them from narrative form language.
3181
+ - If a waiting period or hour deductible is shown as part of a specific declarations/schedule row, it may be captured in deductible. Otherwise omit it.
3182
+ - Use limitValueType or deductibleValueType to preserve non-numeric terms precisely instead of forcing them into numeric semantics.
3183
+ - Preserve one row per real coverage entry. Do not merge adjacent schedule rows into malformed names.
3056
3184
 
3057
3185
  Return JSON only.`;
3058
3186
  }
3059
3187
 
3060
3188
  // src/prompts/extractors/endorsements.ts
3061
- import { z as z24 } from "zod";
3062
- var EndorsementsSchema = z24.object({
3063
- endorsements: z24.array(
3064
- z24.object({
3065
- formNumber: z24.string().describe("Form number, e.g. 'CG 21 47'"),
3066
- editionDate: z24.string().optional().describe("Edition date, e.g. '12 07'"),
3067
- title: z24.string().describe("Endorsement title"),
3068
- endorsementType: z24.enum([
3189
+ import { z as z25 } from "zod";
3190
+ var EndorsementsSchema = z25.object({
3191
+ endorsements: z25.array(
3192
+ z25.object({
3193
+ formNumber: z25.string().describe("Form number, e.g. 'CG 21 47'"),
3194
+ editionDate: z25.string().optional().describe("Edition date, e.g. '12 07'"),
3195
+ title: z25.string().describe("Endorsement title"),
3196
+ endorsementType: z25.enum([
3069
3197
  "additional_insured",
3070
3198
  "waiver_of_subrogation",
3071
3199
  "primary_noncontributory",
@@ -3085,12 +3213,12 @@ var EndorsementsSchema = z24.object({
3085
3213
  "territorial_extension",
3086
3214
  "other"
3087
3215
  ]).describe("Endorsement type classification"),
3088
- effectiveDate: z24.string().optional().describe("Endorsement effective date"),
3089
- affectedCoverageParts: z24.array(z24.string()).optional().describe("Coverage parts affected by this endorsement"),
3090
- namedParties: z24.array(
3091
- z24.object({
3092
- name: z24.string().describe("Party name"),
3093
- role: z24.enum([
3216
+ effectiveDate: z25.string().optional().describe("Endorsement effective date"),
3217
+ affectedCoverageParts: z25.array(z25.string()).optional().describe("Coverage parts affected by this endorsement"),
3218
+ namedParties: z25.array(
3219
+ z25.object({
3220
+ name: z25.string().describe("Party name"),
3221
+ role: z25.enum([
3094
3222
  "additional_insured",
3095
3223
  "loss_payee",
3096
3224
  "mortgage_holder",
@@ -3099,15 +3227,15 @@ var EndorsementsSchema = z24.object({
3099
3227
  "designated_person",
3100
3228
  "other"
3101
3229
  ]).describe("Party role"),
3102
- relationship: z24.string().optional().describe("Relationship to insured"),
3103
- scope: z24.string().optional().describe("Scope of coverage for this party")
3230
+ relationship: z25.string().optional().describe("Relationship to insured"),
3231
+ scope: z25.string().optional().describe("Scope of coverage for this party")
3104
3232
  })
3105
3233
  ).optional().describe("Named parties (additional insureds, loss payees, etc.)"),
3106
- keyTerms: z24.array(z24.string()).optional().describe("Key terms or notable provisions in the endorsement"),
3107
- premiumImpact: z24.string().optional().describe("Additional premium or credit"),
3108
- content: z24.string().describe("Full verbatim text of the endorsement"),
3109
- pageStart: z24.number().describe("Starting page number of this endorsement"),
3110
- pageEnd: z24.number().optional().describe("Ending page number of this endorsement")
3234
+ keyTerms: z25.array(z25.string()).optional().describe("Key terms or notable provisions in the endorsement"),
3235
+ premiumImpact: z25.string().optional().describe("Additional premium or credit"),
3236
+ content: z25.string().describe("Full verbatim text of the endorsement"),
3237
+ pageStart: z25.number().describe("Starting page number of this endorsement"),
3238
+ pageEnd: z25.number().optional().describe("Ending page number of this endorsement")
3111
3239
  })
3112
3240
  ).describe("All endorsements found in the document")
3113
3241
  });
@@ -3138,20 +3266,20 @@ Return JSON only.`;
3138
3266
  }
3139
3267
 
3140
3268
  // src/prompts/extractors/exclusions.ts
3141
- import { z as z25 } from "zod";
3142
- var ExclusionsSchema = z25.object({
3143
- exclusions: z25.array(
3144
- z25.object({
3145
- name: z25.string().describe("Exclusion title or short description"),
3146
- formNumber: z25.string().optional().describe("Form number if part of a named endorsement"),
3147
- excludedPerils: z25.array(z25.string()).optional().describe("Specific perils excluded"),
3148
- isAbsolute: z25.boolean().optional().describe("Whether the exclusion is absolute (no exceptions)"),
3149
- exceptions: z25.array(z25.string()).optional().describe("Exceptions to the exclusion, if any"),
3150
- buybackAvailable: z25.boolean().optional().describe("Whether coverage can be bought back via endorsement"),
3151
- buybackEndorsement: z25.string().optional().describe("Form number of the buyback endorsement if available"),
3152
- appliesTo: z25.array(z25.string()).optional().describe("Coverage types this exclusion applies to"),
3153
- content: z25.string().describe("Full verbatim exclusion text"),
3154
- pageNumber: z25.number().optional().describe("Page number where exclusion appears")
3269
+ import { z as z26 } from "zod";
3270
+ var ExclusionsSchema = z26.object({
3271
+ exclusions: z26.array(
3272
+ z26.object({
3273
+ name: z26.string().describe("Exclusion title or short description"),
3274
+ formNumber: z26.string().optional().describe("Form number if part of a named endorsement"),
3275
+ excludedPerils: z26.array(z26.string()).optional().describe("Specific perils excluded"),
3276
+ isAbsolute: z26.boolean().optional().describe("Whether the exclusion is absolute (no exceptions)"),
3277
+ exceptions: z26.array(z26.string()).optional().describe("Exceptions to the exclusion, if any"),
3278
+ buybackAvailable: z26.boolean().optional().describe("Whether coverage can be bought back via endorsement"),
3279
+ buybackEndorsement: z26.string().optional().describe("Form number of the buyback endorsement if available"),
3280
+ appliesTo: z26.array(z26.string()).optional().describe("Coverage types this exclusion applies to"),
3281
+ content: z26.string().describe("Full verbatim exclusion text"),
3282
+ pageNumber: z26.number().optional().describe("Page number where exclusion appears")
3155
3283
  })
3156
3284
  ).describe("All exclusions found in the document")
3157
3285
  });
@@ -3176,18 +3304,23 @@ Focus on:
3176
3304
  - Exclusions within insuring agreements or conditions if clearly labeled
3177
3305
  - Full verbatim exclusion text \u2014 do not summarize
3178
3306
 
3307
+ Critical rules:
3308
+ - Ignore table-of-contents entries, running headers/footers, and references that only point to another page or section.
3309
+ - Do not emit a standalone exclusion from a fragment unless the fragment itself contains substantive exclusion wording.
3310
+ - Always include pageNumber when the exclusion appears on a specific page in the supplied document chunk.
3311
+
3179
3312
  Common personal lines exclusion patterns: animal liability, business pursuits, home daycare, watercraft, aircraft.
3180
3313
 
3181
3314
  Return JSON only.`;
3182
3315
  }
3183
3316
 
3184
3317
  // src/prompts/extractors/conditions.ts
3185
- import { z as z26 } from "zod";
3186
- var ConditionsSchema = z26.object({
3187
- conditions: z26.array(
3188
- z26.object({
3189
- name: z26.string().describe("Condition title"),
3190
- conditionType: z26.enum([
3318
+ import { z as z27 } from "zod";
3319
+ var ConditionsSchema = z27.object({
3320
+ conditions: z27.array(
3321
+ z27.object({
3322
+ name: z27.string().describe("Condition title"),
3323
+ conditionType: z27.enum([
3191
3324
  "duties_after_loss",
3192
3325
  "notice_requirements",
3193
3326
  "other_insurance",
@@ -3206,14 +3339,14 @@ var ConditionsSchema = z26.object({
3206
3339
  "separation_of_insureds",
3207
3340
  "other"
3208
3341
  ]).describe("Condition category"),
3209
- content: z26.string().describe("Full verbatim condition text"),
3210
- keyValues: z26.array(
3211
- z26.object({
3212
- key: z26.string().describe("Key name (e.g. 'noticePeriod', 'suitDeadline')"),
3213
- value: z26.string().describe("Value (e.g. '30 days', '2 years')")
3342
+ content: z27.string().describe("Full verbatim condition text"),
3343
+ keyValues: z27.array(
3344
+ z27.object({
3345
+ key: z27.string().describe("Key name (e.g. 'noticePeriod', 'suitDeadline')"),
3346
+ value: z27.string().describe("Value (e.g. '30 days', '2 years')")
3214
3347
  })
3215
3348
  ).optional().describe("Key values extracted from the condition (notice periods, deadlines, etc.)"),
3216
- pageNumber: z26.number().optional().describe("Page number where condition appears")
3349
+ pageNumber: z27.number().optional().describe("Page number where condition appears")
3217
3350
  })
3218
3351
  ).describe("All policy conditions found in the document")
3219
3352
  });
@@ -3225,7 +3358,7 @@ For EACH condition, extract:
3225
3358
  - conditionType: classify as one of: duties_after_loss, notice_requirements, other_insurance, cancellation, nonrenewal, transfer_of_rights, liberalization, arbitration, concealment_fraud, examination_under_oath, legal_action, loss_payment, appraisal, mortgage_holders, policy_territory, separation_of_insureds, other \u2014 REQUIRED
3226
3359
  - content: full verbatim condition text \u2014 REQUIRED
3227
3360
  - keyValues: extract specific values as key-value pairs (e.g. noticePeriod: "30 days", suitDeadline: "2 years")
3228
- - pageNumber: page number where the condition appears
3361
+ - pageNumber: original document page number where the substantive condition text appears
3229
3362
 
3230
3363
  Focus on:
3231
3364
  - Duties after loss / notice of occurrence conditions
@@ -3242,32 +3375,37 @@ Focus on:
3242
3375
  - Mortgage holders clause
3243
3376
  - Any other named conditions
3244
3377
 
3378
+ Critical rules:
3379
+ - Ignore table-of-contents entries, section indexes, running headers/footers, and page references such as "Appraisal ..... 19".
3380
+ - Do not emit a condition unless the page contains substantive condition text, not just a heading or reference.
3381
+ - If a condition continues from a prior page, keep the substantive text together and use the page where the condition text appears in this extracted chunk.
3382
+
3245
3383
  Return JSON only.`;
3246
3384
  }
3247
3385
 
3248
3386
  // src/prompts/extractors/premium-breakdown.ts
3249
- import { z as z27 } from "zod";
3250
- var PremiumBreakdownSchema = z27.object({
3251
- premium: z27.string().optional().describe("Total premium amount, e.g. '$5,000'"),
3252
- totalCost: z27.string().optional().describe("Total cost including taxes and fees, e.g. '$5,250'"),
3253
- premiumBreakdown: z27.array(
3254
- z27.object({
3255
- line: z27.string().describe("Coverage line name"),
3256
- amount: z27.string().describe("Premium amount for this line")
3387
+ import { z as z28 } from "zod";
3388
+ var PremiumBreakdownSchema = z28.object({
3389
+ premium: z28.string().optional().describe("Total premium amount, e.g. '$5,000'"),
3390
+ totalCost: z28.string().optional().describe("Total cost including taxes and fees, e.g. '$5,250'"),
3391
+ premiumBreakdown: z28.array(
3392
+ z28.object({
3393
+ line: z28.string().describe("Coverage line name"),
3394
+ amount: z28.string().describe("Premium amount for this line")
3257
3395
  })
3258
3396
  ).optional().describe("Per-coverage-line premium breakdown"),
3259
- taxesAndFees: z27.array(
3260
- z27.object({
3261
- name: z27.string().describe("Fee or tax name"),
3262
- amount: z27.string().describe("Dollar amount"),
3263
- type: z27.enum(["tax", "fee", "surcharge", "assessment"]).optional().describe("Fee category")
3397
+ taxesAndFees: z28.array(
3398
+ z28.object({
3399
+ name: z28.string().describe("Fee or tax name"),
3400
+ amount: z28.string().describe("Dollar amount"),
3401
+ type: z28.enum(["tax", "fee", "surcharge", "assessment"]).optional().describe("Fee category")
3264
3402
  })
3265
3403
  ).optional().describe("Taxes, fees, surcharges, and assessments"),
3266
- minimumPremium: z27.string().optional().describe("Minimum premium if stated"),
3267
- depositPremium: z27.string().optional().describe("Deposit premium if stated"),
3268
- paymentPlan: z27.string().optional().describe("Payment plan description"),
3269
- auditType: z27.enum(["annual", "semi_annual", "quarterly", "monthly", "final", "self"]).optional().describe("Premium audit type"),
3270
- ratingBasis: z27.string().optional().describe("Rating basis, e.g. payroll, revenue, area, units")
3404
+ minimumPremium: z28.string().optional().describe("Minimum premium if stated"),
3405
+ depositPremium: z28.string().optional().describe("Deposit premium if stated"),
3406
+ paymentPlan: z28.string().optional().describe("Payment plan description"),
3407
+ auditType: z28.enum(["annual", "semi_annual", "quarterly", "monthly", "final", "self"]).optional().describe("Premium audit type"),
3408
+ ratingBasis: z28.string().optional().describe("Rating basis, e.g. payroll, revenue, area, units")
3271
3409
  });
3272
3410
  function buildPremiumBreakdownPrompt() {
3273
3411
  return `You are an expert insurance document analyst. Extract all premium and cost information from this document.
@@ -3287,14 +3425,14 @@ Return JSON only.`;
3287
3425
  }
3288
3426
 
3289
3427
  // src/prompts/extractors/declarations.ts
3290
- import { z as z28 } from "zod";
3291
- var DeclarationsFieldSchema = z28.object({
3292
- field: z28.string().describe("Descriptive field name (e.g. 'policyNumber', 'effectiveDate', 'coverageALimit')"),
3293
- value: z28.string().describe("Extracted value exactly as it appears in the document"),
3294
- section: z28.string().optional().describe("Section or grouping this field belongs to (e.g. 'Coverage Limits', 'Vehicle Schedule')")
3428
+ import { z as z29 } from "zod";
3429
+ var DeclarationsFieldSchema = z29.object({
3430
+ field: z29.string().describe("Descriptive field name (e.g. 'policyNumber', 'effectiveDate', 'coverageALimit')"),
3431
+ value: z29.string().describe("Extracted value exactly as it appears in the document"),
3432
+ section: z29.string().optional().describe("Section or grouping this field belongs to (e.g. 'Coverage Limits', 'Vehicle Schedule')")
3295
3433
  });
3296
- var DeclarationsExtractSchema = z28.object({
3297
- fields: z28.array(DeclarationsFieldSchema).describe("All declarations page fields extracted as key-value pairs. Structure varies by line of business.")
3434
+ var DeclarationsExtractSchema = z29.object({
3435
+ fields: z29.array(DeclarationsFieldSchema).describe("All declarations page fields extracted as key-value pairs. Structure varies by line of business.")
3298
3436
  });
3299
3437
  function buildDeclarationsPrompt() {
3300
3438
  return `You are an expert insurance document analyst. Extract all declarations page data from this document into a flexible key-value structure.
@@ -3334,21 +3472,21 @@ Preserve original values exactly as they appear. Return JSON only.`;
3334
3472
  }
3335
3473
 
3336
3474
  // src/prompts/extractors/loss-history.ts
3337
- import { z as z29 } from "zod";
3338
- var LossHistorySchema = z29.object({
3339
- lossSummary: z29.string().optional().describe("Summary of loss history, e.g. '3 claims in past 5 years totaling $125,000'"),
3340
- individualClaims: z29.array(
3341
- z29.object({
3342
- date: z29.string().optional().describe("Date of loss or claim"),
3343
- type: z29.string().optional().describe("Type of claim, e.g. 'property damage', 'bodily injury'"),
3344
- description: z29.string().optional().describe("Brief description of the claim"),
3345
- amountPaid: z29.string().optional().describe("Amount paid"),
3346
- amountReserved: z29.string().optional().describe("Amount reserved"),
3347
- status: z29.enum(["open", "closed", "reopened"]).optional().describe("Claim status"),
3348
- claimNumber: z29.string().optional().describe("Claim reference number")
3475
+ import { z as z30 } from "zod";
3476
+ var LossHistorySchema = z30.object({
3477
+ lossSummary: z30.string().optional().describe("Summary of loss history, e.g. '3 claims in past 5 years totaling $125,000'"),
3478
+ individualClaims: z30.array(
3479
+ z30.object({
3480
+ date: z30.string().optional().describe("Date of loss or claim"),
3481
+ type: z30.string().optional().describe("Type of claim, e.g. 'property damage', 'bodily injury'"),
3482
+ description: z30.string().optional().describe("Brief description of the claim"),
3483
+ amountPaid: z30.string().optional().describe("Amount paid"),
3484
+ amountReserved: z30.string().optional().describe("Amount reserved"),
3485
+ status: z30.enum(["open", "closed", "reopened"]).optional().describe("Claim status"),
3486
+ claimNumber: z30.string().optional().describe("Claim reference number")
3349
3487
  })
3350
3488
  ).optional().describe("Individual claim records"),
3351
- experienceMod: z29.string().optional().describe("Experience modification factor for workers comp, e.g. '0.85'")
3489
+ experienceMod: z30.string().optional().describe("Experience modification factor for workers comp, e.g. '0.85'")
3352
3490
  });
3353
3491
  function buildLossHistoryPrompt() {
3354
3492
  return `You are an expert insurance document analyst. Extract all loss history and claims information from this document.
@@ -3365,18 +3503,18 @@ Return JSON only.`;
3365
3503
  }
3366
3504
 
3367
3505
  // src/prompts/extractors/sections.ts
3368
- import { z as z30 } from "zod";
3369
- var SubsectionSchema2 = z30.object({
3370
- title: z30.string().describe("Subsection title"),
3371
- sectionNumber: z30.string().optional().describe("Subsection number"),
3372
- pageNumber: z30.number().optional().describe("Page number"),
3373
- content: z30.string().describe("Full verbatim text")
3506
+ import { z as z31 } from "zod";
3507
+ var SubsectionSchema2 = z31.object({
3508
+ title: z31.string().describe("Subsection title"),
3509
+ sectionNumber: z31.string().optional().describe("Subsection number"),
3510
+ pageNumber: z31.number().optional().describe("Page number"),
3511
+ content: z31.string().describe("Full verbatim text")
3374
3512
  });
3375
- var SectionsSchema = z30.object({
3376
- sections: z30.array(
3377
- z30.object({
3378
- title: z30.string().describe("Section title"),
3379
- type: z30.enum([
3513
+ var SectionsSchema = z31.object({
3514
+ sections: z31.array(
3515
+ z31.object({
3516
+ title: z31.string().describe("Section title"),
3517
+ type: z31.enum([
3380
3518
  "declarations",
3381
3519
  "insuring_agreement",
3382
3520
  "policy_form",
@@ -3390,10 +3528,10 @@ var SectionsSchema = z30.object({
3390
3528
  "regulatory",
3391
3529
  "other"
3392
3530
  ]).describe("Section type classification"),
3393
- content: z30.string().describe("Full verbatim text of the section"),
3394
- pageStart: z30.number().describe("Starting page number"),
3395
- pageEnd: z30.number().optional().describe("Ending page number"),
3396
- subsections: z30.array(SubsectionSchema2).optional().describe("Subsections within this section")
3531
+ content: z31.string().describe("Full verbatim text of the section"),
3532
+ pageStart: z31.number().describe("Starting page number"),
3533
+ pageEnd: z31.number().optional().describe("Ending page number"),
3534
+ subsections: z31.array(SubsectionSchema2).optional().describe("Subsections within this section")
3397
3535
  })
3398
3536
  ).describe("All document sections")
3399
3537
  });
@@ -3412,25 +3550,31 @@ For each section, classify its type:
3412
3550
  - "other" \u2014 anything that doesn't fit the above categories
3413
3551
 
3414
3552
  Include accurate page numbers for every section. Include subsections only if the section has clearly defined subsections with their own titles.
3553
+ If a page begins or ends in the middle of a section, treat it as a continuation of the existing section instead of creating a new orphan section from the fragment.
3554
+
3555
+ Critical rules:
3556
+ - Ignore table-of-contents entries, page-number references, repeating headers/footers, and other navigational artifacts.
3557
+ - Do not create a new section from a lone continuation fragment such as a single paragraph tail or list item that clearly belongs to the previous page's section.
3558
+ - When a section spans multiple pages, keep it as one section with pageStart/pageEnd covering the full span represented in this extraction.
3415
3559
 
3416
3560
  Return JSON only.`;
3417
3561
  }
3418
3562
 
3419
3563
  // src/prompts/extractors/supplementary.ts
3420
- import { z as z31 } from "zod";
3421
- var ContactSchema2 = z31.object({
3422
- name: z31.string().optional().describe("Organization or person name"),
3423
- phone: z31.string().optional().describe("Phone number"),
3424
- email: z31.string().optional().describe("Email address"),
3425
- address: z31.string().optional().describe("Mailing address"),
3426
- type: z31.string().optional().describe("Contact type, e.g. 'State Department of Insurance'")
3564
+ import { z as z32 } from "zod";
3565
+ var ContactSchema2 = z32.object({
3566
+ name: z32.string().optional().describe("Organization or person name"),
3567
+ phone: z32.string().optional().describe("Phone number"),
3568
+ email: z32.string().optional().describe("Email address"),
3569
+ address: z32.string().optional().describe("Mailing address"),
3570
+ type: z32.string().optional().describe("Contact type, e.g. 'State Department of Insurance'")
3427
3571
  });
3428
- var SupplementarySchema = z31.object({
3429
- regulatoryContacts: z31.array(ContactSchema2).optional().describe("Regulatory body contacts (state department of insurance, ombudsman)"),
3430
- claimsContacts: z31.array(ContactSchema2).optional().describe("Claims reporting contacts and instructions"),
3431
- thirdPartyAdministrators: z31.array(ContactSchema2).optional().describe("Third-party administrators for claims handling"),
3432
- cancellationNoticeDays: z31.number().optional().describe("Required notice period for cancellation in days"),
3433
- nonrenewalNoticeDays: z31.number().optional().describe("Required notice period for nonrenewal in days")
3572
+ var SupplementarySchema = z32.object({
3573
+ regulatoryContacts: z32.array(ContactSchema2).optional().describe("Regulatory body contacts (state department of insurance, ombudsman)"),
3574
+ claimsContacts: z32.array(ContactSchema2).optional().describe("Claims reporting contacts and instructions"),
3575
+ thirdPartyAdministrators: z32.array(ContactSchema2).optional().describe("Third-party administrators for claims handling"),
3576
+ cancellationNoticeDays: z32.number().optional().describe("Required notice period for cancellation in days"),
3577
+ nonrenewalNoticeDays: z32.number().optional().describe("Required notice period for nonrenewal in days")
3434
3578
  });
3435
3579
  function buildSupplementaryPrompt() {
3436
3580
  return `You are an expert insurance document analyst. Extract supplementary and regulatory information from this document.
@@ -3467,6 +3611,313 @@ function getExtractor(name) {
3467
3611
  return EXTRACTORS[name];
3468
3612
  }
3469
3613
 
3614
+ // src/core/quality.ts
3615
/**
 * Reduce a list of review issues to a single quality-gate status.
 *
 * @param {{ issues: Array<{ severity: string }>, hasRoundWarnings?: boolean }} params
 *   `issues` are the collected findings; `hasRoundWarnings` lets the caller
 *   force at least a "warning" outcome.
 * @returns {"failed" | "warning" | "passed"} "failed" when any issue is
 *   blocking, otherwise "warning" when any issue is a warning or
 *   `hasRoundWarnings` is set, otherwise "passed".
 */
function evaluateQualityGate(params) {
  const { issues, hasRoundWarnings = false } = params;
  const severities = new Set(issues.map((issue) => issue.severity));
  // Blocking outranks everything else, so it is tested first.
  if (severities.has("blocking")) {
    return "failed";
  }
  if (severities.has("warning") || hasRoundWarnings) {
    return "warning";
  }
  return "passed";
}
3621
/**
 * Decide whether a gate status should be treated as a hard failure.
 *
 * @param {string} mode - Quality-gate mode; only "strict" can fail.
 * @param {string} status - Status as produced by the quality gate.
 * @returns {boolean} True only for mode "strict" combined with status "failed".
 */
function shouldFailQualityGate(mode, status) {
  if (mode !== "strict") {
    return false;
  }
  return status === "failed";
}
3624
+
3625
+ // src/extraction/quality.ts
3626
/**
 * Clean up a raw form-number value.
 *
 * @param {unknown} value - Candidate form number.
 * @returns {string | undefined} The value with surrounding whitespace removed,
 *   or undefined when it is not a string or is empty after trimming.
 */
function normalizeFormNumber(value) {
  const cleaned = typeof value === "string" ? value.trim() : "";
  return cleaned === "" ? undefined : cleaned;
}
3632
/**
 * Record that `formNumber` was seen by `source`, creating or enriching its
 * inventory entry.
 *
 * A new entry takes title/pageStart/pageEnd straight from `extra`. For an
 * existing entry, only fields that are still unset are filled in, and `source`
 * is appended to `sources` if it is not already listed.
 *
 * @param {Map<string, object>} inventory - Entries keyed by form number; mutated in place.
 * @param {string | undefined} formNumber - Form number; falsy values are ignored.
 * @param {string} source - Name of the extractor that reported the form.
 * @param {{ title?: string, pageStart?: number, pageEnd?: number }} [extra]
 *   Optional details about the form.
 * @returns {void}
 */
function addFormEntry(inventory, formNumber, source, extra) {
  if (!formNumber) return;

  const existing = inventory.get(formNumber);
  if (!existing) {
    inventory.set(formNumber, {
      formNumber,
      title: extra?.title,
      pageStart: extra?.pageStart,
      pageEnd: extra?.pageEnd,
      sources: [source]
    });
    return;
  }

  // An empty title carries no information, so truthiness is the right test here.
  if (!existing.title && extra?.title) existing.title = extra.title;
  // Page numbers are compared against null/undefined rather than by truthiness
  // so that page 0 is kept when already stored and accepted when supplied,
  // matching what the insert path above does.
  if (existing.pageStart == null && extra?.pageStart != null) existing.pageStart = extra.pageStart;
  if (existing.pageEnd == null && extra?.pageEnd != null) existing.pageEnd = extra.pageEnd;
  if (!existing.sources.includes(source)) existing.sources.push(source);
}
3650
/**
 * Detect values that point elsewhere instead of stating a concrete term.
 *
 * @param {unknown} value - Extracted limit/deductible text.
 * @returns {boolean} True when `value` is a string containing, case-insensitively,
 *   one of the known referential phrases; false otherwise.
 */
function looksReferential(value) {
  if (typeof value !== "string") {
    return false;
  }
  const haystack = value.toLowerCase();
  const referentialPhrases = [
    "shown in the declarations",
    "shown in declarations",
    "shown in the schedule",
    "as stated",
    "if applicable"
  ];
  return referentialPhrases.some((phrase) => haystack.includes(phrase));
}
3655
/**
 * Detect text that is a table-of-contents line rather than substantive content.
 *
 * Two shapes are recognised after trimming:
 *   - text ending in a dot leader (four or more dots) followed by a 1-3 digit
 *     page number, e.g. "Appraisal.....19" or "Appraisal ..... 19";
 *   - a numbered heading ("1. Title") ending in three or more dots followed by
 *     a 1-3 digit page number.
 * Whitespace between the dot leader and the page number is tolerated, since
 * leaders are commonly written with a space before the number.
 *
 * @param {unknown} value - Extracted content text.
 * @returns {boolean} True when `value` is a string matching either shape.
 */
function looksTocArtifact(value) {
  if (typeof value !== "string") return false;
  const text = value.trim();
  const trailingLeader = /\.{4,}\s*\d{1,3}$/;
  const numberedHeading = /^\d+\.\s+[A-Z][\s\S]*\.{3,}\s*\d{1,3}$/;
  return trailingLeader.test(text) || numberedHeading.test(text);
}
3659
/**
 * Rank a coverage's `sectionRef` by how authoritative its source is.
 *
 * Tiers are tested from highest to lowest and the first keyword hit wins:
 *   4 - declarations / schedules, 3 - endorsements,
 *   2 - additional coverages, 1 - coverage or policy forms, 0 - anything else.
 *
 * @param {unknown} sectionRef - Section reference text from an extracted coverage.
 * @returns {number} Precedence from 0 (unknown or non-string) to 4.
 */
function sourcePrecedence(sectionRef) {
  if (typeof sectionRef !== "string") {
    return 0;
  }
  const ref = sectionRef.toLowerCase();
  const tiers = [
    [4, ["declaration", "scheduled coverages", "schedule"]],
    [3, ["endorsement"]],
    [2, ["additional coverages"]],
    [1, ["coverage form", "policy form"]]
  ];
  for (const [rank, keywords] of tiers) {
    if (keywords.some((keyword) => ref.includes(keyword))) {
      return rank;
    }
  }
  return 0;
}
3668
/**
 * Run deterministic quality checks over extracted data and assemble the
 * review report.
 *
 * Reads the "form_inventory", "coverage_limits", "endorsements", "exclusions",
 * "conditions" and "sections" entries from `memory`, builds a merged form
 * inventory from every form number seen, records issues for missing provenance,
 * table-of-contents artifacts, referential values, conflicting duplicate
 * coverages and short section fragments, and derives the gate status.
 *
 * @param {object} params
 * @param {{ get(key: string): any }} params.memory - Map-like store of extractor results.
 * @param {Array<object>} [params.reviewRounds] - Review round records; defaults to none.
 * @param {Array<object>} [params.pageAssignments] - Page assignments, used only
 *   for the "page_map" artifact count; defaults to none.
 * @returns {{ issues: object[], rounds: object[], artifacts: object[],
 *   reviewRoundRecords: object[], formInventory: object[], qualityGateStatus: string }}
 */
function buildExtractionReviewReport(params) {
  // Defaults keep the report buildable when a caller has no rounds/assignments.
  const { memory, reviewRounds = [], pageAssignments = [] } = params;
  const deterministicIssues = [];
  const inventory = /* @__PURE__ */ new Map();

  const textOrUndefined = (value) => (typeof value === "string" ? value : undefined);
  const numberOrUndefined = (value) => (typeof value === "number" ? value : undefined);
  const isBlank = (value) => typeof value !== "string" || !value.trim();
  const displayName = (value) => String(value ?? "unknown");

  const extractedFormInventory = memory.get("form_inventory")?.forms ?? [];
  const coverages = memory.get("coverage_limits")?.coverages ?? [];
  const endorsements = memory.get("endorsements")?.endorsements ?? [];
  const exclusions = memory.get("exclusions")?.exclusions ?? [];
  const conditions = memory.get("conditions")?.conditions ?? [];
  const sections = memory.get("sections")?.sections ?? [];

  // Form numbers reported by the form-inventory extractor itself. The merged
  // `inventory` cannot be used for "missing from inventory" checks because each
  // item's own form number is added to it before the check runs.
  const declaredFormNumbers = /* @__PURE__ */ new Set();
  for (const form of extractedFormInventory) {
    const formNumber = normalizeFormNumber(form.formNumber);
    if (formNumber) declaredFormNumbers.add(formNumber);
    addFormEntry(inventory, formNumber, "form_inventory", {
      title: form.title,
      pageStart: form.pageStart,
      pageEnd: form.pageEnd
    });
  }
  // Cross-checking is only meaningful when a form inventory was extracted;
  // otherwise every form-numbered item would be flagged.
  const canCrossCheckInventory = declaredFormNumbers.size > 0;

  for (const endorsement of endorsements) {
    const formNumber = normalizeFormNumber(endorsement.formNumber);
    const title = textOrUndefined(endorsement.title);
    const pageStart = numberOrUndefined(endorsement.pageStart);
    addFormEntry(inventory, formNumber, "endorsements", {
      title,
      pageStart,
      pageEnd: numberOrUndefined(endorsement.pageEnd)
    });
    if (!formNumber) {
      deterministicIssues.push({
        code: "endorsement_missing_form_number",
        severity: "blocking",
        message: "Endorsement is missing formNumber.",
        extractorName: "endorsements",
        pageNumber: pageStart,
        itemName: title
      });
    } else if (canCrossCheckInventory && !declaredFormNumbers.has(formNumber)) {
      deterministicIssues.push({
        code: "endorsement_form_missing_from_inventory",
        severity: "warning",
        message: `Endorsement "${String(endorsement.title ?? formNumber)}" is not present in form inventory.`,
        extractorName: "endorsements",
        formNumber,
        pageNumber: pageStart,
        itemName: title
      });
    }
  }

  for (const coverage of coverages) {
    const formNumber = normalizeFormNumber(coverage.formNumber);
    const itemName = textOrUndefined(coverage.name);
    const pageNumber = numberOrUndefined(coverage.pageNumber);
    const label = displayName(coverage.name);
    addFormEntry(inventory, formNumber, "coverage_limits", {
      title: itemName,
      pageStart: pageNumber,
      pageEnd: pageNumber
    });
    if (typeof coverage.name === "string" && /coverage form$/i.test(coverage.name.trim())) {
      deterministicIssues.push({
        code: "generic_form_row_as_coverage",
        severity: "blocking",
        message: `Coverage "${coverage.name}" looks like a form header rather than a real coverage row.`,
        extractorName: "coverage_limits",
        formNumber,
        pageNumber,
        itemName: coverage.name
      });
    }
    if (pageNumber === undefined) {
      deterministicIssues.push({
        code: "coverage_missing_page_number",
        severity: "warning",
        message: `Coverage "${label}" is missing pageNumber provenance.`,
        extractorName: "coverage_limits",
        formNumber,
        itemName
      });
    }
    if (isBlank(coverage.sectionRef)) {
      deterministicIssues.push({
        code: "coverage_missing_section_ref",
        severity: "warning",
        message: `Coverage "${label}" is missing sectionRef provenance.`,
        extractorName: "coverage_limits",
        formNumber,
        pageNumber,
        itemName
      });
    }
    if (isBlank(coverage.originalContent)) {
      deterministicIssues.push({
        code: "coverage_missing_original_content",
        severity: "warning",
        message: `Coverage "${label}" is missing originalContent source text.`,
        extractorName: "coverage_limits",
        formNumber,
        pageNumber,
        itemName
      });
    }
    if (looksReferential(coverage.limit) || looksReferential(coverage.deductible)) {
      deterministicIssues.push({
        code: "coverage_referential_value",
        severity: "warning",
        message: `Coverage "${label}" contains referential language instead of a concrete scheduled term.`,
        extractorName: "coverage_limits",
        formNumber,
        pageNumber,
        itemName
      });
    }
    if (formNumber && canCrossCheckInventory && !declaredFormNumbers.has(formNumber)) {
      deterministicIssues.push({
        code: "coverage_form_missing_from_inventory",
        severity: "warning",
        message: `Coverage "${label}" references form "${formNumber}" that is missing from form inventory.`,
        extractorName: "coverage_limits",
        formNumber,
        pageNumber,
        itemName
      });
    }
  }

  // Group coverages that share a name and form number (case-insensitively) so
  // duplicates extracted from different sources can be compared.
  const coverageGroups = /* @__PURE__ */ new Map();
  for (const coverage of coverages) {
    const key = [
      String(coverage.name ?? "").toLowerCase(),
      String(coverage.formNumber ?? "").toLowerCase()
    ].join("|");
    const group = coverageGroups.get(key);
    if (group) {
      group.push(coverage);
    } else {
      coverageGroups.set(key, [coverage]);
    }
  }
  for (const [key, groupedCoverages] of coverageGroups.entries()) {
    if (groupedCoverages.length < 2) continue;
    // Highest-precedence source first; every other row is compared against it.
    const sorted = [...groupedCoverages].sort(
      (a, b) => sourcePrecedence(b.sectionRef) - sourcePrecedence(a.sectionRef)
    );
    const highest = sorted[0];
    const highestLimit = String(highest.limit ?? "").trim();
    const highestDeductible = String(highest.deductible ?? "").trim();
    for (const lower of sorted.slice(1)) {
      const lowerLimit = String(lower.limit ?? "").trim();
      const lowerDeductible = String(lower.deductible ?? "").trim();
      // Only values present on both sides can conflict.
      const limitConflict = highestLimit && lowerLimit && highestLimit !== lowerLimit;
      const deductibleConflict = highestDeductible && lowerDeductible && highestDeductible !== lowerDeductible;
      if (limitConflict || deductibleConflict) {
        deterministicIssues.push({
          code: "coverage_precedence_conflict",
          severity: "warning",
          message: `Coverage "${String(highest.name ?? key)}" has conflicting extracted terms across sources with different precedence.`,
          extractorName: "coverage_limits",
          formNumber: normalizeFormNumber(highest.formNumber) ?? normalizeFormNumber(lower.formNumber),
          pageNumber: numberOrUndefined(lower.pageNumber),
          itemName: textOrUndefined(highest.name)
        });
      }
    }
  }

  for (const exclusion of exclusions) {
    const formNumber = normalizeFormNumber(exclusion.formNumber);
    const itemName = textOrUndefined(exclusion.name);
    const pageNumber = numberOrUndefined(exclusion.pageNumber);
    const label = displayName(exclusion.name);
    addFormEntry(inventory, formNumber, "exclusions", {
      title: itemName,
      pageStart: pageNumber,
      pageEnd: pageNumber
    });
    if (pageNumber === undefined) {
      deterministicIssues.push({
        code: "exclusion_missing_page_number",
        severity: "warning",
        message: `Exclusion "${label}" is missing pageNumber provenance.`,
        extractorName: "exclusions",
        formNumber,
        itemName
      });
    }
    if (looksTocArtifact(exclusion.content)) {
      deterministicIssues.push({
        code: "exclusion_toc_artifact",
        severity: "blocking",
        message: `Exclusion "${label}" appears to be a table-of-contents artifact.`,
        extractorName: "exclusions",
        pageNumber,
        itemName
      });
    }
  }

  for (const condition of conditions) {
    const itemName = textOrUndefined(condition.name);
    const pageNumber = numberOrUndefined(condition.pageNumber);
    const label = displayName(condition.name);
    if (pageNumber === undefined) {
      deterministicIssues.push({
        code: "condition_missing_page_number",
        severity: "warning",
        message: `Condition "${label}" is missing pageNumber provenance.`,
        extractorName: "conditions",
        itemName
      });
    }
    if (looksTocArtifact(condition.content)) {
      deterministicIssues.push({
        code: "condition_toc_artifact",
        severity: "blocking",
        message: `Condition "${label}" appears to be a table-of-contents artifact.`,
        extractorName: "conditions",
        pageNumber,
        itemName
      });
    }
  }

  for (const section of sections) {
    // A short section confined to a single page is flagged as a possible
    // orphan continuation fragment.
    const isShort = typeof section.content === "string" && section.content.trim().length < 120;
    const isSinglePage = typeof section.pageStart === "number" && (section.pageEnd === undefined || section.pageEnd === section.pageStart);
    if (isShort && isSinglePage) {
      deterministicIssues.push({
        code: "section_short_fragment",
        severity: "warning",
        message: `Section "${displayName(section.title)}" may be an orphan continuation fragment.`,
        extractorName: "sections",
        pageNumber: section.pageStart,
        itemName: textOrUndefined(section.title)
      });
    }
  }

  const formInventory = [...inventory.values()].sort((a, b) => a.formNumber.localeCompare(b.formNumber));
  const rounds = reviewRounds.map((round) => ({
    round: round.round,
    kind: "llm_review",
    status: round.complete && round.qualityIssues.length === 0 ? "passed" : "warning",
    summary: round.qualityIssues[0] ?? (round.complete ? "Review passed." : "Review requested follow-up extraction.")
  }));
  const artifacts = [
    { kind: "form_inventory", label: "Form Inventory", itemCount: formInventory.length },
    { kind: "page_map", label: "Page Map", itemCount: pageAssignments.length }
  ];
  const qualityGateStatus = evaluateQualityGate({
    issues: deterministicIssues,
    hasRoundWarnings: reviewRounds.some((round) => round.qualityIssues.length > 0 || !round.complete)
  });
  return {
    issues: deterministicIssues,
    rounds,
    artifacts,
    reviewRoundRecords: reviewRounds,
    formInventory,
    qualityGateStatus
  };
}
3911
/**
 * Build the stored record for one review round.
 *
 * @param {number} round - Round identifier, copied through unchanged.
 * @param {{ complete: any, missingFields: any, qualityIssues?: any, additionalTasks: any }} review
 *   Review result to snapshot.
 * @returns {object} Record with `round`, `complete`, `missingFields`,
 *   `qualityIssues` (an empty array when the review has none) and `additionalTasks`.
 */
function toReviewRoundRecord(round, review) {
  const { complete, missingFields, qualityIssues, additionalTasks } = review;
  return {
    round,
    complete,
    missingFields,
    qualityIssues: qualityIssues ?? [],
    additionalTasks
  };
}
3920
+
3470
3921
  // src/extraction/coordinator.ts
3471
3922
  function createExtractor(config) {
3472
3923
  const {
@@ -3479,6 +3930,7 @@ function createExtractor(config) {
3479
3930
  onProgress,
3480
3931
  log,
3481
3932
  providerOptions,
3933
+ qualityGate = "warn",
3482
3934
  onCheckpointSave
3483
3935
  } = config;
3484
3936
  const limit = pLimit(concurrency);
@@ -3535,6 +3987,50 @@ function createExtractor(config) {
3535
3987
  if (extractorPages.size === 0) return "No page assignments available.";
3536
3988
  return [...extractorPages.entries()].map(([extractorName, pages]) => `${extractorName}: pages ${pages.join(", ")}`).join("\n");
3537
3989
  }
3990
/**
 * Normalizes the extractor list of every page assignment, using the form
 * inventory (when available) as a second opinion about what each page holds.
 *
 * Rules applied per page:
 * - An empty or missing extractor list falls back to `["sections"]`; falsy
 *   names and duplicates are removed.
 * - `"coverage_limits"` is dropped when the page does not look like a
 *   schedule of values and either the page role is a policy/condition/
 *   endorsement form, or the page has no declarations and is covered by an
 *   endorsement/notice/application form or is also routed to the conditions,
 *   exclusions or endorsements extractors.
 * - `"endorsements"` is added when the inventory places an endorsement form
 *   on the page.
 *
 * Form page ranges come from model output, so they are sanitized before use:
 * non-finite starts are ignored, a missing/non-finite/reversed end collapses
 * the range to its start page, and ranges are clamped to the span of pages
 * that actually have an assignment (the only pages ever looked up). This
 * keeps a bogus huge `pageEnd` from turning the indexing loop into a hang.
 *
 * @param {Array<object>} pageAssignments - Page assignments; `localPageNumber`,
 *   `extractorNames`, `pageRole` and `hasScheduleValues` are read.
 * @param {{forms: Array<object>}|undefined|null} formInventory - Optional form
 *   inventory; `formType`, `pageStart` and `pageEnd` are read from each form.
 * @returns {Array<object>} New assignment objects with a normalized
 *   `extractorNames` array; all other properties are copied unchanged.
 */
function normalizePageAssignments(pageAssignments, formInventory) {
  // Determine the span of pages that can ever be looked up below.
  let firstAssignedPage = Infinity;
  let lastAssignedPage = -Infinity;
  for (const { localPageNumber } of pageAssignments) {
    if (!Number.isFinite(localPageNumber)) continue;
    firstAssignedPage = Math.min(firstAssignedPage, Math.floor(localPageNumber));
    lastAssignedPage = Math.max(lastAssignedPage, Math.ceil(localPageNumber));
  }

  // page number -> Set of form types the inventory places on that page
  const pageFormTypes = /* @__PURE__ */ new Map();
  for (const form of formInventory?.forms ?? []) {
    if (!Number.isFinite(form.pageStart)) continue;
    const start = Math.floor(form.pageStart);
    const statedEnd = Number.isFinite(form.pageEnd) ? Math.ceil(form.pageEnd) : start;
    // A reversed range is treated as a single-page form rather than dropped.
    const end = Math.max(start, statedEnd);
    const from = Math.max(start, firstAssignedPage);
    const to = Math.min(end, lastAssignedPage);
    for (let page = from; page <= to; page++) {
      const types = pageFormTypes.get(page) ?? /* @__PURE__ */ new Set();
      types.add(form.formType);
      pageFormTypes.set(page, types);
    }
  }

  return pageAssignments.map((assignment) => {
    const requested = assignment.extractorNames?.length > 0 ? assignment.extractorNames : ["sections"];
    let extractorNames = [...new Set(requested.filter(Boolean))];

    const hasDeclarations = extractorNames.includes("declarations");
    const looksLikeScheduleValues = assignment.hasScheduleValues === true;
    const inventoryTypes = pageFormTypes.get(assignment.localPageNumber);

    if (extractorNames.includes("coverage_limits")) {
      const roleBlocksCoverageLimits =
        assignment.pageRole === "policy_form" ||
        assignment.pageRole === "condition_exclusion_form" ||
        assignment.pageRole === "endorsement_form";
      const inventoryBlocksCoverageLimits =
        inventoryTypes != null &&
        !looksLikeScheduleValues &&
        !hasDeclarations &&
        (inventoryTypes.has("endorsement") || inventoryTypes.has("notice") || inventoryTypes.has("application"));
      const siblingExtractorsBlockCoverageLimits =
        !hasDeclarations &&
        !looksLikeScheduleValues &&
        (extractorNames.includes("conditions") ||
          extractorNames.includes("exclusions") ||
          extractorNames.includes("endorsements"));
      const shouldDropCoverageLimits =
        inventoryBlocksCoverageLimits ||
        (!looksLikeScheduleValues && roleBlocksCoverageLimits) ||
        siblingExtractorsBlockCoverageLimits;
      if (shouldDropCoverageLimits) {
        extractorNames = extractorNames.filter((name) => name !== "coverage_limits");
      }
    }

    if (inventoryTypes?.has("endorsement") && !extractorNames.includes("endorsements")) {
      extractorNames = [...extractorNames, "endorsements"];
    }
    // Never leave a page without an extractor.
    if (extractorNames.length === 0) {
      extractorNames = ["sections"];
    }
    return {
      ...assignment,
      extractorNames
    };
  });
}
3538
4034
  function buildTemplateHints(primaryType, documentType, pageCount, template) {
3539
4035
  return [
3540
4036
  `Document type: ${primaryType} ${documentType}`,
@@ -3653,6 +4149,38 @@ function createExtractor(config) {
3653
4149
  const template = getTemplate(primaryType);
3654
4150
  const pageCount = resumed?.pageCount ?? await getPdfPageCount(pdfBase64);
3655
4151
  const templateHints = buildTemplateHints(primaryType, documentType, pageCount, template);
4152
+ let formInventory;
4153
+ if (resumed?.formInventory && pipelineCtx.isPhaseComplete("form_inventory")) {
4154
+ formInventory = resumed.formInventory;
4155
+ memory.set("form_inventory", formInventory);
4156
+ onProgress?.("Resuming from checkpoint (form inventory complete)...");
4157
+ } else {
4158
+ onProgress?.(`Building form inventory for ${primaryType} ${documentType}...`);
4159
+ const formInventoryResponse = await safeGenerateObject(
4160
+ generateObject,
4161
+ {
4162
+ prompt: buildFormInventoryPrompt(templateHints),
4163
+ schema: FormInventorySchema,
4164
+ maxTokens: 2048,
4165
+ providerOptions: { ...providerOptions, pdfBase64 }
4166
+ },
4167
+ {
4168
+ fallback: { forms: [] },
4169
+ log,
4170
+ onError: (err, attempt) => log?.(`Form inventory attempt ${attempt + 1} failed: ${err instanceof Error ? err.message : String(err)}`)
4171
+ }
4172
+ );
4173
+ trackUsage(formInventoryResponse.usage);
4174
+ formInventory = formInventoryResponse.object;
4175
+ memory.set("form_inventory", formInventory);
4176
+ await pipelineCtx.save("form_inventory", {
4177
+ id,
4178
+ pageCount,
4179
+ classifyResult,
4180
+ formInventory,
4181
+ memory: Object.fromEntries(memory)
4182
+ });
4183
+ }
3656
4184
  let pageAssignments;
3657
4185
  if (resumed?.pageAssignments && pipelineCtx.isPhaseComplete("page_map")) {
3658
4186
  pageAssignments = resumed.pageAssignments;
@@ -3661,13 +4189,14 @@ function createExtractor(config) {
3661
4189
  onProgress?.(`Mapping document pages for ${primaryType} ${documentType}...`);
3662
4190
  const chunkSize = 8;
3663
4191
  const collectedAssignments = [];
4192
+ const formInventoryHint = formInventory?.forms.length ? formatFormInventoryForPageMap(formInventory.forms) : void 0;
3664
4193
  for (let startPage = 1; startPage <= pageCount; startPage += chunkSize) {
3665
4194
  const endPage = Math.min(pageCount, startPage + chunkSize - 1);
3666
4195
  const pagesPdf = await extractPageRange(pdfBase64, startPage, endPage);
3667
4196
  const mapResponse = await safeGenerateObject(
3668
4197
  generateObject,
3669
4198
  {
3670
- prompt: buildPageMapPrompt(templateHints, startPage, endPage),
4199
+ prompt: buildPageMapPrompt(templateHints, startPage, endPage, formInventoryHint),
3671
4200
  schema: PageMapChunkSchema,
3672
4201
  maxTokens: 2048,
3673
4202
  providerOptions: { ...providerOptions, pdfBase64: pagesPdf }
@@ -3699,10 +4228,12 @@ function createExtractor(config) {
3699
4228
  confidence: 0,
3700
4229
  notes: "Full-document fallback page assignment"
3701
4230
  }));
4231
+ pageAssignments = normalizePageAssignments(pageAssignments, formInventory);
3702
4232
  await pipelineCtx.save("page_map", {
3703
4233
  id,
3704
4234
  pageCount,
3705
4235
  classifyResult,
4236
+ formInventory,
3706
4237
  pageAssignments,
3707
4238
  memory: Object.fromEntries(memory)
3708
4239
  });
@@ -3718,6 +4249,7 @@ function createExtractor(config) {
3718
4249
  id,
3719
4250
  pageCount,
3720
4251
  classifyResult,
4252
+ formInventory,
3721
4253
  pageAssignments,
3722
4254
  plan,
3723
4255
  memory: Object.fromEntries(memory)
@@ -3766,12 +4298,16 @@ function createExtractor(config) {
3766
4298
  id,
3767
4299
  pageCount,
3768
4300
  classifyResult,
4301
+ formInventory,
3769
4302
  pageAssignments,
3770
4303
  plan,
3771
4304
  memory: Object.fromEntries(memory)
3772
4305
  });
3773
4306
  }
4307
+ let reviewRounds = resumed?.reviewReport?.reviewRoundRecords ?? [];
4308
+ let reviewReport = resumed?.reviewReport;
3774
4309
  if (!pipelineCtx.isPhaseComplete("review")) {
4310
+ reviewRounds = [];
3775
4311
  for (let round = 0; round < maxReviewRounds; round++) {
3776
4312
  const extractedKeys = [...memory.keys()].filter((k) => k !== "classify");
3777
4313
  const extractionSummary = summarizeExtraction(memory);
@@ -3791,6 +4327,7 @@ function createExtractor(config) {
3791
4327
  }
3792
4328
  );
3793
4329
  trackUsage(reviewResponse.usage);
4330
+ reviewRounds.push(toReviewRoundRecord(round + 1, reviewResponse.object));
3794
4331
  if (reviewResponse.object.qualityIssues?.length) {
3795
4332
  await log?.(`Review round ${round + 1} quality issues: ${reviewResponse.object.qualityIssues.join("; ")}`);
3796
4333
  }
@@ -3832,23 +4369,45 @@ function createExtractor(config) {
3832
4369
  }
3833
4370
  }
3834
4371
  }
4372
+ reviewReport = buildExtractionReviewReport({
4373
+ memory,
4374
+ pageAssignments,
4375
+ reviewRounds
4376
+ });
4377
+ if (reviewReport.issues.length > 0) {
4378
+ await log?.(
4379
+ `Deterministic review issues: ${reviewReport.issues.map((issue) => issue.message).join("; ")}`
4380
+ );
4381
+ }
4382
+ if (shouldFailQualityGate(qualityGate, reviewReport.qualityGateStatus)) {
4383
+ throw new Error("Extraction quality gate failed. See reviewReport for blocking issues.");
4384
+ }
3835
4385
  await pipelineCtx.save("review", {
3836
4386
  id,
3837
4387
  pageCount,
3838
4388
  classifyResult,
4389
+ formInventory,
3839
4390
  pageAssignments,
3840
4391
  plan,
4392
+ reviewReport,
3841
4393
  memory: Object.fromEntries(memory)
3842
4394
  });
3843
4395
  }
4396
+ reviewReport ?? (reviewReport = buildExtractionReviewReport({
4397
+ memory,
4398
+ pageAssignments,
4399
+ reviewRounds
4400
+ }));
3844
4401
  onProgress?.("Assembling document...");
3845
4402
  const document = assembleDocument(id, documentType, memory);
3846
4403
  await pipelineCtx.save("assemble", {
3847
4404
  id,
3848
4405
  pageCount,
3849
4406
  classifyResult,
4407
+ formInventory,
3850
4408
  pageAssignments,
3851
4409
  plan,
4410
+ reviewReport,
3852
4411
  memory: Object.fromEntries(memory),
3853
4412
  document
3854
4413
  });
@@ -3874,7 +4433,8 @@ function createExtractor(config) {
3874
4433
  callsWithUsage,
3875
4434
  callsMissingUsage
3876
4435
  },
3877
- checkpoint: finalCheckpoint
4436
+ checkpoint: finalCheckpoint,
4437
+ reviewReport
3878
4438
  };
3879
4439
  }
3880
4440
  return { extract };
@@ -4094,8 +4654,8 @@ Respond with JSON only:
4094
4654
  }`;
4095
4655
 
4096
4656
  // src/schemas/application.ts
4097
- import { z as z32 } from "zod";
4098
- var FieldTypeSchema = z32.enum([
4657
+ import { z as z33 } from "zod";
4658
+ var FieldTypeSchema = z33.enum([
4099
4659
  "text",
4100
4660
  "numeric",
4101
4661
  "currency",
@@ -4104,100 +4664,131 @@ var FieldTypeSchema = z32.enum([
4104
4664
  "table",
4105
4665
  "declaration"
4106
4666
  ]);
4107
- var ApplicationFieldSchema = z32.object({
4108
- id: z32.string(),
4109
- label: z32.string(),
4110
- section: z32.string(),
4667
+ var ApplicationFieldSchema = z33.object({
4668
+ id: z33.string(),
4669
+ label: z33.string(),
4670
+ section: z33.string(),
4111
4671
  fieldType: FieldTypeSchema,
4112
- required: z32.boolean(),
4113
- options: z32.array(z32.string()).optional(),
4114
- columns: z32.array(z32.string()).optional(),
4115
- requiresExplanationIfYes: z32.boolean().optional(),
4116
- condition: z32.object({
4117
- dependsOn: z32.string(),
4118
- whenValue: z32.string()
4672
+ required: z33.boolean(),
4673
+ options: z33.array(z33.string()).optional(),
4674
+ columns: z33.array(z33.string()).optional(),
4675
+ requiresExplanationIfYes: z33.boolean().optional(),
4676
+ condition: z33.object({
4677
+ dependsOn: z33.string(),
4678
+ whenValue: z33.string()
4119
4679
  }).optional(),
4120
- value: z32.string().optional(),
4121
- source: z32.string().optional().describe("Where the value came from: auto-fill, user, lookup"),
4122
- confidence: z32.enum(["confirmed", "high", "medium", "low"]).optional()
4680
+ value: z33.string().optional(),
4681
+ source: z33.string().optional().describe("Where the value came from: auto-fill, user, lookup"),
4682
+ confidence: z33.enum(["confirmed", "high", "medium", "low"]).optional()
4123
4683
  });
4124
- var ApplicationClassifyResultSchema = z32.object({
4125
- isApplication: z32.boolean(),
4126
- confidence: z32.number().min(0).max(1),
4127
- applicationType: z32.string().nullable()
4684
+ var ApplicationClassifyResultSchema = z33.object({
4685
+ isApplication: z33.boolean(),
4686
+ confidence: z33.number().min(0).max(1),
4687
+ applicationType: z33.string().nullable()
4688
+ });
4689
+ var FieldExtractionResultSchema = z33.object({
4690
+ fields: z33.array(ApplicationFieldSchema)
4691
+ });
4692
+ var AutoFillMatchSchema = z33.object({
4693
+ fieldId: z33.string(),
4694
+ value: z33.string(),
4695
+ confidence: z33.enum(["confirmed"]),
4696
+ contextKey: z33.string()
4697
+ });
4698
+ var AutoFillResultSchema = z33.object({
4699
+ matches: z33.array(AutoFillMatchSchema)
4128
4700
  });
4129
- var FieldExtractionResultSchema = z32.object({
4130
- fields: z32.array(ApplicationFieldSchema)
4701
+ var QuestionBatchResultSchema = z33.object({
4702
+ batches: z33.array(z33.array(z33.string()).describe("Array of field IDs in this batch"))
4131
4703
  });
4132
- var AutoFillMatchSchema = z32.object({
4133
- fieldId: z32.string(),
4134
- value: z32.string(),
4135
- confidence: z32.enum(["confirmed"]),
4136
- contextKey: z32.string()
4704
+ var LookupRequestSchema = z33.object({
4705
+ type: z33.string().describe("Type of lookup: 'records', 'website', 'policy'"),
4706
+ description: z33.string(),
4707
+ url: z33.string().optional(),
4708
+ targetFieldIds: z33.array(z33.string())
4137
4709
  });
4138
- var AutoFillResultSchema = z32.object({
4139
- matches: z32.array(AutoFillMatchSchema)
4710
+ var ReplyIntentSchema = z33.object({
4711
+ primaryIntent: z33.enum(["answers_only", "question", "lookup_request", "mixed"]),
4712
+ hasAnswers: z33.boolean(),
4713
+ questionText: z33.string().optional(),
4714
+ questionFieldIds: z33.array(z33.string()).optional(),
4715
+ lookupRequests: z33.array(LookupRequestSchema).optional()
4140
4716
  });
4141
- var QuestionBatchResultSchema = z32.object({
4142
- batches: z32.array(z32.array(z32.string()).describe("Array of field IDs in this batch"))
4717
+ var ParsedAnswerSchema = z33.object({
4718
+ fieldId: z33.string(),
4719
+ value: z33.string(),
4720
+ explanation: z33.string().optional()
4143
4721
  });
4144
- var LookupRequestSchema = z32.object({
4145
- type: z32.string().describe("Type of lookup: 'records', 'website', 'policy'"),
4146
- description: z32.string(),
4147
- url: z32.string().optional(),
4148
- targetFieldIds: z32.array(z32.string())
4722
+ var AnswerParsingResultSchema = z33.object({
4723
+ answers: z33.array(ParsedAnswerSchema),
4724
+ unanswered: z33.array(z33.string()).describe("Field IDs that were not answered")
4149
4725
  });
4150
- var ReplyIntentSchema = z32.object({
4151
- primaryIntent: z32.enum(["answers_only", "question", "lookup_request", "mixed"]),
4152
- hasAnswers: z32.boolean(),
4153
- questionText: z32.string().optional(),
4154
- questionFieldIds: z32.array(z32.string()).optional(),
4155
- lookupRequests: z32.array(LookupRequestSchema).optional()
4726
+ var LookupFillSchema = z33.object({
4727
+ fieldId: z33.string(),
4728
+ value: z33.string(),
4729
+ source: z33.string().describe("Specific citable reference, e.g. 'GL Policy #POL-12345 (Hartford)'")
4156
4730
  });
4157
- var ParsedAnswerSchema = z32.object({
4158
- fieldId: z32.string(),
4159
- value: z32.string(),
4160
- explanation: z32.string().optional()
4731
+ var LookupFillResultSchema = z33.object({
4732
+ fills: z33.array(LookupFillSchema),
4733
+ unfillable: z33.array(z33.string()),
4734
+ explanation: z33.string().optional()
4161
4735
  });
4162
- var AnswerParsingResultSchema = z32.object({
4163
- answers: z32.array(ParsedAnswerSchema),
4164
- unanswered: z32.array(z32.string()).describe("Field IDs that were not answered")
4736
+ var FlatPdfPlacementSchema = z33.object({
4737
+ fieldId: z33.string(),
4738
+ page: z33.number(),
4739
+ x: z33.number().describe("Percentage from left edge (0-100)"),
4740
+ y: z33.number().describe("Percentage from top edge (0-100)"),
4741
+ text: z33.string(),
4742
+ fontSize: z33.number().optional(),
4743
+ isCheckmark: z33.boolean().optional()
4165
4744
  });
4166
- var LookupFillSchema = z32.object({
4167
- fieldId: z32.string(),
4168
- value: z32.string(),
4169
- source: z32.string().describe("Specific citable reference, e.g. 'GL Policy #POL-12345 (Hartford)'")
4745
+ var AcroFormMappingSchema = z33.object({
4746
+ fieldId: z33.string(),
4747
+ acroFormName: z33.string(),
4748
+ value: z33.string()
4170
4749
  });
4171
- var LookupFillResultSchema = z32.object({
4172
- fills: z32.array(LookupFillSchema),
4173
- unfillable: z32.array(z32.string()),
4174
- explanation: z32.string().optional()
4750
+ var QualityGateStatusSchema = z33.enum(["passed", "warning", "failed"]);
4751
+ var QualitySeveritySchema = z33.enum(["info", "warning", "blocking"]);
4752
+ var ApplicationQualityIssueSchema = z33.object({
4753
+ code: z33.string(),
4754
+ severity: QualitySeveritySchema,
4755
+ message: z33.string(),
4756
+ fieldId: z33.string().optional()
4175
4757
  });
4176
- var FlatPdfPlacementSchema = z32.object({
4177
- fieldId: z32.string(),
4178
- page: z32.number(),
4179
- x: z32.number().describe("Percentage from left edge (0-100)"),
4180
- y: z32.number().describe("Percentage from top edge (0-100)"),
4181
- text: z32.string(),
4182
- fontSize: z32.number().optional(),
4183
- isCheckmark: z32.boolean().optional()
4758
+ var ApplicationQualityRoundSchema = z33.object({
4759
+ round: z33.number(),
4760
+ kind: z33.string(),
4761
+ status: QualityGateStatusSchema,
4762
+ summary: z33.string().optional()
4184
4763
  });
4185
- var AcroFormMappingSchema = z32.object({
4186
- fieldId: z32.string(),
4187
- acroFormName: z32.string(),
4188
- value: z32.string()
4764
+ var ApplicationQualityArtifactSchema = z33.object({
4765
+ kind: z33.string(),
4766
+ label: z33.string().optional(),
4767
+ itemCount: z33.number().optional()
4189
4768
  });
4190
- var ApplicationStateSchema = z32.object({
4191
- id: z32.string(),
4192
- pdfBase64: z32.string().optional().describe("Original PDF, omitted after extraction"),
4193
- title: z32.string().optional(),
4194
- applicationType: z32.string().nullable().optional(),
4195
- fields: z32.array(ApplicationFieldSchema),
4196
- batches: z32.array(z32.array(z32.string())).optional(),
4197
- currentBatchIndex: z32.number().default(0),
4198
- status: z32.enum(["classifying", "extracting", "auto_filling", "batching", "collecting", "confirming", "mapping", "complete"]),
4199
- createdAt: z32.number(),
4200
- updatedAt: z32.number()
4769
+ var ApplicationEmailReviewSchema = z33.object({
4770
+ issues: z33.array(ApplicationQualityIssueSchema),
4771
+ qualityGateStatus: QualityGateStatusSchema
4772
+ });
4773
+ var ApplicationQualityReportSchema = z33.object({
4774
+ issues: z33.array(ApplicationQualityIssueSchema),
4775
+ rounds: z33.array(ApplicationQualityRoundSchema).optional(),
4776
+ artifacts: z33.array(ApplicationQualityArtifactSchema).optional(),
4777
+ emailReview: ApplicationEmailReviewSchema.optional(),
4778
+ qualityGateStatus: QualityGateStatusSchema
4779
+ });
4780
+ var ApplicationStateSchema = z33.object({
4781
+ id: z33.string(),
4782
+ pdfBase64: z33.string().optional().describe("Original PDF, omitted after extraction"),
4783
+ title: z33.string().optional(),
4784
+ applicationType: z33.string().nullable().optional(),
4785
+ fields: z33.array(ApplicationFieldSchema),
4786
+ batches: z33.array(z33.array(z33.string())).optional(),
4787
+ currentBatchIndex: z33.number().default(0),
4788
+ qualityReport: ApplicationQualityReportSchema.optional(),
4789
+ status: z33.enum(["classifying", "extracting", "auto_filling", "batching", "collecting", "confirming", "mapping", "complete"]),
4790
+ createdAt: z33.number(),
4791
+ updatedAt: z33.number()
4201
4792
  });
4202
4793
 
4203
4794
  // src/application/agents/classifier.ts
@@ -4705,6 +5296,87 @@ async function generateBatchEmail(batchFields, batchIndex, totalBatches, opts, g
4705
5296
  return { text, usage };
4706
5297
  }
4707
5298
 
5299
+ // src/application/quality.ts
5300
/**
 * Reports whether a field's source string is too vague to cite.
 *
 * A source is vague when it is absent, not a string, blank after trimming,
 * exactly "unknown", "context" or "user provided" (case-insensitive), or when
 * it contains the phrases "existing records" or "available data".
 *
 * @param {string|undefined|null} source - Provenance text attached to a field.
 * @returns {boolean} `true` when the source is missing or non-citable.
 */
function isVagueSource(source) {
  // Non-strings cannot be cited (and would otherwise throw on .trim()).
  if (typeof source !== "string") return true;
  const normalized = source.trim().toLowerCase();
  // Whitespace-only text carries no more provenance than an empty string.
  if (normalized === "") return true;
  return (
    normalized === "unknown" ||
    normalized.includes("existing records") ||
    normalized.includes("available data") ||
    normalized === "context" ||
    normalized === "user provided"
  );
}
5305
/**
 * Runs deterministic quality checks over an application's fields and returns
 * a quality report.
 *
 * Checks per field:
 * - `duplicate_field_id` (blocking): the id was already seen on an earlier field.
 * - `required_field_unfilled` (warning): required but has no value.
 * - `filled_field_missing_source` (blocking): has a value but no (or blank)
 *   source.
 * - `filled_field_vague_source` (warning): has a value and a source, but
 *   `isVagueSource` rejects it. A missing source is reported only once, as
 *   the blocking issue above, not additionally as a vague one.
 * - `filled_field_low_confidence` (warning): has a value with confidence
 *   missing or "low".
 *
 * @param {{fields: Array<object>}} state - Application state; `id`, `label`,
 *   `required`, `value`, `source` and `confidence` are read from each field.
 * @returns {{issues: Array<object>, rounds: Array, artifacts: Array<object>, qualityGateStatus: *}}
 *   The report; `qualityGateStatus` is whatever `evaluateQualityGate` returns
 *   for the collected issues.
 */
function buildApplicationQualityReport(state) {
  const issues = [];
  const seenIds = /* @__PURE__ */ new Set();
  for (const field of state.fields) {
    if (seenIds.has(field.id)) {
      issues.push({
        code: "duplicate_field_id",
        severity: "blocking",
        message: `Field "${field.label}" has a duplicate id "${field.id}".`,
        fieldId: field.id
      });
    }
    seenIds.add(field.id);

    if (!field.value) {
      if (field.required) {
        issues.push({
          code: "required_field_unfilled",
          severity: "warning",
          message: `Required field "${field.label}" is still unfilled.`,
          fieldId: field.id
        });
      }
      // The remaining checks only apply to filled fields.
      continue;
    }

    // Blank/whitespace-only provenance is as good as none.
    const hasSource = typeof field.source === "string" && field.source.trim() !== "";
    if (!hasSource) {
      issues.push({
        code: "filled_field_missing_source",
        severity: "blocking",
        message: `Filled field "${field.label}" is missing source provenance.`,
        fieldId: field.id
      });
    } else if (isVagueSource(field.source)) {
      issues.push({
        code: "filled_field_vague_source",
        severity: "warning",
        message: `Filled field "${field.label}" has a vague or non-citable source.`,
        fieldId: field.id
      });
    }

    if (!field.confidence || field.confidence === "low") {
      issues.push({
        code: "filled_field_low_confidence",
        severity: "warning",
        message: `Filled field "${field.label}" has low or missing confidence.`,
        fieldId: field.id
      });
    }
  }
  return {
    issues,
    rounds: [],
    artifacts: [
      { kind: "application_fields", label: "Application Fields", itemCount: state.fields.length }
    ],
    qualityGateStatus: evaluateQualityGate({ issues })
  };
}
5360
/**
 * Checks that a generated batch email mentions every field it is supposed to
 * ask about.
 *
 * Matching is a case-insensitive substring test of each field label against
 * the email text. Runs of whitespace (including line breaks) are collapsed to
 * a single space on both sides first, so a label that the email wraps across
 * lines or double-spaces is still recognized. Labels shorter than six
 * characters after normalization are not checked.
 *
 * @param {string} text - Generated email body.
 * @param {Array<{id: string, label: string}>} batchFields - Fields the email
 *   should prompt for.
 * @returns {{issues: Array<object>, qualityGateStatus: *}} One
 *   `email_missing_field_prompt` warning per unmatched label, plus the status
 *   returned by `evaluateQualityGate` for those issues.
 */
function reviewBatchEmail(text, batchFields) {
  const collapse = (value) => value.toLowerCase().replace(/\s+/g, " ").trim();
  const normalizedEmail = collapse(text);
  const issues = [];
  for (const field of batchFields) {
    const label = collapse(field.label);
    if (label.length >= 6 && !normalizedEmail.includes(label)) {
      issues.push({
        code: "email_missing_field_prompt",
        severity: "warning",
        message: `Generated email does not clearly mention field "${field.label}".`,
        fieldId: field.id
      });
    }
  }
  return {
    issues,
    qualityGateStatus: evaluateQualityGate({ issues })
  };
}
5379
+
4708
5380
  // src/application/coordinator.ts
4709
5381
  function createApplicationPipeline(config) {
4710
5382
  const {
@@ -4719,7 +5391,8 @@ function createApplicationPipeline(config) {
4719
5391
  onTokenUsage,
4720
5392
  onProgress,
4721
5393
  log,
4722
- providerOptions
5394
+ providerOptions,
5395
+ qualityGate = "warn"
4723
5396
  } = config;
4724
5397
  const limit = pLimit(concurrency);
4725
5398
  let totalUsage = { inputTokens: 0, outputTokens: 0 };
@@ -4741,6 +5414,7 @@ function createApplicationPipeline(config) {
4741
5414
  title: void 0,
4742
5415
  applicationType: null,
4743
5416
  fields: [],
5417
+ qualityReport: void 0,
4744
5418
  batches: void 0,
4745
5419
  currentBatchIndex: 0,
4746
5420
  status: "classifying",
@@ -4765,8 +5439,9 @@ function createApplicationPipeline(config) {
4765
5439
  if (!classifyResult.isApplication) {
4766
5440
  state.status = "complete";
4767
5441
  state.updatedAt = Date.now();
5442
+ state.qualityReport = buildApplicationQualityReport(state);
4768
5443
  await applicationStore?.save(state);
4769
- return { state, tokenUsage: totalUsage };
5444
+ return { state, tokenUsage: totalUsage, reviewReport: state.qualityReport };
4770
5445
  }
4771
5446
  state.applicationType = classifyResult.applicationType;
4772
5447
  state.status = "extracting";
@@ -4790,8 +5465,9 @@ function createApplicationPipeline(config) {
4790
5465
  await log?.("No fields extracted, completing pipeline with empty result");
4791
5466
  state.status = "complete";
4792
5467
  state.updatedAt = Date.now();
5468
+ state.qualityReport = buildApplicationQualityReport(state);
4793
5469
  await applicationStore?.save(state);
4794
- return { state, tokenUsage: totalUsage };
5470
+ return { state, tokenUsage: totalUsage, reviewReport: state.qualityReport };
4795
5471
  }
4796
5472
  state.fields = fields;
4797
5473
  state.title = classifyResult.applicationType ?? void 0;
@@ -4891,11 +5567,15 @@ function createApplicationPipeline(config) {
4891
5567
  } else {
4892
5568
  state.status = "confirming";
4893
5569
  }
5570
+ state.qualityReport = buildApplicationQualityReport(state);
4894
5571
  state.updatedAt = Date.now();
4895
5572
  await applicationStore?.save(state);
5573
+ if (shouldFailQualityGate(qualityGate, state.qualityReport.qualityGateStatus)) {
5574
+ throw new Error("Application quality gate failed. See state.qualityReport for blocking issues.");
5575
+ }
4896
5576
  const filledCount = state.fields.filter((f) => f.value).length;
4897
5577
  onProgress?.(`Application processed: ${filledCount}/${state.fields.length} fields filled, ${state.batches?.length ?? 0} batches to collect.`);
4898
- return { state, tokenUsage: totalUsage };
5578
+ return { state, tokenUsage: totalUsage, reviewReport: state.qualityReport };
4899
5579
  }
4900
5580
  async function processReply(input) {
4901
5581
  totalUsage = { inputTokens: 0, outputTokens: 0 };
@@ -5042,6 +5722,11 @@ Provide a brief, helpful explanation (2-3 sentences). End with "Just reply with
5042
5722
  providerOptions
5043
5723
  );
5044
5724
  trackUsage(emailUsage);
5725
+ const emailReview = reviewBatchEmail(emailText, nextBatchFields);
5726
+ state.qualityReport = {
5727
+ ...buildApplicationQualityReport(state),
5728
+ emailReview
5729
+ };
5045
5730
  if (!responseText) {
5046
5731
  responseText = emailText;
5047
5732
  } else {
@@ -5057,13 +5742,18 @@ ${emailText}`;
5057
5742
  }
5058
5743
  }
5059
5744
  state.updatedAt = Date.now();
5745
+ state.qualityReport = state.qualityReport ?? buildApplicationQualityReport(state);
5060
5746
  await applicationStore?.save(state);
5747
+ if (shouldFailQualityGate(qualityGate, state.qualityReport.qualityGateStatus)) {
5748
+ throw new Error("Application quality gate failed. See state.qualityReport for blocking issues.");
5749
+ }
5061
5750
  return {
5062
5751
  state,
5063
5752
  intent: intent.primaryIntent,
5064
5753
  fieldsFilled,
5065
5754
  responseText,
5066
- tokenUsage: totalUsage
5755
+ tokenUsage: totalUsage,
5756
+ reviewReport: state.qualityReport
5067
5757
  };
5068
5758
  }
5069
5759
  async function generateCurrentBatchEmail(applicationId, opts) {
@@ -5089,6 +5779,12 @@ ${emailText}`;
5089
5779
  providerOptions
5090
5780
  );
5091
5781
  trackUsage(usage);
5782
+ const emailReview = reviewBatchEmail(text, batchFields);
5783
+ state.qualityReport = {
5784
+ ...buildApplicationQualityReport(state),
5785
+ emailReview
5786
+ };
5787
+ await applicationStore?.save(state);
5092
5788
  return { text, tokenUsage: totalUsage };
5093
5789
  }
5094
5790
  async function getConfirmationSummary(applicationId) {
@@ -5225,73 +5921,73 @@ Respond with the final answer, deduplicated citations array, overall confidence
5225
5921
  }
5226
5922
 
5227
5923
  // src/schemas/query.ts
5228
- import { z as z33 } from "zod";
5229
- var QueryIntentSchema = z33.enum([
5924
+ import { z as z34 } from "zod";
5925
+ var QueryIntentSchema = z34.enum([
5230
5926
  "policy_question",
5231
5927
  "coverage_comparison",
5232
5928
  "document_search",
5233
5929
  "claims_inquiry",
5234
5930
  "general_knowledge"
5235
5931
  ]);
5236
- var SubQuestionSchema = z33.object({
5237
- question: z33.string().describe("Atomic sub-question to retrieve and answer independently"),
5932
+ var SubQuestionSchema = z34.object({
5933
+ question: z34.string().describe("Atomic sub-question to retrieve and answer independently"),
5238
5934
  intent: QueryIntentSchema,
5239
- chunkTypes: z33.array(z33.string()).optional().describe("Chunk types to filter retrieval (e.g. coverage, endorsement, declaration)"),
5240
- documentFilters: z33.object({
5241
- type: z33.enum(["policy", "quote"]).optional(),
5242
- carrier: z33.string().optional(),
5243
- insuredName: z33.string().optional(),
5244
- policyNumber: z33.string().optional(),
5245
- quoteNumber: z33.string().optional()
5935
+ chunkTypes: z34.array(z34.string()).optional().describe("Chunk types to filter retrieval (e.g. coverage, endorsement, declaration)"),
5936
+ documentFilters: z34.object({
5937
+ type: z34.enum(["policy", "quote"]).optional(),
5938
+ carrier: z34.string().optional(),
5939
+ insuredName: z34.string().optional(),
5940
+ policyNumber: z34.string().optional(),
5941
+ quoteNumber: z34.string().optional()
5246
5942
  }).optional().describe("Structured filters to narrow document lookup")
5247
5943
  });
5248
- var QueryClassifyResultSchema = z33.object({
5944
+ var QueryClassifyResultSchema = z34.object({
5249
5945
  intent: QueryIntentSchema,
5250
- subQuestions: z33.array(SubQuestionSchema).min(1).describe("Decomposed atomic sub-questions"),
5251
- requiresDocumentLookup: z33.boolean().describe("Whether structured document lookup is needed"),
5252
- requiresChunkSearch: z33.boolean().describe("Whether semantic chunk search is needed"),
5253
- requiresConversationHistory: z33.boolean().describe("Whether conversation history is relevant")
5946
+ subQuestions: z34.array(SubQuestionSchema).min(1).describe("Decomposed atomic sub-questions"),
5947
+ requiresDocumentLookup: z34.boolean().describe("Whether structured document lookup is needed"),
5948
+ requiresChunkSearch: z34.boolean().describe("Whether semantic chunk search is needed"),
5949
+ requiresConversationHistory: z34.boolean().describe("Whether conversation history is relevant")
5254
5950
  });
5255
- var EvidenceItemSchema = z33.object({
5256
- source: z33.enum(["chunk", "document", "conversation"]),
5257
- chunkId: z33.string().optional(),
5258
- documentId: z33.string().optional(),
5259
- turnId: z33.string().optional(),
5260
- text: z33.string().describe("Text excerpt from the source"),
5261
- relevance: z33.number().min(0).max(1),
5262
- metadata: z33.array(z33.object({ key: z33.string(), value: z33.string() })).optional()
5951
+ var EvidenceItemSchema = z34.object({
5952
+ source: z34.enum(["chunk", "document", "conversation"]),
5953
+ chunkId: z34.string().optional(),
5954
+ documentId: z34.string().optional(),
5955
+ turnId: z34.string().optional(),
5956
+ text: z34.string().describe("Text excerpt from the source"),
5957
+ relevance: z34.number().min(0).max(1),
5958
+ metadata: z34.array(z34.object({ key: z34.string(), value: z34.string() })).optional()
5263
5959
  });
5264
- var RetrievalResultSchema = z33.object({
5265
- subQuestion: z33.string(),
5266
- evidence: z33.array(EvidenceItemSchema)
5960
+ var RetrievalResultSchema = z34.object({
5961
+ subQuestion: z34.string(),
5962
+ evidence: z34.array(EvidenceItemSchema)
5267
5963
  });
5268
- var CitationSchema = z33.object({
5269
- index: z33.number().describe("Citation number [1], [2], etc."),
5270
- chunkId: z33.string().describe("Source chunk ID, e.g. doc-123:coverage:2"),
5271
- documentId: z33.string(),
5272
- documentType: z33.enum(["policy", "quote"]).optional(),
5273
- field: z33.string().optional().describe("Specific field path, e.g. coverages[0].deductible"),
5274
- quote: z33.string().describe("Exact text from source that supports the claim"),
5275
- relevance: z33.number().min(0).max(1)
5964
+ var CitationSchema = z34.object({
5965
+ index: z34.number().describe("Citation number [1], [2], etc."),
5966
+ chunkId: z34.string().describe("Source chunk ID, e.g. doc-123:coverage:2"),
5967
+ documentId: z34.string(),
5968
+ documentType: z34.enum(["policy", "quote"]).optional(),
5969
+ field: z34.string().optional().describe("Specific field path, e.g. coverages[0].deductible"),
5970
+ quote: z34.string().describe("Exact text from source that supports the claim"),
5971
+ relevance: z34.number().min(0).max(1)
5276
5972
  });
5277
- var SubAnswerSchema = z33.object({
5278
- subQuestion: z33.string(),
5279
- answer: z33.string(),
5280
- citations: z33.array(CitationSchema),
5281
- confidence: z33.number().min(0).max(1),
5282
- needsMoreContext: z33.boolean().describe("True if evidence was insufficient to answer fully")
5973
+ var SubAnswerSchema = z34.object({
5974
+ subQuestion: z34.string(),
5975
+ answer: z34.string(),
5976
+ citations: z34.array(CitationSchema),
5977
+ confidence: z34.number().min(0).max(1),
5978
+ needsMoreContext: z34.boolean().describe("True if evidence was insufficient to answer fully")
5283
5979
  });
5284
- var VerifyResultSchema = z33.object({
5285
- approved: z33.boolean().describe("Whether all sub-answers are adequately grounded"),
5286
- issues: z33.array(z33.string()).describe("Specific grounding or consistency issues found"),
5287
- retrySubQuestions: z33.array(z33.string()).optional().describe("Sub-questions that need additional retrieval or re-reasoning")
5980
+ var VerifyResultSchema = z34.object({
5981
+ approved: z34.boolean().describe("Whether all sub-answers are adequately grounded"),
5982
+ issues: z34.array(z34.string()).describe("Specific grounding or consistency issues found"),
5983
+ retrySubQuestions: z34.array(z34.string()).optional().describe("Sub-questions that need additional retrieval or re-reasoning")
5288
5984
  });
5289
- var QueryResultSchema = z33.object({
5290
- answer: z33.string(),
5291
- citations: z33.array(CitationSchema),
5985
+ var QueryResultSchema = z34.object({
5986
+ answer: z34.string(),
5987
+ citations: z34.array(CitationSchema),
5292
5988
  intent: QueryIntentSchema,
5293
- confidence: z33.number().min(0).max(1),
5294
- followUp: z33.string().optional().describe("Suggested follow-up question if applicable")
5989
+ confidence: z34.number().min(0).max(1),
5990
+ followUp: z34.string().optional().describe("Suggested follow-up question if applicable")
5295
5991
  });
5296
5992
 
5297
5993
  // src/query/retriever.ts
@@ -5579,6 +6275,112 @@ async function verify(originalQuestion, subAnswers, allEvidence, config) {
5579
6275
  return { result: object, usage };
5580
6276
  }
5581
6277
 
6278
+ // src/query/quality.ts
6279
/**
 * Resolve the identifier under which a retrieved evidence item is indexed.
 *
 * Ids are tried in the order chunkId, documentId, turnId. Falsy ids (including
 * the empty string) fall through to the next candidate, which is the same rule
 * citationSourceId applies to citations. A nullish-only fallthrough would let a
 * blank chunkId mask a usable documentId.
 *
 * @param {object} evidence - Evidence item; only `chunkId`, `documentId` and
 *   `turnId` are read.
 * @returns {*} The first truthy id. When none is truthy, the `turnId` value is
 *   returned as-is (typically `undefined`).
 */
function sourceIdForEvidence(evidence) {
  return evidence.chunkId || evidence.documentId || evidence.turnId;
}
6282
/**
 * Pick the id a citation points at: its chunk id when that is truthy,
 * otherwise its document id.
 *
 * @param {object} citation - Citation; only `chunkId` and `documentId` are read.
 * @returns {*} `citation.chunkId` if truthy, else `citation.documentId`.
 */
function citationSourceId(citation) {
  const chunkRef = citation.chunkId;
  if (chunkRef) {
    return chunkRef;
  }
  return citation.documentId;
}
6285
/**
 * Build a rule-based review report for a query run.
 *
 * Checks performed, in order:
 *  1. Per sub-answer: an answer that does not ask for more context but has no
 *     citations ("subanswer_missing_citations"), and a confidence >= 0.85 with
 *     no citations ("subanswer_high_confidence_without_citations"). Both can
 *     fire for the same sub-answer.
 *  2. Per citation: the citation's source id must match indexed evidence
 *     ("citation_missing_from_evidence"), and its quote must appear verbatim in
 *     the text of at least one evidence item sharing that id
 *     ("citation_quote_not_in_evidence", warning only).
 *  3. Final result (when provided): a non-blank answer with confidence > 0.4
 *     and no citations ("final_answer_missing_citations"), and any citation
 *     whose index/chunkId/documentId triple does not occur in a sub-answer
 *     ("final_answer_unknown_citation", warning only).
 *
 * @param {object} params
 * @param {Array<object>} params.subAnswers - Items read for `subQuestion`,
 *   `confidence`, `needsMoreContext` and `citations`.
 * @param {Array<object>} params.evidence - Retrieved evidence; `text` is read
 *   for quote matching.
 * @param {object} [params.finalResult] - Final answer; its checks are skipped
 *   when falsy.
 * @param {Array<object>} params.verifyRounds - Items read for `round`,
 *   `approved` and `issues`.
 * @returns {{issues: Array<object>, rounds: Array<object>, artifacts: Array<object>, verifyRounds: Array<object>, qualityGateStatus: *}}
 *   `verifyRounds` is the input array itself; `qualityGateStatus` is whatever
 *   evaluateQualityGate returns.
 */
function buildQueryReviewReport(params) {
  const { subAnswers, evidence, finalResult, verifyRounds } = params;
  const issues = [];

  // Group evidence by source id. Buckets are appended to in place; rebuilding
  // the array on each insert would copy the bucket every time.
  const evidenceBySource = /* @__PURE__ */ new Map();
  for (const item of evidence) {
    const sourceId = sourceIdForEvidence(item);
    if (!sourceId) continue;
    const bucket = evidenceBySource.get(sourceId);
    if (bucket) {
      bucket.push(item);
    } else {
      evidenceBySource.set(sourceId, [item]);
    }
  }

  for (const subAnswer of subAnswers) {
    if (!subAnswer.needsMoreContext && subAnswer.citations.length === 0) {
      issues.push({
        code: "subanswer_missing_citations",
        severity: "blocking",
        message: `Sub-answer "${subAnswer.subQuestion}" has no citations despite claiming an answer.`,
        subQuestion: subAnswer.subQuestion
      });
    }
    if (subAnswer.confidence >= 0.85 && subAnswer.citations.length === 0) {
      issues.push({
        code: "subanswer_high_confidence_without_citations",
        severity: "blocking",
        message: `Sub-answer "${subAnswer.subQuestion}" has high confidence without citations.`,
        subQuestion: subAnswer.subQuestion
      });
    }
    for (const citation of subAnswer.citations) {
      const sourceId = citationSourceId(citation);
      const supportedEvidence = sourceId ? evidenceBySource.get(sourceId) ?? [] : [];
      if (!sourceId || supportedEvidence.length === 0) {
        issues.push({
          code: "citation_missing_from_evidence",
          severity: "blocking",
          message: `Citation [${citation.index}] in "${subAnswer.subQuestion}" does not map to retrieved evidence.`,
          subQuestion: subAnswer.subQuestion,
          citationIndex: citation.index,
          sourceId
        });
        // No evidence to compare against, so the quote check is skipped.
        continue;
      }
      const quoteFound = supportedEvidence.some((item) => item.text.includes(citation.quote));
      if (!quoteFound) {
        issues.push({
          code: "citation_quote_not_in_evidence",
          severity: "warning",
          message: `Citation [${citation.index}] quote in "${subAnswer.subQuestion}" was not found verbatim in retrieved evidence.`,
          subQuestion: subAnswer.subQuestion,
          citationIndex: citation.index,
          sourceId
        });
      }
    }
  }

  if (finalResult) {
    if (finalResult.answer.trim().length > 0 && finalResult.citations.length === 0 && finalResult.confidence > 0.4) {
      issues.push({
        code: "final_answer_missing_citations",
        severity: "blocking",
        message: "Final answer has non-trivial confidence but no citations."
      });
    }
    // A final citation is "known" only if the same index/chunk/document triple
    // appears in some sub-answer.
    const knownCitationIds = new Set(
      subAnswers.flatMap((sa) => sa.citations.map((citation) => `${citation.index}|${citation.chunkId}|${citation.documentId}`))
    );
    for (const citation of finalResult.citations) {
      const key = `${citation.index}|${citation.chunkId}|${citation.documentId}`;
      if (!knownCitationIds.has(key)) {
        issues.push({
          code: "final_answer_unknown_citation",
          severity: "warning",
          message: `Final answer citation [${citation.index}] was not present in verified sub-answers.`,
          citationIndex: citation.index,
          sourceId: citationSourceId(citation)
        });
      }
    }
  }

  // A round counts as passed only when it was approved with no issues listed.
  const rounds = verifyRounds.map((round) => ({
    round: round.round,
    kind: "verification",
    status: round.approved && round.issues.length === 0 ? "passed" : "warning",
    summary: round.issues[0] ?? (round.approved ? "Verification passed." : "Verification requested retry.")
  }));
  const artifacts = [
    { kind: "evidence", label: "Retrieved Evidence", itemCount: evidence.length },
    { kind: "sub_answers", label: "Sub Answers", itemCount: subAnswers.length }
  ];
  return {
    issues,
    rounds,
    artifacts,
    verifyRounds,
    qualityGateStatus: evaluateQualityGate({
      issues,
      hasRoundWarnings: verifyRounds.some((round) => !round.approved || round.issues.length > 0)
    })
  };
}
6383
+
5582
6384
  // src/query/coordinator.ts
5583
6385
  function createQueryAgent(config) {
5584
6386
  const {
@@ -5592,7 +6394,8 @@ function createQueryAgent(config) {
5592
6394
  onTokenUsage,
5593
6395
  onProgress,
5594
6396
  log,
5595
- providerOptions
6397
+ providerOptions,
6398
+ qualityGate = "warn"
5596
6399
  } = config;
5597
6400
  const limit = pLimit(concurrency);
5598
6401
  let totalUsage = { inputTokens: 0, outputTokens: 0 };
@@ -5661,6 +6464,7 @@ function createQueryAgent(config) {
5661
6464
  await pipelineCtx.save("reason", { classification, evidence: allEvidence, subAnswers });
5662
6465
  onProgress?.("Verifying answer grounding...");
5663
6466
  const verifierConfig = { generateObject, providerOptions };
6467
+ const verifyRounds = [];
5664
6468
  for (let round = 0; round < maxVerifyRounds; round++) {
5665
6469
  const { result: verifyResult, usage } = await safeVerify(
5666
6470
  question,
@@ -5669,6 +6473,12 @@ function createQueryAgent(config) {
5669
6473
  verifierConfig
5670
6474
  );
5671
6475
  trackUsage(usage);
6476
+ verifyRounds.push({
6477
+ round: round + 1,
6478
+ approved: verifyResult.approved,
6479
+ issues: verifyResult.issues,
6480
+ retrySubQuestions: verifyResult.retrySubQuestions
6481
+ });
5672
6482
  if (verifyResult.approved) {
5673
6483
  onProgress?.("Verification passed.");
5674
6484
  break;
@@ -5726,6 +6536,24 @@ function createQueryAgent(config) {
5726
6536
  classification,
5727
6537
  context?.platform
5728
6538
  );
6539
+ const reviewReport = buildQueryReviewReport({
6540
+ subAnswers,
6541
+ evidence: allEvidence,
6542
+ finalResult: queryResult,
6543
+ verifyRounds
6544
+ });
6545
+ await pipelineCtx.save("review", {
6546
+ classification,
6547
+ evidence: allEvidence,
6548
+ subAnswers,
6549
+ reviewReport
6550
+ });
6551
+ if (reviewReport.issues.length > 0) {
6552
+ await log?.(`Query deterministic review issues: ${reviewReport.issues.map((issue) => issue.message).join("; ")}`);
6553
+ }
6554
+ if (shouldFailQualityGate(qualityGate, reviewReport.qualityGateStatus)) {
6555
+ throw new Error("Query quality gate failed. See reviewReport for blocking issues.");
6556
+ }
5729
6557
  if (conversationId) {
5730
6558
  try {
5731
6559
  await memoryStore.addTurn({
@@ -5746,7 +6574,7 @@ function createQueryAgent(config) {
5746
6574
  await log?.(`Failed to store conversation turn: ${e}`);
5747
6575
  }
5748
6576
  }
5749
- return { ...queryResult, tokenUsage: totalUsage };
6577
+ return { ...queryResult, tokenUsage: totalUsage, reviewReport };
5750
6578
  }
5751
6579
  async function classify(question, conversationId) {
5752
6580
  let conversationContext;
@@ -5967,7 +6795,12 @@ export {
5967
6795
  AdmittedStatusSchema,
5968
6796
  AnswerParsingResultSchema,
5969
6797
  ApplicationClassifyResultSchema,
6798
+ ApplicationEmailReviewSchema,
5970
6799
  ApplicationFieldSchema,
6800
+ ApplicationQualityArtifactSchema,
6801
+ ApplicationQualityIssueSchema,
6802
+ ApplicationQualityReportSchema,
6803
+ ApplicationQualityRoundSchema,
5971
6804
  ApplicationStateSchema,
5972
6805
  AuditTypeSchema,
5973
6806
  AutoFillMatchSchema,
@@ -5999,6 +6832,7 @@ export {
5999
6832
  CoverageFormSchema,
6000
6833
  CoverageSchema,
6001
6834
  CoverageTriggerSchema,
6835
+ CoverageValueTypeSchema,
6002
6836
  CrimeDeclarationsSchema,
6003
6837
  CyberDeclarationsSchema,
6004
6838
  DEDUCTIBLE_TYPES,