@claritylabs/cl-sdk 0.8.1 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -29,7 +29,12 @@ __export(index_exports, {
29
29
  AdmittedStatusSchema: () => AdmittedStatusSchema,
30
30
  AnswerParsingResultSchema: () => AnswerParsingResultSchema,
31
31
  ApplicationClassifyResultSchema: () => ApplicationClassifyResultSchema,
32
+ ApplicationEmailReviewSchema: () => ApplicationEmailReviewSchema,
32
33
  ApplicationFieldSchema: () => ApplicationFieldSchema,
34
+ ApplicationQualityArtifactSchema: () => ApplicationQualityArtifactSchema,
35
+ ApplicationQualityIssueSchema: () => ApplicationQualityIssueSchema,
36
+ ApplicationQualityReportSchema: () => ApplicationQualityReportSchema,
37
+ ApplicationQualityRoundSchema: () => ApplicationQualityRoundSchema,
33
38
  ApplicationStateSchema: () => ApplicationStateSchema,
34
39
  AuditTypeSchema: () => AuditTypeSchema,
35
40
  AutoFillMatchSchema: () => AutoFillMatchSchema,
@@ -61,6 +66,7 @@ __export(index_exports, {
61
66
  CoverageFormSchema: () => CoverageFormSchema,
62
67
  CoverageSchema: () => CoverageSchema,
63
68
  CoverageTriggerSchema: () => CoverageTriggerSchema,
69
+ CoverageValueTypeSchema: () => CoverageValueTypeSchema,
64
70
  CrimeDeclarationsSchema: () => CrimeDeclarationsSchema,
65
71
  CyberDeclarationsSchema: () => CyberDeclarationsSchema,
66
72
  DEDUCTIBLE_TYPES: () => DEDUCTIBLE_TYPES,
@@ -730,7 +736,9 @@ var FormReferenceSchema = import_zod3.z.object({
730
736
  formNumber: import_zod3.z.string(),
731
737
  editionDate: import_zod3.z.string().optional(),
732
738
  title: import_zod3.z.string().optional(),
733
- formType: import_zod3.z.enum(["coverage", "endorsement", "declarations", "application", "notice", "other"])
739
+ formType: import_zod3.z.enum(["coverage", "endorsement", "declarations", "application", "notice", "other"]),
740
+ pageStart: import_zod3.z.number().optional(),
741
+ pageEnd: import_zod3.z.number().optional()
734
742
  });
735
743
  var TaxFeeItemSchema = import_zod3.z.object({
736
744
  name: import_zod3.z.string(),
@@ -767,12 +775,25 @@ var NamedInsuredSchema = import_zod3.z.object({
767
775
 
768
776
  // src/schemas/coverage.ts
769
777
  var import_zod4 = require("zod");
778
+ var CoverageValueTypeSchema = import_zod4.z.enum([
779
+ "numeric",
780
+ "included",
781
+ "not_included",
782
+ "as_stated",
783
+ "waiting_period",
784
+ "referential",
785
+ "other"
786
+ ]);
770
787
  var CoverageSchema = import_zod4.z.object({
771
788
  name: import_zod4.z.string(),
772
789
  limit: import_zod4.z.string(),
790
+ limitValueType: CoverageValueTypeSchema.optional(),
773
791
  deductible: import_zod4.z.string().optional(),
792
+ deductibleValueType: CoverageValueTypeSchema.optional(),
793
+ formNumber: import_zod4.z.string().optional(),
774
794
  pageNumber: import_zod4.z.number().optional(),
775
- sectionRef: import_zod4.z.string().optional()
795
+ sectionRef: import_zod4.z.string().optional(),
796
+ originalContent: import_zod4.z.string().optional()
776
797
  });
777
798
  var EnrichedCoverageSchema = import_zod4.z.object({
778
799
  name: import_zod4.z.string(),
@@ -781,8 +802,10 @@ var EnrichedCoverageSchema = import_zod4.z.object({
781
802
  formEditionDate: import_zod4.z.string().optional(),
782
803
  limit: import_zod4.z.string(),
783
804
  limitType: LimitTypeSchema.optional(),
805
+ limitValueType: CoverageValueTypeSchema.optional(),
784
806
  deductible: import_zod4.z.string().optional(),
785
807
  deductibleType: DeductibleTypeSchema.optional(),
808
+ deductibleValueType: CoverageValueTypeSchema.optional(),
786
809
  sir: import_zod4.z.string().optional(),
787
810
  sublimit: import_zod4.z.string().optional(),
788
811
  coinsurance: import_zod4.z.string().optional(),
@@ -793,7 +816,8 @@ var EnrichedCoverageSchema = import_zod4.z.object({
793
816
  included: import_zod4.z.boolean(),
794
817
  premium: import_zod4.z.string().optional(),
795
818
  pageNumber: import_zod4.z.number().optional(),
796
- sectionRef: import_zod4.z.string().optional()
819
+ sectionRef: import_zod4.z.string().optional(),
820
+ originalContent: import_zod4.z.string().optional()
797
821
  });
798
822
 
799
823
  // src/schemas/endorsement.ts
@@ -1802,6 +1826,7 @@ function assembleDocument(documentId, documentType, memory) {
1802
1826
  const lossHistory = memory.get("loss_history");
1803
1827
  const sections = memory.get("sections");
1804
1828
  const supplementary = memory.get("supplementary");
1829
+ const formInventory = memory.get("form_inventory");
1805
1830
  const classify = memory.get("classify");
1806
1831
  const base = {
1807
1832
  id: documentId,
@@ -1818,6 +1843,7 @@ function assembleDocument(documentId, documentType, memory) {
1818
1843
  exclusions: exclusions?.exclusions,
1819
1844
  conditions: conditions?.conditions,
1820
1845
  sections: sections?.sections,
1846
+ formInventory: formInventory?.forms,
1821
1847
  declarations: declarations ? sanitizeNulls(declarations) : void 0,
1822
1848
  ...sanitizeNulls(lossHistory ?? {})
1823
1849
  };
@@ -2059,6 +2085,11 @@ async function formatDocumentContent(doc, generateText, options) {
2059
2085
  function chunkDocument(doc) {
2060
2086
  const chunks = [];
2061
2087
  const docId = doc.id;
2088
+ function stringMetadata(entries) {
2089
+ return Object.fromEntries(
2090
+ Object.entries(entries).filter(([, value]) => value !== void 0 && value !== null && String(value).length > 0).map(([key, value]) => [key, String(value)])
2091
+ );
2092
+ }
2062
2093
  chunks.push({
2063
2094
  id: `${docId}:carrier_info:0`,
2064
2095
  documentId: docId,
@@ -2070,7 +2101,7 @@ function chunkDocument(doc) {
2070
2101
  doc.carrierAmBestRating ? `AM Best: ${doc.carrierAmBestRating}` : null,
2071
2102
  doc.mga ? `MGA: ${doc.mga}` : null
2072
2103
  ].filter(Boolean).join("\n"),
2073
- metadata: { carrier: doc.carrier, documentType: doc.type }
2104
+ metadata: stringMetadata({ carrier: doc.carrier, documentType: doc.type })
2074
2105
  });
2075
2106
  chunks.push({
2076
2107
  id: `${docId}:named_insured:0`,
@@ -2082,17 +2113,32 @@ function chunkDocument(doc) {
2082
2113
  doc.insuredFein ? `FEIN: ${doc.insuredFein}` : null,
2083
2114
  doc.insuredAddress ? `Address: ${doc.insuredAddress.street1}, ${doc.insuredAddress.city}, ${doc.insuredAddress.state} ${doc.insuredAddress.zip}` : null
2084
2115
  ].filter(Boolean).join("\n"),
2085
- metadata: { insuredName: doc.insuredName, documentType: doc.type }
2116
+ metadata: stringMetadata({ insuredName: doc.insuredName, documentType: doc.type })
2086
2117
  });
2087
2118
  doc.coverages.forEach((cov, i) => {
2088
2119
  chunks.push({
2089
2120
  id: `${docId}:coverage:${i}`,
2090
2121
  documentId: docId,
2091
2122
  type: "coverage",
2092
- text: `Coverage: ${cov.name}
2093
- Limit: ${cov.limit}${cov.deductible ? `
2094
- Deductible: ${cov.deductible}` : ""}`,
2095
- metadata: { coverageName: cov.name, limit: cov.limit, documentType: doc.type }
2123
+ text: [
2124
+ `Coverage: ${cov.name}`,
2125
+ `Limit: ${cov.limit}`,
2126
+ cov.limitValueType ? `Limit Type: ${cov.limitValueType}` : null,
2127
+ cov.deductible ? `Deductible: ${cov.deductible}` : null,
2128
+ cov.deductibleValueType ? `Deductible Type: ${cov.deductibleValueType}` : null,
2129
+ cov.originalContent ? `Source: ${cov.originalContent}` : null
2130
+ ].filter(Boolean).join("\n"),
2131
+ metadata: stringMetadata({
2132
+ coverageName: cov.name,
2133
+ limit: cov.limit,
2134
+ limitValueType: cov.limitValueType,
2135
+ deductible: cov.deductible,
2136
+ deductibleValueType: cov.deductibleValueType,
2137
+ formNumber: cov.formNumber,
2138
+ pageNumber: cov.pageNumber,
2139
+ sectionRef: cov.sectionRef,
2140
+ documentType: doc.type
2141
+ })
2096
2142
  });
2097
2143
  });
2098
2144
  doc.endorsements?.forEach((end, i) => {
@@ -2102,7 +2148,13 @@ Deductible: ${cov.deductible}` : ""}`,
2102
2148
  type: "endorsement",
2103
2149
  text: `Endorsement: ${end.title}
2104
2150
  ${end.content}`.trim(),
2105
- metadata: { endorsementType: end.endorsementType, formNumber: end.formNumber, documentType: doc.type }
2151
+ metadata: stringMetadata({
2152
+ endorsementType: end.endorsementType,
2153
+ formNumber: end.formNumber,
2154
+ pageStart: end.pageStart,
2155
+ pageEnd: end.pageEnd,
2156
+ documentType: doc.type
2157
+ })
2106
2158
  });
2107
2159
  });
2108
2160
  doc.exclusions?.forEach((exc, i) => {
@@ -2112,7 +2164,7 @@ ${end.content}`.trim(),
2112
2164
  type: "exclusion",
2113
2165
  text: `Exclusion: ${exc.name}
2114
2166
  ${exc.content}`.trim(),
2115
- metadata: { documentType: doc.type }
2167
+ metadata: stringMetadata({ formNumber: exc.formNumber, pageNumber: exc.pageNumber, documentType: doc.type })
2116
2168
  });
2117
2169
  });
2118
2170
  doc.sections?.forEach((sec, i) => {
@@ -2122,7 +2174,7 @@ ${exc.content}`.trim(),
2122
2174
  type: "section",
2123
2175
  text: `Section: ${sec.title}
2124
2176
  ${sec.content}`,
2125
- metadata: { sectionType: sec.type, documentType: doc.type }
2177
+ metadata: stringMetadata({ sectionType: sec.type, pageStart: sec.pageStart, pageEnd: sec.pageEnd, documentType: doc.type })
2126
2178
  });
2127
2179
  });
2128
2180
  if (doc.premium) {
@@ -2132,12 +2184,138 @@ ${sec.content}`,
2132
2184
  type: "premium",
2133
2185
  text: `Premium: ${doc.premium}${doc.totalCost ? `
2134
2186
  Total Cost: ${doc.totalCost}` : ""}`,
2135
- metadata: { premium: doc.premium, documentType: doc.type }
2187
+ metadata: stringMetadata({ premium: doc.premium, documentType: doc.type })
2136
2188
  });
2137
2189
  }
2138
2190
  return chunks;
2139
2191
  }
2140
2192
 
2193
+ // src/extraction/merge.ts
2194
+ function isPresent(value) {
2195
+ if (value === void 0 || value === null) return false;
2196
+ if (typeof value === "string") return value.trim().length > 0;
2197
+ if (Array.isArray(value)) return value.length > 0;
2198
+ return true;
2199
+ }
2200
+ function dedupeByKey(items, keyFn) {
2201
+ const seen = /* @__PURE__ */ new Set();
2202
+ const merged = [];
2203
+ for (const item of items) {
2204
+ const key = keyFn(item);
2205
+ if (seen.has(key)) continue;
2206
+ seen.add(key);
2207
+ merged.push(item);
2208
+ }
2209
+ return merged;
2210
+ }
2211
+ function mergeUniqueObjects(existing, incoming, keyFn) {
2212
+ return dedupeByKey([...existing, ...incoming], keyFn);
2213
+ }
2214
+ function mergeShallowPreferPresent(existing, incoming) {
2215
+ const merged = { ...existing };
2216
+ for (const [key, value] of Object.entries(incoming)) {
2217
+ const current = merged[key];
2218
+ if (Array.isArray(current) && Array.isArray(value)) {
2219
+ merged[key] = [...current, ...value];
2220
+ continue;
2221
+ }
2222
+ if (current && value && typeof current === "object" && typeof value === "object" && !Array.isArray(current) && !Array.isArray(value)) {
2223
+ merged[key] = mergeShallowPreferPresent(
2224
+ current,
2225
+ value
2226
+ );
2227
+ continue;
2228
+ }
2229
+ if (!isPresent(current) && isPresent(value)) {
2230
+ merged[key] = value;
2231
+ }
2232
+ }
2233
+ return merged;
2234
+ }
2235
+ function mergeCoverageLimits(existing, incoming) {
2236
+ const merged = mergeShallowPreferPresent(existing, incoming);
2237
+ const existingCoverages = Array.isArray(existing.coverages) ? existing.coverages : [];
2238
+ const incomingCoverages = Array.isArray(incoming.coverages) ? incoming.coverages : [];
2239
+ const coverageKey = (coverage) => [
2240
+ String(coverage.name ?? "").toLowerCase(),
2241
+ String(coverage.limit ?? "").toLowerCase(),
2242
+ String(coverage.deductible ?? "").toLowerCase(),
2243
+ String(coverage.formNumber ?? "").toLowerCase()
2244
+ ].join("|");
2245
+ const byKey = /* @__PURE__ */ new Map();
2246
+ for (const coverage of [...existingCoverages, ...incomingCoverages]) {
2247
+ const key = coverageKey(coverage);
2248
+ const current = byKey.get(key);
2249
+ byKey.set(key, current ? mergeShallowPreferPresent(current, coverage) : coverage);
2250
+ }
2251
+ merged.coverages = [...byKey.values()];
2252
+ return merged;
2253
+ }
2254
+ function mergeDeclarations(existing, incoming) {
2255
+ const merged = mergeShallowPreferPresent(existing, incoming);
2256
+ const existingFields = Array.isArray(existing.fields) ? existing.fields : [];
2257
+ const incomingFields = Array.isArray(incoming.fields) ? incoming.fields : [];
2258
+ merged.fields = mergeUniqueObjects(existingFields, incomingFields, (field) => [
2259
+ String(field.field ?? "").toLowerCase(),
2260
+ String(field.value ?? "").toLowerCase(),
2261
+ String(field.section ?? "").toLowerCase()
2262
+ ].join("|"));
2263
+ return merged;
2264
+ }
2265
+ function mergeArrayPayload(existing, incoming, arrayKey, keyFn) {
2266
+ const merged = mergeShallowPreferPresent(existing, incoming);
2267
+ const existingItems = Array.isArray(existing[arrayKey]) ? existing[arrayKey] : [];
2268
+ const incomingItems = Array.isArray(incoming[arrayKey]) ? incoming[arrayKey] : [];
2269
+ merged[arrayKey] = mergeUniqueObjects(existingItems, incomingItems, keyFn);
2270
+ return merged;
2271
+ }
2272
+ function mergeExtractorResult(extractorName, existing, incoming) {
2273
+ if (!existing) return incoming;
2274
+ if (!incoming) return existing;
2275
+ if (typeof existing !== "object" || typeof incoming !== "object") return incoming;
2276
+ const current = existing;
2277
+ const next = incoming;
2278
+ switch (extractorName) {
2279
+ case "carrier_info":
2280
+ case "named_insured":
2281
+ case "loss_history":
2282
+ case "supplementary":
2283
+ case "premium_breakdown":
2284
+ return mergeShallowPreferPresent(current, next);
2285
+ case "coverage_limits":
2286
+ return mergeCoverageLimits(current, next);
2287
+ case "declarations":
2288
+ return mergeDeclarations(current, next);
2289
+ case "endorsements":
2290
+ return mergeArrayPayload(current, next, "endorsements", (item) => [
2291
+ String(item.formNumber ?? "").toLowerCase(),
2292
+ String(item.title ?? "").toLowerCase(),
2293
+ String(item.pageStart ?? "")
2294
+ ].join("|"));
2295
+ case "exclusions":
2296
+ return mergeArrayPayload(current, next, "exclusions", (item) => [
2297
+ String(item.name ?? "").toLowerCase(),
2298
+ String(item.formNumber ?? "").toLowerCase(),
2299
+ String(item.pageNumber ?? "")
2300
+ ].join("|"));
2301
+ case "conditions":
2302
+ return mergeArrayPayload(current, next, "conditions", (item) => [
2303
+ String(item.name ?? "").toLowerCase(),
2304
+ String(item.conditionType ?? "").toLowerCase(),
2305
+ String(item.pageNumber ?? "")
2306
+ ].join("|"));
2307
+ case "sections":
2308
+ return mergeArrayPayload(current, next, "sections", (item) => [
2309
+ String(item.title ?? "").toLowerCase(),
2310
+ String(item.type ?? "").toLowerCase(),
2311
+ String(item.pageStart ?? ""),
2312
+ String(item.pageEnd ?? "")
2313
+ ].join("|"));
2314
+ default:
2315
+ return mergeShallowPreferPresent(current, next);
2316
+ }
2317
+ }
2318
+
2141
2319
  // src/prompts/templates/homeowners.ts
2142
2320
  var HOMEOWNERS_TEMPLATE = {
2143
2321
  type: "homeowners",
@@ -2927,74 +3105,156 @@ Return JSON only:
2927
3105
  }`;
2928
3106
  }
2929
3107
 
2930
- // src/prompts/coordinator/plan.ts
3108
+ // src/prompts/coordinator/form-inventory.ts
2931
3109
  var import_zod19 = require("zod");
2932
- var ExtractionTaskSchema = import_zod19.z.object({
2933
- extractorName: import_zod19.z.string(),
2934
- startPage: import_zod19.z.number(),
2935
- endPage: import_zod19.z.number(),
2936
- description: import_zod19.z.string()
3110
+ var FormInventoryEntrySchema = FormReferenceSchema.extend({
3111
+ formNumber: FormReferenceSchema.shape.formNumber.describe("Form number or identifier, e.g. PR5070CF"),
3112
+ pageStart: FormReferenceSchema.shape.pageStart.describe("Original document page where the form begins"),
3113
+ pageEnd: FormReferenceSchema.shape.pageEnd.describe("Original document page where the form ends")
2937
3114
  });
2938
- var PageMapEntrySchema = import_zod19.z.object({
2939
- section: import_zod19.z.string(),
2940
- pages: import_zod19.z.string()
3115
+ var FormInventorySchema = import_zod19.z.object({
3116
+ forms: import_zod19.z.array(FormInventoryEntrySchema)
2941
3117
  });
2942
- var ExtractionPlanSchema = import_zod19.z.object({
2943
- tasks: import_zod19.z.array(ExtractionTaskSchema),
2944
- pageMap: import_zod19.z.array(PageMapEntrySchema).optional()
2945
- });
2946
- function buildPlanPrompt(templateHints) {
2947
- return `You are planning the extraction of an insurance document. You have already classified this document. Now scan the full document and create a page map + extraction plan.
3118
+ function buildFormInventoryPrompt(templateHints) {
3119
+ return `You are building a form inventory for an insurance document.
2948
3120
 
2949
3121
  DOCUMENT TYPE HINTS:
2950
3122
  ${templateHints}
2951
3123
 
2952
- For each section of the document, decide which extractor should handle it and which pages to send.
3124
+ Extract every distinct declarations page set, policy form, coverage form, endorsement, application form, and notice form that appears in the document.
3125
+
3126
+ For EACH form, extract:
3127
+ - formNumber: REQUIRED when present
3128
+ - editionDate: if shown
3129
+ - title: if shown
3130
+ - formType: one of coverage, endorsement, declarations, application, notice, other
3131
+ - pageStart: original page where the form begins
3132
+ - pageEnd: original page where the form ends
3133
+
3134
+ Critical rules:
3135
+ - Include declarations page sets even if they do not show a standard form number.
3136
+ - Use original document page numbers, not local chunk page numbers.
3137
+ - Do not emit duplicate entries for repeated headers/footers.
3138
+ - Multi-page forms should be represented once with pageStart/pageEnd covering the full span when visible.
3139
+ - If a form number is visible in endorsements, schedules, or form headers, include it even if the full form title is partial.
3140
+
3141
+ Respond with JSON only.`;
3142
+ }
3143
+
3144
+ // src/prompts/coordinator/page-map.ts
3145
+ var import_zod20 = require("zod");
3146
+ var PageExtractorSchema = import_zod20.z.enum([
3147
+ "carrier_info",
3148
+ "named_insured",
3149
+ "coverage_limits",
3150
+ "endorsements",
3151
+ "exclusions",
3152
+ "conditions",
3153
+ "premium_breakdown",
3154
+ "declarations",
3155
+ "loss_history",
3156
+ "sections",
3157
+ "supplementary"
3158
+ ]);
3159
+ var PageAssignmentSchema = import_zod20.z.object({
3160
+ localPageNumber: import_zod20.z.number().int().positive().describe("1-based page number within this supplied PDF chunk"),
3161
+ extractorNames: import_zod20.z.array(PageExtractorSchema).describe("Focused extractors that should inspect this page"),
3162
+ pageRole: import_zod20.z.enum([
3163
+ "declarations_schedule",
3164
+ "endorsement_schedule",
3165
+ "policy_form",
3166
+ "endorsement_form",
3167
+ "condition_exclusion_form",
3168
+ "supplementary",
3169
+ "other"
3170
+ ]).optional().describe("Primary role of the page"),
3171
+ hasScheduleValues: import_zod20.z.boolean().optional().describe("True only when the page contains insured-specific declaration or schedule values, tables, or rows to extract"),
3172
+ confidence: import_zod20.z.number().min(0).max(1).optional().describe("Confidence in the page assignment"),
3173
+ notes: import_zod20.z.string().optional().describe("Short explanation of what appears on the page")
3174
+ });
3175
+ var PageMapChunkSchema = import_zod20.z.object({
3176
+ pages: import_zod20.z.array(PageAssignmentSchema)
3177
+ });
3178
+ function buildPageMapPrompt(templateHints, startPage, endPage, formInventoryHint) {
3179
+ const inventoryBlock = formInventoryHint ? `
3180
+ FORM INVENTORY (already identified \u2014 use this to constrain your assignments):
3181
+ ${formInventoryHint}
3182
+ ` : "";
3183
+ return `You are mapping insurance document pages to focused extractors.
3184
+
3185
+ These supplied pages are ORIGINAL DOCUMENT PAGES ${startPage}-${endPage}.
3186
+
3187
+ DOCUMENT TYPE HINTS:
3188
+ ${templateHints}
3189
+ ${inventoryBlock}
3190
+ For each page in this supplied PDF chunk, decide which extractor(s) should inspect it.
2953
3191
 
2954
3192
  Available extractors:
2955
- - carrier_info: Carrier name, legal name, NAIC, AM Best rating, admitted status, MGA, underwriter
2956
- - named_insured: Insured name, DBA, address, entity type, FEIN, SIC/NAICS codes, additional named insureds
2957
- - coverage_limits: Coverage names, limits, deductibles, coverage form, triggers
2958
- - endorsements: Endorsement forms, titles, types, content, affected parties
2959
- - exclusions: Exclusion titles, content, applicability
2960
- - conditions: Policy conditions (duties after loss, cancellation, etc.)
2961
- - premium_breakdown: Premium amounts, taxes, fees, payment plans, rating basis
2962
- - declarations: Line-specific structured declarations data (varies by policy type)
2963
- - loss_history: Loss runs, claim records, experience modification
2964
- - sections: Raw section content (for sections that don't fit other extractors)
2965
- - supplementary: Regulatory context, contacts, claims contacts, third-party administrators
3193
+ - carrier_info
3194
+ - named_insured
3195
+ - coverage_limits
3196
+ - endorsements
3197
+ - exclusions
3198
+ - conditions
3199
+ - premium_breakdown
3200
+ - declarations
3201
+ - loss_history
3202
+ - sections
3203
+ - supplementary
3204
+
3205
+ Rules:
3206
+ - Use specific extractors for declarations, schedules, endorsements, exclusions, conditions, premium pages, and loss runs.
3207
+ - Use "sections" for pages that contain substantive policy text or mixed content that should still be preserved as raw sections.
3208
+ - Avoid assigning broad ranges mentally; decide page by page.
3209
+ - A page may map to multiple extractors if it legitimately contains multiple relevant sections.
3210
+ - Prefer declarations and schedules for numeric limits/deductibles over later generic form wording.
3211
+ - Assign "coverage_limits" only when the page itself contains insured-specific declaration or schedule values to capture, such as location/building rows, coverage tables, limits, deductibles, coinsurance percentages, or scheduled amounts tied to this policy.
3212
+ - Do NOT assign "coverage_limits" for generic policy-form or endorsement text that merely explains how limits, deductibles, waiting periods, or coinsurance work, or that says values are "shown in the declarations", "shown in the schedule", "as stated", or "if applicable".
3213
+ - Headings like "Limits of Insurance", "Deductible", "Coinsurance", "Loss Conditions", or "Definitions" inside a policy form usually indicate form language, not declarations or schedules.
3214
+ - Continuation pages near the end of a form should stay mapped to "sections" plus "conditions"/"exclusions" when applicable, even if they mention limits or deductibles.
3215
+ - When a form inventory entry identifies a page range as a specific form type (e.g., endorsement, coverage, application), use that classification to guide your extractor choice. Do not assign "coverage_limits" to pages the inventory identifies as endorsement or condition/exclusion forms unless the page contains actual schedule values.
3216
+ - Return every page in the supplied chunk exactly once.
2966
3217
 
2967
3218
  Return JSON:
2968
3219
  {
2969
- "tasks": [
2970
- { "extractorName": "carrier_info", "startPage": 1, "endPage": 2, "description": "Extract carrier details from declarations page" },
2971
- ...
2972
- ],
2973
- "pageMap": [
2974
- { "section": "declarations", "pages": "pages 1-3" },
2975
- { "section": "endorsements", "pages": "pages 15-22" }
3220
+ "pages": [
3221
+ {
3222
+ "localPageNumber": 1,
3223
+ "extractorNames": ["declarations", "carrier_info", "named_insured", "coverage_limits"],
3224
+ "pageRole": "declarations_schedule",
3225
+ "hasScheduleValues": true,
3226
+ "confidence": 0.96,
3227
+ "notes": "Declarations page with insured, policy period, and scheduled limits"
3228
+ }
2976
3229
  ]
2977
3230
  }
2978
3231
 
2979
- Create tasks that cover the entire document. Prefer specific extractors over generic "sections" where possible. Keep page ranges tight \u2014 only include pages relevant to each extractor.
2980
-
2981
3232
  Respond with JSON only.`;
2982
3233
  }
3234
+ function formatFormInventoryForPageMap(forms) {
3235
+ if (forms.length === 0) return "";
3236
+ return forms.filter((f) => f.pageStart != null).map((f) => {
3237
+ const range = f.pageEnd && f.pageEnd !== f.pageStart ? `pages ${f.pageStart}-${f.pageEnd}` : `page ${f.pageStart}`;
3238
+ const title = f.title ? ` "${f.title}"` : "";
3239
+ return `- ${f.formNumber}${title} [${f.formType}] \u2192 ${range}`;
3240
+ }).join("\n");
3241
+ }
2983
3242
 
2984
3243
  // src/prompts/coordinator/review.ts
2985
- var import_zod20 = require("zod");
2986
- var ReviewResultSchema = import_zod20.z.object({
2987
- complete: import_zod20.z.boolean(),
2988
- missingFields: import_zod20.z.array(import_zod20.z.string()),
2989
- additionalTasks: import_zod20.z.array(import_zod20.z.object({
2990
- extractorName: import_zod20.z.string(),
2991
- startPage: import_zod20.z.number(),
2992
- endPage: import_zod20.z.number(),
2993
- description: import_zod20.z.string()
3244
+ var import_zod21 = require("zod");
3245
+ var ReviewResultSchema = import_zod21.z.object({
3246
+ complete: import_zod21.z.boolean(),
3247
+ missingFields: import_zod21.z.array(import_zod21.z.string()),
3248
+ qualityIssues: import_zod21.z.array(import_zod21.z.string()).optional(),
3249
+ additionalTasks: import_zod21.z.array(import_zod21.z.object({
3250
+ extractorName: import_zod21.z.string(),
3251
+ startPage: import_zod21.z.number(),
3252
+ endPage: import_zod21.z.number(),
3253
+ description: import_zod21.z.string()
2994
3254
  }))
2995
3255
  });
2996
- function buildReviewPrompt(templateExpected, extractedKeys) {
2997
- return `You are reviewing an extraction for completeness. Compare what was expected vs what was found.
3256
+ function buildReviewPrompt(templateExpected, extractedKeys, extractionSummary, pageMapSummary) {
3257
+ return `You are reviewing an extraction for completeness and quality. Compare what was expected vs what was found.
2998
3258
 
2999
3259
  EXPECTED FIELDS (from document type template):
3000
3260
  ${templateExpected.map((f) => `- ${f}`).join("\n")}
@@ -3002,40 +3262,55 @@ ${templateExpected.map((f) => `- ${f}`).join("\n")}
3002
3262
  FIELDS ALREADY EXTRACTED:
3003
3263
  ${extractedKeys.map((f) => `- ${f}`).join("\n")}
3004
3264
 
3265
+ PAGE MAP SUMMARY:
3266
+ ${pageMapSummary}
3267
+
3268
+ CURRENT EXTRACTION SUMMARY:
3269
+ ${extractionSummary}
3270
+
3005
3271
  Determine:
3006
- 1. Is the extraction complete enough? (required fields present = complete)
3272
+ 1. Is the extraction complete enough?
3007
3273
  2. What fields are missing?
3008
- 3. Should any additional extraction tasks be dispatched?
3274
+ 3. What quality issues are present?
3275
+ 4. Should any additional extraction tasks be dispatched?
3276
+
3277
+ Mark the extraction as NOT complete if any of these are true:
3278
+ - required fields are missing
3279
+ - extracted values are generic placeholders like "shown in declarations", "per schedule", "if applicable", "as stated"
3280
+ - coverage limits or deductibles appear to come from generic form language instead of declaration/schedule-specific values
3281
+ - page assignments suggest declaration, schedule, endorsement, exclusion, or condition pages were not actually extracted with the matching focused extractor
3282
+ - a focused extractor exists but returned too little substance for the relevant pages
3009
3283
 
3010
3284
  Return JSON:
3011
3285
  {
3012
3286
  "complete": boolean,
3013
3287
  "missingFields": ["field1", "field2"],
3288
+ "qualityIssues": ["issue 1", "issue 2"],
3014
3289
  "additionalTasks": [
3015
3290
  { "extractorName": "...", "startPage": N, "endPage": N, "description": "..." }
3016
3291
  ]
3017
3292
  }
3018
3293
 
3019
- If all required fields are present, set complete=true even if some optional fields are missing.
3294
+ Use the page map to target follow-up extraction pages precisely. Prefer narrow, declaration/schedule-focused follow-up tasks over broad page ranges.
3020
3295
 
3021
3296
  Respond with JSON only.`;
3022
3297
  }
3023
3298
 
3024
3299
  // src/prompts/extractors/carrier-info.ts
3025
- var import_zod21 = require("zod");
3026
- var CarrierInfoSchema = import_zod21.z.object({
3027
- carrierName: import_zod21.z.string().describe("Primary insurance company name for display"),
3028
- carrierLegalName: import_zod21.z.string().optional().describe("Legal entity name of insurer"),
3029
- naicNumber: import_zod21.z.string().optional().describe("NAIC company code"),
3030
- amBestRating: import_zod21.z.string().optional().describe("AM Best rating, e.g. 'A+ XV'"),
3031
- admittedStatus: import_zod21.z.enum(["admitted", "non_admitted", "surplus_lines"]).optional().describe("Admitted status of the carrier"),
3032
- mga: import_zod21.z.string().optional().describe("Managing General Agent or Program Administrator name"),
3033
- underwriter: import_zod21.z.string().optional().describe("Named individual underwriter"),
3034
- policyNumber: import_zod21.z.string().optional().describe("Policy or quote reference number"),
3035
- effectiveDate: import_zod21.z.string().optional().describe("Policy effective date (MM/DD/YYYY)"),
3036
- expirationDate: import_zod21.z.string().optional().describe("Policy expiration date (MM/DD/YYYY)"),
3037
- quoteNumber: import_zod21.z.string().optional().describe("Quote or proposal reference number"),
3038
- proposedEffectiveDate: import_zod21.z.string().optional().describe("Proposed effective date for quotes (MM/DD/YYYY)")
3300
+ var import_zod22 = require("zod");
3301
+ var CarrierInfoSchema = import_zod22.z.object({
3302
+ carrierName: import_zod22.z.string().describe("Primary insurance company name for display"),
3303
+ carrierLegalName: import_zod22.z.string().optional().describe("Legal entity name of insurer"),
3304
+ naicNumber: import_zod22.z.string().optional().describe("NAIC company code"),
3305
+ amBestRating: import_zod22.z.string().optional().describe("AM Best rating, e.g. 'A+ XV'"),
3306
+ admittedStatus: import_zod22.z.enum(["admitted", "non_admitted", "surplus_lines"]).optional().describe("Admitted status of the carrier"),
3307
+ mga: import_zod22.z.string().optional().describe("Managing General Agent or Program Administrator name"),
3308
+ underwriter: import_zod22.z.string().optional().describe("Named individual underwriter"),
3309
+ policyNumber: import_zod22.z.string().optional().describe("Policy or quote reference number"),
3310
+ effectiveDate: import_zod22.z.string().optional().describe("Policy effective date (MM/DD/YYYY)"),
3311
+ expirationDate: import_zod22.z.string().optional().describe("Policy expiration date (MM/DD/YYYY)"),
3312
+ quoteNumber: import_zod22.z.string().optional().describe("Quote or proposal reference number"),
3313
+ proposedEffectiveDate: import_zod22.z.string().optional().describe("Proposed effective date for quotes (MM/DD/YYYY)")
3039
3314
  });
3040
3315
  function buildCarrierInfoPrompt() {
3041
3316
  return `You are an expert insurance document analyst. Extract carrier and policy identification information from this document.
@@ -3055,18 +3330,18 @@ Return JSON only.`;
3055
3330
  }
3056
3331
 
3057
3332
  // src/prompts/extractors/named-insured.ts
3058
- var import_zod22 = require("zod");
3059
- var AddressSchema2 = import_zod22.z.object({
3060
- street1: import_zod22.z.string(),
3061
- city: import_zod22.z.string(),
3062
- state: import_zod22.z.string(),
3063
- zip: import_zod22.z.string()
3333
+ var import_zod23 = require("zod");
3334
+ var AddressSchema2 = import_zod23.z.object({
3335
+ street1: import_zod23.z.string(),
3336
+ city: import_zod23.z.string(),
3337
+ state: import_zod23.z.string(),
3338
+ zip: import_zod23.z.string()
3064
3339
  });
3065
- var NamedInsuredSchema2 = import_zod22.z.object({
3066
- insuredName: import_zod22.z.string().describe("Name of primary named insured"),
3067
- insuredDba: import_zod22.z.string().optional().describe("Doing-business-as name"),
3340
+ var NamedInsuredSchema2 = import_zod23.z.object({
3341
+ insuredName: import_zod23.z.string().describe("Name of primary named insured"),
3342
+ insuredDba: import_zod23.z.string().optional().describe("Doing-business-as name"),
3068
3343
  insuredAddress: AddressSchema2.optional().describe("Primary insured mailing address"),
3069
- insuredEntityType: import_zod22.z.enum([
3344
+ insuredEntityType: import_zod23.z.enum([
3070
3345
  "corporation",
3071
3346
  "llc",
3072
3347
  "partnership",
@@ -3079,13 +3354,13 @@ var NamedInsuredSchema2 = import_zod22.z.object({
3079
3354
  "married_couple",
3080
3355
  "other"
3081
3356
  ]).optional().describe("Legal entity type of the insured"),
3082
- insuredFein: import_zod22.z.string().optional().describe("Federal Employer Identification Number"),
3083
- insuredSicCode: import_zod22.z.string().optional().describe("SIC code"),
3084
- insuredNaicsCode: import_zod22.z.string().optional().describe("NAICS code"),
3085
- additionalNamedInsureds: import_zod22.z.array(
3086
- import_zod22.z.object({
3087
- name: import_zod22.z.string(),
3088
- relationship: import_zod22.z.string().optional().describe("e.g. subsidiary, affiliate"),
3357
+ insuredFein: import_zod23.z.string().optional().describe("Federal Employer Identification Number"),
3358
+ insuredSicCode: import_zod23.z.string().optional().describe("SIC code"),
3359
+ insuredNaicsCode: import_zod23.z.string().optional().describe("NAICS code"),
3360
+ additionalNamedInsureds: import_zod23.z.array(
3361
+ import_zod23.z.object({
3362
+ name: import_zod23.z.string(),
3363
+ relationship: import_zod23.z.string().optional().describe("e.g. subsidiary, affiliate"),
3089
3364
  address: AddressSchema2.optional()
3090
3365
  })
3091
3366
  ).optional().describe("Additional named insureds listed on the policy")
@@ -3106,23 +3381,20 @@ Return JSON only.`;
3106
3381
  }
3107
3382
 
3108
3383
  // src/prompts/extractors/coverage-limits.ts
3109
- var import_zod23 = require("zod");
3110
- var CoverageLimitsSchema = import_zod23.z.object({
3111
- coverages: import_zod23.z.array(
3112
- import_zod23.z.object({
3113
- name: import_zod23.z.string().describe("Coverage name"),
3114
- limit: import_zod23.z.string().describe("Coverage limit, e.g. '$1,000,000'"),
3115
- deductible: import_zod23.z.string().optional().describe("Deductible amount"),
3116
- coverageCode: import_zod23.z.string().optional().describe("Coverage code or class code"),
3117
- formNumber: import_zod23.z.string().optional().describe("Associated form number, e.g. 'CG 00 01'")
3118
- })
3119
- ).describe("All coverages with their limits"),
3120
- coverageForm: import_zod23.z.enum(["occurrence", "claims_made", "accident"]).optional().describe("Primary coverage trigger type"),
3121
- retroactiveDate: import_zod23.z.string().optional().describe("Retroactive date for claims-made policies (MM/DD/YYYY)")
3384
+ var import_zod24 = require("zod");
3385
+ var ExtractorCoverageSchema = CoverageSchema.extend({
3386
+ coverageCode: import_zod24.z.string().optional().describe("Coverage code or class code")
3387
+ });
3388
+ var CoverageLimitsSchema = import_zod24.z.object({
3389
+ coverages: import_zod24.z.array(ExtractorCoverageSchema).describe("All coverages with their limits"),
3390
+ coverageForm: import_zod24.z.enum(["occurrence", "claims_made", "accident"]).optional().describe("Primary coverage trigger type"),
3391
+ retroactiveDate: import_zod24.z.string().optional().describe("Retroactive date for claims-made policies (MM/DD/YYYY)")
3122
3392
  });
3123
3393
  function buildCoverageLimitsPrompt() {
3124
3394
  return `You are an expert insurance document analyst. Extract all coverage limits and deductibles from this document.
3125
3395
 
3396
+ Extract only insured-specific declaration, schedule, or endorsement entries that state actual coverage terms for this policy.
3397
+
3126
3398
  Focus on:
3127
3399
  - Every coverage listed on the declarations page or coverage schedule
3128
3400
  - Per-occurrence, aggregate, and sub-limits for each coverage
@@ -3133,20 +3405,34 @@ Focus on:
3133
3405
  - Standard limit fields: per occurrence, general aggregate, products/completed ops aggregate, personal & advertising injury, fire damage, medical expense, combined single limit, BI/PD splits, umbrella each occurrence/aggregate/retention, statutory (WC), employers liability
3134
3406
  - Defense cost treatment: inside limits, outside limits, or supplementary
3135
3407
 
3136
- Extract ALL coverages \u2014 do not omit any coverage line that appears in the document.
3408
+ For EACH coverage, also extract:
3409
+ - pageNumber: the original page number where the coverage row/value appears
3410
+ - sectionRef: the declarations/schedule/endorsement section heading where it appears
3411
+ - originalContent: the verbatim row or short source snippet used for this coverage
3412
+ - limitValueType: classify the limit as numeric, included, not_included, as_stated, waiting_period, referential, or other
3413
+ - deductibleValueType: classify the deductible/value term similarly when deductible is present
3414
+
3415
+ Critical rules:
3416
+ - Do not extract table-of-contents lines, index entries, headers, footers, page labels, or cross-references as coverages.
3417
+ - Do not create a coverage entry from generic policy-form text that only says a limit/deductible is "shown in the declarations", "shown in the Business Income Declarations", "as stated", "if applicable", or similar referential wording.
3418
+ - Do not treat a generic waiting period, deductible explanation, limits clause, coinsurance clause, or definitions text as a standalone coverage unless the page contains an actual policy-specific schedule row or declaration entry.
3419
+ - Values like "Included" or "Not Included" are valid only when they appear as an explicit declarations/schedule/endorsement entry for a named coverage. Do not infer them from narrative form language.
3420
+ - If a waiting period or hour deductible is shown as part of a specific declarations/schedule row, it may be captured in deductible. Otherwise omit it.
3421
+ - Use limitValueType or deductibleValueType to preserve non-numeric terms precisely instead of forcing them into numeric semantics.
3422
+ - Preserve one row per real coverage entry. Do not merge adjacent schedule rows into malformed names.
3137
3423
 
3138
3424
  Return JSON only.`;
3139
3425
  }
3140
3426
 
3141
3427
  // src/prompts/extractors/endorsements.ts
3142
- var import_zod24 = require("zod");
3143
- var EndorsementsSchema = import_zod24.z.object({
3144
- endorsements: import_zod24.z.array(
3145
- import_zod24.z.object({
3146
- formNumber: import_zod24.z.string().describe("Form number, e.g. 'CG 21 47'"),
3147
- editionDate: import_zod24.z.string().optional().describe("Edition date, e.g. '12 07'"),
3148
- title: import_zod24.z.string().describe("Endorsement title"),
3149
- endorsementType: import_zod24.z.enum([
3428
+ var import_zod25 = require("zod");
3429
+ var EndorsementsSchema = import_zod25.z.object({
3430
+ endorsements: import_zod25.z.array(
3431
+ import_zod25.z.object({
3432
+ formNumber: import_zod25.z.string().describe("Form number, e.g. 'CG 21 47'"),
3433
+ editionDate: import_zod25.z.string().optional().describe("Edition date, e.g. '12 07'"),
3434
+ title: import_zod25.z.string().describe("Endorsement title"),
3435
+ endorsementType: import_zod25.z.enum([
3150
3436
  "additional_insured",
3151
3437
  "waiver_of_subrogation",
3152
3438
  "primary_noncontributory",
@@ -3166,12 +3452,12 @@ var EndorsementsSchema = import_zod24.z.object({
3166
3452
  "territorial_extension",
3167
3453
  "other"
3168
3454
  ]).describe("Endorsement type classification"),
3169
- effectiveDate: import_zod24.z.string().optional().describe("Endorsement effective date"),
3170
- affectedCoverageParts: import_zod24.z.array(import_zod24.z.string()).optional().describe("Coverage parts affected by this endorsement"),
3171
- namedParties: import_zod24.z.array(
3172
- import_zod24.z.object({
3173
- name: import_zod24.z.string().describe("Party name"),
3174
- role: import_zod24.z.enum([
3455
+ effectiveDate: import_zod25.z.string().optional().describe("Endorsement effective date"),
3456
+ affectedCoverageParts: import_zod25.z.array(import_zod25.z.string()).optional().describe("Coverage parts affected by this endorsement"),
3457
+ namedParties: import_zod25.z.array(
3458
+ import_zod25.z.object({
3459
+ name: import_zod25.z.string().describe("Party name"),
3460
+ role: import_zod25.z.enum([
3175
3461
  "additional_insured",
3176
3462
  "loss_payee",
3177
3463
  "mortgage_holder",
@@ -3180,15 +3466,15 @@ var EndorsementsSchema = import_zod24.z.object({
3180
3466
  "designated_person",
3181
3467
  "other"
3182
3468
  ]).describe("Party role"),
3183
- relationship: import_zod24.z.string().optional().describe("Relationship to insured"),
3184
- scope: import_zod24.z.string().optional().describe("Scope of coverage for this party")
3469
+ relationship: import_zod25.z.string().optional().describe("Relationship to insured"),
3470
+ scope: import_zod25.z.string().optional().describe("Scope of coverage for this party")
3185
3471
  })
3186
3472
  ).optional().describe("Named parties (additional insureds, loss payees, etc.)"),
3187
- keyTerms: import_zod24.z.array(import_zod24.z.string()).optional().describe("Key terms or notable provisions in the endorsement"),
3188
- premiumImpact: import_zod24.z.string().optional().describe("Additional premium or credit"),
3189
- content: import_zod24.z.string().describe("Full verbatim text of the endorsement"),
3190
- pageStart: import_zod24.z.number().describe("Starting page number of this endorsement"),
3191
- pageEnd: import_zod24.z.number().optional().describe("Ending page number of this endorsement")
3473
+ keyTerms: import_zod25.z.array(import_zod25.z.string()).optional().describe("Key terms or notable provisions in the endorsement"),
3474
+ premiumImpact: import_zod25.z.string().optional().describe("Additional premium or credit"),
3475
+ content: import_zod25.z.string().describe("Full verbatim text of the endorsement"),
3476
+ pageStart: import_zod25.z.number().describe("Starting page number of this endorsement"),
3477
+ pageEnd: import_zod25.z.number().optional().describe("Ending page number of this endorsement")
3192
3478
  })
3193
3479
  ).describe("All endorsements found in the document")
3194
3480
  });
@@ -3219,20 +3505,20 @@ Return JSON only.`;
3219
3505
  }
3220
3506
 
3221
3507
  // src/prompts/extractors/exclusions.ts
3222
- var import_zod25 = require("zod");
3223
- var ExclusionsSchema = import_zod25.z.object({
3224
- exclusions: import_zod25.z.array(
3225
- import_zod25.z.object({
3226
- name: import_zod25.z.string().describe("Exclusion title or short description"),
3227
- formNumber: import_zod25.z.string().optional().describe("Form number if part of a named endorsement"),
3228
- excludedPerils: import_zod25.z.array(import_zod25.z.string()).optional().describe("Specific perils excluded"),
3229
- isAbsolute: import_zod25.z.boolean().optional().describe("Whether the exclusion is absolute (no exceptions)"),
3230
- exceptions: import_zod25.z.array(import_zod25.z.string()).optional().describe("Exceptions to the exclusion, if any"),
3231
- buybackAvailable: import_zod25.z.boolean().optional().describe("Whether coverage can be bought back via endorsement"),
3232
- buybackEndorsement: import_zod25.z.string().optional().describe("Form number of the buyback endorsement if available"),
3233
- appliesTo: import_zod25.z.array(import_zod25.z.string()).optional().describe("Coverage types this exclusion applies to"),
3234
- content: import_zod25.z.string().describe("Full verbatim exclusion text"),
3235
- pageNumber: import_zod25.z.number().optional().describe("Page number where exclusion appears")
3508
+ var import_zod26 = require("zod");
3509
+ var ExclusionsSchema = import_zod26.z.object({
3510
+ exclusions: import_zod26.z.array(
3511
+ import_zod26.z.object({
3512
+ name: import_zod26.z.string().describe("Exclusion title or short description"),
3513
+ formNumber: import_zod26.z.string().optional().describe("Form number if part of a named endorsement"),
3514
+ excludedPerils: import_zod26.z.array(import_zod26.z.string()).optional().describe("Specific perils excluded"),
3515
+ isAbsolute: import_zod26.z.boolean().optional().describe("Whether the exclusion is absolute (no exceptions)"),
3516
+ exceptions: import_zod26.z.array(import_zod26.z.string()).optional().describe("Exceptions to the exclusion, if any"),
3517
+ buybackAvailable: import_zod26.z.boolean().optional().describe("Whether coverage can be bought back via endorsement"),
3518
+ buybackEndorsement: import_zod26.z.string().optional().describe("Form number of the buyback endorsement if available"),
3519
+ appliesTo: import_zod26.z.array(import_zod26.z.string()).optional().describe("Coverage types this exclusion applies to"),
3520
+ content: import_zod26.z.string().describe("Full verbatim exclusion text"),
3521
+ pageNumber: import_zod26.z.number().optional().describe("Page number where exclusion appears")
3236
3522
  })
3237
3523
  ).describe("All exclusions found in the document")
3238
3524
  });
@@ -3257,18 +3543,23 @@ Focus on:
3257
3543
  - Exclusions within insuring agreements or conditions if clearly labeled
3258
3544
  - Full verbatim exclusion text \u2014 do not summarize
3259
3545
 
3546
+ Critical rules:
3547
+ - Ignore table-of-contents entries, running headers/footers, and references that only point to another page or section.
3548
+ - Do not emit a standalone exclusion from a fragment unless the fragment itself contains substantive exclusion wording.
3549
+ - Always include pageNumber when the exclusion appears on a specific page in the supplied document chunk.
3550
+
3260
3551
  Common personal lines exclusion patterns: animal liability, business pursuits, home daycare, watercraft, aircraft.
3261
3552
 
3262
3553
  Return JSON only.`;
3263
3554
  }
3264
3555
 
3265
3556
  // src/prompts/extractors/conditions.ts
3266
- var import_zod26 = require("zod");
3267
- var ConditionsSchema = import_zod26.z.object({
3268
- conditions: import_zod26.z.array(
3269
- import_zod26.z.object({
3270
- name: import_zod26.z.string().describe("Condition title"),
3271
- conditionType: import_zod26.z.enum([
3557
+ var import_zod27 = require("zod");
3558
+ var ConditionsSchema = import_zod27.z.object({
3559
+ conditions: import_zod27.z.array(
3560
+ import_zod27.z.object({
3561
+ name: import_zod27.z.string().describe("Condition title"),
3562
+ conditionType: import_zod27.z.enum([
3272
3563
  "duties_after_loss",
3273
3564
  "notice_requirements",
3274
3565
  "other_insurance",
@@ -3287,14 +3578,14 @@ var ConditionsSchema = import_zod26.z.object({
3287
3578
  "separation_of_insureds",
3288
3579
  "other"
3289
3580
  ]).describe("Condition category"),
3290
- content: import_zod26.z.string().describe("Full verbatim condition text"),
3291
- keyValues: import_zod26.z.array(
3292
- import_zod26.z.object({
3293
- key: import_zod26.z.string().describe("Key name (e.g. 'noticePeriod', 'suitDeadline')"),
3294
- value: import_zod26.z.string().describe("Value (e.g. '30 days', '2 years')")
3581
+ content: import_zod27.z.string().describe("Full verbatim condition text"),
3582
+ keyValues: import_zod27.z.array(
3583
+ import_zod27.z.object({
3584
+ key: import_zod27.z.string().describe("Key name (e.g. 'noticePeriod', 'suitDeadline')"),
3585
+ value: import_zod27.z.string().describe("Value (e.g. '30 days', '2 years')")
3295
3586
  })
3296
3587
  ).optional().describe("Key values extracted from the condition (notice periods, deadlines, etc.)"),
3297
- pageNumber: import_zod26.z.number().optional().describe("Page number where condition appears")
3588
+ pageNumber: import_zod27.z.number().optional().describe("Page number where condition appears")
3298
3589
  })
3299
3590
  ).describe("All policy conditions found in the document")
3300
3591
  });
@@ -3306,7 +3597,7 @@ For EACH condition, extract:
3306
3597
  - conditionType: classify as one of: duties_after_loss, notice_requirements, other_insurance, cancellation, nonrenewal, transfer_of_rights, liberalization, arbitration, concealment_fraud, examination_under_oath, legal_action, loss_payment, appraisal, mortgage_holders, policy_territory, separation_of_insureds, other \u2014 REQUIRED
3307
3598
  - content: full verbatim condition text \u2014 REQUIRED
3308
3599
  - keyValues: extract specific values as key-value pairs (e.g. noticePeriod: "30 days", suitDeadline: "2 years")
3309
- - pageNumber: page number where the condition appears
3600
+ - pageNumber: original document page number where the substantive condition text appears
3310
3601
 
3311
3602
  Focus on:
3312
3603
  - Duties after loss / notice of occurrence conditions
@@ -3323,32 +3614,37 @@ Focus on:
3323
3614
  - Mortgage holders clause
3324
3615
  - Any other named conditions
3325
3616
 
3617
+ Critical rules:
3618
+ - Ignore table-of-contents entries, section indexes, running headers/footers, and page references such as "Appraisal ..... 19".
3619
+ - Do not emit a condition unless the page contains substantive condition text, not just a heading or reference.
3620
+ - If a condition continues from a prior page, keep the substantive text together and use the page where the condition text appears in this extracted chunk.
3621
+
3326
3622
  Return JSON only.`;
3327
3623
  }
3328
3624
 
3329
3625
  // src/prompts/extractors/premium-breakdown.ts
3330
- var import_zod27 = require("zod");
3331
- var PremiumBreakdownSchema = import_zod27.z.object({
3332
- premium: import_zod27.z.string().optional().describe("Total premium amount, e.g. '$5,000'"),
3333
- totalCost: import_zod27.z.string().optional().describe("Total cost including taxes and fees, e.g. '$5,250'"),
3334
- premiumBreakdown: import_zod27.z.array(
3335
- import_zod27.z.object({
3336
- line: import_zod27.z.string().describe("Coverage line name"),
3337
- amount: import_zod27.z.string().describe("Premium amount for this line")
3626
+ var import_zod28 = require("zod");
3627
+ var PremiumBreakdownSchema = import_zod28.z.object({
3628
+ premium: import_zod28.z.string().optional().describe("Total premium amount, e.g. '$5,000'"),
3629
+ totalCost: import_zod28.z.string().optional().describe("Total cost including taxes and fees, e.g. '$5,250'"),
3630
+ premiumBreakdown: import_zod28.z.array(
3631
+ import_zod28.z.object({
3632
+ line: import_zod28.z.string().describe("Coverage line name"),
3633
+ amount: import_zod28.z.string().describe("Premium amount for this line")
3338
3634
  })
3339
3635
  ).optional().describe("Per-coverage-line premium breakdown"),
3340
- taxesAndFees: import_zod27.z.array(
3341
- import_zod27.z.object({
3342
- name: import_zod27.z.string().describe("Fee or tax name"),
3343
- amount: import_zod27.z.string().describe("Dollar amount"),
3344
- type: import_zod27.z.enum(["tax", "fee", "surcharge", "assessment"]).optional().describe("Fee category")
3636
+ taxesAndFees: import_zod28.z.array(
3637
+ import_zod28.z.object({
3638
+ name: import_zod28.z.string().describe("Fee or tax name"),
3639
+ amount: import_zod28.z.string().describe("Dollar amount"),
3640
+ type: import_zod28.z.enum(["tax", "fee", "surcharge", "assessment"]).optional().describe("Fee category")
3345
3641
  })
3346
3642
  ).optional().describe("Taxes, fees, surcharges, and assessments"),
3347
- minimumPremium: import_zod27.z.string().optional().describe("Minimum premium if stated"),
3348
- depositPremium: import_zod27.z.string().optional().describe("Deposit premium if stated"),
3349
- paymentPlan: import_zod27.z.string().optional().describe("Payment plan description"),
3350
- auditType: import_zod27.z.enum(["annual", "semi_annual", "quarterly", "monthly", "final", "self"]).optional().describe("Premium audit type"),
3351
- ratingBasis: import_zod27.z.string().optional().describe("Rating basis, e.g. payroll, revenue, area, units")
3643
+ minimumPremium: import_zod28.z.string().optional().describe("Minimum premium if stated"),
3644
+ depositPremium: import_zod28.z.string().optional().describe("Deposit premium if stated"),
3645
+ paymentPlan: import_zod28.z.string().optional().describe("Payment plan description"),
3646
+ auditType: import_zod28.z.enum(["annual", "semi_annual", "quarterly", "monthly", "final", "self"]).optional().describe("Premium audit type"),
3647
+ ratingBasis: import_zod28.z.string().optional().describe("Rating basis, e.g. payroll, revenue, area, units")
3352
3648
  });
3353
3649
  function buildPremiumBreakdownPrompt() {
3354
3650
  return `You are an expert insurance document analyst. Extract all premium and cost information from this document.
@@ -3368,14 +3664,14 @@ Return JSON only.`;
3368
3664
  }
3369
3665
 
3370
3666
  // src/prompts/extractors/declarations.ts
3371
- var import_zod28 = require("zod");
3372
- var DeclarationsFieldSchema = import_zod28.z.object({
3373
- field: import_zod28.z.string().describe("Descriptive field name (e.g. 'policyNumber', 'effectiveDate', 'coverageALimit')"),
3374
- value: import_zod28.z.string().describe("Extracted value exactly as it appears in the document"),
3375
- section: import_zod28.z.string().optional().describe("Section or grouping this field belongs to (e.g. 'Coverage Limits', 'Vehicle Schedule')")
3667
+ var import_zod29 = require("zod");
3668
+ var DeclarationsFieldSchema = import_zod29.z.object({
3669
+ field: import_zod29.z.string().describe("Descriptive field name (e.g. 'policyNumber', 'effectiveDate', 'coverageALimit')"),
3670
+ value: import_zod29.z.string().describe("Extracted value exactly as it appears in the document"),
3671
+ section: import_zod29.z.string().optional().describe("Section or grouping this field belongs to (e.g. 'Coverage Limits', 'Vehicle Schedule')")
3376
3672
  });
3377
- var DeclarationsExtractSchema = import_zod28.z.object({
3378
- fields: import_zod28.z.array(DeclarationsFieldSchema).describe("All declarations page fields extracted as key-value pairs. Structure varies by line of business.")
3673
+ var DeclarationsExtractSchema = import_zod29.z.object({
3674
+ fields: import_zod29.z.array(DeclarationsFieldSchema).describe("All declarations page fields extracted as key-value pairs. Structure varies by line of business.")
3379
3675
  });
3380
3676
  function buildDeclarationsPrompt() {
3381
3677
  return `You are an expert insurance document analyst. Extract all declarations page data from this document into a flexible key-value structure.
@@ -3415,21 +3711,21 @@ Preserve original values exactly as they appear. Return JSON only.`;
3415
3711
  }
3416
3712
 
3417
3713
  // src/prompts/extractors/loss-history.ts
3418
- var import_zod29 = require("zod");
3419
- var LossHistorySchema = import_zod29.z.object({
3420
- lossSummary: import_zod29.z.string().optional().describe("Summary of loss history, e.g. '3 claims in past 5 years totaling $125,000'"),
3421
- individualClaims: import_zod29.z.array(
3422
- import_zod29.z.object({
3423
- date: import_zod29.z.string().optional().describe("Date of loss or claim"),
3424
- type: import_zod29.z.string().optional().describe("Type of claim, e.g. 'property damage', 'bodily injury'"),
3425
- description: import_zod29.z.string().optional().describe("Brief description of the claim"),
3426
- amountPaid: import_zod29.z.string().optional().describe("Amount paid"),
3427
- amountReserved: import_zod29.z.string().optional().describe("Amount reserved"),
3428
- status: import_zod29.z.enum(["open", "closed", "reopened"]).optional().describe("Claim status"),
3429
- claimNumber: import_zod29.z.string().optional().describe("Claim reference number")
3714
+ var import_zod30 = require("zod");
3715
+ var LossHistorySchema = import_zod30.z.object({
3716
+ lossSummary: import_zod30.z.string().optional().describe("Summary of loss history, e.g. '3 claims in past 5 years totaling $125,000'"),
3717
+ individualClaims: import_zod30.z.array(
3718
+ import_zod30.z.object({
3719
+ date: import_zod30.z.string().optional().describe("Date of loss or claim"),
3720
+ type: import_zod30.z.string().optional().describe("Type of claim, e.g. 'property damage', 'bodily injury'"),
3721
+ description: import_zod30.z.string().optional().describe("Brief description of the claim"),
3722
+ amountPaid: import_zod30.z.string().optional().describe("Amount paid"),
3723
+ amountReserved: import_zod30.z.string().optional().describe("Amount reserved"),
3724
+ status: import_zod30.z.enum(["open", "closed", "reopened"]).optional().describe("Claim status"),
3725
+ claimNumber: import_zod30.z.string().optional().describe("Claim reference number")
3430
3726
  })
3431
3727
  ).optional().describe("Individual claim records"),
3432
- experienceMod: import_zod29.z.string().optional().describe("Experience modification factor for workers comp, e.g. '0.85'")
3728
+ experienceMod: import_zod30.z.string().optional().describe("Experience modification factor for workers comp, e.g. '0.85'")
3433
3729
  });
3434
3730
  function buildLossHistoryPrompt() {
3435
3731
  return `You are an expert insurance document analyst. Extract all loss history and claims information from this document.
@@ -3446,18 +3742,18 @@ Return JSON only.`;
3446
3742
  }
3447
3743
 
3448
3744
  // src/prompts/extractors/sections.ts
3449
- var import_zod30 = require("zod");
3450
- var SubsectionSchema2 = import_zod30.z.object({
3451
- title: import_zod30.z.string().describe("Subsection title"),
3452
- sectionNumber: import_zod30.z.string().optional().describe("Subsection number"),
3453
- pageNumber: import_zod30.z.number().optional().describe("Page number"),
3454
- content: import_zod30.z.string().describe("Full verbatim text")
3745
+ var import_zod31 = require("zod");
3746
+ var SubsectionSchema2 = import_zod31.z.object({
3747
+ title: import_zod31.z.string().describe("Subsection title"),
3748
+ sectionNumber: import_zod31.z.string().optional().describe("Subsection number"),
3749
+ pageNumber: import_zod31.z.number().optional().describe("Page number"),
3750
+ content: import_zod31.z.string().describe("Full verbatim text")
3455
3751
  });
3456
- var SectionsSchema = import_zod30.z.object({
3457
- sections: import_zod30.z.array(
3458
- import_zod30.z.object({
3459
- title: import_zod30.z.string().describe("Section title"),
3460
- type: import_zod30.z.enum([
3752
+ var SectionsSchema = import_zod31.z.object({
3753
+ sections: import_zod31.z.array(
3754
+ import_zod31.z.object({
3755
+ title: import_zod31.z.string().describe("Section title"),
3756
+ type: import_zod31.z.enum([
3461
3757
  "declarations",
3462
3758
  "insuring_agreement",
3463
3759
  "policy_form",
@@ -3471,10 +3767,10 @@ var SectionsSchema = import_zod30.z.object({
3471
3767
  "regulatory",
3472
3768
  "other"
3473
3769
  ]).describe("Section type classification"),
3474
- content: import_zod30.z.string().describe("Full verbatim text of the section"),
3475
- pageStart: import_zod30.z.number().describe("Starting page number"),
3476
- pageEnd: import_zod30.z.number().optional().describe("Ending page number"),
3477
- subsections: import_zod30.z.array(SubsectionSchema2).optional().describe("Subsections within this section")
3770
+ content: import_zod31.z.string().describe("Full verbatim text of the section"),
3771
+ pageStart: import_zod31.z.number().describe("Starting page number"),
3772
+ pageEnd: import_zod31.z.number().optional().describe("Ending page number"),
3773
+ subsections: import_zod31.z.array(SubsectionSchema2).optional().describe("Subsections within this section")
3478
3774
  })
3479
3775
  ).describe("All document sections")
3480
3776
  });
@@ -3493,25 +3789,31 @@ For each section, classify its type:
3493
3789
  - "other" \u2014 anything that doesn't fit the above categories
3494
3790
 
3495
3791
  Include accurate page numbers for every section. Include subsections only if the section has clearly defined subsections with their own titles.
3792
+ If a page begins or ends in the middle of a section, treat it as a continuation of the existing section instead of creating a new orphan section from the fragment.
3793
+
3794
+ Critical rules:
3795
+ - Ignore table-of-contents entries, page-number references, repeating headers/footers, and other navigational artifacts.
3796
+ - Do not create a new section from a lone continuation fragment such as a single paragraph tail or list item that clearly belongs to the previous page's section.
3797
+ - When a section spans multiple pages, keep it as one section with pageStart/pageEnd covering the full span represented in this extraction.
3496
3798
 
3497
3799
  Return JSON only.`;
3498
3800
  }
3499
3801
 
3500
3802
  // src/prompts/extractors/supplementary.ts
3501
- var import_zod31 = require("zod");
3502
- var ContactSchema2 = import_zod31.z.object({
3503
- name: import_zod31.z.string().optional().describe("Organization or person name"),
3504
- phone: import_zod31.z.string().optional().describe("Phone number"),
3505
- email: import_zod31.z.string().optional().describe("Email address"),
3506
- address: import_zod31.z.string().optional().describe("Mailing address"),
3507
- type: import_zod31.z.string().optional().describe("Contact type, e.g. 'State Department of Insurance'")
3803
+ var import_zod32 = require("zod");
3804
+ var ContactSchema2 = import_zod32.z.object({
3805
+ name: import_zod32.z.string().optional().describe("Organization or person name"),
3806
+ phone: import_zod32.z.string().optional().describe("Phone number"),
3807
+ email: import_zod32.z.string().optional().describe("Email address"),
3808
+ address: import_zod32.z.string().optional().describe("Mailing address"),
3809
+ type: import_zod32.z.string().optional().describe("Contact type, e.g. 'State Department of Insurance'")
3508
3810
  });
3509
- var SupplementarySchema = import_zod31.z.object({
3510
- regulatoryContacts: import_zod31.z.array(ContactSchema2).optional().describe("Regulatory body contacts (state department of insurance, ombudsman)"),
3511
- claimsContacts: import_zod31.z.array(ContactSchema2).optional().describe("Claims reporting contacts and instructions"),
3512
- thirdPartyAdministrators: import_zod31.z.array(ContactSchema2).optional().describe("Third-party administrators for claims handling"),
3513
- cancellationNoticeDays: import_zod31.z.number().optional().describe("Required notice period for cancellation in days"),
3514
- nonrenewalNoticeDays: import_zod31.z.number().optional().describe("Required notice period for nonrenewal in days")
3811
+ var SupplementarySchema = import_zod32.z.object({
3812
+ regulatoryContacts: import_zod32.z.array(ContactSchema2).optional().describe("Regulatory body contacts (state department of insurance, ombudsman)"),
3813
+ claimsContacts: import_zod32.z.array(ContactSchema2).optional().describe("Claims reporting contacts and instructions"),
3814
+ thirdPartyAdministrators: import_zod32.z.array(ContactSchema2).optional().describe("Third-party administrators for claims handling"),
3815
+ cancellationNoticeDays: import_zod32.z.number().optional().describe("Required notice period for cancellation in days"),
3816
+ nonrenewalNoticeDays: import_zod32.z.number().optional().describe("Required notice period for nonrenewal in days")
3515
3817
  });
3516
3818
  function buildSupplementaryPrompt() {
3517
3819
  return `You are an expert insurance document analyst. Extract supplementary and regulatory information from this document.
@@ -3548,6 +3850,313 @@ function getExtractor(name) {
3548
3850
  return EXTRACTORS[name];
3549
3851
  }
3550
3852
 
3853
+ // src/core/quality.ts
3854
/**
 * Reduce a list of quality issues to a single gate status.
 *
 * @param {{ issues: Array<{ severity: string }>, hasRoundWarnings?: boolean }} params
 *   `issues` are inspected by their `severity`; `hasRoundWarnings` (default
 *   false) forces at least a "warning" outcome.
 * @returns {"failed" | "warning" | "passed"} "failed" when any issue is
 *   blocking, otherwise "warning" when any issue is a warning or round
 *   warnings were reported, otherwise "passed".
 */
function evaluateQualityGate(params) {
  const { issues, hasRoundWarnings = false } = params;
  const severities = new Set(issues.map((entry) => entry.severity));
  // Blocking issues dominate everything else.
  if (severities.has("blocking")) {
    return "failed";
  }
  if (severities.has("warning") || hasRoundWarnings) {
    return "warning";
  }
  return "passed";
}
3860
/**
 * Decide whether a gate status should abort processing.
 *
 * @param {string} mode - Quality gate mode; only "strict" can fail.
 * @param {string} status - Gate status as produced by the quality gate.
 * @returns {boolean} True only for mode "strict" combined with status "failed".
 */
function shouldFailQualityGate(mode, status) {
  if (mode !== "strict") {
    return false;
  }
  return status === "failed";
}
3863
+
3864
+ // src/extraction/quality.ts
3865
/**
 * Normalize a raw form number value.
 *
 * @param {unknown} value - Candidate form number.
 * @returns {string | undefined} The trimmed string, or undefined when the
 *   value is not a string or is empty after trimming.
 */
function normalizeFormNumber(value) {
  const cleaned = typeof value === "string" ? value.trim() : "";
  return cleaned === "" ? undefined : cleaned;
}
3871
/**
 * Record that a form number was seen by a given source.
 *
 * A new entry is created on first sight. On later sightings the existing
 * entry is mutated in place: empty `title` / `pageStart` / `pageEnd` fields
 * are filled from `extra`, and `source` is appended to `sources` if new.
 *
 * @param {Map<string, object>} inventory - Entries keyed by form number.
 * @param {string | undefined} formNumber - Nothing happens when this is falsy.
 * @param {string} source - Name of the extractor that reported the form.
 * @param {{ title?: string, pageStart?: number, pageEnd?: number }} [extra]
 * @returns {void}
 */
function addFormEntry(inventory, formNumber, source, extra) {
  if (!formNumber) {
    return;
  }
  const details = extra ?? {};
  const entry = inventory.get(formNumber);
  if (!entry) {
    inventory.set(formNumber, {
      formNumber,
      title: details.title,
      pageStart: details.pageStart,
      pageEnd: details.pageEnd,
      sources: [source]
    });
    return;
  }
  // Only fill gaps; values recorded earlier are never overwritten.
  for (const field of ["title", "pageStart", "pageEnd"]) {
    if (!entry[field] && details[field]) {
      entry[field] = details[field];
    }
  }
  if (!entry.sources.includes(source)) {
    entry.sources.push(source);
  }
}
3889
/**
 * Detect values that point elsewhere in the document instead of stating a
 * concrete term.
 *
 * @param {unknown} value - Extracted value; non-strings are never referential.
 * @returns {boolean} True when the lower-cased text contains one of the
 *   known referential phrases.
 */
function looksReferential(value) {
  if (typeof value !== "string") {
    return false;
  }
  const haystack = value.toLowerCase();
  const phrases = [
    "shown in the declarations",
    "shown in declarations",
    "shown in the schedule",
    "as stated",
    "if applicable"
  ];
  return phrases.some((phrase) => haystack.includes(phrase));
}
3894
/**
 * Detect text that looks like a table-of-contents line.
 *
 * The trimmed text matches when it ends in four or more dots followed by a
 * 1-3 digit number, or when it starts with "<number>. <Capital>" and ends in
 * three or more dots followed by a 1-3 digit number.
 *
 * @param {unknown} value - Extracted content; non-strings never match.
 * @returns {boolean}
 */
function looksTocArtifact(value) {
  if (typeof value !== "string") {
    return false;
  }
  const text = value.trim();
  const tocPatterns = [
    /\.{4,}\d{1,3}$/,
    /^\d+\.\s+[A-Z][\s\S]*\.{3,}\d{1,3}$/
  ];
  return tocPatterns.some((pattern) => pattern.test(text));
}
3898
/**
 * Rank a coverage's section reference by how authoritative its source is.
 *
 * Tiers are checked from highest to lowest, so a reference that mentions
 * several keywords gets the highest matching rank.
 *
 * @param {unknown} sectionRef - Section reference text; non-strings rank 0.
 * @returns {number} 4 for declarations/schedules, 3 for endorsements, 2 for
 *   additional coverages, 1 for coverage/policy forms, 0 otherwise.
 */
function sourcePrecedence(sectionRef) {
  if (typeof sectionRef !== "string") {
    return 0;
  }
  const ref = sectionRef.toLowerCase();
  const tiers = [
    [4, ["declaration", "scheduled coverages", "schedule"]],
    [3, ["endorsement"]],
    [2, ["additional coverages"]],
    [1, ["coverage form", "policy form"]]
  ];
  const hit = tiers.find(([, keywords]) => keywords.some((keyword) => ref.includes(keyword)));
  return hit ? hit[0] : 0;
}
3907
/**
 * Build the deterministic quality review report for an extraction run.
 *
 * Reads the extractor results stored in `memory` (form_inventory,
 * coverage_limits, endorsements, exclusions, conditions, sections), runs
 * rule-based checks over them, merges every form number that was seen into a
 * combined inventory, and folds the LLM review rounds into a gate status.
 *
 * "Missing from inventory" warnings are evaluated against the form numbers
 * returned by the form_inventory extractor only. They must not be evaluated
 * against the merged inventory, because each endorsement/coverage form number
 * is added to that map while it is being processed and would therefore
 * always be found.
 *
 * @param {object} params
 * @param {Map<string, object>} params.memory - Extractor results by name.
 * @param {Array<object>} [params.pageAssignments] - Page map entries; only
 *   the count is used. Treated as empty when omitted.
 * @param {Array<{ round: number, complete: boolean, qualityIssues: string[] }>} params.reviewRounds
 *   Review round records.
 * @returns {{ issues: object[], rounds: object[], artifacts: object[],
 *   reviewRoundRecords: object[], formInventory: object[],
 *   qualityGateStatus: string }}
 */
function buildExtractionReviewReport(params) {
  const { memory, reviewRounds } = params;
  const pageAssignments = params.pageAssignments ?? [];
  const deterministicIssues = [];
  const inventory = /* @__PURE__ */ new Map();

  const asString = (value) => (typeof value === "string" ? value : void 0);
  const asNumber = (value) => (typeof value === "number" ? value : void 0);
  const isBlank = (value) => typeof value !== "string" || value.trim() === "";
  const label = (value) => String(value ?? "unknown");

  const extractedFormInventory = memory.get("form_inventory")?.forms ?? [];
  const coverages = memory.get("coverage_limits")?.coverages ?? [];
  const endorsements = memory.get("endorsements")?.endorsements ?? [];
  const exclusions = memory.get("exclusions")?.exclusions ?? [];
  const conditions = memory.get("conditions")?.conditions ?? [];
  const sections = memory.get("sections")?.sections ?? [];

  // Form numbers the form_inventory extractor actually reported. This is the
  // reference set for the "missing from inventory" checks below.
  const inventoriedFormNumbers = /* @__PURE__ */ new Set();
  for (const form of extractedFormInventory) {
    const formNumber = normalizeFormNumber(form.formNumber);
    if (formNumber) inventoriedFormNumbers.add(formNumber);
    addFormEntry(inventory, formNumber, "form_inventory", {
      title: form.title,
      pageStart: form.pageStart,
      pageEnd: form.pageEnd
    });
  }

  for (const endorsement of endorsements) {
    const endorsementFormNumber = normalizeFormNumber(endorsement.formNumber);
    const pageNumber = asNumber(endorsement.pageStart);
    const itemName = asString(endorsement.title);
    addFormEntry(inventory, endorsementFormNumber, "endorsements", {
      title: itemName,
      pageStart: pageNumber,
      pageEnd: asNumber(endorsement.pageEnd)
    });
    if (!endorsementFormNumber) {
      deterministicIssues.push({
        code: "endorsement_missing_form_number",
        severity: "blocking",
        message: "Endorsement is missing formNumber.",
        extractorName: "endorsements",
        pageNumber,
        itemName
      });
    } else if (!inventoriedFormNumbers.has(endorsementFormNumber)) {
      deterministicIssues.push({
        code: "endorsement_form_missing_from_inventory",
        severity: "warning",
        message: `Endorsement "${String(endorsement.title ?? endorsementFormNumber)}" is not present in form inventory.`,
        extractorName: "endorsements",
        formNumber: endorsementFormNumber,
        pageNumber,
        itemName
      });
    }
  }

  for (const coverage of coverages) {
    const formNumber = normalizeFormNumber(coverage.formNumber);
    const pageNumber = asNumber(coverage.pageNumber);
    const itemName = asString(coverage.name);
    const shownName = label(coverage.name);
    addFormEntry(inventory, formNumber, "coverage_limits", {
      title: itemName,
      pageStart: pageNumber,
      pageEnd: pageNumber
    });
    if (typeof coverage.name === "string" && /coverage form$/i.test(coverage.name.trim())) {
      deterministicIssues.push({
        code: "generic_form_row_as_coverage",
        severity: "blocking",
        message: `Coverage "${coverage.name}" looks like a form header rather than a real coverage row.`,
        extractorName: "coverage_limits",
        formNumber,
        pageNumber,
        itemName: coverage.name
      });
    }
    if (typeof coverage.pageNumber !== "number") {
      deterministicIssues.push({
        code: "coverage_missing_page_number",
        severity: "warning",
        message: `Coverage "${shownName}" is missing pageNumber provenance.`,
        extractorName: "coverage_limits",
        formNumber,
        itemName
      });
    }
    if (isBlank(coverage.sectionRef)) {
      deterministicIssues.push({
        code: "coverage_missing_section_ref",
        severity: "warning",
        message: `Coverage "${shownName}" is missing sectionRef provenance.`,
        extractorName: "coverage_limits",
        formNumber,
        pageNumber,
        itemName
      });
    }
    if (isBlank(coverage.originalContent)) {
      deterministicIssues.push({
        code: "coverage_missing_original_content",
        severity: "warning",
        message: `Coverage "${shownName}" is missing originalContent source text.`,
        extractorName: "coverage_limits",
        formNumber,
        pageNumber,
        itemName
      });
    }
    if (looksReferential(coverage.limit) || looksReferential(coverage.deductible)) {
      deterministicIssues.push({
        code: "coverage_referential_value",
        severity: "warning",
        message: `Coverage "${shownName}" contains referential language instead of a concrete scheduled term.`,
        extractorName: "coverage_limits",
        formNumber,
        pageNumber,
        itemName
      });
    }
    if (formNumber && !inventoriedFormNumbers.has(formNumber)) {
      deterministicIssues.push({
        code: "coverage_form_missing_from_inventory",
        severity: "warning",
        message: `Coverage "${shownName}" references form "${formNumber}" that is missing from form inventory.`,
        extractorName: "coverage_limits",
        formNumber,
        pageNumber,
        itemName
      });
    }
  }

  // Group coverage rows that share a name and form number, then compare each
  // lower-precedence row against the highest-precedence one.
  const coverageGroups = /* @__PURE__ */ new Map();
  for (const coverage of coverages) {
    const key = [
      String(coverage.name ?? "").toLowerCase(),
      String(coverage.formNumber ?? "").toLowerCase()
    ].join("|");
    coverageGroups.set(key, [...coverageGroups.get(key) ?? [], coverage]);
  }
  for (const [key, groupedCoverages] of coverageGroups.entries()) {
    if (groupedCoverages.length < 2) continue;
    const sorted = [...groupedCoverages].sort(
      (a, b) => sourcePrecedence(b.sectionRef) - sourcePrecedence(a.sectionRef)
    );
    const highest = sorted[0];
    const highestLimit = String(highest.limit ?? "").trim();
    const highestDeductible = String(highest.deductible ?? "").trim();
    for (const lower of sorted.slice(1)) {
      const lowerLimit = String(lower.limit ?? "").trim();
      const lowerDeductible = String(lower.deductible ?? "").trim();
      // A conflict requires both sides to carry a value; a blank side is not a disagreement.
      const limitConflict = highestLimit && lowerLimit && highestLimit !== lowerLimit;
      const deductibleConflict = highestDeductible && lowerDeductible && highestDeductible !== lowerDeductible;
      if (limitConflict || deductibleConflict) {
        deterministicIssues.push({
          code: "coverage_precedence_conflict",
          severity: "warning",
          message: `Coverage "${String(highest.name ?? key)}" has conflicting extracted terms across sources with different precedence.`,
          extractorName: "coverage_limits",
          formNumber: normalizeFormNumber(highest.formNumber) ?? normalizeFormNumber(lower.formNumber),
          pageNumber: asNumber(lower.pageNumber),
          itemName: asString(highest.name)
        });
      }
    }
  }

  for (const exclusion of exclusions) {
    const formNumber = normalizeFormNumber(exclusion.formNumber);
    const pageNumber = asNumber(exclusion.pageNumber);
    const itemName = asString(exclusion.name);
    addFormEntry(inventory, formNumber, "exclusions", {
      title: itemName,
      pageStart: pageNumber,
      pageEnd: pageNumber
    });
    if (typeof exclusion.pageNumber !== "number") {
      deterministicIssues.push({
        code: "exclusion_missing_page_number",
        severity: "warning",
        message: `Exclusion "${label(exclusion.name)}" is missing pageNumber provenance.`,
        extractorName: "exclusions",
        formNumber,
        itemName
      });
    }
    if (looksTocArtifact(exclusion.content)) {
      deterministicIssues.push({
        code: "exclusion_toc_artifact",
        severity: "blocking",
        message: `Exclusion "${label(exclusion.name)}" appears to be a table-of-contents artifact.`,
        extractorName: "exclusions",
        pageNumber,
        itemName
      });
    }
  }

  for (const condition of conditions) {
    const itemName = asString(condition.name);
    if (typeof condition.pageNumber !== "number") {
      deterministicIssues.push({
        code: "condition_missing_page_number",
        severity: "warning",
        message: `Condition "${label(condition.name)}" is missing pageNumber provenance.`,
        extractorName: "conditions",
        itemName
      });
    }
    if (looksTocArtifact(condition.content)) {
      deterministicIssues.push({
        code: "condition_toc_artifact",
        severity: "blocking",
        message: `Condition "${label(condition.name)}" appears to be a table-of-contents artifact.`,
        extractorName: "conditions",
        pageNumber: asNumber(condition.pageNumber),
        itemName
      });
    }
  }

  for (const section of sections) {
    const isShort = typeof section.content === "string" && section.content.trim().length < 120;
    const isSinglePage = typeof section.pageStart === "number" && (section.pageEnd === void 0 || section.pageEnd === section.pageStart);
    if (isShort && isSinglePage) {
      deterministicIssues.push({
        code: "section_short_fragment",
        severity: "warning",
        message: `Section "${label(section.title)}" may be an orphan continuation fragment.`,
        extractorName: "sections",
        pageNumber: section.pageStart,
        itemName: asString(section.title)
      });
    }
  }

  const formInventory = [...inventory.values()].sort((a, b) => a.formNumber.localeCompare(b.formNumber));
  const rounds = reviewRounds.map((round) => ({
    round: round.round,
    kind: "llm_review",
    status: round.complete && round.qualityIssues.length === 0 ? "passed" : "warning",
    summary: round.qualityIssues[0] ?? (round.complete ? "Review passed." : "Review requested follow-up extraction.")
  }));
  const artifacts = [
    { kind: "form_inventory", label: "Form Inventory", itemCount: formInventory.length },
    { kind: "page_map", label: "Page Map", itemCount: pageAssignments.length }
  ];
  const qualityGateStatus = evaluateQualityGate({
    issues: deterministicIssues,
    hasRoundWarnings: reviewRounds.some((round) => round.qualityIssues.length > 0 || !round.complete)
  });
  return {
    issues: deterministicIssues,
    rounds,
    artifacts,
    reviewRoundRecords: reviewRounds,
    formInventory,
    qualityGateStatus
  };
}
4150
/**
 * Convert a review result into a stored review round record.
 *
 * @param {number} round - Round number to record.
 * @param {{ complete: boolean, missingFields: unknown, qualityIssues?: unknown[], additionalTasks: unknown }} review
 * @returns {object} Record with `round`, `complete`, `missingFields`,
 *   `qualityIssues` (an empty array when the review has none) and
 *   `additionalTasks`.
 */
function toReviewRoundRecord(round, review) {
  const { complete, missingFields, qualityIssues, additionalTasks } = review;
  return {
    round,
    complete,
    missingFields,
    qualityIssues: qualityIssues ?? [],
    additionalTasks
  };
}
4159
+
3551
4160
  // src/extraction/coordinator.ts
3552
4161
  function createExtractor(config) {
3553
4162
  const {
@@ -3560,21 +4169,174 @@ function createExtractor(config) {
3560
4169
  onProgress,
3561
4170
  log,
3562
4171
  providerOptions,
4172
+ qualityGate = "warn",
3563
4173
  onCheckpointSave
3564
4174
  } = config;
3565
4175
  const limit = pLimit(concurrency);
3566
4176
  let totalUsage = { inputTokens: 0, outputTokens: 0 };
4177
+ let modelCalls = 0;
4178
+ let callsWithUsage = 0;
4179
+ let callsMissingUsage = 0;
3567
4180
  function trackUsage(usage) {
4181
+ modelCalls += 1;
3568
4182
  if (usage) {
4183
+ callsWithUsage += 1;
3569
4184
  totalUsage.inputTokens += usage.inputTokens;
3570
4185
  totalUsage.outputTokens += usage.outputTokens;
3571
4186
  onTokenUsage?.(usage);
4187
+ } else {
4188
+ callsMissingUsage += 1;
4189
+ }
4190
+ }
4191
/**
 * Merge a fresh extractor result into whatever memory already holds under
 * the same name and store the merged value back.
 *
 * @param {string} name - Extractor name used as the memory key.
 * @param {unknown} data - Newly produced extractor result.
 * @param {Map<string, unknown>} memory - Extraction memory, mutated in place.
 */
function mergeMemoryResult(name, data, memory) {
  const previous = memory.get(name);
  const merged = mergeExtractorResult(name, previous, data);
  memory.set(name, merged);
}
4195
/**
 * Produce a compact JSON summary of what has been extracted so far.
 *
 * Lists the memory keys (excluding "classify"), per-category item counts,
 * and up to 12 coverage samples reduced to name/limit/deductible/formNumber.
 * A category whose list is missing or not an array counts as empty.
 *
 * @param {Map<string, object>} memory - Extractor results by name.
 * @returns {string} Pretty-printed JSON (2-space indent).
 */
function summarizeExtraction(memory) {
  const listAt = (key, field) => {
    const candidate = memory.get(key)?.[field];
    return Array.isArray(candidate) ? candidate : [];
  };
  const coverages = listAt("coverage_limits", "coverages");
  const summary = {
    extractedKeys: [...memory.keys()].filter((key) => key !== "classify"),
    declarationFieldCount: listAt("declarations", "fields").length,
    coverageCount: coverages.length,
    coverageSamples: coverages.slice(0, 12).map(({ name, limit, deductible, formNumber }) => ({
      name,
      limit,
      deductible,
      formNumber
    })),
    endorsementCount: listAt("endorsements", "endorsements").length,
    exclusionCount: listAt("exclusions", "exclusions").length,
    conditionCount: listAt("conditions", "conditions").length,
    sectionCount: listAt("sections", "sections").length
  };
  return JSON.stringify(summary, null, 2);
}
4219
/**
 * Render the page map as one "<extractor>: pages a, b, c" line per
 * extractor, in the order extractors are first encountered.
 *
 * @param {Array<{ extractorNames: string[], localPageNumber: number }>} pageAssignments
 * @returns {string} Newline-joined summary, or a fixed placeholder sentence
 *   when no extractor is assigned to any page.
 */
function formatPageMapSummary(pageAssignments) {
  const pagesByExtractor = /* @__PURE__ */ new Map();
  for (const { extractorNames, localPageNumber } of pageAssignments) {
    for (const name of extractorNames) {
      const pages = pagesByExtractor.get(name);
      if (pages) {
        pages.push(localPageNumber);
      } else {
        pagesByExtractor.set(name, [localPageNumber]);
      }
    }
  }
  if (pagesByExtractor.size === 0) {
    return "No page assignments available.";
  }
  const lines = [];
  for (const [name, pages] of pagesByExtractor) {
    lines.push(`${name}: pages ${pages.join(", ")}`);
  }
  return lines.join("\n");
}
4229
/**
 * Clean up raw page assignments before an extraction plan is built.
 *
 * For every page: extractor names are de-duplicated (empty lists become
 * ["sections"]); "coverage_limits" is dropped when the page has no schedule
 * values and either its role is a policy/condition-exclusion/endorsement
 * form, or it lacks "declarations" while also carrying conditions,
 * exclusions or endorsements or lying on an inventoried endorsement, notice
 * or application form; "endorsements" is added for pages lying on an
 * inventoried endorsement form.
 *
 * @param {Array<object>} pageAssignments - Entries with `localPageNumber`,
 *   `extractorNames`, and optional `hasScheduleValues` / `pageRole`.
 * @param {{ forms: Array<{ formType: string, pageStart?: number, pageEnd?: number }> }} [formInventory]
 * @returns {Array<object>} New entries with a normalized `extractorNames`.
 */
function normalizePageAssignments(pageAssignments, formInventory) {
  // Index the form types covering each page; forms without a start page are skipped.
  const formTypesByPage = /* @__PURE__ */ new Map();
  const forms = formInventory ? formInventory.forms : [];
  for (const form of forms) {
    if (form.pageStart == null) continue;
    const lastPage = form.pageEnd ?? form.pageStart;
    for (let page = form.pageStart; page <= lastPage; page++) {
      if (!formTypesByPage.has(page)) {
        formTypesByPage.set(page, /* @__PURE__ */ new Set());
      }
      formTypesByPage.get(page).add(form.formType);
    }
  }

  const blockingRoles = ["policy_form", "condition_exclusion_form", "endorsement_form"];
  const blockingFormTypes = ["endorsement", "notice", "application"];

  return pageAssignments.map((assignment) => {
    const requested = assignment.extractorNames.length > 0 ? assignment.extractorNames : ["sections"];
    let names = [...new Set(requested.filter(Boolean))];

    // Snapshot of what the page carried before any adjustment below.
    const hasDeclarations = names.includes("declarations");
    const hasOtherFormContent = names.includes("conditions") || names.includes("exclusions") || names.includes("endorsements");
    const hasScheduleValues = assignment.hasScheduleValues === true;
    const blockedByRole = blockingRoles.includes(assignment.pageRole);
    const inventoryTypes = formTypesByPage.get(assignment.localPageNumber);
    const onBlockingForm = inventoryTypes != null && blockingFormTypes.some((type) => inventoryTypes.has(type));

    if (names.includes("coverage_limits") && !hasScheduleValues) {
      const dropCoverageLimits = blockedByRole || (!hasDeclarations && (onBlockingForm || hasOtherFormContent));
      if (dropCoverageLimits) {
        names = names.filter((name) => name !== "coverage_limits");
      }
    }
    if (inventoryTypes?.has("endorsement") && !names.includes("endorsements")) {
      names.push("endorsements");
    }
    if (names.length === 0) {
      names = ["sections"];
    }
    return { ...assignment, extractorNames: names };
  });
}
4273
/**
 * Compose the four-line template hint text (document type, expected
 * sections, page hints, total pages) passed to the prompt builders.
 *
 * @param {string} primaryType - Primary policy type.
 * @param {string} documentType - Document type label.
 * @param {number} pageCount - Total number of pages.
 * @param {{ expectedSections: string[], pageHints: Record<string, string> }} template
 * @returns {string} Newline-joined hint lines.
 */
function buildTemplateHints(primaryType, documentType, pageCount, template) {
  const expectedSections = template.expectedSections.join(", ");
  const pageHints = Object.entries(template.pageHints)
    .map(([k, v]) => `${k}: ${v}`)
    .join("; ");
  const lines = [];
  lines.push(`Document type: ${primaryType} ${documentType}`);
  lines.push(`Expected sections: ${expectedSections}`);
  lines.push(`Page hints: ${pageHints}`);
  lines.push(`Total pages: ${pageCount}`);
  return lines.join("\n");
}
4281
/**
 * Collapse a list of page numbers into sorted, inclusive contiguous ranges.
 * Duplicates are ignored and input order does not matter.
 *
 * @param {number[]} pages - Page numbers.
 * @returns {Array<{ startPage: number, endPage: number }>} Ranges in
 *   ascending order; empty when `pages` is empty.
 */
function groupContiguousPages(pages) {
  if (pages.length === 0) {
    return [];
  }
  const ordered = Array.from(new Set(pages)).sort((a, b) => a - b);
  return ordered.reduce((ranges, page) => {
    const open = ranges[ranges.length - 1];
    if (open && page === open.endPage + 1) {
      // Page directly follows the open range: extend it.
      open.endPage = page;
    } else {
      ranges.push({ startPage: page, endPage: page });
    }
    return ranges;
  }, []);
}
4300
/**
 * Derive an extraction plan from the page map.
 *
 * Pages are bucketed per extractor (pages with no extractor go to
 * "sections"), any page in 1..pageCount that no extractor covers is also
 * sent to "sections", and each extractor's pages are split into contiguous
 * ranges that become tasks.
 *
 * @param {Array<{ extractorNames: string[], localPageNumber: number }>} pageAssignments
 * @param {number} pageCount - Total number of pages in the document.
 * @returns {{ tasks: object[], pageMap: Array<{ section: string, pages: string }> }}
 *   Tasks sorted by start page, then extractor name; pageMap lists each
 *   extractor's unique pages in ascending order.
 */
function buildPlanFromPageAssignments(pageAssignments, pageCount) {
  const pagesByExtractor = /* @__PURE__ */ new Map();
  const assign = (extractorName, page) => {
    const pages = pagesByExtractor.get(extractorName);
    if (pages) {
      pages.push(page);
    } else {
      pagesByExtractor.set(extractorName, [page]);
    }
  };

  for (const assignment of pageAssignments) {
    const names = assignment.extractorNames.length > 0 ? assignment.extractorNames : ["sections"];
    for (const name of names) {
      assign(name, assignment.localPageNumber);
    }
  }

  // Coverage is computed once, before the gap-filling pass below.
  const covered = new Set([...pagesByExtractor.values()].flat());
  for (let page = 1; page <= pageCount; page += 1) {
    if (!covered.has(page)) {
      assign("sections", page);
    }
  }

  const tasks = [];
  for (const [extractorName, pages] of pagesByExtractor) {
    for (const { startPage, endPage } of groupContiguousPages(pages)) {
      tasks.push({
        extractorName,
        startPage,
        endPage,
        description: `Page-mapped ${extractorName} extraction for pages ${startPage}-${endPage}`
      });
    }
  }
  tasks.sort((a, b) => a.startPage - b.startPage || a.extractorName.localeCompare(b.extractorName));

  const pageMap = [...pagesByExtractor.entries()].map(([section, pages]) => ({
    section,
    pages: `pages ${[...new Set(pages)].sort((a, b) => a - b).join(", ")}`
  }));
  return { tasks, pageMap };
}
3574
4333
  async function extract(pdfBase64, documentId, options) {
3575
4334
  const id = documentId ?? `doc-${Date.now()}`;
3576
4335
  const memory = /* @__PURE__ */ new Map();
3577
4336
  totalUsage = { inputTokens: 0, outputTokens: 0 };
4337
+ modelCalls = 0;
4338
+ callsWithUsage = 0;
4339
+ callsMissingUsage = 0;
3578
4340
  const pipelineCtx = createPipelineContext({
3579
4341
  id,
3580
4342
  onSave: onCheckpointSave,
@@ -3625,40 +4387,109 @@ function createExtractor(config) {
3625
4387
  const primaryType = policyTypes[0] ?? "other";
3626
4388
  const template = getTemplate(primaryType);
3627
4389
  const pageCount = resumed?.pageCount ?? await getPdfPageCount(pdfBase64);
3628
- let plan;
3629
- if (resumed?.plan && pipelineCtx.isPhaseComplete("plan")) {
3630
- plan = resumed.plan;
3631
- onProgress?.("Resuming from checkpoint (plan complete)...");
4390
+ const templateHints = buildTemplateHints(primaryType, documentType, pageCount, template);
4391
+ let formInventory;
4392
+ if (resumed?.formInventory && pipelineCtx.isPhaseComplete("form_inventory")) {
4393
+ formInventory = resumed.formInventory;
4394
+ memory.set("form_inventory", formInventory);
4395
+ onProgress?.("Resuming from checkpoint (form inventory complete)...");
3632
4396
  } else {
3633
- onProgress?.(`Planning extraction for ${primaryType} ${documentType}...`);
3634
- const templateHints = [
3635
- `Document type: ${primaryType} ${documentType}`,
3636
- `Expected sections: ${template.expectedSections.join(", ")}`,
3637
- `Page hints: ${Object.entries(template.pageHints).map(([k, v]) => `${k}: ${v}`).join("; ")}`,
3638
- `Total pages: ${pageCount}`
3639
- ].join("\n");
3640
- const planResponse = await safeGenerateObject(
4397
+ onProgress?.(`Building form inventory for ${primaryType} ${documentType}...`);
4398
+ const formInventoryResponse = await safeGenerateObject(
3641
4399
  generateObject,
3642
4400
  {
3643
- prompt: buildPlanPrompt(templateHints),
3644
- schema: ExtractionPlanSchema,
4401
+ prompt: buildFormInventoryPrompt(templateHints),
4402
+ schema: FormInventorySchema,
3645
4403
  maxTokens: 2048,
3646
4404
  providerOptions: { ...providerOptions, pdfBase64 }
3647
4405
  },
3648
4406
  {
3649
- fallback: {
3650
- tasks: [{ extractorName: "sections", startPage: 1, endPage: pageCount, description: "Full document fallback extraction" }]
3651
- },
4407
+ fallback: { forms: [] },
3652
4408
  log,
3653
- onError: (err, attempt) => log?.(`Plan attempt ${attempt + 1} failed: ${err}`)
4409
+ onError: (err, attempt) => log?.(`Form inventory attempt ${attempt + 1} failed: ${err instanceof Error ? err.message : String(err)}`)
3654
4410
  }
3655
4411
  );
3656
- trackUsage(planResponse.usage);
3657
- plan = planResponse.object;
4412
+ trackUsage(formInventoryResponse.usage);
4413
+ formInventory = formInventoryResponse.object;
4414
+ memory.set("form_inventory", formInventory);
4415
+ await pipelineCtx.save("form_inventory", {
4416
+ id,
4417
+ pageCount,
4418
+ classifyResult,
4419
+ formInventory,
4420
+ memory: Object.fromEntries(memory)
4421
+ });
4422
+ }
4423
+ let pageAssignments;
4424
+ if (resumed?.pageAssignments && pipelineCtx.isPhaseComplete("page_map")) {
4425
+ pageAssignments = resumed.pageAssignments;
4426
+ onProgress?.("Resuming from checkpoint (page map complete)...");
4427
+ } else {
4428
+ onProgress?.(`Mapping document pages for ${primaryType} ${documentType}...`);
4429
+ const chunkSize = 8;
4430
+ const collectedAssignments = [];
4431
+ const formInventoryHint = formInventory?.forms.length ? formatFormInventoryForPageMap(formInventory.forms) : void 0;
4432
+ for (let startPage = 1; startPage <= pageCount; startPage += chunkSize) {
4433
+ const endPage = Math.min(pageCount, startPage + chunkSize - 1);
4434
+ const pagesPdf = await extractPageRange(pdfBase64, startPage, endPage);
4435
+ const mapResponse = await safeGenerateObject(
4436
+ generateObject,
4437
+ {
4438
+ prompt: buildPageMapPrompt(templateHints, startPage, endPage, formInventoryHint),
4439
+ schema: PageMapChunkSchema,
4440
+ maxTokens: 2048,
4441
+ providerOptions: { ...providerOptions, pdfBase64: pagesPdf }
4442
+ },
4443
+ {
4444
+ fallback: {
4445
+ pages: Array.from({ length: endPage - startPage + 1 }, (_, index) => ({
4446
+ localPageNumber: index + 1,
4447
+ extractorNames: index === 0 && startPage === 1 ? ["carrier_info", "named_insured", "declarations", "coverage_limits"] : ["sections"],
4448
+ confidence: 0,
4449
+ notes: "Fallback page assignment"
4450
+ }))
4451
+ },
4452
+ log,
4453
+ onError: (err, attempt) => log?.(`Page map attempt ${attempt + 1} failed for pages ${startPage}-${endPage}: ${err}`)
4454
+ }
4455
+ );
4456
+ trackUsage(mapResponse.usage);
4457
+ for (const assignment of mapResponse.object.pages) {
4458
+ collectedAssignments.push({
4459
+ ...assignment,
4460
+ localPageNumber: startPage + assignment.localPageNumber - 1
4461
+ });
4462
+ }
4463
+ }
4464
+ pageAssignments = collectedAssignments.length > 0 ? collectedAssignments : Array.from({ length: pageCount }, (_, index) => ({
4465
+ localPageNumber: index + 1,
4466
+ extractorNames: index === 0 ? ["carrier_info", "named_insured", "declarations", "coverage_limits"] : ["sections"],
4467
+ confidence: 0,
4468
+ notes: "Full-document fallback page assignment"
4469
+ }));
4470
+ pageAssignments = normalizePageAssignments(pageAssignments, formInventory);
4471
+ await pipelineCtx.save("page_map", {
4472
+ id,
4473
+ pageCount,
4474
+ classifyResult,
4475
+ formInventory,
4476
+ pageAssignments,
4477
+ memory: Object.fromEntries(memory)
4478
+ });
4479
+ }
4480
+ let plan;
4481
+ if (resumed?.plan && pipelineCtx.isPhaseComplete("plan")) {
4482
+ plan = resumed.plan;
4483
+ onProgress?.("Resuming from checkpoint (plan complete)...");
4484
+ } else {
4485
+ onProgress?.(`Building extraction plan from page map for ${primaryType} ${documentType}...`);
4486
+ plan = buildPlanFromPageAssignments(pageAssignments, pageCount);
3658
4487
  await pipelineCtx.save("plan", {
3659
4488
  id,
3660
4489
  pageCount,
3661
4490
  classifyResult,
4491
+ formInventory,
4492
+ pageAssignments,
3662
4493
  plan,
3663
4494
  memory: Object.fromEntries(memory)
3664
4495
  });
@@ -3699,35 +4530,46 @@ function createExtractor(config) {
3699
4530
  );
3700
4531
  for (const result of extractorResults) {
3701
4532
  if (result) {
3702
- memory.set(result.name, result.data);
4533
+ mergeMemoryResult(result.name, result.data, memory);
3703
4534
  }
3704
4535
  }
3705
4536
  await pipelineCtx.save("extract", {
3706
4537
  id,
3707
4538
  pageCount,
3708
4539
  classifyResult,
4540
+ formInventory,
4541
+ pageAssignments,
3709
4542
  plan,
3710
4543
  memory: Object.fromEntries(memory)
3711
4544
  });
3712
4545
  }
4546
+ let reviewRounds = resumed?.reviewReport?.reviewRoundRecords ?? [];
4547
+ let reviewReport = resumed?.reviewReport;
3713
4548
  if (!pipelineCtx.isPhaseComplete("review")) {
4549
+ reviewRounds = [];
3714
4550
  for (let round = 0; round < maxReviewRounds; round++) {
3715
4551
  const extractedKeys = [...memory.keys()].filter((k) => k !== "classify");
4552
+ const extractionSummary = summarizeExtraction(memory);
4553
+ const pageMapSummary = formatPageMapSummary(pageAssignments);
3716
4554
  const reviewResponse = await safeGenerateObject(
3717
4555
  generateObject,
3718
4556
  {
3719
- prompt: buildReviewPrompt(template.required, extractedKeys),
4557
+ prompt: buildReviewPrompt(template.required, extractedKeys, extractionSummary, pageMapSummary),
3720
4558
  schema: ReviewResultSchema,
3721
- maxTokens: 1024,
3722
- providerOptions
4559
+ maxTokens: 1536,
4560
+ providerOptions: { ...providerOptions, pdfBase64 }
3723
4561
  },
3724
4562
  {
3725
- fallback: { complete: true, missingFields: [], additionalTasks: [] },
4563
+ fallback: { complete: true, missingFields: [], qualityIssues: [], additionalTasks: [] },
3726
4564
  log,
3727
4565
  onError: (err, attempt) => log?.(`Review round ${round + 1} attempt ${attempt + 1} failed: ${err}`)
3728
4566
  }
3729
4567
  );
3730
4568
  trackUsage(reviewResponse.usage);
4569
+ reviewRounds.push(toReviewRoundRecord(round + 1, reviewResponse.object));
4570
+ if (reviewResponse.object.qualityIssues?.length) {
4571
+ await log?.(`Review round ${round + 1} quality issues: ${reviewResponse.object.qualityIssues.join("; ")}`);
4572
+ }
3731
4573
  if (reviewResponse.object.complete || reviewResponse.object.additionalTasks.length === 0) {
3732
4574
  onProgress?.("Extraction complete.");
3733
4575
  break;
@@ -3762,25 +4604,49 @@ function createExtractor(config) {
3762
4604
  );
3763
4605
  for (const result of followUpResults) {
3764
4606
  if (result) {
3765
- memory.set(result.name, result.data);
4607
+ mergeMemoryResult(result.name, result.data, memory);
3766
4608
  }
3767
4609
  }
3768
4610
  }
4611
+ reviewReport = buildExtractionReviewReport({
4612
+ memory,
4613
+ pageAssignments,
4614
+ reviewRounds
4615
+ });
4616
+ if (reviewReport.issues.length > 0) {
4617
+ await log?.(
4618
+ `Deterministic review issues: ${reviewReport.issues.map((issue) => issue.message).join("; ")}`
4619
+ );
4620
+ }
4621
+ if (shouldFailQualityGate(qualityGate, reviewReport.qualityGateStatus)) {
4622
+ throw new Error("Extraction quality gate failed. See reviewReport for blocking issues.");
4623
+ }
3769
4624
  await pipelineCtx.save("review", {
3770
4625
  id,
3771
4626
  pageCount,
3772
4627
  classifyResult,
4628
+ formInventory,
4629
+ pageAssignments,
3773
4630
  plan,
4631
+ reviewReport,
3774
4632
  memory: Object.fromEntries(memory)
3775
4633
  });
3776
4634
  }
4635
+ reviewReport ?? (reviewReport = buildExtractionReviewReport({
4636
+ memory,
4637
+ pageAssignments,
4638
+ reviewRounds
4639
+ }));
3777
4640
  onProgress?.("Assembling document...");
3778
4641
  const document = assembleDocument(id, documentType, memory);
3779
4642
  await pipelineCtx.save("assemble", {
3780
4643
  id,
3781
4644
  pageCount,
3782
4645
  classifyResult,
4646
+ formInventory,
4647
+ pageAssignments,
3783
4648
  plan,
4649
+ reviewReport,
3784
4650
  memory: Object.fromEntries(memory),
3785
4651
  document
3786
4652
  });
@@ -3793,11 +4659,21 @@ function createExtractor(config) {
3793
4659
  trackUsage(formatResult.usage);
3794
4660
  const chunks = chunkDocument(formatResult.document);
3795
4661
  const finalCheckpoint = pipelineCtx.getCheckpoint();
4662
+ if (callsMissingUsage > 0) {
4663
+ await log?.(`Token usage was unavailable for ${callsMissingUsage}/${modelCalls} model calls. Check that your provider callbacks return usage.`);
4664
+ onProgress?.(`Token usage unavailable for ${callsMissingUsage}/${modelCalls} model calls.`);
4665
+ }
3796
4666
  return {
3797
4667
  document: formatResult.document,
3798
4668
  chunks,
3799
4669
  tokenUsage: totalUsage,
3800
- checkpoint: finalCheckpoint
4670
+ usageReporting: {
4671
+ modelCalls,
4672
+ callsWithUsage,
4673
+ callsMissingUsage
4674
+ },
4675
+ checkpoint: finalCheckpoint,
4676
+ reviewReport
3801
4677
  };
3802
4678
  }
3803
4679
  return { extract };
@@ -4017,8 +4893,8 @@ Respond with JSON only:
4017
4893
  }`;
4018
4894
 
4019
4895
  // src/schemas/application.ts
4020
- var import_zod32 = require("zod");
4021
- var FieldTypeSchema = import_zod32.z.enum([
4896
+ var import_zod33 = require("zod");
4897
+ var FieldTypeSchema = import_zod33.z.enum([
4022
4898
  "text",
4023
4899
  "numeric",
4024
4900
  "currency",
@@ -4027,100 +4903,131 @@ var FieldTypeSchema = import_zod32.z.enum([
4027
4903
  "table",
4028
4904
  "declaration"
4029
4905
  ]);
4030
- var ApplicationFieldSchema = import_zod32.z.object({
4031
- id: import_zod32.z.string(),
4032
- label: import_zod32.z.string(),
4033
- section: import_zod32.z.string(),
4906
+ var ApplicationFieldSchema = import_zod33.z.object({
4907
+ id: import_zod33.z.string(),
4908
+ label: import_zod33.z.string(),
4909
+ section: import_zod33.z.string(),
4034
4910
  fieldType: FieldTypeSchema,
4035
- required: import_zod32.z.boolean(),
4036
- options: import_zod32.z.array(import_zod32.z.string()).optional(),
4037
- columns: import_zod32.z.array(import_zod32.z.string()).optional(),
4038
- requiresExplanationIfYes: import_zod32.z.boolean().optional(),
4039
- condition: import_zod32.z.object({
4040
- dependsOn: import_zod32.z.string(),
4041
- whenValue: import_zod32.z.string()
4911
+ required: import_zod33.z.boolean(),
4912
+ options: import_zod33.z.array(import_zod33.z.string()).optional(),
4913
+ columns: import_zod33.z.array(import_zod33.z.string()).optional(),
4914
+ requiresExplanationIfYes: import_zod33.z.boolean().optional(),
4915
+ condition: import_zod33.z.object({
4916
+ dependsOn: import_zod33.z.string(),
4917
+ whenValue: import_zod33.z.string()
4042
4918
  }).optional(),
4043
- value: import_zod32.z.string().optional(),
4044
- source: import_zod32.z.string().optional().describe("Where the value came from: auto-fill, user, lookup"),
4045
- confidence: import_zod32.z.enum(["confirmed", "high", "medium", "low"]).optional()
4919
+ value: import_zod33.z.string().optional(),
4920
+ source: import_zod33.z.string().optional().describe("Where the value came from: auto-fill, user, lookup"),
4921
+ confidence: import_zod33.z.enum(["confirmed", "high", "medium", "low"]).optional()
4922
+ });
4923
+ var ApplicationClassifyResultSchema = import_zod33.z.object({
4924
+ isApplication: import_zod33.z.boolean(),
4925
+ confidence: import_zod33.z.number().min(0).max(1),
4926
+ applicationType: import_zod33.z.string().nullable()
4927
+ });
4928
+ var FieldExtractionResultSchema = import_zod33.z.object({
4929
+ fields: import_zod33.z.array(ApplicationFieldSchema)
4930
+ });
4931
+ var AutoFillMatchSchema = import_zod33.z.object({
4932
+ fieldId: import_zod33.z.string(),
4933
+ value: import_zod33.z.string(),
4934
+ confidence: import_zod33.z.enum(["confirmed"]),
4935
+ contextKey: import_zod33.z.string()
4046
4936
  });
4047
- var ApplicationClassifyResultSchema = import_zod32.z.object({
4048
- isApplication: import_zod32.z.boolean(),
4049
- confidence: import_zod32.z.number().min(0).max(1),
4050
- applicationType: import_zod32.z.string().nullable()
4937
+ var AutoFillResultSchema = import_zod33.z.object({
4938
+ matches: import_zod33.z.array(AutoFillMatchSchema)
4051
4939
  });
4052
- var FieldExtractionResultSchema = import_zod32.z.object({
4053
- fields: import_zod32.z.array(ApplicationFieldSchema)
4940
+ var QuestionBatchResultSchema = import_zod33.z.object({
4941
+ batches: import_zod33.z.array(import_zod33.z.array(import_zod33.z.string()).describe("Array of field IDs in this batch"))
4054
4942
  });
4055
- var AutoFillMatchSchema = import_zod32.z.object({
4056
- fieldId: import_zod32.z.string(),
4057
- value: import_zod32.z.string(),
4058
- confidence: import_zod32.z.enum(["confirmed"]),
4059
- contextKey: import_zod32.z.string()
4943
+ var LookupRequestSchema = import_zod33.z.object({
4944
+ type: import_zod33.z.string().describe("Type of lookup: 'records', 'website', 'policy'"),
4945
+ description: import_zod33.z.string(),
4946
+ url: import_zod33.z.string().optional(),
4947
+ targetFieldIds: import_zod33.z.array(import_zod33.z.string())
4060
4948
  });
4061
- var AutoFillResultSchema = import_zod32.z.object({
4062
- matches: import_zod32.z.array(AutoFillMatchSchema)
4949
+ var ReplyIntentSchema = import_zod33.z.object({
4950
+ primaryIntent: import_zod33.z.enum(["answers_only", "question", "lookup_request", "mixed"]),
4951
+ hasAnswers: import_zod33.z.boolean(),
4952
+ questionText: import_zod33.z.string().optional(),
4953
+ questionFieldIds: import_zod33.z.array(import_zod33.z.string()).optional(),
4954
+ lookupRequests: import_zod33.z.array(LookupRequestSchema).optional()
4063
4955
  });
4064
- var QuestionBatchResultSchema = import_zod32.z.object({
4065
- batches: import_zod32.z.array(import_zod32.z.array(import_zod32.z.string()).describe("Array of field IDs in this batch"))
4956
+ var ParsedAnswerSchema = import_zod33.z.object({
4957
+ fieldId: import_zod33.z.string(),
4958
+ value: import_zod33.z.string(),
4959
+ explanation: import_zod33.z.string().optional()
4066
4960
  });
4067
- var LookupRequestSchema = import_zod32.z.object({
4068
- type: import_zod32.z.string().describe("Type of lookup: 'records', 'website', 'policy'"),
4069
- description: import_zod32.z.string(),
4070
- url: import_zod32.z.string().optional(),
4071
- targetFieldIds: import_zod32.z.array(import_zod32.z.string())
4961
+ var AnswerParsingResultSchema = import_zod33.z.object({
4962
+ answers: import_zod33.z.array(ParsedAnswerSchema),
4963
+ unanswered: import_zod33.z.array(import_zod33.z.string()).describe("Field IDs that were not answered")
4072
4964
  });
4073
- var ReplyIntentSchema = import_zod32.z.object({
4074
- primaryIntent: import_zod32.z.enum(["answers_only", "question", "lookup_request", "mixed"]),
4075
- hasAnswers: import_zod32.z.boolean(),
4076
- questionText: import_zod32.z.string().optional(),
4077
- questionFieldIds: import_zod32.z.array(import_zod32.z.string()).optional(),
4078
- lookupRequests: import_zod32.z.array(LookupRequestSchema).optional()
4965
+ var LookupFillSchema = import_zod33.z.object({
4966
+ fieldId: import_zod33.z.string(),
4967
+ value: import_zod33.z.string(),
4968
+ source: import_zod33.z.string().describe("Specific citable reference, e.g. 'GL Policy #POL-12345 (Hartford)'")
4079
4969
  });
4080
- var ParsedAnswerSchema = import_zod32.z.object({
4081
- fieldId: import_zod32.z.string(),
4082
- value: import_zod32.z.string(),
4083
- explanation: import_zod32.z.string().optional()
4970
+ var LookupFillResultSchema = import_zod33.z.object({
4971
+ fills: import_zod33.z.array(LookupFillSchema),
4972
+ unfillable: import_zod33.z.array(import_zod33.z.string()),
4973
+ explanation: import_zod33.z.string().optional()
4084
4974
  });
4085
- var AnswerParsingResultSchema = import_zod32.z.object({
4086
- answers: import_zod32.z.array(ParsedAnswerSchema),
4087
- unanswered: import_zod32.z.array(import_zod32.z.string()).describe("Field IDs that were not answered")
4975
+ var FlatPdfPlacementSchema = import_zod33.z.object({
4976
+ fieldId: import_zod33.z.string(),
4977
+ page: import_zod33.z.number(),
4978
+ x: import_zod33.z.number().describe("Percentage from left edge (0-100)"),
4979
+ y: import_zod33.z.number().describe("Percentage from top edge (0-100)"),
4980
+ text: import_zod33.z.string(),
4981
+ fontSize: import_zod33.z.number().optional(),
4982
+ isCheckmark: import_zod33.z.boolean().optional()
4088
4983
  });
4089
- var LookupFillSchema = import_zod32.z.object({
4090
- fieldId: import_zod32.z.string(),
4091
- value: import_zod32.z.string(),
4092
- source: import_zod32.z.string().describe("Specific citable reference, e.g. 'GL Policy #POL-12345 (Hartford)'")
4984
+ var AcroFormMappingSchema = import_zod33.z.object({
4985
+ fieldId: import_zod33.z.string(),
4986
+ acroFormName: import_zod33.z.string(),
4987
+ value: import_zod33.z.string()
4093
4988
  });
4094
- var LookupFillResultSchema = import_zod32.z.object({
4095
- fills: import_zod32.z.array(LookupFillSchema),
4096
- unfillable: import_zod32.z.array(import_zod32.z.string()),
4097
- explanation: import_zod32.z.string().optional()
4989
+ var QualityGateStatusSchema = import_zod33.z.enum(["passed", "warning", "failed"]);
4990
+ var QualitySeveritySchema = import_zod33.z.enum(["info", "warning", "blocking"]);
4991
+ var ApplicationQualityIssueSchema = import_zod33.z.object({
4992
+ code: import_zod33.z.string(),
4993
+ severity: QualitySeveritySchema,
4994
+ message: import_zod33.z.string(),
4995
+ fieldId: import_zod33.z.string().optional()
4098
4996
  });
4099
- var FlatPdfPlacementSchema = import_zod32.z.object({
4100
- fieldId: import_zod32.z.string(),
4101
- page: import_zod32.z.number(),
4102
- x: import_zod32.z.number().describe("Percentage from left edge (0-100)"),
4103
- y: import_zod32.z.number().describe("Percentage from top edge (0-100)"),
4104
- text: import_zod32.z.string(),
4105
- fontSize: import_zod32.z.number().optional(),
4106
- isCheckmark: import_zod32.z.boolean().optional()
4997
+ var ApplicationQualityRoundSchema = import_zod33.z.object({
4998
+ round: import_zod33.z.number(),
4999
+ kind: import_zod33.z.string(),
5000
+ status: QualityGateStatusSchema,
5001
+ summary: import_zod33.z.string().optional()
4107
5002
  });
4108
- var AcroFormMappingSchema = import_zod32.z.object({
4109
- fieldId: import_zod32.z.string(),
4110
- acroFormName: import_zod32.z.string(),
4111
- value: import_zod32.z.string()
5003
+ var ApplicationQualityArtifactSchema = import_zod33.z.object({
5004
+ kind: import_zod33.z.string(),
5005
+ label: import_zod33.z.string().optional(),
5006
+ itemCount: import_zod33.z.number().optional()
4112
5007
  });
4113
- var ApplicationStateSchema = import_zod32.z.object({
4114
- id: import_zod32.z.string(),
4115
- pdfBase64: import_zod32.z.string().optional().describe("Original PDF, omitted after extraction"),
4116
- title: import_zod32.z.string().optional(),
4117
- applicationType: import_zod32.z.string().nullable().optional(),
4118
- fields: import_zod32.z.array(ApplicationFieldSchema),
4119
- batches: import_zod32.z.array(import_zod32.z.array(import_zod32.z.string())).optional(),
4120
- currentBatchIndex: import_zod32.z.number().default(0),
4121
- status: import_zod32.z.enum(["classifying", "extracting", "auto_filling", "batching", "collecting", "confirming", "mapping", "complete"]),
4122
- createdAt: import_zod32.z.number(),
4123
- updatedAt: import_zod32.z.number()
5008
+ var ApplicationEmailReviewSchema = import_zod33.z.object({
5009
+ issues: import_zod33.z.array(ApplicationQualityIssueSchema),
5010
+ qualityGateStatus: QualityGateStatusSchema
5011
+ });
5012
+ var ApplicationQualityReportSchema = import_zod33.z.object({
5013
+ issues: import_zod33.z.array(ApplicationQualityIssueSchema),
5014
+ rounds: import_zod33.z.array(ApplicationQualityRoundSchema).optional(),
5015
+ artifacts: import_zod33.z.array(ApplicationQualityArtifactSchema).optional(),
5016
+ emailReview: ApplicationEmailReviewSchema.optional(),
5017
+ qualityGateStatus: QualityGateStatusSchema
5018
+ });
5019
+ var ApplicationStateSchema = import_zod33.z.object({
5020
+ id: import_zod33.z.string(),
5021
+ pdfBase64: import_zod33.z.string().optional().describe("Original PDF, omitted after extraction"),
5022
+ title: import_zod33.z.string().optional(),
5023
+ applicationType: import_zod33.z.string().nullable().optional(),
5024
+ fields: import_zod33.z.array(ApplicationFieldSchema),
5025
+ batches: import_zod33.z.array(import_zod33.z.array(import_zod33.z.string())).optional(),
5026
+ currentBatchIndex: import_zod33.z.number().default(0),
5027
+ qualityReport: ApplicationQualityReportSchema.optional(),
5028
+ status: import_zod33.z.enum(["classifying", "extracting", "auto_filling", "batching", "collecting", "confirming", "mapping", "complete"]),
5029
+ createdAt: import_zod33.z.number(),
5030
+ updatedAt: import_zod33.z.number()
4124
5031
  });
4125
5032
 
4126
5033
  // src/application/agents/classifier.ts
@@ -4628,6 +5535,87 @@ async function generateBatchEmail(batchFields, batchIndex, totalBatches, opts, g
4628
5535
  return { text, usage };
4629
5536
  }
4630
5537
 
5538
+ // src/application/quality.ts
5539
+ function isVagueSource(source) {
5540
+ if (!source) return true;
5541
+ const normalized = source.trim().toLowerCase();
5542
+ return normalized === "unknown" || normalized.includes("existing records") || normalized.includes("available data") || normalized === "context" || normalized === "user provided";
5543
+ }
5544
+ function buildApplicationQualityReport(state) {
5545
+ const issues = [];
5546
+ const seenIds = /* @__PURE__ */ new Set();
5547
+ for (const field of state.fields) {
5548
+ if (seenIds.has(field.id)) {
5549
+ issues.push({
5550
+ code: "duplicate_field_id",
5551
+ severity: "blocking",
5552
+ message: `Field "${field.label}" has a duplicate id "${field.id}".`,
5553
+ fieldId: field.id
5554
+ });
5555
+ }
5556
+ seenIds.add(field.id);
5557
+ if (field.required && !field.value) {
5558
+ issues.push({
5559
+ code: "required_field_unfilled",
5560
+ severity: "warning",
5561
+ message: `Required field "${field.label}" is still unfilled.`,
5562
+ fieldId: field.id
5563
+ });
5564
+ }
5565
+ if (field.value && !field.source) {
5566
+ issues.push({
5567
+ code: "filled_field_missing_source",
5568
+ severity: "blocking",
5569
+ message: `Filled field "${field.label}" is missing source provenance.`,
5570
+ fieldId: field.id
5571
+ });
5572
+ }
5573
+ if (field.value && isVagueSource(field.source)) {
5574
+ issues.push({
5575
+ code: "filled_field_vague_source",
5576
+ severity: "warning",
5577
+ message: `Filled field "${field.label}" has a vague or non-citable source.`,
5578
+ fieldId: field.id
5579
+ });
5580
+ }
5581
+ if (field.value && (!field.confidence || field.confidence === "low")) {
5582
+ issues.push({
5583
+ code: "filled_field_low_confidence",
5584
+ severity: "warning",
5585
+ message: `Filled field "${field.label}" has low or missing confidence.`,
5586
+ fieldId: field.id
5587
+ });
5588
+ }
5589
+ }
5590
+ return {
5591
+ issues,
5592
+ rounds: [],
5593
+ artifacts: [
5594
+ { kind: "application_fields", label: "Application Fields", itemCount: state.fields.length }
5595
+ ],
5596
+ qualityGateStatus: evaluateQualityGate({ issues })
5597
+ };
5598
+ }
5599
+ function reviewBatchEmail(text, batchFields) {
5600
+ const issues = [];
5601
+ const normalized = text.toLowerCase();
5602
+ for (const field of batchFields) {
5603
+ const label = field.label.trim().toLowerCase();
5604
+ if (label.length >= 6 && !normalized.includes(label)) {
5605
+ issues.push({
5606
+ code: "email_missing_field_prompt",
5607
+ severity: "warning",
5608
+ message: `Generated email does not clearly mention field "${field.label}".`,
5609
+ fieldId: field.id
5610
+ });
5611
+ }
5612
+ }
5613
+ return {
5614
+ issues,
5615
+ qualityGateStatus: evaluateQualityGate({ issues })
5616
+ };
5617
+ }
5618
+
4631
5619
  // src/application/coordinator.ts
4632
5620
  function createApplicationPipeline(config) {
4633
5621
  const {
@@ -4642,7 +5630,8 @@ function createApplicationPipeline(config) {
4642
5630
  onTokenUsage,
4643
5631
  onProgress,
4644
5632
  log,
4645
- providerOptions
5633
+ providerOptions,
5634
+ qualityGate = "warn"
4646
5635
  } = config;
4647
5636
  const limit = pLimit(concurrency);
4648
5637
  let totalUsage = { inputTokens: 0, outputTokens: 0 };
@@ -4664,6 +5653,7 @@ function createApplicationPipeline(config) {
4664
5653
  title: void 0,
4665
5654
  applicationType: null,
4666
5655
  fields: [],
5656
+ qualityReport: void 0,
4667
5657
  batches: void 0,
4668
5658
  currentBatchIndex: 0,
4669
5659
  status: "classifying",
@@ -4688,8 +5678,9 @@ function createApplicationPipeline(config) {
4688
5678
  if (!classifyResult.isApplication) {
4689
5679
  state.status = "complete";
4690
5680
  state.updatedAt = Date.now();
5681
+ state.qualityReport = buildApplicationQualityReport(state);
4691
5682
  await applicationStore?.save(state);
4692
- return { state, tokenUsage: totalUsage };
5683
+ return { state, tokenUsage: totalUsage, reviewReport: state.qualityReport };
4693
5684
  }
4694
5685
  state.applicationType = classifyResult.applicationType;
4695
5686
  state.status = "extracting";
@@ -4713,8 +5704,9 @@ function createApplicationPipeline(config) {
4713
5704
  await log?.("No fields extracted, completing pipeline with empty result");
4714
5705
  state.status = "complete";
4715
5706
  state.updatedAt = Date.now();
5707
+ state.qualityReport = buildApplicationQualityReport(state);
4716
5708
  await applicationStore?.save(state);
4717
- return { state, tokenUsage: totalUsage };
5709
+ return { state, tokenUsage: totalUsage, reviewReport: state.qualityReport };
4718
5710
  }
4719
5711
  state.fields = fields;
4720
5712
  state.title = classifyResult.applicationType ?? void 0;
@@ -4814,11 +5806,15 @@ function createApplicationPipeline(config) {
4814
5806
  } else {
4815
5807
  state.status = "confirming";
4816
5808
  }
5809
+ state.qualityReport = buildApplicationQualityReport(state);
4817
5810
  state.updatedAt = Date.now();
4818
5811
  await applicationStore?.save(state);
5812
+ if (shouldFailQualityGate(qualityGate, state.qualityReport.qualityGateStatus)) {
5813
+ throw new Error("Application quality gate failed. See state.qualityReport for blocking issues.");
5814
+ }
4819
5815
  const filledCount = state.fields.filter((f) => f.value).length;
4820
5816
  onProgress?.(`Application processed: ${filledCount}/${state.fields.length} fields filled, ${state.batches?.length ?? 0} batches to collect.`);
4821
- return { state, tokenUsage: totalUsage };
5817
+ return { state, tokenUsage: totalUsage, reviewReport: state.qualityReport };
4822
5818
  }
4823
5819
  async function processReply(input) {
4824
5820
  totalUsage = { inputTokens: 0, outputTokens: 0 };
@@ -4965,6 +5961,11 @@ Provide a brief, helpful explanation (2-3 sentences). End with "Just reply with
4965
5961
  providerOptions
4966
5962
  );
4967
5963
  trackUsage(emailUsage);
5964
+ const emailReview = reviewBatchEmail(emailText, nextBatchFields);
5965
+ state.qualityReport = {
5966
+ ...buildApplicationQualityReport(state),
5967
+ emailReview
5968
+ };
4968
5969
  if (!responseText) {
4969
5970
  responseText = emailText;
4970
5971
  } else {
@@ -4980,13 +5981,18 @@ ${emailText}`;
4980
5981
  }
4981
5982
  }
4982
5983
  state.updatedAt = Date.now();
5984
+ state.qualityReport = state.qualityReport ?? buildApplicationQualityReport(state);
4983
5985
  await applicationStore?.save(state);
5986
+ if (shouldFailQualityGate(qualityGate, state.qualityReport.qualityGateStatus)) {
5987
+ throw new Error("Application quality gate failed. See state.qualityReport for blocking issues.");
5988
+ }
4984
5989
  return {
4985
5990
  state,
4986
5991
  intent: intent.primaryIntent,
4987
5992
  fieldsFilled,
4988
5993
  responseText,
4989
- tokenUsage: totalUsage
5994
+ tokenUsage: totalUsage,
5995
+ reviewReport: state.qualityReport
4990
5996
  };
4991
5997
  }
4992
5998
  async function generateCurrentBatchEmail(applicationId, opts) {
@@ -5012,6 +6018,12 @@ ${emailText}`;
5012
6018
  providerOptions
5013
6019
  );
5014
6020
  trackUsage(usage);
6021
+ const emailReview = reviewBatchEmail(text, batchFields);
6022
+ state.qualityReport = {
6023
+ ...buildApplicationQualityReport(state),
6024
+ emailReview
6025
+ };
6026
+ await applicationStore?.save(state);
5015
6027
  return { text, tokenUsage: totalUsage };
5016
6028
  }
5017
6029
  async function getConfirmationSummary(applicationId) {
@@ -5148,73 +6160,73 @@ Respond with the final answer, deduplicated citations array, overall confidence
5148
6160
  }
5149
6161
 
5150
6162
  // src/schemas/query.ts
5151
- var import_zod33 = require("zod");
5152
- var QueryIntentSchema = import_zod33.z.enum([
6163
+ var import_zod34 = require("zod");
6164
+ var QueryIntentSchema = import_zod34.z.enum([
5153
6165
  "policy_question",
5154
6166
  "coverage_comparison",
5155
6167
  "document_search",
5156
6168
  "claims_inquiry",
5157
6169
  "general_knowledge"
5158
6170
  ]);
5159
- var SubQuestionSchema = import_zod33.z.object({
5160
- question: import_zod33.z.string().describe("Atomic sub-question to retrieve and answer independently"),
6171
+ var SubQuestionSchema = import_zod34.z.object({
6172
+ question: import_zod34.z.string().describe("Atomic sub-question to retrieve and answer independently"),
5161
6173
  intent: QueryIntentSchema,
5162
- chunkTypes: import_zod33.z.array(import_zod33.z.string()).optional().describe("Chunk types to filter retrieval (e.g. coverage, endorsement, declaration)"),
5163
- documentFilters: import_zod33.z.object({
5164
- type: import_zod33.z.enum(["policy", "quote"]).optional(),
5165
- carrier: import_zod33.z.string().optional(),
5166
- insuredName: import_zod33.z.string().optional(),
5167
- policyNumber: import_zod33.z.string().optional(),
5168
- quoteNumber: import_zod33.z.string().optional()
6174
+ chunkTypes: import_zod34.z.array(import_zod34.z.string()).optional().describe("Chunk types to filter retrieval (e.g. coverage, endorsement, declaration)"),
6175
+ documentFilters: import_zod34.z.object({
6176
+ type: import_zod34.z.enum(["policy", "quote"]).optional(),
6177
+ carrier: import_zod34.z.string().optional(),
6178
+ insuredName: import_zod34.z.string().optional(),
6179
+ policyNumber: import_zod34.z.string().optional(),
6180
+ quoteNumber: import_zod34.z.string().optional()
5169
6181
  }).optional().describe("Structured filters to narrow document lookup")
5170
6182
  });
5171
- var QueryClassifyResultSchema = import_zod33.z.object({
6183
+ var QueryClassifyResultSchema = import_zod34.z.object({
5172
6184
  intent: QueryIntentSchema,
5173
- subQuestions: import_zod33.z.array(SubQuestionSchema).min(1).describe("Decomposed atomic sub-questions"),
5174
- requiresDocumentLookup: import_zod33.z.boolean().describe("Whether structured document lookup is needed"),
5175
- requiresChunkSearch: import_zod33.z.boolean().describe("Whether semantic chunk search is needed"),
5176
- requiresConversationHistory: import_zod33.z.boolean().describe("Whether conversation history is relevant")
6185
+ subQuestions: import_zod34.z.array(SubQuestionSchema).min(1).describe("Decomposed atomic sub-questions"),
6186
+ requiresDocumentLookup: import_zod34.z.boolean().describe("Whether structured document lookup is needed"),
6187
+ requiresChunkSearch: import_zod34.z.boolean().describe("Whether semantic chunk search is needed"),
6188
+ requiresConversationHistory: import_zod34.z.boolean().describe("Whether conversation history is relevant")
5177
6189
  });
5178
- var EvidenceItemSchema = import_zod33.z.object({
5179
- source: import_zod33.z.enum(["chunk", "document", "conversation"]),
5180
- chunkId: import_zod33.z.string().optional(),
5181
- documentId: import_zod33.z.string().optional(),
5182
- turnId: import_zod33.z.string().optional(),
5183
- text: import_zod33.z.string().describe("Text excerpt from the source"),
5184
- relevance: import_zod33.z.number().min(0).max(1),
5185
- metadata: import_zod33.z.array(import_zod33.z.object({ key: import_zod33.z.string(), value: import_zod33.z.string() })).optional()
6190
+ var EvidenceItemSchema = import_zod34.z.object({
6191
+ source: import_zod34.z.enum(["chunk", "document", "conversation"]),
6192
+ chunkId: import_zod34.z.string().optional(),
6193
+ documentId: import_zod34.z.string().optional(),
6194
+ turnId: import_zod34.z.string().optional(),
6195
+ text: import_zod34.z.string().describe("Text excerpt from the source"),
6196
+ relevance: import_zod34.z.number().min(0).max(1),
6197
+ metadata: import_zod34.z.array(import_zod34.z.object({ key: import_zod34.z.string(), value: import_zod34.z.string() })).optional()
5186
6198
  });
5187
- var RetrievalResultSchema = import_zod33.z.object({
5188
- subQuestion: import_zod33.z.string(),
5189
- evidence: import_zod33.z.array(EvidenceItemSchema)
6199
+ var RetrievalResultSchema = import_zod34.z.object({
6200
+ subQuestion: import_zod34.z.string(),
6201
+ evidence: import_zod34.z.array(EvidenceItemSchema)
5190
6202
  });
5191
- var CitationSchema = import_zod33.z.object({
5192
- index: import_zod33.z.number().describe("Citation number [1], [2], etc."),
5193
- chunkId: import_zod33.z.string().describe("Source chunk ID, e.g. doc-123:coverage:2"),
5194
- documentId: import_zod33.z.string(),
5195
- documentType: import_zod33.z.enum(["policy", "quote"]).optional(),
5196
- field: import_zod33.z.string().optional().describe("Specific field path, e.g. coverages[0].deductible"),
5197
- quote: import_zod33.z.string().describe("Exact text from source that supports the claim"),
5198
- relevance: import_zod33.z.number().min(0).max(1)
6203
+ var CitationSchema = import_zod34.z.object({
6204
+ index: import_zod34.z.number().describe("Citation number [1], [2], etc."),
6205
+ chunkId: import_zod34.z.string().describe("Source chunk ID, e.g. doc-123:coverage:2"),
6206
+ documentId: import_zod34.z.string(),
6207
+ documentType: import_zod34.z.enum(["policy", "quote"]).optional(),
6208
+ field: import_zod34.z.string().optional().describe("Specific field path, e.g. coverages[0].deductible"),
6209
+ quote: import_zod34.z.string().describe("Exact text from source that supports the claim"),
6210
+ relevance: import_zod34.z.number().min(0).max(1)
5199
6211
  });
5200
- var SubAnswerSchema = import_zod33.z.object({
5201
- subQuestion: import_zod33.z.string(),
5202
- answer: import_zod33.z.string(),
5203
- citations: import_zod33.z.array(CitationSchema),
5204
- confidence: import_zod33.z.number().min(0).max(1),
5205
- needsMoreContext: import_zod33.z.boolean().describe("True if evidence was insufficient to answer fully")
6212
+ var SubAnswerSchema = import_zod34.z.object({
6213
+ subQuestion: import_zod34.z.string(),
6214
+ answer: import_zod34.z.string(),
6215
+ citations: import_zod34.z.array(CitationSchema),
6216
+ confidence: import_zod34.z.number().min(0).max(1),
6217
+ needsMoreContext: import_zod34.z.boolean().describe("True if evidence was insufficient to answer fully")
5206
6218
  });
5207
- var VerifyResultSchema = import_zod33.z.object({
5208
- approved: import_zod33.z.boolean().describe("Whether all sub-answers are adequately grounded"),
5209
- issues: import_zod33.z.array(import_zod33.z.string()).describe("Specific grounding or consistency issues found"),
5210
- retrySubQuestions: import_zod33.z.array(import_zod33.z.string()).optional().describe("Sub-questions that need additional retrieval or re-reasoning")
6219
+ var VerifyResultSchema = import_zod34.z.object({
6220
+ approved: import_zod34.z.boolean().describe("Whether all sub-answers are adequately grounded"),
6221
+ issues: import_zod34.z.array(import_zod34.z.string()).describe("Specific grounding or consistency issues found"),
6222
+ retrySubQuestions: import_zod34.z.array(import_zod34.z.string()).optional().describe("Sub-questions that need additional retrieval or re-reasoning")
5211
6223
  });
5212
- var QueryResultSchema = import_zod33.z.object({
5213
- answer: import_zod33.z.string(),
5214
- citations: import_zod33.z.array(CitationSchema),
6224
+ var QueryResultSchema = import_zod34.z.object({
6225
+ answer: import_zod34.z.string(),
6226
+ citations: import_zod34.z.array(CitationSchema),
5215
6227
  intent: QueryIntentSchema,
5216
- confidence: import_zod33.z.number().min(0).max(1),
5217
- followUp: import_zod33.z.string().optional().describe("Suggested follow-up question if applicable")
6228
+ confidence: import_zod34.z.number().min(0).max(1),
6229
+ followUp: import_zod34.z.string().optional().describe("Suggested follow-up question if applicable")
5218
6230
  });
5219
6231
 
5220
6232
  // src/query/retriever.ts
@@ -5502,6 +6514,112 @@ async function verify(originalQuestion, subAnswers, allEvidence, config) {
5502
6514
  return { result: object, usage };
5503
6515
  }
5504
6516
 
6517
+ // src/query/quality.ts
6518
/**
 * Resolve the identifier under which a retrieved evidence item is indexed.
 *
 * Candidates are tried in order: `chunkId`, then `documentId`, then `turnId`.
 * The first non-empty one wins.
 *
 * @param {{ chunkId?: string, documentId?: string, turnId?: string }} evidence
 *   Evidence item to identify.
 * @returns {string | undefined} The first non-empty id, or a falsy value when
 *   the item carries no usable id.
 */
function sourceIdForEvidence(evidence) {
  // `||` rather than `??`: an empty-string id identifies nothing, so it must
  // not mask a later, usable id. citationSourceId() skips empty ids the same
  // way, which keeps both sides of the evidence/citation lookup on one key.
  return evidence.chunkId || evidence.documentId || evidence.turnId;
}
6521
/**
 * Pick the source identifier a citation points at.
 *
 * @param {{ chunkId?: string, documentId?: string }} citation
 * @returns {string | undefined} `chunkId` when it is truthy, otherwise
 *   whatever `documentId` holds.
 */
function citationSourceId(citation) {
  const preferredId = citation.chunkId;
  if (preferredId) {
    return preferredId;
  }
  return citation.documentId;
}
6524
/**
 * Run deterministic checks over the output of a query run and collect the
 * findings into a review report.
 *
 * Checks performed:
 *  - a sub-answer that does not ask for more context must have citations;
 *  - a sub-answer with confidence >= 0.85 must have citations;
 *  - every sub-answer citation must resolve (via citationSourceId) to at
 *    least one evidence item (indexed via sourceIdForEvidence);
 *  - the cited quote should appear verbatim in the text of that evidence;
 *  - a non-empty final answer with confidence > 0.4 must have citations;
 *  - every final-answer citation should also appear in some sub-answer.
 *
 * @param {object} params
 * @param {Array<object>} [params.subAnswers] Sub-answers to check; the function
 *   reads `subQuestion`, `citations`, `confidence` and `needsMoreContext`.
 * @param {Array<object>} [params.evidence] Retrieved evidence items; the
 *   function reads `text` plus whichever id sourceIdForEvidence selects.
 * @param {object} [params.finalResult] Final answer; the function reads
 *   `answer`, `citations` and `confidence`. Final-answer checks are skipped
 *   when it is absent.
 * @param {Array<object>} [params.verifyRounds] Verifier outcomes; the function
 *   reads `round`, `approved` and `issues`.
 * @returns {{issues: Array<object>, rounds: Array<object>, artifacts: Array<object>, verifyRounds: Array<object>, qualityGateStatus: *}}
 *   The collected issues, one summary per verification round, artifact
 *   counts, the verify rounds as passed in, and the value returned by
 *   evaluateQualityGate for these issues.
 */
function buildQueryReviewReport(params) {
  // Missing collections are treated as empty so a partial run can still be reviewed.
  const { subAnswers = [], evidence = [], finalResult, verifyRounds = [] } = params;
  const issues = [];

  // Group evidence by source id. Buckets are appended to in place; re-spreading
  // the bucket on every insert would be quadratic in the bucket size.
  const evidenceBySource = /* @__PURE__ */ new Map();
  for (const item of evidence) {
    const sourceId = sourceIdForEvidence(item);
    if (!sourceId) continue;
    const bucket = evidenceBySource.get(sourceId);
    if (bucket) {
      bucket.push(item);
    } else {
      evidenceBySource.set(sourceId, [item]);
    }
  }

  for (const subAnswer of subAnswers) {
    if (!subAnswer.needsMoreContext && subAnswer.citations.length === 0) {
      issues.push({
        code: "subanswer_missing_citations",
        severity: "blocking",
        message: `Sub-answer "${subAnswer.subQuestion}" has no citations despite claiming an answer.`,
        subQuestion: subAnswer.subQuestion
      });
    }
    if (subAnswer.confidence >= 0.85 && subAnswer.citations.length === 0) {
      issues.push({
        code: "subanswer_high_confidence_without_citations",
        severity: "blocking",
        message: `Sub-answer "${subAnswer.subQuestion}" has high confidence without citations.`,
        subQuestion: subAnswer.subQuestion
      });
    }
    for (const citation of subAnswer.citations) {
      const sourceId = citationSourceId(citation);
      const supportedEvidence = sourceId ? evidenceBySource.get(sourceId) ?? [] : [];
      if (!sourceId || supportedEvidence.length === 0) {
        issues.push({
          code: "citation_missing_from_evidence",
          severity: "blocking",
          message: `Citation [${citation.index}] in "${subAnswer.subQuestion}" does not map to retrieved evidence.`,
          subQuestion: subAnswer.subQuestion,
          citationIndex: citation.index,
          sourceId
        });
        continue;
      }
      // Only string-to-string comparisons count as a match: evidence without
      // text must not throw, and a missing quote must not be coerced to the
      // literal string "undefined" by String.prototype.includes.
      const { quote } = citation;
      const quoteFound = typeof quote === "string" && supportedEvidence.some(
        (item) => typeof item.text === "string" && item.text.includes(quote)
      );
      if (!quoteFound) {
        issues.push({
          code: "citation_quote_not_in_evidence",
          severity: "warning",
          message: `Citation [${citation.index}] quote in "${subAnswer.subQuestion}" was not found verbatim in retrieved evidence.`,
          subQuestion: subAnswer.subQuestion,
          citationIndex: citation.index,
          sourceId
        });
      }
    }
  }

  if (finalResult) {
    if (finalResult.answer.trim().length > 0 && finalResult.citations.length === 0 && finalResult.confidence > 0.4) {
      issues.push({
        code: "final_answer_missing_citations",
        severity: "blocking",
        message: "Final answer has non-trivial confidence but no citations."
      });
    }
    // A final citation is "known" when a sub-answer carries one with the same
    // index, chunkId and documentId.
    const knownCitationIds = new Set(
      subAnswers.flatMap((sa) => sa.citations.map((citation) => `${citation.index}|${citation.chunkId}|${citation.documentId}`))
    );
    for (const citation of finalResult.citations) {
      const key = `${citation.index}|${citation.chunkId}|${citation.documentId}`;
      if (!knownCitationIds.has(key)) {
        issues.push({
          code: "final_answer_unknown_citation",
          severity: "warning",
          message: `Final answer citation [${citation.index}] was not present in verified sub-answers.`,
          citationIndex: citation.index,
          sourceId: citationSourceId(citation)
        });
      }
    }
  }

  // One summary row per verifier round; a round only counts as passed when it
  // was approved and reported no issues.
  const rounds = verifyRounds.map((round) => ({
    round: round.round,
    kind: "verification",
    status: round.approved && round.issues.length === 0 ? "passed" : "warning",
    summary: round.issues[0] ?? (round.approved ? "Verification passed." : "Verification requested retry.")
  }));
  const artifacts = [
    { kind: "evidence", label: "Retrieved Evidence", itemCount: evidence.length },
    { kind: "sub_answers", label: "Sub Answers", itemCount: subAnswers.length }
  ];
  return {
    issues,
    rounds,
    artifacts,
    verifyRounds,
    qualityGateStatus: evaluateQualityGate({
      issues,
      hasRoundWarnings: verifyRounds.some((round) => !round.approved || round.issues.length > 0)
    })
  };
}
6622
+
5505
6623
  // src/query/coordinator.ts
5506
6624
  function createQueryAgent(config) {
5507
6625
  const {
@@ -5515,7 +6633,8 @@ function createQueryAgent(config) {
5515
6633
  onTokenUsage,
5516
6634
  onProgress,
5517
6635
  log,
5518
- providerOptions
6636
+ providerOptions,
6637
+ qualityGate = "warn"
5519
6638
  } = config;
5520
6639
  const limit = pLimit(concurrency);
5521
6640
  let totalUsage = { inputTokens: 0, outputTokens: 0 };
@@ -5584,6 +6703,7 @@ function createQueryAgent(config) {
5584
6703
  await pipelineCtx.save("reason", { classification, evidence: allEvidence, subAnswers });
5585
6704
  onProgress?.("Verifying answer grounding...");
5586
6705
  const verifierConfig = { generateObject, providerOptions };
6706
+ const verifyRounds = [];
5587
6707
  for (let round = 0; round < maxVerifyRounds; round++) {
5588
6708
  const { result: verifyResult, usage } = await safeVerify(
5589
6709
  question,
@@ -5592,6 +6712,12 @@ function createQueryAgent(config) {
5592
6712
  verifierConfig
5593
6713
  );
5594
6714
  trackUsage(usage);
6715
+ verifyRounds.push({
6716
+ round: round + 1,
6717
+ approved: verifyResult.approved,
6718
+ issues: verifyResult.issues,
6719
+ retrySubQuestions: verifyResult.retrySubQuestions
6720
+ });
5595
6721
  if (verifyResult.approved) {
5596
6722
  onProgress?.("Verification passed.");
5597
6723
  break;
@@ -5649,6 +6775,24 @@ function createQueryAgent(config) {
5649
6775
  classification,
5650
6776
  context?.platform
5651
6777
  );
6778
+ const reviewReport = buildQueryReviewReport({
6779
+ subAnswers,
6780
+ evidence: allEvidence,
6781
+ finalResult: queryResult,
6782
+ verifyRounds
6783
+ });
6784
+ await pipelineCtx.save("review", {
6785
+ classification,
6786
+ evidence: allEvidence,
6787
+ subAnswers,
6788
+ reviewReport
6789
+ });
6790
+ if (reviewReport.issues.length > 0) {
6791
+ await log?.(`Query deterministic review issues: ${reviewReport.issues.map((issue) => issue.message).join("; ")}`);
6792
+ }
6793
+ if (shouldFailQualityGate(qualityGate, reviewReport.qualityGateStatus)) {
6794
+ throw new Error("Query quality gate failed. See reviewReport for blocking issues.");
6795
+ }
5652
6796
  if (conversationId) {
5653
6797
  try {
5654
6798
  await memoryStore.addTurn({
@@ -5669,7 +6813,7 @@ function createQueryAgent(config) {
5669
6813
  await log?.(`Failed to store conversation turn: ${e}`);
5670
6814
  }
5671
6815
  }
5672
- return { ...queryResult, tokenUsage: totalUsage };
6816
+ return { ...queryResult, tokenUsage: totalUsage, reviewReport };
5673
6817
  }
5674
6818
  async function classify(question, conversationId) {
5675
6819
  let conversationContext;
@@ -5891,7 +7035,12 @@ var AGENT_TOOLS = [
5891
7035
  AdmittedStatusSchema,
5892
7036
  AnswerParsingResultSchema,
5893
7037
  ApplicationClassifyResultSchema,
7038
+ ApplicationEmailReviewSchema,
5894
7039
  ApplicationFieldSchema,
7040
+ ApplicationQualityArtifactSchema,
7041
+ ApplicationQualityIssueSchema,
7042
+ ApplicationQualityReportSchema,
7043
+ ApplicationQualityRoundSchema,
5895
7044
  ApplicationStateSchema,
5896
7045
  AuditTypeSchema,
5897
7046
  AutoFillMatchSchema,
@@ -5923,6 +7072,7 @@ var AGENT_TOOLS = [
5923
7072
  CoverageFormSchema,
5924
7073
  CoverageSchema,
5925
7074
  CoverageTriggerSchema,
7075
+ CoverageValueTypeSchema,
5926
7076
  CrimeDeclarationsSchema,
5927
7077
  CyberDeclarationsSchema,
5928
7078
  DEDUCTIBLE_TYPES,