@claritylabs/cl-sdk 0.8.1 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +18 -623
- package/dist/index.d.mts +865 -65
- package/dist/index.d.ts +865 -65
- package/dist/index.js +1536 -386
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +1530 -386
- package/dist/index.mjs.map +1 -1
- package/dist/storage-sqlite.d.mts +114 -24
- package/dist/storage-sqlite.d.ts +114 -24
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -29,7 +29,12 @@ __export(index_exports, {
|
|
|
29
29
|
AdmittedStatusSchema: () => AdmittedStatusSchema,
|
|
30
30
|
AnswerParsingResultSchema: () => AnswerParsingResultSchema,
|
|
31
31
|
ApplicationClassifyResultSchema: () => ApplicationClassifyResultSchema,
|
|
32
|
+
ApplicationEmailReviewSchema: () => ApplicationEmailReviewSchema,
|
|
32
33
|
ApplicationFieldSchema: () => ApplicationFieldSchema,
|
|
34
|
+
ApplicationQualityArtifactSchema: () => ApplicationQualityArtifactSchema,
|
|
35
|
+
ApplicationQualityIssueSchema: () => ApplicationQualityIssueSchema,
|
|
36
|
+
ApplicationQualityReportSchema: () => ApplicationQualityReportSchema,
|
|
37
|
+
ApplicationQualityRoundSchema: () => ApplicationQualityRoundSchema,
|
|
33
38
|
ApplicationStateSchema: () => ApplicationStateSchema,
|
|
34
39
|
AuditTypeSchema: () => AuditTypeSchema,
|
|
35
40
|
AutoFillMatchSchema: () => AutoFillMatchSchema,
|
|
@@ -61,6 +66,7 @@ __export(index_exports, {
|
|
|
61
66
|
CoverageFormSchema: () => CoverageFormSchema,
|
|
62
67
|
CoverageSchema: () => CoverageSchema,
|
|
63
68
|
CoverageTriggerSchema: () => CoverageTriggerSchema,
|
|
69
|
+
CoverageValueTypeSchema: () => CoverageValueTypeSchema,
|
|
64
70
|
CrimeDeclarationsSchema: () => CrimeDeclarationsSchema,
|
|
65
71
|
CyberDeclarationsSchema: () => CyberDeclarationsSchema,
|
|
66
72
|
DEDUCTIBLE_TYPES: () => DEDUCTIBLE_TYPES,
|
|
@@ -730,7 +736,9 @@ var FormReferenceSchema = import_zod3.z.object({
|
|
|
730
736
|
formNumber: import_zod3.z.string(),
|
|
731
737
|
editionDate: import_zod3.z.string().optional(),
|
|
732
738
|
title: import_zod3.z.string().optional(),
|
|
733
|
-
formType: import_zod3.z.enum(["coverage", "endorsement", "declarations", "application", "notice", "other"])
|
|
739
|
+
formType: import_zod3.z.enum(["coverage", "endorsement", "declarations", "application", "notice", "other"]),
|
|
740
|
+
pageStart: import_zod3.z.number().optional(),
|
|
741
|
+
pageEnd: import_zod3.z.number().optional()
|
|
734
742
|
});
|
|
735
743
|
var TaxFeeItemSchema = import_zod3.z.object({
|
|
736
744
|
name: import_zod3.z.string(),
|
|
@@ -767,12 +775,25 @@ var NamedInsuredSchema = import_zod3.z.object({
|
|
|
767
775
|
|
|
768
776
|
// src/schemas/coverage.ts
|
|
769
777
|
var import_zod4 = require("zod");
|
|
778
|
+
var CoverageValueTypeSchema = import_zod4.z.enum([
|
|
779
|
+
"numeric",
|
|
780
|
+
"included",
|
|
781
|
+
"not_included",
|
|
782
|
+
"as_stated",
|
|
783
|
+
"waiting_period",
|
|
784
|
+
"referential",
|
|
785
|
+
"other"
|
|
786
|
+
]);
|
|
770
787
|
var CoverageSchema = import_zod4.z.object({
|
|
771
788
|
name: import_zod4.z.string(),
|
|
772
789
|
limit: import_zod4.z.string(),
|
|
790
|
+
limitValueType: CoverageValueTypeSchema.optional(),
|
|
773
791
|
deductible: import_zod4.z.string().optional(),
|
|
792
|
+
deductibleValueType: CoverageValueTypeSchema.optional(),
|
|
793
|
+
formNumber: import_zod4.z.string().optional(),
|
|
774
794
|
pageNumber: import_zod4.z.number().optional(),
|
|
775
|
-
sectionRef: import_zod4.z.string().optional()
|
|
795
|
+
sectionRef: import_zod4.z.string().optional(),
|
|
796
|
+
originalContent: import_zod4.z.string().optional()
|
|
776
797
|
});
|
|
777
798
|
var EnrichedCoverageSchema = import_zod4.z.object({
|
|
778
799
|
name: import_zod4.z.string(),
|
|
@@ -781,8 +802,10 @@ var EnrichedCoverageSchema = import_zod4.z.object({
|
|
|
781
802
|
formEditionDate: import_zod4.z.string().optional(),
|
|
782
803
|
limit: import_zod4.z.string(),
|
|
783
804
|
limitType: LimitTypeSchema.optional(),
|
|
805
|
+
limitValueType: CoverageValueTypeSchema.optional(),
|
|
784
806
|
deductible: import_zod4.z.string().optional(),
|
|
785
807
|
deductibleType: DeductibleTypeSchema.optional(),
|
|
808
|
+
deductibleValueType: CoverageValueTypeSchema.optional(),
|
|
786
809
|
sir: import_zod4.z.string().optional(),
|
|
787
810
|
sublimit: import_zod4.z.string().optional(),
|
|
788
811
|
coinsurance: import_zod4.z.string().optional(),
|
|
@@ -793,7 +816,8 @@ var EnrichedCoverageSchema = import_zod4.z.object({
|
|
|
793
816
|
included: import_zod4.z.boolean(),
|
|
794
817
|
premium: import_zod4.z.string().optional(),
|
|
795
818
|
pageNumber: import_zod4.z.number().optional(),
|
|
796
|
-
sectionRef: import_zod4.z.string().optional()
|
|
819
|
+
sectionRef: import_zod4.z.string().optional(),
|
|
820
|
+
originalContent: import_zod4.z.string().optional()
|
|
797
821
|
});
|
|
798
822
|
|
|
799
823
|
// src/schemas/endorsement.ts
|
|
@@ -1802,6 +1826,7 @@ function assembleDocument(documentId, documentType, memory) {
|
|
|
1802
1826
|
const lossHistory = memory.get("loss_history");
|
|
1803
1827
|
const sections = memory.get("sections");
|
|
1804
1828
|
const supplementary = memory.get("supplementary");
|
|
1829
|
+
const formInventory = memory.get("form_inventory");
|
|
1805
1830
|
const classify = memory.get("classify");
|
|
1806
1831
|
const base = {
|
|
1807
1832
|
id: documentId,
|
|
@@ -1818,6 +1843,7 @@ function assembleDocument(documentId, documentType, memory) {
|
|
|
1818
1843
|
exclusions: exclusions?.exclusions,
|
|
1819
1844
|
conditions: conditions?.conditions,
|
|
1820
1845
|
sections: sections?.sections,
|
|
1846
|
+
formInventory: formInventory?.forms,
|
|
1821
1847
|
declarations: declarations ? sanitizeNulls(declarations) : void 0,
|
|
1822
1848
|
...sanitizeNulls(lossHistory ?? {})
|
|
1823
1849
|
};
|
|
@@ -2059,6 +2085,11 @@ async function formatDocumentContent(doc, generateText, options) {
|
|
|
2059
2085
|
function chunkDocument(doc) {
|
|
2060
2086
|
const chunks = [];
|
|
2061
2087
|
const docId = doc.id;
|
|
2088
|
+
function stringMetadata(entries) {
|
|
2089
|
+
return Object.fromEntries(
|
|
2090
|
+
Object.entries(entries).filter(([, value]) => value !== void 0 && value !== null && String(value).length > 0).map(([key, value]) => [key, String(value)])
|
|
2091
|
+
);
|
|
2092
|
+
}
|
|
2062
2093
|
chunks.push({
|
|
2063
2094
|
id: `${docId}:carrier_info:0`,
|
|
2064
2095
|
documentId: docId,
|
|
@@ -2070,7 +2101,7 @@ function chunkDocument(doc) {
|
|
|
2070
2101
|
doc.carrierAmBestRating ? `AM Best: ${doc.carrierAmBestRating}` : null,
|
|
2071
2102
|
doc.mga ? `MGA: ${doc.mga}` : null
|
|
2072
2103
|
].filter(Boolean).join("\n"),
|
|
2073
|
-
metadata: { carrier: doc.carrier, documentType: doc.type }
|
|
2104
|
+
metadata: stringMetadata({ carrier: doc.carrier, documentType: doc.type })
|
|
2074
2105
|
});
|
|
2075
2106
|
chunks.push({
|
|
2076
2107
|
id: `${docId}:named_insured:0`,
|
|
@@ -2082,17 +2113,32 @@ function chunkDocument(doc) {
|
|
|
2082
2113
|
doc.insuredFein ? `FEIN: ${doc.insuredFein}` : null,
|
|
2083
2114
|
doc.insuredAddress ? `Address: ${doc.insuredAddress.street1}, ${doc.insuredAddress.city}, ${doc.insuredAddress.state} ${doc.insuredAddress.zip}` : null
|
|
2084
2115
|
].filter(Boolean).join("\n"),
|
|
2085
|
-
metadata: { insuredName: doc.insuredName, documentType: doc.type }
|
|
2116
|
+
metadata: stringMetadata({ insuredName: doc.insuredName, documentType: doc.type })
|
|
2086
2117
|
});
|
|
2087
2118
|
doc.coverages.forEach((cov, i) => {
|
|
2088
2119
|
chunks.push({
|
|
2089
2120
|
id: `${docId}:coverage:${i}`,
|
|
2090
2121
|
documentId: docId,
|
|
2091
2122
|
type: "coverage",
|
|
2092
|
-
text:
|
|
2093
|
-
|
|
2094
|
-
|
|
2095
|
-
|
|
2123
|
+
text: [
|
|
2124
|
+
`Coverage: ${cov.name}`,
|
|
2125
|
+
`Limit: ${cov.limit}`,
|
|
2126
|
+
cov.limitValueType ? `Limit Type: ${cov.limitValueType}` : null,
|
|
2127
|
+
cov.deductible ? `Deductible: ${cov.deductible}` : null,
|
|
2128
|
+
cov.deductibleValueType ? `Deductible Type: ${cov.deductibleValueType}` : null,
|
|
2129
|
+
cov.originalContent ? `Source: ${cov.originalContent}` : null
|
|
2130
|
+
].filter(Boolean).join("\n"),
|
|
2131
|
+
metadata: stringMetadata({
|
|
2132
|
+
coverageName: cov.name,
|
|
2133
|
+
limit: cov.limit,
|
|
2134
|
+
limitValueType: cov.limitValueType,
|
|
2135
|
+
deductible: cov.deductible,
|
|
2136
|
+
deductibleValueType: cov.deductibleValueType,
|
|
2137
|
+
formNumber: cov.formNumber,
|
|
2138
|
+
pageNumber: cov.pageNumber,
|
|
2139
|
+
sectionRef: cov.sectionRef,
|
|
2140
|
+
documentType: doc.type
|
|
2141
|
+
})
|
|
2096
2142
|
});
|
|
2097
2143
|
});
|
|
2098
2144
|
doc.endorsements?.forEach((end, i) => {
|
|
@@ -2102,7 +2148,13 @@ Deductible: ${cov.deductible}` : ""}`,
|
|
|
2102
2148
|
type: "endorsement",
|
|
2103
2149
|
text: `Endorsement: ${end.title}
|
|
2104
2150
|
${end.content}`.trim(),
|
|
2105
|
-
metadata: {
|
|
2151
|
+
metadata: stringMetadata({
|
|
2152
|
+
endorsementType: end.endorsementType,
|
|
2153
|
+
formNumber: end.formNumber,
|
|
2154
|
+
pageStart: end.pageStart,
|
|
2155
|
+
pageEnd: end.pageEnd,
|
|
2156
|
+
documentType: doc.type
|
|
2157
|
+
})
|
|
2106
2158
|
});
|
|
2107
2159
|
});
|
|
2108
2160
|
doc.exclusions?.forEach((exc, i) => {
|
|
@@ -2112,7 +2164,7 @@ ${end.content}`.trim(),
|
|
|
2112
2164
|
type: "exclusion",
|
|
2113
2165
|
text: `Exclusion: ${exc.name}
|
|
2114
2166
|
${exc.content}`.trim(),
|
|
2115
|
-
metadata: { documentType: doc.type }
|
|
2167
|
+
metadata: stringMetadata({ formNumber: exc.formNumber, pageNumber: exc.pageNumber, documentType: doc.type })
|
|
2116
2168
|
});
|
|
2117
2169
|
});
|
|
2118
2170
|
doc.sections?.forEach((sec, i) => {
|
|
@@ -2122,7 +2174,7 @@ ${exc.content}`.trim(),
|
|
|
2122
2174
|
type: "section",
|
|
2123
2175
|
text: `Section: ${sec.title}
|
|
2124
2176
|
${sec.content}`,
|
|
2125
|
-
metadata: { sectionType: sec.type, documentType: doc.type }
|
|
2177
|
+
metadata: stringMetadata({ sectionType: sec.type, pageStart: sec.pageStart, pageEnd: sec.pageEnd, documentType: doc.type })
|
|
2126
2178
|
});
|
|
2127
2179
|
});
|
|
2128
2180
|
if (doc.premium) {
|
|
@@ -2132,12 +2184,138 @@ ${sec.content}`,
|
|
|
2132
2184
|
type: "premium",
|
|
2133
2185
|
text: `Premium: ${doc.premium}${doc.totalCost ? `
|
|
2134
2186
|
Total Cost: ${doc.totalCost}` : ""}`,
|
|
2135
|
-
metadata: { premium: doc.premium, documentType: doc.type }
|
|
2187
|
+
metadata: stringMetadata({ premium: doc.premium, documentType: doc.type })
|
|
2136
2188
|
});
|
|
2137
2189
|
}
|
|
2138
2190
|
return chunks;
|
|
2139
2191
|
}
|
|
2140
2192
|
|
|
2193
|
+
// src/extraction/merge.ts
|
|
2194
|
+
function isPresent(value) {
|
|
2195
|
+
if (value === void 0 || value === null) return false;
|
|
2196
|
+
if (typeof value === "string") return value.trim().length > 0;
|
|
2197
|
+
if (Array.isArray(value)) return value.length > 0;
|
|
2198
|
+
return true;
|
|
2199
|
+
}
|
|
2200
|
+
function dedupeByKey(items, keyFn) {
|
|
2201
|
+
const seen = /* @__PURE__ */ new Set();
|
|
2202
|
+
const merged = [];
|
|
2203
|
+
for (const item of items) {
|
|
2204
|
+
const key = keyFn(item);
|
|
2205
|
+
if (seen.has(key)) continue;
|
|
2206
|
+
seen.add(key);
|
|
2207
|
+
merged.push(item);
|
|
2208
|
+
}
|
|
2209
|
+
return merged;
|
|
2210
|
+
}
|
|
2211
|
+
function mergeUniqueObjects(existing, incoming, keyFn) {
|
|
2212
|
+
return dedupeByKey([...existing, ...incoming], keyFn);
|
|
2213
|
+
}
|
|
2214
|
+
function mergeShallowPreferPresent(existing, incoming) {
|
|
2215
|
+
const merged = { ...existing };
|
|
2216
|
+
for (const [key, value] of Object.entries(incoming)) {
|
|
2217
|
+
const current = merged[key];
|
|
2218
|
+
if (Array.isArray(current) && Array.isArray(value)) {
|
|
2219
|
+
merged[key] = [...current, ...value];
|
|
2220
|
+
continue;
|
|
2221
|
+
}
|
|
2222
|
+
if (current && value && typeof current === "object" && typeof value === "object" && !Array.isArray(current) && !Array.isArray(value)) {
|
|
2223
|
+
merged[key] = mergeShallowPreferPresent(
|
|
2224
|
+
current,
|
|
2225
|
+
value
|
|
2226
|
+
);
|
|
2227
|
+
continue;
|
|
2228
|
+
}
|
|
2229
|
+
if (!isPresent(current) && isPresent(value)) {
|
|
2230
|
+
merged[key] = value;
|
|
2231
|
+
}
|
|
2232
|
+
}
|
|
2233
|
+
return merged;
|
|
2234
|
+
}
|
|
2235
|
+
function mergeCoverageLimits(existing, incoming) {
|
|
2236
|
+
const merged = mergeShallowPreferPresent(existing, incoming);
|
|
2237
|
+
const existingCoverages = Array.isArray(existing.coverages) ? existing.coverages : [];
|
|
2238
|
+
const incomingCoverages = Array.isArray(incoming.coverages) ? incoming.coverages : [];
|
|
2239
|
+
const coverageKey = (coverage) => [
|
|
2240
|
+
String(coverage.name ?? "").toLowerCase(),
|
|
2241
|
+
String(coverage.limit ?? "").toLowerCase(),
|
|
2242
|
+
String(coverage.deductible ?? "").toLowerCase(),
|
|
2243
|
+
String(coverage.formNumber ?? "").toLowerCase()
|
|
2244
|
+
].join("|");
|
|
2245
|
+
const byKey = /* @__PURE__ */ new Map();
|
|
2246
|
+
for (const coverage of [...existingCoverages, ...incomingCoverages]) {
|
|
2247
|
+
const key = coverageKey(coverage);
|
|
2248
|
+
const current = byKey.get(key);
|
|
2249
|
+
byKey.set(key, current ? mergeShallowPreferPresent(current, coverage) : coverage);
|
|
2250
|
+
}
|
|
2251
|
+
merged.coverages = [...byKey.values()];
|
|
2252
|
+
return merged;
|
|
2253
|
+
}
|
|
2254
|
+
function mergeDeclarations(existing, incoming) {
|
|
2255
|
+
const merged = mergeShallowPreferPresent(existing, incoming);
|
|
2256
|
+
const existingFields = Array.isArray(existing.fields) ? existing.fields : [];
|
|
2257
|
+
const incomingFields = Array.isArray(incoming.fields) ? incoming.fields : [];
|
|
2258
|
+
merged.fields = mergeUniqueObjects(existingFields, incomingFields, (field) => [
|
|
2259
|
+
String(field.field ?? "").toLowerCase(),
|
|
2260
|
+
String(field.value ?? "").toLowerCase(),
|
|
2261
|
+
String(field.section ?? "").toLowerCase()
|
|
2262
|
+
].join("|"));
|
|
2263
|
+
return merged;
|
|
2264
|
+
}
|
|
2265
|
+
function mergeArrayPayload(existing, incoming, arrayKey, keyFn) {
|
|
2266
|
+
const merged = mergeShallowPreferPresent(existing, incoming);
|
|
2267
|
+
const existingItems = Array.isArray(existing[arrayKey]) ? existing[arrayKey] : [];
|
|
2268
|
+
const incomingItems = Array.isArray(incoming[arrayKey]) ? incoming[arrayKey] : [];
|
|
2269
|
+
merged[arrayKey] = mergeUniqueObjects(existingItems, incomingItems, keyFn);
|
|
2270
|
+
return merged;
|
|
2271
|
+
}
|
|
2272
|
+
function mergeExtractorResult(extractorName, existing, incoming) {
|
|
2273
|
+
if (!existing) return incoming;
|
|
2274
|
+
if (!incoming) return existing;
|
|
2275
|
+
if (typeof existing !== "object" || typeof incoming !== "object") return incoming;
|
|
2276
|
+
const current = existing;
|
|
2277
|
+
const next = incoming;
|
|
2278
|
+
switch (extractorName) {
|
|
2279
|
+
case "carrier_info":
|
|
2280
|
+
case "named_insured":
|
|
2281
|
+
case "loss_history":
|
|
2282
|
+
case "supplementary":
|
|
2283
|
+
case "premium_breakdown":
|
|
2284
|
+
return mergeShallowPreferPresent(current, next);
|
|
2285
|
+
case "coverage_limits":
|
|
2286
|
+
return mergeCoverageLimits(current, next);
|
|
2287
|
+
case "declarations":
|
|
2288
|
+
return mergeDeclarations(current, next);
|
|
2289
|
+
case "endorsements":
|
|
2290
|
+
return mergeArrayPayload(current, next, "endorsements", (item) => [
|
|
2291
|
+
String(item.formNumber ?? "").toLowerCase(),
|
|
2292
|
+
String(item.title ?? "").toLowerCase(),
|
|
2293
|
+
String(item.pageStart ?? "")
|
|
2294
|
+
].join("|"));
|
|
2295
|
+
case "exclusions":
|
|
2296
|
+
return mergeArrayPayload(current, next, "exclusions", (item) => [
|
|
2297
|
+
String(item.name ?? "").toLowerCase(),
|
|
2298
|
+
String(item.formNumber ?? "").toLowerCase(),
|
|
2299
|
+
String(item.pageNumber ?? "")
|
|
2300
|
+
].join("|"));
|
|
2301
|
+
case "conditions":
|
|
2302
|
+
return mergeArrayPayload(current, next, "conditions", (item) => [
|
|
2303
|
+
String(item.name ?? "").toLowerCase(),
|
|
2304
|
+
String(item.conditionType ?? "").toLowerCase(),
|
|
2305
|
+
String(item.pageNumber ?? "")
|
|
2306
|
+
].join("|"));
|
|
2307
|
+
case "sections":
|
|
2308
|
+
return mergeArrayPayload(current, next, "sections", (item) => [
|
|
2309
|
+
String(item.title ?? "").toLowerCase(),
|
|
2310
|
+
String(item.type ?? "").toLowerCase(),
|
|
2311
|
+
String(item.pageStart ?? ""),
|
|
2312
|
+
String(item.pageEnd ?? "")
|
|
2313
|
+
].join("|"));
|
|
2314
|
+
default:
|
|
2315
|
+
return mergeShallowPreferPresent(current, next);
|
|
2316
|
+
}
|
|
2317
|
+
}
|
|
2318
|
+
|
|
2141
2319
|
// src/prompts/templates/homeowners.ts
|
|
2142
2320
|
var HOMEOWNERS_TEMPLATE = {
|
|
2143
2321
|
type: "homeowners",
|
|
@@ -2927,74 +3105,156 @@ Return JSON only:
|
|
|
2927
3105
|
}`;
|
|
2928
3106
|
}
|
|
2929
3107
|
|
|
2930
|
-
// src/prompts/coordinator/
|
|
3108
|
+
// src/prompts/coordinator/form-inventory.ts
|
|
2931
3109
|
var import_zod19 = require("zod");
|
|
2932
|
-
var
|
|
2933
|
-
|
|
2934
|
-
|
|
2935
|
-
|
|
2936
|
-
description: import_zod19.z.string()
|
|
3110
|
+
var FormInventoryEntrySchema = FormReferenceSchema.extend({
|
|
3111
|
+
formNumber: FormReferenceSchema.shape.formNumber.describe("Form number or identifier, e.g. PR5070CF"),
|
|
3112
|
+
pageStart: FormReferenceSchema.shape.pageStart.describe("Original document page where the form begins"),
|
|
3113
|
+
pageEnd: FormReferenceSchema.shape.pageEnd.describe("Original document page where the form ends")
|
|
2937
3114
|
});
|
|
2938
|
-
var
|
|
2939
|
-
|
|
2940
|
-
pages: import_zod19.z.string()
|
|
3115
|
+
var FormInventorySchema = import_zod19.z.object({
|
|
3116
|
+
forms: import_zod19.z.array(FormInventoryEntrySchema)
|
|
2941
3117
|
});
|
|
2942
|
-
|
|
2943
|
-
|
|
2944
|
-
pageMap: import_zod19.z.array(PageMapEntrySchema).optional()
|
|
2945
|
-
});
|
|
2946
|
-
function buildPlanPrompt(templateHints) {
|
|
2947
|
-
return `You are planning the extraction of an insurance document. You have already classified this document. Now scan the full document and create a page map + extraction plan.
|
|
3118
|
+
function buildFormInventoryPrompt(templateHints) {
|
|
3119
|
+
return `You are building a form inventory for an insurance document.
|
|
2948
3120
|
|
|
2949
3121
|
DOCUMENT TYPE HINTS:
|
|
2950
3122
|
${templateHints}
|
|
2951
3123
|
|
|
2952
|
-
|
|
3124
|
+
Extract every distinct declarations page set, policy form, coverage form, endorsement, application form, and notice form that appears in the document.
|
|
3125
|
+
|
|
3126
|
+
For EACH form, extract:
|
|
3127
|
+
- formNumber: REQUIRED when present
|
|
3128
|
+
- editionDate: if shown
|
|
3129
|
+
- title: if shown
|
|
3130
|
+
- formType: one of coverage, endorsement, declarations, application, notice, other
|
|
3131
|
+
- pageStart: original page where the form begins
|
|
3132
|
+
- pageEnd: original page where the form ends
|
|
3133
|
+
|
|
3134
|
+
Critical rules:
|
|
3135
|
+
- Include declarations page sets even if they do not show a standard form number.
|
|
3136
|
+
- Use original document page numbers, not local chunk page numbers.
|
|
3137
|
+
- Do not emit duplicate entries for repeated headers/footers.
|
|
3138
|
+
- Multi-page forms should be represented once with pageStart/pageEnd covering the full span when visible.
|
|
3139
|
+
- If a form number is visible in endorsements, schedules, or form headers, include it even if the full form title is partial.
|
|
3140
|
+
|
|
3141
|
+
Respond with JSON only.`;
|
|
3142
|
+
}
|
|
3143
|
+
|
|
3144
|
+
// src/prompts/coordinator/page-map.ts
|
|
3145
|
+
var import_zod20 = require("zod");
|
|
3146
|
+
var PageExtractorSchema = import_zod20.z.enum([
|
|
3147
|
+
"carrier_info",
|
|
3148
|
+
"named_insured",
|
|
3149
|
+
"coverage_limits",
|
|
3150
|
+
"endorsements",
|
|
3151
|
+
"exclusions",
|
|
3152
|
+
"conditions",
|
|
3153
|
+
"premium_breakdown",
|
|
3154
|
+
"declarations",
|
|
3155
|
+
"loss_history",
|
|
3156
|
+
"sections",
|
|
3157
|
+
"supplementary"
|
|
3158
|
+
]);
|
|
3159
|
+
var PageAssignmentSchema = import_zod20.z.object({
|
|
3160
|
+
localPageNumber: import_zod20.z.number().int().positive().describe("1-based page number within this supplied PDF chunk"),
|
|
3161
|
+
extractorNames: import_zod20.z.array(PageExtractorSchema).describe("Focused extractors that should inspect this page"),
|
|
3162
|
+
pageRole: import_zod20.z.enum([
|
|
3163
|
+
"declarations_schedule",
|
|
3164
|
+
"endorsement_schedule",
|
|
3165
|
+
"policy_form",
|
|
3166
|
+
"endorsement_form",
|
|
3167
|
+
"condition_exclusion_form",
|
|
3168
|
+
"supplementary",
|
|
3169
|
+
"other"
|
|
3170
|
+
]).optional().describe("Primary role of the page"),
|
|
3171
|
+
hasScheduleValues: import_zod20.z.boolean().optional().describe("True only when the page contains insured-specific declaration or schedule values, tables, or rows to extract"),
|
|
3172
|
+
confidence: import_zod20.z.number().min(0).max(1).optional().describe("Confidence in the page assignment"),
|
|
3173
|
+
notes: import_zod20.z.string().optional().describe("Short explanation of what appears on the page")
|
|
3174
|
+
});
|
|
3175
|
+
var PageMapChunkSchema = import_zod20.z.object({
|
|
3176
|
+
pages: import_zod20.z.array(PageAssignmentSchema)
|
|
3177
|
+
});
|
|
3178
|
+
function buildPageMapPrompt(templateHints, startPage, endPage, formInventoryHint) {
|
|
3179
|
+
const inventoryBlock = formInventoryHint ? `
|
|
3180
|
+
FORM INVENTORY (already identified \u2014 use this to constrain your assignments):
|
|
3181
|
+
${formInventoryHint}
|
|
3182
|
+
` : "";
|
|
3183
|
+
return `You are mapping insurance document pages to focused extractors.
|
|
3184
|
+
|
|
3185
|
+
These supplied pages are ORIGINAL DOCUMENT PAGES ${startPage}-${endPage}.
|
|
3186
|
+
|
|
3187
|
+
DOCUMENT TYPE HINTS:
|
|
3188
|
+
${templateHints}
|
|
3189
|
+
${inventoryBlock}
|
|
3190
|
+
For each page in this supplied PDF chunk, decide which extractor(s) should inspect it.
|
|
2953
3191
|
|
|
2954
3192
|
Available extractors:
|
|
2955
|
-
- carrier_info
|
|
2956
|
-
- named_insured
|
|
2957
|
-
- coverage_limits
|
|
2958
|
-
- endorsements
|
|
2959
|
-
- exclusions
|
|
2960
|
-
- conditions
|
|
2961
|
-
- premium_breakdown
|
|
2962
|
-
- declarations
|
|
2963
|
-
- loss_history
|
|
2964
|
-
- sections
|
|
2965
|
-
- supplementary
|
|
3193
|
+
- carrier_info
|
|
3194
|
+
- named_insured
|
|
3195
|
+
- coverage_limits
|
|
3196
|
+
- endorsements
|
|
3197
|
+
- exclusions
|
|
3198
|
+
- conditions
|
|
3199
|
+
- premium_breakdown
|
|
3200
|
+
- declarations
|
|
3201
|
+
- loss_history
|
|
3202
|
+
- sections
|
|
3203
|
+
- supplementary
|
|
3204
|
+
|
|
3205
|
+
Rules:
|
|
3206
|
+
- Use specific extractors for declarations, schedules, endorsements, exclusions, conditions, premium pages, and loss runs.
|
|
3207
|
+
- Use "sections" for pages that contain substantive policy text or mixed content that should still be preserved as raw sections.
|
|
3208
|
+
- Avoid assigning broad ranges mentally; decide page by page.
|
|
3209
|
+
- A page may map to multiple extractors if it legitimately contains multiple relevant sections.
|
|
3210
|
+
- Prefer declarations and schedules for numeric limits/deductibles over later generic form wording.
|
|
3211
|
+
- Assign "coverage_limits" only when the page itself contains insured-specific declaration or schedule values to capture, such as location/building rows, coverage tables, limits, deductibles, coinsurance percentages, or scheduled amounts tied to this policy.
|
|
3212
|
+
- Do NOT assign "coverage_limits" for generic policy-form or endorsement text that merely explains how limits, deductibles, waiting periods, or coinsurance work, or that says values are "shown in the declarations", "shown in the schedule", "as stated", or "if applicable".
|
|
3213
|
+
- Headings like "Limits of Insurance", "Deductible", "Coinsurance", "Loss Conditions", or "Definitions" inside a policy form usually indicate form language, not declarations or schedules.
|
|
3214
|
+
- Continuation pages near the end of a form should stay mapped to "sections" plus "conditions"/"exclusions" when applicable, even if they mention limits or deductibles.
|
|
3215
|
+
- When a form inventory entry identifies a page range as a specific form type (e.g., endorsement, coverage, application), use that classification to guide your extractor choice. Do not assign "coverage_limits" to pages the inventory identifies as endorsement or condition/exclusion forms unless the page contains actual schedule values.
|
|
3216
|
+
- Return every page in the supplied chunk exactly once.
|
|
2966
3217
|
|
|
2967
3218
|
Return JSON:
|
|
2968
3219
|
{
|
|
2969
|
-
"
|
|
2970
|
-
{
|
|
2971
|
-
|
|
2972
|
-
|
|
2973
|
-
|
|
2974
|
-
|
|
2975
|
-
|
|
3220
|
+
"pages": [
|
|
3221
|
+
{
|
|
3222
|
+
"localPageNumber": 1,
|
|
3223
|
+
"extractorNames": ["declarations", "carrier_info", "named_insured", "coverage_limits"],
|
|
3224
|
+
"pageRole": "declarations_schedule",
|
|
3225
|
+
"hasScheduleValues": true,
|
|
3226
|
+
"confidence": 0.96,
|
|
3227
|
+
"notes": "Declarations page with insured, policy period, and scheduled limits"
|
|
3228
|
+
}
|
|
2976
3229
|
]
|
|
2977
3230
|
}
|
|
2978
3231
|
|
|
2979
|
-
Create tasks that cover the entire document. Prefer specific extractors over generic "sections" where possible. Keep page ranges tight \u2014 only include pages relevant to each extractor.
|
|
2980
|
-
|
|
2981
3232
|
Respond with JSON only.`;
|
|
2982
3233
|
}
|
|
3234
|
+
function formatFormInventoryForPageMap(forms) {
|
|
3235
|
+
if (forms.length === 0) return "";
|
|
3236
|
+
return forms.filter((f) => f.pageStart != null).map((f) => {
|
|
3237
|
+
const range = f.pageEnd && f.pageEnd !== f.pageStart ? `pages ${f.pageStart}-${f.pageEnd}` : `page ${f.pageStart}`;
|
|
3238
|
+
const title = f.title ? ` "${f.title}"` : "";
|
|
3239
|
+
return `- ${f.formNumber}${title} [${f.formType}] \u2192 ${range}`;
|
|
3240
|
+
}).join("\n");
|
|
3241
|
+
}
|
|
2983
3242
|
|
|
2984
3243
|
// src/prompts/coordinator/review.ts
|
|
2985
|
-
var
|
|
2986
|
-
var ReviewResultSchema =
|
|
2987
|
-
complete:
|
|
2988
|
-
missingFields:
|
|
2989
|
-
|
|
2990
|
-
|
|
2991
|
-
|
|
2992
|
-
|
|
2993
|
-
|
|
3244
|
+
var import_zod21 = require("zod");
|
|
3245
|
+
var ReviewResultSchema = import_zod21.z.object({
|
|
3246
|
+
complete: import_zod21.z.boolean(),
|
|
3247
|
+
missingFields: import_zod21.z.array(import_zod21.z.string()),
|
|
3248
|
+
qualityIssues: import_zod21.z.array(import_zod21.z.string()).optional(),
|
|
3249
|
+
additionalTasks: import_zod21.z.array(import_zod21.z.object({
|
|
3250
|
+
extractorName: import_zod21.z.string(),
|
|
3251
|
+
startPage: import_zod21.z.number(),
|
|
3252
|
+
endPage: import_zod21.z.number(),
|
|
3253
|
+
description: import_zod21.z.string()
|
|
2994
3254
|
}))
|
|
2995
3255
|
});
|
|
2996
|
-
function buildReviewPrompt(templateExpected, extractedKeys) {
|
|
2997
|
-
return `You are reviewing an extraction for completeness. Compare what was expected vs what was found.
|
|
3256
|
+
function buildReviewPrompt(templateExpected, extractedKeys, extractionSummary, pageMapSummary) {
|
|
3257
|
+
return `You are reviewing an extraction for completeness and quality. Compare what was expected vs what was found.
|
|
2998
3258
|
|
|
2999
3259
|
EXPECTED FIELDS (from document type template):
|
|
3000
3260
|
${templateExpected.map((f) => `- ${f}`).join("\n")}
|
|
@@ -3002,40 +3262,55 @@ ${templateExpected.map((f) => `- ${f}`).join("\n")}
|
|
|
3002
3262
|
FIELDS ALREADY EXTRACTED:
|
|
3003
3263
|
${extractedKeys.map((f) => `- ${f}`).join("\n")}
|
|
3004
3264
|
|
|
3265
|
+
PAGE MAP SUMMARY:
|
|
3266
|
+
${pageMapSummary}
|
|
3267
|
+
|
|
3268
|
+
CURRENT EXTRACTION SUMMARY:
|
|
3269
|
+
${extractionSummary}
|
|
3270
|
+
|
|
3005
3271
|
Determine:
|
|
3006
|
-
1. Is the extraction complete enough?
|
|
3272
|
+
1. Is the extraction complete enough?
|
|
3007
3273
|
2. What fields are missing?
|
|
3008
|
-
3.
|
|
3274
|
+
3. What quality issues are present?
|
|
3275
|
+
4. Should any additional extraction tasks be dispatched?
|
|
3276
|
+
|
|
3277
|
+
Mark the extraction as NOT complete if any of these are true:
|
|
3278
|
+
- required fields are missing
|
|
3279
|
+
- extracted values are generic placeholders like "shown in declarations", "per schedule", "if applicable", "as stated"
|
|
3280
|
+
- coverage limits or deductibles appear to come from generic form language instead of declaration/schedule-specific values
|
|
3281
|
+
- page assignments suggest declaration, schedule, endorsement, exclusion, or condition pages were not actually extracted with the matching focused extractor
|
|
3282
|
+
- a focused extractor exists but returned too little substance for the relevant pages
|
|
3009
3283
|
|
|
3010
3284
|
Return JSON:
|
|
3011
3285
|
{
|
|
3012
3286
|
"complete": boolean,
|
|
3013
3287
|
"missingFields": ["field1", "field2"],
|
|
3288
|
+
"qualityIssues": ["issue 1", "issue 2"],
|
|
3014
3289
|
"additionalTasks": [
|
|
3015
3290
|
{ "extractorName": "...", "startPage": N, "endPage": N, "description": "..." }
|
|
3016
3291
|
]
|
|
3017
3292
|
}
|
|
3018
3293
|
|
|
3019
|
-
|
|
3294
|
+
Use the page map to target follow-up extraction pages precisely. Prefer narrow, declaration/schedule-focused follow-up tasks over broad page ranges.
|
|
3020
3295
|
|
|
3021
3296
|
Respond with JSON only.`;
|
|
3022
3297
|
}
|
|
3023
3298
|
|
|
3024
3299
|
// src/prompts/extractors/carrier-info.ts
|
|
3025
|
-
var
|
|
3026
|
-
var CarrierInfoSchema =
|
|
3027
|
-
carrierName:
|
|
3028
|
-
carrierLegalName:
|
|
3029
|
-
naicNumber:
|
|
3030
|
-
amBestRating:
|
|
3031
|
-
admittedStatus:
|
|
3032
|
-
mga:
|
|
3033
|
-
underwriter:
|
|
3034
|
-
policyNumber:
|
|
3035
|
-
effectiveDate:
|
|
3036
|
-
expirationDate:
|
|
3037
|
-
quoteNumber:
|
|
3038
|
-
proposedEffectiveDate:
|
|
3300
|
+
var import_zod22 = require("zod");
|
|
3301
|
+
var CarrierInfoSchema = import_zod22.z.object({
|
|
3302
|
+
carrierName: import_zod22.z.string().describe("Primary insurance company name for display"),
|
|
3303
|
+
carrierLegalName: import_zod22.z.string().optional().describe("Legal entity name of insurer"),
|
|
3304
|
+
naicNumber: import_zod22.z.string().optional().describe("NAIC company code"),
|
|
3305
|
+
amBestRating: import_zod22.z.string().optional().describe("AM Best rating, e.g. 'A+ XV'"),
|
|
3306
|
+
admittedStatus: import_zod22.z.enum(["admitted", "non_admitted", "surplus_lines"]).optional().describe("Admitted status of the carrier"),
|
|
3307
|
+
mga: import_zod22.z.string().optional().describe("Managing General Agent or Program Administrator name"),
|
|
3308
|
+
underwriter: import_zod22.z.string().optional().describe("Named individual underwriter"),
|
|
3309
|
+
policyNumber: import_zod22.z.string().optional().describe("Policy or quote reference number"),
|
|
3310
|
+
effectiveDate: import_zod22.z.string().optional().describe("Policy effective date (MM/DD/YYYY)"),
|
|
3311
|
+
expirationDate: import_zod22.z.string().optional().describe("Policy expiration date (MM/DD/YYYY)"),
|
|
3312
|
+
quoteNumber: import_zod22.z.string().optional().describe("Quote or proposal reference number"),
|
|
3313
|
+
proposedEffectiveDate: import_zod22.z.string().optional().describe("Proposed effective date for quotes (MM/DD/YYYY)")
|
|
3039
3314
|
});
|
|
3040
3315
|
function buildCarrierInfoPrompt() {
|
|
3041
3316
|
return `You are an expert insurance document analyst. Extract carrier and policy identification information from this document.
|
|
@@ -3055,18 +3330,18 @@ Return JSON only.`;
|
|
|
3055
3330
|
}
|
|
3056
3331
|
|
|
3057
3332
|
// src/prompts/extractors/named-insured.ts
|
|
3058
|
-
var
|
|
3059
|
-
var AddressSchema2 =
|
|
3060
|
-
street1:
|
|
3061
|
-
city:
|
|
3062
|
-
state:
|
|
3063
|
-
zip:
|
|
3333
|
+
var import_zod23 = require("zod");
|
|
3334
|
+
var AddressSchema2 = import_zod23.z.object({
|
|
3335
|
+
street1: import_zod23.z.string(),
|
|
3336
|
+
city: import_zod23.z.string(),
|
|
3337
|
+
state: import_zod23.z.string(),
|
|
3338
|
+
zip: import_zod23.z.string()
|
|
3064
3339
|
});
|
|
3065
|
-
var NamedInsuredSchema2 =
|
|
3066
|
-
insuredName:
|
|
3067
|
-
insuredDba:
|
|
3340
|
+
var NamedInsuredSchema2 = import_zod23.z.object({
|
|
3341
|
+
insuredName: import_zod23.z.string().describe("Name of primary named insured"),
|
|
3342
|
+
insuredDba: import_zod23.z.string().optional().describe("Doing-business-as name"),
|
|
3068
3343
|
insuredAddress: AddressSchema2.optional().describe("Primary insured mailing address"),
|
|
3069
|
-
insuredEntityType:
|
|
3344
|
+
insuredEntityType: import_zod23.z.enum([
|
|
3070
3345
|
"corporation",
|
|
3071
3346
|
"llc",
|
|
3072
3347
|
"partnership",
|
|
@@ -3079,13 +3354,13 @@ var NamedInsuredSchema2 = import_zod22.z.object({
|
|
|
3079
3354
|
"married_couple",
|
|
3080
3355
|
"other"
|
|
3081
3356
|
]).optional().describe("Legal entity type of the insured"),
|
|
3082
|
-
insuredFein:
|
|
3083
|
-
insuredSicCode:
|
|
3084
|
-
insuredNaicsCode:
|
|
3085
|
-
additionalNamedInsureds:
|
|
3086
|
-
|
|
3087
|
-
name:
|
|
3088
|
-
relationship:
|
|
3357
|
+
insuredFein: import_zod23.z.string().optional().describe("Federal Employer Identification Number"),
|
|
3358
|
+
insuredSicCode: import_zod23.z.string().optional().describe("SIC code"),
|
|
3359
|
+
insuredNaicsCode: import_zod23.z.string().optional().describe("NAICS code"),
|
|
3360
|
+
additionalNamedInsureds: import_zod23.z.array(
|
|
3361
|
+
import_zod23.z.object({
|
|
3362
|
+
name: import_zod23.z.string(),
|
|
3363
|
+
relationship: import_zod23.z.string().optional().describe("e.g. subsidiary, affiliate"),
|
|
3089
3364
|
address: AddressSchema2.optional()
|
|
3090
3365
|
})
|
|
3091
3366
|
).optional().describe("Additional named insureds listed on the policy")
|
|
@@ -3106,23 +3381,20 @@ Return JSON only.`;
|
|
|
3106
3381
|
}
|
|
3107
3382
|
|
|
3108
3383
|
// src/prompts/extractors/coverage-limits.ts
|
|
3109
|
-
var
|
|
3110
|
-
var
|
|
3111
|
-
|
|
3112
|
-
|
|
3113
|
-
|
|
3114
|
-
|
|
3115
|
-
|
|
3116
|
-
|
|
3117
|
-
formNumber: import_zod23.z.string().optional().describe("Associated form number, e.g. 'CG 00 01'")
|
|
3118
|
-
})
|
|
3119
|
-
).describe("All coverages with their limits"),
|
|
3120
|
-
coverageForm: import_zod23.z.enum(["occurrence", "claims_made", "accident"]).optional().describe("Primary coverage trigger type"),
|
|
3121
|
-
retroactiveDate: import_zod23.z.string().optional().describe("Retroactive date for claims-made policies (MM/DD/YYYY)")
|
|
3384
|
+
var import_zod24 = require("zod");
|
|
3385
|
+
var ExtractorCoverageSchema = CoverageSchema.extend({
|
|
3386
|
+
coverageCode: import_zod24.z.string().optional().describe("Coverage code or class code")
|
|
3387
|
+
});
|
|
3388
|
+
var CoverageLimitsSchema = import_zod24.z.object({
|
|
3389
|
+
coverages: import_zod24.z.array(ExtractorCoverageSchema).describe("All coverages with their limits"),
|
|
3390
|
+
coverageForm: import_zod24.z.enum(["occurrence", "claims_made", "accident"]).optional().describe("Primary coverage trigger type"),
|
|
3391
|
+
retroactiveDate: import_zod24.z.string().optional().describe("Retroactive date for claims-made policies (MM/DD/YYYY)")
|
|
3122
3392
|
});
|
|
3123
3393
|
function buildCoverageLimitsPrompt() {
|
|
3124
3394
|
return `You are an expert insurance document analyst. Extract all coverage limits and deductibles from this document.
|
|
3125
3395
|
|
|
3396
|
+
Extract only insured-specific declaration, schedule, or endorsement entries that state actual coverage terms for this policy.
|
|
3397
|
+
|
|
3126
3398
|
Focus on:
|
|
3127
3399
|
- Every coverage listed on the declarations page or coverage schedule
|
|
3128
3400
|
- Per-occurrence, aggregate, and sub-limits for each coverage
|
|
@@ -3133,20 +3405,34 @@ Focus on:
|
|
|
3133
3405
|
- Standard limit fields: per occurrence, general aggregate, products/completed ops aggregate, personal & advertising injury, fire damage, medical expense, combined single limit, BI/PD splits, umbrella each occurrence/aggregate/retention, statutory (WC), employers liability
|
|
3134
3406
|
- Defense cost treatment: inside limits, outside limits, or supplementary
|
|
3135
3407
|
|
|
3136
|
-
|
|
3408
|
+
For EACH coverage, also extract:
|
|
3409
|
+
- pageNumber: the original page number where the coverage row/value appears
|
|
3410
|
+
- sectionRef: the declarations/schedule/endorsement section heading where it appears
|
|
3411
|
+
- originalContent: the verbatim row or short source snippet used for this coverage
|
|
3412
|
+
- limitValueType: classify the limit as numeric, included, not_included, as_stated, waiting_period, referential, or other
|
|
3413
|
+
- deductibleValueType: classify the deductible/value term similarly when deductible is present
|
|
3414
|
+
|
|
3415
|
+
Critical rules:
|
|
3416
|
+
- Do not extract table-of-contents lines, index entries, headers, footers, page labels, or cross-references as coverages.
|
|
3417
|
+
- Do not create a coverage entry from generic policy-form text that only says a limit/deductible is "shown in the declarations", "shown in the Business Income Declarations", "as stated", "if applicable", or similar referential wording.
|
|
3418
|
+
- Do not treat a generic waiting period, deductible explanation, limits clause, coinsurance clause, or definitions text as a standalone coverage unless the page contains an actual policy-specific schedule row or declaration entry.
|
|
3419
|
+
- Values like "Included" or "Not Included" are valid only when they appear as an explicit declarations/schedule/endorsement entry for a named coverage. Do not infer them from narrative form language.
|
|
3420
|
+
- If a waiting period or hour deductible is shown as part of a specific declarations/schedule row, it may be captured in deductible. Otherwise omit it.
|
|
3421
|
+
- Use limitValueType or deductibleValueType to preserve non-numeric terms precisely instead of forcing them into numeric semantics.
|
|
3422
|
+
- Preserve one row per real coverage entry. Do not merge adjacent schedule rows into malformed names.
|
|
3137
3423
|
|
|
3138
3424
|
Return JSON only.`;
|
|
3139
3425
|
}
|
|
3140
3426
|
|
|
3141
3427
|
// src/prompts/extractors/endorsements.ts
|
|
3142
|
-
var
|
|
3143
|
-
var EndorsementsSchema =
|
|
3144
|
-
endorsements:
|
|
3145
|
-
|
|
3146
|
-
formNumber:
|
|
3147
|
-
editionDate:
|
|
3148
|
-
title:
|
|
3149
|
-
endorsementType:
|
|
3428
|
+
var import_zod25 = require("zod");
|
|
3429
|
+
var EndorsementsSchema = import_zod25.z.object({
|
|
3430
|
+
endorsements: import_zod25.z.array(
|
|
3431
|
+
import_zod25.z.object({
|
|
3432
|
+
formNumber: import_zod25.z.string().describe("Form number, e.g. 'CG 21 47'"),
|
|
3433
|
+
editionDate: import_zod25.z.string().optional().describe("Edition date, e.g. '12 07'"),
|
|
3434
|
+
title: import_zod25.z.string().describe("Endorsement title"),
|
|
3435
|
+
endorsementType: import_zod25.z.enum([
|
|
3150
3436
|
"additional_insured",
|
|
3151
3437
|
"waiver_of_subrogation",
|
|
3152
3438
|
"primary_noncontributory",
|
|
@@ -3166,12 +3452,12 @@ var EndorsementsSchema = import_zod24.z.object({
|
|
|
3166
3452
|
"territorial_extension",
|
|
3167
3453
|
"other"
|
|
3168
3454
|
]).describe("Endorsement type classification"),
|
|
3169
|
-
effectiveDate:
|
|
3170
|
-
affectedCoverageParts:
|
|
3171
|
-
namedParties:
|
|
3172
|
-
|
|
3173
|
-
name:
|
|
3174
|
-
role:
|
|
3455
|
+
effectiveDate: import_zod25.z.string().optional().describe("Endorsement effective date"),
|
|
3456
|
+
affectedCoverageParts: import_zod25.z.array(import_zod25.z.string()).optional().describe("Coverage parts affected by this endorsement"),
|
|
3457
|
+
namedParties: import_zod25.z.array(
|
|
3458
|
+
import_zod25.z.object({
|
|
3459
|
+
name: import_zod25.z.string().describe("Party name"),
|
|
3460
|
+
role: import_zod25.z.enum([
|
|
3175
3461
|
"additional_insured",
|
|
3176
3462
|
"loss_payee",
|
|
3177
3463
|
"mortgage_holder",
|
|
@@ -3180,15 +3466,15 @@ var EndorsementsSchema = import_zod24.z.object({
|
|
|
3180
3466
|
"designated_person",
|
|
3181
3467
|
"other"
|
|
3182
3468
|
]).describe("Party role"),
|
|
3183
|
-
relationship:
|
|
3184
|
-
scope:
|
|
3469
|
+
relationship: import_zod25.z.string().optional().describe("Relationship to insured"),
|
|
3470
|
+
scope: import_zod25.z.string().optional().describe("Scope of coverage for this party")
|
|
3185
3471
|
})
|
|
3186
3472
|
).optional().describe("Named parties (additional insureds, loss payees, etc.)"),
|
|
3187
|
-
keyTerms:
|
|
3188
|
-
premiumImpact:
|
|
3189
|
-
content:
|
|
3190
|
-
pageStart:
|
|
3191
|
-
pageEnd:
|
|
3473
|
+
keyTerms: import_zod25.z.array(import_zod25.z.string()).optional().describe("Key terms or notable provisions in the endorsement"),
|
|
3474
|
+
premiumImpact: import_zod25.z.string().optional().describe("Additional premium or credit"),
|
|
3475
|
+
content: import_zod25.z.string().describe("Full verbatim text of the endorsement"),
|
|
3476
|
+
pageStart: import_zod25.z.number().describe("Starting page number of this endorsement"),
|
|
3477
|
+
pageEnd: import_zod25.z.number().optional().describe("Ending page number of this endorsement")
|
|
3192
3478
|
})
|
|
3193
3479
|
).describe("All endorsements found in the document")
|
|
3194
3480
|
});
|
|
@@ -3219,20 +3505,20 @@ Return JSON only.`;
|
|
|
3219
3505
|
}
|
|
3220
3506
|
|
|
3221
3507
|
// src/prompts/extractors/exclusions.ts
|
|
3222
|
-
var
|
|
3223
|
-
var ExclusionsSchema =
|
|
3224
|
-
exclusions:
|
|
3225
|
-
|
|
3226
|
-
name:
|
|
3227
|
-
formNumber:
|
|
3228
|
-
excludedPerils:
|
|
3229
|
-
isAbsolute:
|
|
3230
|
-
exceptions:
|
|
3231
|
-
buybackAvailable:
|
|
3232
|
-
buybackEndorsement:
|
|
3233
|
-
appliesTo:
|
|
3234
|
-
content:
|
|
3235
|
-
pageNumber:
|
|
3508
|
+
var import_zod26 = require("zod");
|
|
3509
|
+
var ExclusionsSchema = import_zod26.z.object({
|
|
3510
|
+
exclusions: import_zod26.z.array(
|
|
3511
|
+
import_zod26.z.object({
|
|
3512
|
+
name: import_zod26.z.string().describe("Exclusion title or short description"),
|
|
3513
|
+
formNumber: import_zod26.z.string().optional().describe("Form number if part of a named endorsement"),
|
|
3514
|
+
excludedPerils: import_zod26.z.array(import_zod26.z.string()).optional().describe("Specific perils excluded"),
|
|
3515
|
+
isAbsolute: import_zod26.z.boolean().optional().describe("Whether the exclusion is absolute (no exceptions)"),
|
|
3516
|
+
exceptions: import_zod26.z.array(import_zod26.z.string()).optional().describe("Exceptions to the exclusion, if any"),
|
|
3517
|
+
buybackAvailable: import_zod26.z.boolean().optional().describe("Whether coverage can be bought back via endorsement"),
|
|
3518
|
+
buybackEndorsement: import_zod26.z.string().optional().describe("Form number of the buyback endorsement if available"),
|
|
3519
|
+
appliesTo: import_zod26.z.array(import_zod26.z.string()).optional().describe("Coverage types this exclusion applies to"),
|
|
3520
|
+
content: import_zod26.z.string().describe("Full verbatim exclusion text"),
|
|
3521
|
+
pageNumber: import_zod26.z.number().optional().describe("Page number where exclusion appears")
|
|
3236
3522
|
})
|
|
3237
3523
|
).describe("All exclusions found in the document")
|
|
3238
3524
|
});
|
|
@@ -3257,18 +3543,23 @@ Focus on:
|
|
|
3257
3543
|
- Exclusions within insuring agreements or conditions if clearly labeled
|
|
3258
3544
|
- Full verbatim exclusion text \u2014 do not summarize
|
|
3259
3545
|
|
|
3546
|
+
Critical rules:
|
|
3547
|
+
- Ignore table-of-contents entries, running headers/footers, and references that only point to another page or section.
|
|
3548
|
+
- Do not emit a standalone exclusion from a fragment unless the fragment itself contains substantive exclusion wording.
|
|
3549
|
+
- Always include pageNumber when the exclusion appears on a specific page in the supplied document chunk.
|
|
3550
|
+
|
|
3260
3551
|
Common personal lines exclusion patterns: animal liability, business pursuits, home daycare, watercraft, aircraft.
|
|
3261
3552
|
|
|
3262
3553
|
Return JSON only.`;
|
|
3263
3554
|
}
|
|
3264
3555
|
|
|
3265
3556
|
// src/prompts/extractors/conditions.ts
|
|
3266
|
-
var
|
|
3267
|
-
var ConditionsSchema =
|
|
3268
|
-
conditions:
|
|
3269
|
-
|
|
3270
|
-
name:
|
|
3271
|
-
conditionType:
|
|
3557
|
+
var import_zod27 = require("zod");
|
|
3558
|
+
var ConditionsSchema = import_zod27.z.object({
|
|
3559
|
+
conditions: import_zod27.z.array(
|
|
3560
|
+
import_zod27.z.object({
|
|
3561
|
+
name: import_zod27.z.string().describe("Condition title"),
|
|
3562
|
+
conditionType: import_zod27.z.enum([
|
|
3272
3563
|
"duties_after_loss",
|
|
3273
3564
|
"notice_requirements",
|
|
3274
3565
|
"other_insurance",
|
|
@@ -3287,14 +3578,14 @@ var ConditionsSchema = import_zod26.z.object({
|
|
|
3287
3578
|
"separation_of_insureds",
|
|
3288
3579
|
"other"
|
|
3289
3580
|
]).describe("Condition category"),
|
|
3290
|
-
content:
|
|
3291
|
-
keyValues:
|
|
3292
|
-
|
|
3293
|
-
key:
|
|
3294
|
-
value:
|
|
3581
|
+
content: import_zod27.z.string().describe("Full verbatim condition text"),
|
|
3582
|
+
keyValues: import_zod27.z.array(
|
|
3583
|
+
import_zod27.z.object({
|
|
3584
|
+
key: import_zod27.z.string().describe("Key name (e.g. 'noticePeriod', 'suitDeadline')"),
|
|
3585
|
+
value: import_zod27.z.string().describe("Value (e.g. '30 days', '2 years')")
|
|
3295
3586
|
})
|
|
3296
3587
|
).optional().describe("Key values extracted from the condition (notice periods, deadlines, etc.)"),
|
|
3297
|
-
pageNumber:
|
|
3588
|
+
pageNumber: import_zod27.z.number().optional().describe("Page number where condition appears")
|
|
3298
3589
|
})
|
|
3299
3590
|
).describe("All policy conditions found in the document")
|
|
3300
3591
|
});
|
|
@@ -3306,7 +3597,7 @@ For EACH condition, extract:
|
|
|
3306
3597
|
- conditionType: classify as one of: duties_after_loss, notice_requirements, other_insurance, cancellation, nonrenewal, transfer_of_rights, liberalization, arbitration, concealment_fraud, examination_under_oath, legal_action, loss_payment, appraisal, mortgage_holders, policy_territory, separation_of_insureds, other \u2014 REQUIRED
|
|
3307
3598
|
- content: full verbatim condition text \u2014 REQUIRED
|
|
3308
3599
|
- keyValues: extract specific values as key-value pairs (e.g. noticePeriod: "30 days", suitDeadline: "2 years")
|
|
3309
|
-
- pageNumber: page number where the condition appears
|
|
3600
|
+
- pageNumber: original document page number where the substantive condition text appears
|
|
3310
3601
|
|
|
3311
3602
|
Focus on:
|
|
3312
3603
|
- Duties after loss / notice of occurrence conditions
|
|
@@ -3323,32 +3614,37 @@ Focus on:
|
|
|
3323
3614
|
- Mortgage holders clause
|
|
3324
3615
|
- Any other named conditions
|
|
3325
3616
|
|
|
3617
|
+
Critical rules:
|
|
3618
|
+
- Ignore table-of-contents entries, section indexes, running headers/footers, and page references such as "Appraisal ..... 19".
|
|
3619
|
+
- Do not emit a condition unless the page contains substantive condition text, not just a heading or reference.
|
|
3620
|
+
- If a condition continues from a prior page, keep the substantive text together and use the page where the condition text appears in this extracted chunk.
|
|
3621
|
+
|
|
3326
3622
|
Return JSON only.`;
|
|
3327
3623
|
}
|
|
3328
3624
|
|
|
3329
3625
|
// src/prompts/extractors/premium-breakdown.ts
|
|
3330
|
-
var
|
|
3331
|
-
var PremiumBreakdownSchema =
|
|
3332
|
-
premium:
|
|
3333
|
-
totalCost:
|
|
3334
|
-
premiumBreakdown:
|
|
3335
|
-
|
|
3336
|
-
line:
|
|
3337
|
-
amount:
|
|
3626
|
+
var import_zod28 = require("zod");
|
|
3627
|
+
var PremiumBreakdownSchema = import_zod28.z.object({
|
|
3628
|
+
premium: import_zod28.z.string().optional().describe("Total premium amount, e.g. '$5,000'"),
|
|
3629
|
+
totalCost: import_zod28.z.string().optional().describe("Total cost including taxes and fees, e.g. '$5,250'"),
|
|
3630
|
+
premiumBreakdown: import_zod28.z.array(
|
|
3631
|
+
import_zod28.z.object({
|
|
3632
|
+
line: import_zod28.z.string().describe("Coverage line name"),
|
|
3633
|
+
amount: import_zod28.z.string().describe("Premium amount for this line")
|
|
3338
3634
|
})
|
|
3339
3635
|
).optional().describe("Per-coverage-line premium breakdown"),
|
|
3340
|
-
taxesAndFees:
|
|
3341
|
-
|
|
3342
|
-
name:
|
|
3343
|
-
amount:
|
|
3344
|
-
type:
|
|
3636
|
+
taxesAndFees: import_zod28.z.array(
|
|
3637
|
+
import_zod28.z.object({
|
|
3638
|
+
name: import_zod28.z.string().describe("Fee or tax name"),
|
|
3639
|
+
amount: import_zod28.z.string().describe("Dollar amount"),
|
|
3640
|
+
type: import_zod28.z.enum(["tax", "fee", "surcharge", "assessment"]).optional().describe("Fee category")
|
|
3345
3641
|
})
|
|
3346
3642
|
).optional().describe("Taxes, fees, surcharges, and assessments"),
|
|
3347
|
-
minimumPremium:
|
|
3348
|
-
depositPremium:
|
|
3349
|
-
paymentPlan:
|
|
3350
|
-
auditType:
|
|
3351
|
-
ratingBasis:
|
|
3643
|
+
minimumPremium: import_zod28.z.string().optional().describe("Minimum premium if stated"),
|
|
3644
|
+
depositPremium: import_zod28.z.string().optional().describe("Deposit premium if stated"),
|
|
3645
|
+
paymentPlan: import_zod28.z.string().optional().describe("Payment plan description"),
|
|
3646
|
+
auditType: import_zod28.z.enum(["annual", "semi_annual", "quarterly", "monthly", "final", "self"]).optional().describe("Premium audit type"),
|
|
3647
|
+
ratingBasis: import_zod28.z.string().optional().describe("Rating basis, e.g. payroll, revenue, area, units")
|
|
3352
3648
|
});
|
|
3353
3649
|
function buildPremiumBreakdownPrompt() {
|
|
3354
3650
|
return `You are an expert insurance document analyst. Extract all premium and cost information from this document.
|
|
@@ -3368,14 +3664,14 @@ Return JSON only.`;
|
|
|
3368
3664
|
}
|
|
3369
3665
|
|
|
3370
3666
|
// src/prompts/extractors/declarations.ts
|
|
3371
|
-
var
|
|
3372
|
-
var DeclarationsFieldSchema =
|
|
3373
|
-
field:
|
|
3374
|
-
value:
|
|
3375
|
-
section:
|
|
3667
|
+
var import_zod29 = require("zod");
|
|
3668
|
+
var DeclarationsFieldSchema = import_zod29.z.object({
|
|
3669
|
+
field: import_zod29.z.string().describe("Descriptive field name (e.g. 'policyNumber', 'effectiveDate', 'coverageALimit')"),
|
|
3670
|
+
value: import_zod29.z.string().describe("Extracted value exactly as it appears in the document"),
|
|
3671
|
+
section: import_zod29.z.string().optional().describe("Section or grouping this field belongs to (e.g. 'Coverage Limits', 'Vehicle Schedule')")
|
|
3376
3672
|
});
|
|
3377
|
-
var DeclarationsExtractSchema =
|
|
3378
|
-
fields:
|
|
3673
|
+
var DeclarationsExtractSchema = import_zod29.z.object({
|
|
3674
|
+
fields: import_zod29.z.array(DeclarationsFieldSchema).describe("All declarations page fields extracted as key-value pairs. Structure varies by line of business.")
|
|
3379
3675
|
});
|
|
3380
3676
|
function buildDeclarationsPrompt() {
|
|
3381
3677
|
return `You are an expert insurance document analyst. Extract all declarations page data from this document into a flexible key-value structure.
|
|
@@ -3415,21 +3711,21 @@ Preserve original values exactly as they appear. Return JSON only.`;
|
|
|
3415
3711
|
}
|
|
3416
3712
|
|
|
3417
3713
|
// src/prompts/extractors/loss-history.ts
|
|
3418
|
-
var
|
|
3419
|
-
var LossHistorySchema =
|
|
3420
|
-
lossSummary:
|
|
3421
|
-
individualClaims:
|
|
3422
|
-
|
|
3423
|
-
date:
|
|
3424
|
-
type:
|
|
3425
|
-
description:
|
|
3426
|
-
amountPaid:
|
|
3427
|
-
amountReserved:
|
|
3428
|
-
status:
|
|
3429
|
-
claimNumber:
|
|
3714
|
+
var import_zod30 = require("zod");
|
|
3715
|
+
var LossHistorySchema = import_zod30.z.object({
|
|
3716
|
+
lossSummary: import_zod30.z.string().optional().describe("Summary of loss history, e.g. '3 claims in past 5 years totaling $125,000'"),
|
|
3717
|
+
individualClaims: import_zod30.z.array(
|
|
3718
|
+
import_zod30.z.object({
|
|
3719
|
+
date: import_zod30.z.string().optional().describe("Date of loss or claim"),
|
|
3720
|
+
type: import_zod30.z.string().optional().describe("Type of claim, e.g. 'property damage', 'bodily injury'"),
|
|
3721
|
+
description: import_zod30.z.string().optional().describe("Brief description of the claim"),
|
|
3722
|
+
amountPaid: import_zod30.z.string().optional().describe("Amount paid"),
|
|
3723
|
+
amountReserved: import_zod30.z.string().optional().describe("Amount reserved"),
|
|
3724
|
+
status: import_zod30.z.enum(["open", "closed", "reopened"]).optional().describe("Claim status"),
|
|
3725
|
+
claimNumber: import_zod30.z.string().optional().describe("Claim reference number")
|
|
3430
3726
|
})
|
|
3431
3727
|
).optional().describe("Individual claim records"),
|
|
3432
|
-
experienceMod:
|
|
3728
|
+
experienceMod: import_zod30.z.string().optional().describe("Experience modification factor for workers comp, e.g. '0.85'")
|
|
3433
3729
|
});
|
|
3434
3730
|
function buildLossHistoryPrompt() {
|
|
3435
3731
|
return `You are an expert insurance document analyst. Extract all loss history and claims information from this document.
|
|
@@ -3446,18 +3742,18 @@ Return JSON only.`;
|
|
|
3446
3742
|
}
|
|
3447
3743
|
|
|
3448
3744
|
// src/prompts/extractors/sections.ts
|
|
3449
|
-
var
|
|
3450
|
-
var SubsectionSchema2 =
|
|
3451
|
-
title:
|
|
3452
|
-
sectionNumber:
|
|
3453
|
-
pageNumber:
|
|
3454
|
-
content:
|
|
3745
|
+
var import_zod31 = require("zod");
|
|
3746
|
+
var SubsectionSchema2 = import_zod31.z.object({
|
|
3747
|
+
title: import_zod31.z.string().describe("Subsection title"),
|
|
3748
|
+
sectionNumber: import_zod31.z.string().optional().describe("Subsection number"),
|
|
3749
|
+
pageNumber: import_zod31.z.number().optional().describe("Page number"),
|
|
3750
|
+
content: import_zod31.z.string().describe("Full verbatim text")
|
|
3455
3751
|
});
|
|
3456
|
-
var SectionsSchema =
|
|
3457
|
-
sections:
|
|
3458
|
-
|
|
3459
|
-
title:
|
|
3460
|
-
type:
|
|
3752
|
+
var SectionsSchema = import_zod31.z.object({
|
|
3753
|
+
sections: import_zod31.z.array(
|
|
3754
|
+
import_zod31.z.object({
|
|
3755
|
+
title: import_zod31.z.string().describe("Section title"),
|
|
3756
|
+
type: import_zod31.z.enum([
|
|
3461
3757
|
"declarations",
|
|
3462
3758
|
"insuring_agreement",
|
|
3463
3759
|
"policy_form",
|
|
@@ -3471,10 +3767,10 @@ var SectionsSchema = import_zod30.z.object({
|
|
|
3471
3767
|
"regulatory",
|
|
3472
3768
|
"other"
|
|
3473
3769
|
]).describe("Section type classification"),
|
|
3474
|
-
content:
|
|
3475
|
-
pageStart:
|
|
3476
|
-
pageEnd:
|
|
3477
|
-
subsections:
|
|
3770
|
+
content: import_zod31.z.string().describe("Full verbatim text of the section"),
|
|
3771
|
+
pageStart: import_zod31.z.number().describe("Starting page number"),
|
|
3772
|
+
pageEnd: import_zod31.z.number().optional().describe("Ending page number"),
|
|
3773
|
+
subsections: import_zod31.z.array(SubsectionSchema2).optional().describe("Subsections within this section")
|
|
3478
3774
|
})
|
|
3479
3775
|
).describe("All document sections")
|
|
3480
3776
|
});
|
|
@@ -3493,25 +3789,31 @@ For each section, classify its type:
|
|
|
3493
3789
|
- "other" \u2014 anything that doesn't fit the above categories
|
|
3494
3790
|
|
|
3495
3791
|
Include accurate page numbers for every section. Include subsections only if the section has clearly defined subsections with their own titles.
|
|
3792
|
+
If a page begins or ends in the middle of a section, treat it as a continuation of the existing section instead of creating a new orphan section from the fragment.
|
|
3793
|
+
|
|
3794
|
+
Critical rules:
|
|
3795
|
+
- Ignore table-of-contents entries, page-number references, repeating headers/footers, and other navigational artifacts.
|
|
3796
|
+
- Do not create a new section from a lone continuation fragment such as a single paragraph tail or list item that clearly belongs to the previous page's section.
|
|
3797
|
+
- When a section spans multiple pages, keep it as one section with pageStart/pageEnd covering the full span represented in this extraction.
|
|
3496
3798
|
|
|
3497
3799
|
Return JSON only.`;
|
|
3498
3800
|
}
|
|
3499
3801
|
|
|
3500
3802
|
// src/prompts/extractors/supplementary.ts
|
|
3501
|
-
var
|
|
3502
|
-
var ContactSchema2 =
|
|
3503
|
-
name:
|
|
3504
|
-
phone:
|
|
3505
|
-
email:
|
|
3506
|
-
address:
|
|
3507
|
-
type:
|
|
3803
|
+
var import_zod32 = require("zod");
|
|
3804
|
+
var ContactSchema2 = import_zod32.z.object({
|
|
3805
|
+
name: import_zod32.z.string().optional().describe("Organization or person name"),
|
|
3806
|
+
phone: import_zod32.z.string().optional().describe("Phone number"),
|
|
3807
|
+
email: import_zod32.z.string().optional().describe("Email address"),
|
|
3808
|
+
address: import_zod32.z.string().optional().describe("Mailing address"),
|
|
3809
|
+
type: import_zod32.z.string().optional().describe("Contact type, e.g. 'State Department of Insurance'")
|
|
3508
3810
|
});
|
|
3509
|
-
var SupplementarySchema =
|
|
3510
|
-
regulatoryContacts:
|
|
3511
|
-
claimsContacts:
|
|
3512
|
-
thirdPartyAdministrators:
|
|
3513
|
-
cancellationNoticeDays:
|
|
3514
|
-
nonrenewalNoticeDays:
|
|
3811
|
+
var SupplementarySchema = import_zod32.z.object({
|
|
3812
|
+
regulatoryContacts: import_zod32.z.array(ContactSchema2).optional().describe("Regulatory body contacts (state department of insurance, ombudsman)"),
|
|
3813
|
+
claimsContacts: import_zod32.z.array(ContactSchema2).optional().describe("Claims reporting contacts and instructions"),
|
|
3814
|
+
thirdPartyAdministrators: import_zod32.z.array(ContactSchema2).optional().describe("Third-party administrators for claims handling"),
|
|
3815
|
+
cancellationNoticeDays: import_zod32.z.number().optional().describe("Required notice period for cancellation in days"),
|
|
3816
|
+
nonrenewalNoticeDays: import_zod32.z.number().optional().describe("Required notice period for nonrenewal in days")
|
|
3515
3817
|
});
|
|
3516
3818
|
function buildSupplementaryPrompt() {
|
|
3517
3819
|
return `You are an expert insurance document analyst. Extract supplementary and regulatory information from this document.
|
|
@@ -3548,6 +3850,313 @@ function getExtractor(name) {
|
|
|
3548
3850
|
return EXTRACTORS[name];
|
|
3549
3851
|
}
|
|
3550
3852
|
|
|
3853
|
+
// src/core/quality.ts
|
|
3854
|
+
/**
 * Reduce a list of quality issues to a single gate status.
 *
 * @param {{ issues: Array<{ severity: string }>, hasRoundWarnings?: boolean }} params
 *   `issues` is scanned for `severity` values; `hasRoundWarnings` defaults to false.
 * @returns {"failed" | "warning" | "passed"} "failed" if any issue is "blocking";
 *   otherwise "warning" if any issue is "warning" or `hasRoundWarnings` is truthy;
 *   otherwise "passed".
 */
function evaluateQualityGate(params) {
  const { issues, hasRoundWarnings = false } = params;
  const anyWithSeverity = (level) => issues.some((entry) => entry.severity === level);
  // Both scans run up front so malformed entries surface the same way regardless of outcome.
  const blocked = anyWithSeverity("blocking");
  const warned = anyWithSeverity("warning") || hasRoundWarnings;
  if (blocked) {
    return "failed";
  }
  return warned ? "warning" : "passed";
}
|
|
3860
|
+
/**
 * Decide whether a gate status should be treated as a hard failure.
 *
 * @param {string} mode - Only the exact value "strict" can produce a failure.
 * @param {string} status - Gate status; only "failed" counts.
 * @returns {boolean} true only when mode is "strict" and status is "failed".
 */
function shouldFailQualityGate(mode, status) {
  if (mode !== "strict") {
    return false;
  }
  return status === "failed";
}
|
|
3863
|
+
|
|
3864
|
+
// src/extraction/quality.ts
|
|
3865
|
+
/**
 * Trim a form number and reject anything unusable.
 *
 * @param {unknown} value - Candidate form number.
 * @returns {string | undefined} The trimmed string, or undefined when the input
 *   is not a string or is empty after trimming.
 */
function normalizeFormNumber(value) {
  const cleaned = typeof value === "string" ? value.trim() : "";
  return cleaned === "" ? void 0 : cleaned;
}
|
|
3871
|
+
/**
 * Record a form in the inventory map, merging with an existing entry for the
 * same form number.
 *
 * A new entry is created from `extra` on first sight. On later sightings only
 * fields that are still missing are filled in, and `source` is appended to the
 * entry's `sources` list if not already present. Existing values are never
 * overwritten.
 *
 * @param {Map<string, object>} inventory - Map keyed by form number; mutated in place.
 * @param {string | undefined} formNumber - Key for the entry; falsy values are ignored.
 * @param {string} source - Label of the extractor that reported the form.
 * @param {{ title?: string, pageStart?: number, pageEnd?: number }} [extra]
 *   Optional metadata for the form.
 * @returns {void}
 */
function addFormEntry(inventory, formNumber, source, extra) {
  if (!formNumber) return;
  const existing = inventory.get(formNumber);
  if (!existing) {
    inventory.set(formNumber, {
      formNumber,
      title: extra?.title,
      pageStart: extra?.pageStart,
      pageEnd: extra?.pageEnd,
      sources: [source]
    });
    return;
  }
  // An empty title carries no information, so truthiness is the right test here.
  if (!existing.title && extra?.title) existing.title = extra.title;
  // Page numbers are compared against null/undefined rather than by truthiness
  // so that a legitimate page 0 is neither overwritten nor dropped.
  if (existing.pageStart == null && extra?.pageStart != null) existing.pageStart = extra.pageStart;
  if (existing.pageEnd == null && extra?.pageEnd != null) existing.pageEnd = extra.pageEnd;
  if (!existing.sources.includes(source)) existing.sources.push(source);
}
|
|
3889
|
+
/**
 * Heuristically detect text that only points at another part of the document
 * instead of stating a value.
 *
 * Matches, case-insensitively and with whitespace runs collapsed:
 *   - "as stated" / "if applicable"
 *   - "shown in [the] [up to three qualifier words] declarations|schedule",
 *     e.g. "shown in the Declarations", "shown in the Business Income Declarations".
 *
 * @param {unknown} value - Candidate text; non-strings never match.
 * @returns {boolean} true when the text contains referential wording.
 */
function looksReferential(value) {
  if (typeof value !== "string") return false;
  // Collapse whitespace so phrases split across extracted lines still match.
  const normalized = value.toLowerCase().replace(/\s+/g, " ");
  if (normalized.includes("as stated") || normalized.includes("if applicable")) {
    return true;
  }
  // Qualifier words are bounded ({0,3}) to keep the match local to the phrase.
  return /shown in (?:the )?(?:[a-z][a-z&/'-]* ){0,3}(?:declarations|schedule)/.test(normalized);
}
|
|
3894
|
+
/**
 * Heuristically detect a table-of-contents line: text ending in a dot leader
 * followed by a 1-3 digit page number.
 *
 * Two shapes are recognised on the trimmed input:
 *   - any text ending in four or more dots and a page number ("Appraisal ..... 19")
 *   - a numbered, capitalised heading ending in three or more dots and a page
 *     number ("3. Exclusions ... 12")
 * Whitespace between the dot leader and the page number is optional, so both
 * "Appraisal.....19" and "Appraisal ..... 19" match.
 *
 * @param {unknown} value - Candidate text; non-strings never match.
 * @returns {boolean} true when the text looks like a TOC entry.
 */
function looksTocArtifact(value) {
  if (typeof value !== "string") return false;
  const text = value.trim();
  return /\.{4,}\s*\d{1,3}$/.test(text) || /^\d+\.\s+[A-Z][\s\S]*\.{3,}\s*\d{1,3}$/.test(text);
}
|
|
3898
|
+
/**
 * Rank a section reference by the keywords it contains.
 *
 * Tiers are tested from highest to lowest and the first tier with a matching
 * keyword wins, so a heading mentioning both "schedule" and "endorsement"
 * ranks 4. Matching is a case-insensitive substring test.
 *
 * @param {unknown} sectionRef - Section heading; non-strings rank 0.
 * @returns {number} 4, 3, 2, 1, or 0 when nothing matches.
 */
function sourcePrecedence(sectionRef) {
  if (typeof sectionRef !== "string") return 0;
  const heading = sectionRef.toLowerCase();
  const tiers = [
    [4, ["declaration", "scheduled coverages", "schedule"]],
    [3, ["endorsement"]],
    [2, ["additional coverages"]],
    [1, ["coverage form", "policy form"]]
  ];
  for (const [rank, keywords] of tiers) {
    if (keywords.some((keyword) => heading.includes(keyword))) {
      return rank;
    }
  }
  return 0;
}
|
|
3907
|
+
function buildExtractionReviewReport(params) {
|
|
3908
|
+
const { memory, reviewRounds } = params;
|
|
3909
|
+
const deterministicIssues = [];
|
|
3910
|
+
const inventory = /* @__PURE__ */ new Map();
|
|
3911
|
+
const extractedFormInventory = memory.get("form_inventory")?.forms ?? [];
|
|
3912
|
+
const coverages = memory.get("coverage_limits")?.coverages ?? [];
|
|
3913
|
+
const endorsements = memory.get("endorsements")?.endorsements ?? [];
|
|
3914
|
+
const exclusions = memory.get("exclusions")?.exclusions ?? [];
|
|
3915
|
+
const conditions = memory.get("conditions")?.conditions ?? [];
|
|
3916
|
+
const sections = memory.get("sections")?.sections ?? [];
|
|
3917
|
+
for (const form of extractedFormInventory) {
|
|
3918
|
+
addFormEntry(
|
|
3919
|
+
inventory,
|
|
3920
|
+
normalizeFormNumber(form.formNumber),
|
|
3921
|
+
"form_inventory",
|
|
3922
|
+
{
|
|
3923
|
+
title: form.title,
|
|
3924
|
+
pageStart: form.pageStart,
|
|
3925
|
+
pageEnd: form.pageEnd
|
|
3926
|
+
}
|
|
3927
|
+
);
|
|
3928
|
+
}
|
|
3929
|
+
for (const endorsement of endorsements) {
|
|
3930
|
+
addFormEntry(
|
|
3931
|
+
inventory,
|
|
3932
|
+
normalizeFormNumber(endorsement.formNumber),
|
|
3933
|
+
"endorsements",
|
|
3934
|
+
{
|
|
3935
|
+
title: typeof endorsement.title === "string" ? endorsement.title : void 0,
|
|
3936
|
+
pageStart: typeof endorsement.pageStart === "number" ? endorsement.pageStart : void 0,
|
|
3937
|
+
pageEnd: typeof endorsement.pageEnd === "number" ? endorsement.pageEnd : void 0
|
|
3938
|
+
}
|
|
3939
|
+
);
|
|
3940
|
+
if (typeof endorsement.formNumber !== "string" || !endorsement.formNumber.trim()) {
|
|
3941
|
+
deterministicIssues.push({
|
|
3942
|
+
code: "endorsement_missing_form_number",
|
|
3943
|
+
severity: "blocking",
|
|
3944
|
+
message: "Endorsement is missing formNumber.",
|
|
3945
|
+
extractorName: "endorsements",
|
|
3946
|
+
pageNumber: typeof endorsement.pageStart === "number" ? endorsement.pageStart : void 0,
|
|
3947
|
+
itemName: typeof endorsement.title === "string" ? endorsement.title : void 0
|
|
3948
|
+
});
|
|
3949
|
+
}
|
|
3950
|
+
const endorsementFormNumber = normalizeFormNumber(endorsement.formNumber);
|
|
3951
|
+
if (endorsementFormNumber && !inventory.has(endorsementFormNumber)) {
|
|
3952
|
+
deterministicIssues.push({
|
|
3953
|
+
code: "endorsement_form_missing_from_inventory",
|
|
3954
|
+
severity: "warning",
|
|
3955
|
+
message: `Endorsement "${String(endorsement.title ?? endorsementFormNumber)}" is not present in form inventory.`,
|
|
3956
|
+
extractorName: "endorsements",
|
|
3957
|
+
formNumber: endorsementFormNumber,
|
|
3958
|
+
pageNumber: typeof endorsement.pageStart === "number" ? endorsement.pageStart : void 0,
|
|
3959
|
+
itemName: typeof endorsement.title === "string" ? endorsement.title : void 0
|
|
3960
|
+
});
|
|
3961
|
+
}
|
|
3962
|
+
}
|
|
3963
|
+
for (const coverage of coverages) {
|
|
3964
|
+
const formNumber = normalizeFormNumber(coverage.formNumber);
|
|
3965
|
+
addFormEntry(inventory, formNumber, "coverage_limits", {
|
|
3966
|
+
title: typeof coverage.name === "string" ? coverage.name : void 0,
|
|
3967
|
+
pageStart: typeof coverage.pageNumber === "number" ? coverage.pageNumber : void 0,
|
|
3968
|
+
pageEnd: typeof coverage.pageNumber === "number" ? coverage.pageNumber : void 0
|
|
3969
|
+
});
|
|
3970
|
+
if (typeof coverage.name === "string" && /coverage form$/i.test(coverage.name.trim())) {
|
|
3971
|
+
deterministicIssues.push({
|
|
3972
|
+
code: "generic_form_row_as_coverage",
|
|
3973
|
+
severity: "blocking",
|
|
3974
|
+
message: `Coverage "${coverage.name}" looks like a form header rather than a real coverage row.`,
|
|
3975
|
+
extractorName: "coverage_limits",
|
|
3976
|
+
formNumber,
|
|
3977
|
+
pageNumber: typeof coverage.pageNumber === "number" ? coverage.pageNumber : void 0,
|
|
3978
|
+
itemName: coverage.name
|
|
3979
|
+
});
|
|
3980
|
+
}
|
|
3981
|
+
if (typeof coverage.pageNumber !== "number") {
|
|
3982
|
+
deterministicIssues.push({
|
|
3983
|
+
code: "coverage_missing_page_number",
|
|
3984
|
+
severity: "warning",
|
|
3985
|
+
message: `Coverage "${String(coverage.name ?? "unknown")}" is missing pageNumber provenance.`,
|
|
3986
|
+
extractorName: "coverage_limits",
|
|
3987
|
+
formNumber,
|
|
3988
|
+
itemName: typeof coverage.name === "string" ? coverage.name : void 0
|
|
3989
|
+
});
|
|
3990
|
+
}
|
|
3991
|
+
if (typeof coverage.sectionRef !== "string" || !coverage.sectionRef.trim()) {
|
|
3992
|
+
deterministicIssues.push({
|
|
3993
|
+
code: "coverage_missing_section_ref",
|
|
3994
|
+
severity: "warning",
|
|
3995
|
+
message: `Coverage "${String(coverage.name ?? "unknown")}" is missing sectionRef provenance.`,
|
|
3996
|
+
extractorName: "coverage_limits",
|
|
3997
|
+
formNumber,
|
|
3998
|
+
pageNumber: typeof coverage.pageNumber === "number" ? coverage.pageNumber : void 0,
|
|
3999
|
+
itemName: typeof coverage.name === "string" ? coverage.name : void 0
|
|
4000
|
+
});
|
|
4001
|
+
}
|
|
4002
|
+
if (typeof coverage.originalContent !== "string" || !coverage.originalContent.trim()) {
|
|
4003
|
+
deterministicIssues.push({
|
|
4004
|
+
code: "coverage_missing_original_content",
|
|
4005
|
+
severity: "warning",
|
|
4006
|
+
message: `Coverage "${String(coverage.name ?? "unknown")}" is missing originalContent source text.`,
|
|
4007
|
+
extractorName: "coverage_limits",
|
|
4008
|
+
formNumber,
|
|
4009
|
+
pageNumber: typeof coverage.pageNumber === "number" ? coverage.pageNumber : void 0,
|
|
4010
|
+
itemName: typeof coverage.name === "string" ? coverage.name : void 0
|
|
4011
|
+
});
|
|
4012
|
+
}
|
|
4013
|
+
if (looksReferential(coverage.limit) || looksReferential(coverage.deductible)) {
|
|
4014
|
+
deterministicIssues.push({
|
|
4015
|
+
code: "coverage_referential_value",
|
|
4016
|
+
severity: "warning",
|
|
4017
|
+
message: `Coverage "${String(coverage.name ?? "unknown")}" contains referential language instead of a concrete scheduled term.`,
|
|
4018
|
+
extractorName: "coverage_limits",
|
|
4019
|
+
formNumber,
|
|
4020
|
+
pageNumber: typeof coverage.pageNumber === "number" ? coverage.pageNumber : void 0,
|
|
4021
|
+
itemName: typeof coverage.name === "string" ? coverage.name : void 0
|
|
4022
|
+
});
|
|
4023
|
+
}
|
|
4024
|
+
if (formNumber && !inventory.has(formNumber)) {
|
|
4025
|
+
deterministicIssues.push({
|
|
4026
|
+
code: "coverage_form_missing_from_inventory",
|
|
4027
|
+
severity: "warning",
|
|
4028
|
+
message: `Coverage "${String(coverage.name ?? "unknown")}" references form "${formNumber}" that is missing from form inventory.`,
|
|
4029
|
+
extractorName: "coverage_limits",
|
|
4030
|
+
formNumber,
|
|
4031
|
+
pageNumber: typeof coverage.pageNumber === "number" ? coverage.pageNumber : void 0,
|
|
4032
|
+
itemName: typeof coverage.name === "string" ? coverage.name : void 0
|
|
4033
|
+
});
|
|
4034
|
+
}
|
|
4035
|
+
}
|
|
4036
|
+
const coverageGroups = /* @__PURE__ */ new Map();
|
|
4037
|
+
for (const coverage of coverages) {
|
|
4038
|
+
const key = [
|
|
4039
|
+
String(coverage.name ?? "").toLowerCase(),
|
|
4040
|
+
String(coverage.formNumber ?? "").toLowerCase()
|
|
4041
|
+
].join("|");
|
|
4042
|
+
coverageGroups.set(key, [...coverageGroups.get(key) ?? [], coverage]);
|
|
4043
|
+
}
|
|
4044
|
+
for (const [key, groupedCoverages] of coverageGroups.entries()) {
|
|
4045
|
+
if (groupedCoverages.length < 2) continue;
|
|
4046
|
+
const sorted = [...groupedCoverages].sort((a, b) => sourcePrecedence(b.sectionRef) - sourcePrecedence(a.sectionRef));
|
|
4047
|
+
const highest = sorted[0];
|
|
4048
|
+
for (const lower of sorted.slice(1)) {
|
|
4049
|
+
const highestLimit = String(highest.limit ?? "").trim();
|
|
4050
|
+
const lowerLimit = String(lower.limit ?? "").trim();
|
|
4051
|
+
const highestDeductible = String(highest.deductible ?? "").trim();
|
|
4052
|
+
const lowerDeductible = String(lower.deductible ?? "").trim();
|
|
4053
|
+
if (highestLimit && lowerLimit && highestLimit !== lowerLimit || highestDeductible && lowerDeductible && highestDeductible !== lowerDeductible) {
|
|
4054
|
+
deterministicIssues.push({
|
|
4055
|
+
code: "coverage_precedence_conflict",
|
|
4056
|
+
severity: "warning",
|
|
4057
|
+
message: `Coverage "${String(highest.name ?? key)}" has conflicting extracted terms across sources with different precedence.`,
|
|
4058
|
+
extractorName: "coverage_limits",
|
|
4059
|
+
formNumber: normalizeFormNumber(highest.formNumber) ?? normalizeFormNumber(lower.formNumber),
|
|
4060
|
+
pageNumber: typeof lower.pageNumber === "number" ? lower.pageNumber : void 0,
|
|
4061
|
+
itemName: typeof highest.name === "string" ? highest.name : void 0
|
|
4062
|
+
});
|
|
4063
|
+
}
|
|
4064
|
+
}
|
|
4065
|
+
}
|
|
4066
|
+
for (const exclusion of exclusions) {
|
|
4067
|
+
addFormEntry(inventory, normalizeFormNumber(exclusion.formNumber), "exclusions", {
|
|
4068
|
+
title: typeof exclusion.name === "string" ? exclusion.name : void 0,
|
|
4069
|
+
pageStart: typeof exclusion.pageNumber === "number" ? exclusion.pageNumber : void 0,
|
|
4070
|
+
pageEnd: typeof exclusion.pageNumber === "number" ? exclusion.pageNumber : void 0
|
|
4071
|
+
});
|
|
4072
|
+
if (typeof exclusion.pageNumber !== "number") {
|
|
4073
|
+
deterministicIssues.push({
|
|
4074
|
+
code: "exclusion_missing_page_number",
|
|
4075
|
+
severity: "warning",
|
|
4076
|
+
message: `Exclusion "${String(exclusion.name ?? "unknown")}" is missing pageNumber provenance.`,
|
|
4077
|
+
extractorName: "exclusions",
|
|
4078
|
+
formNumber: normalizeFormNumber(exclusion.formNumber),
|
|
4079
|
+
itemName: typeof exclusion.name === "string" ? exclusion.name : void 0
|
|
4080
|
+
});
|
|
4081
|
+
}
|
|
4082
|
+
if (looksTocArtifact(exclusion.content)) {
|
|
4083
|
+
deterministicIssues.push({
|
|
4084
|
+
code: "exclusion_toc_artifact",
|
|
4085
|
+
severity: "blocking",
|
|
4086
|
+
message: `Exclusion "${String(exclusion.name ?? "unknown")}" appears to be a table-of-contents artifact.`,
|
|
4087
|
+
extractorName: "exclusions",
|
|
4088
|
+
pageNumber: typeof exclusion.pageNumber === "number" ? exclusion.pageNumber : void 0,
|
|
4089
|
+
itemName: typeof exclusion.name === "string" ? exclusion.name : void 0
|
|
4090
|
+
});
|
|
4091
|
+
}
|
|
4092
|
+
}
|
|
4093
|
+
for (const condition of conditions) {
|
|
4094
|
+
if (typeof condition.pageNumber !== "number") {
|
|
4095
|
+
deterministicIssues.push({
|
|
4096
|
+
code: "condition_missing_page_number",
|
|
4097
|
+
severity: "warning",
|
|
4098
|
+
message: `Condition "${String(condition.name ?? "unknown")}" is missing pageNumber provenance.`,
|
|
4099
|
+
extractorName: "conditions",
|
|
4100
|
+
itemName: typeof condition.name === "string" ? condition.name : void 0
|
|
4101
|
+
});
|
|
4102
|
+
}
|
|
4103
|
+
if (looksTocArtifact(condition.content)) {
|
|
4104
|
+
deterministicIssues.push({
|
|
4105
|
+
code: "condition_toc_artifact",
|
|
4106
|
+
severity: "blocking",
|
|
4107
|
+
message: `Condition "${String(condition.name ?? "unknown")}" appears to be a table-of-contents artifact.`,
|
|
4108
|
+
extractorName: "conditions",
|
|
4109
|
+
pageNumber: typeof condition.pageNumber === "number" ? condition.pageNumber : void 0,
|
|
4110
|
+
itemName: typeof condition.name === "string" ? condition.name : void 0
|
|
4111
|
+
});
|
|
4112
|
+
}
|
|
4113
|
+
}
|
|
4114
|
+
for (const section of sections) {
|
|
4115
|
+
if (typeof section.content === "string" && section.content.trim().length < 120 && typeof section.pageStart === "number" && (!("pageEnd" in section) || section.pageEnd === section.pageStart || section.pageEnd === void 0)) {
|
|
4116
|
+
deterministicIssues.push({
|
|
4117
|
+
code: "section_short_fragment",
|
|
4118
|
+
severity: "warning",
|
|
4119
|
+
message: `Section "${String(section.title ?? "unknown")}" may be an orphan continuation fragment.`,
|
|
4120
|
+
extractorName: "sections",
|
|
4121
|
+
pageNumber: typeof section.pageStart === "number" ? section.pageStart : void 0,
|
|
4122
|
+
itemName: typeof section.title === "string" ? section.title : void 0
|
|
4123
|
+
});
|
|
4124
|
+
}
|
|
4125
|
+
}
|
|
4126
|
+
const formInventory = [...inventory.values()].sort((a, b) => a.formNumber.localeCompare(b.formNumber));
|
|
4127
|
+
const rounds = reviewRounds.map((round) => ({
|
|
4128
|
+
round: round.round,
|
|
4129
|
+
kind: "llm_review",
|
|
4130
|
+
status: round.complete && round.qualityIssues.length === 0 ? "passed" : "warning",
|
|
4131
|
+
summary: round.qualityIssues[0] ?? (round.complete ? "Review passed." : "Review requested follow-up extraction.")
|
|
4132
|
+
}));
|
|
4133
|
+
const artifacts = [
|
|
4134
|
+
{ kind: "form_inventory", label: "Form Inventory", itemCount: formInventory.length },
|
|
4135
|
+
{ kind: "page_map", label: "Page Map", itemCount: params.pageAssignments.length }
|
|
4136
|
+
];
|
|
4137
|
+
const qualityGateStatus = evaluateQualityGate({
|
|
4138
|
+
issues: deterministicIssues,
|
|
4139
|
+
hasRoundWarnings: reviewRounds.some((round) => round.qualityIssues.length > 0 || !round.complete)
|
|
4140
|
+
});
|
|
4141
|
+
return {
|
|
4142
|
+
issues: deterministicIssues,
|
|
4143
|
+
rounds,
|
|
4144
|
+
artifacts,
|
|
4145
|
+
reviewRoundRecords: reviewRounds,
|
|
4146
|
+
formInventory,
|
|
4147
|
+
qualityGateStatus
|
|
4148
|
+
};
|
|
4149
|
+
}
|
|
4150
|
+
/**
 * Builds the persisted record for a single review round.
 *
 * @param {number} round - Round number to store on the record.
 * @param {object} review - Review result object; `complete`, `missingFields`
 *   and `additionalTasks` are copied through unchanged.
 * @returns {object} Record with `round`, `complete`, `missingFields`,
 *   `qualityIssues` and `additionalTasks`.
 */
function toReviewRoundRecord(round, review) {
  const { complete, missingFields, qualityIssues, additionalTasks } = review;
  const record = {
    round,
    complete,
    missingFields,
    // qualityIssues is the only field given a default here: a nullish value
    // becomes an empty list.
    qualityIssues: qualityIssues ?? [],
    additionalTasks
  };
  return record;
}
|
|
4159
|
+
|
|
3551
4160
|
// src/extraction/coordinator.ts
|
|
3552
4161
|
function createExtractor(config) {
|
|
3553
4162
|
const {
|
|
@@ -3560,21 +4169,174 @@ function createExtractor(config) {
|
|
|
3560
4169
|
onProgress,
|
|
3561
4170
|
log,
|
|
3562
4171
|
providerOptions,
|
|
4172
|
+
qualityGate = "warn",
|
|
3563
4173
|
onCheckpointSave
|
|
3564
4174
|
} = config;
|
|
3565
4175
|
const limit = pLimit(concurrency);
|
|
3566
4176
|
let totalUsage = { inputTokens: 0, outputTokens: 0 };
|
|
4177
|
+
let modelCalls = 0;
|
|
4178
|
+
let callsWithUsage = 0;
|
|
4179
|
+
let callsMissingUsage = 0;
|
|
3567
4180
|
/**
 * Records one model call and folds its token usage into the running totals.
 *
 * Every invocation increments `modelCalls`. When `usage` is truthy the call
 * is counted in `callsWithUsage`, its token counts are added to `totalUsage`,
 * and `onTokenUsage` (if provided) is invoked with the original usage object.
 * Otherwise the call is counted in `callsMissingUsage`.
 *
 * @param {{ inputTokens?: number, outputTokens?: number } | null | undefined} usage
 *   Usage reported for the call, if any.
 * @returns {void}
 */
function trackUsage(usage) {
  modelCalls += 1;
  if (!usage) {
    callsMissingUsage += 1;
    return;
  }
  callsWithUsage += 1;
  // A usage object may carry only one of the two counts (or a non-numeric
  // value). Adding undefined/NaN would turn the running total into NaN for
  // the rest of the run, so non-finite counts contribute 0 instead.
  totalUsage.inputTokens += Number.isFinite(usage.inputTokens) ? usage.inputTokens : 0;
  totalUsage.outputTokens += Number.isFinite(usage.outputTokens) ? usage.outputTokens : 0;
  onTokenUsage?.(usage);
}
|
|
4191
|
+
/**
 * Merges a fresh extractor result into the entry already stored under `name`.
 *
 * The current entry (possibly undefined) and the new data are handed to
 * `mergeExtractorResult`, and whatever it returns replaces the stored entry.
 *
 * @param {string} name - Key in `memory` for the extractor.
 * @param {*} data - Newly produced extractor output.
 * @param {Map<string, *>} memory - Store that is updated in place.
 * @returns {void}
 */
function mergeMemoryResult(name, data, memory) {
  const merged = mergeExtractorResult(name, memory.get(name), data);
  memory.set(name, merged);
}
|
|
4195
|
+
/**
 * Produces a pretty-printed JSON summary of what has been extracted so far.
 *
 * The summary lists every key in `memory` except "classify", the item count
 * for each known extractor collection, and up to the first 12 coverages
 * reduced to `name`, `limit`, `deductible` and `formNumber`.
 *
 * @param {Map<string, *>} memory - Extractor results keyed by extractor name.
 * @returns {string} JSON text indented with two spaces.
 */
function summarizeExtraction(memory) {
  // Collection stored at memory[extractor][field]; anything that is not an
  // array (including a missing entry) is treated as empty.
  const listOf = (extractor, field) => {
    const candidate = memory.get(extractor)?.[field];
    return Array.isArray(candidate) ? candidate : [];
  };

  const coverages = listOf("coverage_limits", "coverages");
  const coverageSamples = coverages
    .slice(0, 12)
    .map(({ name, limit, deductible, formNumber }) => ({ name, limit, deductible, formNumber }));

  const summary = {
    extractedKeys: Array.from(memory.keys()).filter((key) => key !== "classify"),
    declarationFieldCount: listOf("declarations", "fields").length,
    coverageCount: coverages.length,
    coverageSamples,
    endorsementCount: listOf("endorsements", "endorsements").length,
    exclusionCount: listOf("exclusions", "exclusions").length,
    conditionCount: listOf("conditions", "conditions").length,
    sectionCount: listOf("sections", "sections").length
  };
  return JSON.stringify(summary, null, 2);
}
|
|
4219
|
+
/**
 * Renders the page map as text, one line per extractor.
 *
 * Each line has the form "<extractor>: pages <p1>, <p2>, ...", with
 * extractors in first-seen order and pages in assignment order.
 *
 * @param {Array<{ extractorNames: string[], localPageNumber: number }>} pageAssignments
 * @returns {string} Newline-joined summary, or a fixed placeholder sentence
 *   when no extractor has any page.
 */
function formatPageMapSummary(pageAssignments) {
  const pagesByExtractor = new Map();
  for (const assignment of pageAssignments) {
    for (const name of assignment.extractorNames) {
      const bucket = pagesByExtractor.get(name);
      if (bucket === undefined) {
        pagesByExtractor.set(name, [assignment.localPageNumber]);
      } else {
        bucket.push(assignment.localPageNumber);
      }
    }
  }

  if (pagesByExtractor.size === 0) {
    return "No page assignments available.";
  }

  const lines = [];
  pagesByExtractor.forEach((pages, name) => {
    lines.push(`${name}: pages ${pages.join(", ")}`);
  });
  return lines.join("\n");
}
|
|
4229
|
+
/**
 * Cleans up per-page extractor assignments using the form inventory.
 *
 * For every assignment this:
 *  - de-duplicates extractor names, drops falsy ones, and substitutes
 *    ["sections"] when the incoming list is empty;
 *  - removes "coverage_limits" when the page is not flagged with
 *    `hasScheduleValues === true` and either its `pageRole` is one of the
 *    form roles listed below, or the page has no "declarations" extractor and
 *    looks like an endorsement / notice / application / conditions /
 *    exclusions page;
 *  - appends "endorsements" when the inventory places an endorsement form on
 *    the page;
 *  - falls back to ["sections"] if nothing is left.
 *
 * @param {Array<object>} pageAssignments - Assignments carrying
 *   `localPageNumber`, `extractorNames` and optionally `pageRole` /
 *   `hasScheduleValues`.
 * @param {{ forms: Array<object> } | null | undefined} formInventory - Forms
 *   with `formType`, `pageStart` and optional `pageEnd`.
 * @returns {Array<object>} New assignment objects; inputs are not mutated.
 */
function normalizePageAssignments(pageAssignments, formInventory) {
  // page number -> Set of formType values for inventory forms covering it
  const formTypesByPage = new Map();
  const inventoryForms = formInventory ? formInventory.forms : [];
  for (const form of inventoryForms) {
    if (form.pageStart == null) continue;
    const lastPage = form.pageEnd ?? form.pageStart;
    for (let page = form.pageStart; page <= lastPage; page++) {
      let typesOnPage = formTypesByPage.get(page);
      if (typesOnPage === undefined) {
        typesOnPage = new Set();
        formTypesByPage.set(page, typesOnPage);
      }
      typesOnPage.add(form.formType);
    }
  }

  const rolesBlockingCoverage = ["policy_form", "condition_exclusion_form", "endorsement_form"];
  const formTypesBlockingCoverage = ["endorsement", "notice", "application"];

  return pageAssignments.map((assignment) => {
    const requested = assignment.extractorNames.length > 0 ? assignment.extractorNames : ["sections"];
    let extractorNames = Array.from(new Set(requested.filter(Boolean)));

    // These flags describe the page as originally assigned, so they are
    // captured before any extractor is removed below.
    const hasDeclarations = extractorNames.includes("declarations");
    const hasClauseExtractor = extractorNames.includes("conditions") || extractorNames.includes("exclusions") || extractorNames.includes("endorsements");
    const hasScheduleValues = assignment.hasScheduleValues === true;
    const inventoryTypes = formTypesByPage.get(assignment.localPageNumber);

    // Pages flagged as carrying schedule values always keep coverage_limits.
    if (extractorNames.includes("coverage_limits") && !hasScheduleValues) {
      const blockedByRole = rolesBlockingCoverage.includes(assignment.pageRole);
      const blockedByInventory = inventoryTypes != null && formTypesBlockingCoverage.some((type) => inventoryTypes.has(type));
      if (blockedByRole || (!hasDeclarations && (blockedByInventory || hasClauseExtractor))) {
        extractorNames = extractorNames.filter((name) => name !== "coverage_limits");
      }
    }

    if (inventoryTypes?.has("endorsement") && !extractorNames.includes("endorsements")) {
      extractorNames.push("endorsements");
    }
    if (extractorNames.length === 0) {
      extractorNames = ["sections"];
    }
    return { ...assignment, extractorNames };
  });
}
|
|
4273
|
+
/**
 * Formats the template hint text passed to the planning prompts.
 *
 * @param {string} primaryType - Primary policy type.
 * @param {string} documentType - Document type label.
 * @param {number} pageCount - Total number of pages in the document.
 * @param {{ expectedSections: string[], pageHints: Object<string, string> }} template
 * @returns {string} Four newline-separated lines: document type, expected
 *   sections, page hints ("key: value" pairs joined by "; ") and total pages.
 */
function buildTemplateHints(primaryType, documentType, pageCount, template) {
  const expectedSections = template.expectedSections.join(", ");
  const pageHints = Object.entries(template.pageHints)
    .map((entry) => `${entry[0]}: ${entry[1]}`)
    .join("; ");
  return `Document type: ${primaryType} ${documentType}
Expected sections: ${expectedSections}
Page hints: ${pageHints}
Total pages: ${pageCount}`;
}
|
|
4281
|
+
/**
 * Collapses a list of page numbers into inclusive runs of consecutive pages.
 *
 * Duplicates are ignored and the input does not need to be sorted.
 *
 * @param {number[]} pages - Page numbers in any order.
 * @returns {Array<{ startPage: number, endPage: number }>} Runs in ascending
 *   order; an empty array for empty input.
 */
function groupContiguousPages(pages) {
  const ranges = [];
  const ordered = Array.from(new Set(pages)).sort((a, b) => a - b);
  for (const page of ordered) {
    const openRange = ranges[ranges.length - 1];
    if (openRange !== undefined && page === openRange.endPage + 1) {
      // Directly follows the current run: extend it.
      openRange.endPage = page;
    } else {
      ranges.push({ startPage: page, endPage: page });
    }
  }
  return ranges;
}
|
|
4300
|
+
/**
 * Turns per-page extractor assignments into an extraction plan.
 *
 * Pages are bucketed by extractor (an assignment with no extractors goes to
 * "sections"), and every page from 1 to `pageCount` that no assignment
 * mentions is also added to "sections". Each extractor's pages are then
 * grouped into contiguous ranges, one task per range.
 *
 * @param {Array<{ extractorNames: string[], localPageNumber: number }>} pageAssignments
 * @param {number} pageCount - Total number of pages in the document.
 * @returns {{ tasks: Array<object>, pageMap: Array<{ section: string, pages: string }> }}
 *   `tasks` sorted by start page, then extractor name; `pageMap` with one
 *   entry per extractor in first-seen order.
 */
function buildPlanFromPageAssignments(pageAssignments, pageCount) {
  const pagesByExtractor = new Map();
  const assignedPages = new Set();
  const record = (extractorName, page) => {
    const bucket = pagesByExtractor.get(extractorName);
    if (bucket === undefined) {
      pagesByExtractor.set(extractorName, [page]);
    } else {
      bucket.push(page);
    }
  };

  for (const assignment of pageAssignments) {
    const names = assignment.extractorNames.length > 0 ? assignment.extractorNames : ["sections"];
    for (const extractorName of names) {
      record(extractorName, assignment.localPageNumber);
      assignedPages.add(assignment.localPageNumber);
    }
  }

  // Pages that no assignment mentions still get a generic sections pass.
  for (let page = 1; page <= pageCount; page += 1) {
    if (!assignedPages.has(page)) {
      record("sections", page);
    }
  }

  const tasks = [];
  const pageMap = [];
  for (const [extractorName, pages] of pagesByExtractor) {
    for (const { startPage, endPage } of groupContiguousPages(pages)) {
      tasks.push({
        extractorName,
        startPage,
        endPage,
        description: `Page-mapped ${extractorName} extraction for pages ${startPage}-${endPage}`
      });
    }
    const uniqueSortedPages = Array.from(new Set(pages)).sort((a, b) => a - b);
    pageMap.push({
      section: extractorName,
      pages: `pages ${uniqueSortedPages.join(", ")}`
    });
  }
  tasks.sort((a, b) => a.startPage - b.startPage || a.extractorName.localeCompare(b.extractorName));

  return { tasks, pageMap };
}
|
|
3574
4333
|
async function extract(pdfBase64, documentId, options) {
|
|
3575
4334
|
const id = documentId ?? `doc-${Date.now()}`;
|
|
3576
4335
|
const memory = /* @__PURE__ */ new Map();
|
|
3577
4336
|
totalUsage = { inputTokens: 0, outputTokens: 0 };
|
|
4337
|
+
modelCalls = 0;
|
|
4338
|
+
callsWithUsage = 0;
|
|
4339
|
+
callsMissingUsage = 0;
|
|
3578
4340
|
const pipelineCtx = createPipelineContext({
|
|
3579
4341
|
id,
|
|
3580
4342
|
onSave: onCheckpointSave,
|
|
@@ -3625,40 +4387,109 @@ function createExtractor(config) {
|
|
|
3625
4387
|
const primaryType = policyTypes[0] ?? "other";
|
|
3626
4388
|
const template = getTemplate(primaryType);
|
|
3627
4389
|
const pageCount = resumed?.pageCount ?? await getPdfPageCount(pdfBase64);
|
|
3628
|
-
|
|
3629
|
-
|
|
3630
|
-
|
|
3631
|
-
|
|
4390
|
+
const templateHints = buildTemplateHints(primaryType, documentType, pageCount, template);
|
|
4391
|
+
let formInventory;
|
|
4392
|
+
if (resumed?.formInventory && pipelineCtx.isPhaseComplete("form_inventory")) {
|
|
4393
|
+
formInventory = resumed.formInventory;
|
|
4394
|
+
memory.set("form_inventory", formInventory);
|
|
4395
|
+
onProgress?.("Resuming from checkpoint (form inventory complete)...");
|
|
3632
4396
|
} else {
|
|
3633
|
-
onProgress?.(`
|
|
3634
|
-
const
|
|
3635
|
-
`Document type: ${primaryType} ${documentType}`,
|
|
3636
|
-
`Expected sections: ${template.expectedSections.join(", ")}`,
|
|
3637
|
-
`Page hints: ${Object.entries(template.pageHints).map(([k, v]) => `${k}: ${v}`).join("; ")}`,
|
|
3638
|
-
`Total pages: ${pageCount}`
|
|
3639
|
-
].join("\n");
|
|
3640
|
-
const planResponse = await safeGenerateObject(
|
|
4397
|
+
onProgress?.(`Building form inventory for ${primaryType} ${documentType}...`);
|
|
4398
|
+
const formInventoryResponse = await safeGenerateObject(
|
|
3641
4399
|
generateObject,
|
|
3642
4400
|
{
|
|
3643
|
-
prompt:
|
|
3644
|
-
schema:
|
|
4401
|
+
prompt: buildFormInventoryPrompt(templateHints),
|
|
4402
|
+
schema: FormInventorySchema,
|
|
3645
4403
|
maxTokens: 2048,
|
|
3646
4404
|
providerOptions: { ...providerOptions, pdfBase64 }
|
|
3647
4405
|
},
|
|
3648
4406
|
{
|
|
3649
|
-
fallback: {
|
|
3650
|
-
tasks: [{ extractorName: "sections", startPage: 1, endPage: pageCount, description: "Full document fallback extraction" }]
|
|
3651
|
-
},
|
|
4407
|
+
fallback: { forms: [] },
|
|
3652
4408
|
log,
|
|
3653
|
-
onError: (err, attempt) => log?.(`
|
|
4409
|
+
onError: (err, attempt) => log?.(`Form inventory attempt ${attempt + 1} failed: ${err instanceof Error ? err.message : String(err)}`)
|
|
3654
4410
|
}
|
|
3655
4411
|
);
|
|
3656
|
-
trackUsage(
|
|
3657
|
-
|
|
4412
|
+
trackUsage(formInventoryResponse.usage);
|
|
4413
|
+
formInventory = formInventoryResponse.object;
|
|
4414
|
+
memory.set("form_inventory", formInventory);
|
|
4415
|
+
await pipelineCtx.save("form_inventory", {
|
|
4416
|
+
id,
|
|
4417
|
+
pageCount,
|
|
4418
|
+
classifyResult,
|
|
4419
|
+
formInventory,
|
|
4420
|
+
memory: Object.fromEntries(memory)
|
|
4421
|
+
});
|
|
4422
|
+
}
|
|
4423
|
+
let pageAssignments;
|
|
4424
|
+
if (resumed?.pageAssignments && pipelineCtx.isPhaseComplete("page_map")) {
|
|
4425
|
+
pageAssignments = resumed.pageAssignments;
|
|
4426
|
+
onProgress?.("Resuming from checkpoint (page map complete)...");
|
|
4427
|
+
} else {
|
|
4428
|
+
onProgress?.(`Mapping document pages for ${primaryType} ${documentType}...`);
|
|
4429
|
+
const chunkSize = 8;
|
|
4430
|
+
const collectedAssignments = [];
|
|
4431
|
+
const formInventoryHint = formInventory?.forms.length ? formatFormInventoryForPageMap(formInventory.forms) : void 0;
|
|
4432
|
+
for (let startPage = 1; startPage <= pageCount; startPage += chunkSize) {
|
|
4433
|
+
const endPage = Math.min(pageCount, startPage + chunkSize - 1);
|
|
4434
|
+
const pagesPdf = await extractPageRange(pdfBase64, startPage, endPage);
|
|
4435
|
+
const mapResponse = await safeGenerateObject(
|
|
4436
|
+
generateObject,
|
|
4437
|
+
{
|
|
4438
|
+
prompt: buildPageMapPrompt(templateHints, startPage, endPage, formInventoryHint),
|
|
4439
|
+
schema: PageMapChunkSchema,
|
|
4440
|
+
maxTokens: 2048,
|
|
4441
|
+
providerOptions: { ...providerOptions, pdfBase64: pagesPdf }
|
|
4442
|
+
},
|
|
4443
|
+
{
|
|
4444
|
+
fallback: {
|
|
4445
|
+
pages: Array.from({ length: endPage - startPage + 1 }, (_, index) => ({
|
|
4446
|
+
localPageNumber: index + 1,
|
|
4447
|
+
extractorNames: index === 0 && startPage === 1 ? ["carrier_info", "named_insured", "declarations", "coverage_limits"] : ["sections"],
|
|
4448
|
+
confidence: 0,
|
|
4449
|
+
notes: "Fallback page assignment"
|
|
4450
|
+
}))
|
|
4451
|
+
},
|
|
4452
|
+
log,
|
|
4453
|
+
onError: (err, attempt) => log?.(`Page map attempt ${attempt + 1} failed for pages ${startPage}-${endPage}: ${err}`)
|
|
4454
|
+
}
|
|
4455
|
+
);
|
|
4456
|
+
trackUsage(mapResponse.usage);
|
|
4457
|
+
for (const assignment of mapResponse.object.pages) {
|
|
4458
|
+
collectedAssignments.push({
|
|
4459
|
+
...assignment,
|
|
4460
|
+
localPageNumber: startPage + assignment.localPageNumber - 1
|
|
4461
|
+
});
|
|
4462
|
+
}
|
|
4463
|
+
}
|
|
4464
|
+
pageAssignments = collectedAssignments.length > 0 ? collectedAssignments : Array.from({ length: pageCount }, (_, index) => ({
|
|
4465
|
+
localPageNumber: index + 1,
|
|
4466
|
+
extractorNames: index === 0 ? ["carrier_info", "named_insured", "declarations", "coverage_limits"] : ["sections"],
|
|
4467
|
+
confidence: 0,
|
|
4468
|
+
notes: "Full-document fallback page assignment"
|
|
4469
|
+
}));
|
|
4470
|
+
pageAssignments = normalizePageAssignments(pageAssignments, formInventory);
|
|
4471
|
+
await pipelineCtx.save("page_map", {
|
|
4472
|
+
id,
|
|
4473
|
+
pageCount,
|
|
4474
|
+
classifyResult,
|
|
4475
|
+
formInventory,
|
|
4476
|
+
pageAssignments,
|
|
4477
|
+
memory: Object.fromEntries(memory)
|
|
4478
|
+
});
|
|
4479
|
+
}
|
|
4480
|
+
let plan;
|
|
4481
|
+
if (resumed?.plan && pipelineCtx.isPhaseComplete("plan")) {
|
|
4482
|
+
plan = resumed.plan;
|
|
4483
|
+
onProgress?.("Resuming from checkpoint (plan complete)...");
|
|
4484
|
+
} else {
|
|
4485
|
+
onProgress?.(`Building extraction plan from page map for ${primaryType} ${documentType}...`);
|
|
4486
|
+
plan = buildPlanFromPageAssignments(pageAssignments, pageCount);
|
|
3658
4487
|
await pipelineCtx.save("plan", {
|
|
3659
4488
|
id,
|
|
3660
4489
|
pageCount,
|
|
3661
4490
|
classifyResult,
|
|
4491
|
+
formInventory,
|
|
4492
|
+
pageAssignments,
|
|
3662
4493
|
plan,
|
|
3663
4494
|
memory: Object.fromEntries(memory)
|
|
3664
4495
|
});
|
|
@@ -3699,35 +4530,46 @@ function createExtractor(config) {
|
|
|
3699
4530
|
);
|
|
3700
4531
|
for (const result of extractorResults) {
|
|
3701
4532
|
if (result) {
|
|
3702
|
-
|
|
4533
|
+
mergeMemoryResult(result.name, result.data, memory);
|
|
3703
4534
|
}
|
|
3704
4535
|
}
|
|
3705
4536
|
await pipelineCtx.save("extract", {
|
|
3706
4537
|
id,
|
|
3707
4538
|
pageCount,
|
|
3708
4539
|
classifyResult,
|
|
4540
|
+
formInventory,
|
|
4541
|
+
pageAssignments,
|
|
3709
4542
|
plan,
|
|
3710
4543
|
memory: Object.fromEntries(memory)
|
|
3711
4544
|
});
|
|
3712
4545
|
}
|
|
4546
|
+
let reviewRounds = resumed?.reviewReport?.reviewRoundRecords ?? [];
|
|
4547
|
+
let reviewReport = resumed?.reviewReport;
|
|
3713
4548
|
if (!pipelineCtx.isPhaseComplete("review")) {
|
|
4549
|
+
reviewRounds = [];
|
|
3714
4550
|
for (let round = 0; round < maxReviewRounds; round++) {
|
|
3715
4551
|
const extractedKeys = [...memory.keys()].filter((k) => k !== "classify");
|
|
4552
|
+
const extractionSummary = summarizeExtraction(memory);
|
|
4553
|
+
const pageMapSummary = formatPageMapSummary(pageAssignments);
|
|
3716
4554
|
const reviewResponse = await safeGenerateObject(
|
|
3717
4555
|
generateObject,
|
|
3718
4556
|
{
|
|
3719
|
-
prompt: buildReviewPrompt(template.required, extractedKeys),
|
|
4557
|
+
prompt: buildReviewPrompt(template.required, extractedKeys, extractionSummary, pageMapSummary),
|
|
3720
4558
|
schema: ReviewResultSchema,
|
|
3721
|
-
maxTokens:
|
|
3722
|
-
providerOptions
|
|
4559
|
+
maxTokens: 1536,
|
|
4560
|
+
providerOptions: { ...providerOptions, pdfBase64 }
|
|
3723
4561
|
},
|
|
3724
4562
|
{
|
|
3725
|
-
fallback: { complete: true, missingFields: [], additionalTasks: [] },
|
|
4563
|
+
fallback: { complete: true, missingFields: [], qualityIssues: [], additionalTasks: [] },
|
|
3726
4564
|
log,
|
|
3727
4565
|
onError: (err, attempt) => log?.(`Review round ${round + 1} attempt ${attempt + 1} failed: ${err}`)
|
|
3728
4566
|
}
|
|
3729
4567
|
);
|
|
3730
4568
|
trackUsage(reviewResponse.usage);
|
|
4569
|
+
reviewRounds.push(toReviewRoundRecord(round + 1, reviewResponse.object));
|
|
4570
|
+
if (reviewResponse.object.qualityIssues?.length) {
|
|
4571
|
+
await log?.(`Review round ${round + 1} quality issues: ${reviewResponse.object.qualityIssues.join("; ")}`);
|
|
4572
|
+
}
|
|
3731
4573
|
if (reviewResponse.object.complete || reviewResponse.object.additionalTasks.length === 0) {
|
|
3732
4574
|
onProgress?.("Extraction complete.");
|
|
3733
4575
|
break;
|
|
@@ -3762,25 +4604,49 @@ function createExtractor(config) {
|
|
|
3762
4604
|
);
|
|
3763
4605
|
for (const result of followUpResults) {
|
|
3764
4606
|
if (result) {
|
|
3765
|
-
|
|
4607
|
+
mergeMemoryResult(result.name, result.data, memory);
|
|
3766
4608
|
}
|
|
3767
4609
|
}
|
|
3768
4610
|
}
|
|
4611
|
+
reviewReport = buildExtractionReviewReport({
|
|
4612
|
+
memory,
|
|
4613
|
+
pageAssignments,
|
|
4614
|
+
reviewRounds
|
|
4615
|
+
});
|
|
4616
|
+
if (reviewReport.issues.length > 0) {
|
|
4617
|
+
await log?.(
|
|
4618
|
+
`Deterministic review issues: ${reviewReport.issues.map((issue) => issue.message).join("; ")}`
|
|
4619
|
+
);
|
|
4620
|
+
}
|
|
4621
|
+
if (shouldFailQualityGate(qualityGate, reviewReport.qualityGateStatus)) {
|
|
4622
|
+
throw new Error("Extraction quality gate failed. See reviewReport for blocking issues.");
|
|
4623
|
+
}
|
|
3769
4624
|
await pipelineCtx.save("review", {
|
|
3770
4625
|
id,
|
|
3771
4626
|
pageCount,
|
|
3772
4627
|
classifyResult,
|
|
4628
|
+
formInventory,
|
|
4629
|
+
pageAssignments,
|
|
3773
4630
|
plan,
|
|
4631
|
+
reviewReport,
|
|
3774
4632
|
memory: Object.fromEntries(memory)
|
|
3775
4633
|
});
|
|
3776
4634
|
}
|
|
4635
|
+
reviewReport ?? (reviewReport = buildExtractionReviewReport({
|
|
4636
|
+
memory,
|
|
4637
|
+
pageAssignments,
|
|
4638
|
+
reviewRounds
|
|
4639
|
+
}));
|
|
3777
4640
|
onProgress?.("Assembling document...");
|
|
3778
4641
|
const document = assembleDocument(id, documentType, memory);
|
|
3779
4642
|
await pipelineCtx.save("assemble", {
|
|
3780
4643
|
id,
|
|
3781
4644
|
pageCount,
|
|
3782
4645
|
classifyResult,
|
|
4646
|
+
formInventory,
|
|
4647
|
+
pageAssignments,
|
|
3783
4648
|
plan,
|
|
4649
|
+
reviewReport,
|
|
3784
4650
|
memory: Object.fromEntries(memory),
|
|
3785
4651
|
document
|
|
3786
4652
|
});
|
|
@@ -3793,11 +4659,21 @@ function createExtractor(config) {
|
|
|
3793
4659
|
trackUsage(formatResult.usage);
|
|
3794
4660
|
const chunks = chunkDocument(formatResult.document);
|
|
3795
4661
|
const finalCheckpoint = pipelineCtx.getCheckpoint();
|
|
4662
|
+
if (callsMissingUsage > 0) {
|
|
4663
|
+
await log?.(`Token usage was unavailable for ${callsMissingUsage}/${modelCalls} model calls. Check that your provider callbacks return usage.`);
|
|
4664
|
+
onProgress?.(`Token usage unavailable for ${callsMissingUsage}/${modelCalls} model calls.`);
|
|
4665
|
+
}
|
|
3796
4666
|
return {
|
|
3797
4667
|
document: formatResult.document,
|
|
3798
4668
|
chunks,
|
|
3799
4669
|
tokenUsage: totalUsage,
|
|
3800
|
-
|
|
4670
|
+
usageReporting: {
|
|
4671
|
+
modelCalls,
|
|
4672
|
+
callsWithUsage,
|
|
4673
|
+
callsMissingUsage
|
|
4674
|
+
},
|
|
4675
|
+
checkpoint: finalCheckpoint,
|
|
4676
|
+
reviewReport
|
|
3801
4677
|
};
|
|
3802
4678
|
}
|
|
3803
4679
|
return { extract };
|
|
@@ -4017,8 +4893,8 @@ Respond with JSON only:
|
|
|
4017
4893
|
}`;
|
|
4018
4894
|
|
|
4019
4895
|
// src/schemas/application.ts
|
|
4020
|
-
var
|
|
4021
|
-
var FieldTypeSchema =
|
|
4896
|
+
var import_zod33 = require("zod");
|
|
4897
|
+
var FieldTypeSchema = import_zod33.z.enum([
|
|
4022
4898
|
"text",
|
|
4023
4899
|
"numeric",
|
|
4024
4900
|
"currency",
|
|
@@ -4027,100 +4903,131 @@ var FieldTypeSchema = import_zod32.z.enum([
|
|
|
4027
4903
|
"table",
|
|
4028
4904
|
"declaration"
|
|
4029
4905
|
]);
|
|
4030
|
-
var ApplicationFieldSchema =
|
|
4031
|
-
id:
|
|
4032
|
-
label:
|
|
4033
|
-
section:
|
|
4906
|
+
var ApplicationFieldSchema = import_zod33.z.object({
|
|
4907
|
+
id: import_zod33.z.string(),
|
|
4908
|
+
label: import_zod33.z.string(),
|
|
4909
|
+
section: import_zod33.z.string(),
|
|
4034
4910
|
fieldType: FieldTypeSchema,
|
|
4035
|
-
required:
|
|
4036
|
-
options:
|
|
4037
|
-
columns:
|
|
4038
|
-
requiresExplanationIfYes:
|
|
4039
|
-
condition:
|
|
4040
|
-
dependsOn:
|
|
4041
|
-
whenValue:
|
|
4911
|
+
required: import_zod33.z.boolean(),
|
|
4912
|
+
options: import_zod33.z.array(import_zod33.z.string()).optional(),
|
|
4913
|
+
columns: import_zod33.z.array(import_zod33.z.string()).optional(),
|
|
4914
|
+
requiresExplanationIfYes: import_zod33.z.boolean().optional(),
|
|
4915
|
+
condition: import_zod33.z.object({
|
|
4916
|
+
dependsOn: import_zod33.z.string(),
|
|
4917
|
+
whenValue: import_zod33.z.string()
|
|
4042
4918
|
}).optional(),
|
|
4043
|
-
value:
|
|
4044
|
-
source:
|
|
4045
|
-
confidence:
|
|
4919
|
+
value: import_zod33.z.string().optional(),
|
|
4920
|
+
source: import_zod33.z.string().optional().describe("Where the value came from: auto-fill, user, lookup"),
|
|
4921
|
+
confidence: import_zod33.z.enum(["confirmed", "high", "medium", "low"]).optional()
|
|
4922
|
+
});
|
|
4923
|
+
var ApplicationClassifyResultSchema = import_zod33.z.object({
|
|
4924
|
+
isApplication: import_zod33.z.boolean(),
|
|
4925
|
+
confidence: import_zod33.z.number().min(0).max(1),
|
|
4926
|
+
applicationType: import_zod33.z.string().nullable()
|
|
4927
|
+
});
|
|
4928
|
+
var FieldExtractionResultSchema = import_zod33.z.object({
|
|
4929
|
+
fields: import_zod33.z.array(ApplicationFieldSchema)
|
|
4930
|
+
});
|
|
4931
|
+
var AutoFillMatchSchema = import_zod33.z.object({
|
|
4932
|
+
fieldId: import_zod33.z.string(),
|
|
4933
|
+
value: import_zod33.z.string(),
|
|
4934
|
+
confidence: import_zod33.z.enum(["confirmed"]),
|
|
4935
|
+
contextKey: import_zod33.z.string()
|
|
4046
4936
|
});
|
|
4047
|
-
var
|
|
4048
|
-
|
|
4049
|
-
confidence: import_zod32.z.number().min(0).max(1),
|
|
4050
|
-
applicationType: import_zod32.z.string().nullable()
|
|
4937
|
+
var AutoFillResultSchema = import_zod33.z.object({
|
|
4938
|
+
matches: import_zod33.z.array(AutoFillMatchSchema)
|
|
4051
4939
|
});
|
|
4052
|
-
var
|
|
4053
|
-
|
|
4940
|
+
var QuestionBatchResultSchema = import_zod33.z.object({
|
|
4941
|
+
batches: import_zod33.z.array(import_zod33.z.array(import_zod33.z.string()).describe("Array of field IDs in this batch"))
|
|
4054
4942
|
});
|
|
4055
|
-
var
|
|
4056
|
-
|
|
4057
|
-
|
|
4058
|
-
|
|
4059
|
-
|
|
4943
|
+
var LookupRequestSchema = import_zod33.z.object({
|
|
4944
|
+
type: import_zod33.z.string().describe("Type of lookup: 'records', 'website', 'policy'"),
|
|
4945
|
+
description: import_zod33.z.string(),
|
|
4946
|
+
url: import_zod33.z.string().optional(),
|
|
4947
|
+
targetFieldIds: import_zod33.z.array(import_zod33.z.string())
|
|
4060
4948
|
});
|
|
4061
|
-
var
|
|
4062
|
-
|
|
4949
|
+
var ReplyIntentSchema = import_zod33.z.object({
|
|
4950
|
+
primaryIntent: import_zod33.z.enum(["answers_only", "question", "lookup_request", "mixed"]),
|
|
4951
|
+
hasAnswers: import_zod33.z.boolean(),
|
|
4952
|
+
questionText: import_zod33.z.string().optional(),
|
|
4953
|
+
questionFieldIds: import_zod33.z.array(import_zod33.z.string()).optional(),
|
|
4954
|
+
lookupRequests: import_zod33.z.array(LookupRequestSchema).optional()
|
|
4063
4955
|
});
|
|
4064
|
-
var
|
|
4065
|
-
|
|
4956
|
+
var ParsedAnswerSchema = import_zod33.z.object({
|
|
4957
|
+
fieldId: import_zod33.z.string(),
|
|
4958
|
+
value: import_zod33.z.string(),
|
|
4959
|
+
explanation: import_zod33.z.string().optional()
|
|
4066
4960
|
});
|
|
4067
|
-
var
|
|
4068
|
-
|
|
4069
|
-
|
|
4070
|
-
url: import_zod32.z.string().optional(),
|
|
4071
|
-
targetFieldIds: import_zod32.z.array(import_zod32.z.string())
|
|
4961
|
+
var AnswerParsingResultSchema = import_zod33.z.object({
|
|
4962
|
+
answers: import_zod33.z.array(ParsedAnswerSchema),
|
|
4963
|
+
unanswered: import_zod33.z.array(import_zod33.z.string()).describe("Field IDs that were not answered")
|
|
4072
4964
|
});
|
|
4073
|
-
var
|
|
4074
|
-
|
|
4075
|
-
|
|
4076
|
-
|
|
4077
|
-
questionFieldIds: import_zod32.z.array(import_zod32.z.string()).optional(),
|
|
4078
|
-
lookupRequests: import_zod32.z.array(LookupRequestSchema).optional()
|
|
4965
|
+
var LookupFillSchema = import_zod33.z.object({
|
|
4966
|
+
fieldId: import_zod33.z.string(),
|
|
4967
|
+
value: import_zod33.z.string(),
|
|
4968
|
+
source: import_zod33.z.string().describe("Specific citable reference, e.g. 'GL Policy #POL-12345 (Hartford)'")
|
|
4079
4969
|
});
|
|
4080
|
-
var
|
|
4081
|
-
|
|
4082
|
-
|
|
4083
|
-
explanation:
|
|
4970
|
+
var LookupFillResultSchema = import_zod33.z.object({
|
|
4971
|
+
fills: import_zod33.z.array(LookupFillSchema),
|
|
4972
|
+
unfillable: import_zod33.z.array(import_zod33.z.string()),
|
|
4973
|
+
explanation: import_zod33.z.string().optional()
|
|
4084
4974
|
});
|
|
4085
|
-
var
|
|
4086
|
-
|
|
4087
|
-
|
|
4975
|
+
var FlatPdfPlacementSchema = import_zod33.z.object({
|
|
4976
|
+
fieldId: import_zod33.z.string(),
|
|
4977
|
+
page: import_zod33.z.number(),
|
|
4978
|
+
x: import_zod33.z.number().describe("Percentage from left edge (0-100)"),
|
|
4979
|
+
y: import_zod33.z.number().describe("Percentage from top edge (0-100)"),
|
|
4980
|
+
text: import_zod33.z.string(),
|
|
4981
|
+
fontSize: import_zod33.z.number().optional(),
|
|
4982
|
+
isCheckmark: import_zod33.z.boolean().optional()
|
|
4088
4983
|
});
|
|
4089
|
-
var
|
|
4090
|
-
fieldId:
|
|
4091
|
-
|
|
4092
|
-
|
|
4984
|
+
var AcroFormMappingSchema = import_zod33.z.object({
|
|
4985
|
+
fieldId: import_zod33.z.string(),
|
|
4986
|
+
acroFormName: import_zod33.z.string(),
|
|
4987
|
+
value: import_zod33.z.string()
|
|
4093
4988
|
});
|
|
4094
|
-
var
|
|
4095
|
-
|
|
4096
|
-
|
|
4097
|
-
|
|
4989
|
+
var QualityGateStatusSchema = import_zod33.z.enum(["passed", "warning", "failed"]);
|
|
4990
|
+
var QualitySeveritySchema = import_zod33.z.enum(["info", "warning", "blocking"]);
|
|
4991
|
+
var ApplicationQualityIssueSchema = import_zod33.z.object({
|
|
4992
|
+
code: import_zod33.z.string(),
|
|
4993
|
+
severity: QualitySeveritySchema,
|
|
4994
|
+
message: import_zod33.z.string(),
|
|
4995
|
+
fieldId: import_zod33.z.string().optional()
|
|
4098
4996
|
});
|
|
4099
|
-
var
|
|
4100
|
-
|
|
4101
|
-
|
|
4102
|
-
|
|
4103
|
-
|
|
4104
|
-
text: import_zod32.z.string(),
|
|
4105
|
-
fontSize: import_zod32.z.number().optional(),
|
|
4106
|
-
isCheckmark: import_zod32.z.boolean().optional()
|
|
4997
|
+
var ApplicationQualityRoundSchema = import_zod33.z.object({
|
|
4998
|
+
round: import_zod33.z.number(),
|
|
4999
|
+
kind: import_zod33.z.string(),
|
|
5000
|
+
status: QualityGateStatusSchema,
|
|
5001
|
+
summary: import_zod33.z.string().optional()
|
|
4107
5002
|
});
|
|
4108
|
-
var
|
|
4109
|
-
|
|
4110
|
-
|
|
4111
|
-
|
|
5003
|
+
var ApplicationQualityArtifactSchema = import_zod33.z.object({
|
|
5004
|
+
kind: import_zod33.z.string(),
|
|
5005
|
+
label: import_zod33.z.string().optional(),
|
|
5006
|
+
itemCount: import_zod33.z.number().optional()
|
|
4112
5007
|
});
|
|
4113
|
-
var
|
|
4114
|
-
|
|
4115
|
-
|
|
4116
|
-
|
|
4117
|
-
|
|
4118
|
-
|
|
4119
|
-
|
|
4120
|
-
|
|
4121
|
-
|
|
4122
|
-
|
|
4123
|
-
|
|
5008
|
+
var ApplicationEmailReviewSchema = import_zod33.z.object({
|
|
5009
|
+
issues: import_zod33.z.array(ApplicationQualityIssueSchema),
|
|
5010
|
+
qualityGateStatus: QualityGateStatusSchema
|
|
5011
|
+
});
|
|
5012
|
+
var ApplicationQualityReportSchema = import_zod33.z.object({
|
|
5013
|
+
issues: import_zod33.z.array(ApplicationQualityIssueSchema),
|
|
5014
|
+
rounds: import_zod33.z.array(ApplicationQualityRoundSchema).optional(),
|
|
5015
|
+
artifacts: import_zod33.z.array(ApplicationQualityArtifactSchema).optional(),
|
|
5016
|
+
emailReview: ApplicationEmailReviewSchema.optional(),
|
|
5017
|
+
qualityGateStatus: QualityGateStatusSchema
|
|
5018
|
+
});
|
|
5019
|
+
var ApplicationStateSchema = import_zod33.z.object({
|
|
5020
|
+
id: import_zod33.z.string(),
|
|
5021
|
+
pdfBase64: import_zod33.z.string().optional().describe("Original PDF, omitted after extraction"),
|
|
5022
|
+
title: import_zod33.z.string().optional(),
|
|
5023
|
+
applicationType: import_zod33.z.string().nullable().optional(),
|
|
5024
|
+
fields: import_zod33.z.array(ApplicationFieldSchema),
|
|
5025
|
+
batches: import_zod33.z.array(import_zod33.z.array(import_zod33.z.string())).optional(),
|
|
5026
|
+
currentBatchIndex: import_zod33.z.number().default(0),
|
|
5027
|
+
qualityReport: ApplicationQualityReportSchema.optional(),
|
|
5028
|
+
status: import_zod33.z.enum(["classifying", "extracting", "auto_filling", "batching", "collecting", "confirming", "mapping", "complete"]),
|
|
5029
|
+
createdAt: import_zod33.z.number(),
|
|
5030
|
+
updatedAt: import_zod33.z.number()
|
|
4124
5031
|
});
|
|
4125
5032
|
|
|
4126
5033
|
// src/application/agents/classifier.ts
|
|
@@ -4628,6 +5535,87 @@ async function generateBatchEmail(batchFields, batchIndex, totalBatches, opts, g
|
|
|
4628
5535
|
return { text, usage };
|
|
4629
5536
|
}
|
|
4630
5537
|
|
|
5538
|
+
// src/application/quality.ts
|
|
5539
|
+
/**
 * Decide whether a field's provenance string is too vague to be cited.
 *
 * A source counts as vague when it is missing, blank after trimming, or matches
 * one of the generic placeholders checked below (compared case-insensitively).
 *
 * @param {string | null | undefined} source - Provenance text recorded for a filled field.
 * @returns {boolean} `true` when the source cannot serve as a citable reference.
 */
function isVagueSource(source) {
  if (!source) return true;
  const normalized = source.trim().toLowerCase();
  // A whitespace-only source carries no provenance, so treat it like a missing one.
  if (normalized === "") return true;
  // These placeholders must match the whole string ...
  const exactPlaceholders = ["unknown", "context", "user provided"];
  // ... while these are vague wherever they appear inside it.
  const embeddedPlaceholders = ["existing records", "available data"];
  return exactPlaceholders.includes(normalized) || embeddedPlaceholders.some((phrase) => normalized.includes(phrase));
}
|
|
5544
|
+
/**
 * Run the deterministic quality checks over every field of an application state.
 *
 * Checks per field, in this order:
 *  - duplicate id (blocking)
 *  - required but not filled (warning)
 *  - filled without any source (blocking)
 *  - filled with a vague source (warning) — only evaluated when a source is present,
 *    so a missing source is reported once instead of as both "missing" and "vague"
 *  - filled with low or missing confidence (warning)
 *
 * @param {{ fields: Array<object> }} state - Application state; only `state.fields` is read.
 * @returns {object} Report containing `issues`, an empty `rounds` list, a single
 *   `artifacts` entry counting the fields, and `qualityGateStatus` as returned by
 *   `evaluateQualityGate({ issues })`.
 */
function buildApplicationQualityReport(state) {
  const issues = [];
  const seenIds = new Set();
  for (const field of state.fields) {
    const flag = (code, severity, message) => {
      issues.push({ code, severity, message, fieldId: field.id });
    };
    if (seenIds.has(field.id)) {
      flag("duplicate_field_id", "blocking", `Field "${field.label}" has a duplicate id "${field.id}".`);
    }
    seenIds.add(field.id);
    if (field.required && !field.value) {
      flag("required_field_unfilled", "warning", `Required field "${field.label}" is still unfilled.`);
    }
    if (!field.value) {
      // The remaining checks only apply to filled fields.
      continue;
    }
    if (!field.source) {
      flag("filled_field_missing_source", "blocking", `Filled field "${field.label}" is missing source provenance.`);
    } else if (isVagueSource(field.source)) {
      flag("filled_field_vague_source", "warning", `Filled field "${field.label}" has a vague or non-citable source.`);
    }
    if (!field.confidence || field.confidence === "low") {
      flag("filled_field_low_confidence", "warning", `Filled field "${field.label}" has low or missing confidence.`);
    }
  }
  return {
    issues,
    rounds: [],
    artifacts: [
      { kind: "application_fields", label: "Application Fields", itemCount: state.fields.length }
    ],
    qualityGateStatus: evaluateQualityGate({ issues })
  };
}
|
|
5599
|
+
/**
 * Check that a generated batch email mentions each field it is supposed to ask about.
 *
 * The comparison is a case-insensitive substring match of the trimmed field label
 * against the email text. Labels shorter than six characters are not checked.
 *
 * @param {string} text - Generated email body.
 * @param {Iterable<{ id: string, label: string }>} batchFields - Fields covered by this batch.
 * @returns {{ issues: Array<object>, qualityGateStatus: * }} One warning per unmentioned
 *   field, plus the status returned by `evaluateQualityGate({ issues })`.
 */
function reviewBatchEmail(text, batchFields) {
  const emailBody = text.toLowerCase();
  const issues = [];
  for (const field of batchFields) {
    const wantedLabel = field.label.trim().toLowerCase();
    if (wantedLabel.length < 6) {
      continue;
    }
    if (emailBody.includes(wantedLabel)) {
      continue;
    }
    issues.push({
      code: "email_missing_field_prompt",
      severity: "warning",
      message: `Generated email does not clearly mention field "${field.label}".`,
      fieldId: field.id
    });
  }
  const qualityGateStatus = evaluateQualityGate({ issues });
  return { issues, qualityGateStatus };
}
|
|
5618
|
+
|
|
4631
5619
|
// src/application/coordinator.ts
|
|
4632
5620
|
function createApplicationPipeline(config) {
|
|
4633
5621
|
const {
|
|
@@ -4642,7 +5630,8 @@ function createApplicationPipeline(config) {
|
|
|
4642
5630
|
onTokenUsage,
|
|
4643
5631
|
onProgress,
|
|
4644
5632
|
log,
|
|
4645
|
-
providerOptions
|
|
5633
|
+
providerOptions,
|
|
5634
|
+
qualityGate = "warn"
|
|
4646
5635
|
} = config;
|
|
4647
5636
|
const limit = pLimit(concurrency);
|
|
4648
5637
|
let totalUsage = { inputTokens: 0, outputTokens: 0 };
|
|
@@ -4664,6 +5653,7 @@ function createApplicationPipeline(config) {
|
|
|
4664
5653
|
title: void 0,
|
|
4665
5654
|
applicationType: null,
|
|
4666
5655
|
fields: [],
|
|
5656
|
+
qualityReport: void 0,
|
|
4667
5657
|
batches: void 0,
|
|
4668
5658
|
currentBatchIndex: 0,
|
|
4669
5659
|
status: "classifying",
|
|
@@ -4688,8 +5678,9 @@ function createApplicationPipeline(config) {
|
|
|
4688
5678
|
if (!classifyResult.isApplication) {
|
|
4689
5679
|
state.status = "complete";
|
|
4690
5680
|
state.updatedAt = Date.now();
|
|
5681
|
+
state.qualityReport = buildApplicationQualityReport(state);
|
|
4691
5682
|
await applicationStore?.save(state);
|
|
4692
|
-
return { state, tokenUsage: totalUsage };
|
|
5683
|
+
return { state, tokenUsage: totalUsage, reviewReport: state.qualityReport };
|
|
4693
5684
|
}
|
|
4694
5685
|
state.applicationType = classifyResult.applicationType;
|
|
4695
5686
|
state.status = "extracting";
|
|
@@ -4713,8 +5704,9 @@ function createApplicationPipeline(config) {
|
|
|
4713
5704
|
await log?.("No fields extracted, completing pipeline with empty result");
|
|
4714
5705
|
state.status = "complete";
|
|
4715
5706
|
state.updatedAt = Date.now();
|
|
5707
|
+
state.qualityReport = buildApplicationQualityReport(state);
|
|
4716
5708
|
await applicationStore?.save(state);
|
|
4717
|
-
return { state, tokenUsage: totalUsage };
|
|
5709
|
+
return { state, tokenUsage: totalUsage, reviewReport: state.qualityReport };
|
|
4718
5710
|
}
|
|
4719
5711
|
state.fields = fields;
|
|
4720
5712
|
state.title = classifyResult.applicationType ?? void 0;
|
|
@@ -4814,11 +5806,15 @@ function createApplicationPipeline(config) {
|
|
|
4814
5806
|
} else {
|
|
4815
5807
|
state.status = "confirming";
|
|
4816
5808
|
}
|
|
5809
|
+
state.qualityReport = buildApplicationQualityReport(state);
|
|
4817
5810
|
state.updatedAt = Date.now();
|
|
4818
5811
|
await applicationStore?.save(state);
|
|
5812
|
+
if (shouldFailQualityGate(qualityGate, state.qualityReport.qualityGateStatus)) {
|
|
5813
|
+
throw new Error("Application quality gate failed. See state.qualityReport for blocking issues.");
|
|
5814
|
+
}
|
|
4819
5815
|
const filledCount = state.fields.filter((f) => f.value).length;
|
|
4820
5816
|
onProgress?.(`Application processed: ${filledCount}/${state.fields.length} fields filled, ${state.batches?.length ?? 0} batches to collect.`);
|
|
4821
|
-
return { state, tokenUsage: totalUsage };
|
|
5817
|
+
return { state, tokenUsage: totalUsage, reviewReport: state.qualityReport };
|
|
4822
5818
|
}
|
|
4823
5819
|
async function processReply(input) {
|
|
4824
5820
|
totalUsage = { inputTokens: 0, outputTokens: 0 };
|
|
@@ -4965,6 +5961,11 @@ Provide a brief, helpful explanation (2-3 sentences). End with "Just reply with
|
|
|
4965
5961
|
providerOptions
|
|
4966
5962
|
);
|
|
4967
5963
|
trackUsage(emailUsage);
|
|
5964
|
+
const emailReview = reviewBatchEmail(emailText, nextBatchFields);
|
|
5965
|
+
state.qualityReport = {
|
|
5966
|
+
...buildApplicationQualityReport(state),
|
|
5967
|
+
emailReview
|
|
5968
|
+
};
|
|
4968
5969
|
if (!responseText) {
|
|
4969
5970
|
responseText = emailText;
|
|
4970
5971
|
} else {
|
|
@@ -4980,13 +5981,18 @@ ${emailText}`;
|
|
|
4980
5981
|
}
|
|
4981
5982
|
}
|
|
4982
5983
|
state.updatedAt = Date.now();
|
|
5984
|
+
state.qualityReport = state.qualityReport ?? buildApplicationQualityReport(state);
|
|
4983
5985
|
await applicationStore?.save(state);
|
|
5986
|
+
if (shouldFailQualityGate(qualityGate, state.qualityReport.qualityGateStatus)) {
|
|
5987
|
+
throw new Error("Application quality gate failed. See state.qualityReport for blocking issues.");
|
|
5988
|
+
}
|
|
4984
5989
|
return {
|
|
4985
5990
|
state,
|
|
4986
5991
|
intent: intent.primaryIntent,
|
|
4987
5992
|
fieldsFilled,
|
|
4988
5993
|
responseText,
|
|
4989
|
-
tokenUsage: totalUsage
|
|
5994
|
+
tokenUsage: totalUsage,
|
|
5995
|
+
reviewReport: state.qualityReport
|
|
4990
5996
|
};
|
|
4991
5997
|
}
|
|
4992
5998
|
async function generateCurrentBatchEmail(applicationId, opts) {
|
|
@@ -5012,6 +6018,12 @@ ${emailText}`;
|
|
|
5012
6018
|
providerOptions
|
|
5013
6019
|
);
|
|
5014
6020
|
trackUsage(usage);
|
|
6021
|
+
const emailReview = reviewBatchEmail(text, batchFields);
|
|
6022
|
+
state.qualityReport = {
|
|
6023
|
+
...buildApplicationQualityReport(state),
|
|
6024
|
+
emailReview
|
|
6025
|
+
};
|
|
6026
|
+
await applicationStore?.save(state);
|
|
5015
6027
|
return { text, tokenUsage: totalUsage };
|
|
5016
6028
|
}
|
|
5017
6029
|
async function getConfirmationSummary(applicationId) {
|
|
@@ -5148,73 +6160,73 @@ Respond with the final answer, deduplicated citations array, overall confidence
|
|
|
5148
6160
|
}
|
|
5149
6161
|
|
|
5150
6162
|
// src/schemas/query.ts
|
|
5151
|
-
var
|
|
5152
|
-
var QueryIntentSchema =
|
|
6163
|
+
var import_zod34 = require("zod");
|
|
6164
|
+
var QueryIntentSchema = import_zod34.z.enum([
|
|
5153
6165
|
"policy_question",
|
|
5154
6166
|
"coverage_comparison",
|
|
5155
6167
|
"document_search",
|
|
5156
6168
|
"claims_inquiry",
|
|
5157
6169
|
"general_knowledge"
|
|
5158
6170
|
]);
|
|
5159
|
-
var SubQuestionSchema =
|
|
5160
|
-
question:
|
|
6171
|
+
var SubQuestionSchema = import_zod34.z.object({
|
|
6172
|
+
question: import_zod34.z.string().describe("Atomic sub-question to retrieve and answer independently"),
|
|
5161
6173
|
intent: QueryIntentSchema,
|
|
5162
|
-
chunkTypes:
|
|
5163
|
-
documentFilters:
|
|
5164
|
-
type:
|
|
5165
|
-
carrier:
|
|
5166
|
-
insuredName:
|
|
5167
|
-
policyNumber:
|
|
5168
|
-
quoteNumber:
|
|
6174
|
+
chunkTypes: import_zod34.z.array(import_zod34.z.string()).optional().describe("Chunk types to filter retrieval (e.g. coverage, endorsement, declaration)"),
|
|
6175
|
+
documentFilters: import_zod34.z.object({
|
|
6176
|
+
type: import_zod34.z.enum(["policy", "quote"]).optional(),
|
|
6177
|
+
carrier: import_zod34.z.string().optional(),
|
|
6178
|
+
insuredName: import_zod34.z.string().optional(),
|
|
6179
|
+
policyNumber: import_zod34.z.string().optional(),
|
|
6180
|
+
quoteNumber: import_zod34.z.string().optional()
|
|
5169
6181
|
}).optional().describe("Structured filters to narrow document lookup")
|
|
5170
6182
|
});
|
|
5171
|
-
var QueryClassifyResultSchema =
|
|
6183
|
+
var QueryClassifyResultSchema = import_zod34.z.object({
|
|
5172
6184
|
intent: QueryIntentSchema,
|
|
5173
|
-
subQuestions:
|
|
5174
|
-
requiresDocumentLookup:
|
|
5175
|
-
requiresChunkSearch:
|
|
5176
|
-
requiresConversationHistory:
|
|
6185
|
+
subQuestions: import_zod34.z.array(SubQuestionSchema).min(1).describe("Decomposed atomic sub-questions"),
|
|
6186
|
+
requiresDocumentLookup: import_zod34.z.boolean().describe("Whether structured document lookup is needed"),
|
|
6187
|
+
requiresChunkSearch: import_zod34.z.boolean().describe("Whether semantic chunk search is needed"),
|
|
6188
|
+
requiresConversationHistory: import_zod34.z.boolean().describe("Whether conversation history is relevant")
|
|
5177
6189
|
});
|
|
5178
|
-
var EvidenceItemSchema =
|
|
5179
|
-
source:
|
|
5180
|
-
chunkId:
|
|
5181
|
-
documentId:
|
|
5182
|
-
turnId:
|
|
5183
|
-
text:
|
|
5184
|
-
relevance:
|
|
5185
|
-
metadata:
|
|
6190
|
+
var EvidenceItemSchema = import_zod34.z.object({
|
|
6191
|
+
source: import_zod34.z.enum(["chunk", "document", "conversation"]),
|
|
6192
|
+
chunkId: import_zod34.z.string().optional(),
|
|
6193
|
+
documentId: import_zod34.z.string().optional(),
|
|
6194
|
+
turnId: import_zod34.z.string().optional(),
|
|
6195
|
+
text: import_zod34.z.string().describe("Text excerpt from the source"),
|
|
6196
|
+
relevance: import_zod34.z.number().min(0).max(1),
|
|
6197
|
+
metadata: import_zod34.z.array(import_zod34.z.object({ key: import_zod34.z.string(), value: import_zod34.z.string() })).optional()
|
|
5186
6198
|
});
|
|
5187
|
-
var RetrievalResultSchema =
|
|
5188
|
-
subQuestion:
|
|
5189
|
-
evidence:
|
|
6199
|
+
var RetrievalResultSchema = import_zod34.z.object({
|
|
6200
|
+
subQuestion: import_zod34.z.string(),
|
|
6201
|
+
evidence: import_zod34.z.array(EvidenceItemSchema)
|
|
5190
6202
|
});
|
|
5191
|
-
var CitationSchema =
|
|
5192
|
-
index:
|
|
5193
|
-
chunkId:
|
|
5194
|
-
documentId:
|
|
5195
|
-
documentType:
|
|
5196
|
-
field:
|
|
5197
|
-
quote:
|
|
5198
|
-
relevance:
|
|
6203
|
+
var CitationSchema = import_zod34.z.object({
|
|
6204
|
+
index: import_zod34.z.number().describe("Citation number [1], [2], etc."),
|
|
6205
|
+
chunkId: import_zod34.z.string().describe("Source chunk ID, e.g. doc-123:coverage:2"),
|
|
6206
|
+
documentId: import_zod34.z.string(),
|
|
6207
|
+
documentType: import_zod34.z.enum(["policy", "quote"]).optional(),
|
|
6208
|
+
field: import_zod34.z.string().optional().describe("Specific field path, e.g. coverages[0].deductible"),
|
|
6209
|
+
quote: import_zod34.z.string().describe("Exact text from source that supports the claim"),
|
|
6210
|
+
relevance: import_zod34.z.number().min(0).max(1)
|
|
5199
6211
|
});
|
|
5200
|
-
var SubAnswerSchema =
|
|
5201
|
-
subQuestion:
|
|
5202
|
-
answer:
|
|
5203
|
-
citations:
|
|
5204
|
-
confidence:
|
|
5205
|
-
needsMoreContext:
|
|
6212
|
+
var SubAnswerSchema = import_zod34.z.object({
|
|
6213
|
+
subQuestion: import_zod34.z.string(),
|
|
6214
|
+
answer: import_zod34.z.string(),
|
|
6215
|
+
citations: import_zod34.z.array(CitationSchema),
|
|
6216
|
+
confidence: import_zod34.z.number().min(0).max(1),
|
|
6217
|
+
needsMoreContext: import_zod34.z.boolean().describe("True if evidence was insufficient to answer fully")
|
|
5206
6218
|
});
|
|
5207
|
-
var VerifyResultSchema =
|
|
5208
|
-
approved:
|
|
5209
|
-
issues:
|
|
5210
|
-
retrySubQuestions:
|
|
6219
|
+
var VerifyResultSchema = import_zod34.z.object({
|
|
6220
|
+
approved: import_zod34.z.boolean().describe("Whether all sub-answers are adequately grounded"),
|
|
6221
|
+
issues: import_zod34.z.array(import_zod34.z.string()).describe("Specific grounding or consistency issues found"),
|
|
6222
|
+
retrySubQuestions: import_zod34.z.array(import_zod34.z.string()).optional().describe("Sub-questions that need additional retrieval or re-reasoning")
|
|
5211
6223
|
});
|
|
5212
|
-
var QueryResultSchema =
|
|
5213
|
-
answer:
|
|
5214
|
-
citations:
|
|
6224
|
+
var QueryResultSchema = import_zod34.z.object({
|
|
6225
|
+
answer: import_zod34.z.string(),
|
|
6226
|
+
citations: import_zod34.z.array(CitationSchema),
|
|
5215
6227
|
intent: QueryIntentSchema,
|
|
5216
|
-
confidence:
|
|
5217
|
-
followUp:
|
|
6228
|
+
confidence: import_zod34.z.number().min(0).max(1),
|
|
6229
|
+
followUp: import_zod34.z.string().optional().describe("Suggested follow-up question if applicable")
|
|
5218
6230
|
});
|
|
5219
6231
|
|
|
5220
6232
|
// src/query/retriever.ts
|
|
@@ -5502,6 +6514,112 @@ async function verify(originalQuestion, subAnswers, allEvidence, config) {
|
|
|
5502
6514
|
return { result: object, usage };
|
|
5503
6515
|
}
|
|
5504
6516
|
|
|
6517
|
+
// src/query/quality.ts
|
|
6518
|
+
/**
 * Pick the identifier under which a retrieved evidence item is indexed.
 *
 * Preference order is chunk id, then document id, then conversation turn id.
 * Empty-string ids are skipped (`||` rather than `??`) so that an item with a blank
 * `chunkId` still resolves to its `documentId`, matching how `citationSourceId`
 * resolves citations.
 *
 * @param {{ chunkId?: string, documentId?: string, turnId?: string }} evidence - Evidence item.
 * @returns {string | undefined} The first non-empty id, or the `turnId` value when the
 *   other two are empty or absent.
 */
function sourceIdForEvidence(evidence) {
  return evidence.chunkId || evidence.documentId || evidence.turnId;
}
|
|
6521
|
+
/**
 * Pick the identifier used to look a citation up in the retrieved evidence.
 *
 * @param {{ chunkId?: string, documentId?: string }} citation - Citation to resolve.
 * @returns {string | undefined} The chunk id when it is truthy, otherwise the document id.
 */
function citationSourceId(citation) {
  const { chunkId, documentId } = citation;
  return chunkId ? chunkId : documentId;
}
|
|
6524
|
+
/**
 * Build the deterministic review report for a query run.
 *
 * Checks performed:
 *  - each sub-answer that claims an answer, or reports confidence >= 0.85, has citations
 *  - each sub-answer citation resolves to retrieved evidence (blocking if not) and its
 *    quote appears verbatim in that evidence (warning if not)
 *  - the final answer, when present, has citations if its confidence is above 0.4, and
 *    every final citation also appears (same index, chunk id, document id) in a sub-answer
 *
 * @param {object} params
 * @param {Array<object>} params.subAnswers - Sub-answers with `subQuestion`, `citations`,
 *   `confidence` and `needsMoreContext`.
 * @param {Array<object>} params.evidence - Retrieved evidence items.
 * @param {object} [params.finalResult] - Final answer; skipped when falsy.
 * @param {Array<object>} params.verifyRounds - Verifier rounds with `round`, `approved`, `issues`.
 * @returns {object} Report with `issues`, `rounds`, `artifacts`, the original `verifyRounds`
 *   and the `qualityGateStatus` returned by `evaluateQualityGate`.
 */
function buildQueryReviewReport(params) {
  const { subAnswers, evidence, finalResult, verifyRounds } = params;
  const issues = [];
  const citationKey = (citation) => `${citation.index}|${citation.chunkId}|${citation.documentId}`;

  // Group evidence by its source id; items without any id cannot back a citation.
  const evidenceBySource = new Map();
  for (const item of evidence) {
    const sourceId = sourceIdForEvidence(item);
    if (!sourceId) continue;
    const bucket = evidenceBySource.get(sourceId) ?? [];
    evidenceBySource.set(sourceId, [...bucket, item]);
  }

  for (const subAnswer of subAnswers) {
    const { subQuestion, citations } = subAnswer;
    const uncited = citations.length === 0;
    if (!subAnswer.needsMoreContext && uncited) {
      issues.push({
        code: "subanswer_missing_citations",
        severity: "blocking",
        message: `Sub-answer "${subQuestion}" has no citations despite claiming an answer.`,
        subQuestion
      });
    }
    if (subAnswer.confidence >= 0.85 && uncited) {
      issues.push({
        code: "subanswer_high_confidence_without_citations",
        severity: "blocking",
        message: `Sub-answer "${subQuestion}" has high confidence without citations.`,
        subQuestion
      });
    }
    for (const citation of citations) {
      const sourceId = citationSourceId(citation);
      const supportedEvidence = sourceId ? evidenceBySource.get(sourceId) ?? [] : [];
      if (!sourceId || supportedEvidence.length === 0) {
        issues.push({
          code: "citation_missing_from_evidence",
          severity: "blocking",
          message: `Citation [${citation.index}] in "${subQuestion}" does not map to retrieved evidence.`,
          subQuestion,
          citationIndex: citation.index,
          sourceId
        });
      } else if (!supportedEvidence.some((item) => item.text.includes(citation.quote))) {
        issues.push({
          code: "citation_quote_not_in_evidence",
          severity: "warning",
          message: `Citation [${citation.index}] quote in "${subQuestion}" was not found verbatim in retrieved evidence.`,
          subQuestion,
          citationIndex: citation.index,
          sourceId
        });
      }
    }
  }

  if (finalResult) {
    const hasAnswerText = finalResult.answer.trim().length > 0;
    if (hasAnswerText && finalResult.citations.length === 0 && finalResult.confidence > 0.4) {
      issues.push({
        code: "final_answer_missing_citations",
        severity: "blocking",
        message: "Final answer has non-trivial confidence but no citations."
      });
    }
    const knownCitationIds = new Set();
    for (const subAnswer of subAnswers) {
      for (const citation of subAnswer.citations) {
        knownCitationIds.add(citationKey(citation));
      }
    }
    for (const citation of finalResult.citations) {
      if (knownCitationIds.has(citationKey(citation))) continue;
      issues.push({
        code: "final_answer_unknown_citation",
        severity: "warning",
        message: `Final answer citation [${citation.index}] was not present in verified sub-answers.`,
        citationIndex: citation.index,
        sourceId: citationSourceId(citation)
      });
    }
  }

  const rounds = verifyRounds.map((round) => {
    const clean = round.approved && round.issues.length === 0;
    const fallbackSummary = round.approved ? "Verification passed." : "Verification requested retry.";
    return {
      round: round.round,
      kind: "verification",
      status: clean ? "passed" : "warning",
      summary: round.issues[0] ?? fallbackSummary
    };
  });
  const artifacts = [
    { kind: "evidence", label: "Retrieved Evidence", itemCount: evidence.length },
    { kind: "sub_answers", label: "Sub Answers", itemCount: subAnswers.length }
  ];
  const hasRoundWarnings = verifyRounds.some((round) => !round.approved || round.issues.length > 0);
  return {
    issues,
    rounds,
    artifacts,
    verifyRounds,
    qualityGateStatus: evaluateQualityGate({ issues, hasRoundWarnings })
  };
}
|
|
6622
|
+
|
|
5505
6623
|
// src/query/coordinator.ts
|
|
5506
6624
|
function createQueryAgent(config) {
|
|
5507
6625
|
const {
|
|
@@ -5515,7 +6633,8 @@ function createQueryAgent(config) {
|
|
|
5515
6633
|
onTokenUsage,
|
|
5516
6634
|
onProgress,
|
|
5517
6635
|
log,
|
|
5518
|
-
providerOptions
|
|
6636
|
+
providerOptions,
|
|
6637
|
+
qualityGate = "warn"
|
|
5519
6638
|
} = config;
|
|
5520
6639
|
const limit = pLimit(concurrency);
|
|
5521
6640
|
let totalUsage = { inputTokens: 0, outputTokens: 0 };
|
|
@@ -5584,6 +6703,7 @@ function createQueryAgent(config) {
|
|
|
5584
6703
|
await pipelineCtx.save("reason", { classification, evidence: allEvidence, subAnswers });
|
|
5585
6704
|
onProgress?.("Verifying answer grounding...");
|
|
5586
6705
|
const verifierConfig = { generateObject, providerOptions };
|
|
6706
|
+
const verifyRounds = [];
|
|
5587
6707
|
for (let round = 0; round < maxVerifyRounds; round++) {
|
|
5588
6708
|
const { result: verifyResult, usage } = await safeVerify(
|
|
5589
6709
|
question,
|
|
@@ -5592,6 +6712,12 @@ function createQueryAgent(config) {
|
|
|
5592
6712
|
verifierConfig
|
|
5593
6713
|
);
|
|
5594
6714
|
trackUsage(usage);
|
|
6715
|
+
verifyRounds.push({
|
|
6716
|
+
round: round + 1,
|
|
6717
|
+
approved: verifyResult.approved,
|
|
6718
|
+
issues: verifyResult.issues,
|
|
6719
|
+
retrySubQuestions: verifyResult.retrySubQuestions
|
|
6720
|
+
});
|
|
5595
6721
|
if (verifyResult.approved) {
|
|
5596
6722
|
onProgress?.("Verification passed.");
|
|
5597
6723
|
break;
|
|
@@ -5649,6 +6775,24 @@ function createQueryAgent(config) {
|
|
|
5649
6775
|
classification,
|
|
5650
6776
|
context?.platform
|
|
5651
6777
|
);
|
|
6778
|
+
const reviewReport = buildQueryReviewReport({
|
|
6779
|
+
subAnswers,
|
|
6780
|
+
evidence: allEvidence,
|
|
6781
|
+
finalResult: queryResult,
|
|
6782
|
+
verifyRounds
|
|
6783
|
+
});
|
|
6784
|
+
await pipelineCtx.save("review", {
|
|
6785
|
+
classification,
|
|
6786
|
+
evidence: allEvidence,
|
|
6787
|
+
subAnswers,
|
|
6788
|
+
reviewReport
|
|
6789
|
+
});
|
|
6790
|
+
if (reviewReport.issues.length > 0) {
|
|
6791
|
+
await log?.(`Query deterministic review issues: ${reviewReport.issues.map((issue) => issue.message).join("; ")}`);
|
|
6792
|
+
}
|
|
6793
|
+
if (shouldFailQualityGate(qualityGate, reviewReport.qualityGateStatus)) {
|
|
6794
|
+
throw new Error("Query quality gate failed. See reviewReport for blocking issues.");
|
|
6795
|
+
}
|
|
5652
6796
|
if (conversationId) {
|
|
5653
6797
|
try {
|
|
5654
6798
|
await memoryStore.addTurn({
|
|
@@ -5669,7 +6813,7 @@ function createQueryAgent(config) {
|
|
|
5669
6813
|
await log?.(`Failed to store conversation turn: ${e}`);
|
|
5670
6814
|
}
|
|
5671
6815
|
}
|
|
5672
|
-
return { ...queryResult, tokenUsage: totalUsage };
|
|
6816
|
+
return { ...queryResult, tokenUsage: totalUsage, reviewReport };
|
|
5673
6817
|
}
|
|
5674
6818
|
async function classify(question, conversationId) {
|
|
5675
6819
|
let conversationContext;
|
|
@@ -5891,7 +7035,12 @@ var AGENT_TOOLS = [
|
|
|
5891
7035
|
AdmittedStatusSchema,
|
|
5892
7036
|
AnswerParsingResultSchema,
|
|
5893
7037
|
ApplicationClassifyResultSchema,
|
|
7038
|
+
ApplicationEmailReviewSchema,
|
|
5894
7039
|
ApplicationFieldSchema,
|
|
7040
|
+
ApplicationQualityArtifactSchema,
|
|
7041
|
+
ApplicationQualityIssueSchema,
|
|
7042
|
+
ApplicationQualityReportSchema,
|
|
7043
|
+
ApplicationQualityRoundSchema,
|
|
5895
7044
|
ApplicationStateSchema,
|
|
5896
7045
|
AuditTypeSchema,
|
|
5897
7046
|
AutoFillMatchSchema,
|
|
@@ -5923,6 +7072,7 @@ var AGENT_TOOLS = [
|
|
|
5923
7072
|
CoverageFormSchema,
|
|
5924
7073
|
CoverageSchema,
|
|
5925
7074
|
CoverageTriggerSchema,
|
|
7075
|
+
CoverageValueTypeSchema,
|
|
5926
7076
|
CrimeDeclarationsSchema,
|
|
5927
7077
|
CyberDeclarationsSchema,
|
|
5928
7078
|
DEDUCTIBLE_TYPES,
|