@claritylabs/cl-sdk 0.14.2 → 0.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.mts +86 -19
- package/dist/index.d.ts +86 -19
- package/dist/index.js +723 -201
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +708 -201
- package/dist/index.mjs.map +1 -1
- package/package.json +1 -1
package/dist/index.mjs
CHANGED
|
@@ -1423,34 +1423,134 @@ import {
|
|
|
1423
1423
|
StandardFonts,
|
|
1424
1424
|
rgb
|
|
1425
1425
|
} from "pdf-lib";
|
|
1426
|
-
|
|
1427
|
-
|
|
1426
|
+
/**
 * Type guard for provider file references.
 *
 * @param {unknown} input Candidate PDF input.
 * @returns {boolean} True when `input` is a non-null object that has a `fileId` key.
 */
function isFileIdRef(input) {
  if (input === null || typeof input !== "object") {
    return false;
  }
  return "fileId" in input;
}
|
|
1429
|
+
/**
 * Type guard for URL inputs.
 *
 * @param {unknown} input Candidate PDF input.
 * @returns {boolean} True only for instances of the global `URL` class (URL strings do not count).
 */
function isUrl(input) {
  const isUrlInstance = input instanceof URL;
  return isUrlInstance;
}
|
|
1432
|
+
/**
 * Type guard for raw byte inputs.
 *
 * @param {unknown} input Candidate PDF input.
 * @returns {boolean} True for `Uint8Array` instances (including subclasses such as Node's `Buffer`).
 */
function isBytes(input) {
  const isByteArray = input instanceof Uint8Array;
  return isByteArray;
}
|
|
1435
|
+
/**
 * Normalize a supported PDF input into raw bytes.
 *
 * Inputs are checked in this order:
 *  - fileId reference objects are rejected, since they cannot be materialized here;
 *  - `URL` instances: `file:` URLs are read from disk when running under Node,
 *    every other protocol is downloaded with `fetch`;
 *  - `Uint8Array` values are returned as-is (same object, no copy);
 *  - anything else is treated as a base64 string and decoded.
 *
 * @param {object|URL|Uint8Array|string} input PDF source.
 * @returns {Promise<Uint8Array>} The PDF bytes.
 * @throws {Error} For fileId references, for `file:` URLs outside Node, and for
 *   non-OK HTTP responses.
 */
async function pdfInputToBytes(input) {
  if (isFileIdRef(input)) {
    throw new Error(
      "Cannot convert fileId reference to bytes. Pass the fileId directly to your provider callback instead."
    );
  }
  if (isUrl(input)) {
    if (input.protocol === "file:") {
      if (typeof process !== "undefined" && process.versions?.node) {
        const fs = await import("fs/promises");
        // Hand the URL object itself to fs: `input.pathname` is still
        // percent-encoded ("%20" for spaces) and is not a valid path for
        // Windows drive letters, whereas fs decodes file: URLs correctly.
        const buffer = await fs.readFile(input);
        return new Uint8Array(buffer);
      }
      throw new Error("File URLs not supported in browser environment");
    }
    const response = await fetch(input.toString());
    if (!response.ok) {
      throw new Error(`Failed to fetch PDF: ${response.status} ${response.statusText}`);
    }
    const arrayBuffer = await response.arrayBuffer();
    return new Uint8Array(arrayBuffer);
  }
  if (isBytes(input)) {
    return input;
  }
  // Remaining inputs are base64 text; prefer Buffer where it exists and fall
  // back to atob otherwise.
  if (typeof Buffer !== "undefined") {
    return new Uint8Array(Buffer.from(input, "base64"));
  }
  return Uint8Array.from(atob(input), (c) => c.charCodeAt(0));
}
|
|
1465
|
+
/**
 * Normalize a supported PDF input into a base64 string.
 *
 * URL inputs are first turned into bytes via `pdfInputToBytes`, byte inputs are
 * encoded directly, and any other input is returned unchanged (it is taken to
 * already be base64 text).
 *
 * @param {object|URL|Uint8Array|string} input PDF source.
 * @returns {Promise<string>} Base64 representation of the PDF.
 * @throws {Error} When `input` is a fileId reference.
 */
async function pdfInputToBase64(input) {
  if (isFileIdRef(input)) {
    throw new Error(
      "Cannot convert fileId reference to base64. Pass the fileId directly to your provider callback instead."
    );
  }
  const fromUrl = isUrl(input);
  if (!fromUrl && !isBytes(input)) {
    return input;
  }
  const rawBytes = fromUrl ? await pdfInputToBytes(input) : input;
  return bytesToBase64(rawBytes);
}
|
|
1480
|
+
/**
 * Encode raw bytes as a base64 string.
 *
 * Uses `Buffer` when that global exists; otherwise builds a binary string one
 * character per byte and encodes it with `btoa`.
 *
 * @param {Uint8Array} bytes Bytes to encode.
 * @returns {string} Base64 text.
 */
function bytesToBase64(bytes) {
  const hasBuffer = typeof Buffer !== "undefined";
  if (hasBuffer) {
    return Buffer.from(bytes).toString("base64");
  }
  const chars = Array.from({ length: bytes.length }, (_, position) => String.fromCharCode(bytes[position]));
  return btoa(chars.join(""));
}
|
|
1490
|
+
/**
 * Tell whether a PDF input points at a file rather than carrying its content.
 *
 * @param {unknown} input Candidate PDF input.
 * @returns {boolean} True for fileId references and for `URL` instances.
 */
function isFileReference(input) {
  if (isFileIdRef(input)) {
    return true;
  }
  return isUrl(input);
}
|
|
1493
|
+
/**
 * Build a small identifier object for reference-style PDF inputs.
 *
 * @param {unknown} input Candidate PDF input.
 * @returns {{fileId: *}|{url: string}|undefined} `{ fileId }` for fileId
 *   references, `{ url }` (stringified) for `URL` instances, otherwise undefined.
 */
function getFileIdentifier(input) {
  if (isFileIdRef(input)) {
    const { fileId } = input;
    return { fileId };
  }
  return isUrl(input) ? { url: input.toString() } : void 0;
}
|
|
1502
|
+
/**
 * Count the pages of a PDF given in any supported input form.
 *
 * The input is converted to bytes with `pdfInputToBytes` and loaded with
 * `ignoreEncryption: true`.
 *
 * @param {URL|Uint8Array|string} input PDF source.
 * @returns {Promise<number>} Number of pages reported by the loaded document.
 */
async function getPdfPageCount(input) {
  const pdfBytes = await pdfInputToBytes(input);
  const loadOptions = { ignoreEncryption: true };
  const pdf = await PDFDocument.load(pdfBytes, loadOptions);
  return pdf.getPageCount();
}
|
|
1507
|
+
/**
 * Extract an inclusive, 1-indexed page range from a PDF and return it as base64.
 *
 * `startPage` is clamped to the first page and `endPage` to the last page.
 * When the clamped range covers the whole document no new PDF is built: a
 * string input is returned untouched and any other input is encoded from the
 * bytes that were already loaded.
 *
 * @param {URL|Uint8Array|string} input PDF source (file: URL, bytes, or base64 text).
 * @param {number} startPage First page to keep (1-indexed).
 * @param {number} endPage Last page to keep (1-indexed, inclusive).
 * @returns {Promise<string>} Base64 of the resulting PDF.
 * @throws {Error} For fileId references and for http(s) URLs, which must be
 *   resolved to bytes/base64 by the caller first.
 */
async function extractPageRange(input, startPage, endPage) {
  if (isFileIdRef(input)) {
    throw new Error(
      "Cannot extract page range from fileId reference. The provider must handle fileId inputs directly or you must pass the full PDF as base64/bytes."
    );
  }
  if (isUrl(input) && (input.protocol === "http:" || input.protocol === "https:")) {
    throw new Error(
      "Cannot extract page range from remote URL. Either pass the full PDF as base64/bytes, or download it first."
    );
  }
  const srcBytes = await pdfInputToBytes(input);
  const srcDoc = await PDFDocument.load(srcBytes, { ignoreEncryption: true });
  const totalPages = srcDoc.getPageCount();
  // Convert the 1-indexed inclusive range into clamped 0-indexed bounds.
  const start = Math.max(startPage - 1, 0);
  const end = Math.min(endPage, totalPages) - 1;
  if (start === 0 && end >= totalPages - 1) {
    // Whole document requested. A string input is already base64; for every
    // other input `srcBytes` holds the document bytes (for byte inputs it is
    // the input itself), so one encode call covers them all.
    return typeof input === "string" ? input : bytesToBase64(srcBytes);
  }
  const newDoc = await PDFDocument.create();
  const indices = Array.from({ length: end - start + 1 }, (_, i) => start + i);
  const pages = await newDoc.copyPages(srcDoc, indices);
  for (const page of pages) {
    newDoc.addPage(page);
  }
  // save() already yields the serialized bytes; no defensive copy is needed
  // before encoding.
  const bytes = await newDoc.save();
  return bytesToBase64(bytes);
}
|
|
1539
|
+
/**
 * Produce provider options that carry the PDF in the form matching its input kind.
 *
 * A shallow copy of `existingOptions` is extended with exactly one of:
 *  - `fileId` (plus `fileMimeType` when the reference has a truthy `mimeType`),
 *  - `pdfUrl` (the `URL` object itself), or
 *  - `pdfBase64` (via `pdfInputToBase64`).
 *
 * @param {object|URL|Uint8Array|string} input PDF source.
 * @param {object} [existingOptions] Options to extend; never mutated.
 * @returns {Promise<object>} The extended options object.
 */
async function buildPdfProviderOptions(input, existingOptions) {
  if (isFileIdRef(input)) {
    const byReference = { ...existingOptions, fileId: input.fileId };
    if (input.mimeType) {
      byReference.fileMimeType = input.mimeType;
    }
    return byReference;
  }
  if (isUrl(input)) {
    return { ...existingOptions, pdfUrl: input };
  }
  return { ...existingOptions, pdfBase64: await pdfInputToBase64(input) };
}
|
|
1455
1555
|
function getAcroFormFields(pdfDoc) {
|
|
1456
1556
|
const form = pdfDoc.getForm();
|
|
@@ -1543,7 +1643,7 @@ async function runExtractor(params) {
|
|
|
1543
1643
|
name,
|
|
1544
1644
|
prompt,
|
|
1545
1645
|
schema,
|
|
1546
|
-
|
|
1646
|
+
pdfInput,
|
|
1547
1647
|
startPage,
|
|
1548
1648
|
endPage,
|
|
1549
1649
|
generateObject,
|
|
@@ -1553,6 +1653,7 @@ async function runExtractor(params) {
|
|
|
1553
1653
|
} = params;
|
|
1554
1654
|
const extractorProviderOptions = { ...providerOptions };
|
|
1555
1655
|
let fullPrompt;
|
|
1656
|
+
const pdfBase64 = await pdfInputToBase64(pdfInput);
|
|
1556
1657
|
if (convertPdfToImages) {
|
|
1557
1658
|
const images = await convertPdfToImages(pdfBase64, startPage, endPage);
|
|
1558
1659
|
extractorProviderOptions.images = images;
|
|
@@ -2232,10 +2333,13 @@ function chunkDocument(doc) {
|
|
|
2232
2333
|
};
|
|
2233
2334
|
const chunks = [];
|
|
2234
2335
|
const docId = doc.id;
|
|
2336
|
+
const policyTypesStr = doc.policyTypes?.length ? doc.policyTypes.join(",") : void 0;
|
|
2235
2337
|
function stringMetadata(entries) {
|
|
2236
|
-
|
|
2338
|
+
const base = Object.fromEntries(
|
|
2237
2339
|
Object.entries(entries).filter(([, value]) => value !== void 0 && value !== null && String(value).length > 0).map(([key, value]) => [key, String(value)])
|
|
2238
2340
|
);
|
|
2341
|
+
if (policyTypesStr) base.policyTypes = policyTypesStr;
|
|
2342
|
+
return base;
|
|
2239
2343
|
}
|
|
2240
2344
|
chunks.push({
|
|
2241
2345
|
id: `${docId}:carrier_info:0`,
|
|
@@ -2596,13 +2700,16 @@ ${exc.content}`.trim(),
|
|
|
2596
2700
|
}
|
|
2597
2701
|
}
|
|
2598
2702
|
if (declLines.length > 0) {
|
|
2703
|
+
const declMeta = { documentType: doc.type };
|
|
2704
|
+
if (typeof decl.formType === "string") declMeta.formType = decl.formType;
|
|
2705
|
+
if (typeof decl.line === "string") declMeta.declarationLine = decl.line;
|
|
2599
2706
|
chunks.push({
|
|
2600
2707
|
id: `${docId}:declaration:0`,
|
|
2601
2708
|
documentId: docId,
|
|
2602
2709
|
type: "declaration",
|
|
2603
2710
|
text: `Declarations
|
|
2604
2711
|
${declLines.join("\n")}`,
|
|
2605
|
-
metadata: stringMetadata(
|
|
2712
|
+
metadata: stringMetadata(declMeta)
|
|
2606
2713
|
});
|
|
2607
2714
|
}
|
|
2608
2715
|
}
|
|
@@ -4023,11 +4130,30 @@ COMMERCIAL LINES \u2014 match these values:
|
|
|
4023
4130
|
- "property" \u2014 standalone property
|
|
4024
4131
|
|
|
4025
4132
|
PERSONAL LINES \u2014 match these values:
|
|
4026
|
-
|
|
4027
|
-
|
|
4028
|
-
- "
|
|
4029
|
-
|
|
4030
|
-
|
|
4133
|
+
|
|
4134
|
+
HOMEOWNER FORM CLASSIFICATION \u2014 pay close attention to these distinctions:
|
|
4135
|
+
- "homeowners_ho3" \u2014 HO-3 Special Form. Standard homeowner policy for OWNER-OCCUPIED dwellings.
|
|
4136
|
+
Key indicators: Coverage A (Dwelling) present, open-peril dwelling coverage, named-peril personal property,
|
|
4137
|
+
references to "special form", "HO 00 03", or "HO-3". The insured OWNS the home.
|
|
4138
|
+
- "homeowners_ho5" \u2014 HO-5 Comprehensive Form. Premium homeowner policy for OWNER-OCCUPIED dwellings.
|
|
4139
|
+
Key indicators: Coverage A (Dwelling) present, BOTH dwelling AND personal property on open-peril basis,
|
|
4140
|
+
references to "comprehensive form", "HO 00 05", or "HO-5". Higher coverage than HO-3.
|
|
4141
|
+
- "renters_ho4" \u2014 HO-4 Contents Broad Form. Renters/tenants insurance \u2014 NO dwelling coverage.
|
|
4142
|
+
Key indicators: NO Coverage A (Dwelling), only Coverage C (Personal Property) and Coverage E/F (Liability/Medical),
|
|
4143
|
+
references to "contents broad form", "HO 00 04", "HO-4", "renters", "tenants". The insured RENTS, does not own.
|
|
4144
|
+
- "condo_ho6" \u2014 HO-6 Unit-Owners Form. Condo/co-op unit-owner insurance.
|
|
4145
|
+
Key indicators: Coverage A applies to interior walls/improvements only (not full structure),
|
|
4146
|
+
references to "unit-owners form", "HO 00 06", "HO-6", "condominium", "co-op unit". The building's
|
|
4147
|
+
master policy covers the structure; HO-6 covers the unit interior, personal property, and liability.
|
|
4148
|
+
|
|
4149
|
+
DISAMBIGUATION RULES for homeowner forms:
|
|
4150
|
+
1. If the document has Coverage A (Dwelling) with full structure coverage \u2192 HO-3 or HO-5 (check if open-peril on personal property \u2192 HO-5, named-peril \u2192 HO-3)
|
|
4151
|
+
2. If NO Coverage A / no dwelling coverage and the insured is a renter/tenant \u2192 renters_ho4
|
|
4152
|
+
3. If Coverage A covers only unit interior/improvements and mentions condo/co-op \u2192 condo_ho6
|
|
4153
|
+
4. Look for the actual form number (HO 00 03, HO 00 04, HO 00 05, HO 00 06) on the declarations page \u2014 this is the most reliable indicator
|
|
4154
|
+
5. Do NOT default to homeowners_ho3 when uncertain \u2014 check for the distinguishing signals above
|
|
4155
|
+
|
|
4156
|
+
- "dwelling_fire" \u2014 DP-1, DP-3, dwelling fire (non-owner-occupied or investment property)
|
|
4031
4157
|
- "mobile_home" \u2014 mobile home, manufactured home
|
|
4032
4158
|
- "personal_auto" \u2014 personal auto, PAP
|
|
4033
4159
|
- "personal_umbrella" \u2014 personal umbrella
|
|
@@ -4038,7 +4164,10 @@ PERSONAL LINES \u2014 match these values:
|
|
|
4038
4164
|
- "watercraft" \u2014 watercraft, boat
|
|
4039
4165
|
- "recreational_vehicle" \u2014 RV, recreational vehicle, ATV
|
|
4040
4166
|
- "farm_ranch" \u2014 farm, ranch
|
|
4041
|
-
- "pet" \u2014 pet insurance
|
|
4167
|
+
- "pet" \u2014 standalone pet insurance policy. Key indicators: named pet, species/breed, accident/illness coverage,
|
|
4168
|
+
wellness plans, per-incident or annual limits for veterinary costs. Do NOT confuse with pet liability endorsements
|
|
4169
|
+
on a homeowners policy \u2014 those are still homeowner policies (ho3/ho4/ho5/ho6), not "pet".
|
|
4170
|
+
Only classify as "pet" when the ENTIRE policy is dedicated to pet health/accident coverage.
|
|
4042
4171
|
- "travel" \u2014 travel insurance
|
|
4043
4172
|
- "identity_theft" \u2014 identity theft
|
|
4044
4173
|
- "title" \u2014 title insurance
|
|
@@ -4894,6 +5023,338 @@ function getExtractor(name) {
|
|
|
4894
5023
|
return EXTRACTORS[name];
|
|
4895
5024
|
}
|
|
4896
5025
|
|
|
5026
|
+
// src/extraction/resolve-referential.ts
|
|
5027
|
+
import { z as z35 } from "zod";
|
|
5028
|
+
|
|
5029
|
+
// src/prompts/extractors/referential-lookup.ts
|
|
5030
|
+
import { z as z34 } from "zod";
|
|
5031
|
+
// Shape of the model's answer for a referential lookup: one entry per coverage
// it was asked about. Only `coverageName` and `confidence` are required; every
// resolved* field, the page number and the source text are optional.
var ReferentialLookupSchema = (() => {
  const optionalText = (description) => z34.string().optional().describe(description);
  const resolvedCoverage = z34.object({
    coverageName: z34.string().describe("The coverage name that was referenced"),
    resolvedLimit: optionalText("The concrete limit value found, if any"),
    resolvedLimitValueType: CoverageValueTypeSchema.optional(),
    resolvedDeductible: optionalText("The concrete deductible value found, if any"),
    resolvedDeductibleValueType: CoverageValueTypeSchema.optional(),
    pageNumber: z34.number().optional().describe("Page where the resolved value was found"),
    originalContent: optionalText("Verbatim source text for the resolved value"),
    confidence: z34.enum(["high", "medium", "low"]).describe("Confidence in the resolution")
  });
  return z34.object({ resolvedCoverages: z34.array(resolvedCoverage) });
})();
|
|
5045
|
+
/**
 * Build the prompt asking the model to replace referential limits/deductibles
 * with the concrete values found in the supplied pages.
 *
 * Each coverage becomes a numbered entry showing its name and limit, followed
 * by its deductible and referenced section when those are truthy.
 *
 * @param {Array<{name: string, limit: string, deductible?: string, sectionRef?: string}>} coverages
 *   Coverages whose values still need resolving.
 * @returns {string} The complete prompt text.
 */
function buildReferentialLookupPrompt(coverages) {
  const describeCoverage = (entry, position) => {
    const lines = [` ${position + 1}. Coverage: "${entry.name}" \u2014 Limit: "${entry.limit}"`];
    if (entry.deductible) lines.push(` Deductible: "${entry.deductible}"`);
    if (entry.sectionRef) lines.push(` Referenced section: "${entry.sectionRef}"`);
    return lines.join("\n");
  };
  const coverageList = coverages.map((entry, position) => describeCoverage(entry, position)).join("\n");
  return `You are an expert insurance document analyst. You are looking at a specific section of an insurance document to resolve referential coverage limits.

The following coverages had referential limits or deductibles (e.g. "As stated in Policy", "As stated in Section 4 of Policy", "See Declarations") instead of concrete values:

${coverageList}

Your task:
- Find the concrete/actual limit and deductible values for each coverage listed above.
- Search the declarations page, coverage schedules, and any referenced sections for the real numeric or defined values.
- Only return values you can actually find in the document \u2014 do not guess or infer values that are not explicitly stated.
- For each resolved coverage, include:
- pageNumber: the page where the resolved value appears
- originalContent: the verbatim text snippet containing the resolved value
- confidence: "high" if the value is clearly and unambiguously stated, "medium" if it requires interpretation, "low" if uncertain
- If a coverage cannot be resolved (no concrete value found), still include it with confidence "low" and omit the resolved fields.
- Classify resolvedLimitValueType and resolvedDeductibleValueType as numeric, included, not_included, as_stated, waiting_period, referential, or other.

Return JSON only.`;
}
|
|
5075
|
+
|
|
5076
|
+
// src/extraction/resolve-referential.ts
|
|
5077
|
+
/**
 * Heuristic: does a limit/deductible value point elsewhere in the document
 * instead of stating a concrete amount?
 *
 * @param {unknown} value Raw extracted value.
 * @returns {boolean} True when `value` is a string containing one of the known
 *   referential phrases (case-insensitive); false for non-strings.
 */
function looksReferential(value) {
  if (typeof value !== "string") return false;
  const haystack = value.toLowerCase();
  const referentialPhrases = [
    "shown in the declarations",
    "shown in declarations",
    "shown in the schedule",
    "as stated",
    "if applicable"
  ];
  return referentialPhrases.some((phrase) => haystack.includes(phrase));
}
|
|
5082
|
+
/**
 * Extract the document location a referential value points at.
 *
 * Patterns are tried in priority order:
 *  1. an explicit "Section <number>[letter]" mention;
 *  2. any mention of "declarations" (normalized to "Declarations");
 *  3. "Schedule" or "Schedule of <name>";
 *  4. whatever follows "as stated in", "see" or "shown in [the]", with a
 *     trailing period and a trailing "of the policy" removed.
 *
 * @param {unknown} text Raw limit/deductible text.
 * @returns {string|undefined} The reference target, or undefined when `text`
 *   is not a string, is blank, or names no recognizable target.
 */
function parseReferenceTarget(text) {
  if (typeof text !== "string") return void 0;
  const normalized = text.trim();
  if (!normalized) return void 0;
  const sectionMatch = normalized.match(/\b(Section\s+\d+[A-Za-z]?)/i);
  if (sectionMatch) return sectionMatch[1];
  if (/declarations/i.test(normalized)) return "Declarations";
  const scheduleMatch = normalized.match(/\b(Schedule(?:\s+of\s+[A-Za-z ]+)?)/i);
  if (scheduleMatch) return scheduleMatch[1].trim();
  // `\bsee` keeps words that merely end in "see" (e.g. "foresee") from being
  // read as a cross-reference.
  const phraseMatch = normalized.match(/(?:as\s+stated\s+in|\bsee|shown\s+in(?:\s+the)?)\s+(.+)/i);
  if (phraseMatch) {
    const stripTrailingDots = (value) => value.replace(/\.+$/, "").trim();
    // Drop sentence punctuation BEFORE the "of the policy" suffix; otherwise
    // "... of the policy." never matches the end-anchored suffix pattern. Dots
    // are stripped once more in case the suffix itself followed a period.
    const withoutDots = stripTrailingDots(phraseMatch[1].trim());
    const target = stripTrailingDots(withoutDots.replace(/\s+of\s+the\s+policy$/i, ""));
    if (target) return target;
  }
  // No recognizable target (this includes bare "if applicable" values).
  return void 0;
}
|
|
5100
|
+
// Answer shape for the page-location lookup: a page range given as two numbers.
// The lookup prompt asks for 1-indexed pages and for 0/0 when nothing is found.
var PageLocationSchema = z35.object({ startPage: z35.number(), endPage: z35.number() });
|
|
5104
|
+
/**
 * Locate the page range of the section a referential value points at.
 *
 * Lookup order:
 *  1. already-extracted sections whose title contains the target;
 *  2. form-inventory entries whose title or form type contains the target;
 *  3. a model call that is asked to report the page range.
 * Entries without a `pageStart` are ignored in steps 1 and 2.
 *
 * The model's answer is sanitized before use: non-positive values mean "not
 * found", an inverted range is re-ordered, and the range is limited to
 * `pageCount` (a range lying entirely past the last page is "not found").
 *
 * @param {object} params
 * @param {string} params.referenceTarget Section name being looked for.
 * @param {Array<object>} params.sections Extracted sections (`title`, `pageStart`, `pageEnd`).
 * @param {Array<object>} params.formInventory Forms (`title`, `formType`, `pageStart`, `pageEnd`).
 * @param {*} params.pdfInput PDF source forwarded to `buildPdfProviderOptions`.
 * @param {number} params.pageCount Total pages in the document.
 * @param {Function} params.generateObject Model callback handed to `safeGenerateObject`.
 * @param {object} [params.providerOptions] Base provider options.
 * @param {Function} [params.log] Optional logger.
 * @returns {Promise<{startPage: number, endPage: number}|undefined>} The range,
 *   or undefined when the target cannot be located.
 */
async function findReferencedPages(params) {
  const {
    referenceTarget,
    sections,
    formInventory,
    pdfInput,
    pageCount,
    generateObject,
    providerOptions,
    log
  } = params;
  const targetLower = referenceTarget.toLowerCase();
  const mentionsTarget = (text) => Boolean(text) && text.toLowerCase().includes(targetLower);
  const toRange = (entry) => ({
    startPage: entry.pageStart,
    endPage: entry.pageEnd ?? entry.pageStart
  });
  // Model output is untrusted: reject "not found" answers, order the bounds,
  // and keep the range inside the document.
  const sanitizeModelRange = ({ startPage, endPage }) => {
    if (!(startPage > 0 && endPage > 0)) return void 0;
    const first = Math.min(startPage, endPage);
    let last = Math.max(startPage, endPage);
    if (Number.isFinite(pageCount) && pageCount > 0) {
      if (first > pageCount) return void 0;
      last = Math.min(last, pageCount);
    }
    return { startPage: first, endPage: last };
  };

  const section = sections.find((candidate) => candidate.pageStart != null && mentionsTarget(candidate.title));
  if (section) return toRange(section);

  const form = formInventory.find(
    (candidate) => candidate.pageStart != null && (mentionsTarget(candidate.title) || mentionsTarget(candidate.formType))
  );
  if (form) return toRange(form);

  try {
    const result = await safeGenerateObject(
      generateObject,
      {
        prompt: `You are analyzing an insurance document (${pageCount} pages total).

Find the pages that contain the section or area referenced as "${referenceTarget}".

Return the page range (1-indexed) where this section is located. If the section spans a single page, startPage and endPage should be the same.

If you cannot find the section, return startPage: 0 and endPage: 0.

Return JSON only.`,
        schema: PageLocationSchema,
        maxTokens: 256,
        providerOptions: await buildPdfProviderOptions(pdfInput, providerOptions)
      },
      {
        fallback: { startPage: 0, endPage: 0 },
        maxRetries: 1,
        log,
        onError: (err, attempt) => log?.(
          `Page location attempt ${attempt + 1} failed for "${referenceTarget}": ${err instanceof Error ? err.message : String(err)}`
        )
      }
    );
    return sanitizeModelRange(result.object);
  } catch (error) {
    // Best effort: a failed lookup is reported as "not found", never thrown.
    await log?.(
      `Failed to locate pages for "${referenceTarget}": ${error instanceof Error ? error.message : String(error)}`
    );
  }
  return void 0;
}
|
|
5173
|
+
/**
 * Replace referential coverage limits/deductibles ("As stated in ...",
 * "Shown in the Declarations", ...) with concrete values found in the PDF.
 *
 * Coverages are read from `memory.get("coverage_limits")`. Those flagged as
 * referential (by value type or by wording) are grouped by the location they
 * point at. For each group the pages are located with `findReferencedPages`
 * and one `referential_lookup` extraction is run over them. Resolved values
 * are written back onto the coverage objects in place (`limit`,
 * `limitValueType`, `deductible`, `deductibleValueType`, `resolvedFromPage`,
 * `resolvedOriginalContent`).
 *
 * Failures are never thrown: a group whose pages cannot be found, or whose
 * extraction fails, is reported as unresolved.
 *
 * @param {object} params
 * @param {Map<string, *>} params.memory Extraction memory (`coverage_limits`, `sections`, `form_inventory`).
 * @param {*} params.pdfInput PDF source.
 * @param {number} params.pageCount Total pages in the document.
 * @param {Function} params.generateObject Model callback.
 * @param {Function} [params.convertPdfToImages] Optional PDF-to-image converter passed to the extractor.
 * @param {number} [params.concurrency=2] Maximum number of groups processed at once.
 * @param {object} [params.providerOptions] Base provider options.
 * @param {Function} [params.log] Optional logger.
 * @param {Function} [params.onProgress] Optional progress callback.
 * @returns {Promise<{resolved: number, unresolved: number, attempts: number,
 *   usage: {inputTokens: number, outputTokens: number}, details: Array<object>}>}
 */
async function resolveReferentialCoverages(params) {
  const {
    memory,
    pdfInput,
    pageCount,
    generateObject,
    convertPdfToImages,
    concurrency = 2,
    providerOptions,
    log,
    onProgress
  } = params;
  const totalUsage = { inputTokens: 0, outputTokens: 0 };
  // A missing token count must not turn the running totals into NaN.
  const trackUsage = (usage) => {
    if (!usage) return;
    totalUsage.inputTokens += usage.inputTokens ?? 0;
    totalUsage.outputTokens += usage.outputTokens ?? 0;
  };
  const isReferentialType = (valueType) => valueType === "referential" || valueType === "as_stated";
  // A value counts as resolved only when it is present and neither its type
  // nor its wording is still referential.
  const isConcrete = (value, valueType) => Boolean(value) && !isReferentialType(valueType) && !looksReferential(value);

  const coverages = memory.get("coverage_limits")?.coverages ?? [];
  const referentialCoverages = coverages.filter(
    (cov) => isReferentialType(cov.limitValueType) || isReferentialType(cov.deductibleValueType) || looksReferential(cov.limit) || looksReferential(cov.deductible)
  );
  const attempts = referentialCoverages.length;
  if (attempts === 0) {
    return {
      resolved: 0,
      unresolved: 0,
      attempts: 0,
      usage: totalUsage,
      details: []
    };
  }
  onProgress?.(
    `Found ${attempts} referential coverage(s) to resolve...`
  );

  // Group coverages by the location they refer to so each location is looked
  // up once. Unparseable references share the "unknown" group.
  const targetGroups = /* @__PURE__ */ new Map();
  for (const cov of referentialCoverages) {
    const refString = (looksReferential(cov.limit) ? cov.limit : void 0) ?? (looksReferential(cov.deductible) ? cov.deductible : void 0) ?? cov.limit ?? "";
    const target = parseReferenceTarget(refString) ?? "unknown";
    const group = targetGroups.get(target) ?? [];
    group.push(cov);
    targetGroups.set(target, group);
  }

  const sections = memory.get("sections")?.sections ?? [];
  const formInventory = memory.get("form_inventory")?.forms ?? [];
  const details = [];
  let resolved = 0;
  let unresolved = 0;
  // Built only once there is work to schedule.
  const limit = pLimit(concurrency);

  const resolveGroup = async (target, group) => {
    const reportedTarget = target === "unknown" ? void 0 : target;
    const markUnresolved = (coverage, status) => {
      details.push({
        coverageName: String(coverage.name ?? "unknown"),
        referenceTarget: reportedTarget,
        status
      });
      unresolved++;
    };

    const pageRange = await findReferencedPages({
      referenceTarget: target,
      sections,
      formInventory,
      pdfInput,
      pageCount,
      generateObject,
      providerOptions,
      log
    });
    if (!pageRange) {
      await log?.(
        `Could not locate pages for reference target "${target}"`
      );
      for (const coverage of group) {
        markUnresolved(coverage, "pages_not_found");
      }
      return;
    }
    onProgress?.(
      `Resolving "${target}" from pages ${pageRange.startPage}-${pageRange.endPage}...`
    );
    const promptCoverages = group.map((coverage) => ({
      name: String(coverage.name ?? "unknown"),
      limit: String(coverage.limit ?? ""),
      deductible: coverage.deductible ? String(coverage.deductible) : void 0,
      sectionRef: coverage.sectionRef ? String(coverage.sectionRef) : void 0
    }));
    try {
      const result = await runExtractor({
        name: "referential_lookup",
        prompt: buildReferentialLookupPrompt(promptCoverages),
        schema: ReferentialLookupSchema,
        pdfInput,
        startPage: pageRange.startPage,
        endPage: pageRange.endPage,
        generateObject,
        convertPdfToImages,
        maxTokens: 4096,
        providerOptions
      });
      trackUsage(result.usage);
      // Answers are matched back to coverages by case-insensitive name.
      const resolvedMap = /* @__PURE__ */ new Map();
      for (const rc of result.data.resolvedCoverages) {
        resolvedMap.set(rc.coverageName.toLowerCase(), rc);
      }
      for (const coverage of group) {
        const covName = String(coverage.name ?? "unknown");
        const rc = resolvedMap.get(covName.toLowerCase());
        if (!rc) {
          markUnresolved(coverage, "unresolved");
          continue;
        }
        const limitResolved = isConcrete(rc.resolvedLimit, rc.resolvedLimitValueType);
        const deductibleResolved = isConcrete(rc.resolvedDeductible, rc.resolvedDeductibleValueType);
        if (!limitResolved && !deductibleResolved) {
          markUnresolved(coverage, "unresolved");
          continue;
        }
        if (limitResolved) {
          coverage.limit = rc.resolvedLimit;
          coverage.limitValueType = rc.resolvedLimitValueType ?? "numeric";
        }
        if (deductibleResolved) {
          coverage.deductible = rc.resolvedDeductible;
          coverage.deductibleValueType = rc.resolvedDeductibleValueType ?? "numeric";
        }
        if (rc.pageNumber != null) {
          coverage.resolvedFromPage = rc.pageNumber;
        }
        if (rc.originalContent) {
          coverage.resolvedOriginalContent = rc.originalContent;
        }
        details.push({
          coverageName: covName,
          referenceTarget: reportedTarget,
          resolvedLimit: limitResolved ? rc.resolvedLimit : void 0,
          resolvedDeductible: deductibleResolved ? rc.resolvedDeductible : void 0,
          status: "resolved"
        });
        resolved++;
      }
    } catch (error) {
      await log?.(
        `Referential lookup extraction failed for target "${target}": ${error instanceof Error ? error.message : String(error)}`
      );
      for (const coverage of group) {
        markUnresolved(coverage, "unresolved");
      }
    }
  };

  await Promise.all(
    Array.from(targetGroups.entries(), ([target, group]) => limit(() => resolveGroup(target, group)))
  );
  onProgress?.(
    `Referential resolution complete: ${resolved} resolved, ${unresolved} unresolved out of ${attempts} attempts.`
  );
  return {
    resolved,
    unresolved,
    attempts,
    usage: totalUsage,
    details
  };
}
|
|
5357
|
+
|
|
4897
5358
|
// src/core/quality.ts
|
|
4898
5359
|
function evaluateQualityGate(params) {
|
|
4899
5360
|
const { issues, hasRoundWarnings = false } = params;
|
|
@@ -4930,7 +5391,7 @@ function addFormEntry(inventory, formNumber, source, extra) {
|
|
|
4930
5391
|
sources: [source]
|
|
4931
5392
|
});
|
|
4932
5393
|
}
|
|
4933
|
-
function
|
|
5394
|
+
function looksReferential2(value) {
|
|
4934
5395
|
if (typeof value !== "string") return false;
|
|
4935
5396
|
const normalized = value.toLowerCase();
|
|
4936
5397
|
return normalized.includes("shown in the declarations") || normalized.includes("shown in declarations") || normalized.includes("shown in the schedule") || normalized.includes("as stated") || normalized.includes("if applicable");
|
|
@@ -5054,7 +5515,7 @@ function buildExtractionReviewReport(params) {
|
|
|
5054
5515
|
itemName: typeof coverage.name === "string" ? coverage.name : void 0
|
|
5055
5516
|
});
|
|
5056
5517
|
}
|
|
5057
|
-
if (
|
|
5518
|
+
if (looksReferential2(coverage.limit) || looksReferential2(coverage.deductible)) {
|
|
5058
5519
|
deterministicIssues.push({
|
|
5059
5520
|
code: "coverage_referential_value",
|
|
5060
5521
|
severity: "warning",
|
|
@@ -5176,7 +5637,8 @@ function buildExtractionReviewReport(params) {
|
|
|
5176
5637
|
}));
|
|
5177
5638
|
const artifacts = [
|
|
5178
5639
|
{ kind: "form_inventory", label: "Form Inventory", itemCount: formInventory.length },
|
|
5179
|
-
{ kind: "page_map", label: "Page Map", itemCount: params.pageAssignments.length }
|
|
5640
|
+
{ kind: "page_map", label: "Page Map", itemCount: params.pageAssignments.length },
|
|
5641
|
+
{ kind: "referential_resolution", label: "Referential Resolution", itemCount: coverages.filter((c) => c.limitValueType === "referential" || c.limitValueType === "as_stated" || c.deductibleValueType === "referential" || c.deductibleValueType === "as_stated").length }
|
|
5180
5642
|
];
|
|
5181
5643
|
const qualityGateStatus = evaluateQualityGate({
|
|
5182
5644
|
issues: deterministicIssues,
|
|
@@ -5428,7 +5890,7 @@ function createExtractor(config) {
|
|
|
5428
5890
|
}))
|
|
5429
5891
|
};
|
|
5430
5892
|
}
|
|
5431
|
-
async function extract(
|
|
5893
|
+
async function extract(pdfInput, documentId, options) {
|
|
5432
5894
|
const id = documentId ?? `doc-${Date.now()}`;
|
|
5433
5895
|
const memory = /* @__PURE__ */ new Map();
|
|
5434
5896
|
totalUsage = { inputTokens: 0, outputTokens: 0 };
|
|
@@ -5446,20 +5908,27 @@ function createExtractor(config) {
|
|
|
5446
5908
|
memory.set(k, v);
|
|
5447
5909
|
}
|
|
5448
5910
|
}
|
|
5911
|
+
// Base64 form of `pdfInput`, filled in on first request and reused afterwards.
let pdfBase64Cache;
/**
 * Return the base64 form of the enclosing `pdfInput`, converting it on the
 * first call only.
 *
 * @returns {Promise<string>} Cached base64 text of the PDF.
 */
async function getPdfBase64ForExtraction() {
  if (pdfBase64Cache !== void 0) {
    return pdfBase64Cache;
  }
  pdfBase64Cache = await pdfInputToBase64(pdfInput);
  return pdfBase64Cache;
}
|
|
5449
5918
|
let classifyResult;
|
|
5450
5919
|
if (resumed?.classifyResult && pipelineCtx.isPhaseComplete("classify")) {
|
|
5451
5920
|
classifyResult = resumed.classifyResult;
|
|
5452
5921
|
onProgress?.("Resuming from checkpoint (classify complete)...");
|
|
5453
5922
|
} else {
|
|
5454
5923
|
onProgress?.("Classifying document...");
|
|
5455
|
-
const pageCount2 = await getPdfPageCount(
|
|
5924
|
+
const pageCount2 = await getPdfPageCount(pdfInput);
|
|
5456
5925
|
const classifyResponse = await safeGenerateObject(
|
|
5457
5926
|
generateObject,
|
|
5458
5927
|
{
|
|
5459
5928
|
prompt: buildClassifyPrompt(),
|
|
5460
5929
|
schema: ClassifyResultSchema,
|
|
5461
5930
|
maxTokens: 512,
|
|
5462
|
-
providerOptions:
|
|
5931
|
+
providerOptions: await buildPdfProviderOptions(pdfInput, providerOptions)
|
|
5463
5932
|
},
|
|
5464
5933
|
{
|
|
5465
5934
|
fallback: { documentType: "policy", policyTypes: ["other"], confidence: 0 },
|
|
@@ -5484,7 +5953,7 @@ function createExtractor(config) {
|
|
|
5484
5953
|
const { documentType, policyTypes } = classifyResult;
|
|
5485
5954
|
const primaryType = policyTypes[0] ?? "other";
|
|
5486
5955
|
const template = getTemplate(primaryType);
|
|
5487
|
-
const pageCount = resumed?.pageCount ?? await getPdfPageCount(
|
|
5956
|
+
const pageCount = resumed?.pageCount ?? await getPdfPageCount(pdfInput);
|
|
5488
5957
|
const templateHints = buildTemplateHints(primaryType, documentType, pageCount, template);
|
|
5489
5958
|
let formInventory;
|
|
5490
5959
|
if (resumed?.formInventory && pipelineCtx.isPhaseComplete("form_inventory")) {
|
|
@@ -5499,7 +5968,7 @@ function createExtractor(config) {
|
|
|
5499
5968
|
prompt: buildFormInventoryPrompt(templateHints),
|
|
5500
5969
|
schema: FormInventorySchema,
|
|
5501
5970
|
maxTokens: 2048,
|
|
5502
|
-
providerOptions:
|
|
5971
|
+
providerOptions: await buildPdfProviderOptions(pdfInput, providerOptions)
|
|
5503
5972
|
},
|
|
5504
5973
|
{
|
|
5505
5974
|
fallback: { forms: [] },
|
|
@@ -5527,9 +5996,10 @@ function createExtractor(config) {
|
|
|
5527
5996
|
const chunkSize = 8;
|
|
5528
5997
|
const collectedAssignments = [];
|
|
5529
5998
|
const formInventoryHint = formInventory?.forms.length ? formatFormInventoryForPageMap(formInventory.forms) : void 0;
|
|
5999
|
+
const extractionBase64 = await getPdfBase64ForExtraction();
|
|
5530
6000
|
for (let startPage = 1; startPage <= pageCount; startPage += chunkSize) {
|
|
5531
6001
|
const endPage = Math.min(pageCount, startPage + chunkSize - 1);
|
|
5532
|
-
const pagesPdf = await extractPageRange(
|
|
6002
|
+
const pagesPdf = await extractPageRange(extractionBase64, startPage, endPage);
|
|
5533
6003
|
const mapResponse = await safeGenerateObject(
|
|
5534
6004
|
generateObject,
|
|
5535
6005
|
{
|
|
@@ -5609,7 +6079,7 @@ function createExtractor(config) {
|
|
|
5609
6079
|
name: task.extractorName,
|
|
5610
6080
|
prompt: ext.buildPrompt(),
|
|
5611
6081
|
schema: ext.schema,
|
|
5612
|
-
|
|
6082
|
+
pdfInput,
|
|
5613
6083
|
startPage: task.startPage,
|
|
5614
6084
|
endPage: task.endPage,
|
|
5615
6085
|
generateObject,
|
|
@@ -5639,7 +6109,7 @@ function createExtractor(config) {
|
|
|
5639
6109
|
name: "supplementary",
|
|
5640
6110
|
prompt: buildSupplementaryPrompt(alreadyExtractedSummary),
|
|
5641
6111
|
schema: SupplementarySchema,
|
|
5642
|
-
|
|
6112
|
+
pdfInput,
|
|
5643
6113
|
startPage: 1,
|
|
5644
6114
|
endPage: pageCount,
|
|
5645
6115
|
generateObject,
|
|
@@ -5663,6 +6133,37 @@ function createExtractor(config) {
|
|
|
5663
6133
|
memory: Object.fromEntries(memory)
|
|
5664
6134
|
});
|
|
5665
6135
|
}
|
|
6136
|
+
if (!pipelineCtx.isPhaseComplete("resolve_referential")) {
|
|
6137
|
+
onProgress?.("Resolving referential coverage limits...");
|
|
6138
|
+
try {
|
|
6139
|
+
const resolution = await resolveReferentialCoverages({
|
|
6140
|
+
memory,
|
|
6141
|
+
pdfInput,
|
|
6142
|
+
pageCount,
|
|
6143
|
+
generateObject,
|
|
6144
|
+
convertPdfToImages,
|
|
6145
|
+
concurrency,
|
|
6146
|
+
providerOptions,
|
|
6147
|
+
log,
|
|
6148
|
+
onProgress
|
|
6149
|
+
});
|
|
6150
|
+
trackUsage(resolution.usage);
|
|
6151
|
+
if (resolution.attempts > 0) {
|
|
6152
|
+
await log?.(`Referential resolution: ${resolution.resolved}/${resolution.attempts} resolved, ${resolution.unresolved} unresolved`);
|
|
6153
|
+
}
|
|
6154
|
+
} catch (error) {
|
|
6155
|
+
await log?.(`Referential resolution failed, continuing: ${error instanceof Error ? error.message : String(error)}`);
|
|
6156
|
+
}
|
|
6157
|
+
await pipelineCtx.save("resolve_referential", {
|
|
6158
|
+
id,
|
|
6159
|
+
pageCount,
|
|
6160
|
+
classifyResult,
|
|
6161
|
+
formInventory,
|
|
6162
|
+
pageAssignments,
|
|
6163
|
+
plan,
|
|
6164
|
+
memory: Object.fromEntries(memory)
|
|
6165
|
+
});
|
|
6166
|
+
}
|
|
5666
6167
|
let reviewRounds = resumed?.reviewReport?.reviewRoundRecords ?? [];
|
|
5667
6168
|
let reviewReport = resumed?.reviewReport;
|
|
5668
6169
|
if (!pipelineCtx.isPhaseComplete("review")) {
|
|
@@ -5677,7 +6178,7 @@ function createExtractor(config) {
|
|
|
5677
6178
|
prompt: buildReviewPrompt(template.required, extractedKeys, extractionSummary, pageMapSummary),
|
|
5678
6179
|
schema: ReviewResultSchema,
|
|
5679
6180
|
maxTokens: 1536,
|
|
5680
|
-
providerOptions:
|
|
6181
|
+
providerOptions: await buildPdfProviderOptions(pdfInput, providerOptions)
|
|
5681
6182
|
},
|
|
5682
6183
|
{
|
|
5683
6184
|
fallback: { complete: true, missingFields: [], qualityIssues: [], additionalTasks: [] },
|
|
@@ -5705,7 +6206,7 @@ function createExtractor(config) {
|
|
|
5705
6206
|
name: task.extractorName,
|
|
5706
6207
|
prompt: ext.buildPrompt(),
|
|
5707
6208
|
schema: ext.schema,
|
|
5708
|
-
|
|
6209
|
+
pdfInput,
|
|
5709
6210
|
startPage: task.startPage,
|
|
5710
6211
|
endPage: task.endPage,
|
|
5711
6212
|
generateObject,
|
|
@@ -6038,8 +6539,8 @@ Respond with JSON only:
|
|
|
6038
6539
|
}`;
|
|
6039
6540
|
|
|
6040
6541
|
// src/schemas/application.ts
|
|
6041
|
-
import { z as
|
|
6042
|
-
var FieldTypeSchema =
|
|
6542
|
+
import { z as z36 } from "zod";
|
|
6543
|
+
var FieldTypeSchema = z36.enum([
|
|
6043
6544
|
"text",
|
|
6044
6545
|
"numeric",
|
|
6045
6546
|
"currency",
|
|
@@ -6048,131 +6549,131 @@ var FieldTypeSchema = z34.enum([
|
|
|
6048
6549
|
"table",
|
|
6049
6550
|
"declaration"
|
|
6050
6551
|
]);
|
|
6051
|
-
var ApplicationFieldSchema =
|
|
6052
|
-
id:
|
|
6053
|
-
label:
|
|
6054
|
-
section:
|
|
6552
|
+
var ApplicationFieldSchema = z36.object({
|
|
6553
|
+
id: z36.string(),
|
|
6554
|
+
label: z36.string(),
|
|
6555
|
+
section: z36.string(),
|
|
6055
6556
|
fieldType: FieldTypeSchema,
|
|
6056
|
-
required:
|
|
6057
|
-
options:
|
|
6058
|
-
columns:
|
|
6059
|
-
requiresExplanationIfYes:
|
|
6060
|
-
condition:
|
|
6061
|
-
dependsOn:
|
|
6062
|
-
whenValue:
|
|
6557
|
+
required: z36.boolean(),
|
|
6558
|
+
options: z36.array(z36.string()).optional(),
|
|
6559
|
+
columns: z36.array(z36.string()).optional(),
|
|
6560
|
+
requiresExplanationIfYes: z36.boolean().optional(),
|
|
6561
|
+
condition: z36.object({
|
|
6562
|
+
dependsOn: z36.string(),
|
|
6563
|
+
whenValue: z36.string()
|
|
6063
6564
|
}).optional(),
|
|
6064
|
-
value:
|
|
6065
|
-
source:
|
|
6066
|
-
confidence:
|
|
6565
|
+
value: z36.string().optional(),
|
|
6566
|
+
source: z36.string().optional().describe("Where the value came from: auto-fill, user, lookup"),
|
|
6567
|
+
confidence: z36.enum(["confirmed", "high", "medium", "low"]).optional()
|
|
6067
6568
|
});
|
|
6068
|
-
var ApplicationClassifyResultSchema =
|
|
6069
|
-
isApplication:
|
|
6070
|
-
confidence:
|
|
6071
|
-
applicationType:
|
|
6569
|
+
var ApplicationClassifyResultSchema = z36.object({
|
|
6570
|
+
isApplication: z36.boolean(),
|
|
6571
|
+
confidence: z36.number().min(0).max(1),
|
|
6572
|
+
applicationType: z36.string().nullable()
|
|
6072
6573
|
});
|
|
6073
|
-
var FieldExtractionResultSchema =
|
|
6074
|
-
fields:
|
|
6574
|
+
var FieldExtractionResultSchema = z36.object({
|
|
6575
|
+
fields: z36.array(ApplicationFieldSchema)
|
|
6075
6576
|
});
|
|
6076
|
-
var AutoFillMatchSchema =
|
|
6077
|
-
fieldId:
|
|
6078
|
-
value:
|
|
6079
|
-
confidence:
|
|
6080
|
-
contextKey:
|
|
6577
|
+
var AutoFillMatchSchema = z36.object({
|
|
6578
|
+
fieldId: z36.string(),
|
|
6579
|
+
value: z36.string(),
|
|
6580
|
+
confidence: z36.enum(["confirmed"]),
|
|
6581
|
+
contextKey: z36.string()
|
|
6081
6582
|
});
|
|
6082
|
-
var AutoFillResultSchema =
|
|
6083
|
-
matches:
|
|
6583
|
+
var AutoFillResultSchema = z36.object({
|
|
6584
|
+
matches: z36.array(AutoFillMatchSchema)
|
|
6084
6585
|
});
|
|
6085
|
-
var QuestionBatchResultSchema =
|
|
6086
|
-
batches:
|
|
6586
|
+
var QuestionBatchResultSchema = z36.object({
|
|
6587
|
+
batches: z36.array(z36.array(z36.string()).describe("Array of field IDs in this batch"))
|
|
6087
6588
|
});
|
|
6088
|
-
var LookupRequestSchema =
|
|
6089
|
-
type:
|
|
6090
|
-
description:
|
|
6091
|
-
url:
|
|
6092
|
-
targetFieldIds:
|
|
6589
|
+
var LookupRequestSchema = z36.object({
|
|
6590
|
+
type: z36.string().describe("Type of lookup: 'records', 'website', 'policy'"),
|
|
6591
|
+
description: z36.string(),
|
|
6592
|
+
url: z36.string().optional(),
|
|
6593
|
+
targetFieldIds: z36.array(z36.string())
|
|
6093
6594
|
});
|
|
6094
|
-
var ReplyIntentSchema =
|
|
6095
|
-
primaryIntent:
|
|
6096
|
-
hasAnswers:
|
|
6097
|
-
questionText:
|
|
6098
|
-
questionFieldIds:
|
|
6099
|
-
lookupRequests:
|
|
6595
|
+
var ReplyIntentSchema = z36.object({
|
|
6596
|
+
primaryIntent: z36.enum(["answers_only", "question", "lookup_request", "mixed"]),
|
|
6597
|
+
hasAnswers: z36.boolean(),
|
|
6598
|
+
questionText: z36.string().optional(),
|
|
6599
|
+
questionFieldIds: z36.array(z36.string()).optional(),
|
|
6600
|
+
lookupRequests: z36.array(LookupRequestSchema).optional()
|
|
6100
6601
|
});
|
|
6101
|
-
var ParsedAnswerSchema =
|
|
6102
|
-
fieldId:
|
|
6103
|
-
value:
|
|
6104
|
-
explanation:
|
|
6602
|
+
var ParsedAnswerSchema = z36.object({
|
|
6603
|
+
fieldId: z36.string(),
|
|
6604
|
+
value: z36.string(),
|
|
6605
|
+
explanation: z36.string().optional()
|
|
6105
6606
|
});
|
|
6106
|
-
var AnswerParsingResultSchema =
|
|
6107
|
-
answers:
|
|
6108
|
-
unanswered:
|
|
6607
|
+
var AnswerParsingResultSchema = z36.object({
|
|
6608
|
+
answers: z36.array(ParsedAnswerSchema),
|
|
6609
|
+
unanswered: z36.array(z36.string()).describe("Field IDs that were not answered")
|
|
6109
6610
|
});
|
|
6110
|
-
var LookupFillSchema =
|
|
6111
|
-
fieldId:
|
|
6112
|
-
value:
|
|
6113
|
-
source:
|
|
6611
|
+
var LookupFillSchema = z36.object({
|
|
6612
|
+
fieldId: z36.string(),
|
|
6613
|
+
value: z36.string(),
|
|
6614
|
+
source: z36.string().describe("Specific citable reference, e.g. 'GL Policy #POL-12345 (Hartford)'")
|
|
6114
6615
|
});
|
|
6115
|
-
var LookupFillResultSchema =
|
|
6116
|
-
fills:
|
|
6117
|
-
unfillable:
|
|
6118
|
-
explanation:
|
|
6616
|
+
var LookupFillResultSchema = z36.object({
|
|
6617
|
+
fills: z36.array(LookupFillSchema),
|
|
6618
|
+
unfillable: z36.array(z36.string()),
|
|
6619
|
+
explanation: z36.string().optional()
|
|
6119
6620
|
});
|
|
6120
|
-
var FlatPdfPlacementSchema =
|
|
6121
|
-
fieldId:
|
|
6122
|
-
page:
|
|
6123
|
-
x:
|
|
6124
|
-
y:
|
|
6125
|
-
text:
|
|
6126
|
-
fontSize:
|
|
6127
|
-
isCheckmark:
|
|
6621
|
+
var FlatPdfPlacementSchema = z36.object({
|
|
6622
|
+
fieldId: z36.string(),
|
|
6623
|
+
page: z36.number(),
|
|
6624
|
+
x: z36.number().describe("Percentage from left edge (0-100)"),
|
|
6625
|
+
y: z36.number().describe("Percentage from top edge (0-100)"),
|
|
6626
|
+
text: z36.string(),
|
|
6627
|
+
fontSize: z36.number().optional(),
|
|
6628
|
+
isCheckmark: z36.boolean().optional()
|
|
6128
6629
|
});
|
|
6129
|
-
var AcroFormMappingSchema =
|
|
6130
|
-
fieldId:
|
|
6131
|
-
acroFormName:
|
|
6132
|
-
value:
|
|
6630
|
+
var AcroFormMappingSchema = z36.object({
|
|
6631
|
+
fieldId: z36.string(),
|
|
6632
|
+
acroFormName: z36.string(),
|
|
6633
|
+
value: z36.string()
|
|
6133
6634
|
});
|
|
6134
|
-
var QualityGateStatusSchema =
|
|
6135
|
-
var QualitySeveritySchema =
|
|
6136
|
-
var ApplicationQualityIssueSchema =
|
|
6137
|
-
code:
|
|
6635
|
+
var QualityGateStatusSchema = z36.enum(["passed", "warning", "failed"]);
|
|
6636
|
+
var QualitySeveritySchema = z36.enum(["info", "warning", "blocking"]);
|
|
6637
|
+
var ApplicationQualityIssueSchema = z36.object({
|
|
6638
|
+
code: z36.string(),
|
|
6138
6639
|
severity: QualitySeveritySchema,
|
|
6139
|
-
message:
|
|
6140
|
-
fieldId:
|
|
6640
|
+
message: z36.string(),
|
|
6641
|
+
fieldId: z36.string().optional()
|
|
6141
6642
|
});
|
|
6142
|
-
var ApplicationQualityRoundSchema =
|
|
6143
|
-
round:
|
|
6144
|
-
kind:
|
|
6643
|
+
var ApplicationQualityRoundSchema = z36.object({
|
|
6644
|
+
round: z36.number(),
|
|
6645
|
+
kind: z36.string(),
|
|
6145
6646
|
status: QualityGateStatusSchema,
|
|
6146
|
-
summary:
|
|
6647
|
+
summary: z36.string().optional()
|
|
6147
6648
|
});
|
|
6148
|
-
var ApplicationQualityArtifactSchema =
|
|
6149
|
-
kind:
|
|
6150
|
-
label:
|
|
6151
|
-
itemCount:
|
|
6649
|
+
var ApplicationQualityArtifactSchema = z36.object({
|
|
6650
|
+
kind: z36.string(),
|
|
6651
|
+
label: z36.string().optional(),
|
|
6652
|
+
itemCount: z36.number().optional()
|
|
6152
6653
|
});
|
|
6153
|
-
var ApplicationEmailReviewSchema =
|
|
6154
|
-
issues:
|
|
6654
|
+
var ApplicationEmailReviewSchema = z36.object({
|
|
6655
|
+
issues: z36.array(ApplicationQualityIssueSchema),
|
|
6155
6656
|
qualityGateStatus: QualityGateStatusSchema
|
|
6156
6657
|
});
|
|
6157
|
-
var ApplicationQualityReportSchema =
|
|
6158
|
-
issues:
|
|
6159
|
-
rounds:
|
|
6160
|
-
artifacts:
|
|
6658
|
+
var ApplicationQualityReportSchema = z36.object({
|
|
6659
|
+
issues: z36.array(ApplicationQualityIssueSchema),
|
|
6660
|
+
rounds: z36.array(ApplicationQualityRoundSchema).optional(),
|
|
6661
|
+
artifacts: z36.array(ApplicationQualityArtifactSchema).optional(),
|
|
6161
6662
|
emailReview: ApplicationEmailReviewSchema.optional(),
|
|
6162
6663
|
qualityGateStatus: QualityGateStatusSchema
|
|
6163
6664
|
});
|
|
6164
|
-
var ApplicationStateSchema =
|
|
6165
|
-
id:
|
|
6166
|
-
pdfBase64:
|
|
6167
|
-
title:
|
|
6168
|
-
applicationType:
|
|
6169
|
-
fields:
|
|
6170
|
-
batches:
|
|
6171
|
-
currentBatchIndex:
|
|
6665
|
+
var ApplicationStateSchema = z36.object({
|
|
6666
|
+
id: z36.string(),
|
|
6667
|
+
pdfBase64: z36.string().optional().describe("Original PDF, omitted after extraction"),
|
|
6668
|
+
title: z36.string().optional(),
|
|
6669
|
+
applicationType: z36.string().nullable().optional(),
|
|
6670
|
+
fields: z36.array(ApplicationFieldSchema),
|
|
6671
|
+
batches: z36.array(z36.array(z36.string())).optional(),
|
|
6672
|
+
currentBatchIndex: z36.number().default(0),
|
|
6172
6673
|
qualityReport: ApplicationQualityReportSchema.optional(),
|
|
6173
|
-
status:
|
|
6174
|
-
createdAt:
|
|
6175
|
-
updatedAt:
|
|
6674
|
+
status: z36.enum(["classifying", "extracting", "auto_filling", "batching", "collecting", "confirming", "mapping", "complete"]),
|
|
6675
|
+
createdAt: z36.number(),
|
|
6676
|
+
updatedAt: z36.number()
|
|
6176
6677
|
});
|
|
6177
6678
|
|
|
6178
6679
|
// src/application/agents/classifier.ts
|
|
@@ -7309,90 +7810,91 @@ Respond with the final answer, deduplicated citations array, overall confidence
|
|
|
7309
7810
|
}
|
|
7310
7811
|
|
|
7311
7812
|
// src/schemas/query.ts
|
|
7312
|
-
import { z as
|
|
7313
|
-
var QueryIntentSchema =
|
|
7813
|
+
import { z as z37 } from "zod";
|
|
7814
|
+
var QueryIntentSchema = z37.enum([
|
|
7314
7815
|
"policy_question",
|
|
7315
7816
|
"coverage_comparison",
|
|
7316
7817
|
"document_search",
|
|
7317
7818
|
"claims_inquiry",
|
|
7318
7819
|
"general_knowledge"
|
|
7319
7820
|
]);
|
|
7320
|
-
var QueryAttachmentKindSchema =
|
|
7321
|
-
var QueryAttachmentSchema =
|
|
7322
|
-
id:
|
|
7821
|
+
var QueryAttachmentKindSchema = z37.enum(["image", "pdf", "text"]);
|
|
7822
|
+
var QueryAttachmentSchema = z37.object({
|
|
7823
|
+
id: z37.string().optional().describe("Optional stable attachment ID from the caller"),
|
|
7323
7824
|
kind: QueryAttachmentKindSchema,
|
|
7324
|
-
name:
|
|
7325
|
-
mimeType:
|
|
7326
|
-
base64:
|
|
7327
|
-
text:
|
|
7328
|
-
description:
|
|
7825
|
+
name: z37.string().optional().describe("Original filename or user-facing label"),
|
|
7826
|
+
mimeType: z37.string().optional().describe("MIME type such as image/jpeg or application/pdf"),
|
|
7827
|
+
base64: z37.string().optional().describe("Base64-encoded file content for image/pdf attachments"),
|
|
7828
|
+
text: z37.string().optional().describe("Plain-text attachment content when available"),
|
|
7829
|
+
description: z37.string().optional().describe("Caller-provided description of the attachment")
|
|
7329
7830
|
});
|
|
7330
|
-
var SubQuestionSchema =
|
|
7331
|
-
question:
|
|
7831
|
+
var SubQuestionSchema = z37.object({
|
|
7832
|
+
question: z37.string().describe("Atomic sub-question to retrieve and answer independently"),
|
|
7332
7833
|
intent: QueryIntentSchema,
|
|
7333
|
-
chunkTypes:
|
|
7334
|
-
documentFilters:
|
|
7335
|
-
type:
|
|
7336
|
-
carrier:
|
|
7337
|
-
insuredName:
|
|
7338
|
-
policyNumber:
|
|
7339
|
-
quoteNumber:
|
|
7834
|
+
chunkTypes: z37.array(z37.string()).optional().describe("Chunk types to filter retrieval (e.g. coverage, endorsement, declaration)"),
|
|
7835
|
+
documentFilters: z37.object({
|
|
7836
|
+
type: z37.enum(["policy", "quote"]).optional(),
|
|
7837
|
+
carrier: z37.string().optional(),
|
|
7838
|
+
insuredName: z37.string().optional(),
|
|
7839
|
+
policyNumber: z37.string().optional(),
|
|
7840
|
+
quoteNumber: z37.string().optional(),
|
|
7841
|
+
policyTypes: z37.array(PolicyTypeSchema).optional().describe("Filter by policy type (e.g. homeowners_ho3, renters_ho4, pet) to avoid mixing up similar policies")
|
|
7340
7842
|
}).optional().describe("Structured filters to narrow document lookup")
|
|
7341
7843
|
});
|
|
7342
|
-
var QueryClassifyResultSchema =
|
|
7844
|
+
var QueryClassifyResultSchema = z37.object({
|
|
7343
7845
|
intent: QueryIntentSchema,
|
|
7344
|
-
subQuestions:
|
|
7345
|
-
requiresDocumentLookup:
|
|
7346
|
-
requiresChunkSearch:
|
|
7347
|
-
requiresConversationHistory:
|
|
7846
|
+
subQuestions: z37.array(SubQuestionSchema).min(1).describe("Decomposed atomic sub-questions"),
|
|
7847
|
+
requiresDocumentLookup: z37.boolean().describe("Whether structured document lookup is needed"),
|
|
7848
|
+
requiresChunkSearch: z37.boolean().describe("Whether semantic chunk search is needed"),
|
|
7849
|
+
requiresConversationHistory: z37.boolean().describe("Whether conversation history is relevant")
|
|
7348
7850
|
});
|
|
7349
|
-
var EvidenceItemSchema =
|
|
7350
|
-
source:
|
|
7351
|
-
chunkId:
|
|
7352
|
-
documentId:
|
|
7353
|
-
turnId:
|
|
7354
|
-
attachmentId:
|
|
7355
|
-
text:
|
|
7356
|
-
relevance:
|
|
7357
|
-
metadata:
|
|
7851
|
+
var EvidenceItemSchema = z37.object({
|
|
7852
|
+
source: z37.enum(["chunk", "document", "conversation", "attachment"]),
|
|
7853
|
+
chunkId: z37.string().optional(),
|
|
7854
|
+
documentId: z37.string().optional(),
|
|
7855
|
+
turnId: z37.string().optional(),
|
|
7856
|
+
attachmentId: z37.string().optional(),
|
|
7857
|
+
text: z37.string().describe("Text excerpt from the source"),
|
|
7858
|
+
relevance: z37.number().min(0).max(1),
|
|
7859
|
+
metadata: z37.array(z37.object({ key: z37.string(), value: z37.string() })).optional()
|
|
7358
7860
|
});
|
|
7359
|
-
var AttachmentInterpretationSchema =
|
|
7360
|
-
summary:
|
|
7361
|
-
extractedFacts:
|
|
7362
|
-
recommendedFocus:
|
|
7363
|
-
confidence:
|
|
7861
|
+
var AttachmentInterpretationSchema = z37.object({
|
|
7862
|
+
summary: z37.string().describe("Concise summary of what the attachment shows or contains"),
|
|
7863
|
+
extractedFacts: z37.array(z37.string()).describe("Specific observable or document facts grounded in the attachment"),
|
|
7864
|
+
recommendedFocus: z37.array(z37.string()).describe("Important details to incorporate when answering follow-up questions"),
|
|
7865
|
+
confidence: z37.number().min(0).max(1)
|
|
7364
7866
|
});
|
|
7365
|
-
var RetrievalResultSchema =
|
|
7366
|
-
subQuestion:
|
|
7367
|
-
evidence:
|
|
7867
|
+
var RetrievalResultSchema = z37.object({
|
|
7868
|
+
subQuestion: z37.string(),
|
|
7869
|
+
evidence: z37.array(EvidenceItemSchema)
|
|
7368
7870
|
});
|
|
7369
|
-
var CitationSchema =
|
|
7370
|
-
index:
|
|
7371
|
-
chunkId:
|
|
7372
|
-
documentId:
|
|
7373
|
-
documentType:
|
|
7374
|
-
field:
|
|
7375
|
-
quote:
|
|
7376
|
-
relevance:
|
|
7871
|
+
var CitationSchema = z37.object({
|
|
7872
|
+
index: z37.number().describe("Citation number [1], [2], etc."),
|
|
7873
|
+
chunkId: z37.string().describe("Source chunk ID, e.g. doc-123:coverage:2"),
|
|
7874
|
+
documentId: z37.string(),
|
|
7875
|
+
documentType: z37.enum(["policy", "quote"]).optional(),
|
|
7876
|
+
field: z37.string().optional().describe("Specific field path, e.g. coverages[0].deductible"),
|
|
7877
|
+
quote: z37.string().describe("Exact text from source that supports the claim"),
|
|
7878
|
+
relevance: z37.number().min(0).max(1)
|
|
7377
7879
|
});
|
|
7378
|
-
var SubAnswerSchema =
|
|
7379
|
-
subQuestion:
|
|
7380
|
-
answer:
|
|
7381
|
-
citations:
|
|
7382
|
-
confidence:
|
|
7383
|
-
needsMoreContext:
|
|
7880
|
+
var SubAnswerSchema = z37.object({
|
|
7881
|
+
subQuestion: z37.string(),
|
|
7882
|
+
answer: z37.string(),
|
|
7883
|
+
citations: z37.array(CitationSchema),
|
|
7884
|
+
confidence: z37.number().min(0).max(1),
|
|
7885
|
+
needsMoreContext: z37.boolean().describe("True if evidence was insufficient to answer fully")
|
|
7384
7886
|
});
|
|
7385
|
-
var VerifyResultSchema =
|
|
7386
|
-
approved:
|
|
7387
|
-
issues:
|
|
7388
|
-
retrySubQuestions:
|
|
7887
|
+
var VerifyResultSchema = z37.object({
|
|
7888
|
+
approved: z37.boolean().describe("Whether all sub-answers are adequately grounded"),
|
|
7889
|
+
issues: z37.array(z37.string()).describe("Specific grounding or consistency issues found"),
|
|
7890
|
+
retrySubQuestions: z37.array(z37.string()).optional().describe("Sub-questions that need additional retrieval or re-reasoning")
|
|
7389
7891
|
});
|
|
7390
|
-
var QueryResultSchema =
|
|
7391
|
-
answer:
|
|
7392
|
-
citations:
|
|
7892
|
+
var QueryResultSchema = z37.object({
|
|
7893
|
+
answer: z37.string(),
|
|
7894
|
+
citations: z37.array(CitationSchema),
|
|
7393
7895
|
intent: QueryIntentSchema,
|
|
7394
|
-
confidence:
|
|
7395
|
-
followUp:
|
|
7896
|
+
confidence: z37.number().min(0).max(1),
|
|
7897
|
+
followUp: z37.string().optional().describe("Suggested follow-up question if applicable")
|
|
7396
7898
|
});
|
|
7397
7899
|
|
|
7398
7900
|
// src/query/retriever.ts
|
|
@@ -8549,6 +9051,7 @@ export {
|
|
|
8549
9051
|
buildIntentPrompt,
|
|
8550
9052
|
buildInterpretAttachmentPrompt,
|
|
8551
9053
|
buildLookupFillPrompt,
|
|
9054
|
+
buildPdfProviderOptions,
|
|
8552
9055
|
buildQueryClassifyPrompt,
|
|
8553
9056
|
buildQuestionBatchPrompt,
|
|
8554
9057
|
buildQuotesPoliciesPrompt,
|
|
@@ -8566,10 +9069,14 @@ export {
|
|
|
8566
9069
|
fillAcroForm,
|
|
8567
9070
|
getAcroFormFields,
|
|
8568
9071
|
getExtractor,
|
|
9072
|
+
getFileIdentifier,
|
|
8569
9073
|
getPdfPageCount,
|
|
8570
9074
|
getTemplate,
|
|
9075
|
+
isFileReference,
|
|
8571
9076
|
overlayTextOnPdf,
|
|
8572
9077
|
pLimit,
|
|
9078
|
+
pdfInputToBase64,
|
|
9079
|
+
pdfInputToBytes,
|
|
8573
9080
|
safeGenerateObject,
|
|
8574
9081
|
sanitizeNulls,
|
|
8575
9082
|
stripFences,
|