@claritylabs/cl-sdk 0.14.2 → 0.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -1423,34 +1423,134 @@ import {
1423
1423
  StandardFonts,
1424
1424
  rgb
1425
1425
  } from "pdf-lib";
1426
- async function extractPageRange(pdfBase64, startPage, endPage) {
1427
- const srcBytes = typeof Buffer !== "undefined" ? Buffer.from(pdfBase64, "base64") : Uint8Array.from(atob(pdfBase64), (c) => c.charCodeAt(0));
1426
+ function isFileIdRef(input) {
1427
+ return typeof input === "object" && input !== null && "fileId" in input;
1428
+ }
1429
+ function isUrl(input) {
1430
+ return input instanceof URL;
1431
+ }
1432
+ function isBytes(input) {
1433
+ return input instanceof Uint8Array;
1434
+ }
1435
+ async function pdfInputToBytes(input) {
1436
+ if (isFileIdRef(input)) {
1437
+ throw new Error(
1438
+ "Cannot convert fileId reference to bytes. Pass the fileId directly to your provider callback instead."
1439
+ );
1440
+ }
1441
+ if (isUrl(input)) {
1442
+ if (input.protocol === "file:") {
1443
+ if (typeof process !== "undefined" && process.versions?.node) {
1444
+ const fs = await import("fs/promises");
1445
+ const buffer = await fs.readFile(input.pathname);
1446
+ return new Uint8Array(buffer);
1447
+ }
1448
+ throw new Error("File URLs not supported in browser environment");
1449
+ }
1450
+ const response = await fetch(input.toString());
1451
+ if (!response.ok) {
1452
+ throw new Error(`Failed to fetch PDF: ${response.status} ${response.statusText}`);
1453
+ }
1454
+ const arrayBuffer = await response.arrayBuffer();
1455
+ return new Uint8Array(arrayBuffer);
1456
+ }
1457
+ if (isBytes(input)) {
1458
+ return input;
1459
+ }
1460
+ if (typeof Buffer !== "undefined") {
1461
+ return new Uint8Array(Buffer.from(input, "base64"));
1462
+ }
1463
+ return Uint8Array.from(atob(input), (c) => c.charCodeAt(0));
1464
+ }
1465
+ async function pdfInputToBase64(input) {
1466
+ if (isFileIdRef(input)) {
1467
+ throw new Error(
1468
+ "Cannot convert fileId reference to base64. Pass the fileId directly to your provider callback instead."
1469
+ );
1470
+ }
1471
+ if (isUrl(input)) {
1472
+ const bytes = await pdfInputToBytes(input);
1473
+ return bytesToBase64(bytes);
1474
+ }
1475
+ if (isBytes(input)) {
1476
+ return bytesToBase64(input);
1477
+ }
1478
+ return input;
1479
+ }
1480
+ function bytesToBase64(bytes) {
1481
+ if (typeof Buffer !== "undefined") {
1482
+ return Buffer.from(bytes).toString("base64");
1483
+ }
1484
+ let binary = "";
1485
+ for (let i = 0; i < bytes.length; i++) {
1486
+ binary += String.fromCharCode(bytes[i]);
1487
+ }
1488
+ return btoa(binary);
1489
+ }
1490
+ function isFileReference(input) {
1491
+ return isFileIdRef(input) || isUrl(input);
1492
+ }
1493
+ function getFileIdentifier(input) {
1494
+ if (isFileIdRef(input)) {
1495
+ return { fileId: input.fileId };
1496
+ }
1497
+ if (isUrl(input)) {
1498
+ return { url: input.toString() };
1499
+ }
1500
+ return void 0;
1501
+ }
1502
+ async function getPdfPageCount(input) {
1503
+ const bytes = await pdfInputToBytes(input);
1504
+ const doc = await PDFDocument.load(bytes, { ignoreEncryption: true });
1505
+ return doc.getPageCount();
1506
+ }
1507
+ async function extractPageRange(input, startPage, endPage) {
1508
+ if (isFileIdRef(input)) {
1509
+ throw new Error(
1510
+ "Cannot extract page range from fileId reference. The provider must handle fileId inputs directly or you must pass the full PDF as base64/bytes."
1511
+ );
1512
+ }
1513
+ if (isUrl(input) && (input.protocol === "http:" || input.protocol === "https:")) {
1514
+ throw new Error(
1515
+ "Cannot extract page range from remote URL. Either pass the full PDF as base64/bytes, or download it first."
1516
+ );
1517
+ }
1518
+ const srcBytes = await pdfInputToBytes(input);
1428
1519
  const srcDoc = await PDFDocument.load(srcBytes, { ignoreEncryption: true });
1429
1520
  const totalPages = srcDoc.getPageCount();
1430
1521
  const start = Math.max(startPage - 1, 0);
1431
1522
  const end = Math.min(endPage, totalPages) - 1;
1432
1523
  if (start === 0 && end >= totalPages - 1) {
1433
- return pdfBase64;
1524
+ if (isBytes(input)) {
1525
+ return bytesToBase64(input);
1526
+ }
1527
+ if (typeof input === "string") {
1528
+ return input;
1529
+ }
1530
+ return bytesToBase64(srcBytes);
1434
1531
  }
1435
1532
  const newDoc = await PDFDocument.create();
1436
1533
  const indices = Array.from({ length: end - start + 1 }, (_, i) => start + i);
1437
1534
  const pages = await newDoc.copyPages(srcDoc, indices);
1438
1535
  pages.forEach((page) => newDoc.addPage(page));
1439
1536
  const bytes = await newDoc.save();
1440
- if (typeof Buffer !== "undefined") {
1441
- return Buffer.from(bytes).toString("base64");
1537
+ return bytesToBase64(new Uint8Array(bytes));
1538
+ }
1539
+ async function buildPdfProviderOptions(input, existingOptions) {
1540
+ const options = { ...existingOptions };
1541
+ if (isFileIdRef(input)) {
1542
+ options.fileId = input.fileId;
1543
+ if (input.mimeType) {
1544
+ options.fileMimeType = input.mimeType;
1545
+ }
1546
+ return options;
1442
1547
  }
1443
- let binary = "";
1444
- const uint8 = new Uint8Array(bytes);
1445
- for (let i = 0; i < uint8.length; i++) {
1446
- binary += String.fromCharCode(uint8[i]);
1548
+ if (isUrl(input)) {
1549
+ options.pdfUrl = input;
1550
+ return options;
1447
1551
  }
1448
- return btoa(binary);
1449
- }
1450
- async function getPdfPageCount(pdfBase64) {
1451
- const srcBytes = typeof Buffer !== "undefined" ? Buffer.from(pdfBase64, "base64") : Uint8Array.from(atob(pdfBase64), (c) => c.charCodeAt(0));
1452
- const doc = await PDFDocument.load(srcBytes, { ignoreEncryption: true });
1453
- return doc.getPageCount();
1552
+ options.pdfBase64 = await pdfInputToBase64(input);
1553
+ return options;
1454
1554
  }
1455
1555
  function getAcroFormFields(pdfDoc) {
1456
1556
  const form = pdfDoc.getForm();
@@ -1543,7 +1643,7 @@ async function runExtractor(params) {
1543
1643
  name,
1544
1644
  prompt,
1545
1645
  schema,
1546
- pdfBase64,
1646
+ pdfInput,
1547
1647
  startPage,
1548
1648
  endPage,
1549
1649
  generateObject,
@@ -1553,6 +1653,7 @@ async function runExtractor(params) {
1553
1653
  } = params;
1554
1654
  const extractorProviderOptions = { ...providerOptions };
1555
1655
  let fullPrompt;
1656
+ const pdfBase64 = await pdfInputToBase64(pdfInput);
1556
1657
  if (convertPdfToImages) {
1557
1658
  const images = await convertPdfToImages(pdfBase64, startPage, endPage);
1558
1659
  extractorProviderOptions.images = images;
@@ -2232,10 +2333,13 @@ function chunkDocument(doc) {
2232
2333
  };
2233
2334
  const chunks = [];
2234
2335
  const docId = doc.id;
2336
+ const policyTypesStr = doc.policyTypes?.length ? doc.policyTypes.join(",") : void 0;
2235
2337
  function stringMetadata(entries) {
2236
- return Object.fromEntries(
2338
+ const base = Object.fromEntries(
2237
2339
  Object.entries(entries).filter(([, value]) => value !== void 0 && value !== null && String(value).length > 0).map(([key, value]) => [key, String(value)])
2238
2340
  );
2341
+ if (policyTypesStr) base.policyTypes = policyTypesStr;
2342
+ return base;
2239
2343
  }
2240
2344
  chunks.push({
2241
2345
  id: `${docId}:carrier_info:0`,
@@ -2596,13 +2700,16 @@ ${exc.content}`.trim(),
2596
2700
  }
2597
2701
  }
2598
2702
  if (declLines.length > 0) {
2703
+ const declMeta = { documentType: doc.type };
2704
+ if (typeof decl.formType === "string") declMeta.formType = decl.formType;
2705
+ if (typeof decl.line === "string") declMeta.declarationLine = decl.line;
2599
2706
  chunks.push({
2600
2707
  id: `${docId}:declaration:0`,
2601
2708
  documentId: docId,
2602
2709
  type: "declaration",
2603
2710
  text: `Declarations
2604
2711
  ${declLines.join("\n")}`,
2605
- metadata: stringMetadata({ documentType: doc.type })
2712
+ metadata: stringMetadata(declMeta)
2606
2713
  });
2607
2714
  }
2608
2715
  }
@@ -4023,11 +4130,30 @@ COMMERCIAL LINES \u2014 match these values:
4023
4130
  - "property" \u2014 standalone property
4024
4131
 
4025
4132
  PERSONAL LINES \u2014 match these values:
4026
- - "homeowners_ho3" \u2014 HO-3, special form homeowners
4027
- - "homeowners_ho5" \u2014 HO-5, comprehensive form homeowners
4028
- - "renters_ho4" \u2014 HO-4, renters insurance
4029
- - "condo_ho6" \u2014 HO-6, condo unit-owners
4030
- - "dwelling_fire" \u2014 DP-1, DP-3, dwelling fire
4133
+
4134
+ HOMEOWNER FORM CLASSIFICATION \u2014 pay close attention to these distinctions:
4135
+ - "homeowners_ho3" \u2014 HO-3 Special Form. Standard homeowner policy for OWNER-OCCUPIED dwellings.
4136
+ Key indicators: Coverage A (Dwelling) present, open-peril dwelling coverage, named-peril personal property,
4137
+ references to "special form", "HO 00 03", or "HO-3". The insured OWNS the home.
4138
+ - "homeowners_ho5" \u2014 HO-5 Comprehensive Form. Premium homeowner policy for OWNER-OCCUPIED dwellings.
4139
+ Key indicators: Coverage A (Dwelling) present, BOTH dwelling AND personal property on open-peril basis,
4140
+ references to "comprehensive form", "HO 00 05", or "HO-5". Higher coverage than HO-3.
4141
+ - "renters_ho4" \u2014 HO-4 Contents Broad Form. Renters/tenants insurance \u2014 NO dwelling coverage.
4142
+ Key indicators: NO Coverage A (Dwelling), only Coverage C (Personal Property) and Coverage E/F (Liability/Medical),
4143
+ references to "contents broad form", "HO 00 04", "HO-4", "renters", "tenants". The insured RENTS, does not own.
4144
+ - "condo_ho6" \u2014 HO-6 Unit-Owners Form. Condo/co-op unit-owner insurance.
4145
+ Key indicators: Coverage A applies to interior walls/improvements only (not full structure),
4146
+ references to "unit-owners form", "HO 00 06", "HO-6", "condominium", "co-op unit". The building's
4147
+ master policy covers the structure; HO-6 covers the unit interior, personal property, and liability.
4148
+
4149
+ DISAMBIGUATION RULES for homeowner forms:
4150
+ 1. If the document has Coverage A (Dwelling) with full structure coverage \u2192 HO-3 or HO-5 (check if open-peril on personal property \u2192 HO-5, named-peril \u2192 HO-3)
4151
+ 2. If NO Coverage A / no dwelling coverage and the insured is a renter/tenant \u2192 renters_ho4
4152
+ 3. If Coverage A covers only unit interior/improvements and mentions condo/co-op \u2192 condo_ho6
4153
+ 4. Look for the actual form number (HO 00 03, HO 00 04, HO 00 05, HO 00 06) on the declarations page \u2014 this is the most reliable indicator
4154
+ 5. Do NOT default to homeowners_ho3 when uncertain \u2014 check for the distinguishing signals above
4155
+
4156
+ - "dwelling_fire" \u2014 DP-1, DP-3, dwelling fire (non-owner-occupied or investment property)
4031
4157
  - "mobile_home" \u2014 mobile home, manufactured home
4032
4158
  - "personal_auto" \u2014 personal auto, PAP
4033
4159
  - "personal_umbrella" \u2014 personal umbrella
@@ -4038,7 +4164,10 @@ PERSONAL LINES \u2014 match these values:
4038
4164
  - "watercraft" \u2014 watercraft, boat
4039
4165
  - "recreational_vehicle" \u2014 RV, recreational vehicle, ATV
4040
4166
  - "farm_ranch" \u2014 farm, ranch
4041
- - "pet" \u2014 pet insurance
4167
+ - "pet" \u2014 standalone pet insurance policy. Key indicators: named pet, species/breed, accident/illness coverage,
4168
+ wellness plans, per-incident or annual limits for veterinary costs. Do NOT confuse with pet liability endorsements
4169
+ on a homeowners policy \u2014 those are still homeowner policies (ho3/ho4/ho5/ho6), not "pet".
4170
+ Only classify as "pet" when the ENTIRE policy is dedicated to pet health/accident coverage.
4042
4171
  - "travel" \u2014 travel insurance
4043
4172
  - "identity_theft" \u2014 identity theft
4044
4173
  - "title" \u2014 title insurance
@@ -4894,6 +5023,338 @@ function getExtractor(name) {
4894
5023
  return EXTRACTORS[name];
4895
5024
  }
4896
5025
 
5026
+ // src/extraction/resolve-referential.ts
5027
+ import { z as z35 } from "zod";
5028
+
5029
+ // src/prompts/extractors/referential-lookup.ts
5030
+ import { z as z34 } from "zod";
5031
+ var ReferentialLookupSchema = z34.object({
5032
+ resolvedCoverages: z34.array(
5033
+ z34.object({
5034
+ coverageName: z34.string().describe("The coverage name that was referenced"),
5035
+ resolvedLimit: z34.string().optional().describe("The concrete limit value found, if any"),
5036
+ resolvedLimitValueType: CoverageValueTypeSchema.optional(),
5037
+ resolvedDeductible: z34.string().optional().describe("The concrete deductible value found, if any"),
5038
+ resolvedDeductibleValueType: CoverageValueTypeSchema.optional(),
5039
+ pageNumber: z34.number().optional().describe("Page where the resolved value was found"),
5040
+ originalContent: z34.string().optional().describe("Verbatim source text for the resolved value"),
5041
+ confidence: z34.enum(["high", "medium", "low"]).describe("Confidence in the resolution")
5042
+ })
5043
+ )
5044
+ });
5045
+ function buildReferentialLookupPrompt(coverages) {
5046
+ const coverageList = coverages.map((c, i) => {
5047
+ const parts = [` ${i + 1}. Coverage: "${c.name}" \u2014 Limit: "${c.limit}"`];
5048
+ if (c.deductible) {
5049
+ parts.push(` Deductible: "${c.deductible}"`);
5050
+ }
5051
+ if (c.sectionRef) {
5052
+ parts.push(` Referenced section: "${c.sectionRef}"`);
5053
+ }
5054
+ return parts.join("\n");
5055
+ }).join("\n");
5056
+ return `You are an expert insurance document analyst. You are looking at a specific section of an insurance document to resolve referential coverage limits.
5057
+
5058
+ The following coverages had referential limits or deductibles (e.g. "As stated in Policy", "As stated in Section 4 of Policy", "See Declarations") instead of concrete values:
5059
+
5060
+ ${coverageList}
5061
+
5062
+ Your task:
5063
+ - Find the concrete/actual limit and deductible values for each coverage listed above.
5064
+ - Search the declarations page, coverage schedules, and any referenced sections for the real numeric or defined values.
5065
+ - Only return values you can actually find in the document \u2014 do not guess or infer values that are not explicitly stated.
5066
+ - For each resolved coverage, include:
5067
+ - pageNumber: the page where the resolved value appears
5068
+ - originalContent: the verbatim text snippet containing the resolved value
5069
+ - confidence: "high" if the value is clearly and unambiguously stated, "medium" if it requires interpretation, "low" if uncertain
5070
+ - If a coverage cannot be resolved (no concrete value found), still include it with confidence "low" and omit the resolved fields.
5071
+ - Classify resolvedLimitValueType and resolvedDeductibleValueType as numeric, included, not_included, as_stated, waiting_period, referential, or other.
5072
+
5073
+ Return JSON only.`;
5074
+ }
5075
+
5076
+ // src/extraction/resolve-referential.ts
5077
+ function looksReferential(value) {
5078
+ if (typeof value !== "string") return false;
5079
+ const normalized = value.toLowerCase();
5080
+ return normalized.includes("shown in the declarations") || normalized.includes("shown in declarations") || normalized.includes("shown in the schedule") || normalized.includes("as stated") || normalized.includes("if applicable");
5081
+ }
5082
+ function parseReferenceTarget(text) {
5083
+ if (typeof text !== "string") return void 0;
5084
+ const normalized = text.trim();
5085
+ if (!normalized) return void 0;
5086
+ const sectionMatch = normalized.match(/\b(Section\s+\d+[A-Za-z]?)/i);
5087
+ if (sectionMatch) return sectionMatch[1];
5088
+ if (/declarations/i.test(normalized)) return "Declarations";
5089
+ const scheduleMatch = normalized.match(/\b(Schedule(?:\s+of\s+[A-Za-z ]+)?)/i);
5090
+ if (scheduleMatch) return scheduleMatch[1].trim();
5091
+ const asStatedMatch = normalized.match(/(?:as\s+stated\s+in|see|shown\s+in(?:\s+the)?)\s+(.+)/i);
5092
+ if (asStatedMatch) {
5093
+ let target = asStatedMatch[1].trim().replace(/\s+of\s+the\s+policy$/i, "").trim();
5094
+ target = target.replace(/\.+$/, "").trim();
5095
+ if (target) return target;
5096
+ }
5097
+ if (/if applicable/i.test(normalized)) return void 0;
5098
+ return void 0;
5099
+ }
5100
+ var PageLocationSchema = z35.object({
5101
+ startPage: z35.number(),
5102
+ endPage: z35.number()
5103
+ });
5104
+ async function findReferencedPages(params) {
5105
+ const {
5106
+ referenceTarget,
5107
+ sections,
5108
+ formInventory,
5109
+ pdfInput,
5110
+ pageCount,
5111
+ generateObject,
5112
+ providerOptions,
5113
+ log
5114
+ } = params;
5115
+ const targetLower = referenceTarget.toLowerCase();
5116
+ for (const section of sections) {
5117
+ if (section.title && section.pageStart != null && section.title.toLowerCase().includes(targetLower)) {
5118
+ return {
5119
+ startPage: section.pageStart,
5120
+ endPage: section.pageEnd ?? section.pageStart
5121
+ };
5122
+ }
5123
+ }
5124
+ for (const form of formInventory) {
5125
+ const titleMatch = form.title && form.title.toLowerCase().includes(targetLower);
5126
+ const typeMatch = form.formType && form.formType.toLowerCase().includes(targetLower);
5127
+ if ((titleMatch || typeMatch) && form.pageStart != null) {
5128
+ return {
5129
+ startPage: form.pageStart,
5130
+ endPage: form.pageEnd ?? form.pageStart
5131
+ };
5132
+ }
5133
+ }
5134
+ try {
5135
+ const result = await safeGenerateObject(
5136
+ generateObject,
5137
+ {
5138
+ prompt: `You are analyzing an insurance document (${pageCount} pages total).
5139
+
5140
+ Find the pages that contain the section or area referenced as "${referenceTarget}".
5141
+
5142
+ Return the page range (1-indexed) where this section is located. If the section spans a single page, startPage and endPage should be the same.
5143
+
5144
+ If you cannot find the section, return startPage: 0 and endPage: 0.
5145
+
5146
+ Return JSON only.`,
5147
+ schema: PageLocationSchema,
5148
+ maxTokens: 256,
5149
+ providerOptions: await buildPdfProviderOptions(pdfInput, providerOptions)
5150
+ },
5151
+ {
5152
+ fallback: { startPage: 0, endPage: 0 },
5153
+ maxRetries: 1,
5154
+ log,
5155
+ onError: (err, attempt) => log?.(
5156
+ `Page location attempt ${attempt + 1} failed for "${referenceTarget}": ${err instanceof Error ? err.message : String(err)}`
5157
+ )
5158
+ }
5159
+ );
5160
+ if (result.object.startPage > 0 && result.object.endPage > 0) {
5161
+ return {
5162
+ startPage: result.object.startPage,
5163
+ endPage: result.object.endPage
5164
+ };
5165
+ }
5166
+ } catch (error) {
5167
+ await log?.(
5168
+ `Failed to locate pages for "${referenceTarget}": ${error instanceof Error ? error.message : String(error)}`
5169
+ );
5170
+ }
5171
+ return void 0;
5172
+ }
5173
+ async function resolveReferentialCoverages(params) {
5174
+ const {
5175
+ memory,
5176
+ pdfInput,
5177
+ pageCount,
5178
+ generateObject,
5179
+ convertPdfToImages,
5180
+ concurrency = 2,
5181
+ providerOptions,
5182
+ log,
5183
+ onProgress
5184
+ } = params;
5185
+ const limit = pLimit(concurrency);
5186
+ let totalUsage = { inputTokens: 0, outputTokens: 0 };
5187
+ function trackUsage(usage) {
5188
+ if (usage) {
5189
+ totalUsage.inputTokens += usage.inputTokens;
5190
+ totalUsage.outputTokens += usage.outputTokens;
5191
+ }
5192
+ }
5193
+ const coverageData = memory.get("coverage_limits");
5194
+ const coverages = coverageData?.coverages ?? [];
5195
+ const referentialCoverages = coverages.filter((cov) => {
5196
+ const limitType = cov.limitValueType;
5197
+ const deductibleType = cov.deductibleValueType;
5198
+ return limitType === "referential" || limitType === "as_stated" || deductibleType === "referential" || deductibleType === "as_stated" || looksReferential(cov.limit) || looksReferential(cov.deductible);
5199
+ });
5200
+ const attempts = referentialCoverages.length;
5201
+ if (attempts === 0) {
5202
+ return {
5203
+ resolved: 0,
5204
+ unresolved: 0,
5205
+ attempts: 0,
5206
+ usage: totalUsage,
5207
+ details: []
5208
+ };
5209
+ }
5210
+ onProgress?.(
5211
+ `Found ${attempts} referential coverage(s) to resolve...`
5212
+ );
5213
+ const targetGroups = /* @__PURE__ */ new Map();
5214
+ for (let i = 0; i < referentialCoverages.length; i++) {
5215
+ const cov = referentialCoverages[i];
5216
+ const refString = (looksReferential(cov.limit) ? cov.limit : void 0) ?? (looksReferential(cov.deductible) ? cov.deductible : void 0) ?? cov.limit ?? "";
5217
+ const target = parseReferenceTarget(refString) ?? "unknown";
5218
+ const group = targetGroups.get(target) ?? [];
5219
+ group.push({ coverage: cov, index: i });
5220
+ targetGroups.set(target, group);
5221
+ }
5222
+ const sectionsData = memory.get("sections");
5223
+ const sections = sectionsData?.sections ?? [];
5224
+ const formInventoryData = memory.get("form_inventory");
5225
+ const formInventory = formInventoryData?.forms ?? [];
5226
+ const details = [];
5227
+ let resolved = 0;
5228
+ let unresolved = 0;
5229
+ const targetEntries = Array.from(targetGroups.entries());
5230
+ await Promise.all(
5231
+ targetEntries.map(
5232
+ ([target, group]) => limit(async () => {
5233
+ const pageRange = await findReferencedPages({
5234
+ referenceTarget: target,
5235
+ sections,
5236
+ formInventory,
5237
+ pdfInput,
5238
+ pageCount,
5239
+ generateObject,
5240
+ providerOptions,
5241
+ log
5242
+ });
5243
+ if (!pageRange) {
5244
+ await log?.(
5245
+ `Could not locate pages for reference target "${target}"`
5246
+ );
5247
+ for (const { coverage } of group) {
5248
+ details.push({
5249
+ coverageName: String(coverage.name ?? "unknown"),
5250
+ referenceTarget: target === "unknown" ? void 0 : target,
5251
+ status: "pages_not_found"
5252
+ });
5253
+ unresolved++;
5254
+ }
5255
+ return;
5256
+ }
5257
+ onProgress?.(
5258
+ `Resolving "${target}" from pages ${pageRange.startPage}-${pageRange.endPage}...`
5259
+ );
5260
+ const promptCoverages = group.map(({ coverage }) => ({
5261
+ name: String(coverage.name ?? "unknown"),
5262
+ limit: String(coverage.limit ?? ""),
5263
+ deductible: coverage.deductible ? String(coverage.deductible) : void 0,
5264
+ sectionRef: coverage.sectionRef ? String(coverage.sectionRef) : void 0
5265
+ }));
5266
+ try {
5267
+ const result = await runExtractor({
5268
+ name: "referential_lookup",
5269
+ prompt: buildReferentialLookupPrompt(promptCoverages),
5270
+ schema: ReferentialLookupSchema,
5271
+ pdfInput,
5272
+ startPage: pageRange.startPage,
5273
+ endPage: pageRange.endPage,
5274
+ generateObject,
5275
+ convertPdfToImages,
5276
+ maxTokens: 4096,
5277
+ providerOptions
5278
+ });
5279
+ trackUsage(result.usage);
5280
+ const resolvedMap = /* @__PURE__ */ new Map();
5281
+ for (const rc of result.data.resolvedCoverages) {
5282
+ resolvedMap.set(rc.coverageName.toLowerCase(), rc);
5283
+ }
5284
+ for (const { coverage } of group) {
5285
+ const covName = String(coverage.name ?? "unknown");
5286
+ const rc = resolvedMap.get(covName.toLowerCase());
5287
+ if (!rc) {
5288
+ details.push({
5289
+ coverageName: covName,
5290
+ referenceTarget: target === "unknown" ? void 0 : target,
5291
+ status: "unresolved"
5292
+ });
5293
+ unresolved++;
5294
+ continue;
5295
+ }
5296
+ const limitResolved = rc.resolvedLimit && rc.resolvedLimitValueType !== "referential" && rc.resolvedLimitValueType !== "as_stated" && !looksReferential(rc.resolvedLimit);
5297
+ const deductibleResolved = rc.resolvedDeductible && rc.resolvedDeductibleValueType !== "referential" && rc.resolvedDeductibleValueType !== "as_stated" && !looksReferential(rc.resolvedDeductible);
5298
+ if (limitResolved || deductibleResolved) {
5299
+ if (limitResolved) {
5300
+ coverage.limit = rc.resolvedLimit;
5301
+ coverage.limitValueType = rc.resolvedLimitValueType ?? "numeric";
5302
+ }
5303
+ if (deductibleResolved) {
5304
+ coverage.deductible = rc.resolvedDeductible;
5305
+ coverage.deductibleValueType = rc.resolvedDeductibleValueType ?? "numeric";
5306
+ }
5307
+ if (rc.pageNumber != null) {
5308
+ coverage.resolvedFromPage = rc.pageNumber;
5309
+ }
5310
+ if (rc.originalContent) {
5311
+ coverage.resolvedOriginalContent = rc.originalContent;
5312
+ }
5313
+ details.push({
5314
+ coverageName: covName,
5315
+ referenceTarget: target === "unknown" ? void 0 : target,
5316
+ resolvedLimit: limitResolved ? rc.resolvedLimit : void 0,
5317
+ resolvedDeductible: deductibleResolved ? rc.resolvedDeductible : void 0,
5318
+ status: "resolved"
5319
+ });
5320
+ resolved++;
5321
+ } else {
5322
+ details.push({
5323
+ coverageName: covName,
5324
+ referenceTarget: target === "unknown" ? void 0 : target,
5325
+ status: "unresolved"
5326
+ });
5327
+ unresolved++;
5328
+ }
5329
+ }
5330
+ } catch (error) {
5331
+ await log?.(
5332
+ `Referential lookup extraction failed for target "${target}": ${error instanceof Error ? error.message : String(error)}`
5333
+ );
5334
+ for (const { coverage } of group) {
5335
+ details.push({
5336
+ coverageName: String(coverage.name ?? "unknown"),
5337
+ referenceTarget: target === "unknown" ? void 0 : target,
5338
+ status: "unresolved"
5339
+ });
5340
+ unresolved++;
5341
+ }
5342
+ }
5343
+ })
5344
+ )
5345
+ );
5346
+ onProgress?.(
5347
+ `Referential resolution complete: ${resolved} resolved, ${unresolved} unresolved out of ${attempts} attempts.`
5348
+ );
5349
+ return {
5350
+ resolved,
5351
+ unresolved,
5352
+ attempts,
5353
+ usage: totalUsage,
5354
+ details
5355
+ };
5356
+ }
5357
+
4897
5358
  // src/core/quality.ts
4898
5359
  function evaluateQualityGate(params) {
4899
5360
  const { issues, hasRoundWarnings = false } = params;
@@ -4930,7 +5391,7 @@ function addFormEntry(inventory, formNumber, source, extra) {
4930
5391
  sources: [source]
4931
5392
  });
4932
5393
  }
4933
- function looksReferential(value) {
5394
+ function looksReferential2(value) {
4934
5395
  if (typeof value !== "string") return false;
4935
5396
  const normalized = value.toLowerCase();
4936
5397
  return normalized.includes("shown in the declarations") || normalized.includes("shown in declarations") || normalized.includes("shown in the schedule") || normalized.includes("as stated") || normalized.includes("if applicable");
@@ -5054,7 +5515,7 @@ function buildExtractionReviewReport(params) {
5054
5515
  itemName: typeof coverage.name === "string" ? coverage.name : void 0
5055
5516
  });
5056
5517
  }
5057
- if (looksReferential(coverage.limit) || looksReferential(coverage.deductible)) {
5518
+ if (looksReferential2(coverage.limit) || looksReferential2(coverage.deductible)) {
5058
5519
  deterministicIssues.push({
5059
5520
  code: "coverage_referential_value",
5060
5521
  severity: "warning",
@@ -5176,7 +5637,8 @@ function buildExtractionReviewReport(params) {
5176
5637
  }));
5177
5638
  const artifacts = [
5178
5639
  { kind: "form_inventory", label: "Form Inventory", itemCount: formInventory.length },
5179
- { kind: "page_map", label: "Page Map", itemCount: params.pageAssignments.length }
5640
+ { kind: "page_map", label: "Page Map", itemCount: params.pageAssignments.length },
5641
+ { kind: "referential_resolution", label: "Referential Resolution", itemCount: coverages.filter((c) => c.limitValueType === "referential" || c.limitValueType === "as_stated" || c.deductibleValueType === "referential" || c.deductibleValueType === "as_stated").length }
5180
5642
  ];
5181
5643
  const qualityGateStatus = evaluateQualityGate({
5182
5644
  issues: deterministicIssues,
@@ -5428,7 +5890,7 @@ function createExtractor(config) {
5428
5890
  }))
5429
5891
  };
5430
5892
  }
5431
- async function extract(pdfBase64, documentId, options) {
5893
+ async function extract(pdfInput, documentId, options) {
5432
5894
  const id = documentId ?? `doc-${Date.now()}`;
5433
5895
  const memory = /* @__PURE__ */ new Map();
5434
5896
  totalUsage = { inputTokens: 0, outputTokens: 0 };
@@ -5446,20 +5908,27 @@ function createExtractor(config) {
5446
5908
  memory.set(k, v);
5447
5909
  }
5448
5910
  }
5911
+ let pdfBase64Cache;
5912
+ async function getPdfBase64ForExtraction() {
5913
+ if (pdfBase64Cache === void 0) {
5914
+ pdfBase64Cache = await pdfInputToBase64(pdfInput);
5915
+ }
5916
+ return pdfBase64Cache;
5917
+ }
5449
5918
  let classifyResult;
5450
5919
  if (resumed?.classifyResult && pipelineCtx.isPhaseComplete("classify")) {
5451
5920
  classifyResult = resumed.classifyResult;
5452
5921
  onProgress?.("Resuming from checkpoint (classify complete)...");
5453
5922
  } else {
5454
5923
  onProgress?.("Classifying document...");
5455
- const pageCount2 = await getPdfPageCount(pdfBase64);
5924
+ const pageCount2 = await getPdfPageCount(pdfInput);
5456
5925
  const classifyResponse = await safeGenerateObject(
5457
5926
  generateObject,
5458
5927
  {
5459
5928
  prompt: buildClassifyPrompt(),
5460
5929
  schema: ClassifyResultSchema,
5461
5930
  maxTokens: 512,
5462
- providerOptions: { ...providerOptions, pdfBase64 }
5931
+ providerOptions: await buildPdfProviderOptions(pdfInput, providerOptions)
5463
5932
  },
5464
5933
  {
5465
5934
  fallback: { documentType: "policy", policyTypes: ["other"], confidence: 0 },
@@ -5484,7 +5953,7 @@ function createExtractor(config) {
5484
5953
  const { documentType, policyTypes } = classifyResult;
5485
5954
  const primaryType = policyTypes[0] ?? "other";
5486
5955
  const template = getTemplate(primaryType);
5487
- const pageCount = resumed?.pageCount ?? await getPdfPageCount(pdfBase64);
5956
+ const pageCount = resumed?.pageCount ?? await getPdfPageCount(pdfInput);
5488
5957
  const templateHints = buildTemplateHints(primaryType, documentType, pageCount, template);
5489
5958
  let formInventory;
5490
5959
  if (resumed?.formInventory && pipelineCtx.isPhaseComplete("form_inventory")) {
@@ -5499,7 +5968,7 @@ function createExtractor(config) {
5499
5968
  prompt: buildFormInventoryPrompt(templateHints),
5500
5969
  schema: FormInventorySchema,
5501
5970
  maxTokens: 2048,
5502
- providerOptions: { ...providerOptions, pdfBase64 }
5971
+ providerOptions: await buildPdfProviderOptions(pdfInput, providerOptions)
5503
5972
  },
5504
5973
  {
5505
5974
  fallback: { forms: [] },
@@ -5527,9 +5996,10 @@ function createExtractor(config) {
5527
5996
  const chunkSize = 8;
5528
5997
  const collectedAssignments = [];
5529
5998
  const formInventoryHint = formInventory?.forms.length ? formatFormInventoryForPageMap(formInventory.forms) : void 0;
5999
+ const extractionBase64 = await getPdfBase64ForExtraction();
5530
6000
  for (let startPage = 1; startPage <= pageCount; startPage += chunkSize) {
5531
6001
  const endPage = Math.min(pageCount, startPage + chunkSize - 1);
5532
- const pagesPdf = await extractPageRange(pdfBase64, startPage, endPage);
6002
+ const pagesPdf = await extractPageRange(extractionBase64, startPage, endPage);
5533
6003
  const mapResponse = await safeGenerateObject(
5534
6004
  generateObject,
5535
6005
  {
@@ -5609,7 +6079,7 @@ function createExtractor(config) {
5609
6079
  name: task.extractorName,
5610
6080
  prompt: ext.buildPrompt(),
5611
6081
  schema: ext.schema,
5612
- pdfBase64,
6082
+ pdfInput,
5613
6083
  startPage: task.startPage,
5614
6084
  endPage: task.endPage,
5615
6085
  generateObject,
@@ -5639,7 +6109,7 @@ function createExtractor(config) {
5639
6109
  name: "supplementary",
5640
6110
  prompt: buildSupplementaryPrompt(alreadyExtractedSummary),
5641
6111
  schema: SupplementarySchema,
5642
- pdfBase64,
6112
+ pdfInput,
5643
6113
  startPage: 1,
5644
6114
  endPage: pageCount,
5645
6115
  generateObject,
@@ -5663,6 +6133,37 @@ function createExtractor(config) {
5663
6133
  memory: Object.fromEntries(memory)
5664
6134
  });
5665
6135
  }
6136
+ if (!pipelineCtx.isPhaseComplete("resolve_referential")) {
6137
+ onProgress?.("Resolving referential coverage limits...");
6138
+ try {
6139
+ const resolution = await resolveReferentialCoverages({
6140
+ memory,
6141
+ pdfInput,
6142
+ pageCount,
6143
+ generateObject,
6144
+ convertPdfToImages,
6145
+ concurrency,
6146
+ providerOptions,
6147
+ log,
6148
+ onProgress
6149
+ });
6150
+ trackUsage(resolution.usage);
6151
+ if (resolution.attempts > 0) {
6152
+ await log?.(`Referential resolution: ${resolution.resolved}/${resolution.attempts} resolved, ${resolution.unresolved} unresolved`);
6153
+ }
6154
+ } catch (error) {
6155
+ await log?.(`Referential resolution failed, continuing: ${error instanceof Error ? error.message : String(error)}`);
6156
+ }
6157
+ await pipelineCtx.save("resolve_referential", {
6158
+ id,
6159
+ pageCount,
6160
+ classifyResult,
6161
+ formInventory,
6162
+ pageAssignments,
6163
+ plan,
6164
+ memory: Object.fromEntries(memory)
6165
+ });
6166
+ }
5666
6167
  let reviewRounds = resumed?.reviewReport?.reviewRoundRecords ?? [];
5667
6168
  let reviewReport = resumed?.reviewReport;
5668
6169
  if (!pipelineCtx.isPhaseComplete("review")) {
@@ -5677,7 +6178,7 @@ function createExtractor(config) {
5677
6178
  prompt: buildReviewPrompt(template.required, extractedKeys, extractionSummary, pageMapSummary),
5678
6179
  schema: ReviewResultSchema,
5679
6180
  maxTokens: 1536,
5680
- providerOptions: { ...providerOptions, pdfBase64 }
6181
+ providerOptions: await buildPdfProviderOptions(pdfInput, providerOptions)
5681
6182
  },
5682
6183
  {
5683
6184
  fallback: { complete: true, missingFields: [], qualityIssues: [], additionalTasks: [] },
@@ -5705,7 +6206,7 @@ function createExtractor(config) {
5705
6206
  name: task.extractorName,
5706
6207
  prompt: ext.buildPrompt(),
5707
6208
  schema: ext.schema,
5708
- pdfBase64,
6209
+ pdfInput,
5709
6210
  startPage: task.startPage,
5710
6211
  endPage: task.endPage,
5711
6212
  generateObject,
@@ -6038,8 +6539,8 @@ Respond with JSON only:
6038
6539
  }`;
6039
6540
 
6040
6541
  // src/schemas/application.ts
6041
- import { z as z34 } from "zod";
6042
- var FieldTypeSchema = z34.enum([
6542
+ import { z as z36 } from "zod";
6543
+ var FieldTypeSchema = z36.enum([
6043
6544
  "text",
6044
6545
  "numeric",
6045
6546
  "currency",
@@ -6048,131 +6549,131 @@ var FieldTypeSchema = z34.enum([
6048
6549
  "table",
6049
6550
  "declaration"
6050
6551
  ]);
6051
- var ApplicationFieldSchema = z34.object({
6052
- id: z34.string(),
6053
- label: z34.string(),
6054
- section: z34.string(),
6552
+ var ApplicationFieldSchema = z36.object({
6553
+ id: z36.string(),
6554
+ label: z36.string(),
6555
+ section: z36.string(),
6055
6556
  fieldType: FieldTypeSchema,
6056
- required: z34.boolean(),
6057
- options: z34.array(z34.string()).optional(),
6058
- columns: z34.array(z34.string()).optional(),
6059
- requiresExplanationIfYes: z34.boolean().optional(),
6060
- condition: z34.object({
6061
- dependsOn: z34.string(),
6062
- whenValue: z34.string()
6557
+ required: z36.boolean(),
6558
+ options: z36.array(z36.string()).optional(),
6559
+ columns: z36.array(z36.string()).optional(),
6560
+ requiresExplanationIfYes: z36.boolean().optional(),
6561
+ condition: z36.object({
6562
+ dependsOn: z36.string(),
6563
+ whenValue: z36.string()
6063
6564
  }).optional(),
6064
- value: z34.string().optional(),
6065
- source: z34.string().optional().describe("Where the value came from: auto-fill, user, lookup"),
6066
- confidence: z34.enum(["confirmed", "high", "medium", "low"]).optional()
6565
+ value: z36.string().optional(),
6566
+ source: z36.string().optional().describe("Where the value came from: auto-fill, user, lookup"),
6567
+ confidence: z36.enum(["confirmed", "high", "medium", "low"]).optional()
6067
6568
  });
6068
- var ApplicationClassifyResultSchema = z34.object({
6069
- isApplication: z34.boolean(),
6070
- confidence: z34.number().min(0).max(1),
6071
- applicationType: z34.string().nullable()
6569
+ var ApplicationClassifyResultSchema = z36.object({
6570
+ isApplication: z36.boolean(),
6571
+ confidence: z36.number().min(0).max(1),
6572
+ applicationType: z36.string().nullable()
6072
6573
  });
6073
- var FieldExtractionResultSchema = z34.object({
6074
- fields: z34.array(ApplicationFieldSchema)
6574
+ var FieldExtractionResultSchema = z36.object({
6575
+ fields: z36.array(ApplicationFieldSchema)
6075
6576
  });
6076
- var AutoFillMatchSchema = z34.object({
6077
- fieldId: z34.string(),
6078
- value: z34.string(),
6079
- confidence: z34.enum(["confirmed"]),
6080
- contextKey: z34.string()
6577
+ var AutoFillMatchSchema = z36.object({
6578
+ fieldId: z36.string(),
6579
+ value: z36.string(),
6580
+ confidence: z36.enum(["confirmed"]),
6581
+ contextKey: z36.string()
6081
6582
  });
6082
- var AutoFillResultSchema = z34.object({
6083
- matches: z34.array(AutoFillMatchSchema)
6583
+ var AutoFillResultSchema = z36.object({
6584
+ matches: z36.array(AutoFillMatchSchema)
6084
6585
  });
6085
- var QuestionBatchResultSchema = z34.object({
6086
- batches: z34.array(z34.array(z34.string()).describe("Array of field IDs in this batch"))
6586
+ var QuestionBatchResultSchema = z36.object({
6587
+ batches: z36.array(z36.array(z36.string()).describe("Array of field IDs in this batch"))
6087
6588
  });
6088
- var LookupRequestSchema = z34.object({
6089
- type: z34.string().describe("Type of lookup: 'records', 'website', 'policy'"),
6090
- description: z34.string(),
6091
- url: z34.string().optional(),
6092
- targetFieldIds: z34.array(z34.string())
6589
+ var LookupRequestSchema = z36.object({
6590
+ type: z36.string().describe("Type of lookup: 'records', 'website', 'policy'"),
6591
+ description: z36.string(),
6592
+ url: z36.string().optional(),
6593
+ targetFieldIds: z36.array(z36.string())
6093
6594
  });
6094
- var ReplyIntentSchema = z34.object({
6095
- primaryIntent: z34.enum(["answers_only", "question", "lookup_request", "mixed"]),
6096
- hasAnswers: z34.boolean(),
6097
- questionText: z34.string().optional(),
6098
- questionFieldIds: z34.array(z34.string()).optional(),
6099
- lookupRequests: z34.array(LookupRequestSchema).optional()
6595
+ var ReplyIntentSchema = z36.object({
6596
+ primaryIntent: z36.enum(["answers_only", "question", "lookup_request", "mixed"]),
6597
+ hasAnswers: z36.boolean(),
6598
+ questionText: z36.string().optional(),
6599
+ questionFieldIds: z36.array(z36.string()).optional(),
6600
+ lookupRequests: z36.array(LookupRequestSchema).optional()
6100
6601
  });
6101
- var ParsedAnswerSchema = z34.object({
6102
- fieldId: z34.string(),
6103
- value: z34.string(),
6104
- explanation: z34.string().optional()
6602
+ var ParsedAnswerSchema = z36.object({
6603
+ fieldId: z36.string(),
6604
+ value: z36.string(),
6605
+ explanation: z36.string().optional()
6105
6606
  });
6106
- var AnswerParsingResultSchema = z34.object({
6107
- answers: z34.array(ParsedAnswerSchema),
6108
- unanswered: z34.array(z34.string()).describe("Field IDs that were not answered")
6607
+ var AnswerParsingResultSchema = z36.object({
6608
+ answers: z36.array(ParsedAnswerSchema),
6609
+ unanswered: z36.array(z36.string()).describe("Field IDs that were not answered")
6109
6610
  });
6110
- var LookupFillSchema = z34.object({
6111
- fieldId: z34.string(),
6112
- value: z34.string(),
6113
- source: z34.string().describe("Specific citable reference, e.g. 'GL Policy #POL-12345 (Hartford)'")
6611
+ var LookupFillSchema = z36.object({
6612
+ fieldId: z36.string(),
6613
+ value: z36.string(),
6614
+ source: z36.string().describe("Specific citable reference, e.g. 'GL Policy #POL-12345 (Hartford)'")
6114
6615
  });
6115
- var LookupFillResultSchema = z34.object({
6116
- fills: z34.array(LookupFillSchema),
6117
- unfillable: z34.array(z34.string()),
6118
- explanation: z34.string().optional()
6616
+ var LookupFillResultSchema = z36.object({
6617
+ fills: z36.array(LookupFillSchema),
6618
+ unfillable: z36.array(z36.string()),
6619
+ explanation: z36.string().optional()
6119
6620
  });
6120
- var FlatPdfPlacementSchema = z34.object({
6121
- fieldId: z34.string(),
6122
- page: z34.number(),
6123
- x: z34.number().describe("Percentage from left edge (0-100)"),
6124
- y: z34.number().describe("Percentage from top edge (0-100)"),
6125
- text: z34.string(),
6126
- fontSize: z34.number().optional(),
6127
- isCheckmark: z34.boolean().optional()
6621
+ var FlatPdfPlacementSchema = z36.object({
6622
+ fieldId: z36.string(),
6623
+ page: z36.number(),
6624
+ x: z36.number().describe("Percentage from left edge (0-100)"),
6625
+ y: z36.number().describe("Percentage from top edge (0-100)"),
6626
+ text: z36.string(),
6627
+ fontSize: z36.number().optional(),
6628
+ isCheckmark: z36.boolean().optional()
6128
6629
  });
6129
- var AcroFormMappingSchema = z34.object({
6130
- fieldId: z34.string(),
6131
- acroFormName: z34.string(),
6132
- value: z34.string()
6630
+ var AcroFormMappingSchema = z36.object({
6631
+ fieldId: z36.string(),
6632
+ acroFormName: z36.string(),
6633
+ value: z36.string()
6133
6634
  });
6134
- var QualityGateStatusSchema = z34.enum(["passed", "warning", "failed"]);
6135
- var QualitySeveritySchema = z34.enum(["info", "warning", "blocking"]);
6136
- var ApplicationQualityIssueSchema = z34.object({
6137
- code: z34.string(),
6635
+ var QualityGateStatusSchema = z36.enum(["passed", "warning", "failed"]);
6636
+ var QualitySeveritySchema = z36.enum(["info", "warning", "blocking"]);
6637
+ var ApplicationQualityIssueSchema = z36.object({
6638
+ code: z36.string(),
6138
6639
  severity: QualitySeveritySchema,
6139
- message: z34.string(),
6140
- fieldId: z34.string().optional()
6640
+ message: z36.string(),
6641
+ fieldId: z36.string().optional()
6141
6642
  });
6142
- var ApplicationQualityRoundSchema = z34.object({
6143
- round: z34.number(),
6144
- kind: z34.string(),
6643
+ var ApplicationQualityRoundSchema = z36.object({
6644
+ round: z36.number(),
6645
+ kind: z36.string(),
6145
6646
  status: QualityGateStatusSchema,
6146
- summary: z34.string().optional()
6647
+ summary: z36.string().optional()
6147
6648
  });
6148
- var ApplicationQualityArtifactSchema = z34.object({
6149
- kind: z34.string(),
6150
- label: z34.string().optional(),
6151
- itemCount: z34.number().optional()
6649
+ var ApplicationQualityArtifactSchema = z36.object({
6650
+ kind: z36.string(),
6651
+ label: z36.string().optional(),
6652
+ itemCount: z36.number().optional()
6152
6653
  });
6153
- var ApplicationEmailReviewSchema = z34.object({
6154
- issues: z34.array(ApplicationQualityIssueSchema),
6654
+ var ApplicationEmailReviewSchema = z36.object({
6655
+ issues: z36.array(ApplicationQualityIssueSchema),
6155
6656
  qualityGateStatus: QualityGateStatusSchema
6156
6657
  });
6157
- var ApplicationQualityReportSchema = z34.object({
6158
- issues: z34.array(ApplicationQualityIssueSchema),
6159
- rounds: z34.array(ApplicationQualityRoundSchema).optional(),
6160
- artifacts: z34.array(ApplicationQualityArtifactSchema).optional(),
6658
+ var ApplicationQualityReportSchema = z36.object({
6659
+ issues: z36.array(ApplicationQualityIssueSchema),
6660
+ rounds: z36.array(ApplicationQualityRoundSchema).optional(),
6661
+ artifacts: z36.array(ApplicationQualityArtifactSchema).optional(),
6161
6662
  emailReview: ApplicationEmailReviewSchema.optional(),
6162
6663
  qualityGateStatus: QualityGateStatusSchema
6163
6664
  });
6164
- var ApplicationStateSchema = z34.object({
6165
- id: z34.string(),
6166
- pdfBase64: z34.string().optional().describe("Original PDF, omitted after extraction"),
6167
- title: z34.string().optional(),
6168
- applicationType: z34.string().nullable().optional(),
6169
- fields: z34.array(ApplicationFieldSchema),
6170
- batches: z34.array(z34.array(z34.string())).optional(),
6171
- currentBatchIndex: z34.number().default(0),
6665
+ var ApplicationStateSchema = z36.object({
6666
+ id: z36.string(),
6667
+ pdfBase64: z36.string().optional().describe("Original PDF, omitted after extraction"),
6668
+ title: z36.string().optional(),
6669
+ applicationType: z36.string().nullable().optional(),
6670
+ fields: z36.array(ApplicationFieldSchema),
6671
+ batches: z36.array(z36.array(z36.string())).optional(),
6672
+ currentBatchIndex: z36.number().default(0),
6172
6673
  qualityReport: ApplicationQualityReportSchema.optional(),
6173
- status: z34.enum(["classifying", "extracting", "auto_filling", "batching", "collecting", "confirming", "mapping", "complete"]),
6174
- createdAt: z34.number(),
6175
- updatedAt: z34.number()
6674
+ status: z36.enum(["classifying", "extracting", "auto_filling", "batching", "collecting", "confirming", "mapping", "complete"]),
6675
+ createdAt: z36.number(),
6676
+ updatedAt: z36.number()
6176
6677
  });
6177
6678
 
6178
6679
  // src/application/agents/classifier.ts
@@ -7309,90 +7810,91 @@ Respond with the final answer, deduplicated citations array, overall confidence
7309
7810
  }
7310
7811
 
7311
7812
  // src/schemas/query.ts
7312
- import { z as z35 } from "zod";
7313
- var QueryIntentSchema = z35.enum([
7813
+ import { z as z37 } from "zod";
7814
+ var QueryIntentSchema = z37.enum([
7314
7815
  "policy_question",
7315
7816
  "coverage_comparison",
7316
7817
  "document_search",
7317
7818
  "claims_inquiry",
7318
7819
  "general_knowledge"
7319
7820
  ]);
7320
- var QueryAttachmentKindSchema = z35.enum(["image", "pdf", "text"]);
7321
- var QueryAttachmentSchema = z35.object({
7322
- id: z35.string().optional().describe("Optional stable attachment ID from the caller"),
7821
+ var QueryAttachmentKindSchema = z37.enum(["image", "pdf", "text"]);
7822
+ var QueryAttachmentSchema = z37.object({
7823
+ id: z37.string().optional().describe("Optional stable attachment ID from the caller"),
7323
7824
  kind: QueryAttachmentKindSchema,
7324
- name: z35.string().optional().describe("Original filename or user-facing label"),
7325
- mimeType: z35.string().optional().describe("MIME type such as image/jpeg or application/pdf"),
7326
- base64: z35.string().optional().describe("Base64-encoded file content for image/pdf attachments"),
7327
- text: z35.string().optional().describe("Plain-text attachment content when available"),
7328
- description: z35.string().optional().describe("Caller-provided description of the attachment")
7825
+ name: z37.string().optional().describe("Original filename or user-facing label"),
7826
+ mimeType: z37.string().optional().describe("MIME type such as image/jpeg or application/pdf"),
7827
+ base64: z37.string().optional().describe("Base64-encoded file content for image/pdf attachments"),
7828
+ text: z37.string().optional().describe("Plain-text attachment content when available"),
7829
+ description: z37.string().optional().describe("Caller-provided description of the attachment")
7329
7830
  });
7330
- var SubQuestionSchema = z35.object({
7331
- question: z35.string().describe("Atomic sub-question to retrieve and answer independently"),
7831
+ var SubQuestionSchema = z37.object({
7832
+ question: z37.string().describe("Atomic sub-question to retrieve and answer independently"),
7332
7833
  intent: QueryIntentSchema,
7333
- chunkTypes: z35.array(z35.string()).optional().describe("Chunk types to filter retrieval (e.g. coverage, endorsement, declaration)"),
7334
- documentFilters: z35.object({
7335
- type: z35.enum(["policy", "quote"]).optional(),
7336
- carrier: z35.string().optional(),
7337
- insuredName: z35.string().optional(),
7338
- policyNumber: z35.string().optional(),
7339
- quoteNumber: z35.string().optional()
7834
+ chunkTypes: z37.array(z37.string()).optional().describe("Chunk types to filter retrieval (e.g. coverage, endorsement, declaration)"),
7835
+ documentFilters: z37.object({
7836
+ type: z37.enum(["policy", "quote"]).optional(),
7837
+ carrier: z37.string().optional(),
7838
+ insuredName: z37.string().optional(),
7839
+ policyNumber: z37.string().optional(),
7840
+ quoteNumber: z37.string().optional(),
7841
+ policyTypes: z37.array(PolicyTypeSchema).optional().describe("Filter by policy type (e.g. homeowners_ho3, renters_ho4, pet) to avoid mixing up similar policies")
7340
7842
  }).optional().describe("Structured filters to narrow document lookup")
7341
7843
  });
7342
- var QueryClassifyResultSchema = z35.object({
7844
+ var QueryClassifyResultSchema = z37.object({
7343
7845
  intent: QueryIntentSchema,
7344
- subQuestions: z35.array(SubQuestionSchema).min(1).describe("Decomposed atomic sub-questions"),
7345
- requiresDocumentLookup: z35.boolean().describe("Whether structured document lookup is needed"),
7346
- requiresChunkSearch: z35.boolean().describe("Whether semantic chunk search is needed"),
7347
- requiresConversationHistory: z35.boolean().describe("Whether conversation history is relevant")
7846
+ subQuestions: z37.array(SubQuestionSchema).min(1).describe("Decomposed atomic sub-questions"),
7847
+ requiresDocumentLookup: z37.boolean().describe("Whether structured document lookup is needed"),
7848
+ requiresChunkSearch: z37.boolean().describe("Whether semantic chunk search is needed"),
7849
+ requiresConversationHistory: z37.boolean().describe("Whether conversation history is relevant")
7348
7850
  });
7349
- var EvidenceItemSchema = z35.object({
7350
- source: z35.enum(["chunk", "document", "conversation", "attachment"]),
7351
- chunkId: z35.string().optional(),
7352
- documentId: z35.string().optional(),
7353
- turnId: z35.string().optional(),
7354
- attachmentId: z35.string().optional(),
7355
- text: z35.string().describe("Text excerpt from the source"),
7356
- relevance: z35.number().min(0).max(1),
7357
- metadata: z35.array(z35.object({ key: z35.string(), value: z35.string() })).optional()
7851
+ var EvidenceItemSchema = z37.object({
7852
+ source: z37.enum(["chunk", "document", "conversation", "attachment"]),
7853
+ chunkId: z37.string().optional(),
7854
+ documentId: z37.string().optional(),
7855
+ turnId: z37.string().optional(),
7856
+ attachmentId: z37.string().optional(),
7857
+ text: z37.string().describe("Text excerpt from the source"),
7858
+ relevance: z37.number().min(0).max(1),
7859
+ metadata: z37.array(z37.object({ key: z37.string(), value: z37.string() })).optional()
7358
7860
  });
7359
- var AttachmentInterpretationSchema = z35.object({
7360
- summary: z35.string().describe("Concise summary of what the attachment shows or contains"),
7361
- extractedFacts: z35.array(z35.string()).describe("Specific observable or document facts grounded in the attachment"),
7362
- recommendedFocus: z35.array(z35.string()).describe("Important details to incorporate when answering follow-up questions"),
7363
- confidence: z35.number().min(0).max(1)
7861
+ var AttachmentInterpretationSchema = z37.object({
7862
+ summary: z37.string().describe("Concise summary of what the attachment shows or contains"),
7863
+ extractedFacts: z37.array(z37.string()).describe("Specific observable or document facts grounded in the attachment"),
7864
+ recommendedFocus: z37.array(z37.string()).describe("Important details to incorporate when answering follow-up questions"),
7865
+ confidence: z37.number().min(0).max(1)
7364
7866
  });
7365
- var RetrievalResultSchema = z35.object({
7366
- subQuestion: z35.string(),
7367
- evidence: z35.array(EvidenceItemSchema)
7867
+ var RetrievalResultSchema = z37.object({
7868
+ subQuestion: z37.string(),
7869
+ evidence: z37.array(EvidenceItemSchema)
7368
7870
  });
7369
- var CitationSchema = z35.object({
7370
- index: z35.number().describe("Citation number [1], [2], etc."),
7371
- chunkId: z35.string().describe("Source chunk ID, e.g. doc-123:coverage:2"),
7372
- documentId: z35.string(),
7373
- documentType: z35.enum(["policy", "quote"]).optional(),
7374
- field: z35.string().optional().describe("Specific field path, e.g. coverages[0].deductible"),
7375
- quote: z35.string().describe("Exact text from source that supports the claim"),
7376
- relevance: z35.number().min(0).max(1)
7871
+ var CitationSchema = z37.object({
7872
+ index: z37.number().describe("Citation number [1], [2], etc."),
7873
+ chunkId: z37.string().describe("Source chunk ID, e.g. doc-123:coverage:2"),
7874
+ documentId: z37.string(),
7875
+ documentType: z37.enum(["policy", "quote"]).optional(),
7876
+ field: z37.string().optional().describe("Specific field path, e.g. coverages[0].deductible"),
7877
+ quote: z37.string().describe("Exact text from source that supports the claim"),
7878
+ relevance: z37.number().min(0).max(1)
7377
7879
  });
7378
- var SubAnswerSchema = z35.object({
7379
- subQuestion: z35.string(),
7380
- answer: z35.string(),
7381
- citations: z35.array(CitationSchema),
7382
- confidence: z35.number().min(0).max(1),
7383
- needsMoreContext: z35.boolean().describe("True if evidence was insufficient to answer fully")
7880
+ var SubAnswerSchema = z37.object({
7881
+ subQuestion: z37.string(),
7882
+ answer: z37.string(),
7883
+ citations: z37.array(CitationSchema),
7884
+ confidence: z37.number().min(0).max(1),
7885
+ needsMoreContext: z37.boolean().describe("True if evidence was insufficient to answer fully")
7384
7886
  });
7385
- var VerifyResultSchema = z35.object({
7386
- approved: z35.boolean().describe("Whether all sub-answers are adequately grounded"),
7387
- issues: z35.array(z35.string()).describe("Specific grounding or consistency issues found"),
7388
- retrySubQuestions: z35.array(z35.string()).optional().describe("Sub-questions that need additional retrieval or re-reasoning")
7887
+ var VerifyResultSchema = z37.object({
7888
+ approved: z37.boolean().describe("Whether all sub-answers are adequately grounded"),
7889
+ issues: z37.array(z37.string()).describe("Specific grounding or consistency issues found"),
7890
+ retrySubQuestions: z37.array(z37.string()).optional().describe("Sub-questions that need additional retrieval or re-reasoning")
7389
7891
  });
7390
- var QueryResultSchema = z35.object({
7391
- answer: z35.string(),
7392
- citations: z35.array(CitationSchema),
7892
+ var QueryResultSchema = z37.object({
7893
+ answer: z37.string(),
7894
+ citations: z37.array(CitationSchema),
7393
7895
  intent: QueryIntentSchema,
7394
- confidence: z35.number().min(0).max(1),
7395
- followUp: z35.string().optional().describe("Suggested follow-up question if applicable")
7896
+ confidence: z37.number().min(0).max(1),
7897
+ followUp: z37.string().optional().describe("Suggested follow-up question if applicable")
7396
7898
  });
7397
7899
 
7398
7900
  // src/query/retriever.ts
@@ -8549,6 +9051,7 @@ export {
8549
9051
  buildIntentPrompt,
8550
9052
  buildInterpretAttachmentPrompt,
8551
9053
  buildLookupFillPrompt,
9054
+ buildPdfProviderOptions,
8552
9055
  buildQueryClassifyPrompt,
8553
9056
  buildQuestionBatchPrompt,
8554
9057
  buildQuotesPoliciesPrompt,
@@ -8566,10 +9069,14 @@ export {
8566
9069
  fillAcroForm,
8567
9070
  getAcroFormFields,
8568
9071
  getExtractor,
9072
+ getFileIdentifier,
8569
9073
  getPdfPageCount,
8570
9074
  getTemplate,
9075
+ isFileReference,
8571
9076
  overlayTextOnPdf,
8572
9077
  pLimit,
9078
+ pdfInputToBase64,
9079
+ pdfInputToBytes,
8573
9080
  safeGenerateObject,
8574
9081
  sanitizeNulls,
8575
9082
  stripFences,