@claritylabs/cl-sdk 0.14.2 → 0.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1,7 +1,9 @@
1
1
  "use strict";
2
+ var __create = Object.create;
2
3
  var __defProp = Object.defineProperty;
3
4
  var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
4
5
  var __getOwnPropNames = Object.getOwnPropertyNames;
6
+ var __getProtoOf = Object.getPrototypeOf;
5
7
  var __hasOwnProp = Object.prototype.hasOwnProperty;
6
8
  var __export = (target, all) => {
7
9
  for (var name in all)
@@ -15,6 +17,14 @@ var __copyProps = (to, from, except, desc) => {
15
17
  }
16
18
  return to;
17
19
  };
20
+ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
21
+ // If the importer is in node compatibility mode or this is not an ESM
22
+ // file that has been converted to a CommonJS file using a Babel-
23
+ // compatible transform (i.e. "__esModule" has not been set), then set
24
+ // "default" to the CommonJS "module.exports" for node compatibility.
25
+ isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
26
+ mod
27
+ ));
18
28
  var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
19
29
 
20
30
  // src/index.ts
@@ -221,6 +231,7 @@ __export(index_exports, {
221
231
  buildIntentPrompt: () => buildIntentPrompt,
222
232
  buildInterpretAttachmentPrompt: () => buildInterpretAttachmentPrompt,
223
233
  buildLookupFillPrompt: () => buildLookupFillPrompt,
234
+ buildPdfProviderOptions: () => buildPdfProviderOptions,
224
235
  buildQueryClassifyPrompt: () => buildQueryClassifyPrompt,
225
236
  buildQuestionBatchPrompt: () => buildQuestionBatchPrompt,
226
237
  buildQuotesPoliciesPrompt: () => buildQuotesPoliciesPrompt,
@@ -238,10 +249,14 @@ __export(index_exports, {
238
249
  fillAcroForm: () => fillAcroForm,
239
250
  getAcroFormFields: () => getAcroFormFields,
240
251
  getExtractor: () => getExtractor,
252
+ getFileIdentifier: () => getFileIdentifier,
241
253
  getPdfPageCount: () => getPdfPageCount,
242
254
  getTemplate: () => getTemplate,
255
+ isFileReference: () => isFileReference,
243
256
  overlayTextOnPdf: () => overlayTextOnPdf,
244
257
  pLimit: () => pLimit,
258
+ pdfInputToBase64: () => pdfInputToBase64,
259
+ pdfInputToBytes: () => pdfInputToBytes,
245
260
  safeGenerateObject: () => safeGenerateObject,
246
261
  sanitizeNulls: () => sanitizeNulls,
247
262
  stripFences: () => stripFences,
@@ -1667,34 +1682,134 @@ var CONTEXT_KEY_MAP = [
1667
1682
 
1668
1683
  // src/extraction/pdf.ts
1669
1684
  var import_pdf_lib = require("pdf-lib");
1670
- async function extractPageRange(pdfBase64, startPage, endPage) {
1671
- const srcBytes = typeof Buffer !== "undefined" ? Buffer.from(pdfBase64, "base64") : Uint8Array.from(atob(pdfBase64), (c) => c.charCodeAt(0));
1685
+ function isFileIdRef(input) {
1686
+ return typeof input === "object" && input !== null && "fileId" in input;
1687
+ }
1688
+ function isUrl(input) {
1689
+ return input instanceof URL;
1690
+ }
1691
+ function isBytes(input) {
1692
+ return input instanceof Uint8Array;
1693
+ }
1694
+ async function pdfInputToBytes(input) {
1695
+ if (isFileIdRef(input)) {
1696
+ throw new Error(
1697
+ "Cannot convert fileId reference to bytes. Pass the fileId directly to your provider callback instead."
1698
+ );
1699
+ }
1700
+ if (isUrl(input)) {
1701
+ if (input.protocol === "file:") {
1702
+ if (typeof process !== "undefined" && process.versions?.node) {
1703
+ const fs = await import("fs/promises");
1704
+ const buffer = await fs.readFile(input.pathname);
1705
+ return new Uint8Array(buffer);
1706
+ }
1707
+ throw new Error("File URLs not supported in browser environment");
1708
+ }
1709
+ const response = await fetch(input.toString());
1710
+ if (!response.ok) {
1711
+ throw new Error(`Failed to fetch PDF: ${response.status} ${response.statusText}`);
1712
+ }
1713
+ const arrayBuffer = await response.arrayBuffer();
1714
+ return new Uint8Array(arrayBuffer);
1715
+ }
1716
+ if (isBytes(input)) {
1717
+ return input;
1718
+ }
1719
+ if (typeof Buffer !== "undefined") {
1720
+ return new Uint8Array(Buffer.from(input, "base64"));
1721
+ }
1722
+ return Uint8Array.from(atob(input), (c) => c.charCodeAt(0));
1723
+ }
1724
+ async function pdfInputToBase64(input) {
1725
+ if (isFileIdRef(input)) {
1726
+ throw new Error(
1727
+ "Cannot convert fileId reference to base64. Pass the fileId directly to your provider callback instead."
1728
+ );
1729
+ }
1730
+ if (isUrl(input)) {
1731
+ const bytes = await pdfInputToBytes(input);
1732
+ return bytesToBase64(bytes);
1733
+ }
1734
+ if (isBytes(input)) {
1735
+ return bytesToBase64(input);
1736
+ }
1737
+ return input;
1738
+ }
1739
+ function bytesToBase64(bytes) {
1740
+ if (typeof Buffer !== "undefined") {
1741
+ return Buffer.from(bytes).toString("base64");
1742
+ }
1743
+ let binary = "";
1744
+ for (let i = 0; i < bytes.length; i++) {
1745
+ binary += String.fromCharCode(bytes[i]);
1746
+ }
1747
+ return btoa(binary);
1748
+ }
1749
+ function isFileReference(input) {
1750
+ return isFileIdRef(input) || isUrl(input);
1751
+ }
1752
+ function getFileIdentifier(input) {
1753
+ if (isFileIdRef(input)) {
1754
+ return { fileId: input.fileId };
1755
+ }
1756
+ if (isUrl(input)) {
1757
+ return { url: input.toString() };
1758
+ }
1759
+ return void 0;
1760
+ }
1761
+ async function getPdfPageCount(input) {
1762
+ const bytes = await pdfInputToBytes(input);
1763
+ const doc = await import_pdf_lib.PDFDocument.load(bytes, { ignoreEncryption: true });
1764
+ return doc.getPageCount();
1765
+ }
1766
+ async function extractPageRange(input, startPage, endPage) {
1767
+ if (isFileIdRef(input)) {
1768
+ throw new Error(
1769
+ "Cannot extract page range from fileId reference. The provider must handle fileId inputs directly or you must pass the full PDF as base64/bytes."
1770
+ );
1771
+ }
1772
+ if (isUrl(input) && (input.protocol === "http:" || input.protocol === "https:")) {
1773
+ throw new Error(
1774
+ "Cannot extract page range from remote URL. Either pass the full PDF as base64/bytes, or download it first."
1775
+ );
1776
+ }
1777
+ const srcBytes = await pdfInputToBytes(input);
1672
1778
  const srcDoc = await import_pdf_lib.PDFDocument.load(srcBytes, { ignoreEncryption: true });
1673
1779
  const totalPages = srcDoc.getPageCount();
1674
1780
  const start = Math.max(startPage - 1, 0);
1675
1781
  const end = Math.min(endPage, totalPages) - 1;
1676
1782
  if (start === 0 && end >= totalPages - 1) {
1677
- return pdfBase64;
1783
+ if (isBytes(input)) {
1784
+ return bytesToBase64(input);
1785
+ }
1786
+ if (typeof input === "string") {
1787
+ return input;
1788
+ }
1789
+ return bytesToBase64(srcBytes);
1678
1790
  }
1679
1791
  const newDoc = await import_pdf_lib.PDFDocument.create();
1680
1792
  const indices = Array.from({ length: end - start + 1 }, (_, i) => start + i);
1681
1793
  const pages = await newDoc.copyPages(srcDoc, indices);
1682
1794
  pages.forEach((page) => newDoc.addPage(page));
1683
1795
  const bytes = await newDoc.save();
1684
- if (typeof Buffer !== "undefined") {
1685
- return Buffer.from(bytes).toString("base64");
1796
+ return bytesToBase64(new Uint8Array(bytes));
1797
+ }
1798
+ async function buildPdfProviderOptions(input, existingOptions) {
1799
+ const options = { ...existingOptions };
1800
+ if (isFileIdRef(input)) {
1801
+ options.fileId = input.fileId;
1802
+ if (input.mimeType) {
1803
+ options.fileMimeType = input.mimeType;
1804
+ }
1805
+ return options;
1686
1806
  }
1687
- let binary = "";
1688
- const uint8 = new Uint8Array(bytes);
1689
- for (let i = 0; i < uint8.length; i++) {
1690
- binary += String.fromCharCode(uint8[i]);
1807
+ if (isUrl(input)) {
1808
+ options.pdfUrl = input;
1809
+ return options;
1691
1810
  }
1692
- return btoa(binary);
1693
- }
1694
- async function getPdfPageCount(pdfBase64) {
1695
- const srcBytes = typeof Buffer !== "undefined" ? Buffer.from(pdfBase64, "base64") : Uint8Array.from(atob(pdfBase64), (c) => c.charCodeAt(0));
1696
- const doc = await import_pdf_lib.PDFDocument.load(srcBytes, { ignoreEncryption: true });
1697
- return doc.getPageCount();
1811
+ options.pdfBase64 = await pdfInputToBase64(input);
1812
+ return options;
1698
1813
  }
1699
1814
  function getAcroFormFields(pdfDoc) {
1700
1815
  const form = pdfDoc.getForm();
@@ -1787,7 +1902,7 @@ async function runExtractor(params) {
1787
1902
  name,
1788
1903
  prompt,
1789
1904
  schema,
1790
- pdfBase64,
1905
+ pdfInput,
1791
1906
  startPage,
1792
1907
  endPage,
1793
1908
  generateObject,
@@ -1797,6 +1912,7 @@ async function runExtractor(params) {
1797
1912
  } = params;
1798
1913
  const extractorProviderOptions = { ...providerOptions };
1799
1914
  let fullPrompt;
1915
+ const pdfBase64 = await pdfInputToBase64(pdfInput);
1800
1916
  if (convertPdfToImages) {
1801
1917
  const images = await convertPdfToImages(pdfBase64, startPage, endPage);
1802
1918
  extractorProviderOptions.images = images;
@@ -2476,10 +2592,13 @@ function chunkDocument(doc) {
2476
2592
  };
2477
2593
  const chunks = [];
2478
2594
  const docId = doc.id;
2595
+ const policyTypesStr = doc.policyTypes?.length ? doc.policyTypes.join(",") : void 0;
2479
2596
  function stringMetadata(entries) {
2480
- return Object.fromEntries(
2597
+ const base = Object.fromEntries(
2481
2598
  Object.entries(entries).filter(([, value]) => value !== void 0 && value !== null && String(value).length > 0).map(([key, value]) => [key, String(value)])
2482
2599
  );
2600
+ if (policyTypesStr) base.policyTypes = policyTypesStr;
2601
+ return base;
2483
2602
  }
2484
2603
  chunks.push({
2485
2604
  id: `${docId}:carrier_info:0`,
@@ -2840,13 +2959,16 @@ ${exc.content}`.trim(),
2840
2959
  }
2841
2960
  }
2842
2961
  if (declLines.length > 0) {
2962
+ const declMeta = { documentType: doc.type };
2963
+ if (typeof decl.formType === "string") declMeta.formType = decl.formType;
2964
+ if (typeof decl.line === "string") declMeta.declarationLine = decl.line;
2843
2965
  chunks.push({
2844
2966
  id: `${docId}:declaration:0`,
2845
2967
  documentId: docId,
2846
2968
  type: "declaration",
2847
2969
  text: `Declarations
2848
2970
  ${declLines.join("\n")}`,
2849
- metadata: stringMetadata({ documentType: doc.type })
2971
+ metadata: stringMetadata(declMeta)
2850
2972
  });
2851
2973
  }
2852
2974
  }
@@ -4267,11 +4389,30 @@ COMMERCIAL LINES \u2014 match these values:
4267
4389
  - "property" \u2014 standalone property
4268
4390
 
4269
4391
  PERSONAL LINES \u2014 match these values:
4270
- - "homeowners_ho3" \u2014 HO-3, special form homeowners
4271
- - "homeowners_ho5" \u2014 HO-5, comprehensive form homeowners
4272
- - "renters_ho4" \u2014 HO-4, renters insurance
4273
- - "condo_ho6" \u2014 HO-6, condo unit-owners
4274
- - "dwelling_fire" \u2014 DP-1, DP-3, dwelling fire
4392
+
4393
+ HOMEOWNER FORM CLASSIFICATION \u2014 pay close attention to these distinctions:
4394
+ - "homeowners_ho3" \u2014 HO-3 Special Form. Standard homeowner policy for OWNER-OCCUPIED dwellings.
4395
+ Key indicators: Coverage A (Dwelling) present, open-peril dwelling coverage, named-peril personal property,
4396
+ references to "special form", "HO 00 03", or "HO-3". The insured OWNS the home.
4397
+ - "homeowners_ho5" \u2014 HO-5 Comprehensive Form. Premium homeowner policy for OWNER-OCCUPIED dwellings.
4398
+ Key indicators: Coverage A (Dwelling) present, BOTH dwelling AND personal property on open-peril basis,
4399
+ references to "comprehensive form", "HO 00 05", or "HO-5". Higher coverage than HO-3.
4400
+ - "renters_ho4" \u2014 HO-4 Contents Broad Form. Renters/tenants insurance \u2014 NO dwelling coverage.
4401
+ Key indicators: NO Coverage A (Dwelling), only Coverage C (Personal Property) and Coverage E/F (Liability/Medical),
4402
+ references to "contents broad form", "HO 00 04", "HO-4", "renters", "tenants". The insured RENTS, does not own.
4403
+ - "condo_ho6" \u2014 HO-6 Unit-Owners Form. Condo/co-op unit-owner insurance.
4404
+ Key indicators: Coverage A applies to interior walls/improvements only (not full structure),
4405
+ references to "unit-owners form", "HO 00 06", "HO-6", "condominium", "co-op unit". The building's
4406
+ master policy covers the structure; HO-6 covers the unit interior, personal property, and liability.
4407
+
4408
+ DISAMBIGUATION RULES for homeowner forms:
4409
+ 1. If the document has Coverage A (Dwelling) with full structure coverage \u2192 HO-3 or HO-5 (check if open-peril on personal property \u2192 HO-5, named-peril \u2192 HO-3)
4410
+ 2. If NO Coverage A / no dwelling coverage and the insured is a renter/tenant \u2192 renters_ho4
4411
+ 3. If Coverage A covers only unit interior/improvements and mentions condo/co-op \u2192 condo_ho6
4412
+ 4. Look for the actual form number (HO 00 03, HO 00 04, HO 00 05, HO 00 06) on the declarations page \u2014 this is the most reliable indicator
4413
+ 5. Do NOT default to homeowners_ho3 when uncertain \u2014 check for the distinguishing signals above
4414
+
4415
+ - "dwelling_fire" \u2014 DP-1, DP-3, dwelling fire (non-owner-occupied or investment property)
4275
4416
  - "mobile_home" \u2014 mobile home, manufactured home
4276
4417
  - "personal_auto" \u2014 personal auto, PAP
4277
4418
  - "personal_umbrella" \u2014 personal umbrella
@@ -4282,7 +4423,10 @@ PERSONAL LINES \u2014 match these values:
4282
4423
  - "watercraft" \u2014 watercraft, boat
4283
4424
  - "recreational_vehicle" \u2014 RV, recreational vehicle, ATV
4284
4425
  - "farm_ranch" \u2014 farm, ranch
4285
- - "pet" \u2014 pet insurance
4426
+ - "pet" \u2014 standalone pet insurance policy. Key indicators: named pet, species/breed, accident/illness coverage,
4427
+ wellness plans, per-incident or annual limits for veterinary costs. Do NOT confuse with pet liability endorsements
4428
+ on a homeowners policy \u2014 those are still homeowner policies (ho3/ho4/ho5/ho6), not "pet".
4429
+ Only classify as "pet" when the ENTIRE policy is dedicated to pet health/accident coverage.
4286
4430
  - "travel" \u2014 travel insurance
4287
4431
  - "identity_theft" \u2014 identity theft
4288
4432
  - "title" \u2014 title insurance
@@ -5138,6 +5282,338 @@ function getExtractor(name) {
5138
5282
  return EXTRACTORS[name];
5139
5283
  }
5140
5284
 
5285
+ // src/extraction/resolve-referential.ts
5286
+ var import_zod35 = require("zod");
5287
+
5288
+ // src/prompts/extractors/referential-lookup.ts
5289
+ var import_zod34 = require("zod");
5290
+ var ReferentialLookupSchema = import_zod34.z.object({
5291
+ resolvedCoverages: import_zod34.z.array(
5292
+ import_zod34.z.object({
5293
+ coverageName: import_zod34.z.string().describe("The coverage name that was referenced"),
5294
+ resolvedLimit: import_zod34.z.string().optional().describe("The concrete limit value found, if any"),
5295
+ resolvedLimitValueType: CoverageValueTypeSchema.optional(),
5296
+ resolvedDeductible: import_zod34.z.string().optional().describe("The concrete deductible value found, if any"),
5297
+ resolvedDeductibleValueType: CoverageValueTypeSchema.optional(),
5298
+ pageNumber: import_zod34.z.number().optional().describe("Page where the resolved value was found"),
5299
+ originalContent: import_zod34.z.string().optional().describe("Verbatim source text for the resolved value"),
5300
+ confidence: import_zod34.z.enum(["high", "medium", "low"]).describe("Confidence in the resolution")
5301
+ })
5302
+ )
5303
+ });
5304
+ function buildReferentialLookupPrompt(coverages) {
5305
+ const coverageList = coverages.map((c, i) => {
5306
+ const parts = [` ${i + 1}. Coverage: "${c.name}" \u2014 Limit: "${c.limit}"`];
5307
+ if (c.deductible) {
5308
+ parts.push(` Deductible: "${c.deductible}"`);
5309
+ }
5310
+ if (c.sectionRef) {
5311
+ parts.push(` Referenced section: "${c.sectionRef}"`);
5312
+ }
5313
+ return parts.join("\n");
5314
+ }).join("\n");
5315
+ return `You are an expert insurance document analyst. You are looking at a specific section of an insurance document to resolve referential coverage limits.
5316
+
5317
+ The following coverages had referential limits or deductibles (e.g. "As stated in Policy", "As stated in Section 4 of Policy", "See Declarations") instead of concrete values:
5318
+
5319
+ ${coverageList}
5320
+
5321
+ Your task:
5322
+ - Find the concrete/actual limit and deductible values for each coverage listed above.
5323
+ - Search the declarations page, coverage schedules, and any referenced sections for the real numeric or defined values.
5324
+ - Only return values you can actually find in the document \u2014 do not guess or infer values that are not explicitly stated.
5325
+ - For each resolved coverage, include:
5326
+ - pageNumber: the page where the resolved value appears
5327
+ - originalContent: the verbatim text snippet containing the resolved value
5328
+ - confidence: "high" if the value is clearly and unambiguously stated, "medium" if it requires interpretation, "low" if uncertain
5329
+ - If a coverage cannot be resolved (no concrete value found), still include it with confidence "low" and omit the resolved fields.
5330
+ - Classify resolvedLimitValueType and resolvedDeductibleValueType as numeric, included, not_included, as_stated, waiting_period, referential, or other.
5331
+
5332
+ Return JSON only.`;
5333
+ }
5334
+
5335
+ // src/extraction/resolve-referential.ts
5336
+ function looksReferential(value) {
5337
+ if (typeof value !== "string") return false;
5338
+ const normalized = value.toLowerCase();
5339
+ return normalized.includes("shown in the declarations") || normalized.includes("shown in declarations") || normalized.includes("shown in the schedule") || normalized.includes("as stated") || normalized.includes("if applicable");
5340
+ }
5341
+ function parseReferenceTarget(text) {
5342
+ if (typeof text !== "string") return void 0;
5343
+ const normalized = text.trim();
5344
+ if (!normalized) return void 0;
5345
+ const sectionMatch = normalized.match(/\b(Section\s+\d+[A-Za-z]?)/i);
5346
+ if (sectionMatch) return sectionMatch[1];
5347
+ if (/declarations/i.test(normalized)) return "Declarations";
5348
+ const scheduleMatch = normalized.match(/\b(Schedule(?:\s+of\s+[A-Za-z ]+)?)/i);
5349
+ if (scheduleMatch) return scheduleMatch[1].trim();
5350
+ const asStatedMatch = normalized.match(/(?:as\s+stated\s+in|see|shown\s+in(?:\s+the)?)\s+(.+)/i);
5351
+ if (asStatedMatch) {
5352
+ let target = asStatedMatch[1].trim().replace(/\s+of\s+the\s+policy$/i, "").trim();
5353
+ target = target.replace(/\.+$/, "").trim();
5354
+ if (target) return target;
5355
+ }
5356
+ if (/if applicable/i.test(normalized)) return void 0;
5357
+ return void 0;
5358
+ }
5359
+ var PageLocationSchema = import_zod35.z.object({
5360
+ startPage: import_zod35.z.number(),
5361
+ endPage: import_zod35.z.number()
5362
+ });
5363
+ async function findReferencedPages(params) {
5364
+ const {
5365
+ referenceTarget,
5366
+ sections,
5367
+ formInventory,
5368
+ pdfInput,
5369
+ pageCount,
5370
+ generateObject,
5371
+ providerOptions,
5372
+ log
5373
+ } = params;
5374
+ const targetLower = referenceTarget.toLowerCase();
5375
+ for (const section of sections) {
5376
+ if (section.title && section.pageStart != null && section.title.toLowerCase().includes(targetLower)) {
5377
+ return {
5378
+ startPage: section.pageStart,
5379
+ endPage: section.pageEnd ?? section.pageStart
5380
+ };
5381
+ }
5382
+ }
5383
+ for (const form of formInventory) {
5384
+ const titleMatch = form.title && form.title.toLowerCase().includes(targetLower);
5385
+ const typeMatch = form.formType && form.formType.toLowerCase().includes(targetLower);
5386
+ if ((titleMatch || typeMatch) && form.pageStart != null) {
5387
+ return {
5388
+ startPage: form.pageStart,
5389
+ endPage: form.pageEnd ?? form.pageStart
5390
+ };
5391
+ }
5392
+ }
5393
+ try {
5394
+ const result = await safeGenerateObject(
5395
+ generateObject,
5396
+ {
5397
+ prompt: `You are analyzing an insurance document (${pageCount} pages total).
5398
+
5399
+ Find the pages that contain the section or area referenced as "${referenceTarget}".
5400
+
5401
+ Return the page range (1-indexed) where this section is located. If the section spans a single page, startPage and endPage should be the same.
5402
+
5403
+ If you cannot find the section, return startPage: 0 and endPage: 0.
5404
+
5405
+ Return JSON only.`,
5406
+ schema: PageLocationSchema,
5407
+ maxTokens: 256,
5408
+ providerOptions: await buildPdfProviderOptions(pdfInput, providerOptions)
5409
+ },
5410
+ {
5411
+ fallback: { startPage: 0, endPage: 0 },
5412
+ maxRetries: 1,
5413
+ log,
5414
+ onError: (err, attempt) => log?.(
5415
+ `Page location attempt ${attempt + 1} failed for "${referenceTarget}": ${err instanceof Error ? err.message : String(err)}`
5416
+ )
5417
+ }
5418
+ );
5419
+ if (result.object.startPage > 0 && result.object.endPage > 0) {
5420
+ return {
5421
+ startPage: result.object.startPage,
5422
+ endPage: result.object.endPage
5423
+ };
5424
+ }
5425
+ } catch (error) {
5426
+ await log?.(
5427
+ `Failed to locate pages for "${referenceTarget}": ${error instanceof Error ? error.message : String(error)}`
5428
+ );
5429
+ }
5430
+ return void 0;
5431
+ }
5432
+ async function resolveReferentialCoverages(params) {
5433
+ const {
5434
+ memory,
5435
+ pdfInput,
5436
+ pageCount,
5437
+ generateObject,
5438
+ convertPdfToImages,
5439
+ concurrency = 2,
5440
+ providerOptions,
5441
+ log,
5442
+ onProgress
5443
+ } = params;
5444
+ const limit = pLimit(concurrency);
5445
+ let totalUsage = { inputTokens: 0, outputTokens: 0 };
5446
+ function trackUsage(usage) {
5447
+ if (usage) {
5448
+ totalUsage.inputTokens += usage.inputTokens;
5449
+ totalUsage.outputTokens += usage.outputTokens;
5450
+ }
5451
+ }
5452
+ const coverageData = memory.get("coverage_limits");
5453
+ const coverages = coverageData?.coverages ?? [];
5454
+ const referentialCoverages = coverages.filter((cov) => {
5455
+ const limitType = cov.limitValueType;
5456
+ const deductibleType = cov.deductibleValueType;
5457
+ return limitType === "referential" || limitType === "as_stated" || deductibleType === "referential" || deductibleType === "as_stated" || looksReferential(cov.limit) || looksReferential(cov.deductible);
5458
+ });
5459
+ const attempts = referentialCoverages.length;
5460
+ if (attempts === 0) {
5461
+ return {
5462
+ resolved: 0,
5463
+ unresolved: 0,
5464
+ attempts: 0,
5465
+ usage: totalUsage,
5466
+ details: []
5467
+ };
5468
+ }
5469
+ onProgress?.(
5470
+ `Found ${attempts} referential coverage(s) to resolve...`
5471
+ );
5472
+ const targetGroups = /* @__PURE__ */ new Map();
5473
+ for (let i = 0; i < referentialCoverages.length; i++) {
5474
+ const cov = referentialCoverages[i];
5475
+ const refString = (looksReferential(cov.limit) ? cov.limit : void 0) ?? (looksReferential(cov.deductible) ? cov.deductible : void 0) ?? cov.limit ?? "";
5476
+ const target = parseReferenceTarget(refString) ?? "unknown";
5477
+ const group = targetGroups.get(target) ?? [];
5478
+ group.push({ coverage: cov, index: i });
5479
+ targetGroups.set(target, group);
5480
+ }
5481
+ const sectionsData = memory.get("sections");
5482
+ const sections = sectionsData?.sections ?? [];
5483
+ const formInventoryData = memory.get("form_inventory");
5484
+ const formInventory = formInventoryData?.forms ?? [];
5485
+ const details = [];
5486
+ let resolved = 0;
5487
+ let unresolved = 0;
5488
+ const targetEntries = Array.from(targetGroups.entries());
5489
+ await Promise.all(
5490
+ targetEntries.map(
5491
+ ([target, group]) => limit(async () => {
5492
+ const pageRange = await findReferencedPages({
5493
+ referenceTarget: target,
5494
+ sections,
5495
+ formInventory,
5496
+ pdfInput,
5497
+ pageCount,
5498
+ generateObject,
5499
+ providerOptions,
5500
+ log
5501
+ });
5502
+ if (!pageRange) {
5503
+ await log?.(
5504
+ `Could not locate pages for reference target "${target}"`
5505
+ );
5506
+ for (const { coverage } of group) {
5507
+ details.push({
5508
+ coverageName: String(coverage.name ?? "unknown"),
5509
+ referenceTarget: target === "unknown" ? void 0 : target,
5510
+ status: "pages_not_found"
5511
+ });
5512
+ unresolved++;
5513
+ }
5514
+ return;
5515
+ }
5516
+ onProgress?.(
5517
+ `Resolving "${target}" from pages ${pageRange.startPage}-${pageRange.endPage}...`
5518
+ );
5519
+ const promptCoverages = group.map(({ coverage }) => ({
5520
+ name: String(coverage.name ?? "unknown"),
5521
+ limit: String(coverage.limit ?? ""),
5522
+ deductible: coverage.deductible ? String(coverage.deductible) : void 0,
5523
+ sectionRef: coverage.sectionRef ? String(coverage.sectionRef) : void 0
5524
+ }));
5525
+ try {
5526
+ const result = await runExtractor({
5527
+ name: "referential_lookup",
5528
+ prompt: buildReferentialLookupPrompt(promptCoverages),
5529
+ schema: ReferentialLookupSchema,
5530
+ pdfInput,
5531
+ startPage: pageRange.startPage,
5532
+ endPage: pageRange.endPage,
5533
+ generateObject,
5534
+ convertPdfToImages,
5535
+ maxTokens: 4096,
5536
+ providerOptions
5537
+ });
5538
+ trackUsage(result.usage);
5539
+ const resolvedMap = /* @__PURE__ */ new Map();
5540
+ for (const rc of result.data.resolvedCoverages) {
5541
+ resolvedMap.set(rc.coverageName.toLowerCase(), rc);
5542
+ }
5543
+ for (const { coverage } of group) {
5544
+ const covName = String(coverage.name ?? "unknown");
5545
+ const rc = resolvedMap.get(covName.toLowerCase());
5546
+ if (!rc) {
5547
+ details.push({
5548
+ coverageName: covName,
5549
+ referenceTarget: target === "unknown" ? void 0 : target,
5550
+ status: "unresolved"
5551
+ });
5552
+ unresolved++;
5553
+ continue;
5554
+ }
5555
+ const limitResolved = rc.resolvedLimit && rc.resolvedLimitValueType !== "referential" && rc.resolvedLimitValueType !== "as_stated" && !looksReferential(rc.resolvedLimit);
5556
+ const deductibleResolved = rc.resolvedDeductible && rc.resolvedDeductibleValueType !== "referential" && rc.resolvedDeductibleValueType !== "as_stated" && !looksReferential(rc.resolvedDeductible);
5557
+ if (limitResolved || deductibleResolved) {
5558
+ if (limitResolved) {
5559
+ coverage.limit = rc.resolvedLimit;
5560
+ coverage.limitValueType = rc.resolvedLimitValueType ?? "numeric";
5561
+ }
5562
+ if (deductibleResolved) {
5563
+ coverage.deductible = rc.resolvedDeductible;
5564
+ coverage.deductibleValueType = rc.resolvedDeductibleValueType ?? "numeric";
5565
+ }
5566
+ if (rc.pageNumber != null) {
5567
+ coverage.resolvedFromPage = rc.pageNumber;
5568
+ }
5569
+ if (rc.originalContent) {
5570
+ coverage.resolvedOriginalContent = rc.originalContent;
5571
+ }
5572
+ details.push({
5573
+ coverageName: covName,
5574
+ referenceTarget: target === "unknown" ? void 0 : target,
5575
+ resolvedLimit: limitResolved ? rc.resolvedLimit : void 0,
5576
+ resolvedDeductible: deductibleResolved ? rc.resolvedDeductible : void 0,
5577
+ status: "resolved"
5578
+ });
5579
+ resolved++;
5580
+ } else {
5581
+ details.push({
5582
+ coverageName: covName,
5583
+ referenceTarget: target === "unknown" ? void 0 : target,
5584
+ status: "unresolved"
5585
+ });
5586
+ unresolved++;
5587
+ }
5588
+ }
5589
+ } catch (error) {
5590
+ await log?.(
5591
+ `Referential lookup extraction failed for target "${target}": ${error instanceof Error ? error.message : String(error)}`
5592
+ );
5593
+ for (const { coverage } of group) {
5594
+ details.push({
5595
+ coverageName: String(coverage.name ?? "unknown"),
5596
+ referenceTarget: target === "unknown" ? void 0 : target,
5597
+ status: "unresolved"
5598
+ });
5599
+ unresolved++;
5600
+ }
5601
+ }
5602
+ })
5603
+ )
5604
+ );
5605
+ onProgress?.(
5606
+ `Referential resolution complete: ${resolved} resolved, ${unresolved} unresolved out of ${attempts} attempts.`
5607
+ );
5608
+ return {
5609
+ resolved,
5610
+ unresolved,
5611
+ attempts,
5612
+ usage: totalUsage,
5613
+ details
5614
+ };
5615
+ }
5616
+
5141
5617
  // src/core/quality.ts
5142
5618
  function evaluateQualityGate(params) {
5143
5619
  const { issues, hasRoundWarnings = false } = params;
@@ -5174,7 +5650,7 @@ function addFormEntry(inventory, formNumber, source, extra) {
5174
5650
  sources: [source]
5175
5651
  });
5176
5652
  }
5177
- function looksReferential(value) {
5653
+ function looksReferential2(value) {
5178
5654
  if (typeof value !== "string") return false;
5179
5655
  const normalized = value.toLowerCase();
5180
5656
  return normalized.includes("shown in the declarations") || normalized.includes("shown in declarations") || normalized.includes("shown in the schedule") || normalized.includes("as stated") || normalized.includes("if applicable");
@@ -5298,7 +5774,7 @@ function buildExtractionReviewReport(params) {
5298
5774
  itemName: typeof coverage.name === "string" ? coverage.name : void 0
5299
5775
  });
5300
5776
  }
5301
- if (looksReferential(coverage.limit) || looksReferential(coverage.deductible)) {
5777
+ if (looksReferential2(coverage.limit) || looksReferential2(coverage.deductible)) {
5302
5778
  deterministicIssues.push({
5303
5779
  code: "coverage_referential_value",
5304
5780
  severity: "warning",
@@ -5420,7 +5896,8 @@ function buildExtractionReviewReport(params) {
5420
5896
  }));
5421
5897
  const artifacts = [
5422
5898
  { kind: "form_inventory", label: "Form Inventory", itemCount: formInventory.length },
5423
- { kind: "page_map", label: "Page Map", itemCount: params.pageAssignments.length }
5899
+ { kind: "page_map", label: "Page Map", itemCount: params.pageAssignments.length },
5900
+ { kind: "referential_resolution", label: "Referential Resolution", itemCount: coverages.filter((c) => c.limitValueType === "referential" || c.limitValueType === "as_stated" || c.deductibleValueType === "referential" || c.deductibleValueType === "as_stated").length }
5424
5901
  ];
5425
5902
  const qualityGateStatus = evaluateQualityGate({
5426
5903
  issues: deterministicIssues,
@@ -5672,7 +6149,7 @@ function createExtractor(config) {
5672
6149
  }))
5673
6150
  };
5674
6151
  }
5675
- async function extract(pdfBase64, documentId, options) {
6152
+ async function extract(pdfInput, documentId, options) {
5676
6153
  const id = documentId ?? `doc-${Date.now()}`;
5677
6154
  const memory = /* @__PURE__ */ new Map();
5678
6155
  totalUsage = { inputTokens: 0, outputTokens: 0 };
@@ -5690,20 +6167,27 @@ function createExtractor(config) {
5690
6167
  memory.set(k, v);
5691
6168
  }
5692
6169
  }
6170
+ let pdfBase64Cache;
6171
+ async function getPdfBase64ForExtraction() {
6172
+ if (pdfBase64Cache === void 0) {
6173
+ pdfBase64Cache = await pdfInputToBase64(pdfInput);
6174
+ }
6175
+ return pdfBase64Cache;
6176
+ }
5693
6177
  let classifyResult;
5694
6178
  if (resumed?.classifyResult && pipelineCtx.isPhaseComplete("classify")) {
5695
6179
  classifyResult = resumed.classifyResult;
5696
6180
  onProgress?.("Resuming from checkpoint (classify complete)...");
5697
6181
  } else {
5698
6182
  onProgress?.("Classifying document...");
5699
- const pageCount2 = await getPdfPageCount(pdfBase64);
6183
+ const pageCount2 = await getPdfPageCount(pdfInput);
5700
6184
  const classifyResponse = await safeGenerateObject(
5701
6185
  generateObject,
5702
6186
  {
5703
6187
  prompt: buildClassifyPrompt(),
5704
6188
  schema: ClassifyResultSchema,
5705
6189
  maxTokens: 512,
5706
- providerOptions: { ...providerOptions, pdfBase64 }
6190
+ providerOptions: await buildPdfProviderOptions(pdfInput, providerOptions)
5707
6191
  },
5708
6192
  {
5709
6193
  fallback: { documentType: "policy", policyTypes: ["other"], confidence: 0 },
@@ -5728,7 +6212,7 @@ function createExtractor(config) {
5728
6212
  const { documentType, policyTypes } = classifyResult;
5729
6213
  const primaryType = policyTypes[0] ?? "other";
5730
6214
  const template = getTemplate(primaryType);
5731
- const pageCount = resumed?.pageCount ?? await getPdfPageCount(pdfBase64);
6215
+ const pageCount = resumed?.pageCount ?? await getPdfPageCount(pdfInput);
5732
6216
  const templateHints = buildTemplateHints(primaryType, documentType, pageCount, template);
5733
6217
  let formInventory;
5734
6218
  if (resumed?.formInventory && pipelineCtx.isPhaseComplete("form_inventory")) {
@@ -5743,7 +6227,7 @@ function createExtractor(config) {
5743
6227
  prompt: buildFormInventoryPrompt(templateHints),
5744
6228
  schema: FormInventorySchema,
5745
6229
  maxTokens: 2048,
5746
- providerOptions: { ...providerOptions, pdfBase64 }
6230
+ providerOptions: await buildPdfProviderOptions(pdfInput, providerOptions)
5747
6231
  },
5748
6232
  {
5749
6233
  fallback: { forms: [] },
@@ -5771,9 +6255,10 @@ function createExtractor(config) {
5771
6255
  const chunkSize = 8;
5772
6256
  const collectedAssignments = [];
5773
6257
  const formInventoryHint = formInventory?.forms.length ? formatFormInventoryForPageMap(formInventory.forms) : void 0;
6258
+ const extractionBase64 = await getPdfBase64ForExtraction();
5774
6259
  for (let startPage = 1; startPage <= pageCount; startPage += chunkSize) {
5775
6260
  const endPage = Math.min(pageCount, startPage + chunkSize - 1);
5776
- const pagesPdf = await extractPageRange(pdfBase64, startPage, endPage);
6261
+ const pagesPdf = await extractPageRange(extractionBase64, startPage, endPage);
5777
6262
  const mapResponse = await safeGenerateObject(
5778
6263
  generateObject,
5779
6264
  {
@@ -5853,7 +6338,7 @@ function createExtractor(config) {
5853
6338
  name: task.extractorName,
5854
6339
  prompt: ext.buildPrompt(),
5855
6340
  schema: ext.schema,
5856
- pdfBase64,
6341
+ pdfInput,
5857
6342
  startPage: task.startPage,
5858
6343
  endPage: task.endPage,
5859
6344
  generateObject,
@@ -5883,7 +6368,7 @@ function createExtractor(config) {
5883
6368
  name: "supplementary",
5884
6369
  prompt: buildSupplementaryPrompt(alreadyExtractedSummary),
5885
6370
  schema: SupplementarySchema,
5886
- pdfBase64,
6371
+ pdfInput,
5887
6372
  startPage: 1,
5888
6373
  endPage: pageCount,
5889
6374
  generateObject,
@@ -5907,6 +6392,37 @@ function createExtractor(config) {
5907
6392
  memory: Object.fromEntries(memory)
5908
6393
  });
5909
6394
  }
6395
+ if (!pipelineCtx.isPhaseComplete("resolve_referential")) {
6396
+ onProgress?.("Resolving referential coverage limits...");
6397
+ try {
6398
+ const resolution = await resolveReferentialCoverages({
6399
+ memory,
6400
+ pdfInput,
6401
+ pageCount,
6402
+ generateObject,
6403
+ convertPdfToImages,
6404
+ concurrency,
6405
+ providerOptions,
6406
+ log,
6407
+ onProgress
6408
+ });
6409
+ trackUsage(resolution.usage);
6410
+ if (resolution.attempts > 0) {
6411
+ await log?.(`Referential resolution: ${resolution.resolved}/${resolution.attempts} resolved, ${resolution.unresolved} unresolved`);
6412
+ }
6413
+ } catch (error) {
6414
+ await log?.(`Referential resolution failed, continuing: ${error instanceof Error ? error.message : String(error)}`);
6415
+ }
6416
+ await pipelineCtx.save("resolve_referential", {
6417
+ id,
6418
+ pageCount,
6419
+ classifyResult,
6420
+ formInventory,
6421
+ pageAssignments,
6422
+ plan,
6423
+ memory: Object.fromEntries(memory)
6424
+ });
6425
+ }
5910
6426
  let reviewRounds = resumed?.reviewReport?.reviewRoundRecords ?? [];
5911
6427
  let reviewReport = resumed?.reviewReport;
5912
6428
  if (!pipelineCtx.isPhaseComplete("review")) {
@@ -5921,7 +6437,7 @@ function createExtractor(config) {
5921
6437
  prompt: buildReviewPrompt(template.required, extractedKeys, extractionSummary, pageMapSummary),
5922
6438
  schema: ReviewResultSchema,
5923
6439
  maxTokens: 1536,
5924
- providerOptions: { ...providerOptions, pdfBase64 }
6440
+ providerOptions: await buildPdfProviderOptions(pdfInput, providerOptions)
5925
6441
  },
5926
6442
  {
5927
6443
  fallback: { complete: true, missingFields: [], qualityIssues: [], additionalTasks: [] },
@@ -5949,7 +6465,7 @@ function createExtractor(config) {
5949
6465
  name: task.extractorName,
5950
6466
  prompt: ext.buildPrompt(),
5951
6467
  schema: ext.schema,
5952
- pdfBase64,
6468
+ pdfInput,
5953
6469
  startPage: task.startPage,
5954
6470
  endPage: task.endPage,
5955
6471
  generateObject,
@@ -6282,8 +6798,8 @@ Respond with JSON only:
6282
6798
  }`;
6283
6799
 
6284
6800
  // src/schemas/application.ts
6285
- var import_zod34 = require("zod");
6286
- var FieldTypeSchema = import_zod34.z.enum([
6801
+ var import_zod36 = require("zod");
6802
+ var FieldTypeSchema = import_zod36.z.enum([
6287
6803
  "text",
6288
6804
  "numeric",
6289
6805
  "currency",
@@ -6292,131 +6808,131 @@ var FieldTypeSchema = import_zod34.z.enum([
6292
6808
  "table",
6293
6809
  "declaration"
6294
6810
  ]);
6295
- var ApplicationFieldSchema = import_zod34.z.object({
6296
- id: import_zod34.z.string(),
6297
- label: import_zod34.z.string(),
6298
- section: import_zod34.z.string(),
6811
+ var ApplicationFieldSchema = import_zod36.z.object({
6812
+ id: import_zod36.z.string(),
6813
+ label: import_zod36.z.string(),
6814
+ section: import_zod36.z.string(),
6299
6815
  fieldType: FieldTypeSchema,
6300
- required: import_zod34.z.boolean(),
6301
- options: import_zod34.z.array(import_zod34.z.string()).optional(),
6302
- columns: import_zod34.z.array(import_zod34.z.string()).optional(),
6303
- requiresExplanationIfYes: import_zod34.z.boolean().optional(),
6304
- condition: import_zod34.z.object({
6305
- dependsOn: import_zod34.z.string(),
6306
- whenValue: import_zod34.z.string()
6816
+ required: import_zod36.z.boolean(),
6817
+ options: import_zod36.z.array(import_zod36.z.string()).optional(),
6818
+ columns: import_zod36.z.array(import_zod36.z.string()).optional(),
6819
+ requiresExplanationIfYes: import_zod36.z.boolean().optional(),
6820
+ condition: import_zod36.z.object({
6821
+ dependsOn: import_zod36.z.string(),
6822
+ whenValue: import_zod36.z.string()
6307
6823
  }).optional(),
6308
- value: import_zod34.z.string().optional(),
6309
- source: import_zod34.z.string().optional().describe("Where the value came from: auto-fill, user, lookup"),
6310
- confidence: import_zod34.z.enum(["confirmed", "high", "medium", "low"]).optional()
6824
+ value: import_zod36.z.string().optional(),
6825
+ source: import_zod36.z.string().optional().describe("Where the value came from: auto-fill, user, lookup"),
6826
+ confidence: import_zod36.z.enum(["confirmed", "high", "medium", "low"]).optional()
6311
6827
  });
6312
- var ApplicationClassifyResultSchema = import_zod34.z.object({
6313
- isApplication: import_zod34.z.boolean(),
6314
- confidence: import_zod34.z.number().min(0).max(1),
6315
- applicationType: import_zod34.z.string().nullable()
6828
+ var ApplicationClassifyResultSchema = import_zod36.z.object({
6829
+ isApplication: import_zod36.z.boolean(),
6830
+ confidence: import_zod36.z.number().min(0).max(1),
6831
+ applicationType: import_zod36.z.string().nullable()
6316
6832
  });
6317
- var FieldExtractionResultSchema = import_zod34.z.object({
6318
- fields: import_zod34.z.array(ApplicationFieldSchema)
6833
+ var FieldExtractionResultSchema = import_zod36.z.object({
6834
+ fields: import_zod36.z.array(ApplicationFieldSchema)
6319
6835
  });
6320
- var AutoFillMatchSchema = import_zod34.z.object({
6321
- fieldId: import_zod34.z.string(),
6322
- value: import_zod34.z.string(),
6323
- confidence: import_zod34.z.enum(["confirmed"]),
6324
- contextKey: import_zod34.z.string()
6836
+ var AutoFillMatchSchema = import_zod36.z.object({
6837
+ fieldId: import_zod36.z.string(),
6838
+ value: import_zod36.z.string(),
6839
+ confidence: import_zod36.z.enum(["confirmed"]),
6840
+ contextKey: import_zod36.z.string()
6325
6841
  });
6326
- var AutoFillResultSchema = import_zod34.z.object({
6327
- matches: import_zod34.z.array(AutoFillMatchSchema)
6842
+ var AutoFillResultSchema = import_zod36.z.object({
6843
+ matches: import_zod36.z.array(AutoFillMatchSchema)
6328
6844
  });
6329
- var QuestionBatchResultSchema = import_zod34.z.object({
6330
- batches: import_zod34.z.array(import_zod34.z.array(import_zod34.z.string()).describe("Array of field IDs in this batch"))
6845
+ var QuestionBatchResultSchema = import_zod36.z.object({
6846
+ batches: import_zod36.z.array(import_zod36.z.array(import_zod36.z.string()).describe("Array of field IDs in this batch"))
6331
6847
  });
6332
- var LookupRequestSchema = import_zod34.z.object({
6333
- type: import_zod34.z.string().describe("Type of lookup: 'records', 'website', 'policy'"),
6334
- description: import_zod34.z.string(),
6335
- url: import_zod34.z.string().optional(),
6336
- targetFieldIds: import_zod34.z.array(import_zod34.z.string())
6848
+ var LookupRequestSchema = import_zod36.z.object({
6849
+ type: import_zod36.z.string().describe("Type of lookup: 'records', 'website', 'policy'"),
6850
+ description: import_zod36.z.string(),
6851
+ url: import_zod36.z.string().optional(),
6852
+ targetFieldIds: import_zod36.z.array(import_zod36.z.string())
6337
6853
  });
6338
- var ReplyIntentSchema = import_zod34.z.object({
6339
- primaryIntent: import_zod34.z.enum(["answers_only", "question", "lookup_request", "mixed"]),
6340
- hasAnswers: import_zod34.z.boolean(),
6341
- questionText: import_zod34.z.string().optional(),
6342
- questionFieldIds: import_zod34.z.array(import_zod34.z.string()).optional(),
6343
- lookupRequests: import_zod34.z.array(LookupRequestSchema).optional()
6854
+ var ReplyIntentSchema = import_zod36.z.object({
6855
+ primaryIntent: import_zod36.z.enum(["answers_only", "question", "lookup_request", "mixed"]),
6856
+ hasAnswers: import_zod36.z.boolean(),
6857
+ questionText: import_zod36.z.string().optional(),
6858
+ questionFieldIds: import_zod36.z.array(import_zod36.z.string()).optional(),
6859
+ lookupRequests: import_zod36.z.array(LookupRequestSchema).optional()
6344
6860
  });
6345
- var ParsedAnswerSchema = import_zod34.z.object({
6346
- fieldId: import_zod34.z.string(),
6347
- value: import_zod34.z.string(),
6348
- explanation: import_zod34.z.string().optional()
6861
+ var ParsedAnswerSchema = import_zod36.z.object({
6862
+ fieldId: import_zod36.z.string(),
6863
+ value: import_zod36.z.string(),
6864
+ explanation: import_zod36.z.string().optional()
6349
6865
  });
6350
- var AnswerParsingResultSchema = import_zod34.z.object({
6351
- answers: import_zod34.z.array(ParsedAnswerSchema),
6352
- unanswered: import_zod34.z.array(import_zod34.z.string()).describe("Field IDs that were not answered")
6866
+ var AnswerParsingResultSchema = import_zod36.z.object({
6867
+ answers: import_zod36.z.array(ParsedAnswerSchema),
6868
+ unanswered: import_zod36.z.array(import_zod36.z.string()).describe("Field IDs that were not answered")
6353
6869
  });
6354
- var LookupFillSchema = import_zod34.z.object({
6355
- fieldId: import_zod34.z.string(),
6356
- value: import_zod34.z.string(),
6357
- source: import_zod34.z.string().describe("Specific citable reference, e.g. 'GL Policy #POL-12345 (Hartford)'")
6870
+ var LookupFillSchema = import_zod36.z.object({
6871
+ fieldId: import_zod36.z.string(),
6872
+ value: import_zod36.z.string(),
6873
+ source: import_zod36.z.string().describe("Specific citable reference, e.g. 'GL Policy #POL-12345 (Hartford)'")
6358
6874
  });
6359
- var LookupFillResultSchema = import_zod34.z.object({
6360
- fills: import_zod34.z.array(LookupFillSchema),
6361
- unfillable: import_zod34.z.array(import_zod34.z.string()),
6362
- explanation: import_zod34.z.string().optional()
6875
+ var LookupFillResultSchema = import_zod36.z.object({
6876
+ fills: import_zod36.z.array(LookupFillSchema),
6877
+ unfillable: import_zod36.z.array(import_zod36.z.string()),
6878
+ explanation: import_zod36.z.string().optional()
6363
6879
  });
6364
- var FlatPdfPlacementSchema = import_zod34.z.object({
6365
- fieldId: import_zod34.z.string(),
6366
- page: import_zod34.z.number(),
6367
- x: import_zod34.z.number().describe("Percentage from left edge (0-100)"),
6368
- y: import_zod34.z.number().describe("Percentage from top edge (0-100)"),
6369
- text: import_zod34.z.string(),
6370
- fontSize: import_zod34.z.number().optional(),
6371
- isCheckmark: import_zod34.z.boolean().optional()
6880
+ var FlatPdfPlacementSchema = import_zod36.z.object({
6881
+ fieldId: import_zod36.z.string(),
6882
+ page: import_zod36.z.number(),
6883
+ x: import_zod36.z.number().describe("Percentage from left edge (0-100)"),
6884
+ y: import_zod36.z.number().describe("Percentage from top edge (0-100)"),
6885
+ text: import_zod36.z.string(),
6886
+ fontSize: import_zod36.z.number().optional(),
6887
+ isCheckmark: import_zod36.z.boolean().optional()
6372
6888
  });
6373
- var AcroFormMappingSchema = import_zod34.z.object({
6374
- fieldId: import_zod34.z.string(),
6375
- acroFormName: import_zod34.z.string(),
6376
- value: import_zod34.z.string()
6889
+ var AcroFormMappingSchema = import_zod36.z.object({
6890
+ fieldId: import_zod36.z.string(),
6891
+ acroFormName: import_zod36.z.string(),
6892
+ value: import_zod36.z.string()
6377
6893
  });
6378
- var QualityGateStatusSchema = import_zod34.z.enum(["passed", "warning", "failed"]);
6379
- var QualitySeveritySchema = import_zod34.z.enum(["info", "warning", "blocking"]);
6380
- var ApplicationQualityIssueSchema = import_zod34.z.object({
6381
- code: import_zod34.z.string(),
6894
+ var QualityGateStatusSchema = import_zod36.z.enum(["passed", "warning", "failed"]);
6895
+ var QualitySeveritySchema = import_zod36.z.enum(["info", "warning", "blocking"]);
6896
+ var ApplicationQualityIssueSchema = import_zod36.z.object({
6897
+ code: import_zod36.z.string(),
6382
6898
  severity: QualitySeveritySchema,
6383
- message: import_zod34.z.string(),
6384
- fieldId: import_zod34.z.string().optional()
6899
+ message: import_zod36.z.string(),
6900
+ fieldId: import_zod36.z.string().optional()
6385
6901
  });
6386
- var ApplicationQualityRoundSchema = import_zod34.z.object({
6387
- round: import_zod34.z.number(),
6388
- kind: import_zod34.z.string(),
6902
+ var ApplicationQualityRoundSchema = import_zod36.z.object({
6903
+ round: import_zod36.z.number(),
6904
+ kind: import_zod36.z.string(),
6389
6905
  status: QualityGateStatusSchema,
6390
- summary: import_zod34.z.string().optional()
6906
+ summary: import_zod36.z.string().optional()
6391
6907
  });
6392
- var ApplicationQualityArtifactSchema = import_zod34.z.object({
6393
- kind: import_zod34.z.string(),
6394
- label: import_zod34.z.string().optional(),
6395
- itemCount: import_zod34.z.number().optional()
6908
+ var ApplicationQualityArtifactSchema = import_zod36.z.object({
6909
+ kind: import_zod36.z.string(),
6910
+ label: import_zod36.z.string().optional(),
6911
+ itemCount: import_zod36.z.number().optional()
6396
6912
  });
6397
- var ApplicationEmailReviewSchema = import_zod34.z.object({
6398
- issues: import_zod34.z.array(ApplicationQualityIssueSchema),
6913
+ var ApplicationEmailReviewSchema = import_zod36.z.object({
6914
+ issues: import_zod36.z.array(ApplicationQualityIssueSchema),
6399
6915
  qualityGateStatus: QualityGateStatusSchema
6400
6916
  });
6401
- var ApplicationQualityReportSchema = import_zod34.z.object({
6402
- issues: import_zod34.z.array(ApplicationQualityIssueSchema),
6403
- rounds: import_zod34.z.array(ApplicationQualityRoundSchema).optional(),
6404
- artifacts: import_zod34.z.array(ApplicationQualityArtifactSchema).optional(),
6917
+ var ApplicationQualityReportSchema = import_zod36.z.object({
6918
+ issues: import_zod36.z.array(ApplicationQualityIssueSchema),
6919
+ rounds: import_zod36.z.array(ApplicationQualityRoundSchema).optional(),
6920
+ artifacts: import_zod36.z.array(ApplicationQualityArtifactSchema).optional(),
6405
6921
  emailReview: ApplicationEmailReviewSchema.optional(),
6406
6922
  qualityGateStatus: QualityGateStatusSchema
6407
6923
  });
6408
- var ApplicationStateSchema = import_zod34.z.object({
6409
- id: import_zod34.z.string(),
6410
- pdfBase64: import_zod34.z.string().optional().describe("Original PDF, omitted after extraction"),
6411
- title: import_zod34.z.string().optional(),
6412
- applicationType: import_zod34.z.string().nullable().optional(),
6413
- fields: import_zod34.z.array(ApplicationFieldSchema),
6414
- batches: import_zod34.z.array(import_zod34.z.array(import_zod34.z.string())).optional(),
6415
- currentBatchIndex: import_zod34.z.number().default(0),
6924
+ var ApplicationStateSchema = import_zod36.z.object({
6925
+ id: import_zod36.z.string(),
6926
+ pdfBase64: import_zod36.z.string().optional().describe("Original PDF, omitted after extraction"),
6927
+ title: import_zod36.z.string().optional(),
6928
+ applicationType: import_zod36.z.string().nullable().optional(),
6929
+ fields: import_zod36.z.array(ApplicationFieldSchema),
6930
+ batches: import_zod36.z.array(import_zod36.z.array(import_zod36.z.string())).optional(),
6931
+ currentBatchIndex: import_zod36.z.number().default(0),
6416
6932
  qualityReport: ApplicationQualityReportSchema.optional(),
6417
- status: import_zod34.z.enum(["classifying", "extracting", "auto_filling", "batching", "collecting", "confirming", "mapping", "complete"]),
6418
- createdAt: import_zod34.z.number(),
6419
- updatedAt: import_zod34.z.number()
6933
+ status: import_zod36.z.enum(["classifying", "extracting", "auto_filling", "batching", "collecting", "confirming", "mapping", "complete"]),
6934
+ createdAt: import_zod36.z.number(),
6935
+ updatedAt: import_zod36.z.number()
6420
6936
  });
6421
6937
 
6422
6938
  // src/application/agents/classifier.ts
@@ -7553,90 +8069,91 @@ Respond with the final answer, deduplicated citations array, overall confidence
7553
8069
  }
7554
8070
 
7555
8071
  // src/schemas/query.ts
7556
- var import_zod35 = require("zod");
7557
- var QueryIntentSchema = import_zod35.z.enum([
8072
+ var import_zod37 = require("zod");
8073
+ var QueryIntentSchema = import_zod37.z.enum([
7558
8074
  "policy_question",
7559
8075
  "coverage_comparison",
7560
8076
  "document_search",
7561
8077
  "claims_inquiry",
7562
8078
  "general_knowledge"
7563
8079
  ]);
7564
- var QueryAttachmentKindSchema = import_zod35.z.enum(["image", "pdf", "text"]);
7565
- var QueryAttachmentSchema = import_zod35.z.object({
7566
- id: import_zod35.z.string().optional().describe("Optional stable attachment ID from the caller"),
8080
+ var QueryAttachmentKindSchema = import_zod37.z.enum(["image", "pdf", "text"]);
8081
+ var QueryAttachmentSchema = import_zod37.z.object({
8082
+ id: import_zod37.z.string().optional().describe("Optional stable attachment ID from the caller"),
7567
8083
  kind: QueryAttachmentKindSchema,
7568
- name: import_zod35.z.string().optional().describe("Original filename or user-facing label"),
7569
- mimeType: import_zod35.z.string().optional().describe("MIME type such as image/jpeg or application/pdf"),
7570
- base64: import_zod35.z.string().optional().describe("Base64-encoded file content for image/pdf attachments"),
7571
- text: import_zod35.z.string().optional().describe("Plain-text attachment content when available"),
7572
- description: import_zod35.z.string().optional().describe("Caller-provided description of the attachment")
8084
+ name: import_zod37.z.string().optional().describe("Original filename or user-facing label"),
8085
+ mimeType: import_zod37.z.string().optional().describe("MIME type such as image/jpeg or application/pdf"),
8086
+ base64: import_zod37.z.string().optional().describe("Base64-encoded file content for image/pdf attachments"),
8087
+ text: import_zod37.z.string().optional().describe("Plain-text attachment content when available"),
8088
+ description: import_zod37.z.string().optional().describe("Caller-provided description of the attachment")
7573
8089
  });
7574
- var SubQuestionSchema = import_zod35.z.object({
7575
- question: import_zod35.z.string().describe("Atomic sub-question to retrieve and answer independently"),
8090
+ var SubQuestionSchema = import_zod37.z.object({
8091
+ question: import_zod37.z.string().describe("Atomic sub-question to retrieve and answer independently"),
7576
8092
  intent: QueryIntentSchema,
7577
- chunkTypes: import_zod35.z.array(import_zod35.z.string()).optional().describe("Chunk types to filter retrieval (e.g. coverage, endorsement, declaration)"),
7578
- documentFilters: import_zod35.z.object({
7579
- type: import_zod35.z.enum(["policy", "quote"]).optional(),
7580
- carrier: import_zod35.z.string().optional(),
7581
- insuredName: import_zod35.z.string().optional(),
7582
- policyNumber: import_zod35.z.string().optional(),
7583
- quoteNumber: import_zod35.z.string().optional()
8093
+ chunkTypes: import_zod37.z.array(import_zod37.z.string()).optional().describe("Chunk types to filter retrieval (e.g. coverage, endorsement, declaration)"),
8094
+ documentFilters: import_zod37.z.object({
8095
+ type: import_zod37.z.enum(["policy", "quote"]).optional(),
8096
+ carrier: import_zod37.z.string().optional(),
8097
+ insuredName: import_zod37.z.string().optional(),
8098
+ policyNumber: import_zod37.z.string().optional(),
8099
+ quoteNumber: import_zod37.z.string().optional(),
8100
+ policyTypes: import_zod37.z.array(PolicyTypeSchema).optional().describe("Filter by policy type (e.g. homeowners_ho3, renters_ho4, pet) to avoid mixing up similar policies")
7584
8101
  }).optional().describe("Structured filters to narrow document lookup")
7585
8102
  });
7586
- var QueryClassifyResultSchema = import_zod35.z.object({
8103
+ var QueryClassifyResultSchema = import_zod37.z.object({
7587
8104
  intent: QueryIntentSchema,
7588
- subQuestions: import_zod35.z.array(SubQuestionSchema).min(1).describe("Decomposed atomic sub-questions"),
7589
- requiresDocumentLookup: import_zod35.z.boolean().describe("Whether structured document lookup is needed"),
7590
- requiresChunkSearch: import_zod35.z.boolean().describe("Whether semantic chunk search is needed"),
7591
- requiresConversationHistory: import_zod35.z.boolean().describe("Whether conversation history is relevant")
8105
+ subQuestions: import_zod37.z.array(SubQuestionSchema).min(1).describe("Decomposed atomic sub-questions"),
8106
+ requiresDocumentLookup: import_zod37.z.boolean().describe("Whether structured document lookup is needed"),
8107
+ requiresChunkSearch: import_zod37.z.boolean().describe("Whether semantic chunk search is needed"),
8108
+ requiresConversationHistory: import_zod37.z.boolean().describe("Whether conversation history is relevant")
7592
8109
  });
7593
- var EvidenceItemSchema = import_zod35.z.object({
7594
- source: import_zod35.z.enum(["chunk", "document", "conversation", "attachment"]),
7595
- chunkId: import_zod35.z.string().optional(),
7596
- documentId: import_zod35.z.string().optional(),
7597
- turnId: import_zod35.z.string().optional(),
7598
- attachmentId: import_zod35.z.string().optional(),
7599
- text: import_zod35.z.string().describe("Text excerpt from the source"),
7600
- relevance: import_zod35.z.number().min(0).max(1),
7601
- metadata: import_zod35.z.array(import_zod35.z.object({ key: import_zod35.z.string(), value: import_zod35.z.string() })).optional()
8110
+ var EvidenceItemSchema = import_zod37.z.object({
8111
+ source: import_zod37.z.enum(["chunk", "document", "conversation", "attachment"]),
8112
+ chunkId: import_zod37.z.string().optional(),
8113
+ documentId: import_zod37.z.string().optional(),
8114
+ turnId: import_zod37.z.string().optional(),
8115
+ attachmentId: import_zod37.z.string().optional(),
8116
+ text: import_zod37.z.string().describe("Text excerpt from the source"),
8117
+ relevance: import_zod37.z.number().min(0).max(1),
8118
+ metadata: import_zod37.z.array(import_zod37.z.object({ key: import_zod37.z.string(), value: import_zod37.z.string() })).optional()
7602
8119
  });
7603
- var AttachmentInterpretationSchema = import_zod35.z.object({
7604
- summary: import_zod35.z.string().describe("Concise summary of what the attachment shows or contains"),
7605
- extractedFacts: import_zod35.z.array(import_zod35.z.string()).describe("Specific observable or document facts grounded in the attachment"),
7606
- recommendedFocus: import_zod35.z.array(import_zod35.z.string()).describe("Important details to incorporate when answering follow-up questions"),
7607
- confidence: import_zod35.z.number().min(0).max(1)
8120
+ var AttachmentInterpretationSchema = import_zod37.z.object({
8121
+ summary: import_zod37.z.string().describe("Concise summary of what the attachment shows or contains"),
8122
+ extractedFacts: import_zod37.z.array(import_zod37.z.string()).describe("Specific observable or document facts grounded in the attachment"),
8123
+ recommendedFocus: import_zod37.z.array(import_zod37.z.string()).describe("Important details to incorporate when answering follow-up questions"),
8124
+ confidence: import_zod37.z.number().min(0).max(1)
7608
8125
  });
7609
- var RetrievalResultSchema = import_zod35.z.object({
7610
- subQuestion: import_zod35.z.string(),
7611
- evidence: import_zod35.z.array(EvidenceItemSchema)
8126
+ var RetrievalResultSchema = import_zod37.z.object({
8127
+ subQuestion: import_zod37.z.string(),
8128
+ evidence: import_zod37.z.array(EvidenceItemSchema)
7612
8129
  });
7613
- var CitationSchema = import_zod35.z.object({
7614
- index: import_zod35.z.number().describe("Citation number [1], [2], etc."),
7615
- chunkId: import_zod35.z.string().describe("Source chunk ID, e.g. doc-123:coverage:2"),
7616
- documentId: import_zod35.z.string(),
7617
- documentType: import_zod35.z.enum(["policy", "quote"]).optional(),
7618
- field: import_zod35.z.string().optional().describe("Specific field path, e.g. coverages[0].deductible"),
7619
- quote: import_zod35.z.string().describe("Exact text from source that supports the claim"),
7620
- relevance: import_zod35.z.number().min(0).max(1)
8130
+ var CitationSchema = import_zod37.z.object({
8131
+ index: import_zod37.z.number().describe("Citation number [1], [2], etc."),
8132
+ chunkId: import_zod37.z.string().describe("Source chunk ID, e.g. doc-123:coverage:2"),
8133
+ documentId: import_zod37.z.string(),
8134
+ documentType: import_zod37.z.enum(["policy", "quote"]).optional(),
8135
+ field: import_zod37.z.string().optional().describe("Specific field path, e.g. coverages[0].deductible"),
8136
+ quote: import_zod37.z.string().describe("Exact text from source that supports the claim"),
8137
+ relevance: import_zod37.z.number().min(0).max(1)
7621
8138
  });
7622
- var SubAnswerSchema = import_zod35.z.object({
7623
- subQuestion: import_zod35.z.string(),
7624
- answer: import_zod35.z.string(),
7625
- citations: import_zod35.z.array(CitationSchema),
7626
- confidence: import_zod35.z.number().min(0).max(1),
7627
- needsMoreContext: import_zod35.z.boolean().describe("True if evidence was insufficient to answer fully")
8139
+ var SubAnswerSchema = import_zod37.z.object({
8140
+ subQuestion: import_zod37.z.string(),
8141
+ answer: import_zod37.z.string(),
8142
+ citations: import_zod37.z.array(CitationSchema),
8143
+ confidence: import_zod37.z.number().min(0).max(1),
8144
+ needsMoreContext: import_zod37.z.boolean().describe("True if evidence was insufficient to answer fully")
7628
8145
  });
7629
- var VerifyResultSchema = import_zod35.z.object({
7630
- approved: import_zod35.z.boolean().describe("Whether all sub-answers are adequately grounded"),
7631
- issues: import_zod35.z.array(import_zod35.z.string()).describe("Specific grounding or consistency issues found"),
7632
- retrySubQuestions: import_zod35.z.array(import_zod35.z.string()).optional().describe("Sub-questions that need additional retrieval or re-reasoning")
8146
+ var VerifyResultSchema = import_zod37.z.object({
8147
+ approved: import_zod37.z.boolean().describe("Whether all sub-answers are adequately grounded"),
8148
+ issues: import_zod37.z.array(import_zod37.z.string()).describe("Specific grounding or consistency issues found"),
8149
+ retrySubQuestions: import_zod37.z.array(import_zod37.z.string()).optional().describe("Sub-questions that need additional retrieval or re-reasoning")
7633
8150
  });
7634
- var QueryResultSchema = import_zod35.z.object({
7635
- answer: import_zod35.z.string(),
7636
- citations: import_zod35.z.array(CitationSchema),
8151
+ var QueryResultSchema = import_zod37.z.object({
8152
+ answer: import_zod37.z.string(),
8153
+ citations: import_zod37.z.array(CitationSchema),
7637
8154
  intent: QueryIntentSchema,
7638
- confidence: import_zod35.z.number().min(0).max(1),
7639
- followUp: import_zod35.z.string().optional().describe("Suggested follow-up question if applicable")
8155
+ confidence: import_zod37.z.number().min(0).max(1),
8156
+ followUp: import_zod37.z.string().optional().describe("Suggested follow-up question if applicable")
7640
8157
  });
7641
8158
 
7642
8159
  // src/query/retriever.ts
@@ -8794,6 +9311,7 @@ var AGENT_TOOLS = [
8794
9311
  buildIntentPrompt,
8795
9312
  buildInterpretAttachmentPrompt,
8796
9313
  buildLookupFillPrompt,
9314
+ buildPdfProviderOptions,
8797
9315
  buildQueryClassifyPrompt,
8798
9316
  buildQuestionBatchPrompt,
8799
9317
  buildQuotesPoliciesPrompt,
@@ -8811,10 +9329,14 @@ var AGENT_TOOLS = [
8811
9329
  fillAcroForm,
8812
9330
  getAcroFormFields,
8813
9331
  getExtractor,
9332
+ getFileIdentifier,
8814
9333
  getPdfPageCount,
8815
9334
  getTemplate,
9335
+ isFileReference,
8816
9336
  overlayTextOnPdf,
8817
9337
  pLimit,
9338
+ pdfInputToBase64,
9339
+ pdfInputToBytes,
8818
9340
  safeGenerateObject,
8819
9341
  sanitizeNulls,
8820
9342
  stripFences,