@claritylabs/cl-sdk 1.1.4 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.mts CHANGED
@@ -524,6 +524,20 @@ declare function extractQuoteFromPdf(pdfBase64: string, options?: ExtractOptions
524
524
  extracted: any;
525
525
  }>;
526
526
 
527
+ /**
528
+ * Extract a page range from a PDF and return as base64.
529
+ * Used to reduce API token usage by only sending relevant pages.
530
+ *
531
+ * @param pdfBase64 - Full PDF as base64 string.
532
+ * @param startPage - First page to include (1-indexed).
533
+ * @param endPage - Last page to include (1-indexed, clamped to total pages).
534
+ * @returns Base64 string of the trimmed PDF, or original if range covers all pages.
535
+ */
536
+ declare function extractPageRange(pdfBase64: string, startPage: number, endPage: number): Promise<string>;
537
+ /**
538
+ * Get the page count of a PDF without fully parsing it.
539
+ */
540
+ declare function getPdfPageCount(pdfBase64: string): Promise<number>;
527
541
  interface AcroFormFieldInfo {
528
542
  name: string;
529
543
  type: "text" | "checkbox" | "dropdown" | "radio";
@@ -548,4 +562,4 @@ interface TextOverlay {
548
562
  /** Overlay text on a flat PDF at specified coordinates. */
549
563
  declare function overlayTextOnPdf(pdfBytes: Uint8Array, overlays: TextOverlay[]): Promise<Uint8Array>;
550
564
 
551
- export { AGENT_TOOLS, APPLICATION_CLASSIFY_PROMPT, type AcroFormFieldInfo, type AgentContext, type BaseDocument, CLASSIFY_DOCUMENT_PROMPT, CLASSIFY_EMAIL_PROMPT, COI_GENERATION_TOOL, COVERAGE_COMPARISON_TOOL, type ClassifyOptions, type CommunicationIntent, type Coverage, DOCUMENT_LOOKUP_TOOL, EXTRACTION_PROMPT, type ExtractOptions, type ExtractSectionsOptions, type FieldMapping, HAIKU_MODEL, type InsuranceDocument, type LogFn, METADATA_PROMPT, MODEL_TOKEN_LIMITS, type ModelConfig, PLATFORM_CONFIGS, type Platform, type PlatformConfig, type PolicyDocument, type PremiumLine, type PromptBuilder, QUOTE_METADATA_PROMPT, type QuoteDocument, SONNET_MODEL, type Section, type Subjectivity, type Subsection, type TextOverlay, type TokenUsage, type ToolDefinition, type UnderwritingCondition, applyExtracted, applyExtractedQuote, buildAcroFormMappingPrompt, buildAgentSystemPrompt, buildAnswerParsingPrompt, buildAutoFillPrompt, buildBatchEmailGenerationPrompt, buildClassifyMessagePrompt, buildCoiRoutingPrompt, buildConfirmationSummaryPrompt, buildConversationMemoryContext, buildConversationMemoryGuidance, buildCoverageGapPrompt, buildDocumentContext, buildFieldExplanationPrompt, buildFieldExtractionPrompt, buildFlatPdfMappingPrompt, buildFormattingPrompt, buildIdentityPrompt, buildIntentPrompt, buildLookupFillPrompt, buildPolicyContext, buildPolicySectionsPrompt, buildQuestionBatchPrompt, buildQuoteSectionsPrompt, buildQuotesPoliciesPrompt, buildReplyIntentClassificationPrompt, buildSafetyPrompt, buildSectionsPrompt, buildSupplementaryEnrichmentPrompt, buildSystemPrompt, classifyDocumentType, createDefaultModelConfig, createUniformModelConfig, enrichSupplementaryFields, extractFromPdf, extractQuoteFromPdf, extractSectionsOnly, fillAcroForm, getAcroFormFields, getPageChunks, mergeChunkedQuoteSections, mergeChunkedSections, overlayTextOnPdf, sanitizeNulls, stripFences };
565
+ export { AGENT_TOOLS, APPLICATION_CLASSIFY_PROMPT, type AcroFormFieldInfo, type AgentContext, type BaseDocument, CLASSIFY_DOCUMENT_PROMPT, CLASSIFY_EMAIL_PROMPT, COI_GENERATION_TOOL, COVERAGE_COMPARISON_TOOL, type ClassifyOptions, type CommunicationIntent, type Coverage, DOCUMENT_LOOKUP_TOOL, EXTRACTION_PROMPT, type ExtractOptions, type ExtractSectionsOptions, type FieldMapping, HAIKU_MODEL, type InsuranceDocument, type LogFn, METADATA_PROMPT, MODEL_TOKEN_LIMITS, type ModelConfig, PLATFORM_CONFIGS, type Platform, type PlatformConfig, type PolicyDocument, type PremiumLine, type PromptBuilder, QUOTE_METADATA_PROMPT, type QuoteDocument, SONNET_MODEL, type Section, type Subjectivity, type Subsection, type TextOverlay, type TokenUsage, type ToolDefinition, type UnderwritingCondition, applyExtracted, applyExtractedQuote, buildAcroFormMappingPrompt, buildAgentSystemPrompt, buildAnswerParsingPrompt, buildAutoFillPrompt, buildBatchEmailGenerationPrompt, buildClassifyMessagePrompt, buildCoiRoutingPrompt, buildConfirmationSummaryPrompt, buildConversationMemoryContext, buildConversationMemoryGuidance, buildCoverageGapPrompt, buildDocumentContext, buildFieldExplanationPrompt, buildFieldExtractionPrompt, buildFlatPdfMappingPrompt, buildFormattingPrompt, buildIdentityPrompt, buildIntentPrompt, buildLookupFillPrompt, buildPolicyContext, buildPolicySectionsPrompt, buildQuestionBatchPrompt, buildQuoteSectionsPrompt, buildQuotesPoliciesPrompt, buildReplyIntentClassificationPrompt, buildSafetyPrompt, buildSectionsPrompt, buildSupplementaryEnrichmentPrompt, buildSystemPrompt, classifyDocumentType, createDefaultModelConfig, createUniformModelConfig, enrichSupplementaryFields, extractFromPdf, extractPageRange, extractQuoteFromPdf, extractSectionsOnly, fillAcroForm, getAcroFormFields, getPageChunks, getPdfPageCount, mergeChunkedQuoteSections, mergeChunkedSections, overlayTextOnPdf, sanitizeNulls, stripFences };
package/dist/index.d.ts CHANGED
@@ -524,6 +524,20 @@ declare function extractQuoteFromPdf(pdfBase64: string, options?: ExtractOptions
524
524
  extracted: any;
525
525
  }>;
526
526
 
527
+ /**
528
+ * Extract a page range from a PDF and return as base64.
529
+ * Used to reduce API token usage by only sending relevant pages.
530
+ *
531
+ * @param pdfBase64 - Full PDF as base64 string.
532
+ * @param startPage - First page to include (1-indexed).
533
+ * @param endPage - Last page to include (1-indexed, clamped to total pages).
534
+ * @returns Base64 string of the trimmed PDF, or original if range covers all pages.
535
+ */
536
+ declare function extractPageRange(pdfBase64: string, startPage: number, endPage: number): Promise<string>;
537
+ /**
538
+ * Get the page count of a PDF without fully parsing it.
539
+ */
540
+ declare function getPdfPageCount(pdfBase64: string): Promise<number>;
527
541
  interface AcroFormFieldInfo {
528
542
  name: string;
529
543
  type: "text" | "checkbox" | "dropdown" | "radio";
@@ -548,4 +562,4 @@ interface TextOverlay {
548
562
  /** Overlay text on a flat PDF at specified coordinates. */
549
563
  declare function overlayTextOnPdf(pdfBytes: Uint8Array, overlays: TextOverlay[]): Promise<Uint8Array>;
550
564
 
551
- export { AGENT_TOOLS, APPLICATION_CLASSIFY_PROMPT, type AcroFormFieldInfo, type AgentContext, type BaseDocument, CLASSIFY_DOCUMENT_PROMPT, CLASSIFY_EMAIL_PROMPT, COI_GENERATION_TOOL, COVERAGE_COMPARISON_TOOL, type ClassifyOptions, type CommunicationIntent, type Coverage, DOCUMENT_LOOKUP_TOOL, EXTRACTION_PROMPT, type ExtractOptions, type ExtractSectionsOptions, type FieldMapping, HAIKU_MODEL, type InsuranceDocument, type LogFn, METADATA_PROMPT, MODEL_TOKEN_LIMITS, type ModelConfig, PLATFORM_CONFIGS, type Platform, type PlatformConfig, type PolicyDocument, type PremiumLine, type PromptBuilder, QUOTE_METADATA_PROMPT, type QuoteDocument, SONNET_MODEL, type Section, type Subjectivity, type Subsection, type TextOverlay, type TokenUsage, type ToolDefinition, type UnderwritingCondition, applyExtracted, applyExtractedQuote, buildAcroFormMappingPrompt, buildAgentSystemPrompt, buildAnswerParsingPrompt, buildAutoFillPrompt, buildBatchEmailGenerationPrompt, buildClassifyMessagePrompt, buildCoiRoutingPrompt, buildConfirmationSummaryPrompt, buildConversationMemoryContext, buildConversationMemoryGuidance, buildCoverageGapPrompt, buildDocumentContext, buildFieldExplanationPrompt, buildFieldExtractionPrompt, buildFlatPdfMappingPrompt, buildFormattingPrompt, buildIdentityPrompt, buildIntentPrompt, buildLookupFillPrompt, buildPolicyContext, buildPolicySectionsPrompt, buildQuestionBatchPrompt, buildQuoteSectionsPrompt, buildQuotesPoliciesPrompt, buildReplyIntentClassificationPrompt, buildSafetyPrompt, buildSectionsPrompt, buildSupplementaryEnrichmentPrompt, buildSystemPrompt, classifyDocumentType, createDefaultModelConfig, createUniformModelConfig, enrichSupplementaryFields, extractFromPdf, extractQuoteFromPdf, extractSectionsOnly, fillAcroForm, getAcroFormFields, getPageChunks, mergeChunkedQuoteSections, mergeChunkedSections, overlayTextOnPdf, sanitizeNulls, stripFences };
565
+ export { AGENT_TOOLS, APPLICATION_CLASSIFY_PROMPT, type AcroFormFieldInfo, type AgentContext, type BaseDocument, CLASSIFY_DOCUMENT_PROMPT, CLASSIFY_EMAIL_PROMPT, COI_GENERATION_TOOL, COVERAGE_COMPARISON_TOOL, type ClassifyOptions, type CommunicationIntent, type Coverage, DOCUMENT_LOOKUP_TOOL, EXTRACTION_PROMPT, type ExtractOptions, type ExtractSectionsOptions, type FieldMapping, HAIKU_MODEL, type InsuranceDocument, type LogFn, METADATA_PROMPT, MODEL_TOKEN_LIMITS, type ModelConfig, PLATFORM_CONFIGS, type Platform, type PlatformConfig, type PolicyDocument, type PremiumLine, type PromptBuilder, QUOTE_METADATA_PROMPT, type QuoteDocument, SONNET_MODEL, type Section, type Subjectivity, type Subsection, type TextOverlay, type TokenUsage, type ToolDefinition, type UnderwritingCondition, applyExtracted, applyExtractedQuote, buildAcroFormMappingPrompt, buildAgentSystemPrompt, buildAnswerParsingPrompt, buildAutoFillPrompt, buildBatchEmailGenerationPrompt, buildClassifyMessagePrompt, buildCoiRoutingPrompt, buildConfirmationSummaryPrompt, buildConversationMemoryContext, buildConversationMemoryGuidance, buildCoverageGapPrompt, buildDocumentContext, buildFieldExplanationPrompt, buildFieldExtractionPrompt, buildFlatPdfMappingPrompt, buildFormattingPrompt, buildIdentityPrompt, buildIntentPrompt, buildLookupFillPrompt, buildPolicyContext, buildPolicySectionsPrompt, buildQuestionBatchPrompt, buildQuoteSectionsPrompt, buildQuotesPoliciesPrompt, buildReplyIntentClassificationPrompt, buildSafetyPrompt, buildSectionsPrompt, buildSupplementaryEnrichmentPrompt, buildSystemPrompt, classifyDocumentType, createDefaultModelConfig, createUniformModelConfig, enrichSupplementaryFields, extractFromPdf, extractPageRange, extractQuoteFromPdf, extractSectionsOnly, fillAcroForm, getAcroFormFields, getPageChunks, getPdfPageCount, mergeChunkedQuoteSections, mergeChunkedSections, overlayTextOnPdf, sanitizeNulls, stripFences };
package/dist/index.js CHANGED
@@ -28532,11 +28532,13 @@ __export(index_exports, {
28532
28532
  createUniformModelConfig: () => createUniformModelConfig,
28533
28533
  enrichSupplementaryFields: () => enrichSupplementaryFields,
28534
28534
  extractFromPdf: () => extractFromPdf,
28535
+ extractPageRange: () => extractPageRange,
28535
28536
  extractQuoteFromPdf: () => extractQuoteFromPdf,
28536
28537
  extractSectionsOnly: () => extractSectionsOnly,
28537
28538
  fillAcroForm: () => fillAcroForm,
28538
28539
  getAcroFormFields: () => getAcroFormFields,
28539
28540
  getPageChunks: () => getPageChunks,
28541
+ getPdfPageCount: () => getPdfPageCount,
28540
28542
  mergeChunkedQuoteSections: () => mergeChunkedQuoteSections,
28541
28543
  mergeChunkedSections: () => mergeChunkedSections,
28542
28544
  overlayTextOnPdf: () => overlayTextOnPdf,
@@ -29862,6 +29864,124 @@ var AGENT_TOOLS = [
29862
29864
 
29863
29865
  // src/extraction/pipeline.ts
29864
29866
  var import_ai = require("ai");
29867
+
29868
+ // src/extraction/pdf.ts
29869
+ var import_pdf_lib = require("pdf-lib");
29870
+ async function extractPageRange(pdfBase64, startPage, endPage) {
29871
+ const srcBytes = typeof Buffer !== "undefined" ? Buffer.from(pdfBase64, "base64") : Uint8Array.from(atob(pdfBase64), (c) => c.charCodeAt(0));
29872
+ const srcDoc = await import_pdf_lib.PDFDocument.load(srcBytes, { ignoreEncryption: true });
29873
+ const totalPages = srcDoc.getPageCount();
29874
+ const start = Math.max(startPage - 1, 0);
29875
+ const end = Math.min(endPage, totalPages) - 1;
29876
+ if (start === 0 && end >= totalPages - 1) {
29877
+ return pdfBase64;
29878
+ }
29879
+ const newDoc = await import_pdf_lib.PDFDocument.create();
29880
+ const indices = Array.from({ length: end - start + 1 }, (_, i) => start + i);
29881
+ const pages = await newDoc.copyPages(srcDoc, indices);
29882
+ pages.forEach((page) => newDoc.addPage(page));
29883
+ const bytes = await newDoc.save();
29884
+ if (typeof Buffer !== "undefined") {
29885
+ return Buffer.from(bytes).toString("base64");
29886
+ }
29887
+ let binary = "";
29888
+ const uint8 = new Uint8Array(bytes);
29889
+ for (let i = 0; i < uint8.length; i++) {
29890
+ binary += String.fromCharCode(uint8[i]);
29891
+ }
29892
+ return btoa(binary);
29893
+ }
29894
+ async function getPdfPageCount(pdfBase64) {
29895
+ const srcBytes = typeof Buffer !== "undefined" ? Buffer.from(pdfBase64, "base64") : Uint8Array.from(atob(pdfBase64), (c) => c.charCodeAt(0));
29896
+ const doc = await import_pdf_lib.PDFDocument.load(srcBytes, { ignoreEncryption: true });
29897
+ return doc.getPageCount();
29898
+ }
29899
+ function getAcroFormFields(pdfDoc) {
29900
+ const form = pdfDoc.getForm();
29901
+ const fields = form.getFields();
29902
+ if (fields.length === 0) return [];
29903
+ return fields.map((field) => {
29904
+ const name = field.getName();
29905
+ if (field instanceof import_pdf_lib.PDFTextField) {
29906
+ return { name, type: "text" };
29907
+ }
29908
+ if (field instanceof import_pdf_lib.PDFCheckBox) {
29909
+ return { name, type: "checkbox" };
29910
+ }
29911
+ if (field instanceof import_pdf_lib.PDFDropdown) {
29912
+ return { name, type: "dropdown", options: field.getOptions() };
29913
+ }
29914
+ if (field instanceof import_pdf_lib.PDFRadioGroup) {
29915
+ return { name, type: "radio", options: field.getOptions() };
29916
+ }
29917
+ return { name, type: "text" };
29918
+ });
29919
+ }
29920
+ async function fillAcroForm(pdfBytes, mappings) {
29921
+ const pdfDoc = await import_pdf_lib.PDFDocument.load(pdfBytes, { ignoreEncryption: true });
29922
+ const form = pdfDoc.getForm();
29923
+ for (const { acroFormName, value } of mappings) {
29924
+ try {
29925
+ const field = form.getField(acroFormName);
29926
+ if (field instanceof import_pdf_lib.PDFTextField) {
29927
+ field.setText(value);
29928
+ } else if (field instanceof import_pdf_lib.PDFCheckBox) {
29929
+ const lower = value.toLowerCase();
29930
+ if (["yes", "true", "x", "checked", "on"].includes(lower)) {
29931
+ field.check();
29932
+ } else {
29933
+ field.uncheck();
29934
+ }
29935
+ } else if (field instanceof import_pdf_lib.PDFDropdown) {
29936
+ try {
29937
+ field.select(value);
29938
+ } catch {
29939
+ }
29940
+ } else if (field instanceof import_pdf_lib.PDFRadioGroup) {
29941
+ try {
29942
+ field.select(value);
29943
+ } catch {
29944
+ }
29945
+ }
29946
+ } catch {
29947
+ }
29948
+ }
29949
+ form.flatten();
29950
+ return await pdfDoc.save();
29951
+ }
29952
+ async function overlayTextOnPdf(pdfBytes, overlays) {
29953
+ const pdfDoc = await import_pdf_lib.PDFDocument.load(pdfBytes, { ignoreEncryption: true });
29954
+ const font = await pdfDoc.embedFont(import_pdf_lib.StandardFonts.Helvetica);
29955
+ const pageCount = pdfDoc.getPageCount();
29956
+ for (const overlay of overlays) {
29957
+ if (overlay.page < 0 || overlay.page >= pageCount) continue;
29958
+ const page = pdfDoc.getPage(overlay.page);
29959
+ const { width, height } = page.getSize();
29960
+ const fontSize = overlay.fontSize ?? 10;
29961
+ const x = overlay.x / 100 * width;
29962
+ const y = height - overlay.y / 100 * height - fontSize;
29963
+ if (overlay.isCheckmark) {
29964
+ page.drawText("X", {
29965
+ x,
29966
+ y,
29967
+ size: fontSize,
29968
+ font,
29969
+ color: (0, import_pdf_lib.rgb)(0, 0, 0)
29970
+ });
29971
+ } else {
29972
+ page.drawText(overlay.text, {
29973
+ x,
29974
+ y,
29975
+ size: fontSize,
29976
+ font,
29977
+ color: (0, import_pdf_lib.rgb)(0, 0, 0)
29978
+ });
29979
+ }
29980
+ }
29981
+ return await pdfDoc.save();
29982
+ }
29983
+
29984
+ // src/extraction/pipeline.ts
29865
29985
  var SONNET_MODEL = "claude-sonnet-4-6";
29866
29986
  var HAIKU_MODEL = "claude-haiku-4-5-20251001";
29867
29987
  var DEFAULT_METADATA_PROVIDER_OPTIONS = {
@@ -29998,8 +30118,10 @@ function getPageChunks(totalPages, chunkSize = 30) {
29998
30118
  }
29999
30119
  return chunks;
30000
30120
  }
30001
- async function callModel(model, pdfBase64, prompt, maxTokens, providerOptions, log, onTokenUsage) {
30002
- await log?.(`Calling model (max ${maxTokens} tokens)...`);
30121
+ async function callModel(model, pdfBase64, prompt, maxTokens, providerOptions, log, onTokenUsage, pageRange) {
30122
+ const pdfToSend = pageRange ? await extractPageRange(pdfBase64, pageRange[0], pageRange[1]) : pdfBase64;
30123
+ const rangeLabel = pageRange ? ` [pages ${pageRange[0]}\u2013${pageRange[1]}]` : "";
30124
+ await log?.(`Calling model (max ${maxTokens} tokens)${rangeLabel}...`);
30003
30125
  const start = Date.now();
30004
30126
  const { text, usage } = await withRetry(
30005
30127
  () => (0, import_ai.generateText)({
@@ -30008,7 +30130,7 @@ async function callModel(model, pdfBase64, prompt, maxTokens, providerOptions, l
30008
30130
  messages: [{
30009
30131
  role: "user",
30010
30132
  content: [
30011
- { type: "file", data: pdfBase64, mediaType: "application/pdf" },
30133
+ { type: "file", data: pdfToSend, mediaType: "application/pdf" },
30012
30134
  { type: "text", text: prompt }
30013
30135
  ]
30014
30136
  }],
@@ -30114,7 +30236,9 @@ async function classifyDocumentType(pdfBase64, options) {
30114
30236
  MODEL_TOKEN_LIMITS.classification,
30115
30237
  void 0,
30116
30238
  log,
30117
- onTokenUsage
30239
+ onTokenUsage,
30240
+ [1, 3]
30241
+ // Only need first 3 pages for classification
30118
30242
  );
30119
30243
  try {
30120
30244
  const parsed = JSON.parse(stripFences(raw));
@@ -30203,7 +30327,9 @@ async function extractChunkWithRetry(models, pdfBase64, start, end, sizeIndex, p
30203
30327
  MODEL_TOKEN_LIMITS.sections,
30204
30328
  void 0,
30205
30329
  log,
30206
- onTokenUsage
30330
+ onTokenUsage,
30331
+ [start, end]
30332
+ // Only send this chunk's pages
30207
30333
  );
30208
30334
  try {
30209
30335
  return [JSON.parse(stripFences(chunkRaw))];
@@ -30245,7 +30371,9 @@ async function extractChunkWithRetry(models, pdfBase64, start, end, sizeIndex, p
30245
30371
  MODEL_TOKEN_LIMITS.sectionsFallback,
30246
30372
  fallbackProviderOptions,
30247
30373
  log,
30248
- onTokenUsage
30374
+ onTokenUsage,
30375
+ [start, end]
30376
+ // Only send this chunk's pages
30249
30377
  );
30250
30378
  try {
30251
30379
  return [JSON.parse(stripFences(fallbackRaw))];
@@ -30288,7 +30416,9 @@ async function extractFromPdf(pdfBase64, options) {
30288
30416
  onTokenUsage
30289
30417
  } = options ?? {};
30290
30418
  const resolved = resolveModels(models);
30419
+ const actualPageCount = await getPdfPageCount(pdfBase64);
30291
30420
  await log?.("Pass 1: Extracting metadata...");
30421
+ const metadataPageRange = [1, Math.min(10, actualPageCount)];
30292
30422
  const metadataRaw = await callModel(
30293
30423
  resolved.metadata,
30294
30424
  pdfBase64,
@@ -30296,7 +30426,8 @@ async function extractFromPdf(pdfBase64, options) {
30296
30426
  MODEL_TOKEN_LIMITS.metadata,
30297
30427
  metadataProviderOptions,
30298
30428
  log,
30299
- onTokenUsage
30429
+ onTokenUsage,
30430
+ metadataPageRange
30300
30431
  );
30301
30432
  let metadataResult;
30302
30433
  try {
@@ -30307,7 +30438,7 @@ async function extractFromPdf(pdfBase64, options) {
30307
30438
  throw new Error(`Metadata JSON parse failed: ${e.message}`);
30308
30439
  }
30309
30440
  await onMetadata?.(metadataRaw);
30310
- const pageCount = metadataResult.totalPages || 1;
30441
+ const pageCount = actualPageCount;
30311
30442
  await log?.(`Document: ${pageCount} page(s)`);
30312
30443
  const sectionChunks = await extractSectionChunks(
30313
30444
  resolved,
@@ -30375,7 +30506,9 @@ async function extractQuoteFromPdf(pdfBase64, options) {
30375
30506
  onTokenUsage
30376
30507
  } = options ?? {};
30377
30508
  const resolved = resolveModels(models);
30509
+ const actualPageCount = await getPdfPageCount(pdfBase64);
30378
30510
  await log?.("Pass 1: Extracting quote metadata...");
30511
+ const metadataPageRange = [1, Math.min(10, actualPageCount)];
30379
30512
  const metadataRaw = await callModel(
30380
30513
  resolved.metadata,
30381
30514
  pdfBase64,
@@ -30383,7 +30516,8 @@ async function extractQuoteFromPdf(pdfBase64, options) {
30383
30516
  MODEL_TOKEN_LIMITS.metadata,
30384
30517
  metadataProviderOptions,
30385
30518
  log,
30386
- onTokenUsage
30519
+ onTokenUsage,
30520
+ metadataPageRange
30387
30521
  );
30388
30522
  let metadataResult;
30389
30523
  try {
@@ -30394,7 +30528,7 @@ async function extractQuoteFromPdf(pdfBase64, options) {
30394
30528
  throw new Error(`Quote metadata JSON parse failed: ${e.message}`);
30395
30529
  }
30396
30530
  await onMetadata?.(metadataRaw);
30397
- const pageCount = metadataResult.totalPages || 1;
30531
+ const pageCount = actualPageCount;
30398
30532
  await log?.(`Quote document: ${pageCount} page(s)`);
30399
30533
  const sectionChunks = await extractSectionChunks(
30400
30534
  resolved,
@@ -30411,93 +30545,6 @@ async function extractQuoteFromPdf(pdfBase64, options) {
30411
30545
  const mergedRaw = JSON.stringify(merged);
30412
30546
  return { rawText: mergedRaw, extracted: merged };
30413
30547
  }
30414
-
30415
- // src/extraction/pdf.ts
30416
- var import_pdf_lib = require("pdf-lib");
30417
- function getAcroFormFields(pdfDoc) {
30418
- const form = pdfDoc.getForm();
30419
- const fields = form.getFields();
30420
- if (fields.length === 0) return [];
30421
- return fields.map((field) => {
30422
- const name = field.getName();
30423
- if (field instanceof import_pdf_lib.PDFTextField) {
30424
- return { name, type: "text" };
30425
- }
30426
- if (field instanceof import_pdf_lib.PDFCheckBox) {
30427
- return { name, type: "checkbox" };
30428
- }
30429
- if (field instanceof import_pdf_lib.PDFDropdown) {
30430
- return { name, type: "dropdown", options: field.getOptions() };
30431
- }
30432
- if (field instanceof import_pdf_lib.PDFRadioGroup) {
30433
- return { name, type: "radio", options: field.getOptions() };
30434
- }
30435
- return { name, type: "text" };
30436
- });
30437
- }
30438
- async function fillAcroForm(pdfBytes, mappings) {
30439
- const pdfDoc = await import_pdf_lib.PDFDocument.load(pdfBytes, { ignoreEncryption: true });
30440
- const form = pdfDoc.getForm();
30441
- for (const { acroFormName, value } of mappings) {
30442
- try {
30443
- const field = form.getField(acroFormName);
30444
- if (field instanceof import_pdf_lib.PDFTextField) {
30445
- field.setText(value);
30446
- } else if (field instanceof import_pdf_lib.PDFCheckBox) {
30447
- const lower = value.toLowerCase();
30448
- if (["yes", "true", "x", "checked", "on"].includes(lower)) {
30449
- field.check();
30450
- } else {
30451
- field.uncheck();
30452
- }
30453
- } else if (field instanceof import_pdf_lib.PDFDropdown) {
30454
- try {
30455
- field.select(value);
30456
- } catch {
30457
- }
30458
- } else if (field instanceof import_pdf_lib.PDFRadioGroup) {
30459
- try {
30460
- field.select(value);
30461
- } catch {
30462
- }
30463
- }
30464
- } catch {
30465
- }
30466
- }
30467
- form.flatten();
30468
- return await pdfDoc.save();
30469
- }
30470
- async function overlayTextOnPdf(pdfBytes, overlays) {
30471
- const pdfDoc = await import_pdf_lib.PDFDocument.load(pdfBytes, { ignoreEncryption: true });
30472
- const font = await pdfDoc.embedFont(import_pdf_lib.StandardFonts.Helvetica);
30473
- const pageCount = pdfDoc.getPageCount();
30474
- for (const overlay of overlays) {
30475
- if (overlay.page < 0 || overlay.page >= pageCount) continue;
30476
- const page = pdfDoc.getPage(overlay.page);
30477
- const { width, height } = page.getSize();
30478
- const fontSize = overlay.fontSize ?? 10;
30479
- const x = overlay.x / 100 * width;
30480
- const y = height - overlay.y / 100 * height - fontSize;
30481
- if (overlay.isCheckmark) {
30482
- page.drawText("X", {
30483
- x,
30484
- y,
30485
- size: fontSize,
30486
- font,
30487
- color: (0, import_pdf_lib.rgb)(0, 0, 0)
30488
- });
30489
- } else {
30490
- page.drawText(overlay.text, {
30491
- x,
30492
- y,
30493
- size: fontSize,
30494
- font,
30495
- color: (0, import_pdf_lib.rgb)(0, 0, 0)
30496
- });
30497
- }
30498
- }
30499
- return await pdfDoc.save();
30500
- }
30501
30548
  // Annotate the CommonJS export names for ESM import in node:
30502
30549
  0 && (module.exports = {
30503
30550
  AGENT_TOOLS,
@@ -30550,11 +30597,13 @@ async function overlayTextOnPdf(pdfBytes, overlays) {
30550
30597
  createUniformModelConfig,
30551
30598
  enrichSupplementaryFields,
30552
30599
  extractFromPdf,
30600
+ extractPageRange,
30553
30601
  extractQuoteFromPdf,
30554
30602
  extractSectionsOnly,
30555
30603
  fillAcroForm,
30556
30604
  getAcroFormFields,
30557
30605
  getPageChunks,
30606
+ getPdfPageCount,
30558
30607
  mergeChunkedQuoteSections,
30559
30608
  mergeChunkedSections,
30560
30609
  overlayTextOnPdf,