@claritylabs/cl-sdk 1.0.3 → 1.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -2471,6 +2471,254 @@ async function overlayTextOnPdf(pdfBytes, overlays) {
2471
2471
  return await pdfDoc.save();
2472
2472
  }
2473
2473
 
2474
+ // src/extraction/docling.ts
2475
/**
 * Type guard for Docling-backed extraction input.
 *
 * @param {unknown} input - Candidate value handed to the extractor.
 * @returns {boolean} `true` only when `input` is an object whose `kind` is
 *   `"docling_document"` and whose `document` property is a non-null object.
 */
function isDoclingExtractionInput(input) {
  if (!input || typeof input !== "object") return false;
  if (input.kind !== "docling_document") return false;
  const { document } = input;
  return Boolean(document) && typeof document === "object";
}
2480
/**
 * Flattens a Docling document into ordered text units, per-page text, a
 * page-labelled full-text string and one source span per unit.
 *
 * @param {object} document - Docling document (texts/tables/key-value items/pictures, body, pages).
 * @param {object} options - Must provide `documentId`; `sourceKind` defaults to `"policy_pdf"`.
 * @returns {{pageCount: number, fullText: string, pageTexts: Map<number, string>, units: object[], sourceSpans: object[]}}
 */
function normalizeDoclingDocument(document, options) {
  const itemMap = buildItemMap(document);
  const bodyRefs = getOrderedBodyRefs(document, itemMap);

  // Prefer the order produced by walking the body tree; when that walk yields
  // nothing, fall back to the order of the item collections themselves.
  let orderedItems;
  if (bodyRefs.length > 0) {
    orderedItems = bodyRefs.map((ref) => itemMap.get(ref)).filter((entry) => Boolean(entry));
  } else {
    orderedItems = getFallbackOrderedItems(document, itemMap);
  }

  // Keep only items that normalize to a unit with non-blank text.
  const units = [];
  for (const { ref, item } of orderedItems) {
    const unit = normalizeItem(ref, item);
    if (unit && unit.text.trim()) units.push(unit);
  }

  const pageCount = inferPageCount(document, units);

  // Each unit's text is appended to the page it starts on (page 1 when unknown).
  const pageTexts = /* @__PURE__ */ new Map();
  for (const unit of units) {
    const page = clampPage(unit.pageStart ?? 1, pageCount);
    pageTexts.set(page, appendText(pageTexts.get(page), unit.text));
  }

  const renderPage = (_, index) => {
    const pageNumber = index + 1;
    const text = pageTexts.get(pageNumber)?.trim();
    return text ? `Page ${pageNumber}\n${text}` : "";
  };
  const fullText = Array.from({ length: pageCount }, renderPage).filter(Boolean).join("\n\n");

  const sourceKind = options.sourceKind ?? "policy_pdf";
  const sourceSpans = units.map((unit, index) => {
    const metadata = {
      sourceSystem: "docling",
      sourceUnit: "docling_item",
      doclingRef: unit.ref
    };
    if (unit.label) metadata.doclingLabel = unit.label;

    const span = buildSourceSpan(
      {
        documentId: options.documentId,
        sourceKind,
        text: unit.text,
        pageStart: unit.pageStart,
        pageEnd: unit.pageEnd,
        sectionId: unit.label,
        metadata
      },
      index
    );
    const bbox = unit.bboxes?.length ? unit.bboxes : void 0;
    return { ...span, kind: "plain_text", bbox };
  });

  return { pageCount, fullText, pageTexts, units, sourceSpans };
}
2530
/**
 * Renders the text of an inclusive page range from a normalized Docling document.
 *
 * Both bounds are clamped via `clampPage` against `normalized.pageCount`.
 * Pages with no (or blank) text are omitted.
 *
 * @param {{pageCount: number, pageTexts: Map<number, string>}} normalized
 * @param {number} startPage - First page of the range.
 * @param {number} endPage - Last page of the range.
 * @returns {string} `Page N` + newline + text blocks joined by blank lines; `""` if nothing matched.
 */
function getDoclingPageRangeText(normalized, startPage, endPage) {
  const { pageCount, pageTexts } = normalized;
  const firstPage = clampPage(startPage, pageCount);
  const lastPage = clampPage(endPage, pageCount);

  const sections = [];
  let page = firstPage;
  while (page <= lastPage) {
    const text = pageTexts.get(page)?.trim();
    if (text) sections.push(`Page ${page}\n${text}`);
    page += 1;
  }
  return sections.join("\n\n");
}
2543
/**
 * Returns a shallow copy of `existingOptions` with the Docling full text and
 * page count attached (overriding any existing `doclingText` / `doclingPageCount`).
 *
 * @param {{fullText: string, pageCount: number}} normalized
 * @param {object} [existingOptions] - Provider options to extend; not mutated.
 * @returns {object}
 */
function buildDoclingProviderOptions(normalized, existingOptions) {
  const merged = { ...existingOptions };
  merged.doclingText = normalized.fullText;
  merged.doclingPageCount = normalized.pageCount;
  return merged;
}
2550
/**
 * Removes duplicate source spans while preserving first-seen order.
 *
 * Two spans are duplicates when they share document id, start page, end page,
 * section (or field path) and text hash. Missing parts are keyed as `"na"`;
 * a missing `textHash` is computed with `sourceSpanTextHash(span.text)`.
 *
 * @param {object[]} spans
 * @returns {object[]} A new array holding the first occurrence of each distinct span.
 */
function mergeSourceSpans(spans) {
  const identityOf = (span) => {
    const location = span.location;
    const startPage = span.pageStart ?? location?.startPage ?? location?.page ?? "na";
    const endPage = span.pageEnd ?? location?.endPage ?? span.pageStart ?? "na";
    const section = span.sectionId ?? location?.fieldPath ?? "na";
    const hash = span.textHash ?? sourceSpanTextHash(span.text);
    return [span.documentId, startPage, endPage, section, hash].join(":");
  };

  const seenKeys = /* @__PURE__ */ new Set();
  return spans.filter((span) => {
    const key = identityOf(span);
    if (seenKeys.has(key)) return false;
    seenKeys.add(key);
    return true;
  });
}
2567
/**
 * Indexes every addressable item of a Docling document by reference.
 *
 * Collections are registered in the order texts, tables, key-value items
 * (snake_case or camelCase property) and pictures; missing collections are
 * treated as empty.
 *
 * @param {object} document
 * @returns {Map<string, {ref: string, item: object}>}
 */
function buildItemMap(document) {
  const collections = [
    ["#/texts", document.texts],
    ["#/tables", document.tables],
    ["#/key_value_items", document.key_value_items ?? document.keyValueItems],
    ["#/pictures", document.pictures]
  ];
  const map = /* @__PURE__ */ new Map();
  for (const [baseRef, items] of collections) {
    addItems(map, baseRef, items ?? []);
  }
  return map;
}
2575
/**
 * Registers each item of one collection in `map`, keyed by the item's own
 * self reference or, when it has none, by `${baseRef}/${index}`.
 *
 * @param {Map<string, {ref: string, item: object}>} map - Mutated in place.
 * @param {string} baseRef - Reference prefix of the collection, e.g. `"#/texts"`.
 * @param {object[]} items
 * @returns {void}
 */
function addItems(map, baseRef, items) {
  for (const [index, item] of items.entries()) {
    const ref = getSelfRef(item) ?? `${baseRef}/${index}`;
    map.set(ref, { ref, item });
  }
}
2581
/**
 * Produces item entries in collection order (texts, then tables, then
 * key-value items).
 *
 * @param {object} document
 * @param {Map<string, {ref: string, item: object}>} itemMap
 * @returns {{ref: string, item: object}[]} Entries found in `itemMap`; unknown refs are dropped.
 */
function getFallbackOrderedItems(document, itemMap) {
  const refsFor = (items, baseRef) => (items ?? []).map((item, index) => getSelfRef(item) ?? `${baseRef}/${index}`);
  const refs = [
    ...refsFor(document.texts, "#/texts"),
    ...refsFor(document.tables, "#/tables"),
    ...refsFor(document.key_value_items ?? document.keyValueItems, "#/key_value_items")
  ];

  const ordered = [];
  for (const ref of refs) {
    const entry = itemMap.get(ref);
    if (entry) ordered.push(entry);
  }
  return ordered;
}
2589
/**
 * Walks the document body tree depth-first and returns the references of the
 * items it reaches, each exactly once, in first-encounter order.
 *
 * A child reference resolves either to an item in `itemMap` (recorded, then
 * its own children are walked) or to a group from `document.groups` (only its
 * children are walked). References that resolve to neither are ignored.
 *
 * Every reference is expanded at most once, so malformed documents whose
 * `children` form a cycle terminate instead of recursing without bound. For
 * acyclic documents the result is unchanged, because a second visit to an
 * already-expanded node could never contribute new references.
 *
 * @param {object} document - Docling document; `body` and `groups` are optional.
 * @param {Map<string, {ref: string, item: object}>} itemMap
 * @returns {string[]} Item references in body order.
 */
function getOrderedBodyRefs(document, itemMap) {
  const groupMap = /* @__PURE__ */ new Map();
  (document.groups ?? []).forEach((group, index) => {
    groupMap.set(getSelfRef(group) ?? `#/groups/${index}`, group);
  });

  const refs = [];
  // Tracks items, groups and unresolved refs alike; this is what breaks cycles.
  const visited = /* @__PURE__ */ new Set();

  const visitNode = (node) => {
    for (const child of node?.children ?? []) {
      // A null/undefined child entry carries no reference; skip it rather than crash.
      if (child == null) continue;
      const ref = getRef(child);
      if (!ref || visited.has(ref)) continue;
      visited.add(ref);

      const itemEntry = itemMap.get(ref);
      if (itemEntry) {
        refs.push(ref);
        visitNode(itemEntry.item);
      } else {
        visitNode(groupMap.get(ref));
      }
    }
  };

  visitNode(document.body);
  return refs;
}
2618
/**
 * Converts one Docling item into a text unit.
 *
 * @param {string} ref - Reference under which the item was registered.
 * @param {object} item - Docling item; `prov` entries supply page numbers and bounding boxes.
 * @returns {{ref: string, label: (string|undefined), text: string, pageStart: (number|undefined), pageEnd: (number|undefined), bboxes: (object[]|undefined)}|undefined}
 *   The unit, or `undefined` when the item has no non-blank text. Page bounds
 *   are the min/max of the positive numeric page numbers found in `prov`.
 */
function normalizeItem(ref, item) {
  const text = getItemText(item).trim();
  if (!text) return void 0;

  const provenance = item.prov ?? [];
  const pages = [];
  const bboxes = [];
  for (const prov of provenance) {
    const page = getPageNumber(prov);
    if (typeof page === "number" && page > 0) pages.push(page);
  }
  for (const prov of provenance) {
    const bbox = toSourceSpanBBox(prov);
    if (bbox) bboxes.push(bbox);
  }

  let pageStart;
  let pageEnd;
  if (pages.length) {
    pageStart = Math.min(...pages);
    pageEnd = Math.max(...pages);
  }

  const label = typeof item.label === "string" ? item.label : void 0;
  return {
    ref,
    label,
    text,
    pageStart,
    pageEnd,
    bboxes: bboxes.length ? bboxes : void 0
  };
}
2634
/**
 * Picks the textual content of a Docling item.
 *
 * Preference order: non-blank `text`, non-blank `orig`, then the Markdown
 * rendering of `data` produced by `tableToMarkdown`.
 *
 * @param {object} item
 * @returns {string} The untrimmed text, or `""` when nothing usable exists.
 */
function getItemText(item) {
  for (const candidate of [item.text, item.orig]) {
    if (typeof candidate === "string" && candidate.trim()) return candidate;
  }
  return tableToMarkdown(item.data) || "";
}
2641
/**
 * Renders Docling table data as text.
 *
 * Cells are read from `data.table_cells` (or `data.tableCells`). Each cell is
 * placed by its row/column offset (defaulting to 0) and later cells overwrite
 * earlier ones at the same position. Cells without text are ignored, and so
 * are cells whose resolved row or column is not a non-negative integer — such
 * a position cannot be addressed in the grid and previously caused a
 * TypeError that aborted the whole conversion.
 *
 * @param {unknown} data - The item's `data` payload.
 * @returns {string|undefined} For a single-row grid, the non-empty cells
 *   joined by `" | "`; otherwise a Markdown table whose first row is the
 *   header. `undefined` when there are no usable cells.
 */
function tableToMarkdown(data) {
  const record = asRecord(data);
  const cells = Array.isArray(record?.table_cells) ? record.table_cells : Array.isArray(record?.tableCells) ? record.tableCells : void 0;
  if (!cells) return void 0;

  const isGridIndex = (value) => Number.isInteger(value) && value >= 0;

  const parsedCells = [];
  // Track the grid extent while parsing instead of spreading every cell into
  // Math.max, which is limited by the engine's maximum argument count.
  let maxRow = 0;
  let maxCol = 0;
  for (const rawCell of cells) {
    const cell = asRecord(rawCell);
    if (!cell) continue;
    const text = firstString([cell.text, cell.orig, cell.content]);
    if (!text) continue;
    const row = firstNumber2([cell.start_row_offset, cell.row_header, cell.row, cell.rowIndex]) ?? 0;
    const col = firstNumber2([cell.start_col_offset, cell.col, cell.colIndex]) ?? 0;
    if (!isGridIndex(row) || !isGridIndex(col)) continue;
    if (row > maxRow) maxRow = row;
    if (col > maxCol) maxCol = col;
    parsedCells.push({ row, col, text });
  }
  if (parsedCells.length === 0) return void 0;

  const rows = Array.from({ length: maxRow + 1 }, () => Array.from({ length: maxCol + 1 }, () => ""));
  for (const cell of parsedCells) {
    rows[cell.row][cell.col] = cell.text;
  }

  // A one-row grid has no header/body split, so emit it as a flat line.
  if (rows.length === 1) return rows[0].filter(Boolean).join(" | ");

  const header = rows[0];
  const separator = header.map(() => "---");
  return [header, separator, ...rows.slice(1)].map((row) => `| ${row.map((value) => value.trim()).join(" | ")} |`).join("\n");
}
2662
/**
 * Determines how many pages the document has (always at least 1).
 *
 * Resolution order:
 *  1. `document.pages` as an array: its length.
 *  2. `document.pages` as an object: the largest finite numeric key, or the
 *     number of keys when no key is a positive number.
 *  3. Otherwise: the largest `pageStart` / `pageEnd` among `units`.
 *
 * Maxima are computed with plain loops rather than `Math.max(...values)`,
 * because spreading one argument per value throws a RangeError once the
 * collection exceeds the engine's argument limit.
 *
 * @param {object} document
 * @param {{pageStart?: number, pageEnd?: number}[]} units
 * @returns {number}
 */
function inferPageCount(document, units) {
  const pages = document.pages;
  if (Array.isArray(pages)) return Math.max(1, pages.length);

  if (pages && typeof pages === "object") {
    const keys = Object.keys(pages);
    let numericMax = 0;
    for (const key of keys) {
      const value = Number(key);
      if (Number.isFinite(value) && value > numericMax) numericMax = value;
    }
    return Math.max(1, numericMax || keys.length);
  }

  let highest = 1;
  for (const unit of units) {
    const start = unit.pageStart ?? 0;
    const end = unit.pageEnd ?? 0;
    if (start > highest) highest = start;
    if (end > highest) highest = end;
  }
  return highest;
}
2672
/**
 * Reads a node's own reference, accepting the snake_case (`self_ref`) or
 * camelCase (`selfRef`) spelling, in that order of preference.
 *
 * @param {object} value
 * @returns {string|undefined}
 */
function getSelfRef(value) {
  const snakeCaseRef = value.self_ref;
  if (snakeCaseRef !== null && snakeCaseRef !== void 0) return snakeCaseRef;
  return value.selfRef;
}
2675
/**
 * Extracts the reference a child entry points at.
 *
 * @param {string|object} value - Either the reference string itself or an
 *   object carrying it under `$ref` (preferred) or `ref`.
 * @returns {string|undefined}
 */
function getRef(value) {
  return typeof value === "string" ? value : value.$ref ?? value.ref;
}
2679
/**
 * Reads the page number of a provenance entry, trying `page_no`, then
 * `pageNo`, then `page`.
 *
 * @param {object} prov
 * @returns {*} The first non-nullish candidate; otherwise whatever `prov.page` holds.
 */
function getPageNumber(prov) {
  const { page_no: snakeCase, pageNo: camelCase } = prov;
  if (snakeCase != null) return snakeCase;
  if (camelCase != null) return camelCase;
  return prov.page;
}
2682
/**
 * Converts a provenance entry's bounding box into `{page, x, y, width, height}`.
 *
 * The origin is taken from `x`/`l`/`left` and `y`/`t`/`top`. Width and height
 * come from explicit `width`/`height` when present, otherwise from the
 * distance to the opposite edge (`r`/`right`, `b`/`bottom`).
 *
 * Edge-derived extents use the absolute distance: edge pairs can arrive with
 * the far edge numerically smaller than the near one (for instance boxes
 * expressed against a bottom-left origin, where `t > b`), and a plain
 * subtraction then produced negative sizes.
 *
 * @param {object} prov - Provenance entry with a page number and a `bbox` object.
 * @returns {{page: number, x: number, y: number, width: number, height: number}|undefined}
 *   `undefined` when the page is missing/falsy, `bbox` is not an object, or
 *   the origin or either extent cannot be resolved.
 */
function toSourceSpanBBox(prov) {
  const page = getPageNumber(prov);
  const bbox = asRecord(prov.bbox);
  if (!page || !bbox) return void 0;

  const x = firstNumber2([bbox.x, bbox.l, bbox.left]);
  const y = firstNumber2([bbox.y, bbox.t, bbox.top]);
  if (x == null || y == null) return void 0;

  const right = firstNumber2([bbox.r, bbox.right]);
  const bottom = firstNumber2([bbox.b, bbox.bottom]);
  // Explicit sizes win and are passed through untouched; only derived sizes are normalised.
  const width = firstNumber2([bbox.width]) ?? (right != null ? Math.abs(right - x) : void 0);
  const height = firstNumber2([bbox.height]) ?? (bottom != null ? Math.abs(bottom - y) : void 0);
  if (width == null || height == null) return void 0;

  return { page, x, y, width, height };
}
2698
/**
 * Restricts a page number to the range `1..pageCount`; the lower bound wins
 * when `pageCount` is below 1.
 *
 * @param {number} page
 * @param {number} pageCount
 * @returns {number}
 */
function clampPage(page, pageCount) {
  const cappedAtLastPage = Math.min(pageCount, page);
  return Math.max(1, cappedAtLastPage);
}
2701
/**
 * Appends `next` to `existing`, separated by one blank line.
 *
 * @param {string|undefined} existing - Text accumulated so far; falsy means none yet.
 * @param {string} next - Text to add.
 * @returns {string} `next` alone when nothing was accumulated, otherwise both parts joined.
 */
function appendText(existing, next) {
  if (!existing) return next;
  return `${existing}\n\n${next}`;
}
2706
/**
 * Narrows a value to a plain record.
 *
 * @param {unknown} value
 * @returns {object|undefined} `value` itself when it is a non-null, non-array
 *   object; otherwise `undefined`.
 */
function asRecord(value) {
  if (!value) return void 0;
  if (typeof value !== "object") return void 0;
  if (Array.isArray(value)) return void 0;
  return value;
}
2709
/**
 * Returns the first candidate that is a non-blank string, trimmed.
 *
 * @param {unknown[]} values - Candidates in priority order.
 * @returns {string} The trimmed match, or `""` when no candidate qualifies.
 */
function firstString(values) {
  const match = values.find((value) => typeof value === "string" && value.trim());
  return match === void 0 ? "" : match.trim();
}
2715
/**
 * Returns the first candidate that is a finite number.
 *
 * @param {unknown[]} values - Candidates in priority order.
 * @returns {number|undefined} The match, or `undefined` when none qualifies
 *   (numeric strings, booleans, `NaN` and infinities are rejected).
 */
function firstNumber2(values) {
  return values.find((value) => typeof value === "number" && Number.isFinite(value));
}
2721
+
2474
2722
  // src/extraction/extractor.ts
2475
2723
  function sourceSpansForPageRange(providerOptions, startPage, endPage) {
2476
2724
  const sourceSpans = providerOptions?.sourceSpans;
@@ -2512,20 +2760,38 @@ async function runExtractor(params) {
2512
2760
  generateObject,
2513
2761
  convertPdfToImages,
2514
2762
  maxTokens = 4096,
2763
+ taskKind,
2764
+ budgetDiagnostics,
2515
2765
  providerOptions,
2516
2766
  pageRangeCache
2517
2767
  } = params;
2518
2768
  const extractorProviderOptions = { ...providerOptions };
2519
2769
  let fullPrompt;
2520
- const needsPdfBase64 = convertPdfToImages && !params.getPageImages || !convertPdfToImages && !params.getPageRangePdf;
2521
- const pdfBase64 = needsPdfBase64 ? await pdfInputToBase64(pdfInput) : void 0;
2522
- if (convertPdfToImages) {
2770
+ if (params.getPageRangeText) {
2771
+ const pageText = await params.getPageRangeText(startPage, endPage);
2772
+ extractorProviderOptions.doclingText = pageText;
2773
+ extractorProviderOptions.doclingPageRange = { startPage, endPage };
2774
+ fullPrompt = `${prompt}
2775
+
2776
+ [Document pages ${startPage}-${endPage} are provided below as Docling-extracted text.]
2777
+
2778
+ ${pageText || "(No Docling text was available for this page range.)"}`;
2779
+ } else if (convertPdfToImages) {
2780
+ if (!pdfInput) {
2781
+ throw new Error("pdfInput is required when extracting page images.");
2782
+ }
2783
+ const needsPdfBase64 = !params.getPageImages;
2784
+ const pdfBase64 = needsPdfBase64 ? await pdfInputToBase64(pdfInput) : void 0;
2523
2785
  const images = params.getPageImages ? await params.getPageImages(startPage, endPage) : await convertPdfToImages(pdfBase64, startPage, endPage);
2524
2786
  extractorProviderOptions.images = images;
2525
2787
  fullPrompt = `${prompt}
2526
2788
 
2527
2789
  [Document pages ${startPage}-${endPage} are provided as images.]`;
2528
2790
  } else {
2791
+ if (!pdfInput) {
2792
+ throw new Error("pdfInput is required when extracting page PDFs.");
2793
+ }
2794
+ const pdfBase64 = params.getPageRangePdf ? void 0 : await pdfInputToBase64(pdfInput);
2529
2795
  const cacheKey = `${startPage}-${endPage}`;
2530
2796
  const cachedPagesPdf = pageRangeCache?.get(cacheKey);
2531
2797
  const pagesPdf = cachedPagesPdf ?? (params.getPageRangePdf ? await params.getPageRangePdf(startPage, endPage) : await extractPageRange(pdfBase64, startPage, endPage));
@@ -2545,6 +2811,8 @@ async function runExtractor(params) {
2545
2811
  prompt: fullPrompt,
2546
2812
  schema: strictSchema,
2547
2813
  maxTokens,
2814
+ taskKind,
2815
+ budgetDiagnostics,
2548
2816
  providerOptions: extractorProviderOptions
2549
2817
  })
2550
2818
  );
@@ -3524,6 +3792,8 @@ async function formatDocumentContent(doc, generateText, options) {
3524
3792
  () => generateText({
3525
3793
  prompt,
3526
3794
  maxTokens: options?.maxTokens ?? 16384,
3795
+ taskKind: options?.taskKind,
3796
+ budgetDiagnostics: options?.budgetDiagnostics,
3527
3797
  providerOptions: options?.providerOptions
3528
3798
  })
3529
3799
  );
@@ -3561,7 +3831,7 @@ function formatAddress(addr) {
3561
3831
  function asRecordArray(value) {
3562
3832
  return Array.isArray(value) ? value.filter((item) => Boolean(item) && typeof item === "object" && !Array.isArray(item)) : [];
3563
3833
  }
3564
- function firstString(item, keys) {
3834
+ function firstString2(item, keys) {
3565
3835
  for (const key of keys) {
3566
3836
  const value = item[key];
3567
3837
  if (typeof value === "string" && value.trim()) return value;
@@ -3918,32 +4188,32 @@ ${exc.content}`.trim(), {
3918
4188
  );
3919
4189
  });
3920
4190
  asRecordArray(extendedDoc.definitions).forEach((definition, i) => {
3921
- const term = firstString(definition, ["term", "name", "title"]) ?? `Definition ${i + 1}`;
3922
- const body = firstString(definition, ["definition", "content", "text", "meaning"]);
4191
+ const term = firstString2(definition, ["term", "name", "title"]) ?? `Definition ${i + 1}`;
4192
+ const body = firstString2(definition, ["definition", "content", "text", "meaning"]);
3923
4193
  pushChunk(
3924
4194
  `definition:${i}`,
3925
4195
  "definition",
3926
4196
  lines([
3927
4197
  `Definition: ${term}`,
3928
4198
  body,
3929
- firstString(definition, ["originalContent", "source"]) ? `Source: ${firstString(definition, ["originalContent", "source"])}` : null
4199
+ firstString2(definition, ["originalContent", "source"]) ? `Source: ${firstString2(definition, ["originalContent", "source"])}` : null
3930
4200
  ]),
3931
4201
  {
3932
4202
  term,
3933
- formNumber: firstString(definition, ["formNumber"]),
3934
- formTitle: firstString(definition, ["formTitle"]),
4203
+ formNumber: firstString2(definition, ["formNumber"]),
4204
+ formTitle: firstString2(definition, ["formTitle"]),
3935
4205
  pageNumber: typeof definition.pageNumber === "number" ? definition.pageNumber : void 0,
3936
- sectionRef: firstString(definition, ["sectionRef", "sectionTitle"]),
4206
+ sectionRef: firstString2(definition, ["sectionRef", "sectionTitle"]),
3937
4207
  documentType: doc.type
3938
4208
  }
3939
4209
  );
3940
4210
  });
3941
4211
  const coveredReasons = asRecordArray(extendedDoc.coveredReasons ?? extendedDoc.covered_reasons);
3942
4212
  coveredReasons.forEach((coveredReason, i) => {
3943
- const title = firstString(coveredReason, ["title", "name", "reason", "peril", "cause"]) ?? `Covered Reason ${i + 1}`;
3944
- const coverageName = firstString(coveredReason, ["coverageName", "coverage", "coveragePart"]);
3945
- const reasonNumber = firstString(coveredReason, ["reasonNumber", "number"]);
3946
- const body = firstString(coveredReason, ["content", "description", "text", "coverageGrant"]);
4213
+ const title = firstString2(coveredReason, ["title", "name", "reason", "peril", "cause"]) ?? `Covered Reason ${i + 1}`;
4214
+ const coverageName = firstString2(coveredReason, ["coverageName", "coverage", "coveragePart"]);
4215
+ const reasonNumber = firstString2(coveredReason, ["reasonNumber", "number"]);
4216
+ const body = firstString2(coveredReason, ["content", "description", "text", "coverageGrant"]);
3947
4217
  pushChunk(
3948
4218
  `covered_reason:${i}`,
3949
4219
  "covered_reason",
@@ -3952,16 +4222,16 @@ ${exc.content}`.trim(), {
3952
4222
  reasonNumber ? `Reason Number: ${reasonNumber}` : null,
3953
4223
  `Covered Reason: ${title}`,
3954
4224
  body,
3955
- firstString(coveredReason, ["originalContent", "source"]) ? `Source: ${firstString(coveredReason, ["originalContent", "source"])}` : null
4225
+ firstString2(coveredReason, ["originalContent", "source"]) ? `Source: ${firstString2(coveredReason, ["originalContent", "source"])}` : null
3956
4226
  ]),
3957
4227
  {
3958
4228
  coverageName,
3959
4229
  reasonNumber,
3960
4230
  title,
3961
- formNumber: firstString(coveredReason, ["formNumber"]),
3962
- formTitle: firstString(coveredReason, ["formTitle"]),
4231
+ formNumber: firstString2(coveredReason, ["formNumber"]),
4232
+ formTitle: firstString2(coveredReason, ["formTitle"]),
3963
4233
  pageNumber: typeof coveredReason.pageNumber === "number" ? coveredReason.pageNumber : void 0,
3964
- sectionRef: firstString(coveredReason, ["sectionRef", "sectionTitle"]),
4234
+ sectionRef: firstString2(coveredReason, ["sectionRef", "sectionTitle"]),
3965
4235
  documentType: doc.type
3966
4236
  }
3967
4237
  );
@@ -3981,10 +4251,10 @@ ${exc.content}`.trim(), {
3981
4251
  reasonNumber,
3982
4252
  title,
3983
4253
  conditionIndex,
3984
- formNumber: firstString(coveredReason, ["formNumber"]),
3985
- formTitle: firstString(coveredReason, ["formTitle"]),
4254
+ formNumber: firstString2(coveredReason, ["formNumber"]),
4255
+ formTitle: firstString2(coveredReason, ["formTitle"]),
3986
4256
  pageNumber: typeof coveredReason.pageNumber === "number" ? coveredReason.pageNumber : void 0,
3987
- sectionRef: firstString(coveredReason, ["sectionRef", "sectionTitle"]),
4257
+ sectionRef: firstString2(coveredReason, ["sectionRef", "sectionTitle"]),
3988
4258
  documentType: doc.type
3989
4259
  }
3990
4260
  );
@@ -6455,21 +6725,21 @@ Return JSON only.`;
6455
6725
  }
6456
6726
 
6457
6727
  // src/prompts/extractors/index.ts
6458
- function asRecord(data) {
6728
+ function asRecord2(data) {
6459
6729
  return data && typeof data === "object" ? data : void 0;
6460
6730
  }
6461
6731
  function getSections2(data) {
6462
- const sections = asRecord(data)?.sections;
6732
+ const sections = asRecord2(data)?.sections;
6463
6733
  return Array.isArray(sections) ? sections : [];
6464
6734
  }
6465
6735
  function isCoveredReasonsEmpty(data) {
6466
- const record = asRecord(data);
6736
+ const record = asRecord2(data);
6467
6737
  if (!record) return true;
6468
6738
  const coveredReasons = Array.isArray(record.coveredReasons) ? record.coveredReasons : Array.isArray(record.covered_reasons) ? record.covered_reasons : [];
6469
6739
  return coveredReasons.length === 0;
6470
6740
  }
6471
6741
  function isDefinitionsEmpty(data) {
6472
- const definitions = asRecord(data)?.definitions;
6742
+ const definitions = asRecord2(data)?.definitions;
6473
6743
  return !Array.isArray(definitions) || definitions.length === 0;
6474
6744
  }
6475
6745
  function sectionLooksLikeCoveredReason(section) {
@@ -6703,6 +6973,14 @@ function decideReferentialResolutionAction(params) {
6703
6973
  }
6704
6974
 
6705
6975
  // src/extraction/resolve-referential.ts
6976
/**
 * Builds the prompt suffix that carries the Docling text, if there is any.
 *
 * @param {object} [providerOptions] - Options that may hold a `doclingText` string.
 * @returns {string} A blank line, the `DOCLING DOCUMENT TEXT:` heading and the
 *   text; `""` when `doclingText` is missing, not a string, or blank.
 */
function formatDoclingTextContext(providerOptions) {
  const doclingText = providerOptions?.doclingText;
  const hasText = typeof doclingText === "string" && doclingText.trim() !== "";
  return hasText ? `\n\nDOCLING DOCUMENT TEXT:\n${doclingText}` : "";
}
6706
6984
  function parseReferenceTarget(text) {
6707
6985
  if (typeof text !== "string") return void 0;
6708
6986
  const normalized = text.trim();
@@ -6784,10 +7062,12 @@ Return the page range (1-indexed) where this section is located. If the section
6784
7062
 
6785
7063
  If you cannot find the section, return startPage: 0 and endPage: 0.
6786
7064
 
6787
- Return JSON only.`,
7065
+ Return JSON only.${formatDoclingTextContext(providerOptions)}`,
6788
7066
  schema: PageLocationSchema,
6789
7067
  maxTokens: budget.maxTokens,
6790
- providerOptions: await buildPdfProviderOptions(pdfInput, providerOptions)
7068
+ taskKind: "extraction_referential_lookup",
7069
+ budgetDiagnostics: budget,
7070
+ providerOptions: pdfInput ? await buildPdfProviderOptions(pdfInput, providerOptions) : providerOptions
6791
7071
  },
6792
7072
  {
6793
7073
  fallback: { startPage: 0, endPage: 0 },
@@ -6821,6 +7101,7 @@ async function resolveReferentialCoverages(params) {
6821
7101
  convertPdfToImages,
6822
7102
  getPageRangePdf,
6823
7103
  getPageImages,
7104
+ getPageRangeText,
6824
7105
  concurrency = 2,
6825
7106
  providerOptions,
6826
7107
  modelCapabilities,
@@ -6932,7 +7213,10 @@ async function resolveReferentialCoverages(params) {
6932
7213
  convertPdfToImages,
6933
7214
  getPageRangePdf,
6934
7215
  getPageImages,
7216
+ getPageRangeText,
6935
7217
  maxTokens: budget.maxTokens,
7218
+ taskKind: "extraction_referential_lookup",
7219
+ budgetDiagnostics: budget,
6936
7220
  providerOptions
6937
7221
  });
6938
7222
  trackUsage(result.usage);
@@ -7025,6 +7309,7 @@ async function runFocusedExtractorWithFallback(params) {
7025
7309
  pageRangeCache,
7026
7310
  getPageRangePdf,
7027
7311
  getPageImages,
7312
+ getPageRangeText,
7028
7313
  trackUsage,
7029
7314
  resolveBudget,
7030
7315
  log
@@ -7049,10 +7334,13 @@ async function runFocusedExtractorWithFallback(params) {
7049
7334
  generateObject,
7050
7335
  convertPdfToImages,
7051
7336
  maxTokens: budget.maxTokens,
7337
+ taskKind,
7338
+ budgetDiagnostics: budget,
7052
7339
  providerOptions,
7053
7340
  pageRangeCache,
7054
7341
  getPageRangePdf,
7055
- getPageImages
7342
+ getPageImages,
7343
+ getPageRangeText
7056
7344
  });
7057
7345
  trackUsage(result.usage, {
7058
7346
  taskKind,
@@ -7092,10 +7380,13 @@ async function runFocusedExtractorWithFallback(params) {
7092
7380
  generateObject,
7093
7381
  convertPdfToImages,
7094
7382
  maxTokens: budget.maxTokens,
7383
+ taskKind,
7384
+ budgetDiagnostics: budget,
7095
7385
  providerOptions,
7096
7386
  pageRangeCache,
7097
7387
  getPageRangePdf,
7098
- getPageImages
7388
+ getPageImages,
7389
+ getPageRangeText
7099
7390
  });
7100
7391
  trackUsage(fallbackResult.usage, {
7101
7392
  taskKind,
@@ -7939,7 +8230,7 @@ function createExtractor(config) {
7939
8230
  }
7940
8231
  return lines.length > 0 ? lines.join("\n") : "";
7941
8232
  }
7942
- async function runFocusedExtractorTask(task, pdfInput, memory, pageRangeCache, getPageRangePdf, getPageImages) {
8233
+ async function runFocusedExtractorTask(task, pdfInput, memory, pageRangeCache, getPageRangePdf, getPageImages, getPageRangeText) {
7943
8234
  if (task.extractorName === "supplementary") {
7944
8235
  const alreadyExtractedSummary = buildAlreadyExtractedSummary(memory);
7945
8236
  const budget = resolveBudget("extraction_focused", 4096);
@@ -7954,10 +8245,13 @@ function createExtractor(config) {
7954
8245
  generateObject,
7955
8246
  convertPdfToImages,
7956
8247
  maxTokens: budget.maxTokens,
8248
+ taskKind: "extraction_focused",
8249
+ budgetDiagnostics: budget,
7957
8250
  providerOptions: activeProviderOptions,
7958
8251
  pageRangeCache,
7959
8252
  getPageRangePdf,
7960
- getPageImages
8253
+ getPageImages,
8254
+ getPageRangeText
7961
8255
  });
7962
8256
  trackUsage(result.usage, {
7963
8257
  taskKind: "extraction_focused",
@@ -7976,6 +8270,7 @@ function createExtractor(config) {
7976
8270
  pageRangeCache,
7977
8271
  getPageRangePdf,
7978
8272
  getPageImages,
8273
+ getPageRangeText,
7979
8274
  trackUsage,
7980
8275
  resolveBudget,
7981
8276
  log
@@ -7991,8 +8286,14 @@ function createExtractor(config) {
7991
8286
  if (extractorPages.size === 0) return "No page assignments available.";
7992
8287
  return [...extractorPages.entries()].map(([extractorName, pages]) => `${extractorName}: ${pages.length} page(s), pages ${pages.join(", ")}`).join("\n");
7993
8288
  }
7994
- async function extract(pdfInput, documentId, options) {
8289
+ async function extract(input, documentId, options) {
7995
8290
  const id = documentId ?? `doc-${Date.now()}`;
8291
+ const isDoclingInput = isDoclingExtractionInput(input);
8292
+ const pdfInput = isDoclingInput ? void 0 : input;
8293
+ const doclingDocument = isDoclingInput ? normalizeDoclingDocument(input.document, {
8294
+ documentId: id,
8295
+ sourceKind: input.sourceKind
8296
+ }) : void 0;
7996
8297
  const memory = /* @__PURE__ */ new Map();
7997
8298
  totalUsage = { inputTokens: 0, outputTokens: 0 };
7998
8299
  modelCalls = 0;
@@ -8002,7 +8303,10 @@ function createExtractor(config) {
8002
8303
  modelCalls: [],
8003
8304
  totalModelCallDurationMs: 0
8004
8305
  };
8005
- const sourceSpans = options?.sourceSpans ?? [];
8306
+ const sourceSpans = mergeSourceSpans([
8307
+ ...doclingDocument?.sourceSpans ?? [],
8308
+ ...options?.sourceSpans ?? []
8309
+ ]);
8006
8310
  const sourceChunks = sourceSpans.length ? chunkSourceSpans(sourceSpans) : [];
8007
8311
  activeProviderOptions = sourceSpans.length ? { ...providerOptions, sourceSpans, sourceChunks } : providerOptions;
8008
8312
  if (sourceStore && sourceSpans.length > 0) {
@@ -8031,24 +8335,40 @@ function createExtractor(config) {
8031
8335
  let fullPdfProviderOptionsPromise;
8032
8336
  let pageCountPromise;
8033
8337
  async function getPdfBase64ForExtraction() {
8338
+ if (!pdfInput) {
8339
+ throw new Error("PDF input is not available for Docling extraction.");
8340
+ }
8034
8341
  if (pdfBase64Cache === void 0) {
8035
8342
  pdfBase64Cache = await pdfInputToBase64(pdfInput);
8036
8343
  }
8037
8344
  return pdfBase64Cache;
8038
8345
  }
8039
8346
  async function getCachedPageCount() {
8347
+ if (doclingDocument) return doclingDocument.pageCount;
8348
+ if (!pdfInput) {
8349
+ throw new Error("PDF input is required to read page count.");
8350
+ }
8040
8351
  if (!pageCountPromise) {
8041
8352
  pageCountPromise = getPdfSlicer().then((slicer) => slicer.getPageCount()).catch(() => getPdfPageCount(pdfInput));
8042
8353
  }
8043
8354
  return pageCountPromise;
8044
8355
  }
8045
- async function getFullPdfProviderOptions() {
8356
+ async function getFullDocumentProviderOptions() {
8357
+ if (doclingDocument) {
8358
+ return buildDoclingProviderOptions(doclingDocument, activeProviderOptions);
8359
+ }
8360
+ if (!pdfInput) {
8361
+ return activeProviderOptions ?? {};
8362
+ }
8046
8363
  if (!fullPdfProviderOptionsPromise) {
8047
8364
  fullPdfProviderOptionsPromise = buildPdfProviderOptions(pdfInput, activeProviderOptions);
8048
8365
  }
8049
8366
  return fullPdfProviderOptionsPromise;
8050
8367
  }
8051
8368
  async function getPdfSlicer() {
8369
+ if (!pdfInput) {
8370
+ throw new Error("PDF input is not available for Docling extraction.");
8371
+ }
8052
8372
  if (!pdfSlicerPromise) {
8053
8373
  pdfSlicerPromise = createPdfPageSlicer(pdfInput);
8054
8374
  }
@@ -8087,6 +8407,23 @@ function createExtractor(config) {
8087
8407
  pageRangeImageCache.set(cacheKey, promise);
8088
8408
  return promise;
8089
8409
  }
8410
/**
 * Returns the Docling text for an inclusive page range.
 *
 * @param {number} startPage
 * @param {number} endPage
 * @returns {Promise<string>} The rendered range, or `""` when the current
 *   input is not a Docling document.
 */
async function getPageRangeText(startPage, endPage) {
  if (!doclingDocument) return "";
  return getDoclingPageRangeText(doclingDocument, startPage, endPage);
}
8413
/**
 * Appends the whole Docling text to a prompt under a
 * `DOCLING DOCUMENT TEXT:` heading.
 *
 * @param {string} prompt
 * @returns {string} The prompt unchanged when the current input is not a
 *   Docling document; otherwise the prompt followed by the full text.
 */
function withFullDocumentTextContext(prompt) {
  return doclingDocument ? `${prompt}\n\nDOCLING DOCUMENT TEXT:\n${doclingDocument.fullText}` : prompt;
}
+ }
8420
/**
 * Appends the Docling text of a page range to a prompt under a
 * `DOCLING DOCUMENT PAGES <start>-<end>:` heading.
 *
 * @param {string} prompt
 * @param {number} startPage
 * @param {number} endPage
 * @param {string} pageText - Text for the range; a placeholder sentence is used when falsy.
 * @returns {string} The prompt unchanged when the current input is not a
 *   Docling document; otherwise the prompt followed by the heading and text.
 */
function withPageRangeTextContext(prompt, startPage, endPage, pageText) {
  if (!doclingDocument) return prompt;
  const body = pageText || "(No Docling text was available for this page range.)";
  return `${prompt}\n\nDOCLING DOCUMENT PAGES ${startPage}-${endPage}:\n${body}`;
}
8090
8427
  let classifyResult;
8091
8428
  if (resumed?.classifyResult && pipelineCtx.isPhaseComplete("classify")) {
8092
8429
  classifyResult = resumed.classifyResult;
@@ -8099,10 +8436,12 @@ function createExtractor(config) {
8099
8436
  const classifyResponse = await safeGenerateObject(
8100
8437
  generateObject,
8101
8438
  {
8102
- prompt: buildClassifyPrompt(),
8439
+ prompt: withFullDocumentTextContext(buildClassifyPrompt()),
8103
8440
  schema: ClassifyResultSchema,
8104
8441
  maxTokens: budget.maxTokens,
8105
- providerOptions: await getFullPdfProviderOptions()
8442
+ taskKind: "extraction_classify",
8443
+ budgetDiagnostics: budget,
8444
+ providerOptions: await getFullDocumentProviderOptions()
8106
8445
  },
8107
8446
  {
8108
8447
  fallback: { documentType: "policy", policyTypes: ["other"], confidence: 0 },
@@ -8147,10 +8486,12 @@ function createExtractor(config) {
8147
8486
  const formInventoryResponse = await safeGenerateObject(
8148
8487
  generateObject,
8149
8488
  {
8150
- prompt: buildFormInventoryPrompt(templateHints),
8489
+ prompt: withFullDocumentTextContext(buildFormInventoryPrompt(templateHints)),
8151
8490
  schema: FormInventorySchema,
8152
8491
  maxTokens: budget.maxTokens,
8153
- providerOptions: await getFullPdfProviderOptions()
8492
+ taskKind: "extraction_form_inventory",
8493
+ budgetDiagnostics: budget,
8494
+ providerOptions: await getFullDocumentProviderOptions()
8154
8495
  },
8155
8496
  {
8156
8497
  fallback: { forms: [] },
@@ -8193,16 +8534,24 @@ function createExtractor(config) {
8193
8534
  const pageMapResults = await Promise.all(
8194
8535
  pageMapChunks.map(
8195
8536
  ({ startPage, endPage }) => pageMapLimit(async () => {
8196
- const pagesPdf = await getPageRangePdf(startPage, endPage);
8537
+ const pagesPdf = doclingDocument ? void 0 : await getPageRangePdf(startPage, endPage);
8538
+ const pagesText = doclingDocument ? await getPageRangeText(startPage, endPage) : "";
8197
8539
  const budget = resolveBudget("extraction_page_map", 2048);
8198
8540
  const startedAt = Date.now();
8199
8541
  const mapResponse = await safeGenerateObject(
8200
8542
  generateObject,
8201
8543
  {
8202
- prompt: buildPageMapPrompt(templateHints, startPage, endPage, formInventoryHint),
8544
+ prompt: withPageRangeTextContext(
8545
+ buildPageMapPrompt(templateHints, startPage, endPage, formInventoryHint),
8546
+ startPage,
8547
+ endPage,
8548
+ pagesText
8549
+ ),
8203
8550
  schema: PageMapChunkSchema,
8204
8551
  maxTokens: budget.maxTokens,
8205
- providerOptions: { ...activeProviderOptions, pdfBase64: pagesPdf }
8552
+ taskKind: "extraction_page_map",
8553
+ budgetDiagnostics: budget,
8554
+ providerOptions: doclingDocument ? { ...activeProviderOptions, doclingText: pagesText, doclingPageRange: { startPage, endPage } } : { ...activeProviderOptions, pdfBase64: pagesPdf }
8206
8555
  },
8207
8556
  {
8208
8557
  fallback: {
@@ -8280,7 +8629,7 @@ function createExtractor(config) {
8280
8629
  }))
8281
8630
  ];
8282
8631
  onProgress?.(`Dispatching ${tasks.length} extractors...`);
8283
- const extractionPdfInput = await getPdfBase64ForExtraction();
8632
+ const extractionPdfInput = doclingDocument ? void 0 : await getPdfBase64ForExtraction();
8284
8633
  const extractorResults = await Promise.all(
8285
8634
  tasks.map(
8286
8635
  (task) => extractorLimit(async () => {
@@ -8291,7 +8640,8 @@ function createExtractor(config) {
8291
8640
  memory,
8292
8641
  completedPageRangePdfCache,
8293
8642
  getPageRangePdf,
8294
- convertPdfToImages ? getPageImages : void 0
8643
+ convertPdfToImages ? getPageImages : void 0,
8644
+ doclingDocument ? getPageRangeText : void 0
8295
8645
  );
8296
8646
  })
8297
8647
  )
@@ -8318,10 +8668,13 @@ function createExtractor(config) {
8318
8668
  generateObject,
8319
8669
  convertPdfToImages,
8320
8670
  maxTokens: budget.maxTokens,
8671
+ taskKind: "extraction_focused",
8672
+ budgetDiagnostics: budget,
8321
8673
  providerOptions: activeProviderOptions,
8322
8674
  pageRangeCache: completedPageRangePdfCache,
8323
8675
  getPageRangePdf,
8324
- getPageImages: convertPdfToImages ? getPageImages : void 0
8676
+ getPageImages: convertPdfToImages ? getPageImages : void 0,
8677
+ getPageRangeText: doclingDocument ? getPageRangeText : void 0
8325
8678
  });
8326
8679
  trackUsage(supplementaryResult.usage, {
8327
8680
  taskKind: "extraction_focused",
@@ -8357,6 +8710,7 @@ function createExtractor(config) {
8357
8710
  concurrency,
8358
8711
  getPageRangePdf,
8359
8712
  getPageImages: convertPdfToImages ? getPageImages : void 0,
8713
+ getPageRangeText: doclingDocument ? getPageRangeText : void 0,
8360
8714
  providerOptions: activeProviderOptions,
8361
8715
  modelCapabilities,
8362
8716
  modelBudgetConstraints,
@@ -8405,13 +8759,22 @@ function createExtractor(config) {
8405
8759
  const reviewResponse = await safeGenerateObject(
8406
8760
  generateObject,
8407
8761
  {
8408
- prompt: buildReviewPrompt(template.required, extractedKeys, extractionSummary, pageMapSummary, extractorCatalog),
8762
+ prompt: withFullDocumentTextContext(buildReviewPrompt(template.required, extractedKeys, extractionSummary, pageMapSummary, extractorCatalog)),
8409
8763
  schema: ReviewResultSchema,
8410
8764
  maxTokens: budget.maxTokens,
8411
- providerOptions: await getFullPdfProviderOptions()
8765
+ taskKind: "extraction_review",
8766
+ budgetDiagnostics: budget,
8767
+ providerOptions: await getFullDocumentProviderOptions()
8412
8768
  },
8413
8769
  {
8414
- fallback: { complete: true, missingFields: [], qualityIssues: [], additionalTasks: [] },
8770
+ fallback: {
8771
+ complete: false,
8772
+ missingFields: ["llm_review_unavailable"],
8773
+ qualityIssues: [
8774
+ "LLM extraction review failed; deterministic review was used and the result needs review."
8775
+ ],
8776
+ additionalTasks: []
8777
+ },
8415
8778
  log,
8416
8779
  onError: (err, attempt) => log?.(`Review round ${round + 1} attempt ${attempt + 1} failed: ${err}`)
8417
8780
  }
@@ -8431,7 +8794,7 @@ function createExtractor(config) {
8431
8794
  break;
8432
8795
  }
8433
8796
  onProgress?.(`Review round ${round + 1}: dispatching ${reviewResponse.object.additionalTasks.length} follow-up extractors...`);
8434
- const extractionPdfInput = await getPdfBase64ForExtraction();
8797
+ const extractionPdfInput = doclingDocument ? void 0 : await getPdfBase64ForExtraction();
8435
8798
  const followUpResults = await Promise.all(
8436
8799
  reviewResponse.object.additionalTasks.map(
8437
8800
  (task) => extractorLimit(async () => {
@@ -8441,7 +8804,8 @@ function createExtractor(config) {
8441
8804
  memory,
8442
8805
  completedPageRangePdfCache,
8443
8806
  getPageRangePdf,
8444
- convertPdfToImages ? getPageImages : void 0
8807
+ convertPdfToImages ? getPageImages : void 0,
8808
+ doclingDocument ? getPageRangeText : void 0
8445
8809
  );
8446
8810
  })
8447
8811
  )
@@ -8512,6 +8876,8 @@ function createExtractor(config) {
8512
8876
  prompt: buildSummaryPrompt(document),
8513
8877
  schema: SummaryResultSchema,
8514
8878
  maxTokens: budget.maxTokens,
8879
+ taskKind: "extraction_summary",
8880
+ budgetDiagnostics: budget,
8515
8881
  providerOptions: activeProviderOptions
8516
8882
  },
8517
8883
  {
@@ -8539,6 +8905,8 @@ function createExtractor(config) {
8539
8905
  const formatResult = await formatDocumentContent(document, generateText, {
8540
8906
  providerOptions: activeProviderOptions,
8541
8907
  maxTokens: formatBudget.maxTokens,
8908
+ taskKind: "extraction_format",
8909
+ budgetDiagnostics: formatBudget,
8542
8910
  concurrency: formatConcurrency ?? concurrency,
8543
8911
  onProgress,
8544
8912
  log
@@ -8941,6 +9309,7 @@ async function classifyApplication(pdfContent, generateObject, providerOptions,
8941
9309
  Analyze the attached insurance document. If text source units are provided in provider options, use them as supporting context. Do not infer from base64 text.`,
8942
9310
  schema: ApplicationClassifyResultSchema,
8943
9311
  maxTokens,
9312
+ taskKind: "application_classify",
8944
9313
  providerOptions: {
8945
9314
  ...providerOptions,
8946
9315
  pdfBase64: providerOptions?.pdfBase64 ?? pdfContent
@@ -9043,6 +9412,7 @@ Extract fields from the attached application PDF. Use provider-supplied source u
9043
9412
  prompt,
9044
9413
  schema: FieldExtractionResultSchema,
9045
9414
  maxTokens,
9415
+ taskKind: "application_extract_fields",
9046
9416
  providerOptions: {
9047
9417
  ...providerOptions,
9048
9418
  pdfBase64: providerOptions?.pdfBase64 ?? pdfContent
@@ -9096,6 +9466,7 @@ async function autoFillFromContext(fields, orgContext, generateObject, providerO
9096
9466
  prompt,
9097
9467
  schema: AutoFillResultSchema,
9098
9468
  maxTokens,
9469
+ taskKind: "application_auto_fill",
9099
9470
  providerOptions
9100
9471
  })
9101
9472
  );
@@ -9166,6 +9537,7 @@ async function batchQuestions(unfilledFields, generateObject, providerOptions, m
9166
9537
  prompt,
9167
9538
  schema: QuestionBatchResultSchema,
9168
9539
  maxTokens,
9540
+ taskKind: "application_batch",
9169
9541
  providerOptions
9170
9542
  })
9171
9543
  );
@@ -9217,6 +9589,7 @@ async function classifyReplyIntent(fields, replyText, generateObject, providerOp
9217
9589
  prompt,
9218
9590
  schema: ReplyIntentSchema,
9219
9591
  maxTokens,
9592
+ taskKind: "application_classify",
9220
9593
  providerOptions
9221
9594
  })
9222
9595
  );
@@ -9276,6 +9649,7 @@ async function parseAnswers(fields, replyText, generateObject, providerOptions,
9276
9649
  prompt,
9277
9650
  schema: AnswerParsingResultSchema,
9278
9651
  maxTokens,
9652
+ taskKind: "application_parse_answers",
9279
9653
  providerOptions
9280
9654
  })
9281
9655
  );
@@ -9405,6 +9779,7 @@ async function fillFromLookup(requests, targetFields, availableData, generateObj
9405
9779
  prompt,
9406
9780
  schema: LookupFillResultSchema,
9407
9781
  maxTokens,
9782
+ taskKind: "application_lookup",
9408
9783
  providerOptions
9409
9784
  })
9410
9785
  );
@@ -9487,6 +9862,7 @@ async function generateBatchEmail(batchFields, batchIndex, totalBatches, opts, g
9487
9862
  () => generateText({
9488
9863
  prompt,
9489
9864
  maxTokens,
9865
+ taskKind: "application_email",
9490
9866
  providerOptions
9491
9867
  })
9492
9868
  );
@@ -10009,11 +10385,14 @@ function createApplicationPipeline(config) {
10009
10385
  }
10010
10386
  if (replyPlan.answerQuestion && intent.questionText) {
10011
10387
  try {
10388
+ const budget = resolveBudget("application_email", 512);
10012
10389
  const { text, usage } = await generateText({
10013
10390
  prompt: `The user is filling out an insurance application and asked: "${intent.questionText}"
10014
10391
 
10015
10392
  Provide a brief, helpful explanation (2-3 sentences). End with "Just reply with the answer when you're ready and I'll fill it in."`,
10016
- maxTokens: resolveBudget("application_email", 512).maxTokens,
10393
+ maxTokens: budget.maxTokens,
10394
+ taskKind: "application_email",
10395
+ budgetDiagnostics: budget,
10017
10396
  providerOptions
10018
10397
  });
10019
10398
  trackUsage(usage);
@@ -10138,6 +10517,7 @@ ${emailText}`;
10138
10517
  if (!state) throw new Error(`Application ${applicationId} not found`);
10139
10518
  const filledFields = state.fields.filter((f) => f.value);
10140
10519
  const fieldSummary = filledFields.map((f) => `${f.section} > ${f.label}: ${f.value} (source: ${f.source ?? "unknown"})`).join("\n");
10520
+ const budget = resolveBudget("application_email", 4096);
10141
10521
  const { text, usage } = await generateText({
10142
10522
  prompt: `Format these filled insurance application fields as a clean confirmation summary for the user to review. Group by section, show each field as "Label: Value". End with a note asking them to confirm or request changes.
10143
10523
 
@@ -10145,7 +10525,9 @@ Application: ${state.title ?? "Insurance Application"}
10145
10525
 
10146
10526
  Fields:
10147
10527
  ${fieldSummary}`,
10148
- maxTokens: resolveBudget("application_email", 4096).maxTokens,
10528
+ maxTokens: budget.maxTokens,
10529
+ taskKind: "application_email",
10530
+ budgetDiagnostics: budget,
10149
10531
  providerOptions
10150
10532
  });
10151
10533
  trackUsage(usage);
@@ -10619,6 +11001,8 @@ ${e.text}`;
10619
11001
  prompt,
10620
11002
  schema: SubAnswerSchema,
10621
11003
  maxTokens: budget.maxTokens,
11004
+ taskKind: "query_reason",
11005
+ budgetDiagnostics: budget,
10622
11006
  providerOptions
10623
11007
  })
10624
11008
  );
@@ -10842,6 +11226,8 @@ async function verify(originalQuestion, subAnswers, allEvidence, config) {
10842
11226
  prompt,
10843
11227
  schema: VerifyResultSchema,
10844
11228
  maxTokens: budget.maxTokens,
11229
+ taskKind: "query_verify",
11230
+ budgetDiagnostics: budget,
10845
11231
  providerOptions
10846
11232
  })
10847
11233
  );
@@ -10984,6 +11370,8 @@ async function interpretAttachments(params) {
10984
11370
  prompt,
10985
11371
  schema: AttachmentInterpretationSchema,
10986
11372
  maxTokens: budget.maxTokens,
11373
+ taskKind: "query_attachment",
11374
+ budgetDiagnostics: budget,
10987
11375
  providerOptions: buildAttachmentProviderOptions(attachment, providerOptions)
10988
11376
  },
10989
11377
  {
@@ -11321,6 +11709,8 @@ function createQueryAgent(config) {
11321
11709
  prompt,
11322
11710
  schema: QueryClassifyResultSchema,
11323
11711
  maxTokens: budget.maxTokens,
11712
+ taskKind: "query_classify",
11713
+ budgetDiagnostics: budget,
11324
11714
  providerOptions
11325
11715
  },
11326
11716
  {
@@ -11372,6 +11762,8 @@ function createQueryAgent(config) {
11372
11762
  prompt,
11373
11763
  schema: QueryResultSchema,
11374
11764
  maxTokens: budget.maxTokens,
11765
+ taskKind: "query_respond",
11766
+ budgetDiagnostics: budget,
11375
11767
  providerOptions
11376
11768
  },
11377
11769
  {
@@ -11467,6 +11859,8 @@ function createPceAgent(config = {}) {
11467
11859
  prompt: buildPceNormalizePrompt({ requestText: input.requestText, evidenceSources }),
11468
11860
  schema: PceNormalizationResultSchema,
11469
11861
  maxTokens: budget.maxTokens,
11862
+ taskKind: "pce_impact_analysis",
11863
+ budgetDiagnostics: budget,
11470
11864
  providerOptions: config.providerOptions
11471
11865
  },
11472
11866
  { fallback, maxRetries: 1, log: config.log }
@@ -11528,6 +11922,8 @@ function createPceAgent(config = {}) {
11528
11922
  }),
11529
11923
  schema: ReplyAnswersSchema,
11530
11924
  maxTokens: budget.maxTokens,
11925
+ taskKind: "pce_reply_parse",
11926
+ budgetDiagnostics: budget,
11531
11927
  providerOptions: config.providerOptions
11532
11928
  },
11533
11929
  { fallback: { answers }, maxRetries: 1, log: config.log }
@@ -12408,6 +12804,7 @@ export {
12408
12804
  buildConfirmationSummaryPrompt,
12409
12805
  buildConversationMemoryGuidance,
12410
12806
  buildCoverageGapPrompt,
12807
+ buildDoclingProviderOptions,
12411
12808
  buildFieldExplanationPrompt,
12412
12809
  buildFieldExtractionPrompt,
12413
12810
  buildFlatPdfMappingPrompt,
@@ -12449,12 +12846,16 @@ export {
12449
12846
  fillAcroForm,
12450
12847
  generateNextMessage,
12451
12848
  getAcroFormFields,
12849
+ getDoclingPageRangeText,
12452
12850
  getExtractor,
12453
12851
  getFileIdentifier,
12454
12852
  getPdfPageCount,
12455
12853
  getTemplate,
12854
+ isDoclingExtractionInput,
12456
12855
  isFileReference,
12457
12856
  mergeQuestionAnswers,
12857
+ mergeSourceSpans,
12858
+ normalizeDoclingDocument,
12458
12859
  normalizeForMatch,
12459
12860
  orderSourceEvidence,
12460
12861
  overlayTextOnPdf,