@claritylabs/cl-sdk 1.0.3 → 1.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -256,6 +256,7 @@ __export(index_exports, {
256
256
  buildConfirmationSummaryPrompt: () => buildConfirmationSummaryPrompt,
257
257
  buildConversationMemoryGuidance: () => buildConversationMemoryGuidance,
258
258
  buildCoverageGapPrompt: () => buildCoverageGapPrompt,
259
+ buildDoclingProviderOptions: () => buildDoclingProviderOptions,
259
260
  buildFieldExplanationPrompt: () => buildFieldExplanationPrompt,
260
261
  buildFieldExtractionPrompt: () => buildFieldExtractionPrompt,
261
262
  buildFlatPdfMappingPrompt: () => buildFlatPdfMappingPrompt,
@@ -297,12 +298,16 @@ __export(index_exports, {
297
298
  fillAcroForm: () => fillAcroForm,
298
299
  generateNextMessage: () => generateNextMessage,
299
300
  getAcroFormFields: () => getAcroFormFields,
301
+ getDoclingPageRangeText: () => getDoclingPageRangeText,
300
302
  getExtractor: () => getExtractor,
301
303
  getFileIdentifier: () => getFileIdentifier,
302
304
  getPdfPageCount: () => getPdfPageCount,
303
305
  getTemplate: () => getTemplate,
306
+ isDoclingExtractionInput: () => isDoclingExtractionInput,
304
307
  isFileReference: () => isFileReference,
305
308
  mergeQuestionAnswers: () => mergeQuestionAnswers,
309
+ mergeSourceSpans: () => mergeSourceSpans,
310
+ normalizeDoclingDocument: () => normalizeDoclingDocument,
306
311
  normalizeForMatch: () => normalizeForMatch,
307
312
  orderSourceEvidence: () => orderSourceEvidence,
308
313
  overlayTextOnPdf: () => overlayTextOnPdf,
@@ -2794,6 +2799,254 @@ async function overlayTextOnPdf(pdfBytes, overlays) {
2794
2799
  return await pdfDoc.save();
2795
2800
  }
2796
2801
 
2802
+ // src/extraction/docling.ts
2803
+ function isDoclingExtractionInput(input) {
2804
+ return Boolean(
2805
+ input && typeof input === "object" && input.kind === "docling_document" && input.document && typeof input.document === "object"
2806
+ );
2807
+ }
2808
+ function normalizeDoclingDocument(document, options) {
2809
+ const itemMap = buildItemMap(document);
2810
+ const orderedRefs = getOrderedBodyRefs(document, itemMap);
2811
+ const orderedItems = orderedRefs.length > 0 ? orderedRefs.map((ref) => itemMap.get(ref)).filter((item) => Boolean(item)) : getFallbackOrderedItems(document, itemMap);
2812
+ const units = orderedItems.map(({ ref, item }) => normalizeItem(ref, item)).filter((unit) => Boolean(unit && unit.text.trim()));
2813
+ const pageCount = inferPageCount(document, units);
2814
+ const pageTexts = /* @__PURE__ */ new Map();
2815
+ for (const unit of units) {
2816
+ const page = clampPage(unit.pageStart ?? 1, pageCount);
2817
+ pageTexts.set(page, appendText(pageTexts.get(page), unit.text));
2818
+ }
2819
+ const fullText = Array.from({ length: pageCount }, (_, index) => {
2820
+ const pageNumber = index + 1;
2821
+ const text = pageTexts.get(pageNumber)?.trim();
2822
+ return text ? `Page ${pageNumber}
2823
+ ${text}` : "";
2824
+ }).filter(Boolean).join("\n\n");
2825
+ const sourceKind = options.sourceKind ?? "policy_pdf";
2826
+ const sourceSpans = units.map((unit, index) => {
2827
+ const span = buildSourceSpan(
2828
+ {
2829
+ documentId: options.documentId,
2830
+ sourceKind,
2831
+ text: unit.text,
2832
+ pageStart: unit.pageStart,
2833
+ pageEnd: unit.pageEnd,
2834
+ sectionId: unit.label,
2835
+ metadata: {
2836
+ sourceSystem: "docling",
2837
+ sourceUnit: "docling_item",
2838
+ doclingRef: unit.ref,
2839
+ ...unit.label ? { doclingLabel: unit.label } : {}
2840
+ }
2841
+ },
2842
+ index
2843
+ );
2844
+ return {
2845
+ ...span,
2846
+ kind: "plain_text",
2847
+ bbox: unit.bboxes?.length ? unit.bboxes : void 0
2848
+ };
2849
+ });
2850
+ return {
2851
+ pageCount,
2852
+ fullText,
2853
+ pageTexts,
2854
+ units,
2855
+ sourceSpans
2856
+ };
2857
+ }
2858
+ function getDoclingPageRangeText(normalized, startPage, endPage) {
2859
+ const start = clampPage(startPage, normalized.pageCount);
2860
+ const end = clampPage(endPage, normalized.pageCount);
2861
+ const lines = [];
2862
+ for (let page = start; page <= end; page++) {
2863
+ const text = normalized.pageTexts.get(page)?.trim();
2864
+ if (text) {
2865
+ lines.push(`Page ${page}
2866
+ ${text}`);
2867
+ }
2868
+ }
2869
+ return lines.join("\n\n");
2870
+ }
2871
+ function buildDoclingProviderOptions(normalized, existingOptions) {
2872
+ return {
2873
+ ...existingOptions,
2874
+ doclingText: normalized.fullText,
2875
+ doclingPageCount: normalized.pageCount
2876
+ };
2877
+ }
2878
+ function mergeSourceSpans(spans) {
2879
+ const seen = /* @__PURE__ */ new Set();
2880
+ const merged = [];
2881
+ for (const span of spans) {
2882
+ const key = [
2883
+ span.documentId,
2884
+ span.pageStart ?? span.location?.startPage ?? span.location?.page ?? "na",
2885
+ span.pageEnd ?? span.location?.endPage ?? span.pageStart ?? "na",
2886
+ span.sectionId ?? span.location?.fieldPath ?? "na",
2887
+ span.textHash ?? sourceSpanTextHash(span.text)
2888
+ ].join(":");
2889
+ if (seen.has(key)) continue;
2890
+ seen.add(key);
2891
+ merged.push(span);
2892
+ }
2893
+ return merged;
2894
+ }
2895
+ function buildItemMap(document) {
2896
+ const map = /* @__PURE__ */ new Map();
2897
+ addItems(map, "#/texts", document.texts ?? []);
2898
+ addItems(map, "#/tables", document.tables ?? []);
2899
+ addItems(map, "#/key_value_items", document.key_value_items ?? document.keyValueItems ?? []);
2900
+ addItems(map, "#/pictures", document.pictures ?? []);
2901
+ return map;
2902
+ }
2903
+ function addItems(map, baseRef, items) {
2904
+ items.forEach((item, index) => {
2905
+ const ref = getSelfRef(item) ?? `${baseRef}/${index}`;
2906
+ map.set(ref, { ref, item });
2907
+ });
2908
+ }
2909
+ function getFallbackOrderedItems(document, itemMap) {
2910
+ const refs = [
2911
+ ...(document.texts ?? []).map((item, index) => getSelfRef(item) ?? `#/texts/${index}`),
2912
+ ...(document.tables ?? []).map((item, index) => getSelfRef(item) ?? `#/tables/${index}`),
2913
+ ...(document.key_value_items ?? document.keyValueItems ?? []).map((item, index) => getSelfRef(item) ?? `#/key_value_items/${index}`)
2914
+ ];
2915
+ return refs.map((ref) => itemMap.get(ref)).filter((item) => Boolean(item));
2916
+ }
2917
+ function getOrderedBodyRefs(document, itemMap) {
2918
+ const groupMap = /* @__PURE__ */ new Map();
2919
+ (document.groups ?? []).forEach((group, index) => {
2920
+ groupMap.set(getSelfRef(group) ?? `#/groups/${index}`, group);
2921
+ });
2922
+ const refs = [];
2923
+ const visited = /* @__PURE__ */ new Set();
2924
+ const visitRef = (ref) => {
2925
+ const itemEntry = itemMap.get(ref);
2926
+ if (itemEntry) {
2927
+ if (!visited.has(ref)) {
2928
+ visited.add(ref);
2929
+ refs.push(ref);
2930
+ }
2931
+ visitNode(itemEntry.item);
2932
+ return;
2933
+ }
2934
+ visitNode(groupMap.get(ref));
2935
+ };
2936
+ const visitNode = (node) => {
2937
+ for (const child of node?.children ?? []) {
2938
+ const ref = getRef(child);
2939
+ if (!ref) continue;
2940
+ visitRef(ref);
2941
+ }
2942
+ };
2943
+ visitNode(document.body);
2944
+ return refs;
2945
+ }
2946
+ function normalizeItem(ref, item) {
2947
+ const text = getItemText(item).trim();
2948
+ if (!text) return void 0;
2949
+ const pages = (item.prov ?? []).map((prov) => getPageNumber(prov)).filter((page) => typeof page === "number" && page > 0);
2950
+ const pageStart = pages.length ? Math.min(...pages) : void 0;
2951
+ const pageEnd = pages.length ? Math.max(...pages) : pageStart;
2952
+ const bboxes = (item.prov ?? []).map((prov) => toSourceSpanBBox(prov)).filter((bbox) => Boolean(bbox));
2953
+ return {
2954
+ ref,
2955
+ label: typeof item.label === "string" ? item.label : void 0,
2956
+ text,
2957
+ pageStart,
2958
+ pageEnd,
2959
+ bboxes: bboxes.length ? bboxes : void 0
2960
+ };
2961
+ }
2962
+ function getItemText(item) {
2963
+ if (typeof item.text === "string" && item.text.trim()) return item.text;
2964
+ if (typeof item.orig === "string" && item.orig.trim()) return item.orig;
2965
+ const table = tableToMarkdown(item.data);
2966
+ if (table) return table;
2967
+ return "";
2968
+ }
2969
+ function tableToMarkdown(data) {
2970
+ const record = asRecord(data);
2971
+ const cells = Array.isArray(record?.table_cells) ? record.table_cells : Array.isArray(record?.tableCells) ? record.tableCells : void 0;
2972
+ if (!cells) return void 0;
2973
+ const parsedCells = cells.map((cell) => asRecord(cell)).filter((cell) => Boolean(cell)).map((cell) => ({
2974
+ row: firstNumber2([cell.start_row_offset, cell.row_header, cell.row, cell.rowIndex]) ?? 0,
2975
+ col: firstNumber2([cell.start_col_offset, cell.col, cell.colIndex]) ?? 0,
2976
+ text: firstString([cell.text, cell.orig, cell.content])
2977
+ })).filter((cell) => cell.text);
2978
+ if (parsedCells.length === 0) return void 0;
2979
+ const maxRow = Math.max(...parsedCells.map((cell) => cell.row));
2980
+ const maxCol = Math.max(...parsedCells.map((cell) => cell.col));
2981
+ const rows = Array.from({ length: maxRow + 1 }, () => Array.from({ length: maxCol + 1 }, () => ""));
2982
+ for (const cell of parsedCells) {
2983
+ rows[cell.row][cell.col] = cell.text;
2984
+ }
2985
+ if (rows.length === 1) return rows[0].filter(Boolean).join(" | ");
2986
+ const header = rows[0];
2987
+ const separator = header.map(() => "---");
2988
+ return [header, separator, ...rows.slice(1)].map((row) => `| ${row.map((value) => value.trim()).join(" | ")} |`).join("\n");
2989
+ }
2990
+ function inferPageCount(document, units) {
2991
+ const pages = document.pages;
2992
+ if (Array.isArray(pages)) return Math.max(1, pages.length);
2993
+ if (pages && typeof pages === "object") {
2994
+ const keys = Object.keys(pages);
2995
+ const numericMax = Math.max(0, ...keys.map((key) => Number(key)).filter((value) => Number.isFinite(value)));
2996
+ return Math.max(1, numericMax || keys.length);
2997
+ }
2998
+ return Math.max(1, ...units.flatMap((unit) => [unit.pageStart ?? 0, unit.pageEnd ?? 0]));
2999
+ }
3000
+ function getSelfRef(value) {
3001
+ return value.self_ref ?? value.selfRef;
3002
+ }
3003
+ function getRef(value) {
3004
+ if (typeof value === "string") return value;
3005
+ return value.$ref ?? value.ref;
3006
+ }
3007
+ function getPageNumber(prov) {
3008
+ return prov.page_no ?? prov.pageNo ?? prov.page;
3009
+ }
3010
+ function toSourceSpanBBox(prov) {
3011
+ const page = getPageNumber(prov);
3012
+ const bbox = asRecord(prov.bbox);
3013
+ if (!page || !bbox) return void 0;
3014
+ const x = firstNumber2([bbox.x, bbox.l, bbox.left]);
3015
+ const y = firstNumber2([bbox.y, bbox.t, bbox.top]);
3016
+ const width = firstNumber2([bbox.width]);
3017
+ const height = firstNumber2([bbox.height]);
3018
+ const right = firstNumber2([bbox.r, bbox.right]);
3019
+ const bottom = firstNumber2([bbox.b, bbox.bottom]);
3020
+ if (x == null || y == null) return void 0;
3021
+ const resolvedWidth = width ?? (right != null ? right - x : void 0);
3022
+ const resolvedHeight = height ?? (bottom != null ? bottom - y : void 0);
3023
+ if (resolvedWidth == null || resolvedHeight == null) return void 0;
3024
+ return { page, x, y, width: resolvedWidth, height: resolvedHeight };
3025
+ }
3026
+ function clampPage(page, pageCount) {
3027
+ return Math.max(1, Math.min(pageCount, page));
3028
+ }
3029
+ function appendText(existing, next) {
3030
+ return existing ? `${existing}
3031
+
3032
+ ${next}` : next;
3033
+ }
3034
+ function asRecord(value) {
3035
+ return value && typeof value === "object" && !Array.isArray(value) ? value : void 0;
3036
+ }
3037
+ function firstString(values) {
3038
+ for (const value of values) {
3039
+ if (typeof value === "string" && value.trim()) return value.trim();
3040
+ }
3041
+ return "";
3042
+ }
3043
+ function firstNumber2(values) {
3044
+ for (const value of values) {
3045
+ if (typeof value === "number" && Number.isFinite(value)) return value;
3046
+ }
3047
+ return void 0;
3048
+ }
3049
+
2797
3050
  // src/extraction/extractor.ts
2798
3051
  function sourceSpansForPageRange(providerOptions, startPage, endPage) {
2799
3052
  const sourceSpans = providerOptions?.sourceSpans;
@@ -2835,20 +3088,38 @@ async function runExtractor(params) {
2835
3088
  generateObject,
2836
3089
  convertPdfToImages,
2837
3090
  maxTokens = 4096,
3091
+ taskKind,
3092
+ budgetDiagnostics,
2838
3093
  providerOptions,
2839
3094
  pageRangeCache
2840
3095
  } = params;
2841
3096
  const extractorProviderOptions = { ...providerOptions };
2842
3097
  let fullPrompt;
2843
- const needsPdfBase64 = convertPdfToImages && !params.getPageImages || !convertPdfToImages && !params.getPageRangePdf;
2844
- const pdfBase64 = needsPdfBase64 ? await pdfInputToBase64(pdfInput) : void 0;
2845
- if (convertPdfToImages) {
3098
+ if (params.getPageRangeText) {
3099
+ const pageText = await params.getPageRangeText(startPage, endPage);
3100
+ extractorProviderOptions.doclingText = pageText;
3101
+ extractorProviderOptions.doclingPageRange = { startPage, endPage };
3102
+ fullPrompt = `${prompt}
3103
+
3104
+ [Document pages ${startPage}-${endPage} are provided below as Docling-extracted text.]
3105
+
3106
+ ${pageText || "(No Docling text was available for this page range.)"}`;
3107
+ } else if (convertPdfToImages) {
3108
+ if (!pdfInput) {
3109
+ throw new Error("pdfInput is required when extracting page images.");
3110
+ }
3111
+ const needsPdfBase64 = !params.getPageImages;
3112
+ const pdfBase64 = needsPdfBase64 ? await pdfInputToBase64(pdfInput) : void 0;
2846
3113
  const images = params.getPageImages ? await params.getPageImages(startPage, endPage) : await convertPdfToImages(pdfBase64, startPage, endPage);
2847
3114
  extractorProviderOptions.images = images;
2848
3115
  fullPrompt = `${prompt}
2849
3116
 
2850
3117
  [Document pages ${startPage}-${endPage} are provided as images.]`;
2851
3118
  } else {
3119
+ if (!pdfInput) {
3120
+ throw new Error("pdfInput is required when extracting page PDFs.");
3121
+ }
3122
+ const pdfBase64 = params.getPageRangePdf ? void 0 : await pdfInputToBase64(pdfInput);
2852
3123
  const cacheKey = `${startPage}-${endPage}`;
2853
3124
  const cachedPagesPdf = pageRangeCache?.get(cacheKey);
2854
3125
  const pagesPdf = cachedPagesPdf ?? (params.getPageRangePdf ? await params.getPageRangePdf(startPage, endPage) : await extractPageRange(pdfBase64, startPage, endPage));
@@ -2868,6 +3139,8 @@ async function runExtractor(params) {
2868
3139
  prompt: fullPrompt,
2869
3140
  schema: strictSchema,
2870
3141
  maxTokens,
3142
+ taskKind,
3143
+ budgetDiagnostics,
2871
3144
  providerOptions: extractorProviderOptions
2872
3145
  })
2873
3146
  );
@@ -3847,6 +4120,8 @@ async function formatDocumentContent(doc, generateText, options) {
3847
4120
  () => generateText({
3848
4121
  prompt,
3849
4122
  maxTokens: options?.maxTokens ?? 16384,
4123
+ taskKind: options?.taskKind,
4124
+ budgetDiagnostics: options?.budgetDiagnostics,
3850
4125
  providerOptions: options?.providerOptions
3851
4126
  })
3852
4127
  );
@@ -3884,7 +4159,7 @@ function formatAddress(addr) {
3884
4159
  function asRecordArray(value) {
3885
4160
  return Array.isArray(value) ? value.filter((item) => Boolean(item) && typeof item === "object" && !Array.isArray(item)) : [];
3886
4161
  }
3887
- function firstString(item, keys) {
4162
+ function firstString2(item, keys) {
3888
4163
  for (const key of keys) {
3889
4164
  const value = item[key];
3890
4165
  if (typeof value === "string" && value.trim()) return value;
@@ -4241,32 +4516,32 @@ ${exc.content}`.trim(), {
4241
4516
  );
4242
4517
  });
4243
4518
  asRecordArray(extendedDoc.definitions).forEach((definition, i) => {
4244
- const term = firstString(definition, ["term", "name", "title"]) ?? `Definition ${i + 1}`;
4245
- const body = firstString(definition, ["definition", "content", "text", "meaning"]);
4519
+ const term = firstString2(definition, ["term", "name", "title"]) ?? `Definition ${i + 1}`;
4520
+ const body = firstString2(definition, ["definition", "content", "text", "meaning"]);
4246
4521
  pushChunk(
4247
4522
  `definition:${i}`,
4248
4523
  "definition",
4249
4524
  lines([
4250
4525
  `Definition: ${term}`,
4251
4526
  body,
4252
- firstString(definition, ["originalContent", "source"]) ? `Source: ${firstString(definition, ["originalContent", "source"])}` : null
4527
+ firstString2(definition, ["originalContent", "source"]) ? `Source: ${firstString2(definition, ["originalContent", "source"])}` : null
4253
4528
  ]),
4254
4529
  {
4255
4530
  term,
4256
- formNumber: firstString(definition, ["formNumber"]),
4257
- formTitle: firstString(definition, ["formTitle"]),
4531
+ formNumber: firstString2(definition, ["formNumber"]),
4532
+ formTitle: firstString2(definition, ["formTitle"]),
4258
4533
  pageNumber: typeof definition.pageNumber === "number" ? definition.pageNumber : void 0,
4259
- sectionRef: firstString(definition, ["sectionRef", "sectionTitle"]),
4534
+ sectionRef: firstString2(definition, ["sectionRef", "sectionTitle"]),
4260
4535
  documentType: doc.type
4261
4536
  }
4262
4537
  );
4263
4538
  });
4264
4539
  const coveredReasons = asRecordArray(extendedDoc.coveredReasons ?? extendedDoc.covered_reasons);
4265
4540
  coveredReasons.forEach((coveredReason, i) => {
4266
- const title = firstString(coveredReason, ["title", "name", "reason", "peril", "cause"]) ?? `Covered Reason ${i + 1}`;
4267
- const coverageName = firstString(coveredReason, ["coverageName", "coverage", "coveragePart"]);
4268
- const reasonNumber = firstString(coveredReason, ["reasonNumber", "number"]);
4269
- const body = firstString(coveredReason, ["content", "description", "text", "coverageGrant"]);
4541
+ const title = firstString2(coveredReason, ["title", "name", "reason", "peril", "cause"]) ?? `Covered Reason ${i + 1}`;
4542
+ const coverageName = firstString2(coveredReason, ["coverageName", "coverage", "coveragePart"]);
4543
+ const reasonNumber = firstString2(coveredReason, ["reasonNumber", "number"]);
4544
+ const body = firstString2(coveredReason, ["content", "description", "text", "coverageGrant"]);
4270
4545
  pushChunk(
4271
4546
  `covered_reason:${i}`,
4272
4547
  "covered_reason",
@@ -4275,16 +4550,16 @@ ${exc.content}`.trim(), {
4275
4550
  reasonNumber ? `Reason Number: ${reasonNumber}` : null,
4276
4551
  `Covered Reason: ${title}`,
4277
4552
  body,
4278
- firstString(coveredReason, ["originalContent", "source"]) ? `Source: ${firstString(coveredReason, ["originalContent", "source"])}` : null
4553
+ firstString2(coveredReason, ["originalContent", "source"]) ? `Source: ${firstString2(coveredReason, ["originalContent", "source"])}` : null
4279
4554
  ]),
4280
4555
  {
4281
4556
  coverageName,
4282
4557
  reasonNumber,
4283
4558
  title,
4284
- formNumber: firstString(coveredReason, ["formNumber"]),
4285
- formTitle: firstString(coveredReason, ["formTitle"]),
4559
+ formNumber: firstString2(coveredReason, ["formNumber"]),
4560
+ formTitle: firstString2(coveredReason, ["formTitle"]),
4286
4561
  pageNumber: typeof coveredReason.pageNumber === "number" ? coveredReason.pageNumber : void 0,
4287
- sectionRef: firstString(coveredReason, ["sectionRef", "sectionTitle"]),
4562
+ sectionRef: firstString2(coveredReason, ["sectionRef", "sectionTitle"]),
4288
4563
  documentType: doc.type
4289
4564
  }
4290
4565
  );
@@ -4304,10 +4579,10 @@ ${exc.content}`.trim(), {
4304
4579
  reasonNumber,
4305
4580
  title,
4306
4581
  conditionIndex,
4307
- formNumber: firstString(coveredReason, ["formNumber"]),
4308
- formTitle: firstString(coveredReason, ["formTitle"]),
4582
+ formNumber: firstString2(coveredReason, ["formNumber"]),
4583
+ formTitle: firstString2(coveredReason, ["formTitle"]),
4309
4584
  pageNumber: typeof coveredReason.pageNumber === "number" ? coveredReason.pageNumber : void 0,
4310
- sectionRef: firstString(coveredReason, ["sectionRef", "sectionTitle"]),
4585
+ sectionRef: firstString2(coveredReason, ["sectionRef", "sectionTitle"]),
4311
4586
  documentType: doc.type
4312
4587
  }
4313
4588
  );
@@ -6778,21 +7053,21 @@ Return JSON only.`;
6778
7053
  }
6779
7054
 
6780
7055
  // src/prompts/extractors/index.ts
6781
- function asRecord(data) {
7056
+ function asRecord2(data) {
6782
7057
  return data && typeof data === "object" ? data : void 0;
6783
7058
  }
6784
7059
  function getSections2(data) {
6785
- const sections = asRecord(data)?.sections;
7060
+ const sections = asRecord2(data)?.sections;
6786
7061
  return Array.isArray(sections) ? sections : [];
6787
7062
  }
6788
7063
  function isCoveredReasonsEmpty(data) {
6789
- const record = asRecord(data);
7064
+ const record = asRecord2(data);
6790
7065
  if (!record) return true;
6791
7066
  const coveredReasons = Array.isArray(record.coveredReasons) ? record.coveredReasons : Array.isArray(record.covered_reasons) ? record.covered_reasons : [];
6792
7067
  return coveredReasons.length === 0;
6793
7068
  }
6794
7069
  function isDefinitionsEmpty(data) {
6795
- const definitions = asRecord(data)?.definitions;
7070
+ const definitions = asRecord2(data)?.definitions;
6796
7071
  return !Array.isArray(definitions) || definitions.length === 0;
6797
7072
  }
6798
7073
  function sectionLooksLikeCoveredReason(section) {
@@ -7026,6 +7301,14 @@ function decideReferentialResolutionAction(params) {
7026
7301
  }
7027
7302
 
7028
7303
  // src/extraction/resolve-referential.ts
7304
+ function formatDoclingTextContext(providerOptions) {
7305
+ const doclingText = providerOptions?.doclingText;
7306
+ if (typeof doclingText !== "string" || !doclingText.trim()) return "";
7307
+ return `
7308
+
7309
+ DOCLING DOCUMENT TEXT:
7310
+ ${doclingText}`;
7311
+ }
7029
7312
  function parseReferenceTarget(text) {
7030
7313
  if (typeof text !== "string") return void 0;
7031
7314
  const normalized = text.trim();
@@ -7107,10 +7390,12 @@ Return the page range (1-indexed) where this section is located. If the section
7107
7390
 
7108
7391
  If you cannot find the section, return startPage: 0 and endPage: 0.
7109
7392
 
7110
- Return JSON only.`,
7393
+ Return JSON only.${formatDoclingTextContext(providerOptions)}`,
7111
7394
  schema: PageLocationSchema,
7112
7395
  maxTokens: budget.maxTokens,
7113
- providerOptions: await buildPdfProviderOptions(pdfInput, providerOptions)
7396
+ taskKind: "extraction_referential_lookup",
7397
+ budgetDiagnostics: budget,
7398
+ providerOptions: pdfInput ? await buildPdfProviderOptions(pdfInput, providerOptions) : providerOptions
7114
7399
  },
7115
7400
  {
7116
7401
  fallback: { startPage: 0, endPage: 0 },
@@ -7144,6 +7429,7 @@ async function resolveReferentialCoverages(params) {
7144
7429
  convertPdfToImages,
7145
7430
  getPageRangePdf,
7146
7431
  getPageImages,
7432
+ getPageRangeText,
7147
7433
  concurrency = 2,
7148
7434
  providerOptions,
7149
7435
  modelCapabilities,
@@ -7255,7 +7541,10 @@ async function resolveReferentialCoverages(params) {
7255
7541
  convertPdfToImages,
7256
7542
  getPageRangePdf,
7257
7543
  getPageImages,
7544
+ getPageRangeText,
7258
7545
  maxTokens: budget.maxTokens,
7546
+ taskKind: "extraction_referential_lookup",
7547
+ budgetDiagnostics: budget,
7259
7548
  providerOptions
7260
7549
  });
7261
7550
  trackUsage(result.usage);
@@ -7348,6 +7637,7 @@ async function runFocusedExtractorWithFallback(params) {
7348
7637
  pageRangeCache,
7349
7638
  getPageRangePdf,
7350
7639
  getPageImages,
7640
+ getPageRangeText,
7351
7641
  trackUsage,
7352
7642
  resolveBudget,
7353
7643
  log
@@ -7372,10 +7662,13 @@ async function runFocusedExtractorWithFallback(params) {
7372
7662
  generateObject,
7373
7663
  convertPdfToImages,
7374
7664
  maxTokens: budget.maxTokens,
7665
+ taskKind,
7666
+ budgetDiagnostics: budget,
7375
7667
  providerOptions,
7376
7668
  pageRangeCache,
7377
7669
  getPageRangePdf,
7378
- getPageImages
7670
+ getPageImages,
7671
+ getPageRangeText
7379
7672
  });
7380
7673
  trackUsage(result.usage, {
7381
7674
  taskKind,
@@ -7415,10 +7708,13 @@ async function runFocusedExtractorWithFallback(params) {
7415
7708
  generateObject,
7416
7709
  convertPdfToImages,
7417
7710
  maxTokens: budget.maxTokens,
7711
+ taskKind,
7712
+ budgetDiagnostics: budget,
7418
7713
  providerOptions,
7419
7714
  pageRangeCache,
7420
7715
  getPageRangePdf,
7421
- getPageImages
7716
+ getPageImages,
7717
+ getPageRangeText
7422
7718
  });
7423
7719
  trackUsage(fallbackResult.usage, {
7424
7720
  taskKind,
@@ -8262,7 +8558,7 @@ function createExtractor(config) {
8262
8558
  }
8263
8559
  return lines.length > 0 ? lines.join("\n") : "";
8264
8560
  }
8265
- async function runFocusedExtractorTask(task, pdfInput, memory, pageRangeCache, getPageRangePdf, getPageImages) {
8561
+ async function runFocusedExtractorTask(task, pdfInput, memory, pageRangeCache, getPageRangePdf, getPageImages, getPageRangeText) {
8266
8562
  if (task.extractorName === "supplementary") {
8267
8563
  const alreadyExtractedSummary = buildAlreadyExtractedSummary(memory);
8268
8564
  const budget = resolveBudget("extraction_focused", 4096);
@@ -8277,10 +8573,13 @@ function createExtractor(config) {
8277
8573
  generateObject,
8278
8574
  convertPdfToImages,
8279
8575
  maxTokens: budget.maxTokens,
8576
+ taskKind: "extraction_focused",
8577
+ budgetDiagnostics: budget,
8280
8578
  providerOptions: activeProviderOptions,
8281
8579
  pageRangeCache,
8282
8580
  getPageRangePdf,
8283
- getPageImages
8581
+ getPageImages,
8582
+ getPageRangeText
8284
8583
  });
8285
8584
  trackUsage(result.usage, {
8286
8585
  taskKind: "extraction_focused",
@@ -8299,6 +8598,7 @@ function createExtractor(config) {
8299
8598
  pageRangeCache,
8300
8599
  getPageRangePdf,
8301
8600
  getPageImages,
8601
+ getPageRangeText,
8302
8602
  trackUsage,
8303
8603
  resolveBudget,
8304
8604
  log
@@ -8314,8 +8614,14 @@ function createExtractor(config) {
8314
8614
  if (extractorPages.size === 0) return "No page assignments available.";
8315
8615
  return [...extractorPages.entries()].map(([extractorName, pages]) => `${extractorName}: ${pages.length} page(s), pages ${pages.join(", ")}`).join("\n");
8316
8616
  }
8317
- async function extract(pdfInput, documentId, options) {
8617
+ async function extract(input, documentId, options) {
8318
8618
  const id = documentId ?? `doc-${Date.now()}`;
8619
+ const isDoclingInput = isDoclingExtractionInput(input);
8620
+ const pdfInput = isDoclingInput ? void 0 : input;
8621
+ const doclingDocument = isDoclingInput ? normalizeDoclingDocument(input.document, {
8622
+ documentId: id,
8623
+ sourceKind: input.sourceKind
8624
+ }) : void 0;
8319
8625
  const memory = /* @__PURE__ */ new Map();
8320
8626
  totalUsage = { inputTokens: 0, outputTokens: 0 };
8321
8627
  modelCalls = 0;
@@ -8325,7 +8631,10 @@ function createExtractor(config) {
8325
8631
  modelCalls: [],
8326
8632
  totalModelCallDurationMs: 0
8327
8633
  };
8328
- const sourceSpans = options?.sourceSpans ?? [];
8634
+ const sourceSpans = mergeSourceSpans([
8635
+ ...doclingDocument?.sourceSpans ?? [],
8636
+ ...options?.sourceSpans ?? []
8637
+ ]);
8329
8638
  const sourceChunks = sourceSpans.length ? chunkSourceSpans(sourceSpans) : [];
8330
8639
  activeProviderOptions = sourceSpans.length ? { ...providerOptions, sourceSpans, sourceChunks } : providerOptions;
8331
8640
  if (sourceStore && sourceSpans.length > 0) {
@@ -8354,24 +8663,40 @@ function createExtractor(config) {
8354
8663
  let fullPdfProviderOptionsPromise;
8355
8664
  let pageCountPromise;
8356
8665
  async function getPdfBase64ForExtraction() {
8666
+ if (!pdfInput) {
8667
+ throw new Error("PDF input is not available for Docling extraction.");
8668
+ }
8357
8669
  if (pdfBase64Cache === void 0) {
8358
8670
  pdfBase64Cache = await pdfInputToBase64(pdfInput);
8359
8671
  }
8360
8672
  return pdfBase64Cache;
8361
8673
  }
8362
8674
  async function getCachedPageCount() {
8675
+ if (doclingDocument) return doclingDocument.pageCount;
8676
+ if (!pdfInput) {
8677
+ throw new Error("PDF input is required to read page count.");
8678
+ }
8363
8679
  if (!pageCountPromise) {
8364
8680
  pageCountPromise = getPdfSlicer().then((slicer) => slicer.getPageCount()).catch(() => getPdfPageCount(pdfInput));
8365
8681
  }
8366
8682
  return pageCountPromise;
8367
8683
  }
8368
- async function getFullPdfProviderOptions() {
8684
+ async function getFullDocumentProviderOptions() {
8685
+ if (doclingDocument) {
8686
+ return buildDoclingProviderOptions(doclingDocument, activeProviderOptions);
8687
+ }
8688
+ if (!pdfInput) {
8689
+ return activeProviderOptions ?? {};
8690
+ }
8369
8691
  if (!fullPdfProviderOptionsPromise) {
8370
8692
  fullPdfProviderOptionsPromise = buildPdfProviderOptions(pdfInput, activeProviderOptions);
8371
8693
  }
8372
8694
  return fullPdfProviderOptionsPromise;
8373
8695
  }
8374
8696
  async function getPdfSlicer() {
8697
+ if (!pdfInput) {
8698
+ throw new Error("PDF input is not available for Docling extraction.");
8699
+ }
8375
8700
  if (!pdfSlicerPromise) {
8376
8701
  pdfSlicerPromise = createPdfPageSlicer(pdfInput);
8377
8702
  }
@@ -8410,6 +8735,23 @@ function createExtractor(config) {
8410
8735
  pageRangeImageCache.set(cacheKey, promise);
8411
8736
  return promise;
8412
8737
  }
8738
+ async function getPageRangeText(startPage, endPage) {
8739
+ return doclingDocument ? getDoclingPageRangeText(doclingDocument, startPage, endPage) : "";
8740
+ }
8741
+ function withFullDocumentTextContext(prompt) {
8742
+ if (!doclingDocument) return prompt;
8743
+ return `${prompt}
8744
+
8745
+ DOCLING DOCUMENT TEXT:
8746
+ ${doclingDocument.fullText}`;
8747
+ }
8748
+ function withPageRangeTextContext(prompt, startPage, endPage, pageText) {
8749
+ if (!doclingDocument) return prompt;
8750
+ return `${prompt}
8751
+
8752
+ DOCLING DOCUMENT PAGES ${startPage}-${endPage}:
8753
+ ${pageText || "(No Docling text was available for this page range.)"}`;
8754
+ }
8413
8755
  let classifyResult;
8414
8756
  if (resumed?.classifyResult && pipelineCtx.isPhaseComplete("classify")) {
8415
8757
  classifyResult = resumed.classifyResult;
@@ -8422,10 +8764,12 @@ function createExtractor(config) {
8422
8764
  const classifyResponse = await safeGenerateObject(
8423
8765
  generateObject,
8424
8766
  {
8425
- prompt: buildClassifyPrompt(),
8767
+ prompt: withFullDocumentTextContext(buildClassifyPrompt()),
8426
8768
  schema: ClassifyResultSchema,
8427
8769
  maxTokens: budget.maxTokens,
8428
- providerOptions: await getFullPdfProviderOptions()
8770
+ taskKind: "extraction_classify",
8771
+ budgetDiagnostics: budget,
8772
+ providerOptions: await getFullDocumentProviderOptions()
8429
8773
  },
8430
8774
  {
8431
8775
  fallback: { documentType: "policy", policyTypes: ["other"], confidence: 0 },
@@ -8470,10 +8814,12 @@ function createExtractor(config) {
8470
8814
  const formInventoryResponse = await safeGenerateObject(
8471
8815
  generateObject,
8472
8816
  {
8473
- prompt: buildFormInventoryPrompt(templateHints),
8817
+ prompt: withFullDocumentTextContext(buildFormInventoryPrompt(templateHints)),
8474
8818
  schema: FormInventorySchema,
8475
8819
  maxTokens: budget.maxTokens,
8476
- providerOptions: await getFullPdfProviderOptions()
8820
+ taskKind: "extraction_form_inventory",
8821
+ budgetDiagnostics: budget,
8822
+ providerOptions: await getFullDocumentProviderOptions()
8477
8823
  },
8478
8824
  {
8479
8825
  fallback: { forms: [] },
@@ -8516,16 +8862,24 @@ function createExtractor(config) {
8516
8862
  const pageMapResults = await Promise.all(
8517
8863
  pageMapChunks.map(
8518
8864
  ({ startPage, endPage }) => pageMapLimit(async () => {
8519
- const pagesPdf = await getPageRangePdf(startPage, endPage);
8865
+ const pagesPdf = doclingDocument ? void 0 : await getPageRangePdf(startPage, endPage);
8866
+ const pagesText = doclingDocument ? await getPageRangeText(startPage, endPage) : "";
8520
8867
  const budget = resolveBudget("extraction_page_map", 2048);
8521
8868
  const startedAt = Date.now();
8522
8869
  const mapResponse = await safeGenerateObject(
8523
8870
  generateObject,
8524
8871
  {
8525
- prompt: buildPageMapPrompt(templateHints, startPage, endPage, formInventoryHint),
8872
+ prompt: withPageRangeTextContext(
8873
+ buildPageMapPrompt(templateHints, startPage, endPage, formInventoryHint),
8874
+ startPage,
8875
+ endPage,
8876
+ pagesText
8877
+ ),
8526
8878
  schema: PageMapChunkSchema,
8527
8879
  maxTokens: budget.maxTokens,
8528
- providerOptions: { ...activeProviderOptions, pdfBase64: pagesPdf }
8880
+ taskKind: "extraction_page_map",
8881
+ budgetDiagnostics: budget,
8882
+ providerOptions: doclingDocument ? { ...activeProviderOptions, doclingText: pagesText, doclingPageRange: { startPage, endPage } } : { ...activeProviderOptions, pdfBase64: pagesPdf }
8529
8883
  },
8530
8884
  {
8531
8885
  fallback: {
@@ -8603,7 +8957,7 @@ function createExtractor(config) {
8603
8957
  }))
8604
8958
  ];
8605
8959
  onProgress?.(`Dispatching ${tasks.length} extractors...`);
8606
- const extractionPdfInput = await getPdfBase64ForExtraction();
8960
+ const extractionPdfInput = doclingDocument ? void 0 : await getPdfBase64ForExtraction();
8607
8961
  const extractorResults = await Promise.all(
8608
8962
  tasks.map(
8609
8963
  (task) => extractorLimit(async () => {
@@ -8614,7 +8968,8 @@ function createExtractor(config) {
8614
8968
  memory,
8615
8969
  completedPageRangePdfCache,
8616
8970
  getPageRangePdf,
8617
- convertPdfToImages ? getPageImages : void 0
8971
+ convertPdfToImages ? getPageImages : void 0,
8972
+ doclingDocument ? getPageRangeText : void 0
8618
8973
  );
8619
8974
  })
8620
8975
  )
@@ -8641,10 +8996,13 @@ function createExtractor(config) {
8641
8996
  generateObject,
8642
8997
  convertPdfToImages,
8643
8998
  maxTokens: budget.maxTokens,
8999
+ taskKind: "extraction_focused",
9000
+ budgetDiagnostics: budget,
8644
9001
  providerOptions: activeProviderOptions,
8645
9002
  pageRangeCache: completedPageRangePdfCache,
8646
9003
  getPageRangePdf,
8647
- getPageImages: convertPdfToImages ? getPageImages : void 0
9004
+ getPageImages: convertPdfToImages ? getPageImages : void 0,
9005
+ getPageRangeText: doclingDocument ? getPageRangeText : void 0
8648
9006
  });
8649
9007
  trackUsage(supplementaryResult.usage, {
8650
9008
  taskKind: "extraction_focused",
@@ -8680,6 +9038,7 @@ function createExtractor(config) {
8680
9038
  concurrency,
8681
9039
  getPageRangePdf,
8682
9040
  getPageImages: convertPdfToImages ? getPageImages : void 0,
9041
+ getPageRangeText: doclingDocument ? getPageRangeText : void 0,
8683
9042
  providerOptions: activeProviderOptions,
8684
9043
  modelCapabilities,
8685
9044
  modelBudgetConstraints,
@@ -8728,13 +9087,22 @@ function createExtractor(config) {
8728
9087
  const reviewResponse = await safeGenerateObject(
8729
9088
  generateObject,
8730
9089
  {
8731
- prompt: buildReviewPrompt(template.required, extractedKeys, extractionSummary, pageMapSummary, extractorCatalog),
9090
+ prompt: withFullDocumentTextContext(buildReviewPrompt(template.required, extractedKeys, extractionSummary, pageMapSummary, extractorCatalog)),
8732
9091
  schema: ReviewResultSchema,
8733
9092
  maxTokens: budget.maxTokens,
8734
- providerOptions: await getFullPdfProviderOptions()
9093
+ taskKind: "extraction_review",
9094
+ budgetDiagnostics: budget,
9095
+ providerOptions: await getFullDocumentProviderOptions()
8735
9096
  },
8736
9097
  {
8737
- fallback: { complete: true, missingFields: [], qualityIssues: [], additionalTasks: [] },
9098
+ fallback: {
9099
+ complete: false,
9100
+ missingFields: ["llm_review_unavailable"],
9101
+ qualityIssues: [
9102
+ "LLM extraction review failed; deterministic review was used and the result needs review."
9103
+ ],
9104
+ additionalTasks: []
9105
+ },
8738
9106
  log,
8739
9107
  onError: (err, attempt) => log?.(`Review round ${round + 1} attempt ${attempt + 1} failed: ${err}`)
8740
9108
  }
@@ -8754,7 +9122,7 @@ function createExtractor(config) {
8754
9122
  break;
8755
9123
  }
8756
9124
  onProgress?.(`Review round ${round + 1}: dispatching ${reviewResponse.object.additionalTasks.length} follow-up extractors...`);
8757
- const extractionPdfInput = await getPdfBase64ForExtraction();
9125
+ const extractionPdfInput = doclingDocument ? void 0 : await getPdfBase64ForExtraction();
8758
9126
  const followUpResults = await Promise.all(
8759
9127
  reviewResponse.object.additionalTasks.map(
8760
9128
  (task) => extractorLimit(async () => {
@@ -8764,7 +9132,8 @@ function createExtractor(config) {
8764
9132
  memory,
8765
9133
  completedPageRangePdfCache,
8766
9134
  getPageRangePdf,
8767
- convertPdfToImages ? getPageImages : void 0
9135
+ convertPdfToImages ? getPageImages : void 0,
9136
+ doclingDocument ? getPageRangeText : void 0
8768
9137
  );
8769
9138
  })
8770
9139
  )
@@ -8835,6 +9204,8 @@ function createExtractor(config) {
8835
9204
  prompt: buildSummaryPrompt(document),
8836
9205
  schema: SummaryResultSchema,
8837
9206
  maxTokens: budget.maxTokens,
9207
+ taskKind: "extraction_summary",
9208
+ budgetDiagnostics: budget,
8838
9209
  providerOptions: activeProviderOptions
8839
9210
  },
8840
9211
  {
@@ -8862,6 +9233,8 @@ function createExtractor(config) {
8862
9233
  const formatResult = await formatDocumentContent(document, generateText, {
8863
9234
  providerOptions: activeProviderOptions,
8864
9235
  maxTokens: formatBudget.maxTokens,
9236
+ taskKind: "extraction_format",
9237
+ budgetDiagnostics: formatBudget,
8865
9238
  concurrency: formatConcurrency ?? concurrency,
8866
9239
  onProgress,
8867
9240
  log
@@ -9264,6 +9637,7 @@ async function classifyApplication(pdfContent, generateObject, providerOptions,
9264
9637
  Analyze the attached insurance document. If text source units are provided in provider options, use them as supporting context. Do not infer from base64 text.`,
9265
9638
  schema: ApplicationClassifyResultSchema,
9266
9639
  maxTokens,
9640
+ taskKind: "application_classify",
9267
9641
  providerOptions: {
9268
9642
  ...providerOptions,
9269
9643
  pdfBase64: providerOptions?.pdfBase64 ?? pdfContent
@@ -9366,6 +9740,7 @@ Extract fields from the attached application PDF. Use provider-supplied source u
9366
9740
  prompt,
9367
9741
  schema: FieldExtractionResultSchema,
9368
9742
  maxTokens,
9743
+ taskKind: "application_extract_fields",
9369
9744
  providerOptions: {
9370
9745
  ...providerOptions,
9371
9746
  pdfBase64: providerOptions?.pdfBase64 ?? pdfContent
@@ -9419,6 +9794,7 @@ async function autoFillFromContext(fields, orgContext, generateObject, providerO
9419
9794
  prompt,
9420
9795
  schema: AutoFillResultSchema,
9421
9796
  maxTokens,
9797
+ taskKind: "application_auto_fill",
9422
9798
  providerOptions
9423
9799
  })
9424
9800
  );
@@ -9489,6 +9865,7 @@ async function batchQuestions(unfilledFields, generateObject, providerOptions, m
9489
9865
  prompt,
9490
9866
  schema: QuestionBatchResultSchema,
9491
9867
  maxTokens,
9868
+ taskKind: "application_batch",
9492
9869
  providerOptions
9493
9870
  })
9494
9871
  );
@@ -9540,6 +9917,7 @@ async function classifyReplyIntent(fields, replyText, generateObject, providerOp
9540
9917
  prompt,
9541
9918
  schema: ReplyIntentSchema,
9542
9919
  maxTokens,
9920
+ taskKind: "application_classify",
9543
9921
  providerOptions
9544
9922
  })
9545
9923
  );
@@ -9599,6 +9977,7 @@ async function parseAnswers(fields, replyText, generateObject, providerOptions,
9599
9977
  prompt,
9600
9978
  schema: AnswerParsingResultSchema,
9601
9979
  maxTokens,
9980
+ taskKind: "application_parse_answers",
9602
9981
  providerOptions
9603
9982
  })
9604
9983
  );
@@ -9728,6 +10107,7 @@ async function fillFromLookup(requests, targetFields, availableData, generateObj
9728
10107
  prompt,
9729
10108
  schema: LookupFillResultSchema,
9730
10109
  maxTokens,
10110
+ taskKind: "application_lookup",
9731
10111
  providerOptions
9732
10112
  })
9733
10113
  );
@@ -9810,6 +10190,7 @@ async function generateBatchEmail(batchFields, batchIndex, totalBatches, opts, g
9810
10190
  () => generateText({
9811
10191
  prompt,
9812
10192
  maxTokens,
10193
+ taskKind: "application_email",
9813
10194
  providerOptions
9814
10195
  })
9815
10196
  );
@@ -10332,11 +10713,14 @@ function createApplicationPipeline(config) {
10332
10713
  }
10333
10714
  if (replyPlan.answerQuestion && intent.questionText) {
10334
10715
  try {
10716
+ const budget = resolveBudget("application_email", 512);
10335
10717
  const { text, usage } = await generateText({
10336
10718
  prompt: `The user is filling out an insurance application and asked: "${intent.questionText}"
10337
10719
 
10338
10720
  Provide a brief, helpful explanation (2-3 sentences). End with "Just reply with the answer when you're ready and I'll fill it in."`,
10339
- maxTokens: resolveBudget("application_email", 512).maxTokens,
10721
+ maxTokens: budget.maxTokens,
10722
+ taskKind: "application_email",
10723
+ budgetDiagnostics: budget,
10340
10724
  providerOptions
10341
10725
  });
10342
10726
  trackUsage(usage);
@@ -10461,6 +10845,7 @@ ${emailText}`;
10461
10845
  if (!state) throw new Error(`Application ${applicationId} not found`);
10462
10846
  const filledFields = state.fields.filter((f) => f.value);
10463
10847
  const fieldSummary = filledFields.map((f) => `${f.section} > ${f.label}: ${f.value} (source: ${f.source ?? "unknown"})`).join("\n");
10848
+ const budget = resolveBudget("application_email", 4096);
10464
10849
  const { text, usage } = await generateText({
10465
10850
  prompt: `Format these filled insurance application fields as a clean confirmation summary for the user to review. Group by section, show each field as "Label: Value". End with a note asking them to confirm or request changes.
10466
10851
 
@@ -10468,7 +10853,9 @@ Application: ${state.title ?? "Insurance Application"}
10468
10853
 
10469
10854
  Fields:
10470
10855
  ${fieldSummary}`,
10471
- maxTokens: resolveBudget("application_email", 4096).maxTokens,
10856
+ maxTokens: budget.maxTokens,
10857
+ taskKind: "application_email",
10858
+ budgetDiagnostics: budget,
10472
10859
  providerOptions
10473
10860
  });
10474
10861
  trackUsage(usage);
@@ -10942,6 +11329,8 @@ ${e.text}`;
10942
11329
  prompt,
10943
11330
  schema: SubAnswerSchema,
10944
11331
  maxTokens: budget.maxTokens,
11332
+ taskKind: "query_reason",
11333
+ budgetDiagnostics: budget,
10945
11334
  providerOptions
10946
11335
  })
10947
11336
  );
@@ -11165,6 +11554,8 @@ async function verify(originalQuestion, subAnswers, allEvidence, config) {
11165
11554
  prompt,
11166
11555
  schema: VerifyResultSchema,
11167
11556
  maxTokens: budget.maxTokens,
11557
+ taskKind: "query_verify",
11558
+ budgetDiagnostics: budget,
11168
11559
  providerOptions
11169
11560
  })
11170
11561
  );
@@ -11307,6 +11698,8 @@ async function interpretAttachments(params) {
11307
11698
  prompt,
11308
11699
  schema: AttachmentInterpretationSchema,
11309
11700
  maxTokens: budget.maxTokens,
11701
+ taskKind: "query_attachment",
11702
+ budgetDiagnostics: budget,
11310
11703
  providerOptions: buildAttachmentProviderOptions(attachment, providerOptions)
11311
11704
  },
11312
11705
  {
@@ -11644,6 +12037,8 @@ function createQueryAgent(config) {
11644
12037
  prompt,
11645
12038
  schema: QueryClassifyResultSchema,
11646
12039
  maxTokens: budget.maxTokens,
12040
+ taskKind: "query_classify",
12041
+ budgetDiagnostics: budget,
11647
12042
  providerOptions
11648
12043
  },
11649
12044
  {
@@ -11695,6 +12090,8 @@ function createQueryAgent(config) {
11695
12090
  prompt,
11696
12091
  schema: QueryResultSchema,
11697
12092
  maxTokens: budget.maxTokens,
12093
+ taskKind: "query_respond",
12094
+ budgetDiagnostics: budget,
11698
12095
  providerOptions
11699
12096
  },
11700
12097
  {
@@ -11790,6 +12187,8 @@ function createPceAgent(config = {}) {
11790
12187
  prompt: buildPceNormalizePrompt({ requestText: input.requestText, evidenceSources }),
11791
12188
  schema: PceNormalizationResultSchema,
11792
12189
  maxTokens: budget.maxTokens,
12190
+ taskKind: "pce_impact_analysis",
12191
+ budgetDiagnostics: budget,
11793
12192
  providerOptions: config.providerOptions
11794
12193
  },
11795
12194
  { fallback, maxRetries: 1, log: config.log }
@@ -11851,6 +12250,8 @@ function createPceAgent(config = {}) {
11851
12250
  }),
11852
12251
  schema: ReplyAnswersSchema,
11853
12252
  maxTokens: budget.maxTokens,
12253
+ taskKind: "pce_reply_parse",
12254
+ budgetDiagnostics: budget,
11854
12255
  providerOptions: config.providerOptions
11855
12256
  },
11856
12257
  { fallback: { answers }, maxRetries: 1, log: config.log }
@@ -12732,6 +13133,7 @@ var AGENT_TOOLS = [
12732
13133
  buildConfirmationSummaryPrompt,
12733
13134
  buildConversationMemoryGuidance,
12734
13135
  buildCoverageGapPrompt,
13136
+ buildDoclingProviderOptions,
12735
13137
  buildFieldExplanationPrompt,
12736
13138
  buildFieldExtractionPrompt,
12737
13139
  buildFlatPdfMappingPrompt,
@@ -12773,12 +13175,16 @@ var AGENT_TOOLS = [
12773
13175
  fillAcroForm,
12774
13176
  generateNextMessage,
12775
13177
  getAcroFormFields,
13178
+ getDoclingPageRangeText,
12776
13179
  getExtractor,
12777
13180
  getFileIdentifier,
12778
13181
  getPdfPageCount,
12779
13182
  getTemplate,
13183
+ isDoclingExtractionInput,
12780
13184
  isFileReference,
12781
13185
  mergeQuestionAnswers,
13186
+ mergeSourceSpans,
13187
+ normalizeDoclingDocument,
12782
13188
  normalizeForMatch,
12783
13189
  orderSourceEvidence,
12784
13190
  overlayTextOnPdf,