@claritylabs/cl-sdk 1.1.0 → 1.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +14 -1
- package/dist/index.d.mts +72 -2
- package/dist/index.d.ts +72 -2
- package/dist/index.js +389 -48
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +384 -48
- package/dist/index.mjs.map +1 -1
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -256,6 +256,7 @@ __export(index_exports, {
|
|
|
256
256
|
buildConfirmationSummaryPrompt: () => buildConfirmationSummaryPrompt,
|
|
257
257
|
buildConversationMemoryGuidance: () => buildConversationMemoryGuidance,
|
|
258
258
|
buildCoverageGapPrompt: () => buildCoverageGapPrompt,
|
|
259
|
+
buildDoclingProviderOptions: () => buildDoclingProviderOptions,
|
|
259
260
|
buildFieldExplanationPrompt: () => buildFieldExplanationPrompt,
|
|
260
261
|
buildFieldExtractionPrompt: () => buildFieldExtractionPrompt,
|
|
261
262
|
buildFlatPdfMappingPrompt: () => buildFlatPdfMappingPrompt,
|
|
@@ -297,12 +298,16 @@ __export(index_exports, {
|
|
|
297
298
|
fillAcroForm: () => fillAcroForm,
|
|
298
299
|
generateNextMessage: () => generateNextMessage,
|
|
299
300
|
getAcroFormFields: () => getAcroFormFields,
|
|
301
|
+
getDoclingPageRangeText: () => getDoclingPageRangeText,
|
|
300
302
|
getExtractor: () => getExtractor,
|
|
301
303
|
getFileIdentifier: () => getFileIdentifier,
|
|
302
304
|
getPdfPageCount: () => getPdfPageCount,
|
|
303
305
|
getTemplate: () => getTemplate,
|
|
306
|
+
isDoclingExtractionInput: () => isDoclingExtractionInput,
|
|
304
307
|
isFileReference: () => isFileReference,
|
|
305
308
|
mergeQuestionAnswers: () => mergeQuestionAnswers,
|
|
309
|
+
mergeSourceSpans: () => mergeSourceSpans,
|
|
310
|
+
normalizeDoclingDocument: () => normalizeDoclingDocument,
|
|
306
311
|
normalizeForMatch: () => normalizeForMatch,
|
|
307
312
|
orderSourceEvidence: () => orderSourceEvidence,
|
|
308
313
|
overlayTextOnPdf: () => overlayTextOnPdf,
|
|
@@ -2794,6 +2799,254 @@ async function overlayTextOnPdf(pdfBytes, overlays) {
|
|
|
2794
2799
|
return await pdfDoc.save();
|
|
2795
2800
|
}
|
|
2796
2801
|
|
|
2802
|
+
// src/extraction/docling.ts
|
|
2803
|
+
/**
 * Checks whether an extraction input carries a Docling document.
 *
 * @param {unknown} input - Candidate extraction input.
 * @returns {boolean} `true` only when `input` is an object whose `kind` is
 *   `"docling_document"` and whose `document` property is a truthy object.
 */
function isDoclingExtractionInput(input) {
  if (!input || typeof input !== "object") return false;
  if (input.kind !== "docling_document") return false;
  const payload = input.document;
  return Boolean(payload) && typeof payload === "object";
}
|
|
2808
|
+
/**
 * Flattens a Docling document into ordered text units, per-page text, a
 * combined full-text string and one source span per unit.
 *
 * Items are taken in the order produced by walking `document.body`; when that
 * walk yields nothing, the array order of texts, tables and key/value items is
 * used instead. Items without text are dropped.
 *
 * @param {object} document - Docling document (texts/tables/groups/body/pages).
 * @param {object} options - Reads `documentId` and optional `sourceKind`
 *   (defaults to `"policy_pdf"`).
 * @returns {{pageCount: number, fullText: string, pageTexts: Map<number, string>, units: object[], sourceSpans: object[]}}
 */
function normalizeDoclingDocument(document, options) {
  const itemMap = buildItemMap(document);
  const bodyRefs = getOrderedBodyRefs(document, itemMap);

  let orderedItems;
  if (bodyRefs.length > 0) {
    orderedItems = bodyRefs.map((ref) => itemMap.get(ref)).filter((entry) => Boolean(entry));
  } else {
    orderedItems = getFallbackOrderedItems(document, itemMap);
  }

  const units = orderedItems
    .map((entry) => normalizeItem(entry.ref, entry.item))
    .filter((unit) => Boolean(unit && unit.text.trim()));

  const pageCount = inferPageCount(document, units);

  // Each unit's text is filed under its first page (page 1 when it has none).
  const pageTexts = /* @__PURE__ */ new Map();
  for (const unit of units) {
    const page = clampPage(unit.pageStart ?? 1, pageCount);
    pageTexts.set(page, appendText(pageTexts.get(page), unit.text));
  }

  // Pages with no text are omitted from the combined string.
  const pageSections = Array.from({ length: pageCount }, (_, index) => {
    const pageNumber = index + 1;
    const text = pageTexts.get(pageNumber)?.trim();
    if (!text) return "";
    return `Page ${pageNumber}\n${text}`;
  });
  const fullText = pageSections.filter(Boolean).join("\n\n");

  const sourceKind = options.sourceKind ?? "policy_pdf";
  const sourceSpans = units.map((unit, index) => {
    const metadata = {
      sourceSystem: "docling",
      sourceUnit: "docling_item",
      doclingRef: unit.ref
    };
    if (unit.label) metadata.doclingLabel = unit.label;
    const span = buildSourceSpan(
      {
        documentId: options.documentId,
        sourceKind,
        text: unit.text,
        pageStart: unit.pageStart,
        pageEnd: unit.pageEnd,
        sectionId: unit.label,
        metadata
      },
      index
    );
    return {
      ...span,
      kind: "plain_text",
      bbox: unit.bboxes?.length ? unit.bboxes : void 0
    };
  });

  return { pageCount, fullText, pageTexts, units, sourceSpans };
}
|
|
2858
|
+
/**
 * Joins the text of an inclusive page range from a normalized Docling document.
 *
 * Both bounds are clamped through `clampPage`; pages whose text is missing or
 * whitespace-only are skipped.
 *
 * @param {{pageCount: number, pageTexts: Map<number, string>}} normalized
 * @param {number} startPage - First page of the range (1-indexed).
 * @param {number} endPage - Last page of the range (inclusive).
 * @returns {string} `Page N` sections separated by blank lines, or `""`.
 */
function getDoclingPageRangeText(normalized, startPage, endPage) {
  const firstPage = clampPage(startPage, normalized.pageCount);
  const lastPage = clampPage(endPage, normalized.pageCount);
  const sections = [];
  let page = firstPage;
  while (page <= lastPage) {
    const body = normalized.pageTexts.get(page)?.trim();
    if (body) sections.push(`Page ${page}\n${body}`);
    page++;
  }
  return sections.join("\n\n");
}
|
|
2871
|
+
/**
 * Builds provider options that carry the Docling text instead of a PDF.
 *
 * @param {{fullText: string, pageCount: number}} normalized - Normalized document.
 * @param {object} [existingOptions] - Options to copy; never mutated.
 * @returns {object} Shallow copy of `existingOptions` with `doclingText` and
 *   `doclingPageCount` set (overriding any same-named keys).
 */
function buildDoclingProviderOptions(normalized, existingOptions) {
  const options = { ...existingOptions };
  options.doclingText = normalized.fullText;
  options.doclingPageCount = normalized.pageCount;
  return options;
}
|
|
2878
|
+
/**
 * Removes duplicate source spans while keeping first-seen order.
 *
 * Two spans are duplicates when they share document id, start page, end page,
 * section (or field path) and text hash. The hash is computed with
 * `sourceSpanTextHash` only when a span has no `textHash` of its own.
 *
 * @param {Iterable<object>} spans - Spans to de-duplicate.
 * @returns {object[]} New array containing the first occurrence of each span.
 */
function mergeSourceSpans(spans) {
  const identityOf = (span) => {
    const firstPage = span.pageStart ?? span.location?.startPage ?? span.location?.page ?? "na";
    const lastPage = span.pageEnd ?? span.location?.endPage ?? span.pageStart ?? "na";
    const section = span.sectionId ?? span.location?.fieldPath ?? "na";
    const hash = span.textHash ?? sourceSpanTextHash(span.text);
    return [span.documentId, firstPage, lastPage, section, hash].join(":");
  };

  const seenKeys = /* @__PURE__ */ new Set();
  const unique = [];
  for (const span of spans) {
    const key = identityOf(span);
    if (!seenKeys.has(key)) {
      seenKeys.add(key);
      unique.push(span);
    }
  }
  return unique;
}
|
|
2895
|
+
/**
 * Indexes a Docling document's texts, tables, key/value items and pictures by
 * reference.
 *
 * @param {object} document - Docling document; missing collections count as
 *   empty, and `keyValueItems` is accepted as an alias of `key_value_items`.
 * @returns {Map<string, {ref: string, item: object}>} Entries keyed by ref.
 */
function buildItemMap(document) {
  const collections = [
    ["#/texts", document.texts ?? []],
    ["#/tables", document.tables ?? []],
    ["#/key_value_items", document.key_value_items ?? document.keyValueItems ?? []],
    ["#/pictures", document.pictures ?? []]
  ];
  const itemsByRef = /* @__PURE__ */ new Map();
  for (const [baseRef, items] of collections) {
    addItems(itemsByRef, baseRef, items);
  }
  return itemsByRef;
}
|
|
2903
|
+
/**
 * Registers every item of one collection in the reference map.
 *
 * @param {Map<string, {ref: string, item: object}>} map - Map to fill (mutated).
 * @param {string} baseRef - Prefix used to build `${baseRef}/${index}` when an
 *   item has no self reference.
 * @param {object[]} items - Items of the collection.
 * @returns {void}
 */
function addItems(map, baseRef, items) {
  const register = (item, index) => {
    const explicitRef = getSelfRef(item);
    const ref = explicitRef ?? `${baseRef}/${index}`;
    map.set(ref, { ref, item });
  };
  items.forEach(register);
}
|
|
2909
|
+
/**
 * Lists item entries in plain array order: all texts, then tables, then
 * key/value items. Pictures are not included in this ordering.
 *
 * @param {object} document - Docling document.
 * @param {Map<string, {ref: string, item: object}>} itemMap - Entries by ref.
 * @returns {Array<{ref: string, item: object}>} Entries found in `itemMap`.
 */
function getFallbackOrderedItems(document, itemMap) {
  const refsFor = (items, baseRef) =>
    items.map((item, index) => getSelfRef(item) ?? `${baseRef}/${index}`);

  const orderedRefs = [
    ...refsFor(document.texts ?? [], "#/texts"),
    ...refsFor(document.tables ?? [], "#/tables"),
    ...refsFor(document.key_value_items ?? document.keyValueItems ?? [], "#/key_value_items")
  ];

  const entries = [];
  for (const ref of orderedRefs) {
    const entry = itemMap.get(ref);
    if (entry) entries.push(entry);
  }
  return entries;
}
|
|
2917
|
+
/**
 * Walks `document.body` depth-first and returns the refs of content items in
 * the order they are first reached.
 *
 * Child references may point at content items (present in `itemMap`) or at
 * groups (from `document.groups`); both can have children of their own.
 * Every ref is expanded at most once, so a group or item that references
 * itself, directly or through a cycle, cannot recurse forever. For acyclic
 * documents the result is the same as expanding repeated refs again, because
 * a repeated expansion can only reach refs that were already emitted.
 *
 * @param {object} document - Docling document (reads `groups` and `body`).
 * @param {Map<string, {ref: string, item: object}>} itemMap - Items by ref.
 * @returns {string[]} Item refs in traversal order, without duplicates.
 */
function getOrderedBodyRefs(document, itemMap) {
  const groupMap = /* @__PURE__ */ new Map();
  (document.groups ?? []).forEach((group, index) => {
    groupMap.set(getSelfRef(group) ?? `#/groups/${index}`, group);
  });

  const refs = [];
  // Refs (items, groups and unknown) that have already been expanded.
  const expanded = /* @__PURE__ */ new Set();

  const visitRef = (ref) => {
    if (expanded.has(ref)) return;
    expanded.add(ref);
    const itemEntry = itemMap.get(ref);
    if (itemEntry) {
      refs.push(ref);
      visitNode(itemEntry.item);
      return;
    }
    // Not an item: treat as a group; an unknown ref yields no children.
    visitNode(groupMap.get(ref));
  };

  const visitNode = (node) => {
    for (const child of node?.children ?? []) {
      const ref = getRef(child);
      if (!ref) continue;
      visitRef(ref);
    }
  };

  visitNode(document.body);
  return refs;
}
|
|
2946
|
+
/**
 * Converts one Docling item into a text unit.
 *
 * @param {string} ref - Reference under which the item is registered.
 * @param {object} item - Docling item (reads `prov` and `label`, plus whatever
 *   `getItemText` reads).
 * @returns {{ref: string, label: (string|undefined), text: string, pageStart: (number|undefined), pageEnd: (number|undefined), bboxes: (object[]|undefined)}|undefined}
 *   The unit, or `undefined` when the item has no text after trimming.
 */
function normalizeItem(ref, item) {
  const text = getItemText(item).trim();
  if (!text) return void 0;

  // Only positive numeric page numbers count towards the page range.
  const pages = (item.prov ?? [])
    .map((prov) => getPageNumber(prov))
    .filter((page) => typeof page === "number" && page > 0);
  let pageStart = void 0;
  let pageEnd = void 0;
  if (pages.length) {
    pageStart = Math.min(...pages);
    pageEnd = Math.max(...pages);
  }

  const bboxes = (item.prov ?? [])
    .map((prov) => toSourceSpanBBox(prov))
    .filter((bbox) => Boolean(bbox));

  const label = typeof item.label === "string" ? item.label : void 0;
  return {
    ref,
    label,
    text,
    pageStart,
    pageEnd,
    bboxes: bboxes.length ? bboxes : void 0
  };
}
|
|
2962
|
+
/**
 * Picks the text to use for a Docling item.
 *
 * Preference order: non-blank `text`, non-blank `orig`, then a Markdown
 * rendering of `data` produced by `tableToMarkdown`.
 *
 * @param {object} item - Docling item.
 * @returns {string} The chosen text (not trimmed), or `""` when none exists.
 */
function getItemText(item) {
  const { text, orig } = item;
  if (typeof text === "string" && text.trim()) return text;
  if (typeof orig === "string" && orig.trim()) return orig;
  return tableToMarkdown(item.data) || "";
}
|
|
2969
|
+
/**
 * Renders Docling table data as Markdown.
 *
 * Cells are read from `table_cells` (or `tableCells`). Each cell is placed by
 * its row/column offset (defaulting to 0) and contributes its first non-blank
 * `text`, `orig` or `content`. Cells without text are ignored. Cells whose
 * offsets are negative or not whole numbers are also ignored, because they
 * cannot be placed in the grid and would otherwise throw while writing to a
 * row that does not exist.
 *
 * @param {unknown} data - The `data` property of a Docling table item.
 * @returns {string|undefined} A Markdown table (first row as header), a
 *   ` | `-joined line when there is a single row, or `undefined` when there
 *   are no usable cells.
 */
function tableToMarkdown(data) {
  const record = asRecord(data);
  const cells = Array.isArray(record?.table_cells) ? record.table_cells : Array.isArray(record?.tableCells) ? record.tableCells : void 0;
  if (!cells) return void 0;

  const isGridIndex = (value) => Number.isInteger(value) && value >= 0;

  const parsedCells = [];
  // Track the grid size while parsing instead of spreading every cell into
  // Math.max, which can exceed the engine's argument limit on huge tables.
  let maxRow = 0;
  let maxCol = 0;
  for (const rawCell of cells) {
    const cell = asRecord(rawCell);
    if (!cell) continue;
    const row = firstNumber2([cell.start_row_offset, cell.row_header, cell.row, cell.rowIndex]) ?? 0;
    const col = firstNumber2([cell.start_col_offset, cell.col, cell.colIndex]) ?? 0;
    const text = firstString([cell.text, cell.orig, cell.content]);
    if (!text) continue;
    if (!isGridIndex(row) || !isGridIndex(col)) continue;
    parsedCells.push({ row, col, text });
    if (row > maxRow) maxRow = row;
    if (col > maxCol) maxCol = col;
  }
  if (parsedCells.length === 0) return void 0;

  const rows = Array.from({ length: maxRow + 1 }, () => Array.from({ length: maxCol + 1 }, () => ""));
  for (const cell of parsedCells) {
    rows[cell.row][cell.col] = cell.text;
  }

  // A single row is emitted as a plain line rather than a header-only table.
  if (rows.length === 1) return rows[0].filter(Boolean).join(" | ");

  const header = rows[0];
  const separator = header.map(() => "---");
  return [header, separator, ...rows.slice(1)].map((row) => `| ${row.map((value) => value.trim()).join(" | ")} |`).join("\n");
}
|
|
2990
|
+
/**
 * Works out how many pages a Docling document has.
 *
 * Uses, in order: the length of `document.pages` when it is an array; the
 * largest numeric key of `document.pages` when it is an object (or its key
 * count when no key is a positive number); otherwise the highest page number
 * seen on any unit. The result is never below 1.
 *
 * The maxima are computed with loops rather than `Math.max(...values)` so the
 * unit list can be arbitrarily long without hitting the engine's limit on
 * the number of call arguments.
 *
 * @param {object} document - Docling document (reads `pages`).
 * @param {Array<{pageStart?: number, pageEnd?: number}>} units - Text units.
 * @returns {number} Page count, at least 1.
 */
function inferPageCount(document, units) {
  const pages = document.pages;
  if (Array.isArray(pages)) return Math.max(1, pages.length);

  if (pages && typeof pages === "object") {
    const keys = Object.keys(pages);
    let numericMax = 0;
    for (const key of keys) {
      const value = Number(key);
      if (Number.isFinite(value) && value > numericMax) numericMax = value;
    }
    // No positive numeric key: fall back to the number of entries.
    return Math.max(1, numericMax || keys.length);
  }

  let highest = 1;
  for (const unit of units) {
    highest = Math.max(highest, unit.pageStart ?? 0, unit.pageEnd ?? 0);
  }
  return highest;
}
|
|
3000
|
+
/**
 * Reads an item's or group's own reference.
 *
 * Tolerates `null`/`undefined` entries (for example a hole in a parsed
 * array) by returning `undefined` instead of throwing, so callers fall back
 * to an index-based ref.
 *
 * @param {object|null|undefined} value - Docling item or group.
 * @returns {string|undefined} `self_ref`, else `selfRef`, else `undefined`.
 */
function getSelfRef(value) {
  return value?.self_ref ?? value?.selfRef;
}
|
|
3003
|
+
/**
 * Extracts the target reference from a child entry.
 *
 * A child may be a plain string or an object with `$ref` (or `ref`).
 * `null`/`undefined` children yield `undefined` instead of throwing, so one
 * malformed entry does not abort the whole traversal.
 *
 * @param {string|object|null|undefined} value - Child entry.
 * @returns {string|undefined} The referenced path, if any.
 */
function getRef(value) {
  if (typeof value === "string") return value;
  return value?.$ref ?? value?.ref;
}
|
|
3007
|
+
/**
 * Reads the page number from a provenance entry.
 *
 * Accepts the `page_no`, `pageNo` and `page` spellings, in that order.
 * `null`/`undefined` entries yield `undefined` instead of throwing.
 *
 * @param {object|null|undefined} prov - Provenance entry of a Docling item.
 * @returns {*} The stored page value (not validated), or `undefined`.
 */
function getPageNumber(prov) {
  return prov?.page_no ?? prov?.pageNo ?? prov?.page;
}
|
|
3010
|
+
/**
 * Converts a provenance entry's bounding box into `{page, x, y, width, height}`.
 *
 * The origin is taken from `x`/`l`/`left` and `y`/`t`/`top`. The size comes
 * from `width`/`height` when present, otherwise from the distance to the
 * `r`/`right` and `b`/`bottom` edges. That distance is taken as an absolute
 * value so the size is never negative when the far edge has the smaller
 * coordinate (e.g. `b < t`).
 * NOTE(review): `x`/`y` are passed through unchanged; if the producer uses a
 * bottom-left origin, `y` is still the top edge in that system — confirm what
 * consumers of the span bbox expect.
 *
 * @param {object|null|undefined} prov - Provenance entry (page number + `bbox`).
 * @returns {{page: number, x: number, y: number, width: number, height: number}|undefined}
 *   The box, or `undefined` when the page, origin or size cannot be resolved.
 */
function toSourceSpanBBox(prov) {
  const page = getPageNumber(prov);
  const bbox = asRecord(prov?.bbox);
  if (!page || !bbox) return void 0;

  const x = firstNumber2([bbox.x, bbox.l, bbox.left]);
  const y = firstNumber2([bbox.y, bbox.t, bbox.top]);
  if (x == null || y == null) return void 0;

  const width = firstNumber2([bbox.width]);
  const height = firstNumber2([bbox.height]);
  const right = firstNumber2([bbox.r, bbox.right]);
  const bottom = firstNumber2([bbox.b, bbox.bottom]);

  // Explicit sizes win; edge-derived sizes are normalised to be non-negative.
  const resolvedWidth = width ?? (right != null ? Math.abs(right - x) : void 0);
  const resolvedHeight = height ?? (bottom != null ? Math.abs(bottom - y) : void 0);
  if (resolvedWidth == null || resolvedHeight == null) return void 0;

  return { page, x, y, width: resolvedWidth, height: resolvedHeight };
}
|
|
3026
|
+
/**
 * Forces a page number into the range `1..pageCount`.
 *
 * The result is always a usable whole page number: values that are not
 * numbers (including `undefined` and `NaN`) become page 1, and fractional
 * values are truncated. Without this, such inputs would flow into the
 * per-page Map as keys that the integer page lookups never match.
 *
 * @param {number} page - Requested page (1-indexed).
 * @param {number} pageCount - Highest valid page; anything below 1 or
 *   non-numeric is treated as 1.
 * @returns {number} Whole page number between 1 and the page count.
 */
function clampPage(page, pageCount) {
  const limit = Number(pageCount);
  const lastPage = limit >= 1 ? Math.trunc(limit) : 1;
  const requested = Number(page);
  if (Number.isNaN(requested)) return 1;
  return Math.max(1, Math.min(lastPage, Math.trunc(requested)));
}
|
|
3029
|
+
/**
 * Appends a text block to the text already collected, separated by a blank
 * line.
 *
 * @param {string|undefined} existing - Text collected so far, if any.
 * @param {string} next - Text to append.
 * @returns {string} `next` alone when `existing` is empty, otherwise both
 *   joined by two newlines.
 */
function appendText(existing, next) {
  if (!existing) return next;
  return `${existing}\n\n${next}`;
}
|
|
3034
|
+
/**
 * Narrows a value to a plain (non-array) object.
 *
 * @param {unknown} value - Value to check.
 * @returns {object|undefined} `value` itself when it is a truthy non-array
 *   object, otherwise `undefined`.
 */
function asRecord(value) {
  if (!value) return void 0;
  if (typeof value !== "object") return void 0;
  if (Array.isArray(value)) return void 0;
  return value;
}
|
|
3037
|
+
/**
 * Returns the first candidate that is a non-blank string, trimmed.
 *
 * @param {Iterable<unknown>} values - Candidates in priority order.
 * @returns {string} The trimmed match, or `""` when none qualifies.
 */
function firstString(values) {
  for (const candidate of values) {
    if (typeof candidate !== "string") continue;
    const trimmed = candidate.trim();
    if (trimmed) return trimmed;
  }
  return "";
}
|
|
3043
|
+
/**
 * Returns the first candidate that is a finite number.
 *
 * @param {Iterable<unknown>} values - Candidates in priority order.
 * @returns {number|undefined} The match, or `undefined` when none qualifies.
 */
function firstNumber2(values) {
  for (const candidate of values) {
    // Number.isFinite never coerces, so non-number candidates are rejected.
    if (Number.isFinite(candidate)) return candidate;
  }
  return void 0;
}
|
|
3049
|
+
|
|
2797
3050
|
// src/extraction/extractor.ts
|
|
2798
3051
|
function sourceSpansForPageRange(providerOptions, startPage, endPage) {
|
|
2799
3052
|
const sourceSpans = providerOptions?.sourceSpans;
|
|
@@ -2842,15 +3095,31 @@ async function runExtractor(params) {
|
|
|
2842
3095
|
} = params;
|
|
2843
3096
|
const extractorProviderOptions = { ...providerOptions };
|
|
2844
3097
|
let fullPrompt;
|
|
2845
|
-
|
|
2846
|
-
|
|
2847
|
-
|
|
3098
|
+
if (params.getPageRangeText) {
|
|
3099
|
+
const pageText = await params.getPageRangeText(startPage, endPage);
|
|
3100
|
+
extractorProviderOptions.doclingText = pageText;
|
|
3101
|
+
extractorProviderOptions.doclingPageRange = { startPage, endPage };
|
|
3102
|
+
fullPrompt = `${prompt}
|
|
3103
|
+
|
|
3104
|
+
[Document pages ${startPage}-${endPage} are provided below as Docling-extracted text.]
|
|
3105
|
+
|
|
3106
|
+
${pageText || "(No Docling text was available for this page range.)"}`;
|
|
3107
|
+
} else if (convertPdfToImages) {
|
|
3108
|
+
if (!pdfInput) {
|
|
3109
|
+
throw new Error("pdfInput is required when extracting page images.");
|
|
3110
|
+
}
|
|
3111
|
+
const needsPdfBase64 = !params.getPageImages;
|
|
3112
|
+
const pdfBase64 = needsPdfBase64 ? await pdfInputToBase64(pdfInput) : void 0;
|
|
2848
3113
|
const images = params.getPageImages ? await params.getPageImages(startPage, endPage) : await convertPdfToImages(pdfBase64, startPage, endPage);
|
|
2849
3114
|
extractorProviderOptions.images = images;
|
|
2850
3115
|
fullPrompt = `${prompt}
|
|
2851
3116
|
|
|
2852
3117
|
[Document pages ${startPage}-${endPage} are provided as images.]`;
|
|
2853
3118
|
} else {
|
|
3119
|
+
if (!pdfInput) {
|
|
3120
|
+
throw new Error("pdfInput is required when extracting page PDFs.");
|
|
3121
|
+
}
|
|
3122
|
+
const pdfBase64 = params.getPageRangePdf ? void 0 : await pdfInputToBase64(pdfInput);
|
|
2854
3123
|
const cacheKey = `${startPage}-${endPage}`;
|
|
2855
3124
|
const cachedPagesPdf = pageRangeCache?.get(cacheKey);
|
|
2856
3125
|
const pagesPdf = cachedPagesPdf ?? (params.getPageRangePdf ? await params.getPageRangePdf(startPage, endPage) : await extractPageRange(pdfBase64, startPage, endPage));
|
|
@@ -3890,7 +4159,7 @@ function formatAddress(addr) {
|
|
|
3890
4159
|
function asRecordArray(value) {
|
|
3891
4160
|
return Array.isArray(value) ? value.filter((item) => Boolean(item) && typeof item === "object" && !Array.isArray(item)) : [];
|
|
3892
4161
|
}
|
|
3893
|
-
function
|
|
4162
|
+
function firstString2(item, keys) {
|
|
3894
4163
|
for (const key of keys) {
|
|
3895
4164
|
const value = item[key];
|
|
3896
4165
|
if (typeof value === "string" && value.trim()) return value;
|
|
@@ -4247,32 +4516,32 @@ ${exc.content}`.trim(), {
|
|
|
4247
4516
|
);
|
|
4248
4517
|
});
|
|
4249
4518
|
asRecordArray(extendedDoc.definitions).forEach((definition, i) => {
|
|
4250
|
-
const term =
|
|
4251
|
-
const body =
|
|
4519
|
+
const term = firstString2(definition, ["term", "name", "title"]) ?? `Definition ${i + 1}`;
|
|
4520
|
+
const body = firstString2(definition, ["definition", "content", "text", "meaning"]);
|
|
4252
4521
|
pushChunk(
|
|
4253
4522
|
`definition:${i}`,
|
|
4254
4523
|
"definition",
|
|
4255
4524
|
lines([
|
|
4256
4525
|
`Definition: ${term}`,
|
|
4257
4526
|
body,
|
|
4258
|
-
|
|
4527
|
+
firstString2(definition, ["originalContent", "source"]) ? `Source: ${firstString2(definition, ["originalContent", "source"])}` : null
|
|
4259
4528
|
]),
|
|
4260
4529
|
{
|
|
4261
4530
|
term,
|
|
4262
|
-
formNumber:
|
|
4263
|
-
formTitle:
|
|
4531
|
+
formNumber: firstString2(definition, ["formNumber"]),
|
|
4532
|
+
formTitle: firstString2(definition, ["formTitle"]),
|
|
4264
4533
|
pageNumber: typeof definition.pageNumber === "number" ? definition.pageNumber : void 0,
|
|
4265
|
-
sectionRef:
|
|
4534
|
+
sectionRef: firstString2(definition, ["sectionRef", "sectionTitle"]),
|
|
4266
4535
|
documentType: doc.type
|
|
4267
4536
|
}
|
|
4268
4537
|
);
|
|
4269
4538
|
});
|
|
4270
4539
|
const coveredReasons = asRecordArray(extendedDoc.coveredReasons ?? extendedDoc.covered_reasons);
|
|
4271
4540
|
coveredReasons.forEach((coveredReason, i) => {
|
|
4272
|
-
const title =
|
|
4273
|
-
const coverageName =
|
|
4274
|
-
const reasonNumber =
|
|
4275
|
-
const body =
|
|
4541
|
+
const title = firstString2(coveredReason, ["title", "name", "reason", "peril", "cause"]) ?? `Covered Reason ${i + 1}`;
|
|
4542
|
+
const coverageName = firstString2(coveredReason, ["coverageName", "coverage", "coveragePart"]);
|
|
4543
|
+
const reasonNumber = firstString2(coveredReason, ["reasonNumber", "number"]);
|
|
4544
|
+
const body = firstString2(coveredReason, ["content", "description", "text", "coverageGrant"]);
|
|
4276
4545
|
pushChunk(
|
|
4277
4546
|
`covered_reason:${i}`,
|
|
4278
4547
|
"covered_reason",
|
|
@@ -4281,16 +4550,16 @@ ${exc.content}`.trim(), {
|
|
|
4281
4550
|
reasonNumber ? `Reason Number: ${reasonNumber}` : null,
|
|
4282
4551
|
`Covered Reason: ${title}`,
|
|
4283
4552
|
body,
|
|
4284
|
-
|
|
4553
|
+
firstString2(coveredReason, ["originalContent", "source"]) ? `Source: ${firstString2(coveredReason, ["originalContent", "source"])}` : null
|
|
4285
4554
|
]),
|
|
4286
4555
|
{
|
|
4287
4556
|
coverageName,
|
|
4288
4557
|
reasonNumber,
|
|
4289
4558
|
title,
|
|
4290
|
-
formNumber:
|
|
4291
|
-
formTitle:
|
|
4559
|
+
formNumber: firstString2(coveredReason, ["formNumber"]),
|
|
4560
|
+
formTitle: firstString2(coveredReason, ["formTitle"]),
|
|
4292
4561
|
pageNumber: typeof coveredReason.pageNumber === "number" ? coveredReason.pageNumber : void 0,
|
|
4293
|
-
sectionRef:
|
|
4562
|
+
sectionRef: firstString2(coveredReason, ["sectionRef", "sectionTitle"]),
|
|
4294
4563
|
documentType: doc.type
|
|
4295
4564
|
}
|
|
4296
4565
|
);
|
|
@@ -4310,10 +4579,10 @@ ${exc.content}`.trim(), {
|
|
|
4310
4579
|
reasonNumber,
|
|
4311
4580
|
title,
|
|
4312
4581
|
conditionIndex,
|
|
4313
|
-
formNumber:
|
|
4314
|
-
formTitle:
|
|
4582
|
+
formNumber: firstString2(coveredReason, ["formNumber"]),
|
|
4583
|
+
formTitle: firstString2(coveredReason, ["formTitle"]),
|
|
4315
4584
|
pageNumber: typeof coveredReason.pageNumber === "number" ? coveredReason.pageNumber : void 0,
|
|
4316
|
-
sectionRef:
|
|
4585
|
+
sectionRef: firstString2(coveredReason, ["sectionRef", "sectionTitle"]),
|
|
4317
4586
|
documentType: doc.type
|
|
4318
4587
|
}
|
|
4319
4588
|
);
|
|
@@ -6784,21 +7053,21 @@ Return JSON only.`;
|
|
|
6784
7053
|
}
|
|
6785
7054
|
|
|
6786
7055
|
// src/prompts/extractors/index.ts
|
|
6787
|
-
function
|
|
7056
|
+
function asRecord2(data) {
|
|
6788
7057
|
return data && typeof data === "object" ? data : void 0;
|
|
6789
7058
|
}
|
|
6790
7059
|
function getSections2(data) {
|
|
6791
|
-
const sections =
|
|
7060
|
+
const sections = asRecord2(data)?.sections;
|
|
6792
7061
|
return Array.isArray(sections) ? sections : [];
|
|
6793
7062
|
}
|
|
6794
7063
|
function isCoveredReasonsEmpty(data) {
|
|
6795
|
-
const record =
|
|
7064
|
+
const record = asRecord2(data);
|
|
6796
7065
|
if (!record) return true;
|
|
6797
7066
|
const coveredReasons = Array.isArray(record.coveredReasons) ? record.coveredReasons : Array.isArray(record.covered_reasons) ? record.covered_reasons : [];
|
|
6798
7067
|
return coveredReasons.length === 0;
|
|
6799
7068
|
}
|
|
6800
7069
|
function isDefinitionsEmpty(data) {
|
|
6801
|
-
const definitions =
|
|
7070
|
+
const definitions = asRecord2(data)?.definitions;
|
|
6802
7071
|
return !Array.isArray(definitions) || definitions.length === 0;
|
|
6803
7072
|
}
|
|
6804
7073
|
function sectionLooksLikeCoveredReason(section) {
|
|
@@ -7032,6 +7301,14 @@ function decideReferentialResolutionAction(params) {
|
|
|
7032
7301
|
}
|
|
7033
7302
|
|
|
7034
7303
|
// src/extraction/resolve-referential.ts
|
|
7304
|
+
/**
 * Builds the prompt suffix that carries the Docling document text.
 *
 * @param {{doclingText?: unknown}|null|undefined} providerOptions - Options
 *   that may hold `doclingText`.
 * @returns {string} A blank line, the `DOCLING DOCUMENT TEXT:` heading and the
 *   untrimmed text; `""` when `doclingText` is not a non-blank string.
 */
function formatDoclingTextContext(providerOptions) {
  const doclingText = providerOptions?.doclingText;
  const hasText = typeof doclingText === "string" && doclingText.trim() !== "";
  return hasText ? `\n\nDOCLING DOCUMENT TEXT:\n${doclingText}` : "";
}
|
|
7035
7312
|
function parseReferenceTarget(text) {
|
|
7036
7313
|
if (typeof text !== "string") return void 0;
|
|
7037
7314
|
const normalized = text.trim();
|
|
@@ -7113,12 +7390,12 @@ Return the page range (1-indexed) where this section is located. If the section
|
|
|
7113
7390
|
|
|
7114
7391
|
If you cannot find the section, return startPage: 0 and endPage: 0.
|
|
7115
7392
|
|
|
7116
|
-
Return JSON only
|
|
7393
|
+
Return JSON only.${formatDoclingTextContext(providerOptions)}`,
|
|
7117
7394
|
schema: PageLocationSchema,
|
|
7118
7395
|
maxTokens: budget.maxTokens,
|
|
7119
7396
|
taskKind: "extraction_referential_lookup",
|
|
7120
7397
|
budgetDiagnostics: budget,
|
|
7121
|
-
providerOptions: await buildPdfProviderOptions(pdfInput, providerOptions)
|
|
7398
|
+
providerOptions: pdfInput ? await buildPdfProviderOptions(pdfInput, providerOptions) : providerOptions
|
|
7122
7399
|
},
|
|
7123
7400
|
{
|
|
7124
7401
|
fallback: { startPage: 0, endPage: 0 },
|
|
@@ -7152,6 +7429,7 @@ async function resolveReferentialCoverages(params) {
|
|
|
7152
7429
|
convertPdfToImages,
|
|
7153
7430
|
getPageRangePdf,
|
|
7154
7431
|
getPageImages,
|
|
7432
|
+
getPageRangeText,
|
|
7155
7433
|
concurrency = 2,
|
|
7156
7434
|
providerOptions,
|
|
7157
7435
|
modelCapabilities,
|
|
@@ -7263,6 +7541,7 @@ async function resolveReferentialCoverages(params) {
|
|
|
7263
7541
|
convertPdfToImages,
|
|
7264
7542
|
getPageRangePdf,
|
|
7265
7543
|
getPageImages,
|
|
7544
|
+
getPageRangeText,
|
|
7266
7545
|
maxTokens: budget.maxTokens,
|
|
7267
7546
|
taskKind: "extraction_referential_lookup",
|
|
7268
7547
|
budgetDiagnostics: budget,
|
|
@@ -7358,6 +7637,7 @@ async function runFocusedExtractorWithFallback(params) {
|
|
|
7358
7637
|
pageRangeCache,
|
|
7359
7638
|
getPageRangePdf,
|
|
7360
7639
|
getPageImages,
|
|
7640
|
+
getPageRangeText,
|
|
7361
7641
|
trackUsage,
|
|
7362
7642
|
resolveBudget,
|
|
7363
7643
|
log
|
|
@@ -7387,7 +7667,8 @@ async function runFocusedExtractorWithFallback(params) {
|
|
|
7387
7667
|
providerOptions,
|
|
7388
7668
|
pageRangeCache,
|
|
7389
7669
|
getPageRangePdf,
|
|
7390
|
-
getPageImages
|
|
7670
|
+
getPageImages,
|
|
7671
|
+
getPageRangeText
|
|
7391
7672
|
});
|
|
7392
7673
|
trackUsage(result.usage, {
|
|
7393
7674
|
taskKind,
|
|
@@ -7432,7 +7713,8 @@ async function runFocusedExtractorWithFallback(params) {
|
|
|
7432
7713
|
providerOptions,
|
|
7433
7714
|
pageRangeCache,
|
|
7434
7715
|
getPageRangePdf,
|
|
7435
|
-
getPageImages
|
|
7716
|
+
getPageImages,
|
|
7717
|
+
getPageRangeText
|
|
7436
7718
|
});
|
|
7437
7719
|
trackUsage(fallbackResult.usage, {
|
|
7438
7720
|
taskKind,
|
|
@@ -8276,7 +8558,7 @@ function createExtractor(config) {
|
|
|
8276
8558
|
}
|
|
8277
8559
|
return lines.length > 0 ? lines.join("\n") : "";
|
|
8278
8560
|
}
|
|
8279
|
-
async function runFocusedExtractorTask(task, pdfInput, memory, pageRangeCache, getPageRangePdf, getPageImages) {
|
|
8561
|
+
async function runFocusedExtractorTask(task, pdfInput, memory, pageRangeCache, getPageRangePdf, getPageImages, getPageRangeText) {
|
|
8280
8562
|
if (task.extractorName === "supplementary") {
|
|
8281
8563
|
const alreadyExtractedSummary = buildAlreadyExtractedSummary(memory);
|
|
8282
8564
|
const budget = resolveBudget("extraction_focused", 4096);
|
|
@@ -8296,7 +8578,8 @@ function createExtractor(config) {
|
|
|
8296
8578
|
providerOptions: activeProviderOptions,
|
|
8297
8579
|
pageRangeCache,
|
|
8298
8580
|
getPageRangePdf,
|
|
8299
|
-
getPageImages
|
|
8581
|
+
getPageImages,
|
|
8582
|
+
getPageRangeText
|
|
8300
8583
|
});
|
|
8301
8584
|
trackUsage(result.usage, {
|
|
8302
8585
|
taskKind: "extraction_focused",
|
|
@@ -8315,6 +8598,7 @@ function createExtractor(config) {
|
|
|
8315
8598
|
pageRangeCache,
|
|
8316
8599
|
getPageRangePdf,
|
|
8317
8600
|
getPageImages,
|
|
8601
|
+
getPageRangeText,
|
|
8318
8602
|
trackUsage,
|
|
8319
8603
|
resolveBudget,
|
|
8320
8604
|
log
|
|
@@ -8330,8 +8614,14 @@ function createExtractor(config) {
|
|
|
8330
8614
|
if (extractorPages.size === 0) return "No page assignments available.";
|
|
8331
8615
|
return [...extractorPages.entries()].map(([extractorName, pages]) => `${extractorName}: ${pages.length} page(s), pages ${pages.join(", ")}`).join("\n");
|
|
8332
8616
|
}
|
|
8333
|
-
async function extract(
|
|
8617
|
+
async function extract(input, documentId, options) {
|
|
8334
8618
|
const id = documentId ?? `doc-${Date.now()}`;
|
|
8619
|
+
const isDoclingInput = isDoclingExtractionInput(input);
|
|
8620
|
+
const pdfInput = isDoclingInput ? void 0 : input;
|
|
8621
|
+
const doclingDocument = isDoclingInput ? normalizeDoclingDocument(input.document, {
|
|
8622
|
+
documentId: id,
|
|
8623
|
+
sourceKind: input.sourceKind
|
|
8624
|
+
}) : void 0;
|
|
8335
8625
|
const memory = /* @__PURE__ */ new Map();
|
|
8336
8626
|
totalUsage = { inputTokens: 0, outputTokens: 0 };
|
|
8337
8627
|
modelCalls = 0;
|
|
@@ -8341,7 +8631,10 @@ function createExtractor(config) {
|
|
|
8341
8631
|
modelCalls: [],
|
|
8342
8632
|
totalModelCallDurationMs: 0
|
|
8343
8633
|
};
|
|
8344
|
-
const sourceSpans =
|
|
8634
|
+
const sourceSpans = mergeSourceSpans([
|
|
8635
|
+
...doclingDocument?.sourceSpans ?? [],
|
|
8636
|
+
...options?.sourceSpans ?? []
|
|
8637
|
+
]);
|
|
8345
8638
|
const sourceChunks = sourceSpans.length ? chunkSourceSpans(sourceSpans) : [];
|
|
8346
8639
|
activeProviderOptions = sourceSpans.length ? { ...providerOptions, sourceSpans, sourceChunks } : providerOptions;
|
|
8347
8640
|
if (sourceStore && sourceSpans.length > 0) {
|
|
@@ -8370,24 +8663,40 @@ function createExtractor(config) {
|
|
|
8370
8663
|
let fullPdfProviderOptionsPromise;
|
|
8371
8664
|
let pageCountPromise;
|
|
8372
8665
|
async function getPdfBase64ForExtraction() {
|
|
8666
|
+
if (!pdfInput) {
|
|
8667
|
+
throw new Error("PDF input is not available for Docling extraction.");
|
|
8668
|
+
}
|
|
8373
8669
|
if (pdfBase64Cache === void 0) {
|
|
8374
8670
|
pdfBase64Cache = await pdfInputToBase64(pdfInput);
|
|
8375
8671
|
}
|
|
8376
8672
|
return pdfBase64Cache;
|
|
8377
8673
|
}
|
|
8378
8674
|
async function getCachedPageCount() {
|
|
8675
|
+
if (doclingDocument) return doclingDocument.pageCount;
|
|
8676
|
+
if (!pdfInput) {
|
|
8677
|
+
throw new Error("PDF input is required to read page count.");
|
|
8678
|
+
}
|
|
8379
8679
|
if (!pageCountPromise) {
|
|
8380
8680
|
pageCountPromise = getPdfSlicer().then((slicer) => slicer.getPageCount()).catch(() => getPdfPageCount(pdfInput));
|
|
8381
8681
|
}
|
|
8382
8682
|
return pageCountPromise;
|
|
8383
8683
|
}
|
|
8384
|
-
async function
|
|
8684
|
+
async function getFullDocumentProviderOptions() {
|
|
8685
|
+
if (doclingDocument) {
|
|
8686
|
+
return buildDoclingProviderOptions(doclingDocument, activeProviderOptions);
|
|
8687
|
+
}
|
|
8688
|
+
if (!pdfInput) {
|
|
8689
|
+
return activeProviderOptions ?? {};
|
|
8690
|
+
}
|
|
8385
8691
|
if (!fullPdfProviderOptionsPromise) {
|
|
8386
8692
|
fullPdfProviderOptionsPromise = buildPdfProviderOptions(pdfInput, activeProviderOptions);
|
|
8387
8693
|
}
|
|
8388
8694
|
return fullPdfProviderOptionsPromise;
|
|
8389
8695
|
}
|
|
8390
8696
|
async function getPdfSlicer() {
|
|
8697
|
+
if (!pdfInput) {
|
|
8698
|
+
throw new Error("PDF input is not available for Docling extraction.");
|
|
8699
|
+
}
|
|
8391
8700
|
if (!pdfSlicerPromise) {
|
|
8392
8701
|
pdfSlicerPromise = createPdfPageSlicer(pdfInput);
|
|
8393
8702
|
}
|
|
@@ -8426,6 +8735,23 @@ function createExtractor(config) {
|
|
|
8426
8735
|
pageRangeImageCache.set(cacheKey, promise);
|
|
8427
8736
|
return promise;
|
|
8428
8737
|
}
|
|
8738
|
+
async function getPageRangeText(startPage, endPage) {
|
|
8739
|
+
return doclingDocument ? getDoclingPageRangeText(doclingDocument, startPage, endPage) : "";
|
|
8740
|
+
}
|
|
8741
|
+
function withFullDocumentTextContext(prompt) {
|
|
8742
|
+
if (!doclingDocument) return prompt;
|
|
8743
|
+
return `${prompt}
|
|
8744
|
+
|
|
8745
|
+
DOCLING DOCUMENT TEXT:
|
|
8746
|
+
${doclingDocument.fullText}`;
|
|
8747
|
+
}
|
|
8748
|
+
function withPageRangeTextContext(prompt, startPage, endPage, pageText) {
|
|
8749
|
+
if (!doclingDocument) return prompt;
|
|
8750
|
+
return `${prompt}
|
|
8751
|
+
|
|
8752
|
+
DOCLING DOCUMENT PAGES ${startPage}-${endPage}:
|
|
8753
|
+
${pageText || "(No Docling text was available for this page range.)"}`;
|
|
8754
|
+
}
|
|
8429
8755
|
let classifyResult;
|
|
8430
8756
|
if (resumed?.classifyResult && pipelineCtx.isPhaseComplete("classify")) {
|
|
8431
8757
|
classifyResult = resumed.classifyResult;
|
|
@@ -8438,12 +8764,12 @@ function createExtractor(config) {
|
|
|
8438
8764
|
const classifyResponse = await safeGenerateObject(
|
|
8439
8765
|
generateObject,
|
|
8440
8766
|
{
|
|
8441
|
-
prompt: buildClassifyPrompt(),
|
|
8767
|
+
prompt: withFullDocumentTextContext(buildClassifyPrompt()),
|
|
8442
8768
|
schema: ClassifyResultSchema,
|
|
8443
8769
|
maxTokens: budget.maxTokens,
|
|
8444
8770
|
taskKind: "extraction_classify",
|
|
8445
8771
|
budgetDiagnostics: budget,
|
|
8446
|
-
providerOptions: await
|
|
8772
|
+
providerOptions: await getFullDocumentProviderOptions()
|
|
8447
8773
|
},
|
|
8448
8774
|
{
|
|
8449
8775
|
fallback: { documentType: "policy", policyTypes: ["other"], confidence: 0 },
|
|
@@ -8488,12 +8814,12 @@ function createExtractor(config) {
|
|
|
8488
8814
|
const formInventoryResponse = await safeGenerateObject(
|
|
8489
8815
|
generateObject,
|
|
8490
8816
|
{
|
|
8491
|
-
prompt: buildFormInventoryPrompt(templateHints),
|
|
8817
|
+
prompt: withFullDocumentTextContext(buildFormInventoryPrompt(templateHints)),
|
|
8492
8818
|
schema: FormInventorySchema,
|
|
8493
8819
|
maxTokens: budget.maxTokens,
|
|
8494
8820
|
taskKind: "extraction_form_inventory",
|
|
8495
8821
|
budgetDiagnostics: budget,
|
|
8496
|
-
providerOptions: await
|
|
8822
|
+
providerOptions: await getFullDocumentProviderOptions()
|
|
8497
8823
|
},
|
|
8498
8824
|
{
|
|
8499
8825
|
fallback: { forms: [] },
|
|
@@ -8536,18 +8862,24 @@ function createExtractor(config) {
|
|
|
8536
8862
|
const pageMapResults = await Promise.all(
|
|
8537
8863
|
pageMapChunks.map(
|
|
8538
8864
|
({ startPage, endPage }) => pageMapLimit(async () => {
|
|
8539
|
-
const pagesPdf = await getPageRangePdf(startPage, endPage);
|
|
8865
|
+
const pagesPdf = doclingDocument ? void 0 : await getPageRangePdf(startPage, endPage);
|
|
8866
|
+
const pagesText = doclingDocument ? await getPageRangeText(startPage, endPage) : "";
|
|
8540
8867
|
const budget = resolveBudget("extraction_page_map", 2048);
|
|
8541
8868
|
const startedAt = Date.now();
|
|
8542
8869
|
const mapResponse = await safeGenerateObject(
|
|
8543
8870
|
generateObject,
|
|
8544
8871
|
{
|
|
8545
|
-
prompt:
|
|
8872
|
+
prompt: withPageRangeTextContext(
|
|
8873
|
+
buildPageMapPrompt(templateHints, startPage, endPage, formInventoryHint),
|
|
8874
|
+
startPage,
|
|
8875
|
+
endPage,
|
|
8876
|
+
pagesText
|
|
8877
|
+
),
|
|
8546
8878
|
schema: PageMapChunkSchema,
|
|
8547
8879
|
maxTokens: budget.maxTokens,
|
|
8548
8880
|
taskKind: "extraction_page_map",
|
|
8549
8881
|
budgetDiagnostics: budget,
|
|
8550
|
-
providerOptions: { ...activeProviderOptions, pdfBase64: pagesPdf }
|
|
8882
|
+
providerOptions: doclingDocument ? { ...activeProviderOptions, doclingText: pagesText, doclingPageRange: { startPage, endPage } } : { ...activeProviderOptions, pdfBase64: pagesPdf }
|
|
8551
8883
|
},
|
|
8552
8884
|
{
|
|
8553
8885
|
fallback: {
|
|
@@ -8625,7 +8957,7 @@ function createExtractor(config) {
|
|
|
8625
8957
|
}))
|
|
8626
8958
|
];
|
|
8627
8959
|
onProgress?.(`Dispatching ${tasks.length} extractors...`);
|
|
8628
|
-
const extractionPdfInput = await getPdfBase64ForExtraction();
|
|
8960
|
+
const extractionPdfInput = doclingDocument ? void 0 : await getPdfBase64ForExtraction();
|
|
8629
8961
|
const extractorResults = await Promise.all(
|
|
8630
8962
|
tasks.map(
|
|
8631
8963
|
(task) => extractorLimit(async () => {
|
|
@@ -8636,7 +8968,8 @@ function createExtractor(config) {
|
|
|
8636
8968
|
memory,
|
|
8637
8969
|
completedPageRangePdfCache,
|
|
8638
8970
|
getPageRangePdf,
|
|
8639
|
-
convertPdfToImages ? getPageImages : void 0
|
|
8971
|
+
convertPdfToImages ? getPageImages : void 0,
|
|
8972
|
+
doclingDocument ? getPageRangeText : void 0
|
|
8640
8973
|
);
|
|
8641
8974
|
})
|
|
8642
8975
|
)
|
|
@@ -8668,7 +9001,8 @@ function createExtractor(config) {
|
|
|
8668
9001
|
providerOptions: activeProviderOptions,
|
|
8669
9002
|
pageRangeCache: completedPageRangePdfCache,
|
|
8670
9003
|
getPageRangePdf,
|
|
8671
|
-
getPageImages: convertPdfToImages ? getPageImages : void 0
|
|
9004
|
+
getPageImages: convertPdfToImages ? getPageImages : void 0,
|
|
9005
|
+
getPageRangeText: doclingDocument ? getPageRangeText : void 0
|
|
8672
9006
|
});
|
|
8673
9007
|
trackUsage(supplementaryResult.usage, {
|
|
8674
9008
|
taskKind: "extraction_focused",
|
|
@@ -8704,6 +9038,7 @@ function createExtractor(config) {
|
|
|
8704
9038
|
concurrency,
|
|
8705
9039
|
getPageRangePdf,
|
|
8706
9040
|
getPageImages: convertPdfToImages ? getPageImages : void 0,
|
|
9041
|
+
getPageRangeText: doclingDocument ? getPageRangeText : void 0,
|
|
8707
9042
|
providerOptions: activeProviderOptions,
|
|
8708
9043
|
modelCapabilities,
|
|
8709
9044
|
modelBudgetConstraints,
|
|
@@ -8752,12 +9087,12 @@ function createExtractor(config) {
|
|
|
8752
9087
|
const reviewResponse = await safeGenerateObject(
|
|
8753
9088
|
generateObject,
|
|
8754
9089
|
{
|
|
8755
|
-
prompt: buildReviewPrompt(template.required, extractedKeys, extractionSummary, pageMapSummary, extractorCatalog),
|
|
9090
|
+
prompt: withFullDocumentTextContext(buildReviewPrompt(template.required, extractedKeys, extractionSummary, pageMapSummary, extractorCatalog)),
|
|
8756
9091
|
schema: ReviewResultSchema,
|
|
8757
9092
|
maxTokens: budget.maxTokens,
|
|
8758
9093
|
taskKind: "extraction_review",
|
|
8759
9094
|
budgetDiagnostics: budget,
|
|
8760
|
-
providerOptions: await
|
|
9095
|
+
providerOptions: await getFullDocumentProviderOptions()
|
|
8761
9096
|
},
|
|
8762
9097
|
{
|
|
8763
9098
|
fallback: {
|
|
@@ -8787,7 +9122,7 @@ function createExtractor(config) {
|
|
|
8787
9122
|
break;
|
|
8788
9123
|
}
|
|
8789
9124
|
onProgress?.(`Review round ${round + 1}: dispatching ${reviewResponse.object.additionalTasks.length} follow-up extractors...`);
|
|
8790
|
-
const extractionPdfInput = await getPdfBase64ForExtraction();
|
|
9125
|
+
const extractionPdfInput = doclingDocument ? void 0 : await getPdfBase64ForExtraction();
|
|
8791
9126
|
const followUpResults = await Promise.all(
|
|
8792
9127
|
reviewResponse.object.additionalTasks.map(
|
|
8793
9128
|
(task) => extractorLimit(async () => {
|
|
@@ -8797,7 +9132,8 @@ function createExtractor(config) {
|
|
|
8797
9132
|
memory,
|
|
8798
9133
|
completedPageRangePdfCache,
|
|
8799
9134
|
getPageRangePdf,
|
|
8800
|
-
convertPdfToImages ? getPageImages : void 0
|
|
9135
|
+
convertPdfToImages ? getPageImages : void 0,
|
|
9136
|
+
doclingDocument ? getPageRangeText : void 0
|
|
8801
9137
|
);
|
|
8802
9138
|
})
|
|
8803
9139
|
)
|
|
@@ -12797,6 +13133,7 @@ var AGENT_TOOLS = [
|
|
|
12797
13133
|
buildConfirmationSummaryPrompt,
|
|
12798
13134
|
buildConversationMemoryGuidance,
|
|
12799
13135
|
buildCoverageGapPrompt,
|
|
13136
|
+
buildDoclingProviderOptions,
|
|
12800
13137
|
buildFieldExplanationPrompt,
|
|
12801
13138
|
buildFieldExtractionPrompt,
|
|
12802
13139
|
buildFlatPdfMappingPrompt,
|
|
@@ -12838,12 +13175,16 @@ var AGENT_TOOLS = [
|
|
|
12838
13175
|
fillAcroForm,
|
|
12839
13176
|
generateNextMessage,
|
|
12840
13177
|
getAcroFormFields,
|
|
13178
|
+
getDoclingPageRangeText,
|
|
12841
13179
|
getExtractor,
|
|
12842
13180
|
getFileIdentifier,
|
|
12843
13181
|
getPdfPageCount,
|
|
12844
13182
|
getTemplate,
|
|
13183
|
+
isDoclingExtractionInput,
|
|
12845
13184
|
isFileReference,
|
|
12846
13185
|
mergeQuestionAnswers,
|
|
13186
|
+
mergeSourceSpans,
|
|
13187
|
+
normalizeDoclingDocument,
|
|
12847
13188
|
normalizeForMatch,
|
|
12848
13189
|
orderSourceEvidence,
|
|
12849
13190
|
overlayTextOnPdf,
|