@doclo/providers-llm 0.1.6 → 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/chunk-7YPJIWRM.js +291 -0
- package/dist/chunk-7YPJIWRM.js.map +1 -0
- package/dist/index.d.ts +229 -3
- package/dist/index.js +306 -168
- package/dist/index.js.map +1 -1
- package/dist/schema-prompt-formatter-AIORLWUF.js +29 -0
- package/dist/schema-prompt-formatter-AIORLWUF.js.map +1 -0
- package/package.json +2 -2
|
@@ -0,0 +1,291 @@
|
|
|
1
|
+
// src/schema-prompt-formatter.ts
|
|
2
|
+
/**
 * Renders a JSON Schema's object properties as an indented, human-readable
 * field list: one bullet per property with a required/optional marker, a type
 * description, an optional free-text description, and any enum values.
 * Recurses into nested object schemas and array item schemas.
 *
 * @param {object} schema - JSON Schema node; only `type: "object"` nodes with
 *   `properties` produce output, anything else yields "".
 * @param {number} [indent=0] - Nesting depth; each level adds two spaces.
 * @returns {string} Newline-terminated field listing, or "" when there is
 *   nothing to render.
 */
function formatSchemaForPrompt(schema, indent = 0) {
  if (!schema || typeof schema !== "object") {
    return "";
  }
  if (schema.type !== "object" || !schema.properties) {
    return "";
  }

  const pad = "  ".repeat(indent);
  const requiredNames = schema.required || [];
  let out = "";

  for (const [name, field] of Object.entries(schema.properties)) {
    const marker = requiredNames.includes(name) ? " (REQUIRED)" : " (optional)";
    // Backticks around the name emphasize that it must be matched exactly.
    out += `${pad}- \`${name}\`${marker}`;

    const typeText = getTypeDescription(field);
    if (typeText) {
      out += `: ${typeText}`;
    }
    if (field.description) {
      out += `\n${pad}  ${field.description}`;
    }
    if (field.enum) {
      const allowed = field.enum.map((v) => JSON.stringify(v)).join(", ");
      out += `\n${pad}  Allowed values: ${allowed}`;
    }
    out += "\n";

    // Nested object: recurse one level deeper.
    if (field.type === "object" && field.properties) {
      out += formatSchemaForPrompt(field, indent + 1);
    }

    // Array: describe the item schema (for tuple validation, the first item).
    if (field.type === "array" && field.items) {
      out += `${pad}  Array items:\n`;
      const item = Array.isArray(field.items) ? field.items[0] : field.items;
      if (item && item.type === "object" && item.properties) {
        out += formatSchemaForPrompt(item, indent + 2);
      } else if (item) {
        out += `${pad}    ${getTypeDescription(item)}\n`;
      }
    }
  }

  return out;
}
|
|
47
|
+
/**
 * Produces a human-readable type description for a JSON Schema node.
 *
 * Handles scalar types, union types (`type` as an array is joined with " | "),
 * arrays (with item type when determinable), string formats (with usage hints
 * for date/time formats), and `anyOf` / `oneOf` / `allOf` combinators.
 *
 * Fixes over the previous version:
 * - `allOf` was declared on the JSONSchema interface but never handled, so
 *   intersection schemas were reported as "any"; they now join with " AND ".
 * - The format-hint lookup is guarded with hasOwnProperty so an unusual
 *   `format` value (e.g. "constructor") cannot pick up inherited members of
 *   Object.prototype.
 *
 * @param {object|null|undefined} schema - JSON Schema node.
 * @returns {string} Type description, or "any" when undeterminable.
 */
function getTypeDescription(schema) {
  if (!schema) return "any";

  if (schema.type) {
    const typeStr = Array.isArray(schema.type) ? schema.type.join(" | ") : schema.type;
    // True when the schema's type is `t` or a union that includes `t`.
    const hasType = (t) => typeStr === t || (Array.isArray(schema.type) && schema.type.includes(t));

    if (hasType("array")) {
      // Describe the element type for a single (non-tuple) item schema.
      if (schema.items && !Array.isArray(schema.items) && schema.items.type) {
        const itemType = Array.isArray(schema.items.type)
          ? schema.items.type.join(" | ")
          : schema.items.type;
        return `array of ${itemType}`;
      }
      return "array";
    }

    if (hasType("string") && schema.format) {
      // Concrete layout hints for the common date/time formats.
      const formatHints = {
        "date": "YYYY-MM-DD",
        "time": "HH:MM or HH:MM:SS",
        "date-time": "YYYY-MM-DDTHH:MM:SS (ISO 8601)"
      };
      const hint = Object.prototype.hasOwnProperty.call(formatHints, schema.format)
        ? formatHints[schema.format]
        : undefined;
      return hint
        ? `string (format: ${schema.format}, use ${hint})`
        : `string (format: ${schema.format})`;
    }

    return typeStr;
  }

  // Combinators: describe each alternative/component recursively.
  if (schema.anyOf) {
    return schema.anyOf.map((s) => getTypeDescription(s)).join(" OR ");
  }
  if (schema.oneOf) {
    return schema.oneOf.map((s) => getTypeDescription(s)).join(" OR ");
  }
  if (schema.allOf) {
    return schema.allOf.map((s) => getTypeDescription(s)).join(" AND ");
  }

  return "any";
}
|
|
80
|
+
/**
 * Wraps the formatted schema field list in a strongly worded prompt section
 * instructing the model to match field names exactly and how to handle
 * missing data.
 *
 * @param schema - JSON Schema rendered via formatSchemaForPrompt.
 * @returns A trimmed multi-line prompt block. The template text below is part
 *          of the runtime prompt contract; do not edit it casually.
 */
function buildSchemaPromptSection(schema) {
  const schemaFields = formatSchemaForPrompt(schema);
  // \u2713 is a check mark; the literal below is emitted verbatim to the LLM.
  return `
==================================================
CRITICAL: OUTPUT STRUCTURE REQUIREMENTS
==================================================

YOU MUST RETURN JSON MATCHING THIS EXACT STRUCTURE:

${schemaFields}

CRITICAL FIELD NAME REQUIREMENTS:
\u2713 Use EXACTLY the field names shown above (character-for-character match)
\u2713 Preserve the exact casing (e.g., "fullName", not "full_name" or "FullName")
\u2713 Do NOT abbreviate field names (e.g., "dob" instead of "dateOfBirth")
\u2713 Do NOT invent alternative names (e.g., "directorName" instead of "fullName")
\u2713 Do NOT use snake_case if the schema uses camelCase
\u2713 Do NOT flatten nested structures or rename nested fields
\u2713 The schema above is the SINGLE SOURCE OF TRUTH for field naming

MISSING DATA:
- If a required field has no data in the document, use null
- If an optional field has no data, you may omit it or use null
- Do NOT invent data that isn't in the document

==================================================
`.trim();
}
|
|
108
|
+
/**
 * Prefixes the user's prompt with the schema requirements section.
 * Falls back to a generic extraction task when the user prompt is blank.
 *
 * @param {object} schema - JSON Schema for the expected output.
 * @param {string} userPrompt - Caller-supplied instructions (may be empty).
 * @returns {string} Schema section and task text separated by a blank line.
 */
function combineSchemaAndUserPrompt(schema, userPrompt) {
  const header = buildSchemaPromptSection(schema);
  const isBlank = !userPrompt || userPrompt.trim() === "";
  const task = isBlank
    ? "TASK: Extract structured data from the provided document."
    : userPrompt;
  return `${header}\n\n${task}`;
}
|
|
115
|
+
/**
 * Translates output-format options into prompt instructions.
 *
 * @param {object} options - May carry `outputFormat` (markdown|html|json|text),
 *   `tableFormat` (markdown|html|csv), and `pageMarkers` (boolean).
 * @returns {string} Newline-joined instructions; "" when no option is set.
 */
function buildOutputFormatPrompt(options) {
  // Lookup tables replace the original switch statements; Map.get returns
  // undefined for unrecognized keys, matching the switch's silent fall-through.
  const textStyles = new Map([
    ["markdown", "Format all text content using markdown syntax. Use proper headings (#, ##, ###), lists (-, *), bold (**text**), and other markdown formatting where appropriate."],
    ["html", "Format all text content as valid HTML. Use semantic tags like <p>, <h1>-<h6>, <ul>, <ol>, <strong>, <em> where appropriate."],
    ["json", "For text fields that contain structured data, format them as embedded JSON strings where appropriate."],
    ["text", "Return plain text without any formatting. No markdown, HTML, or other markup."]
  ]);
  const tableStyles = new Map([
    ["markdown", "Format all tables using markdown table syntax with | column separators and header row with ---."],
    ["html", "Format all tables as HTML <table> elements with <thead>, <tbody>, <tr>, <th>, and <td> tags."],
    ["csv", "Format all tables as CSV with headers in the first row and comma-separated values."]
  ]);

  const instructions = [];
  if (options.outputFormat && textStyles.has(options.outputFormat)) {
    instructions.push(textStyles.get(options.outputFormat));
  }
  if (options.tableFormat && tableStyles.has(options.tableFormat)) {
    instructions.push(tableStyles.get(options.tableFormat));
  }
  if (options.pageMarkers) {
    instructions.push('Insert "---" page break markers between content from different pages of the document.');
  }
  return instructions.join("\n");
}
|
|
151
|
+
/**
 * Builds a language-hint instruction for the prompt.
 *
 * @param {string[]|null|undefined} languages - Document languages.
 * @returns {string} Single-sentence hint, or "" when no languages are given.
 */
function buildLanguageHintsPrompt(languages) {
  if (!languages?.length) {
    return "";
  }
  const list = languages.join(", ");
  return `The document is written in ${list}. Extract and preserve text in the original language(s).`;
}
|
|
157
|
+
/**
 * Returns the fixed prompt section asking the model to attach per-field
 * confidence scores in a "_confidence" sibling object.
 *
 * @returns Trimmed multi-line instruction text (runtime prompt contract;
 *          do not edit casually).
 */
function buildConfidencePrompt() {
  return `
For each extracted field, assess your confidence level and include it in the "_confidence" object:
- Use a number from 0.0 to 1.0 where:
  - 0.9-1.0: Very high confidence - text is clear and unambiguous
  - 0.7-0.9: High confidence - minor ambiguity but likely correct
  - 0.5-0.7: Medium confidence - some uncertainty or partial visibility
  - 0.3-0.5: Low confidence - significant uncertainty
  - 0.0-0.3: Very low confidence - guessing or text was unclear

Include "_confidence" as a sibling object mapping field paths to their scores.
Example: "_confidence": { "invoiceNumber": 0.95, "amount": 0.82 }
`.trim();
}
|
|
171
|
+
/**
 * Returns the fixed prompt section asking the model to cite the document
 * location of each extracted field in a "_sources" sibling array
 * (field path, source text, bounding box, page index).
 *
 * @returns Trimmed multi-line instruction text (runtime prompt contract;
 *          do not edit casually).
 */
function buildSourcesPrompt() {
  return `
For each extracted field, identify the source location in the document and include it in the "_sources" array:
Each source entry should contain:
- "field": The field name/path that was extracted
- "text": The exact text from the document used for extraction
- "bbox": Bounding box as [y_min, x_min, y_max, x_max] normalized to 0-1000 scale
- "page": Page number (0-indexed) where the text appears

Include "_sources" as a sibling array to your extracted data.
Example: "_sources": [{ "field": "invoiceNumber", "text": "INV-001", "bbox": [100, 50, 120, 150], "page": 0 }]
`.trim();
}
|
|
184
|
+
/**
 * Returns the fixed prompt section asking the model to classify each
 * extracted element's layout role in a "_blockTypes" sibling object.
 *
 * @returns Trimmed multi-line instruction text (runtime prompt contract;
 *          do not edit casually).
 */
function buildBlockClassificationPrompt() {
  return `
For each extracted element or text block, classify its type in a "_blockTypes" object:
- "title": Main document title or major section headers
- "heading": Section headings and subheadings
- "paragraph": Body text paragraphs
- "table": Tabular data
- "list": Bulleted or numbered lists
- "header": Page headers (repeated at top of pages)
- "footer": Page footers (repeated at bottom of pages)
- "caption": Image or figure captions
- "code": Code blocks or preformatted text

Include "_blockTypes" mapping field paths to their block type.
Example: "_blockTypes": { "summary": "paragraph", "items": "list" }
`.trim();
}
|
|
201
|
+
/**
 * Builds instructions for extracting repeated page headers/footers into
 * "_headers" / "_footers" arrays.
 *
 * @param {object} options - `extractHeaders` and/or `extractFooters` booleans.
 * @returns {string} Newline-joined instructions plus a shared entry-shape
 *   note, or "" when neither flag is set.
 */
function buildHeaderFooterPrompt(options) {
  const instructions = [];
  if (options.extractHeaders) {
    instructions.push('Identify and extract document headers (repeated content at the top of pages) into a "_headers" array.');
  }
  if (options.extractFooters) {
    instructions.push('Identify and extract document footers (repeated content at the bottom of pages, like page numbers) into a "_footers" array.');
  }
  if (instructions.length === 0) {
    return "";
  }
  // Shared note describing the shape of each header/footer entry.
  instructions.push('Each header/footer entry should include: { "text": "...", "pages": [0, 1, 2] } listing which pages contain it.');
  return instructions.join("\n");
}
|
|
214
|
+
/**
 * Builds a chunking instruction for the given strategy.
 *
 * @param {string} strategy - One of "page", "section", "paragraph", "semantic".
 * @param {number} [maxChunkSize] - Optional soft character cap per chunk.
 * @returns {string} Instruction sentence (with size note when a cap is given),
 *   or "" for an unrecognized strategy.
 */
function buildChunkingPrompt(strategy, maxChunkSize) {
  // Map replaces the original switch; unknown strategies yield undefined,
  // which maps to the switch's default "" return.
  const baseByStrategy = new Map([
    ["page", "Organize the extracted content by page. Include page number for each chunk."],
    ["section", "Divide the document into logical sections based on headings and structure. Each section should be a coherent unit."],
    ["paragraph", "Divide the content into individual paragraphs, preserving the natural paragraph breaks from the document."],
    ["semantic", "Divide the document into semantically coherent chunks. Each chunk should be a self-contained unit of meaning that could stand alone."]
  ]);
  const base = baseByStrategy.get(strategy);
  if (base === undefined) {
    return "";
  }
  const sizeNote = maxChunkSize ? ` Keep chunks under ${maxChunkSize} characters when possible.` : "";
  return base + sizeNote;
}
|
|
229
|
+
/**
 * Aggregates every prompt-engineered feature (output/table format, language
 * hints, confidence, sources, block types, header/footer extraction,
 * chunking) into a single banner-delimited prompt section.
 *
 * @param {object} options - LLM-derived prompt options; see individual
 *   builders for the fields consumed.
 * @returns {string} Banner-wrapped section, or "" when no feature is enabled.
 */
function buildLLMDerivedFeaturesPrompt(options) {
  const sections = [];

  const formatInstructions = buildOutputFormatPrompt(options);
  if (formatInstructions) {
    sections.push(formatInstructions);
  }
  if (options.languageHints && options.languageHints.length > 0) {
    sections.push(buildLanguageHintsPrompt(options.languageHints));
  }
  if (options.includeConfidence) {
    sections.push(buildConfidencePrompt());
  }
  if (options.includeSources) {
    sections.push(buildSourcesPrompt());
  }
  if (options.includeBlockTypes) {
    sections.push(buildBlockClassificationPrompt());
  }
  if (options.extractHeaders || options.extractFooters) {
    sections.push(buildHeaderFooterPrompt(options));
  }
  if (options.chunkingStrategy) {
    sections.push(buildChunkingPrompt(options.chunkingStrategy, options.maxChunkSize));
  }

  if (sections.length === 0) {
    return "";
  }

  const banner = "==================================================";
  return [
    banner,
    "ADDITIONAL OUTPUT REQUIREMENTS",
    banner,
    "",
    sections.join("\n\n"),
    "",
    banner
  ].join("\n");
}
|
|
266
|
+
/**
 * Builds the full prompt: schema requirements + user prompt, optionally
 * followed by the LLM-derived feature section.
 *
 * @param {object} schema - JSON Schema for the expected output.
 * @param {string} userPrompt - Caller-supplied instructions (may be empty).
 * @param {object} [derivedOptions] - Optional prompt-engineered feature flags.
 * @returns {string} Combined prompt text.
 */
function combineSchemaUserAndDerivedPrompts(schema, userPrompt, derivedOptions) {
  const base = combineSchemaAndUserPrompt(schema, userPrompt);
  const extra = derivedOptions ? buildLLMDerivedFeaturesPrompt(derivedOptions) : "";
  return extra ? `${base}\n\n${extra}` : base;
}
|
|
276
|
+
|
|
277
|
+
// Public surface of the schema-prompt-formatter chunk; re-exported by
// dist/index.js. Keep names in sync with src/schema-prompt-formatter.ts.
export {
  formatSchemaForPrompt,
  buildSchemaPromptSection,
  combineSchemaAndUserPrompt,
  buildOutputFormatPrompt,
  buildLanguageHintsPrompt,
  buildConfidencePrompt,
  buildSourcesPrompt,
  buildBlockClassificationPrompt,
  buildHeaderFooterPrompt,
  buildChunkingPrompt,
  buildLLMDerivedFeaturesPrompt,
  combineSchemaUserAndDerivedPrompts
};
//# sourceMappingURL=chunk-7YPJIWRM.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/schema-prompt-formatter.ts"],"sourcesContent":["/**\n * Utility for converting JSON Schema to human-readable prompt text\n * that emphasizes exact field name requirements for structured extraction.\n */\n\n/**\n * JSON Schema type used for prompt formatting.\n * Uses a recursive structure to support nested schemas.\n */\nexport interface JSONSchema {\n type?: string | string[]; // Can be array for union types (e.g., [\"string\", \"null\"])\n properties?: Record<string, JSONSchema>;\n items?: JSONSchema | JSONSchema[]; // Can be array for tuple validation\n description?: string;\n required?: string[];\n enum?: (string | number | boolean | null)[];\n anyOf?: JSONSchema[];\n oneOf?: JSONSchema[];\n allOf?: JSONSchema[];\n format?: string;\n [key: string]: unknown; // Allow additional properties\n}\n\n/**\n * Formats a JSON Schema into prompt text that emphasizes exact field names.\n * This helps LLMs understand they must use the exact field names specified\n * in the schema, not invent their own based on document content.\n */\nexport function formatSchemaForPrompt(schema: JSONSchema, indent: number = 0): string {\n if (!schema || typeof schema !== 'object') {\n return '';\n }\n\n const indentStr = ' '.repeat(indent);\n let result = '';\n\n // Handle object type with properties\n if (schema.type === 'object' && schema.properties) {\n const properties = schema.properties;\n const required = schema.required || [];\n\n for (const [fieldName, fieldSchema] of Object.entries(properties)) {\n const isRequired = required.includes(fieldName);\n const requiredMarker = isRequired ? 
' (REQUIRED)' : ' (optional)';\n\n // Field name in backticks to emphasize exactness\n result += `${indentStr}- \\`${fieldName}\\`${requiredMarker}`;\n\n // Type information\n const type = getTypeDescription(fieldSchema);\n if (type) {\n result += `: ${type}`;\n }\n\n // Description if available\n if (fieldSchema.description) {\n result += `\\n${indentStr} ${fieldSchema.description}`;\n }\n\n // Enum values if specified\n if (fieldSchema.enum) {\n result += `\\n${indentStr} Allowed values: ${fieldSchema.enum.map((v) => JSON.stringify(v)).join(', ')}`;\n }\n\n result += '\\n';\n\n // Nested object properties\n if (fieldSchema.type === 'object' && fieldSchema.properties) {\n result += formatSchemaForPrompt(fieldSchema, indent + 1);\n }\n\n // Array item schema\n if (fieldSchema.type === 'array' && fieldSchema.items) {\n result += `${indentStr} Array items:\\n`;\n // Handle both single schema and tuple schemas (array of schemas)\n const itemSchema = Array.isArray(fieldSchema.items)\n ? fieldSchema.items[0] // For tuple validation, describe first item type\n : fieldSchema.items;\n if (itemSchema && itemSchema.type === 'object' && itemSchema.properties) {\n result += formatSchemaForPrompt(itemSchema, indent + 2);\n } else if (itemSchema) {\n const itemType = getTypeDescription(itemSchema);\n result += `${indentStr} ${itemType}\\n`;\n }\n }\n }\n }\n\n return result;\n}\n\n/**\n * Gets a human-readable type description from a schema property\n */\nfunction getTypeDescription(schema: JSONSchema): string {\n if (!schema) return 'any';\n\n if (schema.type) {\n // Handle array of types (e.g., [\"string\", \"null\"])\n const typeStr = Array.isArray(schema.type) ? schema.type.join(' | ') : schema.type;\n\n if (typeStr === 'array' || (Array.isArray(schema.type) && schema.type.includes('array'))) {\n if (schema.items && !Array.isArray(schema.items) && schema.items.type) {\n const itemType = Array.isArray(schema.items.type)\n ? 
schema.items.type.join(' | ')\n : schema.items.type;\n return `array of ${itemType}`;\n }\n return 'array';\n }\n // Include format information for strings (e.g., date, time, date-time, email, uri)\n if ((typeStr === 'string' || (Array.isArray(schema.type) && schema.type.includes('string'))) && schema.format) {\n const formatHints: Record<string, string> = {\n 'date': 'YYYY-MM-DD',\n 'time': 'HH:MM or HH:MM:SS',\n 'date-time': 'YYYY-MM-DDTHH:MM:SS (ISO 8601)',\n };\n const hint = formatHints[schema.format];\n if (hint) {\n return `string (format: ${schema.format}, use ${hint})`;\n }\n return `string (format: ${schema.format})`;\n }\n return typeStr;\n }\n\n // Handle anyOf, oneOf, allOf\n if (schema.anyOf) {\n return schema.anyOf.map((s) => getTypeDescription(s)).join(' OR ');\n }\n if (schema.oneOf) {\n return schema.oneOf.map((s) => getTypeDescription(s)).join(' OR ');\n }\n\n return 'any';\n}\n\n/**\n * Generates a complete prompt section with schema information and\n * strict field name instructions.\n */\nexport function buildSchemaPromptSection(schema: JSONSchema): string {\n const schemaFields = formatSchemaForPrompt(schema);\n\n return `\n==================================================\nCRITICAL: OUTPUT STRUCTURE REQUIREMENTS\n==================================================\n\nYOU MUST RETURN JSON MATCHING THIS EXACT STRUCTURE:\n\n${schemaFields}\n\nCRITICAL FIELD NAME REQUIREMENTS:\n✓ Use EXACTLY the field names shown above (character-for-character match)\n✓ Preserve the exact casing (e.g., \"fullName\", not \"full_name\" or \"FullName\")\n✓ Do NOT abbreviate field names (e.g., \"dob\" instead of \"dateOfBirth\")\n✓ Do NOT invent alternative names (e.g., \"directorName\" instead of \"fullName\")\n✓ Do NOT use snake_case if the schema uses camelCase\n✓ Do NOT flatten nested structures or rename nested fields\n✓ The schema above is the SINGLE SOURCE OF TRUTH for field naming\n\nMISSING DATA:\n- If a required field has no data in the document, use 
null\n- If an optional field has no data, you may omit it or use null\n- Do NOT invent data that isn't in the document\n\n==================================================\n`.trim();\n}\n\n/**\n * Combines schema prompt section with user's custom prompt\n */\nexport function combineSchemaAndUserPrompt(\n schema: JSONSchema,\n userPrompt: string\n): string {\n const schemaSection = buildSchemaPromptSection(schema);\n\n if (!userPrompt || userPrompt.trim() === '') {\n return schemaSection + '\\n\\nTASK: Extract structured data from the provided document.';\n }\n\n return schemaSection + '\\n\\n' + userPrompt;\n}\n\n// ============================================================================\n// LLM-Derived Feature Prompts\n// ============================================================================\n\n/**\n * Output format types for LLM text generation\n */\nexport type OutputFormat = 'markdown' | 'html' | 'json' | 'text';\nexport type TableFormat = 'markdown' | 'html' | 'csv';\nexport type ChunkingStrategy = 'page' | 'section' | 'paragraph' | 'semantic';\n\n/**\n * Options for LLM-derived features that are implemented via prompting\n */\nexport interface LLMDerivedPromptOptions {\n outputFormat?: OutputFormat;\n tableFormat?: TableFormat;\n pageMarkers?: boolean;\n includeConfidence?: boolean;\n includeSources?: boolean;\n includeBlockTypes?: boolean;\n extractHeaders?: boolean;\n extractFooters?: boolean;\n chunkingStrategy?: ChunkingStrategy;\n maxChunkSize?: number;\n languageHints?: string[];\n}\n\n/**\n * Builds prompt additions for output format options\n */\nexport function buildOutputFormatPrompt(options: LLMDerivedPromptOptions): string {\n const parts: string[] = [];\n\n // Output format\n if (options.outputFormat) {\n switch (options.outputFormat) {\n case 'markdown':\n parts.push('Format all text content using markdown syntax. 
Use proper headings (#, ##, ###), lists (-, *), bold (**text**), and other markdown formatting where appropriate.');\n break;\n case 'html':\n parts.push('Format all text content as valid HTML. Use semantic tags like <p>, <h1>-<h6>, <ul>, <ol>, <strong>, <em> where appropriate.');\n break;\n case 'json':\n parts.push('For text fields that contain structured data, format them as embedded JSON strings where appropriate.');\n break;\n case 'text':\n parts.push('Return plain text without any formatting. No markdown, HTML, or other markup.');\n break;\n }\n }\n\n // Table format\n if (options.tableFormat) {\n switch (options.tableFormat) {\n case 'markdown':\n parts.push('Format all tables using markdown table syntax with | column separators and header row with ---.');\n break;\n case 'html':\n parts.push('Format all tables as HTML <table> elements with <thead>, <tbody>, <tr>, <th>, and <td> tags.');\n break;\n case 'csv':\n parts.push('Format all tables as CSV with headers in the first row and comma-separated values.');\n break;\n }\n }\n\n // Page markers\n if (options.pageMarkers) {\n parts.push('Insert \"---\" page break markers between content from different pages of the document.');\n }\n\n return parts.join('\\n');\n}\n\n/**\n * Builds prompt additions for language hints\n */\nexport function buildLanguageHintsPrompt(languages: string[]): string {\n if (!languages || languages.length === 0) {\n return '';\n }\n return `The document is written in ${languages.join(', ')}. 
Extract and preserve text in the original language(s).`;\n}\n\n/**\n * Builds prompt additions for confidence scoring\n */\nexport function buildConfidencePrompt(): string {\n return `\nFor each extracted field, assess your confidence level and include it in the \"_confidence\" object:\n- Use a number from 0.0 to 1.0 where:\n - 0.9-1.0: Very high confidence - text is clear and unambiguous\n - 0.7-0.9: High confidence - minor ambiguity but likely correct\n - 0.5-0.7: Medium confidence - some uncertainty or partial visibility\n - 0.3-0.5: Low confidence - significant uncertainty\n - 0.0-0.3: Very low confidence - guessing or text was unclear\n\nInclude \"_confidence\" as a sibling object mapping field paths to their scores.\nExample: \"_confidence\": { \"invoiceNumber\": 0.95, \"amount\": 0.82 }\n`.trim();\n}\n\n/**\n * Builds prompt additions for source citations with bounding boxes\n */\nexport function buildSourcesPrompt(): string {\n return `\nFor each extracted field, identify the source location in the document and include it in the \"_sources\" array:\nEach source entry should contain:\n- \"field\": The field name/path that was extracted\n- \"text\": The exact text from the document used for extraction\n- \"bbox\": Bounding box as [y_min, x_min, y_max, x_max] normalized to 0-1000 scale\n- \"page\": Page number (0-indexed) where the text appears\n\nInclude \"_sources\" as a sibling array to your extracted data.\nExample: \"_sources\": [{ \"field\": \"invoiceNumber\", \"text\": \"INV-001\", \"bbox\": [100, 50, 120, 150], \"page\": 0 }]\n`.trim();\n}\n\n/**\n * Builds prompt additions for block type classification\n */\nexport function buildBlockClassificationPrompt(): string {\n return `\nFor each extracted element or text block, classify its type in a \"_blockTypes\" object:\n- \"title\": Main document title or major section headers\n- \"heading\": Section headings and subheadings\n- \"paragraph\": Body text paragraphs\n- \"table\": Tabular data\n- \"list\": 
Bulleted or numbered lists\n- \"header\": Page headers (repeated at top of pages)\n- \"footer\": Page footers (repeated at bottom of pages)\n- \"caption\": Image or figure captions\n- \"code\": Code blocks or preformatted text\n\nInclude \"_blockTypes\" mapping field paths to their block type.\nExample: \"_blockTypes\": { \"summary\": \"paragraph\", \"items\": \"list\" }\n`.trim();\n}\n\n/**\n * Builds prompt additions for header/footer extraction\n */\nexport function buildHeaderFooterPrompt(options: { extractHeaders?: boolean; extractFooters?: boolean }): string {\n const parts: string[] = [];\n\n if (options.extractHeaders) {\n parts.push('Identify and extract document headers (repeated content at the top of pages) into a \"_headers\" array.');\n }\n\n if (options.extractFooters) {\n parts.push('Identify and extract document footers (repeated content at the bottom of pages, like page numbers) into a \"_footers\" array.');\n }\n\n if (parts.length > 0) {\n parts.push('Each header/footer entry should include: { \"text\": \"...\", \"pages\": [0, 1, 2] } listing which pages contain it.');\n }\n\n return parts.join('\\n');\n}\n\n/**\n * Builds prompt additions for semantic chunking\n */\nexport function buildChunkingPrompt(strategy: ChunkingStrategy, maxChunkSize?: number): string {\n const sizeNote = maxChunkSize\n ? ` Keep chunks under ${maxChunkSize} characters when possible.`\n : '';\n\n switch (strategy) {\n case 'page':\n return `Organize the extracted content by page. Include page number for each chunk.${sizeNote}`;\n case 'section':\n return `Divide the document into logical sections based on headings and structure. Each section should be a coherent unit.${sizeNote}`;\n case 'paragraph':\n return `Divide the content into individual paragraphs, preserving the natural paragraph breaks from the document.${sizeNote}`;\n case 'semantic':\n return `Divide the document into semantically coherent chunks. 
Each chunk should be a self-contained unit of meaning that could stand alone.${sizeNote}`;\n default:\n return '';\n }\n}\n\n/**\n * Combines all LLM-derived feature prompts into a single prompt section\n */\nexport function buildLLMDerivedFeaturesPrompt(options: LLMDerivedPromptOptions): string {\n const parts: string[] = [];\n\n // Output format options\n const formatPrompt = buildOutputFormatPrompt(options);\n if (formatPrompt) {\n parts.push(formatPrompt);\n }\n\n // Language hints\n if (options.languageHints && options.languageHints.length > 0) {\n parts.push(buildLanguageHintsPrompt(options.languageHints));\n }\n\n // Metadata features (confidence, sources, block types)\n if (options.includeConfidence) {\n parts.push(buildConfidencePrompt());\n }\n\n if (options.includeSources) {\n parts.push(buildSourcesPrompt());\n }\n\n if (options.includeBlockTypes) {\n parts.push(buildBlockClassificationPrompt());\n }\n\n // Header/footer extraction\n if (options.extractHeaders || options.extractFooters) {\n parts.push(buildHeaderFooterPrompt(options));\n }\n\n // Chunking strategy\n if (options.chunkingStrategy) {\n parts.push(buildChunkingPrompt(options.chunkingStrategy, options.maxChunkSize));\n }\n\n if (parts.length === 0) {\n return '';\n }\n\n return `\n==================================================\nADDITIONAL OUTPUT REQUIREMENTS\n==================================================\n\n${parts.join('\\n\\n')}\n\n==================================================\n`.trim();\n}\n\n/**\n * Combines schema prompt with user prompt and LLM-derived features\n */\nexport function combineSchemaUserAndDerivedPrompts(\n schema: JSONSchema,\n userPrompt: string,\n derivedOptions?: LLMDerivedPromptOptions\n): string {\n let result = combineSchemaAndUserPrompt(schema, userPrompt);\n\n if (derivedOptions) {\n const derivedPrompt = buildLLMDerivedFeaturesPrompt(derivedOptions);\n if (derivedPrompt) {\n result = result + '\\n\\n' + derivedPrompt;\n }\n }\n\n return 
result;\n}\n"],"mappings":";AA4BO,SAAS,sBAAsB,QAAoB,SAAiB,GAAW;AACpF,MAAI,CAAC,UAAU,OAAO,WAAW,UAAU;AACzC,WAAO;AAAA,EACT;AAEA,QAAM,YAAY,KAAK,OAAO,MAAM;AACpC,MAAI,SAAS;AAGb,MAAI,OAAO,SAAS,YAAY,OAAO,YAAY;AACjD,UAAM,aAAa,OAAO;AAC1B,UAAM,WAAW,OAAO,YAAY,CAAC;AAErC,eAAW,CAAC,WAAW,WAAW,KAAK,OAAO,QAAQ,UAAU,GAAG;AACjE,YAAM,aAAa,SAAS,SAAS,SAAS;AAC9C,YAAM,iBAAiB,aAAa,gBAAgB;AAGpD,gBAAU,GAAG,SAAS,OAAO,SAAS,KAAK,cAAc;AAGzD,YAAM,OAAO,mBAAmB,WAAW;AAC3C,UAAI,MAAM;AACR,kBAAU,KAAK,IAAI;AAAA,MACrB;AAGA,UAAI,YAAY,aAAa;AAC3B,kBAAU;AAAA,EAAK,SAAS,KAAK,YAAY,WAAW;AAAA,MACtD;AAGA,UAAI,YAAY,MAAM;AACpB,kBAAU;AAAA,EAAK,SAAS,qBAAqB,YAAY,KAAK,IAAI,CAAC,MAAM,KAAK,UAAU,CAAC,CAAC,EAAE,KAAK,IAAI,CAAC;AAAA,MACxG;AAEA,gBAAU;AAGV,UAAI,YAAY,SAAS,YAAY,YAAY,YAAY;AAC3D,kBAAU,sBAAsB,aAAa,SAAS,CAAC;AAAA,MACzD;AAGA,UAAI,YAAY,SAAS,WAAW,YAAY,OAAO;AACrD,kBAAU,GAAG,SAAS;AAAA;AAEtB,cAAM,aAAa,MAAM,QAAQ,YAAY,KAAK,IAC9C,YAAY,MAAM,CAAC,IACnB,YAAY;AAChB,YAAI,cAAc,WAAW,SAAS,YAAY,WAAW,YAAY;AACvE,oBAAU,sBAAsB,YAAY,SAAS,CAAC;AAAA,QACxD,WAAW,YAAY;AACrB,gBAAM,WAAW,mBAAmB,UAAU;AAC9C,oBAAU,GAAG,SAAS,OAAO,QAAQ;AAAA;AAAA,QACvC;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAEA,SAAO;AACT;AAKA,SAAS,mBAAmB,QAA4B;AACtD,MAAI,CAAC,OAAQ,QAAO;AAEpB,MAAI,OAAO,MAAM;AAEf,UAAM,UAAU,MAAM,QAAQ,OAAO,IAAI,IAAI,OAAO,KAAK,KAAK,KAAK,IAAI,OAAO;AAE9E,QAAI,YAAY,WAAY,MAAM,QAAQ,OAAO,IAAI,KAAK,OAAO,KAAK,SAAS,OAAO,GAAI;AACxF,UAAI,OAAO,SAAS,CAAC,MAAM,QAAQ,OAAO,KAAK,KAAK,OAAO,MAAM,MAAM;AACrE,cAAM,WAAW,MAAM,QAAQ,OAAO,MAAM,IAAI,IAC5C,OAAO,MAAM,KAAK,KAAK,KAAK,IAC5B,OAAO,MAAM;AACjB,eAAO,YAAY,QAAQ;AAAA,MAC7B;AACA,aAAO;AAAA,IACT;AAEA,SAAK,YAAY,YAAa,MAAM,QAAQ,OAAO,IAAI,KAAK,OAAO,KAAK,SAAS,QAAQ,MAAO,OAAO,QAAQ;AAC7G,YAAM,cAAsC;AAAA,QAC1C,QAAQ;AAAA,QACR,QAAQ;AAAA,QACR,aAAa;AAAA,MACf;AACA,YAAM,OAAO,YAAY,OAAO,MAAM;AACtC,UAAI,MAAM;AACR,eAAO,mBAAmB,OAAO,MAAM,SAAS,IAAI;AAAA,MACtD;AACA,aAAO,mBAAmB,OAAO,MAAM;AAAA,IACzC;AACA,WAAO;AAAA,EACT;AAGA,MAAI,OAAO,OAAO;AAChB,WAAO,OAAO,MAAM,IAAI,CAAC,MAAM,mBAAmB,CAAC,CAAC,EAAE,KAAK,MAAM;AAAA,EACnE;AACA,MAAI,OAAO,OAAO;AAChB,WAAO,OAAO,MAAM,IAAI,CAAC,MAA
M,mBAAmB,CAAC,CAAC,EAAE,KAAK,MAAM;AAAA,EACnE;AAEA,SAAO;AACT;AAMO,SAAS,yBAAyB,QAA4B;AACnE,QAAM,eAAe,sBAAsB,MAAM;AAEjD,SAAO;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAOP,YAAY;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAiBZ,KAAK;AACP;AAKO,SAAS,2BACd,QACA,YACQ;AACR,QAAM,gBAAgB,yBAAyB,MAAM;AAErD,MAAI,CAAC,cAAc,WAAW,KAAK,MAAM,IAAI;AAC3C,WAAO,gBAAgB;AAAA,EACzB;AAEA,SAAO,gBAAgB,SAAS;AAClC;AAiCO,SAAS,wBAAwB,SAA0C;AAChF,QAAM,QAAkB,CAAC;AAGzB,MAAI,QAAQ,cAAc;AACxB,YAAQ,QAAQ,cAAc;AAAA,MAC5B,KAAK;AACH,cAAM,KAAK,kKAAkK;AAC7K;AAAA,MACF,KAAK;AACH,cAAM,KAAK,6HAA6H;AACxI;AAAA,MACF,KAAK;AACH,cAAM,KAAK,uGAAuG;AAClH;AAAA,MACF,KAAK;AACH,cAAM,KAAK,+EAA+E;AAC1F;AAAA,IACJ;AAAA,EACF;AAGA,MAAI,QAAQ,aAAa;AACvB,YAAQ,QAAQ,aAAa;AAAA,MAC3B,KAAK;AACH,cAAM,KAAK,iGAAiG;AAC5G;AAAA,MACF,KAAK;AACH,cAAM,KAAK,8FAA8F;AACzG;AAAA,MACF,KAAK;AACH,cAAM,KAAK,oFAAoF;AAC/F;AAAA,IACJ;AAAA,EACF;AAGA,MAAI,QAAQ,aAAa;AACvB,UAAM,KAAK,uFAAuF;AAAA,EACpG;AAEA,SAAO,MAAM,KAAK,IAAI;AACxB;AAKO,SAAS,yBAAyB,WAA6B;AACpE,MAAI,CAAC,aAAa,UAAU,WAAW,GAAG;AACxC,WAAO;AAAA,EACT;AACA,SAAO,8BAA8B,UAAU,KAAK,IAAI,CAAC;AAC3D;AAKO,SAAS,wBAAgC;AAC9C,SAAO;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAWP,KAAK;AACP;AAKO,SAAS,qBAA6B;AAC3C,SAAO;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAUP,KAAK;AACP;AAKO,SAAS,iCAAyC;AACvD,SAAO;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAcP,KAAK;AACP;AAKO,SAAS,wBAAwB,SAAyE;AAC/G,QAAM,QAAkB,CAAC;AAEzB,MAAI,QAAQ,gBAAgB;AAC1B,UAAM,KAAK,uGAAuG;AAAA,EACpH;AAEA,MAAI,QAAQ,gBAAgB;AAC1B,UAAM,KAAK,6HAA6H;AAAA,EAC1I;AAEA,MAAI,MAAM,SAAS,GAAG;AACpB,UAAM,KAAK,gHAAgH;AAAA,EAC7H;AAEA,SAAO,MAAM,KAAK,IAAI;AACxB;AAKO,SAAS,oBAAoB,UAA4B,cAA+B;AAC7F,QAAM,WAAW,eACb,sBAAsB,YAAY,+BAClC;AAEJ,UAAQ,UAAU;AAAA,IAChB,KAAK;AACH,aAAO,8EAA8E,QAAQ;AAAA,IAC/F,KAAK;AACH,aAAO,qHAAqH,QAAQ;AAAA,IACtI,KAAK;AACH,aAAO,4GAA4G,QAAQ;AAAA,IAC7H,KAAK;AACH,aAAO,uIAAuI,QAAQ;AAAA,IACxJ;AACE,aAAO;AAAA,EACX;AACF;AAKO,SAAS,8BAA8B,SAA0C;AACtF,QAAM,QAAkB,CAAC;AAGzB,QAAM,eAAe,wBAAwB,OAAO;AA
CpD,MAAI,cAAc;AAChB,UAAM,KAAK,YAAY;AAAA,EACzB;AAGA,MAAI,QAAQ,iBAAiB,QAAQ,cAAc,SAAS,GAAG;AAC7D,UAAM,KAAK,yBAAyB,QAAQ,aAAa,CAAC;AAAA,EAC5D;AAGA,MAAI,QAAQ,mBAAmB;AAC7B,UAAM,KAAK,sBAAsB,CAAC;AAAA,EACpC;AAEA,MAAI,QAAQ,gBAAgB;AAC1B,UAAM,KAAK,mBAAmB,CAAC;AAAA,EACjC;AAEA,MAAI,QAAQ,mBAAmB;AAC7B,UAAM,KAAK,+BAA+B,CAAC;AAAA,EAC7C;AAGA,MAAI,QAAQ,kBAAkB,QAAQ,gBAAgB;AACpD,UAAM,KAAK,wBAAwB,OAAO,CAAC;AAAA,EAC7C;AAGA,MAAI,QAAQ,kBAAkB;AAC5B,UAAM,KAAK,oBAAoB,QAAQ,kBAAkB,QAAQ,YAAY,CAAC;AAAA,EAChF;AAEA,MAAI,MAAM,WAAW,GAAG;AACtB,WAAO;AAAA,EACT;AAEA,SAAO;AAAA;AAAA;AAAA;AAAA;AAAA,EAKP,MAAM,KAAK,MAAM,CAAC;AAAA;AAAA;AAAA,EAGlB,KAAK;AACP;AAKO,SAAS,mCACd,QACA,YACA,gBACQ;AACR,MAAI,SAAS,2BAA2B,QAAQ,UAAU;AAE1D,MAAI,gBAAgB;AAClB,UAAM,gBAAgB,8BAA8B,cAAc;AAClE,QAAI,eAAe;AACjB,eAAS,SAAS,SAAS;AAAA,IAC7B;AAAA,EACF;AAEA,SAAO;AACT;","names":[]}
|
package/dist/index.d.ts
CHANGED
|
@@ -93,6 +93,7 @@ interface LLMResponse<T = unknown> {
|
|
|
93
93
|
metrics: ResponseMetrics;
|
|
94
94
|
reasoning?: string;
|
|
95
95
|
reasoning_details?: ReasoningDetail[];
|
|
96
|
+
metadata?: LLMExtractedMetadata;
|
|
96
97
|
}
|
|
97
98
|
/** Provider capability flags */
|
|
98
99
|
interface ProviderCapabilities {
|
|
@@ -106,6 +107,60 @@ interface ProviderCapabilities {
|
|
|
106
107
|
}
|
|
107
108
|
/** JSON output mode */
|
|
108
109
|
type JsonMode = 'strict' | 'relaxed';
|
|
110
|
+
/**
|
|
111
|
+
* LLM-derived feature options that are implemented via prompting
|
|
112
|
+
* These options are normalized across providers and work through prompt engineering
|
|
113
|
+
*/
|
|
114
|
+
interface LLMDerivedOptions {
|
|
115
|
+
/** Format for text output (markdown, html, json, text) */
|
|
116
|
+
outputFormat?: 'markdown' | 'html' | 'json' | 'text';
|
|
117
|
+
/** Format for tables within text fields */
|
|
118
|
+
tableFormat?: 'markdown' | 'html' | 'csv';
|
|
119
|
+
/** Add page break markers (---) between pages */
|
|
120
|
+
pageMarkers?: boolean;
|
|
121
|
+
/** Include per-field confidence scores (attached to result, not in JSON) */
|
|
122
|
+
includeConfidence?: boolean;
|
|
123
|
+
/** Include source citations with bounding boxes (attached to result, not in JSON) */
|
|
124
|
+
includeSources?: boolean;
|
|
125
|
+
/** Include block type classification for each extracted element */
|
|
126
|
+
includeBlockTypes?: boolean;
|
|
127
|
+
/** Extract document headers (repeated content at top of pages) */
|
|
128
|
+
extractHeaders?: boolean;
|
|
129
|
+
/** Extract document footers (repeated content at bottom of pages) */
|
|
130
|
+
extractFooters?: boolean;
|
|
131
|
+
/** Document chunking strategy */
|
|
132
|
+
chunkingStrategy?: 'page' | 'section' | 'paragraph' | 'semantic';
|
|
133
|
+
/** Maximum chunk size in characters (when using chunking) */
|
|
134
|
+
maxChunkSize?: number;
|
|
135
|
+
/** Language hints for the document */
|
|
136
|
+
languageHints?: string[];
|
|
137
|
+
}
|
|
138
|
+
/**
|
|
139
|
+
* Extracted metadata from LLM response (populated when derived options are enabled)
|
|
140
|
+
*/
|
|
141
|
+
interface LLMExtractedMetadata {
|
|
142
|
+
/** Per-field confidence scores (0-1) */
|
|
143
|
+
confidence?: Record<string, number>;
|
|
144
|
+
/** Source citations with bounding boxes */
|
|
145
|
+
sources?: Array<{
|
|
146
|
+
field: string;
|
|
147
|
+
text: string;
|
|
148
|
+
bbox?: [number, number, number, number];
|
|
149
|
+
page?: number;
|
|
150
|
+
}>;
|
|
151
|
+
/** Block type classifications */
|
|
152
|
+
blockTypes?: Record<string, string>;
|
|
153
|
+
/** Extracted headers */
|
|
154
|
+
headers?: Array<{
|
|
155
|
+
text: string;
|
|
156
|
+
pages: number[];
|
|
157
|
+
}>;
|
|
158
|
+
/** Extracted footers */
|
|
159
|
+
footers?: Array<{
|
|
160
|
+
text: string;
|
|
161
|
+
pages: number[];
|
|
162
|
+
}>;
|
|
163
|
+
}
|
|
109
164
|
/** Provider interface */
|
|
110
165
|
interface LLMProvider {
|
|
111
166
|
readonly name: string;
|
|
@@ -117,6 +172,7 @@ interface LLMProvider {
|
|
|
117
172
|
max_tokens?: number;
|
|
118
173
|
reasoning?: ReasoningConfig;
|
|
119
174
|
embedSchemaInPrompt?: boolean;
|
|
175
|
+
derivedOptions?: LLMDerivedOptions;
|
|
120
176
|
}): Promise<LLMResponse<T>>;
|
|
121
177
|
}
|
|
122
178
|
/** Reasoning configuration (normalized across providers) */
|
|
@@ -263,6 +319,82 @@ declare function buildSchemaPromptSection(schema: JSONSchema): string;
|
|
|
263
319
|
* Combines schema prompt section with user's custom prompt
|
|
264
320
|
*/
|
|
265
321
|
declare function combineSchemaAndUserPrompt(schema: JSONSchema, userPrompt: string): string;
|
|
322
|
+
/**
|
|
323
|
+
* Output format types for LLM text generation
|
|
324
|
+
*/
|
|
325
|
+
type OutputFormat = 'markdown' | 'html' | 'json' | 'text';
|
|
326
|
+
type TableFormat = 'markdown' | 'html' | 'csv';
|
|
327
|
+
type ChunkingStrategy = 'page' | 'section' | 'paragraph' | 'semantic';
|
|
328
|
+
/**
|
|
329
|
+
* Options for LLM-derived features that are implemented via prompting
|
|
330
|
+
*/
|
|
331
|
+
interface LLMDerivedPromptOptions {
|
|
332
|
+
outputFormat?: OutputFormat;
|
|
333
|
+
tableFormat?: TableFormat;
|
|
334
|
+
pageMarkers?: boolean;
|
|
335
|
+
includeConfidence?: boolean;
|
|
336
|
+
includeSources?: boolean;
|
|
337
|
+
includeBlockTypes?: boolean;
|
|
338
|
+
extractHeaders?: boolean;
|
|
339
|
+
extractFooters?: boolean;
|
|
340
|
+
chunkingStrategy?: ChunkingStrategy;
|
|
341
|
+
maxChunkSize?: number;
|
|
342
|
+
languageHints?: string[];
|
|
343
|
+
}
|
|
344
|
+
/**
|
|
345
|
+
* Builds prompt additions for output format options
|
|
346
|
+
*/
|
|
347
|
+
declare function buildOutputFormatPrompt(options: LLMDerivedPromptOptions): string;
|
|
348
|
+
/**
|
|
349
|
+
* Builds prompt additions for language hints
|
|
350
|
+
*/
|
|
351
|
+
declare function buildLanguageHintsPrompt(languages: string[]): string;
|
|
352
|
+
/**
|
|
353
|
+
* Builds prompt additions for confidence scoring
|
|
354
|
+
*/
|
|
355
|
+
declare function buildConfidencePrompt(): string;
|
|
356
|
+
/**
|
|
357
|
+
* Builds prompt additions for source citations with bounding boxes
|
|
358
|
+
*/
|
|
359
|
+
declare function buildSourcesPrompt(): string;
|
|
360
|
+
/**
|
|
361
|
+
* Builds prompt additions for block type classification
|
|
362
|
+
*/
|
|
363
|
+
declare function buildBlockClassificationPrompt(): string;
|
|
364
|
+
/**
|
|
365
|
+
* Combines all LLM-derived feature prompts into a single prompt section
|
|
366
|
+
*/
|
|
367
|
+
declare function buildLLMDerivedFeaturesPrompt(options: LLMDerivedPromptOptions): string;
|
|
368
|
+
/**
|
|
369
|
+
* Combines schema prompt with user prompt and LLM-derived features
|
|
370
|
+
*/
|
|
371
|
+
declare function combineSchemaUserAndDerivedPrompts(schema: JSONSchema, userPrompt: string, derivedOptions?: LLMDerivedPromptOptions): string;
|
|
372
|
+
|
|
373
|
+
/**
|
|
374
|
+
* Utility for extracting metadata from LLM responses
|
|
375
|
+
* Handles the `_` prefixed fields that contain confidence, sources, etc.
|
|
376
|
+
*/
|
|
377
|
+
|
|
378
|
+
/**
|
|
379
|
+
* Extracts metadata fields from a JSON response and returns clean JSON + metadata
|
|
380
|
+
*
|
|
381
|
+
* @param json - The raw JSON response from the LLM (may contain _ prefixed fields)
|
|
382
|
+
* @returns Object with clean JSON (metadata removed) and extracted metadata
|
|
383
|
+
*/
|
|
384
|
+
declare function extractMetadataFromResponse<T>(json: unknown): {
|
|
385
|
+
json: T;
|
|
386
|
+
metadata?: LLMExtractedMetadata;
|
|
387
|
+
};
|
|
388
|
+
/**
|
|
389
|
+
* Checks if derived options require metadata extraction
|
|
390
|
+
*/
|
|
391
|
+
declare function shouldExtractMetadata(derivedOptions?: {
|
|
392
|
+
includeConfidence?: boolean;
|
|
393
|
+
includeSources?: boolean;
|
|
394
|
+
includeBlockTypes?: boolean;
|
|
395
|
+
extractHeaders?: boolean;
|
|
396
|
+
extractFooters?: boolean;
|
|
397
|
+
}): boolean;
|
|
266
398
|
|
|
267
399
|
/**
|
|
268
400
|
* Factory function type for creating provider instances
|
|
@@ -331,6 +463,7 @@ declare class OpenAIProvider implements LLMProvider {
|
|
|
331
463
|
max_tokens?: number;
|
|
332
464
|
reasoning?: ReasoningConfig;
|
|
333
465
|
embedSchemaInPrompt?: boolean;
|
|
466
|
+
derivedOptions?: LLMDerivedOptions;
|
|
334
467
|
}): Promise<LLMResponse<T>>;
|
|
335
468
|
private buildReasoningConfig;
|
|
336
469
|
private buildMessages;
|
|
@@ -355,6 +488,7 @@ declare class AnthropicProvider implements LLMProvider {
|
|
|
355
488
|
max_tokens?: number;
|
|
356
489
|
reasoning?: ReasoningConfig;
|
|
357
490
|
embedSchemaInPrompt?: boolean;
|
|
491
|
+
derivedOptions?: LLMDerivedOptions;
|
|
358
492
|
}): Promise<LLMResponse<T>>;
|
|
359
493
|
private buildNativeThinkingConfig;
|
|
360
494
|
private translateToOpenRouterFormat;
|
|
@@ -394,6 +528,7 @@ declare class GoogleProvider implements LLMProvider {
|
|
|
394
528
|
max_tokens?: number;
|
|
395
529
|
reasoning?: ReasoningConfig;
|
|
396
530
|
embedSchemaInPrompt?: boolean;
|
|
531
|
+
derivedOptions?: LLMDerivedOptions;
|
|
397
532
|
}): Promise<LLMResponse<T>>;
|
|
398
533
|
private buildNativeThinkingConfig;
|
|
399
534
|
private translateToOpenRouterFormat;
|
|
@@ -421,6 +556,7 @@ declare class XAIProvider implements LLMProvider {
|
|
|
421
556
|
max_tokens?: number;
|
|
422
557
|
reasoning?: ReasoningConfig;
|
|
423
558
|
embedSchemaInPrompt?: boolean;
|
|
559
|
+
derivedOptions?: LLMDerivedOptions;
|
|
424
560
|
}): Promise<LLMResponse<T>>;
|
|
425
561
|
private buildReasoningConfig;
|
|
426
562
|
private buildMessages;
|
|
@@ -445,8 +581,6 @@ declare class FallbackManager {
|
|
|
445
581
|
}): Promise<LLMResponse<T>>;
|
|
446
582
|
private createProvider;
|
|
447
583
|
private validateResponse;
|
|
448
|
-
private isRetryable;
|
|
449
|
-
private calculateDelay;
|
|
450
584
|
private sleep;
|
|
451
585
|
private isCircuitOpen;
|
|
452
586
|
private recordSuccess;
|
|
@@ -458,6 +592,98 @@ declare class FallbackManager {
|
|
|
458
592
|
*/
|
|
459
593
|
declare function adaptToCoreLLMProvider(provider: LLMProvider): LLMJsonProvider;
|
|
460
594
|
|
|
595
|
+
/**
|
|
596
|
+
* Schema for Gemini bounding box detection
|
|
597
|
+
* Used for OCR-style parsing with spatial information
|
|
598
|
+
*
|
|
599
|
+
* Note: Gemini uses [y_min, x_min, y_max, x_max] coordinate order (Y first, not X!)
|
|
600
|
+
* Coordinates are normalized to 0-1000 (divide by 1000, multiply by image dimensions)
|
|
601
|
+
*/
|
|
602
|
+
|
|
603
|
+
/**
|
|
604
|
+
* Block types for document structure classification
|
|
605
|
+
*/
|
|
606
|
+
declare const BLOCK_TYPES: readonly ["title", "paragraph", "table", "list", "header", "footer", "caption", "code", "image", "form", "signature", "handwriting"];
|
|
607
|
+
type BlockType = typeof BLOCK_TYPES[number];
|
|
608
|
+
/**
|
|
609
|
+
* Single text block with bounding box
|
|
610
|
+
*/
|
|
611
|
+
interface GeminiBoundingBoxBlock {
|
|
612
|
+
/**
|
|
613
|
+
* Bounding box coordinates: [y_min, x_min, y_max, x_max]
|
|
614
|
+
* Normalized to 0-1000 (Gemini format)
|
|
615
|
+
*/
|
|
616
|
+
box_2d: [number, number, number, number];
|
|
617
|
+
/**
|
|
618
|
+
* Text content within the bounding box
|
|
619
|
+
*/
|
|
620
|
+
text: string;
|
|
621
|
+
/**
|
|
622
|
+
* Block type classification
|
|
623
|
+
*/
|
|
624
|
+
type: BlockType;
|
|
625
|
+
/**
|
|
626
|
+
* Confidence level (optional)
|
|
627
|
+
*/
|
|
628
|
+
confidence?: 'high' | 'medium' | 'low';
|
|
629
|
+
/**
|
|
630
|
+
* Page number (0-indexed, for multi-page documents)
|
|
631
|
+
*/
|
|
632
|
+
page?: number;
|
|
633
|
+
}
|
|
634
|
+
/**
|
|
635
|
+
* JSON Schema for Gemini bounding box extraction
|
|
636
|
+
* This schema is used with Gemini models to extract text with spatial information
|
|
637
|
+
*/
|
|
638
|
+
declare const geminiBoundingBoxSchema: UnifiedSchema<GeminiBoundingBoxBlock[]>;
|
|
639
|
+
/**
|
|
640
|
+
* Prompt for Gemini bounding box extraction
|
|
641
|
+
* This activates Gemini's spatial understanding capabilities
|
|
642
|
+
*/
|
|
643
|
+
declare const GEMINI_BBOX_EXTRACTION_PROMPT = "Analyze this document and extract all text with precise bounding box locations.\n\nFor each text block, provide:\n- box_2d: Bounding box as [y_min, x_min, y_max, x_max] normalized to 0-1000\n- text: The exact text content\n- type: Block classification (title, paragraph, table, list, header, footer, caption, code, image, form, signature, handwriting)\n- confidence: Your confidence level (high, medium, low)\n- page: Page number (0-indexed) for multi-page documents\n\nIMPORTANT coordinate format:\n- Use [y_min, x_min, y_max, x_max] order (Y coordinate first, then X)\n- Normalize all values to 0-1000 range (top-left is [0, 0], bottom-right is [1000, 1000])\n\nReturn ONLY a valid JSON array, no other text.";
|
|
644
|
+
/**
|
|
645
|
+
* Normalized bounding box format (0-1 range)
|
|
646
|
+
* This is the SDK's standard format after conversion from Gemini's 0-1000 format
|
|
647
|
+
*/
|
|
648
|
+
interface NormalizedBBox {
|
|
649
|
+
x: number;
|
|
650
|
+
y: number;
|
|
651
|
+
width: number;
|
|
652
|
+
height: number;
|
|
653
|
+
}
|
|
654
|
+
/**
|
|
655
|
+
* Convert Gemini 0-1000 coordinates to normalized 0-1 format
|
|
656
|
+
* Note: Gemini uses [y_min, x_min, y_max, x_max] order
|
|
657
|
+
*
|
|
658
|
+
* @param geminiBBox - Bounding box from Gemini [y_min, x_min, y_max, x_max] (0-1000)
|
|
659
|
+
* @returns Normalized bounding box with x, y, width, height (0-1)
|
|
660
|
+
*/
|
|
661
|
+
declare function normalizeGeminiBBox(geminiBBox: [number, number, number, number]): NormalizedBBox;
|
|
662
|
+
/**
|
|
663
|
+
* Convert normalized 0-1 format back to Gemini 0-1000 coordinates
|
|
664
|
+
*
|
|
665
|
+
* @param bbox - Normalized bounding box (0-1)
|
|
666
|
+
* @returns Gemini format [y_min, x_min, y_max, x_max] (0-1000)
|
|
667
|
+
*/
|
|
668
|
+
declare function toGeminiBBox(bbox: NormalizedBBox): [number, number, number, number];
|
|
669
|
+
/**
|
|
670
|
+
* Convert Gemini bounding box block to DocumentIR-compatible format
|
|
671
|
+
*/
|
|
672
|
+
interface DocumentBlock {
|
|
673
|
+
text: string;
|
|
674
|
+
bbox: NormalizedBBox;
|
|
675
|
+
type: BlockType;
|
|
676
|
+
confidence?: number;
|
|
677
|
+
page?: number;
|
|
678
|
+
}
|
|
679
|
+
/**
|
|
680
|
+
* Convert Gemini extraction result to DocumentIR blocks
|
|
681
|
+
*
|
|
682
|
+
* @param geminiBlocks - Raw blocks from Gemini extraction
|
|
683
|
+
* @returns Document blocks with normalized coordinates
|
|
684
|
+
*/
|
|
685
|
+
declare function convertGeminiBlocksToDocumentBlocks(geminiBlocks: GeminiBoundingBoxBlock[]): DocumentBlock[];
|
|
686
|
+
|
|
461
687
|
/**
|
|
462
688
|
* LLM Provider Metadata
|
|
463
689
|
*
|
|
@@ -1419,4 +1645,4 @@ declare function createVLMProvider(config: {
|
|
|
1419
1645
|
*/
|
|
1420
1646
|
declare function buildLLMProvider(config: FallbackConfig): VLMProvider;
|
|
1421
1647
|
|
|
1422
|
-
export { type AccessMethod, AnthropicProvider, type CircuitBreakerState, type FallbackConfig, FallbackManager, GoogleProvider, type ImageInput, type JsonMode, type LLMModelMetadata, type LLMProvider, type LLMProviderMetadata, type LLMProviderType, type LLMResponse, type MultimodalInput, type NodeType, OpenAIProvider, type PDFInput, PROVIDER_METADATA, type ProviderCapabilities, type ProviderConfig, type ProviderFactory, type ProviderInputType, type ProviderType, type ReasoningConfig, type ReasoningDetail, type ResourceLimits, type ResponseMetrics, SUPPORTED_IMAGE_TYPES, SchemaTranslator, type SupportedImageMimeType, type UnifiedSchema, XAIProvider, adaptToCoreLLMProvider, buildLLMProvider, buildSchemaPromptSection, combineSchemaAndUserPrompt, compareNativeVsOpenRouter, createProviderFromRegistry, createVLMProvider, estimateCost, formatSchemaForPrompt, getCheapestProvider, getProvidersForNode, isImageTypeSupported, isProviderCompatibleWithNode, providerRegistry, registerProvider, supportsPDFsInline };
|
|
1648
|
+
export { type AccessMethod, AnthropicProvider, BLOCK_TYPES, type BlockType, type CircuitBreakerState, type DocumentBlock, type FallbackConfig, FallbackManager, GEMINI_BBOX_EXTRACTION_PROMPT, type GeminiBoundingBoxBlock, GoogleProvider, type ImageInput, type JsonMode, type LLMDerivedOptions, type LLMExtractedMetadata, type LLMModelMetadata, type LLMProvider, type LLMProviderMetadata, type LLMProviderType, type LLMResponse, type MultimodalInput, type NodeType, type NormalizedBBox, OpenAIProvider, type PDFInput, PROVIDER_METADATA, type ProviderCapabilities, type ProviderConfig, type ProviderFactory, type ProviderInputType, type ProviderType, type ReasoningConfig, type ReasoningDetail, type ResourceLimits, type ResponseMetrics, SUPPORTED_IMAGE_TYPES, SchemaTranslator, type SupportedImageMimeType, type UnifiedSchema, XAIProvider, adaptToCoreLLMProvider, buildBlockClassificationPrompt, buildConfidencePrompt, buildLLMDerivedFeaturesPrompt, buildLLMProvider, buildLanguageHintsPrompt, buildOutputFormatPrompt, buildSchemaPromptSection, buildSourcesPrompt, combineSchemaAndUserPrompt, combineSchemaUserAndDerivedPrompts, compareNativeVsOpenRouter, convertGeminiBlocksToDocumentBlocks, createProviderFromRegistry, createVLMProvider, estimateCost, extractMetadataFromResponse, formatSchemaForPrompt, geminiBoundingBoxSchema, getCheapestProvider, getProvidersForNode, isImageTypeSupported, isProviderCompatibleWithNode, normalizeGeminiBBox, providerRegistry, registerProvider, shouldExtractMetadata, supportsPDFsInline, toGeminiBBox };
|