@doclo/providers-llm 0.1.6 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/chunk-7YPJIWRM.js ADDED
@@ -0,0 +1,291 @@
+ // src/schema-prompt-formatter.ts
+ function formatSchemaForPrompt(schema, indent = 0) {
+ if (!schema || typeof schema !== "object") {
+ return "";
+ }
+ const indentStr = " ".repeat(indent);
+ let result = "";
+ if (schema.type === "object" && schema.properties) {
+ const properties = schema.properties;
+ const required = schema.required || [];
+ for (const [fieldName, fieldSchema] of Object.entries(properties)) {
+ const isRequired = required.includes(fieldName);
+ const requiredMarker = isRequired ? " (REQUIRED)" : " (optional)";
+ result += `${indentStr}- \`${fieldName}\`${requiredMarker}`;
+ const type = getTypeDescription(fieldSchema);
+ if (type) {
+ result += `: ${type}`;
+ }
+ if (fieldSchema.description) {
+ result += `
+ ${indentStr} ${fieldSchema.description}`;
+ }
+ if (fieldSchema.enum) {
+ result += `
+ ${indentStr} Allowed values: ${fieldSchema.enum.map((v) => JSON.stringify(v)).join(", ")}`;
+ }
+ result += "\n";
+ if (fieldSchema.type === "object" && fieldSchema.properties) {
+ result += formatSchemaForPrompt(fieldSchema, indent + 1);
+ }
+ if (fieldSchema.type === "array" && fieldSchema.items) {
+ result += `${indentStr} Array items:
+ `;
+ const itemSchema = Array.isArray(fieldSchema.items) ? fieldSchema.items[0] : fieldSchema.items;
+ if (itemSchema && itemSchema.type === "object" && itemSchema.properties) {
+ result += formatSchemaForPrompt(itemSchema, indent + 2);
+ } else if (itemSchema) {
+ const itemType = getTypeDescription(itemSchema);
+ result += `${indentStr} ${itemType}
+ `;
+ }
+ }
+ }
+ }
+ return result;
+ }
+ function getTypeDescription(schema) {
+ if (!schema) return "any";
+ if (schema.type) {
+ const typeStr = Array.isArray(schema.type) ? schema.type.join(" | ") : schema.type;
+ if (typeStr === "array" || Array.isArray(schema.type) && schema.type.includes("array")) {
+ if (schema.items && !Array.isArray(schema.items) && schema.items.type) {
+ const itemType = Array.isArray(schema.items.type) ? schema.items.type.join(" | ") : schema.items.type;
+ return `array of ${itemType}`;
+ }
+ return "array";
+ }
+ if ((typeStr === "string" || Array.isArray(schema.type) && schema.type.includes("string")) && schema.format) {
+ const formatHints = {
+ "date": "YYYY-MM-DD",
+ "time": "HH:MM or HH:MM:SS",
+ "date-time": "YYYY-MM-DDTHH:MM:SS (ISO 8601)"
+ };
+ const hint = formatHints[schema.format];
+ if (hint) {
+ return `string (format: ${schema.format}, use ${hint})`;
+ }
+ return `string (format: ${schema.format})`;
+ }
+ return typeStr;
+ }
+ if (schema.anyOf) {
+ return schema.anyOf.map((s) => getTypeDescription(s)).join(" OR ");
+ }
+ if (schema.oneOf) {
+ return schema.oneOf.map((s) => getTypeDescription(s)).join(" OR ");
+ }
+ return "any";
+ }
+ function buildSchemaPromptSection(schema) {
+ const schemaFields = formatSchemaForPrompt(schema);
+ return `
+ ==================================================
+ CRITICAL: OUTPUT STRUCTURE REQUIREMENTS
+ ==================================================
+
+ YOU MUST RETURN JSON MATCHING THIS EXACT STRUCTURE:
+
+ ${schemaFields}
+
+ CRITICAL FIELD NAME REQUIREMENTS:
+ \u2713 Use EXACTLY the field names shown above (character-for-character match)
+ \u2713 Preserve the exact casing (e.g., "fullName", not "full_name" or "FullName")
+ \u2713 Do NOT abbreviate field names (e.g., "dob" instead of "dateOfBirth")
+ \u2713 Do NOT invent alternative names (e.g., "directorName" instead of "fullName")
+ \u2713 Do NOT use snake_case if the schema uses camelCase
+ \u2713 Do NOT flatten nested structures or rename nested fields
+ \u2713 The schema above is the SINGLE SOURCE OF TRUTH for field naming
+
+ MISSING DATA:
+ - If a required field has no data in the document, use null
+ - If an optional field has no data, you may omit it or use null
+ - Do NOT invent data that isn't in the document
+
+ ==================================================
+ `.trim();
+ }
+ function combineSchemaAndUserPrompt(schema, userPrompt) {
+ const schemaSection = buildSchemaPromptSection(schema);
+ if (!userPrompt || userPrompt.trim() === "") {
+ return schemaSection + "\n\nTASK: Extract structured data from the provided document.";
+ }
+ return schemaSection + "\n\n" + userPrompt;
+ }
+ function buildOutputFormatPrompt(options) {
+ const parts = [];
+ if (options.outputFormat) {
+ switch (options.outputFormat) {
+ case "markdown":
+ parts.push("Format all text content using markdown syntax. Use proper headings (#, ##, ###), lists (-, *), bold (**text**), and other markdown formatting where appropriate.");
+ break;
+ case "html":
+ parts.push("Format all text content as valid HTML. Use semantic tags like <p>, <h1>-<h6>, <ul>, <ol>, <strong>, <em> where appropriate.");
+ break;
+ case "json":
+ parts.push("For text fields that contain structured data, format them as embedded JSON strings where appropriate.");
+ break;
+ case "text":
+ parts.push("Return plain text without any formatting. No markdown, HTML, or other markup.");
+ break;
+ }
+ }
+ if (options.tableFormat) {
+ switch (options.tableFormat) {
+ case "markdown":
+ parts.push("Format all tables using markdown table syntax with | column separators and header row with ---.");
+ break;
+ case "html":
+ parts.push("Format all tables as HTML <table> elements with <thead>, <tbody>, <tr>, <th>, and <td> tags.");
+ break;
+ case "csv":
+ parts.push("Format all tables as CSV with headers in the first row and comma-separated values.");
+ break;
+ }
+ }
+ if (options.pageMarkers) {
+ parts.push('Insert "---" page break markers between content from different pages of the document.');
+ }
+ return parts.join("\n");
+ }
+ function buildLanguageHintsPrompt(languages) {
+ if (!languages || languages.length === 0) {
+ return "";
+ }
+ return `The document is written in ${languages.join(", ")}. Extract and preserve text in the original language(s).`;
+ }
+ function buildConfidencePrompt() {
+ return `
+ For each extracted field, assess your confidence level and include it in the "_confidence" object:
+ - Use a number from 0.0 to 1.0 where:
+ - 0.9-1.0: Very high confidence - text is clear and unambiguous
+ - 0.7-0.9: High confidence - minor ambiguity but likely correct
+ - 0.5-0.7: Medium confidence - some uncertainty or partial visibility
+ - 0.3-0.5: Low confidence - significant uncertainty
+ - 0.0-0.3: Very low confidence - guessing or text was unclear
+
+ Include "_confidence" as a sibling object mapping field paths to their scores.
+ Example: "_confidence": { "invoiceNumber": 0.95, "amount": 0.82 }
+ `.trim();
+ }
+ function buildSourcesPrompt() {
+ return `
+ For each extracted field, identify the source location in the document and include it in the "_sources" array:
+ Each source entry should contain:
+ - "field": The field name/path that was extracted
+ - "text": The exact text from the document used for extraction
+ - "bbox": Bounding box as [y_min, x_min, y_max, x_max] normalized to 0-1000 scale
+ - "page": Page number (0-indexed) where the text appears
+
+ Include "_sources" as a sibling array to your extracted data.
+ Example: "_sources": [{ "field": "invoiceNumber", "text": "INV-001", "bbox": [100, 50, 120, 150], "page": 0 }]
+ `.trim();
+ }
+ function buildBlockClassificationPrompt() {
+ return `
+ For each extracted element or text block, classify its type in a "_blockTypes" object:
+ - "title": Main document title or major section headers
+ - "heading": Section headings and subheadings
+ - "paragraph": Body text paragraphs
+ - "table": Tabular data
+ - "list": Bulleted or numbered lists
+ - "header": Page headers (repeated at top of pages)
+ - "footer": Page footers (repeated at bottom of pages)
+ - "caption": Image or figure captions
+ - "code": Code blocks or preformatted text
+
+ Include "_blockTypes" mapping field paths to their block type.
+ Example: "_blockTypes": { "summary": "paragraph", "items": "list" }
+ `.trim();
+ }
+ function buildHeaderFooterPrompt(options) {
+ const parts = [];
+ if (options.extractHeaders) {
+ parts.push('Identify and extract document headers (repeated content at the top of pages) into a "_headers" array.');
+ }
+ if (options.extractFooters) {
+ parts.push('Identify and extract document footers (repeated content at the bottom of pages, like page numbers) into a "_footers" array.');
+ }
+ if (parts.length > 0) {
+ parts.push('Each header/footer entry should include: { "text": "...", "pages": [0, 1, 2] } listing which pages contain it.');
+ }
+ return parts.join("\n");
+ }
+ function buildChunkingPrompt(strategy, maxChunkSize) {
+ const sizeNote = maxChunkSize ? ` Keep chunks under ${maxChunkSize} characters when possible.` : "";
+ switch (strategy) {
+ case "page":
+ return `Organize the extracted content by page. Include page number for each chunk.${sizeNote}`;
+ case "section":
+ return `Divide the document into logical sections based on headings and structure. Each section should be a coherent unit.${sizeNote}`;
+ case "paragraph":
+ return `Divide the content into individual paragraphs, preserving the natural paragraph breaks from the document.${sizeNote}`;
+ case "semantic":
+ return `Divide the document into semantically coherent chunks. Each chunk should be a self-contained unit of meaning that could stand alone.${sizeNote}`;
+ default:
+ return "";
+ }
+ }
+ function buildLLMDerivedFeaturesPrompt(options) {
+ const parts = [];
+ const formatPrompt = buildOutputFormatPrompt(options);
+ if (formatPrompt) {
+ parts.push(formatPrompt);
+ }
+ if (options.languageHints && options.languageHints.length > 0) {
+ parts.push(buildLanguageHintsPrompt(options.languageHints));
+ }
+ if (options.includeConfidence) {
+ parts.push(buildConfidencePrompt());
+ }
+ if (options.includeSources) {
+ parts.push(buildSourcesPrompt());
+ }
+ if (options.includeBlockTypes) {
+ parts.push(buildBlockClassificationPrompt());
+ }
+ if (options.extractHeaders || options.extractFooters) {
+ parts.push(buildHeaderFooterPrompt(options));
+ }
+ if (options.chunkingStrategy) {
+ parts.push(buildChunkingPrompt(options.chunkingStrategy, options.maxChunkSize));
+ }
+ if (parts.length === 0) {
+ return "";
+ }
+ return `
+ ==================================================
+ ADDITIONAL OUTPUT REQUIREMENTS
+ ==================================================
+
+ ${parts.join("\n\n")}
+
+ ==================================================
+ `.trim();
+ }
+ function combineSchemaUserAndDerivedPrompts(schema, userPrompt, derivedOptions) {
+ let result = combineSchemaAndUserPrompt(schema, userPrompt);
+ if (derivedOptions) {
+ const derivedPrompt = buildLLMDerivedFeaturesPrompt(derivedOptions);
+ if (derivedPrompt) {
+ result = result + "\n\n" + derivedPrompt;
+ }
+ }
+ return result;
+ }
+
+ export {
+ formatSchemaForPrompt,
+ buildSchemaPromptSection,
+ combineSchemaAndUserPrompt,
+ buildOutputFormatPrompt,
+ buildLanguageHintsPrompt,
+ buildConfidencePrompt,
+ buildSourcesPrompt,
+ buildBlockClassificationPrompt,
+ buildHeaderFooterPrompt,
+ buildChunkingPrompt,
+ buildLLMDerivedFeaturesPrompt,
+ combineSchemaUserAndDerivedPrompts
+ };
+ //# sourceMappingURL=chunk-7YPJIWRM.js.map
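For orientation, a minimal usage sketch of the prompt helpers in this chunk; the schema, task text, and option values below are illustrative, not taken from the package:

// Illustrative usage of the prompt builders added in this chunk.
import { combineSchemaUserAndDerivedPrompts } from "@doclo/providers-llm";

const invoiceSchema = {
  type: "object",
  required: ["invoiceNumber"],
  properties: {
    invoiceNumber: { type: "string", description: "Invoice identifier" },
    issueDate: { type: "string", format: "date" },
    lineItems: {
      type: "array",
      items: {
        type: "object",
        properties: { description: { type: "string" }, amount: { type: "number" } }
      }
    }
  }
};

const prompt = combineSchemaUserAndDerivedPrompts(invoiceSchema, "Extract the invoice fields.", {
  outputFormat: "markdown",
  tableFormat: "markdown",
  includeConfidence: true,
  languageHints: ["English", "German"]
});
// The result contains the CRITICAL OUTPUT STRUCTURE block rendered from the schema,
// the user task, and an ADDITIONAL OUTPUT REQUIREMENTS block for the derived options.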
package/dist/chunk-7YPJIWRM.js.map ADDED
@@ -0,0 +1 @@
+ {"version":3,"sources":["../src/schema-prompt-formatter.ts"],"sourcesContent":["..."],"mappings":"...","names":[]}
package/dist/index.d.ts CHANGED
@@ -93,6 +93,7 @@ interface LLMResponse<T = unknown> {
  metrics: ResponseMetrics;
  reasoning?: string;
  reasoning_details?: ReasoningDetail[];
+ metadata?: LLMExtractedMetadata;
  }
  /** Provider capability flags */
  interface ProviderCapabilities {
@@ -106,6 +107,60 @@ interface ProviderCapabilities {
  }
  /** JSON output mode */
  type JsonMode = 'strict' | 'relaxed';
+ /**
+ * LLM-derived feature options that are implemented via prompting
+ * These options are normalized across providers and work through prompt engineering
+ */
+ interface LLMDerivedOptions {
+ /** Format for text output (markdown, html, json, text) */
+ outputFormat?: 'markdown' | 'html' | 'json' | 'text';
+ /** Format for tables within text fields */
+ tableFormat?: 'markdown' | 'html' | 'csv';
+ /** Add page break markers (---) between pages */
+ pageMarkers?: boolean;
+ /** Include per-field confidence scores (attached to result, not in JSON) */
+ includeConfidence?: boolean;
+ /** Include source citations with bounding boxes (attached to result, not in JSON) */
+ includeSources?: boolean;
+ /** Include block type classification for each extracted element */
+ includeBlockTypes?: boolean;
+ /** Extract document headers (repeated content at top of pages) */
+ extractHeaders?: boolean;
+ /** Extract document footers (repeated content at bottom of pages) */
+ extractFooters?: boolean;
+ /** Document chunking strategy */
+ chunkingStrategy?: 'page' | 'section' | 'paragraph' | 'semantic';
+ /** Maximum chunk size in characters (when using chunking) */
+ maxChunkSize?: number;
+ /** Language hints for the document */
+ languageHints?: string[];
+ }
+ /**
+ * Extracted metadata from LLM response (populated when derived options are enabled)
+ */
+ interface LLMExtractedMetadata {
+ /** Per-field confidence scores (0-1) */
+ confidence?: Record<string, number>;
+ /** Source citations with bounding boxes */
+ sources?: Array<{
+ field: string;
+ text: string;
+ bbox?: [number, number, number, number];
+ page?: number;
+ }>;
+ /** Block type classifications */
+ blockTypes?: Record<string, string>;
+ /** Extracted headers */
+ headers?: Array<{
+ text: string;
+ pages: number[];
+ }>;
+ /** Extracted footers */
+ footers?: Array<{
+ text: string;
+ pages: number[];
+ }>;
+ }
  /** Provider interface */
  interface LLMProvider {
  readonly name: string;
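To make the two new interfaces above concrete, a small illustrative sketch (field values are invented for the example):

// Illustrative: a derived-options value and the metadata shape it can produce on LLMResponse.metadata.
import type { LLMDerivedOptions, LLMExtractedMetadata } from "@doclo/providers-llm";

const derivedOptions: LLMDerivedOptions = {
  outputFormat: "markdown",
  includeConfidence: true,
  includeSources: true,
  extractFooters: true,
  chunkingStrategy: "section",
  maxChunkSize: 2000
};

// With these flags set, a response's metadata may look like:
const exampleMetadata: LLMExtractedMetadata = {
  confidence: { invoiceNumber: 0.95, amount: 0.82 },
  sources: [{ field: "invoiceNumber", text: "INV-001", bbox: [100, 50, 120, 150], page: 0 }],
  footers: [{ text: "Page 1 of 3", pages: [0] }]
};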
@@ -117,6 +172,7 @@ interface LLMProvider {
  max_tokens?: number;
  reasoning?: ReasoningConfig;
  embedSchemaInPrompt?: boolean;
+ derivedOptions?: LLMDerivedOptions;
  }): Promise<LLMResponse<T>>;
  }
  /** Reasoning configuration (normalized across providers) */
@@ -263,6 +319,82 @@ declare function buildSchemaPromptSection(schema: JSONSchema): string;
  * Combines schema prompt section with user's custom prompt
  */
  declare function combineSchemaAndUserPrompt(schema: JSONSchema, userPrompt: string): string;
+ /**
+ * Output format types for LLM text generation
+ */
+ type OutputFormat = 'markdown' | 'html' | 'json' | 'text';
+ type TableFormat = 'markdown' | 'html' | 'csv';
+ type ChunkingStrategy = 'page' | 'section' | 'paragraph' | 'semantic';
+ /**
+ * Options for LLM-derived features that are implemented via prompting
+ */
+ interface LLMDerivedPromptOptions {
+ outputFormat?: OutputFormat;
+ tableFormat?: TableFormat;
+ pageMarkers?: boolean;
+ includeConfidence?: boolean;
+ includeSources?: boolean;
+ includeBlockTypes?: boolean;
+ extractHeaders?: boolean;
+ extractFooters?: boolean;
+ chunkingStrategy?: ChunkingStrategy;
+ maxChunkSize?: number;
+ languageHints?: string[];
+ }
+ /**
+ * Builds prompt additions for output format options
+ */
+ declare function buildOutputFormatPrompt(options: LLMDerivedPromptOptions): string;
+ /**
+ * Builds prompt additions for language hints
+ */
+ declare function buildLanguageHintsPrompt(languages: string[]): string;
+ /**
+ * Builds prompt additions for confidence scoring
+ */
+ declare function buildConfidencePrompt(): string;
+ /**
+ * Builds prompt additions for source citations with bounding boxes
+ */
+ declare function buildSourcesPrompt(): string;
+ /**
+ * Builds prompt additions for block type classification
+ */
+ declare function buildBlockClassificationPrompt(): string;
+ /**
+ * Combines all LLM-derived feature prompts into a single prompt section
+ */
+ declare function buildLLMDerivedFeaturesPrompt(options: LLMDerivedPromptOptions): string;
+ /**
+ * Combines schema prompt with user prompt and LLM-derived features
+ */
+ declare function combineSchemaUserAndDerivedPrompts(schema: JSONSchema, userPrompt: string, derivedOptions?: LLMDerivedPromptOptions): string;
+
+ /**
+ * Utility for extracting metadata from LLM responses
+ * Handles the `_` prefixed fields that contain confidence, sources, etc.
+ */
+
+ /**
+ * Extracts metadata fields from a JSON response and returns clean JSON + metadata
+ *
+ * @param json - The raw JSON response from the LLM (may contain _ prefixed fields)
+ * @returns Object with clean JSON (metadata removed) and extracted metadata
+ */
+ declare function extractMetadataFromResponse<T>(json: unknown): {
+ json: T;
+ metadata?: LLMExtractedMetadata;
+ };
+ /**
+ * Checks if derived options require metadata extraction
+ */
+ declare function shouldExtractMetadata(derivedOptions?: {
+ includeConfidence?: boolean;
+ includeSources?: boolean;
+ includeBlockTypes?: boolean;
+ extractHeaders?: boolean;
+ extractFooters?: boolean;
+ }): boolean;

  /**
  * Factory function type for creating provider instances
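A sketch of how the two helpers declared above fit together; the raw response object is illustrative:

// Illustrative: splitting "_"-prefixed metadata out of a raw LLM JSON response.
import { extractMetadataFromResponse, shouldExtractMetadata } from "@doclo/providers-llm";

const raw = {
  invoiceNumber: "INV-001",
  amount: 1250,
  _confidence: { invoiceNumber: 0.95, amount: 0.82 },
  _sources: [{ field: "invoiceNumber", text: "INV-001", bbox: [100, 50, 120, 150], page: 0 }]
};

if (shouldExtractMetadata({ includeConfidence: true, includeSources: true })) {
  const { json, metadata } = extractMetadataFromResponse<{ invoiceNumber: string; amount: number }>(raw);
  // json                 -> { invoiceNumber: "INV-001", amount: 1250 } (metadata fields stripped)
  // metadata?.confidence -> { invoiceNumber: 0.95, amount: 0.82 }
}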
@@ -331,6 +463,7 @@ declare class OpenAIProvider implements LLMProvider {
  max_tokens?: number;
  reasoning?: ReasoningConfig;
  embedSchemaInPrompt?: boolean;
+ derivedOptions?: LLMDerivedOptions;
  }): Promise<LLMResponse<T>>;
  private buildReasoningConfig;
  private buildMessages;
@@ -355,6 +488,7 @@ declare class AnthropicProvider implements LLMProvider {
  max_tokens?: number;
  reasoning?: ReasoningConfig;
  embedSchemaInPrompt?: boolean;
+ derivedOptions?: LLMDerivedOptions;
  }): Promise<LLMResponse<T>>;
  private buildNativeThinkingConfig;
  private translateToOpenRouterFormat;
@@ -394,6 +528,7 @@ declare class GoogleProvider implements LLMProvider {
  max_tokens?: number;
  reasoning?: ReasoningConfig;
  embedSchemaInPrompt?: boolean;
+ derivedOptions?: LLMDerivedOptions;
  }): Promise<LLMResponse<T>>;
  private buildNativeThinkingConfig;
  private translateToOpenRouterFormat;
@@ -421,6 +556,7 @@ declare class XAIProvider implements LLMProvider {
  max_tokens?: number;
  reasoning?: ReasoningConfig;
  embedSchemaInPrompt?: boolean;
+ derivedOptions?: LLMDerivedOptions;
  }): Promise<LLMResponse<T>>;
  private buildReasoningConfig;
  private buildMessages;
@@ -445,8 +581,6 @@ declare class FallbackManager {
  }): Promise<LLMResponse<T>>;
  private createProvider;
  private validateResponse;
- private isRetryable;
- private calculateDelay;
  private sleep;
  private isCircuitOpen;
  private recordSuccess;
@@ -458,6 +592,98 @@
  */
  declare function adaptToCoreLLMProvider(provider: LLMProvider): LLMJsonProvider;

+ /**
+ * Schema for Gemini bounding box detection
+ * Used for OCR-style parsing with spatial information
+ *
+ * Note: Gemini uses [y_min, x_min, y_max, x_max] coordinate order (Y first, not X!)
+ * Coordinates are normalized to 0-1000 (divide by 1000, multiply by image dimensions)
+ */
+
+ /**
+ * Block types for document structure classification
+ */
+ declare const BLOCK_TYPES: readonly ["title", "paragraph", "table", "list", "header", "footer", "caption", "code", "image", "form", "signature", "handwriting"];
+ type BlockType = typeof BLOCK_TYPES[number];
+ /**
+ * Single text block with bounding box
+ */
+ interface GeminiBoundingBoxBlock {
+ /**
+ * Bounding box coordinates: [y_min, x_min, y_max, x_max]
+ * Normalized to 0-1000 (Gemini format)
+ */
+ box_2d: [number, number, number, number];
+ /**
+ * Text content within the bounding box
+ */
+ text: string;
+ /**
+ * Block type classification
+ */
+ type: BlockType;
+ /**
+ * Confidence level (optional)
+ */
+ confidence?: 'high' | 'medium' | 'low';
+ /**
+ * Page number (0-indexed, for multi-page documents)
+ */
+ page?: number;
+ }
+ /**
+ * JSON Schema for Gemini bounding box extraction
+ * This schema is used with Gemini models to extract text with spatial information
+ */
+ declare const geminiBoundingBoxSchema: UnifiedSchema<GeminiBoundingBoxBlock[]>;
+ /**
+ * Prompt for Gemini bounding box extraction
+ * This activates Gemini's spatial understanding capabilities
+ */
+ declare const GEMINI_BBOX_EXTRACTION_PROMPT = "Analyze this document and extract all text with precise bounding box locations.\n\nFor each text block, provide:\n- box_2d: Bounding box as [y_min, x_min, y_max, x_max] normalized to 0-1000\n- text: The exact text content\n- type: Block classification (title, paragraph, table, list, header, footer, caption, code, image, form, signature, handwriting)\n- confidence: Your confidence level (high, medium, low)\n- page: Page number (0-indexed) for multi-page documents\n\nIMPORTANT coordinate format:\n- Use [y_min, x_min, y_max, x_max] order (Y coordinate first, then X)\n- Normalize all values to 0-1000 range (top-left is [0, 0], bottom-right is [1000, 1000])\n\nReturn ONLY a valid JSON array, no other text.";
+ /**
+ * Normalized bounding box format (0-1 range)
+ * This is the SDK's standard format after conversion from Gemini's 0-1000 format
+ */
+ interface NormalizedBBox {
+ x: number;
+ y: number;
+ width: number;
+ height: number;
+ }
+ /**
+ * Convert Gemini 0-1000 coordinates to normalized 0-1 format
+ * Note: Gemini uses [y_min, x_min, y_max, x_max] order
+ *
+ * @param geminiBBox - Bounding box from Gemini [y_min, x_min, y_max, x_max] (0-1000)
+ * @returns Normalized bounding box with x, y, width, height (0-1)
+ */
+ declare function normalizeGeminiBBox(geminiBBox: [number, number, number, number]): NormalizedBBox;
+ /**
+ * Convert normalized 0-1 format back to Gemini 0-1000 coordinates
+ *
+ * @param bbox - Normalized bounding box (0-1)
+ * @returns Gemini format [y_min, x_min, y_max, x_max] (0-1000)
+ */
+ declare function toGeminiBBox(bbox: NormalizedBBox): [number, number, number, number];
+ /**
+ * Convert Gemini bounding box block to DocumentIR-compatible format
+ */
+ interface DocumentBlock {
+ text: string;
+ bbox: NormalizedBBox;
+ type: BlockType;
+ confidence?: number;
+ page?: number;
+ }
+ /**
+ * Convert Gemini extraction result to DocumentIR blocks
+ *
+ * @param geminiBlocks - Raw blocks from Gemini extraction
+ * @returns Document blocks with normalized coordinates
+ */
+ declare function convertGeminiBlocksToDocumentBlocks(geminiBlocks: GeminiBoundingBoxBlock[]): DocumentBlock[];
+

  /**
  * LLM Provider Metadata
@@ -1419,4 +1645,4 @@
  */
  declare function buildLLMProvider(config: FallbackConfig): VLMProvider;

- export { type AccessMethod, AnthropicProvider, type CircuitBreakerState, type FallbackConfig, FallbackManager, GoogleProvider, type ImageInput, type JsonMode, type LLMModelMetadata, type LLMProvider, type LLMProviderMetadata, type LLMProviderType, type LLMResponse, type MultimodalInput, type NodeType, OpenAIProvider, type PDFInput, PROVIDER_METADATA, type ProviderCapabilities, type ProviderConfig, type ProviderFactory, type ProviderInputType, type ProviderType, type ReasoningConfig, type ReasoningDetail, type ResourceLimits, type ResponseMetrics, SUPPORTED_IMAGE_TYPES, SchemaTranslator, type SupportedImageMimeType, type UnifiedSchema, XAIProvider, adaptToCoreLLMProvider, buildLLMProvider, buildSchemaPromptSection, combineSchemaAndUserPrompt, compareNativeVsOpenRouter, createProviderFromRegistry, createVLMProvider, estimateCost, formatSchemaForPrompt, getCheapestProvider, getProvidersForNode, isImageTypeSupported, isProviderCompatibleWithNode, providerRegistry, registerProvider, supportsPDFsInline };
+ export { type AccessMethod, AnthropicProvider, BLOCK_TYPES, type BlockType, type CircuitBreakerState, type DocumentBlock, type FallbackConfig, FallbackManager, GEMINI_BBOX_EXTRACTION_PROMPT, type GeminiBoundingBoxBlock, GoogleProvider, type ImageInput, type JsonMode, type LLMDerivedOptions, type LLMExtractedMetadata, type LLMModelMetadata, type LLMProvider, type LLMProviderMetadata, type LLMProviderType, type LLMResponse, type MultimodalInput, type NodeType, type NormalizedBBox, OpenAIProvider, type PDFInput, PROVIDER_METADATA, type ProviderCapabilities, type ProviderConfig, type ProviderFactory, type ProviderInputType, type ProviderType, type ReasoningConfig, type ReasoningDetail, type ResourceLimits, type ResponseMetrics, SUPPORTED_IMAGE_TYPES, SchemaTranslator, type SupportedImageMimeType, type UnifiedSchema, XAIProvider, adaptToCoreLLMProvider, buildBlockClassificationPrompt, buildConfidencePrompt, buildLLMDerivedFeaturesPrompt, buildLLMProvider, buildLanguageHintsPrompt, buildOutputFormatPrompt, buildSchemaPromptSection, buildSourcesPrompt, combineSchemaAndUserPrompt, combineSchemaUserAndDerivedPrompts, compareNativeVsOpenRouter, convertGeminiBlocksToDocumentBlocks, createProviderFromRegistry, createVLMProvider, estimateCost, extractMetadataFromResponse, formatSchemaForPrompt, geminiBoundingBoxSchema, getCheapestProvider, getProvidersForNode, isImageTypeSupported, isProviderCompatibleWithNode, normalizeGeminiBBox, providerRegistry, registerProvider, shouldExtractMetadata, supportsPDFsInline, toGeminiBBox };
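For the Gemini bounding-box helpers above, a sketch of the coordinate conversion implied by the declared formats (Gemini's [y_min, x_min, y_max, x_max] on a 0-1000 scale into the SDK's 0-1 x/y/width/height box). The arithmetic here is an assumption based on those documented formats, not the package's actual implementation, which lives in normalizeGeminiBBox:

// Illustrative sketch only - the shipped conversion is normalizeGeminiBBox().
import type { NormalizedBBox } from "@doclo/providers-llm";

function normalizeGeminiBBoxSketch(box: [number, number, number, number]): NormalizedBBox {
  const [yMin, xMin, yMax, xMax] = box; // note: Gemini puts Y before X
  return {
    x: xMin / 1000,                     // 0-1000 scale -> 0-1
    y: yMin / 1000,
    width: (xMax - xMin) / 1000,
    height: (yMax - yMin) / 1000
  };
}

// e.g. a block covering the top-left quarter of a page:
// normalizeGeminiBBoxSketch([0, 0, 500, 500]) -> { x: 0, y: 0, width: 0.5, height: 0.5 }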