@doclo/providers-llm 0.1.7 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1,3 +1,16 @@
1
+ import {
2
+ buildBlockClassificationPrompt,
3
+ buildConfidencePrompt,
4
+ buildLLMDerivedFeaturesPrompt,
5
+ buildLanguageHintsPrompt,
6
+ buildOutputFormatPrompt,
7
+ buildSchemaPromptSection,
8
+ buildSourcesPrompt,
9
+ combineSchemaAndUserPrompt,
10
+ combineSchemaUserAndDerivedPrompts,
11
+ formatSchemaForPrompt
12
+ } from "./chunk-7YPJIWRM.js";
13
+
1
14
  // src/schema-translator.ts
2
15
  import { zodToJsonSchema } from "@alcyone-labs/zod-to-json-schema";
3
16
  var SchemaTranslator = class {
@@ -165,119 +178,72 @@ var SchemaTranslator = class {
165
178
  }
166
179
  };
167
180
 
168
// src/metadata-extractor.ts

/**
 * Reserved top-level keys that carry LLM-derived metadata rather than
 * extracted document data. They are always stripped from the cleaned JSON,
 * even when their value is malformed and therefore not surfaced as metadata.
 */
var METADATA_FIELDS = [
  "_confidence",
  "_sources",
  "_blockTypes",
  "_headers",
  "_footers"
];

/** True for non-null, non-array objects (the only shape that can hold keyed fields). */
function isPlainRecord(value) {
  return value !== null && typeof value === "object" && !Array.isArray(value);
}

/** Normalizes `_headers` / `_footers` entries to `{ text, pages }`, skipping non-object entries. */
function normalizePageRegions(entries) {
  return entries.filter(isPlainRecord).map((entry) => ({
    text: entry.text || "",
    pages: Array.isArray(entry.pages) ? entry.pages : []
  }));
}

/**
 * Splits a parsed LLM response into the user-facing JSON and the derived
 * metadata that was requested alongside it.
 *
 * @param {unknown} json - Parsed model output.
 * @returns {{ json: unknown, metadata?: object }} For anything that is not a
 *   plain object (primitives, `null`, arrays) the input is returned untouched
 *   as `{ json }`. Otherwise `json` is a shallow copy without the reserved
 *   metadata keys and `metadata` holds whichever of `confidence`, `sources`,
 *   `blockTypes`, `headers`, `footers` were present and well-formed
 *   (`undefined` when none were).
 */
function extractMetadataFromResponse(json) {
  // Arrays are objects too; rebuilding one key-by-key would turn it into an
  // index-keyed plain object, so they are passed through like primitives.
  if (!isPlainRecord(json)) {
    return { json };
  }
  const rawJson = json;
  const metadata = {};
  let hasMetadata = false;

  if (isPlainRecord(rawJson._confidence)) {
    metadata.confidence = rawJson._confidence;
    hasMetadata = true;
  }
  if (Array.isArray(rawJson._sources)) {
    // Model output is untrusted: skip null/primitive entries instead of
    // throwing on property access. Alternate key spellings are accepted.
    metadata.sources = rawJson._sources.filter(isPlainRecord).map((source) => ({
      field: source.field || source.fieldPath || "",
      text: source.text || source.sourceText || "",
      bbox: source.bbox || source.box_2d,
      page: source.page
    }));
    hasMetadata = true;
  }
  if (isPlainRecord(rawJson._blockTypes)) {
    metadata.blockTypes = rawJson._blockTypes;
    hasMetadata = true;
  }
  if (Array.isArray(rawJson._headers)) {
    metadata.headers = normalizePageRegions(rawJson._headers);
    hasMetadata = true;
  }
  if (Array.isArray(rawJson._footers)) {
    metadata.footers = normalizePageRegions(rawJson._footers);
    hasMetadata = true;
  }

  // Object.fromEntries defines own data properties, so a "__proto__" key in
  // the model output cannot change the prototype of the returned object.
  const cleanJson = Object.fromEntries(
    Object.entries(rawJson).filter(([key]) => !METADATA_FIELDS.includes(key))
  );
  return {
    json: cleanJson,
    metadata: hasMetadata ? metadata : void 0
  };
}
244
/**
 * Reports whether any LLM-derived feature that produces response metadata
 * has been switched on.
 *
 * @param {object} [derivedOptions] - Derived-feature options passed to a provider.
 * @returns {boolean} `true` when at least one of `includeConfidence`,
 *   `includeSources`, `includeBlockTypes`, `extractHeaders` or
 *   `extractFooters` is truthy; `false` otherwise or when no options are given.
 */
function shouldExtractMetadata(derivedOptions) {
  if (!derivedOptions) {
    return false;
  }
  const metadataFlags = [
    "includeConfidence",
    "includeSources",
    "includeBlockTypes",
    "extractHeaders",
    "extractFooters"
  ];
  return metadataFlags.some((flag) => Boolean(derivedOptions[flag]));
}
282
248
 
283
249
  // src/provider-registry.ts
@@ -381,11 +347,16 @@ var OpenAIProvider = class {
381
347
  if (mode === "strict" && !params.schema) {
382
348
  throw new Error('schema is required when mode is "strict"');
383
349
  }
350
+ const extractMetadata = shouldExtractMetadata(params.derivedOptions);
384
351
  const shouldEmbedSchema = params.embedSchemaInPrompt !== false && params.schema;
385
352
  let enhancedInput = params.input;
386
353
  if (shouldEmbedSchema) {
387
354
  const jsonSchema = this.translator.convertZodIfNeeded(params.schema);
388
- const enhancedText = combineSchemaAndUserPrompt(
355
+ const enhancedText = params.derivedOptions ? combineSchemaUserAndDerivedPrompts(
356
+ jsonSchema,
357
+ params.input.text || "",
358
+ params.derivedOptions
359
+ ) : combineSchemaAndUserPrompt(
389
360
  jsonSchema,
390
361
  params.input.text || ""
391
362
  );
@@ -393,6 +364,15 @@ var OpenAIProvider = class {
393
364
  ...params.input,
394
365
  text: enhancedText
395
366
  };
367
+ } else if (params.derivedOptions) {
368
+ const { buildLLMDerivedFeaturesPrompt: buildLLMDerivedFeaturesPrompt2 } = await import("./schema-prompt-formatter-AIORLWUF.js");
369
+ const derivedPrompt = buildLLMDerivedFeaturesPrompt2(params.derivedOptions);
370
+ if (derivedPrompt) {
371
+ enhancedInput = {
372
+ ...params.input,
373
+ text: (params.input.text || "") + "\n\n" + derivedPrompt
374
+ };
375
+ }
396
376
  }
397
377
  const messages = this.buildMessages(enhancedInput);
398
378
  const requestBody = {
@@ -473,7 +453,8 @@ var OpenAIProvider = class {
473
453
  const data = await response.json();
474
454
  const latencyMs = Date.now() - startTime;
475
455
  const content = data.choices?.[0]?.message?.content ?? "{}";
476
- const parsed = safeJsonParse(content);
456
+ const rawParsed = safeJsonParse(content);
457
+ const { json: parsed, metadata } = extractMetadata ? extractMetadataFromResponse(rawParsed) : { json: rawParsed, metadata: void 0 };
477
458
  const message = data.choices?.[0]?.message;
478
459
  const reasoning = message?.reasoning;
479
460
  const reasoning_details = message?.reasoning_details;
@@ -498,7 +479,8 @@ var OpenAIProvider = class {
498
479
  model: this.config.model
499
480
  },
500
481
  reasoning,
501
- reasoning_details
482
+ reasoning_details,
483
+ metadata
502
484
  };
503
485
  }
504
486
  buildReasoningConfig(reasoning) {
@@ -618,11 +600,16 @@ var AnthropicProvider = class {
618
600
  if (mode === "strict" && !params.schema) {
619
601
  throw new Error('schema is required when mode is "strict"');
620
602
  }
603
+ const extractMetadata = shouldExtractMetadata(params.derivedOptions);
621
604
  const shouldEmbedSchema = params.embedSchemaInPrompt !== false && params.schema;
622
605
  let enhancedInput = params.input;
623
606
  if (shouldEmbedSchema) {
624
607
  const jsonSchema = this.translator.convertZodIfNeeded(params.schema);
625
- const enhancedText = combineSchemaAndUserPrompt(
608
+ const enhancedText = params.derivedOptions ? combineSchemaUserAndDerivedPrompts(
609
+ jsonSchema,
610
+ params.input.text || "",
611
+ params.derivedOptions
612
+ ) : combineSchemaAndUserPrompt(
626
613
  jsonSchema,
627
614
  params.input.text || ""
628
615
  );
@@ -630,6 +617,15 @@ var AnthropicProvider = class {
630
617
  ...params.input,
631
618
  text: enhancedText
632
619
  };
620
+ } else if (params.derivedOptions) {
621
+ const { buildLLMDerivedFeaturesPrompt: buildLLMDerivedFeaturesPrompt2 } = await import("./schema-prompt-formatter-AIORLWUF.js");
622
+ const derivedPrompt = buildLLMDerivedFeaturesPrompt2(params.derivedOptions);
623
+ if (derivedPrompt) {
624
+ enhancedInput = {
625
+ ...params.input,
626
+ text: (params.input.text || "") + "\n\n" + derivedPrompt
627
+ };
628
+ }
633
629
  }
634
630
  const messages = await this.buildMessages(enhancedInput);
635
631
  const useNewStructuredOutputs = this.supportsNewStructuredOutputs();
@@ -703,10 +699,7 @@ var AnthropicProvider = class {
703
699
  }
704
700
  const data = await response.json();
705
701
  const message = data.choices?.[0]?.message;
706
- let content = message?.content ?? (useNewStructuredOutputs2 ? "{}" : "}");
707
- if (!useNewStructuredOutputs2) {
708
- content = "{" + content;
709
- }
702
+ let content = message?.content ?? "{}";
710
703
  const reasoning = message?.reasoning;
711
704
  const reasoning_details = message?.reasoning_details;
712
705
  content = content.replace(/^```json\s*\n?/, "").replace(/\n?```\s*$/, "").trim();
@@ -747,9 +740,10 @@ var AnthropicProvider = class {
747
740
  }
748
741
  const latencyMs = Date.now() - startTime;
749
742
  const baseProvider = extractProviderFromModel2(this.config.model, "anthropic");
743
+ const { json: cleanJson, metadata } = extractMetadata ? extractMetadataFromResponse(parsed) : { json: parsed, metadata: void 0 };
750
744
  return {
751
- json: parsed,
752
- rawText: JSON.stringify(parsed),
745
+ json: cleanJson,
746
+ rawText: JSON.stringify(cleanJson),
753
747
  metrics: {
754
748
  costUSD,
755
749
  inputTokens,
@@ -763,7 +757,8 @@ var AnthropicProvider = class {
763
757
  cacheReadInputTokens
764
758
  },
765
759
  reasoning,
766
- reasoning_details
760
+ reasoning_details,
761
+ metadata
767
762
  };
768
763
  } else {
769
764
  const endpoint = this.config.baseUrl || "https://api.anthropic.com/v1";
@@ -828,9 +823,10 @@ var AnthropicProvider = class {
828
823
  const reasoning = thinkingBlock?.thinking;
829
824
  const latencyMs = Date.now() - startTime;
830
825
  const baseProvider = extractProviderFromModel2(this.config.model, "anthropic");
826
+ const { json: cleanJson, metadata } = extractMetadata ? extractMetadataFromResponse(parsed) : { json: parsed, metadata: void 0 };
831
827
  return {
832
- json: parsed,
833
- rawText: JSON.stringify(parsed),
828
+ json: cleanJson,
829
+ rawText: JSON.stringify(cleanJson),
834
830
  metrics: {
835
831
  costUSD,
836
832
  inputTokens,
@@ -848,7 +844,8 @@ var AnthropicProvider = class {
848
844
  signature: null,
849
845
  id: "thinking-1",
850
846
  format: "anthropic-claude-v1"
851
- }] : void 0
847
+ }] : void 0,
848
+ metadata
852
849
  };
853
850
  }
854
851
  }
@@ -885,19 +882,13 @@ var AnthropicProvider = class {
885
882
  requestBody.response_format = {
886
883
  type: "json_object"
887
884
  };
888
- } else {
885
+ } else if (useNewStructuredOutputs) {
889
886
  const openRouterSchema = this.translator.toClaudeOpenRouterSchema(schema);
890
887
  const fixedSchema = this.fixSchemaForStrictMode(openRouterSchema);
891
888
  if (process.env.DEBUG_PROVIDERS) {
892
- console.log("[AnthropicProvider] Original schema:", JSON.stringify(openRouterSchema, null, 2));
889
+ console.log("[AnthropicProvider] Using json_schema (native support)");
893
890
  console.log("[AnthropicProvider] Fixed schema:", JSON.stringify(fixedSchema, null, 2));
894
891
  }
895
- if (!useNewStructuredOutputs) {
896
- messageArray.push({
897
- role: "assistant",
898
- content: "{"
899
- });
900
- }
901
892
  requestBody.response_format = {
902
893
  type: "json_schema",
903
894
  json_schema: {
@@ -906,6 +897,13 @@ var AnthropicProvider = class {
906
897
  schema: fixedSchema
907
898
  }
908
899
  };
900
+ } else {
901
+ if (process.env.DEBUG_PROVIDERS) {
902
+ console.log("[AnthropicProvider] Using json_object (legacy mode, schema in prompt)");
903
+ }
904
+ requestBody.response_format = {
905
+ type: "json_object"
906
+ };
909
907
  }
910
908
  if (reasoning) {
911
909
  requestBody.reasoning = this.buildReasoningConfig(reasoning, max_tokens);
@@ -1248,11 +1246,16 @@ var GoogleProvider = class {
1248
1246
  if (mode === "strict" && !params.schema) {
1249
1247
  throw new Error('schema is required when mode is "strict"');
1250
1248
  }
1249
+ const extractMetadata = shouldExtractMetadata(params.derivedOptions);
1251
1250
  const shouldEmbedSchema = params.embedSchemaInPrompt !== false && params.schema;
1252
1251
  let enhancedInput = normalizedInput;
1253
1252
  if (shouldEmbedSchema) {
1254
1253
  const jsonSchema = this.translator.convertZodIfNeeded(params.schema);
1255
- const enhancedText = combineSchemaAndUserPrompt(
1254
+ const enhancedText = params.derivedOptions ? combineSchemaUserAndDerivedPrompts(
1255
+ jsonSchema,
1256
+ normalizedInput.text || "",
1257
+ params.derivedOptions
1258
+ ) : combineSchemaAndUserPrompt(
1256
1259
  jsonSchema,
1257
1260
  normalizedInput.text || ""
1258
1261
  );
@@ -1260,6 +1263,15 @@ var GoogleProvider = class {
1260
1263
  ...normalizedInput,
1261
1264
  text: enhancedText
1262
1265
  };
1266
+ } else if (params.derivedOptions) {
1267
+ const { buildLLMDerivedFeaturesPrompt: buildLLMDerivedFeaturesPrompt2 } = await import("./schema-prompt-formatter-AIORLWUF.js");
1268
+ const derivedPrompt = buildLLMDerivedFeaturesPrompt2(params.derivedOptions);
1269
+ if (derivedPrompt) {
1270
+ enhancedInput = {
1271
+ ...normalizedInput,
1272
+ text: (normalizedInput.text || "") + "\n\n" + derivedPrompt
1273
+ };
1274
+ }
1263
1275
  }
1264
1276
  const contents = await this.buildContents(enhancedInput);
1265
1277
  const requestBody = {
@@ -1328,7 +1340,8 @@ var GoogleProvider = class {
1328
1340
  const reasoning = message?.reasoning;
1329
1341
  const reasoning_details = message?.reasoning_details;
1330
1342
  content = content.replace(/^```json\s*\n?/, "").replace(/\n?```\s*$/, "").trim();
1331
- const parsed = safeJsonParse3(content);
1343
+ const rawParsed = safeJsonParse3(content);
1344
+ const { json: parsed, metadata } = extractMetadata ? extractMetadataFromResponse(rawParsed) : { json: rawParsed, metadata: void 0 };
1332
1345
  const baseProvider = extractProviderFromModel3(this.config.model, "google");
1333
1346
  return {
1334
1347
  json: parsed,
@@ -1344,7 +1357,8 @@ var GoogleProvider = class {
1344
1357
  model: this.config.model
1345
1358
  },
1346
1359
  reasoning,
1347
- reasoning_details
1360
+ reasoning_details,
1361
+ metadata
1348
1362
  };
1349
1363
  } else {
1350
1364
  const candidate = data.candidates?.[0];
@@ -1354,7 +1368,8 @@ var GoogleProvider = class {
1354
1368
  costUSD = this.calculateCost(data.usageMetadata);
1355
1369
  const thinkingPart = candidate?.content?.parts?.find((part) => part.thought === true);
1356
1370
  const reasoning = thinkingPart?.text;
1357
- const parsed = safeJsonParse3(content);
1371
+ const rawParsed = safeJsonParse3(content);
1372
+ const { json: parsed, metadata } = extractMetadata ? extractMetadataFromResponse(rawParsed) : { json: rawParsed, metadata: void 0 };
1358
1373
  const baseProvider = extractProviderFromModel3(this.config.model, "google");
1359
1374
  return {
1360
1375
  json: parsed,
@@ -1376,7 +1391,8 @@ var GoogleProvider = class {
1376
1391
  signature: null,
1377
1392
  id: "thinking-1",
1378
1393
  format: "google-gemini-v1"
1379
- }] : void 0
1394
+ }] : void 0,
1395
+ metadata
1380
1396
  };
1381
1397
  }
1382
1398
  }
@@ -1584,11 +1600,16 @@ var XAIProvider = class {
1584
1600
  if (mode === "strict" && !params.schema) {
1585
1601
  throw new Error('schema is required when mode is "strict"');
1586
1602
  }
1603
+ const extractMetadata = shouldExtractMetadata(params.derivedOptions);
1587
1604
  const shouldEmbedSchema = params.embedSchemaInPrompt !== false && params.schema;
1588
1605
  let enhancedInput = params.input;
1589
1606
  if (shouldEmbedSchema) {
1590
1607
  const jsonSchema = this.translator.convertZodIfNeeded(params.schema);
1591
- const enhancedText = combineSchemaAndUserPrompt(
1608
+ const enhancedText = params.derivedOptions ? combineSchemaUserAndDerivedPrompts(
1609
+ jsonSchema,
1610
+ params.input.text || "",
1611
+ params.derivedOptions
1612
+ ) : combineSchemaAndUserPrompt(
1592
1613
  jsonSchema,
1593
1614
  params.input.text || ""
1594
1615
  );
@@ -1596,6 +1617,15 @@ var XAIProvider = class {
1596
1617
  ...params.input,
1597
1618
  text: enhancedText
1598
1619
  };
1620
+ } else if (params.derivedOptions) {
1621
+ const { buildLLMDerivedFeaturesPrompt: buildLLMDerivedFeaturesPrompt2 } = await import("./schema-prompt-formatter-AIORLWUF.js");
1622
+ const derivedPrompt = buildLLMDerivedFeaturesPrompt2(params.derivedOptions);
1623
+ if (derivedPrompt) {
1624
+ enhancedInput = {
1625
+ ...params.input,
1626
+ text: (params.input.text || "") + "\n\n" + derivedPrompt
1627
+ };
1628
+ }
1599
1629
  }
1600
1630
  const messages = await this.buildMessages(enhancedInput);
1601
1631
  const requestBody = {
@@ -1676,7 +1706,8 @@ var XAIProvider = class {
1676
1706
  const latencyMs = Date.now() - startTime;
1677
1707
  const message = data.choices?.[0]?.message;
1678
1708
  const content = message?.content ?? "{}";
1679
- const parsed = safeJsonParse4(content);
1709
+ const rawParsed = safeJsonParse4(content);
1710
+ const { json: parsed, metadata } = extractMetadata ? extractMetadataFromResponse(rawParsed) : { json: rawParsed, metadata: void 0 };
1680
1711
  const reasoning = message?.reasoning;
1681
1712
  const reasoning_details = message?.reasoning_details;
1682
1713
  let costUSD;
@@ -1700,7 +1731,8 @@ var XAIProvider = class {
1700
1731
  model: this.config.model
1701
1732
  },
1702
1733
  reasoning,
1703
- reasoning_details
1734
+ reasoning_details,
1735
+ metadata
1704
1736
  };
1705
1737
  }
1706
1738
  buildReasoningConfig(reasoning) {
@@ -2032,6 +2064,115 @@ function adaptToCoreLLMProvider(provider) {
2032
2064
  };
2033
2065
  }
2034
2066
 
2067
// src/gemini-bbox-schema.ts

/**
 * Block classifications a bounding-box extraction may assign to a text block.
 * The same list feeds the JSON schema enum and the extraction prompt below.
 */
var BLOCK_TYPES = [
  "title", // Main document title or section headers
  "paragraph", // Body text paragraphs
  "table", // Tabular data
  "list", // Bulleted or numbered lists
  "header", // Page headers (repeated at top of pages)
  "footer", // Page footers (repeated at bottom of pages)
  "caption", // Image or figure captions
  "code", // Code blocks or preformatted text
  "image", // Image/figure placeholder
  "form", // Form fields
  "signature", // Signatures
  "handwriting" // Handwritten text
];

/**
 * JSON schema for the bounding-box response: an array of blocks, each with a
 * required `box_2d`, `text` and `type`, plus nullable `confidence` and `page`.
 */
var geminiBoundingBoxSchema = {
  type: "array",
  items: {
    type: "object",
    properties: {
      box_2d: {
        type: "array",
        items: { type: "number" },
        minItems: 4,
        maxItems: 4,
        description: "Bounding box coordinates [y_min, x_min, y_max, x_max] normalized 0-1000"
      },
      text: {
        type: "string",
        description: "Text content within the bounding box"
      },
      type: {
        type: "string",
        // Copy so that edits to the schema never leak back into BLOCK_TYPES.
        enum: BLOCK_TYPES.slice(),
        description: "Block type classification"
      },
      confidence: {
        type: "string",
        enum: ["high", "medium", "low"],
        nullable: true,
        description: "Confidence level of extraction"
      },
      page: {
        type: "integer",
        nullable: true,
        description: "Page number (0-indexed)"
      }
    },
    required: ["box_2d", "text", "type"],
    additionalProperties: false
  }
};

/**
 * Prompt asking the model for every text block with its bounding box.
 * The block-type list is rendered from BLOCK_TYPES so prompt and schema
 * cannot drift apart.
 */
var GEMINI_BBOX_EXTRACTION_PROMPT = `Analyze this document and extract all text with precise bounding box locations.

For each text block, provide:
- box_2d: Bounding box as [y_min, x_min, y_max, x_max] normalized to 0-1000
- text: The exact text content
- type: Block classification (${BLOCK_TYPES.join(", ")})
- confidence: Your confidence level (high, medium, low)
- page: Page number (0-indexed) for multi-page documents

IMPORTANT coordinate format:
- Use [y_min, x_min, y_max, x_max] order (Y coordinate first, then X)
- Normalize all values to 0-1000 range (top-left is [0, 0], bottom-right is [1000, 1000])

Return ONLY a valid JSON array, no other text.`;
2145
/**
 * Converts a Gemini-style box `[y_min, x_min, y_max, x_max]` on a 0-1000
 * scale into an `{ x, y, width, height }` rectangle on a 0-1 scale.
 *
 * @param {number[]} geminiBBox - Four coordinates in y-first order.
 * @returns {{ x: number, y: number, width: number, height: number }}
 */
function normalizeGeminiBBox(geminiBBox) {
  const [top, left, bottom, right] = geminiBBox;
  const x = left / 1e3;
  const y = top / 1e3;
  const width = (right - left) / 1e3;
  const height = (bottom - top) / 1e3;
  return { x, y, width, height };
}
2154
/**
 * Converts an `{ x, y, width, height }` rectangle on a 0-1 scale into a
 * Gemini-style box `[y_min, x_min, y_max, x_max]` of integers on a 0-1000
 * scale.
 *
 * @param {{ x: number, y: number, width: number, height: number }} bbox
 * @returns {number[]} Four rounded coordinates in y-first order.
 */
function toGeminiBBox(bbox) {
  const toThousandths = (fraction) => Math.round(fraction * 1e3);
  const yMin = toThousandths(bbox.y);
  const xMin = toThousandths(bbox.x);
  const yMax = toThousandths(bbox.y + bbox.height);
  const xMax = toThousandths(bbox.x + bbox.width);
  return [yMin, xMin, yMax, xMax];
}
2166
/**
 * Maps Gemini bounding-box blocks onto document blocks.
 *
 * Each result carries the block's `text`, `type` and `page` unchanged, a
 * `bbox` produced by `normalizeGeminiBBox(block.box_2d)`, and a numeric
 * `confidence`: "high" -> 0.9, "medium" -> 0.7, "low" -> 0.5, anything
 * else -> `undefined`.
 *
 * @param {Array<object>} geminiBlocks - Blocks with `box_2d`, `text`, `type`
 *   and optional `confidence` / `page`.
 * @returns {Array<object>} One document block per input block, in order.
 */
function convertGeminiBlocksToDocumentBlocks(geminiBlocks) {
  // A Map gives exact-match lookup without inherited keys such as "constructor".
  const confidenceScores = new Map([
    ["high", 0.9],
    ["medium", 0.7],
    ["low", 0.5]
  ]);
  return geminiBlocks.map((block) => {
    const { text, type, page } = block;
    return {
      text,
      bbox: normalizeGeminiBBox(block.box_2d),
      type,
      confidence: confidenceScores.get(block.confidence),
      page
    };
  });
}
2175
+
2035
2176
  // src/metadata.ts
2036
2177
  var SUPPORTED_IMAGE_TYPES = {
2037
2178
  COMMON: ["image/png", "image/jpeg", "image/webp", "image/gif"],
@@ -2703,7 +2844,9 @@ function buildLLMProvider(config) {
2703
2844
  }
2704
2845
  export {
2705
2846
  AnthropicProvider,
2847
+ BLOCK_TYPES,
2706
2848
  FallbackManager,
2849
+ GEMINI_BBOX_EXTRACTION_PROMPT,
2707
2850
  GoogleProvider,
2708
2851
  OpenAIProvider,
2709
2852
  PROVIDER_METADATA,
@@ -2711,20 +2854,33 @@ export {
2711
2854
  SchemaTranslator,
2712
2855
  XAIProvider,
2713
2856
  adaptToCoreLLMProvider,
2857
+ buildBlockClassificationPrompt,
2858
+ buildConfidencePrompt,
2859
+ buildLLMDerivedFeaturesPrompt,
2714
2860
  buildLLMProvider,
2861
+ buildLanguageHintsPrompt,
2862
+ buildOutputFormatPrompt,
2715
2863
  buildSchemaPromptSection,
2864
+ buildSourcesPrompt,
2716
2865
  combineSchemaAndUserPrompt,
2866
+ combineSchemaUserAndDerivedPrompts,
2717
2867
  compareNativeVsOpenRouter,
2868
+ convertGeminiBlocksToDocumentBlocks,
2718
2869
  createProviderFromRegistry,
2719
2870
  createVLMProvider,
2720
2871
  estimateCost,
2872
+ extractMetadataFromResponse,
2721
2873
  formatSchemaForPrompt,
2874
+ geminiBoundingBoxSchema,
2722
2875
  getCheapestProvider,
2723
2876
  getProvidersForNode,
2724
2877
  isImageTypeSupported,
2725
2878
  isProviderCompatibleWithNode,
2879
+ normalizeGeminiBBox,
2726
2880
  providerRegistry,
2727
2881
  registerProvider,
2728
- supportsPDFsInline
2882
+ shouldExtractMetadata,
2883
+ supportsPDFsInline,
2884
+ toGeminiBBox
2729
2885
  };
2730
2886
  //# sourceMappingURL=index.js.map