@doclo/providers-llm 0.1.7 → 0.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1,3 +1,16 @@
1
+ import {
2
+ buildBlockClassificationPrompt,
3
+ buildConfidencePrompt,
4
+ buildLLMDerivedFeaturesPrompt,
5
+ buildLanguageHintsPrompt,
6
+ buildOutputFormatPrompt,
7
+ buildSchemaPromptSection,
8
+ buildSourcesPrompt,
9
+ combineSchemaAndUserPrompt,
10
+ combineSchemaUserAndDerivedPrompts,
11
+ formatSchemaForPrompt
12
+ } from "./chunk-7YPJIWRM.js";
13
+
1
14
  // src/schema-translator.ts
2
15
  import { zodToJsonSchema } from "@alcyone-labs/zod-to-json-schema";
3
16
  var SchemaTranslator = class {
@@ -165,119 +178,72 @@ var SchemaTranslator = class {
165
178
  }
166
179
  };
167
180
 
168
- // src/schema-prompt-formatter.ts
169
- function formatSchemaForPrompt(schema, indent = 0) {
170
- if (!schema || typeof schema !== "object") {
171
- return "";
172
- }
173
- const indentStr = " ".repeat(indent);
174
- let result = "";
175
- if (schema.type === "object" && schema.properties) {
176
- const properties = schema.properties;
177
- const required = schema.required || [];
178
- for (const [fieldName, fieldSchema] of Object.entries(properties)) {
179
- const isRequired = required.includes(fieldName);
180
- const requiredMarker = isRequired ? " (REQUIRED)" : " (optional)";
181
- result += `${indentStr}- \`${fieldName}\`${requiredMarker}`;
182
- const type = getTypeDescription(fieldSchema);
183
- if (type) {
184
- result += `: ${type}`;
185
- }
186
- if (fieldSchema.description) {
187
- result += `
188
- ${indentStr} ${fieldSchema.description}`;
189
- }
190
- if (fieldSchema.enum) {
191
- result += `
192
- ${indentStr} Allowed values: ${fieldSchema.enum.map((v) => JSON.stringify(v)).join(", ")}`;
193
- }
194
- result += "\n";
195
- if (fieldSchema.type === "object" && fieldSchema.properties) {
196
- result += formatSchemaForPrompt(fieldSchema, indent + 1);
197
- }
198
- if (fieldSchema.type === "array" && fieldSchema.items) {
199
- result += `${indentStr} Array items:
200
- `;
201
- const itemSchema = Array.isArray(fieldSchema.items) ? fieldSchema.items[0] : fieldSchema.items;
202
- if (itemSchema && itemSchema.type === "object" && itemSchema.properties) {
203
- result += formatSchemaForPrompt(itemSchema, indent + 2);
204
- } else if (itemSchema) {
205
- const itemType = getTypeDescription(itemSchema);
206
- result += `${indentStr} ${itemType}
207
- `;
208
- }
209
- }
181
+ // src/metadata-extractor.ts
182
+ var METADATA_FIELDS = [
183
+ "_confidence",
184
+ "_sources",
185
+ "_blockTypes",
186
+ "_headers",
187
+ "_footers"
188
+ ];
189
+ function extractMetadataFromResponse(json) {
190
+ if (!json || typeof json !== "object") {
191
+ return { json };
192
+ }
193
+ const rawJson = json;
194
+ const metadata = {};
195
+ let hasMetadata = false;
196
+ if ("_confidence" in rawJson && rawJson._confidence) {
197
+ const confidence = rawJson._confidence;
198
+ if (typeof confidence === "object" && !Array.isArray(confidence)) {
199
+ metadata.confidence = confidence;
200
+ hasMetadata = true;
210
201
  }
211
202
  }
212
- return result;
213
- }
214
- function getTypeDescription(schema) {
215
- if (!schema) return "any";
216
- if (schema.type) {
217
- const typeStr = Array.isArray(schema.type) ? schema.type.join(" | ") : schema.type;
218
- if (typeStr === "array" || Array.isArray(schema.type) && schema.type.includes("array")) {
219
- if (schema.items && !Array.isArray(schema.items) && schema.items.type) {
220
- const itemType = Array.isArray(schema.items.type) ? schema.items.type.join(" | ") : schema.items.type;
221
- return `array of ${itemType}`;
222
- }
223
- return "array";
224
- }
225
- if ((typeStr === "string" || Array.isArray(schema.type) && schema.type.includes("string")) && schema.format) {
226
- const formatHints = {
227
- "date": "YYYY-MM-DD",
228
- "time": "HH:MM or HH:MM:SS",
229
- "date-time": "YYYY-MM-DDTHH:MM:SS (ISO 8601)"
230
- };
231
- const hint = formatHints[schema.format];
232
- if (hint) {
233
- return `string (format: ${schema.format}, use ${hint})`;
234
- }
235
- return `string (format: ${schema.format})`;
203
+ if ("_sources" in rawJson && Array.isArray(rawJson._sources)) {
204
+ metadata.sources = rawJson._sources.map((source) => ({
205
+ field: source.field || source.fieldPath || "",
206
+ text: source.text || source.sourceText || "",
207
+ bbox: source.bbox || source.box_2d,
208
+ page: source.page
209
+ }));
210
+ hasMetadata = true;
211
+ }
212
+ if ("_blockTypes" in rawJson && rawJson._blockTypes) {
213
+ const blockTypes = rawJson._blockTypes;
214
+ if (typeof blockTypes === "object" && !Array.isArray(blockTypes)) {
215
+ metadata.blockTypes = blockTypes;
216
+ hasMetadata = true;
236
217
  }
237
- return typeStr;
238
218
  }
239
- if (schema.anyOf) {
240
- return schema.anyOf.map((s) => getTypeDescription(s)).join(" OR ");
219
+ if ("_headers" in rawJson && Array.isArray(rawJson._headers)) {
220
+ metadata.headers = rawJson._headers.map((header) => ({
221
+ text: header.text || "",
222
+ pages: Array.isArray(header.pages) ? header.pages : []
223
+ }));
224
+ hasMetadata = true;
241
225
  }
242
- if (schema.oneOf) {
243
- return schema.oneOf.map((s) => getTypeDescription(s)).join(" OR ");
226
+ if ("_footers" in rawJson && Array.isArray(rawJson._footers)) {
227
+ metadata.footers = rawJson._footers.map((footer) => ({
228
+ text: footer.text || "",
229
+ pages: Array.isArray(footer.pages) ? footer.pages : []
230
+ }));
231
+ hasMetadata = true;
244
232
  }
245
- return "any";
246
- }
247
- function buildSchemaPromptSection(schema) {
248
- const schemaFields = formatSchemaForPrompt(schema);
249
- return `
250
- ==================================================
251
- CRITICAL: OUTPUT STRUCTURE REQUIREMENTS
252
- ==================================================
253
-
254
- YOU MUST RETURN JSON MATCHING THIS EXACT STRUCTURE:
255
-
256
- ${schemaFields}
257
-
258
- CRITICAL FIELD NAME REQUIREMENTS:
259
- \u2713 Use EXACTLY the field names shown above (character-for-character match)
260
- \u2713 Preserve the exact casing (e.g., "fullName", not "full_name" or "FullName")
261
- \u2713 Do NOT abbreviate field names (e.g., "dob" instead of "dateOfBirth")
262
- \u2713 Do NOT invent alternative names (e.g., "directorName" instead of "fullName")
263
- \u2713 Do NOT use snake_case if the schema uses camelCase
264
- \u2713 Do NOT flatten nested structures or rename nested fields
265
- \u2713 The schema above is the SINGLE SOURCE OF TRUTH for field naming
266
-
267
- MISSING DATA:
268
- - If a required field has no data in the document, use null
269
- - If an optional field has no data, you may omit it or use null
270
- - Do NOT invent data that isn't in the document
271
-
272
- ==================================================
273
- `.trim();
274
- }
275
- function combineSchemaAndUserPrompt(schema, userPrompt) {
276
- const schemaSection = buildSchemaPromptSection(schema);
277
- if (!userPrompt || userPrompt.trim() === "") {
278
- return schemaSection + "\n\nTASK: Extract structured data from the provided document.";
233
+ const cleanJson = {};
234
+ for (const [key, value] of Object.entries(rawJson)) {
235
+ if (!METADATA_FIELDS.includes(key)) {
236
+ cleanJson[key] = value;
237
+ }
279
238
  }
280
- return schemaSection + "\n\n" + userPrompt;
239
+ return {
240
+ json: cleanJson,
241
+ metadata: hasMetadata ? metadata : void 0
242
+ };
243
+ }
244
+ function shouldExtractMetadata(derivedOptions) {
245
+ if (!derivedOptions) return false;
246
+ return !!(derivedOptions.includeConfidence || derivedOptions.includeSources || derivedOptions.includeBlockTypes || derivedOptions.extractHeaders || derivedOptions.extractFooters);
281
247
  }
282
248
 
283
249
  // src/provider-registry.ts
@@ -377,22 +343,41 @@ var OpenAIProvider = class {
377
343
  }
378
344
  async completeJson(params) {
379
345
  const startTime = Date.now();
346
+ const rawInput = params.input ?? params.prompt;
347
+ if (!rawInput) {
348
+ throw new Error("Either input or prompt must be provided");
349
+ }
350
+ const normalizedInput = typeof rawInput === "string" ? { text: rawInput } : rawInput;
380
351
  const mode = params.mode || (params.schema ? "strict" : "relaxed");
381
352
  if (mode === "strict" && !params.schema) {
382
353
  throw new Error('schema is required when mode is "strict"');
383
354
  }
355
+ const extractMetadata = shouldExtractMetadata(params.derivedOptions);
384
356
  const shouldEmbedSchema = params.embedSchemaInPrompt !== false && params.schema;
385
- let enhancedInput = params.input;
357
+ let enhancedInput = normalizedInput;
386
358
  if (shouldEmbedSchema) {
387
359
  const jsonSchema = this.translator.convertZodIfNeeded(params.schema);
388
- const enhancedText = combineSchemaAndUserPrompt(
360
+ const enhancedText = params.derivedOptions ? combineSchemaUserAndDerivedPrompts(
389
361
  jsonSchema,
390
- params.input.text || ""
362
+ normalizedInput.text || "",
363
+ params.derivedOptions
364
+ ) : combineSchemaAndUserPrompt(
365
+ jsonSchema,
366
+ normalizedInput.text || ""
391
367
  );
392
368
  enhancedInput = {
393
- ...params.input,
369
+ ...normalizedInput,
394
370
  text: enhancedText
395
371
  };
372
+ } else if (params.derivedOptions) {
373
+ const { buildLLMDerivedFeaturesPrompt: buildLLMDerivedFeaturesPrompt2 } = await import("./schema-prompt-formatter-AIORLWUF.js");
374
+ const derivedPrompt = buildLLMDerivedFeaturesPrompt2(params.derivedOptions);
375
+ if (derivedPrompt) {
376
+ enhancedInput = {
377
+ ...normalizedInput,
378
+ text: (normalizedInput.text || "") + "\n\n" + derivedPrompt
379
+ };
380
+ }
396
381
  }
397
382
  const messages = this.buildMessages(enhancedInput);
398
383
  const requestBody = {
@@ -473,7 +458,8 @@ var OpenAIProvider = class {
473
458
  const data = await response.json();
474
459
  const latencyMs = Date.now() - startTime;
475
460
  const content = data.choices?.[0]?.message?.content ?? "{}";
476
- const parsed = safeJsonParse(content);
461
+ const rawParsed = safeJsonParse(content);
462
+ const { json: parsed, metadata } = extractMetadata ? extractMetadataFromResponse(rawParsed) : { json: rawParsed, metadata: void 0 };
477
463
  const message = data.choices?.[0]?.message;
478
464
  const reasoning = message?.reasoning;
479
465
  const reasoning_details = message?.reasoning_details;
@@ -498,7 +484,8 @@ var OpenAIProvider = class {
498
484
  model: this.config.model
499
485
  },
500
486
  reasoning,
501
- reasoning_details
487
+ reasoning_details,
488
+ metadata
502
489
  };
503
490
  }
504
491
  buildReasoningConfig(reasoning) {
@@ -614,22 +601,41 @@ var AnthropicProvider = class {
614
601
  }
615
602
  async completeJson(params) {
616
603
  const startTime = Date.now();
604
+ const rawInput = params.input ?? params.prompt;
605
+ if (!rawInput) {
606
+ throw new Error("Either input or prompt must be provided");
607
+ }
608
+ const normalizedInput = typeof rawInput === "string" ? { text: rawInput } : rawInput;
617
609
  const mode = params.mode || (params.schema ? "strict" : "relaxed");
618
610
  if (mode === "strict" && !params.schema) {
619
611
  throw new Error('schema is required when mode is "strict"');
620
612
  }
613
+ const extractMetadata = shouldExtractMetadata(params.derivedOptions);
621
614
  const shouldEmbedSchema = params.embedSchemaInPrompt !== false && params.schema;
622
- let enhancedInput = params.input;
615
+ let enhancedInput = normalizedInput;
623
616
  if (shouldEmbedSchema) {
624
617
  const jsonSchema = this.translator.convertZodIfNeeded(params.schema);
625
- const enhancedText = combineSchemaAndUserPrompt(
618
+ const enhancedText = params.derivedOptions ? combineSchemaUserAndDerivedPrompts(
626
619
  jsonSchema,
627
- params.input.text || ""
620
+ normalizedInput.text || "",
621
+ params.derivedOptions
622
+ ) : combineSchemaAndUserPrompt(
623
+ jsonSchema,
624
+ normalizedInput.text || ""
628
625
  );
629
626
  enhancedInput = {
630
- ...params.input,
627
+ ...normalizedInput,
631
628
  text: enhancedText
632
629
  };
630
+ } else if (params.derivedOptions) {
631
+ const { buildLLMDerivedFeaturesPrompt: buildLLMDerivedFeaturesPrompt2 } = await import("./schema-prompt-formatter-AIORLWUF.js");
632
+ const derivedPrompt = buildLLMDerivedFeaturesPrompt2(params.derivedOptions);
633
+ if (derivedPrompt) {
634
+ enhancedInput = {
635
+ ...normalizedInput,
636
+ text: (normalizedInput.text || "") + "\n\n" + derivedPrompt
637
+ };
638
+ }
633
639
  }
634
640
  const messages = await this.buildMessages(enhancedInput);
635
641
  const useNewStructuredOutputs = this.supportsNewStructuredOutputs();
@@ -703,10 +709,7 @@ var AnthropicProvider = class {
703
709
  }
704
710
  const data = await response.json();
705
711
  const message = data.choices?.[0]?.message;
706
- let content = message?.content ?? (useNewStructuredOutputs2 ? "{}" : "}");
707
- if (!useNewStructuredOutputs2) {
708
- content = "{" + content;
709
- }
712
+ let content = message?.content ?? "{}";
710
713
  const reasoning = message?.reasoning;
711
714
  const reasoning_details = message?.reasoning_details;
712
715
  content = content.replace(/^```json\s*\n?/, "").replace(/\n?```\s*$/, "").trim();
@@ -747,9 +750,10 @@ var AnthropicProvider = class {
747
750
  }
748
751
  const latencyMs = Date.now() - startTime;
749
752
  const baseProvider = extractProviderFromModel2(this.config.model, "anthropic");
753
+ const { json: cleanJson, metadata } = extractMetadata ? extractMetadataFromResponse(parsed) : { json: parsed, metadata: void 0 };
750
754
  return {
751
- json: parsed,
752
- rawText: JSON.stringify(parsed),
755
+ json: cleanJson,
756
+ rawText: JSON.stringify(cleanJson),
753
757
  metrics: {
754
758
  costUSD,
755
759
  inputTokens,
@@ -763,7 +767,8 @@ var AnthropicProvider = class {
763
767
  cacheReadInputTokens
764
768
  },
765
769
  reasoning,
766
- reasoning_details
770
+ reasoning_details,
771
+ metadata
767
772
  };
768
773
  } else {
769
774
  const endpoint = this.config.baseUrl || "https://api.anthropic.com/v1";
@@ -828,9 +833,10 @@ var AnthropicProvider = class {
828
833
  const reasoning = thinkingBlock?.thinking;
829
834
  const latencyMs = Date.now() - startTime;
830
835
  const baseProvider = extractProviderFromModel2(this.config.model, "anthropic");
836
+ const { json: cleanJson, metadata } = extractMetadata ? extractMetadataFromResponse(parsed) : { json: parsed, metadata: void 0 };
831
837
  return {
832
- json: parsed,
833
- rawText: JSON.stringify(parsed),
838
+ json: cleanJson,
839
+ rawText: JSON.stringify(cleanJson),
834
840
  metrics: {
835
841
  costUSD,
836
842
  inputTokens,
@@ -848,7 +854,8 @@ var AnthropicProvider = class {
848
854
  signature: null,
849
855
  id: "thinking-1",
850
856
  format: "anthropic-claude-v1"
851
- }] : void 0
857
+ }] : void 0,
858
+ metadata
852
859
  };
853
860
  }
854
861
  }
@@ -885,19 +892,13 @@ var AnthropicProvider = class {
885
892
  requestBody.response_format = {
886
893
  type: "json_object"
887
894
  };
888
- } else {
895
+ } else if (useNewStructuredOutputs) {
889
896
  const openRouterSchema = this.translator.toClaudeOpenRouterSchema(schema);
890
897
  const fixedSchema = this.fixSchemaForStrictMode(openRouterSchema);
891
898
  if (process.env.DEBUG_PROVIDERS) {
892
- console.log("[AnthropicProvider] Original schema:", JSON.stringify(openRouterSchema, null, 2));
899
+ console.log("[AnthropicProvider] Using json_schema (native support)");
893
900
  console.log("[AnthropicProvider] Fixed schema:", JSON.stringify(fixedSchema, null, 2));
894
901
  }
895
- if (!useNewStructuredOutputs) {
896
- messageArray.push({
897
- role: "assistant",
898
- content: "{"
899
- });
900
- }
901
902
  requestBody.response_format = {
902
903
  type: "json_schema",
903
904
  json_schema: {
@@ -906,6 +907,13 @@ var AnthropicProvider = class {
906
907
  schema: fixedSchema
907
908
  }
908
909
  };
910
+ } else {
911
+ if (process.env.DEBUG_PROVIDERS) {
912
+ console.log("[AnthropicProvider] Using json_object (legacy mode, schema in prompt)");
913
+ }
914
+ requestBody.response_format = {
915
+ type: "json_object"
916
+ };
909
917
  }
910
918
  if (reasoning) {
911
919
  requestBody.reasoning = this.buildReasoningConfig(reasoning, max_tokens);
@@ -1248,11 +1256,16 @@ var GoogleProvider = class {
1248
1256
  if (mode === "strict" && !params.schema) {
1249
1257
  throw new Error('schema is required when mode is "strict"');
1250
1258
  }
1259
+ const extractMetadata = shouldExtractMetadata(params.derivedOptions);
1251
1260
  const shouldEmbedSchema = params.embedSchemaInPrompt !== false && params.schema;
1252
1261
  let enhancedInput = normalizedInput;
1253
1262
  if (shouldEmbedSchema) {
1254
1263
  const jsonSchema = this.translator.convertZodIfNeeded(params.schema);
1255
- const enhancedText = combineSchemaAndUserPrompt(
1264
+ const enhancedText = params.derivedOptions ? combineSchemaUserAndDerivedPrompts(
1265
+ jsonSchema,
1266
+ normalizedInput.text || "",
1267
+ params.derivedOptions
1268
+ ) : combineSchemaAndUserPrompt(
1256
1269
  jsonSchema,
1257
1270
  normalizedInput.text || ""
1258
1271
  );
@@ -1260,6 +1273,15 @@ var GoogleProvider = class {
1260
1273
  ...normalizedInput,
1261
1274
  text: enhancedText
1262
1275
  };
1276
+ } else if (params.derivedOptions) {
1277
+ const { buildLLMDerivedFeaturesPrompt: buildLLMDerivedFeaturesPrompt2 } = await import("./schema-prompt-formatter-AIORLWUF.js");
1278
+ const derivedPrompt = buildLLMDerivedFeaturesPrompt2(params.derivedOptions);
1279
+ if (derivedPrompt) {
1280
+ enhancedInput = {
1281
+ ...normalizedInput,
1282
+ text: (normalizedInput.text || "") + "\n\n" + derivedPrompt
1283
+ };
1284
+ }
1263
1285
  }
1264
1286
  const contents = await this.buildContents(enhancedInput);
1265
1287
  const requestBody = {
@@ -1328,7 +1350,8 @@ var GoogleProvider = class {
1328
1350
  const reasoning = message?.reasoning;
1329
1351
  const reasoning_details = message?.reasoning_details;
1330
1352
  content = content.replace(/^```json\s*\n?/, "").replace(/\n?```\s*$/, "").trim();
1331
- const parsed = safeJsonParse3(content);
1353
+ const rawParsed = safeJsonParse3(content);
1354
+ const { json: parsed, metadata } = extractMetadata ? extractMetadataFromResponse(rawParsed) : { json: rawParsed, metadata: void 0 };
1332
1355
  const baseProvider = extractProviderFromModel3(this.config.model, "google");
1333
1356
  return {
1334
1357
  json: parsed,
@@ -1344,7 +1367,8 @@ var GoogleProvider = class {
1344
1367
  model: this.config.model
1345
1368
  },
1346
1369
  reasoning,
1347
- reasoning_details
1370
+ reasoning_details,
1371
+ metadata
1348
1372
  };
1349
1373
  } else {
1350
1374
  const candidate = data.candidates?.[0];
@@ -1354,7 +1378,8 @@ var GoogleProvider = class {
1354
1378
  costUSD = this.calculateCost(data.usageMetadata);
1355
1379
  const thinkingPart = candidate?.content?.parts?.find((part) => part.thought === true);
1356
1380
  const reasoning = thinkingPart?.text;
1357
- const parsed = safeJsonParse3(content);
1381
+ const rawParsed = safeJsonParse3(content);
1382
+ const { json: parsed, metadata } = extractMetadata ? extractMetadataFromResponse(rawParsed) : { json: rawParsed, metadata: void 0 };
1358
1383
  const baseProvider = extractProviderFromModel3(this.config.model, "google");
1359
1384
  return {
1360
1385
  json: parsed,
@@ -1376,7 +1401,8 @@ var GoogleProvider = class {
1376
1401
  signature: null,
1377
1402
  id: "thinking-1",
1378
1403
  format: "google-gemini-v1"
1379
- }] : void 0
1404
+ }] : void 0,
1405
+ metadata
1380
1406
  };
1381
1407
  }
1382
1408
  }
@@ -1580,22 +1606,41 @@ var XAIProvider = class {
1580
1606
  }
1581
1607
  async completeJson(params) {
1582
1608
  const startTime = Date.now();
1609
+ const rawInput = params.input ?? params.prompt;
1610
+ if (!rawInput) {
1611
+ throw new Error("Either input or prompt must be provided");
1612
+ }
1613
+ const normalizedInput = typeof rawInput === "string" ? { text: rawInput } : rawInput;
1583
1614
  const mode = params.mode || (params.schema ? "strict" : "relaxed");
1584
1615
  if (mode === "strict" && !params.schema) {
1585
1616
  throw new Error('schema is required when mode is "strict"');
1586
1617
  }
1618
+ const extractMetadata = shouldExtractMetadata(params.derivedOptions);
1587
1619
  const shouldEmbedSchema = params.embedSchemaInPrompt !== false && params.schema;
1588
- let enhancedInput = params.input;
1620
+ let enhancedInput = normalizedInput;
1589
1621
  if (shouldEmbedSchema) {
1590
1622
  const jsonSchema = this.translator.convertZodIfNeeded(params.schema);
1591
- const enhancedText = combineSchemaAndUserPrompt(
1623
+ const enhancedText = params.derivedOptions ? combineSchemaUserAndDerivedPrompts(
1592
1624
  jsonSchema,
1593
- params.input.text || ""
1625
+ normalizedInput.text || "",
1626
+ params.derivedOptions
1627
+ ) : combineSchemaAndUserPrompt(
1628
+ jsonSchema,
1629
+ normalizedInput.text || ""
1594
1630
  );
1595
1631
  enhancedInput = {
1596
- ...params.input,
1632
+ ...normalizedInput,
1597
1633
  text: enhancedText
1598
1634
  };
1635
+ } else if (params.derivedOptions) {
1636
+ const { buildLLMDerivedFeaturesPrompt: buildLLMDerivedFeaturesPrompt2 } = await import("./schema-prompt-formatter-AIORLWUF.js");
1637
+ const derivedPrompt = buildLLMDerivedFeaturesPrompt2(params.derivedOptions);
1638
+ if (derivedPrompt) {
1639
+ enhancedInput = {
1640
+ ...normalizedInput,
1641
+ text: (normalizedInput.text || "") + "\n\n" + derivedPrompt
1642
+ };
1643
+ }
1599
1644
  }
1600
1645
  const messages = await this.buildMessages(enhancedInput);
1601
1646
  const requestBody = {
@@ -1676,7 +1721,8 @@ var XAIProvider = class {
1676
1721
  const latencyMs = Date.now() - startTime;
1677
1722
  const message = data.choices?.[0]?.message;
1678
1723
  const content = message?.content ?? "{}";
1679
- const parsed = safeJsonParse4(content);
1724
+ const rawParsed = safeJsonParse4(content);
1725
+ const { json: parsed, metadata } = extractMetadata ? extractMetadataFromResponse(rawParsed) : { json: rawParsed, metadata: void 0 };
1680
1726
  const reasoning = message?.reasoning;
1681
1727
  const reasoning_details = message?.reasoning_details;
1682
1728
  let costUSD;
@@ -1700,7 +1746,8 @@ var XAIProvider = class {
1700
1746
  model: this.config.model
1701
1747
  },
1702
1748
  reasoning,
1703
- reasoning_details
1749
+ reasoning_details,
1750
+ metadata
1704
1751
  };
1705
1752
  }
1706
1753
  buildReasoningConfig(reasoning) {
@@ -2032,6 +2079,115 @@ function adaptToCoreLLMProvider(provider) {
2032
2079
  };
2033
2080
  }
2034
2081
 
2082
+ // src/gemini-bbox-schema.ts
2083
+ var BLOCK_TYPES = [
2084
+ "title",
2085
+ // Main document title or section headers
2086
+ "paragraph",
2087
+ // Body text paragraphs
2088
+ "table",
2089
+ // Tabular data
2090
+ "list",
2091
+ // Bulleted or numbered lists
2092
+ "header",
2093
+ // Page headers (repeated at top of pages)
2094
+ "footer",
2095
+ // Page footers (repeated at bottom of pages)
2096
+ "caption",
2097
+ // Image or figure captions
2098
+ "code",
2099
+ // Code blocks or preformatted text
2100
+ "image",
2101
+ // Image/figure placeholder
2102
+ "form",
2103
+ // Form fields
2104
+ "signature",
2105
+ // Signatures
2106
+ "handwriting"
2107
+ // Handwritten text
2108
+ ];
2109
+ var geminiBoundingBoxSchema = {
2110
+ type: "array",
2111
+ items: {
2112
+ type: "object",
2113
+ properties: {
2114
+ box_2d: {
2115
+ type: "array",
2116
+ items: { type: "number" },
2117
+ minItems: 4,
2118
+ maxItems: 4,
2119
+ description: "Bounding box coordinates [y_min, x_min, y_max, x_max] normalized 0-1000"
2120
+ },
2121
+ text: {
2122
+ type: "string",
2123
+ description: "Text content within the bounding box"
2124
+ },
2125
+ type: {
2126
+ type: "string",
2127
+ enum: [...BLOCK_TYPES],
2128
+ description: "Block type classification"
2129
+ },
2130
+ confidence: {
2131
+ type: "string",
2132
+ enum: ["high", "medium", "low"],
2133
+ nullable: true,
2134
+ description: "Confidence level of extraction"
2135
+ },
2136
+ page: {
2137
+ type: "integer",
2138
+ nullable: true,
2139
+ description: "Page number (0-indexed)"
2140
+ }
2141
+ },
2142
+ required: ["box_2d", "text", "type"],
2143
+ additionalProperties: false
2144
+ }
2145
+ };
2146
+ var GEMINI_BBOX_EXTRACTION_PROMPT = `Analyze this document and extract all text with precise bounding box locations.
2147
+
2148
+ For each text block, provide:
2149
+ - box_2d: Bounding box as [y_min, x_min, y_max, x_max] normalized to 0-1000
2150
+ - text: The exact text content
2151
+ - type: Block classification (title, paragraph, table, list, header, footer, caption, code, image, form, signature, handwriting)
2152
+ - confidence: Your confidence level (high, medium, low)
2153
+ - page: Page number (0-indexed) for multi-page documents
2154
+
2155
+ IMPORTANT coordinate format:
2156
+ - Use [y_min, x_min, y_max, x_max] order (Y coordinate first, then X)
2157
+ - Normalize all values to 0-1000 range (top-left is [0, 0], bottom-right is [1000, 1000])
2158
+
2159
+ Return ONLY a valid JSON array, no other text.`;
2160
+ function normalizeGeminiBBox(geminiBBox) {
2161
+ const [yMin, xMin, yMax, xMax] = geminiBBox;
2162
+ return {
2163
+ x: xMin / 1e3,
2164
+ y: yMin / 1e3,
2165
+ width: (xMax - xMin) / 1e3,
2166
+ height: (yMax - yMin) / 1e3
2167
+ };
2168
+ }
2169
+ function toGeminiBBox(bbox) {
2170
+ return [
2171
+ Math.round(bbox.y * 1e3),
2172
+ // y_min
2173
+ Math.round(bbox.x * 1e3),
2174
+ // x_min
2175
+ Math.round((bbox.y + bbox.height) * 1e3),
2176
+ // y_max
2177
+ Math.round((bbox.x + bbox.width) * 1e3)
2178
+ // x_max
2179
+ ];
2180
+ }
2181
+ function convertGeminiBlocksToDocumentBlocks(geminiBlocks) {
2182
+ return geminiBlocks.map((block) => ({
2183
+ text: block.text,
2184
+ bbox: normalizeGeminiBBox(block.box_2d),
2185
+ type: block.type,
2186
+ confidence: block.confidence === "high" ? 0.9 : block.confidence === "medium" ? 0.7 : block.confidence === "low" ? 0.5 : void 0,
2187
+ page: block.page
2188
+ }));
2189
+ }
2190
+
2035
2191
  // src/metadata.ts
2036
2192
  var SUPPORTED_IMAGE_TYPES = {
2037
2193
  COMMON: ["image/png", "image/jpeg", "image/webp", "image/gif"],
@@ -2703,7 +2859,9 @@ function buildLLMProvider(config) {
2703
2859
  }
2704
2860
  export {
2705
2861
  AnthropicProvider,
2862
+ BLOCK_TYPES,
2706
2863
  FallbackManager,
2864
+ GEMINI_BBOX_EXTRACTION_PROMPT,
2707
2865
  GoogleProvider,
2708
2866
  OpenAIProvider,
2709
2867
  PROVIDER_METADATA,
@@ -2711,20 +2869,33 @@ export {
2711
2869
  SchemaTranslator,
2712
2870
  XAIProvider,
2713
2871
  adaptToCoreLLMProvider,
2872
+ buildBlockClassificationPrompt,
2873
+ buildConfidencePrompt,
2874
+ buildLLMDerivedFeaturesPrompt,
2714
2875
  buildLLMProvider,
2876
+ buildLanguageHintsPrompt,
2877
+ buildOutputFormatPrompt,
2715
2878
  buildSchemaPromptSection,
2879
+ buildSourcesPrompt,
2716
2880
  combineSchemaAndUserPrompt,
2881
+ combineSchemaUserAndDerivedPrompts,
2717
2882
  compareNativeVsOpenRouter,
2883
+ convertGeminiBlocksToDocumentBlocks,
2718
2884
  createProviderFromRegistry,
2719
2885
  createVLMProvider,
2720
2886
  estimateCost,
2887
+ extractMetadataFromResponse,
2721
2888
  formatSchemaForPrompt,
2889
+ geminiBoundingBoxSchema,
2722
2890
  getCheapestProvider,
2723
2891
  getProvidersForNode,
2724
2892
  isImageTypeSupported,
2725
2893
  isProviderCompatibleWithNode,
2894
+ normalizeGeminiBBox,
2726
2895
  providerRegistry,
2727
2896
  registerProvider,
2728
- supportsPDFsInline
2897
+ shouldExtractMetadata,
2898
+ supportsPDFsInline,
2899
+ toGeminiBBox
2729
2900
  };
2730
2901
  //# sourceMappingURL=index.js.map