@doclo/providers-llm 0.1.7 → 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/chunk-7YPJIWRM.js +291 -0
- package/dist/chunk-7YPJIWRM.js.map +1 -0
- package/dist/index.d.ts +229 -1
- package/dist/index.js +293 -137
- package/dist/index.js.map +1 -1
- package/dist/schema-prompt-formatter-AIORLWUF.js +29 -0
- package/dist/schema-prompt-formatter-AIORLWUF.js.map +1 -0
- package/package.json +2 -2
package/dist/index.js
CHANGED
|
@@ -1,3 +1,16 @@
|
|
|
1
|
+
import {
|
|
2
|
+
buildBlockClassificationPrompt,
|
|
3
|
+
buildConfidencePrompt,
|
|
4
|
+
buildLLMDerivedFeaturesPrompt,
|
|
5
|
+
buildLanguageHintsPrompt,
|
|
6
|
+
buildOutputFormatPrompt,
|
|
7
|
+
buildSchemaPromptSection,
|
|
8
|
+
buildSourcesPrompt,
|
|
9
|
+
combineSchemaAndUserPrompt,
|
|
10
|
+
combineSchemaUserAndDerivedPrompts,
|
|
11
|
+
formatSchemaForPrompt
|
|
12
|
+
} from "./chunk-7YPJIWRM.js";
|
|
13
|
+
|
|
1
14
|
// src/schema-translator.ts
|
|
2
15
|
import { zodToJsonSchema } from "@alcyone-labs/zod-to-json-schema";
|
|
3
16
|
var SchemaTranslator = class {
|
|
@@ -165,119 +178,72 @@ var SchemaTranslator = class {
|
|
|
165
178
|
}
|
|
166
179
|
};
|
|
167
180
|
|
|
168
|
-
// src/
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
${indentStr} ${fieldSchema.description}`;
|
|
189
|
-
}
|
|
190
|
-
if (fieldSchema.enum) {
|
|
191
|
-
result += `
|
|
192
|
-
${indentStr} Allowed values: ${fieldSchema.enum.map((v) => JSON.stringify(v)).join(", ")}`;
|
|
193
|
-
}
|
|
194
|
-
result += "\n";
|
|
195
|
-
if (fieldSchema.type === "object" && fieldSchema.properties) {
|
|
196
|
-
result += formatSchemaForPrompt(fieldSchema, indent + 1);
|
|
197
|
-
}
|
|
198
|
-
if (fieldSchema.type === "array" && fieldSchema.items) {
|
|
199
|
-
result += `${indentStr} Array items:
|
|
200
|
-
`;
|
|
201
|
-
const itemSchema = Array.isArray(fieldSchema.items) ? fieldSchema.items[0] : fieldSchema.items;
|
|
202
|
-
if (itemSchema && itemSchema.type === "object" && itemSchema.properties) {
|
|
203
|
-
result += formatSchemaForPrompt(itemSchema, indent + 2);
|
|
204
|
-
} else if (itemSchema) {
|
|
205
|
-
const itemType = getTypeDescription(itemSchema);
|
|
206
|
-
result += `${indentStr} ${itemType}
|
|
207
|
-
`;
|
|
208
|
-
}
|
|
209
|
-
}
|
|
181
|
+
// src/metadata-extractor.ts
|
|
182
|
+
// Reserved top-level keys that carry LLM-derived metadata rather than
// extracted document data. They are always removed from the returned payload.
var METADATA_FIELDS = [
  "_confidence",
  "_sources",
  "_blockTypes",
  "_headers",
  "_footers"
];

/**
 * Splits a parsed LLM response into the extracted payload and the metadata
 * the model reported under the reserved `_`-prefixed keys.
 *
 * Recognised metadata:
 * - `_confidence` / `_blockTypes`: kept only when they are plain (non-array) objects.
 * - `_sources`: array of records, normalised to `{ field, text, bbox, page }`
 *   (accepts the `fieldPath`, `sourceText` and `box_2d` aliases).
 * - `_headers` / `_footers`: arrays of records, normalised to `{ text, pages }`.
 *
 * Every key listed in METADATA_FIELDS is removed from the payload, including
 * keys whose value was malformed and therefore ignored.
 *
 * @param {unknown} json - Parsed response body.
 * @returns {{ json: unknown, metadata?: object }} For object responses: a
 *   shallow copy without the reserved keys, plus `metadata` (`undefined` when
 *   nothing usable was found). Primitives, `null` and top-level arrays are
 *   returned untouched as `{ json }`.
 */
function extractMetadataFromResponse(json) {
  // Only plain object responses can carry the reserved keys. Arrays are passed
  // through as-is: rebuilding them key-by-key would turn them into objects.
  if (!json || typeof json !== "object" || Array.isArray(json)) {
    return { json };
  }

  const isRecord = (value) => value !== null && typeof value === "object";
  const isPlainRecord = (value) => isRecord(value) && !Array.isArray(value);
  // Headers and footers share one shape.
  const toPageRegion = (region) => ({
    text: region.text || "",
    pages: Array.isArray(region.pages) ? region.pages : []
  });

  const metadata = {};
  let hasMetadata = false;

  if (isPlainRecord(json._confidence)) {
    metadata.confidence = json._confidence;
    hasMetadata = true;
  }

  if (Array.isArray(json._sources)) {
    // Model output is untrusted: skip null/primitive entries instead of
    // throwing on property access.
    metadata.sources = json._sources.filter(isRecord).map((source) => ({
      field: source.field || source.fieldPath || "",
      text: source.text || source.sourceText || "",
      bbox: source.bbox || source.box_2d,
      page: source.page
    }));
    hasMetadata = true;
  }

  if (isPlainRecord(json._blockTypes)) {
    metadata.blockTypes = json._blockTypes;
    hasMetadata = true;
  }

  if (Array.isArray(json._headers)) {
    metadata.headers = json._headers.filter(isRecord).map(toPageRegion);
    hasMetadata = true;
  }

  if (Array.isArray(json._footers)) {
    metadata.footers = json._footers.filter(isRecord).map(toPageRegion);
    hasMetadata = true;
  }

  // Object.fromEntries defines own data properties, so a "__proto__" key coming
  // out of JSON.parse is copied as data instead of replacing the prototype.
  const cleanJson = Object.fromEntries(
    Object.entries(json).filter(([key]) => !METADATA_FIELDS.includes(key))
  );

  return {
    json: cleanJson,
    metadata: hasMetadata ? metadata : void 0
  };
}
|
|
244
|
+
/**
 * Tells whether any of the requested derived features makes the model emit
 * metadata that has to be separated from the payload afterwards.
 *
 * @param {object} [derivedOptions] - Derived-feature options of the request.
 * @returns {boolean} `true` when at least one of `includeConfidence`,
 *   `includeSources`, `includeBlockTypes`, `extractHeaders` or
 *   `extractFooters` is truthy; `false` otherwise or when no options are given.
 */
function shouldExtractMetadata(derivedOptions) {
  const metadataFlags = [
    "includeConfidence",
    "includeSources",
    "includeBlockTypes",
    "extractHeaders",
    "extractFooters"
  ];
  return Boolean(derivedOptions) && metadataFlags.some((flag) => Boolean(derivedOptions[flag]));
}
|
|
282
248
|
|
|
283
249
|
// src/provider-registry.ts
|
|
@@ -381,11 +347,16 @@ var OpenAIProvider = class {
|
|
|
381
347
|
if (mode === "strict" && !params.schema) {
|
|
382
348
|
throw new Error('schema is required when mode is "strict"');
|
|
383
349
|
}
|
|
350
|
+
const extractMetadata = shouldExtractMetadata(params.derivedOptions);
|
|
384
351
|
const shouldEmbedSchema = params.embedSchemaInPrompt !== false && params.schema;
|
|
385
352
|
let enhancedInput = params.input;
|
|
386
353
|
if (shouldEmbedSchema) {
|
|
387
354
|
const jsonSchema = this.translator.convertZodIfNeeded(params.schema);
|
|
388
|
-
const enhancedText =
|
|
355
|
+
const enhancedText = params.derivedOptions ? combineSchemaUserAndDerivedPrompts(
|
|
356
|
+
jsonSchema,
|
|
357
|
+
params.input.text || "",
|
|
358
|
+
params.derivedOptions
|
|
359
|
+
) : combineSchemaAndUserPrompt(
|
|
389
360
|
jsonSchema,
|
|
390
361
|
params.input.text || ""
|
|
391
362
|
);
|
|
@@ -393,6 +364,15 @@ var OpenAIProvider = class {
|
|
|
393
364
|
...params.input,
|
|
394
365
|
text: enhancedText
|
|
395
366
|
};
|
|
367
|
+
} else if (params.derivedOptions) {
|
|
368
|
+
const { buildLLMDerivedFeaturesPrompt: buildLLMDerivedFeaturesPrompt2 } = await import("./schema-prompt-formatter-AIORLWUF.js");
|
|
369
|
+
const derivedPrompt = buildLLMDerivedFeaturesPrompt2(params.derivedOptions);
|
|
370
|
+
if (derivedPrompt) {
|
|
371
|
+
enhancedInput = {
|
|
372
|
+
...params.input,
|
|
373
|
+
text: (params.input.text || "") + "\n\n" + derivedPrompt
|
|
374
|
+
};
|
|
375
|
+
}
|
|
396
376
|
}
|
|
397
377
|
const messages = this.buildMessages(enhancedInput);
|
|
398
378
|
const requestBody = {
|
|
@@ -473,7 +453,8 @@ var OpenAIProvider = class {
|
|
|
473
453
|
const data = await response.json();
|
|
474
454
|
const latencyMs = Date.now() - startTime;
|
|
475
455
|
const content = data.choices?.[0]?.message?.content ?? "{}";
|
|
476
|
-
const
|
|
456
|
+
const rawParsed = safeJsonParse(content);
|
|
457
|
+
const { json: parsed, metadata } = extractMetadata ? extractMetadataFromResponse(rawParsed) : { json: rawParsed, metadata: void 0 };
|
|
477
458
|
const message = data.choices?.[0]?.message;
|
|
478
459
|
const reasoning = message?.reasoning;
|
|
479
460
|
const reasoning_details = message?.reasoning_details;
|
|
@@ -498,7 +479,8 @@ var OpenAIProvider = class {
|
|
|
498
479
|
model: this.config.model
|
|
499
480
|
},
|
|
500
481
|
reasoning,
|
|
501
|
-
reasoning_details
|
|
482
|
+
reasoning_details,
|
|
483
|
+
metadata
|
|
502
484
|
};
|
|
503
485
|
}
|
|
504
486
|
buildReasoningConfig(reasoning) {
|
|
@@ -618,11 +600,16 @@ var AnthropicProvider = class {
|
|
|
618
600
|
if (mode === "strict" && !params.schema) {
|
|
619
601
|
throw new Error('schema is required when mode is "strict"');
|
|
620
602
|
}
|
|
603
|
+
const extractMetadata = shouldExtractMetadata(params.derivedOptions);
|
|
621
604
|
const shouldEmbedSchema = params.embedSchemaInPrompt !== false && params.schema;
|
|
622
605
|
let enhancedInput = params.input;
|
|
623
606
|
if (shouldEmbedSchema) {
|
|
624
607
|
const jsonSchema = this.translator.convertZodIfNeeded(params.schema);
|
|
625
|
-
const enhancedText =
|
|
608
|
+
const enhancedText = params.derivedOptions ? combineSchemaUserAndDerivedPrompts(
|
|
609
|
+
jsonSchema,
|
|
610
|
+
params.input.text || "",
|
|
611
|
+
params.derivedOptions
|
|
612
|
+
) : combineSchemaAndUserPrompt(
|
|
626
613
|
jsonSchema,
|
|
627
614
|
params.input.text || ""
|
|
628
615
|
);
|
|
@@ -630,6 +617,15 @@ var AnthropicProvider = class {
|
|
|
630
617
|
...params.input,
|
|
631
618
|
text: enhancedText
|
|
632
619
|
};
|
|
620
|
+
} else if (params.derivedOptions) {
|
|
621
|
+
const { buildLLMDerivedFeaturesPrompt: buildLLMDerivedFeaturesPrompt2 } = await import("./schema-prompt-formatter-AIORLWUF.js");
|
|
622
|
+
const derivedPrompt = buildLLMDerivedFeaturesPrompt2(params.derivedOptions);
|
|
623
|
+
if (derivedPrompt) {
|
|
624
|
+
enhancedInput = {
|
|
625
|
+
...params.input,
|
|
626
|
+
text: (params.input.text || "") + "\n\n" + derivedPrompt
|
|
627
|
+
};
|
|
628
|
+
}
|
|
633
629
|
}
|
|
634
630
|
const messages = await this.buildMessages(enhancedInput);
|
|
635
631
|
const useNewStructuredOutputs = this.supportsNewStructuredOutputs();
|
|
@@ -703,10 +699,7 @@ var AnthropicProvider = class {
|
|
|
703
699
|
}
|
|
704
700
|
const data = await response.json();
|
|
705
701
|
const message = data.choices?.[0]?.message;
|
|
706
|
-
let content = message?.content ??
|
|
707
|
-
if (!useNewStructuredOutputs2) {
|
|
708
|
-
content = "{" + content;
|
|
709
|
-
}
|
|
702
|
+
let content = message?.content ?? "{}";
|
|
710
703
|
const reasoning = message?.reasoning;
|
|
711
704
|
const reasoning_details = message?.reasoning_details;
|
|
712
705
|
content = content.replace(/^```json\s*\n?/, "").replace(/\n?```\s*$/, "").trim();
|
|
@@ -747,9 +740,10 @@ var AnthropicProvider = class {
|
|
|
747
740
|
}
|
|
748
741
|
const latencyMs = Date.now() - startTime;
|
|
749
742
|
const baseProvider = extractProviderFromModel2(this.config.model, "anthropic");
|
|
743
|
+
const { json: cleanJson, metadata } = extractMetadata ? extractMetadataFromResponse(parsed) : { json: parsed, metadata: void 0 };
|
|
750
744
|
return {
|
|
751
|
-
json:
|
|
752
|
-
rawText: JSON.stringify(
|
|
745
|
+
json: cleanJson,
|
|
746
|
+
rawText: JSON.stringify(cleanJson),
|
|
753
747
|
metrics: {
|
|
754
748
|
costUSD,
|
|
755
749
|
inputTokens,
|
|
@@ -763,7 +757,8 @@ var AnthropicProvider = class {
|
|
|
763
757
|
cacheReadInputTokens
|
|
764
758
|
},
|
|
765
759
|
reasoning,
|
|
766
|
-
reasoning_details
|
|
760
|
+
reasoning_details,
|
|
761
|
+
metadata
|
|
767
762
|
};
|
|
768
763
|
} else {
|
|
769
764
|
const endpoint = this.config.baseUrl || "https://api.anthropic.com/v1";
|
|
@@ -828,9 +823,10 @@ var AnthropicProvider = class {
|
|
|
828
823
|
const reasoning = thinkingBlock?.thinking;
|
|
829
824
|
const latencyMs = Date.now() - startTime;
|
|
830
825
|
const baseProvider = extractProviderFromModel2(this.config.model, "anthropic");
|
|
826
|
+
const { json: cleanJson, metadata } = extractMetadata ? extractMetadataFromResponse(parsed) : { json: parsed, metadata: void 0 };
|
|
831
827
|
return {
|
|
832
|
-
json:
|
|
833
|
-
rawText: JSON.stringify(
|
|
828
|
+
json: cleanJson,
|
|
829
|
+
rawText: JSON.stringify(cleanJson),
|
|
834
830
|
metrics: {
|
|
835
831
|
costUSD,
|
|
836
832
|
inputTokens,
|
|
@@ -848,7 +844,8 @@ var AnthropicProvider = class {
|
|
|
848
844
|
signature: null,
|
|
849
845
|
id: "thinking-1",
|
|
850
846
|
format: "anthropic-claude-v1"
|
|
851
|
-
}] : void 0
|
|
847
|
+
}] : void 0,
|
|
848
|
+
metadata
|
|
852
849
|
};
|
|
853
850
|
}
|
|
854
851
|
}
|
|
@@ -885,19 +882,13 @@ var AnthropicProvider = class {
|
|
|
885
882
|
requestBody.response_format = {
|
|
886
883
|
type: "json_object"
|
|
887
884
|
};
|
|
888
|
-
} else {
|
|
885
|
+
} else if (useNewStructuredOutputs) {
|
|
889
886
|
const openRouterSchema = this.translator.toClaudeOpenRouterSchema(schema);
|
|
890
887
|
const fixedSchema = this.fixSchemaForStrictMode(openRouterSchema);
|
|
891
888
|
if (process.env.DEBUG_PROVIDERS) {
|
|
892
|
-
console.log("[AnthropicProvider]
|
|
889
|
+
console.log("[AnthropicProvider] Using json_schema (native support)");
|
|
893
890
|
console.log("[AnthropicProvider] Fixed schema:", JSON.stringify(fixedSchema, null, 2));
|
|
894
891
|
}
|
|
895
|
-
if (!useNewStructuredOutputs) {
|
|
896
|
-
messageArray.push({
|
|
897
|
-
role: "assistant",
|
|
898
|
-
content: "{"
|
|
899
|
-
});
|
|
900
|
-
}
|
|
901
892
|
requestBody.response_format = {
|
|
902
893
|
type: "json_schema",
|
|
903
894
|
json_schema: {
|
|
@@ -906,6 +897,13 @@ var AnthropicProvider = class {
|
|
|
906
897
|
schema: fixedSchema
|
|
907
898
|
}
|
|
908
899
|
};
|
|
900
|
+
} else {
|
|
901
|
+
if (process.env.DEBUG_PROVIDERS) {
|
|
902
|
+
console.log("[AnthropicProvider] Using json_object (legacy mode, schema in prompt)");
|
|
903
|
+
}
|
|
904
|
+
requestBody.response_format = {
|
|
905
|
+
type: "json_object"
|
|
906
|
+
};
|
|
909
907
|
}
|
|
910
908
|
if (reasoning) {
|
|
911
909
|
requestBody.reasoning = this.buildReasoningConfig(reasoning, max_tokens);
|
|
@@ -1248,11 +1246,16 @@ var GoogleProvider = class {
|
|
|
1248
1246
|
if (mode === "strict" && !params.schema) {
|
|
1249
1247
|
throw new Error('schema is required when mode is "strict"');
|
|
1250
1248
|
}
|
|
1249
|
+
const extractMetadata = shouldExtractMetadata(params.derivedOptions);
|
|
1251
1250
|
const shouldEmbedSchema = params.embedSchemaInPrompt !== false && params.schema;
|
|
1252
1251
|
let enhancedInput = normalizedInput;
|
|
1253
1252
|
if (shouldEmbedSchema) {
|
|
1254
1253
|
const jsonSchema = this.translator.convertZodIfNeeded(params.schema);
|
|
1255
|
-
const enhancedText =
|
|
1254
|
+
const enhancedText = params.derivedOptions ? combineSchemaUserAndDerivedPrompts(
|
|
1255
|
+
jsonSchema,
|
|
1256
|
+
normalizedInput.text || "",
|
|
1257
|
+
params.derivedOptions
|
|
1258
|
+
) : combineSchemaAndUserPrompt(
|
|
1256
1259
|
jsonSchema,
|
|
1257
1260
|
normalizedInput.text || ""
|
|
1258
1261
|
);
|
|
@@ -1260,6 +1263,15 @@ var GoogleProvider = class {
|
|
|
1260
1263
|
...normalizedInput,
|
|
1261
1264
|
text: enhancedText
|
|
1262
1265
|
};
|
|
1266
|
+
} else if (params.derivedOptions) {
|
|
1267
|
+
const { buildLLMDerivedFeaturesPrompt: buildLLMDerivedFeaturesPrompt2 } = await import("./schema-prompt-formatter-AIORLWUF.js");
|
|
1268
|
+
const derivedPrompt = buildLLMDerivedFeaturesPrompt2(params.derivedOptions);
|
|
1269
|
+
if (derivedPrompt) {
|
|
1270
|
+
enhancedInput = {
|
|
1271
|
+
...normalizedInput,
|
|
1272
|
+
text: (normalizedInput.text || "") + "\n\n" + derivedPrompt
|
|
1273
|
+
};
|
|
1274
|
+
}
|
|
1263
1275
|
}
|
|
1264
1276
|
const contents = await this.buildContents(enhancedInput);
|
|
1265
1277
|
const requestBody = {
|
|
@@ -1328,7 +1340,8 @@ var GoogleProvider = class {
|
|
|
1328
1340
|
const reasoning = message?.reasoning;
|
|
1329
1341
|
const reasoning_details = message?.reasoning_details;
|
|
1330
1342
|
content = content.replace(/^```json\s*\n?/, "").replace(/\n?```\s*$/, "").trim();
|
|
1331
|
-
const
|
|
1343
|
+
const rawParsed = safeJsonParse3(content);
|
|
1344
|
+
const { json: parsed, metadata } = extractMetadata ? extractMetadataFromResponse(rawParsed) : { json: rawParsed, metadata: void 0 };
|
|
1332
1345
|
const baseProvider = extractProviderFromModel3(this.config.model, "google");
|
|
1333
1346
|
return {
|
|
1334
1347
|
json: parsed,
|
|
@@ -1344,7 +1357,8 @@ var GoogleProvider = class {
|
|
|
1344
1357
|
model: this.config.model
|
|
1345
1358
|
},
|
|
1346
1359
|
reasoning,
|
|
1347
|
-
reasoning_details
|
|
1360
|
+
reasoning_details,
|
|
1361
|
+
metadata
|
|
1348
1362
|
};
|
|
1349
1363
|
} else {
|
|
1350
1364
|
const candidate = data.candidates?.[0];
|
|
@@ -1354,7 +1368,8 @@ var GoogleProvider = class {
|
|
|
1354
1368
|
costUSD = this.calculateCost(data.usageMetadata);
|
|
1355
1369
|
const thinkingPart = candidate?.content?.parts?.find((part) => part.thought === true);
|
|
1356
1370
|
const reasoning = thinkingPart?.text;
|
|
1357
|
-
const
|
|
1371
|
+
const rawParsed = safeJsonParse3(content);
|
|
1372
|
+
const { json: parsed, metadata } = extractMetadata ? extractMetadataFromResponse(rawParsed) : { json: rawParsed, metadata: void 0 };
|
|
1358
1373
|
const baseProvider = extractProviderFromModel3(this.config.model, "google");
|
|
1359
1374
|
return {
|
|
1360
1375
|
json: parsed,
|
|
@@ -1376,7 +1391,8 @@ var GoogleProvider = class {
|
|
|
1376
1391
|
signature: null,
|
|
1377
1392
|
id: "thinking-1",
|
|
1378
1393
|
format: "google-gemini-v1"
|
|
1379
|
-
}] : void 0
|
|
1394
|
+
}] : void 0,
|
|
1395
|
+
metadata
|
|
1380
1396
|
};
|
|
1381
1397
|
}
|
|
1382
1398
|
}
|
|
@@ -1584,11 +1600,16 @@ var XAIProvider = class {
|
|
|
1584
1600
|
if (mode === "strict" && !params.schema) {
|
|
1585
1601
|
throw new Error('schema is required when mode is "strict"');
|
|
1586
1602
|
}
|
|
1603
|
+
const extractMetadata = shouldExtractMetadata(params.derivedOptions);
|
|
1587
1604
|
const shouldEmbedSchema = params.embedSchemaInPrompt !== false && params.schema;
|
|
1588
1605
|
let enhancedInput = params.input;
|
|
1589
1606
|
if (shouldEmbedSchema) {
|
|
1590
1607
|
const jsonSchema = this.translator.convertZodIfNeeded(params.schema);
|
|
1591
|
-
const enhancedText =
|
|
1608
|
+
const enhancedText = params.derivedOptions ? combineSchemaUserAndDerivedPrompts(
|
|
1609
|
+
jsonSchema,
|
|
1610
|
+
params.input.text || "",
|
|
1611
|
+
params.derivedOptions
|
|
1612
|
+
) : combineSchemaAndUserPrompt(
|
|
1592
1613
|
jsonSchema,
|
|
1593
1614
|
params.input.text || ""
|
|
1594
1615
|
);
|
|
@@ -1596,6 +1617,15 @@ var XAIProvider = class {
|
|
|
1596
1617
|
...params.input,
|
|
1597
1618
|
text: enhancedText
|
|
1598
1619
|
};
|
|
1620
|
+
} else if (params.derivedOptions) {
|
|
1621
|
+
const { buildLLMDerivedFeaturesPrompt: buildLLMDerivedFeaturesPrompt2 } = await import("./schema-prompt-formatter-AIORLWUF.js");
|
|
1622
|
+
const derivedPrompt = buildLLMDerivedFeaturesPrompt2(params.derivedOptions);
|
|
1623
|
+
if (derivedPrompt) {
|
|
1624
|
+
enhancedInput = {
|
|
1625
|
+
...params.input,
|
|
1626
|
+
text: (params.input.text || "") + "\n\n" + derivedPrompt
|
|
1627
|
+
};
|
|
1628
|
+
}
|
|
1599
1629
|
}
|
|
1600
1630
|
const messages = await this.buildMessages(enhancedInput);
|
|
1601
1631
|
const requestBody = {
|
|
@@ -1676,7 +1706,8 @@ var XAIProvider = class {
|
|
|
1676
1706
|
const latencyMs = Date.now() - startTime;
|
|
1677
1707
|
const message = data.choices?.[0]?.message;
|
|
1678
1708
|
const content = message?.content ?? "{}";
|
|
1679
|
-
const
|
|
1709
|
+
const rawParsed = safeJsonParse4(content);
|
|
1710
|
+
const { json: parsed, metadata } = extractMetadata ? extractMetadataFromResponse(rawParsed) : { json: rawParsed, metadata: void 0 };
|
|
1680
1711
|
const reasoning = message?.reasoning;
|
|
1681
1712
|
const reasoning_details = message?.reasoning_details;
|
|
1682
1713
|
let costUSD;
|
|
@@ -1700,7 +1731,8 @@ var XAIProvider = class {
|
|
|
1700
1731
|
model: this.config.model
|
|
1701
1732
|
},
|
|
1702
1733
|
reasoning,
|
|
1703
|
-
reasoning_details
|
|
1734
|
+
reasoning_details,
|
|
1735
|
+
metadata
|
|
1704
1736
|
};
|
|
1705
1737
|
}
|
|
1706
1738
|
buildReasoningConfig(reasoning) {
|
|
@@ -2032,6 +2064,115 @@ function adaptToCoreLLMProvider(provider) {
|
|
|
2032
2064
|
};
|
|
2033
2065
|
}
|
|
2034
2066
|
|
|
2067
|
+
// src/gemini-bbox-schema.ts
|
|
2068
|
+
// Block classifications the model may assign to an extracted text block.
// This list is the single source for both the schema enum and the prompt text.
var BLOCK_TYPES = [
  "title", // Main document title or section headers
  "paragraph", // Body text paragraphs
  "table", // Tabular data
  "list", // Bulleted or numbered lists
  "header", // Page headers (repeated at top of pages)
  "footer", // Page footers (repeated at bottom of pages)
  "caption", // Image or figure captions
  "code", // Code blocks or preformatted text
  "image", // Image/figure placeholder
  "form", // Form fields
  "signature", // Signatures
  "handwriting" // Handwritten text
];

// JSON schema for the bounding-box extraction response: an array of blocks,
// each with a `box_2d` in [y_min, x_min, y_max, x_max] order on a 0-1000 scale.
var geminiBoundingBoxSchema = {
  type: "array",
  items: {
    type: "object",
    properties: {
      box_2d: {
        type: "array",
        items: { type: "number" },
        minItems: 4,
        maxItems: 4,
        description: "Bounding box coordinates [y_min, x_min, y_max, x_max] normalized 0-1000"
      },
      text: {
        type: "string",
        description: "Text content within the bounding box"
      },
      type: {
        type: "string",
        // Copied so that mutating the schema cannot alter BLOCK_TYPES.
        enum: [...BLOCK_TYPES],
        description: "Block type classification"
      },
      confidence: {
        type: "string",
        enum: ["high", "medium", "low"],
        nullable: true,
        description: "Confidence level of extraction"
      },
      page: {
        type: "integer",
        nullable: true,
        description: "Page number (0-indexed)"
      }
    },
    required: ["box_2d", "text", "type"],
    additionalProperties: false
  }
};

// Prompt asking the model for a JSON array that matches geminiBoundingBoxSchema.
// The list of block types is rendered from BLOCK_TYPES so the prompt and the
// schema enum cannot drift apart.
var GEMINI_BBOX_EXTRACTION_PROMPT = `Analyze this document and extract all text with precise bounding box locations.

For each text block, provide:
- box_2d: Bounding box as [y_min, x_min, y_max, x_max] normalized to 0-1000
- text: The exact text content
- type: Block classification (${BLOCK_TYPES.join(", ")})
- confidence: Your confidence level (high, medium, low)
- page: Page number (0-indexed) for multi-page documents

IMPORTANT coordinate format:
- Use [y_min, x_min, y_max, x_max] order (Y coordinate first, then X)
- Normalize all values to 0-1000 range (top-left is [0, 0], bottom-right is [1000, 1000])

Return ONLY a valid JSON array, no other text.`;
|
|
2145
|
+
/**
 * Converts a `[y_min, x_min, y_max, x_max]` box into an
 * `{ x, y, width, height }` rectangle, dividing every value by 1000.
 *
 * @param {number[]} geminiBBox - Box in `[y_min, x_min, y_max, x_max]` order.
 * @returns {{ x: number, y: number, width: number, height: number }}
 *   Origin and size of the box, each scaled down by 1000.
 */
function normalizeGeminiBBox(geminiBBox) {
  const [top, left, bottom, right] = geminiBBox;
  const toFraction = (value) => value / 1e3;
  return {
    x: toFraction(left),
    y: toFraction(top),
    width: toFraction(right - left),
    height: toFraction(bottom - top)
  };
}
|
|
2154
|
+
/**
 * Converts an `{ x, y, width, height }` rectangle into a
 * `[y_min, x_min, y_max, x_max]` box, multiplying every value by 1000 and
 * rounding to the nearest integer.
 *
 * @param {{ x: number, y: number, width: number, height: number }} bbox -
 *   Rectangle given by its origin and size.
 * @returns {number[]} `[y_min, x_min, y_max, x_max]`, each value rounded.
 */
function toGeminiBBox(bbox) {
  const { x, y, width, height } = bbox;
  const scale = (fraction) => Math.round(fraction * 1e3);
  const yMin = scale(y);
  const xMin = scale(x);
  const yMax = scale(y + height);
  const xMax = scale(x + width);
  return [yMin, xMin, yMax, xMax];
}
|
|
2166
|
+
/**
 * Maps blocks in the bounding-box response shape to document blocks.
 *
 * For each block, `box_2d` is converted with normalizeGeminiBBox and the
 * textual confidence label becomes a number: "high" -> 0.9, "medium" -> 0.7,
 * "low" -> 0.5, anything else -> undefined. `text`, `type` and `page` are
 * copied through unchanged.
 *
 * @param {Array<object>} geminiBlocks - Blocks with `text`, `box_2d`, `type`
 *   and optionally `confidence` and `page`.
 * @returns {Array<object>} One `{ text, bbox, type, confidence, page }` per input block.
 */
function convertGeminiBlocksToDocumentBlocks(geminiBlocks) {
  // A Map (not an object literal) so labels such as "constructor" still
  // resolve to undefined.
  const scoreByLabel = new Map([
    ["high", 0.9],
    ["medium", 0.7],
    ["low", 0.5]
  ]);
  return geminiBlocks.map((block) => {
    const documentBlock = {
      text: block.text,
      bbox: normalizeGeminiBBox(block.box_2d),
      type: block.type,
      confidence: scoreByLabel.get(block.confidence),
      page: block.page
    };
    return documentBlock;
  });
}
|
|
2175
|
+
|
|
2035
2176
|
// src/metadata.ts
|
|
2036
2177
|
var SUPPORTED_IMAGE_TYPES = {
|
|
2037
2178
|
COMMON: ["image/png", "image/jpeg", "image/webp", "image/gif"],
|
|
@@ -2703,7 +2844,9 @@ function buildLLMProvider(config) {
|
|
|
2703
2844
|
}
|
|
2704
2845
|
export {
|
|
2705
2846
|
AnthropicProvider,
|
|
2847
|
+
BLOCK_TYPES,
|
|
2706
2848
|
FallbackManager,
|
|
2849
|
+
GEMINI_BBOX_EXTRACTION_PROMPT,
|
|
2707
2850
|
GoogleProvider,
|
|
2708
2851
|
OpenAIProvider,
|
|
2709
2852
|
PROVIDER_METADATA,
|
|
@@ -2711,20 +2854,33 @@ export {
|
|
|
2711
2854
|
SchemaTranslator,
|
|
2712
2855
|
XAIProvider,
|
|
2713
2856
|
adaptToCoreLLMProvider,
|
|
2857
|
+
buildBlockClassificationPrompt,
|
|
2858
|
+
buildConfidencePrompt,
|
|
2859
|
+
buildLLMDerivedFeaturesPrompt,
|
|
2714
2860
|
buildLLMProvider,
|
|
2861
|
+
buildLanguageHintsPrompt,
|
|
2862
|
+
buildOutputFormatPrompt,
|
|
2715
2863
|
buildSchemaPromptSection,
|
|
2864
|
+
buildSourcesPrompt,
|
|
2716
2865
|
combineSchemaAndUserPrompt,
|
|
2866
|
+
combineSchemaUserAndDerivedPrompts,
|
|
2717
2867
|
compareNativeVsOpenRouter,
|
|
2868
|
+
convertGeminiBlocksToDocumentBlocks,
|
|
2718
2869
|
createProviderFromRegistry,
|
|
2719
2870
|
createVLMProvider,
|
|
2720
2871
|
estimateCost,
|
|
2872
|
+
extractMetadataFromResponse,
|
|
2721
2873
|
formatSchemaForPrompt,
|
|
2874
|
+
geminiBoundingBoxSchema,
|
|
2722
2875
|
getCheapestProvider,
|
|
2723
2876
|
getProvidersForNode,
|
|
2724
2877
|
isImageTypeSupported,
|
|
2725
2878
|
isProviderCompatibleWithNode,
|
|
2879
|
+
normalizeGeminiBBox,
|
|
2726
2880
|
providerRegistry,
|
|
2727
2881
|
registerProvider,
|
|
2728
|
-
|
|
2882
|
+
shouldExtractMetadata,
|
|
2883
|
+
supportsPDFsInline,
|
|
2884
|
+
toGeminiBBox
|
|
2729
2885
|
};
|
|
2730
2886
|
//# sourceMappingURL=index.js.map
|