@doclo/providers-llm 0.1.7 → 0.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/chunk-7YPJIWRM.js +291 -0
- package/dist/chunk-7YPJIWRM.js.map +1 -0
- package/dist/index.d.ts +275 -4
- package/dist/index.js +317 -146
- package/dist/index.js.map +1 -1
- package/dist/schema-prompt-formatter-AIORLWUF.js +29 -0
- package/dist/schema-prompt-formatter-AIORLWUF.js.map +1 -0
- package/package.json +2 -2
package/dist/index.js
CHANGED
|
@@ -1,3 +1,16 @@
|
|
|
1
|
+
import {
|
|
2
|
+
buildBlockClassificationPrompt,
|
|
3
|
+
buildConfidencePrompt,
|
|
4
|
+
buildLLMDerivedFeaturesPrompt,
|
|
5
|
+
buildLanguageHintsPrompt,
|
|
6
|
+
buildOutputFormatPrompt,
|
|
7
|
+
buildSchemaPromptSection,
|
|
8
|
+
buildSourcesPrompt,
|
|
9
|
+
combineSchemaAndUserPrompt,
|
|
10
|
+
combineSchemaUserAndDerivedPrompts,
|
|
11
|
+
formatSchemaForPrompt
|
|
12
|
+
} from "./chunk-7YPJIWRM.js";
|
|
13
|
+
|
|
1
14
|
// src/schema-translator.ts
|
|
2
15
|
import { zodToJsonSchema } from "@alcyone-labs/zod-to-json-schema";
|
|
3
16
|
var SchemaTranslator = class {
|
|
@@ -165,119 +178,72 @@ var SchemaTranslator = class {
|
|
|
165
178
|
}
|
|
166
179
|
};
|
|
167
180
|
|
|
168
|
-
// src/
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
${indentStr} ${fieldSchema.description}`;
|
|
189
|
-
}
|
|
190
|
-
if (fieldSchema.enum) {
|
|
191
|
-
result += `
|
|
192
|
-
${indentStr} Allowed values: ${fieldSchema.enum.map((v) => JSON.stringify(v)).join(", ")}`;
|
|
193
|
-
}
|
|
194
|
-
result += "\n";
|
|
195
|
-
if (fieldSchema.type === "object" && fieldSchema.properties) {
|
|
196
|
-
result += formatSchemaForPrompt(fieldSchema, indent + 1);
|
|
197
|
-
}
|
|
198
|
-
if (fieldSchema.type === "array" && fieldSchema.items) {
|
|
199
|
-
result += `${indentStr} Array items:
|
|
200
|
-
`;
|
|
201
|
-
const itemSchema = Array.isArray(fieldSchema.items) ? fieldSchema.items[0] : fieldSchema.items;
|
|
202
|
-
if (itemSchema && itemSchema.type === "object" && itemSchema.properties) {
|
|
203
|
-
result += formatSchemaForPrompt(itemSchema, indent + 2);
|
|
204
|
-
} else if (itemSchema) {
|
|
205
|
-
const itemType = getTypeDescription(itemSchema);
|
|
206
|
-
result += `${indentStr} ${itemType}
|
|
207
|
-
`;
|
|
208
|
-
}
|
|
209
|
-
}
|
|
181
|
+
// src/metadata-extractor.ts
|
|
182
|
+
var METADATA_FIELDS = [
|
|
183
|
+
"_confidence",
|
|
184
|
+
"_sources",
|
|
185
|
+
"_blockTypes",
|
|
186
|
+
"_headers",
|
|
187
|
+
"_footers"
|
|
188
|
+
];
|
|
189
|
+
function extractMetadataFromResponse(json) {
|
|
190
|
+
if (!json || typeof json !== "object") {
|
|
191
|
+
return { json };
|
|
192
|
+
}
|
|
193
|
+
const rawJson = json;
|
|
194
|
+
const metadata = {};
|
|
195
|
+
let hasMetadata = false;
|
|
196
|
+
if ("_confidence" in rawJson && rawJson._confidence) {
|
|
197
|
+
const confidence = rawJson._confidence;
|
|
198
|
+
if (typeof confidence === "object" && !Array.isArray(confidence)) {
|
|
199
|
+
metadata.confidence = confidence;
|
|
200
|
+
hasMetadata = true;
|
|
210
201
|
}
|
|
211
202
|
}
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
const formatHints = {
|
|
227
|
-
"date": "YYYY-MM-DD",
|
|
228
|
-
"time": "HH:MM or HH:MM:SS",
|
|
229
|
-
"date-time": "YYYY-MM-DDTHH:MM:SS (ISO 8601)"
|
|
230
|
-
};
|
|
231
|
-
const hint = formatHints[schema.format];
|
|
232
|
-
if (hint) {
|
|
233
|
-
return `string (format: ${schema.format}, use ${hint})`;
|
|
234
|
-
}
|
|
235
|
-
return `string (format: ${schema.format})`;
|
|
203
|
+
if ("_sources" in rawJson && Array.isArray(rawJson._sources)) {
|
|
204
|
+
metadata.sources = rawJson._sources.map((source) => ({
|
|
205
|
+
field: source.field || source.fieldPath || "",
|
|
206
|
+
text: source.text || source.sourceText || "",
|
|
207
|
+
bbox: source.bbox || source.box_2d,
|
|
208
|
+
page: source.page
|
|
209
|
+
}));
|
|
210
|
+
hasMetadata = true;
|
|
211
|
+
}
|
|
212
|
+
if ("_blockTypes" in rawJson && rawJson._blockTypes) {
|
|
213
|
+
const blockTypes = rawJson._blockTypes;
|
|
214
|
+
if (typeof blockTypes === "object" && !Array.isArray(blockTypes)) {
|
|
215
|
+
metadata.blockTypes = blockTypes;
|
|
216
|
+
hasMetadata = true;
|
|
236
217
|
}
|
|
237
|
-
return typeStr;
|
|
238
218
|
}
|
|
239
|
-
if (
|
|
240
|
-
|
|
219
|
+
if ("_headers" in rawJson && Array.isArray(rawJson._headers)) {
|
|
220
|
+
metadata.headers = rawJson._headers.map((header) => ({
|
|
221
|
+
text: header.text || "",
|
|
222
|
+
pages: Array.isArray(header.pages) ? header.pages : []
|
|
223
|
+
}));
|
|
224
|
+
hasMetadata = true;
|
|
241
225
|
}
|
|
242
|
-
if (
|
|
243
|
-
|
|
226
|
+
if ("_footers" in rawJson && Array.isArray(rawJson._footers)) {
|
|
227
|
+
metadata.footers = rawJson._footers.map((footer) => ({
|
|
228
|
+
text: footer.text || "",
|
|
229
|
+
pages: Array.isArray(footer.pages) ? footer.pages : []
|
|
230
|
+
}));
|
|
231
|
+
hasMetadata = true;
|
|
244
232
|
}
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
==================================================
|
|
251
|
-
CRITICAL: OUTPUT STRUCTURE REQUIREMENTS
|
|
252
|
-
==================================================
|
|
253
|
-
|
|
254
|
-
YOU MUST RETURN JSON MATCHING THIS EXACT STRUCTURE:
|
|
255
|
-
|
|
256
|
-
${schemaFields}
|
|
257
|
-
|
|
258
|
-
CRITICAL FIELD NAME REQUIREMENTS:
|
|
259
|
-
\u2713 Use EXACTLY the field names shown above (character-for-character match)
|
|
260
|
-
\u2713 Preserve the exact casing (e.g., "fullName", not "full_name" or "FullName")
|
|
261
|
-
\u2713 Do NOT abbreviate field names (e.g., "dob" instead of "dateOfBirth")
|
|
262
|
-
\u2713 Do NOT invent alternative names (e.g., "directorName" instead of "fullName")
|
|
263
|
-
\u2713 Do NOT use snake_case if the schema uses camelCase
|
|
264
|
-
\u2713 Do NOT flatten nested structures or rename nested fields
|
|
265
|
-
\u2713 The schema above is the SINGLE SOURCE OF TRUTH for field naming
|
|
266
|
-
|
|
267
|
-
MISSING DATA:
|
|
268
|
-
- If a required field has no data in the document, use null
|
|
269
|
-
- If an optional field has no data, you may omit it or use null
|
|
270
|
-
- Do NOT invent data that isn't in the document
|
|
271
|
-
|
|
272
|
-
==================================================
|
|
273
|
-
`.trim();
|
|
274
|
-
}
|
|
275
|
-
function combineSchemaAndUserPrompt(schema, userPrompt) {
|
|
276
|
-
const schemaSection = buildSchemaPromptSection(schema);
|
|
277
|
-
if (!userPrompt || userPrompt.trim() === "") {
|
|
278
|
-
return schemaSection + "\n\nTASK: Extract structured data from the provided document.";
|
|
233
|
+
const cleanJson = {};
|
|
234
|
+
for (const [key, value] of Object.entries(rawJson)) {
|
|
235
|
+
if (!METADATA_FIELDS.includes(key)) {
|
|
236
|
+
cleanJson[key] = value;
|
|
237
|
+
}
|
|
279
238
|
}
|
|
280
|
-
return
|
|
239
|
+
return {
|
|
240
|
+
json: cleanJson,
|
|
241
|
+
metadata: hasMetadata ? metadata : void 0
|
|
242
|
+
};
|
|
243
|
+
}
|
|
244
|
+
function shouldExtractMetadata(derivedOptions) {
|
|
245
|
+
if (!derivedOptions) return false;
|
|
246
|
+
return !!(derivedOptions.includeConfidence || derivedOptions.includeSources || derivedOptions.includeBlockTypes || derivedOptions.extractHeaders || derivedOptions.extractFooters);
|
|
281
247
|
}
|
|
282
248
|
|
|
283
249
|
// src/provider-registry.ts
|
|
@@ -377,22 +343,41 @@ var OpenAIProvider = class {
|
|
|
377
343
|
}
|
|
378
344
|
async completeJson(params) {
|
|
379
345
|
const startTime = Date.now();
|
|
346
|
+
const rawInput = params.input ?? params.prompt;
|
|
347
|
+
if (!rawInput) {
|
|
348
|
+
throw new Error("Either input or prompt must be provided");
|
|
349
|
+
}
|
|
350
|
+
const normalizedInput = typeof rawInput === "string" ? { text: rawInput } : rawInput;
|
|
380
351
|
const mode = params.mode || (params.schema ? "strict" : "relaxed");
|
|
381
352
|
if (mode === "strict" && !params.schema) {
|
|
382
353
|
throw new Error('schema is required when mode is "strict"');
|
|
383
354
|
}
|
|
355
|
+
const extractMetadata = shouldExtractMetadata(params.derivedOptions);
|
|
384
356
|
const shouldEmbedSchema = params.embedSchemaInPrompt !== false && params.schema;
|
|
385
|
-
let enhancedInput =
|
|
357
|
+
let enhancedInput = normalizedInput;
|
|
386
358
|
if (shouldEmbedSchema) {
|
|
387
359
|
const jsonSchema = this.translator.convertZodIfNeeded(params.schema);
|
|
388
|
-
const enhancedText =
|
|
360
|
+
const enhancedText = params.derivedOptions ? combineSchemaUserAndDerivedPrompts(
|
|
389
361
|
jsonSchema,
|
|
390
|
-
|
|
362
|
+
normalizedInput.text || "",
|
|
363
|
+
params.derivedOptions
|
|
364
|
+
) : combineSchemaAndUserPrompt(
|
|
365
|
+
jsonSchema,
|
|
366
|
+
normalizedInput.text || ""
|
|
391
367
|
);
|
|
392
368
|
enhancedInput = {
|
|
393
|
-
...
|
|
369
|
+
...normalizedInput,
|
|
394
370
|
text: enhancedText
|
|
395
371
|
};
|
|
372
|
+
} else if (params.derivedOptions) {
|
|
373
|
+
const { buildLLMDerivedFeaturesPrompt: buildLLMDerivedFeaturesPrompt2 } = await import("./schema-prompt-formatter-AIORLWUF.js");
|
|
374
|
+
const derivedPrompt = buildLLMDerivedFeaturesPrompt2(params.derivedOptions);
|
|
375
|
+
if (derivedPrompt) {
|
|
376
|
+
enhancedInput = {
|
|
377
|
+
...normalizedInput,
|
|
378
|
+
text: (normalizedInput.text || "") + "\n\n" + derivedPrompt
|
|
379
|
+
};
|
|
380
|
+
}
|
|
396
381
|
}
|
|
397
382
|
const messages = this.buildMessages(enhancedInput);
|
|
398
383
|
const requestBody = {
|
|
@@ -473,7 +458,8 @@ var OpenAIProvider = class {
|
|
|
473
458
|
const data = await response.json();
|
|
474
459
|
const latencyMs = Date.now() - startTime;
|
|
475
460
|
const content = data.choices?.[0]?.message?.content ?? "{}";
|
|
476
|
-
const
|
|
461
|
+
const rawParsed = safeJsonParse(content);
|
|
462
|
+
const { json: parsed, metadata } = extractMetadata ? extractMetadataFromResponse(rawParsed) : { json: rawParsed, metadata: void 0 };
|
|
477
463
|
const message = data.choices?.[0]?.message;
|
|
478
464
|
const reasoning = message?.reasoning;
|
|
479
465
|
const reasoning_details = message?.reasoning_details;
|
|
@@ -498,7 +484,8 @@ var OpenAIProvider = class {
|
|
|
498
484
|
model: this.config.model
|
|
499
485
|
},
|
|
500
486
|
reasoning,
|
|
501
|
-
reasoning_details
|
|
487
|
+
reasoning_details,
|
|
488
|
+
metadata
|
|
502
489
|
};
|
|
503
490
|
}
|
|
504
491
|
buildReasoningConfig(reasoning) {
|
|
@@ -614,22 +601,41 @@ var AnthropicProvider = class {
|
|
|
614
601
|
}
|
|
615
602
|
async completeJson(params) {
|
|
616
603
|
const startTime = Date.now();
|
|
604
|
+
const rawInput = params.input ?? params.prompt;
|
|
605
|
+
if (!rawInput) {
|
|
606
|
+
throw new Error("Either input or prompt must be provided");
|
|
607
|
+
}
|
|
608
|
+
const normalizedInput = typeof rawInput === "string" ? { text: rawInput } : rawInput;
|
|
617
609
|
const mode = params.mode || (params.schema ? "strict" : "relaxed");
|
|
618
610
|
if (mode === "strict" && !params.schema) {
|
|
619
611
|
throw new Error('schema is required when mode is "strict"');
|
|
620
612
|
}
|
|
613
|
+
const extractMetadata = shouldExtractMetadata(params.derivedOptions);
|
|
621
614
|
const shouldEmbedSchema = params.embedSchemaInPrompt !== false && params.schema;
|
|
622
|
-
let enhancedInput =
|
|
615
|
+
let enhancedInput = normalizedInput;
|
|
623
616
|
if (shouldEmbedSchema) {
|
|
624
617
|
const jsonSchema = this.translator.convertZodIfNeeded(params.schema);
|
|
625
|
-
const enhancedText =
|
|
618
|
+
const enhancedText = params.derivedOptions ? combineSchemaUserAndDerivedPrompts(
|
|
626
619
|
jsonSchema,
|
|
627
|
-
|
|
620
|
+
normalizedInput.text || "",
|
|
621
|
+
params.derivedOptions
|
|
622
|
+
) : combineSchemaAndUserPrompt(
|
|
623
|
+
jsonSchema,
|
|
624
|
+
normalizedInput.text || ""
|
|
628
625
|
);
|
|
629
626
|
enhancedInput = {
|
|
630
|
-
...
|
|
627
|
+
...normalizedInput,
|
|
631
628
|
text: enhancedText
|
|
632
629
|
};
|
|
630
|
+
} else if (params.derivedOptions) {
|
|
631
|
+
const { buildLLMDerivedFeaturesPrompt: buildLLMDerivedFeaturesPrompt2 } = await import("./schema-prompt-formatter-AIORLWUF.js");
|
|
632
|
+
const derivedPrompt = buildLLMDerivedFeaturesPrompt2(params.derivedOptions);
|
|
633
|
+
if (derivedPrompt) {
|
|
634
|
+
enhancedInput = {
|
|
635
|
+
...normalizedInput,
|
|
636
|
+
text: (normalizedInput.text || "") + "\n\n" + derivedPrompt
|
|
637
|
+
};
|
|
638
|
+
}
|
|
633
639
|
}
|
|
634
640
|
const messages = await this.buildMessages(enhancedInput);
|
|
635
641
|
const useNewStructuredOutputs = this.supportsNewStructuredOutputs();
|
|
@@ -703,10 +709,7 @@ var AnthropicProvider = class {
|
|
|
703
709
|
}
|
|
704
710
|
const data = await response.json();
|
|
705
711
|
const message = data.choices?.[0]?.message;
|
|
706
|
-
let content = message?.content ??
|
|
707
|
-
if (!useNewStructuredOutputs2) {
|
|
708
|
-
content = "{" + content;
|
|
709
|
-
}
|
|
712
|
+
let content = message?.content ?? "{}";
|
|
710
713
|
const reasoning = message?.reasoning;
|
|
711
714
|
const reasoning_details = message?.reasoning_details;
|
|
712
715
|
content = content.replace(/^```json\s*\n?/, "").replace(/\n?```\s*$/, "").trim();
|
|
@@ -747,9 +750,10 @@ var AnthropicProvider = class {
|
|
|
747
750
|
}
|
|
748
751
|
const latencyMs = Date.now() - startTime;
|
|
749
752
|
const baseProvider = extractProviderFromModel2(this.config.model, "anthropic");
|
|
753
|
+
const { json: cleanJson, metadata } = extractMetadata ? extractMetadataFromResponse(parsed) : { json: parsed, metadata: void 0 };
|
|
750
754
|
return {
|
|
751
|
-
json:
|
|
752
|
-
rawText: JSON.stringify(
|
|
755
|
+
json: cleanJson,
|
|
756
|
+
rawText: JSON.stringify(cleanJson),
|
|
753
757
|
metrics: {
|
|
754
758
|
costUSD,
|
|
755
759
|
inputTokens,
|
|
@@ -763,7 +767,8 @@ var AnthropicProvider = class {
|
|
|
763
767
|
cacheReadInputTokens
|
|
764
768
|
},
|
|
765
769
|
reasoning,
|
|
766
|
-
reasoning_details
|
|
770
|
+
reasoning_details,
|
|
771
|
+
metadata
|
|
767
772
|
};
|
|
768
773
|
} else {
|
|
769
774
|
const endpoint = this.config.baseUrl || "https://api.anthropic.com/v1";
|
|
@@ -828,9 +833,10 @@ var AnthropicProvider = class {
|
|
|
828
833
|
const reasoning = thinkingBlock?.thinking;
|
|
829
834
|
const latencyMs = Date.now() - startTime;
|
|
830
835
|
const baseProvider = extractProviderFromModel2(this.config.model, "anthropic");
|
|
836
|
+
const { json: cleanJson, metadata } = extractMetadata ? extractMetadataFromResponse(parsed) : { json: parsed, metadata: void 0 };
|
|
831
837
|
return {
|
|
832
|
-
json:
|
|
833
|
-
rawText: JSON.stringify(
|
|
838
|
+
json: cleanJson,
|
|
839
|
+
rawText: JSON.stringify(cleanJson),
|
|
834
840
|
metrics: {
|
|
835
841
|
costUSD,
|
|
836
842
|
inputTokens,
|
|
@@ -848,7 +854,8 @@ var AnthropicProvider = class {
|
|
|
848
854
|
signature: null,
|
|
849
855
|
id: "thinking-1",
|
|
850
856
|
format: "anthropic-claude-v1"
|
|
851
|
-
}] : void 0
|
|
857
|
+
}] : void 0,
|
|
858
|
+
metadata
|
|
852
859
|
};
|
|
853
860
|
}
|
|
854
861
|
}
|
|
@@ -885,19 +892,13 @@ var AnthropicProvider = class {
|
|
|
885
892
|
requestBody.response_format = {
|
|
886
893
|
type: "json_object"
|
|
887
894
|
};
|
|
888
|
-
} else {
|
|
895
|
+
} else if (useNewStructuredOutputs) {
|
|
889
896
|
const openRouterSchema = this.translator.toClaudeOpenRouterSchema(schema);
|
|
890
897
|
const fixedSchema = this.fixSchemaForStrictMode(openRouterSchema);
|
|
891
898
|
if (process.env.DEBUG_PROVIDERS) {
|
|
892
|
-
console.log("[AnthropicProvider]
|
|
899
|
+
console.log("[AnthropicProvider] Using json_schema (native support)");
|
|
893
900
|
console.log("[AnthropicProvider] Fixed schema:", JSON.stringify(fixedSchema, null, 2));
|
|
894
901
|
}
|
|
895
|
-
if (!useNewStructuredOutputs) {
|
|
896
|
-
messageArray.push({
|
|
897
|
-
role: "assistant",
|
|
898
|
-
content: "{"
|
|
899
|
-
});
|
|
900
|
-
}
|
|
901
902
|
requestBody.response_format = {
|
|
902
903
|
type: "json_schema",
|
|
903
904
|
json_schema: {
|
|
@@ -906,6 +907,13 @@ var AnthropicProvider = class {
|
|
|
906
907
|
schema: fixedSchema
|
|
907
908
|
}
|
|
908
909
|
};
|
|
910
|
+
} else {
|
|
911
|
+
if (process.env.DEBUG_PROVIDERS) {
|
|
912
|
+
console.log("[AnthropicProvider] Using json_object (legacy mode, schema in prompt)");
|
|
913
|
+
}
|
|
914
|
+
requestBody.response_format = {
|
|
915
|
+
type: "json_object"
|
|
916
|
+
};
|
|
909
917
|
}
|
|
910
918
|
if (reasoning) {
|
|
911
919
|
requestBody.reasoning = this.buildReasoningConfig(reasoning, max_tokens);
|
|
@@ -1248,11 +1256,16 @@ var GoogleProvider = class {
|
|
|
1248
1256
|
if (mode === "strict" && !params.schema) {
|
|
1249
1257
|
throw new Error('schema is required when mode is "strict"');
|
|
1250
1258
|
}
|
|
1259
|
+
const extractMetadata = shouldExtractMetadata(params.derivedOptions);
|
|
1251
1260
|
const shouldEmbedSchema = params.embedSchemaInPrompt !== false && params.schema;
|
|
1252
1261
|
let enhancedInput = normalizedInput;
|
|
1253
1262
|
if (shouldEmbedSchema) {
|
|
1254
1263
|
const jsonSchema = this.translator.convertZodIfNeeded(params.schema);
|
|
1255
|
-
const enhancedText =
|
|
1264
|
+
const enhancedText = params.derivedOptions ? combineSchemaUserAndDerivedPrompts(
|
|
1265
|
+
jsonSchema,
|
|
1266
|
+
normalizedInput.text || "",
|
|
1267
|
+
params.derivedOptions
|
|
1268
|
+
) : combineSchemaAndUserPrompt(
|
|
1256
1269
|
jsonSchema,
|
|
1257
1270
|
normalizedInput.text || ""
|
|
1258
1271
|
);
|
|
@@ -1260,6 +1273,15 @@ var GoogleProvider = class {
|
|
|
1260
1273
|
...normalizedInput,
|
|
1261
1274
|
text: enhancedText
|
|
1262
1275
|
};
|
|
1276
|
+
} else if (params.derivedOptions) {
|
|
1277
|
+
const { buildLLMDerivedFeaturesPrompt: buildLLMDerivedFeaturesPrompt2 } = await import("./schema-prompt-formatter-AIORLWUF.js");
|
|
1278
|
+
const derivedPrompt = buildLLMDerivedFeaturesPrompt2(params.derivedOptions);
|
|
1279
|
+
if (derivedPrompt) {
|
|
1280
|
+
enhancedInput = {
|
|
1281
|
+
...normalizedInput,
|
|
1282
|
+
text: (normalizedInput.text || "") + "\n\n" + derivedPrompt
|
|
1283
|
+
};
|
|
1284
|
+
}
|
|
1263
1285
|
}
|
|
1264
1286
|
const contents = await this.buildContents(enhancedInput);
|
|
1265
1287
|
const requestBody = {
|
|
@@ -1328,7 +1350,8 @@ var GoogleProvider = class {
|
|
|
1328
1350
|
const reasoning = message?.reasoning;
|
|
1329
1351
|
const reasoning_details = message?.reasoning_details;
|
|
1330
1352
|
content = content.replace(/^```json\s*\n?/, "").replace(/\n?```\s*$/, "").trim();
|
|
1331
|
-
const
|
|
1353
|
+
const rawParsed = safeJsonParse3(content);
|
|
1354
|
+
const { json: parsed, metadata } = extractMetadata ? extractMetadataFromResponse(rawParsed) : { json: rawParsed, metadata: void 0 };
|
|
1332
1355
|
const baseProvider = extractProviderFromModel3(this.config.model, "google");
|
|
1333
1356
|
return {
|
|
1334
1357
|
json: parsed,
|
|
@@ -1344,7 +1367,8 @@ var GoogleProvider = class {
|
|
|
1344
1367
|
model: this.config.model
|
|
1345
1368
|
},
|
|
1346
1369
|
reasoning,
|
|
1347
|
-
reasoning_details
|
|
1370
|
+
reasoning_details,
|
|
1371
|
+
metadata
|
|
1348
1372
|
};
|
|
1349
1373
|
} else {
|
|
1350
1374
|
const candidate = data.candidates?.[0];
|
|
@@ -1354,7 +1378,8 @@ var GoogleProvider = class {
|
|
|
1354
1378
|
costUSD = this.calculateCost(data.usageMetadata);
|
|
1355
1379
|
const thinkingPart = candidate?.content?.parts?.find((part) => part.thought === true);
|
|
1356
1380
|
const reasoning = thinkingPart?.text;
|
|
1357
|
-
const
|
|
1381
|
+
const rawParsed = safeJsonParse3(content);
|
|
1382
|
+
const { json: parsed, metadata } = extractMetadata ? extractMetadataFromResponse(rawParsed) : { json: rawParsed, metadata: void 0 };
|
|
1358
1383
|
const baseProvider = extractProviderFromModel3(this.config.model, "google");
|
|
1359
1384
|
return {
|
|
1360
1385
|
json: parsed,
|
|
@@ -1376,7 +1401,8 @@ var GoogleProvider = class {
|
|
|
1376
1401
|
signature: null,
|
|
1377
1402
|
id: "thinking-1",
|
|
1378
1403
|
format: "google-gemini-v1"
|
|
1379
|
-
}] : void 0
|
|
1404
|
+
}] : void 0,
|
|
1405
|
+
metadata
|
|
1380
1406
|
};
|
|
1381
1407
|
}
|
|
1382
1408
|
}
|
|
@@ -1580,22 +1606,41 @@ var XAIProvider = class {
|
|
|
1580
1606
|
}
|
|
1581
1607
|
async completeJson(params) {
|
|
1582
1608
|
const startTime = Date.now();
|
|
1609
|
+
const rawInput = params.input ?? params.prompt;
|
|
1610
|
+
if (!rawInput) {
|
|
1611
|
+
throw new Error("Either input or prompt must be provided");
|
|
1612
|
+
}
|
|
1613
|
+
const normalizedInput = typeof rawInput === "string" ? { text: rawInput } : rawInput;
|
|
1583
1614
|
const mode = params.mode || (params.schema ? "strict" : "relaxed");
|
|
1584
1615
|
if (mode === "strict" && !params.schema) {
|
|
1585
1616
|
throw new Error('schema is required when mode is "strict"');
|
|
1586
1617
|
}
|
|
1618
|
+
const extractMetadata = shouldExtractMetadata(params.derivedOptions);
|
|
1587
1619
|
const shouldEmbedSchema = params.embedSchemaInPrompt !== false && params.schema;
|
|
1588
|
-
let enhancedInput =
|
|
1620
|
+
let enhancedInput = normalizedInput;
|
|
1589
1621
|
if (shouldEmbedSchema) {
|
|
1590
1622
|
const jsonSchema = this.translator.convertZodIfNeeded(params.schema);
|
|
1591
|
-
const enhancedText =
|
|
1623
|
+
const enhancedText = params.derivedOptions ? combineSchemaUserAndDerivedPrompts(
|
|
1592
1624
|
jsonSchema,
|
|
1593
|
-
|
|
1625
|
+
normalizedInput.text || "",
|
|
1626
|
+
params.derivedOptions
|
|
1627
|
+
) : combineSchemaAndUserPrompt(
|
|
1628
|
+
jsonSchema,
|
|
1629
|
+
normalizedInput.text || ""
|
|
1594
1630
|
);
|
|
1595
1631
|
enhancedInput = {
|
|
1596
|
-
...
|
|
1632
|
+
...normalizedInput,
|
|
1597
1633
|
text: enhancedText
|
|
1598
1634
|
};
|
|
1635
|
+
} else if (params.derivedOptions) {
|
|
1636
|
+
const { buildLLMDerivedFeaturesPrompt: buildLLMDerivedFeaturesPrompt2 } = await import("./schema-prompt-formatter-AIORLWUF.js");
|
|
1637
|
+
const derivedPrompt = buildLLMDerivedFeaturesPrompt2(params.derivedOptions);
|
|
1638
|
+
if (derivedPrompt) {
|
|
1639
|
+
enhancedInput = {
|
|
1640
|
+
...normalizedInput,
|
|
1641
|
+
text: (normalizedInput.text || "") + "\n\n" + derivedPrompt
|
|
1642
|
+
};
|
|
1643
|
+
}
|
|
1599
1644
|
}
|
|
1600
1645
|
const messages = await this.buildMessages(enhancedInput);
|
|
1601
1646
|
const requestBody = {
|
|
@@ -1676,7 +1721,8 @@ var XAIProvider = class {
|
|
|
1676
1721
|
const latencyMs = Date.now() - startTime;
|
|
1677
1722
|
const message = data.choices?.[0]?.message;
|
|
1678
1723
|
const content = message?.content ?? "{}";
|
|
1679
|
-
const
|
|
1724
|
+
const rawParsed = safeJsonParse4(content);
|
|
1725
|
+
const { json: parsed, metadata } = extractMetadata ? extractMetadataFromResponse(rawParsed) : { json: rawParsed, metadata: void 0 };
|
|
1680
1726
|
const reasoning = message?.reasoning;
|
|
1681
1727
|
const reasoning_details = message?.reasoning_details;
|
|
1682
1728
|
let costUSD;
|
|
@@ -1700,7 +1746,8 @@ var XAIProvider = class {
|
|
|
1700
1746
|
model: this.config.model
|
|
1701
1747
|
},
|
|
1702
1748
|
reasoning,
|
|
1703
|
-
reasoning_details
|
|
1749
|
+
reasoning_details,
|
|
1750
|
+
metadata
|
|
1704
1751
|
};
|
|
1705
1752
|
}
|
|
1706
1753
|
buildReasoningConfig(reasoning) {
|
|
@@ -2032,6 +2079,115 @@ function adaptToCoreLLMProvider(provider) {
|
|
|
2032
2079
|
};
|
|
2033
2080
|
}
|
|
2034
2081
|
|
|
2082
|
+
// src/gemini-bbox-schema.ts
|
|
2083
|
+
var BLOCK_TYPES = [
|
|
2084
|
+
"title",
|
|
2085
|
+
// Main document title or section headers
|
|
2086
|
+
"paragraph",
|
|
2087
|
+
// Body text paragraphs
|
|
2088
|
+
"table",
|
|
2089
|
+
// Tabular data
|
|
2090
|
+
"list",
|
|
2091
|
+
// Bulleted or numbered lists
|
|
2092
|
+
"header",
|
|
2093
|
+
// Page headers (repeated at top of pages)
|
|
2094
|
+
"footer",
|
|
2095
|
+
// Page footers (repeated at bottom of pages)
|
|
2096
|
+
"caption",
|
|
2097
|
+
// Image or figure captions
|
|
2098
|
+
"code",
|
|
2099
|
+
// Code blocks or preformatted text
|
|
2100
|
+
"image",
|
|
2101
|
+
// Image/figure placeholder
|
|
2102
|
+
"form",
|
|
2103
|
+
// Form fields
|
|
2104
|
+
"signature",
|
|
2105
|
+
// Signatures
|
|
2106
|
+
"handwriting"
|
|
2107
|
+
// Handwritten text
|
|
2108
|
+
];
|
|
2109
|
+
var geminiBoundingBoxSchema = {
|
|
2110
|
+
type: "array",
|
|
2111
|
+
items: {
|
|
2112
|
+
type: "object",
|
|
2113
|
+
properties: {
|
|
2114
|
+
box_2d: {
|
|
2115
|
+
type: "array",
|
|
2116
|
+
items: { type: "number" },
|
|
2117
|
+
minItems: 4,
|
|
2118
|
+
maxItems: 4,
|
|
2119
|
+
description: "Bounding box coordinates [y_min, x_min, y_max, x_max] normalized 0-1000"
|
|
2120
|
+
},
|
|
2121
|
+
text: {
|
|
2122
|
+
type: "string",
|
|
2123
|
+
description: "Text content within the bounding box"
|
|
2124
|
+
},
|
|
2125
|
+
type: {
|
|
2126
|
+
type: "string",
|
|
2127
|
+
enum: [...BLOCK_TYPES],
|
|
2128
|
+
description: "Block type classification"
|
|
2129
|
+
},
|
|
2130
|
+
confidence: {
|
|
2131
|
+
type: "string",
|
|
2132
|
+
enum: ["high", "medium", "low"],
|
|
2133
|
+
nullable: true,
|
|
2134
|
+
description: "Confidence level of extraction"
|
|
2135
|
+
},
|
|
2136
|
+
page: {
|
|
2137
|
+
type: "integer",
|
|
2138
|
+
nullable: true,
|
|
2139
|
+
description: "Page number (0-indexed)"
|
|
2140
|
+
}
|
|
2141
|
+
},
|
|
2142
|
+
required: ["box_2d", "text", "type"],
|
|
2143
|
+
additionalProperties: false
|
|
2144
|
+
}
|
|
2145
|
+
};
|
|
2146
|
+
var GEMINI_BBOX_EXTRACTION_PROMPT = `Analyze this document and extract all text with precise bounding box locations.
|
|
2147
|
+
|
|
2148
|
+
For each text block, provide:
|
|
2149
|
+
- box_2d: Bounding box as [y_min, x_min, y_max, x_max] normalized to 0-1000
|
|
2150
|
+
- text: The exact text content
|
|
2151
|
+
- type: Block classification (title, paragraph, table, list, header, footer, caption, code, image, form, signature, handwriting)
|
|
2152
|
+
- confidence: Your confidence level (high, medium, low)
|
|
2153
|
+
- page: Page number (0-indexed) for multi-page documents
|
|
2154
|
+
|
|
2155
|
+
IMPORTANT coordinate format:
|
|
2156
|
+
- Use [y_min, x_min, y_max, x_max] order (Y coordinate first, then X)
|
|
2157
|
+
- Normalize all values to 0-1000 range (top-left is [0, 0], bottom-right is [1000, 1000])
|
|
2158
|
+
|
|
2159
|
+
Return ONLY a valid JSON array, no other text.`;
|
|
2160
|
+
function normalizeGeminiBBox(geminiBBox) {
|
|
2161
|
+
const [yMin, xMin, yMax, xMax] = geminiBBox;
|
|
2162
|
+
return {
|
|
2163
|
+
x: xMin / 1e3,
|
|
2164
|
+
y: yMin / 1e3,
|
|
2165
|
+
width: (xMax - xMin) / 1e3,
|
|
2166
|
+
height: (yMax - yMin) / 1e3
|
|
2167
|
+
};
|
|
2168
|
+
}
|
|
2169
|
+
function toGeminiBBox(bbox) {
|
|
2170
|
+
return [
|
|
2171
|
+
Math.round(bbox.y * 1e3),
|
|
2172
|
+
// y_min
|
|
2173
|
+
Math.round(bbox.x * 1e3),
|
|
2174
|
+
// x_min
|
|
2175
|
+
Math.round((bbox.y + bbox.height) * 1e3),
|
|
2176
|
+
// y_max
|
|
2177
|
+
Math.round((bbox.x + bbox.width) * 1e3)
|
|
2178
|
+
// x_max
|
|
2179
|
+
];
|
|
2180
|
+
}
|
|
2181
|
+
function convertGeminiBlocksToDocumentBlocks(geminiBlocks) {
|
|
2182
|
+
return geminiBlocks.map((block) => ({
|
|
2183
|
+
text: block.text,
|
|
2184
|
+
bbox: normalizeGeminiBBox(block.box_2d),
|
|
2185
|
+
type: block.type,
|
|
2186
|
+
confidence: block.confidence === "high" ? 0.9 : block.confidence === "medium" ? 0.7 : block.confidence === "low" ? 0.5 : void 0,
|
|
2187
|
+
page: block.page
|
|
2188
|
+
}));
|
|
2189
|
+
}
|
|
2190
|
+
|
|
2035
2191
|
// src/metadata.ts
|
|
2036
2192
|
var SUPPORTED_IMAGE_TYPES = {
|
|
2037
2193
|
COMMON: ["image/png", "image/jpeg", "image/webp", "image/gif"],
|
|
@@ -2703,7 +2859,9 @@ function buildLLMProvider(config) {
|
|
|
2703
2859
|
}
|
|
2704
2860
|
export {
|
|
2705
2861
|
AnthropicProvider,
|
|
2862
|
+
BLOCK_TYPES,
|
|
2706
2863
|
FallbackManager,
|
|
2864
|
+
GEMINI_BBOX_EXTRACTION_PROMPT,
|
|
2707
2865
|
GoogleProvider,
|
|
2708
2866
|
OpenAIProvider,
|
|
2709
2867
|
PROVIDER_METADATA,
|
|
@@ -2711,20 +2869,33 @@ export {
|
|
|
2711
2869
|
SchemaTranslator,
|
|
2712
2870
|
XAIProvider,
|
|
2713
2871
|
adaptToCoreLLMProvider,
|
|
2872
|
+
buildBlockClassificationPrompt,
|
|
2873
|
+
buildConfidencePrompt,
|
|
2874
|
+
buildLLMDerivedFeaturesPrompt,
|
|
2714
2875
|
buildLLMProvider,
|
|
2876
|
+
buildLanguageHintsPrompt,
|
|
2877
|
+
buildOutputFormatPrompt,
|
|
2715
2878
|
buildSchemaPromptSection,
|
|
2879
|
+
buildSourcesPrompt,
|
|
2716
2880
|
combineSchemaAndUserPrompt,
|
|
2881
|
+
combineSchemaUserAndDerivedPrompts,
|
|
2717
2882
|
compareNativeVsOpenRouter,
|
|
2883
|
+
convertGeminiBlocksToDocumentBlocks,
|
|
2718
2884
|
createProviderFromRegistry,
|
|
2719
2885
|
createVLMProvider,
|
|
2720
2886
|
estimateCost,
|
|
2887
|
+
extractMetadataFromResponse,
|
|
2721
2888
|
formatSchemaForPrompt,
|
|
2889
|
+
geminiBoundingBoxSchema,
|
|
2722
2890
|
getCheapestProvider,
|
|
2723
2891
|
getProvidersForNode,
|
|
2724
2892
|
isImageTypeSupported,
|
|
2725
2893
|
isProviderCompatibleWithNode,
|
|
2894
|
+
normalizeGeminiBBox,
|
|
2726
2895
|
providerRegistry,
|
|
2727
2896
|
registerProvider,
|
|
2728
|
-
|
|
2897
|
+
shouldExtractMetadata,
|
|
2898
|
+
supportsPDFsInline,
|
|
2899
|
+
toGeminiBBox
|
|
2729
2900
|
};
|
|
2730
2901
|
//# sourceMappingURL=index.js.map
|