@doclo/providers-llm 0.1.8 → 0.1.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +48 -3
- package/dist/index.js +64 -26
- package/dist/index.js.map +1 -1
- package/package.json +3 -3
package/dist/index.d.ts
CHANGED
|
@@ -67,6 +67,8 @@ interface MultimodalInput {
|
|
|
67
67
|
text?: string;
|
|
68
68
|
images?: ImageInput[];
|
|
69
69
|
pdfs?: PDFInput[];
|
|
70
|
+
/** Optional system message (text-only, prepended to conversation) */
|
|
71
|
+
systemPrompt?: string;
|
|
70
72
|
}
|
|
71
73
|
/** Response metrics */
|
|
72
74
|
interface ResponseMetrics {
|
|
@@ -134,6 +136,27 @@ interface LLMDerivedOptions {
|
|
|
134
136
|
maxChunkSize?: number;
|
|
135
137
|
/** Language hints for the document */
|
|
136
138
|
languageHints?: string[];
|
|
139
|
+
/**
|
|
140
|
+
* Normalize date fields to ISO 8601 format (YYYY-MM-DD)
|
|
141
|
+
* When enabled, date fields in the extraction output will be formatted consistently.
|
|
142
|
+
* Native support: Extend.ai (extend:type: "date")
|
|
143
|
+
* LLM support: Via prompting
|
|
144
|
+
*/
|
|
145
|
+
dateNormalization?: boolean;
|
|
146
|
+
/**
|
|
147
|
+
* Normalize currency fields to { amount: number, currency: string } objects
|
|
148
|
+
* When enabled, monetary values are extracted as structured objects with ISO 4217 currency codes.
|
|
149
|
+
* Native support: Extend.ai (extend:type: "currency")
|
|
150
|
+
* LLM support: Via prompting
|
|
151
|
+
*/
|
|
152
|
+
currencyNormalization?: boolean;
|
|
153
|
+
/**
|
|
154
|
+
* Detect and extract signature fields from documents
|
|
155
|
+
* When enabled, signature presence is detected and locations are reported.
|
|
156
|
+
* Native support: Extend.ai (extend:type: "signature"), Reducto
|
|
157
|
+
* LLM support: Via prompting (less reliable)
|
|
158
|
+
*/
|
|
159
|
+
signatureDetection?: boolean;
|
|
137
160
|
}
|
|
138
161
|
/**
|
|
139
162
|
* Extracted metadata from LLM response (populated when derived options are enabled)
|
|
@@ -160,6 +183,25 @@ interface LLMExtractedMetadata {
|
|
|
160
183
|
text: string;
|
|
161
184
|
pages: number[];
|
|
162
185
|
}>;
|
|
186
|
+
/** Detected signatures with location and confidence */
|
|
187
|
+
signatures?: Array<{
|
|
188
|
+
field: string;
|
|
189
|
+
detected: boolean;
|
|
190
|
+
bbox?: [number, number, number, number];
|
|
191
|
+
page?: number;
|
|
192
|
+
confidence?: number;
|
|
193
|
+
}>;
|
|
194
|
+
/** Normalized currency values (original → normalized mapping) */
|
|
195
|
+
normalizedCurrencies?: Record<string, {
|
|
196
|
+
original: string;
|
|
197
|
+
amount: number;
|
|
198
|
+
currency: string;
|
|
199
|
+
}>;
|
|
200
|
+
/** Normalized date values (original → normalized mapping) */
|
|
201
|
+
normalizedDates?: Record<string, {
|
|
202
|
+
original: string;
|
|
203
|
+
normalized: string;
|
|
204
|
+
}>;
|
|
163
205
|
}
|
|
164
206
|
/** Provider interface */
|
|
165
207
|
interface LLMProvider {
|
|
@@ -457,7 +499,8 @@ declare class OpenAIProvider implements LLMProvider {
|
|
|
457
499
|
private limits;
|
|
458
500
|
constructor(config: ProviderConfig);
|
|
459
501
|
completeJson<T>(params: {
|
|
460
|
-
input
|
|
502
|
+
input?: MultimodalInput;
|
|
503
|
+
prompt?: MultimodalInput | string;
|
|
461
504
|
schema?: UnifiedSchema<T>;
|
|
462
505
|
mode?: JsonMode;
|
|
463
506
|
max_tokens?: number;
|
|
@@ -482,7 +525,8 @@ declare class AnthropicProvider implements LLMProvider {
|
|
|
482
525
|
private limits;
|
|
483
526
|
constructor(config: ProviderConfig);
|
|
484
527
|
completeJson<T>(params: {
|
|
485
|
-
input
|
|
528
|
+
input?: MultimodalInput;
|
|
529
|
+
prompt?: MultimodalInput | string;
|
|
486
530
|
schema?: UnifiedSchema<T>;
|
|
487
531
|
mode?: JsonMode;
|
|
488
532
|
max_tokens?: number;
|
|
@@ -550,7 +594,8 @@ declare class XAIProvider implements LLMProvider {
|
|
|
550
594
|
private limits;
|
|
551
595
|
constructor(config: ProviderConfig);
|
|
552
596
|
completeJson<T>(params: {
|
|
553
|
-
input
|
|
597
|
+
input?: MultimodalInput;
|
|
598
|
+
prompt?: MultimodalInput | string;
|
|
554
599
|
schema?: UnifiedSchema<T>;
|
|
555
600
|
mode?: JsonMode;
|
|
556
601
|
max_tokens?: number;
|
package/dist/index.js
CHANGED
|
@@ -343,25 +343,30 @@ var OpenAIProvider = class {
|
|
|
343
343
|
}
|
|
344
344
|
async completeJson(params) {
|
|
345
345
|
const startTime = Date.now();
|
|
346
|
+
const rawInput = params.input ?? params.prompt;
|
|
347
|
+
if (!rawInput) {
|
|
348
|
+
throw new Error("Either input or prompt must be provided");
|
|
349
|
+
}
|
|
350
|
+
const normalizedInput = typeof rawInput === "string" ? { text: rawInput } : rawInput;
|
|
346
351
|
const mode = params.mode || (params.schema ? "strict" : "relaxed");
|
|
347
352
|
if (mode === "strict" && !params.schema) {
|
|
348
353
|
throw new Error('schema is required when mode is "strict"');
|
|
349
354
|
}
|
|
350
355
|
const extractMetadata = shouldExtractMetadata(params.derivedOptions);
|
|
351
356
|
const shouldEmbedSchema = params.embedSchemaInPrompt !== false && params.schema;
|
|
352
|
-
let enhancedInput =
|
|
357
|
+
let enhancedInput = normalizedInput;
|
|
353
358
|
if (shouldEmbedSchema) {
|
|
354
359
|
const jsonSchema = this.translator.convertZodIfNeeded(params.schema);
|
|
355
360
|
const enhancedText = params.derivedOptions ? combineSchemaUserAndDerivedPrompts(
|
|
356
361
|
jsonSchema,
|
|
357
|
-
|
|
362
|
+
normalizedInput.text || "",
|
|
358
363
|
params.derivedOptions
|
|
359
364
|
) : combineSchemaAndUserPrompt(
|
|
360
365
|
jsonSchema,
|
|
361
|
-
|
|
366
|
+
normalizedInput.text || ""
|
|
362
367
|
);
|
|
363
368
|
enhancedInput = {
|
|
364
|
-
...
|
|
369
|
+
...normalizedInput,
|
|
365
370
|
text: enhancedText
|
|
366
371
|
};
|
|
367
372
|
} else if (params.derivedOptions) {
|
|
@@ -369,8 +374,8 @@ var OpenAIProvider = class {
|
|
|
369
374
|
const derivedPrompt = buildLLMDerivedFeaturesPrompt2(params.derivedOptions);
|
|
370
375
|
if (derivedPrompt) {
|
|
371
376
|
enhancedInput = {
|
|
372
|
-
...
|
|
373
|
-
text: (
|
|
377
|
+
...normalizedInput,
|
|
378
|
+
text: (normalizedInput.text || "") + "\n\n" + derivedPrompt
|
|
374
379
|
};
|
|
375
380
|
}
|
|
376
381
|
}
|
|
@@ -496,6 +501,10 @@ var OpenAIProvider = class {
|
|
|
496
501
|
return Object.keys(config).length > 0 ? config : void 0;
|
|
497
502
|
}
|
|
498
503
|
buildMessages(input) {
|
|
504
|
+
const messages = [];
|
|
505
|
+
if (input.systemPrompt) {
|
|
506
|
+
messages.push({ role: "system", content: input.systemPrompt });
|
|
507
|
+
}
|
|
499
508
|
const content = [];
|
|
500
509
|
if (input.text) {
|
|
501
510
|
content.push({ type: "text", text: input.text });
|
|
@@ -536,7 +545,8 @@ var OpenAIProvider = class {
|
|
|
536
545
|
});
|
|
537
546
|
}
|
|
538
547
|
}
|
|
539
|
-
|
|
548
|
+
messages.push({ role: "user", content });
|
|
549
|
+
return messages;
|
|
540
550
|
}
|
|
541
551
|
/**
|
|
542
552
|
* Extract base64 data from a data URL or return as-is if already raw base64
|
|
@@ -596,25 +606,30 @@ var AnthropicProvider = class {
|
|
|
596
606
|
}
|
|
597
607
|
async completeJson(params) {
|
|
598
608
|
const startTime = Date.now();
|
|
609
|
+
const rawInput = params.input ?? params.prompt;
|
|
610
|
+
if (!rawInput) {
|
|
611
|
+
throw new Error("Either input or prompt must be provided");
|
|
612
|
+
}
|
|
613
|
+
const normalizedInput = typeof rawInput === "string" ? { text: rawInput } : rawInput;
|
|
599
614
|
const mode = params.mode || (params.schema ? "strict" : "relaxed");
|
|
600
615
|
if (mode === "strict" && !params.schema) {
|
|
601
616
|
throw new Error('schema is required when mode is "strict"');
|
|
602
617
|
}
|
|
603
618
|
const extractMetadata = shouldExtractMetadata(params.derivedOptions);
|
|
604
619
|
const shouldEmbedSchema = params.embedSchemaInPrompt !== false && params.schema;
|
|
605
|
-
let enhancedInput =
|
|
620
|
+
let enhancedInput = normalizedInput;
|
|
606
621
|
if (shouldEmbedSchema) {
|
|
607
622
|
const jsonSchema = this.translator.convertZodIfNeeded(params.schema);
|
|
608
623
|
const enhancedText = params.derivedOptions ? combineSchemaUserAndDerivedPrompts(
|
|
609
624
|
jsonSchema,
|
|
610
|
-
|
|
625
|
+
normalizedInput.text || "",
|
|
611
626
|
params.derivedOptions
|
|
612
627
|
) : combineSchemaAndUserPrompt(
|
|
613
628
|
jsonSchema,
|
|
614
|
-
|
|
629
|
+
normalizedInput.text || ""
|
|
615
630
|
);
|
|
616
631
|
enhancedInput = {
|
|
617
|
-
...
|
|
632
|
+
...normalizedInput,
|
|
618
633
|
text: enhancedText
|
|
619
634
|
};
|
|
620
635
|
} else if (params.derivedOptions) {
|
|
@@ -622,8 +637,8 @@ var AnthropicProvider = class {
|
|
|
622
637
|
const derivedPrompt = buildLLMDerivedFeaturesPrompt2(params.derivedOptions);
|
|
623
638
|
if (derivedPrompt) {
|
|
624
639
|
enhancedInput = {
|
|
625
|
-
...
|
|
626
|
-
text: (
|
|
640
|
+
...normalizedInput,
|
|
641
|
+
text: (normalizedInput.text || "") + "\n\n" + derivedPrompt
|
|
627
642
|
};
|
|
628
643
|
}
|
|
629
644
|
}
|
|
@@ -632,7 +647,9 @@ var AnthropicProvider = class {
|
|
|
632
647
|
const requestBody = {
|
|
633
648
|
model: this.config.model,
|
|
634
649
|
max_tokens: params.max_tokens || 4096,
|
|
635
|
-
messages
|
|
650
|
+
messages,
|
|
651
|
+
// Native Anthropic API uses separate system parameter (text-only)
|
|
652
|
+
...enhancedInput.systemPrompt && { system: enhancedInput.systemPrompt }
|
|
636
653
|
};
|
|
637
654
|
if (mode === "relaxed") {
|
|
638
655
|
requestBody.messages.push({
|
|
@@ -677,7 +694,7 @@ var AnthropicProvider = class {
|
|
|
677
694
|
let costUSD;
|
|
678
695
|
if (this.config.via === "openrouter") {
|
|
679
696
|
const useNewStructuredOutputs2 = this.supportsNewStructuredOutputs();
|
|
680
|
-
const openRouterRequest = this.translateToOpenRouterFormat(messages, params.schema, mode, params.max_tokens, params.reasoning);
|
|
697
|
+
const openRouterRequest = this.translateToOpenRouterFormat(messages, params.schema, mode, params.max_tokens, params.reasoning, enhancedInput.systemPrompt);
|
|
681
698
|
if (process.env.DEBUG_PROVIDERS) {
|
|
682
699
|
console.log("[AnthropicProvider] OpenRouter request body (messages):");
|
|
683
700
|
console.log(JSON.stringify(openRouterRequest.messages, null, 2));
|
|
@@ -863,11 +880,15 @@ var AnthropicProvider = class {
|
|
|
863
880
|
budget_tokens
|
|
864
881
|
};
|
|
865
882
|
}
|
|
866
|
-
translateToOpenRouterFormat(messages, schema, mode, max_tokens, reasoning) {
|
|
883
|
+
translateToOpenRouterFormat(messages, schema, mode, max_tokens, reasoning, systemPrompt) {
|
|
867
884
|
const useNewStructuredOutputs = this.supportsNewStructuredOutputs();
|
|
885
|
+
const jsonInstructions = mode === "strict" ? "You must respond ONLY with valid JSON that matches the provided schema. Do not include any markdown formatting, explanations, or additional text." : "You must respond ONLY with valid JSON. Do not include any markdown formatting, explanations, or additional text.";
|
|
886
|
+
const systemContent = systemPrompt ? `${systemPrompt}
|
|
887
|
+
|
|
888
|
+
${jsonInstructions}` : `You are a data extraction assistant. ${jsonInstructions}`;
|
|
868
889
|
const systemMessage = {
|
|
869
890
|
role: "system",
|
|
870
|
-
content:
|
|
891
|
+
content: systemContent
|
|
871
892
|
};
|
|
872
893
|
const messageArray = [systemMessage, ...messages];
|
|
873
894
|
const requestBody = {
|
|
@@ -1281,6 +1302,10 @@ var GoogleProvider = class {
|
|
|
1281
1302
|
// Use JSON mode without responseSchema - schema is already in the prompt via combineSchemaAndUserPrompt.
|
|
1282
1303
|
// See: https://ubaidullahmomer.medium.com/why-google-geminis-response-schema-isn-t-ready-for-complex-json-46f35c3aaaea
|
|
1283
1304
|
responseMimeType: "application/json"
|
|
1305
|
+
},
|
|
1306
|
+
// Native Gemini API uses systemInstruction with parts array (text-only)
|
|
1307
|
+
...enhancedInput.systemPrompt && {
|
|
1308
|
+
systemInstruction: { parts: [{ text: enhancedInput.systemPrompt }] }
|
|
1284
1309
|
}
|
|
1285
1310
|
};
|
|
1286
1311
|
if (process.env.DEBUG_PROVIDERS) {
|
|
@@ -1297,7 +1322,7 @@ var GoogleProvider = class {
|
|
|
1297
1322
|
console.log("[GoogleProvider] Using via:", this.config.via, "Checking:", this.config.via === "openrouter");
|
|
1298
1323
|
}
|
|
1299
1324
|
if (this.config.via === "openrouter") {
|
|
1300
|
-
const openRouterRequest = this.translateToOpenRouterFormat(contents, mode, params.max_tokens, params.reasoning);
|
|
1325
|
+
const openRouterRequest = this.translateToOpenRouterFormat(contents, mode, params.max_tokens, params.reasoning, enhancedInput.systemPrompt);
|
|
1301
1326
|
response = await fetchWithTimeout3("https://openrouter.ai/api/v1/chat/completions", {
|
|
1302
1327
|
method: "POST",
|
|
1303
1328
|
headers: {
|
|
@@ -1409,8 +1434,11 @@ var GoogleProvider = class {
|
|
|
1409
1434
|
thinking_budget
|
|
1410
1435
|
};
|
|
1411
1436
|
}
|
|
1412
|
-
translateToOpenRouterFormat(contents, mode, max_tokens, reasoning) {
|
|
1437
|
+
translateToOpenRouterFormat(contents, mode, max_tokens, reasoning, systemPrompt) {
|
|
1413
1438
|
const messages = [];
|
|
1439
|
+
if (systemPrompt) {
|
|
1440
|
+
messages.push({ role: "system", content: systemPrompt });
|
|
1441
|
+
}
|
|
1414
1442
|
for (const content of contents) {
|
|
1415
1443
|
if (content.role === "user") {
|
|
1416
1444
|
const messageContent = [];
|
|
@@ -1596,25 +1624,30 @@ var XAIProvider = class {
|
|
|
1596
1624
|
}
|
|
1597
1625
|
async completeJson(params) {
|
|
1598
1626
|
const startTime = Date.now();
|
|
1627
|
+
const rawInput = params.input ?? params.prompt;
|
|
1628
|
+
if (!rawInput) {
|
|
1629
|
+
throw new Error("Either input or prompt must be provided");
|
|
1630
|
+
}
|
|
1631
|
+
const normalizedInput = typeof rawInput === "string" ? { text: rawInput } : rawInput;
|
|
1599
1632
|
const mode = params.mode || (params.schema ? "strict" : "relaxed");
|
|
1600
1633
|
if (mode === "strict" && !params.schema) {
|
|
1601
1634
|
throw new Error('schema is required when mode is "strict"');
|
|
1602
1635
|
}
|
|
1603
1636
|
const extractMetadata = shouldExtractMetadata(params.derivedOptions);
|
|
1604
1637
|
const shouldEmbedSchema = params.embedSchemaInPrompt !== false && params.schema;
|
|
1605
|
-
let enhancedInput =
|
|
1638
|
+
let enhancedInput = normalizedInput;
|
|
1606
1639
|
if (shouldEmbedSchema) {
|
|
1607
1640
|
const jsonSchema = this.translator.convertZodIfNeeded(params.schema);
|
|
1608
1641
|
const enhancedText = params.derivedOptions ? combineSchemaUserAndDerivedPrompts(
|
|
1609
1642
|
jsonSchema,
|
|
1610
|
-
|
|
1643
|
+
normalizedInput.text || "",
|
|
1611
1644
|
params.derivedOptions
|
|
1612
1645
|
) : combineSchemaAndUserPrompt(
|
|
1613
1646
|
jsonSchema,
|
|
1614
|
-
|
|
1647
|
+
normalizedInput.text || ""
|
|
1615
1648
|
);
|
|
1616
1649
|
enhancedInput = {
|
|
1617
|
-
...
|
|
1650
|
+
...normalizedInput,
|
|
1618
1651
|
text: enhancedText
|
|
1619
1652
|
};
|
|
1620
1653
|
} else if (params.derivedOptions) {
|
|
@@ -1622,8 +1655,8 @@ var XAIProvider = class {
|
|
|
1622
1655
|
const derivedPrompt = buildLLMDerivedFeaturesPrompt2(params.derivedOptions);
|
|
1623
1656
|
if (derivedPrompt) {
|
|
1624
1657
|
enhancedInput = {
|
|
1625
|
-
...
|
|
1626
|
-
text: (
|
|
1658
|
+
...normalizedInput,
|
|
1659
|
+
text: (normalizedInput.text || "") + "\n\n" + derivedPrompt
|
|
1627
1660
|
};
|
|
1628
1661
|
}
|
|
1629
1662
|
}
|
|
@@ -1748,6 +1781,10 @@ var XAIProvider = class {
|
|
|
1748
1781
|
return Object.keys(config).length > 0 ? config : void 0;
|
|
1749
1782
|
}
|
|
1750
1783
|
async buildMessages(input) {
|
|
1784
|
+
const messages = [];
|
|
1785
|
+
if (input.systemPrompt) {
|
|
1786
|
+
messages.push({ role: "system", content: input.systemPrompt });
|
|
1787
|
+
}
|
|
1751
1788
|
const content = [];
|
|
1752
1789
|
if (input.text) {
|
|
1753
1790
|
content.push({ type: "text", text: input.text });
|
|
@@ -1788,7 +1825,8 @@ var XAIProvider = class {
|
|
|
1788
1825
|
});
|
|
1789
1826
|
}
|
|
1790
1827
|
}
|
|
1791
|
-
|
|
1828
|
+
messages.push({ role: "user", content });
|
|
1829
|
+
return messages;
|
|
1792
1830
|
}
|
|
1793
1831
|
/**
|
|
1794
1832
|
* Extract base64 data from a data URL or return as-is if already raw base64
|