@doclo/providers-llm 0.1.8 → 0.1.10

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects those versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -67,6 +67,8 @@ interface MultimodalInput {
67
67
  text?: string;
68
68
  images?: ImageInput[];
69
69
  pdfs?: PDFInput[];
70
+ /** Optional system message (text-only, prepended to conversation) */
71
+ systemPrompt?: string;
70
72
  }
71
73
  /** Response metrics */
72
74
  interface ResponseMetrics {
@@ -134,6 +136,27 @@ interface LLMDerivedOptions {
134
136
  maxChunkSize?: number;
135
137
  /** Language hints for the document */
136
138
  languageHints?: string[];
139
+ /**
140
+ * Normalize date fields to ISO 8601 format (YYYY-MM-DD)
141
+ * When enabled, date fields in the extraction output will be formatted consistently.
142
+ * Native support: Extend.ai (extend:type: "date")
143
+ * LLM support: Via prompting
144
+ */
145
+ dateNormalization?: boolean;
146
+ /**
147
+ * Normalize currency fields to { amount: number, currency: string } objects
148
+ * When enabled, monetary values are extracted as structured objects with ISO 4217 currency codes.
149
+ * Native support: Extend.ai (extend:type: "currency")
150
+ * LLM support: Via prompting
151
+ */
152
+ currencyNormalization?: boolean;
153
+ /**
154
+ * Detect and extract signature fields from documents
155
+ * When enabled, signature presence is detected and locations are reported.
156
+ * Native support: Extend.ai (extend:type: "signature"), Reducto
157
+ * LLM support: Via prompting (less reliable)
158
+ */
159
+ signatureDetection?: boolean;
137
160
  }
138
161
  /**
139
162
  * Extracted metadata from LLM response (populated when derived options are enabled)
@@ -160,6 +183,25 @@ interface LLMExtractedMetadata {
160
183
  text: string;
161
184
  pages: number[];
162
185
  }>;
186
+ /** Detected signatures with location and confidence */
187
+ signatures?: Array<{
188
+ field: string;
189
+ detected: boolean;
190
+ bbox?: [number, number, number, number];
191
+ page?: number;
192
+ confidence?: number;
193
+ }>;
194
+ /** Normalized currency values (original → normalized mapping) */
195
+ normalizedCurrencies?: Record<string, {
196
+ original: string;
197
+ amount: number;
198
+ currency: string;
199
+ }>;
200
+ /** Normalized date values (original → normalized mapping) */
201
+ normalizedDates?: Record<string, {
202
+ original: string;
203
+ normalized: string;
204
+ }>;
163
205
  }
164
206
  /** Provider interface */
165
207
  interface LLMProvider {
@@ -457,7 +499,8 @@ declare class OpenAIProvider implements LLMProvider {
457
499
  private limits;
458
500
  constructor(config: ProviderConfig);
459
501
  completeJson<T>(params: {
460
- input: MultimodalInput;
502
+ input?: MultimodalInput;
503
+ prompt?: MultimodalInput | string;
461
504
  schema?: UnifiedSchema<T>;
462
505
  mode?: JsonMode;
463
506
  max_tokens?: number;
@@ -482,7 +525,8 @@ declare class AnthropicProvider implements LLMProvider {
482
525
  private limits;
483
526
  constructor(config: ProviderConfig);
484
527
  completeJson<T>(params: {
485
- input: MultimodalInput;
528
+ input?: MultimodalInput;
529
+ prompt?: MultimodalInput | string;
486
530
  schema?: UnifiedSchema<T>;
487
531
  mode?: JsonMode;
488
532
  max_tokens?: number;
@@ -550,7 +594,8 @@ declare class XAIProvider implements LLMProvider {
550
594
  private limits;
551
595
  constructor(config: ProviderConfig);
552
596
  completeJson<T>(params: {
553
- input: MultimodalInput;
597
+ input?: MultimodalInput;
598
+ prompt?: MultimodalInput | string;
554
599
  schema?: UnifiedSchema<T>;
555
600
  mode?: JsonMode;
556
601
  max_tokens?: number;
package/dist/index.js CHANGED
@@ -343,25 +343,30 @@ var OpenAIProvider = class {
343
343
  }
344
344
  async completeJson(params) {
345
345
  const startTime = Date.now();
346
+ const rawInput = params.input ?? params.prompt;
347
+ if (!rawInput) {
348
+ throw new Error("Either input or prompt must be provided");
349
+ }
350
+ const normalizedInput = typeof rawInput === "string" ? { text: rawInput } : rawInput;
346
351
  const mode = params.mode || (params.schema ? "strict" : "relaxed");
347
352
  if (mode === "strict" && !params.schema) {
348
353
  throw new Error('schema is required when mode is "strict"');
349
354
  }
350
355
  const extractMetadata = shouldExtractMetadata(params.derivedOptions);
351
356
  const shouldEmbedSchema = params.embedSchemaInPrompt !== false && params.schema;
352
- let enhancedInput = params.input;
357
+ let enhancedInput = normalizedInput;
353
358
  if (shouldEmbedSchema) {
354
359
  const jsonSchema = this.translator.convertZodIfNeeded(params.schema);
355
360
  const enhancedText = params.derivedOptions ? combineSchemaUserAndDerivedPrompts(
356
361
  jsonSchema,
357
- params.input.text || "",
362
+ normalizedInput.text || "",
358
363
  params.derivedOptions
359
364
  ) : combineSchemaAndUserPrompt(
360
365
  jsonSchema,
361
- params.input.text || ""
366
+ normalizedInput.text || ""
362
367
  );
363
368
  enhancedInput = {
364
- ...params.input,
369
+ ...normalizedInput,
365
370
  text: enhancedText
366
371
  };
367
372
  } else if (params.derivedOptions) {
@@ -369,8 +374,8 @@ var OpenAIProvider = class {
369
374
  const derivedPrompt = buildLLMDerivedFeaturesPrompt2(params.derivedOptions);
370
375
  if (derivedPrompt) {
371
376
  enhancedInput = {
372
- ...params.input,
373
- text: (params.input.text || "") + "\n\n" + derivedPrompt
377
+ ...normalizedInput,
378
+ text: (normalizedInput.text || "") + "\n\n" + derivedPrompt
374
379
  };
375
380
  }
376
381
  }
@@ -496,6 +501,10 @@ var OpenAIProvider = class {
496
501
  return Object.keys(config).length > 0 ? config : void 0;
497
502
  }
498
503
  buildMessages(input) {
504
+ const messages = [];
505
+ if (input.systemPrompt) {
506
+ messages.push({ role: "system", content: input.systemPrompt });
507
+ }
499
508
  const content = [];
500
509
  if (input.text) {
501
510
  content.push({ type: "text", text: input.text });
@@ -536,7 +545,8 @@ var OpenAIProvider = class {
536
545
  });
537
546
  }
538
547
  }
539
- return [{ role: "user", content }];
548
+ messages.push({ role: "user", content });
549
+ return messages;
540
550
  }
541
551
  /**
542
552
  * Extract base64 data from a data URL or return as-is if already raw base64
@@ -596,25 +606,30 @@ var AnthropicProvider = class {
596
606
  }
597
607
  async completeJson(params) {
598
608
  const startTime = Date.now();
609
+ const rawInput = params.input ?? params.prompt;
610
+ if (!rawInput) {
611
+ throw new Error("Either input or prompt must be provided");
612
+ }
613
+ const normalizedInput = typeof rawInput === "string" ? { text: rawInput } : rawInput;
599
614
  const mode = params.mode || (params.schema ? "strict" : "relaxed");
600
615
  if (mode === "strict" && !params.schema) {
601
616
  throw new Error('schema is required when mode is "strict"');
602
617
  }
603
618
  const extractMetadata = shouldExtractMetadata(params.derivedOptions);
604
619
  const shouldEmbedSchema = params.embedSchemaInPrompt !== false && params.schema;
605
- let enhancedInput = params.input;
620
+ let enhancedInput = normalizedInput;
606
621
  if (shouldEmbedSchema) {
607
622
  const jsonSchema = this.translator.convertZodIfNeeded(params.schema);
608
623
  const enhancedText = params.derivedOptions ? combineSchemaUserAndDerivedPrompts(
609
624
  jsonSchema,
610
- params.input.text || "",
625
+ normalizedInput.text || "",
611
626
  params.derivedOptions
612
627
  ) : combineSchemaAndUserPrompt(
613
628
  jsonSchema,
614
- params.input.text || ""
629
+ normalizedInput.text || ""
615
630
  );
616
631
  enhancedInput = {
617
- ...params.input,
632
+ ...normalizedInput,
618
633
  text: enhancedText
619
634
  };
620
635
  } else if (params.derivedOptions) {
@@ -622,8 +637,8 @@ var AnthropicProvider = class {
622
637
  const derivedPrompt = buildLLMDerivedFeaturesPrompt2(params.derivedOptions);
623
638
  if (derivedPrompt) {
624
639
  enhancedInput = {
625
- ...params.input,
626
- text: (params.input.text || "") + "\n\n" + derivedPrompt
640
+ ...normalizedInput,
641
+ text: (normalizedInput.text || "") + "\n\n" + derivedPrompt
627
642
  };
628
643
  }
629
644
  }
@@ -632,7 +647,9 @@ var AnthropicProvider = class {
632
647
  const requestBody = {
633
648
  model: this.config.model,
634
649
  max_tokens: params.max_tokens || 4096,
635
- messages
650
+ messages,
651
+ // Native Anthropic API uses separate system parameter (text-only)
652
+ ...enhancedInput.systemPrompt && { system: enhancedInput.systemPrompt }
636
653
  };
637
654
  if (mode === "relaxed") {
638
655
  requestBody.messages.push({
@@ -677,7 +694,7 @@ var AnthropicProvider = class {
677
694
  let costUSD;
678
695
  if (this.config.via === "openrouter") {
679
696
  const useNewStructuredOutputs2 = this.supportsNewStructuredOutputs();
680
- const openRouterRequest = this.translateToOpenRouterFormat(messages, params.schema, mode, params.max_tokens, params.reasoning);
697
+ const openRouterRequest = this.translateToOpenRouterFormat(messages, params.schema, mode, params.max_tokens, params.reasoning, enhancedInput.systemPrompt);
681
698
  if (process.env.DEBUG_PROVIDERS) {
682
699
  console.log("[AnthropicProvider] OpenRouter request body (messages):");
683
700
  console.log(JSON.stringify(openRouterRequest.messages, null, 2));
@@ -863,11 +880,15 @@ var AnthropicProvider = class {
863
880
  budget_tokens
864
881
  };
865
882
  }
866
- translateToOpenRouterFormat(messages, schema, mode, max_tokens, reasoning) {
883
+ translateToOpenRouterFormat(messages, schema, mode, max_tokens, reasoning, systemPrompt) {
867
884
  const useNewStructuredOutputs = this.supportsNewStructuredOutputs();
885
+ const jsonInstructions = mode === "strict" ? "You must respond ONLY with valid JSON that matches the provided schema. Do not include any markdown formatting, explanations, or additional text." : "You must respond ONLY with valid JSON. Do not include any markdown formatting, explanations, or additional text.";
886
+ const systemContent = systemPrompt ? `${systemPrompt}
887
+
888
+ ${jsonInstructions}` : `You are a data extraction assistant. ${jsonInstructions}`;
868
889
  const systemMessage = {
869
890
  role: "system",
870
- content: mode === "strict" ? "You are a data extraction assistant. You must respond ONLY with valid JSON that matches the provided schema. Do not include any markdown formatting, explanations, or additional text." : "You are a data extraction assistant. You must respond ONLY with valid JSON. Do not include any markdown formatting, explanations, or additional text."
891
+ content: systemContent
871
892
  };
872
893
  const messageArray = [systemMessage, ...messages];
873
894
  const requestBody = {
@@ -1281,6 +1302,10 @@ var GoogleProvider = class {
1281
1302
  // Use JSON mode without responseSchema - schema is already in the prompt via combineSchemaAndUserPrompt.
1282
1303
  // See: https://ubaidullahmomer.medium.com/why-google-geminis-response-schema-isn-t-ready-for-complex-json-46f35c3aaaea
1283
1304
  responseMimeType: "application/json"
1305
+ },
1306
+ // Native Gemini API uses systemInstruction with parts array (text-only)
1307
+ ...enhancedInput.systemPrompt && {
1308
+ systemInstruction: { parts: [{ text: enhancedInput.systemPrompt }] }
1284
1309
  }
1285
1310
  };
1286
1311
  if (process.env.DEBUG_PROVIDERS) {
@@ -1297,7 +1322,7 @@ var GoogleProvider = class {
1297
1322
  console.log("[GoogleProvider] Using via:", this.config.via, "Checking:", this.config.via === "openrouter");
1298
1323
  }
1299
1324
  if (this.config.via === "openrouter") {
1300
- const openRouterRequest = this.translateToOpenRouterFormat(contents, mode, params.max_tokens, params.reasoning);
1325
+ const openRouterRequest = this.translateToOpenRouterFormat(contents, mode, params.max_tokens, params.reasoning, enhancedInput.systemPrompt);
1301
1326
  response = await fetchWithTimeout3("https://openrouter.ai/api/v1/chat/completions", {
1302
1327
  method: "POST",
1303
1328
  headers: {
@@ -1409,8 +1434,11 @@ var GoogleProvider = class {
1409
1434
  thinking_budget
1410
1435
  };
1411
1436
  }
1412
- translateToOpenRouterFormat(contents, mode, max_tokens, reasoning) {
1437
+ translateToOpenRouterFormat(contents, mode, max_tokens, reasoning, systemPrompt) {
1413
1438
  const messages = [];
1439
+ if (systemPrompt) {
1440
+ messages.push({ role: "system", content: systemPrompt });
1441
+ }
1414
1442
  for (const content of contents) {
1415
1443
  if (content.role === "user") {
1416
1444
  const messageContent = [];
@@ -1596,25 +1624,30 @@ var XAIProvider = class {
1596
1624
  }
1597
1625
  async completeJson(params) {
1598
1626
  const startTime = Date.now();
1627
+ const rawInput = params.input ?? params.prompt;
1628
+ if (!rawInput) {
1629
+ throw new Error("Either input or prompt must be provided");
1630
+ }
1631
+ const normalizedInput = typeof rawInput === "string" ? { text: rawInput } : rawInput;
1599
1632
  const mode = params.mode || (params.schema ? "strict" : "relaxed");
1600
1633
  if (mode === "strict" && !params.schema) {
1601
1634
  throw new Error('schema is required when mode is "strict"');
1602
1635
  }
1603
1636
  const extractMetadata = shouldExtractMetadata(params.derivedOptions);
1604
1637
  const shouldEmbedSchema = params.embedSchemaInPrompt !== false && params.schema;
1605
- let enhancedInput = params.input;
1638
+ let enhancedInput = normalizedInput;
1606
1639
  if (shouldEmbedSchema) {
1607
1640
  const jsonSchema = this.translator.convertZodIfNeeded(params.schema);
1608
1641
  const enhancedText = params.derivedOptions ? combineSchemaUserAndDerivedPrompts(
1609
1642
  jsonSchema,
1610
- params.input.text || "",
1643
+ normalizedInput.text || "",
1611
1644
  params.derivedOptions
1612
1645
  ) : combineSchemaAndUserPrompt(
1613
1646
  jsonSchema,
1614
- params.input.text || ""
1647
+ normalizedInput.text || ""
1615
1648
  );
1616
1649
  enhancedInput = {
1617
- ...params.input,
1650
+ ...normalizedInput,
1618
1651
  text: enhancedText
1619
1652
  };
1620
1653
  } else if (params.derivedOptions) {
@@ -1622,8 +1655,8 @@ var XAIProvider = class {
1622
1655
  const derivedPrompt = buildLLMDerivedFeaturesPrompt2(params.derivedOptions);
1623
1656
  if (derivedPrompt) {
1624
1657
  enhancedInput = {
1625
- ...params.input,
1626
- text: (params.input.text || "") + "\n\n" + derivedPrompt
1658
+ ...normalizedInput,
1659
+ text: (normalizedInput.text || "") + "\n\n" + derivedPrompt
1627
1660
  };
1628
1661
  }
1629
1662
  }
@@ -1748,6 +1781,10 @@ var XAIProvider = class {
1748
1781
  return Object.keys(config).length > 0 ? config : void 0;
1749
1782
  }
1750
1783
  async buildMessages(input) {
1784
+ const messages = [];
1785
+ if (input.systemPrompt) {
1786
+ messages.push({ role: "system", content: input.systemPrompt });
1787
+ }
1751
1788
  const content = [];
1752
1789
  if (input.text) {
1753
1790
  content.push({ type: "text", text: input.text });
@@ -1788,7 +1825,8 @@ var XAIProvider = class {
1788
1825
  });
1789
1826
  }
1790
1827
  }
1791
- return [{ role: "user", content }];
1828
+ messages.push({ role: "user", content });
1829
+ return messages;
1792
1830
  }
1793
1831
  /**
1794
1832
  * Extract base64 data from a data URL or return as-is if already raw base64