@diabolicallabs/llm-client 0.3.0 → 0.4.0

package/dist/index.js CHANGED
@@ -38,7 +38,7 @@ function createAttemptController(callerSignal, timeoutMs) {
  timer.unref?.();
  const onCallerAbort = () => {
  reason ??= "caller";
- internal.abort(callerSignal.reason);
+ if (callerSignal !== void 0) internal.abort(callerSignal.reason);
  };
  if (callerSignal !== void 0) {
  if (callerSignal.aborted) {
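
Note on the hunk above: `onCallerAbort` previously read `callerSignal.reason` unconditionally even though `callerSignal` is an optional parameter; 0.4.0 guards the access. A minimal sketch of the pattern the minified code implements — the names and the guarded abort come from the diff, everything else is a reconstruction, not the package's actual source:

```ts
// Reconstruction of the attempt-controller pattern: one internal
// AbortController fans in an optional caller signal and a per-attempt timeout.
type AbortOrigin = "timeout" | "caller";

function createAttemptController(callerSignal: AbortSignal | undefined, timeoutMs: number) {
  const internal = new AbortController();
  let reason: AbortOrigin | undefined;

  const timer = setTimeout(() => {
    reason ??= "timeout";
    internal.abort(new Error(`attempt timed out after ${timeoutMs}ms`));
  }, timeoutMs);
  (timer as { unref?: () => void }).unref?.(); // Node-only: don't hold the event loop open

  const onCallerAbort = () => {
    reason ??= "caller";
    // the v0.4.0 fix: guard before reading .reason on an optional signal
    if (callerSignal !== undefined) internal.abort(callerSignal.reason);
  };
  if (callerSignal !== undefined) {
    if (callerSignal.aborted) onCallerAbort();
    else callerSignal.addEventListener("abort", onCallerAbort, { once: true });
  }

  return {
    signal: internal.signal,   // passed to the SDK call
    abortReason: () => reason, // "timeout" | "caller" | undefined, fed to classifyAbort
    dispose: () => {
      clearTimeout(timer);
      callerSignal?.removeEventListener("abort", onCallerAbort);
    },
  };
}
```
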
@@ -126,7 +126,6 @@ function classifyAbort(err, abortReason, provider) {
  retryable: true,
  cause: err
  });
- case "caller":
  default:
  return new LlmError({
  message: "llm-client: cancelled by caller",
@@ -144,6 +143,128 @@ function isAbortError(err) {
  return false;
  }

+ // src/json-schema.ts
+ import { z } from "zod";
+ function isZodSchema(s) {
+ if (typeof s !== "object" || s === null) return false;
+ const hasZod4Marker = "_zod" in s && typeof s._zod === "object";
+ const hasZod3Marker = "_def" in s;
+ if (hasZod3Marker && !hasZod4Marker) {
+ throw new LlmError({
+ message: 'llm-client: detected a Zod 3 schema. Upgrade to Zod 4 to use strict structured-output mode, or pass providerOptions.structuredMode = "prompt" to keep the v0.3.0 prompt-only path.',
+ provider: "llm-client",
+ retryable: false,
+ kind: "unknown"
+ });
+ }
+ if (!hasZod4Marker) return false;
+ return typeof s.parse === "function";
+ }
+ function toProviderSchema(schema, profile) {
+ const target = profile === "gemini" ? "openapi-3.0" : "draft-2020-12";
+ let json;
+ try {
+ json = z.toJSONSchema(schema, {
+ target,
+ unrepresentable: "throw",
+ cycles: "throw"
+ });
+ } catch (e) {
+ throw new LlmError({
+ message: `llm-client: schema is not representable for ${profile} strict mode \u2014 ${e.message}. Pass providerOptions.structuredMode = 'prompt' to fall back to prompt-only mode.`,
+ provider: profile,
+ retryable: false,
+ kind: "unknown",
+ cause: e
+ });
+ }
+ if (profile === "openai") return openAIStrictPostprocess(json);
+ if (profile === "gemini") return geminiPostprocess(json);
+ return anthropicPostprocess(json);
+ }
+ function openAIStrictPostprocess(node) {
+ if (typeof node !== "object" || node === null) {
+ return node;
+ }
+ if (Array.isArray(node)) {
+ return node.map(openAIStrictPostprocess);
+ }
+ const src = node;
+ const obj = { ...src };
+ delete obj.$schema;
+ delete obj.format;
+ delete obj.pattern;
+ delete obj.default;
+ delete obj.examples;
+ if (obj.type === "object" && obj.properties !== void 0) {
+ const props = obj.properties;
+ const allKeys = Object.keys(props);
+ obj.required = allKeys;
+ obj.additionalProperties = false;
+ const processedProps = {};
+ for (const key of allKeys) {
+ processedProps[key] = openAIStrictPostprocess(props[key]);
+ }
+ obj.properties = processedProps;
+ }
+ if (obj.items !== void 0) {
+ obj.items = openAIStrictPostprocess(obj.items);
+ }
+ if (Array.isArray(obj.anyOf)) {
+ obj.anyOf = obj.anyOf.map(openAIStrictPostprocess);
+ }
+ if (Array.isArray(obj.oneOf)) {
+ obj.oneOf = obj.oneOf.map(openAIStrictPostprocess);
+ }
+ if (Array.isArray(obj.allOf)) {
+ obj.allOf = obj.allOf.map(openAIStrictPostprocess);
+ }
+ if (Array.isArray(obj.prefixItems)) {
+ obj.prefixItems = obj.prefixItems.map(openAIStrictPostprocess);
+ }
+ return obj;
+ }
+ function anthropicPostprocess(node) {
+ const obj = { ...node };
+ delete obj.$schema;
+ return obj;
+ }
+ function geminiPostprocess(node) {
+ if (typeof node !== "object" || node === null) {
+ return node;
+ }
+ if (Array.isArray(node)) {
+ return node.map(geminiPostprocess);
+ }
+ const src = node;
+ const obj = { ...src };
+ delete obj.$schema;
+ delete obj.additionalProperties;
+ delete obj.default;
+ delete obj.examples;
+ if (obj.properties !== void 0) {
+ const props = obj.properties;
+ const processedProps = {};
+ for (const key of Object.keys(props)) {
+ processedProps[key] = geminiPostprocess(props[key]);
+ }
+ obj.properties = processedProps;
+ }
+ if (obj.items !== void 0) {
+ obj.items = geminiPostprocess(obj.items);
+ }
+ if (Array.isArray(obj.anyOf)) {
+ obj.anyOf = obj.anyOf.map(geminiPostprocess);
+ }
+ if (Array.isArray(obj.oneOf)) {
+ obj.oneOf = obj.oneOf.map(geminiPostprocess);
+ }
+ if (Array.isArray(obj.allOf)) {
+ obj.allOf = obj.allOf.map(geminiPostprocess);
+ }
+ return obj;
+ }
+
  // src/retry.ts
  var RETRYABLE_HTTP_STATUSES = /* @__PURE__ */ new Set([429, 502, 503, 504]);
  var RETRYABLE_ERROR_CODES = /* @__PURE__ */ new Set(["ECONNRESET", "ETIMEDOUT", "ECONNABORTED"]);
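
The new `src/json-schema.ts` module gates strict structured output on Zod 4 (detected via the `_zod` marker), hard-fails Zod 3 schemas with an upgrade hint, and converts schemas per provider profile. A usage sketch of the conversion step, assuming Zod 4 is installed; the `Recipe` schema is illustrative:

```ts
import { z } from "zod"; // Zod 4

const Recipe = z.object({
  title: z.string(),
  servings: z.number().int().optional(),
  steps: z.array(z.string()),
});

// toProviderSchema(Recipe, "openai") starts from this call; the gemini
// profile uses target: "openapi-3.0" instead.
const json = z.toJSONSchema(Recipe, {
  target: "draft-2020-12",
  unrepresentable: "throw", // z.date(), z.bigint() etc. raise instead of being silently dropped
  cycles: "throw",          // recursive schemas raise instead of emitting $refs
});

// openAIStrictPostprocess then strips $schema/format/pattern/default/examples,
// sets required to every property key, and sets additionalProperties: false,
// which is the shape OpenAI strict mode demands. Note the side effect: an
// .optional() field like `servings` becomes required at the API level.
console.log(json);
```
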
@@ -219,7 +340,13 @@ function normalizeThrownError(err, provider) {
  cause: err
  });
  }
- return new LlmError({ message: err.message, provider, kind: "network", retryable: true, cause: err });
+ return new LlmError({
+ message: err.message,
+ provider,
+ kind: "network",
+ retryable: true,
+ cause: err
+ });
  }
  if (statusCode !== void 0) {
  const retryable = isRetryableStatus(statusCode);
@@ -317,33 +444,36 @@ function createAnthropicProvider(config) {
  const { system, messages: anthropicMessages } = buildAnthropicMessages(messages);
  const effectiveTimeoutMs = options?.timeoutMs ?? config.timeoutMs ?? 3e4;
  const start = Date.now();
- return withRetry(async () => {
- const ctl = createAttemptController(options?.signal, effectiveTimeoutMs);
- try {
- const params = {
- model,
- messages: anthropicMessages,
- max_tokens: options?.maxTokens ?? config.maxTokens ?? 1024
- };
- if (system !== void 0) params.system = system;
- const temperature = options?.temperature ?? config.temperature;
- if (temperature !== void 0) {
- params.temperature = temperature;
+ return withRetry(
+ async () => {
+ const ctl = createAttemptController(options?.signal, effectiveTimeoutMs);
+ try {
+ const params = {
+ model,
+ messages: anthropicMessages,
+ max_tokens: options?.maxTokens ?? config.maxTokens ?? 1024
+ };
+ if (system !== void 0) params.system = system;
+ const temperature = options?.temperature ?? config.temperature;
+ if (temperature !== void 0) {
+ params.temperature = temperature;
+ }
+ const response = await client.messages.create(params, { signal: ctl.signal });
+ const content = response.content.filter((block) => block.type === "text").map((block) => block.text).join("");
+ return {
+ content,
+ model: response.model,
+ usage: normalizeUsage(response.usage),
+ latencyMs: Date.now() - start
+ };
+ } catch (err) {
+ throw normalizeAnthropicError(classifyAbort(err, ctl.abortReason(), PROVIDER));
+ } finally {
+ ctl.dispose();
  }
- const response = await client.messages.create(params, { signal: ctl.signal });
- const content = response.content.filter((block) => block.type === "text").map((block) => block.text).join("");
- return {
- content,
- model: response.model,
- usage: normalizeUsage(response.usage),
- latencyMs: Date.now() - start
- };
- } catch (err) {
- throw normalizeAnthropicError(classifyAbort(err, ctl.abortReason(), PROVIDER));
- } finally {
- ctl.dispose();
- }
- }, mergeRetryOptsWithSignal(retryOpts, options?.signal));
+ },
+ mergeRetryOptsWithSignal(retryOpts, options?.signal)
+ );
  }
  async function* stream(messages, options) {
  const model = options?.model ?? config.model;
@@ -394,6 +524,76 @@ function createAnthropicProvider(config) {
  }
  }
  async function structured(messages, schema, options) {
+ const structuredMode = options?.providerOptions?.["structuredMode"];
+ const useStrict = isZodSchema(schema) && structuredMode !== "prompt";
+ if (!useStrict) {
+ return structuredPromptFallback(messages, schema, options);
+ }
+ const inputSchema = toProviderSchema(schema, "anthropic");
+ const { system, messages: anthropicMessages } = buildAnthropicMessages(messages);
+ const effectiveTimeoutMs = options?.timeoutMs ?? config.timeoutMs ?? 3e4;
+ const start = Date.now();
+ const response = await withRetry(
+ async () => {
+ const ctl = createAttemptController(options?.signal, effectiveTimeoutMs);
+ try {
+ const params = {
+ model: options?.model ?? config.model,
+ messages: anthropicMessages,
+ max_tokens: options?.maxTokens ?? config.maxTokens ?? 1024,
+ tools: [
+ {
+ name: "extract",
+ description: "Return the structured data.",
+ input_schema: inputSchema
+ }
+ ],
+ tool_choice: { type: "tool", name: "extract" }
+ };
+ if (system !== void 0) params.system = system;
+ const temperature = options?.temperature ?? config.temperature;
+ if (temperature !== void 0) params.temperature = temperature;
+ return await client.messages.create(params, { signal: ctl.signal });
+ } catch (err) {
+ throw normalizeAnthropicError(classifyAbort(err, ctl.abortReason(), PROVIDER));
+ } finally {
+ ctl.dispose();
+ }
+ },
+ mergeRetryOptsWithSignal(retryOpts, options?.signal)
+ );
+ const toolBlock = response.content.find(
+ (b) => b.type === "tool_use" && b.name === "extract"
+ );
+ if (toolBlock === void 0) {
+ const textContent = response.content.filter((b) => b.type === "text").map((b) => b.text).join("");
+ throw new LlmError({
+ message: `Anthropic structured: model did not call the extract tool (stop_reason=${response.stop_reason}). Text: ${textContent.slice(0, 200)}`,
+ provider: PROVIDER,
+ retryable: false,
+ kind: "unknown"
+ });
+ }
+ let data;
+ try {
+ data = schema.parse(toolBlock.input);
+ } catch (err) {
+ throw new LlmError({
+ message: `Anthropic structured output: tool response failed schema validation. ${String(err)}`,
+ provider: PROVIDER,
+ retryable: false,
+ cause: err
+ });
+ }
+ return {
+ data,
+ model: response.model,
+ id: response.id,
+ usage: normalizeUsage(response.usage),
+ latencyMs: Date.now() - start
+ };
+ }
+ async function structuredPromptFallback(messages, schema, options) {
  const jsonSystemInstruction = {
  role: "system",
  content: "You must respond with valid JSON only. No explanations, no markdown code fences, no extra text. Your entire response must be valid JSON that can be parsed with JSON.parse()."
@@ -426,6 +626,7 @@ function createAnthropicProvider(config) {
  }
  return {
  data,
+ model: response.model,
  usage: response.usage,
  latencyMs: Date.now() - start
  };
@@ -501,32 +702,35 @@ function createDeepSeekProvider(config) {
  const chatMessages = buildMessages(messages);
  const effectiveTimeoutMs = options?.timeoutMs ?? config.timeoutMs ?? 3e4;
  const start = Date.now();
- return withRetry(async () => {
- const ctl = createAttemptController(options?.signal, effectiveTimeoutMs);
- try {
- const params = {
- model,
- messages: chatMessages,
- stream: false
- };
- const maxTokens = options?.maxTokens ?? config.maxTokens;
- if (maxTokens !== void 0) params.max_tokens = maxTokens;
- const temperature = options?.temperature ?? config.temperature;
- if (temperature !== void 0) params.temperature = temperature;
- const response = await client.chat.completions.create(params, { signal: ctl.signal });
- const content = response.choices.map((c) => c.message.content ?? "").join("");
- return {
- content,
- model: response.model,
- usage: normalizeUsage2(response.usage),
- latencyMs: Date.now() - start
- };
- } catch (err) {
- throw normalizeDeepSeekError(classifyAbort(err, ctl.abortReason(), PROVIDER2));
- } finally {
- ctl.dispose();
- }
- }, mergeRetryOptsWithSignal(retryOpts, options?.signal));
+ return withRetry(
+ async () => {
+ const ctl = createAttemptController(options?.signal, effectiveTimeoutMs);
+ try {
+ const params = {
+ model,
+ messages: chatMessages,
+ stream: false
+ };
+ const maxTokens = options?.maxTokens ?? config.maxTokens;
+ if (maxTokens !== void 0) params.max_tokens = maxTokens;
+ const temperature = options?.temperature ?? config.temperature;
+ if (temperature !== void 0) params.temperature = temperature;
+ const response = await client.chat.completions.create(params, { signal: ctl.signal });
+ const content = response.choices.map((c) => c.message.content ?? "").join("");
+ return {
+ content,
+ model: response.model,
+ usage: normalizeUsage2(response.usage),
+ latencyMs: Date.now() - start
+ };
+ } catch (err) {
+ throw normalizeDeepSeekError(classifyAbort(err, ctl.abortReason(), PROVIDER2));
+ } finally {
+ ctl.dispose();
+ }
+ },
+ mergeRetryOptsWithSignal(retryOpts, options?.signal)
+ );
  }
  async function* stream(messages, options) {
  const model = options?.model ?? config.model;
@@ -581,25 +785,28 @@ function createDeepSeekProvider(config) {
  const chatMessages = buildMessages(augmentedMessages);
  const effectiveTimeoutMs = options?.timeoutMs ?? config.timeoutMs ?? 3e4;
  const start = Date.now();
- const rawResponse = await withRetry(async () => {
- const ctl = createAttemptController(options?.signal, effectiveTimeoutMs);
- try {
- const params = {
- model,
- messages: chatMessages,
- stream: false
- };
- const maxTokens = options?.maxTokens ?? config.maxTokens;
- if (maxTokens !== void 0) params.max_tokens = maxTokens;
- const temperature = options?.temperature ?? config.temperature;
- if (temperature !== void 0) params.temperature = temperature;
- return await client.chat.completions.create(params, { signal: ctl.signal });
- } catch (err) {
- throw normalizeDeepSeekError(classifyAbort(err, ctl.abortReason(), PROVIDER2));
- } finally {
- ctl.dispose();
- }
- }, mergeRetryOptsWithSignal(retryOpts, options?.signal));
+ const rawResponse = await withRetry(
+ async () => {
+ const ctl = createAttemptController(options?.signal, effectiveTimeoutMs);
+ try {
+ const params = {
+ model,
+ messages: chatMessages,
+ stream: false
+ };
+ const maxTokens = options?.maxTokens ?? config.maxTokens;
+ if (maxTokens !== void 0) params.max_tokens = maxTokens;
+ const temperature = options?.temperature ?? config.temperature;
+ if (temperature !== void 0) params.temperature = temperature;
+ return await client.chat.completions.create(params, { signal: ctl.signal });
+ } catch (err) {
+ throw normalizeDeepSeekError(classifyAbort(err, ctl.abortReason(), PROVIDER2));
+ } finally {
+ ctl.dispose();
+ }
+ },
+ mergeRetryOptsWithSignal(retryOpts, options?.signal)
+ );
  const rawContent = rawResponse.choices[0]?.message.content ?? "";
  let parsed;
  try {
@@ -626,6 +833,8 @@ function createDeepSeekProvider(config) {
  }
  return {
  data,
+ model: rawResponse.model,
+ id: rawResponse.id,
  usage: normalizeUsage2(rawResponse.usage),
  latencyMs: Date.now() - start
  };
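
Across the providers in this diff, the structured() return value is enriched the same way: `model` and, where the API supplies one, `id` now ride along with `data`, `usage`, and `latencyMs`. The overall shape, inferred from these return statements (field names match the hunks; the type name and the usage shape are assumptions):

```ts
// Inferred from the return statements in this diff, not the package's types.
interface StructuredResult<T> {
  data: T;              // schema.parse output
  model: string;        // new in 0.4.0: echoed from the provider response
  id?: string;          // new in 0.4.0: provider request id, where one exists
  usage?: unknown;      // provider-normalized token usage
  latencyMs: number;    // wall clock for the whole call, retries included
  citations?: string[]; // Perplexity only, when returned (see the last hunk)
}
```
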
@@ -709,31 +918,34 @@ function createGeminiProvider(config) {
  const { system, contents } = buildGeminiContents(messages);
  const effectiveTimeoutMs = options?.timeoutMs ?? configTimeoutMs;
  const start = Date.now();
- return withRetry(async () => {
- const ctl = createAttemptController(options?.signal, effectiveTimeoutMs);
- try {
- const geminiConfig = {};
- if (system !== void 0) geminiConfig.systemInstruction = system;
- const maxTokens = options?.maxTokens ?? config.maxTokens;
- if (maxTokens !== void 0) geminiConfig.maxOutputTokens = maxTokens;
- const temperature = options?.temperature ?? config.temperature;
- if (temperature !== void 0) geminiConfig.temperature = temperature;
- const response = await Promise.race([
- ai.models.generateContent({ model, contents, config: geminiConfig }),
- makeAbortRacePromise(ctl.signal)
- ]);
- return {
- content: response.text ?? "",
- model,
- usage: normalizeUsage3(response.usageMetadata),
- latencyMs: Date.now() - start
- };
- } catch (err) {
- throw normalizeGeminiError(classifyAbort(err, ctl.abortReason(), PROVIDER3));
- } finally {
- ctl.dispose();
- }
- }, mergeRetryOptsWithSignal(retryOpts, options?.signal));
+ return withRetry(
+ async () => {
+ const ctl = createAttemptController(options?.signal, effectiveTimeoutMs);
+ try {
+ const geminiConfig = {};
+ if (system !== void 0) geminiConfig.systemInstruction = system;
+ const maxTokens = options?.maxTokens ?? config.maxTokens;
+ if (maxTokens !== void 0) geminiConfig.maxOutputTokens = maxTokens;
+ const temperature = options?.temperature ?? config.temperature;
+ if (temperature !== void 0) geminiConfig.temperature = temperature;
+ const response = await Promise.race([
+ ai.models.generateContent({ model, contents, config: geminiConfig }),
+ makeAbortRacePromise(ctl.signal)
+ ]);
+ return {
+ content: response.text ?? "",
+ model,
+ usage: normalizeUsage3(response.usageMetadata),
+ latencyMs: Date.now() - start
+ };
+ } catch (err) {
+ throw normalizeGeminiError(classifyAbort(err, ctl.abortReason(), PROVIDER3));
+ } finally {
+ ctl.dispose();
+ }
+ },
+ mergeRetryOptsWithSignal(retryOpts, options?.signal)
+ );
  }
  async function* stream(messages, options) {
  const model = options?.model ?? config.model;
@@ -778,6 +990,75 @@ function createGeminiProvider(config) {
  }
  }
  async function structured(messages, schema, options) {
+ const structuredMode = options?.providerOptions?.["structuredMode"];
+ const useStrict = isZodSchema(schema) && structuredMode !== "prompt";
+ if (!useStrict) {
+ return structuredPromptFallback(messages, schema, options);
+ }
+ const responseSchemaObj = toProviderSchema(schema, "gemini");
+ const model = options?.model ?? config.model;
+ const { system, contents } = buildGeminiContents(messages);
+ const effectiveTimeoutMs = options?.timeoutMs ?? configTimeoutMs;
+ const start = Date.now();
+ const rawResponse = await withRetry(
+ async () => {
+ const ctl = createAttemptController(options?.signal, effectiveTimeoutMs);
+ try {
+ const geminiConfig = {
+ responseMimeType: "application/json",
+ // responseSchema SDK type is permissive; cast through never to avoid SDK type mismatch
+ responseSchema: responseSchemaObj
+ };
+ if (system !== void 0) geminiConfig.systemInstruction = system;
+ const maxTokens = options?.maxTokens ?? config.maxTokens;
+ if (maxTokens !== void 0) geminiConfig.maxOutputTokens = maxTokens;
+ const temperature = options?.temperature ?? config.temperature;
+ if (temperature !== void 0) geminiConfig.temperature = temperature;
+ return await Promise.race([
+ ai.models.generateContent({ model, contents, config: geminiConfig }),
+ makeAbortRacePromise(ctl.signal)
+ ]);
+ } catch (err) {
+ throw normalizeGeminiError(classifyAbort(err, ctl.abortReason(), PROVIDER3));
+ } finally {
+ ctl.dispose();
+ }
+ },
+ mergeRetryOptsWithSignal(retryOpts, options?.signal)
+ );
+ const rawContent = rawResponse.text ?? "";
+ let parsed;
+ try {
+ const cleaned = rawContent.replace(/^```(?:json)?\s*/i, "").replace(/\s*```$/, "").trim();
+ parsed = JSON.parse(cleaned);
+ } catch (err) {
+ throw new LlmError({
+ message: `Gemini structured output: response is not valid JSON. Raw: ${rawContent.slice(0, 200)}`,
+ provider: PROVIDER3,
+ retryable: false,
+ cause: err
+ });
+ }
+ let data;
+ try {
+ data = schema.parse(parsed);
+ } catch (err) {
+ throw new LlmError({
+ message: `Gemini structured output: response failed schema validation. ${String(err)}`,
+ provider: PROVIDER3,
+ retryable: false,
+ cause: err
+ });
+ }
+ return {
+ data,
+ // Gemini does not return a request ID; model comes from response.modelVersion if available
+ model: rawResponse.modelVersion ?? model,
+ usage: normalizeUsage3(rawResponse.usageMetadata),
+ latencyMs: Date.now() - start
+ };
+ }
+ async function structuredPromptFallback(messages, schema, options) {
  const augmentedMessages = [
  {
  role: "system",
@@ -789,28 +1070,30 @@ function createGeminiProvider(config) {
  const { system, contents } = buildGeminiContents(augmentedMessages);
  const effectiveTimeoutMs = options?.timeoutMs ?? configTimeoutMs;
  const start = Date.now();
- const rawResponse = await withRetry(async () => {
- const ctl = createAttemptController(options?.signal, effectiveTimeoutMs);
- try {
- const geminiConfig = {
- // Instruct Gemini to return JSON directly
- responseMimeType: "application/json"
- };
- if (system !== void 0) geminiConfig.systemInstruction = system;
- const maxTokens = options?.maxTokens ?? config.maxTokens;
- if (maxTokens !== void 0) geminiConfig.maxOutputTokens = maxTokens;
- const temperature = options?.temperature ?? config.temperature;
- if (temperature !== void 0) geminiConfig.temperature = temperature;
- return await Promise.race([
- ai.models.generateContent({ model, contents, config: geminiConfig }),
- makeAbortRacePromise(ctl.signal)
- ]);
- } catch (err) {
- throw normalizeGeminiError(classifyAbort(err, ctl.abortReason(), PROVIDER3));
- } finally {
- ctl.dispose();
- }
- }, mergeRetryOptsWithSignal(retryOpts, options?.signal));
+ const rawResponse = await withRetry(
+ async () => {
+ const ctl = createAttemptController(options?.signal, effectiveTimeoutMs);
+ try {
+ const geminiConfig = {
+ responseMimeType: "application/json"
+ };
+ if (system !== void 0) geminiConfig.systemInstruction = system;
+ const maxTokens = options?.maxTokens ?? config.maxTokens;
+ if (maxTokens !== void 0) geminiConfig.maxOutputTokens = maxTokens;
+ const temperature = options?.temperature ?? config.temperature;
+ if (temperature !== void 0) geminiConfig.temperature = temperature;
+ return await Promise.race([
+ ai.models.generateContent({ model, contents, config: geminiConfig }),
+ makeAbortRacePromise(ctl.signal)
+ ]);
+ } catch (err) {
+ throw normalizeGeminiError(classifyAbort(err, ctl.abortReason(), PROVIDER3));
+ } finally {
+ ctl.dispose();
+ }
+ },
+ mergeRetryOptsWithSignal(retryOpts, options?.signal)
+ );
  const rawContent = rawResponse.text ?? "";
  let parsed;
  try {
@@ -837,6 +1120,7 @@ function createGeminiProvider(config) {
  }
  return {
  data,
+ model,
  usage: normalizeUsage3(rawResponse.usageMetadata),
  latencyMs: Date.now() - start
  };
@@ -910,32 +1194,35 @@ function createOpenAIProvider(config) {
  const openAIMessages = buildOpenAIMessages(messages);
  const effectiveTimeoutMs = options?.timeoutMs ?? config.timeoutMs ?? 3e4;
  const start = Date.now();
- return withRetry(async () => {
- const ctl = createAttemptController(options?.signal, effectiveTimeoutMs);
- try {
- const params = {
- model,
- messages: openAIMessages,
- stream: false
- };
- const maxTokens = options?.maxTokens ?? config.maxTokens;
- if (maxTokens !== void 0) params.max_tokens = maxTokens;
- const temperature = options?.temperature ?? config.temperature;
- if (temperature !== void 0) params.temperature = temperature;
- const response = await client.chat.completions.create(params, { signal: ctl.signal });
- const content = response.choices.map((c) => c.message.content ?? "").join("");
- return {
- content,
- model: response.model,
- usage: normalizeUsage4(response.usage),
- latencyMs: Date.now() - start
- };
- } catch (err) {
- throw normalizeOpenAIError(classifyAbort(err, ctl.abortReason(), PROVIDER4));
- } finally {
- ctl.dispose();
- }
- }, mergeRetryOptsWithSignal(retryOpts, options?.signal));
+ return withRetry(
+ async () => {
+ const ctl = createAttemptController(options?.signal, effectiveTimeoutMs);
+ try {
+ const params = {
+ model,
+ messages: openAIMessages,
+ stream: false
+ };
+ const maxTokens = options?.maxTokens ?? config.maxTokens;
+ if (maxTokens !== void 0) params.max_tokens = maxTokens;
+ const temperature = options?.temperature ?? config.temperature;
+ if (temperature !== void 0) params.temperature = temperature;
+ const response = await client.chat.completions.create(params, { signal: ctl.signal });
+ const content = response.choices.map((c) => c.message.content ?? "").join("");
+ return {
+ content,
+ model: response.model,
+ usage: normalizeUsage4(response.usage),
+ latencyMs: Date.now() - start
+ };
+ } catch (err) {
+ throw normalizeOpenAIError(classifyAbort(err, ctl.abortReason(), PROVIDER4));
+ } finally {
+ ctl.dispose();
+ }
+ },
+ mergeRetryOptsWithSignal(retryOpts, options?.signal)
+ );
  }
  async function* stream(messages, options) {
  const model = options?.model ?? config.model;
@@ -981,6 +1268,86 @@ function createOpenAIProvider(config) {
  }
  }
  async function structured(messages, schema, options) {
+ const structuredMode = options?.providerOptions?.["structuredMode"];
+ const useStrict = isZodSchema(schema) && structuredMode !== "prompt";
+ if (!useStrict) {
+ return structuredPromptFallback(messages, schema, options);
+ }
+ const jsonSchema = toProviderSchema(schema, "openai");
+ const model = options?.model ?? config.model;
+ const openAIMessages = buildOpenAIMessages(messages);
+ const effectiveTimeoutMs = options?.timeoutMs ?? config.timeoutMs ?? 3e4;
+ const start = Date.now();
+ const rawResponse = await withRetry(
+ async () => {
+ const ctl = createAttemptController(options?.signal, effectiveTimeoutMs);
+ try {
+ const params = {
+ model,
+ messages: openAIMessages,
+ stream: false,
+ response_format: {
+ type: "json_schema",
+ json_schema: { name: "response", schema: jsonSchema, strict: true }
+ }
+ };
+ const maxTokens = options?.maxTokens ?? config.maxTokens;
+ if (maxTokens !== void 0) params.max_tokens = maxTokens;
+ const temperature = options?.temperature ?? config.temperature;
+ if (temperature !== void 0) params.temperature = temperature;
+ return await client.chat.completions.create(
+ params,
+ { signal: ctl.signal }
+ );
+ } catch (err) {
+ throw normalizeOpenAIError(classifyAbort(err, ctl.abortReason(), PROVIDER4));
+ } finally {
+ ctl.dispose();
+ }
+ },
+ mergeRetryOptsWithSignal(retryOpts, options?.signal)
+ );
+ const choice = rawResponse.choices[0];
+ if (choice?.message.refusal !== null && choice?.message.refusal !== void 0) {
+ throw new LlmError({
+ message: `OpenAI structured output: model refused to generate. Refusal: ${choice.message.refusal.slice(0, 200)}`,
+ provider: PROVIDER4,
+ retryable: false,
+ kind: "unknown"
+ });
+ }
+ const rawContent = choice?.message.content ?? "";
+ let parsed;
+ try {
+ parsed = JSON.parse(rawContent);
+ } catch (err) {
+ throw new LlmError({
+ message: `OpenAI structured output: response is not valid JSON. Raw: ${rawContent.slice(0, 200)}`,
+ provider: PROVIDER4,
+ retryable: false,
+ cause: err
+ });
+ }
+ let data;
+ try {
+ data = schema.parse(parsed);
+ } catch (err) {
+ throw new LlmError({
+ message: `OpenAI structured output: response failed schema validation. ${String(err)}`,
+ provider: PROVIDER4,
+ retryable: false,
+ cause: err
+ });
+ }
+ return {
+ data,
+ model: rawResponse.model,
+ id: rawResponse.id,
+ usage: normalizeUsage4(rawResponse.usage),
+ latencyMs: Date.now() - start
+ };
+ }
+ async function structuredPromptFallback(messages, schema, options) {
  const jsonSystemInstruction = {
  role: "system",
  content: "You must respond with valid JSON only. No explanations, no markdown code fences, no extra text. Your entire response must be valid JSON that can be parsed with JSON.parse()."
@@ -990,26 +1357,29 @@ function createOpenAIProvider(config) {
  const openAIMessages = buildOpenAIMessages(augmentedMessages);
  const effectiveTimeoutMs = options?.timeoutMs ?? config.timeoutMs ?? 3e4;
  const start = Date.now();
- const rawResponse = await withRetry(async () => {
- const ctl = createAttemptController(options?.signal, effectiveTimeoutMs);
- try {
- const params = {
- model,
- messages: openAIMessages,
- stream: false,
- response_format: { type: "json_object" }
- };
- const maxTokens = options?.maxTokens ?? config.maxTokens;
- if (maxTokens !== void 0) params.max_tokens = maxTokens;
- const temperature = options?.temperature ?? config.temperature;
- if (temperature !== void 0) params.temperature = temperature;
- return await client.chat.completions.create(params, { signal: ctl.signal });
- } catch (err) {
- throw normalizeOpenAIError(classifyAbort(err, ctl.abortReason(), PROVIDER4));
- } finally {
- ctl.dispose();
- }
- }, mergeRetryOptsWithSignal(retryOpts, options?.signal));
+ const rawResponse = await withRetry(
+ async () => {
+ const ctl = createAttemptController(options?.signal, effectiveTimeoutMs);
+ try {
+ const params = {
+ model,
+ messages: openAIMessages,
+ stream: false,
+ response_format: { type: "json_object" }
+ };
+ const maxTokens = options?.maxTokens ?? config.maxTokens;
+ if (maxTokens !== void 0) params.max_tokens = maxTokens;
+ const temperature = options?.temperature ?? config.temperature;
+ if (temperature !== void 0) params.temperature = temperature;
+ return await client.chat.completions.create(params, { signal: ctl.signal });
+ } catch (err) {
+ throw normalizeOpenAIError(classifyAbort(err, ctl.abortReason(), PROVIDER4));
+ } finally {
+ ctl.dispose();
+ }
+ },
+ mergeRetryOptsWithSignal(retryOpts, options?.signal)
+ );
  const rawContent = rawResponse.choices[0]?.message.content ?? "";
  let parsed;
  try {
@@ -1035,6 +1405,8 @@ function createOpenAIProvider(config) {
  }
  return {
  data,
+ model: rawResponse.model,
+ id: rawResponse.id,
  usage: normalizeUsage4(rawResponse.usage),
  latencyMs: Date.now() - start
  };
@@ -1128,40 +1500,43 @@ function createPerplexityProvider(config) {
  const effectiveTimeoutMs = options?.timeoutMs ?? config.timeoutMs ?? 3e4;
  const start = Date.now();
  const extraParams = extractProviderOptions(options?.providerOptions);
- return withRetry(async () => {
- const ctl = createAttemptController(options?.signal, effectiveTimeoutMs);
- try {
- const params = {
- model,
- messages: chatMessages,
- stream: false,
- ...extraParams
- };
- const maxTokens = options?.maxTokens ?? config.maxTokens;
- if (maxTokens !== void 0) params.max_tokens = maxTokens;
- const temperature = options?.temperature ?? config.temperature;
- if (temperature !== void 0) params.temperature = temperature;
- const rawResponse = await client.chat.completions.create(
- params,
- { signal: ctl.signal }
- );
- const response = rawResponse;
- const content = response.choices.map((c) => c.message.content ?? "").join("");
- const result = {
- content,
- model: response.model,
- usage: normalizeUsage5(response.usage),
- latencyMs: Date.now() - start
- };
- const citations = extractCitations(response);
- if (citations !== void 0) result.citations = citations;
- return result;
- } catch (err) {
- throw normalizePerplexityError(classifyAbort(err, ctl.abortReason(), PROVIDER5));
- } finally {
- ctl.dispose();
- }
- }, mergeRetryOptsWithSignal(retryOpts, options?.signal));
+ return withRetry(
+ async () => {
+ const ctl = createAttemptController(options?.signal, effectiveTimeoutMs);
+ try {
+ const params = {
+ model,
+ messages: chatMessages,
+ stream: false,
+ ...extraParams
+ };
+ const maxTokens = options?.maxTokens ?? config.maxTokens;
+ if (maxTokens !== void 0) params.max_tokens = maxTokens;
+ const temperature = options?.temperature ?? config.temperature;
+ if (temperature !== void 0) params.temperature = temperature;
+ const rawResponse = await client.chat.completions.create(
+ params,
+ { signal: ctl.signal }
+ );
+ const response = rawResponse;
+ const content = response.choices.map((c) => c.message.content ?? "").join("");
+ const result = {
+ content,
+ model: response.model,
+ usage: normalizeUsage5(response.usage),
+ latencyMs: Date.now() - start
+ };
+ const citations = extractCitations(response);
+ if (citations !== void 0) result.citations = citations;
+ return result;
+ } catch (err) {
+ throw normalizePerplexityError(classifyAbort(err, ctl.abortReason(), PROVIDER5));
+ } finally {
+ ctl.dispose();
+ }
+ },
+ mergeRetryOptsWithSignal(retryOpts, options?.signal)
+ );
  }
  async function* stream(messages, options) {
  const model = options?.model ?? config.model;
@@ -1222,30 +1597,34 @@ function createPerplexityProvider(config) {
  const effectiveTimeoutMs = options?.timeoutMs ?? config.timeoutMs ?? 3e4;
  const start = Date.now();
  const extraParams = extractProviderOptions(options?.providerOptions);
- const rawResponse = await withRetry(async () => {
- const ctl = createAttemptController(options?.signal, effectiveTimeoutMs);
- try {
- const params = {
- model,
- messages: chatMessages,
- stream: false,
- ...extraParams
- };
- const maxTokens = options?.maxTokens ?? config.maxTokens;
- if (maxTokens !== void 0) params.max_tokens = maxTokens;
- const temperature = options?.temperature ?? config.temperature;
- if (temperature !== void 0) params.temperature = temperature;
- return await client.chat.completions.create(
- params,
- { signal: ctl.signal }
- );
- } catch (err) {
- throw normalizePerplexityError(classifyAbort(err, ctl.abortReason(), PROVIDER5));
- } finally {
- ctl.dispose();
- }
- }, mergeRetryOptsWithSignal(retryOpts, options?.signal));
- const rawContent = rawResponse.choices[0]?.message.content ?? "";
+ const rawResponse = await withRetry(
+ async () => {
+ const ctl = createAttemptController(options?.signal, effectiveTimeoutMs);
+ try {
+ const params = {
+ model,
+ messages: chatMessages,
+ stream: false,
+ ...extraParams
+ };
+ const maxTokens = options?.maxTokens ?? config.maxTokens;
+ if (maxTokens !== void 0) params.max_tokens = maxTokens;
+ const temperature = options?.temperature ?? config.temperature;
+ if (temperature !== void 0) params.temperature = temperature;
+ return await client.chat.completions.create(
+ params,
+ { signal: ctl.signal }
+ );
+ } catch (err) {
+ throw normalizePerplexityError(classifyAbort(err, ctl.abortReason(), PROVIDER5));
+ } finally {
+ ctl.dispose();
+ }
+ },
+ mergeRetryOptsWithSignal(retryOpts, options?.signal)
+ );
+ const response = rawResponse;
+ const rawContent = response.choices[0]?.message.content ?? "";
  let parsed;
  try {
  const cleaned = rawContent.replace(/<think>[\s\S]*?<\/think>/i, "").replace(/^```(?:json)?\s*/i, "").replace(/\s*```$/, "").trim();
@@ -1269,11 +1648,16 @@ function createPerplexityProvider(config) {
  cause: err
  });
  }
- return {
+ const citations = extractCitations(response);
+ const result = {
  data,
- usage: normalizeUsage5(rawResponse.usage),
+ model: response.model,
+ id: response.id,
+ usage: normalizeUsage5(response.usage),
  latencyMs: Date.now() - start
  };
+ if (citations !== void 0) result.citations = citations;
+ return result;
  }
  return {
  config,
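
The final hunk mirrors `complete()` by attaching Perplexity citations to structured results. A consumer-side sketch; `createPerplexityProvider` is the factory in this diff, while the config fields, model name, and schema are illustrative:

```ts
import { z } from "zod";

const pplx = createPerplexityProvider({
  apiKey: process.env.PERPLEXITY_API_KEY, // assumed config field
  model: "sonar",
});

const res = await pplx.structured(
  [{ role: "user", content: "Current Node.js LTS major version, as JSON." }],
  z.object({ major: z.number().int() }),
);

console.log(res.data.major, res.model, res.id);
// New in 0.4.0: citations accompany structured output when the API returns
// them, exactly as they already did for complete().
if (res.citations !== undefined) {
  for (const url of res.citations) console.log("source:", url);
}
```
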