@diabolicallabs/llm-client 0.3.0 → 0.4.0

package/dist/index.js CHANGED
@@ -38,7 +38,7 @@ function createAttemptController(callerSignal, timeoutMs) {
  timer.unref?.();
  const onCallerAbort = () => {
  reason ??= "caller";
- internal.abort(callerSignal.reason);
+ if (callerSignal !== void 0) internal.abort(callerSignal.reason);
  };
  if (callerSignal !== void 0) {
  if (callerSignal.aborted) {
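
Note on the hunk above: `onCallerAbort` previously read `callerSignal.reason` unconditionally even though `callerSignal` is an optional parameter; 0.4.0 guards the access. A minimal sketch of the pattern the minified code implements — the names and the guarded abort come from the diff, everything else is a reconstruction, not the package's actual source:

```ts
// Reconstruction of the attempt-controller pattern: one internal
// AbortController fans in an optional caller signal and a per-attempt timeout.
type AbortOrigin = "timeout" | "caller";

function createAttemptController(callerSignal: AbortSignal | undefined, timeoutMs: number) {
  const internal = new AbortController();
  let reason: AbortOrigin | undefined;

  const timer = setTimeout(() => {
    reason ??= "timeout";
    internal.abort(new Error(`attempt timed out after ${timeoutMs}ms`));
  }, timeoutMs);
  (timer as { unref?: () => void }).unref?.(); // Node-only: don't hold the event loop open

  const onCallerAbort = () => {
    reason ??= "caller";
    // the v0.4.0 fix: guard before reading .reason on an optional signal
    if (callerSignal !== undefined) internal.abort(callerSignal.reason);
  };
  if (callerSignal !== undefined) {
    if (callerSignal.aborted) onCallerAbort();
    else callerSignal.addEventListener("abort", onCallerAbort, { once: true });
  }

  return {
    signal: internal.signal,   // passed to the SDK call
    abortReason: () => reason, // "timeout" | "caller" | undefined, fed to classifyAbort
    dispose: () => {
      clearTimeout(timer);
      callerSignal?.removeEventListener("abort", onCallerAbort);
    },
  };
}
```
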
@@ -126,7 +126,6 @@ function classifyAbort(err, abortReason, provider) {
  retryable: true,
  cause: err
  });
- case "caller":
  default:
  return new LlmError({
  message: "llm-client: cancelled by caller",
@@ -144,6 +143,128 @@ function isAbortError(err) {
  return false;
  }

+ // src/json-schema.ts
+ import { z } from "zod";
+ function isZodSchema(s) {
+ if (typeof s !== "object" || s === null) return false;
+ const hasZod4Marker = "_zod" in s && typeof s._zod === "object";
+ const hasZod3Marker = "_def" in s;
+ if (hasZod3Marker && !hasZod4Marker) {
+ throw new LlmError({
+ message: 'llm-client: detected a Zod 3 schema. Upgrade to Zod 4 to use strict structured-output mode, or pass providerOptions.structuredMode = "prompt" to keep the v0.3.0 prompt-only path.',
+ provider: "llm-client",
+ retryable: false,
+ kind: "unknown"
+ });
+ }
+ if (!hasZod4Marker) return false;
+ return typeof s.parse === "function";
+ }
+ function toProviderSchema(schema, profile) {
+ const target = profile === "gemini" ? "openapi-3.0" : "draft-2020-12";
+ let json;
+ try {
+ json = z.toJSONSchema(schema, {
+ target,
+ unrepresentable: "throw",
+ cycles: "throw"
+ });
+ } catch (e) {
+ throw new LlmError({
+ message: `llm-client: schema is not representable for ${profile} strict mode \u2014 ${e.message}. Pass providerOptions.structuredMode = 'prompt' to fall back to prompt-only mode.`,
+ provider: profile,
+ retryable: false,
+ kind: "unknown",
+ cause: e
+ });
+ }
+ if (profile === "openai") return openAIStrictPostprocess(json);
+ if (profile === "gemini") return geminiPostprocess(json);
+ return anthropicPostprocess(json);
+ }
+ function openAIStrictPostprocess(node) {
+ if (typeof node !== "object" || node === null) {
+ return node;
+ }
+ if (Array.isArray(node)) {
+ return node.map(openAIStrictPostprocess);
+ }
+ const src = node;
+ const obj = { ...src };
+ delete obj.$schema;
+ delete obj.format;
+ delete obj.pattern;
+ delete obj.default;
+ delete obj.examples;
+ if (obj.type === "object" && obj.properties !== void 0) {
+ const props = obj.properties;
+ const allKeys = Object.keys(props);
+ obj.required = allKeys;
+ obj.additionalProperties = false;
+ const processedProps = {};
+ for (const key of allKeys) {
+ processedProps[key] = openAIStrictPostprocess(props[key]);
+ }
+ obj.properties = processedProps;
+ }
+ if (obj.items !== void 0) {
+ obj.items = openAIStrictPostprocess(obj.items);
+ }
+ if (Array.isArray(obj.anyOf)) {
+ obj.anyOf = obj.anyOf.map(openAIStrictPostprocess);
+ }
+ if (Array.isArray(obj.oneOf)) {
+ obj.oneOf = obj.oneOf.map(openAIStrictPostprocess);
+ }
+ if (Array.isArray(obj.allOf)) {
+ obj.allOf = obj.allOf.map(openAIStrictPostprocess);
+ }
+ if (Array.isArray(obj.prefixItems)) {
+ obj.prefixItems = obj.prefixItems.map(openAIStrictPostprocess);
+ }
+ return obj;
+ }
+ function anthropicPostprocess(node) {
+ const obj = { ...node };
+ delete obj.$schema;
+ return obj;
+ }
+ function geminiPostprocess(node) {
+ if (typeof node !== "object" || node === null) {
+ return node;
+ }
+ if (Array.isArray(node)) {
+ return node.map(geminiPostprocess);
+ }
+ const src = node;
+ const obj = { ...src };
+ delete obj.$schema;
+ delete obj.additionalProperties;
+ delete obj.default;
+ delete obj.examples;
+ if (obj.properties !== void 0) {
+ const props = obj.properties;
+ const processedProps = {};
+ for (const key of Object.keys(props)) {
+ processedProps[key] = geminiPostprocess(props[key]);
+ }
+ obj.properties = processedProps;
+ }
+ if (obj.items !== void 0) {
+ obj.items = geminiPostprocess(obj.items);
+ }
+ if (Array.isArray(obj.anyOf)) {
+ obj.anyOf = obj.anyOf.map(geminiPostprocess);
+ }
+ if (Array.isArray(obj.oneOf)) {
+ obj.oneOf = obj.oneOf.map(geminiPostprocess);
+ }
+ if (Array.isArray(obj.allOf)) {
+ obj.allOf = obj.allOf.map(geminiPostprocess);
+ }
+ return obj;
+ }
+
  // src/retry.ts
  var RETRYABLE_HTTP_STATUSES = /* @__PURE__ */ new Set([429, 502, 503, 504]);
  var RETRYABLE_ERROR_CODES = /* @__PURE__ */ new Set(["ECONNRESET", "ETIMEDOUT", "ECONNABORTED"]);
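
The new `src/json-schema.ts` module gates strict structured output on Zod 4 (detected via the `_zod` marker), hard-fails Zod 3 schemas with an upgrade hint, and converts schemas per provider profile. A usage sketch of the conversion step, assuming Zod 4 is installed; the `Recipe` schema is illustrative:

```ts
import { z } from "zod"; // Zod 4

const Recipe = z.object({
  title: z.string(),
  servings: z.number().int().optional(),
  steps: z.array(z.string()),
});

// toProviderSchema(Recipe, "openai") starts from this call; the gemini
// profile uses target: "openapi-3.0" instead.
const json = z.toJSONSchema(Recipe, {
  target: "draft-2020-12",
  unrepresentable: "throw", // z.date(), z.bigint() etc. raise instead of being silently dropped
  cycles: "throw",          // recursive schemas raise instead of emitting $refs
});

// openAIStrictPostprocess then strips $schema/format/pattern/default/examples,
// sets required to every property key, and sets additionalProperties: false,
// which is the shape OpenAI strict mode demands. Note the side effect: an
// .optional() field like `servings` becomes required at the API level.
console.log(json);
```
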
@@ -219,7 +340,13 @@ function normalizeThrownError(err, provider) {
  cause: err
  });
  }
- return new LlmError({ message: err.message, provider, kind: "network", retryable: true, cause: err });
+ return new LlmError({
+ message: err.message,
+ provider,
+ kind: "network",
+ retryable: true,
+ cause: err
+ });
  }
  if (statusCode !== void 0) {
  const retryable = isRetryableStatus(statusCode);
@@ -317,33 +444,36 @@ function createAnthropicProvider(config) {
  const { system, messages: anthropicMessages } = buildAnthropicMessages(messages);
  const effectiveTimeoutMs = options?.timeoutMs ?? config.timeoutMs ?? 3e4;
  const start = Date.now();
- return withRetry(async () => {
- const ctl = createAttemptController(options?.signal, effectiveTimeoutMs);
- try {
- const params = {
- model,
- messages: anthropicMessages,
- max_tokens: options?.maxTokens ?? config.maxTokens ?? 1024
- };
- if (system !== void 0) params.system = system;
- const temperature = options?.temperature ?? config.temperature;
- if (temperature !== void 0) {
- params.temperature = temperature;
+ return withRetry(
+ async () => {
+ const ctl = createAttemptController(options?.signal, effectiveTimeoutMs);
+ try {
+ const params = {
+ model,
+ messages: anthropicMessages,
+ max_tokens: options?.maxTokens ?? config.maxTokens ?? 1024
+ };
+ if (system !== void 0) params.system = system;
+ const temperature = options?.temperature ?? config.temperature;
+ if (temperature !== void 0) {
+ params.temperature = temperature;
+ }
+ const response = await client.messages.create(params, { signal: ctl.signal });
+ const content = response.content.filter((block) => block.type === "text").map((block) => block.text).join("");
+ return {
+ content,
+ model: response.model,
+ usage: normalizeUsage(response.usage),
+ latencyMs: Date.now() - start
+ };
+ } catch (err) {
+ throw normalizeAnthropicError(classifyAbort(err, ctl.abortReason(), PROVIDER));
+ } finally {
+ ctl.dispose();
  }
- const response = await client.messages.create(params, { signal: ctl.signal });
- const content = response.content.filter((block) => block.type === "text").map((block) => block.text).join("");
- return {
- content,
- model: response.model,
- usage: normalizeUsage(response.usage),
- latencyMs: Date.now() - start
- };
- } catch (err) {
- throw normalizeAnthropicError(classifyAbort(err, ctl.abortReason(), PROVIDER));
- } finally {
- ctl.dispose();
- }
- }, mergeRetryOptsWithSignal(retryOpts, options?.signal));
+ },
+ mergeRetryOptsWithSignal(retryOpts, options?.signal)
+ );
  }
  async function* stream(messages, options) {
  const model = options?.model ?? config.model;
@@ -394,6 +524,76 @@ function createAnthropicProvider(config) {
  }
  }
  async function structured(messages, schema, options) {
+ const structuredMode = options?.providerOptions?.["structuredMode"];
+ const useStrict = isZodSchema(schema) && structuredMode !== "prompt";
+ if (!useStrict) {
+ return structuredPromptFallback(messages, schema, options);
+ }
+ const inputSchema = toProviderSchema(schema, "anthropic");
+ const { system, messages: anthropicMessages } = buildAnthropicMessages(messages);
+ const effectiveTimeoutMs = options?.timeoutMs ?? config.timeoutMs ?? 3e4;
+ const start = Date.now();
+ const response = await withRetry(
+ async () => {
+ const ctl = createAttemptController(options?.signal, effectiveTimeoutMs);
+ try {
+ const params = {
+ model: options?.model ?? config.model,
+ messages: anthropicMessages,
+ max_tokens: options?.maxTokens ?? config.maxTokens ?? 1024,
+ tools: [
+ {
+ name: "extract",
+ description: "Return the structured data.",
+ input_schema: inputSchema
+ }
+ ],
+ tool_choice: { type: "tool", name: "extract" }
+ };
+ if (system !== void 0) params.system = system;
+ const temperature = options?.temperature ?? config.temperature;
+ if (temperature !== void 0) params.temperature = temperature;
+ return await client.messages.create(params, { signal: ctl.signal });
+ } catch (err) {
+ throw normalizeAnthropicError(classifyAbort(err, ctl.abortReason(), PROVIDER));
+ } finally {
+ ctl.dispose();
+ }
+ },
+ mergeRetryOptsWithSignal(retryOpts, options?.signal)
+ );
+ const toolBlock = response.content.find(
+ (b) => b.type === "tool_use" && b.name === "extract"
+ );
+ if (toolBlock === void 0) {
+ const textContent = response.content.filter((b) => b.type === "text").map((b) => b.text).join("");
+ throw new LlmError({
+ message: `Anthropic structured: model did not call the extract tool (stop_reason=${response.stop_reason}). Text: ${textContent.slice(0, 200)}`,
+ provider: PROVIDER,
+ retryable: false,
+ kind: "unknown"
+ });
+ }
+ let data;
+ try {
+ data = schema.parse(toolBlock.input);
+ } catch (err) {
+ throw new LlmError({
+ message: `Anthropic structured output: tool response failed schema validation. ${String(err)}`,
+ provider: PROVIDER,
+ retryable: false,
+ cause: err
+ });
+ }
+ return {
+ data,
+ model: response.model,
+ id: response.id,
+ usage: normalizeUsage(response.usage),
+ latencyMs: Date.now() - start
+ };
+ }
+ async function structuredPromptFallback(messages, schema, options) {
  const jsonSystemInstruction = {
  role: "system",
  content: "You must respond with valid JSON only. No explanations, no markdown code fences, no extra text. Your entire response must be valid JSON that can be parsed with JSON.parse()."
@@ -426,6 +626,7 @@ function createAnthropicProvider(config) {
  }
  return {
  data,
+ model: response.model,
  usage: response.usage,
  latencyMs: Date.now() - start
  };
@@ -501,32 +702,35 @@ function createDeepSeekProvider(config) {
  const chatMessages = buildMessages(messages);
  const effectiveTimeoutMs = options?.timeoutMs ?? config.timeoutMs ?? 3e4;
  const start = Date.now();
- return withRetry(async () => {
- const ctl = createAttemptController(options?.signal, effectiveTimeoutMs);
- try {
- const params = {
- model,
- messages: chatMessages,
- stream: false
- };
- const maxTokens = options?.maxTokens ?? config.maxTokens;
- if (maxTokens !== void 0) params.max_tokens = maxTokens;
- const temperature = options?.temperature ?? config.temperature;
- if (temperature !== void 0) params.temperature = temperature;
- const response = await client.chat.completions.create(params, { signal: ctl.signal });
- const content = response.choices.map((c) => c.message.content ?? "").join("");
- return {
- content,
- model: response.model,
- usage: normalizeUsage2(response.usage),
- latencyMs: Date.now() - start
- };
- } catch (err) {
- throw normalizeDeepSeekError(classifyAbort(err, ctl.abortReason(), PROVIDER2));
- } finally {
- ctl.dispose();
- }
- }, mergeRetryOptsWithSignal(retryOpts, options?.signal));
+ return withRetry(
+ async () => {
+ const ctl = createAttemptController(options?.signal, effectiveTimeoutMs);
+ try {
+ const params = {
+ model,
+ messages: chatMessages,
+ stream: false
+ };
+ const maxTokens = options?.maxTokens ?? config.maxTokens;
+ if (maxTokens !== void 0) params.max_tokens = maxTokens;
+ const temperature = options?.temperature ?? config.temperature;
+ if (temperature !== void 0) params.temperature = temperature;
+ const response = await client.chat.completions.create(params, { signal: ctl.signal });
+ const content = response.choices.map((c) => c.message.content ?? "").join("");
+ return {
+ content,
+ model: response.model,
+ usage: normalizeUsage2(response.usage),
+ latencyMs: Date.now() - start
+ };
+ } catch (err) {
+ throw normalizeDeepSeekError(classifyAbort(err, ctl.abortReason(), PROVIDER2));
+ } finally {
+ ctl.dispose();
+ }
+ },
+ mergeRetryOptsWithSignal(retryOpts, options?.signal)
+ );
  }
  async function* stream(messages, options) {
  const model = options?.model ?? config.model;
@@ -581,25 +785,28 @@ function createDeepSeekProvider(config) {
  const chatMessages = buildMessages(augmentedMessages);
  const effectiveTimeoutMs = options?.timeoutMs ?? config.timeoutMs ?? 3e4;
  const start = Date.now();
- const rawResponse = await withRetry(async () => {
- const ctl = createAttemptController(options?.signal, effectiveTimeoutMs);
- try {
- const params = {
- model,
- messages: chatMessages,
- stream: false
- };
- const maxTokens = options?.maxTokens ?? config.maxTokens;
- if (maxTokens !== void 0) params.max_tokens = maxTokens;
- const temperature = options?.temperature ?? config.temperature;
- if (temperature !== void 0) params.temperature = temperature;
- return await client.chat.completions.create(params, { signal: ctl.signal });
- } catch (err) {
- throw normalizeDeepSeekError(classifyAbort(err, ctl.abortReason(), PROVIDER2));
- } finally {
- ctl.dispose();
- }
- }, mergeRetryOptsWithSignal(retryOpts, options?.signal));
+ const rawResponse = await withRetry(
+ async () => {
+ const ctl = createAttemptController(options?.signal, effectiveTimeoutMs);
+ try {
+ const params = {
+ model,
+ messages: chatMessages,
+ stream: false
+ };
+ const maxTokens = options?.maxTokens ?? config.maxTokens;
+ if (maxTokens !== void 0) params.max_tokens = maxTokens;
+ const temperature = options?.temperature ?? config.temperature;
+ if (temperature !== void 0) params.temperature = temperature;
+ return await client.chat.completions.create(params, { signal: ctl.signal });
+ } catch (err) {
+ throw normalizeDeepSeekError(classifyAbort(err, ctl.abortReason(), PROVIDER2));
+ } finally {
+ ctl.dispose();
+ }
+ },
+ mergeRetryOptsWithSignal(retryOpts, options?.signal)
+ );
  const rawContent = rawResponse.choices[0]?.message.content ?? "";
  let parsed;
  try {
@@ -626,6 +833,8 @@ function createDeepSeekProvider(config) {
  }
  return {
  data,
+ model: rawResponse.model,
+ id: rawResponse.id,
  usage: normalizeUsage2(rawResponse.usage),
  latencyMs: Date.now() - start
  };
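
Across the providers in this diff, the structured() return value is enriched the same way: `model` and, where the API supplies one, `id` now ride along with `data`, `usage`, and `latencyMs`. The overall shape, inferred from these return statements (field names match the hunks; the type name and the usage shape are assumptions):

```ts
// Inferred from the return statements in this diff, not the package's types.
interface StructuredResult<T> {
  data: T;              // schema.parse output
  model: string;        // new in 0.4.0: echoed from the provider response
  id?: string;          // new in 0.4.0: provider request id, where one exists
  usage?: unknown;      // provider-normalized token usage
  latencyMs: number;    // wall clock for the whole call, retries included
  citations?: string[]; // Perplexity only, when returned (see the last hunk)
}
```
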
@@ -709,31 +918,34 @@ function createGeminiProvider(config) {
  const { system, contents } = buildGeminiContents(messages);
  const effectiveTimeoutMs = options?.timeoutMs ?? configTimeoutMs;
  const start = Date.now();
- return withRetry(async () => {
- const ctl = createAttemptController(options?.signal, effectiveTimeoutMs);
- try {
- const geminiConfig = {};
- if (system !== void 0) geminiConfig.systemInstruction = system;
- const maxTokens = options?.maxTokens ?? config.maxTokens;
- if (maxTokens !== void 0) geminiConfig.maxOutputTokens = maxTokens;
- const temperature = options?.temperature ?? config.temperature;
- if (temperature !== void 0) geminiConfig.temperature = temperature;
- const response = await Promise.race([
- ai.models.generateContent({ model, contents, config: geminiConfig }),
- makeAbortRacePromise(ctl.signal)
- ]);
- return {
- content: response.text ?? "",
- model,
- usage: normalizeUsage3(response.usageMetadata),
- latencyMs: Date.now() - start
- };
- } catch (err) {
- throw normalizeGeminiError(classifyAbort(err, ctl.abortReason(), PROVIDER3));
- } finally {
- ctl.dispose();
- }
- }, mergeRetryOptsWithSignal(retryOpts, options?.signal));
+ return withRetry(
+ async () => {
+ const ctl = createAttemptController(options?.signal, effectiveTimeoutMs);
+ try {
+ const geminiConfig = {};
+ if (system !== void 0) geminiConfig.systemInstruction = system;
+ const maxTokens = options?.maxTokens ?? config.maxTokens;
+ if (maxTokens !== void 0) geminiConfig.maxOutputTokens = maxTokens;
+ const temperature = options?.temperature ?? config.temperature;
+ if (temperature !== void 0) geminiConfig.temperature = temperature;
+ const response = await Promise.race([
+ ai.models.generateContent({ model, contents, config: geminiConfig }),
+ makeAbortRacePromise(ctl.signal)
+ ]);
+ return {
+ content: response.text ?? "",
+ model,
+ usage: normalizeUsage3(response.usageMetadata),
+ latencyMs: Date.now() - start
+ };
+ } catch (err) {
+ throw normalizeGeminiError(classifyAbort(err, ctl.abortReason(), PROVIDER3));
+ } finally {
+ ctl.dispose();
+ }
+ },
+ mergeRetryOptsWithSignal(retryOpts, options?.signal)
+ );
  }
  async function* stream(messages, options) {
  const model = options?.model ?? config.model;
@@ -778,6 +990,75 @@ function createGeminiProvider(config) {
  }
  }
  async function structured(messages, schema, options) {
+ const structuredMode = options?.providerOptions?.["structuredMode"];
+ const useStrict = isZodSchema(schema) && structuredMode !== "prompt";
+ if (!useStrict) {
+ return structuredPromptFallback(messages, schema, options);
+ }
+ const responseSchemaObj = toProviderSchema(schema, "gemini");
+ const model = options?.model ?? config.model;
+ const { system, contents } = buildGeminiContents(messages);
+ const effectiveTimeoutMs = options?.timeoutMs ?? configTimeoutMs;
+ const start = Date.now();
+ const rawResponse = await withRetry(
+ async () => {
+ const ctl = createAttemptController(options?.signal, effectiveTimeoutMs);
+ try {
+ const geminiConfig = {
+ responseMimeType: "application/json",
+ // responseSchema SDK type is permissive; cast through never to avoid SDK type mismatch
+ responseSchema: responseSchemaObj
+ };
+ if (system !== void 0) geminiConfig.systemInstruction = system;
+ const maxTokens = options?.maxTokens ?? config.maxTokens;
+ if (maxTokens !== void 0) geminiConfig.maxOutputTokens = maxTokens;
+ const temperature = options?.temperature ?? config.temperature;
+ if (temperature !== void 0) geminiConfig.temperature = temperature;
+ return await Promise.race([
+ ai.models.generateContent({ model, contents, config: geminiConfig }),
+ makeAbortRacePromise(ctl.signal)
+ ]);
+ } catch (err) {
+ throw normalizeGeminiError(classifyAbort(err, ctl.abortReason(), PROVIDER3));
+ } finally {
+ ctl.dispose();
+ }
+ },
+ mergeRetryOptsWithSignal(retryOpts, options?.signal)
+ );
+ const rawContent = rawResponse.text ?? "";
+ let parsed;
+ try {
+ const cleaned = rawContent.replace(/^```(?:json)?\s*/i, "").replace(/\s*```$/, "").trim();
+ parsed = JSON.parse(cleaned);
+ } catch (err) {
+ throw new LlmError({
+ message: `Gemini structured output: response is not valid JSON. Raw: ${rawContent.slice(0, 200)}`,
+ provider: PROVIDER3,
+ retryable: false,
+ cause: err
+ });
+ }
+ let data;
+ try {
+ data = schema.parse(parsed);
+ } catch (err) {
+ throw new LlmError({
+ message: `Gemini structured output: response failed schema validation. ${String(err)}`,
+ provider: PROVIDER3,
+ retryable: false,
+ cause: err
+ });
+ }
+ return {
+ data,
+ // Gemini does not return a request ID; model comes from response.modelVersion if available
+ model: rawResponse.modelVersion ?? model,
+ usage: normalizeUsage3(rawResponse.usageMetadata),
+ latencyMs: Date.now() - start
+ };
+ }
+ async function structuredPromptFallback(messages, schema, options) {
  const augmentedMessages = [
  {
  role: "system",
@@ -789,28 +1070,30 @@ function createGeminiProvider(config) {
  const { system, contents } = buildGeminiContents(augmentedMessages);
  const effectiveTimeoutMs = options?.timeoutMs ?? configTimeoutMs;
  const start = Date.now();
- const rawResponse = await withRetry(async () => {
- const ctl = createAttemptController(options?.signal, effectiveTimeoutMs);
- try {
- const geminiConfig = {
- // Instruct Gemini to return JSON directly
- responseMimeType: "application/json"
- };
- if (system !== void 0) geminiConfig.systemInstruction = system;
- const maxTokens = options?.maxTokens ?? config.maxTokens;
- if (maxTokens !== void 0) geminiConfig.maxOutputTokens = maxTokens;
- const temperature = options?.temperature ?? config.temperature;
- if (temperature !== void 0) geminiConfig.temperature = temperature;
- return await Promise.race([
- ai.models.generateContent({ model, contents, config: geminiConfig }),
- makeAbortRacePromise(ctl.signal)
- ]);
- } catch (err) {
- throw normalizeGeminiError(classifyAbort(err, ctl.abortReason(), PROVIDER3));
- } finally {
- ctl.dispose();
- }
- }, mergeRetryOptsWithSignal(retryOpts, options?.signal));
+ const rawResponse = await withRetry(
+ async () => {
+ const ctl = createAttemptController(options?.signal, effectiveTimeoutMs);
+ try {
+ const geminiConfig = {
+ responseMimeType: "application/json"
+ };
+ if (system !== void 0) geminiConfig.systemInstruction = system;
+ const maxTokens = options?.maxTokens ?? config.maxTokens;
+ if (maxTokens !== void 0) geminiConfig.maxOutputTokens = maxTokens;
+ const temperature = options?.temperature ?? config.temperature;
+ if (temperature !== void 0) geminiConfig.temperature = temperature;
+ return await Promise.race([
+ ai.models.generateContent({ model, contents, config: geminiConfig }),
+ makeAbortRacePromise(ctl.signal)
+ ]);
+ } catch (err) {
+ throw normalizeGeminiError(classifyAbort(err, ctl.abortReason(), PROVIDER3));
+ } finally {
+ ctl.dispose();
+ }
+ },
+ mergeRetryOptsWithSignal(retryOpts, options?.signal)
+ );
  const rawContent = rawResponse.text ?? "";
  let parsed;
  try {
@@ -837,6 +1120,7 @@ function createGeminiProvider(config) {
  }
  return {
  data,
+ model,
  usage: normalizeUsage3(rawResponse.usageMetadata),
  latencyMs: Date.now() - start
  };
@@ -910,32 +1194,35 @@ function createOpenAIProvider(config) {
  const openAIMessages = buildOpenAIMessages(messages);
  const effectiveTimeoutMs = options?.timeoutMs ?? config.timeoutMs ?? 3e4;
  const start = Date.now();
- return withRetry(async () => {
- const ctl = createAttemptController(options?.signal, effectiveTimeoutMs);
- try {
- const params = {
- model,
- messages: openAIMessages,
- stream: false
- };
- const maxTokens = options?.maxTokens ?? config.maxTokens;
- if (maxTokens !== void 0) params.max_tokens = maxTokens;
- const temperature = options?.temperature ?? config.temperature;
- if (temperature !== void 0) params.temperature = temperature;
- const response = await client.chat.completions.create(params, { signal: ctl.signal });
- const content = response.choices.map((c) => c.message.content ?? "").join("");
- return {
- content,
- model: response.model,
- usage: normalizeUsage4(response.usage),
- latencyMs: Date.now() - start
- };
- } catch (err) {
- throw normalizeOpenAIError(classifyAbort(err, ctl.abortReason(), PROVIDER4));
- } finally {
- ctl.dispose();
- }
- }, mergeRetryOptsWithSignal(retryOpts, options?.signal));
+ return withRetry(
+ async () => {
+ const ctl = createAttemptController(options?.signal, effectiveTimeoutMs);
+ try {
+ const params = {
+ model,
+ messages: openAIMessages,
+ stream: false
+ };
+ const maxTokens = options?.maxTokens ?? config.maxTokens;
+ if (maxTokens !== void 0) params.max_tokens = maxTokens;
+ const temperature = options?.temperature ?? config.temperature;
+ if (temperature !== void 0) params.temperature = temperature;
+ const response = await client.chat.completions.create(params, { signal: ctl.signal });
+ const content = response.choices.map((c) => c.message.content ?? "").join("");
+ return {
+ content,
+ model: response.model,
+ usage: normalizeUsage4(response.usage),
+ latencyMs: Date.now() - start
+ };
+ } catch (err) {
+ throw normalizeOpenAIError(classifyAbort(err, ctl.abortReason(), PROVIDER4));
+ } finally {
+ ctl.dispose();
+ }
+ },
+ mergeRetryOptsWithSignal(retryOpts, options?.signal)
+ );
  }
  async function* stream(messages, options) {
  const model = options?.model ?? config.model;
@@ -981,6 +1268,86 @@ function createOpenAIProvider(config) {
  }
  }
  async function structured(messages, schema, options) {
+ const structuredMode = options?.providerOptions?.["structuredMode"];
+ const useStrict = isZodSchema(schema) && structuredMode !== "prompt";
+ if (!useStrict) {
+ return structuredPromptFallback(messages, schema, options);
+ }
+ const jsonSchema = toProviderSchema(schema, "openai");
+ const model = options?.model ?? config.model;
+ const openAIMessages = buildOpenAIMessages(messages);
+ const effectiveTimeoutMs = options?.timeoutMs ?? config.timeoutMs ?? 3e4;
+ const start = Date.now();
+ const rawResponse = await withRetry(
+ async () => {
+ const ctl = createAttemptController(options?.signal, effectiveTimeoutMs);
+ try {
+ const params = {
+ model,
+ messages: openAIMessages,
+ stream: false,
+ response_format: {
+ type: "json_schema",
+ json_schema: { name: "response", schema: jsonSchema, strict: true }
+ }
+ };
+ const maxTokens = options?.maxTokens ?? config.maxTokens;
+ if (maxTokens !== void 0) params.max_tokens = maxTokens;
+ const temperature = options?.temperature ?? config.temperature;
+ if (temperature !== void 0) params.temperature = temperature;
+ return await client.chat.completions.create(
+ params,
+ { signal: ctl.signal }
+ );
+ } catch (err) {
+ throw normalizeOpenAIError(classifyAbort(err, ctl.abortReason(), PROVIDER4));
+ } finally {
+ ctl.dispose();
+ }
+ },
+ mergeRetryOptsWithSignal(retryOpts, options?.signal)
+ );
+ const choice = rawResponse.choices[0];
+ if (choice?.message.refusal !== null && choice?.message.refusal !== void 0) {
+ throw new LlmError({
+ message: `OpenAI structured output: model refused to generate. Refusal: ${choice.message.refusal.slice(0, 200)}`,
+ provider: PROVIDER4,
+ retryable: false,
+ kind: "unknown"
+ });
+ }
+ const rawContent = choice?.message.content ?? "";
+ let parsed;
+ try {
+ parsed = JSON.parse(rawContent);
+ } catch (err) {
+ throw new LlmError({
+ message: `OpenAI structured output: response is not valid JSON. Raw: ${rawContent.slice(0, 200)}`,
+ provider: PROVIDER4,
+ retryable: false,
+ cause: err
+ });
+ }
+ let data;
+ try {
+ data = schema.parse(parsed);
+ } catch (err) {
+ throw new LlmError({
+ message: `OpenAI structured output: response failed schema validation. ${String(err)}`,
+ provider: PROVIDER4,
+ retryable: false,
+ cause: err
+ });
+ }
+ return {
+ data,
+ model: rawResponse.model,
+ id: rawResponse.id,
+ usage: normalizeUsage4(rawResponse.usage),
+ latencyMs: Date.now() - start
+ };
+ }
+ async function structuredPromptFallback(messages, schema, options) {
  const jsonSystemInstruction = {
  role: "system",
  content: "You must respond with valid JSON only. No explanations, no markdown code fences, no extra text. Your entire response must be valid JSON that can be parsed with JSON.parse()."
@@ -990,26 +1357,29 @@ function createOpenAIProvider(config) {
  const openAIMessages = buildOpenAIMessages(augmentedMessages);
  const effectiveTimeoutMs = options?.timeoutMs ?? config.timeoutMs ?? 3e4;
  const start = Date.now();
- const rawResponse = await withRetry(async () => {
- const ctl = createAttemptController(options?.signal, effectiveTimeoutMs);
- try {
- const params = {
- model,
- messages: openAIMessages,
- stream: false,
- response_format: { type: "json_object" }
- };
- const maxTokens = options?.maxTokens ?? config.maxTokens;
- if (maxTokens !== void 0) params.max_tokens = maxTokens;
- const temperature = options?.temperature ?? config.temperature;
- if (temperature !== void 0) params.temperature = temperature;
- return await client.chat.completions.create(params, { signal: ctl.signal });
- } catch (err) {
- throw normalizeOpenAIError(classifyAbort(err, ctl.abortReason(), PROVIDER4));
- } finally {
- ctl.dispose();
- }
- }, mergeRetryOptsWithSignal(retryOpts, options?.signal));
+ const rawResponse = await withRetry(
+ async () => {
+ const ctl = createAttemptController(options?.signal, effectiveTimeoutMs);
+ try {
+ const params = {
+ model,
+ messages: openAIMessages,
+ stream: false,
+ response_format: { type: "json_object" }
+ };
+ const maxTokens = options?.maxTokens ?? config.maxTokens;
+ if (maxTokens !== void 0) params.max_tokens = maxTokens;
+ const temperature = options?.temperature ?? config.temperature;
+ if (temperature !== void 0) params.temperature = temperature;
+ return await client.chat.completions.create(params, { signal: ctl.signal });
+ } catch (err) {
+ throw normalizeOpenAIError(classifyAbort(err, ctl.abortReason(), PROVIDER4));
+ } finally {
+ ctl.dispose();
+ }
+ },
+ mergeRetryOptsWithSignal(retryOpts, options?.signal)
+ );
  const rawContent = rawResponse.choices[0]?.message.content ?? "";
  let parsed;
  try {
@@ -1035,6 +1405,8 @@ function createOpenAIProvider(config) {
  }
  return {
  data,
+ model: rawResponse.model,
+ id: rawResponse.id,
  usage: normalizeUsage4(rawResponse.usage),
  latencyMs: Date.now() - start
  };
@@ -1128,40 +1500,43 @@ function createPerplexityProvider(config) {
  const effectiveTimeoutMs = options?.timeoutMs ?? config.timeoutMs ?? 3e4;
  const start = Date.now();
  const extraParams = extractProviderOptions(options?.providerOptions);
- return withRetry(async () => {
- const ctl = createAttemptController(options?.signal, effectiveTimeoutMs);
- try {
- const params = {
- model,
- messages: chatMessages,
- stream: false,
- ...extraParams
- };
- const maxTokens = options?.maxTokens ?? config.maxTokens;
- if (maxTokens !== void 0) params.max_tokens = maxTokens;
- const temperature = options?.temperature ?? config.temperature;
- if (temperature !== void 0) params.temperature = temperature;
- const rawResponse = await client.chat.completions.create(
- params,
- { signal: ctl.signal }
- );
- const response = rawResponse;
- const content = response.choices.map((c) => c.message.content ?? "").join("");
- const result = {
- content,
- model: response.model,
- usage: normalizeUsage5(response.usage),
- latencyMs: Date.now() - start
- };
- const citations = extractCitations(response);
- if (citations !== void 0) result.citations = citations;
- return result;
- } catch (err) {
- throw normalizePerplexityError(classifyAbort(err, ctl.abortReason(), PROVIDER5));
- } finally {
- ctl.dispose();
- }
- }, mergeRetryOptsWithSignal(retryOpts, options?.signal));
+ return withRetry(
+ async () => {
+ const ctl = createAttemptController(options?.signal, effectiveTimeoutMs);
+ try {
+ const params = {
+ model,
+ messages: chatMessages,
+ stream: false,
+ ...extraParams
+ };
+ const maxTokens = options?.maxTokens ?? config.maxTokens;
+ if (maxTokens !== void 0) params.max_tokens = maxTokens;
+ const temperature = options?.temperature ?? config.temperature;
+ if (temperature !== void 0) params.temperature = temperature;
+ const rawResponse = await client.chat.completions.create(
+ params,
+ { signal: ctl.signal }
+ );
+ const response = rawResponse;
+ const content = response.choices.map((c) => c.message.content ?? "").join("");
+ const result = {
+ content,
+ model: response.model,
+ usage: normalizeUsage5(response.usage),
+ latencyMs: Date.now() - start
+ };
+ const citations = extractCitations(response);
+ if (citations !== void 0) result.citations = citations;
+ return result;
+ } catch (err) {
+ throw normalizePerplexityError(classifyAbort(err, ctl.abortReason(), PROVIDER5));
+ } finally {
+ ctl.dispose();
+ }
+ },
+ mergeRetryOptsWithSignal(retryOpts, options?.signal)
+ );
  }
  async function* stream(messages, options) {
  const model = options?.model ?? config.model;
@@ -1222,30 +1597,34 @@ function createPerplexityProvider(config) {
  const effectiveTimeoutMs = options?.timeoutMs ?? config.timeoutMs ?? 3e4;
  const start = Date.now();
  const extraParams = extractProviderOptions(options?.providerOptions);
- const rawResponse = await withRetry(async () => {
- const ctl = createAttemptController(options?.signal, effectiveTimeoutMs);
- try {
- const params = {
- model,
- messages: chatMessages,
- stream: false,
- ...extraParams
- };
- const maxTokens = options?.maxTokens ?? config.maxTokens;
- if (maxTokens !== void 0) params.max_tokens = maxTokens;
- const temperature = options?.temperature ?? config.temperature;
- if (temperature !== void 0) params.temperature = temperature;
- return await client.chat.completions.create(
- params,
- { signal: ctl.signal }
- );
- } catch (err) {
- throw normalizePerplexityError(classifyAbort(err, ctl.abortReason(), PROVIDER5));
- } finally {
- ctl.dispose();
- }
- }, mergeRetryOptsWithSignal(retryOpts, options?.signal));
- const rawContent = rawResponse.choices[0]?.message.content ?? "";
+ const rawResponse = await withRetry(
+ async () => {
+ const ctl = createAttemptController(options?.signal, effectiveTimeoutMs);
+ try {
+ const params = {
+ model,
+ messages: chatMessages,
+ stream: false,
+ ...extraParams
+ };
+ const maxTokens = options?.maxTokens ?? config.maxTokens;
+ if (maxTokens !== void 0) params.max_tokens = maxTokens;
+ const temperature = options?.temperature ?? config.temperature;
+ if (temperature !== void 0) params.temperature = temperature;
+ return await client.chat.completions.create(
+ params,
+ { signal: ctl.signal }
+ );
+ } catch (err) {
+ throw normalizePerplexityError(classifyAbort(err, ctl.abortReason(), PROVIDER5));
+ } finally {
+ ctl.dispose();
+ }
+ },
+ mergeRetryOptsWithSignal(retryOpts, options?.signal)
+ );
+ const response = rawResponse;
+ const rawContent = response.choices[0]?.message.content ?? "";
  let parsed;
  try {
  const cleaned = rawContent.replace(/<think>[\s\S]*?<\/think>/i, "").replace(/^```(?:json)?\s*/i, "").replace(/\s*```$/, "").trim();
@@ -1269,11 +1648,16 @@ function createPerplexityProvider(config) {
  cause: err
  });
  }
- return {
+ const citations = extractCitations(response);
+ const result = {
  data,
- usage: normalizeUsage5(rawResponse.usage),
+ model: response.model,
+ id: response.id,
+ usage: normalizeUsage5(response.usage),
  latencyMs: Date.now() - start
  };
+ if (citations !== void 0) result.citations = citations;
+ return result;
  }
  return {
  config,
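
The final hunk mirrors `complete()` by attaching Perplexity citations to structured results. A consumer-side sketch; `createPerplexityProvider` is the factory in this diff, while the config fields, model name, and schema are illustrative:

```ts
import { z } from "zod";

const pplx = createPerplexityProvider({
  apiKey: process.env.PERPLEXITY_API_KEY, // assumed config field
  model: "sonar",
});

const res = await pplx.structured(
  [{ role: "user", content: "Current Node.js LTS major version, as JSON." }],
  z.object({ major: z.number().int() }),
);

console.log(res.data.major, res.model, res.id);
// New in 0.4.0: citations accompany structured output when the API returns
// them, exactly as they already did for complete().
if (res.citations !== undefined) {
  for (const url of res.citations) console.log("source:", url);
}
```
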