npm - @agentfield/sdk - Versions diffs - 0.1.85-rc.4 → 0.1.85-rc.5 - Mend

@agentfield/sdk 0.1.85-rc.4 → 0.1.85-rc.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4) hide show

package/dist/index.d.ts CHANGED Viewed

@@ -1606,27 +1606,42 @@ declare class MediaProviderError extends Error {
         cause?: unknown;
     });
 }
+/**
+ * Frame guidance for image-to-video models (e.g. Veo).
+ *
+ * Element type of `VideoRequest.frameImages`.
+ */
+interface VideoFrameImage {
+    /**
+     * Image content type — usually "image_url". When omitted, the OpenRouter
+     * provider sends "image_url" in the video request body.
+     */
+    type?: string;
+    /** Image URL or `data:` URL. Sent on the wire under the `image_url` key. */
+    imageUrl: {
+        url: string;
+    };
+    /**
+     * Which frame this image controls. Sent on the wire as `frame_type`, and
+     * only when a value is provided.
+     */
+    frameType?: 'first_frame' | 'last_frame';
+}
+/**
+ * Reference image for style / subject guidance (Veo "reference-to-video").
+ *
+ * Element type of `VideoRequest.inputReferences`.
+ */
+interface VideoInputReference {
+    /**
+     * Image content type. When omitted, the OpenRouter provider sends
+     * "image_url" in the video request body.
+     */
+    type?: string;
+    /** Image location. Sent on the wire under the `image_url` key. */
+    imageUrl: {
+        url: string;
+    };
+}
 interface VideoRequest {
     prompt: string;
     model?: string;
+    /** Duration in seconds (model-dependent — typically 4, 6, or 8). */
     duration?: number;
     resolution?: '480p' | '720p' | '1080p' | '1K' | '2K' | '4K';
     aspectRatio?: '16:9' | '9:16' | '1:1' | '4:3' | '3:4' | '21:9' | '9:21';
+    /** Toggle synchronized audio track (when model supports it). */
     generateAudio?: boolean;
     seed?: number;
-    frameImages?: Array<{
-        type: string;
-        imageUrl: {
-            url: string;
-        };
-        frameType?: string;
-    }>;
-    inputReferences?: Array<{
-        type: string;
-        imageUrl: {
-            url: string;
-        };
-    }>;
+    /** Single input image for image-to-video (legacy convenience field). */
+    imageUrl?: string;
+    /** Per-frame guidance — first_frame / last_frame. Takes precedence over `imageUrl`. */
+    frameImages?: VideoFrameImage[];
+    /** Reference images for style/subject guidance. */
+    inputReferences?: VideoInputReference[];
+    /** Model-specific passthrough parameters (e.g. Veo's `personGeneration`). */
+    extra?: Record<string, unknown>;
     pollInterval?: number;
     timeout?: number;
 }
@@ -1635,21 +1650,37 @@ interface ImageRequest {
     model?: string;
     size?: string;
     quality?: string;
+    /** Reference / source image(s) for image+text→image models (e.g. grok-imagine). */
+    imageUrls?: string[];
     imageConfig?: {
         aspectRatio?: string;
         imageSize?: string;
+        /** Image-to-image blend strength (model-dependent, 0–1). */
+        strength?: number;
+        /** Style hint — Recraft V3 etc. */
+        style?: string;
+        /** RGB color palette — array of [r,g,b]. */
+        rgbColors?: number[][];
+        /** Background color hint as [r,g,b]. */
+        backgroundRgbColor?: number[];
         superResolutionReferences?: string[];
         fontInputs?: Array<{
             fontUrl: string;
             text: string;
         }>;
     };
+    /** Model-specific passthrough parameters. */
+    extra?: Record<string, unknown>;
 }
 interface AudioRequest {
     text: string;
     model?: string;
     voice?: string;
     format?: string;
+    /** Playback speed multiplier (OpenAI TTS only — other models ignore). */
+    speed?: number;
+    /** Model-specific passthrough parameters. */
+    extra?: Record<string, unknown>;
 }
 interface MediaResponse {
     text: string;
@@ -1713,11 +1744,32 @@ declare class OpenRouterMediaProvider implements MediaProvider {
     readonly supportedModalities: string[];
     private readonly baseUrl;
     constructor(options?: OpenRouterMediaProviderOptions);
+    /**
+     * Seed the metadata cache for a model. Useful when running against test
+     * servers that don't expose `GET /models/{id}/endpoints`, or when callers
+     * already know the routing they want.
+     *
+     * Output modalities follow OpenRouter's convention — `["speech"]` for
+     * TTS-only (Kokoro etc.), `["text","audio"]` for chat-audio (gpt-audio
+     * family), `["video"]`, `["image"]`, etc.
+     */
+    seedModelMeta(model: string, outputModalities: string[], inputModalities?: string[]): void;
+    /**
+     * Fetch + cache OpenRouter model metadata so we can route requests to the
+     * right endpoint. On any error returns an empty meta object so callers can
+     * fall back to defaults.
+     */
+    private fetchModelMeta;
     /** Prevent API key from leaking via JSON.stringify (CR-03). */
     toJSON(): Record<string, unknown>;
     generateVideo(request: VideoRequest): Promise<MediaResponse>;
     generateImage(request: ImageRequest): Promise<MediaResponse>;
     generateAudio(request: AudioRequest): Promise<MediaResponse>;
+    /**
+     * Call OpenRouter's OpenAI-compatible TTS endpoint (`POST /audio/speech`).
+     * Returns raw bytes for the requested format; wraps PCM → WAV when needed.
+     */
+    private generateAudioViaSpeechEndpoint;
     private post;
     private get;
 }
@@ -1827,4 +1879,4 @@ declare class ApprovalClient {
     waitForApproval(executionId: string, opts?: WaitForApprovalOptions): Promise<ApprovalStatusResponse>;
 }
-export { ACTIVE_STATUSES, AIClient, type AIConfig, type AIEmbeddingOptions, type AIRequestOptions, type AIStream, type AIToolRequestOptions, Agent, type AgentCapability, type AgentConfig, type AgentHandler, AgentRouter, type AgentRouterOptions, type AgentState, ApprovalClient, type ApprovalRequestResponse, type ApprovalStatusResponse, Audio, type AudioOutput, type AudioRequest, type AuditTrailExport, type AuditTrailFilters, type Awaitable, CANONICAL_STATUSES, type CompactCapability, type CompactDiscoveryResponse, DIDAuthenticator, type DIDIdentity, type DIDIdentityPackage, type DIDRegistrationRequest, type DIDRegistrationResponse, type DeploymentType, DidClient, DidInterface, DidManager, type DiscoveryFormat, type DiscoveryOptions, type DiscoveryPagination, type DiscoveryResponse, type DiscoveryResult, ExecutionContext, type ExecutionCredential, type ExecutionLogAttributes, type ExecutionLogBatchPayload, type ExecutionLogContext, type ExecutionLogEmitOptions, type ExecutionLogEntry, type ExecutionLogLevel, type ExecutionLogTransport, type ExecutionLogTransportPayload, type ExecutionLogWireEntry, ExecutionLogger, type ExecutionLoggerOptions, type ExecutionMetadata, ExecutionStatus, type ExecutionStatusValue, File, type FileOutput, type GenerateCredentialOptions, type GenerateCredentialParams, HEADER_CALLER_DID, HEADER_DID_NONCE, HEADER_DID_SIGNATURE, HEADER_DID_TIMESTAMP, type HarnessConfig, type HarnessOptions, type HarnessProvider, type HarnessResult, HarnessRunner, type HealthStatus, Image, type ImageOutput, type ImageRequest, type MediaProvider, MediaProviderError, type MediaResponse, MediaRouter, type MemoryChangeEvent, MemoryClient, MemoryClientBase, type MemoryConfig, MemoryEventClient, type MemoryEventHandler, type MemoryEventHistoryOptions, MemoryInterface, type MemoryRequestMetadata, type MemoryRequestOptions, type MemoryScope, type MemoryWatchHandler, type Metrics, type MultimodalContent, MultimodalResponse, OpenRouterMediaProvider, type 
OpenRouterMediaProviderOptions, RateLimitError, type RateLimiterOptions, type RawExecutionContext, type RawResult, type ReasonerCapability, ReasonerContext, type ReasonerDefinition, type ReasonerHandler, type ReasonerOptions, type RequestApprovalPayload, SUPPORTED_PROVIDERS, type ServerlessAdapter, type ServerlessEvent, type ServerlessResponse, type SkillCapability, SkillContext, type SkillDefinition, type SkillHandler, type SkillOptions, StatelessRateLimiter, TERMINAL_STATUSES, Text, type ToolCallConfig, type ToolCallRecord, type ToolCallTrace, type ToolsOption, type VectorSearchOptions, type VectorSearchResult, type VideoRequest, type WaitForApprovalOptions, type WorkflowCredential, type WorkflowMetadata, type WorkflowProgressOptions, WorkflowReporter, type ZodSchema, audioFromBase64, audioFromBuffer, audioFromFile, audioFromUrl, buildProvider, buildToolConfig, capabilitiesToTools, capabilityToMetadataTool, capabilityToTool, createExecutionLogger, createHarnessResult, createMetrics, createMultimodalResponse, createRawResult, executeToolCallLoop, fileFromBase64, fileFromBuffer, fileFromPath, fileFromUrl, getCurrentContext, getCurrentSkillContext, imageFromBase64, imageFromBuffer, imageFromFile, imageFromUrl, isActive, isExecutionLogBatchPayload, isTerminal, normalizeExecutionLogEntry, normalizeStatus, serializeExecutionLogEntry, text };
+export { ACTIVE_STATUSES, AIClient, type AIConfig, type AIEmbeddingOptions, type AIRequestOptions, type AIStream, type AIToolRequestOptions, Agent, type AgentCapability, type AgentConfig, type AgentHandler, AgentRouter, type AgentRouterOptions, type AgentState, ApprovalClient, type ApprovalRequestResponse, type ApprovalStatusResponse, Audio, type AudioOutput, type AudioRequest, type AuditTrailExport, type AuditTrailFilters, type Awaitable, CANONICAL_STATUSES, type CompactCapability, type CompactDiscoveryResponse, DIDAuthenticator, type DIDIdentity, type DIDIdentityPackage, type DIDRegistrationRequest, type DIDRegistrationResponse, type DeploymentType, DidClient, DidInterface, DidManager, type DiscoveryFormat, type DiscoveryOptions, type DiscoveryPagination, type DiscoveryResponse, type DiscoveryResult, ExecutionContext, type ExecutionCredential, type ExecutionLogAttributes, type ExecutionLogBatchPayload, type ExecutionLogContext, type ExecutionLogEmitOptions, type ExecutionLogEntry, type ExecutionLogLevel, type ExecutionLogTransport, type ExecutionLogTransportPayload, type ExecutionLogWireEntry, ExecutionLogger, type ExecutionLoggerOptions, type ExecutionMetadata, ExecutionStatus, type ExecutionStatusValue, File, type FileOutput, type GenerateCredentialOptions, type GenerateCredentialParams, HEADER_CALLER_DID, HEADER_DID_NONCE, HEADER_DID_SIGNATURE, HEADER_DID_TIMESTAMP, type HarnessConfig, type HarnessOptions, type HarnessProvider, type HarnessResult, HarnessRunner, type HealthStatus, Image, type ImageOutput, type ImageRequest, type MediaProvider, MediaProviderError, type MediaResponse, MediaRouter, type MemoryChangeEvent, MemoryClient, MemoryClientBase, type MemoryConfig, MemoryEventClient, type MemoryEventHandler, type MemoryEventHistoryOptions, MemoryInterface, type MemoryRequestMetadata, type MemoryRequestOptions, type MemoryScope, type MemoryWatchHandler, type Metrics, type MultimodalContent, MultimodalResponse, OpenRouterMediaProvider, type 
OpenRouterMediaProviderOptions, RateLimitError, type RateLimiterOptions, type RawExecutionContext, type RawResult, type ReasonerCapability, ReasonerContext, type ReasonerDefinition, type ReasonerHandler, type ReasonerOptions, type RequestApprovalPayload, SUPPORTED_PROVIDERS, type ServerlessAdapter, type ServerlessEvent, type ServerlessResponse, type SkillCapability, SkillContext, type SkillDefinition, type SkillHandler, type SkillOptions, StatelessRateLimiter, TERMINAL_STATUSES, Text, type ToolCallConfig, type ToolCallRecord, type ToolCallTrace, type ToolsOption, type VectorSearchOptions, type VectorSearchResult, type VideoFrameImage, type VideoInputReference, type VideoRequest, type WaitForApprovalOptions, type WorkflowCredential, type WorkflowMetadata, type WorkflowProgressOptions, WorkflowReporter, type ZodSchema, audioFromBase64, audioFromBuffer, audioFromFile, audioFromUrl, buildProvider, buildToolConfig, capabilitiesToTools, capabilityToMetadataTool, capabilityToTool, createExecutionLogger, createHarnessResult, createMetrics, createMultimodalResponse, createRawResult, executeToolCallLoop, fileFromBase64, fileFromBuffer, fileFromPath, fileFromUrl, getCurrentContext, getCurrentSkillContext, imageFromBase64, imageFromBuffer, imageFromFile, imageFromUrl, isActive, isExecutionLogBatchPayload, isTerminal, normalizeExecutionLogEntry, normalizeStatus, serializeExecutionLogEntry, text };

package/dist/index.js CHANGED Viewed

@@ -5578,12 +5578,55 @@ var API_TIMEOUT = 3e4;
 var DOWNLOAD_TIMEOUT = 12e4;
 var MAX_CONSECUTIVE_PARSE_ERRORS = 50;
 var apiKeyStore = /* @__PURE__ */ new WeakMap();
+var modelMetaStore = /* @__PURE__ */ new WeakMap();
 function emptyMediaResponse(raw) {
   return { text: "", images: [], audio: null, files: [], videos: [], rawResponse: raw };
 }
 function stripPrefix(model) {
   return model.startsWith("openrouter/") ? model.slice("openrouter/".length) : model;
 }
+/**
+ * Wrap raw PCM sample bytes in a 44-byte RIFF/WAVE header.
+ *
+ * The header always describes mono, 16-bit samples (both hard-coded below).
+ * Every multi-byte header field is written little-endian (the `true` flag on
+ * the DataView setters).
+ *
+ * @param pcm Raw sample bytes. Only `pcm.byteLength` and `Uint8Array.set(pcm)`
+ *   are used, so this presumably expects a `Uint8Array` — confirm at call sites.
+ * @param sampleRate Sample rate written into the header (default 24000).
+ * @returns A new `Uint8Array`: the 44-byte header followed by a copy of `pcm`.
+ */
+function wrapPcm16AsWav(pcm, sampleRate = 24e3) {
+  const channels = 1;
+  const bitsPerSample = 16;
+  // Bytes of audio per second, and bytes per sample frame across all channels.
+  const byteRate = sampleRate * channels * bitsPerSample / 8;
+  const blockAlign = channels * bitsPerSample / 8;
+  const dataSize = pcm.byteLength;
+  const buffer = new ArrayBuffer(44 + dataSize);
+  const view = new DataView(buffer);
+  // Bytes 0-3: ASCII "RIFF".
+  view.setUint8(0, 82);
+  view.setUint8(1, 73);
+  view.setUint8(2, 70);
+  view.setUint8(3, 70);
+  // Bytes 4-7: size of everything after this field (44 - 8 header bytes + data).
+  view.setUint32(4, 36 + dataSize, true);
+  // Bytes 8-11: ASCII "WAVE".
+  view.setUint8(8, 87);
+  view.setUint8(9, 65);
+  view.setUint8(10, 86);
+  view.setUint8(11, 69);
+  // Bytes 12-15: ASCII "fmt " (note the trailing space, code 32).
+  view.setUint8(12, 102);
+  view.setUint8(13, 109);
+  view.setUint8(14, 116);
+  view.setUint8(15, 32);
+  // Bytes 16-19: length of the fmt chunk body (16 bytes).
+  view.setUint32(16, 16, true);
+  // Bytes 20-21: audio format tag; 1 is uncompressed PCM in the WAVE format.
+  view.setUint16(20, 1, true);
+  view.setUint16(22, channels, true);
+  view.setUint32(24, sampleRate, true);
+  view.setUint32(28, byteRate, true);
+  view.setUint16(32, blockAlign, true);
+  view.setUint16(34, bitsPerSample, true);
+  // Bytes 36-39: ASCII "data".
+  view.setUint8(36, 100);
+  view.setUint8(37, 97);
+  view.setUint8(38, 116);
+  view.setUint8(39, 97);
+  // Bytes 40-43: number of sample bytes that follow.
+  view.setUint32(40, dataSize, true);
+  // Copy the samples in directly after the header.
+  new Uint8Array(buffer, 44).set(pcm);
+  return new Uint8Array(buffer);
+}
+/**
+ * Encode bytes as a base64 string using Node's `Buffer`.
+ *
+ * @param bytes Byte source accepted by `Buffer.from` (callers in this file
+ *   pass `Uint8Array` values).
+ * @returns The base64 text for `bytes`.
+ */
+function bytesToBase64(bytes) {
+  return Buffer.from(bytes).toString("base64");
+}
+/**
+ * Decode a base64 string into bytes using Node's `Buffer`.
+ *
+ * @param b64 Base64-encoded text.
+ * @returns A new `Uint8Array` copied from the decoded `Buffer`.
+ */
+function base64ToBytes(b64) {
+  return new Uint8Array(Buffer.from(b64, "base64"));
+}
 function assertSafeUrl(urlStr) {
   let parsed;
   try {
@@ -5623,6 +5666,56 @@ var OpenRouterMediaProvider = class {
       });
     }
     apiKeyStore.set(this, key);
+    modelMetaStore.set(this, /* @__PURE__ */ new Map());
+  }
+  /**
+   * Seed the metadata cache for a model. Useful when running against test
+   * servers that don't expose `GET /models/{id}/endpoints`, or when callers
+   * already know the routing they want.
+   *
+   * Output modalities follow OpenRouter's convention — `["speech"]` for
+   * TTS-only (Kokoro etc.), `["text","audio"]` for chat-audio (gpt-audio
+   * family), `["video"]`, `["image"]`, etc.
+   */
+  seedModelMeta(model, outputModalities, inputModalities = []) {
+    const stripped = stripPrefix(model);
+    const cache = modelMetaStore.get(this);
+    cache.set(stripped, {
+      outputModalities: [...outputModalities],
+      inputModalities: [...inputModalities]
+    });
+  }
+  /**
+   * Fetch + cache OpenRouter model metadata so we can route requests to the
+   * right endpoint. On any error returns an empty meta object so callers can
+   * fall back to defaults.
+   */
+  async fetchModelMeta(model) {
+    const stripped = stripPrefix(model);
+    const cache = modelMetaStore.get(this);
+    const cached = cache.get(stripped);
+    if (cached) return cached;
+    const url = `${this.baseUrl}/models/${stripped}/endpoints`;
+    try {
+      const res = await this.get(url);
+      if (!res.ok) {
+        const meta2 = { outputModalities: [], inputModalities: [] };
+        cache.set(stripped, meta2);
+        return meta2;
+      }
+      const data = await res.json();
+      const arch = data?.data?.architecture ?? {};
+      const meta = {
+        outputModalities: arch.output_modalities ?? [],
+        inputModalities: arch.input_modalities ?? []
+      };
+      cache.set(stripped, meta);
+      return meta;
+    } catch {
+      const meta = { outputModalities: [], inputModalities: [] };
+      cache.set(stripped, meta);
+      return meta;
+    }
   }
   /** Prevent API key from leaking via JSON.stringify (CR-03). */
   toJSON() {
@@ -5646,8 +5739,21 @@ var OpenRouterMediaProvider = class {
     if (request.aspectRatio) body.aspect_ratio = request.aspectRatio;
     if (request.generateAudio != null) body.generate_audio = request.generateAudio;
     if (request.seed != null) body.seed = request.seed;
-    if (request.frameImages) body.frame_images = request.frameImages;
-    if (request.inputReferences) body.input_references = request.inputReferences;
+    if (request.imageUrl) body.image_url = request.imageUrl;
+    if (request.frameImages) {
+      body.frame_images = request.frameImages.map((fi) => ({
+        type: fi.type ?? "image_url",
+        image_url: fi.imageUrl,
+        ...fi.frameType ? { frame_type: fi.frameType } : {}
+      }));
+    }
+    if (request.inputReferences) {
+      body.input_references = request.inputReferences.map((ref) => ({
+        type: ref.type ?? "image_url",
+        image_url: ref.imageUrl
+      }));
+    }
+    if (request.extra) Object.assign(body, request.extra);
     const submitEndpoint = `${this.baseUrl}/videos`;
     const submitRes = await this.post(submitEndpoint, body);
     if (!submitRes.ok) {
@@ -5696,13 +5802,24 @@ var OpenRouterMediaProvider = class {
         { provider: "openrouter", model }
       );
     }
+    const unsignedUrls = jobData.unsigned_urls;
     const unsignedUrl = jobData.unsigned_url;
     const signedUrl = jobData.url;
-    const videoUrl = unsignedUrl ?? signedUrl;
+    const videoUrl = unsignedUrls?.[0] ?? unsignedUrl ?? signedUrl;
     let videoData;
     if (videoUrl) {
       assertSafeUrl(videoUrl);
+      const downloadHeaders = {};
+      try {
+        const host = new URL(videoUrl).hostname.toLowerCase();
+        if (host === "openrouter.ai" || host.endsWith(".openrouter.ai")) {
+          const key = apiKeyStore.get(this);
+          if (key) downloadHeaders.Authorization = `Bearer ${key}`;
+        }
+      } catch {
+      }
       const dlRes = await fetch(videoUrl, {
+        headers: downloadHeaders,
         signal: AbortSignal.timeout(DOWNLOAD_TIMEOUT),
         redirect: "error"
       });
@@ -5727,15 +5844,43 @@ var OpenRouterMediaProvider = class {
   // ── Image ──────────────────────────────────────────────────────────
   async generateImage(request) {
     const model = stripPrefix(request.model ?? "openai/gpt-image-1");
-    const messages = [{ role: "user", content: request.prompt }];
+    let userContent = request.prompt;
+    if (request.imageUrls && request.imageUrls.length > 0) {
+      userContent = [
+        { type: "text", text: request.prompt },
+        ...request.imageUrls.map((url) => ({
+          type: "image_url",
+          image_url: { url }
+        }))
+      ];
+    }
+    const messages = [{ role: "user", content: userContent }];
     const body = {
       model,
       messages,
-      modalities: ["image", "text"]
+      modalities: ["image"]
     };
     if (request.size) body.size = request.size;
     if (request.quality) body.quality = request.quality;
-    if (request.imageConfig) body.image_config = request.imageConfig;
+    if (request.imageConfig) {
+      const ic = request.imageConfig;
+      const out = {};
+      if (ic.aspectRatio) out.aspect_ratio = ic.aspectRatio;
+      if (ic.imageSize) out.image_size = ic.imageSize;
+      if (ic.strength != null) out.strength = ic.strength;
+      if (ic.style) out.style = ic.style;
+      if (ic.rgbColors) out.rgb_colors = ic.rgbColors;
+      if (ic.backgroundRgbColor) out.background_rgb_color = ic.backgroundRgbColor;
+      if (ic.superResolutionReferences) out.super_resolution_references = ic.superResolutionReferences;
+      if (ic.fontInputs) {
+        out.font_inputs = ic.fontInputs.map((fi) => ({
+          font_url: fi.fontUrl,
+          text: fi.text
+        }));
+      }
+      body.image_config = out;
+    }
+    if (request.extra) Object.assign(body, request.extra);
     const endpoint = `${this.baseUrl}/chat/completions`;
     const res = await this.post(endpoint, body);
     if (!res.ok) {
@@ -5746,6 +5891,15 @@ var OpenRouterMediaProvider = class {
     }
     const data = await res.json();
     const resp = emptyMediaResponse(data);
+    const pushImageFromUrl = (url) => {
+      if (!url) return;
+      if (url.startsWith("data:")) {
+        const b64 = url.split(",", 2)[1];
+        resp.images.push({ url, b64Json: b64 });
+      } else {
+        resp.images.push({ url });
+      }
+    };
     const choices = data.choices;
     if (choices) {
       for (const choice of choices) {
@@ -5761,16 +5915,17 @@ var OpenRouterMediaProvider = class {
               resp.text += p.text;
             } else if (p.type === "image_url") {
               const imgUrl = p.image_url;
-              const url = imgUrl?.url;
-              if (url?.startsWith("data:")) {
-                const b64 = url.split(",", 2)[1];
-                resp.images.push({ url, b64Json: b64 });
-              } else if (url) {
-                resp.images.push({ url });
-              }
+              pushImageFromUrl(imgUrl?.url);
             }
           }
         }
+        const images = msg.images;
+        if (Array.isArray(images)) {
+          for (const img of images) {
+            const imgUrl = img.image_url;
+            pushImageFromUrl(imgUrl?.url);
+          }
+        }
       }
     }
     return resp;
@@ -5778,6 +5933,20 @@ var OpenRouterMediaProvider = class {
   // ── Audio ──────────────────────────────────────────────────────────
   async generateAudio(request) {
     const model = stripPrefix(request.model ?? "openai/gpt-4o-mini-tts");
+    const requestedFormat = request.format ?? "wav";
+    const meta = await this.fetchModelMeta(model);
+    const outMods = meta.outputModalities;
+    const useSpeechEndpoint = outMods.includes("speech") || outMods.length === 0 || !outMods.includes("audio");
+    if (useSpeechEndpoint) {
+      return this.generateAudioViaSpeechEndpoint(
+        model,
+        request.text,
+        request.voice ?? "alloy",
+        requestedFormat,
+        request
+      );
+    }
+    const wireFormat = requestedFormat === "wav" ? "pcm16" : requestedFormat;
     const messages = [{ role: "user", content: request.text }];
     const body = {
       model,
@@ -5786,7 +5955,7 @@ var OpenRouterMediaProvider = class {
       stream: true,
       audio: {
         voice: request.voice ?? "alloy",
-        format: request.format ?? "wav"
+        format: wireFormat
       }
     };
     const endpoint = `${this.baseUrl}/chat/completions`;
@@ -5877,13 +6046,65 @@ var OpenRouterMediaProvider = class {
     const resp = emptyMediaResponse(null);
     resp.text = textContent;
     if (audioChunks.length > 0) {
+      let b64 = audioChunks.join("");
+      try {
+        const parts = audioChunks.map(base64ToBytes);
+        const total = parts.reduce((n, p) => n + p.byteLength, 0);
+        const merged = new Uint8Array(total);
+        let off = 0;
+        for (const p of parts) {
+          merged.set(p, off);
+          off += p.byteLength;
+        }
+        b64 = bytesToBase64(merged);
+        if (requestedFormat === "wav") {
+          b64 = bytesToBase64(wrapPcm16AsWav(merged));
+        }
+      } catch {
+      }
       resp.audio = {
-        data: audioChunks.join(""),
-        format: request.format ?? "wav"
+        data: b64,
+        format: requestedFormat
       };
     }
     return resp;
   }
+  /**
+   * Call OpenRouter's OpenAI-compatible TTS endpoint (`POST /audio/speech`).
+   * Returns raw bytes for the requested format; wraps PCM → WAV when needed.
+   */
+  async generateAudioViaSpeechEndpoint(model, text2, voice, requestedFormat, request) {
+    const wireFormat = requestedFormat === "wav" || requestedFormat === "pcm" || requestedFormat === "pcm16" ? "pcm" : requestedFormat;
+    const endpoint = `${this.baseUrl}/audio/speech`;
+    const body = {
+      model,
+      input: text2,
+      voice,
+      response_format: wireFormat
+    };
+    if (request?.speed != null) body.speed = request.speed;
+    if (request?.extra) Object.assign(body, request.extra);
+    const res = await this.post(endpoint, body);
+    if (!res.ok) {
+      throw new MediaProviderError(
+        `Audio generation failed [model=${model}] [endpoint=${endpoint}]: ${res.status} ${await res.text()}`,
+        { provider: "openrouter", model, endpoint }
+      );
+    }
+    const buf = new Uint8Array(await res.arrayBuffer());
+    const finalBytes = requestedFormat === "wav" ? wrapPcm16AsWav(buf) : buf;
+    const resp = emptyMediaResponse({
+      endpoint: "audio/speech",
+      model,
+      mime_type: res.headers.get("content-type") ?? ""
+    });
+    resp.text = text2;
+    resp.audio = {
+      data: bytesToBase64(finalBytes),
+      format: requestedFormat
+    };
+    return resp;
+  }
   // ── Helpers ────────────────────────────────────────────────────────
   post(url, body) {
     const key = apiKeyStore.get(this);