@genkit-ai/compat-oai 1.28.0 → 1.29.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. package/lib/{audio-CUuCwm-y.d.mts → audio-CJ8rzf35.d.mts} +7 -2
  2. package/lib/{audio-CUuCwm-y.d.ts → audio-CJ8rzf35.d.ts} +7 -2
  3. package/lib/audio.d.mts +2 -1
  4. package/lib/audio.d.ts +2 -1
  5. package/lib/audio.js +14 -5
  6. package/lib/audio.js.map +1 -1
  7. package/lib/audio.mjs +9 -4
  8. package/lib/audio.mjs.map +1 -1
  9. package/lib/deepseek/deepseek.d.mts +2 -1
  10. package/lib/deepseek/deepseek.d.ts +2 -1
  11. package/lib/deepseek/index.d.mts +2 -1
  12. package/lib/deepseek/index.d.ts +2 -1
  13. package/lib/embedder.d.mts +2 -1
  14. package/lib/embedder.d.ts +2 -1
  15. package/lib/image.d.mts +2 -1
  16. package/lib/image.d.ts +2 -1
  17. package/lib/index.d.mts +2 -1
  18. package/lib/index.d.ts +2 -1
  19. package/lib/model.d.mts +2 -1
  20. package/lib/model.d.ts +2 -1
  21. package/lib/model.js +1 -1
  22. package/lib/model.js.map +1 -1
  23. package/lib/model.mjs +1 -1
  24. package/lib/model.mjs.map +1 -1
  25. package/lib/openai/dalle.d.mts +2 -1
  26. package/lib/openai/dalle.d.ts +2 -1
  27. package/lib/openai/index.d.mts +4 -3
  28. package/lib/openai/index.d.ts +4 -3
  29. package/lib/openai/index.js +5 -5
  30. package/lib/openai/index.js.map +1 -1
  31. package/lib/openai/index.mjs +1 -4
  32. package/lib/openai/index.mjs.map +1 -1
  33. package/lib/openai/{whisper.d.mts → stt.d.mts} +1 -0
  34. package/lib/openai/{whisper.d.ts → stt.d.ts} +1 -0
  35. package/lib/openai/{whisper.js → stt.js} +7 -4
  36. package/lib/openai/stt.js.map +1 -0
  37. package/lib/openai/{whisper.mjs → stt.mjs} +4 -1
  38. package/lib/openai/stt.mjs.map +1 -0
  39. package/lib/utils.d.mts +2 -1
  40. package/lib/utils.d.ts +2 -1
  41. package/lib/xai/grok.d.mts +2 -1
  42. package/lib/xai/grok.d.ts +2 -1
  43. package/lib/xai/index.d.mts +2 -1
  44. package/lib/xai/index.d.ts +2 -1
  45. package/package.json +2 -2
  46. package/lib/openai/whisper.js.map +0 -1
  47. package/lib/openai/whisper.mjs.map +0 -1
@@ -1,7 +1,8 @@
1
1
  import { EmbedderReference, EmbedderAction, GenerateRequest, z, ModelReference, StreamingCallback, GenerateResponseChunkData, GenerateResponseData, Role, Part, MessageData, ToolRequestPart, ActionMetadata } from 'genkit';
2
2
  import { ModelInfo, ModelAction, ToolDefinition } from 'genkit/model';
3
3
  import OpenAI, { ClientOptions } from 'openai';
4
- import { SpeechCreateParams, TranscriptionCreateParams } from 'openai/resources/audio/index.mjs';
4
+ import { Response } from 'openai/core.mjs';
5
+ import { SpeechCreateParams, TranscriptionCreateParams, Transcription } from 'openai/resources/audio/index.mjs';
5
6
  import * as genkit_plugin from 'genkit/plugin';
6
7
  import { ResolvableAction } from 'genkit/plugin';
7
8
  import { ActionType } from 'genkit/registry';
@@ -476,6 +477,8 @@ declare const RESPONSE_FORMAT_MEDIA_TYPES: {
476
477
  wav: string;
477
478
  pcm: string;
478
479
  };
480
+ declare function toTTSRequest(modelName: string, request: GenerateRequest, requestBuilder?: SpeechRequestBuilder): SpeechCreateParams;
481
+ declare function speechToGenerateResponse(response: Response, responseFormat?: 'mp3' | 'opus' | 'aac' | 'flac' | 'wav' | 'pcm'): Promise<GenerateResponseData>;
479
482
  /**
480
483
  * Method to define a new Genkit Model that is compatible with the Open AI Audio
481
484
  * API.
@@ -507,6 +510,8 @@ declare function compatOaiSpeechModelRef<CustomOptions extends z.ZodTypeAny = z.
507
510
  config?: any;
508
511
  namespace?: string;
509
512
  }): ModelReference<any>;
513
+ declare function toSttRequest(modelName: string, request: GenerateRequest, requestBuilder?: TranscriptionRequestBuilder): TranscriptionCreateParams;
514
+ declare function transcriptionToGenerateResponse(result: Transcription | string): GenerateResponseData;
510
515
  /**
511
516
  * Method to define a new Genkit Model that is compatible with Open AI
512
517
  * Transcriptions API.
@@ -540,4 +545,4 @@ declare function compatOaiTranscriptionModelRef<CustomOptions extends z.ZodTypeA
540
545
  namespace?: string;
541
546
  }): ModelReference<any>;
542
547
 
543
- export { ChatCompletionCommonConfigSchema as C, ImageGenerationCommonConfigSchema as I, type ModelRequestBuilder as M, type PluginOptions as P, RESPONSE_FORMAT_MEDIA_TYPES as R, SpeechConfigSchema as S, TranscriptionConfigSchema as T, type ImageRequestBuilder as a, IMAGE_GENERATION_MODEL_INFO as b, defineCompatOpenAIImageModel as c, defineCompatOpenAIEmbedder as d, compatOaiImageModelRef as e, toOpenAITool as f, toOpenAITextAndMedia as g, toOpenAIMessages as h, fromOpenAIToolCall as i, fromOpenAIChoice as j, fromOpenAIChunkChoice as k, toOpenAIRequestBody as l, defineCompatOpenAIModel as m, compatOaiModelRef as n, openAIModelRunner as o, openAICompatible as p, compatOaiSpeechModelRef as q, compatOaiTranscriptionModelRef as r, defineCompatOpenAISpeechModel as s, toOpenAIRole as t, defineCompatOpenAITranscriptionModel as u, type SpeechRequestBuilder as v, type TranscriptionRequestBuilder as w, TRANSCRIPTION_MODEL_INFO as x, SPEECH_MODEL_INFO as y };
548
+ export { speechToGenerateResponse as A, toSttRequest as B, ChatCompletionCommonConfigSchema as C, transcriptionToGenerateResponse as D, ImageGenerationCommonConfigSchema as I, type ModelRequestBuilder as M, type PluginOptions as P, RESPONSE_FORMAT_MEDIA_TYPES as R, SpeechConfigSchema as S, TranscriptionConfigSchema as T, type ImageRequestBuilder as a, IMAGE_GENERATION_MODEL_INFO as b, defineCompatOpenAIImageModel as c, defineCompatOpenAIEmbedder as d, compatOaiImageModelRef as e, toOpenAITool as f, toOpenAITextAndMedia as g, toOpenAIMessages as h, fromOpenAIToolCall as i, fromOpenAIChoice as j, fromOpenAIChunkChoice as k, toOpenAIRequestBody as l, defineCompatOpenAIModel as m, compatOaiModelRef as n, openAIModelRunner as o, openAICompatible as p, compatOaiSpeechModelRef as q, compatOaiTranscriptionModelRef as r, defineCompatOpenAISpeechModel as s, toOpenAIRole as t, defineCompatOpenAITranscriptionModel as u, type SpeechRequestBuilder as v, type TranscriptionRequestBuilder as w, TRANSCRIPTION_MODEL_INFO as x, SPEECH_MODEL_INFO as y, toTTSRequest as z };
@@ -1,7 +1,8 @@
1
1
  import { EmbedderReference, EmbedderAction, GenerateRequest, z, ModelReference, StreamingCallback, GenerateResponseChunkData, GenerateResponseData, Role, Part, MessageData, ToolRequestPart, ActionMetadata } from 'genkit';
2
2
  import { ModelInfo, ModelAction, ToolDefinition } from 'genkit/model';
3
3
  import OpenAI, { ClientOptions } from 'openai';
4
- import { SpeechCreateParams, TranscriptionCreateParams } from 'openai/resources/audio/index.mjs';
4
+ import { Response } from 'openai/core.mjs';
5
+ import { SpeechCreateParams, TranscriptionCreateParams, Transcription } from 'openai/resources/audio/index.mjs';
5
6
  import * as genkit_plugin from 'genkit/plugin';
6
7
  import { ResolvableAction } from 'genkit/plugin';
7
8
  import { ActionType } from 'genkit/registry';
@@ -476,6 +477,8 @@ declare const RESPONSE_FORMAT_MEDIA_TYPES: {
476
477
  wav: string;
477
478
  pcm: string;
478
479
  };
480
+ declare function toTTSRequest(modelName: string, request: GenerateRequest, requestBuilder?: SpeechRequestBuilder): SpeechCreateParams;
481
+ declare function speechToGenerateResponse(response: Response, responseFormat?: 'mp3' | 'opus' | 'aac' | 'flac' | 'wav' | 'pcm'): Promise<GenerateResponseData>;
479
482
  /**
480
483
  * Method to define a new Genkit Model that is compatible with the Open AI Audio
481
484
  * API.
@@ -507,6 +510,8 @@ declare function compatOaiSpeechModelRef<CustomOptions extends z.ZodTypeAny = z.
507
510
  config?: any;
508
511
  namespace?: string;
509
512
  }): ModelReference<any>;
513
+ declare function toSttRequest(modelName: string, request: GenerateRequest, requestBuilder?: TranscriptionRequestBuilder): TranscriptionCreateParams;
514
+ declare function transcriptionToGenerateResponse(result: Transcription | string): GenerateResponseData;
510
515
  /**
511
516
  * Method to define a new Genkit Model that is compatible with Open AI
512
517
  * Transcriptions API.
@@ -540,4 +545,4 @@ declare function compatOaiTranscriptionModelRef<CustomOptions extends z.ZodTypeA
540
545
  namespace?: string;
541
546
  }): ModelReference<any>;
542
547
 
543
- export { ChatCompletionCommonConfigSchema as C, ImageGenerationCommonConfigSchema as I, type ModelRequestBuilder as M, type PluginOptions as P, RESPONSE_FORMAT_MEDIA_TYPES as R, SpeechConfigSchema as S, TranscriptionConfigSchema as T, type ImageRequestBuilder as a, IMAGE_GENERATION_MODEL_INFO as b, defineCompatOpenAIImageModel as c, defineCompatOpenAIEmbedder as d, compatOaiImageModelRef as e, toOpenAITool as f, toOpenAITextAndMedia as g, toOpenAIMessages as h, fromOpenAIToolCall as i, fromOpenAIChoice as j, fromOpenAIChunkChoice as k, toOpenAIRequestBody as l, defineCompatOpenAIModel as m, compatOaiModelRef as n, openAIModelRunner as o, openAICompatible as p, compatOaiSpeechModelRef as q, compatOaiTranscriptionModelRef as r, defineCompatOpenAISpeechModel as s, toOpenAIRole as t, defineCompatOpenAITranscriptionModel as u, type SpeechRequestBuilder as v, type TranscriptionRequestBuilder as w, TRANSCRIPTION_MODEL_INFO as x, SPEECH_MODEL_INFO as y };
548
+ export { speechToGenerateResponse as A, toSttRequest as B, ChatCompletionCommonConfigSchema as C, transcriptionToGenerateResponse as D, ImageGenerationCommonConfigSchema as I, type ModelRequestBuilder as M, type PluginOptions as P, RESPONSE_FORMAT_MEDIA_TYPES as R, SpeechConfigSchema as S, TranscriptionConfigSchema as T, type ImageRequestBuilder as a, IMAGE_GENERATION_MODEL_INFO as b, defineCompatOpenAIImageModel as c, defineCompatOpenAIEmbedder as d, compatOaiImageModelRef as e, toOpenAITool as f, toOpenAITextAndMedia as g, toOpenAIMessages as h, fromOpenAIToolCall as i, fromOpenAIChoice as j, fromOpenAIChunkChoice as k, toOpenAIRequestBody as l, defineCompatOpenAIModel as m, compatOaiModelRef as n, openAIModelRunner as o, openAICompatible as p, compatOaiSpeechModelRef as q, compatOaiTranscriptionModelRef as r, defineCompatOpenAISpeechModel as s, toOpenAIRole as t, defineCompatOpenAITranscriptionModel as u, type SpeechRequestBuilder as v, type TranscriptionRequestBuilder as w, TRANSCRIPTION_MODEL_INFO as x, SPEECH_MODEL_INFO as y, toTTSRequest as z };
package/lib/audio.d.mts CHANGED
@@ -1,8 +1,9 @@
1
1
  import 'genkit';
2
2
  import 'genkit/model';
3
3
  import 'openai';
4
+ import 'openai/core.mjs';
4
5
  import 'openai/resources/audio/index.mjs';
5
- export { R as RESPONSE_FORMAT_MEDIA_TYPES, y as SPEECH_MODEL_INFO, S as SpeechConfigSchema, v as SpeechRequestBuilder, x as TRANSCRIPTION_MODEL_INFO, T as TranscriptionConfigSchema, w as TranscriptionRequestBuilder, q as compatOaiSpeechModelRef, r as compatOaiTranscriptionModelRef, s as defineCompatOpenAISpeechModel, u as defineCompatOpenAITranscriptionModel } from './audio-CUuCwm-y.mjs';
6
+ export { R as RESPONSE_FORMAT_MEDIA_TYPES, y as SPEECH_MODEL_INFO, S as SpeechConfigSchema, v as SpeechRequestBuilder, x as TRANSCRIPTION_MODEL_INFO, T as TranscriptionConfigSchema, w as TranscriptionRequestBuilder, q as compatOaiSpeechModelRef, r as compatOaiTranscriptionModelRef, s as defineCompatOpenAISpeechModel, u as defineCompatOpenAITranscriptionModel, A as speechToGenerateResponse, B as toSttRequest, z as toTTSRequest, D as transcriptionToGenerateResponse } from './audio-CJ8rzf35.mjs';
6
7
  import 'genkit/plugin';
7
8
  import 'genkit/registry';
8
9
  import 'openai/resources/images.mjs';
package/lib/audio.d.ts CHANGED
@@ -1,8 +1,9 @@
1
1
  import 'genkit';
2
2
  import 'genkit/model';
3
3
  import 'openai';
4
+ import 'openai/core.mjs';
4
5
  import 'openai/resources/audio/index.mjs';
5
- export { R as RESPONSE_FORMAT_MEDIA_TYPES, y as SPEECH_MODEL_INFO, S as SpeechConfigSchema, v as SpeechRequestBuilder, x as TRANSCRIPTION_MODEL_INFO, T as TranscriptionConfigSchema, w as TranscriptionRequestBuilder, q as compatOaiSpeechModelRef, r as compatOaiTranscriptionModelRef, s as defineCompatOpenAISpeechModel, u as defineCompatOpenAITranscriptionModel } from './audio-CUuCwm-y.js';
6
+ export { R as RESPONSE_FORMAT_MEDIA_TYPES, y as SPEECH_MODEL_INFO, S as SpeechConfigSchema, v as SpeechRequestBuilder, x as TRANSCRIPTION_MODEL_INFO, T as TranscriptionConfigSchema, w as TranscriptionRequestBuilder, q as compatOaiSpeechModelRef, r as compatOaiTranscriptionModelRef, s as defineCompatOpenAISpeechModel, u as defineCompatOpenAITranscriptionModel, A as speechToGenerateResponse, B as toSttRequest, z as toTTSRequest, D as transcriptionToGenerateResponse } from './audio-CJ8rzf35.js';
6
7
  import 'genkit/plugin';
7
8
  import 'genkit/registry';
8
9
  import 'openai/resources/images.mjs';
package/lib/audio.js CHANGED
@@ -26,7 +26,11 @@ __export(audio_exports, {
26
26
  compatOaiSpeechModelRef: () => compatOaiSpeechModelRef,
27
27
  compatOaiTranscriptionModelRef: () => compatOaiTranscriptionModelRef,
28
28
  defineCompatOpenAISpeechModel: () => defineCompatOpenAISpeechModel,
29
- defineCompatOpenAITranscriptionModel: () => defineCompatOpenAITranscriptionModel
29
+ defineCompatOpenAITranscriptionModel: () => defineCompatOpenAITranscriptionModel,
30
+ speechToGenerateResponse: () => speechToGenerateResponse,
31
+ toSttRequest: () => toSttRequest,
32
+ toTTSRequest: () => toTTSRequest,
33
+ transcriptionToGenerateResponse: () => transcriptionToGenerateResponse
30
34
  });
31
35
  module.exports = __toCommonJS(audio_exports);
32
36
  var import_genkit = require("genkit");
@@ -111,7 +115,7 @@ function toTTSRequest(modelName, request, requestBuilder) {
111
115
  }
112
116
  return options;
113
117
  }
114
- async function toGenerateResponse(response, responseFormat = "mp3") {
118
+ async function speechToGenerateResponse(response, responseFormat = "mp3") {
115
119
  const resultArrayBuffer = await response.arrayBuffer();
116
120
  const resultBuffer = Buffer.from(new Uint8Array(resultArrayBuffer));
117
121
  const mediaType = RESPONSE_FORMAT_MEDIA_TYPES[responseFormat];
@@ -157,7 +161,7 @@ function defineCompatOpenAISpeechModel(params) {
157
161
  const result = await client.audio.speech.create(ttsRequest, {
158
162
  signal: abortSignal
159
163
  });
160
- return await toGenerateResponse(result, ttsRequest.response_format);
164
+ return await speechToGenerateResponse(result, ttsRequest.response_format);
161
165
  }
162
166
  );
163
167
  }
@@ -250,12 +254,13 @@ function transcriptionToGenerateResponse(result) {
250
254
  }
251
255
  function defineCompatOpenAITranscriptionModel(params) {
252
256
  const {
253
- name: modelName,
257
+ name,
254
258
  pluginOptions,
255
259
  client: defaultClient,
256
260
  modelRef: modelRef2,
257
261
  requestBuilder
258
262
  } = params;
263
+ const modelName = (0, import_utils.toModelName)(name, pluginOptions?.name);
259
264
  const actionName = modelRef2?.name ?? `${pluginOptions?.name ?? "compat-oai"}/${modelName}`;
260
265
  return (0, import_plugin.model)(
261
266
  {
@@ -307,6 +312,10 @@ function compatOaiTranscriptionModelRef(params) {
307
312
  compatOaiSpeechModelRef,
308
313
  compatOaiTranscriptionModelRef,
309
314
  defineCompatOpenAISpeechModel,
310
- defineCompatOpenAITranscriptionModel
315
+ defineCompatOpenAITranscriptionModel,
316
+ speechToGenerateResponse,
317
+ toSttRequest,
318
+ toTTSRequest,
319
+ transcriptionToGenerateResponse
311
320
  });
312
321
  //# sourceMappingURL=audio.js.map
package/lib/audio.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/audio.ts"],"sourcesContent":["/**\n * Copyright 2024 The Fire Company\n * Copyright 2024 Google LLC\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n * http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\nimport type {\n GenerateRequest,\n GenerateResponseData,\n ModelReference,\n} from 'genkit';\nimport { GenerationCommonConfigSchema, Message, modelRef, z } from 'genkit';\nimport type { ModelAction, ModelInfo } from 'genkit/model';\nimport { model } from 'genkit/plugin';\nimport OpenAI from 'openai';\nimport { Response } from 'openai/core.mjs';\nimport type {\n SpeechCreateParams,\n Transcription,\n TranscriptionCreateParams,\n} from 'openai/resources/audio/index.mjs';\nimport { PluginOptions } from './index.js';\nimport { maybeCreateRequestScopedOpenAIClient, toModelName } from './utils.js';\n\nexport type SpeechRequestBuilder = (\n req: GenerateRequest,\n params: SpeechCreateParams\n) => void;\nexport type TranscriptionRequestBuilder = (\n req: GenerateRequest,\n params: TranscriptionCreateParams\n) => void;\n\nexport const TRANSCRIPTION_MODEL_INFO = {\n supports: {\n media: true,\n output: ['text', 'json'],\n multiturn: false,\n systemRole: false,\n tools: false,\n },\n};\n\nexport const SPEECH_MODEL_INFO: ModelInfo = {\n supports: {\n media: false,\n output: ['media'],\n multiturn: false,\n systemRole: false,\n tools: false,\n },\n};\n\nconst ChunkingStrategySchema = z.object({\n type: z.string(),\n prefix_padding_ms: z.number().int().optional(),\n silence_duration_ms: z.number().int().optional(),\n threshold: z.number().min(0).max(1.0).optional(),\n});\nexport const TranscriptionConfigSchema = GenerationCommonConfigSchema.pick({\n temperature: true,\n}).extend({\n chunking_strategy: z\n .union([z.literal('auto'), ChunkingStrategySchema])\n .optional(),\n include: z.array(z.any()).optional(),\n language: z.string().optional(),\n timestamp_granularities: z.array(z.enum(['word', 'segment'])).optional(),\n response_format: z\n .enum(['json', 'text', 'srt', 'verbose_json', 'vtt'])\n .optional(),\n // TODO stream support\n});\n\nexport const SpeechConfigSchema = z.object({\n voice: z\n .enum(['alloy', 'echo', 'fable', 'onyx', 'nova', 'shimmer'])\n .default('alloy'),\n speed: z.number().min(0.25).max(4.0).optional(),\n response_format: z\n .enum(['mp3', 'opus', 'aac', 'flac', 'wav', 'pcm'])\n .optional(),\n});\n\n/**\n * Supported media formats for Audio generation\n */\nexport const RESPONSE_FORMAT_MEDIA_TYPES = {\n mp3: 'audio/mpeg',\n opus: 'audio/opus',\n aac: 'audio/aac',\n flac: 'audio/flac',\n wav: 'audio/wav',\n pcm: 'audio/L16',\n};\n\nfunction toTTSRequest(\n modelName: string,\n request: GenerateRequest,\n requestBuilder?: SpeechRequestBuilder\n): SpeechCreateParams {\n const {\n voice,\n version: modelVersion,\n temperature,\n maxOutputTokens,\n stopSequences,\n topK,\n topP,\n ...restOfConfig\n } = request.config ?? {};\n\n let options: SpeechCreateParams = {\n model: modelVersion ?? modelName,\n input: new Message(request.messages[0]).text,\n voice: voice ?? 'alloy',\n };\n if (requestBuilder) {\n requestBuilder(request, options);\n } else {\n options = {\n ...options,\n ...restOfConfig, // passthorugh rest of the config\n };\n }\n for (const k in options) {\n if (options[k] === undefined) {\n delete options[k];\n }\n }\n return options;\n}\n\nasync function toGenerateResponse(\n response: Response,\n responseFormat: 'mp3' | 'opus' | 'aac' | 'flac' | 'wav' | 'pcm' = 'mp3'\n): Promise<GenerateResponseData> {\n const resultArrayBuffer = await response.arrayBuffer();\n const resultBuffer = Buffer.from(new Uint8Array(resultArrayBuffer));\n const mediaType = RESPONSE_FORMAT_MEDIA_TYPES[responseFormat];\n return {\n message: {\n role: 'model',\n content: [\n {\n media: {\n contentType: mediaType,\n url: `data:${mediaType};base64,${resultBuffer.toString('base64')}`,\n },\n },\n ],\n },\n finishReason: 'stop',\n raw: response,\n };\n}\n\n/**\n * Method to define a new Genkit Model that is compatible with the Open AI Audio\n * API. \n *\n * These models are to be used to create audio speech from a given request.\n * @param params An object containing parameters for defining the OpenAI speech\n * model.\n * @param params.ai The Genkit AI instance.\n * @param params.name The name of the model.\n * @param params.client The OpenAI client instance.\n * @param params.modelRef Optional reference to the model's configuration and\n * custom options.\n\n * @returns the created {@link ModelAction}\n */\nexport function defineCompatOpenAISpeechModel<\n CustomOptions extends z.ZodTypeAny = z.ZodTypeAny,\n>(params: {\n name: string;\n client: OpenAI;\n modelRef?: ModelReference<CustomOptions>;\n requestBuilder?: SpeechRequestBuilder;\n pluginOptions: PluginOptions;\n}): ModelAction {\n const {\n name,\n client: defaultClient,\n pluginOptions,\n modelRef,\n requestBuilder,\n } = params;\n const modelName = toModelName(name, pluginOptions?.name);\n const actionName = `${pluginOptions?.name ?? 'compat-oai'}/${modelName}`;\n\n return model(\n {\n name: actionName,\n ...modelRef?.info,\n configSchema: modelRef?.configSchema,\n },\n async (request, { abortSignal }) => {\n const ttsRequest = toTTSRequest(modelName, request, requestBuilder);\n const client = maybeCreateRequestScopedOpenAIClient(\n pluginOptions,\n request,\n defaultClient\n );\n const result = await client.audio.speech.create(ttsRequest, {\n signal: abortSignal,\n });\n return await toGenerateResponse(result, ttsRequest.response_format);\n }\n );\n}\n\n/** Speech generation ModelRef helper, with reasonable defaults for\n * OpenAI-compatible providers */\nexport function compatOaiSpeechModelRef<\n CustomOptions extends z.ZodTypeAny = z.ZodTypeAny,\n>(params: {\n name: string;\n info?: ModelInfo;\n configSchema?: CustomOptions;\n config?: any;\n namespace?: string;\n}) {\n const {\n name,\n info = SPEECH_MODEL_INFO,\n configSchema,\n config = undefined,\n namespace,\n } = params;\n return modelRef({\n name,\n configSchema: configSchema || (SpeechConfigSchema as any),\n info,\n config,\n namespace,\n });\n}\n\nfunction toSttRequest(\n modelName: string,\n request: GenerateRequest,\n requestBuilder?: TranscriptionRequestBuilder\n): TranscriptionCreateParams {\n const message = new Message(request.messages[0]);\n const media = message.media;\n if (!media?.url) {\n throw new Error('No media found in the request');\n }\n const mediaBuffer = Buffer.from(\n media.url.slice(media.url.indexOf(',') + 1),\n 'base64'\n );\n const mediaFile = new File([mediaBuffer], 'input', {\n type:\n media.contentType ??\n media.url.slice('data:'.length, media.url.indexOf(';')),\n });\n const {\n temperature,\n version: modelVersion,\n maxOutputTokens,\n stopSequences,\n topK,\n topP,\n ...restOfConfig\n } = request.config ?? {};\n\n let options: TranscriptionCreateParams = {\n model: modelVersion ?? modelName,\n file: mediaFile,\n prompt: message.text,\n temperature,\n };\n if (requestBuilder) {\n requestBuilder(request, options);\n } else {\n options = {\n ...options,\n ...restOfConfig, // passthrough rest of the config\n };\n }\n const outputFormat = request.output?.format as 'json' | 'text' | 'media';\n const customFormat = request.config?.response_format;\n if (outputFormat && customFormat) {\n if (\n outputFormat === 'json' &&\n customFormat !== 'json' &&\n customFormat !== 'verbose_json'\n ) {\n throw new Error(\n `Custom response format ${customFormat} is not compatible with output format ${outputFormat}`\n );\n }\n }\n if (outputFormat === 'media') {\n throw new Error(`Output format ${outputFormat} is not supported.`);\n }\n options.response_format = customFormat || outputFormat || 'text';\n for (const k in options) {\n if (options[k] === undefined) {\n delete options[k];\n }\n }\n return options;\n}\n\nfunction transcriptionToGenerateResponse(\n result: Transcription | string\n): GenerateResponseData {\n return {\n message: {\n role: 'model',\n content: [\n {\n text: typeof result === 'string' ? result : result.text,\n },\n ],\n },\n finishReason: 'stop',\n raw: result,\n };\n}\n\n/**\n * Method to define a new Genkit Model that is compatible with Open AI\n * Transcriptions API. \n *\n * These models are to be used to transcribe audio to text.\n *\n * @param params An object containing parameters for defining the OpenAI\n * transcription model.\n * @param params.ai The Genkit AI instance.\n * @param params.name The name of the model.\n * @param params.client The OpenAI client instance.\n * @param params.modelRef Optional reference to the model's configuration and\n * custom options.\n\n * @returns the created {@link ModelAction}\n */\nexport function defineCompatOpenAITranscriptionModel<\n CustomOptions extends z.ZodTypeAny = z.ZodTypeAny,\n>(params: {\n name: string;\n client: OpenAI;\n pluginOptions?: PluginOptions;\n modelRef?: ModelReference<CustomOptions>;\n requestBuilder?: TranscriptionRequestBuilder;\n}): ModelAction {\n const {\n name: modelName,\n pluginOptions,\n client: defaultClient,\n modelRef,\n requestBuilder,\n } = params;\n\n const actionName =\n modelRef?.name ?? `${pluginOptions?.name ?? 'compat-oai'}/${modelName}`;\n return model(\n {\n name: actionName,\n ...modelRef?.info,\n configSchema: modelRef?.configSchema,\n },\n async (request, { abortSignal }) => {\n const params = toSttRequest(modelName, request, requestBuilder);\n const client = maybeCreateRequestScopedOpenAIClient(\n pluginOptions,\n request,\n defaultClient\n );\n // Explicitly setting stream to false ensures we use the non-streaming overload\n const result = await client.audio.transcriptions.create(\n {\n ...params,\n stream: false,\n },\n { signal: abortSignal }\n );\n return transcriptionToGenerateResponse(result);\n }\n );\n}\n\n/** Transcription ModelRef helper, with reasonable defaults for\n * OpenAI-compatible providers */\nexport function compatOaiTranscriptionModelRef<\n CustomOptions extends z.ZodTypeAny = z.ZodTypeAny,\n>(params: {\n name: string;\n info?: ModelInfo;\n configSchema?: CustomOptions;\n config?: any;\n namespace?: string;\n}) {\n const {\n name,\n info = TRANSCRIPTION_MODEL_INFO,\n configSchema,\n config = undefined,\n namespace,\n } = params;\n return modelRef({\n name,\n configSchema: configSchema || (TranscriptionConfigSchema as any),\n info,\n config,\n namespace,\n });\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAqBA,oBAAmE;AAEnE,oBAAsB;AAStB,mBAAkE;AAW3D,MAAM,2BAA2B;AAAA,EACtC,UAAU;AAAA,IACR,OAAO;AAAA,IACP,QAAQ,CAAC,QAAQ,MAAM;AAAA,IACvB,WAAW;AAAA,IACX,YAAY;AAAA,IACZ,OAAO;AAAA,EACT;AACF;AAEO,MAAM,oBAA+B;AAAA,EAC1C,UAAU;AAAA,IACR,OAAO;AAAA,IACP,QAAQ,CAAC,OAAO;AAAA,IAChB,WAAW;AAAA,IACX,YAAY;AAAA,IACZ,OAAO;AAAA,EACT;AACF;AAEA,MAAM,yBAAyB,gBAAE,OAAO;AAAA,EACtC,MAAM,gBAAE,OAAO;AAAA,EACf,mBAAmB,gBAAE,OAAO,EAAE,IAAI,EAAE,SAAS;AAAA,EAC7C,qBAAqB,gBAAE,OAAO,EAAE,IAAI,EAAE,SAAS;AAAA,EAC/C,WAAW,gBAAE,OAAO,EAAE,IAAI,CAAC,EAAE,IAAI,CAAG,EAAE,SAAS;AACjD,CAAC;AACM,MAAM,4BAA4B,2CAA6B,KAAK;AAAA,EACzE,aAAa;AACf,CAAC,EAAE,OAAO;AAAA,EACR,mBAAmB,gBAChB,MAAM,CAAC,gBAAE,QAAQ,MAAM,GAAG,sBAAsB,CAAC,EACjD,SAAS;AAAA,EACZ,SAAS,gBAAE,MAAM,gBAAE,IAAI,CAAC,EAAE,SAAS;AAAA,EACnC,UAAU,gBAAE,OAAO,EAAE,SAAS;AAAA,EAC9B,yBAAyB,gBAAE,MAAM,gBAAE,KAAK,CAAC,QAAQ,SAAS,CAAC,CAAC,EAAE,SAAS;AAAA,EACvE,iBAAiB,gBACd,KAAK,CAAC,QAAQ,QAAQ,OAAO,gBAAgB,KAAK,CAAC,EACnD,SAAS;AAAA;AAEd,CAAC;AAEM,MAAM,qBAAqB,gBAAE,OAAO;AAAA,EACzC,OAAO,gBACJ,KAAK,CAAC,SAAS,QAAQ,SAAS,QAAQ,QAAQ,SAAS,CAAC,EAC1D,QAAQ,OAAO;AAAA,EAClB,OAAO,gBAAE,OAAO,EAAE,IAAI,IAAI,EAAE,IAAI,CAAG,EAAE,SAAS;AAAA,EAC9C,iBAAiB,gBACd,KAAK,CAAC,OAAO,QAAQ,OAAO,QAAQ,OAAO,KAAK,CAAC,EACjD,SAAS;AACd,CAAC;AAKM,MAAM,8BAA8B;AAAA,EACzC,KAAK;AAAA,EACL,MAAM;AAAA,EACN,KAAK;AAAA,EACL,MAAM;AAAA,EACN,KAAK;AAAA,EACL,KAAK;AACP;AAEA,SAAS,aACP,WACA,SACA,gBACoB;AACpB,QAAM;AAAA,IACJ;AAAA,IACA,SAAS;AAAA,IACT;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA,GAAG;AAAA,EACL,IAAI,QAAQ,UAAU,CAAC;AAEvB,MAAI,UAA8B;AAAA,IAChC,OAAO,gBAAgB;AAAA,IACvB,OAAO,IAAI,sBAAQ,QAAQ,SAAS,CAAC,CAAC,EAAE;AAAA,IACxC,OAAO,SAAS;AAAA,EAClB;AACA,MAAI,gBAAgB;AAClB,mBAAe,SAAS,OAAO;AAAA,EACjC,OAAO;AACL,cAAU;AAAA,MACR,GAAG;AAAA,MACH,GAAG;AAAA;AAAA,IACL;AAAA,EACF;AACA,aAAW,KAAK,SAAS;AACvB,QAAI,QAAQ,CAAC,MAAM,QAAW;AAC5B,aAAO,QAAQ,CAAC;AAAA,IAClB;AAAA,EACF;AACA,SAAO;AACT;AAEA,eAAe,mBACb,UACA,iBAAkE,OACnC;AAC/B,QAAM,oBAAoB,MAAM,SAAS,YAAY;AACrD,QAAM,eAAe,OAAO,KAAK,IAAI,WAAW,iBAAiB,CAAC;AAClE,QAAM,YAAY,4BAA4B,cAAc;AAC5D,SAAO;AAAA,IACL,SAAS;AAAA,MACP,MAAM;AAAA,MACN,SAAS;AAAA,QACP;AAAA,UACE,OAAO;AAAA,YACL,aAAa;AAAA,YACb,KAAK,QAAQ,SAAS,WAAW,aAAa,SAAS,QAAQ,CAAC;AAAA,UAClE;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAAA,IACA,cAAc;AAAA,IACd,KAAK;AAAA,EACP;AACF;AAiBO,SAAS,8BAEd,QAMc;AACd,QAAM;AAAA,IACJ;AAAA,IACA,QAAQ;AAAA,IACR;AAAA,IACA,UAAAA;AAAA,IACA;AAAA,EACF,IAAI;AACJ,QAAM,gBAAY,0BAAY,MAAM,eAAe,IAAI;AACvD,QAAM,aAAa,GAAG,eAAe,QAAQ,YAAY,IAAI,SAAS;AAEtE,aAAO;AAAA,IACL;AAAA,MACE,MAAM;AAAA,MACN,GAAGA,WAAU;AAAA,MACb,cAAcA,WAAU;AAAA,IAC1B;AAAA,IACA,OAAO,SAAS,EAAE,YAAY,MAAM;AAClC,YAAM,aAAa,aAAa,WAAW,SAAS,cAAc;AAClE,YAAM,aAAS;AAAA,QACb;AAAA,QACA;AAAA,QACA;AAAA,MACF;AACA,YAAM,SAAS,MAAM,OAAO,MAAM,OAAO,OAAO,YAAY;AAAA,QAC1D,QAAQ;AAAA,MACV,CAAC;AACD,aAAO,MAAM,mBAAmB,QAAQ,WAAW,eAAe;AAAA,IACpE;AAAA,EACF;AACF;AAIO,SAAS,wBAEd,QAMC;AACD,QAAM;AAAA,IACJ;AAAA,IACA,OAAO;AAAA,IACP;AAAA,IACA,SAAS;AAAA,IACT;AAAA,EACF,IAAI;AACJ,aAAO,wBAAS;AAAA,IACd;AAAA,IACA,cAAc,gBAAiB;AAAA,IAC/B;AAAA,IACA;AAAA,IACA;AAAA,EACF,CAAC;AACH;AAEA,SAAS,aACP,WACA,SACA,gBAC2B;AAC3B,QAAM,UAAU,IAAI,sBAAQ,QAAQ,SAAS,CAAC,CAAC;AAC/C,QAAM,QAAQ,QAAQ;AACtB,MAAI,CAAC,OAAO,KAAK;AACf,UAAM,IAAI,MAAM,+BAA+B;AAAA,EACjD;AACA,QAAM,cAAc,OAAO;AAAA,IACzB,MAAM,IAAI,MAAM,MAAM,IAAI,QAAQ,GAAG,IAAI,CAAC;AAAA,IAC1C;AAAA,EACF;AACA,QAAM,YAAY,IAAI,KAAK,CAAC,WAAW,GAAG,SAAS;AAAA,IACjD,MACE,MAAM,eACN,MAAM,IAAI,MAAM,QAAQ,QAAQ,MAAM,IAAI,QAAQ,GAAG,CAAC;AAAA,EAC1D,CAAC;AACD,QAAM;AAAA,IACJ;AAAA,IACA,SAAS;AAAA,IACT;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA,GAAG;AAAA,EACL,IAAI,QAAQ,UAAU,CAAC;AAEvB,MAAI,UAAqC;AAAA,IACvC,OAAO,gBAAgB;AAAA,IACvB,MAAM;AAAA,IACN,QAAQ,QAAQ;AAAA,IAChB;AAAA,EACF;AACA,MAAI,gBAAgB;AAClB,mBAAe,SAAS,OAAO;AAAA,EACjC,OAAO;AACL,cAAU;AAAA,MACR,GAAG;AAAA,MACH,GAAG;AAAA;AAAA,IACL;AAAA,EACF;AACA,QAAM,eAAe,QAAQ,QAAQ;AACrC,QAAM,eAAe,QAAQ,QAAQ;AACrC,MAAI,gBAAgB,cAAc;AAChC,QACE,iBAAiB,UACjB,iBAAiB,UACjB,iBAAiB,gBACjB;AACA,YAAM,IAAI;AAAA,QACR,0BAA0B,YAAY,yCAAyC,YAAY;AAAA,MAC7F;AAAA,IACF;AAAA,EACF;AACA,MAAI,iBAAiB,SAAS;AAC5B,UAAM,IAAI,MAAM,iBAAiB,YAAY,oBAAoB;AAAA,EACnE;AACA,UAAQ,kBAAkB,gBAAgB,gBAAgB;AAC1D,aAAW,KAAK,SAAS;AACvB,QAAI,QAAQ,CAAC,MAAM,QAAW;AAC5B,aAAO,QAAQ,CAAC;AAAA,IAClB;AAAA,EACF;AACA,SAAO;AACT;AAEA,SAAS,gCACP,QACsB;AACtB,SAAO;AAAA,IACL,SAAS;AAAA,MACP,MAAM;AAAA,MACN,SAAS;AAAA,QACP;AAAA,UACE,MAAM,OAAO,WAAW,WAAW,SAAS,OAAO;AAAA,QACrD;AAAA,MACF;AAAA,IACF;AAAA,IACA,cAAc;AAAA,IACd,KAAK;AAAA,EACP;AACF;AAkBO,SAAS,qCAEd,QAMc;AACd,QAAM;AAAA,IACJ,MAAM;AAAA,IACN;AAAA,IACA,QAAQ;AAAA,IACR,UAAAA;AAAA,IACA;AAAA,EACF,IAAI;AAEJ,QAAM,aACJA,WAAU,QAAQ,GAAG,eAAe,QAAQ,YAAY,IAAI,SAAS;AACvE,aAAO;AAAA,IACL;AAAA,MACE,MAAM;AAAA,MACN,GAAGA,WAAU;AAAA,MACb,cAAcA,WAAU;AAAA,IAC1B;AAAA,IACA,OAAO,SAAS,EAAE,YAAY,MAAM;AAClC,YAAMC,UAAS,aAAa,WAAW,SAAS,cAAc;AAC9D,YAAM,aAAS;AAAA,QACb;AAAA,QACA;AAAA,QACA;AAAA,MACF;AAEA,YAAM,SAAS,MAAM,OAAO,MAAM,eAAe;AAAA,QAC/C;AAAA,UACE,GAAGA;AAAA,UACH,QAAQ;AAAA,QACV;AAAA,QACA,EAAE,QAAQ,YAAY;AAAA,MACxB;AACA,aAAO,gCAAgC,MAAM;AAAA,IAC/C;AAAA,EACF;AACF;AAIO,SAAS,+BAEd,QAMC;AACD,QAAM;AAAA,IACJ;AAAA,IACA,OAAO;AAAA,IACP;AAAA,IACA,SAAS;AAAA,IACT;AAAA,EACF,IAAI;AACJ,aAAO,wBAAS;AAAA,IACd;AAAA,IACA,cAAc,gBAAiB;AAAA,IAC/B;AAAA,IACA;AAAA,IACA;AAAA,EACF,CAAC;AACH;","names":["modelRef","params"]}
1
+ {"version":3,"sources":["../src/audio.ts"],"sourcesContent":["/**\n * Copyright 2024 The Fire Company\n * Copyright 2024 Google LLC\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n * http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\nimport type {\n GenerateRequest,\n GenerateResponseData,\n ModelReference,\n} from 'genkit';\nimport { GenerationCommonConfigSchema, Message, modelRef, z } from 'genkit';\nimport type { ModelAction, ModelInfo } from 'genkit/model';\nimport { model } from 'genkit/plugin';\nimport OpenAI from 'openai';\nimport { Response } from 'openai/core.mjs';\nimport type {\n SpeechCreateParams,\n Transcription,\n TranscriptionCreateParams,\n} from 'openai/resources/audio/index.mjs';\nimport { PluginOptions } from './index.js';\nimport { maybeCreateRequestScopedOpenAIClient, toModelName } from './utils.js';\n\nexport type SpeechRequestBuilder = (\n req: GenerateRequest,\n params: SpeechCreateParams\n) => void;\nexport type TranscriptionRequestBuilder = (\n req: GenerateRequest,\n params: TranscriptionCreateParams\n) => void;\n\nexport const TRANSCRIPTION_MODEL_INFO = {\n supports: {\n media: true,\n output: ['text', 'json'],\n multiturn: false,\n systemRole: false,\n tools: false,\n },\n};\n\nexport const SPEECH_MODEL_INFO: ModelInfo = {\n supports: {\n media: false,\n output: ['media'],\n multiturn: false,\n systemRole: false,\n tools: false,\n },\n};\n\nconst ChunkingStrategySchema = z.object({\n type: z.string(),\n prefix_padding_ms: z.number().int().optional(),\n silence_duration_ms: z.number().int().optional(),\n threshold: z.number().min(0).max(1.0).optional(),\n});\nexport const TranscriptionConfigSchema = GenerationCommonConfigSchema.pick({\n temperature: true,\n}).extend({\n chunking_strategy: z\n .union([z.literal('auto'), ChunkingStrategySchema])\n .optional(),\n include: z.array(z.any()).optional(),\n language: z.string().optional(),\n timestamp_granularities: z.array(z.enum(['word', 'segment'])).optional(),\n response_format: z\n .enum(['json', 'text', 'srt', 'verbose_json', 'vtt'])\n .optional(),\n // TODO stream support\n});\n\nexport const SpeechConfigSchema = z.object({\n voice: z\n .enum(['alloy', 'echo', 'fable', 'onyx', 'nova', 'shimmer'])\n .default('alloy'),\n speed: z.number().min(0.25).max(4.0).optional(),\n response_format: z\n .enum(['mp3', 'opus', 'aac', 'flac', 'wav', 'pcm'])\n .optional(),\n});\n\n/**\n * Supported media formats for Audio generation\n */\nexport const RESPONSE_FORMAT_MEDIA_TYPES = {\n mp3: 'audio/mpeg',\n opus: 'audio/opus',\n aac: 'audio/aac',\n flac: 'audio/flac',\n wav: 'audio/wav',\n pcm: 'audio/L16',\n};\n\nexport function toTTSRequest(\n modelName: string,\n request: GenerateRequest,\n requestBuilder?: SpeechRequestBuilder\n): SpeechCreateParams {\n const {\n voice,\n version: modelVersion,\n temperature,\n maxOutputTokens,\n stopSequences,\n topK,\n topP,\n ...restOfConfig\n } = request.config ?? {};\n\n let options: SpeechCreateParams = {\n model: modelVersion ?? modelName,\n input: new Message(request.messages[0]).text,\n voice: voice ?? 'alloy',\n };\n if (requestBuilder) {\n requestBuilder(request, options);\n } else {\n options = {\n ...options,\n ...restOfConfig, // passthorugh rest of the config\n };\n }\n for (const k in options) {\n if (options[k] === undefined) {\n delete options[k];\n }\n }\n return options;\n}\n\nexport async function speechToGenerateResponse(\n response: Response,\n responseFormat: 'mp3' | 'opus' | 'aac' | 'flac' | 'wav' | 'pcm' = 'mp3'\n): Promise<GenerateResponseData> {\n const resultArrayBuffer = await response.arrayBuffer();\n const resultBuffer = Buffer.from(new Uint8Array(resultArrayBuffer));\n const mediaType = RESPONSE_FORMAT_MEDIA_TYPES[responseFormat];\n return {\n message: {\n role: 'model',\n content: [\n {\n media: {\n contentType: mediaType,\n url: `data:${mediaType};base64,${resultBuffer.toString('base64')}`,\n },\n },\n ],\n },\n finishReason: 'stop',\n raw: response,\n };\n}\n\n/**\n * Method to define a new Genkit Model that is compatible with the Open AI Audio\n * API. \n *\n * These models are to be used to create audio speech from a given request.\n * @param params An object containing parameters for defining the OpenAI speech\n * model.\n * @param params.ai The Genkit AI instance.\n * @param params.name The name of the model.\n * @param params.client The OpenAI client instance.\n * @param params.modelRef Optional reference to the model's configuration and\n * custom options.\n\n * @returns the created {@link ModelAction}\n */\nexport function defineCompatOpenAISpeechModel<\n CustomOptions extends z.ZodTypeAny = z.ZodTypeAny,\n>(params: {\n name: string;\n client: OpenAI;\n modelRef?: ModelReference<CustomOptions>;\n requestBuilder?: SpeechRequestBuilder;\n pluginOptions: PluginOptions;\n}): ModelAction {\n const {\n name,\n client: defaultClient,\n pluginOptions,\n modelRef,\n requestBuilder,\n } = params;\n const modelName = toModelName(name, pluginOptions?.name);\n const actionName = `${pluginOptions?.name ?? 'compat-oai'}/${modelName}`;\n\n return model(\n {\n name: actionName,\n ...modelRef?.info,\n configSchema: modelRef?.configSchema,\n },\n async (request, { abortSignal }) => {\n const ttsRequest = toTTSRequest(modelName, request, requestBuilder);\n const client = maybeCreateRequestScopedOpenAIClient(\n pluginOptions,\n request,\n defaultClient\n );\n const result = await client.audio.speech.create(ttsRequest, {\n signal: abortSignal,\n });\n return await speechToGenerateResponse(result, ttsRequest.response_format);\n }\n );\n}\n\n/** Speech generation ModelRef helper, with reasonable defaults for\n * OpenAI-compatible providers */\nexport function compatOaiSpeechModelRef<\n CustomOptions extends z.ZodTypeAny = z.ZodTypeAny,\n>(params: {\n name: string;\n info?: ModelInfo;\n configSchema?: CustomOptions;\n config?: any;\n namespace?: string;\n}) {\n const {\n name,\n info = SPEECH_MODEL_INFO,\n configSchema,\n config = undefined,\n namespace,\n } = params;\n return modelRef({\n name,\n configSchema: configSchema || (SpeechConfigSchema as any),\n info,\n config,\n namespace,\n });\n}\n\nexport function toSttRequest(\n modelName: string,\n request: GenerateRequest,\n requestBuilder?: TranscriptionRequestBuilder\n): TranscriptionCreateParams {\n const message = new Message(request.messages[0]);\n const media = message.media;\n if (!media?.url) {\n throw new Error('No media found in the request');\n }\n const mediaBuffer = Buffer.from(\n media.url.slice(media.url.indexOf(',') + 1),\n 'base64'\n );\n const mediaFile = new File([mediaBuffer], 'input', {\n type:\n media.contentType ??\n media.url.slice('data:'.length, media.url.indexOf(';')),\n });\n const {\n temperature,\n version: modelVersion,\n maxOutputTokens,\n stopSequences,\n topK,\n topP,\n ...restOfConfig\n } = request.config ?? {};\n\n let options: TranscriptionCreateParams = {\n model: modelVersion ?? modelName,\n file: mediaFile,\n prompt: message.text,\n temperature,\n };\n if (requestBuilder) {\n requestBuilder(request, options);\n } else {\n options = {\n ...options,\n ...restOfConfig, // passthrough rest of the config\n };\n }\n const outputFormat = request.output?.format as 'json' | 'text' | 'media';\n const customFormat = request.config?.response_format;\n if (outputFormat && customFormat) {\n if (\n outputFormat === 'json' &&\n customFormat !== 'json' &&\n customFormat !== 'verbose_json'\n ) {\n throw new Error(\n `Custom response format ${customFormat} is not compatible with output format ${outputFormat}`\n );\n }\n }\n if (outputFormat === 'media') {\n throw new Error(`Output format ${outputFormat} is not supported.`);\n }\n options.response_format = customFormat || outputFormat || 'text';\n for (const k in options) {\n if (options[k] === undefined) {\n delete options[k];\n }\n }\n return options;\n}\n\nexport function transcriptionToGenerateResponse(\n result: Transcription | string\n): GenerateResponseData {\n return {\n message: {\n role: 'model',\n content: [\n {\n text: typeof result === 'string' ? result : result.text,\n },\n ],\n },\n finishReason: 'stop',\n raw: result,\n };\n}\n\n/**\n * Method to define a new Genkit Model that is compatible with Open AI\n * Transcriptions API. \n *\n * These models are to be used to transcribe audio to text.\n *\n * @param params An object containing parameters for defining the OpenAI\n * transcription model.\n * @param params.ai The Genkit AI instance.\n * @param params.name The name of the model.\n * @param params.client The OpenAI client instance.\n * @param params.modelRef Optional reference to the model's configuration and\n * custom options.\n\n * @returns the created {@link ModelAction}\n */\nexport function defineCompatOpenAITranscriptionModel<\n CustomOptions extends z.ZodTypeAny = z.ZodTypeAny,\n>(params: {\n name: string;\n client: OpenAI;\n pluginOptions?: PluginOptions;\n modelRef?: ModelReference<CustomOptions>;\n requestBuilder?: TranscriptionRequestBuilder;\n}): ModelAction {\n const {\n name,\n pluginOptions,\n client: defaultClient,\n modelRef,\n requestBuilder,\n } = params;\n const modelName = toModelName(name, pluginOptions?.name);\n const actionName =\n modelRef?.name ?? `${pluginOptions?.name ?? 'compat-oai'}/${modelName}`;\n\n return model(\n {\n name: actionName,\n ...modelRef?.info,\n configSchema: modelRef?.configSchema,\n },\n async (request, { abortSignal }) => {\n const params = toSttRequest(modelName, request, requestBuilder);\n const client = maybeCreateRequestScopedOpenAIClient(\n pluginOptions,\n request,\n defaultClient\n );\n // Explicitly setting stream to false ensures we use the non-streaming overload\n const result = await client.audio.transcriptions.create(\n {\n ...params,\n stream: false,\n },\n { signal: abortSignal }\n );\n return transcriptionToGenerateResponse(result);\n }\n );\n}\n\n/** Transcription ModelRef helper, with reasonable defaults for\n * OpenAI-compatible providers */\nexport function compatOaiTranscriptionModelRef<\n CustomOptions extends z.ZodTypeAny = z.ZodTypeAny,\n>(params: {\n name: string;\n info?: ModelInfo;\n configSchema?: CustomOptions;\n config?: any;\n namespace?: string;\n}) {\n const {\n name,\n info = TRANSCRIPTION_MODEL_INFO,\n configSchema,\n config = undefined,\n namespace,\n } = params;\n return modelRef({\n name,\n configSchema: configSchema || (TranscriptionConfigSchema as any),\n info,\n config,\n namespace,\n });\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAqBA,oBAAmE;AAEnE,oBAAsB;AAStB,mBAAkE;AAW3D,MAAM,2BAA2B;AAAA,EACtC,UAAU;AAAA,IACR,OAAO;AAAA,IACP,QAAQ,CAAC,QAAQ,MAAM;AAAA,IACvB,WAAW;AAAA,IACX,YAAY;AAAA,IACZ,OAAO;AAAA,EACT;AACF;AAEO,MAAM,oBAA+B;AAAA,EAC1C,UAAU;AAAA,IACR,OAAO;AAAA,IACP,QAAQ,CAAC,OAAO;AAAA,IAChB,WAAW;AAAA,IACX,YAAY;AAAA,IACZ,OAAO;AAAA,EACT;AACF;AAEA,MAAM,yBAAyB,gBAAE,OAAO;AAAA,EACtC,MAAM,gBAAE,OAAO;AAAA,EACf,mBAAmB,gBAAE,OAAO,EAAE,IAAI,EAAE,SAAS;AAAA,EAC7C,qBAAqB,gBAAE,OAAO,EAAE,IAAI,EAAE,SAAS;AAAA,EAC/C,WAAW,gBAAE,OAAO,EAAE,IAAI,CAAC,EAAE,IAAI,CAAG,EAAE,SAAS;AACjD,CAAC;AACM,MAAM,4BAA4B,2CAA6B,KAAK;AAAA,EACzE,aAAa;AACf,CAAC,EAAE,OAAO;AAAA,EACR,mBAAmB,gBAChB,MAAM,CAAC,gBAAE,QAAQ,MAAM,GAAG,sBAAsB,CAAC,EACjD,SAAS;AAAA,EACZ,SAAS,gBAAE,MAAM,gBAAE,IAAI,CAAC,EAAE,SAAS;AAAA,EACnC,UAAU,gBAAE,OAAO,EAAE,SAAS;AAAA,EAC9B,yBAAyB,gBAAE,MAAM,gBAAE,KAAK,CAAC,QAAQ,SAAS,CAAC,CAAC,EAAE,SAAS;AAAA,EACvE,iBAAiB,gBACd,KAAK,CAAC,QAAQ,QAAQ,OAAO,gBAAgB,KAAK,CAAC,EACnD,SAAS;AAAA;AAEd,CAAC;AAEM,MAAM,qBAAqB,gBAAE,OAAO;AAAA,EACzC,OAAO,gBACJ,KAAK,CAAC,SAAS,QAAQ,SAAS,QAAQ,QAAQ,SAAS,CAAC,EAC1D,QAAQ,OAAO;AAAA,EAClB,OAAO,gBAAE,OAAO,EAAE,IAAI,IAAI,EAAE,IAAI,CAAG,EAAE,SAAS;AAAA,EAC9C,iBAAiB,gBACd,KAAK,CAAC,OAAO,QAAQ,OAAO,QAAQ,OAAO,KAAK,CAAC,EACjD,SAAS;AACd,CAAC;AAKM,MAAM,8BAA8B;AAAA,EACzC,KAAK;AAAA,EACL,MAAM;AAAA,EACN,KAAK;AAAA,EACL,MAAM;AAAA,EACN,KAAK;AAAA,EACL,KAAK;AACP;AAEO,SAAS,aACd,WACA,SACA,gBACoB;AACpB,QAAM;AAAA,IACJ;AAAA,IACA,SAAS;AAAA,IACT;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA,GAAG;AAAA,EACL,IAAI,QAAQ,UAAU,CAAC;AAEvB,MAAI,UAA8B;AAAA,IAChC,OAAO,gBAAgB;AAAA,IACvB,OAAO,IAAI,sBAAQ,QAAQ,SAAS,CAAC,CAAC,EAAE;AAAA,IACxC,OAAO,SAAS;AAAA,EAClB;AACA,MAAI,gBAAgB;AAClB,mBAAe,SAAS,OAAO;AAAA,EACjC,OAAO;AACL,cAAU;AAAA,MACR,GAAG;AAAA,MACH,GAAG;AAAA;AAAA,IACL;AAAA,EACF;AACA,aAAW,KAAK,SAAS;AACvB,QAAI,QAAQ,CAAC,MAAM,QAAW;AAC5B,aAAO,QAAQ,CAAC;AAAA,IAClB;AAAA,EACF;AACA,SAAO;AACT;AAEA,eAAsB,yBACpB,UACA,iBAAkE,OACnC;AAC/B,QAAM,oBAAoB,MAAM,SAAS,YAAY;AACrD,QAAM,eAAe,OAAO,KAAK,IAAI,WAAW,iBAAiB,CAAC;AAClE,QAAM,YAAY,4BAA4B,cAAc;AAC5D,SAAO;AAAA,IACL,SAAS;AAAA,MACP,MAAM;AAAA,MACN,SAAS;AAAA,QACP;AAAA,UACE,OAAO;AAAA,YACL,aAAa;AAAA,YACb,KAAK,QAAQ,SAAS,WAAW,aAAa,SAAS,QAAQ,CAAC;AAAA,UAClE;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAAA,IACA,cAAc;AAAA,IACd,KAAK;AAAA,EACP;AACF;AAiBO,SAAS,8BAEd,QAMc;AACd,QAAM;AAAA,IACJ;AAAA,IACA,QAAQ;AAAA,IACR;AAAA,IACA,UAAAA;AAAA,IACA;AAAA,EACF,IAAI;AACJ,QAAM,gBAAY,0BAAY,MAAM,eAAe,IAAI;AACvD,QAAM,aAAa,GAAG,eAAe,QAAQ,YAAY,IAAI,SAAS;AAEtE,aAAO;AAAA,IACL;AAAA,MACE,MAAM;AAAA,MACN,GAAGA,WAAU;AAAA,MACb,cAAcA,WAAU;AAAA,IAC1B;AAAA,IACA,OAAO,SAAS,EAAE,YAAY,MAAM;AAClC,YAAM,aAAa,aAAa,WAAW,SAAS,cAAc;AAClE,YAAM,aAAS;AAAA,QACb;AAAA,QACA;AAAA,QACA;AAAA,MACF;AACA,YAAM,SAAS,MAAM,OAAO,MAAM,OAAO,OAAO,YAAY;AAAA,QAC1D,QAAQ;AAAA,MACV,CAAC;AACD,aAAO,MAAM,yBAAyB,QAAQ,WAAW,eAAe;AAAA,IAC1E;AAAA,EACF;AACF;AAIO,SAAS,wBAEd,QAMC;AACD,QAAM;AAAA,IACJ;AAAA,IACA,OAAO;AAAA,IACP;AAAA,IACA,SAAS;AAAA,IACT;AAAA,EACF,IAAI;AACJ,aAAO,wBAAS;AAAA,IACd;AAAA,IACA,cAAc,gBAAiB;AAAA,IAC/B;AAAA,IACA;AAAA,IACA;AAAA,EACF,CAAC;AACH;AAEO,SAAS,aACd,WACA,SACA,gBAC2B;AAC3B,QAAM,UAAU,IAAI,sBAAQ,QAAQ,SAAS,CAAC,CAAC;AAC/C,QAAM,QAAQ,QAAQ;AACtB,MAAI,CAAC,OAAO,KAAK;AACf,UAAM,IAAI,MAAM,+BAA+B;AAAA,EACjD;AACA,QAAM,cAAc,OAAO;AAAA,IACzB,MAAM,IAAI,MAAM,MAAM,IAAI,QAAQ,GAAG,IAAI,CAAC;AAAA,IAC1C;AAAA,EACF;AACA,QAAM,YAAY,IAAI,KAAK,CAAC,WAAW,GAAG,SAAS;AAAA,IACjD,MACE,MAAM,eACN,MAAM,IAAI,MAAM,QAAQ,QAAQ,MAAM,IAAI,QAAQ,GAAG,CAAC;AAAA,EAC1D,CAAC;AACD,QAAM;AAAA,IACJ;AAAA,IACA,SAAS;AAAA,IACT;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA,GAAG;AAAA,EACL,IAAI,QAAQ,UAAU,CAAC;AAEvB,MAAI,UAAqC;AAAA,IACvC,OAAO,gBAAgB;AAAA,IACvB,MAAM;AAAA,IACN,QAAQ,QAAQ;AAAA,IAChB;AAAA,EACF;AACA,MAAI,gBAAgB;AAClB,mBAAe,SAAS,OAAO;AAAA,EACjC,OAAO;AACL,cAAU;AAAA,MACR,GAAG;AAAA,MACH,GAAG;AAAA;AAAA,IACL;AAAA,EACF;AACA,QAAM,eAAe,QAAQ,QAAQ;AACrC,QAAM,eAAe,QAAQ,QAAQ;AACrC,MAAI,gBAAgB,cAAc;AAChC,QACE,iBAAiB,UACjB,iBAAiB,UACjB,iBAAiB,gBACjB;AACA,YAAM,IAAI;AAAA,QACR,0BAA0B,YAAY,yCAAyC,YAAY;AAAA,MAC7F;AAAA,IACF;AAAA,EACF;AACA,MAAI,iBAAiB,SAAS;AAC5B,UAAM,IAAI,MAAM,iBAAiB,YAAY,oBAAoB;AAAA,EACnE;AACA,UAAQ,kBAAkB,gBAAgB,gBAAgB;AAC1D,aAAW,KAAK,SAAS;AACvB,QAAI,QAAQ,CAAC,MAAM,QAAW;AAC5B,aAAO,QAAQ,CAAC;AAAA,IAClB;AAAA,EACF;AACA,SAAO;AACT;AAEO,SAAS,gCACd,QACsB;AACtB,SAAO;AAAA,IACL,SAAS;AAAA,MACP,MAAM;AAAA,MACN,SAAS;AAAA,QACP;AAAA,UACE,MAAM,OAAO,WAAW,WAAW,SAAS,OAAO;AAAA,QACrD;AAAA,MACF;AAAA,IACF;AAAA,IACA,cAAc;AAAA,IACd,KAAK;AAAA,EACP;AACF;AAkBO,SAAS,qCAEd,QAMc;AACd,QAAM;AAAA,IACJ;AAAA,IACA;AAAA,IACA,QAAQ;AAAA,IACR,UAAAA;AAAA,IACA;AAAA,EACF,IAAI;AACJ,QAAM,gBAAY,0BAAY,MAAM,eAAe,IAAI;AACvD,QAAM,aACJA,WAAU,QAAQ,GAAG,eAAe,QAAQ,YAAY,IAAI,SAAS;AAEvE,aAAO;AAAA,IACL;AAAA,MACE,MAAM;AAAA,MACN,GAAGA,WAAU;AAAA,MACb,cAAcA,WAAU;AAAA,IAC1B;AAAA,IACA,OAAO,SAAS,EAAE,YAAY,MAAM;AAClC,YAAMC,UAAS,aAAa,WAAW,SAAS,cAAc;AAC9D,YAAM,aAAS;AAAA,QACb;AAAA,QACA;AAAA,QACA;AAAA,MACF;AAEA,YAAM,SAAS,MAAM,OAAO,MAAM,eAAe;AAAA,QAC/C;AAAA,UACE,GAAGA;AAAA,UACH,QAAQ;AAAA,QACV;AAAA,QACA,EAAE,QAAQ,YAAY;AAAA,MACxB;AACA,aAAO,gCAAgC,MAAM;AAAA,IAC/C;AAAA,EACF;AACF;AAIO,SAAS,+BAEd,QAMC;AACD,QAAM;AAAA,IACJ;AAAA,IACA,OAAO;AAAA,IACP;AAAA,IACA,SAAS;AAAA,IACT;AAAA,EACF,IAAI;AACJ,aAAO,wBAAS;AAAA,IACd;AAAA,IACA,cAAc,gBAAiB;AAAA,IAC/B;AAAA,IACA;AAAA,IACA;AAAA,EACF,CAAC;AACH;","names":["modelRef","params"]}
package/lib/audio.mjs CHANGED
@@ -80,7 +80,7 @@ function toTTSRequest(modelName, request, requestBuilder) {
80
80
  }
81
81
  return options;
82
82
  }
83
- async function toGenerateResponse(response, responseFormat = "mp3") {
83
+ async function speechToGenerateResponse(response, responseFormat = "mp3") {
84
84
  const resultArrayBuffer = await response.arrayBuffer();
85
85
  const resultBuffer = Buffer.from(new Uint8Array(resultArrayBuffer));
86
86
  const mediaType = RESPONSE_FORMAT_MEDIA_TYPES[responseFormat];
@@ -126,7 +126,7 @@ function defineCompatOpenAISpeechModel(params) {
126
126
  const result = await client.audio.speech.create(ttsRequest, {
127
127
  signal: abortSignal
128
128
  });
129
- return await toGenerateResponse(result, ttsRequest.response_format);
129
+ return await speechToGenerateResponse(result, ttsRequest.response_format);
130
130
  }
131
131
  );
132
132
  }
@@ -219,12 +219,13 @@ function transcriptionToGenerateResponse(result) {
219
219
  }
220
220
  function defineCompatOpenAITranscriptionModel(params) {
221
221
  const {
222
- name: modelName,
222
+ name,
223
223
  pluginOptions,
224
224
  client: defaultClient,
225
225
  modelRef: modelRef2,
226
226
  requestBuilder
227
227
  } = params;
228
+ const modelName = toModelName(name, pluginOptions?.name);
228
229
  const actionName = modelRef2?.name ?? `${pluginOptions?.name ?? "compat-oai"}/${modelName}`;
229
230
  return model(
230
231
  {
@@ -275,6 +276,10 @@ export {
275
276
  compatOaiSpeechModelRef,
276
277
  compatOaiTranscriptionModelRef,
277
278
  defineCompatOpenAISpeechModel,
278
- defineCompatOpenAITranscriptionModel
279
+ defineCompatOpenAITranscriptionModel,
280
+ speechToGenerateResponse,
281
+ toSttRequest,
282
+ toTTSRequest,
283
+ transcriptionToGenerateResponse
279
284
  };
280
285
  //# sourceMappingURL=audio.mjs.map
package/lib/audio.mjs.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/audio.ts"],"sourcesContent":["/**\n * Copyright 2024 The Fire Company\n * Copyright 2024 Google LLC\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n * http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\nimport type {\n GenerateRequest,\n GenerateResponseData,\n ModelReference,\n} from 'genkit';\nimport { GenerationCommonConfigSchema, Message, modelRef, z } from 'genkit';\nimport type { ModelAction, ModelInfo } from 'genkit/model';\nimport { model } from 'genkit/plugin';\nimport OpenAI from 'openai';\nimport { Response } from 'openai/core.mjs';\nimport type {\n SpeechCreateParams,\n Transcription,\n TranscriptionCreateParams,\n} from 'openai/resources/audio/index.mjs';\nimport { PluginOptions } from './index.js';\nimport { maybeCreateRequestScopedOpenAIClient, toModelName } from './utils.js';\n\nexport type SpeechRequestBuilder = (\n req: GenerateRequest,\n params: SpeechCreateParams\n) => void;\nexport type TranscriptionRequestBuilder = (\n req: GenerateRequest,\n params: TranscriptionCreateParams\n) => void;\n\nexport const TRANSCRIPTION_MODEL_INFO = {\n supports: {\n media: true,\n output: ['text', 'json'],\n multiturn: false,\n systemRole: false,\n tools: false,\n },\n};\n\nexport const SPEECH_MODEL_INFO: ModelInfo = {\n supports: {\n media: false,\n output: ['media'],\n multiturn: false,\n systemRole: false,\n tools: false,\n },\n};\n\nconst ChunkingStrategySchema = z.object({\n type: z.string(),\n prefix_padding_ms: z.number().int().optional(),\n silence_duration_ms: z.number().int().optional(),\n threshold: z.number().min(0).max(1.0).optional(),\n});\nexport const TranscriptionConfigSchema = GenerationCommonConfigSchema.pick({\n temperature: true,\n}).extend({\n chunking_strategy: z\n .union([z.literal('auto'), ChunkingStrategySchema])\n .optional(),\n include: z.array(z.any()).optional(),\n language: z.string().optional(),\n timestamp_granularities: z.array(z.enum(['word', 'segment'])).optional(),\n response_format: z\n .enum(['json', 'text', 'srt', 'verbose_json', 'vtt'])\n .optional(),\n // TODO stream support\n});\n\nexport const SpeechConfigSchema = z.object({\n voice: z\n .enum(['alloy', 'echo', 'fable', 'onyx', 'nova', 'shimmer'])\n .default('alloy'),\n speed: z.number().min(0.25).max(4.0).optional(),\n response_format: z\n .enum(['mp3', 'opus', 'aac', 'flac', 'wav', 'pcm'])\n .optional(),\n});\n\n/**\n * Supported media formats for Audio generation\n */\nexport const RESPONSE_FORMAT_MEDIA_TYPES = {\n mp3: 'audio/mpeg',\n opus: 'audio/opus',\n aac: 'audio/aac',\n flac: 'audio/flac',\n wav: 'audio/wav',\n pcm: 'audio/L16',\n};\n\nfunction toTTSRequest(\n modelName: string,\n request: GenerateRequest,\n requestBuilder?: SpeechRequestBuilder\n): SpeechCreateParams {\n const {\n voice,\n version: modelVersion,\n temperature,\n maxOutputTokens,\n stopSequences,\n topK,\n topP,\n ...restOfConfig\n } = request.config ?? {};\n\n let options: SpeechCreateParams = {\n model: modelVersion ?? modelName,\n input: new Message(request.messages[0]).text,\n voice: voice ?? 'alloy',\n };\n if (requestBuilder) {\n requestBuilder(request, options);\n } else {\n options = {\n ...options,\n ...restOfConfig, // passthorugh rest of the config\n };\n }\n for (const k in options) {\n if (options[k] === undefined) {\n delete options[k];\n }\n }\n return options;\n}\n\nasync function toGenerateResponse(\n response: Response,\n responseFormat: 'mp3' | 'opus' | 'aac' | 'flac' | 'wav' | 'pcm' = 'mp3'\n): Promise<GenerateResponseData> {\n const resultArrayBuffer = await response.arrayBuffer();\n const resultBuffer = Buffer.from(new Uint8Array(resultArrayBuffer));\n const mediaType = RESPONSE_FORMAT_MEDIA_TYPES[responseFormat];\n return {\n message: {\n role: 'model',\n content: [\n {\n media: {\n contentType: mediaType,\n url: `data:${mediaType};base64,${resultBuffer.toString('base64')}`,\n },\n },\n ],\n },\n finishReason: 'stop',\n raw: response,\n };\n}\n\n/**\n * Method to define a new Genkit Model that is compatible with the Open AI Audio\n * API. \n *\n * These models are to be used to create audio speech from a given request.\n * @param params An object containing parameters for defining the OpenAI speech\n * model.\n * @param params.ai The Genkit AI instance.\n * @param params.name The name of the model.\n * @param params.client The OpenAI client instance.\n * @param params.modelRef Optional reference to the model's configuration and\n * custom options.\n\n * @returns the created {@link ModelAction}\n */\nexport function defineCompatOpenAISpeechModel<\n CustomOptions extends z.ZodTypeAny = z.ZodTypeAny,\n>(params: {\n name: string;\n client: OpenAI;\n modelRef?: ModelReference<CustomOptions>;\n requestBuilder?: SpeechRequestBuilder;\n pluginOptions: PluginOptions;\n}): ModelAction {\n const {\n name,\n client: defaultClient,\n pluginOptions,\n modelRef,\n requestBuilder,\n } = params;\n const modelName = toModelName(name, pluginOptions?.name);\n const actionName = `${pluginOptions?.name ?? 'compat-oai'}/${modelName}`;\n\n return model(\n {\n name: actionName,\n ...modelRef?.info,\n configSchema: modelRef?.configSchema,\n },\n async (request, { abortSignal }) => {\n const ttsRequest = toTTSRequest(modelName, request, requestBuilder);\n const client = maybeCreateRequestScopedOpenAIClient(\n pluginOptions,\n request,\n defaultClient\n );\n const result = await client.audio.speech.create(ttsRequest, {\n signal: abortSignal,\n });\n return await toGenerateResponse(result, ttsRequest.response_format);\n }\n );\n}\n\n/** Speech generation ModelRef helper, with reasonable defaults for\n * OpenAI-compatible providers */\nexport function compatOaiSpeechModelRef<\n CustomOptions extends z.ZodTypeAny = z.ZodTypeAny,\n>(params: {\n name: string;\n info?: ModelInfo;\n configSchema?: CustomOptions;\n config?: any;\n namespace?: string;\n}) {\n const {\n name,\n info = SPEECH_MODEL_INFO,\n configSchema,\n config = undefined,\n namespace,\n } = params;\n return modelRef({\n name,\n configSchema: configSchema || (SpeechConfigSchema as any),\n info,\n config,\n namespace,\n });\n}\n\nfunction toSttRequest(\n modelName: string,\n request: GenerateRequest,\n requestBuilder?: TranscriptionRequestBuilder\n): TranscriptionCreateParams {\n const message = new Message(request.messages[0]);\n const media = message.media;\n if (!media?.url) {\n throw new Error('No media found in the request');\n }\n const mediaBuffer = Buffer.from(\n media.url.slice(media.url.indexOf(',') + 1),\n 'base64'\n );\n const mediaFile = new File([mediaBuffer], 'input', {\n type:\n media.contentType ??\n media.url.slice('data:'.length, media.url.indexOf(';')),\n });\n const {\n temperature,\n version: modelVersion,\n maxOutputTokens,\n stopSequences,\n topK,\n topP,\n ...restOfConfig\n } = request.config ?? {};\n\n let options: TranscriptionCreateParams = {\n model: modelVersion ?? modelName,\n file: mediaFile,\n prompt: message.text,\n temperature,\n };\n if (requestBuilder) {\n requestBuilder(request, options);\n } else {\n options = {\n ...options,\n ...restOfConfig, // passthrough rest of the config\n };\n }\n const outputFormat = request.output?.format as 'json' | 'text' | 'media';\n const customFormat = request.config?.response_format;\n if (outputFormat && customFormat) {\n if (\n outputFormat === 'json' &&\n customFormat !== 'json' &&\n customFormat !== 'verbose_json'\n ) {\n throw new Error(\n `Custom response format ${customFormat} is not compatible with output format ${outputFormat}`\n );\n }\n }\n if (outputFormat === 'media') {\n throw new Error(`Output format ${outputFormat} is not supported.`);\n }\n options.response_format = customFormat || outputFormat || 'text';\n for (const k in options) {\n if (options[k] === undefined) {\n delete options[k];\n }\n }\n return options;\n}\n\nfunction transcriptionToGenerateResponse(\n result: Transcription | string\n): GenerateResponseData {\n return {\n message: {\n role: 'model',\n content: [\n {\n text: typeof result === 'string' ? result : result.text,\n },\n ],\n },\n finishReason: 'stop',\n raw: result,\n };\n}\n\n/**\n * Method to define a new Genkit Model that is compatible with Open AI\n * Transcriptions API. \n *\n * These models are to be used to transcribe audio to text.\n *\n * @param params An object containing parameters for defining the OpenAI\n * transcription model.\n * @param params.ai The Genkit AI instance.\n * @param params.name The name of the model.\n * @param params.client The OpenAI client instance.\n * @param params.modelRef Optional reference to the model's configuration and\n * custom options.\n\n * @returns the created {@link ModelAction}\n */\nexport function defineCompatOpenAITranscriptionModel<\n CustomOptions extends z.ZodTypeAny = z.ZodTypeAny,\n>(params: {\n name: string;\n client: OpenAI;\n pluginOptions?: PluginOptions;\n modelRef?: ModelReference<CustomOptions>;\n requestBuilder?: TranscriptionRequestBuilder;\n}): ModelAction {\n const {\n name: modelName,\n pluginOptions,\n client: defaultClient,\n modelRef,\n requestBuilder,\n } = params;\n\n const actionName =\n modelRef?.name ?? `${pluginOptions?.name ?? 'compat-oai'}/${modelName}`;\n return model(\n {\n name: actionName,\n ...modelRef?.info,\n configSchema: modelRef?.configSchema,\n },\n async (request, { abortSignal }) => {\n const params = toSttRequest(modelName, request, requestBuilder);\n const client = maybeCreateRequestScopedOpenAIClient(\n pluginOptions,\n request,\n defaultClient\n );\n // Explicitly setting stream to false ensures we use the non-streaming overload\n const result = await client.audio.transcriptions.create(\n {\n ...params,\n stream: false,\n },\n { signal: abortSignal }\n );\n return transcriptionToGenerateResponse(result);\n }\n );\n}\n\n/** Transcription ModelRef helper, with reasonable defaults for\n * OpenAI-compatible providers */\nexport function compatOaiTranscriptionModelRef<\n CustomOptions extends z.ZodTypeAny = z.ZodTypeAny,\n>(params: {\n name: string;\n info?: ModelInfo;\n configSchema?: CustomOptions;\n config?: any;\n namespace?: string;\n}) {\n const {\n name,\n info = TRANSCRIPTION_MODEL_INFO,\n configSchema,\n config = undefined,\n namespace,\n } = params;\n return modelRef({\n name,\n configSchema: configSchema || (TranscriptionConfigSchema as any),\n info,\n config,\n namespace,\n });\n}\n"],"mappings":"AAqBA,SAAS,8BAA8B,SAAS,UAAU,SAAS;AAEnE,SAAS,aAAa;AAStB,SAAS,sCAAsC,mBAAmB;AAW3D,MAAM,2BAA2B;AAAA,EACtC,UAAU;AAAA,IACR,OAAO;AAAA,IACP,QAAQ,CAAC,QAAQ,MAAM;AAAA,IACvB,WAAW;AAAA,IACX,YAAY;AAAA,IACZ,OAAO;AAAA,EACT;AACF;AAEO,MAAM,oBAA+B;AAAA,EAC1C,UAAU;AAAA,IACR,OAAO;AAAA,IACP,QAAQ,CAAC,OAAO;AAAA,IAChB,WAAW;AAAA,IACX,YAAY;AAAA,IACZ,OAAO;AAAA,EACT;AACF;AAEA,MAAM,yBAAyB,EAAE,OAAO;AAAA,EACtC,MAAM,EAAE,OAAO;AAAA,EACf,mBAAmB,EAAE,OAAO,EAAE,IAAI,EAAE,SAAS;AAAA,EAC7C,qBAAqB,EAAE,OAAO,EAAE,IAAI,EAAE,SAAS;AAAA,EAC/C,WAAW,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,IAAI,CAAG,EAAE,SAAS;AACjD,CAAC;AACM,MAAM,4BAA4B,6BAA6B,KAAK;AAAA,EACzE,aAAa;AACf,CAAC,EAAE,OAAO;AAAA,EACR,mBAAmB,EAChB,MAAM,CAAC,EAAE,QAAQ,MAAM,GAAG,sBAAsB,CAAC,EACjD,SAAS;AAAA,EACZ,SAAS,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,SAAS;AAAA,EACnC,UAAU,EAAE,OAAO,EAAE,SAAS;AAAA,EAC9B,yBAAyB,EAAE,MAAM,EAAE,KAAK,CAAC,QAAQ,SAAS,CAAC,CAAC,EAAE,SAAS;AAAA,EACvE,iBAAiB,EACd,KAAK,CAAC,QAAQ,QAAQ,OAAO,gBAAgB,KAAK,CAAC,EACnD,SAAS;AAAA;AAEd,CAAC;AAEM,MAAM,qBAAqB,EAAE,OAAO;AAAA,EACzC,OAAO,EACJ,KAAK,CAAC,SAAS,QAAQ,SAAS,QAAQ,QAAQ,SAAS,CAAC,EAC1D,QAAQ,OAAO;AAAA,EAClB,OAAO,EAAE,OAAO,EAAE,IAAI,IAAI,EAAE,IAAI,CAAG,EAAE,SAAS;AAAA,EAC9C,iBAAiB,EACd,KAAK,CAAC,OAAO,QAAQ,OAAO,QAAQ,OAAO,KAAK,CAAC,EACjD,SAAS;AACd,CAAC;AAKM,MAAM,8BAA8B;AAAA,EACzC,KAAK;AAAA,EACL,MAAM;AAAA,EACN,KAAK;AAAA,EACL,MAAM;AAAA,EACN,KAAK;AAAA,EACL,KAAK;AACP;AAEA,SAAS,aACP,WACA,SACA,gBACoB;AACpB,QAAM;AAAA,IACJ;AAAA,IACA,SAAS;AAAA,IACT;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA,GAAG;AAAA,EACL,IAAI,QAAQ,UAAU,CAAC;AAEvB,MAAI,UAA8B;AAAA,IAChC,OAAO,gBAAgB;AAAA,IACvB,OAAO,IAAI,QAAQ,QAAQ,SAAS,CAAC,CAAC,EAAE;AAAA,IACxC,OAAO,SAAS;AAAA,EAClB;AACA,MAAI,gBAAgB;AAClB,mBAAe,SAAS,OAAO;AAAA,EACjC,OAAO;AACL,cAAU;AAAA,MACR,GAAG;AAAA,MACH,GAAG;AAAA;AAAA,IACL;AAAA,EACF;AACA,aAAW,KAAK,SAAS;AACvB,QAAI,QAAQ,CAAC,MAAM,QAAW;AAC5B,aAAO,QAAQ,CAAC;AAAA,IAClB;AAAA,EACF;AACA,SAAO;AACT;AAEA,eAAe,mBACb,UACA,iBAAkE,OACnC;AAC/B,QAAM,oBAAoB,MAAM,SAAS,YAAY;AACrD,QAAM,eAAe,OAAO,KAAK,IAAI,WAAW,iBAAiB,CAAC;AAClE,QAAM,YAAY,4BAA4B,cAAc;AAC5D,SAAO;AAAA,IACL,SAAS;AAAA,MACP,MAAM;AAAA,MACN,SAAS;AAAA,QACP;AAAA,UACE,OAAO;AAAA,YACL,aAAa;AAAA,YACb,KAAK,QAAQ,SAAS,WAAW,aAAa,SAAS,QAAQ,CAAC;AAAA,UAClE;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAAA,IACA,cAAc;AAAA,IACd,KAAK;AAAA,EACP;AACF;AAiBO,SAAS,8BAEd,QAMc;AACd,QAAM;AAAA,IACJ;AAAA,IACA,QAAQ;AAAA,IACR;AAAA,IACA,UAAAA;AAAA,IACA;AAAA,EACF,IAAI;AACJ,QAAM,YAAY,YAAY,MAAM,eAAe,IAAI;AACvD,QAAM,aAAa,GAAG,eAAe,QAAQ,YAAY,IAAI,SAAS;AAEtE,SAAO;AAAA,IACL;AAAA,MACE,MAAM;AAAA,MACN,GAAGA,WAAU;AAAA,MACb,cAAcA,WAAU;AAAA,IAC1B;AAAA,IACA,OAAO,SAAS,EAAE,YAAY,MAAM;AAClC,YAAM,aAAa,aAAa,WAAW,SAAS,cAAc;AAClE,YAAM,SAAS;AAAA,QACb;AAAA,QACA;AAAA,QACA;AAAA,MACF;AACA,YAAM,SAAS,MAAM,OAAO,MAAM,OAAO,OAAO,YAAY;AAAA,QAC1D,QAAQ;AAAA,MACV,CAAC;AACD,aAAO,MAAM,mBAAmB,QAAQ,WAAW,eAAe;AAAA,IACpE;AAAA,EACF;AACF;AAIO,SAAS,wBAEd,QAMC;AACD,QAAM;AAAA,IACJ;AAAA,IACA,OAAO;AAAA,IACP;AAAA,IACA,SAAS;AAAA,IACT;AAAA,EACF,IAAI;AACJ,SAAO,SAAS;AAAA,IACd;AAAA,IACA,cAAc,gBAAiB;AAAA,IAC/B;AAAA,IACA;AAAA,IACA;AAAA,EACF,CAAC;AACH;AAEA,SAAS,aACP,WACA,SACA,gBAC2B;AAC3B,QAAM,UAAU,IAAI,QAAQ,QAAQ,SAAS,CAAC,CAAC;AAC/C,QAAM,QAAQ,QAAQ;AACtB,MAAI,CAAC,OAAO,KAAK;AACf,UAAM,IAAI,MAAM,+BAA+B;AAAA,EACjD;AACA,QAAM,cAAc,OAAO;AAAA,IACzB,MAAM,IAAI,MAAM,MAAM,IAAI,QAAQ,GAAG,IAAI,CAAC;AAAA,IAC1C;AAAA,EACF;AACA,QAAM,YAAY,IAAI,KAAK,CAAC,WAAW,GAAG,SAAS;AAAA,IACjD,MACE,MAAM,eACN,MAAM,IAAI,MAAM,QAAQ,QAAQ,MAAM,IAAI,QAAQ,GAAG,CAAC;AAAA,EAC1D,CAAC;AACD,QAAM;AAAA,IACJ;AAAA,IACA,SAAS;AAAA,IACT;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA,GAAG;AAAA,EACL,IAAI,QAAQ,UAAU,CAAC;AAEvB,MAAI,UAAqC;AAAA,IACvC,OAAO,gBAAgB;AAAA,IACvB,MAAM;AAAA,IACN,QAAQ,QAAQ;AAAA,IAChB;AAAA,EACF;AACA,MAAI,gBAAgB;AAClB,mBAAe,SAAS,OAAO;AAAA,EACjC,OAAO;AACL,cAAU;AAAA,MACR,GAAG;AAAA,MACH,GAAG;AAAA;AAAA,IACL;AAAA,EACF;AACA,QAAM,eAAe,QAAQ,QAAQ;AACrC,QAAM,eAAe,QAAQ,QAAQ;AACrC,MAAI,gBAAgB,cAAc;AAChC,QACE,iBAAiB,UACjB,iBAAiB,UACjB,iBAAiB,gBACjB;AACA,YAAM,IAAI;AAAA,QACR,0BAA0B,YAAY,yCAAyC,YAAY;AAAA,MAC7F;AAAA,IACF;AAAA,EACF;AACA,MAAI,iBAAiB,SAAS;AAC5B,UAAM,IAAI,MAAM,iBAAiB,YAAY,oBAAoB;AAAA,EACnE;AACA,UAAQ,kBAAkB,gBAAgB,gBAAgB;AAC1D,aAAW,KAAK,SAAS;AACvB,QAAI,QAAQ,CAAC,MAAM,QAAW;AAC5B,aAAO,QAAQ,CAAC;AAAA,IAClB;AAAA,EACF;AACA,SAAO;AACT;AAEA,SAAS,gCACP,QACsB;AACtB,SAAO;AAAA,IACL,SAAS;AAAA,MACP,MAAM;AAAA,MACN,SAAS;AAAA,QACP;AAAA,UACE,MAAM,OAAO,WAAW,WAAW,SAAS,OAAO;AAAA,QACrD;AAAA,MACF;AAAA,IACF;AAAA,IACA,cAAc;AAAA,IACd,KAAK;AAAA,EACP;AACF;AAkBO,SAAS,qCAEd,QAMc;AACd,QAAM;AAAA,IACJ,MAAM;AAAA,IACN;AAAA,IACA,QAAQ;AAAA,IACR,UAAAA;AAAA,IACA;AAAA,EACF,IAAI;AAEJ,QAAM,aACJA,WAAU,QAAQ,GAAG,eAAe,QAAQ,YAAY,IAAI,SAAS;AACvE,SAAO;AAAA,IACL;AAAA,MACE,MAAM;AAAA,MACN,GAAGA,WAAU;AAAA,MACb,cAAcA,WAAU;AAAA,IAC1B;AAAA,IACA,OAAO,SAAS,EAAE,YAAY,MAAM;AAClC,YAAMC,UAAS,aAAa,WAAW,SAAS,cAAc;AAC9D,YAAM,SAAS;AAAA,QACb;AAAA,QACA;AAAA,QACA;AAAA,MACF;AAEA,YAAM,SAAS,MAAM,OAAO,MAAM,eAAe;AAAA,QAC/C;AAAA,UACE,GAAGA;AAAA,UACH,QAAQ;AAAA,QACV;AAAA,QACA,EAAE,QAAQ,YAAY;AAAA,MACxB;AACA,aAAO,gCAAgC,MAAM;AAAA,IAC/C;AAAA,EACF;AACF;AAIO,SAAS,+BAEd,QAMC;AACD,QAAM;AAAA,IACJ;AAAA,IACA,OAAO;AAAA,IACP;AAAA,IACA,SAAS;AAAA,IACT;AAAA,EACF,IAAI;AACJ,SAAO,SAAS;AAAA,IACd;AAAA,IACA,cAAc,gBAAiB;AAAA,IAC/B;AAAA,IACA;AAAA,IACA;AAAA,EACF,CAAC;AACH;","names":["modelRef","params"]}
1
+ {"version":3,"sources":["../src/audio.ts"],"sourcesContent":["/**\n * Copyright 2024 The Fire Company\n * Copyright 2024 Google LLC\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n * http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\nimport type {\n GenerateRequest,\n GenerateResponseData,\n ModelReference,\n} from 'genkit';\nimport { GenerationCommonConfigSchema, Message, modelRef, z } from 'genkit';\nimport type { ModelAction, ModelInfo } from 'genkit/model';\nimport { model } from 'genkit/plugin';\nimport OpenAI from 'openai';\nimport { Response } from 'openai/core.mjs';\nimport type {\n SpeechCreateParams,\n Transcription,\n TranscriptionCreateParams,\n} from 'openai/resources/audio/index.mjs';\nimport { PluginOptions } from './index.js';\nimport { maybeCreateRequestScopedOpenAIClient, toModelName } from './utils.js';\n\nexport type SpeechRequestBuilder = (\n req: GenerateRequest,\n params: SpeechCreateParams\n) => void;\nexport type TranscriptionRequestBuilder = (\n req: GenerateRequest,\n params: TranscriptionCreateParams\n) => void;\n\nexport const TRANSCRIPTION_MODEL_INFO = {\n supports: {\n media: true,\n output: ['text', 'json'],\n multiturn: false,\n systemRole: false,\n tools: false,\n },\n};\n\nexport const SPEECH_MODEL_INFO: ModelInfo = {\n supports: {\n media: false,\n output: ['media'],\n multiturn: false,\n systemRole: false,\n tools: false,\n },\n};\n\nconst ChunkingStrategySchema = z.object({\n type: z.string(),\n prefix_padding_ms: z.number().int().optional(),\n silence_duration_ms: z.number().int().optional(),\n threshold: z.number().min(0).max(1.0).optional(),\n});\nexport const TranscriptionConfigSchema = GenerationCommonConfigSchema.pick({\n temperature: true,\n}).extend({\n chunking_strategy: z\n .union([z.literal('auto'), ChunkingStrategySchema])\n .optional(),\n include: z.array(z.any()).optional(),\n language: z.string().optional(),\n timestamp_granularities: z.array(z.enum(['word', 'segment'])).optional(),\n response_format: z\n .enum(['json', 'text', 'srt', 'verbose_json', 'vtt'])\n .optional(),\n // TODO stream support\n});\n\nexport const SpeechConfigSchema = z.object({\n voice: z\n .enum(['alloy', 'echo', 'fable', 'onyx', 'nova', 'shimmer'])\n .default('alloy'),\n speed: z.number().min(0.25).max(4.0).optional(),\n response_format: z\n .enum(['mp3', 'opus', 'aac', 'flac', 'wav', 'pcm'])\n .optional(),\n});\n\n/**\n * Supported media formats for Audio generation\n */\nexport const RESPONSE_FORMAT_MEDIA_TYPES = {\n mp3: 'audio/mpeg',\n opus: 'audio/opus',\n aac: 'audio/aac',\n flac: 'audio/flac',\n wav: 'audio/wav',\n pcm: 'audio/L16',\n};\n\nexport function toTTSRequest(\n modelName: string,\n request: GenerateRequest,\n requestBuilder?: SpeechRequestBuilder\n): SpeechCreateParams {\n const {\n voice,\n version: modelVersion,\n temperature,\n maxOutputTokens,\n stopSequences,\n topK,\n topP,\n ...restOfConfig\n } = request.config ?? {};\n\n let options: SpeechCreateParams = {\n model: modelVersion ?? modelName,\n input: new Message(request.messages[0]).text,\n voice: voice ?? 'alloy',\n };\n if (requestBuilder) {\n requestBuilder(request, options);\n } else {\n options = {\n ...options,\n ...restOfConfig, // passthorugh rest of the config\n };\n }\n for (const k in options) {\n if (options[k] === undefined) {\n delete options[k];\n }\n }\n return options;\n}\n\nexport async function speechToGenerateResponse(\n response: Response,\n responseFormat: 'mp3' | 'opus' | 'aac' | 'flac' | 'wav' | 'pcm' = 'mp3'\n): Promise<GenerateResponseData> {\n const resultArrayBuffer = await response.arrayBuffer();\n const resultBuffer = Buffer.from(new Uint8Array(resultArrayBuffer));\n const mediaType = RESPONSE_FORMAT_MEDIA_TYPES[responseFormat];\n return {\n message: {\n role: 'model',\n content: [\n {\n media: {\n contentType: mediaType,\n url: `data:${mediaType};base64,${resultBuffer.toString('base64')}`,\n },\n },\n ],\n },\n finishReason: 'stop',\n raw: response,\n };\n}\n\n/**\n * Method to define a new Genkit Model that is compatible with the Open AI Audio\n * API. \n *\n * These models are to be used to create audio speech from a given request.\n * @param params An object containing parameters for defining the OpenAI speech\n * model.\n * @param params.ai The Genkit AI instance.\n * @param params.name The name of the model.\n * @param params.client The OpenAI client instance.\n * @param params.modelRef Optional reference to the model's configuration and\n * custom options.\n\n * @returns the created {@link ModelAction}\n */\nexport function defineCompatOpenAISpeechModel<\n CustomOptions extends z.ZodTypeAny = z.ZodTypeAny,\n>(params: {\n name: string;\n client: OpenAI;\n modelRef?: ModelReference<CustomOptions>;\n requestBuilder?: SpeechRequestBuilder;\n pluginOptions: PluginOptions;\n}): ModelAction {\n const {\n name,\n client: defaultClient,\n pluginOptions,\n modelRef,\n requestBuilder,\n } = params;\n const modelName = toModelName(name, pluginOptions?.name);\n const actionName = `${pluginOptions?.name ?? 'compat-oai'}/${modelName}`;\n\n return model(\n {\n name: actionName,\n ...modelRef?.info,\n configSchema: modelRef?.configSchema,\n },\n async (request, { abortSignal }) => {\n const ttsRequest = toTTSRequest(modelName, request, requestBuilder);\n const client = maybeCreateRequestScopedOpenAIClient(\n pluginOptions,\n request,\n defaultClient\n );\n const result = await client.audio.speech.create(ttsRequest, {\n signal: abortSignal,\n });\n return await speechToGenerateResponse(result, ttsRequest.response_format);\n }\n );\n}\n\n/** Speech generation ModelRef helper, with reasonable defaults for\n * OpenAI-compatible providers */\nexport function compatOaiSpeechModelRef<\n CustomOptions extends z.ZodTypeAny = z.ZodTypeAny,\n>(params: {\n name: string;\n info?: ModelInfo;\n configSchema?: CustomOptions;\n config?: any;\n namespace?: string;\n}) {\n const {\n name,\n info = SPEECH_MODEL_INFO,\n configSchema,\n config = undefined,\n namespace,\n } = params;\n return modelRef({\n name,\n configSchema: configSchema || (SpeechConfigSchema as any),\n info,\n config,\n namespace,\n });\n}\n\nexport function toSttRequest(\n modelName: string,\n request: GenerateRequest,\n requestBuilder?: TranscriptionRequestBuilder\n): TranscriptionCreateParams {\n const message = new Message(request.messages[0]);\n const media = message.media;\n if (!media?.url) {\n throw new Error('No media found in the request');\n }\n const mediaBuffer = Buffer.from(\n media.url.slice(media.url.indexOf(',') + 1),\n 'base64'\n );\n const mediaFile = new File([mediaBuffer], 'input', {\n type:\n media.contentType ??\n media.url.slice('data:'.length, media.url.indexOf(';')),\n });\n const {\n temperature,\n version: modelVersion,\n maxOutputTokens,\n stopSequences,\n topK,\n topP,\n ...restOfConfig\n } = request.config ?? {};\n\n let options: TranscriptionCreateParams = {\n model: modelVersion ?? modelName,\n file: mediaFile,\n prompt: message.text,\n temperature,\n };\n if (requestBuilder) {\n requestBuilder(request, options);\n } else {\n options = {\n ...options,\n ...restOfConfig, // passthrough rest of the config\n };\n }\n const outputFormat = request.output?.format as 'json' | 'text' | 'media';\n const customFormat = request.config?.response_format;\n if (outputFormat && customFormat) {\n if (\n outputFormat === 'json' &&\n customFormat !== 'json' &&\n customFormat !== 'verbose_json'\n ) {\n throw new Error(\n `Custom response format ${customFormat} is not compatible with output format ${outputFormat}`\n );\n }\n }\n if (outputFormat === 'media') {\n throw new Error(`Output format ${outputFormat} is not supported.`);\n }\n options.response_format = customFormat || outputFormat || 'text';\n for (const k in options) {\n if (options[k] === undefined) {\n delete options[k];\n }\n }\n return options;\n}\n\nexport function transcriptionToGenerateResponse(\n result: Transcription | string\n): GenerateResponseData {\n return {\n message: {\n role: 'model',\n content: [\n {\n text: typeof result === 'string' ? result : result.text,\n },\n ],\n },\n finishReason: 'stop',\n raw: result,\n };\n}\n\n/**\n * Method to define a new Genkit Model that is compatible with Open AI\n * Transcriptions API. \n *\n * These models are to be used to transcribe audio to text.\n *\n * @param params An object containing parameters for defining the OpenAI\n * transcription model.\n * @param params.ai The Genkit AI instance.\n * @param params.name The name of the model.\n * @param params.client The OpenAI client instance.\n * @param params.modelRef Optional reference to the model's configuration and\n * custom options.\n\n * @returns the created {@link ModelAction}\n */\nexport function defineCompatOpenAITranscriptionModel<\n CustomOptions extends z.ZodTypeAny = z.ZodTypeAny,\n>(params: {\n name: string;\n client: OpenAI;\n pluginOptions?: PluginOptions;\n modelRef?: ModelReference<CustomOptions>;\n requestBuilder?: TranscriptionRequestBuilder;\n}): ModelAction {\n const {\n name,\n pluginOptions,\n client: defaultClient,\n modelRef,\n requestBuilder,\n } = params;\n const modelName = toModelName(name, pluginOptions?.name);\n const actionName =\n modelRef?.name ?? `${pluginOptions?.name ?? 'compat-oai'}/${modelName}`;\n\n return model(\n {\n name: actionName,\n ...modelRef?.info,\n configSchema: modelRef?.configSchema,\n },\n async (request, { abortSignal }) => {\n const params = toSttRequest(modelName, request, requestBuilder);\n const client = maybeCreateRequestScopedOpenAIClient(\n pluginOptions,\n request,\n defaultClient\n );\n // Explicitly setting stream to false ensures we use the non-streaming overload\n const result = await client.audio.transcriptions.create(\n {\n ...params,\n stream: false,\n },\n { signal: abortSignal }\n );\n return transcriptionToGenerateResponse(result);\n }\n );\n}\n\n/** Transcription ModelRef helper, with reasonable defaults for\n * OpenAI-compatible providers */\nexport function compatOaiTranscriptionModelRef<\n CustomOptions extends z.ZodTypeAny = z.ZodTypeAny,\n>(params: {\n name: string;\n info?: ModelInfo;\n configSchema?: CustomOptions;\n config?: any;\n namespace?: string;\n}) {\n const {\n name,\n info = TRANSCRIPTION_MODEL_INFO,\n configSchema,\n config = undefined,\n namespace,\n } = params;\n return modelRef({\n name,\n configSchema: configSchema || (TranscriptionConfigSchema as any),\n info,\n config,\n namespace,\n });\n}\n"],"mappings":"AAqBA,SAAS,8BAA8B,SAAS,UAAU,SAAS;AAEnE,SAAS,aAAa;AAStB,SAAS,sCAAsC,mBAAmB;AAW3D,MAAM,2BAA2B;AAAA,EACtC,UAAU;AAAA,IACR,OAAO;AAAA,IACP,QAAQ,CAAC,QAAQ,MAAM;AAAA,IACvB,WAAW;AAAA,IACX,YAAY;AAAA,IACZ,OAAO;AAAA,EACT;AACF;AAEO,MAAM,oBAA+B;AAAA,EAC1C,UAAU;AAAA,IACR,OAAO;AAAA,IACP,QAAQ,CAAC,OAAO;AAAA,IAChB,WAAW;AAAA,IACX,YAAY;AAAA,IACZ,OAAO;AAAA,EACT;AACF;AAEA,MAAM,yBAAyB,EAAE,OAAO;AAAA,EACtC,MAAM,EAAE,OAAO;AAAA,EACf,mBAAmB,EAAE,OAAO,EAAE,IAAI,EAAE,SAAS;AAAA,EAC7C,qBAAqB,EAAE,OAAO,EAAE,IAAI,EAAE,SAAS;AAAA,EAC/C,WAAW,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,IAAI,CAAG,EAAE,SAAS;AACjD,CAAC;AACM,MAAM,4BAA4B,6BAA6B,KAAK;AAAA,EACzE,aAAa;AACf,CAAC,EAAE,OAAO;AAAA,EACR,mBAAmB,EAChB,MAAM,CAAC,EAAE,QAAQ,MAAM,GAAG,sBAAsB,CAAC,EACjD,SAAS;AAAA,EACZ,SAAS,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,SAAS;AAAA,EACnC,UAAU,EAAE,OAAO,EAAE,SAAS;AAAA,EAC9B,yBAAyB,EAAE,MAAM,EAAE,KAAK,CAAC,QAAQ,SAAS,CAAC,CAAC,EAAE,SAAS;AAAA,EACvE,iBAAiB,EACd,KAAK,CAAC,QAAQ,QAAQ,OAAO,gBAAgB,KAAK,CAAC,EACnD,SAAS;AAAA;AAEd,CAAC;AAEM,MAAM,qBAAqB,EAAE,OAAO;AAAA,EACzC,OAAO,EACJ,KAAK,CAAC,SAAS,QAAQ,SAAS,QAAQ,QAAQ,SAAS,CAAC,EAC1D,QAAQ,OAAO;AAAA,EAClB,OAAO,EAAE,OAAO,EAAE,IAAI,IAAI,EAAE,IAAI,CAAG,EAAE,SAAS;AAAA,EAC9C,iBAAiB,EACd,KAAK,CAAC,OAAO,QAAQ,OAAO,QAAQ,OAAO,KAAK,CAAC,EACjD,SAAS;AACd,CAAC;AAKM,MAAM,8BAA8B;AAAA,EACzC,KAAK;AAAA,EACL,MAAM;AAAA,EACN,KAAK;AAAA,EACL,MAAM;AAAA,EACN,KAAK;AAAA,EACL,KAAK;AACP;AAEO,SAAS,aACd,WACA,SACA,gBACoB;AACpB,QAAM;AAAA,IACJ;AAAA,IACA,SAAS;AAAA,IACT;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA,GAAG;AAAA,EACL,IAAI,QAAQ,UAAU,CAAC;AAEvB,MAAI,UAA8B;AAAA,IAChC,OAAO,gBAAgB;AAAA,IACvB,OAAO,IAAI,QAAQ,QAAQ,SAAS,CAAC,CAAC,EAAE;AAAA,IACxC,OAAO,SAAS;AAAA,EAClB;AACA,MAAI,gBAAgB;AAClB,mBAAe,SAAS,OAAO;AAAA,EACjC,OAAO;AACL,cAAU;AAAA,MACR,GAAG;AAAA,MACH,GAAG;AAAA;AAAA,IACL;AAAA,EACF;AACA,aAAW,KAAK,SAAS;AACvB,QAAI,QAAQ,CAAC,MAAM,QAAW;AAC5B,aAAO,QAAQ,CAAC;AAAA,IAClB;AAAA,EACF;AACA,SAAO;AACT;AAEA,eAAsB,yBACpB,UACA,iBAAkE,OACnC;AAC/B,QAAM,oBAAoB,MAAM,SAAS,YAAY;AACrD,QAAM,eAAe,OAAO,KAAK,IAAI,WAAW,iBAAiB,CAAC;AAClE,QAAM,YAAY,4BAA4B,cAAc;AAC5D,SAAO;AAAA,IACL,SAAS;AAAA,MACP,MAAM;AAAA,MACN,SAAS;AAAA,QACP;AAAA,UACE,OAAO;AAAA,YACL,aAAa;AAAA,YACb,KAAK,QAAQ,SAAS,WAAW,aAAa,SAAS,QAAQ,CAAC;AAAA,UAClE;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAAA,IACA,cAAc;AAAA,IACd,KAAK;AAAA,EACP;AACF;AAiBO,SAAS,8BAEd,QAMc;AACd,QAAM;AAAA,IACJ;AAAA,IACA,QAAQ;AAAA,IACR;AAAA,IACA,UAAAA;AAAA,IACA;AAAA,EACF,IAAI;AACJ,QAAM,YAAY,YAAY,MAAM,eAAe,IAAI;AACvD,QAAM,aAAa,GAAG,eAAe,QAAQ,YAAY,IAAI,SAAS;AAEtE,SAAO;AAAA,IACL;AAAA,MACE,MAAM;AAAA,MACN,GAAGA,WAAU;AAAA,MACb,cAAcA,WAAU;AAAA,IAC1B;AAAA,IACA,OAAO,SAAS,EAAE,YAAY,MAAM;AAClC,YAAM,aAAa,aAAa,WAAW,SAAS,cAAc;AAClE,YAAM,SAAS;AAAA,QACb;AAAA,QACA;AAAA,QACA;AAAA,MACF;AACA,YAAM,SAAS,MAAM,OAAO,MAAM,OAAO,OAAO,YAAY;AAAA,QAC1D,QAAQ;AAAA,MACV,CAAC;AACD,aAAO,MAAM,yBAAyB,QAAQ,WAAW,eAAe;AAAA,IAC1E;AAAA,EACF;AACF;AAIO,SAAS,wBAEd,QAMC;AACD,QAAM;AAAA,IACJ;AAAA,IACA,OAAO;AAAA,IACP;AAAA,IACA,SAAS;AAAA,IACT;AAAA,EACF,IAAI;AACJ,SAAO,SAAS;AAAA,IACd;AAAA,IACA,cAAc,gBAAiB;AAAA,IAC/B;AAAA,IACA;AAAA,IACA;AAAA,EACF,CAAC;AACH;AAEO,SAAS,aACd,WACA,SACA,gBAC2B;AAC3B,QAAM,UAAU,IAAI,QAAQ,QAAQ,SAAS,CAAC,CAAC;AAC/C,QAAM,QAAQ,QAAQ;AACtB,MAAI,CAAC,OAAO,KAAK;AACf,UAAM,IAAI,MAAM,+BAA+B;AAAA,EACjD;AACA,QAAM,cAAc,OAAO;AAAA,IACzB,MAAM,IAAI,MAAM,MAAM,IAAI,QAAQ,GAAG,IAAI,CAAC;AAAA,IAC1C;AAAA,EACF;AACA,QAAM,YAAY,IAAI,KAAK,CAAC,WAAW,GAAG,SAAS;AAAA,IACjD,MACE,MAAM,eACN,MAAM,IAAI,MAAM,QAAQ,QAAQ,MAAM,IAAI,QAAQ,GAAG,CAAC;AAAA,EAC1D,CAAC;AACD,QAAM;AAAA,IACJ;AAAA,IACA,SAAS;AAAA,IACT;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA,GAAG;AAAA,EACL,IAAI,QAAQ,UAAU,CAAC;AAEvB,MAAI,UAAqC;AAAA,IACvC,OAAO,gBAAgB;AAAA,IACvB,MAAM;AAAA,IACN,QAAQ,QAAQ;AAAA,IAChB;AAAA,EACF;AACA,MAAI,gBAAgB;AAClB,mBAAe,SAAS,OAAO;AAAA,EACjC,OAAO;AACL,cAAU;AAAA,MACR,GAAG;AAAA,MACH,GAAG;AAAA;AAAA,IACL;AAAA,EACF;AACA,QAAM,eAAe,QAAQ,QAAQ;AACrC,QAAM,eAAe,QAAQ,QAAQ;AACrC,MAAI,gBAAgB,cAAc;AAChC,QACE,iBAAiB,UACjB,iBAAiB,UACjB,iBAAiB,gBACjB;AACA,YAAM,IAAI;AAAA,QACR,0BAA0B,YAAY,yCAAyC,YAAY;AAAA,MAC7F;AAAA,IACF;AAAA,EACF;AACA,MAAI,iBAAiB,SAAS;AAC5B,UAAM,IAAI,MAAM,iBAAiB,YAAY,oBAAoB;AAAA,EACnE;AACA,UAAQ,kBAAkB,gBAAgB,gBAAgB;AAC1D,aAAW,KAAK,SAAS;AACvB,QAAI,QAAQ,CAAC,MAAM,QAAW;AAC5B,aAAO,QAAQ,CAAC;AAAA,IAClB;AAAA,EACF;AACA,SAAO;AACT;AAEO,SAAS,gCACd,QACsB;AACtB,SAAO;AAAA,IACL,SAAS;AAAA,MACP,MAAM;AAAA,MACN,SAAS;AAAA,QACP;AAAA,UACE,MAAM,OAAO,WAAW,WAAW,SAAS,OAAO;AAAA,QACrD;AAAA,MACF;AAAA,IACF;AAAA,IACA,cAAc;AAAA,IACd,KAAK;AAAA,EACP;AACF;AAkBO,SAAS,qCAEd,QAMc;AACd,QAAM;AAAA,IACJ;AAAA,IACA;AAAA,IACA,QAAQ;AAAA,IACR,UAAAA;AAAA,IACA;AAAA,EACF,IAAI;AACJ,QAAM,YAAY,YAAY,MAAM,eAAe,IAAI;AACvD,QAAM,aACJA,WAAU,QAAQ,GAAG,eAAe,QAAQ,YAAY,IAAI,SAAS;AAEvE,SAAO;AAAA,IACL;AAAA,MACE,MAAM;AAAA,MACN,GAAGA,WAAU;AAAA,MACb,cAAcA,WAAU;AAAA,IAC1B;AAAA,IACA,OAAO,SAAS,EAAE,YAAY,MAAM;AAClC,YAAMC,UAAS,aAAa,WAAW,SAAS,cAAc;AAC9D,YAAM,SAAS;AAAA,QACb;AAAA,QACA;AAAA,QACA;AAAA,MACF;AAEA,YAAM,SAAS,MAAM,OAAO,MAAM,eAAe;AAAA,QAC/C;AAAA,UACE,GAAGA;AAAA,UACH,QAAQ;AAAA,QACV;AAAA,QACA,EAAE,QAAQ,YAAY;AAAA,MACxB;AACA,aAAO,gCAAgC,MAAM;AAAA,IAC/C;AAAA,EACF;AACF;AAIO,SAAS,+BAEd,QAMC;AACD,QAAM;AAAA,IACJ;AAAA,IACA,OAAO;AAAA,IACP;AAAA,IACA,SAAS;AAAA,IACT;AAAA,EACF,IAAI;AACJ,SAAO,SAAS;AAAA,IACd;AAAA,IACA,cAAc,gBAAiB;AAAA,IAC/B;AAAA,IACA;AAAA,IACA;AAAA,EACF,CAAC;AACH;","names":["modelRef","params"]}
@@ -1,7 +1,8 @@
1
1
  import { z } from 'genkit';
2
2
  import { ModelInfo, ModelReference } from 'genkit/model';
3
- import { M as ModelRequestBuilder } from '../audio-CUuCwm-y.mjs';
3
+ import { M as ModelRequestBuilder } from '../audio-CJ8rzf35.mjs';
4
4
  import 'openai';
5
+ import 'openai/core.mjs';
5
6
  import 'openai/resources/audio/index.mjs';
6
7
  import 'genkit/plugin';
7
8
  import 'genkit/registry';
@@ -1,7 +1,8 @@
1
1
  import { z } from 'genkit';
2
2
  import { ModelInfo, ModelReference } from 'genkit/model';
3
- import { M as ModelRequestBuilder } from '../audio-CUuCwm-y.js';
3
+ import { M as ModelRequestBuilder } from '../audio-CJ8rzf35.js';
4
4
  import 'openai';
5
+ import 'openai/core.mjs';
5
6
  import 'openai/resources/audio/index.mjs';
6
7
  import 'genkit/plugin';
7
8
  import 'genkit/registry';
@@ -1,9 +1,10 @@
1
1
  import { z, ModelReference } from 'genkit';
2
2
  import { GenkitPluginV2 } from 'genkit/plugin';
3
- import { P as PluginOptions } from '../audio-CUuCwm-y.mjs';
3
+ import { P as PluginOptions } from '../audio-CJ8rzf35.mjs';
4
4
  import { SUPPORTED_DEEPSEEK_MODELS, DeepSeekChatCompletionConfigSchema } from './deepseek.mjs';
5
5
  import 'genkit/model';
6
6
  import 'openai';
7
+ import 'openai/core.mjs';
7
8
  import 'openai/resources/audio/index.mjs';
8
9
  import 'genkit/registry';
9
10
  import 'openai/resources/images.mjs';
@@ -1,9 +1,10 @@
1
1
  import { z, ModelReference } from 'genkit';
2
2
  import { GenkitPluginV2 } from 'genkit/plugin';
3
- import { P as PluginOptions } from '../audio-CUuCwm-y.js';
3
+ import { P as PluginOptions } from '../audio-CJ8rzf35.js';
4
4
  import { SUPPORTED_DEEPSEEK_MODELS, DeepSeekChatCompletionConfigSchema } from './deepseek.js';
5
5
  import 'genkit/model';
6
6
  import 'openai';
7
+ import 'openai/core.mjs';
7
8
  import 'openai/resources/audio/index.mjs';
8
9
  import 'genkit/registry';
9
10
  import 'openai/resources/images.mjs';
@@ -1,7 +1,8 @@
1
1
  import 'genkit';
2
2
  import 'openai';
3
- export { d as defineCompatOpenAIEmbedder } from './audio-CUuCwm-y.mjs';
3
+ export { d as defineCompatOpenAIEmbedder } from './audio-CJ8rzf35.mjs';
4
4
  import 'genkit/model';
5
+ import 'openai/core.mjs';
5
6
  import 'openai/resources/audio/index.mjs';
6
7
  import 'genkit/plugin';
7
8
  import 'genkit/registry';
package/lib/embedder.d.ts CHANGED
@@ -1,7 +1,8 @@
1
1
  import 'genkit';
2
2
  import 'openai';
3
- export { d as defineCompatOpenAIEmbedder } from './audio-CUuCwm-y.js';
3
+ export { d as defineCompatOpenAIEmbedder } from './audio-CJ8rzf35.js';
4
4
  import 'genkit/model';
5
+ import 'openai/core.mjs';
5
6
  import 'openai/resources/audio/index.mjs';
6
7
  import 'genkit/plugin';
7
8
  import 'genkit/registry';
package/lib/image.d.mts CHANGED
@@ -2,7 +2,8 @@ import 'genkit';
2
2
  import 'genkit/model';
3
3
  import 'openai';
4
4
  import 'openai/resources/images.mjs';
5
- export { b as IMAGE_GENERATION_MODEL_INFO, I as ImageGenerationCommonConfigSchema, a as ImageRequestBuilder, e as compatOaiImageModelRef, c as defineCompatOpenAIImageModel } from './audio-CUuCwm-y.mjs';
5
+ export { b as IMAGE_GENERATION_MODEL_INFO, I as ImageGenerationCommonConfigSchema, a as ImageRequestBuilder, e as compatOaiImageModelRef, c as defineCompatOpenAIImageModel } from './audio-CJ8rzf35.mjs';
6
+ import 'openai/core.mjs';
6
7
  import 'openai/resources/audio/index.mjs';
7
8
  import 'genkit/plugin';
8
9
  import 'genkit/registry';
package/lib/image.d.ts CHANGED
@@ -2,7 +2,8 @@ import 'genkit';
2
2
  import 'genkit/model';
3
3
  import 'openai';
4
4
  import 'openai/resources/images.mjs';
5
- export { b as IMAGE_GENERATION_MODEL_INFO, I as ImageGenerationCommonConfigSchema, a as ImageRequestBuilder, e as compatOaiImageModelRef, c as defineCompatOpenAIImageModel } from './audio-CUuCwm-y.js';
5
+ export { b as IMAGE_GENERATION_MODEL_INFO, I as ImageGenerationCommonConfigSchema, a as ImageRequestBuilder, e as compatOaiImageModelRef, c as defineCompatOpenAIImageModel } from './audio-CJ8rzf35.js';
6
+ import 'openai/core.mjs';
6
7
  import 'openai/resources/audio/index.mjs';
7
8
  import 'genkit/plugin';
8
9
  import 'genkit/registry';
package/lib/index.d.mts CHANGED
@@ -2,8 +2,9 @@ import 'genkit/plugin';
2
2
  import 'genkit';
3
3
  import 'genkit/registry';
4
4
  import 'openai';
5
- export { C as ChatCompletionCommonConfigSchema, I as ImageGenerationCommonConfigSchema, a as ImageRequestBuilder, M as ModelRequestBuilder, P as PluginOptions, S as SpeechConfigSchema, v as SpeechRequestBuilder, T as TranscriptionConfigSchema, w as TranscriptionRequestBuilder, e as compatOaiImageModelRef, n as compatOaiModelRef, q as compatOaiSpeechModelRef, r as compatOaiTranscriptionModelRef, p as default, d as defineCompatOpenAIEmbedder, c as defineCompatOpenAIImageModel, m as defineCompatOpenAIModel, s as defineCompatOpenAISpeechModel, u as defineCompatOpenAITranscriptionModel, p as openAICompatible, o as openAIModelRunner } from './audio-CUuCwm-y.mjs';
5
+ export { C as ChatCompletionCommonConfigSchema, I as ImageGenerationCommonConfigSchema, a as ImageRequestBuilder, M as ModelRequestBuilder, P as PluginOptions, S as SpeechConfigSchema, v as SpeechRequestBuilder, T as TranscriptionConfigSchema, w as TranscriptionRequestBuilder, e as compatOaiImageModelRef, n as compatOaiModelRef, q as compatOaiSpeechModelRef, r as compatOaiTranscriptionModelRef, p as default, d as defineCompatOpenAIEmbedder, c as defineCompatOpenAIImageModel, m as defineCompatOpenAIModel, s as defineCompatOpenAISpeechModel, u as defineCompatOpenAITranscriptionModel, p as openAICompatible, o as openAIModelRunner } from './audio-CJ8rzf35.mjs';
6
6
  import 'genkit/model';
7
+ import 'openai/core.mjs';
7
8
  import 'openai/resources/audio/index.mjs';
8
9
  import 'openai/resources/images.mjs';
9
10
  import 'openai/resources/index.mjs';
package/lib/index.d.ts CHANGED
@@ -2,8 +2,9 @@ import 'genkit/plugin';
2
2
  import 'genkit';
3
3
  import 'genkit/registry';
4
4
  import 'openai';
5
- export { C as ChatCompletionCommonConfigSchema, I as ImageGenerationCommonConfigSchema, a as ImageRequestBuilder, M as ModelRequestBuilder, P as PluginOptions, S as SpeechConfigSchema, v as SpeechRequestBuilder, T as TranscriptionConfigSchema, w as TranscriptionRequestBuilder, e as compatOaiImageModelRef, n as compatOaiModelRef, q as compatOaiSpeechModelRef, r as compatOaiTranscriptionModelRef, p as default, d as defineCompatOpenAIEmbedder, c as defineCompatOpenAIImageModel, m as defineCompatOpenAIModel, s as defineCompatOpenAISpeechModel, u as defineCompatOpenAITranscriptionModel, p as openAICompatible, o as openAIModelRunner } from './audio-CUuCwm-y.js';
5
+ export { C as ChatCompletionCommonConfigSchema, I as ImageGenerationCommonConfigSchema, a as ImageRequestBuilder, M as ModelRequestBuilder, P as PluginOptions, S as SpeechConfigSchema, v as SpeechRequestBuilder, T as TranscriptionConfigSchema, w as TranscriptionRequestBuilder, e as compatOaiImageModelRef, n as compatOaiModelRef, q as compatOaiSpeechModelRef, r as compatOaiTranscriptionModelRef, p as default, d as defineCompatOpenAIEmbedder, c as defineCompatOpenAIImageModel, m as defineCompatOpenAIModel, s as defineCompatOpenAISpeechModel, u as defineCompatOpenAITranscriptionModel, p as openAICompatible, o as openAIModelRunner } from './audio-CJ8rzf35.js';
6
6
  import 'genkit/model';
7
+ import 'openai/core.mjs';
7
8
  import 'openai/resources/audio/index.mjs';
8
9
  import 'openai/resources/images.mjs';
9
10
  import 'openai/resources/index.mjs';
package/lib/model.d.mts CHANGED
@@ -2,7 +2,8 @@ import 'genkit';
2
2
  import 'genkit/model';
3
3
  import 'openai';
4
4
  import 'openai/resources/index.mjs';
5
- export { C as ChatCompletionCommonConfigSchema, M as ModelRequestBuilder, n as compatOaiModelRef, m as defineCompatOpenAIModel, j as fromOpenAIChoice, k as fromOpenAIChunkChoice, i as fromOpenAIToolCall, o as openAIModelRunner, h as toOpenAIMessages, l as toOpenAIRequestBody, t as toOpenAIRole, g as toOpenAITextAndMedia, f as toOpenAITool } from './audio-CUuCwm-y.mjs';
5
+ export { C as ChatCompletionCommonConfigSchema, M as ModelRequestBuilder, n as compatOaiModelRef, m as defineCompatOpenAIModel, j as fromOpenAIChoice, k as fromOpenAIChunkChoice, i as fromOpenAIToolCall, o as openAIModelRunner, h as toOpenAIMessages, l as toOpenAIRequestBody, t as toOpenAIRole, g as toOpenAITextAndMedia, f as toOpenAITool } from './audio-CJ8rzf35.mjs';
6
+ import 'openai/core.mjs';
6
7
  import 'openai/resources/audio/index.mjs';
7
8
  import 'genkit/plugin';
8
9
  import 'genkit/registry';
package/lib/model.d.ts CHANGED
@@ -2,7 +2,8 @@ import 'genkit';
2
2
  import 'genkit/model';
3
3
  import 'openai';
4
4
  import 'openai/resources/index.mjs';
5
- export { C as ChatCompletionCommonConfigSchema, M as ModelRequestBuilder, n as compatOaiModelRef, m as defineCompatOpenAIModel, j as fromOpenAIChoice, k as fromOpenAIChunkChoice, i as fromOpenAIToolCall, o as openAIModelRunner, h as toOpenAIMessages, l as toOpenAIRequestBody, t as toOpenAIRole, g as toOpenAITextAndMedia, f as toOpenAITool } from './audio-CUuCwm-y.js';
5
+ export { C as ChatCompletionCommonConfigSchema, M as ModelRequestBuilder, n as compatOaiModelRef, m as defineCompatOpenAIModel, j as fromOpenAIChoice, k as fromOpenAIChunkChoice, i as fromOpenAIToolCall, o as openAIModelRunner, h as toOpenAIMessages, l as toOpenAIRequestBody, t as toOpenAIRole, g as toOpenAITextAndMedia, f as toOpenAITool } from './audio-CJ8rzf35.js';
6
+ import 'openai/core.mjs';
6
7
  import 'openai/resources/audio/index.mjs';
7
8
  import 'genkit/plugin';
8
9
  import 'genkit/registry';
package/lib/model.js CHANGED
@@ -103,7 +103,7 @@ function toOpenAITextAndMedia(part, visualDetailLevel) {
103
103
  contentType = extracted.contentType;
104
104
  }
105
105
  }
106
- if (isImageContentType(contentType)) {
106
+ if (!contentType || isImageContentType(contentType)) {
107
107
  return {
108
108
  type: "image_url",
109
109
  image_url: {