@ai-sdk/openai 1.3.7 → 1.3.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -1611,16 +1611,196 @@ var openaiImageResponseSchema = z5.object({
1611
1611
  data: z5.array(z5.object({ b64_json: z5.string() }))
1612
1612
  });
1613
1613
 
1614
- // src/responses/openai-responses-language-model.ts
1614
+ // src/openai-transcription-model.ts
1615
1615
  import {
1616
1616
  combineHeaders as combineHeaders5,
1617
- createEventSourceResponseHandler as createEventSourceResponseHandler3,
1617
+ convertBase64ToUint8Array,
1618
1618
  createJsonResponseHandler as createJsonResponseHandler5,
1619
- generateId as generateId2,
1620
1619
  parseProviderOptions,
1621
- postJsonToApi as postJsonToApi5
1620
+ postFormDataToApi
1622
1621
  } from "@ai-sdk/provider-utils";
1623
1622
  import { z as z6 } from "zod";
1623
// Schema for the options read from `providerOptions.openai` when building a
// transcription request. Every field is optional; `temperature` falls back to 0
// and `timestampGranularities` falls back to ["segment"] when omitted.
var OpenAIProviderOptionsSchema = (() => {
  const includeField = z6.array(z6.string()).optional().describe(
    "Additional information to include in the transcription response."
  );
  const languageField = z6.string().optional().describe(
    "The language of the input audio in ISO-639-1 format."
  );
  const promptField = z6.string().optional().describe(
    "An optional text to guide the model's style or continue a previous audio segment."
  );
  const temperatureField = z6.number().min(0).max(1).optional().default(0).describe(
    "The sampling temperature, between 0 and 1."
  );
  const granularityField = z6.array(z6.enum(["word", "segment"])).optional().default(["segment"]).describe(
    "The timestamp granularities to populate for this transcription."
  );
  return z6.object({
    include: includeField,
    language: languageField,
    prompt: promptField,
    temperature: temperatureField,
    timestampGranularities: granularityField
  });
})();
1636
// Lookup table from lowercase English language names to two-letter language
// codes. It is consulted with the language string taken from the API reply, so
// it is built on a null prototype: names inherited from Object.prototype
// ("toString", "constructor", ...) must never look like known languages.
var languageMap = Object.assign(Object.create(null), {
  afrikaans: "af",
  arabic: "ar",
  armenian: "hy",
  azerbaijani: "az",
  belarusian: "be",
  bosnian: "bs",
  bulgarian: "bg",
  catalan: "ca",
  chinese: "zh",
  croatian: "hr",
  czech: "cs",
  danish: "da",
  dutch: "nl",
  english: "en",
  estonian: "et",
  finnish: "fi",
  french: "fr",
  galician: "gl",
  german: "de",
  greek: "el",
  hebrew: "he",
  hindi: "hi",
  hungarian: "hu",
  icelandic: "is",
  indonesian: "id",
  italian: "it",
  japanese: "ja",
  kannada: "kn",
  kazakh: "kk",
  korean: "ko",
  latvian: "lv",
  lithuanian: "lt",
  macedonian: "mk",
  malay: "ms",
  marathi: "mr",
  maori: "mi",
  nepali: "ne",
  norwegian: "no",
  persian: "fa",
  polish: "pl",
  portuguese: "pt",
  romanian: "ro",
  russian: "ru",
  serbian: "sr",
  slovak: "sk",
  slovenian: "sl",
  spanish: "es",
  swahili: "sw",
  swedish: "sv",
  tagalog: "tl",
  tamil: "ta",
  thai: "th",
  turkish: "tr",
  ukrainian: "uk",
  urdu: "ur",
  vietnamese: "vi",
  welsh: "cy"
});
1695
/**
 * Transcription model that posts audio to the `/audio/transcriptions` path as
 * multipart form data and maps the JSON reply to a transcription result.
 */
var OpenAITranscriptionModel = class {
  /**
   * @param modelId Model identifier; sent as the `model` form field and passed
   *   to `config.url`.
   * @param config Object supplying `provider`, `url({ path, modelId })`,
   *   `headers()`, and optionally `fetch` and `_internal.currentDate()`.
   */
  constructor(modelId, config) {
    this.modelId = modelId;
    this.config = config;
    this.specificationVersion = "v1";
  }

  /** Provider name taken from the config. */
  get provider() {
    return this.config.provider;
  }

  /**
   * Builds the multipart body for a transcription request.
   *
   * @param audio Either a `Uint8Array` or a base64 string (decoded with
   *   `convertBase64ToUint8Array`).
   * @param mediaType Used as the `type` of the uploaded file part.
   * @param providerOptions Options object; the `openai` entry is parsed with
   *   `OpenAIProviderOptionsSchema`.
   * @returns `{ formData, warnings }`; `warnings` is always an empty array here.
   */
  getArgs({
    audio,
    mediaType,
    providerOptions
  }) {
    const warnings = [];
    const openAIOptions = parseProviderOptions({
      provider: "openai",
      providerOptions,
      schema: OpenAIProviderOptionsSchema
    });
    const formData = new FormData();
    const audioBytes = audio instanceof Uint8Array ? audio : convertBase64ToUint8Array(audio);
    const blob = new Blob([audioBytes]);
    formData.append("model", this.modelId);
    formData.append("file", new File([blob], "audio", { type: mediaType }));
    if (openAIOptions) {
      // Translate the camelCase option names to the form-field names.
      const transcriptionModelOptions = {
        include: openAIOptions.include,
        language: openAIOptions.language,
        prompt: openAIOptions.prompt,
        temperature: openAIOptions.temperature,
        timestamp_granularities: openAIOptions.timestampGranularities
      };
      // NOTE(review): FormData string-coerces non-Blob values, so array options
      // (include, timestamp_granularities) go out as one comma-joined string
      // under the bare key. The OpenAI reference appears to name these fields
      // `include[]` / `timestamp_granularities[]` - confirm the intended wire
      // format before changing it, since no `response_format` is sent here.
      for (const fieldName of Object.keys(transcriptionModelOptions)) {
        const fieldValue = transcriptionModelOptions[fieldName];
        if (fieldValue !== void 0) {
          formData.append(fieldName, fieldValue);
        }
      }
    }
    return {
      formData,
      warnings
    };
  }

  /**
   * Sends the transcription request and maps the reply.
   *
   * @param options Call options: the `getArgs` inputs plus optional `headers`
   *   (merged over `config.headers()`) and `abortSignal`.
   * @returns Object with `text`, `segments` (one per entry of the reply's
   *   `words`, or an empty array), `language` (mapped through `languageMap`, or
   *   `undefined` when absent or unknown), `durationInSeconds`, `warnings`, and
   *   `response` metadata (timestamp, model id, headers, raw body).
   */
  async doGenerate(options) {
    // Take the timestamp before the request; `_internal.currentDate` lets the
    // config inject a clock, otherwise the current time is used.
    const internal = this.config._internal;
    const clock = internal == null ? void 0 : internal.currentDate;
    const injectedDate = clock == null ? void 0 : clock.call(internal);
    const currentDate = injectedDate != null ? injectedDate : /* @__PURE__ */ new Date();
    const { formData, warnings } = this.getArgs(options);
    const {
      value: response,
      responseHeaders,
      rawValue: rawResponse
    } = await postFormDataToApi({
      url: this.config.url({
        path: "/audio/transcriptions",
        modelId: this.modelId
      }),
      headers: combineHeaders5(this.config.headers(), options.headers),
      formData,
      failedResponseHandler: openaiFailedResponseHandler,
      successfulResponseHandler: createJsonResponseHandler5(
        openaiTranscriptionResponseSchema
      ),
      abortSignal: options.abortSignal,
      fetch: this.config.fetch
    });
    // Own-property check: the `in` operator also matches names inherited from
    // Object.prototype (e.g. "toString"), which would leak a function out as
    // the language code.
    const reportedLanguage = response.language;
    const language = reportedLanguage != null && Object.prototype.hasOwnProperty.call(languageMap, reportedLanguage) ? languageMap[reportedLanguage] : void 0;
    const segments = response.words == null ? [] : response.words.map((word) => ({
      text: word.word,
      startSecond: word.start,
      endSecond: word.end
    }));
    return {
      text: response.text,
      segments,
      language,
      // Normalise a null duration to undefined.
      durationInSeconds: response.duration != null ? response.duration : void 0,
      warnings,
      response: {
        timestamp: currentDate,
        modelId: this.modelId,
        headers: responseHeaders,
        body: rawResponse
      }
    };
  }
};
1781
// Shape expected from a successful transcription reply: the transcript text is
// required; language, duration and the per-word timing list may be missing or
// null.
var openaiTranscriptionResponseSchema = (() => {
  // One timed word: the word itself plus its start and end values.
  const timedWord = z6.object({
    word: z6.string(),
    start: z6.number(),
    end: z6.number()
  });
  return z6.object({
    text: z6.string(),
    language: z6.string().nullish(),
    duration: z6.number().nullish(),
    words: z6.array(timedWord).nullish()
  });
})();
1793
+
1794
+ // src/responses/openai-responses-language-model.ts
1795
+ import {
1796
+ combineHeaders as combineHeaders6,
1797
+ createEventSourceResponseHandler as createEventSourceResponseHandler3,
1798
+ createJsonResponseHandler as createJsonResponseHandler6,
1799
+ generateId as generateId2,
1800
+ parseProviderOptions as parseProviderOptions2,
1801
+ postJsonToApi as postJsonToApi5
1802
+ } from "@ai-sdk/provider-utils";
1803
+ import { z as z7 } from "zod";
1624
1804
 
1625
1805
  // src/responses/convert-to-openai-responses-messages.ts
1626
1806
  import {
@@ -1911,7 +2091,7 @@ var OpenAIResponsesLanguageModel = class {
1911
2091
  systemMessageMode: modelConfig.systemMessageMode
1912
2092
  });
1913
2093
  warnings.push(...messageWarnings);
1914
- const openaiOptions = parseProviderOptions({
2094
+ const openaiOptions = parseProviderOptions2({
1915
2095
  provider: "openai",
1916
2096
  providerOptions: providerMetadata,
1917
2097
  schema: openaiResponsesProviderOptionsSchema
@@ -2036,53 +2216,53 @@ var OpenAIResponsesLanguageModel = class {
2036
2216
  path: "/responses",
2037
2217
  modelId: this.modelId
2038
2218
  }),
2039
- headers: combineHeaders5(this.config.headers(), options.headers),
2219
+ headers: combineHeaders6(this.config.headers(), options.headers),
2040
2220
  body,
2041
2221
  failedResponseHandler: openaiFailedResponseHandler,
2042
- successfulResponseHandler: createJsonResponseHandler5(
2043
- z6.object({
2044
- id: z6.string(),
2045
- created_at: z6.number(),
2046
- model: z6.string(),
2047
- output: z6.array(
2048
- z6.discriminatedUnion("type", [
2049
- z6.object({
2050
- type: z6.literal("message"),
2051
- role: z6.literal("assistant"),
2052
- content: z6.array(
2053
- z6.object({
2054
- type: z6.literal("output_text"),
2055
- text: z6.string(),
2056
- annotations: z6.array(
2057
- z6.object({
2058
- type: z6.literal("url_citation"),
2059
- start_index: z6.number(),
2060
- end_index: z6.number(),
2061
- url: z6.string(),
2062
- title: z6.string()
2222
+ successfulResponseHandler: createJsonResponseHandler6(
2223
+ z7.object({
2224
+ id: z7.string(),
2225
+ created_at: z7.number(),
2226
+ model: z7.string(),
2227
+ output: z7.array(
2228
+ z7.discriminatedUnion("type", [
2229
+ z7.object({
2230
+ type: z7.literal("message"),
2231
+ role: z7.literal("assistant"),
2232
+ content: z7.array(
2233
+ z7.object({
2234
+ type: z7.literal("output_text"),
2235
+ text: z7.string(),
2236
+ annotations: z7.array(
2237
+ z7.object({
2238
+ type: z7.literal("url_citation"),
2239
+ start_index: z7.number(),
2240
+ end_index: z7.number(),
2241
+ url: z7.string(),
2242
+ title: z7.string()
2063
2243
  })
2064
2244
  )
2065
2245
  })
2066
2246
  )
2067
2247
  }),
2068
- z6.object({
2069
- type: z6.literal("function_call"),
2070
- call_id: z6.string(),
2071
- name: z6.string(),
2072
- arguments: z6.string()
2248
+ z7.object({
2249
+ type: z7.literal("function_call"),
2250
+ call_id: z7.string(),
2251
+ name: z7.string(),
2252
+ arguments: z7.string()
2073
2253
  }),
2074
- z6.object({
2075
- type: z6.literal("web_search_call")
2254
+ z7.object({
2255
+ type: z7.literal("web_search_call")
2076
2256
  }),
2077
- z6.object({
2078
- type: z6.literal("computer_call")
2257
+ z7.object({
2258
+ type: z7.literal("computer_call")
2079
2259
  }),
2080
- z6.object({
2081
- type: z6.literal("reasoning")
2260
+ z7.object({
2261
+ type: z7.literal("reasoning")
2082
2262
  })
2083
2263
  ])
2084
2264
  ),
2085
- incomplete_details: z6.object({ reason: z6.string() }).nullable(),
2265
+ incomplete_details: z7.object({ reason: z7.string() }).nullable(),
2086
2266
  usage: usageSchema
2087
2267
  })
2088
2268
  ),
@@ -2151,7 +2331,7 @@ var OpenAIResponsesLanguageModel = class {
2151
2331
  path: "/responses",
2152
2332
  modelId: this.modelId
2153
2333
  }),
2154
- headers: combineHeaders5(this.config.headers(), options.headers),
2334
+ headers: combineHeaders6(this.config.headers(), options.headers),
2155
2335
  body: {
2156
2336
  ...body,
2157
2337
  stream: true
@@ -2280,79 +2460,79 @@ var OpenAIResponsesLanguageModel = class {
2280
2460
  };
2281
2461
  }
2282
2462
  };
2283
- var usageSchema = z6.object({
2284
- input_tokens: z6.number(),
2285
- input_tokens_details: z6.object({ cached_tokens: z6.number().nullish() }).nullish(),
2286
- output_tokens: z6.number(),
2287
- output_tokens_details: z6.object({ reasoning_tokens: z6.number().nullish() }).nullish()
2463
+ var usageSchema = z7.object({
2464
+ input_tokens: z7.number(),
2465
+ input_tokens_details: z7.object({ cached_tokens: z7.number().nullish() }).nullish(),
2466
+ output_tokens: z7.number(),
2467
+ output_tokens_details: z7.object({ reasoning_tokens: z7.number().nullish() }).nullish()
2288
2468
  });
2289
- var textDeltaChunkSchema = z6.object({
2290
- type: z6.literal("response.output_text.delta"),
2291
- delta: z6.string()
2469
+ var textDeltaChunkSchema = z7.object({
2470
+ type: z7.literal("response.output_text.delta"),
2471
+ delta: z7.string()
2292
2472
  });
2293
- var responseFinishedChunkSchema = z6.object({
2294
- type: z6.enum(["response.completed", "response.incomplete"]),
2295
- response: z6.object({
2296
- incomplete_details: z6.object({ reason: z6.string() }).nullish(),
2473
+ var responseFinishedChunkSchema = z7.object({
2474
+ type: z7.enum(["response.completed", "response.incomplete"]),
2475
+ response: z7.object({
2476
+ incomplete_details: z7.object({ reason: z7.string() }).nullish(),
2297
2477
  usage: usageSchema
2298
2478
  })
2299
2479
  });
2300
- var responseCreatedChunkSchema = z6.object({
2301
- type: z6.literal("response.created"),
2302
- response: z6.object({
2303
- id: z6.string(),
2304
- created_at: z6.number(),
2305
- model: z6.string()
2480
+ var responseCreatedChunkSchema = z7.object({
2481
+ type: z7.literal("response.created"),
2482
+ response: z7.object({
2483
+ id: z7.string(),
2484
+ created_at: z7.number(),
2485
+ model: z7.string()
2306
2486
  })
2307
2487
  });
2308
- var responseOutputItemDoneSchema = z6.object({
2309
- type: z6.literal("response.output_item.done"),
2310
- output_index: z6.number(),
2311
- item: z6.discriminatedUnion("type", [
2312
- z6.object({
2313
- type: z6.literal("message")
2488
+ var responseOutputItemDoneSchema = z7.object({
2489
+ type: z7.literal("response.output_item.done"),
2490
+ output_index: z7.number(),
2491
+ item: z7.discriminatedUnion("type", [
2492
+ z7.object({
2493
+ type: z7.literal("message")
2314
2494
  }),
2315
- z6.object({
2316
- type: z6.literal("function_call"),
2317
- id: z6.string(),
2318
- call_id: z6.string(),
2319
- name: z6.string(),
2320
- arguments: z6.string(),
2321
- status: z6.literal("completed")
2495
+ z7.object({
2496
+ type: z7.literal("function_call"),
2497
+ id: z7.string(),
2498
+ call_id: z7.string(),
2499
+ name: z7.string(),
2500
+ arguments: z7.string(),
2501
+ status: z7.literal("completed")
2322
2502
  })
2323
2503
  ])
2324
2504
  });
2325
- var responseFunctionCallArgumentsDeltaSchema = z6.object({
2326
- type: z6.literal("response.function_call_arguments.delta"),
2327
- item_id: z6.string(),
2328
- output_index: z6.number(),
2329
- delta: z6.string()
2505
+ var responseFunctionCallArgumentsDeltaSchema = z7.object({
2506
+ type: z7.literal("response.function_call_arguments.delta"),
2507
+ item_id: z7.string(),
2508
+ output_index: z7.number(),
2509
+ delta: z7.string()
2330
2510
  });
2331
- var responseOutputItemAddedSchema = z6.object({
2332
- type: z6.literal("response.output_item.added"),
2333
- output_index: z6.number(),
2334
- item: z6.discriminatedUnion("type", [
2335
- z6.object({
2336
- type: z6.literal("message")
2511
+ var responseOutputItemAddedSchema = z7.object({
2512
+ type: z7.literal("response.output_item.added"),
2513
+ output_index: z7.number(),
2514
+ item: z7.discriminatedUnion("type", [
2515
+ z7.object({
2516
+ type: z7.literal("message")
2337
2517
  }),
2338
- z6.object({
2339
- type: z6.literal("function_call"),
2340
- id: z6.string(),
2341
- call_id: z6.string(),
2342
- name: z6.string(),
2343
- arguments: z6.string()
2518
+ z7.object({
2519
+ type: z7.literal("function_call"),
2520
+ id: z7.string(),
2521
+ call_id: z7.string(),
2522
+ name: z7.string(),
2523
+ arguments: z7.string()
2344
2524
  })
2345
2525
  ])
2346
2526
  });
2347
- var responseAnnotationAddedSchema = z6.object({
2348
- type: z6.literal("response.output_text.annotation.added"),
2349
- annotation: z6.object({
2350
- type: z6.literal("url_citation"),
2351
- url: z6.string(),
2352
- title: z6.string()
2527
+ var responseAnnotationAddedSchema = z7.object({
2528
+ type: z7.literal("response.output_text.annotation.added"),
2529
+ annotation: z7.object({
2530
+ type: z7.literal("url_citation"),
2531
+ url: z7.string(),
2532
+ title: z7.string()
2353
2533
  })
2354
2534
  });
2355
- var openaiResponsesChunkSchema = z6.union([
2535
+ var openaiResponsesChunkSchema = z7.union([
2356
2536
  textDeltaChunkSchema,
2357
2537
  responseFinishedChunkSchema,
2358
2538
  responseCreatedChunkSchema,
@@ -2360,7 +2540,7 @@ var openaiResponsesChunkSchema = z6.union([
2360
2540
  responseFunctionCallArgumentsDeltaSchema,
2361
2541
  responseOutputItemAddedSchema,
2362
2542
  responseAnnotationAddedSchema,
2363
- z6.object({ type: z6.string() }).passthrough()
2543
+ z7.object({ type: z7.string() }).passthrough()
2364
2544
  // fallback for unknown chunks
2365
2545
  ]);
2366
2546
  function isTextDeltaChunk(chunk) {
@@ -2405,20 +2585,20 @@ function getResponsesModelConfig(modelId) {
2405
2585
  requiredAutoTruncation: false
2406
2586
  };
2407
2587
  }
2408
- var openaiResponsesProviderOptionsSchema = z6.object({
2409
- metadata: z6.any().nullish(),
2410
- parallelToolCalls: z6.boolean().nullish(),
2411
- previousResponseId: z6.string().nullish(),
2412
- store: z6.boolean().nullish(),
2413
- user: z6.string().nullish(),
2414
- reasoningEffort: z6.string().nullish(),
2415
- strictSchemas: z6.boolean().nullish(),
2416
- instructions: z6.string().nullish()
2588
+ var openaiResponsesProviderOptionsSchema = z7.object({
2589
+ metadata: z7.any().nullish(),
2590
+ parallelToolCalls: z7.boolean().nullish(),
2591
+ previousResponseId: z7.string().nullish(),
2592
+ store: z7.boolean().nullish(),
2593
+ user: z7.string().nullish(),
2594
+ reasoningEffort: z7.string().nullish(),
2595
+ strictSchemas: z7.boolean().nullish(),
2596
+ instructions: z7.string().nullish()
2417
2597
  });
2418
2598
 
2419
2599
  // src/openai-tools.ts
2420
- import { z as z7 } from "zod";
2421
- var WebSearchPreviewParameters = z7.object({});
2600
+ import { z as z8 } from "zod";
2601
+ var WebSearchPreviewParameters = z8.object({});
2422
2602
  function webSearchPreviewTool({
2423
2603
  searchContextSize,
2424
2604
  userLocation
@@ -2479,6 +2659,12 @@ function createOpenAI(options = {}) {
2479
2659
  headers: getHeaders,
2480
2660
  fetch: options.fetch
2481
2661
  });
2662
// Factory for transcription models: wires the provider name, base URL, header
// getter and optional custom fetch from the enclosing scope into the model.
const createTranscriptionModel = (modelId) => {
  const transcriptionConfig = {
    provider: `${providerName}.transcription`,
    url: ({ path: endpointPath }) => `${baseURL}${endpointPath}`,
    headers: getHeaders,
    fetch: options.fetch
  };
  return new OpenAITranscriptionModel(modelId, transcriptionConfig);
};
2482
2668
  const createLanguageModel = (modelId, settings) => {
2483
2669
  if (new.target) {
2484
2670
  throw new Error(
@@ -2513,6 +2699,8 @@ function createOpenAI(options = {}) {
2513
2699
  provider.textEmbeddingModel = createEmbeddingModel;
2514
2700
  provider.image = createImageModel;
2515
2701
  provider.imageModel = createImageModel;
2702
+ provider.transcription = createTranscriptionModel;
2703
+ provider.transcriptionModel = createTranscriptionModel;
2516
2704
  provider.tools = openaiTools;
2517
2705
  return provider;
2518
2706
  }