@ai-sdk/openai 1.3.7 → 1.3.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -1611,16 +1611,201 @@ var openaiImageResponseSchema = z5.object({
1611
1611
  data: z5.array(z5.object({ b64_json: z5.string() }))
1612
1612
  });
1613
1613
 
1614
- // src/responses/openai-responses-language-model.ts
1614
+ // src/openai-transcription-model.ts
1615
1615
  import {
1616
1616
  combineHeaders as combineHeaders5,
1617
- createEventSourceResponseHandler as createEventSourceResponseHandler3,
1617
+ convertBase64ToUint8Array,
1618
1618
  createJsonResponseHandler as createJsonResponseHandler5,
1619
- generateId as generateId2,
1620
1619
  parseProviderOptions,
1621
- postJsonToApi as postJsonToApi5
1620
+ postFormDataToApi
1622
1621
  } from "@ai-sdk/provider-utils";
1623
1622
  import { z as z6 } from "zod";
1623
/**
 * Zod schema for the `openai` provider options accepted by the
 * transcription model.
 *
 * Every field may be omitted. When omitted, `temperature` resolves to 0 and
 * `timestampGranularities` resolves to ["segment"].
 */
var OpenAIProviderOptionsSchema = z6.object({
  // Extra pieces of information requested in the response.
  include: z6
    .array(z6.string())
    .optional()
    .describe("Additional information to include in the transcription response."),

  // Language hint for the uploaded audio.
  language: z6
    .string()
    .optional()
    .describe("The language of the input audio in ISO-639-1 format."),

  // Free-form text used to steer the transcription.
  prompt: z6
    .string()
    .optional()
    .describe(
      "An optional text to guide the model's style or continue a previous audio segment."
    ),

  // Constrained to the closed interval [0, 1]; defaults to 0.
  temperature: z6
    .number()
    .min(0)
    .max(1)
    .optional()
    .default(0)
    .describe("The sampling temperature, between 0 and 1."),

  // Any combination of "word" and "segment"; defaults to ["segment"].
  timestampGranularities: z6
    .array(z6.enum(["word", "segment"]))
    .optional()
    .default(["segment"])
    .describe("The timestamp granularities to populate for this transcription.")
});
1636
/**
 * Lookup table from lowercase English language names to two-letter
 * language codes.
 *
 * The table is built on a null prototype so that membership checks with the
 * `in` operator (or plain indexing) can only ever hit the entries listed
 * here. With an ordinary object literal, keys inherited from
 * `Object.prototype` such as "constructor" or "toString" would be reported
 * as present.
 */
var languageMap = Object.assign(/* @__PURE__ */ Object.create(null), {
  afrikaans: "af",
  arabic: "ar",
  armenian: "hy",
  azerbaijani: "az",
  belarusian: "be",
  bosnian: "bs",
  bulgarian: "bg",
  catalan: "ca",
  chinese: "zh",
  croatian: "hr",
  czech: "cs",
  danish: "da",
  dutch: "nl",
  english: "en",
  estonian: "et",
  finnish: "fi",
  french: "fr",
  galician: "gl",
  german: "de",
  greek: "el",
  hebrew: "he",
  hindi: "hi",
  hungarian: "hu",
  icelandic: "is",
  indonesian: "id",
  italian: "it",
  japanese: "ja",
  kannada: "kn",
  kazakh: "kk",
  korean: "ko",
  latvian: "lv",
  lithuanian: "lt",
  macedonian: "mk",
  malay: "ms",
  marathi: "mr",
  maori: "mi",
  nepali: "ne",
  norwegian: "no",
  persian: "fa",
  polish: "pl",
  portuguese: "pt",
  romanian: "ro",
  russian: "ru",
  serbian: "sr",
  slovak: "sk",
  slovenian: "sl",
  spanish: "es",
  swahili: "sw",
  swedish: "sv",
  tagalog: "tl",
  tamil: "ta",
  thai: "th",
  turkish: "tr",
  ukrainian: "uk",
  urdu: "ur",
  vietnamese: "vi",
  welsh: "cy"
});
1695
/**
 * Transcription model that uploads audio to the `/audio/transcriptions`
 * endpoint as multipart form data and converts the JSON reply into the
 * transcription result shape returned by `doGenerate`.
 */
var OpenAITranscriptionModel = class {
  /**
   * @param {string} modelId - Model identifier sent as the `model` form field.
   * @param {object} config - Provider wiring. This class reads
   *   `config.provider`, `config.url({ path, modelId })`, `config.headers()`,
   *   `config.fetch`, and the optional `config._internal.currentDate()`.
   */
  constructor(modelId, config) {
    this.modelId = modelId;
    this.config = config;
    this.specificationVersion = "v1";
  }

  /** Provider name taken from the config. */
  get provider() {
    return this.config.provider;
  }

  /**
   * Builds the multipart request body.
   *
   * @param {object} args
   * @param {Uint8Array|string} args.audio - Raw bytes, or a base64 string
   *   that is decoded with `convertBase64ToUint8Array`.
   * @param {string} args.mimeType - Used as the uploaded file's type.
   * @param {object} [args.providerOptions] - Parsed against
   *   `OpenAIProviderOptionsSchema` under the "openai" key.
   * @returns {{ formData: FormData, warnings: Array }} The form data to post
   *   and a warnings list (currently always empty).
   */
  getArgs({
    audio,
    mimeType,
    providerOptions
  }) {
    const warnings = [];
    const openAIOptions = parseProviderOptions({
      provider: "openai",
      providerOptions,
      schema: OpenAIProviderOptionsSchema
    });
    const formData = new FormData();
    const blob = audio instanceof Uint8Array ? new Blob([audio]) : new Blob([convertBase64ToUint8Array(audio)]);
    formData.append("model", this.modelId);
    formData.append("file", new File([blob], "audio", { type: mimeType }));
    if (openAIOptions) {
      const transcriptionModelOptions = {
        include: openAIOptions.include,
        language: openAIOptions.language,
        prompt: openAIOptions.prompt,
        temperature: openAIOptions.temperature,
        timestamp_granularities: openAIOptions.timestampGranularities
      };
      for (const [key, value] of Object.entries(transcriptionModelOptions)) {
        if (value == null) {
          continue;
        }
        if (Array.isArray(value)) {
          // Multipart forms have no native array type. Passing the array
          // straight to append() would stringify it as "a,b", so each item
          // is sent as its own `key[]` field instead.
          for (const item of value) {
            formData.append(`${key}[]`, String(item));
          }
        } else {
          // Explicit conversion: form fields are strings (e.g. temperature).
          formData.append(key, String(value));
        }
      }
    }
    return {
      formData,
      warnings
    };
  }

  /**
   * Posts the audio and maps the reply.
   *
   * @param {object} options - Forwarded to `getArgs`; `options.headers` and
   *   `options.abortSignal` are also passed to the HTTP call.
   * @returns {Promise<object>} `text`, `segments` (one per returned word,
   *   empty when the reply carries no word timings), `language` (two-letter
   *   code when the reported language is in `languageMap`, otherwise
   *   undefined), `durationInSeconds`, `warnings`, `response` metadata and
   *   `providerMetadata.openai.transcript` holding the parsed reply.
   */
  async doGenerate(options) {
    // Use the injected clock when one is configured; otherwise the real one.
    const internal = this.config._internal;
    const injectedDate = internal != null && internal.currentDate != null ? internal.currentDate() : void 0;
    const currentDate = injectedDate != null ? injectedDate : /* @__PURE__ */ new Date();
    const { formData, warnings } = this.getArgs(options);
    const { value: response, responseHeaders } = await postFormDataToApi({
      url: this.config.url({
        path: "/audio/transcriptions",
        modelId: this.modelId
      }),
      headers: combineHeaders5(this.config.headers(), options.headers),
      formData,
      failedResponseHandler: openaiFailedResponseHandler,
      successfulResponseHandler: createJsonResponseHandler5(
        openaiTranscriptionResponseSchema
      ),
      abortSignal: options.abortSignal,
      fetch: this.config.fetch
    });
    // Own-property check so names inherited from Object.prototype
    // (e.g. "constructor") are never treated as known languages.
    let language;
    if (response.language && Object.prototype.hasOwnProperty.call(languageMap, response.language)) {
      language = languageMap[response.language];
    }
    // This request never sets `response_format`, so the reply is not
    // guaranteed to contain word timings; treat a missing list as empty.
    const words = response.words != null ? response.words : [];
    return {
      text: response.text,
      segments: words.map((word) => ({
        text: word.word,
        startSecond: word.start,
        endSecond: word.end
      })),
      language,
      durationInSeconds: response.duration != null ? response.duration : void 0,
      warnings,
      response: {
        timestamp: currentDate,
        modelId: this.modelId,
        headers: responseHeaders,
        body: response
      },
      // When using format `verbose_json` on `whisper-1`, OpenAI includes extras such as `task` and enhanced `segments` information.
      providerMetadata: {
        openai: {
          transcript: response
        }
      }
    };
  }
};

/**
 * Shape of the transcription reply that `doGenerate` relies on. Only `text`
 * is required; `language`, `duration` and `words` may be absent or null.
 */
var openaiTranscriptionResponseSchema = z6.object({
  text: z6.string(),
  language: z6.string().nullish(),
  duration: z6.number().nullish(),
  words: z6.array(
    z6.object({
      word: z6.string(),
      start: z6.number(),
      end: z6.number()
    })
  ).nullish()
});
1798
+
1799
+ // src/responses/openai-responses-language-model.ts
1800
+ import {
1801
+ combineHeaders as combineHeaders6,
1802
+ createEventSourceResponseHandler as createEventSourceResponseHandler3,
1803
+ createJsonResponseHandler as createJsonResponseHandler6,
1804
+ generateId as generateId2,
1805
+ parseProviderOptions as parseProviderOptions2,
1806
+ postJsonToApi as postJsonToApi5
1807
+ } from "@ai-sdk/provider-utils";
1808
+ import { z as z7 } from "zod";
1624
1809
 
1625
1810
  // src/responses/convert-to-openai-responses-messages.ts
1626
1811
  import {
@@ -1911,7 +2096,7 @@ var OpenAIResponsesLanguageModel = class {
1911
2096
  systemMessageMode: modelConfig.systemMessageMode
1912
2097
  });
1913
2098
  warnings.push(...messageWarnings);
1914
- const openaiOptions = parseProviderOptions({
2099
+ const openaiOptions = parseProviderOptions2({
1915
2100
  provider: "openai",
1916
2101
  providerOptions: providerMetadata,
1917
2102
  schema: openaiResponsesProviderOptionsSchema
@@ -2036,53 +2221,53 @@ var OpenAIResponsesLanguageModel = class {
2036
2221
  path: "/responses",
2037
2222
  modelId: this.modelId
2038
2223
  }),
2039
- headers: combineHeaders5(this.config.headers(), options.headers),
2224
+ headers: combineHeaders6(this.config.headers(), options.headers),
2040
2225
  body,
2041
2226
  failedResponseHandler: openaiFailedResponseHandler,
2042
- successfulResponseHandler: createJsonResponseHandler5(
2043
- z6.object({
2044
- id: z6.string(),
2045
- created_at: z6.number(),
2046
- model: z6.string(),
2047
- output: z6.array(
2048
- z6.discriminatedUnion("type", [
2049
- z6.object({
2050
- type: z6.literal("message"),
2051
- role: z6.literal("assistant"),
2052
- content: z6.array(
2053
- z6.object({
2054
- type: z6.literal("output_text"),
2055
- text: z6.string(),
2056
- annotations: z6.array(
2057
- z6.object({
2058
- type: z6.literal("url_citation"),
2059
- start_index: z6.number(),
2060
- end_index: z6.number(),
2061
- url: z6.string(),
2062
- title: z6.string()
2227
+ successfulResponseHandler: createJsonResponseHandler6(
2228
+ z7.object({
2229
+ id: z7.string(),
2230
+ created_at: z7.number(),
2231
+ model: z7.string(),
2232
+ output: z7.array(
2233
+ z7.discriminatedUnion("type", [
2234
+ z7.object({
2235
+ type: z7.literal("message"),
2236
+ role: z7.literal("assistant"),
2237
+ content: z7.array(
2238
+ z7.object({
2239
+ type: z7.literal("output_text"),
2240
+ text: z7.string(),
2241
+ annotations: z7.array(
2242
+ z7.object({
2243
+ type: z7.literal("url_citation"),
2244
+ start_index: z7.number(),
2245
+ end_index: z7.number(),
2246
+ url: z7.string(),
2247
+ title: z7.string()
2063
2248
  })
2064
2249
  )
2065
2250
  })
2066
2251
  )
2067
2252
  }),
2068
- z6.object({
2069
- type: z6.literal("function_call"),
2070
- call_id: z6.string(),
2071
- name: z6.string(),
2072
- arguments: z6.string()
2253
+ z7.object({
2254
+ type: z7.literal("function_call"),
2255
+ call_id: z7.string(),
2256
+ name: z7.string(),
2257
+ arguments: z7.string()
2073
2258
  }),
2074
- z6.object({
2075
- type: z6.literal("web_search_call")
2259
+ z7.object({
2260
+ type: z7.literal("web_search_call")
2076
2261
  }),
2077
- z6.object({
2078
- type: z6.literal("computer_call")
2262
+ z7.object({
2263
+ type: z7.literal("computer_call")
2079
2264
  }),
2080
- z6.object({
2081
- type: z6.literal("reasoning")
2265
+ z7.object({
2266
+ type: z7.literal("reasoning")
2082
2267
  })
2083
2268
  ])
2084
2269
  ),
2085
- incomplete_details: z6.object({ reason: z6.string() }).nullable(),
2270
+ incomplete_details: z7.object({ reason: z7.string() }).nullable(),
2086
2271
  usage: usageSchema
2087
2272
  })
2088
2273
  ),
@@ -2151,7 +2336,7 @@ var OpenAIResponsesLanguageModel = class {
2151
2336
  path: "/responses",
2152
2337
  modelId: this.modelId
2153
2338
  }),
2154
- headers: combineHeaders5(this.config.headers(), options.headers),
2339
+ headers: combineHeaders6(this.config.headers(), options.headers),
2155
2340
  body: {
2156
2341
  ...body,
2157
2342
  stream: true
@@ -2280,79 +2465,79 @@ var OpenAIResponsesLanguageModel = class {
2280
2465
  };
2281
2466
  }
2282
2467
  };
2283
- var usageSchema = z6.object({
2284
- input_tokens: z6.number(),
2285
- input_tokens_details: z6.object({ cached_tokens: z6.number().nullish() }).nullish(),
2286
- output_tokens: z6.number(),
2287
- output_tokens_details: z6.object({ reasoning_tokens: z6.number().nullish() }).nullish()
2468
+ var usageSchema = z7.object({
2469
+ input_tokens: z7.number(),
2470
+ input_tokens_details: z7.object({ cached_tokens: z7.number().nullish() }).nullish(),
2471
+ output_tokens: z7.number(),
2472
+ output_tokens_details: z7.object({ reasoning_tokens: z7.number().nullish() }).nullish()
2288
2473
  });
2289
- var textDeltaChunkSchema = z6.object({
2290
- type: z6.literal("response.output_text.delta"),
2291
- delta: z6.string()
2474
+ var textDeltaChunkSchema = z7.object({
2475
+ type: z7.literal("response.output_text.delta"),
2476
+ delta: z7.string()
2292
2477
  });
2293
- var responseFinishedChunkSchema = z6.object({
2294
- type: z6.enum(["response.completed", "response.incomplete"]),
2295
- response: z6.object({
2296
- incomplete_details: z6.object({ reason: z6.string() }).nullish(),
2478
+ var responseFinishedChunkSchema = z7.object({
2479
+ type: z7.enum(["response.completed", "response.incomplete"]),
2480
+ response: z7.object({
2481
+ incomplete_details: z7.object({ reason: z7.string() }).nullish(),
2297
2482
  usage: usageSchema
2298
2483
  })
2299
2484
  });
2300
- var responseCreatedChunkSchema = z6.object({
2301
- type: z6.literal("response.created"),
2302
- response: z6.object({
2303
- id: z6.string(),
2304
- created_at: z6.number(),
2305
- model: z6.string()
2485
+ var responseCreatedChunkSchema = z7.object({
2486
+ type: z7.literal("response.created"),
2487
+ response: z7.object({
2488
+ id: z7.string(),
2489
+ created_at: z7.number(),
2490
+ model: z7.string()
2306
2491
  })
2307
2492
  });
2308
- var responseOutputItemDoneSchema = z6.object({
2309
- type: z6.literal("response.output_item.done"),
2310
- output_index: z6.number(),
2311
- item: z6.discriminatedUnion("type", [
2312
- z6.object({
2313
- type: z6.literal("message")
2493
+ var responseOutputItemDoneSchema = z7.object({
2494
+ type: z7.literal("response.output_item.done"),
2495
+ output_index: z7.number(),
2496
+ item: z7.discriminatedUnion("type", [
2497
+ z7.object({
2498
+ type: z7.literal("message")
2314
2499
  }),
2315
- z6.object({
2316
- type: z6.literal("function_call"),
2317
- id: z6.string(),
2318
- call_id: z6.string(),
2319
- name: z6.string(),
2320
- arguments: z6.string(),
2321
- status: z6.literal("completed")
2500
+ z7.object({
2501
+ type: z7.literal("function_call"),
2502
+ id: z7.string(),
2503
+ call_id: z7.string(),
2504
+ name: z7.string(),
2505
+ arguments: z7.string(),
2506
+ status: z7.literal("completed")
2322
2507
  })
2323
2508
  ])
2324
2509
  });
2325
- var responseFunctionCallArgumentsDeltaSchema = z6.object({
2326
- type: z6.literal("response.function_call_arguments.delta"),
2327
- item_id: z6.string(),
2328
- output_index: z6.number(),
2329
- delta: z6.string()
2510
+ var responseFunctionCallArgumentsDeltaSchema = z7.object({
2511
+ type: z7.literal("response.function_call_arguments.delta"),
2512
+ item_id: z7.string(),
2513
+ output_index: z7.number(),
2514
+ delta: z7.string()
2330
2515
  });
2331
- var responseOutputItemAddedSchema = z6.object({
2332
- type: z6.literal("response.output_item.added"),
2333
- output_index: z6.number(),
2334
- item: z6.discriminatedUnion("type", [
2335
- z6.object({
2336
- type: z6.literal("message")
2516
+ var responseOutputItemAddedSchema = z7.object({
2517
+ type: z7.literal("response.output_item.added"),
2518
+ output_index: z7.number(),
2519
+ item: z7.discriminatedUnion("type", [
2520
+ z7.object({
2521
+ type: z7.literal("message")
2337
2522
  }),
2338
- z6.object({
2339
- type: z6.literal("function_call"),
2340
- id: z6.string(),
2341
- call_id: z6.string(),
2342
- name: z6.string(),
2343
- arguments: z6.string()
2523
+ z7.object({
2524
+ type: z7.literal("function_call"),
2525
+ id: z7.string(),
2526
+ call_id: z7.string(),
2527
+ name: z7.string(),
2528
+ arguments: z7.string()
2344
2529
  })
2345
2530
  ])
2346
2531
  });
2347
- var responseAnnotationAddedSchema = z6.object({
2348
- type: z6.literal("response.output_text.annotation.added"),
2349
- annotation: z6.object({
2350
- type: z6.literal("url_citation"),
2351
- url: z6.string(),
2352
- title: z6.string()
2532
+ var responseAnnotationAddedSchema = z7.object({
2533
+ type: z7.literal("response.output_text.annotation.added"),
2534
+ annotation: z7.object({
2535
+ type: z7.literal("url_citation"),
2536
+ url: z7.string(),
2537
+ title: z7.string()
2353
2538
  })
2354
2539
  });
2355
- var openaiResponsesChunkSchema = z6.union([
2540
+ var openaiResponsesChunkSchema = z7.union([
2356
2541
  textDeltaChunkSchema,
2357
2542
  responseFinishedChunkSchema,
2358
2543
  responseCreatedChunkSchema,
@@ -2360,7 +2545,7 @@ var openaiResponsesChunkSchema = z6.union([
2360
2545
  responseFunctionCallArgumentsDeltaSchema,
2361
2546
  responseOutputItemAddedSchema,
2362
2547
  responseAnnotationAddedSchema,
2363
- z6.object({ type: z6.string() }).passthrough()
2548
+ z7.object({ type: z7.string() }).passthrough()
2364
2549
  // fallback for unknown chunks
2365
2550
  ]);
2366
2551
  function isTextDeltaChunk(chunk) {
@@ -2405,20 +2590,20 @@ function getResponsesModelConfig(modelId) {
2405
2590
  requiredAutoTruncation: false
2406
2591
  };
2407
2592
  }
2408
- var openaiResponsesProviderOptionsSchema = z6.object({
2409
- metadata: z6.any().nullish(),
2410
- parallelToolCalls: z6.boolean().nullish(),
2411
- previousResponseId: z6.string().nullish(),
2412
- store: z6.boolean().nullish(),
2413
- user: z6.string().nullish(),
2414
- reasoningEffort: z6.string().nullish(),
2415
- strictSchemas: z6.boolean().nullish(),
2416
- instructions: z6.string().nullish()
2593
+ var openaiResponsesProviderOptionsSchema = z7.object({
2594
+ metadata: z7.any().nullish(),
2595
+ parallelToolCalls: z7.boolean().nullish(),
2596
+ previousResponseId: z7.string().nullish(),
2597
+ store: z7.boolean().nullish(),
2598
+ user: z7.string().nullish(),
2599
+ reasoningEffort: z7.string().nullish(),
2600
+ strictSchemas: z7.boolean().nullish(),
2601
+ instructions: z7.string().nullish()
2417
2602
  });
2418
2603
 
2419
2604
  // src/openai-tools.ts
2420
- import { z as z7 } from "zod";
2421
- var WebSearchPreviewParameters = z7.object({});
2605
+ import { z as z8 } from "zod";
2606
+ var WebSearchPreviewParameters = z8.object({});
2422
2607
  function webSearchPreviewTool({
2423
2608
  searchContextSize,
2424
2609
  userLocation
@@ -2479,6 +2664,12 @@ function createOpenAI(options = {}) {
2479
2664
  headers: getHeaders,
2480
2665
  fetch: options.fetch
2481
2666
  });
2667
// Factory for transcription models. The provider name gets a
// ".transcription" suffix, request URLs are the base URL followed by the
// endpoint path, and the headers getter and custom fetch are passed through.
const createTranscriptionModel = (modelId) => {
  const transcriptionConfig = {
    provider: `${providerName}.transcription`,
    url: ({ path }) => `${baseURL}${path}`,
    headers: getHeaders,
    fetch: options.fetch
  };
  return new OpenAITranscriptionModel(modelId, transcriptionConfig);
};
2482
2673
  const createLanguageModel = (modelId, settings) => {
2483
2674
  if (new.target) {
2484
2675
  throw new Error(
@@ -2513,6 +2704,8 @@ function createOpenAI(options = {}) {
2513
2704
  provider.textEmbeddingModel = createEmbeddingModel;
2514
2705
  provider.image = createImageModel;
2515
2706
  provider.imageModel = createImageModel;
2707
+ provider.transcription = createTranscriptionModel;
2708
+ provider.transcriptionModel = createTranscriptionModel;
2516
2709
  provider.tools = openaiTools;
2517
2710
  return provider;
2518
2711
  }