@ai-sdk/openai 1.3.6 → 1.3.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1605,16 +1605,201 @@ var openaiImageResponseSchema = z5.object({
1605
1605
  data: z5.array(z5.object({ b64_json: z5.string() }))
1606
1606
  });
1607
1607
 
1608
- // src/responses/openai-responses-language-model.ts
1608
+ // src/openai-transcription-model.ts
1609
1609
  import {
1610
1610
  combineHeaders as combineHeaders5,
1611
- createEventSourceResponseHandler as createEventSourceResponseHandler3,
1611
+ convertBase64ToUint8Array,
1612
1612
  createJsonResponseHandler as createJsonResponseHandler5,
1613
- generateId as generateId2,
1614
1613
  parseProviderOptions,
1615
- postJsonToApi as postJsonToApi5
1614
+ postFormDataToApi
1616
1615
  } from "@ai-sdk/provider-utils";
1617
1616
  import { z as z6 } from "zod";
1617
+ var OpenAIProviderOptionsSchema = z6.object({
1618
+ include: z6.array(z6.string()).optional().describe(
1619
+ "Additional information to include in the transcription response."
1620
+ ),
1621
+ language: z6.string().optional().describe("The language of the input audio in ISO-639-1 format."),
1622
+ prompt: z6.string().optional().describe(
1623
+ "An optional text to guide the model's style or continue a previous audio segment."
1624
+ ),
1625
+ temperature: z6.number().min(0).max(1).optional().default(0).describe("The sampling temperature, between 0 and 1."),
1626
+ timestampGranularities: z6.array(z6.enum(["word", "segment"])).optional().default(["segment"]).describe(
1627
+ "The timestamp granularities to populate for this transcription."
1628
+ )
1629
+ });
1630
+ var languageMap = {
1631
+ afrikaans: "af",
1632
+ arabic: "ar",
1633
+ armenian: "hy",
1634
+ azerbaijani: "az",
1635
+ belarusian: "be",
1636
+ bosnian: "bs",
1637
+ bulgarian: "bg",
1638
+ catalan: "ca",
1639
+ chinese: "zh",
1640
+ croatian: "hr",
1641
+ czech: "cs",
1642
+ danish: "da",
1643
+ dutch: "nl",
1644
+ english: "en",
1645
+ estonian: "et",
1646
+ finnish: "fi",
1647
+ french: "fr",
1648
+ galician: "gl",
1649
+ german: "de",
1650
+ greek: "el",
1651
+ hebrew: "he",
1652
+ hindi: "hi",
1653
+ hungarian: "hu",
1654
+ icelandic: "is",
1655
+ indonesian: "id",
1656
+ italian: "it",
1657
+ japanese: "ja",
1658
+ kannada: "kn",
1659
+ kazakh: "kk",
1660
+ korean: "ko",
1661
+ latvian: "lv",
1662
+ lithuanian: "lt",
1663
+ macedonian: "mk",
1664
+ malay: "ms",
1665
+ marathi: "mr",
1666
+ maori: "mi",
1667
+ nepali: "ne",
1668
+ norwegian: "no",
1669
+ persian: "fa",
1670
+ polish: "pl",
1671
+ portuguese: "pt",
1672
+ romanian: "ro",
1673
+ russian: "ru",
1674
+ serbian: "sr",
1675
+ slovak: "sk",
1676
+ slovenian: "sl",
1677
+ spanish: "es",
1678
+ swahili: "sw",
1679
+ swedish: "sv",
1680
+ tagalog: "tl",
1681
+ tamil: "ta",
1682
+ thai: "th",
1683
+ turkish: "tr",
1684
+ ukrainian: "uk",
1685
+ urdu: "ur",
1686
+ vietnamese: "vi",
1687
+ welsh: "cy"
1688
+ };
1689
+ var OpenAITranscriptionModel = class {
1690
+ constructor(modelId, config) {
1691
+ this.modelId = modelId;
1692
+ this.config = config;
1693
+ this.specificationVersion = "v1";
1694
+ }
1695
+ get provider() {
1696
+ return this.config.provider;
1697
+ }
1698
+ getArgs({
1699
+ audio,
1700
+ mimeType,
1701
+ providerOptions
1702
+ }) {
1703
+ const warnings = [];
1704
+ const openAIOptions = parseProviderOptions({
1705
+ provider: "openai",
1706
+ providerOptions,
1707
+ schema: OpenAIProviderOptionsSchema
1708
+ });
1709
+ const formData = new FormData();
1710
+ const blob = audio instanceof Uint8Array ? new Blob([audio]) : new Blob([convertBase64ToUint8Array(audio)]);
1711
+ formData.append("model", this.modelId);
1712
+ formData.append("file", new File([blob], "audio", { type: mimeType }));
1713
+ if (openAIOptions) {
1714
+ const transcriptionModelOptions = {
1715
+ include: openAIOptions.include,
1716
+ language: openAIOptions.language,
1717
+ prompt: openAIOptions.prompt,
1718
+ temperature: openAIOptions.temperature,
1719
+ timestamp_granularities: openAIOptions.timestampGranularities
1720
+ };
1721
+ for (const key in transcriptionModelOptions) {
1722
+ const value = transcriptionModelOptions[key];
1723
+ if (value !== void 0) {
1724
+ formData.append(key, value);
1725
+ }
1726
+ }
1727
+ }
1728
+ return {
1729
+ formData,
1730
+ warnings
1731
+ };
1732
+ }
1733
+ async doGenerate(options) {
1734
+ var _a, _b, _c;
1735
+ const currentDate = (_c = (_b = (_a = this.config._internal) == null ? void 0 : _a.currentDate) == null ? void 0 : _b.call(_a)) != null ? _c : /* @__PURE__ */ new Date();
1736
+ const { formData, warnings } = this.getArgs(options);
1737
+ const { value: response, responseHeaders } = await postFormDataToApi({
1738
+ url: this.config.url({
1739
+ path: "/audio/transcriptions",
1740
+ modelId: this.modelId
1741
+ }),
1742
+ headers: combineHeaders5(this.config.headers(), options.headers),
1743
+ formData,
1744
+ failedResponseHandler: openaiFailedResponseHandler,
1745
+ successfulResponseHandler: createJsonResponseHandler5(
1746
+ openaiTranscriptionResponseSchema
1747
+ ),
1748
+ abortSignal: options.abortSignal,
1749
+ fetch: this.config.fetch
1750
+ });
1751
+ let language;
1752
+ if (response.language && response.language in languageMap) {
1753
+ language = languageMap[response.language];
1754
+ }
1755
+ return {
1756
+ text: response.text,
1757
+ segments: response.words.map((word) => ({
1758
+ text: word.word,
1759
+ startSecond: word.start,
1760
+ endSecond: word.end
1761
+ })),
1762
+ language,
1763
+ durationInSeconds: response.duration,
1764
+ warnings,
1765
+ response: {
1766
+ timestamp: currentDate,
1767
+ modelId: this.modelId,
1768
+ headers: responseHeaders,
1769
+ body: response
1770
+ },
1771
+ // When using format `verbose_json` on `whisper-1`, OpenAI includes the things like `task` and enhanced `segments` information.
1772
+ providerMetadata: {
1773
+ openai: {
1774
+ transcript: response
1775
+ }
1776
+ }
1777
+ };
1778
+ }
1779
+ };
1780
+ var openaiTranscriptionResponseSchema = z6.object({
1781
+ text: z6.string(),
1782
+ language: z6.string().optional(),
1783
+ duration: z6.number().optional(),
1784
+ words: z6.array(
1785
+ z6.object({
1786
+ word: z6.string(),
1787
+ start: z6.number(),
1788
+ end: z6.number()
1789
+ })
1790
+ )
1791
+ });
1792
+
1793
+ // src/responses/openai-responses-language-model.ts
1794
+ import {
1795
+ combineHeaders as combineHeaders6,
1796
+ createEventSourceResponseHandler as createEventSourceResponseHandler3,
1797
+ createJsonResponseHandler as createJsonResponseHandler6,
1798
+ generateId as generateId2,
1799
+ parseProviderOptions as parseProviderOptions2,
1800
+ postJsonToApi as postJsonToApi5
1801
+ } from "@ai-sdk/provider-utils";
1802
+ import { z as z7 } from "zod";
1618
1803
 
1619
1804
  // src/responses/convert-to-openai-responses-messages.ts
1620
1805
  import {
@@ -1905,7 +2090,7 @@ var OpenAIResponsesLanguageModel = class {
1905
2090
  systemMessageMode: modelConfig.systemMessageMode
1906
2091
  });
1907
2092
  warnings.push(...messageWarnings);
1908
- const openaiOptions = parseProviderOptions({
2093
+ const openaiOptions = parseProviderOptions2({
1909
2094
  provider: "openai",
1910
2095
  providerOptions: providerMetadata,
1911
2096
  schema: openaiResponsesProviderOptionsSchema
@@ -2030,53 +2215,53 @@ var OpenAIResponsesLanguageModel = class {
2030
2215
  path: "/responses",
2031
2216
  modelId: this.modelId
2032
2217
  }),
2033
- headers: combineHeaders5(this.config.headers(), options.headers),
2218
+ headers: combineHeaders6(this.config.headers(), options.headers),
2034
2219
  body,
2035
2220
  failedResponseHandler: openaiFailedResponseHandler,
2036
- successfulResponseHandler: createJsonResponseHandler5(
2037
- z6.object({
2038
- id: z6.string(),
2039
- created_at: z6.number(),
2040
- model: z6.string(),
2041
- output: z6.array(
2042
- z6.discriminatedUnion("type", [
2043
- z6.object({
2044
- type: z6.literal("message"),
2045
- role: z6.literal("assistant"),
2046
- content: z6.array(
2047
- z6.object({
2048
- type: z6.literal("output_text"),
2049
- text: z6.string(),
2050
- annotations: z6.array(
2051
- z6.object({
2052
- type: z6.literal("url_citation"),
2053
- start_index: z6.number(),
2054
- end_index: z6.number(),
2055
- url: z6.string(),
2056
- title: z6.string()
2221
+ successfulResponseHandler: createJsonResponseHandler6(
2222
+ z7.object({
2223
+ id: z7.string(),
2224
+ created_at: z7.number(),
2225
+ model: z7.string(),
2226
+ output: z7.array(
2227
+ z7.discriminatedUnion("type", [
2228
+ z7.object({
2229
+ type: z7.literal("message"),
2230
+ role: z7.literal("assistant"),
2231
+ content: z7.array(
2232
+ z7.object({
2233
+ type: z7.literal("output_text"),
2234
+ text: z7.string(),
2235
+ annotations: z7.array(
2236
+ z7.object({
2237
+ type: z7.literal("url_citation"),
2238
+ start_index: z7.number(),
2239
+ end_index: z7.number(),
2240
+ url: z7.string(),
2241
+ title: z7.string()
2057
2242
  })
2058
2243
  )
2059
2244
  })
2060
2245
  )
2061
2246
  }),
2062
- z6.object({
2063
- type: z6.literal("function_call"),
2064
- call_id: z6.string(),
2065
- name: z6.string(),
2066
- arguments: z6.string()
2247
+ z7.object({
2248
+ type: z7.literal("function_call"),
2249
+ call_id: z7.string(),
2250
+ name: z7.string(),
2251
+ arguments: z7.string()
2067
2252
  }),
2068
- z6.object({
2069
- type: z6.literal("web_search_call")
2253
+ z7.object({
2254
+ type: z7.literal("web_search_call")
2070
2255
  }),
2071
- z6.object({
2072
- type: z6.literal("computer_call")
2256
+ z7.object({
2257
+ type: z7.literal("computer_call")
2073
2258
  }),
2074
- z6.object({
2075
- type: z6.literal("reasoning")
2259
+ z7.object({
2260
+ type: z7.literal("reasoning")
2076
2261
  })
2077
2262
  ])
2078
2263
  ),
2079
- incomplete_details: z6.object({ reason: z6.string() }).nullable(),
2264
+ incomplete_details: z7.object({ reason: z7.string() }).nullable(),
2080
2265
  usage: usageSchema
2081
2266
  })
2082
2267
  ),
@@ -2145,7 +2330,7 @@ var OpenAIResponsesLanguageModel = class {
2145
2330
  path: "/responses",
2146
2331
  modelId: this.modelId
2147
2332
  }),
2148
- headers: combineHeaders5(this.config.headers(), options.headers),
2333
+ headers: combineHeaders6(this.config.headers(), options.headers),
2149
2334
  body: {
2150
2335
  ...body,
2151
2336
  stream: true
@@ -2274,79 +2459,79 @@ var OpenAIResponsesLanguageModel = class {
2274
2459
  };
2275
2460
  }
2276
2461
  };
2277
- var usageSchema = z6.object({
2278
- input_tokens: z6.number(),
2279
- input_tokens_details: z6.object({ cached_tokens: z6.number().nullish() }).nullish(),
2280
- output_tokens: z6.number(),
2281
- output_tokens_details: z6.object({ reasoning_tokens: z6.number().nullish() }).nullish()
2462
+ var usageSchema = z7.object({
2463
+ input_tokens: z7.number(),
2464
+ input_tokens_details: z7.object({ cached_tokens: z7.number().nullish() }).nullish(),
2465
+ output_tokens: z7.number(),
2466
+ output_tokens_details: z7.object({ reasoning_tokens: z7.number().nullish() }).nullish()
2282
2467
  });
2283
- var textDeltaChunkSchema = z6.object({
2284
- type: z6.literal("response.output_text.delta"),
2285
- delta: z6.string()
2468
+ var textDeltaChunkSchema = z7.object({
2469
+ type: z7.literal("response.output_text.delta"),
2470
+ delta: z7.string()
2286
2471
  });
2287
- var responseFinishedChunkSchema = z6.object({
2288
- type: z6.enum(["response.completed", "response.incomplete"]),
2289
- response: z6.object({
2290
- incomplete_details: z6.object({ reason: z6.string() }).nullish(),
2472
+ var responseFinishedChunkSchema = z7.object({
2473
+ type: z7.enum(["response.completed", "response.incomplete"]),
2474
+ response: z7.object({
2475
+ incomplete_details: z7.object({ reason: z7.string() }).nullish(),
2291
2476
  usage: usageSchema
2292
2477
  })
2293
2478
  });
2294
- var responseCreatedChunkSchema = z6.object({
2295
- type: z6.literal("response.created"),
2296
- response: z6.object({
2297
- id: z6.string(),
2298
- created_at: z6.number(),
2299
- model: z6.string()
2479
+ var responseCreatedChunkSchema = z7.object({
2480
+ type: z7.literal("response.created"),
2481
+ response: z7.object({
2482
+ id: z7.string(),
2483
+ created_at: z7.number(),
2484
+ model: z7.string()
2300
2485
  })
2301
2486
  });
2302
- var responseOutputItemDoneSchema = z6.object({
2303
- type: z6.literal("response.output_item.done"),
2304
- output_index: z6.number(),
2305
- item: z6.discriminatedUnion("type", [
2306
- z6.object({
2307
- type: z6.literal("message")
2487
+ var responseOutputItemDoneSchema = z7.object({
2488
+ type: z7.literal("response.output_item.done"),
2489
+ output_index: z7.number(),
2490
+ item: z7.discriminatedUnion("type", [
2491
+ z7.object({
2492
+ type: z7.literal("message")
2308
2493
  }),
2309
- z6.object({
2310
- type: z6.literal("function_call"),
2311
- id: z6.string(),
2312
- call_id: z6.string(),
2313
- name: z6.string(),
2314
- arguments: z6.string(),
2315
- status: z6.literal("completed")
2494
+ z7.object({
2495
+ type: z7.literal("function_call"),
2496
+ id: z7.string(),
2497
+ call_id: z7.string(),
2498
+ name: z7.string(),
2499
+ arguments: z7.string(),
2500
+ status: z7.literal("completed")
2316
2501
  })
2317
2502
  ])
2318
2503
  });
2319
- var responseFunctionCallArgumentsDeltaSchema = z6.object({
2320
- type: z6.literal("response.function_call_arguments.delta"),
2321
- item_id: z6.string(),
2322
- output_index: z6.number(),
2323
- delta: z6.string()
2504
+ var responseFunctionCallArgumentsDeltaSchema = z7.object({
2505
+ type: z7.literal("response.function_call_arguments.delta"),
2506
+ item_id: z7.string(),
2507
+ output_index: z7.number(),
2508
+ delta: z7.string()
2324
2509
  });
2325
- var responseOutputItemAddedSchema = z6.object({
2326
- type: z6.literal("response.output_item.added"),
2327
- output_index: z6.number(),
2328
- item: z6.discriminatedUnion("type", [
2329
- z6.object({
2330
- type: z6.literal("message")
2510
+ var responseOutputItemAddedSchema = z7.object({
2511
+ type: z7.literal("response.output_item.added"),
2512
+ output_index: z7.number(),
2513
+ item: z7.discriminatedUnion("type", [
2514
+ z7.object({
2515
+ type: z7.literal("message")
2331
2516
  }),
2332
- z6.object({
2333
- type: z6.literal("function_call"),
2334
- id: z6.string(),
2335
- call_id: z6.string(),
2336
- name: z6.string(),
2337
- arguments: z6.string()
2517
+ z7.object({
2518
+ type: z7.literal("function_call"),
2519
+ id: z7.string(),
2520
+ call_id: z7.string(),
2521
+ name: z7.string(),
2522
+ arguments: z7.string()
2338
2523
  })
2339
2524
  ])
2340
2525
  });
2341
- var responseAnnotationAddedSchema = z6.object({
2342
- type: z6.literal("response.output_text.annotation.added"),
2343
- annotation: z6.object({
2344
- type: z6.literal("url_citation"),
2345
- url: z6.string(),
2346
- title: z6.string()
2526
+ var responseAnnotationAddedSchema = z7.object({
2527
+ type: z7.literal("response.output_text.annotation.added"),
2528
+ annotation: z7.object({
2529
+ type: z7.literal("url_citation"),
2530
+ url: z7.string(),
2531
+ title: z7.string()
2347
2532
  })
2348
2533
  });
2349
- var openaiResponsesChunkSchema = z6.union([
2534
+ var openaiResponsesChunkSchema = z7.union([
2350
2535
  textDeltaChunkSchema,
2351
2536
  responseFinishedChunkSchema,
2352
2537
  responseCreatedChunkSchema,
@@ -2354,7 +2539,7 @@ var openaiResponsesChunkSchema = z6.union([
2354
2539
  responseFunctionCallArgumentsDeltaSchema,
2355
2540
  responseOutputItemAddedSchema,
2356
2541
  responseAnnotationAddedSchema,
2357
- z6.object({ type: z6.string() }).passthrough()
2542
+ z7.object({ type: z7.string() }).passthrough()
2358
2543
  // fallback for unknown chunks
2359
2544
  ]);
2360
2545
  function isTextDeltaChunk(chunk) {
@@ -2399,15 +2584,15 @@ function getResponsesModelConfig(modelId) {
2399
2584
  requiredAutoTruncation: false
2400
2585
  };
2401
2586
  }
2402
- var openaiResponsesProviderOptionsSchema = z6.object({
2403
- metadata: z6.any().nullish(),
2404
- parallelToolCalls: z6.boolean().nullish(),
2405
- previousResponseId: z6.string().nullish(),
2406
- store: z6.boolean().nullish(),
2407
- user: z6.string().nullish(),
2408
- reasoningEffort: z6.string().nullish(),
2409
- strictSchemas: z6.boolean().nullish(),
2410
- instructions: z6.string().nullish()
2587
+ var openaiResponsesProviderOptionsSchema = z7.object({
2588
+ metadata: z7.any().nullish(),
2589
+ parallelToolCalls: z7.boolean().nullish(),
2590
+ previousResponseId: z7.string().nullish(),
2591
+ store: z7.boolean().nullish(),
2592
+ user: z7.string().nullish(),
2593
+ reasoningEffort: z7.string().nullish(),
2594
+ strictSchemas: z7.boolean().nullish(),
2595
+ instructions: z7.string().nullish()
2411
2596
  });
2412
2597
  export {
2413
2598
  OpenAIChatLanguageModel,
@@ -2415,6 +2600,7 @@ export {
2415
2600
  OpenAIEmbeddingModel,
2416
2601
  OpenAIImageModel,
2417
2602
  OpenAIResponsesLanguageModel,
2603
+ OpenAITranscriptionModel,
2418
2604
  modelMaxImagesPerCall
2419
2605
  };
2420
2606
  //# sourceMappingURL=index.mjs.map