@ai-sdk/openai 1.3.7 → 1.3.9

This diff shows the changes between publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
@@ -1605,16 +1605,196 @@ var openaiImageResponseSchema = z5.object({
   data: z5.array(z5.object({ b64_json: z5.string() }))
 });
 
-// src/responses/openai-responses-language-model.ts
+// src/openai-transcription-model.ts
 import {
   combineHeaders as combineHeaders5,
-  createEventSourceResponseHandler as createEventSourceResponseHandler3,
+  convertBase64ToUint8Array,
   createJsonResponseHandler as createJsonResponseHandler5,
-  generateId as generateId2,
   parseProviderOptions,
-  postJsonToApi as postJsonToApi5
+  postFormDataToApi
 } from "@ai-sdk/provider-utils";
 import { z as z6 } from "zod";
+var OpenAIProviderOptionsSchema = z6.object({
+  include: z6.array(z6.string()).optional().describe(
+    "Additional information to include in the transcription response."
+  ),
+  language: z6.string().optional().describe("The language of the input audio in ISO-639-1 format."),
+  prompt: z6.string().optional().describe(
+    "An optional text to guide the model's style or continue a previous audio segment."
+  ),
+  temperature: z6.number().min(0).max(1).optional().default(0).describe("The sampling temperature, between 0 and 1."),
+  timestampGranularities: z6.array(z6.enum(["word", "segment"])).optional().default(["segment"]).describe(
+    "The timestamp granularities to populate for this transcription."
+  )
+});
+var languageMap = {
+  afrikaans: "af",
+  arabic: "ar",
+  armenian: "hy",
+  azerbaijani: "az",
+  belarusian: "be",
+  bosnian: "bs",
+  bulgarian: "bg",
+  catalan: "ca",
+  chinese: "zh",
+  croatian: "hr",
+  czech: "cs",
+  danish: "da",
+  dutch: "nl",
+  english: "en",
+  estonian: "et",
+  finnish: "fi",
+  french: "fr",
+  galician: "gl",
+  german: "de",
+  greek: "el",
+  hebrew: "he",
+  hindi: "hi",
+  hungarian: "hu",
+  icelandic: "is",
+  indonesian: "id",
+  italian: "it",
+  japanese: "ja",
+  kannada: "kn",
+  kazakh: "kk",
+  korean: "ko",
+  latvian: "lv",
+  lithuanian: "lt",
+  macedonian: "mk",
+  malay: "ms",
+  marathi: "mr",
+  maori: "mi",
+  nepali: "ne",
+  norwegian: "no",
+  persian: "fa",
+  polish: "pl",
+  portuguese: "pt",
+  romanian: "ro",
+  russian: "ru",
+  serbian: "sr",
+  slovak: "sk",
+  slovenian: "sl",
+  spanish: "es",
+  swahili: "sw",
+  swedish: "sv",
+  tagalog: "tl",
+  tamil: "ta",
+  thai: "th",
+  turkish: "tr",
+  ukrainian: "uk",
+  urdu: "ur",
+  vietnamese: "vi",
+  welsh: "cy"
+};
+var OpenAITranscriptionModel = class {
+  constructor(modelId, config) {
+    this.modelId = modelId;
+    this.config = config;
+    this.specificationVersion = "v1";
+  }
+  get provider() {
+    return this.config.provider;
+  }
+  getArgs({
+    audio,
+    mediaType,
+    providerOptions
+  }) {
+    const warnings = [];
+    const openAIOptions = parseProviderOptions({
+      provider: "openai",
+      providerOptions,
+      schema: OpenAIProviderOptionsSchema
+    });
+    const formData = new FormData();
+    const blob = audio instanceof Uint8Array ? new Blob([audio]) : new Blob([convertBase64ToUint8Array(audio)]);
+    formData.append("model", this.modelId);
+    formData.append("file", new File([blob], "audio", { type: mediaType }));
+    if (openAIOptions) {
+      const transcriptionModelOptions = {
+        include: openAIOptions.include,
+        language: openAIOptions.language,
+        prompt: openAIOptions.prompt,
+        temperature: openAIOptions.temperature,
+        timestamp_granularities: openAIOptions.timestampGranularities
+      };
+      for (const key in transcriptionModelOptions) {
+        const value = transcriptionModelOptions[key];
+        if (value !== void 0) {
+          formData.append(key, value);
+        }
+      }
+    }
+    return {
+      formData,
+      warnings
+    };
+  }
+  async doGenerate(options) {
+    var _a, _b, _c, _d, _e, _f;
+    const currentDate = (_c = (_b = (_a = this.config._internal) == null ? void 0 : _a.currentDate) == null ? void 0 : _b.call(_a)) != null ? _c : /* @__PURE__ */ new Date();
+    const { formData, warnings } = this.getArgs(options);
+    const {
+      value: response,
+      responseHeaders,
+      rawValue: rawResponse
+    } = await postFormDataToApi({
+      url: this.config.url({
+        path: "/audio/transcriptions",
+        modelId: this.modelId
+      }),
+      headers: combineHeaders5(this.config.headers(), options.headers),
+      formData,
+      failedResponseHandler: openaiFailedResponseHandler,
+      successfulResponseHandler: createJsonResponseHandler5(
+        openaiTranscriptionResponseSchema
+      ),
+      abortSignal: options.abortSignal,
+      fetch: this.config.fetch
+    });
+    const language = response.language != null && response.language in languageMap ? languageMap[response.language] : void 0;
+    return {
+      text: response.text,
+      segments: (_e = (_d = response.words) == null ? void 0 : _d.map((word) => ({
+        text: word.word,
+        startSecond: word.start,
+        endSecond: word.end
+      }))) != null ? _e : [],
+      language,
+      durationInSeconds: (_f = response.duration) != null ? _f : void 0,
+      warnings,
+      response: {
+        timestamp: currentDate,
+        modelId: this.modelId,
+        headers: responseHeaders,
+        body: rawResponse
+      }
+    };
+  }
+};
+var openaiTranscriptionResponseSchema = z6.object({
+  text: z6.string(),
+  language: z6.string().nullish(),
+  duration: z6.number().nullish(),
+  words: z6.array(
+    z6.object({
+      word: z6.string(),
+      start: z6.number(),
+      end: z6.number()
+    })
+  ).nullish()
+});
+
+// src/responses/openai-responses-language-model.ts
+import {
+  combineHeaders as combineHeaders6,
+  createEventSourceResponseHandler as createEventSourceResponseHandler3,
+  createJsonResponseHandler as createJsonResponseHandler6,
+  generateId as generateId2,
+  parseProviderOptions as parseProviderOptions2,
+  postJsonToApi as postJsonToApi5
+} from "@ai-sdk/provider-utils";
+import { z as z7 } from "zod";
 
 // src/responses/convert-to-openai-responses-messages.ts
 import {
@@ -1905,7 +2085,7 @@ var OpenAIResponsesLanguageModel = class {
       systemMessageMode: modelConfig.systemMessageMode
     });
     warnings.push(...messageWarnings);
-    const openaiOptions = parseProviderOptions({
+    const openaiOptions = parseProviderOptions2({
       provider: "openai",
       providerOptions: providerMetadata,
       schema: openaiResponsesProviderOptionsSchema
@@ -2030,53 +2210,53 @@ var OpenAIResponsesLanguageModel = class {
         path: "/responses",
         modelId: this.modelId
       }),
-      headers: combineHeaders5(this.config.headers(), options.headers),
+      headers: combineHeaders6(this.config.headers(), options.headers),
       body,
       failedResponseHandler: openaiFailedResponseHandler,
-      successfulResponseHandler: createJsonResponseHandler5(
-        z6.object({
-          id: z6.string(),
-          created_at: z6.number(),
-          model: z6.string(),
-          output: z6.array(
-            z6.discriminatedUnion("type", [
-              z6.object({
-                type: z6.literal("message"),
-                role: z6.literal("assistant"),
-                content: z6.array(
-                  z6.object({
-                    type: z6.literal("output_text"),
-                    text: z6.string(),
-                    annotations: z6.array(
-                      z6.object({
-                        type: z6.literal("url_citation"),
-                        start_index: z6.number(),
-                        end_index: z6.number(),
-                        url: z6.string(),
-                        title: z6.string()
+      successfulResponseHandler: createJsonResponseHandler6(
+        z7.object({
+          id: z7.string(),
+          created_at: z7.number(),
+          model: z7.string(),
+          output: z7.array(
+            z7.discriminatedUnion("type", [
+              z7.object({
+                type: z7.literal("message"),
+                role: z7.literal("assistant"),
+                content: z7.array(
+                  z7.object({
+                    type: z7.literal("output_text"),
+                    text: z7.string(),
+                    annotations: z7.array(
+                      z7.object({
+                        type: z7.literal("url_citation"),
+                        start_index: z7.number(),
+                        end_index: z7.number(),
+                        url: z7.string(),
+                        title: z7.string()
                       })
                     )
                   })
                 )
               }),
-              z6.object({
-                type: z6.literal("function_call"),
-                call_id: z6.string(),
-                name: z6.string(),
-                arguments: z6.string()
+              z7.object({
+                type: z7.literal("function_call"),
+                call_id: z7.string(),
+                name: z7.string(),
+                arguments: z7.string()
               }),
-              z6.object({
-                type: z6.literal("web_search_call")
+              z7.object({
+                type: z7.literal("web_search_call")
              }),
-              z6.object({
-                type: z6.literal("computer_call")
+              z7.object({
+                type: z7.literal("computer_call")
              }),
-              z6.object({
-                type: z6.literal("reasoning")
+              z7.object({
+                type: z7.literal("reasoning")
              })
            ])
          ),
-          incomplete_details: z6.object({ reason: z6.string() }).nullable(),
+          incomplete_details: z7.object({ reason: z7.string() }).nullable(),
          usage: usageSchema
        })
      ),
@@ -2145,7 +2325,7 @@ var OpenAIResponsesLanguageModel = class {
         path: "/responses",
         modelId: this.modelId
       }),
-      headers: combineHeaders5(this.config.headers(), options.headers),
+      headers: combineHeaders6(this.config.headers(), options.headers),
       body: {
         ...body,
         stream: true
@@ -2274,79 +2454,79 @@ var OpenAIResponsesLanguageModel = class {
     };
   }
 };
-var usageSchema = z6.object({
-  input_tokens: z6.number(),
-  input_tokens_details: z6.object({ cached_tokens: z6.number().nullish() }).nullish(),
-  output_tokens: z6.number(),
-  output_tokens_details: z6.object({ reasoning_tokens: z6.number().nullish() }).nullish()
+var usageSchema = z7.object({
+  input_tokens: z7.number(),
+  input_tokens_details: z7.object({ cached_tokens: z7.number().nullish() }).nullish(),
+  output_tokens: z7.number(),
+  output_tokens_details: z7.object({ reasoning_tokens: z7.number().nullish() }).nullish()
 });
-var textDeltaChunkSchema = z6.object({
-  type: z6.literal("response.output_text.delta"),
-  delta: z6.string()
+var textDeltaChunkSchema = z7.object({
+  type: z7.literal("response.output_text.delta"),
+  delta: z7.string()
 });
-var responseFinishedChunkSchema = z6.object({
-  type: z6.enum(["response.completed", "response.incomplete"]),
-  response: z6.object({
-    incomplete_details: z6.object({ reason: z6.string() }).nullish(),
+var responseFinishedChunkSchema = z7.object({
+  type: z7.enum(["response.completed", "response.incomplete"]),
+  response: z7.object({
+    incomplete_details: z7.object({ reason: z7.string() }).nullish(),
     usage: usageSchema
   })
 });
-var responseCreatedChunkSchema = z6.object({
-  type: z6.literal("response.created"),
-  response: z6.object({
-    id: z6.string(),
-    created_at: z6.number(),
-    model: z6.string()
+var responseCreatedChunkSchema = z7.object({
+  type: z7.literal("response.created"),
+  response: z7.object({
+    id: z7.string(),
+    created_at: z7.number(),
+    model: z7.string()
   })
 });
-var responseOutputItemDoneSchema = z6.object({
-  type: z6.literal("response.output_item.done"),
-  output_index: z6.number(),
-  item: z6.discriminatedUnion("type", [
-    z6.object({
-      type: z6.literal("message")
+var responseOutputItemDoneSchema = z7.object({
+  type: z7.literal("response.output_item.done"),
+  output_index: z7.number(),
+  item: z7.discriminatedUnion("type", [
+    z7.object({
+      type: z7.literal("message")
     }),
-    z6.object({
-      type: z6.literal("function_call"),
-      id: z6.string(),
-      call_id: z6.string(),
-      name: z6.string(),
-      arguments: z6.string(),
-      status: z6.literal("completed")
+    z7.object({
+      type: z7.literal("function_call"),
+      id: z7.string(),
+      call_id: z7.string(),
+      name: z7.string(),
+      arguments: z7.string(),
+      status: z7.literal("completed")
     })
   ])
 });
-var responseFunctionCallArgumentsDeltaSchema = z6.object({
-  type: z6.literal("response.function_call_arguments.delta"),
-  item_id: z6.string(),
-  output_index: z6.number(),
-  delta: z6.string()
+var responseFunctionCallArgumentsDeltaSchema = z7.object({
+  type: z7.literal("response.function_call_arguments.delta"),
+  item_id: z7.string(),
+  output_index: z7.number(),
+  delta: z7.string()
 });
-var responseOutputItemAddedSchema = z6.object({
-  type: z6.literal("response.output_item.added"),
-  output_index: z6.number(),
-  item: z6.discriminatedUnion("type", [
-    z6.object({
-      type: z6.literal("message")
+var responseOutputItemAddedSchema = z7.object({
+  type: z7.literal("response.output_item.added"),
+  output_index: z7.number(),
+  item: z7.discriminatedUnion("type", [
+    z7.object({
+      type: z7.literal("message")
     }),
-    z6.object({
-      type: z6.literal("function_call"),
-      id: z6.string(),
-      call_id: z6.string(),
-      name: z6.string(),
-      arguments: z6.string()
+    z7.object({
+      type: z7.literal("function_call"),
+      id: z7.string(),
+      call_id: z7.string(),
+      name: z7.string(),
+      arguments: z7.string()
     })
   ])
 });
-var responseAnnotationAddedSchema = z6.object({
-  type: z6.literal("response.output_text.annotation.added"),
-  annotation: z6.object({
-    type: z6.literal("url_citation"),
-    url: z6.string(),
-    title: z6.string()
+var responseAnnotationAddedSchema = z7.object({
+  type: z7.literal("response.output_text.annotation.added"),
+  annotation: z7.object({
+    type: z7.literal("url_citation"),
+    url: z7.string(),
+    title: z7.string()
   })
 });
-var openaiResponsesChunkSchema = z6.union([
+var openaiResponsesChunkSchema = z7.union([
   textDeltaChunkSchema,
   responseFinishedChunkSchema,
   responseCreatedChunkSchema,
@@ -2354,7 +2534,7 @@ var openaiResponsesChunkSchema = z6.union([
   responseFunctionCallArgumentsDeltaSchema,
   responseOutputItemAddedSchema,
   responseAnnotationAddedSchema,
-  z6.object({ type: z6.string() }).passthrough()
+  z7.object({ type: z7.string() }).passthrough()
   // fallback for unknown chunks
 ]);
 function isTextDeltaChunk(chunk) {
@@ -2399,15 +2579,15 @@ function getResponsesModelConfig(modelId) {
     requiredAutoTruncation: false
   };
 }
-var openaiResponsesProviderOptionsSchema = z6.object({
-  metadata: z6.any().nullish(),
-  parallelToolCalls: z6.boolean().nullish(),
-  previousResponseId: z6.string().nullish(),
-  store: z6.boolean().nullish(),
-  user: z6.string().nullish(),
-  reasoningEffort: z6.string().nullish(),
-  strictSchemas: z6.boolean().nullish(),
-  instructions: z6.string().nullish()
+var openaiResponsesProviderOptionsSchema = z7.object({
+  metadata: z7.any().nullish(),
+  parallelToolCalls: z7.boolean().nullish(),
+  previousResponseId: z7.string().nullish(),
+  store: z7.boolean().nullish(),
+  user: z7.string().nullish(),
+  reasoningEffort: z7.string().nullish(),
+  strictSchemas: z7.boolean().nullish(),
+  instructions: z7.string().nullish()
 });
 export {
   OpenAIChatLanguageModel,
@@ -2415,6 +2595,7 @@ export {
   OpenAIEmbeddingModel,
   OpenAIImageModel,
   OpenAIResponsesLanguageModel,
+  OpenAITranscriptionModel,
   modelMaxImagesPerCall
 };
 //# sourceMappingURL=index.mjs.map
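
The substantive change in this release is the OpenAITranscriptionModel added above: it posts multipart form data to /audio/transcriptions via postFormDataToApi and maps the JSON response into text, word segments, an ISO-639-1 language code, and a duration. A minimal sketch of exercising it directly through the constructor and doGenerate signatures visible in this diff follows; the import path, config wiring, model id, and file handling are illustrative assumptions, not something this diff documents.

// Sketch only: assumes the bundle above is exposed via "@ai-sdk/openai/internal"
// and that a config providing provider/url/headers (the fields doGenerate reads)
// is sufficient. Normally the provider factory builds this config for you.
import { readFile } from "node:fs/promises";
import { OpenAITranscriptionModel } from "@ai-sdk/openai/internal";

const model = new OpenAITranscriptionModel("whisper-1", {
  provider: "openai.transcription",
  url: ({ path }) => `https://api.openai.com/v1${path}`,
  headers: () => ({ Authorization: `Bearer ${process.env.OPENAI_API_KEY}` })
});

// doGenerate accepts a Uint8Array (or base64 string) plus a media type;
// provider options are validated against OpenAIProviderOptionsSchema.
const audio = await readFile("speech.mp3");
const result = await model.doGenerate({
  audio,
  mediaType: "audio/mpeg",
  providerOptions: { openai: { timestampGranularities: ["word"] } }
});

console.log(result.text, result.language, result.durationInSeconds);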