@ai-sdk/openai 1.3.9 → 1.3.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -520,6 +520,15 @@ var OpenAIChatLanguageModel = class {
  }
  baseArgs.max_tokens = void 0;
  }
+ } else if (this.modelId.startsWith("gpt-4o-search-preview")) {
+ if (baseArgs.temperature != null) {
+ baseArgs.temperature = void 0;
+ warnings.push({
+ type: "unsupported-setting",
+ setting: "temperature",
+ details: "temperature is not supported for the gpt-4o-search-preview model and has been removed."
+ });
+ }
  }
  switch (type) {
  case "regular": {
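The hunk above mirrors the existing max_tokens handling: when the chat model id starts with "gpt-4o-search-preview", a caller-supplied temperature is dropped and reported as an unsupported-setting warning instead of being forwarded to the API. A minimal sketch of how a caller would observe this, assuming AI SDK 4.x's generateText and that this model id is routed to the chat model; the prompt and option values are illustrative:

import { generateText } from "ai";
import { openai } from "@ai-sdk/openai";

// temperature is silently removed for gpt-4o-search-preview; the call still succeeds
const { text, warnings } = await generateText({
  model: openai.chat("gpt-4o-search-preview"),
  temperature: 0.7, // triggers the "unsupported-setting" warning added in this diff
  prompt: "What changed in the AI SDK this week?",
});

console.log(warnings);
// expected shape per the hunk above:
// [{ type: "unsupported-setting", setting: "temperature", details: "temperature is not supported for the gpt-4o-search-preview model and has been removed." }]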
@@ -1614,18 +1623,12 @@ import {
  postFormDataToApi
  } from "@ai-sdk/provider-utils";
  import { z as z6 } from "zod";
- var OpenAIProviderOptionsSchema = z6.object({
- include: z6.array(z6.string()).optional().describe(
- "Additional information to include in the transcription response."
- ),
- language: z6.string().optional().describe("The language of the input audio in ISO-639-1 format."),
- prompt: z6.string().optional().describe(
- "An optional text to guide the model's style or continue a previous audio segment."
- ),
- temperature: z6.number().min(0).max(1).optional().default(0).describe("The sampling temperature, between 0 and 1."),
- timestampGranularities: z6.array(z6.enum(["word", "segment"])).optional().default(["segment"]).describe(
- "The timestamp granularities to populate for this transcription."
- )
+ var openAIProviderOptionsSchema = z6.object({
+ include: z6.array(z6.string()).nullish(),
+ language: z6.string().nullish(),
+ prompt: z6.string().nullish(),
+ temperature: z6.number().min(0).max(1).nullish().default(0),
+ timestampGranularities: z6.array(z6.enum(["word", "segment"])).nullish().default(["segment"])
  });
  var languageMap = {
  afrikaans: "af",
@@ -1700,11 +1703,12 @@ var OpenAITranscriptionModel = class {
  mediaType,
  providerOptions
  }) {
+ var _a, _b, _c, _d, _e;
  const warnings = [];
  const openAIOptions = parseProviderOptions({
  provider: "openai",
  providerOptions,
- schema: OpenAIProviderOptionsSchema
+ schema: openAIProviderOptionsSchema
  });
  const formData = new FormData();
  const blob = audio instanceof Uint8Array ? new Blob([audio]) : new Blob([convertBase64ToUint8Array(audio)]);
@@ -1712,16 +1716,16 @@ var OpenAITranscriptionModel = class {
  formData.append("file", new File([blob], "audio", { type: mediaType }));
  if (openAIOptions) {
  const transcriptionModelOptions = {
- include: openAIOptions.include,
- language: openAIOptions.language,
- prompt: openAIOptions.prompt,
- temperature: openAIOptions.temperature,
- timestamp_granularities: openAIOptions.timestampGranularities
+ include: (_a = openAIOptions.include) != null ? _a : void 0,
+ language: (_b = openAIOptions.language) != null ? _b : void 0,
+ prompt: (_c = openAIOptions.prompt) != null ? _c : void 0,
+ temperature: (_d = openAIOptions.temperature) != null ? _d : void 0,
+ timestamp_granularities: (_e = openAIOptions.timestampGranularities) != null ? _e : void 0
  };
  for (const key in transcriptionModelOptions) {
  const value = transcriptionModelOptions[key];
  if (value !== void 0) {
- formData.append(key, value);
+ formData.append(key, String(value));
  }
  }
  }
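Taken together, the two transcription hunks loosen the provider-options schema (nullish instead of optional, descriptions dropped), normalize null values to undefined before building the form body, and coerce every remaining value with String(...) when appending to FormData. A sketch of how provider options flow through this path, assuming the experimental_transcribe helper from the companion ai package (4.3+) and openai.transcription; the file name and option values are illustrative:

import { experimental_transcribe as transcribe } from "ai";
import { openai } from "@ai-sdk/openai";
import { readFile } from "node:fs/promises";

const result = await transcribe({
  model: openai.transcription("whisper-1"),
  audio: await readFile("meeting.mp3"),
  providerOptions: {
    openai: {
      language: "en",
      temperature: 0, // non-string values are now appended via String(value)
      timestampGranularities: ["word"],
      prompt: null, // nullish values are accepted by the schema and stripped before the request
    },
  },
});

console.log(result.text);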
@@ -1785,16 +1789,120 @@ var openaiTranscriptionResponseSchema = z6.object({
  ).nullish()
  });
 
- // src/responses/openai-responses-language-model.ts
+ // src/openai-speech-model.ts
  import {
  combineHeaders as combineHeaders6,
- createEventSourceResponseHandler as createEventSourceResponseHandler3,
- createJsonResponseHandler as createJsonResponseHandler6,
- generateId as generateId2,
+ createBinaryResponseHandler,
  parseProviderOptions as parseProviderOptions2,
  postJsonToApi as postJsonToApi5
  } from "@ai-sdk/provider-utils";
  import { z as z7 } from "zod";
+ var OpenAIProviderOptionsSchema = z7.object({
+ instructions: z7.string().nullish(),
+ speed: z7.number().min(0.25).max(4).default(1).nullish()
+ });
+ var OpenAISpeechModel = class {
+ constructor(modelId, config) {
+ this.modelId = modelId;
+ this.config = config;
+ this.specificationVersion = "v1";
+ }
+ get provider() {
+ return this.config.provider;
+ }
+ getArgs({
+ text,
+ voice = "alloy",
+ outputFormat = "mp3",
+ speed,
+ instructions,
+ providerOptions
+ }) {
+ const warnings = [];
+ const openAIOptions = parseProviderOptions2({
+ provider: "openai",
+ providerOptions,
+ schema: OpenAIProviderOptionsSchema
+ });
+ const requestBody = {
+ model: this.modelId,
+ input: text,
+ voice,
+ response_format: "mp3",
+ speed,
+ instructions
+ };
+ if (outputFormat) {
+ if (["mp3", "opus", "aac", "flac", "wav", "pcm"].includes(outputFormat)) {
+ requestBody.response_format = outputFormat;
+ } else {
+ warnings.push({
+ type: "unsupported-setting",
+ setting: "outputFormat",
+ details: `Unsupported output format: ${outputFormat}. Using mp3 instead.`
+ });
+ }
+ }
+ if (openAIOptions) {
+ const speechModelOptions = {};
+ for (const key in speechModelOptions) {
+ const value = speechModelOptions[key];
+ if (value !== void 0) {
+ requestBody[key] = value;
+ }
+ }
+ }
+ return {
+ requestBody,
+ warnings
+ };
+ }
+ async doGenerate(options) {
+ var _a, _b, _c;
+ const currentDate = (_c = (_b = (_a = this.config._internal) == null ? void 0 : _a.currentDate) == null ? void 0 : _b.call(_a)) != null ? _c : /* @__PURE__ */ new Date();
+ const { requestBody, warnings } = this.getArgs(options);
+ const {
+ value: audio,
+ responseHeaders,
+ rawValue: rawResponse
+ } = await postJsonToApi5({
+ url: this.config.url({
+ path: "/audio/speech",
+ modelId: this.modelId
+ }),
+ headers: combineHeaders6(this.config.headers(), options.headers),
+ body: requestBody,
+ failedResponseHandler: openaiFailedResponseHandler,
+ successfulResponseHandler: createBinaryResponseHandler(),
+ abortSignal: options.abortSignal,
+ fetch: this.config.fetch
+ });
+ return {
+ audio,
+ warnings,
+ request: {
+ body: JSON.stringify(requestBody)
+ },
+ response: {
+ timestamp: currentDate,
+ modelId: this.modelId,
+ headers: responseHeaders,
+ body: rawResponse
+ }
+ };
+ }
+ };
+
+ // src/responses/openai-responses-language-model.ts
+ import {
+ combineHeaders as combineHeaders7,
+ createEventSourceResponseHandler as createEventSourceResponseHandler3,
+ createJsonResponseHandler as createJsonResponseHandler6,
+ generateId as generateId2,
+ parseProviderOptions as parseProviderOptions3,
+ postJsonToApi as postJsonToApi6
+ } from "@ai-sdk/provider-utils";
+ import { z as z8 } from "zod";
 
  // src/responses/convert-to-openai-responses-messages.ts
  import {
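The headline addition in this hunk is OpenAISpeechModel: it POSTs to /audio/speech, defaults voice to "alloy" and response_format to "mp3", validates outputFormat against mp3/opus/aac/flac/wav/pcm (falling back to mp3 with a warning otherwise), and returns the binary audio via createBinaryResponseHandler. A minimal usage sketch, assuming this release wires the class up as openai.speech(...) and that the companion ai package (4.3+) exposes experimental_generateSpeech; the model id, output format, and the audio.uint8Array field are illustrative assumptions rather than confirmed by this diff:

import { experimental_generateSpeech as generateSpeech } from "ai";
import { openai } from "@ai-sdk/openai";
import { writeFile } from "node:fs/promises";

const { audio, warnings } = await generateSpeech({
  model: openai.speech("tts-1"), // model id is illustrative
  text: "Hello from the AI SDK.",
  voice: "alloy",
  outputFormat: "wav", // anything outside mp3/opus/aac/flac/wav/pcm falls back to mp3 with a warning
});

await writeFile("hello.wav", audio.uint8Array);
console.log(warnings);

Note also that the openai provider-options loop in getArgs iterates an empty speechModelOptions object in this version, so instructions/speed reach the request only via the top-level call options.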
@@ -2085,7 +2193,7 @@ var OpenAIResponsesLanguageModel = class {
  systemMessageMode: modelConfig.systemMessageMode
  });
  warnings.push(...messageWarnings);
- const openaiOptions = parseProviderOptions2({
+ const openaiOptions = parseProviderOptions3({
  provider: "openai",
  providerOptions: providerMetadata,
  schema: openaiResponsesProviderOptionsSchema
@@ -2205,58 +2313,58 @@ var OpenAIResponsesLanguageModel = class {
  responseHeaders,
  value: response,
  rawValue: rawResponse
- } = await postJsonToApi5({
+ } = await postJsonToApi6({
  url: this.config.url({
  path: "/responses",
  modelId: this.modelId
  }),
- headers: combineHeaders6(this.config.headers(), options.headers),
+ headers: combineHeaders7(this.config.headers(), options.headers),
  body,
  failedResponseHandler: openaiFailedResponseHandler,
  successfulResponseHandler: createJsonResponseHandler6(
- z7.object({
- id: z7.string(),
- created_at: z7.number(),
- model: z7.string(),
- output: z7.array(
- z7.discriminatedUnion("type", [
- z7.object({
- type: z7.literal("message"),
- role: z7.literal("assistant"),
- content: z7.array(
- z7.object({
- type: z7.literal("output_text"),
- text: z7.string(),
- annotations: z7.array(
- z7.object({
- type: z7.literal("url_citation"),
- start_index: z7.number(),
- end_index: z7.number(),
- url: z7.string(),
- title: z7.string()
+ z8.object({
+ id: z8.string(),
+ created_at: z8.number(),
+ model: z8.string(),
+ output: z8.array(
+ z8.discriminatedUnion("type", [
+ z8.object({
+ type: z8.literal("message"),
+ role: z8.literal("assistant"),
+ content: z8.array(
+ z8.object({
+ type: z8.literal("output_text"),
+ text: z8.string(),
+ annotations: z8.array(
+ z8.object({
+ type: z8.literal("url_citation"),
+ start_index: z8.number(),
+ end_index: z8.number(),
+ url: z8.string(),
+ title: z8.string()
  })
  )
  })
  )
  }),
- z7.object({
- type: z7.literal("function_call"),
- call_id: z7.string(),
- name: z7.string(),
- arguments: z7.string()
+ z8.object({
+ type: z8.literal("function_call"),
+ call_id: z8.string(),
+ name: z8.string(),
+ arguments: z8.string()
  }),
- z7.object({
- type: z7.literal("web_search_call")
+ z8.object({
+ type: z8.literal("web_search_call")
  }),
- z7.object({
- type: z7.literal("computer_call")
+ z8.object({
+ type: z8.literal("computer_call")
  }),
- z7.object({
- type: z7.literal("reasoning")
+ z8.object({
+ type: z8.literal("reasoning")
  })
  ])
  ),
- incomplete_details: z7.object({ reason: z7.string() }).nullable(),
+ incomplete_details: z8.object({ reason: z8.string() }).nullable(),
  usage: usageSchema
  })
  ),
@@ -2320,12 +2428,12 @@ var OpenAIResponsesLanguageModel = class {
  }
  async doStream(options) {
  const { args: body, warnings } = this.getArgs(options);
- const { responseHeaders, value: response } = await postJsonToApi5({
+ const { responseHeaders, value: response } = await postJsonToApi6({
  url: this.config.url({
  path: "/responses",
  modelId: this.modelId
  }),
- headers: combineHeaders6(this.config.headers(), options.headers),
+ headers: combineHeaders7(this.config.headers(), options.headers),
  body: {
  ...body,
  stream: true
@@ -2454,79 +2562,79 @@ var OpenAIResponsesLanguageModel = class {
  };
  }
  };
- var usageSchema = z7.object({
- input_tokens: z7.number(),
- input_tokens_details: z7.object({ cached_tokens: z7.number().nullish() }).nullish(),
- output_tokens: z7.number(),
- output_tokens_details: z7.object({ reasoning_tokens: z7.number().nullish() }).nullish()
+ var usageSchema = z8.object({
+ input_tokens: z8.number(),
+ input_tokens_details: z8.object({ cached_tokens: z8.number().nullish() }).nullish(),
+ output_tokens: z8.number(),
+ output_tokens_details: z8.object({ reasoning_tokens: z8.number().nullish() }).nullish()
  });
- var textDeltaChunkSchema = z7.object({
- type: z7.literal("response.output_text.delta"),
- delta: z7.string()
+ var textDeltaChunkSchema = z8.object({
+ type: z8.literal("response.output_text.delta"),
+ delta: z8.string()
  });
- var responseFinishedChunkSchema = z7.object({
- type: z7.enum(["response.completed", "response.incomplete"]),
- response: z7.object({
- incomplete_details: z7.object({ reason: z7.string() }).nullish(),
+ var responseFinishedChunkSchema = z8.object({
+ type: z8.enum(["response.completed", "response.incomplete"]),
+ response: z8.object({
+ incomplete_details: z8.object({ reason: z8.string() }).nullish(),
  usage: usageSchema
  })
  });
- var responseCreatedChunkSchema = z7.object({
- type: z7.literal("response.created"),
- response: z7.object({
- id: z7.string(),
- created_at: z7.number(),
- model: z7.string()
+ var responseCreatedChunkSchema = z8.object({
+ type: z8.literal("response.created"),
+ response: z8.object({
+ id: z8.string(),
+ created_at: z8.number(),
+ model: z8.string()
  })
  });
- var responseOutputItemDoneSchema = z7.object({
- type: z7.literal("response.output_item.done"),
- output_index: z7.number(),
- item: z7.discriminatedUnion("type", [
- z7.object({
- type: z7.literal("message")
+ var responseOutputItemDoneSchema = z8.object({
+ type: z8.literal("response.output_item.done"),
+ output_index: z8.number(),
+ item: z8.discriminatedUnion("type", [
+ z8.object({
+ type: z8.literal("message")
  }),
- z7.object({
- type: z7.literal("function_call"),
- id: z7.string(),
- call_id: z7.string(),
- name: z7.string(),
- arguments: z7.string(),
- status: z7.literal("completed")
+ z8.object({
+ type: z8.literal("function_call"),
+ id: z8.string(),
+ call_id: z8.string(),
+ name: z8.string(),
+ arguments: z8.string(),
+ status: z8.literal("completed")
  })
  ])
  });
- var responseFunctionCallArgumentsDeltaSchema = z7.object({
- type: z7.literal("response.function_call_arguments.delta"),
- item_id: z7.string(),
- output_index: z7.number(),
- delta: z7.string()
+ var responseFunctionCallArgumentsDeltaSchema = z8.object({
+ type: z8.literal("response.function_call_arguments.delta"),
+ item_id: z8.string(),
+ output_index: z8.number(),
+ delta: z8.string()
  });
- var responseOutputItemAddedSchema = z7.object({
- type: z7.literal("response.output_item.added"),
- output_index: z7.number(),
- item: z7.discriminatedUnion("type", [
- z7.object({
- type: z7.literal("message")
+ var responseOutputItemAddedSchema = z8.object({
+ type: z8.literal("response.output_item.added"),
+ output_index: z8.number(),
+ item: z8.discriminatedUnion("type", [
+ z8.object({
+ type: z8.literal("message")
  }),
- z7.object({
- type: z7.literal("function_call"),
- id: z7.string(),
- call_id: z7.string(),
- name: z7.string(),
- arguments: z7.string()
+ z8.object({
+ type: z8.literal("function_call"),
+ id: z8.string(),
+ call_id: z8.string(),
+ name: z8.string(),
+ arguments: z8.string()
  })
  ])
  });
- var responseAnnotationAddedSchema = z7.object({
- type: z7.literal("response.output_text.annotation.added"),
- annotation: z7.object({
- type: z7.literal("url_citation"),
- url: z7.string(),
- title: z7.string()
+ var responseAnnotationAddedSchema = z8.object({
+ type: z8.literal("response.output_text.annotation.added"),
+ annotation: z8.object({
+ type: z8.literal("url_citation"),
+ url: z8.string(),
+ title: z8.string()
  })
  });
- var openaiResponsesChunkSchema = z7.union([
+ var openaiResponsesChunkSchema = z8.union([
  textDeltaChunkSchema,
  responseFinishedChunkSchema,
  responseCreatedChunkSchema,
@@ -2534,7 +2642,7 @@ var openaiResponsesChunkSchema = z7.union([
  responseFunctionCallArgumentsDeltaSchema,
  responseOutputItemAddedSchema,
  responseAnnotationAddedSchema,
- z7.object({ type: z7.string() }).passthrough()
+ z8.object({ type: z8.string() }).passthrough()
  // fallback for unknown chunks
  ]);
  function isTextDeltaChunk(chunk) {
@@ -2579,15 +2687,15 @@ function getResponsesModelConfig(modelId) {
  requiredAutoTruncation: false
  };
  }
- var openaiResponsesProviderOptionsSchema = z7.object({
- metadata: z7.any().nullish(),
- parallelToolCalls: z7.boolean().nullish(),
- previousResponseId: z7.string().nullish(),
- store: z7.boolean().nullish(),
- user: z7.string().nullish(),
- reasoningEffort: z7.string().nullish(),
- strictSchemas: z7.boolean().nullish(),
- instructions: z7.string().nullish()
+ var openaiResponsesProviderOptionsSchema = z8.object({
+ metadata: z8.any().nullish(),
+ parallelToolCalls: z8.boolean().nullish(),
+ previousResponseId: z8.string().nullish(),
+ store: z8.boolean().nullish(),
+ user: z8.string().nullish(),
+ reasoningEffort: z8.string().nullish(),
+ strictSchemas: z8.boolean().nullish(),
+ instructions: z8.string().nullish()
  });
  export {
  OpenAIChatLanguageModel,
@@ -2595,6 +2703,7 @@ export {
  OpenAIEmbeddingModel,
  OpenAIImageModel,
  OpenAIResponsesLanguageModel,
+ OpenAISpeechModel,
  OpenAITranscriptionModel,
  modelMaxImagesPerCall
  };