@ai-sdk/openai 1.3.8 → 1.3.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,22 @@
  # @ai-sdk/openai

+ ## 1.3.10
+
+ ### Patch Changes
+
+ - dbe53e7: adding support for gpt-4o-search-preview and handling unsupported parameters
+ - 84ffaba: fix: propagate openai transcription fixes
+
+ ## 1.3.9
+
+ ### Patch Changes
+
+ - 013faa8: core (ai): change transcription model mimeType to mediaType
+ - 013faa8: fix (provider/openai): increase transcription model resilience
+ - Updated dependencies [013faa8]
+   - @ai-sdk/provider@1.1.2
+   - @ai-sdk/provider-utils@2.2.6
+
  ## 1.3.8

  ### Patch Changes
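
For reference, the headline change in 1.3.10 is the pair of new chat model ids. A minimal usage sketch, not taken from this diff (it assumes the `ai` package's `generateText` helper, an ESM/async context, and an OPENAI_API_KEY in the environment):

import { generateText } from 'ai';
import { openai } from '@ai-sdk/openai';

// 'gpt-4o-search-preview' is now part of OpenAIChatModelId, so it type-checks
// like any other chat model id.
const { text } = await generateText({
  model: openai('gpt-4o-search-preview'),
  prompt: 'In one sentence, what is the AI SDK?',
});

console.log(text);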
package/dist/index.d.mts CHANGED
@@ -2,7 +2,7 @@ import { LanguageModelV1, ProviderV1, EmbeddingModelV1, ImageModelV1, Transcript
  import { FetchFunction } from '@ai-sdk/provider-utils';
  import { z } from 'zod';

- type OpenAIChatModelId = 'o1' | 'o1-2024-12-17' | 'o1-mini' | 'o1-mini-2024-09-12' | 'o1-preview' | 'o1-preview-2024-09-12' | 'o3-mini' | 'o3-mini-2025-01-31' | 'gpt-4o' | 'gpt-4o-2024-05-13' | 'gpt-4o-2024-08-06' | 'gpt-4o-2024-11-20' | 'gpt-4o-audio-preview' | 'gpt-4o-audio-preview-2024-10-01' | 'gpt-4o-audio-preview-2024-12-17' | 'gpt-4o-mini' | 'gpt-4o-mini-2024-07-18' | 'gpt-4-turbo' | 'gpt-4-turbo-2024-04-09' | 'gpt-4-turbo-preview' | 'gpt-4-0125-preview' | 'gpt-4-1106-preview' | 'gpt-4' | 'gpt-4-0613' | 'gpt-4.5-preview' | 'gpt-4.5-preview-2025-02-27' | 'gpt-3.5-turbo-0125' | 'gpt-3.5-turbo' | 'gpt-3.5-turbo-1106' | 'chatgpt-4o-latest' | (string & {});
+ type OpenAIChatModelId = 'o1' | 'o1-2024-12-17' | 'o1-mini' | 'o1-mini-2024-09-12' | 'o1-preview' | 'o1-preview-2024-09-12' | 'o3-mini' | 'o3-mini-2025-01-31' | 'gpt-4o' | 'gpt-4o-2024-05-13' | 'gpt-4o-2024-08-06' | 'gpt-4o-2024-11-20' | 'gpt-4o-audio-preview' | 'gpt-4o-audio-preview-2024-10-01' | 'gpt-4o-audio-preview-2024-12-17' | 'gpt-4o-search-preview' | 'gpt-4o-search-preview-2025-03-11' | 'gpt-4o-mini' | 'gpt-4o-mini-2024-07-18' | 'gpt-4-turbo' | 'gpt-4-turbo-2024-04-09' | 'gpt-4-turbo-preview' | 'gpt-4-0125-preview' | 'gpt-4-1106-preview' | 'gpt-4' | 'gpt-4-0613' | 'gpt-4.5-preview' | 'gpt-4.5-preview-2025-02-27' | 'gpt-3.5-turbo-0125' | 'gpt-3.5-turbo' | 'gpt-3.5-turbo-1106' | 'chatgpt-4o-latest' | (string & {});
  interface OpenAIChatSettings {
    /**
    Modify the likelihood of specified tokens appearing in the completion.
package/dist/index.d.ts CHANGED
@@ -2,7 +2,7 @@ import { LanguageModelV1, ProviderV1, EmbeddingModelV1, ImageModelV1, Transcript
  import { FetchFunction } from '@ai-sdk/provider-utils';
  import { z } from 'zod';

- type OpenAIChatModelId = 'o1' | 'o1-2024-12-17' | 'o1-mini' | 'o1-mini-2024-09-12' | 'o1-preview' | 'o1-preview-2024-09-12' | 'o3-mini' | 'o3-mini-2025-01-31' | 'gpt-4o' | 'gpt-4o-2024-05-13' | 'gpt-4o-2024-08-06' | 'gpt-4o-2024-11-20' | 'gpt-4o-audio-preview' | 'gpt-4o-audio-preview-2024-10-01' | 'gpt-4o-audio-preview-2024-12-17' | 'gpt-4o-mini' | 'gpt-4o-mini-2024-07-18' | 'gpt-4-turbo' | 'gpt-4-turbo-2024-04-09' | 'gpt-4-turbo-preview' | 'gpt-4-0125-preview' | 'gpt-4-1106-preview' | 'gpt-4' | 'gpt-4-0613' | 'gpt-4.5-preview' | 'gpt-4.5-preview-2025-02-27' | 'gpt-3.5-turbo-0125' | 'gpt-3.5-turbo' | 'gpt-3.5-turbo-1106' | 'chatgpt-4o-latest' | (string & {});
+ type OpenAIChatModelId = 'o1' | 'o1-2024-12-17' | 'o1-mini' | 'o1-mini-2024-09-12' | 'o1-preview' | 'o1-preview-2024-09-12' | 'o3-mini' | 'o3-mini-2025-01-31' | 'gpt-4o' | 'gpt-4o-2024-05-13' | 'gpt-4o-2024-08-06' | 'gpt-4o-2024-11-20' | 'gpt-4o-audio-preview' | 'gpt-4o-audio-preview-2024-10-01' | 'gpt-4o-audio-preview-2024-12-17' | 'gpt-4o-search-preview' | 'gpt-4o-search-preview-2025-03-11' | 'gpt-4o-mini' | 'gpt-4o-mini-2024-07-18' | 'gpt-4-turbo' | 'gpt-4-turbo-2024-04-09' | 'gpt-4-turbo-preview' | 'gpt-4-0125-preview' | 'gpt-4-1106-preview' | 'gpt-4' | 'gpt-4-0613' | 'gpt-4.5-preview' | 'gpt-4.5-preview-2025-02-27' | 'gpt-3.5-turbo-0125' | 'gpt-3.5-turbo' | 'gpt-3.5-turbo-1106' | 'chatgpt-4o-latest' | (string & {});
  interface OpenAIChatSettings {
    /**
    Modify the likelihood of specified tokens appearing in the completion.
package/dist/index.js CHANGED
@@ -536,6 +536,15 @@ var OpenAIChatLanguageModel = class {
        }
        baseArgs.max_tokens = void 0;
      }
+   } else if (this.modelId.startsWith("gpt-4o-search-preview")) {
+     if (baseArgs.temperature != null) {
+       baseArgs.temperature = void 0;
+       warnings.push({
+         type: "unsupported-setting",
+         setting: "temperature",
+         details: "temperature is not supported for the gpt-4o-search-preview model and has been removed."
+       });
+     }
    }
    switch (type) {
      case "regular": {
@@ -1605,17 +1614,11 @@ var openaiImageResponseSchema = import_zod5.z.object({
  var import_provider_utils7 = require("@ai-sdk/provider-utils");
  var import_zod6 = require("zod");
  var OpenAIProviderOptionsSchema = import_zod6.z.object({
-   include: import_zod6.z.array(import_zod6.z.string()).optional().describe(
-     "Additional information to include in the transcription response."
-   ),
-   language: import_zod6.z.string().optional().describe("The language of the input audio in ISO-639-1 format."),
-   prompt: import_zod6.z.string().optional().describe(
-     "An optional text to guide the model's style or continue a previous audio segment."
-   ),
-   temperature: import_zod6.z.number().min(0).max(1).optional().default(0).describe("The sampling temperature, between 0 and 1."),
-   timestampGranularities: import_zod6.z.array(import_zod6.z.enum(["word", "segment"])).optional().default(["segment"]).describe(
-     "The timestamp granularities to populate for this transcription."
-   )
+   include: import_zod6.z.array(import_zod6.z.string()).nullish(),
+   language: import_zod6.z.string().nullish(),
+   prompt: import_zod6.z.string().nullish(),
+   temperature: import_zod6.z.number().min(0).max(1).nullish().default(0),
+   timestampGranularities: import_zod6.z.array(import_zod6.z.enum(["word", "segment"])).nullish().default(["segment"])
  });
  var languageMap = {
    afrikaans: "af",
@@ -1687,9 +1690,10 @@ var OpenAITranscriptionModel = class {
  }
  getArgs({
    audio,
-   mimeType,
+   mediaType,
    providerOptions
  }) {
+   var _a, _b, _c, _d, _e;
    const warnings = [];
    const openAIOptions = (0, import_provider_utils7.parseProviderOptions)({
      provider: "openai",
@@ -1699,19 +1703,19 @@ var OpenAITranscriptionModel = class {
    const formData = new FormData();
    const blob = audio instanceof Uint8Array ? new Blob([audio]) : new Blob([(0, import_provider_utils7.convertBase64ToUint8Array)(audio)]);
    formData.append("model", this.modelId);
-   formData.append("file", new File([blob], "audio", { type: mimeType }));
+   formData.append("file", new File([blob], "audio", { type: mediaType }));
    if (openAIOptions) {
      const transcriptionModelOptions = {
-       include: openAIOptions.include,
-       language: openAIOptions.language,
-       prompt: openAIOptions.prompt,
-       temperature: openAIOptions.temperature,
-       timestamp_granularities: openAIOptions.timestampGranularities
+       include: (_a = openAIOptions.include) != null ? _a : void 0,
+       language: (_b = openAIOptions.language) != null ? _b : void 0,
+       prompt: (_c = openAIOptions.prompt) != null ? _c : void 0,
+       temperature: (_d = openAIOptions.temperature) != null ? _d : void 0,
+       timestamp_granularities: (_e = openAIOptions.timestampGranularities) != null ? _e : void 0
      };
      for (const key in transcriptionModelOptions) {
        const value = transcriptionModelOptions[key];
        if (value !== void 0) {
-         formData.append(key, value);
+         formData.append(key, String(value));
        }
      }
    }
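
Taken together, the transcription changes in this hunk rename `mimeType` to `mediaType` and coerce provider options to strings before appending them to the multipart form. A hypothetical end-to-end sketch (the file name is illustrative; it assumes the `ai` package's `experimental_transcribe` helper):

import { readFile } from 'node:fs/promises';
import { experimental_transcribe as transcribe } from 'ai';
import { openai } from '@ai-sdk/openai';

const result = await transcribe({
  model: openai.transcription('whisper-1'),
  audio: await readFile('meeting.mp3'), // Uint8Array/Buffer or base64 string
  providerOptions: {
    openai: {
      // These map onto the form fields built above (timestamp_granularities, etc.).
      timestampGranularities: ['word'],
      temperature: 0,
    },
  },
});

console.log(result.text);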
@@ -1721,10 +1725,14 @@ var OpenAITranscriptionModel = class {
    };
  }
  async doGenerate(options) {
-   var _a, _b, _c;
+   var _a, _b, _c, _d, _e, _f;
    const currentDate = (_c = (_b = (_a = this.config._internal) == null ? void 0 : _a.currentDate) == null ? void 0 : _b.call(_a)) != null ? _c : /* @__PURE__ */ new Date();
    const { formData, warnings } = this.getArgs(options);
-   const { value: response, responseHeaders } = await (0, import_provider_utils7.postFormDataToApi)({
+   const {
+     value: response,
+     responseHeaders,
+     rawValue: rawResponse
+   } = await (0, import_provider_utils7.postFormDataToApi)({
      url: this.config.url({
        path: "/audio/transcriptions",
        modelId: this.modelId
@@ -1738,46 +1746,37 @@ var OpenAITranscriptionModel = class {
      abortSignal: options.abortSignal,
      fetch: this.config.fetch
    });
-   let language;
-   if (response.language && response.language in languageMap) {
-     language = languageMap[response.language];
-   }
+   const language = response.language != null && response.language in languageMap ? languageMap[response.language] : void 0;
    return {
      text: response.text,
-     segments: response.words.map((word) => ({
+     segments: (_e = (_d = response.words) == null ? void 0 : _d.map((word) => ({
        text: word.word,
        startSecond: word.start,
        endSecond: word.end
-     })),
+     }))) != null ? _e : [],
      language,
-     durationInSeconds: response.duration,
+     durationInSeconds: (_f = response.duration) != null ? _f : void 0,
      warnings,
      response: {
        timestamp: currentDate,
        modelId: this.modelId,
        headers: responseHeaders,
-       body: response
-     },
-     // When using format `verbose_json` on `whisper-1`, OpenAI includes the things like `task` and enhanced `segments` information.
-     providerMetadata: {
-       openai: {
-         transcript: response
-       }
+       body: rawResponse
      }
    };
  }
};
var openaiTranscriptionResponseSchema = import_zod6.z.object({
  text: import_zod6.z.string(),
- language: import_zod6.z.string().optional(),
- duration: import_zod6.z.number().optional(),
+ language: import_zod6.z.string().nullish(),
+ duration: import_zod6.z.number().nullish(),
  words: import_zod6.z.array(
    import_zod6.z.object({
      word: import_zod6.z.string(),
      start: import_zod6.z.number(),
      end: import_zod6.z.number()
    })
- )
+ ).nullish()
});

// src/responses/openai-responses-language-model.ts
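
The widened response schema above (nullish `language`, `duration`, and `words`) is the "increase transcription model resilience" fix from the changelog: responses that omit those fields now parse instead of failing validation, and the mapping falls back to an empty segments array. A short, hedged sketch of how the mapped result degrades (same assumptions and illustrative file name as the transcription example above):

import { readFile } from 'node:fs/promises';
import { experimental_transcribe as transcribe } from 'ai';
import { openai } from '@ai-sdk/openai';

const { segments, durationInSeconds, language } = await transcribe({
  model: openai.transcription('whisper-1'),
  audio: await readFile('meeting.mp3'),
});

console.log(segments);                                     // [] when no word-level timestamps are returned
console.log(durationInSeconds ?? 'duration not reported'); // may be undefined
console.log(language ?? 'language not reported');          // may be undefined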