@ai-sdk/deepgram 2.0.0-beta.33 → 2.0.0-beta.34
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +8 -0
- package/README.md +16 -2
- package/dist/index.d.mts +38 -2
- package/dist/index.d.ts +38 -2
- package/dist/index.js +395 -4
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +396 -1
- package/dist/index.mjs.map +1 -1
- package/package.json +1 -1
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,13 @@
|
|
|
1
1
|
# @ai-sdk/deepgram
|
|
2
2
|
|
|
3
|
+
## 2.0.0-beta.34
|
|
4
|
+
|
|
5
|
+
### Patch Changes
|
|
6
|
+
|
|
7
|
+
- f118108: feat(deepgram): add text-to-speech support
|
|
8
|
+
|
|
9
|
+
Add text-to-speech support for Deepgram Aura models via the REST API. Supports all Aura voice models (aura-2-helena-en, aura-2-thalia-en, etc.) with proper audio format validation, encoding/container/sample_rate/bitrate combinations, and comprehensive parameter validation.
|
|
10
|
+
|
|
3
11
|
## 2.0.0-beta.33
|
|
4
12
|
|
|
5
13
|
### Patch Changes
|
package/README.md
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
# AI SDK - Deepgram Provider
|
|
2
2
|
|
|
3
3
|
The **[Deepgram provider](https://ai-sdk.dev/providers/ai-sdk-providers/deepgram)** for the [AI SDK](https://ai-sdk.dev/docs)
|
|
4
|
-
contains transcription model support for the Deepgram transcription API.
|
|
4
|
+
contains transcription model support for the Deepgram transcription API and speech model support for the Deepgram text-to-speech API.
|
|
5
5
|
|
|
6
6
|
## Setup
|
|
7
7
|
|
|
@@ -19,7 +19,9 @@ You can import the default provider instance `deepgram` from `@ai-sdk/deepgram`:
|
|
|
19
19
|
import { deepgram } from '@ai-sdk/deepgram';
|
|
20
20
|
```
|
|
21
21
|
|
|
22
|
-
## Example
|
|
22
|
+
## Examples
|
|
23
|
+
|
|
24
|
+
### Transcription
|
|
23
25
|
|
|
24
26
|
```ts
|
|
25
27
|
import { deepgram } from '@ai-sdk/deepgram';
|
|
@@ -33,6 +35,18 @@ const { text } = await transcribe({
|
|
|
33
35
|
});
|
|
34
36
|
```
|
|
35
37
|
|
|
38
|
+
### Text-to-Speech
|
|
39
|
+
|
|
40
|
+
```ts
|
|
41
|
+
import { deepgram } from '@ai-sdk/deepgram';
|
|
42
|
+
import { experimental_generateSpeech as generateSpeech } from 'ai';
|
|
43
|
+
|
|
44
|
+
const { audio } = await generateSpeech({
|
|
45
|
+
model: deepgram.speech('aura-2-helena-en'),
|
|
46
|
+
text: 'Hello, welcome to Deepgram!',
|
|
47
|
+
});
|
|
48
|
+
```
|
|
49
|
+
|
|
36
50
|
## Documentation
|
|
37
51
|
|
|
38
52
|
Please check out the **[Deepgram provider documentation](https://ai-sdk.dev/providers/ai-sdk-providers/deepgram)** for more information.
|
package/dist/index.d.mts
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
|
-
import { TranscriptionModelV3, ProviderV3 } from '@ai-sdk/provider';
|
|
1
|
+
import { TranscriptionModelV3, ProviderV3, SpeechModelV3 } from '@ai-sdk/provider';
|
|
2
2
|
import { FetchFunction } from '@ai-sdk/provider-utils';
|
|
3
|
+
import { z } from 'zod/v4';
|
|
3
4
|
|
|
4
5
|
type DeepgramConfig = {
|
|
5
6
|
provider: string;
|
|
@@ -29,6 +30,8 @@ declare class DeepgramTranscriptionModel implements TranscriptionModelV3 {
|
|
|
29
30
|
doGenerate(options: Parameters<TranscriptionModelV3['doGenerate']>[0]): Promise<Awaited<ReturnType<TranscriptionModelV3['doGenerate']>>>;
|
|
30
31
|
}
|
|
31
32
|
|
|
33
|
+
type DeepgramSpeechModelId = 'aura-asteria-en' | 'aura-2-asteria-en' | 'aura-2-thalia-en' | 'aura-2-helena-en' | 'aura-2-orpheus-en' | 'aura-2-zeus-en' | 'aura-luna-en' | 'aura-stella-en' | (string & {});
|
|
34
|
+
|
|
32
35
|
interface DeepgramProvider extends ProviderV3 {
|
|
33
36
|
(modelId: 'nova-3', settings?: {}): {
|
|
34
37
|
transcription: DeepgramTranscriptionModel;
|
|
@@ -37,6 +40,10 @@ interface DeepgramProvider extends ProviderV3 {
|
|
|
37
40
|
Creates a model for transcription.
|
|
38
41
|
*/
|
|
39
42
|
transcription(modelId: DeepgramTranscriptionModelId): TranscriptionModelV3;
|
|
43
|
+
/**
|
|
44
|
+
Creates a model for speech generation.
|
|
45
|
+
*/
|
|
46
|
+
speech(modelId: DeepgramSpeechModelId): SpeechModelV3;
|
|
40
47
|
}
|
|
41
48
|
interface DeepgramProviderSettings {
|
|
42
49
|
/**
|
|
@@ -62,6 +69,35 @@ Default Deepgram provider instance.
|
|
|
62
69
|
*/
|
|
63
70
|
declare const deepgram: DeepgramProvider;
|
|
64
71
|
|
|
72
|
+
declare const deepgramSpeechProviderOptionsSchema: z.ZodObject<{
|
|
73
|
+
bitRate: z.ZodOptional<z.ZodNullable<z.ZodUnion<readonly [z.ZodNumber, z.ZodString]>>>;
|
|
74
|
+
container: z.ZodOptional<z.ZodNullable<z.ZodString>>;
|
|
75
|
+
encoding: z.ZodOptional<z.ZodNullable<z.ZodString>>;
|
|
76
|
+
sampleRate: z.ZodOptional<z.ZodNullable<z.ZodNumber>>;
|
|
77
|
+
callback: z.ZodOptional<z.ZodNullable<z.ZodString>>;
|
|
78
|
+
callbackMethod: z.ZodOptional<z.ZodNullable<z.ZodEnum<{
|
|
79
|
+
POST: "POST";
|
|
80
|
+
PUT: "PUT";
|
|
81
|
+
}>>>;
|
|
82
|
+
mipOptOut: z.ZodOptional<z.ZodNullable<z.ZodBoolean>>;
|
|
83
|
+
tag: z.ZodOptional<z.ZodNullable<z.ZodUnion<readonly [z.ZodString, z.ZodArray<z.ZodString>]>>>;
|
|
84
|
+
}, z.core.$strip>;
|
|
85
|
+
type DeepgramSpeechCallOptions = z.infer<typeof deepgramSpeechProviderOptionsSchema>;
|
|
86
|
+
interface DeepgramSpeechModelConfig extends DeepgramConfig {
|
|
87
|
+
_internal?: {
|
|
88
|
+
currentDate?: () => Date;
|
|
89
|
+
};
|
|
90
|
+
}
|
|
91
|
+
declare class DeepgramSpeechModel implements SpeechModelV3 {
|
|
92
|
+
readonly modelId: DeepgramSpeechModelId;
|
|
93
|
+
private readonly config;
|
|
94
|
+
readonly specificationVersion = "v3";
|
|
95
|
+
get provider(): string;
|
|
96
|
+
constructor(modelId: DeepgramSpeechModelId, config: DeepgramSpeechModelConfig);
|
|
97
|
+
private getArgs;
|
|
98
|
+
doGenerate(options: Parameters<SpeechModelV3['doGenerate']>[0]): Promise<Awaited<ReturnType<SpeechModelV3['doGenerate']>>>;
|
|
99
|
+
}
|
|
100
|
+
|
|
65
101
|
declare const VERSION: string;
|
|
66
102
|
|
|
67
|
-
export { type DeepgramProvider, type DeepgramProviderSettings, VERSION, createDeepgram, deepgram };
|
|
103
|
+
export { type DeepgramProvider, type DeepgramProviderSettings, type DeepgramSpeechCallOptions, DeepgramSpeechModel, type DeepgramSpeechModelId, VERSION, createDeepgram, deepgram };
|
package/dist/index.d.ts
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
|
-
import { TranscriptionModelV3, ProviderV3 } from '@ai-sdk/provider';
|
|
1
|
+
import { TranscriptionModelV3, ProviderV3, SpeechModelV3 } from '@ai-sdk/provider';
|
|
2
2
|
import { FetchFunction } from '@ai-sdk/provider-utils';
|
|
3
|
+
import { z } from 'zod/v4';
|
|
3
4
|
|
|
4
5
|
type DeepgramConfig = {
|
|
5
6
|
provider: string;
|
|
@@ -29,6 +30,8 @@ declare class DeepgramTranscriptionModel implements TranscriptionModelV3 {
|
|
|
29
30
|
doGenerate(options: Parameters<TranscriptionModelV3['doGenerate']>[0]): Promise<Awaited<ReturnType<TranscriptionModelV3['doGenerate']>>>;
|
|
30
31
|
}
|
|
31
32
|
|
|
33
|
+
type DeepgramSpeechModelId = 'aura-asteria-en' | 'aura-2-asteria-en' | 'aura-2-thalia-en' | 'aura-2-helena-en' | 'aura-2-orpheus-en' | 'aura-2-zeus-en' | 'aura-luna-en' | 'aura-stella-en' | (string & {});
|
|
34
|
+
|
|
32
35
|
interface DeepgramProvider extends ProviderV3 {
|
|
33
36
|
(modelId: 'nova-3', settings?: {}): {
|
|
34
37
|
transcription: DeepgramTranscriptionModel;
|
|
@@ -37,6 +40,10 @@ interface DeepgramProvider extends ProviderV3 {
|
|
|
37
40
|
Creates a model for transcription.
|
|
38
41
|
*/
|
|
39
42
|
transcription(modelId: DeepgramTranscriptionModelId): TranscriptionModelV3;
|
|
43
|
+
/**
|
|
44
|
+
Creates a model for speech generation.
|
|
45
|
+
*/
|
|
46
|
+
speech(modelId: DeepgramSpeechModelId): SpeechModelV3;
|
|
40
47
|
}
|
|
41
48
|
interface DeepgramProviderSettings {
|
|
42
49
|
/**
|
|
@@ -62,6 +69,35 @@ Default Deepgram provider instance.
|
|
|
62
69
|
*/
|
|
63
70
|
declare const deepgram: DeepgramProvider;
|
|
64
71
|
|
|
72
|
+
declare const deepgramSpeechProviderOptionsSchema: z.ZodObject<{
|
|
73
|
+
bitRate: z.ZodOptional<z.ZodNullable<z.ZodUnion<readonly [z.ZodNumber, z.ZodString]>>>;
|
|
74
|
+
container: z.ZodOptional<z.ZodNullable<z.ZodString>>;
|
|
75
|
+
encoding: z.ZodOptional<z.ZodNullable<z.ZodString>>;
|
|
76
|
+
sampleRate: z.ZodOptional<z.ZodNullable<z.ZodNumber>>;
|
|
77
|
+
callback: z.ZodOptional<z.ZodNullable<z.ZodString>>;
|
|
78
|
+
callbackMethod: z.ZodOptional<z.ZodNullable<z.ZodEnum<{
|
|
79
|
+
POST: "POST";
|
|
80
|
+
PUT: "PUT";
|
|
81
|
+
}>>>;
|
|
82
|
+
mipOptOut: z.ZodOptional<z.ZodNullable<z.ZodBoolean>>;
|
|
83
|
+
tag: z.ZodOptional<z.ZodNullable<z.ZodUnion<readonly [z.ZodString, z.ZodArray<z.ZodString>]>>>;
|
|
84
|
+
}, z.core.$strip>;
|
|
85
|
+
type DeepgramSpeechCallOptions = z.infer<typeof deepgramSpeechProviderOptionsSchema>;
|
|
86
|
+
interface DeepgramSpeechModelConfig extends DeepgramConfig {
|
|
87
|
+
_internal?: {
|
|
88
|
+
currentDate?: () => Date;
|
|
89
|
+
};
|
|
90
|
+
}
|
|
91
|
+
declare class DeepgramSpeechModel implements SpeechModelV3 {
|
|
92
|
+
readonly modelId: DeepgramSpeechModelId;
|
|
93
|
+
private readonly config;
|
|
94
|
+
readonly specificationVersion = "v3";
|
|
95
|
+
get provider(): string;
|
|
96
|
+
constructor(modelId: DeepgramSpeechModelId, config: DeepgramSpeechModelConfig);
|
|
97
|
+
private getArgs;
|
|
98
|
+
doGenerate(options: Parameters<SpeechModelV3['doGenerate']>[0]): Promise<Awaited<ReturnType<SpeechModelV3['doGenerate']>>>;
|
|
99
|
+
}
|
|
100
|
+
|
|
65
101
|
declare const VERSION: string;
|
|
66
102
|
|
|
67
|
-
export { type DeepgramProvider, type DeepgramProviderSettings, VERSION, createDeepgram, deepgram };
|
|
103
|
+
export { type DeepgramProvider, type DeepgramProviderSettings, type DeepgramSpeechCallOptions, DeepgramSpeechModel, type DeepgramSpeechModelId, VERSION, createDeepgram, deepgram };
|
package/dist/index.js
CHANGED
|
@@ -20,6 +20,7 @@ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: tru
|
|
|
20
20
|
// src/index.ts
|
|
21
21
|
var src_exports = {};
|
|
22
22
|
__export(src_exports, {
|
|
23
|
+
DeepgramSpeechModel: () => DeepgramSpeechModel,
|
|
23
24
|
VERSION: () => VERSION,
|
|
24
25
|
createDeepgram: () => createDeepgram,
|
|
25
26
|
deepgram: () => deepgram
|
|
@@ -28,7 +29,7 @@ module.exports = __toCommonJS(src_exports);
|
|
|
28
29
|
|
|
29
30
|
// src/deepgram-provider.ts
|
|
30
31
|
var import_provider = require("@ai-sdk/provider");
|
|
31
|
-
var import_provider_utils3 = require("@ai-sdk/provider-utils");
|
|
32
|
+
var import_provider_utils4 = require("@ai-sdk/provider-utils");
|
|
32
33
|
|
|
33
34
|
// src/deepgram-transcription-model.ts
|
|
34
35
|
var import_provider_utils2 = require("@ai-sdk/provider-utils");
|
|
@@ -206,14 +207,395 @@ var deepgramTranscriptionResponseSchema = import_v42.z.object({
|
|
|
206
207
|
}).nullish()
|
|
207
208
|
});
|
|
208
209
|
|
|
210
|
+
// src/deepgram-speech-model.ts
|
|
211
|
+
var import_provider_utils3 = require("@ai-sdk/provider-utils");
|
|
212
|
+
var import_v43 = require("zod/v4");
|
|
213
|
+
var deepgramSpeechProviderOptionsSchema = import_v43.z.object({
|
|
214
|
+
/** Bitrate of the audio in bits per second. Can be a number or predefined enum value. */
|
|
215
|
+
bitRate: import_v43.z.union([import_v43.z.number(), import_v43.z.string()]).nullish(),
|
|
216
|
+
/** Container format for the output audio (mp3, wav, etc.). */
|
|
217
|
+
container: import_v43.z.string().nullish(),
|
|
218
|
+
/** Encoding type for the audio output (linear16, mulaw, alaw, etc.). */
|
|
219
|
+
encoding: import_v43.z.string().nullish(),
|
|
220
|
+
/** Sample rate for the output audio in Hz (8000, 16000, 24000, 44100, 48000). */
|
|
221
|
+
sampleRate: import_v43.z.number().nullish(),
|
|
222
|
+
/** URL to which we'll make the callback request. */
|
|
223
|
+
callback: import_v43.z.string().url().nullish(),
|
|
224
|
+
/** HTTP method by which the callback request will be made (POST or PUT). */
|
|
225
|
+
callbackMethod: import_v43.z.enum(["POST", "PUT"]).nullish(),
|
|
226
|
+
/** Opts out requests from the Deepgram Model Improvement Program. */
|
|
227
|
+
mipOptOut: import_v43.z.boolean().nullish(),
|
|
228
|
+
/** Label your requests for the purpose of identification during usage reporting. */
|
|
229
|
+
tag: import_v43.z.union([import_v43.z.string(), import_v43.z.array(import_v43.z.string())]).nullish()
|
|
230
|
+
});
|
|
231
|
+
var DeepgramSpeechModel = class {
|
|
232
|
+
constructor(modelId, config) {
|
|
233
|
+
this.modelId = modelId;
|
|
234
|
+
this.config = config;
|
|
235
|
+
this.specificationVersion = "v3";
|
|
236
|
+
}
|
|
237
|
+
get provider() {
|
|
238
|
+
return this.config.provider;
|
|
239
|
+
}
|
|
240
|
+
async getArgs({
|
|
241
|
+
text,
|
|
242
|
+
voice,
|
|
243
|
+
outputFormat = "mp3",
|
|
244
|
+
speed,
|
|
245
|
+
language,
|
|
246
|
+
instructions,
|
|
247
|
+
providerOptions
|
|
248
|
+
}) {
|
|
249
|
+
var _a, _b, _c;
|
|
250
|
+
const warnings = [];
|
|
251
|
+
const deepgramOptions = await (0, import_provider_utils3.parseProviderOptions)({
|
|
252
|
+
provider: "deepgram",
|
|
253
|
+
providerOptions,
|
|
254
|
+
schema: deepgramSpeechProviderOptionsSchema
|
|
255
|
+
});
|
|
256
|
+
const requestBody = {
|
|
257
|
+
text
|
|
258
|
+
};
|
|
259
|
+
const queryParams = {
|
|
260
|
+
model: this.modelId
|
|
261
|
+
};
|
|
262
|
+
if (outputFormat) {
|
|
263
|
+
const formatLower = outputFormat.toLowerCase();
|
|
264
|
+
const formatMap = {
|
|
265
|
+
// MP3: no container, fixed 22050 sample rate, bitrate 32000/48000
|
|
266
|
+
mp3: { encoding: "mp3" },
|
|
267
|
+
// Don't set container or sample_rate for mp3
|
|
268
|
+
// Linear16: wav/none container, configurable sample rate
|
|
269
|
+
wav: { container: "wav", encoding: "linear16" },
|
|
270
|
+
linear16: { encoding: "linear16", container: "wav" },
|
|
271
|
+
// MuLaw: wav/none container, 8000/16000 sample rate
|
|
272
|
+
mulaw: { encoding: "mulaw", container: "wav" },
|
|
273
|
+
// ALaw: wav/none container, 8000/16000 sample rate
|
|
274
|
+
alaw: { encoding: "alaw", container: "wav" },
|
|
275
|
+
// Opus: ogg container, fixed 48000 sample rate
|
|
276
|
+
opus: { encoding: "opus", container: "ogg" },
|
|
277
|
+
ogg: { encoding: "opus", container: "ogg" },
|
|
278
|
+
// FLAC: no container, configurable sample rate
|
|
279
|
+
flac: { encoding: "flac" },
|
|
280
|
+
// AAC: no container, fixed 22050 sample rate
|
|
281
|
+
aac: { encoding: "aac" },
|
|
282
|
+
// Raw audio (no container)
|
|
283
|
+
pcm: { encoding: "linear16", container: "none" }
|
|
284
|
+
};
|
|
285
|
+
const mappedFormat = formatMap[formatLower];
|
|
286
|
+
if (mappedFormat) {
|
|
287
|
+
if (mappedFormat.encoding) {
|
|
288
|
+
queryParams.encoding = mappedFormat.encoding;
|
|
289
|
+
}
|
|
290
|
+
if (mappedFormat.container) {
|
|
291
|
+
queryParams.container = mappedFormat.container;
|
|
292
|
+
}
|
|
293
|
+
if (mappedFormat.sampleRate) {
|
|
294
|
+
queryParams.sample_rate = String(mappedFormat.sampleRate);
|
|
295
|
+
}
|
|
296
|
+
if (mappedFormat.bitRate) {
|
|
297
|
+
queryParams.bit_rate = String(mappedFormat.bitRate);
|
|
298
|
+
}
|
|
299
|
+
} else {
|
|
300
|
+
const parts = formatLower.split("_");
|
|
301
|
+
if (parts.length >= 2) {
|
|
302
|
+
const firstPart = parts[0];
|
|
303
|
+
const secondPart = parts[1];
|
|
304
|
+
const sampleRate = parseInt(secondPart, 10);
|
|
305
|
+
if ([
|
|
306
|
+
"linear16",
|
|
307
|
+
"mulaw",
|
|
308
|
+
"alaw",
|
|
309
|
+
"mp3",
|
|
310
|
+
"opus",
|
|
311
|
+
"flac",
|
|
312
|
+
"aac"
|
|
313
|
+
].includes(firstPart)) {
|
|
314
|
+
queryParams.encoding = firstPart;
|
|
315
|
+
if (["linear16", "mulaw", "alaw"].includes(firstPart)) {
|
|
316
|
+
queryParams.container = "wav";
|
|
317
|
+
} else if (firstPart === "opus") {
|
|
318
|
+
queryParams.container = "ogg";
|
|
319
|
+
}
|
|
320
|
+
if (!isNaN(sampleRate)) {
|
|
321
|
+
if (firstPart === "linear16" && [8e3, 16e3, 24e3, 32e3, 48e3].includes(sampleRate)) {
|
|
322
|
+
queryParams.sample_rate = String(sampleRate);
|
|
323
|
+
} else if (firstPart === "mulaw" && [8e3, 16e3].includes(sampleRate)) {
|
|
324
|
+
queryParams.sample_rate = String(sampleRate);
|
|
325
|
+
} else if (firstPart === "alaw" && [8e3, 16e3].includes(sampleRate)) {
|
|
326
|
+
queryParams.sample_rate = String(sampleRate);
|
|
327
|
+
} else if (firstPart === "flac" && [8e3, 16e3, 22050, 32e3, 48e3].includes(sampleRate)) {
|
|
328
|
+
queryParams.sample_rate = String(sampleRate);
|
|
329
|
+
}
|
|
330
|
+
}
|
|
331
|
+
} else if (["wav", "ogg"].includes(firstPart)) {
|
|
332
|
+
if (firstPart === "wav") {
|
|
333
|
+
queryParams.container = "wav";
|
|
334
|
+
queryParams.encoding = "linear16";
|
|
335
|
+
} else if (firstPart === "ogg") {
|
|
336
|
+
queryParams.container = "ogg";
|
|
337
|
+
queryParams.encoding = "opus";
|
|
338
|
+
}
|
|
339
|
+
if (!isNaN(sampleRate)) {
|
|
340
|
+
queryParams.sample_rate = String(sampleRate);
|
|
341
|
+
}
|
|
342
|
+
}
|
|
343
|
+
}
|
|
344
|
+
}
|
|
345
|
+
}
|
|
346
|
+
if (deepgramOptions) {
|
|
347
|
+
if (deepgramOptions.encoding) {
|
|
348
|
+
const newEncoding = deepgramOptions.encoding.toLowerCase();
|
|
349
|
+
queryParams.encoding = newEncoding;
|
|
350
|
+
if (deepgramOptions.container) {
|
|
351
|
+
if (["linear16", "mulaw", "alaw"].includes(newEncoding)) {
|
|
352
|
+
if (!["wav", "none"].includes(deepgramOptions.container.toLowerCase())) {
|
|
353
|
+
warnings.push({
|
|
354
|
+
type: "unsupported-setting",
|
|
355
|
+
setting: "providerOptions",
|
|
356
|
+
details: `Encoding "${newEncoding}" only supports containers "wav" or "none". Container "${deepgramOptions.container}" was ignored.`
|
|
357
|
+
});
|
|
358
|
+
} else {
|
|
359
|
+
queryParams.container = deepgramOptions.container.toLowerCase();
|
|
360
|
+
}
|
|
361
|
+
} else if (newEncoding === "opus") {
|
|
362
|
+
queryParams.container = "ogg";
|
|
363
|
+
} else if (["mp3", "flac", "aac"].includes(newEncoding)) {
|
|
364
|
+
warnings.push({
|
|
365
|
+
type: "unsupported-setting",
|
|
366
|
+
setting: "providerOptions",
|
|
367
|
+
details: `Encoding "${newEncoding}" does not support container parameter. Container "${deepgramOptions.container}" was ignored.`
|
|
368
|
+
});
|
|
369
|
+
delete queryParams.container;
|
|
370
|
+
}
|
|
371
|
+
} else {
|
|
372
|
+
if (["mp3", "flac", "aac"].includes(newEncoding)) {
|
|
373
|
+
delete queryParams.container;
|
|
374
|
+
} else if (["linear16", "mulaw", "alaw"].includes(newEncoding)) {
|
|
375
|
+
if (!queryParams.container) {
|
|
376
|
+
queryParams.container = "wav";
|
|
377
|
+
}
|
|
378
|
+
} else if (newEncoding === "opus") {
|
|
379
|
+
queryParams.container = "ogg";
|
|
380
|
+
}
|
|
381
|
+
}
|
|
382
|
+
if (["mp3", "opus", "aac"].includes(newEncoding)) {
|
|
383
|
+
delete queryParams.sample_rate;
|
|
384
|
+
}
|
|
385
|
+
if (["linear16", "mulaw", "alaw", "flac"].includes(newEncoding)) {
|
|
386
|
+
delete queryParams.bit_rate;
|
|
387
|
+
}
|
|
388
|
+
} else if (deepgramOptions.container) {
|
|
389
|
+
const container = deepgramOptions.container.toLowerCase();
|
|
390
|
+
const oldEncoding = (_a = queryParams.encoding) == null ? void 0 : _a.toLowerCase();
|
|
391
|
+
let newEncoding;
|
|
392
|
+
if (container === "wav") {
|
|
393
|
+
queryParams.container = "wav";
|
|
394
|
+
newEncoding = "linear16";
|
|
395
|
+
} else if (container === "ogg") {
|
|
396
|
+
queryParams.container = "ogg";
|
|
397
|
+
newEncoding = "opus";
|
|
398
|
+
} else if (container === "none") {
|
|
399
|
+
queryParams.container = "none";
|
|
400
|
+
newEncoding = "linear16";
|
|
401
|
+
}
|
|
402
|
+
if (newEncoding && newEncoding !== oldEncoding) {
|
|
403
|
+
queryParams.encoding = newEncoding;
|
|
404
|
+
if (["mp3", "opus", "aac"].includes(newEncoding)) {
|
|
405
|
+
delete queryParams.sample_rate;
|
|
406
|
+
}
|
|
407
|
+
if (["linear16", "mulaw", "alaw", "flac"].includes(newEncoding)) {
|
|
408
|
+
delete queryParams.bit_rate;
|
|
409
|
+
}
|
|
410
|
+
}
|
|
411
|
+
}
|
|
412
|
+
if (deepgramOptions.sampleRate != null) {
|
|
413
|
+
const encoding = ((_b = queryParams.encoding) == null ? void 0 : _b.toLowerCase()) || "";
|
|
414
|
+
const sampleRate = deepgramOptions.sampleRate;
|
|
415
|
+
if (encoding === "linear16") {
|
|
416
|
+
if (![8e3, 16e3, 24e3, 32e3, 48e3].includes(sampleRate)) {
|
|
417
|
+
warnings.push({
|
|
418
|
+
type: "unsupported-setting",
|
|
419
|
+
setting: "providerOptions",
|
|
420
|
+
details: `Encoding "linear16" only supports sample rates: 8000, 16000, 24000, 32000, 48000. Sample rate ${sampleRate} was ignored.`
|
|
421
|
+
});
|
|
422
|
+
} else {
|
|
423
|
+
queryParams.sample_rate = String(sampleRate);
|
|
424
|
+
}
|
|
425
|
+
} else if (encoding === "mulaw" || encoding === "alaw") {
|
|
426
|
+
if (![8e3, 16e3].includes(sampleRate)) {
|
|
427
|
+
warnings.push({
|
|
428
|
+
type: "unsupported-setting",
|
|
429
|
+
setting: "providerOptions",
|
|
430
|
+
details: `Encoding "${encoding}" only supports sample rates: 8000, 16000. Sample rate ${sampleRate} was ignored.`
|
|
431
|
+
});
|
|
432
|
+
} else {
|
|
433
|
+
queryParams.sample_rate = String(sampleRate);
|
|
434
|
+
}
|
|
435
|
+
} else if (encoding === "flac") {
|
|
436
|
+
if (![8e3, 16e3, 22050, 32e3, 48e3].includes(sampleRate)) {
|
|
437
|
+
warnings.push({
|
|
438
|
+
type: "unsupported-setting",
|
|
439
|
+
setting: "providerOptions",
|
|
440
|
+
details: `Encoding "flac" only supports sample rates: 8000, 16000, 22050, 32000, 48000. Sample rate ${sampleRate} was ignored.`
|
|
441
|
+
});
|
|
442
|
+
} else {
|
|
443
|
+
queryParams.sample_rate = String(sampleRate);
|
|
444
|
+
}
|
|
445
|
+
} else if (["mp3", "opus", "aac"].includes(encoding)) {
|
|
446
|
+
warnings.push({
|
|
447
|
+
type: "unsupported-setting",
|
|
448
|
+
setting: "providerOptions",
|
|
449
|
+
details: `Encoding "${encoding}" has a fixed sample rate and does not support sample_rate parameter. Sample rate ${sampleRate} was ignored.`
|
|
450
|
+
});
|
|
451
|
+
} else {
|
|
452
|
+
queryParams.sample_rate = String(sampleRate);
|
|
453
|
+
}
|
|
454
|
+
}
|
|
455
|
+
if (deepgramOptions.bitRate != null) {
|
|
456
|
+
const encoding = ((_c = queryParams.encoding) == null ? void 0 : _c.toLowerCase()) || "";
|
|
457
|
+
const bitRate = deepgramOptions.bitRate;
|
|
458
|
+
if (encoding === "mp3") {
|
|
459
|
+
if (![32e3, 48e3].includes(Number(bitRate))) {
|
|
460
|
+
warnings.push({
|
|
461
|
+
type: "unsupported-setting",
|
|
462
|
+
setting: "providerOptions",
|
|
463
|
+
details: `Encoding "mp3" only supports bit rates: 32000, 48000. Bit rate ${bitRate} was ignored.`
|
|
464
|
+
});
|
|
465
|
+
} else {
|
|
466
|
+
queryParams.bit_rate = String(bitRate);
|
|
467
|
+
}
|
|
468
|
+
} else if (encoding === "opus") {
|
|
469
|
+
const bitRateNum = Number(bitRate);
|
|
470
|
+
if (bitRateNum < 4e3 || bitRateNum > 65e4) {
|
|
471
|
+
warnings.push({
|
|
472
|
+
type: "unsupported-setting",
|
|
473
|
+
setting: "providerOptions",
|
|
474
|
+
details: `Encoding "opus" supports bit rates between 4000 and 650000. Bit rate ${bitRate} was ignored.`
|
|
475
|
+
});
|
|
476
|
+
} else {
|
|
477
|
+
queryParams.bit_rate = String(bitRate);
|
|
478
|
+
}
|
|
479
|
+
} else if (encoding === "aac") {
|
|
480
|
+
const bitRateNum = Number(bitRate);
|
|
481
|
+
if (bitRateNum < 4e3 || bitRateNum > 192e3) {
|
|
482
|
+
warnings.push({
|
|
483
|
+
type: "unsupported-setting",
|
|
484
|
+
setting: "providerOptions",
|
|
485
|
+
details: `Encoding "aac" supports bit rates between 4000 and 192000. Bit rate ${bitRate} was ignored.`
|
|
486
|
+
});
|
|
487
|
+
} else {
|
|
488
|
+
queryParams.bit_rate = String(bitRate);
|
|
489
|
+
}
|
|
490
|
+
} else if (["linear16", "mulaw", "alaw", "flac"].includes(encoding)) {
|
|
491
|
+
warnings.push({
|
|
492
|
+
type: "unsupported-setting",
|
|
493
|
+
setting: "providerOptions",
|
|
494
|
+
details: `Encoding "${encoding}" does not support bit_rate parameter. Bit rate ${bitRate} was ignored.`
|
|
495
|
+
});
|
|
496
|
+
} else {
|
|
497
|
+
queryParams.bit_rate = String(bitRate);
|
|
498
|
+
}
|
|
499
|
+
}
|
|
500
|
+
if (deepgramOptions.callback) {
|
|
501
|
+
queryParams.callback = deepgramOptions.callback;
|
|
502
|
+
}
|
|
503
|
+
if (deepgramOptions.callbackMethod) {
|
|
504
|
+
queryParams.callback_method = deepgramOptions.callbackMethod;
|
|
505
|
+
}
|
|
506
|
+
if (deepgramOptions.mipOptOut != null) {
|
|
507
|
+
queryParams.mip_opt_out = String(deepgramOptions.mipOptOut);
|
|
508
|
+
}
|
|
509
|
+
if (deepgramOptions.tag) {
|
|
510
|
+
if (Array.isArray(deepgramOptions.tag)) {
|
|
511
|
+
queryParams.tag = deepgramOptions.tag.join(",");
|
|
512
|
+
} else {
|
|
513
|
+
queryParams.tag = deepgramOptions.tag;
|
|
514
|
+
}
|
|
515
|
+
}
|
|
516
|
+
}
|
|
517
|
+
if (voice && voice !== this.modelId) {
|
|
518
|
+
warnings.push({
|
|
519
|
+
type: "unsupported-setting",
|
|
520
|
+
setting: "voice",
|
|
521
|
+
details: `Deepgram TTS models embed the voice in the model ID. The voice parameter "${voice}" was ignored. Use the model ID to select a voice (e.g., "aura-2-helena-en").`
|
|
522
|
+
});
|
|
523
|
+
}
|
|
524
|
+
if (speed != null) {
|
|
525
|
+
warnings.push({
|
|
526
|
+
type: "unsupported-setting",
|
|
527
|
+
setting: "speed",
|
|
528
|
+
details: `Deepgram TTS REST API does not support speed adjustment. Speed parameter was ignored.`
|
|
529
|
+
});
|
|
530
|
+
}
|
|
531
|
+
if (language) {
|
|
532
|
+
warnings.push({
|
|
533
|
+
type: "unsupported-setting",
|
|
534
|
+
setting: "language",
|
|
535
|
+
details: `Deepgram TTS models are language-specific via the model ID. Language parameter "${language}" was ignored. Select a model with the appropriate language suffix (e.g., "-en" for English).`
|
|
536
|
+
});
|
|
537
|
+
}
|
|
538
|
+
if (instructions) {
|
|
539
|
+
warnings.push({
|
|
540
|
+
type: "unsupported-setting",
|
|
541
|
+
setting: "instructions",
|
|
542
|
+
details: `Deepgram TTS REST API does not support instructions. Instructions parameter was ignored.`
|
|
543
|
+
});
|
|
544
|
+
}
|
|
545
|
+
return {
|
|
546
|
+
requestBody,
|
|
547
|
+
queryParams,
|
|
548
|
+
warnings
|
|
549
|
+
};
|
|
550
|
+
}
|
|
551
|
+
async doGenerate(options) {
|
|
552
|
+
var _a, _b, _c;
|
|
553
|
+
const currentDate = (_c = (_b = (_a = this.config._internal) == null ? void 0 : _a.currentDate) == null ? void 0 : _b.call(_a)) != null ? _c : /* @__PURE__ */ new Date();
|
|
554
|
+
const { requestBody, queryParams, warnings } = await this.getArgs(options);
|
|
555
|
+
const {
|
|
556
|
+
value: audio,
|
|
557
|
+
responseHeaders,
|
|
558
|
+
rawValue: rawResponse
|
|
559
|
+
} = await (0, import_provider_utils3.postJsonToApi)({
|
|
560
|
+
url: (() => {
|
|
561
|
+
const baseUrl = this.config.url({
|
|
562
|
+
path: "/v1/speak",
|
|
563
|
+
modelId: this.modelId
|
|
564
|
+
});
|
|
565
|
+
const queryString = new URLSearchParams(queryParams).toString();
|
|
566
|
+
return queryString ? `${baseUrl}?${queryString}` : baseUrl;
|
|
567
|
+
})(),
|
|
568
|
+
headers: (0, import_provider_utils3.combineHeaders)(this.config.headers(), options.headers),
|
|
569
|
+
body: requestBody,
|
|
570
|
+
failedResponseHandler: deepgramFailedResponseHandler,
|
|
571
|
+
successfulResponseHandler: (0, import_provider_utils3.createBinaryResponseHandler)(),
|
|
572
|
+
abortSignal: options.abortSignal,
|
|
573
|
+
fetch: this.config.fetch
|
|
574
|
+
});
|
|
575
|
+
return {
|
|
576
|
+
audio,
|
|
577
|
+
warnings,
|
|
578
|
+
request: {
|
|
579
|
+
body: JSON.stringify(requestBody)
|
|
580
|
+
},
|
|
581
|
+
response: {
|
|
582
|
+
timestamp: currentDate,
|
|
583
|
+
modelId: this.modelId,
|
|
584
|
+
headers: responseHeaders,
|
|
585
|
+
body: rawResponse
|
|
586
|
+
}
|
|
587
|
+
};
|
|
588
|
+
}
|
|
589
|
+
};
|
|
590
|
+
|
|
209
591
|
// src/version.ts
|
|
210
|
-
var VERSION = true ? "2.0.0-beta.33" : "0.0.0-test";
|
|
592
|
+
var VERSION = true ? "2.0.0-beta.34" : "0.0.0-test";
|
|
211
593
|
|
|
212
594
|
// src/deepgram-provider.ts
|
|
213
595
|
function createDeepgram(options = {}) {
|
|
214
|
-
const getHeaders = () => (0, import_provider_utils3.withUserAgentSuffix)(
|
|
596
|
+
const getHeaders = () => (0, import_provider_utils4.withUserAgentSuffix)(
|
|
215
597
|
{
|
|
216
|
-
authorization: `Token ${(0, import_provider_utils3.loadApiKey)({
|
|
598
|
+
authorization: `Token ${(0, import_provider_utils4.loadApiKey)({
|
|
217
599
|
apiKey: options.apiKey,
|
|
218
600
|
environmentVariableName: "DEEPGRAM_API_KEY",
|
|
219
601
|
description: "Deepgram"
|
|
@@ -228,6 +610,12 @@ function createDeepgram(options = {}) {
|
|
|
228
610
|
headers: getHeaders,
|
|
229
611
|
fetch: options.fetch
|
|
230
612
|
});
|
|
613
|
+
const createSpeechModel = (modelId) => new DeepgramSpeechModel(modelId, {
|
|
614
|
+
provider: `deepgram.speech`,
|
|
615
|
+
url: ({ path }) => `https://api.deepgram.com${path}`,
|
|
616
|
+
headers: getHeaders,
|
|
617
|
+
fetch: options.fetch
|
|
618
|
+
});
|
|
231
619
|
const provider = function(modelId) {
|
|
232
620
|
return {
|
|
233
621
|
transcription: createTranscriptionModel(modelId)
|
|
@@ -236,6 +624,8 @@ function createDeepgram(options = {}) {
|
|
|
236
624
|
provider.specificationVersion = "v3";
|
|
237
625
|
provider.transcription = createTranscriptionModel;
|
|
238
626
|
provider.transcriptionModel = createTranscriptionModel;
|
|
627
|
+
provider.speech = createSpeechModel;
|
|
628
|
+
provider.speechModel = createSpeechModel;
|
|
239
629
|
provider.languageModel = () => {
|
|
240
630
|
throw new import_provider.NoSuchModelError({
|
|
241
631
|
modelId: "unknown",
|
|
@@ -262,6 +652,7 @@ function createDeepgram(options = {}) {
|
|
|
262
652
|
var deepgram = createDeepgram();
|
|
263
653
|
// Annotate the CommonJS export names for ESM import in node:
|
|
264
654
|
0 && (module.exports = {
|
|
655
|
+
DeepgramSpeechModel,
|
|
265
656
|
VERSION,
|
|
266
657
|
createDeepgram,
|
|
267
658
|
deepgram
|