@ai-sdk/deepgram 3.0.0-beta.3 → 3.0.0-beta.31
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +242 -4
- package/README.md +2 -0
- package/dist/index.d.ts +63 -44
- package/dist/index.js +131 -115
- package/dist/index.js.map +1 -1
- package/package.json +11 -11
- package/src/deepgram-config.ts +2 -2
- package/src/deepgram-provider.ts +11 -11
- package/src/deepgram-speech-model-options.ts +25 -0
- package/src/deepgram-speech-model.ts +28 -35
- package/src/deepgram-transcription-model-options.ts +45 -0
- package/src/deepgram-transcription-model.ts +29 -55
- package/src/index.ts +2 -2
- package/dist/index.d.mts +0 -128
- package/dist/index.mjs +0 -650
- package/dist/index.mjs.map +0 -1
package/dist/index.mjs
DELETED
|
@@ -1,650 +0,0 @@
|
|
|
1
|
-
// src/deepgram-provider.ts
|
|
2
|
-
import {
|
|
3
|
-
NoSuchModelError
|
|
4
|
-
} from "@ai-sdk/provider";
|
|
5
|
-
import {
|
|
6
|
-
loadApiKey,
|
|
7
|
-
withUserAgentSuffix
|
|
8
|
-
} from "@ai-sdk/provider-utils";
|
|
9
|
-
|
|
10
|
-
// src/deepgram-transcription-model.ts
|
|
11
|
-
import {
|
|
12
|
-
combineHeaders,
|
|
13
|
-
createJsonResponseHandler,
|
|
14
|
-
parseProviderOptions,
|
|
15
|
-
postToApi
|
|
16
|
-
} from "@ai-sdk/provider-utils";
|
|
17
|
-
import { z as z2 } from "zod/v4";
|
|
18
|
-
|
|
19
|
-
// src/deepgram-error.ts
|
|
20
|
-
import { z } from "zod/v4";
|
|
21
|
-
import { createJsonErrorResponseHandler } from "@ai-sdk/provider-utils";
|
|
22
|
-
var deepgramErrorDataSchema = z.object({
|
|
23
|
-
error: z.object({
|
|
24
|
-
message: z.string(),
|
|
25
|
-
code: z.number()
|
|
26
|
-
})
|
|
27
|
-
});
|
|
28
|
-
var deepgramFailedResponseHandler = createJsonErrorResponseHandler({
|
|
29
|
-
errorSchema: deepgramErrorDataSchema,
|
|
30
|
-
errorToMessage: (data) => data.error.message
|
|
31
|
-
});
|
|
32
|
-
|
|
33
|
-
// src/deepgram-transcription-model.ts
|
|
34
|
-
var deepgramTranscriptionModelOptionsSchema = z2.object({
|
|
35
|
-
/** Language to use for transcription. If not specified, Deepgram defaults to English. Use `detectLanguage: true` to enable automatic language detection. */
|
|
36
|
-
language: z2.string().nullish(),
|
|
37
|
-
/** Whether to enable automatic language detection. When true, Deepgram will detect the language of the audio. */
|
|
38
|
-
detectLanguage: z2.boolean().nullish(),
|
|
39
|
-
/** Whether to use smart formatting, which formats written-out numbers, dates, times, etc. */
|
|
40
|
-
smartFormat: z2.boolean().nullish(),
|
|
41
|
-
/** Whether to add punctuation to the transcript. */
|
|
42
|
-
punctuate: z2.boolean().nullish(),
|
|
43
|
-
/** Whether to format the transcript into paragraphs. */
|
|
44
|
-
paragraphs: z2.boolean().nullish(),
|
|
45
|
-
/** Whether to generate a summary of the transcript. Use 'v2' for the latest version or false to disable. */
|
|
46
|
-
summarize: z2.union([z2.literal("v2"), z2.literal(false)]).nullish(),
|
|
47
|
-
/** Whether to identify topics in the transcript. */
|
|
48
|
-
topics: z2.boolean().nullish(),
|
|
49
|
-
/** Whether to identify intents in the transcript. */
|
|
50
|
-
intents: z2.boolean().nullish(),
|
|
51
|
-
/** Whether to analyze sentiment in the transcript. */
|
|
52
|
-
sentiment: z2.boolean().nullish(),
|
|
53
|
-
/** Whether to detect and tag named entities in the transcript. */
|
|
54
|
-
detectEntities: z2.boolean().nullish(),
|
|
55
|
-
/** Specify terms or patterns to redact from the transcript. Can be a string or array of strings. */
|
|
56
|
-
redact: z2.union([z2.string(), z2.array(z2.string())]).nullish(),
|
|
57
|
-
/** String to replace redacted content with. */
|
|
58
|
-
replace: z2.string().nullish(),
|
|
59
|
-
/** Term or phrase to search for in the transcript. */
|
|
60
|
-
search: z2.string().nullish(),
|
|
61
|
-
/** Key term to identify in the transcript. */
|
|
62
|
-
keyterm: z2.string().nullish(),
|
|
63
|
-
/** Whether to identify different speakers in the audio. */
|
|
64
|
-
diarize: z2.boolean().nullish(),
|
|
65
|
-
/** Whether to segment the transcript into utterances. */
|
|
66
|
-
utterances: z2.boolean().nullish(),
|
|
67
|
-
/** Minimum duration of silence (in seconds) to trigger a new utterance. */
|
|
68
|
-
uttSplit: z2.number().nullish(),
|
|
69
|
-
/** Whether to include filler words (um, uh, etc.) in the transcript. */
|
|
70
|
-
fillerWords: z2.boolean().nullish()
|
|
71
|
-
});
|
|
72
|
-
var DeepgramTranscriptionModel = class {
|
|
73
|
-
constructor(modelId, config) {
|
|
74
|
-
this.modelId = modelId;
|
|
75
|
-
this.config = config;
|
|
76
|
-
this.specificationVersion = "v3";
|
|
77
|
-
}
|
|
78
|
-
get provider() {
|
|
79
|
-
return this.config.provider;
|
|
80
|
-
}
|
|
81
|
-
async getArgs({
|
|
82
|
-
providerOptions
|
|
83
|
-
}) {
|
|
84
|
-
var _a, _b, _c, _d, _e, _f, _g, _h, _i, _j, _k, _l;
|
|
85
|
-
const warnings = [];
|
|
86
|
-
const deepgramOptions = await parseProviderOptions({
|
|
87
|
-
provider: "deepgram",
|
|
88
|
-
providerOptions,
|
|
89
|
-
schema: deepgramTranscriptionModelOptionsSchema
|
|
90
|
-
});
|
|
91
|
-
const body = {
|
|
92
|
-
model: this.modelId,
|
|
93
|
-
diarize: true
|
|
94
|
-
};
|
|
95
|
-
if (deepgramOptions) {
|
|
96
|
-
body.detect_entities = (_a = deepgramOptions.detectEntities) != null ? _a : void 0;
|
|
97
|
-
body.detect_language = (_b = deepgramOptions.detectLanguage) != null ? _b : void 0;
|
|
98
|
-
body.filler_words = (_c = deepgramOptions.fillerWords) != null ? _c : void 0;
|
|
99
|
-
body.language = (_d = deepgramOptions.language) != null ? _d : void 0;
|
|
100
|
-
body.punctuate = (_e = deepgramOptions.punctuate) != null ? _e : void 0;
|
|
101
|
-
body.redact = (_f = deepgramOptions.redact) != null ? _f : void 0;
|
|
102
|
-
body.search = (_g = deepgramOptions.search) != null ? _g : void 0;
|
|
103
|
-
body.smart_format = (_h = deepgramOptions.smartFormat) != null ? _h : void 0;
|
|
104
|
-
body.summarize = (_i = deepgramOptions.summarize) != null ? _i : void 0;
|
|
105
|
-
body.topics = (_j = deepgramOptions.topics) != null ? _j : void 0;
|
|
106
|
-
body.utterances = (_k = deepgramOptions.utterances) != null ? _k : void 0;
|
|
107
|
-
body.utt_split = (_l = deepgramOptions.uttSplit) != null ? _l : void 0;
|
|
108
|
-
if (typeof deepgramOptions.diarize === "boolean") {
|
|
109
|
-
body.diarize = deepgramOptions.diarize;
|
|
110
|
-
}
|
|
111
|
-
}
|
|
112
|
-
const queryParams = new URLSearchParams();
|
|
113
|
-
for (const [key, value] of Object.entries(body)) {
|
|
114
|
-
if (value !== void 0) {
|
|
115
|
-
queryParams.append(key, String(value));
|
|
116
|
-
}
|
|
117
|
-
}
|
|
118
|
-
return {
|
|
119
|
-
queryParams,
|
|
120
|
-
warnings
|
|
121
|
-
};
|
|
122
|
-
}
|
|
123
|
-
async doGenerate(options) {
|
|
124
|
-
var _a, _b, _c, _d, _e, _f, _g, _h, _i, _j, _k, _l, _m, _n, _o;
|
|
125
|
-
const currentDate = (_c = (_b = (_a = this.config._internal) == null ? void 0 : _a.currentDate) == null ? void 0 : _b.call(_a)) != null ? _c : /* @__PURE__ */ new Date();
|
|
126
|
-
const { queryParams, warnings } = await this.getArgs(options);
|
|
127
|
-
const {
|
|
128
|
-
value: response,
|
|
129
|
-
responseHeaders,
|
|
130
|
-
rawValue: rawResponse
|
|
131
|
-
} = await postToApi({
|
|
132
|
-
url: this.config.url({
|
|
133
|
-
path: "/v1/listen",
|
|
134
|
-
modelId: this.modelId
|
|
135
|
-
}) + "?" + queryParams.toString(),
|
|
136
|
-
headers: {
|
|
137
|
-
...combineHeaders(this.config.headers(), options.headers),
|
|
138
|
-
"Content-Type": options.mediaType
|
|
139
|
-
},
|
|
140
|
-
body: {
|
|
141
|
-
content: options.audio,
|
|
142
|
-
values: options.audio
|
|
143
|
-
},
|
|
144
|
-
failedResponseHandler: deepgramFailedResponseHandler,
|
|
145
|
-
successfulResponseHandler: createJsonResponseHandler(
|
|
146
|
-
deepgramTranscriptionResponseSchema
|
|
147
|
-
),
|
|
148
|
-
abortSignal: options.abortSignal,
|
|
149
|
-
fetch: this.config.fetch
|
|
150
|
-
});
|
|
151
|
-
return {
|
|
152
|
-
text: (_g = (_f = (_e = (_d = response.results) == null ? void 0 : _d.channels.at(0)) == null ? void 0 : _e.alternatives.at(0)) == null ? void 0 : _f.transcript) != null ? _g : "",
|
|
153
|
-
segments: (_j = (_i = (_h = response.results) == null ? void 0 : _h.channels[0].alternatives[0].words) == null ? void 0 : _i.map((word) => ({
|
|
154
|
-
text: word.word,
|
|
155
|
-
startSecond: word.start,
|
|
156
|
-
endSecond: word.end
|
|
157
|
-
}))) != null ? _j : [],
|
|
158
|
-
language: (_m = (_l = (_k = response.results) == null ? void 0 : _k.channels.at(0)) == null ? void 0 : _l.detected_language) != null ? _m : void 0,
|
|
159
|
-
durationInSeconds: (_o = (_n = response.metadata) == null ? void 0 : _n.duration) != null ? _o : void 0,
|
|
160
|
-
warnings,
|
|
161
|
-
response: {
|
|
162
|
-
timestamp: currentDate,
|
|
163
|
-
modelId: this.modelId,
|
|
164
|
-
headers: responseHeaders,
|
|
165
|
-
body: rawResponse
|
|
166
|
-
}
|
|
167
|
-
};
|
|
168
|
-
}
|
|
169
|
-
};
|
|
170
|
-
var deepgramTranscriptionResponseSchema = z2.object({
|
|
171
|
-
metadata: z2.object({
|
|
172
|
-
duration: z2.number()
|
|
173
|
-
}).nullish(),
|
|
174
|
-
results: z2.object({
|
|
175
|
-
channels: z2.array(
|
|
176
|
-
z2.object({
|
|
177
|
-
detected_language: z2.string().nullish(),
|
|
178
|
-
alternatives: z2.array(
|
|
179
|
-
z2.object({
|
|
180
|
-
transcript: z2.string(),
|
|
181
|
-
words: z2.array(
|
|
182
|
-
z2.object({
|
|
183
|
-
word: z2.string(),
|
|
184
|
-
start: z2.number(),
|
|
185
|
-
end: z2.number()
|
|
186
|
-
})
|
|
187
|
-
)
|
|
188
|
-
})
|
|
189
|
-
)
|
|
190
|
-
})
|
|
191
|
-
)
|
|
192
|
-
}).nullish()
|
|
193
|
-
});
|
|
194
|
-
|
|
195
|
-
// src/deepgram-speech-model.ts
|
|
196
|
-
import {
|
|
197
|
-
combineHeaders as combineHeaders2,
|
|
198
|
-
createBinaryResponseHandler,
|
|
199
|
-
parseProviderOptions as parseProviderOptions2,
|
|
200
|
-
postJsonToApi
|
|
201
|
-
} from "@ai-sdk/provider-utils";
|
|
202
|
-
import { z as z3 } from "zod/v4";
|
|
203
|
-
var deepgramSpeechModelOptionsSchema = z3.object({
|
|
204
|
-
/** Bitrate of the audio in bits per second. Can be a number or predefined enum value. */
|
|
205
|
-
bitRate: z3.union([z3.number(), z3.string()]).nullish(),
|
|
206
|
-
/** Container format for the output audio (mp3, wav, etc.). */
|
|
207
|
-
container: z3.string().nullish(),
|
|
208
|
-
/** Encoding type for the audio output (linear16, mulaw, alaw, etc.). */
|
|
209
|
-
encoding: z3.string().nullish(),
|
|
210
|
-
/** Sample rate for the output audio in Hz (8000, 16000, 24000, 44100, 48000). */
|
|
211
|
-
sampleRate: z3.number().nullish(),
|
|
212
|
-
/** URL to which we'll make the callback request. */
|
|
213
|
-
callback: z3.string().url().nullish(),
|
|
214
|
-
/** HTTP method by which the callback request will be made (POST or PUT). */
|
|
215
|
-
callbackMethod: z3.enum(["POST", "PUT"]).nullish(),
|
|
216
|
-
/** Opts out requests from the Deepgram Model Improvement Program. */
|
|
217
|
-
mipOptOut: z3.boolean().nullish(),
|
|
218
|
-
/** Label your requests for the purpose of identification during usage reporting. */
|
|
219
|
-
tag: z3.union([z3.string(), z3.array(z3.string())]).nullish()
|
|
220
|
-
});
|
|
221
|
-
var DeepgramSpeechModel = class {
|
|
222
|
-
constructor(modelId, config) {
|
|
223
|
-
this.modelId = modelId;
|
|
224
|
-
this.config = config;
|
|
225
|
-
this.specificationVersion = "v3";
|
|
226
|
-
}
|
|
227
|
-
get provider() {
|
|
228
|
-
return this.config.provider;
|
|
229
|
-
}
|
|
230
|
-
async getArgs({
|
|
231
|
-
text,
|
|
232
|
-
voice,
|
|
233
|
-
outputFormat = "mp3",
|
|
234
|
-
speed,
|
|
235
|
-
language,
|
|
236
|
-
instructions,
|
|
237
|
-
providerOptions
|
|
238
|
-
}) {
|
|
239
|
-
var _a, _b, _c;
|
|
240
|
-
const warnings = [];
|
|
241
|
-
const deepgramOptions = await parseProviderOptions2({
|
|
242
|
-
provider: "deepgram",
|
|
243
|
-
providerOptions,
|
|
244
|
-
schema: deepgramSpeechModelOptionsSchema
|
|
245
|
-
});
|
|
246
|
-
const requestBody = {
|
|
247
|
-
text
|
|
248
|
-
};
|
|
249
|
-
const queryParams = {
|
|
250
|
-
model: this.modelId
|
|
251
|
-
};
|
|
252
|
-
if (outputFormat) {
|
|
253
|
-
const formatLower = outputFormat.toLowerCase();
|
|
254
|
-
const formatMap = {
|
|
255
|
-
// MP3: no container, fixed 22050 sample rate, bitrate 32000/48000
|
|
256
|
-
mp3: { encoding: "mp3" },
|
|
257
|
-
// Don't set container or sample_rate for mp3
|
|
258
|
-
// Linear16: wav/none container, configurable sample rate
|
|
259
|
-
wav: { container: "wav", encoding: "linear16" },
|
|
260
|
-
linear16: { encoding: "linear16", container: "wav" },
|
|
261
|
-
// MuLaw: wav/none container, 8000/16000 sample rate
|
|
262
|
-
mulaw: { encoding: "mulaw", container: "wav" },
|
|
263
|
-
// ALaw: wav/none container, 8000/16000 sample rate
|
|
264
|
-
alaw: { encoding: "alaw", container: "wav" },
|
|
265
|
-
// Opus: ogg container, fixed 48000 sample rate
|
|
266
|
-
opus: { encoding: "opus", container: "ogg" },
|
|
267
|
-
ogg: { encoding: "opus", container: "ogg" },
|
|
268
|
-
// FLAC: no container, configurable sample rate
|
|
269
|
-
flac: { encoding: "flac" },
|
|
270
|
-
// AAC: no container, fixed 22050 sample rate
|
|
271
|
-
aac: { encoding: "aac" },
|
|
272
|
-
// Raw audio (no container)
|
|
273
|
-
pcm: { encoding: "linear16", container: "none" }
|
|
274
|
-
};
|
|
275
|
-
const mappedFormat = formatMap[formatLower];
|
|
276
|
-
if (mappedFormat) {
|
|
277
|
-
if (mappedFormat.encoding) {
|
|
278
|
-
queryParams.encoding = mappedFormat.encoding;
|
|
279
|
-
}
|
|
280
|
-
if (mappedFormat.container) {
|
|
281
|
-
queryParams.container = mappedFormat.container;
|
|
282
|
-
}
|
|
283
|
-
if (mappedFormat.sampleRate) {
|
|
284
|
-
queryParams.sample_rate = String(mappedFormat.sampleRate);
|
|
285
|
-
}
|
|
286
|
-
if (mappedFormat.bitRate) {
|
|
287
|
-
queryParams.bit_rate = String(mappedFormat.bitRate);
|
|
288
|
-
}
|
|
289
|
-
} else {
|
|
290
|
-
const parts = formatLower.split("_");
|
|
291
|
-
if (parts.length >= 2) {
|
|
292
|
-
const firstPart = parts[0];
|
|
293
|
-
const secondPart = parts[1];
|
|
294
|
-
const sampleRate = parseInt(secondPart, 10);
|
|
295
|
-
if ([
|
|
296
|
-
"linear16",
|
|
297
|
-
"mulaw",
|
|
298
|
-
"alaw",
|
|
299
|
-
"mp3",
|
|
300
|
-
"opus",
|
|
301
|
-
"flac",
|
|
302
|
-
"aac"
|
|
303
|
-
].includes(firstPart)) {
|
|
304
|
-
queryParams.encoding = firstPart;
|
|
305
|
-
if (["linear16", "mulaw", "alaw"].includes(firstPart)) {
|
|
306
|
-
queryParams.container = "wav";
|
|
307
|
-
} else if (firstPart === "opus") {
|
|
308
|
-
queryParams.container = "ogg";
|
|
309
|
-
}
|
|
310
|
-
if (!isNaN(sampleRate)) {
|
|
311
|
-
if (firstPart === "linear16" && [8e3, 16e3, 24e3, 32e3, 48e3].includes(sampleRate)) {
|
|
312
|
-
queryParams.sample_rate = String(sampleRate);
|
|
313
|
-
} else if (firstPart === "mulaw" && [8e3, 16e3].includes(sampleRate)) {
|
|
314
|
-
queryParams.sample_rate = String(sampleRate);
|
|
315
|
-
} else if (firstPart === "alaw" && [8e3, 16e3].includes(sampleRate)) {
|
|
316
|
-
queryParams.sample_rate = String(sampleRate);
|
|
317
|
-
} else if (firstPart === "flac" && [8e3, 16e3, 22050, 32e3, 48e3].includes(sampleRate)) {
|
|
318
|
-
queryParams.sample_rate = String(sampleRate);
|
|
319
|
-
}
|
|
320
|
-
}
|
|
321
|
-
} else if (["wav", "ogg"].includes(firstPart)) {
|
|
322
|
-
if (firstPart === "wav") {
|
|
323
|
-
queryParams.container = "wav";
|
|
324
|
-
queryParams.encoding = "linear16";
|
|
325
|
-
} else if (firstPart === "ogg") {
|
|
326
|
-
queryParams.container = "ogg";
|
|
327
|
-
queryParams.encoding = "opus";
|
|
328
|
-
}
|
|
329
|
-
if (!isNaN(sampleRate)) {
|
|
330
|
-
queryParams.sample_rate = String(sampleRate);
|
|
331
|
-
}
|
|
332
|
-
}
|
|
333
|
-
}
|
|
334
|
-
}
|
|
335
|
-
}
|
|
336
|
-
if (deepgramOptions) {
|
|
337
|
-
if (deepgramOptions.encoding) {
|
|
338
|
-
const newEncoding = deepgramOptions.encoding.toLowerCase();
|
|
339
|
-
queryParams.encoding = newEncoding;
|
|
340
|
-
if (deepgramOptions.container) {
|
|
341
|
-
if (["linear16", "mulaw", "alaw"].includes(newEncoding)) {
|
|
342
|
-
if (!["wav", "none"].includes(deepgramOptions.container.toLowerCase())) {
|
|
343
|
-
warnings.push({
|
|
344
|
-
type: "unsupported",
|
|
345
|
-
feature: "providerOptions",
|
|
346
|
-
details: `Encoding "${newEncoding}" only supports containers "wav" or "none". Container "${deepgramOptions.container}" was ignored.`
|
|
347
|
-
});
|
|
348
|
-
} else {
|
|
349
|
-
queryParams.container = deepgramOptions.container.toLowerCase();
|
|
350
|
-
}
|
|
351
|
-
} else if (newEncoding === "opus") {
|
|
352
|
-
queryParams.container = "ogg";
|
|
353
|
-
} else if (["mp3", "flac", "aac"].includes(newEncoding)) {
|
|
354
|
-
warnings.push({
|
|
355
|
-
type: "unsupported",
|
|
356
|
-
feature: "providerOptions",
|
|
357
|
-
details: `Encoding "${newEncoding}" does not support container parameter. Container "${deepgramOptions.container}" was ignored.`
|
|
358
|
-
});
|
|
359
|
-
delete queryParams.container;
|
|
360
|
-
}
|
|
361
|
-
} else {
|
|
362
|
-
if (["mp3", "flac", "aac"].includes(newEncoding)) {
|
|
363
|
-
delete queryParams.container;
|
|
364
|
-
} else if (["linear16", "mulaw", "alaw"].includes(newEncoding)) {
|
|
365
|
-
if (!queryParams.container) {
|
|
366
|
-
queryParams.container = "wav";
|
|
367
|
-
}
|
|
368
|
-
} else if (newEncoding === "opus") {
|
|
369
|
-
queryParams.container = "ogg";
|
|
370
|
-
}
|
|
371
|
-
}
|
|
372
|
-
if (["mp3", "opus", "aac"].includes(newEncoding)) {
|
|
373
|
-
delete queryParams.sample_rate;
|
|
374
|
-
}
|
|
375
|
-
if (["linear16", "mulaw", "alaw", "flac"].includes(newEncoding)) {
|
|
376
|
-
delete queryParams.bit_rate;
|
|
377
|
-
}
|
|
378
|
-
} else if (deepgramOptions.container) {
|
|
379
|
-
const container = deepgramOptions.container.toLowerCase();
|
|
380
|
-
const oldEncoding = (_a = queryParams.encoding) == null ? void 0 : _a.toLowerCase();
|
|
381
|
-
let newEncoding;
|
|
382
|
-
if (container === "wav") {
|
|
383
|
-
queryParams.container = "wav";
|
|
384
|
-
newEncoding = "linear16";
|
|
385
|
-
} else if (container === "ogg") {
|
|
386
|
-
queryParams.container = "ogg";
|
|
387
|
-
newEncoding = "opus";
|
|
388
|
-
} else if (container === "none") {
|
|
389
|
-
queryParams.container = "none";
|
|
390
|
-
newEncoding = "linear16";
|
|
391
|
-
}
|
|
392
|
-
if (newEncoding && newEncoding !== oldEncoding) {
|
|
393
|
-
queryParams.encoding = newEncoding;
|
|
394
|
-
if (["mp3", "opus", "aac"].includes(newEncoding)) {
|
|
395
|
-
delete queryParams.sample_rate;
|
|
396
|
-
}
|
|
397
|
-
if (["linear16", "mulaw", "alaw", "flac"].includes(newEncoding)) {
|
|
398
|
-
delete queryParams.bit_rate;
|
|
399
|
-
}
|
|
400
|
-
}
|
|
401
|
-
}
|
|
402
|
-
if (deepgramOptions.sampleRate != null) {
|
|
403
|
-
const encoding = ((_b = queryParams.encoding) == null ? void 0 : _b.toLowerCase()) || "";
|
|
404
|
-
const sampleRate = deepgramOptions.sampleRate;
|
|
405
|
-
if (encoding === "linear16") {
|
|
406
|
-
if (![8e3, 16e3, 24e3, 32e3, 48e3].includes(sampleRate)) {
|
|
407
|
-
warnings.push({
|
|
408
|
-
type: "unsupported",
|
|
409
|
-
feature: "providerOptions",
|
|
410
|
-
details: `Encoding "linear16" only supports sample rates: 8000, 16000, 24000, 32000, 48000. Sample rate ${sampleRate} was ignored.`
|
|
411
|
-
});
|
|
412
|
-
} else {
|
|
413
|
-
queryParams.sample_rate = String(sampleRate);
|
|
414
|
-
}
|
|
415
|
-
} else if (encoding === "mulaw" || encoding === "alaw") {
|
|
416
|
-
if (![8e3, 16e3].includes(sampleRate)) {
|
|
417
|
-
warnings.push({
|
|
418
|
-
type: "unsupported",
|
|
419
|
-
feature: "providerOptions",
|
|
420
|
-
details: `Encoding "${encoding}" only supports sample rates: 8000, 16000. Sample rate ${sampleRate} was ignored.`
|
|
421
|
-
});
|
|
422
|
-
} else {
|
|
423
|
-
queryParams.sample_rate = String(sampleRate);
|
|
424
|
-
}
|
|
425
|
-
} else if (encoding === "flac") {
|
|
426
|
-
if (![8e3, 16e3, 22050, 32e3, 48e3].includes(sampleRate)) {
|
|
427
|
-
warnings.push({
|
|
428
|
-
type: "unsupported",
|
|
429
|
-
feature: "providerOptions",
|
|
430
|
-
details: `Encoding "flac" only supports sample rates: 8000, 16000, 22050, 32000, 48000. Sample rate ${sampleRate} was ignored.`
|
|
431
|
-
});
|
|
432
|
-
} else {
|
|
433
|
-
queryParams.sample_rate = String(sampleRate);
|
|
434
|
-
}
|
|
435
|
-
} else if (["mp3", "opus", "aac"].includes(encoding)) {
|
|
436
|
-
warnings.push({
|
|
437
|
-
type: "unsupported",
|
|
438
|
-
feature: "providerOptions",
|
|
439
|
-
details: `Encoding "${encoding}" has a fixed sample rate and does not support sample_rate parameter. Sample rate ${sampleRate} was ignored.`
|
|
440
|
-
});
|
|
441
|
-
} else {
|
|
442
|
-
queryParams.sample_rate = String(sampleRate);
|
|
443
|
-
}
|
|
444
|
-
}
|
|
445
|
-
if (deepgramOptions.bitRate != null) {
|
|
446
|
-
const encoding = ((_c = queryParams.encoding) == null ? void 0 : _c.toLowerCase()) || "";
|
|
447
|
-
const bitRate = deepgramOptions.bitRate;
|
|
448
|
-
if (encoding === "mp3") {
|
|
449
|
-
if (![32e3, 48e3].includes(Number(bitRate))) {
|
|
450
|
-
warnings.push({
|
|
451
|
-
type: "unsupported",
|
|
452
|
-
feature: "providerOptions",
|
|
453
|
-
details: `Encoding "mp3" only supports bit rates: 32000, 48000. Bit rate ${bitRate} was ignored.`
|
|
454
|
-
});
|
|
455
|
-
} else {
|
|
456
|
-
queryParams.bit_rate = String(bitRate);
|
|
457
|
-
}
|
|
458
|
-
} else if (encoding === "opus") {
|
|
459
|
-
const bitRateNum = Number(bitRate);
|
|
460
|
-
if (bitRateNum < 4e3 || bitRateNum > 65e4) {
|
|
461
|
-
warnings.push({
|
|
462
|
-
type: "unsupported",
|
|
463
|
-
feature: "providerOptions",
|
|
464
|
-
details: `Encoding "opus" supports bit rates between 4000 and 650000. Bit rate ${bitRate} was ignored.`
|
|
465
|
-
});
|
|
466
|
-
} else {
|
|
467
|
-
queryParams.bit_rate = String(bitRate);
|
|
468
|
-
}
|
|
469
|
-
} else if (encoding === "aac") {
|
|
470
|
-
const bitRateNum = Number(bitRate);
|
|
471
|
-
if (bitRateNum < 4e3 || bitRateNum > 192e3) {
|
|
472
|
-
warnings.push({
|
|
473
|
-
type: "unsupported",
|
|
474
|
-
feature: "providerOptions",
|
|
475
|
-
details: `Encoding "aac" supports bit rates between 4000 and 192000. Bit rate ${bitRate} was ignored.`
|
|
476
|
-
});
|
|
477
|
-
} else {
|
|
478
|
-
queryParams.bit_rate = String(bitRate);
|
|
479
|
-
}
|
|
480
|
-
} else if (["linear16", "mulaw", "alaw", "flac"].includes(encoding)) {
|
|
481
|
-
warnings.push({
|
|
482
|
-
type: "unsupported",
|
|
483
|
-
feature: "providerOptions",
|
|
484
|
-
details: `Encoding "${encoding}" does not support bit_rate parameter. Bit rate ${bitRate} was ignored.`
|
|
485
|
-
});
|
|
486
|
-
} else {
|
|
487
|
-
queryParams.bit_rate = String(bitRate);
|
|
488
|
-
}
|
|
489
|
-
}
|
|
490
|
-
if (deepgramOptions.callback) {
|
|
491
|
-
queryParams.callback = deepgramOptions.callback;
|
|
492
|
-
}
|
|
493
|
-
if (deepgramOptions.callbackMethod) {
|
|
494
|
-
queryParams.callback_method = deepgramOptions.callbackMethod;
|
|
495
|
-
}
|
|
496
|
-
if (deepgramOptions.mipOptOut != null) {
|
|
497
|
-
queryParams.mip_opt_out = String(deepgramOptions.mipOptOut);
|
|
498
|
-
}
|
|
499
|
-
if (deepgramOptions.tag) {
|
|
500
|
-
if (Array.isArray(deepgramOptions.tag)) {
|
|
501
|
-
queryParams.tag = deepgramOptions.tag.join(",");
|
|
502
|
-
} else {
|
|
503
|
-
queryParams.tag = deepgramOptions.tag;
|
|
504
|
-
}
|
|
505
|
-
}
|
|
506
|
-
}
|
|
507
|
-
if (voice && voice !== this.modelId) {
|
|
508
|
-
warnings.push({
|
|
509
|
-
type: "unsupported",
|
|
510
|
-
feature: "voice",
|
|
511
|
-
details: `Deepgram TTS models embed the voice in the model ID. The voice parameter "${voice}" was ignored. Use the model ID to select a voice (e.g., "aura-2-helena-en").`
|
|
512
|
-
});
|
|
513
|
-
}
|
|
514
|
-
if (speed != null) {
|
|
515
|
-
warnings.push({
|
|
516
|
-
type: "unsupported",
|
|
517
|
-
feature: "speed",
|
|
518
|
-
details: `Deepgram TTS REST API does not support speed adjustment. Speed parameter was ignored.`
|
|
519
|
-
});
|
|
520
|
-
}
|
|
521
|
-
if (language) {
|
|
522
|
-
warnings.push({
|
|
523
|
-
type: "unsupported",
|
|
524
|
-
feature: "language",
|
|
525
|
-
details: `Deepgram TTS models are language-specific via the model ID. Language parameter "${language}" was ignored. Select a model with the appropriate language suffix (e.g., "-en" for English).`
|
|
526
|
-
});
|
|
527
|
-
}
|
|
528
|
-
if (instructions) {
|
|
529
|
-
warnings.push({
|
|
530
|
-
type: "unsupported",
|
|
531
|
-
feature: "instructions",
|
|
532
|
-
details: `Deepgram TTS REST API does not support instructions. Instructions parameter was ignored.`
|
|
533
|
-
});
|
|
534
|
-
}
|
|
535
|
-
return {
|
|
536
|
-
requestBody,
|
|
537
|
-
queryParams,
|
|
538
|
-
warnings
|
|
539
|
-
};
|
|
540
|
-
}
|
|
541
|
-
async doGenerate(options) {
|
|
542
|
-
var _a, _b, _c;
|
|
543
|
-
const currentDate = (_c = (_b = (_a = this.config._internal) == null ? void 0 : _a.currentDate) == null ? void 0 : _b.call(_a)) != null ? _c : /* @__PURE__ */ new Date();
|
|
544
|
-
const { requestBody, queryParams, warnings } = await this.getArgs(options);
|
|
545
|
-
const {
|
|
546
|
-
value: audio,
|
|
547
|
-
responseHeaders,
|
|
548
|
-
rawValue: rawResponse
|
|
549
|
-
} = await postJsonToApi({
|
|
550
|
-
url: (() => {
|
|
551
|
-
const baseUrl = this.config.url({
|
|
552
|
-
path: "/v1/speak",
|
|
553
|
-
modelId: this.modelId
|
|
554
|
-
});
|
|
555
|
-
const queryString = new URLSearchParams(queryParams).toString();
|
|
556
|
-
return queryString ? `${baseUrl}?${queryString}` : baseUrl;
|
|
557
|
-
})(),
|
|
558
|
-
headers: combineHeaders2(this.config.headers(), options.headers),
|
|
559
|
-
body: requestBody,
|
|
560
|
-
failedResponseHandler: deepgramFailedResponseHandler,
|
|
561
|
-
successfulResponseHandler: createBinaryResponseHandler(),
|
|
562
|
-
abortSignal: options.abortSignal,
|
|
563
|
-
fetch: this.config.fetch
|
|
564
|
-
});
|
|
565
|
-
return {
|
|
566
|
-
audio,
|
|
567
|
-
warnings,
|
|
568
|
-
request: {
|
|
569
|
-
body: JSON.stringify(requestBody)
|
|
570
|
-
},
|
|
571
|
-
response: {
|
|
572
|
-
timestamp: currentDate,
|
|
573
|
-
modelId: this.modelId,
|
|
574
|
-
headers: responseHeaders,
|
|
575
|
-
body: rawResponse
|
|
576
|
-
}
|
|
577
|
-
};
|
|
578
|
-
}
|
|
579
|
-
};
|
|
580
|
-
|
|
581
|
-
// src/version.ts
|
|
582
|
-
var VERSION = true ? "3.0.0-beta.3" : "0.0.0-test";
|
|
583
|
-
|
|
584
|
-
// src/deepgram-provider.ts
|
|
585
|
-
function createDeepgram(options = {}) {
|
|
586
|
-
const getHeaders = () => withUserAgentSuffix(
|
|
587
|
-
{
|
|
588
|
-
authorization: `Token ${loadApiKey({
|
|
589
|
-
apiKey: options.apiKey,
|
|
590
|
-
environmentVariableName: "DEEPGRAM_API_KEY",
|
|
591
|
-
description: "Deepgram"
|
|
592
|
-
})}`,
|
|
593
|
-
...options.headers
|
|
594
|
-
},
|
|
595
|
-
`ai-sdk/deepgram/${VERSION}`
|
|
596
|
-
);
|
|
597
|
-
const createTranscriptionModel = (modelId) => new DeepgramTranscriptionModel(modelId, {
|
|
598
|
-
provider: `deepgram.transcription`,
|
|
599
|
-
url: ({ path }) => `https://api.deepgram.com${path}`,
|
|
600
|
-
headers: getHeaders,
|
|
601
|
-
fetch: options.fetch
|
|
602
|
-
});
|
|
603
|
-
const createSpeechModel = (modelId) => new DeepgramSpeechModel(modelId, {
|
|
604
|
-
provider: `deepgram.speech`,
|
|
605
|
-
url: ({ path }) => `https://api.deepgram.com${path}`,
|
|
606
|
-
headers: getHeaders,
|
|
607
|
-
fetch: options.fetch
|
|
608
|
-
});
|
|
609
|
-
const provider = function(modelId) {
|
|
610
|
-
return {
|
|
611
|
-
transcription: createTranscriptionModel(modelId)
|
|
612
|
-
};
|
|
613
|
-
};
|
|
614
|
-
provider.specificationVersion = "v3";
|
|
615
|
-
provider.transcription = createTranscriptionModel;
|
|
616
|
-
provider.transcriptionModel = createTranscriptionModel;
|
|
617
|
-
provider.speech = createSpeechModel;
|
|
618
|
-
provider.speechModel = createSpeechModel;
|
|
619
|
-
provider.languageModel = (modelId) => {
|
|
620
|
-
throw new NoSuchModelError({
|
|
621
|
-
modelId,
|
|
622
|
-
modelType: "languageModel",
|
|
623
|
-
message: "Deepgram does not provide language models"
|
|
624
|
-
});
|
|
625
|
-
};
|
|
626
|
-
provider.embeddingModel = (modelId) => {
|
|
627
|
-
throw new NoSuchModelError({
|
|
628
|
-
modelId,
|
|
629
|
-
modelType: "embeddingModel",
|
|
630
|
-
message: "Deepgram does not provide text embedding models"
|
|
631
|
-
});
|
|
632
|
-
};
|
|
633
|
-
provider.textEmbeddingModel = provider.embeddingModel;
|
|
634
|
-
provider.imageModel = (modelId) => {
|
|
635
|
-
throw new NoSuchModelError({
|
|
636
|
-
modelId,
|
|
637
|
-
modelType: "imageModel",
|
|
638
|
-
message: "Deepgram does not provide image models"
|
|
639
|
-
});
|
|
640
|
-
};
|
|
641
|
-
return provider;
|
|
642
|
-
}
|
|
643
|
-
var deepgram = createDeepgram();
|
|
644
|
-
export {
|
|
645
|
-
DeepgramSpeechModel,
|
|
646
|
-
VERSION,
|
|
647
|
-
createDeepgram,
|
|
648
|
-
deepgram
|
|
649
|
-
};
|
|
650
|
-
//# sourceMappingURL=index.mjs.map
|