@ai-sdk/deepgram 2.0.8 → 2.0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,249 @@
1
+ import { createTestServer } from '@ai-sdk/test-server/with-vitest';
2
+ import { DeepgramTranscriptionModel } from './deepgram-transcription-model';
3
+ import { createDeepgram } from './deepgram-provider';
4
+ import { readFile } from 'node:fs/promises';
5
+ import path from 'node:path';
6
+ import { describe, it, expect, vi } from 'vitest';
7
+
8
// Replace the real version module so the user-agent assertion in the tests
// below compares against a fixed, known value.
vi.mock('./version', () => {
  return { VERSION: '0.0.0-test' };
});

// Fixtures shared by every test in this file.
const audioData = await readFile(path.join(__dirname, 'transcript-test.mp3'));
const provider = createDeepgram({ apiKey: 'test-api-key' });
const model = provider.transcription('nova-3');

// Test server for the Deepgram listen endpoint. Each test sets the response
// it needs through `server.urls[...]` before issuing a request.
const server = createTestServer({
  'https://api.deepgram.com/v1/listen': {},
});
19
+
20
describe('doGenerate', () => {
  /**
   * Configures the mocked listen endpoint to answer with a canned JSON
   * transcription payload.
   *
   * @param headers - Optional extra response headers to return.
   * @param detectedLanguage - Optional value placed in the first channel's
   *   `detected_language` field; left `undefined` when omitted.
   */
  function prepareJsonResponse({
    headers,
    detectedLanguage,
  }: {
    headers?: Record<string, string>;
    detectedLanguage?: string;
  } = {}) {
    server.urls['https://api.deepgram.com/v1/listen'].response = {
      type: 'json-value',
      headers,
      body: {
        metadata: {
          transaction_key: 'deprecated',
          request_id: '2479c8c8-8185-40ac-9ac6-f0874419f793',
          sha256:
            '154e291ecfa8be6ab8343560bcc109008fa7853eb5372533e8efdefc9b504c33',
          created: '2024-02-06T19:56:16.180Z',
          duration: 25.933313,
          channels: 1,
          models: ['30089e05-99d1-4376-b32e-c263170674af'],
          model_info: {
            '30089e05-99d1-4376-b32e-c263170674af': {
              name: '2-general-nova',
              version: '2024-01-09.29447',
              arch: 'nova-3',
            },
          },
        },
        results: {
          channels: [
            {
              detected_language: detectedLanguage,
              alternatives: [
                {
                  transcript: 'Hello world!',
                  confidence: 0.99902344,
                  words: [
                    {
                      word: 'hello',
                      start: 0.08,
                      end: 0.32,
                      confidence: 0.9975586,
                      punctuated_word: 'Hello.',
                    },
                    {
                      word: 'world',
                      start: 0.32,
                      end: 0.79999995,
                      confidence: 0.9921875,
                      punctuated_word: 'World',
                    },
                  ],
                  paragraphs: {
                    transcript: 'Hello world!',
                    paragraphs: [
                      {
                        sentences: [
                          {
                            text: 'Hello world!',
                            start: 0.08,
                            end: 0.32,
                          },
                        ],
                        num_words: 2,
                        start: 0.08,
                        end: 0.79999995,
                      },
                    ],
                  },
                },
              ],
            },
          ],
        },
      },
    };
  }

  it('should pass the model', async () => {
    prepareJsonResponse();

    await model.doGenerate({
      audio: audioData,
      mediaType: 'audio/wav',
    });

    // The model id is sent as the `model` query parameter, so assert on the
    // request URL instead of an always-true match against the request body.
    expect(server.calls[0].requestUrl).toContain('model=nova-3');
  });

  it('should pass headers', async () => {
    prepareJsonResponse();

    // Named differently from the module-level `provider` to avoid shadowing.
    const customProvider = createDeepgram({
      apiKey: 'test-api-key',
      headers: {
        'Custom-Provider-Header': 'provider-header-value',
      },
    });

    await customProvider.transcription('nova-3').doGenerate({
      audio: audioData,
      mediaType: 'audio/wav',
      headers: {
        'Custom-Request-Header': 'request-header-value',
      },
    });

    expect(server.calls[0].requestHeaders).toMatchObject({
      authorization: 'Token test-api-key',
      'content-type': 'audio/wav',
      'custom-provider-header': 'provider-header-value',
      'custom-request-header': 'request-header-value',
    });
    expect(server.calls[0].requestUserAgent).toContain(
      `ai-sdk/deepgram/0.0.0-test`,
    );
  });

  it('should extract the transcription text', async () => {
    prepareJsonResponse();

    const result = await model.doGenerate({
      audio: audioData,
      mediaType: 'audio/wav',
    });

    expect(result.text).toBe('Hello world!');
  });

  it('should include response data with timestamp, modelId and headers', async () => {
    prepareJsonResponse({
      headers: {
        'x-request-id': 'test-request-id',
        'x-ratelimit-remaining': '123',
      },
    });

    const testDate = new Date(0);
    const customModel = new DeepgramTranscriptionModel('nova-3', {
      provider: 'test-provider',
      url: () => 'https://api.deepgram.com/v1/listen',
      headers: () => ({}),
      _internal: {
        currentDate: () => testDate,
      },
    });

    const result = await customModel.doGenerate({
      audio: audioData,
      mediaType: 'audio/wav',
    });

    expect(result.response).toMatchObject({
      timestamp: testDate,
      modelId: 'nova-3',
      headers: {
        'content-type': 'application/json',
        'x-request-id': 'test-request-id',
        'x-ratelimit-remaining': '123',
      },
    });
  });

  it('should use real date when no custom date provider is specified', async () => {
    prepareJsonResponse();

    // Deliberately no `_internal.currentDate` override: the timestamp must
    // come from the real clock, so it has to fall inside the call window.
    const customModel = new DeepgramTranscriptionModel('nova-3', {
      provider: 'test-provider',
      url: () => 'https://api.deepgram.com/v1/listen',
      headers: () => ({}),
    });

    const beforeCall = Date.now();
    const result = await customModel.doGenerate({
      audio: audioData,
      mediaType: 'audio/wav',
    });
    const afterCall = Date.now();

    const timestamp = result.response.timestamp.getTime();
    expect(timestamp).toBeGreaterThanOrEqual(beforeCall);
    expect(timestamp).toBeLessThanOrEqual(afterCall);
    expect(result.response.modelId).toBe('nova-3');
  });

  it('should pass detectLanguage as detect_language query parameter', async () => {
    prepareJsonResponse();

    await model.doGenerate({
      audio: audioData,
      mediaType: 'audio/wav',
      providerOptions: {
        deepgram: {
          detectLanguage: true,
        },
      },
    });

    const requestUrl = server.calls[0].requestUrl;
    expect(requestUrl).toContain('detect_language=true');
  });

  it('should return detected language from response', async () => {
    prepareJsonResponse({ detectedLanguage: 'sv' });

    const result = await model.doGenerate({
      audio: audioData,
      mediaType: 'audio/wav',
      providerOptions: {
        deepgram: {
          detectLanguage: true,
        },
      },
    });

    expect(result.language).toBe('sv');
  });

  it('should return undefined language when not detected', async () => {
    prepareJsonResponse();

    const result = await model.doGenerate({
      audio: audioData,
      mediaType: 'audio/wav',
    });

    expect(result.language).toBeUndefined();
  });
});
@@ -0,0 +1,211 @@
1
+ import { SharedV3Warning, TranscriptionModelV3 } from '@ai-sdk/provider';
2
+ import {
3
+ combineHeaders,
4
+ createJsonResponseHandler,
5
+ parseProviderOptions,
6
+ postToApi,
7
+ } from '@ai-sdk/provider-utils';
8
+ import { z } from 'zod/v4';
9
+ import { DeepgramTranscriptionAPITypes } from './deepgram-api-types';
10
+ import { DeepgramConfig } from './deepgram-config';
11
+ import { deepgramFailedResponseHandler } from './deepgram-error';
12
+ import { DeepgramTranscriptionModelId } from './deepgram-transcription-options';
13
+
14
+ // https://developers.deepgram.com/docs/pre-recorded-audio#results
15
// Shared building blocks: each option may be omitted, `undefined` or `null`.
const optionalFlag = z.boolean().nullish();
const optionalText = z.string().nullish();

/**
 * Validation schema for the Deepgram-specific transcription options that
 * callers pass under `providerOptions.deepgram`.
 */
const deepgramProviderOptionsSchema = z.object({
  /**
   * Transcription language. Deepgram falls back to English when it is not
   * given; set `detectLanguage: true` for automatic language detection.
   */
  language: optionalText,
  /** Enables automatic detection of the language spoken in the audio. */
  detectLanguage: optionalFlag,
  /** Enables smart formatting of written-out numbers, dates, times, etc. */
  smartFormat: optionalFlag,
  /** Adds punctuation to the transcript. */
  punctuate: optionalFlag,
  /** Formats the transcript into paragraphs. */
  paragraphs: optionalFlag,
  /** Generates a transcript summary: `'v2'` for the latest version, `false` to disable. */
  summarize: z.union([z.literal('v2'), z.literal(false)]).nullish(),
  /** Identifies topics in the transcript. */
  topics: optionalFlag,
  /** Identifies intents in the transcript. */
  intents: optionalFlag,
  /** Analyzes sentiment in the transcript. */
  sentiment: optionalFlag,
  /** Detects and tags named entities in the transcript. */
  detectEntities: optionalFlag,
  /** Terms or patterns to redact from the transcript; a single string or a list. */
  redact: z.union([z.string(), z.array(z.string())]).nullish(),
  /** Replacement string for redacted content. */
  replace: optionalText,
  /** Term or phrase to search for in the transcript. */
  search: optionalText,
  /** Key term to identify in the transcript. */
  keyterm: optionalText,
  /** Identifies the different speakers in the audio. */
  diarize: optionalFlag,
  /** Segments the transcript into utterances. */
  utterances: optionalFlag,
  /** Minimum silence duration (in seconds) that starts a new utterance. */
  uttSplit: z.number().nullish(),
  /** Keeps filler words (um, uh, etc.) in the transcript. */
  fillerWords: optionalFlag,
});
53
+
54
/**
 * Deepgram-specific options accepted per transcription call, derived
 * directly from the provider options schema so the two cannot drift apart.
 */
export type DeepgramTranscriptionCallOptions = z.infer<typeof deepgramProviderOptionsSchema>;
57
+
58
/**
 * Configuration for the transcription model: the shared Deepgram config plus
 * optional internal hooks.
 */
interface DeepgramTranscriptionModelConfig extends DeepgramConfig {
  /** Internal overrides; the tests use them to control the clock. */
  _internal?: {
    /** Supplies the date reported as the response timestamp. */
    currentDate?: () => Date;
  };
}
63
+
64
/**
 * Transcription model that posts audio to Deepgram's `/v1/listen` endpoint.
 *
 * Request options are sent as URL query parameters, and the audio is sent as
 * the request body with the caller's media type as `Content-Type`.
 */
export class DeepgramTranscriptionModel implements TranscriptionModelV3 {
  readonly specificationVersion = 'v3';

  /** Provider name taken from the model configuration. */
  get provider(): string {
    return this.config.provider;
  }

  /**
   * @param modelId - Deepgram model identifier, sent as the `model` query parameter.
   * @param config - Provider configuration (URL builder, headers, optional fetch and internal hooks).
   */
  constructor(
    readonly modelId: DeepgramTranscriptionModelId,
    private readonly config: DeepgramTranscriptionModelConfig,
  ) {}

  /**
   * Validates the Deepgram provider options and converts them into the query
   * parameters of the listen request.
   *
   * @returns The query parameters and any warnings (currently always empty).
   */
  private async getArgs({
    providerOptions,
  }: Parameters<TranscriptionModelV3['doGenerate']>[0]) {
    const warnings: SharedV3Warning[] = [];

    // Parse provider options
    const deepgramOptions = await parseProviderOptions({
      provider: 'deepgram',
      providerOptions,
      schema: deepgramProviderOptionsSchema,
    });

    // Diarization is on by default; callers can switch it off explicitly below.
    const body: DeepgramTranscriptionAPITypes = {
      model: this.modelId,
      diarize: true,
    };

    // Add provider-specific options. `?? undefined` normalizes `null` so the
    // option is omitted from the query string.
    // NOTE(review): the options schema also accepts `paragraphs`, `intents`,
    // `sentiment`, `replace` and `keyterm`, but they are not forwarded here.
    // Confirm DeepgramTranscriptionAPITypes supports them before wiring them up.
    if (deepgramOptions) {
      body.detect_entities = deepgramOptions.detectEntities ?? undefined;
      body.detect_language = deepgramOptions.detectLanguage ?? undefined;
      body.filler_words = deepgramOptions.fillerWords ?? undefined;
      body.language = deepgramOptions.language ?? undefined;
      body.punctuate = deepgramOptions.punctuate ?? undefined;
      body.redact = deepgramOptions.redact ?? undefined;
      body.search = deepgramOptions.search ?? undefined;
      body.smart_format = deepgramOptions.smartFormat ?? undefined;
      body.summarize = deepgramOptions.summarize ?? undefined;
      body.topics = deepgramOptions.topics ?? undefined;
      body.utterances = deepgramOptions.utterances ?? undefined;
      body.utt_split = deepgramOptions.uttSplit ?? undefined;

      if (typeof deepgramOptions.diarize === 'boolean') {
        body.diarize = deepgramOptions.diarize;
      }
    }

    // Convert body to URL query parameters
    const queryParams = new URLSearchParams();
    for (const [key, value] of Object.entries(body)) {
      if (value === undefined) {
        continue;
      }

      if (Array.isArray(value)) {
        // Multi-valued options (e.g. `redact`) are sent as repeated
        // parameters rather than one comma-joined value.
        for (const item of value) {
          queryParams.append(key, String(item));
        }
      } else {
        queryParams.append(key, String(value));
      }
    }

    return {
      queryParams,
      warnings,
    };
  }

  /**
   * Transcribes the given audio.
   *
   * @param options - Audio payload, media type, optional headers, abort
   *   signal and provider options.
   * @returns The transcript text, word-level segments, detected language,
   *   audio duration, warnings and response metadata. Missing response
   *   fields degrade to `''`, `[]` or `undefined` instead of throwing.
   */
  async doGenerate(
    options: Parameters<TranscriptionModelV3['doGenerate']>[0],
  ): Promise<Awaited<ReturnType<TranscriptionModelV3['doGenerate']>>> {
    // Captured before the request, so the timestamp marks the start of the call.
    const currentDate = this.config._internal?.currentDate?.() ?? new Date();
    const { queryParams, warnings } = await this.getArgs(options);

    const {
      value: response,
      responseHeaders,
      rawValue: rawResponse,
    } = await postToApi({
      url:
        this.config.url({
          path: '/v1/listen',
          modelId: this.modelId,
        }) +
        '?' +
        queryParams.toString(),
      headers: {
        ...combineHeaders(this.config.headers(), options.headers),
        'Content-Type': options.mediaType,
      },
      body: {
        content: options.audio,
        values: options.audio,
      },
      failedResponseHandler: deepgramFailedResponseHandler,
      successfulResponseHandler: createJsonResponseHandler(
        deepgramTranscriptionResponseSchema,
      ),
      abortSignal: options.abortSignal,
      fetch: this.config.fetch,
    });

    // Only the first alternative of the first channel is used. Both lookups
    // are guarded so an empty `channels` or `alternatives` array yields empty
    // results instead of a TypeError.
    const firstChannel = response.results?.channels.at(0);
    const firstAlternative = firstChannel?.alternatives.at(0);

    return {
      text: firstAlternative?.transcript ?? '',
      segments:
        firstAlternative?.words?.map(word => ({
          text: word.word,
          startSecond: word.start,
          endSecond: word.end,
        })) ?? [],
      language: firstChannel?.detected_language ?? undefined,
      durationInSeconds: response.metadata?.duration ?? undefined,
      warnings,
      response: {
        timestamp: currentDate,
        modelId: this.modelId,
        headers: responseHeaders,
        body: rawResponse,
      },
    };
  }
}
183
+
184
// One transcribed word with its start and end offsets.
const deepgramWordSchema = z.object({
  word: z.string(),
  start: z.number(),
  end: z.number(),
});

// One transcription alternative: the full transcript plus its words.
const deepgramAlternativeSchema = z.object({
  transcript: z.string(),
  words: z.array(deepgramWordSchema),
});

// One audio channel with its alternatives and, when present, the detected language.
const deepgramChannelSchema = z.object({
  detected_language: z.string().nullish(),
  alternatives: z.array(deepgramAlternativeSchema),
});

/**
 * Schema for the parts of the Deepgram listen response that the model reads.
 * Both top-level sections may be absent or `null`.
 */
const deepgramTranscriptionResponseSchema = z.object({
  metadata: z.object({ duration: z.number() }).nullish(),
  results: z.object({ channels: z.array(deepgramChannelSchema) }).nullish(),
});
@@ -0,0 +1,34 @@
1
/**
 * Model identifiers accepted by the transcription model.
 *
 * The known ids are grouped by model family; each template literal expands
 * to the individual `family-variant` literals. The trailing `string & {}`
 * keeps the type open to any other id while preserving the literal members.
 */
export type DeepgramTranscriptionModelId =
  // Base family
  | 'base'
  | `base-${'general' | 'meeting' | 'phonecall' | 'finance' | 'conversationalai' | 'voicemail' | 'video'}`
  // Enhanced family
  | 'enhanced'
  | `enhanced-${'general' | 'meeting' | 'phonecall' | 'finance'}`
  // Nova family
  | 'nova'
  | `nova-${'general' | 'phonecall' | 'medical'}`
  // Nova-2 family
  | 'nova-2'
  | `nova-2-${'general' | 'meeting' | 'phonecall' | 'finance' | 'conversationalai' | 'voicemail'}`
  | `nova-2-${'video' | 'medical' | 'drivethru' | 'automotive' | 'atc'}`
  // Nova-3 family
  | 'nova-3'
  | `nova-3-${'general' | 'medical'}`
  // Any other model id
  | (string & {});
package/src/index.ts ADDED
@@ -0,0 +1,9 @@
1
+ export { createDeepgram, deepgram } from './deepgram-provider';
2
+ export type {
3
+ DeepgramProvider,
4
+ DeepgramProviderSettings,
5
+ } from './deepgram-provider';
6
+ export { DeepgramSpeechModel } from './deepgram-speech-model';
7
+ export type { DeepgramSpeechCallOptions } from './deepgram-speech-model';
8
+ export type { DeepgramSpeechModelId } from './deepgram-speech-options';
9
+ export { VERSION } from './version';
Binary file
package/src/version.ts ADDED
@@ -0,0 +1,6 @@
1
// Placeholder that the build replaces with the package version; it is not
// defined when the source runs unbuilt (for example in tests).
declare const __PACKAGE_VERSION__: string | undefined;

/** Package version, or `'0.0.0-test'` when no version was injected. */
export const VERSION: string =
  typeof __PACKAGE_VERSION__ === 'undefined'
    ? '0.0.0-test'
    : __PACKAGE_VERSION__;
+ : '0.0.0-test';