@deepgram/sdk 1.0.2 → 1.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +53 -1
- package/README.md +39 -791
- package/dist/helpers/index.d.ts +1 -0
- package/dist/helpers/index.js +14 -0
- package/dist/helpers/index.js.map +1 -0
- package/dist/helpers/secondsToTimestamp.d.ts +1 -0
- package/dist/helpers/secondsToTimestamp.js +8 -0
- package/dist/helpers/secondsToTimestamp.js.map +1 -0
- package/dist/httpRequest.d.ts +2 -1
- package/dist/httpRequest.js +22 -14
- package/dist/httpRequest.js.map +1 -1
- package/dist/keys.d.ts +3 -2
- package/dist/keys.js +19 -5
- package/dist/keys.js.map +1 -1
- package/dist/projects.js +3 -3
- package/dist/projects.js.map +1 -1
- package/dist/transcription/index.js +1 -1
- package/dist/transcription/index.js.map +1 -1
- package/dist/transcription/liveTranscription.js +2 -2
- package/dist/transcription/liveTranscription.js.map +1 -1
- package/dist/transcription/preRecordedTranscription.js +35 -10
- package/dist/transcription/preRecordedTranscription.js.map +1 -1
- package/dist/types/createKeyOptions.d.ts +13 -0
- package/dist/types/createKeyOptions.js +3 -0
- package/dist/types/createKeyOptions.js.map +1 -0
- package/dist/types/index.d.ts +1 -0
- package/dist/types/index.js +1 -0
- package/dist/types/index.js.map +1 -1
- package/dist/types/prerecordedTranscriptionResponse.d.ts +16 -2
- package/dist/types/prerecordedTranscriptionResponse.js +48 -0
- package/dist/types/prerecordedTranscriptionResponse.js.map +1 -1
- package/dist/types/transcriptionSource.d.ts +6 -1
- package/dist/usage.js +4 -4
- package/dist/usage.js.map +1 -1
- package/package.json +18 -18
package/README.md
CHANGED
|
@@ -1,13 +1,20 @@
|
|
|
1
1
|
# Deepgram Node.js SDK
|
|
2
2
|
|
|
3
|
-
[CI](https://github.com/deepgram/node-sdk/actions/workflows/CI.yml) [npm](https://www.npmjs.com/package/@deepgram/sdk) [Code of Conduct](CODE_OF_CONDUCT.md)
|
|
4
4
|
|
|
5
5
|
Official Node.js SDK for [Deepgram](https://www.deepgram.com/)'s automated
|
|
6
6
|
speech recognition APIs.
|
|
7
7
|
|
|
8
|
+
> This SDK only supports hosted usage of api.deepgram.com.
|
|
9
|
+
|
|
8
10
|
To access the API you will need a Deepgram account. Sign up for free at
|
|
9
11
|
[signup][signup].
|
|
10
12
|
|
|
13
|
+
## Documentation
|
|
14
|
+
|
|
15
|
+
Full documentation of the Node.js SDK can be found on the
|
|
16
|
+
[Deepgram Developer Portal](https://developers.deepgram.com/sdks-tools/sdks/node-sdk/).
|
|
17
|
+
|
|
11
18
|
You can learn more about the full Deepgram API at [https://developers.deepgram.com](https://developers.deepgram.com).
|
|
12
19
|
|
|
13
20
|
## Installation
|
|
@@ -29,822 +36,63 @@ yarn add @deepgram/sdk
|
|
|
29
36
|
```js
|
|
30
37
|
const { Deepgram } = require("@deepgram/sdk");
|
|
31
38
|
|
|
32
|
-
const deepgram = new Deepgram({
|
|
33
|
-
apiKey: DEEPGRAM_API_KEY,
|
|
34
|
-
apiUrl: CUSTOM_API_URL, // Optionally used for on-premises customers
|
|
35
|
-
});
|
|
39
|
+
const deepgram = new Deepgram(DEEPGRAM_API_KEY);
|
|
36
40
|
```
|
|
37
41
|
|
|
38
|
-
##
|
|
42
|
+
## Examples
|
|
39
43
|
|
|
40
|
-
|
|
44
|
+
### Transcribe an Existing File
|
|
41
45
|
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
### Prerecorded Transcription
|
|
45
|
-
|
|
46
|
-
The `transcription.preRecorded` method handles sending an existing file or
|
|
47
|
-
buffer to the Deepgram API to generate a transcription. [Additional options](#options)
|
|
48
|
-
can be provided to customize the result.
|
|
46
|
+
#### Remote Files
|
|
49
47
|
|
|
50
48
|
```js
|
|
51
|
-
// Sending a file
|
|
52
49
|
const fileSource = { url: URL_OF_FILE };
|
|
53
50
|
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
// Both fileSource or bufferSource could be provided as the source parameter
|
|
58
|
-
const response = await deepgram.transcription.preRecorded(
|
|
59
|
-
fileSource | bufferSource,
|
|
60
|
-
{
|
|
61
|
-
punctuate: true,
|
|
62
|
-
// other options are available
|
|
63
|
-
}
|
|
64
|
-
);
|
|
65
|
-
```
|
|
66
|
-
|
|
67
|
-
#### Prerecorded Transcription Options
|
|
68
|
-
|
|
69
|
-
Additional transcription options can be provided for prerecorded transcriptions.
|
|
70
|
-
|
|
71
|
-
```js
|
|
72
|
-
{
|
|
73
|
-
/**
|
|
74
|
-
* AI model used to process submitted audio.
|
|
75
|
-
* @default general
|
|
76
|
-
* @remarks Possible values are general, phonecall, meeting or a custom string
|
|
77
|
-
* @see https://developers.deepgram.com/api-reference/speech-recognition-api#operation/transcribeAudio/properties/model
|
|
78
|
-
*/
|
|
79
|
-
model?: Models | string;
|
|
80
|
-
|
|
81
|
-
/**
|
|
82
|
-
* Version of the model to use.
|
|
83
|
-
* @default latest
|
|
84
|
-
* @remarks latest OR <version_id>
|
|
85
|
-
* @see https://developers.deepgram.com/api-reference/speech-recognition-api#operation/transcribeAudio/properties/version
|
|
86
|
-
*/
|
|
87
|
-
version: string;
|
|
88
|
-
/**
|
|
89
|
-
* BCP-47 language tag that hints at the primary spoken language.
|
|
90
|
-
* @default en-US
|
|
91
|
-
* @remarks Possible values are en-GB, en-IN, en-NZ, en-US, es, fr, ko, pt,
|
|
92
|
-
* pt-BR, ru, tr or null
|
|
93
|
-
* @see https://developers.deepgram.com/api-reference/speech-recognition-api#operation/transcribeAudio/properties/language
|
|
94
|
-
*/
|
|
95
|
-
language?: string;
|
|
96
|
-
/**
|
|
97
|
-
* Indicates whether to add punctuation and capitalization to the transcript.
|
|
98
|
-
* @see https://developers.deepgram.com/api-reference/speech-recognition-api#operation/transcribeAudio/properties/punctuate
|
|
99
|
-
*/
|
|
100
|
-
punctuate?: boolean;
|
|
101
|
-
/**
|
|
102
|
-
* Indicates whether to remove profanity from the transcript.
|
|
103
|
-
* @see https://developers.deepgram.com/api-reference/speech-recognition-api#operation/transcribeAudio/properties/profanity_filter
|
|
104
|
-
*/
|
|
105
|
-
profanity_filter?: boolean;
|
|
106
|
-
/**
|
|
107
|
-
* Indicates whether to redact sensitive information, replacing redacted content with asterisks (*).
|
|
108
|
-
* @remarks Options include:
|
|
109
|
-
* `pci`: Redacts sensitive credit card information, including credit card number, expiration date, and CVV
|
|
110
|
-
* `numbers` (or `true`): Aggressively redacts strings of numerals
|
|
111
|
-
* `ssn` (*beta*): Redacts social security numbers
|
|
112
|
-
* @see https://developers.deepgram.com/api-reference/speech-recognition-api#operation/transcribeAudio/properties/redact
|
|
113
|
-
*/
|
|
114
|
-
redact?: Array<string>;
|
|
115
|
-
/**
|
|
116
|
-
* Indicates whether to recognize speaker changes. When set to true, each word
|
|
117
|
-
* in the transcript will be assigned a speaker number starting at 0.
|
|
118
|
-
* @see https://developers.deepgram.com/api-reference/speech-recognition-api#operation/transcribeAudio/properties/diarize
|
|
119
|
-
*/
|
|
120
|
-
diarize?: boolean;
|
|
121
|
-
/**
|
|
122
|
-
* Indicates whether to transcribe each audio channel independently. When set
|
|
123
|
-
* to true, you will receive one transcript for each channel, which means you
|
|
124
|
-
* can apply a different model to each channel using the model parameter (e.g.,
|
|
125
|
-
* set model to general:phonecall, which applies the general model to channel
|
|
126
|
-
* 0 and the phonecall model to channel 1).
|
|
127
|
-
* @see https://developers.deepgram.com/api-reference/speech-recognition-api#operation/transcribeAudio/properties/multichannel
|
|
128
|
-
*/
|
|
129
|
-
multichannel?: boolean;
|
|
130
|
-
/**
|
|
131
|
-
* Maximum number of transcript alternatives to return. Just like a human listener,
|
|
132
|
-
* Deepgram can provide multiple possible interpretations of what it hears.
|
|
133
|
-
* @default 1
|
|
134
|
-
*/
|
|
135
|
-
alternatives?: number;
|
|
136
|
-
/**
|
|
137
|
-
* Indicates whether to convert numbers from written format (e.g., one) to
|
|
138
|
-
* numerical format (e.g., 1). Deepgram can format numbers up to 999,999.
|
|
139
|
-
* @remarks Converted numbers do not include punctuation. For example,
|
|
140
|
-
* 999,999 would be transcribed as 999999.
|
|
141
|
-
* @see https://developers.deepgram.com/api-reference/speech-recognition-api#operation/transcribeAudio/properties/numerals
|
|
142
|
-
*/
|
|
143
|
-
numerals?: boolean;
|
|
144
|
-
/**
|
|
145
|
-
* Terms or phrases to search for in the submitted audio. Deepgram searches
|
|
146
|
-
* for acoustic patterns in audio rather than text patterns in transcripts
|
|
147
|
-
* because we have noticed that acoustic pattern matching is more performant.
|
|
148
|
-
* @see https://developers.deepgram.com/api-reference/speech-recognition-api#operation/transcribeAudio/properties/search
|
|
149
|
-
*/
|
|
150
|
-
search?: Array<string>;
|
|
151
|
-
/**
|
|
152
|
-
* Callback URL to provide if you would like your submitted audio to be
|
|
153
|
-
* processed asynchronously. When passed, Deepgram will immediately respond
|
|
154
|
-
* with a request_id. When it has finished analyzing the audio, it will send
|
|
155
|
-
* a POST request to the provided URL with an appropriate HTTP status code.
|
|
156
|
-
* @remarks You may embed basic authentication credentials in the callback URL.
|
|
157
|
-
* Only ports 80, 443, 8080, and 8443 can be used for callbacks.
|
|
158
|
-
* @see https://developers.deepgram.com/api-reference/speech-recognition-api#operation/transcribeAudio/properties/callback
|
|
159
|
-
*/
|
|
160
|
-
callback?: string;
|
|
161
|
-
/**
|
|
162
|
-
* Keywords to which the model should pay particular attention to boosting
|
|
163
|
-
* or suppressing to help it understand context. Just like a human listener,
|
|
164
|
-
* Deepgram can better understand mumbled, distorted, or otherwise
|
|
165
|
-
* hard-to-decipher speech when it knows the context of the conversation.
|
|
166
|
-
* @see https://developers.deepgram.com/api-reference/speech-recognition-api#operation/transcribeAudio/properties/keywords
|
|
167
|
-
*/
|
|
168
|
-
keywords?: Array<string>;
|
|
169
|
-
/**
|
|
170
|
-
* Indicates whether Deepgram will segment speech into meaningful semantic
|
|
171
|
-
* units, which allows the model to interact more naturally and effectively
|
|
172
|
-
* with speakers' spontaneous speech patterns. For example, when humans
|
|
173
|
-
* speak to each other conversationally, they often pause mid-sentence to
|
|
174
|
-
* reformulate their thoughts, or stop and restart a badly-worded sentence.
|
|
175
|
-
* When utterances is set to true, these utterances are identified and
|
|
176
|
-
* returned in the transcript results.
|
|
177
|
-
*
|
|
178
|
-
* By default, when utterances is enabled, it starts a new utterance after
|
|
179
|
-
* 0.8 s of silence. You can customize the length of time used to determine
|
|
180
|
-
* where to split utterances by submitting the utt_split parameter.
|
|
181
|
-
* @remarks **BETA FEATURE**
|
|
182
|
-
* @see https://developers.deepgram.com/api-reference/speech-recognition-api#operation/transcribeAudio/properties/utterances
|
|
183
|
-
*/
|
|
184
|
-
utterances?: boolean;
|
|
185
|
-
/**
|
|
186
|
-
* Length of time in seconds of silence between words that Deepgram will
|
|
187
|
-
* use when determining where to split utterances. Used when utterances
|
|
188
|
-
* is enabled.
|
|
189
|
-
* @default 0.8 seconds
|
|
190
|
-
* @remarks **BETA FEATURE**
|
|
191
|
-
* @see https://developers.deepgram.com/api-reference/speech-recognition-api#operation/transcribeAudio/properties/utt_split
|
|
192
|
-
*/
|
|
193
|
-
utt_split?: number;
|
|
194
|
-
}
|
|
195
|
-
```
|
|
196
|
-
|
|
197
|
-
#### Prerecorded Transcription Response
|
|
198
|
-
|
|
199
|
-
```ts
|
|
200
|
-
{
|
|
201
|
-
request_id?: string;
|
|
202
|
-
metadata?: {
|
|
203
|
-
request_id: string;
|
|
204
|
-
transaction_key: string;
|
|
205
|
-
sha256: string;
|
|
206
|
-
created: string;
|
|
207
|
-
duration: number;
|
|
208
|
-
channels: number;
|
|
209
|
-
};
|
|
210
|
-
results?: {
|
|
211
|
-
channels: Array<{
|
|
212
|
-
search?: Array<{
|
|
213
|
-
query: string;
|
|
214
|
-
hits: Array<{
|
|
215
|
-
confidence: number;
|
|
216
|
-
start: number;
|
|
217
|
-
end: number;
|
|
218
|
-
snippet: string;
|
|
219
|
-
}>;
|
|
220
|
-
}>;
|
|
221
|
-
alternatives: Array<{
|
|
222
|
-
transcript: string;
|
|
223
|
-
confidence: number;
|
|
224
|
-
words: Array<{
|
|
225
|
-
word: string;
|
|
226
|
-
start: number;
|
|
227
|
-
end: number;
|
|
228
|
-
confidence: number;
|
|
229
|
-
punctuated_word?: string;
|
|
230
|
-
}>;
|
|
231
|
-
}>;
|
|
232
|
-
}>;
|
|
233
|
-
utterances?: Array<{
|
|
234
|
-
start: number;
|
|
235
|
-
end: number;
|
|
236
|
-
confidence: number;
|
|
237
|
-
channel: number;
|
|
238
|
-
transcript: string;
|
|
239
|
-
words: Array<{
|
|
240
|
-
word: string;
|
|
241
|
-
start: number;
|
|
242
|
-
end: number;
|
|
243
|
-
confidence: number;
|
|
244
|
-
punctuated_word?: string;
|
|
245
|
-
}>;
|
|
246
|
-
speaker?: number;
|
|
247
|
-
id: string;
|
|
248
|
-
}>;
|
|
249
|
-
};
|
|
250
|
-
};
|
|
251
|
-
```
|
|
252
|
-
|
|
253
|
-
### Live Transcription
|
|
254
|
-
|
|
255
|
-
The `transcription.live` method provides access to a websocket connection
|
|
256
|
-
to the Deepgram API for generating streaming transcriptions. [Additional options](#options)
|
|
257
|
-
can be provided to customize the result.
|
|
258
|
-
|
|
259
|
-
```js
|
|
260
|
-
const deepgramLive = deepgram.transcription.live({ punctuate: true });
|
|
261
|
-
|
|
262
|
-
socket.on("microphone-stream", (stream) => {
|
|
263
|
-
deepgramSocket.send(stream);
|
|
264
|
-
});
|
|
265
|
-
|
|
266
|
-
/**
|
|
267
|
-
* Receive transcriptions based on sent streams
|
|
268
|
-
*/
|
|
269
|
-
deepgramLive.addListener("transcriptReceived", (transcription) => {
|
|
270
|
-
console.log(transcription.data);
|
|
51
|
+
const response = await deepgram.transcription.preRecorded(fileSource, {
|
|
52
|
+
punctuate: true,
|
|
271
53
|
});
|
|
272
54
|
```
|
|
273
55
|
|
|
274
|
-
####
|
|
275
|
-
|
|
276
|
-
The following events are fired by the live transcription object:
|
|
277
|
-
|
|
278
|
-
| Event | Description | Data |
|
|
279
|
-
| -------------------- | ----------------------------------------------------- | ------------------------------------------------- |
|
|
280
|
-
| `open` | The websocket connection to Deepgram has been opened. | The DG live transcription object |
|
|
281
|
-
| `close` | The websocket connection to Deepgram has been closed. | WebSocket.CloseEvent |
|
|
282
|
-
| `error` | An error occurred with the websocket connection | Error object |
|
|
283
|
-
| `transcriptReceived` | Deepgram has responded with a transcription | [Transcription Response](#transcription-response) |
|
|
284
|
-
|
|
285
|
-
#### Live Transcription Options
|
|
286
|
-
|
|
287
|
-
Additional transcription options can be provided for live transcriptions.
|
|
56
|
+
#### Local Files
|
|
288
57
|
|
|
289
58
|
```js
|
|
290
|
-
{
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
* @default general
|
|
294
|
-
* @remarks Possible values are general, phonecall, meeting or a custom string
|
|
295
|
-
* @see https://developers.deepgram.com/api-reference/speech-recognition-api#operation/transcribeAudio/properties/model
|
|
296
|
-
*/
|
|
297
|
-
model?: Models | string;
|
|
298
|
-
|
|
299
|
-
/**
|
|
300
|
-
* Version of the model to use.
|
|
301
|
-
* @default latest
|
|
302
|
-
* @remarks latest OR <version_id>
|
|
303
|
-
* @see https://developers.deepgram.com/api-reference/speech-recognition-api#operation/transcribeAudio/properties/version
|
|
304
|
-
*/
|
|
305
|
-
version: string;
|
|
306
|
-
/**
|
|
307
|
-
* BCP-47 language tag that hints at the primary spoken language.
|
|
308
|
-
* @default en-US
|
|
309
|
-
* @remarks Possible values are en-GB, en-IN, en-NZ, en-US, es, fr, ko, pt,
|
|
310
|
-
* pt-BR, ru, tr or null
|
|
311
|
-
* @see https://developers.deepgram.com/api-reference/speech-recognition-api#operation/transcribeAudio/properties/language
|
|
312
|
-
*/
|
|
313
|
-
language?: string;
|
|
314
|
-
/**
|
|
315
|
-
* Indicates whether to add punctuation and capitalization to the transcript.
|
|
316
|
-
* @see https://developers.deepgram.com/api-reference/speech-recognition-api#operation/transcribeAudio/properties/punctuate
|
|
317
|
-
*/
|
|
318
|
-
punctuate?: boolean;
|
|
319
|
-
/**
|
|
320
|
-
* Indicates whether to remove profanity from the transcript.
|
|
321
|
-
* @see https://developers.deepgram.com/api-reference/speech-recognition-api#operation/transcribeAudio/properties/profanity_filter
|
|
322
|
-
*/
|
|
323
|
-
profanity_filter?: boolean;
|
|
324
|
-
/**
|
|
325
|
-
* Indicates whether to redact sensitive information, replacing redacted content with asterisks (*).
|
|
326
|
-
* @remarks Options include:
|
|
327
|
-
* `pci`: Redacts sensitive credit card information, including credit card number, expiration date, and CVV
|
|
328
|
-
* `numbers` (or `true`): Aggressively redacts strings of numerals
|
|
329
|
-
* `ssn` (*beta*): Redacts social security numbers
|
|
330
|
-
* @see https://developers.deepgram.com/api-reference/speech-recognition-api#operation/transcribeAudio/properties/redact
|
|
331
|
-
*/
|
|
332
|
-
redact?: Array<string>;
|
|
333
|
-
/**
|
|
334
|
-
* Indicates whether to recognize speaker changes. When set to true, each word
|
|
335
|
-
* in the transcript will be assigned a speaker number starting at 0.
|
|
336
|
-
* @see https://developers.deepgram.com/api-reference/speech-recognition-api#operation/transcribeAudio/properties/diarize
|
|
337
|
-
*/
|
|
338
|
-
diarize?: boolean;
|
|
339
|
-
/**
|
|
340
|
-
* Indicates whether to transcribe each audio channel independently. When set
|
|
341
|
-
* to true, you will receive one transcript for each channel, which means you
|
|
342
|
-
* can apply a different model to each channel using the model parameter (e.g.,
|
|
343
|
-
* set model to general:phonecall, which applies the general model to channel
|
|
344
|
-
* 0 and the phonecall model to channel 1).
|
|
345
|
-
* @see https://developers.deepgram.com/api-reference/speech-recognition-api#operation/transcribeAudio/properties/multichannel
|
|
346
|
-
*/
|
|
347
|
-
multichannel?: boolean;
|
|
348
|
-
/**
|
|
349
|
-
* Maximum number of transcript alternatives to return. Just like a human listener,
|
|
350
|
-
* Deepgram can provide multiple possible interpretations of what it hears.
|
|
351
|
-
* @default 1
|
|
352
|
-
*/
|
|
353
|
-
alternatives?: number;
|
|
354
|
-
/**
|
|
355
|
-
* Indicates whether to convert numbers from written format (e.g., one) to
|
|
356
|
-
* numerical format (e.g., 1). Deepgram can format numbers up to 999,999.
|
|
357
|
-
* @remarks Converted numbers do not include punctuation. For example,
|
|
358
|
-
* 999,999 would be transcribed as 999999.
|
|
359
|
-
* @see https://developers.deepgram.com/api-reference/speech-recognition-api#operation/transcribeAudio/properties/numerals
|
|
360
|
-
*/
|
|
361
|
-
numerals?: boolean;
|
|
362
|
-
/**
|
|
363
|
-
* Terms or phrases to search for in the submitted audio. Deepgram searches
|
|
364
|
-
* for acoustic patterns in audio rather than text patterns in transcripts
|
|
365
|
-
* because we have noticed that acoustic pattern matching is more performant.
|
|
366
|
-
* @see https://developers.deepgram.com/api-reference/speech-recognition-api#operation/transcribeAudio/properties/search
|
|
367
|
-
*/
|
|
368
|
-
search?: Array<string>;
|
|
369
|
-
/**
|
|
370
|
-
* Callback URL to provide if you would like your submitted audio to be
|
|
371
|
-
* processed asynchronously. When passed, Deepgram will immediately respond
|
|
372
|
-
* with a request_id. When it has finished analyzing the audio, it will send
|
|
373
|
-
* a POST request to the provided URL with an appropriate HTTP status code.
|
|
374
|
-
* @remarks You may embed basic authentication credentials in the callback URL.
|
|
375
|
-
* Only ports 80, 443, 8080, and 8443 can be used for callbacks.
|
|
376
|
-
* @see https://developers.deepgram.com/api-reference/speech-recognition-api#operation/transcribeAudio/properties/callback
|
|
377
|
-
*/
|
|
378
|
-
callback?: string;
|
|
379
|
-
/**
|
|
380
|
-
* Keywords to which the model should pay particular attention to boosting
|
|
381
|
-
* or suppressing to help it understand context. Just like a human listener,
|
|
382
|
-
* Deepgram can better understand mumbled, distorted, or otherwise
|
|
383
|
-
* hard-to-decipher speech when it knows the context of the conversation.
|
|
384
|
-
* @see https://developers.deepgram.com/api-reference/speech-recognition-api#operation/transcribeAudio/properties/keywords
|
|
385
|
-
*/
|
|
386
|
-
keywords?: Array<string>;
|
|
387
|
-
/**
|
|
388
|
-
* Indicates whether the streaming endpoint should send you updates to its
|
|
389
|
-
* transcription as more audio becomes available. By default, the streaming
|
|
390
|
-
* endpoint returns regular updates, which means transcription results will
|
|
391
|
-
* likely change for a period of time. You can avoid receiving these updates
|
|
392
|
-
* by setting this flag to false.
|
|
393
|
-
* @see https://developers.deepgram.com/api-reference/speech-recognition-api#operation/transcribeStreamingAudio/properties/interim_results
|
|
394
|
-
*/
|
|
395
|
-
interim_results?: boolean;
|
|
396
|
-
/**
|
|
397
|
-
* Indicates whether Deepgram will detect whether a speaker has finished
|
|
398
|
-
* speaking (or paused for a significant period of time, indicating the
|
|
399
|
-
* completion of an idea). When Deepgram detects an endpoint, it assumes
|
|
400
|
-
* that no additional data will improve its prediction, so it immediately
|
|
401
|
-
* finalizes the result for the processed time range and returns the
|
|
402
|
-
* transcript with a speech_final parameter set to true.
|
|
403
|
-
* @see https://developers.deepgram.com/api-reference/speech-recognition-api#operation/transcribeStreamingAudio/properties/endpointing
|
|
404
|
-
*/
|
|
405
|
-
endpointing?: boolean;
|
|
406
|
-
/**
|
|
407
|
-
* Length of time in milliseconds of silence that voice activation detection
|
|
408
|
-
* (VAD) will use to detect that a speaker has finished speaking. Used when
|
|
409
|
-
* endpointing is enabled. Defaults to 10 ms. Deepgram customers may configure
|
|
410
|
-
* a value between 10 ms and 500 ms; on-premise customers may remove this
|
|
411
|
-
* restriction.
|
|
412
|
-
* @default 10
|
|
413
|
-
* @see https://developers.deepgram.com/api-reference/speech-recognition-api#operation/transcribeStreamingAudio/properties/vad_turnoff
|
|
414
|
-
*/
|
|
415
|
-
vad_turnoff?: number;
|
|
416
|
-
/**
|
|
417
|
-
* Expected encoding of the submitted streaming audio.
|
|
418
|
-
* @see https://developers.deepgram.com/api-reference/speech-recognition-api#operation/transcribeStreamingAudio/properties/encoding
|
|
419
|
-
*/
|
|
420
|
-
encoding?: string;
|
|
421
|
-
/**
|
|
422
|
-
* Number of independent audio channels contained in submitted streaming
|
|
423
|
-
* audio. Only read when a value is provided for encoding.
|
|
424
|
-
* @default 1
|
|
425
|
-
* @see https://developers.deepgram.com/api-reference/speech-recognition-api#operation/transcribeStreamingAudio/properties/channels
|
|
426
|
-
*/
|
|
427
|
-
channels?: number;
|
|
428
|
-
/**
|
|
429
|
-
* Sample rate of submitted streaming audio. Required (and only read)
|
|
430
|
-
* when a value is provided for encoding.
|
|
431
|
-
* @see https://developers.deepgram.com/api-reference/speech-recognition-api#operation/transcribeStreamingAudio/properties/sample_rate
|
|
432
|
-
*/
|
|
433
|
-
sample_rate?: number;
|
|
434
|
-
}
|
|
435
|
-
```
|
|
436
|
-
|
|
437
|
-
#### Live Transcription Response
|
|
438
|
-
|
|
439
|
-
```ts
|
|
440
|
-
{
|
|
441
|
-
channel_index: Array<number>;
|
|
442
|
-
duration: number;
|
|
443
|
-
start: number;
|
|
444
|
-
is_final: boolean;
|
|
445
|
-
speech_final: boolean;
|
|
446
|
-
channel: {
|
|
447
|
-
search?: Array<{
|
|
448
|
-
query: string;
|
|
449
|
-
hits: Array<{
|
|
450
|
-
confidence: number;
|
|
451
|
-
start: number;
|
|
452
|
-
end: number;
|
|
453
|
-
snippet: string;
|
|
454
|
-
}>
|
|
455
|
-
}>,
|
|
456
|
-
alternatives: Array<{
|
|
457
|
-
transcript: string;
|
|
458
|
-
confidence: number;
|
|
459
|
-
words: Array<{
|
|
460
|
-
word: string;
|
|
461
|
-
start: number;
|
|
462
|
-
end: number;
|
|
463
|
-
confidence: number;
|
|
464
|
-
punctuated_word?: string;
|
|
465
|
-
}>
|
|
466
|
-
}>
|
|
467
|
-
}
|
|
59
|
+
const streamSource = {
|
|
60
|
+
stream: fs.createReadStream("/path/to/file"),
|
|
61
|
+
mimetype: MIMETYPE_OF_FILE,
|
|
468
62
|
};
|
|
469
|
-
```
|
|
470
|
-
|
|
471
|
-
## Project Management
|
|
472
|
-
|
|
473
|
-
### List Projects
|
|
474
|
-
|
|
475
|
-
Retrieve all projects
|
|
476
|
-
|
|
477
|
-
```js
|
|
478
|
-
const projects = await deepgram.projects.list();
|
|
479
|
-
```
|
|
480
|
-
|
|
481
|
-
#### List Projects Response
|
|
482
|
-
|
|
483
|
-
```ts
|
|
484
|
-
{
|
|
485
|
-
projects: [
|
|
486
|
-
{
|
|
487
|
-
id: string,
|
|
488
|
-
name: string,
|
|
489
|
-
},
|
|
490
|
-
],
|
|
491
|
-
}
|
|
492
|
-
```
|
|
493
|
-
|
|
494
|
-
### Get a Project
|
|
495
|
-
|
|
496
|
-
Retrieves a project based on the provided project id.
|
|
497
|
-
|
|
498
|
-
```js
|
|
499
|
-
const project = await deepgram.projects.get(PROJECT_ID);
|
|
500
|
-
```
|
|
501
|
-
|
|
502
|
-
#### Get a Project Response
|
|
503
|
-
|
|
504
|
-
```ts
|
|
505
|
-
{
|
|
506
|
-
id: string,
|
|
507
|
-
name: string,
|
|
508
|
-
}
|
|
509
|
-
```
|
|
510
|
-
|
|
511
|
-
### Update a Project
|
|
512
|
-
|
|
513
|
-
Updates a project based on a provided project object. This object must contain
|
|
514
|
-
`project_id` and `name` properties.
|
|
515
|
-
|
|
516
|
-
```js
|
|
517
|
-
const updateResponse = await deepgram.projects.update(project);
|
|
518
|
-
```
|
|
519
|
-
|
|
520
|
-
#### Update a Project Response
|
|
521
|
-
|
|
522
|
-
```ts
|
|
523
|
-
{
|
|
524
|
-
message: string;
|
|
525
|
-
}
|
|
526
|
-
```
|
|
527
|
-
|
|
528
|
-
## Key Management
|
|
529
|
-
|
|
530
|
-
### List Keys
|
|
531
63
|
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
```js
|
|
535
|
-
const response = await deepgram.keys.list(PROJECT_ID);
|
|
536
|
-
```
|
|
537
|
-
|
|
538
|
-
#### List Keys Response
|
|
539
|
-
|
|
540
|
-
```ts
|
|
541
|
-
{
|
|
542
|
-
api_keys: [
|
|
543
|
-
{
|
|
544
|
-
api_key_id: string,
|
|
545
|
-
comment: string,
|
|
546
|
-
created: string,
|
|
547
|
-
scopes: Array<string>
|
|
548
|
-
},
|
|
549
|
-
];
|
|
550
|
-
}
|
|
551
|
-
```
|
|
552
|
-
|
|
553
|
-
### Create Key
|
|
554
|
-
|
|
555
|
-
Create a new API key for a project using the `keys.create` method
|
|
556
|
-
with a name for the key.
|
|
557
|
-
|
|
558
|
-
```js
|
|
559
|
-
const response = await deepgram.keys.create(PROJECT_ID, COMMENT_FOR_KEY);
|
|
560
|
-
```
|
|
561
|
-
|
|
562
|
-
#### Create Key Response
|
|
563
|
-
|
|
564
|
-
```ts
|
|
565
|
-
{
|
|
566
|
-
api_key_id: string,
|
|
567
|
-
key: string,
|
|
568
|
-
comment: string,
|
|
569
|
-
created: string,
|
|
570
|
-
scopes: Array<string>
|
|
571
|
-
}
|
|
572
|
-
```
|
|
573
|
-
|
|
574
|
-
### Delete key
|
|
575
|
-
|
|
576
|
-
Delete an existing API key using the `keys.delete` method with the key to
|
|
577
|
-
delete.
|
|
578
|
-
|
|
579
|
-
```js
|
|
580
|
-
await deepgram.keys.delete(PROJECT_ID, KEY_ID);
|
|
581
|
-
```
|
|
582
|
-
|
|
583
|
-
## Usage
|
|
584
|
-
|
|
585
|
-
### Requests by Project
|
|
586
|
-
|
|
587
|
-
Retrieves transcription requests for a project based on the provided options.
|
|
588
|
-
|
|
589
|
-
```js
|
|
590
|
-
const response = await deepgram.usage.listRequests(PROJECT_ID, {
|
|
591
|
-
limit: 10,
|
|
592
|
-
// other options are available
|
|
64
|
+
const response = await deepgram.transcription.preRecorded(streamSource, {
|
|
65
|
+
punctuate: true,
|
|
593
66
|
});
|
|
594
67
|
```
|
|
595
68
|
|
|
596
|
-
|
|
69
|
+
### Transcribe Audio in Real-Time
|
|
597
70
|
|
|
598
71
|
```js
|
|
599
|
-
{
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
// Example: "2021-01-01T00:00:00+00:00"
|
|
605
|
-
end?: string,
|
|
606
|
-
// Page of requests to return
|
|
607
|
-
// Defaults to 0
|
|
608
|
-
page?: number,
|
|
609
|
-
// Number of requests to return per page
|
|
610
|
-
// Defaults to 10. Maximum of 100
|
|
611
|
-
limit?: number,
|
|
612
|
-
// Filter by succeeded or failed requests
|
|
613
|
-
// By default, all requests are returned
|
|
614
|
-
status?: 'succeeded' | 'failed'
|
|
615
|
-
}
|
|
616
|
-
```
|
|
72
|
+
navigator.mediaDevices.getUserMedia({ audio: true }).then((stream) => {
|
|
73
|
+
const mediaRecorder = new MediaRecorder(stream, {
|
|
74
|
+
mimeType: 'audio/webm',
|
|
75
|
+
});
|
|
76
|
+
const deepgramSocket = deepgram.transcription.live({ punctuate: true });
|
|
617
77
|
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
request_id: string;
|
|
627
|
-
created: string;
|
|
628
|
-
path: string;
|
|
629
|
-
accessor: string;
|
|
630
|
-
response?: {
|
|
631
|
-
details: {
|
|
632
|
-
usd: number;
|
|
633
|
-
duration: number;
|
|
634
|
-
total_audio: number;
|
|
635
|
-
channels: number;
|
|
636
|
-
streams: number;
|
|
637
|
-
model: string;
|
|
638
|
-
method: string;
|
|
639
|
-
tags: Array<string>;
|
|
640
|
-
features: Array<string>;
|
|
641
|
-
config: {
|
|
642
|
-
multichannel?: boolean;
|
|
643
|
-
interim_results?: boolean;
|
|
644
|
-
punctuate?: boolean;
|
|
645
|
-
ner?: boolean;
|
|
646
|
-
utterances?: boolean;
|
|
647
|
-
replace?: boolean;
|
|
648
|
-
profanity_filter?: boolean;
|
|
649
|
-
keywords?: boolean;
|
|
650
|
-
sentiment?: boolean;
|
|
651
|
-
diarize?: boolean;
|
|
652
|
-
detect_language?: boolean;
|
|
653
|
-
search?: boolean;
|
|
654
|
-
redact?: boolean;
|
|
655
|
-
alternatives?: boolean;
|
|
656
|
-
numerals?: boolean;
|
|
657
|
-
};
|
|
658
|
-
}
|
|
659
|
-
}, ||
|
|
660
|
-
{
|
|
661
|
-
message?: string;
|
|
662
|
-
},
|
|
663
|
-
callback?: {
|
|
664
|
-
code: number;
|
|
665
|
-
completed: string;
|
|
666
|
-
},
|
|
667
|
-
},
|
|
668
|
-
];
|
|
669
|
-
}
|
|
670
|
-
```
|
|
78
|
+
deepgramSocket.addListener('open', () => {
|
|
79
|
+
mediaRecorder.addEventListener('dataavailable', async (event) => {
|
|
80
|
+
if (event.data.size > 0 && deepgramSocket.readyState == 1) {
|
|
81
|
+
deepgramSocket.send(event.data)
|
|
82
|
+
}
|
|
83
|
+
})
|
|
84
|
+
mediaRecorder.start(1000)
|
|
85
|
+
});
|
|
671
86
|
|
|
672
|
-
|
|
673
|
-
|
|
674
|
-
|
|
675
|
-
|
|
676
|
-
|
|
677
|
-
```js
|
|
678
|
-
const response = await deepgram.usage.getRequest(PROJECT_ID, REQUEST_ID);
|
|
679
|
-
```
|
|
680
|
-
|
|
681
|
-
#### Specific Request Response
|
|
682
|
-
|
|
683
|
-
```ts
|
|
684
|
-
{
|
|
685
|
-
request_id: string;
|
|
686
|
-
created: string;
|
|
687
|
-
path: string;
|
|
688
|
-
accessor: string;
|
|
689
|
-
response?: {
|
|
690
|
-
details: {
|
|
691
|
-
usd: number;
|
|
692
|
-
duration: number;
|
|
693
|
-
total_audio: number;
|
|
694
|
-
channels: number;
|
|
695
|
-
streams: number;
|
|
696
|
-
model: string;
|
|
697
|
-
method: string;
|
|
698
|
-
tags: Array<string>;
|
|
699
|
-
features: Array<string>;
|
|
700
|
-
config: {
|
|
701
|
-
multichannel?: boolean;
|
|
702
|
-
interim_results?: boolean;
|
|
703
|
-
punctuate?: boolean;
|
|
704
|
-
ner?: boolean;
|
|
705
|
-
utterances?: boolean;
|
|
706
|
-
replace?: boolean;
|
|
707
|
-
profanity_filter?: boolean;
|
|
708
|
-
keywords?: boolean;
|
|
709
|
-
sentiment?: boolean;
|
|
710
|
-
diarize?: boolean;
|
|
711
|
-
detect_language?: boolean;
|
|
712
|
-
search?: boolean;
|
|
713
|
-
redact?: boolean;
|
|
714
|
-
alternatives?: boolean;
|
|
715
|
-
numerals?: boolean;
|
|
716
|
-
};
|
|
87
|
+
deepgramSocket.addListener("transcriptReceived", (received) => {
|
|
88
|
+
const transcript = received.channel.alternatives[0].transcript;
|
|
89
|
+
if (transcript && received.is_final) {
|
|
90
|
+
console.log(transcript);
|
|
717
91
|
}
|
|
718
|
-
}
|
|
719
|
-
{
|
|
720
|
-
message?: string;
|
|
721
|
-
},
|
|
722
|
-
callback?: {
|
|
723
|
-
code: number;
|
|
724
|
-
completed: string;
|
|
725
|
-
}
|
|
726
|
-
}
|
|
727
|
-
```
|
|
728
|
-
|
|
729
|
-
### Get Usage by Project
|
|
730
|
-
|
|
731
|
-
Retrieves aggregated usage data for a project based on the provided options.
|
|
732
|
-
|
|
733
|
-
```js
|
|
734
|
-
const response = await deepgram.usage.getUsage(PROJECT_ID, {
|
|
735
|
-
start: "2020-01-01T00:00:00+00:00",
|
|
736
|
-
// other options are available
|
|
92
|
+
});
|
|
737
93
|
});
|
|
738
94
|
```
|
|
739
95
|
|
|
740
|
-
#### Usage by Project Options
|
|
741
|
-
|
|
742
|
-
```js
|
|
743
|
-
{
|
|
744
|
-
// The time to retrieve requests made since
|
|
745
|
-
// Example: "2020-01-01T00:00:00+00:00"
|
|
746
|
-
start?: string,
|
|
747
|
-
// The time to retrieve requests made until
|
|
748
|
-
// Example: "2021-01-01T00:00:00+00:00"
|
|
749
|
-
end?: string,
|
|
750
|
-
// Specific identifier for a request
|
|
751
|
-
accessor?: string,
|
|
752
|
-
// Array of tags used in requests
|
|
753
|
-
tag?: Array<string>,
|
|
754
|
-
// Filter requests by method
|
|
755
|
-
method?: "sync" | "async" | "streaming",
|
|
756
|
-
// Filter requests by model used
|
|
757
|
-
model?: string,
|
|
758
|
-
// Filter only requests using multichannel feature
|
|
759
|
-
multichannel?: boolean,
|
|
760
|
-
// Filter only requests using interim results feature
|
|
761
|
-
interim_results?: boolean,
|
|
762
|
-
// Filter only requests using the punctuation feature
|
|
763
|
-
punctuate?: boolean,
|
|
764
|
-
// Filter only requests using ner feature
|
|
765
|
-
ner?: boolean,
|
|
766
|
-
// Filter only requests using utterances feature
|
|
767
|
-
utterances?: boolean,
|
|
768
|
-
// Filter only requests using replace feature
|
|
769
|
-
replace?: boolean,
|
|
770
|
-
// Filter only requests using profanity_filter feature
|
|
771
|
-
profanity_filter?: boolean,
|
|
772
|
-
// Filter only requests using keywords feature
|
|
773
|
-
keywords?: boolean,
|
|
774
|
-
// Filter only requests using sentiment feature
|
|
775
|
-
sentiment?: boolean,
|
|
776
|
-
// Filter only requests using diarization feature
|
|
777
|
-
diarize?: boolean,
|
|
778
|
-
// Filter only requests using detect_language feature
|
|
779
|
-
detect_language?: boolean,
|
|
780
|
-
// Filter only requests using search feature
|
|
781
|
-
search?: boolean,
|
|
782
|
-
// Filter only requests using redact feature
|
|
783
|
-
redact?: boolean,
|
|
784
|
-
// Filter only requests using alternatives feature
|
|
785
|
-
alternatives?: boolean,
|
|
786
|
-
// Filter only requests using numerals feature
|
|
787
|
-
numerals?: boolean
|
|
788
|
-
}
|
|
789
|
-
```
|
|
790
|
-
|
|
791
|
-
#### Get Usage Response
|
|
792
|
-
|
|
793
|
-
```ts
|
|
794
|
-
{
|
|
795
|
-
start: string,
|
|
796
|
-
end: string,
|
|
797
|
-
resolution: {
|
|
798
|
-
units: string,
|
|
799
|
-
amount: number
|
|
800
|
-
};
|
|
801
|
-
results: [
|
|
802
|
-
{
|
|
803
|
-
start: string,
|
|
804
|
-
end: string,
|
|
805
|
-
hours: number,
|
|
806
|
-
requests: number
|
|
807
|
-
}
|
|
808
|
-
];
|
|
809
|
-
}
|
|
810
|
-
```
|
|
811
|
-
|
|
812
|
-
### Get Fields
|
|
813
|
-
|
|
814
|
-
Retrieves features used by the provided projectId based on the provided options.
|
|
815
|
-
|
|
816
|
-
```js
|
|
817
|
-
const response = await deepgram.usage.getFields(PROJECT_ID, {
|
|
818
|
-
start: "2020-01-01T00:00:00+00:00",
|
|
819
|
-
// other options are available
|
|
820
|
-
});
|
|
821
|
-
```
|
|
822
|
-
|
|
823
|
-
#### Get Fields Options
|
|
824
|
-
|
|
825
|
-
```js
|
|
826
|
-
{
|
|
827
|
-
// The time to retrieve requests made since
|
|
828
|
-
// Example: "2020-01-01T00:00:00+00:00"
|
|
829
|
-
start?: string,
|
|
830
|
-
// The time to retrieve requests made until
|
|
831
|
-
// Example: "2021-01-01T00:00:00+00:00"
|
|
832
|
-
end?: string
|
|
833
|
-
}
|
|
834
|
-
```
|
|
835
|
-
|
|
836
|
-
#### Get Fields Response
|
|
837
|
-
|
|
838
|
-
```ts
|
|
839
|
-
{
|
|
840
|
-
tags: Array<string>,
|
|
841
|
-
models: Array<string>,
|
|
842
|
-
processing_methods: Array<string>,
|
|
843
|
-
languages: Array<string>,
|
|
844
|
-
features: Array<string>
|
|
845
|
-
}
|
|
846
|
-
```
|
|
847
|
-
|
|
848
96
|
## Samples
|
|
849
97
|
|
|
850
98
|
To run the sample code, first run the following in your terminal:
|