@deepgram/sdk 1.0.1 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. package/CHANGELOG.md +89 -1
  2. package/README.md +41 -734
  3. package/dist/helpers/index.d.ts +1 -0
  4. package/dist/helpers/index.js +14 -0
  5. package/dist/helpers/index.js.map +1 -0
  6. package/dist/helpers/secondsToTimestamp.d.ts +1 -0
  7. package/dist/helpers/secondsToTimestamp.js +8 -0
  8. package/dist/helpers/secondsToTimestamp.js.map +1 -0
  9. package/dist/httpRequest.d.ts +2 -1
  10. package/dist/httpRequest.js +22 -13
  11. package/dist/httpRequest.js.map +1 -1
  12. package/dist/keys.d.ts +3 -2
  13. package/dist/keys.js +19 -5
  14. package/dist/keys.js.map +1 -1
  15. package/dist/projects.d.ts +6 -1
  16. package/dist/projects.js +13 -2
  17. package/dist/projects.js.map +1 -1
  18. package/dist/transcription/index.js +1 -1
  19. package/dist/transcription/index.js.map +1 -1
  20. package/dist/transcription/liveTranscription.js +1 -1
  21. package/dist/transcription/liveTranscription.js.map +1 -1
  22. package/dist/transcription/preRecordedTranscription.js +35 -10
  23. package/dist/transcription/preRecordedTranscription.js.map +1 -1
  24. package/dist/types/createKeyOptions.d.ts +13 -0
  25. package/dist/types/createKeyOptions.js +3 -0
  26. package/dist/types/createKeyOptions.js.map +1 -0
  27. package/dist/types/index.d.ts +3 -0
  28. package/dist/types/index.js +3 -0
  29. package/dist/types/index.js.map +1 -1
  30. package/dist/types/key.d.ts +2 -2
  31. package/dist/types/prerecordedTranscriptionResponse.d.ts +18 -2
  32. package/dist/types/prerecordedTranscriptionResponse.js +48 -0
  33. package/dist/types/prerecordedTranscriptionResponse.js.map +1 -1
  34. package/dist/types/project.d.ts +6 -2
  35. package/dist/types/projectPatchResponse.d.ts +6 -0
  36. package/dist/types/projectPatchResponse.js +3 -0
  37. package/dist/types/projectPatchResponse.js.map +1 -0
  38. package/dist/types/transcriptionSource.d.ts +6 -1
  39. package/dist/types/usageRequest.d.ts +1 -1
  40. package/dist/types/utterance.d.ts +38 -0
  41. package/dist/types/utterance.js +3 -0
  42. package/dist/types/utterance.js.map +1 -0
  43. package/dist/usage.js +4 -4
  44. package/dist/usage.js.map +1 -1
  45. package/package.json +18 -18
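The headline API change in this release range is the `Deepgram` constructor: per the README diff below, it now takes the API key string directly, and the `apiUrl` option for on-premises deployments is gone (matching the new hosted-only note). A minimal before/after sketch, assuming `DEEPGRAM_API_KEY` holds a valid key:

```js
const { Deepgram } = require("@deepgram/sdk");

// 1.0.1 constructor took an options object:
// const deepgram = new Deepgram({ apiKey: DEEPGRAM_API_KEY, apiUrl: CUSTOM_API_URL });

// 1.2.0 constructor takes the API key directly:
const deepgram = new Deepgram(DEEPGRAM_API_KEY);
```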
package/README.md CHANGED
@@ -1,13 +1,20 @@
  # Deepgram Node.js SDK

- ![GitHub Workflow Status (branch)](https://img.shields.io/github/workflow/status/deepgram/node-sdk/CI/main) ![npm (scoped)](https://img.shields.io/npm/v/@deepgram/sdk) [![Contributor Covenant](https://img.shields.io/badge/Contributor%20Covenant-v2.0%20adopted-ff69b4.svg?style=flat-rounded)](CODE_OF_CONDUCT.md)
+ [![CI](https://github.com/deepgram/node-sdk/actions/workflows/CI.yml/badge.svg)](https://github.com/deepgram/node-sdk/actions/workflows/CI.yml) [![npm (scoped)](https://img.shields.io/npm/v/@deepgram/sdk)](https://www.npmjs.com/package/@deepgram/sdk) [![Contributor Covenant](https://img.shields.io/badge/Contributor%20Covenant-v2.0%20adopted-ff69b4.svg?style=flat-rounded)](CODE_OF_CONDUCT.md)

  Official Node.js SDK for [Deepgram](https://www.deepgram.com/)'s automated
  speech recognition APIs.

+ > This SDK only supports hosted usage of api.deepgram.com.
+
  To access the API you will need a Deepgram account. Sign up for free at
  [signup][signup].

+ ## Documentation
+
+ Full documentation of the Node.js SDK can be found on the
+ [Deepgram Developer Portal](https://developers.deepgram.com/sdks-tools/sdks/node-sdk/).
+
  You can learn more about the full Deepgram API at [https://developers.deepgram.com](https://developers.deepgram.com).

  ## Installation
@@ -29,763 +36,63 @@ yarn add @deepgram/sdk
  ```js
  const { Deepgram } = require("@deepgram/sdk");

- const deepgram = new Deepgram({
-   apiKey: DEEPGRAM_API_KEY,
-   apiUrl: CUSTOM_API_URL, // Optionally used for on-premises customers
- });
+ const deepgram = new Deepgram(DEEPGRAM_API_KEY);
  ```

- ## Usage
+ ## Examples

- ## Transcription
+ ### Transcribe an Existing File

- The `transcription` property can handle both pre-recorded and live transcriptions.
-
- ### Prerecorded Transcription
-
- The `transcription.preRecorded` method handles sending an existing file or
- buffer to the Deepgram API to generate a transcription. [Additional options](#options)
- can be provided to customize the result.
+ #### Remote Files

  ```js
- // Sending a file
  const fileSource = { url: URL_OF_FILE };

- // Sending a buffer
- const bufferSource = { buffer: BUFFER_OF_FILE, mimetype: MIMETYPE_OF_FILE };
-
- // Both fileSource or bufferSource could be provided as the source parameter
- const response = await deepgram.transcription.preRecorded(
-   fileSource | bufferSource,
-   {
-     punctuate: true,
-     // other options are available
-   }
- );
- ```
-
- #### Prerecorded Transcription Options
-
- Additional transcription options can be provided for prerecorded transcriptions.
-
- ```js
- {
-   /**
-    * AI model used to process submitted audio.
-    * @default general
-    * @remarks Possible values are general, phonecall, meeting or a custom string
-    * @see https://developers.deepgram.com/api-reference/speech-recognition-api#operation/transcribeAudio/properties/model
-    */
-   model?: Models | string;
-
-   /**
-    * Version of the model to use.
-    * @default latest
-    * @remarks latest OR <version_id>
-    * @see https://developers.deepgram.com/api-reference/speech-recognition-api#operation/transcribeAudio/properties/version
-    */
-   version: string;
-   /**
-    * BCP-47 language tag that hints at the primary spoken language.
-    * @default en-US
-    * @remarks Possible values are en-GB, en-IN, en-NZ, en-US, es, fr, ko, pt,
-    * pt-BR, ru, tr or null
-    * @see https://developers.deepgram.com/api-reference/speech-recognition-api#operation/transcribeAudio/properties/language
-    */
-   language?: string;
-   /**
-    * Indicates whether to add punctuation and capitalization to the transcript.
-    * @see https://developers.deepgram.com/api-reference/speech-recognition-api#operation/transcribeAudio/properties/punctuate
-    */
-   punctuate?: boolean;
-   /**
-    * Indicates whether to remove profanity from the transcript.
-    * @see https://developers.deepgram.com/api-reference/speech-recognition-api#operation/transcribeAudio/properties/profanity_filter
-    */
-   profanity_filter?: boolean;
-   /**
-    * Indicates whether to redact sensitive information, replacing redacted content with asterisks (*).
-    * @remarks Options include:
-    *  `pci`: Redacts sensitive credit card information, including credit card number, expiration date, and CVV
-    *  `numbers` (or `true)`: Aggressively redacts strings of numerals
-    *  `ssn` (*beta*): Redacts social security numbers
-    * @see https://developers.deepgram.com/api-reference/speech-recognition-api#operation/transcribeAudio/properties/redact
-    */
-   redact?: Array<string>;
-   /**
-    * Indicates whether to recognize speaker changes. When set to true, each word
-    * in the transcript will be assigned a speaker number starting at 0.
-    * @see https://developers.deepgram.com/api-reference/speech-recognition-api#operation/transcribeAudio/properties/diarize
-    */
-   diarize?: boolean;
-   /**
-    * Indicates whether to transcribe each audio channel independently. When set
-    * to true, you will receive one transcript for each channel, which means you
-    * can apply a different model to each channel using the model parameter (e.g.,
-    * set model to general:phonecall, which applies the general model to channel
-    * 0 and the phonecall model to channel 1).
-    * @see https://developers.deepgram.com/api-reference/speech-recognition-api#operation/transcribeAudio/properties/multichannel
-    */
-   multichannel?: boolean;
-   /**
-    * Maximum number of transcript alternatives to return. Just like a human listener,
-    * Deepgram can provide multiple possible interpretations of what it hears.
-    * @default 1
-    */
-   alternatives?: number;
-   /**
-    * Indicates whether to convert numbers from written format (e.g., one) to
-    * numerical format (e.g., 1). Deepgram can format numbers up to 999,999.
-    * @remarks Converted numbers do not include punctuation. For example,
-    * 999,999 would be transcribed as 999999.
-    * @see https://developers.deepgram.com/api-reference/speech-recognition-api#operation/transcribeAudio/properties/numerals
-    */
-   numerals?: boolean;
-   /**
-    * Terms or phrases to search for in the submitted audio. Deepgram searches
-    * for acoustic patterns in audio rather than text patterns in transcripts
-    * because we have noticed that acoustic pattern matching is more performant.
-    * @see https://developers.deepgram.com/api-reference/speech-recognition-api#operation/transcribeAudio/properties/search
-    */
-   search?: Array<string>;
-   /**
-    * Callback URL to provide if you would like your submitted audio to be
-    * processed asynchronously. When passed, Deepgram will immediately respond
-    * with a request_id. When it has finished analyzing the audio, it will send
-    * a POST request to the provided URL with an appropriate HTTP status code.
-    * @remarks You may embed basic authentication credentials in the callback URL.
-    * Only ports 80, 443, 8080, and 8443 can be used for callbacks.
-    * @see https://developers.deepgram.com/api-reference/speech-recognition-api#operation/transcribeAudio/properties/callback
-    */
-   callback?: string;
-   /**
-    * Keywords to which the model should pay particular attention to boosting
-    * or suppressing to help it understand context. Just like a human listener,
-    * Deepgram can better understand mumbled, distorted, or otherwise
-    * hard-to-decipher speech when it knows the context of the conversation.
-    * @see https://developers.deepgram.com/api-reference/speech-recognition-api#operation/transcribeAudio/properties/keywords
-    */
-   keywords?: Array<string>;
-   /**
-    * Indicates whether Deepgram will segment speech into meaningful semantic
-    * units, which allows the model to interact more naturally and effectively
-    * with speakers' spontaneous speech patterns. For example, when humans
-    * speak to each other conversationally, they often pause mid-sentence to
-    * reformulate their thoughts, or stop and restart a badly-worded sentence.
-    * When utterances is set to true, these utterances are identified and
-    * returned in the transcript results.
-    *
-    * By default, when utterances is enabled, it starts a new utterance after
-    * 0.8 s of silence. You can customize the length of time used to determine
-    * where to split utterances by submitting the utt_split parameter.
-    * @remarks **BETA FEATURE**
-    * @see https://developers.deepgram.com/api-reference/speech-recognition-api#operation/transcribeAudio/properties/utterances
-    */
-   utterances?: boolean;
-   /**
-    * Length of time in seconds of silence between words that Deepgram will
-    * use when determining where to split utterances. Used when utterances
-    * is enabled.
-    * @default 0.8 seconds
-    * @remarks **BETA FEATURE**
-    * @see https://developers.deepgram.com/api-reference/speech-recognition-api#operation/transcribeAudio/properties/utt_split
-    */
-   utt_split?: number;
- }
- ```
-
- ### Live Transcription
-
- The `transcription.live` method provides access to a websocket connection
- to the Deepgram API for generating streaming transcriptions. [Additional options](#options)
- can be provided to customize the result.
-
- ```js
- const deepgramLive = deepgram.transcription.live({ punctuate: true });
-
- socket.on("microphone-stream", (stream) => {
-   deepgramSocket.send(stream);
+ const response = await deepgram.transcription.preRecorded(fileSource, {
+   punctuate: true,
  });
-
- /**
-  * Receive transcriptions based on sent streams
-  */
- deepgramLive.addListener("transcriptReceived", (transcription) => {
-   console.log(transcription.data);
- });
- ```
-
- #### Events
-
- The following events are fired by the live transcription object:
-
- | Event                | Description                                            | Data                                               |
- | -------------------- | ------------------------------------------------------ | -------------------------------------------------- |
- | `open`               | The websocket connection to Deepgram has been opened.  | The DG live transcription object                   |
- | `close`              | The websocket connection to Deepgram has been closed.  | WebSocket.CloseEvent                               |
- | `error`              | An error occurred with the websocket connection        | Error object                                       |
- | `transcriptReceived` | Deepgram has responded with a transcription            | [Transcription Response](#transcription-response)  |
-
- #### Live Transcription Options
-
- Additional transcription options can be provided for live transcriptions.
-
- ```js
- {
-   /**
-    * AI model used to process submitted audio.
-    * @default general
-    * @remarks Possible values are general, phonecall, meeting or a custom string
-    * @see https://developers.deepgram.com/api-reference/speech-recognition-api#operation/transcribeAudio/properties/model
-    */
-   model?: Models | string;
-
-   /**
-    * Version of the model to use.
-    * @default latest
-    * @remarks latest OR <version_id>
-    * @see https://developers.deepgram.com/api-reference/speech-recognition-api#operation/transcribeAudio/properties/version
-    */
-   version: string;
-   /**
-    * BCP-47 language tag that hints at the primary spoken language.
-    * @default en-US
-    * @remarks Possible values are en-GB, en-IN, en-NZ, en-US, es, fr, ko, pt,
-    * pt-BR, ru, tr or null
-    * @see https://developers.deepgram.com/api-reference/speech-recognition-api#operation/transcribeAudio/properties/language
-    */
-   language?: string;
-   /**
-    * Indicates whether to add punctuation and capitalization to the transcript.
-    * @see https://developers.deepgram.com/api-reference/speech-recognition-api#operation/transcribeAudio/properties/punctuate
-    */
-   punctuate?: boolean;
-   /**
-    * Indicates whether to remove profanity from the transcript.
-    * @see https://developers.deepgram.com/api-reference/speech-recognition-api#operation/transcribeAudio/properties/profanity_filter
-    */
-   profanity_filter?: boolean;
-   /**
-    * Indicates whether to redact sensitive information, replacing redacted content with asterisks (*).
-    * @remarks Options include:
-    *  `pci`: Redacts sensitive credit card information, including credit card number, expiration date, and CVV
-    *  `numbers` (or `true)`: Aggressively redacts strings of numerals
-    *  `ssn` (*beta*): Redacts social security numbers
-    * @see https://developers.deepgram.com/api-reference/speech-recognition-api#operation/transcribeAudio/properties/redact
-    */
-   redact?: Array<string>;
-   /**
-    * Indicates whether to recognize speaker changes. When set to true, each word
-    * in the transcript will be assigned a speaker number starting at 0.
-    * @see https://developers.deepgram.com/api-reference/speech-recognition-api#operation/transcribeAudio/properties/diarize
-    */
-   diarize?: boolean;
-   /**
-    * Indicates whether to transcribe each audio channel independently. When set
-    * to true, you will receive one transcript for each channel, which means you
-    * can apply a different model to each channel using the model parameter (e.g.,
-    * set model to general:phonecall, which applies the general model to channel
-    * 0 and the phonecall model to channel 1).
-    * @see https://developers.deepgram.com/api-reference/speech-recognition-api#operation/transcribeAudio/properties/multichannel
-    */
-   multichannel?: boolean;
-   /**
-    * Maximum number of transcript alternatives to return. Just like a human listener,
-    * Deepgram can provide multiple possible interpretations of what it hears.
-    * @default 1
-    */
-   alternatives?: number;
-   /**
-    * Indicates whether to convert numbers from written format (e.g., one) to
-    * numerical format (e.g., 1). Deepgram can format numbers up to 999,999.
-    * @remarks Converted numbers do not include punctuation. For example,
-    * 999,999 would be transcribed as 999999.
-    * @see https://developers.deepgram.com/api-reference/speech-recognition-api#operation/transcribeAudio/properties/numerals
-    */
-   numerals?: boolean;
-   /**
-    * Terms or phrases to search for in the submitted audio. Deepgram searches
-    * for acoustic patterns in audio rather than text patterns in transcripts
-    * because we have noticed that acoustic pattern matching is more performant.
-    * @see https://developers.deepgram.com/api-reference/speech-recognition-api#operation/transcribeAudio/properties/search
-    */
-   search?: Array<string>;
-   /**
-    * Callback URL to provide if you would like your submitted audio to be
-    * processed asynchronously. When passed, Deepgram will immediately respond
-    * with a request_id. When it has finished analyzing the audio, it will send
-    * a POST request to the provided URL with an appropriate HTTP status code.
-    * @remarks You may embed basic authentication credentials in the callback URL.
-    * Only ports 80, 443, 8080, and 8443 can be used for callbacks.
-    * @see https://developers.deepgram.com/api-reference/speech-recognition-api#operation/transcribeAudio/properties/callback
-    */
-   callback?: string;
-   /**
-    * Keywords to which the model should pay particular attention to boosting
-    * or suppressing to help it understand context. Just like a human listener,
-    * Deepgram can better understand mumbled, distorted, or otherwise
-    * hard-to-decipher speech when it knows the context of the conversation.
-    * @see https://developers.deepgram.com/api-reference/speech-recognition-api#operation/transcribeAudio/properties/keywords
-    */
-   keywords?: Array<string>;
-   /**
-    * Indicates whether the streaming endpoint should send you updates to its
-    * transcription as more audio becomes available. By default, the streaming
-    * endpoint returns regular updates, which means transcription results will
-    * likely change for a period of time. You can avoid receiving these updates
-    * by setting this flag to false.
-    * @see https://developers.deepgram.com/api-reference/speech-recognition-api#operation/transcribeStreamingAudio/properties/interim_results
-    */
-   interim_results?: boolean;
-   /**
-    * Indicates whether Deepgram will detect whether a speaker has finished
-    * speaking (or paused for a significant period of time, indicating the
-    * completion of an idea). When Deepgram detects an endpoint, it assumes
-    * that no additional data will improve its prediction, so it immediately
-    * finalizes the result for the processed time range and returns the
-    * transcript with a speech_final parameter set to true.
-    * @see https://developers.deepgram.com/api-reference/speech-recognition-api#operation/transcribeStreamingAudio/properties/endpointing
-    */
-   endpointing?: boolean;
-   /**
-    * Length of time in milliseconds of silence that voice activation detection
-    * (VAD) will use to detect that a speaker has finished speaking. Used when
-    * endpointing is enabled. Defaults to 10 ms. Deepgram customers may configure
-    * a value between 10 ms and 500 ms; on-premise customers may remove this
-    * restriction.
-    * @default 10
-    * @see https://developers.deepgram.com/api-reference/speech-recognition-api#operation/transcribeStreamingAudio/properties/vad_turnoff
-    */
-   vad_turnoff?: number;
-   /**
-    * Expected encoding of the submitted streaming audio.
-    * @see https://developers.deepgram.com/api-reference/speech-recognition-api#operation/transcribeStreamingAudio/properties/encoding
-    */
-   encoding?: string;
-   /**
-    * Number of independent audio channels contained in submitted streaming
-    * audio. Only read when a value is provided for encoding.
-    * @default 1
-    * @see https://developers.deepgram.com/api-reference/speech-recognition-api#operation/transcribeStreamingAudio/properties/channels
-    */
-   channels?: number;
-   /**
-    * Sample rate of submitted streaming audio. Required (and only read)
-    * when a value is provided for encoding.
-    * @see https://developers.deepgram.com/api-reference/speech-recognition-api#operation/transcribeStreamingAudio/properties/sample_rate
-    */
-   sample_rate?: number;
- }
  ```
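The trimmed README no longer documents the response shape, but the schema removed just below still shows where the text lives. A minimal sketch for reading the transcript out of a `preRecorded` result, assuming a single-channel response with that `results.channels[].alternatives[].transcript` layout:

```js
const response = await deepgram.transcription.preRecorded(fileSource, {
  punctuate: true,
});

// Path taken from the response schema removed below.
const transcript = response.results.channels[0].alternatives[0].transcript;
console.log(transcript);
```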

- ### Transcription Response
+ #### Local Files

  ```js
- {
-   "metadata": {
-     "request_id": "string",
-     "transaction_key": "string",
-     "sha256": "string",
-     "created": "string",
-     "duration": 0,
-     "channels": 0
-   },
-   "results": {
-     "channels": [
-       {
-         "search": [
-           {
-             "query": "string",
-             "hits": [
-               {
-                 "confidence": 0,
-                 "start": 0,
-                 "end": 0,
-                 "snippet": "string"
-               }
-             ]
-           }
-         ],
-         "alternatives": [
-           {
-             "transcript": "string",
-             "confidence": 0,
-             "words": [
-               {
-                 "word": "string",
-                 "start": 0,
-                 "end": 0,
-                 "confidence": 0
-               }
-             ]
-           }
-         ]
-       }
-     ]
-   }
- }
- ```
-
- ## Project Management
-
- ### List Projects
-
- Retrieve all projects
-
- ```js
- const projects = await deepgram.projects.list();
- ```
-
- #### List Projects Response
-
- ```ts
- {
-   projects: [
-     {
-       id: string,
-       name: string,
-     },
-   ],
- }
- ```
-
- ### Get a Project
-
- Retrieves all project based on the provided project id.
-
- ```js
- const project = await deepgram.projects.get(PROJECT_ID);
- ```
-
- #### Get a Project Response
-
- ```ts
- {
-   id: string,
-   name: string,
- }
- ```
-
- ## Key Management
-
- ### List Keys
-
- Retrieves all keys for a given project.
-
- ```js
- const response = await deepgram.keys.list(PROJECT_ID);
- ```
-
- #### List Keys Response
-
- ```ts
- {
-   keys: [
-     {
-       id: string,
-       comment: string,
-       created: Date,
-       scopes: Array<string>
-     },
-   ];
- }
- ```
-
- ### Create Key
-
- Create a new API key for a project using the `keys.create` method
- with a name for the key.
-
- ```js
- const response = await deepgram.keys.create(PROJECT_ID, COMMENT_FOR_KEY);
- ```
-
- #### Create Key Response
-
- ```ts
- {
-   id: string,
-   key: string,
-   comment: string,
-   created: Date,
-   scopes: Array<string>
- }
- ```
-
- ### Delete key
-
- Delete an existing API key using the `keys.delete` method with the key to
- delete.
-
- ```js
- await deepgram.keys.delete(PROJECT_ID, KEY_ID);
- ```
-
- ## Usage
-
- ### Requests by Project
-
- Retrieves transcription requests for a project based on the provided options.
-
- ```js
- const response = await deepgram.usage.listRequests(PROJECT_ID, {
-   limit: 10,
-   // other options are available
- });
- ```
-
- #### Requests by Project Options
-
- ```js
- {
-   // The time to retrieve requests made since
-   // Example: "2020-01-01T00:00:00+00:00"
-   start?: string,
-   // The time to retrieve requests made until
-   // Example: "2021-01-01T00:00:00+00:00"
-   end?: string,
-   // Page of requests to return
-   // Defaults to 0
-   page?: number,
-   // Number of requests to return per page
-   // Defaults to 10. Maximum of 100
-   limit?: number,
-   // Filter by succeeded or failed requests
-   // By default, all requests are returned
-   status?: 'succeeded' | 'failed'
- }
- ```
-
- #### Requests by Project Response
-
- ```ts
- {
-   page: number,
-   limit: number,
-   requests?: [
-     {
-       id: string;
-       created: string;
-       path: string;
-       accessor: string;
-       response?: {
-         details: {
-           usd: number;
-           duration: number;
-           total_audio: number;
-           channels: number;
-           streams: number;
-           model: string;
-           method: string;
-           tags: Array<string>;
-           features: Array<string>;
-           config: {
-             multichannel?: boolean;
-             interim_results?: boolean;
-             punctuate?: boolean;
-             ner?: boolean;
-             utterances?: boolean;
-             replace?: boolean;
-             profanity_filter?: boolean;
-             keywords?: boolean;
-             sentiment?: boolean;
-             diarize?: boolean;
-             detect_language?: boolean;
-             search?: boolean;
-             redact?: boolean;
-             alternatives?: boolean;
-             numerals?: boolean;
-           };
-         }
-       }, ||
-       {
-         message?: string;
-       },
-       callback?: {
-         code: number;
-         completed: string;
-       },
-     },
-   ];
- }
- ```
-
- ### Get Specific Request
-
- Retrieves a specific transcription request for a project based on the provided
- `projectId` and `requestId`.
-
- ```js
- const response = await deepgram.usage.getRequest(PROJECT_ID, REQUEST_ID);
- ```
-
- #### Specific Request Response
-
- ```ts
- {
-   id: string;
-   created: string;
-   path: string;
-   accessor: string;
-   response?: {
-     details: {
-       usd: number;
-       duration: number;
-       total_audio: number;
-       channels: number;
-       streams: number;
-       model: string;
-       method: string;
-       tags: Array<string>;
-       features: Array<string>;
-       config: {
-         multichannel?: boolean;
-         interim_results?: boolean;
-         punctuate?: boolean;
-         ner?: boolean;
-         utterances?: boolean;
-         replace?: boolean;
-         profanity_filter?: boolean;
-         keywords?: boolean;
-         sentiment?: boolean;
-         diarize?: boolean;
-         detect_language?: boolean;
-         search?: boolean;
-         redact?: boolean;
-         alternatives?: boolean;
-         numerals?: boolean;
-       };
-     }
-   }, ||
-   {
-     message?: string;
-   },
-   callback?: {
-     code: number;
-     completed: string;
-   }
- }
- ```
-
- ### Get Usage by Project
-
- Retrieves aggregated usage data for a project based on the provided options.
+ const streamSource = {
+   stream: fs.createReadStream("/path/to/file"),
+   mimetype: MIMETYPE_OF_FILE,
+ };

- ```js
- const response = await deepgram.usage.getUsage(PROJECT_ID, {
-   start: "2020-01-01T00:00:00+00:00",
-   // other options are available
+ const response = await deepgram.transcription.preRecorded(streamSource, {
+   punctuate: true,
  });
  ```
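Note that the new Local Files example uses `fs.createReadStream` without showing the import, and `MIMETYPE_OF_FILE` is a placeholder. A self-contained version of the snippet would read:

```js
const fs = require("fs");

const streamSource = {
  stream: fs.createReadStream("/path/to/file"),
  mimetype: "audio/wav", // substitute the actual MIME type of your file
};

const response = await deepgram.transcription.preRecorded(streamSource, {
  punctuate: true,
});
```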

- #### Usage by Project Options
+ ### Transcribe Audio in Real-Time

  ```js
- {
-   // The time to retrieve requests made since
-   // Example: "2020-01-01T00:00:00+00:00"
-   start?: string,
-   // The time to retrieve requests made until
-   // Example: "2021-01-01T00:00:00+00:00"
-   end?: string,
-   // Specific identifer for a request
-   accessor?: string,
-   // Array of tags used in requests
-   tag?: Array<string>,
-   // Filter requests by method
-   method?: "sync" | "async" | "streaming",
-   // Filter requests by model used
-   model?: string,
-   // Filter only requests using multichannel feature
-   multichannel?: boolean,
-   // Filter only requests using interim results feature
-   interim_results?: boolean,
-   // Filter only requests using the punctuation feature
-   punctuate?: boolean,
-   // Filter only requests using ner feature
-   ner?: boolean,
-   // Filter only requests using utterances feature
-   utterances?: boolean,
-   // Filter only requests using replace feature
-   replace?: boolean,
-   // Filter only requests using profanity_filter feature
-   profanity_filter?: boolean,
-   // Filter only requests using keywords feature
-   keywords?: boolean,
-   // Filter only requests using sentiment feature
-   sentiment?: boolean,
-   // Filter only requests using diarization feature
-   diarize?: boolean,
-   // Filter only requests using detect_language feature
-   detect_language?: boolean,
-   // Filter only requests using search feature
-   search?: boolean,
-   // Filter only requests using redact feature
-   redact?: boolean,
-   // Filter only requests using alternatives feature
-   alternatives?: boolean,
-   // Filter only requests using numerals feature
-   numerals?: boolean
- }
- ```
+ navigator.mediaDevices.getUserMedia({ audio: true }).then((stream) => {
+   const mediaRecorder = new MediaRecorder(stream, {
+     mimeType: 'audio/webm',
+   });
+   const deepgramSocket = deepgram.transcription.live({ punctuate: true });

- #### Get Usage Response
-
- ```ts
- {
-   start: string,
-   end: string,
-   resolution: {
-     units: string,
-     amount: number
-   };
-   results: [
-     {
-       start: string,
-       end: string,
-       hours: number,
-       requests: number
+   deepgramSocket.addListener('open', () => {
+     mediaRecorder.addEventListener('dataavailable', async (event) => {
+       if (event.data.size > 0 && deepgramSocket.readyState == 1) {
+         deepgramSocket.send(event.data)
+       }
+     })
+     mediaRecorder.start(1000)
+   });
+
+   deepgramSocket.addListener("transcriptReceived", (transcription) => {
+     // The event payload is a JSON string; parse it before reading fields.
+     const received = JSON.parse(transcription);
+     const transcript = received.channel.alternatives[0].transcript;
+     if (transcript && received.is_final) {
+       console.log(transcript);
      }
-   ];
- }
- ```
-
- ### Get Fields
-
- Retrieves features used by the provided projectId based on the provided options.
-
- ```js
- const response = await deepgram.usage.getUsage(PROJECT_ID, {
-   start: "2020-01-01T00:00:00+00:00",
-   // other options are available
+   });
  });
  ```
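The new real-time example is browser code (`navigator.mediaDevices` and `MediaRecorder` do not exist in Node). For Node, the old README's pattern of pushing chunks into the live socket with `send` still applies; a sketch, assuming a raw audio file on disk:

```js
const fs = require("fs");

const deepgramSocket = deepgram.transcription.live({ punctuate: true });

deepgramSocket.addListener("open", () => {
  // Stream the file in chunks once the socket is open, mirroring the
  // deepgramSocket.send(...) calls in both README versions.
  fs.createReadStream("/path/to/file").on("data", (chunk) => {
    deepgramSocket.send(chunk);
  });
});

deepgramSocket.addListener("transcriptReceived", (message) => {
  console.log(JSON.parse(message));
});
```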

- #### Get Fields Options
-
- ```js
- {
-   // The time to retrieve requests made since
-   // Example: "2020-01-01T00:00:00+00:00"
-   start?: string,
-   // The time to retrieve requests made until
-   // Example: "2021-01-01T00:00:00+00:00"
-   end?: string
- }
- ```
-
- #### Get Fields Response
-
- ```ts
- {
-   tags: Array<string>,
-   models: Array<string>,
-   processing_methods: Array<string>,
-   languages: Array<string>,
-   features: Array<string>
- }
- ```
-
  ## Samples

  To run the sample code, first run the following in your terminal: