@ai-sdk/revai 0.0.0-1c33ba03-20260114162300 → 0.0.0-4115c213-20260122152721
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +32 -3
- package/dist/index.js +1 -1
- package/dist/index.mjs +1 -1
- package/docs/160-revai.mdx +206 -0
- package/package.json +14 -5
- package/src/index.ts +3 -0
- package/src/revai-api-types.ts +274 -0
- package/src/revai-config.ts +9 -0
- package/src/revai-error.ts +16 -0
- package/src/revai-provider.ts +120 -0
- package/src/revai-transcription-model.ts +516 -0
- package/src/revai-transcription-options.ts +1 -0
- package/src/transcript-test.mp3 +0 -0
- package/src/version.ts +6 -0
package/CHANGELOG.md
CHANGED

@@ -1,11 +1,40 @@
 # @ai-sdk/revai
 
-## 0.0.0-
+## 0.0.0-4115c213-20260122152721
 
 ### Patch Changes
 
--
-
+- 4caafb2: chore: excluded tests from src folder in npm package
+- Updated dependencies [4caafb2]
+  - @ai-sdk/provider@0.0.0-4115c213-20260122152721
+  - @ai-sdk/provider-utils@0.0.0-4115c213-20260122152721
+
+## 2.0.10
+
+### Patch Changes
+
+- 2b8369d: chore: add docs to package dist
+
+## 2.0.9
+
+### Patch Changes
+
+- 8dc54db: chore: add src folders to package bundle
+
+## 2.0.8
+
+### Patch Changes
+
+- Updated dependencies [5c090e7]
+  - @ai-sdk/provider@3.0.4
+  - @ai-sdk/provider-utils@4.0.8
+
+## 2.0.7
+
+### Patch Changes
+
+- Updated dependencies [46f46e4]
+  - @ai-sdk/provider-utils@4.0.7
 
 ## 2.0.6
 
package/dist/index.js
CHANGED

@@ -455,7 +455,7 @@ var revaiTranscriptionResponseSchema = import_v42.z.object({
 });
 
 // src/version.ts
-var VERSION = true ? "0.0.0-
+var VERSION = true ? "0.0.0-4115c213-20260122152721" : "0.0.0-test";
 
 // src/revai-provider.ts
 function createRevai(options = {}) {
package/dist/index.mjs
CHANGED

@@ -443,7 +443,7 @@ var revaiTranscriptionResponseSchema = z2.object({
 });
 
 // src/version.ts
-var VERSION = true ? "0.0.0-
+var VERSION = true ? "0.0.0-4115c213-20260122152721" : "0.0.0-test";
 
 // src/revai-provider.ts
 function createRevai(options = {}) {
package/docs/160-revai.mdx
ADDED

@@ -0,0 +1,206 @@
+---
+title: Rev.ai
+description: Learn how to use the Rev.ai provider for the AI SDK.
+---
+
+# Rev.ai Provider
+
+The [Rev.ai](https://www.rev.ai/) provider contains transcription model support for the Rev.ai transcription API.
+
+## Setup
+
+The Rev.ai provider is available in the `@ai-sdk/revai` module. You can install it with
+
+<Tabs items={['pnpm', 'npm', 'yarn', 'bun']}>
+  <Tab>
+    <Snippet text="pnpm add @ai-sdk/revai" dark />
+  </Tab>
+  <Tab>
+    <Snippet text="npm install @ai-sdk/revai" dark />
+  </Tab>
+  <Tab>
+    <Snippet text="yarn add @ai-sdk/revai" dark />
+  </Tab>
+
+  <Tab>
+    <Snippet text="bun add @ai-sdk/revai" dark />
+  </Tab>
+</Tabs>
+
+## Provider Instance
+
+You can import the default provider instance `revai` from `@ai-sdk/revai`:
+
+```ts
+import { revai } from '@ai-sdk/revai';
+```
+
+If you need a customized setup, you can import `createRevai` from `@ai-sdk/revai` and create a provider instance with your settings:
+
+```ts
+import { createRevai } from '@ai-sdk/revai';
+
+const revai = createRevai({
+  // custom settings, e.g.
+  fetch: customFetch,
+});
+```
+
+You can use the following optional settings to customize the Rev.ai provider instance:
+
+- **apiKey** _string_
+
+  API key that is being sent using the `Authorization` header.
+  It defaults to the `REVAI_API_KEY` environment variable.
+
+- **headers** _Record<string,string>_
+
+  Custom headers to include in the requests.
+
+- **fetch** _(input: RequestInfo, init?: RequestInit) => Promise<Response>_
+
+  Custom [fetch](https://developer.mozilla.org/en-US/docs/Web/API/fetch) implementation.
+  Defaults to the global `fetch` function.
+  You can use it as a middleware to intercept requests,
+  or to provide a custom fetch implementation for e.g. testing.
+
+## Transcription Models
+
+You can create models that call the [Rev.ai transcription API](https://www.rev.ai/docs/api/transcription)
+using the `.transcription()` factory method.
+
+The first argument is the model id, e.g. `machine`.
+
+```ts
+const model = revai.transcription('machine');
+```
+
+You can also pass additional provider-specific options using the `providerOptions` argument. For example, supplying the input language in ISO-639-1 (e.g. `en`) format can sometimes improve transcription performance if known beforehand.
+
+```ts highlight="6"
+import { experimental_transcribe as transcribe } from 'ai';
+import { revai } from '@ai-sdk/revai';
+import { readFile } from 'fs/promises';
+
+const result = await transcribe({
+  model: revai.transcription('machine'),
+  audio: await readFile('audio.mp3'),
+  providerOptions: { revai: { language: 'en' } },
+});
+```
+
+The following provider options are available:
+
+- **metadata** _string_
+
+  Optional metadata that was provided during job submission.
+
+- **notification_config** _object_
+
+  Optional configuration for a callback url to invoke when processing is complete.
+
+  - **url** _string_ - Callback url to invoke when processing is complete.
+  - **auth_headers** _object_ - Optional authorization headers, if needed to invoke the callback.
+    - **Authorization** _string_ - Authorization header value.
+
+- **delete_after_seconds** _integer_
+
+  Amount of time in seconds after job completion when the job is auto-deleted.
+
+- **verbatim** _boolean_
+
+  Configures the transcriber to transcribe every syllable, including all false starts and disfluencies.
+
+- **rush** _boolean_
+
+  [HIPAA Unsupported] Only available for the human transcriber option. When set to true, your job is given higher priority.
+
+- **skip_diarization** _boolean_
+
+  Specify whether speaker diarization should be skipped by the speech engine.
+
+- **skip_postprocessing** _boolean_
+
+  Only available for English and Spanish languages. User-supplied preference on whether to skip post-processing operations.
+
+- **skip_punctuation** _boolean_
+
+  Specify whether "punct" type elements should be skipped by the speech engine.
+
+- **remove_disfluencies** _boolean_
+
+  When set to true, disfluencies (like 'ums' and 'uhs') will not appear in the transcript.
+
+- **remove_atmospherics** _boolean_
+
+  When set to true, atmospherics (like `<laugh>`, `<affirmative>`) will not appear in the transcript.
+
+- **filter_profanity** _boolean_
+
+  When enabled, profanities will be filtered by replacing characters with asterisks except for the first and last.
+
+- **speaker_channels_count** _integer_
+
+  Only available for English, Spanish and French languages. Specify the total number of unique speaker channels in the audio.
+
+- **speakers_count** _integer_
+
+  Only available for English, Spanish and French languages. Specify the total number of unique speakers in the audio.
+
+- **diarization_type** _string_
+
+  Specify the diarization type. Possible values: "standard" (default), "premium".
+
+- **custom_vocabulary_id** _string_
+
+  Supply the id of a pre-completed custom vocabulary submitted through the Custom Vocabularies API.
+
+- **custom_vocabularies** _Array_
+
+  Specify a collection of custom vocabularies to be used for this job.
+
+- **strict_custom_vocabulary** _boolean_
+
+  If true, only exact phrases will be used as custom vocabulary.
+
+- **summarization_config** _object_
+
+  Specify summarization options.
+
+  - **model** _string_ - Model type for summarization. Possible values: "standard" (default), "premium".
+  - **type** _string_ - Summarization formatting type. Possible values: "paragraph" (default), "bullets".
+  - **prompt** _string_ - Custom prompt for flexible summaries (mutually exclusive with type).
+
+- **translation_config** _object_
+
+  Specify translation options.
+
+  - **target_languages** _Array_ - Array of target languages for translation.
+  - **model** _string_ - Model type for translation. Possible values: "standard" (default), "premium".
+
+- **language** _string_
+
+  Language is provided as an ISO 639-1 language code. Default is "en".
+
+- **forced_alignment** _boolean_
+
+  When enabled, provides improved accuracy for per-word timestamps for a transcript.
+  Default is `false`.
+
+  Currently supported languages:
+
+  - English (en, en-us, en-gb)
+  - French (fr)
+  - Italian (it)
+  - German (de)
+  - Spanish (es)
+
+  Note: This option is not available in the low-cost environment.
+
+### Model Capabilities
+
+| Model      | Transcription       | Duration            | Segments            | Language            |
+| ---------- | ------------------- | ------------------- | ------------------- | ------------------- |
+| `machine`  | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> |
+| `low_cost` | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> |
+| `fusion`   | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> |
package/package.json
CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@ai-sdk/revai",
-  "version": "0.0.0-
+  "version": "0.0.0-4115c213-20260122152721",
   "license": "Apache-2.0",
   "sideEffects": false,
   "main": "./dist/index.js",

@@ -8,9 +8,18 @@
   "types": "./dist/index.d.ts",
   "files": [
     "dist/**/*",
+    "docs/**/*",
+    "src",
+    "!src/**/*.test.ts",
+    "!src/**/*.test-d.ts",
+    "!src/**/__snapshots__",
+    "!src/**/__fixtures__",
     "CHANGELOG.md",
     "README.md"
   ],
+  "directories": {
+    "doc": "./docs"
+  },
   "exports": {
     "./package.json": "./package.json",
     ".": {

@@ -20,15 +29,15 @@
     }
   },
   "dependencies": {
-    "@ai-sdk/provider": "
-    "@ai-sdk/provider-utils": "0.0.0-
+    "@ai-sdk/provider": "0.0.0-4115c213-20260122152721",
+    "@ai-sdk/provider-utils": "0.0.0-4115c213-20260122152721"
   },
   "devDependencies": {
     "@types/node": "20.17.24",
     "tsup": "^8",
     "typescript": "5.6.3",
     "zod": "3.25.76",
-    "@ai-sdk/test-server": "
+    "@ai-sdk/test-server": "0.0.0-4115c213-20260122152721",
     "@vercel/ai-tsconfig": "0.0.0"
   },
   "peerDependencies": {

@@ -54,7 +63,7 @@
   "scripts": {
     "build": "tsup --tsconfig tsconfig.build.json",
     "build:watch": "tsup --tsconfig tsconfig.build.json --watch",
-    "clean": "del-cli dist",
+    "clean": "del-cli dist docs",
     "lint": "eslint \"./**/*.ts*\"",
     "type-check": "tsc --noEmit",
     "prettier-check": "prettier --check \"./**/*.ts*\"",
package/src/revai-api-types.ts
ADDED

@@ -0,0 +1,274 @@
+export type RevaiTranscriptionAPITypes = {
+  /**
+   * Optional metadata that was provided during job submission.
+   */
+  metadata?: string | null;
+
+  /**
+   * Optional configuration for a callback url to invoke when processing is complete,
+   * in addition to auth headers if they are needed to invoke the callback url.
+   * Cannot be set if callback_url is set. This option will not be visible in the submission response.
+   */
+  notification_config?: {
+    /**
+     * Optional callback url to invoke when processing is complete
+     */
+    url: string;
+    /**
+     * Optional authorization headers, if they are needed to invoke the callback.
+     * There are a few constraints: 1) the "Authorization" header is the only header that can be passed in,
+     * and 2) the header value must be of the form <scheme> <token>.
+     * For example: {"Authorization": "Bearer $BEARER_TOKEN"}
+     */
+    auth_headers?: {
+      /**
+       * Authorization header
+       */
+      Authorization: string;
+    } | null;
+  } | null;
+
+  /**
+   * Amount of time after job completion when job is auto-deleted. Present only when preference set in job request.
+   */
+  delete_after_seconds?: number | null;
+
+  /**
+   * Select which service you would like to transcribe this file with.
+   * - machine: the default and routes to our standard (Reverb) model.
+   * - low_cost: low-cost transcription which uses a quantized ASR model (Reverb Turbo) in the low-cost environment.
+   * - fusion: higher quality ASR that combines multiple models to achieve the best results. Typically has better support for rare words.
+   * @default "machine"
+   */
+  transcriber?: 'machine' | 'low_cost' | 'fusion' | null;
+
+  /**
+   * Configures the transcriber to transcribe every syllable. This will include all false starts and disfluencies in the transcript.
+   *
+   * The behavior depends on the transcriber option:
+   * - machine: the default is true. To turn it off, false should be explicitly provided.
+   * - human: the default is false. To turn it on, true should be explicitly provided.
+   */
+  verbatim?: boolean;
+
+  /**
+   * [HIPAA Unsupported] Only available for human transcriber option.
+   * When this field is set to true your job is given higher priority and will be worked on sooner by our human transcribers.
+   * @default false
+   */
+  rush?: boolean | null;
+
+  /**
+   * [HIPAA Unsupported] Only available for human transcriber option.
+   * When this field is set to true the behavior will mock a normal human transcription job except no transcription will happen.
+   * The primary use case is to test integrations without being charged for human transcription.
+   * @default false
+   */
+  test_mode?: boolean | null;
+
+  /**
+   * [HIPAA Unsupported] Only available for human transcriber option.
+   * Use this option to specify which sections of the transcript need to be transcribed.
+   * Segments must be at least 1 minute in length and cannot overlap.
+   */
+  segments_to_transcribe?: Array<{
+    /**
+     * The timestamp of the beginning of the segment relative to the beginning of the audio in seconds (centisecond precision)
+     */
+    start: number;
+    /**
+     * The timestamp of the end of the segment relative to the beginning of the audio in seconds (centisecond precision)
+     */
+    end: number;
+  }> | null;
+
+  /**
+   * [HIPAA Unsupported] Only available for human transcriber option.
+   * Use this option to specify up to 100 names of speakers in the transcript.
+   * Names may only be up to 50 characters long.
+   */
+  speaker_names?: Array<{
+    /**
+     * The name of the speaker to be used when labeling monologues. Max of 50 characters.
+     */
+    display_name: string;
+  }> | null;
+
+  /**
+   * Specify if speaker diarization will be skipped by the speech engine
+   * @default false
+   */
+  skip_diarization?: boolean | null;
+
+  /**
+   * Only available for English and Spanish languages.
+   * User-supplied preference on whether to skip post-processing operations such as inverse text normalization (ITN), casing and punctuation.
+   * @default false
+   */
+  skip_postprocessing?: boolean | null;
+
+  /**
+   * Specify if "punct" type elements will be skipped by the speech engine.
+   * For JSON outputs, this includes removing spaces. For text outputs, words will still be delimited by a space
+   * @default false
+   */
+  skip_punctuation?: boolean | null;
+
+  /**
+   * Currently we only define disfluencies as 'ums' and 'uhs'.
+   * When set to true, disfluencies will not appear in the transcript.
+   * This option also removes atmospherics if remove_atmospherics is not set.
+   * This option is not available for human transcription jobs.
+   * @default false
+   */
+  remove_disfluencies?: boolean | null;
+
+  /**
+   * We define many atmospherics such as <laugh>, <affirmative>, etc.
+   * When set to true, atmospherics will not appear in the transcript.
+   * This option is not available for human transcription jobs.
+   * @default false
+   */
+  remove_atmospherics?: boolean | null;
+
+  /**
+   * Enabling this option will filter for approx. 600 profanities, which cover most use cases.
+   * If a transcribed word matches a word on this list, then all the characters of that word will be replaced by asterisks
+   * except for the first and last character.
+   * @default false
+   */
+  filter_profanity?: boolean | null;
+
+  /**
+   * Only available for English, Spanish and French languages.
+   * Use to specify the total number of unique speaker channels in the audio.
+   *
+   * Given the number of audio channels provided, each channel will be transcribed separately and the channel id assigned to the speaker label.
+   * The final output will be a combination of all individual channel outputs.
+   * Overlapping monologues will have ordering broken by the order in which the first spoken element of each monologue occurs.
+   * If speaker_channels_count is greater than the actual channels in the audio, the job will fail with invalid_media.
+   * This option is not available for human transcription jobs.
+   */
+  speaker_channels_count?: number | null;
+
+  /**
+   * Only available for English, Spanish and French languages.
+   * Use to specify the total number of unique speakers in the audio.
+   *
+   * Given the count of speakers provided, it will be used to improve the diarization accuracy.
+   * This option is not available for human transcription jobs.
+   * @default null
+   */
+  speakers_count?: number | null;
+
+  /**
+   * Use to specify diarization type. This option is not available for human transcription jobs and the low-cost environment.
+   * @default "standard"
+   */
+  diarization_type?: 'standard' | 'premium' | null;
+
+  /**
+   * This feature is in beta. You can supply the id of a pre-completed custom vocabulary that you submitted through the Custom Vocabularies API
+   * instead of uploading the list of phrases using the custom_vocabularies parameter.
+   * Using custom_vocabulary_id or custom_vocabularies with the same list of phrases yields the same transcription result,
+   * but custom_vocabulary_id enables your submission to finish processing faster by 6 seconds on average.
+   *
+   * You cannot use both custom_vocabulary_id and custom_vocabularies at the same time, and doing so will result in a 400 response.
+   * If the supplied id represents an incomplete, deleted, or non-existent custom vocabulary then you will receive a 404 response.
+   */
+  custom_vocabulary_id?: string | null;
+
+  /**
+   * Specify a collection of custom vocabulary to be used for this job.
+   * Custom vocabulary informs and biases the speech recognition to find those phrases (at the cost of slightly slower transcription).
+   */
+  custom_vocabularies?: Array<object>;
+
+  /**
+   * If true, only exact phrases will be used as custom vocabulary, i.e. phrases will not be split into individual words for processing.
+   * Enabled by default.
+   */
+  strict_custom_vocabulary?: boolean;
+
+  /**
+   * Use to specify summarization options. This option is not available for human transcription jobs.
+   */
+  summarization_config?: {
+    /**
+     * Model type for summarization.
+     * @default "standard"
+     */
+    model?: 'standard' | 'premium' | null;
+    /**
+     * Summarization formatting type. Use Paragraph for a text summary or Bullets for a list of topics.
+     * prompt and type parameters are mutually exclusive.
+     * @default "paragraph"
+     */
+    type?: 'paragraph' | 'bullets' | null;
+    /**
+     * Custom prompt. Provides the most flexible way to create summaries, but may lead to unpredictable results.
+     * Summary is produced in Markdown format.
+     * prompt and type parameters are mutually exclusive.
+     */
+    prompt?: string | null;
+  } | null;
+
+  /**
+   * Use to specify translation options. This option is not available for human transcription jobs.
+   */
+  translation_config?: {
+    /**
+     * Target languages for translation.
+     */
+    target_languages: Array<{
+      /**
+       * Target language for translation.
+       */
+      language:
+        | 'en'
+        | 'en-us'
+        | 'en-gb'
+        | 'ar'
+        | 'pt'
+        | 'pt-br'
+        | 'pt-pt'
+        | 'fr'
+        | 'fr-ca'
+        | 'es'
+        | 'es-es'
+        | 'es-la'
+        | 'it'
+        | 'ja'
+        | 'ko'
+        | 'de'
+        | 'ru';
+    }>;
+    /**
+     * Model type for translation.
+     * @default "standard"
+     */
+    model?: 'standard' | 'premium' | null;
+  } | null;
+
+  /**
+   * Language is provided as an ISO 639-1 language code, with exceptions.
+   * Only 1 language can be selected per audio, i.e. no multiple languages in one transcription job.
+   * @default "en"
+   */
+  language?: string | null;
+
+  /**
+   * Provides improved accuracy for per-word timestamps for a transcript.
+   *
+   * The following languages are currently supported:
+   * - English (en, en-us, en-gb)
+   * - French (fr)
+   * - Italian (it)
+   * - German (de)
+   * - Spanish (es)
+   *
+   * This option is not available in the low-cost environment
+   * @default false
+   */
+  forced_alignment?: boolean | null;
+};
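
For reference, a hedged sketch of a config object that satisfies this type; the callback URL and token values are placeholders. The transcription model below serializes such an object into the multipart `config` field of the job submission.

```ts
import type { RevaiTranscriptionAPITypes } from './revai-api-types';

// Placeholder values throughout; `transcriber` selects the model id.
const jobConfig: RevaiTranscriptionAPITypes = {
  transcriber: 'machine',
  language: 'en',
  speakers_count: 2,
  notification_config: {
    url: 'https://example.com/revai-callback',
    auth_headers: { Authorization: 'Bearer $BEARER_TOKEN' },
  },
};

// revai-transcription-model.ts (below) sends this as the `config` form field.
const configField = JSON.stringify(jobConfig);
```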
package/src/revai-config.ts
ADDED

@@ -0,0 +1,9 @@
+import { FetchFunction } from '@ai-sdk/provider-utils';
+
+export type RevaiConfig = {
+  provider: string;
+  url: (options: { modelId: string; path: string }) => string;
+  headers: () => Record<string, string | undefined>;
+  fetch?: FetchFunction;
+  generateId?: () => string;
+};
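
A sketch of how this config shape is filled in; it mirrors the wiring in `createRevai` (see revai-provider.ts below), with an inline header value standing in for the real API-key lookup.

```ts
import type { RevaiConfig } from './revai-config';

const config: RevaiConfig = {
  provider: 'revai.transcription',
  // Paths like /speechtotext/v1/jobs are appended to the API origin.
  url: ({ path }) => `https://api.rev.ai${path}`,
  // Illustrative only; the real provider loads the key via loadApiKey.
  headers: () => ({ authorization: 'Bearer <REVAI_API_KEY>' }),
};
```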
package/src/revai-error.ts
ADDED

@@ -0,0 +1,16 @@
+import { z } from 'zod/v4';
+import { createJsonErrorResponseHandler } from '@ai-sdk/provider-utils';
+
+export const revaiErrorDataSchema = z.object({
+  error: z.object({
+    message: z.string(),
+    code: z.number(),
+  }),
+});
+
+export type RevaiErrorData = z.infer<typeof revaiErrorDataSchema>;
+
+export const revaiFailedResponseHandler = createJsonErrorResponseHandler({
+  errorSchema: revaiErrorDataSchema,
+  errorToMessage: data => data.error.message,
+});
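
A quick sketch of the error shape this handler expects; the concrete message and code are invented for illustration.

```ts
import { revaiErrorDataSchema } from './revai-error';

// Parses a payload matching the schema above (values are made up).
const parsed = revaiErrorDataSchema.parse({
  error: { message: 'Invalid media file', code: 400 },
});

console.log(parsed.error.message); // "Invalid media file"
```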
package/src/revai-provider.ts
ADDED

@@ -0,0 +1,120 @@
+import {
+  TranscriptionModelV3,
+  ProviderV3,
+  NoSuchModelError,
+} from '@ai-sdk/provider';
+import {
+  FetchFunction,
+  loadApiKey,
+  withUserAgentSuffix,
+} from '@ai-sdk/provider-utils';
+import { RevaiTranscriptionModel } from './revai-transcription-model';
+import { RevaiTranscriptionModelId } from './revai-transcription-options';
+import { VERSION } from './version';
+
+export interface RevaiProvider extends ProviderV3 {
+  (
+    modelId: 'machine',
+    settings?: {},
+  ): {
+    transcription: RevaiTranscriptionModel;
+  };
+
+  /**
+   * Creates a model for transcription.
+   */
+  transcription(modelId: RevaiTranscriptionModelId): TranscriptionModelV3;
+
+  /**
+   * @deprecated Use `embeddingModel` instead.
+   */
+  textEmbeddingModel(modelId: string): never;
+}
+
+export interface RevaiProviderSettings {
+  /**
+   * API key for authenticating requests.
+   */
+  apiKey?: string;
+
+  /**
+   * Custom headers to include in the requests.
+   */
+  headers?: Record<string, string>;
+
+  /**
+   * Custom fetch implementation. You can use it as a middleware to intercept requests,
+   * or to provide a custom fetch implementation for e.g. testing.
+   */
+  fetch?: FetchFunction;
+}
+
+/**
+ * Create a Rev.ai provider instance.
+ */
+export function createRevai(
+  options: RevaiProviderSettings = {},
+): RevaiProvider {
+  const getHeaders = () =>
+    withUserAgentSuffix(
+      {
+        authorization: `Bearer ${loadApiKey({
+          apiKey: options.apiKey,
+          environmentVariableName: 'REVAI_API_KEY',
+          description: 'Rev.ai',
+        })}`,
+        ...options.headers,
+      },
+      `ai-sdk/revai/${VERSION}`,
+    );
+
+  const createTranscriptionModel = (modelId: RevaiTranscriptionModelId) =>
+    new RevaiTranscriptionModel(modelId, {
+      provider: `revai.transcription`,
+      url: ({ path }) => `https://api.rev.ai${path}`,
+      headers: getHeaders,
+      fetch: options.fetch,
+    });
+
+  const provider = function (modelId: RevaiTranscriptionModelId) {
+    return {
+      transcription: createTranscriptionModel(modelId),
+    };
+  };
+
+  provider.specificationVersion = 'v3' as const;
+  provider.transcription = createTranscriptionModel;
+  provider.transcriptionModel = createTranscriptionModel;
+
+  provider.languageModel = () => {
+    throw new NoSuchModelError({
+      modelId: 'unknown',
+      modelType: 'languageModel',
+      message: 'Rev.ai does not provide language models',
+    });
+  };
+
+  provider.embeddingModel = () => {
+    throw new NoSuchModelError({
+      modelId: 'unknown',
+      modelType: 'embeddingModel',
+      message: 'Rev.ai does not provide text embedding models',
+    });
+  };
+  provider.textEmbeddingModel = provider.embeddingModel;
+
+  provider.imageModel = () => {
+    throw new NoSuchModelError({
+      modelId: 'unknown',
+      modelType: 'imageModel',
+      message: 'Rev.ai does not provide image models',
+    });
+  };
+
+  return provider as RevaiProvider;
+}
+
+/**
+ * Default Rev.ai provider instance.
+ */
+export const revai = createRevai();
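
Typical usage of the factory above, as a sketch: the API key can come from `REVAI_API_KEY` (via `loadApiKey`) or be passed explicitly, and extra headers are merged into every request. The `x-trace-id` header is purely illustrative.

```ts
import { createRevai } from '@ai-sdk/revai';

const revai = createRevai({
  apiKey: process.env.REVAI_API_KEY, // or omit to rely on the env variable
  headers: { 'x-trace-id': 'example' }, // illustrative custom header
});

const model = revai.transcription('low_cost');
```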
package/src/revai-transcription-model.ts
ADDED

@@ -0,0 +1,516 @@
+import {
+  AISDKError,
+  TranscriptionModelV3,
+  SharedV3Warning,
+} from '@ai-sdk/provider';
+import {
+  combineHeaders,
+  convertBase64ToUint8Array,
+  createJsonResponseHandler,
+  mediaTypeToExtension,
+  delay,
+  getFromApi,
+  parseProviderOptions,
+  postFormDataToApi,
+} from '@ai-sdk/provider-utils';
+import { z } from 'zod/v4';
+import { RevaiConfig } from './revai-config';
+import { revaiFailedResponseHandler } from './revai-error';
+import { RevaiTranscriptionModelId } from './revai-transcription-options';
+import { RevaiTranscriptionAPITypes } from './revai-api-types';
+
+// https://docs.rev.ai/api/asynchronous/reference/#operation/SubmitTranscriptionJob
+const revaiProviderOptionsSchema = z.object({
+  /**
+   * Optional metadata string to associate with the transcription job.
+   */
+  metadata: z.string().nullish(),
+  /**
+   * Configuration for webhook notifications when job is complete.
+   */
+  notification_config: z
+    .object({
+      /**
+       * URL to send the notification to.
+       */
+      url: z.string(),
+      /**
+       * Optional authorization headers for the notification request.
+       */
+      auth_headers: z
+        .object({
+          Authorization: z.string(),
+        })
+        .nullish(),
+    })
+    .nullish(),
+  /**
+   * Number of seconds after which the job will be automatically deleted.
+   */
+  delete_after_seconds: z.number().nullish(),
+  /**
+   * Whether to include filler words and false starts in the transcription.
+   */
+  verbatim: z.boolean().optional(),
+  /**
+   * Whether to prioritize the job for faster processing.
+   */
+  rush: z.boolean().nullish().default(false),
+  /**
+   * Whether to run the job in test mode.
+   */
+  test_mode: z.boolean().nullish().default(false),
+  /**
+   * Specific segments of the audio to transcribe.
+   */
+  segments_to_transcribe: z
+    .array(
+      z.object({
+        /**
+         * Start time of the segment in seconds.
+         */
+        start: z.number(),
+        /**
+         * End time of the segment in seconds.
+         */
+        end: z.number(),
+      }),
+    )
+    .nullish(),
+  /**
+   * Names to assign to speakers in the transcription.
+   */
+  speaker_names: z
+    .array(
+      z.object({
+        /**
+         * Display name for the speaker.
+         */
+        display_name: z.string(),
+      }),
+    )
+    .nullish(),
+  /**
+   * Whether to skip speaker diarization.
+   */
+  skip_diarization: z.boolean().nullish().default(false),
+  /**
+   * Whether to skip post-processing steps.
+   */
+  skip_postprocessing: z.boolean().nullish().default(false),
+  /**
+   * Whether to skip adding punctuation to the transcription.
+   */
+  skip_punctuation: z.boolean().nullish().default(false),
+  /**
+   * Whether to remove disfluencies (um, uh, etc.) from the transcription.
+   */
+  remove_disfluencies: z.boolean().nullish().default(false),
+  /**
+   * Whether to remove atmospheric sounds from the transcription.
+   */
+  remove_atmospherics: z.boolean().nullish().default(false),
+  /**
+   * Whether to filter profanity from the transcription.
+   */
+  filter_profanity: z.boolean().nullish().default(false),
+  /**
+   * Number of speaker channels in the audio.
+   */
+  speaker_channels_count: z.number().nullish(),
+  /**
+   * Expected number of speakers in the audio.
+   */
+  speakers_count: z.number().nullish(),
+  /**
+   * Type of diarization to use.
+   */
+  diarization_type: z
+    .enum(['standard', 'premium'])
+    .nullish()
+    .default('standard'),
+  /**
+   * ID of a custom vocabulary to use for the transcription.
+   */
+  custom_vocabulary_id: z.string().nullish(),
+  /**
+   * Custom vocabularies to use for the transcription.
+   */
+  custom_vocabularies: z.array(z.object({})).optional(),
+  /**
+   * Whether to strictly enforce custom vocabulary.
+   */
+  strict_custom_vocabulary: z.boolean().optional(),
+  /**
+   * Configuration for generating a summary of the transcription.
+   */
+  summarization_config: z
+    .object({
+      /**
+       * Model to use for summarization.
+       */
+      model: z.enum(['standard', 'premium']).nullish().default('standard'),
+      /**
+       * Format of the summary.
+       */
+      type: z.enum(['paragraph', 'bullets']).nullish().default('paragraph'),
+      /**
+       * Custom prompt for the summarization.
+       */
+      prompt: z.string().nullish(),
+    })
+    .nullish(),
+  /**
+   * Configuration for translating the transcription.
+   */
+  translation_config: z
+    .object({
+      /**
+       * Target languages for translation.
+       */
+      target_languages: z.array(
+        z.object({
+          /**
+           * Language code for translation target.
+           */
+          language: z.enum([
+            'en',
+            'en-us',
+            'en-gb',
+            'ar',
+            'pt',
+            'pt-br',
+            'pt-pt',
+            'fr',
+            'fr-ca',
+            'es',
+            'es-es',
+            'es-la',
+            'it',
+            'ja',
+            'ko',
+            'de',
+            'ru',
+          ]),
+        }),
+      ),
+      /**
+       * Model to use for translation.
+       */
+      model: z.enum(['standard', 'premium']).nullish().default('standard'),
+    })
+    .nullish(),
+  /**
+   * Language of the audio content.
+   */
+  language: z.string().nullish().default('en'),
+  /**
+   * Whether to perform forced alignment.
+   */
+  forced_alignment: z.boolean().nullish().default(false),
+});
+
+export type RevaiTranscriptionCallOptions = z.infer<
+  typeof revaiProviderOptionsSchema
+>;
+
+interface RevaiTranscriptionModelConfig extends RevaiConfig {
+  _internal?: {
+    currentDate?: () => Date;
+  };
+}
+
+export class RevaiTranscriptionModel implements TranscriptionModelV3 {
+  readonly specificationVersion = 'v3';
+
+  get provider(): string {
+    return this.config.provider;
+  }
+
+  constructor(
+    readonly modelId: RevaiTranscriptionModelId,
+    private readonly config: RevaiTranscriptionModelConfig,
+  ) {}
+
+  private async getArgs({
+    audio,
+    mediaType,
+    providerOptions,
+  }: Parameters<TranscriptionModelV3['doGenerate']>[0]) {
+    const warnings: SharedV3Warning[] = [];
+
+    // Parse provider options
+    const revaiOptions = await parseProviderOptions({
+      provider: 'revai',
+      providerOptions,
+      schema: revaiProviderOptionsSchema,
+    });
+
+    // Create form data with base fields
+    const formData = new FormData();
+    const blob =
+      audio instanceof Uint8Array
+        ? new Blob([audio])
+        : new Blob([convertBase64ToUint8Array(audio)]);
+
+    const fileExtension = mediaTypeToExtension(mediaType);
+    formData.append(
+      'media',
+      new File([blob], 'audio', { type: mediaType }),
+      `audio.${fileExtension}`,
+    );
+    const transcriptionModelOptions: RevaiTranscriptionAPITypes = {
+      transcriber: this.modelId,
+    };
+
+    // Add provider-specific options
+    if (revaiOptions) {
+      const formDataConfig: RevaiTranscriptionAPITypes = {
+        metadata: revaiOptions.metadata ?? undefined,
+        notification_config: revaiOptions.notification_config ?? undefined,
+        delete_after_seconds: revaiOptions.delete_after_seconds ?? undefined,
+        verbatim: revaiOptions.verbatim ?? undefined,
+        rush: revaiOptions.rush ?? undefined,
+        test_mode: revaiOptions.test_mode ?? undefined,
+        segments_to_transcribe:
+          revaiOptions.segments_to_transcribe ?? undefined,
+        speaker_names: revaiOptions.speaker_names ?? undefined,
+        skip_diarization: revaiOptions.skip_diarization ?? undefined,
+        skip_postprocessing: revaiOptions.skip_postprocessing ?? undefined,
+        skip_punctuation: revaiOptions.skip_punctuation ?? undefined,
+        remove_disfluencies: revaiOptions.remove_disfluencies ?? undefined,
+        remove_atmospherics: revaiOptions.remove_atmospherics ?? undefined,
+        filter_profanity: revaiOptions.filter_profanity ?? undefined,
+        speaker_channels_count:
+          revaiOptions.speaker_channels_count ?? undefined,
+        speakers_count: revaiOptions.speakers_count ?? undefined,
+        diarization_type: revaiOptions.diarization_type ?? undefined,
+        custom_vocabulary_id: revaiOptions.custom_vocabulary_id ?? undefined,
+        custom_vocabularies: revaiOptions.custom_vocabularies ?? undefined,
+        strict_custom_vocabulary:
+          revaiOptions.strict_custom_vocabulary ?? undefined,
+        summarization_config: revaiOptions.summarization_config ?? undefined,
+        translation_config: revaiOptions.translation_config ?? undefined,
+        language: revaiOptions.language ?? undefined,
+        forced_alignment: revaiOptions.forced_alignment ?? undefined,
+      };
+
+      for (const key in formDataConfig) {
+        const value = formDataConfig[key as keyof RevaiTranscriptionAPITypes];
+        if (value !== undefined) {
+          (transcriptionModelOptions as Record<string, unknown>)[
+            key as keyof RevaiTranscriptionAPITypes
+          ] = value;
+        }
+      }
+    }
+
+    formData.append('config', JSON.stringify(transcriptionModelOptions));
+
+    return {
+      formData,
+      warnings,
+    };
+  }
+
+  async doGenerate(
+    options: Parameters<TranscriptionModelV3['doGenerate']>[0],
+  ): Promise<Awaited<ReturnType<TranscriptionModelV3['doGenerate']>>> {
+    const currentDate = this.config._internal?.currentDate?.() ?? new Date();
+    const { formData, warnings } = await this.getArgs(options);
+
+    const { value: submissionResponse } = await postFormDataToApi({
+      url: this.config.url({
+        path: '/speechtotext/v1/jobs',
+        modelId: this.modelId,
+      }),
+      headers: combineHeaders(this.config.headers(), options.headers),
+      formData,
+      failedResponseHandler: revaiFailedResponseHandler,
+      successfulResponseHandler: createJsonResponseHandler(
+        revaiTranscriptionJobResponseSchema,
+      ),
+      abortSignal: options.abortSignal,
+      fetch: this.config.fetch,
+    });
+
+    if (submissionResponse.status === 'failed') {
+      throw new AISDKError({
+        message: 'Failed to submit transcription job to Rev.ai',
+        name: 'TranscriptionJobSubmissionFailed',
+        cause: submissionResponse,
+      });
+    }
+
+    const jobId = submissionResponse.id;
+    const timeoutMs = 60 * 1000; // 60 seconds timeout
+    const startTime = Date.now();
+    const pollingInterval = 1000;
+    let jobResponse = submissionResponse;
+
+    while (jobResponse.status !== 'transcribed') {
+      // Check if we've exceeded the timeout
+      if (Date.now() - startTime > timeoutMs) {
+        throw new AISDKError({
+          message: 'Transcription job polling timed out',
+          name: 'TranscriptionJobPollingTimedOut',
+          cause: submissionResponse,
+        });
+      }
+
+      // Poll for job status
+      const pollingResult = await getFromApi({
+        url: this.config.url({
+          path: `/speechtotext/v1/jobs/${jobId}`,
+          modelId: this.modelId,
+        }),
+        headers: combineHeaders(this.config.headers(), options.headers),
+        failedResponseHandler: revaiFailedResponseHandler,
+        successfulResponseHandler: createJsonResponseHandler(
+          revaiTranscriptionJobResponseSchema,
+        ),
+        abortSignal: options.abortSignal,
+        fetch: this.config.fetch,
+      });
+
+      jobResponse = pollingResult.value;
+
+      if (jobResponse.status === 'failed') {
+        throw new AISDKError({
+          message: 'Transcription job failed',
+          name: 'TranscriptionJobFailed',
+          cause: jobResponse,
+        });
+      }
+
+      // Wait before polling again (only if we need to continue polling)
+      if (jobResponse.status !== 'transcribed') {
+        await delay(pollingInterval);
+      }
+    }
+
+    const {
+      value: transcriptionResult,
+      responseHeaders,
+      rawValue: rawResponse,
+    } = await getFromApi({
+      url: this.config.url({
+        path: `/speechtotext/v1/jobs/${jobId}/transcript`,
+        modelId: this.modelId,
+      }),
+      headers: combineHeaders(this.config.headers(), options.headers),
+      failedResponseHandler: revaiFailedResponseHandler,
+      successfulResponseHandler: createJsonResponseHandler(
+        revaiTranscriptionResponseSchema,
+      ),
+      abortSignal: options.abortSignal,
+      fetch: this.config.fetch,
+    });
+
+    let durationInSeconds = 0;
+    const segments: {
+      text: string;
+      startSecond: number;
+      endSecond: number;
+    }[] = [];
+
+    for (const monologue of transcriptionResult.monologues ?? []) {
+      // Process each monologue to extract segments with timing information
+      let currentSegmentText = '';
+      let segmentStartSecond = 0;
+      let hasStartedSegment = false;
+
+      for (const element of monologue?.elements ?? []) {
+        // Add the element value to the current segment text
+        currentSegmentText += element.value;
+
+        // For text elements, track timing information
+        if (element.type === 'text') {
+          // Update the overall duration if this is the latest timestamp
+          if (element.end_ts && element.end_ts > durationInSeconds) {
+            durationInSeconds = element.end_ts;
+          }
+
+          // If this is the first text element in a segment, mark the start time
+          if (!hasStartedSegment && typeof element.ts === 'number') {
+            segmentStartSecond = element.ts;
+            hasStartedSegment = true;
+          }
+
+          // If we have an end timestamp, we can complete a segment
+          if (typeof element.end_ts === 'number' && hasStartedSegment) {
+            // Only add non-empty segments
+            if (currentSegmentText.trim()) {
+              segments.push({
+                text: currentSegmentText.trim(),
+                startSecond: segmentStartSecond,
+                endSecond: element.end_ts,
+              });
+            }
+
+            // Reset for the next segment
+            currentSegmentText = '';
+            hasStartedSegment = false;
+          }
+        }
+      }
+
+      // Handle any remaining segment text that wasn't added
+      if (hasStartedSegment && currentSegmentText.trim()) {
+        const endSecond =
+          durationInSeconds > segmentStartSecond
+            ? durationInSeconds
+            : segmentStartSecond + 1;
+        segments.push({
+          text: currentSegmentText.trim(),
+          startSecond: segmentStartSecond,
+          endSecond: endSecond,
+        });
+      }
+    }
+
+    return {
+      text:
+        transcriptionResult.monologues
+          ?.map(monologue =>
+            monologue?.elements?.map(element => element.value).join(''),
+          )
+          .join(' ') ?? '',
+      segments,
+      language: submissionResponse.language ?? undefined,
+      durationInSeconds,
+      warnings,
+      response: {
+        timestamp: currentDate,
+        modelId: this.modelId,
+        headers: responseHeaders,
+        body: rawResponse,
+      },
+    };
+  }
+}
+
+const revaiTranscriptionJobResponseSchema = z.object({
+  id: z.string().nullish(),
+  status: z.string().nullish(),
+  language: z.string().nullish(),
+});
+
+const revaiTranscriptionResponseSchema = z.object({
+  monologues: z
+    .array(
+      z.object({
+        elements: z
+          .array(
+            z.object({
+              type: z.string().nullish(),
+              value: z.string().nullish(),
+              ts: z.number().nullish(),
+              end_ts: z.number().nullish(),
+            }),
+          )
+          .nullish(),
+      }),
+    )
+    .nullish(),
+});
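
To make the segment extraction concrete, here is a transcript payload matching `revaiTranscriptionResponseSchema` and the segments the loop above derives from it; the words and timestamps are invented.

```ts
// Invented sample matching revaiTranscriptionResponseSchema.
const transcript = {
  monologues: [
    {
      elements: [
        { type: 'text', value: 'Hello', ts: 0.12, end_ts: 0.45 },
        { type: 'punct', value: ' ' },
        { type: 'text', value: 'world', ts: 0.5, end_ts: 0.9 },
      ],
    },
  ],
};

// Each text element with an end_ts closes a segment, so doGenerate yields:
//   [{ text: 'Hello', startSecond: 0.12, endSecond: 0.45 },
//    { text: 'world', startSecond: 0.5, endSecond: 0.9 }]
// and durationInSeconds === 0.9 (the largest end_ts seen).
```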
package/src/revai-transcription-options.ts
ADDED

@@ -0,0 +1 @@
+export type RevaiTranscriptionModelId = 'machine' | 'low_cost' | 'fusion';

package/src/transcript-test.mp3
ADDED

Binary file