@ai-sdk/revai 0.0.0-1c33ba03-20260114162300 → 0.0.0-4115c213-20260122152721

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,11 +1,40 @@
  # @ai-sdk/revai

- ## 0.0.0-1c33ba03-20260114162300
+ ## 0.0.0-4115c213-20260122152721

  ### Patch Changes

- - Updated dependencies [261c011]
-   - @ai-sdk/provider-utils@0.0.0-1c33ba03-20260114162300
+ - 4caafb2: chore: excluded tests from src folder in npm package
+ - Updated dependencies [4caafb2]
+   - @ai-sdk/provider@0.0.0-4115c213-20260122152721
+   - @ai-sdk/provider-utils@0.0.0-4115c213-20260122152721
+
+ ## 2.0.10
+
+ ### Patch Changes
+
+ - 2b8369d: chore: add docs to package dist
+
+ ## 2.0.9
+
+ ### Patch Changes
+
+ - 8dc54db: chore: add src folders to package bundle
+
+ ## 2.0.8
+
+ ### Patch Changes
+
+ - Updated dependencies [5c090e7]
+   - @ai-sdk/provider@3.0.4
+   - @ai-sdk/provider-utils@4.0.8
+
+ ## 2.0.7
+
+ ### Patch Changes
+
+ - Updated dependencies [46f46e4]
+   - @ai-sdk/provider-utils@4.0.7

  ## 2.0.6

package/dist/index.js CHANGED
@@ -455,7 +455,7 @@ var revaiTranscriptionResponseSchema = import_v42.z.object({
  });

  // src/version.ts
- var VERSION = true ? "0.0.0-1c33ba03-20260114162300" : "0.0.0-test";
+ var VERSION = true ? "0.0.0-4115c213-20260122152721" : "0.0.0-test";

  // src/revai-provider.ts
  function createRevai(options = {}) {
package/dist/index.mjs CHANGED
@@ -443,7 +443,7 @@ var revaiTranscriptionResponseSchema = z2.object({
  });

  // src/version.ts
- var VERSION = true ? "0.0.0-1c33ba03-20260114162300" : "0.0.0-test";
+ var VERSION = true ? "0.0.0-4115c213-20260122152721" : "0.0.0-test";

  // src/revai-provider.ts
  function createRevai(options = {}) {
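Both dist hunks carry the same one-line change: only the injected version string differs between builds. The `true ? … : …` shape is leftover constant folding: the bundler replaces the `__PACKAGE_VERSION__` guard from `src/version.ts` (shown at the end of this diff) with a literal `true` and keeps the dead `"0.0.0-test"` branch. A sketch of the before/after, assuming an esbuild-style `define` substitution:

```ts
// Source (src/version.ts): the fallback is live only when nothing is injected.
declare const __PACKAGE_VERSION__: string | undefined;
export const VERSION: string =
  typeof __PACKAGE_VERSION__ !== 'undefined'
    ? __PACKAGE_VERSION__
    : '0.0.0-test';

// After `define: { __PACKAGE_VERSION__: '"0.0.0-4115c213-20260122152721"' }`
// the typeof guard folds to a constant, producing the bundled line:
// var VERSION = true ? "0.0.0-4115c213-20260122152721" : "0.0.0-test";
```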
@@ -0,0 +1,206 @@
+ ---
+ title: Rev.ai
+ description: Learn how to use the Rev.ai provider for the AI SDK.
+ ---
+
+ # Rev.ai Provider
+
+ The [Rev.ai](https://www.rev.ai/) provider contains transcription model support for the Rev.ai transcription API.
+
+ ## Setup
+
+ The Rev.ai provider is available in the `@ai-sdk/revai` module. You can install it with:
+
+ <Tabs items={['pnpm', 'npm', 'yarn', 'bun']}>
+   <Tab>
+     <Snippet text="pnpm add @ai-sdk/revai" dark />
+   </Tab>
+   <Tab>
+     <Snippet text="npm install @ai-sdk/revai" dark />
+   </Tab>
+   <Tab>
+     <Snippet text="yarn add @ai-sdk/revai" dark />
+   </Tab>
+
+   <Tab>
+     <Snippet text="bun add @ai-sdk/revai" dark />
+   </Tab>
+ </Tabs>
+
+ ## Provider Instance
+
+ You can import the default provider instance `revai` from `@ai-sdk/revai`:
+
+ ```ts
+ import { revai } from '@ai-sdk/revai';
+ ```
+
+ If you need a customized setup, you can import `createRevai` from `@ai-sdk/revai` and create a provider instance with your settings:
+
+ ```ts
+ import { createRevai } from '@ai-sdk/revai';
+
+ const revai = createRevai({
+   // custom settings, e.g.
+   fetch: customFetch,
+ });
+ ```
+
+ You can use the following optional settings to customize the Rev.ai provider instance:
+
+ - **apiKey** _string_
+
+   API key that is sent using the `Authorization` header.
+   It defaults to the `REVAI_API_KEY` environment variable.
+
+ - **headers** _Record&lt;string,string&gt;_
+
+   Custom headers to include in the requests.
+
+ - **fetch** _(input: RequestInfo, init?: RequestInit) => Promise&lt;Response&gt;_
+
+   Custom [fetch](https://developer.mozilla.org/en-US/docs/Web/API/fetch) implementation.
+   Defaults to the global `fetch` function.
+   You can use it as a middleware to intercept requests,
+   or to provide a custom fetch implementation for e.g. testing.
+
+ ## Transcription Models
+
+ You can create models that call the [Rev.ai transcription API](https://www.rev.ai/docs/api/transcription)
+ using the `.transcription()` factory method.
+
+ The first argument is the model id, e.g. `machine`.
+
+ ```ts
+ const model = revai.transcription('machine');
+ ```
+
+ You can also pass additional provider-specific options using the `providerOptions` argument. For example, supplying the input language in ISO-639-1 format (e.g. `en`) can sometimes improve transcription performance if it is known beforehand.
80
+ ```ts highlight="6"
81
+ import { experimental_transcribe as transcribe } from 'ai';
82
+ import { revai } from '@ai-sdk/revai';
83
+ import { readFile } from 'fs/promises';
84
+
85
+ const result = await transcribe({
86
+ model: revai.transcription('machine'),
87
+ audio: await readFile('audio.mp3'),
88
+ providerOptions: { revai: { language: 'en' } },
89
+ });
90
+ ```
91
+
92
+ The following provider options are available:
93
+
94
+ - **metadata** _string_
95
+
96
+ Optional metadata that was provided during job submission.
97
+
98
+ - **notification_config** _object_
99
+
100
+ Optional configuration for a callback url to invoke when processing is complete.
101
+
102
+ - **url** _string_ - Callback url to invoke when processing is complete.
103
+ - **auth_headers** _object_ - Optional authorization headers, if needed to invoke the callback.
104
+ - **Authorization** _string_ - Authorization header value.
105
+
106
+ - **delete_after_seconds** _integer_
107
+
108
+ Amount of time after job completion when job is auto-deleted.
109
+
110
+ - **verbatim** _boolean_
111
+
112
+ Configures the transcriber to transcribe every syllable, including all false starts and disfluencies.
113
+
114
+ - **rush** _boolean_
115
+
116
+ [HIPAA Unsupported] Only available for human transcriber option. When set to true, your job is given higher priority.
117
+
118
+ - **skip_diarization** _boolean_
119
+
120
+ Specify if speaker diarization will be skipped by the speech engine.
121
+
122
+ - **skip_postprocessing** _boolean_
123
+
124
+ Only available for English and Spanish languages. User-supplied preference on whether to skip post-processing operations.
125
+
126
+ - **skip_punctuation** _boolean_
127
+
128
+ Specify if "punct" type elements will be skipped by the speech engine.
129
+
130
+ - **remove_disfluencies** _boolean_
131
+
132
+ When set to true, disfluencies (like 'ums' and 'uhs') will not appear in the transcript.
133
+
134
+ - **remove_atmospherics** _boolean_
135
+
136
+ When set to true, atmospherics (like `<laugh>`, `<affirmative>`) will not appear in the transcript.
137
+
138
+ - **filter_profanity** _boolean_
139
+
140
+ When enabled, profanities will be filtered by replacing characters with asterisks except for the first and last.
141
+
142
+ - **speaker_channels_count** _integer_
143
+
144
+ Only available for English, Spanish and French languages. Specify the total number of unique speaker channels in the audio.
145
+
146
+ - **speakers_count** _integer_
147
+
148
+ Only available for English, Spanish and French languages. Specify the total number of unique speakers in the audio.
149
+
150
+ - **diarization_type** _string_
151
+
152
+ Specify diarization type. Possible values: "standard" (default), "premium".
153
+
154
+ - **custom_vocabulary_id** _string_
155
+
156
+ Supply the id of a pre-completed custom vocabulary submitted through the Custom Vocabularies API.
157
+
158
+ - **custom_vocabularies** _Array_
159
+
160
+ Specify a collection of custom vocabulary to be used for this job.
161
+
162
+ - **strict_custom_vocabulary** _boolean_
163
+
164
+ If true, only exact phrases will be used as custom vocabulary.
165
+
166
+ - **summarization_config** _object_
167
+
168
+ Specify summarization options.
169
+
170
+ - **model** _string_ - Model type for summarization. Possible values: "standard" (default), "premium".
171
+ - **type** _string_ - Summarization formatting type. Possible values: "paragraph" (default), "bullets".
172
+ - **prompt** _string_ - Custom prompt for flexible summaries (mutually exclusive with type).
173
+
174
+ - **translation_config** _object_
175
+
176
+ Specify translation options.
177
+
178
+ - **target_languages** _Array_ - Array of target languages for translation.
179
+ - **model** _string_ - Model type for translation. Possible values: "standard" (default), "premium".
180
+
181
+ - **language** _string_
182
+
183
+ Language is provided as a ISO 639-1 language code. Default is "en".
184
+
185
+ - **forced_alignment** _boolean_
186
+
187
+ When enabled, provides improved accuracy for per-word timestamps for a transcript.
188
+ Default is `false`.
189
+
190
+ Currently supported languages:
191
+
192
+ - English (en, en-us, en-gb)
193
+ - French (fr)
194
+ - Italian (it)
195
+ - German (de)
196
+ - Spanish (es)
197
+
198
+ Note: This option is not available in low-cost environment.
199
+
200
+ ### Model Capabilities
201
+
202
+ | Model | Transcription | Duration | Segments | Language |
203
+ | ---------- | ------------------- | ------------------- | ------------------- | ------------------- |
204
+ | `machine` | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> |
205
+ | `low_cost` | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> |
206
+ | `fusion` | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> |
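Several of these options can be combined in a single call. A sketch that uses only option names documented in the list above (the chosen values are illustrative):

```ts
import { experimental_transcribe as transcribe } from 'ai';
import { revai } from '@ai-sdk/revai';
import { readFile } from 'fs/promises';

// Illustrative values only; all option names come from the list above.
const result = await transcribe({
  model: revai.transcription('machine'),
  audio: await readFile('audio.mp3'),
  providerOptions: {
    revai: {
      language: 'en',
      verbatim: false,
      remove_disfluencies: true,
      filter_profanity: true,
      summarization_config: { type: 'bullets' },
    },
  },
});
```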
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
    "name": "@ai-sdk/revai",
-   "version": "0.0.0-1c33ba03-20260114162300",
+   "version": "0.0.0-4115c213-20260122152721",
    "license": "Apache-2.0",
    "sideEffects": false,
    "main": "./dist/index.js",
@@ -8,9 +8,18 @@
    "types": "./dist/index.d.ts",
    "files": [
      "dist/**/*",
+     "docs/**/*",
+     "src",
+     "!src/**/*.test.ts",
+     "!src/**/*.test-d.ts",
+     "!src/**/__snapshots__",
+     "!src/**/__fixtures__",
      "CHANGELOG.md",
      "README.md"
    ],
+   "directories": {
+     "doc": "./docs"
+   },
    "exports": {
      "./package.json": "./package.json",
      ".": {
@@ -20,15 +29,15 @@
      }
    },
    "dependencies": {
-     "@ai-sdk/provider": "3.0.3",
-     "@ai-sdk/provider-utils": "0.0.0-1c33ba03-20260114162300"
+     "@ai-sdk/provider": "0.0.0-4115c213-20260122152721",
+     "@ai-sdk/provider-utils": "0.0.0-4115c213-20260122152721"
    },
    "devDependencies": {
      "@types/node": "20.17.24",
      "tsup": "^8",
      "typescript": "5.6.3",
      "zod": "3.25.76",
-     "@ai-sdk/test-server": "1.0.1",
+     "@ai-sdk/test-server": "0.0.0-4115c213-20260122152721",
      "@vercel/ai-tsconfig": "0.0.0"
    },
    "peerDependencies": {
@@ -54,7 +63,7 @@
    "scripts": {
      "build": "tsup --tsconfig tsconfig.build.json",
      "build:watch": "tsup --tsconfig tsconfig.build.json --watch",
-     "clean": "del-cli dist",
+     "clean": "del-cli dist docs",
      "lint": "eslint \"./**/*.ts*\"",
      "type-check": "tsc --noEmit",
      "prettier-check": "prettier --check \"./**/*.ts*\"",
package/src/index.ts ADDED
@@ -0,0 +1,3 @@
+ export { createRevai, revai } from './revai-provider';
+ export type { RevaiProvider, RevaiProviderSettings } from './revai-provider';
+ export { VERSION } from './version';
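The barrel file above is the package's entire public entry point; seen from the consumer side (names taken verbatim from the exports):

```ts
import { revai, createRevai, VERSION } from '@ai-sdk/revai';
import type { RevaiProvider, RevaiProviderSettings } from '@ai-sdk/revai';
```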
package/src/revai-api-types.ts ADDED
@@ -0,0 +1,274 @@
+ export type RevaiTranscriptionAPITypes = {
+   /**
+    * Optional metadata that was provided during job submission.
+    */
+   metadata?: string | null;
+
+   /**
+    * Optional configuration for a callback url to invoke when processing is complete,
+    * in addition to auth headers if they are needed to invoke the callback url.
+    * Cannot be set if callback_url is set. This option will not be visible in the submission response.
+    */
+   notification_config?: {
+     /**
+      * Optional callback url to invoke when processing is complete
+      */
+     url: string;
+     /**
+      * Optional authorization headers, if they are needed to invoke the callback.
+      * There are a few constraints: 1) the "Authorization" header is the only header that can be passed in,
+      * and 2) the header value must be of the form <scheme> <token>.
+      * For example: {"Authorization": "Bearer $BEARER_TOKEN"}
+      */
+     auth_headers?: {
+       /**
+        * Authorization header
+        */
+       Authorization: string;
+     } | null;
+   } | null;
+
+   /**
+    * Amount of time after job completion when job is auto-deleted. Present only when preference set in job request.
+    */
+   delete_after_seconds?: number | null;
+
+   /**
+    * Select which service you would like to transcribe this file with.
+    * - machine: the default, and routes to our standard (Reverb) model.
+    * - low_cost: low-cost transcription which uses a quantized ASR model (Reverb Turbo) in the low-cost environment.
+    * - fusion: higher quality ASR that combines multiple models to achieve the best results. Typically has better support for rare words.
+    * @default "machine"
+    */
+   transcriber?: 'machine' | 'low_cost' | 'fusion' | null;
+
+   /**
+    * Configures the transcriber to transcribe every syllable. This will include all false starts and disfluencies in the transcript.
+    *
+    * The behavior depends on the transcriber option:
+    * - machine: the default is true. To turn it off, false should be explicitly provided.
+    * - human: the default is false. To turn it on, true should be explicitly provided.
+    */
+   verbatim?: boolean;
+
+   /**
+    * [HIPAA Unsupported] Only available for human transcriber option.
+    * When this field is set to true your job is given higher priority and will be worked on sooner by our human transcribers.
+    * @default false
+    */
+   rush?: boolean | null;
+
+   /**
+    * [HIPAA Unsupported] Only available for human transcriber option.
+    * When this field is set to true the behavior will mock a normal human transcription job except no transcription will happen.
+    * The primary use case is to test integrations without being charged for human transcription.
+    * @default false
+    */
+   test_mode?: boolean | null;
+
+   /**
+    * [HIPAA Unsupported] Only available for human transcriber option.
+    * Use this option to specify which sections of the transcript need to be transcribed.
+    * Segments must be at least 1 minute in length and cannot overlap.
+    */
+   segments_to_transcribe?: Array<{
+     /**
+      * The timestamp of the beginning of the segment relative to the beginning of the audio in seconds (centisecond precision)
+      */
+     start: number;
+     /**
+      * The timestamp of the end of the segment relative to the beginning of the audio in seconds (centisecond precision)
+      */
+     end: number;
+   }> | null;
+
+   /**
+    * [HIPAA Unsupported] Only available for human transcriber option.
+    * Use this option to specify up to 100 names of speakers in the transcript.
+    * Names may only be up to 50 characters long.
+    */
+   speaker_names?: Array<{
+     /**
+      * The name of the speaker to be used when labeling monologues. Max of 50 characters.
+      */
+     display_name: string;
+   }> | null;
+
+   /**
+    * Specify if speaker diarization will be skipped by the speech engine
+    * @default false
+    */
+   skip_diarization?: boolean | null;
+
+   /**
+    * Only available for English and Spanish languages.
+    * User-supplied preference on whether to skip post-processing operations such as inverse text normalization (ITN), casing and punctuation.
+    * @default false
+    */
+   skip_postprocessing?: boolean | null;
+
+   /**
+    * Specify if "punct" type elements will be skipped by the speech engine.
+    * For JSON outputs, this includes removing spaces. For text outputs, words will still be delimited by a space
+    * @default false
+    */
+   skip_punctuation?: boolean | null;
+
+   /**
+    * Currently we only define disfluencies as 'ums' and 'uhs'.
+    * When set to true, disfluencies will not appear in the transcript.
+    * This option also removes atmospherics if the remove_atmospherics is not set.
+    * This option is not available for human transcription jobs.
+    * @default false
+    */
+   remove_disfluencies?: boolean | null;
+
+   /**
+    * We define many atmospherics such as <laugh>, <affirmative>, etc.
+    * When set to true, atmospherics will not appear in the transcript.
+    * This option is not available for human transcription jobs.
+    * @default false
+    */
+   remove_atmospherics?: boolean | null;
+
+   /**
+    * Enabling this option will filter for approx. 600 profanities, which cover most use cases.
+    * If a transcribed word matches a word on this list, then all the characters of that word will be replaced by asterisks
+    * except for the first and last character.
+    * @default false
+    */
+   filter_profanity?: boolean | null;
+
+   /**
+    * Only available for English, Spanish and French languages.
+    * Use to specify the total number of unique speaker channels in the audio.
+    *
+    * Given the number of audio channels provided, each channel will be transcribed separately and the channel id assigned to the speaker label.
+    * The final output will be a combination of all individual channel outputs.
+    * Overlapping monologues will have ordering broken by the order in which the first spoken element of each monologue occurs.
+    * If speaker_channels_count is greater than the actual channels in the audio, the job will fail with invalid_media.
+    * This option is not available for human transcription jobs.
+    */
+   speaker_channels_count?: number | null;
+
+   /**
+    * Only available for English, Spanish and French languages.
+    * Use to specify the total number of unique speakers in the audio.
+    *
+    * Given the count of speakers provided, it will be used to improve the diarization accuracy.
+    * This option is not available for human transcription jobs.
+    * @default null
+    */
+   speakers_count?: number | null;
+
+   /**
+    * Use to specify diarization type. This option is not available for human transcription jobs and the low-cost environment.
+    * @default "standard"
+    */
+   diarization_type?: 'standard' | 'premium' | null;
+
+   /**
+    * This feature is in beta. You can supply the id of a pre-completed custom vocabulary that you submitted through the Custom Vocabularies API
+    * instead of uploading the list of phrases using the custom_vocabularies parameter.
+    * Using custom_vocabulary_id or custom_vocabularies with the same list of phrases yields the same transcription result,
+    * but custom_vocabulary_id enables your submission to finish processing faster by 6 seconds on average.
+    *
+    * You cannot use both custom_vocabulary_id and custom_vocabularies at the same time, and doing so will result in a 400 response.
+    * If the supplied id represents an incomplete, deleted, or non-existent custom vocabulary then you will receive a 404 response.
+    */
+   custom_vocabulary_id?: string | null;
+
+   /**
+    * Specify a collection of custom vocabulary to be used for this job.
+    * Custom vocabulary informs and biases the speech recognition to find those phrases (at the cost of slightly slower transcription).
+    */
+   custom_vocabularies?: Array<object>;
+
+   /**
+    * If true, only exact phrases will be used as custom vocabulary, i.e. phrases will not be split into individual words for processing.
+    * Enabled by default.
+    */
+   strict_custom_vocabulary?: boolean;
+
+   /**
+    * Use to specify summarization options. This option is not available for human transcription jobs.
+    */
+   summarization_config?: {
+     /**
+      * Model type for summarization.
+      * @default "standard"
+      */
+     model?: 'standard' | 'premium' | null;
+     /**
+      * Summarization formatting type. Use Paragraph for a text summary or Bullets for a list of topics.
+      * The prompt and type parameters are mutually exclusive.
+      * @default "paragraph"
+      */
+     type?: 'paragraph' | 'bullets' | null;
+     /**
+      * Custom prompt. Provides the most flexible way to create summaries, but may lead to unpredictable results.
+      * Summary is produced in Markdown format.
+      * The prompt and type parameters are mutually exclusive.
+      */
+     prompt?: string | null;
+   } | null;
+
+   /**
+    * Use to specify translation options. This option is not available for human transcription jobs.
+    */
+   translation_config?: {
+     /**
+      * Target languages for translation.
+      */
+     target_languages: Array<{
+       /**
+        * Target language for translation.
+        */
+       language:
+         | 'en'
+         | 'en-us'
+         | 'en-gb'
+         | 'ar'
+         | 'pt'
+         | 'pt-br'
+         | 'pt-pt'
+         | 'fr'
+         | 'fr-ca'
+         | 'es'
+         | 'es-es'
+         | 'es-la'
+         | 'it'
+         | 'ja'
+         | 'ko'
+         | 'de'
+         | 'ru';
+     }>;
+     /**
+      * Model type for translation.
+      * @default "standard"
+      */
+     model?: 'standard' | 'premium' | null;
+   } | null;
+
+   /**
+    * Language is provided as an ISO 639-1 language code, with exceptions.
+    * Only 1 language can be selected per audio, i.e. no multiple languages in one transcription job.
+    * @default "en"
+    */
+   language?: string | null;
+
+   /**
+    * Provides improved accuracy for per-word timestamps for a transcript.
+    *
+    * The following languages are currently supported:
+    * - English (en, en-us, en-gb)
+    * - French (fr)
+    * - Italian (it)
+    * - German (de)
+    * - Spanish (es)
+    *
+    * This option is not available in the low-cost environment.
+    * @default false
+    */
+   forced_alignment?: boolean | null;
+ };
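Since the model later serializes these options into the `config` form field (see `revai-transcription-model.ts` below), a value of this type is plain JSON-ready data. A small illustrative sketch (the option values are made up):

```ts
import type { RevaiTranscriptionAPITypes } from './revai-api-types';

// Illustrative values; all fields of the type are optional.
const config: RevaiTranscriptionAPITypes = {
  transcriber: 'machine',
  language: 'en',
  remove_disfluencies: true,
  summarization_config: { type: 'bullets' },
};

// This is the shape that ends up in the multipart body:
// formData.append('config', JSON.stringify(config));
```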
package/src/revai-config.ts ADDED
@@ -0,0 +1,9 @@
+ import { FetchFunction } from '@ai-sdk/provider-utils';
+
+ export type RevaiConfig = {
+   provider: string;
+   url: (options: { modelId: string; path: string }) => string;
+   headers: () => Record<string, string | undefined>;
+   fetch?: FetchFunction;
+   generateId?: () => string;
+ };
package/src/revai-error.ts ADDED
@@ -0,0 +1,16 @@
+ import { z } from 'zod/v4';
+ import { createJsonErrorResponseHandler } from '@ai-sdk/provider-utils';
+
+ export const revaiErrorDataSchema = z.object({
+   error: z.object({
+     message: z.string(),
+     code: z.number(),
+   }),
+ });
+
+ export type RevaiErrorData = z.infer<typeof revaiErrorDataSchema>;
+
+ export const revaiFailedResponseHandler = createJsonErrorResponseHandler({
+   errorSchema: revaiErrorDataSchema,
+   errorToMessage: data => data.error.message,
+ });
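The handler turns any non-2xx JSON body matching the schema into an error message. A hypothetical payload (the message and code are invented for illustration):

```ts
import { revaiErrorDataSchema } from './revai-error';

// Hypothetical error body; real Rev.ai payloads may carry extra fields,
// which this zod object schema strips during parsing.
const body = { error: { message: 'Job not found', code: 404 } };

const parsed = revaiErrorDataSchema.parse(body);
console.log(parsed.error.message); // "Job not found"
```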
package/src/revai-provider.ts ADDED
@@ -0,0 +1,120 @@
+ import {
+   TranscriptionModelV3,
+   ProviderV3,
+   NoSuchModelError,
+ } from '@ai-sdk/provider';
+ import {
+   FetchFunction,
+   loadApiKey,
+   withUserAgentSuffix,
+ } from '@ai-sdk/provider-utils';
+ import { RevaiTranscriptionModel } from './revai-transcription-model';
+ import { RevaiTranscriptionModelId } from './revai-transcription-options';
+ import { VERSION } from './version';
+
+ export interface RevaiProvider extends ProviderV3 {
+   (
+     modelId: 'machine',
+     settings?: {},
+   ): {
+     transcription: RevaiTranscriptionModel;
+   };
+
+   /**
+   Creates a model for transcription.
+   */
+   transcription(modelId: RevaiTranscriptionModelId): TranscriptionModelV3;
+
+   /**
+    * @deprecated Use `embeddingModel` instead.
+    */
+   textEmbeddingModel(modelId: string): never;
+ }
+
+ export interface RevaiProviderSettings {
+   /**
+   API key for authenticating requests.
+   */
+   apiKey?: string;
+
+   /**
+   Custom headers to include in the requests.
+   */
+   headers?: Record<string, string>;
+
+   /**
+   Custom fetch implementation. You can use it as a middleware to intercept requests,
+   or to provide a custom fetch implementation for e.g. testing.
+   */
+   fetch?: FetchFunction;
+ }
+
+ /**
+ Create a Rev.ai provider instance.
+ */
+ export function createRevai(
+   options: RevaiProviderSettings = {},
+ ): RevaiProvider {
+   const getHeaders = () =>
+     withUserAgentSuffix(
+       {
+         authorization: `Bearer ${loadApiKey({
+           apiKey: options.apiKey,
+           environmentVariableName: 'REVAI_API_KEY',
+           description: 'Rev.ai',
+         })}`,
+         ...options.headers,
+       },
+       `ai-sdk/revai/${VERSION}`,
+     );
+
+   const createTranscriptionModel = (modelId: RevaiTranscriptionModelId) =>
+     new RevaiTranscriptionModel(modelId, {
+       provider: `revai.transcription`,
+       url: ({ path }) => `https://api.rev.ai${path}`,
+       headers: getHeaders,
+       fetch: options.fetch,
+     });
+
+   const provider = function (modelId: RevaiTranscriptionModelId) {
+     return {
+       transcription: createTranscriptionModel(modelId),
+     };
+   };
+
+   provider.specificationVersion = 'v3' as const;
+   provider.transcription = createTranscriptionModel;
+   provider.transcriptionModel = createTranscriptionModel;
+
+   provider.languageModel = () => {
+     throw new NoSuchModelError({
+       modelId: 'unknown',
+       modelType: 'languageModel',
+       message: 'Rev.ai does not provide language models',
+     });
+   };
+
+   provider.embeddingModel = () => {
+     throw new NoSuchModelError({
+       modelId: 'unknown',
+       modelType: 'embeddingModel',
+       message: 'Rev.ai does not provide text embedding models',
+     });
+   };
+   provider.textEmbeddingModel = provider.embeddingModel;
+
+   provider.imageModel = () => {
+     throw new NoSuchModelError({
+       modelId: 'unknown',
+       modelType: 'imageModel',
+       message: 'Rev.ai does not provide image models',
+     });
+   };
+
+   return provider as RevaiProvider;
+ }
+
+ /**
+ Default Rev.ai provider instance.
+ */
+ export const revai = createRevai();
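The `fetch` setting threads through to every HTTP call the model makes, which is what the docs above mean by using it as middleware. A minimal sketch that logs each outgoing Rev.ai request (the logging is illustrative):

```ts
import { createRevai } from '@ai-sdk/revai';

const revai = createRevai({
  // Falls back to the REVAI_API_KEY environment variable when omitted.
  apiKey: process.env.REVAI_API_KEY,
  fetch: async (input, init) => {
    console.log('Rev.ai request:', input);
    return fetch(input, init);
  },
});

const model = revai.transcription('machine');
```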
package/src/revai-transcription-model.ts ADDED
@@ -0,0 +1,516 @@
+ import {
+   AISDKError,
+   TranscriptionModelV3,
+   SharedV3Warning,
+ } from '@ai-sdk/provider';
+ import {
+   combineHeaders,
+   convertBase64ToUint8Array,
+   createJsonResponseHandler,
+   mediaTypeToExtension,
+   delay,
+   getFromApi,
+   parseProviderOptions,
+   postFormDataToApi,
+ } from '@ai-sdk/provider-utils';
+ import { z } from 'zod/v4';
+ import { RevaiConfig } from './revai-config';
+ import { revaiFailedResponseHandler } from './revai-error';
+ import { RevaiTranscriptionModelId } from './revai-transcription-options';
+ import { RevaiTranscriptionAPITypes } from './revai-api-types';
+
+ // https://docs.rev.ai/api/asynchronous/reference/#operation/SubmitTranscriptionJob
+ const revaiProviderOptionsSchema = z.object({
+   /**
+    * Optional metadata string to associate with the transcription job.
+    */
+   metadata: z.string().nullish(),
+   /**
+    * Configuration for webhook notifications when the job is complete.
+    */
+   notification_config: z
+     .object({
+       /**
+        * URL to send the notification to.
+        */
+       url: z.string(),
+       /**
+        * Optional authorization headers for the notification request.
+        */
+       auth_headers: z
+         .object({
+           Authorization: z.string(),
+         })
+         .nullish(),
+     })
+     .nullish(),
+   /**
+    * Number of seconds after which the job will be automatically deleted.
+    */
+   delete_after_seconds: z.number().nullish(),
+   /**
+    * Whether to include filler words and false starts in the transcription.
+    */
+   verbatim: z.boolean().optional(),
+   /**
+    * Whether to prioritize the job for faster processing.
+    */
+   rush: z.boolean().nullish().default(false),
+   /**
+    * Whether to run the job in test mode.
+    */
+   test_mode: z.boolean().nullish().default(false),
+   /**
+    * Specific segments of the audio to transcribe.
+    */
+   segments_to_transcribe: z
+     .array(
+       z.object({
+         /**
+          * Start time of the segment in seconds.
+          */
+         start: z.number(),
+         /**
+          * End time of the segment in seconds.
+          */
+         end: z.number(),
+       }),
+     )
+     .nullish(),
+   /**
+    * Names to assign to speakers in the transcription.
+    */
+   speaker_names: z
+     .array(
+       z.object({
+         /**
+          * Display name for the speaker.
+          */
+         display_name: z.string(),
+       }),
+     )
+     .nullish(),
+   /**
+    * Whether to skip speaker diarization.
+    */
+   skip_diarization: z.boolean().nullish().default(false),
+   /**
+    * Whether to skip post-processing steps.
+    */
+   skip_postprocessing: z.boolean().nullish().default(false),
+   /**
+    * Whether to skip adding punctuation to the transcription.
+    */
+   skip_punctuation: z.boolean().nullish().default(false),
+   /**
+    * Whether to remove disfluencies (um, uh, etc.) from the transcription.
+    */
+   remove_disfluencies: z.boolean().nullish().default(false),
+   /**
+    * Whether to remove atmospheric sounds from the transcription.
+    */
+   remove_atmospherics: z.boolean().nullish().default(false),
+   /**
+    * Whether to filter profanity from the transcription.
+    */
+   filter_profanity: z.boolean().nullish().default(false),
+   /**
+    * Number of speaker channels in the audio.
+    */
+   speaker_channels_count: z.number().nullish(),
+   /**
+    * Expected number of speakers in the audio.
+    */
+   speakers_count: z.number().nullish(),
+   /**
+    * Type of diarization to use.
+    */
+   diarization_type: z
+     .enum(['standard', 'premium'])
+     .nullish()
+     .default('standard'),
+   /**
+    * ID of a custom vocabulary to use for the transcription.
+    */
+   custom_vocabulary_id: z.string().nullish(),
+   /**
+    * Custom vocabularies to use for the transcription.
+    */
+   custom_vocabularies: z.array(z.object({})).optional(),
+   /**
+    * Whether to strictly enforce custom vocabulary.
+    */
+   strict_custom_vocabulary: z.boolean().optional(),
+   /**
+    * Configuration for generating a summary of the transcription.
+    */
+   summarization_config: z
+     .object({
+       /**
+        * Model to use for summarization.
+        */
+       model: z.enum(['standard', 'premium']).nullish().default('standard'),
+       /**
+        * Format of the summary.
+        */
+       type: z.enum(['paragraph', 'bullets']).nullish().default('paragraph'),
+       /**
+        * Custom prompt for the summarization.
+        */
+       prompt: z.string().nullish(),
+     })
+     .nullish(),
+   /**
+    * Configuration for translating the transcription.
+    */
+   translation_config: z
+     .object({
+       /**
+        * Target languages for translation.
+        */
+       target_languages: z.array(
+         z.object({
+           /**
+            * Language code for translation target.
+            */
+           language: z.enum([
+             'en',
+             'en-us',
+             'en-gb',
+             'ar',
+             'pt',
+             'pt-br',
+             'pt-pt',
+             'fr',
+             'fr-ca',
+             'es',
+             'es-es',
+             'es-la',
+             'it',
+             'ja',
+             'ko',
+             'de',
+             'ru',
+           ]),
+         }),
+       ),
+       /**
+        * Model to use for translation.
+        */
+       model: z.enum(['standard', 'premium']).nullish().default('standard'),
+     })
+     .nullish(),
+   /**
+    * Language of the audio content.
+    */
+   language: z.string().nullish().default('en'),
+   /**
+    * Whether to perform forced alignment.
+    */
+   forced_alignment: z.boolean().nullish().default(false),
+ });
+
+ export type RevaiTranscriptionCallOptions = z.infer<
+   typeof revaiProviderOptionsSchema
+ >;
+
+ interface RevaiTranscriptionModelConfig extends RevaiConfig {
+   _internal?: {
+     currentDate?: () => Date;
+   };
+ }
+
+ export class RevaiTranscriptionModel implements TranscriptionModelV3 {
+   readonly specificationVersion = 'v3';
+
+   get provider(): string {
+     return this.config.provider;
+   }
+
+   constructor(
+     readonly modelId: RevaiTranscriptionModelId,
+     private readonly config: RevaiTranscriptionModelConfig,
+   ) {}
+
+   private async getArgs({
+     audio,
+     mediaType,
+     providerOptions,
+   }: Parameters<TranscriptionModelV3['doGenerate']>[0]) {
+     const warnings: SharedV3Warning[] = [];
+
+     // Parse provider options
+     const revaiOptions = await parseProviderOptions({
+       provider: 'revai',
+       providerOptions,
+       schema: revaiProviderOptionsSchema,
+     });
+
+     // Create form data with base fields
+     const formData = new FormData();
+     const blob =
+       audio instanceof Uint8Array
+         ? new Blob([audio])
+         : new Blob([convertBase64ToUint8Array(audio)]);
+
+     const fileExtension = mediaTypeToExtension(mediaType);
+     formData.append(
+       'media',
+       new File([blob], 'audio', { type: mediaType }),
+       `audio.${fileExtension}`,
+     );
+     const transcriptionModelOptions: RevaiTranscriptionAPITypes = {
+       transcriber: this.modelId,
+     };
+
+     // Add provider-specific options
+     if (revaiOptions) {
+       const formDataConfig: RevaiTranscriptionAPITypes = {
+         metadata: revaiOptions.metadata ?? undefined,
+         notification_config: revaiOptions.notification_config ?? undefined,
+         delete_after_seconds: revaiOptions.delete_after_seconds ?? undefined,
+         verbatim: revaiOptions.verbatim ?? undefined,
+         rush: revaiOptions.rush ?? undefined,
+         test_mode: revaiOptions.test_mode ?? undefined,
+         segments_to_transcribe:
+           revaiOptions.segments_to_transcribe ?? undefined,
+         speaker_names: revaiOptions.speaker_names ?? undefined,
+         skip_diarization: revaiOptions.skip_diarization ?? undefined,
+         skip_postprocessing: revaiOptions.skip_postprocessing ?? undefined,
+         skip_punctuation: revaiOptions.skip_punctuation ?? undefined,
+         remove_disfluencies: revaiOptions.remove_disfluencies ?? undefined,
+         remove_atmospherics: revaiOptions.remove_atmospherics ?? undefined,
+         filter_profanity: revaiOptions.filter_profanity ?? undefined,
+         speaker_channels_count:
+           revaiOptions.speaker_channels_count ?? undefined,
+         speakers_count: revaiOptions.speakers_count ?? undefined,
+         diarization_type: revaiOptions.diarization_type ?? undefined,
+         custom_vocabulary_id: revaiOptions.custom_vocabulary_id ?? undefined,
+         custom_vocabularies: revaiOptions.custom_vocabularies ?? undefined,
+         strict_custom_vocabulary:
+           revaiOptions.strict_custom_vocabulary ?? undefined,
+         summarization_config: revaiOptions.summarization_config ?? undefined,
+         translation_config: revaiOptions.translation_config ?? undefined,
+         language: revaiOptions.language ?? undefined,
+         forced_alignment: revaiOptions.forced_alignment ?? undefined,
+       };
+
+       for (const key in formDataConfig) {
+         const value = formDataConfig[key as keyof RevaiTranscriptionAPITypes];
+         if (value !== undefined) {
+           (transcriptionModelOptions as Record<string, unknown>)[
+             key as keyof RevaiTranscriptionAPITypes
+           ] = value;
+         }
+       }
+     }
+
+     formData.append('config', JSON.stringify(transcriptionModelOptions));
+
+     return {
+       formData,
+       warnings,
+     };
+   }
+
+   async doGenerate(
+     options: Parameters<TranscriptionModelV3['doGenerate']>[0],
+   ): Promise<Awaited<ReturnType<TranscriptionModelV3['doGenerate']>>> {
+     const currentDate = this.config._internal?.currentDate?.() ?? new Date();
+     const { formData, warnings } = await this.getArgs(options);
+
+     const { value: submissionResponse } = await postFormDataToApi({
+       url: this.config.url({
+         path: '/speechtotext/v1/jobs',
+         modelId: this.modelId,
+       }),
+       headers: combineHeaders(this.config.headers(), options.headers),
+       formData,
+       failedResponseHandler: revaiFailedResponseHandler,
+       successfulResponseHandler: createJsonResponseHandler(
+         revaiTranscriptionJobResponseSchema,
+       ),
+       abortSignal: options.abortSignal,
+       fetch: this.config.fetch,
+     });
+
+     if (submissionResponse.status === 'failed') {
+       throw new AISDKError({
+         message: 'Failed to submit transcription job to Rev.ai',
+         name: 'TranscriptionJobSubmissionFailed',
+         cause: submissionResponse,
+       });
+     }
+
+     const jobId = submissionResponse.id;
+     const timeoutMs = 60 * 1000; // 60 seconds timeout
+     const startTime = Date.now();
+     const pollingInterval = 1000;
+     let jobResponse = submissionResponse;
+
+     while (jobResponse.status !== 'transcribed') {
+       // Check if we've exceeded the timeout
+       if (Date.now() - startTime > timeoutMs) {
+         throw new AISDKError({
+           message: 'Transcription job polling timed out',
+           name: 'TranscriptionJobPollingTimedOut',
+           cause: submissionResponse,
+         });
+       }
+
+       // Poll for job status
+       const pollingResult = await getFromApi({
+         url: this.config.url({
+           path: `/speechtotext/v1/jobs/${jobId}`,
+           modelId: this.modelId,
+         }),
+         headers: combineHeaders(this.config.headers(), options.headers),
+         failedResponseHandler: revaiFailedResponseHandler,
+         successfulResponseHandler: createJsonResponseHandler(
+           revaiTranscriptionJobResponseSchema,
+         ),
+         abortSignal: options.abortSignal,
+         fetch: this.config.fetch,
+       });
+
+       jobResponse = pollingResult.value;
+
+       if (jobResponse.status === 'failed') {
+         throw new AISDKError({
+           message: 'Transcription job failed',
+           name: 'TranscriptionJobFailed',
+           cause: jobResponse,
+         });
+       }
+
+       // Wait before polling again (only if we need to continue polling)
+       if (jobResponse.status !== 'transcribed') {
+         await delay(pollingInterval);
+       }
+     }
+
+     const {
+       value: transcriptionResult,
+       responseHeaders,
+       rawValue: rawResponse,
+     } = await getFromApi({
+       url: this.config.url({
+         path: `/speechtotext/v1/jobs/${jobId}/transcript`,
+         modelId: this.modelId,
+       }),
+       headers: combineHeaders(this.config.headers(), options.headers),
+       failedResponseHandler: revaiFailedResponseHandler,
+       successfulResponseHandler: createJsonResponseHandler(
+         revaiTranscriptionResponseSchema,
+       ),
+       abortSignal: options.abortSignal,
+       fetch: this.config.fetch,
+     });
+
+     let durationInSeconds = 0;
+     const segments: {
+       text: string;
+       startSecond: number;
+       endSecond: number;
+     }[] = [];
+
+     for (const monologue of transcriptionResult.monologues ?? []) {
+       // Process each monologue to extract segments with timing information
+       let currentSegmentText = '';
+       let segmentStartSecond = 0;
+       let hasStartedSegment = false;
+
+       for (const element of monologue?.elements ?? []) {
+         // Add the element value to the current segment text
+         currentSegmentText += element.value;
+
+         // For text elements, track timing information
+         if (element.type === 'text') {
+           // Update the overall duration if this is the latest timestamp
+           if (element.end_ts && element.end_ts > durationInSeconds) {
+             durationInSeconds = element.end_ts;
+           }
+
+           // If this is the first text element in a segment, mark the start time
+           if (!hasStartedSegment && typeof element.ts === 'number') {
+             segmentStartSecond = element.ts;
+             hasStartedSegment = true;
+           }
+
+           // If we have an end timestamp, we can complete a segment
+           if (typeof element.end_ts === 'number' && hasStartedSegment) {
+             // Only add non-empty segments
+             if (currentSegmentText.trim()) {
+               segments.push({
+                 text: currentSegmentText.trim(),
+                 startSecond: segmentStartSecond,
+                 endSecond: element.end_ts,
+               });
+             }
+
+             // Reset for the next segment
+             currentSegmentText = '';
+             hasStartedSegment = false;
+           }
+         }
+       }
+
+       // Handle any remaining segment text that wasn't added
+       if (hasStartedSegment && currentSegmentText.trim()) {
+         const endSecond =
+           durationInSeconds > segmentStartSecond
+             ? durationInSeconds
+             : segmentStartSecond + 1;
+         segments.push({
+           text: currentSegmentText.trim(),
+           startSecond: segmentStartSecond,
+           endSecond: endSecond,
+         });
+       }
+     }
+
+     return {
+       text:
+         transcriptionResult.monologues
+           ?.map(monologue =>
+             monologue?.elements?.map(element => element.value).join(''),
+           )
+           .join(' ') ?? '',
+       segments,
+       language: submissionResponse.language ?? undefined,
+       durationInSeconds,
+       warnings,
+       response: {
+         timestamp: currentDate,
+         modelId: this.modelId,
+         headers: responseHeaders,
+         body: rawResponse,
+       },
+     };
+   }
+ }
+
+ const revaiTranscriptionJobResponseSchema = z.object({
+   id: z.string().nullish(),
+   status: z.string().nullish(),
+   language: z.string().nullish(),
+ });
+
+ const revaiTranscriptionResponseSchema = z.object({
+   monologues: z
+     .array(
+       z.object({
+         elements: z
+           .array(
+             z.object({
+               type: z.string().nullish(),
+               value: z.string().nullish(),
+               ts: z.number().nullish(),
+               end_ts: z.number().nullish(),
+             }),
+           )
+           .nullish(),
+       }),
+     )
+     .nullish(),
+ });
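`doGenerate` is a three-step flow: submit the job as multipart form data, poll `GET /speechtotext/v1/jobs/{id}` once per second until the status is `transcribed` (giving up after 60 seconds), then fetch the transcript. A usage sketch from the caller's side; the 30-second `AbortSignal.timeout` cap is an assumption, not part of the package:

```ts
import { experimental_transcribe as transcribe } from 'ai';
import { revai } from '@ai-sdk/revai';
import { readFile } from 'fs/promises';

// The abort signal is forwarded to the submission, polling, and transcript
// requests above, so the caller can cancel before the model's own
// 60-second polling timeout throws TranscriptionJobPollingTimedOut.
const result = await transcribe({
  model: revai.transcription('machine'),
  audio: await readFile('audio.mp3'),
  abortSignal: AbortSignal.timeout(30_000), // assumed caller-chosen cap
});

console.log(result.text);
console.log(result.durationInSeconds, result.segments.length);
```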
package/src/revai-transcription-options.ts ADDED
@@ -0,0 +1 @@
+ export type RevaiTranscriptionModelId = 'machine' | 'low_cost' | 'fusion';
Binary file
package/src/version.ts ADDED
@@ -0,0 +1,6 @@
+ // Version string of this package injected at build time.
+ declare const __PACKAGE_VERSION__: string | undefined;
+ export const VERSION: string =
+   typeof __PACKAGE_VERSION__ !== 'undefined'
+     ? __PACKAGE_VERSION__
+     : '0.0.0-test';
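The `declare const` plus `typeof` guard is the pattern that makes the build-time injection in the dist hunks above safe when no constant is defined. A hypothetical tsup configuration that would perform the injection (the package's actual build config is not part of this diff):

```ts
// tsup.config.ts — hypothetical sketch, not the package's real config.
import { defineConfig } from 'tsup';
import { version } from './package.json';

export default defineConfig({
  entry: ['src/index.ts'],
  format: ['cjs', 'esm'],
  dts: true,
  define: {
    // esbuild replaces every occurrence with the literal string below,
    // so the typeof guard in src/version.ts folds away in dist output.
    __PACKAGE_VERSION__: JSON.stringify(version),
  },
});
```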