@ai-sdk/revai 2.0.7 → 2.0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,19 @@
  # @ai-sdk/revai
 
+ ## 2.0.9
+
+ ### Patch Changes
+
+ - 8dc54db: chore: add src folders to package bundle
+
+ ## 2.0.8
+
+ ### Patch Changes
+
+ - Updated dependencies [5c090e7]
+ - @ai-sdk/provider@3.0.4
+ - @ai-sdk/provider-utils@4.0.8
+
  ## 2.0.7
 
  ### Patch Changes
package/dist/index.js CHANGED
@@ -455,7 +455,7 @@ var revaiTranscriptionResponseSchema = import_v42.z.object({
  });
 
  // src/version.ts
- var VERSION = true ? "2.0.7" : "0.0.0-test";
+ var VERSION = true ? "2.0.9" : "0.0.0-test";
 
  // src/revai-provider.ts
  function createRevai(options = {}) {
package/dist/index.mjs CHANGED
@@ -443,7 +443,7 @@ var revaiTranscriptionResponseSchema = z2.object({
  });
 
  // src/version.ts
- var VERSION = true ? "2.0.7" : "0.0.0-test";
+ var VERSION = true ? "2.0.9" : "0.0.0-test";
 
  // src/revai-provider.ts
  function createRevai(options = {}) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "@ai-sdk/revai",
- "version": "2.0.7",
+ "version": "2.0.9",
  "license": "Apache-2.0",
  "sideEffects": false,
  "main": "./dist/index.js",
@@ -8,6 +8,7 @@
  "types": "./dist/index.d.ts",
  "files": [
  "dist/**/*",
+ "src",
  "CHANGELOG.md",
  "README.md"
  ],
@@ -20,15 +21,15 @@
  }
  },
  "dependencies": {
- "@ai-sdk/provider": "3.0.3",
- "@ai-sdk/provider-utils": "4.0.7"
+ "@ai-sdk/provider": "3.0.4",
+ "@ai-sdk/provider-utils": "4.0.8"
  },
  "devDependencies": {
  "@types/node": "20.17.24",
  "tsup": "^8",
  "typescript": "5.6.3",
  "zod": "3.25.76",
- "@ai-sdk/test-server": "1.0.1",
+ "@ai-sdk/test-server": "1.0.2",
  "@vercel/ai-tsconfig": "0.0.0"
  },
  "peerDependencies": {
package/src/index.ts ADDED
@@ -0,0 +1,3 @@
+ export { createRevai, revai } from './revai-provider';
+ export type { RevaiProvider, RevaiProviderSettings } from './revai-provider';
+ export { VERSION } from './version';
package/src/revai-api-types.ts ADDED
@@ -0,0 +1,274 @@
+ export type RevaiTranscriptionAPITypes = {
+ /**
+ * Optional metadata that was provided during job submission.
+ */
+ metadata?: string | null;
+
+ /**
+ * Optional configuration for a callback url to invoke when processing is complete,
+ * in addition to auth headers if they are needed to invoke the callback url.
+ * Cannot be set if callback_url is set. This option will not be visible in the submission response.
+ */
+ notification_config?: {
+ /**
+ * Optional callback url to invoke when processing is complete
+ */
+ url: string;
+ /**
+ * Optional authorization headers, if they are needed to invoke the callback.
+ * There are a few constraints: 1) the "Authorization" header is the only header that can be passed in,
+ * and 2) the header value must be of the form <scheme> <token>.
+ * For example: {"Authorization": "Bearer $BEARER_TOKEN"}
+ */
+ auth_headers?: {
+ /**
+ * Authorization header
+ */
+ Authorization: string;
+ } | null;
+ } | null;
+
+ /**
+ * Amount of time after job completion when job is auto-deleted. Present only when preference set in job request.
+ */
+ delete_after_seconds?: number | null;
+
+ /**
+ * Select which service you would like to transcribe this file with.
+ * - machine: the default and routes to our standard (Reverb) model.
+ * - low_cost: low-cost transcription which uses a quantized ASR model (Reverb Turbo) in a low-cost environment.
+ * - fusion: higher quality ASR that combines multiple models to achieve the best results. Typically has better support for rare words.
+ * @default "machine"
+ */
+ transcriber?: 'machine' | 'low_cost' | 'fusion' | null;
+
+ /**
+ * Configures the transcriber to transcribe every syllable. This will include all false starts and disfluencies in the transcript.
+ *
+ * The behavior depends on the transcriber option:
+ * - machine: the default is true. To turn it off, false should be explicitly provided.
+ * - human: the default is false. To turn it on, true should be explicitly provided.
+ */
+ verbatim?: boolean;
+
+ /**
+ * [HIPAA Unsupported] Only available for the human transcriber option.
+ * When this field is set to true, your job is given higher priority and will be worked on sooner by our human transcribers.
+ * @default false
+ */
+ rush?: boolean | null;
+
+ /**
+ * [HIPAA Unsupported] Only available for the human transcriber option.
+ * When this field is set to true, the behavior will mock a normal human transcription job except no transcription will happen.
+ * The primary use case is to test integrations without being charged for human transcription.
+ * @default false
+ */
+ test_mode?: boolean | null;
+
+ /**
+ * [HIPAA Unsupported] Only available for the human transcriber option.
+ * Use this option to specify which sections of the transcript need to be transcribed.
+ * Segments must be at least 1 minute in length and cannot overlap.
+ */
+ segments_to_transcribe?: Array<{
+ /**
+ * The timestamp of the beginning of the segment relative to the beginning of the audio in seconds (centisecond precision)
+ */
+ start: number;
+ /**
+ * The timestamp of the end of the segment relative to the beginning of the audio in seconds (centisecond precision)
+ */
+ end: number;
+ }> | null;
+
+ /**
+ * [HIPAA Unsupported] Only available for the human transcriber option.
+ * Use this option to specify up to 100 names of speakers in the transcript.
+ * Names may only be up to 50 characters long.
+ */
+ speaker_names?: Array<{
+ /**
+ * The name of the speaker to be used when labeling monologues. Max of 50 characters.
+ */
+ display_name: string;
+ }> | null;
+
+ /**
+ * Specify if speaker diarization will be skipped by the speech engine.
+ * @default false
+ */
+ skip_diarization?: boolean | null;
+
+ /**
+ * Only available for English and Spanish languages.
+ * User-supplied preference on whether to skip post-processing operations such as inverse text normalization (ITN), casing and punctuation.
+ * @default false
+ */
+ skip_postprocessing?: boolean | null;
+
+ /**
+ * Specify if "punct" type elements will be skipped by the speech engine.
+ * For JSON outputs, this includes removing spaces. For text outputs, words will still be delimited by a space.
+ * @default false
+ */
+ skip_punctuation?: boolean | null;
+
+ /**
+ * Currently we only define disfluencies as 'ums' and 'uhs'.
+ * When set to true, disfluencies will not appear in the transcript.
+ * This option also removes atmospherics if the remove_atmospherics is not set.
+ * This option is not available for human transcription jobs.
+ * @default false
+ */
+ remove_disfluencies?: boolean | null;
+
+ /**
+ * We define many atmospherics such as <laugh>, <affirmative> etc.
+ * When set to true, atmospherics will not appear in the transcript.
+ * This option is not available for human transcription jobs.
+ * @default false
+ */
+ remove_atmospherics?: boolean | null;
+
+ /**
+ * Enabling this option will filter for approx. 600 profanities, which cover most use cases.
+ * If a transcribed word matches a word on this list, then all the characters of that word will be replaced by asterisks
+ * except for the first and last character.
+ * @default false
+ */
+ filter_profanity?: boolean | null;
+
+ /**
+ * Only available for English, Spanish and French languages.
+ * Use to specify the total number of unique speaker channels in the audio.
+ *
+ * Given the number of audio channels provided, each channel will be transcribed separately and the channel id assigned to the speaker label.
+ * The final output will be a combination of all individual channel outputs.
+ * Overlapping monologues will have ordering broken by the order in which the first spoken element of each monologue occurs.
+ * If speaker_channels_count is greater than the actual channels in the audio, the job will fail with invalid_media.
+ * This option is not available for human transcription jobs.
+ */
+ speaker_channels_count?: number | null;
+
+ /**
+ * Only available for English, Spanish and French languages.
+ * Use to specify the total number of unique speakers in the audio.
+ *
+ * Given the count of speakers provided, it will be used to improve the diarization accuracy.
+ * This option is not available for human transcription jobs.
+ * @default null
+ */
+ speakers_count?: number | null;
+
+ /**
+ * Use to specify diarization type. This option is not available for human transcription jobs and low-cost environment.
+ * @default "standard"
+ */
+ diarization_type?: 'standard' | 'premium' | null;
+
+ /**
+ * This feature is in beta. You can supply the id of a pre-completed custom vocabulary that you submitted through the Custom Vocabularies API
+ * instead of uploading the list of phrases using the custom_vocabularies parameter.
+ * Using custom_vocabulary_id or custom_vocabularies with the same list of phrases yields the same transcription result,
+ * but custom_vocabulary_id enables your submission to finish processing faster by 6 seconds on average.
+ *
+ * You cannot use both custom_vocabulary_id and custom_vocabularies at the same time, and doing so will result in a 400 response.
+ * If the supplied id represents an incomplete, deleted, or non-existent custom vocabulary then you will receive a 404 response.
+ */
+ custom_vocabulary_id?: string | null;
+
+ /**
+ * Specify a collection of custom vocabulary to be used for this job.
+ * Custom vocabulary informs and biases the speech recognition to find those phrases (at the cost of slightly slower transcription).
+ */
+ custom_vocabularies?: Array<object>;
+
+ /**
+ * If true, only exact phrases will be used as custom vocabulary, i.e. phrases will not be split into individual words for processing.
+ * Enabled by default.
+ */
+ strict_custom_vocabulary?: boolean;
+
+ /**
+ * Use to specify summarization options. This option is not available for human transcription jobs.
+ */
+ summarization_config?: {
+ /**
+ * Model type for summarization.
+ * @default "standard"
+ */
+ model?: 'standard' | 'premium' | null;
+ /**
+ * Summarization formatting type. Use Paragraph for a text summary or Bullets for a list of topics.
+ * prompt and type parameters are mutually exclusive.
+ * @default "paragraph"
+ */
+ type?: 'paragraph' | 'bullets' | null;
+ /**
+ * Custom prompt. Provides the most flexible way to create summaries, but may lead to unpredictable results.
+ * Summary is produced in Markdown format.
+ * prompt and type parameters are mutually exclusive.
+ */
+ prompt?: string | null;
+ } | null;
+
+ /**
+ * Use to specify translation options. This option is not available for human transcription jobs.
+ */
+ translation_config?: {
+ /**
+ * Target languages for translation.
+ */
+ target_languages: Array<{
+ /**
+ * Target language for translation.
+ */
+ language:
+ | 'en'
+ | 'en-us'
+ | 'en-gb'
+ | 'ar'
+ | 'pt'
+ | 'pt-br'
+ | 'pt-pt'
+ | 'fr'
+ | 'fr-ca'
+ | 'es'
+ | 'es-es'
+ | 'es-la'
+ | 'it'
+ | 'ja'
+ | 'ko'
+ | 'de'
+ | 'ru';
+ }>;
+ /**
+ * Model type for translation.
+ * @default "standard"
+ */
+ model?: 'standard' | 'premium' | null;
+ } | null;
+
+ /**
+ * Language is provided as an ISO 639-1 language code, with exceptions.
+ * Only 1 language can be selected per audio, i.e. no multiple languages in one transcription job.
+ * @default "en"
+ */
+ language?: string | null;
+
+ /**
+ * Provides improved accuracy for per-word timestamps for a transcript.
+ *
+ * The following languages are currently supported:
+ * - English (en, en-us, en-gb)
+ * - French (fr)
+ * - Italian (it)
+ * - German (de)
+ * - Spanish (es)
+ *
+ * This option is not available in the low-cost environment.
+ * @default false
+ */
+ forced_alignment?: boolean | null;
+ };
package/src/revai-config.ts ADDED
@@ -0,0 +1,9 @@
+ import { FetchFunction } from '@ai-sdk/provider-utils';
+
+ export type RevaiConfig = {
+ provider: string;
+ url: (options: { modelId: string; path: string }) => string;
+ headers: () => Record<string, string | undefined>;
+ fetch?: FetchFunction;
+ generateId?: () => string;
+ };
package/src/revai-error.test.ts ADDED
@@ -0,0 +1,34 @@
+ import { safeParseJSON } from '@ai-sdk/provider-utils';
+ import { revaiErrorDataSchema } from './revai-error';
+ import { describe, it, expect } from 'vitest';
+
+ describe('revaiErrorDataSchema', () => {
+ it('should parse Rev.ai resource exhausted error', async () => {
+ const error = `
+ {"error":{"message":"{\\n \\"error\\": {\\n \\"code\\": 429,\\n \\"message\\": \\"Resource has been exhausted (e.g. check quota).\\",\\n \\"status\\": \\"RESOURCE_EXHAUSTED\\"\\n }\\n}\\n","code":429}}
+ `;
+
+ const result = await safeParseJSON({
+ text: error,
+ schema: revaiErrorDataSchema,
+ });
+
+ expect(result).toStrictEqual({
+ success: true,
+ value: {
+ error: {
+ message:
+ '{\n "error": {\n "code": 429,\n "message": "Resource has been exhausted (e.g. check quota).",\n "status": "RESOURCE_EXHAUSTED"\n }\n}\n',
+ code: 429,
+ },
+ },
+ rawValue: {
+ error: {
+ message:
+ '{\n "error": {\n "code": 429,\n "message": "Resource has been exhausted (e.g. check quota).",\n "status": "RESOURCE_EXHAUSTED"\n }\n}\n',
+ code: 429,
+ },
+ },
+ });
+ });
+ });
package/src/revai-error.ts ADDED
@@ -0,0 +1,16 @@
+ import { z } from 'zod/v4';
+ import { createJsonErrorResponseHandler } from '@ai-sdk/provider-utils';
+
+ export const revaiErrorDataSchema = z.object({
+ error: z.object({
+ message: z.string(),
+ code: z.number(),
+ }),
+ });
+
+ export type RevaiErrorData = z.infer<typeof revaiErrorDataSchema>;
+
+ export const revaiFailedResponseHandler = createJsonErrorResponseHandler({
+ errorSchema: revaiErrorDataSchema,
+ errorToMessage: data => data.error.message,
+ });
package/src/revai-provider.ts ADDED
@@ -0,0 +1,120 @@
+ import {
+ TranscriptionModelV3,
+ ProviderV3,
+ NoSuchModelError,
+ } from '@ai-sdk/provider';
+ import {
+ FetchFunction,
+ loadApiKey,
+ withUserAgentSuffix,
+ } from '@ai-sdk/provider-utils';
+ import { RevaiTranscriptionModel } from './revai-transcription-model';
+ import { RevaiTranscriptionModelId } from './revai-transcription-options';
+ import { VERSION } from './version';
+
+ export interface RevaiProvider extends ProviderV3 {
+ (
+ modelId: 'machine',
+ settings?: {},
+ ): {
+ transcription: RevaiTranscriptionModel;
+ };
+
+ /**
+ Creates a model for transcription.
+ */
+ transcription(modelId: RevaiTranscriptionModelId): TranscriptionModelV3;
+
+ /**
+ * @deprecated Use `embeddingModel` instead.
+ */
+ textEmbeddingModel(modelId: string): never;
+ }
+
+ export interface RevaiProviderSettings {
+ /**
+ API key for authenticating requests.
+ */
+ apiKey?: string;
+
+ /**
+ Custom headers to include in the requests.
+ */
+ headers?: Record<string, string>;
+
+ /**
+ Custom fetch implementation. You can use it as a middleware to intercept requests,
+ or to provide a custom fetch implementation for e.g. testing.
+ */
+ fetch?: FetchFunction;
+ }
+
+ /**
+ Create a Rev.ai provider instance.
+ */
+ export function createRevai(
+ options: RevaiProviderSettings = {},
+ ): RevaiProvider {
+ const getHeaders = () =>
+ withUserAgentSuffix(
+ {
+ authorization: `Bearer ${loadApiKey({
+ apiKey: options.apiKey,
+ environmentVariableName: 'REVAI_API_KEY',
+ description: 'Rev.ai',
+ })}`,
+ ...options.headers,
+ },
+ `ai-sdk/revai/${VERSION}`,
+ );
+
+ const createTranscriptionModel = (modelId: RevaiTranscriptionModelId) =>
+ new RevaiTranscriptionModel(modelId, {
+ provider: `revai.transcription`,
+ url: ({ path }) => `https://api.rev.ai${path}`,
+ headers: getHeaders,
+ fetch: options.fetch,
+ });
+
+ const provider = function (modelId: RevaiTranscriptionModelId) {
+ return {
+ transcription: createTranscriptionModel(modelId),
+ };
+ };
+
+ provider.specificationVersion = 'v3' as const;
+ provider.transcription = createTranscriptionModel;
+ provider.transcriptionModel = createTranscriptionModel;
+
+ provider.languageModel = () => {
+ throw new NoSuchModelError({
+ modelId: 'unknown',
+ modelType: 'languageModel',
+ message: 'Rev.ai does not provide language models',
+ });
+ };
+
+ provider.embeddingModel = () => {
+ throw new NoSuchModelError({
+ modelId: 'unknown',
+ modelType: 'embeddingModel',
+ message: 'Rev.ai does not provide text embedding models',
+ });
+ };
+ provider.textEmbeddingModel = provider.embeddingModel;
+
+ provider.imageModel = () => {
+ throw new NoSuchModelError({
+ modelId: 'unknown',
+ modelType: 'imageModel',
+ message: 'Rev.ai does not provide image models',
+ });
+ };
+
+ return provider as RevaiProvider;
+ }
+
+ /**
+ Default Rev.ai provider instance.
+ */
+ export const revai = createRevai();
package/src/revai-transcription-model.test.ts ADDED
@@ -0,0 +1,282 @@
+ import { createTestServer } from '@ai-sdk/test-server/with-vitest';
+ import { RevaiTranscriptionModel } from './revai-transcription-model';
+ import { createRevai } from './revai-provider';
+ import { readFile } from 'node:fs/promises';
+ import path from 'node:path';
+ import { describe, it, expect, vi } from 'vitest';
+
+ vi.mock('./version', () => ({
+ VERSION: '0.0.0-test',
+ }));
+
+ const audioData = await readFile(path.join(__dirname, 'transcript-test.mp3'));
+ const provider = createRevai({ apiKey: 'test-api-key' });
+ const model = provider.transcription('machine');
+
+ const server = createTestServer({
+ 'https://api.rev.ai/speechtotext/v1/jobs': {},
+ 'https://api.rev.ai/speechtotext/v1/jobs/test-id': {},
+ 'https://api.rev.ai/speechtotext/v1/jobs/test-id/transcript': {},
+ });
+
+ describe('doGenerate', () => {
+ function prepareJsonResponse({
+ headers,
+ }: {
+ headers?: Record<string, string>;
+ } = {}) {
+ server.urls['https://api.rev.ai/speechtotext/v1/jobs'].response = {
+ type: 'json-value',
+ headers,
+ body: {
+ id: 'test-id',
+ status: 'in_progress',
+ language: 'en',
+ created_on: '2018-05-05T23:23:22.29Z',
+ transcriber: 'machine',
+ },
+ };
+ server.urls['https://api.rev.ai/speechtotext/v1/jobs/test-id'].response = {
+ type: 'json-value',
+ headers,
+ body: {
+ id: 'test-id',
+ status: 'transcribed',
+ language: 'en',
+ created_on: '2018-05-05T23:23:22.29Z',
+ transcriber: 'machine',
+ },
+ };
+ server.urls[
+ 'https://api.rev.ai/speechtotext/v1/jobs/test-id/transcript'
+ ].response = {
+ type: 'json-value',
+ headers,
+ body: {
+ monologues: [
+ {
+ speaker: 1,
+ elements: [
+ {
+ type: 'text',
+ value: 'Hello',
+ ts: 0.5,
+ end_ts: 1.5,
+ confidence: 1,
+ },
+ {
+ type: 'punct',
+ value: ' ',
+ },
+ {
+ type: 'text',
+ value: 'World',
+ ts: 1.75,
+ end_ts: 2.85,
+ confidence: 0.8,
+ },
+ {
+ type: 'punct',
+ value: '.',
+ },
+ ],
+ },
+ {
+ speaker: 2,
+ elements: [
+ {
+ type: 'text',
+ value: 'monologues',
+ ts: 3,
+ end_ts: 3.5,
+ confidence: 1,
+ },
+ {
+ type: 'punct',
+ value: ' ',
+ },
+ {
+ type: 'text',
+ value: 'are',
+ ts: 3.6,
+ end_ts: 3.9,
+ confidence: 1,
+ },
+ {
+ type: 'punct',
+ value: ' ',
+ },
+ {
+ type: 'text',
+ value: 'a',
+ ts: 4,
+ end_ts: 4.3,
+ confidence: 1,
+ },
+ {
+ type: 'punct',
+ value: ' ',
+ },
+ {
+ type: 'text',
+ value: 'block',
+ ts: 4.5,
+ end_ts: 5.5,
+ confidence: 1,
+ },
+ {
+ type: 'punct',
+ value: ' ',
+ },
+ {
+ type: 'text',
+ value: 'of',
+ ts: 5.75,
+ end_ts: 6.14,
+ confidence: 1,
+ },
+ {
+ type: 'punct',
+ value: ' ',
+ },
+ {
+ type: 'unknown',
+ value: '<inaudible>',
+ },
+ {
+ type: 'punct',
+ value: ' ',
+ },
+ {
+ type: 'text',
+ value: 'text',
+ ts: 6.5,
+ end_ts: 7.78,
+ confidence: 1,
+ },
+ {
+ type: 'punct',
+ value: '.',
+ },
+ ],
+ },
+ ],
+ },
+ };
+ }
+
+ it('should pass the model', async () => {
+ prepareJsonResponse();
+
+ await model.doGenerate({
+ audio: audioData,
+ mediaType: 'audio/wav',
+ });
+
+ expect(await server.calls[0].requestBodyMultipart).toMatchObject({
+ media: expect.any(File),
+ config: '{"transcriber":"machine"}',
+ });
+ });
+
+ it('should pass headers', async () => {
+ prepareJsonResponse();
+
+ const provider = createRevai({
+ apiKey: 'test-api-key',
+ headers: {
+ 'Custom-Provider-Header': 'provider-header-value',
+ },
+ });
+
+ await provider.transcription('machine').doGenerate({
+ audio: audioData,
+ mediaType: 'audio/wav',
+ headers: {
+ 'Custom-Request-Header': 'request-header-value',
+ },
+ });
+
+ expect(server.calls[0].requestHeaders).toMatchObject({
+ authorization: 'Bearer test-api-key',
+ 'content-type': expect.stringMatching(
+ /^multipart\/form-data; boundary=----formdata-undici-\d+$/,
+ ),
+ 'custom-provider-header': 'provider-header-value',
+ 'custom-request-header': 'request-header-value',
+ });
+
+ expect(server.calls[0].requestUserAgent).toContain(
+ `ai-sdk/revai/0.0.0-test`,
+ );
+ });
+
+ it('should extract the transcription text', async () => {
+ prepareJsonResponse();
+
+ const result = await model.doGenerate({
+ audio: audioData,
+ mediaType: 'audio/wav',
+ });
+
+ expect(result.text).toBe(
+ 'Hello World. monologues are a block of <inaudible> text.',
+ );
+ });
+
+ it('should include response data with timestamp, modelId and headers', async () => {
+ prepareJsonResponse({
+ headers: {
+ 'x-request-id': 'test-request-id',
+ 'x-ratelimit-remaining': '123',
+ },
+ });
+
+ const testDate = new Date(0);
+ const customModel = new RevaiTranscriptionModel('machine', {
+ provider: 'test-provider',
+ url: ({ path }) => `https://api.rev.ai${path}`,
+ headers: () => ({}),
+ _internal: {
+ currentDate: () => testDate,
+ },
+ });
+
+ const result = await customModel.doGenerate({
+ audio: audioData,
+ mediaType: 'audio/wav',
+ });
+
+ expect(result.response).toMatchObject({
+ timestamp: testDate,
+ modelId: 'machine',
+ headers: {
+ 'content-type': 'application/json',
+ 'x-request-id': 'test-request-id',
+ 'x-ratelimit-remaining': '123',
+ },
+ });
+ });
+
+ it('should use real date when no custom date provider is specified', async () => {
+ prepareJsonResponse();
+
+ const testDate = new Date(0);
+ const customModel = new RevaiTranscriptionModel('machine', {
+ provider: 'test-provider',
+ url: ({ path }) => `https://api.rev.ai${path}`,
+ headers: () => ({}),
+ _internal: {
+ currentDate: () => testDate,
+ },
+ });
+
+ const result = await customModel.doGenerate({
+ audio: audioData,
+ mediaType: 'audio/wav',
+ });
+
+ expect(result.response.timestamp.getTime()).toEqual(testDate.getTime());
+ expect(result.response.modelId).toBe('machine');
+ });
+ });
package/src/revai-transcription-model.ts ADDED
@@ -0,0 +1,516 @@
+ import {
+ AISDKError,
+ TranscriptionModelV3,
+ SharedV3Warning,
+ } from '@ai-sdk/provider';
+ import {
+ combineHeaders,
+ convertBase64ToUint8Array,
+ createJsonResponseHandler,
+ mediaTypeToExtension,
+ delay,
+ getFromApi,
+ parseProviderOptions,
+ postFormDataToApi,
+ } from '@ai-sdk/provider-utils';
+ import { z } from 'zod/v4';
+ import { RevaiConfig } from './revai-config';
+ import { revaiFailedResponseHandler } from './revai-error';
+ import { RevaiTranscriptionModelId } from './revai-transcription-options';
+ import { RevaiTranscriptionAPITypes } from './revai-api-types';
+
+ // https://docs.rev.ai/api/asynchronous/reference/#operation/SubmitTranscriptionJob
+ const revaiProviderOptionsSchema = z.object({
+ /**
+ * Optional metadata string to associate with the transcription job.
+ */
+ metadata: z.string().nullish(),
+ /**
+ * Configuration for webhook notifications when job is complete.
+ */
+ notification_config: z
+ .object({
+ /**
+ * URL to send the notification to.
+ */
+ url: z.string(),
+ /**
+ * Optional authorization headers for the notification request.
+ */
+ auth_headers: z
+ .object({
+ Authorization: z.string(),
+ })
+ .nullish(),
+ })
+ .nullish(),
+ /**
+ * Number of seconds after which the job will be automatically deleted.
+ */
+ delete_after_seconds: z.number().nullish(),
+ /**
+ * Whether to include filler words and false starts in the transcription.
+ */
+ verbatim: z.boolean().optional(),
+ /**
+ * Whether to prioritize the job for faster processing.
+ */
+ rush: z.boolean().nullish().default(false),
+ /**
+ * Whether to run the job in test mode.
+ */
+ test_mode: z.boolean().nullish().default(false),
+ /**
+ * Specific segments of the audio to transcribe.
+ */
+ segments_to_transcribe: z
+ .array(
+ z.object({
+ /**
+ * Start time of the segment in seconds.
+ */
+ start: z.number(),
+ /**
+ * End time of the segment in seconds.
+ */
+ end: z.number(),
+ }),
+ )
+ .nullish(),
+ /**
+ * Names to assign to speakers in the transcription.
+ */
+ speaker_names: z
+ .array(
+ z.object({
+ /**
+ * Display name for the speaker.
+ */
+ display_name: z.string(),
+ }),
+ )
+ .nullish(),
+ /**
+ * Whether to skip speaker diarization.
+ */
+ skip_diarization: z.boolean().nullish().default(false),
+ /**
+ * Whether to skip post-processing steps.
+ */
+ skip_postprocessing: z.boolean().nullish().default(false),
+ /**
+ * Whether to skip adding punctuation to the transcription.
+ */
+ skip_punctuation: z.boolean().nullish().default(false),
+ /**
+ * Whether to remove disfluencies (um, uh, etc.) from the transcription.
+ */
+ remove_disfluencies: z.boolean().nullish().default(false),
+ /**
+ * Whether to remove atmospheric sounds from the transcription.
+ */
+ remove_atmospherics: z.boolean().nullish().default(false),
+ /**
+ * Whether to filter profanity from the transcription.
+ */
+ filter_profanity: z.boolean().nullish().default(false),
+ /**
+ * Number of speaker channels in the audio.
+ */
+ speaker_channels_count: z.number().nullish(),
+ /**
+ * Expected number of speakers in the audio.
+ */
+ speakers_count: z.number().nullish(),
+ /**
+ * Type of diarization to use.
+ */
+ diarization_type: z
+ .enum(['standard', 'premium'])
+ .nullish()
+ .default('standard'),
+ /**
+ * ID of a custom vocabulary to use for the transcription.
+ */
+ custom_vocabulary_id: z.string().nullish(),
+ /**
+ * Custom vocabularies to use for the transcription.
+ */
+ custom_vocabularies: z.array(z.object({})).optional(),
+ /**
+ * Whether to strictly enforce custom vocabulary.
+ */
+ strict_custom_vocabulary: z.boolean().optional(),
+ /**
+ * Configuration for generating a summary of the transcription.
+ */
+ summarization_config: z
+ .object({
+ /**
+ * Model to use for summarization.
+ */
+ model: z.enum(['standard', 'premium']).nullish().default('standard'),
+ /**
+ * Format of the summary.
+ */
+ type: z.enum(['paragraph', 'bullets']).nullish().default('paragraph'),
+ /**
+ * Custom prompt for the summarization.
+ */
+ prompt: z.string().nullish(),
+ })
+ .nullish(),
+ /**
+ * Configuration for translating the transcription.
+ */
+ translation_config: z
+ .object({
+ /**
+ * Target languages for translation.
+ */
+ target_languages: z.array(
+ z.object({
+ /**
+ * Language code for translation target.
+ */
+ language: z.enum([
+ 'en',
+ 'en-us',
+ 'en-gb',
+ 'ar',
+ 'pt',
+ 'pt-br',
+ 'pt-pt',
+ 'fr',
+ 'fr-ca',
+ 'es',
+ 'es-es',
+ 'es-la',
+ 'it',
+ 'ja',
+ 'ko',
+ 'de',
+ 'ru',
+ ]),
+ }),
+ ),
+ /**
+ * Model to use for translation.
+ */
+ model: z.enum(['standard', 'premium']).nullish().default('standard'),
+ })
+ .nullish(),
+ /**
+ * Language of the audio content.
+ */
+ language: z.string().nullish().default('en'),
+ /**
+ * Whether to perform forced alignment.
+ */
+ forced_alignment: z.boolean().nullish().default(false),
+ });
+
+ export type RevaiTranscriptionCallOptions = z.infer<
+ typeof revaiProviderOptionsSchema
+ >;
+
+ interface RevaiTranscriptionModelConfig extends RevaiConfig {
+ _internal?: {
+ currentDate?: () => Date;
+ };
+ }
+
+ export class RevaiTranscriptionModel implements TranscriptionModelV3 {
+ readonly specificationVersion = 'v3';
+
+ get provider(): string {
+ return this.config.provider;
+ }
+
+ constructor(
+ readonly modelId: RevaiTranscriptionModelId,
+ private readonly config: RevaiTranscriptionModelConfig,
+ ) {}
+
+ private async getArgs({
+ audio,
+ mediaType,
+ providerOptions,
+ }: Parameters<TranscriptionModelV3['doGenerate']>[0]) {
+ const warnings: SharedV3Warning[] = [];
+
+ // Parse provider options
+ const revaiOptions = await parseProviderOptions({
+ provider: 'revai',
+ providerOptions,
+ schema: revaiProviderOptionsSchema,
+ });
+
+ // Create form data with base fields
+ const formData = new FormData();
+ const blob =
+ audio instanceof Uint8Array
+ ? new Blob([audio])
+ : new Blob([convertBase64ToUint8Array(audio)]);
+
+ const fileExtension = mediaTypeToExtension(mediaType);
+ formData.append(
+ 'media',
+ new File([blob], 'audio', { type: mediaType }),
+ `audio.${fileExtension}`,
+ );
+ const transcriptionModelOptions: RevaiTranscriptionAPITypes = {
+ transcriber: this.modelId,
+ };
+
+ // Add provider-specific options
+ if (revaiOptions) {
+ const formDataConfig: RevaiTranscriptionAPITypes = {
+ metadata: revaiOptions.metadata ?? undefined,
+ notification_config: revaiOptions.notification_config ?? undefined,
+ delete_after_seconds: revaiOptions.delete_after_seconds ?? undefined,
+ verbatim: revaiOptions.verbatim ?? undefined,
+ rush: revaiOptions.rush ?? undefined,
+ test_mode: revaiOptions.test_mode ?? undefined,
+ segments_to_transcribe:
+ revaiOptions.segments_to_transcribe ?? undefined,
+ speaker_names: revaiOptions.speaker_names ?? undefined,
+ skip_diarization: revaiOptions.skip_diarization ?? undefined,
+ skip_postprocessing: revaiOptions.skip_postprocessing ?? undefined,
+ skip_punctuation: revaiOptions.skip_punctuation ?? undefined,
+ remove_disfluencies: revaiOptions.remove_disfluencies ?? undefined,
+ remove_atmospherics: revaiOptions.remove_atmospherics ?? undefined,
+ filter_profanity: revaiOptions.filter_profanity ?? undefined,
+ speaker_channels_count:
+ revaiOptions.speaker_channels_count ?? undefined,
+ speakers_count: revaiOptions.speakers_count ?? undefined,
+ diarization_type: revaiOptions.diarization_type ?? undefined,
+ custom_vocabulary_id: revaiOptions.custom_vocabulary_id ?? undefined,
+ custom_vocabularies: revaiOptions.custom_vocabularies ?? undefined,
+ strict_custom_vocabulary:
+ revaiOptions.strict_custom_vocabulary ?? undefined,
+ summarization_config: revaiOptions.summarization_config ?? undefined,
+ translation_config: revaiOptions.translation_config ?? undefined,
+ language: revaiOptions.language ?? undefined,
+ forced_alignment: revaiOptions.forced_alignment ?? undefined,
+ };
+
+ for (const key in formDataConfig) {
+ const value = formDataConfig[key as keyof RevaiTranscriptionAPITypes];
+ if (value !== undefined) {
+ (transcriptionModelOptions as Record<string, unknown>)[
+ key as keyof RevaiTranscriptionAPITypes
+ ] = value;
+ }
+ }
+ }
+
+ formData.append('config', JSON.stringify(transcriptionModelOptions));
+
+ return {
+ formData,
+ warnings,
+ };
+ }
+
+ async doGenerate(
+ options: Parameters<TranscriptionModelV3['doGenerate']>[0],
+ ): Promise<Awaited<ReturnType<TranscriptionModelV3['doGenerate']>>> {
+ const currentDate = this.config._internal?.currentDate?.() ?? new Date();
+ const { formData, warnings } = await this.getArgs(options);
+
+ const { value: submissionResponse } = await postFormDataToApi({
+ url: this.config.url({
+ path: '/speechtotext/v1/jobs',
+ modelId: this.modelId,
+ }),
+ headers: combineHeaders(this.config.headers(), options.headers),
+ formData,
+ failedResponseHandler: revaiFailedResponseHandler,
+ successfulResponseHandler: createJsonResponseHandler(
+ revaiTranscriptionJobResponseSchema,
+ ),
+ abortSignal: options.abortSignal,
+ fetch: this.config.fetch,
+ });
+
+ if (submissionResponse.status === 'failed') {
+ throw new AISDKError({
+ message: 'Failed to submit transcription job to Rev.ai',
+ name: 'TranscriptionJobSubmissionFailed',
+ cause: submissionResponse,
+ });
+ }
+
+ const jobId = submissionResponse.id;
+ const timeoutMs = 60 * 1000; // 60 seconds timeout
+ const startTime = Date.now();
+ const pollingInterval = 1000;
+ let jobResponse = submissionResponse;
+
+ while (jobResponse.status !== 'transcribed') {
+ // Check if we've exceeded the timeout
+ if (Date.now() - startTime > timeoutMs) {
+ throw new AISDKError({
+ message: 'Transcription job polling timed out',
+ name: 'TranscriptionJobPollingTimedOut',
+ cause: submissionResponse,
+ });
+ }
+
+ // Poll for job status
+ const pollingResult = await getFromApi({
+ url: this.config.url({
+ path: `/speechtotext/v1/jobs/${jobId}`,
+ modelId: this.modelId,
+ }),
+ headers: combineHeaders(this.config.headers(), options.headers),
+ failedResponseHandler: revaiFailedResponseHandler,
+ successfulResponseHandler: createJsonResponseHandler(
+ revaiTranscriptionJobResponseSchema,
+ ),
+ abortSignal: options.abortSignal,
+ fetch: this.config.fetch,
+ });
+
+ jobResponse = pollingResult.value;
+
+ if (jobResponse.status === 'failed') {
+ throw new AISDKError({
+ message: 'Transcription job failed',
+ name: 'TranscriptionJobFailed',
+ cause: jobResponse,
+ });
+ }
+
+ // Wait before polling again (only if we need to continue polling)
+ if (jobResponse.status !== 'transcribed') {
+ await delay(pollingInterval);
+ }
+ }
+
+ const {
+ value: transcriptionResult,
+ responseHeaders,
+ rawValue: rawResponse,
+ } = await getFromApi({
+ url: this.config.url({
+ path: `/speechtotext/v1/jobs/${jobId}/transcript`,
+ modelId: this.modelId,
+ }),
+ headers: combineHeaders(this.config.headers(), options.headers),
+ failedResponseHandler: revaiFailedResponseHandler,
+ successfulResponseHandler: createJsonResponseHandler(
+ revaiTranscriptionResponseSchema,
+ ),
+ abortSignal: options.abortSignal,
+ fetch: this.config.fetch,
+ });
+
+ let durationInSeconds = 0;
+ const segments: {
+ text: string;
+ startSecond: number;
+ endSecond: number;
+ }[] = [];
+
+ for (const monologue of transcriptionResult.monologues ?? []) {
+ // Process each monologue to extract segments with timing information
+ let currentSegmentText = '';
+ let segmentStartSecond = 0;
+ let hasStartedSegment = false;
+
+ for (const element of monologue?.elements ?? []) {
+ // Add the element value to the current segment text
+ currentSegmentText += element.value;
+
+ // For text elements, track timing information
+ if (element.type === 'text') {
+ // Update the overall duration if this is the latest timestamp
+ if (element.end_ts && element.end_ts > durationInSeconds) {
+ durationInSeconds = element.end_ts;
+ }
+
+ // If this is the first text element in a segment, mark the start time
+ if (!hasStartedSegment && typeof element.ts === 'number') {
+ segmentStartSecond = element.ts;
+ hasStartedSegment = true;
+ }
+
+ // If we have an end timestamp, we can complete a segment
+ if (typeof element.end_ts === 'number' && hasStartedSegment) {
+ // Only add non-empty segments
+ if (currentSegmentText.trim()) {
+ segments.push({
+ text: currentSegmentText.trim(),
+ startSecond: segmentStartSecond,
+ endSecond: element.end_ts,
+ });
+ }
+
+ // Reset for the next segment
+ currentSegmentText = '';
+ hasStartedSegment = false;
+ }
+ }
+ }
+
+ // Handle any remaining segment text that wasn't added
+ if (hasStartedSegment && currentSegmentText.trim()) {
+ const endSecond =
+ durationInSeconds > segmentStartSecond
+ ? durationInSeconds
+ : segmentStartSecond + 1;
+ segments.push({
+ text: currentSegmentText.trim(),
+ startSecond: segmentStartSecond,
+ endSecond: endSecond,
+ });
+ }
+ }
+
+ return {
+ text:
+ transcriptionResult.monologues
+ ?.map(monologue =>
+ monologue?.elements?.map(element => element.value).join(''),
+ )
+ .join(' ') ?? '',
+ segments,
+ language: submissionResponse.language ?? undefined,
+ durationInSeconds,
+ warnings,
+ response: {
+ timestamp: currentDate,
+ modelId: this.modelId,
+ headers: responseHeaders,
+ body: rawResponse,
+ },
+ };
+ }
+ }
+
+ const revaiTranscriptionJobResponseSchema = z.object({
+ id: z.string().nullish(),
+ status: z.string().nullish(),
+ language: z.string().nullish(),
+ });
+
+ const revaiTranscriptionResponseSchema = z.object({
+ monologues: z
+ .array(
+ z.object({
+ elements: z
+ .array(
+ z.object({
+ type: z.string().nullish(),
+ value: z.string().nullish(),
+ ts: z.number().nullish(),
+ end_ts: z.number().nullish(),
+ }),
+ )
+ .nullish(),
+ }),
+ )
+ .nullish(),
+ });
package/src/revai-transcription-options.ts ADDED
@@ -0,0 +1 @@
+ export type RevaiTranscriptionModelId = 'machine' | 'low_cost' | 'fusion';
Binary file
package/src/version.ts ADDED
@@ -0,0 +1,6 @@
+ // Version string of this package injected at build time.
+ declare const __PACKAGE_VERSION__: string | undefined;
+ export const VERSION: string =
+ typeof __PACKAGE_VERSION__ !== 'undefined'
+ ? __PACKAGE_VERSION__
+ : '0.0.0-test';
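
For reference, a minimal usage sketch of the provider added in this release. It is not part of the published diff; it assumes the `ai` package's `experimental_transcribe` helper, a REVAI_API_KEY environment variable, and a local "meeting.mp3" file (all illustrative, not taken from this package).

// Hypothetical usage sketch, not part of the published package contents.
import { experimental_transcribe as transcribe } from 'ai';
import { revai } from '@ai-sdk/revai';
import { readFile } from 'node:fs/promises';

// The default provider instance reads REVAI_API_KEY from the environment.
const result = await transcribe({
  model: revai.transcription('machine'),
  audio: await readFile('meeting.mp3'),
  // Provider options are validated against revaiProviderOptionsSchema under the 'revai' key.
  providerOptions: { revai: { filter_profanity: true } },
});

console.log(result.text);
console.log(result.segments);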