@ai-sdk/assemblyai 0.0.0-1c33ba03-20260114162300 → 0.0.0-4115c213-20260122152721

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,11 +1,40 @@
  # @ai-sdk/assemblyai

- ## 0.0.0-1c33ba03-20260114162300
+ ## 0.0.0-4115c213-20260122152721

  ### Patch Changes

- - Updated dependencies [261c011]
-   - @ai-sdk/provider-utils@0.0.0-1c33ba03-20260114162300
+ - 4caafb2: chore: excluded tests from src folder in npm package
+ - Updated dependencies [4caafb2]
+   - @ai-sdk/provider@0.0.0-4115c213-20260122152721
+   - @ai-sdk/provider-utils@0.0.0-4115c213-20260122152721
+
+ ## 2.0.10
+
+ ### Patch Changes
+
+ - 2b8369d: chore: add docs to package dist
+
+ ## 2.0.9
+
+ ### Patch Changes
+
+ - 8dc54db: chore: add src folders to package bundle
+
+ ## 2.0.8
+
+ ### Patch Changes
+
+ - Updated dependencies [5c090e7]
+   - @ai-sdk/provider@3.0.4
+   - @ai-sdk/provider-utils@4.0.8
+
+ ## 2.0.7
+
+ ### Patch Changes
+
+ - Updated dependencies [46f46e4]
+   - @ai-sdk/provider-utils@4.0.7

  ## 2.0.6

package/dist/index.js CHANGED
@@ -401,7 +401,7 @@ var assemblyaiTranscriptionResponseSchema = import_v42.z.object({
  });

  // src/version.ts
- var VERSION = true ? "0.0.0-1c33ba03-20260114162300" : "0.0.0-test";
+ var VERSION = true ? "0.0.0-4115c213-20260122152721" : "0.0.0-test";

  // src/assemblyai-provider.ts
  function createAssemblyAI(options = {}) {
package/dist/index.mjs CHANGED
@@ -385,7 +385,7 @@ var assemblyaiTranscriptionResponseSchema = z2.object({
  });

  // src/version.ts
- var VERSION = true ? "0.0.0-1c33ba03-20260114162300" : "0.0.0-test";
+ var VERSION = true ? "0.0.0-4115c213-20260122152721" : "0.0.0-test";

  // src/assemblyai-provider.ts
  function createAssemblyAI(options = {}) {
@@ -0,0 +1,282 @@
+ ---
+ title: AssemblyAI
+ description: Learn how to use the AssemblyAI provider for the AI SDK.
+ ---
+
+ # AssemblyAI Provider
+
+ The [AssemblyAI](https://assemblyai.com/) provider contains transcription model support for the AssemblyAI transcription API.
+
+ ## Setup
+
+ The AssemblyAI provider is available in the `@ai-sdk/assemblyai` module. You can install it with:
+
+ <Tabs items={['pnpm', 'npm', 'yarn', 'bun']}>
+   <Tab>
+     <Snippet text="pnpm add @ai-sdk/assemblyai" dark />
+   </Tab>
+   <Tab>
+     <Snippet text="npm install @ai-sdk/assemblyai" dark />
+   </Tab>
+   <Tab>
+     <Snippet text="yarn add @ai-sdk/assemblyai" dark />
+   </Tab>
+   <Tab>
+     <Snippet text="bun add @ai-sdk/assemblyai" dark />
+   </Tab>
+ </Tabs>
+
+ ## Provider Instance
+
+ You can import the default provider instance `assemblyai` from `@ai-sdk/assemblyai`:
+
+ ```ts
+ import { assemblyai } from '@ai-sdk/assemblyai';
+ ```
+
+ If you need a customized setup, you can import `createAssemblyAI` from `@ai-sdk/assemblyai` and create a provider instance with your settings:
+
+ ```ts
+ import { createAssemblyAI } from '@ai-sdk/assemblyai';
+
+ const assemblyai = createAssemblyAI({
+   // custom settings, e.g.
+   fetch: customFetch,
+ });
+ ```
+
+ You can use the following optional settings to customize the AssemblyAI provider instance:
+
+ - **apiKey** _string_
+
+   API key that is sent using the `Authorization` header.
+   It defaults to the `ASSEMBLYAI_API_KEY` environment variable.
+
+ - **headers** _Record&lt;string,string&gt;_
+
+   Custom headers to include in the requests.
+
+ - **fetch** _(input: RequestInfo, init?: RequestInit) => Promise&lt;Response&gt;_
+
+   Custom [fetch](https://developer.mozilla.org/en-US/docs/Web/API/fetch) implementation.
+   Defaults to the global `fetch` function.
+   You can use it as a middleware to intercept requests,
+   or to provide a custom fetch implementation for e.g. testing.
+
+ ## Transcription Models
+
+ You can create models that call the [AssemblyAI transcription API](https://www.assemblyai.com/docs/getting-started/transcribe-an-audio-file/typescript)
+ using the `.transcription()` factory method.
+
+ The first argument is the model id, e.g. `best`.
+
+ ```ts
+ const model = assemblyai.transcription('best');
+ ```
+
+ You can also pass additional provider-specific options using the `providerOptions` argument. For example, supplying the `contentSafety` option will enable content safety filtering.
+
+ ```ts highlight="6"
+ import { experimental_transcribe as transcribe } from 'ai';
+ import { assemblyai } from '@ai-sdk/assemblyai';
+ import { readFile } from 'fs/promises';
+
+ const result = await transcribe({
+   model: assemblyai.transcription('best'),
+   audio: await readFile('audio.mp3'),
+   providerOptions: { assemblyai: { contentSafety: true } },
+ });
+ ```
+
+ The following provider options are available:
+
+ - **audioEndAt** _number_
+
+   End time of the audio in milliseconds.
+   Optional.
+
+ - **audioStartFrom** _number_
+
+   Start time of the audio in milliseconds.
+   Optional.
+
+ - **autoChapters** _boolean_
+
+   Whether to automatically generate chapters for the transcription.
+   Optional.
+
+ - **autoHighlights** _boolean_
+
+   Whether to automatically generate highlights for the transcription.
+   Optional.
+
+ - **boostParam** _enum_
+
+   Boost parameter for the transcription.
+   Allowed values: `'low'`, `'default'`, `'high'`.
+   Optional.
+
+ - **contentSafety** _boolean_
+
+   Whether to enable content safety filtering.
+   Optional.
+
+ - **contentSafetyConfidence** _number_
+
+   Confidence threshold for content safety filtering (25-100).
+   Optional.
+
+ - **customSpelling** _array of objects_
+
+   Custom spelling rules for the transcription.
+   Each object has `from` (array of strings) and `to` (string) properties.
+   Optional.
+
+ - **disfluencies** _boolean_
+
+   Whether to include disfluencies (um, uh, etc.) in the transcription.
+   Optional.
+
+ - **entityDetection** _boolean_
+
+   Whether to detect entities in the transcription.
+   Optional.
+
+ - **filterProfanity** _boolean_
+
+   Whether to filter profanity in the transcription.
+   Optional.
+
+ - **formatText** _boolean_
+
+   Whether to format the text in the transcription.
+   Optional.
+
+ - **iabCategories** _boolean_
+
+   Whether to include IAB categories in the transcription.
+   Optional.
+
+ - **languageCode** _string_
+
+   Language code for the audio.
+   Supports numerous ISO-639-1 and ISO-639-3 language codes.
+   Optional.
+
+ - **languageConfidenceThreshold** _number_
+
+   Confidence threshold for language detection.
+   Optional.
+
+ - **languageDetection** _boolean_
+
+   Whether to enable language detection.
+   Optional.
+
+ - **multichannel** _boolean_
+
+   Whether to process multiple audio channels separately.
+   Optional.
+
+ - **punctuate** _boolean_
+
+   Whether to add punctuation to the transcription.
+   Optional.
+
+ - **redactPii** _boolean_
+
+   Whether to redact personally identifiable information.
+   Optional.
+
+ - **redactPiiAudio** _boolean_
+
+   Whether to redact PII in the audio file.
+   Optional.
+
+ - **redactPiiAudioQuality** _enum_
+
+   Quality of the redacted audio file.
+   Allowed values: `'mp3'`, `'wav'`.
+   Optional.
+
+ - **redactPiiPolicies** _array of enums_
+
+   Policies for PII redaction, specifying which types of information to redact.
+   Supports numerous types like `'person_name'`, `'phone_number'`, etc.
+   Optional.
+
+ - **redactPiiSub** _enum_
+
+   Substitution method for redacted PII.
+   Allowed values: `'entity_name'`, `'hash'`.
+   Optional.
+
+ - **sentimentAnalysis** _boolean_
+
+   Whether to perform sentiment analysis on the transcription.
+   Optional.
+
+ - **speakerLabels** _boolean_
+
+   Whether to label different speakers in the transcription.
+   Optional.
+
+ - **speakersExpected** _number_
+
+   Expected number of speakers in the audio.
+   Optional.
+
+ - **speechThreshold** _number_
+
+   Threshold for speech detection (0-1).
+   Optional.
+
+ - **summarization** _boolean_
+
+   Whether to generate a summary of the transcription.
+   Optional.
+
+ - **summaryModel** _enum_
+
+   Model to use for summarization.
+   Allowed values: `'informative'`, `'conversational'`, `'catchy'`.
+   Optional.
+
+ - **summaryType** _enum_
+
+   Type of summary to generate.
+   Allowed values: `'bullets'`, `'bullets_verbose'`, `'gist'`, `'headline'`, `'paragraph'`.
+   Optional.
+
+ - **topics** _array of strings_
+
+   List of topics to detect in the transcription.
+   Optional.
+
+ - **webhookAuthHeaderName** _string_
+
+   Name of the authentication header for webhook requests.
+   Optional.
+
+ - **webhookAuthHeaderValue** _string_
+
+   Value of the authentication header for webhook requests.
+   Optional.
+
+ - **webhookUrl** _string_
+
+   URL to send webhook notifications to.
+   Optional.
+
+ - **wordBoost** _array of strings_
+
+   List of words to boost in the transcription.
+   Optional.
+
+ ### Model Capabilities
+
+ | Model  | Transcription       | Duration            | Segments            | Language            |
+ | ------ | ------------------- | ------------------- | ------------------- | ------------------- |
+ | `best` | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> |
+ | `nano` | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> |
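The added docs list each provider option in isolation; as a quick orientation, here is a minimal sketch that combines a few of them in one call (the file name `meeting.mp3` and the particular option mix are illustrative, not taken from the package):

```ts
import { experimental_transcribe as transcribe } from 'ai';
import { assemblyai } from '@ai-sdk/assemblyai';
import { readFile } from 'fs/promises';

// Diarize two expected speakers and request a bullet-point summary.
const result = await transcribe({
  model: assemblyai.transcription('best'),
  audio: await readFile('meeting.mp3'),
  providerOptions: {
    assemblyai: {
      speakerLabels: true,
      speakersExpected: 2,
      summarization: true,
      summaryModel: 'informative',
      summaryType: 'bullets',
    },
  },
});

console.log(result.text);
```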
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
    "name": "@ai-sdk/assemblyai",
-   "version": "0.0.0-1c33ba03-20260114162300",
+   "version": "0.0.0-4115c213-20260122152721",
    "license": "Apache-2.0",
    "sideEffects": false,
    "main": "./dist/index.js",
@@ -8,9 +8,18 @@
    "types": "./dist/index.d.ts",
    "files": [
      "dist/**/*",
+     "docs/**/*",
+     "src",
+     "!src/**/*.test.ts",
+     "!src/**/*.test-d.ts",
+     "!src/**/__snapshots__",
+     "!src/**/__fixtures__",
      "CHANGELOG.md",
      "README.md"
    ],
+   "directories": {
+     "doc": "./docs"
+   },
    "exports": {
      "./package.json": "./package.json",
      ".": {
@@ -20,15 +29,15 @@
      }
    },
    "dependencies": {
-     "@ai-sdk/provider": "3.0.3",
-     "@ai-sdk/provider-utils": "0.0.0-1c33ba03-20260114162300"
+     "@ai-sdk/provider": "0.0.0-4115c213-20260122152721",
+     "@ai-sdk/provider-utils": "0.0.0-4115c213-20260122152721"
    },
    "devDependencies": {
      "@types/node": "20.17.24",
      "tsup": "^8",
      "typescript": "5.6.3",
      "zod": "3.25.76",
-     "@ai-sdk/test-server": "1.0.1",
+     "@ai-sdk/test-server": "0.0.0-4115c213-20260122152721",
      "@vercel/ai-tsconfig": "0.0.0"
    },
    "peerDependencies": {
@@ -54,7 +63,7 @@
    "scripts": {
      "build": "pnpm clean && tsup --tsconfig tsconfig.build.json",
      "build:watch": "pnpm clean && tsup --watch --tsconfig tsconfig.build.json",
-     "clean": "del-cli dist *.tsbuildinfo",
+     "clean": "del-cli dist docs *.tsbuildinfo",
      "lint": "eslint \"./**/*.ts*\"",
      "type-check": "tsc --build",
      "prettier-check": "prettier --check \"./**/*.ts*\"",
package/src/assemblyai-api-types.ts ADDED
@@ -0,0 +1,362 @@
+ export type AssemblyAITranscriptionAPITypes = {
+   /**
+    * The URL of the audio or video file to transcribe.
+    */
+   audio_url: string;
+
+   /**
+    * The point in time, in milliseconds, to stop transcribing in your media file
+    */
+   audio_end_at?: number;
+
+   /**
+    * The point in time, in milliseconds, to begin transcribing in your media file
+    */
+   audio_start_from?: number;
+
+   /**
+    * Enable Auto Chapters, can be true or false
+    * @default false
+    */
+   auto_chapters?: boolean;
+
+   /**
+    * Enable Key Phrases, either true or false
+    * @default false
+    */
+   auto_highlights?: boolean;
+
+   /**
+    * How much to boost specified words
+    */
+   boost_param?: 'low' | 'default' | 'high';
+
+   /**
+    * Enable Content Moderation, can be true or false
+    * @default false
+    */
+   content_safety?: boolean;
+
+   /**
+    * The confidence threshold for the Content Moderation model. Values must be between 25 and 100.
+    * @default 50
+    */
+   content_safety_confidence?: number;
+
+   /**
+    * Customize how words are spelled and formatted using to and from values
+    */
+   custom_spelling?: Array<{
+     /**
+      * Words or phrases to replace
+      */
+     from: string[];
+     /**
+      * Word to replace with
+      */
+     to: string;
+   }>;
+
+   /**
+    * Transcribe Filler Words, like "umm", in your media file; can be true or false
+    * @default false
+    */
+   disfluencies?: boolean;
+
+   /**
+    * Enable Entity Detection, can be true or false
+    * @default false
+    */
+   entity_detection?: boolean;
+
+   /**
+    * Filter profanity from the transcribed text, can be true or false
+    * @default false
+    */
+   filter_profanity?: boolean;
+
+   /**
+    * Enable Text Formatting, can be true or false
+    * @default true
+    */
+   format_text?: boolean;
+
+   /**
+    * Enable Topic Detection, can be true or false
+    * @default false
+    */
+   iab_categories?: boolean;
+
+   /**
+    * The language of your audio file. Possible values are found in Supported Languages.
+    * @default 'en_us'
+    */
+   language_code?:
+     | 'en'
+     | 'en_au'
+     | 'en_uk'
+     | 'en_us'
+     | 'es'
+     | 'fr'
+     | 'de'
+     | 'it'
+     | 'pt'
+     | 'nl'
+     | 'af'
+     | 'sq'
+     | 'am'
+     | 'ar'
+     | 'hy'
+     | 'as'
+     | 'az'
+     | 'ba'
+     | 'eu'
+     | 'be'
+     | 'bn'
+     | 'bs'
+     | 'br'
+     | 'bg'
+     | 'my'
+     | 'ca'
+     | 'zh'
+     | 'hr'
+     | 'cs'
+     | 'da'
+     | 'et'
+     | 'fo'
+     | 'fi'
+     | 'gl'
+     | 'ka'
+     | 'el'
+     | 'gu'
+     | 'ht'
+     | 'ha'
+     | 'haw'
+     | 'he'
+     | 'hi'
+     | 'hu'
+     | 'is'
+     | 'id'
+     | 'ja'
+     | 'jw'
+     | 'kn'
+     | 'kk'
+     | 'km'
+     | 'ko'
+     | 'lo'
+     | 'la'
+     | 'lv'
+     | 'ln'
+     | 'lt'
+     | 'lb'
+     | 'mk'
+     | 'mg'
+     | 'ms'
+     | 'ml'
+     | 'mt'
+     | 'mi'
+     | 'mr'
+     | 'mn'
+     | 'ne'
+     | 'no'
+     | 'nn'
+     | 'oc'
+     | 'pa'
+     | 'ps'
+     | 'fa'
+     | 'pl'
+     | 'ro'
+     | 'ru'
+     | 'sa'
+     | 'sr'
+     | 'sn'
+     | 'sd'
+     | 'si'
+     | 'sk'
+     | 'sl'
+     | 'so'
+     | 'su'
+     | 'sw'
+     | 'sv'
+     | 'tl'
+     | 'tg'
+     | 'ta'
+     | 'tt'
+     | 'te'
+     | 'th'
+     | 'bo'
+     | 'tr'
+     | 'tk'
+     | 'uk'
+     | 'ur'
+     | 'uz'
+     | 'vi'
+     | 'cy'
+     | 'yi'
+     | 'yo';
+
+   /**
+    * The confidence threshold for the automatically detected language. An error will be returned if the language confidence is below this threshold.
+    * @default 0
+    */
+   language_confidence_threshold?: number;
+
+   /**
+    * Enable Automatic language detection, either true or false.
+    * @default false
+    */
+   language_detection?: boolean;
+
+   /**
+    * Enable Multichannel transcription, can be true or false.
+    * @default false
+    */
+   multichannel?: boolean;
+
+   /**
+    * Enable Automatic Punctuation, can be true or false
+    * @default true
+    */
+   punctuate?: boolean;
+
+   /**
+    * Redact PII from the transcribed text using the Redact PII model, can be true or false
+    * @default false
+    */
+   redact_pii?: boolean;
+
+   /**
+    * Generate a copy of the original media file with spoken PII "beeped" out, can be true or false.
+    * @default false
+    */
+   redact_pii_audio?: boolean;
+
+   /**
+    * Controls the filetype of the audio created by redact_pii_audio. Currently supports mp3 (default) and wav.
+    */
+   redact_pii_audio_quality?: 'mp3' | 'wav';
+
+   /**
+    * The list of PII Redaction policies to enable.
+    */
+   redact_pii_policies?: Array<
+     | 'account_number'
+     | 'banking_information'
+     | 'blood_type'
+     | 'credit_card_cvv'
+     | 'credit_card_expiration'
+     | 'credit_card_number'
+     | 'date'
+     | 'date_interval'
+     | 'date_of_birth'
+     | 'drivers_license'
+     | 'drug'
+     | 'duration'
+     | 'email_address'
+     | 'event'
+     | 'filename'
+     | 'gender_sexuality'
+     | 'healthcare_number'
+     | 'injury'
+     | 'ip_address'
+     | 'language'
+     | 'location'
+     | 'marital_status'
+     | 'medical_condition'
+     | 'medical_process'
+     | 'money_amount'
+     | 'nationality'
+     | 'number_sequence'
+     | 'occupation'
+     | 'organization'
+     | 'passport_number'
+     | 'password'
+     | 'person_age'
+     | 'person_name'
+     | 'phone_number'
+     | 'physical_attribute'
+     | 'political_affiliation'
+     | 'religion'
+     | 'statistics'
+     | 'time'
+     | 'url'
+     | 'us_social_security_number'
+     | 'username'
+     | 'vehicle_id'
+     | 'zodiac_sign'
+   >;
+
+   /**
+    * The replacement logic for detected PII, can be "entity_name" or "hash".
+    */
+   redact_pii_sub?: 'entity_name' | 'hash';
+
+   /**
+    * Enable Sentiment Analysis, can be true or false
+    * @default false
+    */
+   sentiment_analysis?: boolean;
+
+   /**
+    * Enable Speaker diarization, can be true or false
+    * @default false
+    */
+   speaker_labels?: boolean;
+
+   /**
+    * Tells the speaker label model how many speakers it should attempt to identify, up to 10.
+    */
+   speakers_expected?: number;
+
+   /**
+    * The speech model to use for the transcription.
+    */
+   speech_model?: 'best' | 'nano';
+
+   /**
+    * Reject audio files that contain less than this fraction of speech. Valid values are in the range [0, 1] inclusive.
+    */
+   speech_threshold?: number;
+
+   /**
+    * Enable Summarization, can be true or false
+    * @default false
+    */
+   summarization?: boolean;
+
+   /**
+    * The model to summarize the transcript
+    */
+   summary_model?: 'informative' | 'conversational' | 'catchy';
+
+   /**
+    * The type of summary
+    */
+   summary_type?:
+     | 'bullets'
+     | 'bullets_verbose'
+     | 'gist'
+     | 'headline'
+     | 'paragraph';
+
+   /**
+    * The header name to be sent with the transcript completed or failed webhook requests
+    */
+   webhook_auth_header_name?: string;
+
+   /**
+    * The header value to send back with the transcript completed or failed webhook requests for added security
+    */
+   webhook_auth_header_value?: string;
+
+   /**
+    * The URL to which we send webhook requests. We send two different types of webhook requests.
+    * One request when a transcript is completed or failed, and one request when the redacted audio is ready if redact_pii_audio is enabled.
+    */
+   webhook_url?: string;
+
+   /**
+    * The list of custom vocabulary to boost transcription probability for
+    */
+   word_boost?: string[];
+ };
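`AssemblyAITranscriptionAPITypes` is internal to the package (it is not re-exported from `src/index.ts`), but for orientation, a request body conforming to the shape above might look like this — the URL and option values are illustrative, and the relative import assumes you are working inside the package source:

```ts
import type { AssemblyAITranscriptionAPITypes } from './assemblyai-api-types';

// Sketch: a submit-transcript request body matching the type above.
// All values are illustrative, not taken from the package.
const body: AssemblyAITranscriptionAPITypes = {
  audio_url: 'https://example.com/audio.mp3',
  speech_model: 'best',
  punctuate: true,
  format_text: true,
  redact_pii: true,
  redact_pii_policies: ['person_name', 'phone_number'],
  redact_pii_sub: 'hash',
};
```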
package/src/assemblyai-config.ts ADDED
@@ -0,0 +1,9 @@
+ import { FetchFunction } from '@ai-sdk/provider-utils';
+
+ export type AssemblyAIConfig = {
+   provider: string;
+   url: (options: { modelId: string; path: string }) => string;
+   headers: () => Record<string, string | undefined>;
+   fetch?: FetchFunction;
+   generateId?: () => string;
+ };
package/src/assemblyai-error.ts ADDED
@@ -0,0 +1,16 @@
+ import { z } from 'zod/v4';
+ import { createJsonErrorResponseHandler } from '@ai-sdk/provider-utils';
+
+ export const assemblyaiErrorDataSchema = z.object({
+   error: z.object({
+     message: z.string(),
+     code: z.number(),
+   }),
+ });
+
+ export type AssemblyAIErrorData = z.infer<typeof assemblyaiErrorDataSchema>;
+
+ export const assemblyaiFailedResponseHandler = createJsonErrorResponseHandler({
+   errorSchema: assemblyaiErrorDataSchema,
+   errorToMessage: data => data.error.message,
+ });
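For reference, the error schema above expects a nested `error` object; a minimal sketch of a payload it would parse (the message and code values are illustrative, and the relative import again assumes you are inside the package source):

```ts
import { assemblyaiErrorDataSchema } from './assemblyai-error';

// Sketch: the error payload shape the schema above accepts.
const data = assemblyaiErrorDataSchema.parse({
  error: { message: 'Audio file is unreachable', code: 400 },
});

console.log(data.error.message); // => 'Audio file is unreachable'
```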
package/src/assemblyai-provider.ts ADDED
@@ -0,0 +1,112 @@
+ import {
+   TranscriptionModelV3,
+   ProviderV3,
+   NoSuchModelError,
+ } from '@ai-sdk/provider';
+ import {
+   FetchFunction,
+   loadApiKey,
+   withUserAgentSuffix,
+ } from '@ai-sdk/provider-utils';
+ import { AssemblyAITranscriptionModel } from './assemblyai-transcription-model';
+ import { AssemblyAITranscriptionModelId } from './assemblyai-transcription-settings';
+ import { VERSION } from './version';
+
+ export interface AssemblyAIProvider extends ProviderV3 {
+   (
+     modelId: 'best',
+     settings?: {},
+   ): {
+     transcription: AssemblyAITranscriptionModel;
+   };
+
+   /**
+   Creates a model for transcription.
+   */
+   transcription(modelId: AssemblyAITranscriptionModelId): TranscriptionModelV3;
+
+   /**
+    * @deprecated Use `embeddingModel` instead.
+    */
+   textEmbeddingModel(modelId: string): never;
+ }
+
+ export interface AssemblyAIProviderSettings {
+   /**
+   API key for authenticating requests.
+   */
+   apiKey?: string;
+
+   /**
+   Custom headers to include in the requests.
+   */
+   headers?: Record<string, string>;
+
+   /**
+   Custom fetch implementation. You can use it as a middleware to intercept requests,
+   or to provide a custom fetch implementation for e.g. testing.
+   */
+   fetch?: FetchFunction;
+ }
+
+ /**
+ Create an AssemblyAI provider instance.
+ */
+ export function createAssemblyAI(
+   options: AssemblyAIProviderSettings = {},
+ ): AssemblyAIProvider {
+   const getHeaders = () =>
+     withUserAgentSuffix(
+       {
+         authorization: loadApiKey({
+           apiKey: options.apiKey,
+           environmentVariableName: 'ASSEMBLYAI_API_KEY',
+           description: 'AssemblyAI',
+         }),
+         ...options.headers,
+       },
+       `ai-sdk/assemblyai/${VERSION}`,
+     );
+
+   const createTranscriptionModel = (modelId: AssemblyAITranscriptionModelId) =>
+     new AssemblyAITranscriptionModel(modelId, {
+       provider: `assemblyai.transcription`,
+       url: ({ path }) => `https://api.assemblyai.com${path}`,
+       headers: getHeaders,
+       fetch: options.fetch,
+     });
+
+   const provider = function (modelId: AssemblyAITranscriptionModelId) {
+     return {
+       transcription: createTranscriptionModel(modelId),
+     };
+   };
+
+   provider.specificationVersion = 'v3' as const;
+   provider.transcription = createTranscriptionModel;
+   provider.transcriptionModel = createTranscriptionModel;
+
+   provider.languageModel = () => {
+     throw new NoSuchModelError({
+       modelId: 'unknown',
+       modelType: 'languageModel',
+       message: 'AssemblyAI does not provide language models',
+     });
+   };
+
+   provider.embeddingModel = (modelId: string) => {
+     throw new NoSuchModelError({ modelId, modelType: 'embeddingModel' });
+   };
+   provider.textEmbeddingModel = provider.embeddingModel;
+
+   provider.imageModel = (modelId: string) => {
+     throw new NoSuchModelError({ modelId, modelType: 'imageModel' });
+   };
+
+   return provider as AssemblyAIProvider;
+ }
+
+ /**
+ Default AssemblyAI provider instance.
+ */
+ export const assemblyai = createAssemblyAI();
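Since `createAssemblyAI` threads the optional `fetch` setting through to the transcription model, request interception is straightforward; a minimal sketch of a logging wrapper (the logging itself is illustrative):

```ts
import { createAssemblyAI } from '@ai-sdk/assemblyai';

// Sketch: wrap the global fetch to log each AssemblyAI request.
const assemblyai = createAssemblyAI({
  fetch: async (input, init) => {
    console.log('AssemblyAI request:', String(input));
    return fetch(input, init);
  },
});
```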
package/src/assemblyai-transcription-model.ts ADDED
@@ -0,0 +1,430 @@
+ import { TranscriptionModelV3, SharedV3Warning } from '@ai-sdk/provider';
+ import {
+   combineHeaders,
+   createJsonResponseHandler,
+   extractResponseHeaders,
+   parseProviderOptions,
+   postJsonToApi,
+   postToApi,
+ } from '@ai-sdk/provider-utils';
+ import { z } from 'zod/v4';
+ import { AssemblyAIConfig } from './assemblyai-config';
+ import { assemblyaiFailedResponseHandler } from './assemblyai-error';
+ import { AssemblyAITranscriptionModelId } from './assemblyai-transcription-settings';
+ import { AssemblyAITranscriptionAPITypes } from './assemblyai-api-types';
+
+ // https://www.assemblyai.com/docs/api-reference/transcripts/submit
+ const assemblyaiProviderOptionsSchema = z.object({
+   /**
+    * End time of the audio in milliseconds.
+    */
+   audioEndAt: z.number().int().nullish(),
+   /**
+    * Start time of the audio in milliseconds.
+    */
+   audioStartFrom: z.number().int().nullish(),
+   /**
+    * Whether to automatically generate chapters for the transcription.
+    */
+   autoChapters: z.boolean().nullish(),
+   /**
+    * Whether to automatically generate highlights for the transcription.
+    */
+   autoHighlights: z.boolean().nullish(),
+   /**
+    * Boost parameter for the transcription.
+    * Allowed values: 'low', 'default', 'high'.
+    */
+   boostParam: z.string().nullish(),
+   /**
+    * Whether to enable content safety filtering.
+    */
+   contentSafety: z.boolean().nullish(),
+   /**
+    * Confidence threshold for content safety filtering (25-100).
+    */
+   contentSafetyConfidence: z.number().int().min(25).max(100).nullish(),
+   /**
+    * Custom spelling rules for the transcription.
+    */
+   customSpelling: z
+     .array(
+       z.object({
+         from: z.array(z.string()),
+         to: z.string(),
+       }),
+     )
+     .nullish(),
+   /**
+    * Whether to include filler words (um, uh, etc.) in the transcription.
+    */
+   disfluencies: z.boolean().nullish(),
+   /**
+    * Whether to enable entity detection.
+    */
+   entityDetection: z.boolean().nullish(),
+   /**
+    * Whether to filter profanity from the transcription.
+    */
+   filterProfanity: z.boolean().nullish(),
+   /**
+    * Whether to format text with punctuation and capitalization.
+    */
+   formatText: z.boolean().nullish(),
+   /**
+    * Whether to enable IAB categories detection.
+    */
+   iabCategories: z.boolean().nullish(),
+   /**
+    * Language code for the transcription.
+    */
+   languageCode: z.union([z.literal('en'), z.string()]).nullish(),
+   /**
+    * Confidence threshold for language detection.
+    */
+   languageConfidenceThreshold: z.number().nullish(),
+   /**
+    * Whether to enable language detection.
+    */
+   languageDetection: z.boolean().nullish(),
+   /**
+    * Whether to process audio as multichannel.
+    */
+   multichannel: z.boolean().nullish(),
+   /**
+    * Whether to add punctuation to the transcription.
+    */
+   punctuate: z.boolean().nullish(),
+   /**
+    * Whether to redact personally identifiable information (PII).
+    */
+   redactPii: z.boolean().nullish(),
+   /**
+    * Whether to redact PII in the audio file.
+    */
+   redactPiiAudio: z.boolean().nullish(),
+   /**
+    * Audio format for PII redaction.
+    */
+   redactPiiAudioQuality: z.string().nullish(),
+   /**
+    * List of PII types to redact.
+    */
+   redactPiiPolicies: z.array(z.string()).nullish(),
+   /**
+    * Substitution method for redacted PII.
+    */
+   redactPiiSub: z.string().nullish(),
+   /**
+    * Whether to enable sentiment analysis.
+    */
+   sentimentAnalysis: z.boolean().nullish(),
+   /**
+    * Whether to identify different speakers in the audio.
+    */
+   speakerLabels: z.boolean().nullish(),
+   /**
+    * Number of speakers expected in the audio.
+    */
+   speakersExpected: z.number().int().nullish(),
+   /**
+    * Threshold for speech detection (0-1).
+    */
+   speechThreshold: z.number().min(0).max(1).nullish(),
+   /**
+    * Whether to generate a summary of the transcription.
+    */
+   summarization: z.boolean().nullish(),
+   /**
+    * Model to use for summarization.
+    */
+   summaryModel: z.string().nullish(),
+   /**
+    * Type of summary to generate.
+    */
+   summaryType: z.string().nullish(),
+   /**
+    * Name of the authentication header for webhook requests.
+    */
+   webhookAuthHeaderName: z.string().nullish(),
+   /**
+    * Value of the authentication header for webhook requests.
+    */
+   webhookAuthHeaderValue: z.string().nullish(),
+   /**
+    * URL to send webhook notifications to.
+    */
+   webhookUrl: z.string().nullish(),
+   /**
+    * List of words to boost recognition for.
+    */
+   wordBoost: z.array(z.string()).nullish(),
+ });
+
+ export type AssemblyAITranscriptionCallOptions = z.infer<
+   typeof assemblyaiProviderOptionsSchema
+ >;
+
+ interface AssemblyAITranscriptionModelConfig extends AssemblyAIConfig {
+   _internal?: {
+     currentDate?: () => Date;
+   };
+   /**
+    * The polling interval for checking transcript status in milliseconds.
+    */
+   pollingInterval?: number;
+ }
+
+ export class AssemblyAITranscriptionModel implements TranscriptionModelV3 {
+   readonly specificationVersion = 'v3';
+   private readonly POLLING_INTERVAL_MS = 3000;
+
+   get provider(): string {
+     return this.config.provider;
+   }
+
+   constructor(
+     readonly modelId: AssemblyAITranscriptionModelId,
+     private readonly config: AssemblyAITranscriptionModelConfig,
+   ) {}
+
+   private async getArgs({
+     providerOptions,
+   }: Parameters<TranscriptionModelV3['doGenerate']>[0]) {
+     const warnings: SharedV3Warning[] = [];
+
+     // Parse provider options
+     const assemblyaiOptions = await parseProviderOptions({
+       provider: 'assemblyai',
+       providerOptions,
+       schema: assemblyaiProviderOptionsSchema,
+     });
+
+     const body: Omit<AssemblyAITranscriptionAPITypes, 'audio_url'> = {
+       speech_model: this.modelId,
+     };
+
+     // Add provider-specific options
+     if (assemblyaiOptions) {
+       body.audio_end_at = assemblyaiOptions.audioEndAt ?? undefined;
+       body.audio_start_from = assemblyaiOptions.audioStartFrom ?? undefined;
+       body.auto_chapters = assemblyaiOptions.autoChapters ?? undefined;
+       body.auto_highlights = assemblyaiOptions.autoHighlights ?? undefined;
+       body.boost_param = (assemblyaiOptions.boostParam as never) ?? undefined;
+       body.content_safety = assemblyaiOptions.contentSafety ?? undefined;
+       body.content_safety_confidence =
+         assemblyaiOptions.contentSafetyConfidence ?? undefined;
+       body.custom_spelling =
+         (assemblyaiOptions.customSpelling as never) ?? undefined;
+       body.disfluencies = assemblyaiOptions.disfluencies ?? undefined;
+       body.entity_detection = assemblyaiOptions.entityDetection ?? undefined;
+       body.filter_profanity = assemblyaiOptions.filterProfanity ?? undefined;
+       body.format_text = assemblyaiOptions.formatText ?? undefined;
+       body.iab_categories = assemblyaiOptions.iabCategories ?? undefined;
+       body.language_code =
+         (assemblyaiOptions.languageCode as never) ?? undefined;
+       body.language_confidence_threshold =
+         assemblyaiOptions.languageConfidenceThreshold ?? undefined;
+       body.language_detection =
+         assemblyaiOptions.languageDetection ?? undefined;
+       body.multichannel = assemblyaiOptions.multichannel ?? undefined;
+       body.punctuate = assemblyaiOptions.punctuate ?? undefined;
+       body.redact_pii = assemblyaiOptions.redactPii ?? undefined;
+       body.redact_pii_audio = assemblyaiOptions.redactPiiAudio ?? undefined;
+       body.redact_pii_audio_quality =
+         (assemblyaiOptions.redactPiiAudioQuality as never) ?? undefined;
+       body.redact_pii_policies =
+         (assemblyaiOptions.redactPiiPolicies as never) ?? undefined;
+       body.redact_pii_sub =
+         (assemblyaiOptions.redactPiiSub as never) ?? undefined;
+       body.sentiment_analysis =
+         assemblyaiOptions.sentimentAnalysis ?? undefined;
+       body.speaker_labels = assemblyaiOptions.speakerLabels ?? undefined;
+       body.speakers_expected = assemblyaiOptions.speakersExpected ?? undefined;
+       body.speech_threshold = assemblyaiOptions.speechThreshold ?? undefined;
+       body.summarization = assemblyaiOptions.summarization ?? undefined;
+       body.summary_model =
+         (assemblyaiOptions.summaryModel as never) ?? undefined;
+       body.summary_type = (assemblyaiOptions.summaryType as never) ?? undefined;
+       body.webhook_auth_header_name =
+         assemblyaiOptions.webhookAuthHeaderName ?? undefined;
+       body.webhook_auth_header_value =
+         assemblyaiOptions.webhookAuthHeaderValue ?? undefined;
+       body.webhook_url = assemblyaiOptions.webhookUrl ?? undefined;
+       body.word_boost = assemblyaiOptions.wordBoost ?? undefined;
+     }
+
+     return {
+       body,
+       warnings,
+     };
+   }
+
+   /**
+    * Polls the given transcript until we have a status other than `processing` or `queued`.
+    *
+    * @see https://www.assemblyai.com/docs/getting-started/transcribe-an-audio-file#step-33
+    */
+   private async waitForCompletion(
+     transcriptId: string,
+     headers: Record<string, string | undefined> | undefined,
+     abortSignal?: AbortSignal,
+   ): Promise<{
+     transcript: z.infer<typeof assemblyaiTranscriptionResponseSchema>;
+     responseHeaders: Record<string, string>;
+   }> {
+     const pollingInterval =
+       this.config.pollingInterval ?? this.POLLING_INTERVAL_MS;
+
+     while (true) {
+       if (abortSignal?.aborted) {
+         throw new Error('Transcription request was aborted');
+       }
+
+       const response = await fetch(
+         this.config.url({
+           path: `/v2/transcript/${transcriptId}`,
+           modelId: this.modelId,
+         }),
+         {
+           method: 'GET',
+           headers: combineHeaders(
+             this.config.headers(),
+             headers,
+           ) as HeadersInit,
+           signal: abortSignal,
+         },
+       );
+
+       if (!response.ok) {
+         throw await assemblyaiFailedResponseHandler({
+           response,
+           url: this.config.url({
+             path: `/v2/transcript/${transcriptId}`,
+             modelId: this.modelId,
+           }),
+           requestBodyValues: {},
+         });
+       }
+
+       const transcript = assemblyaiTranscriptionResponseSchema.parse(
+         await response.json(),
+       );
+
+       if (transcript.status === 'completed') {
+         return {
+           transcript,
+           responseHeaders: extractResponseHeaders(response),
+         };
+       }
+
+       if (transcript.status === 'error') {
+         throw new Error(
+           `Transcription failed: ${transcript.error ?? 'Unknown error'}`,
+         );
+       }
+
+       await new Promise(resolve => setTimeout(resolve, pollingInterval));
+     }
+   }
+
+   async doGenerate(
+     options: Parameters<TranscriptionModelV3['doGenerate']>[0],
+   ): Promise<Awaited<ReturnType<TranscriptionModelV3['doGenerate']>>> {
+     const currentDate = this.config._internal?.currentDate?.() ?? new Date();
+
+     const { value: uploadResponse } = await postToApi({
+       url: this.config.url({
+         path: '/v2/upload',
+         modelId: '',
+       }),
+       headers: {
+         'Content-Type': 'application/octet-stream',
+         ...combineHeaders(this.config.headers(), options.headers),
+       },
+       body: {
+         content: options.audio,
+         values: options.audio,
+       },
+       failedResponseHandler: assemblyaiFailedResponseHandler,
+       successfulResponseHandler: createJsonResponseHandler(
+         assemblyaiUploadResponseSchema,
+       ),
+       abortSignal: options.abortSignal,
+       fetch: this.config.fetch,
+     });
+
+     const { body, warnings } = await this.getArgs(options);
+
+     const { value: submitResponse } = await postJsonToApi({
+       url: this.config.url({
+         path: '/v2/transcript',
+         modelId: this.modelId,
+       }),
+       headers: combineHeaders(this.config.headers(), options.headers),
+       body: {
+         ...body,
+         audio_url: uploadResponse.upload_url,
+       },
+       failedResponseHandler: assemblyaiFailedResponseHandler,
+       successfulResponseHandler: createJsonResponseHandler(
+         assemblyaiSubmitResponseSchema,
+       ),
+       abortSignal: options.abortSignal,
+       fetch: this.config.fetch,
+     });
+
+     const { transcript, responseHeaders } = await this.waitForCompletion(
+       submitResponse.id,
+       options.headers,
+       options.abortSignal,
+     );
+
+     return {
+       text: transcript.text ?? '',
+       segments:
+         transcript.words?.map(word => ({
+           text: word.text,
+           startSecond: word.start,
+           endSecond: word.end,
+         })) ?? [],
+       language: transcript.language_code ?? undefined,
+       durationInSeconds:
+         transcript.audio_duration ?? transcript.words?.at(-1)?.end ?? undefined,
+       warnings,
+       response: {
+         timestamp: currentDate,
+         modelId: this.modelId,
+         headers: responseHeaders, // Headers from final GET request
+         body: transcript, // Raw response from final GET request
+       },
+     };
+   }
+ }
+
+ const assemblyaiUploadResponseSchema = z.object({
+   upload_url: z.string(),
+ });
+
+ const assemblyaiSubmitResponseSchema = z.object({
+   id: z.string(),
+   status: z.enum(['queued', 'processing', 'completed', 'error']),
+ });
+
+ const assemblyaiTranscriptionResponseSchema = z.object({
+   id: z.string(),
+   status: z.enum(['queued', 'processing', 'completed', 'error']),
+   text: z.string().nullish(),
+   language_code: z.string().nullish(),
+   words: z
+     .array(
+       z.object({
+         start: z.number(),
+         end: z.number(),
+         text: z.string(),
+       }),
+     )
+     .nullish(),
+   audio_duration: z.number().nullish(),
+   error: z.string().nullish(),
+ });
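`waitForCompletion` above polls every 3 seconds (or `config.pollingInterval`) until the transcript leaves `queued`/`processing`, and it checks `abortSignal` on each iteration, so long-running jobs can be bounded from the caller. A sketch with an illustrative 60-second timeout:

```ts
import { experimental_transcribe as transcribe } from 'ai';
import { assemblyai } from '@ai-sdk/assemblyai';
import { readFile } from 'fs/promises';

// Sketch: abort the upload/submit/polling sequence if it runs too long.
const controller = new AbortController();
const timeout = setTimeout(() => controller.abort(), 60_000);

try {
  const result = await transcribe({
    model: assemblyai.transcription('nano'),
    audio: await readFile('audio.mp3'),
    abortSignal: controller.signal,
  });
  console.log(result.text);
} finally {
  clearTimeout(timeout);
}
```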
package/src/assemblyai-transcription-settings.ts ADDED
@@ -0,0 +1 @@
+ export type AssemblyAITranscriptionModelId = 'best' | 'nano';
package/src/index.ts ADDED
@@ -0,0 +1,6 @@
+ export { createAssemblyAI, assemblyai } from './assemblyai-provider';
+ export type {
+   AssemblyAIProvider,
+   AssemblyAIProviderSettings,
+ } from './assemblyai-provider';
+ export { VERSION } from './version';
package/src/version.ts ADDED
@@ -0,0 +1,6 @@
+ // Version string of this package injected at build time.
+ declare const __PACKAGE_VERSION__: string | undefined;
+ export const VERSION: string =
+   typeof __PACKAGE_VERSION__ !== 'undefined'
+     ? __PACKAGE_VERSION__
+     : '0.0.0-test';
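In the bundled `dist` output earlier in this diff, the ternary has already collapsed to `true ? "0.0.0-4115c213-20260122152721" : "0.0.0-test"`, which is consistent with `__PACKAGE_VERSION__` being injected through a compile-time define. A sketch of how that wiring typically looks in a tsup config — this config is an assumption for illustration, not taken from the package:

```ts
// tsup.config.ts — sketch; assumes the version is injected via esbuild `define`.
import { defineConfig } from 'tsup';
import pkg from './package.json';

export default defineConfig({
  entry: ['src/index.ts'],
  format: ['cjs', 'esm'],
  dts: true,
  define: {
    // Substituted at build time; the `typeof` guard in src/version.ts
    // then constant-folds to the injected string.
    __PACKAGE_VERSION__: JSON.stringify(pkg.version),
  },
});
```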