@ai-sdk/gladia 3.0.0-beta.3 → 3.0.0-beta.31

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,7 @@
1
1
  import {
2
2
  AISDKError,
3
- TranscriptionModelV3,
4
- SharedV3Warning,
3
+ type TranscriptionModelV4,
4
+ type SharedV4Warning,
5
5
  } from '@ai-sdk/provider';
6
6
  import {
7
7
  combineHeaders,
@@ -13,319 +13,15 @@ import {
13
13
  parseProviderOptions,
14
14
  postFormDataToApi,
15
15
  postJsonToApi,
16
+ serializeModelOptions,
17
+ WORKFLOW_SERIALIZE,
18
+ WORKFLOW_DESERIALIZE,
16
19
  } from '@ai-sdk/provider-utils';
17
20
  import { z } from 'zod/v4';
18
- import { GladiaConfig } from './gladia-config';
21
+ import type { GladiaConfig } from './gladia-config';
19
22
  import { gladiaFailedResponseHandler } from './gladia-error';
20
- import { GladiaTranscriptionInitiateAPITypes } from './gladia-api-types';
21
-
22
- // https://docs.gladia.io/api-reference/v2/pre-recorded/init
23
- const gladiaTranscriptionModelOptionsSchema = z.object({
24
- /**
25
- * Optional context prompt to guide the transcription.
26
- */
27
- contextPrompt: z.string().nullish(),
28
-
29
- /**
30
- * Custom vocabulary to improve transcription accuracy.
31
- * Can be a boolean or an array of custom terms.
32
- */
33
- customVocabulary: z.union([z.boolean(), z.array(z.any())]).nullish(),
34
-
35
- /**
36
- * Configuration for custom vocabulary.
37
- */
38
- customVocabularyConfig: z
39
- .object({
40
- /**
41
- * Array of vocabulary terms or objects with pronunciation details.
42
- */
43
- vocabulary: z.array(
44
- z.union([
45
- z.string(),
46
- z.object({
47
- /**
48
- * The vocabulary term.
49
- */
50
- value: z.string(),
51
- /**
52
- * Intensity of the term in recognition (optional).
53
- */
54
- intensity: z.number().nullish(),
55
- /**
56
- * Alternative pronunciations for the term (optional).
57
- */
58
- pronunciations: z.array(z.string()).nullish(),
59
- /**
60
- * Language of the term (optional).
61
- */
62
- language: z.string().nullish(),
63
- }),
64
- ]),
65
- ),
66
- /**
67
- * Default intensity for all vocabulary terms.
68
- */
69
- defaultIntensity: z.number().nullish(),
70
- })
71
- .nullish(),
72
-
73
- /**
74
- * Whether to automatically detect the language of the audio.
75
- */
76
- detectLanguage: z.boolean().nullish(),
77
-
78
- /**
79
- * Whether to enable code switching (multiple languages in the same audio).
80
- */
81
- enableCodeSwitching: z.boolean().nullish(),
82
-
83
- /**
84
- * Configuration for code switching.
85
- */
86
- codeSwitchingConfig: z
87
- .object({
88
- /**
89
- * Languages to consider for code switching.
90
- */
91
- languages: z.array(z.string()).nullish(),
92
- })
93
- .nullish(),
94
-
95
- /**
96
- * Specific language for transcription.
97
- */
98
- language: z.string().nullish(),
99
-
100
- /**
101
- * Whether to enable callback when transcription is complete.
102
- */
103
- callback: z.boolean().nullish(),
104
-
105
- /**
106
- * Configuration for callback.
107
- */
108
- callbackConfig: z
109
- .object({
110
- /**
111
- * URL to send the callback to.
112
- */
113
- url: z.string(),
114
- /**
115
- * HTTP method for the callback.
116
- */
117
- method: z.enum(['POST', 'PUT']).nullish(),
118
- })
119
- .nullish(),
120
-
121
- /**
122
- * Whether to generate subtitles.
123
- */
124
- subtitles: z.boolean().nullish(),
125
-
126
- /**
127
- * Configuration for subtitles generation.
128
- */
129
- subtitlesConfig: z
130
- .object({
131
- /**
132
- * Subtitle file formats to generate.
133
- */
134
- formats: z.array(z.enum(['srt', 'vtt'])).nullish(),
135
- /**
136
- * Minimum duration for subtitle segments.
137
- */
138
- minimumDuration: z.number().nullish(),
139
- /**
140
- * Maximum duration for subtitle segments.
141
- */
142
- maximumDuration: z.number().nullish(),
143
- /**
144
- * Maximum characters per row in subtitles.
145
- */
146
- maximumCharactersPerRow: z.number().nullish(),
147
- /**
148
- * Maximum rows per caption in subtitles.
149
- */
150
- maximumRowsPerCaption: z.number().nullish(),
151
- /**
152
- * Style of subtitles.
153
- */
154
- style: z.enum(['default', 'compliance']).nullish(),
155
- })
156
- .nullish(),
157
-
158
- /**
159
- * Whether to enable speaker diarization (speaker identification).
160
- */
161
- diarization: z.boolean().nullish(),
162
-
163
- /**
164
- * Configuration for diarization.
165
- */
166
- diarizationConfig: z
167
- .object({
168
- /**
169
- * Exact number of speakers to identify.
170
- */
171
- numberOfSpeakers: z.number().nullish(),
172
- /**
173
- * Minimum number of speakers to identify.
174
- */
175
- minSpeakers: z.number().nullish(),
176
- /**
177
- * Maximum number of speakers to identify.
178
- */
179
- maxSpeakers: z.number().nullish(),
180
- /**
181
- * Whether to use enhanced diarization.
182
- */
183
- enhanced: z.boolean().nullish(),
184
- })
185
- .nullish(),
186
-
187
- /**
188
- * Whether to translate the transcription.
189
- */
190
- translation: z.boolean().nullish(),
191
-
192
- /**
193
- * Configuration for translation.
194
- */
195
- translationConfig: z
196
- .object({
197
- /**
198
- * Target languages for translation.
199
- */
200
- targetLanguages: z.array(z.string()),
201
- /**
202
- * Translation model to use.
203
- */
204
- model: z.enum(['base', 'enhanced']).nullish(),
205
- /**
206
- * Whether to match original utterances in translation.
207
- */
208
- matchOriginalUtterances: z.boolean().nullish(),
209
- })
210
- .nullish(),
211
-
212
- /**
213
- * Whether to generate a summary of the transcription.
214
- */
215
- summarization: z.boolean().nullish(),
216
-
217
- /**
218
- * Configuration for summarization.
219
- */
220
- summarizationConfig: z
221
- .object({
222
- /**
223
- * Type of summary to generate.
224
- */
225
- type: z.enum(['general', 'bullet_points', 'concise']).nullish(),
226
- })
227
- .nullish(),
228
-
229
- /**
230
- * Whether to enable content moderation.
231
- */
232
- moderation: z.boolean().nullish(),
233
-
234
- /**
235
- * Whether to enable named entity recognition.
236
- */
237
- namedEntityRecognition: z.boolean().nullish(),
238
-
239
- /**
240
- * Whether to enable automatic chapter creation.
241
- */
242
- chapterization: z.boolean().nullish(),
243
-
244
- /**
245
- * Whether to ensure consistent naming of entities.
246
- */
247
- nameConsistency: z.boolean().nullish(),
248
-
249
- /**
250
- * Whether to enable custom spelling.
251
- */
252
- customSpelling: z.boolean().nullish(),
253
-
254
- /**
255
- * Configuration for custom spelling.
256
- */
257
- customSpellingConfig: z
258
- .object({
259
- /**
260
- * Dictionary of custom spellings.
261
- */
262
- spellingDictionary: z.record(z.string(), z.array(z.string())),
263
- })
264
- .nullish(),
265
-
266
- /**
267
- * Whether to extract structured data from the transcription.
268
- */
269
- structuredDataExtraction: z.boolean().nullish(),
270
-
271
- /**
272
- * Configuration for structured data extraction.
273
- */
274
- structuredDataExtractionConfig: z
275
- .object({
276
- /**
277
- * Classes of data to extract.
278
- */
279
- classes: z.array(z.string()),
280
- })
281
- .nullish(),
282
-
283
- /**
284
- * Whether to perform sentiment analysis on the transcription.
285
- */
286
- sentimentAnalysis: z.boolean().nullish(),
287
-
288
- /**
289
- * Whether to send audio to a language model for processing.
290
- */
291
- audioToLlm: z.boolean().nullish(),
292
-
293
- /**
294
- * Configuration for audio to language model processing.
295
- */
296
- audioToLlmConfig: z
297
- .object({
298
- /**
299
- * Prompts to send to the language model.
300
- */
301
- prompts: z.array(z.string()),
302
- })
303
- .nullish(),
304
-
305
- /**
306
- * Custom metadata to include with the transcription.
307
- */
308
- customMetadata: z.record(z.string(), z.any()).nullish(),
309
-
310
- /**
311
- * Whether to include sentence-level segmentation.
312
- */
313
- sentences: z.boolean().nullish(),
314
-
315
- /**
316
- * Whether to enable display mode.
317
- */
318
- displayMode: z.boolean().nullish(),
319
-
320
- /**
321
- * Whether to enhance punctuation in the transcription.
322
- */
323
- punctuationEnhanced: z.boolean().nullish(),
324
- });
325
-
326
- export type GladiaTranscriptionModelOptions = z.infer<
327
- typeof gladiaTranscriptionModelOptionsSchema
328
- >;
23
+ import { gladiaTranscriptionModelOptionsSchema } from './gladia-transcription-model-options';
24
+ import type { GladiaTranscriptionInitiateAPITypes } from './gladia-api-types';
329
25
 
330
26
  interface GladiaTranscriptionModelConfig extends GladiaConfig {
331
27
  _internal?: {
@@ -333,22 +29,36 @@ interface GladiaTranscriptionModelConfig extends GladiaConfig {
333
29
  };
334
30
  }
335
31
 
336
- export class GladiaTranscriptionModel implements TranscriptionModelV3 {
337
- readonly specificationVersion = 'v3';
32
+ export class GladiaTranscriptionModel implements TranscriptionModelV4 {
33
+ readonly specificationVersion = 'v4';
338
34
 
339
35
  get provider(): string {
340
36
  return this.config.provider;
341
37
  }
342
38
 
39
+ static [WORKFLOW_SERIALIZE](model: GladiaTranscriptionModel) {
40
+ return serializeModelOptions({
41
+ modelId: model.modelId,
42
+ config: model.config,
43
+ });
44
+ }
45
+
46
+ static [WORKFLOW_DESERIALIZE](options: {
47
+ modelId: 'default';
48
+ config: GladiaTranscriptionModelConfig;
49
+ }) {
50
+ return new GladiaTranscriptionModel(options.modelId, options.config);
51
+ }
52
+
343
53
  constructor(
344
- readonly modelId: 'default',
54
+ readonly modelId: string,
345
55
  private readonly config: GladiaTranscriptionModelConfig,
346
56
  ) {}
347
57
 
348
58
  private async getArgs({
349
59
  providerOptions,
350
- }: Parameters<TranscriptionModelV3['doGenerate']>[0]) {
351
- const warnings: SharedV3Warning[] = [];
60
+ }: Parameters<TranscriptionModelV4['doGenerate']>[0]) {
61
+ const warnings: SharedV4Warning[] = [];
352
62
 
353
63
  // Parse provider options
354
64
  const gladiaOptions = await parseProviderOptions({
@@ -487,8 +197,8 @@ export class GladiaTranscriptionModel implements TranscriptionModelV3 {
487
197
  }
488
198
 
489
199
  async doGenerate(
490
- options: Parameters<TranscriptionModelV3['doGenerate']>[0],
491
- ): Promise<Awaited<ReturnType<TranscriptionModelV3['doGenerate']>>> {
200
+ options: Parameters<TranscriptionModelV4['doGenerate']>[0],
201
+ ): Promise<Awaited<ReturnType<TranscriptionModelV4['doGenerate']>>> {
492
202
  const currentDate = this.config._internal?.currentDate?.() ?? new Date();
493
203
 
494
204
  // Create form data with base fields
@@ -510,7 +220,7 @@ export class GladiaTranscriptionModel implements TranscriptionModelV3 {
510
220
  path: '/v2/upload',
511
221
  modelId: 'default',
512
222
  }),
513
- headers: combineHeaders(this.config.headers(), options.headers),
223
+ headers: combineHeaders(this.config.headers?.(), options.headers),
514
224
  formData,
515
225
  failedResponseHandler: gladiaFailedResponseHandler,
516
226
  successfulResponseHandler: createJsonResponseHandler(
@@ -527,7 +237,7 @@ export class GladiaTranscriptionModel implements TranscriptionModelV3 {
527
237
  path: '/v2/pre-recorded',
528
238
  modelId: 'default',
529
239
  }),
530
- headers: combineHeaders(this.config.headers(), options.headers),
240
+ headers: combineHeaders(this.config.headers?.(), options.headers),
531
241
  body: {
532
242
  ...body,
533
243
  audio_url: uploadResponse.audio_url,
@@ -560,7 +270,7 @@ export class GladiaTranscriptionModel implements TranscriptionModelV3 {
560
270
 
561
271
  const response = await getFromApi({
562
272
  url: resultUrl,
563
- headers: combineHeaders(this.config.headers(), options.headers),
273
+ headers: combineHeaders(this.config.headers?.(), options.headers),
564
274
  failedResponseHandler: gladiaFailedResponseHandler,
565
275
  successfulResponseHandler: createJsonResponseHandler(
566
276
  gladiaTranscriptionResultResponseSchema,
package/src/index.ts CHANGED
@@ -1,4 +1,4 @@
1
1
  export { createGladia, gladia } from './gladia-provider';
2
2
  export type { GladiaProvider, GladiaProviderSettings } from './gladia-provider';
3
- export type { GladiaTranscriptionModelOptions } from './gladia-transcription-model';
3
+ export type { GladiaTranscriptionModelOptions } from './gladia-transcription-model-options';
4
4
  export { VERSION } from './version';
package/dist/index.d.mts DELETED
@@ -1,158 +0,0 @@
1
- import { TranscriptionModelV3, ProviderV3 } from '@ai-sdk/provider';
2
- import { FetchFunction } from '@ai-sdk/provider-utils';
3
- import { z } from 'zod/v4';
4
-
5
- type GladiaConfig = {
6
- provider: string;
7
- url: (options: {
8
- modelId: string;
9
- path: string;
10
- }) => string;
11
- headers: () => Record<string, string | undefined>;
12
- fetch?: FetchFunction;
13
- generateId?: () => string;
14
- };
15
-
16
- declare const gladiaTranscriptionModelOptionsSchema: z.ZodObject<{
17
- contextPrompt: z.ZodOptional<z.ZodNullable<z.ZodString>>;
18
- customVocabulary: z.ZodOptional<z.ZodNullable<z.ZodUnion<readonly [z.ZodBoolean, z.ZodArray<z.ZodAny>]>>>;
19
- customVocabularyConfig: z.ZodOptional<z.ZodNullable<z.ZodObject<{
20
- vocabulary: z.ZodArray<z.ZodUnion<readonly [z.ZodString, z.ZodObject<{
21
- value: z.ZodString;
22
- intensity: z.ZodOptional<z.ZodNullable<z.ZodNumber>>;
23
- pronunciations: z.ZodOptional<z.ZodNullable<z.ZodArray<z.ZodString>>>;
24
- language: z.ZodOptional<z.ZodNullable<z.ZodString>>;
25
- }, z.core.$strip>]>>;
26
- defaultIntensity: z.ZodOptional<z.ZodNullable<z.ZodNumber>>;
27
- }, z.core.$strip>>>;
28
- detectLanguage: z.ZodOptional<z.ZodNullable<z.ZodBoolean>>;
29
- enableCodeSwitching: z.ZodOptional<z.ZodNullable<z.ZodBoolean>>;
30
- codeSwitchingConfig: z.ZodOptional<z.ZodNullable<z.ZodObject<{
31
- languages: z.ZodOptional<z.ZodNullable<z.ZodArray<z.ZodString>>>;
32
- }, z.core.$strip>>>;
33
- language: z.ZodOptional<z.ZodNullable<z.ZodString>>;
34
- callback: z.ZodOptional<z.ZodNullable<z.ZodBoolean>>;
35
- callbackConfig: z.ZodOptional<z.ZodNullable<z.ZodObject<{
36
- url: z.ZodString;
37
- method: z.ZodOptional<z.ZodNullable<z.ZodEnum<{
38
- POST: "POST";
39
- PUT: "PUT";
40
- }>>>;
41
- }, z.core.$strip>>>;
42
- subtitles: z.ZodOptional<z.ZodNullable<z.ZodBoolean>>;
43
- subtitlesConfig: z.ZodOptional<z.ZodNullable<z.ZodObject<{
44
- formats: z.ZodOptional<z.ZodNullable<z.ZodArray<z.ZodEnum<{
45
- srt: "srt";
46
- vtt: "vtt";
47
- }>>>>;
48
- minimumDuration: z.ZodOptional<z.ZodNullable<z.ZodNumber>>;
49
- maximumDuration: z.ZodOptional<z.ZodNullable<z.ZodNumber>>;
50
- maximumCharactersPerRow: z.ZodOptional<z.ZodNullable<z.ZodNumber>>;
51
- maximumRowsPerCaption: z.ZodOptional<z.ZodNullable<z.ZodNumber>>;
52
- style: z.ZodOptional<z.ZodNullable<z.ZodEnum<{
53
- default: "default";
54
- compliance: "compliance";
55
- }>>>;
56
- }, z.core.$strip>>>;
57
- diarization: z.ZodOptional<z.ZodNullable<z.ZodBoolean>>;
58
- diarizationConfig: z.ZodOptional<z.ZodNullable<z.ZodObject<{
59
- numberOfSpeakers: z.ZodOptional<z.ZodNullable<z.ZodNumber>>;
60
- minSpeakers: z.ZodOptional<z.ZodNullable<z.ZodNumber>>;
61
- maxSpeakers: z.ZodOptional<z.ZodNullable<z.ZodNumber>>;
62
- enhanced: z.ZodOptional<z.ZodNullable<z.ZodBoolean>>;
63
- }, z.core.$strip>>>;
64
- translation: z.ZodOptional<z.ZodNullable<z.ZodBoolean>>;
65
- translationConfig: z.ZodOptional<z.ZodNullable<z.ZodObject<{
66
- targetLanguages: z.ZodArray<z.ZodString>;
67
- model: z.ZodOptional<z.ZodNullable<z.ZodEnum<{
68
- base: "base";
69
- enhanced: "enhanced";
70
- }>>>;
71
- matchOriginalUtterances: z.ZodOptional<z.ZodNullable<z.ZodBoolean>>;
72
- }, z.core.$strip>>>;
73
- summarization: z.ZodOptional<z.ZodNullable<z.ZodBoolean>>;
74
- summarizationConfig: z.ZodOptional<z.ZodNullable<z.ZodObject<{
75
- type: z.ZodOptional<z.ZodNullable<z.ZodEnum<{
76
- general: "general";
77
- bullet_points: "bullet_points";
78
- concise: "concise";
79
- }>>>;
80
- }, z.core.$strip>>>;
81
- moderation: z.ZodOptional<z.ZodNullable<z.ZodBoolean>>;
82
- namedEntityRecognition: z.ZodOptional<z.ZodNullable<z.ZodBoolean>>;
83
- chapterization: z.ZodOptional<z.ZodNullable<z.ZodBoolean>>;
84
- nameConsistency: z.ZodOptional<z.ZodNullable<z.ZodBoolean>>;
85
- customSpelling: z.ZodOptional<z.ZodNullable<z.ZodBoolean>>;
86
- customSpellingConfig: z.ZodOptional<z.ZodNullable<z.ZodObject<{
87
- spellingDictionary: z.ZodRecord<z.ZodString, z.ZodArray<z.ZodString>>;
88
- }, z.core.$strip>>>;
89
- structuredDataExtraction: z.ZodOptional<z.ZodNullable<z.ZodBoolean>>;
90
- structuredDataExtractionConfig: z.ZodOptional<z.ZodNullable<z.ZodObject<{
91
- classes: z.ZodArray<z.ZodString>;
92
- }, z.core.$strip>>>;
93
- sentimentAnalysis: z.ZodOptional<z.ZodNullable<z.ZodBoolean>>;
94
- audioToLlm: z.ZodOptional<z.ZodNullable<z.ZodBoolean>>;
95
- audioToLlmConfig: z.ZodOptional<z.ZodNullable<z.ZodObject<{
96
- prompts: z.ZodArray<z.ZodString>;
97
- }, z.core.$strip>>>;
98
- customMetadata: z.ZodOptional<z.ZodNullable<z.ZodRecord<z.ZodString, z.ZodAny>>>;
99
- sentences: z.ZodOptional<z.ZodNullable<z.ZodBoolean>>;
100
- displayMode: z.ZodOptional<z.ZodNullable<z.ZodBoolean>>;
101
- punctuationEnhanced: z.ZodOptional<z.ZodNullable<z.ZodBoolean>>;
102
- }, z.core.$strip>;
103
- type GladiaTranscriptionModelOptions = z.infer<typeof gladiaTranscriptionModelOptionsSchema>;
104
- interface GladiaTranscriptionModelConfig extends GladiaConfig {
105
- _internal?: {
106
- currentDate?: () => Date;
107
- };
108
- }
109
- declare class GladiaTranscriptionModel implements TranscriptionModelV3 {
110
- readonly modelId: 'default';
111
- private readonly config;
112
- readonly specificationVersion = "v3";
113
- get provider(): string;
114
- constructor(modelId: 'default', config: GladiaTranscriptionModelConfig);
115
- private getArgs;
116
- doGenerate(options: Parameters<TranscriptionModelV3['doGenerate']>[0]): Promise<Awaited<ReturnType<TranscriptionModelV3['doGenerate']>>>;
117
- }
118
-
119
- interface GladiaProvider extends ProviderV3 {
120
- (): {
121
- transcription: GladiaTranscriptionModel;
122
- };
123
- /**
124
- * Creates a model for transcription.
125
- */
126
- transcription(): TranscriptionModelV3;
127
- /**
128
- * @deprecated Use `embeddingModel` instead.
129
- */
130
- textEmbeddingModel(modelId: string): never;
131
- }
132
- interface GladiaProviderSettings {
133
- /**
134
- * API key for authenticating requests.
135
- */
136
- apiKey?: string;
137
- /**
138
- * Custom headers to include in the requests.
139
- */
140
- headers?: Record<string, string>;
141
- /**
142
- * Custom fetch implementation. You can use it as a middleware to intercept requests,
143
- * or to provide a custom fetch implementation for e.g. testing.
144
- */
145
- fetch?: FetchFunction;
146
- }
147
- /**
148
- * Create a Gladia provider instance.
149
- */
150
- declare function createGladia(options?: GladiaProviderSettings): GladiaProvider;
151
- /**
152
- * Default Gladia provider instance.
153
- */
154
- declare const gladia: GladiaProvider;
155
-
156
- declare const VERSION: string;
157
-
158
- export { type GladiaProvider, type GladiaProviderSettings, type GladiaTranscriptionModelOptions, VERSION, createGladia, gladia };