@ai-sdk/gladia 3.0.0-beta.4 → 3.0.0-beta.50

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,7 @@
1
1
  import {
2
2
  AISDKError,
3
- TranscriptionModelV4,
4
- SharedV4Warning,
3
+ type TranscriptionModelV4,
4
+ type SharedV4Warning,
5
5
  } from '@ai-sdk/provider';
6
6
  import {
7
7
  combineHeaders,
@@ -10,322 +10,19 @@ import {
10
10
  mediaTypeToExtension,
11
11
  delay,
12
12
  getFromApi,
13
+ isSameOrigin,
13
14
  parseProviderOptions,
14
15
  postFormDataToApi,
15
16
  postJsonToApi,
17
+ serializeModelOptions,
18
+ WORKFLOW_SERIALIZE,
19
+ WORKFLOW_DESERIALIZE,
16
20
  } from '@ai-sdk/provider-utils';
17
21
  import { z } from 'zod/v4';
18
- import { GladiaConfig } from './gladia-config';
22
+ import type { GladiaConfig } from './gladia-config';
19
23
  import { gladiaFailedResponseHandler } from './gladia-error';
20
- import { GladiaTranscriptionInitiateAPITypes } from './gladia-api-types';
21
-
22
- // https://docs.gladia.io/api-reference/v2/pre-recorded/init
23
- const gladiaTranscriptionModelOptionsSchema = z.object({
24
- /**
25
- * Optional context prompt to guide the transcription.
26
- */
27
- contextPrompt: z.string().nullish(),
28
-
29
- /**
30
- * Custom vocabulary to improve transcription accuracy.
31
- * Can be a boolean or an array of custom terms.
32
- */
33
- customVocabulary: z.union([z.boolean(), z.array(z.any())]).nullish(),
34
-
35
- /**
36
- * Configuration for custom vocabulary.
37
- */
38
- customVocabularyConfig: z
39
- .object({
40
- /**
41
- * Array of vocabulary terms or objects with pronunciation details.
42
- */
43
- vocabulary: z.array(
44
- z.union([
45
- z.string(),
46
- z.object({
47
- /**
48
- * The vocabulary term.
49
- */
50
- value: z.string(),
51
- /**
52
- * Intensity of the term in recognition (optional).
53
- */
54
- intensity: z.number().nullish(),
55
- /**
56
- * Alternative pronunciations for the term (optional).
57
- */
58
- pronunciations: z.array(z.string()).nullish(),
59
- /**
60
- * Language of the term (optional).
61
- */
62
- language: z.string().nullish(),
63
- }),
64
- ]),
65
- ),
66
- /**
67
- * Default intensity for all vocabulary terms.
68
- */
69
- defaultIntensity: z.number().nullish(),
70
- })
71
- .nullish(),
72
-
73
- /**
74
- * Whether to automatically detect the language of the audio.
75
- */
76
- detectLanguage: z.boolean().nullish(),
77
-
78
- /**
79
- * Whether to enable code switching (multiple languages in the same audio).
80
- */
81
- enableCodeSwitching: z.boolean().nullish(),
82
-
83
- /**
84
- * Configuration for code switching.
85
- */
86
- codeSwitchingConfig: z
87
- .object({
88
- /**
89
- * Languages to consider for code switching.
90
- */
91
- languages: z.array(z.string()).nullish(),
92
- })
93
- .nullish(),
94
-
95
- /**
96
- * Specific language for transcription.
97
- */
98
- language: z.string().nullish(),
99
-
100
- /**
101
- * Whether to enable callback when transcription is complete.
102
- */
103
- callback: z.boolean().nullish(),
104
-
105
- /**
106
- * Configuration for callback.
107
- */
108
- callbackConfig: z
109
- .object({
110
- /**
111
- * URL to send the callback to.
112
- */
113
- url: z.string(),
114
- /**
115
- * HTTP method for the callback.
116
- */
117
- method: z.enum(['POST', 'PUT']).nullish(),
118
- })
119
- .nullish(),
120
-
121
- /**
122
- * Whether to generate subtitles.
123
- */
124
- subtitles: z.boolean().nullish(),
125
-
126
- /**
127
- * Configuration for subtitles generation.
128
- */
129
- subtitlesConfig: z
130
- .object({
131
- /**
132
- * Subtitle file formats to generate.
133
- */
134
- formats: z.array(z.enum(['srt', 'vtt'])).nullish(),
135
- /**
136
- * Minimum duration for subtitle segments.
137
- */
138
- minimumDuration: z.number().nullish(),
139
- /**
140
- * Maximum duration for subtitle segments.
141
- */
142
- maximumDuration: z.number().nullish(),
143
- /**
144
- * Maximum characters per row in subtitles.
145
- */
146
- maximumCharactersPerRow: z.number().nullish(),
147
- /**
148
- * Maximum rows per caption in subtitles.
149
- */
150
- maximumRowsPerCaption: z.number().nullish(),
151
- /**
152
- * Style of subtitles.
153
- */
154
- style: z.enum(['default', 'compliance']).nullish(),
155
- })
156
- .nullish(),
157
-
158
- /**
159
- * Whether to enable speaker diarization (speaker identification).
160
- */
161
- diarization: z.boolean().nullish(),
162
-
163
- /**
164
- * Configuration for diarization.
165
- */
166
- diarizationConfig: z
167
- .object({
168
- /**
169
- * Exact number of speakers to identify.
170
- */
171
- numberOfSpeakers: z.number().nullish(),
172
- /**
173
- * Minimum number of speakers to identify.
174
- */
175
- minSpeakers: z.number().nullish(),
176
- /**
177
- * Maximum number of speakers to identify.
178
- */
179
- maxSpeakers: z.number().nullish(),
180
- /**
181
- * Whether to use enhanced diarization.
182
- */
183
- enhanced: z.boolean().nullish(),
184
- })
185
- .nullish(),
186
-
187
- /**
188
- * Whether to translate the transcription.
189
- */
190
- translation: z.boolean().nullish(),
191
-
192
- /**
193
- * Configuration for translation.
194
- */
195
- translationConfig: z
196
- .object({
197
- /**
198
- * Target languages for translation.
199
- */
200
- targetLanguages: z.array(z.string()),
201
- /**
202
- * Translation model to use.
203
- */
204
- model: z.enum(['base', 'enhanced']).nullish(),
205
- /**
206
- * Whether to match original utterances in translation.
207
- */
208
- matchOriginalUtterances: z.boolean().nullish(),
209
- })
210
- .nullish(),
211
-
212
- /**
213
- * Whether to generate a summary of the transcription.
214
- */
215
- summarization: z.boolean().nullish(),
216
-
217
- /**
218
- * Configuration for summarization.
219
- */
220
- summarizationConfig: z
221
- .object({
222
- /**
223
- * Type of summary to generate.
224
- */
225
- type: z.enum(['general', 'bullet_points', 'concise']).nullish(),
226
- })
227
- .nullish(),
228
-
229
- /**
230
- * Whether to enable content moderation.
231
- */
232
- moderation: z.boolean().nullish(),
233
-
234
- /**
235
- * Whether to enable named entity recognition.
236
- */
237
- namedEntityRecognition: z.boolean().nullish(),
238
-
239
- /**
240
- * Whether to enable automatic chapter creation.
241
- */
242
- chapterization: z.boolean().nullish(),
243
-
244
- /**
245
- * Whether to ensure consistent naming of entities.
246
- */
247
- nameConsistency: z.boolean().nullish(),
248
-
249
- /**
250
- * Whether to enable custom spelling.
251
- */
252
- customSpelling: z.boolean().nullish(),
253
-
254
- /**
255
- * Configuration for custom spelling.
256
- */
257
- customSpellingConfig: z
258
- .object({
259
- /**
260
- * Dictionary of custom spellings.
261
- */
262
- spellingDictionary: z.record(z.string(), z.array(z.string())),
263
- })
264
- .nullish(),
265
-
266
- /**
267
- * Whether to extract structured data from the transcription.
268
- */
269
- structuredDataExtraction: z.boolean().nullish(),
270
-
271
- /**
272
- * Configuration for structured data extraction.
273
- */
274
- structuredDataExtractionConfig: z
275
- .object({
276
- /**
277
- * Classes of data to extract.
278
- */
279
- classes: z.array(z.string()),
280
- })
281
- .nullish(),
282
-
283
- /**
284
- * Whether to perform sentiment analysis on the transcription.
285
- */
286
- sentimentAnalysis: z.boolean().nullish(),
287
-
288
- /**
289
- * Whether to send audio to a language model for processing.
290
- */
291
- audioToLlm: z.boolean().nullish(),
292
-
293
- /**
294
- * Configuration for audio to language model processing.
295
- */
296
- audioToLlmConfig: z
297
- .object({
298
- /**
299
- * Prompts to send to the language model.
300
- */
301
- prompts: z.array(z.string()),
302
- })
303
- .nullish(),
304
-
305
- /**
306
- * Custom metadata to include with the transcription.
307
- */
308
- customMetadata: z.record(z.string(), z.any()).nullish(),
309
-
310
- /**
311
- * Whether to include sentence-level segmentation.
312
- */
313
- sentences: z.boolean().nullish(),
314
-
315
- /**
316
- * Whether to enable display mode.
317
- */
318
- displayMode: z.boolean().nullish(),
319
-
320
- /**
321
- * Whether to enhance punctuation in the transcription.
322
- */
323
- punctuationEnhanced: z.boolean().nullish(),
324
- });
325
-
326
- export type GladiaTranscriptionModelOptions = z.infer<
327
- typeof gladiaTranscriptionModelOptionsSchema
328
- >;
24
+ import { gladiaTranscriptionModelOptionsSchema } from './gladia-transcription-model-options';
25
+ import type { GladiaTranscriptionInitiateAPITypes } from './gladia-api-types';
329
26
 
330
27
  interface GladiaTranscriptionModelConfig extends GladiaConfig {
331
28
  _internal?: {
@@ -340,8 +37,22 @@ export class GladiaTranscriptionModel implements TranscriptionModelV4 {
340
37
  return this.config.provider;
341
38
  }
342
39
 
40
+ static [WORKFLOW_SERIALIZE](model: GladiaTranscriptionModel) {
41
+ return serializeModelOptions({
42
+ modelId: model.modelId,
43
+ config: model.config,
44
+ });
45
+ }
46
+
47
+ static [WORKFLOW_DESERIALIZE](options: {
48
+ modelId: 'default';
49
+ config: GladiaTranscriptionModelConfig;
50
+ }) {
51
+ return new GladiaTranscriptionModel(options.modelId, options.config);
52
+ }
53
+
343
54
  constructor(
344
- readonly modelId: 'default',
55
+ readonly modelId: string,
345
56
  private readonly config: GladiaTranscriptionModelConfig,
346
57
  ) {}
347
58
 
@@ -510,7 +221,7 @@ export class GladiaTranscriptionModel implements TranscriptionModelV4 {
510
221
  path: '/v2/upload',
511
222
  modelId: 'default',
512
223
  }),
513
- headers: combineHeaders(this.config.headers(), options.headers),
224
+ headers: combineHeaders(this.config.headers?.(), options.headers),
514
225
  formData,
515
226
  failedResponseHandler: gladiaFailedResponseHandler,
516
227
  successfulResponseHandler: createJsonResponseHandler(
@@ -527,7 +238,7 @@ export class GladiaTranscriptionModel implements TranscriptionModelV4 {
527
238
  path: '/v2/pre-recorded',
528
239
  modelId: 'default',
529
240
  }),
530
- headers: combineHeaders(this.config.headers(), options.headers),
241
+ headers: combineHeaders(this.config.headers?.(), options.headers),
531
242
  body: {
532
243
  ...body,
533
244
  audio_url: uploadResponse.audio_url,
@@ -540,8 +251,11 @@ export class GladiaTranscriptionModel implements TranscriptionModelV4 {
540
251
  fetch: this.config.fetch,
541
252
  });
542
253
 
543
- // Poll the result URL until the transcription is done or an error occurs
254
+ // Poll the result URL until the transcription is done or an error occurs.
255
+ // The result URL comes from the provider response; only send credentials
256
+ // when it stays on the provider's own origin.
544
257
  const resultUrl = transcriptionInitResponse.result_url;
258
+ const apiOrigin = this.config.url({ modelId: 'default', path: '' });
545
259
  let transcriptionResult;
546
260
  let transcriptionResultHeaders;
547
261
  const timeoutMs = 60 * 1000; // 60 seconds timeout
@@ -560,7 +274,9 @@ export class GladiaTranscriptionModel implements TranscriptionModelV4 {
560
274
 
561
275
  const response = await getFromApi({
562
276
  url: resultUrl,
563
- headers: combineHeaders(this.config.headers(), options.headers),
277
+ headers: isSameOrigin(resultUrl, apiOrigin)
278
+ ? combineHeaders(this.config.headers?.(), options.headers)
279
+ : undefined,
564
280
  failedResponseHandler: gladiaFailedResponseHandler,
565
281
  successfulResponseHandler: createJsonResponseHandler(
566
282
  gladiaTranscriptionResultResponseSchema,
package/src/index.ts CHANGED
@@ -1,4 +1,4 @@
1
1
  export { createGladia, gladia } from './gladia-provider';
2
2
  export type { GladiaProvider, GladiaProviderSettings } from './gladia-provider';
3
- export type { GladiaTranscriptionModelOptions } from './gladia-transcription-model';
3
+ export type { GladiaTranscriptionModelOptions } from './gladia-transcription-model-options';
4
4
  export { VERSION } from './version';
package/dist/index.d.mts DELETED
@@ -1,158 +0,0 @@
1
- import { TranscriptionModelV4, ProviderV4 } from '@ai-sdk/provider';
2
- import { FetchFunction } from '@ai-sdk/provider-utils';
3
- import { z } from 'zod/v4';
4
-
5
- type GladiaConfig = {
6
- provider: string;
7
- url: (options: {
8
- modelId: string;
9
- path: string;
10
- }) => string;
11
- headers: () => Record<string, string | undefined>;
12
- fetch?: FetchFunction;
13
- generateId?: () => string;
14
- };
15
-
16
- declare const gladiaTranscriptionModelOptionsSchema: z.ZodObject<{
17
- contextPrompt: z.ZodOptional<z.ZodNullable<z.ZodString>>;
18
- customVocabulary: z.ZodOptional<z.ZodNullable<z.ZodUnion<readonly [z.ZodBoolean, z.ZodArray<z.ZodAny>]>>>;
19
- customVocabularyConfig: z.ZodOptional<z.ZodNullable<z.ZodObject<{
20
- vocabulary: z.ZodArray<z.ZodUnion<readonly [z.ZodString, z.ZodObject<{
21
- value: z.ZodString;
22
- intensity: z.ZodOptional<z.ZodNullable<z.ZodNumber>>;
23
- pronunciations: z.ZodOptional<z.ZodNullable<z.ZodArray<z.ZodString>>>;
24
- language: z.ZodOptional<z.ZodNullable<z.ZodString>>;
25
- }, z.core.$strip>]>>;
26
- defaultIntensity: z.ZodOptional<z.ZodNullable<z.ZodNumber>>;
27
- }, z.core.$strip>>>;
28
- detectLanguage: z.ZodOptional<z.ZodNullable<z.ZodBoolean>>;
29
- enableCodeSwitching: z.ZodOptional<z.ZodNullable<z.ZodBoolean>>;
30
- codeSwitchingConfig: z.ZodOptional<z.ZodNullable<z.ZodObject<{
31
- languages: z.ZodOptional<z.ZodNullable<z.ZodArray<z.ZodString>>>;
32
- }, z.core.$strip>>>;
33
- language: z.ZodOptional<z.ZodNullable<z.ZodString>>;
34
- callback: z.ZodOptional<z.ZodNullable<z.ZodBoolean>>;
35
- callbackConfig: z.ZodOptional<z.ZodNullable<z.ZodObject<{
36
- url: z.ZodString;
37
- method: z.ZodOptional<z.ZodNullable<z.ZodEnum<{
38
- POST: "POST";
39
- PUT: "PUT";
40
- }>>>;
41
- }, z.core.$strip>>>;
42
- subtitles: z.ZodOptional<z.ZodNullable<z.ZodBoolean>>;
43
- subtitlesConfig: z.ZodOptional<z.ZodNullable<z.ZodObject<{
44
- formats: z.ZodOptional<z.ZodNullable<z.ZodArray<z.ZodEnum<{
45
- srt: "srt";
46
- vtt: "vtt";
47
- }>>>>;
48
- minimumDuration: z.ZodOptional<z.ZodNullable<z.ZodNumber>>;
49
- maximumDuration: z.ZodOptional<z.ZodNullable<z.ZodNumber>>;
50
- maximumCharactersPerRow: z.ZodOptional<z.ZodNullable<z.ZodNumber>>;
51
- maximumRowsPerCaption: z.ZodOptional<z.ZodNullable<z.ZodNumber>>;
52
- style: z.ZodOptional<z.ZodNullable<z.ZodEnum<{
53
- default: "default";
54
- compliance: "compliance";
55
- }>>>;
56
- }, z.core.$strip>>>;
57
- diarization: z.ZodOptional<z.ZodNullable<z.ZodBoolean>>;
58
- diarizationConfig: z.ZodOptional<z.ZodNullable<z.ZodObject<{
59
- numberOfSpeakers: z.ZodOptional<z.ZodNullable<z.ZodNumber>>;
60
- minSpeakers: z.ZodOptional<z.ZodNullable<z.ZodNumber>>;
61
- maxSpeakers: z.ZodOptional<z.ZodNullable<z.ZodNumber>>;
62
- enhanced: z.ZodOptional<z.ZodNullable<z.ZodBoolean>>;
63
- }, z.core.$strip>>>;
64
- translation: z.ZodOptional<z.ZodNullable<z.ZodBoolean>>;
65
- translationConfig: z.ZodOptional<z.ZodNullable<z.ZodObject<{
66
- targetLanguages: z.ZodArray<z.ZodString>;
67
- model: z.ZodOptional<z.ZodNullable<z.ZodEnum<{
68
- base: "base";
69
- enhanced: "enhanced";
70
- }>>>;
71
- matchOriginalUtterances: z.ZodOptional<z.ZodNullable<z.ZodBoolean>>;
72
- }, z.core.$strip>>>;
73
- summarization: z.ZodOptional<z.ZodNullable<z.ZodBoolean>>;
74
- summarizationConfig: z.ZodOptional<z.ZodNullable<z.ZodObject<{
75
- type: z.ZodOptional<z.ZodNullable<z.ZodEnum<{
76
- general: "general";
77
- bullet_points: "bullet_points";
78
- concise: "concise";
79
- }>>>;
80
- }, z.core.$strip>>>;
81
- moderation: z.ZodOptional<z.ZodNullable<z.ZodBoolean>>;
82
- namedEntityRecognition: z.ZodOptional<z.ZodNullable<z.ZodBoolean>>;
83
- chapterization: z.ZodOptional<z.ZodNullable<z.ZodBoolean>>;
84
- nameConsistency: z.ZodOptional<z.ZodNullable<z.ZodBoolean>>;
85
- customSpelling: z.ZodOptional<z.ZodNullable<z.ZodBoolean>>;
86
- customSpellingConfig: z.ZodOptional<z.ZodNullable<z.ZodObject<{
87
- spellingDictionary: z.ZodRecord<z.ZodString, z.ZodArray<z.ZodString>>;
88
- }, z.core.$strip>>>;
89
- structuredDataExtraction: z.ZodOptional<z.ZodNullable<z.ZodBoolean>>;
90
- structuredDataExtractionConfig: z.ZodOptional<z.ZodNullable<z.ZodObject<{
91
- classes: z.ZodArray<z.ZodString>;
92
- }, z.core.$strip>>>;
93
- sentimentAnalysis: z.ZodOptional<z.ZodNullable<z.ZodBoolean>>;
94
- audioToLlm: z.ZodOptional<z.ZodNullable<z.ZodBoolean>>;
95
- audioToLlmConfig: z.ZodOptional<z.ZodNullable<z.ZodObject<{
96
- prompts: z.ZodArray<z.ZodString>;
97
- }, z.core.$strip>>>;
98
- customMetadata: z.ZodOptional<z.ZodNullable<z.ZodRecord<z.ZodString, z.ZodAny>>>;
99
- sentences: z.ZodOptional<z.ZodNullable<z.ZodBoolean>>;
100
- displayMode: z.ZodOptional<z.ZodNullable<z.ZodBoolean>>;
101
- punctuationEnhanced: z.ZodOptional<z.ZodNullable<z.ZodBoolean>>;
102
- }, z.core.$strip>;
103
- type GladiaTranscriptionModelOptions = z.infer<typeof gladiaTranscriptionModelOptionsSchema>;
104
- interface GladiaTranscriptionModelConfig extends GladiaConfig {
105
- _internal?: {
106
- currentDate?: () => Date;
107
- };
108
- }
109
- declare class GladiaTranscriptionModel implements TranscriptionModelV4 {
110
- readonly modelId: 'default';
111
- private readonly config;
112
- readonly specificationVersion = "v4";
113
- get provider(): string;
114
- constructor(modelId: 'default', config: GladiaTranscriptionModelConfig);
115
- private getArgs;
116
- doGenerate(options: Parameters<TranscriptionModelV4['doGenerate']>[0]): Promise<Awaited<ReturnType<TranscriptionModelV4['doGenerate']>>>;
117
- }
118
-
119
- interface GladiaProvider extends ProviderV4 {
120
- (): {
121
- transcription: GladiaTranscriptionModel;
122
- };
123
- /**
124
- * Creates a model for transcription.
125
- */
126
- transcription(): TranscriptionModelV4;
127
- /**
128
- * @deprecated Use `embeddingModel` instead.
129
- */
130
- textEmbeddingModel(modelId: string): never;
131
- }
132
- interface GladiaProviderSettings {
133
- /**
134
- * API key for authenticating requests.
135
- */
136
- apiKey?: string;
137
- /**
138
- * Custom headers to include in the requests.
139
- */
140
- headers?: Record<string, string>;
141
- /**
142
- * Custom fetch implementation. You can use it as a middleware to intercept requests,
143
- * or to provide a custom fetch implementation for e.g. testing.
144
- */
145
- fetch?: FetchFunction;
146
- }
147
- /**
148
- * Create a Gladia provider instance.
149
- */
150
- declare function createGladia(options?: GladiaProviderSettings): GladiaProvider;
151
- /**
152
- * Default Gladia provider instance.
153
- */
154
- declare const gladia: GladiaProvider;
155
-
156
- declare const VERSION: string;
157
-
158
- export { type GladiaProvider, type GladiaProviderSettings, type GladiaTranscriptionModelOptions, VERSION, createGladia, gladia };