npm - @ai-sdk/gladia - Versions diffs - 3.0.0-beta.3 → 3.0.0-beta.31 - Mend

@ai-sdk/gladia 3.0.0-beta.3 → 3.0.0-beta.31

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (14)

package/CHANGELOG.md +242 -4
package/README.md +2 -0
package/dist/index.d.ts +65 -55
package/dist/index.js +152 -149
package/dist/index.js.map +1 -1
package/package.json +12 -12
package/src/gladia-config.ts +2 -2
package/src/gladia-provider.ts +7 -7
package/src/gladia-transcription-model-options.ts +309 -0
package/src/gladia-transcription-model.ts +32 -322
package/src/index.ts +1 -1
package/dist/index.d.mts +0 -158
package/dist/index.mjs +0 -604
package/dist/index.mjs.map +0 -1

package/src/gladia-transcription-model.ts CHANGED Viewed

@@ -1,7 +1,7 @@
 import {
   AISDKError,
-  TranscriptionModelV3,
-  SharedV3Warning,
+  type TranscriptionModelV4,
+  type SharedV4Warning,
 } from '@ai-sdk/provider';
 import {
   combineHeaders,
@@ -13,319 +13,15 @@ import {
   parseProviderOptions,
   postFormDataToApi,
   postJsonToApi,
+  serializeModelOptions,
+  WORKFLOW_SERIALIZE,
+  WORKFLOW_DESERIALIZE,
 } from '@ai-sdk/provider-utils';
 import { z } from 'zod/v4';
-import { GladiaConfig } from './gladia-config';
+import type { GladiaConfig } from './gladia-config';
 import { gladiaFailedResponseHandler } from './gladia-error';
-import { GladiaTranscriptionInitiateAPITypes } from './gladia-api-types';
-// https://docs.gladia.io/api-reference/v2/pre-recorded/init
-const gladiaTranscriptionModelOptionsSchema = z.object({
-  /**
-   * Optional context prompt to guide the transcription.
-   */
-  contextPrompt: z.string().nullish(),
-  /**
-   * Custom vocabulary to improve transcription accuracy.
-   * Can be a boolean or an array of custom terms.
-   */
-  customVocabulary: z.union([z.boolean(), z.array(z.any())]).nullish(),
-  /**
-   * Configuration for custom vocabulary.
-   */
-  customVocabularyConfig: z
-    .object({
-      /**
-       * Array of vocabulary terms or objects with pronunciation details.
-       */
-      vocabulary: z.array(
-        z.union([
-          z.string(),
-          z.object({
-            /**
-             * The vocabulary term.
-             */
-            value: z.string(),
-            /**
-             * Intensity of the term in recognition (optional).
-             */
-            intensity: z.number().nullish(),
-            /**
-             * Alternative pronunciations for the term (optional).
-             */
-            pronunciations: z.array(z.string()).nullish(),
-            /**
-             * Language of the term (optional).
-             */
-            language: z.string().nullish(),
-          }),
-        ]),
-      ),
-      /**
-       * Default intensity for all vocabulary terms.
-       */
-      defaultIntensity: z.number().nullish(),
-    })
-    .nullish(),
-  /**
-   * Whether to automatically detect the language of the audio.
-   */
-  detectLanguage: z.boolean().nullish(),
-  /**
-   * Whether to enable code switching (multiple languages in the same audio).
-   */
-  enableCodeSwitching: z.boolean().nullish(),
-  /**
-   * Configuration for code switching.
-   */
-  codeSwitchingConfig: z
-    .object({
-      /**
-       * Languages to consider for code switching.
-       */
-      languages: z.array(z.string()).nullish(),
-    })
-    .nullish(),
-  /**
-   * Specific language for transcription.
-   */
-  language: z.string().nullish(),
-  /**
-   * Whether to enable callback when transcription is complete.
-   */
-  callback: z.boolean().nullish(),
-  /**
-   * Configuration for callback.
-   */
-  callbackConfig: z
-    .object({
-      /**
-       * URL to send the callback to.
-       */
-      url: z.string(),
-      /**
-       * HTTP method for the callback.
-       */
-      method: z.enum(['POST', 'PUT']).nullish(),
-    })
-    .nullish(),
-  /**
-   * Whether to generate subtitles.
-   */
-  subtitles: z.boolean().nullish(),
-  /**
-   * Configuration for subtitles generation.
-   */
-  subtitlesConfig: z
-    .object({
-      /**
-       * Subtitle file formats to generate.
-       */
-      formats: z.array(z.enum(['srt', 'vtt'])).nullish(),
-      /**
-       * Minimum duration for subtitle segments.
-       */
-      minimumDuration: z.number().nullish(),
-      /**
-       * Maximum duration for subtitle segments.
-       */
-      maximumDuration: z.number().nullish(),
-      /**
-       * Maximum characters per row in subtitles.
-       */
-      maximumCharactersPerRow: z.number().nullish(),
-      /**
-       * Maximum rows per caption in subtitles.
-       */
-      maximumRowsPerCaption: z.number().nullish(),
-      /**
-       * Style of subtitles.
-       */
-      style: z.enum(['default', 'compliance']).nullish(),
-    })
-    .nullish(),
-  /**
-   * Whether to enable speaker diarization (speaker identification).
-   */
-  diarization: z.boolean().nullish(),
-  /**
-   * Configuration for diarization.
-   */
-  diarizationConfig: z
-    .object({
-      /**
-       * Exact number of speakers to identify.
-       */
-      numberOfSpeakers: z.number().nullish(),
-      /**
-       * Minimum number of speakers to identify.
-       */
-      minSpeakers: z.number().nullish(),
-      /**
-       * Maximum number of speakers to identify.
-       */
-      maxSpeakers: z.number().nullish(),
-      /**
-       * Whether to use enhanced diarization.
-       */
-      enhanced: z.boolean().nullish(),
-    })
-    .nullish(),
-  /**
-   * Whether to translate the transcription.
-   */
-  translation: z.boolean().nullish(),
-  /**
-   * Configuration for translation.
-   */
-  translationConfig: z
-    .object({
-      /**
-       * Target languages for translation.
-       */
-      targetLanguages: z.array(z.string()),
-      /**
-       * Translation model to use.
-       */
-      model: z.enum(['base', 'enhanced']).nullish(),
-      /**
-       * Whether to match original utterances in translation.
-       */
-      matchOriginalUtterances: z.boolean().nullish(),
-    })
-    .nullish(),
-  /**
-   * Whether to generate a summary of the transcription.
-   */
-  summarization: z.boolean().nullish(),
-  /**
-   * Configuration for summarization.
-   */
-  summarizationConfig: z
-    .object({
-      /**
-       * Type of summary to generate.
-       */
-      type: z.enum(['general', 'bullet_points', 'concise']).nullish(),
-    })
-    .nullish(),
-  /**
-   * Whether to enable content moderation.
-   */
-  moderation: z.boolean().nullish(),
-  /**
-   * Whether to enable named entity recognition.
-   */
-  namedEntityRecognition: z.boolean().nullish(),
-  /**
-   * Whether to enable automatic chapter creation.
-   */
-  chapterization: z.boolean().nullish(),
-  /**
-   * Whether to ensure consistent naming of entities.
-   */
-  nameConsistency: z.boolean().nullish(),
-  /**
-   * Whether to enable custom spelling.
-   */
-  customSpelling: z.boolean().nullish(),
-  /**
-   * Configuration for custom spelling.
-   */
-  customSpellingConfig: z
-    .object({
-      /**
-       * Dictionary of custom spellings.
-       */
-      spellingDictionary: z.record(z.string(), z.array(z.string())),
-    })
-    .nullish(),
-  /**
-   * Whether to extract structured data from the transcription.
-   */
-  structuredDataExtraction: z.boolean().nullish(),
-  /**
-   * Configuration for structured data extraction.
-   */
-  structuredDataExtractionConfig: z
-    .object({
-      /**
-       * Classes of data to extract.
-       */
-      classes: z.array(z.string()),
-    })
-    .nullish(),
-  /**
-   * Whether to perform sentiment analysis on the transcription.
-   */
-  sentimentAnalysis: z.boolean().nullish(),
-  /**
-   * Whether to send audio to a language model for processing.
-   */
-  audioToLlm: z.boolean().nullish(),
-  /**
-   * Configuration for audio to language model processing.
-   */
-  audioToLlmConfig: z
-    .object({
-      /**
-       * Prompts to send to the language model.
-       */
-      prompts: z.array(z.string()),
-    })
-    .nullish(),
-  /**
-   * Custom metadata to include with the transcription.
-   */
-  customMetadata: z.record(z.string(), z.any()).nullish(),
-  /**
-   * Whether to include sentence-level segmentation.
-   */
-  sentences: z.boolean().nullish(),
-  /**
-   * Whether to enable display mode.
-   */
-  displayMode: z.boolean().nullish(),
-  /**
-   * Whether to enhance punctuation in the transcription.
-   */
-  punctuationEnhanced: z.boolean().nullish(),
-});
-export type GladiaTranscriptionModelOptions = z.infer<
-  typeof gladiaTranscriptionModelOptionsSchema
->;
+import { gladiaTranscriptionModelOptionsSchema } from './gladia-transcription-model-options';
+import type { GladiaTranscriptionInitiateAPITypes } from './gladia-api-types';
 interface GladiaTranscriptionModelConfig extends GladiaConfig {
   _internal?: {
@@ -333,22 +29,36 @@ interface GladiaTranscriptionModelConfig extends GladiaConfig {
   };
 }
-export class GladiaTranscriptionModel implements TranscriptionModelV3 {
-  readonly specificationVersion = 'v3';
+export class GladiaTranscriptionModel implements TranscriptionModelV4 {
+  readonly specificationVersion = 'v4';
   get provider(): string {
     return this.config.provider;
   }
+  static [WORKFLOW_SERIALIZE](model: GladiaTranscriptionModel) {
+    return serializeModelOptions({
+      modelId: model.modelId,
+      config: model.config,
+    });
+  }
+  static [WORKFLOW_DESERIALIZE](options: {
+    modelId: 'default';
+    config: GladiaTranscriptionModelConfig;
+  }) {
+    return new GladiaTranscriptionModel(options.modelId, options.config);
+  }
   constructor(
-    readonly modelId: 'default',
+    readonly modelId: string,
     private readonly config: GladiaTranscriptionModelConfig,
   ) {}
   private async getArgs({
     providerOptions,
-  }: Parameters<TranscriptionModelV3['doGenerate']>[0]) {
-    const warnings: SharedV3Warning[] = [];
+  }: Parameters<TranscriptionModelV4['doGenerate']>[0]) {
+    const warnings: SharedV4Warning[] = [];
     // Parse provider options
     const gladiaOptions = await parseProviderOptions({
@@ -487,8 +197,8 @@ export class GladiaTranscriptionModel implements TranscriptionModelV3 {
   }
   async doGenerate(
-    options: Parameters<TranscriptionModelV3['doGenerate']>[0],
-  ): Promise<Awaited<ReturnType<TranscriptionModelV3['doGenerate']>>> {
+    options: Parameters<TranscriptionModelV4['doGenerate']>[0],
+  ): Promise<Awaited<ReturnType<TranscriptionModelV4['doGenerate']>>> {
     const currentDate = this.config._internal?.currentDate?.() ?? new Date();
     // Create form data with base fields
@@ -510,7 +220,7 @@ export class GladiaTranscriptionModel implements TranscriptionModelV3 {
         path: '/v2/upload',
         modelId: 'default',
       }),
-      headers: combineHeaders(this.config.headers(), options.headers),
+      headers: combineHeaders(this.config.headers?.(), options.headers),
       formData,
       failedResponseHandler: gladiaFailedResponseHandler,
       successfulResponseHandler: createJsonResponseHandler(
@@ -527,7 +237,7 @@ export class GladiaTranscriptionModel implements TranscriptionModelV3 {
         path: '/v2/pre-recorded',
         modelId: 'default',
       }),
-      headers: combineHeaders(this.config.headers(), options.headers),
+      headers: combineHeaders(this.config.headers?.(), options.headers),
       body: {
         ...body,
         audio_url: uploadResponse.audio_url,
@@ -560,7 +270,7 @@ export class GladiaTranscriptionModel implements TranscriptionModelV3 {
       const response = await getFromApi({
         url: resultUrl,
-        headers: combineHeaders(this.config.headers(), options.headers),
+        headers: combineHeaders(this.config.headers?.(), options.headers),
         failedResponseHandler: gladiaFailedResponseHandler,
         successfulResponseHandler: createJsonResponseHandler(
           gladiaTranscriptionResultResponseSchema,

package/src/index.ts CHANGED Viewed

@@ -1,4 +1,4 @@
 export { createGladia, gladia } from './gladia-provider';
 export type { GladiaProvider, GladiaProviderSettings } from './gladia-provider';
-export type { GladiaTranscriptionModelOptions } from './gladia-transcription-model';
+export type { GladiaTranscriptionModelOptions } from './gladia-transcription-model-options';
 export { VERSION } from './version';

package/dist/index.d.mts DELETED Viewed

@@ -1,158 +0,0 @@
-import { TranscriptionModelV3, ProviderV3 } from '@ai-sdk/provider';
-import { FetchFunction } from '@ai-sdk/provider-utils';
-import { z } from 'zod/v4';
-type GladiaConfig = {
-    provider: string;
-    url: (options: {
-        modelId: string;
-        path: string;
-    }) => string;
-    headers: () => Record<string, string | undefined>;
-    fetch?: FetchFunction;
-    generateId?: () => string;
-};
-declare const gladiaTranscriptionModelOptionsSchema: z.ZodObject<{
-    contextPrompt: z.ZodOptional<z.ZodNullable<z.ZodString>>;
-    customVocabulary: z.ZodOptional<z.ZodNullable<z.ZodUnion<readonly [z.ZodBoolean, z.ZodArray<z.ZodAny>]>>>;
-    customVocabularyConfig: z.ZodOptional<z.ZodNullable<z.ZodObject<{
-        vocabulary: z.ZodArray<z.ZodUnion<readonly [z.ZodString, z.ZodObject<{
-            value: z.ZodString;
-            intensity: z.ZodOptional<z.ZodNullable<z.ZodNumber>>;
-            pronunciations: z.ZodOptional<z.ZodNullable<z.ZodArray<z.ZodString>>>;
-            language: z.ZodOptional<z.ZodNullable<z.ZodString>>;
-        }, z.core.$strip>]>>;
-        defaultIntensity: z.ZodOptional<z.ZodNullable<z.ZodNumber>>;
-    }, z.core.$strip>>>;
-    detectLanguage: z.ZodOptional<z.ZodNullable<z.ZodBoolean>>;
-    enableCodeSwitching: z.ZodOptional<z.ZodNullable<z.ZodBoolean>>;
-    codeSwitchingConfig: z.ZodOptional<z.ZodNullable<z.ZodObject<{
-        languages: z.ZodOptional<z.ZodNullable<z.ZodArray<z.ZodString>>>;
-    }, z.core.$strip>>>;
-    language: z.ZodOptional<z.ZodNullable<z.ZodString>>;
-    callback: z.ZodOptional<z.ZodNullable<z.ZodBoolean>>;
-    callbackConfig: z.ZodOptional<z.ZodNullable<z.ZodObject<{
-        url: z.ZodString;
-        method: z.ZodOptional<z.ZodNullable<z.ZodEnum<{
-            POST: "POST";
-            PUT: "PUT";
-        }>>>;
-    }, z.core.$strip>>>;
-    subtitles: z.ZodOptional<z.ZodNullable<z.ZodBoolean>>;
-    subtitlesConfig: z.ZodOptional<z.ZodNullable<z.ZodObject<{
-        formats: z.ZodOptional<z.ZodNullable<z.ZodArray<z.ZodEnum<{
-            srt: "srt";
-            vtt: "vtt";
-        }>>>>;
-        minimumDuration: z.ZodOptional<z.ZodNullable<z.ZodNumber>>;
-        maximumDuration: z.ZodOptional<z.ZodNullable<z.ZodNumber>>;
-        maximumCharactersPerRow: z.ZodOptional<z.ZodNullable<z.ZodNumber>>;
-        maximumRowsPerCaption: z.ZodOptional<z.ZodNullable<z.ZodNumber>>;
-        style: z.ZodOptional<z.ZodNullable<z.ZodEnum<{
-            default: "default";
-            compliance: "compliance";
-        }>>>;
-    }, z.core.$strip>>>;
-    diarization: z.ZodOptional<z.ZodNullable<z.ZodBoolean>>;
-    diarizationConfig: z.ZodOptional<z.ZodNullable<z.ZodObject<{
-        numberOfSpeakers: z.ZodOptional<z.ZodNullable<z.ZodNumber>>;
-        minSpeakers: z.ZodOptional<z.ZodNullable<z.ZodNumber>>;
-        maxSpeakers: z.ZodOptional<z.ZodNullable<z.ZodNumber>>;
-        enhanced: z.ZodOptional<z.ZodNullable<z.ZodBoolean>>;
-    }, z.core.$strip>>>;
-    translation: z.ZodOptional<z.ZodNullable<z.ZodBoolean>>;
-    translationConfig: z.ZodOptional<z.ZodNullable<z.ZodObject<{
-        targetLanguages: z.ZodArray<z.ZodString>;
-        model: z.ZodOptional<z.ZodNullable<z.ZodEnum<{
-            base: "base";
-            enhanced: "enhanced";
-        }>>>;
-        matchOriginalUtterances: z.ZodOptional<z.ZodNullable<z.ZodBoolean>>;
-    }, z.core.$strip>>>;
-    summarization: z.ZodOptional<z.ZodNullable<z.ZodBoolean>>;
-    summarizationConfig: z.ZodOptional<z.ZodNullable<z.ZodObject<{
-        type: z.ZodOptional<z.ZodNullable<z.ZodEnum<{
-            general: "general";
-            bullet_points: "bullet_points";
-            concise: "concise";
-        }>>>;
-    }, z.core.$strip>>>;
-    moderation: z.ZodOptional<z.ZodNullable<z.ZodBoolean>>;
-    namedEntityRecognition: z.ZodOptional<z.ZodNullable<z.ZodBoolean>>;
-    chapterization: z.ZodOptional<z.ZodNullable<z.ZodBoolean>>;
-    nameConsistency: z.ZodOptional<z.ZodNullable<z.ZodBoolean>>;
-    customSpelling: z.ZodOptional<z.ZodNullable<z.ZodBoolean>>;
-    customSpellingConfig: z.ZodOptional<z.ZodNullable<z.ZodObject<{
-        spellingDictionary: z.ZodRecord<z.ZodString, z.ZodArray<z.ZodString>>;
-    }, z.core.$strip>>>;
-    structuredDataExtraction: z.ZodOptional<z.ZodNullable<z.ZodBoolean>>;
-    structuredDataExtractionConfig: z.ZodOptional<z.ZodNullable<z.ZodObject<{
-        classes: z.ZodArray<z.ZodString>;
-    }, z.core.$strip>>>;
-    sentimentAnalysis: z.ZodOptional<z.ZodNullable<z.ZodBoolean>>;
-    audioToLlm: z.ZodOptional<z.ZodNullable<z.ZodBoolean>>;
-    audioToLlmConfig: z.ZodOptional<z.ZodNullable<z.ZodObject<{
-        prompts: z.ZodArray<z.ZodString>;
-    }, z.core.$strip>>>;
-    customMetadata: z.ZodOptional<z.ZodNullable<z.ZodRecord<z.ZodString, z.ZodAny>>>;
-    sentences: z.ZodOptional<z.ZodNullable<z.ZodBoolean>>;
-    displayMode: z.ZodOptional<z.ZodNullable<z.ZodBoolean>>;
-    punctuationEnhanced: z.ZodOptional<z.ZodNullable<z.ZodBoolean>>;
-}, z.core.$strip>;
-type GladiaTranscriptionModelOptions = z.infer<typeof gladiaTranscriptionModelOptionsSchema>;
-interface GladiaTranscriptionModelConfig extends GladiaConfig {
-    _internal?: {
-        currentDate?: () => Date;
-    };
-}
-declare class GladiaTranscriptionModel implements TranscriptionModelV3 {
-    readonly modelId: 'default';
-    private readonly config;
-    readonly specificationVersion = "v3";
-    get provider(): string;
-    constructor(modelId: 'default', config: GladiaTranscriptionModelConfig);
-    private getArgs;
-    doGenerate(options: Parameters<TranscriptionModelV3['doGenerate']>[0]): Promise<Awaited<ReturnType<TranscriptionModelV3['doGenerate']>>>;
-}
-interface GladiaProvider extends ProviderV3 {
-    (): {
-        transcription: GladiaTranscriptionModel;
-    };
-    /**
-     * Creates a model for transcription.
-     */
-    transcription(): TranscriptionModelV3;
-    /**
-     * @deprecated Use `embeddingModel` instead.
-     */
-    textEmbeddingModel(modelId: string): never;
-}
-interface GladiaProviderSettings {
-    /**
-     * API key for authenticating requests.
-     */
-    apiKey?: string;
-    /**
-     * Custom headers to include in the requests.
-     */
-    headers?: Record<string, string>;
-    /**
-     * Custom fetch implementation. You can use it as a middleware to intercept requests,
-     * or to provide a custom fetch implementation for e.g. testing.
-     */
-    fetch?: FetchFunction;
-}
-/**
- * Create a Gladia provider instance.
- */
-declare function createGladia(options?: GladiaProviderSettings): GladiaProvider;
-/**
- * Default Gladia provider instance.
- */
-declare const gladia: GladiaProvider;
-declare const VERSION: string;
-export { type GladiaProvider, type GladiaProviderSettings, type GladiaTranscriptionModelOptions, VERSION, createGladia, gladia };