@wovin/tranz 0.0.26

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/utils/audio/merge-results.d.ts ADDED
@@ -0,0 +1,47 @@
+ /**
+ * Utilities for merging transcription results from split audio segments
+ */
+ import type { TranscriptionResult } from '../transcription/providers.js';
+ import type { AudioSegment } from './split.js';
+ /**
+ * Word-level data with timing information
+ */
+ export interface WordData {
+ word: string;
+ start: number;
+ end: number;
+ confidence?: number;
+ speaker?: string;
+ }
+ /**
+ * Merged transcription result with segment information
+ */
+ export interface MergedTranscriptionResult extends TranscriptionResult {
+ /** Segment metadata for reference */
+ segments?: {
+ index: number;
+ startSec: number;
+ endSec: number;
+ text: string;
+ }[];
+ /** Total segments that were merged */
+ totalSegments?: number;
+ }
+ /**
+ * Merge multiple transcription results from audio segments into one
+ * Adjusts word timestamps to be relative to the original audio
+ *
+ * @param results - Array of transcription results from each segment
+ * @param segments - Array of audio segment metadata
+ * @returns Merged transcription result
+ */
+ export declare function mergeTranscriptionResults(results: TranscriptionResult[], segments: AudioSegment[]): MergedTranscriptionResult;
+ /**
+ * Format merged results with optional segment markers in the text
+ *
+ * @param result - Merged transcription result
+ * @param includeMarkers - Whether to include [Segment N] markers
+ * @returns Formatted text
+ */
+ export declare function formatMergedText(result: MergedTranscriptionResult, includeMarkers?: boolean): string;
+ //# sourceMappingURL=merge-results.d.ts.map
package/dist/utils/audio/merge-results.d.ts.map ADDED
@@ -0,0 +1 @@
+ {"version":3,"file":"merge-results.d.ts","sourceRoot":"","sources":["../../../src/utils/audio/merge-results.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,+BAA+B,CAAA;AACxE,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,YAAY,CAAA;AAE9C;;GAEG;AACH,MAAM,WAAW,QAAQ;IACvB,IAAI,EAAE,MAAM,CAAA;IACZ,KAAK,EAAE,MAAM,CAAA;IACb,GAAG,EAAE,MAAM,CAAA;IACX,UAAU,CAAC,EAAE,MAAM,CAAA;IACnB,OAAO,CAAC,EAAE,MAAM,CAAA;CACjB;AAED;;GAEG;AACH,MAAM,WAAW,yBAA0B,SAAQ,mBAAmB;IACpE,qCAAqC;IACrC,QAAQ,CAAC,EAAE;QACT,KAAK,EAAE,MAAM,CAAA;QACb,QAAQ,EAAE,MAAM,CAAA;QAChB,MAAM,EAAE,MAAM,CAAA;QACd,IAAI,EAAE,MAAM,CAAA;KACb,EAAE,CAAA;IACH,sCAAsC;IACtC,aAAa,CAAC,EAAE,MAAM,CAAA;CACvB;AAED;;;;;;;GAOG;AACH,wBAAgB,yBAAyB,CACvC,OAAO,EAAE,mBAAmB,EAAE,EAC9B,QAAQ,EAAE,YAAY,EAAE,GACvB,yBAAyB,CAqF3B;AAED;;;;;;GAMG;AACH,wBAAgB,gBAAgB,CAC9B,MAAM,EAAE,yBAAyB,EACjC,cAAc,GAAE,OAAe,GAC9B,MAAM,CAeR"}
package/dist/utils/audio/split.d.ts ADDED
@@ -0,0 +1,106 @@
+ /**
+ * Audio splitting utilities for tranz-cli
+ * Provides silence detection and optimal split point calculation
+ */
+ /**
+ * Configuration for audio splitting
+ */
+ export interface SplitConfig {
+ /** Maximum segment duration in seconds (default: 600 = 10min) */
+ maxDurationSec: number;
+ /** Minimum silence duration to consider for split (default: 1.0s) */
+ minSilenceDurSec: number;
+ /** FFmpeg silence threshold (default: '-35dB') */
+ silenceThreshold: string;
+ /** Prefer longer silences for splits (default: true) */
+ preferLongerSilence: boolean;
+ /** Buffer to leave at silence edges (default: 0.2s) */
+ silenceBuffer: number;
+ }
+ /**
+ * A detected silence region in the audio
+ */
+ export interface SilenceRegion {
+ startSec: number;
+ endSec: number;
+ durationSec: number;
+ }
+ /**
+ * A calculated split point
+ */
+ export interface SplitPoint {
+ /** Time in seconds where to split (middle of silence) */
+ timeSec: number;
+ /** Duration of the silence at this split point */
+ silenceDuration: number;
+ }
+ /**
+ * An audio segment after splitting
+ */
+ export interface AudioSegment {
+ index: number;
+ startSec: number;
+ endSec: number;
+ durationSec: number;
+ outputPath: string;
+ }
+ /**
+ * Default split configuration
+ */
+ export declare const DEFAULT_SPLIT_CONFIG: SplitConfig;
+ /**
+ * Get the duration of an audio file in seconds
+ */
+ export declare function getAudioDuration(audioPath: string): Promise<number>;
+ /**
+ * Detect silence regions in an audio file using FFmpeg
+ * Uses spawn directly for better compatibility
+ */
+ export declare function detectSilenceRegions(audioPath: string, config?: Partial<SplitConfig>): Promise<SilenceRegion[]>;
+ /**
+ * Find optimal split points in audio based on silence regions
+ * Prefers splitting at longer silences when possible
+ *
+ * @param silenceRegions - Detected silence regions
+ * @param totalDuration - Total audio duration in seconds
+ * @param config - Split configuration
+ * @returns Array of optimal split points
+ */
+ export declare function findOptimalSplitPoints(silenceRegions: SilenceRegion[], totalDuration: number, config?: Partial<SplitConfig>): SplitPoint[];
+ /**
+ * Split audio file at specified points using FFmpeg
+ *
+ * @param audioPath - Path to source audio file
+ * @param splitPoints - Where to split the audio
+ * @param totalDuration - Total duration of source audio
+ * @param outputDir - Directory to write segments
+ * @param baseName - Base name for output files
+ * @returns Array of created audio segments
+ */
+ export declare function splitAudioAtPoints(audioPath: string, splitPoints: SplitPoint[], totalDuration: number, outputDir: string, baseName: string): Promise<AudioSegment[]>;
+ /**
+ * Auto-split an audio file if it exceeds the maximum duration
+ * Returns the original file path if no split is needed
+ *
+ * @param audioPath - Path to source audio file
+ * @param outputDir - Directory for split segments
+ * @param config - Split configuration
+ * @returns Array of audio segment paths (single element if no split needed)
+ */
+ export declare function autoSplitAudio(audioPath: string, outputDir: string, config?: Partial<SplitConfig>): Promise<AudioSegment[]>;
+ /**
+ * Information about split points for logging/debugging
+ */
+ export interface SplitAnalysis {
+ totalDuration: number;
+ numSegments: number;
+ splitPoints: SplitPoint[];
+ silenceRegions: SilenceRegion[];
+ needsSplit: boolean;
+ }
+ /**
+ * Analyze audio file and return split information without actually splitting
+ * Useful for preview/dry-run functionality
+ */
+ export declare function analyzeSplitPoints(audioPath: string, config?: Partial<SplitConfig>): Promise<SplitAnalysis>;
+ //# sourceMappingURL=split.d.ts.map
package/dist/utils/audio/split.d.ts.map ADDED
@@ -0,0 +1 @@
+ {"version":3,"file":"split.d.ts","sourceRoot":"","sources":["../../../src/utils/audio/split.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAOH;;GAEG;AACH,MAAM,WAAW,WAAW;IAC1B,iEAAiE;IACjE,cAAc,EAAE,MAAM,CAAA;IACtB,qEAAqE;IACrE,gBAAgB,EAAE,MAAM,CAAA;IACxB,kDAAkD;IAClD,gBAAgB,EAAE,MAAM,CAAA;IACxB,wDAAwD;IACxD,mBAAmB,EAAE,OAAO,CAAA;IAC5B,uDAAuD;IACvD,aAAa,EAAE,MAAM,CAAA;CACtB;AAED;;GAEG;AACH,MAAM,WAAW,aAAa;IAC5B,QAAQ,EAAE,MAAM,CAAA;IAChB,MAAM,EAAE,MAAM,CAAA;IACd,WAAW,EAAE,MAAM,CAAA;CACpB;AAED;;GAEG;AACH,MAAM,WAAW,UAAU;IACzB,yDAAyD;IACzD,OAAO,EAAE,MAAM,CAAA;IACf,kDAAkD;IAClD,eAAe,EAAE,MAAM,CAAA;CACxB;AAED;;GAEG;AACH,MAAM,WAAW,YAAY;IAC3B,KAAK,EAAE,MAAM,CAAA;IACb,QAAQ,EAAE,MAAM,CAAA;IAChB,MAAM,EAAE,MAAM,CAAA;IACd,WAAW,EAAE,MAAM,CAAA;IACnB,UAAU,EAAE,MAAM,CAAA;CACnB;AAED;;GAEG;AACH,eAAO,MAAM,oBAAoB,EAAE,WAMlC,CAAA;AAED;;GAEG;AACH,wBAAsB,gBAAgB,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAezE;AAED;;;GAGG;AACH,wBAAsB,oBAAoB,CACxC,SAAS,EAAE,MAAM,EACjB,MAAM,GAAE,OAAO,CAAC,WAAW,CAAM,GAChC,OAAO,CAAC,aAAa,EAAE,CAAC,CAsD1B;AAED;;;;;;;;GAQG;AACH,wBAAgB,sBAAsB,CACpC,cAAc,EAAE,aAAa,EAAE,EAC/B,aAAa,EAAE,MAAM,EACrB,MAAM,GAAE,OAAO,CAAC,WAAW,CAAM,GAChC,UAAU,EAAE,CAwEd;AAED;;;;;;;;;GASG;AACH,wBAAsB,kBAAkB,CACtC,SAAS,EAAE,MAAM,EACjB,WAAW,EAAE,UAAU,EAAE,EACzB,aAAa,EAAE,MAAM,EACrB,SAAS,EAAE,MAAM,EACjB,QAAQ,EAAE,MAAM,GACf,OAAO,CAAC,YAAY,EAAE,CAAC,CA8CzB;AAED;;;;;;;;GAQG;AACH,wBAAsB,cAAc,CAClC,SAAS,EAAE,MAAM,EACjB,SAAS,EAAE,MAAM,EACjB,MAAM,GAAE,OAAO,CAAC,WAAW,CAAM,GAChC,OAAO,CAAC,YAAY,EAAE,CAAC,CAuCzB;AAED;;GAEG;AACH,MAAM,WAAW,aAAa;IAC5B,aAAa,EAAE,MAAM,CAAA;IACrB,WAAW,EAAE,MAAM,CAAA;IACnB,WAAW,EAAE,UAAU,EAAE,CAAA;IACzB,cAAc,EAAE,aAAa,EAAE,CAAA;IAC/B,UAAU,EAAE,OAAO,CAAA;CACpB;AAED;;;GAGG;AACH,wBAAsB,kBAAkB,CACtC,SAAS,EAAE,MAAM,EACjB,MAAM,GAAE,OAAO,CAAC,WAAW,CAAM,GAChC,OAAO,CAAC,aAAa,CAAC,CA0BxB"}
package/dist/utils/file-utils.d.ts ADDED
@@ -0,0 +1,6 @@
+ import path from 'path';
+ export declare const getExt: (filePath: string) => string;
+ export declare const getName: (filePath: string) => string;
+ export declare const getNameWithExt: (filePath: string) => string;
+ export declare const getFileInfo: (filePath: string) => path.ParsedPath;
+ //# sourceMappingURL=file-utils.d.ts.map
package/dist/utils/file-utils.d.ts.map ADDED
@@ -0,0 +1 @@
+ {"version":3,"file":"file-utils.d.ts","sourceRoot":"","sources":["../../src/utils/file-utils.ts"],"names":[],"mappings":"AAAA,OAAO,IAAI,MAAM,MAAM,CAAA;AAEvB,eAAO,MAAM,MAAM,GAAI,UAAU,MAAM,WAEtC,CAAA;AACD,eAAO,MAAM,OAAO,GAAI,UAAU,MAAM,WAEvC,CAAA;AACD,eAAO,MAAM,cAAc,GAAI,UAAU,MAAM,WAG9C,CAAA;AACD,eAAO,MAAM,WAAW,GAAI,UAAU,MAAM,oBAG3C,CAAA"}
package/dist/utils/transcription/format.d.ts ADDED
@@ -0,0 +1,14 @@
+ /**
+ * Format transcript with line breaks based on pauses between words
+ * @param transcript The punctuated transcript text from the API
+ * @param words Array of word objects with timing data
+ * @param shortPauseThreshold Threshold for single line break (default: 1.0s)
+ * @param longPauseThreshold Threshold for paragraph break/double newline (default: 5.0s)
+ */
+ export declare function formatTranscriptWithPauses(transcript: string, words: Array<{
+ word: string;
+ start: number;
+ end: number;
+ confidence: number;
+ }>, shortPauseThreshold?: number, longPauseThreshold?: number): string;
+ //# sourceMappingURL=format.d.ts.map
package/dist/utils/transcription/format.d.ts.map ADDED
@@ -0,0 +1 @@
+ {"version":3,"file":"format.d.ts","sourceRoot":"","sources":["../../../src/utils/transcription/format.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AACH,wBAAgB,0BAA0B,CACzC,UAAU,EAAE,MAAM,EAClB,KAAK,EAAE,KAAK,CAAC;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,KAAK,EAAE,MAAM,CAAC;IAAC,GAAG,EAAE,MAAM,CAAC;IAAC,UAAU,EAAE,MAAM,CAAA;CAAE,CAAC,EAC9E,mBAAmB,SAAM,EACzB,kBAAkB,SAAM,GACtB,MAAM,CAgDR"}
package/dist/utils/transcription/mime-detection.d.ts ADDED
@@ -0,0 +1,25 @@
+ import { Buffer } from 'node:buffer';
+ /**
+ * Detect audio format from buffer magic bytes
+ *
+ * @param buffer - The audio buffer to analyze
+ * @returns The detected MIME type string ('audio/mpeg', 'audio/ogg', 'audio/wav', 'audio/flac')
+ *
+ * @description
+ * This function analyzes the magic bytes at the beginning of an audio buffer to determine
+ * its format. It handles MP3 files with ID3 tags by skipping over them before checking
+ * for the actual audio frame header.
+ *
+ * Supported formats:
+ * - MP3 (MPEG Audio): FF FB, FF FA, FF F3, FF F2 with optional ID3 tag
+ * - OGG: "OggS" (4F 67 67 53)
+ * - WAV (RIFF): "RIFF" (52 49 46 46)
+ * - FLAC: "fLaC" (66 4C 61 43)
+ *
+ * @example
+ * const buffer = readFileSync('audio.mp3')
+ * const mimeType = detectAudioMimeType(buffer)
+ * console.log(mimeType) // 'audio/mpeg'
+ */
+ export declare function detectAudioMimeType(buffer: Buffer): string;
+ //# sourceMappingURL=mime-detection.d.ts.map
package/dist/utils/transcription/mime-detection.d.ts.map ADDED
@@ -0,0 +1 @@
+ {"version":3,"file":"mime-detection.d.ts","sourceRoot":"","sources":["../../../src/utils/transcription/mime-detection.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAA;AAEpC;;;;;;;;;;;;;;;;;;;;;GAqBG;AACH,wBAAgB,mBAAmB,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAuD1D"}
package/dist/utils/transcription/providers.d.ts ADDED
@@ -0,0 +1,146 @@
+ /**
+ * Transcription provider types and interfaces
+ * Defines the contract for all transcription providers
+ */
+ /**
+ * Result object returned from transcription operations
+ * Contains the transcribed text and optional provider-specific metadata
+ */
+ export interface TranscriptionResult {
+ /** The transcribed text content */
+ text: string;
+ /** Raw response object from the provider (optional, for debugging) */
+ rawResponse?: any;
+ /** Error message if transcription failed */
+ error?: string;
+ /** Confidence score of the transcription (0-1) */
+ confidence?: number;
+ /** Word-level data (timing, confidence, etc.) */
+ words?: any[];
+ /** Duration of audio in seconds */
+ duration?: number;
+ /** Detected or specified language code */
+ language?: string;
+ /** Model used for transcription (as returned by provider) */
+ model?: string;
+ }
+ /**
+ * Interface that all transcription providers must implement
+ * Defines the standard contract for transcription functionality
+ */
+ export interface TranscriptionProvider {
+ /** Provider name/identifier */
+ name: string;
+ /** Maximum audio duration in seconds (undefined = no limit) */
+ maxAudioDurationSec?: number;
+ /**
+ * Transcribe audio from the given parameters
+ * @param params - Transcription parameters
+ * @returns Promise resolving to transcription result
+ */
+ transcribe(params: TranscribeParams): Promise<TranscriptionResult>;
+ }
+ /**
+ * Parameters for transcription operations
+ * Supports both common and provider-specific options
+ */
+ export interface TranscribeParams {
+ /** Path to the audio file to transcribe */
+ audioPath?: string;
+ /** Audio buffer to transcribe */
+ audioBuffer?: Buffer;
+ /** MIME type for audioBuffer (auto-detected if not provided) */
+ mimeType?: string;
+ /** URL to audio file (e.g., IPFS gateway URL) */
+ audioUrl?: string;
+ /** Model to use for transcription (provider-specific) */
+ model?: string;
+ /** Language code for transcription (e.g., 'en', 'fr') */
+ language?: string;
+ /** API key for authentication (provider-specific) */
+ apiKey?: string;
+ /** Enable speaker diarization (Whisper-specific) */
+ diarize?: boolean;
+ /** Timestamp granularity for transcription (Mistral-specific) */
+ timestampGranularity?: 'segment' | 'word';
+ /** Path to model file (Whisper-specific) */
+ modelPath?: string;
+ /** Output directory for results (Whisper-specific) */
+ outputDir?: string;
+ /** Provider configuration object */
+ config?: any;
+ }
+ /**
+ * Union type for supported provider names
+ */
+ export type ProviderName = 'whisper' | 'mistral' | 'greenpt';
+ /**
+ * Factory function to create a transcription provider instance
+ * @param providerName - Name of the provider to create
+ * @param config - Optional configuration object for the provider
+ * @returns Transcription provider instance
+ */
+ export declare function createProvider(providerName: ProviderName, config?: any): TranscriptionProvider;
+ /**
+ * Whisper provider for local whisper.cpp transcription
+ * Manages model caching and local transcription execution
+ */
+ export declare class WhisperProvider implements TranscriptionProvider {
+ name: string;
+ private cacheDir;
+ static DEFAULTS: {
+ DIARIZE: boolean;
+ SILDUR: string;
+ SILBUF: number;
+ SILTHR: string;
+ MODEL_KEYS: {
+ tinyd: string;
+ small: string;
+ medium: string;
+ };
+ MODELS: {
+ tinyd: string;
+ small: string;
+ medium: string;
+ };
+ };
+ constructor(config?: any);
+ transcribe(params: TranscribeParams): Promise<TranscriptionResult>;
+ private ensureRequestedModelIsCached;
+ }
+ /**
+ * Voxtral/Mistral API limits
+ * These may need adjustment based on actual API constraints
+ */
+ export declare const VOXTRAL_LIMITS: {
+ /** Maximum audio duration in seconds (3 hours for Voxtral Transcribe 2) */
+ maxAudioDurationSec: number;
+ /** Recommended max duration before splitting (for reliability) */
+ recommendedMaxDurationSec: number;
+ /** Maximum context biasing words/phrases */
+ maxContextBiasingTerms: number;
+ /** Maximum file size in bytes (1GB) */
+ maxFileSizeBytes: number;
+ };
+ export declare class MistralProvider implements TranscriptionProvider {
+ name: string;
+ maxAudioDurationSec: number;
+ /**
+ * Check if audio duration exceeds recommended limits
+ */
+ static shouldSplit(durationSec: number): boolean;
+ /**
+ * Get the recommended max segment duration for splitting
+ */
+ static getRecommendedMaxSegment(): number;
+ transcribe(params: TranscribeParams): Promise<TranscriptionResult>;
+ }
+ /**
+ * GreenPT transcription provider
+ * Uses GreenPT API for audio transcription with Deepgram-compatible response format
+ */
+ export declare class GreenPTProvider implements TranscriptionProvider {
+ name: string;
+ transcribe(params: TranscribeParams): Promise<TranscriptionResult>;
+ }
+ //# sourceMappingURL=providers.d.ts.map
package/dist/utils/transcription/providers.d.ts.map ADDED
@@ -0,0 +1 @@
+ {"version":3,"file":"providers.d.ts","sourceRoot":"","sources":["../../../src/utils/transcription/providers.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAUH;;;GAGG;AACH,MAAM,WAAW,mBAAmB;IAClC,mCAAmC;IACnC,IAAI,EAAE,MAAM,CAAA;IACZ,sEAAsE;IACtE,WAAW,CAAC,EAAE,GAAG,CAAA;IACjB,4CAA4C;IAC5C,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,kDAAkD;IAClD,UAAU,CAAC,EAAE,MAAM,CAAA;IACnB,iDAAiD;IACjD,KAAK,CAAC,EAAE,GAAG,EAAE,CAAA;IACb,mCAAmC;IACnC,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,0CAA0C;IAC1C,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,6DAA6D;IAC7D,KAAK,CAAC,EAAE,MAAM,CAAA;CACf;AAED;;;GAGG;AACH,MAAM,WAAW,qBAAqB;IACpC,+BAA+B;IAC/B,IAAI,EAAE,MAAM,CAAA;IACZ,+DAA+D;IAC/D,mBAAmB,CAAC,EAAE,MAAM,CAAA;IAC5B;;;;OAIG;IACH,UAAU,CAAC,MAAM,EAAE,gBAAgB,GAAG,OAAO,CAAC,mBAAmB,CAAC,CAAA;CACnE;AAED;;;GAGG;AACH,MAAM,WAAW,gBAAgB;IAC/B,2CAA2C;IAC3C,SAAS,CAAC,EAAE,MAAM,CAAA;IAClB,iCAAiC;IACjC,WAAW,CAAC,EAAE,MAAM,CAAA;IACpB,gEAAgE;IAChE,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,iDAAiD;IACjD,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,yDAAyD;IACzD,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,yDAAyD;IACzD,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,qDAAqD;IACrD,MAAM,CAAC,EAAE,MAAM,CAAA;IACf,oDAAoD;IACpD,OAAO,CAAC,EAAE,OAAO,CAAA;IACjB,iEAAiE;IACjE,oBAAoB,CAAC,EAAE,SAAS,GAAG,MAAM,CAAA;IACzC,4CAA4C;IAC5C,SAAS,CAAC,EAAE,MAAM,CAAA;IAClB,sDAAsD;IACtD,SAAS,CAAC,EAAE,MAAM,CAAA;IAClB,oCAAoC;IACpC,MAAM,CAAC,EAAE,GAAG,CAAA;CACb;AAED;;GAEG;AACH,MAAM,MAAM,YAAY,GAAG,SAAS,GAAG,SAAS,GAAG,SAAS,CAAA;AAE5D;;;;;GAKG;AACH,wBAAgB,cAAc,CAAC,YAAY,EAAE,YAAY,EAAE,MAAM,CAAC,EAAE,GAAG,GAAG,qBAAqB,CAW9F;AASD;;;GAGG;AACH,qBAAa,eAAgB,YAAW,qBAAqB;IAC3D,IAAI,SAAY;IAEhB,OAAO,CAAC,QAAQ,CAAQ;IAExB,MAAM,CAAC,QAAQ;;;;;;;;;;;;;;;MAkBd;gBAEW,MAAM,CAAC,EAAE,GAAG;IAKlB,UAAU,CAAC,MAAM,EAAE,gBAAgB,GAAG,OAAO,CAAC,mBAAmB,CAAC;YA+F1D,4BAA4B;CAkC3C;AAED;;;GAGG;AACH,eAAO,MAAM,cAAc;IACzB,2EAA2E;;IAE3E,kEAAkE;;IAElE,4CAA4C;;IAE5C,uCAAuC;;CAExC,CAAA;AAED,qBAAa,eAAgB,YAAW,qBAAqB;IAC3D,IAAI,SAAY;IAChB,mBAAmB,SAAqC;IAExD;;OAEG;IACH,MAAM,CAAC,WAAW,CAAC,WAAW,EAAE,MAAM,GAAG,OAAO;IAIhD;;OAEG;IACH,MAAM,CAAC,wBAAwB,IAAI,MAAM;IAInC,UAAU,CAAC,MAAM,EAAE,gBAAgB,GAAG,OAAO,CAAC,mBAAmB,CAAC;CA8FzE;AAED;;;GAGG;AACH,qBAAa,eAAgB,YAAW,qBAAqB;IAC3D,IAAI,SAAY;IAEV,UAAU,CAAC,MAAM,EAAE,gBAAgB,GAAG,OAAO,CAAC,mBAAmB,CAAC;CAkGzE"}
package/dist/utils/transcription/transcribe.d.ts ADDED
@@ -0,0 +1,59 @@
+ /**
+ * Simple high-level transcription API with good defaults
+ */
+ import { type MergedTranscriptionResult } from '../audio/merge-results.js';
+ export interface TranscribeOptions {
+ /** Path to audio file */
+ audioPath?: string;
+ /** Audio buffer to transcribe */
+ audioBuffer?: Buffer;
+ /** MIME type for audioBuffer (auto-detected if not provided) */
+ mimeType?: string;
+ /** URL to audio file (e.g., IPFS gateway URL) */
+ audioUrl?: string;
+ /** Language code (e.g. 'en', 'fr') - note: disables word timestamps for Mistral */
+ language?: string;
+ /** Model to use (default: voxtral-mini-latest) */
+ model?: string;
+ /** Enable speaker diarization (default: true) */
+ diarize?: boolean;
+ /** Timestamp granularity: 'word' | 'segment' (default: 'word', disabled if language set) */
+ timestamps?: 'word' | 'segment';
+ /** Auto-split long audio (default: true, only works with audioPath) */
+ autoSplit?: boolean;
+ /** Output directory for split segments (default: system temp) */
+ splitOutputDir?: string;
+ }
+ export interface MistralTranscriberConfig {
+ /** Mistral API key */
+ apiKey: string;
+ /** Default model (default: voxtral-mini-latest) */
+ model?: string;
+ }
+ /**
+ * Simple Mistral transcriber with auto-splitting and good defaults
+ *
+ * @example
+ * ```ts
+ * const transcriber = createMistralTranscriber({ apiKey: process.env.MISTRAL_API_KEY })
+ *
+ * // From file (supports auto-split for long audio)
+ * const result = await transcriber.transcribe({ audioPath: './interview.mp3' })
+ *
+ * // From URL (e.g., IPFS gateway)
+ * const result = await transcriber.transcribe({ audioUrl: 'https://gateway.ipfs.io/ipfs/Qm...' })
+ *
+ * // From buffer
+ * const result = await transcriber.transcribe({ audioBuffer: buffer, mimeType: 'audio/mpeg' })
+ * ```
+ */
+ export declare function createMistralTranscriber(config: MistralTranscriberConfig): {
+ /**
+ * Transcribe audio with auto-splitting for long files (file path only)
+ * Diarization and word timestamps enabled by default
+ */
+ transcribe(options: TranscribeOptions): Promise<MergedTranscriptionResult>;
+ };
+ /** Alias for simpler import */
+ export declare const transcribe: typeof createMistralTranscriber;
+ //# sourceMappingURL=transcribe.d.ts.map
package/dist/utils/transcription/transcribe.d.ts.map ADDED
@@ -0,0 +1 @@
+ {"version":3,"file":"transcribe.d.ts","sourceRoot":"","sources":["../../../src/utils/transcription/transcribe.ts"],"names":[],"mappings":"AAAA;;GAEG;AAOH,OAAO,EAA6B,KAAK,yBAAyB,EAAE,MAAM,2BAA2B,CAAA;AAErG,MAAM,WAAW,iBAAiB;IAChC,yBAAyB;IACzB,SAAS,CAAC,EAAE,MAAM,CAAA;IAClB,iCAAiC;IACjC,WAAW,CAAC,EAAE,MAAM,CAAA;IACpB,gEAAgE;IAChE,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,iDAAiD;IACjD,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,mFAAmF;IACnF,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,kDAAkD;IAClD,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,iDAAiD;IACjD,OAAO,CAAC,EAAE,OAAO,CAAA;IACjB,4FAA4F;IAC5F,UAAU,CAAC,EAAE,MAAM,GAAG,SAAS,CAAA;IAC/B,uEAAuE;IACvE,SAAS,CAAC,EAAE,OAAO,CAAA;IACnB,iEAAiE;IACjE,cAAc,CAAC,EAAE,MAAM,CAAA;CACxB;AAED,MAAM,WAAW,wBAAwB;IACvC,sBAAsB;IACtB,MAAM,EAAE,MAAM,CAAA;IACd,mDAAmD;IACnD,KAAK,CAAC,EAAE,MAAM,CAAA;CACf;AAED;;;;;;;;;;;;;;;;GAgBG;AACH,wBAAgB,wBAAwB,CAAC,MAAM,EAAE,wBAAwB;IAKrE;;;OAGG;wBACuB,iBAAiB,GAAG,OAAO,CAAC,yBAAyB,CAAC;EA4EnF;AAED,+BAA+B;AAC/B,eAAO,MAAM,UAAU,iCAA2B,CAAA"}
package/package.json ADDED
@@ -0,0 +1,62 @@
+ {
+ "name": "@wovin/tranz",
+ "version": "0.0.26",
+ "type": "module",
+ "description": "Audio transcription library with provider support and auto-splitting",
+ "author": "gotjoshua @gotjoshua",
+ "license": "ISC",
+ "homepage": "https://gitlab.com/onezoomin/ztax/tranz",
+ "repository": "https://gitlab.com/onezoomin/ztax/tranz",
+ "bugs": "https://gitlab.com/onezoomin/ztax/tranz/-/issues",
+ "main": "./dist/index.min.js",
+ "module": "./dist/index.min.js",
+ "types": "./dist/index.d.ts",
+ "exports": {
+ ".": {
+ "import": "./dist/index.min.js",
+ "types": "./dist/index.d.ts"
+ },
+ "./providers": {
+ "import": "./dist/providers.min.js",
+ "types": "./dist/providers.d.ts"
+ },
+ "./audio": {
+ "import": "./dist/audio.min.js",
+ "types": "./dist/audio.d.ts"
+ }
+ },
+ "files": [
+ "./dist/"
+ ],
+ "publishConfig": {
+ "access": "public"
+ },
+ "dependencies": {
+ "fluent-ffmpeg": "^2.1.2"
+ },
+ "devDependencies": {
+ "@types/fluent-ffmpeg": "^2.1.21",
+ "@types/node": "^24.10.1",
+ "concurrently": "^8.2.2",
+ "tsup": "^8.5.0",
+ "typescript": "^5.9.3",
+ "tsupconfig": "^0.0.0"
+ },
+ "keywords": [
+ "transcription",
+ "audio",
+ "mistral",
+ "whisper",
+ "voxtral"
+ ],
+ "scripts": {
+ "build": "rm -rf dist/ && concurrently \"pnpm build:code\" \"pnpm build:types\"",
+ "build:code": "tsup",
+ "build:types": "tsc --emitDeclarationOnly --declaration",
+ "dev": "concurrently \"pnpm dev:code\" \"pnpm dev:types\"",
+ "dev:code": "tsup --watch",
+ "dev:types": "tsc --emitDeclarationOnly --declaration --watch",
+ "clean": "rm -rf .turbo && rm -rf node_modules && rm -rf dist",
+ "pub": "npm publish --tag latest --access=public"
+ }
+ }
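
The exports map declares three ESM entry points (`.`, `./providers`, `./audio`); with `"type": "module"` and only `import` conditions, consumers are expected to use ESM. Which symbols each bundle re-exports is not visible in this diff, so the named imports below are assumptions.

```ts
// Entry points from the exports map above; the named symbols are assumptions.
import { createMistralTranscriber } from '@wovin/tranz'                 // "."           -> ./dist/index.min.js
import { createProvider } from '@wovin/tranz/providers'                 // "./providers" -> ./dist/providers.min.js
import { analyzeSplitPoints, autoSplitAudio } from '@wovin/tranz/audio' // "./audio"     -> ./dist/audio.min.js
```
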