@wovin/tranz 0.0.26
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +661 -0
- package/dist/audio.d.ts +6 -0
- package/dist/audio.d.ts.map +1 -0
- package/dist/audio.min.js +302 -0
- package/dist/index.d.ts +9 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.min.js +769 -0
- package/dist/providers.d.ts +6 -0
- package/dist/providers.d.ts.map +1 -0
- package/dist/providers.min.js +681 -0
- package/dist/utils/audio/index.d.ts +6 -0
- package/dist/utils/audio/index.d.ts.map +1 -0
- package/dist/utils/audio/merge-results.d.ts +47 -0
- package/dist/utils/audio/merge-results.d.ts.map +1 -0
- package/dist/utils/audio/split.d.ts +106 -0
- package/dist/utils/audio/split.d.ts.map +1 -0
- package/dist/utils/file-utils.d.ts +6 -0
- package/dist/utils/file-utils.d.ts.map +1 -0
- package/dist/utils/transcription/format.d.ts +14 -0
- package/dist/utils/transcription/format.d.ts.map +1 -0
- package/dist/utils/transcription/mime-detection.d.ts +25 -0
- package/dist/utils/transcription/mime-detection.d.ts.map +1 -0
- package/dist/utils/transcription/providers.d.ts +146 -0
- package/dist/utils/transcription/providers.d.ts.map +1 -0
- package/dist/utils/transcription/transcribe.d.ts +59 -0
- package/dist/utils/transcription/transcribe.d.ts.map +1 -0
- package/package.json +62 -0
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Utilities for merging transcription results from split audio segments
|
|
3
|
+
*/
|
|
4
|
+
import type { TranscriptionResult } from '../transcription/providers.js';
|
|
5
|
+
import type { AudioSegment } from './split.js';
|
|
6
|
+
/**
|
|
7
|
+
* Word-level data with timing information
|
|
8
|
+
*/
|
|
9
|
+
export interface WordData {
|
|
10
|
+
word: string;
|
|
11
|
+
start: number;
|
|
12
|
+
end: number;
|
|
13
|
+
confidence?: number;
|
|
14
|
+
speaker?: string;
|
|
15
|
+
}
|
|
16
|
+
/**
|
|
17
|
+
* Merged transcription result with segment information
|
|
18
|
+
*/
|
|
19
|
+
export interface MergedTranscriptionResult extends TranscriptionResult {
|
|
20
|
+
/** Segment metadata for reference */
|
|
21
|
+
segments?: {
|
|
22
|
+
index: number;
|
|
23
|
+
startSec: number;
|
|
24
|
+
endSec: number;
|
|
25
|
+
text: string;
|
|
26
|
+
}[];
|
|
27
|
+
/** Total segments that were merged */
|
|
28
|
+
totalSegments?: number;
|
|
29
|
+
}
|
|
30
|
+
/**
|
|
31
|
+
* Merge multiple transcription results from audio segments into one
|
|
32
|
+
* Adjusts word timestamps to be relative to the original audio
|
|
33
|
+
*
|
|
34
|
+
* @param results - Array of transcription results from each segment
|
|
35
|
+
* @param segments - Array of audio segment metadata
|
|
36
|
+
* @returns Merged transcription result
|
|
37
|
+
*/
|
|
38
|
+
export declare function mergeTranscriptionResults(results: TranscriptionResult[], segments: AudioSegment[]): MergedTranscriptionResult;
|
|
39
|
+
/**
|
|
40
|
+
* Format merged results with optional segment markers in the text
|
|
41
|
+
*
|
|
42
|
+
* @param result - Merged transcription result
|
|
43
|
+
* @param includeMarkers - Whether to include [Segment N] markers
|
|
44
|
+
* @returns Formatted text
|
|
45
|
+
*/
|
|
46
|
+
export declare function formatMergedText(result: MergedTranscriptionResult, includeMarkers?: boolean): string;
|
|
47
|
+
//# sourceMappingURL=merge-results.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"merge-results.d.ts","sourceRoot":"","sources":["../../../src/utils/audio/merge-results.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,+BAA+B,CAAA;AACxE,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,YAAY,CAAA;AAE9C;;GAEG;AACH,MAAM,WAAW,QAAQ;IACvB,IAAI,EAAE,MAAM,CAAA;IACZ,KAAK,EAAE,MAAM,CAAA;IACb,GAAG,EAAE,MAAM,CAAA;IACX,UAAU,CAAC,EAAE,MAAM,CAAA;IACnB,OAAO,CAAC,EAAE,MAAM,CAAA;CACjB;AAED;;GAEG;AACH,MAAM,WAAW,yBAA0B,SAAQ,mBAAmB;IACpE,qCAAqC;IACrC,QAAQ,CAAC,EAAE;QACT,KAAK,EAAE,MAAM,CAAA;QACb,QAAQ,EAAE,MAAM,CAAA;QAChB,MAAM,EAAE,MAAM,CAAA;QACd,IAAI,EAAE,MAAM,CAAA;KACb,EAAE,CAAA;IACH,sCAAsC;IACtC,aAAa,CAAC,EAAE,MAAM,CAAA;CACvB;AAED;;;;;;;GAOG;AACH,wBAAgB,yBAAyB,CACvC,OAAO,EAAE,mBAAmB,EAAE,EAC9B,QAAQ,EAAE,YAAY,EAAE,GACvB,yBAAyB,CAqF3B;AAED;;;;;;GAMG;AACH,wBAAgB,gBAAgB,CAC9B,MAAM,EAAE,yBAAyB,EACjC,cAAc,GAAE,OAAe,GAC9B,MAAM,CAeR"}
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Audio splitting utilities for tranz-cli
|
|
3
|
+
* Provides silence detection and optimal split point calculation
|
|
4
|
+
*/
|
|
5
|
+
/**
|
|
6
|
+
* Configuration for audio splitting
|
|
7
|
+
*/
|
|
8
|
+
export interface SplitConfig {
|
|
9
|
+
/** Maximum segment duration in seconds (default: 600 = 10min) */
|
|
10
|
+
maxDurationSec: number;
|
|
11
|
+
/** Minimum silence duration to consider for split (default: 1.0s) */
|
|
12
|
+
minSilenceDurSec: number;
|
|
13
|
+
/** FFmpeg silence threshold (default: '-35dB') */
|
|
14
|
+
silenceThreshold: string;
|
|
15
|
+
/** Prefer longer silences for splits (default: true) */
|
|
16
|
+
preferLongerSilence: boolean;
|
|
17
|
+
/** Buffer to leave at silence edges (default: 0.2s) */
|
|
18
|
+
silenceBuffer: number;
|
|
19
|
+
}
|
|
20
|
+
/**
|
|
21
|
+
* A detected silence region in the audio
|
|
22
|
+
*/
|
|
23
|
+
export interface SilenceRegion {
|
|
24
|
+
startSec: number;
|
|
25
|
+
endSec: number;
|
|
26
|
+
durationSec: number;
|
|
27
|
+
}
|
|
28
|
+
/**
|
|
29
|
+
* A calculated split point
|
|
30
|
+
*/
|
|
31
|
+
export interface SplitPoint {
|
|
32
|
+
/** Time in seconds at which to split (middle of the silence) */
|
|
33
|
+
timeSec: number;
|
|
34
|
+
/** Duration of the silence at this split point */
|
|
35
|
+
silenceDuration: number;
|
|
36
|
+
}
|
|
37
|
+
/**
|
|
38
|
+
* An audio segment after splitting
|
|
39
|
+
*/
|
|
40
|
+
export interface AudioSegment {
|
|
41
|
+
index: number;
|
|
42
|
+
startSec: number;
|
|
43
|
+
endSec: number;
|
|
44
|
+
durationSec: number;
|
|
45
|
+
outputPath: string;
|
|
46
|
+
}
|
|
47
|
+
/**
|
|
48
|
+
* Default split configuration
|
|
49
|
+
*/
|
|
50
|
+
export declare const DEFAULT_SPLIT_CONFIG: SplitConfig;
|
|
51
|
+
/**
|
|
52
|
+
* Get the duration of an audio file in seconds
|
|
53
|
+
*/
|
|
54
|
+
export declare function getAudioDuration(audioPath: string): Promise<number>;
|
|
55
|
+
/**
|
|
56
|
+
* Detect silence regions in an audio file using FFmpeg
|
|
57
|
+
* Uses spawn directly for better compatibility
|
|
58
|
+
*/
|
|
59
|
+
export declare function detectSilenceRegions(audioPath: string, config?: Partial<SplitConfig>): Promise<SilenceRegion[]>;
|
|
60
|
+
/**
|
|
61
|
+
* Find optimal split points in audio based on silence regions
|
|
62
|
+
* Prefers splitting at longer silences when possible
|
|
63
|
+
*
|
|
64
|
+
* @param silenceRegions - Detected silence regions
|
|
65
|
+
* @param totalDuration - Total audio duration in seconds
|
|
66
|
+
* @param config - Split configuration
|
|
67
|
+
* @returns Array of optimal split points
|
|
68
|
+
*/
|
|
69
|
+
export declare function findOptimalSplitPoints(silenceRegions: SilenceRegion[], totalDuration: number, config?: Partial<SplitConfig>): SplitPoint[];
|
|
70
|
+
/**
|
|
71
|
+
* Split audio file at specified points using FFmpeg
|
|
72
|
+
*
|
|
73
|
+
* @param audioPath - Path to source audio file
|
|
74
|
+
* @param splitPoints - Where to split the audio
|
|
75
|
+
* @param totalDuration - Total duration of source audio
|
|
76
|
+
* @param outputDir - Directory to write segments
|
|
77
|
+
* @param baseName - Base name for output files
|
|
78
|
+
* @returns Array of created audio segments
|
|
79
|
+
*/
|
|
80
|
+
export declare function splitAudioAtPoints(audioPath: string, splitPoints: SplitPoint[], totalDuration: number, outputDir: string, baseName: string): Promise<AudioSegment[]>;
|
|
81
|
+
/**
|
|
82
|
+
* Auto-split an audio file if it exceeds the maximum duration
|
|
83
|
+
* If no split is needed, resolves to a single segment covering the original file
|
|
84
|
+
*
|
|
85
|
+
* @param audioPath - Path to source audio file
|
|
86
|
+
* @param outputDir - Directory for split segments
|
|
87
|
+
* @param config - Split configuration
|
|
88
|
+
* @returns Array of audio segments (single element if no split needed)
|
|
89
|
+
*/
|
|
90
|
+
export declare function autoSplitAudio(audioPath: string, outputDir: string, config?: Partial<SplitConfig>): Promise<AudioSegment[]>;
|
|
91
|
+
/**
|
|
92
|
+
* Information about split points for logging/debugging
|
|
93
|
+
*/
|
|
94
|
+
export interface SplitAnalysis {
|
|
95
|
+
totalDuration: number;
|
|
96
|
+
numSegments: number;
|
|
97
|
+
splitPoints: SplitPoint[];
|
|
98
|
+
silenceRegions: SilenceRegion[];
|
|
99
|
+
needsSplit: boolean;
|
|
100
|
+
}
|
|
101
|
+
/**
|
|
102
|
+
* Analyze audio file and return split information without actually splitting
|
|
103
|
+
* Useful for preview/dry-run functionality
|
|
104
|
+
*/
|
|
105
|
+
export declare function analyzeSplitPoints(audioPath: string, config?: Partial<SplitConfig>): Promise<SplitAnalysis>;
|
|
106
|
+
//# sourceMappingURL=split.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"split.d.ts","sourceRoot":"","sources":["../../../src/utils/audio/split.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAOH;;GAEG;AACH,MAAM,WAAW,WAAW;IAC1B,iEAAiE;IACjE,cAAc,EAAE,MAAM,CAAA;IACtB,qEAAqE;IACrE,gBAAgB,EAAE,MAAM,CAAA;IACxB,kDAAkD;IAClD,gBAAgB,EAAE,MAAM,CAAA;IACxB,wDAAwD;IACxD,mBAAmB,EAAE,OAAO,CAAA;IAC5B,uDAAuD;IACvD,aAAa,EAAE,MAAM,CAAA;CACtB;AAED;;GAEG;AACH,MAAM,WAAW,aAAa;IAC5B,QAAQ,EAAE,MAAM,CAAA;IAChB,MAAM,EAAE,MAAM,CAAA;IACd,WAAW,EAAE,MAAM,CAAA;CACpB;AAED;;GAEG;AACH,MAAM,WAAW,UAAU;IACzB,yDAAyD;IACzD,OAAO,EAAE,MAAM,CAAA;IACf,kDAAkD;IAClD,eAAe,EAAE,MAAM,CAAA;CACxB;AAED;;GAEG;AACH,MAAM,WAAW,YAAY;IAC3B,KAAK,EAAE,MAAM,CAAA;IACb,QAAQ,EAAE,MAAM,CAAA;IAChB,MAAM,EAAE,MAAM,CAAA;IACd,WAAW,EAAE,MAAM,CAAA;IACnB,UAAU,EAAE,MAAM,CAAA;CACnB;AAED;;GAEG;AACH,eAAO,MAAM,oBAAoB,EAAE,WAMlC,CAAA;AAED;;GAEG;AACH,wBAAsB,gBAAgB,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAezE;AAED;;;GAGG;AACH,wBAAsB,oBAAoB,CACxC,SAAS,EAAE,MAAM,EACjB,MAAM,GAAE,OAAO,CAAC,WAAW,CAAM,GAChC,OAAO,CAAC,aAAa,EAAE,CAAC,CAsD1B;AAED;;;;;;;;GAQG;AACH,wBAAgB,sBAAsB,CACpC,cAAc,EAAE,aAAa,EAAE,EAC/B,aAAa,EAAE,MAAM,EACrB,MAAM,GAAE,OAAO,CAAC,WAAW,CAAM,GAChC,UAAU,EAAE,CAwEd;AAED;;;;;;;;;GASG;AACH,wBAAsB,kBAAkB,CACtC,SAAS,EAAE,MAAM,EACjB,WAAW,EAAE,UAAU,EAAE,EACzB,aAAa,EAAE,MAAM,EACrB,SAAS,EAAE,MAAM,EACjB,QAAQ,EAAE,MAAM,GACf,OAAO,CAAC,YAAY,EAAE,CAAC,CA8CzB;AAED;;;;;;;;GAQG;AACH,wBAAsB,cAAc,CAClC,SAAS,EAAE,MAAM,EACjB,SAAS,EAAE,MAAM,EACjB,MAAM,GAAE,OAAO,CAAC,WAAW,CAAM,GAChC,OAAO,CAAC,YAAY,EAAE,CAAC,CAuCzB;AAED;;GAEG;AACH,MAAM,WAAW,aAAa;IAC5B,aAAa,EAAE,MAAM,CAAA;IACrB,WAAW,EAAE,MAAM,CAAA;IACnB,WAAW,EAAE,UAAU,EAAE,CAAA;IACzB,cAAc,EAAE,aAAa,EAAE,CAAA;IAC/B,UAAU,EAAE,OAAO,CAAA;CACpB;AAED;;;GAGG;AACH,wBAAsB,kBAAkB,CACtC,SAAS,EAAE,MAAM,EACjB,MAAM,GAAE,OAAO,CAAC,WAAW,CAAM,GAChC,OAAO,CAAC,aAAa,CAAC,CA0BxB"}
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
import path from 'path';
|
|
2
|
+
export declare const getExt: (filePath: string) => string;
|
|
3
|
+
export declare const getName: (filePath: string) => string;
|
|
4
|
+
export declare const getNameWithExt: (filePath: string) => string;
|
|
5
|
+
export declare const getFileInfo: (filePath: string) => path.ParsedPath;
|
|
6
|
+
//# sourceMappingURL=file-utils.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"file-utils.d.ts","sourceRoot":"","sources":["../../src/utils/file-utils.ts"],"names":[],"mappings":"AAAA,OAAO,IAAI,MAAM,MAAM,CAAA;AAEvB,eAAO,MAAM,MAAM,GAAI,UAAU,MAAM,WAEtC,CAAA;AACD,eAAO,MAAM,OAAO,GAAI,UAAU,MAAM,WAEvC,CAAA;AACD,eAAO,MAAM,cAAc,GAAI,UAAU,MAAM,WAG9C,CAAA;AACD,eAAO,MAAM,WAAW,GAAI,UAAU,MAAM,oBAG3C,CAAA"}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Format transcript with line breaks based on pauses between words
|
|
3
|
+
* @param transcript The punctuated transcript text from the API
|
|
4
|
+
* @param words Array of word objects with timing data
|
|
5
|
+
* @param shortPauseThreshold Threshold for single line break (default: 1.0s)
|
|
6
|
+
* @param longPauseThreshold Threshold for paragraph break/double newline (default: 5.0s)
|
|
7
|
+
*/
|
|
8
|
+
export declare function formatTranscriptWithPauses(transcript: string, words: Array<{
|
|
9
|
+
word: string;
|
|
10
|
+
start: number;
|
|
11
|
+
end: number;
|
|
12
|
+
confidence: number;
|
|
13
|
+
}>, shortPauseThreshold?: number, longPauseThreshold?: number): string;
|
|
14
|
+
//# sourceMappingURL=format.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"format.d.ts","sourceRoot":"","sources":["../../../src/utils/transcription/format.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AACH,wBAAgB,0BAA0B,CACzC,UAAU,EAAE,MAAM,EAClB,KAAK,EAAE,KAAK,CAAC;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,KAAK,EAAE,MAAM,CAAC;IAAC,GAAG,EAAE,MAAM,CAAC;IAAC,UAAU,EAAE,MAAM,CAAA;CAAE,CAAC,EAC9E,mBAAmB,SAAM,EACzB,kBAAkB,SAAM,GACtB,MAAM,CAgDR"}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import { Buffer } from 'node:buffer';
|
|
2
|
+
/**
|
|
3
|
+
* Detect audio format from buffer magic bytes
|
|
4
|
+
*
|
|
5
|
+
* @param buffer - The audio buffer to analyze
|
|
6
|
+
* @returns The detected MIME type string ('audio/mpeg', 'audio/ogg', 'audio/wav', 'audio/flac')
|
|
7
|
+
*
|
|
8
|
+
* @description
|
|
9
|
+
* This function analyzes the magic bytes at the beginning of an audio buffer to determine
|
|
10
|
+
* its format. It handles MP3 files with ID3 tags by skipping over them before checking
|
|
11
|
+
* for the actual audio frame header.
|
|
12
|
+
*
|
|
13
|
+
* Supported formats:
|
|
14
|
+
* - MP3 (MPEG Audio): FF FB, FF FA, FF F3, FF F2 with optional ID3 tag
|
|
15
|
+
* - OGG: "OggS" (4F 67 67 53)
|
|
16
|
+
* - WAV (RIFF): "RIFF" (52 49 46 46)
|
|
17
|
+
* - FLAC: "fLaC" (66 4C 61 43)
|
|
18
|
+
*
|
|
19
|
+
* @example
|
|
20
|
+
* const buffer = readFileSync('audio.mp3')
|
|
21
|
+
* const mimeType = detectAudioMimeType(buffer)
|
|
22
|
+
* console.log(mimeType) // 'audio/mpeg'
|
|
23
|
+
*/
|
|
24
|
+
export declare function detectAudioMimeType(buffer: Buffer): string;
|
|
25
|
+
//# sourceMappingURL=mime-detection.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"mime-detection.d.ts","sourceRoot":"","sources":["../../../src/utils/transcription/mime-detection.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAA;AAEpC;;;;;;;;;;;;;;;;;;;;;GAqBG;AACH,wBAAgB,mBAAmB,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAuD1D"}
|
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Transcription provider types and interfaces
|
|
3
|
+
* Defines the contract for all transcription providers
|
|
4
|
+
*/
|
|
5
|
+
/**
|
|
6
|
+
* Result object returned from transcription operations
|
|
7
|
+
* Contains the transcribed text and optional provider-specific metadata
|
|
8
|
+
*/
|
|
9
|
+
export interface TranscriptionResult {
|
|
10
|
+
/** The transcribed text content */
|
|
11
|
+
text: string;
|
|
12
|
+
/** Raw response object from the provider (optional, for debugging) */
|
|
13
|
+
rawResponse?: any;
|
|
14
|
+
/** Error message if transcription failed */
|
|
15
|
+
error?: string;
|
|
16
|
+
/** Confidence score of the transcription (0-1) */
|
|
17
|
+
confidence?: number;
|
|
18
|
+
/** Word-level data (timing, confidence, etc.) */
|
|
19
|
+
words?: any[];
|
|
20
|
+
/** Duration of audio in seconds */
|
|
21
|
+
duration?: number;
|
|
22
|
+
/** Detected or specified language code */
|
|
23
|
+
language?: string;
|
|
24
|
+
/** Model used for transcription (as returned by provider) */
|
|
25
|
+
model?: string;
|
|
26
|
+
}
|
|
27
|
+
/**
|
|
28
|
+
* Interface that all transcription providers must implement
|
|
29
|
+
* Defines the standard contract for transcription functionality
|
|
30
|
+
*/
|
|
31
|
+
export interface TranscriptionProvider {
|
|
32
|
+
/** Provider name/identifier */
|
|
33
|
+
name: string;
|
|
34
|
+
/** Maximum audio duration in seconds (undefined = no limit) */
|
|
35
|
+
maxAudioDurationSec?: number;
|
|
36
|
+
/**
|
|
37
|
+
* Transcribe audio from the given parameters
|
|
38
|
+
* @param params - Transcription parameters
|
|
39
|
+
* @returns Promise resolving to transcription result
|
|
40
|
+
*/
|
|
41
|
+
transcribe(params: TranscribeParams): Promise<TranscriptionResult>;
|
|
42
|
+
}
|
|
43
|
+
/**
|
|
44
|
+
* Parameters for transcription operations
|
|
45
|
+
* Supports both common and provider-specific options
|
|
46
|
+
*/
|
|
47
|
+
export interface TranscribeParams {
|
|
48
|
+
/** Path to the audio file to transcribe */
|
|
49
|
+
audioPath?: string;
|
|
50
|
+
/** Audio buffer to transcribe */
|
|
51
|
+
audioBuffer?: Buffer;
|
|
52
|
+
/** MIME type for audioBuffer (auto-detected if not provided) */
|
|
53
|
+
mimeType?: string;
|
|
54
|
+
/** URL to audio file (e.g., IPFS gateway URL) */
|
|
55
|
+
audioUrl?: string;
|
|
56
|
+
/** Model to use for transcription (provider-specific) */
|
|
57
|
+
model?: string;
|
|
58
|
+
/** Language code for transcription (e.g., 'en', 'fr') */
|
|
59
|
+
language?: string;
|
|
60
|
+
/** API key for authentication (provider-specific) */
|
|
61
|
+
apiKey?: string;
|
|
62
|
+
/** Enable speaker diarization (provider-specific) */
|
|
63
|
+
diarize?: boolean;
|
|
64
|
+
/** Timestamp granularity for transcription (Mistral-specific) */
|
|
65
|
+
timestampGranularity?: 'segment' | 'word';
|
|
66
|
+
/** Path to model file (Whisper-specific) */
|
|
67
|
+
modelPath?: string;
|
|
68
|
+
/** Output directory for results (Whisper-specific) */
|
|
69
|
+
outputDir?: string;
|
|
70
|
+
/** Provider configuration object */
|
|
71
|
+
config?: any;
|
|
72
|
+
}
|
|
73
|
+
/**
|
|
74
|
+
* Union type for supported provider names
|
|
75
|
+
*/
|
|
76
|
+
export type ProviderName = 'whisper' | 'mistral' | 'greenpt';
|
|
77
|
+
/**
|
|
78
|
+
* Factory function to create a transcription provider instance
|
|
79
|
+
* @param providerName - Name of the provider to create
|
|
80
|
+
* @param config - Optional configuration object for the provider
|
|
81
|
+
* @returns Transcription provider instance
|
|
82
|
+
*/
|
|
83
|
+
export declare function createProvider(providerName: ProviderName, config?: any): TranscriptionProvider;
|
|
84
|
+
/**
|
|
85
|
+
* Whisper provider for local whisper.cpp transcription
|
|
86
|
+
* Manages model caching and local transcription execution
|
|
87
|
+
*/
|
|
88
|
+
export declare class WhisperProvider implements TranscriptionProvider {
|
|
89
|
+
name: string;
|
|
90
|
+
private cacheDir;
|
|
91
|
+
static DEFAULTS: {
|
|
92
|
+
DIARIZE: boolean;
|
|
93
|
+
SILDUR: string;
|
|
94
|
+
SILBUF: number;
|
|
95
|
+
SILTHR: string;
|
|
96
|
+
MODEL_KEYS: {
|
|
97
|
+
tinyd: string;
|
|
98
|
+
small: string;
|
|
99
|
+
medium: string;
|
|
100
|
+
};
|
|
101
|
+
MODELS: {
|
|
102
|
+
tinyd: string;
|
|
103
|
+
small: string;
|
|
104
|
+
medium: string;
|
|
105
|
+
};
|
|
106
|
+
};
|
|
107
|
+
constructor(config?: any);
|
|
108
|
+
transcribe(params: TranscribeParams): Promise<TranscriptionResult>;
|
|
109
|
+
private ensureRequestedModelIsCached;
|
|
110
|
+
}
|
|
111
|
+
/**
|
|
112
|
+
* Voxtral/Mistral API limits
|
|
113
|
+
* These may need adjustment based on actual API constraints
|
|
114
|
+
*/
|
|
115
|
+
export declare const VOXTRAL_LIMITS: {
|
|
116
|
+
/** Maximum audio duration in seconds (3 hours for Voxtral Transcribe 2) */
|
|
117
|
+
maxAudioDurationSec: number;
|
|
118
|
+
/** Recommended max duration before splitting (for reliability) */
|
|
119
|
+
recommendedMaxDurationSec: number;
|
|
120
|
+
/** Maximum context biasing words/phrases */
|
|
121
|
+
maxContextBiasingTerms: number;
|
|
122
|
+
/** Maximum file size in bytes (1GB) */
|
|
123
|
+
maxFileSizeBytes: number;
|
|
124
|
+
};
|
|
125
|
+
export declare class MistralProvider implements TranscriptionProvider {
|
|
126
|
+
name: string;
|
|
127
|
+
maxAudioDurationSec: number;
|
|
128
|
+
/**
|
|
129
|
+
* Check if audio duration exceeds recommended limits
|
|
130
|
+
*/
|
|
131
|
+
static shouldSplit(durationSec: number): boolean;
|
|
132
|
+
/**
|
|
133
|
+
* Get the recommended max segment duration for splitting
|
|
134
|
+
*/
|
|
135
|
+
static getRecommendedMaxSegment(): number;
|
|
136
|
+
transcribe(params: TranscribeParams): Promise<TranscriptionResult>;
|
|
137
|
+
}
|
|
138
|
+
/**
|
|
139
|
+
* GreenPT transcription provider
|
|
140
|
+
* Uses GreenPT API for audio transcription with Deepgram-compatible response format
|
|
141
|
+
*/
|
|
142
|
+
export declare class GreenPTProvider implements TranscriptionProvider {
|
|
143
|
+
name: string;
|
|
144
|
+
transcribe(params: TranscribeParams): Promise<TranscriptionResult>;
|
|
145
|
+
}
|
|
146
|
+
//# sourceMappingURL=providers.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"providers.d.ts","sourceRoot":"","sources":["../../../src/utils/transcription/providers.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAUH;;;GAGG;AACH,MAAM,WAAW,mBAAmB;IAClC,mCAAmC;IACnC,IAAI,EAAE,MAAM,CAAA;IACZ,sEAAsE;IACtE,WAAW,CAAC,EAAE,GAAG,CAAA;IACjB,4CAA4C;IAC5C,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,kDAAkD;IAClD,UAAU,CAAC,EAAE,MAAM,CAAA;IACnB,iDAAiD;IACjD,KAAK,CAAC,EAAE,GAAG,EAAE,CAAA;IACb,mCAAmC;IACnC,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,0CAA0C;IAC1C,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,6DAA6D;IAC7D,KAAK,CAAC,EAAE,MAAM,CAAA;CACf;AAED;;;GAGG;AACH,MAAM,WAAW,qBAAqB;IACpC,+BAA+B;IAC/B,IAAI,EAAE,MAAM,CAAA;IACZ,+DAA+D;IAC/D,mBAAmB,CAAC,EAAE,MAAM,CAAA;IAC5B;;;;OAIG;IACH,UAAU,CAAC,MAAM,EAAE,gBAAgB,GAAG,OAAO,CAAC,mBAAmB,CAAC,CAAA;CACnE;AAED;;;GAGG;AACH,MAAM,WAAW,gBAAgB;IAC/B,2CAA2C;IAC3C,SAAS,CAAC,EAAE,MAAM,CAAA;IAClB,iCAAiC;IACjC,WAAW,CAAC,EAAE,MAAM,CAAA;IACpB,gEAAgE;IAChE,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,iDAAiD;IACjD,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,yDAAyD;IACzD,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,yDAAyD;IACzD,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,qDAAqD;IACrD,MAAM,CAAC,EAAE,MAAM,CAAA;IACf,oDAAoD;IACpD,OAAO,CAAC,EAAE,OAAO,CAAA;IACjB,iEAAiE;IACjE,oBAAoB,CAAC,EAAE,SAAS,GAAG,MAAM,CAAA;IACzC,4CAA4C;IAC5C,SAAS,CAAC,EAAE,MAAM,CAAA;IAClB,sDAAsD;IACtD,SAAS,CAAC,EAAE,MAAM,CAAA;IAClB,oCAAoC;IACpC,MAAM,CAAC,EAAE,GAAG,CAAA;CACb;AAED;;GAEG;AACH,MAAM,MAAM,YAAY,GAAG,SAAS,GAAG,SAAS,GAAG,SAAS,CAAA;AAE5D;;;;;GAKG;AACH,wBAAgB,cAAc,CAAC,YAAY,EAAE,YAAY,EAAE,MAAM,CAAC,EAAE,GAAG,GAAG,qBAAqB,CAW9F;AASD;;;GAGG;AACH,qBAAa,eAAgB,YAAW,qBAAqB;IAC3D,IAAI,SAAY;IAEhB,OAAO,CAAC,QAAQ,CAAQ;IAExB,MAAM,CAAC,QAAQ;;;;;;;;;;;;;;;MAkBd;gBAEW,MAAM,CAAC,EAAE,GAAG;IAKlB,UAAU,CAAC,MAAM,EAAE,gBAAgB,GAAG,OAAO,CAAC,mBAAmB,CAAC;YA+F1D,4BAA4B;CAkC3C;AAED;;;GAGG;AACH,eAAO,MAAM,cAAc;IACzB,2EAA2E;;IAE3E,kEAAkE;;IAElE,4CAA4C;;IAE5C,uCAAuC;;CAExC,CAAA;AAED,qBAAa,eAAgB,YAAW,qBAAqB;IAC3D,IAAI,SAAY;IAChB,mBAAmB,SAAqC;IAExD;;OAEG;IACH,MAAM,CAAC,WAAW,CAAC,WAAW,EAAE,MAAM,GAAG,OAAO;IAIhD;;OAEG;IACH,MAAM,CAAC,wBAAwB,IAAI,MAAM;IAInC,UAAU,CAAC,MAAM,EAA
E,gBAAgB,GAAG,OAAO,CAAC,mBAAmB,CAAC;CA8FzE;AAED;;;GAGG;AACH,qBAAa,eAAgB,YAAW,qBAAqB;IAC3D,IAAI,SAAY;IAEV,UAAU,CAAC,MAAM,EAAE,gBAAgB,GAAG,OAAO,CAAC,mBAAmB,CAAC;CAkGzE"}
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Simple high-level transcription API with good defaults
|
|
3
|
+
*/
|
|
4
|
+
import { type MergedTranscriptionResult } from '../audio/merge-results.js';
|
|
5
|
+
export interface TranscribeOptions {
|
|
6
|
+
/** Path to audio file */
|
|
7
|
+
audioPath?: string;
|
|
8
|
+
/** Audio buffer to transcribe */
|
|
9
|
+
audioBuffer?: Buffer;
|
|
10
|
+
/** MIME type for audioBuffer (auto-detected if not provided) */
|
|
11
|
+
mimeType?: string;
|
|
12
|
+
/** URL to audio file (e.g., IPFS gateway URL) */
|
|
13
|
+
audioUrl?: string;
|
|
14
|
+
/** Language code (e.g. 'en', 'fr') - note: disables word timestamps for Mistral */
|
|
15
|
+
language?: string;
|
|
16
|
+
/** Model to use (default: voxtral-mini-latest) */
|
|
17
|
+
model?: string;
|
|
18
|
+
/** Enable speaker diarization (default: true) */
|
|
19
|
+
diarize?: boolean;
|
|
20
|
+
/** Timestamp granularity: 'word' | 'segment' (default: 'word', disabled if language set) */
|
|
21
|
+
timestamps?: 'word' | 'segment';
|
|
22
|
+
/** Auto-split long audio (default: true, only works with audioPath) */
|
|
23
|
+
autoSplit?: boolean;
|
|
24
|
+
/** Output directory for split segments (default: system temp) */
|
|
25
|
+
splitOutputDir?: string;
|
|
26
|
+
}
|
|
27
|
+
export interface MistralTranscriberConfig {
|
|
28
|
+
/** Mistral API key */
|
|
29
|
+
apiKey: string;
|
|
30
|
+
/** Default model (default: voxtral-mini-latest) */
|
|
31
|
+
model?: string;
|
|
32
|
+
}
|
|
33
|
+
/**
|
|
34
|
+
* Simple Mistral transcriber with auto-splitting and good defaults
|
|
35
|
+
*
|
|
36
|
+
* @example
|
|
37
|
+
* ```ts
|
|
38
|
+
* const transcriber = createMistralTranscriber({ apiKey: process.env.MISTRAL_API_KEY })
|
|
39
|
+
*
|
|
40
|
+
* // From file (supports auto-split for long audio)
|
|
41
|
+
* const result = await transcriber.transcribe({ audioPath: './interview.mp3' })
|
|
42
|
+
*
|
|
43
|
+
* // From URL (e.g., IPFS gateway)
|
|
44
|
+
* const result = await transcriber.transcribe({ audioUrl: 'https://gateway.ipfs.io/ipfs/Qm...' })
|
|
45
|
+
*
|
|
46
|
+
* // From buffer
|
|
47
|
+
* const result = await transcriber.transcribe({ audioBuffer: buffer, mimeType: 'audio/mpeg' })
|
|
48
|
+
* ```
|
|
49
|
+
*/
|
|
50
|
+
export declare function createMistralTranscriber(config: MistralTranscriberConfig): {
|
|
51
|
+
/**
|
|
52
|
+
* Transcribe audio; long files are auto-split only when audioPath is provided
|
|
53
|
+
* Diarization and word timestamps enabled by default
|
|
54
|
+
*/
|
|
55
|
+
transcribe(options: TranscribeOptions): Promise<MergedTranscriptionResult>;
|
|
56
|
+
};
|
|
57
|
+
/** Alias of createMistralTranscriber for simpler import; it is the factory, so call transcribe() on the object it returns */
|
|
58
|
+
export declare const transcribe: typeof createMistralTranscriber;
|
|
59
|
+
//# sourceMappingURL=transcribe.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"transcribe.d.ts","sourceRoot":"","sources":["../../../src/utils/transcription/transcribe.ts"],"names":[],"mappings":"AAAA;;GAEG;AAOH,OAAO,EAA6B,KAAK,yBAAyB,EAAE,MAAM,2BAA2B,CAAA;AAErG,MAAM,WAAW,iBAAiB;IAChC,yBAAyB;IACzB,SAAS,CAAC,EAAE,MAAM,CAAA;IAClB,iCAAiC;IACjC,WAAW,CAAC,EAAE,MAAM,CAAA;IACpB,gEAAgE;IAChE,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,iDAAiD;IACjD,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,mFAAmF;IACnF,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,kDAAkD;IAClD,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,iDAAiD;IACjD,OAAO,CAAC,EAAE,OAAO,CAAA;IACjB,4FAA4F;IAC5F,UAAU,CAAC,EAAE,MAAM,GAAG,SAAS,CAAA;IAC/B,uEAAuE;IACvE,SAAS,CAAC,EAAE,OAAO,CAAA;IACnB,iEAAiE;IACjE,cAAc,CAAC,EAAE,MAAM,CAAA;CACxB;AAED,MAAM,WAAW,wBAAwB;IACvC,sBAAsB;IACtB,MAAM,EAAE,MAAM,CAAA;IACd,mDAAmD;IACnD,KAAK,CAAC,EAAE,MAAM,CAAA;CACf;AAED;;;;;;;;;;;;;;;;GAgBG;AACH,wBAAgB,wBAAwB,CAAC,MAAM,EAAE,wBAAwB;IAKrE;;;OAGG;wBACuB,iBAAiB,GAAG,OAAO,CAAC,yBAAyB,CAAC;EA4EnF;AAED,+BAA+B;AAC/B,eAAO,MAAM,UAAU,iCAA2B,CAAA"}
|
package/package.json
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@wovin/tranz",
|
|
3
|
+
"version": "0.0.26",
|
|
4
|
+
"type": "module",
|
|
5
|
+
"description": "Audio transcription library with provider support and auto-splitting",
|
|
6
|
+
"author": "gotjoshua @gotjoshua",
|
|
7
|
+
"license": "ISC",
|
|
8
|
+
"homepage": "https://gitlab.com/onezoomin/ztax/tranz",
|
|
9
|
+
"repository": "https://gitlab.com/onezoomin/ztax/tranz",
|
|
10
|
+
"bugs": "https://gitlab.com/onezoomin/ztax/tranz/-/issues",
|
|
11
|
+
"main": "./dist/index.min.js",
|
|
12
|
+
"module": "./dist/index.min.js",
|
|
13
|
+
"types": "./dist/index.d.ts",
|
|
14
|
+
"exports": {
|
|
15
|
+
".": {
|
|
16
|
+
"types": "./dist/index.d.ts",
|
|
17
|
+
"import": "./dist/index.min.js"
|
|
18
|
+
},
|
|
19
|
+
"./providers": {
|
|
20
|
+
"types": "./dist/providers.d.ts",
|
|
21
|
+
"import": "./dist/providers.min.js"
|
|
22
|
+
},
|
|
23
|
+
"./audio": {
|
|
24
|
+
"types": "./dist/audio.d.ts",
|
|
25
|
+
"import": "./dist/audio.min.js"
|
|
26
|
+
}
|
|
27
|
+
},
|
|
28
|
+
"files": [
|
|
29
|
+
"./dist/"
|
|
30
|
+
],
|
|
31
|
+
"publishConfig": {
|
|
32
|
+
"access": "public"
|
|
33
|
+
},
|
|
34
|
+
"dependencies": {
|
|
35
|
+
"fluent-ffmpeg": "^2.1.2"
|
|
36
|
+
},
|
|
37
|
+
"devDependencies": {
|
|
38
|
+
"@types/fluent-ffmpeg": "^2.1.21",
|
|
39
|
+
"@types/node": "^24.10.1",
|
|
40
|
+
"concurrently": "^8.2.2",
|
|
41
|
+
"tsup": "^8.5.0",
|
|
42
|
+
"typescript": "^5.9.3",
|
|
43
|
+
"tsupconfig": "^0.0.0"
|
|
44
|
+
},
|
|
45
|
+
"keywords": [
|
|
46
|
+
"transcription",
|
|
47
|
+
"audio",
|
|
48
|
+
"mistral",
|
|
49
|
+
"whisper",
|
|
50
|
+
"voxtral"
|
|
51
|
+
],
|
|
52
|
+
"scripts": {
|
|
53
|
+
"build": "rm -rf dist/ && concurrently \"pnpm build:code\" \"pnpm build:types\"",
|
|
54
|
+
"build:code": "tsup",
|
|
55
|
+
"build:types": "tsc --emitDeclarationOnly --declaration",
|
|
56
|
+
"dev": "concurrently \"pnpm dev:code\" \"pnpm dev:types\"",
|
|
57
|
+
"dev:code": "tsup --watch",
|
|
58
|
+
"dev:types": "tsc --emitDeclarationOnly --declaration --watch",
|
|
59
|
+
"clean": "rm -rf .turbo && rm -rf node_modules && rm -rf dist",
|
|
60
|
+
"pub": "npm publish --tag latest --access=public"
|
|
61
|
+
}
|
|
62
|
+
}
|