@wovin/tranz 0.0.26 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE CHANGED
@@ -630,11 +630,11 @@ state the exclusion of warranty; and each file should have at least
  the "copyright" line and a pointer to where the full notice is found.
 
  <one line to give the program's name and a brief idea of what it does.>
- Copyright (C) 2024 Manu [tennox]
+ Copyright (C) <year> <name of author>
 
  This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU Affero General Public License as published
- by the Free Software Foundation, either version 3 of the License, or
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
  (at your option) any later version.
 
  This program is distributed in the hope that it will be useful,
package/README.md ADDED
@@ -0,0 +1,252 @@

# @wovin/tranz

Audio transcription library with provider support and auto-splitting for long audio files.

[![JSR](https://jsr.io/badges/@wovin/tranz)](https://jsr.io/@wovin/tranz)
[![JSR Score](https://jsr.io/badges/@wovin/tranz/score)](https://jsr.io/@wovin/tranz)

## Features

- **Multiple Transcription Providers**: Mistral Voxtral, Whisper, GreenPT
- **Automatic Audio Splitting**: Handles long audio files by intelligently splitting at silence points
- **Smart Input Support**: Files, URLs (with HTTP range probing), or buffers
- **Speaker Diarization**: Identify different speakers in audio
- **Flexible Timestamps**: Word-level or segment-level timing
- **Result Merging**: Automatically merge split segment results with accurate timing

## Installation

```sh
# npm
npm install @wovin/tranz

# pnpm
pnpm add @wovin/tranz

# yarn
yarn add @wovin/tranz

# deno
deno add @wovin/tranz

# jsr (for any runtime)
npx jsr add @wovin/tranz
```

## Quick Start

```typescript
import { createMistralTranscriber } from '@wovin/tranz'

// Create a transcriber instance
const transcriber = createMistralTranscriber({
  apiKey: process.env.MISTRAL_API_KEY,
  model: 'voxtral-mini-latest'
})

// Transcribe from file (auto-splits if too long)
const result = await transcriber.transcribe({
  audioPath: './interview.mp3',
  diarize: true,
  timestamps: 'word'
})

console.log(result.text)
console.log(result.words) // word-level timestamps
console.log(result.speakers) // speaker segments
```

## Usage Examples

### Transcribe from URL

```typescript
// Smart handling: probes duration via HTTP, downloads only if splitting needed
const result = await transcriber.transcribe({
  audioUrl: 'https://example.com/audio.mp3'
})

// If you know the duration, skip detection
const result = await transcriber.transcribe({
  audioUrl: 'https://example.com/audio.mp3',
  duration: 120 // seconds
})
```

### Transcribe from Buffer

```typescript
const audioBuffer = fs.readFileSync('./audio.mp3')
const result = await transcriber.transcribe({
  audioBuffer,
  mimeType: 'audio/mpeg'
})
```

### Control Auto-Splitting

```typescript
// Disable auto-split (use for short audio)
const result = await transcriber.transcribe({
  audioPath: './short-clip.mp3',
  autoSplit: false
})

// Specify custom split output directory
const result = await transcriber.transcribe({
  audioPath: './long-audio.mp3',
  splitOutputDir: './segments'
})
```

### Language Specification

```typescript
// Note: setting language disables word-level timestamps for Mistral
const result = await transcriber.transcribe({
  audioPath: './french-audio.mp3',
  language: 'fr',
  timestamps: 'segment'
})
```

### Custom Logging

```typescript
const result = await transcriber.transcribe({
  audioPath: './audio.mp3',
  logger: {
    info: (msg) => console.log(`[INFO] ${msg}`),
    warn: (msg) => console.warn(`[WARN] ${msg}`),
    debug: (msg) => console.debug(`[DEBUG] ${msg}`)
  },
  verbose: true // promotes debug logs to info level
})
```

## Advanced: Using Providers Directly

```typescript
import { MistralProvider, WhisperProvider } from '@wovin/tranz/providers'

// Mistral provider
const mistral = new MistralProvider()
const result = await mistral.transcribe({
  audioPath: './audio.mp3',
  apiKey: process.env.MISTRAL_API_KEY,
  model: 'voxtral-mini-latest',
  diarize: true,
  timestampGranularity: 'word'
})

// Whisper provider (local)
const whisper = new WhisperProvider()
const result = await whisper.transcribe({
  audioPath: './audio.mp3',
  model: 'base'
})
```

## Advanced: Audio Utilities

```typescript
import {
  autoSplitAudio,
  getAudioDuration,
  mergeTranscriptionResults
} from '@wovin/tranz/audio'

// Get audio duration
const duration = await getAudioDuration('./audio.mp3')

// Split long audio at optimal silence points
const segments = await autoSplitAudio('./long-audio.mp3', './output-dir', {
  maxDurationSec: 300, // 5 minutes
  minSilenceDuration: 0.5,
  silenceThreshold: -40
})

// Manually transcribe and merge segments
const results = await Promise.all(
  segments.map(seg => transcribe(seg.outputPath))
)
const merged = mergeTranscriptionResults(results, segments)
```

## API Reference

### `createMistralTranscriber(config)`

Creates a Mistral transcriber instance with auto-splitting support.

**Config:**
- `apiKey: string` - Mistral API key (required)
- `model?: string` - Model name (default: 'voxtral-mini-latest')

**Returns:** `MistralTranscriber` with `transcribe(options)` method

### `TranscribeOptions`

Options for the `transcribe()` method:

- `audioPath?: string` - Path to audio file
- `audioBuffer?: Buffer` - Audio data as buffer
- `mimeType?: string` - MIME type for buffer (auto-detected if omitted)
- `audioUrl?: string` - URL to audio file (supports HTTP range probing)
- `duration?: number` - Known duration in seconds (skips detection)
- `language?: string` - Language code (e.g., 'en', 'fr') - disables word timestamps
- `model?: string` - Override default model
- `diarize?: boolean` - Enable speaker diarization (default: true)
- `timestamps?: 'word' | 'segment'` - Timestamp granularity (default: 'word')
- `autoSplit?: boolean` - Auto-split long audio (default: true)
- `splitOutputDir?: string` - Directory for split segments (default: system temp)
- `logger?: TranscribeLogger` - Custom logger
- `verbose?: boolean` - Enable debug logging

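These options can be combined freely. As a quick orientation, the sketch below sets several of them in one call; the path and values are placeholders, not library defaults beyond those documented above:

```typescript
import { createMistralTranscriber } from '@wovin/tranz'

const transcriber = createMistralTranscriber({ apiKey: process.env.MISTRAL_API_KEY })

// Placeholder values; only one audio input (audioPath, audioBuffer, or audioUrl) is required
const result = await transcriber.transcribe({
  audioPath: './meeting.mp3',
  diarize: true,                // speaker diarization (default: true)
  timestamps: 'word',           // 'word' | 'segment'
  autoSplit: true,              // split long audio automatically (default: true)
  splitOutputDir: './segments', // where split segments are written
  verbose: true                 // promote debug logs to info level
})
```
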
### `TranscriptionResult`

Result from transcription:

- `text: string` - Full transcription text
- `duration?: number` - Audio duration in seconds
- `language?: string` - Detected or specified language
- `words?: WordData[]` - Word-level timestamps and confidence
- `speakers?: SpeakerSegment[]` - Speaker diarization data
- `error?: string` - Error message if transcription failed

### `MergedTranscriptionResult`

Extended result for multi-segment transcriptions:

- All fields from `TranscriptionResult`
- `totalSegments: number` - Number of segments merged
- `segments?: TranscriptionResult[]` - Individual segment results

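For orientation, a short sketch that consumes only the fields listed above (it assumes nothing about the internal shape of `WordData` or `SpeakerSegment`):

```typescript
const result = await transcriber.transcribe({ audioPath: './long-interview.mp3' })

if (result.error) {
  console.error(`Transcription failed: ${result.error}`)
} else {
  console.log(result.text)
  console.log(`language: ${result.language ?? 'auto'}, duration: ${result.duration ?? 'unknown'}s`)
  console.log(`merged from ${result.totalSegments} segment(s), ${result.words?.length ?? 0} timed words`)
  // Per-segment texts are available when the audio was split
  result.segments?.forEach((seg, i) => console.log(`segment ${i + 1}: ${seg.text.slice(0, 60)}`))
}
```
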
## Providers

### Mistral (Voxtral)

- Models: `voxtral-mini-latest`, `voxtral-large-latest`
- Max recommended duration: 300s (5 minutes)
- Auto-split supported: Yes
- Speaker diarization: Yes
- Word timestamps: Yes (unless language specified)

### Whisper (Local)

- Requires local Whisper installation
- Models: `tiny`, `base`, `small`, `medium`, `large`
- No API key required

### GreenPT

- API-based transcription
- Requires `GREENPT_API_KEY`

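`GreenPTProvider` is exported from `@wovin/tranz/providers` alongside the others. Its exact `transcribe()` parameters are not documented here, so the sketch below simply mirrors the Mistral/Whisper examples above and should be read as an assumption rather than a reference:

```typescript
import { GreenPTProvider } from '@wovin/tranz/providers'

const greenpt = new GreenPTProvider()
// Assumed parameter names, mirroring the provider examples above
const result = await greenpt.transcribe({
  audioPath: './audio.mp3',
  apiKey: process.env.GREENPT_API_KEY
})
console.log(result.text)
```
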
## License

AGPL-3.0-or-later

## Repository

[GitLab: onezoomin/ztax/tranz](https://gitlab.com/onezoomin/ztax/tranz)
package/dist/audio.d.ts CHANGED
@@ -1,6 +1,6 @@
  /**
  * @wovin/tranz/audio - Audio utilities for splitting and merging
  */
- export { autoSplitAudio, analyzeSplitPoints, detectSilenceRegions, getAudioDuration, findOptimalSplitPoints, splitAudioAtPoints, DEFAULT_SPLIT_CONFIG, type SplitConfig, type SilenceRegion, type SplitPoint, type AudioSegment, type SplitAnalysis, } from './utils/audio/split.js';
- export { mergeTranscriptionResults, formatMergedText, type MergedTranscriptionResult, type WordData, } from './utils/audio/merge-results.js';
+ export { autoSplitAudio, analyzeSplitPoints, detectSilenceRegions, getAudioDuration, findOptimalSplitPoints, splitAudioAtPoints, DEFAULT_SPLIT_CONFIG, type SplitConfig, type SilenceRegion, type SplitPoint, type AudioSegment, type SplitAnalysis, } from './utils/audio/split.ts';
+ export { mergeTranscriptionResults, formatMergedText, type MergedTranscriptionResult, type WordData, } from './utils/audio/merge-results.ts';
  //# sourceMappingURL=audio.d.ts.map
package/dist/index.d.ts CHANGED
@@ -1,9 +1,9 @@
  /**
  * @wovin/tranz - Audio transcription library
  */
- export { createProvider, MistralProvider, WhisperProvider, GreenPTProvider, VOXTRAL_LIMITS, type ProviderName, type TranscribeParams, type TranscriptionResult, type TranscriptionProvider, } from './utils/transcription/providers.js';
- export { autoSplitAudio, analyzeSplitPoints, detectSilenceRegions, getAudioDuration, findOptimalSplitPoints, splitAudioAtPoints, DEFAULT_SPLIT_CONFIG, type SplitConfig, type SilenceRegion, type SplitPoint, type AudioSegment, type SplitAnalysis, } from './utils/audio/split.js';
- export { mergeTranscriptionResults, formatMergedText, type MergedTranscriptionResult, type WordData, } from './utils/audio/merge-results.js';
- export { formatTranscriptWithPauses } from './utils/transcription/format.js';
- export { createMistralTranscriber, transcribe, type TranscribeOptions, type MistralTranscriberConfig, } from './utils/transcription/transcribe.js';
+ export { createProvider, MistralProvider, WhisperProvider, GreenPTProvider, VOXTRAL_LIMITS, type ProviderName, type TranscribeParams, type TranscriptionResult, type TranscriptionProvider, } from './utils/transcription/providers.ts';
+ export { autoSplitAudio, analyzeSplitPoints, detectSilenceRegions, getAudioDuration, findOptimalSplitPoints, splitAudioAtPoints, DEFAULT_SPLIT_CONFIG, type SplitConfig, type SilenceRegion, type SplitPoint, type AudioSegment, type SplitAnalysis, } from './utils/audio/split.ts';
+ export { mergeTranscriptionResults, formatMergedText, type MergedTranscriptionResult, type WordData, } from './utils/audio/merge-results.ts';
+ export { formatTranscriptWithPauses } from './utils/transcription/format.ts';
+ export { createMistralTranscriber, transcribe, type TranscribeOptions, type MistralTranscriberConfig, type MistralTranscriber, } from './utils/transcription/transcribe.ts';
  //# sourceMappingURL=index.d.ts.map
@@ -1 +1 @@
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AAGH,OAAO,EACL,cAAc,EACd,eAAe,EACf,eAAe,EACf,eAAe,EACf,cAAc,EACd,KAAK,YAAY,EACjB,KAAK,gBAAgB,EACrB,KAAK,mBAAmB,EACxB,KAAK,qBAAqB,GAC3B,MAAM,oCAAoC,CAAA;AAG3C,OAAO,EACL,cAAc,EACd,kBAAkB,EAClB,oBAAoB,EACpB,gBAAgB,EAChB,sBAAsB,EACtB,kBAAkB,EAClB,oBAAoB,EACpB,KAAK,WAAW,EAChB,KAAK,aAAa,EAClB,KAAK,UAAU,EACf,KAAK,YAAY,EACjB,KAAK,aAAa,GACnB,MAAM,wBAAwB,CAAA;AAG/B,OAAO,EACL,yBAAyB,EACzB,gBAAgB,EAChB,KAAK,yBAAyB,EAC9B,KAAK,QAAQ,GACd,MAAM,gCAAgC,CAAA;AAGvC,OAAO,EAAE,0BAA0B,EAAE,MAAM,iCAAiC,CAAA;AAG5E,OAAO,EACL,wBAAwB,EACxB,UAAU,EACV,KAAK,iBAAiB,EACtB,KAAK,wBAAwB,GAC9B,MAAM,qCAAqC,CAAA"}
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AAGH,OAAO,EACL,cAAc,EACd,eAAe,EACf,eAAe,EACf,eAAe,EACf,cAAc,EACd,KAAK,YAAY,EACjB,KAAK,gBAAgB,EACrB,KAAK,mBAAmB,EACxB,KAAK,qBAAqB,GAC3B,MAAM,oCAAoC,CAAA;AAG3C,OAAO,EACL,cAAc,EACd,kBAAkB,EAClB,oBAAoB,EACpB,gBAAgB,EAChB,sBAAsB,EACtB,kBAAkB,EAClB,oBAAoB,EACpB,KAAK,WAAW,EAChB,KAAK,aAAa,EAClB,KAAK,UAAU,EACf,KAAK,YAAY,EACjB,KAAK,aAAa,GACnB,MAAM,wBAAwB,CAAA;AAG/B,OAAO,EACL,yBAAyB,EACzB,gBAAgB,EAChB,KAAK,yBAAyB,EAC9B,KAAK,QAAQ,GACd,MAAM,gCAAgC,CAAA;AAGvC,OAAO,EAAE,0BAA0B,EAAE,MAAM,iCAAiC,CAAA;AAG5E,OAAO,EACL,wBAAwB,EACxB,UAAU,EACV,KAAK,iBAAiB,EACtB,KAAK,wBAAwB,EAC7B,KAAK,kBAAkB,GACxB,MAAM,qCAAqC,CAAA"}
package/dist/index.min.js CHANGED
@@ -674,15 +674,64 @@ function formatTranscriptWithPauses(transcript, words, shortPauseThreshold = 1,
 
  // src/utils/transcription/transcribe.ts
  import * as fs3 from "fs";
+ import * as https from "https";
+ import * as http from "http";
  import * as os from "os";
  import * as path3 from "path";
+ var defaultLogger = {
+ info: (msg) => console.log(`[tranz] ${msg}`),
+ warn: (msg) => console.warn(`[tranz] ${msg}`),
+ debug: () => {
+ }
+ // silent by default
+ };
+ async function downloadToTempFile(url, outputDir) {
+ const tempPath = path3.join(outputDir, `download-${Date.now()}.audio`);
+ const file = fs3.createWriteStream(tempPath);
+ return new Promise((resolve, reject) => {
+ const protocol = url.startsWith("https") ? https : http;
+ protocol.get(url, (response) => {
+ if (response.statusCode === 301 || response.statusCode === 302) {
+ const redirectUrl = response.headers.location;
+ if (redirectUrl) {
+ file.close();
+ fs3.unlinkSync(tempPath);
+ downloadToTempFile(redirectUrl, outputDir).then(resolve).catch(reject);
+ return;
+ }
+ }
+ if (response.statusCode !== 200) {
+ reject(new Error(`Failed to download: HTTP ${response.statusCode}`));
+ return;
+ }
+ response.pipe(file);
+ file.on("finish", () => {
+ file.close();
+ resolve(tempPath);
+ });
+ }).on("error", (err) => {
+ fs3.unlink(tempPath, () => {
+ });
+ reject(err);
+ });
+ });
+ }
+ async function tryGetUrlDuration(url) {
+ try {
+ return await getAudioDuration(url);
+ } catch {
+ return void 0;
+ }
+ }
  function createMistralTranscriber(config) {
  const provider = new MistralProvider();
  const defaultModel = config.model || "voxtral-mini-latest";
  return {
  /**
- * Transcribe audio with auto-splitting for long files (file path only)
- * Diarization and word timestamps enabled by default
+ * Transcribe audio with smart auto-splitting
+ * - For files: checks duration and splits if needed
+ * - For URLs: probes duration via HTTP range request, downloads only if splitting needed
+ * - For buffers: transcribes directly (no splitting)
  */
  async transcribe(options) {
  const {
@@ -690,16 +739,22 @@ function createMistralTranscriber(config) {
  audioBuffer,
  mimeType,
  audioUrl,
+ duration: knownDuration,
  language,
  model = defaultModel,
  diarize = true,
  timestamps = language ? void 0 : "word",
- autoSplit = true,
- splitOutputDir
+ autoSplit,
+ splitOutputDir,
+ logger: customLogger,
+ verbose
  } = options;
- if (audioUrl || audioBuffer) {
+ const log = customLogger || defaultLogger;
+ if (verbose) log.debug = log.info;
+ const maxDuration = VOXTRAL_LIMITS.recommendedMaxDurationSec;
+ if (audioBuffer) {
+ log.info(`Transcribing from buffer (${(audioBuffer.length / 1024 / 1024).toFixed(2)} MB)`);
  const result = await provider.transcribe({
- audioUrl,
  audioBuffer,
  mimeType,
  apiKey: config.apiKey,
@@ -710,12 +765,74 @@ function createMistralTranscriber(config) {
  });
  return { ...result, totalSegments: 1 };
  }
+ if (audioUrl) {
+ if (autoSplit === false) {
+ log.info(`Transcribing URL directly (autoSplit disabled)`);
+ const result2 = await provider.transcribe({
+ audioUrl,
+ apiKey: config.apiKey,
+ model,
+ language,
+ diarize,
+ timestampGranularity: timestamps
+ });
+ return { ...result2, totalSegments: 1 };
+ }
+ let duration2 = knownDuration;
+ if (duration2 === void 0) {
+ log.info(`Probing URL duration via ffprobe...`);
+ duration2 = await tryGetUrlDuration(audioUrl);
+ if (duration2 !== void 0) {
+ log.info(`Duration detected: ${duration2.toFixed(1)}s`);
+ } else {
+ log.warn(`Duration detection failed, will download to check`);
+ }
+ } else {
+ log.debug(`Using provided duration: ${duration2.toFixed(1)}s`);
+ }
+ if (duration2 !== void 0 && duration2 <= maxDuration) {
+ log.info(`Duration ${duration2.toFixed(1)}s <= ${maxDuration}s, using URL directly`);
+ const result2 = await provider.transcribe({
+ audioUrl,
+ apiKey: config.apiKey,
+ model,
+ language,
+ diarize,
+ timestampGranularity: timestamps
+ });
+ return { ...result2, totalSegments: 1 };
+ }
+ log.info(`Downloading URL to temp file for processing...`);
+ const outDir2 = splitOutputDir || path3.join(os.tmpdir(), `tranz-${Date.now()}`);
+ fs3.mkdirSync(outDir2, { recursive: true });
+ const tempFile = await downloadToTempFile(audioUrl, outDir2);
+ log.info(`Downloaded to ${tempFile}`);
+ const result = await this.transcribe({
+ audioPath: tempFile,
+ language,
+ model,
+ diarize,
+ timestamps,
+ autoSplit: true,
+ splitOutputDir: outDir2,
+ logger: customLogger,
+ verbose
+ });
+ try {
+ fs3.unlinkSync(tempFile);
+ } catch {
+ }
+ return result;
+ }
  if (!audioPath) {
  return { text: "", error: "No audio input provided (audioPath, audioBuffer, or audioUrl required)" };
  }
- const duration = await getAudioDuration(audioPath);
- const needsSplit = autoSplit && duration > VOXTRAL_LIMITS.recommendedMaxDurationSec;
+ log.debug(`Processing file: ${audioPath}`);
+ const duration = knownDuration ?? await getAudioDuration(audioPath);
+ log.info(`Audio duration: ${duration.toFixed(1)}s`);
+ const needsSplit = autoSplit !== false && duration > maxDuration;
  if (!needsSplit) {
+ log.info(`Transcribing file directly (no split needed)`);
  const result = await provider.transcribe({
  audioPath,
  apiKey: config.apiKey,
@@ -726,13 +843,17 @@ function createMistralTranscriber(config) {
  });
  return { ...result, totalSegments: 1 };
  }
+ log.info(`Duration ${duration.toFixed(1)}s > ${maxDuration}s, splitting audio...`);
  const outDir = splitOutputDir || path3.join(os.tmpdir(), `tranz-split-${Date.now()}`);
  fs3.mkdirSync(outDir, { recursive: true });
  const segments = await autoSplitAudio(audioPath, outDir, {
- maxDurationSec: VOXTRAL_LIMITS.recommendedMaxDurationSec
+ maxDurationSec: maxDuration
  });
+ log.info(`Split into ${segments.length} segments`);
  const results = [];
- for (const segment of segments) {
+ for (let i = 0; i < segments.length; i++) {
+ const segment = segments[i];
+ log.info(`Transcribing segment ${i + 1}/${segments.length} (${segment.durationSec.toFixed(1)}s)`);
  const result = await provider.transcribe({
  audioPath: segment.outputPath,
  apiKey: config.apiKey,
@@ -743,6 +864,7 @@ function createMistralTranscriber(config) {
  });
  results.push(result);
  }
+ log.info(`Merging ${segments.length} segments`);
  return mergeTranscriptionResults(results, segments);
  }
  };
@@ -1,6 +1,6 @@
  /**
  * @wovin/tranz/providers - Transcription provider implementations
  */
- export { createProvider, MistralProvider, WhisperProvider, GreenPTProvider, VOXTRAL_LIMITS, type ProviderName, type TranscribeParams, type TranscriptionResult, type TranscriptionProvider, } from './utils/transcription/providers.js';
- export { createMistralTranscriber, transcribe, type TranscribeOptions, type MistralTranscriberConfig, } from './utils/transcription/transcribe.js';
+ export { createProvider, MistralProvider, WhisperProvider, GreenPTProvider, VOXTRAL_LIMITS, type ProviderName, type TranscribeParams, type TranscriptionResult, type TranscriptionProvider, } from './utils/transcription/providers.ts';
+ export { createMistralTranscriber, transcribe, type TranscribeOptions, type MistralTranscriberConfig, } from './utils/transcription/transcribe.ts';
  //# sourceMappingURL=providers.d.ts.map
@@ -347,6 +347,8 @@ var GreenPTProvider = class {
 
  // src/utils/transcription/transcribe.ts
  import * as fs3 from "fs";
+ import * as https from "https";
+ import * as http from "http";
  import * as os from "os";
  import * as path3 from "path";
 
@@ -598,13 +600,60 @@ function mergeTranscriptionResults(results, segments) {
  }
 
  // src/utils/transcription/transcribe.ts
+ var defaultLogger = {
+ info: (msg) => console.log(`[tranz] ${msg}`),
+ warn: (msg) => console.warn(`[tranz] ${msg}`),
+ debug: () => {
+ }
+ // silent by default
+ };
+ async function downloadToTempFile(url, outputDir) {
+ const tempPath = path3.join(outputDir, `download-${Date.now()}.audio`);
+ const file = fs3.createWriteStream(tempPath);
+ return new Promise((resolve, reject) => {
+ const protocol = url.startsWith("https") ? https : http;
+ protocol.get(url, (response) => {
+ if (response.statusCode === 301 || response.statusCode === 302) {
+ const redirectUrl = response.headers.location;
+ if (redirectUrl) {
+ file.close();
+ fs3.unlinkSync(tempPath);
+ downloadToTempFile(redirectUrl, outputDir).then(resolve).catch(reject);
+ return;
+ }
+ }
+ if (response.statusCode !== 200) {
+ reject(new Error(`Failed to download: HTTP ${response.statusCode}`));
+ return;
+ }
+ response.pipe(file);
+ file.on("finish", () => {
+ file.close();
+ resolve(tempPath);
+ });
+ }).on("error", (err) => {
+ fs3.unlink(tempPath, () => {
+ });
+ reject(err);
+ });
+ });
+ }
+ async function tryGetUrlDuration(url) {
+ try {
+ return await getAudioDuration(url);
+ } catch {
+ return void 0;
+ }
+ }
  function createMistralTranscriber(config) {
  const provider = new MistralProvider();
  const defaultModel = config.model || "voxtral-mini-latest";
  return {
  /**
- * Transcribe audio with auto-splitting for long files (file path only)
- * Diarization and word timestamps enabled by default
+ * Transcribe audio with smart auto-splitting
+ * - For files: checks duration and splits if needed
+ * - For URLs: probes duration via HTTP range request, downloads only if splitting needed
+ * - For buffers: transcribes directly (no splitting)
  */
  async transcribe(options) {
  const {
@@ -612,16 +661,22 @@ function createMistralTranscriber(config) {
  audioBuffer,
  mimeType,
  audioUrl,
+ duration: knownDuration,
  language,
  model = defaultModel,
  diarize = true,
  timestamps = language ? void 0 : "word",
- autoSplit = true,
- splitOutputDir
+ autoSplit,
+ splitOutputDir,
+ logger: customLogger,
+ verbose
  } = options;
- if (audioUrl || audioBuffer) {
+ const log = customLogger || defaultLogger;
+ if (verbose) log.debug = log.info;
+ const maxDuration = VOXTRAL_LIMITS.recommendedMaxDurationSec;
+ if (audioBuffer) {
+ log.info(`Transcribing from buffer (${(audioBuffer.length / 1024 / 1024).toFixed(2)} MB)`);
  const result = await provider.transcribe({
- audioUrl,
  audioBuffer,
  mimeType,
  apiKey: config.apiKey,
@@ -632,12 +687,74 @@ function createMistralTranscriber(config) {
  });
  return { ...result, totalSegments: 1 };
  }
+ if (audioUrl) {
+ if (autoSplit === false) {
+ log.info(`Transcribing URL directly (autoSplit disabled)`);
+ const result2 = await provider.transcribe({
+ audioUrl,
+ apiKey: config.apiKey,
+ model,
+ language,
+ diarize,
+ timestampGranularity: timestamps
+ });
+ return { ...result2, totalSegments: 1 };
+ }
+ let duration2 = knownDuration;
+ if (duration2 === void 0) {
+ log.info(`Probing URL duration via ffprobe...`);
+ duration2 = await tryGetUrlDuration(audioUrl);
+ if (duration2 !== void 0) {
+ log.info(`Duration detected: ${duration2.toFixed(1)}s`);
+ } else {
+ log.warn(`Duration detection failed, will download to check`);
+ }
+ } else {
+ log.debug(`Using provided duration: ${duration2.toFixed(1)}s`);
+ }
+ if (duration2 !== void 0 && duration2 <= maxDuration) {
+ log.info(`Duration ${duration2.toFixed(1)}s <= ${maxDuration}s, using URL directly`);
+ const result2 = await provider.transcribe({
+ audioUrl,
+ apiKey: config.apiKey,
+ model,
+ language,
+ diarize,
+ timestampGranularity: timestamps
+ });
+ return { ...result2, totalSegments: 1 };
+ }
+ log.info(`Downloading URL to temp file for processing...`);
+ const outDir2 = splitOutputDir || path3.join(os.tmpdir(), `tranz-${Date.now()}`);
+ fs3.mkdirSync(outDir2, { recursive: true });
+ const tempFile = await downloadToTempFile(audioUrl, outDir2);
+ log.info(`Downloaded to ${tempFile}`);
+ const result = await this.transcribe({
+ audioPath: tempFile,
+ language,
+ model,
+ diarize,
+ timestamps,
+ autoSplit: true,
+ splitOutputDir: outDir2,
+ logger: customLogger,
+ verbose
+ });
+ try {
+ fs3.unlinkSync(tempFile);
+ } catch {
+ }
+ return result;
+ }
  if (!audioPath) {
  return { text: "", error: "No audio input provided (audioPath, audioBuffer, or audioUrl required)" };
  }
- const duration = await getAudioDuration(audioPath);
- const needsSplit = autoSplit && duration > VOXTRAL_LIMITS.recommendedMaxDurationSec;
+ log.debug(`Processing file: ${audioPath}`);
+ const duration = knownDuration ?? await getAudioDuration(audioPath);
+ log.info(`Audio duration: ${duration.toFixed(1)}s`);
+ const needsSplit = autoSplit !== false && duration > maxDuration;
  if (!needsSplit) {
+ log.info(`Transcribing file directly (no split needed)`);
  const result = await provider.transcribe({
  audioPath,
  apiKey: config.apiKey,
@@ -648,13 +765,17 @@ function createMistralTranscriber(config) {
  });
  return { ...result, totalSegments: 1 };
  }
+ log.info(`Duration ${duration.toFixed(1)}s > ${maxDuration}s, splitting audio...`);
  const outDir = splitOutputDir || path3.join(os.tmpdir(), `tranz-split-${Date.now()}`);
  fs3.mkdirSync(outDir, { recursive: true });
  const segments = await autoSplitAudio(audioPath, outDir, {
- maxDurationSec: VOXTRAL_LIMITS.recommendedMaxDurationSec
+ maxDurationSec: maxDuration
  });
+ log.info(`Split into ${segments.length} segments`);
  const results = [];
- for (const segment of segments) {
+ for (let i = 0; i < segments.length; i++) {
+ const segment = segments[i];
+ log.info(`Transcribing segment ${i + 1}/${segments.length} (${segment.durationSec.toFixed(1)}s)`);
  const result = await provider.transcribe({
  audioPath: segment.outputPath,
  apiKey: config.apiKey,
@@ -665,6 +786,7 @@ function createMistralTranscriber(config) {
  });
  results.push(result);
  }
+ log.info(`Merging ${segments.length} segments`);
  return mergeTranscriptionResults(results, segments);
  }
  };
@@ -1,6 +1,6 @@
  /**
  * Audio utilities for tranz-cli
  */
- export * from './split.js';
- export * from './merge-results.js';
+ export * from './split.ts';
+ export * from './merge-results.ts';
  //# sourceMappingURL=index.d.ts.map
@@ -1,8 +1,8 @@
  /**
  * Utilities for merging transcription results from split audio segments
  */
- import type { TranscriptionResult } from '../transcription/providers.js';
- import type { AudioSegment } from './split.js';
+ import type { TranscriptionResult } from '../transcription/providers.ts';
+ import type { AudioSegment } from './split.ts';
  /**
  * Word-level data with timing information
  */
@@ -1,4 +1,4 @@
- import path from 'path';
+ import path from 'node:path';
  export declare const getExt: (filePath: string) => string;
  export declare const getName: (filePath: string) => string;
  export declare const getNameWithExt: (filePath: string) => string;
@@ -1 +1 @@
- {"version":3,"file":"file-utils.d.ts","sourceRoot":"","sources":["../../src/utils/file-utils.ts"],"names":[],"mappings":"AAAA,OAAO,IAAI,MAAM,MAAM,CAAA;AAEvB,eAAO,MAAM,MAAM,GAAI,UAAU,MAAM,WAEtC,CAAA;AACD,eAAO,MAAM,OAAO,GAAI,UAAU,MAAM,WAEvC,CAAA;AACD,eAAO,MAAM,cAAc,GAAI,UAAU,MAAM,WAG9C,CAAA;AACD,eAAO,MAAM,WAAW,GAAI,UAAU,MAAM,oBAG3C,CAAA"}
+ {"version":3,"file":"file-utils.d.ts","sourceRoot":"","sources":["../../src/utils/file-utils.ts"],"names":[],"mappings":"AAAA,OAAO,IAAI,MAAM,WAAW,CAAA;AAE5B,eAAO,MAAM,MAAM,GAAI,UAAU,MAAM,WAEtC,CAAA;AACD,eAAO,MAAM,OAAO,GAAI,UAAU,MAAM,WAEvC,CAAA;AACD,eAAO,MAAM,cAAc,GAAI,UAAU,MAAM,WAG9C,CAAA;AACD,eAAO,MAAM,WAAW,GAAI,UAAU,MAAM,oBAG3C,CAAA"}
@@ -1,7 +1,13 @@
  /**
  * Simple high-level transcription API with good defaults
  */
- import { type MergedTranscriptionResult } from '../audio/merge-results.js';
+ import { type MergedTranscriptionResult } from '../audio/merge-results.ts';
+ /** Logger interface for transcription progress */
+ export interface TranscribeLogger {
+ info: (msg: string) => void;
+ warn: (msg: string) => void;
+ debug: (msg: string) => void;
+ }
  export interface TranscribeOptions {
  /** Path to audio file */
  audioPath?: string;
@@ -11,6 +17,8 @@ export interface TranscribeOptions {
  mimeType?: string;
  /** URL to audio file (e.g., IPFS gateway URL) */
  audioUrl?: string;
+ /** Known duration in seconds (skips duration detection for URL input) */
+ duration?: number;
  /** Language code (e.g. 'en', 'fr') - note: disables word timestamps for Mistral */
  language?: string;
  /** Model to use (default: voxtral-mini-latest) */
@@ -19,10 +27,14 @@ export interface TranscribeOptions {
  diarize?: boolean;
  /** Timestamp granularity: 'word' | 'segment' (default: 'word', disabled if language set) */
  timestamps?: 'word' | 'segment';
- /** Auto-split long audio (default: true, only works with audioPath) */
+ /** Auto-split long audio (default: true). For URLs, detects duration first. */
  autoSplit?: boolean;
  /** Output directory for split segments (default: system temp) */
  splitOutputDir?: string;
+ /** Custom logger (default: console) */
+ logger?: TranscribeLogger;
+ /** Enable verbose/debug logging */
+ verbose?: boolean;
  }
  export interface MistralTranscriberConfig {
  /** Mistral API key */
@@ -40,20 +52,21 @@ export interface MistralTranscriberConfig {
  * // From file (supports auto-split for long audio)
  * const result = await transcriber.transcribe({ audioPath: './interview.mp3' })
  *
- * // From URL (e.g., IPFS gateway)
+ * // From URL (auto-detects if splitting needed, downloads only if necessary)
  * const result = await transcriber.transcribe({ audioUrl: 'https://gateway.ipfs.io/ipfs/Qm...' })
  *
+ * // From URL with known duration (skips detection)
+ * const result = await transcriber.transcribe({ audioUrl: '...', duration: 120 })
+ *
  * // From buffer
  * const result = await transcriber.transcribe({ audioBuffer: buffer, mimeType: 'audio/mpeg' })
  * ```
  */
- export declare function createMistralTranscriber(config: MistralTranscriberConfig): {
- /**
- * Transcribe audio with auto-splitting for long files (file path only)
- * Diarization and word timestamps enabled by default
- */
+ /** Transcriber interface returned by createMistralTranscriber */
+ export interface MistralTranscriber {
  transcribe(options: TranscribeOptions): Promise<MergedTranscriptionResult>;
- };
+ }
+ export declare function createMistralTranscriber(config: MistralTranscriberConfig): MistralTranscriber;
  /** Alias for simpler import */
  export declare const transcribe: typeof createMistralTranscriber;
  //# sourceMappingURL=transcribe.d.ts.map
@@ -1 +1 @@
- {"version":3,"file":"transcribe.d.ts","sourceRoot":"","sources":["../../../src/utils/transcription/transcribe.ts"],"names":[],"mappings":"AAAA;;GAEG;AAOH,OAAO,EAA6B,KAAK,yBAAyB,EAAE,MAAM,2BAA2B,CAAA;AAErG,MAAM,WAAW,iBAAiB;IAChC,yBAAyB;IACzB,SAAS,CAAC,EAAE,MAAM,CAAA;IAClB,iCAAiC;IACjC,WAAW,CAAC,EAAE,MAAM,CAAA;IACpB,gEAAgE;IAChE,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,iDAAiD;IACjD,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,mFAAmF;IACnF,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,kDAAkD;IAClD,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,iDAAiD;IACjD,OAAO,CAAC,EAAE,OAAO,CAAA;IACjB,4FAA4F;IAC5F,UAAU,CAAC,EAAE,MAAM,GAAG,SAAS,CAAA;IAC/B,uEAAuE;IACvE,SAAS,CAAC,EAAE,OAAO,CAAA;IACnB,iEAAiE;IACjE,cAAc,CAAC,EAAE,MAAM,CAAA;CACxB;AAED,MAAM,WAAW,wBAAwB;IACvC,sBAAsB;IACtB,MAAM,EAAE,MAAM,CAAA;IACd,mDAAmD;IACnD,KAAK,CAAC,EAAE,MAAM,CAAA;CACf;AAED;;;;;;;;;;;;;;;;GAgBG;AACH,wBAAgB,wBAAwB,CAAC,MAAM,EAAE,wBAAwB;IAKrE;;;OAGG;wBACuB,iBAAiB,GAAG,OAAO,CAAC,yBAAyB,CAAC;EA4EnF;AAED,+BAA+B;AAC/B,eAAO,MAAM,UAAU,iCAA2B,CAAA"}
+ {"version":3,"file":"transcribe.d.ts","sourceRoot":"","sources":["../../../src/utils/transcription/transcribe.ts"],"names":[],"mappings":"AAAA;;GAEG;AASH,OAAO,EAA6B,KAAK,yBAAyB,EAAE,MAAM,2BAA2B,CAAA;AAErG,kDAAkD;AAClD,MAAM,WAAW,gBAAgB;IAC/B,IAAI,EAAE,CAAC,GAAG,EAAE,MAAM,KAAK,IAAI,CAAA;IAC3B,IAAI,EAAE,CAAC,GAAG,EAAE,MAAM,KAAK,IAAI,CAAA;IAC3B,KAAK,EAAE,CAAC,GAAG,EAAE,MAAM,KAAK,IAAI,CAAA;CAC7B;AAQD,MAAM,WAAW,iBAAiB;IAChC,yBAAyB;IACzB,SAAS,CAAC,EAAE,MAAM,CAAA;IAClB,iCAAiC;IACjC,WAAW,CAAC,EAAE,MAAM,CAAA;IACpB,gEAAgE;IAChE,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,iDAAiD;IACjD,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,yEAAyE;IACzE,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,mFAAmF;IACnF,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,kDAAkD;IAClD,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,iDAAiD;IACjD,OAAO,CAAC,EAAE,OAAO,CAAA;IACjB,4FAA4F;IAC5F,UAAU,CAAC,EAAE,MAAM,GAAG,SAAS,CAAA;IAC/B,+EAA+E;IAC/E,SAAS,CAAC,EAAE,OAAO,CAAA;IACnB,iEAAiE;IACjE,cAAc,CAAC,EAAE,MAAM,CAAA;IACvB,uCAAuC;IACvC,MAAM,CAAC,EAAE,gBAAgB,CAAA;IACzB,mCAAmC;IACnC,OAAO,CAAC,EAAE,OAAO,CAAA;CAClB;AAED,MAAM,WAAW,wBAAwB;IACvC,sBAAsB;IACtB,MAAM,EAAE,MAAM,CAAA;IACd,mDAAmD;IACnD,KAAK,CAAC,EAAE,MAAM,CAAA;CACf;AAkDD;;;;;;;;;;;;;;;;;;;GAmBG;AACH,iEAAiE;AACjE,MAAM,WAAW,kBAAkB;IACjC,UAAU,CAAC,OAAO,EAAE,iBAAiB,GAAG,OAAO,CAAC,yBAAyB,CAAC,CAAA;CAC3E;AAED,wBAAgB,wBAAwB,CAAC,MAAM,EAAE,wBAAwB,GAAG,kBAAkB,CA4K7F;AAED,+BAA+B;AAC/B,eAAO,MAAM,UAAU,iCAA2B,CAAA"}
package/package.json CHANGED
@@ -1,12 +1,16 @@
  {
  "name": "@wovin/tranz",
- "version": "0.0.26",
+ "version": "0.1.0",
  "type": "module",
  "description": "Audio transcription library with provider support and auto-splitting",
  "author": "gotjoshua @gotjoshua",
- "license": "ISC",
+ "license": "AGPL-3.0-or-later",
  "homepage": "https://gitlab.com/onezoomin/ztax/tranz",
- "repository": "https://gitlab.com/onezoomin/ztax/tranz",
+ "repository": {
+ "type": "git",
+ "url": "git+https://gitlab.com/wovin/wovin.git",
+ "directory": "packages/@wovin/tranz"
+ },
  "bugs": "https://gitlab.com/onezoomin/ztax/tranz/-/issues",
  "main": "./dist/index.min.js",
  "module": "./dist/index.min.js",
@@ -56,7 +60,6 @@
  "dev": "concurrently \"pnpm dev:code\" \"pnpm dev:types\"",
  "dev:code": "tsup --watch",
  "dev:types": "tsc --emitDeclarationOnly --declaration --watch",
- "clean": "rm -rf .turbo && rm -rf node_modules && rm -rf dist",
- "pub": "npm publish --tag latest --access=public"
+ "clean": "rm -rf .turbo && rm -rf node_modules && rm -rf dist"
  }
  }