@wovin/tranz 0.1.36 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,40 @@
1
+ /**
2
+ * Runtime environment detection for WebSocket implementations
3
+ *
4
+ * Provides environment-aware WebSocket constructor selection:
5
+ * - Browser/Deno: Uses global WebSocket API
6
+ * - Node.js: Dynamically imports 'ws' package
7
+ */
8
+
9
/**
 * Get the appropriate WebSocket implementation for the current runtime.
 *
 * Resolution order:
 * 1. Browser-like runtime (a global `document` or `navigator` exists) that also
 *    exposes a global `WebSocket`: return that global.
 * 2. Otherwise: dynamically import the `ws` package.
 * 3. If `ws` cannot be loaded but a global `WebSocket` exists: return the global.
 *
 * @returns WebSocket constructor (global WebSocket or the `ws` package export)
 * @throws Error if neither a global WebSocket nor the `ws` package is available
 */
export async function getWebSocketImpl(): Promise<any> {
  // Read globals through an index signature so this compiles without DOM typings.
  const scope: Record<string, unknown> =
    typeof globalThis !== "undefined"
      ? (globalThis as unknown as Record<string, unknown>)
      : {};

  const nativeWebSocket = scope.WebSocket;
  const hasNativeWebSocket = typeof nativeWebSocket !== "undefined";
  const isBrowser =
    typeof scope.document !== "undefined" || typeof scope.navigator !== "undefined";

  // Browser/Deno - use global WebSocket
  if (isBrowser && hasNativeWebSocket) {
    return nativeWebSocket;
  }

  // A global `navigator` is not proof of a browser: a server runtime can define it
  // without a global WebSocket (e.g. Node.js 21). Always try `ws` here instead of
  // giving up just because the runtime looked browser-like.
  try {
    const WS = await import("ws");
    return WS.default || WS;
  } catch {
    // `ws` is not installed: a global WebSocket, if present, is still usable.
    if (hasNativeWebSocket) {
      return nativeWebSocket;
    }
    throw new Error(
      "WebSocket not available. In Node.js, install 'ws' package: npm install ws"
    );
  }
}
@@ -0,0 +1,366 @@
1
+ /**
2
+ * Simple high-level transcription API with good defaults
3
+ */
4
+
5
+ import * as fs from 'node:fs'
6
+ import * as https from 'node:https'
7
+ import * as http from 'node:http'
8
+ import * as os from 'node:os'
9
+ import * as path from 'node:path'
10
+ import { MistralProvider, VOXTRAL_LIMITS, type TranscriptionResult } from './providers.ts'
11
+ import { autoSplitAudio, getAudioDuration, type AudioSegment } from '../audio/split.ts'
12
+ import { mergeTranscriptionResults, type MergedTranscriptionResult } from '../audio/merge-results.ts'
13
+
14
/**
 * Logger interface for transcription progress.
 *
 * All three sinks receive a single, already formatted message string.
 */
export interface TranscribeLogger {
  /** Progress messages (duration detected, segment N/M, ...) */
  info: (msg: string) => void
  /** Recoverable problems (e.g. duration detection failed) */
  warn: (msg: string) => void
  /** Diagnostic detail; routed to `info` when the `verbose` option is set */
  debug: (msg: string) => void
}
20
+
21
/**
 * Console-backed logger used when the caller does not supply one.
 * Every message is prefixed with `[tranz]`; debug output is discarded.
 */
const defaultLogger: TranscribeLogger = {
  info(msg) {
    console.log(`[tranz] ${msg}`)
  },
  warn(msg) {
    console.warn(`[tranz] ${msg}`)
  },
  debug() {
    // silent by default
  },
}
26
+
27
/**
 * Options for a single `transcribe` call.
 *
 * One audio input is used per call. When several are supplied, `audioBuffer`
 * is checked first, then `audioUrl`, then `audioPath`.
 */
export interface TranscribeOptions {
  /** Path to audio file */
  audioPath?: string
  /** Audio buffer to transcribe (sent as-is; buffers are never auto-split) */
  audioBuffer?: Buffer
  /** MIME type for audioBuffer (auto-detected if not provided) */
  mimeType?: string
  /** URL to audio file (e.g., IPFS gateway URL) */
  audioUrl?: string
  /** Known duration in seconds (skips duration detection for URL and file input) */
  duration?: number
  /** Language code (e.g. 'en', 'fr') - note: disables word timestamps for Mistral */
  language?: string
  /** Model to use (default: voxtral-mini-latest) */
  model?: string
  /** Enable speaker diarization (default: true) */
  diarize?: boolean
  /** Timestamp granularity: 'word' | 'segment' (default: 'segment'; left unset when `language` is set) */
  timestamps?: 'word' | 'segment'
  /**
   * Context biasing terms — up to `VOXTRAL_LIMITS.maxContextBiasingTerms` (100)
   * custom-vocabulary entries passed to Voxtral as `context_bias[]`. Mistral only.
   */
  contextBias?: string[]
  /** Auto-split long audio (default: true; only an explicit `false` disables it). For URLs, detects duration first. */
  autoSplit?: boolean
  /** Output directory for split segments and URL downloads (default: a new directory under the system temp dir) */
  splitOutputDir?: string
  /** Custom logger (default: console) */
  logger?: TranscribeLogger
  /** Enable verbose/debug logging */
  verbose?: boolean
}
60
+
61
/** Configuration accepted by `createMistralTranscriber` */
export interface MistralTranscriberConfig {
  /** Mistral API key, sent with every provider request */
  apiKey: string
  /** Default model (default: voxtral-mini-latest); a per-call `model` option takes precedence */
  model?: string
}
67
+
68
/** Map of MIME types to file extensions */
const MIME_TO_EXT: Record<string, string> = {
  'audio/mpeg': '.mp3',
  'audio/mp3': '.mp3',
  'audio/wav': '.wav',
  'audio/x-wav': '.wav',
  'audio/ogg': '.ogg',
  'audio/flac': '.flac',
  'audio/x-flac': '.flac',
  'audio/mp4': '.m4a',
  'audio/m4a': '.m4a',
  'audio/aac': '.aac',
  'audio/webm': '.webm',
  'audio/opus': '.opus',
}

/** Extensions accepted from a URL path: every extension MIME_TO_EXT can produce */
const KNOWN_AUDIO_EXTS: ReadonlySet<string> = new Set(Object.values(MIME_TO_EXT))

/**
 * Get file extension from Content-Type header or URL
 *
 * @param contentType Raw `Content-Type` header value, if any (parameters after `;` are ignored)
 * @param url URL the content was fetched from; its path extension is the fallback
 * @returns A known audio extension including the leading dot, or `.audio` when neither source yields one
 */
function getExtFromContentType(contentType: string | undefined, url: string): string {
  // Try Content-Type first
  if (contentType) {
    const mimeType = contentType.split(';')[0].trim().toLowerCase()
    // The header is server-controlled: use an own-property check so values such as
    // "constructor" or "__proto__" cannot resolve to members inherited from Object.prototype.
    if (Object.prototype.hasOwnProperty.call(MIME_TO_EXT, mimeType)) {
      return MIME_TO_EXT[mimeType]
    }
  }

  // Fall back to URL path extension
  try {
    const ext = path.extname(new URL(url).pathname).toLowerCase()
    if (KNOWN_AUDIO_EXTS.has(ext)) {
      return ext
    }
  } catch {
    // Not a parseable URL: fall through to the generic extension
  }

  // Default to .audio (ffprobe will probe the format)
  return '.audio'
}
106
+
107
/**
 * Download a URL to a temporary file inside `outputDir`.
 *
 * Follows HTTP redirects (301, 302, 303, 307, 308), resolving relative `Location`
 * headers against the current URL, up to `redirectsLeft` hops. The file name is
 * `download-<timestamp><ext>`, with the extension chosen by `getExtFromContentType`.
 *
 * @param url HTTP or HTTPS URL to fetch
 * @param outputDir Existing directory that receives the downloaded file
 * @param redirectsLeft Remaining redirect hops before giving up (default: 5)
 * @returns Path of the downloaded file
 * @throws Error on a non-200 final status, too many redirects, an invalid redirect
 *   target, a connection that closes before the body is complete, or a write failure
 */
async function downloadToTempFile(url: string, outputDir: string, redirectsLeft = 5): Promise<string> {
  return new Promise((resolve, reject) => {
    const protocol = url.startsWith('https') ? https : http
    protocol.get(url, (response) => {
      const status = response.statusCode
      const location = response.headers.location

      // Handle redirect
      if (status !== undefined && [301, 302, 303, 307, 308].includes(status) && location) {
        // Drain the unused body so the socket is released
        response.resume()
        if (redirectsLeft <= 0) {
          reject(new Error('Failed to download: too many redirects'))
          return
        }
        let nextUrl: string
        try {
          // Location may be relative to the URL that produced it
          nextUrl = new URL(location, url).toString()
        } catch {
          reject(new Error(`Failed to download: invalid redirect location ${location}`))
          return
        }
        downloadToTempFile(nextUrl, outputDir, redirectsLeft - 1).then(resolve, reject)
        return
      }

      if (status !== 200) {
        response.resume()
        reject(new Error(`Failed to download: HTTP ${status}`))
        return
      }

      // Determine file extension from Content-Type or URL
      const ext = getExtFromContentType(response.headers['content-type'], url)
      const tempPath = path.join(outputDir, `download-${Date.now()}${ext}`)
      const file = fs.createWriteStream(tempPath)

      // Several events can report completion or failure; only the first one counts.
      let settled = false
      const fail = (err: Error) => {
        if (settled) return
        settled = true
        response.destroy()
        file.destroy()
        // Best-effort removal of the partial file
        fs.unlink(tempPath, () => {})
        reject(err)
      }

      response.on('error', fail)
      response.on('close', () => {
        // A close before the full body arrived would otherwise leave the promise pending
        if (!response.complete) {
          fail(new Error('Failed to download: connection closed before the response completed'))
        }
      })
      file.on('error', fail)
      file.on('finish', () => {
        file.close(() => {
          if (settled) return
          settled = true
          resolve(tempPath)
        })
      })

      response.pipe(file)
    }).on('error', (err) => {
      reject(err)
    })
  })
}
146
+
147
/**
 * Best-effort duration probe for remote audio.
 *
 * Hands the URL to `getAudioDuration`; any failure is swallowed so the caller
 * can fall back to downloading the file.
 *
 * @returns Duration reported by `getAudioDuration`, or undefined if detection fails
 */
async function tryGetUrlDuration(url: string): Promise<number | undefined> {
  let seconds: number | undefined
  try {
    seconds = await getAudioDuration(url)
  } catch {
    seconds = undefined
  }
  return seconds
}
158
+
159
+ /**
160
+ * Simple Mistral transcriber with auto-splitting and good defaults
161
+ *
162
+ * @example
163
+ * ```ts
164
+ * const transcriber = createMistralTranscriber({ apiKey: process.env.MISTRAL_API_KEY })
165
+ *
166
+ * // From file (supports auto-split for long audio)
167
+ * const result = await transcriber.transcribe({ audioPath: './interview.mp3' })
168
+ *
169
+ * // From URL (auto-detects if splitting needed, downloads only if necessary)
170
+ * const result = await transcriber.transcribe({ audioUrl: 'https://gateway.ipfs.io/ipfs/Qm...' })
171
+ *
172
+ * // From URL with known duration (skips detection)
173
+ * const result = await transcriber.transcribe({ audioUrl: '...', duration: 120 })
174
+ *
175
+ * // From buffer
176
+ * const result = await transcriber.transcribe({ audioBuffer: buffer, mimeType: 'audio/mpeg' })
177
+ * ```
178
+ */
179
/** Transcriber interface returned by createMistralTranscriber */
export interface MistralTranscriber {
  /**
   * Transcribe one audio input (buffer, URL, or file path).
   * Resolves to the provider result, or to the merged result when the audio was split.
   */
  transcribe(options: TranscribeOptions): Promise<MergedTranscriptionResult>
}
183
+
184
/**
 * Create a Mistral transcriber with auto-splitting and good defaults.
 *
 * The returned `transcribe` method does not rely on `this`, so it can be
 * destructured or passed around as a plain function.
 *
 * @param config API key plus optional default model (falls back to `voxtral-mini-latest`)
 * @returns Object exposing `transcribe(options)`
 */
export function createMistralTranscriber(config: MistralTranscriberConfig): MistralTranscriber {
  const provider = new MistralProvider()
  const defaultModel = config.model || 'voxtral-mini-latest'

  /**
   * Transcribe audio with smart auto-splitting
   * - For buffers: transcribes directly (no splitting)
   * - For URLs: uses the URL directly when autoSplit is false or the duration is known
   *   to fit the limit; otherwise downloads to a temp file and processes it as a file
   * - For files: checks duration and splits if needed
   *
   * Resolves to `{ text: '', error }` when no audio input is provided.
   */
  async function transcribe(options: TranscribeOptions): Promise<MergedTranscriptionResult> {
    const {
      audioPath,
      audioBuffer,
      mimeType,
      audioUrl,
      duration: knownDuration,
      language,
      model = defaultModel,
      contextBias,
      diarize = true,
      timestamps = language ? undefined : 'segment',
      autoSplit,
      splitOutputDir,
      logger: customLogger,
      verbose,
    } = options

    const baseLog = customLogger || defaultLogger
    // Verbose promotes debug to info. Build a per-call view instead of assigning to
    // `debug` on the shared default (or caller-owned) logger, which would leave every
    // later call verbose as well.
    const log: TranscribeLogger = verbose
      ? {
          info: (msg) => baseLog.info(msg),
          warn: (msg) => baseLog.warn(msg),
          debug: (msg) => baseLog.info(msg),
        }
      : baseLog

    const maxDuration = VOXTRAL_LIMITS.maxAudioDurationSec

    // Settings shared by every provider request made during this call
    const request = {
      apiKey: config.apiKey,
      model,
      language,
      diarize,
      timestampGranularity: timestamps,
      contextBias,
    }

    // Buffer input - no auto-split support, transcribe directly
    if (audioBuffer) {
      log.info(`Transcribing from buffer (${(audioBuffer.length / 1024 / 1024).toFixed(2)} MB)`)
      return provider.transcribe({ audioBuffer, mimeType, ...request })
    }

    // URL input - smart handling
    if (audioUrl) {
      // If autoSplit explicitly disabled, use URL directly
      if (autoSplit === false) {
        log.info(`Transcribing URL directly (autoSplit disabled)`)
        return provider.transcribe({ audioUrl, ...request })
      }

      // Check duration (use known or detect)
      let duration = knownDuration
      if (duration === undefined) {
        log.info(`Probing URL duration via ffprobe...`)
        duration = await tryGetUrlDuration(audioUrl)
        if (duration !== undefined) {
          log.info(`Duration detected: ${duration.toFixed(1)}s`)
        } else {
          log.warn(`Duration detection failed, will download to check`)
        }
      } else {
        log.debug(`Using provided duration: ${duration.toFixed(1)}s`)
      }

      // If duration known and short enough, use URL directly
      if (duration !== undefined && duration <= maxDuration) {
        log.info(`Duration ${duration.toFixed(1)}s <= ${maxDuration}s, using URL directly`)
        return provider.transcribe({ audioUrl, ...request })
      }

      // Duration unknown or too long - download and process as file
      log.info(`Downloading URL to temp file for processing...`)
      const outDir = splitOutputDir || path.join(os.tmpdir(), `tranz-${Date.now()}`)
      fs.mkdirSync(outDir, { recursive: true })

      const tempFile = await downloadToTempFile(audioUrl, outDir)
      log.info(`Downloaded to ${tempFile}`)

      try {
        // Recurse with file path
        return await transcribe({
          audioPath: tempFile,
          language,
          model,
          diarize,
          timestamps,
          contextBias,
          autoSplit: true,
          splitOutputDir: outDir,
          logger: customLogger,
          verbose,
        })
      } finally {
        // Cleanup temp file even when transcription fails (segments are in outDir)
        try {
          fs.unlinkSync(tempFile)
        } catch {
          // best-effort: the file may already be gone
        }
      }
    }

    // File path input
    if (!audioPath) {
      return { text: '', error: 'No audio input provided (audioPath, audioBuffer, or audioUrl required)' }
    }

    log.debug(`Processing file: ${audioPath}`)
    const duration = knownDuration ?? await getAudioDuration(audioPath)
    log.info(`Audio duration: ${duration.toFixed(1)}s`)
    const needsSplit = autoSplit !== false && duration > maxDuration

    if (!needsSplit) {
      log.info(`Transcribing file directly (no split needed)`)
      return provider.transcribe({ audioPath, ...request })
    }

    // Auto-split and transcribe segments
    log.info(`Duration ${duration.toFixed(1)}s > ${maxDuration}s, splitting audio...`)
    const outDir = splitOutputDir || path.join(os.tmpdir(), `tranz-split-${Date.now()}`)
    fs.mkdirSync(outDir, { recursive: true })

    const segments = await autoSplitAudio(audioPath, outDir, {
      maxDurationSec: maxDuration,
    })
    log.info(`Split into ${segments.length} segments`)

    // Transcribe each segment, one at a time and in order
    const results: TranscriptionResult[] = []
    for (let i = 0; i < segments.length; i++) {
      const segment = segments[i]
      log.info(`Transcribing segment ${i + 1}/${segments.length} (${segment.durationSec.toFixed(1)}s)`)
      const result = await provider.transcribe({ audioPath: segment.outputPath, ...request })
      results.push(result)
    }

    log.info(`Merging ${segments.length} segments`)
    return mergeTranscriptionResults(results, segments)
  }

  return { transcribe }
}
364
+
365
/**
 * Alias for simpler import.
 *
 * Note: this is the factory `createMistralTranscriber`, not the `transcribe` method.
 * Calling `transcribe(config)` returns a transcriber object.
 */
export const transcribe = createMistralTranscriber
File without changes