@wovin/tranz 0.1.9 → 0.1.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -49,7 +49,7 @@ const transcriber = createMistralTranscriber({
49
49
  const result = await transcriber.transcribe({
50
50
  audioPath: './interview.mp3',
51
51
  diarize: true,
52
- timestamps: 'word'
52
+ timestamps: 'segment'
53
53
  })
54
54
 
55
55
  console.log(result.text)
@@ -137,7 +137,7 @@ const result = await mistral.transcribe({
137
137
  apiKey: process.env.MISTRAL_API_KEY,
138
138
  model: 'voxtral-mini-latest',
139
139
  diarize: true,
140
- timestampGranularity: 'word'
140
+ timestampGranularity: 'segment'
141
141
  })
142
142
 
143
143
  // Whisper provider (local)
@@ -264,7 +264,7 @@ Options for the `transcribe()` method:
264
264
  - `language?: string` - Language code (e.g., 'en', 'fr') - disables word timestamps
265
265
  - `model?: string` - Override default model
266
266
  - `diarize?: boolean` - Enable speaker diarization (default: true)
267
- - `timestamps?: 'word' | 'segment'` - Timestamp granularity (default: 'word')
267
+ - `timestamps?: 'word' | 'segment'` - Timestamp granularity (default: 'segment' when diarize is true, disabled if language is set)
268
268
  - `autoSplit?: boolean` - Auto-split long audio (default: true)
269
269
  - `splitOutputDir?: string` - Directory for split segments (default: system temp)
270
270
  - `logger?: TranscribeLogger` - Custom logger
package/dist/audio.min.js CHANGED
@@ -15,10 +15,11 @@ async function execFFprobe(audioPath) {
15
15
  try {
16
16
  const { stdout } = await execa("ffprobe", [
17
17
  "-v",
18
- "quiet",
18
+ "error",
19
19
  "-print_format",
20
20
  "json",
21
21
  "-show_format",
22
+ "-show_streams",
22
23
  audioPath
23
24
  ]);
24
25
  return JSON.parse(stdout);
@@ -52,13 +53,61 @@ async function extractAudioSegment(inputPath, outputPath, startSec, durationSec)
52
53
  throw new Error(`Failed to extract segment: ${err instanceof Error ? err.message : String(err)}`);
53
54
  }
54
55
  }
56
+ async function getDurationViaFfmpeg(audioPath) {
57
+ try {
58
+ const { stderr } = await execa("ffmpeg", [
59
+ "-i",
60
+ audioPath,
61
+ "-f",
62
+ "null",
63
+ "-"
64
+ ], { reject: false });
65
+ const durationMatch = stderr.match(/Duration:\s*(\d+):(\d+):(\d+(?:\.\d+)?)/);
66
+ if (durationMatch) {
67
+ const hours = parseFloat(durationMatch[1]);
68
+ const minutes = parseFloat(durationMatch[2]);
69
+ const seconds = parseFloat(durationMatch[3]);
70
+ return hours * 3600 + minutes * 60 + seconds;
71
+ }
72
+ const timeMatches = [...stderr.matchAll(/time=(\d+):(\d+):(\d+(?:\.\d+)?)/g)];
73
+ if (timeMatches.length > 0) {
74
+ const lastMatch = timeMatches[timeMatches.length - 1];
75
+ const hours = parseFloat(lastMatch[1]);
76
+ const minutes = parseFloat(lastMatch[2]);
77
+ const seconds = parseFloat(lastMatch[3]);
78
+ return hours * 3600 + minutes * 60 + seconds;
79
+ }
80
+ } catch {
81
+ }
82
+ return void 0;
83
+ }
55
84
  async function getAudioDuration(audioPath) {
56
85
  const metadata = await execFFprobe(audioPath);
57
- const duration = metadata.format.duration;
58
- if (typeof duration !== "number") {
59
- throw new Error("Could not determine audio duration");
86
+ if (metadata.format?.duration) {
87
+ const duration = parseFloat(String(metadata.format.duration));
88
+ if (!isNaN(duration) && duration > 0) {
89
+ return duration;
90
+ }
60
91
  }
61
- return duration;
92
+ if (metadata.streams?.length) {
93
+ for (const stream of metadata.streams) {
94
+ if (stream.duration) {
95
+ const duration = parseFloat(String(stream.duration));
96
+ if (!isNaN(duration) && duration > 0) {
97
+ return duration;
98
+ }
99
+ }
100
+ }
101
+ }
102
+ const ffmpegDuration = await getDurationViaFfmpeg(audioPath);
103
+ if (ffmpegDuration !== void 0 && ffmpegDuration > 0) {
104
+ return ffmpegDuration;
105
+ }
106
+ const hasFormat = !!metadata.format;
107
+ const hasStreams = !!metadata.streams?.length;
108
+ throw new Error(
109
+ `Could not determine audio duration (format: ${hasFormat}, streams: ${hasStreams}). File may be corrupted or in an unsupported format.`
110
+ );
62
111
  }
63
112
  async function detectSilenceRegions(audioPath, config = {}) {
64
113
  const { minSilenceDurSec, silenceThreshold } = { ...DEFAULT_SPLIT_CONFIG, ...config };
package/dist/index.d.ts CHANGED
@@ -5,5 +5,6 @@ export { createProvider, MistralProvider, WhisperProvider, GreenPTProvider, VOXT
5
5
  export { autoSplitAudio, analyzeSplitPoints, detectSilenceRegions, getAudioDuration, findOptimalSplitPoints, splitAudioAtPoints, DEFAULT_SPLIT_CONFIG, type SplitConfig, type SilenceRegion, type SplitPoint, type AudioSegment, type SplitAnalysis, } from './utils/audio/split.ts';
6
6
  export { mergeTranscriptionResults, formatMergedText, type MergedTranscriptionResult, type WordData, } from './utils/audio/merge-results.ts';
7
7
  export { formatTranscriptWithPauses } from './utils/transcription/format.ts';
8
+ export { detectAudioMimeType } from './utils/transcription/mime-detection.ts';
8
9
  export { createMistralTranscriber, transcribe, type TranscribeOptions, type MistralTranscriberConfig, type MistralTranscriber, } from './utils/transcription/transcribe.ts';
9
10
  //# sourceMappingURL=index.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AAGH,OAAO,EACL,cAAc,EACd,eAAe,EACf,eAAe,EACf,eAAe,EACf,cAAc,EACd,KAAK,YAAY,EACjB,KAAK,gBAAgB,EACrB,KAAK,mBAAmB,EACxB,KAAK,qBAAqB,GAC3B,MAAM,oCAAoC,CAAA;AAG3C,OAAO,EACL,cAAc,EACd,kBAAkB,EAClB,oBAAoB,EACpB,gBAAgB,EAChB,sBAAsB,EACtB,kBAAkB,EAClB,oBAAoB,EACpB,KAAK,WAAW,EAChB,KAAK,aAAa,EAClB,KAAK,UAAU,EACf,KAAK,YAAY,EACjB,KAAK,aAAa,GACnB,MAAM,wBAAwB,CAAA;AAG/B,OAAO,EACL,yBAAyB,EACzB,gBAAgB,EAChB,KAAK,yBAAyB,EAC9B,KAAK,QAAQ,GACd,MAAM,gCAAgC,CAAA;AAGvC,OAAO,EAAE,0BAA0B,EAAE,MAAM,iCAAiC,CAAA;AAG5E,OAAO,EACL,wBAAwB,EACxB,UAAU,EACV,KAAK,iBAAiB,EACtB,KAAK,wBAAwB,EAC7B,KAAK,kBAAkB,GACxB,MAAM,qCAAqC,CAAA"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AAGH,OAAO,EACL,cAAc,EACd,eAAe,EACf,eAAe,EACf,eAAe,EACf,cAAc,EACd,KAAK,YAAY,EACjB,KAAK,gBAAgB,EACrB,KAAK,mBAAmB,EACxB,KAAK,qBAAqB,GAC3B,MAAM,oCAAoC,CAAA;AAG3C,OAAO,EACL,cAAc,EACd,kBAAkB,EAClB,oBAAoB,EACpB,gBAAgB,EAChB,sBAAsB,EACtB,kBAAkB,EAClB,oBAAoB,EACpB,KAAK,WAAW,EAChB,KAAK,aAAa,EAClB,KAAK,UAAU,EACf,KAAK,YAAY,EACjB,KAAK,aAAa,GACnB,MAAM,wBAAwB,CAAA;AAG/B,OAAO,EACL,yBAAyB,EACzB,gBAAgB,EAChB,KAAK,yBAAyB,EAC9B,KAAK,QAAQ,GACd,MAAM,gCAAgC,CAAA;AAGvC,OAAO,EAAE,0BAA0B,EAAE,MAAM,iCAAiC,CAAA;AAG5E,OAAO,EAAE,mBAAmB,EAAE,MAAM,yCAAyC,CAAA;AAG7E,OAAO,EACL,wBAAwB,EACxB,UAAU,EACV,KAAK,iBAAiB,EACtB,KAAK,wBAAwB,EAC7B,KAAK,kBAAkB,GACxB,MAAM,qCAAqC,CAAA"}
package/dist/index.min.js CHANGED
@@ -1,3 +1,7 @@
1
+ var __defProp = Object.defineProperty;
2
+ var __defNormalProp = (obj, key, value) => key in obj ? __defProp(obj, key, { enumerable: true, configurable: true, writable: true, value }) : obj[key] = value;
3
+ var __publicField = (obj, key, value) => __defNormalProp(obj, typeof key !== "symbol" ? key + "" : key, value);
4
+
1
5
  // src/utils/transcription/providers.ts
2
6
  import { spawn } from "child_process";
3
7
  import * as fs from "fs";
@@ -57,26 +61,10 @@ function createProvider(providerName, config) {
57
61
  throw new Error(`Unknown provider: ${providerName}`);
58
62
  }
59
63
  }
60
- var WhisperProvider = class _WhisperProvider {
61
- name = "whisper";
62
- cacheDir;
63
- static DEFAULTS = {
64
- DIARIZE: false,
65
- SILDUR: "1.3",
66
- SILBUF: 0.2,
67
- SILTHR: "-35dB",
68
- MODEL_KEYS: {
69
- tinyd: "ggml-small.en-tdrz.bin",
70
- small: "ggml-small.bin",
71
- medium: "ggml-medium.bin"
72
- },
73
- MODELS: {
74
- tinyd: "https://huggingface.co/akashmjn/tinydiarize-whisper.cpp/resolve/main/ggml-small.en-tdrz.bin",
75
- small: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-small.bin",
76
- medium: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-medium.bin"
77
- }
78
- };
64
+ var _WhisperProvider = class _WhisperProvider {
79
65
  constructor(config) {
66
+ __publicField(this, "name", "whisper");
67
+ __publicField(this, "cacheDir");
80
68
  this.cacheDir = config?.cacheDir || `${process.env.HOME}/.cache/whisper-models`;
81
69
  }
82
70
  async transcribe(params) {
@@ -187,6 +175,23 @@ var WhisperProvider = class _WhisperProvider {
187
175
  return modelPath;
188
176
  }
189
177
  };
178
+ __publicField(_WhisperProvider, "DEFAULTS", {
179
+ DIARIZE: false,
180
+ SILDUR: "1.3",
181
+ SILBUF: 0.2,
182
+ SILTHR: "-35dB",
183
+ MODEL_KEYS: {
184
+ tinyd: "ggml-small.en-tdrz.bin",
185
+ small: "ggml-small.bin",
186
+ medium: "ggml-medium.bin"
187
+ },
188
+ MODELS: {
189
+ tinyd: "https://huggingface.co/akashmjn/tinydiarize-whisper.cpp/resolve/main/ggml-small.en-tdrz.bin",
190
+ small: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-small.bin",
191
+ medium: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-medium.bin"
192
+ }
193
+ });
194
+ var WhisperProvider = _WhisperProvider;
190
195
  var VOXTRAL_LIMITS = {
191
196
  /** Maximum audio duration in seconds (3 hours for Voxtral Transcribe 2) */
192
197
  maxAudioDurationSec: 3 * 60 * 60,
@@ -200,8 +205,10 @@ var VOXTRAL_LIMITS = {
200
205
  maxFileSizeBytes: 1024 * 1024 * 1024
201
206
  };
202
207
  var MistralProvider = class {
203
- name = "mistral";
204
- maxAudioDurationSec = VOXTRAL_LIMITS.maxAudioDurationSec;
208
+ constructor() {
209
+ __publicField(this, "name", "mistral");
210
+ __publicField(this, "maxAudioDurationSec", VOXTRAL_LIMITS.maxAudioDurationSec);
211
+ }
205
212
  /**
206
213
  * Check if audio duration exceeds recommended limits
207
214
  */
@@ -216,7 +223,11 @@ var MistralProvider = class {
216
223
  }
217
224
  async transcribe(params) {
218
225
  if (params.language && params.timestampGranularity) {
219
- return { text: "", error: "Cannot use both language and timestampGranularity (Mistral API limitation)" };
226
+ throw new Error("Cannot use both language and timestampGranularity (Mistral API limitation)");
227
+ }
228
+ const diarize = params.diarize ?? true;
229
+ if (diarize && params.timestampGranularity === "word") {
230
+ throw new Error('When diarize is set to true, the timestamp granularity must be set to ["segment"], got ["word"]');
220
231
  }
221
232
  const formData = new FormData();
222
233
  if (params.audioUrl) {
@@ -242,7 +253,6 @@ var MistralProvider = class {
242
253
  if (params.language) {
243
254
  formData.append("language", params.language);
244
255
  }
245
- const diarize = params.diarize ?? true;
246
256
  if (diarize) {
247
257
  formData.append("diarize", "true");
248
258
  }
@@ -278,7 +288,9 @@ var MistralProvider = class {
278
288
  }
279
289
  };
280
290
  var GreenPTProvider = class {
281
- name = "greenpt";
291
+ constructor() {
292
+ __publicField(this, "name", "greenpt");
293
+ }
282
294
  async transcribe(params) {
283
295
  if (!params.apiKey) {
284
296
  return { text: "", error: "API key is required for GreenPT provider" };
@@ -365,10 +377,11 @@ async function execFFprobe(audioPath) {
365
377
  try {
366
378
  const { stdout } = await execa("ffprobe", [
367
379
  "-v",
368
- "quiet",
380
+ "error",
369
381
  "-print_format",
370
382
  "json",
371
383
  "-show_format",
384
+ "-show_streams",
372
385
  audioPath
373
386
  ]);
374
387
  return JSON.parse(stdout);
@@ -402,13 +415,61 @@ async function extractAudioSegment(inputPath, outputPath, startSec, durationSec)
402
415
  throw new Error(`Failed to extract segment: ${err instanceof Error ? err.message : String(err)}`);
403
416
  }
404
417
  }
418
+ async function getDurationViaFfmpeg(audioPath) {
419
+ try {
420
+ const { stderr } = await execa("ffmpeg", [
421
+ "-i",
422
+ audioPath,
423
+ "-f",
424
+ "null",
425
+ "-"
426
+ ], { reject: false });
427
+ const durationMatch = stderr.match(/Duration:\s*(\d+):(\d+):(\d+(?:\.\d+)?)/);
428
+ if (durationMatch) {
429
+ const hours = parseFloat(durationMatch[1]);
430
+ const minutes = parseFloat(durationMatch[2]);
431
+ const seconds = parseFloat(durationMatch[3]);
432
+ return hours * 3600 + minutes * 60 + seconds;
433
+ }
434
+ const timeMatches = [...stderr.matchAll(/time=(\d+):(\d+):(\d+(?:\.\d+)?)/g)];
435
+ if (timeMatches.length > 0) {
436
+ const lastMatch = timeMatches[timeMatches.length - 1];
437
+ const hours = parseFloat(lastMatch[1]);
438
+ const minutes = parseFloat(lastMatch[2]);
439
+ const seconds = parseFloat(lastMatch[3]);
440
+ return hours * 3600 + minutes * 60 + seconds;
441
+ }
442
+ } catch {
443
+ }
444
+ return void 0;
445
+ }
405
446
  async function getAudioDuration(audioPath) {
406
447
  const metadata = await execFFprobe(audioPath);
407
- const duration = metadata.format.duration;
408
- if (typeof duration !== "number") {
409
- throw new Error("Could not determine audio duration");
448
+ if (metadata.format?.duration) {
449
+ const duration = parseFloat(String(metadata.format.duration));
450
+ if (!isNaN(duration) && duration > 0) {
451
+ return duration;
452
+ }
453
+ }
454
+ if (metadata.streams?.length) {
455
+ for (const stream of metadata.streams) {
456
+ if (stream.duration) {
457
+ const duration = parseFloat(String(stream.duration));
458
+ if (!isNaN(duration) && duration > 0) {
459
+ return duration;
460
+ }
461
+ }
462
+ }
410
463
  }
411
- return duration;
464
+ const ffmpegDuration = await getDurationViaFfmpeg(audioPath);
465
+ if (ffmpegDuration !== void 0 && ffmpegDuration > 0) {
466
+ return ffmpegDuration;
467
+ }
468
+ const hasFormat = !!metadata.format;
469
+ const hasStreams = !!metadata.streams?.length;
470
+ throw new Error(
471
+ `Could not determine audio duration (format: ${hasFormat}, streams: ${hasStreams}). File may be corrupted or in an unsupported format.`
472
+ );
412
473
  }
413
474
  async function detectSilenceRegions(audioPath, config = {}) {
414
475
  const { minSilenceDurSec, silenceThreshold } = { ...DEFAULT_SPLIT_CONFIG, ...config };
@@ -726,17 +787,44 @@ var defaultLogger = {
726
787
  }
727
788
  // silent by default
728
789
  };
790
+ var MIME_TO_EXT = {
791
+ "audio/mpeg": ".mp3",
792
+ "audio/mp3": ".mp3",
793
+ "audio/wav": ".wav",
794
+ "audio/x-wav": ".wav",
795
+ "audio/ogg": ".ogg",
796
+ "audio/flac": ".flac",
797
+ "audio/x-flac": ".flac",
798
+ "audio/mp4": ".m4a",
799
+ "audio/m4a": ".m4a",
800
+ "audio/aac": ".aac",
801
+ "audio/webm": ".webm",
802
+ "audio/opus": ".opus"
803
+ };
804
+ function getExtFromContentType(contentType, url) {
805
+ if (contentType) {
806
+ const mimeType = contentType.split(";")[0].trim().toLowerCase();
807
+ if (MIME_TO_EXT[mimeType]) {
808
+ return MIME_TO_EXT[mimeType];
809
+ }
810
+ }
811
+ try {
812
+ const urlPath = new URL(url).pathname;
813
+ const ext = path3.extname(urlPath).toLowerCase();
814
+ if (ext && [".mp3", ".wav", ".ogg", ".flac", ".m4a", ".aac", ".webm", ".opus"].includes(ext)) {
815
+ return ext;
816
+ }
817
+ } catch {
818
+ }
819
+ return ".audio";
820
+ }
729
821
  async function downloadToTempFile(url, outputDir) {
730
- const tempPath = path3.join(outputDir, `download-${Date.now()}.audio`);
731
- const file = fs3.createWriteStream(tempPath);
732
822
  return new Promise((resolve, reject) => {
733
823
  const protocol = url.startsWith("https") ? https : http;
734
824
  protocol.get(url, (response) => {
735
825
  if (response.statusCode === 301 || response.statusCode === 302) {
736
826
  const redirectUrl = response.headers.location;
737
827
  if (redirectUrl) {
738
- file.close();
739
- fs3.unlinkSync(tempPath);
740
828
  downloadToTempFile(redirectUrl, outputDir).then(resolve).catch(reject);
741
829
  return;
742
830
  }
@@ -745,14 +833,20 @@ async function downloadToTempFile(url, outputDir) {
745
833
  reject(new Error(`Failed to download: HTTP ${response.statusCode}`));
746
834
  return;
747
835
  }
836
+ const ext = getExtFromContentType(response.headers["content-type"], url);
837
+ const tempPath = path3.join(outputDir, `download-${Date.now()}${ext}`);
838
+ const file = fs3.createWriteStream(tempPath);
748
839
  response.pipe(file);
749
840
  file.on("finish", () => {
750
841
  file.close();
751
842
  resolve(tempPath);
752
843
  });
753
- }).on("error", (err) => {
754
- fs3.unlink(tempPath, () => {
844
+ file.on("error", (err) => {
845
+ fs3.unlink(tempPath, () => {
846
+ });
847
+ reject(err);
755
848
  });
849
+ }).on("error", (err) => {
756
850
  reject(err);
757
851
  });
758
852
  });
@@ -784,7 +878,7 @@ function createMistralTranscriber(config) {
784
878
  language,
785
879
  model = defaultModel,
786
880
  diarize = true,
787
- timestamps = language ? void 0 : "word",
881
+ timestamps = language ? void 0 : "segment",
788
882
  autoSplit,
789
883
  splitOutputDir,
790
884
  logger: customLogger,
@@ -921,6 +1015,7 @@ export {
921
1015
  autoSplitAudio,
922
1016
  createMistralTranscriber,
923
1017
  createProvider,
1018
+ detectAudioMimeType,
924
1019
  detectSilenceRegions,
925
1020
  findOptimalSplitPoints,
926
1021
  formatMergedText,
@@ -1,3 +1,7 @@
1
+ var __defProp = Object.defineProperty;
2
+ var __defNormalProp = (obj, key, value) => key in obj ? __defProp(obj, key, { enumerable: true, configurable: true, writable: true, value }) : obj[key] = value;
3
+ var __publicField = (obj, key, value) => __defNormalProp(obj, typeof key !== "symbol" ? key + "" : key, value);
4
+
1
5
  // src/utils/transcription/providers.ts
2
6
  import { spawn } from "child_process";
3
7
  import * as fs from "fs";
@@ -57,26 +61,10 @@ function createProvider(providerName, config) {
57
61
  throw new Error(`Unknown provider: ${providerName}`);
58
62
  }
59
63
  }
60
- var WhisperProvider = class _WhisperProvider {
61
- name = "whisper";
62
- cacheDir;
63
- static DEFAULTS = {
64
- DIARIZE: false,
65
- SILDUR: "1.3",
66
- SILBUF: 0.2,
67
- SILTHR: "-35dB",
68
- MODEL_KEYS: {
69
- tinyd: "ggml-small.en-tdrz.bin",
70
- small: "ggml-small.bin",
71
- medium: "ggml-medium.bin"
72
- },
73
- MODELS: {
74
- tinyd: "https://huggingface.co/akashmjn/tinydiarize-whisper.cpp/resolve/main/ggml-small.en-tdrz.bin",
75
- small: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-small.bin",
76
- medium: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-medium.bin"
77
- }
78
- };
64
+ var _WhisperProvider = class _WhisperProvider {
79
65
  constructor(config) {
66
+ __publicField(this, "name", "whisper");
67
+ __publicField(this, "cacheDir");
80
68
  this.cacheDir = config?.cacheDir || `${process.env.HOME}/.cache/whisper-models`;
81
69
  }
82
70
  async transcribe(params) {
@@ -187,6 +175,23 @@ var WhisperProvider = class _WhisperProvider {
187
175
  return modelPath;
188
176
  }
189
177
  };
178
+ __publicField(_WhisperProvider, "DEFAULTS", {
179
+ DIARIZE: false,
180
+ SILDUR: "1.3",
181
+ SILBUF: 0.2,
182
+ SILTHR: "-35dB",
183
+ MODEL_KEYS: {
184
+ tinyd: "ggml-small.en-tdrz.bin",
185
+ small: "ggml-small.bin",
186
+ medium: "ggml-medium.bin"
187
+ },
188
+ MODELS: {
189
+ tinyd: "https://huggingface.co/akashmjn/tinydiarize-whisper.cpp/resolve/main/ggml-small.en-tdrz.bin",
190
+ small: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-small.bin",
191
+ medium: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-medium.bin"
192
+ }
193
+ });
194
+ var WhisperProvider = _WhisperProvider;
190
195
  var VOXTRAL_LIMITS = {
191
196
  /** Maximum audio duration in seconds (3 hours for Voxtral Transcribe 2) */
192
197
  maxAudioDurationSec: 3 * 60 * 60,
@@ -200,8 +205,10 @@ var VOXTRAL_LIMITS = {
200
205
  maxFileSizeBytes: 1024 * 1024 * 1024
201
206
  };
202
207
  var MistralProvider = class {
203
- name = "mistral";
204
- maxAudioDurationSec = VOXTRAL_LIMITS.maxAudioDurationSec;
208
+ constructor() {
209
+ __publicField(this, "name", "mistral");
210
+ __publicField(this, "maxAudioDurationSec", VOXTRAL_LIMITS.maxAudioDurationSec);
211
+ }
205
212
  /**
206
213
  * Check if audio duration exceeds recommended limits
207
214
  */
@@ -216,7 +223,11 @@ var MistralProvider = class {
216
223
  }
217
224
  async transcribe(params) {
218
225
  if (params.language && params.timestampGranularity) {
219
- return { text: "", error: "Cannot use both language and timestampGranularity (Mistral API limitation)" };
226
+ throw new Error("Cannot use both language and timestampGranularity (Mistral API limitation)");
227
+ }
228
+ const diarize = params.diarize ?? true;
229
+ if (diarize && params.timestampGranularity === "word") {
230
+ throw new Error('When diarize is set to true, the timestamp granularity must be set to ["segment"], got ["word"]');
220
231
  }
221
232
  const formData = new FormData();
222
233
  if (params.audioUrl) {
@@ -242,7 +253,6 @@ var MistralProvider = class {
242
253
  if (params.language) {
243
254
  formData.append("language", params.language);
244
255
  }
245
- const diarize = params.diarize ?? true;
246
256
  if (diarize) {
247
257
  formData.append("diarize", "true");
248
258
  }
@@ -278,7 +288,9 @@ var MistralProvider = class {
278
288
  }
279
289
  };
280
290
  var GreenPTProvider = class {
281
- name = "greenpt";
291
+ constructor() {
292
+ __publicField(this, "name", "greenpt");
293
+ }
282
294
  async transcribe(params) {
283
295
  if (!params.apiKey) {
284
296
  return { text: "", error: "API key is required for GreenPT provider" };
@@ -372,10 +384,11 @@ async function execFFprobe(audioPath) {
372
384
  try {
373
385
  const { stdout } = await execa("ffprobe", [
374
386
  "-v",
375
- "quiet",
387
+ "error",
376
388
  "-print_format",
377
389
  "json",
378
390
  "-show_format",
391
+ "-show_streams",
379
392
  audioPath
380
393
  ]);
381
394
  return JSON.parse(stdout);
@@ -409,13 +422,61 @@ async function extractAudioSegment(inputPath, outputPath, startSec, durationSec)
409
422
  throw new Error(`Failed to extract segment: ${err instanceof Error ? err.message : String(err)}`);
410
423
  }
411
424
  }
425
+ async function getDurationViaFfmpeg(audioPath) {
426
+ try {
427
+ const { stderr } = await execa("ffmpeg", [
428
+ "-i",
429
+ audioPath,
430
+ "-f",
431
+ "null",
432
+ "-"
433
+ ], { reject: false });
434
+ const durationMatch = stderr.match(/Duration:\s*(\d+):(\d+):(\d+(?:\.\d+)?)/);
435
+ if (durationMatch) {
436
+ const hours = parseFloat(durationMatch[1]);
437
+ const minutes = parseFloat(durationMatch[2]);
438
+ const seconds = parseFloat(durationMatch[3]);
439
+ return hours * 3600 + minutes * 60 + seconds;
440
+ }
441
+ const timeMatches = [...stderr.matchAll(/time=(\d+):(\d+):(\d+(?:\.\d+)?)/g)];
442
+ if (timeMatches.length > 0) {
443
+ const lastMatch = timeMatches[timeMatches.length - 1];
444
+ const hours = parseFloat(lastMatch[1]);
445
+ const minutes = parseFloat(lastMatch[2]);
446
+ const seconds = parseFloat(lastMatch[3]);
447
+ return hours * 3600 + minutes * 60 + seconds;
448
+ }
449
+ } catch {
450
+ }
451
+ return void 0;
452
+ }
412
453
  async function getAudioDuration(audioPath) {
413
454
  const metadata = await execFFprobe(audioPath);
414
- const duration = metadata.format.duration;
415
- if (typeof duration !== "number") {
416
- throw new Error("Could not determine audio duration");
455
+ if (metadata.format?.duration) {
456
+ const duration = parseFloat(String(metadata.format.duration));
457
+ if (!isNaN(duration) && duration > 0) {
458
+ return duration;
459
+ }
460
+ }
461
+ if (metadata.streams?.length) {
462
+ for (const stream of metadata.streams) {
463
+ if (stream.duration) {
464
+ const duration = parseFloat(String(stream.duration));
465
+ if (!isNaN(duration) && duration > 0) {
466
+ return duration;
467
+ }
468
+ }
469
+ }
417
470
  }
418
- return duration;
471
+ const ffmpegDuration = await getDurationViaFfmpeg(audioPath);
472
+ if (ffmpegDuration !== void 0 && ffmpegDuration > 0) {
473
+ return ffmpegDuration;
474
+ }
475
+ const hasFormat = !!metadata.format;
476
+ const hasStreams = !!metadata.streams?.length;
477
+ throw new Error(
478
+ `Could not determine audio duration (format: ${hasFormat}, streams: ${hasStreams}). File may be corrupted or in an unsupported format.`
479
+ );
419
480
  }
420
481
  async function detectSilenceRegions(audioPath, config = {}) {
421
482
  const { minSilenceDurSec, silenceThreshold } = { ...DEFAULT_SPLIT_CONFIG, ...config };
@@ -648,17 +709,44 @@ var defaultLogger = {
648
709
  }
649
710
  // silent by default
650
711
  };
712
+ var MIME_TO_EXT = {
713
+ "audio/mpeg": ".mp3",
714
+ "audio/mp3": ".mp3",
715
+ "audio/wav": ".wav",
716
+ "audio/x-wav": ".wav",
717
+ "audio/ogg": ".ogg",
718
+ "audio/flac": ".flac",
719
+ "audio/x-flac": ".flac",
720
+ "audio/mp4": ".m4a",
721
+ "audio/m4a": ".m4a",
722
+ "audio/aac": ".aac",
723
+ "audio/webm": ".webm",
724
+ "audio/opus": ".opus"
725
+ };
726
+ function getExtFromContentType(contentType, url) {
727
+ if (contentType) {
728
+ const mimeType = contentType.split(";")[0].trim().toLowerCase();
729
+ if (MIME_TO_EXT[mimeType]) {
730
+ return MIME_TO_EXT[mimeType];
731
+ }
732
+ }
733
+ try {
734
+ const urlPath = new URL(url).pathname;
735
+ const ext = path3.extname(urlPath).toLowerCase();
736
+ if (ext && [".mp3", ".wav", ".ogg", ".flac", ".m4a", ".aac", ".webm", ".opus"].includes(ext)) {
737
+ return ext;
738
+ }
739
+ } catch {
740
+ }
741
+ return ".audio";
742
+ }
651
743
  async function downloadToTempFile(url, outputDir) {
652
- const tempPath = path3.join(outputDir, `download-${Date.now()}.audio`);
653
- const file = fs3.createWriteStream(tempPath);
654
744
  return new Promise((resolve, reject) => {
655
745
  const protocol = url.startsWith("https") ? https : http;
656
746
  protocol.get(url, (response) => {
657
747
  if (response.statusCode === 301 || response.statusCode === 302) {
658
748
  const redirectUrl = response.headers.location;
659
749
  if (redirectUrl) {
660
- file.close();
661
- fs3.unlinkSync(tempPath);
662
750
  downloadToTempFile(redirectUrl, outputDir).then(resolve).catch(reject);
663
751
  return;
664
752
  }
@@ -667,14 +755,20 @@ async function downloadToTempFile(url, outputDir) {
667
755
  reject(new Error(`Failed to download: HTTP ${response.statusCode}`));
668
756
  return;
669
757
  }
758
+ const ext = getExtFromContentType(response.headers["content-type"], url);
759
+ const tempPath = path3.join(outputDir, `download-${Date.now()}${ext}`);
760
+ const file = fs3.createWriteStream(tempPath);
670
761
  response.pipe(file);
671
762
  file.on("finish", () => {
672
763
  file.close();
673
764
  resolve(tempPath);
674
765
  });
675
- }).on("error", (err) => {
676
- fs3.unlink(tempPath, () => {
766
+ file.on("error", (err) => {
767
+ fs3.unlink(tempPath, () => {
768
+ });
769
+ reject(err);
677
770
  });
771
+ }).on("error", (err) => {
678
772
  reject(err);
679
773
  });
680
774
  });
@@ -706,7 +800,7 @@ function createMistralTranscriber(config) {
706
800
  language,
707
801
  model = defaultModel,
708
802
  diarize = true,
709
- timestamps = language ? void 0 : "word",
803
+ timestamps = language ? void 0 : "segment",
710
804
  autoSplit,
711
805
  splitOutputDir,
712
806
  logger: customLogger,
@@ -4,7 +4,10 @@
4
4
  * This module provides a simple, event-driven interface for streaming audio
5
5
  * transcription using Mistral's realtime WebSocket API.
6
6
  *
7
- * @example
7
+ * **Node.js only** - Browser support is currently disabled due to WebSocket
8
+ * authentication limitations with Mistral API.
9
+ *
10
+ * @example Node.js
8
11
  * ```typescript
9
12
  * import {
10
13
  * createRealtimeTranscriber,
@@ -15,7 +18,7 @@
15
18
  * apiKey: process.env.MISTRAL_API_KEY,
16
19
  * })
17
20
  *
18
- * const { stream, stop } = captureAudioFromMicrophone(16000)
21
+ * const { stream, stop } = await captureAudioFromMicrophone(16000)
19
22
  *
20
23
  * try {
21
24
  * for await (const event of transcriber.transcribe(stream)) {
@@ -33,6 +36,5 @@
33
36
  *
34
37
  * @module @wovin/tranz/realtime
35
38
  */
36
- export { createRealtimeTranscriber, captureAudioFromMicrophone, type RealtimeEvent, type RealtimeConfig, type RealtimeTranscriber, type TranscribeOptions, type AudioFormat, type AudioCaptureResult, type SessionCreatedEvent, type SessionUpdatedEvent, type TranscriptionTextDeltaEvent, type TranscriptionLanguageEvent, type TranscriptionSegmentEvent, type TranscriptionDoneEvent, type ErrorEvent, } from "./utils/transcription/realtime.js";
37
- export { AudioEncoding } from "@mistralai/mistralai/extra/realtime";
39
+ export { createRealtimeTranscriber, captureAudioFromMicrophone, captureAudioFromBrowser, AudioEncoding, type RealtimeEvent, type RealtimeConfig, type RealtimeTranscriber, type TranscribeOptions, type AudioFormat, type AudioCaptureResult, type SessionCreatedEvent, type SessionUpdatedEvent, type TranscriptionTextDeltaEvent, type TranscriptionLanguageEvent, type TranscriptionSegmentEvent, type TranscriptionDoneEvent, type ErrorEvent, } from "./utils/transcription/realtime.js";
38
40
  //# sourceMappingURL=realtime.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"realtime.d.ts","sourceRoot":"","sources":["../src/realtime.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAkCG;AAEH,OAAO,EACL,yBAAyB,EACzB,0BAA0B,EAC1B,KAAK,aAAa,EAClB,KAAK,cAAc,EACnB,KAAK,mBAAmB,EACxB,KAAK,iBAAiB,EACtB,KAAK,WAAW,EAChB,KAAK,kBAAkB,EACvB,KAAK,mBAAmB,EACxB,KAAK,mBAAmB,EACxB,KAAK,2BAA2B,EAChC,KAAK,0BAA0B,EAC/B,KAAK,yBAAyB,EAC9B,KAAK,sBAAsB,EAC3B,KAAK,UAAU,GAChB,MAAM,mCAAmC,CAAC;AAG3C,OAAO,EAAE,aAAa,EAAE,MAAM,qCAAqC,CAAC"}
1
+ {"version":3,"file":"realtime.d.ts","sourceRoot":"","sources":["../src/realtime.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAqCG;AAEH,OAAO,EACL,yBAAyB,EACzB,0BAA0B,EAC1B,uBAAuB,EACvB,aAAa,EACb,KAAK,aAAa,EAClB,KAAK,cAAc,EACnB,KAAK,mBAAmB,EACxB,KAAK,iBAAiB,EACtB,KAAK,WAAW,EAChB,KAAK,kBAAkB,EACvB,KAAK,mBAAmB,EACxB,KAAK,mBAAmB,EACxB,KAAK,2BAA2B,EAChC,KAAK,0BAA0B,EAC/B,KAAK,yBAAyB,EAC9B,KAAK,sBAAsB,EAC3B,KAAK,UAAU,GAChB,MAAM,mCAAmC,CAAC"}
@@ -1,32 +1,309 @@
1
+ // src/utils/transcription/runtime.ts
2
/**
 * Resolve the WebSocket constructor to use in the current runtime.
 *
 * Node.js-like runtimes (detected through `process.versions.node`) prefer the
 * `ws` package. Sniffing for `navigator` is not a reliable browser test:
 * current Node.js releases expose `navigator` (and a global `WebSocket`) too,
 * which would otherwise route Node.js to the native socket or skip `ws`
 * entirely.
 *
 * Resolution order:
 *   1. not Node.js and a global `WebSocket` exists -> the global constructor
 *   2. otherwise try `import("ws")`
 *   3. if that import fails, fall back to a global `WebSocket` when present
 *
 * @returns {Promise<any>} WebSocket constructor (global WebSocket or `ws`)
 * @throws {Error} when no implementation can be found; the failed import is
 *   attached as `cause`
 */
async function getWebSocketImpl() {
  const nativeWebSocket = typeof globalThis !== "undefined" ? globalThis.WebSocket : undefined;
  const hasNative = typeof nativeWebSocket !== "undefined";
  const isNode = typeof process !== "undefined" && Boolean(process.versions?.node);

  if (!isNode && hasNative) {
    return nativeWebSocket;
  }

  try {
    const WS = await import("ws");
    return WS.default || WS;
  } catch (err) {
    // Without `ws`, a native implementation is still better than failing.
    if (hasNative) {
      return nativeWebSocket;
    }
    if (isNode) {
      throw new Error(
        "WebSocket not available. In Node.js, install 'ws' package: npm install ws",
        { cause: err }
      );
    }
    throw new Error("WebSocket not available in this environment", { cause: err });
  }
}
21
+
1
22
  // src/utils/transcription/realtime.ts
2
- import { spawn } from "child_process";
3
- import {
4
- AudioEncoding,
5
- RealtimeTranscription
6
- } from "@mistralai/mistralai/extra/realtime";
23
/**
 * PCM audio encodings accepted for realtime transcription.
 * Keys are the enum member names; values are the wire identifiers.
 * (`var` is kept so the binding hoists exactly like the compiled enum did.)
 */
var AudioEncoding = {
  PcmS16le: "pcm_s16le",
  PcmS16be: "pcm_s16be",
  PcmU16le: "pcm_u16le",
  PcmU16be: "pcm_u16be",
  PcmS24le: "pcm_s24le",
  PcmS24be: "pcm_s24be",
  PcmU24le: "pcm_u24le",
  PcmU24be: "pcm_u24be",
  PcmS32le: "pcm_s32le",
  PcmS32be: "pcm_s32be",
  PcmU32le: "pcm_u32le",
  PcmU32be: "pcm_u32be",
  PcmF32le: "pcm_f32le",
  PcmF32be: "pcm_f32be",
  PcmF64le: "pcm_f64le",
  PcmF64be: "pcm_f64be"
};
7
42
/**
 * Create a realtime transcriber bound to one API key / model / endpoint.
 *
 * @param {{ apiKey: string, model?: string, baseUrl?: string }} config
 *   `model` defaults to "voxtral-mini-transcribe-realtime-2602" and
 *   `baseUrl` to "wss://api.mistral.ai".
 * @returns {{ transcribe: Function }} object whose `transcribe(audioStream, options)`
 *   is an async generator yielding the events received from the connection.
 * @throws {Error} when called in a browser (window + document + navigator present)
 */
function createRealtimeTranscriber(config) {
  const isBrowser = typeof window !== "undefined" && typeof document !== "undefined" && typeof navigator !== "undefined";
  if (isBrowser) {
    throw new Error(
      "Realtime transcription is not yet supported in browsers. Browser WebSocket API does not support authentication headers required by Mistral API. Use this API in Node.js or server-side environments only. See: https://github.com/wovin/tranz/issues"
    );
  }
  const model = config.model ?? "voxtral-mini-transcribe-realtime-2602";
  const baseUrl = config.baseUrl ?? "wss://api.mistral.ai";
  return {
    async *transcribe(audioStream, options) {
      const audioFormat = {
        encoding: options?.audioFormat?.encoding ?? "pcm_s16le" /* PcmS16le */,
        sampleRate: options?.audioFormat?.sampleRate ?? 16e3
      };
      const connection = await createConnection(
        config.apiKey,
        baseUrl,
        model,
        audioFormat
      );
      // Raised as soon as the event loop is finished (or abandoned) so the
      // sender stops forwarding audio instead of draining the whole source.
      let stopRequested = false;
      try {
        const sendAudioTask = (async () => {
          try {
            for await (const chunk of audioStream) {
              if (stopRequested || connection.isClosed) {
                break;
              }
              await connection.sendAudio(chunk);
            }
          } finally {
            await connection.endAudio();
          }
        })();
        // Mark the promise as observed: if the consumer stops iterating early
        // the `await` below never runs, and a sender failure must not become
        // an unhandled rejection. The `await` still rethrows on the normal path.
        sendAudioTask.catch(() => {});

        for await (const event of connection.events()) {
          yield event;
          if (event.type === "transcription.done" || event.type === "error") {
            break;
          }
        }
        stopRequested = true;
        await sendAudioTask;
      } finally {
        stopRequested = true;
        await connection.close();
        // Give generator-style sources a chance to release their resources.
        const maybeReturn = audioStream.return;
        if (typeof maybeReturn === "function") {
          await maybeReturn.call(audioStream);
        }
      }
    }
  };
}
29
- function captureAudioFromMicrophone(sampleRate = 16e3) {
94
/**
 * Open a realtime transcription WebSocket and wrap it in a small connection
 * object once the handshake (`waitForSession`) has completed.
 *
 * NOTE(review): `audioFormat` is accepted but never sent to the server from
 * this function — confirm whether a session update carrying the format is
 * expected after the handshake.
 *
 * @param {string} apiKey - sent as `Authorization: Bearer …` header when
 *   running under Node.js
 * @param {string} baseUrl - endpoint base passed to `buildWebSocketUrl`
 * @param {string} model - model identifier passed to `buildWebSocketUrl`
 * @param {object} audioFormat - currently unused (see note above)
 * @returns {Promise<object>} connection exposing `isClosed`, `events()`,
 *   `sendAudio(chunk)`, `endAudio()` and `close()`
 */
async function createConnection(apiKey, baseUrl, model, audioFormat) {
  const WebSocketImpl = await getWebSocketImpl();
  const wsUrl = buildWebSocketUrl(baseUrl, model, apiKey);
  const isNodeWs = typeof process !== "undefined" && process.versions?.node;
  const websocket = isNodeWs ? new WebSocketImpl(wsUrl, {
    headers: {
      Authorization: `Bearer ${apiKey}`
    }
  }) : new WebSocketImpl(wsUrl);
  // Resolves on "session.created"; rejects on error, close or timeout.
  await waitForSession(websocket);
  let closed = false;
  const connection = {
    /** True once close was requested/observed or the socket is CLOSING (2) / CLOSED (3). */
    get isClosed() {
      return closed || websocket.readyState === 2 || websocket.readyState === 3;
    },
    /**
     * Async generator over parsed server events. Socket errors are yielded as
     * `{ type: "error" }` events; the generator ends when the socket closes.
     */
    async *events() {
      const queue = [];
      let resolver = null;
      let done = false;
      // Hand the item to a waiting consumer, or buffer it until one asks.
      const push = (item) => {
        if (done) return;
        if (resolver) {
          const resolve = resolver;
          resolver = null;
          resolve(item);
          return;
        }
        queue.push(item);
      };
      const handleMessage = (event) => {
        push({ kind: "message", data: event.data });
      };
      const handleClose = () => {
        closed = true;
        push({ kind: "close" });
      };
      const handleError = (event) => {
        // Keep whatever detail the implementation attaches to the event
        // instead of replacing it with a generic message.
        const detail = event?.error?.message ?? event?.message;
        push({
          kind: "error",
          error: new Error(
            detail ? `WebSocket connection error: ${detail}` : "WebSocket connection error"
          )
        });
      };
      websocket.addEventListener("message", handleMessage);
      websocket.addEventListener("close", handleClose);
      websocket.addEventListener("error", handleError);
      try {
        while (true) {
          const item = queue.length > 0 ? queue.shift() : await new Promise((resolve) => {
            resolver = resolve;
          });
          if (item.kind === "close") break;
          if (item.kind === "error") {
            yield {
              type: "error",
              error: { message: item.error.message }
            };
            continue;
          }
          yield parseRealtimeEvent(item.data);
        }
      } finally {
        done = true;
        websocket.removeEventListener("message", handleMessage);
        websocket.removeEventListener("close", handleClose);
        websocket.removeEventListener("error", handleError);
        // Release a consumer that is still parked on the pending promise.
        if (resolver !== null) {
          const resolve = resolver;
          resolver = null;
          resolve({ kind: "close" });
        }
      }
    },
    /** Send one audio chunk, base64-encoded, as an "input_audio.append" message. */
    async sendAudio(chunk) {
      if (connection.isClosed) {
        throw new Error("Connection is closed");
      }
      await sendJson(websocket, {
        type: "input_audio.append",
        audio: arrayBufferToBase64(chunk)
      });
    },
    /** Signal end of audio; a no-op when the connection is already closed. */
    async endAudio() {
      if (connection.isClosed) return;
      await sendJson(websocket, { type: "input_audio.end" });
    },
    /** Close the socket with code 1000 and wait for its "close" event. */
    async close() {
      if (closed) return;
      closed = true;
      if (websocket.readyState === 3) return;
      await new Promise((resolve) => {
        const finalize = () => {
          websocket.removeEventListener("close", finalize);
          resolve();
        };
        websocket.addEventListener("close", finalize);
        websocket.close(1e3, "");
      });
    }
  };
  return connection;
}
200
/**
 * Build the realtime transcription endpoint URL.
 *
 * - `http:` / `https:` bases are mapped to `ws:` / `wss:`.
 * - A path on the base (e.g. a reverse-proxy prefix) is kept even when it has
 *   no trailing slash; plain relative resolution would drop its last segment.
 *
 * @param {string} baseUrl - endpoint base, e.g. "wss://api.mistral.ai"
 * @param {string} model - model identifier, added as the `model` query parameter
 * @param {string} [apiKey] - accepted for signature compatibility; it is not
 *   placed in the URL
 * @returns {string} absolute WebSocket URL
 */
function buildWebSocketUrl(baseUrl, model, apiKey) {
  const wsBase = new URL(String(baseUrl).replace(/^http(s?):/i, "ws$1:"));
  if (!wsBase.pathname.endsWith("/")) {
    wsBase.pathname += "/";
  }
  const url = new URL("v1/audio/transcriptions/realtime", wsBase);
  url.searchParams.set("model", model);
  return url.toString();
}
205
/**
 * Wait for the server's "session.created" event on a freshly opened socket.
 *
 * Messages of any other type (apart from "error") are ignored while waiting.
 * All listeners and the timer are removed before the promise settles.
 *
 * @param {object} ws - socket exposing addEventListener/removeEventListener/close
 * @param {number} [timeoutMs=10000] - how long to wait before giving up
 * @returns {Promise<object>} the parsed "session.created" event
 * @throws {Error} on timeout, on an "error" event, or when the socket closes
 *   or errors during the handshake (the socket is closed on timeout and on
 *   an "error" event)
 */
async function waitForSession(ws, timeoutMs = 1e4) {
  return new Promise((resolve, reject) => {
    let timer = null;
    const cleanup = () => {
      clearTimeout(timer);
      ws.removeEventListener("message", handleMessage);
      ws.removeEventListener("close", handleClose);
      ws.removeEventListener("error", handleError);
    };
    const handleMessage = (event) => {
      try {
        const parsed = parseRealtimeEvent(event.data);
        if (parsed.type === "session.created") {
          cleanup();
          resolve(parsed);
        } else if (parsed.type === "error") {
          cleanup();
          ws.close();
          reject(
            new Error(
              `Realtime transcription error: ${JSON.stringify(parsed.error)}`
            )
          );
        }
      } catch (err) {
        cleanup();
        ws.close();
        reject(err);
      }
    };
    const handleClose = () => {
      cleanup();
      reject(new Error("WebSocket closed during handshake"));
    };
    const handleError = () => {
      cleanup();
      reject(new Error("WebSocket error during handshake"));
    };
    timer = setTimeout(() => {
      cleanup();
      ws.close();
      reject(new Error("Timeout waiting for session creation"));
    }, timeoutMs);
    ws.addEventListener("message", handleMessage);
    ws.addEventListener("close", handleClose);
    ws.addEventListener("error", handleError);
  });
}
252
/**
 * Decode one WebSocket message into an event object. Never throws.
 *
 * @param {string|ArrayBuffer|ArrayBufferView} data - raw message payload;
 *   non-string input is decoded with TextDecoder (UTF-8 by default)
 * @returns {object} the parsed payload when it is an object with a string
 *   `type`; otherwise `{ type: "error", error: { message } }` describing
 *   either a missing type or a decode/parse failure
 */
function parseRealtimeEvent(data) {
  try {
    const text = typeof data === "string" ? data : new TextDecoder().decode(data);
    const payload = JSON.parse(text);
    // JSON `null` and primitives are valid JSON but not events; report them
    // as malformed events rather than as a property-access TypeError.
    const isObject = payload !== null && typeof payload === "object";
    if (!isObject || typeof payload.type !== "string") {
      return {
        type: "error",
        error: { message: "Invalid event: missing type" }
      };
    }
    return payload;
  } catch (err) {
    return {
      type: "error",
      error: { message: `Failed to parse event: ${err}` }
    };
  }
}
270
/**
 * Serialize `payload` as JSON and send it over the socket.
 *
 * Two send contracts are supported:
 * - EventEmitter-style sockets (they expose `.on`, as the `ws` package's
 *   sockets do): `send(data, callback)`; the promise settles from the callback.
 * - Standard WebSocket objects: `send(data)` takes no callback, so the promise
 *   resolves as soon as the call returns. Passing a callback there would
 *   leave the promise pending forever.
 *
 * @param {object} ws - socket with a `send` method
 * @param {unknown} payload - JSON-serializable message
 * @returns {Promise<void>}
 * @throws rejects with the send error (reported through the callback or
 *   thrown synchronously), or a TypeError when `ws.send` is not a function
 */
async function sendJson(ws, payload) {
  const message = JSON.stringify(payload);
  if (typeof ws?.send !== "function") {
    throw new TypeError("WebSocket implementation has no send() method");
  }
  if (typeof ws.on !== "function") {
    ws.send(message);
    return;
  }
  await new Promise((resolve, reject) => {
    ws.send(message, (err) => {
      if (err) reject(err);
      else resolve();
    });
  });
}
290
/**
 * Base64-encode binary data.
 *
 * Accepts an ArrayBuffer, any typed-array/DataView (its underlying bytes are
 * encoded, honoring byteOffset/byteLength) or an iterable of byte values.
 * Uses `Buffer` when available, otherwise `btoa` fed in bounded slices so
 * large inputs neither build a per-byte string array nor exceed argument
 * limits.
 *
 * @param {ArrayBuffer|ArrayBufferView|Iterable<number>} buffer
 * @returns {string} base64 text ("" for empty input)
 * @throws {Error} when neither `Buffer` nor `btoa` exists
 */
function arrayBufferToBase64(buffer) {
  let bytes;
  if (buffer instanceof ArrayBuffer) {
    bytes = new Uint8Array(buffer);
  } else if (ArrayBuffer.isView(buffer)) {
    bytes = new Uint8Array(buffer.buffer, buffer.byteOffset, buffer.byteLength);
  } else {
    bytes = Uint8Array.from(buffer);
  }

  if (typeof Buffer !== "undefined") {
    return Buffer.from(bytes.buffer, bytes.byteOffset, bytes.byteLength).toString("base64");
  }
  if (typeof btoa !== "undefined") {
    const SLICE = 0x8000;
    let binary = "";
    for (let start = 0; start < bytes.length; start += SLICE) {
      binary += String.fromCharCode(...bytes.subarray(start, start + SLICE));
    }
    return btoa(binary);
  }
  throw new Error("No base64 encoding available");
}
300
+ async function captureAudioFromMicrophone(sampleRate = 16e3) {
301
+ if (typeof process === "undefined" || !process.versions?.node) {
302
+ throw new Error(
303
+ "captureAudioFromMicrophone() is Node.js only. Use captureAudioFromBrowser() in browsers."
304
+ );
305
+ }
306
+ const { spawn } = await import("child_process");
30
307
  const recorder = spawn(
31
308
  "rec",
32
309
  [
@@ -53,8 +330,7 @@ function captureAudioFromMicrophone(sampleRate = 16e3) {
53
330
  { stdio: ["ignore", "pipe", "ignore"] }
54
331
  );
55
332
  recorder.on("error", (err) => {
56
- const error = err;
57
- if (error.code === "ENOENT") {
333
+ if (err.code === "ENOENT") {
58
334
  console.error(
59
335
  "\nError: 'rec' command not found. Please install SoX:",
60
336
  "\n macOS: brew install sox",
@@ -85,11 +361,88 @@ function captureAudioFromMicrophone(sampleRate = 16e3) {
85
361
  };
86
362
  return { stream, stop };
87
363
  }
88
-
89
- // src/realtime.ts
90
- import { AudioEncoding as AudioEncoding2 } from "@mistralai/mistralai/extra/realtime";
364
/**
 * Capture microphone audio in a browser through the Web Audio API.
 *
 * CURRENTLY DISABLED: the first statement always throws, so everything below
 * it is a dormant implementation kept for when browser support is enabled.
 * The dormant code converts each ScriptProcessor buffer to 16-bit signed
 * little-endian PCM bytes and queues it; every captured chunk goes through
 * the queue so none is lost while the consumer is waiting.
 *
 * @param {number} [sampleRate=16000] - requested capture sample rate in Hz
 * @returns {Promise<{ stream: AsyncGenerator<Uint8Array>, stop: () => void }>}
 * @throws {Error} always, while browser mode is disabled
 */
async function captureAudioFromBrowser(sampleRate = 16e3) {
  throw new Error(
    "Browser realtime transcription is not yet supported. Browser WebSocket API does not support authentication headers required by Mistral API. Use captureAudioFromMicrophone() in Node.js environments instead."
  );
  // ---- dormant implementation: unreachable until the guard above is removed ----
  if (typeof navigator === "undefined" || !navigator.mediaDevices) {
    throw new Error(
      "captureAudioFromBrowser() requires a browser environment with getUserMedia support"
    );
  }
  const mediaStream = await navigator.mediaDevices.getUserMedia({
    audio: {
      channelCount: 1,
      sampleRate,
      echoCancellation: true,
      noiseSuppression: true
    }
  });
  const audioContext = new AudioContext({ sampleRate });
  const source = audioContext.createMediaStreamSource(mediaStream);
  const processor = audioContext.createScriptProcessor(4096, 1, 1);
  let stopped = false;
  const chunks = [];
  let wake = null;
  // Wake the consumer if it is parked waiting for data (or for stop()).
  const notify = () => {
    if (wake) {
      const resume = wake;
      wake = null;
      resume();
    }
  };
  processor.onaudioprocess = (event) => {
    if (stopped) return;
    const inputData = event.inputBuffer.getChannelData(0);
    const bytes = new Uint8Array(inputData.length * 2);
    const view = new DataView(bytes.buffer);
    for (let i = 0; i < inputData.length; i++) {
      // Clamp to [-1, 1], then scale asymmetrically to the int16 range.
      const sample = Math.max(-1, Math.min(1, inputData[i]));
      view.setInt16(i * 2, sample < 0 ? sample * 32768 : sample * 32767, true);
    }
    chunks.push(bytes);
    notify();
  };
  source.connect(processor);
  processor.connect(audioContext.destination);
  const stream = (async function* () {
    try {
      while (!stopped) {
        if (chunks.length > 0) {
          yield chunks.shift();
        } else {
          await new Promise((resolve) => {
            wake = resolve;
          });
        }
      }
    } finally {
      processor.disconnect();
      source.disconnect();
      mediaStream.getTracks().forEach((track) => track.stop());
      await audioContext.close();
    }
  })();
  const stop = () => {
    stopped = true;
    notify();
  };
  return { stream, stop };
}
91
443
  export {
92
- AudioEncoding2 as AudioEncoding,
444
+ AudioEncoding,
445
+ captureAudioFromBrowser,
93
446
  captureAudioFromMicrophone,
94
447
  createRealtimeTranscriber
95
448
  };
@@ -50,6 +50,8 @@ export interface AudioSegment {
50
50
  export declare const DEFAULT_SPLIT_CONFIG: SplitConfig;
51
51
  /**
52
52
  * Get the duration of an audio file in seconds
53
+ * Tries format.duration first, then falls back to stream duration,
54
+ * and finally uses ffmpeg decode as last resort
53
55
  */
54
56
  export declare function getAudioDuration(audioPath: string): Promise<number>;
55
57
  /**
@@ -1 +1 @@
1
- {"version":3,"file":"split.d.ts","sourceRoot":"","sources":["../../../src/utils/audio/split.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAOH;;GAEG;AACH,MAAM,WAAW,WAAW;IAC1B,iEAAiE;IACjE,cAAc,EAAE,MAAM,CAAA;IACtB,qEAAqE;IACrE,gBAAgB,EAAE,MAAM,CAAA;IACxB,kDAAkD;IAClD,gBAAgB,EAAE,MAAM,CAAA;IACxB,wDAAwD;IACxD,mBAAmB,EAAE,OAAO,CAAA;IAC5B,uDAAuD;IACvD,aAAa,EAAE,MAAM,CAAA;CACtB;AAED;;GAEG;AACH,MAAM,WAAW,aAAa;IAC5B,QAAQ,EAAE,MAAM,CAAA;IAChB,MAAM,EAAE,MAAM,CAAA;IACd,WAAW,EAAE,MAAM,CAAA;CACpB;AAED;;GAEG;AACH,MAAM,WAAW,UAAU;IACzB,yDAAyD;IACzD,OAAO,EAAE,MAAM,CAAA;IACf,kDAAkD;IAClD,eAAe,EAAE,MAAM,CAAA;CACxB;AAED;;GAEG;AACH,MAAM,WAAW,YAAY;IAC3B,KAAK,EAAE,MAAM,CAAA;IACb,QAAQ,EAAE,MAAM,CAAA;IAChB,MAAM,EAAE,MAAM,CAAA;IACd,WAAW,EAAE,MAAM,CAAA;IACnB,UAAU,EAAE,MAAM,CAAA;CACnB;AAED;;GAEG;AACH,eAAO,MAAM,oBAAoB,EAAE,WAMlC,CAAA;AA4CD;;GAEG;AACH,wBAAsB,gBAAgB,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAOzE;AAED;;;GAGG;AACH,wBAAsB,oBAAoB,CACxC,SAAS,EAAE,MAAM,EACjB,MAAM,GAAE,OAAO,CAAC,WAAW,CAAM,GAChC,OAAO,CAAC,aAAa,EAAE,CAAC,CAsD1B;AAED;;;;;;;;GAQG;AACH,wBAAgB,sBAAsB,CACpC,cAAc,EAAE,aAAa,EAAE,EAC/B,aAAa,EAAE,MAAM,EACrB,MAAM,GAAE,OAAO,CAAC,WAAW,CAAM,GAChC,UAAU,EAAE,CAwEd;AAED;;;;;;;;;GASG;AACH,wBAAsB,kBAAkB,CACtC,SAAS,EAAE,MAAM,EACjB,WAAW,EAAE,UAAU,EAAE,EACzB,aAAa,EAAE,MAAM,EACrB,SAAS,EAAE,MAAM,EACjB,QAAQ,EAAE,MAAM,GACf,OAAO,CAAC,YAAY,EAAE,CAAC,CA2CzB;AAED;;;;;;;;GAQG;AACH,wBAAsB,cAAc,CAClC,SAAS,EAAE,MAAM,EACjB,SAAS,EAAE,MAAM,EACjB,MAAM,GAAE,OAAO,CAAC,WAAW,CAAM,GAChC,OAAO,CAAC,YAAY,EAAE,CAAC,CAuCzB;AAED;;GAEG;AACH,MAAM,WAAW,aAAa;IAC5B,aAAa,EAAE,MAAM,CAAA;IACrB,WAAW,EAAE,MAAM,CAAA;IACnB,WAAW,EAAE,UAAU,EAAE,CAAA;IACzB,cAAc,EAAE,aAAa,EAAE,CAAA;IAC/B,UAAU,EAAE,OAAO,CAAA;CACpB;AAED;;;GAGG;AACH,wBAAsB,kBAAkB,CACtC,SAAS,EAAE,MAAM,EACjB,MAAM,GAAE,OAAO,CAAC,WAAW,CAAM,GAChC,OAAO,CAAC,aAAa,CAAC,CA0BxB"}
1
+ {"version":3,"file":"split.d.ts","sourceRoot":"","sources":["../../../src/utils/audio/split.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAOH;;GAEG;AACH,MAAM,WAAW,WAAW;IAC1B,iEAAiE;IACjE,cAAc,EAAE,MAAM,CAAA;IACtB,qEAAqE;IACrE,gBAAgB,EAAE,MAAM,CAAA;IACxB,kDAAkD;IAClD,gBAAgB,EAAE,MAAM,CAAA;IACxB,wDAAwD;IACxD,mBAAmB,EAAE,OAAO,CAAA;IAC5B,uDAAuD;IACvD,aAAa,EAAE,MAAM,CAAA;CACtB;AAED;;GAEG;AACH,MAAM,WAAW,aAAa;IAC5B,QAAQ,EAAE,MAAM,CAAA;IAChB,MAAM,EAAE,MAAM,CAAA;IACd,WAAW,EAAE,MAAM,CAAA;CACpB;AAED;;GAEG;AACH,MAAM,WAAW,UAAU;IACzB,yDAAyD;IACzD,OAAO,EAAE,MAAM,CAAA;IACf,kDAAkD;IAClD,eAAe,EAAE,MAAM,CAAA;CACxB;AAED;;GAEG;AACH,MAAM,WAAW,YAAY;IAC3B,KAAK,EAAE,MAAM,CAAA;IACb,QAAQ,EAAE,MAAM,CAAA;IAChB,MAAM,EAAE,MAAM,CAAA;IACd,WAAW,EAAE,MAAM,CAAA;IACnB,UAAU,EAAE,MAAM,CAAA;CACnB;AAED;;GAEG;AACH,eAAO,MAAM,oBAAoB,EAAE,WAMlC,CAAA;AAsFD;;;;GAIG;AACH,wBAAsB,gBAAgB,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAoCzE;AAED;;;GAGG;AACH,wBAAsB,oBAAoB,CACxC,SAAS,EAAE,MAAM,EACjB,MAAM,GAAE,OAAO,CAAC,WAAW,CAAM,GAChC,OAAO,CAAC,aAAa,EAAE,CAAC,CAsD1B;AAED;;;;;;;;GAQG;AACH,wBAAgB,sBAAsB,CACpC,cAAc,EAAE,aAAa,EAAE,EAC/B,aAAa,EAAE,MAAM,EACrB,MAAM,GAAE,OAAO,CAAC,WAAW,CAAM,GAChC,UAAU,EAAE,CAwEd;AAED;;;;;;;;;GASG;AACH,wBAAsB,kBAAkB,CACtC,SAAS,EAAE,MAAM,EACjB,WAAW,EAAE,UAAU,EAAE,EACzB,aAAa,EAAE,MAAM,EACrB,SAAS,EAAE,MAAM,EACjB,QAAQ,EAAE,MAAM,GACf,OAAO,CAAC,YAAY,EAAE,CAAC,CA2CzB;AAED;;;;;;;;GAQG;AACH,wBAAsB,cAAc,CAClC,SAAS,EAAE,MAAM,EACjB,SAAS,EAAE,MAAM,EACjB,MAAM,GAAE,OAAO,CAAC,WAAW,CAAM,GAChC,OAAO,CAAC,YAAY,EAAE,CAAC,CAuCzB;AAED;;GAEG;AACH,MAAM,WAAW,aAAa;IAC5B,aAAa,EAAE,MAAM,CAAA;IACrB,WAAW,EAAE,MAAM,CAAA;IACnB,WAAW,EAAE,UAAU,EAAE,CAAA;IACzB,cAAc,EAAE,aAAa,EAAE,CAAA;IAC/B,UAAU,EAAE,OAAO,CAAA;CACpB;AAED;;;GAGG;AACH,wBAAsB,kBAAkB,CACtC,SAAS,EAAE,MAAM,EACjB,MAAM,GAAE,OAAO,CAAC,WAAW,CAAM,GAChC,OAAO,CAAC,aAAa,CAAC,CA0BxB"}
@@ -1 +1 @@
1
- {"version":3,"file":"providers.d.ts","sourceRoot":"","sources":["../../../src/utils/transcription/providers.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAUH;;;GAGG;AACH,MAAM,WAAW,mBAAmB;IAClC,mCAAmC;IACnC,IAAI,EAAE,MAAM,CAAA;IACZ,sEAAsE;IACtE,WAAW,CAAC,EAAE,GAAG,CAAA;IACjB,4CAA4C;IAC5C,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,kDAAkD;IAClD,UAAU,CAAC,EAAE,MAAM,CAAA;IACnB,iDAAiD;IACjD,KAAK,CAAC,EAAE,GAAG,EAAE,CAAA;IACb,mCAAmC;IACnC,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,0CAA0C;IAC1C,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,6DAA6D;IAC7D,KAAK,CAAC,EAAE,MAAM,CAAA;CACf;AAED;;;GAGG;AACH,MAAM,WAAW,qBAAqB;IACpC,+BAA+B;IAC/B,IAAI,EAAE,MAAM,CAAA;IACZ,+DAA+D;IAC/D,mBAAmB,CAAC,EAAE,MAAM,CAAA;IAC5B;;;;OAIG;IACH,UAAU,CAAC,MAAM,EAAE,gBAAgB,GAAG,OAAO,CAAC,mBAAmB,CAAC,CAAA;CACnE;AAED;;;GAGG;AACH,MAAM,WAAW,gBAAgB;IAC/B,2CAA2C;IAC3C,SAAS,CAAC,EAAE,MAAM,CAAA;IAClB,iCAAiC;IACjC,WAAW,CAAC,EAAE,MAAM,CAAA;IACpB,gEAAgE;IAChE,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,iDAAiD;IACjD,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,yDAAyD;IACzD,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,yDAAyD;IACzD,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,qDAAqD;IACrD,MAAM,CAAC,EAAE,MAAM,CAAA;IACf,oDAAoD;IACpD,OAAO,CAAC,EAAE,OAAO,CAAA;IACjB,iEAAiE;IACjE,oBAAoB,CAAC,EAAE,SAAS,GAAG,MAAM,CAAA;IACzC,4CAA4C;IAC5C,SAAS,CAAC,EAAE,MAAM,CAAA;IAClB,sDAAsD;IACtD,SAAS,CAAC,EAAE,MAAM,CAAA;IAClB,oCAAoC;IACpC,MAAM,CAAC,EAAE,GAAG,CAAA;CACb;AAED;;GAEG;AACH,MAAM,MAAM,YAAY,GAAG,SAAS,GAAG,SAAS,GAAG,SAAS,CAAA;AAE5D;;;;;GAKG;AACH,wBAAgB,cAAc,CAAC,YAAY,EAAE,YAAY,EAAE,MAAM,CAAC,EAAE,GAAG,GAAG,qBAAqB,CAW9F;AASD;;;GAGG;AACH,qBAAa,eAAgB,YAAW,qBAAqB;IAC3D,IAAI,SAAY;IAEhB,OAAO,CAAC,QAAQ,CAAQ;IAExB,MAAM,CAAC,QAAQ;;;;;;;;;;;;;;;MAkBd;gBAEW,MAAM,CAAC,EAAE,GAAG;IAKlB,UAAU,CAAC,MAAM,EAAE,gBAAgB,GAAG,OAAO,CAAC,mBAAmB,CAAC;YA+F1D,4BAA4B;CAkC3C;AAED;;;GAGG;AACH,eAAO,MAAM,cAAc;IACzB,2EAA2E;;IAE3E,kEAAkE;;IAElE,4CAA4C;;IAE5C,uCAAuC;;CAExC,CAAA;AAED,qBAAa,eAAgB,YAAW,qBAAqB;IAC3D,IAAI,SAAY;IAChB,mBAAmB,SAAqC;IAExD;;OAEG;IACH,MAAM,CAAC,WAAW,CAAC,WAAW,EAAE,MAAM,GAAG,OAAO;IAIhD;;OAEG;IACH,MAAM,CAAC,wBAAwB,IAAI,MAAM;IAInC,UAAU,CAAC,MAAM,E
AAE,gBAAgB,GAAG,OAAO,CAAC,mBAAmB,CAAC;CAmGzE;AAED;;;GAGG;AACH,qBAAa,eAAgB,YAAW,qBAAqB;IAC3D,IAAI,SAAY;IAEV,UAAU,CAAC,MAAM,EAAE,gBAAgB,GAAG,OAAO,CAAC,mBAAmB,CAAC;CAkGzE"}
1
+ {"version":3,"file":"providers.d.ts","sourceRoot":"","sources":["../../../src/utils/transcription/providers.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAUH;;;GAGG;AACH,MAAM,WAAW,mBAAmB;IAClC,mCAAmC;IACnC,IAAI,EAAE,MAAM,CAAA;IACZ,sEAAsE;IACtE,WAAW,CAAC,EAAE,GAAG,CAAA;IACjB,4CAA4C;IAC5C,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,kDAAkD;IAClD,UAAU,CAAC,EAAE,MAAM,CAAA;IACnB,iDAAiD;IACjD,KAAK,CAAC,EAAE,GAAG,EAAE,CAAA;IACb,mCAAmC;IACnC,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,0CAA0C;IAC1C,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,6DAA6D;IAC7D,KAAK,CAAC,EAAE,MAAM,CAAA;CACf;AAED;;;GAGG;AACH,MAAM,WAAW,qBAAqB;IACpC,+BAA+B;IAC/B,IAAI,EAAE,MAAM,CAAA;IACZ,+DAA+D;IAC/D,mBAAmB,CAAC,EAAE,MAAM,CAAA;IAC5B;;;;OAIG;IACH,UAAU,CAAC,MAAM,EAAE,gBAAgB,GAAG,OAAO,CAAC,mBAAmB,CAAC,CAAA;CACnE;AAED;;;GAGG;AACH,MAAM,WAAW,gBAAgB;IAC/B,2CAA2C;IAC3C,SAAS,CAAC,EAAE,MAAM,CAAA;IAClB,iCAAiC;IACjC,WAAW,CAAC,EAAE,MAAM,CAAA;IACpB,gEAAgE;IAChE,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,iDAAiD;IACjD,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,yDAAyD;IACzD,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,yDAAyD;IACzD,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,qDAAqD;IACrD,MAAM,CAAC,EAAE,MAAM,CAAA;IACf,oDAAoD;IACpD,OAAO,CAAC,EAAE,OAAO,CAAA;IACjB,iEAAiE;IACjE,oBAAoB,CAAC,EAAE,SAAS,GAAG,MAAM,CAAA;IACzC,4CAA4C;IAC5C,SAAS,CAAC,EAAE,MAAM,CAAA;IAClB,sDAAsD;IACtD,SAAS,CAAC,EAAE,MAAM,CAAA;IAClB,oCAAoC;IACpC,MAAM,CAAC,EAAE,GAAG,CAAA;CACb;AAED;;GAEG;AACH,MAAM,MAAM,YAAY,GAAG,SAAS,GAAG,SAAS,GAAG,SAAS,CAAA;AAE5D;;;;;GAKG;AACH,wBAAgB,cAAc,CAAC,YAAY,EAAE,YAAY,EAAE,MAAM,CAAC,EAAE,GAAG,GAAG,qBAAqB,CAW9F;AASD;;;GAGG;AACH,qBAAa,eAAgB,YAAW,qBAAqB;IAC3D,IAAI,SAAY;IAEhB,OAAO,CAAC,QAAQ,CAAQ;IAExB,MAAM,CAAC,QAAQ;;;;;;;;;;;;;;;MAkBd;gBAEW,MAAM,CAAC,EAAE,GAAG;IAKlB,UAAU,CAAC,MAAM,EAAE,gBAAgB,GAAG,OAAO,CAAC,mBAAmB,CAAC;YA+F1D,4BAA4B;CAkC3C;AAED;;;GAGG;AACH,eAAO,MAAM,cAAc;IACzB,2EAA2E;;IAE3E,kEAAkE;;IAElE,4CAA4C;;IAE5C,uCAAuC;;CAExC,CAAA;AAED,qBAAa,eAAgB,YAAW,qBAAqB;IAC3D,IAAI,SAAY;IAChB,mBAAmB,SAAqC;IAExD;;OAEG;IACH,MAAM,CAAC,WAAW,CAAC,WAAW,EAAE,MAAM,GAAG,OAAO;IAIhD;;OAEG;IACH,MAAM,CAAC,wBAAwB,IAAI,MAAM;IAInC,UAAU,CAAC,MAAM,E
AAE,gBAAgB,GAAG,OAAO,CAAC,mBAAmB,CAAC;CAwGzE;AAED;;;GAGG;AACH,qBAAa,eAAgB,YAAW,qBAAqB;IAC3D,IAAI,SAAY;IAEV,UAAU,CAAC,MAAM,EAAE,gBAAgB,GAAG,OAAO,CAAC,mBAAmB,CAAC;CAkGzE"}
@@ -4,6 +4,8 @@
4
4
  * Provides a simple, event-driven interface for streaming audio transcription.
5
5
  * Users provide audio as AsyncIterable<Uint8Array> and receive typed events.
6
6
  *
7
+ * Runtime support: uses the 'ws' package in Node.js (native WebSocket in Deno); browser use is currently disabled — createRealtimeTranscriber() throws in browsers
8
+ *
7
9
  * @example
8
10
  * ```typescript
9
11
  * import { createRealtimeTranscriber } from '@wovin/tranz/realtime'
@@ -19,7 +21,27 @@
19
21
  * }
20
22
  * ```
21
23
  */
22
- import { AudioEncoding } from "@mistralai/mistralai/extra/realtime";
24
+ /**
25
+ * Audio encoding formats supported by the transcription service
26
+ */
27
+ export declare enum AudioEncoding {
28
+ PcmS16le = "pcm_s16le",
29
+ PcmS16be = "pcm_s16be",
30
+ PcmU16le = "pcm_u16le",
31
+ PcmU16be = "pcm_u16be",
32
+ PcmS24le = "pcm_s24le",
33
+ PcmS24be = "pcm_s24be",
34
+ PcmU24le = "pcm_u24le",
35
+ PcmU24be = "pcm_u24be",
36
+ PcmS32le = "pcm_s32le",
37
+ PcmS32be = "pcm_s32be",
38
+ PcmU32le = "pcm_u32le",
39
+ PcmU32be = "pcm_u32be",
40
+ PcmF32le = "pcm_f32le",
41
+ PcmF32be = "pcm_f32be",
42
+ PcmF64le = "pcm_f64le",
43
+ PcmF64be = "pcm_f64be"
44
+ }
23
45
  /**
24
46
  * Audio format configuration for realtime transcription
25
47
  */
@@ -59,6 +81,7 @@ export interface SessionCreatedEvent {
59
81
  type: "session.created";
60
82
  session: {
61
83
  id: string;
84
+ audioFormat: AudioFormat;
62
85
  };
63
86
  }
64
87
  /**
@@ -112,6 +135,7 @@ export interface ErrorEvent {
112
135
  type: "error";
113
136
  error: {
114
137
  message: string | unknown;
138
+ code?: string;
115
139
  };
116
140
  }
117
141
  /**
@@ -167,7 +191,7 @@ export interface AudioCaptureResult {
167
191
  stop: () => void;
168
192
  }
169
193
  /**
170
- * Capture audio from microphone using SoX `rec` command
194
+ * Capture audio from microphone using SoX `rec` command (Node.js only)
171
195
  *
172
196
  * Yields PCM 16-bit signed little-endian mono audio chunks suitable for
173
197
  * realtime transcription.
@@ -177,6 +201,8 @@ export interface AudioCaptureResult {
177
201
  * - macOS: `brew install sox`
178
202
  * - Linux: `sudo apt install sox`
179
203
  *
204
+ * **Note:** This is Node.js only. The browser counterpart, `captureAudioFromBrowser()`, is currently disabled and always throws.
205
+ *
180
206
  * @param sampleRate - Sample rate in Hz (default: 16000)
181
207
  * @returns Object with audio stream and stop function
182
208
  *
@@ -193,5 +219,24 @@ export interface AudioCaptureResult {
193
219
  * }
194
220
  * ```
195
221
  */
196
- export declare function captureAudioFromMicrophone(sampleRate?: number): AudioCaptureResult;
222
+ export declare function captureAudioFromMicrophone(sampleRate?: number): Promise<AudioCaptureResult>;
223
+ /**
224
+ * Capture audio from browser microphone using Web Audio API
225
+ *
226
+ * **CURRENTLY DISABLED** - Browser support is not available yet due to
227
+ * WebSocket authentication limitations with Mistral API.
228
+ *
229
+ * @deprecated Browser realtime transcription is not yet supported.
230
+ * Use captureAudioFromMicrophone() in Node.js instead.
231
+ *
232
+ * @param sampleRate - Target sample rate in Hz (default: 16000)
233
+ * @returns Object with audio stream and stop function
234
+ *
235
+ * @throws Error - Always throws as browser mode is disabled
236
+ *
237
+ * @todo Enable when Mistral API supports browser WebSocket authentication
238
+ * @todo Migrate to AudioWorklet for better performance
239
+ * See: https://web.dev/patterns/media/microphone-process/
240
+ */
241
+ export declare function captureAudioFromBrowser(sampleRate?: number): Promise<AudioCaptureResult>;
197
242
  //# sourceMappingURL=realtime.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"realtime.d.ts","sourceRoot":"","sources":["../../../src/utils/transcription/realtime.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;GAoBG;AAGH,OAAO,EACL,aAAa,EAEd,MAAM,qCAAqC,CAAC;AAM7C;;GAEG;AACH,MAAM,WAAW,WAAW;IAC1B,iDAAiD;IACjD,QAAQ,EAAE,aAAa,CAAC;IACxB,yCAAyC;IACzC,UAAU,EAAE,MAAM,CAAC;CACpB;AAED;;GAEG;AACH,MAAM,WAAW,cAAc;IAC7B,sBAAsB;IACtB,MAAM,EAAE,MAAM,CAAC;IACf,gEAAgE;IAChE,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,yDAAyD;IACzD,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB;AAED;;GAEG;AACH,MAAM,WAAW,iBAAiB;IAChC,2EAA2E;IAC3E,WAAW,CAAC,EAAE,OAAO,CAAC,WAAW,CAAC,CAAC;CACpC;AAED;;;GAGG;AACH,MAAM,MAAM,aAAa,GACrB,mBAAmB,GACnB,mBAAmB,GACnB,2BAA2B,GAC3B,0BAA0B,GAC1B,yBAAyB,GACzB,sBAAsB,GACtB,UAAU,CAAC;AAEf;;GAEG;AACH,MAAM,WAAW,mBAAmB;IAClC,IAAI,EAAE,iBAAiB,CAAC;IACxB,OAAO,EAAE;QACP,EAAE,EAAE,MAAM,CAAC;KACZ,CAAC;CACH;AAED;;GAEG;AACH,MAAM,WAAW,mBAAmB;IAClC,IAAI,EAAE,iBAAiB,CAAC;IACxB,OAAO,EAAE;QACP,WAAW,EAAE,WAAW,CAAC;KAC1B,CAAC;CACH;AAED;;;GAGG;AACH,MAAM,WAAW,2BAA2B;IAC1C,IAAI,EAAE,0BAA0B,CAAC;IACjC,IAAI,EAAE,MAAM,CAAC;CACd;AAED;;GAEG;AACH,MAAM,WAAW,0BAA0B;IACzC,IAAI,EAAE,wBAAwB,CAAC;IAC/B,aAAa,EAAE,MAAM,CAAC;CACvB;AAED;;;GAGG;AACH,MAAM,WAAW,yBAAyB;IACxC,IAAI,EAAE,uBAAuB,CAAC;IAC9B,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED;;;GAGG;AACH,MAAM,WAAW,sBAAsB;IACrC,IAAI,EAAE,oBAAoB,CAAC;IAC3B,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AAED;;GAEG;AACH,MAAM,WAAW,UAAU;IACzB,IAAI,EAAE,OAAO,CAAC;IACd,KAAK,EAAE;QACL,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC;KAC3B,CAAC;CACH;AAMD;;GAEG;AACH,MAAM,WAAW,mBAAmB;IAClC;;;;;;;;;;;;;;;;;;;;OAoBG;IACH,UAAU,CACR,WAAW,EAAE,aAAa,CAAC,UAAU,CAAC,EACtC,OAAO,CAAC,EAAE,iBAAiB,GAC1B,aAAa,CAAC,aAAa,CAAC,CAAC;CACjC;AAED;;;;;;;;;;;;;;GAcG;AACH,wBAAgB,yBAAyB,CACvC,MAAM,EAAE,cAAc,GACrB,mBAAmB,CA+BrB;AAMD;;GAEG;AACH,MAAM,WAAW,kBAAkB;IACjC,2CAA2C;IAC3C,MAAM,EAAE,cAAc,CAAC,UAAU,EAAE,IAAI,EAAE,OAAO,CAAC,CAAC;IAClD,qCAAqC;IACrC,IAAI,EAAE,MAAM,IAAI,CAAC;CAClB;AAED;;;;;;;;;;;;;;;;;;;;
;;;;;;GA0BG;AACH,wBAAgB,0BAA0B,CACxC,UAAU,GAAE,MAAc,GACzB,kBAAkB,CAuDpB"}
1
+ {"version":3,"file":"realtime.d.ts","sourceRoot":"","sources":["../../../src/utils/transcription/realtime.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;GAsBG;AAQH;;GAEG;AACH,oBAAY,aAAa;IACvB,QAAQ,cAAc;IACtB,QAAQ,cAAc;IACtB,QAAQ,cAAc;IACtB,QAAQ,cAAc;IACtB,QAAQ,cAAc;IACtB,QAAQ,cAAc;IACtB,QAAQ,cAAc;IACtB,QAAQ,cAAc;IACtB,QAAQ,cAAc;IACtB,QAAQ,cAAc;IACtB,QAAQ,cAAc;IACtB,QAAQ,cAAc;IACtB,QAAQ,cAAc;IACtB,QAAQ,cAAc;IACtB,QAAQ,cAAc;IACtB,QAAQ,cAAc;CACvB;AAED;;GAEG;AACH,MAAM,WAAW,WAAW;IAC1B,iDAAiD;IACjD,QAAQ,EAAE,aAAa,CAAC;IACxB,yCAAyC;IACzC,UAAU,EAAE,MAAM,CAAC;CACpB;AAED;;GAEG;AACH,MAAM,WAAW,cAAc;IAC7B,sBAAsB;IACtB,MAAM,EAAE,MAAM,CAAC;IACf,gEAAgE;IAChE,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,yDAAyD;IACzD,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB;AAED;;GAEG;AACH,MAAM,WAAW,iBAAiB;IAChC,2EAA2E;IAC3E,WAAW,CAAC,EAAE,OAAO,CAAC,WAAW,CAAC,CAAC;CACpC;AAED;;;GAGG;AACH,MAAM,MAAM,aAAa,GACrB,mBAAmB,GACnB,mBAAmB,GACnB,2BAA2B,GAC3B,0BAA0B,GAC1B,yBAAyB,GACzB,sBAAsB,GACtB,UAAU,CAAC;AAEf;;GAEG;AACH,MAAM,WAAW,mBAAmB;IAClC,IAAI,EAAE,iBAAiB,CAAC;IACxB,OAAO,EAAE;QACP,EAAE,EAAE,MAAM,CAAC;QACX,WAAW,EAAE,WAAW,CAAC;KAC1B,CAAC;CACH;AAED;;GAEG;AACH,MAAM,WAAW,mBAAmB;IAClC,IAAI,EAAE,iBAAiB,CAAC;IACxB,OAAO,EAAE;QACP,WAAW,EAAE,WAAW,CAAC;KAC1B,CAAC;CACH;AAED;;;GAGG;AACH,MAAM,WAAW,2BAA2B;IAC1C,IAAI,EAAE,0BAA0B,CAAC;IACjC,IAAI,EAAE,MAAM,CAAC;CACd;AAED;;GAEG;AACH,MAAM,WAAW,0BAA0B;IACzC,IAAI,EAAE,wBAAwB,CAAC;IAC/B,aAAa,EAAE,MAAM,CAAC;CACvB;AAED;;;GAGG;AACH,MAAM,WAAW,yBAAyB;IACxC,IAAI,EAAE,uBAAuB,CAAC;IAC9B,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED;;;GAGG;AACH,MAAM,WAAW,sBAAsB;IACrC,IAAI,EAAE,oBAAoB,CAAC;IAC3B,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AAED;;GAEG;AACH,MAAM,WAAW,UAAU;IACzB,IAAI,EAAE,OAAO,CAAC;IACd,KAAK,EAAE;QACL,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC;QAC1B,IAAI,CAAC,EAAE,MAAM,CAAC;KACf,CAAC;CACH;AAMD;;GAEG;AACH,MAAM,WAAW,mBAAmB;IAClC;;;;;;;;;;;;;;;;;;;;OAoBG;IACH,UAAU,CACR,WAAW,EAAE,aAAa,CAAC,UAAU,CAAC,EACtC,OAAO,CAAC,EAAE,iBAA
iB,GAC1B,aAAa,CAAC,aAAa,CAAC,CAAC;CACjC;AAED;;;;;;;;;;;;;;GAcG;AACH,wBAAgB,yBAAyB,CACvC,MAAM,EAAE,cAAc,GACrB,mBAAmB,CAgFrB;AA0SD;;GAEG;AACH,MAAM,WAAW,kBAAkB;IACjC,2CAA2C;IAC3C,MAAM,EAAE,cAAc,CAAC,UAAU,EAAE,IAAI,EAAE,OAAO,CAAC,CAAC;IAClD,qCAAqC;IACrC,IAAI,EAAE,MAAM,IAAI,CAAC;CAClB;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4BG;AACH,wBAAsB,0BAA0B,CAC9C,UAAU,GAAE,MAAc,GACzB,OAAO,CAAC,kBAAkB,CAAC,CAgE7B;AAED;;;;;;;;;;;;;;;;;GAiBG;AACH,wBAAsB,uBAAuB,CAC3C,UAAU,GAAE,MAAc,GACzB,OAAO,CAAC,kBAAkB,CAAC,CAiG7B"}
@@ -0,0 +1,15 @@
1
+ /**
2
+ * Runtime environment detection for WebSocket implementations
3
+ *
4
+ * Provides environment-aware WebSocket constructor selection:
5
+ * - Browser/Deno: Uses global WebSocket API
6
+ * - Node.js: Dynamically imports 'ws' package
7
+ */
8
+ /**
9
+ * Get the appropriate WebSocket implementation for the current runtime
10
+ *
11
+ * @returns WebSocket constructor (browser WebSocket or ws package)
12
+ * @throws Error if WebSocket is not available in any form
13
+ */
14
+ export declare function getWebSocketImpl(): Promise<any>;
15
+ //# sourceMappingURL=runtime.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"runtime.d.ts","sourceRoot":"","sources":["../../../src/utils/transcription/runtime.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH;;;;;GAKG;AACH,wBAAsB,gBAAgB,IAAI,OAAO,CAAC,GAAG,CAAC,CAyBrD"}
@@ -25,7 +25,7 @@ export interface TranscribeOptions {
25
25
  model?: string;
26
26
  /** Enable speaker diarization (default: true) */
27
27
  diarize?: boolean;
28
- /** Timestamp granularity: 'word' | 'segment' (default: 'word', disabled if language set) */
28
+ /** Timestamp granularity: 'word' | 'segment' (default: 'segment' when diarize=true, disabled if language set) */
29
29
  timestamps?: 'word' | 'segment';
30
30
  /** Auto-split long audio (default: true). For URLs, detects duration first. */
31
31
  autoSplit?: boolean;
@@ -1 +1 @@
1
- {"version":3,"file":"transcribe.d.ts","sourceRoot":"","sources":["../../../src/utils/transcription/transcribe.ts"],"names":[],"mappings":"AAAA;;GAEG;AASH,OAAO,EAA6B,KAAK,yBAAyB,EAAE,MAAM,2BAA2B,CAAA;AAErG,kDAAkD;AAClD,MAAM,WAAW,gBAAgB;IAC/B,IAAI,EAAE,CAAC,GAAG,EAAE,MAAM,KAAK,IAAI,CAAA;IAC3B,IAAI,EAAE,CAAC,GAAG,EAAE,MAAM,KAAK,IAAI,CAAA;IAC3B,KAAK,EAAE,CAAC,GAAG,EAAE,MAAM,KAAK,IAAI,CAAA;CAC7B;AAQD,MAAM,WAAW,iBAAiB;IAChC,yBAAyB;IACzB,SAAS,CAAC,EAAE,MAAM,CAAA;IAClB,iCAAiC;IACjC,WAAW,CAAC,EAAE,MAAM,CAAA;IACpB,gEAAgE;IAChE,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,iDAAiD;IACjD,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,yEAAyE;IACzE,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,mFAAmF;IACnF,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,kDAAkD;IAClD,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,iDAAiD;IACjD,OAAO,CAAC,EAAE,OAAO,CAAA;IACjB,4FAA4F;IAC5F,UAAU,CAAC,EAAE,MAAM,GAAG,SAAS,CAAA;IAC/B,+EAA+E;IAC/E,SAAS,CAAC,EAAE,OAAO,CAAA;IACnB,iEAAiE;IACjE,cAAc,CAAC,EAAE,MAAM,CAAA;IACvB,uCAAuC;IACvC,MAAM,CAAC,EAAE,gBAAgB,CAAA;IACzB,mCAAmC;IACnC,OAAO,CAAC,EAAE,OAAO,CAAA;CAClB;AAED,MAAM,WAAW,wBAAwB;IACvC,sBAAsB;IACtB,MAAM,EAAE,MAAM,CAAA;IACd,mDAAmD;IACnD,KAAK,CAAC,EAAE,MAAM,CAAA;CACf;AAkDD;;;;;;;;;;;;;;;;;;;GAmBG;AACH,iEAAiE;AACjE,MAAM,WAAW,kBAAkB;IACjC,UAAU,CAAC,OAAO,EAAE,iBAAiB,GAAG,OAAO,CAAC,yBAAyB,CAAC,CAAA;CAC3E;AAED,wBAAgB,wBAAwB,CAAC,MAAM,EAAE,wBAAwB,GAAG,kBAAkB,CA4K7F;AAED,+BAA+B;AAC/B,eAAO,MAAM,UAAU,iCAA2B,CAAA"}
1
+ {"version":3,"file":"transcribe.d.ts","sourceRoot":"","sources":["../../../src/utils/transcription/transcribe.ts"],"names":[],"mappings":"AAAA;;GAEG;AASH,OAAO,EAA6B,KAAK,yBAAyB,EAAE,MAAM,2BAA2B,CAAA;AAErG,kDAAkD;AAClD,MAAM,WAAW,gBAAgB;IAC/B,IAAI,EAAE,CAAC,GAAG,EAAE,MAAM,KAAK,IAAI,CAAA;IAC3B,IAAI,EAAE,CAAC,GAAG,EAAE,MAAM,KAAK,IAAI,CAAA;IAC3B,KAAK,EAAE,CAAC,GAAG,EAAE,MAAM,KAAK,IAAI,CAAA;CAC7B;AAQD,MAAM,WAAW,iBAAiB;IAChC,yBAAyB;IACzB,SAAS,CAAC,EAAE,MAAM,CAAA;IAClB,iCAAiC;IACjC,WAAW,CAAC,EAAE,MAAM,CAAA;IACpB,gEAAgE;IAChE,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,iDAAiD;IACjD,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,yEAAyE;IACzE,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,mFAAmF;IACnF,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,kDAAkD;IAClD,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,iDAAiD;IACjD,OAAO,CAAC,EAAE,OAAO,CAAA;IACjB,iHAAiH;IACjH,UAAU,CAAC,EAAE,MAAM,GAAG,SAAS,CAAA;IAC/B,+EAA+E;IAC/E,SAAS,CAAC,EAAE,OAAO,CAAA;IACnB,iEAAiE;IACjE,cAAc,CAAC,EAAE,MAAM,CAAA;IACvB,uCAAuC;IACvC,MAAM,CAAC,EAAE,gBAAgB,CAAA;IACzB,mCAAmC;IACnC,OAAO,CAAC,EAAE,OAAO,CAAA;CAClB;AAED,MAAM,WAAW,wBAAwB;IACvC,sBAAsB;IACtB,MAAM,EAAE,MAAM,CAAA;IACd,mDAAmD;IACnD,KAAK,CAAC,EAAE,MAAM,CAAA;CACf;AA6FD;;;;;;;;;;;;;;;;;;;GAmBG;AACH,iEAAiE;AACjE,MAAM,WAAW,kBAAkB;IACjC,UAAU,CAAC,OAAO,EAAE,iBAAiB,GAAG,OAAO,CAAC,yBAAyB,CAAC,CAAA;CAC3E;AAED,wBAAgB,wBAAwB,CAAC,MAAM,EAAE,wBAAwB,GAAG,kBAAkB,CA4K7F;AAED,+BAA+B;AAC/B,eAAO,MAAM,UAAU,iCAA2B,CAAA"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@wovin/tranz",
3
- "version": "0.1.9",
3
+ "version": "0.1.13",
4
4
  "type": "module",
5
5
  "description": "Audio transcription library with provider support and auto-splitting",
6
6
  "author": "gotjoshua @gotjoshua",
@@ -43,6 +43,9 @@
43
43
  "@mistralai/mistralai": "^1.14.0",
44
44
  "execa": "^9.6.1"
45
45
  },
46
+ "optionalDependencies": {
47
+ "ws": "^8.19.0"
48
+ },
46
49
  "devDependencies": {
47
50
  "@types/node": "^24.10.1",
48
51
  "@types/ws": "^8.5.13",
@@ -51,7 +54,6 @@
51
54
  "tsup": "^8.5.0",
52
55
  "tsx": "^4.19.2",
53
56
  "typescript": "^5.9.3",
54
- "ws": "^8.18.0",
55
57
  "yargs": "^17.7.2",
56
58
  "tsupconfig": "^0.0.0"
57
59
  },