@wovin/tranz 0.1.4 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -8,6 +8,7 @@ Audio transcription library with provider support and auto-splitting for long au
8
8
  ## Features
9
9
 
10
10
  - **Multiple Transcription Providers**: Mistral Voxtral, Whisper, GreenPT
11
+ - **Realtime Transcription**: Stream audio from a microphone or other sources for live transcription
11
12
  - **Automatic Audio Splitting**: Handles long audio files by intelligently splitting at silence points
12
13
  - **Smart Input Support**: Files, URLs (with HTTP range probing), or buffers
13
14
  - **Speaker Diarization**: Identify different speakers in audio
@@ -147,6 +148,72 @@ const result = await whisper.transcribe({
147
148
  })
148
149
  ```
149
150
 
151
+ ## Realtime Transcription
152
+
153
+ Stream audio for real-time transcription using Mistral's WebSocket API:
154
+
155
+ ```typescript
156
+ import {
157
+ createRealtimeTranscriber,
158
+ captureAudioFromMicrophone,
159
+ } from '@wovin/tranz/realtime'
160
+
161
+ // Create realtime transcriber
162
+ const transcriber = createRealtimeTranscriber({
163
+ apiKey: process.env.MISTRAL_API_KEY,
164
+ })
165
+
166
+ // Capture audio from microphone (requires SoX)
167
+ const { stream, stop } = captureAudioFromMicrophone(16000)
168
+
169
+ try {
170
+ for await (const event of transcriber.transcribe(stream)) {
171
+ if (event.type === 'transcription.text.delta') {
172
+ process.stdout.write(event.text)
173
+ } else if (event.type === 'transcription.done') {
174
+ console.log('\nComplete:', event.text)
175
+ break
176
+ }
177
+ }
178
+ } finally {
179
+ stop()
180
+ }
181
+ ```
182
+
183
+ ### Custom Audio Source
184
+
185
+ You can provide any `AsyncIterable<Uint8Array>` as an audio source:
186
+
187
+ ```typescript
188
+ async function* myAudioSource() {
189
+ // Read from file, socket, etc.
190
+ const buffer = await readSomeAudio()
191
+ yield new Uint8Array(buffer)
192
+ }
193
+
194
+ for await (const event of transcriber.transcribe(myAudioSource())) {
195
+ // Handle events
196
+ }
197
+ ```
198
+
199
+ ### Realtime Event Types
200
+
201
+ - `session.created` - WebSocket connection established
202
+ - `session.updated` - Audio format confirmed
203
+ - `transcription.text.delta` - Transcription text chunks (use for live display)
204
+ - `transcription.language` - Detected audio language
205
+ - `transcription.done` - Complete transcript available
206
+ - `error` - Error occurred
207
+
208
+ ### Limitations
209
+
210
+ The WebSocket realtime API has some limitations compared to batch transcription:
211
+ - No timestamp information (no word or segment timing)
212
+ - No speaker diarization
213
+ - Designed for streaming/live use cases, not long audio files
214
+
215
+ For timestamped transcriptions or speaker identification, use the batch API instead.
216
+
150
217
  ## Advanced: Audio Utilities
151
218
 
152
219
  ```typescript
package/dist/index.min.js CHANGED
@@ -215,6 +215,9 @@ var MistralProvider = class {
215
215
  return VOXTRAL_LIMITS.recommendedMaxDurationSec;
216
216
  }
217
217
  async transcribe(params) {
218
+ if (params.language && params.timestampGranularity) {
219
+ return { text: "", error: "Cannot use both language and timestampGranularity (Mistral API limitation)" };
220
+ }
218
221
  const formData = new FormData();
219
222
  if (params.audioUrl) {
220
223
  formData.append("file_url", params.audioUrl);
@@ -239,14 +242,14 @@ var MistralProvider = class {
239
242
  if (params.language) {
240
243
  formData.append("language", params.language);
241
244
  }
242
- const timestampGranularity = params.timestampGranularity ?? (params.language ? void 0 : "word");
243
- if (timestampGranularity) {
244
- formData.append("timestamp_granularities", timestampGranularity);
245
- }
246
245
  const diarize = params.diarize ?? true;
247
246
  if (diarize) {
248
247
  formData.append("diarize", "true");
249
248
  }
249
+ const timestampGranularity = params.language ? void 0 : params.timestampGranularity ?? "segment";
250
+ if (timestampGranularity) {
251
+ formData.append("timestamp_granularities", timestampGranularity);
252
+ }
250
253
  const response = await fetch("https://api.mistral.ai/v1/audio/transcriptions", {
251
254
  method: "POST",
252
255
  headers: {
@@ -215,6 +215,9 @@ var MistralProvider = class {
215
215
  return VOXTRAL_LIMITS.recommendedMaxDurationSec;
216
216
  }
217
217
  async transcribe(params) {
218
+ if (params.language && params.timestampGranularity) {
219
+ return { text: "", error: "Cannot use both language and timestampGranularity (Mistral API limitation)" };
220
+ }
218
221
  const formData = new FormData();
219
222
  if (params.audioUrl) {
220
223
  formData.append("file_url", params.audioUrl);
@@ -239,14 +242,14 @@ var MistralProvider = class {
239
242
  if (params.language) {
240
243
  formData.append("language", params.language);
241
244
  }
242
- const timestampGranularity = params.timestampGranularity ?? (params.language ? void 0 : "word");
243
- if (timestampGranularity) {
244
- formData.append("timestamp_granularities", timestampGranularity);
245
- }
246
245
  const diarize = params.diarize ?? true;
247
246
  if (diarize) {
248
247
  formData.append("diarize", "true");
249
248
  }
249
+ const timestampGranularity = params.language ? void 0 : params.timestampGranularity ?? "segment";
250
+ if (timestampGranularity) {
251
+ formData.append("timestamp_granularities", timestampGranularity);
252
+ }
250
253
  const response = await fetch("https://api.mistral.ai/v1/audio/transcriptions", {
251
254
  method: "POST",
252
255
  headers: {
@@ -0,0 +1,38 @@
1
+ /**
2
+ * Realtime transcription API
3
+ *
4
+ * This module provides a simple, event-driven interface for streaming audio
5
+ * transcription using Mistral's realtime WebSocket API.
6
+ *
7
+ * @example
8
+ * ```typescript
9
+ * import {
10
+ * createRealtimeTranscriber,
11
+ * captureAudioFromMicrophone,
12
+ * } from '@wovin/tranz/realtime'
13
+ *
14
+ * const transcriber = createRealtimeTranscriber({
15
+ * apiKey: process.env.MISTRAL_API_KEY,
16
+ * })
17
+ *
18
+ * const { stream, stop } = captureAudioFromMicrophone(16000)
19
+ *
20
+ * try {
21
+ * for await (const event of transcriber.transcribe(stream)) {
22
+ * if (event.type === 'transcription.text.delta') {
23
+ * process.stdout.write(event.text)
24
+ * } else if (event.type === 'transcription.done') {
25
+ * console.log('\nComplete!')
26
+ * break
27
+ * }
28
+ * }
29
+ * } finally {
30
+ * stop()
31
+ * }
32
+ * ```
33
+ *
34
+ * @module @wovin/tranz/realtime
35
+ */
36
+ export { createRealtimeTranscriber, captureAudioFromMicrophone, type RealtimeEvent, type RealtimeConfig, type RealtimeTranscriber, type TranscribeOptions, type AudioFormat, type AudioCaptureResult, type SessionCreatedEvent, type SessionUpdatedEvent, type TranscriptionTextDeltaEvent, type TranscriptionLanguageEvent, type TranscriptionSegmentEvent, type TranscriptionDoneEvent, type ErrorEvent, } from "./utils/transcription/realtime.js";
37
+ export { AudioEncoding } from "@mistralai/mistralai/extra/realtime";
38
+ //# sourceMappingURL=realtime.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"realtime.d.ts","sourceRoot":"","sources":["../src/realtime.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAkCG;AAEH,OAAO,EACL,yBAAyB,EACzB,0BAA0B,EAC1B,KAAK,aAAa,EAClB,KAAK,cAAc,EACnB,KAAK,mBAAmB,EACxB,KAAK,iBAAiB,EACtB,KAAK,WAAW,EAChB,KAAK,kBAAkB,EACvB,KAAK,mBAAmB,EACxB,KAAK,mBAAmB,EACxB,KAAK,2BAA2B,EAChC,KAAK,0BAA0B,EAC/B,KAAK,yBAAyB,EAC9B,KAAK,sBAAsB,EAC3B,KAAK,UAAU,GAChB,MAAM,mCAAmC,CAAC;AAG3C,OAAO,EAAE,aAAa,EAAE,MAAM,qCAAqC,CAAC"}
@@ -0,0 +1,95 @@
1
+ // src/utils/transcription/realtime.ts
2
+ import { spawn } from "child_process";
3
+ import {
4
+ AudioEncoding,
5
+ RealtimeTranscription
6
+ } from "@mistralai/mistralai/extra/realtime";
7
+ function createRealtimeTranscriber(config) {
8
+ const model = config.model ?? "voxtral-mini-transcribe-realtime-2602";
9
+ const baseUrl = config.baseUrl ?? "wss://api.mistral.ai";
10
+ const client = new RealtimeTranscription({
11
+ apiKey: config.apiKey,
12
+ serverURL: baseUrl
13
+ });
14
+ return {
15
+ async *transcribe(audioStream, options) {
16
+ const audioFormat = {
17
+ encoding: options?.audioFormat?.encoding ?? AudioEncoding.PcmS16le,
18
+ sampleRate: options?.audioFormat?.sampleRate ?? 16e3
19
+ };
20
+ const eventStream = client.transcribeStream(audioStream, model, {
21
+ audioFormat
22
+ });
23
+ for await (const event of eventStream) {
24
+ yield event;
25
+ }
26
+ }
27
+ };
28
+ }
29
+ function captureAudioFromMicrophone(sampleRate = 16e3) {
30
+ const recorder = spawn(
31
+ "rec",
32
+ [
33
+ "-q",
34
+ // Quiet mode
35
+ "-t",
36
+ "raw",
37
+ // Raw PCM output
38
+ "-b",
39
+ "16",
40
+ // 16-bit samples
41
+ "-e",
42
+ "signed-integer",
43
+ // Signed PCM
44
+ "-r",
45
+ String(sampleRate),
46
+ // Sample rate
47
+ "-c",
48
+ "1",
49
+ // Mono (1 channel)
50
+ "-"
51
+ // Output to stdout
52
+ ],
53
+ { stdio: ["ignore", "pipe", "ignore"] }
54
+ );
55
+ recorder.on("error", (err) => {
56
+ const error = err;
57
+ if (error.code === "ENOENT") {
58
+ console.error(
59
+ "\nError: 'rec' command not found. Please install SoX:",
60
+ "\n macOS: brew install sox",
61
+ "\n Linux: sudo apt install sox"
62
+ );
63
+ process.exit(1);
64
+ }
65
+ throw err;
66
+ });
67
+ const stream = (async function* () {
68
+ try {
69
+ if (!recorder.stdout) {
70
+ throw new Error("Failed to create audio capture stream");
71
+ }
72
+ for await (const chunk of recorder.stdout) {
73
+ yield new Uint8Array(chunk);
74
+ }
75
+ } finally {
76
+ if (!recorder.killed) {
77
+ recorder.kill("SIGTERM");
78
+ }
79
+ }
80
+ })();
81
+ const stop = () => {
82
+ if (!recorder.killed) {
83
+ recorder.kill("SIGTERM");
84
+ }
85
+ };
86
+ return { stream, stop };
87
+ }
88
+
89
+ // src/realtime.ts
90
+ import { AudioEncoding as AudioEncoding2 } from "@mistralai/mistralai/extra/realtime";
91
+ export {
92
+ AudioEncoding2 as AudioEncoding,
93
+ captureAudioFromMicrophone,
94
+ createRealtimeTranscriber
95
+ };
@@ -1 +1 @@
1
- {"version":3,"file":"providers.d.ts","sourceRoot":"","sources":["../../../src/utils/transcription/providers.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAUH;;;GAGG;AACH,MAAM,WAAW,mBAAmB;IAClC,mCAAmC;IACnC,IAAI,EAAE,MAAM,CAAA;IACZ,sEAAsE;IACtE,WAAW,CAAC,EAAE,GAAG,CAAA;IACjB,4CAA4C;IAC5C,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,kDAAkD;IAClD,UAAU,CAAC,EAAE,MAAM,CAAA;IACnB,iDAAiD;IACjD,KAAK,CAAC,EAAE,GAAG,EAAE,CAAA;IACb,mCAAmC;IACnC,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,0CAA0C;IAC1C,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,6DAA6D;IAC7D,KAAK,CAAC,EAAE,MAAM,CAAA;CACf;AAED;;;GAGG;AACH,MAAM,WAAW,qBAAqB;IACpC,+BAA+B;IAC/B,IAAI,EAAE,MAAM,CAAA;IACZ,+DAA+D;IAC/D,mBAAmB,CAAC,EAAE,MAAM,CAAA;IAC5B;;;;OAIG;IACH,UAAU,CAAC,MAAM,EAAE,gBAAgB,GAAG,OAAO,CAAC,mBAAmB,CAAC,CAAA;CACnE;AAED;;;GAGG;AACH,MAAM,WAAW,gBAAgB;IAC/B,2CAA2C;IAC3C,SAAS,CAAC,EAAE,MAAM,CAAA;IAClB,iCAAiC;IACjC,WAAW,CAAC,EAAE,MAAM,CAAA;IACpB,gEAAgE;IAChE,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,iDAAiD;IACjD,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,yDAAyD;IACzD,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,yDAAyD;IACzD,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,qDAAqD;IACrD,MAAM,CAAC,EAAE,MAAM,CAAA;IACf,oDAAoD;IACpD,OAAO,CAAC,EAAE,OAAO,CAAA;IACjB,iEAAiE;IACjE,oBAAoB,CAAC,EAAE,SAAS,GAAG,MAAM,CAAA;IACzC,4CAA4C;IAC5C,SAAS,CAAC,EAAE,MAAM,CAAA;IAClB,sDAAsD;IACtD,SAAS,CAAC,EAAE,MAAM,CAAA;IAClB,oCAAoC;IACpC,MAAM,CAAC,EAAE,GAAG,CAAA;CACb;AAED;;GAEG;AACH,MAAM,MAAM,YAAY,GAAG,SAAS,GAAG,SAAS,GAAG,SAAS,CAAA;AAE5D;;;;;GAKG;AACH,wBAAgB,cAAc,CAAC,YAAY,EAAE,YAAY,EAAE,MAAM,CAAC,EAAE,GAAG,GAAG,qBAAqB,CAW9F;AASD;;;GAGG;AACH,qBAAa,eAAgB,YAAW,qBAAqB;IAC3D,IAAI,SAAY;IAEhB,OAAO,CAAC,QAAQ,CAAQ;IAExB,MAAM,CAAC,QAAQ;;;;;;;;;;;;;;;MAkBd;gBAEW,MAAM,CAAC,EAAE,GAAG;IAKlB,UAAU,CAAC,MAAM,EAAE,gBAAgB,GAAG,OAAO,CAAC,mBAAmB,CAAC;YA+F1D,4BAA4B;CAkC3C;AAED;;;GAGG;AACH,eAAO,MAAM,cAAc;IACzB,2EAA2E;;IAE3E,kEAAkE;;IAElE,4CAA4C;;IAE5C,uCAAuC;;CAExC,CAAA;AAED,qBAAa,eAAgB,YAAW,qBAAqB;IAC3D,IAAI,SAAY;IAChB,mBAAmB,SAAqC;IAExD;;OAEG;IACH,MAAM,CAAC,WAAW,CAAC,WAAW,EAAE,MAAM,GAAG,OAAO;IAIhD;;OAEG;IACH,MAAM,CAAC,wBAAwB,IAAI,MAAM;IAInC,UAAU,CAAC,MAAM,E
AAE,gBAAgB,GAAG,OAAO,CAAC,mBAAmB,CAAC;CA8FzE;AAED;;;GAGG;AACH,qBAAa,eAAgB,YAAW,qBAAqB;IAC3D,IAAI,SAAY;IAEV,UAAU,CAAC,MAAM,EAAE,gBAAgB,GAAG,OAAO,CAAC,mBAAmB,CAAC;CAkGzE"}
1
+ {"version":3,"file":"providers.d.ts","sourceRoot":"","sources":["../../../src/utils/transcription/providers.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAUH;;;GAGG;AACH,MAAM,WAAW,mBAAmB;IAClC,mCAAmC;IACnC,IAAI,EAAE,MAAM,CAAA;IACZ,sEAAsE;IACtE,WAAW,CAAC,EAAE,GAAG,CAAA;IACjB,4CAA4C;IAC5C,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,kDAAkD;IAClD,UAAU,CAAC,EAAE,MAAM,CAAA;IACnB,iDAAiD;IACjD,KAAK,CAAC,EAAE,GAAG,EAAE,CAAA;IACb,mCAAmC;IACnC,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,0CAA0C;IAC1C,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,6DAA6D;IAC7D,KAAK,CAAC,EAAE,MAAM,CAAA;CACf;AAED;;;GAGG;AACH,MAAM,WAAW,qBAAqB;IACpC,+BAA+B;IAC/B,IAAI,EAAE,MAAM,CAAA;IACZ,+DAA+D;IAC/D,mBAAmB,CAAC,EAAE,MAAM,CAAA;IAC5B;;;;OAIG;IACH,UAAU,CAAC,MAAM,EAAE,gBAAgB,GAAG,OAAO,CAAC,mBAAmB,CAAC,CAAA;CACnE;AAED;;;GAGG;AACH,MAAM,WAAW,gBAAgB;IAC/B,2CAA2C;IAC3C,SAAS,CAAC,EAAE,MAAM,CAAA;IAClB,iCAAiC;IACjC,WAAW,CAAC,EAAE,MAAM,CAAA;IACpB,gEAAgE;IAChE,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,iDAAiD;IACjD,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,yDAAyD;IACzD,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,yDAAyD;IACzD,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,qDAAqD;IACrD,MAAM,CAAC,EAAE,MAAM,CAAA;IACf,oDAAoD;IACpD,OAAO,CAAC,EAAE,OAAO,CAAA;IACjB,iEAAiE;IACjE,oBAAoB,CAAC,EAAE,SAAS,GAAG,MAAM,CAAA;IACzC,4CAA4C;IAC5C,SAAS,CAAC,EAAE,MAAM,CAAA;IAClB,sDAAsD;IACtD,SAAS,CAAC,EAAE,MAAM,CAAA;IAClB,oCAAoC;IACpC,MAAM,CAAC,EAAE,GAAG,CAAA;CACb;AAED;;GAEG;AACH,MAAM,MAAM,YAAY,GAAG,SAAS,GAAG,SAAS,GAAG,SAAS,CAAA;AAE5D;;;;;GAKG;AACH,wBAAgB,cAAc,CAAC,YAAY,EAAE,YAAY,EAAE,MAAM,CAAC,EAAE,GAAG,GAAG,qBAAqB,CAW9F;AASD;;;GAGG;AACH,qBAAa,eAAgB,YAAW,qBAAqB;IAC3D,IAAI,SAAY;IAEhB,OAAO,CAAC,QAAQ,CAAQ;IAExB,MAAM,CAAC,QAAQ;;;;;;;;;;;;;;;MAkBd;gBAEW,MAAM,CAAC,EAAE,GAAG;IAKlB,UAAU,CAAC,MAAM,EAAE,gBAAgB,GAAG,OAAO,CAAC,mBAAmB,CAAC;YA+F1D,4BAA4B;CAkC3C;AAED;;;GAGG;AACH,eAAO,MAAM,cAAc;IACzB,2EAA2E;;IAE3E,kEAAkE;;IAElE,4CAA4C;;IAE5C,uCAAuC;;CAExC,CAAA;AAED,qBAAa,eAAgB,YAAW,qBAAqB;IAC3D,IAAI,SAAY;IAChB,mBAAmB,SAAqC;IAExD;;OAEG;IACH,MAAM,CAAC,WAAW,CAAC,WAAW,EAAE,MAAM,GAAG,OAAO;IAIhD;;OAEG;IACH,MAAM,CAAC,wBAAwB,IAAI,MAAM;IAInC,UAAU,CAAC,MAAM,E
AAE,gBAAgB,GAAG,OAAO,CAAC,mBAAmB,CAAC;CAmGzE;AAED;;;GAGG;AACH,qBAAa,eAAgB,YAAW,qBAAqB;IAC3D,IAAI,SAAY;IAEV,UAAU,CAAC,MAAM,EAAE,gBAAgB,GAAG,OAAO,CAAC,mBAAmB,CAAC;CAkGzE"}
@@ -0,0 +1,197 @@
1
+ /**
2
+ * Realtime transcription API for Mistral's WebSocket-based transcription service
3
+ *
4
+ * Provides a simple, event-driven interface for streaming audio transcription.
5
+ * Users provide audio as AsyncIterable<Uint8Array> and receive typed events.
6
+ *
7
+ * @example
8
+ * ```typescript
9
+ * import { createRealtimeTranscriber } from '@wovin/tranz/realtime'
10
+ *
11
+ * const transcriber = createRealtimeTranscriber({
12
+ * apiKey: process.env.MISTRAL_API_KEY,
13
+ * })
14
+ *
15
+ * for await (const event of transcriber.transcribe(audioStream)) {
16
+ * if (event.type === 'transcription.text.delta') {
17
+ * process.stdout.write(event.text)
18
+ * }
19
+ * }
20
+ * ```
21
+ */
22
+ import { AudioEncoding } from "@mistralai/mistralai/extra/realtime";
23
+ /**
24
+ * Audio format configuration for realtime transcription
25
+ */
26
+ export interface AudioFormat {
27
+ /** Audio encoding format (default: pcm_s16le) */
28
+ encoding: AudioEncoding;
29
+ /** Sample rate in Hz (default: 16000) */
30
+ sampleRate: number;
31
+ }
32
+ /**
33
+ * Configuration for creating a realtime transcriber
34
+ */
35
+ export interface RealtimeConfig {
36
+ /** Mistral API key */
37
+ apiKey: string;
38
+ /** Model ID (default: voxtral-mini-transcribe-realtime-2602) */
39
+ model?: string;
40
+ /** WebSocket base URL (default: wss://api.mistral.ai) */
41
+ baseUrl?: string;
42
+ }
43
+ /**
44
+ * Options for transcription
45
+ */
46
+ export interface TranscribeOptions {
47
+ /** Audio format configuration (optional, uses defaults if not provided) */
48
+ audioFormat?: Partial<AudioFormat>;
49
+ }
50
+ /**
51
+ * Union type for all realtime transcription events
52
+ * These events are yielded as the transcription progresses
53
+ */
54
+ export type RealtimeEvent = SessionCreatedEvent | SessionUpdatedEvent | TranscriptionTextDeltaEvent | TranscriptionLanguageEvent | TranscriptionSegmentEvent | TranscriptionDoneEvent | ErrorEvent;
55
+ /**
56
+ * Session created event - emitted when WebSocket connection is established
57
+ */
58
+ export interface SessionCreatedEvent {
59
+ type: "session.created";
60
+ session: {
61
+ id: string;
62
+ };
63
+ }
64
+ /**
65
+ * Session updated event - emitted when audio format is confirmed
66
+ */
67
+ export interface SessionUpdatedEvent {
68
+ type: "session.updated";
69
+ session: {
70
+ audioFormat: AudioFormat;
71
+ };
72
+ }
73
+ /**
74
+ * Text delta event - emitted as transcription text arrives in chunks
75
+ * This is the primary event for displaying real-time transcription
76
+ */
77
+ export interface TranscriptionTextDeltaEvent {
78
+ type: "transcription.text.delta";
79
+ text: string;
80
+ }
81
+ /**
82
+ * Language detection event - emitted when audio language is detected
83
+ */
84
+ export interface TranscriptionLanguageEvent {
85
+ type: "transcription.language";
86
+ audioLanguage: string;
87
+ }
88
+ /**
89
+ * Segment event - emitted for timestamped segments
90
+ * NOTE: The WebSocket realtime API does NOT emit this event; the type is included for completeness
91
+ */
92
+ export interface TranscriptionSegmentEvent {
93
+ type: "transcription.segment";
94
+ start?: number;
95
+ end?: number;
96
+ text: string;
97
+ speakerId?: string;
98
+ }
99
+ /**
100
+ * Transcription done event - emitted when transcription completes
101
+ * Contains the complete transcript
102
+ */
103
+ export interface TranscriptionDoneEvent {
104
+ type: "transcription.done";
105
+ text: string;
106
+ language?: string;
107
+ }
108
+ /**
109
+ * Error event - emitted when an error occurs
110
+ */
111
+ export interface ErrorEvent {
112
+ type: "error";
113
+ error: {
114
+ message: string | unknown;
115
+ };
116
+ }
117
+ /**
118
+ * Realtime transcriber interface
119
+ */
120
+ export interface RealtimeTranscriber {
121
+ /**
122
+ * Transcribe audio stream and yield events as they arrive
123
+ *
124
+ * @param audioStream - AsyncIterable of audio chunks (Uint8Array)
125
+ * @param options - Optional transcription options
126
+ * @returns AsyncIterable of transcription events
127
+ *
128
+ * @example
129
+ * ```typescript
130
+ * const transcriber = createRealtimeTranscriber({ apiKey: 'xxx' })
131
+ *
132
+ * for await (const event of transcriber.transcribe(audioStream)) {
133
+ * if (event.type === 'transcription.text.delta') {
134
+ * console.log(event.text)
135
+ * } else if (event.type === 'transcription.done') {
136
+ * console.log('Complete:', event.text)
137
+ * break
138
+ * }
139
+ * }
140
+ * ```
141
+ */
142
+ transcribe(audioStream: AsyncIterable<Uint8Array>, options?: TranscribeOptions): AsyncIterable<RealtimeEvent>;
143
+ }
144
+ /**
145
+ * Create a realtime transcriber instance
146
+ *
147
+ * @param config - Configuration including API key and optional model/baseUrl
148
+ * @returns RealtimeTranscriber instance
149
+ *
150
+ * @example
151
+ * ```typescript
152
+ * const transcriber = createRealtimeTranscriber({
153
+ * apiKey: process.env.MISTRAL_API_KEY,
154
+ * model: 'voxtral-mini-transcribe-realtime-2602', // optional
155
+ * baseUrl: 'wss://api.mistral.ai', // optional
156
+ * })
157
+ * ```
158
+ */
159
+ export declare function createRealtimeTranscriber(config: RealtimeConfig): RealtimeTranscriber;
160
+ /**
161
+ * Result from audio capture - includes stream and stop function
162
+ */
163
+ export interface AudioCaptureResult {
164
+ /** AsyncGenerator yielding audio chunks */
165
+ stream: AsyncGenerator<Uint8Array, void, unknown>;
166
+ /** Function to stop audio capture */
167
+ stop: () => void;
168
+ }
169
+ /**
170
+ * Capture audio from microphone using SoX `rec` command
171
+ *
172
+ * Yields PCM 16-bit signed little-endian mono audio chunks suitable for
173
+ * realtime transcription.
174
+ *
175
+ * **Requirements:**
176
+ * - SoX audio tools must be installed
177
+ * - macOS: `brew install sox`
178
+ * - Linux: `sudo apt install sox`
179
+ *
180
+ * @param sampleRate - Sample rate in Hz (default: 16000)
181
+ * @returns Object with audio stream and stop function
182
+ *
183
+ * @example
184
+ * ```typescript
185
+ * const { stream, stop } = captureAudioFromMicrophone(16000)
186
+ *
187
+ * try {
188
+ * for await (const event of transcriber.transcribe(stream)) {
189
+ * // ... handle events
190
+ * }
191
+ * } finally {
192
+ * stop() // Clean up audio capture
193
+ * }
194
+ * ```
195
+ */
196
+ export declare function captureAudioFromMicrophone(sampleRate?: number): AudioCaptureResult;
197
+ //# sourceMappingURL=realtime.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"realtime.d.ts","sourceRoot":"","sources":["../../../src/utils/transcription/realtime.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;GAoBG;AAGH,OAAO,EACL,aAAa,EAEd,MAAM,qCAAqC,CAAC;AAM7C;;GAEG;AACH,MAAM,WAAW,WAAW;IAC1B,iDAAiD;IACjD,QAAQ,EAAE,aAAa,CAAC;IACxB,yCAAyC;IACzC,UAAU,EAAE,MAAM,CAAC;CACpB;AAED;;GAEG;AACH,MAAM,WAAW,cAAc;IAC7B,sBAAsB;IACtB,MAAM,EAAE,MAAM,CAAC;IACf,gEAAgE;IAChE,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,yDAAyD;IACzD,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB;AAED;;GAEG;AACH,MAAM,WAAW,iBAAiB;IAChC,2EAA2E;IAC3E,WAAW,CAAC,EAAE,OAAO,CAAC,WAAW,CAAC,CAAC;CACpC;AAED;;;GAGG;AACH,MAAM,MAAM,aAAa,GACrB,mBAAmB,GACnB,mBAAmB,GACnB,2BAA2B,GAC3B,0BAA0B,GAC1B,yBAAyB,GACzB,sBAAsB,GACtB,UAAU,CAAC;AAEf;;GAEG;AACH,MAAM,WAAW,mBAAmB;IAClC,IAAI,EAAE,iBAAiB,CAAC;IACxB,OAAO,EAAE;QACP,EAAE,EAAE,MAAM,CAAC;KACZ,CAAC;CACH;AAED;;GAEG;AACH,MAAM,WAAW,mBAAmB;IAClC,IAAI,EAAE,iBAAiB,CAAC;IACxB,OAAO,EAAE;QACP,WAAW,EAAE,WAAW,CAAC;KAC1B,CAAC;CACH;AAED;;;GAGG;AACH,MAAM,WAAW,2BAA2B;IAC1C,IAAI,EAAE,0BAA0B,CAAC;IACjC,IAAI,EAAE,MAAM,CAAC;CACd;AAED;;GAEG;AACH,MAAM,WAAW,0BAA0B;IACzC,IAAI,EAAE,wBAAwB,CAAC;IAC/B,aAAa,EAAE,MAAM,CAAC;CACvB;AAED;;;GAGG;AACH,MAAM,WAAW,yBAAyB;IACxC,IAAI,EAAE,uBAAuB,CAAC;IAC9B,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED;;;GAGG;AACH,MAAM,WAAW,sBAAsB;IACrC,IAAI,EAAE,oBAAoB,CAAC;IAC3B,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AAED;;GAEG;AACH,MAAM,WAAW,UAAU;IACzB,IAAI,EAAE,OAAO,CAAC;IACd,KAAK,EAAE;QACL,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC;KAC3B,CAAC;CACH;AAMD;;GAEG;AACH,MAAM,WAAW,mBAAmB;IAClC;;;;;;;;;;;;;;;;;;;;OAoBG;IACH,UAAU,CACR,WAAW,EAAE,aAAa,CAAC,UAAU,CAAC,EACtC,OAAO,CAAC,EAAE,iBAAiB,GAC1B,aAAa,CAAC,aAAa,CAAC,CAAC;CACjC;AAED;;;;;;;;;;;;;;GAcG;AACH,wBAAgB,yBAAyB,CACvC,MAAM,EAAE,cAAc,GACrB,mBAAmB,CA+BrB;AAMD;;GAEG;AACH,MAAM,WAAW,kBAAkB;IACjC,2CAA2C;IAC3C,MAAM,EAAE,cAAc,CAAC,UAAU,EAAE,IAAI,EAAE,OAAO,CAAC,CAAC;IAClD,qCAAqC;IACrC,IAAI,EAAE,MAAM,IAAI,CAAC;CAClB;AAED;;;;;;;;;;;;;;;;;;;;
;;;;;;GA0BG;AACH,wBAAgB,0BAA0B,CACxC,UAAU,GAAE,MAAc,GACzB,kBAAkB,CAuDpB"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@wovin/tranz",
3
- "version": "0.1.4",
3
+ "version": "0.1.6",
4
4
  "type": "module",
5
5
  "description": "Audio transcription library with provider support and auto-splitting",
6
6
  "author": "gotjoshua @gotjoshua",
@@ -27,6 +27,10 @@
27
27
  "./audio": {
28
28
  "import": "./dist/audio.min.js",
29
29
  "types": "./dist/audio.d.ts"
30
+ },
31
+ "./realtime": {
32
+ "import": "./dist/realtime.min.js",
33
+ "types": "./dist/realtime.d.ts"
30
34
  }
31
35
  },
32
36
  "files": [
@@ -36,14 +40,20 @@
36
40
  "access": "public"
37
41
  },
38
42
  "dependencies": {
43
+ "@mistralai/mistralai": "^1.14.0",
39
44
  "fluent-ffmpeg": "^2.1.2"
40
45
  },
41
46
  "devDependencies": {
42
47
  "@types/fluent-ffmpeg": "^2.1.21",
43
48
  "@types/node": "^24.10.1",
49
+ "@types/ws": "^8.5.13",
50
+ "@types/yargs": "^17.0.33",
44
51
  "concurrently": "^8.2.2",
45
52
  "tsup": "^8.5.0",
53
+ "tsx": "^4.19.2",
46
54
  "typescript": "^5.9.3",
55
+ "ws": "^8.18.0",
56
+ "yargs": "^17.7.2",
47
57
  "tsupconfig": "^0.0.0"
48
58
  },
49
59
  "keywords": [
@@ -60,6 +70,8 @@
60
70
  "dev": "concurrently \"pnpm dev:code\" \"pnpm dev:types\"",
61
71
  "dev:code": "tsup --watch",
62
72
  "dev:types": "tsc --emitDeclarationOnly --declaration --watch",
63
- "clean": "rm -rf .turbo && rm -rf node_modules && rm -rf dist"
73
+ "clean": "rm -rf .turbo && rm -rf node_modules && rm -rf dist",
74
+ "test:realtime": "tsx test/realtime-transcription.ts",
75
+ "test:realtime-api": "tsx test/realtime-api-test.ts"
64
76
  }
65
77
  }