pyannote-cpp-node 0.4.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -26,6 +26,10 @@ The API supports three modes: **offline** batch processing (`transcribeOffline`)
26
26
  - **Streaming mode**: incremental push/finalize with real-time `segments` events and `audio` chunk streaming
27
27
  - Deterministic output for the same audio/models/config
28
28
  - CoreML-accelerated inference on macOS
29
+ - **Shared model cache**: all models loaded once during `Pipeline.load()`, reused across offline/streaming/session modes
30
+ - **Runtime backend switching**: switch Whisper between GPU-only and CoreML-accelerated without reloading the pipeline
31
+ - **Progress reporting**: optional `onProgress` callback for `transcribeOffline` reports Whisper, diarization, and alignment phases
32
+ - **Real-time segment streaming**: optional `onSegment` callback for `transcribeOffline` delivers each Whisper segment (start, end, text) as it's produced — enables live transcript preview and time-based loading bars
29
33
  - TypeScript-first API with complete type definitions
30
34
 
31
35
  ## Requirements
@@ -90,11 +94,12 @@ pipeline.close();
90
94
  ```typescript
91
95
  class Pipeline {
92
96
  static async load(config: ModelConfig): Promise<Pipeline>;
93
- async transcribeOffline(audio: Float32Array): Promise<TranscriptionResult>;
97
+ async transcribeOffline(audio: Float32Array, onProgress?: (phase: number, progress: number) => void, onSegment?: (start: number, end: number, text: string) => void): Promise<TranscriptionResult>;
94
98
  async transcribe(audio: Float32Array): Promise<TranscriptionResult>;
95
99
  setLanguage(language: string): void;
96
100
  setDecodeOptions(options: DecodeOptions): void;
97
101
  createSession(): PipelineSession;
102
+ async setUseCoreml(useCoreml: boolean): Promise<void>;
98
103
  close(): void;
99
104
  get isClosed(): boolean;
100
105
  }
@@ -102,12 +107,50 @@ class Pipeline {
102
107
 
103
108
  #### `static async load(config: ModelConfig): Promise<Pipeline>`
104
109
 
105
- Validates model paths and initializes native pipeline resources.
110
+ Validates model paths and loads all models (Whisper, CoreML segmentation/embedding, PLDA, and optionally VAD) into a shared cache on a background thread. Models are loaded once and reused across all subsequent `transcribe()`, `transcribeOffline()`, and `createSession()` calls — no redundant loading occurs when switching between modes. Models are freed only when `close()` is called.
106
111
 
107
- #### `async transcribeOffline(audio: Float32Array): Promise<TranscriptionResult>`
112
+ #### `async transcribeOffline(audio: Float32Array, onProgress?, onSegment?): Promise<TranscriptionResult>`
108
113
 
109
114
  Runs Whisper on the **entire** audio buffer in a single `whisper_full()` call, then runs offline diarization and WhisperX-style speaker alignment. This is the fastest mode for batch processing — no streaming infrastructure is involved.
110
115
 
116
+ The optional `onProgress` callback receives `(phase, progress)` updates:
117
+
118
+ | Phase | Value | Meaning |
119
+ | --- | --- | --- |
120
+ | `0` | `0`–`100` | Whisper transcription progress (percentage) |
121
+ | `1` | `0` | Diarization started |
122
+ | `2` | `0` | Speaker alignment started |
123
+
124
+ ```typescript
125
+ const result = await pipeline.transcribeOffline(audio, (phase, progress) => {
126
+ if (phase === 0) console.log(`Transcribing: ${progress}%`);
127
+ if (phase === 1) console.log('Running diarization...');
128
+ if (phase === 2) console.log('Aligning speakers...');
129
+ });
130
+ ```
131
+
132
+ The optional `onSegment` callback receives `(start, end, text)` for each Whisper segment as it's produced during transcription. Times are in seconds. This enables live transcript preview before diarization and alignment complete.
133
+
134
+ ```typescript
135
+ const result = await pipeline.transcribeOffline(audio, undefined, (start, end, text) => {
136
+ console.log(`[${start.toFixed(2)}-${end.toFixed(2)}] ${text}`);
137
+ });
138
+ ```
139
+
140
+ Both callbacks can be used simultaneously:
141
+
142
+ ```typescript
143
+ const result = await pipeline.transcribeOffline(
144
+ audio,
145
+ (phase, progress) => {
146
+ if (phase === 0) updateProgressBar(progress);
147
+ },
148
+ (start, end, text) => {
149
+ appendToTranscriptPreview(start, end, text);
150
+ },
151
+ );
152
+ ```
153
+
111
154
  #### `async transcribe(audio: Float32Array): Promise<TranscriptionResult>`
112
155
 
113
156
  Runs one-shot transcription + diarization using the streaming pipeline internally (pushes 1-second chunks then finalizes).
@@ -120,6 +163,29 @@ Updates the Whisper decode language for subsequent `transcribe()` calls. This is
120
163
 
121
164
  Updates one or more Whisper decode options for subsequent `transcribe()` calls. Only the fields you pass are changed; others retain their current values. See `DecodeOptions` for available fields.
122
165
 
166
+
167
+ #### `async setUseCoreml(useCoreml: boolean): Promise<void>`
168
+
169
+ Switches the Whisper inference backend between GPU-only (`false`) and GPU+CoreML (`true`) at runtime. The method reloads the Whisper context on a background thread with the new `use_coreml` setting. The promise resolves when the new context is ready.
170
+
171
+ - If the requested mode matches the current mode, returns immediately (no reload).
172
+ - The returned promise rejects if the pipeline is closed or busy, or if the models are not loaded.
173
+ - After switching, all subsequent `transcribe()`, `transcribeOffline()`, and streaming session calls use the new backend.
174
+
175
+ ```typescript
176
+ // Start with GPU-only Whisper
177
+ const pipeline = await Pipeline.load({
178
+ ...modelPaths,
179
+ useCoreml: false,
180
+ });
181
+
182
+ // Switch to CoreML-accelerated Whisper at runtime
183
+ await pipeline.setUseCoreml(true);
184
+ const result = await pipeline.transcribeOffline(audio);
185
+
186
+ // Switch back to GPU-only
187
+ await pipeline.setUseCoreml(false);
188
+ ```
123
189
  #### `createSession(): PipelineSession`
124
190
 
125
191
  Creates an independent streaming session for incremental processing.
@@ -181,6 +247,8 @@ Flushes all stages, runs final recluster + alignment, and returns the definitive
181
247
  ```typescript
182
248
  type TranscriptionResult = {
183
249
  segments: AlignedSegment[];
250
+ /** Silence-filtered audio; present only when a VAD model is loaded. Segment timestamps align to this audio. */
251
+ filteredAudio?: Float32Array;
184
252
  };
185
253
  ```
186
254
 
@@ -364,6 +432,14 @@ export interface AlignedSegment {
364
432
  export interface TranscriptionResult {
365
433
  /** Full speaker-labeled transcript segments. */
366
434
  segments: AlignedSegment[];
435
+ /**
436
+ * Silence-filtered audio (16 kHz mono Float32Array).
437
+ * Present when a VAD model is loaded (`vadModelPath` in config).
438
+ * Silence longer than 2 seconds is compressed to 2 seconds.
439
+ * All segment timestamps are aligned to this audio —
440
+ * save it directly and timestamps will sync correctly.
441
+ */
442
+ filteredAudio?: Float32Array;
367
443
  }
368
444
  ```
369
445
 
@@ -396,6 +472,77 @@ async function runOffline(audio: Float32Array) {
396
472
  }
397
473
  ```
398
474
 
475
+ ### Offline transcription with silence filtering
476
+
477
+ When a VAD model is provided, `transcribeOffline` automatically compresses silence longer than 2 seconds down to 2 seconds before running Whisper and diarization. The filtered audio is returned alongside segments so you can save it with correctly aligned timestamps.
478
+
479
+ ```typescript
480
+ import { Pipeline } from 'pyannote-cpp-node';
481
+ import { writeFileSync } from 'node:fs';
482
+
483
+ async function runOfflineWithVAD(audio: Float32Array) {
484
+ const pipeline = await Pipeline.load({
485
+ segModelPath: './models/segmentation.gguf',
486
+ embModelPath: './models/embedding.gguf',
487
+ pldaPath: './models/plda.gguf',
488
+ coremlPath: './models/embedding.mlpackage',
489
+ segCoremlPath: './models/segmentation.mlpackage',
490
+ whisperModelPath: './models/ggml-large-v3-turbo-q5_0.bin',
491
+ vadModelPath: './models/ggml-silero-v6.2.0.bin', // enables silence filtering
492
+ });
493
+
494
+ const result = await pipeline.transcribeOffline(audio);
495
+
496
+ // Save the silence-filtered audio — timestamps in result.segments align to this
497
+ if (result.filteredAudio) {
498
+ // filteredAudio is 16 kHz mono Float32Array with silence compressed
499
+ writeFileSync('./output-filtered.pcm', Buffer.from(result.filteredAudio.buffer));
500
+ console.log(`Filtered: ${audio.length} -> ${result.filteredAudio.length} samples`);
501
+ }
502
+
503
+ for (const seg of result.segments) {
504
+ const end = seg.start + seg.duration;
505
+ console.log(`[${seg.speaker}] ${seg.start.toFixed(2)}-${end.toFixed(2)} ${seg.text.trim()}`);
506
+ }
507
+
508
+ pipeline.close();
509
+ }
510
+ ```
511
+
512
+ ### Offline transcription with progress and live transcript preview
513
+
514
+ ```typescript
515
+ import { Pipeline } from 'pyannote-cpp-node';
516
+
517
+ async function runOfflineWithCallbacks(audio: Float32Array) {
518
+ const pipeline = await Pipeline.load({
519
+ segModelPath: './models/segmentation.gguf',
520
+ embModelPath: './models/embedding.gguf',
521
+ pldaPath: './models/plda.gguf',
522
+ coremlPath: './models/embedding.mlpackage',
523
+ segCoremlPath: './models/segmentation.mlpackage',
524
+ whisperModelPath: './models/ggml-large-v3-turbo-q5_0.bin',
525
+ });
526
+
527
+ const result = await pipeline.transcribeOffline(
528
+ audio,
529
+ // Progress callback — phase 0 is Whisper (0-100%), phase 1 is diarization, phase 2 is alignment
530
+ (phase, progress) => {
531
+ if (phase === 0) updateProgressBar(progress);
532
+ if (phase === 1) showStatus('Identifying speakers...');
533
+ if (phase === 2) showStatus('Aligning speakers to transcript...');
534
+ },
535
+ // Segment callback — each Whisper segment as it's produced (before diarization)
536
+ (start, end, text) => {
537
+ appendToLivePreview(`[${start.toFixed(2)}-${end.toFixed(2)}] ${text}`);
538
+ },
539
+ );
540
+
541
+ console.log(`Done: ${result.segments.length} speaker-labeled segments`);
542
+ pipeline.close();
543
+ }
544
+ ```
545
+
399
546
  ### One-shot transcription (streaming internals)
400
547
 
401
548
  ```typescript
@@ -543,6 +690,34 @@ const result3 = await pipeline.transcribe(chineseAudio);
543
690
  pipeline.close();
544
691
  ```
545
692
 
693
+ ### Switching Whisper backend at runtime
694
+
695
+ ```typescript
696
+ import { Pipeline } from 'pyannote-cpp-node';
697
+
698
+ // Start with GPU-only Whisper (default)
699
+ const pipeline = await Pipeline.load({
700
+ segModelPath: './models/segmentation.gguf',
701
+ embModelPath: './models/embedding.gguf',
702
+ pldaPath: './models/plda.gguf',
703
+ coremlPath: './models/embedding.mlpackage',
704
+ segCoremlPath: './models/segmentation.mlpackage',
705
+ whisperModelPath: './models/ggml-large-v3-turbo-q5_0.bin',
706
+ useCoreml: false,
707
+ });
708
+
709
+ // Switch to CoreML-accelerated Whisper encoder at runtime
710
+ // (requires ggml-large-v3-turbo-q5_0-encoder.mlmodelc next to the GGUF)
711
+ await pipeline.setUseCoreml(true);
712
+ const result1 = await pipeline.transcribeOffline(audio);
713
+
714
+ // Switch back to GPU-only
715
+ await pipeline.setUseCoreml(false);
716
+ const result2 = await pipeline.transcribeOffline(audio);
717
+
718
+ pipeline.close();
719
+ ```
720
+
546
721
  Streaming sessions also support runtime changes:
547
722
 
548
723
  ```typescript
@@ -594,9 +769,11 @@ All API methods expect decoded PCM samples; file decoding/resampling is handled
594
769
 
595
770
  ### Offline mode (`transcribeOffline`)
596
771
 
597
- 1. Single `whisper_full()` call on entire audio
598
- 2. Offline diarization (segmentation powerset embeddings → PLDA → AHC → VBx)
599
- 3. WhisperX-style alignment (speaker assignment by maximum segment overlap)
772
+ 1. VAD silence filter (optional; compresses silence >2s to 2s when `vadModelPath` is provided)
773
+ 2. Single `whisper_full()` call on filtered audio
774
+ 3. Offline diarization (segmentation powerset embeddings → PLDA → AHC → VBx) on filtered audio
775
+ 4. WhisperX-style alignment (speaker assignment by maximum segment overlap)
776
+ 5. Return segments plus the filtered audio samples (`filteredAudio`, a `Float32Array`, present when a VAD model is loaded; timestamps are aligned to the filtered audio)
600
777
 
601
778
  ### Streaming mode (`transcribe` / `createSession`)
602
779
 
@@ -5,10 +5,11 @@ export declare class Pipeline {
5
5
  private constructor();
6
6
  static load(config: ModelConfig): Promise<Pipeline>;
7
7
  transcribe(audio: Float32Array): Promise<TranscriptionResult>;
8
- transcribeOffline(audio: Float32Array): Promise<TranscriptionResult>;
8
+ transcribeOffline(audio: Float32Array, onProgress?: (phase: number, progress: number) => void, onSegment?: (start: number, end: number, text: string) => void): Promise<TranscriptionResult>;
9
9
  setLanguage(language: string): void;
10
10
  setDecodeOptions(options: DecodeOptions): void;
11
11
  createSession(): PipelineSession;
12
+ setUseCoreml(useCoreml: boolean): Promise<void>;
12
13
  close(): void;
13
14
  get isClosed(): boolean;
14
15
  }
@@ -1 +1 @@
1
- {"version":3,"file":"Pipeline.d.ts","sourceRoot":"","sources":["../src/Pipeline.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,eAAe,EAAE,MAAM,sBAAsB,CAAC;AACvD,OAAO,KAAK,EAAE,WAAW,EAAE,mBAAmB,EAAE,aAAa,EAAE,MAAM,YAAY,CAAC;AAElF,qBAAa,QAAQ;IACnB,OAAO,CAAC,MAAM,CAAsB;IAEpC,OAAO;WAIM,IAAI,CAAC,MAAM,EAAE,WAAW,GAAG,OAAO,CAAC,QAAQ,CAAC;IAiBnD,UAAU,CAAC,KAAK,EAAE,YAAY,GAAG,OAAO,CAAC,mBAAmB,CAAC;IAO7D,iBAAiB,CAAC,KAAK,EAAE,YAAY,GAAG,OAAO,CAAC,mBAAmB,CAAC;IAO1E,WAAW,CAAC,QAAQ,EAAE,MAAM,GAAG,IAAI;IAKnC,gBAAgB,CAAC,OAAO,EAAE,aAAa,GAAG,IAAI;IAK9C,aAAa,IAAI,eAAe;IAWhC,KAAK,IAAI,IAAI;IACb,IAAI,QAAQ,IAAI,OAAO,CAAiC;CACzD"}
1
+ {"version":3,"file":"Pipeline.d.ts","sourceRoot":"","sources":["../src/Pipeline.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,eAAe,EAAE,MAAM,sBAAsB,CAAC;AACvD,OAAO,KAAK,EAAE,WAAW,EAAE,mBAAmB,EAAE,aAAa,EAAE,MAAM,YAAY,CAAC;AAElF,qBAAa,QAAQ;IACnB,OAAO,CAAC,MAAM,CAAsB;IAEpC,OAAO;WAIM,IAAI,CAAC,MAAM,EAAE,WAAW,GAAG,OAAO,CAAC,QAAQ,CAAC;IAkBnD,UAAU,CAAC,KAAK,EAAE,YAAY,GAAG,OAAO,CAAC,mBAAmB,CAAC;IAO7D,iBAAiB,CACrB,KAAK,EAAE,YAAY,EACnB,UAAU,CAAC,EAAE,CAAC,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,KAAK,IAAI,EACtD,SAAS,CAAC,EAAE,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,KAAK,IAAI,GAC7D,OAAO,CAAC,mBAAmB,CAAC;IAO/B,WAAW,CAAC,QAAQ,EAAE,MAAM,GAAG,IAAI;IAKnC,gBAAgB,CAAC,OAAO,EAAE,aAAa,GAAG,IAAI;IAK9C,aAAa,IAAI,eAAe;IAW1B,YAAY,CAAC,SAAS,EAAE,OAAO,GAAG,OAAO,CAAC,IAAI,CAAC;IAKrD,KAAK,IAAI,IAAI;IACb,IAAI,QAAQ,IAAI,OAAO,CAAiC;CACzD"}
package/dist/Pipeline.js CHANGED
@@ -21,6 +21,7 @@ export class Pipeline {
21
21
  accessSync(config.vadModelPath);
22
22
  const binding = getBinding();
23
23
  const native = new binding.PipelineModel(config);
24
+ await native.loadModels();
24
25
  return new Pipeline(native);
25
26
  }
26
27
  async transcribe(audio) {
@@ -32,14 +33,14 @@ export class Pipeline {
32
33
  throw new Error('Audio must not be empty');
33
34
  return this.native.transcribe(audio);
34
35
  }
35
- async transcribeOffline(audio) {
36
+ async transcribeOffline(audio, onProgress, onSegment) {
36
37
  if (this.native.isClosed)
37
38
  throw new Error('Pipeline is closed');
38
39
  if (!(audio instanceof Float32Array))
39
40
  throw new TypeError('Expected Float32Array');
40
41
  if (audio.length === 0)
41
42
  throw new Error('Audio must not be empty');
42
- return this.native.transcribeOffline(audio);
43
+ return this.native.transcribeOffline(audio, onProgress, onSegment);
43
44
  }
44
45
  setLanguage(language) {
45
46
  if (this.native.isClosed)
@@ -59,6 +60,11 @@ export class Pipeline {
59
60
  session._setNative(nativeSession);
60
61
  return session;
61
62
  }
63
+ async setUseCoreml(useCoreml) {
64
+ if (this.native.isClosed)
65
+ throw new Error('Pipeline is closed');
66
+ return this.native.switchWhisperMode(useCoreml);
67
+ }
62
68
  close() { this.native.close(); }
63
69
  get isClosed() { return this.native.isClosed; }
64
70
  }
@@ -1 +1 @@
1
- {"version":3,"file":"Pipeline.js","sourceRoot":"","sources":["../src/Pipeline.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,SAAS,CAAC;AACrC,OAAO,EAAE,UAAU,EAA4B,MAAM,cAAc,CAAC;AACpE,OAAO,EAAE,eAAe,EAAE,MAAM,sBAAsB,CAAC;AAGvD,MAAM,OAAO,QAAQ;IACX,MAAM,CAAsB;IAEpC,YAAoB,MAA2B;QAC7C,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;IACvB,CAAC;IAED,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,MAAmB;QACnC,MAAM,aAAa,GAAG;YACpB,MAAM,CAAC,YAAY;YACnB,MAAM,CAAC,YAAY;YACnB,MAAM,CAAC,QAAQ;YACf,MAAM,CAAC,UAAU;YACjB,MAAM,CAAC,aAAa;YACpB,MAAM,CAAC,gBAAgB;SACxB,CAAC;QACF,KAAK,MAAM,IAAI,IAAI,aAAa;YAAE,UAAU,CAAC,IAAI,CAAC,CAAC;QACnD,IAAI,MAAM,CAAC,YAAY;YAAE,UAAU,CAAC,MAAM,CAAC,YAAY,CAAC,CAAC;QAEzD,MAAM,OAAO,GAAG,UAAU,EAAE,CAAC;QAC7B,MAAM,MAAM,GAAG,IAAI,OAAO,CAAC,aAAa,CAAC,MAAM,CAAC,CAAC;QACjD,OAAO,IAAI,QAAQ,CAAC,MAAM,CAAC,CAAC;IAC9B,CAAC;IAED,KAAK,CAAC,UAAU,CAAC,KAAmB;QAClC,IAAI,IAAI,CAAC,MAAM,CAAC,QAAQ;YAAE,MAAM,IAAI,KAAK,CAAC,oBAAoB,CAAC,CAAC;QAChE,IAAI,CAAC,CAAC,KAAK,YAAY,YAAY,CAAC;YAAE,MAAM,IAAI,SAAS,CAAC,uBAAuB,CAAC,CAAC;QACnF,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;YAAE,MAAM,IAAI,KAAK,CAAC,yBAAyB,CAAC,CAAC;QACnE,OAAO,IAAI,CAAC,MAAM,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC;IACvC,CAAC;IAED,KAAK,CAAC,iBAAiB,CAAC,KAAmB;QACzC,IAAI,IAAI,CAAC,MAAM,CAAC,QAAQ;YAAE,MAAM,IAAI,KAAK,CAAC,oBAAoB,CAAC,CAAC;QAChE,IAAI,CAAC,CAAC,KAAK,YAAY,YAAY,CAAC;YAAE,MAAM,IAAI,SAAS,CAAC,uBAAuB,CAAC,CAAC;QACnF,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;YAAE,MAAM,IAAI,KAAK,CAAC,yBAAyB,CAAC,CAAC;QACnE,OAAO,IAAI,CAAC,MAAM,CAAC,iBAAiB,CAAC,KAAK,CAAC,CAAC;IAC9C,CAAC;IAED,WAAW,CAAC,QAAgB;QAC1B,IAAI,IAAI,CAAC,MAAM,CAAC,QAAQ;YAAE,MAAM,IAAI,KAAK,CAAC,oBAAoB,CAAC,CAAC;QAChE,IAAI,CAAC,MAAM,CAAC,WAAW,CAAC,QAAQ,CAAC,CAAC;IACpC,CAAC;IAED,gBAAgB,CAAC,OAAsB;QACrC,IAAI,IAAI,CAAC,MAAM,CAAC,QAAQ;YAAE,MAAM,IAAI,KAAK,CAAC,oBAAoB,CAAC,CAAC;QAChE,IAAI,CAAC,MAAM,CAAC,gBAAgB,CAAC,EAAE,GAAG,OAAO,EAAE,CAAC,CAAC;IAC/C,CAAC;IAED,aAAa;QACX,IAAI,IAAI,CAAC,MAAM,CAAC,QAAQ;YAAE,MAAM,IAAI,KAAK,CAAC,oBAAoB,CAAC,CAAC;QAChE,MAAM,OAAO,GAAG,IAAI,eAAe,EAAE,CAAC;QACtC,MAAM,aAAa,GAAG,IAAI,CAAC,MAAM,CAAC,aA
Aa,CAC7C,CAAC,QAAe,EAAE,EAAE,CAAC,OAAO,CAAC,mBAAmB,CAAC,QAAQ,CAAC,EAC1D,CAAC,KAAmB,EAAE,EAAE,CAAC,OAAO,CAAC,gBAAgB,CAAC,KAAK,CAAC,CACzD,CAAC;QACF,OAAO,CAAC,UAAU,CAAC,aAAa,CAAC,CAAC;QAClC,OAAO,OAAO,CAAC;IACjB,CAAC;IAED,KAAK,KAAW,IAAI,CAAC,MAAM,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC;IACtC,IAAI,QAAQ,KAAc,OAAO,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC;CACzD"}
1
+ {"version":3,"file":"Pipeline.js","sourceRoot":"","sources":["../src/Pipeline.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,SAAS,CAAC;AACrC,OAAO,EAAE,UAAU,EAA4B,MAAM,cAAc,CAAC;AACpE,OAAO,EAAE,eAAe,EAAE,MAAM,sBAAsB,CAAC;AAGvD,MAAM,OAAO,QAAQ;IACX,MAAM,CAAsB;IAEpC,YAAoB,MAA2B;QAC7C,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;IACvB,CAAC;IAED,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,MAAmB;QACnC,MAAM,aAAa,GAAG;YACpB,MAAM,CAAC,YAAY;YACnB,MAAM,CAAC,YAAY;YACnB,MAAM,CAAC,QAAQ;YACf,MAAM,CAAC,UAAU;YACjB,MAAM,CAAC,aAAa;YACpB,MAAM,CAAC,gBAAgB;SACxB,CAAC;QACF,KAAK,MAAM,IAAI,IAAI,aAAa;YAAE,UAAU,CAAC,IAAI,CAAC,CAAC;QACnD,IAAI,MAAM,CAAC,YAAY;YAAE,UAAU,CAAC,MAAM,CAAC,YAAY,CAAC,CAAC;QAEzD,MAAM,OAAO,GAAG,UAAU,EAAE,CAAC;QAC7B,MAAM,MAAM,GAAG,IAAI,OAAO,CAAC,aAAa,CAAC,MAAM,CAAC,CAAC;QACjD,MAAM,MAAM,CAAC,UAAU,EAAE,CAAC;QAC1B,OAAO,IAAI,QAAQ,CAAC,MAAM,CAAC,CAAC;IAC9B,CAAC;IAED,KAAK,CAAC,UAAU,CAAC,KAAmB;QAClC,IAAI,IAAI,CAAC,MAAM,CAAC,QAAQ;YAAE,MAAM,IAAI,KAAK,CAAC,oBAAoB,CAAC,CAAC;QAChE,IAAI,CAAC,CAAC,KAAK,YAAY,YAAY,CAAC;YAAE,MAAM,IAAI,SAAS,CAAC,uBAAuB,CAAC,CAAC;QACnF,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;YAAE,MAAM,IAAI,KAAK,CAAC,yBAAyB,CAAC,CAAC;QACnE,OAAO,IAAI,CAAC,MAAM,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC;IACvC,CAAC;IAED,KAAK,CAAC,iBAAiB,CACrB,KAAmB,EACnB,UAAsD,EACtD,SAA8D;QAE9D,IAAI,IAAI,CAAC,MAAM,CAAC,QAAQ;YAAE,MAAM,IAAI,KAAK,CAAC,oBAAoB,CAAC,CAAC;QAChE,IAAI,CAAC,CAAC,KAAK,YAAY,YAAY,CAAC;YAAE,MAAM,IAAI,SAAS,CAAC,uBAAuB,CAAC,CAAC;QACnF,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;YAAE,MAAM,IAAI,KAAK,CAAC,yBAAyB,CAAC,CAAC;QACnE,OAAO,IAAI,CAAC,MAAM,CAAC,iBAAiB,CAAC,KAAK,EAAE,UAAU,EAAE,SAAS,CAAC,CAAC;IACrE,CAAC;IAED,WAAW,CAAC,QAAgB;QAC1B,IAAI,IAAI,CAAC,MAAM,CAAC,QAAQ;YAAE,MAAM,IAAI,KAAK,CAAC,oBAAoB,CAAC,CAAC;QAChE,IAAI,CAAC,MAAM,CAAC,WAAW,CAAC,QAAQ,CAAC,CAAC;IACpC,CAAC;IAED,gBAAgB,CAAC,OAAsB;QACrC,IAAI,IAAI,CAAC,MAAM,CAAC,QAAQ;YAAE,MAAM,IAAI,KAAK,CAAC,oBAAoB,CAAC,CAAC;QAChE,IAAI,CAAC,MAAM,CAAC,gBAAgB,CAAC,EAAE,GAAG,OAAO,EAAE,CAAC,CAAC;IAC/C,CAAC;IAED,aAAa;QACX,IAAI,IAAI,CAAC,MAAM,CAAC,QAAQ;YAAE,MAAM,IAAI,KAAK,CAAC,oBAAoB,CAAC,CAAC;QAC
hE,MAAM,OAAO,GAAG,IAAI,eAAe,EAAE,CAAC;QACtC,MAAM,aAAa,GAAG,IAAI,CAAC,MAAM,CAAC,aAAa,CAC7C,CAAC,QAAe,EAAE,EAAE,CAAC,OAAO,CAAC,mBAAmB,CAAC,QAAQ,CAAC,EAC1D,CAAC,KAAmB,EAAE,EAAE,CAAC,OAAO,CAAC,gBAAgB,CAAC,KAAK,CAAC,CACzD,CAAC;QACF,OAAO,CAAC,UAAU,CAAC,aAAa,CAAC,CAAC;QAClC,OAAO,OAAO,CAAC;IACjB,CAAC;IAED,KAAK,CAAC,YAAY,CAAC,SAAkB;QACnC,IAAI,IAAI,CAAC,MAAM,CAAC,QAAQ;YAAE,MAAM,IAAI,KAAK,CAAC,oBAAoB,CAAC,CAAC;QAChE,OAAO,IAAI,CAAC,MAAM,CAAC,iBAAiB,CAAC,SAAS,CAAC,CAAC;IAClD,CAAC;IAED,KAAK,KAAW,IAAI,CAAC,MAAM,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC;IACtC,IAAI,QAAQ,KAAc,OAAO,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC;CACzD"}
package/dist/binding.d.ts CHANGED
@@ -1,12 +1,15 @@
1
1
  import type { ModelConfig, TranscriptionResult } from './types.js';
2
2
  export interface NativePipelineModel {
3
3
  transcribe(audio: Float32Array): Promise<TranscriptionResult>;
4
- transcribeOffline(audio: Float32Array): Promise<TranscriptionResult>;
4
+ transcribeOffline(audio: Float32Array, onProgress?: (phase: number, progress: number) => void, onSegment?: (start: number, end: number, text: string) => void): Promise<TranscriptionResult>;
5
5
  setLanguage(language: string): void;
6
6
  setDecodeOptions(options: Record<string, unknown>): void;
7
7
  createSession(segmentsCb: (segments: any[]) => void, audioCb: (audio: Float32Array) => void): NativePipelineSession;
8
8
  close(): void;
9
9
  isClosed: boolean;
10
+ loadModels(): Promise<void>;
11
+ isLoaded: boolean;
12
+ switchWhisperMode(useCoreml: boolean): Promise<void>;
10
13
  }
11
14
  export interface NativePipelineSession {
12
15
  push(audio: Float32Array): Promise<boolean[]>;
@@ -1 +1 @@
1
- {"version":3,"file":"binding.d.ts","sourceRoot":"","sources":["../src/binding.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,WAAW,EAAE,mBAAmB,EAAE,MAAM,YAAY,CAAC;AAInE,MAAM,WAAW,mBAAmB;IAClC,UAAU,CAAC,KAAK,EAAE,YAAY,GAAG,OAAO,CAAC,mBAAmB,CAAC,CAAC;IAC9D,iBAAiB,CAAC,KAAK,EAAE,YAAY,GAAG,OAAO,CAAC,mBAAmB,CAAC,CAAC;IACrE,WAAW,CAAC,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAC;IACpC,gBAAgB,CAAC,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,IAAI,CAAC;IACzD,aAAa,CACX,UAAU,EAAE,CAAC,QAAQ,EAAE,GAAG,EAAE,KAAK,IAAI,EACrC,OAAO,EAAE,CAAC,KAAK,EAAE,YAAY,KAAK,IAAI,GACrC,qBAAqB,CAAC;IACzB,KAAK,IAAI,IAAI,CAAC;IACd,QAAQ,EAAE,OAAO,CAAC;CACnB;AAED,MAAM,WAAW,qBAAqB;IACpC,IAAI,CAAC,KAAK,EAAE,YAAY,GAAG,OAAO,CAAC,OAAO,EAAE,CAAC,CAAC;IAC9C,WAAW,CAAC,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAC;IACpC,gBAAgB,CAAC,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,IAAI,CAAC;IACzD,QAAQ,IAAI,OAAO,CAAC,mBAAmB,CAAC,CAAC;IACzC,KAAK,IAAI,IAAI,CAAC;IACd,QAAQ,EAAE,OAAO,CAAC;CACnB;AAED,MAAM,WAAW,aAAa;IAC5B,aAAa,EAAE,KAAK,MAAM,EAAE,WAAW,KAAK,mBAAmB,CAAC;IAChE,eAAe,EAAE,KAAK,GAAG,IAAI,EAAE,OAAO,EAAE,KAAK,qBAAqB,CAAC;CACpE;AAoCD,wBAAgB,UAAU,IAAI,aAAa,CAyB1C"}
1
+ {"version":3,"file":"binding.d.ts","sourceRoot":"","sources":["../src/binding.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,WAAW,EAAE,mBAAmB,EAAE,MAAM,YAAY,CAAC;AAInE,MAAM,WAAW,mBAAmB;IAClC,UAAU,CAAC,KAAK,EAAE,YAAY,GAAG,OAAO,CAAC,mBAAmB,CAAC,CAAC;IAC9D,iBAAiB,CAAC,KAAK,EAAE,YAAY,EAAE,UAAU,CAAC,EAAE,CAAC,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,KAAK,IAAI,EAAE,SAAS,CAAC,EAAE,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,KAAK,IAAI,GAAG,OAAO,CAAC,mBAAmB,CAAC,CAAC;IAC7L,WAAW,CAAC,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAC;IACpC,gBAAgB,CAAC,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,IAAI,CAAC;IACzD,aAAa,CACX,UAAU,EAAE,CAAC,QAAQ,EAAE,GAAG,EAAE,KAAK,IAAI,EACrC,OAAO,EAAE,CAAC,KAAK,EAAE,YAAY,KAAK,IAAI,GACrC,qBAAqB,CAAC;IACzB,KAAK,IAAI,IAAI,CAAC;IACd,QAAQ,EAAE,OAAO,CAAC;IAClB,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;IAC5B,QAAQ,EAAE,OAAO,CAAC;IAClB,iBAAiB,CAAC,SAAS,EAAE,OAAO,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;CACtD;AAED,MAAM,WAAW,qBAAqB;IACpC,IAAI,CAAC,KAAK,EAAE,YAAY,GAAG,OAAO,CAAC,OAAO,EAAE,CAAC,CAAC;IAC9C,WAAW,CAAC,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAC;IACpC,gBAAgB,CAAC,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,IAAI,CAAC;IACzD,QAAQ,IAAI,OAAO,CAAC,mBAAmB,CAAC,CAAC;IACzC,KAAK,IAAI,IAAI,CAAC;IACd,QAAQ,EAAE,OAAO,CAAC;CACnB;AAED,MAAM,WAAW,aAAa;IAC5B,aAAa,EAAE,KAAK,MAAM,EAAE,WAAW,KAAK,mBAAmB,CAAC;IAChE,eAAe,EAAE,KAAK,GAAG,IAAI,EAAE,OAAO,EAAE,KAAK,qBAAqB,CAAC;CACpE;AAoCD,wBAAgB,UAAU,IAAI,aAAa,CAyB1C"}
@@ -1 +1 @@
1
- {"version":3,"file":"binding.js","sourceRoot":"","sources":["../src/binding.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAE,MAAM,QAAQ,CAAC;AAIvC,MAAM,OAAO,GAAG,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AA6B/C,IAAI,aAAa,GAAyB,IAAI,CAAC;AAE/C,SAAS,cAAc;IACrB,IAAI,OAAO,CAAC,QAAQ,KAAK,QAAQ,EAAE,CAAC;QAClC,MAAM,IAAI,KAAK,CACb,yBAAyB,OAAO,CAAC,QAAQ,oDAAoD,CAC9F,CAAC;IACJ,CAAC;IAED,IAAI,OAAO,CAAC,IAAI,KAAK,OAAO,EAAE,CAAC;QAC7B,OAAO,iCAAiC,CAAC;IAC3C,CAAC;IAED,IAAI,OAAO,CAAC,IAAI,KAAK,KAAK,EAAE,CAAC;QAC3B,OAAO,+BAA+B,CAAC;IACzC,CAAC;IAED,MAAM,IAAI,KAAK,CACb,sCAAsC,OAAO,CAAC,IAAI,8CAA8C,CACjG,CAAC;AACJ,CAAC;AAED,SAAS,eAAe,CAAC,KAAc;IACrC,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,KAAK,IAAI,EAAE,CAAC;QAChD,OAAO,KAAK,CAAC;IACf,CAAC;IAED,MAAM,SAAS,GAAG,KAAgC,CAAC;IACnD,OAAO,CACL,OAAO,SAAS,CAAC,aAAa,KAAK,UAAU;QAC7C,OAAO,SAAS,CAAC,eAAe,KAAK,UAAU,CAChD,CAAC;AACJ,CAAC;AAED,MAAM,UAAU,UAAU;IACxB,IAAI,aAAa,KAAK,IAAI,EAAE,CAAC;QAC3B,OAAO,aAAa,CAAC;IACvB,CAAC;IAED,MAAM,WAAW,GAAG,cAAc,EAAE,CAAC;IAErC,IAAI,MAAe,CAAC;IACpB,IAAI,CAAC;QACH,MAAM,GAAG,OAAO,CAAC,WAAW,CAAC,CAAC;IAChC,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,OAAO,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;QACvE,MAAM,IAAI,KAAK,CACb,iCAAiC,WAAW,gEAAgE,OAAO,EAAE,CACtH,CAAC;IACJ,CAAC;IAED,IAAI,CAAC,eAAe,CAAC,MAAM,CAAC,EAAE,CAAC;QAC7B,MAAM,IAAI,KAAK,CACb,sCAAsC,WAAW,6DAA6D,CAC/G,CAAC;IACJ,CAAC;IAED,aAAa,GAAG,MAAM,CAAC;IACvB,OAAO,aAAa,CAAC;AACvB,CAAC"}
1
+ {"version":3,"file":"binding.js","sourceRoot":"","sources":["../src/binding.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAE,MAAM,QAAQ,CAAC;AAIvC,MAAM,OAAO,GAAG,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AAgC/C,IAAI,aAAa,GAAyB,IAAI,CAAC;AAE/C,SAAS,cAAc;IACrB,IAAI,OAAO,CAAC,QAAQ,KAAK,QAAQ,EAAE,CAAC;QAClC,MAAM,IAAI,KAAK,CACb,yBAAyB,OAAO,CAAC,QAAQ,oDAAoD,CAC9F,CAAC;IACJ,CAAC;IAED,IAAI,OAAO,CAAC,IAAI,KAAK,OAAO,EAAE,CAAC;QAC7B,OAAO,iCAAiC,CAAC;IAC3C,CAAC;IAED,IAAI,OAAO,CAAC,IAAI,KAAK,KAAK,EAAE,CAAC;QAC3B,OAAO,+BAA+B,CAAC;IACzC,CAAC;IAED,MAAM,IAAI,KAAK,CACb,sCAAsC,OAAO,CAAC,IAAI,8CAA8C,CACjG,CAAC;AACJ,CAAC;AAED,SAAS,eAAe,CAAC,KAAc;IACrC,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,KAAK,IAAI,EAAE,CAAC;QAChD,OAAO,KAAK,CAAC;IACf,CAAC;IAED,MAAM,SAAS,GAAG,KAAgC,CAAC;IACnD,OAAO,CACL,OAAO,SAAS,CAAC,aAAa,KAAK,UAAU;QAC7C,OAAO,SAAS,CAAC,eAAe,KAAK,UAAU,CAChD,CAAC;AACJ,CAAC;AAED,MAAM,UAAU,UAAU;IACxB,IAAI,aAAa,KAAK,IAAI,EAAE,CAAC;QAC3B,OAAO,aAAa,CAAC;IACvB,CAAC;IAED,MAAM,WAAW,GAAG,cAAc,EAAE,CAAC;IAErC,IAAI,MAAe,CAAC;IACpB,IAAI,CAAC;QACH,MAAM,GAAG,OAAO,CAAC,WAAW,CAAC,CAAC;IAChC,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,OAAO,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;QACvE,MAAM,IAAI,KAAK,CACb,iCAAiC,WAAW,gEAAgE,OAAO,EAAE,CACtH,CAAC;IACJ,CAAC;IAED,IAAI,CAAC,eAAe,CAAC,MAAM,CAAC,EAAE,CAAC;QAC7B,MAAM,IAAI,KAAK,CACb,sCAAsC,WAAW,6DAA6D,CAC/G,CAAC;IACJ,CAAC;IAED,aAAa,GAAG,MAAM,CAAC;IACvB,OAAO,aAAa,CAAC;AACvB,CAAC"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "pyannote-cpp-node",
3
- "version": "0.4.0",
3
+ "version": "0.6.0",
4
4
  "type": "module",
5
5
  "main": "./dist/index.js",
6
6
  "types": "./dist/index.d.ts",
@@ -17,8 +17,8 @@
17
17
  "access": "public"
18
18
  },
19
19
  "optionalDependencies": {
20
- "@pyannote-cpp-node/darwin-arm64": "0.4.0",
21
- "@pyannote-cpp-node/darwin-x64": "0.4.0"
20
+ "@pyannote-cpp-node/darwin-arm64": "0.6.0",
21
+ "@pyannote-cpp-node/darwin-x64": "0.6.0"
22
22
  },
23
23
  "devDependencies": {
24
24
  "typescript": "^5.7.0"