sherpa-onnx-node 1.13.1 → 1.13.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,5 @@
1
1
  /** @typedef {import('./types').KeywordSpotterConfig} KeywordSpotterConfig */
2
+ /** @typedef {import('./types').KeywordSpotterHandle} KeywordSpotterHandle */
2
3
  /** @typedef {import('./types').KeywordResult} KeywordResult */
3
4
  /** @typedef {import('./streaming-asr').OnlineStream} OnlineStream */
4
5
 
@@ -1,4 +1,6 @@
1
1
  /** @typedef {import('./types').OfflineStreamObject} OfflineStreamObject */
2
+ /** @typedef {import('./types').OfflineStreamHandle} OfflineStreamHandle */
3
+ /** @typedef {import('./types').OfflineRecognizerHandle} OfflineRecognizerHandle */
2
4
  /** @typedef {import('./types').Waveform} Waveform */
3
5
  /**
4
6
  * @typedef {import('./types').OfflineRecognizerConfig} OfflineRecognizerConfig
@@ -1,4 +1,5 @@
1
1
  /** @typedef {import('./types').OfflineSpeakerDiarizationConfig} OfflineSpeakerDiarizationConfig */
2
+ /** @typedef {import('./types').OfflineSpeakerDiarizationHandle} OfflineSpeakerDiarizationHandle */
2
3
  /** @typedef {import('./types').SpeakerDiarizationSegment} SpeakerDiarizationSegment */
3
4
 
4
5
  const addon = require('./addon.js');
@@ -1,4 +1,5 @@
1
1
  /** @typedef {import('./types').OfflineSpeechDenoiserConfig} OfflineSpeechDenoiserConfig */
2
+ /** @typedef {import('./types').OfflineSpeechDenoiserHandle} OfflineSpeechDenoiserHandle */
2
3
  /** @typedef {import('./types').GeneratedAudio} GeneratedAudio */
3
4
  /** @typedef {import('./types').AudioProcessRequest} AudioProcessRequest */
4
5
 
@@ -1,4 +1,5 @@
1
1
  /** @typedef {import('./types').OfflineTtsConfig} OfflineTtsConfig */
2
+ /** @typedef {import('./types').OfflineTtsHandle} OfflineTtsHandle */
2
3
  /** @typedef {import('./types').TtsRequest} TtsRequest */
3
4
  /** @typedef {import('./types').GeneratedAudio} GeneratedAudio */
4
5
 
@@ -83,7 +84,7 @@ class OfflineTts {
83
84
  *
84
85
  * The progress callback receives streaming audio chunks.
85
86
  *
86
- * @param {TtsRequest & { generationConfig?: object, onProgress?: (info: {
87
+ * @param {TtsRequest & { generationConfig?: GenerationConfig, onProgress?: (info: {
87
88
  * samples: Float32Array, progress: number }) => number | boolean | void
88
89
  * }} obj
89
90
  * @returns {Promise<GeneratedAudio>}
@@ -1,4 +1,5 @@
1
1
  /** @typedef {import('./types').OnlineSpeechDenoiserConfig} OnlineSpeechDenoiserConfig */
2
+ /** @typedef {import('./types').OnlineSpeechDenoiserHandle} OnlineSpeechDenoiserHandle */
2
3
  /** @typedef {import('./types').GeneratedAudio} GeneratedAudio */
3
4
  /** @typedef {import('./types').AudioProcessRequest} AudioProcessRequest */
4
5
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "sherpa-onnx-node",
3
- "version": "1.13.1",
3
+ "version": "1.13.3",
4
4
  "description": "Speech-to-text, text-to-speech, speaker diarization, and speech enhancement using Next-gen Kaldi without internet connection",
5
5
  "main": "sherpa-onnx.js",
6
6
  "scripts": {
@@ -57,11 +57,11 @@
57
57
  },
58
58
  "homepage": "https://github.com/csukuangfj/sherpa-onnx#readme",
59
59
  "optionalDependencies": {
60
- "sherpa-onnx-darwin-arm64": "^1.13.1",
61
- "sherpa-onnx-darwin-x64": "^1.13.1",
62
- "sherpa-onnx-linux-x64": "^1.13.1",
63
- "sherpa-onnx-linux-arm64": "^1.13.1",
64
- "sherpa-onnx-win-x64": "^1.13.1",
65
- "sherpa-onnx-win-ia32": "^1.13.1"
60
+ "sherpa-onnx-darwin-arm64": "^1.13.3",
61
+ "sherpa-onnx-darwin-x64": "^1.13.3",
62
+ "sherpa-onnx-linux-x64": "^1.13.3",
63
+ "sherpa-onnx-linux-arm64": "^1.13.3",
64
+ "sherpa-onnx-win-x64": "^1.13.3",
65
+ "sherpa-onnx-win-ia32": "^1.13.3"
66
66
  }
67
67
  }
package/punctuation.js CHANGED
@@ -1,6 +1,7 @@
1
1
  /** @typedef {import('./types').OfflinePunctuationHandle} OfflinePunctuationHandle */
2
2
  /** @typedef {import('./types').OfflinePunctuationConfig} OfflinePunctuationConfig */
3
3
  /** @typedef {import('./types').OnlinePunctuationConfig} OnlinePunctuationConfig */
4
+ /** @typedef {import('./types').OnlinePunctuationHandle} OnlinePunctuationHandle */
4
5
 
5
6
  const addon = require('./addon.js');
6
7
 
package/resampler.js ADDED
@@ -0,0 +1,80 @@
1
+ /** @typedef {import('./types').LinearResamplerHandle} LinearResamplerHandle */
2
+
3
+ const addon = require('./addon.js');
4
+
5
+ /**
6
+ * A linear resampler that converts audio from one sample rate to another.
7
+ */
8
+ class LinearResampler {
9
+ /**
10
+ * Create a linear resampler.
11
+ *
12
+ * @param {number} inputSampleRate - Input sample rate in Hz.
13
+ * @param {number} outputSampleRate - Output sample rate in Hz.
14
+ */
15
+ constructor(inputSampleRate, outputSampleRate) {
16
+ /** @type {LinearResamplerHandle} */
17
+ this.handle =
18
+ addon.createLinearResampler(inputSampleRate, outputSampleRate);
19
+ this.inputSampleRate = inputSampleRate;
20
+ this.outputSampleRate = outputSampleRate;
21
+ }
22
+
23
+ /**
24
+ * Resample a chunk of audio samples.
25
+ *
26
+ * Call this for each chunk of input audio. For the final chunk, call
27
+ * {@link flush} instead so that any internally buffered samples are
28
+ * emitted.
29
+ *
30
+ * @param {Float32Array} samples - Input audio samples.
31
+ * @returns {Float32Array} Resampled audio samples.
32
+ */
33
+ resample(samples) {
34
+ return addon.resampleLinear(this.handle, samples, 0);
35
+ }
36
+
37
+ /**
38
+ * Resample the final chunk of audio and flush internal buffers.
39
+ *
40
+ * This is the same as {@link resample} but sets flush=1 so that any
41
+ * remaining samples buffered inside the resampler are emitted. Call
42
+ * this once, passing the last chunk of input audio as `samples`.
43
+ *
44
+ * @param {Float32Array} samples - The final chunk of input audio samples.
45
+ * @returns {Float32Array} Resampled audio samples including buffered tail.
46
+ */
47
+ flush(samples) {
48
+ return addon.resampleLinear(this.handle, samples, 1);
49
+ }
50
+
51
+ /**
52
+ * Reset the resampler to its initial state, discarding any internal
53
+ * buffered samples.
54
+ */
55
+ reset() {
56
+ addon.linearResamplerReset(this.handle);
57
+ }
58
+
59
+ /**
60
+ * Get the input sample rate.
61
+ *
62
+ * @returns {number} Input sample rate in Hz.
63
+ */
64
+ getInputSampleRate() {
65
+ return addon.linearResamplerGetInputSampleRate(this.handle);
66
+ }
67
+
68
+ /**
69
+ * Get the output sample rate.
70
+ *
71
+ * @returns {number} Output sample rate in Hz.
72
+ */
73
+ getOutputSampleRate() {
74
+ return addon.linearResamplerGetOutputSampleRate(this.handle);
75
+ }
76
+ }
77
+
78
+ module.exports = {
79
+ LinearResampler,
80
+ }
package/sherpa-onnx.js CHANGED
@@ -19,6 +19,7 @@ const kws = require('./keyword-spotter.js');
19
19
  const sd = require('./non-streaming-speaker-diarization.js');
20
20
  const speech_denoiser = require('./non-streaming-speech-denoiser.js');
21
21
  const online_speech_denoiser = require('./online-speech-denoiser.js');
22
+ const resampler = require('./resampler.js');
22
23
 
23
24
  module.exports = {
24
25
  OnlineRecognizer : streaming_asr.OnlineRecognizer,
@@ -40,6 +41,7 @@ module.exports = {
40
41
  OfflineSpeakerDiarization : sd.OfflineSpeakerDiarization,
41
42
  OfflineSpeechDenoiser : speech_denoiser.OfflineSpeechDenoiser,
42
43
  OnlineSpeechDenoiser : online_speech_denoiser.OnlineSpeechDenoiser,
44
+ LinearResampler : resampler.LinearResampler,
43
45
  version : addon.version,
44
46
  gitSha1 : addon.gitSha1,
45
47
  gitDate : addon.gitDate,
@@ -2,6 +2,8 @@
2
2
  /** @typedef {import('./types').SpeakerEmbeddingManagerSearchObj} SpeakerEmbeddingManagerSearchObj */
3
3
  /** @typedef {import('./types').SpeakerEmbeddingManagerVerifyObj} SpeakerEmbeddingManagerVerifyObj */
4
4
  /** @typedef {import('./types').SpeakerEmbeddingExtractorConfig} SpeakerEmbeddingExtractorConfig */
5
+ /** @typedef {import('./types').SpeakerEmbeddingExtractorHandle} SpeakerEmbeddingExtractorHandle */
6
+ /** @typedef {import('./types').SpeakerEmbeddingManagerHandle} SpeakerEmbeddingManagerHandle */
5
7
  /** @typedef {import('./streaming-asr').OnlineStream} OnlineStream */
6
8
 
7
9
  const addon = require('./addon.js');
@@ -1,4 +1,5 @@
1
1
  /** @typedef {import('./types').SpokenLanguageIdentificationConfig} SpokenLanguageIdentificationConfig */
2
+ /** @typedef {import('./types').SpokenLanguageIdentificationHandle} SpokenLanguageIdentificationHandle */
2
3
  /** @typedef {import('./non-streaming-asr').OfflineStream} OfflineStream */
3
4
 
4
5
  const addon = require('./addon.js');
package/streaming-asr.js CHANGED
@@ -1,5 +1,7 @@
1
1
  /** @typedef {import('./types').OnlineStreamObject} OnlineStreamObject */
2
2
  /** @typedef {import('./types').OnlineRecognizerHandle} OnlineRecognizerHandle */
3
+ /** @typedef {import('./types').OnlineStreamHandle} OnlineStreamHandle */
4
+ /** @typedef {import('./types').DisplayHandle} DisplayHandle */
3
5
  /** @typedef {import('./types').DisplayObject} DisplayObject */
4
6
  /** @typedef {import('./types').OnlineRecognizerConfig} OnlineRecognizerConfig */
5
7
  /** @typedef {import('./types').Waveform} Waveform */
package/types.js CHANGED
@@ -52,7 +52,57 @@
52
52
 
53
53
  /**
54
54
  * @typedef {Object} OfflinePunctuationHandle
55
- * @see src/offline-punctuation.cc
55
+ * @see src/offline-punctuation.cc
56
+ */
57
+
58
+ /**
59
+ * @typedef {Object} LinearResamplerHandle
60
+ * @see src/resampler.cc
61
+ */
62
+
63
+ /**
64
+ * @typedef {Object} OfflineTtsHandle
65
+ * @see src/non-streaming-tts.cc
66
+ */
67
+
68
+ /**
69
+ * @typedef {Object} OnlinePunctuationHandle
70
+ * @see src/punctuation.cc
71
+ */
72
+
73
+ /**
74
+ * @typedef {Object} KeywordSpotterHandle
75
+ * @see src/keyword-spotter.cc
76
+ */
77
+
78
+ /**
79
+ * @typedef {Object} SpeakerEmbeddingExtractorHandle
80
+ * @see src/speaker-identification.cc
81
+ */
82
+
83
+ /**
84
+ * @typedef {Object} SpeakerEmbeddingManagerHandle
85
+ * @see src/speaker-identification.cc
86
+ */
87
+
88
+ /**
89
+ * @typedef {Object} SpokenLanguageIdentificationHandle
90
+ * @see src/spoken-language-identification.cc
91
+ */
92
+
93
+ /**
94
+ * @typedef {Object} OfflineSpeakerDiarizationHandle
95
+ * @see src/non-streaming-speaker-diarization.cc
96
+ */
97
+
98
+ /**
99
+ * @typedef {Object} OfflineSpeechDenoiserHandle
100
+ * @see src/non-streaming-speech-denoiser.cc
101
+ */
102
+
103
+ /**
104
+ * @typedef {Object} OnlineSpeechDenoiserHandle
105
+ * @see src/online-speech-denoiser.cc
56
106
  */
57
107
 
58
108
  /**
@@ -234,19 +284,19 @@
234
284
  * @property {number} [useInverseTextNormalization]
235
285
  */
236
286
 
237
- /**
238
- * Offline Cohere Transcribe model config
239
- * @typedef {Object} OfflineCohereTranscribeModelConfig
240
- * @property {string} [encoder]
241
- * @property {string} [decoder]
242
- * @property {string} [language]
243
- * @property {number} [usePunct]
244
- * @property {number} [useItn]
245
- */
246
-
247
- /**
248
- * Offline model config.
249
- * @typedef {Object} OfflineModelConfig
287
+ /**
288
+ * Offline Cohere Transcribe model config
289
+ * @typedef {Object} OfflineCohereTranscribeModelConfig
290
+ * @property {string} [encoder]
291
+ * @property {string} [decoder]
292
+ * @property {string} [language]
293
+ * @property {number} [usePunct]
294
+ * @property {number} [useItn]
295
+ */
296
+
297
+ /**
298
+ * Offline model config.
299
+ * @typedef {Object} OfflineModelConfig
250
300
  * @property {OfflineTransducerModelConfig} [transducer]
251
301
  * @property {OfflineParaformerModelConfig} [paraformer]
252
302
  * @property {OfflineZipformerCtcModelConfig} [zipformerCtc]
@@ -255,14 +305,14 @@
255
305
  * @property {OfflineMedAsrCtcModelConfig} [medasr]
256
306
  * @property {OfflineDolphinModelConfig} [dolphin]
257
307
  * @property {OfflineNeMoCtcModelConfig} [nemoCtc]
258
- * @property {OfflineCanaryModelConfig} [canary]
259
- * @property {OfflineWhisperModelConfig} [whisper]
260
- * @property {OfflineFireRedAsrModelConfig} [fireRedAsr]
261
- * @property {OfflineMoonshineModelConfig} [moonshine]
262
- * @property {OfflineTdnnModelConfig} [tdnn]
263
- * @property {OfflineSenseVoiceModelConfig} [senseVoice]
264
- * @property {OfflineCohereTranscribeModelConfig} [cohereTranscribe]
265
- * @property {string} [tokens]
308
+ * @property {OfflineCanaryModelConfig} [canary]
309
+ * @property {OfflineWhisperModelConfig} [whisper]
310
+ * @property {OfflineFireRedAsrModelConfig} [fireRedAsr]
311
+ * @property {OfflineMoonshineModelConfig} [moonshine]
312
+ * @property {OfflineTdnnModelConfig} [tdnn]
313
+ * @property {OfflineSenseVoiceModelConfig} [senseVoice]
314
+ * @property {OfflineCohereTranscribeModelConfig} [cohereTranscribe]
315
+ * @property {string} [tokens]
266
316
  * @property {number} [numThreads]
267
317
  * @property {boolean|number} [debug]
268
318
  * @property {string} [provider]
@@ -512,24 +562,24 @@
512
562
  * @property {number} [lengthScale]
513
563
  */
514
564
 
515
- /**
516
- * @typedef {Object} OfflineTtsZipvoiceModelConfig
517
- * @property {string} [tokens]
518
- * @property {string} [encoder]
519
- * @property {string} [decoder]
520
- * @property {string} [vocoder]
521
- * @property {string} [dataDir]
522
- * @property {string} [lexicon]
523
- * @property {number} [featScale]
524
- * @property {number} [tShift]
525
- * @property {number} [targetRms]
526
- * @property {number} [guidanceScale]
527
- */
528
-
529
- /**
530
- * @typedef {Object} OfflineTtsPocketModelConfig
531
- * @property {string} [lmFlow]
532
- * @property {string} [lmMain]
565
+ /**
566
+ * @typedef {Object} OfflineTtsZipvoiceModelConfig
567
+ * @property {string} [tokens]
568
+ * @property {string} [encoder]
569
+ * @property {string} [decoder]
570
+ * @property {string} [vocoder]
571
+ * @property {string} [dataDir]
572
+ * @property {string} [lexicon]
573
+ * @property {number} [featScale]
574
+ * @property {number} [tShift]
575
+ * @property {number} [targetRms]
576
+ * @property {number} [guidanceScale]
577
+ */
578
+
579
+ /**
580
+ * @typedef {Object} OfflineTtsPocketModelConfig
581
+ * @property {string} [lmFlow]
582
+ * @property {string} [lmMain]
533
583
  * @property {string} [encoder]
534
584
  * @property {string} [decoder]
535
585
  * @property {string} [textConditioner]
@@ -542,12 +592,12 @@
542
592
  * Offline TTS model config
543
593
  * @typedef {Object} OfflineTtsModelConfig
544
594
  * @property {OfflineTtsVitsModelConfig} [vits]
545
- * @property {OfflineTtsMatchaModelConfig} [matcha]
546
- * @property {OfflineTtsKokoroModelConfig} [kokoro]
547
- * @property {OfflineTtsKittenModelConfig} [kitten]
548
- * @property {OfflineTtsZipvoiceModelConfig} [zipvoice]
549
- * @property {OfflineTtsPocketModelConfig} [pocket]
550
- */
595
+ * @property {OfflineTtsMatchaModelConfig} [matcha]
596
+ * @property {OfflineTtsKokoroModelConfig} [kokoro]
597
+ * @property {OfflineTtsKittenModelConfig} [kitten]
598
+ * @property {OfflineTtsZipvoiceModelConfig} [zipvoice]
599
+ * @property {OfflineTtsPocketModelConfig} [pocket]
600
+ */
551
601
 
552
602
  /**
553
603
  * Offline TTS configuration (partial, commonly used props).
@@ -559,39 +609,39 @@
559
609
  * @property {string} [provider]
560
610
  */
561
611
 
562
- /**
563
- * Offline Speech Denoiser model config
564
- * @typedef {Object} OfflineSpeechDenoiserGtcrnModelConfig
565
- * @property {string} [model]
566
- */
567
-
568
- /**
569
- * Offline Speech Denoiser model config
570
- * @typedef {Object} OfflineSpeechDenoiserDpdfNetModelConfig
571
- * @property {string} [model]
572
- */
573
-
574
- /**
575
- * Offline Speech Denoiser model config
576
- * @typedef {Object} OfflineSpeechDenoiserModelConfig
577
- * @property {OfflineSpeechDenoiserGtcrnModelConfig} [gtcrn]
578
- * @property {OfflineSpeechDenoiserDpdfNetModelConfig} [dpdfnet]
579
- * @property {number} [numThreads]
580
- * @property {boolean|number} [debug]
581
- * @property {string} [provider]
582
- */
583
-
584
- /**
585
- * Offline Speech Denoiser configuration (partial).
586
- * @typedef {Object} OfflineSpeechDenoiserConfig
587
- * @property {OfflineSpeechDenoiserModelConfig} [model]
588
- */
589
-
590
- /**
591
- * Online Speech Denoiser configuration (partial).
592
- * @typedef {Object} OnlineSpeechDenoiserConfig
593
- * @property {OfflineSpeechDenoiserModelConfig} [model]
594
- */
612
+ /**
613
+ * Offline Speech Denoiser GTCRN model config
614
+ * @typedef {Object} OfflineSpeechDenoiserGtcrnModelConfig
615
+ * @property {string} [model]
616
+ */
617
+
618
+ /**
619
+ * Offline Speech Denoiser DPDFNet model config
620
+ * @typedef {Object} OfflineSpeechDenoiserDpdfNetModelConfig
621
+ * @property {string} [model]
622
+ */
623
+
624
+ /**
625
+ * Offline Speech Denoiser model config
626
+ * @typedef {Object} OfflineSpeechDenoiserModelConfig
627
+ * @property {OfflineSpeechDenoiserGtcrnModelConfig} [gtcrn]
628
+ * @property {OfflineSpeechDenoiserDpdfNetModelConfig} [dpdfnet]
629
+ * @property {number} [numThreads]
630
+ * @property {boolean|number} [debug]
631
+ * @property {string} [provider]
632
+ */
633
+
634
+ /**
635
+ * Offline Speech Denoiser configuration (partial).
636
+ * @typedef {Object} OfflineSpeechDenoiserConfig
637
+ * @property {OfflineSpeechDenoiserModelConfig} [model]
638
+ */
639
+
640
+ /**
641
+ * Online Speech Denoiser configuration (partial).
642
+ * @typedef {Object} OnlineSpeechDenoiserConfig
643
+ * @property {OfflineSpeechDenoiserModelConfig} [model]
644
+ */
595
645
 
596
646
  /**
597
647
  * Offline speaker segmentation (pyannote) model config
package/vad.js CHANGED
@@ -1,4 +1,5 @@
1
1
  /** @typedef {import('./types').CircularBufferHandle} CircularBufferHandle */
2
+ /** @typedef {import('./types').VoiceActivityDetectorHandle} VoiceActivityDetectorHandle */
2
3
  /** @typedef {import('./types').SpeechSegment} SpeechSegment */
3
4
  /** @typedef {import('./types').VadConfig} VadConfig */
4
5