npm - sherpa-onnx-node - Version diff - 1.12.21 → 1.12.23 - Mend

sherpa-onnx-node 1.12.21 → 1.12.23

This diff compares the contents of two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Files changed (15)

package/addon.js +26 -0
package/audio-tagg.js +22 -3
package/keyword-spotter.js +31 -0
package/non-streaming-asr.js +97 -9
package/non-streaming-speaker-diarization.js +13 -13
package/non-streaming-speech-denoiser.js +12 -9
package/non-streaming-tts.js +12 -6
package/package.json +7 -7
package/punctuation.js +17 -1
package/sherpa-onnx.js +4 -0
package/speaker-identification.js +58 -10
package/spoken-language-identification.js +15 -8
package/streaming-asr.js +62 -3
package/types.js +665 -0
package/vad.js +54 -17

package/addon.js CHANGED Viewed

@@ -1,3 +1,5 @@
+/** @typedef {import('./types').WaveObject} WaveObject */
 const os = require('os');
 const path = require('path');
@@ -64,3 +66,27 @@ if (!found) {
   throw new Error(msg)
 }
+/**
+ * Read a wave file from disk.
+ * @function module.exports.readWave
+ * @param {string} filename
+ * @param {boolean} [enableExternalBuffer=true]
+ * @returns {WaveObject}
+ */
+/**
+ * Read a wave from binary buffer.
+ * @function module.exports.readWaveFromBinary
+ * @param {Uint8Array} data - Binary contents of a wave file.
+ * @param {boolean} [enableExternalBuffer=true]
+ * @returns {WaveObject}
+ */
+/**
+ * Write a wave file to disk.
+ * @function module.exports.writeWave
+ * @param {string} filename
+ * @param {WaveObject} obj - { samples: Float32Array, sampleRate: number }
+ * @returns {boolean}
+ */

package/audio-tagg.js CHANGED Viewed

@@ -1,20 +1,39 @@
+/** @typedef {import('./types').AudioTaggingConfig} AudioTaggingConfig */
+/** @typedef {import('./types').AudioEvent} AudioEvent */
+/** @typedef {import('./types').AudioTaggingHandle} AudioTaggingHandle */
+/** @typedef {import('./non-streaming-asr').OfflineStream} OfflineStream */
 const addon = require('./addon.js');
 const non_streaming_asr = require('./non-streaming-asr.js');
+/**
+ * AudioTagging utility.
+ * @class
+ */
 class AudioTagging {
+  /**
+   * Create an AudioTagging instance.
+   * @param {AudioTaggingConfig} config
+   */
   constructor(config) {
     this.handle = addon.createAudioTagging(config);
     this.config = config;
   }
+  /**
+   * Create an offline stream bound to this AudioTagging instance.
+   * @returns {OfflineStream}
+   */
   createStream() {
     return new non_streaming_asr.OfflineStream(
         addon.audioTaggingCreateOfflineStream(this.handle));
   }
-  /* Return an array. Each element is
-   * an object {name: "xxx", prob: xxx, index: xxx};
-   *
+  /**
+   * Compute audio tags from an offline stream.
+   * @param {OfflineStream} stream - An offline stream created by `AudioTagging.createStream()`.
+   * @param {number} [topK=-1] - Return top K results; -1 for all.
+   * @returns {AudioEvent[]}
    */
   compute(stream, topK = -1) {
     return addon.audioTaggingCompute(this.handle, stream.handle, topK);

package/keyword-spotter.js CHANGED Viewed

@@ -1,29 +1,60 @@
+/** @typedef {import('./types').KeywordSpotterConfig} KeywordSpotterConfig */
+/** @typedef {import('./types').KeywordResult} KeywordResult */
+/** @typedef {import('./streaming-asr').OnlineStream} OnlineStream */
 const addon = require('./addon.js');
 const streaming_asr = require('./streaming-asr.js');
+/**
+ * KeywordSpotter handles keyword detection.
+ */
 class KeywordSpotter {
+  /**
+   * @param {KeywordSpotterConfig} config
+   */
   constructor(config) {
     this.handle = addon.createKeywordSpotter(config);
     this.config = config
   }
+  /**
+   * Create an OnlineStream for the spotter.
+   * @returns {OnlineStream}
+   */
   createStream() {
     const handle = addon.createKeywordStream(this.handle);
     return new streaming_asr.OnlineStream(handle);
   }
+  /**
+   * @param {OnlineStream} stream
+   * @returns {boolean}
+   */
   isReady(stream) {
     return addon.isKeywordStreamReady(this.handle, stream.handle);
   }
+  /**
+   * Trigger decode on a stream.
+   * @param {OnlineStream} stream
+   */
   decode(stream) {
     addon.decodeKeywordStream(this.handle, stream.handle);
   }
+  /**
+   * Reset a stream.
+   * @param {OnlineStream} stream
+   */
   reset(stream) {
     addon.resetKeywordStream(this.handle, stream.handle);
   }
+  /**
+   * Get the keyword result for a stream.
+   * @param {OnlineStream} stream
+   * @returns {KeywordResult}
+   */
   getResult(stream) {
     const jsonStr = addon.getKeywordResultAsJson(this.handle, stream.handle);

package/non-streaming-asr.js CHANGED Viewed

@@ -1,40 +1,128 @@
+/** @typedef {import('./types').OfflineStreamObject} OfflineStreamObject */
+/** @typedef {import('./types').Waveform} Waveform */
+/**
+ * @typedef {import('./types').OfflineRecognizerConfig} OfflineRecognizerConfig
+ */
+/**
+ * @typedef {import('./types').OfflineRecognizerResult} OfflineRecognizerResult
+ */
 const addon = require('./addon.js');
+/**
+ * Internal symbol to mark async-created recognizers.
+ * Not accessible unless someone has a reference to this Symbol.
+ */
+const kFromAsyncFactory = Symbol('OfflineRecognizer.fromAsync');
+/**
+ * OfflineStream represents a synchronous offline audio stream.
+ */
 class OfflineStream {
+  /**
+   * @param {OfflineStreamObject|Object} handle
+   */
   constructor(handle) {
     this.handle = handle;
   }
-  // obj is {samples: samples, sampleRate: sampleRate}
-  // samples is a float32 array containing samples in the range [-1, 1]
-  // sampleRate is a number
+  /**
+   * Accept a chunk of waveform samples.
+   * @param {Waveform} obj - { samples: Float32Array, sampleRate: number }
+   */
   acceptWaveform(obj) {
-    addon.acceptWaveformOffline(this.handle, obj)
+    addon.acceptWaveformOffline(this.handle, obj);
   }
 }
+/**
+ * OfflineRecognizer wraps the native offline recognizer.
+ */
 class OfflineRecognizer {
-  constructor(config) {
-    this.handle = addon.createOfflineRecognizer(config);
-    this.config = config
+  /**
+   * Constructor (SYNC path).
+   *
+   * Users call:
+   *   new OfflineRecognizer(config)
+   *
+   * Async factory calls this with an internal descriptor.
+   *
+   * @param {OfflineRecognizerConfig | Object} configOrInternal
+   */
+  constructor(configOrInternal) {
+    // ----- async factory path -----
+    if (configOrInternal && typeof configOrInternal === 'object' &&
+        configOrInternal[kFromAsyncFactory]) {
+      this.handle = configOrInternal.handle;
+      this.config = configOrInternal.config;
+      return;
+    }
+    // ----- sync constructor path -----
+    this.config = configOrInternal;
+    this.handle = addon.createOfflineRecognizer(this.config);
+  }
+  /**
+   * Create an OfflineRecognizer asynchronously (non-blocking).
+   *
+   * @param {OfflineRecognizerConfig} config
+   * @returns {Promise<OfflineRecognizer>}
+   */
+  static async createAsync(config) {
+    const handle = await addon.createOfflineRecognizerAsync(config);
+    return new OfflineRecognizer({
+      [kFromAsyncFactory]: true,
+      handle,
+      config,
+    });
   }
+  /**
+   * Create a new OfflineStream bound to this recognizer.
+   * @returns {OfflineStream}
+   */
   createStream() {
     const handle = addon.createOfflineStream(this.handle);
     return new OfflineStream(handle);
   }
+  /**
+   * Replace the recognizer config at runtime.
+   * @param {OfflineRecognizerConfig} config
+   */
   setConfig(config) {
+    this.config = config;
     addon.offlineRecognizerSetConfig(this.handle, config);
   }
+  /**
+   * Decode an offline stream (synchronous).
+   * @param {OfflineStream} stream
+   */
   decode(stream) {
     addon.decodeOfflineStream(this.handle, stream.handle);
   }
+  /**
+   * Decode an offline stream asynchronously (non-blocking).
+   * @param {OfflineStream} stream
+   * @returns {Promise<OfflineRecognizerResult>}
+   */
+  async decodeAsync(stream) {
+    const jsonStr =
+        await addon.decodeOfflineStreamAsync(this.handle, stream.handle);
+    return JSON.parse(jsonStr);
+  }
+  /**
+   * Get recognition result for a stream.
+   * @param {OfflineStream} stream
+   * @returns {OfflineRecognizerResult}
+   */
   getResult(stream) {
     const jsonStr = addon.getOfflineStreamResultAsJson(stream.handle);
     return JSON.parse(jsonStr);
   }
 }
@@ -42,4 +130,4 @@ class OfflineRecognizer {
 module.exports = {
   OfflineRecognizer,
   OfflineStream,
-}
+};

package/non-streaming-speaker-diarization.js CHANGED Viewed

@@ -1,6 +1,12 @@
+/** @typedef {import('./types').OfflineSpeakerDiarizationConfig} OfflineSpeakerDiarizationConfig */
+/** @typedef {import('./types').SpeakerDiarizationSegment} SpeakerDiarizationSegment */
 const addon = require('./addon.js');
 class OfflineSpeakerDiarization {
+  /**
+   * @param {OfflineSpeakerDiarizationConfig} config
+   */
   constructor(config) {
     this.handle = addon.createOfflineSpeakerDiarization(config);
     this.config = config;
@@ -9,23 +15,17 @@ class OfflineSpeakerDiarization {
   }
   /**
-   * samples is a 1-d float32 array. Each element of the array should be
-   * in the range [-1, 1].
-   *
-   * We assume its sample rate equals to this.sampleRate.
-   *
-   * Returns an array of object, where an object is
-   *
-   *  {
-   *    "start": start_time_in_seconds,
-   *    "end": end_time_in_seconds,
-   *    "speaker": an_integer,
-   *  }
+   * @param {Float32Array} samples - 1-D float32 array in [-1, 1]
+   * @returns {SpeakerDiarizationSegment[]}
    */
   process(samples) {
     return addon.offlineSpeakerDiarizationProcess(this.handle, samples);
   }
+  /**
+   * Set clustering configuration.
+   * @param {{clustering: import('./types').FastClusteringConfig}} config
+   */
   setConfig(config) {
     addon.offlineSpeakerDiarizationSetConfig(this.handle, config);
     this.config.clustering = config.clustering;
@@ -34,4 +34,4 @@ class OfflineSpeakerDiarization {
 module.exports = {
   OfflineSpeakerDiarization,
-}
+}

package/non-streaming-speech-denoiser.js CHANGED Viewed

@@ -1,6 +1,13 @@
+/** @typedef {import('./types').OfflineSpeechDenoiserConfig} OfflineSpeechDenoiserConfig */
+/** @typedef {import('./types').GeneratedAudio} GeneratedAudio */
+/** @typedef {import('./types').AudioProcessRequest} AudioProcessRequest */
 const addon = require('./addon.js');
 class OfflineSpeechDenoiser {
+  /**
+   * @param {OfflineSpeechDenoiserConfig} config
+   */
   constructor(config) {
     this.handle = addon.createOfflineSpeechDenoiser(config);
     this.config = config;
@@ -9,14 +16,10 @@ class OfflineSpeechDenoiser {
         addon.offlineSpeechDenoiserGetSampleRateWrapper(this.handle);
   }
-  /*
-    obj is
-    {samples: samples, sampleRate: sampleRate, enableExternalBuffer: true}
-    samples is a float32 array containing samples in the range [-1, 1]
-    sampleRate is a number
-   return an object {samples: Float32Array, sampleRate: <a number>}
+  /**
+   * Run denoiser synchronously.
+   * @param {AudioProcessRequest} obj - { samples: Float32Array, sampleRate: number, enableExternalBuffer?: boolean }
+   * @returns {GeneratedAudio}
    */
   run(obj) {
     return addon.offlineSpeechDenoiserRunWrapper(this.handle, obj);
@@ -25,4 +28,4 @@ class OfflineSpeechDenoiser {
 module.exports = {
   OfflineSpeechDenoiser,
-}
+}

package/non-streaming-tts.js CHANGED Viewed

@@ -1,6 +1,13 @@
+/** @typedef {import('./types').OfflineTtsConfig} OfflineTtsConfig */
+/** @typedef {import('./types').TtsRequest} TtsRequest */
+/** @typedef {import('./types').GeneratedAudio} GeneratedAudio */
 const addon = require('./addon.js');
 class OfflineTts {
+  /**
+   * @param {OfflineTtsConfig} config
+   */
   constructor(config) {
     this.handle = addon.createOfflineTts(config);
     this.config = config;
@@ -9,11 +16,10 @@ class OfflineTts {
     this.sampleRate = addon.getOfflineTtsSampleRate(this.handle);
   }
-  /*
-   input obj: {text: "xxxx", sid: 0, speed: 1.0}
-   where text is a string, sid is a int32, speed is a float
-   return an object {samples: Float32Array, sampleRate: <a number>}
+  /**
+   * Generate audio synchronously.
+   * @param {TtsRequest} obj
+   * @returns {GeneratedAudio}
    */
   generate(obj) {
     return addon.offlineTtsGenerate(this.handle, obj);
@@ -22,4 +28,4 @@ class OfflineTts {
 module.exports = {
   OfflineTts,
-}
+}

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "sherpa-onnx-node",
-  "version": "1.12.21",
+  "version": "1.12.23",
   "description": "Speech-to-text, text-to-speech, speaker diarization, and speech enhancement using Next-gen Kaldi without internet connection",
   "main": "sherpa-onnx.js",
   "scripts": {
@@ -57,11 +57,11 @@
   },
   "homepage": "https://github.com/csukuangfj/sherpa-onnx#readme",
   "optionalDependencies": {
-    "sherpa-onnx-darwin-arm64": "^1.12.21",
-    "sherpa-onnx-darwin-x64": "^1.12.21",
-    "sherpa-onnx-linux-x64": "^1.12.21",
-    "sherpa-onnx-linux-arm64": "^1.12.21",
-    "sherpa-onnx-win-x64": "^1.12.21",
-    "sherpa-onnx-win-ia32": "^1.12.21"
+    "sherpa-onnx-darwin-arm64": "^1.12.23",
+    "sherpa-onnx-darwin-x64": "^1.12.23",
+    "sherpa-onnx-linux-x64": "^1.12.23",
+    "sherpa-onnx-linux-arm64": "^1.12.23",
+    "sherpa-onnx-win-x64": "^1.12.23",
+    "sherpa-onnx-win-ia32": "^1.12.23"
   }
 }

package/punctuation.js CHANGED Viewed

@@ -1,20 +1,36 @@
+/** @typedef {import('./types').OfflinePunctuationHandle} OfflinePunctuationHandle */
+/** @typedef {import('./types').OfflinePunctuationConfig} OfflinePunctuationConfig */
+/** @typedef {import('./types').OnlinePunctuationConfig} OnlinePunctuationConfig */
 const addon = require('./addon.js');
 class OfflinePunctuation {
+  /**
+   * @param {OfflinePunctuationConfig} config
+   */
   constructor(config) {
     this.handle = addon.createOfflinePunctuation(config);
     this.config = config;
   }
+  /**
+   * Add punctuation to `text` and return the punctuated text.
+   * @param {string} text
+   * @returns {string}
+   */
   addPunct(text) {
     return addon.offlinePunctuationAddPunct(this.handle, text);
   }
 }
 class OnlinePunctuation {
+  /**
+   * @param {OnlinePunctuationConfig} config
+   */
   constructor(config) {
     this.handle = addon.createOnlinePunctuation(config);
     this.config = config;
   }
+  /** @param {string} text @returns {string} */
   addPunct(text) {
     return addon.onlinePunctuationAddPunct(this.handle, text);
   }
@@ -23,4 +39,4 @@ class OnlinePunctuation {
 module.exports = {
   OfflinePunctuation,
   OnlinePunctuation,
-}
+}

package/sherpa-onnx.js CHANGED Viewed

@@ -1,3 +1,7 @@
+/** @typedef {import('./types').WaveObject} WaveObject */
+/** @typedef {import('./types').OnlineRecognizerResult} OnlineRecognizerResult */
+/** @typedef {import('./types').OfflineRecognizerResult} OfflineRecognizerResult */
 const addon = require('./addon.js')
 const streaming_asr = require('./streaming-asr.js');
 const non_streaming_asr = require('./non-streaming-asr.js');

package/speaker-identification.js CHANGED Viewed

@@ -1,29 +1,58 @@
+/** @typedef {import('./types').SpeakerEmbeddingEntry} SpeakerEmbeddingEntry */
+/** @typedef {import('./types').SpeakerEmbeddingManagerSearchObj} SpeakerEmbeddingManagerSearchObj */
+/** @typedef {import('./types').SpeakerEmbeddingManagerVerifyObj} SpeakerEmbeddingManagerVerifyObj */
+/** @typedef {import('./types').SpeakerEmbeddingExtractorConfig} SpeakerEmbeddingExtractorConfig */
+/** @typedef {import('./streaming-asr').OnlineStream} OnlineStream */
 const addon = require('./addon.js');
 const streaming_asr = require('./streaming-asr.js');
+/**
+ * SpeakerEmbeddingExtractor wraps native speaker embedding extractor.
+ */
 class SpeakerEmbeddingExtractor {
+  /**
+   * @param {SpeakerEmbeddingExtractorConfig} config
+   */
   constructor(config) {
     this.handle = addon.createSpeakerEmbeddingExtractor(config);
     this.config = config;
     this.dim = addon.speakerEmbeddingExtractorDim(this.handle);
   }
+  /**
+   * @returns {OnlineStream}
+   */
   createStream() {
     return new streaming_asr.OnlineStream(
         addon.speakerEmbeddingExtractorCreateStream(this.handle));
   }
+  /**
+   * @param {OnlineStream} stream
+   * @returns {boolean}
+   */
   isReady(stream) {
     return addon.speakerEmbeddingExtractorIsReady(this.handle, stream.handle);
   }
-  // return a float32 array
+  /**
+   * Compute embedding and return a Float32Array
+   * @param {OnlineStream} stream
+   * @param {boolean} [enableExternalBuffer=true]
+   * @returns {Float32Array}
+   */
   compute(stream, enableExternalBuffer = true) {
     return addon.speakerEmbeddingExtractorComputeEmbedding(
         this.handle, stream.handle, enableExternalBuffer);
   }
 }
+/**
+ * Flattens an array of Float32Arrays into a single Float32Array.
+ * @param {Float32Array[]} arrayList
+ * @returns {Float32Array}
+ */
 function flatten(arrayList) {
   let n = 0;
   for (let i = 0; i < arrayList.length; ++i) {
@@ -39,22 +68,29 @@ function flatten(arrayList) {
   return ans;
 }
+/**
+ * Manager for speaker embeddings.
+ */
 class SpeakerEmbeddingManager {
+  /**
+   * @param {number} dim - The embedding dimension
+   */
   constructor(dim) {
     this.handle = addon.createSpeakerEmbeddingManager(dim);
     this.dim = dim;
   }
-  /*
-   obj = {name: "xxx", v: a-float32-array}
+  /**
+   * @param {SpeakerEmbeddingEntry} obj
+   * @returns {boolean}
    */
   add(obj) {
     return addon.speakerEmbeddingManagerAdd(this.handle, obj);
   }
-  /*
-   * obj =
-   * {name: "xxx", v: [float32_array1, float32_array2, ..., float32_arrayn]
+  /**
+   * @param {{name:string, v: Float32Array[]}} obj
+   * @returns {boolean}
    */
   addMulti(obj) {
     const c = {
@@ -65,32 +101,44 @@ class SpeakerEmbeddingManager {
     return addon.speakerEmbeddingManagerAddListFlattened(this.handle, c);
   }
+  /**
+   * @param {string} name
+   * @returns {boolean}
+   */
   remove(name) {
     return addon.speakerEmbeddingManagerRemove(this.handle, name);
   }
-  /*
-   * obj = {v: a-float32-array, threshold: a-float }
+  /**
+   * @param {SpeakerEmbeddingManagerSearchObj} obj
+   * @returns {string}
    */
   search(obj) {
     return addon.speakerEmbeddingManagerSearch(this.handle, obj);
   }
-  /*
-   * obj = {name: 'xxx', v: a-float32-array, threshold: a-float }
+  /**
+   * @param {SpeakerEmbeddingManagerVerifyObj} obj
+   * @returns {boolean}
    */
   verify(obj) {
     return addon.speakerEmbeddingManagerVerify(this.handle, obj);
   }
+  /**
+   * @param {string} name
+   * @returns {boolean}
+   */
   contains(name) {
     return addon.speakerEmbeddingManagerContains(this.handle, name);
   }
+  /** @returns {number} */
   getNumSpeakers() {
     return addon.speakerEmbeddingManagerNumSpeakers(this.handle);
   }
+  /** @returns {string[]} */
   getAllSpeakerNames() {
     return addon.speakerEmbeddingManagerGetAllSpeakers(this.handle);
   }

package/spoken-language-identification.js CHANGED Viewed

@@ -1,24 +1,31 @@
+/** @typedef {import('./types').SpokenLanguageIdentificationConfig} SpokenLanguageIdentificationConfig */
+/** @typedef {import('./non-streaming-asr').OfflineStream} OfflineStream */
 const addon = require('./addon.js');
 const non_streaming_asr = require('./non-streaming-asr.js');
 class SpokenLanguageIdentification {
+  /**
+   * @param {SpokenLanguageIdentificationConfig} config
+   */
   constructor(config) {
     this.handle = addon.createSpokenLanguageIdentification(config);
     this.config = config;
   }
+  /**
+   * @returns {OfflineStream}
+   */
   createStream() {
     return new non_streaming_asr.OfflineStream(
         addon.createSpokenLanguageIdentificationOfflineStream(this.handle));
   }
-  // return a string containing the language code (2 characters),
-  // e.g., en, de, fr, es, zh
-  // en -> English
-  // de -> German
-  // fr -> French
-  // es -> Spanish
-  // zh -> Chinese
+  /**
+   * Return a 2-letter language code, e.g. 'en', 'de', 'fr', 'es', 'zh'
+   * @param {OfflineStream} stream
+   * @returns {string}
+   */
   compute(stream) {
     return addon.spokenLanguageIdentificationCompute(
         this.handle, stream.handle);
@@ -27,4 +34,4 @@ class SpokenLanguageIdentification {
 module.exports = {
   SpokenLanguageIdentification,
-}
+}