@ai-coustics/aic-sdk 0.12.0 → 0.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. package/README.md +147 -47
  2. package/index.js +470 -111
  3. package/package.json +9 -8
package/README.md CHANGED
@@ -1,10 +1,11 @@
1
- # ai-coustics Speech Enhancement SDK for Node.js
1
+ # aic-sdk - Node.js Bindings for ai-coustics SDK
2
2
 
3
- Node.js bindings for the ai-coustics Speech Enhancement SDK.
3
+ Node.js wrapper for the ai-coustics Speech Enhancement SDK.
4
4
 
5
- ## Prerequisites
5
+ For comprehensive documentation, visit [docs.ai-coustics.com](https://docs.ai-coustics.com).
6
6
 
7
- - SDK license key from [ai-coustics Developer Portal](https://developers.ai-coustics.io)
7
+ > [!NOTE]
8
+ > This SDK requires a license key. Generate your key at [developers.ai-coustics.io](https://developers.ai-coustics.io).
8
9
 
9
10
  ## Installation
10
11
 
@@ -12,63 +13,162 @@ Node.js bindings for the ai-coustics Speech Enhancement SDK.
12
13
  npm install @ai-coustics/aic-sdk
13
14
  ```
14
15
 
15
- ### Supported Platforms
16
+ ## Quick Start
16
17
 
17
- - Linux: x64, ARM64 (GNU libc)
18
- - macOS: x64, ARM64
19
- - Windows: x64, ARM64 (MSVC)
18
+ ```javascript
19
+ const { Model, Processor } = require("@ai-coustics/aic-sdk");
20
+
21
+ // Get your license key from the environment variable
22
+ const licenseKey = process.env.AIC_SDK_LICENSE;
23
+
24
+ // Download and load a model (or download manually at https://artifacts.ai-coustics.io/)
25
+ const modelPath = Model.download("sparrow-xxs-48khz", "./models");
26
+ const model = Model.fromFile(modelPath);
27
+
28
+ // Get optimal configuration
29
+ const sampleRate = model.getOptimalSampleRate();
30
+ const numFrames = model.getOptimalNumFrames(sampleRate);
31
+ const numChannels = 2;
32
+
33
+ // Create and initialize processor
34
+ const processor = new Processor(model, licenseKey);
35
+ processor.initialize(sampleRate, numChannels, numFrames, false);
36
+
37
+ // Process audio (Float32Array, interleaved: [L0, R0, L1, R1, ...])
38
+ const audioBuffer = new Float32Array(numChannels * numFrames);
39
+ processor.processInterleaved(audioBuffer);
40
+ ```
41
+
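
The quick start above runs a single silent buffer through the processor. A common next step is enhancing a whole file; the sketch below is an editor's illustration (not part of the published README) that assumes a mono `input.wav`, the `wavefile` package (listed in this release's devDependencies), and the same model ID as above.

```javascript
// Illustrative offline example: enhance a mono WAV file chunk by chunk.
const fs = require("fs");
const { WaveFile } = require("wavefile");
const { Model, Processor } = require("@ai-coustics/aic-sdk");

const wav = new WaveFile(fs.readFileSync("input.wav"));
wav.toBitDepth("32f"); // the SDK works on 32-bit float samples
const samples = Float32Array.from(wav.getSamples(false)); // mono file assumed

const model = Model.fromFile(Model.download("sparrow-xxs-48khz", "./models"));
const processor = new Processor(model, process.env.AIC_SDK_LICENSE);
const frames = model.getOptimalNumFrames(wav.fmt.sampleRate);
processor.initialize(wav.fmt.sampleRate, 1, frames, false);

// Enhance in place, one optimal-sized block at a time (any short tail is left unprocessed here).
for (let i = 0; i + frames <= samples.length; i += frames) {
  processor.processInterleaved(samples.subarray(i, i + frames));
}

wav.fromScratch(1, wav.fmt.sampleRate, "32f", samples);
fs.writeFileSync("enhanced.wav", wav.toBuffer());
```
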
42
+ ## Usage
43
+
44
+ ### SDK Information
45
+
46
+ ```javascript
47
+ const { getVersion, getCompatibleModelVersion } = require("@ai-coustics/aic-sdk");
48
+
49
+ // Get SDK version
50
+ console.log(`SDK version: ${getVersion()}`);
20
51
 
21
- ## Example
52
+ // Get compatible model version
53
+ console.log(`Compatible model version: ${getCompatibleModelVersion()}`);
54
+ ```
55
+
56
+ ### Loading Models
57
+
58
+ Download models and find available IDs at [artifacts.ai-coustics.io](https://artifacts.ai-coustics.io/).
59
+
60
+ #### From File
61
+ ```javascript
62
+ const model = Model.fromFile("path/to/model.aicmodel");
63
+ ```
22
64
 
65
+ #### Download from CDN
23
66
  ```javascript
24
- const { Model, ModelType, EnhancementParameter } = require('@ai-coustics/aic-sdk');
67
+ const modelPath = Model.download("sparrow-xxs-48khz", "./models");
68
+ const model = Model.fromFile(modelPath);
69
+ ```
70
+
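
`Model.download()` is a blocking call (see the note in `index.js`), so repeated runs may want to reuse a model file that is already on disk. The helper below is a hypothetical sketch; the exact file name produced by `download()` is not specified here, so the lookup simply matches the model ID and the `.aicmodel` extension.

```javascript
const fs = require("fs");
const path = require("path");

// Hypothetical caching helper: download only when no matching file is found in `dir`.
function loadModelCached(modelId, dir) {
  const cached = fs.existsSync(dir)
    ? fs.readdirSync(dir).find((f) => f.includes(modelId) && f.endsWith(".aicmodel"))
    : undefined;
  const modelPath = cached ? path.join(dir, cached) : Model.download(modelId, dir);
  return Model.fromFile(modelPath);
}

const model = loadModelCached("sparrow-xxs-48khz", "./models");
```
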
71
+ ### Model Information
72
+
73
+ ```javascript
74
+ // Get model ID
75
+ const modelId = model.getId();
25
76
 
26
- const model = new Model(ModelType.QuailS48, process.env.AIC_SDK_LICENSE);
77
+ // Get optimal sample rate for the model
78
+ const optimalRate = model.getOptimalSampleRate();
27
79
 
28
- // Get optimal settings
29
- const sampleRate = model.optimalSampleRate();
30
- const numFrames = model.optimalNumFrames(sampleRate);
80
+ // Get optimal frame count for a specific sample rate
81
+ const optimalFrames = model.getOptimalNumFrames(48000);
82
+ ```
31
83
 
32
- // Initialize for stereo audio
33
- model.initialize(sampleRate, 2, numFrames, false);
84
+ ### Configuring the Processor
85
+
86
+ ```javascript
87
+ // Create processor
88
+ const processor = new Processor(model, licenseKey);
89
+
90
+ // Initialize with audio settings
91
+ processor.initialize(
92
+ sampleRate, // Sample rate in Hz (8000 - 192000)
93
+ numChannels, // Number of audio channels
94
+ numFrames, // Samples per channel per processing call
95
+ allowVariableFrames // Allow variable frame sizes (default: false)
96
+ );
97
+ ```
98
+
99
+ ### Processing Audio
100
+
101
+ ```javascript
102
+ // Interleaved audio: [L0, R0, L1, R1, ...]
103
+ const buffer = new Float32Array(numChannels * numFrames);
104
+ processor.processInterleaved(buffer);
105
+
106
+ // Sequential audio: [L0, L1, ..., R0, R1, ...]
107
+ processor.processSequential(buffer);
108
+
109
+ // Planar audio: separate buffer per channel
110
+ const left = new Float32Array(numFrames);
111
+ const right = new Float32Array(numFrames);
112
+ processor.processPlanar([left, right]);
113
+ ```
114
+
115
+ ### Processor Context
116
+
117
+ ```javascript
118
+ const { ProcessorParameter } = require("@ai-coustics/aic-sdk");
119
+
120
+ // Get processor context
121
+ const procCtx = processor.getProcessorContext();
122
+
123
+ // Get output delay in samples
124
+ const delay = procCtx.getOutputDelay();
125
+
126
+ // Reset processor state (clears internal buffers)
127
+ procCtx.reset();
34
128
 
35
129
  // Set enhancement parameters
36
- model.setParameter(EnhancementParameter.EnhancementLevel, 0.7);
37
- model.setParameter(EnhancementParameter.VoiceGain, 1.5);
38
-
39
- // Process interleaved audio
40
- const interleavedBuffer = new Float32Array(2 * numFrames);
41
- model.processInterleaved(interleavedBuffer, 2, numFrames);
42
-
43
- // Or process planar audio
44
- const planarBuffers = [
45
- new Float32Array(numFrames), // Left channel
46
- new Float32Array(numFrames), // Right channel
47
- ];
48
- model.processPlanar(planarBuffers);
130
+ procCtx.setParameter(ProcessorParameter.EnhancementLevel, 0.8);
131
+ procCtx.setParameter(ProcessorParameter.VoiceGain, 1.5);
132
+ procCtx.setParameter(ProcessorParameter.Bypass, 0.0);
133
+
134
+ // Get parameter values
135
+ const level = procCtx.getParameter(ProcessorParameter.EnhancementLevel);
136
+ console.log(`Enhancement level: ${level}`);
137
+ ```
138
+
139
+ ### Voice Activity Detection (VAD)
140
+
141
+ ```javascript
142
+ const { VadParameter } = require("@ai-coustics/aic-sdk");
143
+
144
+ // Get VAD context from processor
145
+ const vadCtx = processor.getVadContext();
146
+
147
+ // Configure VAD parameters
148
+ vadCtx.setParameter(VadParameter.Sensitivity, 6.0);
149
+ vadCtx.setParameter(VadParameter.SpeechHoldDuration, 0.05);
150
+ vadCtx.setParameter(VadParameter.MinimumSpeechDuration, 0.0);
151
+
152
+ // Get parameter values
153
+ const sensitivity = vadCtx.getParameter(VadParameter.Sensitivity);
154
+ console.log(`VAD sensitivity: ${sensitivity}`);
155
+
156
+ // Check for speech (after processing audio through the processor)
157
+ if (vadCtx.isSpeechDetected()) {
158
+ console.log("Speech detected!");
159
+ }
49
160
  ```
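
The VAD prediction only updates while audio is flowing through the processor and lags by the model's processing latency (see the notes in `index.js`), so it is usually polled once per processed block. An illustrative sketch reusing `processor` and `vadCtx` from the snippets above; `blocks` is a hypothetical iterable of interleaved `Float32Array` chunks:

```javascript
// Poll the VAD once per processed block; the prediction lags by the model's processing latency.
for (const block of blocks) {
  processor.processInterleaved(block);
  if (vadCtx.isSpeechDetected()) {
    console.log("Speech detected in this block");
  }
}
```
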
50
161
 
51
- ## Links
162
+ ## Examples
52
163
 
53
- - [Example Usage](examples/basic.js)
54
- - [API Reference](index.js)
55
- - [C SDK Reference](https://github.com/ai-coustics/aic-sdk-c/blob/HEAD/sdk-reference.md)
56
- - [Documentation](https://docs.ai-coustics.com/)
57
- - [Issues](https://github.com/ai-coustics/aic-sdk-node/issues)
164
+ See the [`basic.js`](examples/basic.js) file for a complete working example.
58
165
 
59
- ### Other SDKs
166
+ ## Documentation
60
167
 
61
- | Platform | Repository |
62
- |----------|------------|
63
- | **C** | [aic-sdk-c](https://github.com/ai-coustics/aic-sdk-c) |
64
- | **C++** | [aic-sdk-cpp](https://github.com/ai-coustics/aic-sdk-cpp) |
65
- | **Python** | [aic-sdk-py](https://github.com/ai-coustics/aic-sdk-py) |
66
- | **Rust** | [aic-sdk-rs](https://github.com/ai-coustics/aic-sdk-rs) |
67
- | **Web (WASM)** | [aic-sdk-wasm](https://github.com/ai-coustics/aic-sdk-wasm) |
68
- | **Demo Plugin** | [aic-sdk-plugin](https://github.com/ai-coustics/aic-sdk-plugin) |
168
+ - **Full Documentation**: [docs.ai-coustics.com](https://docs.ai-coustics.com)
169
+ - **Node.js API Reference**: See [index.js](index.js) for detailed JSDoc documentation
170
+ - **Available Models**: [artifacts.ai-coustics.io](https://artifacts.ai-coustics.io)
69
171
 
70
172
  ## License
71
173
 
72
- Dual-licensed:
73
- - Node.js wrapper code: Apache License 2.0
74
- - AIC SDK binaries: Proprietary AIC-SDK Binary License Agreement
174
+ This Node.js wrapper is distributed under the Apache 2.0 license. The core C SDK is distributed under the proprietary AIC-SDK license.
package/index.js CHANGED
@@ -1,7 +1,6 @@
1
1
  // Platform-specific binary loader
2
2
  let native;
3
3
  try {
4
- // Try to load platform-specific binary from optional dependencies
5
4
  const platform = process.platform;
6
5
  const arch = process.arch;
7
6
 
@@ -21,11 +20,9 @@ try {
21
20
  try {
22
21
  native = require(platformPackage);
23
22
  } catch (e) {
24
- // Fall back to local binary
25
23
  native = require("./index.node");
26
24
  }
27
25
  } else {
28
- // Fall back to local binary
29
26
  native = require("./index.node");
30
27
  }
31
28
  } catch (e) {
@@ -37,204 +34,566 @@ try {
37
34
  }
38
35
 
39
36
  /**
40
- * Model types available in the SDK
37
+ * Configurable parameters for audio enhancement.
38
+ * @enum {number}
41
39
  */
42
- const ModelType = {
43
- QuailL48: "QuailL48",
44
- QuailL16: "QuailL16",
45
- QuailL8: "QuailL8",
46
- QuailS48: "QuailS48",
47
- QuailS16: "QuailS16",
48
- QuailS8: "QuailS8",
49
- QuailXs: "QuailXs",
50
- QuailXxs: "QuailXxs",
51
- QuailSttL16: "QuailSttL16",
52
- QuailSttL8: "QuailSttL8",
53
- QuailSttS16: "QuailSttS16",
54
- QuailSttS8: "QuailSttS8",
55
- QuailVfSttL16: "QuailVfSttL16",
56
- };
40
+ const ProcessorParameter = {
41
+ /**
42
+ * Controls whether audio processing is bypassed while preserving algorithmic delay.
43
+ *
44
+ * When enabled, the input audio passes through unmodified, but the output is still
45
+ * delayed by the same amount as during normal processing. This ensures seamless
46
+ * transitions when toggling enhancement on/off without audible clicks or timing shifts.
47
+ *
48
+ * Range: 0.0 to 1.0
49
+ * - 0.0: Enhancement active (normal processing)
50
+ * - 1.0: Bypass enabled (latency-compensated passthrough)
51
+ *
52
+ * Default: 0.0
53
+ */
54
+ Bypass: native.PROCESSOR_PARAM_BYPASS,
57
55
 
58
- /**
59
- * Enhancement parameters
60
- */
61
- const EnhancementParameter = {
62
- Bypass: native.ENHANCEMENT_PARAM_BYPASS,
63
- EnhancementLevel: native.ENHANCEMENT_PARAM_ENHANCEMENT_LEVEL,
64
- VoiceGain: native.ENHANCEMENT_PARAM_VOICE_GAIN,
56
+ /**
57
+ * Controls the intensity of speech enhancement processing.
58
+ *
59
+ * Range: 0.0 to 1.0
60
+ * - 0.0: Bypass mode - original signal passes through unchanged
61
+ * - 1.0: Full enhancement - maximum noise reduction but also more audible artifacts
62
+ *
63
+ * Default: 1.0
64
+ */
65
+ EnhancementLevel: native.PROCESSOR_PARAM_ENHANCEMENT_LEVEL,
66
+
67
+ /**
68
+ * Compensates for perceived volume reduction after noise removal.
69
+ *
70
+ * Range: 0.1 to 4.0 (linear amplitude multiplier)
71
+ * - 0.1: Significant volume reduction (-20 dB)
72
+ * - 1.0: No gain change (0 dB, default)
73
+ * - 2.0: Double amplitude (+6 dB)
74
+ * - 4.0: Maximum boost (+12 dB)
75
+ *
76
+ * Formula: Gain (dB) = 20 × log₁₀(value)
77
+ *
78
+ * Default: 1.0
79
+ */
80
+ VoiceGain: native.PROCESSOR_PARAM_VOICE_GAIN,
65
81
  };
66
82
 
67
83
  /**
68
- * VAD (Voice Activity Detection) parameters
84
+ * Configurable parameters for Voice Activity Detection.
85
+ * @enum {number}
69
86
  */
70
87
  const VadParameter = {
88
+ /**
89
+ * Controls how long the VAD continues to detect speech after the audio signal
90
+ * no longer contains speech.
91
+ *
92
+ * The VAD reports speech detected if the audio signal contained speech in at least 50%
93
+ * of the frames processed in the last speech_hold_duration seconds.
94
+ *
95
+ * This affects the stability of speech detected -> not detected transitions.
96
+ *
97
+ * Note: The VAD returns a value per processed buffer, so this duration is rounded
98
+ * to the closest model window length.
99
+ *
100
+ * Range: 0.0 to 20x model window length (value in seconds)
101
+ * Default: 0.05 (50 ms)
102
+ */
71
103
  SpeechHoldDuration: native.VAD_PARAM_SPEECH_HOLD_DURATION,
104
+
105
+ /**
106
+ * Controls the sensitivity (energy threshold) of the VAD.
107
+ *
108
+ * This value is the energy threshold that an audio signal
109
+ * has to exceed in order to be considered speech.
110
+ *
111
+ * Range: 1.0 to 15.0
112
+ * Formula: Energy threshold = 10 ^ (-sensitivity)
113
+ * Default: 6.0
114
+ */
72
115
  Sensitivity: native.VAD_PARAM_SENSITIVITY,
116
+
117
+ /**
118
+ * Controls how long speech needs to be present in the audio signal before
119
+ * the VAD considers it speech.
120
+ *
121
+ * This affects the stability of speech not detected -> detected transitions.
122
+ *
123
+ * Note: The VAD returns a value per processed buffer, so this duration is rounded
124
+ * to the closest model window length.
125
+ *
126
+ * Range: 0.0 to 1.0 (value in seconds)
127
+ * Default: 0.0
128
+ */
73
129
  MinimumSpeechDuration: native.VAD_PARAM_MINIMUM_SPEECH_DURATION,
74
130
  };
75
131
 
76
132
  /**
77
- * Voice Activity Detector
133
+ * Context for managing processor state and parameters.
134
+ * Created via Processor.getProcessorContext().
135
+ */
136
+ class ProcessorContext {
137
+ constructor(nativeContext) {
138
+ this._context = nativeContext;
139
+ }
140
+
141
+ /**
142
+ * Clears all internal state and buffers.
143
+ *
144
+ * Call this when the audio stream is interrupted or when seeking
145
+ * to prevent artifacts from previous audio content.
146
+ *
147
+ * The processor stays initialized to the configured settings.
148
+ *
149
+ * Thread Safety: Real-time safe. Can be called from audio processing threads.
150
+ */
151
+ reset() {
152
+ native.processorContextReset(this._context);
153
+ }
154
+
155
+ /**
156
+ * Modifies a processor parameter.
157
+ *
158
+ * All parameters can be changed during audio processing.
159
+ * This function can be called from any thread.
160
+ *
161
+ * @param {ProcessorParameter} parameter - Parameter to modify
162
+ * @param {number} value - New parameter value. See parameter documentation for ranges
163
+ * @throws {Error} If the parameter value is out of range.
164
+ *
165
+ * @example
166
+ * processorContext.setParameter(ProcessorParameter.EnhancementLevel, 0.8);
167
+ */
168
+ setParameter(parameter, value) {
169
+ native.processorContextSetParameter(this._context, parameter, value);
170
+ }
171
+
172
+ /**
173
+ * Retrieves the current value of a parameter.
174
+ *
175
+ * This function can be called from any thread.
176
+ *
177
+ * @param {ProcessorParameter} parameter - Parameter to query
178
+ * @returns {number} The current parameter value.
179
+ *
180
+ * @example
181
+ * const level = processorContext.getParameter(ProcessorParameter.EnhancementLevel);
182
+ */
183
+ getParameter(parameter) {
184
+ return native.processorContextGetParameter(this._context, parameter);
185
+ }
186
+
187
+ /**
188
+ * Returns the total output delay in samples for the current audio configuration.
189
+ *
190
+ * This function provides the complete end-to-end latency introduced by the model,
191
+ * which includes both algorithmic processing delay and any buffering overhead.
192
+ * Use this value to synchronize enhanced audio with other streams or to implement
193
+ * delay compensation in your application.
194
+ *
195
+ * Delay behavior:
196
+ * - Before initialization: Returns the base processing delay using the model's
197
+ * optimal frame size at its native sample rate
198
+ * - After initialization: Returns the actual delay for your specific configuration,
199
+ * including any additional buffering introduced by non-optimal frame sizes
200
+ *
201
+ * Important: The delay value is always expressed in samples at the sample rate
202
+ * you configured during initialize(). To convert to time units:
203
+ * delay_ms = (delay_samples * 1000) / sample_rate
204
+ *
205
+ * Note: Using frame sizes different from the optimal value returned by
206
+ * Model.getOptimalNumFrames() will increase the delay beyond the model's base latency.
207
+ *
208
+ * @returns {number} The delay in samples.
209
+ *
210
+ * @example
211
+ * const delay = processorContext.getOutputDelay();
212
+ * console.log(`Output delay: ${delay} samples`);
213
+ */
214
+ getOutputDelay() {
215
+ return native.processorContextGetOutputDelay(this._context);
216
+ }
217
+ }
218
+
219
+ /**
220
+ * Voice Activity Detector backed by an ai-coustics speech enhancement model.
221
+ *
222
+ * The VAD works automatically using the enhanced audio output of the model
223
+ * that created the VAD.
224
+ *
225
+ * Important:
226
+ * - The latency of the VAD prediction is equal to the backing model's processing latency.
227
+ * - If the backing model stops being processed, the VAD will not update its speech detection prediction.
228
+ *
229
+ * Created via Processor.getVadContext().
230
+ *
231
+ * @example
232
+ * const vad = processor.getVadContext();
233
+ * vad.setParameter(VadParameter.Sensitivity, 5.0);
234
+ * if (vad.isSpeechDetected()) {
235
+ * console.log("Speech detected!");
236
+ * }
78
237
  */
79
- class Vad {
80
- constructor(nativeVad) {
81
- this._vad = nativeVad;
238
+ class VadContext {
239
+ constructor(nativeContext) {
240
+ this._context = nativeContext;
82
241
  }
83
242
 
84
243
  /**
85
- * Check if speech is detected
86
- * @returns {boolean}
244
+ * Returns the VAD's prediction.
245
+ *
246
+ * Important:
247
+ * - The latency of the VAD prediction is equal to the backing model's processing latency.
248
+ * - If the backing model stops being processed, the VAD will not update its speech detection prediction.
249
+ *
250
+ * @returns {boolean} True if speech is detected, False otherwise.
87
251
  */
88
252
  isSpeechDetected() {
89
- return native.vadIsSpeechDetected(this._vad);
253
+ return native.vadContextIsSpeechDetected(this._context);
90
254
  }
91
255
 
92
256
  /**
93
- * Set a VAD parameter
94
- * @param {number} parameter - Parameter constant from VadParameter
95
- * @param {number} value - Parameter value
96
- * @throws {Error} If parameter setting fails (invalid parameter, out of range, etc.)
257
+ * Modifies a VAD parameter.
258
+ *
259
+ * @param {VadParameter} parameter - Parameter to modify
260
+ * @param {number} value - New parameter value. See parameter documentation for ranges
261
+ * @throws {Error} If the parameter value is out of range.
262
+ *
263
+ * @example
264
+ * vad.setParameter(VadParameter.SpeechHoldDuration, 0.08);
265
+ * vad.setParameter(VadParameter.Sensitivity, 5.0);
97
266
  */
98
267
  setParameter(parameter, value) {
99
- native.vadSetParameter(this._vad, parameter, value);
268
+ native.vadContextSetParameter(this._context, parameter, value);
100
269
  }
101
270
 
102
271
  /**
103
- * Get a VAD parameter value
104
- * @param {number} parameter - Parameter constant from VadParameter
105
- * @returns {number}
272
+ * Retrieves the current value of a VAD parameter.
273
+ *
274
+ * @param {VadParameter} parameter - Parameter to query
275
+ * @returns {number} The current parameter value.
276
+ *
277
+ * @example
278
+ * const sensitivity = vad.getParameter(VadParameter.Sensitivity);
279
+ * console.log(`Current sensitivity: ${sensitivity}`);
106
280
  */
107
281
  getParameter(parameter) {
108
- return native.vadGetParameter(this._vad, parameter);
282
+ return native.vadContextGetParameter(this._context, parameter);
109
283
  }
110
284
  }
111
285
 
112
286
  /**
113
- * AI-Coustics audio enhancement model
287
+ * High-level wrapper for the ai-coustics audio enhancement model.
288
+ *
289
+ * This class provides a safe, JavaScript-friendly interface to the underlying native library.
290
+ * It handles memory management automatically.
291
+ *
292
+ * @example
293
+ * const model = Model.fromFile("/path/to/model.aicmodel");
294
+ * const processor = new Processor(model, licenseKey);
295
+ * const sampleRate = model.getOptimalSampleRate();
+ * processor.initialize(sampleRate, 2, model.getOptimalNumFrames(sampleRate), false);
114
296
  */
115
297
  class Model {
298
+ constructor(nativeModel) {
299
+ this._model = nativeModel;
300
+ }
301
+
116
302
  /**
117
- * Create a new model instance
118
- * @param {string} modelType - Model type from ModelType enum
119
- * @param {string} licenseKey - SDK license key
120
- * @throws {Error} If model creation fails (invalid license, unsupported model type, etc.)
303
+ * Creates a new audio enhancement model instance from a file.
304
+ *
305
+ * Multiple models can be created to process different audio streams simultaneously
306
+ * or to switch between different enhancement algorithms during runtime.
307
+ *
308
+ * @param {string} path - Path to the model file (.aicmodel). You can download models manually
309
+ * from https://artifacts.ai-coustics.io or use Model.download() to fetch them programmatically.
310
+ * @returns {Model} A new Model instance.
311
+ * @throws {Error} If model creation fails.
312
+ *
313
+ * @see https://artifacts.ai-coustics.io for available model IDs and downloads.
314
+ *
315
+ * @example
316
+ * const model = Model.fromFile("/path/to/model.aicmodel");
121
317
  */
122
- constructor(modelType, licenseKey) {
123
- this._model = native.modelNew(modelType, licenseKey);
318
+ static fromFile(path) {
319
+ const nativeModel = native.modelFromFile(path);
320
+ return new Model(nativeModel);
124
321
  }
125
322
 
126
323
  /**
127
- * Get the optimal sample rate for this model
128
- * @returns {number} Sample rate in Hz
324
+ * Downloads a model file from the ai-coustics artifact CDN.
325
+ *
326
+ * This method fetches the model manifest, checks whether the requested model
327
+ * exists in a version compatible with this library, and downloads the model
328
+ * file into the provided directory.
329
+ *
330
+ * Note: This is a blocking operation.
331
+ *
332
+ * @param {string} modelId - The model identifier as listed in the manifest (e.g. "sparrow-l-16khz").
333
+ * Find available model IDs at https://artifacts.ai-coustics.io
334
+ * @param {string} downloadDir - Directory where the downloaded model file should be stored
335
+ * @returns {string} The full path to the downloaded model file.
336
+ * @throws {Error} If the download operation fails.
337
+ *
338
+ * @see https://artifacts.ai-coustics.io for available model IDs.
339
+ *
340
+ * @example
341
+ * const path = Model.download("sparrow-l-16khz", "/tmp/models");
342
+ * const model = Model.fromFile(path);
129
343
  */
130
- optimalSampleRate() {
131
- return native.modelOptimalSampleRate(this._model);
344
+ static download(modelId, downloadDir) {
345
+ return native.modelDownload(modelId, downloadDir);
132
346
  }
133
347
 
134
348
  /**
135
- * Get the optimal number of frames for a given sample rate
136
- * @param {number} sampleRate - Sample rate in Hz
137
- * @returns {number} Number of frames
349
+ * Returns the model identifier string.
350
+ *
351
+ * @returns {string} The model ID string.
138
352
  */
139
- optimalNumFrames(sampleRate) {
140
- return native.modelOptimalNumFrames(this._model, sampleRate);
353
+ getId() {
354
+ return native.modelId(this._model);
141
355
  }
142
356
 
143
357
  /**
144
- * Initialize the model with audio configuration
145
- * @param {number} sampleRate - Sample rate in Hz
146
- * @param {number} numChannels - Number of audio channels
147
- * @param {number} numFrames - Number of frames per process call
148
- * @param {boolean} allowVariableFrames - Allow variable frame counts
149
- * @throws {Error} If initialization fails (invalid parameters)
358
+ * Retrieves the native sample rate of the model.
359
+ *
360
+ * Each model is optimized for a specific sample rate, which determines the frequency
361
+ * range of the enhanced audio output. While you can process audio at any sample rate,
362
+ * understanding the model's native rate helps predict the enhancement quality.
363
+ *
364
+ * How sample rate affects enhancement:
365
+ * - Models trained at lower sample rates (e.g., 8 kHz) can only enhance frequencies
366
+ * up to their Nyquist limit (4 kHz for 8 kHz models)
367
+ * - When processing higher sample rate input (e.g., 48 kHz) with a lower-rate model,
368
+ * only the lower frequency components will be enhanced
369
+ *
370
+ * Recommendation: For maximum enhancement quality across the full frequency spectrum,
371
+ * match your input sample rate to the model's native rate when possible.
372
+ *
373
+ * @returns {number} The model's native sample rate in Hz.
374
+ *
375
+ * @example
376
+ * const optimalRate = model.getOptimalSampleRate();
377
+ * console.log(`Optimal sample rate: ${optimalRate} Hz`);
150
378
  */
151
- initialize(sampleRate, numChannels, numFrames, allowVariableFrames = false) {
152
- native.modelInitialize(
153
- this._model,
154
- sampleRate,
155
- numChannels,
156
- numFrames,
157
- allowVariableFrames,
158
- );
379
+ getOptimalSampleRate() {
380
+ return native.modelGetOptimalSampleRate(this._model);
159
381
  }
160
382
 
161
383
  /**
162
- * Get the output delay in samples
163
- * @returns {number} Delay in samples
384
+ * Retrieves the optimal number of frames for the model at a given sample rate.
385
+ *
386
+ * Using the optimal number of frames minimizes latency by avoiding internal buffering.
387
+ *
388
+ * When you use a different frame count than the optimal value, the model will
389
+ * introduce additional buffering latency on top of its base processing delay.
390
+ *
391
+ * The optimal frame count varies based on the sample rate. Each model operates on a
392
+ * fixed time window duration, so the required number of frames changes with sample rate.
393
+ * For example, a model designed for 10 ms processing windows requires 480 frames at
394
+ * 48 kHz, but only 160 frames at 16 kHz to capture the same duration of audio.
395
+ *
396
+ * Call this function with your intended sample rate before calling
397
+ * Processor.initialize() to determine the best frame count for minimal latency.
398
+ *
399
+ * @param {number} sampleRate - The sample rate in Hz for which to calculate the optimal frame count
400
+ * @returns {number} The optimal frame count for the given sample rate.
401
+ *
402
+ * @example
403
+ * const sampleRate = model.getOptimalSampleRate();
404
+ * const optimalFrames = model.getOptimalNumFrames(sampleRate);
405
+ * console.log(`Optimal frame count: ${optimalFrames}`);
164
406
  */
165
- outputDelay() {
166
- return native.modelOutputDelay(this._model);
407
+ getOptimalNumFrames(sampleRate) {
408
+ return native.modelGetOptimalNumFrames(this._model, sampleRate);
167
409
  }
410
+ }
168
411
 
412
+ /**
413
+ * High-level wrapper for the ai-coustics audio enhancement processor.
414
+ *
415
+ * This class provides a safe, JavaScript-friendly interface to the underlying native library.
416
+ * It handles memory management automatically.
417
+ *
418
+ * @example
419
+ * const model = Model.fromFile("/path/to/model.aicmodel");
420
+ * const processor = new Processor(model, licenseKey);
421
+ * const sampleRate = model.getOptimalSampleRate();
422
+ * const numFrames = model.getOptimalNumFrames(sampleRate);
423
+ * processor.initialize(sampleRate, 2, numFrames, false);
424
+ * const audio = new Float32Array(2 * numFrames);
425
+ * processor.processInterleaved(audio);
426
+ */
427
+ class Processor {
169
428
  /**
170
- * Reset the model's internal state
429
+ * Creates a new audio enhancement processor instance.
430
+ *
431
+ * Multiple processors can be created to process different audio streams simultaneously
432
+ * or to switch between different enhancement algorithms during runtime.
433
+ *
434
+ * @param {Model} model - The loaded model instance
435
+ * @param {string} licenseKey - License key for the ai-coustics SDK
436
+ * (generate your key at https://developers.ai-coustics.com/)
437
+ * @throws {Error} If processor creation fails.
438
+ *
439
+ * @example
440
+ * const model = Model.fromFile("/path/to/model.aicmodel");
441
+ * const processor = new Processor(model, licenseKey);
442
+ * processor.initialize(sampleRate, numChannels, numFrames, false);
171
443
  */
172
- reset() {
173
- native.modelReset(this._model);
444
+ constructor(model, licenseKey) {
445
+ this._processor = native.processorNew(model._model, licenseKey);
174
446
  }
175
447
 
176
448
  /**
177
- * Set an enhancement parameter
178
- * @param {number} parameter - Parameter constant from EnhancementParameter
179
- * @param {number} value - Parameter value
180
- * @throws {Error} If parameter setting fails (invalid parameter, out of range, etc)
449
+ * Configures the processor for specific audio settings.
450
+ *
451
+ * This function must be called before processing any audio.
452
+ * For the lowest delay use the sample rate and frame size returned by
453
+ * Model.getOptimalSampleRate() and Model.getOptimalNumFrames().
454
+ *
455
+ * Warning: Do not call from audio processing threads as this allocates memory.
456
+ *
457
+ * Note: All channels are mixed to mono for processing. To process channels
458
+ * independently, create separate Processor instances.
459
+ *
460
+ * @param {number} sampleRate - Sample rate in Hz (8000 - 192000)
461
+ * @param {number} numChannels - Number of audio channels
462
+ * @param {number} numFrames - Samples per channel provided to each processing call
463
+ * @param {boolean} [allowVariableFrames=false] - Allow variable frame sizes (adds latency)
464
+ * @throws {Error} If the audio configuration is unsupported.
465
+ *
466
+ * @example
467
+ * const sampleRate = model.getOptimalSampleRate();
468
+ * const numFrames = model.getOptimalNumFrames(sampleRate);
469
+ * processor.initialize(sampleRate, 2, numFrames, false);
181
470
  */
182
- setParameter(parameter, value) {
183
- native.modelSetParameter(this._model, parameter, value);
471
+ initialize(sampleRate, numChannels, numFrames, allowVariableFrames = false) {
472
+ native.processorInitialize(
473
+ this._processor,
474
+ sampleRate,
475
+ numChannels,
476
+ numFrames,
477
+ allowVariableFrames,
478
+ );
184
479
  }
185
480
 
186
481
  /**
187
- * Get an enhancement parameter value
188
- * @param {number} parameter - Parameter constant from EnhancementParameter
189
- * @returns {number}
482
+ * Processes interleaved audio (all channels alternating in one buffer).
483
+ *
484
+ * Enhances speech in the provided audio buffer. The buffer is modified in-place.
485
+ *
486
+ * @param {Float32Array} buffer - Interleaved audio buffer (channel samples alternating)
487
+ * @throws {Error} If processing fails (processor not initialized, invalid buffer size, etc.)
488
+ *
489
+ * @example
490
+ * // For stereo: [L0, R0, L1, R1, L2, R2, ...]
491
+ * const buffer = new Float32Array(numChannels * numFrames);
492
+ * processor.processInterleaved(buffer);
190
493
  */
191
- getParameter(parameter) {
192
- return native.modelGetParameter(this._model, parameter);
494
+ processInterleaved(buffer) {
495
+ native.processorProcessInterleaved(this._processor, buffer);
193
496
  }
194
497
 
195
498
  /**
196
- * Process interleaved audio (all channels mixed in one buffer)
197
- * @param {Float32Array} buffer - Interleaved audio buffer
198
- * @param {number} numChannels - Number of channels
199
- * @param {number} numFrames - Number of frames
200
- * @throws {Error} If processing fails (model not initialized, invalid buffer size, etc.)
499
+ * Processes sequential/channel-contiguous audio.
500
+ *
501
+ * Enhances speech in the provided audio buffer. The buffer is modified in-place.
502
+ * All samples for each channel are stored contiguously.
503
+ *
504
+ * @param {Float32Array} buffer - Sequential audio buffer (all channel 0 samples, then all channel 1 samples, etc.)
505
+ * @throws {Error} If processing fails (processor not initialized, invalid buffer size, etc.)
506
+ *
507
+ * @example
508
+ * // For stereo: [L0, L1, L2, ..., R0, R1, R2, ...]
509
+ * const buffer = new Float32Array(numChannels * numFrames);
510
+ * processor.processSequential(buffer);
201
511
  */
202
- processInterleaved(buffer, numChannels, numFrames) {
203
- native.modelProcessInterleaved(this._model, buffer, numChannels, numFrames);
512
+ processSequential(buffer) {
513
+ native.processorProcessSequential(this._processor, buffer);
204
514
  }
205
515
 
206
516
  /**
207
- * Process planar audio (separate buffer for each channel)
517
+ * Processes planar audio (separate buffer for each channel).
518
+ *
519
+ * Enhances speech in the provided audio buffers. The buffers are modified in-place.
520
+ *
208
521
  * @param {Float32Array[]} buffers - Array of audio buffers, one per channel (max 16 channels)
209
- * @throws {Error} If processing fails (model not initialized, too many channels, invalid buffer size, etc.)
522
+ * @throws {Error} If processing fails (processor not initialized, too many channels, invalid buffer size, etc.)
523
+ *
524
+ * @example
525
+ * const left = new Float32Array(numFrames);
526
+ * const right = new Float32Array(numFrames);
527
+ * processor.processPlanar([left, right]);
210
528
  */
211
529
  processPlanar(buffers) {
212
- native.modelProcessPlanar(this._model, buffers);
530
+ native.processorProcessPlanar(this._processor, buffers);
531
+ }
532
+
533
+ /**
534
+ * Creates a ProcessorContext instance.
535
+ *
536
+ * This can be used to control all parameters and other settings of the processor.
537
+ *
538
+ * @returns {ProcessorContext} A new ProcessorContext instance.
539
+ *
540
+ * @example
541
+ * const processorContext = processor.getProcessorContext();
542
+ * processorContext.setParameter(ProcessorParameter.EnhancementLevel, 0.8);
543
+ */
544
+ getProcessorContext() {
545
+ const nativeContext = native.processorGetProcessorContext(this._processor);
546
+ return new ProcessorContext(nativeContext);
213
547
  }
214
548
 
215
549
  /**
216
- * Create a Voice Activity Detector for this model
217
- * @returns {Vad}
550
+ * Creates a Voice Activity Detector Context instance.
551
+ *
552
+ * @returns {VadContext} A new VadContext instance.
553
+ *
554
+ * @example
555
+ * const vad = processor.getVadContext();
556
+ * if (vad.isSpeechDetected()) {
557
+ * console.log("Speech detected!");
558
+ * }
218
559
  */
219
- createVad() {
220
- const nativeVad = native.modelCreateVad(this._model);
221
- return new Vad(nativeVad);
560
+ getVadContext() {
561
+ const nativeContext = native.processorGetVadContext(this._processor);
562
+ return new VadContext(nativeContext);
222
563
  }
223
564
  }
224
565
 
225
566
  /**
226
- * Get the SDK version
227
- * @returns {string}
567
+ * Returns the version of the ai-coustics core SDK library used by this package.
568
+ *
569
+ * Note: This is not necessarily the same as this package's version.
570
+ *
571
+ * @returns {string} The library version as a string.
572
+ *
573
+ * @example
574
+ * const version = getVersion();
575
+ * console.log(`ai-coustics SDK version: ${version}`);
228
576
  */
229
577
  function getVersion() {
230
578
  return native.getVersion();
231
579
  }
232
580
 
581
+ /**
582
+ * Returns the model version number compatible with this SDK build.
583
+ *
584
+ * @returns {number} The compatible model version number.
585
+ */
586
+ function getCompatibleModelVersion() {
587
+ return native.getCompatibleModelVersion();
588
+ }
589
+
233
590
  module.exports = {
234
591
  Model,
235
- Vad,
236
- ModelType,
237
- EnhancementParameter,
592
+ Processor,
593
+ ProcessorContext,
594
+ VadContext,
595
+ ProcessorParameter,
238
596
  VadParameter,
239
597
  getVersion,
598
+ getCompatibleModelVersion,
240
599
  };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ai-coustics/aic-sdk",
3
- "version": "0.12.0",
3
+ "version": "0.13.0",
4
4
  "description": "Node.js package of ai-coustics SDK",
5
5
  "main": "index.js",
6
6
  "scripts": {
@@ -34,14 +34,15 @@
34
34
  "LICENSE"
35
35
  ],
36
36
  "optionalDependencies": {
37
- "@ai-coustics/aic-sdk-darwin-arm64": "0.12.0",
38
- "@ai-coustics/aic-sdk-darwin-x64": "0.12.0",
39
- "@ai-coustics/aic-sdk-linux-arm64-gnu": "0.12.0",
40
- "@ai-coustics/aic-sdk-linux-x64-gnu": "0.12.0",
41
- "@ai-coustics/aic-sdk-win32-arm64-msvc": "0.12.0",
42
- "@ai-coustics/aic-sdk-win32-x64-msvc": "0.12.0"
37
+ "@ai-coustics/aic-sdk-darwin-arm64": "0.13.0",
38
+ "@ai-coustics/aic-sdk-darwin-x64": "0.13.0",
39
+ "@ai-coustics/aic-sdk-linux-arm64-gnu": "0.13.0",
40
+ "@ai-coustics/aic-sdk-linux-x64-gnu": "0.13.0",
41
+ "@ai-coustics/aic-sdk-win32-arm64-msvc": "0.13.0",
42
+ "@ai-coustics/aic-sdk-win32-x64-msvc": "0.13.0"
43
43
  },
44
44
  "devDependencies": {
45
- "@neon-rs/cli": "0.1.82"
45
+ "@neon-rs/cli": "0.1.82",
46
+ "wavefile": "^11.0.0"
46
47
  }
47
48
  }