npm - dspx - Versions diffs - 1.0.1 → 1.1.2 - Mend

dspx 1.0.1 → 1.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19) hide show

package/README.md +56 -16
package/dist/bindings.d.ts +272 -1
package/dist/bindings.d.ts.map +1 -1
package/dist/bindings.js +419 -1
package/dist/bindings.js.map +1 -1
package/dist/types.d.ts +235 -0
package/dist/types.d.ts.map +1 -1
package/package.json +4 -1
package/prebuilds/darwin-arm64/dspx.node +0 -0
package/prebuilds/win32-x64/dspx.node +0 -0
package/scripts/install.js +98 -0
package/scripts/postinstall-verify.js +32 -0
package/scripts/test-install.js +82 -0
package/scripts/test.js +24 -0
package/src/native/DspPipeline.cc +105 -0
package/src/native/adapters/MelSpectrogramStage.h +211 -0
package/src/native/adapters/MfccStage.h +220 -0
package/src/native/adapters/StftStage.h +466 -0
package/src/native/core/DctEngine.h +172 -0

package/src/native/adapters/MelSpectrogramStage.h ADDED Viewed

@@ -0,0 +1,211 @@
+/**
+ * Mel Spectrogram Pipeline Stage
+ *
+ * Converts power spectrum to Mel-scale representation using filterbank matrix multiplication.
+ * This is a STATELESS operation that applies the Mel filterbank to incoming power spectra.
+ *
+ * Features:
+ * - High-performance matrix multiplication using Eigen
+ * - Pre-computed Mel filterbank (passed from TypeScript)
+ * - Processes power spectrum bins → Mel frequency bins
+ * - Multi-channel support (each channel processed independently)
+ *
+ * Mathematical Operation:
+ *   mel_energies = filterbank × power_spectrum
+ *   where:
+ *     - power_spectrum is (numBins × 1) vector
+ *     - filterbank is (numMelBands × numBins) matrix
+ *     - mel_energies is (numMelBands × 1) vector
+ *
+ * Typical Pipeline:
+ *   STFT → Power → MelSpectrogram → Log → MFCC
+ *
+ * Parameters:
+ * - filterbankMatrix: Pre-computed Mel filterbank (TypeScript provides this)
+ * - numBins: Number of input frequency bins (from STFT/FFT)
+ * - numMelBands: Number of Mel frequency bands (output size)
+ */
+#pragma once
+#include "../IDspStage.h"
+#include <Eigen/Dense>
+#include <vector>
+#include <memory>
+#include <stdexcept>
+#include <string>
+#include <cmath>
+namespace dsp::adapters
+{
+    /**
+     * @brief Pipeline stage that projects spectrum frames onto a Mel filterbank.
+     *
+     * Each input frame of numBins values (per channel) is multiplied by the
+     * (numMelBands × numBins) filterbank, producing numMelBands values per frame.
+     * Buffers are channel-interleaved: element (frame, bin, channel) lives at
+     * index (frame * binsPerFrame + bin) * numChannels + channel.
+     */
+    class MelSpectrogramStage : public IDspStage
+    {
+    public:
+        /**
+         * @brief Constructs a Mel Spectrogram stage
+         * @param filterbank_matrix Pre-computed Mel filterbank (numMelBands × numBins), row-major
+         * @param num_bins Number of input frequency bins
+         * @param num_mel_bands Number of output Mel frequency bands
+         * @throws std::invalid_argument if a dimension is zero or the matrix size
+         *         does not equal num_mel_bands * num_bins
+         */
+        explicit MelSpectrogramStage(
+            const std::vector<float> &filterbank_matrix,
+            size_t num_bins,
+            size_t num_mel_bands)
+            : m_numBins(num_bins),
+              m_numMelBands(num_mel_bands)
+        {
+            // Validate before allocating anything so bad arguments fail cheaply.
+            if (m_numBins == 0)
+            {
+                throw std::invalid_argument("MelSpectrogram: num_bins must be greater than 0");
+            }
+            if (m_numMelBands == 0)
+            {
+                throw std::invalid_argument("MelSpectrogram: num_mel_bands must be greater than 0");
+            }
+            if (filterbank_matrix.size() != num_mel_bands * num_bins)
+            {
+                throw std::invalid_argument(
+                    "MelSpectrogram: filterbank matrix size (" +
+                    std::to_string(filterbank_matrix.size()) +
+                    ") must equal numMelBands × numBins (" +
+                    std::to_string(num_mel_bands * num_bins) + ")");
+            }
+            // The caller's data is row-major while Eigen::MatrixXf is column-major;
+            // mapping it as row-major lets the assignment do the layout conversion
+            // (and size m_filterbank to numMelBands × numBins).
+            m_filterbank = Eigen::Map<const RowMajorMatrixXf>(
+                filterbank_matrix.data(),
+                static_cast<Eigen::Index>(num_mel_bands),
+                static_cast<Eigen::Index>(num_bins));
+        }
+        /** @brief Stage type identifier ("melSpectrogram"). */
+        const char *getType() const override
+        {
+            return "melSpectrogram";
+        }
+        /** @brief Always true: output length differs from input length. */
+        bool isResizing() const override
+        {
+            return true; // This stage changes output size
+        }
+        /**
+         * @brief In-place processing is not supported by this stage.
+         * @throws std::runtime_error always; use processResizing() instead.
+         */
+        void process(float *buffer, size_t numSamples, int numChannels, const float *timestamps = nullptr) override
+        {
+            // This stage changes output size - processResizing() should be called instead
+            throw std::runtime_error("MelSpectrogram stage requires processResizing() to be called");
+        }
+        /**
+         * @brief Estimates the output length for a given total input length.
+         *
+         * The channel count is not known here, so the estimate is
+         * (inputSize / numBins) * numMelBands using integer division. When each
+         * channel holds a whole number of frames this equals the size written by
+         * processResizing(); otherwise it can be larger than what is written.
+         */
+        size_t calculateOutputSize(size_t inputSize) const override
+        {
+            return (inputSize / m_numBins) * m_numMelBands;
+        }
+        /**
+         * @brief Applies the filterbank to every complete frame of every channel.
+         * @param inputBuffer  Channel-interleaved input frames (numBins values per frame per channel)
+         * @param inputSize    Total number of floats in inputBuffer (all channels)
+         * @param outputBuffer Destination; receives numFrames * numMelBands * numChannels floats
+         * @param outputSize   Set to the number of floats written (0 if no complete frame)
+         * @param numChannels  Number of interleaved channels; values <= 0 produce no output
+         * @param timestamps   Unused by this stage
+         */
+        void processResizing(const float *inputBuffer, size_t inputSize,
+                             float *outputBuffer, size_t &outputSize,
+                             int numChannels, const float *timestamps = nullptr) override
+        {
+            // Guard against division by zero (and sign wrap-around) on the channel count.
+            if (numChannels <= 0)
+            {
+                outputSize = 0;
+                return;
+            }
+            const size_t channelCount = static_cast<size_t>(numChannels);
+            // Only complete frames are processed; trailing samples are ignored.
+            const size_t samplesPerChannel = inputSize / channelCount;
+            const size_t numFrames = samplesPerChannel / m_numBins;
+            if (numFrames == 0)
+            {
+                // Not enough data for even one frame - output nothing
+                outputSize = 0;
+                return;
+            }
+            outputSize = numFrames * m_numMelBands * channelCount;
+            // Scratch vectors reused for every frame
+            Eigen::VectorXf spectrum(m_numBins);
+            Eigen::VectorXf melEnergies(m_numMelBands);
+            for (size_t ch = 0; ch < channelCount; ++ch)
+            {
+                for (size_t frame = 0; frame < numFrames; ++frame)
+                {
+                    // Gather this channel's frame out of the interleaved input
+                    const size_t inBase = frame * m_numBins;
+                    for (size_t bin = 0; bin < m_numBins; ++bin)
+                    {
+                        spectrum(bin) = inputBuffer[(inBase + bin) * channelCount + ch];
+                    }
+                    // mel_energies = filterbank × spectrum; operands never alias the
+                    // destination, so noalias() lets Eigen skip its temporary.
+                    melEnergies.noalias() = m_filterbank * spectrum;
+                    // Scatter the result back into the interleaved output
+                    const size_t outBase = frame * m_numMelBands;
+                    for (size_t band = 0; band < m_numMelBands; ++band)
+                    {
+                        outputBuffer[(outBase + band) * channelCount + ch] = melEnergies(band);
+                    }
+                }
+            }
+        }
+        /**
+         * @brief Serializes the dimensions and the filterbank (flattened row-major).
+         * @return Object with "numBins", "numMelBands" and "filterbank" properties.
+         */
+        Napi::Object serializeState(Napi::Env env) const override
+        {
+            Napi::Object state = Napi::Object::New(env);
+            state.Set("numBins", Napi::Number::New(env, static_cast<double>(m_numBins)));
+            state.Set("numMelBands", Napi::Number::New(env, static_cast<double>(m_numMelBands)));
+            // Serialize filterbank matrix (row-major)
+            Napi::Array filterbankArray = Napi::Array::New(env, m_numMelBands * m_numBins);
+            for (size_t row = 0; row < m_numMelBands; ++row)
+            {
+                for (size_t col = 0; col < m_numBins; ++col)
+                {
+                    filterbankArray.Set(static_cast<uint32_t>(row * m_numBins + col),
+                                        Napi::Number::New(env, m_filterbank(row, col)));
+                }
+            }
+            state.Set("filterbank", filterbankArray);
+            return state;
+        }
+        /**
+         * @brief Restores the filterbank from an object produced by serializeState().
+         * @throws std::runtime_error if the stored dimensions differ from this
+         *         stage's, or if "filterbank" is not an array of
+         *         numMelBands * numBins elements.
+         */
+        void deserializeState(const Napi::Object &state) override
+        {
+            const size_t numBins = state.Get("numBins").As<Napi::Number>().Uint32Value();
+            const size_t numMelBands = state.Get("numMelBands").As<Napi::Number>().Uint32Value();
+            if (numBins != m_numBins || numMelBands != m_numMelBands)
+            {
+                throw std::runtime_error("MelSpectrogram: Dimension mismatch during deserialization");
+            }
+            const Napi::Value filterbankValue = state.Get("filterbank");
+            if (!filterbankValue.IsArray())
+            {
+                throw std::runtime_error("MelSpectrogram: Missing filterbank during deserialization");
+            }
+            const Napi::Array filterbankArray = filterbankValue.As<Napi::Array>();
+            if (static_cast<size_t>(filterbankArray.Length()) != m_numMelBands * m_numBins)
+            {
+                throw std::runtime_error("MelSpectrogram: Filterbank size mismatch during deserialization");
+            }
+            // Fill a temporary first so a failure part-way through cannot leave
+            // m_filterbank half-overwritten.
+            Eigen::MatrixXf restored(m_numMelBands, m_numBins);
+            for (size_t row = 0; row < m_numMelBands; ++row)
+            {
+                for (size_t col = 0; col < m_numBins; ++col)
+                {
+                    restored(row, col) =
+                        filterbankArray.Get(static_cast<uint32_t>(row * m_numBins + col))
+                            .As<Napi::Number>()
+                            .FloatValue();
+                }
+            }
+            m_filterbank = restored;
+        }
+        /** @brief No-op: the stage keeps no per-stream state between calls. */
+        void reset() override
+        {
+            // Stateless - no reset needed
+        }
+    private:
+        // Row-major view type matching the layout of the filterbank supplied by the caller
+        using RowMajorMatrixXf = Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>;
+        size_t m_numBins;             // Number of input frequency bins
+        size_t m_numMelBands;         // Number of output Mel bands
+        Eigen::MatrixXf m_filterbank; // Mel filterbank matrix (numMelBands × numBins)
+    };
+} // namespace dsp::adapters

package/src/native/adapters/MfccStage.h ADDED Viewed

@@ -0,0 +1,220 @@
+/**
+ * MFCC (Mel-Frequency Cepstral Coefficients) Pipeline Stage
+ *
+ * Applies Discrete Cosine Transform (DCT) to log Mel-scale energies to produce MFCCs.
+ * This is a STATELESS operation that leverages the DCT engine.
+ *
+ * Features:
+ * - High-performance DCT using pre-computed cosine tables
+ * - Optional log-energy normalization
+ * - Coefficient selection (keep first N coefficients)
+ * - Multi-channel support
+ *
+ * Mathematical Operation:
+ *   1. Input: Mel energies from the Mel spectrogram; log(energy + 1e-10) is taken here when useLogEnergy is true
+ *   2. Apply DCT-II: mfcc[k] = DCT(log_mel_energies)
+ *   3. Keep first numCoefficients (typically 13-20)
+ *
+ * Typical Pipeline:
+ *   STFT → Power → MelSpectrogram → Log → MFCC
+ *
+ * Parameters:
+ * - numMelBands: Number of input Mel bands (from MelSpectrogram)
+ * - numCoefficients: Number of MFCC coefficients to output (default: 13)
+ * - useLogEnergy: Apply log to input before DCT (default: true); set to false when the input is already log-scaled (e.g. after a Log step)
+ * - lifterCoefficient: Optional cepstral liftering (default: 0 = disabled)
+ */
+#pragma once
+#include "../IDspStage.h"
+#include "../core/DctEngine.h"
+#include <vector>
+#include <memory>
+#include <stdexcept>
+#include <string>
+#include <cmath>
+#include <algorithm>
+namespace dsp::adapters
+{
+    /**
+     * @brief Pipeline stage that turns Mel-band frames into MFCCs.
+     *
+     * For every frame of numMelBands values (per channel) the stage optionally
+     * takes the natural log, runs the DCT engine over the frame, keeps the first
+     * numCoefficients outputs and optionally scales them by lifter weights.
+     * Buffers are channel-interleaved: element (frame, k, channel) lives at
+     * index (frame * valuesPerFrame + k) * numChannels + channel.
+     */
+    class MfccStage : public IDspStage
+    {
+    public:
+        /**
+         * @brief Constructs an MFCC stage
+         * @param num_mel_bands Number of input Mel frequency bands
+         * @param num_coefficients Number of MFCC coefficients to output (default: 13)
+         * @param use_log_energy Apply log to input energies before DCT (default: true)
+         * @param lifter_coefficient Cepstral liftering parameter (0 = disabled)
+         * @throws std::invalid_argument if num_mel_bands is 0 or num_coefficients
+         *         is outside [1, num_mel_bands]
+         */
+        explicit MfccStage(
+            size_t num_mel_bands,
+            size_t num_coefficients = 13,
+            bool use_log_energy = true,
+            float lifter_coefficient = 0.0f)
+            : m_numMelBands(num_mel_bands),
+              m_numCoefficients(num_coefficients),
+              m_useLogEnergy(use_log_energy),
+              m_lifterCoefficient(lifter_coefficient)
+        {
+            // Validate parameters
+            if (m_numMelBands == 0)
+            {
+                throw std::invalid_argument("MFCC: num_mel_bands must be greater than 0");
+            }
+            if (m_numCoefficients == 0 || m_numCoefficients > m_numMelBands)
+            {
+                throw std::invalid_argument(
+                    "MFCC: num_coefficients must be in range [1, num_mel_bands]");
+            }
+            // Create DCT engine (size = numMelBands)
+            m_dctEngine = std::make_unique<dsp::core::DctEngine<float>>(m_numMelBands);
+            // Allocate working buffers
+            m_logEnergies.resize(m_numMelBands);
+            m_dctOutput.resize(m_numMelBands);
+            // Pre-compute lifter weights if liftering is enabled:
+            //   w[i] = 1 + (L / 2) * sin(pi * i / L)
+            if (m_lifterCoefficient > 0)
+            {
+                // Local constant instead of M_PI, which is not part of standard C++
+                // and is unavailable on some toolchains without extra defines.
+                // Kept as double so the weights match the previous computation.
+                constexpr double kPi = 3.14159265358979323846;
+                m_lifterWeights.resize(m_numCoefficients);
+                for (size_t i = 0; i < m_numCoefficients; ++i)
+                {
+                    m_lifterWeights[i] = 1.0f + (m_lifterCoefficient / 2.0f) *
+                                                    std::sin(kPi * static_cast<float>(i) / m_lifterCoefficient);
+                }
+            }
+        }
+        /** @brief Stage type identifier ("mfcc"). */
+        const char *getType() const override
+        {
+            return "mfcc";
+        }
+        /** @brief Always true: output length differs from input length. */
+        bool isResizing() const override
+        {
+            return true; // This stage changes output size
+        }
+        /**
+         * @brief In-place processing is not supported by this stage.
+         * @throws std::runtime_error always; use processResizing() instead.
+         */
+        void process(float *buffer, size_t numSamples, int numChannels, const float *timestamps = nullptr) override
+        {
+            // This stage changes output size - processResizing() should be called instead
+            throw std::runtime_error("MFCC stage requires processResizing() to be called");
+        }
+        /**
+         * @brief Estimates the output length for a given total input length.
+         *
+         * Computed as (inputSize / numMelBands) * numCoefficients using integer
+         * division, without knowledge of the channel count. It equals the size
+         * written by processResizing() when each channel holds a whole number of
+         * frames; otherwise it can be larger than what is written.
+         */
+        size_t calculateOutputSize(size_t inputSize) const override
+        {
+            return (inputSize / m_numMelBands) * m_numCoefficients;
+        }
+        /**
+         * @brief Computes MFCCs for every complete frame of every channel.
+         * @param inputBuffer  Channel-interleaved Mel frames (numMelBands values per frame per channel)
+         * @param inputSize    Total number of floats in inputBuffer (all channels)
+         * @param outputBuffer Destination; receives numFrames * numCoefficients * numChannels floats
+         * @param outputSize   Set to the number of floats written (0 if no complete frame)
+         * @param numChannels  Number of interleaved channels; values <= 0 produce no output
+         * @param timestamps   Unused by this stage
+         */
+        void processResizing(const float *inputBuffer, size_t inputSize,
+                             float *outputBuffer, size_t &outputSize,
+                             int numChannels, const float *timestamps = nullptr) override
+        {
+            // Guard against division by zero (and sign wrap-around) on the channel count.
+            if (numChannels <= 0)
+            {
+                outputSize = 0;
+                return;
+            }
+            const size_t channelCount = static_cast<size_t>(numChannels);
+            // Only complete frames are processed; trailing samples are ignored.
+            const size_t samplesPerChannel = inputSize / channelCount;
+            const size_t numFrames = samplesPerChannel / m_numMelBands;
+            if (numFrames == 0)
+            {
+                // Not enough data for even one frame - output nothing
+                outputSize = 0;
+                return;
+            }
+            outputSize = numFrames * m_numCoefficients * channelCount;
+            // Small offset so that log(0) is never evaluated
+            constexpr float kLogEpsilon = 1e-10f;
+            // Loop-invariant: weights exist only when liftering is enabled
+            const bool applyLifter = m_lifterCoefficient > 0;
+            for (size_t ch = 0; ch < channelCount; ++ch)
+            {
+                for (size_t frame = 0; frame < numFrames; ++frame)
+                {
+                    // Gather this channel's frame out of the interleaved input
+                    const size_t inBase = frame * m_numMelBands;
+                    for (size_t band = 0; band < m_numMelBands; ++band)
+                    {
+                        const float energy = inputBuffer[(inBase + band) * channelCount + ch];
+                        // Negative energies are clamped to 0 before the log so that
+                        // invalid input floors at log(epsilon) instead of becoming NaN.
+                        m_logEnergies[band] = m_useLogEnergy
+                                                  ? std::log(std::max(energy, 0.0f) + kLogEpsilon)
+                                                  : energy;
+                    }
+                    // Apply DCT to get MFCCs
+                    m_dctEngine->dct(m_logEnergies.data(), m_dctOutput.data());
+                    // Keep the first numCoefficients, lifter them if enabled, and
+                    // scatter back into the interleaved output
+                    const size_t outBase = frame * m_numCoefficients;
+                    for (size_t k = 0; k < m_numCoefficients; ++k)
+                    {
+                        float coeff = m_dctOutput[k];
+                        if (applyLifter)
+                        {
+                            coeff *= m_lifterWeights[k];
+                        }
+                        outputBuffer[(outBase + k) * channelCount + ch] = coeff;
+                    }
+                }
+            }
+        }
+        /**
+         * @brief Serializes the stage configuration.
+         * @return Object with "numMelBands", "numCoefficients", "useLogEnergy"
+         *         and "lifterCoefficient" properties.
+         */
+        Napi::Object serializeState(Napi::Env env) const override
+        {
+            Napi::Object state = Napi::Object::New(env);
+            state.Set("numMelBands", Napi::Number::New(env, static_cast<double>(m_numMelBands)));
+            state.Set("numCoefficients", Napi::Number::New(env, static_cast<double>(m_numCoefficients)));
+            state.Set("useLogEnergy", Napi::Boolean::New(env, m_useLogEnergy));
+            state.Set("lifterCoefficient", Napi::Number::New(env, m_lifterCoefficient));
+            return state;
+        }
+        /**
+         * @brief Checks that a serialized state matches this stage's dimensions.
+         *
+         * Only "numMelBands" and "numCoefficients" are read; "useLogEnergy" and
+         * "lifterCoefficient" are not restored, so the values given at
+         * construction stay in effect.
+         * @throws std::runtime_error if either dimension differs.
+         */
+        void deserializeState(const Napi::Object &state) override
+        {
+            const size_t numMelBands = state.Get("numMelBands").As<Napi::Number>().Uint32Value();
+            const size_t numCoefficients = state.Get("numCoefficients").As<Napi::Number>().Uint32Value();
+            if (numMelBands != m_numMelBands || numCoefficients != m_numCoefficients)
+            {
+                throw std::runtime_error("MFCC: Dimension mismatch during deserialization");
+            }
+        }
+        /** @brief No-op: the stage keeps no per-stream state between calls. */
+        void reset() override
+        {
+            // Stateless - no reset needed
+        }
+    private:
+        size_t m_numMelBands;      // Number of input Mel bands
+        size_t m_numCoefficients;  // Number of MFCC coefficients to output
+        bool m_useLogEnergy;       // Apply log to input energies
+        float m_lifterCoefficient; // Cepstral liftering parameter (0 = disabled)
+        // DCT engine
+        std::unique_ptr<dsp::core::DctEngine<float>> m_dctEngine;
+        // Working buffers (reused across frames; overwritten on every call)
+        std::vector<float> m_logEnergies;   // Log Mel energies (input to DCT)
+        std::vector<float> m_dctOutput;     // Full DCT output (before truncation)
+        std::vector<float> m_lifterWeights; // Pre-computed lifter weights (empty when liftering is disabled)
+    };
+} // namespace dsp::adapters