speechflow 1.6.5 → 1.6.6
This diff reflects the changes between publicly released package versions as they appear in their respective public registries and is provided for informational purposes only.
- package/CHANGELOG.md +12 -0
- package/README.md +23 -0
- package/etc/stx.conf +5 -0
- package/package.json +3 -3
- package/speechflow-cli/dst/speechflow-node-a2a-compressor.d.ts +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-compressor.js +12 -11
- package/speechflow-cli/dst/speechflow-node-a2a-compressor.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-expander.d.ts +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-expander.js +12 -11
- package/speechflow-cli/dst/speechflow-node-a2a-expander.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-ffmpeg.js +2 -8
- package/speechflow-cli/dst/speechflow-node-a2a-ffmpeg.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-filler.d.ts +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-filler.js +18 -16
- package/speechflow-cli/dst/speechflow-node-a2a-filler.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-gain.d.ts +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-gain.js +8 -8
- package/speechflow-cli/dst/speechflow-node-a2a-gain.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-gender.d.ts +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-gender.js +38 -34
- package/speechflow-cli/dst/speechflow-node-a2a-gender.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-meter.d.ts +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-meter.js +11 -11
- package/speechflow-cli/dst/speechflow-node-a2a-meter.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-mute.d.ts +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-mute.js +44 -10
- package/speechflow-cli/dst/speechflow-node-a2a-mute.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-pitch.d.ts +13 -0
- package/speechflow-cli/dst/speechflow-node-a2a-pitch.js +213 -0
- package/speechflow-cli/dst/speechflow-node-a2a-pitch.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-pitch2-wt.d.ts +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-pitch2-wt.js +149 -0
- package/speechflow-cli/dst/speechflow-node-a2a-pitch2-wt.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-pitch2.d.ts +13 -0
- package/speechflow-cli/dst/speechflow-node-a2a-pitch2.js +202 -0
- package/speechflow-cli/dst/speechflow-node-a2a-pitch2.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-rnnoise.d.ts +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-rnnoise.js +12 -11
- package/speechflow-cli/dst/speechflow-node-a2a-rnnoise.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-speex.d.ts +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-speex.js +13 -12
- package/speechflow-cli/dst/speechflow-node-a2a-speex.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-vad.d.ts +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-vad.js +24 -23
- package/speechflow-cli/dst/speechflow-node-a2a-vad.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-wav.js +35 -7
- package/speechflow-cli/dst/speechflow-node-a2a-wav.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2t-amazon.d.ts +1 -1
- package/speechflow-cli/dst/speechflow-node-a2t-amazon.js +16 -16
- package/speechflow-cli/dst/speechflow-node-a2t-amazon.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2t-deepgram.d.ts +1 -1
- package/speechflow-cli/dst/speechflow-node-a2t-deepgram.js +16 -16
- package/speechflow-cli/dst/speechflow-node-a2t-deepgram.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2t-openai.d.ts +1 -1
- package/speechflow-cli/dst/speechflow-node-a2t-openai.js +15 -15
- package/speechflow-cli/dst/speechflow-node-a2t-openai.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2a-amazon.d.ts +1 -1
- package/speechflow-cli/dst/speechflow-node-t2a-amazon.js +9 -9
- package/speechflow-cli/dst/speechflow-node-t2a-amazon.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.d.ts +1 -1
- package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.js +13 -12
- package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2a-kokoro.js +4 -4
- package/speechflow-cli/dst/speechflow-node-t2a-kokoro.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-amazon.js +3 -3
- package/speechflow-cli/dst/speechflow-node-t2t-amazon.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-deepl.js +2 -2
- package/speechflow-cli/dst/speechflow-node-t2t-deepl.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-format.js +36 -2
- package/speechflow-cli/dst/speechflow-node-t2t-format.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-google.js +2 -2
- package/speechflow-cli/dst/speechflow-node-t2t-google.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-modify.js +5 -5
- package/speechflow-cli/dst/speechflow-node-t2t-modify.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-ollama.js +2 -2
- package/speechflow-cli/dst/speechflow-node-t2t-ollama.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-openai.js +2 -2
- package/speechflow-cli/dst/speechflow-node-t2t-openai.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-sentence.d.ts +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-sentence.js +13 -13
- package/speechflow-cli/dst/speechflow-node-t2t-sentence.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js +2 -2
- package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-transformers.js +2 -2
- package/speechflow-cli/dst/speechflow-node-t2t-transformers.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-x2x-filter.js +2 -2
- package/speechflow-cli/dst/speechflow-node-x2x-filter.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-x2x-trace.d.ts +1 -1
- package/speechflow-cli/dst/speechflow-node-x2x-trace.js +42 -8
- package/speechflow-cli/dst/speechflow-node-x2x-trace.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-device.js +3 -2
- package/speechflow-cli/dst/speechflow-node-xio-device.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-file.js +19 -18
- package/speechflow-cli/dst/speechflow-node-xio-file.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-mqtt.js +13 -13
- package/speechflow-cli/dst/speechflow-node-xio-mqtt.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-websocket.js +8 -8
- package/speechflow-cli/dst/speechflow-node-xio-websocket.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node.js +6 -6
- package/speechflow-cli/dst/speechflow-node.js.map +1 -1
- package/speechflow-cli/dst/speechflow-util-audio.js +1 -1
- package/speechflow-cli/dst/speechflow-util-audio.js.map +1 -1
- package/speechflow-cli/dst/speechflow-util-stream.d.ts +1 -0
- package/speechflow-cli/dst/speechflow-util-stream.js +22 -2
- package/speechflow-cli/dst/speechflow-util-stream.js.map +1 -1
- package/speechflow-cli/etc/tsconfig.json +1 -0
- package/speechflow-cli/package.json +14 -14
- package/speechflow-cli/src/speechflow-node-a2a-compressor.ts +13 -12
- package/speechflow-cli/src/speechflow-node-a2a-expander.ts +13 -12
- package/speechflow-cli/src/speechflow-node-a2a-ffmpeg.ts +2 -8
- package/speechflow-cli/src/speechflow-node-a2a-filler.ts +19 -17
- package/speechflow-cli/src/speechflow-node-a2a-gain.ts +8 -8
- package/speechflow-cli/src/speechflow-node-a2a-gender.ts +42 -36
- package/speechflow-cli/src/speechflow-node-a2a-meter.ts +11 -11
- package/speechflow-cli/src/speechflow-node-a2a-mute.ts +11 -10
- package/speechflow-cli/src/speechflow-node-a2a-pitch.ts +221 -0
- package/speechflow-cli/src/speechflow-node-a2a-rnnoise.ts +13 -12
- package/speechflow-cli/src/speechflow-node-a2a-speex.ts +14 -13
- package/speechflow-cli/src/speechflow-node-a2a-vad.ts +24 -23
- package/speechflow-cli/src/speechflow-node-a2a-wav.ts +2 -7
- package/speechflow-cli/src/speechflow-node-a2t-amazon.ts +16 -16
- package/speechflow-cli/src/speechflow-node-a2t-deepgram.ts +16 -16
- package/speechflow-cli/src/speechflow-node-a2t-openai.ts +15 -15
- package/speechflow-cli/src/speechflow-node-t2a-amazon.ts +9 -9
- package/speechflow-cli/src/speechflow-node-t2a-elevenlabs.ts +13 -12
- package/speechflow-cli/src/speechflow-node-t2a-kokoro.ts +4 -4
- package/speechflow-cli/src/speechflow-node-t2t-amazon.ts +3 -3
- package/speechflow-cli/src/speechflow-node-t2t-deepl.ts +2 -2
- package/speechflow-cli/src/speechflow-node-t2t-format.ts +3 -2
- package/speechflow-cli/src/speechflow-node-t2t-google.ts +2 -2
- package/speechflow-cli/src/speechflow-node-t2t-modify.ts +6 -6
- package/speechflow-cli/src/speechflow-node-t2t-ollama.ts +2 -2
- package/speechflow-cli/src/speechflow-node-t2t-openai.ts +2 -2
- package/speechflow-cli/src/speechflow-node-t2t-sentence.ts +13 -13
- package/speechflow-cli/src/speechflow-node-t2t-subtitle.ts +2 -2
- package/speechflow-cli/src/speechflow-node-t2t-transformers.ts +2 -2
- package/speechflow-cli/src/speechflow-node-x2x-filter.ts +2 -2
- package/speechflow-cli/src/speechflow-node-x2x-trace.ts +10 -9
- package/speechflow-cli/src/speechflow-node-xio-device.ts +4 -3
- package/speechflow-cli/src/speechflow-node-xio-file.ts +20 -19
- package/speechflow-cli/src/speechflow-node-xio-mqtt.ts +14 -14
- package/speechflow-cli/src/speechflow-node-xio-websocket.ts +10 -10
- package/speechflow-cli/src/speechflow-node.ts +6 -6
- package/speechflow-cli/src/speechflow-util-audio.ts +1 -1
- package/speechflow-cli/src/speechflow-util-stream.ts +30 -5
- package/speechflow-ui-db/dst/index.js +20 -20
- package/speechflow-ui-db/package.json +7 -7
- package/speechflow-ui-st/dst/index.js +40 -40
- package/speechflow-ui-st/package.json +8 -8
package/speechflow-cli/src/speechflow-node-a2a-pitch.ts (new file):

@@ -0,0 +1,221 @@
+/*
+** SpeechFlow - Speech Processing Flow Graph
+** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
+** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
+*/
+
+/* standard dependencies */
+import path from "node:path"
+import Stream from "node:stream"
+
+/* external dependencies */
+import { AudioWorkletNode } from "node-web-audio-api"
+
+/* internal dependencies */
+import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
+import * as util from "./speechflow-util"
+
+/* parameter configuration */
+type AudioPitchShifterConfig = {
+    rate?: number
+    tempo?: number
+    pitch?: number
+    semitones?: number
+}
+
+/* audio pitch shifter class using SoundTouch WebAudio worklet */
+class AudioPitchShifter extends util.WebAudio {
+    /* internal state */
+    private pitchNode: AudioWorkletNode | null = null
+    private config: Required<AudioPitchShifterConfig>
+
+    /* construct object */
+    constructor (
+        sampleRate: number,
+        channels: number,
+        config: AudioPitchShifterConfig = {}
+    ) {
+        super(sampleRate, channels)
+        this.config = {
+            rate: config.rate ?? 1.0,
+            tempo: config.tempo ?? 1.0,
+            pitch: config.pitch ?? 1.0,
+            semitones: config.semitones ?? 0.0
+        }
+    }
+
+    /* setup object */
+    public async setup (): Promise<void> {
+        await super.setup()
+
+        /* add SoundTouch worklet module */
+        const packagePath = path.join(__dirname, "../node_modules/@soundtouchjs/audio-worklet")
+        const workletPath = path.join(packagePath, "dist/soundtouch-worklet.js")
+        await this.audioContext.audioWorklet.addModule(workletPath)
+
+        /* create SoundTouch worklet node */
+        this.pitchNode = new AudioWorkletNode(this.audioContext, "soundtouch-processor", {
+            numberOfInputs: 1,
+            numberOfOutputs: 1,
+            outputChannelCount: [ this.channels ]
+        })
+
+        /* set initial parameter values */
+        const params = this.pitchNode.parameters as Map<string, AudioParam>
+        params.get("rate")!.value = this.config.rate
+        params.get("tempo")!.value = this.config.tempo
+        params.get("pitch")!.value = this.config.pitch
+        params.get("pitchSemitones")!.value = this.config.semitones
+
+        /* connect nodes: source -> pitch -> capture */
+        this.sourceNode!.connect(this.pitchNode)
+        this.pitchNode.connect(this.captureNode!)
+    }
+
+    /* update an audio parameter value */
+    private updateParameter (
+        paramName: string,
+        value: number,
+        configField: keyof Required<AudioPitchShifterConfig>
+    ): void {
+        const params = this.pitchNode?.parameters as Map<string, AudioParam>
+        params?.get(paramName)?.setValueAtTime(value, this.audioContext.currentTime)
+        this.config[configField] = value
+    }
+
+    /* update rate value */
+    public setRate (rate: number): void {
+        this.updateParameter("rate", rate, "rate")
+    }
+
+    /* update tempo value */
+    public setTempo (tempo: number): void {
+        this.updateParameter("tempo", tempo, "tempo")
+    }
+
+    /* update pitch shift value */
+    public setPitch (pitch: number): void {
+        this.updateParameter("pitch", pitch, "pitch")
+    }
+
+    /* update pitch semitones setting */
+    public setSemitones (semitones: number): void {
+        this.updateParameter("pitchSemitones", semitones, "semitones")
+    }
+
+    /* destroy the pitch shifter */
+    public async destroy (): Promise<void> {
+        /* disconnect pitch node */
+        if (this.pitchNode !== null) {
+            this.pitchNode.disconnect()
+            this.pitchNode = null
+        }
+
+        /* destroy parent */
+        await super.destroy()
+    }
+}
+
+/* SpeechFlow node for pitch adjustment using SoundTouch WebAudio */
+export default class SpeechFlowNodeA2APitch extends SpeechFlowNode {
+    /* declare official node name */
+    public static name = "a2a-pitch"
+
+    /* internal state */
+    private closing = false
+    private pitchShifter: AudioPitchShifter | null = null
+
+    /* construct node */
+    constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
+        super(id, cfg, opts, args)
+
+        /* declare node configuration parameters */
+        this.configure({
+            rate: { type: "number", val: 1.0, match: (n: number) => n >= 0.25 && n <= 4.0 },
+            tempo: { type: "number", val: 1.0, match: (n: number) => n >= 0.25 && n <= 4.0 },
+            pitch: { type: "number", val: 1.0, match: (n: number) => n >= 0.25 && n <= 4.0 },
+            semitones: { type: "number", val: 0.0, match: (n: number) => n >= -24 && n <= 24 }
+        })
+
+        /* declare node input/output format */
+        this.input = "audio"
+        this.output = "audio"
+    }
+
+    /* open node */
+    async open () {
+        /* clear destruction flag */
+        this.closing = false
+
+        /* setup pitch shifter */
+        this.pitchShifter = new AudioPitchShifter(
+            this.config.audioSampleRate,
+            this.config.audioChannels, {
+                rate: this.params.rate,
+                tempo: this.params.tempo,
+                pitch: this.params.pitch,
+                semitones: this.params.semitones
+            }
+        )
+        await this.pitchShifter.setup()
+
+        /* establish a transform stream */
+        const self = this
+        this.stream = new Stream.Transform({
+            readableObjectMode: true,
+            writableObjectMode: true,
+            decodeStrings: false,
+            transform (chunk: SpeechFlowChunk & { payload: Buffer }, encoding, callback) {
+                if (self.closing) {
+                    callback(new Error("stream already destroyed"))
+                    return
+                }
+                if (!Buffer.isBuffer(chunk.payload))
+                    callback(new Error("invalid chunk payload type"))
+                else {
+                    /* shift pitch of audio chunk */
+                    const payload = util.convertBufToI16(chunk.payload, self.config.audioLittleEndian)
+                    self.pitchShifter?.process(payload).then((result) => {
+                        if (self.closing)
+                            throw new Error("stream already destroyed")
+
+                        /* take over pitch-shifted data */
+                        const payload = util.convertI16ToBuf(result, self.config.audioLittleEndian)
+                        chunk.payload = payload
+                        this.push(chunk)
+                        callback()
+                    }).catch((error: unknown) => {
+                        if (!self.closing)
+                            callback(util.ensureError(error, "pitch shifting failed"))
+                    })
+                }
+            },
+            final (callback) {
+                if (self.closing) {
+                    callback()
+                    return
+                }
+                this.push(null)
+                callback()
+            }
+        })
+    }
+
+    /* close node */
+    async close () {
+        /* indicate closing */
+        this.closing = true
+
+        /* destroy pitch shifter */
+        if (this.pitchShifter !== null) {
+            await this.pitchShifter.destroy()
+            this.pitchShifter = null
+        }
+
+        /* shutdown stream */
+        if (this.stream !== null) {
+            await util.destroyStream(this.stream)
+            this.stream = null
+        }
+    }
+}
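For orientation, the calls the new `a2a-pitch` node drives on its internal `AudioPitchShifter` can be summarized as below. This is only a sketch restating the API visible in the hunk above (constructor, `setup()`, the `set*()` updaters, `destroy()`); the class is module-internal, and `process()` is inherited from the `util.WebAudio` base class, which is not part of this diff, so its exact signature is assumed.

```ts
/* sketch only -- mirrors what SpeechFlowNodeA2APitch does in open()/transform()/close();
   AudioPitchShifter is module-internal, and process() comes from util.WebAudio (assumed) */
async function demoPitchShift (input: Int16Array): Promise<Int16Array> {
    const shifter = new AudioPitchShifter(48000, 2, { semitones: 3 }) /* 48 kHz, stereo, +3 semitones */
    await shifter.setup()                       /* load SoundTouch worklet, wire source -> pitch -> capture */
    const output = await shifter.process(input) /* pitch-shifted PCM samples back as Int16Array (assumed) */
    shifter.setSemitones(-2)                    /* parameters can be retuned live via setValueAtTime() */
    await shifter.destroy()                     /* disconnect worklet node, tear down WebAudio graph */
    return output
}
```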
package/speechflow-cli/src/speechflow-node-a2a-rnnoise.ts:

@@ -19,7 +19,7 @@ export default class SpeechFlowNodeA2ARNNoise extends SpeechFlowNode {
     public static name = "a2a-rnnoise"
 
     /* internal state */
-    private
+    private closing = false
     private sampleSize = 480 /* = 10ms at 48KHz, as required by RNNoise! */
     private worker: Worker | null = null
 
@@ -38,7 +38,7 @@ export default class SpeechFlowNodeA2ARNNoise extends SpeechFlowNode {
     /* open node */
     async open () {
         /* clear destruction flag */
-        this.
+        this.closing = false
 
         /* initialize worker */
         this.worker = new Worker(resolve(__dirname, "speechflow-node-a2a-rnnoise-wt.js"))
@@ -89,7 +89,7 @@ export default class SpeechFlowNodeA2ARNNoise extends SpeechFlowNode {
         /* send message to worker */
         let seq = 0
         const workerProcessSegment = async (segment: Int16Array<ArrayBuffer>) => {
-            if (this.
+            if (this.closing)
                 return segment
             const id = `${seq++}`
             return new Promise<Int16Array<ArrayBuffer>>((resolve) => {
@@ -105,7 +105,7 @@ export default class SpeechFlowNodeA2ARNNoise extends SpeechFlowNode {
             writableObjectMode: true,
             decodeStrings: false,
             transform (chunk: SpeechFlowChunk & { payload: Buffer }, encoding, callback) {
-                if (self.
+                if (self.closing) {
                     callback(new Error("stream already destroyed"))
                     return
                 }
@@ -128,14 +128,15 @@ export default class SpeechFlowNodeA2ARNNoise extends SpeechFlowNode {
                         /* forward updated chunk */
                         this.push(chunk)
                         callback()
-                    }).catch((err:
-
-
+                    }).catch((err: unknown) => {
+                        const error = util.ensureError(err)
+                        self.log("warning", `processing of chunk failed: ${error.message}`)
+                        callback(error)
                     })
                 }
             },
             final (callback) {
-                if (self.
+                if (self.closing) {
                     callback()
                     return
                 }
@@ -147,8 +148,8 @@ export default class SpeechFlowNodeA2ARNNoise extends SpeechFlowNode {
 
     /* close node */
     async close () {
-        /* indicate
-        this.
+        /* indicate closing */
+        this.closing = true
 
         /* shutdown worker */
         if (this.worker !== null) {
@@ -156,9 +157,9 @@ export default class SpeechFlowNodeA2ARNNoise extends SpeechFlowNode {
             this.worker = null
         }
 
-        /*
+        /* shutdown stream */
         if (this.stream !== null) {
-            this.stream
+            await util.destroyStream(this.stream)
             this.stream = null
         }
     }
package/speechflow-cli/src/speechflow-node-a2a-speex.ts:

@@ -22,7 +22,7 @@ export default class SpeechFlowNodeA2ASpeex extends SpeechFlowNode {
     public static name = "a2a-speex"
 
     /* internal state */
-    private
+    private closing = false
     private sampleSize = 480 /* = 10ms at 48KHz */
     private speexProcessor: SpeexPreprocessor | null = null
 
@@ -43,7 +43,7 @@ export default class SpeechFlowNodeA2ASpeex extends SpeechFlowNode {
     /* open node */
     async open () {
         /* clear destruction flag */
-        this.
+        this.closing = false
 
         /* validate sample rate compatibility */
        if (this.config.audioSampleRate !== 48000)
@@ -71,7 +71,7 @@ export default class SpeechFlowNodeA2ASpeex extends SpeechFlowNode {
             writableObjectMode: true,
             decodeStrings: false,
             transform (chunk: SpeechFlowChunk & { payload: Buffer }, encoding, callback) {
-                if (self.
+                if (self.closing) {
                     callback(new Error("stream already destroyed"))
                     return
                 }
@@ -83,12 +83,12 @@ export default class SpeechFlowNodeA2ASpeex extends SpeechFlowNode {
 
                     /* process Int16Array in necessary fixed-size segments */
                     util.processInt16ArrayInSegments(payload, self.sampleSize, (segment) => {
-                        if (self.
+                        if (self.closing)
                             throw new Error("stream already destroyed")
                         self.speexProcessor?.processInt16(segment)
                         return Promise.resolve(segment)
                     }).then((payload: Int16Array<ArrayBuffer>) => {
-                        if (self.
+                        if (self.closing)
                             throw new Error("stream already destroyed")
 
                         /* convert Int16Array back into Buffer */
@@ -100,14 +100,15 @@ export default class SpeechFlowNodeA2ASpeex extends SpeechFlowNode {
                         /* forward updated chunk */
                         this.push(chunk)
                         callback()
-                    }).catch((err:
-
-
+                    }).catch((err: unknown) => {
+                        const error = util.ensureError(err)
+                        self.log("warning", `processing of chunk failed: ${error.message}`)
+                        callback(error)
                     })
                 }
             },
             final (callback) {
-                if (self.
+                if (self.closing) {
                     callback()
                     return
                 }
@@ -119,8 +120,8 @@ export default class SpeechFlowNodeA2ASpeex extends SpeechFlowNode {
 
     /* close node */
     async close () {
-        /* indicate
-        this.
+        /* indicate closing */
+        this.closing = true
 
         /* destroy processor */
         if (this.speexProcessor !== null) {
@@ -128,9 +129,9 @@ export default class SpeechFlowNodeA2ASpeex extends SpeechFlowNode {
             this.speexProcessor = null
         }
 
-        /*
+        /* shutdown stream */
         if (this.stream !== null) {
-            this.stream
+            await util.destroyStream(this.stream)
             this.stream = null
         }
     }
package/speechflow-cli/src/speechflow-node-a2a-vad.ts:

@@ -40,7 +40,7 @@ export default class SpeechFlowNodeA2AVAD extends SpeechFlowNode {
     private queueRecv = this.queue.pointerUse("recv")
     private queueVAD = this.queue.pointerUse("vad")
     private queueSend = this.queue.pointerUse("send")
-    private
+    private closing = false
     private tailTimer: ReturnType<typeof setTimeout> | null = null
     private activeEventListeners = new Set<() => void>()
 
@@ -71,7 +71,7 @@ export default class SpeechFlowNodeA2AVAD extends SpeechFlowNode {
             throw new Error("VAD node currently supports PCM-S16LE audio only")
 
         /* clear destruction flag */
-        this.
+        this.closing = false
 
         /* internal processing constants */
         const vadSampleRateTarget = 16000 /* internal target of VAD */
@@ -98,7 +98,7 @@ export default class SpeechFlowNodeA2AVAD extends SpeechFlowNode {
             redemptionFrames: this.params.redemptionFrames,
             preSpeechPadFrames: this.params.preSpeechPadFrames,
             onSpeechStart: () => {
-                if (this.
+                if (this.closing)
                     return
                 this.log("info", "VAD: speech start")
                 if (this.params.mode === "unplugged") {
@@ -107,7 +107,7 @@ export default class SpeechFlowNodeA2AVAD extends SpeechFlowNode {
                 }
             },
             onSpeechEnd: (audio) => {
-                if (this.
+                if (this.closing)
                     return
                 const duration = util.audioArrayDuration(audio, vadSampleRateTarget)
                 this.log("info", `VAD: speech end (duration: ${duration.toFixed(2)}s)`)
@@ -115,7 +115,7 @@ export default class SpeechFlowNodeA2AVAD extends SpeechFlowNode {
                     tail = true
                     clearTailTimer()
                     this.tailTimer = setTimeout(() => {
-                        if (this.
+                        if (this.closing || this.tailTimer === null)
                             return
                         tail = false
                         this.tailTimer = null
@@ -123,14 +123,14 @@ export default class SpeechFlowNodeA2AVAD extends SpeechFlowNode {
                 }
             },
             onVADMisfire: () => {
-                if (this.
+                if (this.closing)
                     return
                 this.log("info", "VAD: speech end (segment too short)")
                 if (this.params.mode === "unplugged") {
                     tail = true
                     clearTailTimer()
                     this.tailTimer = setTimeout(() => {
-                        if (this.
+                        if (this.closing || this.tailTimer === null)
                             return
                         tail = false
                         this.tailTimer = null
@@ -138,7 +138,7 @@ export default class SpeechFlowNodeA2AVAD extends SpeechFlowNode {
                 }
             },
             onFrameProcessed: (audio) => {
-                if (this.
+                if (this.closing)
                     return
                 try {
                     /* annotate the current audio segment */
@@ -178,7 +178,7 @@ export default class SpeechFlowNodeA2AVAD extends SpeechFlowNode {
 
             /* receive audio chunk (writable side of stream) */
             write (chunk: SpeechFlowChunk, encoding, callback) {
-                if (self.
+                if (self.closing) {
                     callback(new Error("stream already destroyed"))
                     return
                 }
@@ -217,7 +217,7 @@ export default class SpeechFlowNodeA2AVAD extends SpeechFlowNode {
                 })
 
                 /* push segments through Voice Activity Detection (VAD) */
-                if (self.vad && !self.
+                if (self.vad && !self.closing) {
                     try {
                         for (const segment of segmentData)
                             self.vad.processAudio(segment.data)
@@ -230,14 +230,14 @@ export default class SpeechFlowNodeA2AVAD extends SpeechFlowNode {
                         callback()
                     }
                     catch (error) {
-                        callback(error
+                        callback(util.ensureError(error, "VAD processing failed"))
                     }
                 }
             },
 
             /* receive no more audio chunks (writable side of stream) */
             final (callback) {
-                if (self.
+                if (self.closing) {
                     callback()
                     return
                 }
@@ -249,14 +249,14 @@ export default class SpeechFlowNodeA2AVAD extends SpeechFlowNode {
 
             /* send audio chunk(s) (readable side of stream) */
             read (_size) {
-                if (self.
+                if (self.closing) {
                     this.push(null)
                     return
                 }
 
                 /* try to perform read operation from scratch */
                 const tryToRead = () => {
-                    if (self.
+                    if (self.closing) {
                         this.push(null)
                         return
                     }
@@ -265,7 +265,7 @@ export default class SpeechFlowNodeA2AVAD extends SpeechFlowNode {
                     const flushPendingChunks = () => {
                         let pushed = 0
                         while (true) {
-                            if (self.
+                            if (self.closing) {
                                 this.push(null)
                                 return
                             }
@@ -297,7 +297,7 @@ export default class SpeechFlowNodeA2AVAD extends SpeechFlowNode {
                            mode we else would be never called again until
                            we at least once push a new chunk as the result */
                         setTimeout(() => {
-                            if (self.
+                            if (self.closing || self.queue === null)
                                 return
                             tryToRead()
                         }, 0)
@@ -308,14 +308,15 @@ export default class SpeechFlowNodeA2AVAD extends SpeechFlowNode {
 
                     /* await forthcoming audio chunks */
                     const awaitForthcomingChunks = () => {
-
+                        self.activeEventListeners.delete(awaitForthcomingChunks)
+                        if (self.closing)
                             return
                         const element = self.queueSend.peek()
                         if (element !== undefined
                             && element.type === "audio-frame"
                             && element.isSpeech !== undefined)
                             flushPendingChunks()
-                        else if (!self.
+                        else if (!self.closing && !self.activeEventListeners.has(awaitForthcomingChunks)) {
                             self.queue.once("write", awaitForthcomingChunks)
                             self.activeEventListeners.add(awaitForthcomingChunks)
                         }
@@ -328,7 +329,7 @@ export default class SpeechFlowNodeA2AVAD extends SpeechFlowNode {
                         && element.type === "audio-frame"
                         && element.isSpeech !== undefined)
                         flushPendingChunks()
-                    else if (!self.
+                    else if (!self.closing && !self.activeEventListeners.has(awaitForthcomingChunks)) {
                         self.queue.once("write", awaitForthcomingChunks)
                         self.activeEventListeners.add(awaitForthcomingChunks)
                     }
@@ -340,8 +341,8 @@ export default class SpeechFlowNodeA2AVAD extends SpeechFlowNode {
 
     /* close node */
     async close () {
-        /* indicate
-        this.
+        /* indicate closing */
+        this.closing = true
 
         /* cleanup tail timer */
         if (this.tailTimer !== null) {
@@ -355,9 +356,9 @@ export default class SpeechFlowNodeA2AVAD extends SpeechFlowNode {
         })
         this.activeEventListeners.clear()
 
-        /*
+        /* shutdown stream */
         if (this.stream !== null) {
-            this.stream
+            await util.destroyStream(this.stream)
             this.stream = null
         }
 
package/speechflow-cli/src/speechflow-node-a2a-wav.ts:

@@ -9,6 +9,7 @@ import Stream from "node:stream"
 
 /* internal dependencies */
 import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
+import * as util from "./speechflow-util"
 
 /* write WAV header */
 const writeWavHeader = (
@@ -190,13 +191,7 @@
     async close () {
         /* shutdown stream */
         if (this.stream !== null) {
-            await
-                if (this.stream instanceof Stream.Duplex)
-                    this.stream.end(() => { resolve() })
-                else
-                    resolve()
-            })
-            this.stream.destroy()
+            await util.destroyStream(this.stream)
             this.stream = null
         }
     }
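The `await util.destroyStream(this.stream)` call that now appears in the `close()` methods above replaces per-node teardown code like the lines removed from the WAV node. The helper itself is added to `speechflow-util-stream.ts` in this release, but its body is not part of the hunks shown here; the following is merely a plausible sketch, reconstructed from the removed WAV code, of what a helper with that shape could look like.

```ts
/* hypothetical sketch -- not the shipped implementation of util.destroyStream() */
import Stream from "node:stream"

export async function destroyStream (stream: Stream.Readable | Stream.Writable): Promise<void> {
    /* first let the writable side end gracefully (as the removed WAV code did) */
    await new Promise<void>((resolve) => {
        if (stream instanceof Stream.Duplex || stream instanceof Stream.Writable)
            stream.end(() => { resolve() })
        else
            resolve()
    })

    /* then forcibly destroy the stream */
    stream.destroy()
}
```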