speechflow 1.1.0 → 1.2.0
This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
- package/CHANGELOG.md +11 -0
- package/README.md +37 -3
- package/dst/speechflow-node-a2a-gender.d.ts +17 -0
- package/dst/speechflow-node-a2a-gender.js +272 -0
- package/dst/speechflow-node-a2a-gender.js.map +1 -0
- package/dst/speechflow-node-a2a-meter.js +2 -2
- package/dst/speechflow-node-a2a-meter.js.map +1 -1
- package/dst/speechflow-node-a2a-mute.js +1 -0
- package/dst/speechflow-node-a2a-mute.js.map +1 -1
- package/dst/speechflow-node-a2a-vad.js +47 -63
- package/dst/speechflow-node-a2a-vad.js.map +1 -1
- package/dst/speechflow-node-a2a-wav.js +145 -122
- package/dst/speechflow-node-a2a-wav.js.map +1 -1
- package/dst/speechflow-node-a2t-deepgram.js +13 -3
- package/dst/speechflow-node-a2t-deepgram.js.map +1 -1
- package/dst/speechflow-node-t2a-elevenlabs.js +10 -5
- package/dst/speechflow-node-t2a-elevenlabs.js.map +1 -1
- package/dst/speechflow-node-t2a-kokoro.js.map +1 -1
- package/dst/speechflow-node-t2t-deepl.js.map +1 -1
- package/dst/speechflow-node-t2t-format.js.map +1 -1
- package/dst/speechflow-node-t2t-ollama.js.map +1 -1
- package/dst/speechflow-node-t2t-openai.js.map +1 -1
- package/dst/speechflow-node-t2t-subtitle.js.map +1 -1
- package/dst/speechflow-node-t2t-transformers.js.map +1 -1
- package/dst/speechflow-node-x2x-filter.d.ts +11 -0
- package/dst/speechflow-node-x2x-filter.js +113 -0
- package/dst/speechflow-node-x2x-filter.js.map +1 -0
- package/dst/speechflow-node-x2x-trace.js +24 -10
- package/dst/speechflow-node-x2x-trace.js.map +1 -1
- package/dst/speechflow-node-xio-device.js +14 -5
- package/dst/speechflow-node-xio-device.js.map +1 -1
- package/dst/speechflow-node-xio-file.js +58 -27
- package/dst/speechflow-node-xio-file.js.map +1 -1
- package/dst/speechflow-node-xio-mqtt.js.map +1 -1
- package/dst/speechflow-node-xio-websocket.js.map +1 -1
- package/dst/speechflow-node.js +1 -0
- package/dst/speechflow-node.js.map +1 -1
- package/dst/speechflow-utils.d.ts +14 -1
- package/dst/speechflow-utils.js +110 -2
- package/dst/speechflow-utils.js.map +1 -1
- package/dst/speechflow.js +23 -4
- package/dst/speechflow.js.map +1 -1
- package/etc/speechflow.yaml +51 -24
- package/package.json +6 -5
- package/src/speechflow-node-a2a-gender.ts +272 -0
- package/src/speechflow-node-a2a-meter.ts +3 -3
- package/src/speechflow-node-a2a-mute.ts +1 -0
- package/src/speechflow-node-a2a-vad.ts +58 -68
- package/src/speechflow-node-a2a-wav.ts +128 -91
- package/src/speechflow-node-a2t-deepgram.ts +15 -4
- package/src/speechflow-node-t2a-elevenlabs.ts +13 -8
- package/src/speechflow-node-t2a-kokoro.ts +3 -3
- package/src/speechflow-node-t2t-deepl.ts +2 -2
- package/src/speechflow-node-t2t-format.ts +2 -2
- package/src/speechflow-node-t2t-ollama.ts +2 -2
- package/src/speechflow-node-t2t-openai.ts +2 -2
- package/src/speechflow-node-t2t-subtitle.ts +1 -1
- package/src/speechflow-node-t2t-transformers.ts +2 -2
- package/src/speechflow-node-x2x-filter.ts +122 -0
- package/src/speechflow-node-x2x-trace.ts +28 -11
- package/src/speechflow-node-xio-device.ts +20 -8
- package/src/speechflow-node-xio-file.ts +74 -36
- package/src/speechflow-node-xio-mqtt.ts +3 -3
- package/src/speechflow-node-xio-websocket.ts +1 -1
- package/src/speechflow-node.ts +2 -0
- package/src/speechflow-utils.ts +81 -2
- package/src/speechflow.ts +46 -27
package/src/speechflow-node-a2a-vad.ts:

@@ -9,19 +9,24 @@ import Stream from "node:stream"
 
 /* external dependencies */
 import { RealTimeVAD } from "@ericedouard/vad-node-realtime"
-import { Duration } from "luxon"
 
 /* internal dependencies */
 import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
 import * as utils from "./speechflow-utils"
 
 /* audio stream queue element */
+type AudioQueueElementSegment = {
+    data: Float32Array,
+    isSpeech?: boolean
+}
 type AudioQueueElement = {
-    type:
-    chunk:
-
+    type: "audio-frame",
+    chunk: SpeechFlowChunk,
+    segmentIdx: number,
+    segmentData: AudioQueueElementSegment[],
+    isSpeech?: boolean
 } | {
-    type:
+    type: "audio-eof"
 }
 
 /* SpeechFlow node for VAD speech-to-speech processing */
@@ -89,10 +94,22 @@ export default class SpeechFlowNodeVAD extends SpeechFlowNode
         log("info", "VAD: speech end (segment too short)")
     },
     onFrameProcessed: (audio) => {
-        /* annotate the current audio
+        /* annotate the current audio segment */
         const element = this.queueVAD.peek()
-        if (element
-
+        if (element === undefined || element.type !== "audio-frame")
+            throw new Error("internal error which cannot happen: no more queued element")
+        const segment = element.segmentData[element.segmentIdx++]
+        segment.isSpeech = (audio.isSpeech > audio.notSpeech)
+
+        /* annotate the entire audio chunk */
+        if (element.segmentIdx >= element.segmentData.length) {
+            let isSpeech = false
+            for (const segment of element.segmentData) {
+                if (segment.isSpeech) {
+                    isSpeech = true
+                    break
+                }
+            }
             element.isSpeech = isSpeech
             this.queueVAD.touch()
             this.queueVAD.walk(+1)
@@ -102,14 +119,7 @@ export default class SpeechFlowNodeVAD extends SpeechFlowNode
     this.vad.start()
 
     /* provide Duplex stream and internally attach to VAD */
-    const
-    const cfg = this.config
-    const queue = this.queue
-    const queueRecv = this.queueRecv
-    const queueSend = this.queueSend
-    const mode = this.params.mode
-    let carrySamples = new Float32Array()
-    let carryStart = Duration.fromDurationLike(0)
+    const self = this
     this.stream = new Stream.Duplex({
         writableObjectMode: true,
         readableObjectMode: true,
@@ -123,38 +133,34 @@ export default class SpeechFlowNodeVAD extends SpeechFlowNode
             callback()
         else {
             /* convert audio samples from PCM/I16 to PCM/F32 */
-
-            let start = chunk.timestampStart
-
-            /* merge previous carry samples */
-            if (carrySamples.length > 0) {
-                start = carryStart
-                const merged = new Float32Array(carrySamples.length + data.length)
-                merged.set(carrySamples)
-                merged.set(data, carrySamples.length)
-                data = merged
-                carrySamples = new Float32Array()
-            }
+            const data = utils.convertBufToF32(chunk.payload, self.config.audioLittleEndian)
 
-            /*
-
-            const chunkSize =
+            /* segment audio samples as individual VAD-sized frames */
+            const segmentData: AudioQueueElementSegment[] = []
+            const chunkSize = vadSamplesPerFrame * (self.config.audioSampleRate / vadSampleRateTarget)
             const chunks = Math.trunc(data.length / chunkSize)
             for (let i = 0; i < chunks; i++) {
                 const frame = data.slice(i * chunkSize, (i + 1) * chunkSize)
-                const
-
-                const end = start.plus(duration)
-                const chunk = new SpeechFlowChunk(start, end, "final", "audio", buf)
-                queueRecv.append({ type: "audio-frame", chunk })
-                vad.processAudio(frame)
-                start = end
+                const segment: AudioQueueElementSegment = { data: frame }
+                segmentData.push(segment)
             }
+            if ((chunks * chunkSize) < data.length) {
+                const frame = new Float32Array(chunkSize)
+                frame.fill(0)
+                frame.set(data.slice(chunks * chunkSize, data.length))
+                const segment: AudioQueueElementSegment = { data: frame }
+                segmentData.push(segment)
+            }
+
+            /* queue the results */
+            self.queueRecv.append({
+                type: "audio-frame", chunk,
+                segmentIdx: 0, segmentData
+            })
 
-            /*
-            const
-
-            carryStart = start
+            /* push segments through Voice Activity Detection (VAD) */
+            for (const segment of segmentData)
+                self.vad!.processAudio(segment.data)
 
             callback()
         }
@@ -162,25 +168,8 @@ export default class SpeechFlowNodeVAD extends SpeechFlowNode
 
         /* receive no more audio chunks (writable side of stream) */
         final (callback) {
-            /* flush pending audio chunks */
-            if (carrySamples.length > 0) {
-                const chunkSize = (vadSamplesPerFrame * (cfg.audioSampleRate / vadSampleRateTarget))
-                if (carrySamples.length < chunkSize) {
-                    const merged = new Float32Array(chunkSize)
-                    merged.set(carrySamples)
-                    merged.fill(0.0, carrySamples.length, chunkSize)
-                    carrySamples = merged
-                }
-                const buf = utils.convertF32ToBuf(carrySamples)
-                const duration = utils.audioBufferDuration(buf)
-                const end = carryStart.plus(duration)
-                const chunk = new SpeechFlowChunk(carryStart, end, "final", "audio", buf)
-                queueRecv.append({ type: "audio-frame", chunk })
-                vad.processAudio(carrySamples)
-            }
-
             /* signal end of file */
-            queueRecv.append({ type: "audio-eof" })
+            self.queueRecv.append({ type: "audio-eof" })
             callback()
         },
 
@@ -192,7 +181,7 @@ export default class SpeechFlowNodeVAD extends SpeechFlowNode
         const flushPendingChunks = () => {
             let pushed = 0
             while (true) {
-                const element = queueSend.peek()
+                const element = self.queueSend.peek()
                 if (element === undefined)
                     break
                 else if (element.type === "audio-eof") {
@@ -202,19 +191,20 @@ export default class SpeechFlowNodeVAD extends SpeechFlowNode
                 else if (element.type === "audio-frame"
                     && element.isSpeech === undefined)
                     break
-                queueSend.walk(+1)
+                self.queueSend.walk(+1)
+                self.queue.trim()
                 if (element.isSpeech) {
                     this.push(element.chunk)
                     pushed++
                 }
-                else if (mode === "silenced") {
+                else if (self.params.mode === "silenced") {
                     const chunk = element.chunk.clone()
                     const buffer = chunk.payload as Buffer
                     buffer.fill(0)
                     this.push(chunk)
                     pushed++
                 }
-                else if (mode === "unplugged" && pushed === 0)
+                else if (self.params.mode === "unplugged" && pushed === 0)
                     /* we have to await chunks now, as in unplugged
                        mode we else would be never called again until
                        we at least once push a new chunk as the result */
@@ -224,16 +214,16 @@ export default class SpeechFlowNodeVAD extends SpeechFlowNode
 
         /* await forthcoming audio chunks */
         const awaitForthcomingChunks = () => {
-            const element = queueSend.peek()
+            const element = self.queueSend.peek()
            if (element !== undefined
                 && element.type === "audio-frame"
                 && element.isSpeech !== undefined)
                 flushPendingChunks()
             else
-                queue.once("write", awaitForthcomingChunks)
+                self.queue.once("write", awaitForthcomingChunks)
         }
 
-        const element = queueSend.peek()
+        const element = self.queueSend.peek()
         if (element !== undefined && element.type === "audio-eof")
             this.push(null)
         else if (element !== undefined
@@ -241,7 +231,7 @@ export default class SpeechFlowNodeVAD extends SpeechFlowNode
             && element.isSpeech !== undefined)
             flushPendingChunks()
         else
-            queue.once("write", awaitForthcomingChunks)
+            self.queue.once("write", awaitForthcomingChunks)
         }
         tryToRead()
     }
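The VAD node now slices each incoming audio chunk into fixed-size frames and zero-pads the last partial frame, instead of carrying leftover samples over to the next chunk. Below is a minimal, self-contained sketch of just that segmentation step; the function name, frame size, and sample counts are illustrative, not the module's actual API.

```ts
/* Sketch: split a Float32Array of samples into fixed-size VAD frames,
   zero-padding the last partial frame — mirroring the segmentation
   loop added to the VAD node above. */
function segmentSamples (data: Float32Array, frameSize: number): Float32Array[] {
    const frames: Float32Array[] = []
    const full = Math.trunc(data.length / frameSize)
    for (let i = 0; i < full; i++)
        frames.push(data.slice(i * frameSize, (i + 1) * frameSize))
    if (full * frameSize < data.length) {
        const tail = new Float32Array(frameSize)            /* implicitly zero-filled */
        tail.set(data.slice(full * frameSize, data.length))
        frames.push(tail)
    }
    return frames
}

/* example: 48kHz input, 512-sample VAD frames scaled up to the input rate */
const frameSize = 512 * (48000 / 16000)                      /* = 1536 samples */
const frames = segmentSamples(new Float32Array(4000), frameSize)
console.log(frames.length)                                   /* 3 (two full frames + one padded) */
```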
package/src/speechflow-node-a2a-wav.ts:

@@ -7,52 +7,69 @@
 /* standard dependencies */
 import Stream from "node:stream"
 
-/* external dependencies */
-import wav from "wav"
-
 /* internal dependencies */
-import SpeechFlowNode
-[… old lines 15-55 removed; their content is not shown by the registry's diff rendering …]
+import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
+
+/* write WAV header */
+const writeWavHeader = (
+    length: number,
+    options?: { audioFormat?: number, channels?: number, sampleRate?: number, bitDepth?: number }
+) => {
+    const audioFormat = options?.audioFormat ?? 0x001 /* PCM */
+    const channels    = options?.channels    ?? 1     /* mono */
+    const sampleRate  = options?.sampleRate  ?? 44100 /* 44KHz */
+    const bitDepth    = options?.bitDepth    ?? 16    /* 16-Bit */
+
+    const headerLength = 44
+    const dataLength   = length || (4294967295 - 100)
+    const fileSize     = dataLength + headerLength
+    const header       = Buffer.alloc(headerLength)
+
+    const RIFF = Buffer.alloc(4, "RIFF")
+    const WAVE = Buffer.alloc(4, "WAVE")
+    const fmt  = Buffer.alloc(4, "fmt ")
+    const data = Buffer.alloc(4, "data")
+    const byteRate   = (sampleRate * channels * bitDepth) / 8
+    const blockAlign = (channels * bitDepth) / 8
+
+    let offset = 0
+    RIFF.copy(header, offset); offset += RIFF.length
+    header.writeUInt32LE(fileSize - 8, offset); offset += 4
+    WAVE.copy(header, offset); offset += WAVE.length
+    fmt.copy(header, offset); offset += fmt.length
+    header.writeUInt32LE(16, offset); offset += 4
+    header.writeUInt16LE(audioFormat, offset); offset += 2
+    header.writeUInt16LE(channels, offset); offset += 2
+    header.writeUInt32LE(sampleRate, offset); offset += 4
+    header.writeUInt32LE(byteRate, offset); offset += 4
+    header.writeUInt16LE(blockAlign, offset); offset += 2
+    header.writeUInt16LE(bitDepth, offset); offset += 2
+    data.copy(header, offset); offset += data.length
+    header.writeUInt32LE(dataLength, offset); offset += 4
+
+    return header
+}
+
+/* read WAV header */
+const readWavHeader = (buffer: Buffer) => {
+    let offset = 0
+    const riffHead     = buffer.subarray(offset, offset + 4).toString(); offset += 4
+    const fileSize     = buffer.readUInt32LE(offset); offset += 4
+    const waveHead     = buffer.subarray(offset, offset + 4).toString(); offset += 4
+    const fmtHead      = buffer.subarray(offset, offset + 4).toString(); offset += 4
+    const formatLength = buffer.readUInt32LE(offset); offset += 4
+    const audioFormat  = buffer.readUInt16LE(offset); offset += 2
+    const channels     = buffer.readUInt16LE(offset); offset += 2
+    const sampleRate   = buffer.readUInt32LE(offset); offset += 4
+    const byteRate     = buffer.readUInt32LE(offset); offset += 4
+    const blockAlign   = buffer.readUInt16LE(offset); offset += 2
+    const bitDepth     = buffer.readUInt16LE(offset); offset += 2
+    const data         = buffer.subarray(offset, offset + 4).toString(); offset += 4
+    const dataLength   = buffer.readUInt32LE(offset); offset += 4
+
+    return {
+        riffHead, fileSize, waveHead, fmtHead, formatLength, audioFormat,
+        channels, sampleRate, byteRate, blockAlign, bitDepth, data, dataLength
     }
 }
 
@@ -77,52 +94,72 @@ export default class SpeechFlowNodeWAV extends SpeechFlowNode
 
     /* open node */
    async open () {
-[… old lines 80-125 removed; their content is not shown by the registry's diff rendering …]
+        /* establish a transform stream */
+        const self = this
+        let firstChunk = true
+        this.stream = new Stream.Transform({
+            readableObjectMode: true,
+            writableObjectMode: true,
+            decodeStrings: false,
+            transform (chunk: SpeechFlowChunk, encoding, callback) {
+                if (!Buffer.isBuffer(chunk.payload))
+                    callback(new Error("invalid chunk payload type"))
+                else if (firstChunk) {
+                    if (self.params.mode === "encode") {
+                        /* convert raw/PCM to WAV/PCM
+                           (NOTICE: as this is a continuous stream, the
+                           resulting WAV header is not 100% conforming
+                           to the WAV standard, as it has to use a zero
+                           duration information. This cannot be changed in
+                           a stream-based processing.) */
+                        const headerBuffer = writeWavHeader(0, {
+                            audioFormat: 0x0001 /* PCM */,
+                            channels:    self.config.audioChannels,
+                            sampleRate:  self.config.audioSampleRate,
+                            bitDepth:    self.config.audioBitDepth
+                        })
+                        const headerChunk = chunk.clone()
+                        headerChunk.payload = headerBuffer
+                        this.push(headerChunk)
+                        this.push(chunk)
+                        callback()
+                    }
+                    else if (self.params.mode === "decode") {
+                        /* convert WAV/PCM to raw/PCM */
+                        const header = readWavHeader(chunk.payload)
+                        self.log("info", "WAV audio stream: " +
+                            `audioFormat=${header.audioFormat === 0x0001 ? "PCM" :
+                                "0x" + (header.audioFormat as number).toString(16).padStart(4, "0")} ` +
+                            `channels=${header.channels} ` +
+                            `sampleRate=${header.sampleRate} ` +
+                            `bitDepth=${header.bitDepth}`)
+                        if (header.audioFormat !== 0x0001 /* PCM */)
+                            throw new Error("WAV not based on PCM format")
+                        if (header.bitDepth !== 16)
+                            throw new Error("WAV not based on 16 bit samples")
+                        if (header.sampleRate !== 48000)
+                            throw new Error("WAV not based on 48Khz sample rate")
+                        if (header.channels !== 1)
+                            throw new Error("WAV not based on mono channel")
+                        chunk.payload = chunk.payload.subarray(44)
+                        this.push(chunk)
+                        callback()
+                    }
+                    else
+                        throw new Error(`invalid operation mode "${self.params.mode}"`)
+                }
+                else {
+                    /* pass-through original chunk */
+                    this.push(chunk)
+                    callback()
+                }
+                firstChunk = false
+            },
+            final (callback) {
+                this.push(null)
+                callback()
+            }
+        })
     }
 
     /* close node */
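The WAV node drops the external `wav` package in favor of hand-rolled read/write helpers for the canonical 44-byte PCM header. As a plausibility check of that fixed layout, here is a small standalone sketch that writes such a header and reads the fields back at their documented offsets; it re-implements the layout locally rather than importing the package's own helpers, and the helper names are illustrative.

```ts
/* Sketch: 44-byte canonical WAV/PCM header — write it, then read the fields back. */
const writeHeader = (sampleRate: number, channels: number, bitDepth: number, dataLength: number): Buffer => {
    const h = Buffer.alloc(44)
    h.write("RIFF", 0); h.writeUInt32LE(36 + dataLength, 4); h.write("WAVE", 8)
    h.write("fmt ", 12); h.writeUInt32LE(16, 16)                  /* "fmt " sub-chunk size */
    h.writeUInt16LE(0x0001, 20)                                   /* audio format: PCM */
    h.writeUInt16LE(channels, 22)
    h.writeUInt32LE(sampleRate, 24)
    h.writeUInt32LE((sampleRate * channels * bitDepth) / 8, 28)   /* byte rate */
    h.writeUInt16LE((channels * bitDepth) / 8, 32)                /* block align */
    h.writeUInt16LE(bitDepth, 34)
    h.write("data", 36); h.writeUInt32LE(dataLength, 40)
    return h
}
const readHeader = (b: Buffer) => ({
    audioFormat: b.readUInt16LE(20),
    channels:    b.readUInt16LE(22),
    sampleRate:  b.readUInt32LE(24),
    bitDepth:    b.readUInt16LE(34),
    dataLength:  b.readUInt32LE(40)
})

const header = writeHeader(48000, 1, 16, 0)
console.log(readHeader(header))
/* { audioFormat: 1, channels: 1, sampleRate: 48000, bitDepth: 16, dataLength: 0 } */
```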
package/src/speechflow-node-a2t-deepgram.ts:

@@ -5,7 +5,7 @@
 */
 
 /* standard dependencies */
-import Stream
+import Stream from "node:stream"
 
 /* external dependencies */
 import * as Deepgram from "@deepgram/sdk"
@@ -65,6 +65,9 @@ export default class SpeechFlowNodeDeepgram extends SpeechFlowNode
         /* create queue for results */
         const queue = new utils.SingleQueue<SpeechFlowChunk>()
 
+        /* create a store for the meta information */
+        const metastore = new utils.TimeStore<Map<string, any>>()
+
         /* connect to Deepgram API */
         const deepgram = Deepgram.createClient(this.params.key)
         let language = "en"
@@ -86,21 +89,27 @@ export default class SpeechFlowNodeDeepgram extends SpeechFlowNode
             smart_format: true,
             punctuate: true,
             filler_words: true,
-            diarize:
+            diarize: false,
             numerals: true,
             profanity_filter: false
         })
 
         /* hook onto Deepgram API events */
         this.dg.on(Deepgram.LiveTranscriptionEvents.Transcript, async (data) => {
-            const text = (data.channel?.alternatives[0]
+            const text = (data.channel?.alternatives[0]?.transcript as string) ?? ""
             if (text === "")
                 this.log("info", `Deepgram: empty/dummy text received (start: ${data.start}s, duration: ${data.duration}s)`)
             else {
                 this.log("info", `Deepgram: text received (start: ${data.start}s, duration: ${data.duration}s): "${text}"`)
                 const start = Duration.fromMillis(data.start * 1000).plus(this.timeZeroOffset)
                 const end = start.plus({ seconds: data.duration })
-                const
+                const metas = metastore.fetch(start, end)
+                const meta = metas.reduce((prev: Map<string, any>, curr: Map<string, any>) => {
+                    curr.forEach((val, key) => { prev.set(key, val) })
+                    return prev
+                }, new Map<string, any>())
+                metastore.prune(start)
+                const chunk = new SpeechFlowChunk(start, end, "final", "text", text, meta)
                 queue.write(chunk)
             }
         })
@@ -180,6 +189,8 @@ export default class SpeechFlowNodeDeepgram extends SpeechFlowNode
                 if (chunk.payload.byteLength > 0) {
                     log("info", `Deepgram: send data (${chunk.payload.byteLength} bytes)`)
                     initTimeoutStart()
+                    if (chunk.meta.size > 0)
+                        metastore.store(chunk.timestampStart, chunk.timestampEnd, chunk.meta)
                     dg.send(chunk.payload.buffer) /* intentionally discard all time information */
                 }
                 callback()
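The Deepgram node now buffers per-chunk metadata in a time-indexed store (`utils.TimeStore`) while the audio is in flight, then merges every stored map whose time range overlaps the transcript Deepgram returns. Only the call sites (`store`, `fetch`, `prune`) appear in this diff, so the following is a hedged sketch of what such a store might look like, using plain millisecond timestamps instead of the package's Luxon Durations; class name and metadata keys are illustrative.

```ts
/* Sketch: a minimal time-indexed metadata store with store/fetch/prune,
   approximating the utils.TimeStore usage visible in the Deepgram node. */
type Entry<T> = { start: number, end: number, value: T }

class TimeStoreSketch<T> {
    private entries: Entry<T>[] = []
    store (start: number, end: number, value: T): void {
        this.entries.push({ start, end, value })
    }
    /* return all values whose time range overlaps [start, end] */
    fetch (start: number, end: number): T[] {
        return this.entries
            .filter((e) => e.start < end && e.end > start)
            .map((e) => e.value)
    }
    /* drop entries that ended before the given time */
    prune (before: number): void {
        this.entries = this.entries.filter((e) => e.end >= before)
    }
}

/* usage, mirroring how the node merges overlapping metadata maps */
const store = new TimeStoreSketch<Map<string, any>>()
store.store(0, 500, new Map([[ "speaker", "A" ]]))          /* illustrative keys */
store.store(500, 1000, new Map([[ "gender", "female" ]]))
const merged = store.fetch(0, 1000).reduce((prev, curr) => {
    curr.forEach((val, key) => { prev.set(key, val) })
    return prev
}, new Map<string, any>())
store.prune(1000)
console.log(merged)  /* Map { "speaker" => "A", "gender" => "female" } */
```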
package/src/speechflow-node-t2a-elevenlabs.ts:

@@ -5,7 +5,7 @@
 */
 
 /* standard dependencies */
-import Stream
+import Stream from "node:stream"
 
 /* external dependencies */
 import * as ElevenLabs from "@elevenlabs/elevenlabs-js"
@@ -30,11 +30,13 @@ export default class SpeechFlowNodeElevenlabs extends SpeechFlowNode
 
         /* declare node configuration parameters */
         this.configure({
-            key:
-            voice:
-            language:
-            speed:
-
+            key:        { type: "string", val: process.env.SPEECHFLOW_ELEVENLABS_KEY },
+            voice:      { type: "string", val: "Brian", pos: 0, match: /^(?:Brittney|Cassidy|Leonie|Mark|Brian)$/ },
+            language:   { type: "string", val: "en", pos: 1, match: /^(?:de|en)$/ },
+            speed:      { type: "number", val: 1.00, pos: 2, match: (n: number) => n >= 0.7 && n <= 1.2 },
+            stability:  { type: "number", val: 0.5, pos: 3, match: (n: number) => n >= 0.0 && n <= 1.0 },
+            similarity: { type: "number", val: 0.75, pos: 4, match: (n: number) => n >= 0.0 && n <= 1.0 },
+            optimize:   { type: "string", val: "latency", pos: 5, match: /^(?:latency|quality)$/ }
         })
 
         /* declare node input/output format */
@@ -90,7 +92,7 @@ export default class SpeechFlowNodeElevenlabs extends SpeechFlowNode
 
         /* perform text-to-speech operation with Elevenlabs API */
         const model = this.params.optimize === "quality" ?
-            "
+            "eleven_turbo_v2_5" :
             "eleven_flash_v2_5"
         const speechStream = (text: string) => {
             this.log("info", `ElevenLabs: send text "${text}"`)
@@ -101,7 +103,9 @@ export default class SpeechFlowNodeElevenlabs extends SpeechFlowNode
                 outputFormat: `pcm_${maxSampleRate}` as ElevenLabs.ElevenLabs.OutputFormat,
                 seed: 815, /* arbitrary, but fixated by us */
                 voiceSettings: {
-                    speed:
+                    speed:           this.params.speed,
+                    stability:       this.params.stability,
+                    similarityBoost: this.params.similarity
                 }
             }, {
                 timeoutInSeconds: 30,
@@ -128,6 +132,7 @@ export default class SpeechFlowNodeElevenlabs extends SpeechFlowNode
                 if (Buffer.isBuffer(chunk.payload))
                     callback(new Error("invalid chunk payload type"))
                 else {
+                    log("info", `ElevenLabs: send text: ${JSON.stringify(chunk.payload)}`)
                     speechStream(chunk.payload).then((stream) => {
                         getStreamAsBuffer(stream).then((buffer) => {
                             const bufferResampled = resampler.processChunk(buffer)
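With the new `stability`, `similarity`, and `optimize` parameters, the ElevenLabs node now forwards explicit voice settings and picks the model by a latency/quality trade-off. Below is a hedged sketch of that mapping as a plain helper; it uses only the option names visible in this diff, the surrounding SDK call is elided, and the helper itself is hypothetical rather than part of the node's API.

```ts
/* Sketch: map the node's new parameters onto the request shape
   visible in this diff (model choice plus voiceSettings). */
type ElevenLabsParams = {
    speed: number                     /* 0.7 .. 1.2 */
    stability: number                 /* 0.0 .. 1.0 */
    similarity: number                /* 0.0 .. 1.0 */
    optimize: "latency" | "quality"
}

const requestOptions = (params: ElevenLabsParams) => ({
    model: params.optimize === "quality" ? "eleven_turbo_v2_5" : "eleven_flash_v2_5",
    voiceSettings: {
        speed:           params.speed,
        stability:       params.stability,
        similarityBoost: params.similarity
    }
})

console.log(requestOptions({ speed: 1.0, stability: 0.5, similarity: 0.75, optimize: "latency" }))
```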
package/src/speechflow-node-t2a-kokoro.ts:

@@ -5,11 +5,11 @@
 */
 
 /* standard dependencies */
-import Stream
+import Stream from "node:stream"
 
 /* external dependencies */
-import { KokoroTTS }
-import SpeexResampler
+import { KokoroTTS } from "kokoro-js"
+import SpeexResampler from "speex-resampler"
 
 /* internal dependencies */
 import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
package/src/speechflow-node-t2t-deepl.ts:

@@ -5,10 +5,10 @@
 */
 
 /* standard dependencies */
-import Stream
+import Stream from "node:stream"
 
 /* external dependencies */
-import * as DeepL
+import * as DeepL from "deepl-node"
 
 /* internal dependencies */
 import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
package/src/speechflow-node-t2t-format.ts:

@@ -5,10 +5,10 @@
 */
 
 /* standard dependencies */
-import Stream
+import Stream from "node:stream"
 
 /* external dependencies */
-import wrapText
+import wrapText from "wrap-text"
 
 /* internal dependencies */
 import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
package/src/speechflow-node-t2t-ollama.ts:

@@ -5,10 +5,10 @@
 */
 
 /* standard dependencies */
-import Stream
+import Stream from "node:stream"
 
 /* external dependencies */
-import { Ollama }
+import { Ollama } from "ollama"
 
 /* internal dependencies */
 import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
package/src/speechflow-node-t2t-openai.ts:

@@ -5,10 +5,10 @@
 */
 
 /* standard dependencies */
-import Stream
+import Stream from "node:stream"
 
 /* external dependencies */
-import OpenAI
+import OpenAI from "openai"
 
 /* internal dependencies */
 import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"