speechflow 2.2.1 → 2.3.1
This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- package/{etc/claude.md → AGENTS.md} +8 -3
- package/CHANGELOG.md +98 -1
- package/README.md +28 -4
- package/etc/speechflow.yaml +3 -1
- package/etc/stx.conf +1 -1
- package/package.json +6 -6
- package/speechflow-cli/dst/speechflow-main-api.d.ts +2 -1
- package/speechflow-cli/dst/speechflow-main-api.js +57 -16
- package/speechflow-cli/dst/speechflow-main-api.js.map +1 -1
- package/speechflow-cli/dst/speechflow-main-cli.js +2 -2
- package/speechflow-cli/dst/speechflow-main-config.js +1 -1
- package/speechflow-cli/dst/speechflow-main-graph.js +55 -21
- package/speechflow-cli/dst/speechflow-main-graph.js.map +1 -1
- package/speechflow-cli/dst/speechflow-main-nodes.js +1 -1
- package/speechflow-cli/dst/speechflow-main-status.js +6 -3
- package/speechflow-cli/dst/speechflow-main-status.js.map +1 -1
- package/speechflow-cli/dst/speechflow-main.js +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-compressor-wt.js +17 -19
- package/speechflow-cli/dst/speechflow-node-a2a-compressor-wt.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-compressor.js +25 -8
- package/speechflow-cli/dst/speechflow-node-a2a-compressor.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-expander-wt.js +16 -13
- package/speechflow-cli/dst/speechflow-node-a2a-expander-wt.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-expander.js +6 -5
- package/speechflow-cli/dst/speechflow-node-a2a-expander.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-ffmpeg.js +7 -7
- package/speechflow-cli/dst/speechflow-node-a2a-ffmpeg.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-filler.js +7 -4
- package/speechflow-cli/dst/speechflow-node-a2a-filler.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-gain.js +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-gender.js +21 -16
- package/speechflow-cli/dst/speechflow-node-a2a-gender.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-gtcrn-wt.js +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-gtcrn.js +33 -11
- package/speechflow-cli/dst/speechflow-node-a2a-gtcrn.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-meter.js +2 -2
- package/speechflow-cli/dst/speechflow-node-a2a-meter.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-mute.js +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-pitch.js +4 -3
- package/speechflow-cli/dst/speechflow-node-a2a-pitch.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-rnnoise-wt.js +2 -2
- package/speechflow-cli/dst/speechflow-node-a2a-rnnoise-wt.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-rnnoise.js +19 -11
- package/speechflow-cli/dst/speechflow-node-a2a-rnnoise.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-speex.js +8 -8
- package/speechflow-cli/dst/speechflow-node-a2a-speex.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-vad.js +33 -29
- package/speechflow-cli/dst/speechflow-node-a2a-vad.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-wav.js +6 -5
- package/speechflow-cli/dst/speechflow-node-a2a-wav.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2t-amazon.d.ts +2 -1
- package/speechflow-cli/dst/speechflow-node-a2t-amazon.js +42 -23
- package/speechflow-cli/dst/speechflow-node-a2t-amazon.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2t-deepgram.js +13 -5
- package/speechflow-cli/dst/speechflow-node-a2t-deepgram.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2t-google.d.ts +1 -0
- package/speechflow-cli/dst/speechflow-node-a2t-google.js +8 -2
- package/speechflow-cli/dst/speechflow-node-a2t-google.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2t-openai.js +33 -27
- package/speechflow-cli/dst/speechflow-node-a2t-openai.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2a-amazon.js +16 -5
- package/speechflow-cli/dst/speechflow-node-t2a-amazon.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.js +17 -5
- package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2a-google.js +17 -5
- package/speechflow-cli/dst/speechflow-node-t2a-google.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2a-kitten.d.ts +15 -0
- package/speechflow-cli/dst/speechflow-node-t2a-kitten.js +194 -0
- package/speechflow-cli/dst/speechflow-node-t2a-kitten.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-t2a-kokoro.js +24 -10
- package/speechflow-cli/dst/speechflow-node-t2a-kokoro.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2a-openai.js +17 -5
- package/speechflow-cli/dst/speechflow-node-t2a-openai.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2a-supertonic.js +22 -7
- package/speechflow-cli/dst/speechflow-node-t2a-supertonic.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-amazon.js +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-deepl.js +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-format.js +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-google.js +4 -2
- package/speechflow-cli/dst/speechflow-node-t2t-google.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-modify.js +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-opus.js +10 -2
- package/speechflow-cli/dst/speechflow-node-t2t-opus.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-profanity.js +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-punctuation.js +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-sentence.d.ts +3 -0
- package/speechflow-cli/dst/speechflow-node-t2t-sentence.js +160 -57
- package/speechflow-cli/dst/speechflow-node-t2t-sentence.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-spellcheck.js +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js +34 -14
- package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-summary.js +3 -3
- package/speechflow-cli/dst/speechflow-node-t2t-summary.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-translate.js +1 -1
- package/speechflow-cli/dst/speechflow-node-x2x-filter.js +3 -2
- package/speechflow-cli/dst/speechflow-node-x2x-filter.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-x2x-trace.js +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-device.js +18 -7
- package/speechflow-cli/dst/speechflow-node-xio-device.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-exec.js +27 -15
- package/speechflow-cli/dst/speechflow-node-xio-exec.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-file.js +13 -7
- package/speechflow-cli/dst/speechflow-node-xio-file.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-mqtt.js +25 -12
- package/speechflow-cli/dst/speechflow-node-xio-mqtt.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-vban.js +32 -20
- package/speechflow-cli/dst/speechflow-node-xio-vban.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-webrtc.d.ts +1 -0
- package/speechflow-cli/dst/speechflow-node-xio-webrtc.js +84 -63
- package/speechflow-cli/dst/speechflow-node-xio-webrtc.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-websocket.d.ts +1 -0
- package/speechflow-cli/dst/speechflow-node-xio-websocket.js +75 -20
- package/speechflow-cli/dst/speechflow-node-xio-websocket.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node.js +5 -7
- package/speechflow-cli/dst/speechflow-node.js.map +1 -1
- package/speechflow-cli/dst/speechflow-util-audio-wt.js +31 -5
- package/speechflow-cli/dst/speechflow-util-audio-wt.js.map +1 -1
- package/speechflow-cli/dst/speechflow-util-audio.d.ts +1 -1
- package/speechflow-cli/dst/speechflow-util-audio.js +28 -15
- package/speechflow-cli/dst/speechflow-util-audio.js.map +1 -1
- package/speechflow-cli/dst/speechflow-util-error.d.ts +1 -1
- package/speechflow-cli/dst/speechflow-util-error.js +2 -2
- package/speechflow-cli/dst/speechflow-util-error.js.map +1 -1
- package/speechflow-cli/dst/speechflow-util-llm.js +13 -3
- package/speechflow-cli/dst/speechflow-util-llm.js.map +1 -1
- package/speechflow-cli/dst/speechflow-util-misc.d.ts +3 -2
- package/speechflow-cli/dst/speechflow-util-misc.js +63 -6
- package/speechflow-cli/dst/speechflow-util-misc.js.map +1 -1
- package/speechflow-cli/dst/speechflow-util-queue.d.ts +9 -17
- package/speechflow-cli/dst/speechflow-util-queue.js +98 -78
- package/speechflow-cli/dst/speechflow-util-queue.js.map +1 -1
- package/speechflow-cli/dst/speechflow-util-stream.d.ts +1 -1
- package/speechflow-cli/dst/speechflow-util-stream.js +35 -8
- package/speechflow-cli/dst/speechflow-util-stream.js.map +1 -1
- package/speechflow-cli/dst/speechflow-util.js +1 -1
- package/speechflow-cli/dst/speechflow.d.ts +1 -1
- package/speechflow-cli/dst/speechflow.js +1 -1
- package/speechflow-cli/etc/eslint.mjs +1 -1
- package/speechflow-cli/etc/oxlint.jsonc +2 -1
- package/speechflow-cli/etc/stx.conf +8 -2
- package/speechflow-cli/package.d/@ericedouard+vad-node-realtime+0.2.0.patch +2 -1
- package/speechflow-cli/package.d/@typescript-eslint+typescript-estree+8.57.2.patch +12 -0
- package/speechflow-cli/package.d/kitten-tts-js+0.1.2.patch +24 -0
- package/speechflow-cli/package.d/speex-resampler+3.0.1.patch +56 -0
- package/speechflow-cli/package.json +40 -30
- package/speechflow-cli/src/lib.d.ts +19 -1
- package/speechflow-cli/src/speechflow-main-api.ts +64 -19
- package/speechflow-cli/src/speechflow-main-cli.ts +2 -2
- package/speechflow-cli/src/speechflow-main-config.ts +1 -1
- package/speechflow-cli/src/speechflow-main-graph.ts +56 -22
- package/speechflow-cli/src/speechflow-main-nodes.ts +1 -1
- package/speechflow-cli/src/speechflow-main-status.ts +6 -3
- package/speechflow-cli/src/speechflow-main.ts +1 -1
- package/speechflow-cli/src/speechflow-node-a2a-compressor-wt.ts +19 -20
- package/speechflow-cli/src/speechflow-node-a2a-compressor.ts +31 -13
- package/speechflow-cli/src/speechflow-node-a2a-expander-wt.ts +17 -13
- package/speechflow-cli/src/speechflow-node-a2a-expander.ts +6 -5
- package/speechflow-cli/src/speechflow-node-a2a-ffmpeg.ts +9 -8
- package/speechflow-cli/src/speechflow-node-a2a-filler.ts +8 -4
- package/speechflow-cli/src/speechflow-node-a2a-gain.ts +1 -1
- package/speechflow-cli/src/speechflow-node-a2a-gender.ts +22 -18
- package/speechflow-cli/src/speechflow-node-a2a-gtcrn-wt.ts +1 -1
- package/speechflow-cli/src/speechflow-node-a2a-gtcrn.ts +43 -16
- package/speechflow-cli/src/speechflow-node-a2a-meter.ts +2 -2
- package/speechflow-cli/src/speechflow-node-a2a-mute.ts +1 -1
- package/speechflow-cli/src/speechflow-node-a2a-pitch.ts +4 -3
- package/speechflow-cli/src/speechflow-node-a2a-rnnoise-wt.ts +2 -2
- package/speechflow-cli/src/speechflow-node-a2a-rnnoise.ts +24 -12
- package/speechflow-cli/src/speechflow-node-a2a-speex.ts +10 -9
- package/speechflow-cli/src/speechflow-node-a2a-vad.ts +38 -31
- package/speechflow-cli/src/speechflow-node-a2a-wav.ts +6 -5
- package/speechflow-cli/src/speechflow-node-a2t-amazon.ts +47 -25
- package/speechflow-cli/src/speechflow-node-a2t-deepgram.ts +17 -6
- package/speechflow-cli/src/speechflow-node-a2t-google.ts +12 -4
- package/speechflow-cli/src/speechflow-node-a2t-openai.ts +39 -31
- package/speechflow-cli/src/speechflow-node-t2a-amazon.ts +16 -5
- package/speechflow-cli/src/speechflow-node-t2a-elevenlabs.ts +17 -5
- package/speechflow-cli/src/speechflow-node-t2a-google.ts +17 -5
- package/speechflow-cli/src/speechflow-node-t2a-kitten.ts +178 -0
- package/speechflow-cli/src/speechflow-node-t2a-kokoro.ts +24 -10
- package/speechflow-cli/src/speechflow-node-t2a-openai.ts +17 -5
- package/speechflow-cli/src/speechflow-node-t2a-supertonic.ts +22 -7
- package/speechflow-cli/src/speechflow-node-t2t-amazon.ts +1 -1
- package/speechflow-cli/src/speechflow-node-t2t-deepl.ts +1 -1
- package/speechflow-cli/src/speechflow-node-t2t-format.ts +1 -1
- package/speechflow-cli/src/speechflow-node-t2t-google.ts +4 -2
- package/speechflow-cli/src/speechflow-node-t2t-modify.ts +1 -1
- package/speechflow-cli/src/speechflow-node-t2t-opus.ts +10 -2
- package/speechflow-cli/src/speechflow-node-t2t-profanity.ts +1 -1
- package/speechflow-cli/src/speechflow-node-t2t-punctuation.ts +1 -1
- package/speechflow-cli/src/speechflow-node-t2t-sentence.ts +215 -62
- package/speechflow-cli/src/speechflow-node-t2t-spellcheck.ts +1 -1
- package/speechflow-cli/src/speechflow-node-t2t-subtitle.ts +39 -15
- package/speechflow-cli/src/speechflow-node-t2t-summary.ts +3 -3
- package/speechflow-cli/src/speechflow-node-t2t-translate.ts +1 -1
- package/speechflow-cli/src/speechflow-node-x2x-filter.ts +4 -3
- package/speechflow-cli/src/speechflow-node-x2x-trace.ts +1 -1
- package/speechflow-cli/src/speechflow-node-xio-device.ts +21 -7
- package/speechflow-cli/src/speechflow-node-xio-exec.ts +30 -16
- package/speechflow-cli/src/speechflow-node-xio-file.ts +15 -7
- package/speechflow-cli/src/speechflow-node-xio-mqtt.ts +28 -15
- package/speechflow-cli/src/speechflow-node-xio-vban.ts +35 -22
- package/speechflow-cli/src/speechflow-node-xio-webrtc.ts +92 -70
- package/speechflow-cli/src/speechflow-node-xio-websocket.ts +79 -22
- package/speechflow-cli/src/speechflow-node.ts +7 -8
- package/speechflow-cli/src/speechflow-util-audio-wt.ts +46 -7
- package/speechflow-cli/src/speechflow-util-audio.ts +31 -17
- package/speechflow-cli/src/speechflow-util-error.ts +3 -3
- package/speechflow-cli/src/speechflow-util-llm.ts +14 -3
- package/speechflow-cli/src/speechflow-util-misc.ts +63 -6
- package/speechflow-cli/src/speechflow-util-queue.ts +103 -81
- package/speechflow-cli/src/speechflow-util-stream.ts +40 -8
- package/speechflow-cli/src/speechflow-util.ts +1 -1
- package/speechflow-cli/src/speechflow.ts +1 -1
- package/speechflow-ui-db/dst/index.html +1 -1
- package/speechflow-ui-db/dst/index.js +15 -15
- package/speechflow-ui-db/etc/eslint.mjs +1 -1
- package/speechflow-ui-db/etc/oxlint.jsonc +1 -1
- package/speechflow-ui-db/etc/stx.conf +1 -1
- package/speechflow-ui-db/etc/stylelint.js +1 -1
- package/speechflow-ui-db/etc/stylelint.yaml +1 -1
- package/speechflow-ui-db/etc/vite-client.mts +1 -1
- package/speechflow-ui-db/package.d/@typescript-eslint+typescript-estree+8.57.2.patch +12 -0
- package/speechflow-ui-db/package.json +22 -16
- package/speechflow-ui-db/src/app.styl +1 -1
- package/speechflow-ui-db/src/app.vue +1 -1
- package/speechflow-ui-db/src/index.html +1 -1
- package/speechflow-ui-db/src/index.ts +1 -1
- package/speechflow-ui-st/dst/index.html +1 -1
- package/speechflow-ui-st/dst/index.js +31 -31
- package/speechflow-ui-st/etc/eslint.mjs +1 -1
- package/speechflow-ui-st/etc/oxlint.jsonc +1 -1
- package/speechflow-ui-st/etc/stx.conf +1 -1
- package/speechflow-ui-st/etc/stylelint.js +1 -1
- package/speechflow-ui-st/etc/stylelint.yaml +1 -1
- package/speechflow-ui-st/etc/vite-client.mts +1 -1
- package/speechflow-ui-st/package.d/@typescript-eslint+typescript-estree+8.57.2.patch +12 -0
- package/speechflow-ui-st/package.json +23 -17
- package/speechflow-ui-st/src/app.styl +1 -1
- package/speechflow-ui-st/src/app.vue +1 -1
- package/speechflow-ui-st/src/index.html +1 -1
- package/speechflow-ui-st/src/index.ts +1 -1
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
/*
|
|
2
2
|
** SpeechFlow - Speech Processing Flow Graph
|
|
3
|
-
** Copyright (c) 2024-
|
|
3
|
+
** Copyright (c) 2024-2026 Dr. Ralf S. Engelschall <rse@engelschall.com>
|
|
4
4
|
** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
|
|
5
5
|
*/
|
|
6
6
|
|
|
@@ -39,24 +39,23 @@ class ExpanderProcessor extends AudioWorkletProcessor {
|
|
|
39
39
|
if (ratio <= 1.0)
|
|
40
40
|
return 0
|
|
41
41
|
|
|
42
|
-
/* determine
|
|
42
|
+
/* determine knee boundaries (symmetric around threshold) */
|
|
43
43
|
const halfKnee = kneeDB * 0.5
|
|
44
44
|
const belowKnee = levelDB < (thresholdDB - halfKnee)
|
|
45
|
-
const
|
|
45
|
+
const aboveKnee = levelDB > (thresholdDB + halfKnee)
|
|
46
46
|
|
|
47
|
-
/* short-circuit for no expansion (above
|
|
48
|
-
if (
|
|
47
|
+
/* short-circuit for no expansion (above knee) */
|
|
48
|
+
if (aboveKnee)
|
|
49
49
|
return 0
|
|
50
50
|
|
|
51
|
-
/* apply soft-knee */
|
|
51
|
+
/* apply soft-knee (standard textbook quadratic) */
|
|
52
52
|
if (kneeDB > 0 && !belowKnee) {
|
|
53
|
-
const
|
|
54
|
-
|
|
55
|
-
return idealGainDB * x * x
|
|
53
|
+
const d = thresholdDB + halfKnee - levelDB
|
|
54
|
+
return (1.0 - ratio) * d * d / (2.0 * kneeDB)
|
|
56
55
|
}
|
|
57
56
|
|
|
58
57
|
/* determine target level */
|
|
59
|
-
const targetOut = thresholdDB + (levelDB - thresholdDB)
|
|
58
|
+
const targetOut = thresholdDB + (levelDB - thresholdDB) * ratio
|
|
60
59
|
|
|
61
60
|
/* return gain difference */
|
|
62
61
|
return targetOut - levelDB
|
|
@@ -97,16 +96,17 @@ class ExpanderProcessor extends AudioWorkletProcessor {
|
|
|
97
96
|
const releaseS = Math.max(parameters["release"][0], 1 / this.sampleRate)
|
|
98
97
|
const makeupDB = parameters["makeup"][0]
|
|
99
98
|
|
|
100
|
-
/* update envelope per channel */
|
|
99
|
+
/* update envelope per channel and collect RMS values */
|
|
100
|
+
const rms = Array.from<number>({ length: nCh })
|
|
101
101
|
for (let ch = 0; ch < nCh; ch++)
|
|
102
|
-
|
|
102
|
+
rms[ch] = util.updateEnvelopeForChannel(this.env, this.sampleRate, ch, input[ch], attackS, releaseS)
|
|
103
103
|
|
|
104
104
|
/* determine linear value from decibel makeup value */
|
|
105
105
|
const makeUpLin = util.dB2lin(makeupDB)
|
|
106
106
|
|
|
107
107
|
/* iterate over all channels */
|
|
108
108
|
for (let ch = 0; ch < nCh; ch++) {
|
|
109
|
-
const levelDB = util.lin2dB(
|
|
109
|
+
const levelDB = util.lin2dB(rms[ch])
|
|
110
110
|
const gainDB = this.gainDBFor(levelDB, thresholdDB, ratio, kneeDB)
|
|
111
111
|
let gainLin = util.dB2lin(gainDB) * makeUpLin
|
|
112
112
|
|
|
@@ -117,6 +117,10 @@ class ExpanderProcessor extends AudioWorkletProcessor {
|
|
|
117
117
|
gainLin *= util.dB2lin(neededLiftDB)
|
|
118
118
|
}
|
|
119
119
|
|
|
120
|
+
/* guard against IEEE 754 edge case (0 * Infinity = NaN on silence) */
|
|
121
|
+
if (!Number.isFinite(gainLin))
|
|
122
|
+
gainLin = 0
|
|
123
|
+
|
|
120
124
|
/* apply gain change to channel */
|
|
121
125
|
const inp = input[ch]
|
|
122
126
|
const out = output[ch]
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
/*
|
|
2
2
|
** SpeechFlow - Speech Processing Flow Graph
|
|
3
|
-
** Copyright (c) 2024-
|
|
3
|
+
** Copyright (c) 2024-2026 Dr. Ralf S. Engelschall <rse@engelschall.com>
|
|
4
4
|
** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
|
|
5
5
|
*/
|
|
6
6
|
|
|
@@ -168,7 +168,7 @@ export default class SpeechFlowNodeA2AExpander extends SpeechFlowNode {
|
|
|
168
168
|
callback(new Error("expander not initialized"))
|
|
169
169
|
else {
|
|
170
170
|
/* expand chunk */
|
|
171
|
-
const payload = util.convertBufToI16(chunk.payload)
|
|
171
|
+
const payload = util.convertBufToI16(chunk.payload, self.config.audioLittleEndian)
|
|
172
172
|
self.expander.process(payload).then((result) => {
|
|
173
173
|
if (self.closing) {
|
|
174
174
|
callback(new Error("stream already destroyed"))
|
|
@@ -176,9 +176,10 @@ export default class SpeechFlowNodeA2AExpander extends SpeechFlowNode {
|
|
|
176
176
|
}
|
|
177
177
|
|
|
178
178
|
/* take over expanded data */
|
|
179
|
-
const payload = util.convertI16ToBuf(result)
|
|
180
|
-
|
|
181
|
-
|
|
179
|
+
const payload = util.convertI16ToBuf(result, self.config.audioLittleEndian)
|
|
180
|
+
const chunkNew = chunk.clone()
|
|
181
|
+
chunkNew.payload = payload
|
|
182
|
+
this.push(chunkNew)
|
|
182
183
|
callback()
|
|
183
184
|
}).catch((error: unknown) => {
|
|
184
185
|
if (self.closing)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
/*
|
|
2
2
|
** SpeechFlow - Speech Processing Flow Graph
|
|
3
|
-
** Copyright (c) 2024-
|
|
3
|
+
** Copyright (c) 2024-2026 Dr. Ralf S. Engelschall <rse@engelschall.com>
|
|
4
4
|
** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
|
|
5
5
|
*/
|
|
6
6
|
|
|
@@ -100,18 +100,13 @@ export default class SpeechFlowNodeA2AFFMPEG extends SpeechFlowNode {
|
|
|
100
100
|
|
|
101
101
|
/* wrap streams with conversions for chunk vs plain audio */
|
|
102
102
|
const wrapper1 = util.createTransformStreamForWritableSide("audio", 1)
|
|
103
|
-
const wrapper2 = util.createTransformStreamForReadableSide("audio", () => this.timeZero
|
|
103
|
+
const wrapper2 = util.createTransformStreamForReadableSide("audio", () => this.timeZero, undefined,
|
|
104
|
+
this.config.audioSampleRate, this.config.audioBitDepth, this.config.audioChannels)
|
|
104
105
|
this.stream = Stream.compose(wrapper1, ffmpegStream, wrapper2)
|
|
105
106
|
}
|
|
106
107
|
|
|
107
108
|
/* close node */
|
|
108
109
|
async close () {
|
|
109
|
-
/* shutdown stream */
|
|
110
|
-
if (this.stream !== null) {
|
|
111
|
-
await util.destroyStream(this.stream)
|
|
112
|
-
this.stream = null
|
|
113
|
-
}
|
|
114
|
-
|
|
115
110
|
/* shutdown FFmpeg */
|
|
116
111
|
if (this.ffmpeg !== null) {
|
|
117
112
|
util.run("stopping FFmpeg process",
|
|
@@ -119,6 +114,12 @@ export default class SpeechFlowNodeA2AFFMPEG extends SpeechFlowNode {
|
|
|
119
114
|
() => {})
|
|
120
115
|
this.ffmpeg = null
|
|
121
116
|
}
|
|
117
|
+
|
|
118
|
+
/* shutdown stream */
|
|
119
|
+
if (this.stream !== null) {
|
|
120
|
+
await util.destroyStream(this.stream)
|
|
121
|
+
this.stream = null
|
|
122
|
+
}
|
|
122
123
|
}
|
|
123
124
|
}
|
|
124
125
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
/*
|
|
2
2
|
** SpeechFlow - Speech Processing Flow Graph
|
|
3
|
-
** Copyright (c) 2024-
|
|
3
|
+
** Copyright (c) 2024-2026 Dr. Ralf S. Engelschall <rse@engelschall.com>
|
|
4
4
|
** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
|
|
5
5
|
*/
|
|
6
6
|
|
|
@@ -196,6 +196,12 @@ export default class SpeechFlowNodeA2AFiller extends SpeechFlowNode {
|
|
|
196
196
|
self.filler.done()
|
|
197
197
|
await util.sleep(10)
|
|
198
198
|
|
|
199
|
+
/* re-check after await (close() may have run during sleep) */
|
|
200
|
+
if (self.closing || self.sendQueue === null) {
|
|
201
|
+
callback()
|
|
202
|
+
return
|
|
203
|
+
}
|
|
204
|
+
|
|
199
205
|
/* signal end of stream */
|
|
200
206
|
self.sendQueue.write(null)
|
|
201
207
|
}
|
|
@@ -221,10 +227,8 @@ export default class SpeechFlowNodeA2AFiller extends SpeechFlowNode {
|
|
|
221
227
|
self.log("info", "received EOF signal")
|
|
222
228
|
this.push(null)
|
|
223
229
|
}
|
|
224
|
-
else if (!(chunk.payload
|
|
230
|
+
else if (!Buffer.isBuffer(chunk.payload))
|
|
225
231
|
self.log("warning", "invalid chunk (expected audio buffer)")
|
|
226
|
-
this.push(null)
|
|
227
|
-
}
|
|
228
232
|
else {
|
|
229
233
|
self.log("debug", `received data (${chunk.payload.byteLength} bytes)`)
|
|
230
234
|
this.push(chunk)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
/*
|
|
2
2
|
** SpeechFlow - Speech Processing Flow Graph
|
|
3
|
-
** Copyright (c) 2024-
|
|
3
|
+
** Copyright (c) 2024-2026 Dr. Ralf S. Engelschall <rse@engelschall.com>
|
|
4
4
|
** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
|
|
5
5
|
*/
|
|
6
6
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
/*
|
|
2
2
|
** SpeechFlow - Speech Processing Flow Graph
|
|
3
|
-
** Copyright (c) 2024-
|
|
3
|
+
** Copyright (c) 2024-2026 Dr. Ralf S. Engelschall <rse@engelschall.com>
|
|
4
4
|
** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
|
|
5
5
|
*/
|
|
6
6
|
|
|
@@ -105,10 +105,13 @@ export default class SpeechFlowNodeA2AGender extends SpeechFlowNode {
|
|
|
105
105
|
device: "auto",
|
|
106
106
|
progress_callback: progressCallback
|
|
107
107
|
})
|
|
108
|
+
const ac = new AbortController()
|
|
108
109
|
this.classifier = await Promise.race([
|
|
109
110
|
pipelinePromise,
|
|
110
|
-
util.timeout(30 * 1000, "model initialization timeout")
|
|
111
|
-
])
|
|
111
|
+
util.timeout(30 * 1000, "model initialization timeout", ac.signal)
|
|
112
|
+
]).finally(() => {
|
|
113
|
+
ac.abort()
|
|
114
|
+
}) as Transformers.AudioClassificationPipeline
|
|
112
115
|
}
|
|
113
116
|
catch (error) {
|
|
114
117
|
if (this.progressInterval) {
|
|
@@ -147,13 +150,13 @@ export default class SpeechFlowNodeA2AGender extends SpeechFlowNode {
|
|
|
147
150
|
return genderLast
|
|
148
151
|
|
|
149
152
|
/* classify audio */
|
|
150
|
-
const
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
153
|
+
const ac = new AbortController()
|
|
154
|
+
const classified = await Promise.race([
|
|
155
|
+
this.classifier(data) as Promise<Transformers.AudioClassificationOutput>,
|
|
156
|
+
util.timeout(30 * 1000, "classification timeout", ac.signal)
|
|
157
|
+
]).finally(() => {
|
|
158
|
+
ac.abort()
|
|
159
|
+
})
|
|
157
160
|
const c1 = classified.find((c) => c.label === "male")
|
|
158
161
|
const c2 = classified.find((c) => c.label === "female")
|
|
159
162
|
const male = c1 ? c1.score : 0.0
|
|
@@ -199,7 +202,7 @@ export default class SpeechFlowNodeA2AGender extends SpeechFlowNode {
|
|
|
199
202
|
const element = this.queueAC.peek(pos)
|
|
200
203
|
if (element === undefined || element.type !== "audio-frame")
|
|
201
204
|
break
|
|
202
|
-
if ((samples + element.data.length)
|
|
205
|
+
if ((samples + element.data.length) <= frameWindowSamples) {
|
|
203
206
|
data.set(element.data, samples)
|
|
204
207
|
samples += element.data.length
|
|
205
208
|
}
|
|
@@ -224,12 +227,13 @@ export default class SpeechFlowNodeA2AGender extends SpeechFlowNode {
|
|
|
224
227
|
catch (error) {
|
|
225
228
|
this.log("error", `gender classification error: ${error}`)
|
|
226
229
|
}
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
230
|
+
finally {
|
|
231
|
+
/* re-initiate working off round */
|
|
232
|
+
workingOff = false
|
|
233
|
+
if (!this.closing) {
|
|
234
|
+
this.workingOffTimer = setTimeout(workOffQueue, 100)
|
|
235
|
+
this.queue.once("write", workOffQueue)
|
|
236
|
+
}
|
|
233
237
|
}
|
|
234
238
|
}
|
|
235
239
|
this.queue.once("write", workOffQueue)
|
|
@@ -313,7 +317,7 @@ export default class SpeechFlowNodeA2AGender extends SpeechFlowNode {
|
|
|
313
317
|
else if (element.type === "audio-frame"
|
|
314
318
|
&& element.gender === undefined)
|
|
315
319
|
break
|
|
316
|
-
const duration = util.audioArrayDuration(element.data)
|
|
320
|
+
const duration = util.audioArrayDuration(element.data, sampleRateTarget)
|
|
317
321
|
const fmtTime = (t: Duration) => t.toFormat("hh:mm:ss.SSS")
|
|
318
322
|
const times = `start: ${fmtTime(element.chunk.timestampStart)}, ` +
|
|
319
323
|
`end: ${fmtTime(element.chunk.timestampEnd)}`
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
/*
|
|
2
2
|
** SpeechFlow - Speech Processing Flow Graph
|
|
3
|
-
** Copyright (c) 2024-
|
|
3
|
+
** Copyright (c) 2024-2026 Dr. Ralf S. Engelschall <rse@engelschall.com>
|
|
4
4
|
** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
|
|
5
5
|
*/
|
|
6
6
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
/*
|
|
2
2
|
** SpeechFlow - Speech Processing Flow Graph
|
|
3
|
-
** Copyright (c) 2024-
|
|
3
|
+
** Copyright (c) 2024-2026 Dr. Ralf S. Engelschall <rse@engelschall.com>
|
|
4
4
|
** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
|
|
5
5
|
*/
|
|
6
6
|
|
|
@@ -114,17 +114,27 @@ export default class SpeechFlowNodeA2AGTCRN extends SpeechFlowNode {
|
|
|
114
114
|
})
|
|
115
115
|
})
|
|
116
116
|
|
|
117
|
-
/*
|
|
118
|
-
const pending = new Map<string,
|
|
117
|
+
/* track pending promises */
|
|
118
|
+
const pending = new Map<string, {
|
|
119
|
+
resolve: (arr: Float32Array<ArrayBuffer>) => void,
|
|
120
|
+
reject: (err: Error) => void
|
|
121
|
+
}>()
|
|
122
|
+
|
|
123
|
+
/* reject all pending promises on worker exit */
|
|
119
124
|
this.worker.on("exit", () => {
|
|
125
|
+
const err = new Error("worker terminated")
|
|
126
|
+
for (const cb of pending.values())
|
|
127
|
+
cb.reject(err)
|
|
120
128
|
pending.clear()
|
|
121
129
|
})
|
|
130
|
+
|
|
131
|
+
/* receive message from worker */
|
|
122
132
|
this.worker.on("message", (msg: any) => {
|
|
123
133
|
if (typeof msg === "object" && msg !== null && msg.type === "process-done") {
|
|
124
134
|
const cb = pending.get(msg.id)
|
|
125
135
|
pending.delete(msg.id)
|
|
126
136
|
if (cb)
|
|
127
|
-
cb(msg.data)
|
|
137
|
+
cb.resolve(msg.data)
|
|
128
138
|
else
|
|
129
139
|
this.log("warning", `GTCRN worker thread sent back unexpected id: ${msg.id}`)
|
|
130
140
|
}
|
|
@@ -140,8 +150,8 @@ export default class SpeechFlowNodeA2AGTCRN extends SpeechFlowNode {
|
|
|
140
150
|
if (this.closing)
|
|
141
151
|
return samples
|
|
142
152
|
const id = `${seq++}`
|
|
143
|
-
return new Promise<Float32Array<ArrayBuffer>>((resolve) => {
|
|
144
|
-
pending.set(id,
|
|
153
|
+
return new Promise<Float32Array<ArrayBuffer>>((resolve, reject) => {
|
|
154
|
+
pending.set(id, { resolve, reject })
|
|
145
155
|
this.worker!.postMessage({ type: "process", id, samples }, [ samples.buffer ])
|
|
146
156
|
})
|
|
147
157
|
}
|
|
@@ -161,24 +171,37 @@ export default class SpeechFlowNodeA2AGTCRN extends SpeechFlowNode {
|
|
|
161
171
|
callback(new Error("invalid chunk payload type"))
|
|
162
172
|
else {
|
|
163
173
|
/* resample Buffer from 48KHz (SpeechFlow) to 16KHz (GTCRN) */
|
|
164
|
-
|
|
174
|
+
if (self.resamplerDown === null) {
|
|
175
|
+
callback(new Error("resamplerDown already destroyed"))
|
|
176
|
+
return
|
|
177
|
+
}
|
|
178
|
+
const resampledDown = self.resamplerDown.processChunk(chunk.payload)
|
|
165
179
|
|
|
166
180
|
/* convert Buffer into Float32Array */
|
|
167
181
|
const payload = util.convertBufToF32(resampledDown)
|
|
168
182
|
|
|
169
183
|
/* process with GTCRN */
|
|
170
184
|
workerProcess(payload).then((result: Float32Array<ArrayBuffer>) => {
|
|
185
|
+
/* short-circuit if already closing */
|
|
186
|
+
if (self.closing) {
|
|
187
|
+
callback()
|
|
188
|
+
return
|
|
189
|
+
}
|
|
190
|
+
|
|
171
191
|
/* convert Float32Array into Buffer */
|
|
172
192
|
const buf = util.convertF32ToBuf(result)
|
|
173
193
|
|
|
174
194
|
/* resample Buffer from 16KHz (GTCRN) back to 48KHz (SpeechFlow) */
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
195
|
+
if (self.resamplerUp === null) {
|
|
196
|
+
callback(new Error("resamplerUp already destroyed"))
|
|
197
|
+
return
|
|
198
|
+
}
|
|
199
|
+
const resampledUp = self.resamplerUp.processChunk(buf)
|
|
200
|
+
|
|
201
|
+
/* forward cloned chunk with updated payload */
|
|
202
|
+
const chunkNew = chunk.clone()
|
|
203
|
+
chunkNew.payload = resampledUp
|
|
204
|
+
this.push(chunkNew)
|
|
182
205
|
callback()
|
|
183
206
|
}).catch((err: unknown) => {
|
|
184
207
|
const error = util.ensureError(err)
|
|
@@ -211,9 +234,13 @@ export default class SpeechFlowNodeA2AGTCRN extends SpeechFlowNode {
|
|
|
211
234
|
}
|
|
212
235
|
|
|
213
236
|
/* destroy resamplers */
|
|
214
|
-
if (this.resamplerDown !== null)
|
|
237
|
+
if (this.resamplerDown !== null) {
|
|
238
|
+
this.resamplerDown.destroy()
|
|
215
239
|
this.resamplerDown = null
|
|
216
|
-
|
|
240
|
+
}
|
|
241
|
+
if (this.resamplerUp !== null) {
|
|
242
|
+
this.resamplerUp.destroy()
|
|
217
243
|
this.resamplerUp = null
|
|
244
|
+
}
|
|
218
245
|
}
|
|
219
246
|
}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
/*
|
|
2
2
|
** SpeechFlow - Speech Processing Flow Graph
|
|
3
|
-
** Copyright (c) 2024-
|
|
3
|
+
** Copyright (c) 2024-2026 Dr. Ralf S. Engelschall <rse@engelschall.com>
|
|
4
4
|
** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
|
|
5
5
|
*/
|
|
6
6
|
|
|
@@ -81,7 +81,7 @@ export default class SpeechFlowNodeA2AMeter extends SpeechFlowNode {
|
|
|
81
81
|
|
|
82
82
|
/* grab the accumulated chunk data */
|
|
83
83
|
const chunkData = this.chunkBuffer
|
|
84
|
-
this.chunkBuffer = chunkData.
|
|
84
|
+
this.chunkBuffer = chunkData.slice(samplesPerChunk)
|
|
85
85
|
|
|
86
86
|
/* update internal audio sample sliding window for LUFS-M */
|
|
87
87
|
if (chunkData.length > sampleWindow.length)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
/*
|
|
2
2
|
** SpeechFlow - Speech Processing Flow Graph
|
|
3
|
-
** Copyright (c) 2024-
|
|
3
|
+
** Copyright (c) 2024-2026 Dr. Ralf S. Engelschall <rse@engelschall.com>
|
|
4
4
|
** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
|
|
5
5
|
*/
|
|
6
6
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
/*
|
|
2
2
|
** SpeechFlow - Speech Processing Flow Graph
|
|
3
|
-
** Copyright (c) 2024-
|
|
3
|
+
** Copyright (c) 2024-2026 Dr. Ralf S. Engelschall <rse@engelschall.com>
|
|
4
4
|
** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
|
|
5
5
|
*/
|
|
6
6
|
|
|
@@ -185,8 +185,9 @@ export default class SpeechFlowNodeA2APitch extends SpeechFlowNode {
|
|
|
185
185
|
|
|
186
186
|
/* take over pitch-shifted data */
|
|
187
187
|
const payload = util.convertI16ToBuf(result, self.config.audioLittleEndian)
|
|
188
|
-
|
|
189
|
-
|
|
188
|
+
const chunkNew = chunk.clone()
|
|
189
|
+
chunkNew.payload = payload
|
|
190
|
+
this.push(chunkNew)
|
|
190
191
|
callback()
|
|
191
192
|
}).catch((error: unknown) => {
|
|
192
193
|
if (self.closing)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
/*
|
|
2
2
|
** SpeechFlow - Speech Processing Flow Graph
|
|
3
|
-
** Copyright (c) 2024-
|
|
3
|
+
** Copyright (c) 2024-2026 Dr. Ralf S. Engelschall <rse@engelschall.com>
|
|
4
4
|
** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
|
|
5
5
|
*/
|
|
6
6
|
|
|
@@ -46,7 +46,7 @@ parentPort!.on("message", (msg) => {
|
|
|
46
46
|
/* convert back Float32Array to Int16Array */
|
|
47
47
|
const i16 = new Int16Array(data.length)
|
|
48
48
|
for (let i = 0; i < data.length; i++)
|
|
49
|
-
i16[i] = Math.round(f32a[i])
|
|
49
|
+
i16[i] = Math.max(-32768, Math.min(32767, Math.round(f32a[i])))
|
|
50
50
|
|
|
51
51
|
/* send processed frame back to parent */
|
|
52
52
|
parentPort!.postMessage({ type: "process-done", id, data: i16 }, [ i16.buffer ])
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
/*
|
|
2
2
|
** SpeechFlow - Speech Processing Flow Graph
|
|
3
|
-
** Copyright (c) 2024-
|
|
3
|
+
** Copyright (c) 2024-2026 Dr. Ralf S. Engelschall <rse@engelschall.com>
|
|
4
4
|
** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
|
|
5
5
|
*/
|
|
6
6
|
|
|
@@ -71,14 +71,27 @@ export default class SpeechFlowNodeA2ARNNoise extends SpeechFlowNode {
|
|
|
71
71
|
})
|
|
72
72
|
})
|
|
73
73
|
|
|
74
|
+
/* track pending promises */
|
|
75
|
+
const pending = new Map<string, {
|
|
76
|
+
resolve: (arr: Int16Array<ArrayBuffer>) => void,
|
|
77
|
+
reject: (err: Error) => void
|
|
78
|
+
}>()
|
|
79
|
+
|
|
80
|
+
/* reject all pending promises on worker exit */
|
|
81
|
+
this.worker.on("exit", () => {
|
|
82
|
+
const err = new Error("worker terminated")
|
|
83
|
+
for (const cb of pending.values())
|
|
84
|
+
cb.reject(err)
|
|
85
|
+
pending.clear()
|
|
86
|
+
})
|
|
87
|
+
|
|
74
88
|
/* receive message from worker */
|
|
75
|
-
const pending = new Map<string, (arr: Int16Array<ArrayBuffer>) => void>()
|
|
76
89
|
this.worker.on("message", (msg: any) => {
|
|
77
90
|
if (typeof msg === "object" && msg !== null && msg.type === "process-done") {
|
|
78
91
|
const cb = pending.get(msg.id)
|
|
79
92
|
pending.delete(msg.id)
|
|
80
93
|
if (cb)
|
|
81
|
-
cb(msg.data)
|
|
94
|
+
cb.resolve(msg.data)
|
|
82
95
|
else
|
|
83
96
|
this.log("warning", `RNNoise worker thread sent back unexpected id: ${msg.id}`)
|
|
84
97
|
}
|
|
@@ -92,8 +105,8 @@ export default class SpeechFlowNodeA2ARNNoise extends SpeechFlowNode {
|
|
|
92
105
|
if (this.closing)
|
|
93
106
|
return segment
|
|
94
107
|
const id = `${seq++}`
|
|
95
|
-
return new Promise<Int16Array<ArrayBuffer>>((resolve) => {
|
|
96
|
-
pending.set(id,
|
|
108
|
+
return new Promise<Int16Array<ArrayBuffer>>((resolve, reject) => {
|
|
109
|
+
pending.set(id, { resolve, reject })
|
|
97
110
|
this.worker!.postMessage({ type: "process", id, data: segment }, [ segment.buffer ])
|
|
98
111
|
})
|
|
99
112
|
}
|
|
@@ -113,20 +126,19 @@ export default class SpeechFlowNodeA2ARNNoise extends SpeechFlowNode {
|
|
|
113
126
|
callback(new Error("invalid chunk payload type"))
|
|
114
127
|
else {
|
|
115
128
|
/* convert Buffer into Int16Array */
|
|
116
|
-
const payload = util.convertBufToI16(chunk.payload)
|
|
129
|
+
const payload = util.convertBufToI16(chunk.payload, self.config.audioLittleEndian)
|
|
117
130
|
|
|
118
131
|
/* process Int16Array in necessary segments */
|
|
119
132
|
util.processInt16ArrayInSegments(payload, self.sampleSize, (segment) =>
|
|
120
133
|
workerProcessSegment(segment)
|
|
121
134
|
).then((payload: Int16Array<ArrayBuffer>) => {
|
|
122
135
|
/* convert Int16Array into Buffer */
|
|
123
|
-
const buf = util.convertI16ToBuf(payload)
|
|
124
|
-
|
|
125
|
-
/* update chunk */
|
|
126
|
-
chunk.payload = buf
|
|
136
|
+
const buf = util.convertI16ToBuf(payload, self.config.audioLittleEndian)
|
|
127
137
|
|
|
128
|
-
/* forward updated
|
|
129
|
-
|
|
138
|
+
/* forward cloned chunk with updated payload */
|
|
139
|
+
const chunkNew = chunk.clone()
|
|
140
|
+
chunkNew.payload = buf
|
|
141
|
+
this.push(chunkNew)
|
|
130
142
|
callback()
|
|
131
143
|
}).catch((err: unknown) => {
|
|
132
144
|
const error = util.ensureError(err)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
/*
|
|
2
2
|
** SpeechFlow - Speech Processing Flow Graph
|
|
3
|
-
** Copyright (c) 2024-
|
|
3
|
+
** Copyright (c) 2024-2026 Dr. Ralf S. Engelschall <rse@engelschall.com>
|
|
4
4
|
** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
|
|
5
5
|
*/
|
|
6
6
|
|
|
@@ -53,7 +53,9 @@ export default class SpeechFlowNodeA2ASpeex extends SpeechFlowNode {
|
|
|
53
53
|
const wasmBinary = await fs.promises.readFile(
|
|
54
54
|
path.join(__dirname, "../node_modules/@sapphi-red/speex-preprocess-wasm/dist/speex.wasm"))
|
|
55
55
|
const speexModule = await loadSpeexModule({
|
|
56
|
-
wasmBinary: wasmBinary.buffer
|
|
56
|
+
wasmBinary: wasmBinary.buffer.slice(
|
|
57
|
+
wasmBinary.byteOffset,
|
|
58
|
+
wasmBinary.byteOffset + wasmBinary.byteLength)
|
|
57
59
|
})
|
|
58
60
|
this.speexProcessor = new SpeexPreprocessor(
|
|
59
61
|
speexModule, this.sampleSize, this.config.audioSampleRate)
|
|
@@ -79,7 +81,7 @@ export default class SpeechFlowNodeA2ASpeex extends SpeechFlowNode {
|
|
|
79
81
|
callback(new Error("invalid chunk payload type"))
|
|
80
82
|
else {
|
|
81
83
|
/* convert Buffer into Int16Array */
|
|
82
|
-
const payload = util.convertBufToI16(chunk.payload)
|
|
84
|
+
const payload = util.convertBufToI16(chunk.payload, self.config.audioLittleEndian)
|
|
83
85
|
|
|
84
86
|
/* process Int16Array in necessary fixed-size segments */
|
|
85
87
|
util.processInt16ArrayInSegments(payload, self.sampleSize, (segment) => {
|
|
@@ -94,13 +96,12 @@ export default class SpeechFlowNodeA2ASpeex extends SpeechFlowNode {
|
|
|
94
96
|
throw new Error("stream already destroyed")
|
|
95
97
|
|
|
96
98
|
/* convert Int16Array back into Buffer */
|
|
97
|
-
const buf = util.convertI16ToBuf(payload)
|
|
99
|
+
const buf = util.convertI16ToBuf(payload, self.config.audioLittleEndian)
|
|
98
100
|
|
|
99
|
-
/*
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
this.push(chunk)
|
|
101
|
+
/* forward cloned chunk with updated payload */
|
|
102
|
+
const chunkNew = chunk.clone()
|
|
103
|
+
chunkNew.payload = buf
|
|
104
|
+
this.push(chunkNew)
|
|
104
105
|
callback()
|
|
105
106
|
}).catch((err: unknown) => {
|
|
106
107
|
const error = util.ensureError(err)
|