speechflow 1.4.5 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +28 -0
- package/README.md +220 -7
- package/etc/claude.md +70 -0
- package/etc/speechflow.yaml +5 -3
- package/etc/stx.conf +7 -0
- package/package.json +7 -6
- package/speechflow-cli/dst/speechflow-node-a2a-compressor-wt.d.ts +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-compressor-wt.js +155 -0
- package/speechflow-cli/dst/speechflow-node-a2a-compressor-wt.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-compressor.d.ts +15 -0
- package/speechflow-cli/dst/speechflow-node-a2a-compressor.js +287 -0
- package/speechflow-cli/dst/speechflow-node-a2a-compressor.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-dynamics-wt.d.ts +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-dynamics-wt.js +208 -0
- package/speechflow-cli/dst/speechflow-node-a2a-dynamics-wt.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-dynamics.d.ts +15 -0
- package/speechflow-cli/dst/speechflow-node-a2a-dynamics.js +312 -0
- package/speechflow-cli/dst/speechflow-node-a2a-dynamics.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-expander-wt.d.ts +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-expander-wt.js +161 -0
- package/speechflow-cli/dst/speechflow-node-a2a-expander-wt.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-expander.d.ts +13 -0
- package/speechflow-cli/dst/speechflow-node-a2a-expander.js +208 -0
- package/speechflow-cli/dst/speechflow-node-a2a-expander.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-ffmpeg.js +13 -3
- package/speechflow-cli/dst/speechflow-node-a2a-ffmpeg.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-filler.d.ts +14 -0
- package/speechflow-cli/dst/speechflow-node-a2a-filler.js +233 -0
- package/speechflow-cli/dst/speechflow-node-a2a-filler.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-gain.d.ts +12 -0
- package/speechflow-cli/dst/speechflow-node-a2a-gain.js +125 -0
- package/speechflow-cli/dst/speechflow-node-a2a-gain.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-gender.d.ts +0 -1
- package/speechflow-cli/dst/speechflow-node-a2a-gender.js +28 -12
- package/speechflow-cli/dst/speechflow-node-a2a-gender.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-meter.d.ts +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-meter.js +12 -8
- package/speechflow-cli/dst/speechflow-node-a2a-meter.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-mute.js +2 -1
- package/speechflow-cli/dst/speechflow-node-a2a-mute.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-rnnoise-wt.d.ts +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-rnnoise-wt.js +55 -0
- package/speechflow-cli/dst/speechflow-node-a2a-rnnoise-wt.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-rnnoise.d.ts +14 -0
- package/speechflow-cli/dst/speechflow-node-a2a-rnnoise.js +184 -0
- package/speechflow-cli/dst/speechflow-node-a2a-rnnoise.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-speex.d.ts +14 -0
- package/speechflow-cli/dst/speechflow-node-a2a-speex.js +156 -0
- package/speechflow-cli/dst/speechflow-node-a2a-speex.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-vad.js +3 -3
- package/speechflow-cli/dst/speechflow-node-a2a-vad.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-wav.js +22 -17
- package/speechflow-cli/dst/speechflow-node-a2a-wav.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2t-awstranscribe.d.ts +18 -0
- package/speechflow-cli/dst/speechflow-node-a2t-awstranscribe.js +317 -0
- package/speechflow-cli/dst/speechflow-node-a2t-awstranscribe.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-a2t-deepgram.js +15 -13
- package/speechflow-cli/dst/speechflow-node-a2t-deepgram.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2t-openaitranscribe.d.ts +19 -0
- package/speechflow-cli/dst/speechflow-node-a2t-openaitranscribe.js +351 -0
- package/speechflow-cli/dst/speechflow-node-a2t-openaitranscribe.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-t2a-awspolly.d.ts +16 -0
- package/speechflow-cli/dst/speechflow-node-t2a-awspolly.js +171 -0
- package/speechflow-cli/dst/speechflow-node-t2a-awspolly.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.js +19 -14
- package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2a-kokoro.js +11 -6
- package/speechflow-cli/dst/speechflow-node-t2a-kokoro.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-awstranslate.d.ts +13 -0
- package/speechflow-cli/dst/speechflow-node-t2t-awstranslate.js +141 -0
- package/speechflow-cli/dst/speechflow-node-t2t-awstranslate.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-t2t-deepl.js +13 -15
- package/speechflow-cli/dst/speechflow-node-t2t-deepl.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-format.js +10 -15
- package/speechflow-cli/dst/speechflow-node-t2t-format.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-ollama.js +44 -31
- package/speechflow-cli/dst/speechflow-node-t2t-ollama.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-openai.js +44 -45
- package/speechflow-cli/dst/speechflow-node-t2t-openai.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-sentence.js +8 -8
- package/speechflow-cli/dst/speechflow-node-t2t-sentence.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js +10 -12
- package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-transformers.js +22 -27
- package/speechflow-cli/dst/speechflow-node-t2t-transformers.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-x2x-filter.d.ts +1 -0
- package/speechflow-cli/dst/speechflow-node-x2x-filter.js +50 -15
- package/speechflow-cli/dst/speechflow-node-x2x-filter.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-x2x-trace.js +17 -18
- package/speechflow-cli/dst/speechflow-node-x2x-trace.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-device.js +13 -21
- package/speechflow-cli/dst/speechflow-node-xio-device.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-mqtt.d.ts +1 -0
- package/speechflow-cli/dst/speechflow-node-xio-mqtt.js +22 -16
- package/speechflow-cli/dst/speechflow-node-xio-mqtt.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-websocket.js +19 -19
- package/speechflow-cli/dst/speechflow-node-xio-websocket.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node.d.ts +6 -3
- package/speechflow-cli/dst/speechflow-node.js +13 -2
- package/speechflow-cli/dst/speechflow-node.js.map +1 -1
- package/speechflow-cli/dst/speechflow-utils-audio-wt.d.ts +1 -0
- package/speechflow-cli/dst/speechflow-utils-audio-wt.js +124 -0
- package/speechflow-cli/dst/speechflow-utils-audio-wt.js.map +1 -0
- package/speechflow-cli/dst/speechflow-utils-audio.d.ts +13 -0
- package/speechflow-cli/dst/speechflow-utils-audio.js +137 -0
- package/speechflow-cli/dst/speechflow-utils-audio.js.map +1 -0
- package/speechflow-cli/dst/speechflow-utils.d.ts +18 -0
- package/speechflow-cli/dst/speechflow-utils.js +123 -35
- package/speechflow-cli/dst/speechflow-utils.js.map +1 -1
- package/speechflow-cli/dst/speechflow.js +69 -14
- package/speechflow-cli/dst/speechflow.js.map +1 -1
- package/speechflow-cli/etc/oxlint.jsonc +112 -11
- package/speechflow-cli/etc/stx.conf +2 -2
- package/speechflow-cli/etc/tsconfig.json +1 -1
- package/speechflow-cli/package.d/@shiguredo+rnnoise-wasm+2025.1.5.patch +25 -0
- package/speechflow-cli/package.json +102 -94
- package/speechflow-cli/src/lib.d.ts +24 -0
- package/speechflow-cli/src/speechflow-node-a2a-compressor-wt.ts +151 -0
- package/speechflow-cli/src/speechflow-node-a2a-compressor.ts +303 -0
- package/speechflow-cli/src/speechflow-node-a2a-expander-wt.ts +158 -0
- package/speechflow-cli/src/speechflow-node-a2a-expander.ts +212 -0
- package/speechflow-cli/src/speechflow-node-a2a-ffmpeg.ts +13 -3
- package/speechflow-cli/src/speechflow-node-a2a-filler.ts +223 -0
- package/speechflow-cli/src/speechflow-node-a2a-gain.ts +98 -0
- package/speechflow-cli/src/speechflow-node-a2a-gender.ts +31 -17
- package/speechflow-cli/src/speechflow-node-a2a-meter.ts +13 -9
- package/speechflow-cli/src/speechflow-node-a2a-mute.ts +3 -2
- package/speechflow-cli/src/speechflow-node-a2a-rnnoise-wt.ts +62 -0
- package/speechflow-cli/src/speechflow-node-a2a-rnnoise.ts +164 -0
- package/speechflow-cli/src/speechflow-node-a2a-speex.ts +137 -0
- package/speechflow-cli/src/speechflow-node-a2a-vad.ts +3 -3
- package/speechflow-cli/src/speechflow-node-a2a-wav.ts +20 -13
- package/speechflow-cli/src/speechflow-node-a2t-awstranscribe.ts +308 -0
- package/speechflow-cli/src/speechflow-node-a2t-deepgram.ts +15 -13
- package/speechflow-cli/src/speechflow-node-a2t-openaitranscribe.ts +337 -0
- package/speechflow-cli/src/speechflow-node-t2a-awspolly.ts +187 -0
- package/speechflow-cli/src/speechflow-node-t2a-elevenlabs.ts +19 -14
- package/speechflow-cli/src/speechflow-node-t2a-kokoro.ts +12 -7
- package/speechflow-cli/src/speechflow-node-t2t-awstranslate.ts +152 -0
- package/speechflow-cli/src/speechflow-node-t2t-deepl.ts +13 -15
- package/speechflow-cli/src/speechflow-node-t2t-format.ts +10 -15
- package/speechflow-cli/src/speechflow-node-t2t-ollama.ts +55 -42
- package/speechflow-cli/src/speechflow-node-t2t-openai.ts +58 -58
- package/speechflow-cli/src/speechflow-node-t2t-sentence.ts +10 -10
- package/speechflow-cli/src/speechflow-node-t2t-subtitle.ts +15 -16
- package/speechflow-cli/src/speechflow-node-t2t-transformers.ts +27 -32
- package/speechflow-cli/src/speechflow-node-x2x-filter.ts +20 -16
- package/speechflow-cli/src/speechflow-node-x2x-trace.ts +20 -19
- package/speechflow-cli/src/speechflow-node-xio-device.ts +15 -23
- package/speechflow-cli/src/speechflow-node-xio-mqtt.ts +23 -16
- package/speechflow-cli/src/speechflow-node-xio-websocket.ts +19 -19
- package/speechflow-cli/src/speechflow-node.ts +21 -8
- package/speechflow-cli/src/speechflow-utils-audio-wt.ts +172 -0
- package/speechflow-cli/src/speechflow-utils-audio.ts +147 -0
- package/speechflow-cli/src/speechflow-utils.ts +125 -32
- package/speechflow-cli/src/speechflow.ts +74 -17
- package/speechflow-ui-db/dst/index.js +31 -31
- package/speechflow-ui-db/etc/eslint.mjs +0 -1
- package/speechflow-ui-db/etc/tsc-client.json +3 -3
- package/speechflow-ui-db/package.json +11 -10
- package/speechflow-ui-db/src/app.vue +20 -6
- package/speechflow-ui-st/dst/index.js +26 -26
- package/speechflow-ui-st/etc/eslint.mjs +0 -1
- package/speechflow-ui-st/etc/tsc-client.json +3 -3
- package/speechflow-ui-st/package.json +11 -10
- package/speechflow-ui-st/src/app.vue +5 -12
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
/*
|
|
2
|
+
** SpeechFlow - Speech Processing Flow Graph
|
|
3
|
+
** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
|
|
4
|
+
** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
/* standard dependencies */
|
|
8
|
+
import Stream from "node:stream"
|
|
9
|
+
import { Worker } from "node:worker_threads"
|
|
10
|
+
import { resolve } from "node:path"
|
|
11
|
+
|
|
12
|
+
/* internal dependencies */
|
|
13
|
+
import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
|
|
14
|
+
import * as utils from "./speechflow-utils"
|
|
15
|
+
|
|
16
|
+
/* bookkeeping for one audio segment which is currently in-flight at the worker thread */
type RNNoisePendingRequest = {
    resolve: (arr: Int16Array<ArrayBuffer>) => void
    reject:  (err: Error) => void
}

/* SpeechFlow node for RNNoise based noise suppression in audio-to-audio passing:
   each audio chunk is cut into fixed-size segments, every segment is posted
   to a dedicated worker thread and replaced by the data the worker sends back */
export default class SpeechFlowNodeRNNoise extends SpeechFlowNode {
    /* declare official node name */
    public static name = "rnnoise"

    /* internal state */
    private destroyed = false
    private sampleSize = 480 /* = 10ms at 48KHz, as required by RNNoise! */
    private worker: Worker | null = null
    private pending = new Map<string, RNNoisePendingRequest>()

    /* construct node */
    constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
        super(id, cfg, opts, args)

        /* declare node configuration parameters (this node has none) */
        this.configure({})

        /* declare node input/output format */
        this.input  = "audio"
        this.output = "audio"
    }

    /* reject (and forget) all requests which are still waiting for a worker response,
       so that the corresponding stream transform callbacks are not left hanging forever */
    private rejectPending (pending: Map<string, RNNoisePendingRequest>, err: Error) {
        pending.forEach((request) => { request.reject(err) })
        pending.clear()
    }

    /* open node: spawn the worker thread, wait for its startup handshake
       and establish the transform stream (throws if the worker does not
       signal readiness within 5 seconds or reports a failure) */
    async open () {
        /* clear destruction flag */
        this.destroyed = false

        /* helper for normalizing arbitrary thrown values */
        const toError = (err: unknown) =>
            err instanceof Error ? err : new Error(String(err))

        /* per-open request table (kept local, so that a late "exit" event of a
           previously terminated worker cannot reject requests of a newer worker) */
        const pending = new Map<string, RNNoisePendingRequest>()
        this.pending = pending

        /* initialize worker */
        const worker = new Worker(resolve(__dirname, "speechflow-node-a2a-rnnoise-wt.js"))
        this.worker = worker
        worker.on("error", (err) => {
            this.log("error", `RNNoise worker thread error: ${err}`)

            /* the worker will never answer again: fail all in-flight segments */
            this.rejectPending(pending, toError(err))
        })
        worker.on("exit", (code) => {
            if (code !== 0)
                this.log("error", `RNNoise worker thread exited with error code ${code}`)
            else
                this.log("info", `RNNoise worker thread exited with regular code ${code}`)

            /* the worker will never answer again: fail all in-flight segments */
            this.rejectPending(pending, new Error(`RNNoise worker thread exited with code ${code}`))
        })

        /* wait for startup handshake of worker */
        try {
            await new Promise<void>((done, fail) => {
                const timeout = setTimeout(() => {
                    detach()
                    fail(new Error("RNNoise worker thread initialization timeout"))
                }, 5000)
                const onMessage = (msg: any) => {
                    detach()
                    if (typeof msg === "object" && msg !== null && msg.type === "ready")
                        done()
                    else if (typeof msg === "object" && msg !== null && msg.type === "failed")
                        fail(new Error(msg.message ?? "RNNoise worker thread initialization failed"))
                    else
                        fail(new Error(`RNNoise worker thread sent unexpected message on startup`))
                }
                const onError = (err: unknown) => {
                    detach()
                    fail(toError(err))
                }
                const onExit = (code: number) => {
                    detach()
                    fail(new Error(`RNNoise worker thread exited during initialization (code ${code})`))
                }

                /* whichever outcome arrives first removes the timer and the
                   other handshake listeners, so nothing stale stays attached */
                const detach = () => {
                    clearTimeout(timeout)
                    worker.off("message", onMessage)
                    worker.off("error",   onError)
                    worker.off("exit",    onExit)
                }
                worker.once("message", onMessage)
                worker.once("error",   onError)
                worker.once("exit",    onExit)
            })
        }
        catch (err) {
            /* do not leak a half-initialized worker thread */
            if (this.worker === worker)
                this.worker = null
            worker.terminate()
            throw err
        }

        /* receive message from worker */
        worker.on("message", (msg: any) => {
            if (typeof msg === "object" && msg !== null && msg.type === "process-done") {
                const request = pending.get(msg.id)
                pending.delete(msg.id)
                if (request)
                    request.resolve(msg.data)
                else
                    this.log("warning", `RNNoise worker thread sent back unexpected id: ${msg.id}`)
            }
            else
                this.log("warning", `RNNoise worker thread sent unexpected message: ${JSON.stringify(msg)}`)
        })

        /* send message to worker: the returned promise is resolved with the data
           of the matching "process-done" response, or rejected if the worker
           dies or the node is closed before a response arrives */
        let seq = 0
        const workerProcessSegment = async (segment: Int16Array<ArrayBuffer>) => {
            /* after destruction, segments are passed through unmodified */
            if (this.destroyed)
                return segment
            const id = `${seq++}`
            return new Promise<Int16Array<ArrayBuffer>>((done, fail) => {
                pending.set(id, { resolve: done, reject: fail })
                try {
                    /* the underlying buffer is handed over via the transfer list */
                    worker.postMessage({ type: "process", id, data: segment }, [ segment.buffer ])
                }
                catch (err) {
                    pending.delete(id)
                    fail(toError(err))
                }
            })
        }

        /* establish a transform stream */
        const self = this
        this.stream = new Stream.Transform({
            readableObjectMode: true,
            writableObjectMode: true,
            decodeStrings:      false,
            transform (chunk: SpeechFlowChunk & { payload: Buffer }, encoding, callback) {
                if (self.destroyed) {
                    callback(new Error("stream already destroyed"))
                    return
                }
                if (!Buffer.isBuffer(chunk.payload))
                    callback(new Error("invalid chunk payload type"))
                else {
                    /* convert Buffer into Int16Array */
                    const payload = utils.convertBufToI16(chunk.payload)

                    /* process Int16Array in necessary segments */
                    utils.processInt16ArrayInSegments(payload, self.sampleSize, (segment) =>
                        workerProcessSegment(segment)
                    ).then((payload: Int16Array<ArrayBuffer>) => {
                        /* convert Int16Array into Buffer */
                        const buf = utils.convertI16ToBuf(payload)

                        /* update chunk */
                        chunk.payload = buf

                        /* forward updated chunk */
                        this.push(chunk)
                        callback()
                    }).catch((err: Error) => {
                        self.log("warning", `processing of chunk failed: ${err}`)
                        callback(err)
                    })
                }
            },
            final (callback) {
                if (self.destroyed) {
                    callback()
                    return
                }
                this.push(null)
                callback()
            }
        })
    }

    /* close node: terminate the worker thread, fail all in-flight
       segments and destroy the transform stream */
    async close () {
        /* indicate destruction */
        this.destroyed = true

        /* shutdown worker */
        if (this.worker !== null) {
            this.worker.terminate()
            this.worker = null
        }

        /* settle requests the terminated worker can no longer answer */
        this.rejectPending(this.pending, new Error("stream already destroyed"))

        /* close stream */
        if (this.stream !== null) {
            this.stream.destroy()
            this.stream = null
        }
    }
}
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
/*
|
|
2
|
+
** SpeechFlow - Speech Processing Flow Graph
|
|
3
|
+
** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
|
|
4
|
+
** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
/* standard dependencies */
|
|
8
|
+
import path from "node:path"
|
|
9
|
+
import fs from "node:fs"
|
|
10
|
+
import Stream from "node:stream"
|
|
11
|
+
|
|
12
|
+
/* external dependencies */
|
|
13
|
+
import { loadSpeexModule, SpeexPreprocessor } from "@sapphi-red/speex-preprocess-wasm"
|
|
14
|
+
|
|
15
|
+
/* internal dependencies */
|
|
16
|
+
import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
|
|
17
|
+
import * as utils from "./speechflow-utils"
|
|
18
|
+
|
|
19
|
+
/* SpeechFlow node for Speex based noise suppression in audio-to-audio passing:
   each audio chunk is cut into fixed-size segments and every segment
   is processed in-place by a Speex pre-processor instance */
export default class SpeechFlowNodeSpeex extends SpeechFlowNode {
    /* declare official node name */
    public static name = "speex"

    /* internal state */
    private destroyed = false
    private sampleSize = 480 /* = 10ms at 48KHz */
    private speexProcessor: SpeexPreprocessor | null = null

    /* construct node */
    constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
        super(id, cfg, opts, args)

        /* declare node configuration parameters
           (attenuate: value in the range -60..0, default -18,
           which is applied as the Speex "noiseSuppress" setting) */
        this.configure({
            attenuate: { type: "number", val: -18, pos: 0, match: (n: number) => n >= -60 && n <= 0 },
        })

        /* declare node input/output format */
        this.input  = "audio"
        this.output = "audio"
    }

    /* open node: load the Speex WASM module, configure the pre-processor
       and establish the transform stream (throws if the configured
       audio sample rate is not 48KHz) */
    async open () {
        /* clear destruction flag */
        this.destroyed = false

        /* validate sample rate compatibility */
        if (this.config.audioSampleRate !== 48000)
            throw new Error(`Speex node requires 48KHz sample rate, got ${this.config.audioSampleRate}Hz`)

        /* initialize and configure Speex pre-processor */
        const wasmFile = await fs.promises.readFile(
            path.join(__dirname, "../node_modules/@sapphi-red/speex-preprocess-wasm/dist/speex.wasm"))

        /* a Buffer is only a view onto its backing ArrayBuffer (which can be
           larger and start at a non-zero offset), so extract exactly the
           bytes of the file instead of passing the whole backing store */
        const wasmBinary = wasmFile.buffer.slice(
            wasmFile.byteOffset, wasmFile.byteOffset + wasmFile.byteLength) as ArrayBuffer
        const speexModule = await loadSpeexModule({ wasmBinary })
        this.speexProcessor = new SpeexPreprocessor(
            speexModule, this.sampleSize, this.config.audioSampleRate)
        this.speexProcessor.denoise            = true
        this.speexProcessor.noiseSuppress      = this.params.attenuate
        this.speexProcessor.agc                = false
        this.speexProcessor.vad                = false
        this.speexProcessor.echoSuppress       = 0
        this.speexProcessor.echoSuppressActive = 0

        /* establish a transform stream */
        const self = this
        this.stream = new Stream.Transform({
            readableObjectMode: true,
            writableObjectMode: true,
            decodeStrings:      false,
            transform (chunk: SpeechFlowChunk & { payload: Buffer }, encoding, callback) {
                if (self.destroyed) {
                    callback(new Error("stream already destroyed"))
                    return
                }
                if (!Buffer.isBuffer(chunk.payload))
                    callback(new Error("invalid chunk payload type"))
                else {
                    /* convert Buffer into Int16Array */
                    const payload = utils.convertBufToI16(chunk.payload)

                    /* process Int16Array in necessary fixed-size segments */
                    utils.processInt16ArrayInSegments(payload, self.sampleSize, (segment) => {
                        /* the node can be closed while a chunk is still being processed */
                        if (self.destroyed)
                            throw new Error("stream already destroyed")
                        self.speexProcessor?.processInt16(segment)
                        return Promise.resolve(segment)
                    }).then((payload: Int16Array<ArrayBuffer>) => {
                        if (self.destroyed)
                            throw new Error("stream already destroyed")

                        /* convert Int16Array back into Buffer */
                        const buf = utils.convertI16ToBuf(payload)

                        /* update chunk */
                        chunk.payload = buf

                        /* forward updated chunk */
                        this.push(chunk)
                        callback()
                    }).catch((err: Error) => {
                        self.log("warning", `processing of chunk failed: ${err}`)
                        callback(err)
                    })
                }
            },
            final (callback) {
                if (self.destroyed) {
                    callback()
                    return
                }
                this.push(null)
                callback()
            }
        })
    }

    /* close node: destroy the Speex pre-processor and the transform stream */
    async close () {
        /* indicate destruction */
        this.destroyed = true

        /* destroy processor */
        if (this.speexProcessor !== null) {
            this.speexProcessor.destroy()
            this.speexProcessor = null
        }

        /* close stream */
        if (this.stream !== null) {
            this.stream.destroy()
            this.stream = null
        }
    }
}
|
|
@@ -205,7 +205,7 @@ export default class SpeechFlowNodeVAD extends SpeechFlowNode {
|
|
|
205
205
|
if ((chunks * chunkSize) < data.length) {
|
|
206
206
|
const frame = new Float32Array(chunkSize)
|
|
207
207
|
frame.fill(0)
|
|
208
|
-
frame.set(data.slice(chunks * chunkSize
|
|
208
|
+
frame.set(data.slice(chunks * chunkSize))
|
|
209
209
|
const segment: AudioQueueElementSegment = { data: frame }
|
|
210
210
|
segmentData.push(segment)
|
|
211
211
|
}
|
|
@@ -315,7 +315,7 @@ export default class SpeechFlowNodeVAD extends SpeechFlowNode {
|
|
|
315
315
|
&& element.type === "audio-frame"
|
|
316
316
|
&& element.isSpeech !== undefined)
|
|
317
317
|
flushPendingChunks()
|
|
318
|
-
else if (!self.destroyed) {
|
|
318
|
+
else if (!self.destroyed && !self.activeEventListeners.has(awaitForthcomingChunks)) {
|
|
319
319
|
self.queue.once("write", awaitForthcomingChunks)
|
|
320
320
|
self.activeEventListeners.add(awaitForthcomingChunks)
|
|
321
321
|
}
|
|
@@ -328,7 +328,7 @@ export default class SpeechFlowNodeVAD extends SpeechFlowNode {
|
|
|
328
328
|
&& element.type === "audio-frame"
|
|
329
329
|
&& element.isSpeech !== undefined)
|
|
330
330
|
flushPendingChunks()
|
|
331
|
-
else if (!self.destroyed) {
|
|
331
|
+
else if (!self.destroyed && !self.activeEventListeners.has(awaitForthcomingChunks)) {
|
|
332
332
|
self.queue.once("write", awaitForthcomingChunks)
|
|
333
333
|
self.activeEventListeners.add(awaitForthcomingChunks)
|
|
334
334
|
}
|
|
@@ -21,22 +21,19 @@ const writeWavHeader = (
|
|
|
21
21
|
const bitDepth = options?.bitDepth ?? 16 /* 16-Bit */
|
|
22
22
|
|
|
23
23
|
const headerLength = 44
|
|
24
|
-
const
|
|
24
|
+
const maxDataSize = Math.pow(2, 32) - 100 /* safe maximum for 32-bit WAV files */
|
|
25
|
+
const dataLength = length ?? maxDataSize
|
|
25
26
|
const fileSize = dataLength + headerLength
|
|
26
27
|
const header = Buffer.alloc(headerLength)
|
|
27
28
|
|
|
28
|
-
const RIFF = Buffer.alloc(4, "RIFF")
|
|
29
|
-
const WAVE = Buffer.alloc(4, "WAVE")
|
|
30
|
-
const fmt = Buffer.alloc(4, "fmt ")
|
|
31
|
-
const data = Buffer.alloc(4, "data")
|
|
32
29
|
const byteRate = (sampleRate * channels * bitDepth) / 8
|
|
33
30
|
const blockAlign = (channels * bitDepth) / 8
|
|
34
31
|
|
|
35
32
|
let offset = 0
|
|
36
|
-
|
|
33
|
+
header.write("RIFF", offset); offset += 4
|
|
37
34
|
header.writeUInt32LE(fileSize - 8, offset); offset += 4
|
|
38
|
-
|
|
39
|
-
|
|
35
|
+
header.write("WAVE", offset); offset += 4
|
|
36
|
+
header.write("fmt ", offset); offset += 4
|
|
40
37
|
header.writeUInt32LE(16, offset); offset += 4
|
|
41
38
|
header.writeUInt16LE(audioFormat, offset); offset += 2
|
|
42
39
|
header.writeUInt16LE(channels, offset); offset += 2
|
|
@@ -44,7 +41,7 @@ const writeWavHeader = (
|
|
|
44
41
|
header.writeUInt32LE(byteRate, offset); offset += 4
|
|
45
42
|
header.writeUInt16LE(blockAlign, offset); offset += 2
|
|
46
43
|
header.writeUInt16LE(bitDepth, offset); offset += 2
|
|
47
|
-
|
|
44
|
+
header.write("data", offset); offset += 4
|
|
48
45
|
header.writeUInt32LE(dataLength, offset); offset += 4
|
|
49
46
|
|
|
50
47
|
return header
|
|
@@ -52,6 +49,9 @@ const writeWavHeader = (
|
|
|
52
49
|
|
|
53
50
|
/* read WAV header */
|
|
54
51
|
const readWavHeader = (buffer: Buffer) => {
|
|
52
|
+
if (buffer.length < 44)
|
|
53
|
+
throw new Error("WAV header too short, expected at least 44 bytes")
|
|
54
|
+
|
|
55
55
|
let offset = 0
|
|
56
56
|
const riffHead = buffer.subarray(offset, offset + 4).toString(); offset += 4
|
|
57
57
|
const fileSize = buffer.readUInt32LE(offset); offset += 4
|
|
@@ -67,6 +67,15 @@ const readWavHeader = (buffer: Buffer) => {
|
|
|
67
67
|
const data = buffer.subarray(offset, offset + 4).toString(); offset += 4
|
|
68
68
|
const dataLength = buffer.readUInt32LE(offset); offset += 4
|
|
69
69
|
|
|
70
|
+
if (riffHead !== "RIFF")
|
|
71
|
+
throw new Error(`Invalid WAV file: expected RIFF header, got "${riffHead}"`)
|
|
72
|
+
if (waveHead !== "WAVE")
|
|
73
|
+
throw new Error(`Invalid WAV file: expected WAVE header, got "${waveHead}"`)
|
|
74
|
+
if (fmtHead !== "fmt ")
|
|
75
|
+
throw new Error(`Invalid WAV file: expected "fmt " header, got "${fmtHead}"`)
|
|
76
|
+
if (data !== "data")
|
|
77
|
+
throw new Error(`Invalid WAV file: expected "data" header, got "${data}"`)
|
|
78
|
+
|
|
70
79
|
return {
|
|
71
80
|
riffHead, fileSize, waveHead, fmtHead, formatLength, audioFormat,
|
|
72
81
|
channels, sampleRate, byteRate, blockAlign, bitDepth, data, dataLength
|
|
@@ -103,10 +112,8 @@ export default class SpeechFlowNodeWAV extends SpeechFlowNode {
|
|
|
103
112
|
decodeStrings: false,
|
|
104
113
|
highWaterMark: 1,
|
|
105
114
|
transform (chunk: SpeechFlowChunk, encoding, callback) {
|
|
106
|
-
if (!Buffer.isBuffer(chunk.payload))
|
|
115
|
+
if (!Buffer.isBuffer(chunk.payload))
|
|
107
116
|
callback(new Error("invalid chunk payload type"))
|
|
108
|
-
return
|
|
109
|
-
}
|
|
110
117
|
else if (firstChunk) {
|
|
111
118
|
if (self.params.mode === "encode") {
|
|
112
119
|
/* convert raw/PCM to WAV/PCM
|
|
@@ -164,13 +171,13 @@ export default class SpeechFlowNodeWAV extends SpeechFlowNode {
|
|
|
164
171
|
callback(new Error(`invalid operation mode "${self.params.mode}"`))
|
|
165
172
|
return
|
|
166
173
|
}
|
|
174
|
+
firstChunk = false
|
|
167
175
|
}
|
|
168
176
|
else {
|
|
169
177
|
/* pass-through original chunk */
|
|
170
178
|
this.push(chunk)
|
|
171
179
|
callback()
|
|
172
180
|
}
|
|
173
|
-
firstChunk = false
|
|
174
181
|
},
|
|
175
182
|
final (callback) {
|
|
176
183
|
this.push(null)
|