speechflow 1.4.5 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +28 -0
- package/README.md +220 -7
- package/etc/claude.md +70 -0
- package/etc/speechflow.yaml +5 -3
- package/etc/stx.conf +7 -0
- package/package.json +7 -6
- package/speechflow-cli/dst/speechflow-node-a2a-compressor-wt.d.ts +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-compressor-wt.js +155 -0
- package/speechflow-cli/dst/speechflow-node-a2a-compressor-wt.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-compressor.d.ts +15 -0
- package/speechflow-cli/dst/speechflow-node-a2a-compressor.js +287 -0
- package/speechflow-cli/dst/speechflow-node-a2a-compressor.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-dynamics-wt.d.ts +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-dynamics-wt.js +208 -0
- package/speechflow-cli/dst/speechflow-node-a2a-dynamics-wt.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-dynamics.d.ts +15 -0
- package/speechflow-cli/dst/speechflow-node-a2a-dynamics.js +312 -0
- package/speechflow-cli/dst/speechflow-node-a2a-dynamics.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-expander-wt.d.ts +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-expander-wt.js +161 -0
- package/speechflow-cli/dst/speechflow-node-a2a-expander-wt.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-expander.d.ts +13 -0
- package/speechflow-cli/dst/speechflow-node-a2a-expander.js +208 -0
- package/speechflow-cli/dst/speechflow-node-a2a-expander.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-ffmpeg.js +13 -3
- package/speechflow-cli/dst/speechflow-node-a2a-ffmpeg.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-filler.d.ts +14 -0
- package/speechflow-cli/dst/speechflow-node-a2a-filler.js +233 -0
- package/speechflow-cli/dst/speechflow-node-a2a-filler.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-gain.d.ts +12 -0
- package/speechflow-cli/dst/speechflow-node-a2a-gain.js +125 -0
- package/speechflow-cli/dst/speechflow-node-a2a-gain.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-gender.d.ts +0 -1
- package/speechflow-cli/dst/speechflow-node-a2a-gender.js +28 -12
- package/speechflow-cli/dst/speechflow-node-a2a-gender.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-meter.d.ts +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-meter.js +12 -8
- package/speechflow-cli/dst/speechflow-node-a2a-meter.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-mute.js +2 -1
- package/speechflow-cli/dst/speechflow-node-a2a-mute.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-rnnoise-wt.d.ts +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-rnnoise-wt.js +55 -0
- package/speechflow-cli/dst/speechflow-node-a2a-rnnoise-wt.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-rnnoise.d.ts +14 -0
- package/speechflow-cli/dst/speechflow-node-a2a-rnnoise.js +184 -0
- package/speechflow-cli/dst/speechflow-node-a2a-rnnoise.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-speex.d.ts +14 -0
- package/speechflow-cli/dst/speechflow-node-a2a-speex.js +156 -0
- package/speechflow-cli/dst/speechflow-node-a2a-speex.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-vad.js +3 -3
- package/speechflow-cli/dst/speechflow-node-a2a-vad.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-wav.js +22 -17
- package/speechflow-cli/dst/speechflow-node-a2a-wav.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2t-awstranscribe.d.ts +18 -0
- package/speechflow-cli/dst/speechflow-node-a2t-awstranscribe.js +317 -0
- package/speechflow-cli/dst/speechflow-node-a2t-awstranscribe.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-a2t-deepgram.js +15 -13
- package/speechflow-cli/dst/speechflow-node-a2t-deepgram.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2t-openaitranscribe.d.ts +19 -0
- package/speechflow-cli/dst/speechflow-node-a2t-openaitranscribe.js +351 -0
- package/speechflow-cli/dst/speechflow-node-a2t-openaitranscribe.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-t2a-awspolly.d.ts +16 -0
- package/speechflow-cli/dst/speechflow-node-t2a-awspolly.js +171 -0
- package/speechflow-cli/dst/speechflow-node-t2a-awspolly.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.js +19 -14
- package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2a-kokoro.js +11 -6
- package/speechflow-cli/dst/speechflow-node-t2a-kokoro.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-awstranslate.d.ts +13 -0
- package/speechflow-cli/dst/speechflow-node-t2t-awstranslate.js +141 -0
- package/speechflow-cli/dst/speechflow-node-t2t-awstranslate.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-t2t-deepl.js +13 -15
- package/speechflow-cli/dst/speechflow-node-t2t-deepl.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-format.js +10 -15
- package/speechflow-cli/dst/speechflow-node-t2t-format.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-ollama.js +44 -31
- package/speechflow-cli/dst/speechflow-node-t2t-ollama.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-openai.js +44 -45
- package/speechflow-cli/dst/speechflow-node-t2t-openai.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-sentence.js +8 -8
- package/speechflow-cli/dst/speechflow-node-t2t-sentence.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js +10 -12
- package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-transformers.js +22 -27
- package/speechflow-cli/dst/speechflow-node-t2t-transformers.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-x2x-filter.d.ts +1 -0
- package/speechflow-cli/dst/speechflow-node-x2x-filter.js +50 -15
- package/speechflow-cli/dst/speechflow-node-x2x-filter.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-x2x-trace.js +17 -18
- package/speechflow-cli/dst/speechflow-node-x2x-trace.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-device.js +13 -21
- package/speechflow-cli/dst/speechflow-node-xio-device.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-mqtt.d.ts +1 -0
- package/speechflow-cli/dst/speechflow-node-xio-mqtt.js +22 -16
- package/speechflow-cli/dst/speechflow-node-xio-mqtt.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-websocket.js +19 -19
- package/speechflow-cli/dst/speechflow-node-xio-websocket.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node.d.ts +6 -3
- package/speechflow-cli/dst/speechflow-node.js +13 -2
- package/speechflow-cli/dst/speechflow-node.js.map +1 -1
- package/speechflow-cli/dst/speechflow-utils-audio-wt.d.ts +1 -0
- package/speechflow-cli/dst/speechflow-utils-audio-wt.js +124 -0
- package/speechflow-cli/dst/speechflow-utils-audio-wt.js.map +1 -0
- package/speechflow-cli/dst/speechflow-utils-audio.d.ts +13 -0
- package/speechflow-cli/dst/speechflow-utils-audio.js +137 -0
- package/speechflow-cli/dst/speechflow-utils-audio.js.map +1 -0
- package/speechflow-cli/dst/speechflow-utils.d.ts +18 -0
- package/speechflow-cli/dst/speechflow-utils.js +123 -35
- package/speechflow-cli/dst/speechflow-utils.js.map +1 -1
- package/speechflow-cli/dst/speechflow.js +69 -14
- package/speechflow-cli/dst/speechflow.js.map +1 -1
- package/speechflow-cli/etc/oxlint.jsonc +112 -11
- package/speechflow-cli/etc/stx.conf +2 -2
- package/speechflow-cli/etc/tsconfig.json +1 -1
- package/speechflow-cli/package.d/@shiguredo+rnnoise-wasm+2025.1.5.patch +25 -0
- package/speechflow-cli/package.json +102 -94
- package/speechflow-cli/src/lib.d.ts +24 -0
- package/speechflow-cli/src/speechflow-node-a2a-compressor-wt.ts +151 -0
- package/speechflow-cli/src/speechflow-node-a2a-compressor.ts +303 -0
- package/speechflow-cli/src/speechflow-node-a2a-expander-wt.ts +158 -0
- package/speechflow-cli/src/speechflow-node-a2a-expander.ts +212 -0
- package/speechflow-cli/src/speechflow-node-a2a-ffmpeg.ts +13 -3
- package/speechflow-cli/src/speechflow-node-a2a-filler.ts +223 -0
- package/speechflow-cli/src/speechflow-node-a2a-gain.ts +98 -0
- package/speechflow-cli/src/speechflow-node-a2a-gender.ts +31 -17
- package/speechflow-cli/src/speechflow-node-a2a-meter.ts +13 -9
- package/speechflow-cli/src/speechflow-node-a2a-mute.ts +3 -2
- package/speechflow-cli/src/speechflow-node-a2a-rnnoise-wt.ts +62 -0
- package/speechflow-cli/src/speechflow-node-a2a-rnnoise.ts +164 -0
- package/speechflow-cli/src/speechflow-node-a2a-speex.ts +137 -0
- package/speechflow-cli/src/speechflow-node-a2a-vad.ts +3 -3
- package/speechflow-cli/src/speechflow-node-a2a-wav.ts +20 -13
- package/speechflow-cli/src/speechflow-node-a2t-awstranscribe.ts +308 -0
- package/speechflow-cli/src/speechflow-node-a2t-deepgram.ts +15 -13
- package/speechflow-cli/src/speechflow-node-a2t-openaitranscribe.ts +337 -0
- package/speechflow-cli/src/speechflow-node-t2a-awspolly.ts +187 -0
- package/speechflow-cli/src/speechflow-node-t2a-elevenlabs.ts +19 -14
- package/speechflow-cli/src/speechflow-node-t2a-kokoro.ts +12 -7
- package/speechflow-cli/src/speechflow-node-t2t-awstranslate.ts +152 -0
- package/speechflow-cli/src/speechflow-node-t2t-deepl.ts +13 -15
- package/speechflow-cli/src/speechflow-node-t2t-format.ts +10 -15
- package/speechflow-cli/src/speechflow-node-t2t-ollama.ts +55 -42
- package/speechflow-cli/src/speechflow-node-t2t-openai.ts +58 -58
- package/speechflow-cli/src/speechflow-node-t2t-sentence.ts +10 -10
- package/speechflow-cli/src/speechflow-node-t2t-subtitle.ts +15 -16
- package/speechflow-cli/src/speechflow-node-t2t-transformers.ts +27 -32
- package/speechflow-cli/src/speechflow-node-x2x-filter.ts +20 -16
- package/speechflow-cli/src/speechflow-node-x2x-trace.ts +20 -19
- package/speechflow-cli/src/speechflow-node-xio-device.ts +15 -23
- package/speechflow-cli/src/speechflow-node-xio-mqtt.ts +23 -16
- package/speechflow-cli/src/speechflow-node-xio-websocket.ts +19 -19
- package/speechflow-cli/src/speechflow-node.ts +21 -8
- package/speechflow-cli/src/speechflow-utils-audio-wt.ts +172 -0
- package/speechflow-cli/src/speechflow-utils-audio.ts +147 -0
- package/speechflow-cli/src/speechflow-utils.ts +125 -32
- package/speechflow-cli/src/speechflow.ts +74 -17
- package/speechflow-ui-db/dst/index.js +31 -31
- package/speechflow-ui-db/etc/eslint.mjs +0 -1
- package/speechflow-ui-db/etc/tsc-client.json +3 -3
- package/speechflow-ui-db/package.json +11 -10
- package/speechflow-ui-db/src/app.vue +20 -6
- package/speechflow-ui-st/dst/index.js +26 -26
- package/speechflow-ui-st/etc/eslint.mjs +0 -1
- package/speechflow-ui-st/etc/tsc-client.json +3 -3
- package/speechflow-ui-st/package.json +11 -10
- package/speechflow-ui-st/src/app.vue +5 -12

package/speechflow-cli/src/speechflow-node-a2a-ffmpeg.ts

@@ -75,7 +75,7 @@ export default class SpeechFlowNodeFFmpeg extends SpeechFlowNode {
                 "c:a": "pcm_s16le",
                 "ar": this.config.audioSampleRate,
                 "ac": this.config.audioChannels,
-                "f": "s16le"
+                "f": "s16le"
             } : {}),
             ...(this.params.dst === "wav" ? {
                 "f": "wav"
@@ -90,7 +90,12 @@ export default class SpeechFlowNodeFFmpeg extends SpeechFlowNode {
                 "f": "opus"
             } : {})
         })
-        this.ffmpeg.run()
+        try {
+            this.ffmpeg.run()
+        }
+        catch (err) {
+            throw new Error(`failed to start FFmpeg process: ${err}`)
+        }
 
         /* establish a duplex stream and connect it to FFmpeg */
         this.stream = Stream.Duplex.from({
@@ -120,7 +125,12 @@ export default class SpeechFlowNodeFFmpeg extends SpeechFlowNode {
 
         /* shutdown FFmpeg */
         if (this.ffmpeg !== null) {
-            this.ffmpeg.kill()
+            try {
+                this.ffmpeg.kill()
+            }
+            catch {
+                /* ignore kill errors during cleanup */
+            }
             this.ffmpeg = null
         }
     }

package/speechflow-cli/src/speechflow-node-a2a-filler.ts

@@ -0,0 +1,223 @@
+/*
+** SpeechFlow - Speech Processing Flow Graph
+** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
+** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
+*/
+
+/* standard dependencies */
+import Stream from "node:stream"
+import { EventEmitter } from "node:events"
+import { Duration } from "luxon"
+
+/* internal dependencies */
+import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
+import * as utils from "./speechflow-utils"
+
+class AudioFiller extends EventEmitter {
+    private emittedEndSamples = 0 /* stream position in samples already emitted */
+    private readonly bytesPerSample = 2 /* PCM I16 */
+    private readonly bytesPerFrame: number
+    private readonly sampleTolerance = 0.5 /* tolerance for floating-point sample comparisons */
+
+    constructor (private sampleRate = 48000, private channels = 1) {
+        super()
+        this.bytesPerFrame = this.channels * this.bytesPerSample
+    }
+
+    /* optional helper to allow subscribing with strong typing */
+    public on(event: "chunk", listener: (chunk: SpeechFlowChunk) => void): this
+    public on(event: string, listener: (...args: any[]) => void): this {
+        return super.on(event, listener)
+    }
+
+    /* convert duration to fractional samples */
+    private samplesFromDuration(duration: Duration): number {
+        const seconds = duration.as("seconds")
+        const samples = seconds * this.sampleRate
+        return samples
+    }
+
+    /* convert fractional samples to duration */
+    private durationFromSamples(samples: number): Duration {
+        const seconds = samples / this.sampleRate
+        return Duration.fromObject({ seconds })
+    }
+
+    /* emit a chunk of silence */
+    private emitSilence (fromSamples: number, toSamples: number) {
+        const frames = Math.max(0, Math.floor(toSamples - fromSamples))
+        if (frames <= 0)
+            return
+        const payload = Buffer.alloc(frames * this.bytesPerFrame) /* already zeroed */
+        const timestampStart = this.durationFromSamples(fromSamples)
+        const timestampEnd = this.durationFromSamples(toSamples)
+        const chunk = new SpeechFlowChunk(timestampStart, timestampEnd, "final", "audio", payload)
+        this.emit("chunk", chunk)
+    }
+
+    /* add a chunk of audio for processing */
+    public add (chunk: SpeechFlowChunk & { type: "audio", payload: Buffer }): void {
+        const startSamp = this.samplesFromDuration(chunk.timestampStart)
+        const endSamp = this.samplesFromDuration(chunk.timestampEnd)
+        if (endSamp < startSamp)
+            throw new Error("invalid timestamps")
+
+        /* if chunk starts beyond what we've emitted, insert silence for the gap */
+        if (startSamp > this.emittedEndSamples + this.sampleTolerance) {
+            this.emitSilence(this.emittedEndSamples, startSamp)
+            this.emittedEndSamples = startSamp
+        }
+
+        /* if chunk ends before or at emitted end, we have it fully covered, so drop it */
+        if (endSamp <= this.emittedEndSamples + this.sampleTolerance)
+            return
+
+        /* trim any overlap at the head */
+        const trimHead = Math.max(0, Math.floor(this.emittedEndSamples - startSamp))
+        const availableFrames = Math.floor((endSamp - startSamp) - trimHead)
+        if (availableFrames <= 0)
+            return
+
+        /* determine how many frames the buffer actually has; trust timestamps primarily */
+        const bufFrames = Math.floor(chunk.payload.length / this.bytesPerFrame)
+        const startFrame = Math.min(trimHead, bufFrames)
+        const endFrame = Math.min(startFrame + availableFrames, bufFrames)
+        if (endFrame <= startFrame)
+            return
+
+        /* determine trimmed/normalized chunk */
+        const payload = chunk.payload.subarray(
+            startFrame * this.bytesPerFrame,
+            endFrame * this.bytesPerFrame)
+
+        /* emit trimmed/normalized chunk */
+        const outStartSamples = startSamp + startFrame
+        const outEndSamples = outStartSamples + Math.floor(payload.length / this.bytesPerFrame)
+        const timestampStart = this.durationFromSamples(outStartSamples)
+        const timestampEnd = this.durationFromSamples(outEndSamples)
+        const c = new SpeechFlowChunk(timestampStart, timestampEnd, "final", "audio", payload)
+        this.emit("chunk", c)
+
+        /* advance emitted cursor */
+        this.emittedEndSamples = Math.max(this.emittedEndSamples, outEndSamples)
+    }
+}
+
+/* SpeechFlow node for filling audio gaps */
+export default class SpeechFlowNodeFiller extends SpeechFlowNode {
+    /* declare official node name */
+    public static name = "filler"
+
+    /* internal state */
+    private destroyed = false
+    private filler: AudioFiller | null = null
+    private sendQueue: utils.AsyncQueue<SpeechFlowChunk | null> | null = null
+
+    /* construct node */
+    constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
+        super(id, cfg, opts, args)
+
+        /* declare node configuration parameters */
+        this.configure({
+            segment: { type: "number", val: 50, pos: 0, match: (n: number) => n >= 10 && n <= 1000 }
+        })
+
+        /* declare node input/output format */
+        this.input = "audio"
+        this.output = "audio"
+    }
+
+    /* open node */
+    async open () {
+        /* clear destruction flag */
+        this.destroyed = false
+
+        /* establish queues */
+        this.filler = new AudioFiller(this.config.audioSampleRate, this.config.audioChannels)
+        this.sendQueue = new utils.AsyncQueue<SpeechFlowChunk | null>()
+
+        /* shift chunks from filler to send queue */
+        this.filler.on("chunk", (chunk) => {
+            this.sendQueue?.write(chunk)
+        })
+
+        /* establish a duplex stream */
+        const self = this
+        this.stream = new Stream.Duplex({
+            readableObjectMode: true,
+            writableObjectMode: true,
+            decodeStrings: false,
+            write (chunk: SpeechFlowChunk & { type: "audio", payload: Buffer }, encoding, callback) {
+                if (self.destroyed || self.filler === null)
+                    callback(new Error("stream already destroyed"))
+                else if (!Buffer.isBuffer(chunk.payload))
+                    callback(new Error("invalid chunk payload type"))
+                else {
+                    try {
+                        self.filler.add(chunk)
+                        callback()
+                    }
+                    catch (error: any) {
+                        callback(error)
+                    }
+                }
+            },
+            read (size) {
+                if (self.destroyed || self.sendQueue === null) {
+                    this.push(null)
+                    return
+                }
+                self.sendQueue.read().then((chunk) => {
+                    if (self.destroyed) {
+                        this.push(null)
+                        return
+                    }
+                    if (chunk === null) {
+                        self.log("info", "received EOF signal")
+                        this.push(null)
+                    }
+                    else {
+                        self.log("debug", `received data (${chunk.payload.length} bytes)`)
+                        this.push(chunk)
+                    }
+                }).catch((error) => {
+                    if (!self.destroyed)
+                        self.log("error", `queue read error: ${error.message}`)
+                })
+            },
+            final (callback) {
+                if (self.destroyed) {
+                    callback()
+                    return
+                }
+                this.push(null)
+                callback()
+            }
+        })
+    }
+
+    /* close node */
+    async close () {
+        /* indicate destruction */
+        this.destroyed = true
+
+        /* destroy queues */
+        if (this.sendQueue !== null) {
+            this.sendQueue.destroy()
+            this.sendQueue = null
+        }
+
+        /* destroy filler */
+        if (this.filler !== null) {
+            this.filler.removeAllListeners()
+            this.filler = null
+        }
+
+        /* close stream */
+        if (this.stream !== null) {
+            this.stream.destroy()
+            this.stream = null
+        }
+    }
+}
+
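Note: the new filler node closes timestamp gaps with zeroed PCM and trims overlapping chunk heads against an emitted-samples cursor. Below is a minimal standalone sketch of that cursor arithmetic, using plain sample offsets instead of SpeechFlowChunk/Duration; the GapFiller name and Chunk shape are illustrative only, not part of the package API.

    /* minimal sketch of the filler's gap/overlap arithmetic
       (illustrative names; plain sample offsets, mono 16-bit PCM) */
    type Chunk = { start: number, end: number, payload: Buffer }

    class GapFiller {
        private cursor = 0                       /* samples already emitted */
        constructor (private bytesPerFrame = 2) {}
        add (c: Chunk, emit: (c: Chunk) => void) {
            if (c.start > this.cursor) {         /* gap ahead: emit zeroed PCM */
                const frames = Math.floor(c.start - this.cursor)
                emit({ start: this.cursor, end: c.start,
                       payload: Buffer.alloc(frames * this.bytesPerFrame) })
                this.cursor = c.start
            }
            if (c.end <= this.cursor)            /* fully overlapped: drop */
                return
            const trim = Math.floor(this.cursor - c.start)  /* head overlap */
            emit({ start: c.start + trim, end: c.end,
                   payload: c.payload.subarray(trim * this.bytesPerFrame) })
            this.cursor = c.end
        }
    }

    /* usage: a 100-sample hole between two chunks yields one silence chunk */
    const f = new GapFiller()
    f.add({ start: 0,   end: 100, payload: Buffer.alloc(200) }, console.log)
    f.add({ start: 200, end: 300, payload: Buffer.alloc(200) }, console.log)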

package/speechflow-cli/src/speechflow-node-a2a-gain.ts

@@ -0,0 +1,98 @@
+/*
+** SpeechFlow - Speech Processing Flow Graph
+** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
+** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
+*/
+
+/* standard dependencies */
+import Stream from "node:stream"
+
+/* internal dependencies */
+import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
+import * as utils from "./speechflow-utils"
+
+/* SpeechFlow node for gain adjustment in audio-to-audio passing */
+export default class SpeechFlowNodeGain extends SpeechFlowNode {
+    /* declare official node name */
+    public static name = "gain"
+
+    /* internal state */
+    private destroyed = false
+
+    /* construct node */
+    constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
+        super(id, cfg, opts, args)
+
+        /* declare node configuration parameters */
+        this.configure({
+            db: { type: "number", val: 0, pos: 0, match: (n: number) => n >= -60 && n <= 60 }
+        })
+
+        /* declare node input/output format */
+        this.input = "audio"
+        this.output = "audio"
+    }
+
+    /* open node */
+    async open () {
+        /* clear destruction flag */
+        this.destroyed = false
+
+        /* adjust gain */
+        const adjustGain = (chunk: SpeechFlowChunk & { payload: Buffer }, db: number) => {
+            const dv = new DataView(chunk.payload.buffer, chunk.payload.byteOffset, chunk.payload.byteLength)
+            const gainFactor = utils.dB2lin(db)
+            for (let i = 0; i < dv.byteLength; i += 2) {
+                let sample = dv.getInt16(i, true)
+                sample *= gainFactor
+                sample = Math.max(Math.min(sample, 32767), -32768)
+                dv.setInt16(i, sample, true)
+            }
+        }
+
+        /* establish a transform stream */
+        const self = this
+        this.stream = new Stream.Transform({
+            readableObjectMode: true,
+            writableObjectMode: true,
+            decodeStrings: false,
+            transform (chunk: SpeechFlowChunk & { payload: Buffer }, encoding, callback) {
+                if (self.destroyed) {
+                    callback(new Error("stream already destroyed"))
+                    return
+                }
+                if (!Buffer.isBuffer(chunk.payload))
+                    callback(new Error("invalid chunk payload type"))
+                else if (chunk.payload.byteLength % 2 !== 0)
+                    callback(new Error("invalid audio buffer size (not 16-bit aligned)"))
+                else {
+                    /* adjust chunk */
+                    adjustGain(chunk, self.params.db)
+                    this.push(chunk)
+                    callback()
+                }
+            },
+            final (callback) {
+                if (self.destroyed) {
+                    callback()
+                    return
+                }
+                this.push(null)
+                callback()
+            }
+        })
+    }
+
+    /* close node */
+    async close () {
+        /* indicate destruction */
+        this.destroyed = true
+
+        /* close stream */
+        if (this.stream !== null) {
+            this.stream.destroy()
+            this.stream = null
+        }
+    }
+}
+
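Note: the new gain node scales 16-bit little-endian PCM samples by a linear factor derived from the configured dB value and clamps to the Int16 range. A short sketch follows, assuming utils.dB2lin is the usual amplitude conversion 10^(db/20) (the dB2lin and applyGain names below are illustrative stand-ins, not the package's exports):

    /* sketch: standard dB-to-linear amplitude conversion (assumed
       to match what utils.dB2lin in speechflow-utils computes) */
    const dB2lin = (db: number): number => Math.pow(10, db / 20)

    /* apply gain to 16-bit little-endian PCM, clamping to Int16 range */
    function applyGain (pcm: Buffer, db: number): void {
        const gain = dB2lin(db)
        for (let i = 0; i < pcm.length; i += 2) {
            const sample = pcm.readInt16LE(i) * gain
            pcm.writeInt16LE(Math.max(-32768, Math.min(32767, Math.round(sample))), i)
        }
    }

    /* usage: +6 dB roughly doubles amplitude, 1000 becomes about 1995 */
    const buf = Buffer.alloc(2)
    buf.writeInt16LE(1000, 0)
    applyGain(buf, 6)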

package/speechflow-cli/src/speechflow-node-a2a-gender.ts

@@ -21,7 +21,7 @@ type AudioQueueElement = {
     type: "audio-frame",
     chunk: SpeechFlowChunk,
     data: Float32Array,
-    gender?: "male" | "female"
+    gender?: "male" | "female" | "unknown"
 } | {
     type: "audio-eof"
 }
@@ -32,7 +32,6 @@ export default class SpeechFlowNodeGender extends SpeechFlowNode {
     public static name = "gender"
 
     /* internal state */
-    private static speexInitialized = false
     private classifier: Transformers.AudioClassificationPipeline | null = null
     private queue = new utils.Queue<AudioQueueElement>()
     private queueRecv = this.queue.pointerUse("recv")
@@ -66,7 +65,7 @@ export default class SpeechFlowNodeGender extends SpeechFlowNode {
         this.shutdown = false
 
         /* pass-through logging */
-        const log =
+        const log = this.log.bind(this)
 
         /* the used model */
         const model = "Xenova/wav2vec2-large-xlsr-53-gender-recognition-librispeech"
@@ -81,7 +80,7 @@ export default class SpeechFlowNodeGender extends SpeechFlowNode {
                 artifact += `:${progress.file}`
             let percent = 0
             if (typeof progress.loaded === "number" && typeof progress.total === "number")
-                percent = (progress.loaded
+                percent = (progress.loaded / progress.total) * 100
             else if (typeof progress.progress === "number")
                 percent = progress.progress
             if (percent > 0)
@@ -92,7 +91,7 @@ export default class SpeechFlowNodeGender extends SpeechFlowNode {
                 return
             for (const [ artifact, percent ] of progressState) {
                 this.log("info", `downloaded ${percent.toFixed(2)}% of artifact "${artifact}"`)
-                if (percent >=
+                if (percent >= 100.0)
                     progressState.delete(artifact)
             }
         }, 1000)
@@ -103,11 +102,17 @@ export default class SpeechFlowNodeGender extends SpeechFlowNode {
                 device: "auto",
                 progress_callback: progressCallback
             })
-
-
+            let timeoutId: ReturnType<typeof setTimeout> | null = null
+            const timeoutPromise = new Promise((resolve, reject) => {
+                timeoutId = setTimeout(() =>
+                    reject(new Error("model initialization timeout")), 30 * 1000)
+            })
             this.classifier = await Promise.race([
                 pipelinePromise, timeoutPromise
-            ])
+            ]).finally(() => {
+                if (timeoutId !== null)
+                    clearTimeout(timeoutId)
+            }) as Transformers.AudioClassificationPipeline
         }
         catch (error) {
             if (this.progressInterval) {
@@ -128,10 +133,15 @@ export default class SpeechFlowNodeGender extends SpeechFlowNode {
             if (this.shutdown || this.classifier === null)
                 throw new Error("classifier shutdown during operation")
             const classifyPromise = this.classifier(data)
-
-
-
-
+            let timeoutId: ReturnType<typeof setTimeout> | null = null
+            const timeoutPromise = new Promise((resolve, reject) => {
+                timeoutId = setTimeout(() =>
+                    reject(new Error("classification timeout")), 30 * 1000)
+            })
+            const result = await Promise.race([ classifyPromise, timeoutPromise ]).finally(() => {
+                if (timeoutId !== null)
+                    clearTimeout(timeoutId)
+            }) as Transformers.AudioClassificationOutput | Transformers.AudioClassificationOutput[]
             const classified = Array.isArray(result) ?
                 result as Transformers.AudioClassificationOutput :
                 [ result ]
@@ -139,15 +149,20 @@ export default class SpeechFlowNodeGender extends SpeechFlowNode {
             const c2 = classified.find((c: any) => c.label === "female")
             const male = c1 ? c1.score : 0.0
             const female = c2 ? c2.score : 0.0
-
+            if (male > female)
+                return "male"
+            else if (male < female)
+                return "female"
+            else
+                return "unknown"
         }
 
         /* define sample rate required by model */
         const sampleRateTarget = 16000
 
         /* work off queued audio frames */
-        const frameWindowDuration =
-        const frameWindowSamples = frameWindowDuration * sampleRateTarget
+        const frameWindowDuration = this.params.window / 1000
+        const frameWindowSamples = Math.floor(frameWindowDuration * sampleRateTarget)
         let lastGender = ""
         let workingOff = false
         const workOffQueue = async () => {
@@ -236,8 +251,7 @@ export default class SpeechFlowNodeGender extends SpeechFlowNode {
                 const wav = new WaveFile()
                 wav.fromScratch(self.config.audioChannels, self.config.audioSampleRate, "32f", data)
                 wav.toSampleRate(sampleRateTarget, { method: "cubic" })
-                data = wav.getSamples(false, Float32Array<ArrayBuffer>
-                any as Float32Array<ArrayBuffer>
+                data = wav.getSamples(false, Float32Array) as any as Float32Array<ArrayBuffer>
 
                 /* queue chunk and converted data */
                 self.queueRecv.append({ type: "audio-frame", chunk, data })
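Note: both timeout blocks added to the gender node follow the same race-with-cleanup pattern: race the work against a rejecting timer and always clear the timer afterwards so it cannot keep the event loop alive. A generic sketch of the pattern follows; the withTimeout helper name is illustrative, the package inlines this logic rather than exporting it:

    /* race a promise against a timeout, always clearing the timer */
    function withTimeout<T> (work: Promise<T>, ms: number, what: string): Promise<T> {
        let timer: ReturnType<typeof setTimeout> | null = null
        const timeout = new Promise<never>((_resolve, reject) => {
            timer = setTimeout(() => reject(new Error(`${what} timeout`)), ms)
        })
        return Promise.race([ work, timeout ]).finally(() => {
            if (timer !== null)
                clearTimeout(timer)
        })
    }

    /* usage, mirroring the model initialization above:
       this.classifier = await withTimeout(pipelinePromise, 30 * 1000, "model initialization") */

With a typed helper like this the `as` casts after Promise.race would also become unnecessary, since the result keeps the type of the work promise.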

package/speechflow-cli/src/speechflow-node-a2a-meter.ts

@@ -22,6 +22,7 @@ export default class SpeechFlowNodeMeter extends SpeechFlowNode {
     /* internal state */
     private emitInterval: ReturnType<typeof setInterval> | null = null
     private calcInterval: ReturnType<typeof setInterval> | null = null
+    private silenceTimer: ReturnType<typeof setTimeout> | null = null
     private chunkBuffer = new Float32Array(0)
     private destroyed = false
 
@@ -63,7 +64,6 @@ export default class SpeechFlowNodeMeter extends SpeechFlowNode {
         this.chunkBuffer = new Float32Array(0)
 
         /* define chunk processing function */
-        let timer: ReturnType<typeof setTimeout> | null = null
         const processChunk = (chunkData: Float32Array) => {
             /* update internal audio sample sliding window */
             const newWindow = new Float32Array(sampleWindowSize)
@@ -86,11 +86,11 @@ export default class SpeechFlowNodeMeter extends SpeechFlowNode {
                 calculateLoudnessRange: false,
                 calculateTruePeak: false
             })
-            lufss = lufs.shortTerm ? lufs.shortTerm[0] :
+            lufss = lufs.shortTerm ? lufs.shortTerm[0] : -60
             rms = getRMS(audioData, { asDB: true })
-            if (timer !== null)
-                clearTimeout(timer)
-            timer = setTimeout(() => {
+            if (this.silenceTimer !== null)
+                clearTimeout(this.silenceTimer)
+            this.silenceTimer = setTimeout(() => {
                 lufss = -60
                 rms = -60
             }, 500)
@@ -117,7 +117,7 @@ export default class SpeechFlowNodeMeter extends SpeechFlowNode {
             this.sendResponse([ "meter", "LUFS-S", lufss ])
             this.sendResponse([ "meter", "RMS", rms ])
             if (this.params.dashboard !== "")
-                this.
+                this.sendDashboard("audio", this.params.dashboard, "final", lufss)
         }, this.params.interval)
 
         /* provide Duplex stream and internally attach to meter */
@@ -172,9 +172,6 @@ export default class SpeechFlowNodeMeter extends SpeechFlowNode {
 
     /* close node */
    async close () {
-        /* indicate destruction */
-        this.destroyed = true
-
         /* stop intervals */
         if (this.emitInterval !== null) {
             clearInterval(this.emitInterval)
@@ -184,11 +181,18 @@ export default class SpeechFlowNodeMeter extends SpeechFlowNode {
             clearInterval(this.calcInterval)
             this.calcInterval = null
         }
+        if (this.silenceTimer !== null) {
+            clearTimeout(this.silenceTimer)
+            this.silenceTimer = null
+        }
 
         /* close stream */
         if (this.stream !== null) {
             this.stream.destroy()
             this.stream = null
         }
+
+        /* indicate destruction */
+        this.destroyed = true
     }
 }
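Note: the meter changes promote a function-local timer to the silenceTimer member so close() can cancel it. The pattern itself: every processed chunk re-arms a 500 ms timer, and if no audio arrives before it fires, the readings fall back to the -60 dB floor. A minimal sketch with illustrative names:

    /* silence-decay sketch: readings drop to -60 dB after 500 ms of no audio */
    let lufss = -60
    let silenceTimer: ReturnType<typeof setTimeout> | null = null

    function onMeasurement (value: number) {
        lufss = value
        if (silenceTimer !== null)
            clearTimeout(silenceTimer)
        silenceTimer = setTimeout(() => { lufss = -60 }, 500)
    }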

package/speechflow-cli/src/speechflow-node-a2a-mute.ts

@@ -43,9 +43,10 @@ export default class SpeechFlowNodeMute extends SpeechFlowNode {
             throw new Error("mute: node already destroyed")
         try {
             if (params.length === 2 && params[0] === "mode") {
-                if (
+                if (typeof params[1] !== "string" ||
+                    !params[1].match(/^(?:none|silenced|unplugged)$/))
                     throw new Error("mute: invalid mode argument in external request")
-                const muteMode
+                const muteMode = params[1] as MuteMode
                 this.setMuteMode(muteMode)
                 this.sendResponse([ "mute", "mode", muteMode ])
             }

package/speechflow-cli/src/speechflow-node-a2a-rnnoise-wt.ts

@@ -0,0 +1,62 @@
+/*
+** SpeechFlow - Speech Processing Flow Graph
+** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
+** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
+*/
+
+/* internal dependencies */
+import { parentPort } from "node:worker_threads"
+
+/* external dependencies */
+import { type DenoiseState, Rnnoise } from "@shiguredo/rnnoise-wasm"
+
+/* WASM state */
+let rnnoise: Rnnoise
+let denoiseState: DenoiseState
+
+/* global initialization */
+;(async () => {
+    try {
+        rnnoise = await Rnnoise.load()
+        denoiseState = rnnoise.createDenoiseState()
+        parentPort!.postMessage({ type: "ready" })
+    }
+    catch (err) {
+        parentPort!.postMessage({ type: "failed", message: `failed to initialize RNNoise: ${err}` })
+        process.exit(1)
+    }
+})()
+
+/* receive messages */
+parentPort!.on("message", (msg) => {
+    if (msg.type === "process") {
+        /* process a single audio frame */
+        const { id, data } = msg
+
+        /* convert regular Int16Array [-32768,32768]
+           to unusual non-normalized Float32Array [-32768,32768]
+           as required by RNNoise */
+        const f32a = new Float32Array(data.length)
+        for (let i = 0; i < data.length; i++)
+            f32a[i] = data[i]
+
+        /* process frame with RNNoise WASM */
+        denoiseState.processFrame(f32a)
+
+        /* convert back Float32Array to Int16Array */
+        const i16 = new Int16Array(data.length)
+        for (let i = 0; i < data.length; i++)
+            i16[i] = Math.round(f32a[i])
+
+        parentPort!.postMessage({ type: "process-done", id, data: i16 }, [ i16.buffer ])
+    }
+    else if (msg.type === "close") {
+        /* shutdown this process */
+        try {
+            denoiseState.destroy()
+        }
+        finally {
+            process.exit(0)
+        }
+    }
+})
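
Note: the worker above accepts "process" messages carrying an id and an Int16Array frame and answers with "process-done" plus the denoised frame, transferring the underlying buffers to avoid copies. A hypothetical parent-side driver is sketched below; the real counterpart lives in speechflow-node-a2a-rnnoise.ts, whose content this view does not show, so the request bookkeeping here is an assumption, and waiting for the initial "ready" message is omitted for brevity:

    /* hypothetical parent-side driver for the worker protocol above */
    import { Worker } from "node:worker_threads"

    const worker = new Worker("./speechflow-node-a2a-rnnoise-wt.js")
    const pending = new Map<number, (data: Int16Array) => void>()
    let nextId = 0

    worker.on("message", (msg: any) => {
        if (msg.type === "process-done") {
            pending.get(msg.id)?.(msg.data)
            pending.delete(msg.id)
        }
    })

    /* send one audio frame for denoising, transferring its buffer
       (RNNoise operates on 480-sample frames at 48 kHz) */
    function denoise (frame: Int16Array): Promise<Int16Array> {
        return new Promise((resolve) => {
            const id = nextId++
            pending.set(id, resolve)
            worker.postMessage({ type: "process", id, data: frame }, [ frame.buffer ])
        })
    }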