speechflow 1.4.4 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +37 -0
- package/README.md +273 -7
- package/etc/claude.md +70 -0
- package/etc/speechflow.png +0 -0
- package/etc/speechflow.yaml +29 -11
- package/etc/stx.conf +7 -0
- package/package.json +7 -6
- package/speechflow-cli/dst/speechflow-node-a2a-compressor-wt.d.ts +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-compressor-wt.js +155 -0
- package/speechflow-cli/dst/speechflow-node-a2a-compressor-wt.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-compressor.d.ts +15 -0
- package/speechflow-cli/dst/speechflow-node-a2a-compressor.js +287 -0
- package/speechflow-cli/dst/speechflow-node-a2a-compressor.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-dynamics-wt.d.ts +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-dynamics-wt.js +208 -0
- package/speechflow-cli/dst/speechflow-node-a2a-dynamics-wt.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-dynamics.d.ts +15 -0
- package/speechflow-cli/dst/speechflow-node-a2a-dynamics.js +312 -0
- package/speechflow-cli/dst/speechflow-node-a2a-dynamics.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-expander-wt.d.ts +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-expander-wt.js +161 -0
- package/speechflow-cli/dst/speechflow-node-a2a-expander-wt.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-expander.d.ts +13 -0
- package/speechflow-cli/dst/speechflow-node-a2a-expander.js +208 -0
- package/speechflow-cli/dst/speechflow-node-a2a-expander.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-ffmpeg.js +13 -3
- package/speechflow-cli/dst/speechflow-node-a2a-ffmpeg.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-filler.d.ts +14 -0
- package/speechflow-cli/dst/speechflow-node-a2a-filler.js +233 -0
- package/speechflow-cli/dst/speechflow-node-a2a-filler.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-gain.d.ts +12 -0
- package/speechflow-cli/dst/speechflow-node-a2a-gain.js +125 -0
- package/speechflow-cli/dst/speechflow-node-a2a-gain.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-gender.d.ts +0 -1
- package/speechflow-cli/dst/speechflow-node-a2a-gender.js +28 -12
- package/speechflow-cli/dst/speechflow-node-a2a-gender.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-meter.d.ts +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-meter.js +35 -53
- package/speechflow-cli/dst/speechflow-node-a2a-meter.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-mute.js +2 -1
- package/speechflow-cli/dst/speechflow-node-a2a-mute.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-rnnoise-wt.d.ts +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-rnnoise-wt.js +55 -0
- package/speechflow-cli/dst/speechflow-node-a2a-rnnoise-wt.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-rnnoise.d.ts +14 -0
- package/speechflow-cli/dst/speechflow-node-a2a-rnnoise.js +184 -0
- package/speechflow-cli/dst/speechflow-node-a2a-rnnoise.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-speex.d.ts +14 -0
- package/speechflow-cli/dst/speechflow-node-a2a-speex.js +156 -0
- package/speechflow-cli/dst/speechflow-node-a2a-speex.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-vad.js +3 -3
- package/speechflow-cli/dst/speechflow-node-a2a-vad.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-wav.js +22 -17
- package/speechflow-cli/dst/speechflow-node-a2a-wav.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2t-awstranscribe.d.ts +18 -0
- package/speechflow-cli/dst/speechflow-node-a2t-awstranscribe.js +317 -0
- package/speechflow-cli/dst/speechflow-node-a2t-awstranscribe.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-a2t-deepgram.js +16 -33
- package/speechflow-cli/dst/speechflow-node-a2t-deepgram.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2t-openaitranscribe.d.ts +19 -0
- package/speechflow-cli/dst/speechflow-node-a2t-openaitranscribe.js +351 -0
- package/speechflow-cli/dst/speechflow-node-a2t-openaitranscribe.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-t2a-awspolly.d.ts +16 -0
- package/speechflow-cli/dst/speechflow-node-t2a-awspolly.js +171 -0
- package/speechflow-cli/dst/speechflow-node-t2a-awspolly.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.js +19 -14
- package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2a-kokoro.js +11 -6
- package/speechflow-cli/dst/speechflow-node-t2a-kokoro.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-awstranslate.d.ts +13 -0
- package/speechflow-cli/dst/speechflow-node-t2t-awstranslate.js +141 -0
- package/speechflow-cli/dst/speechflow-node-t2t-awstranslate.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-t2t-deepl.js +13 -15
- package/speechflow-cli/dst/speechflow-node-t2t-deepl.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-format.js +10 -15
- package/speechflow-cli/dst/speechflow-node-t2t-format.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-ollama.js +44 -31
- package/speechflow-cli/dst/speechflow-node-t2t-ollama.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-openai.js +44 -45
- package/speechflow-cli/dst/speechflow-node-t2t-openai.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-sentence.js +8 -8
- package/speechflow-cli/dst/speechflow-node-t2t-sentence.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js +10 -12
- package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-transformers.js +22 -27
- package/speechflow-cli/dst/speechflow-node-t2t-transformers.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-x2x-filter.d.ts +1 -0
- package/speechflow-cli/dst/speechflow-node-x2x-filter.js +50 -15
- package/speechflow-cli/dst/speechflow-node-x2x-filter.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-x2x-trace.js +17 -18
- package/speechflow-cli/dst/speechflow-node-x2x-trace.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-device.js +13 -21
- package/speechflow-cli/dst/speechflow-node-xio-device.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-mqtt.d.ts +1 -0
- package/speechflow-cli/dst/speechflow-node-xio-mqtt.js +22 -16
- package/speechflow-cli/dst/speechflow-node-xio-mqtt.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-websocket.js +19 -19
- package/speechflow-cli/dst/speechflow-node-xio-websocket.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node.d.ts +6 -3
- package/speechflow-cli/dst/speechflow-node.js +13 -2
- package/speechflow-cli/dst/speechflow-node.js.map +1 -1
- package/speechflow-cli/dst/speechflow-utils-audio-wt.d.ts +1 -0
- package/speechflow-cli/dst/speechflow-utils-audio-wt.js +124 -0
- package/speechflow-cli/dst/speechflow-utils-audio-wt.js.map +1 -0
- package/speechflow-cli/dst/speechflow-utils-audio.d.ts +13 -0
- package/speechflow-cli/dst/speechflow-utils-audio.js +137 -0
- package/speechflow-cli/dst/speechflow-utils-audio.js.map +1 -0
- package/speechflow-cli/dst/speechflow-utils.d.ts +18 -0
- package/speechflow-cli/dst/speechflow-utils.js +123 -35
- package/speechflow-cli/dst/speechflow-utils.js.map +1 -1
- package/speechflow-cli/dst/speechflow.js +114 -27
- package/speechflow-cli/dst/speechflow.js.map +1 -1
- package/speechflow-cli/etc/oxlint.jsonc +112 -11
- package/speechflow-cli/etc/stx.conf +2 -2
- package/speechflow-cli/etc/tsconfig.json +1 -1
- package/speechflow-cli/package.d/@shiguredo+rnnoise-wasm+2025.1.5.patch +25 -0
- package/speechflow-cli/package.json +102 -94
- package/speechflow-cli/src/lib.d.ts +24 -0
- package/speechflow-cli/src/speechflow-node-a2a-compressor-wt.ts +151 -0
- package/speechflow-cli/src/speechflow-node-a2a-compressor.ts +303 -0
- package/speechflow-cli/src/speechflow-node-a2a-expander-wt.ts +158 -0
- package/speechflow-cli/src/speechflow-node-a2a-expander.ts +212 -0
- package/speechflow-cli/src/speechflow-node-a2a-ffmpeg.ts +13 -3
- package/speechflow-cli/src/speechflow-node-a2a-filler.ts +223 -0
- package/speechflow-cli/src/speechflow-node-a2a-gain.ts +98 -0
- package/speechflow-cli/src/speechflow-node-a2a-gender.ts +31 -17
- package/speechflow-cli/src/speechflow-node-a2a-meter.ts +37 -56
- package/speechflow-cli/src/speechflow-node-a2a-mute.ts +3 -2
- package/speechflow-cli/src/speechflow-node-a2a-rnnoise-wt.ts +62 -0
- package/speechflow-cli/src/speechflow-node-a2a-rnnoise.ts +164 -0
- package/speechflow-cli/src/speechflow-node-a2a-speex.ts +137 -0
- package/speechflow-cli/src/speechflow-node-a2a-vad.ts +3 -3
- package/speechflow-cli/src/speechflow-node-a2a-wav.ts +20 -13
- package/speechflow-cli/src/speechflow-node-a2t-awstranscribe.ts +308 -0
- package/speechflow-cli/src/speechflow-node-a2t-deepgram.ts +16 -33
- package/speechflow-cli/src/speechflow-node-a2t-openaitranscribe.ts +337 -0
- package/speechflow-cli/src/speechflow-node-t2a-awspolly.ts +187 -0
- package/speechflow-cli/src/speechflow-node-t2a-elevenlabs.ts +19 -14
- package/speechflow-cli/src/speechflow-node-t2a-kokoro.ts +12 -7
- package/speechflow-cli/src/speechflow-node-t2t-awstranslate.ts +152 -0
- package/speechflow-cli/src/speechflow-node-t2t-deepl.ts +13 -15
- package/speechflow-cli/src/speechflow-node-t2t-format.ts +10 -15
- package/speechflow-cli/src/speechflow-node-t2t-ollama.ts +55 -42
- package/speechflow-cli/src/speechflow-node-t2t-openai.ts +58 -58
- package/speechflow-cli/src/speechflow-node-t2t-sentence.ts +10 -10
- package/speechflow-cli/src/speechflow-node-t2t-subtitle.ts +15 -16
- package/speechflow-cli/src/speechflow-node-t2t-transformers.ts +27 -32
- package/speechflow-cli/src/speechflow-node-x2x-filter.ts +20 -16
- package/speechflow-cli/src/speechflow-node-x2x-trace.ts +20 -19
- package/speechflow-cli/src/speechflow-node-xio-device.ts +15 -23
- package/speechflow-cli/src/speechflow-node-xio-mqtt.ts +23 -16
- package/speechflow-cli/src/speechflow-node-xio-websocket.ts +19 -19
- package/speechflow-cli/src/speechflow-node.ts +21 -8
- package/speechflow-cli/src/speechflow-utils-audio-wt.ts +172 -0
- package/speechflow-cli/src/speechflow-utils-audio.ts +147 -0
- package/speechflow-cli/src/speechflow-utils.ts +125 -32
- package/speechflow-cli/src/speechflow.ts +118 -30
- package/speechflow-ui-db/dst/index.css +1 -1
- package/speechflow-ui-db/dst/index.js +31 -31
- package/speechflow-ui-db/etc/eslint.mjs +0 -1
- package/speechflow-ui-db/etc/tsc-client.json +3 -3
- package/speechflow-ui-db/package.json +11 -10
- package/speechflow-ui-db/src/app.vue +96 -78
- package/speechflow-ui-st/dst/index.js +26 -26
- package/speechflow-ui-st/etc/eslint.mjs +0 -1
- package/speechflow-ui-st/etc/tsc-client.json +3 -3
- package/speechflow-ui-st/package.json +11 -10
- package/speechflow-ui-st/src/app.vue +5 -12

package/speechflow-cli/src/speechflow-node-a2a-expander.ts (new file):

```diff
@@ -0,0 +1,212 @@
+/*
+** SpeechFlow - Speech Processing Flow Graph
+** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
+** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
+*/
+
+/* standard dependencies */
+import path from "node:path"
+import Stream from "node:stream"
+
+/* external dependencies */
+import { AudioWorkletNode } from "node-web-audio-api"
+
+/* internal dependencies */
+import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
+import * as utils from "./speechflow-utils"
+import { WebAudio } from "./speechflow-utils-audio"
+
+/* internal types */
+interface AudioExpanderConfig {
+    thresholdDb?: number
+    floorDb?: number
+    ratio?: number
+    attackMs?: number
+    releaseMs?: number
+    kneeDb?: number
+    makeupDb?: number
+}
+
+/* audio noise expander class */
+class AudioExpander extends WebAudio {
+    /* internal state */
+    private config: Required<AudioExpanderConfig>
+    private expanderNode: AudioWorkletNode | null = null
+
+    /* construct object */
+    constructor(
+        sampleRate: number,
+        channels: number,
+        config: AudioExpanderConfig = {}
+    ) {
+        super(sampleRate, channels)
+
+        /* store configuration */
+        this.config = {
+            thresholdDb: config.thresholdDb ?? -45,
+            floorDb: config.floorDb ?? -64,
+            ratio: config.ratio ?? 4.0,
+            attackMs: config.attackMs ?? 10,
+            releaseMs: config.releaseMs ?? 50,
+            kneeDb: config.kneeDb ?? 6.0,
+            makeupDb: config.makeupDb ?? 0
+        }
+    }
+
+    /* initialize object */
+    public async setup (): Promise<void> {
+        await super.setup()
+
+        /* add audio worklet module */
+        const url = path.resolve(__dirname, "speechflow-node-a2a-expander-wt.js")
+        await this.audioContext.audioWorklet.addModule(url)
+
+        /* create expander node */
+        this.expanderNode = new AudioWorkletNode(this.audioContext, "expander", {
+            numberOfInputs: 1,
+            numberOfOutputs: 1,
+            processorOptions: {
+                sampleRate: this.audioContext.sampleRate
+            }
+        })
+
+        /* configure expander node */
+        const currentTime = this.audioContext.currentTime
+        const node = this.expanderNode!
+        const params = node.parameters as Map<string, AudioParam>
+        params.get("threshold")!.setValueAtTime(this.config.thresholdDb, currentTime)
+        params.get("floor")!.setValueAtTime(this.config.floorDb, currentTime)
+        params.get("ratio")!.setValueAtTime(this.config.ratio, currentTime)
+        params.get("attack")!.setValueAtTime(this.config.attackMs / 1000, currentTime)
+        params.get("release")!.setValueAtTime(this.config.releaseMs / 1000, currentTime)
+        params.get("knee")!.setValueAtTime(this.config.kneeDb, currentTime)
+        params.get("makeup")!.setValueAtTime(this.config.makeupDb, currentTime)
+
+        /* connect nodes */
+        this.sourceNode!.connect(this.expanderNode)
+        this.expanderNode.connect(this.captureNode!)
+    }
+
+    public async destroy (): Promise<void> {
+        await super.destroy()
+
+        /* destroy expander node */
+        if (this.expanderNode !== null) {
+            this.expanderNode.disconnect()
+            this.expanderNode = null
+        }
+    }
+}
+
+/* SpeechFlow node for noise expander in audio-to-audio passing */
+export default class SpeechFlowNodeExpander extends SpeechFlowNode {
+    /* declare official node name */
+    public static name = "expander"
+
+    /* internal state */
+    private destroyed = false
+    private expander: AudioExpander | null = null
+
+    /* construct node */
+    constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
+        super(id, cfg, opts, args)
+
+        /* declare node configuration parameters */
+        this.configure({
+            thresholdDb: { type: "number", val: -45, match: (n: number) => n <= 0 && n >= -100 },
+            floorDb: { type: "number", val: -64, match: (n: number) => n <= 0 && n >= -100 },
+            ratio: { type: "number", val: 4.0, match: (n: number) => n >= 1 && n <= 20 },
+            attackMs: { type: "number", val: 10, match: (n: number) => n >= 0 && n <= 1000 },
+            releaseMs: { type: "number", val: 50, match: (n: number) => n >= 0 && n <= 1000 },
+            kneeDb: { type: "number", val: 6.0, match: (n: number) => n >= 0 && n <= 40 },
+            makeupDb: { type: "number", val: 0, match: (n: number) => n >= -24 && n <= 24 }
+        })
+
+        /* sanity check floor vs threshold */
+        if (this.params.floorDb >= this.params.thresholdDb)
+            throw new Error("floor dB must be less than threshold dB for proper expansion")
+
+        /* declare node input/output format */
+        this.input = "audio"
+        this.output = "audio"
+    }
+
+    /* open node */
+    async open () {
+        /* clear destruction flag */
+        this.destroyed = false
+
+        /* setup expander */
+        this.expander = new AudioExpander(
+            this.config.audioSampleRate,
+            this.config.audioChannels, {
+                thresholdDb: this.params.thresholdDb,
+                floorDb: this.params.floorDb,
+                ratio: this.params.ratio,
+                attackMs: this.params.attackMs,
+                releaseMs: this.params.releaseMs,
+                kneeDb: this.params.kneeDb,
+                makeupDb: this.params.makeupDb
+            }
+        )
+        await this.expander.setup()
+
+        /* establish a transform stream */
+        const self = this
+        this.stream = new Stream.Transform({
+            readableObjectMode: true,
+            writableObjectMode: true,
+            decodeStrings: false,
+            transform (chunk: SpeechFlowChunk & { payload: Buffer }, encoding, callback) {
+                if (self.destroyed) {
+                    callback(new Error("stream already destroyed"))
+                    return
+                }
+                if (!Buffer.isBuffer(chunk.payload))
+                    callback(new Error("invalid chunk payload type"))
+                else {
+                    /* expand chunk */
+                    const payload = utils.convertBufToI16(chunk.payload)
+                    self.expander?.process(payload).then((result) => {
+                        if (self.destroyed)
+                            throw new Error("stream already destroyed")
+
+                        /* take over expanded data */
+                        const payload = utils.convertI16ToBuf(result)
+                        chunk.payload = payload
+                        this.push(chunk)
+                        callback()
+                    }).catch((error) => {
+                        callback(new Error(`expansion failed: ${error}`))
+                    })
+                }
+            },
+            final (callback) {
+                if (self.destroyed) {
+                    callback()
+                    return
+                }
+                this.push(null)
+                callback()
+            }
+        })
+    }
+
+    /* close node */
+    async close () {
+        /* indicate destruction */
+        this.destroyed = true
+
+        /* destroy expander */
+        if (this.expander !== null) {
+            await this.expander.destroy()
+            this.expander = null
+        }
+
+        /* close stream */
+        if (this.stream !== null) {
+            this.stream.destroy()
+            this.stream = null
+        }
+    }
+}
```
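The per-sample processing itself lives in the accompanying worklet (speechflow-node-a2a-expander-wt.ts, also new in this release), which this excerpt does not reproduce. For orientation only, the static gain curve of a hard-knee downward expander, under one common reading of the threshold/floor/ratio parameters, looks roughly like the sketch below; the function is illustrative, not the package's code, and knee, attack, and release shaping are omitted:

```ts
/* illustrative sketch only, not SpeechFlow code: static gain curve of a
   hard-knee downward expander, assuming "floor" is the absolute level
   below which the output is never pushed */
function expanderGainDb (inputDb: number, thresholdDb: number,
                         floorDb: number, ratio: number): number {
    if (inputDb >= thresholdDb)
        return 0                                          /* unity gain above threshold */
    if (inputDb <= floorDb)
        return 0                                          /* already below floor: leave untouched */
    const gainDb = (inputDb - thresholdDb) * (ratio - 1)  /* expand downward */
    return Math.max(gainDb, floorDb - inputDb)            /* cap so output stays >= floor */
}

/* with the node defaults (threshold -45 dB, floor -64 dB, ratio 4),
   a -50 dB input is attenuated by 14 dB, landing exactly on the floor */
```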

package/speechflow-cli/src/speechflow-node-a2a-ffmpeg.ts:

```diff
@@ -75,7 +75,7 @@ export default class SpeechFlowNodeFFmpeg extends SpeechFlowNode
             "c:a": "pcm_s16le",
             "ar": this.config.audioSampleRate,
             "ac": this.config.audioChannels,
-            "f": "s16le"
+            "f": "s16le"
         } : {}),
         ...(this.params.dst === "wav" ? {
             "f": "wav"
@@ -90,7 +90,12 @@ export default class SpeechFlowNodeFFmpeg extends SpeechFlowNode
             "f": "opus"
         } : {})
     })
-    this.ffmpeg.run()
+    try {
+        this.ffmpeg.run()
+    }
+    catch (err) {
+        throw new Error(`failed to start FFmpeg process: ${err}`)
+    }

     /* establish a duplex stream and connect it to FFmpeg */
     this.stream = Stream.Duplex.from({
@@ -120,7 +125,12 @@ export default class SpeechFlowNodeFFmpeg extends SpeechFlowNode

     /* shutdown FFmpeg */
     if (this.ffmpeg !== null) {
-        this.ffmpeg.kill()
+        try {
+            this.ffmpeg.kill()
+        }
+        catch {
+            /* ignore kill errors during cleanup */
+        }
         this.ffmpeg = null
     }
 }
```

package/speechflow-cli/src/speechflow-node-a2a-filler.ts (new file):

```diff
@@ -0,0 +1,223 @@
+/*
+** SpeechFlow - Speech Processing Flow Graph
+** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
+** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
+*/
+
+/* standard dependencies */
+import Stream from "node:stream"
+import { EventEmitter } from "node:events"
+import { Duration } from "luxon"
+
+/* internal dependencies */
+import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
+import * as utils from "./speechflow-utils"
+
+class AudioFiller extends EventEmitter {
+    private emittedEndSamples = 0 /* stream position in samples already emitted */
+    private readonly bytesPerSample = 2 /* PCM I16 */
+    private readonly bytesPerFrame: number
+    private readonly sampleTolerance = 0.5 /* tolerance for floating-point sample comparisons */
+
+    constructor (private sampleRate = 48000, private channels = 1) {
+        super()
+        this.bytesPerFrame = this.channels * this.bytesPerSample
+    }
+
+    /* optional helper to allow subscribing with strong typing */
+    public on(event: "chunk", listener: (chunk: SpeechFlowChunk) => void): this
+    public on(event: string, listener: (...args: any[]) => void): this {
+        return super.on(event, listener)
+    }
+
+    /* convert duration to fractional samples */
+    private samplesFromDuration(duration: Duration): number {
+        const seconds = duration.as("seconds")
+        const samples = seconds * this.sampleRate
+        return samples
+    }
+
+    /* convert fractional samples to duration */
+    private durationFromSamples(samples: number): Duration {
+        const seconds = samples / this.sampleRate
+        return Duration.fromObject({ seconds })
+    }
+
+    /* emit a chunk of silence */
+    private emitSilence (fromSamples: number, toSamples: number) {
+        const frames = Math.max(0, Math.floor(toSamples - fromSamples))
+        if (frames <= 0)
+            return
+        const payload = Buffer.alloc(frames * this.bytesPerFrame) /* already zeroed */
+        const timestampStart = this.durationFromSamples(fromSamples)
+        const timestampEnd = this.durationFromSamples(toSamples)
+        const chunk = new SpeechFlowChunk(timestampStart, timestampEnd, "final", "audio", payload)
+        this.emit("chunk", chunk)
+    }
+
+    /* add a chunk of audio for processing */
+    public add (chunk: SpeechFlowChunk & { type: "audio", payload: Buffer }): void {
+        const startSamp = this.samplesFromDuration(chunk.timestampStart)
+        const endSamp = this.samplesFromDuration(chunk.timestampEnd)
+        if (endSamp < startSamp)
+            throw new Error("invalid timestamps")
+
+        /* if chunk starts beyond what we've emitted, insert silence for the gap */
+        if (startSamp > this.emittedEndSamples + this.sampleTolerance) {
+            this.emitSilence(this.emittedEndSamples, startSamp)
+            this.emittedEndSamples = startSamp
+        }
+
+        /* if chunk ends before or at emitted end, we have it fully covered, so drop it */
+        if (endSamp <= this.emittedEndSamples + this.sampleTolerance)
+            return
+
+        /* trim any overlap at the head */
+        const trimHead = Math.max(0, Math.floor(this.emittedEndSamples - startSamp))
+        const availableFrames = Math.floor((endSamp - startSamp) - trimHead)
+        if (availableFrames <= 0)
+            return
+
+        /* determine how many frames the buffer actually has; trust timestamps primarily */
+        const bufFrames = Math.floor(chunk.payload.length / this.bytesPerFrame)
+        const startFrame = Math.min(trimHead, bufFrames)
+        const endFrame = Math.min(startFrame + availableFrames, bufFrames)
+        if (endFrame <= startFrame)
+            return
+
+        /* determine trimmed/normalized chunk */
+        const payload = chunk.payload.subarray(
+            startFrame * this.bytesPerFrame,
+            endFrame * this.bytesPerFrame)
+
+        /* emit trimmed/normalized chunk */
+        const outStartSamples = startSamp + startFrame
+        const outEndSamples = outStartSamples + Math.floor(payload.length / this.bytesPerFrame)
+        const timestampStart = this.durationFromSamples(outStartSamples)
+        const timestampEnd = this.durationFromSamples(outEndSamples)
+        const c = new SpeechFlowChunk(timestampStart, timestampEnd, "final", "audio", payload)
+        this.emit("chunk", c)
+
+        /* advance emitted cursor */
+        this.emittedEndSamples = Math.max(this.emittedEndSamples, outEndSamples)
+    }
+}
+
+/* SpeechFlow node for filling audio gaps */
+export default class SpeechFlowNodeFiller extends SpeechFlowNode {
+    /* declare official node name */
+    public static name = "filler"
+
+    /* internal state */
+    private destroyed = false
+    private filler: AudioFiller | null = null
+    private sendQueue: utils.AsyncQueue<SpeechFlowChunk | null> | null = null
+
+    /* construct node */
+    constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
+        super(id, cfg, opts, args)
+
+        /* declare node configuration parameters */
+        this.configure({
+            segment: { type: "number", val: 50, pos: 0, match: (n: number) => n >= 10 && n <= 1000 }
+        })
+
+        /* declare node input/output format */
+        this.input = "audio"
+        this.output = "audio"
+    }
+
+    /* open node */
+    async open () {
+        /* clear destruction flag */
+        this.destroyed = false
+
+        /* establish queues */
+        this.filler = new AudioFiller(this.config.audioSampleRate, this.config.audioChannels)
+        this.sendQueue = new utils.AsyncQueue<SpeechFlowChunk | null>()
+
+        /* shift chunks from filler to send queue */
+        this.filler.on("chunk", (chunk) => {
+            this.sendQueue?.write(chunk)
+        })
+
+        /* establish a duplex stream */
+        const self = this
+        this.stream = new Stream.Duplex({
+            readableObjectMode: true,
+            writableObjectMode: true,
+            decodeStrings: false,
+            write (chunk: SpeechFlowChunk & { type: "audio", payload: Buffer }, encoding, callback) {
+                if (self.destroyed || self.filler === null)
+                    callback(new Error("stream already destroyed"))
+                else if (!Buffer.isBuffer(chunk.payload))
+                    callback(new Error("invalid chunk payload type"))
+                else {
+                    try {
+                        self.filler.add(chunk)
+                        callback()
+                    }
+                    catch (error: any) {
+                        callback(error)
+                    }
+                }
+            },
+            read (size) {
+                if (self.destroyed || self.sendQueue === null) {
+                    this.push(null)
+                    return
+                }
+                self.sendQueue.read().then((chunk) => {
+                    if (self.destroyed) {
+                        this.push(null)
+                        return
+                    }
+                    if (chunk === null) {
+                        self.log("info", "received EOF signal")
+                        this.push(null)
+                    }
+                    else {
+                        self.log("debug", `received data (${chunk.payload.length} bytes)`)
+                        this.push(chunk)
+                    }
+                }).catch((error) => {
+                    if (!self.destroyed)
+                        self.log("error", `queue read error: ${error.message}`)
+                })
+            },
+            final (callback) {
+                if (self.destroyed) {
+                    callback()
+                    return
+                }
+                this.push(null)
+                callback()
+            }
+        })
+    }
+
+    /* close node */
+    async close () {
+        /* indicate destruction */
+        this.destroyed = true
+
+        /* destroy queues */
+        if (this.sendQueue !== null) {
+            this.sendQueue.destroy()
+            this.sendQueue = null
+        }
+
+        /* destroy filler */
+        if (this.filler !== null) {
+            this.filler.removeAllListeners()
+            this.filler = null
+        }
+
+        /* close stream */
+        if (this.stream !== null) {
+            this.stream.destroy()
+            this.stream = null
+        }
+    }
+}
+
```
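The sample arithmetic above is easiest to follow with concrete numbers. A usage sketch, assuming the AudioFiller class and the SpeechFlowChunk constructor signature exactly as they appear in this diff: at 48 kHz mono PCM-I16 (2 bytes per frame), a first chunk starting at 100 ms forces a zeroed 4800-frame (9600-byte) silence chunk for the 0-100 ms gap before the chunk itself passes through.

```ts
import { Duration } from "luxon"

/* AudioFiller and SpeechFlowChunk as defined in the diff above */
const filler = new AudioFiller(48000, 1)   /* 48 kHz, mono */
filler.on("chunk", (chunk) => {
    const startMs = chunk.timestampStart.as("seconds") * 1000
    console.log(`chunk @ ${startMs} ms: ${(chunk.payload as Buffer).length} bytes`)
})

/* a chunk covering 100-200 ms while nothing was emitted yet */
filler.add(new SpeechFlowChunk(
    Duration.fromObject({ seconds: 0.1 }),
    Duration.fromObject({ seconds: 0.2 }),
    "final", "audio",
    Buffer.alloc(4800 * 2) /* 100 ms of PCM-I16 payload */
) as SpeechFlowChunk & { type: "audio", payload: Buffer })

/* prints:
   chunk @ 0 ms: 9600 bytes     (silence inserted for the 0-100 ms gap)
   chunk @ 100 ms: 9600 bytes   (the original chunk, passed through)   */
```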

package/speechflow-cli/src/speechflow-node-a2a-gain.ts (new file):

```diff
@@ -0,0 +1,98 @@
+/*
+** SpeechFlow - Speech Processing Flow Graph
+** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
+** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
+*/
+
+/* standard dependencies */
+import Stream from "node:stream"
+
+/* internal dependencies */
+import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
+import * as utils from "./speechflow-utils"
+
+/* SpeechFlow node for gain adjustment in audio-to-audio passing */
+export default class SpeechFlowNodeGain extends SpeechFlowNode {
+    /* declare official node name */
+    public static name = "gain"
+
+    /* internal state */
+    private destroyed = false
+
+    /* construct node */
+    constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
+        super(id, cfg, opts, args)
+
+        /* declare node configuration parameters */
+        this.configure({
+            db: { type: "number", val: 0, pos: 0, match: (n: number) => n >= -60 && n <= 60 }
+        })
+
+        /* declare node input/output format */
+        this.input = "audio"
+        this.output = "audio"
+    }
+
+    /* open node */
+    async open () {
+        /* clear destruction flag */
+        this.destroyed = false
+
+        /* adjust gain */
+        const adjustGain = (chunk: SpeechFlowChunk & { payload: Buffer }, db: number) => {
+            const dv = new DataView(chunk.payload.buffer, chunk.payload.byteOffset, chunk.payload.byteLength)
+            const gainFactor = utils.dB2lin(db)
+            for (let i = 0; i < dv.byteLength; i += 2) {
+                let sample = dv.getInt16(i, true)
+                sample *= gainFactor
+                sample = Math.max(Math.min(sample, 32767), -32768)
+                dv.setInt16(i, sample, true)
+            }
+        }
+
+        /* establish a transform stream */
+        const self = this
+        this.stream = new Stream.Transform({
+            readableObjectMode: true,
+            writableObjectMode: true,
+            decodeStrings: false,
+            transform (chunk: SpeechFlowChunk & { payload: Buffer }, encoding, callback) {
+                if (self.destroyed) {
+                    callback(new Error("stream already destroyed"))
+                    return
+                }
+                if (!Buffer.isBuffer(chunk.payload))
+                    callback(new Error("invalid chunk payload type"))
+                else if (chunk.payload.byteLength % 2 !== 0)
+                    callback(new Error("invalid audio buffer size (not 16-bit aligned)"))
+                else {
+                    /* adjust chunk */
+                    adjustGain(chunk, self.params.db)
+                    this.push(chunk)
+                    callback()
+                }
+            },
+            final (callback) {
+                if (self.destroyed) {
+                    callback()
+                    return
+                }
+                this.push(null)
+                callback()
+            }
+        })
+    }
+
+    /* close node */
+    async close () {
+        /* indicate destruction */
+        this.destroyed = true
+
+        /* close stream */
+        if (this.stream !== null) {
+            this.stream.destroy()
+            this.stream = null
+        }
+    }
+}
+
```
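The node multiplies each raw Int16 sample by a linear factor derived from the dB parameter. utils.dB2lin itself is not shown in this diff; assuming the usual amplitude convention (a factor of 10^(dB/20)), it behaves like this minimal sketch:

```ts
/* assumed behavior of utils.dB2lin (not part of this diff):
   standard amplitude decibel-to-linear conversion */
const dB2lin = (db: number): number => Math.pow(10, db / 20)

dB2lin(0)    /* = 1.0   : unity gain      */
dB2lin(6)    /* ~ 1.995 : roughly doubles */
dB2lin(-6)   /* ~ 0.501 : roughly halves  */

/* hence the clamp in adjustGain() above: a near-full-scale sample
   multiplied by ~2 would overflow the Int16 range [-32768, 32767] */
```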

package/speechflow-cli/src/speechflow-node-a2a-gender.ts:

```diff
@@ -21,7 +21,7 @@ type AudioQueueElement = {
     type: "audio-frame",
     chunk: SpeechFlowChunk,
     data: Float32Array,
-    gender?: "male" | "female"
+    gender?: "male" | "female" | "unknown"
 } | {
     type: "audio-eof"
 }
@@ -32,7 +32,6 @@ export default class SpeechFlowNodeGender extends SpeechFlowNode
     public static name = "gender"

     /* internal state */
-    private static speexInitialized = false
     private classifier: Transformers.AudioClassificationPipeline | null = null
     private queue = new utils.Queue<AudioQueueElement>()
     private queueRecv = this.queue.pointerUse("recv")
@@ -66,7 +65,7 @@ export default class SpeechFlowNodeGender extends SpeechFlowNode
         this.shutdown = false

         /* pass-through logging */
-        const log =
+        const log = this.log.bind(this)

         /* the used model */
         const model = "Xenova/wav2vec2-large-xlsr-53-gender-recognition-librispeech"
@@ -81,7 +80,7 @@ export default class SpeechFlowNodeGender extends SpeechFlowNode
             artifact += `:${progress.file}`
             let percent = 0
             if (typeof progress.loaded === "number" && typeof progress.total === "number")
-                percent = (progress.loaded
+                percent = (progress.loaded / progress.total) * 100
             else if (typeof progress.progress === "number")
                 percent = progress.progress
             if (percent > 0)
@@ -92,7 +91,7 @@ export default class SpeechFlowNodeGender extends SpeechFlowNode
                 return
             for (const [ artifact, percent ] of progressState) {
                 this.log("info", `downloaded ${percent.toFixed(2)}% of artifact "${artifact}"`)
-                if (percent >=
+                if (percent >= 100.0)
                     progressState.delete(artifact)
             }
         }, 1000)
@@ -103,11 +102,17 @@ export default class SpeechFlowNodeGender extends SpeechFlowNode
                 device: "auto",
                 progress_callback: progressCallback
             })
-
-
+            let timeoutId: ReturnType<typeof setTimeout> | null = null
+            const timeoutPromise = new Promise((resolve, reject) => {
+                timeoutId = setTimeout(() =>
+                    reject(new Error("model initialization timeout")), 30 * 1000)
+            })
             this.classifier = await Promise.race([
                 pipelinePromise, timeoutPromise
-            ])
+            ]).finally(() => {
+                if (timeoutId !== null)
+                    clearTimeout(timeoutId)
+            }) as Transformers.AudioClassificationPipeline
         }
         catch (error) {
             if (this.progressInterval) {
@@ -128,10 +133,15 @@ export default class SpeechFlowNodeGender extends SpeechFlowNode
             if (this.shutdown || this.classifier === null)
                 throw new Error("classifier shutdown during operation")
             const classifyPromise = this.classifier(data)
-
-
-
-
+            let timeoutId: ReturnType<typeof setTimeout> | null = null
+            const timeoutPromise = new Promise((resolve, reject) => {
+                timeoutId = setTimeout(() =>
+                    reject(new Error("classification timeout")), 30 * 1000)
+            })
+            const result = await Promise.race([ classifyPromise, timeoutPromise ]).finally(() => {
+                if (timeoutId !== null)
+                    clearTimeout(timeoutId)
+            }) as Transformers.AudioClassificationOutput | Transformers.AudioClassificationOutput[]
             const classified = Array.isArray(result) ?
                 result as Transformers.AudioClassificationOutput :
                 [ result ]
@@ -139,15 +149,20 @@ export default class SpeechFlowNodeGender extends SpeechFlowNode
             const c2 = classified.find((c: any) => c.label === "female")
             const male = c1 ? c1.score : 0.0
             const female = c2 ? c2.score : 0.0
-
+            if (male > female)
+                return "male"
+            else if (male < female)
+                return "female"
+            else
+                return "unknown"
         }

         /* define sample rate required by model */
         const sampleRateTarget = 16000

         /* work off queued audio frames */
-        const frameWindowDuration =
-        const frameWindowSamples = frameWindowDuration * sampleRateTarget
+        const frameWindowDuration = this.params.window / 1000
+        const frameWindowSamples = Math.floor(frameWindowDuration * sampleRateTarget)
         let lastGender = ""
         let workingOff = false
         const workOffQueue = async () => {
@@ -236,8 +251,7 @@ export default class SpeechFlowNodeGender extends SpeechFlowNode
                 const wav = new WaveFile()
                 wav.fromScratch(self.config.audioChannels, self.config.audioSampleRate, "32f", data)
                 wav.toSampleRate(sampleRateTarget, { method: "cubic" })
-                data = wav.getSamples(false, Float32Array<ArrayBuffer>
-                    any as Float32Array<ArrayBuffer>
+                data = wav.getSamples(false, Float32Array) as any as Float32Array<ArrayBuffer>

                 /* queue chunk and converted data */
                 self.queueRecv.append({ type: "audio-frame", chunk, data })
```