speechflow 1.3.2 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +17 -0
- package/etc/stx.conf +54 -58
- package/package.json +25 -106
- package/speechflow-cli/etc/stx.conf +77 -0
- package/speechflow-cli/package.json +116 -0
- package/speechflow-cli/src/speechflow-node-a2a-meter.ts +217 -0
- package/{src → speechflow-cli/src}/speechflow-node-a2a-vad.ts +14 -21
- package/{src → speechflow-cli/src}/speechflow-node-a2t-deepgram.ts +21 -38
- package/{src → speechflow-cli/src}/speechflow-node-t2a-elevenlabs.ts +10 -16
- package/speechflow-cli/src/speechflow-node-t2t-subtitle.ts +276 -0
- package/{src → speechflow-cli/src}/speechflow-node-x2x-filter.ts +5 -1
- package/{src → speechflow-cli/src}/speechflow-node-x2x-trace.ts +15 -7
- package/{src → speechflow-cli/src}/speechflow-node.ts +7 -0
- package/{src → speechflow-cli/src}/speechflow.ts +81 -25
- package/speechflow-ui-db/etc/eslint.mjs +106 -0
- package/speechflow-ui-db/etc/htmllint.json +55 -0
- package/speechflow-ui-db/etc/stx.conf +79 -0
- package/speechflow-ui-db/etc/stylelint.js +46 -0
- package/speechflow-ui-db/etc/stylelint.yaml +33 -0
- package/speechflow-ui-db/etc/tsc-client.json +30 -0
- package/speechflow-ui-db/etc/tsc.node.json +9 -0
- package/speechflow-ui-db/etc/vite-client.mts +63 -0
- package/speechflow-ui-db/package.d/htmllint-cli+0.0.7.patch +20 -0
- package/speechflow-ui-db/package.json +75 -0
- package/speechflow-ui-db/src/app-icon.ai +1989 -4
- package/speechflow-ui-db/src/app-icon.svg +26 -0
- package/speechflow-ui-db/src/app.styl +64 -0
- package/speechflow-ui-db/src/app.vue +221 -0
- package/speechflow-ui-db/src/index.html +23 -0
- package/speechflow-ui-db/src/index.ts +26 -0
- package/{dst/speechflow.d.ts → speechflow-ui-db/src/lib.d.ts} +5 -3
- package/speechflow-ui-db/src/tsconfig.json +3 -0
- package/speechflow-ui-st/etc/eslint.mjs +106 -0
- package/speechflow-ui-st/etc/htmllint.json +55 -0
- package/speechflow-ui-st/etc/stx.conf +79 -0
- package/speechflow-ui-st/etc/stylelint.js +46 -0
- package/speechflow-ui-st/etc/stylelint.yaml +33 -0
- package/speechflow-ui-st/etc/tsc-client.json +30 -0
- package/speechflow-ui-st/etc/tsc.node.json +9 -0
- package/speechflow-ui-st/etc/vite-client.mts +63 -0
- package/speechflow-ui-st/package.d/htmllint-cli+0.0.7.patch +20 -0
- package/speechflow-ui-st/package.json +79 -0
- package/speechflow-ui-st/src/app-icon.ai +1989 -4
- package/speechflow-ui-st/src/app-icon.svg +26 -0
- package/speechflow-ui-st/src/app.styl +64 -0
- package/speechflow-ui-st/src/app.vue +142 -0
- package/speechflow-ui-st/src/index.html +23 -0
- package/speechflow-ui-st/src/index.ts +26 -0
- package/speechflow-ui-st/src/lib.d.ts +9 -0
- package/speechflow-ui-st/src/tsconfig.json +3 -0
- package/dst/speechflow-node-a2a-ffmpeg.d.ts +0 -13
- package/dst/speechflow-node-a2a-ffmpeg.js +0 -153
- package/dst/speechflow-node-a2a-ffmpeg.js.map +0 -1
- package/dst/speechflow-node-a2a-gender.d.ts +0 -20
- package/dst/speechflow-node-a2a-gender.js +0 -349
- package/dst/speechflow-node-a2a-gender.js.map +0 -1
- package/dst/speechflow-node-a2a-meter.d.ts +0 -14
- package/dst/speechflow-node-a2a-meter.js +0 -196
- package/dst/speechflow-node-a2a-meter.js.map +0 -1
- package/dst/speechflow-node-a2a-mute.d.ts +0 -17
- package/dst/speechflow-node-a2a-mute.js +0 -117
- package/dst/speechflow-node-a2a-mute.js.map +0 -1
- package/dst/speechflow-node-a2a-vad.d.ts +0 -19
- package/dst/speechflow-node-a2a-vad.js +0 -383
- package/dst/speechflow-node-a2a-vad.js.map +0 -1
- package/dst/speechflow-node-a2a-wav.d.ts +0 -11
- package/dst/speechflow-node-a2a-wav.js +0 -211
- package/dst/speechflow-node-a2a-wav.js.map +0 -1
- package/dst/speechflow-node-a2t-deepgram.d.ts +0 -19
- package/dst/speechflow-node-a2t-deepgram.js +0 -345
- package/dst/speechflow-node-a2t-deepgram.js.map +0 -1
- package/dst/speechflow-node-t2a-elevenlabs.d.ts +0 -18
- package/dst/speechflow-node-t2a-elevenlabs.js +0 -244
- package/dst/speechflow-node-t2a-elevenlabs.js.map +0 -1
- package/dst/speechflow-node-t2a-kokoro.d.ts +0 -14
- package/dst/speechflow-node-t2a-kokoro.js +0 -155
- package/dst/speechflow-node-t2a-kokoro.js.map +0 -1
- package/dst/speechflow-node-t2t-deepl.d.ts +0 -15
- package/dst/speechflow-node-t2t-deepl.js +0 -146
- package/dst/speechflow-node-t2t-deepl.js.map +0 -1
- package/dst/speechflow-node-t2t-format.d.ts +0 -11
- package/dst/speechflow-node-t2t-format.js +0 -82
- package/dst/speechflow-node-t2t-format.js.map +0 -1
- package/dst/speechflow-node-t2t-ollama.d.ts +0 -13
- package/dst/speechflow-node-t2t-ollama.js +0 -247
- package/dst/speechflow-node-t2t-ollama.js.map +0 -1
- package/dst/speechflow-node-t2t-openai.d.ts +0 -13
- package/dst/speechflow-node-t2t-openai.js +0 -227
- package/dst/speechflow-node-t2t-openai.js.map +0 -1
- package/dst/speechflow-node-t2t-sentence.d.ts +0 -17
- package/dst/speechflow-node-t2t-sentence.js +0 -250
- package/dst/speechflow-node-t2t-sentence.js.map +0 -1
- package/dst/speechflow-node-t2t-subtitle.d.ts +0 -12
- package/dst/speechflow-node-t2t-subtitle.js +0 -166
- package/dst/speechflow-node-t2t-subtitle.js.map +0 -1
- package/dst/speechflow-node-t2t-transformers.d.ts +0 -14
- package/dst/speechflow-node-t2t-transformers.js +0 -265
- package/dst/speechflow-node-t2t-transformers.js.map +0 -1
- package/dst/speechflow-node-x2x-filter.d.ts +0 -11
- package/dst/speechflow-node-x2x-filter.js +0 -117
- package/dst/speechflow-node-x2x-filter.js.map +0 -1
- package/dst/speechflow-node-x2x-trace.d.ts +0 -11
- package/dst/speechflow-node-x2x-trace.js +0 -104
- package/dst/speechflow-node-x2x-trace.js.map +0 -1
- package/dst/speechflow-node-xio-device.d.ts +0 -13
- package/dst/speechflow-node-xio-device.js +0 -230
- package/dst/speechflow-node-xio-device.js.map +0 -1
- package/dst/speechflow-node-xio-file.d.ts +0 -11
- package/dst/speechflow-node-xio-file.js +0 -216
- package/dst/speechflow-node-xio-file.js.map +0 -1
- package/dst/speechflow-node-xio-mqtt.d.ts +0 -13
- package/dst/speechflow-node-xio-mqtt.js +0 -188
- package/dst/speechflow-node-xio-mqtt.js.map +0 -1
- package/dst/speechflow-node-xio-websocket.d.ts +0 -13
- package/dst/speechflow-node-xio-websocket.js +0 -278
- package/dst/speechflow-node-xio-websocket.js.map +0 -1
- package/dst/speechflow-node.d.ts +0 -63
- package/dst/speechflow-node.js +0 -177
- package/dst/speechflow-node.js.map +0 -1
- package/dst/speechflow-utils.d.ts +0 -74
- package/dst/speechflow-utils.js +0 -519
- package/dst/speechflow-utils.js.map +0 -1
- package/dst/speechflow.js +0 -787
- package/dst/speechflow.js.map +0 -1
- package/src/speechflow-node-a2a-meter.ts +0 -177
- package/src/speechflow-node-t2t-subtitle.ts +0 -149
- /package/{etc → speechflow-cli/etc}/biome.jsonc +0 -0
- /package/{etc → speechflow-cli/etc}/eslint.mjs +0 -0
- /package/{etc → speechflow-cli/etc}/oxlint.jsonc +0 -0
- /package/{etc → speechflow-cli/etc}/speechflow.bat +0 -0
- /package/{etc → speechflow-cli/etc}/speechflow.sh +0 -0
- /package/{etc → speechflow-cli/etc}/speechflow.yaml +0 -0
- /package/{etc → speechflow-cli/etc}/tsconfig.json +0 -0
- /package/{package.d → speechflow-cli/package.d}/@ericedouard+vad-node-realtime+0.2.0.patch +0 -0
- /package/{src → speechflow-cli/src}/lib.d.ts +0 -0
- /package/{src → speechflow-cli/src}/speechflow-logo.ai +0 -0
- /package/{src → speechflow-cli/src}/speechflow-logo.svg +0 -0
- /package/{src → speechflow-cli/src}/speechflow-node-a2a-ffmpeg.ts +0 -0
- /package/{src → speechflow-cli/src}/speechflow-node-a2a-gender.ts +0 -0
- /package/{src → speechflow-cli/src}/speechflow-node-a2a-mute.ts +0 -0
- /package/{src → speechflow-cli/src}/speechflow-node-a2a-wav.ts +0 -0
- /package/{src → speechflow-cli/src}/speechflow-node-t2a-kokoro.ts +0 -0
- /package/{src → speechflow-cli/src}/speechflow-node-t2t-deepl.ts +0 -0
- /package/{src → speechflow-cli/src}/speechflow-node-t2t-format.ts +0 -0
- /package/{src → speechflow-cli/src}/speechflow-node-t2t-ollama.ts +0 -0
- /package/{src → speechflow-cli/src}/speechflow-node-t2t-openai.ts +0 -0
- /package/{src → speechflow-cli/src}/speechflow-node-t2t-sentence.ts +0 -0
- /package/{src → speechflow-cli/src}/speechflow-node-t2t-transformers.ts +0 -0
- /package/{src → speechflow-cli/src}/speechflow-node-xio-device.ts +0 -0
- /package/{src → speechflow-cli/src}/speechflow-node-xio-file.ts +0 -0
- /package/{src → speechflow-cli/src}/speechflow-node-xio-mqtt.ts +0 -0
- /package/{src → speechflow-cli/src}/speechflow-node-xio-websocket.ts +0 -0
- /package/{src → speechflow-cli/src}/speechflow-utils.ts +0 -0
- /package/{tsconfig.json → speechflow-cli/tsconfig.json} +0 -0
|
@@ -0,0 +1,217 @@
|
|
|
1
|
+
/*
|
|
2
|
+
** SpeechFlow - Speech Processing Flow Graph
|
|
3
|
+
** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
|
|
4
|
+
** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
/* standard dependencies */
|
|
8
|
+
import Stream from "node:stream"
|
|
9
|
+
|
|
10
|
+
/* external dependencies */
|
|
11
|
+
import { getLUFS, getRMS, AudioData } from "audio-inspect"
|
|
12
|
+
|
|
13
|
+
/* internal dependencies */
|
|
14
|
+
import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
|
|
15
|
+
import * as utils from "./speechflow-utils"
|
|
16
|
+
|
|
17
|
+
/* SpeechFlow node for audio metering */
|
|
18
|
+
/*  SpeechFlow node for audio metering: passes audio chunks through
    unchanged while periodically reporting the short-term loudness
    (LUFS-S) and RMS level of the most recent 3 seconds of audio.

    Parameters (as declared via configure()):
    - interval:  reporting period in milliseconds (default: 250)
    - dashboard: dashboard identifier passed to dashboardInfo();
                 an empty string disables dashboard reporting  */
export default class SpeechFlowNodeMeter extends SpeechFlowNode {
    /*  declare official node name  */
    public static name = "meter"

    /*  internal state  */
    private emitInterval: ReturnType<typeof setInterval> | null = null
    private calcInterval: ReturnType<typeof setInterval> | null = null
    private silenceTimer: ReturnType<typeof setTimeout>  | null = null
    private pendingCalculations = new Set<ReturnType<typeof setTimeout>>()
    private chunkBuffer = new Float32Array(0)
    private destroyed = false

    /*  construct node  */
    constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
        super(id, cfg, opts, args)

        /*  declare node configuration parameters  */
        this.configure({
            interval:  { type: "number", pos: 0, val: 250 },
            dashboard: { type: "string",         val: "" }
        })

        /*  declare node input/output format  */
        this.input  = "audio"
        this.output = "audio"
    }

    /*  cancel the pending "fall back to silence" timer (if any)  */
    private clearSilenceTimer () {
        if (this.silenceTimer !== null) {
            clearTimeout(this.silenceTimer)
            this.silenceTimer = null
        }
    }

    /*  open node: set up the sample window, the calculation and
        reporting intervals and the pass-through stream.
        Throws an Error if the configured audio format is not PCM-S16LE.  */
    async open () {
        /*  sanity check situation  */
        if (this.config.audioBitDepth !== 16 || !this.config.audioLittleEndian)
            throw new Error("meter node currently supports PCM-S16LE audio only")

        /*  clear destruction flag  */
        this.destroyed = false

        /*  sliding sample window (a fresh Float32Array is zero-filled)  */
        const sampleWindowDuration = 3 /* LUFS-S requires 3s */
        const sampleWindowSize     = Math.floor(this.config.audioSampleRate * sampleWindowDuration)
        const sampleWindow         = new Float32Array(sampleWindowSize)
        let lufss = -60
        let rms   = -60

        /*  chunk processing state
            (at least one sample per chunk, to guarantee progress when draining)  */
        const chunkDuration   = 0.050 /* meter update frequency is about 50ms */
        const samplesPerChunk = Math.max(1, Math.floor(this.config.audioSampleRate * chunkDuration))
        this.chunkBuffer = new Float32Array(0)

        /*  shift the sliding window in-place and append the new samples at its end  */
        const slideWindow = (samples: Float32Array) => {
            if (samples.length >= sampleWindowSize)
                sampleWindow.set(samples.subarray(samples.length - sampleWindowSize), 0)
            else {
                sampleWindow.copyWithin(0, samples.length)
                sampleWindow.set(samples, sampleWindowSize - samples.length)
            }
        }

        /*  asynchronously calculate the LUFS-S and RMS metrics over the current window  */
        const scheduleCalculation = () => {
            const calculator = setTimeout(() => {
                /*  always forget the handle, even if we bail out below  */
                this.pendingCalculations.delete(calculator)
                if (this.destroyed)
                    return
                try {
                    /*  NOTE(review): channelData carries a single channel while
                        numberOfChannels/channelMode follow config.audioChannels --
                        presumably mono input is assumed; confirm for multi-channel setups  */
                    const audioData = {
                        sampleRate:       this.config.audioSampleRate,
                        numberOfChannels: this.config.audioChannels,
                        channelData:      [ sampleWindow ],
                        duration:         sampleWindowDuration,
                        length:           sampleWindow.length
                    } satisfies AudioData
                    const lufs = getLUFS(audioData, {
                        channelMode: this.config.audioChannels === 1 ? "mono" : "stereo",
                        calculateShortTerm:     true,
                        calculateMomentary:     false,
                        calculateLoudnessRange: false,
                        calculateTruePeak:      false
                    })

                    /*  take over the results (a missing or empty short-term
                        result yields 0, never undefined)  */
                    this.clearSilenceTimer()
                    lufss = lufs.shortTerm?.[0] ?? 0
                    rms   = getRMS(audioData, { asDB: true })

                    /*  fall back to the -60 dB floor if no further
                        calculation refreshes the values within 500ms  */
                    this.silenceTimer = setTimeout(() => {
                        this.silenceTimer = null
                        lufss = -60
                        rms   = -60
                    }, 500)
                }
                catch (error) {
                    if (!this.destroyed)
                        this.log("warning", `meter calculation error: ${error}`)
                }
            }, 0)
            this.pendingCalculations.add(calculator)
        }

        /*  setup chunking interval  */
        this.calcInterval = setInterval(() => {
            if (this.destroyed)
                return

            /*  drain ALL complete 50ms chunks: timers only ever fire late, so
                consuming just one chunk per tick would let the backlog (and
                the cost of every append in transform()) grow without bound  */
            const chunks = Math.floor(this.chunkBuffer.length / samplesPerChunk)
            if (chunks === 0)
                return
            const consumed = chunks * samplesPerChunk
            slideWindow(this.chunkBuffer.subarray(0, consumed))
            this.chunkBuffer = this.chunkBuffer.slice(consumed)
            scheduleCalculation()
        }, chunkDuration * 1000)

        /*  setup loudness emitting interval  */
        this.emitInterval = setInterval(() => {
            if (this.destroyed)
                return
            this.log("debug", `LUFS-S: ${lufss.toFixed(1)} dB, RMS: ${rms.toFixed(1)} dB`)
            this.sendResponse([ "meter", "LUFS-S", lufss ])
            this.sendResponse([ "meter", "RMS", rms ])
            if (this.params.dashboard !== "")
                this.dashboardInfo("audio", this.params.dashboard, "final", lufss)
        }, this.params.interval)

        /*  provide Transform stream and internally attach to meter  */
        const self = this
        this.stream = new Stream.Transform({
            writableObjectMode: true,
            readableObjectMode: true,
            decodeStrings:      false,
            highWaterMark:      1,

            /*  transform audio chunk: collect its samples for metering
                and pass the original chunk through unchanged  */
            transform (chunk: SpeechFlowChunk, encoding, callback) {
                if (self.destroyed) {
                    callback(new Error("stream already destroyed"))
                    return
                }
                if (!Buffer.isBuffer(chunk.payload))
                    callback(new Error("expected audio input as Buffer chunks"))
                else if (chunk.payload.byteLength === 0)
                    callback()
                else {
                    try {
                        /*  convert audio samples from PCM/I16 to PCM/F32  */
                        const data = utils.convertBufToF32(chunk.payload, self.config.audioLittleEndian)

                        /*  append new data to buffer  */
                        const combinedLength = self.chunkBuffer.length + data.length
                        const newBuffer = new Float32Array(combinedLength)
                        newBuffer.set(self.chunkBuffer, 0)
                        newBuffer.set(data, self.chunkBuffer.length)
                        self.chunkBuffer = newBuffer

                        /*  pass-through original audio chunk  */
                        this.push(chunk)
                        callback()
                    }
                    catch (error) {
                        callback(error instanceof Error ? error : new Error("Meter processing failed"))
                    }
                }
            },

            /*  signal end-of-stream to the readable side  */
            final (callback) {
                if (self.destroyed) {
                    callback()
                    return
                }
                this.push(null)
                callback()
            }
        })
    }

    /*  close node: stop all timers and destroy the stream  */
    async close () {
        /*  indicate destruction  */
        this.destroyed = true

        /*  clear all pending calculations  */
        for (const timeout of this.pendingCalculations)
            clearTimeout(timeout)
        this.pendingCalculations.clear()

        /*  stop the pending silence fallback  */
        this.clearSilenceTimer()

        /*  stop intervals  */
        if (this.emitInterval !== null) {
            clearInterval(this.emitInterval)
            this.emitInterval = null
        }
        if (this.calcInterval !== null) {
            clearInterval(this.calcInterval)
            this.calcInterval = null
        }

        /*  release buffered samples  */
        this.chunkBuffer = new Float32Array(0)

        /*  close stream  */
        if (this.stream !== null) {
            this.stream.destroy()
            this.stream = null
        }
    }
}
|
|
@@ -77,6 +77,14 @@ export default class SpeechFlowNodeVAD extends SpeechFlowNode {
|
|
|
77
77
|
const vadSampleRateTarget = 16000 /* internal target of VAD */
|
|
78
78
|
const vadSamplesPerFrame = 512 /* required for VAD v5 */
|
|
79
79
|
|
|
80
|
+
/* helper function for timer cleanup */
|
|
81
|
+
const clearTailTimer = () => {
|
|
82
|
+
if (this.tailTimer !== null) {
|
|
83
|
+
clearTimeout(this.tailTimer)
|
|
84
|
+
this.tailTimer = null
|
|
85
|
+
}
|
|
86
|
+
}
|
|
87
|
+
|
|
80
88
|
/* establish Voice Activity Detection (VAD) facility */
|
|
81
89
|
let tail = false
|
|
82
90
|
try {
|
|
@@ -95,10 +103,7 @@ export default class SpeechFlowNodeVAD extends SpeechFlowNode {
|
|
|
95
103
|
this.log("info", "VAD: speech start")
|
|
96
104
|
if (this.params.mode === "unplugged") {
|
|
97
105
|
tail = false
|
|
98
|
-
|
|
99
|
-
clearTimeout(this.tailTimer)
|
|
100
|
-
this.tailTimer = null
|
|
101
|
-
}
|
|
106
|
+
clearTailTimer()
|
|
102
107
|
}
|
|
103
108
|
},
|
|
104
109
|
onSpeechEnd: (audio) => {
|
|
@@ -108,10 +113,7 @@ export default class SpeechFlowNodeVAD extends SpeechFlowNode {
|
|
|
108
113
|
this.log("info", `VAD: speech end (duration: ${duration.toFixed(2)}s)`)
|
|
109
114
|
if (this.params.mode === "unplugged") {
|
|
110
115
|
tail = true
|
|
111
|
-
|
|
112
|
-
clearTimeout(this.tailTimer)
|
|
113
|
-
this.tailTimer = null
|
|
114
|
-
}
|
|
116
|
+
clearTailTimer()
|
|
115
117
|
this.tailTimer = setTimeout(() => {
|
|
116
118
|
if (this.destroyed || this.tailTimer === null)
|
|
117
119
|
return
|
|
@@ -121,14 +123,12 @@ export default class SpeechFlowNodeVAD extends SpeechFlowNode {
|
|
|
121
123
|
}
|
|
122
124
|
},
|
|
123
125
|
onVADMisfire: () => {
|
|
124
|
-
if (this.destroyed)
|
|
126
|
+
if (this.destroyed)
|
|
127
|
+
return
|
|
125
128
|
this.log("info", "VAD: speech end (segment too short)")
|
|
126
129
|
if (this.params.mode === "unplugged") {
|
|
127
130
|
tail = true
|
|
128
|
-
|
|
129
|
-
clearTimeout(this.tailTimer)
|
|
130
|
-
this.tailTimer = null
|
|
131
|
-
}
|
|
131
|
+
clearTailTimer()
|
|
132
132
|
this.tailTimer = setTimeout(() => {
|
|
133
133
|
if (this.destroyed || this.tailTimer === null)
|
|
134
134
|
return
|
|
@@ -152,14 +152,7 @@ export default class SpeechFlowNodeVAD extends SpeechFlowNode {
|
|
|
152
152
|
|
|
153
153
|
/* annotate the entire audio chunk */
|
|
154
154
|
if (element.segmentIdx >= element.segmentData.length) {
|
|
155
|
-
|
|
156
|
-
for (const segment of element.segmentData) {
|
|
157
|
-
if (segment.isSpeech) {
|
|
158
|
-
isSpeech = true
|
|
159
|
-
break
|
|
160
|
-
}
|
|
161
|
-
}
|
|
162
|
-
element.isSpeech = isSpeech
|
|
155
|
+
element.isSpeech = element.segmentData.some(segment => segment.isSpeech)
|
|
163
156
|
this.queueVAD.touch()
|
|
164
157
|
this.queueVAD.walk(+1)
|
|
165
158
|
}
|
|
@@ -33,11 +33,12 @@ export default class SpeechFlowNodeDeepgram extends SpeechFlowNode {
|
|
|
33
33
|
|
|
34
34
|
/* declare node configuration parameters */
|
|
35
35
|
this.configure({
|
|
36
|
-
key: { type: "string",
|
|
37
|
-
keyAdm: { type: "string",
|
|
38
|
-
model: { type: "string",
|
|
39
|
-
version: { type: "string",
|
|
40
|
-
language: { type: "string",
|
|
36
|
+
key: { type: "string", val: process.env.SPEECHFLOW_DEEPGRAM_KEY },
|
|
37
|
+
keyAdm: { type: "string", val: process.env.SPEECHFLOW_DEEPGRAM_KEY_ADM },
|
|
38
|
+
model: { type: "string", val: "nova-2", pos: 0 },
|
|
39
|
+
version: { type: "string", val: "latest", pos: 1 },
|
|
40
|
+
language: { type: "string", val: "multi", pos: 2 },
|
|
41
|
+
interim: { type: "boolean", val: false, pos: 3 }
|
|
41
42
|
})
|
|
42
43
|
|
|
43
44
|
/* declare node input/output format */
|
|
@@ -96,14 +97,15 @@ export default class SpeechFlowNodeDeepgram extends SpeechFlowNode {
|
|
|
96
97
|
sample_rate: this.config.audioSampleRate,
|
|
97
98
|
encoding: "linear16",
|
|
98
99
|
multichannel: false,
|
|
99
|
-
endpointing:
|
|
100
|
-
interim_results:
|
|
100
|
+
endpointing: false,
|
|
101
|
+
interim_results: this.params.interim,
|
|
101
102
|
smart_format: true,
|
|
102
103
|
punctuate: true,
|
|
103
104
|
filler_words: true,
|
|
104
|
-
diarize: false,
|
|
105
105
|
numerals: true,
|
|
106
|
-
|
|
106
|
+
diarize: false,
|
|
107
|
+
profanity_filter: false,
|
|
108
|
+
redact: false
|
|
107
109
|
})
|
|
108
110
|
|
|
109
111
|
/* hook onto Deepgram API events */
|
|
@@ -113,6 +115,7 @@ export default class SpeechFlowNodeDeepgram extends SpeechFlowNode {
|
|
|
113
115
|
const text = (data.channel?.alternatives[0]?.transcript ?? "") as string
|
|
114
116
|
const words = (data.channel?.alternatives[0]?.words ?? []) as
|
|
115
117
|
{ word: string, punctuated_word?: string, start: number, end: number }[]
|
|
118
|
+
const isFinal = (data.is_final ?? false) as boolean
|
|
116
119
|
if (text === "")
|
|
117
120
|
this.log("info", `empty/dummy text received (start: ${data.start}s, duration: ${data.duration.toFixed(2)}s)`)
|
|
118
121
|
else {
|
|
@@ -130,10 +133,17 @@ export default class SpeechFlowNodeDeepgram extends SpeechFlowNode {
|
|
|
130
133
|
const end = Duration.fromMillis(word.end * 1000).plus(this.timeZeroOffset)
|
|
131
134
|
return { word: word.punctuated_word ?? word.word, start, end }
|
|
132
135
|
}))
|
|
133
|
-
const chunk = new SpeechFlowChunk(start, end,
|
|
136
|
+
const chunk = new SpeechFlowChunk(start, end,
|
|
137
|
+
isFinal ? "final" : "intermediate", "text", text, meta)
|
|
134
138
|
this.queue.write(chunk)
|
|
135
139
|
}
|
|
136
140
|
})
|
|
141
|
+
this.dg.on(Deepgram.LiveTranscriptionEvents.SpeechStarted, (data) => {
|
|
142
|
+
this.log("info", "speech started", data)
|
|
143
|
+
})
|
|
144
|
+
this.dg.on(Deepgram.LiveTranscriptionEvents.UtteranceEnd, (data) => {
|
|
145
|
+
this.log("info", "utterance end received", data)
|
|
146
|
+
})
|
|
137
147
|
this.dg.on(Deepgram.LiveTranscriptionEvents.Metadata, (data) => {
|
|
138
148
|
this.log("info", "metadata received")
|
|
139
149
|
})
|
|
@@ -170,31 +180,6 @@ export default class SpeechFlowNodeDeepgram extends SpeechFlowNode {
|
|
|
170
180
|
/* remember opening time to receive time zero offset */
|
|
171
181
|
this.timeOpen = DateTime.now()
|
|
172
182
|
|
|
173
|
-
/* workaround Deepgram initialization problems */
|
|
174
|
-
let initDone = false
|
|
175
|
-
const initTimeoutStart = () => {
|
|
176
|
-
if (initDone || this.destroyed)
|
|
177
|
-
return
|
|
178
|
-
this.initTimeout = setTimeout(async () => {
|
|
179
|
-
if (this.initTimeout === null || this.destroyed)
|
|
180
|
-
return
|
|
181
|
-
this.initTimeout = null
|
|
182
|
-
this.log("warning", "initialization timeout -- restarting service usage")
|
|
183
|
-
await this.close()
|
|
184
|
-
if (!this.destroyed)
|
|
185
|
-
await this.open()
|
|
186
|
-
}, 3 * 1000)
|
|
187
|
-
}
|
|
188
|
-
const initTimeoutStop = () => {
|
|
189
|
-
if (initDone)
|
|
190
|
-
return
|
|
191
|
-
initDone = true
|
|
192
|
-
if (this.initTimeout !== null) {
|
|
193
|
-
clearTimeout(this.initTimeout)
|
|
194
|
-
this.initTimeout = null
|
|
195
|
-
}
|
|
196
|
-
}
|
|
197
|
-
|
|
198
183
|
/* provide Duplex stream and internally attach to Deepgram API */
|
|
199
184
|
const self = this
|
|
200
185
|
this.stream = new Stream.Duplex({
|
|
@@ -214,7 +199,6 @@ export default class SpeechFlowNodeDeepgram extends SpeechFlowNode {
|
|
|
214
199
|
else {
|
|
215
200
|
if (chunk.payload.byteLength > 0) {
|
|
216
201
|
self.log("debug", `send data (${chunk.payload.byteLength} bytes)`)
|
|
217
|
-
initTimeoutStart()
|
|
218
202
|
if (chunk.meta.size > 0)
|
|
219
203
|
metastore.store(chunk.timestampStart, chunk.timestampEnd, chunk.meta)
|
|
220
204
|
try {
|
|
@@ -256,8 +240,7 @@ export default class SpeechFlowNodeDeepgram extends SpeechFlowNode {
|
|
|
256
240
|
this.push(null)
|
|
257
241
|
}
|
|
258
242
|
else {
|
|
259
|
-
self.log("
|
|
260
|
-
initTimeoutStop()
|
|
243
|
+
self.log("debug", `received data (${chunk.payload.length} bytes)`)
|
|
261
244
|
this.push(chunk, self.config.textEncoding)
|
|
262
245
|
}
|
|
263
246
|
}).catch((error) => {
|
|
@@ -151,22 +151,22 @@ export default class SpeechFlowNodeElevenlabs extends SpeechFlowNode {
|
|
|
151
151
|
processTimeout = null
|
|
152
152
|
callback(new Error("ElevenLabs API timeout"))
|
|
153
153
|
}, 60 * 1000)
|
|
154
|
+
const clearProcessTimeout = () => {
|
|
155
|
+
if (processTimeout !== null) {
|
|
156
|
+
clearTimeout(processTimeout)
|
|
157
|
+
processTimeout = null
|
|
158
|
+
}
|
|
159
|
+
}
|
|
154
160
|
try {
|
|
155
161
|
const stream = await speechStream(chunk.payload as string)
|
|
156
162
|
if (self.destroyed) {
|
|
157
|
-
|
|
158
|
-
clearTimeout(processTimeout)
|
|
159
|
-
processTimeout = null
|
|
160
|
-
}
|
|
163
|
+
clearProcessTimeout()
|
|
161
164
|
callback(new Error("stream destroyed during processing"))
|
|
162
165
|
return
|
|
163
166
|
}
|
|
164
167
|
const buffer = await getStreamAsBuffer(stream)
|
|
165
168
|
if (self.destroyed) {
|
|
166
|
-
|
|
167
|
-
clearTimeout(processTimeout)
|
|
168
|
-
processTimeout = null
|
|
169
|
-
}
|
|
169
|
+
clearProcessTimeout()
|
|
170
170
|
callback(new Error("stream destroyed during processing"))
|
|
171
171
|
return
|
|
172
172
|
}
|
|
@@ -175,18 +175,12 @@ export default class SpeechFlowNodeElevenlabs extends SpeechFlowNode {
|
|
|
175
175
|
const chunkNew = chunk.clone()
|
|
176
176
|
chunkNew.type = "audio"
|
|
177
177
|
chunkNew.payload = bufferResampled
|
|
178
|
-
|
|
179
|
-
clearTimeout(processTimeout)
|
|
180
|
-
processTimeout = null
|
|
181
|
-
}
|
|
178
|
+
clearProcessTimeout()
|
|
182
179
|
this.push(chunkNew)
|
|
183
180
|
callback()
|
|
184
181
|
}
|
|
185
182
|
catch (error) {
|
|
186
|
-
|
|
187
|
-
clearTimeout(processTimeout)
|
|
188
|
-
processTimeout = null
|
|
189
|
-
}
|
|
183
|
+
clearProcessTimeout()
|
|
190
184
|
callback(error instanceof Error ? error : new Error("ElevenLabs processing failed"))
|
|
191
185
|
}
|
|
192
186
|
})()
|