speechflow 0.9.8 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +18 -0
- package/LICENSE.txt +674 -0
- package/README.md +114 -17
- package/dst/speechflow-node-a2a-ffmpeg.js +1 -0
- package/dst/speechflow-node-a2a-ffmpeg.js.map +1 -0
- package/dst/{speechflow-node-deepl.d.ts → speechflow-node-a2a-meter.d.ts} +2 -2
- package/dst/speechflow-node-a2a-meter.js +147 -0
- package/dst/speechflow-node-a2a-meter.js.map +1 -0
- package/dst/speechflow-node-a2a-mute.d.ts +16 -0
- package/dst/speechflow-node-a2a-mute.js +90 -0
- package/dst/speechflow-node-a2a-mute.js.map +1 -0
- package/dst/{speechflow-node-whisper.d.ts → speechflow-node-a2a-vad.d.ts} +2 -5
- package/dst/speechflow-node-a2a-vad.js +272 -0
- package/dst/speechflow-node-a2a-vad.js.map +1 -0
- package/dst/speechflow-node-a2a-wav.js +1 -0
- package/dst/speechflow-node-a2a-wav.js.map +1 -0
- package/dst/speechflow-node-a2t-deepgram.js +2 -1
- package/dst/speechflow-node-a2t-deepgram.js.map +1 -0
- package/dst/speechflow-node-t2a-elevenlabs.js +1 -0
- package/dst/speechflow-node-t2a-elevenlabs.js.map +1 -0
- package/dst/{speechflow-node-elevenlabs.d.ts → speechflow-node-t2a-kokoro.d.ts} +2 -2
- package/dst/speechflow-node-t2a-kokoro.js +148 -0
- package/dst/speechflow-node-t2a-kokoro.js.map +1 -0
- package/dst/speechflow-node-t2t-deepl.js +1 -0
- package/dst/speechflow-node-t2t-deepl.js.map +1 -0
- package/dst/speechflow-node-t2t-format.js +1 -0
- package/dst/speechflow-node-t2t-format.js.map +1 -0
- package/dst/{speechflow-node-gemma.d.ts → speechflow-node-t2t-ollama.d.ts} +1 -1
- package/dst/{speechflow-node-gemma.js → speechflow-node-t2t-ollama.js} +41 -8
- package/dst/speechflow-node-t2t-ollama.js.map +1 -0
- package/dst/{speechflow-node-t2t-gemma.d.ts → speechflow-node-t2t-openai.d.ts} +2 -2
- package/dst/{speechflow-node-t2t-gemma.js → speechflow-node-t2t-openai.js} +43 -30
- package/dst/speechflow-node-t2t-openai.js.map +1 -0
- package/dst/speechflow-node-t2t-subtitle.js +1 -0
- package/dst/speechflow-node-t2t-subtitle.js.map +1 -0
- package/dst/{speechflow-node-opus.d.ts → speechflow-node-t2t-transformers.d.ts} +3 -1
- package/dst/speechflow-node-t2t-transformers.js +264 -0
- package/dst/speechflow-node-t2t-transformers.js.map +1 -0
- package/dst/speechflow-node-x2x-trace.js +3 -2
- package/dst/speechflow-node-x2x-trace.js.map +1 -0
- package/dst/speechflow-node-xio-device.js +1 -0
- package/dst/speechflow-node-xio-device.js.map +1 -0
- package/dst/speechflow-node-xio-file.js +1 -0
- package/dst/speechflow-node-xio-file.js.map +1 -0
- package/dst/speechflow-node-xio-mqtt.js +1 -0
- package/dst/speechflow-node-xio-mqtt.js.map +1 -0
- package/dst/speechflow-node-xio-websocket.js +1 -0
- package/dst/speechflow-node-xio-websocket.js.map +1 -0
- package/dst/speechflow-node.d.ts +3 -0
- package/dst/speechflow-node.js +10 -0
- package/dst/speechflow-node.js.map +1 -0
- package/dst/speechflow-utils.d.ts +33 -0
- package/dst/speechflow-utils.js +183 -1
- package/dst/speechflow-utils.js.map +1 -0
- package/dst/speechflow.js +295 -46
- package/dst/speechflow.js.map +1 -0
- package/etc/speechflow.yaml +14 -5
- package/etc/stx.conf +1 -1
- package/etc/tsconfig.json +2 -2
- package/package.json +17 -10
- package/src/speechflow-node-a2a-meter.ts +125 -0
- package/src/speechflow-node-a2a-mute.ts +101 -0
- package/src/speechflow-node-a2a-vad.ts +266 -0
- package/src/speechflow-node-a2t-deepgram.ts +1 -1
- package/src/speechflow-node-t2a-kokoro.ts +160 -0
- package/src/{speechflow-node-t2t-gemma.ts → speechflow-node-t2t-ollama.ts} +44 -10
- package/src/speechflow-node-t2t-openai.ts +246 -0
- package/src/speechflow-node-t2t-transformers.ts +249 -0
- package/src/speechflow-node-x2x-trace.ts +2 -2
- package/src/speechflow-node-xio-websocket.ts +5 -5
- package/src/speechflow-node.ts +12 -0
- package/src/speechflow-utils.ts +195 -0
- package/src/speechflow.ts +279 -46
- package/dst/speechflow-node-deepgram.d.ts +0 -12
- package/dst/speechflow-node-deepgram.js +0 -220
- package/dst/speechflow-node-deepl.js +0 -128
- package/dst/speechflow-node-device.d.ts +0 -13
- package/dst/speechflow-node-device.js +0 -205
- package/dst/speechflow-node-elevenlabs.js +0 -182
- package/dst/speechflow-node-ffmpeg.d.ts +0 -13
- package/dst/speechflow-node-ffmpeg.js +0 -152
- package/dst/speechflow-node-file.d.ts +0 -11
- package/dst/speechflow-node-file.js +0 -176
- package/dst/speechflow-node-format.d.ts +0 -11
- package/dst/speechflow-node-format.js +0 -80
- package/dst/speechflow-node-mqtt.d.ts +0 -13
- package/dst/speechflow-node-mqtt.js +0 -181
- package/dst/speechflow-node-opus.js +0 -135
- package/dst/speechflow-node-subtitle.d.ts +0 -12
- package/dst/speechflow-node-subtitle.js +0 -96
- package/dst/speechflow-node-t2t-opus.d.ts +0 -12
- package/dst/speechflow-node-t2t-opus.js +0 -135
- package/dst/speechflow-node-trace.d.ts +0 -11
- package/dst/speechflow-node-trace.js +0 -88
- package/dst/speechflow-node-wav.d.ts +0 -11
- package/dst/speechflow-node-wav.js +0 -170
- package/dst/speechflow-node-websocket.d.ts +0 -13
- package/dst/speechflow-node-websocket.js +0 -275
- package/dst/speechflow-node-whisper-common.d.ts +0 -34
- package/dst/speechflow-node-whisper-common.js +0 -7
- package/dst/speechflow-node-whisper-ggml.d.ts +0 -1
- package/dst/speechflow-node-whisper-ggml.js +0 -97
- package/dst/speechflow-node-whisper-onnx.d.ts +0 -1
- package/dst/speechflow-node-whisper-onnx.js +0 -131
- package/dst/speechflow-node-whisper-worker-ggml.d.ts +0 -1
- package/dst/speechflow-node-whisper-worker-ggml.js +0 -97
- package/dst/speechflow-node-whisper-worker-onnx.d.ts +0 -1
- package/dst/speechflow-node-whisper-worker-onnx.js +0 -131
- package/dst/speechflow-node-whisper-worker.d.ts +0 -1
- package/dst/speechflow-node-whisper-worker.js +0 -116
- package/dst/speechflow-node-whisper-worker2.d.ts +0 -1
- package/dst/speechflow-node-whisper-worker2.js +0 -82
- package/dst/speechflow-node-whisper.js +0 -604
- package/src/speechflow-node-t2t-opus.ts +0 -111
|
@@ -0,0 +1,249 @@
|
|
|
1
|
+
/*
|
|
2
|
+
** SpeechFlow - Speech Processing Flow Graph
|
|
3
|
+
** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
|
|
4
|
+
** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
/* standard dependencies */
|
|
8
|
+
import path from "node:path"
|
|
9
|
+
import Stream from "node:stream"
|
|
10
|
+
|
|
11
|
+
/* external dependencies */
|
|
12
|
+
import * as Transformers from "@huggingface/transformers"
|
|
13
|
+
|
|
14
|
+
/* internal dependencies */
|
|
15
|
+
import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
|
|
16
|
+
|
|
17
|
+
/* internal utility types */
|
|
18
|
+
type ConfigEntry = { systemPrompt: string, chat: Array<{ role: string, content: string }> }
|
|
19
|
+
type Config = { [ key: string ]: ConfigEntry }
|
|
20
|
+
|
|
21
|
+
/*  SpeechFlow node for Transformers text-to-text translation.

    Translates text chunks between German (DE) and English (EN), either
    through an OPUS-MT "translation" pipeline or through a SmolLM3
    "text-generation" pipeline which is driven by a system prompt plus a
    few-shot example chat (see "setup" below).  */
export default class SpeechFlowNodeTransformers extends SpeechFlowNode {
    /* declare official node name */
    public static name = "transformers"

    /* internal state (only the pipeline matching the "model" parameter gets instantiated) */
    private translator: Transformers.TranslationPipeline    | null = null
    private generator:  Transformers.TextGenerationPipeline | null = null

    /* internal LLM setup: system prompt and few-shot chat per "<model>:<src>-<dst>" key */
    private setup: Config = {
        /* SmolLM3: English (EN) to German (DE) translation */
        "SmolLM3:en-de": {
            systemPrompt:
                "/no_think\n" +
                "You are a translator.\n" +
                "Output only the requested text.\n" +
                "Do not use markdown.\n" +
                "Do not chat.\n" +
                "Do not show any explanations.\n" +
                "Do not show any introduction.\n" +
                "Do not show any preamble.\n" +
                "Do not show any prolog.\n" +
                "Do not show any epilog.\n" +
                "Get to the point.\n" +
                "Preserve the original meaning, tone, and nuance.\n" +
                "Directly translate text from English (EN) to fluent and natural German (DE) language.\n",
            chat: [
                { role: "user",      content: "I love my wife." },
                { role: "assistant", content: "Ich liebe meine Frau." },
                { role: "user",      content: "The weather is wonderful." },
                { role: "assistant", content: "Das Wetter ist wunderschön." },
                { role: "user",      content: "Life is awesome." },
                { role: "assistant", content: "Das Leben ist einfach großartig." }
            ]
        },

        /* SmolLM3: German (DE) to English (EN) translation */
        "SmolLM3:de-en": {
            systemPrompt:
                "/no_think\n" +
                "You are a translator.\n" +
                "Output only the requested text.\n" +
                "Do not use markdown.\n" +
                "Do not chat.\n" +
                "Do not show any explanations.\n" +
                "Do not show any introduction.\n" +
                "Do not show any preamble. \n" +
                "Do not show any prolog. \n" +
                "Do not show any epilog. \n" +
                "Get to the point.\n" +
                "Preserve the original meaning, tone, and nuance.\n" +
                "Directly translate text from German (DE) to fluent and natural English (EN) language.\n",
            chat: [
                { role: "user",      content: "Ich liebe meine Frau." },
                { role: "assistant", content: "I love my wife." },
                { role: "user",      content: "Das Wetter ist wunderschön." },
                { role: "assistant", content: "The weather is wonderful." },
                { role: "user",      content: "Das Leben ist einfach großartig." },
                { role: "assistant", content: "Life is awesome." }
            ]
        }
    }

    /* construct node
       (throws an Error if source and destination language are the same) */
    constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
        super(id, cfg, opts, args)

        /* declare node configuration parameters */
        this.configure({
            src:   { type: "string", pos: 0, val: "de",   match: /^(?:de|en)$/ },
            dst:   { type: "string", pos: 1, val: "en",   match: /^(?:de|en)$/ },
            model: { type: "string",         val: "OPUS", match: /^(?:OPUS|SmolLM3)$/ }
        })

        /* sanity check parameters */
        if (this.params.src === this.params.dst)
            throw new Error("source and destination languages cannot be the same")

        /* declare node input/output format */
        this.input  = "text"
        this.output = "text"
    }

    /* open node: instantiate the configured pipeline and establish the translating stream
       (throws an Error if the pipeline cannot be instantiated or the model is unknown) */
    async open () {
        let model: string = ""

        /* track download progress when instantiating Transformers engine and model */
        const progressState = new Map<string, number>()
        const progressCallback: Transformers.ProgressCallback = (progress: any) => {
            let artifact = model
            if (typeof progress.file === "string")
                artifact += `:${progress.file}`

            /* determine progress as a percentage (0..100);
               a zero "total" is skipped to avoid a NaN/Infinity result */
            let percent = 0
            if (typeof progress.loaded === "number" && typeof progress.total === "number" && progress.total > 0)
                percent = (progress.loaded / progress.total) * 100
            else if (typeof progress.progress === "number")
                percent = progress.progress
            if (percent > 0)
                progressState.set(artifact, percent)
        }
        const interval = setInterval(() => {
            for (const [ artifact, percent ] of progressState) {
                this.log("info", `downloaded ${percent.toFixed(2)}% of artifact "${artifact}"`)

                /* stop reporting an artifact once it is complete
                   (the value is a percentage, hence the comparison against 100) */
                if (percent >= 100)
                    progressState.delete(artifact)
            }
        }, 1000)

        /* instantiate Transformers engine and model
           (the progress reporting is stopped even if the instantiation fails) */
        try {
            if (this.params.model === "OPUS") {
                model = `onnx-community/opus-mt-${this.params.src}-${this.params.dst}`
                const pipeline = Transformers.pipeline("translation", model, {
                    cache_dir: path.join(this.config.cacheDir, "transformers"),
                    dtype:     "q4",
                    device:    "auto",
                    progress_callback: progressCallback
                })
                this.translator = await pipeline
            }
            else if (this.params.model === "SmolLM3") {
                model = "HuggingFaceTB/SmolLM3-3B-ONNX"
                const pipeline = Transformers.pipeline("text-generation", model, {
                    cache_dir: path.join(this.config.cacheDir, "transformers"),
                    dtype:     "q4",
                    device:    "auto",
                    progress_callback: progressCallback
                })
                this.generator = await pipeline
            }
            else
                throw new Error("invalid model")
        }
        finally {
            clearInterval(interval)
        }
        if (this.params.model === "OPUS" && this.translator === null)
            throw new Error("failed to instantiate translator pipeline")
        if (this.params.model === "SmolLM3" && this.generator === null)
            throw new Error("failed to instantiate generator pipeline")

        /* provide text-to-text translation */
        const translate = async (text: string) => {
            if (this.params.model === "OPUS") {
                /* the pipeline result is either a single result or a list of results */
                const result = await this.translator!(text)
                return Array.isArray(result) ?
                    (result[0] as Transformers.TranslationSingle).translation_text :
                    (result as Transformers.TranslationSingle).translation_text
            }
            else if (this.params.model === "SmolLM3") {
                /* assemble chat: system prompt, few-shot examples and the text to translate */
                const key = `SmolLM3:${this.params.src}-${this.params.dst}`
                const cfg = this.setup[key]
                if (cfg === undefined)
                    throw new Error(`no translation setup for "${key}"`)
                const messages = [
                    { role: "system", content: cfg.systemPrompt },
                    ...cfg.chat,
                    { role: "user", content: text }
                ]

                /* NOTE(review): no callback is passed to the TextStreamer, so it presumably
                   echoes the generated tokens as a side effect -- confirm this is intended */
                const result = await this.generator!(messages, {
                    max_new_tokens: 100,
                    temperature:    0.6,
                    top_p:          0.95,
                    streamer: new Transformers.TextStreamer(this.generator!.tokenizer, {
                        skip_prompt:         true,
                        skip_special_tokens: true
                    })
                })

                /* the generated text is either a plain string or the whole chat,
                   in which case the last message is the response */
                const generatedText = Array.isArray(result) ?
                    (result[0] as Transformers.TextGenerationSingle).generated_text :
                    (result as Transformers.TextGenerationSingle).generated_text
                const response = typeof generatedText === "string" ?
                    generatedText :
                    generatedText.at(-1)!.content
                return response
            }
            else
                throw new Error("invalid model")
        }

        /* establish a duplex stream and connect it to Transformers */
        this.stream = new Stream.Transform({
            readableObjectMode: true,
            writableObjectMode: true,
            decodeStrings:      false,
            transform (chunk: SpeechFlowChunk, encoding, callback) {
                if (Buffer.isBuffer(chunk.payload))
                    callback(new Error("invalid chunk payload type"))
                else {
                    if (chunk.payload === "") {
                        /* pass through empty text chunks without translating them */
                        this.push(chunk)
                        callback()
                    }
                    else {
                        translate(chunk.payload).then((payload) => {
                            /* forward a clone which carries the translated text */
                            chunk = chunk.clone()
                            chunk.payload = payload
                            this.push(chunk)
                            callback()
                        }).catch((err) => {
                            callback(err)
                        })
                    }
                }
            },
            final (callback) {
                this.push(null)
                callback()
            }
        })
    }

    /* close node: destroy the stream and release the pipeline resources */
    async close () {
        /* close stream */
        if (this.stream !== null) {
            this.stream.destroy()
            this.stream = null
        }

        /* shutdown Transformers
           (the references are dropped first, so a failing disposal cannot leave
           a half-disposed pipeline behind; the disposal itself is awaited) */
        if (this.translator !== null) {
            const translator = this.translator
            this.translator = null
            await translator.dispose()
        }
        if (this.generator !== null) {
            const generator = this.generator
            this.generator = null
            await generator.dispose()
        }
    }
}
|
|
249
|
+
|
|
@@ -52,7 +52,7 @@ export default class SpeechFlowNodeTrace extends SpeechFlowNode {
|
|
|
52
52
|
const fmt = (t: Duration) => t.toFormat("hh:mm:ss.SSS")
|
|
53
53
|
if (Buffer.isBuffer(chunk.payload)) {
|
|
54
54
|
if (type === "audio")
|
|
55
|
-
log("
|
|
55
|
+
log("debug", `writing ${type} chunk: start=${fmt(chunk.timestampStart)} ` +
|
|
56
56
|
`end=${fmt(chunk.timestampEnd)} kind=${chunk.kind} type=${chunk.type} ` +
|
|
57
57
|
`payload-type=Buffer payload-bytes=${chunk.payload.byteLength}`)
|
|
58
58
|
else
|
|
@@ -60,7 +60,7 @@ export default class SpeechFlowNodeTrace extends SpeechFlowNode {
|
|
|
60
60
|
}
|
|
61
61
|
else {
|
|
62
62
|
if (type === "text")
|
|
63
|
-
log("
|
|
63
|
+
log("debug", `writing ${type} chunk: start=${fmt(chunk.timestampStart)} ` +
|
|
64
64
|
`end=${fmt(chunk.timestampEnd)} kind=${chunk.kind} type=${chunk.type}` +
|
|
65
65
|
`payload-type=String payload-length=${chunk.payload.length} ` +
|
|
66
66
|
`payload-encoding=${encoding} payload-content="${chunk.payload.toString()}"`)
|
|
@@ -9,7 +9,7 @@ import Stream from "node:stream"
|
|
|
9
9
|
|
|
10
10
|
/* external dependencies */
|
|
11
11
|
import ws from "ws"
|
|
12
|
-
import
|
|
12
|
+
import ReconnWebSocket, { ErrorEvent } from "@opensumi/reconnecting-websocket"
|
|
13
13
|
|
|
14
14
|
/* internal dependencies */
|
|
15
15
|
import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
|
|
@@ -22,7 +22,7 @@ export default class SpeechFlowNodeWebsocket extends SpeechFlowNode {
|
|
|
22
22
|
|
|
23
23
|
/* internal state */
|
|
24
24
|
private server: ws.WebSocketServer | null = null
|
|
25
|
-
private client:
|
|
25
|
+
private client: ReconnWebSocket | null = null
|
|
26
26
|
|
|
27
27
|
/* construct node */
|
|
28
28
|
constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
|
|
@@ -153,7 +153,7 @@ export default class SpeechFlowNodeWebsocket extends SpeechFlowNode {
|
|
|
153
153
|
}
|
|
154
154
|
else if (this.params.connect !== "") {
|
|
155
155
|
/* connect remotely to a Websocket port */
|
|
156
|
-
this.client = new
|
|
156
|
+
this.client = new ReconnWebSocket(this.params.connect, [], {
|
|
157
157
|
WebSocket: ws,
|
|
158
158
|
WebSocketOptions: {},
|
|
159
159
|
reconnectionDelayGrowFactor: 1.3,
|
|
@@ -162,10 +162,10 @@ export default class SpeechFlowNodeWebsocket extends SpeechFlowNode {
|
|
|
162
162
|
connectionTimeout: 4000,
|
|
163
163
|
minUptime: 5000
|
|
164
164
|
})
|
|
165
|
-
this.client.addEventListener("open", (ev
|
|
165
|
+
this.client.addEventListener("open", (ev) => {
|
|
166
166
|
this.log("info", `connection opened to URL ${this.params.connect}`)
|
|
167
167
|
})
|
|
168
|
-
this.client.addEventListener("close", (ev
|
|
168
|
+
this.client.addEventListener("close", (ev) => {
|
|
169
169
|
this.log("info", `connection closed to URL ${this.params.connect}`)
|
|
170
170
|
})
|
|
171
171
|
this.client.addEventListener("error", (ev: ErrorEvent) => {
|
package/src/speechflow-node.ts
CHANGED
|
@@ -36,6 +36,8 @@ export class SpeechFlowChunk {
|
|
|
36
36
|
|
|
37
37
|
/* the base class for all SpeechFlow nodes */
|
|
38
38
|
export default class SpeechFlowNode extends Events.EventEmitter {
|
|
39
|
+
public static name: string | undefined
|
|
40
|
+
|
|
39
41
|
/* general constant configuration (for reference) */
|
|
40
42
|
config = {
|
|
41
43
|
audioChannels: 1, /* audio mono channel */
|
|
@@ -80,6 +82,16 @@ export default class SpeechFlowNode extends Events.EventEmitter {
|
|
|
80
82
|
this.timeZeroOffset = this.timeZero.diff(this.timeOpen)
|
|
81
83
|
}
|
|
82
84
|
|
|
85
|
+
/* receive external request: asynchronous hook taking the request arguments; this base implementation does nothing */
|
|
86
|
+
async receiveRequest (args: any[]) {
|
|
87
|
+
/* no-op */
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
/* send external response: emits a "send-response" event on this node, passing the given arguments array as the event payload */
|
|
91
|
+
sendResponse (args: any[]) {
|
|
92
|
+
this.emit("send-response", args)
|
|
93
|
+
}
|
|
94
|
+
|
|
83
95
|
/* INTERNAL: utility function: create "params" attribute from constructor of sub-classes */
|
|
84
96
|
configure (spec: { [ id: string ]: { type: string, pos?: number, val?: any, match?: RegExp | ((x: any) => boolean) } }) {
|
|
85
97
|
for (const name of Object.keys(spec)) {
|
package/src/speechflow-utils.ts
CHANGED
|
@@ -31,6 +31,33 @@ export function audioBufferDuration (
|
|
|
31
31
|
return totalSamples / sampleRate
|
|
32
32
|
}
|
|
33
33
|
|
|
34
|
+
/* determine the duration (in seconds) of audio samples held in a Float32Array,
   based on the sample rate and the number of interleaved channels */
export function audioArrayDuration (
    arr:        Float32Array,
    sampleRate  = 48000,
    channels    = 1
) {
    /* number of samples per channel */
    const samplesPerChannel = arr.length / channels

    /* samples per channel divided by samples per second gives seconds */
    const seconds = samplesPerChannel / sampleRate
    return seconds
}
|
|
43
|
+
|
|
44
|
+
/* helper function: convert Buffer in PCM/I16 to Float32Array in PCM/F32 format
   (every 16-bit signed integer sample is scaled by 1/32768; a trailing odd byte is ignored;
   "littleEndian" selects the byte order of the input samples) */
export function convertBufToF32 (buf: Buffer, littleEndian = true) {
    /* a Buffer can be a view onto a larger (pooled or sliced) ArrayBuffer,
       so the view has to be restricted to the region the Buffer actually covers */
    const dataView = new DataView(buf.buffer, buf.byteOffset, buf.byteLength)
    const arr = new Float32Array(Math.floor(buf.byteLength / 2))
    for (let i = 0; i < arr.length; i++)
        arr[i] = dataView.getInt16(i * 2, littleEndian) / 32768
    return arr
}
|
|
52
|
+
|
|
53
|
+
/* helper function: convert Float32Array in PCM/F32 to Buffer in PCM/I16 format
   (every sample is scaled by 32768, rounded and clamped to the 16-bit signed integer range) */
export function convertF32ToBuf (arr: Float32Array) {
    const samples = Int16Array.from(arr, (sample) => {
        const scaled = Math.round(sample * 32768)
        return Math.min(32767, Math.max(-32768, scaled))
    })

    /* the Buffer is created on top of the memory of the integer samples */
    return Buffer.from(samples.buffer)
}
|
|
60
|
+
|
|
34
61
|
/* create a Duplex/Transform stream which has
|
|
35
62
|
object-mode on Writable side and buffer/string-mode on Readable side */
|
|
36
63
|
export function createTransformStreamForWritableSide () {
|
|
@@ -210,3 +237,171 @@ export class DoubleQueue<T0, T1> extends EventEmitter {
|
|
|
210
237
|
})
|
|
211
238
|
}
|
|
212
239
|
}
|
|
240
|
+
|
|
241
|
+
/* queue element: every element carries a "type" tag,
   which the walk/search operations of a queue pointer match against */
export type QueueElement = { type: string }

/* queue pointer: a named cursor into the elements of a queue.
   The cursor position ranges from 0 to the number of elements (inclusive),
   where the latter means "after the last element".
   Events "position" and "search" are emitted on the pointer itself,
   events "read" and "write" are emitted on the underlying queue. */
export class QueuePointer<T extends QueueElement> extends EventEmitter {
    /* internal state */
    private index = 0

    /* construction */
    constructor (
        private name: string,
        private queue: Queue<T>
    ) {
        super()
    }

    /* positioning operations */

    /* return the largest possible position (the one after the last element) */
    maxPosition () {
        return this.queue.elements.length
    }

    /* get the current position or, if "index" is given, set it (clamped to the valid range).
       NOTE(review): here the "position" event carries the plain index, while the
       walk operations below carry an object "{ start }" -- confirm with the listeners */
    position (index?: number): number {
        if (index !== undefined) {
            this.index = index
            if (this.index < 0)
                this.index = 0
            else if (this.index >= this.queue.elements.length)
                this.index = this.queue.elements.length
            this.emit("position", this.index)
        }
        return this.index
    }

    /* move the position by "num" elements (forward if positive, backward if negative),
       stopping at the boundaries of the queue */
    walk (num: number) {
        if (num > 0) {
            for (let i = 0; i < num && this.index < this.queue.elements.length; i++)
                this.index++
            this.emit("position", { start: this.index })
        }
        else if (num < 0) {
            for (let i = 0; i < Math.abs(num) && this.index > 0; i++)
                this.index--
            this.emit("position", { start: this.index })
        }
    }

    /* move forward until an element of the given type is found (or the end is reached) */
    walkForwardUntil (type: T["type"]) {
        while (this.index < this.queue.elements.length
            && this.queue.elements[this.index].type !== type)
            this.index++
        this.emit("position", { start: this.index })
    }

    /* move backward until an element of the given type is found (or the start is reached);
       the position after the last element has no element and hence never matches */
    walkBackwardUntil (type: T["type"]) {
        while (this.index > 0
            && this.queue.elements[this.index]?.type !== type)
            this.index--
        this.emit("position", { start: this.index })
    }

    /* search operations */

    /* return the position of the next element of the given type at or after the current
       position (or the position after the last element), without moving the pointer */
    searchForward (type: T["type"]) {
        let position = this.index
        while (position < this.queue.elements.length
            && this.queue.elements[position].type !== type)
            position++
        this.emit("search", { start: this.index, end: position })
        return position
    }

    /* return the position of the previous element of the given type at or before the
       current position (or 0), without moving the pointer;
       the position after the last element has no element and hence never matches */
    searchBackward (type: T["type"]) {
        let position = this.index
        while (position > 0
            && this.queue.elements[position]?.type !== type)
            position--
        this.emit("search", { start: position, end: this.index })
        return position
    }

    /* reading operations */

    /* return the element at the given (clamped) position, or at the current position,
       without moving the pointer; the result is undefined after the last element */
    peek (position?: number) {
        if (position === undefined)
            position = this.index
        else {
            if (position < 0)
                position = 0
            else if (position > this.queue.elements.length)
                position = this.queue.elements.length
        }
        const element = this.queue.elements[position]
        this.queue.emit("read", { start: position, end: position })
        return element
    }

    /* return the element at the current position and advance the pointer by one;
       the result is undefined if the pointer is already after the last element */
    read () {
        const element = this.queue.elements[this.index]
        if (this.index < this.queue.elements.length)
            this.index++
        this.queue.emit("read", { start: this.index - 1, end: this.index - 1 })
        return element
    }

    /* return up to "size" elements (all remaining ones if "size" is omitted),
       starting at the current position, and advance the pointer past them */
    slice (size?: number) {
        let slice: T[]
        const start = this.index
        if (size !== undefined) {
            if (size < 0)
                size = 0
            else if (size > this.queue.elements.length - this.index)
                size = this.queue.elements.length - this.index

            /* Array.prototype.slice expects an end index, not a length */
            slice = this.queue.elements.slice(this.index, this.index + size)
            this.index += size
        }
        else {
            slice = this.queue.elements.slice(this.index)
            this.index = this.queue.elements.length
        }
        this.queue.emit("read", { start, end: this.index })
        return slice
    }

    /* writing operations */

    /* signal a write on the element at the current position without changing it
       (throws an Error if the pointer is after the last element) */
    touch () {
        if (this.index >= this.queue.elements.length)
            throw new Error("cannot touch after last element")
        this.queue.emit("write", { start: this.index, end: this.index + 1 })
    }

    /* add an element to the end of the queue and move the pointer after it */
    append (element: T) {
        this.queue.elements.push(element)
        this.index = this.queue.elements.length
        this.queue.emit("write", { start: this.index - 1, end: this.index - 1 })
    }

    /* add an element at the current position and move the pointer after it */
    insert (element: T) {
        this.queue.elements.splice(this.index++, 0, element)
        this.queue.emit("write", { start: this.index - 1, end: this.index })
    }

    /* remove the element at the current position
       (throws an Error if the pointer is after the last element) */
    delete () {
        if (this.index >= this.queue.elements.length)
            throw new Error("cannot delete after last element")
        this.queue.elements.splice(this.index, 1)
        this.queue.emit("write", { start: this.index, end: this.index })
    }
}

/* queue: a list of elements which is accessed through named pointers */
export class Queue<T extends QueueElement> extends EventEmitter {
    public elements: T[] = []
    private pointers = new Map<string, QueuePointer<T>>()

    /* return the pointer of the given name (it is created on first use) */
    pointerUse (name: string): QueuePointer<T> {
        if (!this.pointers.has(name))
            this.pointers.set(name, new QueuePointer<T>(name, this))
        return this.pointers.get(name)!
    }

    /* forget the pointer of the given name (throws an Error if it does not exist) */
    pointerDelete (name: string): void {
        if (!this.pointers.has(name))
            throw new Error("pointer not exists")
        this.pointers.delete(name)
    }

    /* drop all leading elements which are located before every pointer */
    trim (): void {
        /* determine minimum pointer position */
        let min = this.elements.length
        for (const pointer of this.pointers.values())
            if (min > pointer.position())
                min = pointer.position()

        /* trim the maximum amount of first elements */
        this.elements.splice(0, min)

        /* shift all pointers */
        for (const pointer of this.pointers.values())
            pointer.position(pointer.position() - min)
    }
}
|
|
407
|
+
|