speechflow 0.9.9 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110)
  1. package/CHANGELOG.md +8 -0
  2. package/README.md +48 -1
  3. package/dst/speechflow-node-a2a-ffmpeg.js +1 -0
  4. package/dst/speechflow-node-a2a-ffmpeg.js.map +1 -0
  5. package/dst/{speechflow-node-gemma.d.ts → speechflow-node-a2a-meter.d.ts} +2 -3
  6. package/dst/speechflow-node-a2a-meter.js +147 -0
  7. package/dst/speechflow-node-a2a-meter.js.map +1 -0
  8. package/dst/speechflow-node-a2a-mute.d.ts +16 -0
  9. package/dst/speechflow-node-a2a-mute.js +90 -0
  10. package/dst/speechflow-node-a2a-mute.js.map +1 -0
  11. package/dst/speechflow-node-a2a-vad.js +130 -289
  12. package/dst/speechflow-node-a2a-vad.js.map +1 -0
  13. package/dst/speechflow-node-a2a-wav.js +1 -0
  14. package/dst/speechflow-node-a2a-wav.js.map +1 -0
  15. package/dst/speechflow-node-a2t-deepgram.js +2 -1
  16. package/dst/speechflow-node-a2t-deepgram.js.map +1 -0
  17. package/dst/speechflow-node-t2a-elevenlabs.js +1 -0
  18. package/dst/speechflow-node-t2a-elevenlabs.js.map +1 -0
  19. package/dst/speechflow-node-t2a-kokoro.js +1 -0
  20. package/dst/speechflow-node-t2a-kokoro.js.map +1 -0
  21. package/dst/speechflow-node-t2t-deepl.js +1 -0
  22. package/dst/speechflow-node-t2t-deepl.js.map +1 -0
  23. package/dst/speechflow-node-t2t-format.js +1 -0
  24. package/dst/speechflow-node-t2t-format.js.map +1 -0
  25. package/dst/speechflow-node-t2t-ollama.js +1 -0
  26. package/dst/speechflow-node-t2t-ollama.js.map +1 -0
  27. package/dst/speechflow-node-t2t-openai.js +1 -0
  28. package/dst/speechflow-node-t2t-openai.js.map +1 -0
  29. package/dst/speechflow-node-t2t-subtitle.js +1 -0
  30. package/dst/speechflow-node-t2t-subtitle.js.map +1 -0
  31. package/dst/speechflow-node-t2t-transformers.js +10 -6
  32. package/dst/speechflow-node-t2t-transformers.js.map +1 -0
  33. package/dst/speechflow-node-x2x-trace.js +1 -0
  34. package/dst/speechflow-node-x2x-trace.js.map +1 -0
  35. package/dst/speechflow-node-xio-device.js +1 -0
  36. package/dst/speechflow-node-xio-device.js.map +1 -0
  37. package/dst/speechflow-node-xio-file.js +1 -0
  38. package/dst/speechflow-node-xio-file.js.map +1 -0
  39. package/dst/speechflow-node-xio-mqtt.js +1 -0
  40. package/dst/speechflow-node-xio-mqtt.js.map +1 -0
  41. package/dst/speechflow-node-xio-websocket.js +1 -0
  42. package/dst/speechflow-node-xio-websocket.js.map +1 -0
  43. package/dst/speechflow-node.d.ts +3 -0
  44. package/dst/speechflow-node.js +10 -0
  45. package/dst/speechflow-node.js.map +1 -0
  46. package/dst/speechflow-utils.d.ts +33 -0
  47. package/dst/speechflow-utils.js +183 -1
  48. package/dst/speechflow-utils.js.map +1 -0
  49. package/dst/speechflow.js +209 -6
  50. package/dst/speechflow.js.map +1 -0
  51. package/etc/speechflow.yaml +5 -3
  52. package/etc/stx.conf +1 -1
  53. package/etc/tsconfig.json +2 -2
  54. package/package.json +14 -8
  55. package/src/speechflow-node-a2a-meter.ts +125 -0
  56. package/src/speechflow-node-a2a-mute.ts +102 -0
  57. package/src/speechflow-node-a2a-vad.ts +266 -0
  58. package/src/speechflow-node-a2t-deepgram.ts +1 -1
  59. package/src/speechflow-node-t2t-transformers.ts +12 -7
  60. package/src/speechflow-node-xio-websocket.ts +5 -5
  61. package/src/speechflow-node.ts +12 -0
  62. package/src/speechflow-utils.ts +195 -0
  63. package/src/speechflow.ts +193 -6
  64. package/dst/speechflow-node-deepgram.d.ts +0 -12
  65. package/dst/speechflow-node-deepgram.js +0 -220
  66. package/dst/speechflow-node-deepl.d.ts +0 -12
  67. package/dst/speechflow-node-deepl.js +0 -128
  68. package/dst/speechflow-node-device.d.ts +0 -13
  69. package/dst/speechflow-node-device.js +0 -205
  70. package/dst/speechflow-node-elevenlabs.d.ts +0 -13
  71. package/dst/speechflow-node-elevenlabs.js +0 -182
  72. package/dst/speechflow-node-ffmpeg.d.ts +0 -13
  73. package/dst/speechflow-node-ffmpeg.js +0 -152
  74. package/dst/speechflow-node-file.d.ts +0 -11
  75. package/dst/speechflow-node-file.js +0 -176
  76. package/dst/speechflow-node-format.d.ts +0 -11
  77. package/dst/speechflow-node-format.js +0 -80
  78. package/dst/speechflow-node-gemma.js +0 -213
  79. package/dst/speechflow-node-mqtt.d.ts +0 -13
  80. package/dst/speechflow-node-mqtt.js +0 -181
  81. package/dst/speechflow-node-opus.d.ts +0 -12
  82. package/dst/speechflow-node-opus.js +0 -135
  83. package/dst/speechflow-node-subtitle.d.ts +0 -12
  84. package/dst/speechflow-node-subtitle.js +0 -96
  85. package/dst/speechflow-node-t2t-gemma.d.ts +0 -13
  86. package/dst/speechflow-node-t2t-gemma.js +0 -233
  87. package/dst/speechflow-node-t2t-opus.d.ts +0 -12
  88. package/dst/speechflow-node-t2t-opus.js +0 -135
  89. package/dst/speechflow-node-trace.d.ts +0 -11
  90. package/dst/speechflow-node-trace.js +0 -88
  91. package/dst/speechflow-node-wav.d.ts +0 -11
  92. package/dst/speechflow-node-wav.js +0 -170
  93. package/dst/speechflow-node-websocket.d.ts +0 -13
  94. package/dst/speechflow-node-websocket.js +0 -275
  95. package/dst/speechflow-node-whisper-common.d.ts +0 -34
  96. package/dst/speechflow-node-whisper-common.js +0 -7
  97. package/dst/speechflow-node-whisper-ggml.d.ts +0 -1
  98. package/dst/speechflow-node-whisper-ggml.js +0 -97
  99. package/dst/speechflow-node-whisper-onnx.d.ts +0 -1
  100. package/dst/speechflow-node-whisper-onnx.js +0 -131
  101. package/dst/speechflow-node-whisper-worker-ggml.d.ts +0 -1
  102. package/dst/speechflow-node-whisper-worker-ggml.js +0 -97
  103. package/dst/speechflow-node-whisper-worker-onnx.d.ts +0 -1
  104. package/dst/speechflow-node-whisper-worker-onnx.js +0 -131
  105. package/dst/speechflow-node-whisper-worker.d.ts +0 -1
  106. package/dst/speechflow-node-whisper-worker.js +0 -116
  107. package/dst/speechflow-node-whisper-worker2.d.ts +0 -1
  108. package/dst/speechflow-node-whisper-worker2.js +0 -82
  109. package/dst/speechflow-node-whisper.d.ts +0 -19
  110. package/dst/speechflow-node-whisper.js +0 -604
@@ -0,0 +1,102 @@
+ /*
+ ** SpeechFlow - Speech Processing Flow Graph
+ ** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
+ ** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
+ */
+
+ /* standard dependencies */
+ import Stream from "node:stream"
+
+ /* internal dependencies */
+ import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
+
+ /* the type of muting */
+ type MuteMode =
+     "none"      |  /* not muted */
+     "silenced"  |  /* muted by changing audio samples to silence */
+     "unplugged"    /* muted by unplugging the audio sample flow */
+
+ /* SpeechFlow node for muting in audio-to-audio passing */
+ export default class SpeechFlowNodeMute extends SpeechFlowNode {
+     /* declare official node name */
+     public static name = "mute"
+
+     /* internal state */
+     private muteMode: MuteMode = "none"
+
+     /* construct node */
+     constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
+         super(id, cfg, opts, args)
+
+         /* declare node configuration parameters */
+         this.configure({})
+
+         /* declare node input/output format */
+         this.input  = "audio"
+         this.output = "audio"
+     }
+
+     /* receive external request */
+     async receiveRequest (params: any[]) {
+         if (params.length === 2 && params[0] === "mode") {
+             if (!params[1].match(/^(?:none|silenced|unplugged)$/))
+                 throw new Error("mute: invalid mode argument in external request")
+             const muteMode: MuteMode = params[1] as MuteMode
+             this.setMuteMode(muteMode)
+             this.sendResponse([ "mute", "mode", muteMode ])
+         }
+         else
+             throw new Error("mute: invalid arguments in external request")
+     }
+
+     /* change mute mode */
+     setMuteMode (mode: MuteMode) {
+         this.log("info", `setting mute mode to "${mode}"`)
+         this.muteMode = mode
+     }
+
+     /* open node */
+     async open () {
+         /* establish a transform stream */
+         const self = this
+         this.stream = new Stream.Transform({
+             readableObjectMode: true,
+             writableObjectMode: true,
+             decodeStrings:      false,
+             transform (chunk: SpeechFlowChunk, encoding, callback) {
+                 if (!Buffer.isBuffer(chunk.payload))
+                     callback(new Error("invalid chunk payload type"))
+                 else if (self.muteMode === "unplugged")
+                     /* pass-through nothing */
+                     callback()
+                 else if (self.muteMode === "silenced") {
+                     /* pass-through a silenced chunk */
+                     chunk = chunk.clone()
+                     const buffer = chunk.payload as Buffer
+                     buffer.fill(0)
+                     this.push(chunk)
+                     callback()
+                 }
+                 else {
+                     /* pass-through original chunk */
+                     this.push(chunk)
+                     callback()
+                 }
+             },
+             final (callback) {
+                 this.push(null)
+                 callback()
+             }
+         })
+     }
+
+     /* close node */
+     async close () {
+         /* close stream */
+         if (this.stream !== null) {
+             this.stream.destroy()
+             this.stream = null
+         }
+     }
+ }
+
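The mute node above has no configuration parameters; it is driven entirely at runtime through the external request protocol that this release introduces on the node base class (see the speechflow-node.ts hunk at the end of this diff). The following sketch shows how an embedding application might toggle muting; the flowGraph.node() lookup is hypothetical, while receiveRequest, sendResponse, and the "send-response" event are taken from this diff:

    /* hypothetical controller code for a running "mute" node */
    const node = flowGraph.node("mute")             /* lookup API is assumed */
    node.on("send-response", (args: any[]) => {
        console.log("mute node responded:", args)   /* [ "mute", "mode", ... ] */
    })
    await node.receiveRequest([ "mode", "silenced"  ])  /* zero the audio samples  */
    await node.receiveRequest([ "mode", "unplugged" ])  /* drop the chunks instead */
    await node.receiveRequest([ "mode", "none"      ])  /* back to pass-through    */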
@@ -0,0 +1,266 @@
+ /*
+ ** SpeechFlow - Speech Processing Flow Graph
+ ** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
+ ** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
+ */
+
+ /* standard dependencies */
+ import Stream from "node:stream"
+
+ /* external dependencies */
+ import { RealTimeVAD } from "@ericedouard/vad-node-realtime"
+ import { Duration } from "luxon"
+
+ /* internal dependencies */
+ import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
+ import * as utils from "./speechflow-utils"
+
+ /* audio stream queue element */
+ type AudioQueueElement = {
+     type: "audio-frame",
+     chunk: SpeechFlowChunk,
+     isSpeech?: boolean
+ } | {
+     type: "audio-eof"
+ }
+
+ /* SpeechFlow node for VAD speech-to-speech processing */
+ export default class SpeechFlowNodeVAD extends SpeechFlowNode {
+     /* declare official node name */
+     public static name = "vad"
+
+     /* internal state */
+     private vad: RealTimeVAD | null = null
+     private queue     = new utils.Queue<AudioQueueElement>()
+     private queueRecv = this.queue.pointerUse("recv")
+     private queueVAD  = this.queue.pointerUse("vad")
+     private queueSend = this.queue.pointerUse("send")
+
+     /* construct node */
+     constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
+         super(id, cfg, opts, args)
+
+         /* declare node configuration parameters */
+         this.configure({
+             mode:               { type: "string", val: "unplugged", match: /^(?:silenced|unplugged)$/ },
+             posSpeechThreshold: { type: "number", val: 0.50 },
+             negSpeechThreshold: { type: "number", val: 0.35 },
+             minSpeechFrames:    { type: "number", val: 2 },
+             redemptionFrames:   { type: "number", val: 12 },
+             preSpeechPadFrames: { type: "number", val: 1 }
+         })
+
+         /* declare node input/output format */
+         this.input  = "audio"
+         this.output = "audio"
+     }
+
+     /* open node */
+     async open () {
+         /* sanity check situation */
+         if (this.config.audioBitDepth !== 16 || !this.config.audioLittleEndian)
+             throw new Error("VAD node currently supports PCM-S16LE audio only")
+
+         /* pass-through logging */
+         const log = (level: string, msg: string) => { this.log(level, msg) }
+
+         /* internal processing constants */
+         const vadSampleRateTarget = 16000 /* internal target of VAD */
+         const vadSamplesPerFrame  = 512   /* required for VAD v5 */
+
+         /* establish Voice Activity Detection (VAD) facility */
+         this.vad = await RealTimeVAD.new({
+             model: "v5",
+             sampleRate:   this.config.audioSampleRate, /* before resampling to 16KHz */
+             frameSamples: vadSamplesPerFrame,          /* after resampling to 16KHz */
+             positiveSpeechThreshold: this.params.posSpeechThreshold,
+             negativeSpeechThreshold: this.params.negSpeechThreshold,
+             minSpeechFrames:         this.params.minSpeechFrames,
+             redemptionFrames:        this.params.redemptionFrames,
+             preSpeechPadFrames:      this.params.preSpeechPadFrames,
+             onSpeechStart: () => {
+                 log("info", "VAD: speech start")
+             },
+             onSpeechEnd: (audio) => {
+                 const duration = utils.audioArrayDuration(audio, vadSampleRateTarget)
+                 log("info", `VAD: speech end (duration: ${duration.toFixed(2)}s)`)
+             },
+             onVADMisfire: () => {
+                 log("info", "VAD: speech end (segment too short)")
+             },
+             onFrameProcessed: (audio) => {
+                 /* annotate the current audio frame */
+                 const element = this.queueVAD.peek()
+                 if (element !== undefined && element.type === "audio-frame") {
+                     const isSpeech = audio.isSpeech > audio.notSpeech
+                     element.isSpeech = isSpeech
+                     this.queueVAD.touch()
+                     this.queueVAD.walk(+1)
+                 }
+             }
+         })
+         this.vad.start()
+
+         /* provide Duplex stream and internally attach to VAD */
+         const vad       = this.vad
+         const cfg       = this.config
+         const queue     = this.queue
+         const queueRecv = this.queueRecv
+         const queueSend = this.queueSend
+         const mode      = this.params.mode
+         let carrySamples = new Float32Array()
+         let carryStart   = Duration.fromDurationLike(0)
+         this.stream = new Stream.Duplex({
+             writableObjectMode: true,
+             readableObjectMode: true,
+             decodeStrings:      false,
+
+             /* receive audio chunk (writable side of stream) */
+             write (chunk: SpeechFlowChunk, encoding, callback) {
+                 if (!Buffer.isBuffer(chunk.payload))
+                     callback(new Error("expected audio input as Buffer chunks"))
+                 else if (chunk.payload.byteLength === 0)
+                     callback()
+                 else {
+                     /* convert audio samples from PCM/I16 to PCM/F32 */
+                     let data  = utils.convertBufToF32(chunk.payload, cfg.audioLittleEndian)
+                     let start = chunk.timestampStart
+
+                     /* merge previous carry samples */
+                     if (carrySamples.length > 0) {
+                         start = carryStart
+                         const merged = new Float32Array(carrySamples.length + data.length)
+                         merged.set(carrySamples)
+                         merged.set(data, carrySamples.length)
+                         data = merged
+                         carrySamples = new Float32Array()
+                     }
+
+                     /* queue audio samples as individual VAD-sized frames and
+                        in parallel send them into the Voice Activity Detection (VAD) */
+                     const chunkSize = (vadSamplesPerFrame * (cfg.audioSampleRate / vadSampleRateTarget))
+                     const chunks = Math.trunc(data.length / chunkSize)
+                     for (let i = 0; i < chunks; i++) {
+                         const frame = data.slice(i * chunkSize, (i + 1) * chunkSize)
+                         const buf = utils.convertF32ToBuf(frame)
+                         const duration = utils.audioBufferDuration(buf)
+                         const end = start.plus(duration)
+                         const chunk = new SpeechFlowChunk(start, end, "final", "audio", buf)
+                         queueRecv.append({ type: "audio-frame", chunk })
+                         vad.processAudio(frame)
+                         start = end
+                     }
+
+                     /* remember new carry samples */
+                     const bulkLen = chunks * chunkSize
+                     carrySamples = data.slice(bulkLen)
+                     carryStart   = start
+
+                     callback()
+                 }
+             },
+
+             /* receive no more audio chunks (writable side of stream) */
+             final (callback) {
+                 /* flush pending audio chunks */
+                 if (carrySamples.length > 0) {
+                     const chunkSize = (vadSamplesPerFrame * (cfg.audioSampleRate / vadSampleRateTarget))
+                     if (carrySamples.length < chunkSize) {
+                         const merged = new Float32Array(chunkSize)
+                         merged.set(carrySamples)
+                         merged.fill(0.0, carrySamples.length, chunkSize)
+                         carrySamples = merged
+                     }
+                     const buf = utils.convertF32ToBuf(carrySamples)
+                     const duration = utils.audioBufferDuration(buf)
+                     const end = carryStart.plus(duration)
+                     const chunk = new SpeechFlowChunk(carryStart, end, "final", "audio", buf)
+                     queueRecv.append({ type: "audio-frame", chunk })
+                     vad.processAudio(carrySamples)
+                 }
+
+                 /* signal end of file */
+                 queueRecv.append({ type: "audio-eof" })
+                 callback()
+             },
+
+             /* send audio chunk(s) (readable side of stream) */
+             read (_size) {
+                 /* try to perform read operation from scratch */
+                 const tryToRead = () => {
+                     /* flush pending audio chunks */
+                     const flushPendingChunks = () => {
+                         let pushed = 0
+                         while (true) {
+                             const element = queueSend.peek()
+                             if (element === undefined)
+                                 break
+                             else if (element.type === "audio-eof") {
+                                 this.push(null)
+                                 break
+                             }
+                             else if (element.type === "audio-frame"
+                                 && element.isSpeech === undefined)
+                                 break
+                             queueSend.walk(+1)
+                             if (element.isSpeech) {
+                                 this.push(element.chunk)
+                                 pushed++
+                             }
+                             else if (mode === "silenced") {
+                                 const chunk = element.chunk.clone()
+                                 const buffer = chunk.payload as Buffer
+                                 buffer.fill(0)
+                                 this.push(chunk)
+                                 pushed++
+                             }
+                             else if (mode === "unplugged" && pushed === 0)
+                                 /* we have to await chunks now, as in unplugged
+                                    mode we would otherwise never be called again
+                                    until we push at least one new chunk as the result */
+                                 tryToRead()
+                         }
+                     }
+
+                     /* await forthcoming audio chunks */
+                     const awaitForthcomingChunks = () => {
+                         const element = queueSend.peek()
+                         if (element !== undefined
+                             && element.type === "audio-frame"
+                             && element.isSpeech !== undefined)
+                             flushPendingChunks()
+                         else
+                             queue.once("write", awaitForthcomingChunks)
+                     }
+
+                     const element = queueSend.peek()
+                     if (element !== undefined && element.type === "audio-eof")
+                         this.push(null)
+                     else if (element !== undefined
+                         && element.type === "audio-frame"
+                         && element.isSpeech !== undefined)
+                         flushPendingChunks()
+                     else
+                         queue.once("write", awaitForthcomingChunks)
+                 }
+                 tryToRead()
+             }
+         })
+     }
+
+     /* close node */
+     async close () {
+         /* close stream */
+         if (this.stream !== null) {
+             this.stream.destroy()
+             this.stream = null
+         }
+
+         /* close VAD */
+         if (this.vad !== null) {
+             await this.vad.flush()
+             this.vad.destroy()
+             this.vad = null
+         }
+     }
+ }
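In the write() side above, incoming PCM is cut into frames sized so that, after the VAD library resamples them to its internal 16 kHz rate, each frame contains exactly the 512 samples required by the v5 model; samples that do not fill a whole frame are carried over to the next write. A quick worked example of that arithmetic (the constants come from the diff, the 48 kHz input rate is an assumption):

    /* frame sizing of the VAD node, worked through for an assumed 48 kHz input */
    const vadSampleRateTarget = 16000   /* VAD-internal sample rate      */
    const vadSamplesPerFrame  = 512     /* frame size required by VAD v5 */
    const audioSampleRate     = 48000   /* assumed upstream PCM rate     */
    const chunkSize = vadSamplesPerFrame * (audioSampleRate / vadSampleRateTarget)
    console.log(chunkSize)                    /* 1536 samples per queued frame */
    console.log(chunkSize / audioSampleRate)  /* 0.032 s of audio per frame    */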
@@ -164,7 +164,7 @@ export default class SpeechFlowNodeDeepgram extends SpeechFlowNode {
      if (chunk.payload.byteLength > 0) {
          log("info", `Deepgram: send data (${chunk.payload.byteLength} bytes)`)
          initTimeoutStart()
-         dg.send(chunk.payload)        /* intentionally discard all time information */
+         dg.send(chunk.payload.buffer) /* intentionally discard all time information */
      }
      callback()
  }
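The one-line change above hands the Deepgram SDK the Buffer's underlying ArrayBuffer instead of the Buffer view itself. One caveat worth noting (not addressed by this diff): Node.js allocates small Buffers from a shared pool, so payload.buffer can be larger than, and offset from, the bytes the view actually covers. A defensive variant would slice by offset and length — a sketch of an alternative, not what the package does:

    /* hedged sketch: extract exactly the bytes a Buffer view covers */
    function toArrayBuffer (buf: Buffer): ArrayBufferLike {
        return buf.buffer.slice(buf.byteOffset, buf.byteOffset + buf.byteLength)
    }
    dg.send(toArrayBuffer(chunk.payload))  /* instead of chunk.payload.buffer */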
@@ -104,10 +104,11 @@ export default class SpeechFlowNodeTransformers extends SpeechFlowNode {
  
      /* open node */
      async open () {
-         /* instantiate Transformers engine and model */
          let model: string = ""
+ 
+         /* track download progress when instantiating Transformers engine and model */
          const progressState = new Map<string, number>()
-         const progressCallback = (progress: any) => {
+         const progressCallback: Transformers.ProgressCallback = (progress: any) => {
              let artifact = model
              if (typeof progress.file === "string")
                  artifact += `:${progress.file}`
@@ -126,26 +127,30 @@ export default class SpeechFlowNodeTransformers extends SpeechFlowNode {
                      progressState.delete(artifact)
                  }
              }, 1000)
+ 
+         /* instantiate Transformers engine and model */
          if (this.params.model === "OPUS") {
              model = `onnx-community/opus-mt-${this.params.src}-${this.params.dst}`
-             this.translator = await Transformers.pipeline("translation", model, {
-                 cache_dir: path.join(this.config.cacheDir, "opus"),
+             const pipeline = Transformers.pipeline("translation", model, {
+                 cache_dir: path.join(this.config.cacheDir, "transformers"),
                  dtype: "q4",
-                 device: "gpu",
+                 device: "auto",
                  progress_callback: progressCallback
              })
+             this.translator = await pipeline
              clearInterval(interval)
              if (this.translator === null)
                  throw new Error("failed to instantiate translator pipeline")
          }
          else if (this.params.model === "SmolLM3") {
              model = "HuggingFaceTB/SmolLM3-3B-ONNX"
-             this.generator = await Transformers.pipeline("text-generation", model, {
+             const pipeline = Transformers.pipeline("text-generation", model, {
                  cache_dir: path.join(this.config.cacheDir, "transformers"),
                  dtype: "q4",
-                 device: "gpu",
+                 device: "auto",
                  progress_callback: progressCallback
              })
+             this.generator = await pipeline
              clearInterval(interval)
              if (this.generator === null)
                  throw new Error("failed to instantiate generator pipeline")
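After these hunks, both pipelines share one transformers cache directory, let Transformers.js choose the execution device instead of forcing "gpu", and keep the progress interval alive while the pipeline promise resolves. For orientation, a translation pipeline of this kind is used roughly as follows — a hedged sketch against the @huggingface/transformers API with an assumed en→de language pair, not code from the package:

    import * as Transformers from "@huggingface/transformers"

    /* sketch: instantiate and invoke an OPUS-MT translation pipeline */
    const translator = await Transformers.pipeline("translation",
        "onnx-community/opus-mt-en-de", { dtype: "q4", device: "auto" })
    const result = await translator("SpeechFlow is a speech processing flow graph.")
    console.log(result)  /* e.g. [ { translation_text: "..." } ] */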
@@ -9,7 +9,7 @@ import Stream from "node:stream"
  
  /* external dependencies */
  import ws from "ws"
- import ReconnWebsocket, { ErrorEvent } from "@opensumi/reconnecting-websocket"
+ import ReconnWebSocket, { ErrorEvent } from "@opensumi/reconnecting-websocket"
  
  /* internal dependencies */
  import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
@@ -22,7 +22,7 @@ export default class SpeechFlowNodeWebsocket extends SpeechFlowNode {
  
      /* internal state */
      private server: ws.WebSocketServer | null = null
-     private client: WebSocket | null = null
+     private client: ReconnWebSocket | null = null
  
      /* construct node */
      constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
@@ -153,7 +153,7 @@ export default class SpeechFlowNodeWebsocket extends SpeechFlowNode {
          }
          else if (this.params.connect !== "") {
              /* connect remotely to a Websocket port */
-             this.client = new ReconnWebsocket(this.params.connect, [], {
+             this.client = new ReconnWebSocket(this.params.connect, [], {
                  WebSocket: ws,
                  WebSocketOptions: {},
                  reconnectionDelayGrowFactor: 1.3,
@@ -162,10 +162,10 @@ export default class SpeechFlowNodeWebsocket extends SpeechFlowNode {
                  connectionTimeout: 4000,
                  minUptime: 5000
              })
-             this.client.addEventListener("open", (ev: Event) => {
+             this.client.addEventListener("open", (ev) => {
                  this.log("info", `connection opened to URL ${this.params.connect}`)
              })
-             this.client.addEventListener("close", (ev: Event) => {
+             this.client.addEventListener("close", (ev) => {
                  this.log("info", `connection closed to URL ${this.params.connect}`)
              })
              this.client.addEventListener("error", (ev: ErrorEvent) => {
@@ -36,6 +36,8 @@ export class SpeechFlowChunk {
  
  /* the base class for all SpeechFlow nodes */
  export default class SpeechFlowNode extends Events.EventEmitter {
+     public static name: string | undefined
+ 
      /* general constant configuration (for reference) */
      config = {
          audioChannels: 1, /* audio mono channel */
@@ -80,6 +82,16 @@ export default class SpeechFlowNode extends Events.EventEmitter {
          this.timeZeroOffset = this.timeZero.diff(this.timeOpen)
      }
  
+     /* receive external request */
+     async receiveRequest (args: any[]) {
+         /* no-op */
+     }
+ 
+     /* send external response */
+     sendResponse (args: any[]) {
+         this.emit("send-response", args)
+     }
+ 
      /* INTERNAL: utility function: create "params" attribute from constructor of sub-classes */
      configure (spec: { [ id: string ]: { type: string, pos?: number, val?: any, match?: RegExp | ((x: any) => boolean) } }) {
          for (const name of Object.keys(spec)) {
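These two methods define the control-plane convention used by the new mute node earlier in this diff: a subclass overrides receiveRequest to accept a command and answers via sendResponse, which simply re-emits the arguments as a "send-response" event on the node's EventEmitter. A minimal hypothetical subclass illustrating the convention (the node and its "gain" command are invented for illustration):

    /* hypothetical node following the request/response convention */
    class SpeechFlowNodeGain extends SpeechFlowNode {
        private gain = 1.0
        async receiveRequest (args: any[]) {
            if (args.length === 2 && args[0] === "gain") {
                this.gain = Number(args[1])
                this.sendResponse([ "gain", this.gain ])  /* emits "send-response" */
            }
            else
                throw new Error("gain: invalid arguments in external request")
        }
    }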