speechflow 1.3.1 → 1.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +6 -0
- package/dst/speechflow-node-a2a-gender.d.ts +2 -0
- package/dst/speechflow-node-a2a-gender.js +137 -59
- package/dst/speechflow-node-a2a-gender.js.map +1 -1
- package/dst/speechflow-node-a2a-meter.d.ts +3 -1
- package/dst/speechflow-node-a2a-meter.js +80 -39
- package/dst/speechflow-node-a2a-meter.js.map +1 -1
- package/dst/speechflow-node-a2a-mute.d.ts +1 -0
- package/dst/speechflow-node-a2a-mute.js +37 -11
- package/dst/speechflow-node-a2a-mute.js.map +1 -1
- package/dst/speechflow-node-a2a-vad.d.ts +3 -0
- package/dst/speechflow-node-a2a-vad.js +194 -96
- package/dst/speechflow-node-a2a-vad.js.map +1 -1
- package/dst/speechflow-node-a2a-wav.js +27 -11
- package/dst/speechflow-node-a2a-wav.js.map +1 -1
- package/dst/speechflow-node-a2t-deepgram.d.ts +4 -0
- package/dst/speechflow-node-a2t-deepgram.js +136 -46
- package/dst/speechflow-node-a2t-deepgram.js.map +1 -1
- package/dst/speechflow-node-t2a-elevenlabs.d.ts +2 -0
- package/dst/speechflow-node-t2a-elevenlabs.js +61 -12
- package/dst/speechflow-node-t2a-elevenlabs.js.map +1 -1
- package/dst/speechflow-node-t2a-kokoro.d.ts +1 -0
- package/dst/speechflow-node-t2a-kokoro.js +10 -4
- package/dst/speechflow-node-t2a-kokoro.js.map +1 -1
- package/dst/speechflow-node-t2t-deepl.js +8 -4
- package/dst/speechflow-node-t2t-deepl.js.map +1 -1
- package/dst/speechflow-node-t2t-format.js +2 -2
- package/dst/speechflow-node-t2t-format.js.map +1 -1
- package/dst/speechflow-node-t2t-ollama.js +1 -1
- package/dst/speechflow-node-t2t-ollama.js.map +1 -1
- package/dst/speechflow-node-t2t-openai.js +1 -1
- package/dst/speechflow-node-t2t-openai.js.map +1 -1
- package/dst/speechflow-node-t2t-sentence.d.ts +1 -1
- package/dst/speechflow-node-t2t-sentence.js +34 -18
- package/dst/speechflow-node-t2t-sentence.js.map +1 -1
- package/dst/speechflow-node-t2t-subtitle.d.ts +0 -1
- package/dst/speechflow-node-t2t-subtitle.js +78 -190
- package/dst/speechflow-node-t2t-subtitle.js.map +1 -1
- package/dst/speechflow-node-t2t-transformers.js +2 -2
- package/dst/speechflow-node-t2t-transformers.js.map +1 -1
- package/dst/speechflow-node-x2x-filter.js +4 -4
- package/dst/speechflow-node-x2x-trace.js +6 -13
- package/dst/speechflow-node-x2x-trace.js.map +1 -1
- package/dst/speechflow-node-xio-device.js +12 -8
- package/dst/speechflow-node-xio-device.js.map +1 -1
- package/dst/speechflow-node-xio-file.js +9 -3
- package/dst/speechflow-node-xio-file.js.map +1 -1
- package/dst/speechflow-node-xio-mqtt.js +5 -2
- package/dst/speechflow-node-xio-mqtt.js.map +1 -1
- package/dst/speechflow-node-xio-websocket.js +11 -11
- package/dst/speechflow-node-xio-websocket.js.map +1 -1
- package/dst/speechflow-node.d.ts +0 -2
- package/dst/speechflow-node.js +0 -3
- package/dst/speechflow-node.js.map +1 -1
- package/dst/speechflow-utils.d.ts +5 -0
- package/dst/speechflow-utils.js +77 -44
- package/dst/speechflow-utils.js.map +1 -1
- package/dst/speechflow.js +101 -82
- package/dst/speechflow.js.map +1 -1
- package/etc/eslint.mjs +1 -2
- package/etc/stx.conf +3 -3
- package/package.json +6 -6
- package/src/speechflow-node-a2a-gender.ts +148 -64
- package/src/speechflow-node-a2a-meter.ts +87 -40
- package/src/speechflow-node-a2a-mute.ts +39 -11
- package/src/speechflow-node-a2a-vad.ts +206 -100
- package/src/speechflow-node-a2a-wav.ts +27 -11
- package/src/speechflow-node-a2t-deepgram.ts +139 -43
- package/src/speechflow-node-t2a-elevenlabs.ts +65 -12
- package/src/speechflow-node-t2a-kokoro.ts +11 -4
- package/src/speechflow-node-t2t-deepl.ts +9 -4
- package/src/speechflow-node-t2t-format.ts +2 -2
- package/src/speechflow-node-t2t-ollama.ts +1 -1
- package/src/speechflow-node-t2t-openai.ts +1 -1
- package/src/speechflow-node-t2t-sentence.ts +37 -20
- package/src/speechflow-node-t2t-transformers.ts +4 -3
- package/src/speechflow-node-x2x-filter.ts +4 -4
- package/src/speechflow-node-x2x-trace.ts +1 -1
- package/src/speechflow-node-xio-device.ts +12 -8
- package/src/speechflow-node-xio-file.ts +9 -3
- package/src/speechflow-node-xio-mqtt.ts +5 -2
- package/src/speechflow-node-xio-websocket.ts +12 -12
- package/src/speechflow-utils.ts +78 -44
- package/src/speechflow.ts +114 -35
|
@@ -21,7 +21,11 @@ export default class SpeechFlowNodeDeepgram extends SpeechFlowNode {
|
|
|
21
21
|
public static name = "deepgram"
|
|
22
22
|
|
|
23
23
|
/* internal state */
|
|
24
|
-
private dg:
|
|
24
|
+
private dg: Deepgram.LiveClient | null = null
|
|
25
|
+
private destroyed = false
|
|
26
|
+
private initTimeout: ReturnType<typeof setTimeout> | null = null
|
|
27
|
+
private connectionTimeout: ReturnType<typeof setTimeout> | null = null
|
|
28
|
+
private queue: utils.SingleQueue<SpeechFlowChunk | null> | null = null
|
|
25
29
|
|
|
26
30
|
/* construct node */
|
|
27
31
|
constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
|
|
@@ -43,16 +47,21 @@ export default class SpeechFlowNodeDeepgram extends SpeechFlowNode {
|
|
|
43
47
|
|
|
44
48
|
/* one-time status of node */
|
|
45
49
|
async status () {
|
|
46
|
-
let balance
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
const
|
|
52
|
-
|
|
53
|
-
|
|
50
|
+
let balance = 0
|
|
51
|
+
try {
|
|
52
|
+
const deepgram = Deepgram.createClient(this.params.keyAdm)
|
|
53
|
+
const response = await deepgram.manage.getProjects()
|
|
54
|
+
if (response !== null && response.error === null) {
|
|
55
|
+
for (const project of response.result.projects) {
|
|
56
|
+
const response = await deepgram.manage.getProjectBalances(project.project_id)
|
|
57
|
+
if (response !== null && response.error === null)
|
|
58
|
+
balance += response.result.balances[0]?.amount ?? 0
|
|
59
|
+
}
|
|
54
60
|
}
|
|
55
61
|
}
|
|
62
|
+
catch (error) {
|
|
63
|
+
this.log("warning", `failed to fetch balance: ${error}`)
|
|
64
|
+
}
|
|
56
65
|
return { balance: balance.toFixed(2) }
|
|
57
66
|
}
|
|
58
67
|
|
|
@@ -62,8 +71,11 @@ export default class SpeechFlowNodeDeepgram extends SpeechFlowNode {
|
|
|
62
71
|
if (this.config.audioBitDepth !== 16 || !this.config.audioLittleEndian)
|
|
63
72
|
throw new Error("Deepgram node currently supports PCM-S16LE audio only")
|
|
64
73
|
|
|
74
|
+
/* clear destruction flag */
|
|
75
|
+
this.destroyed = false
|
|
76
|
+
|
|
65
77
|
/* create queue for results */
|
|
66
|
-
|
|
78
|
+
this.queue = new utils.SingleQueue<SpeechFlowChunk | null>()
|
|
67
79
|
|
|
68
80
|
/* create a store for the meta information */
|
|
69
81
|
const metastore = new utils.TimeStore<Map<string, any>>()
|
|
@@ -96,6 +108,8 @@ export default class SpeechFlowNodeDeepgram extends SpeechFlowNode {
|
|
|
96
108
|
|
|
97
109
|
/* hook onto Deepgram API events */
|
|
98
110
|
this.dg.on(Deepgram.LiveTranscriptionEvents.Transcript, async (data) => {
|
|
111
|
+
if (this.destroyed || this.queue === null)
|
|
112
|
+
return
|
|
99
113
|
const text = (data.channel?.alternatives[0]?.transcript ?? "") as string
|
|
100
114
|
const words = (data.channel?.alternatives[0]?.words ?? []) as
|
|
101
115
|
{ word: string, punctuated_word?: string, start: number, end: number }[]
|
|
@@ -117,7 +131,7 @@ export default class SpeechFlowNodeDeepgram extends SpeechFlowNode {
|
|
|
117
131
|
return { word: word.punctuated_word ?? word.word, start, end }
|
|
118
132
|
}))
|
|
119
133
|
const chunk = new SpeechFlowChunk(start, end, "final", "text", text, meta)
|
|
120
|
-
queue.write(chunk)
|
|
134
|
+
this.queue.write(chunk)
|
|
121
135
|
}
|
|
122
136
|
})
|
|
123
137
|
this.dg.on(Deepgram.LiveTranscriptionEvents.Metadata, (data) => {
|
|
@@ -125,25 +139,29 @@ export default class SpeechFlowNodeDeepgram extends SpeechFlowNode {
|
|
|
125
139
|
})
|
|
126
140
|
this.dg.on(Deepgram.LiveTranscriptionEvents.Close, () => {
|
|
127
141
|
this.log("info", "connection close")
|
|
142
|
+
if (!this.destroyed && this.queue !== null)
|
|
143
|
+
this.queue.write(null)
|
|
128
144
|
})
|
|
129
145
|
this.dg.on(Deepgram.LiveTranscriptionEvents.Error, (error: Error) => {
|
|
130
146
|
this.log("error", `error: ${error.message}`)
|
|
147
|
+
if (!this.destroyed && this.queue !== null)
|
|
148
|
+
this.queue.write(null)
|
|
131
149
|
this.emit("error")
|
|
132
150
|
})
|
|
133
151
|
|
|
134
152
|
/* wait for Deepgram API to be available */
|
|
135
153
|
await new Promise((resolve, reject) => {
|
|
136
|
-
|
|
137
|
-
if (
|
|
138
|
-
|
|
154
|
+
this.connectionTimeout = setTimeout(() => {
|
|
155
|
+
if (this.connectionTimeout !== null) {
|
|
156
|
+
this.connectionTimeout = null
|
|
139
157
|
reject(new Error("Deepgram: timeout waiting for connection open"))
|
|
140
158
|
}
|
|
141
159
|
}, 8000)
|
|
142
160
|
this.dg!.once(Deepgram.LiveTranscriptionEvents.Open, () => {
|
|
143
161
|
this.log("info", "connection open")
|
|
144
|
-
if (
|
|
145
|
-
clearTimeout(
|
|
146
|
-
|
|
162
|
+
if (this.connectionTimeout !== null) {
|
|
163
|
+
clearTimeout(this.connectionTimeout)
|
|
164
|
+
this.connectionTimeout = null
|
|
147
165
|
}
|
|
148
166
|
resolve(true)
|
|
149
167
|
})
|
|
@@ -154,66 +172,117 @@ export default class SpeechFlowNodeDeepgram extends SpeechFlowNode {
|
|
|
154
172
|
|
|
155
173
|
/* work around Deepgram initialization problems */
|
|
156
174
|
let initDone = false
|
|
157
|
-
let initTimeout: ReturnType<typeof setTimeout> | null = null
|
|
158
175
|
const initTimeoutStart = () => {
|
|
159
|
-
if (initDone)
|
|
176
|
+
if (initDone || this.destroyed)
|
|
160
177
|
return
|
|
161
|
-
setTimeout(async () => {
|
|
162
|
-
if (initTimeout === null)
|
|
178
|
+
this.initTimeout = setTimeout(async () => {
|
|
179
|
+
if (this.initTimeout === null || this.destroyed)
|
|
163
180
|
return
|
|
164
|
-
initTimeout = null
|
|
181
|
+
this.initTimeout = null
|
|
165
182
|
this.log("warning", "initialization timeout -- restarting service usage")
|
|
166
183
|
await this.close()
|
|
167
|
-
this.
|
|
168
|
-
|
|
184
|
+
if (!this.destroyed)
|
|
185
|
+
await this.open()
|
|
186
|
+
}, 3 * 1000)
|
|
169
187
|
}
|
|
170
188
|
const initTimeoutStop = () => {
|
|
171
189
|
if (initDone)
|
|
172
190
|
return
|
|
173
191
|
initDone = true
|
|
174
|
-
if (initTimeout !== null) {
|
|
175
|
-
clearTimeout(initTimeout)
|
|
176
|
-
initTimeout = null
|
|
192
|
+
if (this.initTimeout !== null) {
|
|
193
|
+
clearTimeout(this.initTimeout)
|
|
194
|
+
this.initTimeout = null
|
|
177
195
|
}
|
|
178
196
|
}
|
|
179
197
|
|
|
180
198
|
/* provide Duplex stream and internally attach to Deepgram API */
|
|
181
|
-
const
|
|
182
|
-
const log = (level: string, msg: string) => {
|
|
183
|
-
this.log(level, msg)
|
|
184
|
-
}
|
|
185
|
-
const encoding = this.config.textEncoding
|
|
199
|
+
const self = this
|
|
186
200
|
this.stream = new Stream.Duplex({
|
|
187
201
|
writableObjectMode: true,
|
|
188
202
|
readableObjectMode: true,
|
|
189
203
|
decodeStrings: false,
|
|
190
204
|
highWaterMark: 1,
|
|
191
205
|
write (chunk: SpeechFlowChunk, encoding, callback) {
|
|
206
|
+
if (self.destroyed || self.dg === null) {
|
|
207
|
+
callback(new Error("stream already destroyed"))
|
|
208
|
+
return
|
|
209
|
+
}
|
|
192
210
|
if (chunk.type !== "audio")
|
|
193
211
|
callback(new Error("expected audio input chunk"))
|
|
194
212
|
else if (!Buffer.isBuffer(chunk.payload))
|
|
195
213
|
callback(new Error("expected Buffer input chunk"))
|
|
196
214
|
else {
|
|
197
215
|
if (chunk.payload.byteLength > 0) {
|
|
198
|
-
log("debug", `send data (${chunk.payload.byteLength} bytes)`)
|
|
216
|
+
self.log("debug", `send data (${chunk.payload.byteLength} bytes)`)
|
|
199
217
|
initTimeoutStart()
|
|
200
218
|
if (chunk.meta.size > 0)
|
|
201
219
|
metastore.store(chunk.timestampStart, chunk.timestampEnd, chunk.meta)
|
|
202
|
-
|
|
220
|
+
try {
|
|
221
|
+
self.dg.send(chunk.payload.buffer) /* intentionally discard all time information */
|
|
222
|
+
}
|
|
223
|
+
catch (error) {
|
|
224
|
+
callback(error instanceof Error ? error : new Error("failed to send to Deepgram"))
|
|
225
|
+
return
|
|
226
|
+
}
|
|
203
227
|
}
|
|
204
228
|
callback()
|
|
205
229
|
}
|
|
206
230
|
},
|
|
207
231
|
read (size) {
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
232
|
+
if (self.destroyed || self.queue === null) {
|
|
233
|
+
this.push(null)
|
|
234
|
+
return
|
|
235
|
+
}
|
|
236
|
+
let readTimeout: ReturnType<typeof setTimeout> | null = setTimeout(() => {
|
|
237
|
+
if (readTimeout !== null) {
|
|
238
|
+
readTimeout = null
|
|
239
|
+
if (!self.destroyed) {
|
|
240
|
+
self.log("warning", "read timeout - pushing null to prevent hanging")
|
|
241
|
+
this.push(null)
|
|
242
|
+
}
|
|
243
|
+
}
|
|
244
|
+
}, 30 * 1000)
|
|
245
|
+
self.queue.read().then((chunk) => {
|
|
246
|
+
if (readTimeout !== null) {
|
|
247
|
+
clearTimeout(readTimeout)
|
|
248
|
+
readTimeout = null
|
|
249
|
+
}
|
|
250
|
+
if (self.destroyed) {
|
|
251
|
+
this.push(null)
|
|
252
|
+
return
|
|
253
|
+
}
|
|
254
|
+
if (chunk === null) {
|
|
255
|
+
self.log("info", "received EOF signal")
|
|
256
|
+
this.push(null)
|
|
257
|
+
}
|
|
258
|
+
else {
|
|
259
|
+
self.log("info", `received data (${chunk.payload.length} bytes)`)
|
|
260
|
+
initTimeoutStop()
|
|
261
|
+
this.push(chunk, self.config.textEncoding)
|
|
262
|
+
}
|
|
263
|
+
}).catch((error) => {
|
|
264
|
+
if (readTimeout !== null) {
|
|
265
|
+
clearTimeout(readTimeout)
|
|
266
|
+
readTimeout = null
|
|
267
|
+
}
|
|
268
|
+
if (!self.destroyed) {
|
|
269
|
+
self.log("error", `queue read error: ${error.message}`)
|
|
270
|
+
this.push(null)
|
|
271
|
+
}
|
|
212
272
|
})
|
|
213
273
|
},
|
|
214
274
|
final (callback) {
|
|
215
|
-
dg
|
|
216
|
-
|
|
275
|
+
if (self.destroyed || self.dg === null) {
|
|
276
|
+
callback()
|
|
277
|
+
return
|
|
278
|
+
}
|
|
279
|
+
try {
|
|
280
|
+
self.dg.requestClose()
|
|
281
|
+
}
|
|
282
|
+
catch (error) {
|
|
283
|
+
self.log("warning", `error closing Deepgram connection: ${error}`)
|
|
284
|
+
}
|
|
285
|
+
/* NOTICE: do not push null here -- let the Deepgram close event handle it */
|
|
217
286
|
callback()
|
|
218
287
|
}
|
|
219
288
|
})
|
|
@@ -221,14 +290,41 @@ export default class SpeechFlowNodeDeepgram extends SpeechFlowNode {
|
|
|
221
290
|
|
|
222
291
|
/* close node */
|
|
223
292
|
async close () {
|
|
293
|
+
/* indicate destruction first to stop all async operations */
|
|
294
|
+
this.destroyed = true
|
|
295
|
+
|
|
296
|
+
/* cleanup all timers */
|
|
297
|
+
if (this.initTimeout !== null) {
|
|
298
|
+
clearTimeout(this.initTimeout)
|
|
299
|
+
this.initTimeout = null
|
|
300
|
+
}
|
|
301
|
+
if (this.connectionTimeout !== null) {
|
|
302
|
+
clearTimeout(this.connectionTimeout)
|
|
303
|
+
this.connectionTimeout = null
|
|
304
|
+
}
|
|
305
|
+
|
|
224
306
|
/* close stream */
|
|
225
307
|
if (this.stream !== null) {
|
|
226
308
|
this.stream.destroy()
|
|
227
309
|
this.stream = null
|
|
228
310
|
}
|
|
229
311
|
|
|
230
|
-
/*
|
|
231
|
-
if (this.dg !== null)
|
|
232
|
-
|
|
312
|
+
/* close Deepgram connection and remove listeners */
|
|
313
|
+
if (this.dg !== null) {
|
|
314
|
+
try {
|
|
315
|
+
this.dg.removeAllListeners()
|
|
316
|
+
this.dg.requestClose()
|
|
317
|
+
}
|
|
318
|
+
catch (error) {
|
|
319
|
+
this.log("warning", `error during Deepgram cleanup: ${error}`)
|
|
320
|
+
}
|
|
321
|
+
this.dg = null
|
|
322
|
+
}
|
|
323
|
+
|
|
324
|
+
/* signal EOF to any pending read operations */
|
|
325
|
+
if (this.queue !== null) {
|
|
326
|
+
this.queue.write(null)
|
|
327
|
+
this.queue = null
|
|
328
|
+
}
|
|
233
329
|
}
|
|
234
330
|
}
|
|
@@ -23,6 +23,8 @@ export default class SpeechFlowNodeElevenlabs extends SpeechFlowNode {
|
|
|
23
23
|
/* internal state */
|
|
24
24
|
private elevenlabs: ElevenLabs.ElevenLabsClient | null = null
|
|
25
25
|
private static speexInitialized = false
|
|
26
|
+
private destroyed = false
|
|
27
|
+
private resampler: SpeexResampler | null = null
|
|
26
28
|
|
|
27
29
|
/* construct node */
|
|
28
30
|
constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
|
|
@@ -39,6 +41,10 @@ export default class SpeechFlowNodeElevenlabs extends SpeechFlowNode {
|
|
|
39
41
|
optimize: { type: "string", val: "latency", pos: 5, match: /^(?:latency|quality)$/ }
|
|
40
42
|
})
|
|
41
43
|
|
|
44
|
+
/* sanity check parameters */
|
|
45
|
+
if (!this.params.key)
|
|
46
|
+
throw new Error("ElevenLabs API key not configured")
|
|
47
|
+
|
|
42
48
|
/* declare node input/output format */
|
|
43
49
|
this.input = "text"
|
|
44
50
|
this.output = "audio"
|
|
@@ -54,6 +60,9 @@ export default class SpeechFlowNodeElevenlabs extends SpeechFlowNode {
|
|
|
54
60
|
|
|
55
61
|
/* open node */
|
|
56
62
|
async open () {
|
|
63
|
+
/* clear destruction flag */
|
|
64
|
+
this.destroyed = false
|
|
65
|
+
|
|
57
66
|
/* establish ElevenLabs API connection */
|
|
58
67
|
this.elevenlabs = new ElevenLabs.ElevenLabsClient({
|
|
59
68
|
apiKey: this.params.key
|
|
@@ -120,37 +129,74 @@ export default class SpeechFlowNodeElevenlabs extends SpeechFlowNode {
|
|
|
120
129
|
await SpeexResampler.initPromise
|
|
121
130
|
SpeechFlowNodeElevenlabs.speexInitialized = true
|
|
122
131
|
}
|
|
123
|
-
|
|
132
|
+
this.resampler = new SpeexResampler(1, maxSampleRate, this.config.audioSampleRate, 7)
|
|
124
133
|
|
|
125
134
|
/* create transform stream and connect it to the ElevenLabs API */
|
|
126
|
-
const
|
|
135
|
+
const self = this
|
|
127
136
|
this.stream = new Stream.Transform({
|
|
128
137
|
writableObjectMode: true,
|
|
129
138
|
readableObjectMode: true,
|
|
130
139
|
decodeStrings: false,
|
|
131
140
|
highWaterMark: 1,
|
|
132
141
|
transform (chunk: SpeechFlowChunk, encoding, callback) {
|
|
142
|
+
if (self.destroyed) {
|
|
143
|
+
callback(new Error("stream already destroyed"))
|
|
144
|
+
return
|
|
145
|
+
}
|
|
133
146
|
if (Buffer.isBuffer(chunk.payload))
|
|
134
147
|
callback(new Error("invalid chunk payload type"))
|
|
135
148
|
else {
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
149
|
+
(async () => {
|
|
150
|
+
let processTimeout: ReturnType<typeof setTimeout> | null = setTimeout(() => {
|
|
151
|
+
processTimeout = null
|
|
152
|
+
callback(new Error("ElevenLabs API timeout"))
|
|
153
|
+
}, 60 * 1000)
|
|
154
|
+
try {
|
|
155
|
+
const stream = await speechStream(chunk.payload as string)
|
|
156
|
+
if (self.destroyed) {
|
|
157
|
+
if (processTimeout !== null) {
|
|
158
|
+
clearTimeout(processTimeout)
|
|
159
|
+
processTimeout = null
|
|
160
|
+
}
|
|
161
|
+
callback(new Error("stream destroyed during processing"))
|
|
162
|
+
return
|
|
163
|
+
}
|
|
164
|
+
const buffer = await getStreamAsBuffer(stream)
|
|
165
|
+
if (self.destroyed) {
|
|
166
|
+
if (processTimeout !== null) {
|
|
167
|
+
clearTimeout(processTimeout)
|
|
168
|
+
processTimeout = null
|
|
169
|
+
}
|
|
170
|
+
callback(new Error("stream destroyed during processing"))
|
|
171
|
+
return
|
|
172
|
+
}
|
|
173
|
+
const bufferResampled = self.resampler!.processChunk(buffer)
|
|
174
|
+
self.log("info", `ElevenLabs: received audio (buffer length: ${buffer.byteLength})`)
|
|
140
175
|
const chunkNew = chunk.clone()
|
|
141
176
|
chunkNew.type = "audio"
|
|
142
177
|
chunkNew.payload = bufferResampled
|
|
178
|
+
if (processTimeout !== null) {
|
|
179
|
+
clearTimeout(processTimeout)
|
|
180
|
+
processTimeout = null
|
|
181
|
+
}
|
|
143
182
|
this.push(chunkNew)
|
|
144
183
|
callback()
|
|
145
|
-
}
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
184
|
+
}
|
|
185
|
+
catch (error) {
|
|
186
|
+
if (processTimeout !== null) {
|
|
187
|
+
clearTimeout(processTimeout)
|
|
188
|
+
processTimeout = null
|
|
189
|
+
}
|
|
190
|
+
callback(error instanceof Error ? error : new Error("ElevenLabs processing failed"))
|
|
191
|
+
}
|
|
192
|
+
})()
|
|
151
193
|
}
|
|
152
194
|
},
|
|
153
195
|
final (callback) {
|
|
196
|
+
if (self.destroyed) {
|
|
197
|
+
callback()
|
|
198
|
+
return
|
|
199
|
+
}
|
|
154
200
|
this.push(null)
|
|
155
201
|
callback()
|
|
156
202
|
}
|
|
@@ -159,12 +205,19 @@ export default class SpeechFlowNodeElevenlabs extends SpeechFlowNode {
|
|
|
159
205
|
|
|
160
206
|
/* close node */
|
|
161
207
|
async close () {
|
|
208
|
+
/* indicate destruction */
|
|
209
|
+
this.destroyed = true
|
|
210
|
+
|
|
162
211
|
/* destroy stream */
|
|
163
212
|
if (this.stream !== null) {
|
|
164
213
|
this.stream.destroy()
|
|
165
214
|
this.stream = null
|
|
166
215
|
}
|
|
167
216
|
|
|
217
|
+
/* destroy resampler */
|
|
218
|
+
if (this.resampler !== null)
|
|
219
|
+
this.resampler = null
|
|
220
|
+
|
|
168
221
|
/* destroy ElevenLabs API */
|
|
169
222
|
if (this.elevenlabs !== null)
|
|
170
223
|
this.elevenlabs = null
|
|
@@ -21,6 +21,7 @@ export default class SpeechFlowNodeKokoro extends SpeechFlowNode {
|
|
|
21
21
|
|
|
22
22
|
/* internal state */
|
|
23
23
|
private kokoro: KokoroTTS | null = null
|
|
24
|
+
private resampler: SpeexResampler | null = null
|
|
24
25
|
private static speexInitialized = false
|
|
25
26
|
|
|
26
27
|
/* construct node */
|
|
@@ -59,9 +60,11 @@ export default class SpeechFlowNodeKokoro extends SpeechFlowNode {
|
|
|
59
60
|
const interval = setInterval(() => {
|
|
60
61
|
for (const [ artifact, percent ] of progressState) {
|
|
61
62
|
this.log("info", `downloaded ${percent.toFixed(2)}% of artifact "${artifact}"`)
|
|
62
|
-
if (percent >=
|
|
63
|
+
if (percent >= 100.0)
|
|
63
64
|
progressState.delete(artifact)
|
|
64
65
|
}
|
|
66
|
+
if (progressState.size === 0)
|
|
67
|
+
clearInterval(interval)
|
|
65
68
|
}, 1000)
|
|
66
69
|
this.kokoro = await KokoroTTS.from_pretrained(model, {
|
|
67
70
|
dtype: "q4f16",
|
|
@@ -78,7 +81,7 @@ export default class SpeechFlowNodeKokoro extends SpeechFlowNode {
|
|
|
78
81
|
await SpeexResampler.initPromise
|
|
79
82
|
SpeechFlowNodeKokoro.speexInitialized = true
|
|
80
83
|
}
|
|
81
|
-
|
|
84
|
+
this.resampler = new SpeexResampler(1, 24000, this.config.audioSampleRate, 7)
|
|
82
85
|
|
|
83
86
|
/* determine voice for text-to-speech operation */
|
|
84
87
|
const voices = {
|
|
@@ -91,7 +94,7 @@ export default class SpeechFlowNodeKokoro extends SpeechFlowNode {
|
|
|
91
94
|
if (voice === undefined)
|
|
92
95
|
throw new Error(`invalid Kokoro voice "${this.params.voice}"`)
|
|
93
96
|
|
|
94
|
-
/* perform text-to-speech operation with
|
|
97
|
+
/* perform text-to-speech operation with Kokoro API */
|
|
95
98
|
const text2speech = async (text: string) => {
|
|
96
99
|
this.log("info", `Kokoro: input: "${text}"`)
|
|
97
100
|
const audio = await this.kokoro!.generate(text, {
|
|
@@ -110,7 +113,7 @@ export default class SpeechFlowNodeKokoro extends SpeechFlowNode {
|
|
|
110
113
|
}
|
|
111
114
|
|
|
112
115
|
/* resample audio samples from PCM/I16/24kHz to PCM/I16/48kHz */
|
|
113
|
-
const buffer2 = resampler
|
|
116
|
+
const buffer2 = this.resampler!.processChunk(buffer1)
|
|
114
117
|
|
|
115
118
|
return buffer2
|
|
116
119
|
}
|
|
@@ -153,6 +156,10 @@ export default class SpeechFlowNodeKokoro extends SpeechFlowNode {
|
|
|
153
156
|
this.stream = null
|
|
154
157
|
}
|
|
155
158
|
|
|
159
|
+
/* destroy resampler */
|
|
160
|
+
if (this.resampler !== null)
|
|
161
|
+
this.resampler = null
|
|
162
|
+
|
|
156
163
|
/* destroy Kokoro API */
|
|
157
164
|
if (this.kokoro !== null)
|
|
158
165
|
this.kokoro = null
|
|
@@ -27,12 +27,16 @@ export default class SpeechFlowNodeDeepL extends SpeechFlowNode {
|
|
|
27
27
|
|
|
28
28
|
/* declare node configuration parameters */
|
|
29
29
|
this.configure({
|
|
30
|
-
key: { type: "string", val: process.env.SPEECHFLOW_DEEPL_KEY },
|
|
30
|
+
key: { type: "string", val: process.env.SPEECHFLOW_DEEPL_KEY ?? "" },
|
|
31
31
|
src: { type: "string", pos: 0, val: "de", match: /^(?:de|en)$/ },
|
|
32
32
|
dst: { type: "string", pos: 1, val: "en", match: /^(?:de|en)$/ },
|
|
33
33
|
optimize: { type: "string", pos: 2, val: "latency", match: /^(?:latency|quality)$/ }
|
|
34
34
|
})
|
|
35
35
|
|
|
36
|
+
/* validate API key */
|
|
37
|
+
if (this.params.key === "")
|
|
38
|
+
throw new Error("DeepL API key is required")
|
|
39
|
+
|
|
36
40
|
/* sanity check situation */
|
|
37
41
|
if (this.params.src === this.params.dst)
|
|
38
42
|
throw new Error("source and destination languages cannot be the same")
|
|
@@ -44,9 +48,10 @@ export default class SpeechFlowNodeDeepL extends SpeechFlowNode {
|
|
|
44
48
|
|
|
45
49
|
/* one-time status of node */
|
|
46
50
|
async status () {
|
|
47
|
-
|
|
48
|
-
const usage = await
|
|
49
|
-
const
|
|
51
|
+
const deepl = new DeepL.Translator(this.params.key)
|
|
52
|
+
const usage = await deepl.getUsage()
|
|
53
|
+
const limit = usage?.character?.limit ?? 1
|
|
54
|
+
const percent = limit > 0 ? ((usage?.character?.count ?? 0) / limit * 100) : 0
|
|
50
55
|
return { usage: `${percent.toFixed(8)}%` }
|
|
51
56
|
}
|
|
52
57
|
|
|
@@ -41,7 +41,7 @@ export default class SpeechFlowNodeFormat extends SpeechFlowNode {
|
|
|
41
41
|
return text
|
|
42
42
|
}
|
|
43
43
|
|
|
44
|
-
/* establish a duplex stream and connect it to
|
|
44
|
+
/* establish a duplex stream and connect it to text formatting */
|
|
45
45
|
this.stream = new Stream.Transform({
|
|
46
46
|
readableObjectMode: true,
|
|
47
47
|
writableObjectMode: true,
|
|
@@ -74,7 +74,7 @@ export default class SpeechFlowNodeFormat extends SpeechFlowNode {
|
|
|
74
74
|
})
|
|
75
75
|
}
|
|
76
76
|
|
|
77
|
-
/*
|
|
77
|
+
/* close node */
|
|
78
78
|
async close () {
|
|
79
79
|
/* close stream */
|
|
80
80
|
if (this.stream !== null) {
|
|
@@ -39,7 +39,7 @@ export default class SpeechFlowNodeOllama extends SpeechFlowNode {
|
|
|
39
39
|
"Do NOT give any preamble.\n" +
|
|
40
40
|
"Do NOT give any prolog.\n" +
|
|
41
41
|
"Do NOT give any epilog.\n" +
|
|
42
|
-
"Do NOT change the
|
|
42
|
+
"Do NOT change the grammar.\n" +
|
|
43
43
|
"Do NOT use synonyms for words.\n" +
|
|
44
44
|
"Keep all words.\n" +
|
|
45
45
|
"Fill in missing commas.\n" +
|
|
@@ -39,7 +39,7 @@ export default class SpeechFlowNodeOpenAI extends SpeechFlowNode {
|
|
|
39
39
|
"Do NOT give any preamble.\n" +
|
|
40
40
|
"Do NOT give any prolog.\n" +
|
|
41
41
|
"Do NOT give any epilog.\n" +
|
|
42
|
-
"Do NOT change the
|
|
42
|
+
"Do NOT change the grammar.\n" +
|
|
43
43
|
"Do NOT use synonyms for words.\n" +
|
|
44
44
|
"Keep all words.\n" +
|
|
45
45
|
"Fill in missing commas.\n" +
|