speechflow 1.3.0 → 1.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +15 -0
- package/README.md +165 -22
- package/dst/speechflow-node-a2a-gender.d.ts +2 -0
- package/dst/speechflow-node-a2a-gender.js +137 -59
- package/dst/speechflow-node-a2a-gender.js.map +1 -1
- package/dst/speechflow-node-a2a-meter.d.ts +3 -1
- package/dst/speechflow-node-a2a-meter.js +79 -35
- package/dst/speechflow-node-a2a-meter.js.map +1 -1
- package/dst/speechflow-node-a2a-mute.d.ts +1 -0
- package/dst/speechflow-node-a2a-mute.js +37 -11
- package/dst/speechflow-node-a2a-mute.js.map +1 -1
- package/dst/speechflow-node-a2a-vad.d.ts +3 -0
- package/dst/speechflow-node-a2a-vad.js +194 -96
- package/dst/speechflow-node-a2a-vad.js.map +1 -1
- package/dst/speechflow-node-a2a-wav.js +27 -11
- package/dst/speechflow-node-a2a-wav.js.map +1 -1
- package/dst/speechflow-node-a2t-deepgram.d.ts +4 -0
- package/dst/speechflow-node-a2t-deepgram.js +141 -43
- package/dst/speechflow-node-a2t-deepgram.js.map +1 -1
- package/dst/speechflow-node-t2a-elevenlabs.d.ts +2 -0
- package/dst/speechflow-node-t2a-elevenlabs.js +61 -12
- package/dst/speechflow-node-t2a-elevenlabs.js.map +1 -1
- package/dst/speechflow-node-t2a-kokoro.d.ts +1 -0
- package/dst/speechflow-node-t2a-kokoro.js +10 -4
- package/dst/speechflow-node-t2a-kokoro.js.map +1 -1
- package/dst/speechflow-node-t2t-deepl.js +8 -4
- package/dst/speechflow-node-t2t-deepl.js.map +1 -1
- package/dst/speechflow-node-t2t-format.js +2 -2
- package/dst/speechflow-node-t2t-format.js.map +1 -1
- package/dst/speechflow-node-t2t-ollama.js +1 -1
- package/dst/speechflow-node-t2t-ollama.js.map +1 -1
- package/dst/speechflow-node-t2t-openai.js +1 -1
- package/dst/speechflow-node-t2t-openai.js.map +1 -1
- package/dst/speechflow-node-t2t-sentence.d.ts +1 -1
- package/dst/speechflow-node-t2t-sentence.js +35 -24
- package/dst/speechflow-node-t2t-sentence.js.map +1 -1
- package/dst/speechflow-node-t2t-subtitle.js +85 -17
- package/dst/speechflow-node-t2t-subtitle.js.map +1 -1
- package/dst/speechflow-node-t2t-transformers.js +2 -2
- package/dst/speechflow-node-t2t-transformers.js.map +1 -1
- package/dst/speechflow-node-x2x-filter.js +4 -4
- package/dst/speechflow-node-x2x-trace.js +1 -1
- package/dst/speechflow-node-x2x-trace.js.map +1 -1
- package/dst/speechflow-node-xio-device.js +12 -8
- package/dst/speechflow-node-xio-device.js.map +1 -1
- package/dst/speechflow-node-xio-file.js +9 -3
- package/dst/speechflow-node-xio-file.js.map +1 -1
- package/dst/speechflow-node-xio-mqtt.js +5 -2
- package/dst/speechflow-node-xio-mqtt.js.map +1 -1
- package/dst/speechflow-node-xio-websocket.js +11 -11
- package/dst/speechflow-node-xio-websocket.js.map +1 -1
- package/dst/speechflow-utils.d.ts +5 -0
- package/dst/speechflow-utils.js +77 -44
- package/dst/speechflow-utils.js.map +1 -1
- package/dst/speechflow.js +104 -34
- package/dst/speechflow.js.map +1 -1
- package/etc/eslint.mjs +1 -2
- package/etc/speechflow.yaml +18 -7
- package/etc/stx.conf +3 -3
- package/package.json +14 -13
- package/src/speechflow-node-a2a-gender.ts +148 -64
- package/src/speechflow-node-a2a-meter.ts +87 -40
- package/src/speechflow-node-a2a-mute.ts +39 -11
- package/src/speechflow-node-a2a-vad.ts +206 -100
- package/src/speechflow-node-a2a-wav.ts +27 -11
- package/src/speechflow-node-a2t-deepgram.ts +148 -45
- package/src/speechflow-node-t2a-elevenlabs.ts +65 -12
- package/src/speechflow-node-t2a-kokoro.ts +11 -4
- package/src/speechflow-node-t2t-deepl.ts +9 -4
- package/src/speechflow-node-t2t-format.ts +2 -2
- package/src/speechflow-node-t2t-ollama.ts +1 -1
- package/src/speechflow-node-t2t-openai.ts +1 -1
- package/src/speechflow-node-t2t-sentence.ts +38 -27
- package/src/speechflow-node-t2t-subtitle.ts +62 -15
- package/src/speechflow-node-t2t-transformers.ts +4 -3
- package/src/speechflow-node-x2x-filter.ts +4 -4
- package/src/speechflow-node-x2x-trace.ts +1 -1
- package/src/speechflow-node-xio-device.ts +12 -8
- package/src/speechflow-node-xio-file.ts +9 -3
- package/src/speechflow-node-xio-mqtt.ts +5 -2
- package/src/speechflow-node-xio-websocket.ts +12 -12
- package/src/speechflow-utils.ts +78 -44
- package/src/speechflow.ts +117 -36
|
@@ -21,7 +21,11 @@ export default class SpeechFlowNodeDeepgram extends SpeechFlowNode {
|
|
|
21
21
|
public static name = "deepgram"
|
|
22
22
|
|
|
23
23
|
/* internal state */
|
|
24
|
-
private dg:
|
|
24
|
+
private dg: Deepgram.LiveClient | null = null
|
|
25
|
+
private destroyed = false
|
|
26
|
+
private initTimeout: ReturnType<typeof setTimeout> | null = null
|
|
27
|
+
private connectionTimeout: ReturnType<typeof setTimeout> | null = null
|
|
28
|
+
private queue: utils.SingleQueue<SpeechFlowChunk | null> | null = null
|
|
25
29
|
|
|
26
30
|
/* construct node */
|
|
27
31
|
constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
|
|
@@ -43,16 +47,21 @@ export default class SpeechFlowNodeDeepgram extends SpeechFlowNode {
|
|
|
43
47
|
|
|
44
48
|
/* one-time status of node */
|
|
45
49
|
async status () {
|
|
46
|
-
let balance
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
const
|
|
52
|
-
|
|
53
|
-
|
|
50
|
+
let balance = 0
|
|
51
|
+
try {
|
|
52
|
+
const deepgram = Deepgram.createClient(this.params.keyAdm)
|
|
53
|
+
const response = await deepgram.manage.getProjects()
|
|
54
|
+
if (response !== null && response.error === null) {
|
|
55
|
+
for (const project of response.result.projects) {
|
|
56
|
+
const response = await deepgram.manage.getProjectBalances(project.project_id)
|
|
57
|
+
if (response !== null && response.error === null)
|
|
58
|
+
balance += response.result.balances[0]?.amount ?? 0
|
|
59
|
+
}
|
|
54
60
|
}
|
|
55
61
|
}
|
|
62
|
+
catch (error) {
|
|
63
|
+
this.log("warning", `failed to fetch balance: ${error}`)
|
|
64
|
+
}
|
|
56
65
|
return { balance: balance.toFixed(2) }
|
|
57
66
|
}
|
|
58
67
|
|
|
@@ -62,8 +71,11 @@ export default class SpeechFlowNodeDeepgram extends SpeechFlowNode {
|
|
|
62
71
|
if (this.config.audioBitDepth !== 16 || !this.config.audioLittleEndian)
|
|
63
72
|
throw new Error("Deepgram node currently supports PCM-S16LE audio only")
|
|
64
73
|
|
|
74
|
+
/* clear destruction flag */
|
|
75
|
+
this.destroyed = false
|
|
76
|
+
|
|
65
77
|
/* create queue for results */
|
|
66
|
-
|
|
78
|
+
this.queue = new utils.SingleQueue<SpeechFlowChunk | null>()
|
|
67
79
|
|
|
68
80
|
/* create a store for the meta information */
|
|
69
81
|
const metastore = new utils.TimeStore<Map<string, any>>()
|
|
@@ -96,7 +108,11 @@ export default class SpeechFlowNodeDeepgram extends SpeechFlowNode {
|
|
|
96
108
|
|
|
97
109
|
/* hook onto Deepgram API events */
|
|
98
110
|
this.dg.on(Deepgram.LiveTranscriptionEvents.Transcript, async (data) => {
|
|
99
|
-
|
|
111
|
+
if (this.destroyed || this.queue === null)
|
|
112
|
+
return
|
|
113
|
+
const text = (data.channel?.alternatives[0]?.transcript ?? "") as string
|
|
114
|
+
const words = (data.channel?.alternatives[0]?.words ?? []) as
|
|
115
|
+
{ word: string, punctuated_word?: string, start: number, end: number }[]
|
|
100
116
|
if (text === "")
|
|
101
117
|
this.log("info", `empty/dummy text received (start: ${data.start}s, duration: ${data.duration.toFixed(2)}s)`)
|
|
102
118
|
else {
|
|
@@ -109,8 +125,13 @@ export default class SpeechFlowNodeDeepgram extends SpeechFlowNode {
|
|
|
109
125
|
return prev
|
|
110
126
|
}, new Map<string, any>())
|
|
111
127
|
metastore.prune(start)
|
|
128
|
+
meta.set("words", words.map((word) => {
|
|
129
|
+
const start = Duration.fromMillis(word.start * 1000).plus(this.timeZeroOffset)
|
|
130
|
+
const end = Duration.fromMillis(word.end * 1000).plus(this.timeZeroOffset)
|
|
131
|
+
return { word: word.punctuated_word ?? word.word, start, end }
|
|
132
|
+
}))
|
|
112
133
|
const chunk = new SpeechFlowChunk(start, end, "final", "text", text, meta)
|
|
113
|
-
queue.write(chunk)
|
|
134
|
+
this.queue.write(chunk)
|
|
114
135
|
}
|
|
115
136
|
})
|
|
116
137
|
this.dg.on(Deepgram.LiveTranscriptionEvents.Metadata, (data) => {
|
|
@@ -118,25 +139,29 @@ export default class SpeechFlowNodeDeepgram extends SpeechFlowNode {
|
|
|
118
139
|
})
|
|
119
140
|
this.dg.on(Deepgram.LiveTranscriptionEvents.Close, () => {
|
|
120
141
|
this.log("info", "connection close")
|
|
142
|
+
if (!this.destroyed && this.queue !== null)
|
|
143
|
+
this.queue.write(null)
|
|
121
144
|
})
|
|
122
145
|
this.dg.on(Deepgram.LiveTranscriptionEvents.Error, (error: Error) => {
|
|
123
146
|
this.log("error", `error: ${error.message}`)
|
|
147
|
+
if (!this.destroyed && this.queue !== null)
|
|
148
|
+
this.queue.write(null)
|
|
124
149
|
this.emit("error")
|
|
125
150
|
})
|
|
126
151
|
|
|
127
152
|
/* wait for Deepgram API to be available */
|
|
128
153
|
await new Promise((resolve, reject) => {
|
|
129
|
-
|
|
130
|
-
if (
|
|
131
|
-
|
|
154
|
+
this.connectionTimeout = setTimeout(() => {
|
|
155
|
+
if (this.connectionTimeout !== null) {
|
|
156
|
+
this.connectionTimeout = null
|
|
132
157
|
reject(new Error("Deepgram: timeout waiting for connection open"))
|
|
133
158
|
}
|
|
134
|
-
},
|
|
159
|
+
}, 8000)
|
|
135
160
|
this.dg!.once(Deepgram.LiveTranscriptionEvents.Open, () => {
|
|
136
161
|
this.log("info", "connection open")
|
|
137
|
-
if (
|
|
138
|
-
clearTimeout(
|
|
139
|
-
|
|
162
|
+
if (this.connectionTimeout !== null) {
|
|
163
|
+
clearTimeout(this.connectionTimeout)
|
|
164
|
+
this.connectionTimeout = null
|
|
140
165
|
}
|
|
141
166
|
resolve(true)
|
|
142
167
|
})
|
|
@@ -147,66 +172,117 @@ export default class SpeechFlowNodeDeepgram extends SpeechFlowNode {
|
|
|
147
172
|
|
|
148
173
|
/* workaround Deepgram initialization problems */
|
|
149
174
|
let initDone = false
|
|
150
|
-
let initTimeout: ReturnType<typeof setTimeout> | null = null
|
|
151
175
|
const initTimeoutStart = () => {
|
|
152
|
-
if (initDone)
|
|
176
|
+
if (initDone || this.destroyed)
|
|
153
177
|
return
|
|
154
|
-
setTimeout(async () => {
|
|
155
|
-
if (initTimeout === null)
|
|
178
|
+
this.initTimeout = setTimeout(async () => {
|
|
179
|
+
if (this.initTimeout === null || this.destroyed)
|
|
156
180
|
return
|
|
157
|
-
initTimeout = null
|
|
181
|
+
this.initTimeout = null
|
|
158
182
|
this.log("warning", "initialization timeout -- restarting service usage")
|
|
159
183
|
await this.close()
|
|
160
|
-
this.
|
|
161
|
-
|
|
184
|
+
if (!this.destroyed)
|
|
185
|
+
await this.open()
|
|
186
|
+
}, 3 * 1000)
|
|
162
187
|
}
|
|
163
188
|
const initTimeoutStop = () => {
|
|
164
189
|
if (initDone)
|
|
165
190
|
return
|
|
166
191
|
initDone = true
|
|
167
|
-
if (initTimeout !== null) {
|
|
168
|
-
clearTimeout(initTimeout)
|
|
169
|
-
initTimeout = null
|
|
192
|
+
if (this.initTimeout !== null) {
|
|
193
|
+
clearTimeout(this.initTimeout)
|
|
194
|
+
this.initTimeout = null
|
|
170
195
|
}
|
|
171
196
|
}
|
|
172
197
|
|
|
173
198
|
/* provide Duplex stream and internally attach to Deepgram API */
|
|
174
|
-
const
|
|
175
|
-
const log = (level: string, msg: string) => {
|
|
176
|
-
this.log(level, msg)
|
|
177
|
-
}
|
|
178
|
-
const encoding = this.config.textEncoding
|
|
199
|
+
const self = this
|
|
179
200
|
this.stream = new Stream.Duplex({
|
|
180
201
|
writableObjectMode: true,
|
|
181
202
|
readableObjectMode: true,
|
|
182
203
|
decodeStrings: false,
|
|
183
204
|
highWaterMark: 1,
|
|
184
205
|
write (chunk: SpeechFlowChunk, encoding, callback) {
|
|
206
|
+
if (self.destroyed || self.dg === null) {
|
|
207
|
+
callback(new Error("stream already destroyed"))
|
|
208
|
+
return
|
|
209
|
+
}
|
|
185
210
|
if (chunk.type !== "audio")
|
|
186
211
|
callback(new Error("expected audio input chunk"))
|
|
187
212
|
else if (!Buffer.isBuffer(chunk.payload))
|
|
188
213
|
callback(new Error("expected Buffer input chunk"))
|
|
189
214
|
else {
|
|
190
215
|
if (chunk.payload.byteLength > 0) {
|
|
191
|
-
log("debug", `send data (${chunk.payload.byteLength} bytes)`)
|
|
216
|
+
self.log("debug", `send data (${chunk.payload.byteLength} bytes)`)
|
|
192
217
|
initTimeoutStart()
|
|
193
218
|
if (chunk.meta.size > 0)
|
|
194
219
|
metastore.store(chunk.timestampStart, chunk.timestampEnd, chunk.meta)
|
|
195
|
-
|
|
220
|
+
try {
|
|
221
|
+
self.dg.send(chunk.payload.buffer) /* intentionally discard all time information */
|
|
222
|
+
}
|
|
223
|
+
catch (error) {
|
|
224
|
+
callback(error instanceof Error ? error : new Error("failed to send to Deepgram"))
|
|
225
|
+
return
|
|
226
|
+
}
|
|
196
227
|
}
|
|
197
228
|
callback()
|
|
198
229
|
}
|
|
199
230
|
},
|
|
200
231
|
read (size) {
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
232
|
+
if (self.destroyed || self.queue === null) {
|
|
233
|
+
this.push(null)
|
|
234
|
+
return
|
|
235
|
+
}
|
|
236
|
+
let readTimeout: ReturnType<typeof setTimeout> | null = setTimeout(() => {
|
|
237
|
+
if (readTimeout !== null) {
|
|
238
|
+
readTimeout = null
|
|
239
|
+
if (!self.destroyed) {
|
|
240
|
+
self.log("warning", "read timeout - pushing null to prevent hanging")
|
|
241
|
+
this.push(null)
|
|
242
|
+
}
|
|
243
|
+
}
|
|
244
|
+
}, 30 * 1000)
|
|
245
|
+
self.queue.read().then((chunk) => {
|
|
246
|
+
if (readTimeout !== null) {
|
|
247
|
+
clearTimeout(readTimeout)
|
|
248
|
+
readTimeout = null
|
|
249
|
+
}
|
|
250
|
+
if (self.destroyed) {
|
|
251
|
+
this.push(null)
|
|
252
|
+
return
|
|
253
|
+
}
|
|
254
|
+
if (chunk === null) {
|
|
255
|
+
self.log("info", "received EOF signal")
|
|
256
|
+
this.push(null)
|
|
257
|
+
}
|
|
258
|
+
else {
|
|
259
|
+
self.log("info", `received data (${chunk.payload.length} bytes)`)
|
|
260
|
+
initTimeoutStop()
|
|
261
|
+
this.push(chunk, self.config.textEncoding)
|
|
262
|
+
}
|
|
263
|
+
}).catch((error) => {
|
|
264
|
+
if (readTimeout !== null) {
|
|
265
|
+
clearTimeout(readTimeout)
|
|
266
|
+
readTimeout = null
|
|
267
|
+
}
|
|
268
|
+
if (!self.destroyed) {
|
|
269
|
+
self.log("error", `queue read error: ${error.message}`)
|
|
270
|
+
this.push(null)
|
|
271
|
+
}
|
|
205
272
|
})
|
|
206
273
|
},
|
|
207
274
|
final (callback) {
|
|
208
|
-
dg
|
|
209
|
-
|
|
275
|
+
if (self.destroyed || self.dg === null) {
|
|
276
|
+
callback()
|
|
277
|
+
return
|
|
278
|
+
}
|
|
279
|
+
try {
|
|
280
|
+
self.dg.requestClose()
|
|
281
|
+
}
|
|
282
|
+
catch (error) {
|
|
283
|
+
self.log("warning", `error closing Deepgram connection: ${error}`)
|
|
284
|
+
}
|
|
285
|
+
/* NOTICE: do not push null here -- let the Deepgram close event handle it */
|
|
210
286
|
callback()
|
|
211
287
|
}
|
|
212
288
|
})
|
|
@@ -214,14 +290,41 @@ export default class SpeechFlowNodeDeepgram extends SpeechFlowNode {
|
|
|
214
290
|
|
|
215
291
|
/* close node */
|
|
216
292
|
async close () {
|
|
293
|
+
/* indicate destruction first to stop all async operations */
|
|
294
|
+
this.destroyed = true
|
|
295
|
+
|
|
296
|
+
/* cleanup all timers */
|
|
297
|
+
if (this.initTimeout !== null) {
|
|
298
|
+
clearTimeout(this.initTimeout)
|
|
299
|
+
this.initTimeout = null
|
|
300
|
+
}
|
|
301
|
+
if (this.connectionTimeout !== null) {
|
|
302
|
+
clearTimeout(this.connectionTimeout)
|
|
303
|
+
this.connectionTimeout = null
|
|
304
|
+
}
|
|
305
|
+
|
|
217
306
|
/* close stream */
|
|
218
307
|
if (this.stream !== null) {
|
|
219
308
|
this.stream.destroy()
|
|
220
309
|
this.stream = null
|
|
221
310
|
}
|
|
222
311
|
|
|
223
|
-
/*
|
|
224
|
-
if (this.dg !== null)
|
|
225
|
-
|
|
312
|
+
/* close Deepgram connection and remove listeners */
|
|
313
|
+
if (this.dg !== null) {
|
|
314
|
+
try {
|
|
315
|
+
this.dg.removeAllListeners()
|
|
316
|
+
this.dg.requestClose()
|
|
317
|
+
}
|
|
318
|
+
catch (error) {
|
|
319
|
+
this.log("warning", `error during Deepgram cleanup: ${error}`)
|
|
320
|
+
}
|
|
321
|
+
this.dg = null
|
|
322
|
+
}
|
|
323
|
+
|
|
324
|
+
/* signal EOF to any pending read operations */
|
|
325
|
+
if (this.queue !== null) {
|
|
326
|
+
this.queue.write(null)
|
|
327
|
+
this.queue = null
|
|
328
|
+
}
|
|
226
329
|
}
|
|
227
330
|
}
|
|
@@ -23,6 +23,8 @@ export default class SpeechFlowNodeElevenlabs extends SpeechFlowNode {
|
|
|
23
23
|
/* internal state */
|
|
24
24
|
private elevenlabs: ElevenLabs.ElevenLabsClient | null = null
|
|
25
25
|
private static speexInitialized = false
|
|
26
|
+
private destroyed = false
|
|
27
|
+
private resampler: SpeexResampler | null = null
|
|
26
28
|
|
|
27
29
|
/* construct node */
|
|
28
30
|
constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
|
|
@@ -39,6 +41,10 @@ export default class SpeechFlowNodeElevenlabs extends SpeechFlowNode {
|
|
|
39
41
|
optimize: { type: "string", val: "latency", pos: 5, match: /^(?:latency|quality)$/ }
|
|
40
42
|
})
|
|
41
43
|
|
|
44
|
+
/* sanity check parameters */
|
|
45
|
+
if (!this.params.key)
|
|
46
|
+
throw new Error("ElevenLabs API key not configured")
|
|
47
|
+
|
|
42
48
|
/* declare node input/output format */
|
|
43
49
|
this.input = "text"
|
|
44
50
|
this.output = "audio"
|
|
@@ -54,6 +60,9 @@ export default class SpeechFlowNodeElevenlabs extends SpeechFlowNode {
|
|
|
54
60
|
|
|
55
61
|
/* open node */
|
|
56
62
|
async open () {
|
|
63
|
+
/* clear destruction flag */
|
|
64
|
+
this.destroyed = false
|
|
65
|
+
|
|
57
66
|
/* establish ElevenLabs API connection */
|
|
58
67
|
this.elevenlabs = new ElevenLabs.ElevenLabsClient({
|
|
59
68
|
apiKey: this.params.key
|
|
@@ -120,37 +129,74 @@ export default class SpeechFlowNodeElevenlabs extends SpeechFlowNode {
|
|
|
120
129
|
await SpeexResampler.initPromise
|
|
121
130
|
SpeechFlowNodeElevenlabs.speexInitialized = true
|
|
122
131
|
}
|
|
123
|
-
|
|
132
|
+
this.resampler = new SpeexResampler(1, maxSampleRate, this.config.audioSampleRate, 7)
|
|
124
133
|
|
|
125
134
|
/* create transform stream and connect it to the ElevenLabs API */
|
|
126
|
-
const
|
|
135
|
+
const self = this
|
|
127
136
|
this.stream = new Stream.Transform({
|
|
128
137
|
writableObjectMode: true,
|
|
129
138
|
readableObjectMode: true,
|
|
130
139
|
decodeStrings: false,
|
|
131
140
|
highWaterMark: 1,
|
|
132
141
|
transform (chunk: SpeechFlowChunk, encoding, callback) {
|
|
142
|
+
if (self.destroyed) {
|
|
143
|
+
callback(new Error("stream already destroyed"))
|
|
144
|
+
return
|
|
145
|
+
}
|
|
133
146
|
if (Buffer.isBuffer(chunk.payload))
|
|
134
147
|
callback(new Error("invalid chunk payload type"))
|
|
135
148
|
else {
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
149
|
+
(async () => {
|
|
150
|
+
let processTimeout: ReturnType<typeof setTimeout> | null = setTimeout(() => {
|
|
151
|
+
processTimeout = null
|
|
152
|
+
callback(new Error("ElevenLabs API timeout"))
|
|
153
|
+
}, 60 * 1000)
|
|
154
|
+
try {
|
|
155
|
+
const stream = await speechStream(chunk.payload as string)
|
|
156
|
+
if (self.destroyed) {
|
|
157
|
+
if (processTimeout !== null) {
|
|
158
|
+
clearTimeout(processTimeout)
|
|
159
|
+
processTimeout = null
|
|
160
|
+
}
|
|
161
|
+
callback(new Error("stream destroyed during processing"))
|
|
162
|
+
return
|
|
163
|
+
}
|
|
164
|
+
const buffer = await getStreamAsBuffer(stream)
|
|
165
|
+
if (self.destroyed) {
|
|
166
|
+
if (processTimeout !== null) {
|
|
167
|
+
clearTimeout(processTimeout)
|
|
168
|
+
processTimeout = null
|
|
169
|
+
}
|
|
170
|
+
callback(new Error("stream destroyed during processing"))
|
|
171
|
+
return
|
|
172
|
+
}
|
|
173
|
+
const bufferResampled = self.resampler!.processChunk(buffer)
|
|
174
|
+
self.log("info", `ElevenLabs: received audio (buffer length: ${buffer.byteLength})`)
|
|
140
175
|
const chunkNew = chunk.clone()
|
|
141
176
|
chunkNew.type = "audio"
|
|
142
177
|
chunkNew.payload = bufferResampled
|
|
178
|
+
if (processTimeout !== null) {
|
|
179
|
+
clearTimeout(processTimeout)
|
|
180
|
+
processTimeout = null
|
|
181
|
+
}
|
|
143
182
|
this.push(chunkNew)
|
|
144
183
|
callback()
|
|
145
|
-
}
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
184
|
+
}
|
|
185
|
+
catch (error) {
|
|
186
|
+
if (processTimeout !== null) {
|
|
187
|
+
clearTimeout(processTimeout)
|
|
188
|
+
processTimeout = null
|
|
189
|
+
}
|
|
190
|
+
callback(error instanceof Error ? error : new Error("ElevenLabs processing failed"))
|
|
191
|
+
}
|
|
192
|
+
})()
|
|
151
193
|
}
|
|
152
194
|
},
|
|
153
195
|
final (callback) {
|
|
196
|
+
if (self.destroyed) {
|
|
197
|
+
callback()
|
|
198
|
+
return
|
|
199
|
+
}
|
|
154
200
|
this.push(null)
|
|
155
201
|
callback()
|
|
156
202
|
}
|
|
@@ -159,12 +205,19 @@ export default class SpeechFlowNodeElevenlabs extends SpeechFlowNode {
|
|
|
159
205
|
|
|
160
206
|
/* close node */
|
|
161
207
|
async close () {
|
|
208
|
+
/* indicate destruction */
|
|
209
|
+
this.destroyed = true
|
|
210
|
+
|
|
162
211
|
/* destroy stream */
|
|
163
212
|
if (this.stream !== null) {
|
|
164
213
|
this.stream.destroy()
|
|
165
214
|
this.stream = null
|
|
166
215
|
}
|
|
167
216
|
|
|
217
|
+
/* destroy resampler */
|
|
218
|
+
if (this.resampler !== null)
|
|
219
|
+
this.resampler = null
|
|
220
|
+
|
|
168
221
|
/* destroy ElevenLabs API */
|
|
169
222
|
if (this.elevenlabs !== null)
|
|
170
223
|
this.elevenlabs = null
|
|
@@ -21,6 +21,7 @@ export default class SpeechFlowNodeKokoro extends SpeechFlowNode {
|
|
|
21
21
|
|
|
22
22
|
/* internal state */
|
|
23
23
|
private kokoro: KokoroTTS | null = null
|
|
24
|
+
private resampler: SpeexResampler | null = null
|
|
24
25
|
private static speexInitialized = false
|
|
25
26
|
|
|
26
27
|
/* construct node */
|
|
@@ -59,9 +60,11 @@ export default class SpeechFlowNodeKokoro extends SpeechFlowNode {
|
|
|
59
60
|
const interval = setInterval(() => {
|
|
60
61
|
for (const [ artifact, percent ] of progressState) {
|
|
61
62
|
this.log("info", `downloaded ${percent.toFixed(2)}% of artifact "${artifact}"`)
|
|
62
|
-
if (percent >=
|
|
63
|
+
if (percent >= 100.0)
|
|
63
64
|
progressState.delete(artifact)
|
|
64
65
|
}
|
|
66
|
+
if (progressState.size === 0)
|
|
67
|
+
clearInterval(interval)
|
|
65
68
|
}, 1000)
|
|
66
69
|
this.kokoro = await KokoroTTS.from_pretrained(model, {
|
|
67
70
|
dtype: "q4f16",
|
|
@@ -78,7 +81,7 @@ export default class SpeechFlowNodeKokoro extends SpeechFlowNode {
|
|
|
78
81
|
await SpeexResampler.initPromise
|
|
79
82
|
SpeechFlowNodeKokoro.speexInitialized = true
|
|
80
83
|
}
|
|
81
|
-
|
|
84
|
+
this.resampler = new SpeexResampler(1, 24000, this.config.audioSampleRate, 7)
|
|
82
85
|
|
|
83
86
|
/* determine voice for text-to-speech operation */
|
|
84
87
|
const voices = {
|
|
@@ -91,7 +94,7 @@ export default class SpeechFlowNodeKokoro extends SpeechFlowNode {
|
|
|
91
94
|
if (voice === undefined)
|
|
92
95
|
throw new Error(`invalid Kokoro voice "${this.params.voice}"`)
|
|
93
96
|
|
|
94
|
-
/* perform text-to-speech operation with
|
|
97
|
+
/* perform text-to-speech operation with Kokoro API */
|
|
95
98
|
const text2speech = async (text: string) => {
|
|
96
99
|
this.log("info", `Kokoro: input: "${text}"`)
|
|
97
100
|
const audio = await this.kokoro!.generate(text, {
|
|
@@ -110,7 +113,7 @@ export default class SpeechFlowNodeKokoro extends SpeechFlowNode {
|
|
|
110
113
|
}
|
|
111
114
|
|
|
112
115
|
/* resample audio samples from PCM/I16/24Khz to PCM/I16/48KHz */
|
|
113
|
-
const buffer2 = resampler
|
|
116
|
+
const buffer2 = this.resampler!.processChunk(buffer1)
|
|
114
117
|
|
|
115
118
|
return buffer2
|
|
116
119
|
}
|
|
@@ -153,6 +156,10 @@ export default class SpeechFlowNodeKokoro extends SpeechFlowNode {
|
|
|
153
156
|
this.stream = null
|
|
154
157
|
}
|
|
155
158
|
|
|
159
|
+
/* destroy resampler */
|
|
160
|
+
if (this.resampler !== null)
|
|
161
|
+
this.resampler = null
|
|
162
|
+
|
|
156
163
|
/* destroy Kokoro API */
|
|
157
164
|
if (this.kokoro !== null)
|
|
158
165
|
this.kokoro = null
|
|
@@ -27,12 +27,16 @@ export default class SpeechFlowNodeDeepL extends SpeechFlowNode {
|
|
|
27
27
|
|
|
28
28
|
/* declare node configuration parameters */
|
|
29
29
|
this.configure({
|
|
30
|
-
key: { type: "string", val: process.env.SPEECHFLOW_DEEPL_KEY },
|
|
30
|
+
key: { type: "string", val: process.env.SPEECHFLOW_DEEPL_KEY ?? "" },
|
|
31
31
|
src: { type: "string", pos: 0, val: "de", match: /^(?:de|en)$/ },
|
|
32
32
|
dst: { type: "string", pos: 1, val: "en", match: /^(?:de|en)$/ },
|
|
33
33
|
optimize: { type: "string", pos: 2, val: "latency", match: /^(?:latency|quality)$/ }
|
|
34
34
|
})
|
|
35
35
|
|
|
36
|
+
/* validate API key */
|
|
37
|
+
if (this.params.key === "")
|
|
38
|
+
throw new Error("DeepL API key is required")
|
|
39
|
+
|
|
36
40
|
/* sanity check situation */
|
|
37
41
|
if (this.params.src === this.params.dst)
|
|
38
42
|
throw new Error("source and destination languages cannot be the same")
|
|
@@ -44,9 +48,10 @@ export default class SpeechFlowNodeDeepL extends SpeechFlowNode {
|
|
|
44
48
|
|
|
45
49
|
/* one-time status of node */
|
|
46
50
|
async status () {
|
|
47
|
-
|
|
48
|
-
const usage = await
|
|
49
|
-
const
|
|
51
|
+
const deepl = new DeepL.Translator(this.params.key)
|
|
52
|
+
const usage = await deepl.getUsage()
|
|
53
|
+
const limit = usage?.character?.limit ?? 1
|
|
54
|
+
const percent = limit > 0 ? ((usage?.character?.count ?? 0) / limit * 100) : 0
|
|
50
55
|
return { usage: `${percent.toFixed(8)}%` }
|
|
51
56
|
}
|
|
52
57
|
|
|
@@ -41,7 +41,7 @@ export default class SpeechFlowNodeFormat extends SpeechFlowNode {
|
|
|
41
41
|
return text
|
|
42
42
|
}
|
|
43
43
|
|
|
44
|
-
/* establish a duplex stream and connect it to
|
|
44
|
+
/* establish a duplex stream and connect it to text formatting */
|
|
45
45
|
this.stream = new Stream.Transform({
|
|
46
46
|
readableObjectMode: true,
|
|
47
47
|
writableObjectMode: true,
|
|
@@ -74,7 +74,7 @@ export default class SpeechFlowNodeFormat extends SpeechFlowNode {
|
|
|
74
74
|
})
|
|
75
75
|
}
|
|
76
76
|
|
|
77
|
-
/*
|
|
77
|
+
/* close node */
|
|
78
78
|
async close () {
|
|
79
79
|
/* close stream */
|
|
80
80
|
if (this.stream !== null) {
|
|
@@ -39,7 +39,7 @@ export default class SpeechFlowNodeOllama extends SpeechFlowNode {
|
|
|
39
39
|
"Do NOT give any preamble.\n" +
|
|
40
40
|
"Do NOT give any prolog.\n" +
|
|
41
41
|
"Do NOT give any epilog.\n" +
|
|
42
|
-
"Do NOT change the
|
|
42
|
+
"Do NOT change the grammar.\n" +
|
|
43
43
|
"Do NOT use synonyms for words.\n" +
|
|
44
44
|
"Keep all words.\n" +
|
|
45
45
|
"Fill in missing commas.\n" +
|
|
@@ -39,7 +39,7 @@ export default class SpeechFlowNodeOpenAI extends SpeechFlowNode {
|
|
|
39
39
|
"Do NOT give any preamble.\n" +
|
|
40
40
|
"Do NOT give any prolog.\n" +
|
|
41
41
|
"Do NOT give any epilog.\n" +
|
|
42
|
-
"Do NOT change the
|
|
42
|
+
"Do NOT change the grammar.\n" +
|
|
43
43
|
"Do NOT use synonyms for words.\n" +
|
|
44
44
|
"Keep all words.\n" +
|
|
45
45
|
"Fill in missing commas.\n" +
|