speechflow 2.0.0 → 2.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +12 -0
- package/README.md +4 -4
- package/package.json +4 -4
- package/speechflow-cli/dst/speechflow-main-api.js.map +1 -1
- package/speechflow-cli/dst/speechflow-main-cli.js +1 -0
- package/speechflow-cli/dst/speechflow-main-cli.js.map +1 -1
- package/speechflow-cli/dst/speechflow-main-graph.js +2 -4
- package/speechflow-cli/dst/speechflow-main-graph.js.map +1 -1
- package/speechflow-cli/dst/speechflow-main-nodes.js +1 -0
- package/speechflow-cli/dst/speechflow-main-nodes.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-compressor-wt.js +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-compressor-wt.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-compressor.js +7 -9
- package/speechflow-cli/dst/speechflow-node-a2a-compressor.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-expander-wt.js +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-expander-wt.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-expander.js +8 -9
- package/speechflow-cli/dst/speechflow-node-a2a-expander.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-filler.js +2 -0
- package/speechflow-cli/dst/speechflow-node-a2a-filler.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-gender.js +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-gender.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-meter.js +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-pitch.js +11 -9
- package/speechflow-cli/dst/speechflow-node-a2a-pitch.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-rnnoise-wt.js +1 -0
- package/speechflow-cli/dst/speechflow-node-a2a-rnnoise-wt.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-rnnoise.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-speex.js +4 -2
- package/speechflow-cli/dst/speechflow-node-a2a-speex.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-vad.js +19 -22
- package/speechflow-cli/dst/speechflow-node-a2a-vad.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-wav.js +7 -0
- package/speechflow-cli/dst/speechflow-node-a2a-wav.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2t-amazon.d.ts +0 -1
- package/speechflow-cli/dst/speechflow-node-a2t-amazon.js +2 -11
- package/speechflow-cli/dst/speechflow-node-a2t-amazon.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2t-google.d.ts +0 -1
- package/speechflow-cli/dst/speechflow-node-a2t-google.js +0 -6
- package/speechflow-cli/dst/speechflow-node-a2t-google.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2t-openai.js +6 -1
- package/speechflow-cli/dst/speechflow-node-a2t-openai.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2a-amazon.d.ts +1 -1
- package/speechflow-cli/dst/speechflow-node-t2a-amazon.js +27 -7
- package/speechflow-cli/dst/speechflow-node-t2a-amazon.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.d.ts +1 -1
- package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.js +5 -3
- package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2a-google.js +1 -4
- package/speechflow-cli/dst/speechflow-node-t2a-google.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2a-kokoro.d.ts +1 -1
- package/speechflow-cli/dst/speechflow-node-t2a-kokoro.js +27 -6
- package/speechflow-cli/dst/speechflow-node-t2a-kokoro.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2a-openai.js +1 -4
- package/speechflow-cli/dst/speechflow-node-t2a-openai.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2a-supertonic.d.ts +2 -3
- package/speechflow-cli/dst/speechflow-node-t2a-supertonic.js +97 -459
- package/speechflow-cli/dst/speechflow-node-t2a-supertonic.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-amazon.js +0 -2
- package/speechflow-cli/dst/speechflow-node-t2t-amazon.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-deepl.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-google.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-opus.js +18 -16
- package/speechflow-cli/dst/speechflow-node-t2t-opus.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-punctuation.js +2 -3
- package/speechflow-cli/dst/speechflow-node-t2t-punctuation.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-spellcheck.js +2 -3
- package/speechflow-cli/dst/speechflow-node-t2t-spellcheck.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js +5 -2
- package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-summary.js +2 -3
- package/speechflow-cli/dst/speechflow-node-t2t-summary.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-translate.js +1 -2
- package/speechflow-cli/dst/speechflow-node-t2t-translate.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-x2x-filter.js +2 -0
- package/speechflow-cli/dst/speechflow-node-x2x-filter.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-exec.js +1 -0
- package/speechflow-cli/dst/speechflow-node-xio-exec.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-file.js +3 -5
- package/speechflow-cli/dst/speechflow-node-xio-file.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-mqtt.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-vban.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-webrtc.js +2 -0
- package/speechflow-cli/dst/speechflow-node-xio-webrtc.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-websocket.js +9 -9
- package/speechflow-cli/dst/speechflow-node-xio-websocket.js.map +1 -1
- package/speechflow-cli/dst/speechflow-util-audio.js +4 -0
- package/speechflow-cli/dst/speechflow-util-audio.js.map +1 -1
- package/speechflow-cli/dst/speechflow-util-llm.d.ts +0 -1
- package/speechflow-cli/dst/speechflow-util-llm.js +4 -8
- package/speechflow-cli/dst/speechflow-util-llm.js.map +1 -1
- package/speechflow-cli/dst/speechflow-util-queue.js +2 -1
- package/speechflow-cli/dst/speechflow-util-queue.js.map +1 -1
- package/speechflow-cli/dst/speechflow-util.js +1 -0
- package/speechflow-cli/dst/speechflow-util.js.map +1 -1
- package/speechflow-cli/dst/test.d.ts +1 -0
- package/speechflow-cli/dst/test.js +18 -0
- package/speechflow-cli/dst/test.js.map +1 -0
- package/speechflow-cli/etc/oxlint.jsonc +3 -1
- package/speechflow-cli/package.json +16 -16
- package/speechflow-cli/src/speechflow-main-api.ts +16 -16
- package/speechflow-cli/src/speechflow-main-cli.ts +1 -0
- package/speechflow-cli/src/speechflow-main-graph.ts +7 -9
- package/speechflow-cli/src/speechflow-main-nodes.ts +1 -0
- package/speechflow-cli/src/speechflow-node-a2a-compressor-wt.ts +1 -0
- package/speechflow-cli/src/speechflow-node-a2a-compressor.ts +8 -10
- package/speechflow-cli/src/speechflow-node-a2a-expander-wt.ts +1 -0
- package/speechflow-cli/src/speechflow-node-a2a-expander.ts +9 -10
- package/speechflow-cli/src/speechflow-node-a2a-filler.ts +2 -0
- package/speechflow-cli/src/speechflow-node-a2a-gender.ts +3 -3
- package/speechflow-cli/src/speechflow-node-a2a-meter.ts +2 -2
- package/speechflow-cli/src/speechflow-node-a2a-pitch.ts +11 -9
- package/speechflow-cli/src/speechflow-node-a2a-rnnoise-wt.ts +1 -0
- package/speechflow-cli/src/speechflow-node-a2a-rnnoise.ts +1 -1
- package/speechflow-cli/src/speechflow-node-a2a-speex.ts +5 -3
- package/speechflow-cli/src/speechflow-node-a2a-vad.ts +20 -23
- package/speechflow-cli/src/speechflow-node-a2a-wav.ts +7 -0
- package/speechflow-cli/src/speechflow-node-a2t-amazon.ts +6 -18
- package/speechflow-cli/src/speechflow-node-a2t-google.ts +4 -11
- package/speechflow-cli/src/speechflow-node-a2t-openai.ts +12 -7
- package/speechflow-cli/src/speechflow-node-t2a-amazon.ts +32 -10
- package/speechflow-cli/src/speechflow-node-t2a-elevenlabs.ts +6 -4
- package/speechflow-cli/src/speechflow-node-t2a-google.ts +1 -4
- package/speechflow-cli/src/speechflow-node-t2a-kokoro.ts +33 -10
- package/speechflow-cli/src/speechflow-node-t2a-openai.ts +1 -4
- package/speechflow-cli/src/speechflow-node-t2a-supertonic.ts +106 -571
- package/speechflow-cli/src/speechflow-node-t2t-amazon.ts +1 -3
- package/speechflow-cli/src/speechflow-node-t2t-deepl.ts +2 -2
- package/speechflow-cli/src/speechflow-node-t2t-google.ts +1 -1
- package/speechflow-cli/src/speechflow-node-t2t-opus.ts +19 -18
- package/speechflow-cli/src/speechflow-node-t2t-punctuation.ts +2 -3
- package/speechflow-cli/src/speechflow-node-t2t-spellcheck.ts +2 -3
- package/speechflow-cli/src/speechflow-node-t2t-subtitle.ts +5 -2
- package/speechflow-cli/src/speechflow-node-t2t-summary.ts +2 -3
- package/speechflow-cli/src/speechflow-node-t2t-translate.ts +1 -2
- package/speechflow-cli/src/speechflow-node-x2x-filter.ts +2 -0
- package/speechflow-cli/src/speechflow-node-xio-exec.ts +1 -0
- package/speechflow-cli/src/speechflow-node-xio-file.ts +3 -5
- package/speechflow-cli/src/speechflow-node-xio-mqtt.ts +2 -2
- package/speechflow-cli/src/speechflow-node-xio-vban.ts +5 -5
- package/speechflow-cli/src/speechflow-node-xio-webrtc.ts +2 -0
- package/speechflow-cli/src/speechflow-node-xio-websocket.ts +9 -9
- package/speechflow-cli/src/speechflow-util-audio.ts +5 -0
- package/speechflow-cli/src/speechflow-util-llm.ts +4 -9
- package/speechflow-cli/src/speechflow-util-queue.ts +4 -4
- package/speechflow-cli/src/speechflow-util.ts +1 -0
- package/speechflow-ui-db/dst/index.js +14 -14
- package/speechflow-ui-db/package.json +6 -6
- package/speechflow-ui-st/dst/index.js +32 -32
- package/speechflow-ui-st/package.json +6 -6
|
@@ -85,6 +85,18 @@ export default class SpeechFlowNodeA2AVAD extends SpeechFlowNode {
|
|
|
85
85
|
}
|
|
86
86
|
}
|
|
87
87
|
|
|
88
|
+
/* helper function for tail timer handling */
|
|
89
|
+
const startTailTimer = () => {
|
|
90
|
+
tail = true
|
|
91
|
+
clearTailTimer()
|
|
92
|
+
this.tailTimer = setTimeout(() => {
|
|
93
|
+
if (this.closing || this.tailTimer === null)
|
|
94
|
+
return
|
|
95
|
+
tail = false
|
|
96
|
+
this.tailTimer = null
|
|
97
|
+
}, this.params.postSpeechTail)
|
|
98
|
+
}
|
|
99
|
+
|
|
88
100
|
/* establish Voice Activity Detection (VAD) facility */
|
|
89
101
|
let tail = false
|
|
90
102
|
try {
|
|
@@ -111,31 +123,15 @@ export default class SpeechFlowNodeA2AVAD extends SpeechFlowNode {
|
|
|
111
123
|
return
|
|
112
124
|
const duration = util.audioArrayDuration(audio, vadSampleRateTarget)
|
|
113
125
|
this.log("info", `VAD: speech end (duration: ${duration.toFixed(2)}s)`)
|
|
114
|
-
if (this.params.mode === "unplugged")
|
|
115
|
-
|
|
116
|
-
clearTailTimer()
|
|
117
|
-
this.tailTimer = setTimeout(() => {
|
|
118
|
-
if (this.closing || this.tailTimer === null)
|
|
119
|
-
return
|
|
120
|
-
tail = false
|
|
121
|
-
this.tailTimer = null
|
|
122
|
-
}, this.params.postSpeechTail)
|
|
123
|
-
}
|
|
126
|
+
if (this.params.mode === "unplugged")
|
|
127
|
+
startTailTimer()
|
|
124
128
|
},
|
|
125
129
|
onVADMisfire: () => {
|
|
126
130
|
if (this.closing)
|
|
127
131
|
return
|
|
128
132
|
this.log("info", "VAD: speech end (segment too short)")
|
|
129
|
-
if (this.params.mode === "unplugged")
|
|
130
|
-
|
|
131
|
-
clearTailTimer()
|
|
132
|
-
this.tailTimer = setTimeout(() => {
|
|
133
|
-
if (this.closing || this.tailTimer === null)
|
|
134
|
-
return
|
|
135
|
-
tail = false
|
|
136
|
-
this.tailTimer = null
|
|
137
|
-
}, this.params.postSpeechTail)
|
|
138
|
-
}
|
|
133
|
+
if (this.params.mode === "unplugged")
|
|
134
|
+
startTailTimer()
|
|
139
135
|
},
|
|
140
136
|
onFrameProcessed: (audio) => {
|
|
141
137
|
if (this.closing)
|
|
@@ -144,7 +140,7 @@ export default class SpeechFlowNodeA2AVAD extends SpeechFlowNode {
|
|
|
144
140
|
/* annotate the current audio segment */
|
|
145
141
|
const element = this.queueVAD.peek()
|
|
146
142
|
if (element === undefined || element.type !== "audio-frame")
|
|
147
|
-
throw new Error("internal error
|
|
143
|
+
throw new Error("internal error that cannot happen: no more queued element")
|
|
148
144
|
if (element.segmentIdx >= element.segmentData.length)
|
|
149
145
|
throw new Error("segment index out of bounds")
|
|
150
146
|
const segment = element.segmentData[element.segmentIdx++]
|
|
@@ -227,6 +223,7 @@ export default class SpeechFlowNodeA2AVAD extends SpeechFlowNode {
|
|
|
227
223
|
}
|
|
228
224
|
}
|
|
229
225
|
|
|
226
|
+
/* signal completion */
|
|
230
227
|
callback()
|
|
231
228
|
}
|
|
232
229
|
catch (error) {
|
|
@@ -322,6 +319,7 @@ export default class SpeechFlowNodeA2AVAD extends SpeechFlowNode {
|
|
|
322
319
|
}
|
|
323
320
|
}
|
|
324
321
|
|
|
322
|
+
/* peek at send queue element */
|
|
325
323
|
const element = self.queueSend.peek()
|
|
326
324
|
if (element !== undefined && element.type === "audio-eof")
|
|
327
325
|
this.push(null)
|
|
@@ -371,8 +369,7 @@ export default class SpeechFlowNodeA2AVAD extends SpeechFlowNode {
|
|
|
371
369
|
if (this.vad !== null) {
|
|
372
370
|
try {
|
|
373
371
|
const flushPromise = this.vad.flush()
|
|
374
|
-
const timeoutPromise = new Promise((resolve) =>
|
|
375
|
-
setTimeout(resolve, 5000))
|
|
372
|
+
const timeoutPromise = new Promise((resolve) => { setTimeout(resolve, 5000) })
|
|
376
373
|
await Promise.race([ flushPromise, timeoutPromise ])
|
|
377
374
|
}
|
|
378
375
|
catch (error) {
|
|
@@ -21,15 +21,18 @@ const writeWavHeader = (
|
|
|
21
21
|
const sampleRate = options?.sampleRate ?? 44100 /* 44KHz */
|
|
22
22
|
const bitDepth = options?.bitDepth ?? 16 /* 16-Bit */
|
|
23
23
|
|
|
24
|
+
/* determine header dimensions */
|
|
24
25
|
const headerLength = 44
|
|
25
26
|
const maxDataSize = Math.pow(2, 32) - 100 /* safe maximum for 32-bit WAV files */
|
|
26
27
|
const dataLength = length ?? maxDataSize
|
|
27
28
|
const fileSize = dataLength + headerLength
|
|
28
29
|
const header = Buffer.alloc(headerLength)
|
|
29
30
|
|
|
31
|
+
/* calculate byte rate and block alignment */
|
|
30
32
|
const byteRate = (sampleRate * channels * bitDepth) / 8
|
|
31
33
|
const blockAlign = (channels * bitDepth) / 8
|
|
32
34
|
|
|
35
|
+
/* write header fields */
|
|
33
36
|
let offset = 0
|
|
34
37
|
header.write("RIFF", offset); offset += 4
|
|
35
38
|
header.writeUInt32LE(fileSize - 8, offset); offset += 4
|
|
@@ -45,6 +48,7 @@ const writeWavHeader = (
|
|
|
45
48
|
header.write("data", offset); offset += 4
|
|
46
49
|
header.writeUInt32LE(dataLength, offset); offset += 4
|
|
47
50
|
|
|
51
|
+
/* return completed header */
|
|
48
52
|
return header
|
|
49
53
|
}
|
|
50
54
|
|
|
@@ -53,6 +57,7 @@ const readWavHeader = (buffer: Buffer) => {
|
|
|
53
57
|
if (buffer.length < 44)
|
|
54
58
|
throw new Error("WAV header too short, expected at least 44 bytes")
|
|
55
59
|
|
|
60
|
+
/* read header fields */
|
|
56
61
|
let offset = 0
|
|
57
62
|
const riffHead = buffer.subarray(offset, offset + 4).toString(); offset += 4
|
|
58
63
|
const fileSize = buffer.readUInt32LE(offset); offset += 4
|
|
@@ -68,6 +73,7 @@ const readWavHeader = (buffer: Buffer) => {
|
|
|
68
73
|
const data = buffer.subarray(offset, offset + 4).toString(); offset += 4
|
|
69
74
|
const dataLength = buffer.readUInt32LE(offset); offset += 4
|
|
70
75
|
|
|
76
|
+
/* validate RIFF header */
|
|
71
77
|
if (riffHead !== "RIFF")
|
|
72
78
|
throw new Error(`Invalid WAV file: expected RIFF header, got "${riffHead}"`)
|
|
73
79
|
if (waveHead !== "WAVE")
|
|
@@ -77,6 +83,7 @@ const readWavHeader = (buffer: Buffer) => {
|
|
|
77
83
|
if (data !== "data")
|
|
78
84
|
throw new Error(`Invalid WAV file: expected "data" header, got "${data}"`)
|
|
79
85
|
|
|
86
|
+
/* return parsed header data */
|
|
80
87
|
return {
|
|
81
88
|
riffHead, fileSize, waveHead, fmtHead, formatLength, audioFormat,
|
|
82
89
|
channels, sampleRate, byteRate, blockAlign, bitDepth, data, dataLength
|
|
@@ -53,7 +53,7 @@ class AsyncQueue<T> {
|
|
|
53
53
|
continue
|
|
54
54
|
}
|
|
55
55
|
else {
|
|
56
|
-
const it = await new Promise<IteratorResult<T>>((resolve) => this.resolvers.push(resolve))
|
|
56
|
+
const it = await new Promise<IteratorResult<T>>((resolve) => { this.resolvers.push(resolve) })
|
|
57
57
|
if (it.done)
|
|
58
58
|
return
|
|
59
59
|
yield it.value
|
|
@@ -68,11 +68,10 @@ export default class SpeechFlowNodeA2TAmazon extends SpeechFlowNode {
|
|
|
68
68
|
public static name = "a2t-amazon"
|
|
69
69
|
|
|
70
70
|
/* internal state */
|
|
71
|
-
private client:
|
|
72
|
-
private clientStream:
|
|
73
|
-
private closing
|
|
74
|
-
private
|
|
75
|
-
private queue: util.SingleQueue<SpeechFlowChunk | null> | null = null
|
|
71
|
+
private client: TranscribeStreamingClient | null = null
|
|
72
|
+
private clientStream: AsyncIterable<TranscriptResultStream> | null = null
|
|
73
|
+
private closing = false
|
|
74
|
+
private queue: util.SingleQueue<SpeechFlowChunk | null> | null = null
|
|
76
75
|
|
|
77
76
|
/* construct node */
|
|
78
77
|
constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
|
|
@@ -126,8 +125,6 @@ export default class SpeechFlowNodeA2TAmazon extends SpeechFlowNode {
|
|
|
126
125
|
secretAccessKey: this.params.secKey
|
|
127
126
|
}
|
|
128
127
|
})
|
|
129
|
-
if (this.client === null)
|
|
130
|
-
throw new Error("failed to establish Amazon Transcribe client")
|
|
131
128
|
|
|
132
129
|
/* create an AudioStream for Amazon Transcribe */
|
|
133
130
|
const audioQueue = new AsyncQueue<Uint8Array>()
|
|
@@ -236,11 +233,8 @@ export default class SpeechFlowNodeA2TAmazon extends SpeechFlowNode {
|
|
|
236
233
|
callback()
|
|
237
234
|
return
|
|
238
235
|
}
|
|
239
|
-
|
|
240
|
-
/* await all read operations */
|
|
241
236
|
await reads.awaitAll()
|
|
242
|
-
|
|
243
|
-
util.run(
|
|
237
|
+
util.run("closing Amazon Transcribe connection",
|
|
244
238
|
() => self.client!.destroy(),
|
|
245
239
|
(error: Error) => self.log("warning", `error closing Amazon Transcribe connection: ${error}`)
|
|
246
240
|
)
|
|
@@ -279,12 +273,6 @@ export default class SpeechFlowNodeA2TAmazon extends SpeechFlowNode {
|
|
|
279
273
|
/* indicate closing first to stop all async operations */
|
|
280
274
|
this.closing = true
|
|
281
275
|
|
|
282
|
-
/* cleanup all timers */
|
|
283
|
-
if (this.connectionTimeout !== null) {
|
|
284
|
-
clearTimeout(this.connectionTimeout)
|
|
285
|
-
this.connectionTimeout = null
|
|
286
|
-
}
|
|
287
|
-
|
|
288
276
|
/* close queue */
|
|
289
277
|
if (this.queue !== null) {
|
|
290
278
|
this.queue.write(null)
|
|
@@ -22,11 +22,10 @@ export default class SpeechFlowNodeA2TGoogle extends SpeechFlowNode {
|
|
|
22
22
|
public static name = "a2t-google"
|
|
23
23
|
|
|
24
24
|
/* internal state */
|
|
25
|
-
private client:
|
|
26
|
-
private recognizeStream:
|
|
27
|
-
private
|
|
28
|
-
private
|
|
29
|
-
private closing = false
|
|
25
|
+
private client: GoogleSpeech.SpeechClient | null = null
|
|
26
|
+
private recognizeStream: ReturnType<GoogleSpeech.SpeechClient["streamingRecognize"]> | null = null
|
|
27
|
+
private queue: util.SingleQueue<SpeechFlowChunk | null> | null = null
|
|
28
|
+
private closing = false
|
|
30
29
|
|
|
31
30
|
/* construct node */
|
|
32
31
|
constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
|
|
@@ -280,12 +279,6 @@ export default class SpeechFlowNodeA2TGoogle extends SpeechFlowNode {
|
|
|
280
279
|
/* indicate closing first to stop all async operations */
|
|
281
280
|
this.closing = true
|
|
282
281
|
|
|
283
|
-
/* cleanup all timers */
|
|
284
|
-
if (this.connectionTimeout !== null) {
|
|
285
|
-
clearTimeout(this.connectionTimeout)
|
|
286
|
-
this.connectionTimeout = null
|
|
287
|
-
}
|
|
288
|
-
|
|
289
282
|
/* shutdown stream */
|
|
290
283
|
if (this.stream !== null) {
|
|
291
284
|
await util.destroyStream(this.stream)
|
|
@@ -23,12 +23,12 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
|
|
|
23
23
|
public static name = "a2t-openai"
|
|
24
24
|
|
|
25
25
|
/* internal state */
|
|
26
|
-
private openai: OpenAI
|
|
27
|
-
private ws: ws.WebSocket
|
|
28
|
-
private queue: util.SingleQueue<SpeechFlowChunk | null>
|
|
29
|
-
private resampler: SpeexResampler
|
|
30
|
-
private closing
|
|
31
|
-
private connectionTimeout: ReturnType<typeof setTimeout>
|
|
26
|
+
private openai: OpenAI | null = null
|
|
27
|
+
private ws: ws.WebSocket | null = null
|
|
28
|
+
private queue: util.SingleQueue<SpeechFlowChunk | null> | null = null
|
|
29
|
+
private resampler: SpeexResampler | null = null
|
|
30
|
+
private closing = false
|
|
31
|
+
private connectionTimeout: ReturnType<typeof setTimeout> | null = null
|
|
32
32
|
|
|
33
33
|
/* construct node */
|
|
34
34
|
constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
|
|
@@ -150,6 +150,9 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
|
|
|
150
150
|
})
|
|
151
151
|
this.ws.on("error", (err) => {
|
|
152
152
|
this.log("error", `WebSocket connection error: ${err}`)
|
|
153
|
+
if (!this.closing && this.queue !== null)
|
|
154
|
+
this.queue.write(null)
|
|
155
|
+
this.emit("error", err)
|
|
153
156
|
})
|
|
154
157
|
|
|
155
158
|
/* track speech timing by item_id (OpenAI provides timestamps via VAD events) */
|
|
@@ -164,6 +167,7 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
|
|
|
164
167
|
}, new Map<string, any>())
|
|
165
168
|
}
|
|
166
169
|
|
|
170
|
+
/* track transcription text */
|
|
167
171
|
let text = ""
|
|
168
172
|
this.ws.on("message", (data) => {
|
|
169
173
|
let ev: any
|
|
@@ -353,7 +357,8 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
|
|
|
353
357
|
this.ws.close()
|
|
354
358
|
this.ws = null
|
|
355
359
|
}
|
|
356
|
-
this.openai
|
|
360
|
+
if (this.openai !== null)
|
|
361
|
+
this.openai = null
|
|
357
362
|
|
|
358
363
|
/* close resampler */
|
|
359
364
|
this.resampler = null
|
|
@@ -9,6 +9,7 @@ import Stream from "node:stream"
|
|
|
9
9
|
|
|
10
10
|
/* external dependencies */
|
|
11
11
|
import { getStreamAsBuffer } from "get-stream"
|
|
12
|
+
import { Duration } from "luxon"
|
|
12
13
|
import SpeexResampler from "speex-resampler"
|
|
13
14
|
import {
|
|
14
15
|
PollyClient, SynthesizeSpeechCommand,
|
|
@@ -25,9 +26,9 @@ export default class SpeechFlowNodeT2AAmazon extends SpeechFlowNode {
|
|
|
25
26
|
public static name = "t2a-amazon"
|
|
26
27
|
|
|
27
28
|
/* internal state */
|
|
28
|
-
private client:
|
|
29
|
-
private closing = false
|
|
29
|
+
private client: PollyClient | null = null
|
|
30
30
|
private resampler: SpeexResampler | null = null
|
|
31
|
+
private closing = false
|
|
31
32
|
|
|
32
33
|
/* construct node */
|
|
33
34
|
constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
|
|
@@ -129,22 +130,43 @@ export default class SpeechFlowNodeT2AAmazon extends SpeechFlowNode {
|
|
|
129
130
|
}
|
|
130
131
|
if (Buffer.isBuffer(chunk.payload))
|
|
131
132
|
callback(new Error("invalid chunk payload type"))
|
|
132
|
-
else if (chunk.payload
|
|
133
|
+
else if (chunk.payload === "")
|
|
134
|
+
callback()
|
|
135
|
+
else {
|
|
136
|
+
let processTimeout: ReturnType<typeof setTimeout> | null = setTimeout(() => {
|
|
137
|
+
processTimeout = null
|
|
138
|
+
callback(new Error("AWS Polly API timeout"))
|
|
139
|
+
}, 60 * 1000)
|
|
140
|
+
const clearProcessTimeout = () => {
|
|
141
|
+
if (processTimeout !== null) {
|
|
142
|
+
clearTimeout(processTimeout)
|
|
143
|
+
processTimeout = null
|
|
144
|
+
}
|
|
145
|
+
}
|
|
133
146
|
self.log("debug", `send data (${chunk.payload.length} bytes): "${chunk.payload}"`)
|
|
134
147
|
textToSpeech(chunk.payload as string).then((buffer) => {
|
|
135
|
-
if (self.closing)
|
|
136
|
-
|
|
148
|
+
if (self.closing) {
|
|
149
|
+
clearProcessTimeout()
|
|
150
|
+
callback(new Error("stream destroyed during processing"))
|
|
151
|
+
return
|
|
152
|
+
}
|
|
153
|
+
/* calculate actual audio duration from PCM buffer size */
|
|
154
|
+
const durationMs = util.audioBufferDuration(buffer,
|
|
155
|
+
self.config.audioSampleRate, self.config.audioBitDepth) * 1000
|
|
156
|
+
|
|
157
|
+
/* create new chunk with recalculated timestamps */
|
|
137
158
|
const chunkNew = chunk.clone()
|
|
138
|
-
chunkNew.type
|
|
139
|
-
chunkNew.payload
|
|
159
|
+
chunkNew.type = "audio"
|
|
160
|
+
chunkNew.payload = buffer
|
|
161
|
+
chunkNew.timestampEnd = Duration.fromMillis(chunkNew.timestampStart.toMillis() + durationMs)
|
|
162
|
+
clearProcessTimeout()
|
|
140
163
|
this.push(chunkNew)
|
|
141
164
|
callback()
|
|
142
165
|
}).catch((error: unknown) => {
|
|
143
|
-
|
|
166
|
+
clearProcessTimeout()
|
|
167
|
+
callback(util.ensureError(error, "AWS Polly processing failed"))
|
|
144
168
|
})
|
|
145
169
|
}
|
|
146
|
-
else
|
|
147
|
-
callback()
|
|
148
170
|
},
|
|
149
171
|
final (callback) {
|
|
150
172
|
callback()
|
|
@@ -24,8 +24,8 @@ export default class SpeechFlowNodeT2AElevenlabs extends SpeechFlowNode {
|
|
|
24
24
|
|
|
25
25
|
/* internal state */
|
|
26
26
|
private elevenlabs: ElevenLabs.ElevenLabsClient | null = null
|
|
27
|
-
private
|
|
28
|
-
private
|
|
27
|
+
private resampler: SpeexResampler | null = null
|
|
28
|
+
private closing = false
|
|
29
29
|
|
|
30
30
|
/* construct node */
|
|
31
31
|
constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
|
|
@@ -131,8 +131,8 @@ export default class SpeechFlowNodeT2AElevenlabs extends SpeechFlowNode {
|
|
|
131
131
|
})
|
|
132
132
|
}
|
|
133
133
|
|
|
134
|
-
/* establish resampler from ElevenLabs's
|
|
135
|
-
output to our standard audio sample rate (48KHz) */
|
|
134
|
+
/* establish resampler from ElevenLabs's tier-dependent
|
|
135
|
+
output sample rate to our standard audio sample rate (48KHz) */
|
|
136
136
|
this.resampler = new SpeexResampler(1, maxSampleRate, this.config.audioSampleRate, 7)
|
|
137
137
|
|
|
138
138
|
/* create transform stream and connect it to the ElevenLabs API */
|
|
@@ -147,6 +147,8 @@ export default class SpeechFlowNodeT2AElevenlabs extends SpeechFlowNode {
|
|
|
147
147
|
callback(new Error("stream already destroyed"))
|
|
148
148
|
else if (Buffer.isBuffer(chunk.payload))
|
|
149
149
|
callback(new Error("invalid chunk payload type"))
|
|
150
|
+
else if (chunk.payload === "")
|
|
151
|
+
callback()
|
|
150
152
|
else {
|
|
151
153
|
let processTimeout: ReturnType<typeof setTimeout> | null = setTimeout(() => {
|
|
152
154
|
processTimeout = null
|
|
@@ -126,11 +126,8 @@ export default class SpeechFlowNodeT2AGoogle extends SpeechFlowNode {
|
|
|
126
126
|
callback(new Error("stream already destroyed"))
|
|
127
127
|
else if (Buffer.isBuffer(chunk.payload))
|
|
128
128
|
callback(new Error("invalid chunk payload type"))
|
|
129
|
-
else if (chunk.payload === "")
|
|
130
|
-
/* pass through empty chunks */
|
|
131
|
-
this.push(chunk)
|
|
129
|
+
else if (chunk.payload === "")
|
|
132
130
|
callback()
|
|
133
|
-
}
|
|
134
131
|
else {
|
|
135
132
|
let processTimeout: ReturnType<typeof setTimeout> | null = setTimeout(() => {
|
|
136
133
|
processTimeout = null
|
|
@@ -9,6 +9,7 @@ import Stream from "node:stream"
|
|
|
9
9
|
|
|
10
10
|
/* external dependencies */
|
|
11
11
|
import { KokoroTTS } from "kokoro-js"
|
|
12
|
+
import { Duration } from "luxon"
|
|
12
13
|
import SpeexResampler from "speex-resampler"
|
|
13
14
|
|
|
14
15
|
/* internal dependencies */
|
|
@@ -21,9 +22,9 @@ export default class SpeechFlowNodeT2AKokoro extends SpeechFlowNode {
|
|
|
21
22
|
public static name = "t2a-kokoro"
|
|
22
23
|
|
|
23
24
|
/* internal state */
|
|
24
|
-
private kokoro:
|
|
25
|
-
private closing = false
|
|
25
|
+
private kokoro: KokoroTTS | null = null
|
|
26
26
|
private resampler: SpeexResampler | null = null
|
|
27
|
+
private closing = false
|
|
27
28
|
|
|
28
29
|
/* construct node */
|
|
29
30
|
constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
|
|
@@ -122,9 +123,7 @@ export default class SpeechFlowNodeT2AKokoro extends SpeechFlowNode {
|
|
|
122
123
|
}
|
|
123
124
|
|
|
124
125
|
/* resample audio samples from PCM/I16/24Khz to PCM/I16/48KHz */
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
return buffer2
|
|
126
|
+
return this.resampler!.processChunk(buffer1)
|
|
128
127
|
}
|
|
129
128
|
|
|
130
129
|
/* create transform stream and connect it to the Kokoro API */
|
|
@@ -139,18 +138,42 @@ export default class SpeechFlowNodeT2AKokoro extends SpeechFlowNode {
|
|
|
139
138
|
callback(new Error("stream already destroyed"))
|
|
140
139
|
else if (Buffer.isBuffer(chunk.payload))
|
|
141
140
|
callback(new Error("invalid chunk payload type"))
|
|
141
|
+
else if (chunk.payload === "")
|
|
142
|
+
callback()
|
|
142
143
|
else {
|
|
144
|
+
let processTimeout: ReturnType<typeof setTimeout> | null = setTimeout(() => {
|
|
145
|
+
processTimeout = null
|
|
146
|
+
callback(new Error("Kokoro TTS timeout"))
|
|
147
|
+
}, 60 * 1000)
|
|
148
|
+
const clearProcessTimeout = () => {
|
|
149
|
+
if (processTimeout !== null) {
|
|
150
|
+
clearTimeout(processTimeout)
|
|
151
|
+
processTimeout = null
|
|
152
|
+
}
|
|
153
|
+
}
|
|
143
154
|
text2speech(chunk.payload).then((buffer) => {
|
|
144
|
-
if (self.closing)
|
|
145
|
-
|
|
155
|
+
if (self.closing) {
|
|
156
|
+
clearProcessTimeout()
|
|
157
|
+
callback(new Error("stream destroyed during processing"))
|
|
158
|
+
return
|
|
159
|
+
}
|
|
146
160
|
self.log("info", `Kokoro: received audio (buffer length: ${buffer.byteLength})`)
|
|
161
|
+
|
|
162
|
+
/* calculate actual audio duration from PCM buffer size */
|
|
163
|
+
const durationMs = util.audioBufferDuration(buffer,
|
|
164
|
+
self.config.audioSampleRate, self.config.audioBitDepth) * 1000
|
|
165
|
+
|
|
166
|
+
/* create new chunk with recalculated timestamps */
|
|
147
167
|
const chunkNew = chunk.clone()
|
|
148
|
-
chunkNew.type
|
|
149
|
-
chunkNew.payload
|
|
168
|
+
chunkNew.type = "audio"
|
|
169
|
+
chunkNew.payload = buffer
|
|
170
|
+
chunkNew.timestampEnd = Duration.fromMillis(chunkNew.timestampStart.toMillis() + durationMs)
|
|
171
|
+
clearProcessTimeout()
|
|
150
172
|
this.push(chunkNew)
|
|
151
173
|
callback()
|
|
152
174
|
}).catch((error: unknown) => {
|
|
153
|
-
|
|
175
|
+
clearProcessTimeout()
|
|
176
|
+
callback(util.ensureError(error, "Kokoro processing failed"))
|
|
154
177
|
})
|
|
155
178
|
}
|
|
156
179
|
},
|
|
@@ -103,11 +103,8 @@ export default class SpeechFlowNodeT2AOpenAI extends SpeechFlowNode {
|
|
|
103
103
|
callback(new Error("stream already destroyed"))
|
|
104
104
|
else if (Buffer.isBuffer(chunk.payload))
|
|
105
105
|
callback(new Error("invalid chunk payload type"))
|
|
106
|
-
else if (chunk.payload === "")
|
|
107
|
-
/* pass through empty chunks */
|
|
108
|
-
this.push(chunk)
|
|
106
|
+
else if (chunk.payload === "")
|
|
109
107
|
callback()
|
|
110
|
-
}
|
|
111
108
|
else {
|
|
112
109
|
let processTimeout: ReturnType<typeof setTimeout> | null = setTimeout(() => {
|
|
113
110
|
processTimeout = null
|