speechflow 1.6.7 → 1.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +15 -0
- package/README.md +77 -52
- package/etc/secretlint.json +7 -0
- package/etc/speechflow.yaml +13 -4
- package/etc/stx.conf +3 -2
- package/package.json +8 -6
- package/speechflow-cli/dst/speechflow-main-api.js +9 -8
- package/speechflow-cli/dst/speechflow-main-api.js.map +1 -1
- package/speechflow-cli/dst/speechflow-main-graph.js +13 -14
- package/speechflow-cli/dst/speechflow-main-graph.js.map +1 -1
- package/speechflow-cli/dst/speechflow-main-status.js +38 -8
- package/speechflow-cli/dst/speechflow-main-status.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-compressor-wt.js +3 -0
- package/speechflow-cli/dst/speechflow-node-a2a-compressor-wt.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-compressor.js +4 -2
- package/speechflow-cli/dst/speechflow-node-a2a-compressor.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-expander-wt.js +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-expander.js +4 -2
- package/speechflow-cli/dst/speechflow-node-a2a-expander.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-ffmpeg.js +2 -2
- package/speechflow-cli/dst/speechflow-node-a2a-ffmpeg.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-filler.js +46 -17
- package/speechflow-cli/dst/speechflow-node-a2a-filler.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-gain.js +0 -5
- package/speechflow-cli/dst/speechflow-node-a2a-gain.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-gender.js +3 -4
- package/speechflow-cli/dst/speechflow-node-a2a-gender.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-mute.js +0 -5
- package/speechflow-cli/dst/speechflow-node-a2a-mute.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-pitch.js +1 -2
- package/speechflow-cli/dst/speechflow-node-a2a-pitch.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-rnnoise.js +0 -5
- package/speechflow-cli/dst/speechflow-node-a2a-rnnoise.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-speex.js +0 -5
- package/speechflow-cli/dst/speechflow-node-a2a-speex.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-wav.js +8 -2
- package/speechflow-cli/dst/speechflow-node-a2a-wav.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2t-amazon.d.ts +0 -1
- package/speechflow-cli/dst/speechflow-node-a2t-amazon.js +17 -19
- package/speechflow-cli/dst/speechflow-node-a2t-amazon.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2t-deepgram.d.ts +0 -1
- package/speechflow-cli/dst/speechflow-node-a2t-deepgram.js +30 -25
- package/speechflow-cli/dst/speechflow-node-a2t-deepgram.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2t-openai.js +79 -48
- package/speechflow-cli/dst/speechflow-node-a2t-openai.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2a-amazon.js +6 -11
- package/speechflow-cli/dst/speechflow-node-t2a-amazon.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.js +45 -44
- package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2a-kokoro.d.ts +2 -0
- package/speechflow-cli/dst/speechflow-node-t2a-kokoro.js +19 -7
- package/speechflow-cli/dst/speechflow-node-t2a-kokoro.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-amazon.js +1 -2
- package/speechflow-cli/dst/speechflow-node-t2t-amazon.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-deepl.js +0 -1
- package/speechflow-cli/dst/speechflow-node-t2t-deepl.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-format.js +0 -1
- package/speechflow-cli/dst/speechflow-node-t2t-format.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-google.js +0 -1
- package/speechflow-cli/dst/speechflow-node-t2t-google.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-modify.js +0 -1
- package/speechflow-cli/dst/speechflow-node-t2t-modify.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-ollama.js +0 -1
- package/speechflow-cli/dst/speechflow-node-t2t-ollama.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-openai.js +0 -1
- package/speechflow-cli/dst/speechflow-node-t2t-openai.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js +173 -29
- package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-x2x-filter.d.ts +1 -0
- package/speechflow-cli/dst/speechflow-node-x2x-filter.js +10 -1
- package/speechflow-cli/dst/speechflow-node-x2x-filter.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-x2x-trace.js +0 -5
- package/speechflow-cli/dst/speechflow-node-x2x-trace.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-device.js +5 -5
- package/speechflow-cli/dst/speechflow-node-xio-device.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-file.js +4 -4
- package/speechflow-cli/dst/speechflow-node-xio-file.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-mqtt.js +9 -3
- package/speechflow-cli/dst/speechflow-node-xio-mqtt.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-websocket.js +16 -5
- package/speechflow-cli/dst/speechflow-node-xio-websocket.js.map +1 -1
- package/speechflow-cli/dst/speechflow-util-audio.js +3 -3
- package/speechflow-cli/dst/speechflow-util-audio.js.map +1 -1
- package/speechflow-cli/dst/speechflow-util-error.d.ts +0 -1
- package/speechflow-cli/dst/speechflow-util-error.js +0 -7
- package/speechflow-cli/dst/speechflow-util-error.js.map +1 -1
- package/speechflow-cli/dst/speechflow-util-misc.d.ts +2 -0
- package/speechflow-cli/dst/speechflow-util-misc.js +26 -0
- package/speechflow-cli/dst/speechflow-util-misc.js.map +1 -0
- package/speechflow-cli/dst/speechflow-util-queue.d.ts +9 -2
- package/speechflow-cli/dst/speechflow-util-queue.js +36 -15
- package/speechflow-cli/dst/speechflow-util-queue.js.map +1 -1
- package/speechflow-cli/dst/speechflow-util-stream.d.ts +2 -2
- package/speechflow-cli/dst/speechflow-util-stream.js +17 -19
- package/speechflow-cli/dst/speechflow-util-stream.js.map +1 -1
- package/speechflow-cli/dst/speechflow-util.d.ts +1 -0
- package/speechflow-cli/dst/speechflow-util.js +1 -0
- package/speechflow-cli/dst/speechflow-util.js.map +1 -1
- package/speechflow-cli/etc/oxlint.jsonc +6 -1
- package/speechflow-cli/etc/stx.conf +1 -0
- package/speechflow-cli/package.json +28 -27
- package/speechflow-cli/src/speechflow-main-api.ts +9 -11
- package/speechflow-cli/src/speechflow-main-graph.ts +15 -16
- package/speechflow-cli/src/speechflow-main-status.ts +6 -10
- package/speechflow-cli/src/speechflow-node-a2a-compressor-wt.ts +4 -0
- package/speechflow-cli/src/speechflow-node-a2a-compressor.ts +4 -2
- package/speechflow-cli/src/speechflow-node-a2a-expander-wt.ts +1 -1
- package/speechflow-cli/src/speechflow-node-a2a-expander.ts +4 -2
- package/speechflow-cli/src/speechflow-node-a2a-ffmpeg.ts +4 -2
- package/speechflow-cli/src/speechflow-node-a2a-filler.ts +57 -20
- package/speechflow-cli/src/speechflow-node-a2a-gain.ts +0 -5
- package/speechflow-cli/src/speechflow-node-a2a-gender.ts +3 -4
- package/speechflow-cli/src/speechflow-node-a2a-mute.ts +0 -5
- package/speechflow-cli/src/speechflow-node-a2a-pitch.ts +1 -2
- package/speechflow-cli/src/speechflow-node-a2a-rnnoise.ts +0 -5
- package/speechflow-cli/src/speechflow-node-a2a-speex.ts +0 -5
- package/speechflow-cli/src/speechflow-node-a2a-wav.ts +9 -3
- package/speechflow-cli/src/speechflow-node-a2t-amazon.ts +27 -27
- package/speechflow-cli/src/speechflow-node-a2t-deepgram.ts +37 -28
- package/speechflow-cli/src/speechflow-node-a2t-openai.ts +92 -56
- package/speechflow-cli/src/speechflow-node-t2a-amazon.ts +7 -11
- package/speechflow-cli/src/speechflow-node-t2a-elevenlabs.ts +47 -43
- package/speechflow-cli/src/speechflow-node-t2a-kokoro.ts +22 -7
- package/speechflow-cli/src/speechflow-node-t2t-amazon.ts +1 -2
- package/speechflow-cli/src/speechflow-node-t2t-deepl.ts +0 -1
- package/speechflow-cli/src/speechflow-node-t2t-format.ts +0 -1
- package/speechflow-cli/src/speechflow-node-t2t-google.ts +0 -1
- package/speechflow-cli/src/speechflow-node-t2t-modify.ts +0 -1
- package/speechflow-cli/src/speechflow-node-t2t-ollama.ts +0 -1
- package/speechflow-cli/src/speechflow-node-t2t-openai.ts +0 -1
- package/speechflow-cli/src/speechflow-node-t2t-subtitle.ts +205 -33
- package/speechflow-cli/src/speechflow-node-x2x-filter.ts +16 -4
- package/speechflow-cli/src/speechflow-node-x2x-trace.ts +3 -8
- package/speechflow-cli/src/speechflow-node-xio-device.ts +6 -9
- package/speechflow-cli/src/speechflow-node-xio-file.ts +4 -4
- package/speechflow-cli/src/speechflow-node-xio-mqtt.ts +10 -4
- package/speechflow-cli/src/speechflow-node-xio-websocket.ts +16 -5
- package/speechflow-cli/src/speechflow-util-audio-wt.ts +4 -4
- package/speechflow-cli/src/speechflow-util-audio.ts +7 -7
- package/speechflow-cli/src/speechflow-util-error.ts +0 -7
- package/speechflow-cli/src/speechflow-util-misc.ts +23 -0
- package/speechflow-cli/src/speechflow-util-queue.ts +40 -20
- package/speechflow-cli/src/speechflow-util-stream.ts +29 -24
- package/speechflow-cli/src/speechflow-util.ts +1 -0
- package/speechflow-ui-db/dst/index.css +1 -5
- package/speechflow-ui-db/dst/index.js +14 -58
- package/speechflow-ui-db/etc/stx.conf +5 -16
- package/speechflow-ui-db/package.json +16 -15
- package/speechflow-ui-st/dst/index.css +1 -5
- package/speechflow-ui-st/dst/index.js +31 -160
- package/speechflow-ui-st/etc/stx.conf +5 -16
- package/speechflow-ui-st/package.json +17 -16
|
@@ -76,6 +76,10 @@ class CompressorProcessor extends AudioWorkletProcessor {
|
|
|
76
76
|
/* determine number of channels */
|
|
77
77
|
const nCh = input.length
|
|
78
78
|
|
|
79
|
+
/* reset envelope array if channel count changed */
|
|
80
|
+
if (nCh !== this.env.length)
|
|
81
|
+
this.env = []
|
|
82
|
+
|
|
79
83
|
/* initially just copy input to output (pass-through) */
|
|
80
84
|
for (let c = 0; c < output.length; c++) {
|
|
81
85
|
if (!output[c] || !input[c])
|
|
@@ -245,8 +245,10 @@ export default class SpeechFlowNodeA2ACompressor extends SpeechFlowNode {
|
|
|
245
245
|
/* compress chunk */
|
|
246
246
|
const payload = util.convertBufToI16(chunk.payload)
|
|
247
247
|
self.compressor?.process(payload).then((result) => {
|
|
248
|
-
if (self.closing)
|
|
249
|
-
|
|
248
|
+
if (self.closing) {
|
|
249
|
+
callback(new Error("stream already destroyed"))
|
|
250
|
+
return
|
|
251
|
+
}
|
|
250
252
|
if ((self.params.type === "standalone" && self.params.mode === "compress") ||
|
|
251
253
|
(self.params.type === "sidechain" && self.params.mode === "adjust") ) {
|
|
252
254
|
/* take over compressed data */
|
|
@@ -113,7 +113,7 @@ class ExpanderProcessor extends AudioWorkletProcessor {
|
|
|
113
113
|
const expectedOutLevelDB = levelDB + gainDB + makeupDB
|
|
114
114
|
if (expectedOutLevelDB < floorDB) {
|
|
115
115
|
const neededLiftDB = floorDB - expectedOutLevelDB
|
|
116
|
-
gainLin
|
|
116
|
+
gainLin *= util.dB2lin(neededLiftDB)
|
|
117
117
|
}
|
|
118
118
|
|
|
119
119
|
/* apply gain change to channel */
|
|
@@ -168,8 +168,10 @@ export default class SpeechFlowNodeA2AExpander extends SpeechFlowNode {
|
|
|
168
168
|
/* expand chunk */
|
|
169
169
|
const payload = util.convertBufToI16(chunk.payload)
|
|
170
170
|
self.expander?.process(payload).then((result) => {
|
|
171
|
-
if (self.closing)
|
|
172
|
-
|
|
171
|
+
if (self.closing) {
|
|
172
|
+
callback(new Error("stream already destroyed"))
|
|
173
|
+
return
|
|
174
|
+
}
|
|
173
175
|
|
|
174
176
|
/* take over expanded data */
|
|
175
177
|
const payload = util.convertI16ToBuf(result)
|
|
@@ -99,7 +99,7 @@ export default class SpeechFlowNodeA2AFFMPEG extends SpeechFlowNode {
|
|
|
99
99
|
})
|
|
100
100
|
|
|
101
101
|
/* wrap streams with conversions for chunk vs plain audio */
|
|
102
|
-
const wrapper1 = util.createTransformStreamForWritableSide()
|
|
102
|
+
const wrapper1 = util.createTransformStreamForWritableSide("audio", 1)
|
|
103
103
|
const wrapper2 = util.createTransformStreamForReadableSide("audio", () => this.timeZero)
|
|
104
104
|
this.stream = Stream.compose(wrapper1, ffmpegStream, wrapper2)
|
|
105
105
|
}
|
|
@@ -114,7 +114,9 @@ export default class SpeechFlowNodeA2AFFMPEG extends SpeechFlowNode {
|
|
|
114
114
|
|
|
115
115
|
/* shutdown FFmpeg */
|
|
116
116
|
if (this.ffmpeg !== null) {
|
|
117
|
-
util.run(
|
|
117
|
+
util.run("stopping FFmpeg process",
|
|
118
|
+
() => this.ffmpeg!.kill(),
|
|
119
|
+
() => {})
|
|
118
120
|
this.ffmpeg = null
|
|
119
121
|
}
|
|
120
122
|
}
|
|
@@ -15,6 +15,8 @@ import * as util from "./speechflow-util"
|
|
|
15
15
|
|
|
16
16
|
class AudioFiller extends EventEmitter {
|
|
17
17
|
private emittedEndSamples = 0 /* stream position in samples already emitted */
|
|
18
|
+
private maxInputEndSamples = 0
|
|
19
|
+
private lastMeta: Map<string, any> | undefined = undefined
|
|
18
20
|
private readonly bytesPerSample = 2 /* PCM I16 */
|
|
19
21
|
private readonly bytesPerFrame: number
|
|
20
22
|
private readonly sampleTolerance = 0.5 /* tolerance for floating-point sample comparisons */
|
|
@@ -25,12 +27,12 @@ class AudioFiller extends EventEmitter {
|
|
|
25
27
|
}
|
|
26
28
|
|
|
27
29
|
/* optional helper to allow subscribing with strong typing */
|
|
28
|
-
public on(event: "chunk", listener: (chunk: SpeechFlowChunk) => void): this
|
|
30
|
+
public on(event: "chunk", listener: (chunk: SpeechFlowChunk, type: string) => void): this
|
|
29
31
|
public on(event: string, listener: (...args: any[]) => void): this {
|
|
30
32
|
return super.on(event, listener)
|
|
31
33
|
}
|
|
32
34
|
|
|
33
|
-
/* convert fractional samples
|
|
35
|
+
/* convert fractional samples from duration */
|
|
34
36
|
private samplesFromDuration(duration: Duration): number {
|
|
35
37
|
const seconds = duration.as("seconds")
|
|
36
38
|
const samples = seconds * this.sampleRate
|
|
@@ -51,8 +53,9 @@ class AudioFiller extends EventEmitter {
|
|
|
51
53
|
const payload = Buffer.alloc(frames * this.bytesPerFrame) /* already zeroed */
|
|
52
54
|
const timestampStart = this.durationFromSamples(fromSamples)
|
|
53
55
|
const timestampEnd = this.durationFromSamples(toSamples)
|
|
54
|
-
const chunk = new SpeechFlowChunk(timestampStart, timestampEnd,
|
|
55
|
-
|
|
56
|
+
const chunk = new SpeechFlowChunk(timestampStart, timestampEnd,
|
|
57
|
+
"final", "audio", payload, meta ? new Map(meta) : undefined)
|
|
58
|
+
this.emit("chunk", chunk, "silence")
|
|
56
59
|
}
|
|
57
60
|
|
|
58
61
|
/* add a chunk of audio for processing */
|
|
@@ -62,6 +65,12 @@ class AudioFiller extends EventEmitter {
|
|
|
62
65
|
if (endSamp < startSamp)
|
|
63
66
|
throw new Error("invalid timestamps")
|
|
64
67
|
|
|
68
|
+
/* track maximum input end timestamp and last metadata for trailing silence */
|
|
69
|
+
if (endSamp > this.maxInputEndSamples) {
|
|
70
|
+
this.maxInputEndSamples = endSamp
|
|
71
|
+
this.lastMeta = chunk.meta ? new Map(chunk.meta) : undefined
|
|
72
|
+
}
|
|
73
|
+
|
|
65
74
|
/* if chunk starts beyond what we've emitted, insert silence for the gap */
|
|
66
75
|
if (startSamp > this.emittedEndSamples + this.sampleTolerance) {
|
|
67
76
|
this.emitSilence(this.emittedEndSamples, startSamp, chunk.meta)
|
|
@@ -95,12 +104,20 @@ class AudioFiller extends EventEmitter {
|
|
|
95
104
|
const outEndSamples = outStartSamples + Math.floor(payload.length / this.bytesPerFrame)
|
|
96
105
|
const timestampStart = this.durationFromSamples(outStartSamples)
|
|
97
106
|
const timestampEnd = this.durationFromSamples(outEndSamples)
|
|
98
|
-
const c = new SpeechFlowChunk(timestampStart, timestampEnd,
|
|
99
|
-
|
|
107
|
+
const c = new SpeechFlowChunk(timestampStart, timestampEnd,
|
|
108
|
+
"final", "audio", payload, new Map(chunk.meta))
|
|
109
|
+
this.emit("chunk", c, "content")
|
|
100
110
|
|
|
101
111
|
/* advance emitted cursor */
|
|
102
112
|
this.emittedEndSamples = Math.max(this.emittedEndSamples, outEndSamples)
|
|
103
113
|
}
|
|
114
|
+
|
|
115
|
+
/* signal end of processing and emit trailing silence */
|
|
116
|
+
public done (): void {
|
|
117
|
+
/* emit trailing silence if there's a gap between emitted and max input */
|
|
118
|
+
if (this.maxInputEndSamples > this.emittedEndSamples + this.sampleTolerance)
|
|
119
|
+
this.emitSilence(this.emittedEndSamples, this.maxInputEndSamples, this.lastMeta)
|
|
120
|
+
}
|
|
104
121
|
}
|
|
105
122
|
|
|
106
123
|
/* SpeechFlow node for filling audio gaps */
|
|
@@ -137,12 +154,13 @@ export default class SpeechFlowNodeA2AFiller extends SpeechFlowNode {
|
|
|
137
154
|
this.sendQueue = new util.AsyncQueue<SpeechFlowChunk | null>()
|
|
138
155
|
|
|
139
156
|
/* shift chunks from filler to send queue */
|
|
140
|
-
this.filler.on("chunk", (chunk) => {
|
|
157
|
+
this.filler.on("chunk", (chunk, type) => {
|
|
141
158
|
this.sendQueue?.write(chunk)
|
|
142
159
|
})
|
|
143
160
|
|
|
144
161
|
/* establish a duplex stream */
|
|
145
162
|
const self = this
|
|
163
|
+
const reads = new util.PromiseSet<void>()
|
|
146
164
|
this.stream = new Stream.Duplex({
|
|
147
165
|
readableObjectMode: true,
|
|
148
166
|
writableObjectMode: true,
|
|
@@ -154,8 +172,6 @@ export default class SpeechFlowNodeA2AFiller extends SpeechFlowNode {
|
|
|
154
172
|
callback(new Error("invalid chunk payload type"))
|
|
155
173
|
else {
|
|
156
174
|
try {
|
|
157
|
-
if (self.closing || self.filler === null)
|
|
158
|
-
throw new Error("stream already destroyed")
|
|
159
175
|
self.filler.add(chunk)
|
|
160
176
|
callback()
|
|
161
177
|
}
|
|
@@ -164,12 +180,37 @@ export default class SpeechFlowNodeA2AFiller extends SpeechFlowNode {
|
|
|
164
180
|
}
|
|
165
181
|
}
|
|
166
182
|
},
|
|
183
|
+
async final (callback) {
|
|
184
|
+
/* short-circuit processing in case of own closing */
|
|
185
|
+
if (self.closing) {
|
|
186
|
+
callback()
|
|
187
|
+
return
|
|
188
|
+
}
|
|
189
|
+
|
|
190
|
+
/* signal end of stream */
|
|
191
|
+
if (self.filler !== null && self.sendQueue !== null) {
|
|
192
|
+
/* optionally emit trailing silence
|
|
193
|
+
(we have to wait for its internal "emit" operation to happen) */
|
|
194
|
+
self.filler.done()
|
|
195
|
+
await util.sleep(10)
|
|
196
|
+
|
|
197
|
+
/* signal end of stream */
|
|
198
|
+
self.sendQueue.write(null)
|
|
199
|
+
}
|
|
200
|
+
|
|
201
|
+
/* await all read operations */
|
|
202
|
+
await reads.awaitAll()
|
|
203
|
+
|
|
204
|
+
/* signal end of streaming */
|
|
205
|
+
this.push(null)
|
|
206
|
+
callback()
|
|
207
|
+
},
|
|
167
208
|
read (size) {
|
|
168
209
|
if (self.closing || self.sendQueue === null) {
|
|
169
210
|
this.push(null)
|
|
170
211
|
return
|
|
171
212
|
}
|
|
172
|
-
self.sendQueue.read().then((chunk) => {
|
|
213
|
+
reads.add(self.sendQueue.read().then((chunk) => {
|
|
173
214
|
if (self.closing || self.sendQueue === null) {
|
|
174
215
|
this.push(null)
|
|
175
216
|
return
|
|
@@ -178,22 +219,18 @@ export default class SpeechFlowNodeA2AFiller extends SpeechFlowNode {
|
|
|
178
219
|
self.log("info", "received EOF signal")
|
|
179
220
|
this.push(null)
|
|
180
221
|
}
|
|
222
|
+
else if (!(chunk.payload instanceof Buffer)) {
|
|
223
|
+
self.log("warning", "invalid chunk (expected audio buffer)")
|
|
224
|
+
this.push(null)
|
|
225
|
+
}
|
|
181
226
|
else {
|
|
182
|
-
self.log("debug", `received data (${chunk.payload.
|
|
227
|
+
self.log("debug", `received data (${chunk.payload.byteLength} bytes)`)
|
|
183
228
|
this.push(chunk)
|
|
184
229
|
}
|
|
185
230
|
}).catch((error: unknown) => {
|
|
186
231
|
if (!self.closing && self.sendQueue !== null)
|
|
187
232
|
self.log("error", `queue read error: ${util.ensureError(error).message}`)
|
|
188
|
-
})
|
|
189
|
-
},
|
|
190
|
-
final (callback) {
|
|
191
|
-
if (self.closing) {
|
|
192
|
-
callback()
|
|
193
|
-
return
|
|
194
|
-
}
|
|
195
|
-
this.push(null)
|
|
196
|
-
callback()
|
|
233
|
+
}))
|
|
197
234
|
}
|
|
198
235
|
})
|
|
199
236
|
}
|
|
@@ -107,7 +107,7 @@ export default class SpeechFlowNodeA2AGender extends SpeechFlowNode {
|
|
|
107
107
|
})
|
|
108
108
|
this.classifier = await Promise.race([
|
|
109
109
|
pipelinePromise,
|
|
110
|
-
util.
|
|
110
|
+
util.timeout(30 * 1000, "model initialization timeout")
|
|
111
111
|
]) as Transformers.AudioClassificationPipeline
|
|
112
112
|
}
|
|
113
113
|
catch (error) {
|
|
@@ -149,7 +149,7 @@ export default class SpeechFlowNodeA2AGender extends SpeechFlowNode {
|
|
|
149
149
|
/* classify audio */
|
|
150
150
|
const result = await Promise.race([
|
|
151
151
|
this.classifier(data),
|
|
152
|
-
util.
|
|
152
|
+
util.timeout(30 * 1000, "classification timeout")
|
|
153
153
|
]) as Transformers.AudioClassificationOutput | Transformers.AudioClassificationOutput[]
|
|
154
154
|
const classified = Array.isArray(result) ?
|
|
155
155
|
result as Transformers.AudioClassificationOutput :
|
|
@@ -363,8 +363,7 @@ export default class SpeechFlowNodeA2AGender extends SpeechFlowNode {
|
|
|
363
363
|
if (this.classifier !== null) {
|
|
364
364
|
try {
|
|
365
365
|
const disposePromise = this.classifier.dispose()
|
|
366
|
-
|
|
367
|
-
await Promise.race([ disposePromise, timeoutPromise ])
|
|
366
|
+
await Promise.race([ disposePromise, util.sleep(5000) ])
|
|
368
367
|
}
|
|
369
368
|
catch (error) {
|
|
370
369
|
this.log("warning", `error during classifier cleanup: ${error}`)
|
|
@@ -185,8 +185,7 @@ export default class SpeechFlowNodeA2APitch extends SpeechFlowNode {
|
|
|
185
185
|
this.push(chunk)
|
|
186
186
|
callback()
|
|
187
187
|
}).catch((error: unknown) => {
|
|
188
|
-
|
|
189
|
-
callback(util.ensureError(error, "pitch shifting failed"))
|
|
188
|
+
callback(util.ensureError(error, "pitch shifting failed"))
|
|
190
189
|
})
|
|
191
190
|
}
|
|
192
191
|
},
|
|
@@ -141,10 +141,17 @@ export default class SpeechFlowNodeA2AWAV extends SpeechFlowNode {
|
|
|
141
141
|
callback(new Error("WAV header too short, expected at least 44 bytes"))
|
|
142
142
|
return
|
|
143
143
|
}
|
|
144
|
-
|
|
144
|
+
let header: ReturnType<typeof readWavHeader>
|
|
145
|
+
try {
|
|
146
|
+
header = readWavHeader(chunk.payload)
|
|
147
|
+
}
|
|
148
|
+
catch (error) {
|
|
149
|
+
callback(util.ensureError(error, "WAV header parsing failed"))
|
|
150
|
+
return
|
|
151
|
+
}
|
|
145
152
|
self.log("info", "WAV audio stream: " +
|
|
146
153
|
`audioFormat=${header.audioFormat === 0x0001 ? "PCM" :
|
|
147
|
-
"0x" +
|
|
154
|
+
"0x" + header.audioFormat.toString(16).padStart(4, "0")} ` +
|
|
148
155
|
`channels=${header.channels} ` +
|
|
149
156
|
`sampleRate=${header.sampleRate} ` +
|
|
150
157
|
`bitDepth=${header.bitDepth}`)
|
|
@@ -181,7 +188,6 @@ export default class SpeechFlowNodeA2AWAV extends SpeechFlowNode {
|
|
|
181
188
|
}
|
|
182
189
|
},
|
|
183
190
|
final (callback) {
|
|
184
|
-
this.push(null)
|
|
185
191
|
callback()
|
|
186
192
|
}
|
|
187
193
|
})
|
|
@@ -68,11 +68,10 @@ export default class SpeechFlowNodeA2TAmazon extends SpeechFlowNode {
|
|
|
68
68
|
public static name = "a2t-amazon"
|
|
69
69
|
|
|
70
70
|
/* internal state */
|
|
71
|
-
private client: TranscribeStreamingClient
|
|
72
|
-
private clientStream: AsyncIterable<TranscriptResultStream>
|
|
73
|
-
private closing
|
|
74
|
-
private
|
|
75
|
-
private connectionTimeout: ReturnType<typeof setTimeout> | null = null
|
|
71
|
+
private client: TranscribeStreamingClient | null = null
|
|
72
|
+
private clientStream: AsyncIterable<TranscriptResultStream> | null = null
|
|
73
|
+
private closing = false
|
|
74
|
+
private connectionTimeout: ReturnType<typeof setTimeout> | null = null
|
|
76
75
|
private queue: util.SingleQueue<SpeechFlowChunk | null> | null = null
|
|
77
76
|
|
|
78
77
|
/* construct node */
|
|
@@ -194,16 +193,17 @@ export default class SpeechFlowNodeA2TAmazon extends SpeechFlowNode {
|
|
|
194
193
|
this.queue?.write(chunk)
|
|
195
194
|
}
|
|
196
195
|
}
|
|
197
|
-
})().catch((err:
|
|
198
|
-
this.log("warning", `failed to establish connectivity to Amazon Transcribe: ${err}`)
|
|
196
|
+
})().catch((err: unknown) => {
|
|
197
|
+
this.log("warning", `failed to establish connectivity to Amazon Transcribe: ${util.ensureError(err).message}`)
|
|
199
198
|
})
|
|
200
199
|
}
|
|
201
200
|
|
|
202
201
|
/* remember opening time to receive time zero offset */
|
|
203
202
|
this.timeOpen = DateTime.now()
|
|
204
203
|
|
|
205
|
-
/* provide Duplex stream and internally attach to
|
|
204
|
+
/* provide Duplex stream and internally attach to Amazon Transcribe API */
|
|
206
205
|
const self = this
|
|
206
|
+
const reads = new util.PromiseSet<void>()
|
|
207
207
|
this.stream = new Stream.Duplex({
|
|
208
208
|
writableObjectMode: true,
|
|
209
209
|
readableObjectMode: true,
|
|
@@ -231,12 +231,29 @@ export default class SpeechFlowNodeA2TAmazon extends SpeechFlowNode {
|
|
|
231
231
|
callback()
|
|
232
232
|
}
|
|
233
233
|
},
|
|
234
|
+
async final (callback) {
|
|
235
|
+
if (self.closing || self.client === null) {
|
|
236
|
+
callback()
|
|
237
|
+
return
|
|
238
|
+
}
|
|
239
|
+
|
|
240
|
+
/* await all read operations */
|
|
241
|
+
await reads.awaitAll()
|
|
242
|
+
|
|
243
|
+
util.run(
|
|
244
|
+
() => self.client!.destroy(),
|
|
245
|
+
(error: Error) => self.log("warning", `error closing Amazon Transcribe connection: ${error}`)
|
|
246
|
+
)
|
|
247
|
+
audioQueue.push(null) /* do not push null to stream, let Amazon Transcribe do it */
|
|
248
|
+
audioQueue.destroy()
|
|
249
|
+
callback()
|
|
250
|
+
},
|
|
234
251
|
read (size) {
|
|
235
252
|
if (self.closing || self.queue === null) {
|
|
236
253
|
this.push(null)
|
|
237
254
|
return
|
|
238
255
|
}
|
|
239
|
-
self.queue.read().then((chunk) => {
|
|
256
|
+
reads.add(self.queue.read().then((chunk) => {
|
|
240
257
|
if (self.closing || self.queue === null) {
|
|
241
258
|
this.push(null)
|
|
242
259
|
return
|
|
@@ -252,20 +269,7 @@ export default class SpeechFlowNodeA2TAmazon extends SpeechFlowNode {
|
|
|
252
269
|
}).catch((error: unknown) => {
|
|
253
270
|
if (!self.closing && self.queue !== null)
|
|
254
271
|
self.log("error", `queue read error: ${util.ensureError(error).message}`)
|
|
255
|
-
})
|
|
256
|
-
},
|
|
257
|
-
final (callback) {
|
|
258
|
-
if (self.closing || self.client === null) {
|
|
259
|
-
callback()
|
|
260
|
-
return
|
|
261
|
-
}
|
|
262
|
-
util.run(
|
|
263
|
-
() => self.client!.destroy(),
|
|
264
|
-
(error: Error) => self.log("warning", `error closing Amazon Transcribe connection: ${error}`)
|
|
265
|
-
)
|
|
266
|
-
audioQueue.push(null) /* do not push null to stream, let Amazon Transcribe do it */
|
|
267
|
-
audioQueue.destroy()
|
|
268
|
-
callback()
|
|
272
|
+
}))
|
|
269
273
|
}
|
|
270
274
|
})
|
|
271
275
|
}
|
|
@@ -276,10 +280,6 @@ export default class SpeechFlowNodeA2TAmazon extends SpeechFlowNode {
|
|
|
276
280
|
this.closing = true
|
|
277
281
|
|
|
278
282
|
/* cleanup all timers */
|
|
279
|
-
if (this.initTimeout !== null) {
|
|
280
|
-
clearTimeout(this.initTimeout)
|
|
281
|
-
this.initTimeout = null
|
|
282
|
-
}
|
|
283
283
|
if (this.connectionTimeout !== null) {
|
|
284
284
|
clearTimeout(this.connectionTimeout)
|
|
285
285
|
this.connectionTimeout = null
|
|
@@ -21,10 +21,9 @@ export default class SpeechFlowNodeA2TDeepgram extends SpeechFlowNode {
|
|
|
21
21
|
public static name = "a2t-deepgram"
|
|
22
22
|
|
|
23
23
|
/* internal state */
|
|
24
|
-
private dg: Deepgram.LiveClient
|
|
25
|
-
private closing
|
|
26
|
-
private
|
|
27
|
-
private connectionTimeout: ReturnType<typeof setTimeout> | null = null
|
|
24
|
+
private dg: Deepgram.LiveClient | null = null
|
|
25
|
+
private closing = false
|
|
26
|
+
private connectionTimeout: ReturnType<typeof setTimeout> | null = null
|
|
28
27
|
private queue: util.SingleQueue<SpeechFlowChunk | null> | null = null
|
|
29
28
|
|
|
30
29
|
/* construct node */
|
|
@@ -41,6 +40,10 @@ export default class SpeechFlowNodeA2TDeepgram extends SpeechFlowNode {
|
|
|
41
40
|
interim: { type: "boolean", val: false, pos: 3 }
|
|
42
41
|
})
|
|
43
42
|
|
|
43
|
+
/* sanity check parameters */
|
|
44
|
+
if (!this.params.key)
|
|
45
|
+
throw new Error("Deepgram API key not configured")
|
|
46
|
+
|
|
44
47
|
/* declare node input/output format */
|
|
45
48
|
this.input = "audio"
|
|
46
49
|
this.output = "text"
|
|
@@ -126,7 +129,7 @@ export default class SpeechFlowNodeA2TDeepgram extends SpeechFlowNode {
|
|
|
126
129
|
this.log("info", `text received (start: ${data.start}s, ` +
|
|
127
130
|
`duration: ${data.duration.toFixed(2)}s, ` +
|
|
128
131
|
`kind: ${isFinal ? "final" : "intermediate"}): ` +
|
|
129
|
-
|
|
132
|
+
`"${text}"`)
|
|
130
133
|
const start = Duration.fromMillis(data.start * 1000).plus(this.timeZeroOffset)
|
|
131
134
|
const end = start.plus({ seconds: data.duration })
|
|
132
135
|
const metas = metastore.fetch(start, end)
|
|
@@ -163,14 +166,16 @@ export default class SpeechFlowNodeA2TDeepgram extends SpeechFlowNode {
|
|
|
163
166
|
this.log("error", `error: ${error.message}`)
|
|
164
167
|
if (!this.closing && this.queue !== null)
|
|
165
168
|
this.queue.write(null)
|
|
166
|
-
this.emit("error")
|
|
169
|
+
this.emit("error", error)
|
|
167
170
|
})
|
|
168
171
|
|
|
169
172
|
/* wait for Deepgram API to be available */
|
|
170
173
|
await new Promise((resolve, reject) => {
|
|
171
174
|
this.connectionTimeout = setTimeout(() => {
|
|
172
|
-
this.connectionTimeout
|
|
173
|
-
|
|
175
|
+
if (this.connectionTimeout !== null) {
|
|
176
|
+
this.connectionTimeout = null
|
|
177
|
+
reject(new Error("Deepgram: timeout waiting for connection open"))
|
|
178
|
+
}
|
|
174
179
|
}, 8000)
|
|
175
180
|
this.dg!.once(Deepgram.LiveTranscriptionEvents.Open, () => {
|
|
176
181
|
this.log("info", "connection open")
|
|
@@ -187,6 +192,7 @@ export default class SpeechFlowNodeA2TDeepgram extends SpeechFlowNode {
|
|
|
187
192
|
|
|
188
193
|
/* provide Duplex stream and internally attach to Deepgram API */
|
|
189
194
|
const self = this
|
|
195
|
+
const reads = new util.PromiseSet<void>()
|
|
190
196
|
this.stream = new Stream.Duplex({
|
|
191
197
|
writableObjectMode: true,
|
|
192
198
|
readableObjectMode: true,
|
|
@@ -217,12 +223,33 @@ export default class SpeechFlowNodeA2TDeepgram extends SpeechFlowNode {
|
|
|
217
223
|
callback()
|
|
218
224
|
}
|
|
219
225
|
},
|
|
226
|
+
async final (callback) {
|
|
227
|
+
/* short-circuiting in case of own closing */
|
|
228
|
+
if (self.closing || self.dg === null) {
|
|
229
|
+
callback()
|
|
230
|
+
return
|
|
231
|
+
}
|
|
232
|
+
|
|
233
|
+
/* close Deepgram API */
|
|
234
|
+
try {
|
|
235
|
+
self.dg.requestClose()
|
|
236
|
+
}
|
|
237
|
+
catch (error) {
|
|
238
|
+
self.log("warning", `error closing Deepgram connection: ${error}`)
|
|
239
|
+
}
|
|
240
|
+
|
|
241
|
+
/* await all read operations */
|
|
242
|
+
await reads.awaitAll()
|
|
243
|
+
|
|
244
|
+
/* NOTICE: do not push null here -- let the Deepgram close event handle it */
|
|
245
|
+
callback()
|
|
246
|
+
},
|
|
220
247
|
read (size) {
|
|
221
248
|
if (self.closing || self.queue === null) {
|
|
222
249
|
this.push(null)
|
|
223
250
|
return
|
|
224
251
|
}
|
|
225
|
-
self.queue.read().then((chunk) => {
|
|
252
|
+
reads.add(self.queue.read().then((chunk) => {
|
|
226
253
|
if (self.closing || self.queue === null) {
|
|
227
254
|
this.push(null)
|
|
228
255
|
return
|
|
@@ -238,21 +265,7 @@ export default class SpeechFlowNodeA2TDeepgram extends SpeechFlowNode {
|
|
|
238
265
|
}).catch((error: unknown) => {
|
|
239
266
|
if (!self.closing && self.queue !== null)
|
|
240
267
|
self.log("error", `queue read error: ${util.ensureError(error).message}`)
|
|
241
|
-
})
|
|
242
|
-
},
|
|
243
|
-
final (callback) {
|
|
244
|
-
if (self.closing || self.dg === null) {
|
|
245
|
-
callback()
|
|
246
|
-
return
|
|
247
|
-
}
|
|
248
|
-
try {
|
|
249
|
-
self.dg.requestClose()
|
|
250
|
-
}
|
|
251
|
-
catch (error) {
|
|
252
|
-
self.log("warning", `error closing Deepgram connection: ${error}`)
|
|
253
|
-
}
|
|
254
|
-
/* NOTICE: do not push null here -- let the Deepgram close event handle it */
|
|
255
|
-
callback()
|
|
268
|
+
}))
|
|
256
269
|
}
|
|
257
270
|
})
|
|
258
271
|
}
|
|
@@ -263,10 +276,6 @@ export default class SpeechFlowNodeA2TDeepgram extends SpeechFlowNode {
|
|
|
263
276
|
this.closing = true
|
|
264
277
|
|
|
265
278
|
/* cleanup all timers */
|
|
266
|
-
if (this.initTimeout !== null) {
|
|
267
|
-
clearTimeout(this.initTimeout)
|
|
268
|
-
this.initTimeout = null
|
|
269
|
-
}
|
|
270
279
|
if (this.connectionTimeout !== null) {
|
|
271
280
|
clearTimeout(this.connectionTimeout)
|
|
272
281
|
this.connectionTimeout = null
|