speechflow 1.6.7 → 1.7.1

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (152)
  1. package/CHANGELOG.md +15 -0
  2. package/README.md +77 -52
  3. package/etc/secretlint.json +7 -0
  4. package/etc/speechflow.yaml +13 -4
  5. package/etc/stx.conf +3 -2
  6. package/package.json +8 -6
  7. package/speechflow-cli/dst/speechflow-main-api.js +9 -8
  8. package/speechflow-cli/dst/speechflow-main-api.js.map +1 -1
  9. package/speechflow-cli/dst/speechflow-main-graph.js +13 -14
  10. package/speechflow-cli/dst/speechflow-main-graph.js.map +1 -1
  11. package/speechflow-cli/dst/speechflow-main-status.js +38 -8
  12. package/speechflow-cli/dst/speechflow-main-status.js.map +1 -1
  13. package/speechflow-cli/dst/speechflow-node-a2a-compressor-wt.js +3 -0
  14. package/speechflow-cli/dst/speechflow-node-a2a-compressor-wt.js.map +1 -1
  15. package/speechflow-cli/dst/speechflow-node-a2a-compressor.js +4 -2
  16. package/speechflow-cli/dst/speechflow-node-a2a-compressor.js.map +1 -1
  17. package/speechflow-cli/dst/speechflow-node-a2a-expander-wt.js +1 -1
  18. package/speechflow-cli/dst/speechflow-node-a2a-expander.js +4 -2
  19. package/speechflow-cli/dst/speechflow-node-a2a-expander.js.map +1 -1
  20. package/speechflow-cli/dst/speechflow-node-a2a-ffmpeg.js +2 -2
  21. package/speechflow-cli/dst/speechflow-node-a2a-ffmpeg.js.map +1 -1
  22. package/speechflow-cli/dst/speechflow-node-a2a-filler.js +46 -17
  23. package/speechflow-cli/dst/speechflow-node-a2a-filler.js.map +1 -1
  24. package/speechflow-cli/dst/speechflow-node-a2a-gain.js +0 -5
  25. package/speechflow-cli/dst/speechflow-node-a2a-gain.js.map +1 -1
  26. package/speechflow-cli/dst/speechflow-node-a2a-gender.js +3 -4
  27. package/speechflow-cli/dst/speechflow-node-a2a-gender.js.map +1 -1
  28. package/speechflow-cli/dst/speechflow-node-a2a-mute.js +0 -5
  29. package/speechflow-cli/dst/speechflow-node-a2a-mute.js.map +1 -1
  30. package/speechflow-cli/dst/speechflow-node-a2a-pitch.js +1 -2
  31. package/speechflow-cli/dst/speechflow-node-a2a-pitch.js.map +1 -1
  32. package/speechflow-cli/dst/speechflow-node-a2a-rnnoise.js +0 -5
  33. package/speechflow-cli/dst/speechflow-node-a2a-rnnoise.js.map +1 -1
  34. package/speechflow-cli/dst/speechflow-node-a2a-speex.js +0 -5
  35. package/speechflow-cli/dst/speechflow-node-a2a-speex.js.map +1 -1
  36. package/speechflow-cli/dst/speechflow-node-a2a-wav.js +8 -2
  37. package/speechflow-cli/dst/speechflow-node-a2a-wav.js.map +1 -1
  38. package/speechflow-cli/dst/speechflow-node-a2t-amazon.d.ts +0 -1
  39. package/speechflow-cli/dst/speechflow-node-a2t-amazon.js +17 -19
  40. package/speechflow-cli/dst/speechflow-node-a2t-amazon.js.map +1 -1
  41. package/speechflow-cli/dst/speechflow-node-a2t-deepgram.d.ts +0 -1
  42. package/speechflow-cli/dst/speechflow-node-a2t-deepgram.js +30 -25
  43. package/speechflow-cli/dst/speechflow-node-a2t-deepgram.js.map +1 -1
  44. package/speechflow-cli/dst/speechflow-node-a2t-openai.js +79 -48
  45. package/speechflow-cli/dst/speechflow-node-a2t-openai.js.map +1 -1
  46. package/speechflow-cli/dst/speechflow-node-t2a-amazon.js +6 -11
  47. package/speechflow-cli/dst/speechflow-node-t2a-amazon.js.map +1 -1
  48. package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.js +45 -44
  49. package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.js.map +1 -1
  50. package/speechflow-cli/dst/speechflow-node-t2a-kokoro.d.ts +2 -0
  51. package/speechflow-cli/dst/speechflow-node-t2a-kokoro.js +19 -7
  52. package/speechflow-cli/dst/speechflow-node-t2a-kokoro.js.map +1 -1
  53. package/speechflow-cli/dst/speechflow-node-t2t-amazon.js +1 -2
  54. package/speechflow-cli/dst/speechflow-node-t2t-amazon.js.map +1 -1
  55. package/speechflow-cli/dst/speechflow-node-t2t-deepl.js +0 -1
  56. package/speechflow-cli/dst/speechflow-node-t2t-deepl.js.map +1 -1
  57. package/speechflow-cli/dst/speechflow-node-t2t-format.js +0 -1
  58. package/speechflow-cli/dst/speechflow-node-t2t-format.js.map +1 -1
  59. package/speechflow-cli/dst/speechflow-node-t2t-google.js +0 -1
  60. package/speechflow-cli/dst/speechflow-node-t2t-google.js.map +1 -1
  61. package/speechflow-cli/dst/speechflow-node-t2t-modify.js +0 -1
  62. package/speechflow-cli/dst/speechflow-node-t2t-modify.js.map +1 -1
  63. package/speechflow-cli/dst/speechflow-node-t2t-ollama.js +0 -1
  64. package/speechflow-cli/dst/speechflow-node-t2t-ollama.js.map +1 -1
  65. package/speechflow-cli/dst/speechflow-node-t2t-openai.js +0 -1
  66. package/speechflow-cli/dst/speechflow-node-t2t-openai.js.map +1 -1
  67. package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js +173 -29
  68. package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js.map +1 -1
  69. package/speechflow-cli/dst/speechflow-node-x2x-filter.d.ts +1 -0
  70. package/speechflow-cli/dst/speechflow-node-x2x-filter.js +10 -1
  71. package/speechflow-cli/dst/speechflow-node-x2x-filter.js.map +1 -1
  72. package/speechflow-cli/dst/speechflow-node-x2x-trace.js +0 -5
  73. package/speechflow-cli/dst/speechflow-node-x2x-trace.js.map +1 -1
  74. package/speechflow-cli/dst/speechflow-node-xio-device.js +5 -5
  75. package/speechflow-cli/dst/speechflow-node-xio-device.js.map +1 -1
  76. package/speechflow-cli/dst/speechflow-node-xio-file.js +4 -4
  77. package/speechflow-cli/dst/speechflow-node-xio-file.js.map +1 -1
  78. package/speechflow-cli/dst/speechflow-node-xio-mqtt.js +9 -3
  79. package/speechflow-cli/dst/speechflow-node-xio-mqtt.js.map +1 -1
  80. package/speechflow-cli/dst/speechflow-node-xio-websocket.js +16 -5
  81. package/speechflow-cli/dst/speechflow-node-xio-websocket.js.map +1 -1
  82. package/speechflow-cli/dst/speechflow-util-audio.js +3 -3
  83. package/speechflow-cli/dst/speechflow-util-audio.js.map +1 -1
  84. package/speechflow-cli/dst/speechflow-util-error.d.ts +0 -1
  85. package/speechflow-cli/dst/speechflow-util-error.js +0 -7
  86. package/speechflow-cli/dst/speechflow-util-error.js.map +1 -1
  87. package/speechflow-cli/dst/speechflow-util-misc.d.ts +2 -0
  88. package/speechflow-cli/dst/speechflow-util-misc.js +26 -0
  89. package/speechflow-cli/dst/speechflow-util-misc.js.map +1 -0
  90. package/speechflow-cli/dst/speechflow-util-queue.d.ts +9 -2
  91. package/speechflow-cli/dst/speechflow-util-queue.js +36 -15
  92. package/speechflow-cli/dst/speechflow-util-queue.js.map +1 -1
  93. package/speechflow-cli/dst/speechflow-util-stream.d.ts +2 -2
  94. package/speechflow-cli/dst/speechflow-util-stream.js +17 -19
  95. package/speechflow-cli/dst/speechflow-util-stream.js.map +1 -1
  96. package/speechflow-cli/dst/speechflow-util.d.ts +1 -0
  97. package/speechflow-cli/dst/speechflow-util.js +1 -0
  98. package/speechflow-cli/dst/speechflow-util.js.map +1 -1
  99. package/speechflow-cli/etc/oxlint.jsonc +6 -1
  100. package/speechflow-cli/etc/stx.conf +1 -0
  101. package/speechflow-cli/package.json +28 -27
  102. package/speechflow-cli/src/speechflow-main-api.ts +9 -11
  103. package/speechflow-cli/src/speechflow-main-graph.ts +15 -16
  104. package/speechflow-cli/src/speechflow-main-status.ts +6 -10
  105. package/speechflow-cli/src/speechflow-node-a2a-compressor-wt.ts +4 -0
  106. package/speechflow-cli/src/speechflow-node-a2a-compressor.ts +4 -2
  107. package/speechflow-cli/src/speechflow-node-a2a-expander-wt.ts +1 -1
  108. package/speechflow-cli/src/speechflow-node-a2a-expander.ts +4 -2
  109. package/speechflow-cli/src/speechflow-node-a2a-ffmpeg.ts +4 -2
  110. package/speechflow-cli/src/speechflow-node-a2a-filler.ts +57 -20
  111. package/speechflow-cli/src/speechflow-node-a2a-gain.ts +0 -5
  112. package/speechflow-cli/src/speechflow-node-a2a-gender.ts +3 -4
  113. package/speechflow-cli/src/speechflow-node-a2a-mute.ts +0 -5
  114. package/speechflow-cli/src/speechflow-node-a2a-pitch.ts +1 -2
  115. package/speechflow-cli/src/speechflow-node-a2a-rnnoise.ts +0 -5
  116. package/speechflow-cli/src/speechflow-node-a2a-speex.ts +0 -5
  117. package/speechflow-cli/src/speechflow-node-a2a-wav.ts +9 -3
  118. package/speechflow-cli/src/speechflow-node-a2t-amazon.ts +27 -27
  119. package/speechflow-cli/src/speechflow-node-a2t-deepgram.ts +37 -28
  120. package/speechflow-cli/src/speechflow-node-a2t-openai.ts +92 -56
  121. package/speechflow-cli/src/speechflow-node-t2a-amazon.ts +7 -11
  122. package/speechflow-cli/src/speechflow-node-t2a-elevenlabs.ts +47 -43
  123. package/speechflow-cli/src/speechflow-node-t2a-kokoro.ts +22 -7
  124. package/speechflow-cli/src/speechflow-node-t2t-amazon.ts +1 -2
  125. package/speechflow-cli/src/speechflow-node-t2t-deepl.ts +0 -1
  126. package/speechflow-cli/src/speechflow-node-t2t-format.ts +0 -1
  127. package/speechflow-cli/src/speechflow-node-t2t-google.ts +0 -1
  128. package/speechflow-cli/src/speechflow-node-t2t-modify.ts +0 -1
  129. package/speechflow-cli/src/speechflow-node-t2t-ollama.ts +0 -1
  130. package/speechflow-cli/src/speechflow-node-t2t-openai.ts +0 -1
  131. package/speechflow-cli/src/speechflow-node-t2t-subtitle.ts +205 -33
  132. package/speechflow-cli/src/speechflow-node-x2x-filter.ts +16 -4
  133. package/speechflow-cli/src/speechflow-node-x2x-trace.ts +3 -8
  134. package/speechflow-cli/src/speechflow-node-xio-device.ts +6 -9
  135. package/speechflow-cli/src/speechflow-node-xio-file.ts +4 -4
  136. package/speechflow-cli/src/speechflow-node-xio-mqtt.ts +10 -4
  137. package/speechflow-cli/src/speechflow-node-xio-websocket.ts +16 -5
  138. package/speechflow-cli/src/speechflow-util-audio-wt.ts +4 -4
  139. package/speechflow-cli/src/speechflow-util-audio.ts +7 -7
  140. package/speechflow-cli/src/speechflow-util-error.ts +0 -7
  141. package/speechflow-cli/src/speechflow-util-misc.ts +23 -0
  142. package/speechflow-cli/src/speechflow-util-queue.ts +40 -20
  143. package/speechflow-cli/src/speechflow-util-stream.ts +29 -24
  144. package/speechflow-cli/src/speechflow-util.ts +1 -0
  145. package/speechflow-ui-db/dst/index.css +1 -5
  146. package/speechflow-ui-db/dst/index.js +14 -58
  147. package/speechflow-ui-db/etc/stx.conf +5 -16
  148. package/speechflow-ui-db/package.json +16 -15
  149. package/speechflow-ui-st/dst/index.css +1 -5
  150. package/speechflow-ui-st/dst/index.js +31 -160
  151. package/speechflow-ui-st/etc/stx.conf +5 -16
  152. package/speechflow-ui-st/package.json +17 -16
@@ -76,6 +76,10 @@ class CompressorProcessor extends AudioWorkletProcessor {
76
76
  /* determine number of channels */
77
77
  const nCh = input.length
78
78
 
79
+ /* reset envelope array if channel count changed */
80
+ if (nCh !== this.env.length)
81
+ this.env = []
82
+
79
83
  /* initially just copy input to output (pass-through) */
80
84
  for (let c = 0; c < output.length; c++) {
81
85
  if (!output[c] || !input[c])
@@ -245,8 +245,10 @@ export default class SpeechFlowNodeA2ACompressor extends SpeechFlowNode {
245
245
  /* compress chunk */
246
246
  const payload = util.convertBufToI16(chunk.payload)
247
247
  self.compressor?.process(payload).then((result) => {
248
- if (self.closing)
249
- throw new Error("stream already destroyed")
248
+ if (self.closing) {
249
+ callback(new Error("stream already destroyed"))
250
+ return
251
+ }
250
252
  if ((self.params.type === "standalone" && self.params.mode === "compress") ||
251
253
  (self.params.type === "sidechain" && self.params.mode === "adjust") ) {
252
254
  /* take over compressed data */
@@ -113,7 +113,7 @@ class ExpanderProcessor extends AudioWorkletProcessor {
113
113
  const expectedOutLevelDB = levelDB + gainDB + makeupDB
114
114
  if (expectedOutLevelDB < floorDB) {
115
115
  const neededLiftDB = floorDB - expectedOutLevelDB
116
- gainLin /= util.dB2lin(neededLiftDB)
116
+ gainLin *= util.dB2lin(neededLiftDB)
117
117
  }
118
118
 
119
119
  /* apply gain change to channel */
@@ -168,8 +168,10 @@ export default class SpeechFlowNodeA2AExpander extends SpeechFlowNode {
168
168
  /* expand chunk */
169
169
  const payload = util.convertBufToI16(chunk.payload)
170
170
  self.expander?.process(payload).then((result) => {
171
- if (self.closing)
172
- throw new Error("stream already destroyed")
171
+ if (self.closing) {
172
+ callback(new Error("stream already destroyed"))
173
+ return
174
+ }
173
175
 
174
176
  /* take over expanded data */
175
177
  const payload = util.convertI16ToBuf(result)
@@ -99,7 +99,7 @@ export default class SpeechFlowNodeA2AFFMPEG extends SpeechFlowNode {
99
99
  })
100
100
 
101
101
  /* wrap streams with conversions for chunk vs plain audio */
102
- const wrapper1 = util.createTransformStreamForWritableSide()
102
+ const wrapper1 = util.createTransformStreamForWritableSide("audio", 1)
103
103
  const wrapper2 = util.createTransformStreamForReadableSide("audio", () => this.timeZero)
104
104
  this.stream = Stream.compose(wrapper1, ffmpegStream, wrapper2)
105
105
  }
@@ -114,7 +114,9 @@ export default class SpeechFlowNodeA2AFFMPEG extends SpeechFlowNode {
114
114
 
115
115
  /* shutdown FFmpeg */
116
116
  if (this.ffmpeg !== null) {
117
- util.run(() => this.ffmpeg!.kill(), () => {})
117
+ util.run("stopping FFmpeg process",
118
+ () => this.ffmpeg!.kill(),
119
+ () => {})
118
120
  this.ffmpeg = null
119
121
  }
120
122
  }
@@ -15,6 +15,8 @@ import * as util from "./speechflow-util"
15
15
 
16
16
  class AudioFiller extends EventEmitter {
17
17
  private emittedEndSamples = 0 /* stream position in samples already emitted */
18
+ private maxInputEndSamples = 0
19
+ private lastMeta: Map<string, any> | undefined = undefined
18
20
  private readonly bytesPerSample = 2 /* PCM I16 */
19
21
  private readonly bytesPerFrame: number
20
22
  private readonly sampleTolerance = 0.5 /* tolerance for floating-point sample comparisons */
@@ -25,12 +27,12 @@ class AudioFiller extends EventEmitter {
25
27
  }
26
28
 
27
29
  /* optional helper to allow subscribing with strong typing */
28
- public on(event: "chunk", listener: (chunk: SpeechFlowChunk) => void): this
30
+ public on(event: "chunk", listener: (chunk: SpeechFlowChunk, type: string) => void): this
29
31
  public on(event: string, listener: (...args: any[]) => void): this {
30
32
  return super.on(event, listener)
31
33
  }
32
34
 
33
- /* convert fractional samples to duration */
35
+ /* convert fractional samples from duration */
34
36
  private samplesFromDuration(duration: Duration): number {
35
37
  const seconds = duration.as("seconds")
36
38
  const samples = seconds * this.sampleRate
@@ -51,8 +53,9 @@ class AudioFiller extends EventEmitter {
51
53
  const payload = Buffer.alloc(frames * this.bytesPerFrame) /* already zeroed */
52
54
  const timestampStart = this.durationFromSamples(fromSamples)
53
55
  const timestampEnd = this.durationFromSamples(toSamples)
54
- const chunk = new SpeechFlowChunk(timestampStart, timestampEnd, "final", "audio", payload, meta ? new Map(meta) : undefined)
55
- this.emit("chunk", chunk)
56
+ const chunk = new SpeechFlowChunk(timestampStart, timestampEnd,
57
+ "final", "audio", payload, meta ? new Map(meta) : undefined)
58
+ this.emit("chunk", chunk, "silence")
56
59
  }
57
60
 
58
61
  /* add a chunk of audio for processing */
@@ -62,6 +65,12 @@ class AudioFiller extends EventEmitter {
62
65
  if (endSamp < startSamp)
63
66
  throw new Error("invalid timestamps")
64
67
 
68
+ /* track maximum input end timestamp and last metadata for trailing silence */
69
+ if (endSamp > this.maxInputEndSamples) {
70
+ this.maxInputEndSamples = endSamp
71
+ this.lastMeta = chunk.meta ? new Map(chunk.meta) : undefined
72
+ }
73
+
65
74
  /* if chunk starts beyond what we've emitted, insert silence for the gap */
66
75
  if (startSamp > this.emittedEndSamples + this.sampleTolerance) {
67
76
  this.emitSilence(this.emittedEndSamples, startSamp, chunk.meta)
@@ -95,12 +104,20 @@ class AudioFiller extends EventEmitter {
95
104
  const outEndSamples = outStartSamples + Math.floor(payload.length / this.bytesPerFrame)
96
105
  const timestampStart = this.durationFromSamples(outStartSamples)
97
106
  const timestampEnd = this.durationFromSamples(outEndSamples)
98
- const c = new SpeechFlowChunk(timestampStart, timestampEnd, "final", "audio", payload, new Map(chunk.meta))
99
- this.emit("chunk", c)
107
+ const c = new SpeechFlowChunk(timestampStart, timestampEnd,
108
+ "final", "audio", payload, new Map(chunk.meta))
109
+ this.emit("chunk", c, "content")
100
110
 
101
111
  /* advance emitted cursor */
102
112
  this.emittedEndSamples = Math.max(this.emittedEndSamples, outEndSamples)
103
113
  }
114
+
115
+ /* signal end of processing and emit trailing silence */
116
+ public done (): void {
117
+ /* emit trailing silence if there's a gap between emitted and max input */
118
+ if (this.maxInputEndSamples > this.emittedEndSamples + this.sampleTolerance)
119
+ this.emitSilence(this.emittedEndSamples, this.maxInputEndSamples, this.lastMeta)
120
+ }
104
121
  }
105
122
 
106
123
  /* SpeechFlow node for filling audio gaps */
@@ -137,12 +154,13 @@ export default class SpeechFlowNodeA2AFiller extends SpeechFlowNode {
137
154
  this.sendQueue = new util.AsyncQueue<SpeechFlowChunk | null>()
138
155
 
139
156
  /* shift chunks from filler to send queue */
140
- this.filler.on("chunk", (chunk) => {
157
+ this.filler.on("chunk", (chunk, type) => {
141
158
  this.sendQueue?.write(chunk)
142
159
  })
143
160
 
144
161
  /* establish a duplex stream */
145
162
  const self = this
163
+ const reads = new util.PromiseSet<void>()
146
164
  this.stream = new Stream.Duplex({
147
165
  readableObjectMode: true,
148
166
  writableObjectMode: true,
@@ -154,8 +172,6 @@ export default class SpeechFlowNodeA2AFiller extends SpeechFlowNode {
154
172
  callback(new Error("invalid chunk payload type"))
155
173
  else {
156
174
  try {
157
- if (self.closing || self.filler === null)
158
- throw new Error("stream already destroyed")
159
175
  self.filler.add(chunk)
160
176
  callback()
161
177
  }
@@ -164,12 +180,37 @@ export default class SpeechFlowNodeA2AFiller extends SpeechFlowNode {
164
180
  }
165
181
  }
166
182
  },
183
+ async final (callback) {
184
+ /* short-circuit processing in case of own closing */
185
+ if (self.closing) {
186
+ callback()
187
+ return
188
+ }
189
+
190
+ /* signal end of stream */
191
+ if (self.filler !== null && self.sendQueue !== null) {
192
+ /* optionally emit trailing silence
193
+ (we have to wait for its internal "emit" operation to happen) */
194
+ self.filler.done()
195
+ await util.sleep(10)
196
+
197
+ /* signal end of stream */
198
+ self.sendQueue.write(null)
199
+ }
200
+
201
+ /* await all read operations */
202
+ await reads.awaitAll()
203
+
204
+ /* signal end of streaming */
205
+ this.push(null)
206
+ callback()
207
+ },
167
208
  read (size) {
168
209
  if (self.closing || self.sendQueue === null) {
169
210
  this.push(null)
170
211
  return
171
212
  }
172
- self.sendQueue.read().then((chunk) => {
213
+ reads.add(self.sendQueue.read().then((chunk) => {
173
214
  if (self.closing || self.sendQueue === null) {
174
215
  this.push(null)
175
216
  return
@@ -178,22 +219,18 @@ export default class SpeechFlowNodeA2AFiller extends SpeechFlowNode {
178
219
  self.log("info", "received EOF signal")
179
220
  this.push(null)
180
221
  }
222
+ else if (!(chunk.payload instanceof Buffer)) {
223
+ self.log("warning", "invalid chunk (expected audio buffer)")
224
+ this.push(null)
225
+ }
181
226
  else {
182
- self.log("debug", `received data (${chunk.payload.length} bytes)`)
227
+ self.log("debug", `received data (${chunk.payload.byteLength} bytes)`)
183
228
  this.push(chunk)
184
229
  }
185
230
  }).catch((error: unknown) => {
186
231
  if (!self.closing && self.sendQueue !== null)
187
232
  self.log("error", `queue read error: ${util.ensureError(error).message}`)
188
- })
189
- },
190
- final (callback) {
191
- if (self.closing) {
192
- callback()
193
- return
194
- }
195
- this.push(null)
196
- callback()
233
+ }))
197
234
  }
198
235
  })
199
236
  }
@@ -73,11 +73,6 @@ export default class SpeechFlowNodeA2AGain extends SpeechFlowNode {
73
73
  }
74
74
  },
75
75
  final (callback) {
76
- if (self.closing) {
77
- callback()
78
- return
79
- }
80
- this.push(null)
81
76
  callback()
82
77
  }
83
78
  })
@@ -107,7 +107,7 @@ export default class SpeechFlowNodeA2AGender extends SpeechFlowNode {
107
107
  })
108
108
  this.classifier = await Promise.race([
109
109
  pipelinePromise,
110
- util.timeoutPromise(30 * 1000, "model initialization timeout")
110
+ util.timeout(30 * 1000, "model initialization timeout")
111
111
  ]) as Transformers.AudioClassificationPipeline
112
112
  }
113
113
  catch (error) {
@@ -149,7 +149,7 @@ export default class SpeechFlowNodeA2AGender extends SpeechFlowNode {
149
149
  /* classify audio */
150
150
  const result = await Promise.race([
151
151
  this.classifier(data),
152
- util.timeoutPromise(30 * 1000, "classification timeout")
152
+ util.timeout(30 * 1000, "classification timeout")
153
153
  ]) as Transformers.AudioClassificationOutput | Transformers.AudioClassificationOutput[]
154
154
  const classified = Array.isArray(result) ?
155
155
  result as Transformers.AudioClassificationOutput :
@@ -363,8 +363,7 @@ export default class SpeechFlowNodeA2AGender extends SpeechFlowNode {
363
363
  if (this.classifier !== null) {
364
364
  try {
365
365
  const disposePromise = this.classifier.dispose()
366
- const timeoutPromise = new Promise((resolve) => setTimeout(resolve, 5000))
367
- await Promise.race([ disposePromise, timeoutPromise ])
366
+ await Promise.race([ disposePromise, util.sleep(5000) ])
368
367
  }
369
368
  catch (error) {
370
369
  this.log("warning", `error during classifier cleanup: ${error}`)
@@ -107,11 +107,6 @@ export default class SpeechFlowNodeA2AMute extends SpeechFlowNode {
107
107
  }
108
108
  },
109
109
  final (callback) {
110
- if (self.closing) {
111
- callback()
112
- return
113
- }
114
- this.push(null)
115
110
  callback()
116
111
  }
117
112
  })
@@ -185,8 +185,7 @@ export default class SpeechFlowNodeA2APitch extends SpeechFlowNode {
185
185
  this.push(chunk)
186
186
  callback()
187
187
  }).catch((error: unknown) => {
188
- if (!self.closing)
189
- callback(util.ensureError(error, "pitch shifting failed"))
188
+ callback(util.ensureError(error, "pitch shifting failed"))
190
189
  })
191
190
  }
192
191
  },
@@ -136,11 +136,6 @@ export default class SpeechFlowNodeA2ARNNoise extends SpeechFlowNode {
136
136
  }
137
137
  },
138
138
  final (callback) {
139
- if (self.closing) {
140
- callback()
141
- return
142
- }
143
- this.push(null)
144
139
  callback()
145
140
  }
146
141
  })
@@ -108,11 +108,6 @@ export default class SpeechFlowNodeA2ASpeex extends SpeechFlowNode {
108
108
  }
109
109
  },
110
110
  final (callback) {
111
- if (self.closing) {
112
- callback()
113
- return
114
- }
115
- this.push(null)
116
111
  callback()
117
112
  }
118
113
  })
@@ -141,10 +141,17 @@ export default class SpeechFlowNodeA2AWAV extends SpeechFlowNode {
141
141
  callback(new Error("WAV header too short, expected at least 44 bytes"))
142
142
  return
143
143
  }
144
- const header = readWavHeader(chunk.payload)
144
+ let header: ReturnType<typeof readWavHeader>
145
+ try {
146
+ header = readWavHeader(chunk.payload)
147
+ }
148
+ catch (error) {
149
+ callback(util.ensureError(error, "WAV header parsing failed"))
150
+ return
151
+ }
145
152
  self.log("info", "WAV audio stream: " +
146
153
  `audioFormat=${header.audioFormat === 0x0001 ? "PCM" :
147
- "0x" + (header.audioFormat as number).toString(16).padStart(4, "0")} ` +
154
+ "0x" + header.audioFormat.toString(16).padStart(4, "0")} ` +
148
155
  `channels=${header.channels} ` +
149
156
  `sampleRate=${header.sampleRate} ` +
150
157
  `bitDepth=${header.bitDepth}`)
@@ -181,7 +188,6 @@ export default class SpeechFlowNodeA2AWAV extends SpeechFlowNode {
181
188
  }
182
189
  },
183
190
  final (callback) {
184
- this.push(null)
185
191
  callback()
186
192
  }
187
193
  })
@@ -68,11 +68,10 @@ export default class SpeechFlowNodeA2TAmazon extends SpeechFlowNode {
68
68
  public static name = "a2t-amazon"
69
69
 
70
70
  /* internal state */
71
- private client: TranscribeStreamingClient | null = null
72
- private clientStream: AsyncIterable<TranscriptResultStream> | null = null
73
- private closing = false
74
- private initTimeout: ReturnType<typeof setTimeout> | null = null
75
- private connectionTimeout: ReturnType<typeof setTimeout> | null = null
71
+ private client: TranscribeStreamingClient | null = null
72
+ private clientStream: AsyncIterable<TranscriptResultStream> | null = null
73
+ private closing = false
74
+ private connectionTimeout: ReturnType<typeof setTimeout> | null = null
76
75
  private queue: util.SingleQueue<SpeechFlowChunk | null> | null = null
77
76
 
78
77
  /* construct node */
@@ -194,16 +193,17 @@ export default class SpeechFlowNodeA2TAmazon extends SpeechFlowNode {
194
193
  this.queue?.write(chunk)
195
194
  }
196
195
  }
197
- })().catch((err: Error) => {
198
- this.log("warning", `failed to establish connectivity to Amazon Transcribe: ${err}`)
196
+ })().catch((err: unknown) => {
197
+ this.log("warning", `failed to establish connectivity to Amazon Transcribe: ${util.ensureError(err).message}`)
199
198
  })
200
199
  }
201
200
 
202
201
  /* remember opening time to receive time zero offset */
203
202
  this.timeOpen = DateTime.now()
204
203
 
205
- /* provide Duplex stream and internally attach to Deepgram API */
204
+ /* provide Duplex stream and internally attach to Amazon Transcribe API */
206
205
  const self = this
206
+ const reads = new util.PromiseSet<void>()
207
207
  this.stream = new Stream.Duplex({
208
208
  writableObjectMode: true,
209
209
  readableObjectMode: true,
@@ -231,12 +231,29 @@ export default class SpeechFlowNodeA2TAmazon extends SpeechFlowNode {
231
231
  callback()
232
232
  }
233
233
  },
234
+ async final (callback) {
235
+ if (self.closing || self.client === null) {
236
+ callback()
237
+ return
238
+ }
239
+
240
+ /* await all read operations */
241
+ await reads.awaitAll()
242
+
243
+ util.run(
244
+ () => self.client!.destroy(),
245
+ (error: Error) => self.log("warning", `error closing Amazon Transcribe connection: ${error}`)
246
+ )
247
+ audioQueue.push(null) /* do not push null to stream, let Amazon Transcribe do it */
248
+ audioQueue.destroy()
249
+ callback()
250
+ },
234
251
  read (size) {
235
252
  if (self.closing || self.queue === null) {
236
253
  this.push(null)
237
254
  return
238
255
  }
239
- self.queue.read().then((chunk) => {
256
+ reads.add(self.queue.read().then((chunk) => {
240
257
  if (self.closing || self.queue === null) {
241
258
  this.push(null)
242
259
  return
@@ -252,20 +269,7 @@ export default class SpeechFlowNodeA2TAmazon extends SpeechFlowNode {
252
269
  }).catch((error: unknown) => {
253
270
  if (!self.closing && self.queue !== null)
254
271
  self.log("error", `queue read error: ${util.ensureError(error).message}`)
255
- })
256
- },
257
- final (callback) {
258
- if (self.closing || self.client === null) {
259
- callback()
260
- return
261
- }
262
- util.run(
263
- () => self.client!.destroy(),
264
- (error: Error) => self.log("warning", `error closing Amazon Transcribe connection: ${error}`)
265
- )
266
- audioQueue.push(null) /* do not push null to stream, let Amazon Transcribe do it */
267
- audioQueue.destroy()
268
- callback()
272
+ }))
269
273
  }
270
274
  })
271
275
  }
@@ -276,10 +280,6 @@ export default class SpeechFlowNodeA2TAmazon extends SpeechFlowNode {
276
280
  this.closing = true
277
281
 
278
282
  /* cleanup all timers */
279
- if (this.initTimeout !== null) {
280
- clearTimeout(this.initTimeout)
281
- this.initTimeout = null
282
- }
283
283
  if (this.connectionTimeout !== null) {
284
284
  clearTimeout(this.connectionTimeout)
285
285
  this.connectionTimeout = null
@@ -21,10 +21,9 @@ export default class SpeechFlowNodeA2TDeepgram extends SpeechFlowNode {
21
21
  public static name = "a2t-deepgram"
22
22
 
23
23
  /* internal state */
24
- private dg: Deepgram.LiveClient | null = null
25
- private closing = false
26
- private initTimeout: ReturnType<typeof setTimeout> | null = null
27
- private connectionTimeout: ReturnType<typeof setTimeout> | null = null
24
+ private dg: Deepgram.LiveClient | null = null
25
+ private closing = false
26
+ private connectionTimeout: ReturnType<typeof setTimeout> | null = null
28
27
  private queue: util.SingleQueue<SpeechFlowChunk | null> | null = null
29
28
 
30
29
  /* construct node */
@@ -41,6 +40,10 @@ export default class SpeechFlowNodeA2TDeepgram extends SpeechFlowNode {
41
40
  interim: { type: "boolean", val: false, pos: 3 }
42
41
  })
43
42
 
43
+ /* sanity check parameters */
44
+ if (!this.params.key)
45
+ throw new Error("Deepgram API key not configured")
46
+
44
47
  /* declare node input/output format */
45
48
  this.input = "audio"
46
49
  this.output = "text"
@@ -126,7 +129,7 @@ export default class SpeechFlowNodeA2TDeepgram extends SpeechFlowNode {
126
129
  this.log("info", `text received (start: ${data.start}s, ` +
127
130
  `duration: ${data.duration.toFixed(2)}s, ` +
128
131
  `kind: ${isFinal ? "final" : "intermediate"}): ` +
129
- `${text}"`)
132
+ `"${text}"`)
130
133
  const start = Duration.fromMillis(data.start * 1000).plus(this.timeZeroOffset)
131
134
  const end = start.plus({ seconds: data.duration })
132
135
  const metas = metastore.fetch(start, end)
@@ -163,14 +166,16 @@ export default class SpeechFlowNodeA2TDeepgram extends SpeechFlowNode {
163
166
  this.log("error", `error: ${error.message}`)
164
167
  if (!this.closing && this.queue !== null)
165
168
  this.queue.write(null)
166
- this.emit("error")
169
+ this.emit("error", error)
167
170
  })
168
171
 
169
172
  /* wait for Deepgram API to be available */
170
173
  await new Promise((resolve, reject) => {
171
174
  this.connectionTimeout = setTimeout(() => {
172
- this.connectionTimeout = null
173
- reject(new Error("Deepgram: timeout waiting for connection open"))
175
+ if (this.connectionTimeout !== null) {
176
+ this.connectionTimeout = null
177
+ reject(new Error("Deepgram: timeout waiting for connection open"))
178
+ }
174
179
  }, 8000)
175
180
  this.dg!.once(Deepgram.LiveTranscriptionEvents.Open, () => {
176
181
  this.log("info", "connection open")
@@ -187,6 +192,7 @@ export default class SpeechFlowNodeA2TDeepgram extends SpeechFlowNode {
187
192
 
188
193
  /* provide Duplex stream and internally attach to Deepgram API */
189
194
  const self = this
195
+ const reads = new util.PromiseSet<void>()
190
196
  this.stream = new Stream.Duplex({
191
197
  writableObjectMode: true,
192
198
  readableObjectMode: true,
@@ -217,12 +223,33 @@ export default class SpeechFlowNodeA2TDeepgram extends SpeechFlowNode {
217
223
  callback()
218
224
  }
219
225
  },
226
+ async final (callback) {
227
+ /* short-circuiting in case of own closing */
228
+ if (self.closing || self.dg === null) {
229
+ callback()
230
+ return
231
+ }
232
+
233
+ /* close Deepgram API */
234
+ try {
235
+ self.dg.requestClose()
236
+ }
237
+ catch (error) {
238
+ self.log("warning", `error closing Deepgram connection: ${error}`)
239
+ }
240
+
241
+ /* await all read operations */
242
+ await reads.awaitAll()
243
+
244
+ /* NOTICE: do not push null here -- let the Deepgram close event handle it */
245
+ callback()
246
+ },
220
247
  read (size) {
221
248
  if (self.closing || self.queue === null) {
222
249
  this.push(null)
223
250
  return
224
251
  }
225
- self.queue.read().then((chunk) => {
252
+ reads.add(self.queue.read().then((chunk) => {
226
253
  if (self.closing || self.queue === null) {
227
254
  this.push(null)
228
255
  return
@@ -238,21 +265,7 @@ export default class SpeechFlowNodeA2TDeepgram extends SpeechFlowNode {
238
265
  }).catch((error: unknown) => {
239
266
  if (!self.closing && self.queue !== null)
240
267
  self.log("error", `queue read error: ${util.ensureError(error).message}`)
241
- })
242
- },
243
- final (callback) {
244
- if (self.closing || self.dg === null) {
245
- callback()
246
- return
247
- }
248
- try {
249
- self.dg.requestClose()
250
- }
251
- catch (error) {
252
- self.log("warning", `error closing Deepgram connection: ${error}`)
253
- }
254
- /* NOTICE: do not push null here -- let the Deepgram close event handle it */
255
- callback()
268
+ }))
256
269
  }
257
270
  })
258
271
  }
@@ -263,10 +276,6 @@ export default class SpeechFlowNodeA2TDeepgram extends SpeechFlowNode {
263
276
  this.closing = true
264
277
 
265
278
  /* cleanup all timers */
266
- if (this.initTimeout !== null) {
267
- clearTimeout(this.initTimeout)
268
- this.initTimeout = null
269
- }
270
279
  if (this.connectionTimeout !== null) {
271
280
  clearTimeout(this.connectionTimeout)
272
281
  this.connectionTimeout = null