speechflow 2.0.0 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (134)
  1. package/CHANGELOG.md +6 -0
  2. package/README.md +4 -4
  3. package/package.json +2 -2
  4. package/speechflow-cli/dst/speechflow-main-api.js.map +1 -1
  5. package/speechflow-cli/dst/speechflow-main-cli.js +1 -0
  6. package/speechflow-cli/dst/speechflow-main-cli.js.map +1 -1
  7. package/speechflow-cli/dst/speechflow-main-graph.js +2 -4
  8. package/speechflow-cli/dst/speechflow-main-graph.js.map +1 -1
  9. package/speechflow-cli/dst/speechflow-main-nodes.js +1 -0
  10. package/speechflow-cli/dst/speechflow-main-nodes.js.map +1 -1
  11. package/speechflow-cli/dst/speechflow-node-a2a-compressor-wt.js +1 -0
  12. package/speechflow-cli/dst/speechflow-node-a2a-compressor-wt.js.map +1 -1
  13. package/speechflow-cli/dst/speechflow-node-a2a-compressor.js +7 -9
  14. package/speechflow-cli/dst/speechflow-node-a2a-compressor.js.map +1 -1
  15. package/speechflow-cli/dst/speechflow-node-a2a-expander-wt.js +1 -0
  16. package/speechflow-cli/dst/speechflow-node-a2a-expander-wt.js.map +1 -1
  17. package/speechflow-cli/dst/speechflow-node-a2a-expander.js +8 -9
  18. package/speechflow-cli/dst/speechflow-node-a2a-expander.js.map +1 -1
  19. package/speechflow-cli/dst/speechflow-node-a2a-filler.js +2 -0
  20. package/speechflow-cli/dst/speechflow-node-a2a-filler.js.map +1 -1
  21. package/speechflow-cli/dst/speechflow-node-a2a-gender.js +1 -1
  22. package/speechflow-cli/dst/speechflow-node-a2a-gender.js.map +1 -1
  23. package/speechflow-cli/dst/speechflow-node-a2a-meter.js +1 -1
  24. package/speechflow-cli/dst/speechflow-node-a2a-pitch.js +11 -9
  25. package/speechflow-cli/dst/speechflow-node-a2a-pitch.js.map +1 -1
  26. package/speechflow-cli/dst/speechflow-node-a2a-rnnoise-wt.js +1 -0
  27. package/speechflow-cli/dst/speechflow-node-a2a-rnnoise-wt.js.map +1 -1
  28. package/speechflow-cli/dst/speechflow-node-a2a-rnnoise.js.map +1 -1
  29. package/speechflow-cli/dst/speechflow-node-a2a-speex.js +4 -2
  30. package/speechflow-cli/dst/speechflow-node-a2a-speex.js.map +1 -1
  31. package/speechflow-cli/dst/speechflow-node-a2a-vad.js +19 -22
  32. package/speechflow-cli/dst/speechflow-node-a2a-vad.js.map +1 -1
  33. package/speechflow-cli/dst/speechflow-node-a2a-wav.js +7 -0
  34. package/speechflow-cli/dst/speechflow-node-a2a-wav.js.map +1 -1
  35. package/speechflow-cli/dst/speechflow-node-a2t-amazon.d.ts +0 -1
  36. package/speechflow-cli/dst/speechflow-node-a2t-amazon.js +2 -11
  37. package/speechflow-cli/dst/speechflow-node-a2t-amazon.js.map +1 -1
  38. package/speechflow-cli/dst/speechflow-node-a2t-google.d.ts +0 -1
  39. package/speechflow-cli/dst/speechflow-node-a2t-google.js +0 -6
  40. package/speechflow-cli/dst/speechflow-node-a2t-google.js.map +1 -1
  41. package/speechflow-cli/dst/speechflow-node-a2t-openai.js +6 -1
  42. package/speechflow-cli/dst/speechflow-node-a2t-openai.js.map +1 -1
  43. package/speechflow-cli/dst/speechflow-node-t2a-amazon.d.ts +1 -1
  44. package/speechflow-cli/dst/speechflow-node-t2a-amazon.js +27 -7
  45. package/speechflow-cli/dst/speechflow-node-t2a-amazon.js.map +1 -1
  46. package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.d.ts +1 -1
  47. package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.js +5 -3
  48. package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.js.map +1 -1
  49. package/speechflow-cli/dst/speechflow-node-t2a-google.js +1 -4
  50. package/speechflow-cli/dst/speechflow-node-t2a-google.js.map +1 -1
  51. package/speechflow-cli/dst/speechflow-node-t2a-kokoro.d.ts +1 -1
  52. package/speechflow-cli/dst/speechflow-node-t2a-kokoro.js +27 -6
  53. package/speechflow-cli/dst/speechflow-node-t2a-kokoro.js.map +1 -1
  54. package/speechflow-cli/dst/speechflow-node-t2a-openai.js +1 -4
  55. package/speechflow-cli/dst/speechflow-node-t2a-openai.js.map +1 -1
  56. package/speechflow-cli/dst/speechflow-node-t2a-supertonic.js +15 -4
  57. package/speechflow-cli/dst/speechflow-node-t2a-supertonic.js.map +1 -1
  58. package/speechflow-cli/dst/speechflow-node-t2t-amazon.js +0 -2
  59. package/speechflow-cli/dst/speechflow-node-t2t-amazon.js.map +1 -1
  60. package/speechflow-cli/dst/speechflow-node-t2t-deepl.js.map +1 -1
  61. package/speechflow-cli/dst/speechflow-node-t2t-google.js.map +1 -1
  62. package/speechflow-cli/dst/speechflow-node-t2t-opus.js +18 -16
  63. package/speechflow-cli/dst/speechflow-node-t2t-opus.js.map +1 -1
  64. package/speechflow-cli/dst/speechflow-node-t2t-punctuation.js +1 -1
  65. package/speechflow-cli/dst/speechflow-node-t2t-spellcheck.js +1 -1
  66. package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js +5 -2
  67. package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js.map +1 -1
  68. package/speechflow-cli/dst/speechflow-node-t2t-summary.js +1 -1
  69. package/speechflow-cli/dst/speechflow-node-t2t-summary.js.map +1 -1
  70. package/speechflow-cli/dst/speechflow-node-x2x-filter.js +2 -0
  71. package/speechflow-cli/dst/speechflow-node-x2x-filter.js.map +1 -1
  72. package/speechflow-cli/dst/speechflow-node-xio-exec.js +1 -0
  73. package/speechflow-cli/dst/speechflow-node-xio-exec.js.map +1 -1
  74. package/speechflow-cli/dst/speechflow-node-xio-file.js +3 -5
  75. package/speechflow-cli/dst/speechflow-node-xio-file.js.map +1 -1
  76. package/speechflow-cli/dst/speechflow-node-xio-mqtt.js.map +1 -1
  77. package/speechflow-cli/dst/speechflow-node-xio-vban.js.map +1 -1
  78. package/speechflow-cli/dst/speechflow-node-xio-webrtc.js +2 -0
  79. package/speechflow-cli/dst/speechflow-node-xio-webrtc.js.map +1 -1
  80. package/speechflow-cli/dst/speechflow-node-xio-websocket.js +9 -9
  81. package/speechflow-cli/dst/speechflow-node-xio-websocket.js.map +1 -1
  82. package/speechflow-cli/dst/speechflow-util-audio.js +4 -0
  83. package/speechflow-cli/dst/speechflow-util-audio.js.map +1 -1
  84. package/speechflow-cli/dst/speechflow-util-queue.js +2 -1
  85. package/speechflow-cli/dst/speechflow-util-queue.js.map +1 -1
  86. package/speechflow-cli/dst/speechflow-util.js +1 -0
  87. package/speechflow-cli/dst/speechflow-util.js.map +1 -1
  88. package/speechflow-cli/package.json +10 -10
  89. package/speechflow-cli/src/speechflow-main-api.ts +16 -16
  90. package/speechflow-cli/src/speechflow-main-cli.ts +1 -0
  91. package/speechflow-cli/src/speechflow-main-graph.ts +7 -9
  92. package/speechflow-cli/src/speechflow-main-nodes.ts +1 -0
  93. package/speechflow-cli/src/speechflow-node-a2a-compressor-wt.ts +1 -0
  94. package/speechflow-cli/src/speechflow-node-a2a-compressor.ts +8 -10
  95. package/speechflow-cli/src/speechflow-node-a2a-expander-wt.ts +1 -0
  96. package/speechflow-cli/src/speechflow-node-a2a-expander.ts +9 -10
  97. package/speechflow-cli/src/speechflow-node-a2a-filler.ts +2 -0
  98. package/speechflow-cli/src/speechflow-node-a2a-gender.ts +3 -3
  99. package/speechflow-cli/src/speechflow-node-a2a-meter.ts +2 -2
  100. package/speechflow-cli/src/speechflow-node-a2a-pitch.ts +11 -9
  101. package/speechflow-cli/src/speechflow-node-a2a-rnnoise-wt.ts +1 -0
  102. package/speechflow-cli/src/speechflow-node-a2a-rnnoise.ts +1 -1
  103. package/speechflow-cli/src/speechflow-node-a2a-speex.ts +5 -3
  104. package/speechflow-cli/src/speechflow-node-a2a-vad.ts +20 -23
  105. package/speechflow-cli/src/speechflow-node-a2a-wav.ts +7 -0
  106. package/speechflow-cli/src/speechflow-node-a2t-amazon.ts +6 -18
  107. package/speechflow-cli/src/speechflow-node-a2t-google.ts +4 -11
  108. package/speechflow-cli/src/speechflow-node-a2t-openai.ts +12 -7
  109. package/speechflow-cli/src/speechflow-node-t2a-amazon.ts +32 -10
  110. package/speechflow-cli/src/speechflow-node-t2a-elevenlabs.ts +6 -4
  111. package/speechflow-cli/src/speechflow-node-t2a-google.ts +1 -4
  112. package/speechflow-cli/src/speechflow-node-t2a-kokoro.ts +33 -10
  113. package/speechflow-cli/src/speechflow-node-t2a-openai.ts +1 -4
  114. package/speechflow-cli/src/speechflow-node-t2a-supertonic.ts +15 -6
  115. package/speechflow-cli/src/speechflow-node-t2t-amazon.ts +1 -3
  116. package/speechflow-cli/src/speechflow-node-t2t-deepl.ts +2 -2
  117. package/speechflow-cli/src/speechflow-node-t2t-google.ts +1 -1
  118. package/speechflow-cli/src/speechflow-node-t2t-opus.ts +19 -18
  119. package/speechflow-cli/src/speechflow-node-t2t-punctuation.ts +1 -1
  120. package/speechflow-cli/src/speechflow-node-t2t-spellcheck.ts +1 -1
  121. package/speechflow-cli/src/speechflow-node-t2t-subtitle.ts +5 -2
  122. package/speechflow-cli/src/speechflow-node-t2t-summary.ts +1 -1
  123. package/speechflow-cli/src/speechflow-node-x2x-filter.ts +2 -0
  124. package/speechflow-cli/src/speechflow-node-xio-exec.ts +1 -0
  125. package/speechflow-cli/src/speechflow-node-xio-file.ts +3 -5
  126. package/speechflow-cli/src/speechflow-node-xio-mqtt.ts +2 -2
  127. package/speechflow-cli/src/speechflow-node-xio-vban.ts +5 -5
  128. package/speechflow-cli/src/speechflow-node-xio-webrtc.ts +2 -0
  129. package/speechflow-cli/src/speechflow-node-xio-websocket.ts +9 -9
  130. package/speechflow-cli/src/speechflow-util-audio.ts +5 -0
  131. package/speechflow-cli/src/speechflow-util-queue.ts +3 -3
  132. package/speechflow-cli/src/speechflow-util.ts +1 -0
  133. package/speechflow-ui-db/package.json +4 -4
  134. package/speechflow-ui-st/package.json +4 -4
@@ -24,15 +24,15 @@ import * as util from "./speechflow-util"
24
24
  /* the SpeechFlow node graph management */
25
25
  export class NodeGraph {
26
26
  /* internal state */
27
- private graphNodes = new Set<SpeechFlowNode>()
28
- private activeNodes = new Set<SpeechFlowNode>()
29
- private finishEvents = new EventEmitter()
27
+ private graphNodes = new Set<SpeechFlowNode>()
28
+ private activeNodes = new Set<SpeechFlowNode>()
29
+ private finishEvents = new EventEmitter()
30
30
  private timeZero: DateTime | null = null
31
- private shuttingDown = false
31
+ private shuttingDown = false
32
32
 
33
33
  /* simple construction */
34
34
  constructor (
35
- private cli: CLIio,
35
+ private cli: CLIio,
36
36
  private debug = false
37
37
  ) {}
38
38
 
@@ -59,6 +59,7 @@ export class NodeGraph {
59
59
  err instanceof Error && err.name === "FlowLinkError"
60
60
  ? err.toString() : (err instanceof Error ? err.message : "internal error")
61
61
 
62
+ /* instantiate FlowLink parser */
62
63
  const flowlink = new FlowLink<SpeechFlowNode>({
63
64
  trace: (msg: string) => {
64
65
  this.cli.log("debug", msg)
@@ -97,10 +98,7 @@ export class NodeGraph {
97
98
  }
98
99
  catch (err) {
99
100
  /* fatal error */
100
- if (err instanceof Error)
101
- this.cli.log("error", `creation of node <${id}> failed: ${err.message}`)
102
- else
103
- this.cli.log("error", `creation of node <${id}> failed: ${err}`)
101
+ this.cli.log("error", `creation of node <${id}> failed: ${util.ensureError(err).message}`)
104
102
  process.exit(1)
105
103
  }
106
104
  const params = Object.keys(node.params).map((key) => {
@@ -16,6 +16,7 @@ import SpeechFlowNode from "./speechflow-node"
16
16
  export class NodeRegistry {
17
17
  public nodes: { [ id: string ]: typeof SpeechFlowNode } = {}
18
18
 
19
+ /* simple constructor */
19
20
  constructor (
20
21
  private cli: CLIio
21
22
  ) {}
@@ -4,6 +4,7 @@
4
4
  ** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
5
5
  */
6
6
 
7
+ /* internal dependencies */
7
8
  import * as util from "./speechflow-util"
8
9
 
9
10
  /* downward compressor with soft knee */
@@ -36,7 +36,7 @@ class AudioCompressor extends util.WebAudio {
36
36
  private gainNode: GainNode | null = null
37
37
 
38
38
  /* construct object */
39
- constructor(
39
+ constructor (
40
40
  sampleRate: number,
41
41
  channels: number,
42
42
  type: "standalone" | "sidechain" = "standalone",
@@ -106,8 +106,7 @@ class AudioCompressor extends util.WebAudio {
106
106
  /* configure compressor worklet node */
107
107
  const currentTime = this.audioContext.currentTime
108
108
  if (needsCompressor) {
109
- const node = this.compressorNode!
110
- const params = node.parameters as Map<string, AudioParam>
109
+ const params = this.compressorNode!.parameters as Map<string, AudioParam>
111
110
  params.get("threshold")!.setValueAtTime(this.config.thresholdDb, currentTime)
112
111
  params.get("ratio")!.setValueAtTime(this.config.ratio, currentTime)
113
112
  params.get("attack")!.setValueAtTime(this.config.attackMs / 1000, currentTime)
@@ -241,10 +240,12 @@ export default class SpeechFlowNodeA2ACompressor extends SpeechFlowNode {
241
240
  }
242
241
  if (!Buffer.isBuffer(chunk.payload))
243
242
  callback(new Error("invalid chunk payload type"))
243
+ else if (self.compressor === null)
244
+ callback(new Error("compressor not initialized"))
244
245
  else {
245
246
  /* compress chunk */
246
247
  const payload = util.convertBufToI16(chunk.payload)
247
- self.compressor?.process(payload).then((result) => {
248
+ self.compressor.process(payload).then((result) => {
248
249
  if (self.closing) {
249
250
  callback(new Error("stream already destroyed"))
250
251
  return
@@ -258,17 +259,14 @@ export default class SpeechFlowNodeA2ACompressor extends SpeechFlowNode {
258
259
  this.push(chunk)
259
260
  callback()
260
261
  }).catch((error: unknown) => {
261
- if (!self.closing)
262
+ if (self.closing)
263
+ callback()
264
+ else
262
265
  callback(util.ensureError(error, "compression failed"))
263
266
  })
264
267
  }
265
268
  },
266
269
  final (callback) {
267
- if (self.closing) {
268
- callback()
269
- return
270
- }
271
- this.push(null)
272
270
  callback()
273
271
  }
274
272
  })
@@ -4,6 +4,7 @@
4
4
  ** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
5
5
  */
6
6
 
7
+ /* internal dependencies */
7
8
  import * as util from "./speechflow-util"
8
9
 
9
10
  /* downward expander with soft knee */
@@ -33,7 +33,7 @@ class AudioExpander extends util.WebAudio {
33
33
  private expanderNode: AudioWorkletNode | null = null
34
34
 
35
35
  /* construct object */
36
- constructor(
36
+ constructor (
37
37
  sampleRate: number,
38
38
  channels: number,
39
39
  config: AudioExpanderConfig = {}
@@ -71,8 +71,7 @@ class AudioExpander extends util.WebAudio {
71
71
 
72
72
  /* configure expander node */
73
73
  const currentTime = this.audioContext.currentTime
74
- const node = this.expanderNode!
75
- const params = node.parameters as Map<string, AudioParam>
74
+ const params = this.expanderNode.parameters as Map<string, AudioParam>
76
75
  params.get("threshold")!.setValueAtTime(this.config.thresholdDb, currentTime)
77
76
  params.get("floor")!.setValueAtTime(this.config.floorDb, currentTime)
78
77
  params.get("ratio")!.setValueAtTime(this.config.ratio, currentTime)
@@ -86,6 +85,7 @@ class AudioExpander extends util.WebAudio {
86
85
  this.expanderNode.connect(this.captureNode!)
87
86
  }
88
87
 
88
+ /* destroy object */
89
89
  public async destroy (): Promise<void> {
90
90
  /* destroy expander node */
91
91
  if (this.expanderNode !== null) {
@@ -164,10 +164,12 @@ export default class SpeechFlowNodeA2AExpander extends SpeechFlowNode {
164
164
  }
165
165
  if (!Buffer.isBuffer(chunk.payload))
166
166
  callback(new Error("invalid chunk payload type"))
167
+ else if (self.expander === null)
168
+ callback(new Error("expander not initialized"))
167
169
  else {
168
170
  /* expand chunk */
169
171
  const payload = util.convertBufToI16(chunk.payload)
170
- self.expander?.process(payload).then((result) => {
172
+ self.expander.process(payload).then((result) => {
171
173
  if (self.closing) {
172
174
  callback(new Error("stream already destroyed"))
173
175
  return
@@ -179,17 +181,14 @@ export default class SpeechFlowNodeA2AExpander extends SpeechFlowNode {
179
181
  this.push(chunk)
180
182
  callback()
181
183
  }).catch((error: unknown) => {
182
- if (!self.closing)
184
+ if (self.closing)
185
+ callback()
186
+ else
183
187
  callback(util.ensureError(error, "expansion failed"))
184
188
  })
185
189
  }
186
190
  },
187
191
  final (callback) {
188
- if (self.closing) {
189
- callback()
190
- return
191
- }
192
- this.push(null)
193
192
  callback()
194
193
  }
195
194
  })
@@ -13,6 +13,7 @@ import { Duration } from "luxon"
13
13
  import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
14
14
  import * as util from "./speechflow-util"
15
15
 
16
+ /* audio gap filler class */
16
17
  class AudioFiller extends EventEmitter {
17
18
  private emittedEndSamples = 0 /* stream position in samples already emitted */
18
19
  private maxInputEndSamples = 0
@@ -21,6 +22,7 @@ class AudioFiller extends EventEmitter {
21
22
  private readonly bytesPerFrame: number
22
23
  private readonly sampleTolerance = 0.5 /* tolerance for floating-point sample comparisons */
23
24
 
25
+ /* construct object */
24
26
  constructor (private sampleRate = 48000, private channels = 1) {
25
27
  super()
26
28
  this.bytesPerFrame = this.channels * this.bytesPerSample
@@ -134,7 +134,7 @@ export default class SpeechFlowNodeA2AGender extends SpeechFlowNode {
134
134
  throw new Error("classifier destroyed during operation")
135
135
 
136
136
  /* check volume level and return "unknown" if too low
137
- in order to avoid a wrong classificaton */
137
+ in order to avoid a wrong classification */
138
138
  const audioData = {
139
139
  sampleRate: sampleRateTarget,
140
140
  numberOfChannels: 1,
@@ -154,8 +154,8 @@ export default class SpeechFlowNodeA2AGender extends SpeechFlowNode {
154
154
  const classified = Array.isArray(result) ?
155
155
  result as Transformers.AudioClassificationOutput :
156
156
  [ result ]
157
- const c1 = classified.find((c) => c.label === "male")
158
- const c2 = classified.find((c) => c.label === "female")
157
+ const c1 = classified.find((c) => c.label === "male")
158
+ const c2 = classified.find((c) => c.label === "female")
159
159
  const male = c1 ? c1.score : 0.0
160
160
  const female = c2 ? c2.score : 0.0
161
161
  const threshold = this.params.threshold
@@ -83,7 +83,7 @@ export default class SpeechFlowNodeA2AMeter extends SpeechFlowNode {
83
83
  const chunkData = this.chunkBuffer
84
84
  this.chunkBuffer = new Float32Array(0)
85
85
 
86
- /* update internal audio sample sliding window for LUFS-S */
86
+ /* update internal audio sample sliding window for LUFS-M */
87
87
  if (chunkData.length > sampleWindow.length)
88
88
  sampleWindow.set(chunkData.subarray(chunkData.length - sampleWindow.length), 0)
89
89
  else {
@@ -218,4 +218,4 @@ export default class SpeechFlowNodeA2AMeter extends SpeechFlowNode {
218
218
  this.stream = null
219
219
  }
220
220
  }
221
- }
221
+ }
@@ -172,12 +172,16 @@ export default class SpeechFlowNodeA2APitch extends SpeechFlowNode {
172
172
  }
173
173
  if (!Buffer.isBuffer(chunk.payload))
174
174
  callback(new Error("invalid chunk payload type"))
175
+ else if (self.pitchShifter === null)
176
+ callback(new Error("pitch shifter not initialized"))
175
177
  else {
176
178
  /* shift pitch of audio chunk */
177
179
  const payload = util.convertBufToI16(chunk.payload, self.config.audioLittleEndian)
178
- self.pitchShifter?.process(payload).then((result) => {
179
- if (self.closing)
180
- throw new Error("stream already destroyed")
180
+ self.pitchShifter.process(payload).then((result) => {
181
+ if (self.closing) {
182
+ callback(new Error("stream already destroyed"))
183
+ return
184
+ }
181
185
 
182
186
  /* take over pitch-shifted data */
183
187
  const payload = util.convertI16ToBuf(result, self.config.audioLittleEndian)
@@ -185,16 +189,14 @@ export default class SpeechFlowNodeA2APitch extends SpeechFlowNode {
185
189
  this.push(chunk)
186
190
  callback()
187
191
  }).catch((error: unknown) => {
188
- callback(util.ensureError(error, "pitch shifting failed"))
192
+ if (self.closing)
193
+ callback()
194
+ else
195
+ callback(util.ensureError(error, "pitch shifting failed"))
189
196
  })
190
197
  }
191
198
  },
192
199
  final (callback) {
193
- if (self.closing) {
194
- callback()
195
- return
196
- }
197
- this.push(null)
198
200
  callback()
199
201
  }
200
202
  })
@@ -48,6 +48,7 @@ parentPort!.on("message", (msg) => {
48
48
  for (let i = 0; i < data.length; i++)
49
49
  i16[i] = Math.round(f32a[i])
50
50
 
51
+ /* send processed frame back to parent */
51
52
  parentPort!.postMessage({ type: "process-done", id, data: i16 }, [ i16.buffer ])
52
53
  }
53
54
  else if (msg.type === "close") {
@@ -93,7 +93,7 @@ export default class SpeechFlowNodeA2ARNNoise extends SpeechFlowNode {
93
93
  return segment
94
94
  const id = `${seq++}`
95
95
  return new Promise<Int16Array<ArrayBuffer>>((resolve) => {
96
- pending.set(id, (segment: Int16Array<ArrayBuffer>) => { resolve(segment) })
96
+ pending.set(id, (segment) => { resolve(segment) })
97
97
  this.worker!.postMessage({ type: "process", id, data: segment }, [ segment.buffer ])
98
98
  })
99
99
  }
@@ -32,7 +32,7 @@ export default class SpeechFlowNodeA2ASpeex extends SpeechFlowNode {
32
32
 
33
33
  /* declare node configuration parameters */
34
34
  this.configure({
35
- attenuate: { type: "number", val: -18, pos: 0, match: (n: number) => n >= -60 && n <= 0 },
35
+ attenuate: { type: "number", val: -18, pos: 0, match: (n: number) => n >= -60 && n <= 0 }
36
36
  })
37
37
 
38
38
  /* declare node input/output format */
@@ -53,7 +53,7 @@ export default class SpeechFlowNodeA2ASpeex extends SpeechFlowNode {
53
53
  const wasmBinary = await fs.promises.readFile(
54
54
  path.join(__dirname, "../node_modules/@sapphi-red/speex-preprocess-wasm/dist/speex.wasm"))
55
55
  const speexModule = await loadSpeexModule({
56
- wasmBinary: wasmBinary.buffer as ArrayBuffer
56
+ wasmBinary: wasmBinary.buffer
57
57
  })
58
58
  this.speexProcessor = new SpeexPreprocessor(
59
59
  speexModule, this.sampleSize, this.config.audioSampleRate)
@@ -85,7 +85,9 @@ export default class SpeechFlowNodeA2ASpeex extends SpeechFlowNode {
85
85
  util.processInt16ArrayInSegments(payload, self.sampleSize, (segment) => {
86
86
  if (self.closing)
87
87
  throw new Error("stream already destroyed")
88
- self.speexProcessor?.processInt16(segment)
88
+ if (self.speexProcessor === null)
89
+ throw new Error("speex processor not initialized")
90
+ self.speexProcessor.processInt16(segment)
89
91
  return Promise.resolve(segment)
90
92
  }).then((payload: Int16Array<ArrayBuffer>) => {
91
93
  if (self.closing)
@@ -85,6 +85,18 @@ export default class SpeechFlowNodeA2AVAD extends SpeechFlowNode {
85
85
  }
86
86
  }
87
87
 
88
+ /* helper function for tail timer handling */
89
+ const startTailTimer = () => {
90
+ tail = true
91
+ clearTailTimer()
92
+ this.tailTimer = setTimeout(() => {
93
+ if (this.closing || this.tailTimer === null)
94
+ return
95
+ tail = false
96
+ this.tailTimer = null
97
+ }, this.params.postSpeechTail)
98
+ }
99
+
88
100
  /* establish Voice Activity Detection (VAD) facility */
89
101
  let tail = false
90
102
  try {
@@ -111,31 +123,15 @@ export default class SpeechFlowNodeA2AVAD extends SpeechFlowNode {
111
123
  return
112
124
  const duration = util.audioArrayDuration(audio, vadSampleRateTarget)
113
125
  this.log("info", `VAD: speech end (duration: ${duration.toFixed(2)}s)`)
114
- if (this.params.mode === "unplugged") {
115
- tail = true
116
- clearTailTimer()
117
- this.tailTimer = setTimeout(() => {
118
- if (this.closing || this.tailTimer === null)
119
- return
120
- tail = false
121
- this.tailTimer = null
122
- }, this.params.postSpeechTail)
123
- }
126
+ if (this.params.mode === "unplugged")
127
+ startTailTimer()
124
128
  },
125
129
  onVADMisfire: () => {
126
130
  if (this.closing)
127
131
  return
128
132
  this.log("info", "VAD: speech end (segment too short)")
129
- if (this.params.mode === "unplugged") {
130
- tail = true
131
- clearTailTimer()
132
- this.tailTimer = setTimeout(() => {
133
- if (this.closing || this.tailTimer === null)
134
- return
135
- tail = false
136
- this.tailTimer = null
137
- }, this.params.postSpeechTail)
138
- }
133
+ if (this.params.mode === "unplugged")
134
+ startTailTimer()
139
135
  },
140
136
  onFrameProcessed: (audio) => {
141
137
  if (this.closing)
@@ -144,7 +140,7 @@ export default class SpeechFlowNodeA2AVAD extends SpeechFlowNode {
144
140
  /* annotate the current audio segment */
145
141
  const element = this.queueVAD.peek()
146
142
  if (element === undefined || element.type !== "audio-frame")
147
- throw new Error("internal error which cannot happen: no more queued element")
143
+ throw new Error("internal error that cannot happen: no more queued element")
148
144
  if (element.segmentIdx >= element.segmentData.length)
149
145
  throw new Error("segment index out of bounds")
150
146
  const segment = element.segmentData[element.segmentIdx++]
@@ -227,6 +223,7 @@ export default class SpeechFlowNodeA2AVAD extends SpeechFlowNode {
227
223
  }
228
224
  }
229
225
 
226
+ /* signal completion */
230
227
  callback()
231
228
  }
232
229
  catch (error) {
@@ -322,6 +319,7 @@ export default class SpeechFlowNodeA2AVAD extends SpeechFlowNode {
322
319
  }
323
320
  }
324
321
 
322
+ /* peek at send queue element */
325
323
  const element = self.queueSend.peek()
326
324
  if (element !== undefined && element.type === "audio-eof")
327
325
  this.push(null)
@@ -371,8 +369,7 @@ export default class SpeechFlowNodeA2AVAD extends SpeechFlowNode {
371
369
  if (this.vad !== null) {
372
370
  try {
373
371
  const flushPromise = this.vad.flush()
374
- const timeoutPromise = new Promise((resolve) =>
375
- setTimeout(resolve, 5000))
372
+ const timeoutPromise = new Promise((resolve) => { setTimeout(resolve, 5000) })
376
373
  await Promise.race([ flushPromise, timeoutPromise ])
377
374
  }
378
375
  catch (error) {
@@ -21,15 +21,18 @@ const writeWavHeader = (
21
21
  const sampleRate = options?.sampleRate ?? 44100 /* 44KHz */
22
22
  const bitDepth = options?.bitDepth ?? 16 /* 16-Bit */
23
23
 
24
+ /* determine header dimensions */
24
25
  const headerLength = 44
25
26
  const maxDataSize = Math.pow(2, 32) - 100 /* safe maximum for 32-bit WAV files */
26
27
  const dataLength = length ?? maxDataSize
27
28
  const fileSize = dataLength + headerLength
28
29
  const header = Buffer.alloc(headerLength)
29
30
 
31
+ /* calculate byte rate and block alignment */
30
32
  const byteRate = (sampleRate * channels * bitDepth) / 8
31
33
  const blockAlign = (channels * bitDepth) / 8
32
34
 
35
+ /* write header fields */
33
36
  let offset = 0
34
37
  header.write("RIFF", offset); offset += 4
35
38
  header.writeUInt32LE(fileSize - 8, offset); offset += 4
@@ -45,6 +48,7 @@ const writeWavHeader = (
45
48
  header.write("data", offset); offset += 4
46
49
  header.writeUInt32LE(dataLength, offset); offset += 4
47
50
 
51
+ /* return completed header */
48
52
  return header
49
53
  }
50
54
 
@@ -53,6 +57,7 @@ const readWavHeader = (buffer: Buffer) => {
53
57
  if (buffer.length < 44)
54
58
  throw new Error("WAV header too short, expected at least 44 bytes")
55
59
 
60
+ /* read header fields */
56
61
  let offset = 0
57
62
  const riffHead = buffer.subarray(offset, offset + 4).toString(); offset += 4
58
63
  const fileSize = buffer.readUInt32LE(offset); offset += 4
@@ -68,6 +73,7 @@ const readWavHeader = (buffer: Buffer) => {
68
73
  const data = buffer.subarray(offset, offset + 4).toString(); offset += 4
69
74
  const dataLength = buffer.readUInt32LE(offset); offset += 4
70
75
 
76
+ /* validate RIFF header */
71
77
  if (riffHead !== "RIFF")
72
78
  throw new Error(`Invalid WAV file: expected RIFF header, got "${riffHead}"`)
73
79
  if (waveHead !== "WAVE")
@@ -77,6 +83,7 @@ const readWavHeader = (buffer: Buffer) => {
77
83
  if (data !== "data")
78
84
  throw new Error(`Invalid WAV file: expected "data" header, got "${data}"`)
79
85
 
86
+ /* return parsed header data */
80
87
  return {
81
88
  riffHead, fileSize, waveHead, fmtHead, formatLength, audioFormat,
82
89
  channels, sampleRate, byteRate, blockAlign, bitDepth, data, dataLength
@@ -53,7 +53,7 @@ class AsyncQueue<T> {
53
53
  continue
54
54
  }
55
55
  else {
56
- const it = await new Promise<IteratorResult<T>>((resolve) => this.resolvers.push(resolve))
56
+ const it = await new Promise<IteratorResult<T>>((resolve) => { this.resolvers.push(resolve) })
57
57
  if (it.done)
58
58
  return
59
59
  yield it.value
@@ -68,11 +68,10 @@ export default class SpeechFlowNodeA2TAmazon extends SpeechFlowNode {
68
68
  public static name = "a2t-amazon"
69
69
 
70
70
  /* internal state */
71
- private client: TranscribeStreamingClient | null = null
72
- private clientStream: AsyncIterable<TranscriptResultStream> | null = null
73
- private closing = false
74
- private connectionTimeout: ReturnType<typeof setTimeout> | null = null
75
- private queue: util.SingleQueue<SpeechFlowChunk | null> | null = null
71
+ private client: TranscribeStreamingClient | null = null
72
+ private clientStream: AsyncIterable<TranscriptResultStream> | null = null
73
+ private closing = false
74
+ private queue: util.SingleQueue<SpeechFlowChunk | null> | null = null
76
75
 
77
76
  /* construct node */
78
77
  constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
@@ -126,8 +125,6 @@ export default class SpeechFlowNodeA2TAmazon extends SpeechFlowNode {
126
125
  secretAccessKey: this.params.secKey
127
126
  }
128
127
  })
129
- if (this.client === null)
130
- throw new Error("failed to establish Amazon Transcribe client")
131
128
 
132
129
  /* create an AudioStream for Amazon Transcribe */
133
130
  const audioQueue = new AsyncQueue<Uint8Array>()
@@ -236,11 +233,8 @@ export default class SpeechFlowNodeA2TAmazon extends SpeechFlowNode {
236
233
  callback()
237
234
  return
238
235
  }
239
-
240
- /* await all read operations */
241
236
  await reads.awaitAll()
242
-
243
- util.run(
237
+ util.run("closing Amazon Transcribe connection",
244
238
  () => self.client!.destroy(),
245
239
  (error: Error) => self.log("warning", `error closing Amazon Transcribe connection: ${error}`)
246
240
  )
@@ -279,12 +273,6 @@ export default class SpeechFlowNodeA2TAmazon extends SpeechFlowNode {
279
273
  /* indicate closing first to stop all async operations */
280
274
  this.closing = true
281
275
 
282
- /* cleanup all timers */
283
- if (this.connectionTimeout !== null) {
284
- clearTimeout(this.connectionTimeout)
285
- this.connectionTimeout = null
286
- }
287
-
288
276
  /* close queue */
289
277
  if (this.queue !== null) {
290
278
  this.queue.write(null)
@@ -22,11 +22,10 @@ export default class SpeechFlowNodeA2TGoogle extends SpeechFlowNode {
22
22
  public static name = "a2t-google"
23
23
 
24
24
  /* internal state */
25
- private client: GoogleSpeech.SpeechClient | null = null
26
- private recognizeStream: ReturnType<GoogleSpeech.SpeechClient["streamingRecognize"]> | null = null
27
- private connectionTimeout: ReturnType<typeof setTimeout> | null = null
28
- private queue: util.SingleQueue<SpeechFlowChunk | null> | null = null
29
- private closing = false
25
+ private client: GoogleSpeech.SpeechClient | null = null
26
+ private recognizeStream: ReturnType<GoogleSpeech.SpeechClient["streamingRecognize"]> | null = null
27
+ private queue: util.SingleQueue<SpeechFlowChunk | null> | null = null
28
+ private closing = false
30
29
 
31
30
  /* construct node */
32
31
  constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
@@ -280,12 +279,6 @@ export default class SpeechFlowNodeA2TGoogle extends SpeechFlowNode {
280
279
  /* indicate closing first to stop all async operations */
281
280
  this.closing = true
282
281
 
283
- /* cleanup all timers */
284
- if (this.connectionTimeout !== null) {
285
- clearTimeout(this.connectionTimeout)
286
- this.connectionTimeout = null
287
- }
288
-
289
282
  /* shutdown stream */
290
283
  if (this.stream !== null) {
291
284
  await util.destroyStream(this.stream)
@@ -23,12 +23,12 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
23
23
  public static name = "a2t-openai"
24
24
 
25
25
  /* internal state */
26
- private openai: OpenAI | null = null
27
- private ws: ws.WebSocket | null = null
28
- private queue: util.SingleQueue<SpeechFlowChunk | null> | null = null
29
- private resampler: SpeexResampler | null = null
30
- private closing = false
31
- private connectionTimeout: ReturnType<typeof setTimeout> | null = null
26
+ private openai: OpenAI | null = null
27
+ private ws: ws.WebSocket | null = null
28
+ private queue: util.SingleQueue<SpeechFlowChunk | null> | null = null
29
+ private resampler: SpeexResampler | null = null
30
+ private closing = false
31
+ private connectionTimeout: ReturnType<typeof setTimeout> | null = null
32
32
 
33
33
  /* construct node */
34
34
  constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
@@ -150,6 +150,9 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
150
150
  })
151
151
  this.ws.on("error", (err) => {
152
152
  this.log("error", `WebSocket connection error: ${err}`)
153
+ if (!this.closing && this.queue !== null)
154
+ this.queue.write(null)
155
+ this.emit("error", err)
153
156
  })
154
157
 
155
158
  /* track speech timing by item_id (OpenAI provides timestamps via VAD events) */
@@ -164,6 +167,7 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
164
167
  }, new Map<string, any>())
165
168
  }
166
169
 
170
+ /* track transcription text */
167
171
  let text = ""
168
172
  this.ws.on("message", (data) => {
169
173
  let ev: any
@@ -353,7 +357,8 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
353
357
  this.ws.close()
354
358
  this.ws = null
355
359
  }
356
- this.openai = null
360
+ if (this.openai !== null)
361
+ this.openai = null
357
362
 
358
363
  /* close resampler */
359
364
  this.resampler = null