speechflow 1.6.4 → 1.6.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86)
  1. package/CHANGELOG.md +10 -0
  2. package/README.md +5 -3
  3. package/etc/speechflow.yaml +15 -13
  4. package/package.json +5 -5
  5. package/speechflow-cli/dst/speechflow-main-api.js +3 -7
  6. package/speechflow-cli/dst/speechflow-main-api.js.map +1 -1
  7. package/speechflow-cli/dst/speechflow-main-graph.js +1 -1
  8. package/speechflow-cli/dst/speechflow-main.js +6 -0
  9. package/speechflow-cli/dst/speechflow-main.js.map +1 -1
  10. package/speechflow-cli/dst/speechflow-node-a2a-compressor-wt.js +1 -21
  11. package/speechflow-cli/dst/speechflow-node-a2a-compressor-wt.js.map +1 -1
  12. package/speechflow-cli/dst/speechflow-node-a2a-expander-wt.js +1 -21
  13. package/speechflow-cli/dst/speechflow-node-a2a-expander-wt.js.map +1 -1
  14. package/speechflow-cli/dst/speechflow-node-a2a-ffmpeg.js +2 -2
  15. package/speechflow-cli/dst/speechflow-node-a2a-ffmpeg.js.map +1 -1
  16. package/speechflow-cli/dst/speechflow-node-a2a-gender.js +35 -29
  17. package/speechflow-cli/dst/speechflow-node-a2a-gender.js.map +1 -1
  18. package/speechflow-cli/dst/speechflow-node-a2a-meter.js +50 -34
  19. package/speechflow-cli/dst/speechflow-node-a2a-meter.js.map +1 -1
  20. package/speechflow-cli/dst/speechflow-node-a2a-rnnoise.js +1 -0
  21. package/speechflow-cli/dst/speechflow-node-a2a-rnnoise.js.map +1 -1
  22. package/speechflow-cli/dst/speechflow-node-a2a-vad.js +2 -2
  23. package/speechflow-cli/dst/speechflow-node-a2a-vad.js.map +1 -1
  24. package/speechflow-cli/dst/speechflow-node-a2t-openai.d.ts +0 -1
  25. package/speechflow-cli/dst/speechflow-node-a2t-openai.js +0 -6
  26. package/speechflow-cli/dst/speechflow-node-a2t-openai.js.map +1 -1
  27. package/speechflow-cli/dst/speechflow-node-t2a-amazon.d.ts +0 -1
  28. package/speechflow-cli/dst/speechflow-node-t2a-amazon.js +0 -6
  29. package/speechflow-cli/dst/speechflow-node-t2a-amazon.js.map +1 -1
  30. package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.d.ts +0 -1
  31. package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.js +0 -6
  32. package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.js.map +1 -1
  33. package/speechflow-cli/dst/speechflow-node-t2a-kokoro.d.ts +0 -1
  34. package/speechflow-cli/dst/speechflow-node-t2a-kokoro.js +0 -6
  35. package/speechflow-cli/dst/speechflow-node-t2a-kokoro.js.map +1 -1
  36. package/speechflow-cli/dst/speechflow-node-t2t-ollama.js +1 -1
  37. package/speechflow-cli/dst/speechflow-node-t2t-ollama.js.map +1 -1
  38. package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js +6 -6
  39. package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js.map +1 -1
  40. package/speechflow-cli/dst/speechflow-node-xio-device.js +3 -2
  41. package/speechflow-cli/dst/speechflow-node-xio-device.js.map +1 -1
  42. package/speechflow-cli/dst/speechflow-node-xio-websocket.js.map +1 -1
  43. package/speechflow-cli/dst/speechflow-util-audio.d.ts +1 -0
  44. package/speechflow-cli/dst/speechflow-util-audio.js +21 -0
  45. package/speechflow-cli/dst/speechflow-util-audio.js.map +1 -1
  46. package/speechflow-cli/dst/speechflow-util-error.d.ts +1 -1
  47. package/speechflow-cli/dst/speechflow-util-error.js +7 -1
  48. package/speechflow-cli/dst/speechflow-util-error.js.map +1 -1
  49. package/speechflow-cli/dst/speechflow-util-stream.d.ts +1 -1
  50. package/speechflow-cli/dst/speechflow-util-stream.js +2 -2
  51. package/speechflow-cli/dst/speechflow-util-stream.js.map +1 -1
  52. package/speechflow-cli/etc/oxlint.jsonc +2 -1
  53. package/speechflow-cli/package.json +17 -17
  54. package/speechflow-cli/src/speechflow-main-api.ts +6 -13
  55. package/speechflow-cli/src/speechflow-main-graph.ts +1 -1
  56. package/speechflow-cli/src/speechflow-main.ts +4 -0
  57. package/speechflow-cli/src/speechflow-node-a2a-compressor-wt.ts +1 -29
  58. package/speechflow-cli/src/speechflow-node-a2a-expander-wt.ts +1 -29
  59. package/speechflow-cli/src/speechflow-node-a2a-ffmpeg.ts +2 -2
  60. package/speechflow-cli/src/speechflow-node-a2a-gender.ts +44 -39
  61. package/speechflow-cli/src/speechflow-node-a2a-meter.ts +58 -38
  62. package/speechflow-cli/src/speechflow-node-a2a-rnnoise.ts +1 -0
  63. package/speechflow-cli/src/speechflow-node-a2a-vad.ts +2 -2
  64. package/speechflow-cli/src/speechflow-node-a2t-openai.ts +0 -6
  65. package/speechflow-cli/src/speechflow-node-t2a-amazon.ts +0 -6
  66. package/speechflow-cli/src/speechflow-node-t2a-elevenlabs.ts +0 -6
  67. package/speechflow-cli/src/speechflow-node-t2a-kokoro.ts +0 -6
  68. package/speechflow-cli/src/speechflow-node-t2t-ollama.ts +1 -1
  69. package/speechflow-cli/src/speechflow-node-t2t-subtitle.ts +10 -14
  70. package/speechflow-cli/src/speechflow-node-xio-device.ts +3 -2
  71. package/speechflow-cli/src/speechflow-node-xio-websocket.ts +1 -1
  72. package/speechflow-cli/src/speechflow-util-audio.ts +30 -0
  73. package/speechflow-cli/src/speechflow-util-error.ts +9 -3
  74. package/speechflow-cli/src/speechflow-util-stream.ts +2 -2
  75. package/speechflow-ui-db/dst/index.js +20 -20
  76. package/speechflow-ui-db/package.json +8 -8
  77. package/speechflow-ui-db/src/app.vue +14 -5
  78. package/speechflow-ui-st/dst/index.js +455 -20
  79. package/speechflow-ui-st/package.json +9 -9
  80. package/speechflow-ui-st/src/app.vue +8 -3
  81. package/speechflow-cli/dst/speechflow-util-webaudio-wt.d.ts +0 -1
  82. package/speechflow-cli/dst/speechflow-util-webaudio-wt.js +0 -124
  83. package/speechflow-cli/dst/speechflow-util-webaudio-wt.js.map +0 -1
  84. package/speechflow-cli/dst/speechflow-util-webaudio.d.ts +0 -13
  85. package/speechflow-cli/dst/speechflow-util-webaudio.js +0 -137
  86. package/speechflow-cli/dst/speechflow-util-webaudio.js.map +0 -1
@@ -61,34 +61,6 @@ class ExpanderProcessor extends AudioWorkletProcessor {
61
61
  return targetOut - levelDB
62
62
  }
63
63
 
64
- /* update envelope (smoothed amplitude contour) for single channel */
65
- private updateEnvelopeForChannel (
66
- chan: number,
67
- samples: Float32Array,
68
- attack: number,
69
- release: number
70
- ): void {
71
- /* fetch old envelope value */
72
- if (this.env[chan] === undefined)
73
- this.env[chan] = 1e-12
74
- let env = this.env[chan]
75
-
76
- /* calculate attack/release alpha values */
77
- const alphaA = Math.exp(-1 / (attack * this.sampleRate))
78
- const alphaR = Math.exp(-1 / (release * this.sampleRate))
79
-
80
- /* iterate over all samples and calculate RMS */
81
- for (const s of samples) {
82
- const x = Math.abs(s)
83
- const det = x * x
84
- if (det > env)
85
- env = alphaA * env + (1 - alphaA) * det
86
- else
87
- env = alphaR * env + (1 - alphaR) * det
88
- }
89
- this.env[chan] = Math.sqrt(Math.max(env, 1e-12))
90
- }
91
-
92
64
  /* process a single sample frame */
93
65
  process(
94
66
  inputs: Float32Array[][],
@@ -126,7 +98,7 @@ class ExpanderProcessor extends AudioWorkletProcessor {
126
98
 
127
99
  /* update envelope per channel */
128
100
  for (let ch = 0; ch < nCh; ch++)
129
- this.updateEnvelopeForChannel(ch, input[ch], attackS, releaseS)
101
+ this.env[ch] = util.updateEnvelopeForChannel(this.env, this.sampleRate, ch, input[ch], attackS, releaseS)
130
102
 
131
103
  /* determine linear value from decibel makeup value */
132
104
  const makeUpLin = util.dB2lin(makeupDB)
@@ -93,7 +93,7 @@ export default class SpeechFlowNodeA2AFFMPEG extends SpeechFlowNode {
93
93
  util.run("starting FFmpeg process", () => this.ffmpeg!.run())
94
94
 
95
95
  /* establish a duplex stream and connect it to FFmpeg */
96
- this.stream = Stream.Duplex.from({
96
+ const ffmpegStream = Stream.Duplex.from({
97
97
  writable: streamInput,
98
98
  readable: streamOutput
99
99
  })
@@ -101,7 +101,7 @@ export default class SpeechFlowNodeA2AFFMPEG extends SpeechFlowNode {
101
101
  /* wrap streams with conversions for chunk vs plain audio */
102
102
  const wrapper1 = util.createTransformStreamForWritableSide()
103
103
  const wrapper2 = util.createTransformStreamForReadableSide("audio", () => this.timeZero)
104
- this.stream = Stream.compose(wrapper1, this.stream, wrapper2)
104
+ this.stream = Stream.compose(wrapper1, ffmpegStream, wrapper2)
105
105
  }
106
106
 
107
107
  /* close node */
@@ -5,12 +5,13 @@
5
5
  */
6
6
 
7
7
  /* standard dependencies */
8
- import path from "node:path"
9
- import Stream from "node:stream"
8
+ import path from "node:path"
9
+ import Stream from "node:stream"
10
10
 
11
11
  /* external dependencies */
12
- import * as Transformers from "@huggingface/transformers"
13
- import { WaveFile } from "wavefile"
12
+ import * as Transformers from "@huggingface/transformers"
13
+ import { WaveFile } from "wavefile"
14
+ import { getRMS, AudioData } from "audio-inspect"
14
15
 
15
16
  /* internal dependencies */
16
17
  import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
@@ -47,7 +48,10 @@ export default class SpeechFlowNodeA2AGender extends SpeechFlowNode {
47
48
 
48
49
  /* declare node configuration parameters */
49
50
  this.configure({
50
- window: { type: "number", pos: 0, val: 500 }
51
+ window: { type: "number", pos: 0, val: 500 },
52
+ threshold: { type: "number", pos: 1, val: 0.50 },
53
+ hysteresis: { type: "number", pos: 2, val: 0.25 },
54
+ volumeThreshold: { type: "number", pos: 3, val: -45 }
51
55
  })
52
56
 
53
57
  /* declare node input/output format */
@@ -64,9 +68,6 @@ export default class SpeechFlowNodeA2AGender extends SpeechFlowNode {
64
68
  /* clear shutdown flag */
65
69
  this.shutdown = false
66
70
 
67
- /* pass-through logging */
68
- const log = this.log.bind(this)
69
-
70
71
  /* the used model */
71
72
  const model = "Xenova/wav2vec2-large-xlsr-53-gender-recognition-librispeech"
72
73
 
@@ -102,24 +103,17 @@ export default class SpeechFlowNodeA2AGender extends SpeechFlowNode {
102
103
  device: "auto",
103
104
  progress_callback: progressCallback
104
105
  })
105
- let timeoutId: ReturnType<typeof setTimeout> | null = null
106
- const timeoutPromise = new Promise((resolve, reject) => {
107
- timeoutId = setTimeout(() =>
108
- reject(new Error("model initialization timeout")), 30 * 1000)
109
- })
110
106
  this.classifier = await Promise.race([
111
- pipelinePromise, timeoutPromise
112
- ]).finally(() => {
113
- if (timeoutId !== null)
114
- clearTimeout(timeoutId)
115
- }) as Transformers.AudioClassificationPipeline
107
+ pipelinePromise,
108
+ util.timeoutPromise(30 * 1000, "model initialization timeout")
109
+ ]) as Transformers.AudioClassificationPipeline
116
110
  }
117
111
  catch (error) {
118
112
  if (this.progressInterval) {
119
113
  clearInterval(this.progressInterval)
120
114
  this.progressInterval = null
121
115
  }
122
- throw new Error(`failed to initialize classifier pipeline: ${error}`)
116
+ throw new Error(`failed to initialize classifier pipeline: ${error}`, { cause: error })
123
117
  }
124
118
  if (this.progressInterval) {
125
119
  clearInterval(this.progressInterval)
@@ -128,38 +122,49 @@ export default class SpeechFlowNodeA2AGender extends SpeechFlowNode {
128
122
  if (this.classifier === null)
129
123
  throw new Error("failed to instantiate classifier pipeline")
130
124
 
125
+ /* define sample rate required by model */
126
+ const sampleRateTarget = 16000
127
+
131
128
  /* classify a single large-enough concatenated audio frame */
132
129
  const classify = async (data: Float32Array) => {
133
130
  if (this.shutdown || this.classifier === null)
134
131
  throw new Error("classifier shutdown during operation")
135
- const classifyPromise = this.classifier(data)
136
- let timeoutId: ReturnType<typeof setTimeout> | null = null
137
- const timeoutPromise = new Promise((resolve, reject) => {
138
- timeoutId = setTimeout(() =>
139
- reject(new Error("classification timeout")), 30 * 1000)
140
- })
141
- const result = await Promise.race([ classifyPromise, timeoutPromise ]).finally(() => {
142
- if (timeoutId !== null)
143
- clearTimeout(timeoutId)
144
- }) as Transformers.AudioClassificationOutput | Transformers.AudioClassificationOutput[]
132
+
133
+ /* check volume level and return "unknown" if too low
134
+ in order to avoid a wrong classification */
135
+ const audioData = {
136
+ sampleRate: sampleRateTarget,
137
+ numberOfChannels: 1,
138
+ channelData: [ data ],
139
+ duration: data.length / sampleRateTarget,
140
+ length: data.length
141
+ } satisfies AudioData
142
+ const rms = getRMS(audioData, { asDB: true })
143
+ if (rms < this.params.volumeThreshold)
144
+ return "unknown"
145
+
146
+ /* classify audio */
147
+ const result = await Promise.race([
148
+ this.classifier(data),
149
+ util.timeoutPromise(30 * 1000, "classification timeout")
150
+ ]) as Transformers.AudioClassificationOutput | Transformers.AudioClassificationOutput[]
145
151
  const classified = Array.isArray(result) ?
146
152
  result as Transformers.AudioClassificationOutput :
147
153
  [ result ]
148
- const c1 = classified.find((c: any) => c.label === "male")
149
- const c2 = classified.find((c: any) => c.label === "female")
154
+ const c1 = classified.find((c) => c.label === "male")
155
+ const c2 = classified.find((c) => c.label === "female")
150
156
  const male = c1 ? c1.score : 0.0
151
157
  const female = c2 ? c2.score : 0.0
152
- if (male > 0.50 && male > female + 0.25)
158
+ const threshold = this.params.threshold
159
+ const hysteresis = this.params.hysteresis
160
+ if (male > threshold && male > female + hysteresis)
153
161
  return "male"
154
- else if (female > 0.50 && female > male + 0.25)
162
+ else if (female > threshold && female > male + hysteresis)
155
163
  return "female"
156
164
  else
157
165
  return "unknown"
158
166
  }
159
167
 
160
- /* define sample rate required by model */
161
- const sampleRateTarget = 16000
162
-
163
168
  /* work off queued audio frames */
164
169
  const frameWindowDuration = this.params.window / 1000
165
170
  const frameWindowSamples = Math.floor(frameWindowDuration * sampleRateTarget)
@@ -209,13 +214,13 @@ export default class SpeechFlowNodeA2AGender extends SpeechFlowNode {
209
214
  pos0++
210
215
  }
211
216
  if (lastGender !== gender && !this.shutdown) {
212
- log("info", `gender now recognized as <${gender}>`)
217
+ this.log("info", `gender now recognized as <${gender}>`)
213
218
  lastGender = gender
214
219
  }
215
220
  }
216
221
  }
217
222
  catch (error) {
218
- log("error", `gender classification error: ${error}`)
223
+ this.log("error", `gender classification error: ${error}`)
219
224
  }
220
225
 
221
226
  /* re-initiate working off round */
@@ -307,7 +312,7 @@ export default class SpeechFlowNodeA2AGender extends SpeechFlowNode {
307
312
  && element.gender === undefined)
308
313
  break
309
314
  const duration = util.audioArrayDuration(element.data)
310
- log("debug", `send chunk (${duration.toFixed(3)}s) with gender <${element.gender}>`)
315
+ self.log("debug", `send chunk (${duration.toFixed(3)}s) with gender <${element.gender}>`)
311
316
  element.chunk.meta.set("gender", element.gender)
312
317
  this.push(element.chunk)
313
318
  self.queueSend.walk(+1)
@@ -22,7 +22,7 @@ export default class SpeechFlowNodeA2AMeter extends SpeechFlowNode {
22
22
  /* internal state */
23
23
  private emitInterval: ReturnType<typeof setInterval> | null = null
24
24
  private calcInterval: ReturnType<typeof setInterval> | null = null
25
- private silenceTimer: ReturnType<typeof setTimeout> | null = null
25
+ private silenceTimer: ReturnType<typeof setTimeout> | null = null
26
26
  private chunkBuffer = new Float32Array(0)
27
27
  private destroyed = false
28
28
 
@@ -32,7 +32,7 @@ export default class SpeechFlowNodeA2AMeter extends SpeechFlowNode {
32
32
 
33
33
  /* declare node configuration parameters */
34
34
  this.configure({
35
- interval: { type: "number", pos: 0, val: 250 },
35
+ interval: { type: "number", pos: 0, val: 100 },
36
36
  mode: { type: "string", pos: 1, val: "filter", match: /^(?:filter|sink)$/ },
37
37
  dashboard: { type: "string", val: "" }
38
38
  })
@@ -55,71 +55,91 @@ export default class SpeechFlowNodeA2AMeter extends SpeechFlowNode {
55
55
  this.destroyed = false
56
56
 
57
57
  /* internal state */
58
- const sampleWindowDuration = 3 /* LUFS-S requires 3s */
58
+ let lufsm = -60
59
+ let rms = -60
60
+
61
+ /* chunk processing state for LUFS-M */
62
+ const sampleWindowDuration = 0.4 /* LUFS-M requires 400ms */
59
63
  const sampleWindowSize = Math.floor(this.config.audioSampleRate * sampleWindowDuration)
60
64
  const sampleWindow = new Float32Array(sampleWindowSize)
61
65
  sampleWindow.fill(0, 0, sampleWindowSize)
62
- let lufss = -60
63
- let rms = -60
64
66
 
65
- /* chunk processing state */
67
+ /* chunk processing state for RMS */
66
68
  const chunkDuration = 0.050 /* meter update frequency is about 50ms */
67
69
  const samplesPerChunk = Math.floor(this.config.audioSampleRate * chunkDuration)
68
70
  this.chunkBuffer = new Float32Array(0)
69
71
 
70
- /* define chunk processing function */
71
- const processChunk = (chunkData: Float32Array) => {
72
- /* update internal audio sample sliding window */
73
- sampleWindow.set(sampleWindow.subarray(chunkData.length), 0)
74
- sampleWindow.set(chunkData, sampleWindowSize - chunkData.length)
72
+ /* setup chunking interval */
73
+ this.calcInterval = setInterval(() => {
74
+ /* short-circuit during destruction */
75
+ if (this.destroyed)
76
+ return
75
77
 
76
- /* calculate the LUFS-S and RMS metric */
77
- const audioData = {
78
+ /* short-circuit if still not enough chunk data */
79
+ if (this.chunkBuffer.length < samplesPerChunk)
80
+ return
81
+
82
+ /* grab the accumulated chunk data */
83
+ const chunkData = this.chunkBuffer
84
+ this.chunkBuffer = new Float32Array(0)
85
+
86
+ /* update internal audio sample sliding window for LUFS-M */
87
+ if (chunkData.length > sampleWindow.length)
88
+ sampleWindow.set(chunkData.subarray(chunkData.length - sampleWindow.length), 0)
89
+ else {
90
+ sampleWindow.set(sampleWindow.subarray(chunkData.length), 0)
91
+ sampleWindow.set(chunkData, sampleWindow.length - chunkData.length)
92
+ }
93
+
94
+ /* calculate the LUFS-M metric */
95
+ const audioDataLUFS = {
78
96
  sampleRate: this.config.audioSampleRate,
79
97
  numberOfChannels: this.config.audioChannels,
80
98
  channelData: [ sampleWindow ],
81
99
  duration: sampleWindowDuration,
82
100
  length: sampleWindow.length
83
101
  } satisfies AudioData
84
- const lufs = getLUFS(audioData, {
102
+ const lufs = getLUFS(audioDataLUFS, {
85
103
  channelMode: this.config.audioChannels === 1 ? "mono" : "stereo",
86
- calculateShortTerm: true,
87
- calculateMomentary: false,
104
+ calculateShortTerm: false,
105
+ calculateMomentary: true,
88
106
  calculateLoudnessRange: false,
89
107
  calculateTruePeak: false
90
108
  })
91
- lufss = lufs.shortTerm ? lufs.shortTerm[0] : -60
92
- rms = getRMS(audioData, { asDB: true })
109
+ lufsm = lufs.momentary ? Math.max(-60, lufs.momentary[0]) : -60
110
+
111
+ /* calculate the RMS metric */
112
+ const totalSamples = chunkData.length / this.config.audioChannels
113
+ const duration = totalSamples / this.config.audioSampleRate
114
+ const audioDataRMS = {
115
+ sampleRate: this.config.audioSampleRate,
116
+ numberOfChannels: this.config.audioChannels,
117
+ channelData: [ chunkData ],
118
+ duration,
119
+ length: chunkData.length
120
+ } satisfies AudioData
121
+ rms = Math.max(-60, getRMS(audioDataRMS, {
122
+ asDB: true
123
+ }))
124
+
125
+ /* automatically clear measurement (in case no new measurements happen) */
93
126
  if (this.silenceTimer !== null)
94
127
  clearTimeout(this.silenceTimer)
95
128
  this.silenceTimer = setTimeout(() => {
96
- lufss = -60
129
+ lufsm = -60
97
130
  rms = -60
98
131
  }, 500)
99
- }
100
-
101
- /* setup chunking interval */
102
- this.calcInterval = setInterval(() => {
103
- if (this.destroyed)
104
- return
105
-
106
- /* process one single 50ms chunk if available */
107
- if (this.chunkBuffer.length >= samplesPerChunk) {
108
- const chunkData = this.chunkBuffer.slice(0, samplesPerChunk)
109
- this.chunkBuffer = this.chunkBuffer.slice(samplesPerChunk)
110
- processChunk(chunkData)
111
- }
112
132
  }, chunkDuration * 1000)
113
133
 
114
134
  /* setup loudness emitting interval */
115
135
  this.emitInterval = setInterval(() => {
116
136
  if (this.destroyed)
117
137
  return
118
- this.log("debug", `LUFS-S: ${lufss.toFixed(1)} dB, RMS: ${rms.toFixed(1)} dB`)
119
- this.sendResponse([ "meter", "LUFS-S", lufss ])
138
+ this.log("debug", `LUFS-M: ${lufsm.toFixed(1)} dB, RMS: ${rms.toFixed(1)} dB`)
139
+ this.sendResponse([ "meter", "LUFS-M", lufsm ])
120
140
  this.sendResponse([ "meter", "RMS", rms ])
121
141
  if (this.params.dashboard !== "")
122
- this.sendDashboard("audio", this.params.dashboard, "final", lufss)
142
+ this.sendDashboard("audio", this.params.dashboard, "final", lufsm)
123
143
  }, this.params.interval)
124
144
 
125
145
  /* provide Duplex stream and internally attach to meter */
@@ -175,6 +195,9 @@ export default class SpeechFlowNodeA2AMeter extends SpeechFlowNode {
175
195
 
176
196
  /* close node */
177
197
  async close () {
198
+ /* indicate destruction immediately to stop any ongoing operations */
199
+ this.destroyed = true
200
+
178
201
  /* stop intervals */
179
202
  if (this.emitInterval !== null) {
180
203
  clearInterval(this.emitInterval)
@@ -194,8 +217,5 @@ export default class SpeechFlowNodeA2AMeter extends SpeechFlowNode {
194
217
  this.stream.destroy()
195
218
  this.stream = null
196
219
  }
197
-
198
- /* indicate destruction */
199
- this.destroyed = true
200
220
  }
201
221
  }
@@ -44,6 +44,7 @@ export default class SpeechFlowNodeA2ARNNoise extends SpeechFlowNode {
44
44
  this.worker = new Worker(resolve(__dirname, "speechflow-node-a2a-rnnoise-wt.js"))
45
45
  this.worker.on("error", (err) => {
46
46
  this.log("error", `RNNoise worker thread error: ${err}`)
47
+ this.stream?.emit("error", err)
47
48
  })
48
49
  this.worker.on("exit", (code) => {
49
50
  if (code !== 0)
@@ -158,14 +158,14 @@ export default class SpeechFlowNodeA2AVAD extends SpeechFlowNode {
158
158
  }
159
159
  }
160
160
  catch (error) {
161
- this.log("error", `VAD frame processing error: ${error}`)
161
+ this.log("error", `VAD frame processing error: ${error}`, { cause: error })
162
162
  }
163
163
  }
164
164
  })
165
165
  this.vad.start()
166
166
  }
167
167
  catch (error) {
168
- throw new Error(`failed to initialize VAD: ${error}`)
168
+ throw new Error(`failed to initialize VAD: ${error}`, { cause: error })
169
169
  }
170
170
 
171
171
  /* provide Duplex stream and internally attach to VAD */
@@ -23,7 +23,6 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
23
23
  public static name = "a2t-openai"
24
24
 
25
25
  /* internal state */
26
- private static speexInitialized = false
27
26
  private openai: OpenAI | null = null
28
27
  private ws: ws.WebSocket | null = null
29
28
  private queue: util.SingleQueue<SpeechFlowChunk | null> | null = null
@@ -71,11 +70,6 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
71
70
 
72
71
  /* establish resampler from our standard audio sample rate (48Khz)
73
72
  to OpenAI's maximum 24Khz input sample rate */
74
- if (!SpeechFlowNodeA2TOpenAI.speexInitialized) {
75
- /* at least once initialize resampler */
76
- await SpeexResampler.initPromise
77
- SpeechFlowNodeA2TOpenAI.speexInitialized = true
78
- }
79
73
  this.resampler = new SpeexResampler(1, this.config.audioSampleRate, 24000, 7)
80
74
 
81
75
  /* instantiate OpenAI API */
@@ -26,7 +26,6 @@ export default class SpeechFlowNodeT2AAmazon extends SpeechFlowNode {
26
26
 
27
27
  /* internal state */
28
28
  private client: PollyClient | null = null
29
- private static speexInitialized = false
30
29
  private destroyed = false
31
30
  private resampler: SpeexResampler | null = null
32
31
 
@@ -114,11 +113,6 @@ export default class SpeechFlowNodeT2AAmazon extends SpeechFlowNode {
114
113
 
115
114
  /* establish resampler from AWS Polly's maximum 16Khz output
116
115
  (for PCM output) to our standard audio sample rate (48KHz) */
117
- if (!SpeechFlowNodeT2AAmazon.speexInitialized) {
118
- /* at least once initialize resampler */
119
- await SpeexResampler.initPromise
120
- SpeechFlowNodeT2AAmazon.speexInitialized = true
121
- }
122
116
  this.resampler = new SpeexResampler(1, 16000, this.config.audioSampleRate, 7)
123
117
 
124
118
  /* create transform stream and connect it to the AWS Polly API */
@@ -22,7 +22,6 @@ export default class SpeechFlowNodeT2AElevenlabs extends SpeechFlowNode {
22
22
 
23
23
  /* internal state */
24
24
  private elevenlabs: ElevenLabs.ElevenLabsClient | null = null
25
- private static speexInitialized = false
26
25
  private destroyed = false
27
26
  private resampler: SpeexResampler | null = null
28
27
 
@@ -131,11 +130,6 @@ export default class SpeechFlowNodeT2AElevenlabs extends SpeechFlowNode {
131
130
 
132
131
  /* establish resampler from ElevenLabs's maximum 24Khz
133
132
  output to our standard audio sample rate (48KHz) */
134
- if (!SpeechFlowNodeT2AElevenlabs.speexInitialized) {
135
- /* at least once initialize resampler */
136
- await SpeexResampler.initPromise
137
- SpeechFlowNodeT2AElevenlabs.speexInitialized = true
138
- }
139
133
  this.resampler = new SpeexResampler(1, maxSampleRate, this.config.audioSampleRate, 7)
140
134
 
141
135
  /* create transform stream and connect it to the ElevenLabs API */
@@ -23,7 +23,6 @@ export default class SpeechFlowNodeT2AKokoro extends SpeechFlowNode {
23
23
  /* internal state */
24
24
  private kokoro: KokoroTTS | null = null
25
25
  private resampler: SpeexResampler | null = null
26
- private static speexInitialized = false
27
26
 
28
27
  /* construct node */
29
28
  constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
@@ -82,11 +81,6 @@ export default class SpeechFlowNodeT2AKokoro extends SpeechFlowNode {
82
81
 
83
82
  /* establish resampler from Kokoro's maximum 24Khz
84
83
  output to our standard audio sample rate (48KHz) */
85
- if (!SpeechFlowNodeT2AKokoro.speexInitialized) {
86
- /* at least once initialize resampler */
87
- SpeechFlowNodeT2AKokoro.speexInitialized = true
88
- await SpeexResampler.initPromise
89
- }
90
84
  this.resampler = new SpeexResampler(1, 24000, this.config.audioSampleRate, 7)
91
85
 
92
86
  /* determine voice for text-to-speech operation */
@@ -177,7 +177,7 @@ export default class SpeechFlowNodeT2TOllama extends SpeechFlowNode {
177
177
  models = await this.ollama.list()
178
178
  }
179
179
  catch (err) {
180
- throw new Error(`failed to connect to Ollama API at ${this.params.api}: ${err}`)
180
+ throw new Error(`failed to connect to Ollama API at ${this.params.api}: ${err}`, { cause: err })
181
181
  }
182
182
  const exists = models.models.some((m) => m.name === this.params.model)
183
183
  if (!exists) {
@@ -20,13 +20,10 @@ import HAPIWebSocket from "hapi-plugin-websocket"
20
20
  import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
21
21
  import * as util from "./speechflow-util"
22
22
 
23
- type wsPeerCtx = {
24
- peer: string
25
- }
26
- type wsPeerInfo = {
27
- ctx: wsPeerCtx
28
- ws: WebSocket
29
- req: http.IncomingMessage
23
+ type WSPeerInfo = {
24
+ ctx: Record<string, any>
25
+ ws: WebSocket
26
+ req: http.IncomingMessage
30
27
  }
31
28
 
32
29
  /* SpeechFlow node for subtitle (text-to-text) "translations" */
@@ -160,7 +157,7 @@ export default class SpeechFlowNodeT2TSubtitle extends SpeechFlowNode {
160
157
  }
161
158
  else if (this.params.mode === "render") {
162
159
  /* establish REST/WebSocket API */
163
- const wsPeers = new Map<string, wsPeerInfo>()
160
+ const wsPeers = new Map<string, WSPeerInfo>()
164
161
  this.hapi = new HAPI.Server({
165
162
  address: this.params.addr,
166
163
  port: this.params.port
@@ -205,19 +202,18 @@ export default class SpeechFlowNodeT2TSubtitle extends SpeechFlowNode {
205
202
  plugins: {
206
203
  websocket: {
207
204
  autoping: 30 * 1000,
208
- connect: (args: any) => {
209
- const ctx: wsPeerCtx = args.ctx
210
- const ws: WebSocket = args.ws
211
- const req: http.IncomingMessage = args.req
205
+ connect: ({ ctx, ws, req }) => {
212
206
  const peer = `${req.socket.remoteAddress}:${req.socket.remotePort}`
213
207
  ctx.peer = peer
214
208
  wsPeers.set(peer, { ctx, ws, req })
215
209
  this.log("info", `HAPI: WebSocket: connect: peer ${peer}`)
216
210
  },
217
- disconnect: (args: any) => {
218
- const ctx: wsPeerCtx = args.ctx
211
+ disconnect: ({ ctx, ws }) => {
219
212
  const peer = ctx.peer
220
213
  wsPeers.delete(peer)
214
+ ws.removeAllListeners()
215
+ if (ws.readyState === WebSocket.OPEN)
216
+ ws.close()
221
217
  this.log("info", `HAPI: WebSocket: disconnect: peer ${peer}`)
222
218
  }
223
219
  }
@@ -115,7 +115,7 @@ export default class SpeechFlowNodeXIODevice extends SpeechFlowNode {
115
115
 
116
116
  /* convert regular stream into object-mode stream */
117
117
  const wrapper1 = util.createTransformStreamForWritableSide()
118
- const wrapper2 = util.createTransformStreamForReadableSide("audio", () => this.timeZero)
118
+ const wrapper2 = util.createTransformStreamForReadableSide("audio", () => this.timeZero, highwaterMark)
119
119
  this.stream = Stream.compose(wrapper1, this.stream, wrapper2)
120
120
  }
121
121
 
@@ -136,7 +136,7 @@ export default class SpeechFlowNodeXIODevice extends SpeechFlowNode {
136
136
  this.stream = this.io as unknown as Stream.Readable
137
137
 
138
138
  /* convert regular stream into object-mode stream */
139
- const wrapper = util.createTransformStreamForReadableSide("audio", () => this.timeZero)
139
+ const wrapper = util.createTransformStreamForReadableSide("audio", () => this.timeZero, highwaterMark)
140
140
  this.stream = Stream.compose(this.stream, wrapper)
141
141
  }
142
142
 
@@ -193,6 +193,7 @@ export default class SpeechFlowNodeXIODevice extends SpeechFlowNode {
193
193
  /* pass-through PortAudio errors */
194
194
  this.io!.on("error", (err) => {
195
195
  this.emit("error", err)
196
+ this.stream?.emit("error", err)
196
197
  })
197
198
 
198
199
  /* start PortAudio */
@@ -236,7 +236,7 @@ export default class SpeechFlowNodeXIOWebSocket extends SpeechFlowNode {
236
236
 
237
237
  /* close Websocket client */
238
238
  if (this.client !== null) {
239
- this.client!.close()
239
+ this.client.close()
240
240
  this.client = null
241
241
  }
242
242
 
@@ -132,6 +132,36 @@ export async function processInt16ArrayInSegments (
132
132
  return data
133
133
  }
134
134
 
135
+ /* update envelope (smoothed amplitude contour) for single channel */
136
+ export function updateEnvelopeForChannel(
137
+ env: number[],
138
+ sampleRate: number,
139
+ chan: number,
140
+ samples: Float32Array,
141
+ attack: number,
142
+ release: number
143
+ ): number {
144
+ /* fetch old envelope value */
145
+ if (env[chan] === undefined)
146
+ env[chan] = 1e-12
147
+ let currentEnv = env[chan]
148
+
149
+ /* calculate attack/release alpha values */
150
+ const alphaA = Math.exp(-1 / (attack * sampleRate))
151
+ const alphaR = Math.exp(-1 / (release * sampleRate))
152
+
153
+ /* iterate over all samples and calculate RMS */
154
+ for (const s of samples) {
155
+ const x = Math.abs(s)
156
+ const det = x * x
157
+ if (det > currentEnv)
158
+ currentEnv = alphaA * currentEnv + (1 - alphaA) * det
159
+ else
160
+ currentEnv = alphaR * currentEnv + (1 - alphaR) * det
161
+ }
162
+ return Math.sqrt(Math.max(currentEnv, 1e-12))
163
+ }
164
+
135
165
  /* helper functions for linear/decibel conversions */
136
166
  export function lin2dB (x: number): number {
137
167
  return 20 * Math.log10(Math.max(x, 1e-12))
@@ -5,8 +5,8 @@
5
5
  */
6
6
 
7
7
  /* helper function for promise-based timeout */
8
- export function timeoutPromise (duration: number = 10 * 1000, info = "timeout") {
9
- return new Promise<void>((resolve, reject) => {
8
+ export function timeoutPromise<T = void> (duration: number = 10 * 1000, info = "timeout") {
9
+ return new Promise<T>((resolve, reject) => {
10
10
  setTimeout(() => { reject(new Error(info)) }, duration)
11
11
  })
12
12
  }
@@ -21,7 +21,13 @@ export function ensureError (error: unknown, prefix?: string, debug = false): Er
21
21
  msg = `${prefix}: ${msg}`
22
22
  if (debug && error instanceof Error)
23
23
  msg = `${msg}\n${error.stack}`
24
- return new Error(msg, { cause: error })
24
+ if (error instanceof Error) {
25
+ const err = new Error(msg, { cause: error })
26
+ err.stack = error.stack
27
+ return err
28
+ }
29
+ else
30
+ return new Error(msg)
25
31
  }
26
32
 
27
33
  /* helper function for retrieving a Promise object */