speechflow 1.6.3 → 1.6.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113)
  1. package/.claude/settings.local.json +3 -0
  2. package/CHANGELOG.md +20 -0
  3. package/README.md +87 -48
  4. package/etc/speechflow.yaml +21 -14
  5. package/package.json +5 -5
  6. package/speechflow-cli/dst/speechflow-main-api.js +3 -7
  7. package/speechflow-cli/dst/speechflow-main-api.js.map +1 -1
  8. package/speechflow-cli/dst/speechflow-main-graph.js +1 -1
  9. package/speechflow-cli/dst/speechflow-main.js +6 -0
  10. package/speechflow-cli/dst/speechflow-main.js.map +1 -1
  11. package/speechflow-cli/dst/speechflow-node-a2a-compressor-wt.js +1 -21
  12. package/speechflow-cli/dst/speechflow-node-a2a-compressor-wt.js.map +1 -1
  13. package/speechflow-cli/dst/speechflow-node-a2a-compressor.js +2 -1
  14. package/speechflow-cli/dst/speechflow-node-a2a-compressor.js.map +1 -1
  15. package/speechflow-cli/dst/speechflow-node-a2a-expander-wt.js +1 -21
  16. package/speechflow-cli/dst/speechflow-node-a2a-expander-wt.js.map +1 -1
  17. package/speechflow-cli/dst/speechflow-node-a2a-expander.js +2 -1
  18. package/speechflow-cli/dst/speechflow-node-a2a-expander.js.map +1 -1
  19. package/speechflow-cli/dst/speechflow-node-a2a-ffmpeg.js +2 -2
  20. package/speechflow-cli/dst/speechflow-node-a2a-ffmpeg.js.map +1 -1
  21. package/speechflow-cli/dst/speechflow-node-a2a-gender.js +38 -42
  22. package/speechflow-cli/dst/speechflow-node-a2a-gender.js.map +1 -1
  23. package/speechflow-cli/dst/speechflow-node-a2a-meter.js +59 -40
  24. package/speechflow-cli/dst/speechflow-node-a2a-meter.js.map +1 -1
  25. package/speechflow-cli/dst/speechflow-node-a2a-rnnoise.js +1 -0
  26. package/speechflow-cli/dst/speechflow-node-a2a-rnnoise.js.map +1 -1
  27. package/speechflow-cli/dst/speechflow-node-a2a-vad.js +2 -2
  28. package/speechflow-cli/dst/speechflow-node-a2a-vad.js.map +1 -1
  29. package/speechflow-cli/dst/speechflow-node-a2t-openai.d.ts +0 -1
  30. package/speechflow-cli/dst/speechflow-node-a2t-openai.js +0 -6
  31. package/speechflow-cli/dst/speechflow-node-a2t-openai.js.map +1 -1
  32. package/speechflow-cli/dst/speechflow-node-t2a-amazon.d.ts +0 -1
  33. package/speechflow-cli/dst/speechflow-node-t2a-amazon.js +0 -6
  34. package/speechflow-cli/dst/speechflow-node-t2a-amazon.js.map +1 -1
  35. package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.d.ts +0 -1
  36. package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.js +0 -6
  37. package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.js.map +1 -1
  38. package/speechflow-cli/dst/speechflow-node-t2a-kokoro.d.ts +0 -1
  39. package/speechflow-cli/dst/speechflow-node-t2a-kokoro.js +0 -6
  40. package/speechflow-cli/dst/speechflow-node-t2a-kokoro.js.map +1 -1
  41. package/speechflow-cli/dst/speechflow-node-t2t-ollama.js +1 -1
  42. package/speechflow-cli/dst/speechflow-node-t2t-ollama.js.map +1 -1
  43. package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js +6 -6
  44. package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js.map +1 -1
  45. package/speechflow-cli/dst/speechflow-node-x2x-trace.d.ts +1 -0
  46. package/speechflow-cli/dst/speechflow-node-x2x-trace.js +22 -2
  47. package/speechflow-cli/dst/speechflow-node-x2x-trace.js.map +1 -1
  48. package/speechflow-cli/dst/speechflow-node-xio-device.js +3 -2
  49. package/speechflow-cli/dst/speechflow-node-xio-device.js.map +1 -1
  50. package/speechflow-cli/dst/speechflow-node-xio-websocket.js.map +1 -1
  51. package/speechflow-cli/dst/speechflow-util-audio.d.ts +1 -0
  52. package/speechflow-cli/dst/speechflow-util-audio.js +21 -0
  53. package/speechflow-cli/dst/speechflow-util-audio.js.map +1 -1
  54. package/speechflow-cli/dst/speechflow-util-error.d.ts +1 -1
  55. package/speechflow-cli/dst/speechflow-util-error.js +7 -1
  56. package/speechflow-cli/dst/speechflow-util-error.js.map +1 -1
  57. package/speechflow-cli/dst/speechflow-util-stream.d.ts +1 -1
  58. package/speechflow-cli/dst/speechflow-util-stream.js +2 -2
  59. package/speechflow-cli/dst/speechflow-util-stream.js.map +1 -1
  60. package/speechflow-cli/etc/oxlint.jsonc +2 -1
  61. package/speechflow-cli/etc/stx.conf +6 -10
  62. package/speechflow-cli/package.json +19 -19
  63. package/speechflow-cli/src/speechflow-main-api.ts +6 -13
  64. package/speechflow-cli/src/speechflow-main-graph.ts +1 -1
  65. package/speechflow-cli/src/speechflow-main.ts +4 -0
  66. package/speechflow-cli/src/speechflow-node-a2a-compressor-wt.ts +1 -29
  67. package/speechflow-cli/src/speechflow-node-a2a-compressor.ts +2 -1
  68. package/speechflow-cli/src/speechflow-node-a2a-expander-wt.ts +1 -29
  69. package/speechflow-cli/src/speechflow-node-a2a-expander.ts +2 -1
  70. package/speechflow-cli/src/speechflow-node-a2a-ffmpeg.ts +2 -2
  71. package/speechflow-cli/src/speechflow-node-a2a-gender.ts +44 -39
  72. package/speechflow-cli/src/speechflow-node-a2a-meter.ts +67 -44
  73. package/speechflow-cli/src/speechflow-node-a2a-rnnoise.ts +1 -0
  74. package/speechflow-cli/src/speechflow-node-a2a-vad.ts +2 -2
  75. package/speechflow-cli/src/speechflow-node-a2t-openai.ts +0 -6
  76. package/speechflow-cli/src/speechflow-node-t2a-amazon.ts +0 -6
  77. package/speechflow-cli/src/speechflow-node-t2a-elevenlabs.ts +0 -6
  78. package/speechflow-cli/src/speechflow-node-t2a-kokoro.ts +0 -6
  79. package/speechflow-cli/src/speechflow-node-t2t-ollama.ts +1 -1
  80. package/speechflow-cli/src/speechflow-node-t2t-subtitle.ts +10 -14
  81. package/speechflow-cli/src/speechflow-node-x2x-trace.ts +25 -2
  82. package/speechflow-cli/src/speechflow-node-xio-device.ts +3 -2
  83. package/speechflow-cli/src/speechflow-node-xio-websocket.ts +1 -1
  84. package/speechflow-cli/src/speechflow-util-audio.ts +30 -0
  85. package/speechflow-cli/src/speechflow-util-error.ts +9 -3
  86. package/speechflow-cli/src/speechflow-util-stream.ts +2 -2
  87. package/speechflow-ui-db/dst/index.js +24 -33
  88. package/speechflow-ui-db/package.json +14 -12
  89. package/speechflow-ui-db/src/app.vue +30 -7
  90. package/speechflow-ui-st/.claude/settings.local.json +3 -0
  91. package/speechflow-ui-st/dst/app-font-TypoPRO-SourceSansPro-BoldIt.eot +0 -0
  92. package/speechflow-ui-st/dst/app-font-TypoPRO-SourceSansPro-BoldIt.ttf +0 -0
  93. package/speechflow-ui-st/dst/app-font-TypoPRO-SourceSansPro-BoldIt.woff +0 -0
  94. package/speechflow-ui-st/dst/app-font-TypoPRO-SourceSansPro-RegularIt.eot +0 -0
  95. package/speechflow-ui-st/dst/app-font-TypoPRO-SourceSansPro-RegularIt.ttf +0 -0
  96. package/speechflow-ui-st/dst/app-font-TypoPRO-SourceSansPro-RegularIt.woff +0 -0
  97. package/speechflow-ui-st/dst/app-font-TypoPRO-SourceSansPro-Semibold.eot +0 -0
  98. package/speechflow-ui-st/dst/app-font-TypoPRO-SourceSansPro-Semibold.ttf +0 -0
  99. package/speechflow-ui-st/dst/app-font-TypoPRO-SourceSansPro-Semibold.woff +0 -0
  100. package/speechflow-ui-st/dst/app-font-TypoPRO-SourceSansPro-SemiboldIt.eot +0 -0
  101. package/speechflow-ui-st/dst/app-font-TypoPRO-SourceSansPro-SemiboldIt.ttf +0 -0
  102. package/speechflow-ui-st/dst/app-font-TypoPRO-SourceSansPro-SemiboldIt.woff +0 -0
  103. package/speechflow-ui-st/dst/index.css +2 -2
  104. package/speechflow-ui-st/dst/index.js +461 -28
  105. package/speechflow-ui-st/package.json +14 -13
  106. package/speechflow-ui-st/src/app.vue +150 -51
  107. package/speechflow-ui-st/src/index.ts +4 -0
  108. package/speechflow-cli/dst/speechflow-util-webaudio-wt.d.ts +0 -1
  109. package/speechflow-cli/dst/speechflow-util-webaudio-wt.js +0 -124
  110. package/speechflow-cli/dst/speechflow-util-webaudio-wt.js.map +0 -1
  111. package/speechflow-cli/dst/speechflow-util-webaudio.d.ts +0 -13
  112. package/speechflow-cli/dst/speechflow-util-webaudio.js +0 -137
  113. package/speechflow-cli/dst/speechflow-util-webaudio.js.map +0 -1
package/speechflow-cli/src/speechflow-node-a2a-gender.ts
@@ -5,12 +5,13 @@
  */
 
 /* standard dependencies */
-import path from "node:path"
-import Stream from "node:stream"
+import path from "node:path"
+import Stream from "node:stream"
 
 /* external dependencies */
-import * as Transformers from "@huggingface/transformers"
-import { WaveFile } from "wavefile"
+import * as Transformers from "@huggingface/transformers"
+import { WaveFile } from "wavefile"
+import { getRMS, AudioData } from "audio-inspect"
 
 /* internal dependencies */
 import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
@@ -47,7 +48,10 @@ export default class SpeechFlowNodeA2AGender extends SpeechFlowNode {
 
         /* declare node configuration parameters */
         this.configure({
-            window: { type: "number", pos: 0, val: 500 }
+            window: { type: "number", pos: 0, val: 500 },
+            threshold: { type: "number", pos: 1, val: 0.50 },
+            hysteresis: { type: "number", pos: 2, val: 0.25 },
+            volumeThreshold: { type: "number", pos: 3, val: -45 }
         })
 
         /* declare node input/output format */

@@ -64,9 +68,6 @@ export default class SpeechFlowNodeA2AGender extends SpeechFlowNode {
         /* clear shutdown flag */
         this.shutdown = false
 
-        /* pass-through logging */
-        const log = this.log.bind(this)
-
         /* the used model */
         const model = "Xenova/wav2vec2-large-xlsr-53-gender-recognition-librispeech"
 
@@ -102,24 +103,17 @@ export default class SpeechFlowNodeA2AGender extends SpeechFlowNode {
                 device: "auto",
                 progress_callback: progressCallback
             })
-            let timeoutId: ReturnType<typeof setTimeout> | null = null
-            const timeoutPromise = new Promise((resolve, reject) => {
-                timeoutId = setTimeout(() =>
-                    reject(new Error("model initialization timeout")), 30 * 1000)
-            })
             this.classifier = await Promise.race([
-                pipelinePromise, timeoutPromise
-            ]).finally(() => {
-                if (timeoutId !== null)
-                    clearTimeout(timeoutId)
-            }) as Transformers.AudioClassificationPipeline
+                pipelinePromise,
+                util.timeoutPromise(30 * 1000, "model initialization timeout")
+            ]) as Transformers.AudioClassificationPipeline
         }
         catch (error) {
             if (this.progressInterval) {
                 clearInterval(this.progressInterval)
                 this.progressInterval = null
             }
-            throw new Error(`failed to initialize classifier pipeline: ${error}`)
+            throw new Error(`failed to initialize classifier pipeline: ${error}`, { cause: error })
         }
         if (this.progressInterval) {
             clearInterval(this.progressInterval)
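
Note on the hunk above: the inline setTimeout/clearTimeout race is replaced by a shared util.timeoutPromise() helper (used again for the classification call below). Its implementation is not part of the hunks shown here; judging purely from the call sites, a minimal sketch could look like the following, where the unref() call is our assumption rather than something this diff confirms:

    /* hypothetical sketch of util.timeoutPromise(ms, message): a promise that
       rejects after "ms" milliseconds, meant as a Promise.race() competitor */
    export function timeoutPromise (ms: number, message: string): Promise<never> {
        return new Promise<never>((_resolve, reject) => {
            const timer = setTimeout(() => reject(new Error(message)), ms)
            timer.unref?.() /* assumption: do not keep the event loop alive */
        })
    }

Unlike the removed inline version, nothing clears the timer once the raced work wins, so an unref()'d (or otherwise harmless) pending timer is the price of the simpler call sites.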
@@ -128,38 +122,49 @@ export default class SpeechFlowNodeA2AGender extends SpeechFlowNode {
         if (this.classifier === null)
             throw new Error("failed to instantiate classifier pipeline")
 
+        /* define sample rate required by model */
+        const sampleRateTarget = 16000
+
         /* classify a single large-enough concatenated audio frame */
         const classify = async (data: Float32Array) => {
             if (this.shutdown || this.classifier === null)
                 throw new Error("classifier shutdown during operation")
-            const classifyPromise = this.classifier(data)
-            let timeoutId: ReturnType<typeof setTimeout> | null = null
-            const timeoutPromise = new Promise((resolve, reject) => {
-                timeoutId = setTimeout(() =>
-                    reject(new Error("classification timeout")), 30 * 1000)
-            })
-            const result = await Promise.race([ classifyPromise, timeoutPromise ]).finally(() => {
-                if (timeoutId !== null)
-                    clearTimeout(timeoutId)
-            }) as Transformers.AudioClassificationOutput | Transformers.AudioClassificationOutput[]
+
+            /* check volume level and return "unknown" if too low
+               in order to avoid a wrong classificaton */
+            const audioData = {
+                sampleRate: sampleRateTarget,
+                numberOfChannels: 1,
+                channelData: [ data ],
+                duration: data.length / sampleRateTarget,
+                length: data.length
+            } satisfies AudioData
+            const rms = getRMS(audioData, { asDB: true })
+            if (rms < this.params.volumeThreshold)
+                return "unknown"
+
+            /* classify audio */
+            const result = await Promise.race([
+                this.classifier(data),
+                util.timeoutPromise(30 * 1000, "classification timeout")
+            ]) as Transformers.AudioClassificationOutput | Transformers.AudioClassificationOutput[]
             const classified = Array.isArray(result) ?
                 result as Transformers.AudioClassificationOutput :
                 [ result ]
-            const c1 = classified.find((c: any) => c.label === "male")
-            const c2 = classified.find((c: any) => c.label === "female")
+            const c1 = classified.find((c) => c.label === "male")
+            const c2 = classified.find((c) => c.label === "female")
             const male = c1 ? c1.score : 0.0
             const female = c2 ? c2.score : 0.0
-            if (male > 0.50 && male > female + 0.25)
+            const threshold = this.params.threshold
+            const hysteresis = this.params.hysteresis
+            if (male > threshold && male > female + hysteresis)
                 return "male"
-            else if (female > 0.50 && female > male + 0.25)
+            else if (female > threshold && female > male + hysteresis)
                 return "female"
             else
                 return "unknown"
         }
 
-        /* define sample rate required by model */
-        const sampleRateTarget = 16000
-
         /* work off queued audio frames */
         const frameWindowDuration = this.params.window / 1000
         const frameWindowSamples = Math.floor(frameWindowDuration * sampleRateTarget)
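
The new volume gate asks audio-inspect's getRMS() for a level in dB (asDB: true) and reports "unknown" below volumeThreshold (default -45 dB), so the model never has to guess on near-silence. For intuition only (audio-inspect's exact implementation may differ, e.g. in channel handling), an RMS level in dBFS over float samples is:

    /* illustrative: RMS level in dBFS for Float32 samples in [-1, 1] */
    function rmsDb (samples: Float32Array): number {
        let sum = 0
        for (const s of samples)
            sum += s * s
        const rms = Math.sqrt(sum / samples.length)
        return 20 * Math.log10(Math.max(rms, 1e-10)) /* clamp to avoid -Infinity */
    }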
@@ -209,13 +214,13 @@ export default class SpeechFlowNodeA2AGender extends SpeechFlowNode {
                     pos0++
                 }
                 if (lastGender !== gender && !this.shutdown) {
-                    log("info", `gender now recognized as <${gender}>`)
+                    this.log("info", `gender now recognized as <${gender}>`)
                     lastGender = gender
                 }
             }
         }
         catch (error) {
-            log("error", `gender classification error: ${error}`)
+            this.log("error", `gender classification error: ${error}`)
         }
 
         /* re-initiate working off round */

@@ -307,7 +312,7 @@ export default class SpeechFlowNodeA2AGender extends SpeechFlowNode {
                     && element.gender === undefined)
                     break
                 const duration = util.audioArrayDuration(element.data)
-                log("debug", `send chunk (${duration.toFixed(3)}s) with gender <${element.gender}>`)
+                self.log("debug", `send chunk (${duration.toFixed(3)}s) with gender <${element.gender}>`)
                 element.chunk.meta.set("gender", element.gender)
                 this.push(element.chunk)
                 self.queueSend.walk(+1)
package/speechflow-cli/src/speechflow-node-a2a-meter.ts
@@ -22,7 +22,7 @@ export default class SpeechFlowNodeA2AMeter extends SpeechFlowNode {
     /* internal state */
     private emitInterval: ReturnType<typeof setInterval> | null = null
     private calcInterval: ReturnType<typeof setInterval> | null = null
-    private silenceTimer: ReturnType<typeof setTimeout> | null = null
+    private silenceTimer: ReturnType<typeof setTimeout> | null = null
     private chunkBuffer = new Float32Array(0)
     private destroyed = false
 

@@ -32,13 +32,17 @@ export default class SpeechFlowNodeA2AMeter extends SpeechFlowNode {
 
         /* declare node configuration parameters */
         this.configure({
-            interval: { type: "number", pos: 0, val: 250 },
+            interval: { type: "number", pos: 0, val: 100 },
+            mode: { type: "string", pos: 1, val: "filter", match: /^(?:filter|sink)$/ },
            dashboard: { type: "string", val: "" }
         })
 
         /* declare node input/output format */
         this.input = "audio"
-        this.output = "audio"
+        if (this.params.mode === "filter")
+            this.output = "audio"
+        else if (this.params.mode === "sink")
+            this.output = "none"
     }
 
     /* open node */
@@ -51,73 +55,91 @@ export default class SpeechFlowNodeA2AMeter extends SpeechFlowNode {
         this.destroyed = false
 
         /* internal state */
-        const sampleWindowDuration = 3 /* LUFS-S requires 3s */
+        let lufsm = -60
+        let rms = -60
+
+        /* chunk processing state for LUFS-M */
+        const sampleWindowDuration = 0.4 /* LUFS-M requires 400ms */
         const sampleWindowSize = Math.floor(this.config.audioSampleRate * sampleWindowDuration)
-        let sampleWindow = new Float32Array(sampleWindowSize)
+        const sampleWindow = new Float32Array(sampleWindowSize)
         sampleWindow.fill(0, 0, sampleWindowSize)
-        let lufss = -60
-        let rms = -60
 
-        /* chunk processing state */
+        /* chunk processing state for RMS */
         const chunkDuration = 0.050 /* meter update frequency is about 50ms */
         const samplesPerChunk = Math.floor(this.config.audioSampleRate * chunkDuration)
         this.chunkBuffer = new Float32Array(0)
 
-        /* define chunk processing function */
-        const processChunk = (chunkData: Float32Array) => {
-            /* update internal audio sample sliding window */
-            const newWindow = new Float32Array(sampleWindowSize)
-            newWindow.set(sampleWindow.slice(chunkData.length), 0)
-            newWindow.set(chunkData, sampleWindowSize - chunkData.length)
-            sampleWindow = newWindow
+        /* setup chunking interval */
+        this.calcInterval = setInterval(() => {
+            /* short-circuit during destruction */
+            if (this.destroyed)
+                return
 
-            /* calculate the LUFS-S and RMS metric */
-            const audioData = {
+            /* short-circuit if still not enough chunk data */
+            if (this.chunkBuffer.length < samplesPerChunk)
+                return
+
+            /* grab the accumulated chunk data */
+            const chunkData = this.chunkBuffer
+            this.chunkBuffer = new Float32Array(0)
+
+            /* update internal audio sample sliding window for LUFS-S */
+            if (chunkData.length > sampleWindow.length)
+                sampleWindow.set(chunkData.subarray(chunkData.length - sampleWindow.length), 0)
+            else {
+                sampleWindow.set(sampleWindow.subarray(chunkData.length), 0)
+                sampleWindow.set(chunkData, sampleWindow.length - chunkData.length)
+            }
+
+            /* calculate the LUFS-M metric */
+            const audioDataLUFS = {
                 sampleRate: this.config.audioSampleRate,
                 numberOfChannels: this.config.audioChannels,
                 channelData: [ sampleWindow ],
                 duration: sampleWindowDuration,
                 length: sampleWindow.length
             } satisfies AudioData
-            const lufs = getLUFS(audioData, {
+            const lufs = getLUFS(audioDataLUFS, {
                 channelMode: this.config.audioChannels === 1 ? "mono" : "stereo",
-                calculateShortTerm: true,
-                calculateMomentary: false,
+                calculateShortTerm: false,
+                calculateMomentary: true,
                 calculateLoudnessRange: false,
                 calculateTruePeak: false
             })
-            lufss = lufs.shortTerm ? lufs.shortTerm[0] : -60
-            rms = getRMS(audioData, { asDB: true })
+            lufsm = lufs.momentary ? Math.max(-60, lufs.momentary[0]) : -60
+
+            /* calculate the RMS metric */
+            const totalSamples = chunkData.length / this.config.audioChannels
+            const duration = totalSamples / this.config.audioSampleRate
+            const audioDataRMS = {
+                sampleRate: this.config.audioSampleRate,
+                numberOfChannels: this.config.audioChannels,
+                channelData: [ chunkData ],
+                duration,
+                length: chunkData.length
+            } satisfies AudioData
+            rms = Math.max(-60, getRMS(audioDataRMS, {
+                asDB: true
+            }))
+
+            /* automatically clear measurement (in case no new measurements happen) */
             if (this.silenceTimer !== null)
                 clearTimeout(this.silenceTimer)
             this.silenceTimer = setTimeout(() => {
-                lufss = -60
+                lufsm = -60
                 rms = -60
             }, 500)
-        }
-
-        /* setup chunking interval */
-        this.calcInterval = setInterval(() => {
-            if (this.destroyed)
-                return
-
-            /* process one single 50ms chunk if available */
-            if (this.chunkBuffer.length >= samplesPerChunk) {
-                const chunkData = this.chunkBuffer.slice(0, samplesPerChunk)
-                this.chunkBuffer = this.chunkBuffer.slice(samplesPerChunk)
-                processChunk(chunkData)
-            }
         }, chunkDuration * 1000)
 
         /* setup loudness emitting interval */
         this.emitInterval = setInterval(() => {
             if (this.destroyed)
                 return
-            this.log("debug", `LUFS-S: ${lufss.toFixed(1)} dB, RMS: ${rms.toFixed(1)} dB`)
-            this.sendResponse([ "meter", "LUFS-S", lufss ])
+            this.log("debug", `LUFS-M: ${lufsm.toFixed(1)} dB, RMS: ${rms.toFixed(1)} dB`)
+            this.sendResponse([ "meter", "LUFS-M", lufsm ])
             this.sendResponse([ "meter", "RMS", rms ])
             if (this.params.dashboard !== "")
-                this.sendDashboard("audio", this.params.dashboard, "final", lufss)
+                this.sendDashboard("audio", this.params.dashboard, "final", lufsm)
         }, this.params.interval)
 
         /* provide Duplex stream and internally attach to meter */
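
The switch from LUFS-S to LUFS-M shrinks the analysis window from the 3 s that short-term loudness needs to the 400 ms that ITU-R BS.1770 momentary loudness requires, which is what makes the meter responsive at the new 100 ms emit interval. At the package's standard 48 kHz rate, the constants above imply:

    /* illustrative: buffer sizes implied by the new constants at 48 kHz */
    const audioSampleRate  = 48000
    const sampleWindowSize = Math.floor(audioSampleRate * 0.4)   /* 19200 samples (LUFS-M window) */
    const samplesPerChunk  = Math.floor(audioSampleRate * 0.050) /*  2400 samples (RMS chunk) */

The rework also reuses one preallocated sliding window via set()/subarray() instead of allocating a fresh Float32Array per chunk, avoiding per-tick garbage.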
@@ -151,7 +173,8 @@ export default class SpeechFlowNodeA2AMeter extends SpeechFlowNode {
                     self.chunkBuffer = newBuffer
 
                     /* pass-through original audio chunk */
-                    this.push(chunk)
+                    if (self.params.mode === "filter")
+                        this.push(chunk)
                     callback()
                 }
                 catch (error) {

@@ -160,7 +183,7 @@ export default class SpeechFlowNodeA2AMeter extends SpeechFlowNode {
                 }
             },
             final (callback) {
-                if (self.destroyed) {
+                if (self.destroyed || self.params.mode === "sink") {
                     callback()
                     return
                 }

@@ -172,6 +195,9 @@ export default class SpeechFlowNodeA2AMeter extends SpeechFlowNode {
 
     /* close node */
     async close () {
+        /* indicate destruction immediately to stop any ongoing operations */
+        this.destroyed = true
+
         /* stop intervals */
         if (this.emitInterval !== null) {
             clearInterval(this.emitInterval)

@@ -191,8 +217,5 @@ export default class SpeechFlowNodeA2AMeter extends SpeechFlowNode {
             this.stream.destroy()
             this.stream = null
         }
-
-        /* indicate destruction */
-        this.destroyed = true
     }
 }
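
The close() reordering above is the classic flag-first teardown: the destroyed flag becomes visible before the intervals are cleared, so a callback already queued on the event loop bails out instead of touching half-destroyed state. In isolation:

    /* illustrative: set the flag first, then clear the timer, so an
       already-queued tick observes the flag and returns early */
    let destroyed = false
    const timer = setInterval(() => {
        if (destroyed)
            return
        /* ... work on shared state ... */
    }, 100)
    function close () {
        destroyed = true     /* first: make teardown observable */
        clearInterval(timer) /* then: stop future ticks */
    }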
package/speechflow-cli/src/speechflow-node-a2a-rnnoise.ts
@@ -44,6 +44,7 @@ export default class SpeechFlowNodeA2ARNNoise extends SpeechFlowNode {
         this.worker = new Worker(resolve(__dirname, "speechflow-node-a2a-rnnoise-wt.js"))
         this.worker.on("error", (err) => {
             this.log("error", `RNNoise worker thread error: ${err}`)
+            this.stream?.emit("error", err)
         })
         this.worker.on("exit", (code) => {
             if (code !== 0)
package/speechflow-cli/src/speechflow-node-a2a-vad.ts
@@ -158,14 +158,14 @@ export default class SpeechFlowNodeA2AVAD extends SpeechFlowNode {
                     }
                 }
                 catch (error) {
-                    this.log("error", `VAD frame processing error: ${error}`)
+                    this.log("error", `VAD frame processing error: ${error}`, { cause: error })
                 }
             }
         })
         this.vad.start()
     }
     catch (error) {
-        throw new Error(`failed to initialize VAD: ${error}`)
+        throw new Error(`failed to initialize VAD: ${error}`, { cause: error })
     }
 
     /* provide Duplex stream and internally attach to VAD */
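
This release consistently attaches { cause: error } when wrapping errors (here, in the gender node above, and in the Ollama node below): the ES2022 Error "cause" option keeps the original error inspectable behind the human-readable wrapper. A minimal illustration:

    /* illustrative: ES2022 error chaining via the "cause" option */
    try {
        throw new Error("socket closed")
    }
    catch (err) {
        const wrapped = new Error(`failed to initialize VAD: ${err}`, { cause: err })
        console.log(wrapped.message)          /* high-level description */
        console.log(wrapped.cause as Error)   /* original error, stack intact */
    }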
package/speechflow-cli/src/speechflow-node-a2t-openai.ts
@@ -23,7 +23,6 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
     public static name = "a2t-openai"
 
     /* internal state */
-    private static speexInitialized = false
     private openai: OpenAI | null = null
     private ws: ws.WebSocket | null = null
     private queue: util.SingleQueue<SpeechFlowChunk | null> | null = null

@@ -71,11 +70,6 @@ export default class SpeechFlowNodeA2TOpenAI extends SpeechFlowNode {
 
         /* establish resampler from our standard audio sample rate (48Khz)
            to OpenAI's maximum 24Khz input sample rate */
-        if (!SpeechFlowNodeA2TOpenAI.speexInitialized) {
-            /* at least once initialize resampler */
-            await SpeexResampler.initPromise
-            SpeechFlowNodeA2TOpenAI.speexInitialized = true
-        }
         this.resampler = new SpeexResampler(1, this.config.audioSampleRate, 24000, 7)
 
         /* instantiate OpenAI API */
package/speechflow-cli/src/speechflow-node-t2a-amazon.ts
@@ -26,7 +26,6 @@ export default class SpeechFlowNodeT2AAmazon extends SpeechFlowNode {
 
     /* internal state */
     private client: PollyClient | null = null
-    private static speexInitialized = false
     private destroyed = false
     private resampler: SpeexResampler | null = null
 

@@ -114,11 +113,6 @@ export default class SpeechFlowNodeT2AAmazon extends SpeechFlowNode {
 
         /* establish resampler from AWS Polly's maximum 16Khz output
            (for PCM output) to our standard audio sample rate (48KHz) */
-        if (!SpeechFlowNodeT2AAmazon.speexInitialized) {
-            /* at least once initialize resampler */
-            await SpeexResampler.initPromise
-            SpeechFlowNodeT2AAmazon.speexInitialized = true
-        }
         this.resampler = new SpeexResampler(1, 16000, this.config.audioSampleRate, 7)
 
         /* create transform stream and connect it to the AWS Polly API */
package/speechflow-cli/src/speechflow-node-t2a-elevenlabs.ts
@@ -22,7 +22,6 @@ export default class SpeechFlowNodeT2AElevenlabs extends SpeechFlowNode {
 
     /* internal state */
     private elevenlabs: ElevenLabs.ElevenLabsClient | null = null
-    private static speexInitialized = false
     private destroyed = false
     private resampler: SpeexResampler | null = null
 

@@ -131,11 +130,6 @@ export default class SpeechFlowNodeT2AElevenlabs extends SpeechFlowNode {
 
         /* establish resampler from ElevenLabs's maximum 24Khz
            output to our standard audio sample rate (48KHz) */
-        if (!SpeechFlowNodeT2AElevenlabs.speexInitialized) {
-            /* at least once initialize resampler */
-            await SpeexResampler.initPromise
-            SpeechFlowNodeT2AElevenlabs.speexInitialized = true
-        }
         this.resampler = new SpeexResampler(1, maxSampleRate, this.config.audioSampleRate, 7)
 
         /* create transform stream and connect it to the ElevenLabs API */
package/speechflow-cli/src/speechflow-node-t2a-kokoro.ts
@@ -23,7 +23,6 @@ export default class SpeechFlowNodeT2AKokoro extends SpeechFlowNode {
     /* internal state */
     private kokoro: KokoroTTS | null = null
     private resampler: SpeexResampler | null = null
-    private static speexInitialized = false
 
     /* construct node */
     constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {

@@ -82,11 +81,6 @@ export default class SpeechFlowNodeT2AKokoro extends SpeechFlowNode {
 
         /* establish resampler from Kokoro's maximum 24Khz
            output to our standard audio sample rate (48KHz) */
-        if (!SpeechFlowNodeT2AKokoro.speexInitialized) {
-            /* at least once initialize resampler */
-            SpeechFlowNodeT2AKokoro.speexInitialized = true
-            await SpeexResampler.initPromise
-        }
         this.resampler = new SpeexResampler(1, 24000, this.config.audioSampleRate, 7)
 
         /* determine voice for text-to-speech operation */
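
All four nodes above drop both the per-class speexInitialized flag and the explicit await of SpeexResampler.initPromise before constructing a resampler; this diff does not show where that initialization now happens (presumably the library handles it itself). The flag half of the removal was redundant in any case, since a promise memoizes its settlement and every later awaiter resumes immediately:

    /* illustrative: a settled promise resolves immediately for every later
       awaiter, so a "did we await it already?" flag around it adds nothing */
    const initPromise: Promise<void> = new Promise((resolve) => setTimeout(resolve, 100))
    async function makeThing (): Promise<string> {
        await initPromise /* first caller waits; later callers continue at once */
        return "ready"
    }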
package/speechflow-cli/src/speechflow-node-t2t-ollama.ts
@@ -177,7 +177,7 @@ export default class SpeechFlowNodeT2TOllama extends SpeechFlowNode {
             models = await this.ollama.list()
         }
         catch (err) {
-            throw new Error(`failed to connect to Ollama API at ${this.params.api}: ${err}`)
+            throw new Error(`failed to connect to Ollama API at ${this.params.api}: ${err}`, { cause: err })
         }
         const exists = models.models.some((m) => m.name === this.params.model)
         if (!exists) {
package/speechflow-cli/src/speechflow-node-t2t-subtitle.ts
@@ -20,13 +20,10 @@ import HAPIWebSocket from "hapi-plugin-websocket"
 import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
 import * as util from "./speechflow-util"
 
-type wsPeerCtx = {
-    peer: string
-}
-type wsPeerInfo = {
-    ctx: wsPeerCtx
-    ws: WebSocket
-    req: http.IncomingMessage
+type WSPeerInfo = {
+    ctx: Record<string, any>
+    ws: WebSocket
+    req: http.IncomingMessage
 }
 
 /* SpeechFlow node for subtitle (text-to-text) "translations" */

@@ -160,7 +157,7 @@ export default class SpeechFlowNodeT2TSubtitle extends SpeechFlowNode {
         }
         else if (this.params.mode === "render") {
             /* establish REST/WebSocket API */
-            const wsPeers = new Map<string, wsPeerInfo>()
+            const wsPeers = new Map<string, WSPeerInfo>()
             this.hapi = new HAPI.Server({
                 address: this.params.addr,
                 port: this.params.port

@@ -205,19 +202,18 @@ export default class SpeechFlowNodeT2TSubtitle extends SpeechFlowNode {
                 plugins: {
                     websocket: {
                         autoping: 30 * 1000,
-                        connect: (args: any) => {
-                            const ctx: wsPeerCtx = args.ctx
-                            const ws: WebSocket = args.ws
-                            const req: http.IncomingMessage = args.req
+                        connect: ({ ctx, ws, req }) => {
                             const peer = `${req.socket.remoteAddress}:${req.socket.remotePort}`
                             ctx.peer = peer
                             wsPeers.set(peer, { ctx, ws, req })
                             this.log("info", `HAPI: WebSocket: connect: peer ${peer}`)
                         },
-                        disconnect: (args: any) => {
-                            const ctx: wsPeerCtx = args.ctx
+                        disconnect: ({ ctx, ws }) => {
                             const peer = ctx.peer
                             wsPeers.delete(peer)
+                            ws.removeAllListeners()
+                            if (ws.readyState === WebSocket.OPEN)
+                                ws.close()
                             this.log("info", `HAPI: WebSocket: disconnect: peer ${peer}`)
                         }
                     }
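
The disconnect handler now actively tears the peer socket down instead of only forgetting it. The same pattern in isolation, using the ws package (whose WebSocket instances are Node.js EventEmitters):

    import WebSocket from "ws"

    /* illustrative: drop a peer socket; the readyState guard avoids
       redundantly closing a socket that is already closing or closed */
    function dropPeer (ws: WebSocket) {
        ws.removeAllListeners()
        if (ws.readyState === WebSocket.OPEN)
            ws.close()
    }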
package/speechflow-cli/src/speechflow-node-x2x-trace.ts
@@ -18,6 +18,9 @@ export default class SpeechFlowNodeX2XTrace extends SpeechFlowNode {
     /* declare official node name */
     public static name = "x2x-trace"
 
+    /* internal state */
+    private destroyed = false
+
     /* construct node */
     constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
         super(id, cfg, opts, args)

@@ -26,6 +29,7 @@ export default class SpeechFlowNodeX2XTrace extends SpeechFlowNode {
         this.configure({
             type: { type: "string", pos: 0, val: "audio", match: /^(?:audio|text)$/ },
             name: { type: "string", pos: 1, val: "trace" },
+            mode: { type: "string", pos: 2, val: "filter", match: /^(?:filter|sink)$/ },
             dashboard: { type: "string", val: "" }
         })
 

@@ -35,7 +39,10 @@ export default class SpeechFlowNodeX2XTrace extends SpeechFlowNode {
 
         /* declare node input/output format */
         this.input = this.params.type
-        this.output = this.params.type
+        if (this.params.mode === "filter")
+            this.output = this.params.type
+        else if (this.params.mode === "sink")
+            this.output = "none"
     }
 
     /* open node */

@@ -48,6 +55,9 @@ export default class SpeechFlowNodeX2XTrace extends SpeechFlowNode {
             this.log(level, msg)
         }
 
+        /* clear destruction flag */
+        this.destroyed = false
+
         /* helper functions for formatting */
         const fmtTime = (t: Duration) => t.toFormat("hh:mm:ss.SSS")
         const fmtMeta = (meta: Map<string, any>) => {

@@ -74,6 +84,10 @@ export default class SpeechFlowNodeX2XTrace extends SpeechFlowNode {
             highWaterMark: 1,
             transform (chunk: SpeechFlowChunk, encoding, callback) {
                 let error: Error | undefined
+                if (self.destroyed) {
+                    callback(new Error("stream already destroyed"))
+                    return
+                }
                 if (Buffer.isBuffer(chunk.payload)) {
                     if (self.params.type === "audio")
                         log("debug", fmtChunkBase(chunk) +

@@ -94,7 +108,9 @@ export default class SpeechFlowNodeX2XTrace extends SpeechFlowNode {
                     else
                         error = new Error(`${self.params.type} chunk: seen String instead of Buffer chunk type`)
                 }
-                if (error !== undefined)
+                if (self.params.mode === "sink")
+                    callback()
+                else if (error !== undefined)
                     callback(error)
                 else {
                     this.push(chunk, encoding)

@@ -102,6 +118,10 @@ export default class SpeechFlowNodeX2XTrace extends SpeechFlowNode {
                 }
             },
             final (callback) {
+                if (self.destroyed || self.params.mode === "sink") {
+                    callback()
+                    return
+                }
                 this.push(null)
                 callback()
             }

@@ -115,5 +135,8 @@ export default class SpeechFlowNodeX2XTrace extends SpeechFlowNode {
             this.stream.destroy()
             this.stream = null
         }
+
+        /* indicate destruction */
+        this.destroyed = true
     }
 }
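
The meter and trace nodes now share a filter-vs-sink idea: in "filter" mode chunks pass through to downstream nodes, while in "sink" mode the node terminates its pipeline branch (output "none") and swallows chunks after inspecting them. Reduced to a plain Node.js object-mode Transform (not the actual SpeechFlow node classes), the pattern is:

    import Stream from "node:stream"

    /* minimal sketch of the filter-vs-sink pattern: "filter" passes
       chunks through after inspection, "sink" consumes them */
    function makeNodeStream (mode: "filter" | "sink") {
        return new Stream.Transform({
            objectMode: true,
            transform (chunk, _encoding, callback) {
                /* ... measure or trace the chunk here ... */
                if (mode === "filter")
                    this.push(chunk)
                callback()
            },
            final (callback) {
                if (mode === "filter")
                    this.push(null)
                callback()
            }
        })
    }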
package/speechflow-cli/src/speechflow-node-xio-device.ts
@@ -115,7 +115,7 @@ export default class SpeechFlowNodeXIODevice extends SpeechFlowNode {
 
             /* convert regular stream into object-mode stream */
             const wrapper1 = util.createTransformStreamForWritableSide()
-            const wrapper2 = util.createTransformStreamForReadableSide("audio", () => this.timeZero)
+            const wrapper2 = util.createTransformStreamForReadableSide("audio", () => this.timeZero, highwaterMark)
             this.stream = Stream.compose(wrapper1, this.stream, wrapper2)
         }
 

@@ -136,7 +136,7 @@ export default class SpeechFlowNodeXIODevice extends SpeechFlowNode {
             this.stream = this.io as unknown as Stream.Readable
 
             /* convert regular stream into object-mode stream */
-            const wrapper = util.createTransformStreamForReadableSide("audio", () => this.timeZero)
+            const wrapper = util.createTransformStreamForReadableSide("audio", () => this.timeZero, highwaterMark)
             this.stream = Stream.compose(this.stream, wrapper)
         }
 

@@ -193,6 +193,7 @@ export default class SpeechFlowNodeXIODevice extends SpeechFlowNode {
         /* pass-through PortAudio errors */
         this.io!.on("error", (err) => {
             this.emit("error", err)
+            this.stream?.emit("error", err)
         })
 
         /* start PortAudio */
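
Passing the device's highwaterMark through to util.createTransformStreamForReadableSide() presumably aligns backpressure across the composed stream; the helper's exact signature is not shown in this diff. Worth remembering: for object-mode streams, Node.js counts highWaterMark in chunks, not bytes:

    import Stream from "node:stream"

    /* illustrative: in object-mode, highWaterMark is a chunk count */
    const passthrough = new Stream.Transform({
        objectMode: true,
        highWaterMark: 1, /* buffer at most one chunk before signaling backpressure */
        transform (chunk, _encoding, callback) {
            callback(null, chunk)
        }
    })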
package/speechflow-cli/src/speechflow-node-xio-websocket.ts
@@ -236,7 +236,7 @@ export default class SpeechFlowNodeXIOWebSocket extends SpeechFlowNode {
 
         /* close Websocket client */
         if (this.client !== null) {
-            this.client!.close()
+            this.client.close()
             this.client = null
         }
 