speechflow 1.6.5 → 1.6.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (160)
  1. package/CHANGELOG.md +19 -0
  2. package/README.md +23 -0
  3. package/etc/stx.conf +5 -0
  4. package/package.json +4 -4
  5. package/speechflow-cli/dst/speechflow-main-cli.js +2 -2
  6. package/speechflow-cli/dst/speechflow-main-cli.js.map +1 -1
  7. package/speechflow-cli/dst/speechflow-main-graph.js +4 -3
  8. package/speechflow-cli/dst/speechflow-main-graph.js.map +1 -1
  9. package/speechflow-cli/dst/speechflow-node-a2a-compressor.d.ts +1 -1
  10. package/speechflow-cli/dst/speechflow-node-a2a-compressor.js +12 -11
  11. package/speechflow-cli/dst/speechflow-node-a2a-compressor.js.map +1 -1
  12. package/speechflow-cli/dst/speechflow-node-a2a-expander.d.ts +1 -1
  13. package/speechflow-cli/dst/speechflow-node-a2a-expander.js +12 -11
  14. package/speechflow-cli/dst/speechflow-node-a2a-expander.js.map +1 -1
  15. package/speechflow-cli/dst/speechflow-node-a2a-ffmpeg.js +2 -8
  16. package/speechflow-cli/dst/speechflow-node-a2a-ffmpeg.js.map +1 -1
  17. package/speechflow-cli/dst/speechflow-node-a2a-filler.d.ts +1 -1
  18. package/speechflow-cli/dst/speechflow-node-a2a-filler.js +18 -16
  19. package/speechflow-cli/dst/speechflow-node-a2a-filler.js.map +1 -1
  20. package/speechflow-cli/dst/speechflow-node-a2a-gain.d.ts +1 -1
  21. package/speechflow-cli/dst/speechflow-node-a2a-gain.js +8 -8
  22. package/speechflow-cli/dst/speechflow-node-a2a-gain.js.map +1 -1
  23. package/speechflow-cli/dst/speechflow-node-a2a-gender.d.ts +1 -1
  24. package/speechflow-cli/dst/speechflow-node-a2a-gender.js +38 -34
  25. package/speechflow-cli/dst/speechflow-node-a2a-gender.js.map +1 -1
  26. package/speechflow-cli/dst/speechflow-node-a2a-meter.d.ts +1 -1
  27. package/speechflow-cli/dst/speechflow-node-a2a-meter.js +11 -11
  28. package/speechflow-cli/dst/speechflow-node-a2a-meter.js.map +1 -1
  29. package/speechflow-cli/dst/speechflow-node-a2a-mute.d.ts +1 -1
  30. package/speechflow-cli/dst/speechflow-node-a2a-mute.js +44 -10
  31. package/speechflow-cli/dst/speechflow-node-a2a-mute.js.map +1 -1
  32. package/speechflow-cli/dst/speechflow-node-a2a-pitch.d.ts +13 -0
  33. package/speechflow-cli/dst/speechflow-node-a2a-pitch.js +213 -0
  34. package/speechflow-cli/dst/speechflow-node-a2a-pitch.js.map +1 -0
  35. package/speechflow-cli/dst/speechflow-node-a2a-rnnoise.d.ts +1 -1
  36. package/speechflow-cli/dst/speechflow-node-a2a-rnnoise.js +12 -11
  37. package/speechflow-cli/dst/speechflow-node-a2a-rnnoise.js.map +1 -1
  38. package/speechflow-cli/dst/speechflow-node-a2a-speex.d.ts +1 -1
  39. package/speechflow-cli/dst/speechflow-node-a2a-speex.js +13 -12
  40. package/speechflow-cli/dst/speechflow-node-a2a-speex.js.map +1 -1
  41. package/speechflow-cli/dst/speechflow-node-a2a-vad.d.ts +1 -1
  42. package/speechflow-cli/dst/speechflow-node-a2a-vad.js +24 -23
  43. package/speechflow-cli/dst/speechflow-node-a2a-vad.js.map +1 -1
  44. package/speechflow-cli/dst/speechflow-node-a2a-wav.js +35 -7
  45. package/speechflow-cli/dst/speechflow-node-a2a-wav.js.map +1 -1
  46. package/speechflow-cli/dst/speechflow-node-a2t-amazon.d.ts +1 -1
  47. package/speechflow-cli/dst/speechflow-node-a2t-amazon.js +16 -16
  48. package/speechflow-cli/dst/speechflow-node-a2t-amazon.js.map +1 -1
  49. package/speechflow-cli/dst/speechflow-node-a2t-deepgram.d.ts +1 -1
  50. package/speechflow-cli/dst/speechflow-node-a2t-deepgram.js +16 -16
  51. package/speechflow-cli/dst/speechflow-node-a2t-deepgram.js.map +1 -1
  52. package/speechflow-cli/dst/speechflow-node-a2t-openai.d.ts +1 -1
  53. package/speechflow-cli/dst/speechflow-node-a2t-openai.js +15 -15
  54. package/speechflow-cli/dst/speechflow-node-a2t-openai.js.map +1 -1
  55. package/speechflow-cli/dst/speechflow-node-t2a-amazon.d.ts +1 -1
  56. package/speechflow-cli/dst/speechflow-node-t2a-amazon.js +9 -9
  57. package/speechflow-cli/dst/speechflow-node-t2a-amazon.js.map +1 -1
  58. package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.d.ts +1 -1
  59. package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.js +13 -12
  60. package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.js.map +1 -1
  61. package/speechflow-cli/dst/speechflow-node-t2a-kokoro.js +4 -4
  62. package/speechflow-cli/dst/speechflow-node-t2a-kokoro.js.map +1 -1
  63. package/speechflow-cli/dst/speechflow-node-t2t-amazon.js +3 -3
  64. package/speechflow-cli/dst/speechflow-node-t2t-amazon.js.map +1 -1
  65. package/speechflow-cli/dst/speechflow-node-t2t-deepl.js +2 -2
  66. package/speechflow-cli/dst/speechflow-node-t2t-deepl.js.map +1 -1
  67. package/speechflow-cli/dst/speechflow-node-t2t-format.js +36 -2
  68. package/speechflow-cli/dst/speechflow-node-t2t-format.js.map +1 -1
  69. package/speechflow-cli/dst/speechflow-node-t2t-google.js +2 -2
  70. package/speechflow-cli/dst/speechflow-node-t2t-google.js.map +1 -1
  71. package/speechflow-cli/dst/speechflow-node-t2t-modify.js +5 -5
  72. package/speechflow-cli/dst/speechflow-node-t2t-modify.js.map +1 -1
  73. package/speechflow-cli/dst/speechflow-node-t2t-ollama.js +2 -2
  74. package/speechflow-cli/dst/speechflow-node-t2t-ollama.js.map +1 -1
  75. package/speechflow-cli/dst/speechflow-node-t2t-openai.js +2 -2
  76. package/speechflow-cli/dst/speechflow-node-t2t-openai.js.map +1 -1
  77. package/speechflow-cli/dst/speechflow-node-t2t-sentence.d.ts +1 -1
  78. package/speechflow-cli/dst/speechflow-node-t2t-sentence.js +13 -13
  79. package/speechflow-cli/dst/speechflow-node-t2t-sentence.js.map +1 -1
  80. package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js +2 -2
  81. package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js.map +1 -1
  82. package/speechflow-cli/dst/speechflow-node-t2t-transformers.js +2 -2
  83. package/speechflow-cli/dst/speechflow-node-t2t-transformers.js.map +1 -1
  84. package/speechflow-cli/dst/speechflow-node-x2x-filter.js +2 -2
  85. package/speechflow-cli/dst/speechflow-node-x2x-filter.js.map +1 -1
  86. package/speechflow-cli/dst/speechflow-node-x2x-trace.d.ts +1 -1
  87. package/speechflow-cli/dst/speechflow-node-x2x-trace.js +42 -8
  88. package/speechflow-cli/dst/speechflow-node-x2x-trace.js.map +1 -1
  89. package/speechflow-cli/dst/speechflow-node-xio-device.js +3 -2
  90. package/speechflow-cli/dst/speechflow-node-xio-device.js.map +1 -1
  91. package/speechflow-cli/dst/speechflow-node-xio-file.js +19 -18
  92. package/speechflow-cli/dst/speechflow-node-xio-file.js.map +1 -1
  93. package/speechflow-cli/dst/speechflow-node-xio-mqtt.js +13 -13
  94. package/speechflow-cli/dst/speechflow-node-xio-mqtt.js.map +1 -1
  95. package/speechflow-cli/dst/speechflow-node-xio-websocket.js +8 -8
  96. package/speechflow-cli/dst/speechflow-node-xio-websocket.js.map +1 -1
  97. package/speechflow-cli/dst/speechflow-node.js +7 -7
  98. package/speechflow-cli/dst/speechflow-node.js.map +1 -1
  99. package/speechflow-cli/dst/speechflow-util-audio.js +2 -2
  100. package/speechflow-cli/dst/speechflow-util-audio.js.map +1 -1
  101. package/speechflow-cli/dst/speechflow-util-stream.d.ts +1 -0
  102. package/speechflow-cli/dst/speechflow-util-stream.js +22 -2
  103. package/speechflow-cli/dst/speechflow-util-stream.js.map +1 -1
  104. package/speechflow-cli/etc/oxlint.jsonc +2 -1
  105. package/speechflow-cli/etc/tsconfig.json +1 -0
  106. package/speechflow-cli/package.json +21 -21
  107. package/speechflow-cli/src/speechflow-main-cli.ts +2 -2
  108. package/speechflow-cli/src/speechflow-main-graph.ts +4 -3
  109. package/speechflow-cli/src/speechflow-node-a2a-compressor.ts +13 -12
  110. package/speechflow-cli/src/speechflow-node-a2a-expander.ts +13 -12
  111. package/speechflow-cli/src/speechflow-node-a2a-ffmpeg.ts +2 -8
  112. package/speechflow-cli/src/speechflow-node-a2a-filler.ts +19 -17
  113. package/speechflow-cli/src/speechflow-node-a2a-gain.ts +8 -8
  114. package/speechflow-cli/src/speechflow-node-a2a-gender.ts +42 -36
  115. package/speechflow-cli/src/speechflow-node-a2a-meter.ts +11 -11
  116. package/speechflow-cli/src/speechflow-node-a2a-mute.ts +11 -10
  117. package/speechflow-cli/src/speechflow-node-a2a-pitch.ts +221 -0
  118. package/speechflow-cli/src/speechflow-node-a2a-rnnoise.ts +13 -12
  119. package/speechflow-cli/src/speechflow-node-a2a-speex.ts +14 -13
  120. package/speechflow-cli/src/speechflow-node-a2a-vad.ts +24 -23
  121. package/speechflow-cli/src/speechflow-node-a2a-wav.ts +2 -7
  122. package/speechflow-cli/src/speechflow-node-a2t-amazon.ts +16 -16
  123. package/speechflow-cli/src/speechflow-node-a2t-deepgram.ts +16 -16
  124. package/speechflow-cli/src/speechflow-node-a2t-openai.ts +15 -15
  125. package/speechflow-cli/src/speechflow-node-t2a-amazon.ts +9 -9
  126. package/speechflow-cli/src/speechflow-node-t2a-elevenlabs.ts +13 -12
  127. package/speechflow-cli/src/speechflow-node-t2a-kokoro.ts +4 -4
  128. package/speechflow-cli/src/speechflow-node-t2t-amazon.ts +3 -3
  129. package/speechflow-cli/src/speechflow-node-t2t-deepl.ts +2 -2
  130. package/speechflow-cli/src/speechflow-node-t2t-format.ts +3 -2
  131. package/speechflow-cli/src/speechflow-node-t2t-google.ts +2 -2
  132. package/speechflow-cli/src/speechflow-node-t2t-modify.ts +6 -6
  133. package/speechflow-cli/src/speechflow-node-t2t-ollama.ts +2 -2
  134. package/speechflow-cli/src/speechflow-node-t2t-openai.ts +2 -2
  135. package/speechflow-cli/src/speechflow-node-t2t-sentence.ts +13 -13
  136. package/speechflow-cli/src/speechflow-node-t2t-subtitle.ts +2 -2
  137. package/speechflow-cli/src/speechflow-node-t2t-transformers.ts +2 -2
  138. package/speechflow-cli/src/speechflow-node-x2x-filter.ts +2 -2
  139. package/speechflow-cli/src/speechflow-node-x2x-trace.ts +10 -9
  140. package/speechflow-cli/src/speechflow-node-xio-device.ts +4 -3
  141. package/speechflow-cli/src/speechflow-node-xio-file.ts +20 -19
  142. package/speechflow-cli/src/speechflow-node-xio-mqtt.ts +14 -14
  143. package/speechflow-cli/src/speechflow-node-xio-websocket.ts +10 -10
  144. package/speechflow-cli/src/speechflow-node.ts +7 -7
  145. package/speechflow-cli/src/speechflow-util-audio.ts +2 -2
  146. package/speechflow-cli/src/speechflow-util-stream.ts +30 -5
  147. package/speechflow-ui-db/dst/app-font-fa-brands-400.woff2 +0 -0
  148. package/speechflow-ui-db/dst/app-font-fa-regular-400.woff2 +0 -0
  149. package/speechflow-ui-db/dst/app-font-fa-solid-900.woff2 +0 -0
  150. package/speechflow-ui-db/dst/app-font-fa-v4compatibility.woff2 +0 -0
  151. package/speechflow-ui-db/dst/index.css +1 -1
  152. package/speechflow-ui-db/dst/index.js +28 -25
  153. package/speechflow-ui-db/package.json +14 -14
  154. package/speechflow-ui-st/dst/app-font-fa-brands-400.woff2 +0 -0
  155. package/speechflow-ui-st/dst/app-font-fa-regular-400.woff2 +0 -0
  156. package/speechflow-ui-st/dst/app-font-fa-solid-900.woff2 +0 -0
  157. package/speechflow-ui-st/dst/app-font-fa-v4compatibility.woff2 +0 -0
  158. package/speechflow-ui-st/dst/index.css +1 -1
  159. package/speechflow-ui-st/dst/index.js +137 -51
  160. package/speechflow-ui-st/package.json +15 -15
@@ -17,7 +17,7 @@ export default class SpeechFlowNodeA2AGain extends SpeechFlowNode {
17
17
  public static name = "a2a-gain"
18
18
 
19
19
  /* internal state */
20
- private destroyed = false
20
+ private closing = false
21
21
 
22
22
  /* construct node */
23
23
  constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
@@ -36,7 +36,7 @@ export default class SpeechFlowNodeA2AGain extends SpeechFlowNode {
36
36
  /* open node */
37
37
  async open () {
38
38
  /* clear destruction flag */
39
- this.destroyed = false
39
+ this.closing = false
40
40
 
41
41
  /* adjust gain */
42
42
  const adjustGain = (chunk: SpeechFlowChunk & { payload: Buffer }, db: number) => {
@@ -57,7 +57,7 @@ export default class SpeechFlowNodeA2AGain extends SpeechFlowNode {
57
57
  writableObjectMode: true,
58
58
  decodeStrings: false,
59
59
  transform (chunk: SpeechFlowChunk & { payload: Buffer }, encoding, callback) {
60
- if (self.destroyed) {
60
+ if (self.closing) {
61
61
  callback(new Error("stream already destroyed"))
62
62
  return
63
63
  }
@@ -73,7 +73,7 @@ export default class SpeechFlowNodeA2AGain extends SpeechFlowNode {
73
73
  }
74
74
  },
75
75
  final (callback) {
76
- if (self.destroyed) {
76
+ if (self.closing) {
77
77
  callback()
78
78
  return
79
79
  }
@@ -85,12 +85,12 @@ export default class SpeechFlowNodeA2AGain extends SpeechFlowNode {
85
85
 
86
86
  /* close node */
87
87
  async close () {
88
- /* indicate destruction */
89
- this.destroyed = true
88
+ /* indicate closing */
89
+ this.closing = true
90
90
 
91
- /* close stream */
91
+ /* shutdown stream */
92
92
  if (this.stream !== null) {
93
- this.stream.destroy()
93
+ await util.destroyStream(this.stream)
94
94
  this.stream = null
95
95
  }
96
96
  }
@@ -12,17 +12,19 @@ import Stream from "node:stream"
12
12
  import * as Transformers from "@huggingface/transformers"
13
13
  import { WaveFile } from "wavefile"
14
14
  import { getRMS, AudioData } from "audio-inspect"
15
+ import { Duration } from "luxon"
15
16
 
16
17
  /* internal dependencies */
17
18
  import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
18
19
  import * as util from "./speechflow-util"
19
20
 
20
21
  /* audio stream queue element */
22
+ type Gender = "male" | "female" | "unknown"
21
23
  type AudioQueueElement = {
22
24
  type: "audio-frame",
23
25
  chunk: SpeechFlowChunk,
24
26
  data: Float32Array,
25
- gender?: "male" | "female" | "unknown"
27
+ gender?: Gender
26
28
  } | {
27
29
  type: "audio-eof"
28
30
  }
@@ -38,7 +40,7 @@ export default class SpeechFlowNodeA2AGender extends SpeechFlowNode {
38
40
  private queueRecv = this.queue.pointerUse("recv")
39
41
  private queueAC = this.queue.pointerUse("ac")
40
42
  private queueSend = this.queue.pointerUse("send")
41
- private shutdown = false
43
+ private closing = false
42
44
  private workingOffTimer: ReturnType<typeof setTimeout> | null = null
43
45
  private progressInterval: ReturnType<typeof setInterval> | null = null
44
46
 
@@ -65,8 +67,8 @@ export default class SpeechFlowNodeA2AGender extends SpeechFlowNode {
65
67
  if (this.config.audioBitDepth !== 16 || !this.config.audioLittleEndian)
66
68
  throw new Error("Gender node currently supports PCM-S16LE audio only")
67
69
 
68
- /* clear shutdown flag */
69
- this.shutdown = false
70
+ /* clear destruction flag */
71
+ this.closing = false
70
72
 
71
73
  /* the used model */
72
74
  const model = "Xenova/wav2vec2-large-xlsr-53-gender-recognition-librispeech"
@@ -74,7 +76,7 @@ export default class SpeechFlowNodeA2AGender extends SpeechFlowNode {
74
76
  /* track download progress when instantiating Transformers engine and model */
75
77
  const progressState = new Map<string, number>()
76
78
  const progressCallback: Transformers.ProgressCallback = (progress: any) => {
77
- if (this.shutdown)
79
+ if (this.closing)
78
80
  return
79
81
  let artifact = model
80
82
  if (typeof progress.file === "string")
@@ -88,7 +90,7 @@ export default class SpeechFlowNodeA2AGender extends SpeechFlowNode {
88
90
  progressState.set(artifact, percent)
89
91
  }
90
92
  this.progressInterval = setInterval(() => {
91
- if (this.shutdown)
93
+ if (this.closing)
92
94
  return
93
95
  for (const [ artifact, percent ] of progressState) {
94
96
  this.log("info", `downloaded ${percent.toFixed(2)}% of artifact "${artifact}"`)
@@ -126,9 +128,10 @@ export default class SpeechFlowNodeA2AGender extends SpeechFlowNode {
126
128
  const sampleRateTarget = 16000
127
129
 
128
130
  /* classify a single large-enough concatenated audio frame */
131
+ let genderLast: Gender = "unknown"
129
132
  const classify = async (data: Float32Array) => {
130
- if (this.shutdown || this.classifier === null)
131
- throw new Error("classifier shutdown during operation")
133
+ if (this.closing || this.classifier === null)
134
+ throw new Error("classifier destroyed during operation")
132
135
 
133
136
  /* check volume level and return "unknown" if too low
134
137
  in order to avoid a wrong classification */
@@ -141,7 +144,7 @@ export default class SpeechFlowNodeA2AGender extends SpeechFlowNode {
141
144
  } satisfies AudioData
142
145
  const rms = getRMS(audioData, { asDB: true })
143
146
  if (rms < this.params.volumeThreshold)
144
- return "unknown"
147
+ return genderLast
145
148
 
146
149
  /* classify audio */
147
150
  const result = await Promise.race([
@@ -157,22 +160,25 @@ export default class SpeechFlowNodeA2AGender extends SpeechFlowNode {
157
160
  const female = c2 ? c2.score : 0.0
158
161
  const threshold = this.params.threshold
159
162
  const hysteresis = this.params.hysteresis
163
+ let genderNow: Gender = genderLast
160
164
  if (male > threshold && male > female + hysteresis)
161
- return "male"
165
+ genderNow = "male"
162
166
  else if (female > threshold && female > male + hysteresis)
163
- return "female"
164
- else
165
- return "unknown"
167
+ genderNow = "female"
168
+ if (genderNow !== genderLast) {
169
+ this.log("info", `switching detected gender from <${genderLast}> to <${genderNow}>`)
170
+ genderLast = genderNow
171
+ }
172
+ return genderNow
166
173
  }
167
174
 
168
175
  /* work off queued audio frames */
169
176
  const frameWindowDuration = this.params.window / 1000
170
177
  const frameWindowSamples = Math.floor(frameWindowDuration * sampleRateTarget)
171
- let lastGender = ""
172
178
  let workingOff = false
173
179
  const workOffQueue = async () => {
174
180
  /* control working off round */
175
- if (workingOff || this.shutdown)
181
+ if (workingOff || this.closing)
176
182
  return
177
183
  workingOff = true
178
184
  if (this.workingOffTimer !== null) {
@@ -189,7 +195,7 @@ export default class SpeechFlowNodeA2AGender extends SpeechFlowNode {
189
195
  data.fill(0)
190
196
  let samples = 0
191
197
  let pos = pos0
192
- while (pos < posL && samples < frameWindowSamples && !this.shutdown) {
198
+ while (pos < posL && samples < frameWindowSamples && !this.closing) {
193
199
  const element = this.queueAC.peek(pos)
194
200
  if (element === undefined || element.type !== "audio-frame")
195
201
  break
@@ -199,12 +205,12 @@ export default class SpeechFlowNodeA2AGender extends SpeechFlowNode {
199
205
  }
200
206
  pos++
201
207
  }
202
- if (pos0 < pos && samples > frameWindowSamples * 0.75 && !this.shutdown) {
208
+ if (pos0 < pos && samples > frameWindowSamples * 0.75 && !this.closing) {
203
209
  const gender = await classify(data)
204
- if (this.shutdown)
210
+ if (this.closing)
205
211
  return
206
212
  const posM = pos0 + Math.trunc((pos - pos0) * 0.25)
207
- while (pos0 < posM && pos0 < posL && !this.shutdown) {
213
+ while (pos0 < posM && pos0 < posL && !this.closing) {
208
214
  const element = this.queueAC.peek(pos0)
209
215
  if (element === undefined || element.type !== "audio-frame")
210
216
  break
@@ -213,10 +219,6 @@ export default class SpeechFlowNodeA2AGender extends SpeechFlowNode {
213
219
  this.queueAC.walk(+1)
214
220
  pos0++
215
221
  }
216
- if (lastGender !== gender && !this.shutdown) {
217
- this.log("info", `gender now recognized as <${gender}>`)
218
- lastGender = gender
219
- }
220
222
  }
221
223
  }
222
224
  catch (error) {
@@ -225,7 +227,7 @@ export default class SpeechFlowNodeA2AGender extends SpeechFlowNode {
225
227
 
226
228
  /* re-initiate working off round */
227
229
  workingOff = false
228
- if (!this.shutdown) {
230
+ if (!this.closing) {
229
231
  this.workingOffTimer = setTimeout(workOffQueue, 100)
230
232
  this.queue.once("write", workOffQueue)
231
233
  }
@@ -242,7 +244,7 @@ export default class SpeechFlowNodeA2AGender extends SpeechFlowNode {
242
244
 
243
245
  /* receive audio chunk (writable side of stream) */
244
246
  write (chunk: SpeechFlowChunk, encoding, callback) {
245
- if (self.shutdown) {
247
+ if (self.closing) {
246
248
  callback(new Error("stream already destroyed"))
247
249
  return
248
250
  }
@@ -257,21 +259,21 @@ export default class SpeechFlowNodeA2AGender extends SpeechFlowNode {
257
259
  const wav = new WaveFile()
258
260
  wav.fromScratch(self.config.audioChannels, self.config.audioSampleRate, "32f", data)
259
261
  wav.toSampleRate(sampleRateTarget, { method: "cubic" })
260
- data = wav.getSamples(false, Float32Array) as any as Float32Array<ArrayBuffer>
262
+ data = wav.getSamples(false, Float32Array) as unknown as Float32Array<ArrayBuffer>
261
263
 
262
264
  /* queue chunk and converted data */
263
265
  self.queueRecv.append({ type: "audio-frame", chunk, data })
264
266
  callback()
265
267
  }
266
268
  catch (error) {
267
- callback(error instanceof Error ? error : new Error("audio processing failed"))
269
+ callback(util.ensureError(error, "audio processing failed"))
268
270
  }
269
271
  }
270
272
  },
271
273
 
272
274
  /* receive no more audio chunks (writable side of stream) */
273
275
  final (callback) {
274
- if (self.shutdown) {
276
+ if (self.closing) {
275
277
  callback()
276
278
  return
277
279
  }
@@ -285,7 +287,7 @@ export default class SpeechFlowNodeA2AGender extends SpeechFlowNode {
285
287
  read (_size) {
286
288
  /* flush pending audio chunks */
287
289
  const flushPendingChunks = () => {
288
- if (self.shutdown) {
290
+ if (self.closing) {
289
291
  this.push(null)
290
292
  return
291
293
  }
@@ -297,7 +299,7 @@ export default class SpeechFlowNodeA2AGender extends SpeechFlowNode {
297
299
  && element.type === "audio-frame"
298
300
  && element.gender !== undefined) {
299
301
  while (true) {
300
- if (self.shutdown) {
302
+ if (self.closing) {
301
303
  this.push(null)
302
304
  return
303
305
  }
@@ -312,14 +314,18 @@ export default class SpeechFlowNodeA2AGender extends SpeechFlowNode {
312
314
  && element.gender === undefined)
313
315
  break
314
316
  const duration = util.audioArrayDuration(element.data)
315
- self.log("debug", `send chunk (${duration.toFixed(3)}s) with gender <${element.gender}>`)
317
+ const fmtTime = (t: Duration) => t.toFormat("hh:mm:ss.SSS")
318
+ const times = `start: ${fmtTime(element.chunk.timestampStart)}, ` +
319
+ `end: ${fmtTime(element.chunk.timestampEnd)}`
320
+ self.log("debug", `send chunk (${times}, duration: ${duration.toFixed(3)}s) ` +
321
+ `with gender <${element.gender}>`)
316
322
  element.chunk.meta.set("gender", element.gender)
317
323
  this.push(element.chunk)
318
324
  self.queueSend.walk(+1)
319
325
  self.queue.trim()
320
326
  }
321
327
  }
322
- else if (!self.shutdown)
328
+ else if (!self.closing)
323
329
  self.queue.once("write", flushPendingChunks)
324
330
  }
325
331
  flushPendingChunks()
@@ -329,8 +335,8 @@ export default class SpeechFlowNodeA2AGender extends SpeechFlowNode {
329
335
 
330
336
  /* close node */
331
337
  async close () {
332
- /* indicate shutdown */
333
- this.shutdown = true
338
+ /* indicate closing */
339
+ this.closing = true
334
340
 
335
341
  /* cleanup working-off timer */
336
342
  if (this.workingOffTimer !== null) {
@@ -347,9 +353,9 @@ export default class SpeechFlowNodeA2AGender extends SpeechFlowNode {
347
353
  /* remove all event listeners */
348
354
  this.queue.removeAllListeners("write")
349
355
 
350
- /* close stream */
356
+ /* shutdown stream */
351
357
  if (this.stream !== null) {
352
- this.stream.destroy()
358
+ await util.destroyStream(this.stream)
353
359
  this.stream = null
354
360
  }
355
361
 
@@ -24,7 +24,7 @@ export default class SpeechFlowNodeA2AMeter extends SpeechFlowNode {
24
24
  private calcInterval: ReturnType<typeof setInterval> | null = null
25
25
  private silenceTimer: ReturnType<typeof setTimeout> | null = null
26
26
  private chunkBuffer = new Float32Array(0)
27
- private destroyed = false
27
+ private closing = false
28
28
 
29
29
  /* construct node */
30
30
  constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
@@ -52,7 +52,7 @@ export default class SpeechFlowNodeA2AMeter extends SpeechFlowNode {
52
52
  throw new Error("meter node currently supports PCM-S16LE audio only")
53
53
 
54
54
  /* clear destruction flag */
55
- this.destroyed = false
55
+ this.closing = false
56
56
 
57
57
  /* internal state */
58
58
  let lufsm = -60
@@ -72,7 +72,7 @@ export default class SpeechFlowNodeA2AMeter extends SpeechFlowNode {
72
72
  /* setup chunking interval */
73
73
  this.calcInterval = setInterval(() => {
74
74
  /* short-circuit during destruction */
75
- if (this.destroyed)
75
+ if (this.closing)
76
76
  return
77
77
 
78
78
  /* short-circuit if still not enough chunk data */
@@ -133,7 +133,7 @@ export default class SpeechFlowNodeA2AMeter extends SpeechFlowNode {
133
133
 
134
134
  /* setup loudness emitting interval */
135
135
  this.emitInterval = setInterval(() => {
136
- if (this.destroyed)
136
+ if (this.closing)
137
137
  return
138
138
  this.log("debug", `LUFS-M: ${lufsm.toFixed(1)} dB, RMS: ${rms.toFixed(1)} dB`)
139
139
  this.sendResponse([ "meter", "LUFS-M", lufsm ])
@@ -152,7 +152,7 @@ export default class SpeechFlowNodeA2AMeter extends SpeechFlowNode {
152
152
 
153
153
  /* transform audio chunk */
154
154
  transform (chunk: SpeechFlowChunk, encoding, callback) {
155
- if (self.destroyed) {
155
+ if (self.closing) {
156
156
  callback(new Error("stream already destroyed"))
157
157
  return
158
158
  }
@@ -178,12 +178,12 @@ export default class SpeechFlowNodeA2AMeter extends SpeechFlowNode {
178
178
  callback()
179
179
  }
180
180
  catch (error) {
181
- callback(error instanceof Error ? error : new Error("meter processing failed"))
181
+ callback(util.ensureError(error, "meter processing failed"))
182
182
  }
183
183
  }
184
184
  },
185
185
  final (callback) {
186
- if (self.destroyed || self.params.mode === "sink") {
186
+ if (self.closing || self.params.mode === "sink") {
187
187
  callback()
188
188
  return
189
189
  }
@@ -195,8 +195,8 @@ export default class SpeechFlowNodeA2AMeter extends SpeechFlowNode {
195
195
 
196
196
  /* close node */
197
197
  async close () {
198
- /* indicate destruction immediately to stop any ongoing operations */
199
- this.destroyed = true
198
+ /* indicate closing immediately to stop any ongoing operations */
199
+ this.closing = true
200
200
 
201
201
  /* stop intervals */
202
202
  if (this.emitInterval !== null) {
@@ -212,9 +212,9 @@ export default class SpeechFlowNodeA2AMeter extends SpeechFlowNode {
212
212
  this.silenceTimer = null
213
213
  }
214
214
 
215
- /* close stream */
215
+ /* shutdown stream */
216
216
  if (this.stream !== null) {
217
- this.stream.destroy()
217
+ await util.destroyStream(this.stream)
218
218
  this.stream = null
219
219
  }
220
220
  }
@@ -9,6 +9,7 @@ import Stream from "node:stream"
9
9
 
10
10
  /* internal dependencies */
11
11
  import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
12
+ import * as util from "./speechflow-util"
12
13
 
13
14
  /* the type of muting */
14
15
  type MuteMode =
@@ -23,7 +24,7 @@ export default class SpeechFlowNodeA2AMute extends SpeechFlowNode {
23
24
 
24
25
  /* internal state */
25
26
  private muteMode: MuteMode = "none"
26
- private destroyed = false
27
+ private closing = false
27
28
 
28
29
  /* construct node */
29
30
  constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
@@ -39,7 +40,7 @@ export default class SpeechFlowNodeA2AMute extends SpeechFlowNode {
39
40
 
40
41
  /* receive external request */
41
42
  async receiveRequest (params: any[]) {
42
- if (this.destroyed)
43
+ if (this.closing)
43
44
  throw new Error("mute: node already destroyed")
44
45
  try {
45
46
  if (params.length === 2 && params[0] === "mode") {
@@ -61,7 +62,7 @@ export default class SpeechFlowNodeA2AMute extends SpeechFlowNode {
61
62
 
62
63
  /* change mute mode */
63
64
  setMuteMode (mode: MuteMode) {
64
- if (this.destroyed) {
65
+ if (this.closing) {
65
66
  this.log("warning", "attempted to set mute mode on destroyed node")
66
67
  return
67
68
  }
@@ -72,7 +73,7 @@ export default class SpeechFlowNodeA2AMute extends SpeechFlowNode {
72
73
  /* open node */
73
74
  async open () {
74
75
  /* clear destruction flag */
75
- this.destroyed = false
76
+ this.closing = false
76
77
 
77
78
  /* establish a transform stream */
78
79
  const self = this
@@ -81,7 +82,7 @@ export default class SpeechFlowNodeA2AMute extends SpeechFlowNode {
81
82
  writableObjectMode: true,
82
83
  decodeStrings: false,
83
84
  transform (chunk: SpeechFlowChunk, encoding, callback) {
84
- if (self.destroyed) {
85
+ if (self.closing) {
85
86
  callback(new Error("stream already destroyed"))
86
87
  return
87
88
  }
@@ -106,7 +107,7 @@ export default class SpeechFlowNodeA2AMute extends SpeechFlowNode {
106
107
  }
107
108
  },
108
109
  final (callback) {
109
- if (self.destroyed) {
110
+ if (self.closing) {
110
111
  callback()
111
112
  return
112
113
  }
@@ -118,12 +119,12 @@ export default class SpeechFlowNodeA2AMute extends SpeechFlowNode {
118
119
 
119
120
  /* close node */
120
121
  async close () {
121
- /* indicate destruction */
122
- this.destroyed = true
122
+ /* indicate closing */
123
+ this.closing = true
123
124
 
124
- /* close stream */
125
+ /* shutdown stream */
125
126
  if (this.stream !== null) {
126
- this.stream.destroy()
127
+ await util.destroyStream(this.stream)
127
128
  this.stream = null
128
129
  }
129
130
  }
@@ -0,0 +1,221 @@
1
+ /*
2
+ ** SpeechFlow - Speech Processing Flow Graph
3
+ ** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
4
+ ** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
5
+ */
6
+
7
+ /* standard dependencies */
8
+ import path from "node:path"
9
+ import Stream from "node:stream"
10
+
11
+ /* external dependencies */
12
+ import { AudioWorkletNode } from "node-web-audio-api"
13
+
14
+ /* internal dependencies */
15
+ import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
16
+ import * as util from "./speechflow-util"
17
+
18
/* parameter configuration of the pitch shifter
   (every field is optional; omitted fields fall back to the
   defaults applied in the AudioPitchShifter constructor) */
interface AudioPitchShifterConfig {
    /* value for the worklet "rate" parameter (default: 1.0) */
    rate?: number

    /* value for the worklet "tempo" parameter (default: 1.0) */
    tempo?: number

    /* value for the worklet "pitch" parameter (default: 1.0) */
    pitch?: number

    /* value for the worklet "pitchSemitones" parameter (default: 0.0) */
    semitones?: number
}
25
+
26
/* audio pitch shifter class using SoundTouch WebAudio worklet

   Extends util.WebAudio and inserts a "soundtouch-processor" worklet
   node between the parent's source node and capture node. The four
   tunables (rate, tempo, pitch, semitones) are applied once during
   setup() and can be changed afterwards through the set*() methods. */
class AudioPitchShifter extends util.WebAudio {
    /* mapping of configuration fields to worklet parameter names
       (only "semitones" differs: the worklet names it "pitchSemitones") */
    private static readonly paramNames: Readonly<Record<keyof AudioPitchShifterConfig, string>> = {
        rate:      "rate",
        tempo:     "tempo",
        pitch:     "pitch",
        semitones: "pitchSemitones"
    }

    /* internal state */
    private pitchNode: AudioWorkletNode | null = null
    private config: Required<AudioPitchShifterConfig>

    /* construct object
       (sampleRate and channels are passed through to util.WebAudio;
       missing config fields default to rate/tempo/pitch 1.0 and semitones 0.0) */
    constructor (
        sampleRate: number,
        channels:   number,
        config:     AudioPitchShifterConfig = {}
    ) {
        super(sampleRate, channels)
        this.config = {
            rate:      config.rate      ?? 1.0,
            tempo:     config.tempo     ?? 1.0,
            pitch:     config.pitch     ?? 1.0,
            semitones: config.semitones ?? 0.0
        }
    }

    /* setup object
       (loads the worklet module, creates the worklet node, applies the
       initial parameter values and wires source -> pitch -> capture;
       throws an Error if the worklet lacks an expected parameter or the
       parent did not provide the source/capture nodes) */
    public async setup (): Promise<void> {
        await super.setup()

        /* add SoundTouch worklet module
           (the path is resolved relative to this compiled file) */
        const packagePath = path.join(__dirname, "../node_modules/@soundtouchjs/audio-worklet")
        const workletPath = path.join(packagePath, "dist/soundtouch-worklet.js")
        await this.audioContext.audioWorklet.addModule(workletPath)

        /* create SoundTouch worklet node */
        this.pitchNode = new AudioWorkletNode(this.audioContext, "soundtouch-processor", {
            numberOfInputs:     1,
            numberOfOutputs:    1,
            outputChannelCount: [ this.channels ]
        })

        /* set initial parameter values
           (fail with a descriptive error instead of an opaque
           TypeError in case the worklet does not expose a parameter) */
        const params = this.pitchNode.parameters as Map<string, AudioParam>
        for (const [ field, name ] of Object.entries(AudioPitchShifter.paramNames)) {
            const param = params.get(name)
            if (param === undefined)
                throw new Error(`pitch: SoundTouch worklet lacks parameter "${name}"`)
            param.value = this.config[field as keyof AudioPitchShifterConfig]
        }

        /* connect nodes: source -> pitch -> capture */
        if (!this.sourceNode || !this.captureNode)
            throw new Error("pitch: WebAudio source/capture nodes not available")
        this.sourceNode.connect(this.pitchNode)
        this.pitchNode.connect(this.captureNode)
    }

    /* update an audio parameter value
       (schedules the value at the current audio context time if the
       worklet node exists; the stored configuration is updated in any case) */
    private updateParameter (
        field: keyof AudioPitchShifterConfig,
        value: number
    ): void {
        const name   = AudioPitchShifter.paramNames[field]
        const params = this.pitchNode?.parameters as Map<string, AudioParam> | undefined
        params?.get(name)?.setValueAtTime(value, this.audioContext.currentTime)
        this.config[field] = value
    }

    /* update rate value */
    public setRate (rate: number): void {
        this.updateParameter("rate", rate)
    }

    /* update tempo value */
    public setTempo (tempo: number): void {
        this.updateParameter("tempo", tempo)
    }

    /* update pitch shift value */
    public setPitch (pitch: number): void {
        this.updateParameter("pitch", pitch)
    }

    /* update pitch semitones setting */
    public setSemitones (semitones: number): void {
        this.updateParameter("semitones", semitones)
    }

    /* destroy the pitch shifter
       (disconnects the worklet node first, then destroys the parent) */
    public async destroy (): Promise<void> {
        /* disconnect pitch node */
        if (this.pitchNode !== null) {
            this.pitchNode.disconnect()
            this.pitchNode = null
        }

        /* destroy parent */
        await super.destroy()
    }
}
118
+
119
/* SpeechFlow node for pitch adjustment using SoundTouch WebAudio

   Audio-to-audio node: each incoming chunk payload is converted to
   16-bit samples, passed through an AudioPitchShifter and pushed
   downstream with its payload replaced by the processed samples. */
export default class SpeechFlowNodeA2APitch extends SpeechFlowNode {
    /* declare official node name */
    public static name = "a2a-pitch"

    /* internal state */
    private closing = false
    private pitchShifter: AudioPitchShifter | null = null

    /* construct node */
    constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
        super(id, cfg, opts, args)

        /* declare node configuration parameters
           (rate/tempo/pitch accept 0.25..4.0, semitones accepts -24..24) */
        this.configure({
            rate:      { type: "number", val: 1.0, match: (n: number) => n >= 0.25 && n <= 4.0 },
            tempo:     { type: "number", val: 1.0, match: (n: number) => n >= 0.25 && n <= 4.0 },
            pitch:     { type: "number", val: 1.0, match: (n: number) => n >= 0.25 && n <= 4.0 },
            semitones: { type: "number", val: 0.0, match: (n: number) => n >= -24  && n <= 24  }
        })

        /* declare node input/output format */
        this.input  = "audio"
        this.output = "audio"
    }

    /* open node */
    async open () {
        /* clear closing flag */
        this.closing = false

        /* setup pitch shifter */
        this.pitchShifter = new AudioPitchShifter(
            this.config.audioSampleRate,
            this.config.audioChannels, {
                rate:      this.params.rate,
                tempo:     this.params.tempo,
                pitch:     this.params.pitch,
                semitones: this.params.semitones
            }
        )
        await this.pitchShifter.setup()

        /* establish a transform stream */
        const self = this
        this.stream = new Stream.Transform({
            readableObjectMode: true,
            writableObjectMode: true,
            decodeStrings:      false,
            transform (chunk: SpeechFlowChunk & { payload: Buffer }, encoding, callback) {
                if (self.closing) {
                    callback(new Error("stream already destroyed"))
                    return
                }
                if (!Buffer.isBuffer(chunk.payload)) {
                    callback(new Error("invalid chunk payload type"))
                    return
                }

                /* ensure the pitch shifter exists: an optional call on a
                   missing shifter would skip the whole promise chain and
                   never invoke the callback, stalling the stream */
                const shifter = self.pitchShifter
                if (shifter === null) {
                    callback(new Error("pitch shifter not available"))
                    return
                }

                /* shift pitch of audio chunk */
                const samples = util.convertBufToI16(chunk.payload, self.config.audioLittleEndian)
                shifter.process(samples).then((result) => {
                    /* drop the result if the node was closed in the meantime
                       (the rejection handler below ignores it while closing) */
                    if (self.closing)
                        throw new Error("stream already destroyed")

                    /* take over pitch-shifted data */
                    chunk.payload = util.convertI16ToBuf(result, self.config.audioLittleEndian)
                    this.push(chunk)
                    callback()
                }).catch((error: unknown) => {
                    /* report failures only while the node is not closing */
                    if (!self.closing)
                        callback(util.ensureError(error, "pitch shifting failed"))
                })
            },
            final (callback) {
                if (self.closing) {
                    callback()
                    return
                }
                this.push(null)
                callback()
            }
        })
    }

    /* close node */
    async close () {
        /* indicate closing */
        this.closing = true

        /* destroy pitch shifter */
        if (this.pitchShifter !== null) {
            await this.pitchShifter.destroy()
            this.pitchShifter = null
        }

        /* shutdown stream */
        if (this.stream !== null) {
            await util.destroyStream(this.stream)
            this.stream = null
        }
    }
}