speechflow 1.6.4 → 1.6.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (178) hide show
  1. package/CHANGELOG.md +22 -0
  2. package/README.md +28 -3
  3. package/etc/speechflow.yaml +15 -13
  4. package/etc/stx.conf +5 -0
  5. package/package.json +5 -5
  6. package/speechflow-cli/dst/speechflow-main-api.js +3 -7
  7. package/speechflow-cli/dst/speechflow-main-api.js.map +1 -1
  8. package/speechflow-cli/dst/speechflow-main-graph.js +1 -1
  9. package/speechflow-cli/dst/speechflow-main.js +6 -0
  10. package/speechflow-cli/dst/speechflow-main.js.map +1 -1
  11. package/speechflow-cli/dst/speechflow-node-a2a-compressor-wt.js +1 -21
  12. package/speechflow-cli/dst/speechflow-node-a2a-compressor-wt.js.map +1 -1
  13. package/speechflow-cli/dst/speechflow-node-a2a-compressor.d.ts +1 -1
  14. package/speechflow-cli/dst/speechflow-node-a2a-compressor.js +12 -11
  15. package/speechflow-cli/dst/speechflow-node-a2a-compressor.js.map +1 -1
  16. package/speechflow-cli/dst/speechflow-node-a2a-expander-wt.js +1 -21
  17. package/speechflow-cli/dst/speechflow-node-a2a-expander-wt.js.map +1 -1
  18. package/speechflow-cli/dst/speechflow-node-a2a-expander.d.ts +1 -1
  19. package/speechflow-cli/dst/speechflow-node-a2a-expander.js +12 -11
  20. package/speechflow-cli/dst/speechflow-node-a2a-expander.js.map +1 -1
  21. package/speechflow-cli/dst/speechflow-node-a2a-ffmpeg.js +4 -10
  22. package/speechflow-cli/dst/speechflow-node-a2a-ffmpeg.js.map +1 -1
  23. package/speechflow-cli/dst/speechflow-node-a2a-filler.d.ts +1 -1
  24. package/speechflow-cli/dst/speechflow-node-a2a-filler.js +18 -16
  25. package/speechflow-cli/dst/speechflow-node-a2a-filler.js.map +1 -1
  26. package/speechflow-cli/dst/speechflow-node-a2a-gain.d.ts +1 -1
  27. package/speechflow-cli/dst/speechflow-node-a2a-gain.js +8 -8
  28. package/speechflow-cli/dst/speechflow-node-a2a-gain.js.map +1 -1
  29. package/speechflow-cli/dst/speechflow-node-a2a-gender.d.ts +1 -1
  30. package/speechflow-cli/dst/speechflow-node-a2a-gender.js +70 -60
  31. package/speechflow-cli/dst/speechflow-node-a2a-gender.js.map +1 -1
  32. package/speechflow-cli/dst/speechflow-node-a2a-meter.d.ts +1 -1
  33. package/speechflow-cli/dst/speechflow-node-a2a-meter.js +58 -42
  34. package/speechflow-cli/dst/speechflow-node-a2a-meter.js.map +1 -1
  35. package/speechflow-cli/dst/speechflow-node-a2a-mute.d.ts +1 -1
  36. package/speechflow-cli/dst/speechflow-node-a2a-mute.js +44 -10
  37. package/speechflow-cli/dst/speechflow-node-a2a-mute.js.map +1 -1
  38. package/speechflow-cli/dst/speechflow-node-a2a-pitch.d.ts +13 -0
  39. package/speechflow-cli/dst/speechflow-node-a2a-pitch.js +213 -0
  40. package/speechflow-cli/dst/speechflow-node-a2a-pitch.js.map +1 -0
  41. package/speechflow-cli/dst/speechflow-node-a2a-pitch2-wt.js +149 -0
  42. package/speechflow-cli/dst/speechflow-node-a2a-pitch2-wt.js.map +1 -0
  43. package/speechflow-cli/dst/speechflow-node-a2a-pitch2.d.ts +13 -0
  44. package/speechflow-cli/dst/speechflow-node-a2a-pitch2.js +202 -0
  45. package/speechflow-cli/dst/speechflow-node-a2a-pitch2.js.map +1 -0
  46. package/speechflow-cli/dst/speechflow-node-a2a-rnnoise.d.ts +1 -1
  47. package/speechflow-cli/dst/speechflow-node-a2a-rnnoise.js +13 -11
  48. package/speechflow-cli/dst/speechflow-node-a2a-rnnoise.js.map +1 -1
  49. package/speechflow-cli/dst/speechflow-node-a2a-speex.d.ts +1 -1
  50. package/speechflow-cli/dst/speechflow-node-a2a-speex.js +13 -12
  51. package/speechflow-cli/dst/speechflow-node-a2a-speex.js.map +1 -1
  52. package/speechflow-cli/dst/speechflow-node-a2a-vad.d.ts +1 -1
  53. package/speechflow-cli/dst/speechflow-node-a2a-vad.js +26 -25
  54. package/speechflow-cli/dst/speechflow-node-a2a-vad.js.map +1 -1
  55. package/speechflow-cli/dst/speechflow-node-a2a-wav.js +35 -7
  56. package/speechflow-cli/dst/speechflow-node-a2a-wav.js.map +1 -1
  57. package/speechflow-cli/dst/speechflow-node-a2t-amazon.d.ts +1 -1
  58. package/speechflow-cli/dst/speechflow-node-a2t-amazon.js +16 -16
  59. package/speechflow-cli/dst/speechflow-node-a2t-amazon.js.map +1 -1
  60. package/speechflow-cli/dst/speechflow-node-a2t-deepgram.d.ts +1 -1
  61. package/speechflow-cli/dst/speechflow-node-a2t-deepgram.js +16 -16
  62. package/speechflow-cli/dst/speechflow-node-a2t-deepgram.js.map +1 -1
  63. package/speechflow-cli/dst/speechflow-node-a2t-openai.d.ts +1 -2
  64. package/speechflow-cli/dst/speechflow-node-a2t-openai.js +15 -21
  65. package/speechflow-cli/dst/speechflow-node-a2t-openai.js.map +1 -1
  66. package/speechflow-cli/dst/speechflow-node-t2a-amazon.d.ts +1 -2
  67. package/speechflow-cli/dst/speechflow-node-t2a-amazon.js +9 -15
  68. package/speechflow-cli/dst/speechflow-node-t2a-amazon.js.map +1 -1
  69. package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.d.ts +1 -2
  70. package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.js +13 -18
  71. package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.js.map +1 -1
  72. package/speechflow-cli/dst/speechflow-node-t2a-kokoro.d.ts +0 -1
  73. package/speechflow-cli/dst/speechflow-node-t2a-kokoro.js +4 -10
  74. package/speechflow-cli/dst/speechflow-node-t2a-kokoro.js.map +1 -1
  75. package/speechflow-cli/dst/speechflow-node-t2t-amazon.js +3 -3
  76. package/speechflow-cli/dst/speechflow-node-t2t-amazon.js.map +1 -1
  77. package/speechflow-cli/dst/speechflow-node-t2t-deepl.js +2 -2
  78. package/speechflow-cli/dst/speechflow-node-t2t-deepl.js.map +1 -1
  79. package/speechflow-cli/dst/speechflow-node-t2t-format.js +36 -2
  80. package/speechflow-cli/dst/speechflow-node-t2t-format.js.map +1 -1
  81. package/speechflow-cli/dst/speechflow-node-t2t-google.js +2 -2
  82. package/speechflow-cli/dst/speechflow-node-t2t-google.js.map +1 -1
  83. package/speechflow-cli/dst/speechflow-node-t2t-modify.js +5 -5
  84. package/speechflow-cli/dst/speechflow-node-t2t-modify.js.map +1 -1
  85. package/speechflow-cli/dst/speechflow-node-t2t-ollama.js +3 -3
  86. package/speechflow-cli/dst/speechflow-node-t2t-ollama.js.map +1 -1
  87. package/speechflow-cli/dst/speechflow-node-t2t-openai.js +2 -2
  88. package/speechflow-cli/dst/speechflow-node-t2t-openai.js.map +1 -1
  89. package/speechflow-cli/dst/speechflow-node-t2t-sentence.d.ts +1 -1
  90. package/speechflow-cli/dst/speechflow-node-t2t-sentence.js +13 -13
  91. package/speechflow-cli/dst/speechflow-node-t2t-sentence.js.map +1 -1
  92. package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js +8 -8
  93. package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js.map +1 -1
  94. package/speechflow-cli/dst/speechflow-node-t2t-transformers.js +2 -2
  95. package/speechflow-cli/dst/speechflow-node-t2t-transformers.js.map +1 -1
  96. package/speechflow-cli/dst/speechflow-node-x2x-filter.js +2 -2
  97. package/speechflow-cli/dst/speechflow-node-x2x-filter.js.map +1 -1
  98. package/speechflow-cli/dst/speechflow-node-x2x-trace.d.ts +1 -1
  99. package/speechflow-cli/dst/speechflow-node-x2x-trace.js +42 -8
  100. package/speechflow-cli/dst/speechflow-node-x2x-trace.js.map +1 -1
  101. package/speechflow-cli/dst/speechflow-node-xio-device.js +6 -4
  102. package/speechflow-cli/dst/speechflow-node-xio-device.js.map +1 -1
  103. package/speechflow-cli/dst/speechflow-node-xio-file.js +19 -18
  104. package/speechflow-cli/dst/speechflow-node-xio-file.js.map +1 -1
  105. package/speechflow-cli/dst/speechflow-node-xio-mqtt.js +13 -13
  106. package/speechflow-cli/dst/speechflow-node-xio-mqtt.js.map +1 -1
  107. package/speechflow-cli/dst/speechflow-node-xio-websocket.js +8 -8
  108. package/speechflow-cli/dst/speechflow-node-xio-websocket.js.map +1 -1
  109. package/speechflow-cli/dst/speechflow-node.js +6 -6
  110. package/speechflow-cli/dst/speechflow-node.js.map +1 -1
  111. package/speechflow-cli/dst/speechflow-util-audio.d.ts +1 -0
  112. package/speechflow-cli/dst/speechflow-util-audio.js +22 -1
  113. package/speechflow-cli/dst/speechflow-util-audio.js.map +1 -1
  114. package/speechflow-cli/dst/speechflow-util-error.d.ts +1 -1
  115. package/speechflow-cli/dst/speechflow-util-error.js +7 -1
  116. package/speechflow-cli/dst/speechflow-util-error.js.map +1 -1
  117. package/speechflow-cli/dst/speechflow-util-stream.d.ts +2 -1
  118. package/speechflow-cli/dst/speechflow-util-stream.js +23 -3
  119. package/speechflow-cli/dst/speechflow-util-stream.js.map +1 -1
  120. package/speechflow-cli/etc/oxlint.jsonc +2 -1
  121. package/speechflow-cli/etc/tsconfig.json +1 -0
  122. package/speechflow-cli/package.json +20 -20
  123. package/speechflow-cli/src/speechflow-main-api.ts +6 -13
  124. package/speechflow-cli/src/speechflow-main-graph.ts +1 -1
  125. package/speechflow-cli/src/speechflow-main.ts +4 -0
  126. package/speechflow-cli/src/speechflow-node-a2a-compressor-wt.ts +1 -29
  127. package/speechflow-cli/src/speechflow-node-a2a-compressor.ts +13 -12
  128. package/speechflow-cli/src/speechflow-node-a2a-expander-wt.ts +1 -29
  129. package/speechflow-cli/src/speechflow-node-a2a-expander.ts +13 -12
  130. package/speechflow-cli/src/speechflow-node-a2a-ffmpeg.ts +4 -10
  131. package/speechflow-cli/src/speechflow-node-a2a-filler.ts +19 -17
  132. package/speechflow-cli/src/speechflow-node-a2a-gain.ts +8 -8
  133. package/speechflow-cli/src/speechflow-node-a2a-gender.ts +83 -72
  134. package/speechflow-cli/src/speechflow-node-a2a-meter.ts +66 -46
  135. package/speechflow-cli/src/speechflow-node-a2a-mute.ts +11 -10
  136. package/speechflow-cli/src/speechflow-node-a2a-pitch.ts +221 -0
  137. package/speechflow-cli/src/speechflow-node-a2a-rnnoise.ts +14 -12
  138. package/speechflow-cli/src/speechflow-node-a2a-speex.ts +14 -13
  139. package/speechflow-cli/src/speechflow-node-a2a-vad.ts +26 -25
  140. package/speechflow-cli/src/speechflow-node-a2a-wav.ts +2 -7
  141. package/speechflow-cli/src/speechflow-node-a2t-amazon.ts +16 -16
  142. package/speechflow-cli/src/speechflow-node-a2t-deepgram.ts +16 -16
  143. package/speechflow-cli/src/speechflow-node-a2t-openai.ts +15 -21
  144. package/speechflow-cli/src/speechflow-node-t2a-amazon.ts +9 -15
  145. package/speechflow-cli/src/speechflow-node-t2a-elevenlabs.ts +13 -18
  146. package/speechflow-cli/src/speechflow-node-t2a-kokoro.ts +4 -10
  147. package/speechflow-cli/src/speechflow-node-t2t-amazon.ts +3 -3
  148. package/speechflow-cli/src/speechflow-node-t2t-deepl.ts +2 -2
  149. package/speechflow-cli/src/speechflow-node-t2t-format.ts +3 -2
  150. package/speechflow-cli/src/speechflow-node-t2t-google.ts +2 -2
  151. package/speechflow-cli/src/speechflow-node-t2t-modify.ts +6 -6
  152. package/speechflow-cli/src/speechflow-node-t2t-ollama.ts +3 -3
  153. package/speechflow-cli/src/speechflow-node-t2t-openai.ts +2 -2
  154. package/speechflow-cli/src/speechflow-node-t2t-sentence.ts +13 -13
  155. package/speechflow-cli/src/speechflow-node-t2t-subtitle.ts +12 -16
  156. package/speechflow-cli/src/speechflow-node-t2t-transformers.ts +2 -2
  157. package/speechflow-cli/src/speechflow-node-x2x-filter.ts +2 -2
  158. package/speechflow-cli/src/speechflow-node-x2x-trace.ts +10 -9
  159. package/speechflow-cli/src/speechflow-node-xio-device.ts +7 -5
  160. package/speechflow-cli/src/speechflow-node-xio-file.ts +20 -19
  161. package/speechflow-cli/src/speechflow-node-xio-mqtt.ts +14 -14
  162. package/speechflow-cli/src/speechflow-node-xio-websocket.ts +11 -11
  163. package/speechflow-cli/src/speechflow-node.ts +6 -6
  164. package/speechflow-cli/src/speechflow-util-audio.ts +31 -1
  165. package/speechflow-cli/src/speechflow-util-error.ts +9 -3
  166. package/speechflow-cli/src/speechflow-util-stream.ts +31 -6
  167. package/speechflow-ui-db/dst/index.js +25 -25
  168. package/speechflow-ui-db/package.json +11 -11
  169. package/speechflow-ui-db/src/app.vue +14 -5
  170. package/speechflow-ui-st/dst/index.js +460 -25
  171. package/speechflow-ui-st/package.json +13 -13
  172. package/speechflow-ui-st/src/app.vue +8 -3
  173. package/speechflow-cli/dst/speechflow-util-webaudio-wt.js +0 -124
  174. package/speechflow-cli/dst/speechflow-util-webaudio-wt.js.map +0 -1
  175. package/speechflow-cli/dst/speechflow-util-webaudio.d.ts +0 -13
  176. package/speechflow-cli/dst/speechflow-util-webaudio.js +0 -137
  177. package/speechflow-cli/dst/speechflow-util-webaudio.js.map +0 -1
  178. /package/speechflow-cli/dst/{speechflow-util-webaudio-wt.d.ts → speechflow-node-a2a-pitch2-wt.d.ts} +0 -0
@@ -87,13 +87,14 @@ class AudioExpander extends util.WebAudio {
87
87
  }
88
88
 
89
89
  public async destroy (): Promise<void> {
90
- await super.destroy()
91
-
92
90
  /* destroy expander node */
93
91
  if (this.expanderNode !== null) {
94
92
  this.expanderNode.disconnect()
95
93
  this.expanderNode = null
96
94
  }
95
+
96
+ /* destroy parent */
97
+ await super.destroy()
97
98
  }
98
99
  }
99
100
 
@@ -103,7 +104,7 @@ export default class SpeechFlowNodeA2AExpander extends SpeechFlowNode {
103
104
  public static name = "a2a-expander"
104
105
 
105
106
  /* internal state */
106
- private destroyed = false
107
+ private closing = false
107
108
  private expander: AudioExpander | null = null
108
109
 
109
110
  /* construct node */
@@ -133,7 +134,7 @@ export default class SpeechFlowNodeA2AExpander extends SpeechFlowNode {
133
134
  /* open node */
134
135
  async open () {
135
136
  /* clear destruction flag */
136
- this.destroyed = false
137
+ this.closing = false
137
138
 
138
139
  /* setup expander */
139
140
  this.expander = new AudioExpander(
@@ -157,7 +158,7 @@ export default class SpeechFlowNodeA2AExpander extends SpeechFlowNode {
157
158
  writableObjectMode: true,
158
159
  decodeStrings: false,
159
160
  transform (chunk: SpeechFlowChunk & { payload: Buffer }, encoding, callback) {
160
- if (self.destroyed) {
161
+ if (self.closing) {
161
162
  callback(new Error("stream already destroyed"))
162
163
  return
163
164
  }
@@ -167,7 +168,7 @@ export default class SpeechFlowNodeA2AExpander extends SpeechFlowNode {
167
168
  /* expand chunk */
168
169
  const payload = util.convertBufToI16(chunk.payload)
169
170
  self.expander?.process(payload).then((result) => {
170
- if (self.destroyed)
171
+ if (self.closing)
171
172
  throw new Error("stream already destroyed")
172
173
 
173
174
  /* take over expanded data */
@@ -176,13 +177,13 @@ export default class SpeechFlowNodeA2AExpander extends SpeechFlowNode {
176
177
  this.push(chunk)
177
178
  callback()
178
179
  }).catch((error: unknown) => {
179
- if (!self.destroyed)
180
+ if (!self.closing)
180
181
  callback(util.ensureError(error, "expansion failed"))
181
182
  })
182
183
  }
183
184
  },
184
185
  final (callback) {
185
- if (self.destroyed) {
186
+ if (self.closing) {
186
187
  callback()
187
188
  return
188
189
  }
@@ -194,8 +195,8 @@ export default class SpeechFlowNodeA2AExpander extends SpeechFlowNode {
194
195
 
195
196
  /* close node */
196
197
  async close () {
197
- /* indicate destruction */
198
- this.destroyed = true
198
+ /* indicate closing */
199
+ this.closing = true
199
200
 
200
201
  /* destroy expander */
201
202
  if (this.expander !== null) {
@@ -203,9 +204,9 @@ export default class SpeechFlowNodeA2AExpander extends SpeechFlowNode {
203
204
  this.expander = null
204
205
  }
205
206
 
206
- /* close stream */
207
+ /* shutdown stream */
207
208
  if (this.stream !== null) {
208
- this.stream.destroy()
209
+ await util.destroyStream(this.stream)
209
210
  this.stream = null
210
211
  }
211
212
  }
@@ -93,7 +93,7 @@ export default class SpeechFlowNodeA2AFFMPEG extends SpeechFlowNode {
93
93
  util.run("starting FFmpeg process", () => this.ffmpeg!.run())
94
94
 
95
95
  /* establish a duplex stream and connect it to FFmpeg */
96
- this.stream = Stream.Duplex.from({
96
+ const ffmpegStream = Stream.Duplex.from({
97
97
  writable: streamInput,
98
98
  readable: streamOutput
99
99
  })
@@ -101,20 +101,14 @@ export default class SpeechFlowNodeA2AFFMPEG extends SpeechFlowNode {
101
101
  /* wrap streams with conversions for chunk vs plain audio */
102
102
  const wrapper1 = util.createTransformStreamForWritableSide()
103
103
  const wrapper2 = util.createTransformStreamForReadableSide("audio", () => this.timeZero)
104
- this.stream = Stream.compose(wrapper1, this.stream, wrapper2)
104
+ this.stream = Stream.compose(wrapper1, ffmpegStream, wrapper2)
105
105
  }
106
106
 
107
107
  /* close node */
108
108
  async close () {
109
- /* close duplex stream */
109
+ /* shutdown stream */
110
110
  if (this.stream !== null) {
111
- await new Promise<void>((resolve) => {
112
- if (this.stream instanceof Stream.Duplex)
113
- this.stream.end(() => { resolve() })
114
- else
115
- resolve()
116
- })
117
- this.stream.destroy()
111
+ await util.destroyStream(this.stream)
118
112
  this.stream = null
119
113
  }
120
114
 
@@ -44,14 +44,14 @@ class AudioFiller extends EventEmitter {
44
44
  }
45
45
 
46
46
  /* emit a chunk of silence */
47
- private emitSilence (fromSamples: number, toSamples: number) {
47
+ private emitSilence (fromSamples: number, toSamples: number, meta?: Map<string, any>) {
48
48
  const frames = Math.max(0, Math.floor(toSamples - fromSamples))
49
49
  if (frames <= 0)
50
50
  return
51
51
  const payload = Buffer.alloc(frames * this.bytesPerFrame) /* already zeroed */
52
52
  const timestampStart = this.durationFromSamples(fromSamples)
53
53
  const timestampEnd = this.durationFromSamples(toSamples)
54
- const chunk = new SpeechFlowChunk(timestampStart, timestampEnd, "final", "audio", payload)
54
+ const chunk = new SpeechFlowChunk(timestampStart, timestampEnd, "final", "audio", payload, meta ? new Map(meta) : undefined)
55
55
  this.emit("chunk", chunk)
56
56
  }
57
57
 
@@ -64,7 +64,7 @@ class AudioFiller extends EventEmitter {
64
64
 
65
65
  /* if chunk starts beyond what we've emitted, insert silence for the gap */
66
66
  if (startSamp > this.emittedEndSamples + this.sampleTolerance) {
67
- this.emitSilence(this.emittedEndSamples, startSamp)
67
+ this.emitSilence(this.emittedEndSamples, startSamp, chunk.meta)
68
68
  this.emittedEndSamples = startSamp
69
69
  }
70
70
 
@@ -95,7 +95,7 @@ class AudioFiller extends EventEmitter {
95
95
  const outEndSamples = outStartSamples + Math.floor(payload.length / this.bytesPerFrame)
96
96
  const timestampStart = this.durationFromSamples(outStartSamples)
97
97
  const timestampEnd = this.durationFromSamples(outEndSamples)
98
- const c = new SpeechFlowChunk(timestampStart, timestampEnd, "final", "audio", payload)
98
+ const c = new SpeechFlowChunk(timestampStart, timestampEnd, "final", "audio", payload, new Map(chunk.meta))
99
99
  this.emit("chunk", c)
100
100
 
101
101
  /* advance emitted cursor */
@@ -109,7 +109,7 @@ export default class SpeechFlowNodeA2AFiller extends SpeechFlowNode {
109
109
  public static name = "a2a-filler"
110
110
 
111
111
  /* internal state */
112
- private destroyed = false
112
+ private closing = false
113
113
  private filler: AudioFiller | null = null
114
114
  private sendQueue: util.AsyncQueue<SpeechFlowChunk | null> | null = null
115
115
 
@@ -130,7 +130,7 @@ export default class SpeechFlowNodeA2AFiller extends SpeechFlowNode {
130
130
  /* open node */
131
131
  async open () {
132
132
  /* clear destruction flag */
133
- this.destroyed = false
133
+ this.closing = false
134
134
 
135
135
  /* establish queues */
136
136
  this.filler = new AudioFiller(this.config.audioSampleRate, this.config.audioChannels)
@@ -148,27 +148,29 @@ export default class SpeechFlowNodeA2AFiller extends SpeechFlowNode {
148
148
  writableObjectMode: true,
149
149
  decodeStrings: false,
150
150
  write (chunk: SpeechFlowChunk & { type: "audio", payload: Buffer }, encoding, callback) {
151
- if (self.destroyed || self.filler === null)
151
+ if (self.closing || self.filler === null)
152
152
  callback(new Error("stream already destroyed"))
153
153
  else if (!Buffer.isBuffer(chunk.payload))
154
154
  callback(new Error("invalid chunk payload type"))
155
155
  else {
156
156
  try {
157
+ if (self.closing || self.filler === null)
158
+ throw new Error("stream already destroyed")
157
159
  self.filler.add(chunk)
158
160
  callback()
159
161
  }
160
- catch (error: any) {
161
- callback(error)
162
+ catch (error: unknown) {
163
+ callback(util.ensureError(error))
162
164
  }
163
165
  }
164
166
  },
165
167
  read (size) {
166
- if (self.destroyed || self.sendQueue === null) {
168
+ if (self.closing || self.sendQueue === null) {
167
169
  this.push(null)
168
170
  return
169
171
  }
170
172
  self.sendQueue.read().then((chunk) => {
171
- if (self.destroyed) {
173
+ if (self.closing || self.sendQueue === null) {
172
174
  this.push(null)
173
175
  return
174
176
  }
@@ -181,12 +183,12 @@ export default class SpeechFlowNodeA2AFiller extends SpeechFlowNode {
181
183
  this.push(chunk)
182
184
  }
183
185
  }).catch((error: unknown) => {
184
- if (!self.destroyed)
186
+ if (!self.closing && self.sendQueue !== null)
185
187
  self.log("error", `queue read error: ${util.ensureError(error).message}`)
186
188
  })
187
189
  },
188
190
  final (callback) {
189
- if (self.destroyed) {
191
+ if (self.closing) {
190
192
  callback()
191
193
  return
192
194
  }
@@ -198,8 +200,8 @@ export default class SpeechFlowNodeA2AFiller extends SpeechFlowNode {
198
200
 
199
201
  /* close node */
200
202
  async close () {
201
- /* indicate destruction */
202
- this.destroyed = true
203
+ /* indicate closing */
204
+ this.closing = true
203
205
 
204
206
  /* destroy queues */
205
207
  if (this.sendQueue !== null) {
@@ -213,9 +215,9 @@ export default class SpeechFlowNodeA2AFiller extends SpeechFlowNode {
213
215
  this.filler = null
214
216
  }
215
217
 
216
- /* close stream */
218
+ /* shutdown stream */
217
219
  if (this.stream !== null) {
218
- this.stream.destroy()
220
+ await util.destroyStream(this.stream)
219
221
  this.stream = null
220
222
  }
221
223
  }
@@ -17,7 +17,7 @@ export default class SpeechFlowNodeA2AGain extends SpeechFlowNode {
17
17
  public static name = "a2a-gain"
18
18
 
19
19
  /* internal state */
20
- private destroyed = false
20
+ private closing = false
21
21
 
22
22
  /* construct node */
23
23
  constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
@@ -36,7 +36,7 @@ export default class SpeechFlowNodeA2AGain extends SpeechFlowNode {
36
36
  /* open node */
37
37
  async open () {
38
38
  /* clear destruction flag */
39
- this.destroyed = false
39
+ this.closing = false
40
40
 
41
41
  /* adjust gain */
42
42
  const adjustGain = (chunk: SpeechFlowChunk & { payload: Buffer }, db: number) => {
@@ -57,7 +57,7 @@ export default class SpeechFlowNodeA2AGain extends SpeechFlowNode {
57
57
  writableObjectMode: true,
58
58
  decodeStrings: false,
59
59
  transform (chunk: SpeechFlowChunk & { payload: Buffer }, encoding, callback) {
60
- if (self.destroyed) {
60
+ if (self.closing) {
61
61
  callback(new Error("stream already destroyed"))
62
62
  return
63
63
  }
@@ -73,7 +73,7 @@ export default class SpeechFlowNodeA2AGain extends SpeechFlowNode {
73
73
  }
74
74
  },
75
75
  final (callback) {
76
- if (self.destroyed) {
76
+ if (self.closing) {
77
77
  callback()
78
78
  return
79
79
  }
@@ -85,12 +85,12 @@ export default class SpeechFlowNodeA2AGain extends SpeechFlowNode {
85
85
 
86
86
  /* close node */
87
87
  async close () {
88
- /* indicate destruction */
89
- this.destroyed = true
88
+ /* indicate closing */
89
+ this.closing = true
90
90
 
91
- /* close stream */
91
+ /* shutdown stream */
92
92
  if (this.stream !== null) {
93
- this.stream.destroy()
93
+ await util.destroyStream(this.stream)
94
94
  this.stream = null
95
95
  }
96
96
  }
@@ -5,23 +5,26 @@
5
5
  */
6
6
 
7
7
  /* standard dependencies */
8
- import path from "node:path"
9
- import Stream from "node:stream"
8
+ import path from "node:path"
9
+ import Stream from "node:stream"
10
10
 
11
11
  /* external dependencies */
12
- import * as Transformers from "@huggingface/transformers"
13
- import { WaveFile } from "wavefile"
12
+ import * as Transformers from "@huggingface/transformers"
13
+ import { WaveFile } from "wavefile"
14
+ import { getRMS, AudioData } from "audio-inspect"
15
+ import { Duration } from "luxon"
14
16
 
15
17
  /* internal dependencies */
16
18
  import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
17
19
  import * as util from "./speechflow-util"
18
20
 
19
21
  /* audio stream queue element */
22
+ type Gender = "male" | "female" | "unknown"
20
23
  type AudioQueueElement = {
21
24
  type: "audio-frame",
22
25
  chunk: SpeechFlowChunk,
23
26
  data: Float32Array,
24
- gender?: "male" | "female" | "unknown"
27
+ gender?: Gender
25
28
  } | {
26
29
  type: "audio-eof"
27
30
  }
@@ -37,7 +40,7 @@ export default class SpeechFlowNodeA2AGender extends SpeechFlowNode {
37
40
  private queueRecv = this.queue.pointerUse("recv")
38
41
  private queueAC = this.queue.pointerUse("ac")
39
42
  private queueSend = this.queue.pointerUse("send")
40
- private shutdown = false
43
+ private closing = false
41
44
  private workingOffTimer: ReturnType<typeof setTimeout> | null = null
42
45
  private progressInterval: ReturnType<typeof setInterval> | null = null
43
46
 
@@ -47,7 +50,10 @@ export default class SpeechFlowNodeA2AGender extends SpeechFlowNode {
47
50
 
48
51
  /* declare node configuration parameters */
49
52
  this.configure({
50
- window: { type: "number", pos: 0, val: 500 }
53
+ window: { type: "number", pos: 0, val: 500 },
54
+ threshold: { type: "number", pos: 1, val: 0.50 },
55
+ hysteresis: { type: "number", pos: 2, val: 0.25 },
56
+ volumeThreshold: { type: "number", pos: 3, val: -45 }
51
57
  })
52
58
 
53
59
  /* declare node input/output format */
@@ -61,11 +67,8 @@ export default class SpeechFlowNodeA2AGender extends SpeechFlowNode {
61
67
  if (this.config.audioBitDepth !== 16 || !this.config.audioLittleEndian)
62
68
  throw new Error("Gender node currently supports PCM-S16LE audio only")
63
69
 
64
- /* clear shutdown flag */
65
- this.shutdown = false
66
-
67
- /* pass-through logging */
68
- const log = this.log.bind(this)
70
+ /* clear destruction flag */
71
+ this.closing = false
69
72
 
70
73
  /* the used model */
71
74
  const model = "Xenova/wav2vec2-large-xlsr-53-gender-recognition-librispeech"
@@ -73,7 +76,7 @@ export default class SpeechFlowNodeA2AGender extends SpeechFlowNode {
73
76
  /* track download progress when instantiating Transformers engine and model */
74
77
  const progressState = new Map<string, number>()
75
78
  const progressCallback: Transformers.ProgressCallback = (progress: any) => {
76
- if (this.shutdown)
79
+ if (this.closing)
77
80
  return
78
81
  let artifact = model
79
82
  if (typeof progress.file === "string")
@@ -87,7 +90,7 @@ export default class SpeechFlowNodeA2AGender extends SpeechFlowNode {
87
90
  progressState.set(artifact, percent)
88
91
  }
89
92
  this.progressInterval = setInterval(() => {
90
- if (this.shutdown)
93
+ if (this.closing)
91
94
  return
92
95
  for (const [ artifact, percent ] of progressState) {
93
96
  this.log("info", `downloaded ${percent.toFixed(2)}% of artifact "${artifact}"`)
@@ -102,24 +105,17 @@ export default class SpeechFlowNodeA2AGender extends SpeechFlowNode {
102
105
  device: "auto",
103
106
  progress_callback: progressCallback
104
107
  })
105
- let timeoutId: ReturnType<typeof setTimeout> | null = null
106
- const timeoutPromise = new Promise((resolve, reject) => {
107
- timeoutId = setTimeout(() =>
108
- reject(new Error("model initialization timeout")), 30 * 1000)
109
- })
110
108
  this.classifier = await Promise.race([
111
- pipelinePromise, timeoutPromise
112
- ]).finally(() => {
113
- if (timeoutId !== null)
114
- clearTimeout(timeoutId)
115
- }) as Transformers.AudioClassificationPipeline
109
+ pipelinePromise,
110
+ util.timeoutPromise(30 * 1000, "model initialization timeout")
111
+ ]) as Transformers.AudioClassificationPipeline
116
112
  }
117
113
  catch (error) {
118
114
  if (this.progressInterval) {
119
115
  clearInterval(this.progressInterval)
120
116
  this.progressInterval = null
121
117
  }
122
- throw new Error(`failed to initialize classifier pipeline: ${error}`)
118
+ throw new Error(`failed to initialize classifier pipeline: ${error}`, { cause: error })
123
119
  }
124
120
  if (this.progressInterval) {
125
121
  clearInterval(this.progressInterval)
@@ -128,46 +124,61 @@ export default class SpeechFlowNodeA2AGender extends SpeechFlowNode {
128
124
  if (this.classifier === null)
129
125
  throw new Error("failed to instantiate classifier pipeline")
130
126
 
127
+ /* define sample rate required by model */
128
+ const sampleRateTarget = 16000
129
+
131
130
  /* classify a single large-enough concatenated audio frame */
131
+ let genderLast: Gender = "unknown"
132
132
  const classify = async (data: Float32Array) => {
133
- if (this.shutdown || this.classifier === null)
134
- throw new Error("classifier shutdown during operation")
135
- const classifyPromise = this.classifier(data)
136
- let timeoutId: ReturnType<typeof setTimeout> | null = null
137
- const timeoutPromise = new Promise((resolve, reject) => {
138
- timeoutId = setTimeout(() =>
139
- reject(new Error("classification timeout")), 30 * 1000)
140
- })
141
- const result = await Promise.race([ classifyPromise, timeoutPromise ]).finally(() => {
142
- if (timeoutId !== null)
143
- clearTimeout(timeoutId)
144
- }) as Transformers.AudioClassificationOutput | Transformers.AudioClassificationOutput[]
133
+ if (this.closing || this.classifier === null)
134
+ throw new Error("classifier destroyed during operation")
135
+
136
+ /* check volume level and return the last detected gender if too low
137
 in order to avoid a wrong classification */
138
+ const audioData = {
139
+ sampleRate: sampleRateTarget,
140
+ numberOfChannels: 1,
141
+ channelData: [ data ],
142
+ duration: data.length / sampleRateTarget,
143
+ length: data.length
144
+ } satisfies AudioData
145
+ const rms = getRMS(audioData, { asDB: true })
146
+ if (rms < this.params.volumeThreshold)
147
+ return genderLast
148
+
149
+ /* classify audio */
150
+ const result = await Promise.race([
151
+ this.classifier(data),
152
+ util.timeoutPromise(30 * 1000, "classification timeout")
153
+ ]) as Transformers.AudioClassificationOutput | Transformers.AudioClassificationOutput[]
145
154
  const classified = Array.isArray(result) ?
146
155
  result as Transformers.AudioClassificationOutput :
147
156
  [ result ]
148
- const c1 = classified.find((c: any) => c.label === "male")
149
- const c2 = classified.find((c: any) => c.label === "female")
157
+ const c1 = classified.find((c) => c.label === "male")
158
+ const c2 = classified.find((c) => c.label === "female")
150
159
  const male = c1 ? c1.score : 0.0
151
160
  const female = c2 ? c2.score : 0.0
152
- if (male > 0.50 && male > female + 0.25)
153
- return "male"
154
- else if (female > 0.50 && female > male + 0.25)
155
- return "female"
156
- else
157
- return "unknown"
161
+ const threshold = this.params.threshold
162
+ const hysteresis = this.params.hysteresis
163
+ let genderNow: Gender = genderLast
164
+ if (male > threshold && male > female + hysteresis)
165
+ genderNow = "male"
166
+ else if (female > threshold && female > male + hysteresis)
167
+ genderNow = "female"
168
+ if (genderNow !== genderLast) {
169
+ this.log("info", `switching detected gender from <${genderLast}> to <${genderNow}>`)
170
+ genderLast = genderNow
171
+ }
172
+ return genderNow
158
173
  }
159
174
 
160
- /* define sample rate required by model */
161
- const sampleRateTarget = 16000
162
-
163
175
  /* work off queued audio frames */
164
176
  const frameWindowDuration = this.params.window / 1000
165
177
  const frameWindowSamples = Math.floor(frameWindowDuration * sampleRateTarget)
166
- let lastGender = ""
167
178
  let workingOff = false
168
179
  const workOffQueue = async () => {
169
180
  /* control working off round */
170
- if (workingOff || this.shutdown)
181
+ if (workingOff || this.closing)
171
182
  return
172
183
  workingOff = true
173
184
  if (this.workingOffTimer !== null) {
@@ -184,7 +195,7 @@ export default class SpeechFlowNodeA2AGender extends SpeechFlowNode {
184
195
  data.fill(0)
185
196
  let samples = 0
186
197
  let pos = pos0
187
- while (pos < posL && samples < frameWindowSamples && !this.shutdown) {
198
+ while (pos < posL && samples < frameWindowSamples && !this.closing) {
188
199
  const element = this.queueAC.peek(pos)
189
200
  if (element === undefined || element.type !== "audio-frame")
190
201
  break
@@ -194,12 +205,12 @@ export default class SpeechFlowNodeA2AGender extends SpeechFlowNode {
194
205
  }
195
206
  pos++
196
207
  }
197
- if (pos0 < pos && samples > frameWindowSamples * 0.75 && !this.shutdown) {
208
+ if (pos0 < pos && samples > frameWindowSamples * 0.75 && !this.closing) {
198
209
  const gender = await classify(data)
199
- if (this.shutdown)
210
+ if (this.closing)
200
211
  return
201
212
  const posM = pos0 + Math.trunc((pos - pos0) * 0.25)
202
- while (pos0 < posM && pos0 < posL && !this.shutdown) {
213
+ while (pos0 < posM && pos0 < posL && !this.closing) {
203
214
  const element = this.queueAC.peek(pos0)
204
215
  if (element === undefined || element.type !== "audio-frame")
205
216
  break
@@ -208,19 +219,15 @@ export default class SpeechFlowNodeA2AGender extends SpeechFlowNode {
208
219
  this.queueAC.walk(+1)
209
220
  pos0++
210
221
  }
211
- if (lastGender !== gender && !this.shutdown) {
212
- log("info", `gender now recognized as <${gender}>`)
213
- lastGender = gender
214
- }
215
222
  }
216
223
  }
217
224
  catch (error) {
218
- log("error", `gender classification error: ${error}`)
225
+ this.log("error", `gender classification error: ${error}`)
219
226
  }
220
227
 
221
228
  /* re-initiate working off round */
222
229
  workingOff = false
223
- if (!this.shutdown) {
230
+ if (!this.closing) {
224
231
  this.workingOffTimer = setTimeout(workOffQueue, 100)
225
232
  this.queue.once("write", workOffQueue)
226
233
  }
@@ -237,7 +244,7 @@ export default class SpeechFlowNodeA2AGender extends SpeechFlowNode {
237
244
 
238
245
  /* receive audio chunk (writable side of stream) */
239
246
  write (chunk: SpeechFlowChunk, encoding, callback) {
240
- if (self.shutdown) {
247
+ if (self.closing) {
241
248
  callback(new Error("stream already destroyed"))
242
249
  return
243
250
  }
@@ -252,21 +259,21 @@ export default class SpeechFlowNodeA2AGender extends SpeechFlowNode {
252
259
  const wav = new WaveFile()
253
260
  wav.fromScratch(self.config.audioChannels, self.config.audioSampleRate, "32f", data)
254
261
  wav.toSampleRate(sampleRateTarget, { method: "cubic" })
255
- data = wav.getSamples(false, Float32Array) as any as Float32Array<ArrayBuffer>
262
+ data = wav.getSamples(false, Float32Array) as unknown as Float32Array<ArrayBuffer>
256
263
 
257
264
  /* queue chunk and converted data */
258
265
  self.queueRecv.append({ type: "audio-frame", chunk, data })
259
266
  callback()
260
267
  }
261
268
  catch (error) {
262
- callback(error instanceof Error ? error : new Error("audio processing failed"))
269
+ callback(util.ensureError(error, "audio processing failed"))
263
270
  }
264
271
  }
265
272
  },
266
273
 
267
274
  /* receive no more audio chunks (writable side of stream) */
268
275
  final (callback) {
269
- if (self.shutdown) {
276
+ if (self.closing) {
270
277
  callback()
271
278
  return
272
279
  }
@@ -280,7 +287,7 @@ export default class SpeechFlowNodeA2AGender extends SpeechFlowNode {
280
287
  read (_size) {
281
288
  /* flush pending audio chunks */
282
289
  const flushPendingChunks = () => {
283
- if (self.shutdown) {
290
+ if (self.closing) {
284
291
  this.push(null)
285
292
  return
286
293
  }
@@ -292,7 +299,7 @@ export default class SpeechFlowNodeA2AGender extends SpeechFlowNode {
292
299
  && element.type === "audio-frame"
293
300
  && element.gender !== undefined) {
294
301
  while (true) {
295
- if (self.shutdown) {
302
+ if (self.closing) {
296
303
  this.push(null)
297
304
  return
298
305
  }
@@ -307,14 +314,18 @@ export default class SpeechFlowNodeA2AGender extends SpeechFlowNode {
307
314
  && element.gender === undefined)
308
315
  break
309
316
  const duration = util.audioArrayDuration(element.data)
310
- log("debug", `send chunk (${duration.toFixed(3)}s) with gender <${element.gender}>`)
317
+ const fmtTime = (t: Duration) => t.toFormat("hh:mm:ss.SSS")
318
+ const times = `start: ${fmtTime(element.chunk.timestampStart)}, ` +
319
+ `end: ${fmtTime(element.chunk.timestampEnd)}`
320
+ self.log("debug", `send chunk (${times}, duration: ${duration.toFixed(3)}s) ` +
321
+ `with gender <${element.gender}>`)
311
322
  element.chunk.meta.set("gender", element.gender)
312
323
  this.push(element.chunk)
313
324
  self.queueSend.walk(+1)
314
325
  self.queue.trim()
315
326
  }
316
327
  }
317
- else if (!self.shutdown)
328
+ else if (!self.closing)
318
329
  self.queue.once("write", flushPendingChunks)
319
330
  }
320
331
  flushPendingChunks()
@@ -324,8 +335,8 @@ export default class SpeechFlowNodeA2AGender extends SpeechFlowNode {
324
335
 
325
336
  /* close node */
326
337
  async close () {
327
- /* indicate shutdown */
328
- this.shutdown = true
338
+ /* indicate closing */
339
+ this.closing = true
329
340
 
330
341
  /* cleanup working-off timer */
331
342
  if (this.workingOffTimer !== null) {
@@ -342,9 +353,9 @@ export default class SpeechFlowNodeA2AGender extends SpeechFlowNode {
342
353
  /* remove all event listeners */
343
354
  this.queue.removeAllListeners("write")
344
355
 
345
- /* close stream */
356
+ /* shutdown stream */
346
357
  if (this.stream !== null) {
347
- this.stream.destroy()
358
+ await util.destroyStream(this.stream)
348
359
  this.stream = null
349
360
  }
350
361