speechflow 2.0.0 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (134)
  1. package/CHANGELOG.md +6 -0
  2. package/README.md +4 -4
  3. package/package.json +2 -2
  4. package/speechflow-cli/dst/speechflow-main-api.js.map +1 -1
  5. package/speechflow-cli/dst/speechflow-main-cli.js +1 -0
  6. package/speechflow-cli/dst/speechflow-main-cli.js.map +1 -1
  7. package/speechflow-cli/dst/speechflow-main-graph.js +2 -4
  8. package/speechflow-cli/dst/speechflow-main-graph.js.map +1 -1
  9. package/speechflow-cli/dst/speechflow-main-nodes.js +1 -0
  10. package/speechflow-cli/dst/speechflow-main-nodes.js.map +1 -1
  11. package/speechflow-cli/dst/speechflow-node-a2a-compressor-wt.js +1 -0
  12. package/speechflow-cli/dst/speechflow-node-a2a-compressor-wt.js.map +1 -1
  13. package/speechflow-cli/dst/speechflow-node-a2a-compressor.js +7 -9
  14. package/speechflow-cli/dst/speechflow-node-a2a-compressor.js.map +1 -1
  15. package/speechflow-cli/dst/speechflow-node-a2a-expander-wt.js +1 -0
  16. package/speechflow-cli/dst/speechflow-node-a2a-expander-wt.js.map +1 -1
  17. package/speechflow-cli/dst/speechflow-node-a2a-expander.js +8 -9
  18. package/speechflow-cli/dst/speechflow-node-a2a-expander.js.map +1 -1
  19. package/speechflow-cli/dst/speechflow-node-a2a-filler.js +2 -0
  20. package/speechflow-cli/dst/speechflow-node-a2a-filler.js.map +1 -1
  21. package/speechflow-cli/dst/speechflow-node-a2a-gender.js +1 -1
  22. package/speechflow-cli/dst/speechflow-node-a2a-gender.js.map +1 -1
  23. package/speechflow-cli/dst/speechflow-node-a2a-meter.js +1 -1
  24. package/speechflow-cli/dst/speechflow-node-a2a-pitch.js +11 -9
  25. package/speechflow-cli/dst/speechflow-node-a2a-pitch.js.map +1 -1
  26. package/speechflow-cli/dst/speechflow-node-a2a-rnnoise-wt.js +1 -0
  27. package/speechflow-cli/dst/speechflow-node-a2a-rnnoise-wt.js.map +1 -1
  28. package/speechflow-cli/dst/speechflow-node-a2a-rnnoise.js.map +1 -1
  29. package/speechflow-cli/dst/speechflow-node-a2a-speex.js +4 -2
  30. package/speechflow-cli/dst/speechflow-node-a2a-speex.js.map +1 -1
  31. package/speechflow-cli/dst/speechflow-node-a2a-vad.js +19 -22
  32. package/speechflow-cli/dst/speechflow-node-a2a-vad.js.map +1 -1
  33. package/speechflow-cli/dst/speechflow-node-a2a-wav.js +7 -0
  34. package/speechflow-cli/dst/speechflow-node-a2a-wav.js.map +1 -1
  35. package/speechflow-cli/dst/speechflow-node-a2t-amazon.d.ts +0 -1
  36. package/speechflow-cli/dst/speechflow-node-a2t-amazon.js +2 -11
  37. package/speechflow-cli/dst/speechflow-node-a2t-amazon.js.map +1 -1
  38. package/speechflow-cli/dst/speechflow-node-a2t-google.d.ts +0 -1
  39. package/speechflow-cli/dst/speechflow-node-a2t-google.js +0 -6
  40. package/speechflow-cli/dst/speechflow-node-a2t-google.js.map +1 -1
  41. package/speechflow-cli/dst/speechflow-node-a2t-openai.js +6 -1
  42. package/speechflow-cli/dst/speechflow-node-a2t-openai.js.map +1 -1
  43. package/speechflow-cli/dst/speechflow-node-t2a-amazon.d.ts +1 -1
  44. package/speechflow-cli/dst/speechflow-node-t2a-amazon.js +27 -7
  45. package/speechflow-cli/dst/speechflow-node-t2a-amazon.js.map +1 -1
  46. package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.d.ts +1 -1
  47. package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.js +5 -3
  48. package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.js.map +1 -1
  49. package/speechflow-cli/dst/speechflow-node-t2a-google.js +1 -4
  50. package/speechflow-cli/dst/speechflow-node-t2a-google.js.map +1 -1
  51. package/speechflow-cli/dst/speechflow-node-t2a-kokoro.d.ts +1 -1
  52. package/speechflow-cli/dst/speechflow-node-t2a-kokoro.js +27 -6
  53. package/speechflow-cli/dst/speechflow-node-t2a-kokoro.js.map +1 -1
  54. package/speechflow-cli/dst/speechflow-node-t2a-openai.js +1 -4
  55. package/speechflow-cli/dst/speechflow-node-t2a-openai.js.map +1 -1
  56. package/speechflow-cli/dst/speechflow-node-t2a-supertonic.js +15 -4
  57. package/speechflow-cli/dst/speechflow-node-t2a-supertonic.js.map +1 -1
  58. package/speechflow-cli/dst/speechflow-node-t2t-amazon.js +0 -2
  59. package/speechflow-cli/dst/speechflow-node-t2t-amazon.js.map +1 -1
  60. package/speechflow-cli/dst/speechflow-node-t2t-deepl.js.map +1 -1
  61. package/speechflow-cli/dst/speechflow-node-t2t-google.js.map +1 -1
  62. package/speechflow-cli/dst/speechflow-node-t2t-opus.js +18 -16
  63. package/speechflow-cli/dst/speechflow-node-t2t-opus.js.map +1 -1
  64. package/speechflow-cli/dst/speechflow-node-t2t-punctuation.js +1 -1
  65. package/speechflow-cli/dst/speechflow-node-t2t-spellcheck.js +1 -1
  66. package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js +5 -2
  67. package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js.map +1 -1
  68. package/speechflow-cli/dst/speechflow-node-t2t-summary.js +1 -1
  69. package/speechflow-cli/dst/speechflow-node-t2t-summary.js.map +1 -1
  70. package/speechflow-cli/dst/speechflow-node-x2x-filter.js +2 -0
  71. package/speechflow-cli/dst/speechflow-node-x2x-filter.js.map +1 -1
  72. package/speechflow-cli/dst/speechflow-node-xio-exec.js +1 -0
  73. package/speechflow-cli/dst/speechflow-node-xio-exec.js.map +1 -1
  74. package/speechflow-cli/dst/speechflow-node-xio-file.js +3 -5
  75. package/speechflow-cli/dst/speechflow-node-xio-file.js.map +1 -1
  76. package/speechflow-cli/dst/speechflow-node-xio-mqtt.js.map +1 -1
  77. package/speechflow-cli/dst/speechflow-node-xio-vban.js.map +1 -1
  78. package/speechflow-cli/dst/speechflow-node-xio-webrtc.js +2 -0
  79. package/speechflow-cli/dst/speechflow-node-xio-webrtc.js.map +1 -1
  80. package/speechflow-cli/dst/speechflow-node-xio-websocket.js +9 -9
  81. package/speechflow-cli/dst/speechflow-node-xio-websocket.js.map +1 -1
  82. package/speechflow-cli/dst/speechflow-util-audio.js +4 -0
  83. package/speechflow-cli/dst/speechflow-util-audio.js.map +1 -1
  84. package/speechflow-cli/dst/speechflow-util-queue.js +2 -1
  85. package/speechflow-cli/dst/speechflow-util-queue.js.map +1 -1
  86. package/speechflow-cli/dst/speechflow-util.js +1 -0
  87. package/speechflow-cli/dst/speechflow-util.js.map +1 -1
  88. package/speechflow-cli/package.json +10 -10
  89. package/speechflow-cli/src/speechflow-main-api.ts +16 -16
  90. package/speechflow-cli/src/speechflow-main-cli.ts +1 -0
  91. package/speechflow-cli/src/speechflow-main-graph.ts +7 -9
  92. package/speechflow-cli/src/speechflow-main-nodes.ts +1 -0
  93. package/speechflow-cli/src/speechflow-node-a2a-compressor-wt.ts +1 -0
  94. package/speechflow-cli/src/speechflow-node-a2a-compressor.ts +8 -10
  95. package/speechflow-cli/src/speechflow-node-a2a-expander-wt.ts +1 -0
  96. package/speechflow-cli/src/speechflow-node-a2a-expander.ts +9 -10
  97. package/speechflow-cli/src/speechflow-node-a2a-filler.ts +2 -0
  98. package/speechflow-cli/src/speechflow-node-a2a-gender.ts +3 -3
  99. package/speechflow-cli/src/speechflow-node-a2a-meter.ts +2 -2
  100. package/speechflow-cli/src/speechflow-node-a2a-pitch.ts +11 -9
  101. package/speechflow-cli/src/speechflow-node-a2a-rnnoise-wt.ts +1 -0
  102. package/speechflow-cli/src/speechflow-node-a2a-rnnoise.ts +1 -1
  103. package/speechflow-cli/src/speechflow-node-a2a-speex.ts +5 -3
  104. package/speechflow-cli/src/speechflow-node-a2a-vad.ts +20 -23
  105. package/speechflow-cli/src/speechflow-node-a2a-wav.ts +7 -0
  106. package/speechflow-cli/src/speechflow-node-a2t-amazon.ts +6 -18
  107. package/speechflow-cli/src/speechflow-node-a2t-google.ts +4 -11
  108. package/speechflow-cli/src/speechflow-node-a2t-openai.ts +12 -7
  109. package/speechflow-cli/src/speechflow-node-t2a-amazon.ts +32 -10
  110. package/speechflow-cli/src/speechflow-node-t2a-elevenlabs.ts +6 -4
  111. package/speechflow-cli/src/speechflow-node-t2a-google.ts +1 -4
  112. package/speechflow-cli/src/speechflow-node-t2a-kokoro.ts +33 -10
  113. package/speechflow-cli/src/speechflow-node-t2a-openai.ts +1 -4
  114. package/speechflow-cli/src/speechflow-node-t2a-supertonic.ts +15 -6
  115. package/speechflow-cli/src/speechflow-node-t2t-amazon.ts +1 -3
  116. package/speechflow-cli/src/speechflow-node-t2t-deepl.ts +2 -2
  117. package/speechflow-cli/src/speechflow-node-t2t-google.ts +1 -1
  118. package/speechflow-cli/src/speechflow-node-t2t-opus.ts +19 -18
  119. package/speechflow-cli/src/speechflow-node-t2t-punctuation.ts +1 -1
  120. package/speechflow-cli/src/speechflow-node-t2t-spellcheck.ts +1 -1
  121. package/speechflow-cli/src/speechflow-node-t2t-subtitle.ts +5 -2
  122. package/speechflow-cli/src/speechflow-node-t2t-summary.ts +1 -1
  123. package/speechflow-cli/src/speechflow-node-x2x-filter.ts +2 -0
  124. package/speechflow-cli/src/speechflow-node-xio-exec.ts +1 -0
  125. package/speechflow-cli/src/speechflow-node-xio-file.ts +3 -5
  126. package/speechflow-cli/src/speechflow-node-xio-mqtt.ts +2 -2
  127. package/speechflow-cli/src/speechflow-node-xio-vban.ts +5 -5
  128. package/speechflow-cli/src/speechflow-node-xio-webrtc.ts +2 -0
  129. package/speechflow-cli/src/speechflow-node-xio-websocket.ts +9 -9
  130. package/speechflow-cli/src/speechflow-util-audio.ts +5 -0
  131. package/speechflow-cli/src/speechflow-util-queue.ts +3 -3
  132. package/speechflow-cli/src/speechflow-util.ts +1 -0
  133. package/speechflow-ui-db/package.json +4 -4
  134. package/speechflow-ui-st/package.json +4 -4
@@ -9,6 +9,7 @@ import Stream from "node:stream"

  /* external dependencies */
  import { getStreamAsBuffer } from "get-stream"
+ import { Duration } from "luxon"
  import SpeexResampler from "speex-resampler"
  import {
      PollyClient, SynthesizeSpeechCommand,
@@ -25,9 +26,9 @@ export default class SpeechFlowNodeT2AAmazon extends SpeechFlowNode {
  public static name = "t2a-amazon"

  /* internal state */
- private client: PollyClient | null = null
- private closing = false
+ private client: PollyClient | null = null
  private resampler: SpeexResampler | null = null
+ private closing = false

  /* construct node */
  constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
@@ -129,22 +130,43 @@ export default class SpeechFlowNodeT2AAmazon extends SpeechFlowNode {
  }
  if (Buffer.isBuffer(chunk.payload))
  callback(new Error("invalid chunk payload type"))
- else if (chunk.payload.length > 0) {
+ else if (chunk.payload === "")
+ callback()
+ else {
+ let processTimeout: ReturnType<typeof setTimeout> | null = setTimeout(() => {
+ processTimeout = null
+ callback(new Error("AWS Polly API timeout"))
+ }, 60 * 1000)
+ const clearProcessTimeout = () => {
+ if (processTimeout !== null) {
+ clearTimeout(processTimeout)
+ processTimeout = null
+ }
+ }
  self.log("debug", `send data (${chunk.payload.length} bytes): "${chunk.payload}"`)
  textToSpeech(chunk.payload as string).then((buffer) => {
- if (self.closing)
- throw new Error("stream destroyed during processing")
+ if (self.closing) {
+ clearProcessTimeout()
+ callback(new Error("stream destroyed during processing"))
+ return
+ }
+ /* calculate actual audio duration from PCM buffer size */
+ const durationMs = util.audioBufferDuration(buffer,
+ self.config.audioSampleRate, self.config.audioBitDepth) * 1000
+
+ /* create new chunk with recalculated timestamps */
  const chunkNew = chunk.clone()
- chunkNew.type = "audio"
- chunkNew.payload = buffer
+ chunkNew.type = "audio"
+ chunkNew.payload = buffer
+ chunkNew.timestampEnd = Duration.fromMillis(chunkNew.timestampStart.toMillis() + durationMs)
+ clearProcessTimeout()
  this.push(chunkNew)
  callback()
  }).catch((error: unknown) => {
- callback(util.ensureError(error, "failed to send to AWS Polly"))
+ clearProcessTimeout()
+ callback(util.ensureError(error, "AWS Polly processing failed"))
  })
  }
- else
- callback()
  },
  final (callback) {
  callback()
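
Note on the duration recalculation above: the Polly transform now derives chunk.timestampEnd from the size of the synthesized PCM buffer rather than reusing the text chunk's timestamps. A minimal sketch of that math, assuming mono interleaved PCM and the sample rate and bit depth from the node configuration (the package's actual util.audioBufferDuration helper may be implemented differently):

    /* sketch: playback duration (in seconds) of a raw PCM buffer,
       assuming a single channel of interleaved integer samples */
    function audioBufferDuration (buffer: Buffer, sampleRate: number, bitDepth: number): number {
        const bytesPerSample = bitDepth / 8
        const samples = buffer.byteLength / bytesPerSample
        return samples / sampleRate
    }

    /* used as in the hunk above: end = start + buffer duration */
    // const durationMs = audioBufferDuration(buffer, 48000, 16) * 1000
    // chunkNew.timestampEnd = Duration.fromMillis(chunkNew.timestampStart.toMillis() + durationMs)
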
@@ -24,8 +24,8 @@ export default class SpeechFlowNodeT2AElevenlabs extends SpeechFlowNode {

  /* internal state */
  private elevenlabs: ElevenLabs.ElevenLabsClient | null = null
- private closing = false
- private resampler: SpeexResampler | null = null
+ private resampler: SpeexResampler | null = null
+ private closing = false

  /* construct node */
  constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
@@ -131,8 +131,8 @@ export default class SpeechFlowNodeT2AElevenlabs extends SpeechFlowNode {
  })
  }

- /* establish resampler from ElevenLabs's maximum 24Khz
- output to our standard audio sample rate (48KHz) */
+ /* establish resampler from ElevenLabs's tier-dependent
+ output sample rate to our standard audio sample rate (48KHz) */
  this.resampler = new SpeexResampler(1, maxSampleRate, this.config.audioSampleRate, 7)

  /* create transform stream and connect it to the ElevenLabs API */
@@ -147,6 +147,8 @@
  callback(new Error("stream already destroyed"))
  else if (Buffer.isBuffer(chunk.payload))
  callback(new Error("invalid chunk payload type"))
+ else if (chunk.payload === "")
+ callback()
  else {
  let processTimeout: ReturnType<typeof setTimeout> | null = setTimeout(() => {
  processTimeout = null
@@ -126,11 +126,8 @@ export default class SpeechFlowNodeT2AGoogle extends SpeechFlowNode {
  callback(new Error("stream already destroyed"))
  else if (Buffer.isBuffer(chunk.payload))
  callback(new Error("invalid chunk payload type"))
- else if (chunk.payload === "") {
- /* pass through empty chunks */
- this.push(chunk)
+ else if (chunk.payload === "")
  callback()
- }
  else {
  let processTimeout: ReturnType<typeof setTimeout> | null = setTimeout(() => {
  processTimeout = null
@@ -9,6 +9,7 @@ import Stream from "node:stream"

  /* external dependencies */
  import { KokoroTTS } from "kokoro-js"
+ import { Duration } from "luxon"
  import SpeexResampler from "speex-resampler"

  /* internal dependencies */
@@ -21,9 +22,9 @@ export default class SpeechFlowNodeT2AKokoro extends SpeechFlowNode {
  public static name = "t2a-kokoro"

  /* internal state */
- private kokoro: KokoroTTS | null = null
- private closing = false
+ private kokoro: KokoroTTS | null = null
  private resampler: SpeexResampler | null = null
+ private closing = false

  /* construct node */
  constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
@@ -122,9 +123,7 @@
  }

  /* resample audio samples from PCM/I16/24Khz to PCM/I16/48KHz */
- const buffer2 = this.resampler!.processChunk(buffer1)
-
- return buffer2
+ return this.resampler!.processChunk(buffer1)
  }

  /* create transform stream and connect it to the Kokoro API */
@@ -139,18 +138,42 @@
  callback(new Error("stream already destroyed"))
  else if (Buffer.isBuffer(chunk.payload))
  callback(new Error("invalid chunk payload type"))
+ else if (chunk.payload === "")
+ callback()
  else {
+ let processTimeout: ReturnType<typeof setTimeout> | null = setTimeout(() => {
+ processTimeout = null
+ callback(new Error("Kokoro TTS timeout"))
+ }, 60 * 1000)
+ const clearProcessTimeout = () => {
+ if (processTimeout !== null) {
+ clearTimeout(processTimeout)
+ processTimeout = null
+ }
+ }
  text2speech(chunk.payload).then((buffer) => {
- if (self.closing)
- throw new Error("stream destroyed during processing")
+ if (self.closing) {
+ clearProcessTimeout()
+ callback(new Error("stream destroyed during processing"))
+ return
+ }
  self.log("info", `Kokoro: received audio (buffer length: ${buffer.byteLength})`)
+
+ /* calculate actual audio duration from PCM buffer size */
+ const durationMs = util.audioBufferDuration(buffer,
+ self.config.audioSampleRate, self.config.audioBitDepth) * 1000
+
+ /* create new chunk with recalculated timestamps */
  const chunkNew = chunk.clone()
- chunkNew.type = "audio"
- chunkNew.payload = buffer
+ chunkNew.type = "audio"
+ chunkNew.payload = buffer
+ chunkNew.timestampEnd = Duration.fromMillis(chunkNew.timestampStart.toMillis() + durationMs)
+ clearProcessTimeout()
  this.push(chunkNew)
  callback()
  }).catch((error: unknown) => {
- callback(util.ensureError(error))
+ clearProcessTimeout()
+ callback(util.ensureError(error, "Kokoro processing failed"))
  })
  }
  },
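
The same timeout guard recurs in the Polly, ElevenLabs, Kokoro and Supertonic transform streams: a 60-second timer fails the stream callback if the TTS engine never responds, and each completion path clears the pending timer. A generic sketch of that idea, with an extra check so a late result cannot invoke the callback a second time (illustration only, not code from the package):

    /* sketch: run an async task, but fail the callback after a timeout,
       and ensure the callback is invoked at most once */
    function guardWithTimeout<T> (
        task: Promise<T>,
        done: (error: Error | null, result?: T) => void,
        timeoutMs = 60 * 1000
    ): void {
        let timer: ReturnType<typeof setTimeout> | null = setTimeout(() => {
            timer = null
            done(new Error("processing timeout"))
        }, timeoutMs)
        const settle = (error: Error | null, result?: T) => {
            if (timer === null)
                return /* timeout already reported */
            clearTimeout(timer)
            timer = null
            done(error, result)
        }
        task.then(
            (result) => settle(null, result),
            (error: unknown) => settle(error instanceof Error ? error : new Error(String(error)))
        )
    }
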
@@ -103,11 +103,8 @@ export default class SpeechFlowNodeT2AOpenAI extends SpeechFlowNode {
  callback(new Error("stream already destroyed"))
  else if (Buffer.isBuffer(chunk.payload))
  callback(new Error("invalid chunk payload type"))
- else if (chunk.payload === "") {
- /* pass through empty chunks */
- this.push(chunk)
+ else if (chunk.payload === "")
  callback()
- }
  else {
  let processTimeout: ReturnType<typeof setTimeout> | null = setTimeout(() => {
  processTimeout = null
@@ -131,6 +131,7 @@ function chunkText (text: string, maxLen = 300): string[] {
  class SupertonicTextProcessor {
  private indexer: Record<number, number>

+ /* construct text processor */
  constructor (unicodeIndexerJsonPath: string) {
  /* load and parse unicode indexer JSON */
  try {
@@ -141,6 +142,7 @@ class SupertonicTextProcessor {
  }
  }

+ /* preprocess text */
  private preprocessText (text: string): string {
  /* normalize text */
  text = text.normalize("NFKD")
@@ -211,11 +213,13 @@ class SupertonicTextProcessor {
  return text
  }

+ /* convert text to Unicode values */
  private textToUnicodeValues (text: string): number[] {
  /* convert text characters to unicode code points */
  return Array.from(text).map((char) => char.charCodeAt(0))
  }

+ /* process text list */
  call (textList: string[]): { textIds: number[][], textMask: number[][][] } {
  /* handle empty input */
  if (textList.length === 0)
@@ -246,6 +250,7 @@
  class SupertonicTTS {
  public sampleRate: number

+ /* internal TTS state */
  private cfgs: SupertonicConfig
  private textProcessor: SupertonicTextProcessor
  private dpOrt: ORT.InferenceSession
@@ -256,6 +261,7 @@ class SupertonicTTS {
  private chunkCompressFactor: number
  private latentDim: number

+ /* construct TTS engine */
  constructor (
  cfgs: SupertonicConfig,
  textProcessor: SupertonicTextProcessor,
@@ -279,6 +285,7 @@
  this.latentDim = cfgs.ttl.latent_dim
  }

+ /* sample noisy latent vectors */
  private sampleNoisyLatent (duration: number[]): { noisyLatent: number[][][], latentMask: number[][][] } {
  /* calculate dimensions for latent space */
  const wavLenMax = Math.max(...duration) * this.sampleRate
@@ -294,7 +301,6 @@
  for (let d = 0; d < latentDimExpanded; d++) {
  const row: number[] = Array.from({ length: latentLen })
  for (let t = 0; t < latentLen; t++) {
-
  /* Box-Muller transform for normal distribution */
  const eps = 1e-10
  const u1 = Math.max(eps, Math.random())
@@ -317,6 +323,7 @@
  return { noisyLatent, latentMask }
  }

+ /* perform inference */
  private async infer (textList: string[], style: SupertonicStyle, totalStep: number, speed: number): Promise<{ wav: number[], duration: number[] }> {
  /* validate batch size matches style vectors */
  if (textList.length !== style.ttl.dims[0])
@@ -392,6 +399,7 @@
  return { wav, duration: predictedDurations }
  }

+ /* synthesize speech from text */
  async synthesize (text: string, style: SupertonicStyle, totalStep: number, speed: number, silenceDuration = 0.3): Promise<{ wav: number[], duration: number }> {
  /* validate single speaker mode */
  if (style.ttl.dims[0] !== 1)
@@ -420,6 +428,7 @@
  return { wav: wavParts.flat(), duration: totalDuration }
  }

+ /* release TTS engine resources */
  async release (): Promise<void> {
  /* release all ONNX inference sessions */
  await Promise.all([
@@ -535,7 +544,7 @@
  "onnx/text_encoder.onnx",
  "onnx/unicode_indexer.json",
  "onnx/vector_estimator.onnx",
- "onnx/vocoder.onnx",
+ "onnx/vocoder.onnx"
  ]

  /* create asset directories */
@@ -602,9 +611,8 @@
  buffer1.writeInt16LE(sample * 0x7FFF, i * 2)
  }

- /* resample audio samples from 44.1kHz to 48kHz */
- const buffer2 = this.resampler!.processChunk(buffer1)
- return buffer2
+ /* resample audio samples from Supertonic sample rate to 48kHz */
+ return this.resampler!.processChunk(buffer1)
  }

  /* create transform stream and connect it to the Supertonic TTS */
@@ -619,6 +627,8 @@
  callback(new Error("stream already destroyed"))
  else if (Buffer.isBuffer(chunk.payload))
  callback(new Error("invalid chunk payload type"))
+ else if (chunk.payload === "")
+ callback()
  else {
  let processTimeout: ReturnType<typeof setTimeout> | null = setTimeout(() => {
  processTimeout = null
@@ -660,7 +670,6 @@
  callback()
  }
  catch (error) {
-
  /* handle processing errors */
  clearProcessTimeout()
  callback(util.ensureError(error, "Supertonic processing failed"))
@@ -5,7 +5,7 @@
  */

  /* standard dependencies */
- import Stream from "node:stream"
+ import Stream from "node:stream"

  /* external dependencies */
  import { TranslateClient, TranslateTextCommand } from "@aws-sdk/client-translate"
@@ -65,8 +65,6 @@ export default class SpeechFlowNodeT2TAmazon extends SpeechFlowNode {
  secretAccessKey: this.params.secKey
  }
  })
- if (this.client === null)
- throw new Error("failed to establish Amazon Translate client")

  /* provide text-to-text translation */
  const maxRetries = 10
@@ -5,10 +5,10 @@
  */

  /* standard dependencies */
- import Stream from "node:stream"
+ import Stream from "node:stream"

  /* external dependencies */
- import * as DeepL from "deepl-node"
+ import * as DeepL from "deepl-node"

  /* internal dependencies */
  import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
@@ -5,7 +5,7 @@
  */

  /* standard dependencies */
- import Stream from "node:stream"
+ import Stream from "node:stream"

  /* external dependencies */
  import { TranslationServiceClient } from "@google-cloud/translate"
@@ -68,18 +68,19 @@ export default class SpeechFlowNodeT2TOPUS extends SpeechFlowNode {
  }, 1000)

  /* instantiate Transformers engine and model */
- const pipeline = Transformers.pipeline("translation", model, {
- cache_dir: path.join(this.config.cacheDir, "transformers"),
- dtype: "q4",
- device: "auto",
- progress_callback: progressCallback
- })
- this.translator = await pipeline
- if (this.translator === null)
- throw new Error("failed to instantiate translator pipeline")
-
- /* clear progress interval again */
- clearInterval(interval)
+ try {
+ const pipeline = Transformers.pipeline("translation", model, {
+ cache_dir: path.join(this.config.cacheDir, "transformers"),
+ dtype: "q4",
+ device: "auto",
+ progress_callback: progressCallback
+ })
+ this.translator = await pipeline
+ }
+ finally {
+ /* clear progress interval again */
+ clearInterval(interval)
+ }

  /* provide text-to-text translation */
  const translate = async (text: string) => {
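
The try/finally added above ensures the progress-reporting interval is cleared even when loading the translation model throws; before this change a failed Transformers.pipeline() call would have left the interval running. The shape of that fix, reduced to its essentials (loadModel and onProgressTick are placeholder names, not package APIs):

    /* sketch: guarantee cleanup of a progress timer around an async initialization */
    async function initWithProgress<T> (loadModel: () => Promise<T>, onProgressTick: () => void): Promise<T> {
        const interval = setInterval(onProgressTick, 1000)
        try {
            return await loadModel()
        }
        finally {
            /* executed on success and on failure alike */
            clearInterval(interval)
        }
    }
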
@@ -120,17 +121,17 @@ export default class SpeechFlowNodeT2TOPUS extends SpeechFlowNode {

  /* close node */
  async close () {
- /* shutdown stream */
- if (this.stream !== null) {
- await util.destroyStream(this.stream)
- this.stream = null
- }
-
  /* shutdown Transformers */
  if (this.translator !== null) {
  this.translator.dispose()
  this.translator = null
  }
+
+ /* shutdown stream */
+ if (this.stream !== null) {
+ await util.destroyStream(this.stream)
+ this.stream = null
+ }
  }
  }

@@ -74,7 +74,7 @@ export default class SpeechFlowNodeT2TPunctuation extends SpeechFlowNode {
  "Gib KEINE Erklärungen.\n" +
  "Gib KEINE Einleitung.\n" +
  "Gib KEINE Kommentare.\n" +
- "Gib KEINE Preamble.\n" +
+ "Gib KEINE Präambel.\n" +
  "Gib KEINEN Prolog.\n" +
  "Gib KEINEN Epilog.\n" +
  "Ändere NICHT die Wörter.\n" +
@@ -66,7 +66,7 @@ export default class SpeechFlowNodeT2TSpellcheck extends SpeechFlowNode {
  "Gib KEINE Erklärungen.\n" +
  "Gib KEINE Einleitung.\n" +
  "Gib KEINE Kommentare.\n" +
- "Gib KEINE Preamble.\n" +
+ "Gib KEINE Präambel.\n" +
  "Gib KEINEN Prolog.\n" +
  "Gib KEINEN Epilog.\n" +
  "Ändere NICHT die Grammatik.\n" +
@@ -191,7 +191,7 @@ export default class SpeechFlowNodeT2TSubtitle extends SpeechFlowNode {
  for (const block of blocks) {
  const lines = block.trim().split(/\r?\n/)
  if (lines.length < 2) {
- this.log("warning", "SRT block contains less than 2 lines")
+ this.log("warning", "SRT block contains fewer than 2 lines")
  continue
  }

@@ -231,7 +231,7 @@ export default class SpeechFlowNodeT2TSubtitle extends SpeechFlowNode {
  for (const block of blocks) {
  const lines = block.trim().split(/\r?\n/)
  if (lines.length < 1) {
- this.log("warning", "VTT block contains less than 1 line")
+ this.log("warning", "VTT block contains fewer than 1 line")
  continue
  }

@@ -394,15 +394,18 @@ export default class SpeechFlowNodeT2TSubtitle extends SpeechFlowNode {
  h.response({}).code(204)
  })

+ /* start HAPI server */
  await this.hapi.start()
  this.log("info", `HAPI: started REST/WebSocket network service: http://${this.params.addr}:${this.params.port}`)

+ /* helper to emit chunks to WebSocket peers */
  const emit = (chunk: SpeechFlowChunk) => {
  const data = JSON.stringify(chunk)
  for (const info of wsPeers.values())
  info.ws.send(data)
  }

+ /* establish writable stream */
  this.stream = new Stream.Writable({
  objectMode: true,
  decodeStrings: false,
@@ -60,7 +60,7 @@ export default class SpeechFlowNodeT2TSummary extends SpeechFlowNode {
  "Gib KEINE Erklärungen.\n" +
  "Gib KEINE Einleitung.\n" +
  "Gib KEINE Kommentare.\n" +
- "Gib KEINE Prämbel.\n" +
+ "Gib KEINE Präambel.\n" +
  "Gib KEINEN Prolog.\n" +
  "Gib KEINEN Epilog.\n" +
  "Komme auf den Punkt.\n" +
@@ -120,6 +120,8 @@ export default class SpeechFlowNodeX2XFilter extends SpeechFlowNode {
  val1 = chunk.timestampStart.toMillis()
  else if (self.params.var === "time:end")
  val1 = chunk.timestampEnd.toMillis()
+ else
+ val1 = undefined
  if (comparison(val1, self.params.op, val2)) {
  self.log("info", `[${self.params.name}]: passing through ${chunk.type} chunk`)
  this.push(chunk)
@@ -198,6 +198,7 @@ export default class SpeechFlowNodeXIOExec extends SpeechFlowNode {
  this.subprocess.removeAllListeners("error")
  this.subprocess.removeAllListeners("exit")

+ /* clear subprocess reference */
  this.subprocess = null
  }

@@ -54,7 +54,7 @@ export default class SpeechFlowNodeXIOFile extends SpeechFlowNode {
  /* open node */
  async open () {
  /* determine how many bytes we need per chunk when
- the chunk should be of the required duration/size */
+ the chunk should be of the required duration/size */
  const highWaterMarkAudio = (
  this.config.audioSampleRate *
  (this.config.audioBitDepth / 8)
@@ -139,11 +139,10 @@ export default class SpeechFlowNodeXIOFile extends SpeechFlowNode {
  const payload = Buffer.isBuffer(chunk.payload) ?
  chunk.payload : Buffer.from(chunk.payload)
  const seekPosition = chunk.meta.get("chunk:seek") as number | undefined
- if (seekPosition !== undefined) {
+ if (seekPosition !== undefined)
  /* seek to specified position and write (overload) */
  fs.write(self.fd!, payload, 0, payload.byteLength, seekPosition, callback)
- }
- else {
+ else
  /* append at current position */
  fs.write(self.fd!, payload, 0, payload.byteLength, writePosition, (err) => {
  if (err)
@@ -153,7 +152,6 @@ export default class SpeechFlowNodeXIOFile extends SpeechFlowNode {
  callback()
  }
  })
- }
  },
  final (callback) {
  callback()
@@ -21,8 +21,8 @@ export default class SpeechFlowNodeXIOMQTT extends SpeechFlowNode {
  public static name = "xio-mqtt"

  /* internal state */
- private broker: MQTT.MqttClient | null = null
- private clientId: string = (new UUID(1)).format()
+ private broker: MQTT.MqttClient | null = null
+ private clientId: string = (new UUID(1)).format()
  private chunkQueue: util.SingleQueue<SpeechFlowChunk> | null = null

  /* construct node */
@@ -29,11 +29,11 @@ export default class SpeechFlowNodeXIOVBAN extends SpeechFlowNode {
  public static name = "xio-vban"

  /* internal state */
- private server: VBANServer | null = null
- private chunkQueue: util.SingleQueue<SpeechFlowChunk> | null = null
- private frameCounter = 0
- private targetAddress = ""
- private targetPort = 0
+ private server: VBANServer | null = null
+ private chunkQueue: util.SingleQueue<SpeechFlowChunk> | null = null
+ private frameCounter = 0
+ private targetAddress = ""
+ private targetPort = 0

  /* construct node */
  constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
@@ -154,6 +154,7 @@ export default class SpeechFlowNodeXIOWebRTC extends SpeechFlowNode {
  this.pcmBuffer = this.pcmBuffer.subarray(this.pcmBuffer.length - maxBufferSize)
  }

+ /* process full Opus frames from buffer */
  while (this.pcmBuffer.length >= this.OPUS_FRAME_BYTES) {
  const frame = this.pcmBuffer.subarray(0, this.OPUS_FRAME_BYTES)
  this.pcmBuffer = this.pcmBuffer.subarray(this.OPUS_FRAME_BYTES)
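
The comment added above labels the loop that slices fixed-size Opus frames off the front of a rolling PCM buffer. A standalone sketch of that framing step; the frame size here (20 ms of 48 kHz stereo 16-bit PCM) is chosen purely for illustration and need not match the node's OPUS_FRAME_BYTES:

    /* sketch: split a rolling PCM buffer into fixed-size frames, returning the leftover bytes */
    const FRAME_BYTES = 48000 * 0.020 * 2 /* channels */ * 2 /* bytes per sample */

    function takeFrames (pcmBuffer: Buffer): { frames: Buffer[], rest: Buffer } {
        const frames: Buffer[] = []
        while (pcmBuffer.length >= FRAME_BYTES) {
            frames.push(pcmBuffer.subarray(0, FRAME_BYTES))
            pcmBuffer = pcmBuffer.subarray(FRAME_BYTES)
        }
        return { frames, rest: pcmBuffer }
    }
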
@@ -418,6 +419,7 @@ export default class SpeechFlowNodeXIOWebRTC extends SpeechFlowNode {
  const isPublisher = hasSendonly || hasSendrecv
  const isViewer = hasRecvonly

+ /* handle protocol based on mode */
  if (self.params.mode === "r" && isPublisher)
  /* in read mode, accept WHIP publishers */
  await self.handleWHIP(res, body)