speechflow 1.7.1 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (177) hide show
  1. package/CHANGELOG.md +24 -0
  2. package/README.md +388 -120
  3. package/etc/claude.md +5 -5
  4. package/etc/speechflow.yaml +2 -2
  5. package/package.json +3 -3
  6. package/speechflow-cli/dst/speechflow-main-api.js.map +1 -1
  7. package/speechflow-cli/dst/speechflow-main-cli.js +1 -0
  8. package/speechflow-cli/dst/speechflow-main-cli.js.map +1 -1
  9. package/speechflow-cli/dst/speechflow-main-graph.d.ts +1 -0
  10. package/speechflow-cli/dst/speechflow-main-graph.js +30 -9
  11. package/speechflow-cli/dst/speechflow-main-graph.js.map +1 -1
  12. package/speechflow-cli/dst/speechflow-main-nodes.js +1 -0
  13. package/speechflow-cli/dst/speechflow-main-nodes.js.map +1 -1
  14. package/speechflow-cli/dst/speechflow-node-a2a-compressor-wt.js +1 -0
  15. package/speechflow-cli/dst/speechflow-node-a2a-compressor-wt.js.map +1 -1
  16. package/speechflow-cli/dst/speechflow-node-a2a-compressor.js +7 -9
  17. package/speechflow-cli/dst/speechflow-node-a2a-compressor.js.map +1 -1
  18. package/speechflow-cli/dst/speechflow-node-a2a-expander-wt.js +1 -0
  19. package/speechflow-cli/dst/speechflow-node-a2a-expander-wt.js.map +1 -1
  20. package/speechflow-cli/dst/speechflow-node-a2a-expander.js +8 -9
  21. package/speechflow-cli/dst/speechflow-node-a2a-expander.js.map +1 -1
  22. package/speechflow-cli/dst/speechflow-node-a2a-filler.js +2 -0
  23. package/speechflow-cli/dst/speechflow-node-a2a-filler.js.map +1 -1
  24. package/speechflow-cli/dst/speechflow-node-a2a-gender.js +1 -1
  25. package/speechflow-cli/dst/speechflow-node-a2a-gender.js.map +1 -1
  26. package/speechflow-cli/dst/speechflow-node-a2a-meter.js +1 -1
  27. package/speechflow-cli/dst/speechflow-node-a2a-pitch.js +11 -9
  28. package/speechflow-cli/dst/speechflow-node-a2a-pitch.js.map +1 -1
  29. package/speechflow-cli/dst/speechflow-node-a2a-rnnoise-wt.js +1 -0
  30. package/speechflow-cli/dst/speechflow-node-a2a-rnnoise-wt.js.map +1 -1
  31. package/speechflow-cli/dst/speechflow-node-a2a-rnnoise.js.map +1 -1
  32. package/speechflow-cli/dst/speechflow-node-a2a-speex.js +4 -2
  33. package/speechflow-cli/dst/speechflow-node-a2a-speex.js.map +1 -1
  34. package/speechflow-cli/dst/speechflow-node-a2a-vad.js +19 -22
  35. package/speechflow-cli/dst/speechflow-node-a2a-vad.js.map +1 -1
  36. package/speechflow-cli/dst/speechflow-node-a2a-wav.js +31 -4
  37. package/speechflow-cli/dst/speechflow-node-a2a-wav.js.map +1 -1
  38. package/speechflow-cli/dst/speechflow-node-a2t-amazon.d.ts +0 -1
  39. package/speechflow-cli/dst/speechflow-node-a2t-amazon.js +2 -11
  40. package/speechflow-cli/dst/speechflow-node-a2t-amazon.js.map +1 -1
  41. package/speechflow-cli/dst/speechflow-node-a2t-google.d.ts +16 -0
  42. package/speechflow-cli/dst/speechflow-node-a2t-google.js +314 -0
  43. package/speechflow-cli/dst/speechflow-node-a2t-google.js.map +1 -0
  44. package/speechflow-cli/dst/speechflow-node-a2t-openai.js +6 -1
  45. package/speechflow-cli/dst/speechflow-node-a2t-openai.js.map +1 -1
  46. package/speechflow-cli/dst/speechflow-node-t2a-amazon.d.ts +1 -1
  47. package/speechflow-cli/dst/speechflow-node-t2a-amazon.js +27 -7
  48. package/speechflow-cli/dst/speechflow-node-t2a-amazon.js.map +1 -1
  49. package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.d.ts +1 -1
  50. package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.js +5 -3
  51. package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.js.map +1 -1
  52. package/speechflow-cli/dst/speechflow-node-t2a-google.d.ts +15 -0
  53. package/speechflow-cli/dst/speechflow-node-t2a-google.js +215 -0
  54. package/speechflow-cli/dst/speechflow-node-t2a-google.js.map +1 -0
  55. package/speechflow-cli/dst/speechflow-node-t2a-kokoro.d.ts +1 -1
  56. package/speechflow-cli/dst/speechflow-node-t2a-kokoro.js +27 -6
  57. package/speechflow-cli/dst/speechflow-node-t2a-kokoro.js.map +1 -1
  58. package/speechflow-cli/dst/speechflow-node-t2a-openai.d.ts +15 -0
  59. package/speechflow-cli/dst/speechflow-node-t2a-openai.js +192 -0
  60. package/speechflow-cli/dst/speechflow-node-t2a-openai.js.map +1 -0
  61. package/speechflow-cli/dst/speechflow-node-t2a-supertonic.d.ts +17 -0
  62. package/speechflow-cli/dst/speechflow-node-t2a-supertonic.js +619 -0
  63. package/speechflow-cli/dst/speechflow-node-t2a-supertonic.js.map +1 -0
  64. package/speechflow-cli/dst/speechflow-node-t2t-amazon.js +0 -2
  65. package/speechflow-cli/dst/speechflow-node-t2t-amazon.js.map +1 -1
  66. package/speechflow-cli/dst/speechflow-node-t2t-deepl.js.map +1 -1
  67. package/speechflow-cli/dst/speechflow-node-t2t-google.js.map +1 -1
  68. package/speechflow-cli/dst/{speechflow-node-t2t-transformers.d.ts → speechflow-node-t2t-opus.d.ts} +1 -3
  69. package/speechflow-cli/dst/speechflow-node-t2t-opus.js +161 -0
  70. package/speechflow-cli/dst/speechflow-node-t2t-opus.js.map +1 -0
  71. package/speechflow-cli/dst/speechflow-node-t2t-profanity.d.ts +11 -0
  72. package/speechflow-cli/dst/speechflow-node-t2t-profanity.js +118 -0
  73. package/speechflow-cli/dst/speechflow-node-t2t-profanity.js.map +1 -0
  74. package/speechflow-cli/dst/speechflow-node-t2t-punctuation.d.ts +13 -0
  75. package/speechflow-cli/dst/speechflow-node-t2t-punctuation.js +220 -0
  76. package/speechflow-cli/dst/speechflow-node-t2t-punctuation.js.map +1 -0
  77. package/speechflow-cli/dst/{speechflow-node-t2t-openai.d.ts → speechflow-node-t2t-spellcheck.d.ts} +2 -2
  78. package/speechflow-cli/dst/{speechflow-node-t2t-openai.js → speechflow-node-t2t-spellcheck.js} +48 -100
  79. package/speechflow-cli/dst/speechflow-node-t2t-spellcheck.js.map +1 -0
  80. package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js +8 -8
  81. package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js.map +1 -1
  82. package/speechflow-cli/dst/speechflow-node-t2t-summary.d.ts +16 -0
  83. package/speechflow-cli/dst/speechflow-node-t2t-summary.js +241 -0
  84. package/speechflow-cli/dst/speechflow-node-t2t-summary.js.map +1 -0
  85. package/speechflow-cli/dst/{speechflow-node-t2t-ollama.d.ts → speechflow-node-t2t-translate.d.ts} +2 -2
  86. package/speechflow-cli/dst/{speechflow-node-t2t-transformers.js → speechflow-node-t2t-translate.js} +53 -115
  87. package/speechflow-cli/dst/speechflow-node-t2t-translate.js.map +1 -0
  88. package/speechflow-cli/dst/speechflow-node-x2x-filter.js +2 -0
  89. package/speechflow-cli/dst/speechflow-node-x2x-filter.js.map +1 -1
  90. package/speechflow-cli/dst/speechflow-node-xio-exec.d.ts +12 -0
  91. package/speechflow-cli/dst/speechflow-node-xio-exec.js +224 -0
  92. package/speechflow-cli/dst/speechflow-node-xio-exec.js.map +1 -0
  93. package/speechflow-cli/dst/speechflow-node-xio-file.d.ts +1 -0
  94. package/speechflow-cli/dst/speechflow-node-xio-file.js +78 -67
  95. package/speechflow-cli/dst/speechflow-node-xio-file.js.map +1 -1
  96. package/speechflow-cli/dst/speechflow-node-xio-mqtt.js.map +1 -1
  97. package/speechflow-cli/dst/speechflow-node-xio-vban.d.ts +17 -0
  98. package/speechflow-cli/dst/speechflow-node-xio-vban.js +330 -0
  99. package/speechflow-cli/dst/speechflow-node-xio-vban.js.map +1 -0
  100. package/speechflow-cli/dst/speechflow-node-xio-webrtc.d.ts +39 -0
  101. package/speechflow-cli/dst/speechflow-node-xio-webrtc.js +502 -0
  102. package/speechflow-cli/dst/speechflow-node-xio-webrtc.js.map +1 -0
  103. package/speechflow-cli/dst/speechflow-node-xio-websocket.js +9 -9
  104. package/speechflow-cli/dst/speechflow-node-xio-websocket.js.map +1 -1
  105. package/speechflow-cli/dst/speechflow-util-audio.js +8 -5
  106. package/speechflow-cli/dst/speechflow-util-audio.js.map +1 -1
  107. package/speechflow-cli/dst/speechflow-util-error.d.ts +1 -0
  108. package/speechflow-cli/dst/speechflow-util-error.js +5 -0
  109. package/speechflow-cli/dst/speechflow-util-error.js.map +1 -1
  110. package/speechflow-cli/dst/speechflow-util-llm.d.ts +35 -0
  111. package/speechflow-cli/dst/speechflow-util-llm.js +363 -0
  112. package/speechflow-cli/dst/speechflow-util-llm.js.map +1 -0
  113. package/speechflow-cli/dst/speechflow-util-queue.js +2 -1
  114. package/speechflow-cli/dst/speechflow-util-queue.js.map +1 -1
  115. package/speechflow-cli/dst/speechflow-util.d.ts +1 -0
  116. package/speechflow-cli/dst/speechflow-util.js +2 -0
  117. package/speechflow-cli/dst/speechflow-util.js.map +1 -1
  118. package/speechflow-cli/etc/oxlint.jsonc +2 -1
  119. package/speechflow-cli/package.json +35 -18
  120. package/speechflow-cli/src/lib.d.ts +5 -0
  121. package/speechflow-cli/src/speechflow-main-api.ts +16 -16
  122. package/speechflow-cli/src/speechflow-main-cli.ts +1 -0
  123. package/speechflow-cli/src/speechflow-main-graph.ts +38 -14
  124. package/speechflow-cli/src/speechflow-main-nodes.ts +1 -0
  125. package/speechflow-cli/src/speechflow-node-a2a-compressor-wt.ts +1 -0
  126. package/speechflow-cli/src/speechflow-node-a2a-compressor.ts +8 -10
  127. package/speechflow-cli/src/speechflow-node-a2a-expander-wt.ts +1 -0
  128. package/speechflow-cli/src/speechflow-node-a2a-expander.ts +9 -10
  129. package/speechflow-cli/src/speechflow-node-a2a-filler.ts +2 -0
  130. package/speechflow-cli/src/speechflow-node-a2a-gender.ts +3 -3
  131. package/speechflow-cli/src/speechflow-node-a2a-meter.ts +2 -2
  132. package/speechflow-cli/src/speechflow-node-a2a-pitch.ts +11 -9
  133. package/speechflow-cli/src/speechflow-node-a2a-rnnoise-wt.ts +1 -0
  134. package/speechflow-cli/src/speechflow-node-a2a-rnnoise.ts +1 -1
  135. package/speechflow-cli/src/speechflow-node-a2a-speex.ts +5 -3
  136. package/speechflow-cli/src/speechflow-node-a2a-vad.ts +20 -23
  137. package/speechflow-cli/src/speechflow-node-a2a-wav.ts +31 -4
  138. package/speechflow-cli/src/speechflow-node-a2t-amazon.ts +6 -18
  139. package/speechflow-cli/src/speechflow-node-a2t-google.ts +315 -0
  140. package/speechflow-cli/src/speechflow-node-a2t-openai.ts +12 -7
  141. package/speechflow-cli/src/speechflow-node-t2a-amazon.ts +32 -10
  142. package/speechflow-cli/src/speechflow-node-t2a-elevenlabs.ts +6 -4
  143. package/speechflow-cli/src/speechflow-node-t2a-google.ts +203 -0
  144. package/speechflow-cli/src/speechflow-node-t2a-kokoro.ts +33 -10
  145. package/speechflow-cli/src/speechflow-node-t2a-openai.ts +176 -0
  146. package/speechflow-cli/src/speechflow-node-t2a-supertonic.ts +710 -0
  147. package/speechflow-cli/src/speechflow-node-t2t-amazon.ts +3 -4
  148. package/speechflow-cli/src/speechflow-node-t2t-deepl.ts +2 -2
  149. package/speechflow-cli/src/speechflow-node-t2t-google.ts +1 -1
  150. package/speechflow-cli/src/speechflow-node-t2t-opus.ts +137 -0
  151. package/speechflow-cli/src/speechflow-node-t2t-profanity.ts +93 -0
  152. package/speechflow-cli/src/speechflow-node-t2t-punctuation.ts +201 -0
  153. package/speechflow-cli/src/speechflow-node-t2t-spellcheck.ts +188 -0
  154. package/speechflow-cli/src/speechflow-node-t2t-subtitle.ts +8 -8
  155. package/speechflow-cli/src/speechflow-node-t2t-summary.ts +229 -0
  156. package/speechflow-cli/src/speechflow-node-t2t-translate.ts +181 -0
  157. package/speechflow-cli/src/speechflow-node-x2x-filter.ts +2 -0
  158. package/speechflow-cli/src/speechflow-node-xio-exec.ts +211 -0
  159. package/speechflow-cli/src/speechflow-node-xio-file.ts +91 -80
  160. package/speechflow-cli/src/speechflow-node-xio-mqtt.ts +2 -2
  161. package/speechflow-cli/src/speechflow-node-xio-vban.ts +325 -0
  162. package/speechflow-cli/src/speechflow-node-xio-webrtc.ts +535 -0
  163. package/speechflow-cli/src/speechflow-node-xio-websocket.ts +9 -9
  164. package/speechflow-cli/src/speechflow-util-audio.ts +10 -5
  165. package/speechflow-cli/src/speechflow-util-error.ts +9 -0
  166. package/speechflow-cli/src/speechflow-util-llm.ts +367 -0
  167. package/speechflow-cli/src/speechflow-util-queue.ts +3 -3
  168. package/speechflow-cli/src/speechflow-util.ts +2 -0
  169. package/speechflow-ui-db/package.json +9 -9
  170. package/speechflow-ui-st/package.json +9 -9
  171. package/speechflow-cli/dst/speechflow-node-t2t-ollama.js +0 -293
  172. package/speechflow-cli/dst/speechflow-node-t2t-ollama.js.map +0 -1
  173. package/speechflow-cli/dst/speechflow-node-t2t-openai.js.map +0 -1
  174. package/speechflow-cli/dst/speechflow-node-t2t-transformers.js.map +0 -1
  175. package/speechflow-cli/src/speechflow-node-t2t-ollama.ts +0 -281
  176. package/speechflow-cli/src/speechflow-node-t2t-openai.ts +0 -247
  177. package/speechflow-cli/src/speechflow-node-t2t-transformers.ts +0 -247
@@ -9,6 +9,7 @@ import Stream from "node:stream"
9
9
 
10
10
  /* external dependencies */
11
11
  import { getStreamAsBuffer } from "get-stream"
12
+ import { Duration } from "luxon"
12
13
  import SpeexResampler from "speex-resampler"
13
14
  import {
14
15
  PollyClient, SynthesizeSpeechCommand,
@@ -25,9 +26,9 @@ export default class SpeechFlowNodeT2AAmazon extends SpeechFlowNode {
25
26
  public static name = "t2a-amazon"
26
27
 
27
28
  /* internal state */
28
- private client: PollyClient | null = null
29
- private closing = false
29
+ private client: PollyClient | null = null
30
30
  private resampler: SpeexResampler | null = null
31
+ private closing = false
31
32
 
32
33
  /* construct node */
33
34
  constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
@@ -129,22 +130,43 @@ export default class SpeechFlowNodeT2AAmazon extends SpeechFlowNode {
129
130
  }
130
131
  if (Buffer.isBuffer(chunk.payload))
131
132
  callback(new Error("invalid chunk payload type"))
132
- else if (chunk.payload.length > 0) {
133
+ else if (chunk.payload === "")
134
+ callback()
135
+ else {
136
+ let processTimeout: ReturnType<typeof setTimeout> | null = setTimeout(() => {
137
+ processTimeout = null
138
+ callback(new Error("AWS Polly API timeout"))
139
+ }, 60 * 1000)
140
+ const clearProcessTimeout = () => {
141
+ if (processTimeout !== null) {
142
+ clearTimeout(processTimeout)
143
+ processTimeout = null
144
+ }
145
+ }
133
146
  self.log("debug", `send data (${chunk.payload.length} bytes): "${chunk.payload}"`)
134
147
  textToSpeech(chunk.payload as string).then((buffer) => {
135
- if (self.closing)
136
- throw new Error("stream destroyed during processing")
148
+ if (self.closing) {
149
+ clearProcessTimeout()
150
+ callback(new Error("stream destroyed during processing"))
151
+ return
152
+ }
153
+ /* calculate actual audio duration from PCM buffer size */
154
+ const durationMs = util.audioBufferDuration(buffer,
155
+ self.config.audioSampleRate, self.config.audioBitDepth) * 1000
156
+
157
+ /* create new chunk with recalculated timestamps */
137
158
  const chunkNew = chunk.clone()
138
- chunkNew.type = "audio"
139
- chunkNew.payload = buffer
159
+ chunkNew.type = "audio"
160
+ chunkNew.payload = buffer
161
+ chunkNew.timestampEnd = Duration.fromMillis(chunkNew.timestampStart.toMillis() + durationMs)
162
+ clearProcessTimeout()
140
163
  this.push(chunkNew)
141
164
  callback()
142
165
  }).catch((error: unknown) => {
143
- callback(util.ensureError(error, "failed to send to AWS Polly"))
166
+ clearProcessTimeout()
167
+ callback(util.ensureError(error, "AWS Polly processing failed"))
144
168
  })
145
169
  }
146
- else
147
- callback()
148
170
  },
149
171
  final (callback) {
150
172
  callback()
@@ -24,8 +24,8 @@ export default class SpeechFlowNodeT2AElevenlabs extends SpeechFlowNode {
24
24
 
25
25
  /* internal state */
26
26
  private elevenlabs: ElevenLabs.ElevenLabsClient | null = null
27
- private closing = false
28
- private resampler: SpeexResampler | null = null
27
+ private resampler: SpeexResampler | null = null
28
+ private closing = false
29
29
 
30
30
  /* construct node */
31
31
  constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
@@ -131,8 +131,8 @@ export default class SpeechFlowNodeT2AElevenlabs extends SpeechFlowNode {
131
131
  })
132
132
  }
133
133
 
134
- /* establish resampler from ElevenLabs's maximum 24Khz
135
- output to our standard audio sample rate (48KHz) */
134
+ /* establish resampler from ElevenLabs's tier-dependent
135
+ output sample rate to our standard audio sample rate (48KHz) */
136
136
  this.resampler = new SpeexResampler(1, maxSampleRate, this.config.audioSampleRate, 7)
137
137
 
138
138
  /* create transform stream and connect it to the ElevenLabs API */
@@ -147,6 +147,8 @@ export default class SpeechFlowNodeT2AElevenlabs extends SpeechFlowNode {
147
147
  callback(new Error("stream already destroyed"))
148
148
  else if (Buffer.isBuffer(chunk.payload))
149
149
  callback(new Error("invalid chunk payload type"))
150
+ else if (chunk.payload === "")
151
+ callback()
150
152
  else {
151
153
  let processTimeout: ReturnType<typeof setTimeout> | null = setTimeout(() => {
152
154
  processTimeout = null
@@ -0,0 +1,203 @@
1
+ /*
2
+ ** SpeechFlow - Speech Processing Flow Graph
3
+ ** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
4
+ ** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
5
+ */
6
+
7
+ /* standard dependencies */
8
+ import Stream from "node:stream"
9
+
10
+ /* external dependencies */
11
+ import * as GoogleTTS from "@google-cloud/text-to-speech"
12
+ import { Duration } from "luxon"
13
+ import SpeexResampler from "speex-resampler"
14
+ import * as arktype from "arktype"
15
+
16
+ /* internal dependencies */
17
+ import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
18
+ import * as util from "./speechflow-util"
19
+
20
/**
 *  SpeechFlow node for Google Cloud text-to-speech conversion.
 *
 *  Consumes text chunks, sends each one to the Google Cloud TTS
 *  `synthesizeSpeech` API (LINEAR16 at 24kHz), resamples the returned
 *  PCM audio to the configured audio sample rate and emits it as an
 *  audio chunk whose end timestamp is derived from the audio length.
 */
export default class SpeechFlowNodeT2AGoogle extends SpeechFlowNode {
    /* declare official node name */
    public static name = "t2a-google"

    /* internal state */
    private client: GoogleTTS.TextToSpeechClient | null = null
    private resampler: SpeexResampler | null = null
    private closing = false

    /* construct node */
    constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
        super(id, cfg, opts, args)

        /* declare node configuration parameters */
        this.configure({
            key: { type: "string", val: process.env.SPEECHFLOW_GOOGLE_KEY ?? "" },
            voice: { type: "string", pos: 0, val: "en-US-Neural2-J" },
            language: { type: "string", pos: 1, val: "en-US" },
            speed: { type: "number", pos: 2, val: 1.0, match: (n: number) => n >= 0.25 && n <= 4.0 },
            pitch: { type: "number", pos: 3, val: 0.0, match: (n: number) => n >= -20.0 && n <= 20.0 }
        })

        /* validate API key */
        if (this.params.key === "")
            throw new Error("Google Cloud API credentials JSON key is required")

        /* declare node input/output format */
        this.input = "text"
        this.output = "audio"
    }

    /* one-time status of node */
    async status () {
        return {}
    }

    /* strip a leading RIFF/WAVE container header (if any) and return
       just the PCM sample data; buffers without such a header, or
       without a "data" chunk, are returned unchanged */
    private static stripWavHeader (buffer: Buffer): Buffer {
        if (buffer.length < 12
            || buffer.toString("latin1", 0, 4) !== "RIFF"
            || buffer.toString("latin1", 8, 12) !== "WAVE")
            return buffer

        /* walk the RIFF chunk list until the "data" chunk is found */
        let offset = 12
        while (offset + 8 <= buffer.length) {
            const chunkId = buffer.toString("latin1", offset, offset + 4)
            const chunkSize = buffer.readUInt32LE(offset + 4)
            const chunkStart = offset + 8
            if (chunkId === "data") {
                /* tolerate placeholder or oversized length fields
                   by falling back to "everything until the end" */
                const chunkEnd = (chunkSize > 0 && chunkStart + chunkSize <= buffer.length)
                    ? chunkStart + chunkSize
                    : buffer.length
                return buffer.subarray(chunkStart, chunkEnd)
            }

            /* RIFF chunks are padded to an even number of bytes */
            offset = chunkStart + chunkSize + (chunkSize % 2)
        }
        return buffer
    }

    /* open node */
    async open () {
        /* clear destruction flag */
        this.closing = false

        /* instantiate Google TTS client */
        const data = util.run("Google Cloud API credentials key", () =>
            JSON.parse(this.params.key))
        const credentials = util.importObject("Google Cloud API credentials key",
            data,
            arktype.type({
                project_id: "string",
                private_key: "string",
                client_email: "string"
            })
        )
        const client = new GoogleTTS.TextToSpeechClient({
            credentials: {
                private_key: credentials.private_key,
                client_email: credentials.client_email
            },
            projectId: credentials.project_id
        })
        this.client = client

        /* establish resampler from Google TTS's output sample rate
           to our standard audio sample rate (48kHz) */
        const googleSampleRate = 24000 /* requested below via sampleRateHertz */
        const resampler = new SpeexResampler(1, googleSampleRate, this.config.audioSampleRate, 7)
        this.resampler = resampler

        /* perform text-to-speech operation with Google Cloud TTS API
           (client and resampler are captured locally, so a concurrent
           close() nulling the members cannot cause a null dereference) */
        const textToSpeech = async (text: string) => {
            this.log("info", `Google TTS: send text "${text}"`)
            const [ response ] = await client.synthesizeSpeech({
                input: { text },
                voice: {
                    languageCode: this.params.language,
                    name: this.params.voice
                },
                audioConfig: {
                    audioEncoding: "LINEAR16",
                    sampleRateHertz: googleSampleRate,
                    speakingRate: this.params.speed,
                    pitch: this.params.pitch
                }
            })
            const audioContent = response.audioContent
            if (!audioContent)
                throw new Error("no audio content returned from Google TTS")

            /* convert response to buffer (a string value is the
               base64 representation of the protobuf "bytes" field) */
            let buffer: Buffer
            if (Buffer.isBuffer(audioContent))
                buffer = audioContent
            else if (typeof audioContent === "string")
                buffer = Buffer.from(audioContent, "base64")
            else
                buffer = Buffer.from(audioContent)
            this.log("info", `Google TTS: received audio (buffer length: ${buffer.byteLength})`)

            /* LINEAR16 responses carry a WAV header in front of the samples,
               which must not be fed into the resampler as audio data */
            const pcm = SpeechFlowNodeT2AGoogle.stripWavHeader(buffer)

            /* resample from Google's sample rate to our standard rate */
            const bufferResampled = resampler.processChunk(pcm)
            this.log("info", `Google TTS: forwarding resampled audio (buffer length: ${bufferResampled.byteLength})`)
            return bufferResampled
        }

        /* create transform stream and connect it to the Google TTS API */
        const self = this
        this.stream = new Stream.Transform({
            writableObjectMode: true,
            readableObjectMode: true,
            decodeStrings: false,
            highWaterMark: 1,
            async transform (chunk: SpeechFlowChunk, encoding, callback) {
                /* reject or skip chunks which cannot be processed */
                if (self.closing) {
                    callback(new Error("stream already destroyed"))
                    return
                }
                if (Buffer.isBuffer(chunk.payload)) {
                    callback(new Error("invalid chunk payload type"))
                    return
                }
                if (chunk.payload === "") {
                    callback()
                    return
                }

                /* complete the transform exactly once: whichever of
                   timeout, failure or success happens first wins and
                   all later outcomes are silently dropped, because the
                   stream callback must never be invoked twice */
                let settled = false
                let processTimeout: ReturnType<typeof setTimeout> | null = null
                const finish = (error?: Error, chunkOut?: SpeechFlowChunk) => {
                    if (settled)
                        return
                    settled = true
                    if (processTimeout !== null) {
                        clearTimeout(processTimeout)
                        processTimeout = null
                    }
                    if (error !== undefined)
                        callback(error)
                    else {
                        if (chunkOut !== undefined)
                            this.push(chunkOut)
                        callback()
                    }
                }
                processTimeout = setTimeout(() => {
                    finish(new Error("Google TTS API timeout"))
                }, 60 * 1000)

                try {
                    const buffer = await textToSpeech(chunk.payload as string)
                    if (self.closing) {
                        finish(new Error("stream destroyed during processing"))
                        return
                    }

                    /* calculate actual audio duration from PCM buffer size */
                    const durationMs = util.audioBufferDuration(buffer,
                        self.config.audioSampleRate, self.config.audioBitDepth) * 1000

                    /* create new chunk with recalculated timestamps */
                    const chunkNew = chunk.clone()
                    chunkNew.type = "audio"
                    chunkNew.payload = buffer
                    chunkNew.timestampEnd = Duration.fromMillis(chunkNew.timestampStart.toMillis() + durationMs)
                    finish(undefined, chunkNew)
                }
                catch (error) {
                    finish(util.ensureError(error, "Google TTS processing failed"))
                }
            },
            final (callback) {
                callback()
            }
        })
    }

    /* close node */
    async close () {
        /* indicate closing */
        this.closing = true

        /* shutdown stream */
        if (this.stream !== null) {
            await util.destroyStream(this.stream)
            this.stream = null
        }

        /* destroy resampler */
        if (this.resampler !== null)
            this.resampler = null

        /* destroy Google TTS client (best-effort: a failing close is only logged) */
        if (this.client !== null) {
            await this.client.close().catch((error: unknown) => {
                this.log("warning", `error closing Google TTS client: ${error}`)
            })
            this.client = null
        }
    }
}
@@ -9,6 +9,7 @@ import Stream from "node:stream"
9
9
 
10
10
  /* external dependencies */
11
11
  import { KokoroTTS } from "kokoro-js"
12
+ import { Duration } from "luxon"
12
13
  import SpeexResampler from "speex-resampler"
13
14
 
14
15
  /* internal dependencies */
@@ -21,9 +22,9 @@ export default class SpeechFlowNodeT2AKokoro extends SpeechFlowNode {
21
22
  public static name = "t2a-kokoro"
22
23
 
23
24
  /* internal state */
24
- private kokoro: KokoroTTS | null = null
25
- private closing = false
25
+ private kokoro: KokoroTTS | null = null
26
26
  private resampler: SpeexResampler | null = null
27
+ private closing = false
27
28
 
28
29
  /* construct node */
29
30
  constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
@@ -122,9 +123,7 @@ export default class SpeechFlowNodeT2AKokoro extends SpeechFlowNode {
122
123
  }
123
124
 
124
125
  /* resample audio samples from PCM/I16/24Khz to PCM/I16/48KHz */
125
- const buffer2 = this.resampler!.processChunk(buffer1)
126
-
127
- return buffer2
126
+ return this.resampler!.processChunk(buffer1)
128
127
  }
129
128
 
130
129
  /* create transform stream and connect it to the Kokoro API */
@@ -139,18 +138,42 @@ export default class SpeechFlowNodeT2AKokoro extends SpeechFlowNode {
139
138
  callback(new Error("stream already destroyed"))
140
139
  else if (Buffer.isBuffer(chunk.payload))
141
140
  callback(new Error("invalid chunk payload type"))
141
+ else if (chunk.payload === "")
142
+ callback()
142
143
  else {
144
+ let processTimeout: ReturnType<typeof setTimeout> | null = setTimeout(() => {
145
+ processTimeout = null
146
+ callback(new Error("Kokoro TTS timeout"))
147
+ }, 60 * 1000)
148
+ const clearProcessTimeout = () => {
149
+ if (processTimeout !== null) {
150
+ clearTimeout(processTimeout)
151
+ processTimeout = null
152
+ }
153
+ }
143
154
  text2speech(chunk.payload).then((buffer) => {
144
- if (self.closing)
145
- throw new Error("stream destroyed during processing")
155
+ if (self.closing) {
156
+ clearProcessTimeout()
157
+ callback(new Error("stream destroyed during processing"))
158
+ return
159
+ }
146
160
  self.log("info", `Kokoro: received audio (buffer length: ${buffer.byteLength})`)
161
+
162
+ /* calculate actual audio duration from PCM buffer size */
163
+ const durationMs = util.audioBufferDuration(buffer,
164
+ self.config.audioSampleRate, self.config.audioBitDepth) * 1000
165
+
166
+ /* create new chunk with recalculated timestamps */
147
167
  const chunkNew = chunk.clone()
148
- chunkNew.type = "audio"
149
- chunkNew.payload = buffer
168
+ chunkNew.type = "audio"
169
+ chunkNew.payload = buffer
170
+ chunkNew.timestampEnd = Duration.fromMillis(chunkNew.timestampStart.toMillis() + durationMs)
171
+ clearProcessTimeout()
150
172
  this.push(chunkNew)
151
173
  callback()
152
174
  }).catch((error: unknown) => {
153
- callback(util.ensureError(error))
175
+ clearProcessTimeout()
176
+ callback(util.ensureError(error, "Kokoro processing failed"))
154
177
  })
155
178
  }
156
179
  },
@@ -0,0 +1,176 @@
1
+ /*
2
+ ** SpeechFlow - Speech Processing Flow Graph
3
+ ** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
4
+ ** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
5
+ */
6
+
7
+ /* standard dependencies */
8
+ import Stream from "node:stream"
9
+
10
+ /* external dependencies */
11
+ import OpenAI from "openai"
12
+ import { Duration } from "luxon"
13
+ import SpeexResampler from "speex-resampler"
14
+
15
+ /* internal dependencies */
16
+ import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
17
+ import * as util from "./speechflow-util"
18
+
19
/**
 *  SpeechFlow node for OpenAI text-to-speech conversion.
 *
 *  Consumes text chunks, sends each one to the OpenAI speech API
 *  (raw PCM response format), resamples the returned 24kHz audio to
 *  the configured audio sample rate and emits it as an audio chunk
 *  whose end timestamp is derived from the audio length.
 */
export default class SpeechFlowNodeT2AOpenAI extends SpeechFlowNode {
    /* declare official node name */
    public static name = "t2a-openai"

    /* internal state */
    private openai: OpenAI | null = null
    private resampler: SpeexResampler | null = null
    private closing = false

    /* construct node */
    constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
        super(id, cfg, opts, args)

        /* declare node configuration parameters */
        this.configure({
            key: { type: "string", val: process.env.SPEECHFLOW_OPENAI_KEY },
            api: { type: "string", val: "https://api.openai.com/v1", match: /^https?:\/\/.+/ },
            voice: { type: "string", val: "alloy", pos: 0, match: /^(?:alloy|echo|fable|onyx|nova|shimmer)$/ },
            model: { type: "string", val: "tts-1", pos: 1, match: /^(?:tts-1|tts-1-hd)$/ },
            speed: { type: "number", val: 1.0, pos: 2, match: (n: number) => n >= 0.25 && n <= 4.0 }
        })

        /* sanity check parameters */
        if (!this.params.key)
            throw new Error("OpenAI API key not configured")

        /* declare node input/output format */
        this.input = "text"
        this.output = "audio"
    }

    /* one-time status of node */
    async status () {
        return {}
    }

    /* open node */
    async open () {
        /* clear destruction flag */
        this.closing = false

        /* establish OpenAI API connection */
        const openai = new OpenAI({
            baseURL: this.params.api,
            apiKey: this.params.key,
            timeout: 60000
        })
        this.openai = openai

        /* establish resampler from OpenAI's 24kHz PCM output
           to our standard audio sample rate (48kHz) */
        const resampler = new SpeexResampler(1, 24000, this.config.audioSampleRate, 7)
        this.resampler = resampler

        /* perform text-to-speech operation with OpenAI API
           (client and resampler are captured locally, so a concurrent
           close() nulling the members cannot cause a null dereference) */
        const textToSpeech = async (text: string) => {
            this.log("info", `OpenAI TTS: send text "${text}"`)
            const response = await openai.audio.speech.create({
                model: this.params.model,
                voice: this.params.voice,
                input: text,
                response_format: "pcm",
                speed: this.params.speed
            })

            /* convert response to buffer (PCM 24kHz, 16-bit, little-endian) */
            const arrayBuffer = await response.arrayBuffer()
            const buffer = Buffer.from(arrayBuffer)
            this.log("info", `OpenAI TTS: received audio (buffer length: ${buffer.byteLength})`)

            /* resample from 24kHz to 48kHz */
            const bufferResampled = resampler.processChunk(buffer)
            this.log("info", `OpenAI TTS: forwarding resampled audio (buffer length: ${bufferResampled.byteLength})`)
            return bufferResampled
        }

        /* create transform stream and connect it to the OpenAI API */
        const self = this
        this.stream = new Stream.Transform({
            writableObjectMode: true,
            readableObjectMode: true,
            decodeStrings: false,
            highWaterMark: 1,
            async transform (chunk: SpeechFlowChunk, encoding, callback) {
                /* reject or skip chunks which cannot be processed */
                if (self.closing) {
                    callback(new Error("stream already destroyed"))
                    return
                }
                if (Buffer.isBuffer(chunk.payload)) {
                    callback(new Error("invalid chunk payload type"))
                    return
                }
                if (chunk.payload === "") {
                    callback()
                    return
                }

                /* complete the transform exactly once: whichever of
                   timeout, failure or success happens first wins and
                   all later outcomes are silently dropped, because the
                   stream callback must never be invoked twice */
                let settled = false
                let processTimeout: ReturnType<typeof setTimeout> | null = null
                const finish = (error?: Error, chunkOut?: SpeechFlowChunk) => {
                    if (settled)
                        return
                    settled = true
                    if (processTimeout !== null) {
                        clearTimeout(processTimeout)
                        processTimeout = null
                    }
                    if (error !== undefined)
                        callback(error)
                    else {
                        if (chunkOut !== undefined)
                            this.push(chunkOut)
                        callback()
                    }
                }
                processTimeout = setTimeout(() => {
                    finish(new Error("OpenAI TTS API timeout"))
                }, 60 * 1000)

                try {
                    const buffer = await textToSpeech(chunk.payload as string)
                    if (self.closing) {
                        finish(new Error("stream destroyed during processing"))
                        return
                    }

                    /* calculate actual audio duration from PCM buffer size */
                    const durationMs = util.audioBufferDuration(buffer,
                        self.config.audioSampleRate, self.config.audioBitDepth) * 1000

                    /* create new chunk with recalculated timestamps */
                    const chunkNew = chunk.clone()
                    chunkNew.type = "audio"
                    chunkNew.payload = buffer
                    chunkNew.timestampEnd = Duration.fromMillis(chunkNew.timestampStart.toMillis() + durationMs)
                    finish(undefined, chunkNew)
                }
                catch (error) {
                    finish(util.ensureError(error, "OpenAI TTS processing failed"))
                }
            },
            final (callback) {
                callback()
            }
        })
    }

    /* close node */
    async close () {
        /* indicate closing */
        this.closing = true

        /* shutdown stream */
        if (this.stream !== null) {
            await util.destroyStream(this.stream)
            this.stream = null
        }

        /* destroy resampler */
        if (this.resampler !== null)
            this.resampler = null

        /* destroy OpenAI API */
        if (this.openai !== null)
            this.openai = null
    }
}