speechflow 1.7.1 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (99) hide show
  1. package/CHANGELOG.md +18 -0
  2. package/README.md +387 -119
  3. package/etc/claude.md +5 -5
  4. package/etc/speechflow.yaml +2 -2
  5. package/package.json +3 -3
  6. package/speechflow-cli/dst/speechflow-main-graph.d.ts +1 -0
  7. package/speechflow-cli/dst/speechflow-main-graph.js +28 -5
  8. package/speechflow-cli/dst/speechflow-main-graph.js.map +1 -1
  9. package/speechflow-cli/dst/speechflow-node-a2a-wav.js +24 -4
  10. package/speechflow-cli/dst/speechflow-node-a2a-wav.js.map +1 -1
  11. package/speechflow-cli/dst/speechflow-node-a2t-google.d.ts +17 -0
  12. package/speechflow-cli/dst/speechflow-node-a2t-google.js +320 -0
  13. package/speechflow-cli/dst/speechflow-node-a2t-google.js.map +1 -0
  14. package/speechflow-cli/dst/speechflow-node-t2a-google.d.ts +15 -0
  15. package/speechflow-cli/dst/speechflow-node-t2a-google.js +218 -0
  16. package/speechflow-cli/dst/speechflow-node-t2a-google.js.map +1 -0
  17. package/speechflow-cli/dst/speechflow-node-t2a-openai.d.ts +15 -0
  18. package/speechflow-cli/dst/speechflow-node-t2a-openai.js +195 -0
  19. package/speechflow-cli/dst/speechflow-node-t2a-openai.js.map +1 -0
  20. package/speechflow-cli/dst/speechflow-node-t2a-supertonic.d.ts +17 -0
  21. package/speechflow-cli/dst/speechflow-node-t2a-supertonic.js +608 -0
  22. package/speechflow-cli/dst/speechflow-node-t2a-supertonic.js.map +1 -0
  23. package/speechflow-cli/dst/speechflow-node-t2t-amazon.js.map +1 -1
  24. package/speechflow-cli/dst/{speechflow-node-t2t-transformers.d.ts → speechflow-node-t2t-opus.d.ts} +1 -3
  25. package/speechflow-cli/dst/speechflow-node-t2t-opus.js +159 -0
  26. package/speechflow-cli/dst/speechflow-node-t2t-opus.js.map +1 -0
  27. package/speechflow-cli/dst/speechflow-node-t2t-profanity.d.ts +11 -0
  28. package/speechflow-cli/dst/speechflow-node-t2t-profanity.js +118 -0
  29. package/speechflow-cli/dst/speechflow-node-t2t-profanity.js.map +1 -0
  30. package/speechflow-cli/dst/speechflow-node-t2t-punctuation.d.ts +13 -0
  31. package/speechflow-cli/dst/speechflow-node-t2t-punctuation.js +220 -0
  32. package/speechflow-cli/dst/speechflow-node-t2t-punctuation.js.map +1 -0
  33. package/speechflow-cli/dst/{speechflow-node-t2t-openai.d.ts → speechflow-node-t2t-spellcheck.d.ts} +2 -2
  34. package/speechflow-cli/dst/{speechflow-node-t2t-openai.js → speechflow-node-t2t-spellcheck.js} +47 -99
  35. package/speechflow-cli/dst/speechflow-node-t2t-spellcheck.js.map +1 -0
  36. package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js +3 -6
  37. package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js.map +1 -1
  38. package/speechflow-cli/dst/speechflow-node-t2t-summary.d.ts +16 -0
  39. package/speechflow-cli/dst/speechflow-node-t2t-summary.js +241 -0
  40. package/speechflow-cli/dst/speechflow-node-t2t-summary.js.map +1 -0
  41. package/speechflow-cli/dst/{speechflow-node-t2t-ollama.d.ts → speechflow-node-t2t-translate.d.ts} +2 -2
  42. package/speechflow-cli/dst/{speechflow-node-t2t-transformers.js → speechflow-node-t2t-translate.js} +53 -115
  43. package/speechflow-cli/dst/speechflow-node-t2t-translate.js.map +1 -0
  44. package/speechflow-cli/dst/speechflow-node-xio-exec.d.ts +12 -0
  45. package/speechflow-cli/dst/speechflow-node-xio-exec.js +223 -0
  46. package/speechflow-cli/dst/speechflow-node-xio-exec.js.map +1 -0
  47. package/speechflow-cli/dst/speechflow-node-xio-file.d.ts +1 -0
  48. package/speechflow-cli/dst/speechflow-node-xio-file.js +79 -66
  49. package/speechflow-cli/dst/speechflow-node-xio-file.js.map +1 -1
  50. package/speechflow-cli/dst/speechflow-node-xio-vban.d.ts +17 -0
  51. package/speechflow-cli/dst/speechflow-node-xio-vban.js +330 -0
  52. package/speechflow-cli/dst/speechflow-node-xio-vban.js.map +1 -0
  53. package/speechflow-cli/dst/speechflow-node-xio-webrtc.d.ts +39 -0
  54. package/speechflow-cli/dst/speechflow-node-xio-webrtc.js +500 -0
  55. package/speechflow-cli/dst/speechflow-node-xio-webrtc.js.map +1 -0
  56. package/speechflow-cli/dst/speechflow-util-audio.js +4 -5
  57. package/speechflow-cli/dst/speechflow-util-audio.js.map +1 -1
  58. package/speechflow-cli/dst/speechflow-util-error.d.ts +1 -0
  59. package/speechflow-cli/dst/speechflow-util-error.js +5 -0
  60. package/speechflow-cli/dst/speechflow-util-error.js.map +1 -1
  61. package/speechflow-cli/dst/speechflow-util-llm.d.ts +35 -0
  62. package/speechflow-cli/dst/speechflow-util-llm.js +363 -0
  63. package/speechflow-cli/dst/speechflow-util-llm.js.map +1 -0
  64. package/speechflow-cli/dst/speechflow-util.d.ts +1 -0
  65. package/speechflow-cli/dst/speechflow-util.js +1 -0
  66. package/speechflow-cli/dst/speechflow-util.js.map +1 -1
  67. package/speechflow-cli/etc/oxlint.jsonc +2 -1
  68. package/speechflow-cli/package.json +34 -17
  69. package/speechflow-cli/src/lib.d.ts +5 -0
  70. package/speechflow-cli/src/speechflow-main-graph.ts +31 -5
  71. package/speechflow-cli/src/speechflow-node-a2a-wav.ts +24 -4
  72. package/speechflow-cli/src/speechflow-node-a2t-google.ts +322 -0
  73. package/speechflow-cli/src/speechflow-node-t2a-google.ts +206 -0
  74. package/speechflow-cli/src/speechflow-node-t2a-openai.ts +179 -0
  75. package/speechflow-cli/src/speechflow-node-t2a-supertonic.ts +701 -0
  76. package/speechflow-cli/src/speechflow-node-t2t-amazon.ts +2 -1
  77. package/speechflow-cli/src/speechflow-node-t2t-opus.ts +136 -0
  78. package/speechflow-cli/src/speechflow-node-t2t-profanity.ts +93 -0
  79. package/speechflow-cli/src/speechflow-node-t2t-punctuation.ts +201 -0
  80. package/speechflow-cli/src/{speechflow-node-t2t-openai.ts → speechflow-node-t2t-spellcheck.ts} +48 -107
  81. package/speechflow-cli/src/speechflow-node-t2t-subtitle.ts +3 -6
  82. package/speechflow-cli/src/speechflow-node-t2t-summary.ts +229 -0
  83. package/speechflow-cli/src/speechflow-node-t2t-translate.ts +181 -0
  84. package/speechflow-cli/src/speechflow-node-xio-exec.ts +210 -0
  85. package/speechflow-cli/src/speechflow-node-xio-file.ts +92 -79
  86. package/speechflow-cli/src/speechflow-node-xio-vban.ts +325 -0
  87. package/speechflow-cli/src/speechflow-node-xio-webrtc.ts +533 -0
  88. package/speechflow-cli/src/speechflow-util-audio.ts +5 -5
  89. package/speechflow-cli/src/speechflow-util-error.ts +9 -0
  90. package/speechflow-cli/src/speechflow-util-llm.ts +367 -0
  91. package/speechflow-cli/src/speechflow-util.ts +1 -0
  92. package/speechflow-ui-db/package.json +9 -9
  93. package/speechflow-ui-st/package.json +9 -9
  94. package/speechflow-cli/dst/speechflow-node-t2t-ollama.js +0 -293
  95. package/speechflow-cli/dst/speechflow-node-t2t-ollama.js.map +0 -1
  96. package/speechflow-cli/dst/speechflow-node-t2t-openai.js.map +0 -1
  97. package/speechflow-cli/dst/speechflow-node-t2t-transformers.js.map +0 -1
  98. package/speechflow-cli/src/speechflow-node-t2t-ollama.ts +0 -281
  99. package/speechflow-cli/src/speechflow-node-t2t-transformers.ts +0 -247
@@ -1,247 +0,0 @@
1
- /*
2
- ** SpeechFlow - Speech Processing Flow Graph
3
- ** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
4
- ** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
5
- */
6
-
7
- /* standard dependencies */
8
- import path from "node:path"
9
- import Stream from "node:stream"
10
-
11
- /* external dependencies */
12
- import * as Transformers from "@huggingface/transformers"
13
-
14
- /* internal dependencies */
15
- import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
16
- import * as util from "./speechflow-util"
17
-
18
- /* internal utility types */
19
/*  a single chat message as passed to the text-generation pipeline  */
type ChatMessage = { role: string, content: string }

/*  per-model/per-language-pair LLM setup: the system prompt plus
    a few-shot example conversation which precedes the actual request  */
type ConfigEntry = { systemPrompt: string, chat: ChatMessage[] }

/*  lookup table of LLM setups, keyed by "<model>:<src>-<dst>"  */
type Config = Record<string, ConfigEntry>
21
-
22
- /* SpeechFlow node for Transformers text-to-text translation */
23
/*  SpeechFlow node for text-to-text translation between German (de) and
    English (en) via the Transformers library. Depending on the "model"
    parameter, it either uses a dedicated OPUS translation pipeline or the
    SmolLM3 text-generation pipeline driven by a few-shot prompt.

    Node parameters:
    - src   (positional 0, default "de"):   source language ("de" or "en")
    - dst   (positional 1, default "en"):   destination language ("de" or "en")
    - model (named,        default "OPUS"): "OPUS" or "SmolLM3"  */
export default class SpeechFlowNodeT2TTransformers extends SpeechFlowNode {
    /*  declare official node name  */
    public static name = "t2t-transformers"

    /*  internal state: open() instantiates exactly one of the two
        pipelines (depending on the "model" parameter), close() disposes it  */
    private translator: Transformers.TranslationPipeline | null = null
    private generator: Transformers.TextGenerationPipeline | null = null

    /*  internal LLM setup (only used for the SmolLM3 model),
        keyed by "SmolLM3:<src>-<dst>"  */
    private setup: Config = {
        /*  SmolLM3: English (EN) to German (DE) translation  */
        "SmolLM3:en-de": {
            systemPrompt:
                "/no_think\n" +
                "You are a translator.\n" +
                "Output only the requested text.\n" +
                "Do not use markdown.\n" +
                "Do not chat.\n" +
                "Do not show any explanations.\n" +
                "Do not show any introduction.\n" +
                "Do not show any preamble.\n" +
                "Do not show any prolog.\n" +
                "Do not show any epilog.\n" +
                "Get to the point.\n" +
                "Preserve the original meaning, tone, and nuance.\n" +
                "Directly translate text from English (EN) to fluent and natural German (DE) language.\n",
            chat: [
                { role: "user",      content: "I love my wife." },
                { role: "assistant", content: "Ich liebe meine Frau." },
                { role: "user",      content: "The weather is wonderful." },
                { role: "assistant", content: "Das Wetter ist wunderschön." },
                { role: "user",      content: "The life is awesome." },
                { role: "assistant", content: "Das Leben ist einfach großartig." }
            ]
        },

        /*  SmolLM3: German (DE) to English (EN) translation  */
        "SmolLM3:de-en": {
            systemPrompt:
                "/no_think\n" +
                "You are a translator.\n" +
                "Output only the requested text.\n" +
                "Do not use markdown.\n" +
                "Do not chat.\n" +
                "Do not show any explanations.\n" +
                "Do not show any introduction.\n" +
                "Do not show any preamble.\n" +
                "Do not show any prolog.\n" +
                "Do not show any epilog.\n" +
                "Get to the point.\n" +
                "Preserve the original meaning, tone, and nuance.\n" +
                "Directly translate text from German (DE) to fluent and natural English (EN) language.\n",
            chat: [
                { role: "user",      content: "Ich liebe meine Frau." },
                { role: "assistant", content: "I love my wife." },
                { role: "user",      content: "Das Wetter ist wunderschön." },
                { role: "assistant", content: "The weather is wonderful." },
                { role: "user",      content: "Das Leben ist einfach großartig." },
                { role: "assistant", content: "The life is awesome." }
            ]
        }
    }

    /*  construct node: declares the node parameters, rejects identical
        source/destination languages and declares text input/output  */
    constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
        super(id, cfg, opts, args)

        /*  declare node configuration parameters  */
        this.configure({
            src:   { type: "string", pos: 0, val: "de",   match: /^(?:de|en)$/ },
            dst:   { type: "string", pos: 1, val: "en",   match: /^(?:de|en)$/ },
            model: { type: "string",         val: "OPUS", match: /^(?:OPUS|SmolLM3)$/ }
        })

        /*  sanity check parameters  */
        if (this.params.src === this.params.dst)
            throw new Error("source and destination languages cannot be the same")

        /*  declare node input/output format  */
        this.input  = "text"
        this.output = "text"
    }

    /*  open node: instantiate the selected Transformers pipeline (logging
        the download progress once per second while doing so) and establish
        the transform stream which translates each text chunk.
        Throws if the pipeline cannot be instantiated.  */
    async open () {
        let model = ""

        /*  track download progress when instantiating Transformers engine and model  */
        const progressState = new Map<string, number>()
        const progressCallback: Transformers.ProgressCallback = (progress: any) => {
            let artifact = model
            if (typeof progress.file === "string")
                artifact += `:${progress.file}`
            let percent = 0
            if (typeof progress.loaded === "number"
                && typeof progress.total === "number"
                && progress.total > 0)
                /*  guard against a zero total to not report "Infinity%"  */
                percent = (progress.loaded / progress.total) * 100
            else if (typeof progress.progress === "number")
                percent = progress.progress
            if (percent > 0)
                progressState.set(artifact, percent)
        }
        const interval = setInterval(() => {
            for (const [ artifact, percent ] of progressState) {
                this.log("info", `downloaded ${percent.toFixed(2)}% of artifact "${artifact}"`)
                if (percent >= 100.0)
                    progressState.delete(artifact)
            }
        }, 1000)

        try {
            /*  instantiate Transformers engine and model  */
            if (this.params.model === "OPUS") {
                model = `onnx-community/opus-mt-${this.params.src}-${this.params.dst}`
                this.translator = await Transformers.pipeline("translation", model, {
                    cache_dir:         path.join(this.config.cacheDir, "transformers"),
                    dtype:             "q4",
                    device:            "auto",
                    progress_callback: progressCallback
                })
                if (this.translator === null)
                    throw new Error("failed to instantiate translator pipeline")
            }
            else if (this.params.model === "SmolLM3") {
                model = "HuggingFaceTB/SmolLM3-3B-ONNX"
                this.generator = await Transformers.pipeline("text-generation", model, {
                    cache_dir:         path.join(this.config.cacheDir, "transformers"),
                    dtype:             "q4",
                    device:            "auto",
                    progress_callback: progressCallback
                })
                if (this.generator === null)
                    throw new Error("failed to instantiate generator pipeline")
            }
            else
                throw new Error("invalid model")
        }
        finally {
            /*  clear progress interval again -- also on failure, as otherwise
                the timer would continue to log and keep the process alive  */
            clearInterval(interval)
        }

        /*  provide text-to-text translation  */
        const translate = async (text: string): Promise<string> => {
            if (this.params.model === "OPUS") {
                if (this.translator === null)
                    throw new Error("translator pipeline not available")
                const result = await this.translator(text)
                const single = Array.isArray(result) ? result[0] : result
                return (single as Transformers.TranslationSingle).translation_text
            }
            else if (this.params.model === "SmolLM3") {
                if (this.generator === null)
                    throw new Error("generator pipeline not available")
                const key = `SmolLM3:${this.params.src}-${this.params.dst}`
                const cfg = this.setup[key]
                if (cfg === undefined)
                    throw new Error(`no LLM setup available for "${key}"`)

                /*  system prompt, followed by the few-shot examples,
                    followed by the actual text to translate  */
                const messages = [
                    { role: "system", content: cfg.systemPrompt },
                    ...cfg.chat,
                    { role: "user", content: text }
                ]

                /*  NOTE(review): the streamer is created without a callback function --
                    verify that it does not echo the generated text to stdout  */
                const result = await this.generator(messages, {
                    max_new_tokens: 100,
                    temperature:    0.6,
                    top_p:          0.95,
                    streamer: new Transformers.TextStreamer(this.generator.tokenizer, {
                        skip_prompt:         true,
                        skip_special_tokens: true
                    })
                })
                const single = Array.isArray(result) ? result[0] : result
                const generatedText = (single as Transformers.TextGenerationSingle).generated_text
                if (typeof generatedText === "string")
                    return generatedText

                /*  for chat-style output, the last message carries the answer  */
                const answer = generatedText.at(-1)
                if (answer === undefined)
                    throw new Error("generator pipeline returned no message")
                return answer.content
            }
            else
                throw new Error("invalid model")
        }

        /*  establish a duplex stream and connect it to Transformers  */
        this.stream = new Stream.Transform({
            readableObjectMode: true,
            writableObjectMode: true,
            decodeStrings:      false,
            highWaterMark:      1,
            transform (chunk: SpeechFlowChunk, encoding, callback) {
                if (Buffer.isBuffer(chunk.payload))
                    callback(new Error("invalid chunk payload type"))
                else if (chunk.payload === "") {
                    /*  pass-through empty text chunks without translation  */
                    this.push(chunk)
                    callback()
                }
                else {
                    translate(chunk.payload).then((payload) => {
                        /*  emit a clone to leave the incoming chunk untouched  */
                        const translated = chunk.clone()
                        translated.payload = payload
                        this.push(translated)
                        callback()
                    }).catch((error: unknown) => {
                        callback(util.ensureError(error))
                    })
                }
            },
            final (callback) {
                this.push(null)
                callback()
            }
        })
    }

    /*  close node: destroy the stream and dispose the Transformers pipelines  */
    async close () {
        /*  shutdown stream  */
        if (this.stream !== null) {
            await util.destroyStream(this.stream)
            this.stream = null
        }

        /*  shutdown Transformers: await the disposal, so that close() resolves
            only after it finished and a failure is propagated to the caller
            (awaiting is harmless should dispose() be synchronous)  */
        if (this.translator !== null) {
            const translator = this.translator
            this.translator = null
            await translator.dispose()
        }
        if (this.generator !== null) {
            const generator = this.generator
            this.generator = null
            await generator.dispose()
        }
    }
}
247
-