speechflow 1.7.1 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (99)
  1. package/CHANGELOG.md +18 -0
  2. package/README.md +387 -119
  3. package/etc/claude.md +5 -5
  4. package/etc/speechflow.yaml +2 -2
  5. package/package.json +3 -3
  6. package/speechflow-cli/dst/speechflow-main-graph.d.ts +1 -0
  7. package/speechflow-cli/dst/speechflow-main-graph.js +28 -5
  8. package/speechflow-cli/dst/speechflow-main-graph.js.map +1 -1
  9. package/speechflow-cli/dst/speechflow-node-a2a-wav.js +24 -4
  10. package/speechflow-cli/dst/speechflow-node-a2a-wav.js.map +1 -1
  11. package/speechflow-cli/dst/speechflow-node-a2t-google.d.ts +17 -0
  12. package/speechflow-cli/dst/speechflow-node-a2t-google.js +320 -0
  13. package/speechflow-cli/dst/speechflow-node-a2t-google.js.map +1 -0
  14. package/speechflow-cli/dst/speechflow-node-t2a-google.d.ts +15 -0
  15. package/speechflow-cli/dst/speechflow-node-t2a-google.js +218 -0
  16. package/speechflow-cli/dst/speechflow-node-t2a-google.js.map +1 -0
  17. package/speechflow-cli/dst/speechflow-node-t2a-openai.d.ts +15 -0
  18. package/speechflow-cli/dst/speechflow-node-t2a-openai.js +195 -0
  19. package/speechflow-cli/dst/speechflow-node-t2a-openai.js.map +1 -0
  20. package/speechflow-cli/dst/speechflow-node-t2a-supertonic.d.ts +17 -0
  21. package/speechflow-cli/dst/speechflow-node-t2a-supertonic.js +608 -0
  22. package/speechflow-cli/dst/speechflow-node-t2a-supertonic.js.map +1 -0
  23. package/speechflow-cli/dst/speechflow-node-t2t-amazon.js.map +1 -1
  24. package/speechflow-cli/dst/{speechflow-node-t2t-transformers.d.ts → speechflow-node-t2t-opus.d.ts} +1 -3
  25. package/speechflow-cli/dst/speechflow-node-t2t-opus.js +159 -0
  26. package/speechflow-cli/dst/speechflow-node-t2t-opus.js.map +1 -0
  27. package/speechflow-cli/dst/speechflow-node-t2t-profanity.d.ts +11 -0
  28. package/speechflow-cli/dst/speechflow-node-t2t-profanity.js +118 -0
  29. package/speechflow-cli/dst/speechflow-node-t2t-profanity.js.map +1 -0
  30. package/speechflow-cli/dst/speechflow-node-t2t-punctuation.d.ts +13 -0
  31. package/speechflow-cli/dst/speechflow-node-t2t-punctuation.js +220 -0
  32. package/speechflow-cli/dst/speechflow-node-t2t-punctuation.js.map +1 -0
  33. package/speechflow-cli/dst/{speechflow-node-t2t-openai.d.ts → speechflow-node-t2t-spellcheck.d.ts} +2 -2
  34. package/speechflow-cli/dst/{speechflow-node-t2t-openai.js → speechflow-node-t2t-spellcheck.js} +47 -99
  35. package/speechflow-cli/dst/speechflow-node-t2t-spellcheck.js.map +1 -0
  36. package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js +3 -6
  37. package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js.map +1 -1
  38. package/speechflow-cli/dst/speechflow-node-t2t-summary.d.ts +16 -0
  39. package/speechflow-cli/dst/speechflow-node-t2t-summary.js +241 -0
  40. package/speechflow-cli/dst/speechflow-node-t2t-summary.js.map +1 -0
  41. package/speechflow-cli/dst/{speechflow-node-t2t-ollama.d.ts → speechflow-node-t2t-translate.d.ts} +2 -2
  42. package/speechflow-cli/dst/{speechflow-node-t2t-transformers.js → speechflow-node-t2t-translate.js} +53 -115
  43. package/speechflow-cli/dst/speechflow-node-t2t-translate.js.map +1 -0
  44. package/speechflow-cli/dst/speechflow-node-xio-exec.d.ts +12 -0
  45. package/speechflow-cli/dst/speechflow-node-xio-exec.js +223 -0
  46. package/speechflow-cli/dst/speechflow-node-xio-exec.js.map +1 -0
  47. package/speechflow-cli/dst/speechflow-node-xio-file.d.ts +1 -0
  48. package/speechflow-cli/dst/speechflow-node-xio-file.js +79 -66
  49. package/speechflow-cli/dst/speechflow-node-xio-file.js.map +1 -1
  50. package/speechflow-cli/dst/speechflow-node-xio-vban.d.ts +17 -0
  51. package/speechflow-cli/dst/speechflow-node-xio-vban.js +330 -0
  52. package/speechflow-cli/dst/speechflow-node-xio-vban.js.map +1 -0
  53. package/speechflow-cli/dst/speechflow-node-xio-webrtc.d.ts +39 -0
  54. package/speechflow-cli/dst/speechflow-node-xio-webrtc.js +500 -0
  55. package/speechflow-cli/dst/speechflow-node-xio-webrtc.js.map +1 -0
  56. package/speechflow-cli/dst/speechflow-util-audio.js +4 -5
  57. package/speechflow-cli/dst/speechflow-util-audio.js.map +1 -1
  58. package/speechflow-cli/dst/speechflow-util-error.d.ts +1 -0
  59. package/speechflow-cli/dst/speechflow-util-error.js +5 -0
  60. package/speechflow-cli/dst/speechflow-util-error.js.map +1 -1
  61. package/speechflow-cli/dst/speechflow-util-llm.d.ts +35 -0
  62. package/speechflow-cli/dst/speechflow-util-llm.js +363 -0
  63. package/speechflow-cli/dst/speechflow-util-llm.js.map +1 -0
  64. package/speechflow-cli/dst/speechflow-util.d.ts +1 -0
  65. package/speechflow-cli/dst/speechflow-util.js +1 -0
  66. package/speechflow-cli/dst/speechflow-util.js.map +1 -1
  67. package/speechflow-cli/etc/oxlint.jsonc +2 -1
  68. package/speechflow-cli/package.json +34 -17
  69. package/speechflow-cli/src/lib.d.ts +5 -0
  70. package/speechflow-cli/src/speechflow-main-graph.ts +31 -5
  71. package/speechflow-cli/src/speechflow-node-a2a-wav.ts +24 -4
  72. package/speechflow-cli/src/speechflow-node-a2t-google.ts +322 -0
  73. package/speechflow-cli/src/speechflow-node-t2a-google.ts +206 -0
  74. package/speechflow-cli/src/speechflow-node-t2a-openai.ts +179 -0
  75. package/speechflow-cli/src/speechflow-node-t2a-supertonic.ts +701 -0
  76. package/speechflow-cli/src/speechflow-node-t2t-amazon.ts +2 -1
  77. package/speechflow-cli/src/speechflow-node-t2t-opus.ts +136 -0
  78. package/speechflow-cli/src/speechflow-node-t2t-profanity.ts +93 -0
  79. package/speechflow-cli/src/speechflow-node-t2t-punctuation.ts +201 -0
  80. package/speechflow-cli/src/{speechflow-node-t2t-openai.ts → speechflow-node-t2t-spellcheck.ts} +48 -107
  81. package/speechflow-cli/src/speechflow-node-t2t-subtitle.ts +3 -6
  82. package/speechflow-cli/src/speechflow-node-t2t-summary.ts +229 -0
  83. package/speechflow-cli/src/speechflow-node-t2t-translate.ts +181 -0
  84. package/speechflow-cli/src/speechflow-node-xio-exec.ts +210 -0
  85. package/speechflow-cli/src/speechflow-node-xio-file.ts +92 -79
  86. package/speechflow-cli/src/speechflow-node-xio-vban.ts +325 -0
  87. package/speechflow-cli/src/speechflow-node-xio-webrtc.ts +533 -0
  88. package/speechflow-cli/src/speechflow-util-audio.ts +5 -5
  89. package/speechflow-cli/src/speechflow-util-error.ts +9 -0
  90. package/speechflow-cli/src/speechflow-util-llm.ts +367 -0
  91. package/speechflow-cli/src/speechflow-util.ts +1 -0
  92. package/speechflow-ui-db/package.json +9 -9
  93. package/speechflow-ui-st/package.json +9 -9
  94. package/speechflow-cli/dst/speechflow-node-t2t-ollama.js +0 -293
  95. package/speechflow-cli/dst/speechflow-node-t2t-ollama.js.map +0 -1
  96. package/speechflow-cli/dst/speechflow-node-t2t-openai.js.map +0 -1
  97. package/speechflow-cli/dst/speechflow-node-t2t-transformers.js.map +0 -1
  98. package/speechflow-cli/src/speechflow-node-t2t-ollama.ts +0 -281
  99. package/speechflow-cli/src/speechflow-node-t2t-transformers.ts +0 -247
@@ -86,7 +86,8 @@ export default class SpeechFlowNodeT2TAmazon extends SpeechFlowNode {
86
86
  })
87
87
  const out = await this.client!.send(cmd)
88
88
  return (out.TranslatedText ?? "").trim()
89
- } catch (e: any) {
89
+ }
90
+ catch (e: any) {
90
91
  lastError = e
91
92
  attempt += 1
92
93
 
@@ -0,0 +1,136 @@
1
+ /*
2
+ ** SpeechFlow - Speech Processing Flow Graph
3
+ ** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
4
+ ** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
5
+ */
6
+
7
+ /* standard dependencies */
8
+ import path from "node:path"
9
+ import Stream from "node:stream"
10
+
11
+ /* external dependencies */
12
+ import * as Transformers from "@huggingface/transformers"
13
+
14
+ /* internal dependencies */
15
+ import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
16
+ import * as util from "./speechflow-util"
17
+
18
+ /* SpeechFlow node for OPUS text-to-text translation */
19
+ export default class SpeechFlowNodeT2TOPUS extends SpeechFlowNode {
20
+ /* declare official node name */
21
+ public static name = "t2t-opus"
22
+
23
+ /* internal state */
24
+ private translator: Transformers.TranslationPipeline | null = null
25
+
26
+ /* construct node */
27
+ constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
28
+ super(id, cfg, opts, args)
29
+
30
+ /* declare node configuration parameters */
31
+ this.configure({
32
+ src: { type: "string", pos: 0, val: "de", match: /^(?:de|en)$/ },
33
+ dst: { type: "string", pos: 1, val: "en", match: /^(?:de|en)$/ }
34
+ })
35
+
36
+ /* sanity check parameters */
37
+ if (this.params.src === this.params.dst)
38
+ throw new Error("source and destination languages cannot be the same")
39
+
40
+ /* declare node input/output format */
41
+ this.input = "text"
42
+ this.output = "text"
43
+ }
44
+
45
+ /* open node */
46
+ async open () {
47
+ /* track download progress when instantiating Transformers engine and model */
48
+ const model = `onnx-community/opus-mt-${this.params.src}-${this.params.dst}`
49
+ const progressState = new Map<string, number>()
50
+ const progressCallback: Transformers.ProgressCallback = (progress: any) => {
51
+ let artifact = model
52
+ if (typeof progress.file === "string")
53
+ artifact += `:${progress.file}`
54
+ let percent = 0
55
+ if (typeof progress.loaded === "number" && typeof progress.total === "number")
56
+ percent = (progress.loaded / progress.total) * 100
57
+ else if (typeof progress.progress === "number")
58
+ percent = progress.progress
59
+ if (percent > 0)
60
+ progressState.set(artifact, percent)
61
+ }
62
+ const interval = setInterval(() => {
63
+ for (const [ artifact, percent ] of progressState) {
64
+ this.log("info", `downloaded ${percent.toFixed(2)}% of artifact "${artifact}"`)
65
+ if (percent >= 100.0)
66
+ progressState.delete(artifact)
67
+ }
68
+ }, 1000)
69
+
70
+ /* instantiate Transformers engine and model */
71
+ const pipeline = Transformers.pipeline("translation", model, {
72
+ cache_dir: path.join(this.config.cacheDir, "transformers"),
73
+ dtype: "q4",
74
+ device: "auto",
75
+ progress_callback: progressCallback
76
+ })
77
+ this.translator = await pipeline
78
+ if (this.translator === null)
79
+ throw new Error("failed to instantiate translator pipeline")
80
+
81
+ /* clear progress interval again */
82
+ clearInterval(interval)
83
+
84
+ /* provide text-to-text translation */
85
+ const translate = async (text: string) => {
86
+ const result = await this.translator!(text)
87
+ const single = Array.isArray(result) ? result[0] : result
88
+ return (single as Transformers.TranslationSingle).translation_text
89
+ }
90
+
91
+ /* establish a duplex stream and connect it to Transformers */
92
+ this.stream = new Stream.Transform({
93
+ readableObjectMode: true,
94
+ writableObjectMode: true,
95
+ decodeStrings: false,
96
+ highWaterMark: 1,
97
+ transform (chunk: SpeechFlowChunk, encoding, callback) {
98
+ if (Buffer.isBuffer(chunk.payload))
99
+ callback(new Error("invalid chunk payload type"))
100
+ else if (chunk.payload === "") {
101
+ this.push(chunk)
102
+ callback()
103
+ }
104
+ else {
105
+ translate(chunk.payload).then((payload) => {
106
+ const chunkNew = chunk.clone()
107
+ chunkNew.payload = payload
108
+ this.push(chunkNew)
109
+ callback()
110
+ }).catch((error: unknown) => {
111
+ callback(util.ensureError(error))
112
+ })
113
+ }
114
+ },
115
+ final (callback) {
116
+ callback()
117
+ }
118
+ })
119
+ }
120
+
121
+ /* close node */
122
+ async close () {
123
+ /* shutdown stream */
124
+ if (this.stream !== null) {
125
+ await util.destroyStream(this.stream)
126
+ this.stream = null
127
+ }
128
+
129
+ /* shutdown Transformers */
130
+ if (this.translator !== null) {
131
+ this.translator.dispose()
132
+ this.translator = null
133
+ }
134
+ }
135
+ }
136
+
@@ -0,0 +1,93 @@
1
+ /*
2
+ ** SpeechFlow - Speech Processing Flow Graph
3
+ ** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
4
+ ** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
5
+ */
6
+
7
+ /* standard dependencies */
8
+ import Stream from "node:stream"
9
+
10
+ /* external dependencies */
11
+ import BadWordsNext from "bad-words-next"
12
+ import en from "bad-words-next/lib/en"
13
+ import de from "bad-words-next/lib/de"
14
+
15
+ /* internal dependencies */
16
+ import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
17
+ import * as util from "./speechflow-util"
18
+
19
+ /* language data mapping */
20
+ const langData: { [ lang: string ]: typeof en } = { en, de }
21
+
22
+ /* SpeechFlow node for text-to-text profanity filtering */
23
+ export default class SpeechFlowNodeT2TProfanity extends SpeechFlowNode {
24
+ /* declare official node name */
25
+ public static name = "t2t-profanity"
26
+
27
+ /* construct node */
28
+ constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
29
+ super(id, cfg, opts, args)
30
+
31
+ /* declare node configuration parameters */
32
+ this.configure({
33
+ lang: { type: "string", val: "en", match: /^(?:en|de)$/ },
34
+ placeholder: { type: "string", val: "***" },
35
+ mode: { type: "string", val: "replace", match: /^(?:replace|repeat)$/ }
36
+ })
37
+
38
+ /* declare node input/output format */
39
+ this.input = "text"
40
+ this.output = "text"
41
+ }
42
+
43
+ /* open node */
44
+ async open () {
45
+ /* create profanity filter instance */
46
+ const filter = util.run("creating profanity filter", () =>
47
+ new BadWordsNext({
48
+ data: langData[this.params.lang],
49
+ placeholder: this.params.placeholder,
50
+ placeholderMode: this.params.mode as "replace" | "repeat"
51
+ })
52
+ )
53
+
54
+ /* apply profanity filtering */
55
+ const censor = (text: string): string =>
56
+ filter.filter(text)
57
+
58
+ /* establish a transform stream and connect it to profanity filtering */
59
+ this.stream = new Stream.Transform({
60
+ readableObjectMode: true,
61
+ writableObjectMode: true,
62
+ decodeStrings: false,
63
+ highWaterMark: 1,
64
+ transform (chunk: SpeechFlowChunk, encoding, callback) {
65
+ if (Buffer.isBuffer(chunk.payload))
66
+ callback(new Error("invalid chunk payload type"))
67
+ else if (chunk.payload === "") {
68
+ this.push(chunk)
69
+ callback()
70
+ }
71
+ else {
72
+ const payload = censor(chunk.payload)
73
+ const chunkNew = chunk.clone()
74
+ chunkNew.payload = payload
75
+ this.push(chunkNew)
76
+ callback()
77
+ }
78
+ },
79
+ final (callback) {
80
+ callback()
81
+ }
82
+ })
83
+ }
84
+
85
+ /* close node */
86
+ async close () {
87
+ /* shutdown stream */
88
+ if (this.stream !== null) {
89
+ await util.destroyStream(this.stream)
90
+ this.stream = null
91
+ }
92
+ }
93
+ }
@@ -0,0 +1,201 @@
1
+ /*
2
+ ** SpeechFlow - Speech Processing Flow Graph
3
+ ** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
4
+ ** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
5
+ */
6
+
7
+ /* standard dependencies */
8
+ import Stream from "node:stream"
9
+
10
+ /* internal dependencies */
11
+ import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
12
+ import * as util from "./speechflow-util"
13
+ import { LLM, type LLMCompleteMessage } from "./speechflow-util-llm"
14
+
15
+ /* internal utility types */
16
+ type ConfigEntry = { systemPrompt: string, chat: LLMCompleteMessage[] }
17
+ type Config = { [ key: string ]: ConfigEntry }
18
+
19
+ /* SpeechFlow node for text-to-text punctuation restoration */
20
+ export default class SpeechFlowNodeT2TPunctuation extends SpeechFlowNode {
21
+ /* declare official node name */
22
+ public static name = "t2t-punctuation"
23
+
24
+ /* internal state */
25
+ private llm: LLM | null = null
26
+
27
+ /* internal LLM setup */
28
+ private setup: Config = {
29
+ /* English (EN) punctuation restoration */
30
+ "en": {
31
+ systemPrompt:
32
+ "You are a punctuation restoration specialist for English.\n" +
33
+ "Your task is to add missing punctuation to unpunctuated text.\n" +
34
+ "Output only the punctuated text.\n" +
35
+ "Do NOT use markdown.\n" +
36
+ "Do NOT give any explanations.\n" +
37
+ "Do NOT give any introduction.\n" +
38
+ "Do NOT give any comments.\n" +
39
+ "Do NOT give any preamble.\n" +
40
+ "Do NOT give any prolog.\n" +
41
+ "Do NOT give any epilog.\n" +
42
+ "Do NOT change the words.\n" +
43
+ "Do NOT add or remove words.\n" +
44
+ "Do NOT fix spelling errors.\n" +
45
+ "Do NOT change the grammar.\n" +
46
+ "Do NOT use synonyms.\n" +
47
+ "Keep all original words exactly as they are.\n" +
48
+ "Add periods at sentence endings.\n" +
49
+ "Add commas where appropriate.\n" +
50
+ "Add question marks for questions.\n" +
51
+ "Add exclamation marks where appropriate.\n" +
52
+ "Add colons and semicolons where appropriate.\n" +
53
+ "Capitalize first letters of sentences.\n" +
54
+ "The text you have to punctuate is:\n",
55
+ chat: [
56
+ { role: "user", content: "hello how are you today" },
57
+ { role: "assistant", content: "Hello, how are you today?" },
58
+ { role: "user", content: "i went to the store and bought some milk eggs and bread" },
59
+ { role: "assistant", content: "I went to the store and bought some milk, eggs, and bread." },
60
+ { role: "user", content: "what time is it i need to leave soon" },
61
+ { role: "assistant", content: "What time is it? I need to leave soon." },
62
+ { role: "user", content: "thats amazing i cant believe it worked" },
63
+ { role: "assistant", content: "That's amazing! I can't believe it worked!" }
64
+ ]
65
+ },
66
+
67
+ /* German (DE) punctuation restoration */
68
+ "de": {
69
+ systemPrompt:
70
+ "Du bist ein Spezialist für Zeichensetzung im Deutschen.\n" +
71
+ "Deine Aufgabe ist es, fehlende Satzzeichen in unpunktierten Text einzufügen.\n" +
72
+ "Gib nur den punktierten Text aus.\n" +
73
+ "Benutze KEIN Markdown.\n" +
74
+ "Gib KEINE Erklärungen.\n" +
75
+ "Gib KEINE Einleitung.\n" +
76
+ "Gib KEINE Kommentare.\n" +
77
+ "Gib KEINE Preamble.\n" +
78
+ "Gib KEINEN Prolog.\n" +
79
+ "Gib KEINEN Epilog.\n" +
80
+ "Ändere NICHT die Wörter.\n" +
81
+ "Füge KEINE Wörter hinzu oder entferne welche.\n" +
82
+ "Korrigiere KEINE Rechtschreibfehler.\n" +
83
+ "Ändere NICHT die Grammatik.\n" +
84
+ "Verwende KEINE Synonyme.\n" +
85
+ "Behalte alle ursprünglichen Wörter genau bei.\n" +
86
+ "Füge Punkte am Satzende ein.\n" +
87
+ "Füge Kommas an passenden Stellen ein.\n" +
88
+ "Füge Fragezeichen bei Fragen ein.\n" +
89
+ "Füge Ausrufezeichen an passenden Stellen ein.\n" +
90
+ "Füge Doppelpunkte und Semikolons an passenden Stellen ein.\n" +
91
+ "Großschreibe die ersten Buchstaben von Sätzen.\n" +
92
+ "Der von dir zu punktierende Text ist:\n",
93
+ chat: [
94
+ { role: "user", content: "hallo wie geht es dir heute" },
95
+ { role: "assistant", content: "Hallo, wie geht es dir heute?" },
96
+ { role: "user", content: "ich bin in den laden gegangen und habe milch eier und brot gekauft" },
97
+ { role: "assistant", content: "Ich bin in den Laden gegangen und habe Milch, Eier und Brot gekauft." },
98
+ { role: "user", content: "wie spät ist es ich muss bald los" },
99
+ { role: "assistant", content: "Wie spät ist es? Ich muss bald los." },
100
+ { role: "user", content: "das ist fantastisch ich kann nicht glauben dass es funktioniert hat" },
101
+ { role: "assistant", content: "Das ist fantastisch! Ich kann nicht glauben, dass es funktioniert hat!" }
102
+ ]
103
+ }
104
+ }
105
+
106
+ /* construct node */
107
+ constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
108
+ super(id, cfg, opts, args)
109
+
110
+ /* declare node configuration parameters */
111
+ this.configure({
112
+ provider: { type: "string", val: "ollama", match: /^(?:openai|anthropic|google|ollama|transformers)$/ },
113
+ api: { type: "string", val: "http://127.0.0.1:11434", match: /^https?:\/\/.+?(:\d+)?$/ },
114
+ model: { type: "string", val: "gemma3:4b-it-q4_K_M", match: /^.+$/ },
115
+ key: { type: "string", val: "", match: /^.*$/ },
116
+ lang: { type: "string", pos: 0, val: "en", match: /^(?:de|en)$/ }
117
+ })
118
+
119
+ /* tell effective mode */
120
+ this.log("info", `punctuation restoration for language "${this.params.lang}" ` +
121
+ `via ${this.params.provider} LLM (model: ${this.params.model})`)
122
+
123
+ /* declare node input/output format */
124
+ this.input = "text"
125
+ this.output = "text"
126
+ }
127
+
128
+ /* open node */
129
+ async open () {
130
+ /* instantiate LLM */
131
+ this.llm = new LLM({
132
+ provider: this.params.provider,
133
+ api: this.params.api,
134
+ model: this.params.model,
135
+ key: this.params.key,
136
+ temperature: 0.7,
137
+ topP: 0.5
138
+ })
139
+ this.llm.on("log", (level: string, message: string) => {
140
+ this.log(level as "info" | "warning" | "error", message)
141
+ })
142
+ await this.llm.open()
143
+
144
+ /* provide text-to-text punctuation restoration */
145
+ const llm = this.llm!
146
+ const punctuate = async (text: string) => {
147
+ const cfg = this.setup[this.params.lang]
148
+ if (!cfg)
149
+ throw new Error(`unsupported language: ${this.params.lang}`)
150
+ return llm.complete({
151
+ system: cfg.systemPrompt,
152
+ messages: cfg.chat,
153
+ prompt: text
154
+ })
155
+ }
156
+
157
+ /* establish a transform stream for punctuation restoration */
158
+ this.stream = new Stream.Transform({
159
+ readableObjectMode: true,
160
+ writableObjectMode: true,
161
+ decodeStrings: false,
162
+ highWaterMark: 1,
163
+ transform (chunk: SpeechFlowChunk, encoding, callback) {
164
+ if (Buffer.isBuffer(chunk.payload))
165
+ callback(new Error("invalid chunk payload type"))
166
+ else if (chunk.payload === "") {
167
+ this.push(chunk)
168
+ callback()
169
+ }
170
+ else {
171
+ punctuate(chunk.payload).then((payload) => {
172
+ const chunkNew = chunk.clone()
173
+ chunkNew.payload = payload
174
+ this.push(chunkNew)
175
+ callback()
176
+ }).catch((error: unknown) => {
177
+ callback(util.ensureError(error))
178
+ })
179
+ }
180
+ },
181
+ final (callback) {
182
+ callback()
183
+ }
184
+ })
185
+ }
186
+
187
+ /* close node */
188
+ async close () {
189
+ /* shutdown stream */
190
+ if (this.stream !== null) {
191
+ await util.destroyStream(this.stream)
192
+ this.stream = null
193
+ }
194
+
195
+ /* shutdown LLM */
196
+ if (this.llm !== null) {
197
+ await this.llm.close()
198
+ this.llm = null
199
+ }
200
+ }
201
+ }
@@ -5,31 +5,29 @@
5
5
  */
6
6
 
7
7
  /* standard dependencies */
8
- import Stream from "node:stream"
9
-
10
- /* external dependencies */
11
- import OpenAI from "openai"
8
+ import Stream from "node:stream"
12
9
 
13
10
  /* internal dependencies */
14
11
  import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
15
12
  import * as util from "./speechflow-util"
13
+ import { LLM, type LLMCompleteMessage } from "./speechflow-util-llm"
16
14
 
17
15
  /* internal utility types */
18
- type ConfigEntry = { systemPrompt: string, chat: OpenAI.ChatCompletionMessageParam[] }
16
+ type ConfigEntry = { systemPrompt: string, chat: LLMCompleteMessage[] }
19
17
  type Config = { [ key: string ]: ConfigEntry }
20
18
 
21
- /* SpeechFlow node for OpenAI/GPT text-to-text translation */
22
- export default class SpeechFlowNodeT2TOpenAI extends SpeechFlowNode {
19
+ /* SpeechFlow node for LLM-based text-to-text spellchecking */
20
+ export default class SpeechFlowNodeT2TSpellcheck extends SpeechFlowNode {
23
21
  /* declare official node name */
24
- public static name = "t2t-openai"
22
+ public static name = "t2t-spellcheck"
25
23
 
26
24
  /* internal state */
27
- private openai: OpenAI | null = null
25
+ private llm: LLM | null = null
28
26
 
29
27
  /* internal LLM setup */
30
28
  private setup: Config = {
31
- /* English (EN) spellchecking only */
32
- "en-en": {
29
+ /* English (EN) spellchecking */
30
+ "en": {
33
31
  systemPrompt:
34
32
  "You are a proofreader and spellchecker for English.\n" +
35
33
  "Output only the corrected text.\n" +
@@ -59,8 +57,8 @@ export default class SpeechFlowNodeT2TOpenAI extends SpeechFlowNode {
59
57
  ]
60
58
  },
61
59
 
62
- /* German (DE) spellchecking only */
63
- "de-de": {
60
+ /* German (DE) spellchecking */
61
+ "de": {
64
62
  systemPrompt:
65
63
  "Du bist ein Korrekturleser und Rechtschreibprüfer für Deutsch.\n" +
66
64
  "Gib nur den korrigierten Text aus.\n" +
@@ -89,56 +87,6 @@ export default class SpeechFlowNodeT2TOpenAI extends SpeechFlowNode {
89
87
  { role: "user", content: "Das Leben einfach großartig aber ich bin hungrig." },
90
88
  { role: "assistant", content: "Das Leben ist einfach großartig, aber ich bin hungrig." }
91
89
  ]
92
- },
93
-
94
- /* English (EN) to German (DE) translation */
95
- "en-de": {
96
- systemPrompt:
97
- "You are a translator.\n" +
98
- "Output only the requested text.\n" +
99
- "Do not use markdown.\n" +
100
- "Do not chat.\n" +
101
- "Do not show any explanations.\n" +
102
- "Do not show any introduction.\n" +
103
- "Do not show any preamble.\n" +
104
- "Do not show any prolog.\n" +
105
- "Do not show any epilog.\n" +
106
- "Get to the point.\n" +
107
- "Preserve the original meaning, tone, and nuance.\n" +
108
- "Directly translate text from English (EN) to fluent and natural German (DE) language.\n",
109
- chat: [
110
- { role: "user", content: "I love my wife." },
111
- { role: "assistant", content: "Ich liebe meine Frau." },
112
- { role: "user", content: "The weather is wonderful." },
113
- { role: "assistant", content: "Das Wetter ist wunderschön." },
114
- { role: "user", content: "The life is awesome." },
115
- { role: "assistant", content: "Das Leben ist einfach großartig." }
116
- ]
117
- },
118
-
119
- /* German (DE) to English (EN) translation */
120
- "de-en": {
121
- systemPrompt:
122
- "You are a translator.\n" +
123
- "Output only the requested text.\n" +
124
- "Do not use markdown.\n" +
125
- "Do not chat.\n" +
126
- "Do not show any explanations.\n" +
127
- "Do not show any introduction.\n" +
128
- "Do not show any preamble.\n" +
129
- "Do not show any prolog.\n" +
130
- "Do not show any epilog.\n" +
131
- "Get to the point.\n" +
132
- "Preserve the original meaning, tone, and nuance.\n" +
133
- "Directly translate text from German (DE) to fluent and natural English (EN) language.\n",
134
- chat: [
135
- { role: "user", content: "Ich liebe meine Frau." },
136
- { role: "assistant", content: "I love my wife." },
137
- { role: "user", content: "Das Wetter ist wunderschön." },
138
- { role: "assistant", content: "The weather is wonderful." },
139
- { role: "user", content: "Das Leben ist einfach großartig." },
140
- { role: "assistant", content: "The life is awesome." }
141
- ]
142
90
  }
143
91
  }
144
92
 
@@ -148,19 +96,16 @@ export default class SpeechFlowNodeT2TOpenAI extends SpeechFlowNode {
148
96
 
149
97
  /* declare node configuration parameters */
150
98
  this.configure({
151
- src: { type: "string", pos: 0, val: "de", match: /^(?:de|en)$/ },
152
- dst: { type: "string", pos: 1, val: "en", match: /^(?:de|en)$/ },
153
- key: { type: "string", val: process.env.SPEECHFLOW_OPENAI_KEY, match: /^.+$/ },
154
- api: { type: "string", val: "https://api.openai.com/v1", match: /^https?:\/\/.+/ },
155
- model: { type: "string", val: "gpt-5-mini", match: /^.+$/ }
99
+ lang: { type: "string", pos: 0, val: "en", match: /^(?:de|en)$/ },
100
+ provider: { type: "string", val: "ollama", match: /^(?:openai|anthropic|google|ollama|transformers)$/ },
101
+ api: { type: "string", val: "http://127.0.0.1:11434", match: /^https?:\/\/.+?(:\d+)?$/ },
102
+ model: { type: "string", val: "gemma3:4b-it-q4_K_M", match: /^.+$/ },
103
+ key: { type: "string", val: "", match: /^.*$/ }
156
104
  })
157
105
 
158
106
  /* tell effective mode */
159
- if (this.params.src === this.params.dst)
160
- this.log("info", `OpenAI: operation mode: spellchecking for language "${this.params.src}"`)
161
- else
162
- this.log("info", `OpenAI: operation mode: translation from language "${this.params.src}"` +
163
- ` to language "${this.params.dst}"`)
107
+ this.log("info", `spellchecking language "${this.params.lang}" ` +
108
+ `via ${this.params.provider} LLM (model: ${this.params.model})`)
164
109
 
165
110
  /* declare node input/output format */
166
111
  this.input = "text"
@@ -169,39 +114,34 @@ export default class SpeechFlowNodeT2TOpenAI extends SpeechFlowNode {
169
114
 
170
115
  /* open node */
171
116
  async open () {
172
- /* validate API key */
173
- if (!this.params.key)
174
- throw new Error("OpenAI API key is required")
175
-
176
- /* instantiate OpenAI API */
177
- this.openai = new OpenAI({
178
- baseURL: this.params.api,
179
- apiKey: this.params.key,
180
- timeout: 30000
117
+ /* instantiate LLM */
118
+ this.llm = new LLM({
119
+ provider: this.params.provider,
120
+ api: this.params.api,
121
+ model: this.params.model,
122
+ key: this.params.key,
123
+ temperature: 0.7,
124
+ topP: 0.5
181
125
  })
182
-
183
- /* provide text-to-text translation */
184
- const translate = async (text: string) => {
185
- const key = `${this.params.src}-${this.params.dst}`
186
- const cfg = this.setup[key]
187
- if (!this.openai)
188
- throw new Error("OpenAI client not available")
189
- const completion = await this.openai.chat.completions.create({
190
- model: this.params.model,
191
- temperature: this.params.model.endsWith("-mini") ? 1.0 : 0.7,
192
- messages: [
193
- { role: "system", content: cfg.systemPrompt },
194
- ...cfg.chat,
195
- { role: "user", content: text }
196
- ]
126
+ this.llm.on("log", (level: string, message: string) => {
127
+ this.log(level as "info" | "warning" | "error", message)
128
+ })
129
+ await this.llm.open()
130
+
131
+ /* provide text-to-text spellchecking */
132
+ const llm = this.llm!
133
+ const spellcheck = async (text: string) => {
134
+ const cfg = this.setup[this.params.lang]
135
+ if (!cfg)
136
+ throw new Error(`unsupported language: ${this.params.lang}`)
137
+ return llm.complete({
138
+ system: cfg.systemPrompt,
139
+ messages: cfg.chat,
140
+ prompt: text
197
141
  })
198
- const content = completion?.choices?.[0]?.message?.content
199
- if (!content)
200
- throw new Error("OpenAI API returned empty content")
201
- return content
202
142
  }
203
143
 
204
- /* establish a duplex stream and connect it to OpenAI */
144
+ /* establish a transform stream and connect it to LLM */
205
145
  this.stream = new Stream.Transform({
206
146
  readableObjectMode: true,
207
147
  writableObjectMode: true,
@@ -215,7 +155,7 @@ export default class SpeechFlowNodeT2TOpenAI extends SpeechFlowNode {
215
155
  callback()
216
156
  }
217
157
  else {
218
- translate(chunk.payload).then((payload) => {
158
+ spellcheck(chunk.payload).then((payload) => {
219
159
  const chunkNew = chunk.clone()
220
160
  chunkNew.payload = payload
221
161
  this.push(chunkNew)
@@ -239,9 +179,10 @@ export default class SpeechFlowNodeT2TOpenAI extends SpeechFlowNode {
239
179
  this.stream = null
240
180
  }
241
181
 
242
- /* shutdown OpenAI */
243
- if (this.openai !== null)
244
- this.openai = null
182
+ /* shutdown LLM */
183
+ if (this.llm !== null) {
184
+ await this.llm.close()
185
+ this.llm = null
186
+ }
245
187
  }
246
188
  }
247
-