speechflow 0.9.8 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114)
  1. package/CHANGELOG.md +18 -0
  2. package/LICENSE.txt +674 -0
  3. package/README.md +114 -17
  4. package/dst/speechflow-node-a2a-ffmpeg.js +1 -0
  5. package/dst/speechflow-node-a2a-ffmpeg.js.map +1 -0
  6. package/dst/{speechflow-node-deepl.d.ts → speechflow-node-a2a-meter.d.ts} +2 -2
  7. package/dst/speechflow-node-a2a-meter.js +147 -0
  8. package/dst/speechflow-node-a2a-meter.js.map +1 -0
  9. package/dst/speechflow-node-a2a-mute.d.ts +16 -0
  10. package/dst/speechflow-node-a2a-mute.js +90 -0
  11. package/dst/speechflow-node-a2a-mute.js.map +1 -0
  12. package/dst/{speechflow-node-whisper.d.ts → speechflow-node-a2a-vad.d.ts} +2 -5
  13. package/dst/speechflow-node-a2a-vad.js +272 -0
  14. package/dst/speechflow-node-a2a-vad.js.map +1 -0
  15. package/dst/speechflow-node-a2a-wav.js +1 -0
  16. package/dst/speechflow-node-a2a-wav.js.map +1 -0
  17. package/dst/speechflow-node-a2t-deepgram.js +2 -1
  18. package/dst/speechflow-node-a2t-deepgram.js.map +1 -0
  19. package/dst/speechflow-node-t2a-elevenlabs.js +1 -0
  20. package/dst/speechflow-node-t2a-elevenlabs.js.map +1 -0
  21. package/dst/{speechflow-node-elevenlabs.d.ts → speechflow-node-t2a-kokoro.d.ts} +2 -2
  22. package/dst/speechflow-node-t2a-kokoro.js +148 -0
  23. package/dst/speechflow-node-t2a-kokoro.js.map +1 -0
  24. package/dst/speechflow-node-t2t-deepl.js +1 -0
  25. package/dst/speechflow-node-t2t-deepl.js.map +1 -0
  26. package/dst/speechflow-node-t2t-format.js +1 -0
  27. package/dst/speechflow-node-t2t-format.js.map +1 -0
  28. package/dst/{speechflow-node-gemma.d.ts → speechflow-node-t2t-ollama.d.ts} +1 -1
  29. package/dst/{speechflow-node-gemma.js → speechflow-node-t2t-ollama.js} +41 -8
  30. package/dst/speechflow-node-t2t-ollama.js.map +1 -0
  31. package/dst/{speechflow-node-t2t-gemma.d.ts → speechflow-node-t2t-openai.d.ts} +2 -2
  32. package/dst/{speechflow-node-t2t-gemma.js → speechflow-node-t2t-openai.js} +43 -30
  33. package/dst/speechflow-node-t2t-openai.js.map +1 -0
  34. package/dst/speechflow-node-t2t-subtitle.js +1 -0
  35. package/dst/speechflow-node-t2t-subtitle.js.map +1 -0
  36. package/dst/{speechflow-node-opus.d.ts → speechflow-node-t2t-transformers.d.ts} +3 -1
  37. package/dst/speechflow-node-t2t-transformers.js +264 -0
  38. package/dst/speechflow-node-t2t-transformers.js.map +1 -0
  39. package/dst/speechflow-node-x2x-trace.js +3 -2
  40. package/dst/speechflow-node-x2x-trace.js.map +1 -0
  41. package/dst/speechflow-node-xio-device.js +1 -0
  42. package/dst/speechflow-node-xio-device.js.map +1 -0
  43. package/dst/speechflow-node-xio-file.js +1 -0
  44. package/dst/speechflow-node-xio-file.js.map +1 -0
  45. package/dst/speechflow-node-xio-mqtt.js +1 -0
  46. package/dst/speechflow-node-xio-mqtt.js.map +1 -0
  47. package/dst/speechflow-node-xio-websocket.js +1 -0
  48. package/dst/speechflow-node-xio-websocket.js.map +1 -0
  49. package/dst/speechflow-node.d.ts +3 -0
  50. package/dst/speechflow-node.js +10 -0
  51. package/dst/speechflow-node.js.map +1 -0
  52. package/dst/speechflow-utils.d.ts +33 -0
  53. package/dst/speechflow-utils.js +183 -1
  54. package/dst/speechflow-utils.js.map +1 -0
  55. package/dst/speechflow.js +295 -46
  56. package/dst/speechflow.js.map +1 -0
  57. package/etc/speechflow.yaml +14 -5
  58. package/etc/stx.conf +1 -1
  59. package/etc/tsconfig.json +2 -2
  60. package/package.json +17 -10
  61. package/src/speechflow-node-a2a-meter.ts +125 -0
  62. package/src/speechflow-node-a2a-mute.ts +101 -0
  63. package/src/speechflow-node-a2a-vad.ts +266 -0
  64. package/src/speechflow-node-a2t-deepgram.ts +1 -1
  65. package/src/speechflow-node-t2a-kokoro.ts +160 -0
  66. package/src/{speechflow-node-t2t-gemma.ts → speechflow-node-t2t-ollama.ts} +44 -10
  67. package/src/speechflow-node-t2t-openai.ts +246 -0
  68. package/src/speechflow-node-t2t-transformers.ts +249 -0
  69. package/src/speechflow-node-x2x-trace.ts +2 -2
  70. package/src/speechflow-node-xio-websocket.ts +5 -5
  71. package/src/speechflow-node.ts +12 -0
  72. package/src/speechflow-utils.ts +195 -0
  73. package/src/speechflow.ts +279 -46
  74. package/dst/speechflow-node-deepgram.d.ts +0 -12
  75. package/dst/speechflow-node-deepgram.js +0 -220
  76. package/dst/speechflow-node-deepl.js +0 -128
  77. package/dst/speechflow-node-device.d.ts +0 -13
  78. package/dst/speechflow-node-device.js +0 -205
  79. package/dst/speechflow-node-elevenlabs.js +0 -182
  80. package/dst/speechflow-node-ffmpeg.d.ts +0 -13
  81. package/dst/speechflow-node-ffmpeg.js +0 -152
  82. package/dst/speechflow-node-file.d.ts +0 -11
  83. package/dst/speechflow-node-file.js +0 -176
  84. package/dst/speechflow-node-format.d.ts +0 -11
  85. package/dst/speechflow-node-format.js +0 -80
  86. package/dst/speechflow-node-mqtt.d.ts +0 -13
  87. package/dst/speechflow-node-mqtt.js +0 -181
  88. package/dst/speechflow-node-opus.js +0 -135
  89. package/dst/speechflow-node-subtitle.d.ts +0 -12
  90. package/dst/speechflow-node-subtitle.js +0 -96
  91. package/dst/speechflow-node-t2t-opus.d.ts +0 -12
  92. package/dst/speechflow-node-t2t-opus.js +0 -135
  93. package/dst/speechflow-node-trace.d.ts +0 -11
  94. package/dst/speechflow-node-trace.js +0 -88
  95. package/dst/speechflow-node-wav.d.ts +0 -11
  96. package/dst/speechflow-node-wav.js +0 -170
  97. package/dst/speechflow-node-websocket.d.ts +0 -13
  98. package/dst/speechflow-node-websocket.js +0 -275
  99. package/dst/speechflow-node-whisper-common.d.ts +0 -34
  100. package/dst/speechflow-node-whisper-common.js +0 -7
  101. package/dst/speechflow-node-whisper-ggml.d.ts +0 -1
  102. package/dst/speechflow-node-whisper-ggml.js +0 -97
  103. package/dst/speechflow-node-whisper-onnx.d.ts +0 -1
  104. package/dst/speechflow-node-whisper-onnx.js +0 -131
  105. package/dst/speechflow-node-whisper-worker-ggml.d.ts +0 -1
  106. package/dst/speechflow-node-whisper-worker-ggml.js +0 -97
  107. package/dst/speechflow-node-whisper-worker-onnx.d.ts +0 -1
  108. package/dst/speechflow-node-whisper-worker-onnx.js +0 -131
  109. package/dst/speechflow-node-whisper-worker.d.ts +0 -1
  110. package/dst/speechflow-node-whisper-worker.js +0 -116
  111. package/dst/speechflow-node-whisper-worker2.d.ts +0 -1
  112. package/dst/speechflow-node-whisper-worker2.js +0 -82
  113. package/dst/speechflow-node-whisper.js +0 -604
  114. package/src/speechflow-node-t2t-opus.ts +0 -111
package/src/speechflow-node-t2a-kokoro.ts
@@ -0,0 +1,160 @@
+ /*
+ ** SpeechFlow - Speech Processing Flow Graph
+ ** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
+ ** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
+ */
+
+ /* standard dependencies */
+ import Stream from "node:stream"
+
+ /* external dependencies */
+ import { KokoroTTS } from "kokoro-js"
+ import SpeexResampler from "speex-resampler"
+
+ /* internal dependencies */
+ import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
+
+ /* SpeechFlow node for Kokoro text-to-speech conversion */
+ export default class SpeechFlowNodeKokoro extends SpeechFlowNode {
+     /* declare official node name */
+     public static name = "kokoro"
+
+     /* internal state */
+     private kokoro: KokoroTTS | null = null
+     private static speexInitialized = false
+
+     /* construct node */
+     constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
+         super(id, cfg, opts, args)
+
+         /* declare node configuration parameters */
+         this.configure({
+             voice: { type: "string", val: "Aoede", pos: 0, match: /^(?:Aoede|Heart|Puck|Fenrir)$/ },
+             language: { type: "string", val: "en", pos: 1, match: /^(?:en)$/ },
+             speed: { type: "number", val: 1.25, pos: 2, match: (n: number) => n >= 1.0 && n <= 1.30 },
+         })
+
+         /* declare node input/output format */
+         this.input = "text"
+         this.output = "audio"
+     }
+
+     /* open node */
+     async open () {
+         /* establish Kokoro */
+         const model = "onnx-community/Kokoro-82M-v1.0-ONNX"
+         const progressState = new Map<string, number>()
+         const progressCallback = (progress: any) => {
+             let artifact = model
+             if (typeof progress.file === "string")
+                 artifact += `:${progress.file}`
+             let percent = 0
+             if (typeof progress.loaded === "number" && typeof progress.total === "number")
+                 percent = (progress.loaded as number / progress.total as number) * 100
+             else if (typeof progress.progress === "number")
+                 percent = progress.progress
+             if (percent > 0)
+                 progressState.set(artifact, percent)
+         }
+         const interval = setInterval(() => {
+             for (const [ artifact, percent ] of progressState) {
+                 this.log("info", `downloaded ${percent.toFixed(2)}% of artifact "${artifact}"`)
+                 if (percent >= 1.0)
+                     progressState.delete(artifact)
+             }
+         }, 1000)
+         this.kokoro = await KokoroTTS.from_pretrained(model, {
+             dtype: "q4f16",
+             progress_callback: progressCallback
+         })
+         clearInterval(interval)
+         if (this.kokoro === null)
+             throw new Error("failed to instantiate Kokoro")
+
+         /* establish resampler from Kokoro's maximum 24 kHz
+            output to our standard audio sample rate (48 kHz) */
+         if (!SpeechFlowNodeKokoro.speexInitialized) {
+             /* initialize resampler at least once */
+             await SpeexResampler.initPromise
+             SpeechFlowNodeKokoro.speexInitialized = true
+         }
+         const resampler = new SpeexResampler(1, 24000, this.config.audioSampleRate, 7)
+
+         /* determine voice for text-to-speech operation */
+         const voices = {
+             "Aoede": "af_aoede",
+             "Heart": "af_heart",
+             "Puck": "am_puck",
+             "Fenrir": "am_fenrir"
+         }
+         const voice = ((voices as any)[this.params.voice]) as string | undefined
+         if (voice === undefined)
+             throw new Error(`invalid Kokoro voice "${this.params.voice}"`)
+
+         /* perform text-to-speech operation with the Kokoro engine */
+         const text2speech = async (text: string) => {
+             this.log("info", `Kokoro: input: "${text}"`)
+             const audio = await this.kokoro!.generate(text, {
+                 speed: this.params.speed,
+                 voice: voice as any
+             })
+             if (audio.sampling_rate !== 24000)
+                 throw new Error("expected 24 kHz sampling rate in Kokoro output")
+
+             /* convert audio samples from PCM/F32/24 kHz to PCM/I16/24 kHz */
+             const samples = audio.audio
+             const buffer1 = Buffer.alloc(samples.length * 2)
+             for (let i = 0; i < samples.length; i++) {
+                 const sample = Math.max(-1, Math.min(1, samples[i]))
+                 buffer1.writeInt16LE(sample * 0x7FFF, i * 2)
+             }
+
+             /* resample audio samples from PCM/I16/24 kHz to PCM/I16/48 kHz */
+             const buffer2 = resampler.processChunk(buffer1)
+
+             return buffer2
+         }
+
+         /* create transform stream and connect it to the Kokoro API */
+         const log = (level: string, msg: string) => { this.log(level, msg) }
+         this.stream = new Stream.Transform({
+             writableObjectMode: true,
+             readableObjectMode: true,
+             decodeStrings: false,
+             transform (chunk: SpeechFlowChunk, encoding, callback) {
+                 if (Buffer.isBuffer(chunk.payload))
+                     callback(new Error("invalid chunk payload type"))
+                 else {
+                     text2speech(chunk.payload).then((buffer) => {
+                         log("info", `Kokoro: received audio (buffer length: ${buffer.byteLength})`)
+                         chunk = chunk.clone()
+                         chunk.type = "audio"
+                         chunk.payload = buffer
+                         this.push(chunk)
+                         callback()
+                     }).catch((err) => {
+                         callback(err)
+                     })
+                 }
+             },
+             final (callback) {
+                 this.push(null)
+                 callback()
+             }
+         })
+     }
+
+     /* close node */
+     async close () {
+         /* destroy stream */
+         if (this.stream !== null) {
+             this.stream.destroy()
+             this.stream = null
+         }
+
+         /* destroy Kokoro API */
+         if (this.kokoro !== null)
+             this.kokoro = null
+     }
+ }
+
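For orientation, a minimal standalone sketch of the sample-format conversion step performed above in the kokoro node: Kokoro's PCM/F32 output at 24 kHz is clamped and quantized to PCM/I16, then resampled to 48 kHz. It reuses only the speex-resampler calls visible in the diff (initPromise, the (channels, inRate, outRate, quality) constructor, processChunk); the helper names float32ToPcm16 and upsampleTo48k are illustrative and not part of the package.

/* illustrative sketch only, not part of the package */
import SpeexResampler from "speex-resampler"

/* quantize Float32 samples in [-1, 1] to little-endian signed 16-bit PCM */
function float32ToPcm16 (samples: Float32Array): Buffer {
    const buf = Buffer.alloc(samples.length * 2)
    for (let i = 0; i < samples.length; i++) {
        const s = Math.max(-1, Math.min(1, samples[i]))
        buf.writeInt16LE(Math.round(s * 0x7FFF), i * 2)
    }
    return buf
}

/* resample mono 16-bit PCM from 24 kHz to 48 kHz (quality level 7) */
async function upsampleTo48k (pcm24k: Buffer): Promise<Buffer> {
    await SpeexResampler.initPromise  /* one-time library initialization */
    const resampler = new SpeexResampler(1, 24000, 48000, 7)
    return resampler.processChunk(pcm24k)
}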
package/src/{speechflow-node-t2t-gemma.ts → speechflow-node-t2t-ollama.ts}
@@ -17,10 +17,10 @@ import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
  type ConfigEntry = { systemPrompt: string, chat: Array<{ role: string, content: string }> }
  type Config = { [ key: string ]: ConfigEntry }
 
- /* SpeechFlow node for Gemma/Ollama text-to-text translation */
- export default class SpeechFlowNodeGemma extends SpeechFlowNode {
+ /* SpeechFlow node for Ollama text-to-text translation */
+ export default class SpeechFlowNodeOllama extends SpeechFlowNode {
      /* declare official node name */
-     public static name = "gemma"
+     public static name = "ollama"
 
      /* internal state */
      private ollama: Ollama | null = null
@@ -103,7 +103,8 @@ export default class SpeechFlowNodeGemma extends SpeechFlowNode {
                  "Do not show any prolog.\n" +
                  "Do not show any epilog.\n" +
                  "Get to the point.\n" +
-                 "Directly translate text from Enlish (EN) to German (DE) language.\n",
+                 "Preserve the original meaning, tone, and nuance.\n" +
+                 "Directly translate text from English (EN) to fluent and natural German (DE) language.\n",
              chat: [
                  { role: "user", content: "I love my wife." },
                  { role: "system", content: "Ich liebe meine Frau." },
@@ -121,13 +122,14 @@ export default class SpeechFlowNodeGemma extends SpeechFlowNode {
                  "Output only the requested text.\n" +
                  "Do not use markdown.\n" +
                  "Do not chat.\n" +
-                 "Do not show any explanations. \n" +
+                 "Do not show any explanations.\n" +
                  "Do not show any introduction.\n" +
                  "Do not show any preamble. \n" +
                  "Do not show any prolog. \n" +
                  "Do not show any epilog. \n" +
                  "Get to the point.\n" +
-                 "Directly translate text from German (DE) to English (EN) language.\n",
+                 "Preserve the original meaning, tone, and nuance.\n" +
+                 "Directly translate text from German (DE) to fluent and natural English (EN) language.\n",
              chat: [
                  { role: "user", content: "Ich liebe meine Frau." },
                  { role: "system", content: "I love my wife." },
@@ -145,11 +147,19 @@ export default class SpeechFlowNodeGemma extends SpeechFlowNode {
 
          /* declare node configuration parameters */
          this.configure({
-             api: { type: "string", val: "http://127.0.0.1:11434", match: /^https?:\/\/.+?:\d+$/ },
-             src: { type: "string", pos: 0, val: "de", match: /^(?:de|en)$/ },
-             dst: { type: "string", pos: 1, val: "en", match: /^(?:de|en)$/ }
+             api:   { type: "string", val: "http://127.0.0.1:11434", match: /^https?:\/\/.+?:\d+$/ },
+             model: { type: "string", val: "gemma3:4b-it-q4_K_M", match: /^.+$/ },
+             src:   { type: "string", pos: 0, val: "de", match: /^(?:de|en)$/ },
+             dst:   { type: "string", pos: 1, val: "en", match: /^(?:de|en)$/ }
          })
 
+         /* tell effective mode */
+         if (this.params.src === this.params.dst)
+             this.log("info", `Ollama: operation mode: spellchecking for language "${this.params.src}"`)
+         else
+             this.log("info", `Ollama: operation mode: translation from language "${this.params.src}"` +
+                 ` to language "${this.params.dst}"`)
+
          /* declare node input/output format */
          this.input = "text"
          this.output = "text"
@@ -160,12 +170,36 @@ export default class SpeechFlowNodeGemma extends SpeechFlowNode {
          /* instantiate Ollama API */
          this.ollama = new Ollama({ host: this.params.api })
 
+         /* ensure the model is available */
+         const model = this.params.model
+         const models = await this.ollama.list()
+         const exists = models.models.some((m) => m.name === model)
+         if (!exists) {
+             this.log("info", `Ollama: model "${model}" not yet present in Ollama -- ` +
+                 "automatically downloading model")
+             let artifact = ""
+             let percent = 0
+             const interval = setInterval(() => {
+                 this.log("info", `downloaded ${percent.toFixed(2)}% of artifact "${artifact}"`)
+             }, 1000)
+             const progress = await this.ollama.pull({ model, stream: true })
+             for await (const event of progress) {
+                 if (event.digest)
+                     artifact = event.digest
+                 if (event.completed && event.total)
+                     percent = (event.completed / event.total) * 100
+             }
+             clearInterval(interval)
+         }
+         else
+             this.log("info", `Ollama: model "${model}" already present in Ollama`)
+
          /* provide text-to-text translation */
          const translate = async (text: string) => {
              const key = `${this.params.src}-${this.params.dst}`
              const cfg = this.setup[key]
              const response = await this.ollama!.chat({
-                 model: "gemma3:4b-it-q4_K_M",
+                 model,
                  messages: [
                      { role: "system", content: cfg.systemPrompt },
                      ...cfg.chat,
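For orientation, a minimal standalone sketch of the model auto-provisioning step the ollama node now performs on open(): list the locally installed models and pull the configured one when it is missing, reporting pull progress. It uses only the Ollama client calls visible in the diff (list() and pull() with stream: true); the helper name ensureModel and the console logging are illustrative and not part of the package.

/* illustrative sketch only, not part of the package */
import { Ollama } from "ollama"

/* ensure a model is locally available in Ollama, pulling it on demand */
async function ensureModel (host: string, model: string): Promise<void> {
    const ollama = new Ollama({ host })
    const installed = await ollama.list()
    if (installed.models.some((m) => m.name === model))
        return  /* model already present */
    const progress = await ollama.pull({ model, stream: true })
    for await (const event of progress) {
        if (event.completed && event.total)
            console.log(`pulled ${((event.completed / event.total) * 100).toFixed(2)}% of "${model}"`)
    }
}

/* usage, matching the node's defaults */
ensureModel("http://127.0.0.1:11434", "gemma3:4b-it-q4_K_M").catch((err) => console.error(err))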
package/src/speechflow-node-t2t-openai.ts
@@ -0,0 +1,246 @@
+ /*
+ ** SpeechFlow - Speech Processing Flow Graph
+ ** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
+ ** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
+ */
+
+ /* standard dependencies */
+ import Stream from "node:stream"
+
+ /* external dependencies */
+ import OpenAI from "openai"
+
+ /* internal dependencies */
+ import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
+
+ /* internal utility types */
+ type ConfigEntry = { systemPrompt: string, chat: OpenAI.ChatCompletionMessageParam[] }
+ type Config = { [ key: string ]: ConfigEntry }
+
+ /* SpeechFlow node for OpenAI/GPT text-to-text translation */
+ export default class SpeechFlowNodeOpenAI extends SpeechFlowNode {
+     /* declare official node name */
+     public static name = "openai"
+
+     /* internal state */
+     private openai: OpenAI | null = null
+
+     /* internal LLM setup */
+     private setup: Config = {
+         /* English (EN) spellchecking only */
+         "en-en": {
+             systemPrompt:
+                 "You are a proofreader and spellchecker for English.\n" +
+                 "Output only the corrected text.\n" +
+                 "Do NOT use markdown.\n" +
+                 "Do NOT give any explanations.\n" +
+                 "Do NOT give any introduction.\n" +
+                 "Do NOT give any comments.\n" +
+                 "Do NOT give any preamble.\n" +
+                 "Do NOT give any prolog.\n" +
+                 "Do NOT give any epilog.\n" +
+                 "Do NOT change the grammar.\n" +
+                 "Do NOT use synonyms for words.\n" +
+                 "Keep all words.\n" +
+                 "Fill in missing commas.\n" +
+                 "Fill in missing points.\n" +
+                 "Fill in missing question marks.\n" +
+                 "Fill in missing hyphens.\n" +
+                 "Focus ONLY on the word spelling.\n" +
+                 "The text you have to correct is:\n",
+             chat: [
+                 { role: "user", content: "I luve my wyfe" },
+                 { role: "system", content: "I love my wife." },
+                 { role: "user", content: "The weether is wunderfull!" },
+                 { role: "system", content: "The weather is wonderful!" },
+                 { role: "user", content: "The live awesome but I'm hungry." },
+                 { role: "system", content: "The live is awesome, but I'm hungry." }
+             ]
+         },
+
+         /* German (DE) spellchecking only */
+         "de-de": {
+             systemPrompt:
+                 "Du bist ein Korrekturleser und Rechtschreibprüfer für Deutsch.\n" +
+                 "Gib nur den korrigierten Text aus.\n" +
+                 "Benutze KEIN Markdown.\n" +
+                 "Gib KEINE Erklärungen.\n" +
+                 "Gib KEINE Einleitung.\n" +
+                 "Gib KEINE Kommentare.\n" +
+                 "Gib KEINE Preamble.\n" +
+                 "Gib KEINEN Prolog.\n" +
+                 "Gib KEINEN Epilog.\n" +
+                 "Ändere NICHT die Grammatik.\n" +
+                 "Verwende KEINE Synonyme für Wörter.\n" +
+                 "Behalte alle Wörter bei.\n" +
+                 "Füge fehlende Kommas ein.\n" +
+                 "Füge fehlende Punkte ein.\n" +
+                 "Füge fehlende Fragezeichen ein.\n" +
+                 "Füge fehlende Bindestriche ein.\n" +
+                 "Füge fehlende Gedankenstriche ein.\n" +
+                 "Fokussiere dich NUR auf die Rechtschreibung der Wörter.\n" +
+                 "Der von dir zu korrigierende Text ist:\n",
+             chat: [
+                 { role: "user", content: "Ich ljebe meine Frao" },
+                 { role: "system", content: "Ich liebe meine Frau." },
+                 { role: "user", content: "Die Wedter ist wunderschoen." },
+                 { role: "system", content: "Das Wetter ist wunderschön." },
+                 { role: "user", content: "Das Leben einfach großartig aber ich bin hungrig." },
+                 { role: "system", content: "Das Leben ist einfach großartig, aber ich bin hungrig." }
+             ]
+         },
+
+         /* English (EN) to German (DE) translation */
+         "en-de": {
+             systemPrompt:
+                 "You are a translator.\n" +
+                 "Output only the requested text.\n" +
+                 "Do not use markdown.\n" +
+                 "Do not chat.\n" +
+                 "Do not show any explanations.\n" +
+                 "Do not show any introduction.\n" +
+                 "Do not show any preamble.\n" +
+                 "Do not show any prolog.\n" +
+                 "Do not show any epilog.\n" +
+                 "Get to the point.\n" +
+                 "Preserve the original meaning, tone, and nuance.\n" +
+                 "Directly translate text from English (EN) to fluent and natural German (DE) language.\n",
+             chat: [
+                 { role: "user", content: "I love my wife." },
+                 { role: "system", content: "Ich liebe meine Frau." },
+                 { role: "user", content: "The weather is wonderful." },
+                 { role: "system", content: "Das Wetter ist wunderschön." },
+                 { role: "user", content: "The live is awesome." },
+                 { role: "system", content: "Das Leben ist einfach großartig." }
+             ]
+         },
+
+         /* German (DE) to English (EN) translation */
+         "de-en": {
+             systemPrompt:
+                 "You are a translator.\n" +
+                 "Output only the requested text.\n" +
+                 "Do not use markdown.\n" +
+                 "Do not chat.\n" +
+                 "Do not show any explanations. \n" +
+                 "Do not show any introduction.\n" +
+                 "Do not show any preamble. \n" +
+                 "Do not show any prolog. \n" +
+                 "Do not show any epilog. \n" +
+                 "Get to the point.\n" +
+                 "Preserve the original meaning, tone, and nuance.\n" +
+                 "Directly translate text from German (DE) to fluent and natural English (EN) language.\n",
+             chat: [
+                 { role: "user", content: "Ich liebe meine Frau." },
+                 { role: "system", content: "I love my wife." },
+                 { role: "user", content: "Das Wetter ist wunderschön." },
+                 { role: "system", content: "The weather is wonderful." },
+                 { role: "user", content: "Das Leben ist einfach großartig." },
+                 { role: "system", content: "The live is awesome." }
+             ]
+         }
+     }
+
+     /* construct node */
+     constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
+         super(id, cfg, opts, args)
+
+         /* declare node configuration parameters */
+         this.configure({
+             src: { type: "string", pos: 0, val: "de", match: /^(?:de|en)$/ },
+             dst: { type: "string", pos: 1, val: "en", match: /^(?:de|en)$/ },
+             key: { type: "string", val: process.env.SPEECHFLOW_KEY_OPENAI },
+             api: { type: "string", val: "https://api.openai.com/v1", match: /^https?:\/\/.+?:\d+$/ },
+             model: { type: "string", val: "gpt-4o-mini" }
+         })
+
+         /* tell effective mode */
+         if (this.params.src === this.params.dst)
+             this.log("info", `OpenAI: operation mode: spellchecking for language "${this.params.src}"`)
+         else
+             this.log("info", `OpenAI: operation mode: translation from language "${this.params.src}"` +
+                 ` to language "${this.params.dst}"`)
+
+         /* declare node input/output format */
+         this.input = "text"
+         this.output = "text"
+     }
+
+     /* open node */
+     async open () {
+         /* instantiate OpenAI API */
+         this.openai = new OpenAI({
+             baseURL: this.params.api,
+             apiKey: this.params.key,
+             dangerouslyAllowBrowser: true
+         })
+
+         /* provide text-to-text translation */
+         const translate = async (text: string) => {
+             const key = `${this.params.src}-${this.params.dst}`
+             const cfg = this.setup[key]
+             const stream = this.openai!.chat.completions.stream({
+                 stream: true,
+                 model: this.params.model,
+                 seed: null,
+                 temperature: 0.7,
+                 n: 1,
+                 messages: [
+                     { role: "system", content: cfg.systemPrompt },
+                     ...cfg.chat,
+                     { role: "user", content: text }
+                 ]
+             })
+             const completion = await stream.finalChatCompletion()
+             const translation = completion.choices[0].message.content!
+             if (!stream.ended)
+                 stream.abort()
+             return translation
+         }
+
+         /* establish a duplex stream and connect it to OpenAI */
+         this.stream = new Stream.Transform({
+             readableObjectMode: true,
+             writableObjectMode: true,
+             decodeStrings: false,
+             transform (chunk: SpeechFlowChunk, encoding, callback) {
+                 if (Buffer.isBuffer(chunk.payload))
+                     callback(new Error("invalid chunk payload type"))
+                 else {
+                     if (chunk.payload === "") {
+                         this.push(chunk)
+                         callback()
+                     }
+                     else {
+                         translate(chunk.payload).then((payload) => {
+                             const chunkNew = chunk.clone()
+                             chunkNew.payload = payload
+                             this.push(chunkNew)
+                             callback()
+                         }).catch((err) => {
+                             callback(err)
+                         })
+                     }
+                 }
+             },
+             final (callback) {
+                 this.push(null)
+                 callback()
+             }
+         })
+     }
+
+     /* close node */
+     async close () {
+         /* close stream */
+         if (this.stream !== null) {
+             this.stream.destroy()
+             this.stream = null
+         }
+
+         /* shutdown OpenAI */
+         if (this.openai !== null)
+             this.openai = null
+     }
+ }
+
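For orientation, a minimal standalone sketch of the few-shot request the openai node assembles: the per-language system prompt, the example user/system pairs from its setup table, then the text to process. It mirrors only the OpenAI client calls visible in the diff (chat.completions.stream() and finalChatCompletion()); the helper name translateOnce and the hard-wired prompt are illustrative and not part of the package, and an API key is assumed in SPEECHFLOW_KEY_OPENAI.

/* illustrative sketch only, not part of the package */
import OpenAI from "openai"

/* translate one chunk of text, DE to EN, using a few-shot prompt */
async function translateOnce (text: string): Promise<string> {
    const openai = new OpenAI({ apiKey: process.env.SPEECHFLOW_KEY_OPENAI })
    const stream = openai.chat.completions.stream({
        stream: true,
        model: "gpt-4o-mini",
        temperature: 0.7,
        n: 1,
        messages: [
            { role: "system", content: "You are a translator.\n" +
                "Directly translate text from German (DE) to fluent and natural English (EN) language.\n" },
            { role: "user", content: "Ich liebe meine Frau." },
            { role: "system", content: "I love my wife." },
            { role: "user", content: text }
        ]
    })
    const completion = await stream.finalChatCompletion()
    return completion.choices[0].message.content ?? ""
}

/* usage */
translateOnce("Das Wetter ist wunderschön.").then((out) => console.log(out))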