speechflow 0.9.4 → 0.9.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107) hide show
  1. package/CHANGELOG.md +19 -0
  2. package/README.md +227 -54
  3. package/dst/speechflow-node-a2a-ffmpeg.d.ts +13 -0
  4. package/dst/speechflow-node-a2a-ffmpeg.js +152 -0
  5. package/dst/speechflow-node-a2a-wav.d.ts +11 -0
  6. package/dst/speechflow-node-a2a-wav.js +170 -0
  7. package/dst/speechflow-node-a2t-deepgram.d.ts +12 -0
  8. package/dst/speechflow-node-a2t-deepgram.js +220 -0
  9. package/dst/speechflow-node-deepgram.d.ts +3 -1
  10. package/dst/speechflow-node-deepgram.js +86 -22
  11. package/dst/speechflow-node-deepl.d.ts +3 -1
  12. package/dst/speechflow-node-deepl.js +25 -20
  13. package/dst/speechflow-node-device.d.ts +3 -1
  14. package/dst/speechflow-node-device.js +53 -2
  15. package/dst/speechflow-node-elevenlabs.d.ts +4 -1
  16. package/dst/speechflow-node-elevenlabs.js +88 -49
  17. package/dst/speechflow-node-ffmpeg.d.ts +3 -1
  18. package/dst/speechflow-node-ffmpeg.js +42 -4
  19. package/dst/speechflow-node-file.d.ts +3 -1
  20. package/dst/speechflow-node-file.js +84 -13
  21. package/dst/speechflow-node-format.d.ts +11 -0
  22. package/dst/speechflow-node-format.js +80 -0
  23. package/dst/speechflow-node-gemma.d.ts +3 -1
  24. package/dst/speechflow-node-gemma.js +84 -23
  25. package/dst/speechflow-node-mqtt.d.ts +13 -0
  26. package/dst/speechflow-node-mqtt.js +181 -0
  27. package/dst/speechflow-node-opus.d.ts +12 -0
  28. package/dst/speechflow-node-opus.js +135 -0
  29. package/dst/speechflow-node-subtitle.d.ts +12 -0
  30. package/dst/speechflow-node-subtitle.js +96 -0
  31. package/dst/speechflow-node-t2a-elevenlabs.d.ts +13 -0
  32. package/dst/speechflow-node-t2a-elevenlabs.js +182 -0
  33. package/dst/speechflow-node-t2t-deepl.d.ts +12 -0
  34. package/dst/speechflow-node-t2t-deepl.js +133 -0
  35. package/dst/speechflow-node-t2t-format.d.ts +11 -0
  36. package/dst/speechflow-node-t2t-format.js +80 -0
  37. package/dst/speechflow-node-t2t-gemma.d.ts +13 -0
  38. package/dst/speechflow-node-t2t-gemma.js +213 -0
  39. package/dst/speechflow-node-t2t-opus.d.ts +12 -0
  40. package/dst/speechflow-node-t2t-opus.js +135 -0
  41. package/dst/speechflow-node-t2t-subtitle.d.ts +12 -0
  42. package/dst/speechflow-node-t2t-subtitle.js +96 -0
  43. package/dst/speechflow-node-trace.d.ts +11 -0
  44. package/dst/speechflow-node-trace.js +88 -0
  45. package/dst/speechflow-node-wav.d.ts +11 -0
  46. package/dst/speechflow-node-wav.js +170 -0
  47. package/dst/speechflow-node-websocket.d.ts +3 -1
  48. package/dst/speechflow-node-websocket.js +149 -49
  49. package/dst/speechflow-node-whisper-common.d.ts +34 -0
  50. package/dst/speechflow-node-whisper-common.js +7 -0
  51. package/dst/speechflow-node-whisper-ggml.d.ts +1 -0
  52. package/dst/speechflow-node-whisper-ggml.js +97 -0
  53. package/dst/speechflow-node-whisper-onnx.d.ts +1 -0
  54. package/dst/speechflow-node-whisper-onnx.js +131 -0
  55. package/dst/speechflow-node-whisper-worker-ggml.d.ts +1 -0
  56. package/dst/speechflow-node-whisper-worker-ggml.js +97 -0
  57. package/dst/speechflow-node-whisper-worker-onnx.d.ts +1 -0
  58. package/dst/speechflow-node-whisper-worker-onnx.js +131 -0
  59. package/dst/speechflow-node-whisper-worker.d.ts +1 -0
  60. package/dst/speechflow-node-whisper-worker.js +116 -0
  61. package/dst/speechflow-node-whisper-worker2.d.ts +1 -0
  62. package/dst/speechflow-node-whisper-worker2.js +82 -0
  63. package/dst/speechflow-node-whisper.d.ts +19 -0
  64. package/dst/speechflow-node-whisper.js +604 -0
  65. package/dst/speechflow-node-x2x-trace.d.ts +11 -0
  66. package/dst/speechflow-node-x2x-trace.js +88 -0
  67. package/dst/speechflow-node-xio-device.d.ts +13 -0
  68. package/dst/speechflow-node-xio-device.js +205 -0
  69. package/dst/speechflow-node-xio-file.d.ts +11 -0
  70. package/dst/speechflow-node-xio-file.js +176 -0
  71. package/dst/speechflow-node-xio-mqtt.d.ts +13 -0
  72. package/dst/speechflow-node-xio-mqtt.js +181 -0
  73. package/dst/speechflow-node-xio-websocket.d.ts +13 -0
  74. package/dst/speechflow-node-xio-websocket.js +275 -0
  75. package/dst/speechflow-node.d.ts +25 -7
  76. package/dst/speechflow-node.js +74 -9
  77. package/dst/speechflow-utils.d.ts +23 -0
  78. package/dst/speechflow-utils.js +194 -0
  79. package/dst/speechflow.js +146 -43
  80. package/etc/biome.jsonc +12 -4
  81. package/etc/stx.conf +65 -0
  82. package/package.d/@ericedouard+vad-node-realtime+0.2.0.patch +18 -0
  83. package/package.json +49 -31
  84. package/sample.yaml +61 -23
  85. package/src/lib.d.ts +6 -1
  86. package/src/{speechflow-node-ffmpeg.ts → speechflow-node-a2a-ffmpeg.ts} +10 -4
  87. package/src/speechflow-node-a2a-wav.ts +143 -0
  88. package/src/speechflow-node-a2t-deepgram.ts +199 -0
  89. package/src/speechflow-node-t2a-elevenlabs.ts +160 -0
  90. package/src/{speechflow-node-deepl.ts → speechflow-node-t2t-deepl.ts} +36 -25
  91. package/src/speechflow-node-t2t-format.ts +85 -0
  92. package/src/{speechflow-node-gemma.ts → speechflow-node-t2t-gemma.ts} +89 -25
  93. package/src/speechflow-node-t2t-opus.ts +111 -0
  94. package/src/speechflow-node-t2t-subtitle.ts +101 -0
  95. package/src/speechflow-node-x2x-trace.ts +92 -0
  96. package/src/{speechflow-node-device.ts → speechflow-node-xio-device.ts} +25 -3
  97. package/src/speechflow-node-xio-file.ts +153 -0
  98. package/src/speechflow-node-xio-mqtt.ts +154 -0
  99. package/src/speechflow-node-xio-websocket.ts +248 -0
  100. package/src/speechflow-node.ts +78 -13
  101. package/src/speechflow-utils.ts +212 -0
  102. package/src/speechflow.ts +150 -43
  103. package/etc/nps.yaml +0 -40
  104. package/src/speechflow-node-deepgram.ts +0 -133
  105. package/src/speechflow-node-elevenlabs.ts +0 -116
  106. package/src/speechflow-node-file.ts +0 -108
  107. package/src/speechflow-node-websocket.ts +0 -179
@@ -0,0 +1,160 @@
1
+ /*
2
+ ** SpeechFlow - Speech Processing Flow Graph
3
+ ** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
4
+ ** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
5
+ */
6
+
7
+ /* standard dependencies */
8
+ import Stream from "node:stream"
9
+
10
+ /* external dependencies */
11
+ import * as ElevenLabs from "@elevenlabs/elevenlabs-js"
12
+ import { getStreamAsBuffer } from "get-stream"
13
+ import SpeexResampler from "speex-resampler"
14
+
15
+ /* internal dependencies */
16
+ import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
17
+
18
+ /* SpeechFlow node for Elevenlabs text-to-speech conversion */
19
+ export default class SpeechFlowNodeElevenlabs extends SpeechFlowNode {
20
+ /* declare official node name */
21
+ public static name = "elevenlabs"
22
+
23
+ /* internal state */
24
+ private elevenlabs: ElevenLabs.ElevenLabsClient | null = null
25
+ private static speexInitialized = false
26
+
27
+ /* construct node */
28
+ constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
29
+ super(id, cfg, opts, args)
30
+
31
+ /* declare node configuration parameters */
32
+ this.configure({
33
+ key: { type: "string", val: process.env.SPEECHFLOW_KEY_ELEVENLABS },
34
+ voice: { type: "string", val: "Brian", pos: 0, match: /^(?:.+)$/ },
35
+ language: { type: "string", val: "en", pos: 1, match: /^(?:de|en)$/ },
36
+ speed: { type: "number", val: 1.05, pos: 2, match: (n: number) => n >= 0.7 && n <= 1.2 },
37
+ optimize: { type: "string", val: "latency", pos: 3, match: /^(?:latency|quality)$/ }
38
+ })
39
+
40
+ /* declare node input/output format */
41
+ this.input = "text"
42
+ this.output = "audio"
43
+ }
44
+
45
+ /* open node */
46
+ async open () {
47
+ /* establish ElevenLabs API connection */
48
+ this.elevenlabs = new ElevenLabs.ElevenLabsClient({
49
+ apiKey: this.params.key
50
+ })
51
+
52
+ /* determine maximum sample rate of ElevenLabs tier */
53
+ const maxSampleRates = {
54
+ "free": 16000,
55
+ "starter": 22050,
56
+ "creator": 24000,
57
+ "independent_publisher": 44100,
58
+ "growing_business": 44100,
59
+ "enterprise": 44100
60
+ }
61
+ const sub = await this.elevenlabs.user.subscription.get()
62
+ const tier = (sub.tier ?? "free") as keyof typeof maxSampleRates
63
+ this.log("info", `determined ElevenLabs tier: "${tier}"`)
64
+ let maxSampleRate = 16000
65
+ if (maxSampleRates[tier] !== undefined)
66
+ maxSampleRate = maxSampleRates[tier]
67
+ this.log("info", `determined maximum audio sample rate: ${maxSampleRate}`)
68
+
69
+ /* determine voice for text-to-speech operation
70
+ (for details see https://elevenlabs.io/text-to-speech) */
71
+ const voices = await this.elevenlabs.voices.getAll()
72
+ let voice = voices.voices.find((voice) => voice.name === this.params.voice)
73
+ if (voice === undefined) {
74
+ voice = voices.voices.find((voice) => voice.name!.startsWith(this.params.voice))
75
+ if (voice === undefined)
76
+ throw new Error(`invalid ElevenLabs voice "${this.params.voice}"`)
77
+ }
78
+ const info = Object.keys(voice.labels ?? {}).length > 0 ?
79
+ (", " + Object.entries(voice.labels!)
80
+ .map(([ key, val ]) => `${key}: "${val}"`).join(", ")) : ""
81
+ this.log("info", `selected voice: name: "${voice.name}"${info}`)
82
+
83
+ /* perform text-to-speech operation with Elevenlabs API */
84
+ const model = this.params.optimize === "quality" ?
85
+ "eleven_multilingual_v2" :
86
+ "eleven_flash_v2_5"
87
+ const speechStream = (text: string) => {
88
+ this.log("info", `ElevenLabs: send text "${text}"`)
89
+ return this.elevenlabs!.textToSpeech.convert(voice.voiceId, {
90
+ text,
91
+ modelId: model,
92
+ languageCode: this.params.language,
93
+ outputFormat: `pcm_${maxSampleRate}` as ElevenLabs.ElevenLabs.OutputFormat,
94
+ seed: 815, /* arbitrary, but fixated by us */
95
+ voiceSettings: {
96
+ speed: this.params.speed
97
+ }
98
+ }, {
99
+ timeoutInSeconds: 30,
100
+ maxRetries: 10
101
+ })
102
+ }
103
+
104
+ /* establish resampler from ElevenLabs's tier-dependent output
105
+ sample rate (up to 44.1 kHz) to our standard audio sample rate (48 kHz) */
106
+ if (!SpeechFlowNodeElevenlabs.speexInitialized) {
107
+ /* at least once initialize resampler */
108
+ await SpeexResampler.initPromise
109
+ SpeechFlowNodeElevenlabs.speexInitialized = true
110
+ }
111
+ const resampler = new SpeexResampler(1, maxSampleRate, this.config.audioSampleRate, 7)
112
+
113
+ /* create transform stream and connect it to the ElevenLabs API */
114
+ const log = (level: string, msg: string) => { this.log(level, msg) }
115
+ this.stream = new Stream.Transform({
116
+ writableObjectMode: true,
117
+ readableObjectMode: true,
118
+ decodeStrings: false,
119
+ transform (chunk: SpeechFlowChunk, encoding, callback) {
120
+ if (Buffer.isBuffer(chunk.payload))
121
+ callback(new Error("invalid chunk payload type"))
122
+ else {
123
+ speechStream(chunk.payload).then((stream) => {
124
+ getStreamAsBuffer(stream).then((buffer) => {
125
+ const bufferResampled = resampler.processChunk(buffer)
126
+ log("info", `ElevenLabs: received audio (buffer length: ${buffer.byteLength})`)
127
+ const chunkNew = chunk.clone()
128
+ chunkNew.type = "audio"
129
+ chunkNew.payload = bufferResampled
130
+ this.push(chunkNew)
131
+ callback()
132
+ }).catch((error) => {
133
+ callback(error)
134
+ })
135
+ }).catch((error) => {
136
+ callback(error)
137
+ })
138
+ }
139
+ },
140
+ final (callback) {
141
+ this.push(null)
142
+ callback()
143
+ }
144
+ })
145
+ }
146
+
147
+ /* close node */
148
+ async close () {
149
+ /* destroy stream */
150
+ if (this.stream !== null) {
151
+ this.stream.destroy()
152
+ this.stream = null
153
+ }
154
+
155
+ /* destroy ElevenLabs API */
156
+ if (this.elevenlabs !== null)
157
+ this.elevenlabs = null
158
+ }
159
+ }
160
+
@@ -6,13 +6,12 @@
6
6
 
7
7
  /* standard dependencies */
8
8
  import Stream from "node:stream"
9
- import { EventEmitter } from "node:events"
10
9
 
11
10
  /* external dependencies */
12
11
  import * as DeepL from "deepl-node"
13
12
 
14
13
  /* internal dependencies */
15
- import SpeechFlowNode from "./speechflow-node"
14
+ import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
16
15
 
17
16
  /* SpeechFlow node for DeepL text-to-text translations */
18
17
  export default class SpeechFlowNodeDeepL extends SpeechFlowNode {
@@ -23,17 +22,21 @@ export default class SpeechFlowNodeDeepL extends SpeechFlowNode {
23
22
  private deepl: DeepL.Translator | null = null
24
23
 
25
24
  /* construct node */
26
- constructor (id: string, opts: { [ id: string ]: any }, args: any[]) {
27
- super(id, opts, args)
25
+ constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
26
+ super(id, cfg, opts, args)
28
27
 
29
28
  /* declare node configuration parameters */
30
29
  this.configure({
31
30
  key: { type: "string", val: process.env.SPEECHFLOW_KEY_DEEPL },
32
- src: { type: "string", pos: 0, val: "de", match: /^(?:de|en-US)$/ },
33
- dst: { type: "string", pos: 1, val: "en-US", match: /^(?:de|en-US)$/ },
31
+ src: { type: "string", pos: 0, val: "de", match: /^(?:de|en)$/ },
32
+ dst: { type: "string", pos: 1, val: "en", match: /^(?:de|en)$/ },
34
33
  optimize: { type: "string", pos: 2, val: "latency", match: /^(?:latency|quality)$/ }
35
34
  })
36
35
 
36
+ /* sanity check situation */
37
+ if (this.params.src === this.params.dst)
38
+ throw new Error("source and destination languages cannot be the same")
39
+
37
40
  /* declare node input/output format */
38
41
  this.input = "text"
39
42
  this.output = "text"
@@ -46,7 +49,9 @@ export default class SpeechFlowNodeDeepL extends SpeechFlowNode {
46
49
 
47
50
  /* provide text-to-text translation */
48
51
  const translate = async (text: string) => {
49
- const result = await this.deepl!.translateText(text, this.params.src, this.params.dst, {
52
+ const src = this.params.src === "en" ? "en-US" : this.params.src
53
+ const dst = this.params.dst === "en" ? "en-US" : this.params.dst
54
+ const result = await this.deepl!.translateText(text, src, dst, {
50
55
  splitSentences: "off",
51
56
  modelType: this.params.optimize === "latency" ?
52
57
  "latency_optimized" : "prefer_quality_optimized",
@@ -57,32 +62,38 @@ export default class SpeechFlowNodeDeepL extends SpeechFlowNode {
57
62
  }
58
63
 
59
64
  /* establish a duplex stream and connect it to DeepL translation */
60
- const queue = new EventEmitter()
61
- this.stream = new Stream.Duplex({
62
- write (chunk: Buffer, encoding, callback) {
63
- const data = chunk.toString()
64
- if (data === "") {
65
- queue.emit("result", "")
66
- callback()
67
- }
65
+ this.stream = new Stream.Transform({
66
+ readableObjectMode: true,
67
+ writableObjectMode: true,
68
+ decodeStrings: false,
69
+ transform (chunk: SpeechFlowChunk, encoding, callback) {
70
+ if (Buffer.isBuffer(chunk.payload))
71
+ callback(new Error("invalid chunk payload type"))
68
72
  else {
69
- translate(data).then((result) => {
70
- queue.emit("result", result)
73
+ if (chunk.payload === "") {
74
+ this.push(chunk)
71
75
  callback()
72
- }).catch((err) => {
73
- callback(err)
74
- })
76
+ }
77
+ else {
78
+ translate(chunk.payload).then((payload) => {
79
+ const chunkNew = chunk.clone()
80
+ chunkNew.payload = payload
81
+ this.push(chunkNew)
82
+ callback()
83
+ }).catch((err) => {
84
+ callback(err)
85
+ })
86
+ }
75
87
  }
76
88
  },
77
- read (size: number) {
78
- queue.once("result", (result: string) => {
79
- this.push(result)
80
- })
89
+ final (callback) {
90
+ this.push(null)
91
+ callback()
81
92
  }
82
93
  })
83
94
  }
84
95
 
85
- /* open node */
96
+ /* close node */
86
97
  async close () {
87
98
  /* close stream */
88
99
  if (this.stream !== null) {
@@ -0,0 +1,85 @@
1
+ /*
2
+ ** SpeechFlow - Speech Processing Flow Graph
3
+ ** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
4
+ ** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
5
+ */
6
+
7
+ /* standard dependencies */
8
+ import Stream from "node:stream"
9
+
10
+ /* external dependencies */
11
+ import wrapText from "wrap-text"
12
+
13
+ /* internal dependencies */
14
+ import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
15
+
16
+ /* SpeechFlow node for text-to-text formatting */
17
+ export default class SpeechFlowNodeFormat extends SpeechFlowNode {
18
+ /* declare official node name */
19
+ public static name = "format"
20
+
21
+ /* construct node */
22
+ constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
23
+ super(id, cfg, opts, args)
24
+
25
+ /* declare node configuration parameters */
26
+ this.configure({
27
+ width: { type: "number", val: 80 }
28
+ })
29
+
30
+ /* declare node input/output format */
31
+ this.input = "text"
32
+ this.output = "text"
33
+ }
34
+
35
+ /* open node */
36
+ async open () {
37
+ /* provide text-to-text formatter */
38
+ const format = async (text: string) => {
39
+ text = wrapText(text, this.params.width)
40
+ text = text.replace(/([^\n])$/, "$1\n")
41
+ return text
42
+ }
43
+
44
+ /* establish a transform stream and connect it to the text formatter */
45
+ this.stream = new Stream.Transform({
46
+ readableObjectMode: true,
47
+ writableObjectMode: true,
48
+ decodeStrings: false,
49
+ transform (chunk: SpeechFlowChunk, encoding, callback) {
50
+ if (Buffer.isBuffer(chunk.payload))
51
+ callback(new Error("invalid chunk payload type"))
52
+ else {
53
+ if (chunk.payload === "") {
54
+ this.push(chunk)
55
+ callback()
56
+ }
57
+ else {
58
+ format(chunk.payload).then((payload) => {
59
+ const chunkNew = chunk.clone()
60
+ chunkNew.payload = payload
61
+ this.push(chunkNew)
62
+ callback()
63
+ }).catch((err) => {
64
+ callback(err)
65
+ })
66
+ }
67
+ }
68
+ },
69
+ final (callback) {
70
+ this.push(null)
71
+ callback()
72
+ }
73
+ })
74
+ }
75
+
76
+ /* close node */
77
+ async close () {
78
+ /* close stream */
79
+ if (this.stream !== null) {
80
+ this.stream.destroy()
81
+ this.stream = null
82
+ }
83
+ }
84
+ }
85
+
@@ -6,13 +6,12 @@
6
6
 
7
7
  /* standard dependencies */
8
8
  import Stream from "node:stream"
9
- import { EventEmitter } from "node:events"
10
9
 
11
10
  /* external dependencies */
12
11
  import { Ollama } from "ollama"
13
12
 
14
13
  /* internal dependencies */
15
- import SpeechFlowNode from "./speechflow-node"
14
+ import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
16
15
 
17
16
  /* internal utility types */
18
17
  type ConfigEntry = { systemPrompt: string, chat: Array<{ role: string, content: string }> }
@@ -28,6 +27,69 @@ export default class SpeechFlowNodeGemma extends SpeechFlowNode {
28
27
 
29
28
  /* internal LLM setup */
30
29
  private setup: Config = {
30
+ /* English (EN) spellchecking only */
31
+ "en-en": {
32
+ systemPrompt:
33
+ "You are a proofreader and spellchecker for English.\n" +
34
+ "Output only the corrected text.\n" +
35
+ "Do NOT use markdown.\n" +
36
+ "Do NOT give any explanations.\n" +
37
+ "Do NOT give any introduction.\n" +
38
+ "Do NOT give any comments.\n" +
39
+ "Do NOT give any preamble.\n" +
40
+ "Do NOT give any prolog.\n" +
41
+ "Do NOT give any epilog.\n" +
42
+ "Do NOT change the gammar.\n" +
43
+ "Do NOT use synonyms for words.\n" +
44
+ "Keep all words.\n" +
45
+ "Fill in missing commas.\n" +
46
+ "Fill in missing points.\n" +
47
+ "Fill in missing question marks.\n" +
48
+ "Fill in missing hyphens.\n" +
49
+ "Focus ONLY on the word spelling.\n" +
50
+ "The text you have to correct is:\n",
51
+ chat: [
52
+ { role: "user", content: "I luve my wyfe" },
53
+ { role: "system", content: "I love my wife." },
54
+ { role: "user", content: "The weether is wunderfull!" },
55
+ { role: "system", content: "The weather is wonderful!" },
56
+ { role: "user", content: "The live awesome but I'm hungry." },
57
+ { role: "system", content: "The live is awesome, but I'm hungry." }
58
+ ]
59
+ },
60
+
61
+ /* German (DE) spellchecking only */
62
+ "de-de": {
63
+ systemPrompt:
64
+ "Du bist ein Korrekturleser und Rechtschreibprüfer für Deutsch.\n" +
65
+ "Gib nur den korrigierten Text aus.\n" +
66
+ "Benutze KEIN Markdown.\n" +
67
+ "Gib KEINE Erklärungen.\n" +
68
+ "Gib KEINE Einleitung.\n" +
69
+ "Gib KEINE Kommentare.\n" +
70
+ "Gib KEINE Preamble.\n" +
71
+ "Gib KEINEN Prolog.\n" +
72
+ "Gib KEINEN Epilog.\n" +
73
+ "Ändere NICHT die Grammatik.\n" +
74
+ "Verwende KEINE Synonyme für Wörter.\n" +
75
+ "Behalte alle Wörter bei.\n" +
76
+ "Füge fehlende Kommas ein.\n" +
77
+ "Füge fehlende Punkte ein.\n" +
78
+ "Füge fehlende Fragezeichen ein.\n" +
79
+ "Füge fehlende Bindestriche ein.\n" +
80
+ "Füge fehlende Gedankenstriche ein.\n" +
81
+ "Fokussiere dich NUR auf die Rechtschreibung der Wörter.\n" +
82
+ "Der von dir zu korrigierende Text ist:\n",
83
+ chat: [
84
+ { role: "user", content: "Ich ljebe meine Frao" },
85
+ { role: "system", content: "Ich liebe meine Frau." },
86
+ { role: "user", content: "Die Wedter ist wunderschoen." },
87
+ { role: "system", content: "Das Wetter ist wunderschön." },
88
+ { role: "user", content: "Das Leben einfach großartig aber ich bin hungrig." },
89
+ { role: "system", content: "Das Leben ist einfach großartig, aber ich bin hungrig." }
90
+ ]
91
+ },
92
+
31
93
  /* English (EN) to German (DE) translation */
32
94
  "en-de": {
33
95
  systemPrompt:
@@ -78,8 +140,8 @@ export default class SpeechFlowNodeGemma extends SpeechFlowNode {
78
140
  }
79
141
 
80
142
  /* construct node */
81
- constructor (id: string, opts: { [ id: string ]: any }, args: any[]) {
82
- super(id, opts, args)
143
+ constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
144
+ super(id, cfg, opts, args)
83
145
 
84
146
  /* declare node configuration parameters */
85
147
  this.configure({
@@ -88,10 +150,6 @@ export default class SpeechFlowNodeGemma extends SpeechFlowNode {
88
150
  dst: { type: "string", pos: 1, val: "en", match: /^(?:de|en)$/ }
89
151
  })
90
152
 
91
- /* sanity check situation */
92
- if (this.params.src === this.params.dst)
93
- throw new Error("source and destination languages cannot be the same")
94
-
95
153
  /* declare node input/output format */
96
154
  this.input = "text"
97
155
  this.output = "text"
@@ -126,27 +184,33 @@ export default class SpeechFlowNodeGemma extends SpeechFlowNode {
126
184
  }
127
185
 
128
186
  /* establish a duplex stream and connect it to Ollama */
129
- const queue = new EventEmitter()
130
- this.stream = new Stream.Duplex({
131
- write (chunk: Buffer, encoding, callback) {
132
- const data = chunk.toString()
133
- if (data === "") {
134
- queue.emit("result", "")
135
- callback()
136
- }
187
+ this.stream = new Stream.Transform({
188
+ readableObjectMode: true,
189
+ writableObjectMode: true,
190
+ decodeStrings: false,
191
+ transform (chunk: SpeechFlowChunk, encoding, callback) {
192
+ if (Buffer.isBuffer(chunk.payload))
193
+ callback(new Error("invalid chunk payload type"))
137
194
  else {
138
- translate(data).then((result) => {
139
- queue.emit("result", result)
195
+ if (chunk.payload === "") {
196
+ this.push(chunk)
140
197
  callback()
141
- }).catch((err) => {
142
- callback(err)
143
- })
198
+ }
199
+ else {
200
+ translate(chunk.payload).then((payload) => {
201
+ const chunkNew = chunk.clone()
202
+ chunkNew.payload = payload
203
+ this.push(chunkNew)
204
+ callback()
205
+ }).catch((err) => {
206
+ callback(err)
207
+ })
208
+ }
144
209
  }
145
210
  },
146
- read (size) {
147
- queue.once("result", (result: string) => {
148
- this.push(result)
149
- })
211
+ final (callback) {
212
+ this.push(null)
213
+ callback()
150
214
  }
151
215
  })
152
216
  }
@@ -0,0 +1,111 @@
1
+ /*
2
+ ** SpeechFlow - Speech Processing Flow Graph
3
+ ** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
4
+ ** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
5
+ */
6
+
7
+ /* standard dependencies */
8
+ import path from "node:path"
9
+ import Stream from "node:stream"
10
+
11
+ /* external dependencies */
12
+ import * as Transformers from "@huggingface/transformers"
13
+
14
+ /* internal dependencies */
15
+ import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
16
+
17
+ /* SpeechFlow node for OPUS text-to-text translation */
18
+ export default class SpeechFlowNodeOPUS extends SpeechFlowNode {
19
+ /* declare official node name */
20
+ public static name = "opus"
21
+
22
+ /* internal state */
23
+ private translator: Transformers.TranslationPipeline | null = null
24
+
25
+ /* construct node */
26
+ constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
27
+ super(id, cfg, opts, args)
28
+
29
+ /* declare node configuration parameters */
30
+ this.configure({
31
+ src: { type: "string", pos: 0, val: "de", match: /^(?:de|en)$/ },
32
+ dst: { type: "string", pos: 1, val: "en", match: /^(?:de|en)$/ }
33
+ })
34
+
35
+ /* sanity check situation */
36
+ if (this.params.src === this.params.dst)
37
+ throw new Error("source and destination languages cannot be the same")
38
+
39
+ /* declare node input/output format */
40
+ this.input = "text"
41
+ this.output = "text"
42
+ }
43
+
44
+ /* open node */
45
+ async open () {
46
+ /* instantiate OPUS */
47
+ const model = `onnx-community/opus-mt-${this.params.src}-${this.params.dst}`
48
+ this.translator = await Transformers.pipeline("translation", model, {
49
+ cache_dir: path.join(this.config.cacheDir, "opus"),
50
+ dtype: "q4",
51
+ device: "gpu"
52
+ })
53
+ if (this.translator === null)
54
+ throw new Error("failed to instantiate translator pipeline")
55
+
56
+ /* provide text-to-text translation */
57
+ const translate = async (text: string) => {
58
+ const result = await this.translator!(text)
59
+ return Array.isArray(result) ?
60
+ (result[0] as Transformers.TranslationSingle).translation_text :
61
+ (result as Transformers.TranslationSingle).translation_text
62
+ }
63
+
64
+ /* establish a duplex stream and connect it to Ollama */
65
+ this.stream = new Stream.Transform({
66
+ readableObjectMode: true,
67
+ writableObjectMode: true,
68
+ decodeStrings: false,
69
+ transform (chunk: SpeechFlowChunk, encoding, callback) {
70
+ if (Buffer.isBuffer(chunk.payload))
71
+ callback(new Error("invalid chunk payload type"))
72
+ else {
73
+ if (chunk.payload === "") {
74
+ this.push(chunk)
75
+ callback()
76
+ }
77
+ else {
78
+ translate(chunk.payload).then((payload) => {
79
+ const chunkNew = chunk.clone()
80
+ chunkNew.payload = payload
81
+ this.push(chunkNew)
82
+ callback()
83
+ }).catch((err) => {
84
+ callback(err)
85
+ })
86
+ }
87
+ }
88
+ },
89
+ final (callback) {
90
+ this.push(null)
91
+ callback()
92
+ }
93
+ })
94
+ }
95
+
96
+ /* close node */
97
+ async close () {
98
+ /* close stream */
99
+ if (this.stream !== null) {
100
+ this.stream.destroy()
101
+ this.stream = null
102
+ }
103
+
104
+ /* shutdown OPUS */
105
+ if (this.translator !== null) {
106
+ this.translator.dispose()
107
+ this.translator = null
108
+ }
109
+ }
110
+ }
111
+