speechflow 1.7.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +23 -0
- package/README.md +425 -146
- package/etc/claude.md +5 -5
- package/etc/speechflow.yaml +2 -2
- package/package.json +3 -3
- package/speechflow-cli/dst/speechflow-main-api.js +6 -5
- package/speechflow-cli/dst/speechflow-main-api.js.map +1 -1
- package/speechflow-cli/dst/speechflow-main-graph.d.ts +1 -0
- package/speechflow-cli/dst/speechflow-main-graph.js +35 -13
- package/speechflow-cli/dst/speechflow-main-graph.js.map +1 -1
- package/speechflow-cli/dst/speechflow-main-status.js +3 -7
- package/speechflow-cli/dst/speechflow-main-status.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-compressor-wt.js +3 -0
- package/speechflow-cli/dst/speechflow-node-a2a-compressor-wt.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-compressor.js +4 -2
- package/speechflow-cli/dst/speechflow-node-a2a-compressor.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-expander-wt.js +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-expander.js +4 -2
- package/speechflow-cli/dst/speechflow-node-a2a-expander.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-gender.js +2 -2
- package/speechflow-cli/dst/speechflow-node-a2a-gender.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-pitch.js +1 -2
- package/speechflow-cli/dst/speechflow-node-a2a-pitch.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-wav.js +32 -5
- package/speechflow-cli/dst/speechflow-node-a2a-wav.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2t-amazon.d.ts +0 -1
- package/speechflow-cli/dst/speechflow-node-a2t-amazon.js +1 -6
- package/speechflow-cli/dst/speechflow-node-a2t-amazon.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2t-deepgram.d.ts +0 -1
- package/speechflow-cli/dst/speechflow-node-a2t-deepgram.js +9 -9
- package/speechflow-cli/dst/speechflow-node-a2t-deepgram.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2t-google.d.ts +17 -0
- package/speechflow-cli/dst/speechflow-node-a2t-google.js +320 -0
- package/speechflow-cli/dst/speechflow-node-a2t-google.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-a2t-openai.js +6 -4
- package/speechflow-cli/dst/speechflow-node-a2t-openai.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2a-amazon.js +6 -11
- package/speechflow-cli/dst/speechflow-node-t2a-amazon.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.js +6 -5
- package/speechflow-cli/dst/speechflow-node-t2a-elevenlabs.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2a-google.d.ts +15 -0
- package/speechflow-cli/dst/speechflow-node-t2a-google.js +218 -0
- package/speechflow-cli/dst/speechflow-node-t2a-google.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-t2a-kokoro.d.ts +2 -0
- package/speechflow-cli/dst/speechflow-node-t2a-kokoro.js +19 -6
- package/speechflow-cli/dst/speechflow-node-t2a-kokoro.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2a-openai.d.ts +15 -0
- package/speechflow-cli/dst/speechflow-node-t2a-openai.js +195 -0
- package/speechflow-cli/dst/speechflow-node-t2a-openai.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-t2a-supertonic.d.ts +17 -0
- package/speechflow-cli/dst/speechflow-node-t2a-supertonic.js +608 -0
- package/speechflow-cli/dst/speechflow-node-t2a-supertonic.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-t2t-amazon.js.map +1 -1
- package/speechflow-cli/dst/{speechflow-node-t2t-transformers.d.ts → speechflow-node-t2t-opus.d.ts} +1 -3
- package/speechflow-cli/dst/speechflow-node-t2t-opus.js +159 -0
- package/speechflow-cli/dst/speechflow-node-t2t-opus.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-t2t-profanity.d.ts +11 -0
- package/speechflow-cli/dst/speechflow-node-t2t-profanity.js +118 -0
- package/speechflow-cli/dst/speechflow-node-t2t-profanity.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-t2t-punctuation.d.ts +13 -0
- package/speechflow-cli/dst/speechflow-node-t2t-punctuation.js +220 -0
- package/speechflow-cli/dst/speechflow-node-t2t-punctuation.js.map +1 -0
- package/speechflow-cli/dst/{speechflow-node-t2t-openai.d.ts → speechflow-node-t2t-spellcheck.d.ts} +2 -2
- package/speechflow-cli/dst/{speechflow-node-t2t-openai.js → speechflow-node-t2t-spellcheck.js} +47 -99
- package/speechflow-cli/dst/speechflow-node-t2t-spellcheck.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js +3 -6
- package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-summary.d.ts +16 -0
- package/speechflow-cli/dst/speechflow-node-t2t-summary.js +241 -0
- package/speechflow-cli/dst/speechflow-node-t2t-summary.js.map +1 -0
- package/speechflow-cli/dst/{speechflow-node-t2t-ollama.d.ts → speechflow-node-t2t-translate.d.ts} +2 -2
- package/speechflow-cli/dst/{speechflow-node-t2t-transformers.js → speechflow-node-t2t-translate.js} +53 -115
- package/speechflow-cli/dst/speechflow-node-t2t-translate.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-x2x-filter.d.ts +1 -0
- package/speechflow-cli/dst/speechflow-node-x2x-filter.js +10 -0
- package/speechflow-cli/dst/speechflow-node-x2x-filter.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-x2x-trace.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-device.js +3 -3
- package/speechflow-cli/dst/speechflow-node-xio-device.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-exec.d.ts +12 -0
- package/speechflow-cli/dst/speechflow-node-xio-exec.js +223 -0
- package/speechflow-cli/dst/speechflow-node-xio-exec.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-xio-file.d.ts +1 -0
- package/speechflow-cli/dst/speechflow-node-xio-file.js +80 -67
- package/speechflow-cli/dst/speechflow-node-xio-file.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-mqtt.js +2 -1
- package/speechflow-cli/dst/speechflow-node-xio-mqtt.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-vban.d.ts +17 -0
- package/speechflow-cli/dst/speechflow-node-xio-vban.js +330 -0
- package/speechflow-cli/dst/speechflow-node-xio-vban.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-xio-webrtc.d.ts +39 -0
- package/speechflow-cli/dst/speechflow-node-xio-webrtc.js +500 -0
- package/speechflow-cli/dst/speechflow-node-xio-webrtc.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-xio-websocket.js +2 -1
- package/speechflow-cli/dst/speechflow-node-xio-websocket.js.map +1 -1
- package/speechflow-cli/dst/speechflow-util-audio.js +5 -6
- package/speechflow-cli/dst/speechflow-util-audio.js.map +1 -1
- package/speechflow-cli/dst/speechflow-util-error.d.ts +1 -1
- package/speechflow-cli/dst/speechflow-util-error.js +5 -7
- package/speechflow-cli/dst/speechflow-util-error.js.map +1 -1
- package/speechflow-cli/dst/speechflow-util-llm.d.ts +35 -0
- package/speechflow-cli/dst/speechflow-util-llm.js +363 -0
- package/speechflow-cli/dst/speechflow-util-llm.js.map +1 -0
- package/speechflow-cli/dst/speechflow-util-misc.d.ts +1 -1
- package/speechflow-cli/dst/speechflow-util-misc.js +4 -4
- package/speechflow-cli/dst/speechflow-util-misc.js.map +1 -1
- package/speechflow-cli/dst/speechflow-util-queue.js +3 -3
- package/speechflow-cli/dst/speechflow-util-queue.js.map +1 -1
- package/speechflow-cli/dst/speechflow-util-stream.js +4 -2
- package/speechflow-cli/dst/speechflow-util-stream.js.map +1 -1
- package/speechflow-cli/dst/speechflow-util.d.ts +1 -0
- package/speechflow-cli/dst/speechflow-util.js +1 -0
- package/speechflow-cli/dst/speechflow-util.js.map +1 -1
- package/speechflow-cli/etc/oxlint.jsonc +2 -1
- package/speechflow-cli/package.json +34 -17
- package/speechflow-cli/src/lib.d.ts +5 -0
- package/speechflow-cli/src/speechflow-main-api.ts +6 -5
- package/speechflow-cli/src/speechflow-main-graph.ts +40 -13
- package/speechflow-cli/src/speechflow-main-status.ts +4 -8
- package/speechflow-cli/src/speechflow-node-a2a-compressor-wt.ts +4 -0
- package/speechflow-cli/src/speechflow-node-a2a-compressor.ts +4 -2
- package/speechflow-cli/src/speechflow-node-a2a-expander-wt.ts +1 -1
- package/speechflow-cli/src/speechflow-node-a2a-expander.ts +4 -2
- package/speechflow-cli/src/speechflow-node-a2a-gender.ts +2 -2
- package/speechflow-cli/src/speechflow-node-a2a-pitch.ts +1 -2
- package/speechflow-cli/src/speechflow-node-a2a-wav.ts +33 -6
- package/speechflow-cli/src/speechflow-node-a2t-amazon.ts +6 -11
- package/speechflow-cli/src/speechflow-node-a2t-deepgram.ts +13 -12
- package/speechflow-cli/src/speechflow-node-a2t-google.ts +322 -0
- package/speechflow-cli/src/speechflow-node-a2t-openai.ts +8 -4
- package/speechflow-cli/src/speechflow-node-t2a-amazon.ts +7 -11
- package/speechflow-cli/src/speechflow-node-t2a-elevenlabs.ts +6 -5
- package/speechflow-cli/src/speechflow-node-t2a-google.ts +206 -0
- package/speechflow-cli/src/speechflow-node-t2a-kokoro.ts +22 -6
- package/speechflow-cli/src/speechflow-node-t2a-openai.ts +179 -0
- package/speechflow-cli/src/speechflow-node-t2a-supertonic.ts +701 -0
- package/speechflow-cli/src/speechflow-node-t2t-amazon.ts +2 -1
- package/speechflow-cli/src/speechflow-node-t2t-opus.ts +136 -0
- package/speechflow-cli/src/speechflow-node-t2t-profanity.ts +93 -0
- package/speechflow-cli/src/speechflow-node-t2t-punctuation.ts +201 -0
- package/speechflow-cli/src/{speechflow-node-t2t-openai.ts → speechflow-node-t2t-spellcheck.ts} +48 -107
- package/speechflow-cli/src/speechflow-node-t2t-subtitle.ts +3 -6
- package/speechflow-cli/src/speechflow-node-t2t-summary.ts +229 -0
- package/speechflow-cli/src/speechflow-node-t2t-translate.ts +181 -0
- package/speechflow-cli/src/speechflow-node-x2x-filter.ts +16 -3
- package/speechflow-cli/src/speechflow-node-x2x-trace.ts +3 -3
- package/speechflow-cli/src/speechflow-node-xio-device.ts +4 -7
- package/speechflow-cli/src/speechflow-node-xio-exec.ts +210 -0
- package/speechflow-cli/src/speechflow-node-xio-file.ts +93 -80
- package/speechflow-cli/src/speechflow-node-xio-mqtt.ts +3 -2
- package/speechflow-cli/src/speechflow-node-xio-vban.ts +325 -0
- package/speechflow-cli/src/speechflow-node-xio-webrtc.ts +533 -0
- package/speechflow-cli/src/speechflow-node-xio-websocket.ts +2 -1
- package/speechflow-cli/src/speechflow-util-audio-wt.ts +4 -4
- package/speechflow-cli/src/speechflow-util-audio.ts +10 -10
- package/speechflow-cli/src/speechflow-util-error.ts +9 -7
- package/speechflow-cli/src/speechflow-util-llm.ts +367 -0
- package/speechflow-cli/src/speechflow-util-misc.ts +4 -4
- package/speechflow-cli/src/speechflow-util-queue.ts +4 -4
- package/speechflow-cli/src/speechflow-util-stream.ts +5 -3
- package/speechflow-cli/src/speechflow-util.ts +1 -0
- package/speechflow-ui-db/package.json +9 -9
- package/speechflow-ui-st/package.json +9 -9
- package/speechflow-cli/dst/speechflow-node-t2t-ollama.js +0 -293
- package/speechflow-cli/dst/speechflow-node-t2t-ollama.js.map +0 -1
- package/speechflow-cli/dst/speechflow-node-t2t-openai.js.map +0 -1
- package/speechflow-cli/dst/speechflow-node-t2t-transformers.js.map +0 -1
- package/speechflow-cli/src/speechflow-node-t2t-ollama.ts +0 -281
- package/speechflow-cli/src/speechflow-node-t2t-transformers.ts +0 -247
|
@@ -32,7 +32,7 @@ type TextChunk = {
|
|
|
32
32
|
text: string
|
|
33
33
|
}
|
|
34
34
|
|
|
35
|
-
/* SpeechFlow node for subtitle (text-to-text)
|
|
35
|
+
/* SpeechFlow node for subtitle (text-to-text) conversions */
|
|
36
36
|
export default class SpeechFlowNodeT2TSubtitle extends SpeechFlowNode {
|
|
37
37
|
/* declare official node name */
|
|
38
38
|
public static name = "t2t-subtitle"
|
|
@@ -311,7 +311,7 @@ export default class SpeechFlowNodeT2TSubtitle extends SpeechFlowNode {
|
|
|
311
311
|
final (callback) {
|
|
312
312
|
/* process any remaining buffer content */
|
|
313
313
|
if (buffer.trim() !== "") {
|
|
314
|
-
|
|
314
|
+
util.shield(() => {
|
|
315
315
|
/* parse entries */
|
|
316
316
|
const entries = self.params.format === "srt" ? parseSRT(buffer) : parseVTT(buffer)
|
|
317
317
|
|
|
@@ -320,10 +320,7 @@ export default class SpeechFlowNodeT2TSubtitle extends SpeechFlowNode {
|
|
|
320
320
|
const chunkNew = new SpeechFlowChunk(entry.start, entry.end, "final", "text", entry.text)
|
|
321
321
|
this.push(chunkNew)
|
|
322
322
|
}
|
|
323
|
-
}
|
|
324
|
-
catch (_error: unknown) {
|
|
325
|
-
/* ignore parse errors on final flush */
|
|
326
|
-
}
|
|
323
|
+
})
|
|
327
324
|
}
|
|
328
325
|
callback()
|
|
329
326
|
}
|
|
@@ -0,0 +1,229 @@
|
|
|
1
|
+
/*
|
|
2
|
+
** SpeechFlow - Speech Processing Flow Graph
|
|
3
|
+
** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
|
|
4
|
+
** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
/* standard dependencies */
|
|
8
|
+
import Stream from "node:stream"
|
|
9
|
+
|
|
10
|
+
/* internal dependencies */
|
|
11
|
+
import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
|
|
12
|
+
import * as util from "./speechflow-util"
|
|
13
|
+
import { LLM, type LLMCompleteMessage } from "./speechflow-util-llm"
|
|
14
|
+
|
|
15
|
+
/* internal utility types:
   ConfigEntry is the per-language LLM setup (system prompt plus few-shot chat),
   Config maps a language code to its ConfigEntry */
type ConfigEntry = { systemPrompt: string, chat: LLMCompleteMessage[] }
type Config      = { [ key: string ]: ConfigEntry }

/* SpeechFlow node for text-to-text summarization:
   accumulates the payloads of incoming text chunks and, once at least
   "trigger" sentence ends were seen since the last summary, asks the LLM
   to condense the accumulated text into "size" sentences and emits the
   result as a single text chunk. Empty payloads are passed through
   unchanged; text which is still pending at the end of the stream is
   summarized one last time (if it contains at least one sentence end). */
export default class SpeechFlowNodeT2TSummary extends SpeechFlowNode {
    /* declare official node name */
    public static name = "t2t-summary"

    /* internal state */
    private llm: LLM | null = null           /* LLM instance (exists between open and close)     */
    private accumulatedText = ""             /* text collected since the last emitted summary    */
    private sentencesSinceLastSummary = 0    /* sentence ends counted since the last summary     */

    /* internal LLM setup (per language: system prompt and few-shot examples);
       the "%N%" placeholder is replaced with the "size" parameter on use */
    private setup: Config = {
        /* English (EN) summarization */
        "en": {
            systemPrompt:
                "You are a text summarizer.\n" +
                "Output only the summary.\n" +
                "Do NOT use markdown.\n" +
                "Do NOT give any explanations.\n" +
                "Do NOT give any introduction.\n" +
                "Do NOT give any comments.\n" +
                "Do NOT give any preamble.\n" +
                "Do NOT give any prolog.\n" +
                "Do NOT give any epilog.\n" +
                "Get to the point.\n" +
                "Summarize the following text into %N% sentences.\n" +
                "The text is:\n",
            chat: [
                { role: "user",      content: "The weather today is sunny and warm. Birds are singing in the trees. People are enjoying the outdoors." },
                { role: "assistant", content: "The weather is pleasant with sunshine, birdsong, and people outdoors." },
                { role: "user",      content: "John went to the store to buy groceries. He needed milk, bread, and eggs. The store was crowded but he found everything he needed." },
                { role: "assistant", content: "John successfully bought milk, bread, and eggs from a crowded store." }
            ]
        },

        /* German (DE) summarization */
        "de": {
            systemPrompt:
                "Du bist ein Textzusammenfasser.\n" +
                "Gib nur die Zusammenfassung aus.\n" +
                "Benutze KEIN Markdown.\n" +
                "Gib KEINE Erklärungen.\n" +
                "Gib KEINE Einleitung.\n" +
                "Gib KEINE Kommentare.\n" +
                "Gib KEINE Präambel.\n" +
                "Gib KEINEN Prolog.\n" +
                "Gib KEINEN Epilog.\n" +
                "Komme auf den Punkt.\n" +
                "Fasse den folgenden Text in %N% Sätzen zusammen.\n" +
                "Der Text ist:\n",
            chat: [
                { role: "user",      content: "Das Wetter heute ist sonnig und warm. Vögel singen in den Bäumen. Die Menschen genießen die Zeit im Freien." },
                { role: "assistant", content: "Das Wetter ist angenehm mit Sonnenschein, Vogelgesang und Menschen im Freien." },
                { role: "user",      content: "Hans ging in den Laden, um Lebensmittel zu kaufen. Er brauchte Milch, Brot und Eier. Der Laden war voll, aber er fand alles, was er brauchte." },
                { role: "assistant", content: "Hans kaufte erfolgreich Milch, Brot und Eier in einem vollen Laden." }
            ]
        }
    }

    /* construct node */
    constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
        super(id, cfg, opts, args)

        /* declare node configuration parameters
           (lang, size and trigger are additionally accepted as positional arguments 0, 1 and 2) */
        this.configure({
            provider: { type: "string",         val: "ollama",                 match: /^(?:openai|anthropic|google|ollama|transformers)$/ },
            api:      { type: "string",         val: "http://127.0.0.1:11434", match: /^https?:\/\/.+?(:\d+)?$/ },
            model:    { type: "string",         val: "gemma3:4b-it-q4_K_M",    match: /^.+$/ },
            key:      { type: "string",         val: "",                       match: /^.*$/ },
            lang:     { type: "string", pos: 0, val: "en",                     match: /^(?:en|de)$/ },
            size:     { type: "number", pos: 1, val: 4,                        match: (n: number) => n >= 1 && n <= 20 },
            trigger:  { type: "number", pos: 2, val: 8,                        match: (n: number) => n >= 1 && n <= 100 }
        })

        /* tell effective mode */
        this.log("info", `summarizing language "${this.params.lang}" ` +
            `via ${this.params.provider} LLM (model: ${this.params.model}), ` +
            `triggering every new ${this.params.trigger} sentences, ` +
            `summarizing into ${this.params.size} sentences`)

        /* declare node input/output format */
        this.input  = "text"
        this.output = "text"
    }

    /* count sentences in text:
       a sentence end is a run of terminator characters (".", ";", "?", "!"),
       optionally followed by closing quotes or brackets, which is then
       followed by whitespace or the end of the text. This way decimals
       ("3.14"), ellipses ("...") and doubled marks ("?!") count at most
       once instead of once per character. */
    private countSentences (text: string): number {
        const matches = text.match(/[.;?!]+["'”’)\]]*(?=\s|$)/g)
        return matches !== null ? matches.length : 0
    }

    /* open node */
    async open () {
        /* reset internal state */
        this.accumulatedText = ""
        this.sentencesSinceLastSummary = 0

        /* instantiate LLM
           (kept in a local constant, too, so the closures below
           do not need a non-null assertion on the nullable field) */
        const llm = new LLM({
            provider:    this.params.provider,
            api:         this.params.api,
            model:       this.params.model,
            key:         this.params.key,
            temperature: 0.7,
            topP:        0.5
        })
        this.llm = llm

        /* forward LLM log messages into the node log */
        llm.on("log", (level: string, message: string) => {
            this.log(level as "info" | "warning" | "error", message)
        })
        await llm.open()

        /* provide text summarization
           (rejects for a language without an entry in the setup table) */
        const summarize = async (text: string) => {
            const cfg = this.setup[this.params.lang]
            if (!cfg)
                throw new Error(`unsupported language: ${this.params.lang}`)
            return llm.complete({
                /* the size parameter is numeric, so convert it explicitly for the replacement */
                system:   cfg.systemPrompt.replace(/%N%/, String(this.params.size)),
                messages: cfg.chat,
                prompt:   text
            })
        }

        /* establish a transform stream for summarization
           (inside the stream methods "this" is the stream, "self" is the node) */
        const self = this
        this.stream = new Stream.Transform({
            readableObjectMode: true,
            writableObjectMode: true,
            decodeStrings:      false,
            highWaterMark:      1,
            transform (chunk: SpeechFlowChunk, encoding, callback) {
                if (Buffer.isBuffer(chunk.payload))
                    callback(new Error("invalid chunk payload type"))
                else if (chunk.payload === "") {
                    /* pass-through empty chunks unchanged */
                    this.push(chunk)
                    callback()
                }
                else {
                    /* accumulate text (space-separated) */
                    if (self.accumulatedText.length > 0)
                        self.accumulatedText += " "
                    self.accumulatedText += chunk.payload

                    /* count new sentences */
                    const newSentences = self.countSentences(chunk.payload)
                    self.sentencesSinceLastSummary += newSentences
                    self.log("info", `accumulated ${self.sentencesSinceLastSummary} sentences ` +
                        `(trigger: ${self.params.trigger})`)

                    /* check if we should generate a summary */
                    if (self.sentencesSinceLastSummary >= self.params.trigger) {
                        /* take over the accumulated text and reset the state
                           before the asynchronous LLM call is started */
                        self.sentencesSinceLastSummary = 0
                        self.log("info", "generating summary of accumulated text")
                        const textToSummarize = self.accumulatedText
                        self.accumulatedText = ""
                        summarize(textToSummarize).then((summary) => {
                            /* the summary chunk is a clone of the triggering chunk
                               with just its payload replaced */
                            const chunkNew = chunk.clone()
                            chunkNew.payload = summary
                            this.push(chunkNew)
                            callback()
                        }).catch((error: unknown) => {
                            callback(util.ensureError(error))
                        })
                    }
                    else
                        /* nothing to emit yet */
                        callback()
                }
            },
            final (callback) {
                /* generate final summary if there is accumulated text */
                if (self.accumulatedText.length > 0 && self.sentencesSinceLastSummary > 0) {
                    self.sentencesSinceLastSummary = 0
                    self.log("info", "generating final summary of accumulated text")
                    const textToSummarize = self.accumulatedText
                    self.accumulatedText = ""
                    summarize(textToSummarize).then((summary) => {
                        /* no source chunk exists here, so create a fresh final text chunk
                           with both timestamps set to the node's time zero offset */
                        const chunkNew = new SpeechFlowChunk(
                            self.timeZeroOffset, self.timeZeroOffset,
                            "final", "text", summary)
                        this.push(chunkNew)
                        callback()
                    }).catch((error: unknown) => {
                        callback(util.ensureError(error))
                    })
                }
                else
                    callback()
            }
        })
    }

    /* close node */
    async close () {
        /* reset internal state */
        this.accumulatedText = ""
        this.sentencesSinceLastSummary = 0

        /* shutdown stream */
        if (this.stream !== null) {
            await util.destroyStream(this.stream)
            this.stream = null
        }

        /* shutdown LLM */
        if (this.llm !== null) {
            await this.llm.close()
            this.llm = null
        }
    }
}
|
|
@@ -0,0 +1,181 @@
|
|
|
1
|
+
/*
|
|
2
|
+
** SpeechFlow - Speech Processing Flow Graph
|
|
3
|
+
** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
|
|
4
|
+
** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
/* standard dependencies */
|
|
8
|
+
import Stream from "node:stream"
|
|
9
|
+
|
|
10
|
+
/* internal dependencies */
|
|
11
|
+
import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
|
|
12
|
+
import * as util from "./speechflow-util"
|
|
13
|
+
import { LLM, type LLMCompleteMessage } from "./speechflow-util-llm"
|
|
14
|
+
|
|
15
|
+
/* internal utility types:
   one LLM setup entry per translation direction, keyed by "<src>-<dst>" */
type ConfigEntry = { systemPrompt: string, chat: LLMCompleteMessage[] }
type Config      = Record<string, ConfigEntry>

/* SpeechFlow node for LLM-based text-to-text translation:
   every non-empty text chunk is sent to the LLM together with the system
   prompt and few-shot examples of the configured language pair, and a
   clone of the chunk carrying the LLM response is emitted. Empty chunks
   are passed through unchanged. */
export default class SpeechFlowNodeT2TTranslate extends SpeechFlowNode {
    /* declare official node name */
    public static name = "t2t-translate"

    /* internal state */
    private llm: LLM | null = null

    /* internal LLM setup (system prompt and few-shot chat per language pair) */
    private setup: Config = {
        /* English (EN) to German (DE) translation */
        "en-de": {
            systemPrompt:
                "You are a translator.\n" +
                "Output only the requested text.\n" +
                "Do not use markdown.\n" +
                "Do not chat.\n" +
                "Do not show any explanations.\n" +
                "Do not show any introduction.\n" +
                "Do not show any preamble.\n" +
                "Do not show any prolog.\n" +
                "Do not show any epilog.\n" +
                "Get to the point.\n" +
                "Preserve the original meaning, tone, and nuance.\n" +
                "Directly translate text from English (EN) to fluent and natural German (DE) language.\n",
            chat: [
                { role: "user",      content: "I love my wife." },
                { role: "assistant", content: "Ich liebe meine Frau." },
                { role: "user",      content: "The weather is wonderful." },
                { role: "assistant", content: "Das Wetter ist wunderschön." },
                { role: "user",      content: "The life is awesome." },
                { role: "assistant", content: "Das Leben ist einfach großartig." }
            ]
        },

        /* German (DE) to English (EN) translation */
        "de-en": {
            systemPrompt:
                "You are a translator.\n" +
                "Output only the requested text.\n" +
                "Do not use markdown.\n" +
                "Do not chat.\n" +
                "Do not show any explanations.\n" +
                "Do not show any introduction.\n" +
                "Do not show any preamble.\n" +
                "Do not show any prolog.\n" +
                "Do not show any epilog.\n" +
                "Get to the point.\n" +
                "Preserve the original meaning, tone, and nuance.\n" +
                "Directly translate text from German (DE) to fluent and natural English (EN) language.\n",
            chat: [
                { role: "user",      content: "Ich liebe meine Frau." },
                { role: "assistant", content: "I love my wife." },
                { role: "user",      content: "Das Wetter ist wunderschön." },
                { role: "assistant", content: "The weather is wonderful." },
                { role: "user",      content: "Das Leben ist einfach großartig." },
                { role: "assistant", content: "The life is awesome." }
            ]
        }
    }

    /* construct node */
    constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
        super(id, cfg, opts, args)

        /* declare node configuration parameters
           (src and dst are additionally accepted as positional arguments 0 and 1) */
        this.configure({
            src:      { type: "string", pos: 0, val: "de",                     match: /^(?:de|en)$/ },
            dst:      { type: "string", pos: 1, val: "en",                     match: /^(?:de|en)$/ },
            provider: { type: "string",         val: "ollama",                 match: /^(?:openai|anthropic|google|ollama|transformers)$/ },
            api:      { type: "string",         val: "http://127.0.0.1:11434", match: /^https?:\/\/.+?(:\d+)?$/ },
            model:    { type: "string",         val: "gemma3:4b-it-q4_K_M",    match: /^.+$/ },
            key:      { type: "string",         val: "",                       match: /^.*$/ }
        })

        /* validate translation direction */
        const { src, dst } = this.params
        if (src === dst)
            throw new Error("source and destination language must be different for translation")

        /* tell effective mode */
        this.log("info", `translating from language "${this.params.src}" to language "${this.params.dst}" ` +
            `via ${this.params.provider} LLM (model: ${this.params.model})`)

        /* declare node input/output format */
        this.input  = "text"
        this.output = "text"
    }

    /* open node */
    async open () {
        /* instantiate LLM and forward its log messages into the node log */
        this.llm = new LLM({
            provider:    this.params.provider,
            api:         this.params.api,
            model:       this.params.model,
            key:         this.params.key,
            temperature: 0.7,
            topP:        0.5
        })
        this.llm.on("log", (level: string, message: string) => {
            this.log(level as "info" | "warning" | "error", message)
        })
        await this.llm.open()

        /* provide text-to-text translation
           (rejects for a language pair without an entry in the setup table) */
        const llm = this.llm!
        const translate = async (text: string) => {
            const pair  = `${this.params.src}-${this.params.dst}`
            const entry = this.setup[pair]
            if (!entry)
                throw new Error(`unsupported language pair: ${pair}`)
            const { systemPrompt: system, chat: messages } = entry
            return llm.complete({ system, messages, prompt: text })
        }

        /* establish a transform stream and connect it to LLM */
        this.stream = new Stream.Transform({
            readableObjectMode: true,
            writableObjectMode: true,
            decodeStrings:      false,
            highWaterMark:      1,
            transform (chunk: SpeechFlowChunk, encoding, callback) {
                const text = chunk.payload

                /* reject non-text payloads */
                if (Buffer.isBuffer(text)) {
                    callback(new Error("invalid chunk payload type"))
                    return
                }

                /* pass-through empty chunks unchanged */
                if (text === "") {
                    this.push(chunk)
                    callback()
                    return
                }

                /* translate and emit a clone of the chunk carrying the translation */
                translate(text)
                    .then((translated) => {
                        const result = chunk.clone()
                        result.payload = translated
                        this.push(result)
                        callback()
                    })
                    .catch((error: unknown) => {
                        callback(util.ensureError(error))
                    })
            },
            final (callback) {
                /* nothing is buffered, so there is nothing to flush */
                callback()
            }
        })
    }

    /* close node */
    async close () {
        /* shutdown stream */
        const stream = this.stream
        if (stream !== null) {
            await util.destroyStream(stream)
            this.stream = null
        }

        /* shutdown LLM */
        const llm = this.llm
        if (llm !== null) {
            await llm.close()
            this.llm = null
        }
    }
}
|
|
@@ -19,6 +19,9 @@ export default class SpeechFlowNodeX2XFilter extends SpeechFlowNode {
|
|
|
19
19
|
/* cached regular expression instance */
|
|
20
20
|
private cachedRegExp = new util.CachedRegExp()
|
|
21
21
|
|
|
22
|
+
/* internal state */
|
|
23
|
+
private closing = false
|
|
24
|
+
|
|
22
25
|
/* construct node */
|
|
23
26
|
constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
|
|
24
27
|
super(id, cfg, opts, args)
|
|
@@ -39,17 +42,20 @@ export default class SpeechFlowNodeX2XFilter extends SpeechFlowNode {
|
|
|
39
42
|
|
|
40
43
|
/* open node */
|
|
41
44
|
async open () {
|
|
45
|
+
/* clear destruction flag */
|
|
46
|
+
this.closing = false
|
|
47
|
+
|
|
42
48
|
/* helper function for comparing two values */
|
|
43
49
|
const comparison = (val1: any, op: string, val2: any) => {
|
|
44
50
|
if (op === "==" || op === "!=") {
|
|
45
51
|
/* equal comparison */
|
|
46
|
-
const str1 = (typeof val1 === "string" ? val1 : val1.toString())
|
|
47
|
-
const str2 = (typeof val2 === "string" ? val2 : val2.toString())
|
|
52
|
+
const str1 = (typeof val1 === "string" ? val1 : val1.toString())
|
|
53
|
+
const str2 = (typeof val2 === "string" ? val2 : val2.toString())
|
|
48
54
|
return (op === "==" ? (str1 === str2) : (str1 !== str2))
|
|
49
55
|
}
|
|
50
56
|
else if (op === "~~" || op === "!~") {
|
|
51
57
|
/* regular expression comparison */
|
|
52
|
-
const str = (typeof val1 === "string" ? val1 : val1.toString())
|
|
58
|
+
const str = (typeof val1 === "string" ? val1 : val1.toString())
|
|
53
59
|
const regexp = (
|
|
54
60
|
val2 instanceof RegExp ?
|
|
55
61
|
val2 :
|
|
@@ -93,6 +99,10 @@ export default class SpeechFlowNodeX2XFilter extends SpeechFlowNode {
|
|
|
93
99
|
decodeStrings: false,
|
|
94
100
|
highWaterMark: 1,
|
|
95
101
|
transform (chunk: SpeechFlowChunk, encoding, callback) {
|
|
102
|
+
if (self.closing) {
|
|
103
|
+
callback(new Error("stream already destroyed"))
|
|
104
|
+
return
|
|
105
|
+
}
|
|
96
106
|
let val1: any
|
|
97
107
|
const val2: any = self.params.val
|
|
98
108
|
const m = self.params.var.match(/^meta:(.+)$/)
|
|
@@ -124,6 +134,9 @@ export default class SpeechFlowNodeX2XFilter extends SpeechFlowNode {
|
|
|
124
134
|
|
|
125
135
|
/* close node */
|
|
126
136
|
async close () {
|
|
137
|
+
/* indicate closing */
|
|
138
|
+
this.closing = true
|
|
139
|
+
|
|
127
140
|
/* shutdown stream */
|
|
128
141
|
if (this.stream !== null) {
|
|
129
142
|
await util.destroyStream(this.stream)
|
|
@@ -28,10 +28,10 @@ export default class SpeechFlowNodeX2XTrace extends SpeechFlowNode {
|
|
|
28
28
|
|
|
29
29
|
/* declare node configuration parameters */
|
|
30
30
|
this.configure({
|
|
31
|
-
type: { type: "string", pos: 0, val: "audio",
|
|
32
|
-
name: { type: "string", pos: 1, val: "trace"
|
|
31
|
+
type: { type: "string", pos: 0, val: "audio", match: /^(?:audio|text)$/ },
|
|
32
|
+
name: { type: "string", pos: 1, val: "trace" },
|
|
33
33
|
mode: { type: "string", pos: 2, val: "filter", match: /^(?:filter|sink)$/ },
|
|
34
|
-
dashboard: { type: "string", val: ""
|
|
34
|
+
dashboard: { type: "string", val: "" }
|
|
35
35
|
})
|
|
36
36
|
|
|
37
37
|
/* sanity check parameters */
|
|
@@ -20,10 +20,7 @@ export default class SpeechFlowNodeXIODevice extends SpeechFlowNode {
|
|
|
20
20
|
public static name = "xio-device"
|
|
21
21
|
|
|
22
22
|
/* internal state */
|
|
23
|
-
private io: PortAudio.IoStreamRead
|
|
24
|
-
| PortAudio.IoStreamWrite
|
|
25
|
-
| PortAudio.IoStreamDuplex
|
|
26
|
-
| null = null
|
|
23
|
+
private io: PortAudio.IoStreamRead | PortAudio.IoStreamWrite | PortAudio.IoStreamDuplex | null = null
|
|
27
24
|
|
|
28
25
|
/* construct node */
|
|
29
26
|
constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
|
|
@@ -87,7 +84,7 @@ export default class SpeechFlowNodeXIODevice extends SpeechFlowNode {
|
|
|
87
84
|
return device
|
|
88
85
|
}
|
|
89
86
|
|
|
90
|
-
/* NOTICE: "
|
|
87
|
+
/* NOTICE: "naudiodon" actually implements Stream.{Readable,Writable,Duplex}, but
|
|
91
88
|
declares just its sub-interface NodeJS.{Readable,Writable,Duplex}Stream,
|
|
92
89
|
so it is correct to cast it back to Stream.{Readable,Writable,Duplex}
|
|
93
90
|
in the following device stream setup functions! */
|
|
@@ -211,7 +208,7 @@ export default class SpeechFlowNodeXIODevice extends SpeechFlowNode {
|
|
|
211
208
|
throw error
|
|
212
209
|
}
|
|
213
210
|
await Promise.race([
|
|
214
|
-
util.
|
|
211
|
+
util.timeout(2 * 1000, "PortAudio abort timeout"),
|
|
215
212
|
new Promise<void>((resolve) => {
|
|
216
213
|
this.io!.abort(() => {
|
|
217
214
|
resolve()
|
|
@@ -219,7 +216,7 @@ export default class SpeechFlowNodeXIODevice extends SpeechFlowNode {
|
|
|
219
216
|
}).catch(catchHandler)
|
|
220
217
|
])
|
|
221
218
|
await Promise.race([
|
|
222
|
-
util.
|
|
219
|
+
util.timeout(2 * 1000, "PortAudio quit timeout"),
|
|
223
220
|
new Promise<void>((resolve) => {
|
|
224
221
|
this.io!.quit(() => {
|
|
225
222
|
resolve()
|