speechflow 1.7.1 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +18 -0
- package/README.md +387 -119
- package/etc/claude.md +5 -5
- package/etc/speechflow.yaml +2 -2
- package/package.json +3 -3
- package/speechflow-cli/dst/speechflow-main-graph.d.ts +1 -0
- package/speechflow-cli/dst/speechflow-main-graph.js +28 -5
- package/speechflow-cli/dst/speechflow-main-graph.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2a-wav.js +24 -4
- package/speechflow-cli/dst/speechflow-node-a2a-wav.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-a2t-google.d.ts +17 -0
- package/speechflow-cli/dst/speechflow-node-a2t-google.js +320 -0
- package/speechflow-cli/dst/speechflow-node-a2t-google.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-t2a-google.d.ts +15 -0
- package/speechflow-cli/dst/speechflow-node-t2a-google.js +218 -0
- package/speechflow-cli/dst/speechflow-node-t2a-google.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-t2a-openai.d.ts +15 -0
- package/speechflow-cli/dst/speechflow-node-t2a-openai.js +195 -0
- package/speechflow-cli/dst/speechflow-node-t2a-openai.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-t2a-supertonic.d.ts +17 -0
- package/speechflow-cli/dst/speechflow-node-t2a-supertonic.js +608 -0
- package/speechflow-cli/dst/speechflow-node-t2a-supertonic.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-t2t-amazon.js.map +1 -1
- package/speechflow-cli/dst/{speechflow-node-t2t-transformers.d.ts → speechflow-node-t2t-opus.d.ts} +1 -3
- package/speechflow-cli/dst/speechflow-node-t2t-opus.js +159 -0
- package/speechflow-cli/dst/speechflow-node-t2t-opus.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-t2t-profanity.d.ts +11 -0
- package/speechflow-cli/dst/speechflow-node-t2t-profanity.js +118 -0
- package/speechflow-cli/dst/speechflow-node-t2t-profanity.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-t2t-punctuation.d.ts +13 -0
- package/speechflow-cli/dst/speechflow-node-t2t-punctuation.js +220 -0
- package/speechflow-cli/dst/speechflow-node-t2t-punctuation.js.map +1 -0
- package/speechflow-cli/dst/{speechflow-node-t2t-openai.d.ts → speechflow-node-t2t-spellcheck.d.ts} +2 -2
- package/speechflow-cli/dst/{speechflow-node-t2t-openai.js → speechflow-node-t2t-spellcheck.js} +47 -99
- package/speechflow-cli/dst/speechflow-node-t2t-spellcheck.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js +3 -6
- package/speechflow-cli/dst/speechflow-node-t2t-subtitle.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-t2t-summary.d.ts +16 -0
- package/speechflow-cli/dst/speechflow-node-t2t-summary.js +241 -0
- package/speechflow-cli/dst/speechflow-node-t2t-summary.js.map +1 -0
- package/speechflow-cli/dst/{speechflow-node-t2t-ollama.d.ts → speechflow-node-t2t-translate.d.ts} +2 -2
- package/speechflow-cli/dst/{speechflow-node-t2t-transformers.js → speechflow-node-t2t-translate.js} +53 -115
- package/speechflow-cli/dst/speechflow-node-t2t-translate.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-xio-exec.d.ts +12 -0
- package/speechflow-cli/dst/speechflow-node-xio-exec.js +223 -0
- package/speechflow-cli/dst/speechflow-node-xio-exec.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-xio-file.d.ts +1 -0
- package/speechflow-cli/dst/speechflow-node-xio-file.js +79 -66
- package/speechflow-cli/dst/speechflow-node-xio-file.js.map +1 -1
- package/speechflow-cli/dst/speechflow-node-xio-vban.d.ts +17 -0
- package/speechflow-cli/dst/speechflow-node-xio-vban.js +330 -0
- package/speechflow-cli/dst/speechflow-node-xio-vban.js.map +1 -0
- package/speechflow-cli/dst/speechflow-node-xio-webrtc.d.ts +39 -0
- package/speechflow-cli/dst/speechflow-node-xio-webrtc.js +500 -0
- package/speechflow-cli/dst/speechflow-node-xio-webrtc.js.map +1 -0
- package/speechflow-cli/dst/speechflow-util-audio.js +4 -5
- package/speechflow-cli/dst/speechflow-util-audio.js.map +1 -1
- package/speechflow-cli/dst/speechflow-util-error.d.ts +1 -0
- package/speechflow-cli/dst/speechflow-util-error.js +5 -0
- package/speechflow-cli/dst/speechflow-util-error.js.map +1 -1
- package/speechflow-cli/dst/speechflow-util-llm.d.ts +35 -0
- package/speechflow-cli/dst/speechflow-util-llm.js +363 -0
- package/speechflow-cli/dst/speechflow-util-llm.js.map +1 -0
- package/speechflow-cli/dst/speechflow-util.d.ts +1 -0
- package/speechflow-cli/dst/speechflow-util.js +1 -0
- package/speechflow-cli/dst/speechflow-util.js.map +1 -1
- package/speechflow-cli/etc/oxlint.jsonc +2 -1
- package/speechflow-cli/package.json +34 -17
- package/speechflow-cli/src/lib.d.ts +5 -0
- package/speechflow-cli/src/speechflow-main-graph.ts +31 -5
- package/speechflow-cli/src/speechflow-node-a2a-wav.ts +24 -4
- package/speechflow-cli/src/speechflow-node-a2t-google.ts +322 -0
- package/speechflow-cli/src/speechflow-node-t2a-google.ts +206 -0
- package/speechflow-cli/src/speechflow-node-t2a-openai.ts +179 -0
- package/speechflow-cli/src/speechflow-node-t2a-supertonic.ts +701 -0
- package/speechflow-cli/src/speechflow-node-t2t-amazon.ts +2 -1
- package/speechflow-cli/src/speechflow-node-t2t-opus.ts +136 -0
- package/speechflow-cli/src/speechflow-node-t2t-profanity.ts +93 -0
- package/speechflow-cli/src/speechflow-node-t2t-punctuation.ts +201 -0
- package/speechflow-cli/src/{speechflow-node-t2t-openai.ts → speechflow-node-t2t-spellcheck.ts} +48 -107
- package/speechflow-cli/src/speechflow-node-t2t-subtitle.ts +3 -6
- package/speechflow-cli/src/speechflow-node-t2t-summary.ts +229 -0
- package/speechflow-cli/src/speechflow-node-t2t-translate.ts +181 -0
- package/speechflow-cli/src/speechflow-node-xio-exec.ts +210 -0
- package/speechflow-cli/src/speechflow-node-xio-file.ts +92 -79
- package/speechflow-cli/src/speechflow-node-xio-vban.ts +325 -0
- package/speechflow-cli/src/speechflow-node-xio-webrtc.ts +533 -0
- package/speechflow-cli/src/speechflow-util-audio.ts +5 -5
- package/speechflow-cli/src/speechflow-util-error.ts +9 -0
- package/speechflow-cli/src/speechflow-util-llm.ts +367 -0
- package/speechflow-cli/src/speechflow-util.ts +1 -0
- package/speechflow-ui-db/package.json +9 -9
- package/speechflow-ui-st/package.json +9 -9
- package/speechflow-cli/dst/speechflow-node-t2t-ollama.js +0 -293
- package/speechflow-cli/dst/speechflow-node-t2t-ollama.js.map +0 -1
- package/speechflow-cli/dst/speechflow-node-t2t-openai.js.map +0 -1
- package/speechflow-cli/dst/speechflow-node-t2t-transformers.js.map +0 -1
- package/speechflow-cli/src/speechflow-node-t2t-ollama.ts +0 -281
- package/speechflow-cli/src/speechflow-node-t2t-transformers.ts +0 -247

package/speechflow-cli/src/speechflow-node-t2t-opus.ts
ADDED
@@ -0,0 +1,136 @@
+/*
+** SpeechFlow - Speech Processing Flow Graph
+** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
+** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
+*/
+
+/* standard dependencies */
+import path from "node:path"
+import Stream from "node:stream"
+
+/* external dependencies */
+import * as Transformers from "@huggingface/transformers"
+
+/* internal dependencies */
+import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
+import * as util from "./speechflow-util"
+
+/* SpeechFlow node for OPUS text-to-text translation */
+export default class SpeechFlowNodeT2TOPUS extends SpeechFlowNode {
+    /* declare official node name */
+    public static name = "t2t-opus"
+
+    /* internal state */
+    private translator: Transformers.TranslationPipeline | null = null
+
+    /* construct node */
+    constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
+        super(id, cfg, opts, args)
+
+        /* declare node configuration parameters */
+        this.configure({
+            src: { type: "string", pos: 0, val: "de", match: /^(?:de|en)$/ },
+            dst: { type: "string", pos: 1, val: "en", match: /^(?:de|en)$/ }
+        })
+
+        /* sanity check parameters */
+        if (this.params.src === this.params.dst)
+            throw new Error("source and destination languages cannot be the same")
+
+        /* declare node input/output format */
+        this.input = "text"
+        this.output = "text"
+    }
+
+    /* open node */
+    async open () {
+        /* track download progress when instantiating Transformers engine and model */
+        const model = `onnx-community/opus-mt-${this.params.src}-${this.params.dst}`
+        const progressState = new Map<string, number>()
+        const progressCallback: Transformers.ProgressCallback = (progress: any) => {
+            let artifact = model
+            if (typeof progress.file === "string")
+                artifact += `:${progress.file}`
+            let percent = 0
+            if (typeof progress.loaded === "number" && typeof progress.total === "number")
+                percent = (progress.loaded / progress.total) * 100
+            else if (typeof progress.progress === "number")
+                percent = progress.progress
+            if (percent > 0)
+                progressState.set(artifact, percent)
+        }
+        const interval = setInterval(() => {
+            for (const [ artifact, percent ] of progressState) {
+                this.log("info", `downloaded ${percent.toFixed(2)}% of artifact "${artifact}"`)
+                if (percent >= 100.0)
+                    progressState.delete(artifact)
+            }
+        }, 1000)
+
+        /* instantiate Transformers engine and model */
+        const pipeline = Transformers.pipeline("translation", model, {
+            cache_dir: path.join(this.config.cacheDir, "transformers"),
+            dtype: "q4",
+            device: "auto",
+            progress_callback: progressCallback
+        })
+        this.translator = await pipeline
+        if (this.translator === null)
+            throw new Error("failed to instantiate translator pipeline")
+
+        /* clear progress interval again */
+        clearInterval(interval)
+
+        /* provide text-to-text translation */
+        const translate = async (text: string) => {
+            const result = await this.translator!(text)
+            const single = Array.isArray(result) ? result[0] : result
+            return (single as Transformers.TranslationSingle).translation_text
+        }
+
+        /* establish a duplex stream and connect it to Transformers */
+        this.stream = new Stream.Transform({
+            readableObjectMode: true,
+            writableObjectMode: true,
+            decodeStrings: false,
+            highWaterMark: 1,
+            transform (chunk: SpeechFlowChunk, encoding, callback) {
+                if (Buffer.isBuffer(chunk.payload))
+                    callback(new Error("invalid chunk payload type"))
+                else if (chunk.payload === "") {
+                    this.push(chunk)
+                    callback()
+                }
+                else {
+                    translate(chunk.payload).then((payload) => {
+                        const chunkNew = chunk.clone()
+                        chunkNew.payload = payload
+                        this.push(chunkNew)
+                        callback()
+                    }).catch((error: unknown) => {
+                        callback(util.ensureError(error))
+                    })
+                }
+            },
+            final (callback) {
+                callback()
+            }
+        })
+    }
+
+    /* close node */
+    async close () {
+        /* shutdown stream */
+        if (this.stream !== null) {
+            await util.destroyStream(this.stream)
+            this.stream = null
+        }
+
+        /* shutdown Transformers */
+        if (this.translator !== null) {
+            this.translator.dispose()
+            this.translator = null
+        }
+    }
+}
+
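
For orientation: the new t2t-opus node above is essentially a streaming wrapper around the Transformers.js translation pipeline. A minimal standalone sketch of that core usage (same model-naming scheme and pipeline options as in the diff, but outside the SpeechFlow node framework; the helper function name is made up for illustration) could look like this:

/* minimal sketch, not part of the package: the Transformers.js calls the
   t2t-opus node is built around, reduced to a single helper function */
import * as Transformers from "@huggingface/transformers"

async function translateOnce (text: string, src = "de", dst = "en"): Promise<string> {
    /* load the quantized OPUS-MT model (downloaded and cached on first use) */
    const translator = await Transformers.pipeline("translation",
        `onnx-community/opus-mt-${src}-${dst}`, { dtype: "q4", device: "auto" })

    /* the pipeline may return a single result or an array of results */
    const result = await translator(text)
    const single = Array.isArray(result) ? result[0] : result
    return (single as Transformers.TranslationSingle).translation_text
}

/* example usage */
translateOnce("Das Wetter ist wunderschön.").then((text) => console.log(text))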

package/speechflow-cli/src/speechflow-node-t2t-profanity.ts
ADDED
@@ -0,0 +1,93 @@
+/*
+** SpeechFlow - Speech Processing Flow Graph
+** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
+** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
+*/
+
+/* standard dependencies */
+import Stream from "node:stream"
+
+/* external dependencies */
+import BadWordsNext from "bad-words-next"
+import en from "bad-words-next/lib/en"
+import de from "bad-words-next/lib/de"
+
+/* internal dependencies */
+import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
+import * as util from "./speechflow-util"
+
+/* language data mapping */
+const langData: { [ lang: string ]: typeof en } = { en, de }
+
+/* SpeechFlow node for text-to-text profanity filtering */
+export default class SpeechFlowNodeT2TProfanity extends SpeechFlowNode {
+    /* declare official node name */
+    public static name = "t2t-profanity"
+
+    /* construct node */
+    constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
+        super(id, cfg, opts, args)
+
+        /* declare node configuration parameters */
+        this.configure({
+            lang: { type: "string", val: "en", match: /^(?:en|de)$/ },
+            placeholder: { type: "string", val: "***" },
+            mode: { type: "string", val: "replace", match: /^(?:replace|repeat)$/ }
+        })
+
+        /* declare node input/output format */
+        this.input = "text"
+        this.output = "text"
+    }
+
+    /* open node */
+    async open () {
+        /* create profanity filter instance */
+        const filter = util.run("creating profanity filter", () =>
+            new BadWordsNext({
+                data: langData[this.params.lang],
+                placeholder: this.params.placeholder,
+                placeholderMode: this.params.mode as "replace" | "repeat"
+            })
+        )
+
+        /* apply profanity filtering */
+        const censor = (text: string): string =>
+            filter.filter(text)
+
+        /* establish a transform stream and connect it to profanity filtering */
+        this.stream = new Stream.Transform({
+            readableObjectMode: true,
+            writableObjectMode: true,
+            decodeStrings: false,
+            highWaterMark: 1,
+            transform (chunk: SpeechFlowChunk, encoding, callback) {
+                if (Buffer.isBuffer(chunk.payload))
+                    callback(new Error("invalid chunk payload type"))
+                else if (chunk.payload === "") {
+                    this.push(chunk)
+                    callback()
+                }
+                else {
+                    const payload = censor(chunk.payload)
+                    const chunkNew = chunk.clone()
+                    chunkNew.payload = payload
+                    this.push(chunkNew)
+                    callback()
+                }
+            },
+            final (callback) {
+                callback()
+            }
+        })
+    }
+
+    /* close node */
+    async close () {
+        /* shutdown stream */
+        if (this.stream !== null) {
+            await util.destroyStream(this.stream)
+            this.stream = null
+        }
+    }
+}
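
The t2t-profanity node above delegates the actual censoring to the bad-words-next package. A minimal sketch of that usage, with the same constructor options the node passes through (shown here with fixed values instead of node parameters), might be:

/* minimal sketch, not part of the package: the bad-words-next usage wrapped
   by the t2t-profanity node, without the Node.js stream plumbing */
import BadWordsNext from "bad-words-next"
import en from "bad-words-next/lib/en"

const filter = new BadWordsNext({
    data: en,                   /* language dictionary (the node selects "en" or "de") */
    placeholder: "***",         /* replacement text for matched words */
    placeholderMode: "replace"  /* the node alternatively allows "repeat" */
})

/* example usage: returns the text with profanities masked */
console.log(filter.filter("some text to censor"))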

package/speechflow-cli/src/speechflow-node-t2t-punctuation.ts
ADDED
@@ -0,0 +1,201 @@
+/*
+** SpeechFlow - Speech Processing Flow Graph
+** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
+** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
+*/
+
+/* standard dependencies */
+import Stream from "node:stream"
+
+/* internal dependencies */
+import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
+import * as util from "./speechflow-util"
+import { LLM, type LLMCompleteMessage } from "./speechflow-util-llm"
+
+/* internal utility types */
+type ConfigEntry = { systemPrompt: string, chat: LLMCompleteMessage[] }
+type Config = { [ key: string ]: ConfigEntry }
+
+/* SpeechFlow node for text-to-text punctuation restoration */
+export default class SpeechFlowNodeT2TPunctuation extends SpeechFlowNode {
+    /* declare official node name */
+    public static name = "t2t-punctuation"
+
+    /* internal state */
+    private llm: LLM | null = null
+
+    /* internal LLM setup */
+    private setup: Config = {
+        /* English (EN) punctuation restoration */
+        "en": {
+            systemPrompt:
+                "You are a punctuation restoration specialist for English.\n" +
+                "Your task is to add missing punctuation to unpunctuated text.\n" +
+                "Output only the punctuated text.\n" +
+                "Do NOT use markdown.\n" +
+                "Do NOT give any explanations.\n" +
+                "Do NOT give any introduction.\n" +
+                "Do NOT give any comments.\n" +
+                "Do NOT give any preamble.\n" +
+                "Do NOT give any prolog.\n" +
+                "Do NOT give any epilog.\n" +
+                "Do NOT change the words.\n" +
+                "Do NOT add or remove words.\n" +
+                "Do NOT fix spelling errors.\n" +
+                "Do NOT change the grammar.\n" +
+                "Do NOT use synonyms.\n" +
+                "Keep all original words exactly as they are.\n" +
+                "Add periods at sentence endings.\n" +
+                "Add commas where appropriate.\n" +
+                "Add question marks for questions.\n" +
+                "Add exclamation marks where appropriate.\n" +
+                "Add colons and semicolons where appropriate.\n" +
+                "Capitalize first letters of sentences.\n" +
+                "The text you have to punctuate is:\n",
+            chat: [
+                { role: "user", content: "hello how are you today" },
+                { role: "assistant", content: "Hello, how are you today?" },
+                { role: "user", content: "i went to the store and bought some milk eggs and bread" },
+                { role: "assistant", content: "I went to the store and bought some milk, eggs, and bread." },
+                { role: "user", content: "what time is it i need to leave soon" },
+                { role: "assistant", content: "What time is it? I need to leave soon." },
+                { role: "user", content: "thats amazing i cant believe it worked" },
+                { role: "assistant", content: "That's amazing! I can't believe it worked!" }
+            ]
+        },
+
+        /* German (DE) punctuation restoration */
+        "de": {
+            systemPrompt:
+                "Du bist ein Spezialist für Zeichensetzung im Deutschen.\n" +
+                "Deine Aufgabe ist es, fehlende Satzzeichen in unpunktierten Text einzufügen.\n" +
+                "Gib nur den punktierten Text aus.\n" +
+                "Benutze KEIN Markdown.\n" +
+                "Gib KEINE Erklärungen.\n" +
+                "Gib KEINE Einleitung.\n" +
+                "Gib KEINE Kommentare.\n" +
+                "Gib KEINE Preamble.\n" +
+                "Gib KEINEN Prolog.\n" +
+                "Gib KEINEN Epilog.\n" +
+                "Ändere NICHT die Wörter.\n" +
+                "Füge KEINE Wörter hinzu oder entferne welche.\n" +
+                "Korrigiere KEINE Rechtschreibfehler.\n" +
+                "Ändere NICHT die Grammatik.\n" +
+                "Verwende KEINE Synonyme.\n" +
+                "Behalte alle ursprünglichen Wörter genau bei.\n" +
+                "Füge Punkte am Satzende ein.\n" +
+                "Füge Kommas an passenden Stellen ein.\n" +
+                "Füge Fragezeichen bei Fragen ein.\n" +
+                "Füge Ausrufezeichen an passenden Stellen ein.\n" +
+                "Füge Doppelpunkte und Semikolons an passenden Stellen ein.\n" +
+                "Großschreibe die ersten Buchstaben von Sätzen.\n" +
+                "Der von dir zu punktierende Text ist:\n",
+            chat: [
+                { role: "user", content: "hallo wie geht es dir heute" },
+                { role: "assistant", content: "Hallo, wie geht es dir heute?" },
+                { role: "user", content: "ich bin in den laden gegangen und habe milch eier und brot gekauft" },
+                { role: "assistant", content: "Ich bin in den Laden gegangen und habe Milch, Eier und Brot gekauft." },
+                { role: "user", content: "wie spät ist es ich muss bald los" },
+                { role: "assistant", content: "Wie spät ist es? Ich muss bald los." },
+                { role: "user", content: "das ist fantastisch ich kann nicht glauben dass es funktioniert hat" },
+                { role: "assistant", content: "Das ist fantastisch! Ich kann nicht glauben, dass es funktioniert hat!" }
+            ]
+        }
+    }
+
+    /* construct node */
+    constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
+        super(id, cfg, opts, args)
+
+        /* declare node configuration parameters */
+        this.configure({
+            provider: { type: "string", val: "ollama", match: /^(?:openai|anthropic|google|ollama|transformers)$/ },
+            api: { type: "string", val: "http://127.0.0.1:11434", match: /^https?:\/\/.+?(:\d+)?$/ },
+            model: { type: "string", val: "gemma3:4b-it-q4_K_M", match: /^.+$/ },
+            key: { type: "string", val: "", match: /^.*$/ },
+            lang: { type: "string", pos: 0, val: "en", match: /^(?:de|en)$/ }
+        })
+
+        /* tell effective mode */
+        this.log("info", `punctuation restoration for language "${this.params.lang}" ` +
+            `via ${this.params.provider} LLM (model: ${this.params.model})`)
+
+        /* declare node input/output format */
+        this.input = "text"
+        this.output = "text"
+    }
+
+    /* open node */
+    async open () {
+        /* instantiate LLM */
+        this.llm = new LLM({
+            provider: this.params.provider,
+            api: this.params.api,
+            model: this.params.model,
+            key: this.params.key,
+            temperature: 0.7,
+            topP: 0.5
+        })
+        this.llm.on("log", (level: string, message: string) => {
+            this.log(level as "info" | "warning" | "error", message)
+        })
+        await this.llm.open()
+
+        /* provide text-to-text punctuation restoration */
+        const llm = this.llm!
+        const punctuate = async (text: string) => {
+            const cfg = this.setup[this.params.lang]
+            if (!cfg)
+                throw new Error(`unsupported language: ${this.params.lang}`)
+            return llm.complete({
+                system: cfg.systemPrompt,
+                messages: cfg.chat,
+                prompt: text
+            })
+        }
+
+        /* establish a transform stream for punctuation restoration */
+        this.stream = new Stream.Transform({
+            readableObjectMode: true,
+            writableObjectMode: true,
+            decodeStrings: false,
+            highWaterMark: 1,
+            transform (chunk: SpeechFlowChunk, encoding, callback) {
+                if (Buffer.isBuffer(chunk.payload))
+                    callback(new Error("invalid chunk payload type"))
+                else if (chunk.payload === "") {
+                    this.push(chunk)
+                    callback()
+                }
+                else {
+                    punctuate(chunk.payload).then((payload) => {
+                        const chunkNew = chunk.clone()
+                        chunkNew.payload = payload
+                        this.push(chunkNew)
+                        callback()
+                    }).catch((error: unknown) => {
+                        callback(util.ensureError(error))
+                    })
+                }
+            },
+            final (callback) {
+                callback()
+            }
+        })
+    }
+
+    /* close node */
+    async close () {
+        /* shutdown stream */
+        if (this.stream !== null) {
+            await util.destroyStream(this.stream)
+            this.stream = null
+        }
+
+        /* shutdown LLM */
+        if (this.llm !== null) {
+            await this.llm.close()
+            this.llm = null
+        }
+    }
+}
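
The t2t-punctuation node above (and the t2t-spellcheck node below) drive the new internal LLM helper from speechflow-util-llm.ts, whose own diff is not shown in this excerpt. A hedged sketch of that call sequence, using only the calls visible in the diffs (new LLM(), on("log"), open(), complete(), close()) together with the node's default parameter values, is:

/* hedged sketch, not part of the package: how the nodes drive the internal
   LLM helper; the helper's implementation is not shown in this excerpt */
import { LLM, type LLMCompleteMessage } from "./speechflow-util-llm"

async function restorePunctuation (text: string): Promise<string> {
    const llm = new LLM({
        provider: "ollama",              /* default provider of the node */
        api: "http://127.0.0.1:11434",   /* default Ollama endpoint */
        model: "gemma3:4b-it-q4_K_M",    /* default model */
        key: "",
        temperature: 0.7,
        topP: 0.5
    })
    llm.on("log", (level: string, message: string) => console.log(level, message))
    await llm.open()

    /* few-shot completion: system prompt plus example chat, then the raw text */
    const messages: LLMCompleteMessage[] = [
        { role: "user", content: "hello how are you today" },
        { role: "assistant", content: "Hello, how are you today?" }
    ]
    const result = await llm.complete({
        system: "You are a punctuation restoration specialist for English. ...",
        messages,
        prompt: text
    })
    await llm.close()
    return result
}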
package/speechflow-cli/src/{speechflow-node-t2t-openai.ts → speechflow-node-t2t-spellcheck.ts}
RENAMED
@@ -5,31 +5,29 @@
 */
 
 /* standard dependencies */
-import Stream …
-
-/* external dependencies */
-import OpenAI from "openai"
+import Stream from "node:stream"
 
 /* internal dependencies */
 import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
 import * as util from "./speechflow-util"
+import { LLM, type LLMCompleteMessage } from "./speechflow-util-llm"
 
 /* internal utility types */
-type ConfigEntry = { systemPrompt: string, chat: …
+type ConfigEntry = { systemPrompt: string, chat: LLMCompleteMessage[] }
 type Config = { [ key: string ]: ConfigEntry }
 
-/* SpeechFlow node for …
-export default class …
+/* SpeechFlow node for LLM-based text-to-text spellchecking */
+export default class SpeechFlowNodeT2TSpellcheck extends SpeechFlowNode {
     /* declare official node name */
-    public static name = "t2t-…
+    public static name = "t2t-spellcheck"
 
     /* internal state */
-    private …
+    private llm: LLM | null = null
 
     /* internal LLM setup */
     private setup: Config = {
-        /* English (EN) spellchecking …
-        "en…
+        /* English (EN) spellchecking */
+        "en": {
             systemPrompt:
                 "You are a proofreader and spellchecker for English.\n" +
                 "Output only the corrected text.\n" +
@@ -59,8 +57,8 @@ export default class SpeechFlowNodeT2TOpenAI extends SpeechFlowNode {
             ]
         },
 
-        /* German (DE) spellchecking …
-        "de…
+        /* German (DE) spellchecking */
+        "de": {
             systemPrompt:
                 "Du bist ein Korrekturleser und Rechtschreibprüfer für Deutsch.\n" +
                 "Gib nur den korrigierten Text aus.\n" +
@@ -89,56 +87,6 @@ export default class SpeechFlowNodeT2TOpenAI extends SpeechFlowNode {
                 { role: "user", content: "Das Leben einfach großartig aber ich bin hungrig." },
                 { role: "assistant", content: "Das Leben ist einfach großartig, aber ich bin hungrig." }
             ]
-        },
-
-        /* English (EN) to German (DE) translation */
-        "en-de": {
-            systemPrompt:
-                "You are a translator.\n" +
-                "Output only the requested text.\n" +
-                "Do not use markdown.\n" +
-                "Do not chat.\n" +
-                "Do not show any explanations.\n" +
-                "Do not show any introduction.\n" +
-                "Do not show any preamble.\n" +
-                "Do not show any prolog.\n" +
-                "Do not show any epilog.\n" +
-                "Get to the point.\n" +
-                "Preserve the original meaning, tone, and nuance.\n" +
-                "Directly translate text from English (EN) to fluent and natural German (DE) language.\n",
-            chat: [
-                { role: "user", content: "I love my wife." },
-                { role: "assistant", content: "Ich liebe meine Frau." },
-                { role: "user", content: "The weather is wonderful." },
-                { role: "assistant", content: "Das Wetter ist wunderschön." },
-                { role: "user", content: "The life is awesome." },
-                { role: "assistant", content: "Das Leben ist einfach großartig." }
-            ]
-        },
-
-        /* German (DE) to English (EN) translation */
-        "de-en": {
-            systemPrompt:
-                "You are a translator.\n" +
-                "Output only the requested text.\n" +
-                "Do not use markdown.\n" +
-                "Do not chat.\n" +
-                "Do not show any explanations.\n" +
-                "Do not show any introduction.\n" +
-                "Do not show any preamble.\n" +
-                "Do not show any prolog.\n" +
-                "Do not show any epilog.\n" +
-                "Get to the point.\n" +
-                "Preserve the original meaning, tone, and nuance.\n" +
-                "Directly translate text from German (DE) to fluent and natural English (EN) language.\n",
-            chat: [
-                { role: "user", content: "Ich liebe meine Frau." },
-                { role: "assistant", content: "I love my wife." },
-                { role: "user", content: "Das Wetter ist wunderschön." },
-                { role: "assistant", content: "The weather is wonderful." },
-                { role: "user", content: "Das Leben ist einfach großartig." },
-                { role: "assistant", content: "The life is awesome." }
-            ]
         }
     }
 
@@ -148,19 +96,16 @@ export default class SpeechFlowNodeT2TOpenAI extends SpeechFlowNode {
 
         /* declare node configuration parameters */
         this.configure({
-…
-…
-…
-…
-…
+            lang: { type: "string", pos: 0, val: "en", match: /^(?:de|en)$/ },
+            provider: { type: "string", val: "ollama", match: /^(?:openai|anthropic|google|ollama|transformers)$/ },
+            api: { type: "string", val: "http://127.0.0.1:11434", match: /^https?:\/\/.+?(:\d+)?$/ },
+            model: { type: "string", val: "gemma3:4b-it-q4_K_M", match: /^.+$/ },
+            key: { type: "string", val: "", match: /^.*$/ }
         })
 
         /* tell effective mode */
-…
-        this.…
-        else…
-            this.log("info", `OpenAI: operation mode: translation from language "${this.params.src}"` +
-                ` to language "${this.params.dst}"`)
+        this.log("info", `spellchecking language "${this.params.lang}" ` +
+            `via ${this.params.provider} LLM (model: ${this.params.model})`)
 
         /* declare node input/output format */
         this.input = "text"
@@ -169,39 +114,34 @@ export default class SpeechFlowNodeT2TOpenAI extends SpeechFlowNode {
 
     /* open node */
     async open () {
-        /*…
-…
-…
-…
-…
-…
-…
-…
-            timeout: 30000
+        /* instantiate LLM */
+        this.llm = new LLM({
+            provider: this.params.provider,
+            api: this.params.api,
+            model: this.params.model,
+            key: this.params.key,
+            temperature: 0.7,
+            topP: 0.5
         })
-…
-…
-…
-…
-…
-…
-…
-…
-…
-…
-…
-…
-…
-…
-…
+        this.llm.on("log", (level: string, message: string) => {
+            this.log(level as "info" | "warning" | "error", message)
+        })
+        await this.llm.open()
+
+        /* provide text-to-text spellchecking */
+        const llm = this.llm!
+        const spellcheck = async (text: string) => {
+            const cfg = this.setup[this.params.lang]
+            if (!cfg)
+                throw new Error(`unsupported language: ${this.params.lang}`)
+            return llm.complete({
+                system: cfg.systemPrompt,
+                messages: cfg.chat,
+                prompt: text
            })
-            const content = completion?.choices?.[0]?.message?.content
-            if (!content)
-                throw new Error("OpenAI API returned empty content")
-            return content
         }
 
-        /* establish a…
+        /* establish a transform stream and connect it to LLM */
         this.stream = new Stream.Transform({
             readableObjectMode: true,
             writableObjectMode: true,
@@ -215,7 +155,7 @@ export default class SpeechFlowNodeT2TOpenAI extends SpeechFlowNode {
                     callback()
                 }
                 else {
-…
+                    spellcheck(chunk.payload).then((payload) => {
                         const chunkNew = chunk.clone()
                         chunkNew.payload = payload
                         this.push(chunkNew)
@@ -239,9 +179,10 @@ export default class SpeechFlowNodeT2TOpenAI extends SpeechFlowNode {
             this.stream = null
         }
 
-        /* shutdown…
-        if (this.…
-            this.…
+        /* shutdown LLM */
+        if (this.llm !== null) {
+            await this.llm.close()
+            this.llm = null
+        }
     }
 }
-