speechflow 0.9.8 → 0.9.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +10 -0
- package/LICENSE.txt +674 -0
- package/README.md +66 -16
- package/dst/speechflow-node-a2a-vad.d.ts +16 -0
- package/dst/speechflow-node-a2a-vad.js +431 -0
- package/dst/speechflow-node-t2a-kokoro.d.ts +13 -0
- package/dst/speechflow-node-t2a-kokoro.js +147 -0
- package/dst/speechflow-node-t2t-gemma.js +23 -3
- package/dst/speechflow-node-t2t-ollama.d.ts +13 -0
- package/dst/speechflow-node-t2t-ollama.js +245 -0
- package/dst/speechflow-node-t2t-openai.d.ts +13 -0
- package/dst/speechflow-node-t2t-openai.js +225 -0
- package/dst/speechflow-node-t2t-opus.js +1 -1
- package/dst/speechflow-node-t2t-transformers.d.ts +14 -0
- package/dst/speechflow-node-t2t-transformers.js +260 -0
- package/dst/speechflow-node-x2x-trace.js +2 -2
- package/dst/speechflow.js +86 -40
- package/etc/speechflow.yaml +9 -2
- package/etc/stx.conf +1 -1
- package/package.json +7 -6
- package/src/speechflow-node-t2a-kokoro.ts +160 -0
- package/src/{speechflow-node-t2t-gemma.ts → speechflow-node-t2t-ollama.ts} +44 -10
- package/src/speechflow-node-t2t-openai.ts +246 -0
- package/src/speechflow-node-t2t-transformers.ts +244 -0
- package/src/speechflow-node-x2x-trace.ts +2 -2
- package/src/speechflow.ts +86 -40
- package/src/speechflow-node-t2t-opus.ts +0 -111
package/src/speechflow.ts
CHANGED
|
@@ -13,6 +13,7 @@ import { EventEmitter } from "node:events"
|
|
|
13
13
|
import { DateTime } from "luxon"
|
|
14
14
|
import CLIio from "cli-io"
|
|
15
15
|
import yargs from "yargs"
|
|
16
|
+
import { hideBin } from "yargs/helpers"
|
|
16
17
|
import jsYAML from "js-yaml"
|
|
17
18
|
import FlowLink from "flowlink"
|
|
18
19
|
import objectPath from "object-path"
|
|
@@ -36,6 +37,7 @@ let cli: CLIio | null = null
|
|
|
36
37
|
})
|
|
37
38
|
|
|
38
39
|
/* parse command-line arguments */
|
|
40
|
+
const coerce = (arg: string) => Array.isArray(arg) ? arg[arg.length - 1] : arg
|
|
39
41
|
const args = await yargs()
|
|
40
42
|
/* eslint @stylistic/indent: off */
|
|
41
43
|
.usage(
|
|
@@ -49,28 +51,69 @@ let cli: CLIio | null = null
|
|
|
49
51
|
"[-c|--config <id>@<yaml-config-file>] " +
|
|
50
52
|
"[<argument> [...]]"
|
|
51
53
|
)
|
|
52
|
-
.
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
.
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
54
|
+
.option("V", {
|
|
55
|
+
alias: "version",
|
|
56
|
+
type: "boolean",
|
|
57
|
+
array: false,
|
|
58
|
+
coerce,
|
|
59
|
+
default: false,
|
|
60
|
+
describe: "show program version information"
|
|
61
|
+
})
|
|
62
|
+
.option("v", {
|
|
63
|
+
alias: "log-level",
|
|
64
|
+
type: "string",
|
|
65
|
+
array: false,
|
|
66
|
+
coerce,
|
|
67
|
+
nargs: 1,
|
|
68
|
+
default: "warning",
|
|
69
|
+
describe: "level for verbose logging ('none', 'error', 'warning', 'info', 'debug')"
|
|
70
|
+
})
|
|
71
|
+
.option("C", {
|
|
72
|
+
alias: "cache",
|
|
73
|
+
type: "string",
|
|
74
|
+
array: false,
|
|
75
|
+
coerce,
|
|
76
|
+
nargs: 1,
|
|
77
|
+
default: path.join(dataDir, "cache"),
|
|
78
|
+
describe: "directory for cached files (primarily AI model files)"
|
|
79
|
+
})
|
|
80
|
+
.option("e", {
|
|
81
|
+
alias: "expression",
|
|
82
|
+
type: "string",
|
|
83
|
+
array: false,
|
|
84
|
+
coerce,
|
|
85
|
+
nargs: 1,
|
|
86
|
+
default: "",
|
|
87
|
+
describe: "FlowLink expression string"
|
|
88
|
+
})
|
|
89
|
+
.option("f", {
|
|
90
|
+
alias: "file",
|
|
91
|
+
type: "string",
|
|
92
|
+
array: false,
|
|
93
|
+
coerce,
|
|
94
|
+
nargs: 1,
|
|
95
|
+
default: "",
|
|
96
|
+
describe: "FlowLink expression file"
|
|
97
|
+
})
|
|
98
|
+
.option("c", {
|
|
99
|
+
alias: "config",
|
|
100
|
+
type: "string",
|
|
101
|
+
array: false,
|
|
102
|
+
coerce,
|
|
103
|
+
nargs: 1,
|
|
104
|
+
default: "",
|
|
105
|
+
describe: "FlowLink expression reference into YAML file (in format <id>@<file>)"
|
|
106
|
+
})
|
|
107
|
+
.help("h", "show usage help")
|
|
108
|
+
.alias("h", "help")
|
|
109
|
+
.showHelpOnFail(true)
|
|
66
110
|
.version(false)
|
|
67
111
|
.strict()
|
|
68
|
-
.showHelpOnFail(true)
|
|
69
112
|
.demand(0)
|
|
70
|
-
.parse(process.argv
|
|
113
|
+
.parse(hideBin(process.argv))
|
|
71
114
|
|
|
72
115
|
/* short-circuit version request */
|
|
73
|
-
if (args.
|
|
116
|
+
if (args.V) {
|
|
74
117
|
process.stderr.write(`SpeechFlow ${pkg["x-stdver"]} (${pkg["x-release"]}) <${pkg.homepage}>\n`)
|
|
75
118
|
process.stderr.write(`${pkg.description}\n`)
|
|
76
119
|
process.stderr.write(`Copyright (c) 2024-2025 ${pkg.author.name} <${pkg.author.url}>\n`)
|
|
@@ -81,7 +124,7 @@ let cli: CLIio | null = null
|
|
|
81
124
|
/* establish CLI environment */
|
|
82
125
|
cli = new CLIio({
|
|
83
126
|
encoding: "utf8",
|
|
84
|
-
logLevel: args.
|
|
127
|
+
logLevel: args.v,
|
|
85
128
|
logTime: true,
|
|
86
129
|
logPrefix: pkg.name
|
|
87
130
|
})
|
|
@@ -112,28 +155,28 @@ let cli: CLIio | null = null
|
|
|
112
155
|
|
|
113
156
|
/* sanity check usage */
|
|
114
157
|
let n = 0
|
|
115
|
-
if (typeof args.
|
|
116
|
-
if (typeof args.
|
|
117
|
-
if (typeof args.
|
|
158
|
+
if (typeof args.e === "string" && args.e !== "") n++
|
|
159
|
+
if (typeof args.f === "string" && args.f !== "") n++
|
|
160
|
+
if (typeof args.c === "string" && args.c !== "") n++
|
|
118
161
|
if (n !== 1)
|
|
119
162
|
throw new Error("cannot use more than one FlowLink specification source (either option -e, -f or -c)")
|
|
120
163
|
|
|
121
164
|
/* read configuration */
|
|
122
165
|
let config = ""
|
|
123
|
-
if (typeof args.
|
|
124
|
-
config = args.
|
|
125
|
-
else if (typeof args.
|
|
126
|
-
config = await cli.input(args.
|
|
127
|
-
else if (typeof args.
|
|
128
|
-
const m = args.
|
|
166
|
+
if (typeof args.e === "string" && args.e !== "")
|
|
167
|
+
config = args.e
|
|
168
|
+
else if (typeof args.f === "string" && args.f !== "")
|
|
169
|
+
config = await cli.input(args.f, { encoding: "utf8" })
|
|
170
|
+
else if (typeof args.c === "string" && args.c !== "") {
|
|
171
|
+
const m = args.c.match(/^(.+?)@(.+)$/)
|
|
129
172
|
if (m === null)
|
|
130
|
-
throw new Error("invalid configuration file specification (expected \"<
|
|
131
|
-
const [ ,
|
|
173
|
+
throw new Error("invalid configuration file specification (expected \"<id>@<yaml-config-file>\")")
|
|
174
|
+
const [ , id, file ] = m
|
|
132
175
|
const yaml = await cli.input(file, { encoding: "utf8" })
|
|
133
176
|
const obj: any = jsYAML.load(yaml)
|
|
134
|
-
if (obj[
|
|
135
|
-
throw new Error(`no such
|
|
136
|
-
config = obj[
|
|
177
|
+
if (obj[id] === undefined)
|
|
178
|
+
throw new Error(`no such id "${id}" found in configuration file`)
|
|
179
|
+
config = obj[id] as string
|
|
137
180
|
}
|
|
138
181
|
|
|
139
182
|
/* track the available SpeechFlow nodes */
|
|
@@ -145,11 +188,14 @@ let cli: CLIio | null = null
|
|
|
145
188
|
"./speechflow-node-a2a-wav.js",
|
|
146
189
|
"./speechflow-node-a2t-deepgram.js",
|
|
147
190
|
"./speechflow-node-t2a-elevenlabs.js",
|
|
191
|
+
"./speechflow-node-t2a-kokoro.js",
|
|
148
192
|
"./speechflow-node-t2t-deepl.js",
|
|
149
|
-
"./speechflow-node-t2t-
|
|
150
|
-
"./speechflow-node-t2t-
|
|
193
|
+
"./speechflow-node-t2t-openai.js",
|
|
194
|
+
"./speechflow-node-t2t-ollama.js",
|
|
195
|
+
"./speechflow-node-t2t-transformers.js",
|
|
151
196
|
"./speechflow-node-t2t-opus.js",
|
|
152
197
|
"./speechflow-node-t2t-subtitle.js",
|
|
198
|
+
"./speechflow-node-t2t-format.js",
|
|
153
199
|
"./speechflow-node-x2x-trace.js",
|
|
154
200
|
"./speechflow-node-xio-device.js",
|
|
155
201
|
"./speechflow-node-xio-file.js",
|
|
@@ -200,7 +246,7 @@ let cli: CLIio | null = null
|
|
|
200
246
|
audioLittleEndian: true,
|
|
201
247
|
audioSampleRate: 48000,
|
|
202
248
|
textEncoding: "utf8",
|
|
203
|
-
cacheDir: args.
|
|
249
|
+
cacheDir: args.C
|
|
204
250
|
}
|
|
205
251
|
let ast: unknown
|
|
206
252
|
try {
|
|
@@ -254,9 +300,9 @@ let cli: CLIio | null = null
|
|
|
254
300
|
}
|
|
255
301
|
catch (err) {
|
|
256
302
|
if (err instanceof Error && err.name === "FlowLinkError")
|
|
257
|
-
cli!.log("error", `failed to materialize SpeechFlow configuration: ${err.toString()}
|
|
303
|
+
cli!.log("error", `failed to materialize SpeechFlow configuration: ${err.toString()}`)
|
|
258
304
|
else if (err instanceof Error)
|
|
259
|
-
cli!.log("error", `failed to materialize SpeechFlow configuration: ${err.message}
|
|
305
|
+
cli!.log("error", `failed to materialize SpeechFlow configuration: ${err.message}`)
|
|
260
306
|
else
|
|
261
307
|
cli!.log("error", "failed to materialize SpeechFlow configuration: internal error")
|
|
262
308
|
process.exit(1)
|
|
@@ -358,7 +404,7 @@ let cli: CLIio | null = null
|
|
|
358
404
|
}
|
|
359
405
|
|
|
360
406
|
/* start of internal stream processing */
|
|
361
|
-
cli!.log("info", "everything established -- stream processing in SpeechFlow graph starts")
|
|
407
|
+
cli!.log("info", "**** everything established -- stream processing in SpeechFlow graph starts ****")
|
|
362
408
|
|
|
363
409
|
/* gracefully shutdown process */
|
|
364
410
|
let shuttingDown = false
|
|
@@ -367,9 +413,9 @@ let cli: CLIio | null = null
|
|
|
367
413
|
return
|
|
368
414
|
shuttingDown = true
|
|
369
415
|
if (signal === "finished")
|
|
370
|
-
cli!.log("info", "streams of all nodes finished -- shutting down service")
|
|
416
|
+
cli!.log("info", "**** streams of all nodes finished -- shutting down service ****")
|
|
371
417
|
else
|
|
372
|
-
cli!.log("warning",
|
|
418
|
+
cli!.log("warning", `**** received signal ${signal} -- shutting down service ****`)
|
|
373
419
|
|
|
374
420
|
/* graph processing: PASS 1: disconnect node streams */
|
|
375
421
|
for (const node of graphNodes) {
|
|
@@ -1,111 +0,0 @@
|
|
|
1
|
-
/*
|
|
2
|
-
** SpeechFlow - Speech Processing Flow Graph
|
|
3
|
-
** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
|
|
4
|
-
** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
|
|
5
|
-
*/
|
|
6
|
-
|
|
7
|
-
/* standard dependencies */
|
|
8
|
-
import path from "node:path"
|
|
9
|
-
import Stream from "node:stream"
|
|
10
|
-
|
|
11
|
-
/* external dependencies */
|
|
12
|
-
import * as Transformers from "@huggingface/transformers"
|
|
13
|
-
|
|
14
|
-
/* internal dependencies */
|
|
15
|
-
import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
|
|
16
|
-
|
|
17
|
-
/* SpeechFlow node for OPUS text-to-text translation */
|
|
18
|
-
export default class SpeechFlowNodeOPUS extends SpeechFlowNode {
|
|
19
|
-
/* declare official node name */
|
|
20
|
-
public static name = "opus"
|
|
21
|
-
|
|
22
|
-
/* internal state */
|
|
23
|
-
private translator: Transformers.TranslationPipeline | null = null
|
|
24
|
-
|
|
25
|
-
/* construct node */
|
|
26
|
-
constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
|
|
27
|
-
super(id, cfg, opts, args)
|
|
28
|
-
|
|
29
|
-
/* declare node configuration parameters */
|
|
30
|
-
this.configure({
|
|
31
|
-
src: { type: "string", pos: 0, val: "de", match: /^(?:de|en)$/ },
|
|
32
|
-
dst: { type: "string", pos: 1, val: "en", match: /^(?:de|en)$/ }
|
|
33
|
-
})
|
|
34
|
-
|
|
35
|
-
/* sanity check situation */
|
|
36
|
-
if (this.params.src === this.params.dst)
|
|
37
|
-
throw new Error("source and destination languages cannot be the same")
|
|
38
|
-
|
|
39
|
-
/* declare node input/output format */
|
|
40
|
-
this.input = "text"
|
|
41
|
-
this.output = "text"
|
|
42
|
-
}
|
|
43
|
-
|
|
44
|
-
/* open node */
|
|
45
|
-
async open () {
|
|
46
|
-
/* instantiate OPUS */
|
|
47
|
-
const model = `onnx-community/opus-mt-${this.params.src}-${this.params.dst}`
|
|
48
|
-
this.translator = await Transformers.pipeline("translation", model, {
|
|
49
|
-
cache_dir: path.join(this.config.cacheDir, "opus"),
|
|
50
|
-
dtype: "q4",
|
|
51
|
-
device: "gpu"
|
|
52
|
-
})
|
|
53
|
-
if (this.translator === null)
|
|
54
|
-
throw new Error("failed to instantiate translator pipeline")
|
|
55
|
-
|
|
56
|
-
/* provide text-to-text translation */
|
|
57
|
-
const translate = async (text: string) => {
|
|
58
|
-
const result = await this.translator!(text)
|
|
59
|
-
return Array.isArray(result) ?
|
|
60
|
-
(result[0] as Transformers.TranslationSingle).translation_text :
|
|
61
|
-
(result as Transformers.TranslationSingle).translation_text
|
|
62
|
-
}
|
|
63
|
-
|
|
64
|
-
/* establish a duplex stream and connect it to Ollama */
|
|
65
|
-
this.stream = new Stream.Transform({
|
|
66
|
-
readableObjectMode: true,
|
|
67
|
-
writableObjectMode: true,
|
|
68
|
-
decodeStrings: false,
|
|
69
|
-
transform (chunk: SpeechFlowChunk, encoding, callback) {
|
|
70
|
-
if (Buffer.isBuffer(chunk.payload))
|
|
71
|
-
callback(new Error("invalid chunk payload type"))
|
|
72
|
-
else {
|
|
73
|
-
if (chunk.payload === "") {
|
|
74
|
-
this.push(chunk)
|
|
75
|
-
callback()
|
|
76
|
-
}
|
|
77
|
-
else {
|
|
78
|
-
translate(chunk.payload).then((payload) => {
|
|
79
|
-
const chunkNew = chunk.clone()
|
|
80
|
-
chunkNew.payload = payload
|
|
81
|
-
this.push(chunkNew)
|
|
82
|
-
callback()
|
|
83
|
-
}).catch((err) => {
|
|
84
|
-
callback(err)
|
|
85
|
-
})
|
|
86
|
-
}
|
|
87
|
-
}
|
|
88
|
-
},
|
|
89
|
-
final (callback) {
|
|
90
|
-
this.push(null)
|
|
91
|
-
callback()
|
|
92
|
-
}
|
|
93
|
-
})
|
|
94
|
-
}
|
|
95
|
-
|
|
96
|
-
/* close node */
|
|
97
|
-
async close () {
|
|
98
|
-
/* close stream */
|
|
99
|
-
if (this.stream !== null) {
|
|
100
|
-
this.stream.destroy()
|
|
101
|
-
this.stream = null
|
|
102
|
-
}
|
|
103
|
-
|
|
104
|
-
/* shutdown OPUS */
|
|
105
|
-
if (this.translator !== null) {
|
|
106
|
-
this.translator.dispose()
|
|
107
|
-
this.translator = null
|
|
108
|
-
}
|
|
109
|
-
}
|
|
110
|
-
}
|
|
111
|
-
|