speechflow 0.9.7 → 0.9.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dst/speechflow.js CHANGED
@@ -17,6 +17,7 @@ const node_events_1 = require("node:events");
17
17
  const luxon_1 = require("luxon");
18
18
  const cli_io_1 = __importDefault(require("cli-io"));
19
19
  const yargs_1 = __importDefault(require("yargs"));
20
+ const helpers_1 = require("yargs/helpers");
20
21
  const js_yaml_1 = __importDefault(require("js-yaml"));
21
22
  const flowlink_1 = __importDefault(require("flowlink"));
22
23
  const object_path_1 = __importDefault(require("object-path"));
@@ -33,6 +34,7 @@ let cli = null;
33
34
  dataDirAutoCreate: true
34
35
  });
35
36
  /* parse command-line arguments */
37
+ const coerce = (arg) => Array.isArray(arg) ? arg[arg.length - 1] : arg;
36
38
  const args = await (0, yargs_1.default)()
37
39
  /* eslint @stylistic/indent: off */
38
40
  .usage("Usage: $0 " +
@@ -44,27 +46,68 @@ let cli = null;
44
46
  "[-f|--file <file>] " +
45
47
  "[-c|--config <id>@<yaml-config-file>] " +
46
48
  "[<argument> [...]]")
47
- .help("h").alias("h", "help").default("h", false)
48
- .describe("h", "show usage help")
49
- .boolean("V").alias("V", "version").default("V", false)
50
- .describe("V", "show program version information")
51
- .string("v").nargs("v", 1).alias("v", "log-level").default("v", "warning")
52
- .describe("v", "level for verbose logging ('none', 'error', 'warning', 'info', 'debug')")
53
- .string("C").nargs("C", 1).alias("C", "cache").default("C", node_path_1.default.join(dataDir, "cache"))
54
- .describe("C", "directory for cached files (primarily AI model files)")
55
- .string("e").nargs("e", 1).alias("e", "expression").default("e", "")
56
- .describe("e", "FlowLink expression string")
57
- .string("f").nargs("f", 1).alias("f", "file").default("f", "")
58
- .describe("f", "FlowLink expression file")
59
- .string("c").nargs("c", 1).alias("c", "config-file").default("c", "")
60
- .describe("c", "FlowLink expression reference into YAML file (in format <id>@<file>)")
49
+ .option("V", {
50
+ alias: "version",
51
+ type: "boolean",
52
+ array: false,
53
+ coerce,
54
+ default: false,
55
+ describe: "show program version information"
56
+ })
57
+ .option("v", {
58
+ alias: "log-level",
59
+ type: "string",
60
+ array: false,
61
+ coerce,
62
+ nargs: 1,
63
+ default: "warning",
64
+ describe: "level for verbose logging ('none', 'error', 'warning', 'info', 'debug')"
65
+ })
66
+ .option("C", {
67
+ alias: "cache",
68
+ type: "string",
69
+ array: false,
70
+ coerce,
71
+ nargs: 1,
72
+ default: node_path_1.default.join(dataDir, "cache"),
73
+ describe: "directory for cached files (primarily AI model files)"
74
+ })
75
+ .option("e", {
76
+ alias: "expression",
77
+ type: "string",
78
+ array: false,
79
+ coerce,
80
+ nargs: 1,
81
+ default: "",
82
+ describe: "FlowLink expression string"
83
+ })
84
+ .option("f", {
85
+ alias: "file",
86
+ type: "string",
87
+ array: false,
88
+ coerce,
89
+ nargs: 1,
90
+ default: "",
91
+ describe: "FlowLink expression file"
92
+ })
93
+ .option("c", {
94
+ alias: "config",
95
+ type: "string",
96
+ array: false,
97
+ coerce,
98
+ nargs: 1,
99
+ default: "",
100
+ describe: "FlowLink expression reference into YAML file (in format <id>@<file>)"
101
+ })
102
+ .help("h", "show usage help")
103
+ .alias("h", "help")
104
+ .showHelpOnFail(true)
61
105
  .version(false)
62
106
  .strict()
63
- .showHelpOnFail(true)
64
107
  .demand(0)
65
- .parse(process.argv.slice(2));
108
+ .parse((0, helpers_1.hideBin)(process.argv));
66
109
  /* short-circuit version request */
67
- if (args.version) {
110
+ if (args.V) {
68
111
  process.stderr.write(`SpeechFlow ${package_json_1.default["x-stdver"]} (${package_json_1.default["x-release"]}) <${package_json_1.default.homepage}>\n`);
69
112
  process.stderr.write(`${package_json_1.default.description}\n`);
70
113
  process.stderr.write(`Copyright (c) 2024-2025 ${package_json_1.default.author.name} <${package_json_1.default.author.url}>\n`);
@@ -74,7 +117,7 @@ let cli = null;
74
117
  /* establish CLI environment */
75
118
  cli = new cli_io_1.default({
76
119
  encoding: "utf8",
77
- logLevel: args.logLevel,
120
+ logLevel: args.v,
78
121
  logTime: true,
79
122
  logPrefix: package_json_1.default.name
80
123
  });
@@ -100,30 +143,30 @@ let cli = null;
100
143
  });
101
144
  /* sanity check usage */
102
145
  let n = 0;
103
- if (typeof args.expression === "string" && args.expression !== "")
146
+ if (typeof args.e === "string" && args.e !== "")
104
147
  n++;
105
- if (typeof args.expressionFile === "string" && args.expressionFile !== "")
148
+ if (typeof args.f === "string" && args.f !== "")
106
149
  n++;
107
- if (typeof args.configFile === "string" && args.configFile !== "")
150
+ if (typeof args.c === "string" && args.c !== "")
108
151
  n++;
109
152
  if (n !== 1)
110
153
  throw new Error("cannot use more than one FlowLink specification source (either option -e, -f or -c)");
111
154
  /* read configuration */
112
155
  let config = "";
113
- if (typeof args.expression === "string" && args.expression !== "")
114
- config = args.expression;
115
- else if (typeof args.expressionFile === "string" && args.expressionFile !== "")
116
- config = await cli.input(args.expressionFile, { encoding: "utf8" });
117
- else if (typeof args.configFile === "string" && args.configFile !== "") {
118
- const m = args.configFile.match(/^(.+?)@(.+)$/);
156
+ if (typeof args.e === "string" && args.e !== "")
157
+ config = args.e;
158
+ else if (typeof args.f === "string" && args.f !== "")
159
+ config = await cli.input(args.f, { encoding: "utf8" });
160
+ else if (typeof args.c === "string" && args.c !== "") {
161
+ const m = args.c.match(/^(.+?)@(.+)$/);
119
162
  if (m === null)
120
- throw new Error("invalid configuration file specification (expected \"<key>@<yaml-config-file>\")");
121
- const [, key, file] = m;
163
+ throw new Error("invalid configuration file specification (expected \"<id>@<yaml-config-file>\")");
164
+ const [, id, file] = m;
122
165
  const yaml = await cli.input(file, { encoding: "utf8" });
123
166
  const obj = js_yaml_1.default.load(yaml);
124
- if (obj[key] === undefined)
125
- throw new Error(`no such key "${key}" found in configuration file`);
126
- config = obj[key];
167
+ if (obj[id] === undefined)
168
+ throw new Error(`no such id "${id}" found in configuration file`);
169
+ config = obj[id];
127
170
  }
128
171
  /* track the available SpeechFlow nodes */
129
172
  const nodes = {};
@@ -133,11 +176,14 @@ let cli = null;
133
176
  "./speechflow-node-a2a-wav.js",
134
177
  "./speechflow-node-a2t-deepgram.js",
135
178
  "./speechflow-node-t2a-elevenlabs.js",
179
+ "./speechflow-node-t2a-kokoro.js",
136
180
  "./speechflow-node-t2t-deepl.js",
137
- "./speechflow-node-t2t-format.js",
138
- "./speechflow-node-t2t-gemma.js",
181
+ "./speechflow-node-t2t-openai.js",
182
+ "./speechflow-node-t2t-ollama.js",
183
+ "./speechflow-node-t2t-transformers.js",
139
184
  "./speechflow-node-t2t-opus.js",
140
185
  "./speechflow-node-t2t-subtitle.js",
186
+ "./speechflow-node-t2t-format.js",
141
187
  "./speechflow-node-x2x-trace.js",
142
188
  "./speechflow-node-xio-device.js",
143
189
  "./speechflow-node-xio-file.js",
@@ -186,7 +232,7 @@ let cli = null;
186
232
  audioLittleEndian: true,
187
233
  audioSampleRate: 48000,
188
234
  textEncoding: "utf8",
189
- cacheDir: args.cache
235
+ cacheDir: args.C
190
236
  };
191
237
  let ast;
192
238
  try {
@@ -240,9 +286,9 @@ let cli = null;
240
286
  }
241
287
  catch (err) {
242
288
  if (err instanceof Error && err.name === "FlowLinkError")
243
- cli.log("error", `failed to materialize SpeechFlow configuration: ${err.toString()}"`);
289
+ cli.log("error", `failed to materialize SpeechFlow configuration: ${err.toString()}`);
244
290
  else if (err instanceof Error)
245
- cli.log("error", `failed to materialize SpeechFlow configuration: ${err.message}"`);
291
+ cli.log("error", `failed to materialize SpeechFlow configuration: ${err.message}`);
246
292
  else
247
293
  cli.log("error", "failed to materialize SpeechFlow configuration: internal error");
248
294
  process.exit(1);
@@ -332,7 +378,7 @@ let cli = null;
332
378
  });
333
379
  }
334
380
  /* start of internal stream processing */
335
- cli.log("info", "everything established -- stream processing in SpeechFlow graph starts");
381
+ cli.log("info", "**** everything established -- stream processing in SpeechFlow graph starts ****");
336
382
  /* gracefully shutdown process */
337
383
  let shuttingDown = false;
338
384
  const shutdown = async (signal) => {
@@ -340,9 +386,9 @@ let cli = null;
340
386
  return;
341
387
  shuttingDown = true;
342
388
  if (signal === "finished")
343
- cli.log("info", "streams of all nodes finished -- shutting down service");
389
+ cli.log("info", "**** streams of all nodes finished -- shutting down service ****");
344
390
  else
345
- cli.log("warning", `received signal ${signal} -- shutting down service`);
391
+ cli.log("warning", `**** received signal ${signal} -- shutting down service ****`);
346
392
  /* graph processing: PASS 1: disconnect node streams */
347
393
  for (const node of graphNodes) {
348
394
  if (node.stream === null) {
@@ -0,0 +1,6 @@
1
+ @echo off
2
+ title Speechflow
3
+ "c:\Program Files\nodejs\node.exe" ^
4
+ .\dst\speechflow.js ^
5
+ -v info ^
6
+ -c studio@.\etc\speechflow.yaml
@@ -0,0 +1,5 @@
1
+ #!/bin/sh
2
+ node \
3
+ ./dst/speechflow.js \
4
+ -v info \
5
+ -c studio@./etc/speechflow.yaml
@@ -17,8 +17,8 @@ pass-through: |
17
17
  device(device: "wasapi:VoiceMeeter VAIO3 Input", mode: "w")
18
18
  }
19
19
 
20
- # Generate text file with German narration of MP3 audio file
21
- narration: |
20
+ # Generate text file with German transcription of MP3 audio file
21
+ transcription: |
22
22
  file(path: argv.0, mode: "r", type: "audio") |
23
23
  ffmpeg(src: "mp3", dst: "pcm") |
24
24
  deepgram(language: "de", key: env.SPEECHFLOW_KEY_DEEPGRAM) |
@@ -39,6 +39,13 @@ translation: |
39
39
  deepl(src: "de", dst: "en") |
40
40
  file(path: "-", mode: "w", type: "text")
41
41
 
42
+ # Generate audio file with English voice for a text file
43
+ speaking: |
44
+ file(path: argv.0, mode: "r", type: "text") |
45
+ kokoro(language: "en") |
46
+ wav(mode: "encode") |
47
+ file(path: argv.1, mode: "w", type: "audio")
48
+
42
49
  # Real-time studio translation from German to English,
43
50
  # including the capturing of all involved inputs and outputs:
44
51
  studio: |
package/etc/stx.conf CHANGED
@@ -48,7 +48,7 @@ server-delay
48
48
 
49
49
  # run program
50
50
  server
51
- node dst/speechflow.js -v info -c studio@sample.yaml
51
+ node dst/speechflow.js -v info -c studio@etc/speechflow.yaml "$@"
52
52
 
53
53
  # run program with file watching
54
54
  server-watch
package/package.json CHANGED
@@ -1,8 +1,8 @@
1
1
  {
2
2
  "name": "speechflow",
3
- "version": "0.9.7",
4
- "x-stdver": "0.9.7-EA",
5
- "x-release": "2025-07-12",
3
+ "version": "0.9.9",
4
+ "x-stdver": "0.9.9-EA",
5
+ "x-release": "2025-07-13",
6
6
  "homepage": "https://github.com/rse/speechflow",
7
7
  "description": "Speech Processing Flow Graph",
8
8
  "license": "GPL-3.0-only",
@@ -35,6 +35,7 @@
35
35
  "utf-8-validate": "6.0.5",
36
36
  "@opensumi/reconnecting-websocket": "4.4.0",
37
37
  "ollama": "0.5.16",
38
+ "openai": "5.9.0",
38
39
  "@rse/ffmpeg": "1.4.2",
39
40
  "ffmpeg-stream": "1.0.1",
40
41
  "installed-packages": "1.0.13",
@@ -45,10 +46,10 @@
45
46
  "pure-uuid": "1.8.1",
46
47
  "wavefile": "11.0.0",
47
48
  "@huggingface/transformers": "3.6.3",
49
+ "kokoro-js": "1.2.1",
48
50
  "@ericedouard/vad-node-realtime": "0.2.0",
49
51
  "luxon": "3.7.1",
50
- "wrap-text": "1.0.10",
51
- "smart-whisper": "0.8.1"
52
+ "wrap-text": "1.0.10"
52
53
  },
53
54
  "devDependencies": {
54
55
  "eslint": "9.31.0",
@@ -84,7 +85,7 @@
84
85
  "cross-env": "7.0.3"
85
86
  },
86
87
  "overrides": {
87
- "onnxruntime-node": "1.22.0-dev.20250418-c19a49615b"
88
+ "@huggingface/transformers": { "onnxruntime-node": "1.23.0-dev.20250703-7fc6235861" }
88
89
  },
89
90
  "upd": [ "!@biomejs/biome" ],
90
91
  "engines": {
@@ -0,0 +1,160 @@
1
+ /*
2
+ ** SpeechFlow - Speech Processing Flow Graph
3
+ ** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
4
+ ** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
5
+ */
6
+
7
+ /* standard dependencies */
8
+ import Stream from "node:stream"
9
+
10
+ /* external dependencies */
11
+ import { KokoroTTS } from "kokoro-js"
12
+ import SpeexResampler from "speex-resampler"
13
+
14
+ /* internal dependencies */
15
+ import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
16
+
17
+ /* SpeechFlow node for Kokoro text-to-speech conversion */
18
export default class SpeechFlowNodeKokoro extends SpeechFlowNode {
    /* declare official node name */
    public static name = "kokoro"

    /* internal state */
    private kokoro: KokoroTTS | null = null

    /* whether the Speex resampler library was already initialized
       (shared across all instances of this node) */
    private static speexInitialized = false

    /* construct node */
    constructor (id: string, cfg: { [ id: string ]: any }, opts: { [ id: string ]: any }, args: any[]) {
        super(id, cfg, opts, args)

        /* declare node configuration parameters */
        this.configure({
            voice:    { type: "string", val: "Aoede", pos: 0, match: /^(?:Aoede|Heart|Puck|Fenrir)$/ },
            language: { type: "string", val: "en",    pos: 1, match: /^(?:en)$/ },
            speed:    { type: "number", val: 1.25,    pos: 2, match: (n: number) => n >= 1.0 && n <= 1.30 },
        })

        /* declare node input/output format */
        this.input  = "text"
        this.output = "audio"
    }

    /* open node: load the Kokoro model, set up the resampler
       and create the text-to-audio transform stream */
    async open () {
        /* establish Kokoro */
        const model = "onnx-community/Kokoro-82M-v1.0-ONNX"

        /* latest known download percentage (0..100) per artifact */
        const progressState = new Map<string, number>()
        const progressCallback = (progress: any) => {
            let artifact = model
            if (typeof progress.file === "string")
                artifact += `:${progress.file}`
            let percent = 0
            if (typeof progress.loaded === "number" && typeof progress.total === "number" && progress.total > 0)
                /* guard against a zero total, which would yield Infinity/NaN */
                percent = (progress.loaded / progress.total) * 100
            else if (typeof progress.progress === "number")
                percent = progress.progress
            if (percent > 0)
                progressState.set(artifact, percent)
        }

        /* once per second report the download progress of all pending artifacts */
        const interval = setInterval(() => {
            for (const [ artifact, percent ] of progressState) {
                this.log("info", `downloaded ${percent.toFixed(2)}% of artifact "${artifact}"`)

                /* percentages are on a 0..100 scale, so stop reporting
                   an artifact only after it is completely downloaded */
                if (percent >= 100.0)
                    progressState.delete(artifact)
            }
        }, 1000)
        try {
            this.kokoro = await KokoroTTS.from_pretrained(model, {
                dtype: "q4f16",
                progress_callback: progressCallback
            })
        }
        finally {
            /* always stop the progress reporting, even if loading the model
               failed, as a pending timer would keep logging indefinitely */
            clearInterval(interval)
        }
        if (this.kokoro === null)
            throw new Error("failed to instantiate Kokoro")
        const kokoro = this.kokoro

        /* establish resampler from Kokoro's 24KHz output
           to the configured audio sample rate */
        if (!SpeechFlowNodeKokoro.speexInitialized) {
            /* at least once initialize resampler */
            await SpeexResampler.initPromise
            SpeechFlowNodeKokoro.speexInitialized = true
        }
        /* NOTE(review): last argument is presumably the Speex quality level -- confirm */
        const resampler = new SpeexResampler(1, 24000, this.config.audioSampleRate, 7)

        /* determine voice for text-to-speech operation
           (maps the public voice names onto Kokoro voice identifiers) */
        const voices: { [ name: string ]: string | undefined } = {
            "Aoede":  "af_aoede",
            "Heart":  "af_heart",
            "Puck":   "am_puck",
            "Fenrir": "am_fenrir"
        }
        const voice = voices[this.params.voice]
        if (voice === undefined)
            throw new Error(`invalid Kokoro voice "${this.params.voice}"`)

        /* perform text-to-speech operation with Kokoro */
        const text2speech = async (text: string) => {
            this.log("info", `Kokoro: input: "${text}"`)
            const audio = await kokoro.generate(text, {
                speed: this.params.speed,
                voice: voice as any
            })
            if (audio.sampling_rate !== 24000)
                throw new Error("expected 24KHz sampling rate in Kokoro output")

            /* convert audio samples from floating-point (clamped to the
               range [-1, 1]) to signed 16-bit little-endian integers */
            const samples = audio.audio
            const buffer1 = Buffer.alloc(samples.length * 2)
            for (let i = 0; i < samples.length; i++) {
                const sample = Math.max(-1, Math.min(1, samples[i]))
                buffer1.writeInt16LE(sample * 0x7FFF, i * 2)
            }

            /* resample audio samples from 24KHz to the configured audio sample rate */
            const buffer2 = resampler.processChunk(buffer1)

            return buffer2
        }

        /* create transform stream and connect it to Kokoro
           (inside the stream callbacks "this" is the stream itself,
           hence logging is provided through a closure) */
        const log = (level: string, msg: string) => { this.log(level, msg) }
        this.stream = new Stream.Transform({
            writableObjectMode: true,
            readableObjectMode: true,
            decodeStrings:      false,
            transform (chunk: SpeechFlowChunk, encoding, callback) {
                /* only text payloads can be converted to speech */
                if (Buffer.isBuffer(chunk.payload))
                    callback(new Error("invalid chunk payload type"))
                else {
                    text2speech(chunk.payload).then((buffer) => {
                        log("info", `Kokoro: received audio (buffer length: ${buffer.byteLength})`)

                        /* pass-through a copy of the chunk which carries the audio */
                        chunk = chunk.clone()
                        chunk.type = "audio"
                        chunk.payload = buffer
                        this.push(chunk)
                        callback()
                    }).catch((err) => {
                        callback(err)
                    })
                }
            },
            final (callback) {
                this.push(null)
                callback()
            }
        })
    }

    /* close node */
    async close () {
        /* destroy stream */
        if (this.stream !== null) {
            this.stream.destroy()
            this.stream = null
        }

        /* destroy Kokoro API */
        if (this.kokoro !== null)
            this.kokoro = null
    }
}
160
+
@@ -17,10 +17,10 @@ import SpeechFlowNode, { SpeechFlowChunk } from "./speechflow-node"
17
17
  type ConfigEntry = { systemPrompt: string, chat: Array<{ role: string, content: string }> }
18
18
  type Config = { [ key: string ]: ConfigEntry }
19
19
 
20
- /* SpeechFlow node for Gemma/Ollama text-to-text translation */
21
- export default class SpeechFlowNodeGemma extends SpeechFlowNode {
20
+ /* SpeechFlow node for Ollama text-to-text translation */
21
+ export default class SpeechFlowNodeOllama extends SpeechFlowNode {
22
22
  /* declare official node name */
23
- public static name = "gemma"
23
+ public static name = "ollama"
24
24
 
25
25
  /* internal state */
26
26
  private ollama: Ollama | null = null
@@ -103,7 +103,8 @@ export default class SpeechFlowNodeGemma extends SpeechFlowNode {
103
103
  "Do not show any prolog.\n" +
104
104
  "Do not show any epilog.\n" +
105
105
  "Get to the point.\n" +
106
- "Directly translate text from Enlish (EN) to German (DE) language.\n",
106
+ "Preserve the original meaning, tone, and nuance.\n" +
107
+ "Directly translate text from English (EN) to fluent and natural German (DE) language.\n",
107
108
  chat: [
108
109
  { role: "user", content: "I love my wife." },
109
110
  { role: "system", content: "Ich liebe meine Frau." },
@@ -121,13 +122,14 @@ export default class SpeechFlowNodeGemma extends SpeechFlowNode {
121
122
  "Output only the requested text.\n" +
122
123
  "Do not use markdown.\n" +
123
124
  "Do not chat.\n" +
124
- "Do not show any explanations. \n" +
125
+ "Do not show any explanations.\n" +
125
126
  "Do not show any introduction.\n" +
126
127
  "Do not show any preamble. \n" +
127
128
  "Do not show any prolog. \n" +
128
129
  "Do not show any epilog. \n" +
129
130
  "Get to the point.\n" +
130
- "Directly translate text from German (DE) to English (EN) language.\n",
131
+ "Preserve the original meaning, tone, and nuance.\n" +
132
+ "Directly translate text from German (DE) to fluent and natural English (EN) language.\n",
131
133
  chat: [
132
134
  { role: "user", content: "Ich liebe meine Frau." },
133
135
  { role: "system", content: "I love my wife." },
@@ -145,11 +147,19 @@ export default class SpeechFlowNodeGemma extends SpeechFlowNode {
145
147
 
146
148
  /* declare node configuration parameters */
147
149
  this.configure({
148
- api: { type: "string", val: "http://127.0.0.1:11434", match: /^https?:\/\/.+?:\d+$/ },
149
- src: { type: "string", pos: 0, val: "de", match: /^(?:de|en)$/ },
150
- dst: { type: "string", pos: 1, val: "en", match: /^(?:de|en)$/ }
150
+ api: { type: "string", val: "http://127.0.0.1:11434", match: /^https?:\/\/.+?:\d+$/ },
151
+ model: { type: "string", val: "gemma3:4b-it-q4_K_M", match: /^.+$/ },
152
+ src: { type: "string", pos: 0, val: "de", match: /^(?:de|en)$/ },
153
+ dst: { type: "string", pos: 1, val: "en", match: /^(?:de|en)$/ }
151
154
  })
152
155
 
156
+ /* tell effective mode */
157
+ if (this.params.src === this.params.dst)
158
+ this.log("info", `Ollama: operation mode: spellchecking for language "${this.params.src}"`)
159
+ else
160
+ this.log("info", `Ollama: operation mode: translation from language "${this.params.src}"` +
161
+ ` to language "${this.params.dst}"`)
162
+
153
163
  /* declare node input/output format */
154
164
  this.input = "text"
155
165
  this.output = "text"
@@ -160,12 +170,36 @@ export default class SpeechFlowNodeGemma extends SpeechFlowNode {
160
170
  /* instantiate Ollama API */
161
171
  this.ollama = new Ollama({ host: this.params.api })
162
172
 
173
+ /* ensure the model is available */
174
+ const model = this.params.model
175
+ const models = await this.ollama.list()
176
+ const exists = models.models.some((m) => m.name === model)
177
+ if (!exists) {
178
+ this.log("info", `Ollama: model "${model}" still not present in Ollama -- ` +
179
+ "automatically downloading model")
180
+ let artifact = ""
181
+ let percent = 0
182
+ const interval = setInterval(() => {
183
+ this.log("info", `downloaded ${percent.toFixed(2)}% of artifact "${artifact}"`)
184
+ }, 1000)
185
+ const progress = await this.ollama.pull({ model, stream: true })
186
+ for await (const event of progress) {
187
+ if (event.digest)
188
+ artifact = event.digest
189
+ if (event.completed && event.total)
190
+ percent = (event.completed / event.total) * 100
191
+ }
192
+ clearInterval(interval)
193
+ }
194
+ else
195
+ this.log("info", `Ollama: model "${model}" already present in Ollama`)
196
+
163
197
  /* provide text-to-text translation */
164
198
  const translate = async (text: string) => {
165
199
  const key = `${this.params.src}-${this.params.dst}`
166
200
  const cfg = this.setup[key]
167
201
  const response = await this.ollama!.chat({
168
- model: "gemma3:4b-it-q4_K_M",
202
+ model,
169
203
  messages: [
170
204
  { role: "system", content: cfg.systemPrompt },
171
205
  ...cfg.chat,