speechflow 0.9.4 → 0.9.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107) hide show
  1. package/CHANGELOG.md +19 -0
  2. package/README.md +227 -54
  3. package/dst/speechflow-node-a2a-ffmpeg.d.ts +13 -0
  4. package/dst/speechflow-node-a2a-ffmpeg.js +152 -0
  5. package/dst/speechflow-node-a2a-wav.d.ts +11 -0
  6. package/dst/speechflow-node-a2a-wav.js +170 -0
  7. package/dst/speechflow-node-a2t-deepgram.d.ts +12 -0
  8. package/dst/speechflow-node-a2t-deepgram.js +220 -0
  9. package/dst/speechflow-node-deepgram.d.ts +3 -1
  10. package/dst/speechflow-node-deepgram.js +86 -22
  11. package/dst/speechflow-node-deepl.d.ts +3 -1
  12. package/dst/speechflow-node-deepl.js +25 -20
  13. package/dst/speechflow-node-device.d.ts +3 -1
  14. package/dst/speechflow-node-device.js +53 -2
  15. package/dst/speechflow-node-elevenlabs.d.ts +4 -1
  16. package/dst/speechflow-node-elevenlabs.js +88 -49
  17. package/dst/speechflow-node-ffmpeg.d.ts +3 -1
  18. package/dst/speechflow-node-ffmpeg.js +42 -4
  19. package/dst/speechflow-node-file.d.ts +3 -1
  20. package/dst/speechflow-node-file.js +84 -13
  21. package/dst/speechflow-node-format.d.ts +11 -0
  22. package/dst/speechflow-node-format.js +80 -0
  23. package/dst/speechflow-node-gemma.d.ts +3 -1
  24. package/dst/speechflow-node-gemma.js +84 -23
  25. package/dst/speechflow-node-mqtt.d.ts +13 -0
  26. package/dst/speechflow-node-mqtt.js +181 -0
  27. package/dst/speechflow-node-opus.d.ts +12 -0
  28. package/dst/speechflow-node-opus.js +135 -0
  29. package/dst/speechflow-node-subtitle.d.ts +12 -0
  30. package/dst/speechflow-node-subtitle.js +96 -0
  31. package/dst/speechflow-node-t2a-elevenlabs.d.ts +13 -0
  32. package/dst/speechflow-node-t2a-elevenlabs.js +182 -0
  33. package/dst/speechflow-node-t2t-deepl.d.ts +12 -0
  34. package/dst/speechflow-node-t2t-deepl.js +133 -0
  35. package/dst/speechflow-node-t2t-format.d.ts +11 -0
  36. package/dst/speechflow-node-t2t-format.js +80 -0
  37. package/dst/speechflow-node-t2t-gemma.d.ts +13 -0
  38. package/dst/speechflow-node-t2t-gemma.js +213 -0
  39. package/dst/speechflow-node-t2t-opus.d.ts +12 -0
  40. package/dst/speechflow-node-t2t-opus.js +135 -0
  41. package/dst/speechflow-node-t2t-subtitle.d.ts +12 -0
  42. package/dst/speechflow-node-t2t-subtitle.js +96 -0
  43. package/dst/speechflow-node-trace.d.ts +11 -0
  44. package/dst/speechflow-node-trace.js +88 -0
  45. package/dst/speechflow-node-wav.d.ts +11 -0
  46. package/dst/speechflow-node-wav.js +170 -0
  47. package/dst/speechflow-node-websocket.d.ts +3 -1
  48. package/dst/speechflow-node-websocket.js +149 -49
  49. package/dst/speechflow-node-whisper-common.d.ts +34 -0
  50. package/dst/speechflow-node-whisper-common.js +7 -0
  51. package/dst/speechflow-node-whisper-ggml.d.ts +1 -0
  52. package/dst/speechflow-node-whisper-ggml.js +97 -0
  53. package/dst/speechflow-node-whisper-onnx.d.ts +1 -0
  54. package/dst/speechflow-node-whisper-onnx.js +131 -0
  55. package/dst/speechflow-node-whisper-worker-ggml.d.ts +1 -0
  56. package/dst/speechflow-node-whisper-worker-ggml.js +97 -0
  57. package/dst/speechflow-node-whisper-worker-onnx.d.ts +1 -0
  58. package/dst/speechflow-node-whisper-worker-onnx.js +131 -0
  59. package/dst/speechflow-node-whisper-worker.d.ts +1 -0
  60. package/dst/speechflow-node-whisper-worker.js +116 -0
  61. package/dst/speechflow-node-whisper-worker2.d.ts +1 -0
  62. package/dst/speechflow-node-whisper-worker2.js +82 -0
  63. package/dst/speechflow-node-whisper.d.ts +19 -0
  64. package/dst/speechflow-node-whisper.js +604 -0
  65. package/dst/speechflow-node-x2x-trace.d.ts +11 -0
  66. package/dst/speechflow-node-x2x-trace.js +88 -0
  67. package/dst/speechflow-node-xio-device.d.ts +13 -0
  68. package/dst/speechflow-node-xio-device.js +205 -0
  69. package/dst/speechflow-node-xio-file.d.ts +11 -0
  70. package/dst/speechflow-node-xio-file.js +176 -0
  71. package/dst/speechflow-node-xio-mqtt.d.ts +13 -0
  72. package/dst/speechflow-node-xio-mqtt.js +181 -0
  73. package/dst/speechflow-node-xio-websocket.d.ts +13 -0
  74. package/dst/speechflow-node-xio-websocket.js +275 -0
  75. package/dst/speechflow-node.d.ts +25 -7
  76. package/dst/speechflow-node.js +74 -9
  77. package/dst/speechflow-utils.d.ts +23 -0
  78. package/dst/speechflow-utils.js +194 -0
  79. package/dst/speechflow.js +146 -43
  80. package/etc/biome.jsonc +12 -4
  81. package/etc/stx.conf +65 -0
  82. package/package.d/@ericedouard+vad-node-realtime+0.2.0.patch +18 -0
  83. package/package.json +49 -31
  84. package/sample.yaml +61 -23
  85. package/src/lib.d.ts +6 -1
  86. package/src/{speechflow-node-ffmpeg.ts → speechflow-node-a2a-ffmpeg.ts} +10 -4
  87. package/src/speechflow-node-a2a-wav.ts +143 -0
  88. package/src/speechflow-node-a2t-deepgram.ts +199 -0
  89. package/src/speechflow-node-t2a-elevenlabs.ts +160 -0
  90. package/src/{speechflow-node-deepl.ts → speechflow-node-t2t-deepl.ts} +36 -25
  91. package/src/speechflow-node-t2t-format.ts +85 -0
  92. package/src/{speechflow-node-gemma.ts → speechflow-node-t2t-gemma.ts} +89 -25
  93. package/src/speechflow-node-t2t-opus.ts +111 -0
  94. package/src/speechflow-node-t2t-subtitle.ts +101 -0
  95. package/src/speechflow-node-x2x-trace.ts +92 -0
  96. package/src/{speechflow-node-device.ts → speechflow-node-xio-device.ts} +25 -3
  97. package/src/speechflow-node-xio-file.ts +153 -0
  98. package/src/speechflow-node-xio-mqtt.ts +154 -0
  99. package/src/speechflow-node-xio-websocket.ts +248 -0
  100. package/src/speechflow-node.ts +78 -13
  101. package/src/speechflow-utils.ts +212 -0
  102. package/src/speechflow.ts +150 -43
  103. package/etc/nps.yaml +0 -40
  104. package/src/speechflow-node-deepgram.ts +0 -133
  105. package/src/speechflow-node-elevenlabs.ts +0 -116
  106. package/src/speechflow-node-file.ts +0 -108
  107. package/src/speechflow-node-websocket.ts +0 -179
@@ -4,6 +4,39 @@
4
4
  ** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
5
5
  ** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
6
6
  */
7
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
8
+ if (k2 === undefined) k2 = k;
9
+ var desc = Object.getOwnPropertyDescriptor(m, k);
10
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
11
+ desc = { enumerable: true, get: function() { return m[k]; } };
12
+ }
13
+ Object.defineProperty(o, k2, desc);
14
+ }) : (function(o, m, k, k2) {
15
+ if (k2 === undefined) k2 = k;
16
+ o[k2] = m[k];
17
+ }));
18
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
19
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
20
+ }) : function(o, v) {
21
+ o["default"] = v;
22
+ });
23
+ var __importStar = (this && this.__importStar) || (function () {
24
+ var ownKeys = function(o) {
25
+ ownKeys = Object.getOwnPropertyNames || function (o) {
26
+ var ar = [];
27
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
28
+ return ar;
29
+ };
30
+ return ownKeys(o);
31
+ };
32
+ return function (mod) {
33
+ if (mod && mod.__esModule) return mod;
34
+ var result = {};
35
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
36
+ __setModuleDefault(result, mod);
37
+ return result;
38
+ };
39
+ })();
7
40
  var __importDefault = (this && this.__importDefault) || function (mod) {
8
41
  return (mod && mod.__esModule) ? mod : { "default": mod };
9
42
  };
@@ -15,6 +48,7 @@ const ws_1 = __importDefault(require("ws"));
15
48
  const reconnecting_websocket_1 = __importDefault(require("@opensumi/reconnecting-websocket"));
16
49
  /* internal dependencies */
17
50
  const speechflow_node_1 = __importDefault(require("./speechflow-node"));
51
+ const utils = __importStar(require("./speechflow-utils"));
18
52
  /* SpeechFlow node for Websocket networking */
19
53
  class SpeechFlowNodeWebsocket extends speechflow_node_1.default {
20
54
  /* declare official node name */
@@ -23,12 +57,13 @@ class SpeechFlowNodeWebsocket extends speechflow_node_1.default {
23
57
  server = null;
24
58
  client = null;
25
59
  /* construct node */
26
- constructor(id, opts, args) {
27
- super(id, opts, args);
60
+ constructor(id, cfg, opts, args) {
61
+ super(id, cfg, opts, args);
28
62
  /* declare node configuration parameters */
29
63
  this.configure({
30
64
  listen: { type: "string", val: "", match: /^(?:|ws:\/\/(.+?):(\d+))$/ },
31
65
  connect: { type: "string", val: "", match: /^(?:|ws:\/\/(.+?):(\d+)(?:\/.*)?)$/ },
66
+ mode: { type: "string", val: "r", match: /^(?:r|w|rw)$/ },
32
67
  type: { type: "string", val: "text", match: /^(?:audio|text)$/ }
33
68
  });
34
69
  /* sanity check usage */
@@ -37,11 +72,15 @@ class SpeechFlowNodeWebsocket extends speechflow_node_1.default {
37
72
  else if (this.params.listen === "" && this.params.connect === "")
38
73
  throw new Error("Websocket node requires either listen or connect mode");
39
74
  /* declare node input/output format */
40
- if (this.params.listen !== "") {
75
+ if (this.params.mode === "rw") {
76
+ this.input = this.params.type;
77
+ this.output = this.params.type;
78
+ }
79
+ else if (this.params.mode === "r") {
41
80
  this.input = "none";
42
81
  this.output = this.params.type;
43
82
  }
44
- else if (this.params.connect !== "") {
83
+ else if (this.params.mode === "w") {
45
84
  this.input = this.params.type;
46
85
  this.output = "none";
47
86
  }
@@ -51,7 +90,8 @@ class SpeechFlowNodeWebsocket extends speechflow_node_1.default {
51
90
  if (this.params.listen !== "") {
52
91
  /* listen locally on a Websocket port */
53
92
  const url = new URL(this.params.listen);
54
- let websocket = null;
93
+ const websockets = new Set();
94
+ const chunkQueue = new utils.SingleQueue();
55
95
  const server = new ws_1.default.WebSocketServer({
56
96
  host: url.hostname,
57
97
  port: Number.parseInt(url.port),
@@ -61,40 +101,81 @@ class SpeechFlowNodeWebsocket extends speechflow_node_1.default {
61
101
  this.log("info", `listening on URL ${this.params.listen}`);
62
102
  });
63
103
  server.on("connection", (ws, request) => {
64
- this.log("info", `connection opened on URL ${this.params.listen}`);
65
- websocket = ws;
66
- });
67
- server.on("close", () => {
68
- this.log("info", `connection closed on URL ${this.params.listen}`);
69
- websocket = null;
104
+ const peer = `${request.socket.remoteAddress}:${request.socket.remotePort}`;
105
+ this.log("info", `connection opened on URL ${this.params.listen} by peer ${peer}`);
106
+ websockets.add(ws);
107
+ ws.on("close", () => {
108
+ this.log("info", `connection closed on URL ${this.params.listen} by peer ${peer}`);
109
+ websockets.delete(ws);
110
+ });
111
+ ws.on("error", (error) => {
112
+ this.log("error", `error of connection on URL ${this.params.listen} for peer ${peer}: ${error.message}`);
113
+ });
114
+ ws.on("message", (data, isBinary) => {
115
+ if (this.params.mode === "w") {
116
+ this.log("warning", `connection on URL ${this.params.listen} by peer ${peer}: ` +
117
+ "received remote data on write-only node");
118
+ return;
119
+ }
120
+ if (!isBinary) {
121
+ this.log("warning", `connection on URL ${this.params.listen} by peer ${peer}: ` +
122
+ "received non-binary message");
123
+ return;
124
+ }
125
+ let buffer;
126
+ if (Buffer.isBuffer(data))
127
+ buffer = data;
128
+ else if (data instanceof ArrayBuffer)
129
+ buffer = Buffer.from(data);
130
+ else
131
+ buffer = Buffer.concat(data);
132
+ const chunk = utils.streamChunkDecode(buffer);
133
+ chunkQueue.write(chunk);
134
+ });
70
135
  });
71
136
  server.on("error", (error) => {
72
- this.log("error", `error on URL ${this.params.listen}: ${error.message}`);
73
- websocket = null;
137
+ this.log("error", `error of some connection on URL ${this.params.listen}: ${error.message}`);
74
138
  });
75
- const textEncoding = this.config.textEncoding;
139
+ const type = this.params.type;
140
+ const mode = this.params.mode;
76
141
  this.stream = new node_stream_1.default.Duplex({
142
+ writableObjectMode: true,
143
+ readableObjectMode: true,
144
+ decodeStrings: false,
77
145
  write(chunk, encoding, callback) {
78
- const data = chunk.buffer.slice(chunk.byteOffset, chunk.byteOffset + chunk.byteLength);
79
- if (websocket !== null) {
80
- websocket.send(data, (error) => {
81
- if (error)
82
- callback(error);
83
- else
84
- callback();
146
+ if (mode === "r")
147
+ callback(new Error("write operation on read-only node"));
148
+ else if (chunk.type !== type)
149
+ callback(new Error(`written chunk is not of ${type} type`));
150
+ else if (websockets.size === 0)
151
+ callback(new Error("still no Websocket connections available"));
152
+ else {
153
+ const data = utils.streamChunkEncode(chunk);
154
+ const results = [];
155
+ for (const websocket of websockets.values()) {
156
+ results.push(new Promise((resolve, reject) => {
157
+ websocket.send(data, (error) => {
158
+ if (error)
159
+ reject(error);
160
+ else
161
+ resolve();
162
+ });
163
+ }));
164
+ }
165
+ Promise.all(results).then(() => {
166
+ callback();
167
+ }).catch((errors) => {
168
+ const error = new Error(errors.map((e) => e.message).join("; "));
169
+ callback(error);
85
170
  });
86
171
  }
87
- else
88
- callback(new Error("still no Websocket connection available"));
89
172
  },
90
173
  read(size) {
91
- if (websocket !== null) {
92
- websocket.once("message", (data, isBinary) => {
93
- this.push(data, isBinary ? "binary" : textEncoding);
94
- });
95
- }
96
- else
97
- throw new Error("still no Websocket connection available");
174
+ if (mode === "w")
175
+ throw new Error("read operation on write-only node");
176
+ chunkQueue.read().then((chunk) => {
177
+ this.push(chunk, "binary");
178
+ });
98
179
  }
99
180
  });
100
181
  }
@@ -110,38 +191,57 @@ class SpeechFlowNodeWebsocket extends speechflow_node_1.default {
110
191
  minUptime: 5000
111
192
  });
112
193
  this.client.addEventListener("open", (ev) => {
113
- this.log("info", `connection opened on URL ${this.params.connect}`);
194
+ this.log("info", `connection opened to URL ${this.params.connect}`);
114
195
  });
115
196
  this.client.addEventListener("close", (ev) => {
116
- this.log("info", `connection closed on URL ${this.params.connect}`);
197
+ this.log("info", `connection closed to URL ${this.params.connect}`);
117
198
  });
118
199
  this.client.addEventListener("error", (ev) => {
119
- this.log("error", `error on URL ${this.params.connect}: ${ev.error.message}`);
200
+ this.log("error", `error of connection on URL ${this.params.connect}: ${ev.error.message}`);
201
+ });
202
+ const chunkQueue = new utils.SingleQueue();
203
/*  receive chunks from the remote Websocket server: binary messages are
    decoded and queued for the readable side of the stream, everything
    else is dropped with a warning  */
this.client.addEventListener("message", (ev) => {
    if (this.params.mode === "w") {
        /*  report the URL we connected to ("listen" is empty in connect mode)  */
        this.log("warning", `connection to URL ${this.params.connect}: ` +
            "received remote data on write-only node");
        return;
    }
    if (!(ev.data instanceof ArrayBuffer)) {
        this.log("warning", `connection to URL ${this.params.connect}: ` +
            "received non-binary message");
        return;
    }
    const buffer = Buffer.from(ev.data);
    const chunk = utils.streamChunkDecode(buffer);
    chunkQueue.write(chunk);
});
121
218
  const client = this.client;
122
219
  client.binaryType = "arraybuffer";
123
- const textEncoding = this.config.textEncoding;
220
+ const type = this.params.type;
221
+ const mode = this.params.mode;
124
222
  this.stream = new node_stream_1.default.Duplex({
223
+ writableObjectMode: true,
224
+ readableObjectMode: true,
225
+ decodeStrings: false,
125
226
  write(chunk, encoding, callback) {
126
- const data = chunk.buffer.slice(chunk.byteOffset, chunk.byteOffset + chunk.byteLength);
127
- if (client.OPEN) {
128
- client.send(data);
129
- callback();
130
- }
131
- else
227
+ if (mode === "r")
228
+ callback(new Error("write operation on read-only node"));
229
+ else if (chunk.type !== type)
230
+ callback(new Error(`written chunk is not of ${type} type`));
231
+ else if (!client.OPEN)
132
232
  callback(new Error("still no Websocket connection available"));
233
+ const data = utils.streamChunkEncode(chunk);
234
+ client.send(data);
235
+ callback();
133
236
  },
134
237
  read(size) {
135
- if (client.OPEN) {
136
- client.addEventListener("message", (ev) => {
137
- if (ev.data instanceof ArrayBuffer)
138
- this.push(ev.data, "binary");
139
- else
140
- this.push(ev.data, textEncoding);
141
- }, { once: true });
142
- }
143
- else
238
+ if (mode === "w")
239
+ throw new Error("read operation on write-only node");
240
+ if (!client.OPEN)
144
241
  throw new Error("still no Websocket connection available");
242
+ chunkQueue.read().then((chunk) => {
243
+ this.push(chunk, "binary");
244
+ });
145
245
  }
146
246
  });
147
247
  }
@@ -0,0 +1,34 @@
1
+ export type TranscriptionTaskRequest = {
2
+ type: "intermediate" | "final";
3
+ id: number;
4
+ language: string;
5
+ audio: Float32Array;
6
+ };
7
+ export type TranscriptionTaskResponse = {
8
+ type: "intermediate" | "final";
9
+ id: number;
10
+ language: string;
11
+ text: string;
12
+ };
13
+ export type WorkerRequest = {
14
+ type: "open";
15
+ cacheDir: string;
16
+ model: string;
17
+ } | {
18
+ type: "task-request";
19
+ task: TranscriptionTaskRequest;
20
+ } | {
21
+ type: "close";
22
+ };
23
+ export type WorkerResponse = {
24
+ type: "log";
25
+ message: string;
26
+ } | {
27
+ type: "error";
28
+ message: string;
29
+ } | {
30
+ type: "ok";
31
+ } | {
32
+ type: "task-response";
33
+ task: TranscriptionTaskResponse;
34
+ };
@@ -0,0 +1,7 @@
1
+ "use strict";
2
+ /*
3
+ ** SpeechFlow - Speech Processing Flow Graph
4
+ ** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
5
+ ** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
6
+ */
7
+ Object.defineProperty(exports, "__esModule", { value: true });
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,97 @@
1
+ "use strict";
2
+ /*
3
+ ** SpeechFlow - Speech Processing Flow Graph
4
+ ** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
5
+ ** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
6
+ */
7
+ var __importDefault = (this && this.__importDefault) || function (mod) {
8
+ return (mod && mod.__esModule) ? mod : { "default": mod };
9
+ };
10
+ Object.defineProperty(exports, "__esModule", { value: true });
11
+ /* standard dependencies */
12
+ const node_worker_threads_1 = __importDefault(require("node:worker_threads"));
13
+ /* external dependencies */
14
+ const smart_whisper_1 = require("smart-whisper");
15
+ /* utility function for sending a log message */
16
+ const log = (message) => node_worker_threads_1.default.parentPort.postMessage({ type: "log", message });
17
+ /* internal state */
18
+ let whisper = null;
19
+ /* OpenAI Whisper models (GGML variants for Whisper.cpp) */
20
+ const models = {
21
+ "v1-tiny": { model: "tiny" },
22
+ "v1-base": { model: "base" },
23
+ "v1-small": { model: "small" },
24
+ "v1-medium": { model: "medium" },
25
+ "v2-large": { model: "large-v2" },
26
+ "v3-large": { model: "large-v3" },
27
+ "v3-large-turbo": { model: "large-v3-turbo" }
28
+ };
29
+ /* thread communication hook */
30
/*  thread communication hook: processes the "open", "task-request" and
    "close" requests of the parent thread and posts at most one response
    per request; any failure is reported as an "error" response instead
    of escaping as an unhandled rejection  */
node_worker_threads_1.default.parentPort?.on("message", async (request) => {
    const describe = (error) => (error instanceof Error ? error.message : String(error));
    let response = null;
    try {
        if (request.type === "open") {
            /* initialize Whisper */
            const model = models[request.model]?.model;
            if (!model)
                response = { type: "error", message: `unknown Whisper model "${request.model}"` };
            else {
                log(`loading Whisper model "${request.model}": BEGIN`);
                try {
                    const name = await smart_whisper_1.manager.download(model);
                    const resolved = smart_whisper_1.manager.resolve(name);
                    const instance = new smart_whisper_1.Whisper(resolved, {
                        gpu: true,
                        offload: 120 * 60
                    });
                    await instance.load();
                    /* publish the instance only after it was loaded successfully */
                    whisper = instance;
                    log(`loading Whisper model "${request.model}": SUCCESS`);
                    response = { type: "ok" };
                }
                catch (error) {
                    log(`loading Whisper model "${request.model}": FAILED`);
                    response = { type: "error", message: `failed to open Whisper: ${describe(error)}` };
                }
            }
        }
        else if (request.type === "task-request") {
            /* perform a speech-to-text transcription with Whisper */
            if (whisper === null)
                response = { type: "error", message: "transcription task requested before Whisper was opened" };
            else {
                log(`${request.task.type} transcription task ${request.task.id}: START`);
                const task = await whisper.transcribe(request.task.audio, {
                    language: request.task.language,
                    n_threads: 16,
                    no_timestamps: false,
                    speed_up: true,
                    suppress_non_speech_tokens: true,
                    suppress_blank: true,
                    debug_mode: false,
                    print_special: false,
                    print_progress: false,
                    print_realtime: false,
                    print_timestamps: false
                });
                task.on("transcribed", (result) => {
                    console.log("TRANSCRIBED", JSON.stringify(result));
                });
                const result = await task.result;
                log(`${request.task.type} transcription task ${request.task.id}: END`);
                console.log("RESULT", result);
                /*  NOTE(review): only the first result entry is used -- confirm
                    whether further entries should be appended to the text  */
                const text = result[0]?.text;
                const taskResponse = {
                    type: request.task.type,
                    id: request.task.id,
                    language: request.task.language,
                    text: text ?? ""
                };
                response = { type: "task-response", task: taskResponse };
            }
        }
        else if (request.type === "close") {
            /* shutdown Whisper */
            if (whisper !== null) {
                log("unloading Whisper model: BEGIN");
                await whisper.free();
                whisper = null;
                log("unloading Whisper model: END");
            }
        }
    }
    catch (error) {
        response = { type: "error", message: `${request.type} request failed: ${describe(error)}` };
    }
    if (response !== null)
        node_worker_threads_1.default.parentPort.postMessage(response);
});
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,131 @@
1
+ "use strict";
2
+ /*
3
+ ** SpeechFlow - Speech Processing Flow Graph
4
+ ** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
5
+ ** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
6
+ */
7
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
8
+ if (k2 === undefined) k2 = k;
9
+ var desc = Object.getOwnPropertyDescriptor(m, k);
10
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
11
+ desc = { enumerable: true, get: function() { return m[k]; } };
12
+ }
13
+ Object.defineProperty(o, k2, desc);
14
+ }) : (function(o, m, k, k2) {
15
+ if (k2 === undefined) k2 = k;
16
+ o[k2] = m[k];
17
+ }));
18
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
19
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
20
+ }) : function(o, v) {
21
+ o["default"] = v;
22
+ });
23
+ var __importStar = (this && this.__importStar) || (function () {
24
+ var ownKeys = function(o) {
25
+ ownKeys = Object.getOwnPropertyNames || function (o) {
26
+ var ar = [];
27
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
28
+ return ar;
29
+ };
30
+ return ownKeys(o);
31
+ };
32
+ return function (mod) {
33
+ if (mod && mod.__esModule) return mod;
34
+ var result = {};
35
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
36
+ __setModuleDefault(result, mod);
37
+ return result;
38
+ };
39
+ })();
40
+ var __importDefault = (this && this.__importDefault) || function (mod) {
41
+ return (mod && mod.__esModule) ? mod : { "default": mod };
42
+ };
43
+ Object.defineProperty(exports, "__esModule", { value: true });
44
+ /* standard dependencies */
45
+ const node_path_1 = __importDefault(require("node:path"));
46
+ const node_worker_threads_1 = __importDefault(require("node:worker_threads"));
47
+ /* external dependencies */
48
+ const Transformers = __importStar(require("@huggingface/transformers"));
49
+ /* utility function for sending a log message */
50
+ const log = (message) => node_worker_threads_1.default.parentPort.postMessage({ type: "log", message });
51
+ /* internal state */
52
+ let transcriber = null;
53
+ /* OpenAI Whisper models (ONNX variants) */
54
+ const models = {
55
+ "v1-tiny": { model: "onnx-community/whisper-tiny-ONNX" },
56
+ "v1-base": { model: "onnx-community/whisper-base" },
57
+ "v1-small": { model: "onnx-community/whisper-small" },
58
+ "v1-medium": { model: "onnx-community/whisper-medium-ONNX" },
59
+ "v2-large": { model: "reach-vb/whisper-large-v2-onnx" },
60
+ "v3-large": { model: "onnx-community/whisper-large-v3-ONNX" },
61
+ "v3-large-turbo": { model: "onnx-community/whisper-large-v3-turbo" }
62
+ };
63
+ /* thread communication hook */
64
/*  thread communication hook: processes the "open", "task-request" and
    "close" requests of the parent thread and posts at most one response
    per request; any failure is reported as an "error" response instead
    of escaping as an unhandled rejection  */
node_worker_threads_1.default.parentPort?.on("message", async (request) => {
    const describe = (error) => (error instanceof Error ? error.message : String(error));
    let response = null;
    try {
        if (request.type === "open") {
            /* initialize Whisper */
            const model = models[request.model]?.model;
            if (!model)
                response = { type: "error", message: `unknown Whisper model "${request.model}"` };
            else {
                log(`loading Whisper model "${request.model}": BEGIN`);
                try {
                    transcriber = await Transformers.pipeline("automatic-speech-recognition", model, {
                        cache_dir: node_path_1.default.join(request.cacheDir, "whisper"),
                        dtype: "q4",
                        device: "gpu"
                    });
                    log(`loading Whisper model "${request.model}": SUCCESS`);
                    response = { type: "ok" };
                }
                catch (error) {
                    transcriber = null;
                    log(`loading Whisper model "${request.model}": FAILED`);
                    response = { type: "error", message: `failed to open Whisper: ${describe(error)}` };
                }
            }
        }
        else if (request.type === "task-request") {
            /* perform a speech-to-text transcription with Whisper
               (streaming of partial text via Transformers.TextStreamer is not enabled) */
            if (transcriber === null)
                response = { type: "error", message: "transcription task requested before Whisper was opened" };
            else {
                log(`${request.task.type} transcription task ${request.task.id}: START`);
                const result = await transcriber(request.task.audio, {
                    chunk_length_s: 3,
                    stride_length_s: 1,
                    language: request.task.language,
                    task: "transcribe",
                    force_full_sequences: false,
                    use_cache: true,
                    return_timestamps: true
                });
                log(`${request.task.type} transcription task ${request.task.id}: END`);
                console.log("RESULT", JSON.stringify(result));
                /* the result is either a single object or a list of them */
                const text = Array.isArray(result) ? result[0]?.text : result?.text;
                const taskResponse = {
                    type: request.task.type,
                    id: request.task.id,
                    language: request.task.language,
                    text: text ?? ""
                };
                response = { type: "task-response", task: taskResponse };
            }
        }
        else if (request.type === "close") {
            /* shutdown Whisper */
            if (transcriber !== null) {
                log("unloading Whisper model: BEGIN");
                await transcriber.dispose();
                transcriber = null;
                log("unloading Whisper model: END");
            }
        }
    }
    catch (error) {
        response = { type: "error", message: `${request.type} request failed: ${describe(error)}` };
    }
    if (response !== null)
        node_worker_threads_1.default.parentPort.postMessage(response);
});
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,97 @@
1
+ "use strict";
2
+ /*
3
+ ** SpeechFlow - Speech Processing Flow Graph
4
+ ** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
5
+ ** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
6
+ */
7
+ var __importDefault = (this && this.__importDefault) || function (mod) {
8
+ return (mod && mod.__esModule) ? mod : { "default": mod };
9
+ };
10
+ Object.defineProperty(exports, "__esModule", { value: true });
11
+ /* standard dependencies */
12
+ const node_worker_threads_1 = __importDefault(require("node:worker_threads"));
13
+ /* external dependencies */
14
+ const smart_whisper_1 = require("smart-whisper");
15
+ /* utility function for sending a log message */
16
+ const log = (message) => node_worker_threads_1.default.parentPort.postMessage({ type: "log", message });
17
+ /* internal state */
18
+ let whisper = null;
19
+ /* OpenAI Whisper models (GGML variants for Whisper.cpp) */
20
+ const models = {
21
+ "v1-tiny": { model: "tiny" },
22
+ "v1-base": { model: "base" },
23
+ "v1-small": { model: "small" },
24
+ "v1-medium": { model: "medium" },
25
+ "v2-large": { model: "large-v2" },
26
+ "v3-large": { model: "large-v3" },
27
+ "v3-large-turbo": { model: "large-v3-turbo" }
28
+ };
29
+ /* thread communication hook */
30
/*  thread communication hook: processes the "open", "task-request" and
    "close" requests of the parent thread and posts at most one response
    per request; any failure is reported as an "error" response instead
    of escaping as an unhandled rejection  */
node_worker_threads_1.default.parentPort?.on("message", async (request) => {
    const describe = (error) => (error instanceof Error ? error.message : String(error));
    let response = null;
    try {
        if (request.type === "open") {
            /* initialize Whisper */
            const model = models[request.model]?.model;
            if (!model)
                response = { type: "error", message: `unknown Whisper model "${request.model}"` };
            else {
                log(`loading Whisper model "${request.model}": BEGIN`);
                try {
                    const name = await smart_whisper_1.manager.download(model);
                    const resolved = smart_whisper_1.manager.resolve(name);
                    const instance = new smart_whisper_1.Whisper(resolved, {
                        gpu: true,
                        offload: 120 * 60
                    });
                    await instance.load();
                    /* publish the instance only after it was loaded successfully */
                    whisper = instance;
                    log(`loading Whisper model "${request.model}": SUCCESS`);
                    response = { type: "ok" };
                }
                catch (error) {
                    log(`loading Whisper model "${request.model}": FAILED`);
                    response = { type: "error", message: `failed to open Whisper: ${describe(error)}` };
                }
            }
        }
        else if (request.type === "task-request") {
            /* perform a speech-to-text transcription with Whisper */
            if (whisper === null)
                response = { type: "error", message: "transcription task requested before Whisper was opened" };
            else {
                log(`${request.task.type} transcription task ${request.task.id}: START`);
                const task = await whisper.transcribe(request.task.audio, {
                    language: request.task.language,
                    n_threads: 16,
                    no_timestamps: false,
                    speed_up: true,
                    suppress_non_speech_tokens: true,
                    suppress_blank: true,
                    debug_mode: false,
                    print_special: false,
                    print_progress: false,
                    print_realtime: false,
                    print_timestamps: false
                });
                task.on("transcribed", (result) => {
                    console.log("TRANSCRIBED", JSON.stringify(result));
                });
                const result = await task.result;
                log(`${request.task.type} transcription task ${request.task.id}: END`);
                console.log("RESULT", result);
                /*  NOTE(review): only the first result entry is used -- confirm
                    whether further entries should be appended to the text  */
                const text = result[0]?.text;
                const taskResponse = {
                    type: request.task.type,
                    id: request.task.id,
                    language: request.task.language,
                    text: text ?? ""
                };
                response = { type: "task-response", task: taskResponse };
            }
        }
        else if (request.type === "close") {
            /* shutdown Whisper */
            if (whisper !== null) {
                log("unloading Whisper model: BEGIN");
                await whisper.free();
                whisper = null;
                log("unloading Whisper model: END");
            }
        }
    }
    catch (error) {
        response = { type: "error", message: `${request.type} request failed: ${describe(error)}` };
    }
    if (response !== null)
        node_worker_threads_1.default.parentPort.postMessage(response);
});
@@ -0,0 +1 @@
1
+ export {};