speechflow 0.9.9 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110)
  1. package/CHANGELOG.md +8 -0
  2. package/README.md +48 -1
  3. package/dst/speechflow-node-a2a-ffmpeg.js +1 -0
  4. package/dst/speechflow-node-a2a-ffmpeg.js.map +1 -0
  5. package/dst/{speechflow-node-gemma.d.ts → speechflow-node-a2a-meter.d.ts} +2 -3
  6. package/dst/speechflow-node-a2a-meter.js +147 -0
  7. package/dst/speechflow-node-a2a-meter.js.map +1 -0
  8. package/dst/speechflow-node-a2a-mute.d.ts +16 -0
  9. package/dst/speechflow-node-a2a-mute.js +90 -0
  10. package/dst/speechflow-node-a2a-mute.js.map +1 -0
  11. package/dst/speechflow-node-a2a-vad.js +130 -289
  12. package/dst/speechflow-node-a2a-vad.js.map +1 -0
  13. package/dst/speechflow-node-a2a-wav.js +1 -0
  14. package/dst/speechflow-node-a2a-wav.js.map +1 -0
  15. package/dst/speechflow-node-a2t-deepgram.js +2 -1
  16. package/dst/speechflow-node-a2t-deepgram.js.map +1 -0
  17. package/dst/speechflow-node-t2a-elevenlabs.js +1 -0
  18. package/dst/speechflow-node-t2a-elevenlabs.js.map +1 -0
  19. package/dst/speechflow-node-t2a-kokoro.js +1 -0
  20. package/dst/speechflow-node-t2a-kokoro.js.map +1 -0
  21. package/dst/speechflow-node-t2t-deepl.js +1 -0
  22. package/dst/speechflow-node-t2t-deepl.js.map +1 -0
  23. package/dst/speechflow-node-t2t-format.js +1 -0
  24. package/dst/speechflow-node-t2t-format.js.map +1 -0
  25. package/dst/speechflow-node-t2t-ollama.js +1 -0
  26. package/dst/speechflow-node-t2t-ollama.js.map +1 -0
  27. package/dst/speechflow-node-t2t-openai.js +1 -0
  28. package/dst/speechflow-node-t2t-openai.js.map +1 -0
  29. package/dst/speechflow-node-t2t-subtitle.js +1 -0
  30. package/dst/speechflow-node-t2t-subtitle.js.map +1 -0
  31. package/dst/speechflow-node-t2t-transformers.js +10 -6
  32. package/dst/speechflow-node-t2t-transformers.js.map +1 -0
  33. package/dst/speechflow-node-x2x-trace.js +1 -0
  34. package/dst/speechflow-node-x2x-trace.js.map +1 -0
  35. package/dst/speechflow-node-xio-device.js +1 -0
  36. package/dst/speechflow-node-xio-device.js.map +1 -0
  37. package/dst/speechflow-node-xio-file.js +1 -0
  38. package/dst/speechflow-node-xio-file.js.map +1 -0
  39. package/dst/speechflow-node-xio-mqtt.js +1 -0
  40. package/dst/speechflow-node-xio-mqtt.js.map +1 -0
  41. package/dst/speechflow-node-xio-websocket.js +1 -0
  42. package/dst/speechflow-node-xio-websocket.js.map +1 -0
  43. package/dst/speechflow-node.d.ts +3 -0
  44. package/dst/speechflow-node.js +10 -0
  45. package/dst/speechflow-node.js.map +1 -0
  46. package/dst/speechflow-utils.d.ts +33 -0
  47. package/dst/speechflow-utils.js +183 -1
  48. package/dst/speechflow-utils.js.map +1 -0
  49. package/dst/speechflow.js +209 -6
  50. package/dst/speechflow.js.map +1 -0
  51. package/etc/speechflow.yaml +5 -3
  52. package/etc/stx.conf +1 -1
  53. package/etc/tsconfig.json +2 -2
  54. package/package.json +14 -8
  55. package/src/speechflow-node-a2a-meter.ts +125 -0
  56. package/src/speechflow-node-a2a-mute.ts +101 -0
  57. package/src/speechflow-node-a2a-vad.ts +266 -0
  58. package/src/speechflow-node-a2t-deepgram.ts +1 -1
  59. package/src/speechflow-node-t2t-transformers.ts +12 -7
  60. package/src/speechflow-node-xio-websocket.ts +5 -5
  61. package/src/speechflow-node.ts +12 -0
  62. package/src/speechflow-utils.ts +195 -0
  63. package/src/speechflow.ts +193 -6
  64. package/dst/speechflow-node-deepgram.d.ts +0 -12
  65. package/dst/speechflow-node-deepgram.js +0 -220
  66. package/dst/speechflow-node-deepl.d.ts +0 -12
  67. package/dst/speechflow-node-deepl.js +0 -128
  68. package/dst/speechflow-node-device.d.ts +0 -13
  69. package/dst/speechflow-node-device.js +0 -205
  70. package/dst/speechflow-node-elevenlabs.d.ts +0 -13
  71. package/dst/speechflow-node-elevenlabs.js +0 -182
  72. package/dst/speechflow-node-ffmpeg.d.ts +0 -13
  73. package/dst/speechflow-node-ffmpeg.js +0 -152
  74. package/dst/speechflow-node-file.d.ts +0 -11
  75. package/dst/speechflow-node-file.js +0 -176
  76. package/dst/speechflow-node-format.d.ts +0 -11
  77. package/dst/speechflow-node-format.js +0 -80
  78. package/dst/speechflow-node-gemma.js +0 -213
  79. package/dst/speechflow-node-mqtt.d.ts +0 -13
  80. package/dst/speechflow-node-mqtt.js +0 -181
  81. package/dst/speechflow-node-opus.d.ts +0 -12
  82. package/dst/speechflow-node-opus.js +0 -135
  83. package/dst/speechflow-node-subtitle.d.ts +0 -12
  84. package/dst/speechflow-node-subtitle.js +0 -96
  85. package/dst/speechflow-node-t2t-gemma.d.ts +0 -13
  86. package/dst/speechflow-node-t2t-gemma.js +0 -233
  87. package/dst/speechflow-node-t2t-opus.d.ts +0 -12
  88. package/dst/speechflow-node-t2t-opus.js +0 -135
  89. package/dst/speechflow-node-trace.d.ts +0 -11
  90. package/dst/speechflow-node-trace.js +0 -88
  91. package/dst/speechflow-node-wav.d.ts +0 -11
  92. package/dst/speechflow-node-wav.js +0 -170
  93. package/dst/speechflow-node-websocket.d.ts +0 -13
  94. package/dst/speechflow-node-websocket.js +0 -275
  95. package/dst/speechflow-node-whisper-common.d.ts +0 -34
  96. package/dst/speechflow-node-whisper-common.js +0 -7
  97. package/dst/speechflow-node-whisper-ggml.d.ts +0 -1
  98. package/dst/speechflow-node-whisper-ggml.js +0 -97
  99. package/dst/speechflow-node-whisper-onnx.d.ts +0 -1
  100. package/dst/speechflow-node-whisper-onnx.js +0 -131
  101. package/dst/speechflow-node-whisper-worker-ggml.d.ts +0 -1
  102. package/dst/speechflow-node-whisper-worker-ggml.js +0 -97
  103. package/dst/speechflow-node-whisper-worker-onnx.d.ts +0 -1
  104. package/dst/speechflow-node-whisper-worker-onnx.js +0 -131
  105. package/dst/speechflow-node-whisper-worker.d.ts +0 -1
  106. package/dst/speechflow-node-whisper-worker.js +0 -116
  107. package/dst/speechflow-node-whisper-worker2.d.ts +0 -1
  108. package/dst/speechflow-node-whisper-worker2.js +0 -82
  109. package/dst/speechflow-node-whisper.d.ts +0 -19
  110. package/dst/speechflow-node-whisper.js +0 -604
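Most of the churn above comes from the 1.0.0 renaming of the compiled node files: every node now carries a data-type prefix (a2a, a2t, t2a, t2t, x2x, xio), apparently encoding its input/output types, and audio-to-audio meter, mute, and vad nodes are new. For orientation only, here is a minimal, hypothetical TypeScript sketch of the kind of pass-through level metering an audio-to-audio meter node performs on raw PCM; the class name, the 16-bit little-endian assumption, and the logging are illustrative and not taken from the package's speechflow-node-a2a-meter implementation.

    /* Illustrative sketch only -- NOT the package's a2a-meter implementation.
       Assumes signed 16-bit little-endian PCM delivered in plain Buffer chunks. */
    import Stream from "node:stream"

    class AudioMeter extends Stream.Transform {
        _transform (chunk: Buffer, _enc: BufferEncoding, cb: Stream.TransformCallback) {
            let sumSquares = 0
            let peak = 0
            const samples = chunk.length >> 1
            for (let i = 0; i < samples; i++) {
                const s = chunk.readInt16LE(i * 2) / 32768   /* normalize to -1..+1 */
                sumSquares += s * s
                peak = Math.max(peak, Math.abs(s))
            }
            const rms = Math.sqrt(sumSquares / Math.max(samples, 1))
            console.log(`meter: RMS ${(20 * Math.log10(rms || 1e-10)).toFixed(1)} dBFS, ` +
                `peak ${(20 * Math.log10(peak || 1e-10)).toFixed(1)} dBFS`)
            this.push(chunk)   /* pass the audio through unchanged */
            cb()
        }
    }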
package/dst/speechflow-node-websocket.js
@@ -1,275 +0,0 @@
- "use strict";
- /*
- ** SpeechFlow - Speech Processing Flow Graph
- ** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
- ** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
- */
- var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
- if (k2 === undefined) k2 = k;
- var desc = Object.getOwnPropertyDescriptor(m, k);
- if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
- desc = { enumerable: true, get: function() { return m[k]; } };
- }
- Object.defineProperty(o, k2, desc);
- }) : (function(o, m, k, k2) {
- if (k2 === undefined) k2 = k;
- o[k2] = m[k];
- }));
- var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
- Object.defineProperty(o, "default", { enumerable: true, value: v });
- }) : function(o, v) {
- o["default"] = v;
- });
- var __importStar = (this && this.__importStar) || (function () {
- var ownKeys = function(o) {
- ownKeys = Object.getOwnPropertyNames || function (o) {
- var ar = [];
- for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
- return ar;
- };
- return ownKeys(o);
- };
- return function (mod) {
- if (mod && mod.__esModule) return mod;
- var result = {};
- if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
- __setModuleDefault(result, mod);
- return result;
- };
- })();
- var __importDefault = (this && this.__importDefault) || function (mod) {
- return (mod && mod.__esModule) ? mod : { "default": mod };
- };
- Object.defineProperty(exports, "__esModule", { value: true });
- /* standard dependencies */
- const node_stream_1 = __importDefault(require("node:stream"));
- /* external dependencies */
- const ws_1 = __importDefault(require("ws"));
- const reconnecting_websocket_1 = __importDefault(require("@opensumi/reconnecting-websocket"));
- /* internal dependencies */
- const speechflow_node_1 = __importDefault(require("./speechflow-node"));
- const utils = __importStar(require("./speechflow-utils"));
- /* SpeechFlow node for Websocket networking */
- class SpeechFlowNodeWebsocket extends speechflow_node_1.default {
- /* declare official node name */
- static name = "websocket";
- /* internal state */
- server = null;
- client = null;
- /* construct node */
- constructor(id, cfg, opts, args) {
- super(id, cfg, opts, args);
- /* declare node configuration parameters */
- this.configure({
- listen: { type: "string", val: "", match: /^(?:|ws:\/\/(.+?):(\d+))$/ },
- connect: { type: "string", val: "", match: /^(?:|ws:\/\/(.+?):(\d+)(?:\/.*)?)$/ },
- mode: { type: "string", val: "r", match: /^(?:r|w|rw)$/ },
- type: { type: "string", val: "text", match: /^(?:audio|text)$/ }
- });
- /* sanity check usage */
- if (this.params.listen !== "" && this.params.connect !== "")
- throw new Error("Websocket node cannot listen and connect at the same time");
- else if (this.params.listen === "" && this.params.connect === "")
- throw new Error("Websocket node requires either listen or connect mode");
- /* declare node input/output format */
- if (this.params.mode === "rw") {
- this.input = this.params.type;
- this.output = this.params.type;
- }
- else if (this.params.mode === "r") {
- this.input = "none";
- this.output = this.params.type;
- }
- else if (this.params.mode === "w") {
- this.input = this.params.type;
- this.output = "none";
- }
- }
- /* open node */
- async open() {
- if (this.params.listen !== "") {
- /* listen locally on a Websocket port */
- const url = new URL(this.params.listen);
- const websockets = new Set();
- const chunkQueue = new utils.SingleQueue();
- const server = new ws_1.default.WebSocketServer({
- host: url.hostname,
- port: Number.parseInt(url.port),
- path: url.pathname
- });
- server.on("listening", () => {
- this.log("info", `listening on URL ${this.params.listen}`);
- });
- server.on("connection", (ws, request) => {
- const peer = `${request.socket.remoteAddress}:${request.socket.remotePort}`;
- this.log("info", `connection opened on URL ${this.params.listen} by peer ${peer}`);
- websockets.add(ws);
- ws.on("close", () => {
- this.log("info", `connection closed on URL ${this.params.listen} by peer ${peer}`);
- websockets.delete(ws);
- });
- ws.on("error", (error) => {
- this.log("error", `error of connection on URL ${this.params.listen} for peer ${peer}: ${error.message}`);
- });
- ws.on("message", (data, isBinary) => {
- if (this.params.mode === "w") {
- this.log("warning", `connection on URL ${this.params.listen} by peer ${peer}: ` +
- "received remote data on write-only node");
- return;
- }
- if (!isBinary) {
- this.log("warning", `connection on URL ${this.params.listen} by peer ${peer}: ` +
- "received non-binary message");
- return;
- }
- let buffer;
- if (Buffer.isBuffer(data))
- buffer = data;
- else if (data instanceof ArrayBuffer)
- buffer = Buffer.from(data);
- else
- buffer = Buffer.concat(data);
- const chunk = utils.streamChunkDecode(buffer);
- chunkQueue.write(chunk);
- });
- });
- server.on("error", (error) => {
- this.log("error", `error of some connection on URL ${this.params.listen}: ${error.message}`);
- });
- const type = this.params.type;
- const mode = this.params.mode;
- this.stream = new node_stream_1.default.Duplex({
- writableObjectMode: true,
- readableObjectMode: true,
- decodeStrings: false,
- write(chunk, encoding, callback) {
- if (mode === "r")
- callback(new Error("write operation on read-only node"));
- else if (chunk.type !== type)
- callback(new Error(`written chunk is not of ${type} type`));
- else if (websockets.size === 0)
- callback(new Error("still no Websocket connections available"));
- else {
- const data = utils.streamChunkEncode(chunk);
- const results = [];
- for (const websocket of websockets.values()) {
- results.push(new Promise((resolve, reject) => {
- websocket.send(data, (error) => {
- if (error)
- reject(error);
- else
- resolve();
- });
- }));
- }
- Promise.all(results).then(() => {
- callback();
- }).catch((errors) => {
- const error = new Error(errors.map((e) => e.message).join("; "));
- callback(error);
- });
- }
- },
- read(size) {
- if (mode === "w")
- throw new Error("read operation on write-only node");
- chunkQueue.read().then((chunk) => {
- this.push(chunk, "binary");
- });
- }
- });
- }
- else if (this.params.connect !== "") {
- /* connect remotely to a Websocket port */
- this.client = new reconnecting_websocket_1.default(this.params.connect, [], {
- WebSocket: ws_1.default,
- WebSocketOptions: {},
- reconnectionDelayGrowFactor: 1.3,
- maxReconnectionDelay: 4000,
- minReconnectionDelay: 1000,
- connectionTimeout: 4000,
- minUptime: 5000
- });
- this.client.addEventListener("open", (ev) => {
- this.log("info", `connection opened to URL ${this.params.connect}`);
- });
- this.client.addEventListener("close", (ev) => {
- this.log("info", `connection closed to URL ${this.params.connect}`);
- });
- this.client.addEventListener("error", (ev) => {
- this.log("error", `error of connection on URL ${this.params.connect}: ${ev.error.message}`);
- });
- const chunkQueue = new utils.SingleQueue();
- this.client.addEventListener("message", (ev) => {
- if (this.params.mode === "w") {
- this.log("warning", `connection to URL ${this.params.listen}: ` +
- "received remote data on write-only node");
- return;
- }
- if (!(ev.data instanceof ArrayBuffer)) {
- this.log("warning", `connection to URL ${this.params.listen}: ` +
- "received non-binary message");
- return;
- }
- const buffer = Buffer.from(ev.data);
- const chunk = utils.streamChunkDecode(buffer);
- chunkQueue.write(chunk);
- });
- const client = this.client;
- client.binaryType = "arraybuffer";
- const type = this.params.type;
- const mode = this.params.mode;
- this.stream = new node_stream_1.default.Duplex({
- writableObjectMode: true,
- readableObjectMode: true,
- decodeStrings: false,
- write(chunk, encoding, callback) {
- if (mode === "r")
- callback(new Error("write operation on read-only node"));
- else if (chunk.type !== type)
- callback(new Error(`written chunk is not of ${type} type`));
- else if (!client.OPEN)
- callback(new Error("still no Websocket connection available"));
- const data = utils.streamChunkEncode(chunk);
- client.send(data);
- callback();
- },
- read(size) {
- if (mode === "w")
- throw new Error("read operation on write-only node");
- if (!client.OPEN)
- throw new Error("still no Websocket connection available");
- chunkQueue.read().then((chunk) => {
- this.push(chunk, "binary");
- });
- }
- });
- }
- }
- /* close node */
- async close() {
- /* close Websocket server */
- if (this.server !== null) {
- await new Promise((resolve, reject) => {
- this.server.close((error) => {
- if (error)
- reject(error);
- else
- resolve();
- });
- });
- this.server = null;
- }
- /* close Websocket client */
- if (this.client !== null) {
- this.client.close();
- this.client = null;
- }
- /* close stream */
- if (this.stream !== null) {
- this.stream.destroy();
- this.stream = null;
- }
- }
- }
- exports.default = SpeechFlowNodeWebsocket;
package/dst/speechflow-node-whisper-common.d.ts
@@ -1,34 +0,0 @@
- export type TranscriptionTaskRequest = {
- type: "intermediate" | "final";
- id: number;
- language: string;
- audio: Float32Array;
- };
- export type TranscriptionTaskResponse = {
- type: "intermediate" | "final";
- id: number;
- language: string;
- text: string;
- };
- export type WorkerRequest = {
- type: "open";
- cacheDir: string;
- model: string;
- } | {
- type: "task-request";
- task: TranscriptionTaskRequest;
- } | {
- type: "close";
- };
- export type WorkerResponse = {
- type: "log";
- message: string;
- } | {
- type: "error";
- message: string;
- } | {
- type: "ok";
- } | {
- type: "task-response";
- task: TranscriptionTaskResponse;
- };
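The declaration file removed above defined the message protocol between the old Whisper node and its worker threads; the worker scripts removed further below implement the responding side. For context, a parent-side driver for that protocol might have looked like the following sketch. Only the WorkerRequest/WorkerResponse shapes are taken from the removed declarations; the worker path, model name, and call sequencing are assumptions.

    /* Hypothetical parent-side driver for the removed worker protocol above
       (sketch only, not the package's actual code). */
    import { Worker } from "node:worker_threads"

    type TaskKind = "intermediate" | "final"
    type WorkerRequest =
        { type: "open", cacheDir: string, model: string } |
        { type: "task-request", task: { type: TaskKind, id: number, language: string, audio: Float32Array } } |
        { type: "close" }
    type WorkerResponse =
        { type: "log", message: string } |
        { type: "error", message: string } |
        { type: "ok" } |
        { type: "task-response", task: { type: TaskKind, id: number, language: string, text: string } }

    const worker = new Worker("./speechflow-node-whisper-worker-onnx.js")  /* assumed path */
    worker.on("message", (res: WorkerResponse) => {
        if      (res.type === "log")           console.log("worker:", res.message)
        else if (res.type === "error")         console.error("worker error:", res.message)
        else if (res.type === "task-response") console.log(`task ${res.task.id}: ${res.task.text}`)
    })
    const send = (req: WorkerRequest) => worker.postMessage(req)
    send({ type: "open", cacheDir: "./cache", model: "v3-large-turbo" })
    /* in real use one would wait for the "ok" response before sending tasks */
    send({ type: "task-request", task: { type: "final", id: 1, language: "en", audio: new Float32Array(16000) } })
    send({ type: "close" })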
package/dst/speechflow-node-whisper-common.js
@@ -1,7 +0,0 @@
- "use strict";
- /*
- ** SpeechFlow - Speech Processing Flow Graph
- ** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
- ** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
- */
- Object.defineProperty(exports, "__esModule", { value: true });
package/dst/speechflow-node-whisper-ggml.d.ts
@@ -1 +0,0 @@
- export {};
package/dst/speechflow-node-whisper-ggml.js
@@ -1,97 +0,0 @@
- "use strict";
- /*
- ** SpeechFlow - Speech Processing Flow Graph
- ** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
- ** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
- */
- var __importDefault = (this && this.__importDefault) || function (mod) {
- return (mod && mod.__esModule) ? mod : { "default": mod };
- };
- Object.defineProperty(exports, "__esModule", { value: true });
- /* standard dependencies */
- const node_worker_threads_1 = __importDefault(require("node:worker_threads"));
- /* external dependencies */
- const smart_whisper_1 = require("smart-whisper");
- /* utility function for sending a log message */
- const log = (message) => node_worker_threads_1.default.parentPort.postMessage({ type: "log", message });
- /* internal state */
- let whisper = null;
- /* OpenAI Whisper models (GGML variants for Whisper.cpp) */
- const models = {
- "v1-tiny": { model: "tiny" },
- "v1-base": { model: "base" },
- "v1-small": { model: "small" },
- "v1-medium": { model: "medium" },
- "v2-large": { model: "large-v2" },
- "v3-large": { model: "large-v3" },
- "v3-large-turbo": { model: "large-v3-turbo" }
- };
- /* thread communication hook */
- node_worker_threads_1.default.parentPort?.on("message", async (request) => {
- let response = null;
- if (request.type === "open") {
- /* initialize Whisper */
- const model = models[request.model]?.model;
- if (!model)
- response = { type: "error", message: `unknown Whisper model "${request.model}"` };
- else {
- log(`loading Whisper model "${request.model}": BEGIN`);
- const name = await smart_whisper_1.manager.download(model);
- const resolved = smart_whisper_1.manager.resolve(name);
- whisper = new smart_whisper_1.Whisper(resolved, {
- gpu: true,
- offload: 120 * 60
- });
- if (whisper === null) {
- log(`loading Whisper model "${request.model}": FAILED`);
- response = { type: "error", message: "failed to open Whisper" };
- }
- else {
- await whisper.load();
- log(`loading Whisper model "${request.model}": SUCCESS`);
- response = { type: "ok" };
- }
- }
- }
- else if (request.type === "task-request") {
- log(`${request.task.type} transcription task ${request.task.id}": START`);
- const task = await whisper.transcribe(request.task.audio, {
- language: request.task.language,
- n_threads: 16,
- no_timestamps: false,
- speed_up: true,
- suppress_non_speech_tokens: true,
- suppress_blank: true,
- debug_mode: false,
- print_special: false,
- print_progress: false,
- print_realtime: false,
- print_timestamps: false
- });
- task.on("transcribed", (result) => {
- console.log("TRANSCRIBED", JSON.stringify(result));
- });
- const result = await task.result;
- log(`${request.task.type} transcription task ${request.task.id}": END`);
- console.log("RESULT", result);
- const text = result[0].text;
- const taskResponse = {
- type: request.task.type,
- id: request.task.id,
- language: request.task.language,
- text: text ?? ""
- };
- response = { type: "task-response", task: taskResponse };
- }
- else if (request.type === "close") {
- /* shutdown Whisper */
- if (whisper !== null) {
- log("unloading Whisper model: BEGIN");
- await whisper.free();
- whisper = null;
- log("unloading Whisper model: END");
- }
- }
- if (response !== null)
- node_worker_threads_1.default.parentPort.postMessage(response);
- });
package/dst/speechflow-node-whisper-onnx.d.ts
@@ -1 +0,0 @@
- export {};
package/dst/speechflow-node-whisper-onnx.js
@@ -1,131 +0,0 @@
- "use strict";
- /*
- ** SpeechFlow - Speech Processing Flow Graph
- ** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
- ** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
- */
- var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
- if (k2 === undefined) k2 = k;
- var desc = Object.getOwnPropertyDescriptor(m, k);
- if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
- desc = { enumerable: true, get: function() { return m[k]; } };
- }
- Object.defineProperty(o, k2, desc);
- }) : (function(o, m, k, k2) {
- if (k2 === undefined) k2 = k;
- o[k2] = m[k];
- }));
- var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
- Object.defineProperty(o, "default", { enumerable: true, value: v });
- }) : function(o, v) {
- o["default"] = v;
- });
- var __importStar = (this && this.__importStar) || (function () {
- var ownKeys = function(o) {
- ownKeys = Object.getOwnPropertyNames || function (o) {
- var ar = [];
- for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
- return ar;
- };
- return ownKeys(o);
- };
- return function (mod) {
- if (mod && mod.__esModule) return mod;
- var result = {};
- if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
- __setModuleDefault(result, mod);
- return result;
- };
- })();
- var __importDefault = (this && this.__importDefault) || function (mod) {
- return (mod && mod.__esModule) ? mod : { "default": mod };
- };
- Object.defineProperty(exports, "__esModule", { value: true });
- /* standard dependencies */
- const node_path_1 = __importDefault(require("node:path"));
- const node_worker_threads_1 = __importDefault(require("node:worker_threads"));
- /* external dependencies */
- const Transformers = __importStar(require("@huggingface/transformers"));
- /* utility function for sending a log message */
- const log = (message) => node_worker_threads_1.default.parentPort.postMessage({ type: "log", message });
- /* internal state */
- let transcriber = null;
- /* OpenAI Whisper models (ONNX variants) */
- const models = {
- "v1-tiny": { model: "onnx-community/whisper-tiny-ONNX" },
- "v1-base": { model: "onnx-community/whisper-base" },
- "v1-small": { model: "onnx-community/whisper-small" },
- "v1-medium": { model: "onnx-community/whisper-medium-ONNX" },
- "v2-large": { model: "reach-vb/whisper-large-v2-onnx" },
- "v3-large": { model: "onnx-community/whisper-large-v3-ONNX" },
- "v3-large-turbo": { model: "onnx-community/whisper-large-v3-turbo" }
- };
- /* thread communication hook */
- node_worker_threads_1.default.parentPort?.on("message", async (request) => {
- let response = null;
- if (request.type === "open") {
- /* initialize Whisper */
- const model = models[request.model]?.model;
- if (!model)
- response = { type: "error", message: `unknown Whisper model "${request.model}"` };
- else {
- log(`loading Whisper model "${request.model}": BEGIN`);
- transcriber = await Transformers.pipeline("automatic-speech-recognition", model, {
- cache_dir: node_path_1.default.join(request.cacheDir, "whisper"),
- dtype: "q4",
- device: "gpu"
- });
- if (transcriber === null) {
- log(`loading Whisper model "${request.model}": FAILED`);
- response = { type: "error", message: "failed to open Whisper" };
- }
- else {
- log(`loading Whisper model "${request.model}": SUCCESS`);
- response = { type: "ok" };
- }
- }
- }
- else if (request.type === "task-request") {
- /* perform a speech-to-text transcription with Whisper */
- /*
- const streamer = new Transformers.TextStreamer(transcriber!.tokenizer, {
- skip_prompt: true,
- callback_function: (text) => {
- console.log("TEXT", text)
- }
- })
- */
- log(`${request.task.type} transcription task ${request.task.id}": START`);
- const result = await transcriber(request.task.audio, {
- chunk_length_s: 3,
- stride_length_s: 1,
- language: request.task.language,
- task: "transcribe",
- force_full_sequences: false,
- use_cache: true,
- return_timestamps: true,
- // streamer
- });
- log(`${request.task.type} transcription task ${request.task.id}": END`);
- console.log("RESULT", JSON.stringify(result));
- const text = Array.isArray(result) ? result[0].text : result.text;
- const taskResponse = {
- type: request.task.type,
- id: request.task.id,
- language: request.task.language,
- text: text ?? ""
- };
- response = { type: "task-response", task: taskResponse };
- }
- else if (request.type === "close") {
- /* shutdown Whisper */
- if (transcriber !== null) {
- log("unloading Whisper model: BEGIN");
- await transcriber.dispose();
- transcriber = null;
- log("unloading Whisper model: END");
- }
- }
- if (response !== null)
- node_worker_threads_1.default.parentPort.postMessage(response);
- });
package/dst/speechflow-node-whisper-worker-ggml.d.ts
@@ -1 +0,0 @@
- export {};
package/dst/speechflow-node-whisper-worker-ggml.js
@@ -1,97 +0,0 @@
- "use strict";
- /*
- ** SpeechFlow - Speech Processing Flow Graph
- ** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
- ** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
- */
- var __importDefault = (this && this.__importDefault) || function (mod) {
- return (mod && mod.__esModule) ? mod : { "default": mod };
- };
- Object.defineProperty(exports, "__esModule", { value: true });
- /* standard dependencies */
- const node_worker_threads_1 = __importDefault(require("node:worker_threads"));
- /* external dependencies */
- const smart_whisper_1 = require("smart-whisper");
- /* utility function for sending a log message */
- const log = (message) => node_worker_threads_1.default.parentPort.postMessage({ type: "log", message });
- /* internal state */
- let whisper = null;
- /* OpenAI Whisper models (GGML variants for Whisper.cpp) */
- const models = {
- "v1-tiny": { model: "tiny" },
- "v1-base": { model: "base" },
- "v1-small": { model: "small" },
- "v1-medium": { model: "medium" },
- "v2-large": { model: "large-v2" },
- "v3-large": { model: "large-v3" },
- "v3-large-turbo": { model: "large-v3-turbo" }
- };
- /* thread communication hook */
- node_worker_threads_1.default.parentPort?.on("message", async (request) => {
- let response = null;
- if (request.type === "open") {
- /* initialize Whisper */
- const model = models[request.model]?.model;
- if (!model)
- response = { type: "error", message: `unknown Whisper model "${request.model}"` };
- else {
- log(`loading Whisper model "${request.model}": BEGIN`);
- const name = await smart_whisper_1.manager.download(model);
- const resolved = smart_whisper_1.manager.resolve(name);
- whisper = new smart_whisper_1.Whisper(resolved, {
- gpu: true,
- offload: 120 * 60
- });
- if (whisper === null) {
- log(`loading Whisper model "${request.model}": FAILED`);
- response = { type: "error", message: "failed to open Whisper" };
- }
- else {
- await whisper.load();
- log(`loading Whisper model "${request.model}": SUCCESS`);
- response = { type: "ok" };
- }
- }
- }
- else if (request.type === "task-request") {
- log(`${request.task.type} transcription task ${request.task.id}": START`);
- const task = await whisper.transcribe(request.task.audio, {
- language: request.task.language,
- n_threads: 16,
- no_timestamps: false,
- speed_up: true,
- suppress_non_speech_tokens: true,
- suppress_blank: true,
- debug_mode: false,
- print_special: false,
- print_progress: false,
- print_realtime: false,
- print_timestamps: false
- });
- task.on("transcribed", (result) => {
- console.log("TRANSCRIBED", JSON.stringify(result));
- });
- const result = await task.result;
- log(`${request.task.type} transcription task ${request.task.id}": END`);
- console.log("RESULT", result);
- const text = result[0].text;
- const taskResponse = {
- type: request.task.type,
- id: request.task.id,
- language: request.task.language,
- text: text ?? ""
- };
- response = { type: "task-response", task: taskResponse };
- }
- else if (request.type === "close") {
- /* shutdown Whisper */
- if (whisper !== null) {
- log("unloading Whisper model: BEGIN");
- await whisper.free();
- whisper = null;
- log("unloading Whisper model: END");
- }
- }
- if (response !== null)
- node_worker_threads_1.default.parentPort.postMessage(response);
- });
package/dst/speechflow-node-whisper-worker-onnx.d.ts
@@ -1 +0,0 @@
- export {};