speechflow 0.9.8 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114)
  1. package/CHANGELOG.md +18 -0
  2. package/LICENSE.txt +674 -0
  3. package/README.md +114 -17
  4. package/dst/speechflow-node-a2a-ffmpeg.js +1 -0
  5. package/dst/speechflow-node-a2a-ffmpeg.js.map +1 -0
  6. package/dst/{speechflow-node-deepl.d.ts → speechflow-node-a2a-meter.d.ts} +2 -2
  7. package/dst/speechflow-node-a2a-meter.js +147 -0
  8. package/dst/speechflow-node-a2a-meter.js.map +1 -0
  9. package/dst/speechflow-node-a2a-mute.d.ts +16 -0
  10. package/dst/speechflow-node-a2a-mute.js +90 -0
  11. package/dst/speechflow-node-a2a-mute.js.map +1 -0
  12. package/dst/{speechflow-node-whisper.d.ts → speechflow-node-a2a-vad.d.ts} +2 -5
  13. package/dst/speechflow-node-a2a-vad.js +272 -0
  14. package/dst/speechflow-node-a2a-vad.js.map +1 -0
  15. package/dst/speechflow-node-a2a-wav.js +1 -0
  16. package/dst/speechflow-node-a2a-wav.js.map +1 -0
  17. package/dst/speechflow-node-a2t-deepgram.js +2 -1
  18. package/dst/speechflow-node-a2t-deepgram.js.map +1 -0
  19. package/dst/speechflow-node-t2a-elevenlabs.js +1 -0
  20. package/dst/speechflow-node-t2a-elevenlabs.js.map +1 -0
  21. package/dst/{speechflow-node-elevenlabs.d.ts → speechflow-node-t2a-kokoro.d.ts} +2 -2
  22. package/dst/speechflow-node-t2a-kokoro.js +148 -0
  23. package/dst/speechflow-node-t2a-kokoro.js.map +1 -0
  24. package/dst/speechflow-node-t2t-deepl.js +1 -0
  25. package/dst/speechflow-node-t2t-deepl.js.map +1 -0
  26. package/dst/speechflow-node-t2t-format.js +1 -0
  27. package/dst/speechflow-node-t2t-format.js.map +1 -0
  28. package/dst/{speechflow-node-gemma.d.ts → speechflow-node-t2t-ollama.d.ts} +1 -1
  29. package/dst/{speechflow-node-gemma.js → speechflow-node-t2t-ollama.js} +41 -8
  30. package/dst/speechflow-node-t2t-ollama.js.map +1 -0
  31. package/dst/{speechflow-node-t2t-gemma.d.ts → speechflow-node-t2t-openai.d.ts} +2 -2
  32. package/dst/{speechflow-node-t2t-gemma.js → speechflow-node-t2t-openai.js} +43 -30
  33. package/dst/speechflow-node-t2t-openai.js.map +1 -0
  34. package/dst/speechflow-node-t2t-subtitle.js +1 -0
  35. package/dst/speechflow-node-t2t-subtitle.js.map +1 -0
  36. package/dst/{speechflow-node-opus.d.ts → speechflow-node-t2t-transformers.d.ts} +3 -1
  37. package/dst/speechflow-node-t2t-transformers.js +264 -0
  38. package/dst/speechflow-node-t2t-transformers.js.map +1 -0
  39. package/dst/speechflow-node-x2x-trace.js +3 -2
  40. package/dst/speechflow-node-x2x-trace.js.map +1 -0
  41. package/dst/speechflow-node-xio-device.js +1 -0
  42. package/dst/speechflow-node-xio-device.js.map +1 -0
  43. package/dst/speechflow-node-xio-file.js +1 -0
  44. package/dst/speechflow-node-xio-file.js.map +1 -0
  45. package/dst/speechflow-node-xio-mqtt.js +1 -0
  46. package/dst/speechflow-node-xio-mqtt.js.map +1 -0
  47. package/dst/speechflow-node-xio-websocket.js +1 -0
  48. package/dst/speechflow-node-xio-websocket.js.map +1 -0
  49. package/dst/speechflow-node.d.ts +3 -0
  50. package/dst/speechflow-node.js +10 -0
  51. package/dst/speechflow-node.js.map +1 -0
  52. package/dst/speechflow-utils.d.ts +33 -0
  53. package/dst/speechflow-utils.js +183 -1
  54. package/dst/speechflow-utils.js.map +1 -0
  55. package/dst/speechflow.js +295 -46
  56. package/dst/speechflow.js.map +1 -0
  57. package/etc/speechflow.yaml +14 -5
  58. package/etc/stx.conf +1 -1
  59. package/etc/tsconfig.json +2 -2
  60. package/package.json +17 -10
  61. package/src/speechflow-node-a2a-meter.ts +125 -0
  62. package/src/speechflow-node-a2a-mute.ts +101 -0
  63. package/src/speechflow-node-a2a-vad.ts +266 -0
  64. package/src/speechflow-node-a2t-deepgram.ts +1 -1
  65. package/src/speechflow-node-t2a-kokoro.ts +160 -0
  66. package/src/{speechflow-node-t2t-gemma.ts → speechflow-node-t2t-ollama.ts} +44 -10
  67. package/src/speechflow-node-t2t-openai.ts +246 -0
  68. package/src/speechflow-node-t2t-transformers.ts +249 -0
  69. package/src/speechflow-node-x2x-trace.ts +2 -2
  70. package/src/speechflow-node-xio-websocket.ts +5 -5
  71. package/src/speechflow-node.ts +12 -0
  72. package/src/speechflow-utils.ts +195 -0
  73. package/src/speechflow.ts +279 -46
  74. package/dst/speechflow-node-deepgram.d.ts +0 -12
  75. package/dst/speechflow-node-deepgram.js +0 -220
  76. package/dst/speechflow-node-deepl.js +0 -128
  77. package/dst/speechflow-node-device.d.ts +0 -13
  78. package/dst/speechflow-node-device.js +0 -205
  79. package/dst/speechflow-node-elevenlabs.js +0 -182
  80. package/dst/speechflow-node-ffmpeg.d.ts +0 -13
  81. package/dst/speechflow-node-ffmpeg.js +0 -152
  82. package/dst/speechflow-node-file.d.ts +0 -11
  83. package/dst/speechflow-node-file.js +0 -176
  84. package/dst/speechflow-node-format.d.ts +0 -11
  85. package/dst/speechflow-node-format.js +0 -80
  86. package/dst/speechflow-node-mqtt.d.ts +0 -13
  87. package/dst/speechflow-node-mqtt.js +0 -181
  88. package/dst/speechflow-node-opus.js +0 -135
  89. package/dst/speechflow-node-subtitle.d.ts +0 -12
  90. package/dst/speechflow-node-subtitle.js +0 -96
  91. package/dst/speechflow-node-t2t-opus.d.ts +0 -12
  92. package/dst/speechflow-node-t2t-opus.js +0 -135
  93. package/dst/speechflow-node-trace.d.ts +0 -11
  94. package/dst/speechflow-node-trace.js +0 -88
  95. package/dst/speechflow-node-wav.d.ts +0 -11
  96. package/dst/speechflow-node-wav.js +0 -170
  97. package/dst/speechflow-node-websocket.d.ts +0 -13
  98. package/dst/speechflow-node-websocket.js +0 -275
  99. package/dst/speechflow-node-whisper-common.d.ts +0 -34
  100. package/dst/speechflow-node-whisper-common.js +0 -7
  101. package/dst/speechflow-node-whisper-ggml.d.ts +0 -1
  102. package/dst/speechflow-node-whisper-ggml.js +0 -97
  103. package/dst/speechflow-node-whisper-onnx.d.ts +0 -1
  104. package/dst/speechflow-node-whisper-onnx.js +0 -131
  105. package/dst/speechflow-node-whisper-worker-ggml.d.ts +0 -1
  106. package/dst/speechflow-node-whisper-worker-ggml.js +0 -97
  107. package/dst/speechflow-node-whisper-worker-onnx.d.ts +0 -1
  108. package/dst/speechflow-node-whisper-worker-onnx.js +0 -131
  109. package/dst/speechflow-node-whisper-worker.d.ts +0 -1
  110. package/dst/speechflow-node-whisper-worker.js +0 -116
  111. package/dst/speechflow-node-whisper-worker2.d.ts +0 -1
  112. package/dst/speechflow-node-whisper-worker2.js +0 -82
  113. package/dst/speechflow-node-whisper.js +0 -604
  114. package/src/speechflow-node-t2t-opus.ts +0 -111
@@ -1,131 +0,0 @@
1
- "use strict";
2
- /*
3
- ** SpeechFlow - Speech Processing Flow Graph
4
- ** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
5
- ** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
6
- */
7
- var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
8
- if (k2 === undefined) k2 = k;
9
- var desc = Object.getOwnPropertyDescriptor(m, k);
10
- if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
11
- desc = { enumerable: true, get: function() { return m[k]; } };
12
- }
13
- Object.defineProperty(o, k2, desc);
14
- }) : (function(o, m, k, k2) {
15
- if (k2 === undefined) k2 = k;
16
- o[k2] = m[k];
17
- }));
18
- var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
19
- Object.defineProperty(o, "default", { enumerable: true, value: v });
20
- }) : function(o, v) {
21
- o["default"] = v;
22
- });
23
- var __importStar = (this && this.__importStar) || (function () {
24
- var ownKeys = function(o) {
25
- ownKeys = Object.getOwnPropertyNames || function (o) {
26
- var ar = [];
27
- for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
28
- return ar;
29
- };
30
- return ownKeys(o);
31
- };
32
- return function (mod) {
33
- if (mod && mod.__esModule) return mod;
34
- var result = {};
35
- if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
36
- __setModuleDefault(result, mod);
37
- return result;
38
- };
39
- })();
40
- var __importDefault = (this && this.__importDefault) || function (mod) {
41
- return (mod && mod.__esModule) ? mod : { "default": mod };
42
- };
43
- Object.defineProperty(exports, "__esModule", { value: true });
44
- /* standard dependencies */
45
- const node_path_1 = __importDefault(require("node:path"));
46
- const node_worker_threads_1 = __importDefault(require("node:worker_threads"));
47
- /* external dependencies */
48
- const Transformers = __importStar(require("@huggingface/transformers"));
49
- /* utility function for sending a log message */
50
- const log = (message) => node_worker_threads_1.default.parentPort.postMessage({ type: "log", message });
51
- /* internal state */
52
- let transcriber = null;
53
- /* OpenAI Whisper models (ONNX variants) */
54
- const models = {
55
- "v1-tiny": { model: "onnx-community/whisper-tiny-ONNX" },
56
- "v1-base": { model: "onnx-community/whisper-base" },
57
- "v1-small": { model: "onnx-community/whisper-small" },
58
- "v1-medium": { model: "onnx-community/whisper-medium-ONNX" },
59
- "v2-large": { model: "reach-vb/whisper-large-v2-onnx" },
60
- "v3-large": { model: "onnx-community/whisper-large-v3-ONNX" },
61
- "v3-large-turbo": { model: "onnx-community/whisper-large-v3-turbo" }
62
- };
63
- /* thread communication hook */
64
- node_worker_threads_1.default.parentPort?.on("message", async (request) => {
65
- let response = null;
66
- if (request.type === "open") {
67
- /* initialize Whisper */
68
- const model = models[request.model]?.model;
69
- if (!model)
70
- response = { type: "error", message: `unknown Whisper model "${request.model}"` };
71
- else {
72
- log(`loading Whisper model "${request.model}": BEGIN`);
73
- transcriber = await Transformers.pipeline("automatic-speech-recognition", model, {
74
- cache_dir: node_path_1.default.join(request.cacheDir, "whisper"),
75
- dtype: "q4",
76
- device: "gpu"
77
- });
78
- if (transcriber === null) {
79
- log(`loading Whisper model "${request.model}": FAILED`);
80
- response = { type: "error", message: "failed to open Whisper" };
81
- }
82
- else {
83
- log(`loading Whisper model "${request.model}": SUCCESS`);
84
- response = { type: "ok" };
85
- }
86
- }
87
- }
88
- else if (request.type === "task-request") {
89
- /* perform a speech-to-text transcription with Whisper */
90
- /*
91
- const streamer = new Transformers.TextStreamer(transcriber!.tokenizer, {
92
- skip_prompt: true,
93
- callback_function: (text) => {
94
- console.log("TEXT", text)
95
- }
96
- })
97
- */
98
- log(`${request.task.type} transcription task ${request.task.id}": START`);
99
- const result = await transcriber(request.task.audio, {
100
- chunk_length_s: 3,
101
- stride_length_s: 1,
102
- language: request.task.language,
103
- task: "transcribe",
104
- force_full_sequences: false,
105
- use_cache: true,
106
- return_timestamps: true,
107
- // streamer
108
- });
109
- log(`${request.task.type} transcription task ${request.task.id}": END`);
110
- console.log("RESULT", JSON.stringify(result));
111
- const text = Array.isArray(result) ? result[0].text : result.text;
112
- const taskResponse = {
113
- type: request.task.type,
114
- id: request.task.id,
115
- language: request.task.language,
116
- text: text ?? ""
117
- };
118
- response = { type: "task-response", task: taskResponse };
119
- }
120
- else if (request.type === "close") {
121
- /* shutdown Whisper */
122
- if (transcriber !== null) {
123
- log("unloading Whisper model: BEGIN");
124
- await transcriber.dispose();
125
- transcriber = null;
126
- log("unloading Whisper model: END");
127
- }
128
- }
129
- if (response !== null)
130
- node_worker_threads_1.default.parentPort.postMessage(response);
131
- });
@@ -1 +0,0 @@
1
- export {};
@@ -1,97 +0,0 @@
1
- "use strict";
2
- /*
3
- ** SpeechFlow - Speech Processing Flow Graph
4
- ** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
5
- ** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
6
- */
7
- var __importDefault = (this && this.__importDefault) || function (mod) {
8
- return (mod && mod.__esModule) ? mod : { "default": mod };
9
- };
10
- Object.defineProperty(exports, "__esModule", { value: true });
11
- /* standard dependencies */
12
- const node_worker_threads_1 = __importDefault(require("node:worker_threads"));
13
- /* external dependencies */
14
- const smart_whisper_1 = require("smart-whisper");
15
- /* utility function for sending a log message */
16
- const log = (message) => node_worker_threads_1.default.parentPort.postMessage({ type: "log", message });
17
- /* internal state */
18
- let whisper = null;
19
- /* OpenAI Whisper models (GGML variants for Whisper.cpp) */
20
- const models = {
21
- "v1-tiny": { model: "tiny" },
22
- "v1-base": { model: "base" },
23
- "v1-small": { model: "small" },
24
- "v1-medium": { model: "medium" },
25
- "v2-large": { model: "large-v2" },
26
- "v3-large": { model: "large-v3" },
27
- "v3-large-turbo": { model: "large-v3-turbo" }
28
- };
29
- /* thread communication hook */
30
- node_worker_threads_1.default.parentPort?.on("message", async (request) => {
31
- let response = null;
32
- if (request.type === "open") {
33
- /* initialize Whisper */
34
- const model = models[request.model]?.model;
35
- if (!model)
36
- response = { type: "error", message: `unknown Whisper model "${request.model}"` };
37
- else {
38
- log(`loading Whisper model "${request.model}": BEGIN`);
39
- const name = await smart_whisper_1.manager.download(model);
40
- const resolved = smart_whisper_1.manager.resolve(name);
41
- whisper = new smart_whisper_1.Whisper(resolved, {
42
- gpu: true,
43
- offload: 120 * 60
44
- });
45
- if (whisper === null) {
46
- log(`loading Whisper model "${request.model}": FAILED`);
47
- response = { type: "error", message: "failed to open Whisper" };
48
- }
49
- else {
50
- await whisper.load();
51
- log(`loading Whisper model "${request.model}": SUCCESS`);
52
- response = { type: "ok" };
53
- }
54
- }
55
- }
56
- else if (request.type === "task-request") {
57
- log(`${request.task.type} transcription task ${request.task.id}": START`);
58
- const task = await whisper.transcribe(request.task.audio, {
59
- language: request.task.language,
60
- n_threads: 16,
61
- no_timestamps: false,
62
- speed_up: true,
63
- suppress_non_speech_tokens: true,
64
- suppress_blank: true,
65
- debug_mode: false,
66
- print_special: false,
67
- print_progress: false,
68
- print_realtime: false,
69
- print_timestamps: false
70
- });
71
- task.on("transcribed", (result) => {
72
- console.log("TRANSCRIBED", JSON.stringify(result));
73
- });
74
- const result = await task.result;
75
- log(`${request.task.type} transcription task ${request.task.id}": END`);
76
- console.log("RESULT", result);
77
- const text = result[0].text;
78
- const taskResponse = {
79
- type: request.task.type,
80
- id: request.task.id,
81
- language: request.task.language,
82
- text: text ?? ""
83
- };
84
- response = { type: "task-response", task: taskResponse };
85
- }
86
- else if (request.type === "close") {
87
- /* shutdown Whisper */
88
- if (whisper !== null) {
89
- log("unloading Whisper model: BEGIN");
90
- await whisper.free();
91
- whisper = null;
92
- log("unloading Whisper model: END");
93
- }
94
- }
95
- if (response !== null)
96
- node_worker_threads_1.default.parentPort.postMessage(response);
97
- });
@@ -1 +0,0 @@
1
- export {};
@@ -1,131 +0,0 @@
1
- "use strict";
2
- /*
3
- ** SpeechFlow - Speech Processing Flow Graph
4
- ** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
5
- ** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
6
- */
7
- var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
8
- if (k2 === undefined) k2 = k;
9
- var desc = Object.getOwnPropertyDescriptor(m, k);
10
- if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
11
- desc = { enumerable: true, get: function() { return m[k]; } };
12
- }
13
- Object.defineProperty(o, k2, desc);
14
- }) : (function(o, m, k, k2) {
15
- if (k2 === undefined) k2 = k;
16
- o[k2] = m[k];
17
- }));
18
- var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
19
- Object.defineProperty(o, "default", { enumerable: true, value: v });
20
- }) : function(o, v) {
21
- o["default"] = v;
22
- });
23
- var __importStar = (this && this.__importStar) || (function () {
24
- var ownKeys = function(o) {
25
- ownKeys = Object.getOwnPropertyNames || function (o) {
26
- var ar = [];
27
- for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
28
- return ar;
29
- };
30
- return ownKeys(o);
31
- };
32
- return function (mod) {
33
- if (mod && mod.__esModule) return mod;
34
- var result = {};
35
- if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
36
- __setModuleDefault(result, mod);
37
- return result;
38
- };
39
- })();
40
- var __importDefault = (this && this.__importDefault) || function (mod) {
41
- return (mod && mod.__esModule) ? mod : { "default": mod };
42
- };
43
- Object.defineProperty(exports, "__esModule", { value: true });
44
- /* standard dependencies */
45
- const node_path_1 = __importDefault(require("node:path"));
46
- const node_worker_threads_1 = __importDefault(require("node:worker_threads"));
47
- /* external dependencies */
48
- const Transformers = __importStar(require("@huggingface/transformers"));
49
- /* utility function for sending a log message */
50
- const log = (message) => node_worker_threads_1.default.parentPort.postMessage({ type: "log", message });
51
- /* internal state */
52
- let transcriber = null;
53
- /* OpenAI Whisper models (ONNX variants) */
54
- const models = {
55
- "v1-tiny": { model: "onnx-community/whisper-tiny-ONNX" },
56
- "v1-base": { model: "onnx-community/whisper-base" },
57
- "v1-small": { model: "onnx-community/whisper-small" },
58
- "v1-medium": { model: "onnx-community/whisper-medium-ONNX" },
59
- "v2-large": { model: "reach-vb/whisper-large-v2-onnx" },
60
- "v3-large": { model: "onnx-community/whisper-large-v3-ONNX" },
61
- "v3-large-turbo": { model: "onnx-community/whisper-large-v3-turbo" }
62
- };
63
- /* thread communication hook */
64
- node_worker_threads_1.default.parentPort?.on("message", async (request) => {
65
- let response = null;
66
- if (request.type === "open") {
67
- /* initialize Whisper */
68
- const model = models[request.model]?.model;
69
- if (!model)
70
- response = { type: "error", message: `unknown Whisper model "${request.model}"` };
71
- else {
72
- log(`loading Whisper model "${request.model}": BEGIN`);
73
- transcriber = await Transformers.pipeline("automatic-speech-recognition", model, {
74
- cache_dir: node_path_1.default.join(request.cacheDir, "whisper"),
75
- dtype: "q4",
76
- device: "gpu"
77
- });
78
- if (transcriber === null) {
79
- log(`loading Whisper model "${request.model}": FAILED`);
80
- response = { type: "error", message: "failed to open Whisper" };
81
- }
82
- else {
83
- log(`loading Whisper model "${request.model}": SUCCESS`);
84
- response = { type: "ok" };
85
- }
86
- }
87
- }
88
- else if (request.type === "task-request") {
89
- /* perform a speech-to-text transcription with Whisper */
90
- /*
91
- const streamer = new Transformers.TextStreamer(transcriber!.tokenizer, {
92
- skip_prompt: true,
93
- callback_function: (text) => {
94
- console.log("TEXT", text)
95
- }
96
- })
97
- */
98
- log(`${request.task.type} transcription task ${request.task.id}": START`);
99
- const result = await transcriber(request.task.audio, {
100
- chunk_length_s: 3,
101
- stride_length_s: 1,
102
- language: request.task.language,
103
- task: "transcribe",
104
- force_full_sequences: false,
105
- use_cache: true,
106
- return_timestamps: true,
107
- // streamer
108
- });
109
- log(`${request.task.type} transcription task ${request.task.id}": END`);
110
- console.log("RESULT", JSON.stringify(result));
111
- const text = Array.isArray(result) ? result[0].text : result.text;
112
- const taskResponse = {
113
- type: request.task.type,
114
- id: request.task.id,
115
- language: request.task.language,
116
- text: text ?? ""
117
- };
118
- response = { type: "task-response", task: taskResponse };
119
- }
120
- else if (request.type === "close") {
121
- /* shutdown Whisper */
122
- if (transcriber !== null) {
123
- log("unloading Whisper model: BEGIN");
124
- await transcriber.dispose();
125
- transcriber = null;
126
- log("unloading Whisper model: END");
127
- }
128
- }
129
- if (response !== null)
130
- node_worker_threads_1.default.parentPort.postMessage(response);
131
- });
@@ -1 +0,0 @@
1
- export {};
@@ -1,116 +0,0 @@
1
- "use strict";
2
- /*
3
- ** SpeechFlow - Speech Processing Flow Graph
4
- ** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
5
- ** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
6
- */
7
- var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
8
- if (k2 === undefined) k2 = k;
9
- var desc = Object.getOwnPropertyDescriptor(m, k);
10
- if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
11
- desc = { enumerable: true, get: function() { return m[k]; } };
12
- }
13
- Object.defineProperty(o, k2, desc);
14
- }) : (function(o, m, k, k2) {
15
- if (k2 === undefined) k2 = k;
16
- o[k2] = m[k];
17
- }));
18
- var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
19
- Object.defineProperty(o, "default", { enumerable: true, value: v });
20
- }) : function(o, v) {
21
- o["default"] = v;
22
- });
23
- var __importStar = (this && this.__importStar) || (function () {
24
- var ownKeys = function(o) {
25
- ownKeys = Object.getOwnPropertyNames || function (o) {
26
- var ar = [];
27
- for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
28
- return ar;
29
- };
30
- return ownKeys(o);
31
- };
32
- return function (mod) {
33
- if (mod && mod.__esModule) return mod;
34
- var result = {};
35
- if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
36
- __setModuleDefault(result, mod);
37
- return result;
38
- };
39
- })();
40
- var __importDefault = (this && this.__importDefault) || function (mod) {
41
- return (mod && mod.__esModule) ? mod : { "default": mod };
42
- };
43
- Object.defineProperty(exports, "__esModule", { value: true });
44
- /* standard dependencies */
45
- const node_path_1 = __importDefault(require("node:path"));
46
- const node_worker_threads_1 = __importDefault(require("node:worker_threads"));
47
- /* external dependencies */
48
- const Transformers = __importStar(require("@huggingface/transformers"));
49
- /* utility function for sending a log message */
50
- const log = (message) => node_worker_threads_1.default.parentPort.postMessage({ type: "log", message });
51
- /* internal state */
52
- let transcriber = null;
53
- /* thread communication hook */
54
- node_worker_threads_1.default.parentPort?.on("message", async (request) => {
55
- let response = null;
56
- if (request.type === "open") {
57
- /* initialize Whisper */
58
- log(`loading Whisper model "${request.model}": BEGIN`);
59
- transcriber = await Transformers.pipeline("automatic-speech-recognition", request.model, {
60
- cache_dir: node_path_1.default.join(request.cacheDir, "whisper"),
61
- dtype: "q4",
62
- device: "gpu"
63
- });
64
- if (transcriber === null) {
65
- log(`loading Whisper model "${request.model}": FAILED`);
66
- response = { type: "error", message: "failed to open Whisper" };
67
- }
68
- else {
69
- log(`loading Whisper model "${request.model}": SUCCESS`);
70
- response = { type: "ok" };
71
- }
72
- }
73
- else if (request.type === "task-request") {
74
- /* perform a speech-to-text transcription with Whisper */
75
- /*
76
- const streamer = new Transformers.TextStreamer(transcriber!.tokenizer, {
77
- skip_prompt: true,
78
- callback_function: (text) => {
79
- console.log("TEXT", text)
80
- }
81
- })
82
- */
83
- log(`${request.task.type} transcription task ${request.task.id}": START`);
84
- const result = await transcriber(request.task.audio, {
85
- chunk_length_s: 3,
86
- stride_length_s: 1,
87
- language: request.task.language,
88
- task: "transcribe",
89
- force_full_sequences: false,
90
- use_cache: true,
91
- return_timestamps: true,
92
- // streamer
93
- });
94
- log(`${request.task.type} transcription task ${request.task.id}": END`);
95
- console.log("RESULT", JSON.stringify(result));
96
- const text = Array.isArray(result) ? result[0].text : result.text;
97
- const taskResponse = {
98
- type: request.task.type,
99
- id: request.task.id,
100
- language: request.task.language,
101
- text: text ?? ""
102
- };
103
- response = { type: "task-response", task: taskResponse };
104
- }
105
- else if (request.type === "close") {
106
- /* shutdown Whisper */
107
- if (transcriber !== null) {
108
- log("unloading Whisper model: BEGIN");
109
- await transcriber.dispose();
110
- transcriber = null;
111
- log("unloading Whisper model: END");
112
- }
113
- }
114
- if (response !== null)
115
- node_worker_threads_1.default.parentPort.postMessage(response);
116
- });
@@ -1 +0,0 @@
1
- export {};
@@ -1,82 +0,0 @@
1
- "use strict";
2
- /*
3
- ** SpeechFlow - Speech Processing Flow Graph
4
- ** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
5
- ** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
6
- */
7
- var __importDefault = (this && this.__importDefault) || function (mod) {
8
- return (mod && mod.__esModule) ? mod : { "default": mod };
9
- };
10
- Object.defineProperty(exports, "__esModule", { value: true });
11
- /* standard dependencies */
12
- const node_worker_threads_1 = __importDefault(require("node:worker_threads"));
13
- /* external dependencies */
14
- const smart_whisper_1 = require("smart-whisper");
15
- /* utility function for sending a log message */
16
- const log = (message) => node_worker_threads_1.default.parentPort.postMessage({ type: "log", message });
17
- /* internal state */
18
- let whisper = null;
19
- /* thread communication hook */
20
- node_worker_threads_1.default.parentPort?.on("message", async (request) => {
21
- let response = null;
22
- if (request.type === "open") {
23
- /* initialize Whisper */
24
- log(`loading Whisper model "${request.model}": BEGIN`);
25
- const name = await smart_whisper_1.manager.download(request.model);
26
- const resolved = smart_whisper_1.manager.resolve(name);
27
- whisper = new smart_whisper_1.Whisper(resolved, {
28
- gpu: true,
29
- offload: 120 * 60
30
- });
31
- if (whisper === null) {
32
- log(`loading Whisper model "${request.model}": FAILED`);
33
- response = { type: "error", message: "failed to open Whisper" };
34
- }
35
- else {
36
- await whisper.load();
37
- log(`loading Whisper model "${request.model}": SUCCESS`);
38
- response = { type: "ok" };
39
- }
40
- }
41
- else if (request.type === "task-request") {
42
- log(`${request.task.type} transcription task ${request.task.id}": START`);
43
- const task = await whisper.transcribe(request.task.audio, {
44
- language: request.task.language,
45
- n_threads: 16,
46
- no_timestamps: false,
47
- speed_up: true,
48
- suppress_non_speech_tokens: true,
49
- suppress_blank: true,
50
- debug_mode: false,
51
- print_special: false,
52
- print_progress: false,
53
- print_realtime: false,
54
- print_timestamps: false
55
- });
56
- task.on("transcribed", (result) => {
57
- console.log("TRANSCRIBED", JSON.stringify(result));
58
- });
59
- const result = await task.result;
60
- log(`${request.task.type} transcription task ${request.task.id}": END`);
61
- console.log("RESULT", result);
62
- const text = result[0].text;
63
- const taskResponse = {
64
- type: request.task.type,
65
- id: request.task.id,
66
- language: request.task.language,
67
- text: text ?? ""
68
- };
69
- response = { type: "task-response", task: taskResponse };
70
- }
71
- else if (request.type === "close") {
72
- /* shutdown Whisper */
73
- if (whisper !== null) {
74
- log("unloading Whisper model: BEGIN");
75
- await whisper.free();
76
- whisper = null;
77
- log("unloading Whisper model: END");
78
- }
79
- }
80
- if (response !== null)
81
- node_worker_threads_1.default.parentPort.postMessage(response);
82
- });