speechflow 0.9.9 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (120) hide show
  1. package/CHANGELOG.md +16 -0
  2. package/README.md +57 -9
  3. package/dst/speechflow-node-a2a-ffmpeg.js +1 -0
  4. package/dst/speechflow-node-a2a-ffmpeg.js.map +1 -0
  5. package/dst/{speechflow-node-gemma.d.ts → speechflow-node-a2a-meter.d.ts} +2 -3
  6. package/dst/speechflow-node-a2a-meter.js +151 -0
  7. package/dst/speechflow-node-a2a-meter.js.map +1 -0
  8. package/dst/speechflow-node-a2a-mute.d.ts +16 -0
  9. package/dst/speechflow-node-a2a-mute.js +90 -0
  10. package/dst/speechflow-node-a2a-mute.js.map +1 -0
  11. package/dst/speechflow-node-a2a-vad.js +130 -289
  12. package/dst/speechflow-node-a2a-vad.js.map +1 -0
  13. package/dst/speechflow-node-a2a-wav.js +1 -0
  14. package/dst/speechflow-node-a2a-wav.js.map +1 -0
  15. package/dst/speechflow-node-a2t-deepgram.d.ts +3 -0
  16. package/dst/speechflow-node-a2t-deepgram.js +18 -2
  17. package/dst/speechflow-node-a2t-deepgram.js.map +1 -0
  18. package/dst/speechflow-node-t2a-elevenlabs.d.ts +3 -0
  19. package/dst/speechflow-node-t2a-elevenlabs.js +9 -1
  20. package/dst/speechflow-node-t2a-elevenlabs.js.map +1 -0
  21. package/dst/speechflow-node-t2a-kokoro.js +1 -0
  22. package/dst/speechflow-node-t2a-kokoro.js.map +1 -0
  23. package/dst/speechflow-node-t2t-deepl.d.ts +3 -0
  24. package/dst/speechflow-node-t2t-deepl.js +9 -1
  25. package/dst/speechflow-node-t2t-deepl.js.map +1 -0
  26. package/dst/speechflow-node-t2t-format.js +1 -0
  27. package/dst/speechflow-node-t2t-format.js.map +1 -0
  28. package/dst/speechflow-node-t2t-ollama.js +1 -0
  29. package/dst/speechflow-node-t2t-ollama.js.map +1 -0
  30. package/dst/speechflow-node-t2t-openai.js +2 -1
  31. package/dst/speechflow-node-t2t-openai.js.map +1 -0
  32. package/dst/speechflow-node-t2t-subtitle.js +1 -0
  33. package/dst/speechflow-node-t2t-subtitle.js.map +1 -0
  34. package/dst/speechflow-node-t2t-transformers.js +10 -6
  35. package/dst/speechflow-node-t2t-transformers.js.map +1 -0
  36. package/dst/speechflow-node-x2x-trace.js +2 -1
  37. package/dst/speechflow-node-x2x-trace.js.map +1 -0
  38. package/dst/speechflow-node-xio-device.js +4 -1
  39. package/dst/speechflow-node-xio-device.js.map +1 -0
  40. package/dst/speechflow-node-xio-file.js +4 -1
  41. package/dst/speechflow-node-xio-file.js.map +1 -0
  42. package/dst/speechflow-node-xio-mqtt.js +8 -5
  43. package/dst/speechflow-node-xio-mqtt.js.map +1 -0
  44. package/dst/speechflow-node-xio-websocket.js +6 -5
  45. package/dst/speechflow-node-xio-websocket.js.map +1 -0
  46. package/dst/speechflow-node.d.ts +8 -1
  47. package/dst/speechflow-node.js +18 -2
  48. package/dst/speechflow-node.js.map +1 -0
  49. package/dst/speechflow-utils.d.ts +33 -0
  50. package/dst/speechflow-utils.js +183 -1
  51. package/dst/speechflow-utils.js.map +1 -0
  52. package/dst/speechflow.js +259 -16
  53. package/dst/speechflow.js.map +1 -0
  54. package/etc/speechflow.yaml +9 -7
  55. package/etc/stx.conf +1 -1
  56. package/etc/tsconfig.json +2 -2
  57. package/package.json +19 -12
  58. package/src/speechflow-node-a2a-meter.ts +129 -0
  59. package/src/speechflow-node-a2a-mute.ts +101 -0
  60. package/src/speechflow-node-a2a-vad.ts +266 -0
  61. package/src/speechflow-node-a2t-deepgram.ts +18 -2
  62. package/src/speechflow-node-t2a-elevenlabs.ts +9 -1
  63. package/src/speechflow-node-t2t-deepl.ts +9 -1
  64. package/src/speechflow-node-t2t-openai.ts +1 -1
  65. package/src/speechflow-node-t2t-transformers.ts +12 -7
  66. package/src/speechflow-node-x2x-trace.ts +1 -1
  67. package/src/speechflow-node-xio-device.ts +4 -1
  68. package/src/speechflow-node-xio-file.ts +3 -1
  69. package/src/speechflow-node-xio-mqtt.ts +8 -6
  70. package/src/speechflow-node-xio-websocket.ts +11 -11
  71. package/src/speechflow-node.ts +21 -2
  72. package/src/speechflow-utils.ts +195 -0
  73. package/src/speechflow.ts +245 -16
  74. package/dst/speechflow-node-deepgram.d.ts +0 -12
  75. package/dst/speechflow-node-deepgram.js +0 -220
  76. package/dst/speechflow-node-deepl.d.ts +0 -12
  77. package/dst/speechflow-node-deepl.js +0 -128
  78. package/dst/speechflow-node-device.d.ts +0 -13
  79. package/dst/speechflow-node-device.js +0 -205
  80. package/dst/speechflow-node-elevenlabs.d.ts +0 -13
  81. package/dst/speechflow-node-elevenlabs.js +0 -182
  82. package/dst/speechflow-node-ffmpeg.d.ts +0 -13
  83. package/dst/speechflow-node-ffmpeg.js +0 -152
  84. package/dst/speechflow-node-file.d.ts +0 -11
  85. package/dst/speechflow-node-file.js +0 -176
  86. package/dst/speechflow-node-format.d.ts +0 -11
  87. package/dst/speechflow-node-format.js +0 -80
  88. package/dst/speechflow-node-gemma.js +0 -213
  89. package/dst/speechflow-node-mqtt.d.ts +0 -13
  90. package/dst/speechflow-node-mqtt.js +0 -181
  91. package/dst/speechflow-node-opus.d.ts +0 -12
  92. package/dst/speechflow-node-opus.js +0 -135
  93. package/dst/speechflow-node-subtitle.d.ts +0 -12
  94. package/dst/speechflow-node-subtitle.js +0 -96
  95. package/dst/speechflow-node-t2t-gemma.d.ts +0 -13
  96. package/dst/speechflow-node-t2t-gemma.js +0 -233
  97. package/dst/speechflow-node-t2t-opus.d.ts +0 -12
  98. package/dst/speechflow-node-t2t-opus.js +0 -135
  99. package/dst/speechflow-node-trace.d.ts +0 -11
  100. package/dst/speechflow-node-trace.js +0 -88
  101. package/dst/speechflow-node-wav.d.ts +0 -11
  102. package/dst/speechflow-node-wav.js +0 -170
  103. package/dst/speechflow-node-websocket.d.ts +0 -13
  104. package/dst/speechflow-node-websocket.js +0 -275
  105. package/dst/speechflow-node-whisper-common.d.ts +0 -34
  106. package/dst/speechflow-node-whisper-common.js +0 -7
  107. package/dst/speechflow-node-whisper-ggml.d.ts +0 -1
  108. package/dst/speechflow-node-whisper-ggml.js +0 -97
  109. package/dst/speechflow-node-whisper-onnx.d.ts +0 -1
  110. package/dst/speechflow-node-whisper-onnx.js +0 -131
  111. package/dst/speechflow-node-whisper-worker-ggml.d.ts +0 -1
  112. package/dst/speechflow-node-whisper-worker-ggml.js +0 -97
  113. package/dst/speechflow-node-whisper-worker-onnx.d.ts +0 -1
  114. package/dst/speechflow-node-whisper-worker-onnx.js +0 -131
  115. package/dst/speechflow-node-whisper-worker.d.ts +0 -1
  116. package/dst/speechflow-node-whisper-worker.js +0 -116
  117. package/dst/speechflow-node-whisper-worker2.d.ts +0 -1
  118. package/dst/speechflow-node-whisper-worker2.js +0 -82
  119. package/dst/speechflow-node-whisper.d.ts +0 -19
  120. package/dst/speechflow-node-whisper.js +0 -604
@@ -1,131 +0,0 @@
1
- "use strict";
2
- /*
3
- ** SpeechFlow - Speech Processing Flow Graph
4
- ** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
5
- ** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
6
- */
7
- var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
8
- if (k2 === undefined) k2 = k;
9
- var desc = Object.getOwnPropertyDescriptor(m, k);
10
- if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
11
- desc = { enumerable: true, get: function() { return m[k]; } };
12
- }
13
- Object.defineProperty(o, k2, desc);
14
- }) : (function(o, m, k, k2) {
15
- if (k2 === undefined) k2 = k;
16
- o[k2] = m[k];
17
- }));
18
- var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
19
- Object.defineProperty(o, "default", { enumerable: true, value: v });
20
- }) : function(o, v) {
21
- o["default"] = v;
22
- });
23
- var __importStar = (this && this.__importStar) || (function () {
24
- var ownKeys = function(o) {
25
- ownKeys = Object.getOwnPropertyNames || function (o) {
26
- var ar = [];
27
- for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
28
- return ar;
29
- };
30
- return ownKeys(o);
31
- };
32
- return function (mod) {
33
- if (mod && mod.__esModule) return mod;
34
- var result = {};
35
- if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
36
- __setModuleDefault(result, mod);
37
- return result;
38
- };
39
- })();
40
- var __importDefault = (this && this.__importDefault) || function (mod) {
41
- return (mod && mod.__esModule) ? mod : { "default": mod };
42
- };
43
- Object.defineProperty(exports, "__esModule", { value: true });
44
- /* standard dependencies */
45
- const node_path_1 = __importDefault(require("node:path"));
46
- const node_worker_threads_1 = __importDefault(require("node:worker_threads"));
47
- /* external dependencies */
48
- const Transformers = __importStar(require("@huggingface/transformers"));
49
- /* utility function for sending a log message */
50
- const log = (message) => node_worker_threads_1.default.parentPort.postMessage({ type: "log", message });
51
- /* internal state */
52
- let transcriber = null;
53
- /* OpenAI Whisper models (ONNX variants) */
54
- const models = {
55
- "v1-tiny": { model: "onnx-community/whisper-tiny-ONNX" },
56
- "v1-base": { model: "onnx-community/whisper-base" },
57
- "v1-small": { model: "onnx-community/whisper-small" },
58
- "v1-medium": { model: "onnx-community/whisper-medium-ONNX" },
59
- "v2-large": { model: "reach-vb/whisper-large-v2-onnx" },
60
- "v3-large": { model: "onnx-community/whisper-large-v3-ONNX" },
61
- "v3-large-turbo": { model: "onnx-community/whisper-large-v3-turbo" }
62
- };
63
- /* thread communication hook */
64
- node_worker_threads_1.default.parentPort?.on("message", async (request) => {
65
- let response = null;
66
- if (request.type === "open") {
67
- /* initialize Whisper */
68
- const model = models[request.model]?.model;
69
- if (!model)
70
- response = { type: "error", message: `unknown Whisper model "${request.model}"` };
71
- else {
72
- log(`loading Whisper model "${request.model}": BEGIN`);
73
- transcriber = await Transformers.pipeline("automatic-speech-recognition", model, {
74
- cache_dir: node_path_1.default.join(request.cacheDir, "whisper"),
75
- dtype: "q4",
76
- device: "gpu"
77
- });
78
- if (transcriber === null) {
79
- log(`loading Whisper model "${request.model}": FAILED`);
80
- response = { type: "error", message: "failed to open Whisper" };
81
- }
82
- else {
83
- log(`loading Whisper model "${request.model}": SUCCESS`);
84
- response = { type: "ok" };
85
- }
86
- }
87
- }
88
- else if (request.type === "task-request") {
89
- /* perform a speech-to-text transcription with Whisper */
90
- /*
91
- const streamer = new Transformers.TextStreamer(transcriber!.tokenizer, {
92
- skip_prompt: true,
93
- callback_function: (text) => {
94
- console.log("TEXT", text)
95
- }
96
- })
97
- */
98
- log(`${request.task.type} transcription task ${request.task.id}": START`);
99
- const result = await transcriber(request.task.audio, {
100
- chunk_length_s: 3,
101
- stride_length_s: 1,
102
- language: request.task.language,
103
- task: "transcribe",
104
- force_full_sequences: false,
105
- use_cache: true,
106
- return_timestamps: true,
107
- // streamer
108
- });
109
- log(`${request.task.type} transcription task ${request.task.id}": END`);
110
- console.log("RESULT", JSON.stringify(result));
111
- const text = Array.isArray(result) ? result[0].text : result.text;
112
- const taskResponse = {
113
- type: request.task.type,
114
- id: request.task.id,
115
- language: request.task.language,
116
- text: text ?? ""
117
- };
118
- response = { type: "task-response", task: taskResponse };
119
- }
120
- else if (request.type === "close") {
121
- /* shutdown Whisper */
122
- if (transcriber !== null) {
123
- log("unloading Whisper model: BEGIN");
124
- await transcriber.dispose();
125
- transcriber = null;
126
- log("unloading Whisper model: END");
127
- }
128
- }
129
- if (response !== null)
130
- node_worker_threads_1.default.parentPort.postMessage(response);
131
- });
@@ -1 +0,0 @@
1
- export {};
@@ -1,116 +0,0 @@
1
- "use strict";
2
- /*
3
- ** SpeechFlow - Speech Processing Flow Graph
4
- ** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
5
- ** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
6
- */
7
- var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
8
- if (k2 === undefined) k2 = k;
9
- var desc = Object.getOwnPropertyDescriptor(m, k);
10
- if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
11
- desc = { enumerable: true, get: function() { return m[k]; } };
12
- }
13
- Object.defineProperty(o, k2, desc);
14
- }) : (function(o, m, k, k2) {
15
- if (k2 === undefined) k2 = k;
16
- o[k2] = m[k];
17
- }));
18
- var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
19
- Object.defineProperty(o, "default", { enumerable: true, value: v });
20
- }) : function(o, v) {
21
- o["default"] = v;
22
- });
23
- var __importStar = (this && this.__importStar) || (function () {
24
- var ownKeys = function(o) {
25
- ownKeys = Object.getOwnPropertyNames || function (o) {
26
- var ar = [];
27
- for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
28
- return ar;
29
- };
30
- return ownKeys(o);
31
- };
32
- return function (mod) {
33
- if (mod && mod.__esModule) return mod;
34
- var result = {};
35
- if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
36
- __setModuleDefault(result, mod);
37
- return result;
38
- };
39
- })();
40
- var __importDefault = (this && this.__importDefault) || function (mod) {
41
- return (mod && mod.__esModule) ? mod : { "default": mod };
42
- };
43
- Object.defineProperty(exports, "__esModule", { value: true });
44
- /* standard dependencies */
45
- const node_path_1 = __importDefault(require("node:path"));
46
- const node_worker_threads_1 = __importDefault(require("node:worker_threads"));
47
- /* external dependencies */
48
- const Transformers = __importStar(require("@huggingface/transformers"));
49
- /* utility function for sending a log message */
50
- const log = (message) => node_worker_threads_1.default.parentPort.postMessage({ type: "log", message });
51
- /* internal state */
52
- let transcriber = null;
53
- /* thread communication hook */
54
- node_worker_threads_1.default.parentPort?.on("message", async (request) => {
55
- let response = null;
56
- if (request.type === "open") {
57
- /* initialize Whisper */
58
- log(`loading Whisper model "${request.model}": BEGIN`);
59
- transcriber = await Transformers.pipeline("automatic-speech-recognition", request.model, {
60
- cache_dir: node_path_1.default.join(request.cacheDir, "whisper"),
61
- dtype: "q4",
62
- device: "gpu"
63
- });
64
- if (transcriber === null) {
65
- log(`loading Whisper model "${request.model}": FAILED`);
66
- response = { type: "error", message: "failed to open Whisper" };
67
- }
68
- else {
69
- log(`loading Whisper model "${request.model}": SUCCESS`);
70
- response = { type: "ok" };
71
- }
72
- }
73
- else if (request.type === "task-request") {
74
- /* perform a speech-to-text transcription with Whisper */
75
- /*
76
- const streamer = new Transformers.TextStreamer(transcriber!.tokenizer, {
77
- skip_prompt: true,
78
- callback_function: (text) => {
79
- console.log("TEXT", text)
80
- }
81
- })
82
- */
83
- log(`${request.task.type} transcription task ${request.task.id}": START`);
84
- const result = await transcriber(request.task.audio, {
85
- chunk_length_s: 3,
86
- stride_length_s: 1,
87
- language: request.task.language,
88
- task: "transcribe",
89
- force_full_sequences: false,
90
- use_cache: true,
91
- return_timestamps: true,
92
- // streamer
93
- });
94
- log(`${request.task.type} transcription task ${request.task.id}": END`);
95
- console.log("RESULT", JSON.stringify(result));
96
- const text = Array.isArray(result) ? result[0].text : result.text;
97
- const taskResponse = {
98
- type: request.task.type,
99
- id: request.task.id,
100
- language: request.task.language,
101
- text: text ?? ""
102
- };
103
- response = { type: "task-response", task: taskResponse };
104
- }
105
- else if (request.type === "close") {
106
- /* shutdown Whisper */
107
- if (transcriber !== null) {
108
- log("unloading Whisper model: BEGIN");
109
- await transcriber.dispose();
110
- transcriber = null;
111
- log("unloading Whisper model: END");
112
- }
113
- }
114
- if (response !== null)
115
- node_worker_threads_1.default.parentPort.postMessage(response);
116
- });
@@ -1 +0,0 @@
1
- export {};
@@ -1,82 +0,0 @@
1
- "use strict";
2
- /*
3
- ** SpeechFlow - Speech Processing Flow Graph
4
- ** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
5
- ** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
6
- */
7
- var __importDefault = (this && this.__importDefault) || function (mod) {
8
- return (mod && mod.__esModule) ? mod : { "default": mod };
9
- };
10
- Object.defineProperty(exports, "__esModule", { value: true });
11
- /* standard dependencies */
12
- const node_worker_threads_1 = __importDefault(require("node:worker_threads"));
13
- /* external dependencies */
14
- const smart_whisper_1 = require("smart-whisper");
15
- /* utility function for sending a log message */
16
- const log = (message) => node_worker_threads_1.default.parentPort.postMessage({ type: "log", message });
17
- /* internal state */
18
- let whisper = null;
19
- /* thread communication hook */
20
- node_worker_threads_1.default.parentPort?.on("message", async (request) => {
21
- let response = null;
22
- if (request.type === "open") {
23
- /* initialize Whisper */
24
- log(`loading Whisper model "${request.model}": BEGIN`);
25
- const name = await smart_whisper_1.manager.download(request.model);
26
- const resolved = smart_whisper_1.manager.resolve(name);
27
- whisper = new smart_whisper_1.Whisper(resolved, {
28
- gpu: true,
29
- offload: 120 * 60
30
- });
31
- if (whisper === null) {
32
- log(`loading Whisper model "${request.model}": FAILED`);
33
- response = { type: "error", message: "failed to open Whisper" };
34
- }
35
- else {
36
- await whisper.load();
37
- log(`loading Whisper model "${request.model}": SUCCESS`);
38
- response = { type: "ok" };
39
- }
40
- }
41
- else if (request.type === "task-request") {
42
- log(`${request.task.type} transcription task ${request.task.id}": START`);
43
- const task = await whisper.transcribe(request.task.audio, {
44
- language: request.task.language,
45
- n_threads: 16,
46
- no_timestamps: false,
47
- speed_up: true,
48
- suppress_non_speech_tokens: true,
49
- suppress_blank: true,
50
- debug_mode: false,
51
- print_special: false,
52
- print_progress: false,
53
- print_realtime: false,
54
- print_timestamps: false
55
- });
56
- task.on("transcribed", (result) => {
57
- console.log("TRANSCRIBED", JSON.stringify(result));
58
- });
59
- const result = await task.result;
60
- log(`${request.task.type} transcription task ${request.task.id}": END`);
61
- console.log("RESULT", result);
62
- const text = result[0].text;
63
- const taskResponse = {
64
- type: request.task.type,
65
- id: request.task.id,
66
- language: request.task.language,
67
- text: text ?? ""
68
- };
69
- response = { type: "task-response", task: taskResponse };
70
- }
71
- else if (request.type === "close") {
72
- /* shutdown Whisper */
73
- if (whisper !== null) {
74
- log("unloading Whisper model: BEGIN");
75
- await whisper.free();
76
- whisper = null;
77
- log("unloading Whisper model: END");
78
- }
79
- }
80
- if (response !== null)
81
- node_worker_threads_1.default.parentPort.postMessage(response);
82
- });
@@ -1,19 +0,0 @@
1
- import SpeechFlowNode from "./speechflow-node";
2
- export default class SpeechFlowNodeWhisper extends SpeechFlowNode {
3
- static name: string;
4
- private models;
5
- private transcriber;
6
- private vad;
7
- private queue;
8
- private queueRecv;
9
- private queueVAD;
10
- private queueSTT;
11
- private tqueue;
12
- constructor(id: string, cfg: {
13
- [id: string]: any;
14
- }, opts: {
15
- [id: string]: any;
16
- }, args: any[]);
17
- open(): Promise<void>;
18
- close(): Promise<void>;
19
- }