speechflow 0.9.5 → 0.9.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +19 -0
- package/README.md +221 -53
- package/dst/speechflow-node-a2a-ffmpeg.d.ts +13 -0
- package/dst/speechflow-node-a2a-ffmpeg.js +152 -0
- package/dst/speechflow-node-a2a-wav.d.ts +11 -0
- package/dst/speechflow-node-a2a-wav.js +170 -0
- package/dst/speechflow-node-a2t-deepgram.d.ts +12 -0
- package/dst/speechflow-node-a2t-deepgram.js +220 -0
- package/dst/speechflow-node-deepgram.d.ts +3 -1
- package/dst/speechflow-node-deepgram.js +86 -22
- package/dst/speechflow-node-deepl.d.ts +3 -1
- package/dst/speechflow-node-deepl.js +25 -20
- package/dst/speechflow-node-device.d.ts +3 -1
- package/dst/speechflow-node-device.js +53 -2
- package/dst/speechflow-node-elevenlabs.d.ts +3 -1
- package/dst/speechflow-node-elevenlabs.js +37 -42
- package/dst/speechflow-node-ffmpeg.d.ts +3 -1
- package/dst/speechflow-node-ffmpeg.js +42 -4
- package/dst/speechflow-node-file.d.ts +3 -1
- package/dst/speechflow-node-file.js +84 -13
- package/dst/speechflow-node-format.d.ts +11 -0
- package/dst/speechflow-node-format.js +80 -0
- package/dst/speechflow-node-gemma.d.ts +3 -1
- package/dst/speechflow-node-gemma.js +84 -23
- package/dst/speechflow-node-mqtt.d.ts +13 -0
- package/dst/speechflow-node-mqtt.js +181 -0
- package/dst/speechflow-node-opus.d.ts +12 -0
- package/dst/speechflow-node-opus.js +135 -0
- package/dst/speechflow-node-subtitle.d.ts +12 -0
- package/dst/speechflow-node-subtitle.js +96 -0
- package/dst/speechflow-node-t2a-elevenlabs.d.ts +13 -0
- package/dst/speechflow-node-t2a-elevenlabs.js +182 -0
- package/dst/speechflow-node-t2t-deepl.d.ts +12 -0
- package/dst/speechflow-node-t2t-deepl.js +133 -0
- package/dst/speechflow-node-t2t-format.d.ts +11 -0
- package/dst/speechflow-node-t2t-format.js +80 -0
- package/dst/speechflow-node-t2t-gemma.d.ts +13 -0
- package/dst/speechflow-node-t2t-gemma.js +213 -0
- package/dst/speechflow-node-t2t-opus.d.ts +12 -0
- package/dst/speechflow-node-t2t-opus.js +135 -0
- package/dst/speechflow-node-t2t-subtitle.d.ts +12 -0
- package/dst/speechflow-node-t2t-subtitle.js +96 -0
- package/dst/speechflow-node-trace.d.ts +11 -0
- package/dst/speechflow-node-trace.js +88 -0
- package/dst/speechflow-node-wav.d.ts +11 -0
- package/dst/speechflow-node-wav.js +170 -0
- package/dst/speechflow-node-websocket.d.ts +3 -1
- package/dst/speechflow-node-websocket.js +149 -49
- package/dst/speechflow-node-whisper-common.d.ts +34 -0
- package/dst/speechflow-node-whisper-common.js +7 -0
- package/dst/speechflow-node-whisper-ggml.d.ts +1 -0
- package/dst/speechflow-node-whisper-ggml.js +97 -0
- package/dst/speechflow-node-whisper-onnx.d.ts +1 -0
- package/dst/speechflow-node-whisper-onnx.js +131 -0
- package/dst/speechflow-node-whisper-worker-ggml.d.ts +1 -0
- package/dst/speechflow-node-whisper-worker-ggml.js +97 -0
- package/dst/speechflow-node-whisper-worker-onnx.d.ts +1 -0
- package/dst/speechflow-node-whisper-worker-onnx.js +131 -0
- package/dst/speechflow-node-whisper-worker.d.ts +1 -0
- package/dst/speechflow-node-whisper-worker.js +116 -0
- package/dst/speechflow-node-whisper-worker2.d.ts +1 -0
- package/dst/speechflow-node-whisper-worker2.js +82 -0
- package/dst/speechflow-node-whisper.d.ts +19 -0
- package/dst/speechflow-node-whisper.js +604 -0
- package/dst/speechflow-node-x2x-trace.d.ts +11 -0
- package/dst/speechflow-node-x2x-trace.js +88 -0
- package/dst/speechflow-node-xio-device.d.ts +13 -0
- package/dst/speechflow-node-xio-device.js +205 -0
- package/dst/speechflow-node-xio-file.d.ts +11 -0
- package/dst/speechflow-node-xio-file.js +176 -0
- package/dst/speechflow-node-xio-mqtt.d.ts +13 -0
- package/dst/speechflow-node-xio-mqtt.js +181 -0
- package/dst/speechflow-node-xio-websocket.d.ts +13 -0
- package/dst/speechflow-node-xio-websocket.js +275 -0
- package/dst/speechflow-node.d.ts +24 -6
- package/dst/speechflow-node.js +63 -6
- package/dst/speechflow-utils.d.ts +23 -0
- package/dst/speechflow-utils.js +194 -0
- package/dst/speechflow.js +146 -43
- package/etc/biome.jsonc +12 -4
- package/etc/stx.conf +65 -0
- package/package.d/@ericedouard+vad-node-realtime+0.2.0.patch +18 -0
- package/package.json +49 -31
- package/sample.yaml +59 -27
- package/src/lib.d.ts +6 -1
- package/src/{speechflow-node-ffmpeg.ts → speechflow-node-a2a-ffmpeg.ts} +10 -4
- package/src/speechflow-node-a2a-wav.ts +143 -0
- package/src/speechflow-node-a2t-deepgram.ts +199 -0
- package/src/{speechflow-node-elevenlabs.ts → speechflow-node-t2a-elevenlabs.ts} +38 -45
- package/src/{speechflow-node-deepl.ts → speechflow-node-t2t-deepl.ts} +36 -25
- package/src/speechflow-node-t2t-format.ts +85 -0
- package/src/{speechflow-node-gemma.ts → speechflow-node-t2t-gemma.ts} +89 -25
- package/src/speechflow-node-t2t-opus.ts +111 -0
- package/src/speechflow-node-t2t-subtitle.ts +101 -0
- package/src/speechflow-node-x2x-trace.ts +92 -0
- package/src/{speechflow-node-device.ts → speechflow-node-xio-device.ts} +25 -3
- package/src/speechflow-node-xio-file.ts +153 -0
- package/src/speechflow-node-xio-mqtt.ts +154 -0
- package/src/speechflow-node-xio-websocket.ts +248 -0
- package/src/speechflow-node.ts +63 -6
- package/src/speechflow-utils.ts +212 -0
- package/src/speechflow.ts +150 -43
- package/etc/nps.yaml +0 -40
- package/src/speechflow-node-deepgram.ts +0 -133
- package/src/speechflow-node-file.ts +0 -108
- package/src/speechflow-node-websocket.ts +0 -179
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
"use strict";
/*
** SpeechFlow - Speech Processing Flow Graph
** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
*/
/* TypeScript-emitted CommonJS/ESM interop helpers (generated code, kept as-is) */
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    var desc = Object.getOwnPropertyDescriptor(m, k);
    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
        desc = { enumerable: true, get: function() { return m[k]; } };
    }
    Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    o[k2] = m[k];
}));
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
    Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
    o["default"] = v;
});
var __importStar = (this && this.__importStar) || (function () {
    var ownKeys = function(o) {
        ownKeys = Object.getOwnPropertyNames || function (o) {
            var ar = [];
            for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
            return ar;
        };
        return ownKeys(o);
    };
    return function (mod) {
        if (mod && mod.__esModule) return mod;
        var result = {};
        if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
        __setModuleDefault(result, mod);
        return result;
    };
})();
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
/* standard dependencies */
const node_path_1 = __importDefault(require("node:path"));
const node_worker_threads_1 = __importDefault(require("node:worker_threads"));
/* external dependencies */
const Transformers = __importStar(require("@huggingface/transformers"));
/* utility function for sending a log message to the parent thread */
const log = (message) => node_worker_threads_1.default.parentPort.postMessage({ type: "log", message });
/* internal state: the lazily opened Transformers.js ASR pipeline */
let transcriber = null;
/* OpenAI Whisper models (ONNX variants), keyed by the short names accepted in "open" requests */
const models = {
    "v1-tiny": { model: "onnx-community/whisper-tiny-ONNX" },
    "v1-base": { model: "onnx-community/whisper-base" },
    "v1-small": { model: "onnx-community/whisper-small" },
    "v1-medium": { model: "onnx-community/whisper-medium-ONNX" },
    "v2-large": { model: "reach-vb/whisper-large-v2-onnx" },
    "v3-large": { model: "onnx-community/whisper-large-v3-ONNX" },
    "v3-large-turbo": { model: "onnx-community/whisper-large-v3-turbo" }
};
/* thread communication hook: handles "open", "task-request" and "close" requests
   from the parent thread and answers with "ok"/"error"/"task-response" messages */
node_worker_threads_1.default.parentPort?.on("message", async (request) => {
    let response = null;
    if (request.type === "open") {
        /* initialize Whisper */
        const model = models[request.model]?.model;
        if (!model)
            response = { type: "error", message: `unknown Whisper model "${request.model}"` };
        else {
            log(`loading Whisper model "${request.model}": BEGIN`);
            transcriber = await Transformers.pipeline("automatic-speech-recognition", model, {
                cache_dir: node_path_1.default.join(request.cacheDir, "whisper"),
                dtype: "q4",
                device: "gpu"
            });
            if (transcriber === null) {
                log(`loading Whisper model "${request.model}": FAILED`);
                response = { type: "error", message: "failed to open Whisper" };
            }
            else {
                log(`loading Whisper model "${request.model}": SUCCESS`);
                response = { type: "ok" };
            }
        }
    }
    else if (request.type === "task-request") {
        /* perform a speech-to-text transcription with Whisper */
        if (transcriber === null) {
            /* FIX: guard against a task arriving before a successful "open" --
               previously this crashed the worker with a TypeError */
            response = { type: "error", message: "Whisper not opened" };
        }
        else {
            /*
            const streamer = new Transformers.TextStreamer(transcriber!.tokenizer, {
                skip_prompt: true,
                callback_function: (text) => {
                    console.log("TEXT", text)
                }
            })
            */
            /* FIX: template literals previously contained an unbalanced quote
               (`task ${id}": START`) -- now the id is properly quoted */
            log(`${request.task.type} transcription task "${request.task.id}": START`);
            const result = await transcriber(request.task.audio, {
                chunk_length_s: 3,
                stride_length_s: 1,
                language: request.task.language,
                task: "transcribe",
                force_full_sequences: false,
                use_cache: true,
                return_timestamps: true,
                // streamer
            });
            log(`${request.task.type} transcription task "${request.task.id}": END`);
            console.log("RESULT", JSON.stringify(result)); /* NOTE(review): debug leftover -- consider removing or routing through log() */
            const text = Array.isArray(result) ? result[0].text : result.text;
            const taskResponse = {
                type: request.task.type,
                id: request.task.id,
                language: request.task.language,
                text: text ?? ""
            };
            response = { type: "task-response", task: taskResponse };
        }
    }
    else if (request.type === "close") {
        /* shutdown Whisper */
        if (transcriber !== null) {
            log("unloading Whisper model: BEGIN");
            await transcriber.dispose();
            transcriber = null;
            log("unloading Whisper model: END");
        }
    }
    if (response !== null)
        node_worker_threads_1.default.parentPort.postMessage(response);
});
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
/* empty marker export: makes this declaration file an ES module with no public exports */
export {};
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
"use strict";
/*
** SpeechFlow - Speech Processing Flow Graph
** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
*/
/* TypeScript-emitted CommonJS/ESM interop helpers (generated code, kept as-is) */
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    var desc = Object.getOwnPropertyDescriptor(m, k);
    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
        desc = { enumerable: true, get: function() { return m[k]; } };
    }
    Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    o[k2] = m[k];
}));
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
    Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
    o["default"] = v;
});
var __importStar = (this && this.__importStar) || (function () {
    var ownKeys = function(o) {
        ownKeys = Object.getOwnPropertyNames || function (o) {
            var ar = [];
            for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
            return ar;
        };
        return ownKeys(o);
    };
    return function (mod) {
        if (mod && mod.__esModule) return mod;
        var result = {};
        if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
        __setModuleDefault(result, mod);
        return result;
    };
})();
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
/* standard dependencies */
const node_path_1 = __importDefault(require("node:path"));
const node_worker_threads_1 = __importDefault(require("node:worker_threads"));
/* external dependencies */
const Transformers = __importStar(require("@huggingface/transformers"));
/* utility function for sending a log message to the parent thread */
const log = (message) => node_worker_threads_1.default.parentPort.postMessage({ type: "log", message });
/* internal state: the lazily opened Transformers.js ASR pipeline.
   NOTE: unlike the table-driven ONNX worker, this variant passes
   request.model straight to Transformers.pipeline() */
let transcriber = null;
/* thread communication hook: handles "open", "task-request" and "close" requests
   from the parent thread and answers with "ok"/"error"/"task-response" messages */
node_worker_threads_1.default.parentPort?.on("message", async (request) => {
    let response = null;
    if (request.type === "open") {
        /* initialize Whisper */
        log(`loading Whisper model "${request.model}": BEGIN`);
        transcriber = await Transformers.pipeline("automatic-speech-recognition", request.model, {
            cache_dir: node_path_1.default.join(request.cacheDir, "whisper"),
            dtype: "q4",
            device: "gpu"
        });
        if (transcriber === null) {
            log(`loading Whisper model "${request.model}": FAILED`);
            response = { type: "error", message: "failed to open Whisper" };
        }
        else {
            log(`loading Whisper model "${request.model}": SUCCESS`);
            response = { type: "ok" };
        }
    }
    else if (request.type === "task-request") {
        /* perform a speech-to-text transcription with Whisper */
        if (transcriber === null) {
            /* FIX: guard against a task arriving before a successful "open" --
               previously this crashed the worker with a TypeError */
            response = { type: "error", message: "Whisper not opened" };
        }
        else {
            /*
            const streamer = new Transformers.TextStreamer(transcriber!.tokenizer, {
                skip_prompt: true,
                callback_function: (text) => {
                    console.log("TEXT", text)
                }
            })
            */
            /* FIX: template literals previously contained an unbalanced quote
               (`task ${id}": START`) -- now the id is properly quoted */
            log(`${request.task.type} transcription task "${request.task.id}": START`);
            const result = await transcriber(request.task.audio, {
                chunk_length_s: 3,
                stride_length_s: 1,
                language: request.task.language,
                task: "transcribe",
                force_full_sequences: false,
                use_cache: true,
                return_timestamps: true,
                // streamer
            });
            log(`${request.task.type} transcription task "${request.task.id}": END`);
            console.log("RESULT", JSON.stringify(result)); /* NOTE(review): debug leftover -- consider removing or routing through log() */
            const text = Array.isArray(result) ? result[0].text : result.text;
            const taskResponse = {
                type: request.task.type,
                id: request.task.id,
                language: request.task.language,
                text: text ?? ""
            };
            response = { type: "task-response", task: taskResponse };
        }
    }
    else if (request.type === "close") {
        /* shutdown Whisper */
        if (transcriber !== null) {
            log("unloading Whisper model: BEGIN");
            await transcriber.dispose();
            transcriber = null;
            log("unloading Whisper model: END");
        }
    }
    if (response !== null)
        node_worker_threads_1.default.parentPort.postMessage(response);
});
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
/* empty marker export: makes this declaration file an ES module with no public exports */
export {};
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
"use strict";
/*
** SpeechFlow - Speech Processing Flow Graph
** Copyright (c) 2024-2025 Dr. Ralf S. Engelschall <rse@engelschall.com>
** Licensed under GPL 3.0 <https://spdx.org/licenses/GPL-3.0-only>
*/
/* TypeScript-emitted CommonJS interop helper (generated code, kept as-is) */
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
/* standard dependencies */
const node_worker_threads_1 = __importDefault(require("node:worker_threads"));
/* external dependencies */
const smart_whisper_1 = require("smart-whisper");
/* utility function for sending a log message to the parent thread */
const log = (message) => node_worker_threads_1.default.parentPort.postMessage({ type: "log", message });
/* internal state: the lazily opened smart-whisper (GGML/whisper.cpp) instance */
let whisper = null;
/* thread communication hook: handles "open", "task-request" and "close" requests
   from the parent thread and answers with "ok"/"error"/"task-response" messages */
node_worker_threads_1.default.parentPort?.on("message", async (request) => {
    let response = null;
    if (request.type === "open") {
        /* initialize Whisper: download (if necessary) and resolve the GGML model,
           then instantiate with GPU support and a 2h idle offload timeout */
        log(`loading Whisper model "${request.model}": BEGIN`);
        const name = await smart_whisper_1.manager.download(request.model);
        const resolved = smart_whisper_1.manager.resolve(name);
        whisper = new smart_whisper_1.Whisper(resolved, {
            gpu: true,
            offload: 120 * 60
        });
        if (whisper === null) {
            log(`loading Whisper model "${request.model}": FAILED`);
            response = { type: "error", message: "failed to open Whisper" };
        }
        else {
            await whisper.load();
            log(`loading Whisper model "${request.model}": SUCCESS`);
            response = { type: "ok" };
        }
    }
    else if (request.type === "task-request") {
        /* perform a speech-to-text transcription with Whisper */
        if (whisper === null) {
            /* FIX: guard against a task arriving before a successful "open" --
               previously this crashed the worker with a TypeError */
            response = { type: "error", message: "Whisper not opened" };
        }
        else {
            /* FIX: template literals previously contained an unbalanced quote
               (`task ${id}": START`) -- now the id is properly quoted */
            log(`${request.task.type} transcription task "${request.task.id}": START`);
            const task = await whisper.transcribe(request.task.audio, {
                language: request.task.language,
                n_threads: 16,
                no_timestamps: false,
                speed_up: true,
                suppress_non_speech_tokens: true,
                suppress_blank: true,
                debug_mode: false,
                print_special: false,
                print_progress: false,
                print_realtime: false,
                print_timestamps: false
            });
            task.on("transcribed", (result) => {
                console.log("TRANSCRIBED", JSON.stringify(result)); /* NOTE(review): debug leftover */
            });
            const result = await task.result;
            log(`${request.task.type} transcription task "${request.task.id}": END`);
            console.log("RESULT", result); /* NOTE(review): debug leftover -- consider removing or routing through log() */
            /* FIX: an empty result array previously threw a TypeError here --
               fall back to an empty transcription instead */
            const text = result.length > 0 ? result[0].text : "";
            const taskResponse = {
                type: request.task.type,
                id: request.task.id,
                language: request.task.language,
                text: text ?? ""
            };
            response = { type: "task-response", task: taskResponse };
        }
    }
    else if (request.type === "close") {
        /* shutdown Whisper */
        if (whisper !== null) {
            log("unloading Whisper model: BEGIN");
            await whisper.free();
            whisper = null;
            log("unloading Whisper model: END");
        }
    }
    if (response !== null)
        node_worker_threads_1.default.parentPort.postMessage(response);
});
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
/* generated TypeScript declaration for the Whisper speech-to-text flow-graph node */
import SpeechFlowNode from "./speechflow-node";
export default class SpeechFlowNodeWhisper extends SpeechFlowNode {
    /* node type name (shadows Function.name on purpose; used for node registration) */
    static name: string;
    /* supported Whisper model table */
    private models;
    /* transcription backend handle */
    private transcriber;
    /* voice-activity detection handle */
    private vad;
    /* processing queues -- presumably stages of the receive -> VAD -> STT pipeline; verify against implementation */
    private queue;
    private queueRecv;
    private queueVAD;
    private queueSTT;
    private tqueue;
    constructor(id: string, cfg: {
        [id: string]: any;
    }, opts: {
        [id: string]: any;
    }, args: any[]);
    open(): Promise<void>;
    close(): Promise<void>;
}
|