handy-remote-server 1.2.0 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +42 -6
- package/dist/index.js +164 -169
- package/package.json +6 -2
- package/rust-infer/src/main.rs +90 -55
package/README.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# Handy Remote Server 🎙️
|
|
2
2
|
|
|
3
|
-
A lightweight standalone inference server for [Handy](https://github.com/
|
|
3
|
+
A lightweight standalone inference server for [Handy](https://github.com/viktor-silakov/Handy), allowing you to transcribe audio from external devices, weak computers, and more.
|
|
4
4
|
|
|
5
5
|
## Installation
|
|
6
6
|
|
|
@@ -67,11 +67,47 @@ Handy Remote Server is running on port 3000
|
|
|
67
67
|
|
|
68
68
|
## Environment Variables
|
|
69
69
|
|
|
70
|
-
| Variable | Default | Description
|
|
71
|
-
| ---------------- | ------------------------------------------- |
|
|
72
|
-
| `PORT` | `3000` | Server port
|
|
73
|
-
| `API_KEY` | auto-generated, saved to `~/.handy/api_key` | Bearer token for authentication
|
|
74
|
-
| `INFER_CLI_PATH` | auto-detected | Path to the `rust-infer` binary
|
|
70
|
+
| Variable | Default | Description |
|
|
71
|
+
| ---------------- | ------------------------------------------- | -------------------------------------- |
|
|
72
|
+
| `PORT` | `3000` | Server port |
|
|
73
|
+
| `API_KEY` | auto-generated, saved to `~/.handy/api_key` | Bearer token for authentication |
|
|
74
|
+
| `INFER_CLI_PATH` | auto-detected | Path to the `rust-infer` binary |
|
|
75
|
+
| `MODEL_TYPE` | `gigaam` | Transcription model to use (see below) |
|
|
76
|
+
|
|
77
|
+
## Supported Models
|
|
78
|
+
|
|
79
|
+
```bash
|
|
80
|
+
# Русский (по умолчанию)
|
|
81
|
+
MODEL_TYPE=gigaam npx handy-remote-server
|
|
82
|
+
|
|
83
|
+
# Мультиязычный (включая русский) — Whisper модели
|
|
84
|
+
MODEL_TYPE=whisper-tiny npx handy-remote-server # 75 MB
|
|
85
|
+
MODEL_TYPE=whisper-base npx handy-remote-server # 142 MB
|
|
86
|
+
MODEL_TYPE=whisper-small npx handy-remote-server # 487 MB
|
|
87
|
+
MODEL_TYPE=whisper-medium npx handy-remote-server # 1.5 GB
|
|
88
|
+
|
|
89
|
+
# Английский — Moonshine
|
|
90
|
+
MODEL_TYPE=moonshine-tiny npx handy-remote-server # 60 MB
|
|
91
|
+
MODEL_TYPE=moonshine-base npx handy-remote-server # 100 MB
|
|
92
|
+
|
|
93
|
+
# Английский — Breeze/Parakeet
|
|
94
|
+
MODEL_TYPE=parakeet npx handy-remote-server # ~200 MB
|
|
95
|
+
|
|
96
|
+
# Мультиязычный — SenseVoice
|
|
97
|
+
MODEL_TYPE=sensevoice npx handy-remote-server # ~200 MB
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
| Model | Language | Size | Speed |
|
|
101
|
+
| ------------------ | -------------- | ------- | --------- |
|
|
102
|
+
| `gigaam` (default) | Russian | ~100 MB | ⚡ Fast |
|
|
103
|
+
| `whisper-tiny` | Multi-language | 75 MB | ⚡ Fast |
|
|
104
|
+
| `whisper-base` | Multi-language | 142 MB | ⚡ Fast |
|
|
105
|
+
| `whisper-small` | Multi-language | 487 MB | 🐌 Medium |
|
|
106
|
+
| `whisper-medium` | Multi-language | 1.5 GB | 🐢 Slow |
|
|
107
|
+
| `moonshine-tiny` | English | 60 MB | ⚡ Fast |
|
|
108
|
+
| `moonshine-base` | English | 100 MB | ⚡ Fast |
|
|
109
|
+
| `parakeet` | English | ~200 MB | 🐌 Medium |
|
|
110
|
+
| `sensevoice` | Multi-language | ~200 MB | 🐌 Medium |
|
|
75
111
|
|
|
76
112
|
## How It Works
|
|
77
113
|
|
package/dist/index.js
CHANGED
|
@@ -5,13 +5,14 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
|
5
5
|
};
|
|
6
6
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
7
7
|
const express_1 = __importDefault(require("express"));
|
|
8
|
-
const multer_1 = __importDefault(require("multer"));
|
|
9
8
|
const child_process_1 = require("child_process");
|
|
10
9
|
const crypto_1 = __importDefault(require("crypto"));
|
|
11
10
|
const path_1 = __importDefault(require("path"));
|
|
12
11
|
const fs_1 = __importDefault(require("fs"));
|
|
13
12
|
const os_1 = __importDefault(require("os"));
|
|
14
13
|
const dotenv_1 = __importDefault(require("dotenv"));
|
|
14
|
+
const tar_fs_1 = __importDefault(require("tar-fs"));
|
|
15
|
+
const gunzip_maybe_1 = __importDefault(require("gunzip-maybe"));
|
|
15
16
|
dotenv_1.default.config();
|
|
16
17
|
const app = (0, express_1.default)();
|
|
17
18
|
const port = process.env.PORT || 3000;
|
|
@@ -19,36 +20,21 @@ const port = process.env.PORT || 3000;
|
|
|
19
20
|
const handyDir = path_1.default.join(os_1.default.homedir(), '.handy');
|
|
20
21
|
const keyFilePath = path_1.default.join(handyDir, 'api_key');
|
|
21
22
|
function loadOrCreateApiKey() {
|
|
22
|
-
|
|
23
|
-
if (process.env.API_KEY) {
|
|
23
|
+
if (process.env.API_KEY)
|
|
24
24
|
return process.env.API_KEY;
|
|
25
|
-
}
|
|
26
|
-
// 2. Try to load from cached file
|
|
27
25
|
if (fs_1.default.existsSync(keyFilePath)) {
|
|
28
26
|
const cached = fs_1.default.readFileSync(keyFilePath, 'utf-8').trim();
|
|
29
|
-
if (cached.length > 0)
|
|
30
|
-
console.log(`\n======================================================`);
|
|
31
|
-
console.log(`Loaded API KEY from ${keyFilePath}`);
|
|
32
|
-
console.log(`Your API KEY is: ${cached}`);
|
|
33
|
-
console.log(`======================================================\n`);
|
|
27
|
+
if (cached.length > 0)
|
|
34
28
|
return cached;
|
|
35
|
-
}
|
|
36
29
|
}
|
|
37
|
-
// 3. Generate a new one and persist it
|
|
38
30
|
const newKey = crypto_1.default.randomBytes(32).toString('hex');
|
|
39
31
|
fs_1.default.mkdirSync(handyDir, { recursive: true });
|
|
40
32
|
fs_1.default.writeFileSync(keyFilePath, newKey + '\n', { mode: 0o600 });
|
|
41
|
-
console.log(`\n======================================================`);
|
|
42
|
-
console.log(`Generated a new API KEY (saved to ${keyFilePath})`);
|
|
43
|
-
console.log(`Your API KEY is: ${newKey}`);
|
|
44
|
-
console.log(`======================================================\n`);
|
|
45
33
|
return newKey;
|
|
46
34
|
}
|
|
47
35
|
const API_KEY = loadOrCreateApiKey();
|
|
48
36
|
// ββ Logging helpers βββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
49
|
-
function timestamp() {
|
|
50
|
-
return new Date().toISOString();
|
|
51
|
-
}
|
|
37
|
+
function timestamp() { return new Date().toISOString(); }
|
|
52
38
|
function formatBytes(bytes) {
|
|
53
39
|
if (bytes < 1024)
|
|
54
40
|
return `${bytes} B`;
|
|
@@ -61,94 +47,98 @@ function formatDuration(ms) {
|
|
|
61
47
|
return `${ms}ms`;
|
|
62
48
|
return `${(ms / 1000).toFixed(2)}s`;
|
|
63
49
|
}
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
50
|
+
const MODEL_REGISTRY = {
|
|
51
|
+
'gigaam': {
|
|
52
|
+
engine: 'gigaam',
|
|
53
|
+
url: 'https://blob.handy.computer/giga-am-v3.int8.onnx',
|
|
54
|
+
filename: 'gigaam.onnx'
|
|
55
|
+
},
|
|
56
|
+
'whisper-tiny': {
|
|
57
|
+
engine: 'whisper',
|
|
58
|
+
url: 'https://blob.handy.computer/ggml-tiny.bin',
|
|
59
|
+
filename: 'whisper-tiny.bin'
|
|
60
|
+
},
|
|
61
|
+
'whisper-base': {
|
|
62
|
+
engine: 'whisper',
|
|
63
|
+
url: 'https://blob.handy.computer/ggml-base.bin',
|
|
64
|
+
filename: 'whisper-base.bin'
|
|
65
|
+
},
|
|
66
|
+
'whisper-small': {
|
|
67
|
+
engine: 'whisper',
|
|
68
|
+
url: 'https://blob.handy.computer/ggml-small.bin',
|
|
69
|
+
filename: 'whisper-small.bin'
|
|
70
|
+
},
|
|
71
|
+
'whisper-medium': {
|
|
72
|
+
engine: 'whisper',
|
|
73
|
+
url: 'https://blob.handy.computer/whisper-medium-q4_1.bin',
|
|
74
|
+
filename: 'whisper-medium.bin'
|
|
75
|
+
},
|
|
76
|
+
'moonshine-tiny': {
|
|
77
|
+
engine: 'moonshine',
|
|
78
|
+
url: 'https://blob.handy.computer/moonshine-tiny-streaming-en.tar.gz',
|
|
79
|
+
filename: 'moonshine-tiny', // Dir name after extraction
|
|
80
|
+
isArchive: true
|
|
81
|
+
},
|
|
82
|
+
'moonshine-base': {
|
|
83
|
+
engine: 'moonshine',
|
|
84
|
+
url: 'https://blob.handy.computer/moonshine-base.tar.gz',
|
|
85
|
+
filename: 'moonshine-base',
|
|
86
|
+
isArchive: true
|
|
87
|
+
},
|
|
88
|
+
'parakeet': {
|
|
89
|
+
engine: 'parakeet',
|
|
90
|
+
url: 'https://blob.handy.computer/parakeet-v3-int8.tar.gz',
|
|
91
|
+
filename: 'parakeet-v3',
|
|
92
|
+
isArchive: true,
|
|
93
|
+
configFilename: 'preprocessor.json'
|
|
94
|
+
},
|
|
95
|
+
'sensevoice': {
|
|
96
|
+
engine: 'sensevoice',
|
|
97
|
+
url: 'https://blob.handy.computer/sense-voice-int8.tar.gz',
|
|
98
|
+
filename: 'sensevoice',
|
|
99
|
+
isArchive: true
|
|
100
|
+
}
|
|
101
|
+
};
|
|
102
|
+
const SELECTED_MODEL_TYPE = (process.env.MODEL_TYPE || 'gigaam').toLowerCase();
|
|
103
|
+
const modelCfg = MODEL_REGISTRY[SELECTED_MODEL_TYPE];
|
|
104
|
+
if (!modelCfg) {
|
|
105
|
+
console.error(`Error: Unknown MODEL_TYPE "${SELECTED_MODEL_TYPE}".`);
|
|
106
|
+
console.error(`Supported types: ${Object.keys(MODEL_REGISTRY).join(', ')}`);
|
|
107
|
+
process.exit(1);
|
|
68
108
|
}
|
|
109
|
+
// ββ Directories βββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
110
|
+
const modelsBaseDir = path_1.default.join(__dirname, '..', 'models');
|
|
69
111
|
const uploadDir = path_1.default.join(__dirname, '..', 'uploads');
|
|
70
|
-
if (!fs_1.default.existsSync(
|
|
71
|
-
fs_1.default.mkdirSync(
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
let
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
console.log(` From: ${ip}`);
|
|
82
|
-
console.log(` Headers: Content-Type=${req.headers['content-type'] || 'N/A'}, Content-Length=${req.headers['content-length'] || 'N/A'}`);
|
|
83
|
-
// Store metadata on request for later use
|
|
84
|
-
req._reqId = reqId;
|
|
85
|
-
req._startTime = start;
|
|
86
|
-
req._ip = ip;
|
|
87
|
-
const originalJson = res.json.bind(res);
|
|
88
|
-
res.json = function (body) {
|
|
89
|
-
const duration = Date.now() - start;
|
|
90
|
-
const status = res.statusCode;
|
|
91
|
-
console.log(`[${timestamp()}] ββ RESPONSE #${reqId} βββββββββββββββββββββ`);
|
|
92
|
-
console.log(` Status: ${status}`);
|
|
93
|
-
console.log(` Duration: ${formatDuration(duration)}`);
|
|
94
|
-
if (body?.text) {
|
|
95
|
-
const preview = body.text.length > 100 ? body.text.substring(0, 100) + '...' : body.text;
|
|
96
|
-
console.log(` Result: "${preview}"`);
|
|
97
|
-
}
|
|
98
|
-
else if (body?.error) {
|
|
99
|
-
console.log(` Error: ${body.error}`);
|
|
100
|
-
}
|
|
101
|
-
console.log(` ββββββββββββββββββββββββββββββββββββββββββββββββ`);
|
|
102
|
-
return originalJson(body);
|
|
103
|
-
};
|
|
104
|
-
next();
|
|
105
|
-
});
|
|
106
|
-
// ββ Authentication middleware βββββββββββββββββββββββββββββββββββββββββ
|
|
107
|
-
app.use((req, res, next) => {
|
|
108
|
-
const authHeader = req.headers.authorization;
|
|
109
|
-
if (!authHeader || !authHeader.startsWith('Bearer ')) {
|
|
110
|
-
console.log(` Auth: REJECTED (missing/invalid Authorization header)`);
|
|
111
|
-
return res.status(401).json({ error: 'Missing or invalid Authorization header' });
|
|
112
|
-
}
|
|
113
|
-
const token = authHeader.split(' ')[1];
|
|
114
|
-
if (token !== API_KEY) {
|
|
115
|
-
console.log(` Auth: REJECTED (invalid key)`);
|
|
116
|
-
return res.status(403).json({ error: 'Invalid API Key' });
|
|
117
|
-
}
|
|
118
|
-
console.log(` Auth: OK`);
|
|
119
|
-
next();
|
|
120
|
-
});
|
|
121
|
-
// ββ Multer storage ββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
122
|
-
const storage = multer_1.default.diskStorage({
|
|
123
|
-
destination: function (req, file, cb) {
|
|
124
|
-
cb(null, uploadDir);
|
|
125
|
-
},
|
|
126
|
-
filename: function (req, file, cb) {
|
|
127
|
-
const uniqueSuffix = Date.now() + '-' + Math.round(Math.random() * 1E9);
|
|
128
|
-
cb(null, file.fieldname + '-' + uniqueSuffix + '.wav');
|
|
112
|
+
[modelsBaseDir, uploadDir].forEach(d => { if (!fs_1.default.existsSync(d))
|
|
113
|
+
fs_1.default.mkdirSync(d, { recursive: true }); });
|
|
114
|
+
// ββ Model paths βββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
115
|
+
const modelPath = path_1.default.join(modelsBaseDir, modelCfg.filename);
|
|
116
|
+
let actualModelFile = modelPath;
|
|
117
|
+
let parakeetConfigPath = '';
|
|
118
|
+
if (modelCfg.isArchive) {
|
|
119
|
+
// For archives, we look for model.onnx inside the directory
|
|
120
|
+
actualModelFile = path_1.default.join(modelPath, 'model.onnx');
|
|
121
|
+
if (modelCfg.engine === 'parakeet') {
|
|
122
|
+
parakeetConfigPath = path_1.default.join(modelPath, modelCfg.configFilename);
|
|
129
123
|
}
|
|
130
|
-
}
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
const gigaamModelPath = path_1.default.join(modelsDir, 'gigaam.onnx');
|
|
135
|
-
async function downloadFile(url, dest) {
|
|
136
|
-
if (fs_1.default.existsSync(dest))
|
|
124
|
+
}
|
|
125
|
+
// ββ Download & Extract ββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
126
|
+
async function downloadAndPrepare() {
|
|
127
|
+
if (fs_1.default.existsSync(actualModelFile))
|
|
137
128
|
return;
|
|
138
|
-
|
|
139
|
-
console.log(
|
|
140
|
-
console.log(`
|
|
141
|
-
|
|
142
|
-
const response = await fetch(url);
|
|
129
|
+
const dest = modelCfg.isArchive ? modelPath + '.tar.gz' : modelPath;
|
|
130
|
+
console.log(`\n📥 Downloading model: ${SELECTED_MODEL_TYPE}...`);
|
|
131
|
+
console.log(` URL: ${modelCfg.url}`);
|
|
132
|
+
const response = await fetch(modelCfg.url);
|
|
143
133
|
if (!response.ok)
|
|
144
|
-
throw new Error(`Failed to fetch
|
|
134
|
+
throw new Error(`Failed to fetch: ${response.statusText}`);
|
|
145
135
|
const totalBytes = parseInt(response.headers.get('content-length') || '0', 10);
|
|
146
136
|
let downloadedBytes = 0;
|
|
147
137
|
const startTime = Date.now();
|
|
148
138
|
const fileStream = fs_1.default.createWriteStream(dest);
|
|
149
139
|
const reader = response.body?.getReader();
|
|
150
140
|
if (!reader)
|
|
151
|
-
throw new Error('
|
|
141
|
+
throw new Error('Body not readable');
|
|
152
142
|
const barWidth = 40;
|
|
153
143
|
while (true) {
|
|
154
144
|
const { done, value } = await reader.read();
|
|
@@ -156,69 +146,92 @@ async function downloadFile(url, dest) {
|
|
|
156
146
|
break;
|
|
157
147
|
fileStream.write(Buffer.from(value));
|
|
158
148
|
downloadedBytes += value.length;
|
|
159
|
-
// Draw progress bar
|
|
160
149
|
const percent = totalBytes > 0 ? downloadedBytes / totalBytes : 0;
|
|
161
150
|
const filled = Math.round(barWidth * percent);
|
|
162
|
-
const
|
|
163
|
-
const bar = 'β'.repeat(filled) + 'β'.repeat(empty);
|
|
151
|
+
const bar = 'β'.repeat(filled) + 'β'.repeat(barWidth - filled);
|
|
164
152
|
const pct = (percent * 100).toFixed(1).padStart(5);
|
|
165
|
-
const
|
|
166
|
-
|
|
167
|
-
const elapsed = (Date.now() - startTime) / 1000;
|
|
168
|
-
const speed = elapsed > 0 ? formatBytes(downloadedBytes / elapsed) + '/s' : '';
|
|
169
|
-
process.stdout.write(`\r ${bar} ${pct}% ${dl} / ${tot} ${speed} `);
|
|
153
|
+
const speed = (downloadedBytes / ((Date.now() - startTime) / 1000) / 1024 / 1024).toFixed(1);
|
|
154
|
+
process.stdout.write(`\r ${bar} ${pct}% ${formatBytes(downloadedBytes)} / ${formatBytes(totalBytes)} ${speed} MB/s `);
|
|
170
155
|
}
|
|
171
|
-
await new Promise((
|
|
172
|
-
fileStream.end(() => resolve());
|
|
173
|
-
fileStream.on('error', reject);
|
|
174
|
-
});
|
|
175
|
-
const totalTime = ((Date.now() - startTime) / 1000).toFixed(1);
|
|
156
|
+
await new Promise(r => fileStream.end(() => r()));
|
|
176
157
|
process.stdout.write('\n');
|
|
177
|
-
|
|
178
|
-
}
|
|
179
|
-
|
|
180
|
-
|
|
158
|
+
if (modelCfg.isArchive) {
|
|
159
|
+
console.log(`📦 Extracting archive to ${modelPath}...`);
|
|
160
|
+
fs_1.default.mkdirSync(modelPath, { recursive: true });
|
|
161
|
+
await new Promise((resolve, reject) => {
|
|
162
|
+
fs_1.default.createReadStream(dest)
|
|
163
|
+
.pipe((0, gunzip_maybe_1.default)())
|
|
164
|
+
.pipe(tar_fs_1.default.extract(modelPath))
|
|
165
|
+
.on('finish', resolve)
|
|
166
|
+
.on('error', reject);
|
|
167
|
+
});
|
|
168
|
+
fs_1.default.unlinkSync(dest); // Cleanup
|
|
169
|
+
}
|
|
170
|
+
console.log(`✅ Ready!\n`);
|
|
181
171
|
}
|
|
172
|
+
// ββ Request logging βββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
173
|
+
let requestCounter = 0;
|
|
174
|
+
app.use((req, res, next) => {
|
|
175
|
+
const reqId = ++requestCounter;
|
|
176
|
+
const start = Date.now();
|
|
177
|
+
const ip = req.headers['x-forwarded-for'] || req.socket.remoteAddress || 'unknown';
|
|
178
|
+
console.log(`\n[${timestamp()}] ββ REQUEST #${reqId} ββββββββββββββββββββββ`);
|
|
179
|
+
console.log(` Method: ${req.method} ${req.path}`);
|
|
180
|
+
console.log(` Model: ${SELECTED_MODEL_TYPE}`);
|
|
181
|
+
req._reqId = reqId;
|
|
182
|
+
req._startTime = start;
|
|
183
|
+
const originalJson = res.json.bind(res);
|
|
184
|
+
res.json = function (body) {
|
|
185
|
+
console.log(`[${timestamp()}] ββ RESPONSE #${reqId} (Status: ${res.statusCode}, ${Date.now() - start}ms) βββββ`);
|
|
186
|
+
if (body?.text)
|
|
187
|
+
console.log(` Result: "${body.text.substring(0, 100)}${body.text.length > 100 ? '...' : ''}"`);
|
|
188
|
+
else if (body?.error)
|
|
189
|
+
console.log(` Error: ${body.error}`);
|
|
190
|
+
return originalJson(body);
|
|
191
|
+
};
|
|
192
|
+
next();
|
|
193
|
+
});
|
|
194
|
+
// ββ Auth ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
195
|
+
app.use((req, res, next) => {
|
|
196
|
+
const authHeader = req.headers.authorization;
|
|
197
|
+
if (!authHeader?.startsWith('Bearer ') || authHeader.split(' ')[1] !== API_KEY) {
|
|
198
|
+
return res.status(401).json({ error: 'Auth failed' });
|
|
199
|
+
}
|
|
200
|
+
next();
|
|
201
|
+
});
|
|
202
|
+
// ββ Inference Bridge ββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
182
203
|
let inferProcess = null;
|
|
183
204
|
let isReady = false;
|
|
184
205
|
let resolvers = {};
|
|
185
|
-
|
|
186
|
-
let
|
|
187
|
-
if (!fs_1.default.existsSync(
|
|
188
|
-
|
|
189
|
-
}
|
|
190
|
-
|
|
191
|
-
|
|
206
|
+
downloadAndPrepare().then(() => {
|
|
207
|
+
let binPath = process.env.INFER_CLI_PATH || path_1.default.join(__dirname, '..', 'rust-infer', 'target', 'release', 'rust-infer');
|
|
208
|
+
if (!fs_1.default.existsSync(binPath))
|
|
209
|
+
binPath = path_1.default.join(__dirname, '..', 'rust-infer', 'target', 'debug', 'rust-infer');
|
|
210
|
+
console.log(`Starting inference: ${binPath}`);
|
|
211
|
+
const args = [modelCfg.engine, actualModelFile];
|
|
212
|
+
if (parakeetConfigPath)
|
|
213
|
+
args.push(parakeetConfigPath);
|
|
214
|
+
inferProcess = (0, child_process_1.spawn)(binPath, args, { stdio: ['pipe', 'pipe', 'inherit'] });
|
|
192
215
|
inferProcess.stdout.on('data', (data) => {
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
if (line === 'READY') {
|
|
216
|
+
data.toString().split('\n').filter(Boolean).forEach(line => {
|
|
217
|
+
if (line.trim() === 'READY') {
|
|
196
218
|
isReady = true;
|
|
197
|
-
console.log('
|
|
198
|
-
|
|
219
|
+
console.log('--- Model fully loaded and ready ---');
|
|
220
|
+
return;
|
|
199
221
|
}
|
|
200
222
|
try {
|
|
201
223
|
const parsed = JSON.parse(line);
|
|
202
|
-
const
|
|
203
|
-
if (
|
|
204
|
-
const firstKey = Object.keys(resolvers)[0];
|
|
224
|
+
const firstKey = Object.keys(resolvers)[0];
|
|
225
|
+
if (firstKey) {
|
|
205
226
|
resolvers[firstKey](parsed);
|
|
227
|
+
delete resolvers[firstKey];
|
|
206
228
|
}
|
|
207
229
|
}
|
|
208
|
-
catch
|
|
209
|
-
|
|
210
|
-
}
|
|
211
|
-
}
|
|
212
|
-
});
|
|
213
|
-
inferProcess.on('exit', (code) => {
|
|
214
|
-
console.log(`Inference worker exited with code ${code}`);
|
|
215
|
-
process.exit(code || 1);
|
|
230
|
+
catch { }
|
|
231
|
+
});
|
|
216
232
|
});
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
process.exit(1);
|
|
220
|
-
});
|
|
221
|
-
// ββ Request queue βββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
233
|
+
inferProcess.on('exit', (code) => process.exit(code || 1));
|
|
234
|
+
}).catch(e => { console.error(e); process.exit(1); });
|
|
222
235
|
const requestQueue = [];
|
|
223
236
|
let isProcessing = false;
|
|
224
237
|
function processQueue() {
|
|
@@ -226,42 +239,24 @@ function processQueue() {
|
|
|
226
239
|
return;
|
|
227
240
|
isProcessing = true;
|
|
228
241
|
const req = requestQueue.shift();
|
|
229
|
-
console.log(` [Queue] Processing request #${req.reqId} (queue length: ${requestQueue.length})`);
|
|
230
242
|
resolvers[req.file] = (result) => {
|
|
231
|
-
delete resolvers[req.file];
|
|
232
243
|
isProcessing = false;
|
|
233
|
-
if (fs_1.default.existsSync(req.file))
|
|
244
|
+
if (fs_1.default.existsSync(req.file))
|
|
234
245
|
fs_1.default.unlinkSync(req.file);
|
|
235
|
-
}
|
|
236
246
|
req.resolve(result);
|
|
237
|
-
|
|
247
|
+
processQueue();
|
|
238
248
|
};
|
|
239
249
|
inferProcess.stdin.write(req.file + '\n');
|
|
240
250
|
}
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
if (!req.body || !Buffer.isBuffer(req.body)) {
|
|
249
|
-
console.log(` [#${reqId}] Rejected: invalid audio body`);
|
|
250
|
-
return res.status(400).json({ error: 'Invalid audio body. Send raw WAV bytes with Content-Type: audio/wav' });
|
|
251
|
-
}
|
|
252
|
-
const audioSize = req.body.length;
|
|
253
|
-
console.log(` [#${reqId}] Audio received: ${formatBytes(audioSize)}`);
|
|
254
|
-
const tempFilePath = path_1.default.join(uploadDir, `upload-${Date.now()}-${Math.random().toString(36).substring(7)}.wav`);
|
|
255
|
-
fs_1.default.writeFileSync(tempFilePath, req.body);
|
|
256
|
-
console.log(` [#${reqId}] Queued for inference (queue length: ${requestQueue.length})`);
|
|
257
|
-
const result = await new Promise((resolve) => {
|
|
258
|
-
requestQueue.push({ file: tempFilePath, resolve, reqId });
|
|
251
|
+
app.post('/transcribe', express_1.default.raw({ type: 'audio/wav', limit: '100mb' }), async (req, res) => {
|
|
252
|
+
if (!isReady)
|
|
253
|
+
return res.status(503).json({ error: 'Starting up' });
|
|
254
|
+
const tempFile = path_1.default.join(uploadDir, `up-${Date.now()}.wav`);
|
|
255
|
+
fs_1.default.writeFileSync(tempFile, req.body);
|
|
256
|
+
const result = await new Promise(r => {
|
|
257
|
+
requestQueue.push({ file: tempFile, resolve: r, reqId: req._reqId });
|
|
259
258
|
processQueue();
|
|
260
259
|
});
|
|
261
260
|
res.json(result);
|
|
262
261
|
});
|
|
263
|
-
|
|
264
|
-
app.listen(port, () => {
|
|
265
|
-
console.log(`\nHandy Remote Server is running on port ${port}`);
|
|
266
|
-
console.log(`Waiting for requests...\n`);
|
|
267
|
-
});
|
|
262
|
+
app.listen(port, () => console.log(`\nHandy Server on port ${port} | API Key: ${API_KEY}`));
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "handy-remote-server",
|
|
3
|
-
"version": "1.
|
|
3
|
+
"version": "1.3.0",
|
|
4
4
|
"description": "Remote Transcription Server for Handy",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"bin": {
|
|
@@ -34,9 +34,13 @@
|
|
|
34
34
|
},
|
|
35
35
|
"devDependencies": {
|
|
36
36
|
"@types/express": "^5.0.6",
|
|
37
|
+
"@types/gunzip-maybe": "^1.4.3",
|
|
37
38
|
"@types/multer": "^2.1.0",
|
|
38
39
|
"@types/node": "^25.3.5",
|
|
40
|
+
"@types/tar-fs": "^2.0.4",
|
|
41
|
+
"gunzip-maybe": "^1.4.2",
|
|
42
|
+
"tar-fs": "^3.1.2",
|
|
39
43
|
"ts-node": "^10.9.2",
|
|
40
44
|
"typescript": "^5.9.3"
|
|
41
45
|
}
|
|
42
|
-
}
|
|
46
|
+
}
|
package/rust-infer/src/main.rs
CHANGED
|
@@ -1,35 +1,91 @@
|
|
|
1
1
|
use std::io::{self, BufRead, Write};
|
|
2
|
-
use std::path::PathBuf;
|
|
3
|
-
use transcribe_rs::
|
|
2
|
+
use std::path::{Path, PathBuf};
|
|
3
|
+
use transcribe_rs::TranscriptionEngine;
|
|
4
|
+
use transcribe_rs::engines::{
|
|
5
|
+
gigaam::GigaAMEngine,
|
|
6
|
+
whisper::WhisperEngine,
|
|
7
|
+
moonshine::{MoonshineEngine, MoonshineModelParams, ModelVariant},
|
|
8
|
+
parakeet::{ParakeetEngine, ParakeetModelParams},
|
|
9
|
+
sense_voice::{SenseVoiceEngine, SenseVoiceModelParams},
|
|
10
|
+
};
|
|
11
|
+
|
|
12
|
+
enum EngineWrapper {
|
|
13
|
+
GigaAM(GigaAMEngine),
|
|
14
|
+
Whisper(WhisperEngine),
|
|
15
|
+
Moonshine(MoonshineEngine),
|
|
16
|
+
Parakeet(ParakeetEngine),
|
|
17
|
+
SenseVoice(SenseVoiceEngine),
|
|
18
|
+
}
|
|
19
|
+
|
|
20
|
+
impl EngineWrapper {
|
|
21
|
+
fn transcribe_samples(&mut self, audio: Vec<f32>) -> Result<transcribe_rs::TranscriptionResult, Box<dyn std::error::Error>> {
|
|
22
|
+
match self {
|
|
23
|
+
EngineWrapper::GigaAM(e) => e.transcribe_samples(audio, None).map_err(|e| e.into()),
|
|
24
|
+
EngineWrapper::Whisper(e) => e.transcribe_samples(audio, None).map_err(|e| e.into()),
|
|
25
|
+
EngineWrapper::Moonshine(e) => e.transcribe_samples(audio, None).map_err(|e| e.into()),
|
|
26
|
+
EngineWrapper::Parakeet(e) => e.transcribe_samples(audio, None).map_err(|e| e.into()),
|
|
27
|
+
EngineWrapper::SenseVoice(e) => e.transcribe_samples(audio, None).map_err(|e| e.into()),
|
|
28
|
+
}
|
|
29
|
+
}
|
|
30
|
+
}
|
|
4
31
|
|
|
5
32
|
fn main() -> Result<(), Box<dyn std::error::Error>> {
|
|
6
|
-
// We get model and config path from args or default
|
|
7
33
|
let args: Vec<String> = std::env::args().collect();
|
|
8
34
|
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
d.push("models");
|
|
14
|
-
d.push("gigaam.onnx");
|
|
15
|
-
d.to_string_lossy().to_string()
|
|
16
|
-
};
|
|
17
|
-
|
|
18
|
-
// Auto-download logic or print error if missing
|
|
19
|
-
if !PathBuf::from(&model_path).exists() {
|
|
20
|
-
eprintln!("Model file not found: {}. Please ensure model file exists.", model_path);
|
|
35
|
+
// Usage: rust-infer <engine_type> <model_path>
|
|
36
|
+
if args.len() < 3 {
|
|
37
|
+
eprintln!("Usage: rust-infer <engine_type> <model_path>");
|
|
38
|
+
eprintln!("Engines: gigaam, whisper, moonshine, parakeet, sensevoice");
|
|
21
39
|
std::process::exit(1);
|
|
22
40
|
}
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
41
|
+
|
|
42
|
+
let engine_type = args[1].to_lowercase();
|
|
43
|
+
let model_path = &args[2];
|
|
44
|
+
|
|
45
|
+
if !PathBuf::from(model_path).exists() {
|
|
46
|
+
eprintln!("Model file not found: {}", model_path);
|
|
47
|
+
std::process::exit(1);
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
eprintln!("Loading {} engine with model {}...", engine_type, model_path);
|
|
51
|
+
|
|
52
|
+
let mut engine = match engine_type.as_str() {
|
|
53
|
+
"gigaam" => {
|
|
54
|
+
let mut e = GigaAMEngine::new();
|
|
55
|
+
e.load_model(Path::new(model_path))?;
|
|
56
|
+
EngineWrapper::GigaAM(e)
|
|
57
|
+
}
|
|
58
|
+
"whisper" => {
|
|
59
|
+
let mut e = WhisperEngine::new();
|
|
60
|
+
e.load_model(Path::new(model_path))?;
|
|
61
|
+
EngineWrapper::Whisper(e)
|
|
62
|
+
}
|
|
63
|
+
"moonshine" => {
|
|
64
|
+
let mut e = MoonshineEngine::new();
|
|
65
|
+
// Use Base as default for remote
|
|
66
|
+
e.load_model_with_params(Path::new(model_path), MoonshineModelParams::variant(ModelVariant::Base))?;
|
|
67
|
+
EngineWrapper::Moonshine(e)
|
|
68
|
+
}
|
|
69
|
+
"parakeet" => {
|
|
70
|
+
let mut e = ParakeetEngine::new();
|
|
71
|
+
e.load_model_with_params(Path::new(model_path), ParakeetModelParams::int8())?;
|
|
72
|
+
EngineWrapper::Parakeet(e)
|
|
73
|
+
}
|
|
74
|
+
"sensevoice" => {
|
|
75
|
+
let mut e = SenseVoiceEngine::new();
|
|
76
|
+
e.load_model_with_params(Path::new(model_path), SenseVoiceModelParams::int8())?;
|
|
77
|
+
EngineWrapper::SenseVoice(e)
|
|
78
|
+
}
|
|
79
|
+
_ => {
|
|
80
|
+
eprintln!("Unknown engine type: {}", engine_type);
|
|
81
|
+
std::process::exit(1);
|
|
82
|
+
}
|
|
83
|
+
};
|
|
84
|
+
|
|
29
85
|
eprintln!("Model loaded. Ready to transcribe.");
|
|
30
|
-
println!("READY");
|
|
86
|
+
println!("READY");
|
|
31
87
|
io::stdout().flush()?;
|
|
32
|
-
|
|
88
|
+
|
|
33
89
|
let stdin = io::stdin();
|
|
34
90
|
for line in stdin.lock().lines() {
|
|
35
91
|
let line = line?;
|
|
@@ -37,64 +93,47 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
|
|
|
37
93
|
if line.is_empty() {
|
|
38
94
|
continue;
|
|
39
95
|
}
|
|
40
|
-
|
|
41
96
|
if line == "EXIT" {
|
|
42
97
|
break;
|
|
43
98
|
}
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
let wav_path = line;
|
|
47
|
-
|
|
48
|
-
// Read the file and convert to f32
|
|
49
|
-
match read_wav(wav_path) {
|
|
99
|
+
|
|
100
|
+
match read_wav(line) {
|
|
50
101
|
Ok(samples) => {
|
|
51
|
-
match engine.transcribe_samples(samples
|
|
102
|
+
match engine.transcribe_samples(samples) {
|
|
52
103
|
Ok(result) => {
|
|
53
|
-
let json = serde_json::json!({
|
|
54
|
-
"status": "success",
|
|
55
|
-
"text": result.text
|
|
56
|
-
});
|
|
104
|
+
let json = serde_json::json!({ "status": "success", "text": result.text });
|
|
57
105
|
println!("{}", json.to_string());
|
|
58
|
-
}
|
|
106
|
+
}
|
|
59
107
|
Err(e) => {
|
|
60
|
-
let json = serde_json::json!({
|
|
61
|
-
"status": "error",
|
|
62
|
-
"error": format!("Transcription failed: {}", e)
|
|
63
|
-
});
|
|
108
|
+
let json = serde_json::json!({ "status": "error", "error": format!("Transcription failed: {}", e) });
|
|
64
109
|
println!("{}", json.to_string());
|
|
65
110
|
}
|
|
66
111
|
}
|
|
67
|
-
}
|
|
112
|
+
}
|
|
68
113
|
Err(e) => {
|
|
69
|
-
let json = serde_json::json!({
|
|
70
|
-
"status": "error",
|
|
71
|
-
"error": format!("Failed to read WAV: {}", e)
|
|
72
|
-
});
|
|
114
|
+
let json = serde_json::json!({ "status": "error", "error": format!("Failed to read WAV: {}", e) });
|
|
73
115
|
println!("{}", json.to_string());
|
|
74
116
|
}
|
|
75
117
|
}
|
|
76
118
|
io::stdout().flush()?;
|
|
77
119
|
}
|
|
78
|
-
|
|
79
120
|
Ok(())
|
|
80
121
|
}
|
|
81
122
|
|
|
82
123
|
fn read_wav(path: &str) -> Result<Vec<f32>, Box<dyn std::error::Error>> {
|
|
83
124
|
let mut reader = hound::WavReader::open(path)?;
|
|
84
125
|
let spec = reader.spec();
|
|
85
|
-
|
|
86
126
|
let mut samples = Vec::new();
|
|
87
127
|
match spec.sample_format {
|
|
88
128
|
hound::SampleFormat::Int => {
|
|
89
129
|
if spec.bits_per_sample == 16 {
|
|
90
130
|
for sample in reader.samples::<i16>() {
|
|
91
|
-
|
|
92
|
-
samples.push(s);
|
|
131
|
+
samples.push(sample? as f32 / i16::MAX as f32);
|
|
93
132
|
}
|
|
94
133
|
} else {
|
|
95
134
|
return Err("Only 16-bit integer WAV is supported".into());
|
|
96
135
|
}
|
|
97
|
-
}
|
|
136
|
+
}
|
|
98
137
|
hound::SampleFormat::Float => {
|
|
99
138
|
if spec.bits_per_sample == 32 {
|
|
100
139
|
for sample in reader.samples::<f32>() {
|
|
@@ -105,17 +144,13 @@ fn read_wav(path: &str) -> Result<Vec<f32>, Box<dyn std::error::Error>> {
|
|
|
105
144
|
}
|
|
106
145
|
}
|
|
107
146
|
}
|
|
108
|
-
|
|
109
|
-
// Multi-channel to mono (simple average)
|
|
110
147
|
if spec.channels > 1 {
|
|
111
148
|
let channels = spec.channels as usize;
|
|
112
149
|
let mut mono = Vec::with_capacity(samples.len() / channels);
|
|
113
150
|
for chunk in samples.chunks(channels) {
|
|
114
|
-
|
|
115
|
-
mono.push(sum / channels as f32);
|
|
151
|
+
mono.push(chunk.iter().sum::<f32>() / channels as f32);
|
|
116
152
|
}
|
|
117
153
|
samples = mono;
|
|
118
154
|
}
|
|
119
|
-
|
|
120
155
|
Ok(samples)
|
|
121
156
|
}
|