handy-remote-server 1.1.0 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +92 -15
- package/dist/index.js +164 -137
- package/package.json +6 -2
- package/rust-infer/src/main.rs +90 -55
package/README.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# Handy Remote Server 🎙️
|
|
2
2
|
|
|
3
|
-
A lightweight standalone inference server for [Handy](https://github.com/
|
|
3
|
+
A lightweight standalone inference server for [Handy](https://github.com/viktor-silakov/Handy), allowing you to transcribe audio from external devices, weak computers, and more.
|
|
4
4
|
|
|
5
5
|
## Installation
|
|
6
6
|
|
|
@@ -10,30 +10,107 @@ The easiest way to run the external inference server is using `npx`:
|
|
|
10
10
|
npx handy-remote-server
|
|
11
11
|
```
|
|
12
12
|
|
|
13
|
-
_(You must have Node.js and
|
|
13
|
+
_(You must have Node.js and Rust/Cargo installed)_
|
|
14
14
|
|
|
15
15
|
## Usage
|
|
16
16
|
|
|
17
|
-
When you run the server for the first time, it will
|
|
17
|
+
When you run the server for the first time, it will:
|
|
18
18
|
|
|
19
|
-
|
|
19
|
+
1. **Download the GigaAM v3 model** (~100 MB) with a progress bar:
|
|
20
20
|
|
|
21
|
-
```
|
|
22
|
-
|
|
21
|
+
```
|
|
22
|
+
📥 Downloading model...
|
|
23
|
+
URL: https://blob.handy.computer/giga-am-v3.int8.onnx
|
|
24
|
+
Dest: /path/to/models/gigaam.onnx
|
|
25
|
+
|
|
26
|
+
█████████████████████░░░░░░░░░░░░░░░░░░░ 52.3% 52.10 MB / 99.60 MB 12.5 MB/s
|
|
27
|
+
|
|
28
|
+
✅ Download complete in 8.2s
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
2. **Generate a persistent API key** saved to `~/.handy/api_key`:
|
|
32
|
+
|
|
33
|
+
```
|
|
34
|
+
======================================================
|
|
35
|
+
Generated a new API KEY (saved to /Users/you/.handy/api_key)
|
|
36
|
+
Your API KEY is: xxxxx...xxxxx
|
|
37
|
+
======================================================
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
The key persists across restarts. On the next launch, it will be loaded automatically.
|
|
41
|
+
|
|
42
|
+
3. **Start the server** and log every request in detail:
|
|
43
|
+
|
|
44
|
+
```
|
|
23
45
|
Handy Remote Server is running on port 3000
|
|
46
|
+
|
|
47
|
+
[2026-03-07T12:00:00.000Z] ── REQUEST #1 ──────────────────────
|
|
48
|
+
Method: POST /transcribe
|
|
49
|
+
From: 192.168.1.5
|
|
50
|
+
Auth: OK
|
|
51
|
+
[#1] Audio received: 156.3 KB
|
|
52
|
+
[#1] Queued for inference (queue length: 0)
|
|
53
|
+
[2026-03-07T12:00:01.234Z] ── RESPONSE #1 ─────────────────────
|
|
54
|
+
Status: 200
|
|
55
|
+
Duration: 1.23s
|
|
56
|
+
Result: "Привет, как дела?"
|
|
24
57
|
```
|
|
25
58
|
|
|
59
|
+
### Connecting from Handy
|
|
60
|
+
|
|
26
61
|
1. Open **Handy** on your client machine.
|
|
27
|
-
2. Go to **Settings >
|
|
28
|
-
3.
|
|
29
|
-
|
|
30
|
-
|
|
62
|
+
2. Go to **Settings > Models**, select **Remote Server**.
|
|
63
|
+
3. Go to **Settings > General**, fill in:
|
|
64
|
+
- **Remote Server URL**: `http://<your-server-ip>:3000`
|
|
65
|
+
- **API Token**: the generated token
|
|
66
|
+
4. All transcriptions will now be processed by the server!
|
|
31
67
|
|
|
32
|
-
##
|
|
68
|
+
## Environment Variables
|
|
33
69
|
|
|
34
|
-
|
|
70
|
+
| Variable | Default | Description |
|
|
71
|
+
| ---------------- | ------------------------------------------- | -------------------------------------- |
|
|
72
|
+
| `PORT` | `3000` | Server port |
|
|
73
|
+
| `API_KEY` | auto-generated, saved to `~/.handy/api_key` | Bearer token for authentication |
|
|
74
|
+
| `INFER_CLI_PATH` | auto-detected | Path to the `rust-infer` binary |
|
|
75
|
+
| `MODEL_TYPE` | `gigaam` | Transcription model to use (see below) |
|
|
76
|
+
|
|
77
|
+
## Supported Models
|
|
78
|
+
|
|
79
|
+
```bash
|
|
80
|
+
# Русский (по умолчанию)
|
|
81
|
+
MODEL_TYPE=gigaam npx handy-remote-server
|
|
82
|
+
|
|
83
|
+
# Мультиязычный (включая русский) — Whisper модели
|
|
84
|
+
MODEL_TYPE=whisper-tiny npx handy-remote-server # 75 MB
|
|
85
|
+
MODEL_TYPE=whisper-base npx handy-remote-server # 142 MB
|
|
86
|
+
MODEL_TYPE=whisper-small npx handy-remote-server # 487 MB
|
|
87
|
+
MODEL_TYPE=whisper-medium npx handy-remote-server # 1.5 GB
|
|
88
|
+
|
|
89
|
+
# Английский — Moonshine
|
|
90
|
+
MODEL_TYPE=moonshine-tiny npx handy-remote-server # 60 MB
|
|
91
|
+
MODEL_TYPE=moonshine-base npx handy-remote-server # 100 MB
|
|
35
92
|
|
|
36
|
-
|
|
93
|
+
# Английский — Breeze/Parakeet
|
|
94
|
+
MODEL_TYPE=parakeet npx handy-remote-server # ~200 MB
|
|
95
|
+
|
|
96
|
+
# Мультиязычный — SenseVoice
|
|
97
|
+
MODEL_TYPE=sensevoice npx handy-remote-server # ~200 MB
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
| Model | Language | Size | Speed |
|
|
101
|
+
| ------------------ | -------------- | ------- | --------- |
|
|
102
|
+
| `gigaam` (default) | Russian | ~100 MB | ⚡ Fast |
|
|
103
|
+
| `whisper-tiny` | Multi-language | 75 MB | ⚡ Fast |
|
|
104
|
+
| `whisper-base` | Multi-language | 142 MB | ⚡ Fast |
|
|
105
|
+
| `whisper-small` | Multi-language | 487 MB | π Medium |
|
|
106
|
+
| `whisper-medium` | Multi-language | 1.5 GB | 🐢 Slow |
|
|
107
|
+
| `moonshine-tiny` | English | 60 MB | ⚡ Fast |
|
|
108
|
+
| `moonshine-base` | English | 100 MB | ⚡ Fast |
|
|
109
|
+
| `parakeet` | English | ~200 MB | π Medium |
|
|
110
|
+
| `sensevoice` | Multi-language | ~200 MB | π Medium |
|
|
111
|
+
|
|
112
|
+
## How It Works
|
|
113
|
+
|
|
114
|
+
The `handy-remote-server` spins up a tiny Express server alongside a heavily optimized Rust CLI (`rust-infer`) powered by `transcribe-rs`. Audio files are dispatched sequentially from the Node server directly into the Rust engine.
|
|
37
115
|
|
|
38
|
-
|
|
39
|
-
- `API_KEY` - defaults to an auto-generated token in development. Set this to a permanent token for production.
|
|
116
|
+
Currently the server uses the **GigaAM v3** model (Russian-language, fast inference, ~100 MB).
|
package/dist/index.js
CHANGED
|
@@ -5,13 +5,14 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
|
5
5
|
};
|
|
6
6
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
7
7
|
const express_1 = __importDefault(require("express"));
|
|
8
|
-
const multer_1 = __importDefault(require("multer"));
|
|
9
8
|
const child_process_1 = require("child_process");
|
|
10
9
|
const crypto_1 = __importDefault(require("crypto"));
|
|
11
10
|
const path_1 = __importDefault(require("path"));
|
|
12
11
|
const fs_1 = __importDefault(require("fs"));
|
|
13
12
|
const os_1 = __importDefault(require("os"));
|
|
14
13
|
const dotenv_1 = __importDefault(require("dotenv"));
|
|
14
|
+
const tar_fs_1 = __importDefault(require("tar-fs"));
|
|
15
|
+
const gunzip_maybe_1 = __importDefault(require("gunzip-maybe"));
|
|
15
16
|
dotenv_1.default.config();
|
|
16
17
|
const app = (0, express_1.default)();
|
|
17
18
|
const port = process.env.PORT || 3000;
|
|
@@ -19,36 +20,21 @@ const port = process.env.PORT || 3000;
|
|
|
19
20
|
const handyDir = path_1.default.join(os_1.default.homedir(), '.handy');
|
|
20
21
|
const keyFilePath = path_1.default.join(handyDir, 'api_key');
|
|
21
22
|
function loadOrCreateApiKey() {
|
|
22
|
-
|
|
23
|
-
if (process.env.API_KEY) {
|
|
23
|
+
if (process.env.API_KEY)
|
|
24
24
|
return process.env.API_KEY;
|
|
25
|
-
}
|
|
26
|
-
// 2. Try to load from cached file
|
|
27
25
|
if (fs_1.default.existsSync(keyFilePath)) {
|
|
28
26
|
const cached = fs_1.default.readFileSync(keyFilePath, 'utf-8').trim();
|
|
29
|
-
if (cached.length > 0)
|
|
30
|
-
console.log(`\n======================================================`);
|
|
31
|
-
console.log(`Loaded API KEY from ${keyFilePath}`);
|
|
32
|
-
console.log(`Your API KEY is: ${cached}`);
|
|
33
|
-
console.log(`======================================================\n`);
|
|
27
|
+
if (cached.length > 0)
|
|
34
28
|
return cached;
|
|
35
|
-
}
|
|
36
29
|
}
|
|
37
|
-
// 3. Generate a new one and persist it
|
|
38
30
|
const newKey = crypto_1.default.randomBytes(32).toString('hex');
|
|
39
31
|
fs_1.default.mkdirSync(handyDir, { recursive: true });
|
|
40
32
|
fs_1.default.writeFileSync(keyFilePath, newKey + '\n', { mode: 0o600 });
|
|
41
|
-
console.log(`\n======================================================`);
|
|
42
|
-
console.log(`Generated a new API KEY (saved to ${keyFilePath})`);
|
|
43
|
-
console.log(`Your API KEY is: ${newKey}`);
|
|
44
|
-
console.log(`======================================================\n`);
|
|
45
33
|
return newKey;
|
|
46
34
|
}
|
|
47
35
|
const API_KEY = loadOrCreateApiKey();
|
|
48
36
|
// ── Logging helpers ───────────────────────────────────────────────────
|
|
49
|
-
function timestamp() {
|
|
50
|
-
return new Date().toISOString();
|
|
51
|
-
}
|
|
37
|
+
function timestamp() { return new Date().toISOString(); }
|
|
52
38
|
function formatBytes(bytes) {
|
|
53
39
|
if (bytes < 1024)
|
|
54
40
|
return `${bytes} B`;
|
|
@@ -61,16 +47,129 @@ function formatDuration(ms) {
|
|
|
61
47
|
return `${ms}ms`;
|
|
62
48
|
return `${(ms / 1000).toFixed(2)}s`;
|
|
63
49
|
}
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
50
|
+
const MODEL_REGISTRY = {
|
|
51
|
+
'gigaam': {
|
|
52
|
+
engine: 'gigaam',
|
|
53
|
+
url: 'https://blob.handy.computer/giga-am-v3.int8.onnx',
|
|
54
|
+
filename: 'gigaam.onnx'
|
|
55
|
+
},
|
|
56
|
+
'whisper-tiny': {
|
|
57
|
+
engine: 'whisper',
|
|
58
|
+
url: 'https://blob.handy.computer/ggml-tiny.bin',
|
|
59
|
+
filename: 'whisper-tiny.bin'
|
|
60
|
+
},
|
|
61
|
+
'whisper-base': {
|
|
62
|
+
engine: 'whisper',
|
|
63
|
+
url: 'https://blob.handy.computer/ggml-base.bin',
|
|
64
|
+
filename: 'whisper-base.bin'
|
|
65
|
+
},
|
|
66
|
+
'whisper-small': {
|
|
67
|
+
engine: 'whisper',
|
|
68
|
+
url: 'https://blob.handy.computer/ggml-small.bin',
|
|
69
|
+
filename: 'whisper-small.bin'
|
|
70
|
+
},
|
|
71
|
+
'whisper-medium': {
|
|
72
|
+
engine: 'whisper',
|
|
73
|
+
url: 'https://blob.handy.computer/whisper-medium-q4_1.bin',
|
|
74
|
+
filename: 'whisper-medium.bin'
|
|
75
|
+
},
|
|
76
|
+
'moonshine-tiny': {
|
|
77
|
+
engine: 'moonshine',
|
|
78
|
+
url: 'https://blob.handy.computer/moonshine-tiny-streaming-en.tar.gz',
|
|
79
|
+
filename: 'moonshine-tiny', // Dir name after extraction
|
|
80
|
+
isArchive: true
|
|
81
|
+
},
|
|
82
|
+
'moonshine-base': {
|
|
83
|
+
engine: 'moonshine',
|
|
84
|
+
url: 'https://blob.handy.computer/moonshine-base.tar.gz',
|
|
85
|
+
filename: 'moonshine-base',
|
|
86
|
+
isArchive: true
|
|
87
|
+
},
|
|
88
|
+
'parakeet': {
|
|
89
|
+
engine: 'parakeet',
|
|
90
|
+
url: 'https://blob.handy.computer/parakeet-v3-int8.tar.gz',
|
|
91
|
+
filename: 'parakeet-v3',
|
|
92
|
+
isArchive: true,
|
|
93
|
+
configFilename: 'preprocessor.json'
|
|
94
|
+
},
|
|
95
|
+
'sensevoice': {
|
|
96
|
+
engine: 'sensevoice',
|
|
97
|
+
url: 'https://blob.handy.computer/sense-voice-int8.tar.gz',
|
|
98
|
+
filename: 'sensevoice',
|
|
99
|
+
isArchive: true
|
|
100
|
+
}
|
|
101
|
+
};
|
|
102
|
+
const SELECTED_MODEL_TYPE = (process.env.MODEL_TYPE || 'gigaam').toLowerCase();
|
|
103
|
+
const modelCfg = MODEL_REGISTRY[SELECTED_MODEL_TYPE];
|
|
104
|
+
if (!modelCfg) {
|
|
105
|
+
console.error(`Error: Unknown MODEL_TYPE "${SELECTED_MODEL_TYPE}".`);
|
|
106
|
+
console.error(`Supported types: ${Object.keys(MODEL_REGISTRY).join(', ')}`);
|
|
107
|
+
process.exit(1);
|
|
68
108
|
}
|
|
109
|
+
// ── Directories ───────────────────────────────────────────────────────
|
|
110
|
+
const modelsBaseDir = path_1.default.join(__dirname, '..', 'models');
|
|
69
111
|
const uploadDir = path_1.default.join(__dirname, '..', 'uploads');
|
|
70
|
-
if (!fs_1.default.existsSync(
|
|
71
|
-
fs_1.default.mkdirSync(
|
|
112
|
+
[modelsBaseDir, uploadDir].forEach(d => { if (!fs_1.default.existsSync(d))
|
|
113
|
+
fs_1.default.mkdirSync(d, { recursive: true }); });
|
|
114
|
+
// ── Model paths ───────────────────────────────────────────────────────
|
|
115
|
+
const modelPath = path_1.default.join(modelsBaseDir, modelCfg.filename);
|
|
116
|
+
let actualModelFile = modelPath;
|
|
117
|
+
let parakeetConfigPath = '';
|
|
118
|
+
if (modelCfg.isArchive) {
|
|
119
|
+
// For archives, we look for model.onnx inside the directory
|
|
120
|
+
actualModelFile = path_1.default.join(modelPath, 'model.onnx');
|
|
121
|
+
if (modelCfg.engine === 'parakeet') {
|
|
122
|
+
parakeetConfigPath = path_1.default.join(modelPath, modelCfg.configFilename);
|
|
123
|
+
}
|
|
124
|
+
}
|
|
125
|
+
// ── Download & Extract ────────────────────────────────────────────────
|
|
126
|
+
async function downloadAndPrepare() {
|
|
127
|
+
if (fs_1.default.existsSync(actualModelFile))
|
|
128
|
+
return;
|
|
129
|
+
const dest = modelCfg.isArchive ? modelPath + '.tar.gz' : modelPath;
|
|
130
|
+
console.log(`\nπ₯ Downloading model: ${SELECTED_MODEL_TYPE}...`);
|
|
131
|
+
console.log(` URL: ${modelCfg.url}`);
|
|
132
|
+
const response = await fetch(modelCfg.url);
|
|
133
|
+
if (!response.ok)
|
|
134
|
+
throw new Error(`Failed to fetch: ${response.statusText}`);
|
|
135
|
+
const totalBytes = parseInt(response.headers.get('content-length') || '0', 10);
|
|
136
|
+
let downloadedBytes = 0;
|
|
137
|
+
const startTime = Date.now();
|
|
138
|
+
const fileStream = fs_1.default.createWriteStream(dest);
|
|
139
|
+
const reader = response.body?.getReader();
|
|
140
|
+
if (!reader)
|
|
141
|
+
throw new Error('Body not readable');
|
|
142
|
+
const barWidth = 40;
|
|
143
|
+
while (true) {
|
|
144
|
+
const { done, value } = await reader.read();
|
|
145
|
+
if (done)
|
|
146
|
+
break;
|
|
147
|
+
fileStream.write(Buffer.from(value));
|
|
148
|
+
downloadedBytes += value.length;
|
|
149
|
+
const percent = totalBytes > 0 ? downloadedBytes / totalBytes : 0;
|
|
150
|
+
const filled = Math.round(barWidth * percent);
|
|
151
|
+
const bar = 'β'.repeat(filled) + 'β'.repeat(barWidth - filled);
|
|
152
|
+
const pct = (percent * 100).toFixed(1).padStart(5);
|
|
153
|
+
const speed = (downloadedBytes / ((Date.now() - startTime) / 1000) / 1024 / 1024).toFixed(1);
|
|
154
|
+
process.stdout.write(`\r ${bar} ${pct}% ${formatBytes(downloadedBytes)} / ${formatBytes(totalBytes)} ${speed} MB/s `);
|
|
155
|
+
}
|
|
156
|
+
await new Promise(r => fileStream.end(() => r()));
|
|
157
|
+
process.stdout.write('\n');
|
|
158
|
+
if (modelCfg.isArchive) {
|
|
159
|
+
console.log(`π¦ Extracting archive to ${modelPath}...`);
|
|
160
|
+
fs_1.default.mkdirSync(modelPath, { recursive: true });
|
|
161
|
+
await new Promise((resolve, reject) => {
|
|
162
|
+
fs_1.default.createReadStream(dest)
|
|
163
|
+
.pipe((0, gunzip_maybe_1.default)())
|
|
164
|
+
.pipe(tar_fs_1.default.extract(modelPath))
|
|
165
|
+
.on('finish', resolve)
|
|
166
|
+
.on('error', reject);
|
|
167
|
+
});
|
|
168
|
+
fs_1.default.unlinkSync(dest); // Cleanup
|
|
169
|
+
}
|
|
170
|
+
console.log(`β
Ready!\n`);
|
|
72
171
|
}
|
|
73
|
-
// ── Request logging
|
|
172
|
+
// ── Request logging ───────────────────────────────────────────────────
|
|
74
173
|
let requestCounter = 0;
|
|
75
174
|
app.use((req, res, next) => {
|
|
76
175
|
const reqId = ++requestCounter;
|
|
@@ -78,115 +177,61 @@ app.use((req, res, next) => {
|
|
|
78
177
|
const ip = req.headers['x-forwarded-for'] || req.socket.remoteAddress || 'unknown';
|
|
79
178
|
console.log(`\n[${timestamp()}] ββ REQUEST #${reqId} ββββββββββββββββββββββ`);
|
|
80
179
|
console.log(` Method: ${req.method} ${req.path}`);
|
|
81
|
-
console.log(`
|
|
82
|
-
console.log(` Headers: Content-Type=${req.headers['content-type'] || 'N/A'}, Content-Length=${req.headers['content-length'] || 'N/A'}`);
|
|
83
|
-
// Store metadata on request for later use
|
|
180
|
+
console.log(` Model: ${SELECTED_MODEL_TYPE}`);
|
|
84
181
|
req._reqId = reqId;
|
|
85
182
|
req._startTime = start;
|
|
86
|
-
req._ip = ip;
|
|
87
183
|
const originalJson = res.json.bind(res);
|
|
88
184
|
res.json = function (body) {
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
console.log(` Duration: ${formatDuration(duration)}`);
|
|
94
|
-
if (body?.text) {
|
|
95
|
-
const preview = body.text.length > 100 ? body.text.substring(0, 100) + '...' : body.text;
|
|
96
|
-
console.log(` Result: "${preview}"`);
|
|
97
|
-
}
|
|
98
|
-
else if (body?.error) {
|
|
185
|
+
console.log(`[${timestamp()}] ββ RESPONSE #${reqId} (Status: ${res.statusCode}, ${Date.now() - start}ms) βββββ`);
|
|
186
|
+
if (body?.text)
|
|
187
|
+
console.log(` Result: "${body.text.substring(0, 100)}${body.text.length > 100 ? '...' : ''}"`);
|
|
188
|
+
else if (body?.error)
|
|
99
189
|
console.log(` Error: ${body.error}`);
|
|
100
|
-
}
|
|
101
|
-
console.log(` ββββββββββββββββββββββββββββββββββββββββββββββββ`);
|
|
102
190
|
return originalJson(body);
|
|
103
191
|
};
|
|
104
192
|
next();
|
|
105
193
|
});
|
|
106
|
-
// ──
|
|
194
|
+
// ── Auth ──────────────────────────────────────────────────────────────
|
|
107
195
|
app.use((req, res, next) => {
|
|
108
196
|
const authHeader = req.headers.authorization;
|
|
109
|
-
if (!authHeader ||
|
|
110
|
-
|
|
111
|
-
return res.status(401).json({ error: 'Missing or invalid Authorization header' });
|
|
112
|
-
}
|
|
113
|
-
const token = authHeader.split(' ')[1];
|
|
114
|
-
if (token !== API_KEY) {
|
|
115
|
-
console.log(` Auth: REJECTED (invalid key)`);
|
|
116
|
-
return res.status(403).json({ error: 'Invalid API Key' });
|
|
197
|
+
if (!authHeader?.startsWith('Bearer ') || authHeader.split(' ')[1] !== API_KEY) {
|
|
198
|
+
return res.status(401).json({ error: 'Auth failed' });
|
|
117
199
|
}
|
|
118
|
-
console.log(` Auth: OK`);
|
|
119
200
|
next();
|
|
120
201
|
});
|
|
121
|
-
// ──
|
|
122
|
-
const storage = multer_1.default.diskStorage({
|
|
123
|
-
destination: function (req, file, cb) {
|
|
124
|
-
cb(null, uploadDir);
|
|
125
|
-
},
|
|
126
|
-
filename: function (req, file, cb) {
|
|
127
|
-
const uniqueSuffix = Date.now() + '-' + Math.round(Math.random() * 1E9);
|
|
128
|
-
cb(null, file.fieldname + '-' + uniqueSuffix + '.wav');
|
|
129
|
-
}
|
|
130
|
-
});
|
|
131
|
-
const upload = (0, multer_1.default)({ storage: storage });
|
|
132
|
-
// ── Model download ────────────────────────────────────────────────────
|
|
133
|
-
const GIGAAM_MODEL_URL = 'https://blob.handy.computer/giga-am-v3.int8.onnx';
|
|
134
|
-
const gigaamModelPath = path_1.default.join(modelsDir, 'gigaam.onnx');
|
|
135
|
-
async function downloadFile(url, dest) {
|
|
136
|
-
if (fs_1.default.existsSync(dest))
|
|
137
|
-
return;
|
|
138
|
-
console.log(`Downloading ${url} to ${dest}...`);
|
|
139
|
-
fs_1.default.mkdirSync(path_1.default.dirname(dest), { recursive: true });
|
|
140
|
-
const response = await fetch(url);
|
|
141
|
-
if (!response.ok)
|
|
142
|
-
throw new Error(`Failed to fetch ${url}: ${response.statusText}`);
|
|
143
|
-
const arrBuffer = await response.arrayBuffer();
|
|
144
|
-
fs_1.default.writeFileSync(dest, Buffer.from(arrBuffer));
|
|
145
|
-
console.log(`Downloaded ${dest}`);
|
|
146
|
-
}
|
|
147
|
-
async function ensureModels() {
|
|
148
|
-
await downloadFile(GIGAAM_MODEL_URL, gigaamModelPath);
|
|
149
|
-
}
|
|
202
|
+
// ── Inference Bridge ──────────────────────────────────────────────────
|
|
150
203
|
let inferProcess = null;
|
|
151
204
|
let isReady = false;
|
|
152
205
|
let resolvers = {};
|
|
153
|
-
|
|
154
|
-
let
|
|
155
|
-
if (!fs_1.default.existsSync(
|
|
156
|
-
|
|
157
|
-
}
|
|
158
|
-
|
|
159
|
-
|
|
206
|
+
downloadAndPrepare().then(() => {
|
|
207
|
+
let binPath = process.env.INFER_CLI_PATH || path_1.default.join(__dirname, '..', 'rust-infer', 'target', 'release', 'rust-infer');
|
|
208
|
+
if (!fs_1.default.existsSync(binPath))
|
|
209
|
+
binPath = path_1.default.join(__dirname, '..', 'rust-infer', 'target', 'debug', 'rust-infer');
|
|
210
|
+
console.log(`Starting inference: ${binPath}`);
|
|
211
|
+
const args = [modelCfg.engine, actualModelFile];
|
|
212
|
+
if (parakeetConfigPath)
|
|
213
|
+
args.push(parakeetConfigPath);
|
|
214
|
+
inferProcess = (0, child_process_1.spawn)(binPath, args, { stdio: ['pipe', 'pipe', 'inherit'] });
|
|
160
215
|
inferProcess.stdout.on('data', (data) => {
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
if (line === 'READY') {
|
|
216
|
+
data.toString().split('\n').filter(Boolean).forEach(line => {
|
|
217
|
+
if (line.trim() === 'READY') {
|
|
164
218
|
isReady = true;
|
|
165
|
-
console.log('
|
|
166
|
-
|
|
219
|
+
console.log('--- Model fully loaded and ready ---');
|
|
220
|
+
return;
|
|
167
221
|
}
|
|
168
222
|
try {
|
|
169
223
|
const parsed = JSON.parse(line);
|
|
170
|
-
const
|
|
171
|
-
if (
|
|
172
|
-
const firstKey = Object.keys(resolvers)[0];
|
|
224
|
+
const firstKey = Object.keys(resolvers)[0];
|
|
225
|
+
if (firstKey) {
|
|
173
226
|
resolvers[firstKey](parsed);
|
|
227
|
+
delete resolvers[firstKey];
|
|
174
228
|
}
|
|
175
229
|
}
|
|
176
|
-
catch
|
|
177
|
-
|
|
178
|
-
}
|
|
179
|
-
}
|
|
180
|
-
});
|
|
181
|
-
inferProcess.on('exit', (code) => {
|
|
182
|
-
console.log(`Inference worker exited with code ${code}`);
|
|
183
|
-
process.exit(code || 1);
|
|
230
|
+
catch { }
|
|
231
|
+
});
|
|
184
232
|
});
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
process.exit(1);
|
|
188
|
-
});
|
|
189
|
-
// ── Request queue ─────────────────────────────────────────────────────
|
|
233
|
+
inferProcess.on('exit', (code) => process.exit(code || 1));
|
|
234
|
+
}).catch(e => { console.error(e); process.exit(1); });
|
|
190
235
|
const requestQueue = [];
|
|
191
236
|
let isProcessing = false;
|
|
192
237
|
function processQueue() {
|
|
@@ -194,42 +239,24 @@ function processQueue() {
|
|
|
194
239
|
return;
|
|
195
240
|
isProcessing = true;
|
|
196
241
|
const req = requestQueue.shift();
|
|
197
|
-
console.log(` [Queue] Processing request #${req.reqId} (queue length: ${requestQueue.length})`);
|
|
198
242
|
resolvers[req.file] = (result) => {
|
|
199
|
-
delete resolvers[req.file];
|
|
200
243
|
isProcessing = false;
|
|
201
|
-
if (fs_1.default.existsSync(req.file))
|
|
244
|
+
if (fs_1.default.existsSync(req.file))
|
|
202
245
|
fs_1.default.unlinkSync(req.file);
|
|
203
|
-
}
|
|
204
246
|
req.resolve(result);
|
|
205
|
-
|
|
247
|
+
processQueue();
|
|
206
248
|
};
|
|
207
249
|
inferProcess.stdin.write(req.file + '\n');
|
|
208
250
|
}
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
if (!req.body || !Buffer.isBuffer(req.body)) {
|
|
217
|
-
console.log(` [#${reqId}] Rejected: invalid audio body`);
|
|
218
|
-
return res.status(400).json({ error: 'Invalid audio body. Send raw WAV bytes with Content-Type: audio/wav' });
|
|
219
|
-
}
|
|
220
|
-
const audioSize = req.body.length;
|
|
221
|
-
console.log(` [#${reqId}] Audio received: ${formatBytes(audioSize)}`);
|
|
222
|
-
const tempFilePath = path_1.default.join(uploadDir, `upload-${Date.now()}-${Math.random().toString(36).substring(7)}.wav`);
|
|
223
|
-
fs_1.default.writeFileSync(tempFilePath, req.body);
|
|
224
|
-
console.log(` [#${reqId}] Queued for inference (queue length: ${requestQueue.length})`);
|
|
225
|
-
const result = await new Promise((resolve) => {
|
|
226
|
-
requestQueue.push({ file: tempFilePath, resolve, reqId });
|
|
251
|
+
app.post('/transcribe', express_1.default.raw({ type: 'audio/wav', limit: '100mb' }), async (req, res) => {
|
|
252
|
+
if (!isReady)
|
|
253
|
+
return res.status(503).json({ error: 'Starting up' });
|
|
254
|
+
const tempFile = path_1.default.join(uploadDir, `up-${Date.now()}.wav`);
|
|
255
|
+
fs_1.default.writeFileSync(tempFile, req.body);
|
|
256
|
+
const result = await new Promise(r => {
|
|
257
|
+
requestQueue.push({ file: tempFile, resolve: r, reqId: req._reqId });
|
|
227
258
|
processQueue();
|
|
228
259
|
});
|
|
229
260
|
res.json(result);
|
|
230
261
|
});
|
|
231
|
-
|
|
232
|
-
app.listen(port, () => {
|
|
233
|
-
console.log(`\nHandy Remote Server is running on port ${port}`);
|
|
234
|
-
console.log(`Waiting for requests...\n`);
|
|
235
|
-
});
|
|
262
|
+
app.listen(port, () => console.log(`\nHandy Server on port ${port} | API Key: ${API_KEY}`));
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "handy-remote-server",
|
|
3
|
-
"version": "1.
|
|
3
|
+
"version": "1.3.0",
|
|
4
4
|
"description": "Remote Transcription Server for Handy",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"bin": {
|
|
@@ -34,9 +34,13 @@
|
|
|
34
34
|
},
|
|
35
35
|
"devDependencies": {
|
|
36
36
|
"@types/express": "^5.0.6",
|
|
37
|
+
"@types/gunzip-maybe": "^1.4.3",
|
|
37
38
|
"@types/multer": "^2.1.0",
|
|
38
39
|
"@types/node": "^25.3.5",
|
|
40
|
+
"@types/tar-fs": "^2.0.4",
|
|
41
|
+
"gunzip-maybe": "^1.4.2",
|
|
42
|
+
"tar-fs": "^3.1.2",
|
|
39
43
|
"ts-node": "^10.9.2",
|
|
40
44
|
"typescript": "^5.9.3"
|
|
41
45
|
}
|
|
42
|
-
}
|
|
46
|
+
}
|
package/rust-infer/src/main.rs
CHANGED
|
@@ -1,35 +1,91 @@
|
|
|
1
1
|
use std::io::{self, BufRead, Write};
|
|
2
|
-
use std::path::PathBuf;
|
|
3
|
-
use transcribe_rs::
|
|
2
|
+
use std::path::{Path, PathBuf};
|
|
3
|
+
use transcribe_rs::TranscriptionEngine;
|
|
4
|
+
use transcribe_rs::engines::{
|
|
5
|
+
gigaam::GigaAMEngine,
|
|
6
|
+
whisper::WhisperEngine,
|
|
7
|
+
moonshine::{MoonshineEngine, MoonshineModelParams, ModelVariant},
|
|
8
|
+
parakeet::{ParakeetEngine, ParakeetModelParams},
|
|
9
|
+
sense_voice::{SenseVoiceEngine, SenseVoiceModelParams},
|
|
10
|
+
};
|
|
11
|
+
|
|
12
|
+
enum EngineWrapper {
|
|
13
|
+
GigaAM(GigaAMEngine),
|
|
14
|
+
Whisper(WhisperEngine),
|
|
15
|
+
Moonshine(MoonshineEngine),
|
|
16
|
+
Parakeet(ParakeetEngine),
|
|
17
|
+
SenseVoice(SenseVoiceEngine),
|
|
18
|
+
}
|
|
19
|
+
|
|
20
|
+
impl EngineWrapper {
|
|
21
|
+
fn transcribe_samples(&mut self, audio: Vec<f32>) -> Result<transcribe_rs::TranscriptionResult, Box<dyn std::error::Error>> {
|
|
22
|
+
match self {
|
|
23
|
+
EngineWrapper::GigaAM(e) => e.transcribe_samples(audio, None).map_err(|e| e.into()),
|
|
24
|
+
EngineWrapper::Whisper(e) => e.transcribe_samples(audio, None).map_err(|e| e.into()),
|
|
25
|
+
EngineWrapper::Moonshine(e) => e.transcribe_samples(audio, None).map_err(|e| e.into()),
|
|
26
|
+
EngineWrapper::Parakeet(e) => e.transcribe_samples(audio, None).map_err(|e| e.into()),
|
|
27
|
+
EngineWrapper::SenseVoice(e) => e.transcribe_samples(audio, None).map_err(|e| e.into()),
|
|
28
|
+
}
|
|
29
|
+
}
|
|
30
|
+
}
|
|
4
31
|
|
|
5
32
|
fn main() -> Result<(), Box<dyn std::error::Error>> {
|
|
6
|
-
// We get model and config path from args or default
|
|
7
33
|
let args: Vec<String> = std::env::args().collect();
|
|
8
34
|
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
d.push("models");
|
|
14
|
-
d.push("gigaam.onnx");
|
|
15
|
-
d.to_string_lossy().to_string()
|
|
16
|
-
};
|
|
17
|
-
|
|
18
|
-
// Auto-download logic or print error if missing
|
|
19
|
-
if !PathBuf::from(&model_path).exists() {
|
|
20
|
-
eprintln!("Model file not found: {}. Please ensure model file exists.", model_path);
|
|
35
|
+
// Usage: rust-infer <engine_type> <model_path>
|
|
36
|
+
if args.len() < 3 {
|
|
37
|
+
eprintln!("Usage: rust-infer <engine_type> <model_path>");
|
|
38
|
+
eprintln!("Engines: gigaam, whisper, moonshine, parakeet, sensevoice");
|
|
21
39
|
std::process::exit(1);
|
|
22
40
|
}
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
41
|
+
|
|
42
|
+
let engine_type = args[1].to_lowercase();
|
|
43
|
+
let model_path = &args[2];
|
|
44
|
+
|
|
45
|
+
if !PathBuf::from(model_path).exists() {
|
|
46
|
+
eprintln!("Model file not found: {}", model_path);
|
|
47
|
+
std::process::exit(1);
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
eprintln!("Loading {} engine with model {}...", engine_type, model_path);
|
|
51
|
+
|
|
52
|
+
let mut engine = match engine_type.as_str() {
|
|
53
|
+
"gigaam" => {
|
|
54
|
+
let mut e = GigaAMEngine::new();
|
|
55
|
+
e.load_model(Path::new(model_path))?;
|
|
56
|
+
EngineWrapper::GigaAM(e)
|
|
57
|
+
}
|
|
58
|
+
"whisper" => {
|
|
59
|
+
let mut e = WhisperEngine::new();
|
|
60
|
+
e.load_model(Path::new(model_path))?;
|
|
61
|
+
EngineWrapper::Whisper(e)
|
|
62
|
+
}
|
|
63
|
+
"moonshine" => {
|
|
64
|
+
let mut e = MoonshineEngine::new();
|
|
65
|
+
// Use Base as default for remote
|
|
66
|
+
e.load_model_with_params(Path::new(model_path), MoonshineModelParams::variant(ModelVariant::Base))?;
|
|
67
|
+
EngineWrapper::Moonshine(e)
|
|
68
|
+
}
|
|
69
|
+
"parakeet" => {
|
|
70
|
+
let mut e = ParakeetEngine::new();
|
|
71
|
+
e.load_model_with_params(Path::new(model_path), ParakeetModelParams::int8())?;
|
|
72
|
+
EngineWrapper::Parakeet(e)
|
|
73
|
+
}
|
|
74
|
+
"sensevoice" => {
|
|
75
|
+
let mut e = SenseVoiceEngine::new();
|
|
76
|
+
e.load_model_with_params(Path::new(model_path), SenseVoiceModelParams::int8())?;
|
|
77
|
+
EngineWrapper::SenseVoice(e)
|
|
78
|
+
}
|
|
79
|
+
_ => {
|
|
80
|
+
eprintln!("Unknown engine type: {}", engine_type);
|
|
81
|
+
std::process::exit(1);
|
|
82
|
+
}
|
|
83
|
+
};
|
|
84
|
+
|
|
29
85
|
eprintln!("Model loaded. Ready to transcribe.");
|
|
30
|
-
println!("READY");
|
|
86
|
+
println!("READY");
|
|
31
87
|
io::stdout().flush()?;
|
|
32
|
-
|
|
88
|
+
|
|
33
89
|
let stdin = io::stdin();
|
|
34
90
|
for line in stdin.lock().lines() {
|
|
35
91
|
let line = line?;
|
|
@@ -37,64 +93,47 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
|
|
|
37
93
|
if line.is_empty() {
|
|
38
94
|
continue;
|
|
39
95
|
}
|
|
40
|
-
|
|
41
96
|
if line == "EXIT" {
|
|
42
97
|
break;
|
|
43
98
|
}
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
let wav_path = line;
|
|
47
|
-
|
|
48
|
-
// Read the file and convert to f32
|
|
49
|
-
match read_wav(wav_path) {
|
|
99
|
+
|
|
100
|
+
match read_wav(line) {
|
|
50
101
|
Ok(samples) => {
|
|
51
|
-
match engine.transcribe_samples(samples
|
|
102
|
+
match engine.transcribe_samples(samples) {
|
|
52
103
|
Ok(result) => {
|
|
53
|
-
let json = serde_json::json!({
|
|
54
|
-
"status": "success",
|
|
55
|
-
"text": result.text
|
|
56
|
-
});
|
|
104
|
+
let json = serde_json::json!({ "status": "success", "text": result.text });
|
|
57
105
|
println!("{}", json.to_string());
|
|
58
|
-
}
|
|
106
|
+
}
|
|
59
107
|
Err(e) => {
|
|
60
|
-
let json = serde_json::json!({
|
|
61
|
-
"status": "error",
|
|
62
|
-
"error": format!("Transcription failed: {}", e)
|
|
63
|
-
});
|
|
108
|
+
let json = serde_json::json!({ "status": "error", "error": format!("Transcription failed: {}", e) });
|
|
64
109
|
println!("{}", json.to_string());
|
|
65
110
|
}
|
|
66
111
|
}
|
|
67
|
-
}
|
|
112
|
+
}
|
|
68
113
|
Err(e) => {
|
|
69
|
-
let json = serde_json::json!({
|
|
70
|
-
"status": "error",
|
|
71
|
-
"error": format!("Failed to read WAV: {}", e)
|
|
72
|
-
});
|
|
114
|
+
let json = serde_json::json!({ "status": "error", "error": format!("Failed to read WAV: {}", e) });
|
|
73
115
|
println!("{}", json.to_string());
|
|
74
116
|
}
|
|
75
117
|
}
|
|
76
118
|
io::stdout().flush()?;
|
|
77
119
|
}
|
|
78
|
-
|
|
79
120
|
Ok(())
|
|
80
121
|
}
|
|
81
122
|
|
|
82
123
|
fn read_wav(path: &str) -> Result<Vec<f32>, Box<dyn std::error::Error>> {
|
|
83
124
|
let mut reader = hound::WavReader::open(path)?;
|
|
84
125
|
let spec = reader.spec();
|
|
85
|
-
|
|
86
126
|
let mut samples = Vec::new();
|
|
87
127
|
match spec.sample_format {
|
|
88
128
|
hound::SampleFormat::Int => {
|
|
89
129
|
if spec.bits_per_sample == 16 {
|
|
90
130
|
for sample in reader.samples::<i16>() {
|
|
91
|
-
|
|
92
|
-
samples.push(s);
|
|
131
|
+
samples.push(sample? as f32 / i16::MAX as f32);
|
|
93
132
|
}
|
|
94
133
|
} else {
|
|
95
134
|
return Err("Only 16-bit integer WAV is supported".into());
|
|
96
135
|
}
|
|
97
|
-
}
|
|
136
|
+
}
|
|
98
137
|
hound::SampleFormat::Float => {
|
|
99
138
|
if spec.bits_per_sample == 32 {
|
|
100
139
|
for sample in reader.samples::<f32>() {
|
|
@@ -105,17 +144,13 @@ fn read_wav(path: &str) -> Result<Vec<f32>, Box<dyn std::error::Error>> {
|
|
|
105
144
|
}
|
|
106
145
|
}
|
|
107
146
|
}
|
|
108
|
-
|
|
109
|
-
// Multi-channel to mono (simple average)
|
|
110
147
|
if spec.channels > 1 {
|
|
111
148
|
let channels = spec.channels as usize;
|
|
112
149
|
let mut mono = Vec::with_capacity(samples.len() / channels);
|
|
113
150
|
for chunk in samples.chunks(channels) {
|
|
114
|
-
|
|
115
|
-
mono.push(sum / channels as f32);
|
|
151
|
+
mono.push(chunk.iter().sum::<f32>() / channels as f32);
|
|
116
152
|
}
|
|
117
153
|
samples = mono;
|
|
118
154
|
}
|
|
119
|
-
|
|
120
155
|
Ok(samples)
|
|
121
156
|
}
|