handy-remote-server 1.2.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # Handy Remote Server 🎙️
2
2
 
3
- A lightweight standalone inference server for [Handy](https://github.com/cjpais/Handy), allowing you to transcribe audio from external devices, weak computers, and more.
3
+ A lightweight standalone inference server for [Handy](https://github.com/viktor-silakov/Handy), allowing you to transcribe audio from external devices, weak computers, and more.
4
4
 
5
5
  ## Installation
6
6
 
@@ -67,11 +67,47 @@ Handy Remote Server is running on port 3000
67
67
 
68
68
  ## Environment Variables
69
69
 
70
- | Variable | Default | Description |
71
- | ---------------- | ------------------------------------------- | ------------------------------- |
72
- | `PORT` | `3000` | Server port |
73
- | `API_KEY` | auto-generated, saved to `~/.handy/api_key` | Bearer token for authentication |
74
- | `INFER_CLI_PATH` | auto-detected | Path to the `rust-infer` binary |
70
+ | Variable | Default | Description |
71
+ | ---------------- | ------------------------------------------- | -------------------------------------- |
72
+ | `PORT` | `3000` | Server port |
73
+ | `API_KEY` | auto-generated, saved to `~/.handy/api_key` | Bearer token for authentication |
74
+ | `INFER_CLI_PATH` | auto-detected | Path to the `rust-infer` binary |
75
+ | `MODEL_TYPE` | `gigaam` | Transcription model to use (see below) |
76
+
77
+ ## Supported Models
78
+
79
+ ```bash
80
+ # Русский (по умолчанию)
81
+ MODEL_TYPE=gigaam npx handy-remote-server
82
+
83
+ # Мультиязычный (включая русский) — Whisper модели
84
+ MODEL_TYPE=whisper-tiny npx handy-remote-server # 75 MB
85
+ MODEL_TYPE=whisper-base npx handy-remote-server # 142 MB
86
+ MODEL_TYPE=whisper-small npx handy-remote-server # 487 MB
87
+ MODEL_TYPE=whisper-medium npx handy-remote-server # 1.5 GB
88
+
89
+ # Английский — Moonshine
90
+ MODEL_TYPE=moonshine-tiny npx handy-remote-server # 60 MB
91
+ MODEL_TYPE=moonshine-base npx handy-remote-server # 100 MB
92
+
93
+ # Английский — Breeze/Parakeet
94
+ MODEL_TYPE=parakeet npx handy-remote-server # ~200 MB
95
+
96
+ # Мультиязычный — SenseVoice
97
+ MODEL_TYPE=sensevoice npx handy-remote-server # ~200 MB
98
+ ```
99
+
100
+ | Model | Language | Size | Speed |
101
+ | ------------------ | -------------- | ------- | --------- |
102
+ | `gigaam` (default) | Russian | ~100 MB | ⚡ Fast |
103
+ | `whisper-tiny` | Multi-language | 75 MB | ⚡ Fast |
104
+ | `whisper-base` | Multi-language | 142 MB | ⚡ Fast |
105
+ | `whisper-small` | Multi-language | 487 MB | 🔄 Medium |
106
+ | `whisper-medium` | Multi-language | 1.5 GB | 🐢 Slow |
107
+ | `moonshine-tiny` | English | 60 MB | ⚡ Fast |
108
+ | `moonshine-base` | English | 100 MB | ⚡ Fast |
109
+ | `parakeet` | English | ~200 MB | 🔄 Medium |
110
+ | `sensevoice` | Multi-language | ~200 MB | 🔄 Medium |
75
111
 
76
112
  ## How It Works
77
113
 
package/dist/index.js CHANGED
@@ -5,13 +5,14 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
5
5
  };
6
6
  Object.defineProperty(exports, "__esModule", { value: true });
7
7
  const express_1 = __importDefault(require("express"));
8
- const multer_1 = __importDefault(require("multer"));
9
8
  const child_process_1 = require("child_process");
10
9
  const crypto_1 = __importDefault(require("crypto"));
11
10
  const path_1 = __importDefault(require("path"));
12
11
  const fs_1 = __importDefault(require("fs"));
13
12
  const os_1 = __importDefault(require("os"));
14
13
  const dotenv_1 = __importDefault(require("dotenv"));
14
+ const tar_fs_1 = __importDefault(require("tar-fs"));
15
+ const gunzip_maybe_1 = __importDefault(require("gunzip-maybe"));
15
16
  dotenv_1.default.config();
16
17
  const app = (0, express_1.default)();
17
18
  const port = process.env.PORT || 3000;
@@ -19,36 +20,21 @@ const port = process.env.PORT || 3000;
19
20
  const handyDir = path_1.default.join(os_1.default.homedir(), '.handy');
20
21
  const keyFilePath = path_1.default.join(handyDir, 'api_key');
21
22
  function loadOrCreateApiKey() {
22
- // 1. Env var takes priority
23
- if (process.env.API_KEY) {
23
+ if (process.env.API_KEY)
24
24
  return process.env.API_KEY;
25
- }
26
- // 2. Try to load from cached file
27
25
  if (fs_1.default.existsSync(keyFilePath)) {
28
26
  const cached = fs_1.default.readFileSync(keyFilePath, 'utf-8').trim();
29
- if (cached.length > 0) {
30
- console.log(`\n======================================================`);
31
- console.log(`Loaded API KEY from ${keyFilePath}`);
32
- console.log(`Your API KEY is: ${cached}`);
33
- console.log(`======================================================\n`);
27
+ if (cached.length > 0)
34
28
  return cached;
35
- }
36
29
  }
37
- // 3. Generate a new one and persist it
38
30
  const newKey = crypto_1.default.randomBytes(32).toString('hex');
39
31
  fs_1.default.mkdirSync(handyDir, { recursive: true });
40
32
  fs_1.default.writeFileSync(keyFilePath, newKey + '\n', { mode: 0o600 });
41
- console.log(`\n======================================================`);
42
- console.log(`Generated a new API KEY (saved to ${keyFilePath})`);
43
- console.log(`Your API KEY is: ${newKey}`);
44
- console.log(`======================================================\n`);
45
33
  return newKey;
46
34
  }
47
35
  const API_KEY = loadOrCreateApiKey();
48
36
  // ── Logging helpers ───────────────────────────────────────────────────
49
- function timestamp() {
50
- return new Date().toISOString();
51
- }
37
+ function timestamp() { return new Date().toISOString(); }
52
38
  function formatBytes(bytes) {
53
39
  if (bytes < 1024)
54
40
  return `${bytes} B`;
@@ -61,94 +47,98 @@ function formatDuration(ms) {
61
47
  return `${ms}ms`;
62
48
  return `${(ms / 1000).toFixed(2)}s`;
63
49
  }
64
- // ── Ensure directories ────────────────────────────────────────────────
65
- const modelsDir = path_1.default.join(__dirname, '..', 'models');
66
- if (!fs_1.default.existsSync(modelsDir)) {
67
- fs_1.default.mkdirSync(modelsDir, { recursive: true });
50
+ const MODEL_REGISTRY = {
51
+ 'gigaam': {
52
+ engine: 'gigaam',
53
+ url: 'https://blob.handy.computer/giga-am-v3.int8.onnx',
54
+ filename: 'gigaam.onnx'
55
+ },
56
+ 'whisper-tiny': {
57
+ engine: 'whisper',
58
+ url: 'https://blob.handy.computer/ggml-tiny.bin',
59
+ filename: 'whisper-tiny.bin'
60
+ },
61
+ 'whisper-base': {
62
+ engine: 'whisper',
63
+ url: 'https://blob.handy.computer/ggml-base.bin',
64
+ filename: 'whisper-base.bin'
65
+ },
66
+ 'whisper-small': {
67
+ engine: 'whisper',
68
+ url: 'https://blob.handy.computer/ggml-small.bin',
69
+ filename: 'whisper-small.bin'
70
+ },
71
+ 'whisper-medium': {
72
+ engine: 'whisper',
73
+ url: 'https://blob.handy.computer/whisper-medium-q4_1.bin',
74
+ filename: 'whisper-medium.bin'
75
+ },
76
+ 'moonshine-tiny': {
77
+ engine: 'moonshine',
78
+ url: 'https://blob.handy.computer/moonshine-tiny-streaming-en.tar.gz',
79
+ filename: 'moonshine-tiny', // Dir name after extraction
80
+ isArchive: true
81
+ },
82
+ 'moonshine-base': {
83
+ engine: 'moonshine',
84
+ url: 'https://blob.handy.computer/moonshine-base.tar.gz',
85
+ filename: 'moonshine-base',
86
+ isArchive: true
87
+ },
88
+ 'parakeet': {
89
+ engine: 'parakeet',
90
+ url: 'https://blob.handy.computer/parakeet-v3-int8.tar.gz',
91
+ filename: 'parakeet-v3',
92
+ isArchive: true,
93
+ configFilename: 'preprocessor.json'
94
+ },
95
+ 'sensevoice': {
96
+ engine: 'sensevoice',
97
+ url: 'https://blob.handy.computer/sense-voice-int8.tar.gz',
98
+ filename: 'sensevoice',
99
+ isArchive: true
100
+ }
101
+ };
102
+ const SELECTED_MODEL_TYPE = (process.env.MODEL_TYPE || 'gigaam').toLowerCase();
103
+ const modelCfg = MODEL_REGISTRY[SELECTED_MODEL_TYPE];
104
+ if (!modelCfg) {
105
+ console.error(`Error: Unknown MODEL_TYPE "${SELECTED_MODEL_TYPE}".`);
106
+ console.error(`Supported types: ${Object.keys(MODEL_REGISTRY).join(', ')}`);
107
+ process.exit(1);
68
108
  }
109
+ // ── Directories ───────────────────────────────────────────────────────
110
+ const modelsBaseDir = path_1.default.join(__dirname, '..', 'models');
69
111
  const uploadDir = path_1.default.join(__dirname, '..', 'uploads');
70
- if (!fs_1.default.existsSync(uploadDir)) {
71
- fs_1.default.mkdirSync(uploadDir, { recursive: true });
72
- }
73
- // ── Request logging middleware ────────────────────────────────────────
74
- let requestCounter = 0;
75
- app.use((req, res, next) => {
76
- const reqId = ++requestCounter;
77
- const start = Date.now();
78
- const ip = req.headers['x-forwarded-for'] || req.socket.remoteAddress || 'unknown';
79
- console.log(`\n[${timestamp()}] ── REQUEST #${reqId} ──────────────────────`);
80
- console.log(` Method: ${req.method} ${req.path}`);
81
- console.log(` From: ${ip}`);
82
- console.log(` Headers: Content-Type=${req.headers['content-type'] || 'N/A'}, Content-Length=${req.headers['content-length'] || 'N/A'}`);
83
- // Store metadata on request for later use
84
- req._reqId = reqId;
85
- req._startTime = start;
86
- req._ip = ip;
87
- const originalJson = res.json.bind(res);
88
- res.json = function (body) {
89
- const duration = Date.now() - start;
90
- const status = res.statusCode;
91
- console.log(`[${timestamp()}] ── RESPONSE #${reqId} ─────────────────────`);
92
- console.log(` Status: ${status}`);
93
- console.log(` Duration: ${formatDuration(duration)}`);
94
- if (body?.text) {
95
- const preview = body.text.length > 100 ? body.text.substring(0, 100) + '...' : body.text;
96
- console.log(` Result: "${preview}"`);
97
- }
98
- else if (body?.error) {
99
- console.log(` Error: ${body.error}`);
100
- }
101
- console.log(` ────────────────────────────────────────────────`);
102
- return originalJson(body);
103
- };
104
- next();
105
- });
106
- // ── Authentication middleware ─────────────────────────────────────────
107
- app.use((req, res, next) => {
108
- const authHeader = req.headers.authorization;
109
- if (!authHeader || !authHeader.startsWith('Bearer ')) {
110
- console.log(` Auth: REJECTED (missing/invalid Authorization header)`);
111
- return res.status(401).json({ error: 'Missing or invalid Authorization header' });
112
- }
113
- const token = authHeader.split(' ')[1];
114
- if (token !== API_KEY) {
115
- console.log(` Auth: REJECTED (invalid key)`);
116
- return res.status(403).json({ error: 'Invalid API Key' });
117
- }
118
- console.log(` Auth: OK`);
119
- next();
120
- });
121
- // ── Multer storage ────────────────────────────────────────────────────
122
- const storage = multer_1.default.diskStorage({
123
- destination: function (req, file, cb) {
124
- cb(null, uploadDir);
125
- },
126
- filename: function (req, file, cb) {
127
- const uniqueSuffix = Date.now() + '-' + Math.round(Math.random() * 1E9);
128
- cb(null, file.fieldname + '-' + uniqueSuffix + '.wav');
112
+ [modelsBaseDir, uploadDir].forEach(d => { if (!fs_1.default.existsSync(d))
113
+ fs_1.default.mkdirSync(d, { recursive: true }); });
114
+ // ── Model paths ───────────────────────────────────────────────────────
115
+ const modelPath = path_1.default.join(modelsBaseDir, modelCfg.filename);
116
+ let actualModelFile = modelPath;
117
+ let parakeetConfigPath = '';
118
+ if (modelCfg.isArchive) {
119
+ // For archives, we look for model.onnx inside the directory
120
+ actualModelFile = path_1.default.join(modelPath, 'model.onnx');
121
+ if (modelCfg.engine === 'parakeet') {
122
+ parakeetConfigPath = path_1.default.join(modelPath, modelCfg.configFilename);
129
123
  }
130
- });
131
- const upload = (0, multer_1.default)({ storage: storage });
132
- // ── Model download ───────────────────────────────────────────────────
133
- const GIGAAM_MODEL_URL = 'https://blob.handy.computer/giga-am-v3.int8.onnx';
134
- const gigaamModelPath = path_1.default.join(modelsDir, 'gigaam.onnx');
135
- async function downloadFile(url, dest) {
136
- if (fs_1.default.existsSync(dest))
124
+ }
125
+ // ── Download & Extract ────────────────────────────────────────────────
126
+ async function downloadAndPrepare() {
127
+ if (fs_1.default.existsSync(actualModelFile))
137
128
  return;
138
- console.log(`\n📥 Downloading model...`);
139
- console.log(` URL: ${url}`);
140
- console.log(` Dest: ${dest}\n`);
141
- fs_1.default.mkdirSync(path_1.default.dirname(dest), { recursive: true });
142
- const response = await fetch(url);
129
+ const dest = modelCfg.isArchive ? modelPath + '.tar.gz' : modelPath;
130
+ console.log(`\n📥 Downloading model: ${SELECTED_MODEL_TYPE}...`);
131
+ console.log(` URL: ${modelCfg.url}`);
132
+ const response = await fetch(modelCfg.url);
143
133
  if (!response.ok)
144
- throw new Error(`Failed to fetch ${url}: ${response.statusText}`);
134
+ throw new Error(`Failed to fetch: ${response.statusText}`);
145
135
  const totalBytes = parseInt(response.headers.get('content-length') || '0', 10);
146
136
  let downloadedBytes = 0;
147
137
  const startTime = Date.now();
148
138
  const fileStream = fs_1.default.createWriteStream(dest);
149
139
  const reader = response.body?.getReader();
150
140
  if (!reader)
151
- throw new Error('Response body is not readable');
141
+ throw new Error('Body not readable');
152
142
  const barWidth = 40;
153
143
  while (true) {
154
144
  const { done, value } = await reader.read();
@@ -156,69 +146,92 @@ async function downloadFile(url, dest) {
156
146
  break;
157
147
  fileStream.write(Buffer.from(value));
158
148
  downloadedBytes += value.length;
159
- // Draw progress bar
160
149
  const percent = totalBytes > 0 ? downloadedBytes / totalBytes : 0;
161
150
  const filled = Math.round(barWidth * percent);
162
- const empty = barWidth - filled;
163
- const bar = '█'.repeat(filled) + '░'.repeat(empty);
151
+ const bar = '█'.repeat(filled) + '░'.repeat(barWidth - filled);
164
152
  const pct = (percent * 100).toFixed(1).padStart(5);
165
- const dl = formatBytes(downloadedBytes);
166
- const tot = totalBytes > 0 ? formatBytes(totalBytes) : '?';
167
- const elapsed = (Date.now() - startTime) / 1000;
168
- const speed = elapsed > 0 ? formatBytes(downloadedBytes / elapsed) + '/s' : '';
169
- process.stdout.write(`\r ${bar} ${pct}% ${dl} / ${tot} ${speed} `);
153
+ const speed = (downloadedBytes / ((Date.now() - startTime) / 1000) / 1024 / 1024).toFixed(1);
154
+ process.stdout.write(`\r ${bar} ${pct}% ${formatBytes(downloadedBytes)} / ${formatBytes(totalBytes)} ${speed} MB/s `);
170
155
  }
171
- await new Promise((resolve, reject) => {
172
- fileStream.end(() => resolve());
173
- fileStream.on('error', reject);
174
- });
175
- const totalTime = ((Date.now() - startTime) / 1000).toFixed(1);
156
+ await new Promise(r => fileStream.end(() => r()));
176
157
  process.stdout.write('\n');
177
- console.log(`\n✅ Download complete in ${totalTime}s\n`);
178
- }
179
- async function ensureModels() {
180
- await downloadFile(GIGAAM_MODEL_URL, gigaamModelPath);
158
+ if (modelCfg.isArchive) {
159
+ console.log(`📦 Extracting archive to ${modelPath}...`);
160
+ fs_1.default.mkdirSync(modelPath, { recursive: true });
161
+ await new Promise((resolve, reject) => {
162
+ fs_1.default.createReadStream(dest)
163
+ .pipe((0, gunzip_maybe_1.default)())
164
+ .pipe(tar_fs_1.default.extract(modelPath))
165
+ .on('finish', resolve)
166
+ .on('error', reject);
167
+ });
168
+ fs_1.default.unlinkSync(dest); // Cleanup
169
+ }
170
+ console.log(`✅ Ready!\n`);
181
171
  }
172
+ // ── Request logging ───────────────────────────────────────────────────
173
+ let requestCounter = 0;
174
+ app.use((req, res, next) => {
175
+ const reqId = ++requestCounter;
176
+ const start = Date.now();
177
+ const ip = req.headers['x-forwarded-for'] || req.socket.remoteAddress || 'unknown';
178
+ console.log(`\n[${timestamp()}] ── REQUEST #${reqId} ──────────────────────`);
179
+ console.log(` Method: ${req.method} ${req.path}`);
180
+ console.log(` Model: ${SELECTED_MODEL_TYPE}`);
181
+ req._reqId = reqId;
182
+ req._startTime = start;
183
+ const originalJson = res.json.bind(res);
184
+ res.json = function (body) {
185
+ console.log(`[${timestamp()}] ── RESPONSE #${reqId} (Status: ${res.statusCode}, ${Date.now() - start}ms) ─────`);
186
+ if (body?.text)
187
+ console.log(` Result: "${body.text.substring(0, 100)}${body.text.length > 100 ? '...' : ''}"`);
188
+ else if (body?.error)
189
+ console.log(` Error: ${body.error}`);
190
+ return originalJson(body);
191
+ };
192
+ next();
193
+ });
194
+ // ── Auth ──────────────────────────────────────────────────────────────
195
+ app.use((req, res, next) => {
196
+ const authHeader = req.headers.authorization;
197
+ if (!authHeader?.startsWith('Bearer ') || authHeader.split(' ')[1] !== API_KEY) {
198
+ return res.status(401).json({ error: 'Auth failed' });
199
+ }
200
+ next();
201
+ });
202
+ // ── Inference Bridge ──────────────────────────────────────────────────
182
203
  let inferProcess = null;
183
204
  let isReady = false;
184
205
  let resolvers = {};
185
- ensureModels().then(() => {
186
- let inferProcessPath = process.env.INFER_CLI_PATH || path_1.default.join(__dirname, '..', 'rust-infer', 'target', 'release', 'rust-infer');
187
- if (!fs_1.default.existsSync(inferProcessPath)) {
188
- inferProcessPath = path_1.default.join(__dirname, '..', 'rust-infer', 'target', 'debug', 'rust-infer');
189
- }
190
- console.log(`Using inference CLI: ${inferProcessPath}`);
191
- inferProcess = (0, child_process_1.spawn)(inferProcessPath, [gigaamModelPath], { stdio: ['pipe', 'pipe', 'inherit'] });
206
+ downloadAndPrepare().then(() => {
207
+ let binPath = process.env.INFER_CLI_PATH || path_1.default.join(__dirname, '..', 'rust-infer', 'target', 'release', 'rust-infer');
208
+ if (!fs_1.default.existsSync(binPath))
209
+ binPath = path_1.default.join(__dirname, '..', 'rust-infer', 'target', 'debug', 'rust-infer');
210
+ console.log(`Starting inference: ${binPath}`);
211
+ const args = [modelCfg.engine, actualModelFile];
212
+ if (parakeetConfigPath)
213
+ args.push(parakeetConfigPath);
214
+ inferProcess = (0, child_process_1.spawn)(binPath, args, { stdio: ['pipe', 'pipe', 'inherit'] });
192
215
  inferProcess.stdout.on('data', (data) => {
193
- const lines = data.toString().split('\n').map((l) => l.trim()).filter(Boolean);
194
- for (const line of lines) {
195
- if (line === 'READY') {
216
+ data.toString().split('\n').filter(Boolean).forEach(line => {
217
+ if (line.trim() === 'READY') {
196
218
  isReady = true;
197
- console.log('Inference worker is ready.');
198
- continue;
219
+ console.log('--- Model fully loaded and ready ---');
220
+ return;
199
221
  }
200
222
  try {
201
223
  const parsed = JSON.parse(line);
202
- const resolverCount = Object.keys(resolvers).length;
203
- if (resolverCount > 0) {
204
- const firstKey = Object.keys(resolvers)[0];
224
+ const firstKey = Object.keys(resolvers)[0];
225
+ if (firstKey) {
205
226
  resolvers[firstKey](parsed);
227
+ delete resolvers[firstKey];
206
228
  }
207
229
  }
208
- catch (e) {
209
- console.log('Got non-JSON output from worker:', line);
210
- }
211
- }
212
- });
213
- inferProcess.on('exit', (code) => {
214
- console.log(`Inference worker exited with code ${code}`);
215
- process.exit(code || 1);
230
+ catch { }
231
+ });
216
232
  });
217
- }).catch(e => {
218
- console.error('Failed to download models:', e);
219
- process.exit(1);
220
- });
221
- // ── Request queue ─────────────────────────────────────────────────────
233
+ inferProcess.on('exit', (code) => process.exit(code || 1));
234
+ }).catch(e => { console.error(e); process.exit(1); });
222
235
  const requestQueue = [];
223
236
  let isProcessing = false;
224
237
  function processQueue() {
@@ -226,42 +239,24 @@ function processQueue() {
226
239
  return;
227
240
  isProcessing = true;
228
241
  const req = requestQueue.shift();
229
- console.log(` [Queue] Processing request #${req.reqId} (queue length: ${requestQueue.length})`);
230
242
  resolvers[req.file] = (result) => {
231
- delete resolvers[req.file];
232
243
  isProcessing = false;
233
- if (fs_1.default.existsSync(req.file)) {
244
+ if (fs_1.default.existsSync(req.file))
234
245
  fs_1.default.unlinkSync(req.file);
235
- }
236
246
  req.resolve(result);
237
- process.nextTick(processQueue);
247
+ processQueue();
238
248
  };
239
249
  inferProcess.stdin.write(req.file + '\n');
240
250
  }
241
- // ── Transcription endpoint ────────────────────────────────────────────
242
- app.post('/transcribe', express_1.default.raw({ type: 'audio/wav', limit: '50mb' }), async (req, res) => {
243
- const reqId = req._reqId || 0;
244
- if (!isReady) {
245
- console.log(` [#${reqId}] Rejected: models still loading`);
246
- return res.status(503).json({ error: 'Models are still loading' });
247
- }
248
- if (!req.body || !Buffer.isBuffer(req.body)) {
249
- console.log(` [#${reqId}] Rejected: invalid audio body`);
250
- return res.status(400).json({ error: 'Invalid audio body. Send raw WAV bytes with Content-Type: audio/wav' });
251
- }
252
- const audioSize = req.body.length;
253
- console.log(` [#${reqId}] Audio received: ${formatBytes(audioSize)}`);
254
- const tempFilePath = path_1.default.join(uploadDir, `upload-${Date.now()}-${Math.random().toString(36).substring(7)}.wav`);
255
- fs_1.default.writeFileSync(tempFilePath, req.body);
256
- console.log(` [#${reqId}] Queued for inference (queue length: ${requestQueue.length})`);
257
- const result = await new Promise((resolve) => {
258
- requestQueue.push({ file: tempFilePath, resolve, reqId });
251
+ app.post('/transcribe', express_1.default.raw({ type: 'audio/wav', limit: '100mb' }), async (req, res) => {
252
+ if (!isReady)
253
+ return res.status(503).json({ error: 'Starting up' });
254
+ const tempFile = path_1.default.join(uploadDir, `up-${Date.now()}.wav`);
255
+ fs_1.default.writeFileSync(tempFile, req.body);
256
+ const result = await new Promise(r => {
257
+ requestQueue.push({ file: tempFile, resolve: r, reqId: req._reqId });
259
258
  processQueue();
260
259
  });
261
260
  res.json(result);
262
261
  });
263
- // ── Start server ──────────────────────────────────────────────────────
264
- app.listen(port, () => {
265
- console.log(`\nHandy Remote Server is running on port ${port}`);
266
- console.log(`Waiting for requests...\n`);
267
- });
262
+ app.listen(port, () => console.log(`\nHandy Server on port ${port} | API Key: ${API_KEY}`));
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "handy-remote-server",
3
- "version": "1.2.0",
3
+ "version": "1.3.0",
4
4
  "description": "Remote Transcription Server for Handy",
5
5
  "main": "dist/index.js",
6
6
  "bin": {
@@ -34,9 +34,13 @@
34
34
  },
35
35
  "devDependencies": {
36
36
  "@types/express": "^5.0.6",
37
+ "@types/gunzip-maybe": "^1.4.3",
37
38
  "@types/multer": "^2.1.0",
38
39
  "@types/node": "^25.3.5",
40
+ "@types/tar-fs": "^2.0.4",
41
+ "gunzip-maybe": "^1.4.2",
42
+ "tar-fs": "^3.1.2",
39
43
  "ts-node": "^10.9.2",
40
44
  "typescript": "^5.9.3"
41
45
  }
42
- }
46
+ }
@@ -1,35 +1,91 @@
1
1
  use std::io::{self, BufRead, Write};
2
- use std::path::PathBuf;
3
- use transcribe_rs::{engines::gigaam::GigaAMEngine, TranscriptionEngine};
2
+ use std::path::{Path, PathBuf};
3
+ use transcribe_rs::TranscriptionEngine;
4
+ use transcribe_rs::engines::{
5
+ gigaam::GigaAMEngine,
6
+ whisper::WhisperEngine,
7
+ moonshine::{MoonshineEngine, MoonshineModelParams, ModelVariant},
8
+ parakeet::{ParakeetEngine, ParakeetModelParams},
9
+ sense_voice::{SenseVoiceEngine, SenseVoiceModelParams},
10
+ };
11
+
12
+ enum EngineWrapper {
13
+ GigaAM(GigaAMEngine),
14
+ Whisper(WhisperEngine),
15
+ Moonshine(MoonshineEngine),
16
+ Parakeet(ParakeetEngine),
17
+ SenseVoice(SenseVoiceEngine),
18
+ }
19
+
20
+ impl EngineWrapper {
21
+ fn transcribe_samples(&mut self, audio: Vec<f32>) -> Result<transcribe_rs::TranscriptionResult, Box<dyn std::error::Error>> {
22
+ match self {
23
+ EngineWrapper::GigaAM(e) => e.transcribe_samples(audio, None).map_err(|e| e.into()),
24
+ EngineWrapper::Whisper(e) => e.transcribe_samples(audio, None).map_err(|e| e.into()),
25
+ EngineWrapper::Moonshine(e) => e.transcribe_samples(audio, None).map_err(|e| e.into()),
26
+ EngineWrapper::Parakeet(e) => e.transcribe_samples(audio, None).map_err(|e| e.into()),
27
+ EngineWrapper::SenseVoice(e) => e.transcribe_samples(audio, None).map_err(|e| e.into()),
28
+ }
29
+ }
30
+ }
4
31
 
5
32
  fn main() -> Result<(), Box<dyn std::error::Error>> {
6
- // We get model and config path from args or default
7
33
  let args: Vec<String> = std::env::args().collect();
8
34
 
9
- let model_path = if args.len() > 1 {
10
- args[1].clone()
11
- } else {
12
- let mut d = std::env::current_dir()?;
13
- d.push("models");
14
- d.push("gigaam.onnx");
15
- d.to_string_lossy().to_string()
16
- };
17
-
18
- // Auto-download logic or print error if missing
19
- if !PathBuf::from(&model_path).exists() {
20
- eprintln!("Model file not found: {}. Please ensure model file exists.", model_path);
35
+ // Usage: rust-infer <engine_type> <model_path>
36
+ if args.len() < 3 {
37
+ eprintln!("Usage: rust-infer <engine_type> <model_path>");
38
+ eprintln!("Engines: gigaam, whisper, moonshine, parakeet, sensevoice");
21
39
  std::process::exit(1);
22
40
  }
23
-
24
- eprintln!("Loading GigaAM model from {}...", model_path);
25
-
26
- let mut engine = GigaAMEngine::new();
27
- engine.load_model(std::path::Path::new(&model_path)).map_err(|e| format!("Failed to load GigaAM model: {}", e))?;
28
-
41
+
42
+ let engine_type = args[1].to_lowercase();
43
+ let model_path = &args[2];
44
+
45
+ if !PathBuf::from(model_path).exists() {
46
+ eprintln!("Model file not found: {}", model_path);
47
+ std::process::exit(1);
48
+ }
49
+
50
+ eprintln!("Loading {} engine with model {}...", engine_type, model_path);
51
+
52
+ let mut engine = match engine_type.as_str() {
53
+ "gigaam" => {
54
+ let mut e = GigaAMEngine::new();
55
+ e.load_model(Path::new(model_path))?;
56
+ EngineWrapper::GigaAM(e)
57
+ }
58
+ "whisper" => {
59
+ let mut e = WhisperEngine::new();
60
+ e.load_model(Path::new(model_path))?;
61
+ EngineWrapper::Whisper(e)
62
+ }
63
+ "moonshine" => {
64
+ let mut e = MoonshineEngine::new();
65
+ // Use Base as default for remote
66
+ e.load_model_with_params(Path::new(model_path), MoonshineModelParams::variant(ModelVariant::Base))?;
67
+ EngineWrapper::Moonshine(e)
68
+ }
69
+ "parakeet" => {
70
+ let mut e = ParakeetEngine::new();
71
+ e.load_model_with_params(Path::new(model_path), ParakeetModelParams::int8())?;
72
+ EngineWrapper::Parakeet(e)
73
+ }
74
+ "sensevoice" => {
75
+ let mut e = SenseVoiceEngine::new();
76
+ e.load_model_with_params(Path::new(model_path), SenseVoiceModelParams::int8())?;
77
+ EngineWrapper::SenseVoice(e)
78
+ }
79
+ _ => {
80
+ eprintln!("Unknown engine type: {}", engine_type);
81
+ std::process::exit(1);
82
+ }
83
+ };
84
+
29
85
  eprintln!("Model loaded. Ready to transcribe.");
30
- println!("READY"); // Signal to Node.js that we are ready
86
+ println!("READY");
31
87
  io::stdout().flush()?;
32
-
88
+
33
89
  let stdin = io::stdin();
34
90
  for line in stdin.lock().lines() {
35
91
  let line = line?;
@@ -37,64 +93,47 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
37
93
  if line.is_empty() {
38
94
  continue;
39
95
  }
40
-
41
96
  if line == "EXIT" {
42
97
  break;
43
98
  }
44
-
45
- // Line format: "file_path"
46
- let wav_path = line;
47
-
48
- // Read the file and convert to f32
49
- match read_wav(wav_path) {
99
+
100
+ match read_wav(line) {
50
101
  Ok(samples) => {
51
- match engine.transcribe_samples(samples, None) {
102
+ match engine.transcribe_samples(samples) {
52
103
  Ok(result) => {
53
- let json = serde_json::json!({
54
- "status": "success",
55
- "text": result.text
56
- });
104
+ let json = serde_json::json!({ "status": "success", "text": result.text });
57
105
  println!("{}", json.to_string());
58
- },
106
+ }
59
107
  Err(e) => {
60
- let json = serde_json::json!({
61
- "status": "error",
62
- "error": format!("Transcription failed: {}", e)
63
- });
108
+ let json = serde_json::json!({ "status": "error", "error": format!("Transcription failed: {}", e) });
64
109
  println!("{}", json.to_string());
65
110
  }
66
111
  }
67
- },
112
+ }
68
113
  Err(e) => {
69
- let json = serde_json::json!({
70
- "status": "error",
71
- "error": format!("Failed to read WAV: {}", e)
72
- });
114
+ let json = serde_json::json!({ "status": "error", "error": format!("Failed to read WAV: {}", e) });
73
115
  println!("{}", json.to_string());
74
116
  }
75
117
  }
76
118
  io::stdout().flush()?;
77
119
  }
78
-
79
120
  Ok(())
80
121
  }
81
122
 
82
123
  fn read_wav(path: &str) -> Result<Vec<f32>, Box<dyn std::error::Error>> {
83
124
  let mut reader = hound::WavReader::open(path)?;
84
125
  let spec = reader.spec();
85
-
86
126
  let mut samples = Vec::new();
87
127
  match spec.sample_format {
88
128
  hound::SampleFormat::Int => {
89
129
  if spec.bits_per_sample == 16 {
90
130
  for sample in reader.samples::<i16>() {
91
- let s = sample? as f32 / i16::MAX as f32;
92
- samples.push(s);
131
+ samples.push(sample? as f32 / i16::MAX as f32);
93
132
  }
94
133
  } else {
95
134
  return Err("Only 16-bit integer WAV is supported".into());
96
135
  }
97
- },
136
+ }
98
137
  hound::SampleFormat::Float => {
99
138
  if spec.bits_per_sample == 32 {
100
139
  for sample in reader.samples::<f32>() {
@@ -105,17 +144,13 @@ fn read_wav(path: &str) -> Result<Vec<f32>, Box<dyn std::error::Error>> {
105
144
  }
106
145
  }
107
146
  }
108
-
109
- // Multi-channel to mono (simple average)
110
147
  if spec.channels > 1 {
111
148
  let channels = spec.channels as usize;
112
149
  let mut mono = Vec::with_capacity(samples.len() / channels);
113
150
  for chunk in samples.chunks(channels) {
114
- let sum: f32 = chunk.iter().sum();
115
- mono.push(sum / channels as f32);
151
+ mono.push(chunk.iter().sum::<f32>() / channels as f32);
116
152
  }
117
153
  samples = mono;
118
154
  }
119
-
120
155
  Ok(samples)
121
156
  }