handy-remote-server 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,39 @@
1
+ # Handy Remote Server 🎙️
2
+
3
+ A lightweight standalone inference server for [Handy](https://github.com/cjpais/Handy), allowing you to transcribe audio from external devices, weak computers, and more.
4
+
5
+ ## Installation
6
+
7
+ The easiest way to run the external inference server is using `npx`:
8
+
9
+ ```bash
10
+ npx handy-remote-server
11
+ ```
12
+
13
+ _(You must have Node.js and npm installed)_
14
+
15
+ ## Usage
16
+
17
+ When you run the server for the first time, it will automatically download the **GigaAM v3** model (a fast, Russian-only speech-recognition model) if it's not present.
18
+
19
+ It will also generate a unique Bearer API Token for your active session:
20
+
21
+ ```bash
22
+ Your API KEY is: xxxxx-xxxxx-xxxxx-xxxxx
23
+ Handy Remote Server is running on port 3000
24
+ ```
25
+
26
+ 1. Open **Handy** on your client machine.
27
+ 2. Go to **Settings > General**, select the `Remote` engine.
28
+ 3. Provide the Server URL: `http://<your-server-ip>:3000`
29
+ 4. Provide the generated Token.
30
+ 5. All audio chunks will now be transcribed by the server!
31
+
32
+ ## How it works
33
+
34
+ The `handy-remote-server` spins up a tiny Express server alongside a heavily optimized Rust CLI (`rust-infer`) powered by `transcribe-rs`. Audio files are dispatched sequentially from the Node server directly into the Rust engine.
35
+
36
+ ### Environment variables
37
+
38
+ - `PORT` - defaults to `3000`
39
+ - `API_KEY` - defaults to an auto-generated token in development. Set this to a permanent token for production.
package/dist/index.js ADDED
@@ -0,0 +1,159 @@
1
#!/usr/bin/env node
"use strict";
// Handy Remote Server: receives raw WAV audio over authenticated HTTP and
// forwards it, one file at a time, to a persistent Rust inference worker
// (`rust-infer`) over stdin/stdout.
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
const express_1 = __importDefault(require("express"));
const child_process_1 = require("child_process");
const crypto_1 = __importDefault(require("crypto"));
const path_1 = __importDefault(require("path"));
const fs_1 = __importDefault(require("fs"));
const dotenv_1 = __importDefault(require("dotenv"));
dotenv_1.default.config();
const app = (0, express_1.default)();
const port = process.env.PORT || 3000;
// Set up API KEY: use the configured token, or mint a one-off session token.
let API_KEY = process.env.API_KEY;
if (!API_KEY) {
    API_KEY = crypto_1.default.randomBytes(32).toString('hex');
    console.log(`\n======================================================`);
    console.log(`Server started without API_KEY in environment variables.`);
    console.log(`Generated a new token for this session.`);
    console.log(`Your API KEY is: ${API_KEY}`);
    console.log(`======================================================\n`);
}
// Constant-time token comparison. Hashing both sides first gives
// timingSafeEqual the equal-length buffers it requires and avoids leaking
// the key length; a plain `!==` on a secret is vulnerable to timing attacks.
function tokenMatches(candidate, expected) {
    const a = crypto_1.default.createHash('sha256').update(candidate).digest();
    const b = crypto_1.default.createHash('sha256').update(expected).digest();
    return crypto_1.default.timingSafeEqual(a, b);
}
// Ensure models directory exists
const modelsDir = path_1.default.join(__dirname, '..', 'models');
if (!fs_1.default.existsSync(modelsDir)) {
    fs_1.default.mkdirSync(modelsDir, { recursive: true });
}
// Authentication middleware: every route requires `Authorization: Bearer <token>`.
app.use((req, res, next) => {
    const authHeader = req.headers.authorization;
    if (!authHeader || !authHeader.startsWith('Bearer ')) {
        return res.status(401).json({ error: 'Missing or invalid Authorization header' });
    }
    const token = authHeader.split(' ')[1];
    // Guard: "Bearer " with nothing after it leaves `token` undefined.
    if (!token || !tokenMatches(token, API_KEY)) {
        return res.status(403).json({ error: 'Invalid API Key' });
    }
    next();
});
// Directory for the temporary WAV files handed to the worker.
// (The previous multer diskStorage/upload configuration was dead code — the
// /transcribe route consumes raw bodies via express.raw — so it was removed.)
const uploadDir = path_1.default.join(__dirname, '..', 'uploads');
if (!fs_1.default.existsSync(uploadDir)) {
    fs_1.default.mkdirSync(uploadDir, { recursive: true });
}
// Model download logic
const GIGAAM_MODEL_URL = 'https://blob.handy.computer/giga-am-v3.int8.onnx';
const gigaamModelPath = path_1.default.join(modelsDir, 'gigaam.onnx');
// Download `url` to `dest` unless it already exists. Buffers the whole file
// in memory, which is acceptable for a single model file.
async function downloadFile(url, dest) {
    if (fs_1.default.existsSync(dest))
        return;
    console.log(`Downloading ${url} to ${dest}...`);
    fs_1.default.mkdirSync(path_1.default.dirname(dest), { recursive: true });
    const response = await fetch(url);
    if (!response.ok)
        throw new Error(`Failed to fetch ${url}: ${response.statusText}`);
    const arrBuffer = await response.arrayBuffer();
    fs_1.default.writeFileSync(dest, Buffer.from(arrBuffer));
    console.log(`Downloaded ${dest}`);
}
async function ensureModels() {
    await downloadFile(GIGAAM_MODEL_URL, gigaamModelPath);
}
let inferProcess = null;
let isReady = false;
// At most one transcription is in flight at a time (enforced by
// `isProcessing` in processQueue), so a single pending resolver suffices —
// the previous keyed `resolvers` map plus "first key" dispatch was fragile
// indirection around the same invariant.
let pendingResolver = null;
ensureModels().then(() => {
    // Spawn the rust background process
    let inferProcessPath = process.env.INFER_CLI_PATH || path_1.default.join(__dirname, '..', 'rust-infer', 'target', 'release', 'rust-infer');
    if (!fs_1.default.existsSync(inferProcessPath)) {
        inferProcessPath = path_1.default.join(__dirname, '..', 'rust-infer', 'target', 'debug', 'rust-infer');
    }
    console.log(`Using inference CLI: ${inferProcessPath}`);
    inferProcess = (0, child_process_1.spawn)(inferProcessPath, [gigaamModelPath], { stdio: ['pipe', 'pipe', 'inherit'] });
    inferProcess.stdout.on('data', (data) => {
        // The worker emits "READY" once, then exactly one JSON line per request.
        const lines = data.toString().split('\n').map((l) => l.trim()).filter(Boolean);
        for (const line of lines) {
            if (line === 'READY') {
                isReady = true;
                console.log('Inference worker is ready.');
                continue;
            }
            try {
                const parsed = JSON.parse(line);
                if (pendingResolver) {
                    pendingResolver(parsed);
                }
            }
            catch (e) {
                console.log('Got non-JSON output from worker:', line);
            }
        }
    });
    inferProcess.on('exit', (code) => {
        console.log(`Inference worker exited with code ${code}`);
        process.exit(code || 1);
    });
}).catch(e => {
    console.error('Failed to download models:', e);
    process.exit(1);
});
// Queue for pending transcriptions to send them sequentially to the single worker
const requestQueue = [];
let isProcessing = false;
function processQueue() {
    if (isProcessing || requestQueue.length === 0 || !isReady)
        return;
    isProcessing = true;
    const job = requestQueue.shift();
    // Register the resolver for the single in-flight job.
    pendingResolver = (result) => {
        pendingResolver = null;
        isProcessing = false;
        // Clean up temp file
        if (fs_1.default.existsSync(job.file)) {
            fs_1.default.unlinkSync(job.file);
        }
        job.resolve(result);
        // Process next
        process.nextTick(processQueue);
    };
    // Send path to worker via stdin
    inferProcess.stdin.write(job.file + '\n');
}
// POST /transcribe: body is raw WAV bytes; responds with the worker's JSON result.
app.post('/transcribe', express_1.default.raw({ type: 'audio/wav', limit: '50mb' }), async (req, res) => {
    if (!isReady) {
        return res.status(503).json({ error: 'Models are still loading' });
    }
    // If using express.raw, the body is a Buffer
    if (!req.body || !Buffer.isBuffer(req.body)) {
        return res.status(400).json({ error: 'Invalid audio body. Send raw WAV bytes with Content-Type: audio/wav' });
    }
    const tempFilePath = path_1.default.join(uploadDir, `upload-${Date.now()}-${Math.random().toString(36).substring(7)}.wav`);
    fs_1.default.writeFileSync(tempFilePath, req.body);
    const result = await new Promise((resolve) => {
        requestQueue.push({ file: tempFilePath, resolve });
        processQueue();
    });
    res.json(result);
});
app.listen(port, () => {
    console.log(`Handy Remote Server is running on port ${port}`);
});
package/package.json ADDED
@@ -0,0 +1,42 @@
1
+ {
2
+ "name": "handy-remote-server",
3
+ "version": "1.0.0",
4
+ "description": "Remote Transcription Server for Handy",
5
+ "main": "dist/index.js",
6
+ "bin": {
7
+ "handy-remote-server": "./dist/index.js"
8
+ },
9
+ "scripts": {
10
+ "build": "tsc",
11
+ "start": "node dist/index.js",
12
+ "dev": "ts-node src/index.ts",
13
+ "postinstall": "cd rust-infer && cargo build --release"
14
+ },
15
+ "files": [
16
+ "dist",
17
+ "rust-infer/Cargo.toml",
18
+ "rust-infer/src"
19
+ ],
20
+ "keywords": [
21
+ "handy",
22
+ "transcription",
23
+ "stt",
24
+ "remote",
25
+ "server"
26
+ ],
27
+ "author": "",
28
+ "license": "ISC",
29
+ "type": "commonjs",
30
+ "dependencies": {
31
+ "dotenv": "^17.3.1",
32
+ "express": "^5.2.1",
33
+ "multer": "^2.1.1"
34
+ },
35
+ "devDependencies": {
36
+ "@types/express": "^5.0.6",
37
+ "@types/multer": "^2.1.0",
38
+ "@types/node": "^25.3.5",
39
+ "ts-node": "^10.9.2",
40
+ "typescript": "^5.9.3"
41
+ }
42
+ }
@@ -0,0 +1,9 @@
1
+ [package]
2
+ name = "rust-infer"
3
+ version = "0.1.0"
4
+ edition = "2024"
5
+
6
+ [dependencies]
7
+ hound = "3.5.1"
8
+ serde_json = "1.0.149"
9
+ transcribe-rs = { version = "0.2.8", features = ["whisper", "parakeet", "moonshine", "sense_voice", "gigaam"] }
@@ -0,0 +1,121 @@
1
+ use std::io::{self, BufRead, Write};
2
+ use std::path::PathBuf;
3
+ use transcribe_rs::{engines::gigaam::GigaAMEngine, TranscriptionEngine};
4
+
5
+ fn main() -> Result<(), Box<dyn std::error::Error>> {
6
+ // We get model and config path from args or default
7
+ let args: Vec<String> = std::env::args().collect();
8
+
9
+ let model_path = if args.len() > 1 {
10
+ args[1].clone()
11
+ } else {
12
+ let mut d = std::env::current_dir()?;
13
+ d.push("models");
14
+ d.push("gigaam.onnx");
15
+ d.to_string_lossy().to_string()
16
+ };
17
+
18
+ // Auto-download logic or print error if missing
19
+ if !PathBuf::from(&model_path).exists() {
20
+ eprintln!("Model file not found: {}. Please ensure model file exists.", model_path);
21
+ std::process::exit(1);
22
+ }
23
+
24
+ eprintln!("Loading GigaAM model from {}...", model_path);
25
+
26
+ let mut engine = GigaAMEngine::new();
27
+ engine.load_model(std::path::Path::new(&model_path)).map_err(|e| format!("Failed to load GigaAM model: {}", e))?;
28
+
29
+ eprintln!("Model loaded. Ready to transcribe.");
30
+ println!("READY"); // Signal to Node.js that we are ready
31
+ io::stdout().flush()?;
32
+
33
+ let stdin = io::stdin();
34
+ for line in stdin.lock().lines() {
35
+ let line = line?;
36
+ let line = line.trim();
37
+ if line.is_empty() {
38
+ continue;
39
+ }
40
+
41
+ if line == "EXIT" {
42
+ break;
43
+ }
44
+
45
+ // Line format: "file_path"
46
+ let wav_path = line;
47
+
48
+ // Read the file and convert to f32
49
+ match read_wav(wav_path) {
50
+ Ok(samples) => {
51
+ match engine.transcribe_samples(samples, None) {
52
+ Ok(result) => {
53
+ let json = serde_json::json!({
54
+ "status": "success",
55
+ "text": result.text
56
+ });
57
+ println!("{}", json.to_string());
58
+ },
59
+ Err(e) => {
60
+ let json = serde_json::json!({
61
+ "status": "error",
62
+ "error": format!("Transcription failed: {}", e)
63
+ });
64
+ println!("{}", json.to_string());
65
+ }
66
+ }
67
+ },
68
+ Err(e) => {
69
+ let json = serde_json::json!({
70
+ "status": "error",
71
+ "error": format!("Failed to read WAV: {}", e)
72
+ });
73
+ println!("{}", json.to_string());
74
+ }
75
+ }
76
+ io::stdout().flush()?;
77
+ }
78
+
79
+ Ok(())
80
+ }
81
+
82
+ fn read_wav(path: &str) -> Result<Vec<f32>, Box<dyn std::error::Error>> {
83
+ let mut reader = hound::WavReader::open(path)?;
84
+ let spec = reader.spec();
85
+
86
+ let mut samples = Vec::new();
87
+ match spec.sample_format {
88
+ hound::SampleFormat::Int => {
89
+ if spec.bits_per_sample == 16 {
90
+ for sample in reader.samples::<i16>() {
91
+ let s = sample? as f32 / i16::MAX as f32;
92
+ samples.push(s);
93
+ }
94
+ } else {
95
+ return Err("Only 16-bit integer WAV is supported".into());
96
+ }
97
+ },
98
+ hound::SampleFormat::Float => {
99
+ if spec.bits_per_sample == 32 {
100
+ for sample in reader.samples::<f32>() {
101
+ samples.push(sample?);
102
+ }
103
+ } else {
104
+ return Err("Only 32-bit float WAV is supported".into());
105
+ }
106
+ }
107
+ }
108
+
109
+ // Multi-channel to mono (simple average)
110
+ if spec.channels > 1 {
111
+ let channels = spec.channels as usize;
112
+ let mut mono = Vec::with_capacity(samples.len() / channels);
113
+ for chunk in samples.chunks(channels) {
114
+ let sum: f32 = chunk.iter().sum();
115
+ mono.push(sum / channels as f32);
116
+ }
117
+ samples = mono;
118
+ }
119
+
120
+ Ok(samples)
121
+ }