handy-remote-server 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +39 -0
- package/dist/index.js +159 -0
- package/package.json +42 -0
- package/rust-infer/Cargo.toml +9 -0
- package/rust-infer/src/main.rs +121 -0
package/README.md
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# Handy Remote Server 🎙️
|
|
2
|
+
|
|
3
|
+
A lightweight standalone inference server for [Handy](https://github.com/cjpais/Handy), allowing you to transcribe audio from external devices, weak computers, and more.
|
|
4
|
+
|
|
5
|
+
## Installation
|
|
6
|
+
|
|
7
|
+
The easiest way to run the external inference server is using `npx`:
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
npx handy-remote-server
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
_(You must have Node.js and npm installed)_
|
|
14
|
+
|
|
15
|
+
## Usage
|
|
16
|
+
|
|
17
|
+
When you run the server for the first time, it will automatically download the **GigaAM v3** model (a fast speech-recognition model that supports Russian only) if it's not already present.
|
|
18
|
+
|
|
19
|
+
It will also generate a unique Bearer API Token for your active session:
|
|
20
|
+
|
|
21
|
+
```bash
|
|
22
|
+
Your API KEY is: xxxxx-xxxxx-xxxxx-xxxxx
|
|
23
|
+
Handy Remote Server is running on port 3000
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
1. Open **Handy** on your client machine.
|
|
27
|
+
2. Go to **Settings > General**, select the `Remote` engine.
|
|
28
|
+
3. Provide the Server URL: `http://<your-server-ip>:3000`
|
|
29
|
+
4. Provide the generated Token.
|
|
30
|
+
5. All audio chunks will now be transcribed by the server!
|
|
31
|
+
|
|
32
|
+
## How it works
|
|
33
|
+
|
|
34
|
+
The `handy-remote-server` spins up a tiny Express server alongside a heavily optimized Rust CLI (`rust-infer`) powered by `transcribe-rs`. Audio files are dispatched sequentially from the Node server directly into the Rust engine.
|
|
35
|
+
|
|
36
|
+
### Environment variables
|
|
37
|
+
|
|
38
|
+
- `PORT` - defaults to `3000`
|
|
39
|
+
- `API_KEY` - defaults to an auto-generated token in development. Set this to a permanent token for production.
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
"use strict";
|
|
3
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
4
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
5
|
+
};
|
|
6
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
7
|
+
const express_1 = __importDefault(require("express"));
|
|
8
|
+
const multer_1 = __importDefault(require("multer"));
|
|
9
|
+
const child_process_1 = require("child_process");
|
|
10
|
+
const crypto_1 = __importDefault(require("crypto"));
|
|
11
|
+
const path_1 = __importDefault(require("path"));
|
|
12
|
+
const fs_1 = __importDefault(require("fs"));
|
|
13
|
+
const dotenv_1 = __importDefault(require("dotenv"));
|
|
14
|
+
dotenv_1.default.config();
|
|
15
|
+
// Express application instance and listen port (PORT env var, falling back to 3000).
const app = (0, express_1.default)();
const port = process.env.PORT || 3000;
// Resolve the bearer token for this process: honor API_KEY from the
// environment when set; otherwise mint a random per-session token and
// announce it prominently on the console so the operator can copy it.
let API_KEY = process.env.API_KEY;
if (!API_KEY) {
    API_KEY = crypto_1.default.randomBytes(32).toString('hex');
    const banner = [
        `\n======================================================`,
        `Server started without API_KEY in environment variables.`,
        `Generated a new token for this session.`,
        `Your API KEY is: ${API_KEY}`,
        `======================================================\n`,
    ];
    for (const line of banner) {
        console.log(line);
    }
}
|
|
27
|
+
// Directory where downloaded model files live (<package root>/models).
// mkdirSync with { recursive: true } is a no-op when the directory already
// exists, so the previous existsSync() guard was redundant and is dropped.
const modelsDir = path_1.default.join(__dirname, '..', 'models');
fs_1.default.mkdirSync(modelsDir, { recursive: true });
|
|
32
|
+
// Authentication middleware: every route requires `Authorization: Bearer <token>`.
// Returns 401 when the header is missing/malformed, 403 when the token is wrong.
app.use((req, res, next) => {
    const authHeader = req.headers.authorization;
    if (!authHeader || !authHeader.startsWith('Bearer ')) {
        return res.status(401).json({ error: 'Missing or invalid Authorization header' });
    }
    const token = authHeader.split(' ')[1];
    // Compare in constant time: a plain `!==` short-circuits on the first
    // differing byte and can leak key bytes via response timing.
    // timingSafeEqual requires equal-length buffers, so check length first.
    const expected = Buffer.from(API_KEY);
    const provided = Buffer.from(token || '');
    const matches = expected.length === provided.length &&
        crypto_1.default.timingSafeEqual(expected, provided);
    if (!matches) {
        return res.status(403).json({ error: 'Invalid API Key' });
    }
    next();
});
|
|
44
|
+
// Directory for temporary audio files (<package root>/uploads). The
// /transcribe route writes raw request bodies here before handing the
// path to the inference worker.
// NOTE(review): the multer diskStorage config and `upload` middleware that
// used to live here were dead code — nothing in this file referenced
// `upload`; the route parses raw WAV bodies with express.raw() instead —
// so they have been removed. Only the directory side effect is kept.
const uploadDir = path_1.default.join(__dirname, '..', 'uploads');
fs_1.default.mkdirSync(uploadDir, { recursive: true });
|
|
59
|
+
// Model download logic.
const GIGAAM_MODEL_URL = 'https://blob.handy.computer/giga-am-v3.int8.onnx';
const gigaamModelPath = path_1.default.join(modelsDir, 'gigaam.onnx');
/**
 * Download `url` to `dest` unless `dest` already exists.
 *
 * The payload is written to a temporary sibling file and then renamed into
 * place. Previously the bytes were written directly to `dest`, so a crash
 * mid-write could leave a truncated file that the existsSync() fast-path
 * would forever mistake for a complete model.
 *
 * @param {string} url  HTTP(S) URL to fetch.
 * @param {string} dest Absolute path for the downloaded file.
 * @throws {Error} when the HTTP response status is not OK.
 */
async function downloadFile(url, dest) {
    if (fs_1.default.existsSync(dest))
        return;
    console.log(`Downloading ${url} to ${dest}...`);
    fs_1.default.mkdirSync(path_1.default.dirname(dest), { recursive: true });
    const response = await fetch(url);
    if (!response.ok)
        throw new Error(`Failed to fetch ${url}: ${response.statusText}`);
    // NOTE: buffers the entire payload in memory; acceptable for one model
    // file, but switch to stream piping if models grow much larger.
    const arrBuffer = await response.arrayBuffer();
    const tmpPath = `${dest}.download`;
    fs_1.default.writeFileSync(tmpPath, Buffer.from(arrBuffer));
    fs_1.default.renameSync(tmpPath, dest); // atomic publish of the finished file
    console.log(`Downloaded ${dest}`);
}
// Ensure every model the worker needs is present before spawning it.
async function ensureModels() {
    await downloadFile(GIGAAM_MODEL_URL, gigaamModelPath);
}
|
|
77
|
+
// Handle to the spawned rust-infer worker process and its readiness flag.
let inferProcess = null;
let isReady = false;
// Map of temp-file path -> resolve callback for the in-flight request.
// processQueue() allows only one request in flight, so at most one entry
// exists at a time; worker responses are matched to the oldest key.
let resolvers = {};
ensureModels().then(() => {
    // Locate the inference CLI: explicit override, then release build, then debug.
    let inferProcessPath = process.env.INFER_CLI_PATH || path_1.default.join(__dirname, '..', 'rust-infer', 'target', 'release', 'rust-infer');
    if (!fs_1.default.existsSync(inferProcessPath)) {
        inferProcessPath = path_1.default.join(__dirname, '..', 'rust-infer', 'target', 'debug', 'rust-infer');
    }
    console.log(`Using inference CLI: ${inferProcessPath}`);
    inferProcess = (0, child_process_1.spawn)(inferProcessPath, [gigaamModelPath], { stdio: ['pipe', 'pipe', 'inherit'] });
    // stdout arrives in arbitrary chunks: a single JSON response may be split
    // across several 'data' events, or several lines may share one event.
    // Without line buffering, a split response fails JSON.parse and the
    // waiting HTTP request hangs forever. Keep the unterminated tail in
    // `stdoutBuffer` between events so every complete line is processed.
    let stdoutBuffer = '';
    inferProcess.stdout.on('data', (data) => {
        stdoutBuffer += data.toString();
        const pieces = stdoutBuffer.split('\n');
        stdoutBuffer = pieces.pop(); // partial last line, completed by a later chunk
        for (const raw of pieces) {
            const line = raw.trim();
            if (!line)
                continue;
            if (line === 'READY') {
                isReady = true;
                console.log('Inference worker is ready.');
                continue;
            }
            try {
                const parsed = JSON.parse(line);
                const keys = Object.keys(resolvers);
                if (keys.length > 0) {
                    resolvers[keys[0]](parsed);
                }
            }
            catch (e) {
                console.log('Got non-JSON output from worker:', line);
            }
        }
    });
    // The worker is essential; if it dies, take the server down with it so a
    // supervisor can restart the whole unit.
    inferProcess.on('exit', (code) => {
        console.log(`Inference worker exited with code ${code}`);
        process.exit(code || 1);
    });
}).catch(e => {
    console.error('Failed to download models:', e);
    process.exit(1);
});
|
|
117
|
+
// FIFO queue of pending transcription jobs; the single worker handles one at a time.
const requestQueue = [];
let isProcessing = false;
/**
 * Dispatch the next queued job to the worker, if the worker is ready and idle.
 *
 * A resolver is registered under the job's temp-file path; the worker's
 * stdout handler invokes it when a JSON response line arrives. The resolver
 * cleans up the temp file, releases the busy flag, resolves the waiting
 * HTTP handler, and schedules the next dispatch.
 */
function processQueue() {
    const idleOrEmpty = isProcessing || requestQueue.length === 0;
    if (idleOrEmpty || !isReady) {
        return;
    }
    isProcessing = true;
    const job = requestQueue.shift();
    resolvers[job.file] = (result) => {
        delete resolvers[job.file];
        isProcessing = false;
        // Remove the temp upload now that transcription has finished.
        if (fs_1.default.existsSync(job.file)) {
            fs_1.default.unlinkSync(job.file);
        }
        job.resolve(result);
        // Pick up any queued work on the following tick.
        process.nextTick(processQueue);
    };
    // Hand the audio path to the worker over stdin (newline-terminated).
    inferProcess.stdin.write(`${job.file}\n`);
}
|
|
140
|
+
// POST /transcribe — accepts raw WAV bytes and returns the worker's JSON result.
app.post('/transcribe', express_1.default.raw({ type: 'audio/wav', limit: '50mb' }), async (req, res) => {
    if (!isReady) {
        return res.status(503).json({ error: 'Models are still loading' });
    }
    // express.raw() yields a Buffer; anything else means the client sent the
    // wrong Content-Type or an empty body.
    const body = req.body;
    if (!body || !Buffer.isBuffer(body)) {
        return res.status(400).json({ error: 'Invalid audio body. Send raw WAV bytes with Content-Type: audio/wav' });
    }
    // Persist the payload to a uniquely named temp file for the worker.
    const suffix = Math.random().toString(36).substring(7);
    const tempFilePath = path_1.default.join(uploadDir, `upload-${Date.now()}-${suffix}.wav`);
    fs_1.default.writeFileSync(tempFilePath, body);
    // Enqueue the job and wait for the worker's response before replying.
    const result = await new Promise((resolve) => {
        requestQueue.push({ file: tempFilePath, resolve });
        processQueue();
    });
    res.json(result);
});
|
|
157
|
+
// Start accepting HTTP requests immediately; model download and worker spawn
// continue in the background (requests get 503 until the worker is ready).
app.listen(port, () => {
    console.log(`Handy Remote Server is running on port ${port}`);
});
|
package/package.json
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "handy-remote-server",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"description": "Remote Transcription Server for Handy",
|
|
5
|
+
"main": "dist/index.js",
|
|
6
|
+
"bin": {
|
|
7
|
+
"handy-remote-server": "./dist/index.js"
|
|
8
|
+
},
|
|
9
|
+
"scripts": {
|
|
10
|
+
"build": "tsc",
|
|
11
|
+
"start": "node dist/index.js",
|
|
12
|
+
"dev": "ts-node src/index.ts",
|
|
13
|
+
"postinstall": "cd rust-infer && cargo build --release"
|
|
14
|
+
},
|
|
15
|
+
"files": [
|
|
16
|
+
"dist",
|
|
17
|
+
"rust-infer/Cargo.toml",
|
|
18
|
+
"rust-infer/src"
|
|
19
|
+
],
|
|
20
|
+
"keywords": [
|
|
21
|
+
"handy",
|
|
22
|
+
"transcription",
|
|
23
|
+
"stt",
|
|
24
|
+
"remote",
|
|
25
|
+
"server"
|
|
26
|
+
],
|
|
27
|
+
"author": "",
|
|
28
|
+
"license": "ISC",
|
|
29
|
+
"type": "commonjs",
|
|
30
|
+
"dependencies": {
|
|
31
|
+
"dotenv": "^17.3.1",
|
|
32
|
+
"express": "^5.2.1",
|
|
33
|
+
"multer": "^2.1.1"
|
|
34
|
+
},
|
|
35
|
+
"devDependencies": {
|
|
36
|
+
"@types/express": "^5.0.6",
|
|
37
|
+
"@types/multer": "^2.1.0",
|
|
38
|
+
"@types/node": "^25.3.5",
|
|
39
|
+
"ts-node": "^10.9.2",
|
|
40
|
+
"typescript": "^5.9.3"
|
|
41
|
+
}
|
|
42
|
+
}
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
use std::io::{self, BufRead, Write};
|
|
2
|
+
use std::path::PathBuf;
|
|
3
|
+
use transcribe_rs::{engines::gigaam::GigaAMEngine, TranscriptionEngine};
|
|
4
|
+
|
|
5
|
+
/// Entry point for the inference worker CLI.
///
/// Protocol (driven by the Node server over pipes):
/// - stderr carries human-readable diagnostics only.
/// - stdout carries the machine protocol: the literal line "READY" once the
///   model is loaded, then exactly one JSON line per request
///   (`{"status":"success","text":...}` or `{"status":"error","error":...}`).
/// - stdin receives one WAV file path per line; the literal line "EXIT"
///   shuts the worker down.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Model path comes from argv[1], defaulting to ./models/gigaam.onnx
    // relative to the current working directory.
    let args: Vec<String> = std::env::args().collect();

    let model_path = if args.len() > 1 {
        args[1].clone()
    } else {
        let mut d = std::env::current_dir()?;
        d.push("models");
        d.push("gigaam.onnx");
        d.to_string_lossy().to_string()
    };

    // Fail fast with a clear message if the model file is missing; the Node
    // side is responsible for downloading it before spawning this process.
    if !PathBuf::from(&model_path).exists() {
        eprintln!("Model file not found: {}. Please ensure model file exists.", model_path);
        std::process::exit(1);
    }

    eprintln!("Loading GigaAM model from {}...", model_path);

    let mut engine = GigaAMEngine::new();
    engine.load_model(std::path::Path::new(&model_path)).map_err(|e| format!("Failed to load GigaAM model: {}", e))?;

    eprintln!("Model loaded. Ready to transcribe.");
    println!("READY"); // Signal to Node.js that we are ready
    io::stdout().flush()?;

    // Main request loop: each non-empty stdin line is a WAV path to transcribe.
    let stdin = io::stdin();
    for line in stdin.lock().lines() {
        let line = line?;
        let line = line.trim();
        if line.is_empty() {
            continue;
        }

        if line == "EXIT" {
            break;
        }

        // Line format: "file_path"
        let wav_path = line;

        // Decode the file to mono f32 samples, then transcribe. All failure
        // modes are reported as JSON on stdout rather than crashing, so the
        // worker stays alive across bad requests.
        match read_wav(wav_path) {
            Ok(samples) => {
                match engine.transcribe_samples(samples, None) {
                    Ok(result) => {
                        let json = serde_json::json!({
                            "status": "success",
                            "text": result.text
                        });
                        println!("{}", json.to_string());
                    },
                    Err(e) => {
                        let json = serde_json::json!({
                            "status": "error",
                            "error": format!("Transcription failed: {}", e)
                        });
                        println!("{}", json.to_string());
                    }
                }
            },
            Err(e) => {
                let json = serde_json::json!({
                    "status": "error",
                    "error": format!("Failed to read WAV: {}", e)
                });
                println!("{}", json.to_string());
            }
        }
        // Flush after every response so the Node side sees it immediately.
        io::stdout().flush()?;
    }

    Ok(())
}
|
|
81
|
+
|
|
82
|
+
fn read_wav(path: &str) -> Result<Vec<f32>, Box<dyn std::error::Error>> {
|
|
83
|
+
let mut reader = hound::WavReader::open(path)?;
|
|
84
|
+
let spec = reader.spec();
|
|
85
|
+
|
|
86
|
+
let mut samples = Vec::new();
|
|
87
|
+
match spec.sample_format {
|
|
88
|
+
hound::SampleFormat::Int => {
|
|
89
|
+
if spec.bits_per_sample == 16 {
|
|
90
|
+
for sample in reader.samples::<i16>() {
|
|
91
|
+
let s = sample? as f32 / i16::MAX as f32;
|
|
92
|
+
samples.push(s);
|
|
93
|
+
}
|
|
94
|
+
} else {
|
|
95
|
+
return Err("Only 16-bit integer WAV is supported".into());
|
|
96
|
+
}
|
|
97
|
+
},
|
|
98
|
+
hound::SampleFormat::Float => {
|
|
99
|
+
if spec.bits_per_sample == 32 {
|
|
100
|
+
for sample in reader.samples::<f32>() {
|
|
101
|
+
samples.push(sample?);
|
|
102
|
+
}
|
|
103
|
+
} else {
|
|
104
|
+
return Err("Only 32-bit float WAV is supported".into());
|
|
105
|
+
}
|
|
106
|
+
}
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
// Multi-channel to mono (simple average)
|
|
110
|
+
if spec.channels > 1 {
|
|
111
|
+
let channels = spec.channels as usize;
|
|
112
|
+
let mut mono = Vec::with_capacity(samples.len() / channels);
|
|
113
|
+
for chunk in samples.chunks(channels) {
|
|
114
|
+
let sum: f32 = chunk.iter().sum();
|
|
115
|
+
mono.push(sum / channels as f32);
|
|
116
|
+
}
|
|
117
|
+
samples = mono;
|
|
118
|
+
}
|
|
119
|
+
|
|
120
|
+
Ok(samples)
|
|
121
|
+
}
|