readback 0.0.0-alpha.0 → 0.0.0-alpha.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +104 -0
- package/bin/index.ts +54 -31
- package/package.json +8 -6
- package/src/capture.ts +130 -99
- package/src/cleaner.ts +26 -79
- package/src/console.ts +12 -0
- package/src/model-manager.ts +230 -0
- package/src/normalization/atc-phrases.ts +385 -0
- package/src/normalization/callsigns.ts +164 -0
- package/src/normalization/keywords.ts +137 -0
- package/src/normalization/numbers.ts +197 -0
- package/src/normalization/phonetic.ts +58 -0
- package/src/normalization/tokenizer.ts +26 -0
- package/src/normalization/waypoints.ts +194 -0
- package/src/presentation/renderer.ts +92 -0
- package/src/types.ts +45 -0
- package/src/vad-worker.ts +26 -0
- package/src/whisper-worker.ts +37 -0
- package/src/altitude.ts +0 -52
- package/src/callsigns.ts +0 -57
- package/src/flightlevel.ts +0 -35
- package/src/heading.ts +0 -39
- package/src/keywords.ts +0 -32
- package/src/numbers.ts +0 -17
- package/src/phonetic.ts +0 -49
- package/src/runway.ts +0 -37
- package/src/speed.ts +0 -39
package/README.md
ADDED
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
# 🎧 readback
|
|
2
|
+
|
|
3
|
+
**readback** is a real-time ATC (Air Traffic Control) transcription tool that captures system audio, transcribes it using local ATC-fine-tuned [Whisper](https://en.wikipedia.org/wiki/Whisper_(speech_recognition_system)) models, and formats the output with aviation-specific syntax highlighting. Useful for flight simmers on [VATSIM](https://vatsim.net) and [IVAO](https://www.ivao.aero) networks who (like me) struggle with following ATC communications.
|
|
4
|
+
|
|
5
|
+
## ⚠️ Disclaimer
|
|
6
|
+
|
|
7
|
+
**Transcription is far from perfect.** The AI will make mistakes with fast speech, accents, background noise, and similar-sounding words. Always verify critical information (altitudes, headings, frequencies) and ask ATC to repeat if unsure. Never use for real-world aviation.
|
|
8
|
+
|
|
9
|
+
## 🚀 Installation
|
|
10
|
+
|
|
11
|
+
### Step 1: Install build tools for native modules
|
|
12
|
+
|
|
13
|
+
**macOS**: Install Xcode Command Line Tools with `xcode-select --install`
|
|
14
|
+
|
|
15
|
+
**Windows**: Install Visual Studio Build Tools with C++ workload from visualstudio.microsoft.com
|
|
16
|
+
|
|
17
|
+
**Linux**: Install build essentials with `sudo apt-get install build-essential cmake` (Debian/Ubuntu) or equivalent for your distribution
|
|
18
|
+
|
|
19
|
+
### Step 2: Install Node.js
|
|
20
|
+
|
|
21
|
+
You need Node.js version 22.18.0 or higher installed on your system:
|
|
22
|
+
|
|
23
|
+
1. **Download Node.js**: Go to https://nodejs.org/en/download/ and download the **LTS version** (Long Term Support)
|
|
24
|
+
2. **Install**: Run the downloaded installer
|
|
25
|
+
3. **Verify**: Open Terminal, type `node --version` and hit the enter key (you should see something like `v24.x.x`)
|
|
26
|
+
|
|
27
|
+
### Step 3: Install readback globally
|
|
28
|
+
|
|
29
|
+
Open Terminal and run:
|
|
30
|
+
|
|
31
|
+
```bash
|
|
32
|
+
npm install -g readback
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
This installs readback globally so you can run it from anywhere.
|
|
36
|
+
|
|
37
|
+
---
|
|
38
|
+
|
|
39
|
+
## 🎮 Usage
|
|
40
|
+
|
|
41
|
+
In your terminal, run:
|
|
42
|
+
```bash
|
|
43
|
+
readback
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
The first time you run readback, it checks whether a model exists in `~/.readback/models`. If none is found, it asks you to download a Whisper model (Medium ~1.5GB or Large ~3GB), which is then stored in that folder.
|
|
47
|
+
|
|
48
|
+
> 💡 I recommend using **medium** since it's faster and the quality difference compared to large is negligible.
|
|
49
|
+
|
|
50
|
+
You can bring your own model too:
|
|
51
|
+
|
|
52
|
+
```bash
|
|
53
|
+
MODEL_PATH=~/path/to/your-model-ggml.bin readback
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
### Exiting
|
|
57
|
+
|
|
58
|
+
Press `Ctrl+C`
|
|
59
|
+
|
|
60
|
+
---
|
|
61
|
+
|
|
62
|
+
## ⬆️ Updating
|
|
63
|
+
|
|
64
|
+
Open Terminal and run:
|
|
65
|
+
|
|
66
|
+
```bash
|
|
67
|
+
npm update -g readback
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
---
|
|
71
|
+
|
|
72
|
+
## 🗑️ Uninstalling
|
|
73
|
+
|
|
74
|
+
### Uninstall npm package
|
|
75
|
+
|
|
76
|
+
Open Terminal and run:
|
|
77
|
+
|
|
78
|
+
```bash
|
|
79
|
+
npm uninstall -g readback
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
To also remove the downloaded models, delete the `.readback` folder in your home directory.
|
|
83
|
+
|
|
84
|
+
---
|
|
85
|
+
|
|
86
|
+
## 📝 License
|
|
87
|
+
|
|
88
|
+
MIT License - See repository for details
|
|
89
|
+
|
|
90
|
+
---
|
|
91
|
+
|
|
92
|
+
## 🙏 Credits
|
|
93
|
+
|
|
94
|
+
This project would not be possible without the ATC-fine-tuned models provided by [Jack Tol](https://huggingface.co/jacktol).
|
|
95
|
+
|
|
96
|
+
---
|
|
97
|
+
|
|
98
|
+
## 🐛 Issues and contributions
|
|
99
|
+
|
|
100
|
+
Found a bug or want to add features? Submit issues or pull requests on the [GitHub repository](https://github.com/borisdiakur/readback).
|
|
101
|
+
|
|
102
|
+
---
|
|
103
|
+
|
|
104
|
+
**👋🏻 Happy flying!**
|
package/bin/index.ts
CHANGED
|
@@ -1,45 +1,68 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
|
|
3
|
-
import {Command} from
|
|
4
|
-
import {
|
|
5
|
-
import {
|
|
6
|
-
import
|
|
3
|
+
import { Command } from "commander";
|
|
4
|
+
import { styleText } from "node:util";
|
|
5
|
+
import { cleanTranscript } from "../src/cleaner.ts";
|
|
6
|
+
import { startCapture } from "../src/capture.ts";
|
|
7
|
+
import packageJson from "../package.json" with { type: "json" };
|
|
8
|
+
import { ensureModel } from "../src/model-manager.ts";
|
|
7
9
|
|
|
8
|
-
const PREFIX =
|
|
10
|
+
const PREFIX = "🎧";
|
|
9
11
|
|
|
10
12
|
const program = new Command();
|
|
11
13
|
program
|
|
12
14
|
.version(packageJson.version)
|
|
13
15
|
.description(`${PREFIX} ${packageJson.name}\n${packageJson.description}`)
|
|
14
|
-
.option(
|
|
15
|
-
.option(
|
|
16
|
-
.option(
|
|
17
|
-
.option(
|
|
18
|
-
.option(
|
|
19
|
-
.option(
|
|
20
|
-
.option(
|
|
21
|
-
.option(
|
|
22
|
-
.option(
|
|
23
|
-
.option(
|
|
16
|
+
.option("--debug", "Enable debug output showing token stream")
|
|
17
|
+
.option("--raw", "Disable all cleaning and formatting")
|
|
18
|
+
.option("--no-callsigns", "Disable callsign detection and normalization")
|
|
19
|
+
.option("--no-phonetic", "Disable phonetic formatting")
|
|
20
|
+
.option("--no-fl", "Disable flight level abbreviation")
|
|
21
|
+
.option("--no-numbers", "Disable number-word conversion")
|
|
22
|
+
.option("--no-runways", "Disable runway formatting")
|
|
23
|
+
.option("--no-heading", "Disable heading formatting")
|
|
24
|
+
.option("--no-speed", "Disable speed formatting")
|
|
25
|
+
.option("--no-altitude", "Disable altitude formatting")
|
|
26
|
+
.option("--no-keywords", "Disable keyword highlighting")
|
|
27
|
+
.option("--no-frequency", "Disable frequency formatting")
|
|
28
|
+
.option("--no-squawk", "Disable squawk formatting")
|
|
29
|
+
.option("--no-qnh", "Disable qnh formatting")
|
|
30
|
+
.option("--no-colors", "Disable coloring")
|
|
31
|
+
.option("--no-waypoints", "Disable waypoint formatting")
|
|
24
32
|
.helpOption("-h, --help", "Display this help text")
|
|
25
33
|
.parse(process.argv);
|
|
26
34
|
|
|
27
35
|
const opts = program.opts();
|
|
28
36
|
|
|
37
|
+
const modelPath = await ensureModel(opts.model);
|
|
38
|
+
|
|
29
39
|
startCapture(async (text: string) => {
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
40
|
+
if (opts.raw) {
|
|
41
|
+
console.log("🎧 " + text);
|
|
42
|
+
return;
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
const result = cleanTranscript(text, {
|
|
46
|
+
callsigns: opts.callsigns,
|
|
47
|
+
phonetic: opts.phonetic,
|
|
48
|
+
fl: opts.fl,
|
|
49
|
+
numbers: opts.numbers,
|
|
50
|
+
runways: opts.runways,
|
|
51
|
+
heading: opts.heading, // controlled by --no-heading (not --no-runways)
|
|
52
|
+
speed: opts.speed,
|
|
53
|
+
altitude: opts.altitude,
|
|
54
|
+
keywords: opts.keywords,
|
|
55
|
+
frequency: opts.frequency,
|
|
56
|
+
squawk: opts.squawk,
|
|
57
|
+
colors: opts.colors,
|
|
58
|
+
qnh: opts.qnh,
|
|
59
|
+
waypoints: opts.waypoints,
|
|
60
|
+
raw: opts.raw,
|
|
61
|
+
debug: opts.debug,
|
|
62
|
+
});
|
|
63
|
+
|
|
64
|
+
if (opts.debug) {
|
|
65
|
+
console.log("🐞 " + styleText(["dim"], JSON.stringify(result.tokens)));
|
|
66
|
+
}
|
|
67
|
+
console.log("🎧 " + result.output);
|
|
68
|
+
}, modelPath);
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "readback",
|
|
3
|
-
"version": "0.0.0-alpha.
|
|
3
|
+
"version": "0.0.0-alpha.2",
|
|
4
4
|
"description": "Transcribes ATC transmissions into readable text.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"ATC",
|
|
@@ -28,19 +28,21 @@
|
|
|
28
28
|
"LICENSE"
|
|
29
29
|
],
|
|
30
30
|
"scripts": {
|
|
31
|
-
"start": "node ./bin/index.ts"
|
|
31
|
+
"start": "node ./bin/index.ts",
|
|
32
|
+
"test": "node --test tests/**/*.spec.ts",
|
|
33
|
+
"test:cover": "node --experimental-test-coverage --test tests/**/*.spec.ts"
|
|
32
34
|
},
|
|
33
35
|
"dependencies": {
|
|
34
36
|
"@ricky0123/vad-node": "^0.0.3",
|
|
35
|
-
"chalk": "^5.6.2",
|
|
36
37
|
"commander": "^14.0.3",
|
|
38
|
+
"cmake-js": "^8.0.0",
|
|
37
39
|
"native-recorder-nodejs": "^1.2.0",
|
|
38
|
-
"
|
|
40
|
+
"smart-whisper": "^0.8.1",
|
|
41
|
+
"ora": "^9.0.0"
|
|
39
42
|
},
|
|
40
43
|
"devDependencies": {
|
|
41
44
|
"@types/node": "^25.1.0",
|
|
42
|
-
"
|
|
43
|
-
"cmake-js": "^8.0.0"
|
|
45
|
+
"prettier": "^3.8.1"
|
|
44
46
|
},
|
|
45
47
|
"engines": {
|
|
46
48
|
"node": ">=22.18.0"
|
package/src/capture.ts
CHANGED
|
@@ -1,80 +1,132 @@
|
|
|
1
|
-
import {AudioRecorder, SYSTEM_AUDIO_DEVICE_ID} from
|
|
2
|
-
import {
|
|
3
|
-
import {
|
|
4
|
-
import
|
|
5
|
-
import {
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
const
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
1
|
+
import { AudioRecorder, SYSTEM_AUDIO_DEVICE_ID } from "native-recorder-nodejs";
|
|
2
|
+
import { fork } from "child_process";
|
|
3
|
+
import { fileURLToPath } from "url";
|
|
4
|
+
import { styleText } from "node:util";
|
|
5
|
+
import { dirname, join, basename } from "path";
|
|
6
|
+
import { clearLine } from "./console.ts";
|
|
7
|
+
|
|
8
|
+
const MIN_UTTERANCE_SIZE = 48000;
|
|
9
|
+
const WINDOW_SIZE = 96000;
|
|
10
|
+
const MAX_CONCURRENT = 6;
|
|
11
|
+
|
|
12
|
+
const __filename = fileURLToPath(import.meta.url);
|
|
13
|
+
const __dirname = dirname(__filename);
|
|
14
|
+
|
|
15
|
+
export async function startCapture(
|
|
16
|
+
onTranscript: (text: string) => void,
|
|
17
|
+
modelPath: string,
|
|
18
|
+
) {
|
|
16
19
|
let seqNum = 0;
|
|
17
20
|
const results = new Map<number, string>();
|
|
18
21
|
let nextToPrint = 0;
|
|
19
|
-
const queue: Array<{ data: Buffer; seq: number }> = [];
|
|
20
22
|
let activeCount = 0;
|
|
21
|
-
const MAX_CONCURRENT = 4;
|
|
22
23
|
|
|
23
24
|
const audioState = {
|
|
24
25
|
buffer: [] as Buffer[],
|
|
25
26
|
size: 0,
|
|
26
27
|
utterance: [] as Buffer[],
|
|
27
|
-
utteranceSize: 0
|
|
28
|
+
utteranceSize: 0,
|
|
28
29
|
};
|
|
29
30
|
|
|
30
|
-
|
|
31
|
-
const
|
|
31
|
+
// Create VAD worker
|
|
32
|
+
const vadWorker = fork(join(__dirname, "vad-worker.ts"), [], {
|
|
33
|
+
stdio: ["ignore", "ignore", "ignore", "ipc"],
|
|
34
|
+
env: process.env,
|
|
35
|
+
detached: true,
|
|
36
|
+
});
|
|
32
37
|
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
+
// Wait for VAD to be ready
|
|
39
|
+
await new Promise<void>((resolve) => {
|
|
40
|
+
vadWorker.once("message", (msg: { ready?: boolean }) => {
|
|
41
|
+
if (msg.ready) resolve();
|
|
42
|
+
});
|
|
43
|
+
});
|
|
44
|
+
|
|
45
|
+
const workers = Array.from({ length: MAX_CONCURRENT }, () => {
|
|
46
|
+
const worker = fork(join(__dirname, "whisper-worker.ts"), [], {
|
|
47
|
+
stdio: ["pipe", "pipe", "pipe", "ipc"],
|
|
48
|
+
env: { ...process.env, MODEL_PATH: modelPath },
|
|
49
|
+
});
|
|
50
|
+
|
|
51
|
+
worker.on("error", (err) => {
|
|
52
|
+
console.error("Worker error:", err);
|
|
53
|
+
process.exit(1);
|
|
54
|
+
});
|
|
55
|
+
|
|
56
|
+
worker.on("exit", (code) => {
|
|
57
|
+
if (code !== 0 && code !== null) {
|
|
58
|
+
console.error(`Worker exited with code ${code}`);
|
|
59
|
+
process.exit(1);
|
|
60
|
+
}
|
|
61
|
+
});
|
|
62
|
+
|
|
63
|
+
return worker;
|
|
64
|
+
});
|
|
65
|
+
|
|
66
|
+
process.on("SIGINT", async () => {
|
|
67
|
+
clearLine();
|
|
68
|
+
console.log("👋 Bye!");
|
|
69
|
+
|
|
70
|
+
try {
|
|
71
|
+
systemRecorder.stop();
|
|
72
|
+
} catch {}
|
|
73
|
+
|
|
74
|
+
// Kill VAD worker
|
|
75
|
+
if (vadWorker.pid) {
|
|
76
|
+
try {
|
|
77
|
+
process.kill(vadWorker.pid, "SIGKILL");
|
|
78
|
+
} catch {}
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
// Kill whisper workers
|
|
82
|
+
workers.forEach((w) => {
|
|
83
|
+
if (w.pid) {
|
|
84
|
+
try {
|
|
85
|
+
process.kill(w.pid, "SIGKILL");
|
|
86
|
+
} catch {}
|
|
87
|
+
}
|
|
88
|
+
});
|
|
89
|
+
|
|
90
|
+
process.exit(0);
|
|
38
91
|
});
|
|
39
92
|
|
|
40
93
|
const systemRecorder = new AudioRecorder();
|
|
41
|
-
const outputs = AudioRecorder.getDevices(
|
|
94
|
+
const outputs = AudioRecorder.getDevices("output");
|
|
42
95
|
const systemAudio =
|
|
43
|
-
outputs.find(d => d.id === SYSTEM_AUDIO_DEVICE_ID) ||
|
|
44
|
-
outputs.find(d => d.isDefault);
|
|
96
|
+
outputs.find((d) => d.id === SYSTEM_AUDIO_DEVICE_ID) ||
|
|
97
|
+
outputs.find((d) => d.isDefault);
|
|
45
98
|
|
|
46
99
|
if (!systemAudio) {
|
|
47
|
-
throw new Error(
|
|
100
|
+
throw new Error("Missing audio devices");
|
|
48
101
|
}
|
|
49
102
|
|
|
50
103
|
const permissions = AudioRecorder.checkPermission();
|
|
51
|
-
if (!permissions.system) AudioRecorder.requestPermission(
|
|
52
|
-
if (!permissions.mic) AudioRecorder.requestPermission(
|
|
104
|
+
if (!permissions.system) AudioRecorder.requestPermission("system");
|
|
105
|
+
if (!permissions.mic) AudioRecorder.requestPermission("mic");
|
|
53
106
|
|
|
54
|
-
systemRecorder.on(
|
|
107
|
+
systemRecorder.on("data", async (chunk: Buffer) => {
|
|
55
108
|
await processAudioChunk(chunk);
|
|
56
109
|
});
|
|
57
110
|
|
|
58
|
-
systemRecorder.on(
|
|
59
|
-
console.error(
|
|
111
|
+
systemRecorder.on("error", (err) => {
|
|
112
|
+
console.error("⚡️ System recorder error:", err);
|
|
60
113
|
});
|
|
61
114
|
|
|
62
|
-
process.on(
|
|
63
|
-
console.log(
|
|
115
|
+
process.on("SIGINT", async () => {
|
|
116
|
+
console.log("\nStopping...");
|
|
64
117
|
await systemRecorder.stop();
|
|
118
|
+
workers.forEach((w) => w.kill());
|
|
65
119
|
process.exit(0);
|
|
66
120
|
});
|
|
67
121
|
|
|
68
122
|
await systemRecorder.start({
|
|
69
|
-
deviceType:
|
|
70
|
-
deviceId: systemAudio.id
|
|
123
|
+
deviceType: "output",
|
|
124
|
+
deviceId: systemAudio.id,
|
|
71
125
|
});
|
|
72
126
|
|
|
73
|
-
console.info(
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
// Internal helper functions
|
|
77
|
-
// -----------------------------
|
|
127
|
+
console.info(
|
|
128
|
+
`🗼 Capturing system audio - transcribing with ${styleText("cyan", basename(modelPath))}`,
|
|
129
|
+
);
|
|
78
130
|
|
|
79
131
|
async function processAudioChunk(chunk: Buffer) {
|
|
80
132
|
const state = audioState;
|
|
@@ -83,7 +135,7 @@ export async function startCapture(onTranscript: (text: string) => void) {
|
|
|
83
135
|
|
|
84
136
|
while (state.size >= WINDOW_SIZE) {
|
|
85
137
|
const windowChunk = Buffer.concat(
|
|
86
|
-
state.buffer.splice(0, Math.ceil(WINDOW_SIZE / state.buffer[0].length))
|
|
138
|
+
state.buffer.splice(0, Math.ceil(WINDOW_SIZE / state.buffer[0].length)),
|
|
87
139
|
);
|
|
88
140
|
state.size -= windowChunk.length;
|
|
89
141
|
|
|
@@ -91,11 +143,15 @@ export async function startCapture(onTranscript: (text: string) => void) {
|
|
|
91
143
|
const resampled = resample48to16(mono);
|
|
92
144
|
const float32 = bufferToFloat32(resampled);
|
|
93
145
|
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
146
|
+
// Check speech with VAD worker
|
|
147
|
+
const hasSpeech = await new Promise<boolean>((resolve) => {
|
|
148
|
+
const handler = (msg: { hasSpeech?: boolean }) => {
|
|
149
|
+
vadWorker.off("message", handler);
|
|
150
|
+
resolve(msg.hasSpeech || false);
|
|
151
|
+
};
|
|
152
|
+
vadWorker.on("message", handler);
|
|
153
|
+
vadWorker.send({ data: Array.from(float32) });
|
|
154
|
+
});
|
|
99
155
|
|
|
100
156
|
if (hasSpeech) {
|
|
101
157
|
state.utterance.push(windowChunk);
|
|
@@ -106,8 +162,11 @@ export async function startCapture(onTranscript: (text: string) => void) {
|
|
|
106
162
|
|
|
107
163
|
if (state.utteranceSize >= MIN_UTTERANCE_SIZE) {
|
|
108
164
|
const utterance = Buffer.concat(state.utterance);
|
|
109
|
-
|
|
110
|
-
|
|
165
|
+
const mono = stereoToMono(utterance);
|
|
166
|
+
const resampled = resample48to16(mono);
|
|
167
|
+
const float32 = bufferToFloat32(resampled);
|
|
168
|
+
|
|
169
|
+
transcribe(float32, seqNum++);
|
|
111
170
|
}
|
|
112
171
|
|
|
113
172
|
state.utterance = [];
|
|
@@ -116,52 +175,26 @@ export async function startCapture(onTranscript: (text: string) => void) {
|
|
|
116
175
|
}
|
|
117
176
|
}
|
|
118
177
|
|
|
119
|
-
function
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
activeCount++;
|
|
123
|
-
transcribe(item.data, item.seq);
|
|
124
|
-
}
|
|
125
|
-
}
|
|
178
|
+
function transcribe(float32: Float32Array, seq: number) {
|
|
179
|
+
if (activeCount >= MAX_CONCURRENT) return;
|
|
180
|
+
activeCount++;
|
|
126
181
|
|
|
127
|
-
|
|
128
|
-
const wavFile = `./tmp/temp-${seq}.wav`;
|
|
182
|
+
const worker = workers[seq % workers.length];
|
|
129
183
|
|
|
130
|
-
const
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
184
|
+
const handler = (msg: { text?: string; error?: string }) => {
|
|
185
|
+
if (msg.error) {
|
|
186
|
+
console.error("Worker error:", msg.error);
|
|
187
|
+
process.exit(1);
|
|
188
|
+
}
|
|
135
189
|
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
wavStream.on('finish', () => {
|
|
142
|
-
const whisper = spawn(WHISPER_BIN, [
|
|
143
|
-
'-m', MODEL_PATH,
|
|
144
|
-
'-f', wavFile,
|
|
145
|
-
'--best-of', '5',
|
|
146
|
-
'--prompt', 'Air traffic control radio communication',
|
|
147
|
-
'--no-timestamps',
|
|
148
|
-
'--language', 'en'
|
|
149
|
-
]);
|
|
150
|
-
|
|
151
|
-
let output = '';
|
|
152
|
-
whisper.stdout.on('data', (data) => output += data);
|
|
153
|
-
whisper.stderr.on('data', () => {
|
|
154
|
-
});
|
|
190
|
+
results.set(seq, msg.text!);
|
|
191
|
+
worker.off("message", handler);
|
|
192
|
+
activeCount--;
|
|
193
|
+
flushOrdered();
|
|
194
|
+
};
|
|
155
195
|
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
fs.unlink(wavFile, () => {
|
|
159
|
-
});
|
|
160
|
-
activeCount--;
|
|
161
|
-
processNext();
|
|
162
|
-
flushOrdered();
|
|
163
|
-
});
|
|
164
|
-
});
|
|
196
|
+
worker.on("message", handler);
|
|
197
|
+
worker.send({ data: Array.from(float32), seq });
|
|
165
198
|
}
|
|
166
199
|
|
|
167
200
|
function flushOrdered() {
|
|
@@ -170,16 +203,14 @@ export async function startCapture(onTranscript: (text: string) => void) {
|
|
|
170
203
|
results.delete(nextToPrint);
|
|
171
204
|
nextToPrint++;
|
|
172
205
|
|
|
173
|
-
|
|
174
|
-
|
|
206
|
+
if (text) {
|
|
207
|
+
onTranscript(text.toLowerCase());
|
|
208
|
+
}
|
|
175
209
|
}
|
|
176
210
|
}
|
|
177
211
|
}
|
|
178
212
|
|
|
179
|
-
// -----------------------------
|
|
180
213
|
// Audio helpers
|
|
181
|
-
// -----------------------------
|
|
182
|
-
|
|
183
214
|
function stereoToMono(stereo: Buffer): Buffer {
|
|
184
215
|
const mono = Buffer.alloc(stereo.length / 2);
|
|
185
216
|
for (let i = 0; i < mono.length / 2; i++) {
|
|
@@ -208,4 +239,4 @@ function bufferToFloat32(buffer: Buffer): Float32Array {
|
|
|
208
239
|
float32[i] = int16 / 32768.0;
|
|
209
240
|
}
|
|
210
241
|
return float32;
|
|
211
|
-
}
|
|
242
|
+
}
|
package/src/cleaner.ts
CHANGED
|
@@ -1,88 +1,35 @@
|
|
|
1
|
-
import
|
|
2
|
-
import {
|
|
3
|
-
import {
|
|
4
|
-
import {
|
|
5
|
-
import {
|
|
6
|
-
import {
|
|
7
|
-
import {
|
|
8
|
-
import {
|
|
9
|
-
import {
|
|
10
|
-
import {highlightKeywords} from './keywords.ts';
|
|
1
|
+
import { tokenize } from "./normalization/tokenizer.ts";
|
|
2
|
+
import { normalizePhonetics } from "./normalization/phonetic.ts";
|
|
3
|
+
import { normalizeNumbers } from "./normalization/numbers.ts";
|
|
4
|
+
import { normalizeCallsigns } from "./normalization/callsigns.ts";
|
|
5
|
+
import { normalizePhrases } from "./normalization/atc-phrases.ts";
|
|
6
|
+
import { highlightKeywords } from "./normalization/keywords.ts";
|
|
7
|
+
import { renderToAnsi } from "./presentation/renderer.ts";
|
|
8
|
+
import { normalizeWaypoints } from "./normalization/waypoints.ts";
|
|
9
|
+
import { type Opts } from "./types.ts";
|
|
11
10
|
|
|
12
|
-
export function cleanTranscript(
|
|
13
|
-
text
|
|
14
|
-
opts: {
|
|
15
|
-
callsigns: boolean;
|
|
16
|
-
phonetic: boolean;
|
|
17
|
-
fl: boolean;
|
|
18
|
-
numbers: boolean;
|
|
19
|
-
runways: boolean;
|
|
20
|
-
heading: boolean;
|
|
21
|
-
speed: boolean;
|
|
22
|
-
altitude: boolean;
|
|
23
|
-
keywords: boolean;
|
|
24
|
-
}
|
|
25
|
-
) {
|
|
26
|
-
let out = text.trim();
|
|
11
|
+
export function cleanTranscript(text: string, opts: Opts) {
|
|
12
|
+
let tokens = tokenize(text);
|
|
27
13
|
|
|
28
|
-
|
|
29
|
-
out = out.replace(/\s+/g, ' ');
|
|
14
|
+
tokens = normalizePhonetics(tokens, opts);
|
|
30
15
|
|
|
31
|
-
|
|
16
|
+
tokens = normalizeNumbers(tokens, opts);
|
|
32
17
|
|
|
33
|
-
|
|
34
|
-
normalizedCallsign = detectAndNormalizeCallsign(out);
|
|
35
|
-
if (normalizedCallsign) {
|
|
36
|
-
// Highlight callsign
|
|
37
|
-
const highlighted = chalk.green.bold(normalizedCallsign);
|
|
18
|
+
tokens = normalizeCallsigns(tokens, opts);
|
|
38
19
|
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
out = out.replace(firstWords, highlighted);
|
|
43
|
-
}
|
|
44
|
-
}
|
|
20
|
+
// ATC Phrases: Consumes remaining numbers for FL, Heading, etc.
|
|
21
|
+
// This function handles FL, Heading, Speed, Runway, Squawk, QNH, Altitude, Freq
|
|
22
|
+
tokens = normalizePhrases(tokens, opts);
|
|
45
23
|
|
|
46
|
-
|
|
47
|
-
out = numberWordsToDigits(out);
|
|
48
|
-
}
|
|
24
|
+
tokens = normalizeWaypoints(tokens, opts);
|
|
49
25
|
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
}
|
|
26
|
+
// Keyword Highlighting (runs last on remaining words)
|
|
27
|
+
tokens = highlightKeywords(tokens, opts);
|
|
53
28
|
|
|
54
|
-
|
|
55
|
-
out = normalizeRunways(out);
|
|
56
|
-
}
|
|
29
|
+
const output = renderToAnsi(tokens, opts);
|
|
57
30
|
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
out = normalizeSpeed(out);
|
|
64
|
-
}
|
|
65
|
-
|
|
66
|
-
if (opts.altitude) {
|
|
67
|
-
out = normalizeAltitude(out);
|
|
68
|
-
}
|
|
69
|
-
|
|
70
|
-
if (opts.keywords) {
|
|
71
|
-
out = highlightKeywords(out);
|
|
72
|
-
}
|
|
73
|
-
|
|
74
|
-
if (opts.phonetic) {
|
|
75
|
-
out = out.replace(
|
|
76
|
-
/\b(alpha|bravo|charlie|delta|echo|foxtrot|golf|hotel|india|juliet|kilo|lima|mike|november|oscar|papa|quebec|romeo|sierra|tango|uniform|victor|whiskey|xray|x-ray|yankee|zulu)\b/gi,
|
|
77
|
-
(match) => {
|
|
78
|
-
// If this phonetic word is part of the callsign, skip formatting
|
|
79
|
-
if (normalizedCallsign && normalizedCallsign.toLowerCase().includes(match.toLowerCase())) {
|
|
80
|
-
return match;
|
|
81
|
-
}
|
|
82
|
-
return formatPhoneticWord(match);
|
|
83
|
-
}
|
|
84
|
-
);
|
|
85
|
-
}
|
|
86
|
-
|
|
87
|
-
return out;
|
|
88
|
-
}
|
|
31
|
+
return {
|
|
32
|
+
output,
|
|
33
|
+
tokens,
|
|
34
|
+
};
|
|
35
|
+
}
|