npm - readback - Versions diffs - 0.0.0-alpha.0 → 0.0.0-alpha.2 - Mend

readback 0.0.0-alpha.0 → 0.0.0-alpha.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27) hide show

package/README.md +104 -0
package/bin/index.ts +54 -31
package/package.json +8 -6
package/src/capture.ts +130 -99
package/src/cleaner.ts +26 -79
package/src/console.ts +12 -0
package/src/model-manager.ts +230 -0
package/src/normalization/atc-phrases.ts +385 -0
package/src/normalization/callsigns.ts +164 -0
package/src/normalization/keywords.ts +137 -0
package/src/normalization/numbers.ts +197 -0
package/src/normalization/phonetic.ts +58 -0
package/src/normalization/tokenizer.ts +26 -0
package/src/normalization/waypoints.ts +194 -0
package/src/presentation/renderer.ts +92 -0
package/src/types.ts +45 -0
package/src/vad-worker.ts +26 -0
package/src/whisper-worker.ts +37 -0
package/src/altitude.ts +0 -52
package/src/callsigns.ts +0 -57
package/src/flightlevel.ts +0 -35
package/src/heading.ts +0 -39
package/src/keywords.ts +0 -32
package/src/numbers.ts +0 -17
package/src/phonetic.ts +0 -49
package/src/runway.ts +0 -37
package/src/speed.ts +0 -39

package/README.md ADDED Viewed

@@ -0,0 +1,104 @@
+# 🎧 readback
+**readback** is a real-time ATC (Air Traffic Control) transcription tool that captures system audio, transcribes it using local ATC-fine-tuned [Whisper](https://en.wikipedia.org/wiki/Whisper_(speech_recognition_system)) models, and formats the output with aviation-specific syntax highlighting. Useful for flight simmers on [VATSIM](https://vatsim.net) and [IVAO](https://www.ivao.aero) networks who (like me) struggle with following ATC communications.
+## ⚠️ Disclaimer
+**Transcription is far from perfect.** The AI will make mistakes with fast speech, accents, background noise, and similar-sounding words. Always verify critical information (altitudes, headings, frequencies) and ask ATC to repeat if unsure. Never use for real-world aviation.
+## 🚀 Installation
+### Step 1: Install build tools for native modules:
+**macOS**: Install Xcode Command Line Tools with `xcode-select --install`
+**Windows**: Install Visual Studio Build Tools with the C++ workload from visualstudio.microsoft.com
+**Linux**: Install build essentials with `sudo apt-get install build-essential cmake` (Debian/Ubuntu) or equivalent for your distribution
+### Step 2: Install Node.js
+You need Node.js version 22.18.0 or higher installed on your system:
+1. **Download Node.js**: Go to https://nodejs.org/en/download/ and download the **LTS version** (Long Term Support)
+2. **Install**: Run the downloaded installer
+3. **Verify**: Open Terminal, type `node --version` and press Enter. You should see `v22.18.0` or higher (for example `v24.x.x`)
+### Step 3: Install readback globally
+Open Terminal and run:
+```bash
+npm install -g readback
+```
+This installs readback globally so you can run it from anywhere.
+---
+## 🎮 Usage
+In your terminal, run:
+```bash
+readback
+```
+The first time you run readback, it checks whether a model exists in `~/.readback/models`. If none is found, it asks you to download a Whisper model (Medium ~1.5GB or Large ~3GB). Downloaded models are stored in `~/.readback/models`.
+> 💡 I recommend using **medium** since it's faster and the quality difference compared to large is negligible.
+You can bring your own model too:
+```bash
+MODEL_PATH=~/path/to/your-model-ggml.bin readback
+```
+### Exiting
+Press `Ctrl+C`
+---
+## ⬆️ Updating
+Open Terminal and run:
+```bash
+npm update -g readback
+```
+---
+## 🗑️ Uninstalling
+### Uninstall npm package
+Open Terminal and run:
+```bash
+npm uninstall -g readback
+```
+To also remove the downloaded models, delete the `.readback` folder in your home directory (`~/.readback`).
+---
+## 📝 License
+MIT License - See repository for details
+---
+## 🙏 Credits
+This project would not be possible without the ATC-fine-tuned models provided by [Jack Tol](https://huggingface.co/jacktol).
+---
+## 🐛 Issues and contributions
+Found a bug or want to add features? Submit issues or pull requests on the [GitHub repository](https://github.com/borisdiakur/readback).
+---
+**👋🏻 Happy flying!**

package/bin/index.ts CHANGED Viewed

@@ -1,45 +1,68 @@
 #!/usr/bin/env node
-import {Command} from 'commander';
-import {cleanTranscript} from '../src/cleaner.ts';
-import {startCapture} from '../src/capture.ts';
-import packageJson from "../package.json" with {type: "json"};
+import { Command } from "commander";
+import { styleText } from "node:util";
+import { cleanTranscript } from "../src/cleaner.ts";
+import { startCapture } from "../src/capture.ts";
+import packageJson from "../package.json" with { type: "json" };
+import { ensureModel } from "../src/model-manager.ts";
-const PREFIX = '📻'
+const PREFIX = "🎧";
 const program = new Command();
 program
   .version(packageJson.version)
   .description(`${PREFIX} ${packageJson.name}\n${packageJson.description}`)
-  .option('--raw', 'Disable all cleaning and formatting')
-  .option('--no-callsigns', 'Disable callsign detection and normalization')
-  .option('--no-phonetic', 'Disable phonetic formatting')
-  .option('--no-fl', 'Disable flight level abbreviation')
-  .option('--no-numbers', 'Disable number-word conversion')
-  .option('--no-runways', 'Disable runway formatting')
-  .option('--no-heading', 'Disable heading formatting')
-  .option('--no-speed', 'Disable speed formatting')
-  .option('--no-altitude', 'Disable altitude formatting')
-  .option('--no-keywords', 'Disable keyword highlighting')
+  .option("--debug", "Enable debug output showing token stream")
+  .option("--raw", "Disable all cleaning and formatting")
+  .option("--no-callsigns", "Disable callsign detection and normalization")
+  .option("--no-phonetic", "Disable phonetic formatting")
+  .option("--no-fl", "Disable flight level abbreviation")
+  .option("--no-numbers", "Disable number-word conversion")
+  .option("--no-runways", "Disable runway formatting")
+  .option("--no-heading", "Disable heading formatting")
+  .option("--no-speed", "Disable speed formatting")
+  .option("--no-altitude", "Disable altitude formatting")
+  .option("--no-keywords", "Disable keyword highlighting")
+  .option("--no-frequency", "Disable frequency formatting")
+  .option("--no-squawk", "Disable squawk formatting")
+  .option("--no-qnh", "Disable qnh formatting")
+  .option("--no-colors", "Disable coloring")
+  .option("--no-waypoints", "Disable waypoint formatting")
   .helpOption("-h, --help", "Display this help text")
   .parse(process.argv);
 const opts = program.opts();
+const modelPath = await ensureModel(opts.model);
 startCapture(async (text: string) => {
-  const output = opts.raw
-    ? text
-    : cleanTranscript(text, {
-      callsigns: opts.callsigns,
-      phonetic: opts.phonetic,
-      fl: opts.fl,
-      numbers: opts.numbers,
-      runways: opts.runways,
-      heading: opts.runways,
-      speed: opts.speed,
-      altitude: opts.altitude,
-      keywords: opts.keywords
-    });
-  console.log('📻 ' + output);
-});
+  if (opts.raw) {
+    console.log("🎧 " + text);
+    return;
+  }
+  const result = cleanTranscript(text, {
+    callsigns: opts.callsigns,
+    phonetic: opts.phonetic,
+    fl: opts.fl,
+    numbers: opts.numbers,
+    runways: opts.runways,
+    heading: opts.runways,
+    speed: opts.speed,
+    altitude: opts.altitude,
+    keywords: opts.keywords,
+    frequency: opts.frequency,
+    squawk: opts.squawk,
+    colors: opts.colors,
+    qnh: opts.qnh,
+    waypoints: opts.waypoints,
+    raw: opts.raw,
+    debug: opts.debug,
+  });
+  if (opts.debug) {
+    console.log("🐞 " + styleText(["dim"], JSON.stringify(result.tokens)));
+  }
+  console.log("🎧 " + result.output);
+}, modelPath);

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "readback",
-  "version": "0.0.0-alpha.0",
+  "version": "0.0.0-alpha.2",
   "description": "Transcribes ATC transmissions into readable text.",
   "keywords": [
     "ATC",
@@ -28,19 +28,21 @@
     "LICENSE"
   ],
   "scripts": {
-    "start": "node ./bin/index.ts"
+    "start": "node ./bin/index.ts",
+    "test": "node --test tests/**/*.spec.ts",
+    "test:cover": "node --experimental-test-coverage --test tests/**/*.spec.ts"
   },
   "dependencies": {
     "@ricky0123/vad-node": "^0.0.3",
-    "chalk": "^5.6.2",
     "commander": "^14.0.3",
+    "cmake-js": "^8.0.0",
     "native-recorder-nodejs": "^1.2.0",
-    "wav": "^1.0.2"
+    "smart-whisper": "^0.8.1",
+    "ora": "^9.0.0"
   },
   "devDependencies": {
     "@types/node": "^25.1.0",
-    "@types/wav": "^1.0.4",
-    "cmake-js": "^8.0.0"
+    "prettier": "^3.8.1"
   },
   "engines": {
     "node": ">=22.18.0"

package/src/capture.ts CHANGED Viewed

@@ -1,80 +1,132 @@
-import {AudioRecorder, SYSTEM_AUDIO_DEVICE_ID} from 'native-recorder-nodejs';
-import {spawn} from 'child_process';
-import {Writer} from 'wav';
-import fs from 'fs';
-import {NonRealTimeVAD} from '@ricky0123/vad-node';
-const WHISPER_BIN = process.env.WHISPER_BIN || '../whisper.cpp/build/bin/whisper-cli';
-const MODEL_PATH = process.env.MODEL_PATH || './ggml-whisper-atc.bin';
-export async function startCapture(onTranscript: (text: string) => void) {
-  if (!fs.existsSync(MODEL_PATH)) {
-    console.error(`⚡️ Model not found at: ${MODEL_PATH}`);
-    process.exit(1);
-  }
+import { AudioRecorder, SYSTEM_AUDIO_DEVICE_ID } from "native-recorder-nodejs";
+import { fork } from "child_process";
+import { fileURLToPath } from "url";
+import { styleText } from "node:util";
+import { dirname, join, basename } from "path";
+import { clearLine } from "./console.ts";
+const MIN_UTTERANCE_SIZE = 48000;
+const WINDOW_SIZE = 96000;
+const MAX_CONCURRENT = 6;
+const __filename = fileURLToPath(import.meta.url);
+const __dirname = dirname(__filename);
+export async function startCapture(
+  onTranscript: (text: string) => void,
+  modelPath: string,
+) {
   let seqNum = 0;
   const results = new Map<number, string>();
   let nextToPrint = 0;
-  const queue: Array<{ data: Buffer; seq: number }> = [];
   let activeCount = 0;
-  const MAX_CONCURRENT = 4;
   const audioState = {
     buffer: [] as Buffer[],
     size: 0,
     utterance: [] as Buffer[],
-    utteranceSize: 0
+    utteranceSize: 0,
   };
-  const MIN_UTTERANCE_SIZE = 48000;
-  const WINDOW_SIZE = 96000;
+  // Create VAD worker
+  const vadWorker = fork(join(__dirname, "vad-worker.ts"), [], {
+    stdio: ["ignore", "ignore", "ignore", "ipc"],
+    env: process.env,
+    detached: true,
+  });
-  const vad = await NonRealTimeVAD.new({
-    positiveSpeechThreshold: 0.3,
-    negativeSpeechThreshold: 0.15,
-    redemptionFrames: 10,
-    preSpeechPadFrames: 1
+  // Wait for VAD to be ready
+  await new Promise<void>((resolve) => {
+    vadWorker.once("message", (msg: { ready?: boolean }) => {
+      if (msg.ready) resolve();
+    });
+  });
+  const workers = Array.from({ length: MAX_CONCURRENT }, () => {
+    const worker = fork(join(__dirname, "whisper-worker.ts"), [], {
+      stdio: ["pipe", "pipe", "pipe", "ipc"],
+      env: { ...process.env, MODEL_PATH: modelPath },
+    });
+    worker.on("error", (err) => {
+      console.error("Worker error:", err);
+      process.exit(1);
+    });
+    worker.on("exit", (code) => {
+      if (code !== 0 && code !== null) {
+        console.error(`Worker exited with code ${code}`);
+        process.exit(1);
+      }
+    });
+    return worker;
+  });
+  process.on("SIGINT", async () => {
+    clearLine();
+    console.log("👋 Bye!");
+    try {
+      systemRecorder.stop();
+    } catch {}
+    // Kill VAD worker
+    if (vadWorker.pid) {
+      try {
+        process.kill(vadWorker.pid, "SIGKILL");
+      } catch {}
+    }
+    // Kill whisper workers
+    workers.forEach((w) => {
+      if (w.pid) {
+        try {
+          process.kill(w.pid, "SIGKILL");
+        } catch {}
+      }
+    });
+    process.exit(0);
   });
   const systemRecorder = new AudioRecorder();
-  const outputs = AudioRecorder.getDevices('output');
+  const outputs = AudioRecorder.getDevices("output");
   const systemAudio =
-    outputs.find(d => d.id === SYSTEM_AUDIO_DEVICE_ID) ||
-    outputs.find(d => d.isDefault);
+    outputs.find((d) => d.id === SYSTEM_AUDIO_DEVICE_ID) ||
+    outputs.find((d) => d.isDefault);
   if (!systemAudio) {
-    throw new Error('Missing audio devices');
+    throw new Error("Missing audio devices");
   }
   const permissions = AudioRecorder.checkPermission();
-  if (!permissions.system) AudioRecorder.requestPermission('system');
-  if (!permissions.mic) AudioRecorder.requestPermission('mic');
+  if (!permissions.system) AudioRecorder.requestPermission("system");
+  if (!permissions.mic) AudioRecorder.requestPermission("mic");
-  systemRecorder.on('data', async (chunk: Buffer) => {
+  systemRecorder.on("data", async (chunk: Buffer) => {
     await processAudioChunk(chunk);
   });
-  systemRecorder.on('error', (err) => {
-    console.error('⚡️ System recorder error:', err);
+  systemRecorder.on("error", (err) => {
+    console.error("⚡️ System recorder error:", err);
   });
-  process.on('SIGINT', async () => {
-    console.log('\nStopping...');
+  process.on("SIGINT", async () => {
+    console.log("\nStopping...");
     await systemRecorder.stop();
+    workers.forEach((w) => w.kill());
     process.exit(0);
   });
   await systemRecorder.start({
-    deviceType: 'output',
-    deviceId: systemAudio.id
+    deviceType: "output",
+    deviceId: systemAudio.id,
   });
-  console.info('🗼 Capturing system audio...');
-  // -----------------------------
-  // Internal helper functions
-  // -----------------------------
+  console.info(
+    `🗼 Capturing system audio - transcribing with ${styleText("cyan", basename(modelPath))}`,
+  );
   async function processAudioChunk(chunk: Buffer) {
     const state = audioState;
@@ -83,7 +135,7 @@ export async function startCapture(onTranscript: (text: string) => void) {
     while (state.size >= WINDOW_SIZE) {
       const windowChunk = Buffer.concat(
-        state.buffer.splice(0, Math.ceil(WINDOW_SIZE / state.buffer[0].length))
+        state.buffer.splice(0, Math.ceil(WINDOW_SIZE / state.buffer[0].length)),
       );
       state.size -= windowChunk.length;
@@ -91,11 +143,15 @@ export async function startCapture(onTranscript: (text: string) => void) {
       const resampled = resample48to16(mono);
       const float32 = bufferToFloat32(resampled);
-      let hasSpeech = false;
-      for await (const _ of vad.run(float32, 16000)) {
-        hasSpeech = true;
-        break;
-      }
+      // Check speech with VAD worker
+      const hasSpeech = await new Promise<boolean>((resolve) => {
+        const handler = (msg: { hasSpeech?: boolean }) => {
+          vadWorker.off("message", handler);
+          resolve(msg.hasSpeech || false);
+        };
+        vadWorker.on("message", handler);
+        vadWorker.send({ data: Array.from(float32) });
+      });
       if (hasSpeech) {
         state.utterance.push(windowChunk);
@@ -106,8 +162,11 @@ export async function startCapture(onTranscript: (text: string) => void) {
         if (state.utteranceSize >= MIN_UTTERANCE_SIZE) {
           const utterance = Buffer.concat(state.utterance);
-          queue.push({data: utterance, seq: seqNum++});
-          processNext();
+          const mono = stereoToMono(utterance);
+          const resampled = resample48to16(mono);
+          const float32 = bufferToFloat32(resampled);
+          transcribe(float32, seqNum++);
         }
         state.utterance = [];
@@ -116,52 +175,26 @@ export async function startCapture(onTranscript: (text: string) => void) {
     }
   }
-  function processNext() {
-    while (activeCount < MAX_CONCURRENT && queue.length > 0) {
-      const item = queue.shift()!;
-      activeCount++;
-      transcribe(item.data, item.seq);
-    }
-  }
+  function transcribe(float32: Float32Array, seq: number) {
+    if (activeCount >= MAX_CONCURRENT) return;
+    activeCount++;
-  function transcribe(pcmData: Buffer, seq: number) {
-    const wavFile = `./tmp/temp-${seq}.wav`;
+    const worker = workers[seq % workers.length];
-    const writer = new Writer({
-      sampleRate: 48000,
-      channels: 2,
-      bitDepth: 16
-    });
+    const handler = (msg: { text?: string; error?: string }) => {
+      if (msg.error) {
+        console.error("Worker error:", msg.error);
+        process.exit(1);
+      }
-    const wavStream = fs.createWriteStream(wavFile);
-    writer.pipe(wavStream);
-    writer.write(pcmData);
-    writer.end();
-    wavStream.on('finish', () => {
-      const whisper = spawn(WHISPER_BIN, [
-        '-m', MODEL_PATH,
-        '-f', wavFile,
-        '--best-of', '5',
-        '--prompt', 'Air traffic control radio communication',
-        '--no-timestamps',
-        '--language', 'en'
-      ]);
-      let output = '';
-      whisper.stdout.on('data', (data) => output += data);
-      whisper.stderr.on('data', () => {
-      });
+      results.set(seq, msg.text!);
+      worker.off("message", handler);
+      activeCount--;
+      flushOrdered();
+    };
-      whisper.on('close', () => {
-        results.set(seq, output.trim());
-        fs.unlink(wavFile, () => {
-        });
-        activeCount--;
-        processNext();
-        flushOrdered();
-      });
-    });
+    worker.on("message", handler);
+    worker.send({ data: Array.from(float32), seq });
   }
   function flushOrdered() {
@@ -170,16 +203,14 @@ export async function startCapture(onTranscript: (text: string) => void) {
       results.delete(nextToPrint);
       nextToPrint++;
-      // Send raw text to the CLI layer
-      onTranscript(text);
+      if (text) {
+        onTranscript(text.toLowerCase());
+      }
     }
   }
 }
-// -----------------------------
 // Audio helpers
-// -----------------------------
 function stereoToMono(stereo: Buffer): Buffer {
   const mono = Buffer.alloc(stereo.length / 2);
   for (let i = 0; i < mono.length / 2; i++) {
@@ -208,4 +239,4 @@ function bufferToFloat32(buffer: Buffer): Float32Array {
     float32[i] = int16 / 32768.0;
   }
   return float32;
-}
+}

package/src/cleaner.ts CHANGED Viewed

@@ -1,88 +1,35 @@
-import chalk from 'chalk';
-import {numberWordsToDigits} from './numbers.ts';
-import {abbreviateFlightLevel} from './flightlevel.ts';
-import {formatPhoneticWord} from './phonetic.ts';
-import {detectAndNormalizeCallsign} from './callsigns.ts';
-import {normalizeRunways} from './runway.ts';
-import {normalizeHeadings} from './heading.ts';
-import {normalizeSpeed} from "./speed.ts";
-import {normalizeAltitude} from "./altitude.ts";
-import {highlightKeywords} from './keywords.ts';
+import { tokenize } from "./normalization/tokenizer.ts";
+import { normalizePhonetics } from "./normalization/phonetic.ts";
+import { normalizeNumbers } from "./normalization/numbers.ts";
+import { normalizeCallsigns } from "./normalization/callsigns.ts";
+import { normalizePhrases } from "./normalization/atc-phrases.ts";
+import { highlightKeywords } from "./normalization/keywords.ts";
+import { renderToAnsi } from "./presentation/renderer.ts";
+import { normalizeWaypoints } from "./normalization/waypoints.ts";
+import { type Opts } from "./types.ts";
-export function cleanTranscript(
-  text: string,
-  opts: {
-    callsigns: boolean;
-    phonetic: boolean;
-    fl: boolean;
-    numbers: boolean;
-    runways: boolean;
-    heading: boolean;
-    speed: boolean;
-    altitude: boolean;
-    keywords: boolean;
-  }
-) {
-  let out = text.trim();
+export function cleanTranscript(text: string, opts: Opts) {
+  let tokens = tokenize(text);
-  // Normalize whitespace early
-  out = out.replace(/\s+/g, ' ');
+  tokens = normalizePhonetics(tokens, opts);
-  let normalizedCallsign: string | null = null;
+  tokens = normalizeNumbers(tokens, opts);
-  if (opts.callsigns) {
-    normalizedCallsign = detectAndNormalizeCallsign(out);
-    if (normalizedCallsign) {
-      // Highlight callsign
-      const highlighted = chalk.green.bold(normalizedCallsign);
+  tokens = normalizeCallsigns(tokens, opts);
-      // Replace only the callsign portion
-      // (We assume callsign is at the beginning of the utterance)
-      const firstWords = out.split(' ').slice(0, normalizedCallsign.split(' ').length).join(' ');
-      out = out.replace(firstWords, highlighted);
-    }
-  }
+  // ATC Phrases: Consumes remaining numbers for FL, Heading, etc.
+  // This function handles FL, Heading, Speed, Runway, Squawk, QNH, Altitude, Freq
+  tokens = normalizePhrases(tokens, opts);
-  if (opts.numbers) {
-    out = numberWordsToDigits(out);
-  }
+  tokens = normalizeWaypoints(tokens, opts);
-  if (opts.fl) {
-    out = abbreviateFlightLevel(out);
-  }
+  // Keyword Highlighting (runs last on remaining words)
+  tokens = highlightKeywords(tokens, opts);
-  if (opts.runways) {
-    out = normalizeRunways(out);
-  }
+  const output = renderToAnsi(tokens, opts);
-  if (opts.heading) {
-    out = normalizeHeadings(out);
-  }
-  if (opts.speed) {
-    out = normalizeSpeed(out);
-  }
-  if (opts.altitude) {
-    out = normalizeAltitude(out);
-  }
-  if (opts.keywords) {
-    out = highlightKeywords(out);
-  }
-  if (opts.phonetic) {
-    out = out.replace(
-      /\b(alpha|bravo|charlie|delta|echo|foxtrot|golf|hotel|india|juliet|kilo|lima|mike|november|oscar|papa|quebec|romeo|sierra|tango|uniform|victor|whiskey|xray|x-ray|yankee|zulu)\b/gi,
-      (match) => {
-        // If this phonetic word is part of the callsign, skip formatting
-        if (normalizedCallsign && normalizedCallsign.toLowerCase().includes(match.toLowerCase())) {
-          return match;
-        }
-        return formatPhoneticWord(match);
-      }
-    );
-  }
-  return out;
-}
+  return {
+    output,
+    tokens,
+  };
+}