readback 0.0.0-alpha.6 → 0.0.0-alpha.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +18 -6
- package/dist/bin/index.js +3 -0
- package/dist/package.json +10 -4
- package/dist/src/banner.js +41 -10
- package/dist/src/cleaner.js +2 -0
- package/dist/src/logger.js +78 -0
- package/dist/src/normalization/airlines.js +0 -1
- package/dist/src/normalization/altitude.js +12 -5
- package/dist/src/normalization/break.js +20 -0
- package/dist/src/normalization/callsigns.js +198 -19
- package/dist/src/normalization/keywords.js +500 -124
- package/dist/src/normalization/numbers.js +63 -4
- package/dist/src/normalization/squawk.js +10 -2
- package/dist/src/presentation/renderer.js +23 -14
- package/dist/src/session-state.js +24 -0
- package/dist/src/types.js +1 -0
- package/package.json +10 -4
package/README.md
CHANGED
|
@@ -1,8 +1,12 @@
|
|
|
1
1
|
```
|
|
2
|
-
█▀▄ █▀▀ █▀█ █▀▄ █▀▄ █▀█ █▀▀ █ █
|
|
3
|
-
|
|
4
|
-
█ █ █ █ █ █ █ █ █ █ █ █ █ █
|
|
5
|
-
|
|
2
|
+
◣ █▀▄ █▀▀ █▀█ █▀▄ █▀▄ █▀█ █▀▀ █ █
|
|
3
|
+
█◣ ║┇ █ █ █ █ █ █ █ █ █ █ █ █ █ █
|
|
4
|
+
◣ ██◣❚ ║┇ █ █ █ █ █ █ █ █ █ █ █ █ █ █
|
|
5
|
+
━╸█►◄█████████ ◗╺━║┇━━ █▀▄ █▀▀ █▀█ █ █ █▀▄ █▀█ █ █▀▄
|
|
6
|
+
◤ ██◤❚ ║┇ █ █ █ █ █ █ █ █ █ █ █ █ █ █
|
|
7
|
+
█◤ ║┇ █ █ █ █ █ █ █ █ █ █ █ █ █ █
|
|
8
|
+
◤ ▀ ▀ ▀▀▀ ▀ ▀ ▀▀ ▀▀ ▀ ▀ ▀▀▀ ▀ ▀
|
|
9
|
+
• • • • • • • • • • • • • • • •
|
|
6
10
|
```
|
|
7
11
|
|
|
8
12
|
**readback** is a real-time ATC (Air Traffic Control) transcription tool that captures system audio, transcribes it using local ATC-fine-tuned [Whisper](https://en.wikipedia.org/wiki/Whisper_(speech_recognition_system)) models, and formats the output with aviation-specific syntax highlighting. Useful for flight simmers on [VATSIM](https://vatsim.net) and [IVAO](https://www.ivao.aero) networks who (like me) struggle with following ATC communications.
|
|
@@ -119,8 +123,16 @@ This project would not be possible without the ATC-fine-tuned Whisper models pro
|
|
|
119
123
|
|
|
120
124
|
## Issues and contributions
|
|
121
125
|
|
|
122
|
-
Found a bug or want to add features? Submit issues or pull requests on the [GitHub repository](https://github.com/borisdiakur/readback).
|
|
126
|
+
Found a bug or want to add features? Submit issues or pull requests on the [GitHub repository](https://github.com/borisdiakur/readback/issues).
|
|
127
|
+
|
|
128
|
+
### Reporting issues
|
|
129
|
+
|
|
130
|
+
When filing a bug, please attach your session log from:
|
|
131
|
+
|
|
132
|
+
```
|
|
133
|
+
~/.readback/logs/
|
|
134
|
+
```
|
|
123
135
|
|
|
124
136
|
---
|
|
125
137
|
|
|
126
|
-
Happy flying!
|
|
138
|
+
Happy flying!
|
package/dist/bin/index.js
CHANGED
|
@@ -9,6 +9,7 @@ import { startupCleanup } from "../src/startup-cleanup.js";
|
|
|
9
9
|
import packageJson from "../package.json" with { type: "json" };
|
|
10
10
|
import { BottomStatusLine } from "../src/status-line.js";
|
|
11
11
|
import { printTranscript } from "../src/presentation/renderer.js";
|
|
12
|
+
import { initLogger, logTranscript } from "../src/logger.js";
|
|
12
13
|
await startupCleanup();
|
|
13
14
|
const program = new Command();
|
|
14
15
|
program
|
|
@@ -47,6 +48,7 @@ process.on("SIGINT", () => {
|
|
|
47
48
|
process.exit(0);
|
|
48
49
|
});
|
|
49
50
|
const paths = await ensureAllModels();
|
|
51
|
+
await initLogger(paths.asr, opts);
|
|
50
52
|
const status = new BottomStatusLine();
|
|
51
53
|
startCapture(async (text) => {
|
|
52
54
|
if (opts.raw) {
|
|
@@ -71,6 +73,7 @@ startCapture(async (text) => {
|
|
|
71
73
|
squawk: opts.squawk,
|
|
72
74
|
waypoints: opts.waypoints,
|
|
73
75
|
});
|
|
76
|
+
await logTranscript(text, result.tokens);
|
|
74
77
|
if (opts.debug) {
|
|
75
78
|
console.log(" " + styleText("dim", JSON.stringify(result.tokens)));
|
|
76
79
|
}
|
package/dist/package.json
CHANGED
|
@@ -1,10 +1,15 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "readback",
|
|
3
|
-
"version": "0.0.0-alpha.
|
|
3
|
+
"version": "0.0.0-alpha.7",
|
|
4
4
|
"description": "Transcribes ATC transmissions into readable text.",
|
|
5
5
|
"keywords": [
|
|
6
|
-
"
|
|
7
|
-
"aviation"
|
|
6
|
+
"atc",
|
|
7
|
+
"aviation",
|
|
8
|
+
"speech-to-text",
|
|
9
|
+
"transcription",
|
|
10
|
+
"flight-simulator",
|
|
11
|
+
"vatsim",
|
|
12
|
+
"ivao"
|
|
8
13
|
],
|
|
9
14
|
"homepage": "https://github.com/borisdiakur/readback#readme",
|
|
10
15
|
"bugs": {
|
|
@@ -34,7 +39,7 @@
|
|
|
34
39
|
"build:start": "tsc && npm start",
|
|
35
40
|
"build:watch": "tsc --watch",
|
|
36
41
|
"extractNavData": "node scripts/extract-nav-data.ts",
|
|
37
|
-
"test": "node --test tests/**/*.spec.ts",
|
|
42
|
+
"test": "node --test ./tests/**/*.spec.ts",
|
|
38
43
|
"test:cover": "node --experimental-test-coverage --test tests/**/*.spec.ts"
|
|
39
44
|
},
|
|
40
45
|
"dependencies": {
|
|
@@ -43,6 +48,7 @@
|
|
|
43
48
|
"commander": "^14.0.3",
|
|
44
49
|
"native-recorder-nodejs": "^1.2.0",
|
|
45
50
|
"onnxruntime-node": "^1.24.1",
|
|
51
|
+
"ora": "^9.3.0",
|
|
46
52
|
"smart-whisper": "^0.8.1",
|
|
47
53
|
"speex-resampler": "^3.0.1",
|
|
48
54
|
"wrap-ansi": "^9.0.2"
|
package/dist/src/banner.js
CHANGED
|
@@ -1,19 +1,50 @@
|
|
|
1
1
|
import { styleText } from "node:util";
|
|
2
2
|
import packageJson from "../package.json" with { type: "json" };
|
|
3
|
-
const
|
|
4
|
-
|
|
5
|
-
|
|
3
|
+
const cols = [
|
|
4
|
+
"blue",
|
|
5
|
+
"cyan",
|
|
6
|
+
"magenta",
|
|
7
|
+
"green",
|
|
8
|
+
"red",
|
|
9
|
+
"blueBright",
|
|
10
|
+
"cyanBright",
|
|
11
|
+
"magentaBright",
|
|
12
|
+
"greenBright",
|
|
13
|
+
"redBright",
|
|
14
|
+
"whiteBright",
|
|
15
|
+
];
|
|
16
|
+
const stripes = (t) => t
|
|
6
17
|
.split("")
|
|
7
18
|
.map((c, i) => i < 2 ||
|
|
8
19
|
i > t.length - 2 ||
|
|
9
20
|
i === Math.floor(t.length / 2) - 1 ||
|
|
10
21
|
i === Math.floor(t.length / 2) + 1 ||
|
|
11
22
|
(i + 1) % 2 === 0
|
|
12
|
-
?
|
|
13
|
-
:
|
|
23
|
+
? styleText(["dim", "white"], c)
|
|
24
|
+
: styleText(["whiteBright"], c))
|
|
14
25
|
.join("");
|
|
15
|
-
const
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
26
|
+
const version = styleText(["dim", "cyan"], " v" + packageJson.version);
|
|
27
|
+
const stop = styleText("yellow", "║┇ ");
|
|
28
|
+
const blob = styleText("yellow", "║┇━━ ");
|
|
29
|
+
const chop = " ";
|
|
30
|
+
const cp = cols[Math.floor(Math.random() * cols.length)];
|
|
31
|
+
const cs = cols[Math.floor(Math.random() * cols.length)];
|
|
32
|
+
const csd = (t) => styleText([cs, "dim"], t);
|
|
33
|
+
const csr = (t) => styleText(cs, t);
|
|
34
|
+
const cpr = (t) => styleText(cp, t);
|
|
35
|
+
const y = (t) => styleText(["yellow"], t);
|
|
36
|
+
let l1 = chop + stripes("█▀▄ █▀▀ █▀█ █▀▄ █▀▄ █▀█ █▀▀ █ █");
|
|
37
|
+
let l2 = stop + stripes("█ █ █ █ █ █ █ █ █ █ █ █ █ █");
|
|
38
|
+
let l3 = stop + stripes("█ █ █ █ █ █ █ █ █ █ █ █ █ █");
|
|
39
|
+
let l4 = blob + stripes("█▀▄ █▀▀ █▀█ █ █ █▀▄ █▀█ █ █▀▄");
|
|
40
|
+
let l5 = stop + stripes("█ █ █ █ █ █ █ █ █ █ █ █ █ █");
|
|
41
|
+
let l6 = stop + stripes("█ █ █ █ █ █ █ █ █ █ █ █ █ █");
|
|
42
|
+
let l7 = chop + stripes("▀ ▀ ▀▀▀ ▀ ▀ ▀▀ ▀▀ ▀ ▀ ▀▀▀ ▀ ▀") + version;
|
|
43
|
+
console.log(" " + cpr(" ◣ ") + l1);
|
|
44
|
+
console.log(" " + csd(" █◣ ") + l2);
|
|
45
|
+
console.log(" " + cpr(" ◣") + csd(" ██◣") + csr("❚ ") + l3);
|
|
46
|
+
console.log(y("━╸") + csd("█►") + cpr("◄█████████ ◗") + y("╺━") + l4);
|
|
47
|
+
console.log(" " + cpr(" ◤") + csd(" ██◤") + csr("❚ ") + l5);
|
|
48
|
+
console.log(" " + csd(" █◤ ") + l6);
|
|
49
|
+
console.log(" " + cpr(" ◤ ") + l7);
|
|
50
|
+
console.log(styleText("greenBright", " • • • • • • • • • • • • • • • •"));
|
package/dist/src/cleaner.js
CHANGED
|
@@ -5,6 +5,7 @@ import { normalizeCallsigns } from "./normalization/callsigns.js";
|
|
|
5
5
|
import { normalizeNavData } from "./normalization/nav-data.js";
|
|
6
6
|
import { normalizeFlightLevel } from "./normalization/flight-level.js";
|
|
7
7
|
import { normalizeHeading } from "./normalization/heading.js";
|
|
8
|
+
import { normalizeBreak } from "./normalization/break.js";
|
|
8
9
|
import { normalizeSpeed } from "./normalization/speed.js";
|
|
9
10
|
import { normalizeRunway } from "./normalization/runway.js";
|
|
10
11
|
import { normalizeSquawk } from "./normalization/squawk.js";
|
|
@@ -15,6 +16,7 @@ import { highlightKeywords } from "./normalization/keywords.js";
|
|
|
15
16
|
import { renderToAnsi } from "./presentation/renderer.js";
|
|
16
17
|
export function cleanTranscript(text, opts) {
|
|
17
18
|
let tokens = tokenize(text);
|
|
19
|
+
tokens = normalizeBreak(tokens, opts);
|
|
18
20
|
tokens = normalizePhonetics(tokens, opts);
|
|
19
21
|
tokens = normalizeNumbers(tokens, opts);
|
|
20
22
|
tokens = normalizeCallsigns(tokens, opts);
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
import { appendFile, mkdir, readdir, rm } from "fs/promises";
|
|
2
|
+
import { join } from "path";
|
|
3
|
+
import { DATA_ROOT } from "./setup-manager.js";
|
|
4
|
+
import packageJson from "../package.json" with { type: "json" };
|
|
5
|
+
import { basename } from "node:path";
|
|
6
|
+
const LOGS_DIR = join(DATA_ROOT, "logs");
|
|
7
|
+
const MAX_LOG_FILES = 5;
|
|
8
|
+
let logFilePath = null;
|
|
9
|
+
/**
 * Call once at startup. Ensures the logs directory exists, rotates old
 * session logs so at most MAX_LOG_FILES remain, selects a fresh log file for
 * this session and writes a `session_start` header entry to it.
 *
 * Logging is best-effort: if the logs directory cannot be created, the
 * function returns without selecting a log file, so every later log call is
 * a no-op instead of the failure aborting startup.
 *
 * @param {string} asrModelPath - Path of the ASR model; only its basename is logged.
 * @param {object} opts - Options object recorded in the session header.
 * @returns {Promise<void>}
 */
export async function initLogger(asrModelPath, opts) {
    try {
        await mkdir(LOGS_DIR, { recursive: true });
    }
    catch {
        // Logging must never crash the main process: without a directory
        // there is nothing to write to, so leave logging disabled.
        return;
    }
    await rotateLogs();
    // One timestamp for both the file name and the header entry, so the two
    // always agree.
    const startedAt = new Date().toISOString();
    // ":" and "." are replaced so the ISO timestamp is usable as a file name
    const filename = `session-${startedAt.replace(/[:.]/g, "-")}.jsonl`;
    logFilePath = join(LOGS_DIR, filename);
    await writeEntry({
        type: "session_start",
        timestamp: startedAt,
        env: {
            nodeVersion: process.version,
            platform: process.platform,
            arch: process.arch,
            readbackVersion: packageJson.version,
            asrModel: basename(asrModelPath),
            opts,
        },
    });
}
|
|
32
|
+
/**
 * Append one `transcript` entry to the session log, holding the raw text and
 * the token stream produced for it. Does nothing while no log file is
 * selected.
 *
 * @param {string} raw - Text as received from transcription.
 * @param {Array} tokens - Token stream derived from `raw`.
 * @returns {Promise<void>}
 */
export async function logTranscript(raw, tokens) {
    if (logFilePath) {
        const timestamp = new Date().toISOString();
        await writeEntry({ type: "transcript", timestamp, raw, tokens });
    }
}
|
|
46
|
+
/**
 * Expose where the current session is being logged, e.g. so the path can be
 * shown to the user.
 *
 * @returns {string|null} The session log path, or null while none is selected.
 */
export function getLogFilePath() {
    const currentPath = logFilePath;
    return currentPath;
}
|
|
53
|
+
// --------------- internals ---------------
|
|
54
|
+
/**
 * Serialize `entry` as a single JSON line and append it to the session log.
 * No-op while no log file is selected; serialization and write failures are
 * deliberately ignored.
 */
async function writeEntry(entry) {
    const target = logFilePath;
    if (target) {
        try {
            const line = `${JSON.stringify(entry)}\n`;
            await appendFile(target, line, "utf-8");
        }
        catch {
            // Logging must never crash the main process
        }
    }
}
|
|
64
|
+
/**
 * Delete the oldest session logs so that, once the upcoming session's file is
 * added, no more than MAX_LOG_FILES exist. Any failure is ignored.
 */
async function rotateLogs() {
    try {
        const names = await readdir(LOGS_DIR);
        // ISO timestamp names sort lexicographically = chronologically
        const sessionLogs = names
            .filter((name) => name.startsWith("session-") && name.endsWith(".jsonl"))
            .sort();
        // Keep one slot free for the log file about to be created
        const keep = MAX_LOG_FILES - 1;
        if (sessionLogs.length <= keep) {
            return;
        }
        const stale = sessionLogs.slice(0, sessionLogs.length - keep);
        await Promise.all(stale.map((name) => rm(join(LOGS_DIR, name), { force: true })));
    }
    catch {
        // Non-fatal
    }
}
|
|
@@ -10,25 +10,32 @@ export function normalizeAltitude(tokens, opts) {
|
|
|
10
10
|
let isAltitude = false;
|
|
11
11
|
let startOffset = 0;
|
|
12
12
|
const val = t.value.toLowerCase();
|
|
13
|
-
//
|
|
13
|
+
// "altitude" + NUMBER
|
|
14
14
|
if (val === "altitude" && next?.type === TokenType.NUMBER) {
|
|
15
15
|
isAltitude = true;
|
|
16
16
|
startOffset = 1;
|
|
17
17
|
}
|
|
18
|
-
//
|
|
18
|
+
// "descend/climb/maintain" + NUMBER
|
|
19
19
|
else if (["descend", "climb", "maintain"].includes(val) &&
|
|
20
20
|
next?.type === TokenType.NUMBER) {
|
|
21
21
|
isAltitude = true;
|
|
22
22
|
startOffset = 1;
|
|
23
23
|
}
|
|
24
|
-
//
|
|
24
|
+
// "descend/climb" + "maintain" + NUMBER
|
|
25
25
|
else if (["descend", "climb"].includes(val) &&
|
|
26
26
|
next?.value.toLowerCase() === "maintain" &&
|
|
27
27
|
next2?.type === TokenType.NUMBER) {
|
|
28
28
|
isAltitude = true;
|
|
29
29
|
startOffset = 2;
|
|
30
30
|
}
|
|
31
|
-
//
|
|
31
|
+
// descend/climb to NUMBER
|
|
32
|
+
else if (["descend", "climb"].includes(val) &&
|
|
33
|
+
next?.value.toLowerCase() === "to" &&
|
|
34
|
+
next2?.type === TokenType.NUMBER) {
|
|
35
|
+
isAltitude = true;
|
|
36
|
+
startOffset = 2; // skip both "to" and land on NUMBER
|
|
37
|
+
}
|
|
38
|
+
// Heuristic on bare number
|
|
32
39
|
else if (t.type === TokenType.NUMBER) {
|
|
33
40
|
const nextVal = next?.value.toLowerCase();
|
|
34
41
|
if (nextVal && ["feet", "foot", "ft"].includes(nextVal)) {
|
|
@@ -45,7 +52,7 @@ export function normalizeAltitude(tokens, opts) {
|
|
|
45
52
|
if (isAltitude) {
|
|
46
53
|
const numberToken = tokens[i + startOffset];
|
|
47
54
|
const valNum = parseInt(numberToken.value, 10);
|
|
48
|
-
//
|
|
55
|
+
// Detect QNH/Altimeter disguised as Altitude
|
|
49
56
|
// Condition: Keyword is "altitude", value is in altimeter range, and NOT a clean altitude
|
|
50
57
|
const isAltimeterRange = (valNum >= 2800 && valNum <= 3100) || // inHg * 100 (e.g. 2992)
|
|
51
58
|
(valNum >= 960 && valNum <= 1070); // hPa (e.g. 1013)
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
import { TokenType } from "../types.js";
|
|
2
|
+
// Words that, when they directly follow "break", leave it as an ordinary token
const BREAK_EXCLUSIONS = new Set(["off", "out", "away"]);
/**
 * Replace each "break" token (case-insensitive) with a BREAK token, unless
 * the token right after it is one of BREAK_EXCLUSIONS.
 *
 * @param {Array} tokens - Token stream; every token is read via `.value` and `.raw`.
 * @param {object} opts - When `opts.keywords` is falsy the input array is returned unchanged.
 * @returns {Array} A new array of tokens (or `tokens` itself when disabled).
 */
export function normalizeBreak(tokens, opts) {
    if (!opts.keywords) {
        return tokens;
    }
    return tokens.map((token, index) => {
        if (token.value.toLowerCase() !== "break") {
            return token;
        }
        const follower = tokens[index + 1]?.value.toLowerCase();
        if (follower && BREAK_EXCLUSIONS.has(follower)) {
            return token;
        }
        return { value: "BREAK", raw: token.raw, type: TokenType.BREAK };
    });
}
|
|
@@ -1,33 +1,74 @@
|
|
|
1
1
|
import { TokenType } from "../types.js";
|
|
2
2
|
import { KNOWN_AIRLINES } from "./airlines.js";
|
|
3
3
|
const MAX_FLIGHT_ID_LENGTH = 6;
|
|
4
|
+
const MAX_FLIGHT_NUMBER_DIGITS = 4;
|
|
5
|
+
// Recently heard callsigns, most recent first
const lastHeardCallsigns = [];
const MAX_TRACKED = 10;
/**
 * Record `entry` as the most recently heard callsign. An existing entry with
 * the same `full` value is removed first, and the list is capped at
 * MAX_TRACKED by dropping the oldest entry.
 */
function trackCallsign(entry) {
    for (let pos = 0; pos < lastHeardCallsigns.length; pos++) {
        if (lastHeardCallsigns[pos].full === entry.full) {
            lastHeardCallsigns.splice(pos, 1);
            break;
        }
    }
    lastHeardCallsigns.unshift(entry);
    if (lastHeardCallsigns.length > MAX_TRACKED) {
        lastHeardCallsigns.length -= 1;
    }
}
/**
 * Exported so other normalizers can read the tracked callsigns.
 * Returns the live list itself, not a copy.
 */
export function getLastHeardCallsigns() {
    return lastHeardCallsigns;
}
|
|
4
20
|
export function normalizeCallsigns(tokens, opts) {
|
|
5
21
|
if (!opts.callsigns)
|
|
6
22
|
return tokens;
|
|
7
23
|
const output = [];
|
|
8
24
|
for (let i = 0; i < tokens.length; i++) {
|
|
9
25
|
const t = tokens[i];
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
26
|
+
// --- 1. Try GA callsign FIRST (more specific, needs 4+ tokens) ---
|
|
27
|
+
const gaMatch = matchGACallsign(tokens, i);
|
|
28
|
+
if (gaMatch) {
|
|
29
|
+
trackCallsign({
|
|
30
|
+
full: gaMatch.icao,
|
|
31
|
+
suffix: gaMatch.suffix,
|
|
32
|
+
raw: gaMatch.displayValue,
|
|
33
|
+
});
|
|
34
|
+
output.push({
|
|
35
|
+
value: gaMatch.displayValue,
|
|
36
|
+
raw: gaMatch.rawTokens.map((x) => x.raw).join(" "),
|
|
37
|
+
type: TokenType.CALLSIGN,
|
|
38
|
+
});
|
|
39
|
+
i += gaMatch.consumed - 1;
|
|
40
|
+
continue;
|
|
41
|
+
}
|
|
42
|
+
// --- 2. Try airline callsign ---
|
|
43
|
+
const airlineMatch = matchAirline(tokens, i);
|
|
44
|
+
if (airlineMatch) {
|
|
45
|
+
const airlineName = airlineMatch.name;
|
|
46
|
+
const airlineTokens = airlineMatch.tokens;
|
|
47
|
+
let j = i + airlineMatch.length;
|
|
15
48
|
const flightNumTokens = [];
|
|
49
|
+
let idLength = 0;
|
|
16
50
|
let letterCount = 0;
|
|
17
|
-
while (j < tokens.length &&
|
|
18
|
-
flightNumTokens.length < MAX_FLIGHT_ID_LENGTH) {
|
|
51
|
+
while (j < tokens.length && idLength < MAX_FLIGHT_ID_LENGTH) {
|
|
19
52
|
const next = tokens[j];
|
|
20
53
|
if (next.type === TokenType.NUMBER) {
|
|
54
|
+
const currentDigits = flightNumTokens
|
|
55
|
+
.filter((ft) => ft.type === TokenType.NUMBER)
|
|
56
|
+
.reduce((acc, ft) => acc + ft.value.length, 0);
|
|
57
|
+
if (currentDigits + next.value.length > MAX_FLIGHT_NUMBER_DIGITS)
|
|
58
|
+
break;
|
|
59
|
+
if (idLength + next.value.length > MAX_FLIGHT_ID_LENGTH)
|
|
60
|
+
break;
|
|
21
61
|
flightNumTokens.push(next);
|
|
62
|
+
idLength += next.value.length;
|
|
22
63
|
j++;
|
|
23
64
|
}
|
|
24
65
|
else if (next.type === TokenType.PHONETIC ||
|
|
25
|
-
(next.type === TokenType.WORD &&
|
|
26
|
-
|
|
27
|
-
if (letterCount >= 2)
|
|
66
|
+
(next.type === TokenType.WORD &&
|
|
67
|
+
/^[a-zA-Z]$/.test(next.value))) {
|
|
68
|
+
if (letterCount >= 2)
|
|
28
69
|
break;
|
|
29
|
-
}
|
|
30
70
|
flightNumTokens.push(next);
|
|
71
|
+
idLength++;
|
|
31
72
|
letterCount++;
|
|
32
73
|
j++;
|
|
33
74
|
}
|
|
@@ -38,14 +79,20 @@ export function normalizeCallsigns(tokens, opts) {
|
|
|
38
79
|
if (flightNumTokens.length > 0) {
|
|
39
80
|
const idStr = flightNumTokens
|
|
40
81
|
.map((ft) => {
|
|
41
|
-
if (ft.type === TokenType.PHONETIC &&
|
|
82
|
+
if (ft.type === TokenType.PHONETIC &&
|
|
83
|
+
ft.metadata?.letter)
|
|
42
84
|
return ft.metadata.letter;
|
|
43
|
-
}
|
|
44
85
|
return ft.value;
|
|
45
86
|
})
|
|
46
87
|
.join("");
|
|
88
|
+
const displayValue = `${airlineName} ${idStr}`;
|
|
89
|
+
const suffix = flightNumTokens
|
|
90
|
+
.filter((ft) => ft.type === TokenType.PHONETIC && ft.metadata?.letter)
|
|
91
|
+
.map((ft) => ft.metadata.letter)
|
|
92
|
+
.join("");
|
|
93
|
+
trackCallsign({ full: displayValue, suffix, raw: displayValue });
|
|
47
94
|
output.push({
|
|
48
|
-
value:
|
|
95
|
+
value: displayValue,
|
|
49
96
|
raw: [...airlineTokens, ...flightNumTokens]
|
|
50
97
|
.map((x) => x.raw)
|
|
51
98
|
.join(" "),
|
|
@@ -55,14 +102,148 @@ export function normalizeCallsigns(tokens, opts) {
|
|
|
55
102
|
continue;
|
|
56
103
|
}
|
|
57
104
|
}
|
|
105
|
+
// --- 3. Try abbreviated readback: suffix-only callsign (e.g. "Mike Sierra") ---
|
|
106
|
+
const abbrMatch = matchAbbreviatedCallsign(tokens, i);
|
|
107
|
+
if (abbrMatch) {
|
|
108
|
+
output.push({
|
|
109
|
+
value: abbrMatch.tracked.full,
|
|
110
|
+
raw: abbrMatch.rawTokens.map((x) => x.raw).join(" "),
|
|
111
|
+
type: TokenType.CALLSIGN,
|
|
112
|
+
metadata: {
|
|
113
|
+
abbreviated: true,
|
|
114
|
+
originalSuffix: abbrMatch.tracked.suffix,
|
|
115
|
+
},
|
|
116
|
+
});
|
|
117
|
+
i += abbrMatch.consumed - 1;
|
|
118
|
+
continue;
|
|
119
|
+
}
|
|
58
120
|
output.push(t);
|
|
59
121
|
}
|
|
60
122
|
return output;
|
|
61
123
|
}
|
|
124
|
+
const MAX_ICAO_LENGTH = 6; // ICAO registrations are max 5-6 chars (e.g. N422MS, DBOBS, D-BOBS)
/**
 * Try to read a spelled-out GA callsign starting at `startIndex`.
 *
 * The first token must be a PHONETIC token carrying a letter. Up to four
 * further PHONETIC-letter or NUMBER tokens are appended as long as the
 * combined character count stays within MAX_ICAO_LENGTH. At least three
 * further tokens and at least two non-digit characters are required.
 *
 * @returns {{icao: string, suffix: string, displayValue: string, rawTokens: Array, consumed: number} | null}
 *   `suffix` is the last two parts containing an uppercase letter, joined.
 */
function matchGACallsign(tokens, startIndex) {
    const head = tokens[startIndex];
    if (head.type !== TokenType.PHONETIC || !head.metadata?.letter) {
        return null;
    }
    const matched = [head];
    let charCount = 1; // the leading phonetic letter
    for (let cursor = startIndex + 1; cursor < tokens.length && matched.length < 5; cursor++) {
        const candidate = tokens[cursor];
        let width;
        if (candidate.type === TokenType.PHONETIC && !!candidate.metadata?.letter) {
            width = 1;
        }
        else if (candidate.type === TokenType.NUMBER) {
            width = candidate.value.length;
        }
        else {
            break;
        }
        // Stop before the registration would grow past the maximum length
        if (charCount + width > MAX_ICAO_LENGTH) {
            break;
        }
        matched.push(candidate);
        charCount += width;
    }
    if (matched.length < 4) {
        return null;
    }
    const parts = matched.map((tok) => (tok.type === TokenType.PHONETIC ? tok.metadata.letter : tok.value));
    const icao = parts.join("");
    if (icao.length > MAX_ICAO_LENGTH) {
        return null; // hard stop
    }
    const lettersOnly = icao.replace(/[0-9]/g, "");
    if (lettersOnly.length < 2) {
        return null;
    }
    const suffix = parts
        .filter((part) => /[A-Z]/.test(part))
        .slice(-2)
        .join("");
    return {
        icao,
        suffix,
        displayValue: icao,
        rawTokens: matched,
        consumed: matched.length,
    };
}
|
|
170
|
+
/**
 * Try to resolve an abbreviated callsign (e.g. "Mike Sierra") starting at
 * `startIndex` against the recently tracked callsigns.
 *
 * Up to four consecutive NUMBER or PHONETIC-letter tokens are collected.
 * From the longest run down to a single token, the run's text is compared
 * against the end of each tracked callsign's `full` value. Only the FIRST
 * token may shed leading characters (a NUMBER such as "0902" may contribute
 * "0902", "902", "02" or "2"); every later token contributes its complete
 * text, so each candidate is a contiguous tail of what was actually heard.
 * Candidates shorter than 3 characters are ignored.
 *
 * @param {Array} tokens - Token stream.
 * @param {number} startIndex - Index of the first token to consider.
 * @returns {{tracked: object, rawTokens: Array, consumed: number} | null}
 *   The matched tracked entry and the tokens consumed, or null.
 */
function matchAbbreviatedCallsign(tokens, startIndex) {
    if (lastHeardCallsigns.length === 0) {
        return null;
    }
    const isCandidate = (tok) => tok.type === TokenType.NUMBER ||
        (tok.type === TokenType.PHONETIC && !!tok.metadata?.letter);
    const textOf = (tok) => (tok.type === TokenType.PHONETIC ? tok.metadata.letter : tok.value);
    // Greedily collect up to 4 consecutive NUMBER or PHONETIC tokens
    const candidateTokens = [];
    for (let j = startIndex; j < tokens.length && candidateTokens.length < 4; j++) {
        if (!isCandidate(tokens[j])) {
            break;
        }
        candidateTokens.push(tokens[j]);
    }
    if (candidateTokens.length === 0) {
        return null;
    }
    const head = candidateTokens[0];
    const headText = textOf(head);
    // A phonetic letter has exactly one form; a number may drop leading digits
    const lastStart = head.type === TokenType.NUMBER ? headText.length - 1 : 0;
    // Try from the longest token slice down to 1 token
    for (let len = candidateTokens.length; len >= 1; len--) {
        const slice = candidateTokens.slice(0, len);
        const tail = slice.slice(1).map(textOf).join("");
        for (let start = 0; start <= lastStart; start++) {
            const candidateSuffix = headText.slice(start) + tail;
            // Later starts only produce shorter candidates, so stop here
            if (candidateSuffix.length < 3) {
                break;
            }
            const match = lastHeardCallsigns.find((c) => c.full.endsWith(candidateSuffix));
            if (match) {
                return { tracked: match, rawTokens: slice, consumed: len };
            }
        }
    }
    return null;
}
|
|
224
|
+
// --- Existing airline matcher ---
|
|
225
|
+
// Words that are ATC instructions/keywords and should never be treated as airline names
|
|
226
|
+
const AIRLINE_BLOCKLIST = new Set([
|
|
227
|
+
"approach",
|
|
228
|
+
"departure",
|
|
229
|
+
"tower",
|
|
230
|
+
"ground",
|
|
231
|
+
"center",
|
|
232
|
+
"centre",
|
|
233
|
+
"control",
|
|
234
|
+
"delivery",
|
|
235
|
+
"radar",
|
|
236
|
+
"director",
|
|
237
|
+
"information",
|
|
238
|
+
"radio",
|
|
239
|
+
]);
|
|
62
240
|
function matchAirline(tokens, startIndex) {
|
|
63
241
|
if (startIndex + 1 < tokens.length) {
|
|
64
242
|
const twoWords = `${tokens[startIndex].value} ${tokens[startIndex + 1].value}`.toLowerCase();
|
|
65
243
|
if (KNOWN_AIRLINES.has(twoWords)) {
|
|
244
|
+
const firstWord = tokens[startIndex].value.toLowerCase();
|
|
245
|
+
if (AIRLINE_BLOCKLIST.has(firstWord))
|
|
246
|
+
return null;
|
|
66
247
|
const niceName = twoWords
|
|
67
248
|
.split(" ")
|
|
68
249
|
.map((w) => w.charAt(0).toUpperCase() + w.slice(1))
|
|
@@ -76,12 +257,10 @@ function matchAirline(tokens, startIndex) {
|
|
|
76
257
|
}
|
|
77
258
|
const oneWord = tokens[startIndex].value.toLowerCase();
|
|
78
259
|
if (KNOWN_AIRLINES.has(oneWord)) {
|
|
260
|
+
if (AIRLINE_BLOCKLIST.has(oneWord))
|
|
261
|
+
return null;
|
|
79
262
|
const niceName = oneWord.charAt(0).toUpperCase() + oneWord.slice(1);
|
|
80
|
-
return {
|
|
81
|
-
name: niceName,
|
|
82
|
-
length: 1,
|
|
83
|
-
tokens: [tokens[startIndex]],
|
|
84
|
-
};
|
|
263
|
+
return { name: niceName, length: 1, tokens: [tokens[startIndex]] };
|
|
85
264
|
}
|
|
86
265
|
return null;
|
|
87
266
|
}
|