@drakulavich/parakeet-cli 0.4.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +46 -107
- package/package.json +1 -1
- package/src/__tests__/coreml.test.ts +28 -0
- package/src/__tests__/models.test.ts +8 -1
- package/src/cli.ts +19 -3
- package/src/coreml.ts +35 -0
- package/src/lib.ts +2 -2
- package/src/models.ts +55 -13
- package/src/transcribe.ts +20 -5
- package/src/__tests__/audio.test.ts +0 -28
package/README.md
CHANGED
|
@@ -1,149 +1,88 @@
|
|
|
1
|
-
# parakeet-cli
|
|
1
|
+
# 🦜 parakeet-cli
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
[npm package](https://www.npmjs.com/package/@drakulavich/parakeet-cli)
|
|
4
|
+
[CI status](https://github.com/drakulavich/parakeet-cli/actions/workflows/ci.yml)
|
|
5
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
6
|
+
[Runs on Bun](https://bun.sh)
|
|
7
|
+
[Allure test report](https://drakulavich.github.io/parakeet-cli/reports/allure)
|
|
4
8
|
|
|
5
|
-
|
|
9
|
+
Fast local speech-to-text. 25 languages. ~18x faster than Whisper on Apple Silicon.
|
|
6
10
|
|
|
7
|
-
- **
|
|
8
|
-
- **3x faster than Whisper
|
|
9
|
-
- **
|
|
10
|
-
- **
|
|
11
|
-
- **Any audio format** — ffmpeg handles OGG, MP3, WAV, FLAC, M4A, etc.
|
|
11
|
+
- **CoreML on Apple Silicon** — ~155x real-time via [FluidAudio](https://github.com/FluidInference/FluidAudio)
|
|
12
|
+
- **ONNX on CPU** — cross-platform fallback, 3x faster than Whisper
|
|
13
|
+
- **Any audio format** — ffmpeg handles OGG, MP3, WAV, FLAC, M4A
|
|
14
|
+
- **Zero Python** — Bun + TypeScript, native Swift binary for CoreML
|
|
12
15
|
|
|
13
|
-
##
|
|
14
|
-
|
|
15
|
-
Using Bun (recommended):
|
|
16
|
+
## Quick Start
|
|
16
17
|
|
|
17
18
|
```bash
|
|
18
19
|
bun install -g @drakulavich/parakeet-cli
|
|
20
|
+
parakeet install # CoreML on macOS arm64, ONNX elsewhere
|
|
21
|
+
parakeet audio.ogg # → transcript to stdout
|
|
19
22
|
```
|
|
20
23
|
|
|
21
|
-
Using npm (requires Bun runtime installed):
|
|
22
|
-
|
|
23
|
-
```bash
|
|
24
|
-
npm install -g @drakulavich/parakeet-cli
|
|
25
|
-
```
|
|
26
|
-
|
|
27
|
-
Or clone and link locally:
|
|
28
|
-
|
|
29
|
-
```bash
|
|
30
|
-
git clone https://github.com/drakulavich/parakeet-cli.git
|
|
31
|
-
cd parakeet-cli
|
|
32
|
-
bun install
|
|
33
|
-
bun link
|
|
34
|
-
```
|
|
35
|
-
|
|
36
|
-
> **Note:** Bun is required as the runtime — the CLI uses Bun-native APIs and TypeScript execution. You can use either `bun` or `npm` as the package manager to install it, but Bun must be available in PATH to run the `parakeet` command.
|
|
37
|
-
|
|
38
24
|
## Usage
|
|
39
25
|
|
|
40
26
|
```bash
|
|
41
|
-
#
|
|
42
|
-
parakeet
|
|
43
|
-
|
|
44
|
-
#
|
|
45
|
-
parakeet --no-cache audio.wav
|
|
46
|
-
|
|
47
|
-
# Show version
|
|
27
|
+
parakeet install # auto-detect backend
|
|
28
|
+
parakeet install --coreml # force CoreML (macOS arm64)
|
|
29
|
+
parakeet install --onnx # force ONNX (~3GB)
|
|
30
|
+
parakeet audio.ogg # transcribe (language auto-detected)
|
|
48
31
|
parakeet --version
|
|
49
32
|
```
|
|
50
33
|
|
|
51
|
-
|
|
34
|
+
Stdout: transcript. Stderr: errors. Pipe-friendly.
|
|
52
35
|
|
|
53
|
-
##
|
|
36
|
+
## Requirements
|
|
37
|
+
|
|
38
|
+
- [Bun](https://bun.sh) >= 1.3
|
|
39
|
+
- [ffmpeg](https://ffmpeg.org) in PATH (ONNX backend only)
|
|
40
|
+
- ~3GB disk (ONNX models)
|
|
54
41
|
|
|
55
|
-
|
|
56
|
-
VM: AMD EPYC 7763 8C/16T, 64GB RAM, CPU-only.
|
|
42
|
+
## Supported Languages
|
|
57
43
|
|
|
58
|
-
|
|
59
|
-
|---|---------|----------|--------------------|---------------------|
|
|
60
|
-
| 1 | 13.3s | 4.4s | Проверь все свои конфиги и перенеси секреты в .env файл. | проверь все свои конфигии и перенеси секреты в дот энф файл |
|
|
61
|
-
| 2 | 13.1s | 4.2s | Вынеси еще секрет от Клода, который я тебе добавил. | неси еще секрет от Клода, который я тебе добавил |
|
|
62
|
-
| 3 | 12.7s | 4.0s | Установи пока Клод Код | Установи пока клот кот |
|
|
63
|
-
| 4 | 13.1s | 4.1s | Какие еще Telegram-юзеры имеют доступ к тебе? | ки еще телеграм юзеры имеют доступ к тебе |
|
|
64
|
-
| 5 | 12.7s | 4.0s | Закомите изменения в ГИТ | Закомить изменения в Гет |
|
|
65
|
-
| 6 | 13.1s | 4.1s | Узнай второго юзера в телеграме. | Узнай второго юзера в Телеграме |
|
|
66
|
-
| 7 | 13.4s | 5.0s | Ты добавил себе в память информацию из Vantage Handbook Репозитория | Ты добавил себе в память информацию из Вентаж хэндбук репозитория |
|
|
67
|
-
| 8 | 13.1s | 4.8s | Покажи его username в телеграмме, хочу написать ему. | жи его юзернейм в телеграме хочу написать ему |
|
|
68
|
-
| 9 | 14.2s | 4.5s | Не нужно посылать сообщение с транскрипцией. Сразу выполняй инструкцию. | жно слать сообщение с транскрипцией сразу выполняй инструкцию |
|
|
69
|
-
| 10 | 13.5s | 4.8s | То, что находится в папке Workspace, ты тоже коммитишь? | То, что находится в папке Воркспейс, ты тоже комитишь? |
|
|
70
|
-
| **Total** | **132.1s** | **43.8s** | | |
|
|
44
|
+
:bulgaria: Bulgarian, :croatia: Croatian, :czech_republic: Czech, :denmark: Danish, :netherlands: Dutch, :gb: English, :estonia: Estonian, :finland: Finnish, :fr: French, :de: German, :greece: Greek, :hungary: Hungarian, :it: Italian, :latvia: Latvian, :lithuania: Lithuanian, :malta: Maltese, :poland: Polish, :portugal: Portuguese, :romania: Romanian, :ru: Russian, :slovakia: Slovak, :slovenia: Slovenian, :es: Spanish, :sweden: Swedish, :ukraine: Ukrainian
|
|
71
45
|
|
|
72
|
-
|
|
46
|
+
## Benchmark
|
|
73
47
|
|
|
74
|
-
|
|
48
|
+
MacBook Pro M3 Pro — 10 Russian voice messages:
|
|
75
49
|
|
|
76
|
-
|
|
50
|
+
| | faster-whisper (CPU) | Parakeet (CoreML) |
|
|
51
|
+
|---|---|---|
|
|
52
|
+
| **Total** | 35.3s | 1.9s |
|
|
53
|
+
| **Speed** | | **~18x faster** |
|
|
77
54
|
|
|
78
|
-
|
|
55
|
+
Full results with transcripts: [BENCHMARK.md](BENCHMARK.md)
|
|
79
56
|
|
|
80
57
|
## How It Works
|
|
81
58
|
|
|
82
59
|
```
|
|
83
60
|
parakeet audio.ogg
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
+-- nemo128.onnx: waveform -> 128-dim log-mel spectrogram
|
|
87
|
-
+-- per-utterance normalization (mean=0, std=1)
|
|
88
|
-
+-- encoder-model.onnx: mel features -> encoder output
|
|
89
|
-
+-- TDT greedy decoder: encoder output -> token IDs + durations
|
|
90
|
-
+-- vocab.txt: token IDs -> text
|
|
91
|
-
|
|
|
92
|
-
stdout: transcript
|
|
93
|
-
```
|
|
94
|
-
|
|
95
|
-
Uses [NVIDIA Parakeet TDT 0.6B v3](https://huggingface.co/nvidia/parakeet-tdt-0.6b-v3) exported to ONNX by [istupakov](https://huggingface.co/istupakov/parakeet-tdt-0.6b-v3-onnx). Models auto-download from HuggingFace on first run (~3GB).
|
|
96
|
-
|
|
97
|
-
## Requirements
|
|
98
|
-
|
|
99
|
-
- [Bun](https://bun.sh) >= 1.3 (runtime)
|
|
100
|
-
- [ffmpeg](https://ffmpeg.org) installed and in PATH
|
|
101
|
-
- ~3GB disk space for model cache
|
|
102
|
-
- npm or Bun can be used as the package manager
|
|
103
|
-
|
|
104
|
-
### macOS (Apple Silicon)
|
|
105
|
-
|
|
106
|
-
Works natively on M1/M2/M3/M4. Install dependencies with Homebrew:
|
|
107
|
-
|
|
108
|
-
```bash
|
|
109
|
-
brew install ffmpeg
|
|
110
|
-
curl -fsSL https://bun.sh/install | bash
|
|
111
|
-
bun install -g @drakulavich/parakeet-cli # or: npm install -g @drakulavich/parakeet-cli
|
|
61
|
+
├── CoreML installed? → parakeet-coreml subprocess → stdout
|
|
62
|
+
└── ONNX installed? → ffmpeg → mel → encoder → decoder → stdout
|
|
112
63
|
```
|
|
113
64
|
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
```bash
|
|
117
|
-
apt install ffmpeg # or yum, pacman, etc.
|
|
118
|
-
curl -fsSL https://bun.sh/install | bash
|
|
119
|
-
bun install -g @drakulavich/parakeet-cli # or: npm install -g @drakulavich/parakeet-cli
|
|
120
|
-
```
|
|
65
|
+
- **CoreML**: Swift binary wraps [FluidAudio](https://github.com/FluidInference/FluidAudio) + [CoreML model](https://huggingface.co/FluidInference/parakeet-tdt-0.6b-v3-coreml)
|
|
66
|
+
- **ONNX**: [NVIDIA Parakeet TDT 0.6B v3](https://huggingface.co/nvidia/parakeet-tdt-0.6b-v3) via [onnxruntime-node](https://www.npmjs.com/package/onnxruntime-node)
|
|
121
67
|
|
|
122
68
|
## OpenClaw Integration
|
|
123
69
|
|
|
124
|
-
|
|
70
|
+
Drop-in replacement for OpenClaw voice processing — no API keys, runs locally.
|
|
125
71
|
|
|
126
72
|
```json
|
|
127
|
-
|
|
128
|
-
"
|
|
129
|
-
"
|
|
130
|
-
"
|
|
131
|
-
|
|
132
|
-
{
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
"args": ["{{MediaPath}}"],
|
|
136
|
-
"timeoutSeconds": 120
|
|
137
|
-
}
|
|
138
|
-
],
|
|
139
|
-
"echoTranscript": false
|
|
73
|
+
{
|
|
74
|
+
"tools": {
|
|
75
|
+
"media": {
|
|
76
|
+
"audio": {
|
|
77
|
+
"enabled": true,
|
|
78
|
+
"models": [{"type": "cli", "command": "parakeet", "args": ["{{MediaPath}}"], "timeoutSeconds": 120}],
|
|
79
|
+
"echoTranscript": false
|
|
80
|
+
}
|
|
140
81
|
}
|
|
141
82
|
}
|
|
142
83
|
}
|
|
143
84
|
```
|
|
144
85
|
|
|
145
|
-
Then restart the gateway: `openclaw gateway restart`
|
|
146
|
-
|
|
147
86
|
## License
|
|
148
87
|
|
|
149
88
|
MIT
|
package/package.json
CHANGED
|
package/src/__tests__/coreml.test.ts
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
import { describe, test, expect } from "bun:test";
|
|
2
|
+
import { getCoreMLBinPath, isMacArm64, isCoreMLInstalled } from "../coreml";
|
|
3
|
+
import { join } from "path";
|
|
4
|
+
import { homedir } from "os";
|
|
5
|
+
|
|
6
|
+
describe("coreml", () => {
|
|
7
|
+
test("getCoreMLBinPath returns correct cache path", () => {
|
|
8
|
+
const binPath = getCoreMLBinPath();
|
|
9
|
+
expect(binPath).toBe(
|
|
10
|
+
join(homedir(), ".cache", "parakeet", "coreml", "bin", "parakeet-coreml"),
|
|
11
|
+
);
|
|
12
|
+
});
|
|
13
|
+
|
|
14
|
+
test("isMacArm64 returns a boolean", () => {
|
|
15
|
+
const result = isMacArm64();
|
|
16
|
+
expect(typeof result).toBe("boolean");
|
|
17
|
+
if (process.platform === "darwin" && process.arch === "arm64") {
|
|
18
|
+
expect(result).toBe(true);
|
|
19
|
+
} else {
|
|
20
|
+
expect(result).toBe(false);
|
|
21
|
+
}
|
|
22
|
+
});
|
|
23
|
+
|
|
24
|
+
test("isCoreMLInstalled returns a boolean", () => {
|
|
25
|
+
const result = isCoreMLInstalled();
|
|
26
|
+
expect(typeof result).toBe("boolean");
|
|
27
|
+
});
|
|
28
|
+
});
|
|
package/src/__tests__/models.test.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { describe, test, expect } from "bun:test";
|
|
2
|
-
import { getModelDir, MODEL_FILES, HF_REPO } from "../models";
|
|
2
|
+
import { getModelDir, MODEL_FILES, HF_REPO, getCoreMLDownloadURL } from "../models";
|
|
3
3
|
import { join } from "path";
|
|
4
4
|
import { homedir } from "os";
|
|
5
5
|
|
|
@@ -20,4 +20,11 @@ describe("models", () => {
|
|
|
20
20
|
test("HF_REPO points to v3 ONNX repo", () => {
|
|
21
21
|
expect(HF_REPO).toBe("istupakov/parakeet-tdt-0.6b-v3-onnx");
|
|
22
22
|
});
|
|
23
|
+
|
|
24
|
+
test("getCoreMLDownloadURL includes version and correct filename", () => {
|
|
25
|
+
const url = getCoreMLDownloadURL("0.5.0");
|
|
26
|
+
expect(url).toBe(
|
|
27
|
+
"https://github.com/drakulavich/parakeet-cli/releases/download/v0.5.0/parakeet-coreml-darwin-arm64"
|
|
28
|
+
);
|
|
29
|
+
});
|
|
23
30
|
});
|
package/src/cli.ts
CHANGED
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
#!/usr/bin/env bun
|
|
2
2
|
|
|
3
3
|
import { transcribe } from "./lib";
|
|
4
|
-
import { downloadModel } from "./models";
|
|
4
|
+
import { downloadModel, downloadCoreML } from "./models";
|
|
5
|
+
import { isMacArm64 } from "./coreml";
|
|
5
6
|
|
|
6
7
|
async function main(): Promise<void> {
|
|
7
8
|
const args = process.argv.slice(2);
|
|
@@ -16,8 +17,23 @@ async function main(): Promise<void> {
|
|
|
16
17
|
|
|
17
18
|
if (positional[0] === "install") {
|
|
18
19
|
const noCache = args.includes("--no-cache");
|
|
20
|
+
const forceCoreML = args.includes("--coreml");
|
|
21
|
+
const forceOnnx = args.includes("--onnx");
|
|
22
|
+
|
|
19
23
|
try {
|
|
20
|
-
|
|
24
|
+
if (forceCoreML) {
|
|
25
|
+
if (!isMacArm64()) {
|
|
26
|
+
console.error("Error: CoreML backend is only available on macOS Apple Silicon.");
|
|
27
|
+
process.exit(1);
|
|
28
|
+
}
|
|
29
|
+
await downloadCoreML(noCache);
|
|
30
|
+
} else if (forceOnnx) {
|
|
31
|
+
await downloadModel(noCache);
|
|
32
|
+
} else if (isMacArm64()) {
|
|
33
|
+
await downloadCoreML(noCache);
|
|
34
|
+
} else {
|
|
35
|
+
await downloadModel(noCache);
|
|
36
|
+
}
|
|
21
37
|
} catch (err: unknown) {
|
|
22
38
|
const message = err instanceof Error ? err.message : String(err);
|
|
23
39
|
console.error(`Error: ${message}`);
|
|
@@ -30,7 +46,7 @@ async function main(): Promise<void> {
|
|
|
30
46
|
|
|
31
47
|
if (!file) {
|
|
32
48
|
console.error("Usage: parakeet [--version] <audio_file>");
|
|
33
|
-
console.error(" parakeet install [--no-cache]");
|
|
49
|
+
console.error(" parakeet install [--coreml | --onnx] [--no-cache]");
|
|
34
50
|
process.exit(1);
|
|
35
51
|
}
|
|
36
52
|
|
package/src/coreml.ts
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
import { join } from "path";
|
|
2
|
+
import { homedir } from "os";
|
|
3
|
+
import { existsSync } from "fs";
|
|
4
|
+
|
|
5
|
+
export function isMacArm64(): boolean {
|
|
6
|
+
return process.platform === "darwin" && process.arch === "arm64";
|
|
7
|
+
}
|
|
8
|
+
|
|
9
|
+
export function getCoreMLBinPath(): string {
|
|
10
|
+
return join(homedir(), ".cache", "parakeet", "coreml", "bin", "parakeet-coreml");
|
|
11
|
+
}
|
|
12
|
+
|
|
13
|
+
export function isCoreMLInstalled(): boolean {
|
|
14
|
+
return isMacArm64() && existsSync(getCoreMLBinPath());
|
|
15
|
+
}
|
|
16
|
+
|
|
17
|
+
export async function transcribeCoreML(audioPath: string): Promise<string> {
|
|
18
|
+
const binPath = getCoreMLBinPath();
|
|
19
|
+
const proc = Bun.spawn([binPath, audioPath], {
|
|
20
|
+
stdout: "pipe",
|
|
21
|
+
stderr: "pipe",
|
|
22
|
+
});
|
|
23
|
+
|
|
24
|
+
const [stdout, stderr, exitCode] = await Promise.all([
|
|
25
|
+
new Response(proc.stdout).text(),
|
|
26
|
+
new Response(proc.stderr).text(),
|
|
27
|
+
proc.exited,
|
|
28
|
+
]);
|
|
29
|
+
|
|
30
|
+
if (exitCode !== 0) {
|
|
31
|
+
throw new Error(stderr);
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
return stdout.trim();
|
|
35
|
+
}
|
package/src/lib.ts
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
import { existsSync } from "fs";
|
|
2
2
|
import { transcribe as internalTranscribe, type TranscribeOptions } from "./transcribe";
|
|
3
|
-
import { downloadModel } from "./models";
|
|
3
|
+
import { downloadModel, downloadCoreML } from "./models";
|
|
4
4
|
|
|
5
5
|
export type { TranscribeOptions };
|
|
6
|
-
export { downloadModel };
|
|
6
|
+
export { downloadModel, downloadCoreML };
|
|
7
7
|
|
|
8
8
|
export async function transcribe(
|
|
9
9
|
audioPath: string,
|
package/src/models.ts
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
|
-
import { join } from "path";
|
|
1
|
+
import { join, dirname } from "path";
|
|
2
2
|
import { homedir } from "os";
|
|
3
|
-
import { existsSync, mkdirSync } from "fs";
|
|
3
|
+
import { existsSync, mkdirSync, chmodSync } from "fs";
|
|
4
|
+
import { isCoreMLInstalled } from "./coreml";
|
|
4
5
|
|
|
5
6
|
export const HF_REPO = "istupakov/parakeet-tdt-0.6b-v3-onnx";
|
|
6
7
|
|
|
@@ -21,21 +22,28 @@ export function isModelCached(dir?: string): boolean {
|
|
|
21
22
|
return MODEL_FILES.every((f) => existsSync(join(d, f)));
|
|
22
23
|
}
|
|
23
24
|
|
|
25
|
+
export function isModelInstalled(modelDir?: string): boolean {
|
|
26
|
+
return isCoreMLInstalled() || isModelCached(modelDir);
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
export function installHintError(headline: string): Error {
|
|
30
|
+
const lines = [
|
|
31
|
+
headline,
|
|
32
|
+
"",
|
|
33
|
+
"╔══════════════════════════════════════════════════════════╗",
|
|
34
|
+
"║ Please run the following command to get started: ║",
|
|
35
|
+
"║ ║",
|
|
36
|
+
"║ bunx @drakulavich/parakeet-cli install ║",
|
|
37
|
+
"╚══════════════════════════════════════════════════════════╝",
|
|
38
|
+
];
|
|
39
|
+
return new Error(lines.join("\n"));
|
|
40
|
+
}
|
|
41
|
+
|
|
24
42
|
export function requireModel(modelDir?: string): string {
|
|
25
43
|
const dir = modelDir ?? getModelDir();
|
|
26
44
|
|
|
27
45
|
if (!isModelCached(dir)) {
|
|
28
|
-
|
|
29
|
-
`Error: Model not found at ${dir}`,
|
|
30
|
-
"",
|
|
31
|
-
"╔══════════════════════════════════════════════════════════╗",
|
|
32
|
-
"║ Looks like Parakeet model is not downloaded yet. ║",
|
|
33
|
-
"║ Please run the following command to download the model: ║",
|
|
34
|
-
"║ ║",
|
|
35
|
-
"║ npx @drakulavich/parakeet-cli install ║",
|
|
36
|
-
"╚══════════════════════════════════════════════════════════╝",
|
|
37
|
-
];
|
|
38
|
-
throw new Error(lines.join("\n"));
|
|
46
|
+
throw installHintError(`Error: Model not found at ${dir}`);
|
|
39
47
|
}
|
|
40
48
|
|
|
41
49
|
return dir;
|
|
@@ -71,3 +79,37 @@ export async function downloadModel(noCache = false, modelDir?: string): Promise
|
|
|
71
79
|
console.error("Model downloaded successfully.");
|
|
72
80
|
return dir;
|
|
73
81
|
}
|
|
82
|
+
|
|
83
|
+
export function getCoreMLDownloadURL(version: string): string {
|
|
84
|
+
return `https://github.com/drakulavich/parakeet-cli/releases/download/v${version}/parakeet-coreml-darwin-arm64`;
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
export async function downloadCoreML(noCache = false): Promise<string> {
|
|
88
|
+
const { getCoreMLBinPath } = await import("./coreml");
|
|
89
|
+
const binPath = getCoreMLBinPath();
|
|
90
|
+
|
|
91
|
+
if (!noCache && existsSync(binPath)) {
|
|
92
|
+
console.error("CoreML backend already installed.");
|
|
93
|
+
return binPath;
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
const pkg = await Bun.file(new URL("../package.json", import.meta.url)).json();
|
|
97
|
+
const url = getCoreMLDownloadURL(pkg.version);
|
|
98
|
+
|
|
99
|
+
console.error("Downloading parakeet-coreml binary...");
|
|
100
|
+
|
|
101
|
+
const res = await fetch(url, { redirect: "follow" });
|
|
102
|
+
|
|
103
|
+
if (!res.ok) {
|
|
104
|
+
throw new Error(`Failed to download CoreML binary: ${url} (${res.status})`);
|
|
105
|
+
}
|
|
106
|
+
|
|
107
|
+
mkdirSync(dirname(binPath), { recursive: true });
|
|
108
|
+
|
|
109
|
+
await Bun.write(binPath, res);
|
|
110
|
+
|
|
111
|
+
chmodSync(binPath, 0o755);
|
|
112
|
+
|
|
113
|
+
console.error("CoreML backend installed successfully.");
|
|
114
|
+
return binPath;
|
|
115
|
+
}
|
package/src/transcribe.ts
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
|
-
import { requireModel } from "./models";
|
|
1
|
+
import { requireModel, isModelCached, installHintError } from "./models";
|
|
2
|
+
import { isCoreMLInstalled, transcribeCoreML } from "./coreml";
|
|
2
3
|
import { convertToFloat32PCM } from "./audio";
|
|
3
4
|
import { initPreprocessor, preprocess } from "./preprocess";
|
|
4
5
|
import { initEncoder, encode } from "./encoder";
|
|
@@ -33,6 +34,18 @@ export interface TranscribeOptions {
|
|
|
33
34
|
const MIN_AUDIO_SAMPLES = 1600;
|
|
34
35
|
|
|
35
36
|
export async function transcribe(audioPath: string, opts: TranscribeOptions = {}): Promise<string> {
|
|
37
|
+
if (isCoreMLInstalled()) {
|
|
38
|
+
return transcribeCoreML(audioPath);
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
if (isModelCached(opts.modelDir)) {
|
|
42
|
+
return transcribeOnnx(audioPath, opts);
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
throw installHintError("Error: No transcription backend is installed");
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
async function transcribeOnnx(audioPath: string, opts: TranscribeOptions): Promise<string> {
|
|
36
49
|
const audio = await convertToFloat32PCM(audioPath);
|
|
37
50
|
|
|
38
51
|
if (audio.length < MIN_AUDIO_SAMPLES) {
|
|
@@ -41,11 +54,13 @@ export async function transcribe(audioPath: string, opts: TranscribeOptions = {}
|
|
|
41
54
|
|
|
42
55
|
const beamWidth = opts.beamWidth ?? 4;
|
|
43
56
|
const modelDir = requireModel(opts.modelDir);
|
|
44
|
-
const tokenizer = await Tokenizer.fromFile(join(modelDir, "vocab.txt"));
|
|
45
57
|
|
|
46
|
-
await
|
|
47
|
-
|
|
48
|
-
|
|
58
|
+
const [tokenizer] = await Promise.all([
|
|
59
|
+
Tokenizer.fromFile(join(modelDir, "vocab.txt")),
|
|
60
|
+
initPreprocessor(modelDir),
|
|
61
|
+
initEncoder(modelDir),
|
|
62
|
+
initDecoder(modelDir),
|
|
63
|
+
]);
|
|
49
64
|
|
|
50
65
|
const { features, length } = await preprocess(audio);
|
|
51
66
|
const { encoderOutput, encodedLength } = await encode(features, length);
|
|
package/src/__tests__/audio.test.ts
REMOVED
|
@@ -1,28 +0,0 @@
|
|
|
1
|
-
import { describe, test, expect } from "bun:test";
|
|
2
|
-
import { convertToFloat32PCM } from "../audio";
|
|
3
|
-
import { spawnSync } from "child_process";
|
|
4
|
-
|
|
5
|
-
const hasFfmpeg = spawnSync("which", ["ffmpeg"]).status === 0;
|
|
6
|
-
|
|
7
|
-
describe.skipIf(!hasFfmpeg)("audio", () => {
|
|
8
|
-
test("converts WAV to 16kHz mono Float32Array", async () => {
|
|
9
|
-
const buffer = await convertToFloat32PCM("fixtures/silence.wav");
|
|
10
|
-
expect(buffer).toBeInstanceOf(Float32Array);
|
|
11
|
-
// 1 second at 16kHz = 16000 samples
|
|
12
|
-
expect(buffer.length).toBeGreaterThan(15000);
|
|
13
|
-
expect(buffer.length).toBeLessThan(17000);
|
|
14
|
-
});
|
|
15
|
-
|
|
16
|
-
test("throws on missing file", async () => {
|
|
17
|
-
expect(convertToFloat32PCM("nonexistent.wav")).rejects.toThrow(
|
|
18
|
-
"file not found"
|
|
19
|
-
);
|
|
20
|
-
});
|
|
21
|
-
|
|
22
|
-
test("throws on corrupt file", async () => {
|
|
23
|
-
await Bun.write("fixtures/corrupt.bin", "not audio data");
|
|
24
|
-
expect(convertToFloat32PCM("fixtures/corrupt.bin")).rejects.toThrow(
|
|
25
|
-
"failed to convert audio"
|
|
26
|
-
);
|
|
27
|
-
});
|
|
28
|
-
});
|