@drakulavich/parakeet-cli 0.5.3 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +51 -122
- package/package.json +3 -2
- package/src/models.ts +30 -3
- package/src/__tests__/audio.test.ts +0 -28
package/README.md
CHANGED
|
@@ -1,171 +1,100 @@
|
|
|
1
|
-
# parakeet-cli
|
|
1
|
+
# 🦜 parakeet-cli
|
|
2
2
|
|
|
3
|
-
[](https://www.npmjs.com/package/@drakulavich/parakeet-cli)
|
|
4
3
|
[](https://github.com/drakulavich/parakeet-cli/actions/workflows/ci.yml)
|
|
4
|
+
[](https://www.npmjs.com/package/@drakulavich/parakeet-cli)
|
|
5
5
|
[](https://opensource.org/licenses/MIT)
|
|
6
6
|
[](https://bun.sh)
|
|
7
7
|
|
|
8
|
-
Fast
|
|
9
|
-
|
|
10
|
-
## Features
|
|
8
|
+
Fast local speech-to-text. 25 languages. ~18x faster than Whisper on Apple Silicon.
|
|
11
9
|
|
|
12
|
-
- **
|
|
13
|
-
-
|
|
14
|
-
- **
|
|
15
|
-
- **Zero Python** —
|
|
16
|
-
- **Smart install** — `parakeet install` auto-detects platform: CoreML on macOS arm64, ONNX elsewhere
|
|
17
|
-
- **Any audio format** — ffmpeg handles OGG, MP3, WAV, FLAC, M4A, etc.
|
|
10
|
+
- **CoreML on Apple Silicon** — ~155x real-time via [FluidAudio](https://github.com/FluidInference/FluidAudio)
|
|
11
|
+
- **ONNX on CPU** — cross-platform fallback, 3x faster than Whisper
|
|
12
|
+
- **Any audio format** — ffmpeg handles OGG, MP3, WAV, FLAC, M4A
|
|
13
|
+
- **Zero Python** — Bun + TypeScript, native Swift binary for CoreML
|
|
18
14
|
|
|
19
|
-
##
|
|
20
|
-
|
|
21
|
-
Using Bun (recommended):
|
|
15
|
+
## Quick Start
|
|
22
16
|
|
|
23
17
|
```bash
|
|
24
18
|
bun install -g @drakulavich/parakeet-cli
|
|
19
|
+
parakeet install # CoreML on macOS arm64, ONNX elsewhere
|
|
20
|
+
parakeet audio.ogg # → transcript to stdout
|
|
25
21
|
```
|
|
26
22
|
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
```bash
|
|
30
|
-
npm install -g @drakulavich/parakeet-cli
|
|
31
|
-
```
|
|
32
|
-
|
|
33
|
-
Or clone and link locally:
|
|
23
|
+
## Usage
|
|
34
24
|
|
|
35
25
|
```bash
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
26
|
+
parakeet install # auto-detect backend
|
|
27
|
+
parakeet install --coreml # force CoreML (macOS arm64)
|
|
28
|
+
parakeet install --onnx # force ONNX (~3GB)
|
|
29
|
+
parakeet audio.ogg # transcribe (language auto-detected)
|
|
30
|
+
parakeet --version
|
|
40
31
|
```
|
|
41
32
|
|
|
42
|
-
|
|
33
|
+
Stdout: transcript. Stderr: errors. Pipe-friendly.
|
|
43
34
|
|
|
44
|
-
##
|
|
35
|
+
## Requirements
|
|
45
36
|
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
# On Linux/other: downloads ONNX models (~3GB)
|
|
50
|
-
parakeet install
|
|
37
|
+
- [Bun](https://bun.sh) >= 1.3
|
|
38
|
+
- [ffmpeg](https://ffmpeg.org) in PATH (ONNX backend only)
|
|
39
|
+
- ~3GB disk (ONNX models)
|
|
51
40
|
|
|
52
|
-
|
|
53
|
-
parakeet install --coreml # CoreML (macOS arm64 only)
|
|
54
|
-
parakeet install --onnx # ONNX (any platform)
|
|
41
|
+
## Benchmark
|
|
55
42
|
|
|
56
|
-
|
|
57
|
-
parakeet audio.ogg
|
|
43
|
+
> **~18x faster than Whisper** on Apple Silicon (CoreML)
|
|
58
44
|
|
|
59
|
-
|
|
60
|
-
|
|
45
|
+
<details>
|
|
46
|
+
<summary>MacBook Pro M3 Pro — 10 Russian voice messages</summary>
|
|
61
47
|
|
|
62
|
-
|
|
63
|
-
|
|
48
|
+
```
|
|
49
|
+
faster-whisper (CPU): 35.3s ██████████████████████████████████████
|
|
50
|
+
Parakeet (CoreML): 1.9s ██
|
|
64
51
|
```
|
|
65
52
|
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
10 Telegram voice messages (Russian, 3-10s each) on MacBook Pro M3 Pro:
|
|
71
|
-
|
|
72
|
-
| | faster-whisper (CPU) | Parakeet (CoreML) |
|
|
73
|
-
|---|---|---|
|
|
74
|
-
| **Total time** | 35.3s | 1.9s |
|
|
75
|
-
| **Speedup** | | **~18x faster** |
|
|
53
|
+
| | faster-whisper | Parakeet | Speedup |
|
|
54
|
+
|---|---|---|---|
|
|
55
|
+
| Apple Silicon (CoreML) | 35.3s | **1.9s** | **~18x** |
|
|
56
|
+
| Linux CI (ONNX) | 79.2s | **45.4s** | **~1.7x** |
|
|
76
57
|
|
|
77
|
-
|
|
58
|
+
</details>
|
|
78
59
|
|
|
79
|
-
|
|
60
|
+
Full results with transcripts: [BENCHMARK.md](BENCHMARK.md)
|
|
80
61
|
|
|
81
62
|
## Supported Languages
|
|
82
63
|
|
|
83
|
-
Bulgarian, Croatian, Czech, Danish, Dutch, English, Estonian, Finnish, French, German, Greek, Hungarian, Italian, Latvian, Lithuanian, Maltese, Polish, Portuguese, Romanian, Russian, Slovak, Slovenian, Spanish, Swedish, Ukrainian
|
|
64
|
+
:bulgaria: Bulgarian, :croatia: Croatian, :czech_republic: Czech, :denmark: Danish, :netherlands: Dutch, :gb: English, :estonia: Estonian, :finland: Finnish, :fr: French, :de: German, :greece: Greek, :hungary: Hungarian, :it: Italian, :latvia: Latvian, :lithuania: Lithuanian, :malta: Maltese, :poland: Polish, :portugal: Portuguese, :romania: Romanian, :ru: Russian, :slovakia: Slovak, :slovenia: Slovenian, :es: Spanish, :sweden: Swedish, :ukraine: Ukrainian
|
|
84
65
|
|
|
85
66
|
## How It Works
|
|
86
67
|
|
|
87
|
-
### CoreML backend (macOS Apple Silicon)
|
|
88
|
-
|
|
89
|
-
```
|
|
90
|
-
parakeet audio.ogg
|
|
91
|
-
|
|
|
92
|
-
+-- parakeet-coreml (Swift binary via FluidAudio)
|
|
93
|
-
| +-- CoreML inference on Apple Neural Engine
|
|
94
|
-
| +-- ~155x real-time on M4 Pro
|
|
95
|
-
|
|
|
96
|
-
stdout: transcript
|
|
97
|
-
```
|
|
98
|
-
|
|
99
|
-
Uses [FluidAudio](https://github.com/FluidInference/FluidAudio) with the [CoreML model](https://huggingface.co/FluidInference/parakeet-tdt-0.6b-v3-coreml). CoreML model files are downloaded by FluidAudio on first transcription.
|
|
100
|
-
|
|
101
|
-
### ONNX backend (cross-platform fallback)
|
|
102
|
-
|
|
103
68
|
```
|
|
104
69
|
parakeet audio.ogg
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
+-- nemo128.onnx: waveform -> 128-dim log-mel spectrogram
|
|
108
|
-
+-- per-utterance normalization (mean=0, std=1)
|
|
109
|
-
+-- encoder-model.onnx: mel features -> encoder output
|
|
110
|
-
+-- TDT greedy decoder: encoder output -> token IDs + durations
|
|
111
|
-
+-- vocab.txt: token IDs -> text
|
|
112
|
-
|
|
|
113
|
-
stdout: transcript
|
|
114
|
-
```
|
|
115
|
-
|
|
116
|
-
Uses [NVIDIA Parakeet TDT 0.6B v3](https://huggingface.co/nvidia/parakeet-tdt-0.6b-v3) exported to ONNX by [istupakov](https://huggingface.co/istupakov/parakeet-tdt-0.6b-v3-onnx). Run `parakeet install --onnx` to download models from HuggingFace (~3GB).
|
|
117
|
-
|
|
118
|
-
## Requirements
|
|
119
|
-
|
|
120
|
-
- [Bun](https://bun.sh) >= 1.3 (runtime)
|
|
121
|
-
- [ffmpeg](https://ffmpeg.org) installed and in PATH
|
|
122
|
-
- ~3GB disk space for model cache
|
|
123
|
-
- npm or Bun can be used as the package manager
|
|
124
|
-
|
|
125
|
-
### macOS (Apple Silicon)
|
|
126
|
-
|
|
127
|
-
Works natively on M1/M2/M3/M4 with CoreML acceleration. Install dependencies with Homebrew:
|
|
128
|
-
|
|
129
|
-
```bash
|
|
130
|
-
brew install ffmpeg
|
|
131
|
-
curl -fsSL https://bun.sh/install | bash
|
|
132
|
-
bun install -g @drakulavich/parakeet-cli # or: npm install -g @drakulavich/parakeet-cli
|
|
133
|
-
parakeet install # downloads CoreML binary
|
|
70
|
+
├── CoreML installed? → parakeet-coreml subprocess → stdout
|
|
71
|
+
└── ONNX installed? → ffmpeg → mel → encoder → decoder → stdout
|
|
134
72
|
```
|
|
135
73
|
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
```bash
|
|
139
|
-
apt install ffmpeg # or yum, pacman, etc.
|
|
140
|
-
curl -fsSL https://bun.sh/install | bash
|
|
141
|
-
bun install -g @drakulavich/parakeet-cli # or: npm install -g @drakulavich/parakeet-cli
|
|
142
|
-
parakeet install # downloads ONNX models (~3GB)
|
|
143
|
-
```
|
|
74
|
+
- **CoreML**: Swift binary wraps [FluidAudio](https://github.com/FluidInference/FluidAudio) + [CoreML model](https://huggingface.co/FluidInference/parakeet-tdt-0.6b-v3-coreml)
|
|
75
|
+
- **ONNX**: [NVIDIA Parakeet TDT 0.6B v3](https://huggingface.co/nvidia/parakeet-tdt-0.6b-v3) via [onnxruntime-node](https://www.npmjs.com/package/onnxruntime-node)
|
|
144
76
|
|
|
145
77
|
## OpenClaw Integration
|
|
146
78
|
|
|
147
|
-
|
|
79
|
+
Drop-in replacement for OpenClaw voice processing — no API keys, runs locally.
|
|
148
80
|
|
|
149
81
|
```json
|
|
150
|
-
|
|
151
|
-
"
|
|
152
|
-
"
|
|
153
|
-
"
|
|
154
|
-
|
|
155
|
-
{
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
"args": ["{{MediaPath}}"],
|
|
159
|
-
"timeoutSeconds": 120
|
|
160
|
-
}
|
|
161
|
-
],
|
|
162
|
-
"echoTranscript": false
|
|
82
|
+
{
|
|
83
|
+
"tools": {
|
|
84
|
+
"media": {
|
|
85
|
+
"audio": {
|
|
86
|
+
"enabled": true,
|
|
87
|
+
"models": [{"type": "cli", "command": "parakeet", "args": ["{{MediaPath}}"], "timeoutSeconds": 120}],
|
|
88
|
+
"echoTranscript": false
|
|
89
|
+
}
|
|
163
90
|
}
|
|
164
91
|
}
|
|
165
92
|
}
|
|
166
93
|
```
|
|
167
94
|
|
|
168
|
-
|
|
95
|
+
## Contributing
|
|
96
|
+
|
|
97
|
+
See [CONTRIBUTING.md](CONTRIBUTING.md).
|
|
169
98
|
|
|
170
99
|
## License
|
|
171
100
|
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@drakulavich/parakeet-cli",
|
|
3
|
-
"version": "0.5.3",
|
|
4
|
-
"description": "Fast
|
|
3
|
+
"version": "0.6.1",
|
|
4
|
+
"description": "Fast local speech-to-text CLI. CoreML on Apple Silicon, ONNX on CPU. 25 languages.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
7
7
|
"parakeet": "bin/parakeet.js"
|
|
@@ -49,6 +49,7 @@
|
|
|
49
49
|
},
|
|
50
50
|
"devDependencies": {
|
|
51
51
|
"@types/bun": "latest",
|
|
52
|
+
"fast-xml-parser": "^5.5.10",
|
|
52
53
|
"typescript": "^6.0.2"
|
|
53
54
|
},
|
|
54
55
|
"dependencies": {
|
package/src/models.ts
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import { join, dirname } from "path";
|
|
2
2
|
import { homedir } from "os";
|
|
3
3
|
import { existsSync, mkdirSync, chmodSync } from "fs";
|
|
4
|
+
import { isCoreMLInstalled } from "./coreml";
|
|
4
5
|
|
|
5
6
|
export const HF_REPO = "istupakov/parakeet-tdt-0.6b-v3-onnx";
|
|
6
7
|
|
|
@@ -21,6 +22,10 @@ export function isModelCached(dir?: string): boolean {
|
|
|
21
22
|
return MODEL_FILES.every((f) => existsSync(join(d, f)));
|
|
22
23
|
}
|
|
23
24
|
|
|
25
|
+
export function isModelInstalled(modelDir?: string): boolean {
|
|
26
|
+
return isCoreMLInstalled() || isModelCached(modelDir);
|
|
27
|
+
}
|
|
28
|
+
|
|
24
29
|
export function installHintError(headline: string): Error {
|
|
25
30
|
const lines = [
|
|
26
31
|
headline,
|
|
@@ -62,13 +67,35 @@ export async function downloadModel(noCache = false, modelDir?: string): Promise
|
|
|
62
67
|
|
|
63
68
|
console.error(`Downloading ${file}...`);
|
|
64
69
|
|
|
65
|
-
|
|
70
|
+
let res: Response;
|
|
71
|
+
try {
|
|
72
|
+
res = await fetch(url, { redirect: "follow" });
|
|
73
|
+
} catch (e) {
|
|
74
|
+
throw new Error(`failed to fetch ${file}: ${e instanceof Error ? e.message : e}`);
|
|
75
|
+
}
|
|
66
76
|
|
|
67
77
|
if (!res.ok) {
|
|
68
|
-
throw new Error(`failed to download
|
|
78
|
+
throw new Error(`failed to download ${file}: HTTP ${res.status}`);
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
if (!res.body) {
|
|
82
|
+
throw new Error(`empty response body for ${file}`);
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
const writer = Bun.file(dest).writer();
|
|
86
|
+
let bytes = 0;
|
|
87
|
+
try {
|
|
88
|
+
for await (const chunk of res.body) {
|
|
89
|
+
writer.write(chunk);
|
|
90
|
+
bytes += chunk.length;
|
|
91
|
+
}
|
|
92
|
+
} finally {
|
|
93
|
+
writer.end();
|
|
69
94
|
}
|
|
70
95
|
|
|
71
|
-
|
|
96
|
+
if (bytes === 0) {
|
|
97
|
+
throw new Error(`downloaded 0 bytes for ${file}`);
|
|
98
|
+
}
|
|
72
99
|
}
|
|
73
100
|
|
|
74
101
|
console.error("Model downloaded successfully.");
|
|
package/src/__tests__/audio.test.ts
@@ -1,28 +0,0 @@
|
|
|
1
|
-
import { describe, test, expect } from "bun:test";
|
|
2
|
-
import { convertToFloat32PCM } from "../audio";
|
|
3
|
-
import { spawnSync } from "child_process";
|
|
4
|
-
|
|
5
|
-
const hasFfmpeg = spawnSync("which", ["ffmpeg"]).status === 0;
|
|
6
|
-
|
|
7
|
-
describe.skipIf(!hasFfmpeg)("audio", () => {
|
|
8
|
-
test("converts WAV to 16kHz mono Float32Array", async () => {
|
|
9
|
-
const buffer = await convertToFloat32PCM("fixtures/silence.wav");
|
|
10
|
-
expect(buffer).toBeInstanceOf(Float32Array);
|
|
11
|
-
// 1 second at 16kHz = 16000 samples
|
|
12
|
-
expect(buffer.length).toBeGreaterThan(15000);
|
|
13
|
-
expect(buffer.length).toBeLessThan(17000);
|
|
14
|
-
});
|
|
15
|
-
|
|
16
|
-
test("throws on missing file", async () => {
|
|
17
|
-
expect(convertToFloat32PCM("nonexistent.wav")).rejects.toThrow(
|
|
18
|
-
"file not found"
|
|
19
|
-
);
|
|
20
|
-
});
|
|
21
|
-
|
|
22
|
-
test("throws on corrupt file", async () => {
|
|
23
|
-
await Bun.write("fixtures/corrupt.bin", "not audio data");
|
|
24
|
-
expect(convertToFloat32PCM("fixtures/corrupt.bin")).rejects.toThrow(
|
|
25
|
-
"failed to convert audio"
|
|
26
|
-
);
|
|
27
|
-
});
|
|
28
|
-
});
|