@drakulavich/parakeet-cli 0.5.3 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +40 -124
- package/package.json +1 -1
- package/src/models.ts +5 -0
- package/src/__tests__/audio.test.ts +0 -28
package/README.md
CHANGED
|
@@ -1,172 +1,88 @@
|
|
|
1
|
-
# parakeet-cli
|
|
1
|
+
# 🦜 parakeet-cli
|
|
2
2
|
|
|
3
3
|
[](https://www.npmjs.com/package/@drakulavich/parakeet-cli)
|
|
4
4
|
[](https://github.com/drakulavich/parakeet-cli/actions/workflows/ci.yml)
|
|
5
5
|
[](https://opensource.org/licenses/MIT)
|
|
6
6
|
[](https://bun.sh)
|
|
7
|
+
[](https://drakulavich.github.io/parakeet-cli/reports/allure)
|
|
7
8
|
|
|
8
|
-
Fast
|
|
9
|
+
Fast local speech-to-text. 25 languages. ~18x faster than Whisper on Apple Silicon.
|
|
9
10
|
|
|
10
|
-
|
|
11
|
+
- **CoreML on Apple Silicon** — ~155x real-time via [FluidAudio](https://github.com/FluidInference/FluidAudio)
|
|
12
|
+
- **ONNX on CPU** — cross-platform fallback, 3x faster than Whisper
|
|
13
|
+
- **Any audio format** — ffmpeg handles OGG, MP3, WAV, FLAC, M4A
|
|
14
|
+
- **Zero Python** — Bun + TypeScript, native Swift binary for CoreML
|
|
11
15
|
|
|
12
|
-
|
|
13
|
-
- **~155x real-time on Apple Silicon** — CoreML backend via [FluidAudio](https://github.com/FluidInference/FluidAudio) (1 min audio in ~0.4s)
|
|
14
|
-
- **3x faster than Whisper** on CPU with ONNX fallback (see [benchmark](#benchmark))
|
|
15
|
-
- **Zero Python** — pure TypeScript/Bun, native Swift binary for CoreML
|
|
16
|
-
- **Smart install** — `parakeet install` auto-detects platform: CoreML on macOS arm64, ONNX elsewhere
|
|
17
|
-
- **Any audio format** — ffmpeg handles OGG, MP3, WAV, FLAC, M4A, etc.
|
|
18
|
-
|
|
19
|
-
## Install
|
|
20
|
-
|
|
21
|
-
Using Bun (recommended):
|
|
16
|
+
## Quick Start
|
|
22
17
|
|
|
23
18
|
```bash
|
|
24
19
|
bun install -g @drakulavich/parakeet-cli
|
|
20
|
+
parakeet install # CoreML on macOS arm64, ONNX elsewhere
|
|
21
|
+
parakeet audio.ogg # → transcript to stdout
|
|
25
22
|
```
|
|
26
23
|
|
|
27
|
-
Using npm (requires Bun runtime installed):
|
|
28
|
-
|
|
29
|
-
```bash
|
|
30
|
-
npm install -g @drakulavich/parakeet-cli
|
|
31
|
-
```
|
|
32
|
-
|
|
33
|
-
Or clone and link locally:
|
|
34
|
-
|
|
35
|
-
```bash
|
|
36
|
-
git clone https://github.com/drakulavich/parakeet-cli.git
|
|
37
|
-
cd parakeet-cli
|
|
38
|
-
bun install
|
|
39
|
-
bun link
|
|
40
|
-
```
|
|
41
|
-
|
|
42
|
-
> **Note:** Bun is required as the runtime — the CLI uses Bun-native APIs and TypeScript execution. You can use either `bun` or `npm` as the package manager to install it, but Bun must be available in PATH to run the `parakeet` command.
|
|
43
|
-
|
|
44
24
|
## Usage
|
|
45
25
|
|
|
46
26
|
```bash
|
|
47
|
-
#
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
parakeet
|
|
27
|
+
parakeet install # auto-detect backend
|
|
28
|
+
parakeet install --coreml # force CoreML (macOS arm64)
|
|
29
|
+
parakeet install --onnx # force ONNX (~3GB)
|
|
30
|
+
parakeet audio.ogg # transcribe (language auto-detected)
|
|
31
|
+
parakeet --version
|
|
32
|
+
```
|
|
51
33
|
|
|
52
|
-
|
|
53
|
-
parakeet install --coreml # CoreML (macOS arm64 only)
|
|
54
|
-
parakeet install --onnx # ONNX (any platform)
|
|
34
|
+
Stdout: transcript. Stderr: errors. Pipe-friendly.
|
|
55
35
|
|
|
56
|
-
|
|
57
|
-
parakeet audio.ogg
|
|
36
|
+
## Requirements
|
|
58
37
|
|
|
59
|
-
|
|
60
|
-
|
|
38
|
+
- [Bun](https://bun.sh) >= 1.3
|
|
39
|
+
- [ffmpeg](https://ffmpeg.org) in PATH (ONNX backend only)
|
|
40
|
+
- ~3GB disk (ONNX models)
|
|
61
41
|
|
|
62
|
-
|
|
63
|
-
parakeet --version
|
|
64
|
-
```
|
|
42
|
+
## Supported Languages
|
|
65
43
|
|
|
66
|
-
|
|
44
|
+
:bulgaria: Bulgarian, :croatia: Croatian, :czech_republic: Czech, :denmark: Danish, :netherlands: Dutch, :gb: English, :estonia: Estonian, :finland: Finnish, :fr: French, :de: German, :greece: Greek, :hungary: Hungarian, :it: Italian, :latvia: Latvian, :lithuania: Lithuanian, :malta: Maltese, :poland: Polish, :portugal: Portuguese, :romania: Romanian, :ru: Russian, :slovakia: Slovak, :slovenia: Slovenian, :es: Spanish, :sweden: Swedish, :ukraine: Ukrainian
|
|
67
45
|
|
|
68
46
|
## Benchmark
|
|
69
47
|
|
|
70
|
-
|
|
48
|
+
MacBook Pro M3 Pro — 10 Russian voice messages:
|
|
71
49
|
|
|
72
50
|
| | faster-whisper (CPU) | Parakeet (CoreML) |
|
|
73
51
|
|---|---|---|
|
|
74
|
-
| **Total
|
|
75
|
-
| **
|
|
76
|
-
|
|
77
|
-
Models: faster-whisper medium (int8) vs Parakeet TDT 0.6B v3 (CoreML, Apple Neural Engine).
|
|
78
|
-
|
|
79
|
-
See [BENCHMARK.md](BENCHMARK.md) for full results with transcripts. Updated automatically on each release.
|
|
80
|
-
|
|
81
|
-
## Supported Languages
|
|
52
|
+
| **Total** | 35.3s | 1.9s |
|
|
53
|
+
| **Speed** | | **~18x faster** |
|
|
82
54
|
|
|
83
|
-
|
|
55
|
+
Full results with transcripts: [BENCHMARK.md](BENCHMARK.md)
|
|
84
56
|
|
|
85
57
|
## How It Works
|
|
86
58
|
|
|
87
|
-
### CoreML backend (macOS Apple Silicon)
|
|
88
|
-
|
|
89
59
|
```
|
|
90
60
|
parakeet audio.ogg
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
| +-- CoreML inference on Apple Neural Engine
|
|
94
|
-
| +-- ~155x real-time on M4 Pro
|
|
95
|
-
|
|
|
96
|
-
stdout: transcript
|
|
97
|
-
```
|
|
98
|
-
|
|
99
|
-
Uses [FluidAudio](https://github.com/FluidInference/FluidAudio) with the [CoreML model](https://huggingface.co/FluidInference/parakeet-tdt-0.6b-v3-coreml). CoreML model files are downloaded by FluidAudio on first transcription.
|
|
100
|
-
|
|
101
|
-
### ONNX backend (cross-platform fallback)
|
|
102
|
-
|
|
61
|
+
├── CoreML installed? → parakeet-coreml subprocess → stdout
|
|
62
|
+
└── ONNX installed? → ffmpeg → mel → encoder → decoder → stdout
|
|
103
63
|
```
|
|
104
|
-
parakeet audio.ogg
|
|
105
|
-
|
|
|
106
|
-
+-- ffmpeg: any format -> 16kHz mono float32
|
|
107
|
-
+-- nemo128.onnx: waveform -> 128-dim log-mel spectrogram
|
|
108
|
-
+-- per-utterance normalization (mean=0, std=1)
|
|
109
|
-
+-- encoder-model.onnx: mel features -> encoder output
|
|
110
|
-
+-- TDT greedy decoder: encoder output -> token IDs + durations
|
|
111
|
-
+-- vocab.txt: token IDs -> text
|
|
112
|
-
|
|
|
113
|
-
stdout: transcript
|
|
114
|
-
```
|
|
115
|
-
|
|
116
|
-
Uses [NVIDIA Parakeet TDT 0.6B v3](https://huggingface.co/nvidia/parakeet-tdt-0.6b-v3) exported to ONNX by [istupakov](https://huggingface.co/istupakov/parakeet-tdt-0.6b-v3-onnx). Run `parakeet install --onnx` to download models from HuggingFace (~3GB).
|
|
117
|
-
|
|
118
|
-
## Requirements
|
|
119
64
|
|
|
120
|
-
- [
|
|
121
|
-
- [
|
|
122
|
-
- ~3GB disk space for model cache
|
|
123
|
-
- npm or Bun can be used as the package manager
|
|
124
|
-
|
|
125
|
-
### macOS (Apple Silicon)
|
|
126
|
-
|
|
127
|
-
Works natively on M1/M2/M3/M4 with CoreML acceleration. Install dependencies with Homebrew:
|
|
128
|
-
|
|
129
|
-
```bash
|
|
130
|
-
brew install ffmpeg
|
|
131
|
-
curl -fsSL https://bun.sh/install | bash
|
|
132
|
-
bun install -g @drakulavich/parakeet-cli # or: npm install -g @drakulavich/parakeet-cli
|
|
133
|
-
parakeet install # downloads CoreML binary
|
|
134
|
-
```
|
|
135
|
-
|
|
136
|
-
### Linux
|
|
137
|
-
|
|
138
|
-
```bash
|
|
139
|
-
apt install ffmpeg # or yum, pacman, etc.
|
|
140
|
-
curl -fsSL https://bun.sh/install | bash
|
|
141
|
-
bun install -g @drakulavich/parakeet-cli # or: npm install -g @drakulavich/parakeet-cli
|
|
142
|
-
parakeet install # downloads ONNX models (~3GB)
|
|
143
|
-
```
|
|
65
|
+
- **CoreML**: Swift binary wraps [FluidAudio](https://github.com/FluidInference/FluidAudio) + [CoreML model](https://huggingface.co/FluidInference/parakeet-tdt-0.6b-v3-coreml)
|
|
66
|
+
- **ONNX**: [NVIDIA Parakeet TDT 0.6B v3](https://huggingface.co/nvidia/parakeet-tdt-0.6b-v3) via [onnxruntime-node](https://www.npmjs.com/package/onnxruntime-node)
|
|
144
67
|
|
|
145
68
|
## OpenClaw Integration
|
|
146
69
|
|
|
147
|
-
|
|
70
|
+
Drop-in replacement for OpenClaw voice processing — no API keys, runs locally.
|
|
148
71
|
|
|
149
72
|
```json
|
|
150
|
-
|
|
151
|
-
"
|
|
152
|
-
"
|
|
153
|
-
"
|
|
154
|
-
|
|
155
|
-
{
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
"args": ["{{MediaPath}}"],
|
|
159
|
-
"timeoutSeconds": 120
|
|
160
|
-
}
|
|
161
|
-
],
|
|
162
|
-
"echoTranscript": false
|
|
73
|
+
{
|
|
74
|
+
"tools": {
|
|
75
|
+
"media": {
|
|
76
|
+
"audio": {
|
|
77
|
+
"enabled": true,
|
|
78
|
+
"models": [{"type": "cli", "command": "parakeet", "args": ["{{MediaPath}}"], "timeoutSeconds": 120}],
|
|
79
|
+
"echoTranscript": false
|
|
80
|
+
}
|
|
163
81
|
}
|
|
164
82
|
}
|
|
165
83
|
}
|
|
166
84
|
```
|
|
167
85
|
|
|
168
|
-
Then restart the gateway: `openclaw gateway restart`
|
|
169
|
-
|
|
170
86
|
## License
|
|
171
87
|
|
|
172
88
|
MIT
|
package/package.json
CHANGED
package/src/models.ts
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import { join, dirname } from "path";
|
|
2
2
|
import { homedir } from "os";
|
|
3
3
|
import { existsSync, mkdirSync, chmodSync } from "fs";
|
|
4
|
+
import { isCoreMLInstalled } from "./coreml";
|
|
4
5
|
|
|
5
6
|
export const HF_REPO = "istupakov/parakeet-tdt-0.6b-v3-onnx";
|
|
6
7
|
|
|
@@ -21,6 +22,10 @@ export function isModelCached(dir?: string): boolean {
|
|
|
21
22
|
return MODEL_FILES.every((f) => existsSync(join(d, f)));
|
|
22
23
|
}
|
|
23
24
|
|
|
25
|
+
export function isModelInstalled(modelDir?: string): boolean {
|
|
26
|
+
return isCoreMLInstalled() || isModelCached(modelDir);
|
|
27
|
+
}
|
|
28
|
+
|
|
24
29
|
export function installHintError(headline: string): Error {
|
|
25
30
|
const lines = [
|
|
26
31
|
headline,
|
|
package/src/__tests__/audio.test.ts
DELETED
|
@@ -1,28 +0,0 @@
|
|
|
1
|
-
import { describe, test, expect } from "bun:test";
|
|
2
|
-
import { convertToFloat32PCM } from "../audio";
|
|
3
|
-
import { spawnSync } from "child_process";
|
|
4
|
-
|
|
5
|
-
const hasFfmpeg = spawnSync("which", ["ffmpeg"]).status === 0;
|
|
6
|
-
|
|
7
|
-
describe.skipIf(!hasFfmpeg)("audio", () => {
|
|
8
|
-
test("converts WAV to 16kHz mono Float32Array", async () => {
|
|
9
|
-
const buffer = await convertToFloat32PCM("fixtures/silence.wav");
|
|
10
|
-
expect(buffer).toBeInstanceOf(Float32Array);
|
|
11
|
-
// 1 second at 16kHz = 16000 samples
|
|
12
|
-
expect(buffer.length).toBeGreaterThan(15000);
|
|
13
|
-
expect(buffer.length).toBeLessThan(17000);
|
|
14
|
-
});
|
|
15
|
-
|
|
16
|
-
test("throws on missing file", async () => {
|
|
17
|
-
expect(convertToFloat32PCM("nonexistent.wav")).rejects.toThrow(
|
|
18
|
-
"file not found"
|
|
19
|
-
);
|
|
20
|
-
});
|
|
21
|
-
|
|
22
|
-
test("throws on corrupt file", async () => {
|
|
23
|
-
await Bun.write("fixtures/corrupt.bin", "not audio data");
|
|
24
|
-
expect(convertToFloat32PCM("fixtures/corrupt.bin")).rejects.toThrow(
|
|
25
|
-
"failed to convert audio"
|
|
26
|
-
);
|
|
27
|
-
});
|
|
28
|
-
});
|