@drakulavich/parakeet-cli 0.5.3 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,172 +1,88 @@
1
- # parakeet-cli
1
+ # 🦜 parakeet-cli
2
2
 
3
3
  [![npm version](https://img.shields.io/npm/v/@drakulavich/parakeet-cli)](https://www.npmjs.com/package/@drakulavich/parakeet-cli)
4
4
  [![CI](https://github.com/drakulavich/parakeet-cli/actions/workflows/ci.yml/badge.svg)](https://github.com/drakulavich/parakeet-cli/actions/workflows/ci.yml)
5
5
  [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT)
6
6
  [![Bun](https://img.shields.io/badge/runtime-Bun-f9f1e1?logo=bun)](https://bun.sh)
7
+ [![Test Report](https://img.shields.io/badge/Test_Report-Allure-orange)](https://drakulavich.github.io/parakeet-cli/reports/allure)
7
8
 
8
- Fast multilingual speech-to-text CLI powered by NVIDIA Parakeet models. Zero Python. CoreML on Apple Silicon, ONNX on CPU.
9
+ Fast local speech-to-text. 25 languages. ~18x faster than Whisper on Apple Silicon.
9
10
 
10
- ## Features
11
+ - **CoreML on Apple Silicon** — ~155x real-time via [FluidAudio](https://github.com/FluidInference/FluidAudio)
12
+ - **ONNX on CPU** — cross-platform fallback, 3x faster than Whisper
13
+ - **Any audio format** — ffmpeg handles OGG, MP3, WAV, FLAC, M4A
14
+ - **Zero Python** — Bun + TypeScript, native Swift binary for CoreML
11
15
 
12
- - **25 languages** — automatic language detection, no prompting needed
13
- - **~155x real-time on Apple Silicon** — CoreML backend via [FluidAudio](https://github.com/FluidInference/FluidAudio) (1 min audio in ~0.4s)
14
- - **3x faster than Whisper** on CPU with ONNX fallback (see [benchmark](#benchmark))
15
- - **Zero Python** — pure TypeScript/Bun, native Swift binary for CoreML
16
- - **Smart install** — `parakeet install` auto-detects platform: CoreML on macOS arm64, ONNX elsewhere
17
- - **Any audio format** — ffmpeg handles OGG, MP3, WAV, FLAC, M4A, etc.
18
-
19
- ## Install
20
-
21
- Using Bun (recommended):
16
+ ## Quick Start
22
17
 
23
18
  ```bash
24
19
  bun install -g @drakulavich/parakeet-cli
20
+ parakeet install # CoreML on macOS arm64, ONNX elsewhere
21
+ parakeet audio.ogg # → transcript to stdout
25
22
  ```
26
23
 
27
- Using npm (requires Bun runtime installed):
28
-
29
- ```bash
30
- npm install -g @drakulavich/parakeet-cli
31
- ```
32
-
33
- Or clone and link locally:
34
-
35
- ```bash
36
- git clone https://github.com/drakulavich/parakeet-cli.git
37
- cd parakeet-cli
38
- bun install
39
- bun link
40
- ```
41
-
42
- > **Note:** Bun is required as the runtime — the CLI uses Bun-native APIs and TypeScript execution. You can use either `bun` or `npm` as the package manager to install it, but Bun must be available in PATH to run the `parakeet` command.
43
-
44
24
  ## Usage
45
25
 
46
26
  ```bash
47
- # Download backend (required before first use)
48
- # On macOS Apple Silicon: downloads CoreML binary
49
- # On Linux/other: downloads ONNX models (~3GB)
50
- parakeet install
27
+ parakeet install # auto-detect backend
28
+ parakeet install --coreml # force CoreML (macOS arm64)
29
+ parakeet install --onnx # force ONNX (~3GB)
30
+ parakeet audio.ogg # transcribe (language auto-detected)
31
+ parakeet --version
32
+ ```
51
33
 
52
- # Force a specific backend
53
- parakeet install --coreml # CoreML (macOS arm64 only)
54
- parakeet install --onnx # ONNX (any platform)
34
+ Stdout: transcript. Stderr: errors. Pipe-friendly.
55
35
 
56
- # Transcribe any audio file (language auto-detected)
57
- parakeet audio.ogg
36
+ ## Requirements
58
37
 
59
- # Force re-download
60
- parakeet install --no-cache
38
+ - [Bun](https://bun.sh) >= 1.3
39
+ - [ffmpeg](https://ffmpeg.org) in PATH (ONNX backend only)
40
+ - ~3GB disk (ONNX models)
61
41
 
62
- # Show version
63
- parakeet --version
64
- ```
42
+ ## Supported Languages
65
43
 
66
- Output goes to stdout, errors to stderr. Designed for piping and scripting.
44
+ :bulgaria: Bulgarian, :croatia: Croatian, :czech_republic: Czech, :denmark: Danish, :netherlands: Dutch, :gb: English, :estonia: Estonian, :finland: Finnish, :fr: French, :de: German, :greece: Greek, :hungary: Hungarian, :it: Italian, :latvia: Latvian, :lithuania: Lithuanian, :malta: Maltese, :poland: Polish, :portugal: Portuguese, :romania: Romanian, :ru: Russian, :slovakia: Slovak, :slovenia: Slovenian, :es: Spanish, :sweden: Swedish, :ukraine: Ukrainian
67
45
 
68
46
  ## Benchmark
69
47
 
70
- 10 Telegram voice messages (Russian, 3-10s each) on MacBook Pro M3 Pro:
48
+ MacBook Pro M3 Pro, 10 Russian voice messages:
71
49
 
72
50
  | | faster-whisper (CPU) | Parakeet (CoreML) |
73
51
  |---|---|---|
74
- | **Total time** | 35.3s | 1.9s |
75
- | **Speedup** | | **~18x faster** |
76
-
77
- Models: faster-whisper medium (int8) vs Parakeet TDT 0.6B v3 (CoreML, Apple Neural Engine).
78
-
79
- See [BENCHMARK.md](BENCHMARK.md) for full results with transcripts. Updated automatically on each release.
80
-
81
- ## Supported Languages
52
+ | **Total** | 35.3s | 1.9s |
53
+ | **Speed** | | **~18x faster** |
82
54
 
83
- Bulgarian, Croatian, Czech, Danish, Dutch, English, Estonian, Finnish, French, German, Greek, Hungarian, Italian, Latvian, Lithuanian, Maltese, Polish, Portuguese, Romanian, Russian, Slovak, Slovenian, Spanish, Swedish, Ukrainian.
55
+ Full results with transcripts: [BENCHMARK.md](BENCHMARK.md)
84
56
 
85
57
  ## How It Works
86
58
 
87
- ### CoreML backend (macOS Apple Silicon)
88
-
89
59
  ```
90
60
  parakeet audio.ogg
91
- |
92
- +-- parakeet-coreml (Swift binary via FluidAudio)
93
- | +-- CoreML inference on Apple Neural Engine
94
- | +-- ~155x real-time on M4 Pro
95
- |
96
- stdout: transcript
97
- ```
98
-
99
- Uses [FluidAudio](https://github.com/FluidInference/FluidAudio) with the [CoreML model](https://huggingface.co/FluidInference/parakeet-tdt-0.6b-v3-coreml). CoreML model files are downloaded by FluidAudio on first transcription.
100
-
101
- ### ONNX backend (cross-platform fallback)
102
-
61
+ ├── CoreML installed? → parakeet-coreml subprocess → stdout
62
+ └── ONNX installed? → ffmpeg mel → encoder → decoder → stdout
103
63
  ```
104
- parakeet audio.ogg
105
- |
106
- +-- ffmpeg: any format -> 16kHz mono float32
107
- +-- nemo128.onnx: waveform -> 128-dim log-mel spectrogram
108
- +-- per-utterance normalization (mean=0, std=1)
109
- +-- encoder-model.onnx: mel features -> encoder output
110
- +-- TDT greedy decoder: encoder output -> token IDs + durations
111
- +-- vocab.txt: token IDs -> text
112
- |
113
- stdout: transcript
114
- ```
115
-
116
- Uses [NVIDIA Parakeet TDT 0.6B v3](https://huggingface.co/nvidia/parakeet-tdt-0.6b-v3) exported to ONNX by [istupakov](https://huggingface.co/istupakov/parakeet-tdt-0.6b-v3-onnx). Run `parakeet install --onnx` to download models from HuggingFace (~3GB).
117
-
118
- ## Requirements
119
64
 
120
- - [Bun](https://bun.sh) >= 1.3 (runtime)
121
- - [ffmpeg](https://ffmpeg.org) installed and in PATH
122
- - ~3GB disk space for model cache
123
- - npm or Bun can be used as the package manager
124
-
125
- ### macOS (Apple Silicon)
126
-
127
- Works natively on M1/M2/M3/M4 with CoreML acceleration. Install dependencies with Homebrew:
128
-
129
- ```bash
130
- brew install ffmpeg
131
- curl -fsSL https://bun.sh/install | bash
132
- bun install -g @drakulavich/parakeet-cli # or: npm install -g @drakulavich/parakeet-cli
133
- parakeet install # downloads CoreML binary
134
- ```
135
-
136
- ### Linux
137
-
138
- ```bash
139
- apt install ffmpeg # or yum, pacman, etc.
140
- curl -fsSL https://bun.sh/install | bash
141
- bun install -g @drakulavich/parakeet-cli # or: npm install -g @drakulavich/parakeet-cli
142
- parakeet install # downloads ONNX models (~3GB)
143
- ```
65
+ - **CoreML**: Swift binary wraps [FluidAudio](https://github.com/FluidInference/FluidAudio) + [CoreML model](https://huggingface.co/FluidInference/parakeet-tdt-0.6b-v3-coreml)
66
+ - **ONNX**: [NVIDIA Parakeet TDT 0.6B v3](https://huggingface.co/nvidia/parakeet-tdt-0.6b-v3) via [onnxruntime-node](https://www.npmjs.com/package/onnxruntime-node)
144
67
 
145
68
  ## OpenClaw Integration
146
69
 
147
- To use parakeet as the voice transcription engine in [OpenClaw](https://docs.openclaw.ai), update `~/.openclaw/openclaw.json`:
70
+ Drop-in replacement for OpenClaw voice processing — no API keys, runs locally.
148
71
 
149
72
  ```json
150
- "tools": {
151
- "media": {
152
- "audio": {
153
- "enabled": true,
154
- "models": [
155
- {
156
- "type": "cli",
157
- "command": "parakeet",
158
- "args": ["{{MediaPath}}"],
159
- "timeoutSeconds": 120
160
- }
161
- ],
162
- "echoTranscript": false
73
+ {
74
+ "tools": {
75
+ "media": {
76
+ "audio": {
77
+ "enabled": true,
78
+ "models": [{"type": "cli", "command": "parakeet", "args": ["{{MediaPath}}"], "timeoutSeconds": 120}],
79
+ "echoTranscript": false
80
+ }
163
81
  }
164
82
  }
165
83
  }
166
84
  ```
167
85
 
168
- Then restart the gateway: `openclaw gateway restart`
169
-
170
86
  ## License
171
87
 
172
88
  MIT
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@drakulavich/parakeet-cli",
3
- "version": "0.5.3",
3
+ "version": "0.6.0",
4
4
  "description": "Fast multilingual speech-to-text CLI powered by NVIDIA Parakeet ONNX models",
5
5
  "type": "module",
6
6
  "bin": {
package/src/models.ts CHANGED
@@ -1,6 +1,7 @@
1
1
  import { join, dirname } from "path";
2
2
  import { homedir } from "os";
3
3
  import { existsSync, mkdirSync, chmodSync } from "fs";
4
+ import { isCoreMLInstalled } from "./coreml";
4
5
 
5
6
  export const HF_REPO = "istupakov/parakeet-tdt-0.6b-v3-onnx";
6
7
 
@@ -21,6 +22,10 @@ export function isModelCached(dir?: string): boolean {
21
22
  return MODEL_FILES.every((f) => existsSync(join(d, f)));
22
23
  }
23
24
 
25
+ export function isModelInstalled(modelDir?: string): boolean {
26
+ return isCoreMLInstalled() || isModelCached(modelDir);
27
+ }
28
+
24
29
  export function installHintError(headline: string): Error {
25
30
  const lines = [
26
31
  headline,
@@ -1,28 +0,0 @@
1
- import { describe, test, expect } from "bun:test";
2
- import { convertToFloat32PCM } from "../audio";
3
- import { spawnSync } from "child_process";
4
-
5
- const hasFfmpeg = spawnSync("which", ["ffmpeg"]).status === 0;
6
-
7
- describe.skipIf(!hasFfmpeg)("audio", () => {
8
- test("converts WAV to 16kHz mono Float32Array", async () => {
9
- const buffer = await convertToFloat32PCM("fixtures/silence.wav");
10
- expect(buffer).toBeInstanceOf(Float32Array);
11
- // 1 second at 16kHz = 16000 samples
12
- expect(buffer.length).toBeGreaterThan(15000);
13
- expect(buffer.length).toBeLessThan(17000);
14
- });
15
-
16
- test("throws on missing file", async () => {
17
- expect(convertToFloat32PCM("nonexistent.wav")).rejects.toThrow(
18
- "file not found"
19
- );
20
- });
21
-
22
- test("throws on corrupt file", async () => {
23
- await Bun.write("fixtures/corrupt.bin", "not audio data");
24
- expect(convertToFloat32PCM("fixtures/corrupt.bin")).rejects.toThrow(
25
- "failed to convert audio"
26
- );
27
- });
28
- });