@drakulavich/parakeet-cli 0.3.0 → 0.5.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +48 -25
- package/package.json +1 -1
- package/src/__tests__/coreml.test.ts +28 -0
- package/src/__tests__/models.test.ts +8 -1
- package/src/cli.ts +36 -5
- package/src/coreml.ts +35 -0
- package/src/lib.ts +5 -11
- package/src/models.ts +63 -4
- package/src/transcribe.ts +21 -8
package/README.md
CHANGED
|
@@ -1,13 +1,19 @@
|
|
|
1
1
|
# parakeet-cli
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
[npm package](https://www.npmjs.com/package/@drakulavich/parakeet-cli)
|
|
4
|
+
[CI status](https://github.com/drakulavich/parakeet-cli/actions/workflows/ci.yml)
|
|
5
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
6
|
+
[Bun](https://bun.sh)
|
|
7
|
+
|
|
8
|
+
Fast multilingual speech-to-text CLI powered by NVIDIA Parakeet models. Zero Python. CoreML on Apple Silicon, ONNX on CPU.
|
|
4
9
|
|
|
5
10
|
## Features
|
|
6
11
|
|
|
7
12
|
- **25 languages** — automatic language detection, no prompting needed
|
|
8
|
-
-
|
|
9
|
-
- **
|
|
10
|
-
- **
|
|
13
|
+
- **~155x real-time on Apple Silicon** — CoreML backend via [FluidAudio](https://github.com/FluidInference/FluidAudio) (1 min audio in ~0.4s)
|
|
14
|
+
- **3x faster than Whisper** on CPU with ONNX fallback (see [benchmark](#benchmark))
|
|
15
|
+
- **Zero Python** — pure TypeScript/Bun, native Swift binary for CoreML
|
|
16
|
+
- **Smart install** — `parakeet install` auto-detects platform: CoreML on macOS arm64, ONNX elsewhere
|
|
11
17
|
- **Any audio format** — ffmpeg handles OGG, MP3, WAV, FLAC, M4A, etc.
|
|
12
18
|
|
|
13
19
|
## Install
|
|
@@ -38,11 +44,20 @@ bun link
|
|
|
38
44
|
## Usage
|
|
39
45
|
|
|
40
46
|
```bash
|
|
47
|
+
# Download backend (required before first use)
|
|
48
|
+
# On macOS Apple Silicon: downloads CoreML binary
|
|
49
|
+
# On Linux/other: downloads ONNX models (~3GB)
|
|
50
|
+
parakeet install
|
|
51
|
+
|
|
52
|
+
# Force a specific backend
|
|
53
|
+
parakeet install --coreml # CoreML (macOS arm64 only)
|
|
54
|
+
parakeet install --onnx # ONNX (any platform)
|
|
55
|
+
|
|
41
56
|
# Transcribe any audio file (language auto-detected)
|
|
42
57
|
parakeet audio.ogg
|
|
43
58
|
|
|
44
|
-
# Force re-download
|
|
45
|
-
parakeet --no-cache
|
|
59
|
+
# Force re-download
|
|
60
|
+
parakeet install --no-cache
|
|
46
61
|
|
|
47
62
|
# Show version
|
|
48
63
|
parakeet --version
|
|
@@ -52,26 +67,16 @@ Output goes to stdout, errors to stderr. Designed for piping and scripting.
|
|
|
52
67
|
|
|
53
68
|
## Benchmark
|
|
54
69
|
|
|
55
|
-
|
|
56
|
-
VM: AMD EPYC 7763 8C/16T, 64GB RAM, CPU-only.
|
|
70
|
+
10 Telegram voice messages (Russian, 3-10s each) on MacBook Pro M3 Pro:
|
|
57
71
|
|
|
58
|
-
|
|
|
59
|
-
|
|
60
|
-
|
|
|
61
|
-
|
|
|
62
|
-
| 3 | 12.7s | 4.0s | Установи пока Клод Код | Установи пока клот кот |
|
|
63
|
-
| 4 | 13.1s | 4.1s | Какие еще Telegram-юзеры имеют доступ к тебе? | ки еще телеграм юзеры имеют доступ к тебе |
|
|
64
|
-
| 5 | 12.7s | 4.0s | Закомите изменения в ГИТ | Закомить изменения в Гет |
|
|
65
|
-
| 6 | 13.1s | 4.1s | Узнай второго юзера в телеграме. | Узнай второго юзера в Телеграме |
|
|
66
|
-
| 7 | 13.4s | 5.0s | Ты добавил себе в память информацию из Vantage Handbook Репозитория | Ты добавил себе в память информацию из Вентаж хэндбук репозитория |
|
|
67
|
-
| 8 | 13.1s | 4.8s | Покажи его username в телеграмме, хочу написать ему. | жи его юзернейм в телеграме хочу написать ему |
|
|
68
|
-
| 9 | 14.2s | 4.5s | Не нужно посылать сообщение с транскрипцией. Сразу выполняй инструкцию. | жно слать сообщение с транскрипцией сразу выполняй инструкцию |
|
|
69
|
-
| 10 | 13.5s | 4.8s | То, что находится в папке Workspace, ты тоже коммитишь? | То, что находится в папке Воркспейс, ты тоже комитишь? |
|
|
70
|
-
| **Total** | **132.1s** | **43.8s** | | |
|
|
72
|
+
| | faster-whisper (CPU) | Parakeet (CoreML) |
|
|
73
|
+
|---|---|---|
|
|
74
|
+
| **Total time** | 35.3s | 1.9s |
|
|
75
|
+
| **Speedup** | | **~18x faster** |
|
|
71
76
|
|
|
72
|
-
|
|
77
|
+
Models: faster-whisper medium (int8) vs Parakeet TDT 0.6B v3 (CoreML, Apple Neural Engine).
|
|
73
78
|
|
|
74
|
-
|
|
79
|
+
See [BENCHMARK.md](BENCHMARK.md) for full results with transcripts. Updated automatically on each release.
|
|
75
80
|
|
|
76
81
|
## Supported Languages
|
|
77
82
|
|
|
@@ -79,6 +84,22 @@ Bulgarian, Croatian, Czech, Danish, Dutch, English, Estonian, Finnish, French, G
|
|
|
79
84
|
|
|
80
85
|
## How It Works
|
|
81
86
|
|
|
87
|
+
### CoreML backend (macOS Apple Silicon)
|
|
88
|
+
|
|
89
|
+
```
|
|
90
|
+
parakeet audio.ogg
|
|
91
|
+
|
|
|
92
|
+
+-- parakeet-coreml (Swift binary via FluidAudio)
|
|
93
|
+
| +-- CoreML inference on Apple Neural Engine
|
|
94
|
+
| +-- ~155x real-time on M4 Pro
|
|
95
|
+
|
|
|
96
|
+
stdout: transcript
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
Uses [FluidAudio](https://github.com/FluidInference/FluidAudio) with the [CoreML model](https://huggingface.co/FluidInference/parakeet-tdt-0.6b-v3-coreml). CoreML model files are downloaded by FluidAudio on first transcription.
|
|
100
|
+
|
|
101
|
+
### ONNX backend (cross-platform fallback)
|
|
102
|
+
|
|
82
103
|
```
|
|
83
104
|
parakeet audio.ogg
|
|
84
105
|
|
|
|
@@ -92,7 +113,7 @@ parakeet audio.ogg
|
|
|
92
113
|
stdout: transcript
|
|
93
114
|
```
|
|
94
115
|
|
|
95
|
-
Uses [NVIDIA Parakeet TDT 0.6B v3](https://huggingface.co/nvidia/parakeet-tdt-0.6b-v3) exported to ONNX by [istupakov](https://huggingface.co/istupakov/parakeet-tdt-0.6b-v3-onnx).
|
|
116
|
+
Uses [NVIDIA Parakeet TDT 0.6B v3](https://huggingface.co/nvidia/parakeet-tdt-0.6b-v3) exported to ONNX by [istupakov](https://huggingface.co/istupakov/parakeet-tdt-0.6b-v3-onnx). Run `parakeet install --onnx` to download models from HuggingFace (~3GB).
|
|
96
117
|
|
|
97
118
|
## Requirements
|
|
98
119
|
|
|
@@ -103,12 +124,13 @@ Uses [NVIDIA Parakeet TDT 0.6B v3](https://huggingface.co/nvidia/parakeet-tdt-0.
|
|
|
103
124
|
|
|
104
125
|
### macOS (Apple Silicon)
|
|
105
126
|
|
|
106
|
-
Works natively on M1/M2/M3/M4. Install dependencies with Homebrew:
|
|
127
|
+
Works natively on M1/M2/M3/M4 with CoreML acceleration. Install dependencies with Homebrew:
|
|
107
128
|
|
|
108
129
|
```bash
|
|
109
130
|
brew install ffmpeg
|
|
110
131
|
curl -fsSL https://bun.sh/install | bash
|
|
111
132
|
bun install -g @drakulavich/parakeet-cli # or: npm install -g @drakulavich/parakeet-cli
|
|
133
|
+
parakeet install # downloads CoreML binary
|
|
112
134
|
```
|
|
113
135
|
|
|
114
136
|
### Linux
|
|
@@ -117,6 +139,7 @@ bun install -g @drakulavich/parakeet-cli # or: npm install -g @drakulavich/pa
|
|
|
117
139
|
apt install ffmpeg # or yum, pacman, etc.
|
|
118
140
|
curl -fsSL https://bun.sh/install | bash
|
|
119
141
|
bun install -g @drakulavich/parakeet-cli # or: npm install -g @drakulavich/parakeet-cli
|
|
142
|
+
parakeet install # downloads ONNX models (~3GB)
|
|
120
143
|
```
|
|
121
144
|
|
|
122
145
|
## OpenClaw Integration
|
package/package.json
CHANGED
|
package/src/__tests__/coreml.test.ts
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
import { describe, test, expect } from "bun:test";
|
|
2
|
+
import { getCoreMLBinPath, isMacArm64, isCoreMLInstalled } from "../coreml";
|
|
3
|
+
import { join } from "path";
|
|
4
|
+
import { homedir } from "os";
|
|
5
|
+
|
|
6
|
+
describe("coreml", () => {
|
|
7
|
+
test("getCoreMLBinPath returns correct cache path", () => {
|
|
8
|
+
const binPath = getCoreMLBinPath();
|
|
9
|
+
expect(binPath).toBe(
|
|
10
|
+
join(homedir(), ".cache", "parakeet", "coreml", "bin", "parakeet-coreml"),
|
|
11
|
+
);
|
|
12
|
+
});
|
|
13
|
+
|
|
14
|
+
test("isMacArm64 returns a boolean", () => {
|
|
15
|
+
const result = isMacArm64();
|
|
16
|
+
expect(typeof result).toBe("boolean");
|
|
17
|
+
if (process.platform === "darwin" && process.arch === "arm64") {
|
|
18
|
+
expect(result).toBe(true);
|
|
19
|
+
} else {
|
|
20
|
+
expect(result).toBe(false);
|
|
21
|
+
}
|
|
22
|
+
});
|
|
23
|
+
|
|
24
|
+
test("isCoreMLInstalled returns a boolean", () => {
|
|
25
|
+
const result = isCoreMLInstalled();
|
|
26
|
+
expect(typeof result).toBe("boolean");
|
|
27
|
+
});
|
|
28
|
+
});
|
package/src/__tests__/models.test.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { describe, test, expect } from "bun:test";
|
|
2
|
-
import { getModelDir, MODEL_FILES, HF_REPO } from "../models";
|
|
2
|
+
import { getModelDir, MODEL_FILES, HF_REPO, getCoreMLDownloadURL } from "../models";
|
|
3
3
|
import { join } from "path";
|
|
4
4
|
import { homedir } from "os";
|
|
5
5
|
|
|
@@ -20,4 +20,11 @@ describe("models", () => {
|
|
|
20
20
|
test("HF_REPO points to v3 ONNX repo", () => {
|
|
21
21
|
expect(HF_REPO).toBe("istupakov/parakeet-tdt-0.6b-v3-onnx");
|
|
22
22
|
});
|
|
23
|
+
|
|
24
|
+
test("getCoreMLDownloadURL includes version and correct filename", () => {
|
|
25
|
+
const url = getCoreMLDownloadURL("0.5.0");
|
|
26
|
+
expect(url).toBe(
|
|
27
|
+
"https://github.com/drakulavich/parakeet-cli/releases/download/v0.5.0/parakeet-coreml-darwin-arm64"
|
|
28
|
+
);
|
|
29
|
+
});
|
|
23
30
|
});
|
package/src/cli.ts
CHANGED
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
#!/usr/bin/env bun
|
|
2
2
|
|
|
3
3
|
import { transcribe } from "./lib";
|
|
4
|
+
import { downloadModel, downloadCoreML } from "./models";
|
|
5
|
+
import { isMacArm64 } from "./coreml";
|
|
4
6
|
|
|
5
7
|
async function main(): Promise<void> {
|
|
6
8
|
const args = process.argv.slice(2);
|
|
@@ -11,20 +13,49 @@ async function main(): Promise<void> {
|
|
|
11
13
|
process.exit(0);
|
|
12
14
|
}
|
|
13
15
|
|
|
14
|
-
const
|
|
15
|
-
|
|
16
|
+
const positional = args.filter((a) => !a.startsWith("--"));
|
|
17
|
+
|
|
18
|
+
if (positional[0] === "install") {
|
|
19
|
+
const noCache = args.includes("--no-cache");
|
|
20
|
+
const forceCoreML = args.includes("--coreml");
|
|
21
|
+
const forceOnnx = args.includes("--onnx");
|
|
22
|
+
|
|
23
|
+
try {
|
|
24
|
+
if (forceCoreML) {
|
|
25
|
+
if (!isMacArm64()) {
|
|
26
|
+
console.error("Error: CoreML backend is only available on macOS Apple Silicon.");
|
|
27
|
+
process.exit(1);
|
|
28
|
+
}
|
|
29
|
+
await downloadCoreML(noCache);
|
|
30
|
+
} else if (forceOnnx) {
|
|
31
|
+
await downloadModel(noCache);
|
|
32
|
+
} else if (isMacArm64()) {
|
|
33
|
+
await downloadCoreML(noCache);
|
|
34
|
+
} else {
|
|
35
|
+
await downloadModel(noCache);
|
|
36
|
+
}
|
|
37
|
+
} catch (err: unknown) {
|
|
38
|
+
const message = err instanceof Error ? err.message : String(err);
|
|
39
|
+
console.error(`Error: ${message}`);
|
|
40
|
+
process.exit(1);
|
|
41
|
+
}
|
|
42
|
+
process.exit(0);
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
const file = positional[0];
|
|
16
46
|
|
|
17
47
|
if (!file) {
|
|
18
|
-
console.error("Usage: parakeet [--
|
|
48
|
+
console.error("Usage: parakeet [--version] <audio_file>");
|
|
49
|
+
console.error(" parakeet install [--coreml | --onnx] [--no-cache]");
|
|
19
50
|
process.exit(1);
|
|
20
51
|
}
|
|
21
52
|
|
|
22
53
|
try {
|
|
23
|
-
const text = await transcribe(file
|
|
54
|
+
const text = await transcribe(file);
|
|
24
55
|
if (text) process.stdout.write(text + "\n");
|
|
25
56
|
} catch (err: unknown) {
|
|
26
57
|
const message = err instanceof Error ? err.message : String(err);
|
|
27
|
-
console.error(
|
|
58
|
+
console.error(message);
|
|
28
59
|
process.exit(1);
|
|
29
60
|
}
|
|
30
61
|
}
|
package/src/coreml.ts
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
import { join } from "path";
|
|
2
|
+
import { homedir } from "os";
|
|
3
|
+
import { existsSync } from "fs";
|
|
4
|
+
|
|
5
|
+
export function isMacArm64(): boolean {
|
|
6
|
+
return process.platform === "darwin" && process.arch === "arm64";
|
|
7
|
+
}
|
|
8
|
+
|
|
9
|
+
export function getCoreMLBinPath(): string {
|
|
10
|
+
return join(homedir(), ".cache", "parakeet", "coreml", "bin", "parakeet-coreml");
|
|
11
|
+
}
|
|
12
|
+
|
|
13
|
+
export function isCoreMLInstalled(): boolean {
|
|
14
|
+
return isMacArm64() && existsSync(getCoreMLBinPath());
|
|
15
|
+
}
|
|
16
|
+
|
|
17
|
+
export async function transcribeCoreML(audioPath: string): Promise<string> {
|
|
18
|
+
const binPath = getCoreMLBinPath();
|
|
19
|
+
const proc = Bun.spawn([binPath, audioPath], {
|
|
20
|
+
stdout: "pipe",
|
|
21
|
+
stderr: "pipe",
|
|
22
|
+
});
|
|
23
|
+
|
|
24
|
+
const [stdout, stderr, exitCode] = await Promise.all([
|
|
25
|
+
new Response(proc.stdout).text(),
|
|
26
|
+
new Response(proc.stderr).text(),
|
|
27
|
+
proc.exited,
|
|
28
|
+
]);
|
|
29
|
+
|
|
30
|
+
if (exitCode !== 0) {
|
|
31
|
+
throw new Error(stderr);
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
return stdout.trim();
|
|
35
|
+
}
|
package/src/lib.ts
CHANGED
|
@@ -1,11 +1,9 @@
|
|
|
1
1
|
import { existsSync } from "fs";
|
|
2
|
-
import { transcribe as internalTranscribe } from "./transcribe";
|
|
2
|
+
import { transcribe as internalTranscribe, type TranscribeOptions } from "./transcribe";
|
|
3
|
+
import { downloadModel, downloadCoreML } from "./models";
|
|
3
4
|
|
|
4
|
-
export
|
|
5
|
-
|
|
6
|
-
noCache?: boolean;
|
|
7
|
-
modelDir?: string;
|
|
8
|
-
}
|
|
5
|
+
export type { TranscribeOptions };
|
|
6
|
+
export { downloadModel, downloadCoreML };
|
|
9
7
|
|
|
10
8
|
export async function transcribe(
|
|
11
9
|
audioPath: string,
|
|
@@ -15,9 +13,5 @@ export async function transcribe(
|
|
|
15
13
|
throw new Error(`File not found: ${audioPath}`);
|
|
16
14
|
}
|
|
17
15
|
|
|
18
|
-
return internalTranscribe(audioPath,
|
|
19
|
-
noCache: options.noCache ?? false,
|
|
20
|
-
beamWidth: options.beamWidth,
|
|
21
|
-
modelDir: options.modelDir,
|
|
22
|
-
});
|
|
16
|
+
return internalTranscribe(audioPath, options);
|
|
23
17
|
}
|
package/src/models.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
import { join } from "path";
|
|
1
|
+
import { join, dirname } from "path";
|
|
2
2
|
import { homedir } from "os";
|
|
3
|
-
import { existsSync, mkdirSync } from "fs";
|
|
3
|
+
import { existsSync, mkdirSync, chmodSync } from "fs";
|
|
4
4
|
|
|
5
5
|
export const HF_REPO = "istupakov/parakeet-tdt-0.6b-v3-onnx";
|
|
6
6
|
|
|
@@ -21,10 +21,34 @@ export function isModelCached(dir?: string): boolean {
|
|
|
21
21
|
return MODEL_FILES.every((f) => existsSync(join(d, f)));
|
|
22
22
|
}
|
|
23
23
|
|
|
24
|
-
export
|
|
24
|
+
export function installHintError(headline: string): Error {
|
|
25
|
+
const lines = [
|
|
26
|
+
headline,
|
|
27
|
+
"",
|
|
28
|
+
"╔══════════════════════════════════════════════════════════╗",
|
|
29
|
+
"║ Please run the following command to get started: ║",
|
|
30
|
+
"║ ║",
|
|
31
|
+
"║ bunx @drakulavich/parakeet-cli install ║",
|
|
32
|
+
"╚══════════════════════════════════════════════════════════╝",
|
|
33
|
+
];
|
|
34
|
+
return new Error(lines.join("\n"));
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
export function requireModel(modelDir?: string): string {
|
|
38
|
+
const dir = modelDir ?? getModelDir();
|
|
39
|
+
|
|
40
|
+
if (!isModelCached(dir)) {
|
|
41
|
+
throw installHintError(`Error: Model not found at ${dir}`);
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
return dir;
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
export async function downloadModel(noCache = false, modelDir?: string): Promise<string> {
|
|
25
48
|
const dir = modelDir ?? getModelDir();
|
|
26
49
|
|
|
27
|
-
if (!noCache && isModelCached()) {
|
|
50
|
+
if (!noCache && isModelCached(dir)) {
|
|
51
|
+
console.error("Model already downloaded.");
|
|
28
52
|
return dir;
|
|
29
53
|
}
|
|
30
54
|
|
|
@@ -47,5 +71,40 @@ export async function ensureModel(noCache = false, modelDir?: string): Promise<s
|
|
|
47
71
|
await Bun.write(dest, res);
|
|
48
72
|
}
|
|
49
73
|
|
|
74
|
+
console.error("Model downloaded successfully.");
|
|
50
75
|
return dir;
|
|
51
76
|
}
|
|
77
|
+
|
|
78
|
+
export function getCoreMLDownloadURL(version: string): string {
|
|
79
|
+
return `https://github.com/drakulavich/parakeet-cli/releases/download/v${version}/parakeet-coreml-darwin-arm64`;
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
export async function downloadCoreML(noCache = false): Promise<string> {
|
|
83
|
+
const { getCoreMLBinPath } = await import("./coreml");
|
|
84
|
+
const binPath = getCoreMLBinPath();
|
|
85
|
+
|
|
86
|
+
if (!noCache && existsSync(binPath)) {
|
|
87
|
+
console.error("CoreML backend already installed.");
|
|
88
|
+
return binPath;
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
const pkg = await Bun.file(new URL("../package.json", import.meta.url)).json();
|
|
92
|
+
const url = getCoreMLDownloadURL(pkg.version);
|
|
93
|
+
|
|
94
|
+
console.error("Downloading parakeet-coreml binary...");
|
|
95
|
+
|
|
96
|
+
const res = await fetch(url, { redirect: "follow" });
|
|
97
|
+
|
|
98
|
+
if (!res.ok) {
|
|
99
|
+
throw new Error(`Failed to download CoreML binary: ${url} (${res.status})`);
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
mkdirSync(dirname(binPath), { recursive: true });
|
|
103
|
+
|
|
104
|
+
await Bun.write(binPath, res);
|
|
105
|
+
|
|
106
|
+
chmodSync(binPath, 0o755);
|
|
107
|
+
|
|
108
|
+
console.error("CoreML backend installed successfully.");
|
|
109
|
+
return binPath;
|
|
110
|
+
}
|
package/src/transcribe.ts
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { requireModel, isModelCached, installHintError } from "./models";
|
|
2
|
+
import { isCoreMLInstalled, transcribeCoreML } from "./coreml";
|
|
2
3
|
import { convertToFloat32PCM } from "./audio";
|
|
3
4
|
import { initPreprocessor, preprocess } from "./preprocess";
|
|
4
5
|
import { initEncoder, encode } from "./encoder";
|
|
@@ -25,7 +26,6 @@ const DECODER_LAYERS = 2;
|
|
|
25
26
|
const DECODER_HIDDEN = 640;
|
|
26
27
|
|
|
27
28
|
export interface TranscribeOptions {
|
|
28
|
-
noCache?: boolean;
|
|
29
29
|
beamWidth?: number;
|
|
30
30
|
modelDir?: string;
|
|
31
31
|
}
|
|
@@ -34,20 +34,33 @@ export interface TranscribeOptions {
|
|
|
34
34
|
const MIN_AUDIO_SAMPLES = 1600;
|
|
35
35
|
|
|
36
36
|
export async function transcribe(audioPath: string, opts: TranscribeOptions = {}): Promise<string> {
|
|
37
|
+
if (isCoreMLInstalled()) {
|
|
38
|
+
return transcribeCoreML(audioPath);
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
if (isModelCached(opts.modelDir)) {
|
|
42
|
+
return transcribeOnnx(audioPath, opts);
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
throw installHintError("Error: No transcription backend is installed");
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
async function transcribeOnnx(audioPath: string, opts: TranscribeOptions): Promise<string> {
|
|
37
49
|
const audio = await convertToFloat32PCM(audioPath);
|
|
38
50
|
|
|
39
51
|
if (audio.length < MIN_AUDIO_SAMPLES) {
|
|
40
52
|
return "";
|
|
41
53
|
}
|
|
42
54
|
|
|
43
|
-
const noCache = opts.noCache ?? false;
|
|
44
55
|
const beamWidth = opts.beamWidth ?? 4;
|
|
45
|
-
const modelDir =
|
|
46
|
-
const tokenizer = await Tokenizer.fromFile(join(modelDir, "vocab.txt"));
|
|
56
|
+
const modelDir = requireModel(opts.modelDir);
|
|
47
57
|
|
|
48
|
-
await
|
|
49
|
-
|
|
50
|
-
|
|
58
|
+
const [tokenizer] = await Promise.all([
|
|
59
|
+
Tokenizer.fromFile(join(modelDir, "vocab.txt")),
|
|
60
|
+
initPreprocessor(modelDir),
|
|
61
|
+
initEncoder(modelDir),
|
|
62
|
+
initDecoder(modelDir),
|
|
63
|
+
]);
|
|
51
64
|
|
|
52
65
|
const { features, length } = await preprocess(audio);
|
|
53
66
|
const { encoderOutput, encodedLength } = await encode(features, length);
|