@drakulavich/parakeet-cli 0.7.4 → 0.8.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/bin/parakeet.js +3 -1
- package/package.json +5 -3
- package/src/audio.ts +4 -1
- package/src/cli.ts +182 -41
- package/src/coreml-install.ts +25 -14
- package/src/lib.ts +1 -1
- package/src/onnx-install.ts +11 -19
- package/src/progress.ts +80 -0
- package/src/status.ts +175 -0
- package/src/transcribe.ts +10 -1
- package/src/__tests__/audio.test.ts +0 -36
- package/src/__tests__/benchmark-report.test.ts +0 -67
- package/src/__tests__/coreml-install.test.ts +0 -281
- package/src/__tests__/coreml.test.ts +0 -24
- package/src/__tests__/decoder.test.ts +0 -50
- package/src/__tests__/lib.test.ts +0 -8
- package/src/__tests__/tokenizer.test.ts +0 -41
package/README.md
CHANGED
package/bin/parakeet.js
CHANGED
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@drakulavich/parakeet-cli",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.8.1",
|
|
4
4
|
"description": "Fast local speech-to-text CLI. CoreML on Apple Silicon, ONNX on CPU. 25 languages.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
@@ -20,7 +20,7 @@
|
|
|
20
20
|
],
|
|
21
21
|
"scripts": {
|
|
22
22
|
"test": "bun test",
|
|
23
|
-
"test:unit": "bun test
|
|
23
|
+
"test:unit": "bun test tests/unit/",
|
|
24
24
|
"test:integration": "bun test tests/integration/"
|
|
25
25
|
},
|
|
26
26
|
"keywords": [
|
|
@@ -53,7 +53,9 @@
|
|
|
53
53
|
"typescript": "^6.0.2"
|
|
54
54
|
},
|
|
55
55
|
"dependencies": {
|
|
56
|
+
"citty": "^0.2.2",
|
|
56
57
|
"onnxruntime-node": "^1.24.0",
|
|
57
|
-
"picocolors": "^1.1.1"
|
|
58
|
+
"picocolors": "^1.1.1",
|
|
59
|
+
"tinyld": "^1.3.4"
|
|
58
60
|
}
|
|
59
61
|
}
|
package/src/audio.ts
CHANGED
|
@@ -55,7 +55,10 @@ async function convertAudioWithFfmpeg(
|
|
|
55
55
|
]);
|
|
56
56
|
|
|
57
57
|
if (exitCode !== 0) {
|
|
58
|
-
|
|
58
|
+
const lastLine = stderr.trim().split("\n").pop() ?? "unknown error";
|
|
59
|
+
throw new Error(
|
|
60
|
+
`Audio conversion failed: ${lastLine}\n File: ${inputPath}\n Fix: Ensure the file is a valid audio format. Run "ffmpeg -i ${inputPath}" to diagnose.`,
|
|
61
|
+
);
|
|
59
62
|
}
|
|
60
63
|
|
|
61
64
|
return tmpPath;
|
package/src/cli.ts
CHANGED
|
@@ -1,60 +1,74 @@
|
|
|
1
1
|
#!/usr/bin/env bun
|
|
2
2
|
|
|
3
|
+
import { defineCommand, runMain } from "citty";
|
|
4
|
+
import { detect } from "tinyld";
|
|
3
5
|
import { transcribe } from "./lib";
|
|
4
6
|
import { downloadModel } from "./onnx-install";
|
|
5
7
|
import { downloadCoreML } from "./coreml-install";
|
|
6
8
|
import { isMacArm64 } from "./coreml";
|
|
7
9
|
import { log } from "./log";
|
|
10
|
+
import { showStatus } from "./status";
|
|
8
11
|
|
|
9
|
-
|
|
10
|
-
|
|
12
|
+
/**
 * Guesses the language of a transcript.
 *
 * @param text Transcribed text; may be empty.
 * @returns The value produced by tinyld's `detect` for `text`, or `""` when
 *   `text` is empty (detection is skipped entirely in that case).
 */
export function detectLanguage(text: string): string {
  if (!text) return "";
  return detect(text);
}
|
|
16
|
+
|
|
17
|
+
/**
 * Compares the language the user expected with the detected one.
 *
 * The comparison ignores letter case and surrounding whitespace so that
 * `--lang EN` matches a detected `en`.
 *
 * @param expected Language code supplied by the user, if any.
 * @param detected Language code produced by detection; `""` means unknown.
 * @returns A warning message when both codes are present and differ,
 *   otherwise `null`.
 */
export function checkLanguageMismatch(expected: string | undefined, detected: string): string | null {
  if (!expected || !detected) return null;

  const normalize = (code: string): string => code.trim().toLowerCase();
  if (normalize(expected) === normalize(detected)) return null;

  // The message echoes the values exactly as received.
  return `warning: expected language "${expected}" but detected "${detected}"`;
}
|
|
21
|
+
|
|
22
|
+
/** Options accepted by the install flow. */
export interface InstallOptions {
  /** Force the CoreML backend. */
  coreml: boolean;
  /** Force the ONNX backend. */
  onnx: boolean;
  /** Passed through to the download functions as their `noCache` argument. */
  noCache: boolean;
}
|
|
27
|
+
|
|
28
|
+
/** Parsed arguments of the `install` subcommand, keyed by flag name. */
interface InstallCommandArgs {
  coreml: boolean;
  onnx: boolean;
  /** Kebab-case because it mirrors the `--no-cache` flag name. */
  "no-cache": boolean;
}
|
|
33
|
+
|
|
34
|
+
/** Parsed arguments of the root `parakeet` command. */
interface MainCommandArgs {
  /** Positional arguments; the command treats each one as an audio file path. */
  _: string[];
  /** Emit results as JSON instead of plain text. */
  json: boolean;
  /** Expected language code; a warning is logged when detection disagrees. */
  lang?: string;
}
|
|
11
39
|
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
40
|
+
const pkg = await Bun.file(new URL("../package.json", import.meta.url)).json();
|
|
41
|
+
|
|
42
|
+
export function resolveInstallBackend(options: InstallOptions, macArm64 = isMacArm64()): "coreml" | "onnx" {
|
|
43
|
+
const { coreml, onnx } = options;
|
|
44
|
+
|
|
45
|
+
if (coreml && onnx) {
|
|
46
|
+
throw new Error('Choose only one backend: "--coreml" or "--onnx".');
|
|
16
47
|
}
|
|
17
48
|
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
const noCache = args.includes("--no-cache");
|
|
22
|
-
const forceCoreML = args.includes("--coreml");
|
|
23
|
-
const forceOnnx = args.includes("--onnx");
|
|
24
|
-
|
|
25
|
-
try {
|
|
26
|
-
if (forceCoreML) {
|
|
27
|
-
if (!isMacArm64()) {
|
|
28
|
-
log.error("CoreML backend is only available on macOS Apple Silicon.");
|
|
29
|
-
process.exit(1);
|
|
30
|
-
}
|
|
31
|
-
await downloadCoreML(noCache);
|
|
32
|
-
} else if (forceOnnx) {
|
|
33
|
-
await downloadModel(noCache);
|
|
34
|
-
} else if (isMacArm64()) {
|
|
35
|
-
await downloadCoreML(noCache);
|
|
36
|
-
} else {
|
|
37
|
-
await downloadModel(noCache);
|
|
38
|
-
}
|
|
39
|
-
} catch (err: unknown) {
|
|
40
|
-
const message = err instanceof Error ? err.message : String(err);
|
|
41
|
-
log.error(message);
|
|
42
|
-
process.exit(1);
|
|
49
|
+
if (coreml) {
|
|
50
|
+
if (!macArm64) {
|
|
51
|
+
throw new Error("CoreML backend is only available on macOS Apple Silicon.");
|
|
43
52
|
}
|
|
44
|
-
|
|
53
|
+
return "coreml";
|
|
45
54
|
}
|
|
46
55
|
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
if (!file) {
|
|
50
|
-
log.info("Usage: parakeet [--version] <audio_file>");
|
|
51
|
-
log.info(" parakeet install [--coreml | --onnx] [--no-cache]");
|
|
52
|
-
process.exit(1);
|
|
56
|
+
if (onnx) {
|
|
57
|
+
return "onnx";
|
|
53
58
|
}
|
|
54
59
|
|
|
60
|
+
return macArm64 ? "coreml" : "onnx";
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
async function performInstall(options: InstallOptions) {
|
|
64
|
+
const { noCache } = options;
|
|
55
65
|
try {
|
|
56
|
-
const
|
|
57
|
-
if (
|
|
66
|
+
const backend = resolveInstallBackend(options);
|
|
67
|
+
if (backend === "coreml") {
|
|
68
|
+
await downloadCoreML(noCache);
|
|
69
|
+
} else {
|
|
70
|
+
await downloadModel(noCache);
|
|
71
|
+
}
|
|
58
72
|
} catch (err: unknown) {
|
|
59
73
|
const message = err instanceof Error ? err.message : String(err);
|
|
60
74
|
log.error(message);
|
|
@@ -62,4 +76,131 @@ async function main(): Promise<void> {
|
|
|
62
76
|
}
|
|
63
77
|
}
|
|
64
78
|
|
|
65
|
-
|
|
79
|
+
/**
 * `parakeet install` subcommand.
 *
 * Maps the parsed flags (`--coreml`, `--onnx`, `--no-cache`) onto
 * `InstallOptions` and delegates to `performInstall`.
 */
export const installCommand = defineCommand({
  meta: {
    name: "install",
    description: "Download speech-to-text models",
  },
  args: {
    coreml: {
      type: "boolean",
      description: "Force CoreML backend (macOS arm64)",
      default: false,
    },
    onnx: {
      type: "boolean",
      description: "Force ONNX backend",
      default: false,
    },
    "no-cache": {
      type: "boolean",
      description: "Re-download even if cached",
      default: false,
    },
  },
  async run({ args }: { args: InstallCommandArgs }) {
    await performInstall({ coreml: args.coreml, onnx: args.onnx, noCache: args["no-cache"] });
  },
});
|
|
105
|
+
|
|
106
|
+
/** `parakeet status` subcommand: prints the report produced by `showStatus`. */
export const statusCommand = defineCommand({
  meta: {
    name: "status",
    description: "Show backend installation status",
  },
  async run() {
    await showStatus();
  },
});
|
|
115
|
+
|
|
116
|
+
/**
 * Root `parakeet` command: transcribes every positional audio file.
 *
 * For each file it transcribes, detects the language, warns when `--lang`
 * disagrees with the detection, and collects the result. Per-file failures
 * are logged and do not stop the remaining files. Successful results are
 * written to stdout as text or, with `--json`, as JSON.
 *
 * Exit status: 1 when no file was given or when at least one file failed.
 */
export const mainCommand = defineCommand({
  meta: {
    name: "parakeet",
    version: pkg.version,
    description:
      "Fast local speech-to-text. 25 languages. CoreML on Apple Silicon, ONNX on CPU.\n" +
      " Run 'parakeet install [--coreml | --onnx] [--no-cache]' to download models.\n" +
      " Run 'parakeet status' to inspect installed backends.",
  },
  args: {
    json: {
      type: "boolean",
      description: "Output results as JSON",
      default: false,
    },
    lang: {
      type: "string",
      description: "Expected language code (ISO 639-1), warn if mismatch",
    },
  },
  async run({ args }: { args: MainCommandArgs }) {
    const files = args._;

    if (files.length === 0) {
      log.info("Usage: parakeet <audio_file> [audio_file ...]\n parakeet install [--coreml | --onnx] [--no-cache]\n parakeet status");
      process.exit(1);
    }

    let hasError = false;
    const results: TranscribeResult[] = [];

    for (const file of files) {
      try {
        const text = await transcribe(file);
        const lang = detectLanguage(text);

        const mismatchWarning = checkLanguageMismatch(args.lang, lang);
        if (mismatchWarning) log.warn(`${file}: ${mismatchWarning}`);

        results.push({ file, text, lang });
      } catch (err: unknown) {
        // Keep going: one bad file must not discard the other transcripts.
        hasError = true;
        const message = err instanceof Error ? err.message : String(err);
        log.error(`${file}: ${message}`);
      }
    }

    if (args.json) {
      process.stdout.write(formatJsonOutput(results));
    } else {
      process.stdout.write(formatTextOutput(results));
    }

    // Set the exit code instead of calling process.exit(): exiting right after
    // a stdout write can drop output that has not been flushed to a pipe yet.
    if (hasError) process.exitCode = 1;
  },
});
|
|
172
|
+
|
|
173
|
+
/**
 * CLI entry point: dispatches on the first raw argument.
 *
 * `install` and `status` run their subcommand with the remaining arguments;
 * anything else is handed, unmodified, to the root transcription command.
 *
 * @param rawArgs Command-line arguments; defaults to `process.argv.slice(2)`.
 */
export async function runCli(rawArgs = process.argv.slice(2)): Promise<void> {
  const [firstArg, ...restArgs] = rawArgs;

  if (firstArg === "install") {
    await runMain(installCommand, { rawArgs: restArgs });
    return;
  }

  if (firstArg === "status") {
    await runMain(statusCommand, { rawArgs: restArgs });
    return;
  }

  await runMain(mainCommand, { rawArgs });
}
|
|
188
|
+
|
|
189
|
+
export type TranscribeResult = { file: string; text: string; lang: string };
|
|
190
|
+
|
|
191
|
+
/**
 * Renders transcription results as plain text.
 *
 * A single result is printed as its bare text followed by a newline. Any
 * other count is printed as one `=== <file> ===` section per result, with a
 * blank line between sections; an empty list yields an empty string.
 *
 * @param results Results to render, in output order.
 * @returns The text to write to stdout.
 */
export function formatTextOutput(results: TranscribeResult[]): string {
  if (results.length === 1) {
    return results[0].text + "\n";
  }
  return results
    .map((r, i) => (i > 0 ? "\n" : "") + `=== ${r.file} ===\n${r.text}\n`)
    .join("");
}
|
|
199
|
+
|
|
200
|
+
/**
 * Renders transcription results as pretty-printed JSON (2-space indent)
 * terminated by a newline.
 *
 * @param results Results to serialize.
 * @returns The JSON text to write to stdout.
 */
export function formatJsonOutput(results: TranscribeResult[]): string {
  return JSON.stringify(results, null, 2) + "\n";
}
|
|
203
|
+
|
|
204
|
+
// Run the CLI only when this module is the program entry point, so that
// importing it (for its exported helpers) has no side effects.
if (import.meta.main) {
  await runCli();
}
|
package/src/coreml-install.ts
CHANGED
|
@@ -3,6 +3,7 @@ import { homedir } from "os";
|
|
|
3
3
|
import { existsSync, mkdirSync, chmodSync } from "fs";
|
|
4
4
|
import { getCoreMLBinPath } from "./coreml";
|
|
5
5
|
import { log } from "./log";
|
|
6
|
+
import { streamResponseToFile } from "./progress";
|
|
6
7
|
|
|
7
8
|
const COREML_BINARY_NAME = "parakeet-coreml-darwin-arm64";
|
|
8
9
|
const GITHUB_REPO = "drakulavich/parakeet-cli";
|
|
@@ -84,6 +85,18 @@ export function getCoreMLLatestDownloadURL(): string {
|
|
|
84
85
|
return `https://github.com/${GITHUB_REPO}/releases/latest/download/${COREML_BINARY_NAME}`;
|
|
85
86
|
}
|
|
86
87
|
|
|
88
|
+
/**
 * Tells whether a package version should be treated as unreleased.
 *
 * @param version Package version string.
 * @returns `true` for the placeholder `"0.0.0"` and for any version that
 *   contains a `-` (e.g. a pre-release suffix); `false` otherwise.
 */
export function isUnreleasedVersion(version: string): boolean {
  return version === "0.0.0" || version.includes("-");
}
|
|
91
|
+
|
|
92
|
+
/**
 * Lists the URLs to try, in order, when downloading the CoreML binary.
 *
 * A released version is fetched only from its own release URL. An unreleased
 * version tries the "latest" release URL first and its own version URL second.
 *
 * @param version Package version the binary is requested for.
 * @returns Candidate download URLs in the order they should be attempted.
 */
export function getCoreMLBinaryDownloadCandidates(version: string): string[] {
  const versionUrl = getCoreMLDownloadURL(version);
  if (isUnreleasedVersion(version)) {
    return [getCoreMLLatestDownloadURL(), versionUrl];
  }
  return [versionUrl];
}
|
|
99
|
+
|
|
87
100
|
export function getCoreMLInstallState(opts?: {
|
|
88
101
|
binPath?: string;
|
|
89
102
|
exists?: (path: string) => boolean;
|
|
@@ -164,22 +177,21 @@ export function classifyCoreMLInstallProbe(exitCode: number, stdout: string): Co
|
|
|
164
177
|
}
|
|
165
178
|
|
|
166
179
|
async function fetchCoreMLBinary(): Promise<Response> {
|
|
167
|
-
const latestUrl = getCoreMLLatestDownloadURL();
|
|
168
|
-
let res = await fetch(latestUrl, { redirect: "follow" });
|
|
169
|
-
|
|
170
|
-
if (res.ok) {
|
|
171
|
-
return res;
|
|
172
|
-
}
|
|
173
|
-
|
|
174
180
|
const pkg = await Bun.file(new URL("../package.json", import.meta.url)).json();
|
|
175
|
-
const
|
|
176
|
-
res = await fetch(versionUrl, { redirect: "follow" });
|
|
181
|
+
const version = typeof pkg.version === "string" ? pkg.version : "unknown";
|
|
177
182
|
|
|
178
|
-
|
|
179
|
-
|
|
183
|
+
let lastStatus: number | null = null;
|
|
184
|
+
for (const url of getCoreMLBinaryDownloadCandidates(version)) {
|
|
185
|
+
const res = await fetch(url, { redirect: "follow" });
|
|
186
|
+
if (res.ok) {
|
|
187
|
+
return res;
|
|
188
|
+
}
|
|
189
|
+
lastStatus = res.status;
|
|
180
190
|
}
|
|
181
191
|
|
|
182
|
-
|
|
192
|
+
throw new Error(
|
|
193
|
+
`Failed to download CoreML binary${lastStatus ? ` (HTTP ${lastStatus})` : ""}\n Requested package version: ${version}\n Fix: Check https://github.com/drakulavich/parakeet-cli/releases for available versions\n Or install the ONNX backend instead: parakeet install --onnx`,
|
|
194
|
+
);
|
|
183
195
|
}
|
|
184
196
|
|
|
185
197
|
export function getCoreMLInstallStatus(
|
|
@@ -232,10 +244,9 @@ export async function downloadCoreML(
|
|
|
232
244
|
}
|
|
233
245
|
|
|
234
246
|
if (plan.downloadBinary) {
|
|
235
|
-
log.progress("Downloading parakeet-coreml binary...");
|
|
236
247
|
const res = await fetchCoreMLBinary();
|
|
237
248
|
mkdirSync(dirname(binPath), { recursive: true });
|
|
238
|
-
await
|
|
249
|
+
await streamResponseToFile(res, binPath, "parakeet-coreml binary");
|
|
239
250
|
chmodSync(binPath, 0o755);
|
|
240
251
|
}
|
|
241
252
|
|
package/src/lib.ts
CHANGED
package/src/onnx-install.ts
CHANGED
|
@@ -2,6 +2,7 @@ import { join } from "path";
|
|
|
2
2
|
import { homedir } from "os";
|
|
3
3
|
import { existsSync, mkdirSync } from "fs";
|
|
4
4
|
import { log } from "./log";
|
|
5
|
+
import { streamResponseToFile } from "./progress";
|
|
5
6
|
|
|
6
7
|
export const HF_REPO = "istupakov/parakeet-tdt-0.6b-v3-onnx";
|
|
7
8
|
|
|
@@ -61,36 +62,27 @@ export async function downloadModel(noCache = false, modelDir?: string): Promise
|
|
|
61
62
|
|
|
62
63
|
if (!noCache && existsSync(dest)) continue;
|
|
63
64
|
|
|
64
|
-
log.progress(`Downloading ${file}...`);
|
|
65
|
-
|
|
66
65
|
let res: Response;
|
|
67
66
|
try {
|
|
68
67
|
res = await fetch(url, { redirect: "follow" });
|
|
69
68
|
} catch (e) {
|
|
70
|
-
throw new Error(
|
|
69
|
+
throw new Error(
|
|
70
|
+
`Failed to fetch ${file}: ${e instanceof Error ? e.message : e}\n Fix: Check your network connection and try again`,
|
|
71
|
+
);
|
|
71
72
|
}
|
|
72
73
|
|
|
73
74
|
if (!res.ok) {
|
|
74
|
-
throw new Error(
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
if (!res.body) {
|
|
78
|
-
throw new Error(`empty response body for ${file}`);
|
|
75
|
+
throw new Error(
|
|
76
|
+
`Failed to download ${file}: HTTP ${res.status}\n Fix: Check your network connection or try again with --no-cache`,
|
|
77
|
+
);
|
|
79
78
|
}
|
|
80
79
|
|
|
81
|
-
const
|
|
82
|
-
let bytes = 0;
|
|
83
|
-
try {
|
|
84
|
-
for await (const chunk of res.body) {
|
|
85
|
-
writer.write(chunk);
|
|
86
|
-
bytes += chunk.length;
|
|
87
|
-
}
|
|
88
|
-
} finally {
|
|
89
|
-
writer.end();
|
|
90
|
-
}
|
|
80
|
+
const bytes = await streamResponseToFile(res, dest, file);
|
|
91
81
|
|
|
92
82
|
if (bytes === 0) {
|
|
93
|
-
throw new Error(
|
|
83
|
+
throw new Error(
|
|
84
|
+
`Downloaded 0 bytes for ${file}\n Fix: Try again — the server may be temporarily unavailable`,
|
|
85
|
+
);
|
|
94
86
|
}
|
|
95
87
|
}
|
|
96
88
|
|
package/src/progress.ts
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
import { log } from "./log";
|
|
2
|
+
|
|
3
|
+
const BAR_WIDTH = 20;
|
|
4
|
+
|
|
5
|
+
/**
 * Formats a byte count as mebibytes with one decimal place, e.g. `"1.5MB"`.
 *
 * @param bytes Size in bytes.
 * @returns `bytes / 1024 / 1024` fixed to one decimal, suffixed with `MB`.
 */
export function formatBytes(bytes: number): string {
  return `${(bytes / 1024 / 1024).toFixed(1)}MB`;
}
|
|
8
|
+
|
|
9
|
+
/**
 * Renders a one-line textual progress bar.
 *
 * The percentage is clamped to 0–100 and falls back to 0 when it cannot be
 * computed (non-positive or non-numeric `total`), so the bar widths passed to
 * `String.prototype.repeat` are always valid.
 *
 * @param label Text shown before the bar.
 * @param downloaded Bytes received so far.
 * @param total Expected total bytes.
 * @returns `<label> [<bar>] <pct>% <downloaded>/<total>` with sizes in MB.
 */
export function formatProgressBar(label: string, downloaded: number, total: number): string {
  const ratio = total > 0 ? downloaded / total : 0;
  const pct = Number.isFinite(ratio) ? Math.min(100, Math.max(0, Math.floor(ratio * 100))) : 0;
  const filled = Math.round((pct / 100) * BAR_WIDTH);
  const empty = BAR_WIDTH - filled;
  const bar = "█".repeat(filled) + "░".repeat(empty);
  return `${label} [${bar}] ${pct}% ${formatBytes(downloaded)}/${formatBytes(total)}`;
}
|
|
16
|
+
|
|
17
|
+
/**
 * Streams an HTTP response body to a file while reporting progress.
 *
 * @param res Response whose body is written out.
 * @param destPath Destination file path.
 * @param label Human-readable name used in progress and error messages.
 * @returns The number of bytes written.
 * @throws Error when the response has no body; read errors from the body
 *   stream propagate to the caller after the writer has been closed.
 */
export async function streamResponseToFile(
  res: Response,
  destPath: string,
  label: string,
): Promise<number> {
  if (!res.body) {
    throw new Error(
      `Download failed: empty response for ${label}\n Fix: Try again — the server may be temporarily unavailable`,
    );
  }

  // A missing or malformed content-length is treated as "size unknown" (0),
  // which makes the progress reporter fall back to plain log lines.
  const declaredLength = Number(res.headers.get("content-length") || 0);
  const totalBytes = Number.isFinite(declaredLength) && declaredLength > 0 ? declaredLength : 0;
  const progress = createProgressBar(label, totalBytes);

  const writer = Bun.file(destPath).writer();
  let bytes = 0;
  try {
    for await (const chunk of res.body) {
      writer.write(chunk);
      bytes += chunk.length;
      progress.update(chunk.length);
    }
  } finally {
    // Wait for the flush/close so callers can safely touch the file
    // (e.g. chmod) as soon as this function returns.
    await writer.end();
  }

  progress.finish();
  return bytes;
}
|
|
46
|
+
|
|
47
|
+
/**
 * Creates a progress reporter for a download.
 *
 * When stderr is not a TTY, or the total size is unknown (`totalBytes <= 0`),
 * it logs one "Downloading…" line immediately and one "Downloaded…" line on
 * `finish()`; `update()` does nothing. Otherwise it redraws a progress bar on
 * stderr in place, at most once per whole-percent change.
 *
 * @param label Human-readable name of the item being downloaded.
 * @param totalBytes Expected size in bytes, or a non-positive value if unknown.
 * @returns `update(n)` to report `n` newly received bytes, and `finish()` to
 *   emit the final line.
 */
export function createProgressBar(label: string, totalBytes: number): {
  update(downloadedBytes: number): void;
  finish(): void;
} {
  const isTTY = process.stderr.isTTY;

  if (!isTTY || totalBytes <= 0) {
    const sizeInfo = totalBytes > 0 ? ` (${formatBytes(totalBytes)})` : "";
    log.progress(`Downloading ${label}${sizeInfo}...`);
    return {
      update() {},
      finish() {
        log.success(`Downloaded ${label} ✓`);
      },
    };
  }

  let current = 0;
  let lastPct = -1;
  return {
    update(downloadedBytes: number) {
      current += downloadedBytes;
      const pct = totalBytes > 0 ? Math.floor((current / totalBytes) * 100) : 0;
      // Skip redraws that would not change the displayed percentage.
      if (pct === lastPct) return;
      lastPct = pct;
      const line = formatProgressBar(label, current, totalBytes);
      // "\r" returns to the start of the line so the bar is overwritten in place.
      process.stderr.write(`\r${line}`);
    },
    finish() {
      const line = formatProgressBar(label, totalBytes, totalBytes);
      process.stderr.write(`\r${line}\n`);
    },
  };
}
|
package/src/status.ts
ADDED
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
import { isMacArm64, getCoreMLBinPath } from "./coreml";
|
|
2
|
+
import { isModelCached, getModelDir } from "./onnx-install";
|
|
3
|
+
import { getCoreMLInstallState, getCoreMLInstallStatus, getCoreMLSupportDir, type CoreMLInstallState } from "./coreml-install";
|
|
4
|
+
import { log } from "./log";
|
|
5
|
+
import pc from "picocolors";
|
|
6
|
+
|
|
7
|
+
export type StatusCoreMLState = CoreMLInstallState | "n/a" | "probe-failed";
|
|
8
|
+
export type StatusPlatform = "mac-arm64" | "other";
|
|
9
|
+
|
|
10
|
+
/**
 * Formats one row of the status report.
 *
 * @param label Row label, e.g. `"Binary"`.
 * @param path Path or detail text shown after the label; `null` to omit it.
 * @param installed Whether to show the green check mark or the red cross.
 * @param missingLabel Text shown after the cross when `installed` is false.
 * @returns The row, with the status marker padded so that label plus path
 *   occupy 50 columns (always at least one space of padding).
 */
export function formatStatusLine(
  label: string,
  path: string | null,
  installed: boolean,
  missingLabel = "not installed",
): string {
  const status = installed ? pc.green("✓") : pc.red(`✗ ${missingLabel}`);
  const pathStr = path ?? "";
  const padding = " ".repeat(Math.max(1, 50 - label.length - pathStr.length));
  return ` ${label}:${pathStr ? ` ${pathStr}` : ""}${padding}${status}`;
}
|
|
21
|
+
|
|
22
|
+
/** Snapshot of the environment used to compute follow-up suggestions. */
export interface StatusInfo {
  /** Whether the ONNX models are installed. */
  onnx: boolean;
  /** CoreML install state, or `"n/a"` / `"probe-failed"`. */
  coreml: StatusCoreMLState;
  /** Whether ffmpeg was found. */
  ffmpeg: boolean;
  /** Platform class the status was collected on. */
  platform: StatusPlatform;
}
|
|
28
|
+
|
|
29
|
+
/**
 * Derives the follow-up actions to show after the status report.
 *
 * On `mac-arm64` only the CoreML state is considered (at most one suggestion)
 * and ONNX/ffmpeg are not mentioned. On any other platform the ONNX models
 * and ffmpeg are checked.
 *
 * @param info Collected status snapshot.
 * @returns Suggestion messages in display order; empty when nothing is needed.
 */
export function collectSuggestions(info: StatusInfo): string[] {
  const suggestions: string[] = [];

  if (info.platform === "mac-arm64") {
    if (info.coreml === "missing") {
      suggestions.push(`Run "parakeet install --coreml" to install the CoreML backend.`);
    } else if (info.coreml === "binary-only") {
      suggestions.push(`Run "parakeet install --coreml" to download CoreML models.`);
    } else if (info.coreml === "stale-binary") {
      suggestions.push(`Run "parakeet install --coreml --no-cache" to refresh the incompatible CoreML binary.`);
    } else if (info.coreml === "probe-failed") {
      suggestions.push(`Run "parakeet install --coreml --no-cache" to refresh the CoreML backend and restore status checks.`);
    }
    return suggestions;
  }

  if (!info.onnx) {
    suggestions.push(`Run "parakeet install --onnx" to install the ONNX backend.`);
  }

  if (!info.ffmpeg) {
    suggestions.push(`Install ffmpeg for ONNX audio conversion (see "parakeet install" output for instructions).`);
  }

  return suggestions;
}
|
|
55
|
+
|
|
56
|
+
/**
 * Environment probes used by `showStatus`, injectable so callers can
 * override any of them.
 */
export interface StatusDeps {
  /** Whether the host is macOS on Apple Silicon. */
  isMacArm64: () => boolean;
  /** Path of the CoreML binary. */
  getCoreMLBinPath: () => string;
  /** Install state of the CoreML backend for the given binary path. */
  getCoreMLState: (binPath: string) => CoreMLInstallState;
  /** Directory shown as the CoreML models location. */
  getCoreMLSupportDir: () => string;
  /** Whether the ONNX models are installed. */
  isModelCached: () => boolean;
  /** Directory shown as the ONNX models location. */
  getModelDir: () => string;
  /** Path of the ffmpeg executable, or `null` when it is not found. */
  whichFfmpeg: () => string | null;
  /** Bun version shown in the "Runtime" row. */
  bunVersion: string;
  /** Text shown in the "Platform" row. */
  platform: string;
}
|
|
67
|
+
|
|
68
|
+
/** Builds the real (non-injected) probes used by `showStatus`. */
function defaultDeps(): StatusDeps {
  return {
    isMacArm64,
    getCoreMLBinPath,
    // The readiness check is delegated to getCoreMLInstallStatus.
    getCoreMLState: (binPath) => getCoreMLInstallState({
      binPath,
      verifyReady: (path) => getCoreMLInstallStatus(path),
    }),
    getCoreMLSupportDir,
    isModelCached,
    getModelDir,
    whichFfmpeg: () => Bun.which("ffmpeg"),
    bunVersion: Bun.version,
    platform: `${process.platform} ${process.arch}`,
  };
}
|
|
84
|
+
|
|
85
|
+
/**
 * Maps a CoreML state to how the "Binary" row is displayed.
 *
 * @param state Current CoreML state.
 * @returns Whether the binary counts as installed, and the label to show
 *   next to the cross when it does not.
 */
function getCoreMLBinaryDisplay(state: StatusCoreMLState): { installed: boolean; missingLabel: string } {
  switch (state) {
    // "binary-only" still has a usable binary; only the models are absent.
    case "ready":
    case "binary-only":
      return { installed: true, missingLabel: "not installed" };
    case "stale-binary":
      return { installed: false, missingLabel: "stale binary" };
    case "probe-failed":
      return { installed: false, missingLabel: "probe failed" };
    case "missing":
    case "n/a":
      return { installed: false, missingLabel: "not installed" };
  }
}
|
|
99
|
+
|
|
100
|
+
/**
 * Maps a CoreML state to how the "Models" row is displayed.
 *
 * @param state Current CoreML state.
 * @returns Whether the models count as installed, and the label to show
 *   next to the cross when they do not. Only `"ready"` counts as installed.
 */
function getCoreMLModelsDisplay(state: StatusCoreMLState): { installed: boolean; missingLabel: string } {
  switch (state) {
    case "ready":
      return { installed: true, missingLabel: "not installed" };
    case "stale-binary":
      return { installed: false, missingLabel: "reinstall required" };
    case "probe-failed":
      return { installed: false, missingLabel: "status unknown" };
    case "binary-only":
    case "missing":
    case "n/a":
      return { installed: false, missingLabel: "not installed" };
  }
}
|
|
114
|
+
|
|
115
|
+
/**
 * Prints the installation status report.
 *
 * Sections, in order: CoreML binary and models (Apple Silicon macOS only),
 * ONNX models, ffmpeg, runtime and platform, then any suggestions from
 * `collectSuggestions` as warnings.
 *
 * @param deps Optional overrides for individual environment probes; anything
 *   not supplied falls back to the real implementation.
 */
export async function showStatus(deps?: Partial<StatusDeps>): Promise<void> {
  const d = { ...defaultDeps(), ...deps };

  const isMac = d.isMacArm64();
  const platform: StatusPlatform = isMac ? "mac-arm64" : "other";

  // CoreML status
  let coremlState: StatusCoreMLState = "n/a";
  let coremlProbeError: string | null = null;
  if (isMac) {
    const binPath = d.getCoreMLBinPath();
    try {
      coremlState = d.getCoreMLState(binPath);
    } catch (error: unknown) {
      // A throwing probe is reported as its own state instead of aborting the report.
      coremlState = "probe-failed";
      coremlProbeError = error instanceof Error ? error.message : String(error);
    }

    log.info("CoreML (macOS Apple Silicon):");
    const binaryDisplay = getCoreMLBinaryDisplay(coremlState);
    log.info(formatStatusLine("Binary", coremlState === "missing" ? null : binPath, binaryDisplay.installed, binaryDisplay.missingLabel));

    const modelsDisplay = getCoreMLModelsDisplay(coremlState);
    const modelDir = d.getCoreMLSupportDir();
    // The models path is shown only for these three states.
    const modelsPath = (coremlState === "ready" || coremlState === "stale-binary" || coremlState === "probe-failed") ? modelDir : null;
    log.info(formatStatusLine("Models", modelsPath, modelsDisplay.installed, modelsDisplay.missingLabel));
    log.info("");

    if (coremlProbeError) {
      log.warn(`CoreML status probe failed: ${coremlProbeError}`);
    }
  }

  // ONNX status
  const modelDir = d.getModelDir();
  const onnxInstalled = d.isModelCached();
  log.info("ONNX:");
  log.info(formatStatusLine("Models", onnxInstalled ? modelDir : null, onnxInstalled));
  log.info("");

  // ffmpeg
  const ffmpegPath = d.whichFfmpeg();
  log.info(formatStatusLine("ffmpeg", ffmpegPath, !!ffmpegPath, "not found"));

  // Runtime info
  log.info(formatStatusLine("Runtime", `Bun ${d.bunVersion}`, true));
  log.info(formatStatusLine("Platform", d.platform, true));
  log.info("");

  // Suggestions
  const suggestions = collectSuggestions({
    onnx: onnxInstalled,
    coreml: coremlState,
    ffmpeg: !!ffmpegPath,
    platform,
  });

  for (const suggestion of suggestions) {
    log.warn(suggestion);
  }
}
|
package/src/transcribe.ts
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { requireModel, isModelCached, installHintError } from "./onnx-install";
|
|
2
|
-
import { isCoreMLInstalled, transcribeCoreML } from "./coreml";
|
|
2
|
+
import { isCoreMLInstalled, transcribeCoreML, isMacArm64 } from "./coreml";
|
|
3
|
+
import { log } from "./log";
|
|
3
4
|
import { convertToFloat32PCM } from "./audio";
|
|
4
5
|
import { initPreprocessor, preprocess } from "./preprocess";
|
|
5
6
|
import { initEncoder, encode } from "./encoder";
|
|
@@ -28,6 +29,7 @@ const DECODER_HIDDEN = 640;
|
|
|
28
29
|
export interface TranscribeOptions {
|
|
29
30
|
beamWidth?: number;
|
|
30
31
|
modelDir?: string;
|
|
32
|
+
silent?: boolean;
|
|
31
33
|
}
|
|
32
34
|
|
|
33
35
|
// Minimum 0.1s of audio at 16kHz to produce meaningful output
|
|
@@ -38,6 +40,10 @@ export async function transcribe(audioPath: string, opts: TranscribeOptions = {}
|
|
|
38
40
|
return transcribeCoreML(audioPath);
|
|
39
41
|
}
|
|
40
42
|
|
|
43
|
+
if (!opts.silent && isMacArm64()) {
|
|
44
|
+
log.warn("CoreML backend unavailable, falling back to ONNX");
|
|
45
|
+
}
|
|
46
|
+
|
|
41
47
|
if (isModelCached(opts.modelDir)) {
|
|
42
48
|
return transcribeOnnx(audioPath, opts);
|
|
43
49
|
}
|
|
@@ -49,6 +55,9 @@ async function transcribeOnnx(audioPath: string, opts: TranscribeOptions): Promi
|
|
|
49
55
|
const audio = await convertToFloat32PCM(audioPath);
|
|
50
56
|
|
|
51
57
|
if (audio.length < MIN_AUDIO_SAMPLES) {
|
|
58
|
+
if (!opts.silent) {
|
|
59
|
+
log.warn(`Audio too short (< 0.1s), skipping: ${audioPath}`);
|
|
60
|
+
}
|
|
52
61
|
return "";
|
|
53
62
|
}
|
|
54
63
|
|
|
@@ -1,36 +0,0 @@
|
|
|
1
|
-
import { describe, test, expect } from "bun:test";
|
|
2
|
-
import { getFfmpegInstallHint, assertFfmpegExists, resetFfmpegCheck } from "../audio";
|
|
3
|
-
|
|
4
|
-
describe("getFfmpegInstallHint", () => {
|
|
5
|
-
test("returns a non-empty string", () => {
|
|
6
|
-
const hint = getFfmpegInstallHint();
|
|
7
|
-
expect(hint).toBeTruthy();
|
|
8
|
-
expect(typeof hint).toBe("string");
|
|
9
|
-
});
|
|
10
|
-
|
|
11
|
-
test("contains install keyword", () => {
|
|
12
|
-
const hint = getFfmpegInstallHint();
|
|
13
|
-
expect(hint).toMatch(/install|ffmpeg\.org/i);
|
|
14
|
-
});
|
|
15
|
-
});
|
|
16
|
-
|
|
17
|
-
describe("assertFfmpegExists", () => {
|
|
18
|
-
test("includes install hint when ffmpeg is missing", () => {
|
|
19
|
-
// Save and override Bun.which to simulate missing ffmpeg
|
|
20
|
-
const originalWhich = Bun.which;
|
|
21
|
-
Bun.which = ((cmd: string) => {
|
|
22
|
-
if (cmd === "ffmpeg") return null;
|
|
23
|
-
return originalWhich(cmd);
|
|
24
|
-
}) as typeof Bun.which;
|
|
25
|
-
|
|
26
|
-
// Reset the cached check so assertFfmpegExists re-checks
|
|
27
|
-
resetFfmpegCheck();
|
|
28
|
-
|
|
29
|
-
try {
|
|
30
|
-
expect(() => assertFfmpegExists()).toThrow(/Install it:/);
|
|
31
|
-
} finally {
|
|
32
|
-
Bun.which = originalWhich;
|
|
33
|
-
resetFfmpegCheck();
|
|
34
|
-
}
|
|
35
|
-
});
|
|
36
|
-
});
|
|
@@ -1,67 +0,0 @@
|
|
|
1
|
-
import { describe, test, expect } from "bun:test";
|
|
2
|
-
import {
|
|
3
|
-
createBenchmarkSummary,
|
|
4
|
-
renderBenchmarkReport,
|
|
5
|
-
type BenchmarkSystemInfo,
|
|
6
|
-
} from "../benchmark-report";
|
|
7
|
-
|
|
8
|
-
const system: BenchmarkSystemInfo = {
|
|
9
|
-
os: "Darwin",
|
|
10
|
-
arch: "arm64",
|
|
11
|
-
chip: "Apple M3 Pro",
|
|
12
|
-
ram: "18 GB",
|
|
13
|
-
backend: "CoreML",
|
|
14
|
-
};
|
|
15
|
-
|
|
16
|
-
describe("benchmark-report", () => {
|
|
17
|
-
test("createBenchmarkSummary computes totals and speedup", () => {
|
|
18
|
-
expect(
|
|
19
|
-
createBenchmarkSummary(
|
|
20
|
-
[
|
|
21
|
-
{ time: 2.34, text: "a" },
|
|
22
|
-
{ time: 1.11, text: "b" },
|
|
23
|
-
],
|
|
24
|
-
[
|
|
25
|
-
{ time: 1.0, text: "a" },
|
|
26
|
-
{ time: 0.5, text: "b" },
|
|
27
|
-
],
|
|
28
|
-
),
|
|
29
|
-
).toEqual({
|
|
30
|
-
whisper_total: 3.5,
|
|
31
|
-
parakeet_total: 1.5,
|
|
32
|
-
speedup: 2.3,
|
|
33
|
-
});
|
|
34
|
-
});
|
|
35
|
-
|
|
36
|
-
test("createBenchmarkSummary rejects mismatched result counts", () => {
|
|
37
|
-
expect(() =>
|
|
38
|
-
createBenchmarkSummary(
|
|
39
|
-
[{ time: 1, text: "a" }],
|
|
40
|
-
[
|
|
41
|
-
{ time: 1, text: "a" },
|
|
42
|
-
{ time: 2, text: "b" },
|
|
43
|
-
],
|
|
44
|
-
),
|
|
45
|
-
).toThrow("Benchmark result count mismatch");
|
|
46
|
-
});
|
|
47
|
-
|
|
48
|
-
test("renderBenchmarkReport produces markdown with totals", () => {
|
|
49
|
-
const report = renderBenchmarkReport({
|
|
50
|
-
date: "2026-04-08",
|
|
51
|
-
version: "0.7.0",
|
|
52
|
-
system,
|
|
53
|
-
whisperResults: [{ time: 3.2, text: "hello" }],
|
|
54
|
-
parakeetResults: [{ time: 1.6, text: "hello" }],
|
|
55
|
-
});
|
|
56
|
-
|
|
57
|
-
expect(report.summary).toEqual({
|
|
58
|
-
whisper_total: 3.2,
|
|
59
|
-
parakeet_total: 1.6,
|
|
60
|
-
speedup: 2,
|
|
61
|
-
});
|
|
62
|
-
expect(report.markdown).toContain("**Date:** 2026-04-08");
|
|
63
|
-
expect(report.markdown).toContain("**Runner:** Darwin arm64 (Apple M3 Pro, 18 GB RAM)");
|
|
64
|
-
expect(report.markdown).toContain("| **Total** | **3.2s** | **1.6s** | | |");
|
|
65
|
-
expect(report.markdown).toContain("**Parakeet is ~2x faster.**");
|
|
66
|
-
});
|
|
67
|
-
});
|
|
@@ -1,281 +0,0 @@
|
|
|
1
|
-
import { describe, test, expect } from "bun:test";
|
|
2
|
-
import {
|
|
3
|
-
classifyCoreMLInstallProbe,
|
|
4
|
-
createCoreMLBinaryRunner,
|
|
5
|
-
ensureCoreMLModels,
|
|
6
|
-
getCoreMLDownloadURL,
|
|
7
|
-
getCoreMLInstallState,
|
|
8
|
-
getCoreMLInstallStatus,
|
|
9
|
-
getCoreMLSupportDir,
|
|
10
|
-
parseCoreMLBinaryCapabilities,
|
|
11
|
-
planCoreMLInstall,
|
|
12
|
-
type CoreMLBinaryCommandResult,
|
|
13
|
-
type CoreMLBinaryRunner,
|
|
14
|
-
} from "../coreml-install";
|
|
15
|
-
import { join } from "path";
|
|
16
|
-
import { homedir } from "os";
|
|
17
|
-
|
|
18
|
-
describe("coreml-install", () => {
|
|
19
|
-
test("getCoreMLSupportDir returns correct cache path", () => {
|
|
20
|
-
expect(getCoreMLSupportDir()).toBe(
|
|
21
|
-
join(homedir(), ".cache", "parakeet", "coreml"),
|
|
22
|
-
);
|
|
23
|
-
});
|
|
24
|
-
|
|
25
|
-
test("getCoreMLDownloadURL includes version and correct filename", () => {
|
|
26
|
-
const url = getCoreMLDownloadURL("0.5.0");
|
|
27
|
-
expect(url).toBe(
|
|
28
|
-
"https://github.com/drakulavich/parakeet-cli/releases/download/v0.5.0/parakeet-coreml-darwin-arm64",
|
|
29
|
-
);
|
|
30
|
-
});
|
|
31
|
-
|
|
32
|
-
test("getCoreMLInstallState returns missing when binary is absent", () => {
|
|
33
|
-
const state = getCoreMLInstallState({
|
|
34
|
-
binPath: "/tmp/parakeet-coreml",
|
|
35
|
-
exists: () => false,
|
|
36
|
-
});
|
|
37
|
-
|
|
38
|
-
expect(state).toBe("missing");
|
|
39
|
-
});
|
|
40
|
-
|
|
41
|
-
test("getCoreMLInstallState returns binary-only when readiness check fails", () => {
|
|
42
|
-
const state = getCoreMLInstallState({
|
|
43
|
-
binPath: "/tmp/parakeet-coreml",
|
|
44
|
-
exists: () => true,
|
|
45
|
-
verifyReady: () => "binary-only",
|
|
46
|
-
});
|
|
47
|
-
|
|
48
|
-
expect(state).toBe("binary-only");
|
|
49
|
-
});
|
|
50
|
-
|
|
51
|
-
test("getCoreMLInstallState returns ready when readiness check passes", () => {
|
|
52
|
-
const state = getCoreMLInstallState({
|
|
53
|
-
binPath: "/tmp/parakeet-coreml",
|
|
54
|
-
exists: () => true,
|
|
55
|
-
verifyReady: () => "ready",
|
|
56
|
-
});
|
|
57
|
-
|
|
58
|
-
expect(state).toBe("ready");
|
|
59
|
-
});
|
|
60
|
-
|
|
61
|
-
test("getCoreMLInstallState returns stale-binary when cached binary is too old", () => {
|
|
62
|
-
const state = getCoreMLInstallState({
|
|
63
|
-
binPath: "/tmp/parakeet-coreml",
|
|
64
|
-
exists: () => true,
|
|
65
|
-
verifyReady: () => "stale-binary",
|
|
66
|
-
});
|
|
67
|
-
|
|
68
|
-
expect(state).toBe("stale-binary");
|
|
69
|
-
});
|
|
70
|
-
|
|
71
|
-
test("getCoreMLInstallState defaults to binary-only when no readiness checker is provided", () => {
|
|
72
|
-
const state = getCoreMLInstallState({
|
|
73
|
-
binPath: "/tmp/parakeet-coreml",
|
|
74
|
-
exists: () => true,
|
|
75
|
-
});
|
|
76
|
-
|
|
77
|
-
expect(state).toBe("binary-only");
|
|
78
|
-
});
|
|
79
|
-
|
|
80
|
-
test("planCoreMLInstall skips work when install is ready", () => {
|
|
81
|
-
expect(planCoreMLInstall("ready")).toEqual({
|
|
82
|
-
downloadBinary: false,
|
|
83
|
-
downloadModels: false,
|
|
84
|
-
});
|
|
85
|
-
});
|
|
86
|
-
|
|
87
|
-
test("planCoreMLInstall downloads only models when binary already exists", () => {
|
|
88
|
-
expect(planCoreMLInstall("binary-only")).toEqual({
|
|
89
|
-
downloadBinary: false,
|
|
90
|
-
downloadModels: true,
|
|
91
|
-
});
|
|
92
|
-
});
|
|
93
|
-
|
|
94
|
-
test("planCoreMLInstall forces both downloads with no-cache", () => {
|
|
95
|
-
expect(planCoreMLInstall("ready", true)).toEqual({
|
|
96
|
-
downloadBinary: true,
|
|
97
|
-
downloadModels: true,
|
|
98
|
-
});
|
|
99
|
-
});
|
|
100
|
-
|
|
101
|
-
test("planCoreMLInstall refreshes stale cached binaries", () => {
|
|
102
|
-
expect(planCoreMLInstall("stale-binary")).toEqual({
|
|
103
|
-
downloadBinary: true,
|
|
104
|
-
downloadModels: true,
|
|
105
|
-
});
|
|
106
|
-
});
|
|
107
|
-
|
|
108
|
-
test("parseCoreMLBinaryCapabilities accepts the current protocol payload", () => {
|
|
109
|
-
expect(
|
|
110
|
-
parseCoreMLBinaryCapabilities(
|
|
111
|
-
JSON.stringify({
|
|
112
|
-
protocolVersion: 1,
|
|
113
|
-
installState: "ready",
|
|
114
|
-
supportedCommands: {
|
|
115
|
-
checkInstall: true,
|
|
116
|
-
downloadOnly: true,
|
|
117
|
-
},
|
|
118
|
-
}),
|
|
119
|
-
),
|
|
120
|
-
).toEqual({
|
|
121
|
-
protocolVersion: 1,
|
|
122
|
-
installState: "ready",
|
|
123
|
-
supportedCommands: {
|
|
124
|
-
checkInstall: true,
|
|
125
|
-
downloadOnly: true,
|
|
126
|
-
},
|
|
127
|
-
});
|
|
128
|
-
});
|
|
129
|
-
|
|
130
|
-
test("parseCoreMLBinaryCapabilities rejects malformed payloads", () => {
|
|
131
|
-
expect(
|
|
132
|
-
parseCoreMLBinaryCapabilities("{invalid"),
|
|
133
|
-
).toBeNull();
|
|
134
|
-
expect(
|
|
135
|
-
parseCoreMLBinaryCapabilities(
|
|
136
|
-
JSON.stringify({
|
|
137
|
-
protocolVersion: 2,
|
|
138
|
-
installState: "ready",
|
|
139
|
-
supportedCommands: {
|
|
140
|
-
checkInstall: true,
|
|
141
|
-
downloadOnly: true,
|
|
142
|
-
},
|
|
143
|
-
}),
|
|
144
|
-
),
|
|
145
|
-
).toBeNull();
|
|
146
|
-
});
|
|
147
|
-
|
|
148
|
-
test("classifyCoreMLInstallProbe classifies capabilities responses", () => {
|
|
149
|
-
expect(
|
|
150
|
-
classifyCoreMLInstallProbe(1, ""),
|
|
151
|
-
).toBe("stale-binary");
|
|
152
|
-
expect(
|
|
153
|
-
classifyCoreMLInstallProbe(
|
|
154
|
-
0,
|
|
155
|
-
JSON.stringify({
|
|
156
|
-
protocolVersion: 1,
|
|
157
|
-
installState: "models-missing",
|
|
158
|
-
supportedCommands: {
|
|
159
|
-
checkInstall: true,
|
|
160
|
-
downloadOnly: true,
|
|
161
|
-
},
|
|
162
|
-
}),
|
|
163
|
-
),
|
|
164
|
-
).toBe("binary-only");
|
|
165
|
-
expect(
|
|
166
|
-
classifyCoreMLInstallProbe(
|
|
167
|
-
0,
|
|
168
|
-
JSON.stringify({
|
|
169
|
-
protocolVersion: 1,
|
|
170
|
-
installState: "ready",
|
|
171
|
-
supportedCommands: {
|
|
172
|
-
checkInstall: true,
|
|
173
|
-
downloadOnly: true,
|
|
174
|
-
},
|
|
175
|
-
}),
|
|
176
|
-
),
|
|
177
|
-
).toBe("ready");
|
|
178
|
-
expect(
|
|
179
|
-
classifyCoreMLInstallProbe(0, "{\"protocolVersion\":999}"),
|
|
180
|
-
).toBe("stale-binary");
|
|
181
|
-
});
|
|
182
|
-
|
|
183
|
-
test("createCoreMLBinaryRunner runs the expected commands", () => {
|
|
184
|
-
const calls: string[][] = [];
|
|
185
|
-
const runner = createCoreMLBinaryRunner((cmd) => {
|
|
186
|
-
calls.push(Array.isArray(cmd) ? cmd : cmd.cmd);
|
|
187
|
-
return {
|
|
188
|
-
exitCode: 0,
|
|
189
|
-
stdout: Buffer.from("{}"),
|
|
190
|
-
stderr: Buffer.from(""),
|
|
191
|
-
} as ReturnType<typeof Bun.spawnSync>;
|
|
192
|
-
});
|
|
193
|
-
|
|
194
|
-
runner.probeCapabilities("/tmp/parakeet-coreml");
|
|
195
|
-
runner.downloadModels("/tmp/parakeet-coreml");
|
|
196
|
-
|
|
197
|
-
expect(calls).toEqual([
|
|
198
|
-
["/tmp/parakeet-coreml", "--capabilities-json"],
|
|
199
|
-
["/tmp/parakeet-coreml", "--download-only"],
|
|
200
|
-
]);
|
|
201
|
-
});
|
|
202
|
-
|
|
203
|
-
test("getCoreMLInstallStatus delegates to the runner probe", () => {
|
|
204
|
-
const runner: CoreMLBinaryRunner = {
|
|
205
|
-
probeCapabilities() {
|
|
206
|
-
return {
|
|
207
|
-
exitCode: 0,
|
|
208
|
-
stdout: JSON.stringify({
|
|
209
|
-
protocolVersion: 1,
|
|
210
|
-
installState: "models-missing",
|
|
211
|
-
supportedCommands: {
|
|
212
|
-
checkInstall: true,
|
|
213
|
-
downloadOnly: true,
|
|
214
|
-
},
|
|
215
|
-
}),
|
|
216
|
-
stderr: "",
|
|
217
|
-
};
|
|
218
|
-
},
|
|
219
|
-
downloadModels() {
|
|
220
|
-
throw new Error("not used");
|
|
221
|
-
},
|
|
222
|
-
};
|
|
223
|
-
|
|
224
|
-
expect(getCoreMLInstallStatus("/tmp/parakeet-coreml", runner)).toBe("binary-only");
|
|
225
|
-
});
|
|
226
|
-
|
|
227
|
-
test("ensureCoreMLModels streams command output through the writer", async () => {
|
|
228
|
-
const writes = {
|
|
229
|
-
stdout: [] as string[],
|
|
230
|
-
stderr: [] as string[],
|
|
231
|
-
};
|
|
232
|
-
const runner: CoreMLBinaryRunner = {
|
|
233
|
-
probeCapabilities() {
|
|
234
|
-
throw new Error("not used");
|
|
235
|
-
},
|
|
236
|
-
downloadModels() {
|
|
237
|
-
return {
|
|
238
|
-
exitCode: 0,
|
|
239
|
-
stdout: "downloaded\n",
|
|
240
|
-
stderr: "progress\n",
|
|
241
|
-
};
|
|
242
|
-
},
|
|
243
|
-
};
|
|
244
|
-
|
|
245
|
-
await ensureCoreMLModels("/tmp/parakeet-coreml", runner, {
|
|
246
|
-
stdout(message) {
|
|
247
|
-
writes.stdout.push(message);
|
|
248
|
-
},
|
|
249
|
-
stderr(message) {
|
|
250
|
-
writes.stderr.push(message);
|
|
251
|
-
},
|
|
252
|
-
});
|
|
253
|
-
|
|
254
|
-
expect(writes).toEqual({
|
|
255
|
-
stdout: ["downloaded\n"],
|
|
256
|
-
stderr: ["progress\n"],
|
|
257
|
-
});
|
|
258
|
-
});
|
|
259
|
-
|
|
260
|
-
test("ensureCoreMLModels throws a contextual error when download fails", async () => {
|
|
261
|
-
const runner: CoreMLBinaryRunner = {
|
|
262
|
-
probeCapabilities() {
|
|
263
|
-
throw new Error("not used");
|
|
264
|
-
},
|
|
265
|
-
downloadModels() {
|
|
266
|
-
return {
|
|
267
|
-
exitCode: 2,
|
|
268
|
-
stdout: "",
|
|
269
|
-
stderr: "download failed",
|
|
270
|
-
};
|
|
271
|
-
},
|
|
272
|
-
};
|
|
273
|
-
|
|
274
|
-
await expect(
|
|
275
|
-
ensureCoreMLModels("/tmp/parakeet-coreml", runner, {
|
|
276
|
-
stdout() {},
|
|
277
|
-
stderr() {},
|
|
278
|
-
}),
|
|
279
|
-
).rejects.toThrow("Failed to download CoreML models: download failed");
|
|
280
|
-
});
|
|
281
|
-
});
|
|
@@ -1,24 +0,0 @@
|
|
|
1
|
-
import { describe, test, expect } from "bun:test";
|
|
2
|
-
import {
|
|
3
|
-
shouldRetryCoreMLWithWav,
|
|
4
|
-
} from "../coreml";
|
|
5
|
-
|
|
6
|
-
describe("coreml", () => {
|
|
7
|
-
test("retries non-wav files on CoreAudio decode errors", () => {
|
|
8
|
-
expect(
|
|
9
|
-
shouldRetryCoreMLWithWav(
|
|
10
|
-
"fixtures/hello-english.oga",
|
|
11
|
-
new Error("Error: The operation couldn’t be completed. (com.apple.coreaudio.avfaudio error 1718449215.)"),
|
|
12
|
-
),
|
|
13
|
-
).toBe(true);
|
|
14
|
-
});
|
|
15
|
-
|
|
16
|
-
test("does not retry wav files on CoreAudio decode errors", () => {
|
|
17
|
-
expect(
|
|
18
|
-
shouldRetryCoreMLWithWav(
|
|
19
|
-
"fixtures/silence.wav",
|
|
20
|
-
new Error("Error: The operation couldn’t be completed. (com.apple.coreaudio.avfaudio error 1718449215.)"),
|
|
21
|
-
),
|
|
22
|
-
).toBe(false);
|
|
23
|
-
});
|
|
24
|
-
});
|
|
@@ -1,50 +0,0 @@
|
|
|
1
|
-
import { describe, test, expect } from "bun:test";
|
|
2
|
-
import { beamDecode, type DecoderSession } from "../decoder";
|
|
3
|
-
|
|
4
|
-
function mockSession(responses: Array<{ tokenLogits: number[]; durationLogits: number[] }>): DecoderSession {
|
|
5
|
-
let callIndex = 0;
|
|
6
|
-
return {
|
|
7
|
-
async decode(_encoderFrame, _targets, _targetLength, _state1, _state2) {
|
|
8
|
-
const resp = responses[Math.min(callIndex++, responses.length - 1)];
|
|
9
|
-
const output = new Float32Array([...resp.tokenLogits, ...resp.durationLogits]);
|
|
10
|
-
const state1 = new Float32Array(1);
|
|
11
|
-
const state2 = new Float32Array(1);
|
|
12
|
-
return { output, state1, state2 };
|
|
13
|
-
},
|
|
14
|
-
vocabSize: responses[0]?.tokenLogits.length ?? 4,
|
|
15
|
-
blankId: (responses[0]?.tokenLogits.length ?? 4) - 1,
|
|
16
|
-
stateDims: { layers: 1, hidden: 1 },
|
|
17
|
-
};
|
|
18
|
-
}
|
|
19
|
-
|
|
20
|
-
describe("decoder", () => {
|
|
21
|
-
test("emits non-blank tokens", async () => {
|
|
22
|
-
const session = mockSession([
|
|
23
|
-
{ tokenLogits: [10, 0, 0, -10], durationLogits: [10, 0] },
|
|
24
|
-
{ tokenLogits: [0, 10, 0, -10], durationLogits: [10, 0] },
|
|
25
|
-
{ tokenLogits: [0, 0, 0, 10], durationLogits: [10, 0] },
|
|
26
|
-
]);
|
|
27
|
-
const encoderData = new Float32Array(3);
|
|
28
|
-
const tokens = await beamDecode(session, 3, encoderData, 1, 1);
|
|
29
|
-
expect(tokens).toEqual([0, 1]);
|
|
30
|
-
});
|
|
31
|
-
|
|
32
|
-
test("respects duration skipping", async () => {
|
|
33
|
-
const session = mockSession([
|
|
34
|
-
{ tokenLogits: [10, 0, 0, -10], durationLogits: [0, 0, 10] },
|
|
35
|
-
{ tokenLogits: [0, 10, 0, -10], durationLogits: [10, 0, 0] },
|
|
36
|
-
{ tokenLogits: [0, 0, 0, 10], durationLogits: [10, 0, 0] },
|
|
37
|
-
]);
|
|
38
|
-
const encoderData = new Float32Array(5);
|
|
39
|
-
const tokens = await beamDecode(session, 5, encoderData, 1, 1);
|
|
40
|
-
expect(tokens).toEqual([0, 1]);
|
|
41
|
-
});
|
|
42
|
-
|
|
43
|
-
test("returns empty for zero-length encoder output", async () => {
|
|
44
|
-
const session = mockSession([
|
|
45
|
-
{ tokenLogits: [0, 0, 0, 10], durationLogits: [10, 0] },
|
|
46
|
-
]);
|
|
47
|
-
const tokens = await beamDecode(session, 0, new Float32Array(0), 1);
|
|
48
|
-
expect(tokens).toEqual([]);
|
|
49
|
-
});
|
|
50
|
-
});
|
|
@@ -1,41 +0,0 @@
|
|
|
1
|
-
import { describe, test, expect } from "bun:test";
|
|
2
|
-
import { Tokenizer } from "../tokenizer";
|
|
3
|
-
|
|
4
|
-
describe("tokenizer", () => {
|
|
5
|
-
test("loads vocab from file", async () => {
|
|
6
|
-
const tok = await Tokenizer.fromFile("fixtures/test-vocab.txt");
|
|
7
|
-
expect(tok.vocabSize).toBe(6);
|
|
8
|
-
expect(tok.blankId).toBe(5);
|
|
9
|
-
});
|
|
10
|
-
|
|
11
|
-
test("detokenizes token IDs to text", async () => {
|
|
12
|
-
const tok = await Tokenizer.fromFile("fixtures/test-vocab.txt");
|
|
13
|
-
const text = tok.detokenize([0, 1]);
|
|
14
|
-
expect(text).toBe("hello world");
|
|
15
|
-
});
|
|
16
|
-
|
|
17
|
-
test("handles blank tokens by skipping them", async () => {
|
|
18
|
-
const tok = await Tokenizer.fromFile("fixtures/test-vocab.txt");
|
|
19
|
-
const text = tok.detokenize([0, 5, 1]);
|
|
20
|
-
expect(text).toBe("hello world");
|
|
21
|
-
});
|
|
22
|
-
|
|
23
|
-
test("handles empty token list", async () => {
|
|
24
|
-
const tok = await Tokenizer.fromFile("fixtures/test-vocab.txt");
|
|
25
|
-
const text = tok.detokenize([]);
|
|
26
|
-
expect(text).toBe("");
|
|
27
|
-
});
|
|
28
|
-
|
|
29
|
-
test("handles only blank tokens", async () => {
|
|
30
|
-
const tok = await Tokenizer.fromFile("fixtures/test-vocab.txt");
|
|
31
|
-
const text = tok.detokenize([5, 5, 5]);
|
|
32
|
-
expect(text).toBe("");
|
|
33
|
-
});
|
|
34
|
-
|
|
35
|
-
test("joins subword tokens correctly", async () => {
|
|
36
|
-
const tok = await Tokenizer.fromFile("fixtures/test-vocab.txt");
|
|
37
|
-
const text = tok.detokenize([3, 4]);
|
|
38
|
-
expect(text).toBe("cats");
|
|
39
|
-
});
|
|
40
|
-
|
|
41
|
-
});
|