@drakulavich/parakeet-cli 0.7.0 → 0.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/__tests__/benchmark-report.test.ts +67 -0
- package/src/__tests__/coreml-install.test.ts +281 -0
- package/src/__tests__/coreml.test.ts +17 -21
- package/src/__tests__/lib.test.ts +0 -4
- package/src/audio.ts +46 -22
- package/src/benchmark-report.ts +92 -0
- package/src/cli.ts +2 -1
- package/src/coreml-install.ts +247 -0
- package/src/coreml.ts +29 -1
- package/src/lib.ts +2 -1
- package/src/models.ts +3 -162
- package/src/onnx-install.ts +98 -0
- package/src/transcribe.ts +1 -1
- package/src/__tests__/models.test.ts +0 -30
- package/src/__tests__/transcribe.test.ts +0 -11
package/package.json
CHANGED
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
import { describe, test, expect } from "bun:test";
|
|
2
|
+
import {
|
|
3
|
+
createBenchmarkSummary,
|
|
4
|
+
renderBenchmarkReport,
|
|
5
|
+
type BenchmarkSystemInfo,
|
|
6
|
+
} from "../benchmark-report";
|
|
7
|
+
|
|
8
|
+
const system: BenchmarkSystemInfo = {
|
|
9
|
+
os: "Darwin",
|
|
10
|
+
arch: "arm64",
|
|
11
|
+
chip: "Apple M3 Pro",
|
|
12
|
+
ram: "18 GB",
|
|
13
|
+
backend: "CoreML",
|
|
14
|
+
};
|
|
15
|
+
|
|
16
|
+
describe("benchmark-report", () => {
|
|
17
|
+
test("createBenchmarkSummary computes totals and speedup", () => {
|
|
18
|
+
expect(
|
|
19
|
+
createBenchmarkSummary(
|
|
20
|
+
[
|
|
21
|
+
{ time: 2.34, text: "a" },
|
|
22
|
+
{ time: 1.11, text: "b" },
|
|
23
|
+
],
|
|
24
|
+
[
|
|
25
|
+
{ time: 1.0, text: "a" },
|
|
26
|
+
{ time: 0.5, text: "b" },
|
|
27
|
+
],
|
|
28
|
+
),
|
|
29
|
+
).toEqual({
|
|
30
|
+
whisper_total: 3.5,
|
|
31
|
+
parakeet_total: 1.5,
|
|
32
|
+
speedup: 2.3,
|
|
33
|
+
});
|
|
34
|
+
});
|
|
35
|
+
|
|
36
|
+
test("createBenchmarkSummary rejects mismatched result counts", () => {
|
|
37
|
+
expect(() =>
|
|
38
|
+
createBenchmarkSummary(
|
|
39
|
+
[{ time: 1, text: "a" }],
|
|
40
|
+
[
|
|
41
|
+
{ time: 1, text: "a" },
|
|
42
|
+
{ time: 2, text: "b" },
|
|
43
|
+
],
|
|
44
|
+
),
|
|
45
|
+
).toThrow("Benchmark result count mismatch");
|
|
46
|
+
});
|
|
47
|
+
|
|
48
|
+
test("renderBenchmarkReport produces markdown with totals", () => {
|
|
49
|
+
const report = renderBenchmarkReport({
|
|
50
|
+
date: "2026-04-08",
|
|
51
|
+
version: "0.7.0",
|
|
52
|
+
system,
|
|
53
|
+
whisperResults: [{ time: 3.2, text: "hello" }],
|
|
54
|
+
parakeetResults: [{ time: 1.6, text: "hello" }],
|
|
55
|
+
});
|
|
56
|
+
|
|
57
|
+
expect(report.summary).toEqual({
|
|
58
|
+
whisper_total: 3.2,
|
|
59
|
+
parakeet_total: 1.6,
|
|
60
|
+
speedup: 2,
|
|
61
|
+
});
|
|
62
|
+
expect(report.markdown).toContain("**Date:** 2026-04-08");
|
|
63
|
+
expect(report.markdown).toContain("**Runner:** Darwin arm64 (Apple M3 Pro, 18 GB RAM)");
|
|
64
|
+
expect(report.markdown).toContain("| **Total** | **3.2s** | **1.6s** | | |");
|
|
65
|
+
expect(report.markdown).toContain("**Parakeet is ~2x faster.**");
|
|
66
|
+
});
|
|
67
|
+
});
|
|
@@ -0,0 +1,281 @@
|
|
|
1
|
+
import { describe, test, expect } from "bun:test";
|
|
2
|
+
import {
|
|
3
|
+
classifyCoreMLInstallProbe,
|
|
4
|
+
createCoreMLBinaryRunner,
|
|
5
|
+
ensureCoreMLModels,
|
|
6
|
+
getCoreMLDownloadURL,
|
|
7
|
+
getCoreMLInstallState,
|
|
8
|
+
getCoreMLInstallStatus,
|
|
9
|
+
getCoreMLSupportDir,
|
|
10
|
+
parseCoreMLBinaryCapabilities,
|
|
11
|
+
planCoreMLInstall,
|
|
12
|
+
type CoreMLBinaryCommandResult,
|
|
13
|
+
type CoreMLBinaryRunner,
|
|
14
|
+
} from "../coreml-install";
|
|
15
|
+
import { join } from "path";
|
|
16
|
+
import { homedir } from "os";
|
|
17
|
+
|
|
18
|
+
describe("coreml-install", () => {
|
|
19
|
+
test("getCoreMLSupportDir returns correct cache path", () => {
|
|
20
|
+
expect(getCoreMLSupportDir()).toBe(
|
|
21
|
+
join(homedir(), ".cache", "parakeet", "coreml"),
|
|
22
|
+
);
|
|
23
|
+
});
|
|
24
|
+
|
|
25
|
+
test("getCoreMLDownloadURL includes version and correct filename", () => {
|
|
26
|
+
const url = getCoreMLDownloadURL("0.5.0");
|
|
27
|
+
expect(url).toBe(
|
|
28
|
+
"https://github.com/drakulavich/parakeet-cli/releases/download/v0.5.0/parakeet-coreml-darwin-arm64",
|
|
29
|
+
);
|
|
30
|
+
});
|
|
31
|
+
|
|
32
|
+
test("getCoreMLInstallState returns missing when binary is absent", () => {
|
|
33
|
+
const state = getCoreMLInstallState({
|
|
34
|
+
binPath: "/tmp/parakeet-coreml",
|
|
35
|
+
exists: () => false,
|
|
36
|
+
});
|
|
37
|
+
|
|
38
|
+
expect(state).toBe("missing");
|
|
39
|
+
});
|
|
40
|
+
|
|
41
|
+
test("getCoreMLInstallState returns binary-only when readiness check fails", () => {
|
|
42
|
+
const state = getCoreMLInstallState({
|
|
43
|
+
binPath: "/tmp/parakeet-coreml",
|
|
44
|
+
exists: () => true,
|
|
45
|
+
verifyReady: () => "binary-only",
|
|
46
|
+
});
|
|
47
|
+
|
|
48
|
+
expect(state).toBe("binary-only");
|
|
49
|
+
});
|
|
50
|
+
|
|
51
|
+
test("getCoreMLInstallState returns ready when readiness check passes", () => {
|
|
52
|
+
const state = getCoreMLInstallState({
|
|
53
|
+
binPath: "/tmp/parakeet-coreml",
|
|
54
|
+
exists: () => true,
|
|
55
|
+
verifyReady: () => "ready",
|
|
56
|
+
});
|
|
57
|
+
|
|
58
|
+
expect(state).toBe("ready");
|
|
59
|
+
});
|
|
60
|
+
|
|
61
|
+
test("getCoreMLInstallState returns stale-binary when cached binary is too old", () => {
|
|
62
|
+
const state = getCoreMLInstallState({
|
|
63
|
+
binPath: "/tmp/parakeet-coreml",
|
|
64
|
+
exists: () => true,
|
|
65
|
+
verifyReady: () => "stale-binary",
|
|
66
|
+
});
|
|
67
|
+
|
|
68
|
+
expect(state).toBe("stale-binary");
|
|
69
|
+
});
|
|
70
|
+
|
|
71
|
+
test("getCoreMLInstallState defaults to binary-only when no readiness checker is provided", () => {
|
|
72
|
+
const state = getCoreMLInstallState({
|
|
73
|
+
binPath: "/tmp/parakeet-coreml",
|
|
74
|
+
exists: () => true,
|
|
75
|
+
});
|
|
76
|
+
|
|
77
|
+
expect(state).toBe("binary-only");
|
|
78
|
+
});
|
|
79
|
+
|
|
80
|
+
test("planCoreMLInstall skips work when install is ready", () => {
|
|
81
|
+
expect(planCoreMLInstall("ready")).toEqual({
|
|
82
|
+
downloadBinary: false,
|
|
83
|
+
downloadModels: false,
|
|
84
|
+
});
|
|
85
|
+
});
|
|
86
|
+
|
|
87
|
+
test("planCoreMLInstall downloads only models when binary already exists", () => {
|
|
88
|
+
expect(planCoreMLInstall("binary-only")).toEqual({
|
|
89
|
+
downloadBinary: false,
|
|
90
|
+
downloadModels: true,
|
|
91
|
+
});
|
|
92
|
+
});
|
|
93
|
+
|
|
94
|
+
test("planCoreMLInstall forces both downloads with no-cache", () => {
|
|
95
|
+
expect(planCoreMLInstall("ready", true)).toEqual({
|
|
96
|
+
downloadBinary: true,
|
|
97
|
+
downloadModels: true,
|
|
98
|
+
});
|
|
99
|
+
});
|
|
100
|
+
|
|
101
|
+
test("planCoreMLInstall refreshes stale cached binaries", () => {
|
|
102
|
+
expect(planCoreMLInstall("stale-binary")).toEqual({
|
|
103
|
+
downloadBinary: true,
|
|
104
|
+
downloadModels: true,
|
|
105
|
+
});
|
|
106
|
+
});
|
|
107
|
+
|
|
108
|
+
test("parseCoreMLBinaryCapabilities accepts the current protocol payload", () => {
|
|
109
|
+
expect(
|
|
110
|
+
parseCoreMLBinaryCapabilities(
|
|
111
|
+
JSON.stringify({
|
|
112
|
+
protocolVersion: 1,
|
|
113
|
+
installState: "ready",
|
|
114
|
+
supportedCommands: {
|
|
115
|
+
checkInstall: true,
|
|
116
|
+
downloadOnly: true,
|
|
117
|
+
},
|
|
118
|
+
}),
|
|
119
|
+
),
|
|
120
|
+
).toEqual({
|
|
121
|
+
protocolVersion: 1,
|
|
122
|
+
installState: "ready",
|
|
123
|
+
supportedCommands: {
|
|
124
|
+
checkInstall: true,
|
|
125
|
+
downloadOnly: true,
|
|
126
|
+
},
|
|
127
|
+
});
|
|
128
|
+
});
|
|
129
|
+
|
|
130
|
+
test("parseCoreMLBinaryCapabilities rejects malformed payloads", () => {
|
|
131
|
+
expect(
|
|
132
|
+
parseCoreMLBinaryCapabilities("{invalid"),
|
|
133
|
+
).toBeNull();
|
|
134
|
+
expect(
|
|
135
|
+
parseCoreMLBinaryCapabilities(
|
|
136
|
+
JSON.stringify({
|
|
137
|
+
protocolVersion: 2,
|
|
138
|
+
installState: "ready",
|
|
139
|
+
supportedCommands: {
|
|
140
|
+
checkInstall: true,
|
|
141
|
+
downloadOnly: true,
|
|
142
|
+
},
|
|
143
|
+
}),
|
|
144
|
+
),
|
|
145
|
+
).toBeNull();
|
|
146
|
+
});
|
|
147
|
+
|
|
148
|
+
test("classifyCoreMLInstallProbe classifies capabilities responses", () => {
|
|
149
|
+
expect(
|
|
150
|
+
classifyCoreMLInstallProbe(1, ""),
|
|
151
|
+
).toBe("stale-binary");
|
|
152
|
+
expect(
|
|
153
|
+
classifyCoreMLInstallProbe(
|
|
154
|
+
0,
|
|
155
|
+
JSON.stringify({
|
|
156
|
+
protocolVersion: 1,
|
|
157
|
+
installState: "models-missing",
|
|
158
|
+
supportedCommands: {
|
|
159
|
+
checkInstall: true,
|
|
160
|
+
downloadOnly: true,
|
|
161
|
+
},
|
|
162
|
+
}),
|
|
163
|
+
),
|
|
164
|
+
).toBe("binary-only");
|
|
165
|
+
expect(
|
|
166
|
+
classifyCoreMLInstallProbe(
|
|
167
|
+
0,
|
|
168
|
+
JSON.stringify({
|
|
169
|
+
protocolVersion: 1,
|
|
170
|
+
installState: "ready",
|
|
171
|
+
supportedCommands: {
|
|
172
|
+
checkInstall: true,
|
|
173
|
+
downloadOnly: true,
|
|
174
|
+
},
|
|
175
|
+
}),
|
|
176
|
+
),
|
|
177
|
+
).toBe("ready");
|
|
178
|
+
expect(
|
|
179
|
+
classifyCoreMLInstallProbe(0, "{\"protocolVersion\":999}"),
|
|
180
|
+
).toBe("stale-binary");
|
|
181
|
+
});
|
|
182
|
+
|
|
183
|
+
test("createCoreMLBinaryRunner runs the expected commands", () => {
|
|
184
|
+
const calls: string[][] = [];
|
|
185
|
+
const runner = createCoreMLBinaryRunner((cmd) => {
|
|
186
|
+
calls.push(Array.isArray(cmd) ? cmd : cmd.cmd);
|
|
187
|
+
return {
|
|
188
|
+
exitCode: 0,
|
|
189
|
+
stdout: Buffer.from("{}"),
|
|
190
|
+
stderr: Buffer.from(""),
|
|
191
|
+
} as ReturnType<typeof Bun.spawnSync>;
|
|
192
|
+
});
|
|
193
|
+
|
|
194
|
+
runner.probeCapabilities("/tmp/parakeet-coreml");
|
|
195
|
+
runner.downloadModels("/tmp/parakeet-coreml");
|
|
196
|
+
|
|
197
|
+
expect(calls).toEqual([
|
|
198
|
+
["/tmp/parakeet-coreml", "--capabilities-json"],
|
|
199
|
+
["/tmp/parakeet-coreml", "--download-only"],
|
|
200
|
+
]);
|
|
201
|
+
});
|
|
202
|
+
|
|
203
|
+
test("getCoreMLInstallStatus delegates to the runner probe", () => {
|
|
204
|
+
const runner: CoreMLBinaryRunner = {
|
|
205
|
+
probeCapabilities() {
|
|
206
|
+
return {
|
|
207
|
+
exitCode: 0,
|
|
208
|
+
stdout: JSON.stringify({
|
|
209
|
+
protocolVersion: 1,
|
|
210
|
+
installState: "models-missing",
|
|
211
|
+
supportedCommands: {
|
|
212
|
+
checkInstall: true,
|
|
213
|
+
downloadOnly: true,
|
|
214
|
+
},
|
|
215
|
+
}),
|
|
216
|
+
stderr: "",
|
|
217
|
+
};
|
|
218
|
+
},
|
|
219
|
+
downloadModels() {
|
|
220
|
+
throw new Error("not used");
|
|
221
|
+
},
|
|
222
|
+
};
|
|
223
|
+
|
|
224
|
+
expect(getCoreMLInstallStatus("/tmp/parakeet-coreml", runner)).toBe("binary-only");
|
|
225
|
+
});
|
|
226
|
+
|
|
227
|
+
test("ensureCoreMLModels streams command output through the writer", async () => {
|
|
228
|
+
const writes = {
|
|
229
|
+
stdout: [] as string[],
|
|
230
|
+
stderr: [] as string[],
|
|
231
|
+
};
|
|
232
|
+
const runner: CoreMLBinaryRunner = {
|
|
233
|
+
probeCapabilities() {
|
|
234
|
+
throw new Error("not used");
|
|
235
|
+
},
|
|
236
|
+
downloadModels() {
|
|
237
|
+
return {
|
|
238
|
+
exitCode: 0,
|
|
239
|
+
stdout: "downloaded\n",
|
|
240
|
+
stderr: "progress\n",
|
|
241
|
+
};
|
|
242
|
+
},
|
|
243
|
+
};
|
|
244
|
+
|
|
245
|
+
await ensureCoreMLModels("/tmp/parakeet-coreml", runner, {
|
|
246
|
+
stdout(message) {
|
|
247
|
+
writes.stdout.push(message);
|
|
248
|
+
},
|
|
249
|
+
stderr(message) {
|
|
250
|
+
writes.stderr.push(message);
|
|
251
|
+
},
|
|
252
|
+
});
|
|
253
|
+
|
|
254
|
+
expect(writes).toEqual({
|
|
255
|
+
stdout: ["downloaded\n"],
|
|
256
|
+
stderr: ["progress\n"],
|
|
257
|
+
});
|
|
258
|
+
});
|
|
259
|
+
|
|
260
|
+
test("ensureCoreMLModels throws a contextual error when download fails", async () => {
|
|
261
|
+
const runner: CoreMLBinaryRunner = {
|
|
262
|
+
probeCapabilities() {
|
|
263
|
+
throw new Error("not used");
|
|
264
|
+
},
|
|
265
|
+
downloadModels() {
|
|
266
|
+
return {
|
|
267
|
+
exitCode: 2,
|
|
268
|
+
stdout: "",
|
|
269
|
+
stderr: "download failed",
|
|
270
|
+
};
|
|
271
|
+
},
|
|
272
|
+
};
|
|
273
|
+
|
|
274
|
+
await expect(
|
|
275
|
+
ensureCoreMLModels("/tmp/parakeet-coreml", runner, {
|
|
276
|
+
stdout() {},
|
|
277
|
+
stderr() {},
|
|
278
|
+
}),
|
|
279
|
+
).rejects.toThrow("Failed to download CoreML models: download failed");
|
|
280
|
+
});
|
|
281
|
+
});
|
|
@@ -1,28 +1,24 @@
|
|
|
1
1
|
import { describe, test, expect } from "bun:test";
|
|
2
|
-
import {
|
|
3
|
-
|
|
4
|
-
|
|
2
|
+
import {
|
|
3
|
+
shouldRetryCoreMLWithWav,
|
|
4
|
+
} from "../coreml";
|
|
5
5
|
|
|
6
6
|
describe("coreml", () => {
|
|
7
|
-
test("
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
7
|
+
test("retries non-wav files on CoreAudio decode errors", () => {
|
|
8
|
+
expect(
|
|
9
|
+
shouldRetryCoreMLWithWav(
|
|
10
|
+
"fixtures/hello-english.oga",
|
|
11
|
+
new Error("Error: The operation couldn’t be completed. (com.apple.coreaudio.avfaudio error 1718449215.)"),
|
|
12
|
+
),
|
|
13
|
+
).toBe(true);
|
|
12
14
|
});
|
|
13
15
|
|
|
14
|
-
test("
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
}
|
|
22
|
-
});
|
|
23
|
-
|
|
24
|
-
test("isCoreMLInstalled returns a boolean", () => {
|
|
25
|
-
const result = isCoreMLInstalled();
|
|
26
|
-
expect(typeof result).toBe("boolean");
|
|
16
|
+
test("does not retry wav files on CoreAudio decode errors", () => {
|
|
17
|
+
expect(
|
|
18
|
+
shouldRetryCoreMLWithWav(
|
|
19
|
+
"fixtures/silence.wav",
|
|
20
|
+
new Error("Error: The operation couldn’t be completed. (com.apple.coreaudio.avfaudio error 1718449215.)"),
|
|
21
|
+
),
|
|
22
|
+
).toBe(false);
|
|
27
23
|
});
|
|
28
24
|
});
|
|
@@ -2,10 +2,6 @@ import { describe, expect, it } from "bun:test";
|
|
|
2
2
|
import { transcribe } from "../lib";
|
|
3
3
|
|
|
4
4
|
describe("lib API", () => {
|
|
5
|
-
it("exports transcribe function", () => {
|
|
6
|
-
expect(typeof transcribe).toBe("function");
|
|
7
|
-
});
|
|
8
|
-
|
|
9
5
|
it("rejects missing file", async () => {
|
|
10
6
|
await expect(transcribe("/nonexistent/audio.wav")).rejects.toThrow("File not found");
|
|
11
7
|
});
|
package/src/audio.ts
CHANGED
|
@@ -6,40 +6,64 @@ import { randomUUID } from "crypto";
|
|
|
6
6
|
let ffmpegChecked = false;
|
|
7
7
|
|
|
8
8
|
/**
 * Decodes an audio file into raw float samples.
 *
 * ffmpeg is asked for 16000 Hz, single-channel, `f32le` output written to a
 * temporary file; that file is read back into a `Float32Array` and then
 * removed regardless of whether the read succeeded.
 *
 * @param inputPath Path of the audio file to decode.
 * @returns The decoded samples.
 * @throws Whatever `convertAudioWithFfmpeg` throws (missing input, failed conversion).
 */
export async function convertToFloat32PCM(inputPath: string): Promise<Float32Array> {
  const pcmArgs = ["-ar", "16000", "-ac", "1", "-f", "f32le", "-acodec", "pcm_f32le"];
  const pcmFile = await convertAudioWithFfmpeg(inputPath, "f32le", pcmArgs);

  try {
    return new Float32Array(await Bun.file(pcmFile).arrayBuffer());
  } finally {
    try {
      unlinkSync(pcmFile);
    } catch {
      // Best-effort cleanup; the file may already be gone.
    }
  }
}
|
|
24
|
+
|
|
25
|
+
/**
 * Converts an audio file to a 16000 Hz, single-channel, `pcm_s16le` WAV file.
 *
 * @param inputPath Path of the audio file to convert.
 * @returns Path of the newly written temporary `.wav` file. This function does
 *   not delete it; the caller is expected to remove it when done.
 * @throws Whatever `convertAudioWithFfmpeg` throws (missing input, failed conversion).
 */
export async function convertToWav16kMono(inputPath: string): Promise<string> {
  const wavArgs = ["-ar", "16000", "-ac", "1", "-f", "wav", "-acodec", "pcm_s16le"];
  return convertAudioWithFfmpeg(inputPath, "wav", wavArgs);
}
|
|
33
|
+
|
|
34
|
+
/**
 * Runs ffmpeg on `inputPath` and writes the result to a fresh temporary file.
 *
 * @param inputPath Path of the audio file to convert; must exist.
 * @param extension File extension (without the dot) for the temporary output.
 * @param ffmpegArgs Output options placed between the input and the output path.
 * @returns Path of the temporary output file. Ownership passes to the caller,
 *   who is responsible for deleting it.
 * @throws Error `file not found: …` when the input does not exist.
 * @throws Error `failed to convert audio: …` when ffmpeg exits non-zero; any
 *   partially written output is removed before the error is raised.
 */
async function convertAudioWithFfmpeg(
  inputPath: string,
  extension: string,
  ffmpegArgs: string[],
): Promise<string> {
  if (!existsSync(inputPath)) {
    throw new Error(`file not found: ${inputPath}`);
  }

  assertFfmpegExists();

  const tmpPath = join(tmpdir(), `parakeet-${randomUUID()}.${extension}`);

  // `-y` is a global option, so it goes before the input. Placed after the
  // output path it is a trailing option, which ffmpeg warns may be ignored.
  const proc = Bun.spawn(
    ["ffmpeg", "-y", "-i", inputPath, ...ffmpegArgs, tmpPath],
    { stdout: "pipe", stderr: "pipe" }
  );

  const [exitCode, stderr] = await Promise.all([
    proc.exited,
    new Response(proc.stderr).text(),
  ]);

  if (exitCode !== 0) {
    // ffmpeg may have created a partial output before failing; since the path
    // is never returned, nobody else could clean it up.
    try {
      unlinkSync(tmpPath);
    } catch {
      // Best-effort cleanup; the file may never have been created.
    }

    // The last stderr line carries ffmpeg's actual complaint. Fall back to the
    // exit code so the message is never left with an empty detail.
    const lastLine = stderr.trim().split("\n").pop();
    throw new Error(`failed to convert audio: ${lastLine || `ffmpeg exited with code ${exitCode}`}`);
  }

  return tmpPath;
}
|
|
37
63
|
|
|
38
|
-
|
|
64
|
+
function assertFfmpegExists(): void {
|
|
39
65
|
if (ffmpegChecked) return;
|
|
40
|
-
|
|
41
|
-
const exitCode = await proc.exited;
|
|
42
|
-
if (exitCode !== 0) {
|
|
66
|
+
if (!Bun.which("ffmpeg")) {
|
|
43
67
|
throw new Error("ffmpeg not found in PATH");
|
|
44
68
|
}
|
|
45
69
|
ffmpegChecked = true;
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
/** Host details shown in the header of a rendered benchmark report. */
export interface BenchmarkSystemInfo {
  os: string;
  arch: string;
  chip: string;
  ram: string;
  backend: string;
}

/** One benchmark sample: a measured time and the transcript it produced. */
export interface BenchmarkResult {
  /** Elapsed time; the report renders it with an `s` suffix. */
  time: number;
  text: string;
}

/** Aggregate timings for both engines, each rounded to one decimal place. */
export interface BenchmarkSummary {
  whisper_total: number;
  parakeet_total: number;
  /** `whisper_total / parakeet_total` rounded to tenths, or 0 when `parakeet_total` is not positive. */
  speedup: number;
}

/** A rendered report together with the numbers it was built from. */
export interface BenchmarkReport {
  markdown: string;
  summary: BenchmarkSummary;
}

/** Rounds to one decimal place. */
function roundToTenths(value: number): number {
  return Math.round(value * 10) / 10;
}

/** Adds up the `time` of every result. */
function sumTimes(results: readonly BenchmarkResult[]): number {
  return results.reduce((total, result) => total + result.time, 0);
}

/**
 * Makes arbitrary text safe to place inside a Markdown table cell: pipes are
 * backslash-escaped and line breaks collapsed to spaces, because either one
 * would otherwise end the cell or the row.
 */
function escapeTableCell(text: string): string {
  return text.replace(/\|/g, "\\|").replace(/\r\n|\r|\n/g, " ");
}

/**
 * Computes total times and the speedup factor for two paired result lists.
 *
 * The speedup is derived from the already-rounded totals.
 *
 * @param whisperResults faster-whisper samples.
 * @param parakeetResults Parakeet samples, one per faster-whisper sample.
 * @returns Rounded totals and speedup.
 * @throws Error when the two lists differ in length.
 */
export function createBenchmarkSummary(
  whisperResults: readonly BenchmarkResult[],
  parakeetResults: readonly BenchmarkResult[],
): BenchmarkSummary {
  if (whisperResults.length !== parakeetResults.length) {
    throw new Error(
      `Benchmark result count mismatch: faster-whisper=${whisperResults.length}, parakeet=${parakeetResults.length}`,
    );
  }

  const whisperTotal = roundToTenths(sumTimes(whisperResults));
  const parakeetTotal = roundToTenths(sumTimes(parakeetResults));

  return {
    whisper_total: whisperTotal,
    parakeet_total: parakeetTotal,
    // Guard the division so an empty or zero-time run reports 0 instead of Infinity/NaN.
    speedup: parakeetTotal > 0 ? roundToTenths(whisperTotal / parakeetTotal) : 0,
  };
}

/**
 * Renders a Markdown benchmark report comparing faster-whisper with Parakeet.
 *
 * The output starts with an empty line, followed by the header fields, one
 * table row per sample pair, a totals row, and a closing speedup sentence.
 * Transcript text is escaped so it cannot break the table layout.
 *
 * @param args Report date, package version, host info, and both result lists.
 * @returns The Markdown text and the summary it was built from.
 * @throws Error when the two result lists differ in length.
 */
export function renderBenchmarkReport(args: {
  date: string;
  version: string;
  system: BenchmarkSystemInfo;
  whisperResults: readonly BenchmarkResult[];
  parakeetResults: readonly BenchmarkResult[];
}): BenchmarkReport {
  const { date, version, system, whisperResults, parakeetResults } = args;
  // Also validates that both lists have the same length before rows are paired up.
  const summary = createBenchmarkSummary(whisperResults, parakeetResults);

  const lines: string[] = [
    "",
    `**Date:** ${date}`,
    `**Version:** v${version}`,
    `**Runner:** ${system.os} ${system.arch} (${system.chip}, ${system.ram} RAM)`,
    `**Backend:** ${system.backend}`,
    "",
    `| # | faster-whisper | Parakeet (${system.backend}) | faster-whisper Transcript | Parakeet Transcript |`,
    "|---|---------|----------|--------------------|---------------------|",
  ];

  for (let i = 0; i < whisperResults.length; i++) {
    const whisper = whisperResults[i];
    const parakeet = parakeetResults[i];
    lines.push(
      `| ${i + 1} | ${whisper.time}s | ${parakeet.time}s | ${escapeTableCell(whisper.text)} | ${escapeTableCell(parakeet.text)} |`,
    );
  }

  lines.push(
    `| **Total** | **${summary.whisper_total}s** | **${summary.parakeet_total}s** | | |`,
    "",
    `**Parakeet is ~${summary.speedup}x faster.**`,
  );

  return {
    markdown: lines.join("\n"),
    summary,
  };
}
|
package/src/cli.ts
CHANGED
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
#!/usr/bin/env bun
|
|
2
2
|
|
|
3
3
|
import { transcribe } from "./lib";
|
|
4
|
-
import { downloadModel
|
|
4
|
+
import { downloadModel } from "./onnx-install";
|
|
5
|
+
import { downloadCoreML } from "./coreml-install";
|
|
5
6
|
import { isMacArm64 } from "./coreml";
|
|
6
7
|
|
|
7
8
|
async function main(): Promise<void> {
|
|
@@ -0,0 +1,247 @@
|
|
|
1
|
+
import { join, dirname } from "path";
|
|
2
|
+
import { homedir } from "os";
|
|
3
|
+
import { existsSync, mkdirSync, chmodSync } from "fs";
|
|
4
|
+
import { getCoreMLBinPath } from "./coreml";
|
|
5
|
+
|
|
6
|
+
const COREML_BINARY_NAME = "parakeet-coreml-darwin-arm64";
|
|
7
|
+
const GITHUB_REPO = "drakulavich/parakeet-cli";
|
|
8
|
+
|
|
9
|
+
export type CoreMLInstallState = "missing" | "binary-only" | "ready" | "stale-binary";
|
|
10
|
+
export type CoreMLBinaryInstallState = "ready" | "models-missing";
|
|
11
|
+
|
|
12
|
+
export interface CoreMLBinaryCommandResult {
|
|
13
|
+
exitCode: number;
|
|
14
|
+
stdout: string;
|
|
15
|
+
stderr: string;
|
|
16
|
+
}
|
|
17
|
+
|
|
18
|
+
export interface CoreMLBinaryRunner {
|
|
19
|
+
probeCapabilities(binPath: string): CoreMLBinaryCommandResult;
|
|
20
|
+
downloadModels(binPath: string): CoreMLBinaryCommandResult;
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
export interface CoreMLOutputWriter {
|
|
24
|
+
stdout(message: string): void;
|
|
25
|
+
stderr(message: string): void;
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
export interface CoreMLBinaryCapabilities {
|
|
29
|
+
protocolVersion: number;
|
|
30
|
+
installState: CoreMLBinaryInstallState;
|
|
31
|
+
supportedCommands: {
|
|
32
|
+
checkInstall: boolean;
|
|
33
|
+
downloadOnly: boolean;
|
|
34
|
+
};
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
/** Writer used when the caller supplies none: forwards text to the process's own stdout/stderr. */
const defaultOutputWriter: CoreMLOutputWriter = {
  stdout: (message) => {
    process.stdout.write(message);
  },
  stderr: (message) => {
    process.stderr.write(message);
  },
};
|
|
45
|
+
|
|
46
|
+
/**
 * Builds a runner that invokes the CoreML helper binary synchronously.
 *
 * @param spawnSync Process-spawning function; defaults to `Bun.spawnSync` and
 *   can be replaced to observe or fake the invocations.
 * @returns A runner whose `probeCapabilities` passes `--capabilities-json` and
 *   whose `downloadModels` passes `--download-only`, each returning the exit
 *   code plus captured stdout/stderr as strings.
 */
export function createCoreMLBinaryRunner(
  spawnSync: typeof Bun.spawnSync = Bun.spawnSync,
): CoreMLBinaryRunner {
  // Single place that knows how the binary is launched and how output is captured.
  const invoke = (binPath: string, flag: string): CoreMLBinaryCommandResult => {
    const finished = spawnSync([binPath, flag], { stdout: "pipe", stderr: "pipe" });
    const exitCode = finished.exitCode;
    const stdout = finished.stdout.toString();
    const stderr = finished.stderr.toString();
    return { exitCode, stdout, stderr };
  };

  return {
    probeCapabilities: (binPath) => invoke(binPath, "--capabilities-json"),
    downloadModels: (binPath) => invoke(binPath, "--download-only"),
  };
}
|
|
71
|
+
|
|
72
|
+
const defaultCoreMLBinaryRunner = createCoreMLBinaryRunner();
|
|
73
|
+
|
|
74
|
+
/**
 * Returns the CoreML cache directory path: `.cache/parakeet/coreml` under the
 * current user's home directory. The directory is not created here.
 */
export function getCoreMLSupportDir(): string {
  const cacheSegments = [".cache", "parakeet", "coreml"];
  return join(homedir(), ...cacheSegments);
}
|
|
77
|
+
|
|
78
|
+
/**
 * Builds the GitHub release download URL of the CoreML binary for one version.
 *
 * @param version Release version without the leading `v` (it is added here).
 * @returns URL of the binary asset attached to release tag `v<version>`.
 */
export function getCoreMLDownloadURL(version: string): string {
  const releaseTag = `v${version}`;
  const downloadsRoot = `https://github.com/${GITHUB_REPO}/releases/download`;
  return `${downloadsRoot}/${releaseTag}/${COREML_BINARY_NAME}`;
}
|
|
81
|
+
|
|
82
|
+
/**
 * Builds the URL that GitHub resolves to the CoreML binary asset of the most
 * recent release of the repository.
 */
export function getCoreMLLatestDownloadURL(): string {
  const segments = [
    "https://github.com",
    GITHUB_REPO,
    "releases",
    "latest",
    "download",
    COREML_BINARY_NAME,
  ];
  return segments.join("/");
}
|
|
85
|
+
|
|
86
|
+
/**
 * Determines how much of the CoreML backend is present on disk.
 *
 * @param opts Optional overrides:
 *   - `binPath`: binary location (defaults to `getCoreMLBinPath()`).
 *   - `exists`: file-existence check (defaults to `existsSync`).
 *   - `verifyReady`: deeper check run only when the binary exists.
 * @returns `"missing"` when the binary is absent; otherwise the result of
 *   `verifyReady`, or `"binary-only"` when no `verifyReady` was supplied.
 */
export function getCoreMLInstallState(opts?: {
  binPath?: string;
  exists?: (path: string) => boolean;
  verifyReady?: (binPath: string) => CoreMLInstallState;
}): CoreMLInstallState {
  const target = opts?.binPath ?? getCoreMLBinPath();
  const isPresent = opts?.exists ?? existsSync;
  const deepCheck = opts?.verifyReady;

  if (!isPresent(target)) {
    return "missing";
  }

  // Without a deeper check all that is known is that the file is there.
  return deepCheck ? deepCheck(target) : "binary-only";
}
|
|
105
|
+
|
|
106
|
+
/**
 * Decides which download steps are needed for a given install state.
 *
 * @param state Current install state.
 * @param noCache When true, both downloads are forced regardless of `state`.
 * @returns Flags telling whether the binary and/or the models must be downloaded.
 * @throws Error when `state` is not one of the known install states (only
 *   reachable from untyped callers); previously this fell through and
 *   returned `undefined`.
 */
export function planCoreMLInstall(
  state: CoreMLInstallState,
  noCache = false,
): { downloadBinary: boolean; downloadModels: boolean } {
  if (noCache) {
    return { downloadBinary: true, downloadModels: true };
  }

  switch (state) {
    // No usable binary: fetch it, then let it fetch its models.
    case "missing":
    case "stale-binary":
      return { downloadBinary: true, downloadModels: true };
    case "binary-only":
      return { downloadBinary: false, downloadModels: true };
    case "ready":
      return { downloadBinary: false, downloadModels: false };
    default:
      // Fail loudly instead of handing the caller an undefined plan.
      throw new Error(`Unknown CoreML install state: ${String(state)}`);
  }
}
|
|
125
|
+
|
|
126
|
+
/**
 * Parses and validates the JSON printed by the binary's capabilities probe.
 *
 * The payload is accepted only when it is an object with `protocolVersion`
 * exactly 1, `installState` of `"ready"` or `"models-missing"`, and
 * `supportedCommands.checkInstall` / `.downloadOnly` both exactly `true`.
 * Only those validated fields are returned; anything else in the payload is
 * dropped rather than passed through unchecked.
 *
 * @param stdout Raw stdout of the probe command.
 * @returns The validated capabilities, or `null` for malformed or unsupported payloads.
 */
export function parseCoreMLBinaryCapabilities(stdout: string): CoreMLBinaryCapabilities | null {
  let payload: unknown;
  try {
    payload = JSON.parse(stdout);
  } catch {
    return null;
  }

  // JSON.parse may legally yield null or a primitive; neither has fields to inspect.
  if (typeof payload !== "object" || payload === null) {
    return null;
  }
  const { protocolVersion, installState, supportedCommands } = payload as Record<string, unknown>;

  if (protocolVersion !== 1) {
    return null;
  }
  if (installState !== "ready" && installState !== "models-missing") {
    return null;
  }
  if (typeof supportedCommands !== "object" || supportedCommands === null) {
    return null;
  }
  const { checkInstall, downloadOnly } = supportedCommands as Record<string, unknown>;
  if (checkInstall !== true || downloadOnly !== true) {
    return null;
  }

  return {
    protocolVersion: 1,
    installState,
    supportedCommands: { checkInstall: true, downloadOnly: true },
  };
}

/**
 * Maps the outcome of a capabilities probe to an install state.
 *
 * @param exitCode Exit code of the probe command.
 * @param stdout Stdout of the probe command.
 * @returns `"stale-binary"` when the probe failed or printed an unusable
 *   payload, `"ready"` when the binary reports itself ready, otherwise
 *   `"binary-only"`.
 */
export function classifyCoreMLInstallProbe(exitCode: number, stdout: string): CoreMLInstallState {
  if (exitCode !== 0) {
    return "stale-binary";
  }

  const capabilities = parseCoreMLBinaryCapabilities(stdout);
  if (!capabilities) {
    // The binary ran but does not speak the expected protocol.
    return "stale-binary";
  }

  return capabilities.installState === "ready" ? "ready" : "binary-only";
}

/**
 * Downloads the CoreML binary: first from the latest release, and if that
 * request is not OK, from the release matching this package's own version.
 *
 * @returns The successful HTTP response, body not yet consumed.
 * @throws Error when neither URL returns an OK response.
 */
async function fetchCoreMLBinary(): Promise<Response> {
  const latestUrl = getCoreMLLatestDownloadURL();
  let res = await fetch(latestUrl, { redirect: "follow" });

  if (res.ok) {
    return res;
  }

  // Fall back to the release tagged with the version in package.json.
  const pkg = await Bun.file(new URL("../package.json", import.meta.url)).json();
  const versionUrl = getCoreMLDownloadURL(pkg.version);
  res = await fetch(versionUrl, { redirect: "follow" });

  if (!res.ok) {
    throw new Error(`Failed to download CoreML binary (HTTP ${res.status}). No release found with ${COREML_BINARY_NAME}.`);
  }

  return res;
}

/**
 * Probes an existing binary and reports its install state.
 *
 * @param binPath Path of the binary to probe.
 * @param runner Runner used to execute the probe; defaults to the real one.
 * @returns The state derived from the probe. A probe that throws — the runner
 *   could not execute the file at all — is reported as `"stale-binary"` so the
 *   install plan replaces the file instead of the error aborting the install.
 */
export function getCoreMLInstallStatus(
  binPath: string,
  runner: CoreMLBinaryRunner = defaultCoreMLBinaryRunner,
): CoreMLInstallState {
  let probe: CoreMLBinaryCommandResult;
  try {
    probe = runner.probeCapabilities(binPath);
  } catch {
    // NOTE(review): presumably a truncated or non-executable cached file makes the spawn itself throw — confirm against Bun.spawnSync.
    return "stale-binary";
  }

  return classifyCoreMLInstallProbe(probe.exitCode, probe.stdout);
}
|
|
191
|
+
|
|
192
|
+
/**
 * Asks the binary to download its models and relays what it printed.
 *
 * Captured stderr is forwarded first, then captured stdout; empty output is
 * not forwarded at all.
 *
 * @param binPath Path of the CoreML binary.
 * @param runner Runner used to execute the download; defaults to the real one.
 * @param output Sink for the command's output; defaults to the process streams.
 * @throws Error `Failed to download CoreML models`, with the trimmed stderr
 *   appended when there is any, if the command exits non-zero.
 */
export async function ensureCoreMLModels(
  binPath: string,
  runner: CoreMLBinaryRunner = defaultCoreMLBinaryRunner,
  output: CoreMLOutputWriter = defaultOutputWriter,
): Promise<void> {
  const result = runner.downloadModels(binPath);
  const captured = { err: result.stderr, out: result.stdout };

  if (captured.err) output.stderr(captured.err);
  if (captured.out) output.stdout(captured.out);

  if (result.exitCode === 0) {
    return;
  }

  const reason = captured.err.trim();
  const message = reason
    ? `Failed to download CoreML models: ${reason}`
    : "Failed to download CoreML models";
  throw new Error(message);
}
|
|
211
|
+
|
|
212
|
+
/**
 * Installs the CoreML backend: the helper binary and, through it, the models.
 *
 * The current install state is inspected first (the capabilities probe is
 * skipped when `noCache` is set) and only the missing pieces are fetched.
 *
 * @param noCache When true, re-download the binary and the models unconditionally.
 * @param opts Optional runner and output writer overrides.
 * @returns Path of the installed binary.
 * @throws Error when the binary cannot be downloaded or the model download fails.
 */
export async function downloadCoreML(
  noCache = false,
  opts?: {
    runner?: CoreMLBinaryRunner;
    output?: CoreMLOutputWriter;
  },
): Promise<string> {
  const binPath = getCoreMLBinPath();
  const runner = opts?.runner ?? defaultCoreMLBinaryRunner;
  const output = opts?.output ?? defaultOutputWriter;

  // With noCache everything is re-downloaded anyway, so probing would be wasted work.
  const probe = noCache
    ? undefined
    : (path: string) => getCoreMLInstallStatus(path, runner);
  const installState = getCoreMLInstallState({ binPath, verifyReady: probe });
  const { downloadBinary, downloadModels } = planCoreMLInstall(installState, noCache);

  const nothingToDo = !downloadBinary && !downloadModels;
  if (nothingToDo) {
    console.log("CoreML backend already installed.");
    return binPath;
  }

  if (downloadBinary) {
    console.error("Downloading parakeet-coreml binary...");
    const response = await fetchCoreMLBinary();
    const targetDir = dirname(binPath);
    mkdirSync(targetDir, { recursive: true });
    await Bun.write(binPath, response);
    // Mark the freshly written file executable.
    chmodSync(binPath, 0o755);
  }

  if (downloadModels) {
    await ensureCoreMLModels(binPath, runner, output);
  }

  console.log("CoreML backend installed successfully.");
  return binPath;
}
|
package/src/coreml.ts
CHANGED
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
import { join } from "path";
|
|
2
2
|
import { homedir } from "os";
|
|
3
3
|
import { existsSync } from "fs";
|
|
4
|
+
import { unlinkSync } from "fs";
|
|
5
|
+
import { convertToWav16kMono } from "./audio";
|
|
4
6
|
|
|
5
7
|
export function isMacArm64(): boolean {
|
|
6
8
|
return process.platform === "darwin" && process.arch === "arm64";
|
|
@@ -15,6 +17,32 @@ export function isCoreMLInstalled(): boolean {
|
|
|
15
17
|
}
|
|
16
18
|
|
|
17
19
|
/**
 * Transcribes an audio file with the CoreML binary.
 *
 * The file is first handed to the binary as-is. If that fails in a way
 * `shouldRetryCoreMLWithWav` accepts, the audio is converted to a temporary
 * WAV file, transcription is attempted once more on it, and the temporary
 * file is removed afterwards.
 *
 * @param audioPath Path of the audio file to transcribe.
 * @returns The transcript produced by the binary.
 * @throws The original error when no retry applies; otherwise any error from
 *   the conversion or from the second attempt.
 */
export async function transcribeCoreML(audioPath: string): Promise<string> {
  let firstFailure: unknown;
  try {
    return await runCoreML(audioPath);
  } catch (error) {
    firstFailure = error;
  }

  if (!shouldRetryCoreMLWithWav(audioPath, firstFailure)) {
    throw firstFailure;
  }

  const wavPath = await convertToWav16kMono(audioPath);
  try {
    return await runCoreML(wavPath);
  } finally {
    try {
      unlinkSync(wavPath);
    } catch {
      // Best-effort cleanup of the temporary WAV file.
    }
  }
}
|
|
35
|
+
|
|
36
|
+
/**
 * Tells whether a failed CoreML run is worth repeating on a WAV conversion.
 *
 * @param audioPath Path that was given to the binary.
 * @param error The failure that was caught; non-`Error` values are stringified.
 * @returns `false` for paths already ending in `.wav` (case-insensitive);
 *   otherwise `true` exactly when the error text contains
 *   `com.apple.coreaudio.avfaudio error`.
 */
export function shouldRetryCoreMLWithWav(audioPath: string, error: unknown): boolean {
  const alreadyWav = audioPath.toLowerCase().endsWith(".wav");
  if (alreadyWav) {
    return false;
  }

  let text: string;
  if (error instanceof Error) {
    text = error.message;
  } else {
    text = String(error);
  }

  return text.indexOf("com.apple.coreaudio.avfaudio error") !== -1;
}
|
|
44
|
+
|
|
45
|
+
async function runCoreML(audioPath: string): Promise<string> {
|
|
18
46
|
const binPath = getCoreMLBinPath();
|
|
19
47
|
const proc = Bun.spawn([binPath, audioPath], {
|
|
20
48
|
stdout: "pipe",
|
|
@@ -28,7 +56,7 @@ export async function transcribeCoreML(audioPath: string): Promise<string> {
|
|
|
28
56
|
]);
|
|
29
57
|
|
|
30
58
|
if (exitCode !== 0) {
|
|
31
|
-
throw new Error(stderr);
|
|
59
|
+
throw new Error(stderr.trim() || `parakeet-coreml exited with code ${exitCode}`);
|
|
32
60
|
}
|
|
33
61
|
|
|
34
62
|
return stdout.trim();
|
package/src/lib.ts
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import { existsSync } from "fs";
|
|
2
2
|
import { transcribe as internalTranscribe, type TranscribeOptions } from "./transcribe";
|
|
3
|
-
import { downloadModel
|
|
3
|
+
import { downloadModel } from "./onnx-install";
|
|
4
|
+
import { downloadCoreML } from "./coreml-install";
|
|
4
5
|
|
|
5
6
|
export type { TranscribeOptions };
|
|
6
7
|
export { downloadModel, downloadCoreML };
|
package/src/models.ts
CHANGED
|
@@ -1,168 +1,9 @@
|
|
|
1
|
-
import { join, dirname } from "path";
|
|
2
|
-
import { homedir } from "os";
|
|
3
|
-
import { existsSync, mkdirSync, chmodSync } from "fs";
|
|
4
1
|
import { isCoreMLInstalled } from "./coreml";
|
|
2
|
+
import { isModelCached } from "./onnx-install";
|
|
5
3
|
|
|
6
|
-
export
|
|
7
|
-
|
|
8
|
-
export const MODEL_FILES = [
|
|
9
|
-
"encoder-model.onnx",
|
|
10
|
-
"encoder-model.onnx.data",
|
|
11
|
-
"decoder_joint-model.onnx",
|
|
12
|
-
"nemo128.onnx",
|
|
13
|
-
"vocab.txt",
|
|
14
|
-
];
|
|
15
|
-
|
|
16
|
-
export function getModelDir(): string {
|
|
17
|
-
return join(homedir(), ".cache", "parakeet", "v3");
|
|
18
|
-
}
|
|
19
|
-
|
|
20
|
-
export function isModelCached(dir?: string): boolean {
|
|
21
|
-
const d = dir ?? getModelDir();
|
|
22
|
-
return MODEL_FILES.every((f) => existsSync(join(d, f)));
|
|
23
|
-
}
|
|
4
|
+
export * from "./onnx-install";
|
|
5
|
+
export * from "./coreml-install";
|
|
24
6
|
|
|
25
7
|
/**
 * Reports whether any transcription backend is ready to use.
 *
 * @param modelDir Optional ONNX model directory, forwarded to `isModelCached`.
 * @returns `true` if the CoreML backend is installed; otherwise the result of
 *   the ONNX cache check.
 */
export function isModelInstalled(modelDir?: string): boolean {
  // CoreML is checked first; the ONNX cache is only consulted when it is absent.
  if (isCoreMLInstalled()) {
    return true;
  }
  return isModelCached(modelDir);
}
|
|
28
|
-
|
|
29
|
-
export function installHintError(headline: string): Error {
|
|
30
|
-
const lines = [
|
|
31
|
-
headline,
|
|
32
|
-
"",
|
|
33
|
-
"╔══════════════════════════════════════════════════════════╗",
|
|
34
|
-
"║ Please run the following command to get started: ║",
|
|
35
|
-
"║ ║",
|
|
36
|
-
"║ bunx @drakulavich/parakeet-cli install ║",
|
|
37
|
-
"╚══════════════════════════════════════════════════════════╝",
|
|
38
|
-
];
|
|
39
|
-
return new Error(lines.join("\n"));
|
|
40
|
-
}
|
|
41
|
-
|
|
42
|
-
export function requireModel(modelDir?: string): string {
|
|
43
|
-
const dir = modelDir ?? getModelDir();
|
|
44
|
-
|
|
45
|
-
if (!isModelCached(dir)) {
|
|
46
|
-
throw installHintError(`Error: Model not found at ${dir}`);
|
|
47
|
-
}
|
|
48
|
-
|
|
49
|
-
return dir;
|
|
50
|
-
}
|
|
51
|
-
|
|
52
|
-
export async function downloadModel(noCache = false, modelDir?: string): Promise<string> {
|
|
53
|
-
const dir = modelDir ?? getModelDir();
|
|
54
|
-
|
|
55
|
-
if (!noCache && isModelCached(dir)) {
|
|
56
|
-
console.log("Model already downloaded.");
|
|
57
|
-
return dir;
|
|
58
|
-
}
|
|
59
|
-
|
|
60
|
-
mkdirSync(dir, { recursive: true });
|
|
61
|
-
|
|
62
|
-
for (const file of MODEL_FILES) {
|
|
63
|
-
const url = `https://huggingface.co/${HF_REPO}/resolve/main/${file}`;
|
|
64
|
-
const dest = join(dir, file);
|
|
65
|
-
|
|
66
|
-
if (!noCache && existsSync(dest)) continue;
|
|
67
|
-
|
|
68
|
-
console.error(`Downloading ${file}...`);
|
|
69
|
-
|
|
70
|
-
let res: Response;
|
|
71
|
-
try {
|
|
72
|
-
res = await fetch(url, { redirect: "follow" });
|
|
73
|
-
} catch (e) {
|
|
74
|
-
throw new Error(`failed to fetch ${file}: ${e instanceof Error ? e.message : e}`);
|
|
75
|
-
}
|
|
76
|
-
|
|
77
|
-
if (!res.ok) {
|
|
78
|
-
throw new Error(`failed to download ${file}: HTTP ${res.status}`);
|
|
79
|
-
}
|
|
80
|
-
|
|
81
|
-
if (!res.body) {
|
|
82
|
-
throw new Error(`empty response body for ${file}`);
|
|
83
|
-
}
|
|
84
|
-
|
|
85
|
-
const writer = Bun.file(dest).writer();
|
|
86
|
-
let bytes = 0;
|
|
87
|
-
try {
|
|
88
|
-
for await (const chunk of res.body) {
|
|
89
|
-
writer.write(chunk);
|
|
90
|
-
bytes += chunk.length;
|
|
91
|
-
}
|
|
92
|
-
} finally {
|
|
93
|
-
writer.end();
|
|
94
|
-
}
|
|
95
|
-
|
|
96
|
-
if (bytes === 0) {
|
|
97
|
-
throw new Error(`downloaded 0 bytes for ${file}`);
|
|
98
|
-
}
|
|
99
|
-
}
|
|
100
|
-
|
|
101
|
-
console.log("Model downloaded successfully.");
|
|
102
|
-
return dir;
|
|
103
|
-
}
|
|
104
|
-
|
|
105
|
-
const COREML_BINARY_NAME = "parakeet-coreml-darwin-arm64";
|
|
106
|
-
const GITHUB_REPO = "drakulavich/parakeet-cli";
|
|
107
|
-
|
|
108
|
-
export function getCoreMLDownloadURL(version: string): string {
|
|
109
|
-
return `https://github.com/${GITHUB_REPO}/releases/download/v${version}/${COREML_BINARY_NAME}`;
|
|
110
|
-
}
|
|
111
|
-
|
|
112
|
-
export function getCoreMLLatestDownloadURL(): string {
|
|
113
|
-
return `https://github.com/${GITHUB_REPO}/releases/latest/download/${COREML_BINARY_NAME}`;
|
|
114
|
-
}
|
|
115
|
-
|
|
116
|
-
export async function downloadCoreML(noCache = false): Promise<string> {
|
|
117
|
-
const { getCoreMLBinPath } = await import("./coreml");
|
|
118
|
-
const binPath = getCoreMLBinPath();
|
|
119
|
-
|
|
120
|
-
if (!noCache && existsSync(binPath)) {
|
|
121
|
-
// Binary exists — ensure models are also downloaded
|
|
122
|
-
const checkProc = Bun.spawnSync([binPath, "--download-only"], {
|
|
123
|
-
stdout: "pipe",
|
|
124
|
-
stderr: "pipe",
|
|
125
|
-
});
|
|
126
|
-
if (checkProc.exitCode === 0) {
|
|
127
|
-
console.log("CoreML backend already installed.");
|
|
128
|
-
return binPath;
|
|
129
|
-
}
|
|
130
|
-
// Models missing — continue to re-download
|
|
131
|
-
}
|
|
132
|
-
|
|
133
|
-
// Try latest release first, fall back to version-specific
|
|
134
|
-
const latestUrl = getCoreMLLatestDownloadURL();
|
|
135
|
-
console.error("Downloading parakeet-coreml binary...");
|
|
136
|
-
|
|
137
|
-
let res = await fetch(latestUrl, { redirect: "follow" });
|
|
138
|
-
|
|
139
|
-
if (!res.ok) {
|
|
140
|
-
const pkg = await Bun.file(new URL("../package.json", import.meta.url)).json();
|
|
141
|
-
const versionUrl = getCoreMLDownloadURL(pkg.version);
|
|
142
|
-
res = await fetch(versionUrl, { redirect: "follow" });
|
|
143
|
-
}
|
|
144
|
-
|
|
145
|
-
if (!res.ok) {
|
|
146
|
-
throw new Error(`Failed to download CoreML binary (HTTP ${res.status}). No release found with ${COREML_BINARY_NAME}.`);
|
|
147
|
-
}
|
|
148
|
-
|
|
149
|
-
mkdirSync(dirname(binPath), { recursive: true });
|
|
150
|
-
|
|
151
|
-
await Bun.write(binPath, res);
|
|
152
|
-
|
|
153
|
-
chmodSync(binPath, 0o755);
|
|
154
|
-
|
|
155
|
-
// Download CoreML model files (first transcription would be slow without this)
|
|
156
|
-
console.error("Downloading CoreML models...");
|
|
157
|
-
const downloadProc = Bun.spawnSync([binPath, "--download-only"], {
|
|
158
|
-
stdout: "inherit",
|
|
159
|
-
stderr: "inherit",
|
|
160
|
-
});
|
|
161
|
-
|
|
162
|
-
if (downloadProc.exitCode !== 0) {
|
|
163
|
-
throw new Error("Failed to download CoreML models");
|
|
164
|
-
}
|
|
165
|
-
|
|
166
|
-
console.log("CoreML backend installed successfully.");
|
|
167
|
-
return binPath;
|
|
168
|
-
}
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
import { join } from "path";
|
|
2
|
+
import { homedir } from "os";
|
|
3
|
+
import { existsSync, mkdirSync } from "fs";
|
|
4
|
+
|
|
5
|
+
/** Hugging Face repository (`owner/name`) the ONNX model files are downloaded from. */
export const HF_REPO = "istupakov/parakeet-tdt-0.6b-v3-onnx";

/**
 * File names that make up the ONNX model. All of them must exist in the model
 * directory for the model to count as cached, and each one is fetched by
 * `downloadModel`.
 */
export const MODEL_FILES = [
  "encoder-model.onnx",
  "encoder-model.onnx.data",
  "decoder_joint-model.onnx",
  "nemo128.onnx",
  "vocab.txt",
];
|
|
14
|
+
|
|
15
|
+
/**
 * Returns the default directory for the ONNX model files:
 * `<home>/.cache/parakeet/v3`.
 */
export function getModelDir(): string {
  const segments = [homedir(), ".cache", "parakeet", "v3"];
  return join(...segments);
}
|
|
18
|
+
|
|
19
|
+
/**
 * Checks whether every file listed in `MODEL_FILES` exists in the model
 * directory.
 *
 * @param dir Directory to inspect; defaults to `getModelDir()`.
 * @returns `true` only when all model files are present.
 */
export function isModelCached(dir?: string): boolean {
  const baseDir = dir ?? getModelDir();

  for (const name of MODEL_FILES) {
    if (!existsSync(join(baseDir, name))) {
      return false;
    }
  }

  return true;
}
|
|
23
|
+
|
|
24
|
+
/**
 * Builds an `Error` whose message is the given headline followed by a blank
 * line and a boxed hint telling the user to run the install command.
 *
 * @param headline First line of the error message.
 * @returns The constructed error (it is returned, not thrown).
 */
export function installHintError(headline: string): Error {
  const hintBox = [
    "╔══════════════════════════════════════════════════════════╗",
    "║ Please run the following command to get started: ║",
    "║ ║",
    "║ bunx @drakulavich/parakeet-cli install ║",
    "╚══════════════════════════════════════════════════════════╝",
  ];

  const message = [headline, ""].concat(hintBox).join("\n");
  return new Error(message);
}
|
|
36
|
+
|
|
37
|
+
/**
 * Resolves the ONNX model directory and verifies that the model is present.
 *
 * @param modelDir Directory to use; defaults to `getModelDir()`.
 * @returns The resolved model directory.
 * @throws An install-hint error when any model file is missing.
 */
export function requireModel(modelDir?: string): string {
  const dir = modelDir ?? getModelDir();

  if (isModelCached(dir)) {
    return dir;
  }

  throw installHintError(`Error: Model not found at ${dir}`);
}
|
|
46
|
+
|
|
47
|
+
/**
 * Downloads the ONNX model files from Hugging Face into the model directory.
 *
 * Each file is streamed into a sibling `<name>.part` file and only renamed to
 * its final name once the transfer finished and produced at least one byte.
 * A failed, interrupted or empty download therefore never leaves a file at the
 * final path that a later run (or `isModelCached`) would mistake for a valid
 * model file.
 *
 * @param noCache When `true`, re-download every file even if it already exists.
 * @param modelDir Target directory; defaults to `getModelDir()`.
 * @returns The directory containing the model files.
 * @throws If a request fails, returns a non-OK status, has no body, or yields
 *   zero bytes.
 */
export async function downloadModel(noCache = false, modelDir?: string): Promise<string> {
  const dir = modelDir ?? getModelDir();

  if (!noCache && isModelCached(dir)) {
    console.log("Model already downloaded.");
    return dir;
  }

  // Function-scope import so this block does not depend on changes to the
  // file-level import list.
  const { renameSync, rmSync } = await import("fs");

  mkdirSync(dir, { recursive: true });

  for (const file of MODEL_FILES) {
    const url = `https://huggingface.co/${HF_REPO}/resolve/main/${file}`;
    const dest = join(dir, file);

    if (!noCache && existsSync(dest)) continue;

    console.error(`Downloading ${file}...`);

    let res: Response;
    try {
      res = await fetch(url, { redirect: "follow" });
    } catch (e) {
      throw new Error(`failed to fetch ${file}: ${e instanceof Error ? e.message : e}`);
    }

    if (!res.ok) {
      throw new Error(`failed to download ${file}: HTTP ${res.status}`);
    }

    if (!res.body) {
      throw new Error(`empty response body for ${file}`);
    }

    // Stream into a temporary file; it is promoted to `dest` only on success.
    const partPath = `${dest}.part`;
    const writer = Bun.file(partPath).writer();
    let bytes = 0;

    try {
      try {
        for await (const chunk of res.body) {
          writer.write(chunk);
          bytes += chunk.length;
        }
      } finally {
        // end() may flush asynchronously; wait so the data is on disk before
        // the file is renamed or the function returns.
        await writer.end();
      }

      if (bytes === 0) {
        throw new Error(`downloaded 0 bytes for ${file}`);
      }

      renameSync(partPath, dest);
    } catch (e) {
      // Drop the partial file so it cannot be picked up as a cached model file.
      rmSync(partPath, { force: true });
      throw e;
    }
  }

  console.log("Model downloaded successfully.");
  return dir;
}
|
package/src/transcribe.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { requireModel, isModelCached, installHintError } from "./
|
|
1
|
+
import { requireModel, isModelCached, installHintError } from "./onnx-install";
|
|
2
2
|
import { isCoreMLInstalled, transcribeCoreML } from "./coreml";
|
|
3
3
|
import { convertToFloat32PCM } from "./audio";
|
|
4
4
|
import { initPreprocessor, preprocess } from "./preprocess";
|
|
@@ -1,30 +0,0 @@
|
|
|
1
|
-
import { describe, test, expect } from "bun:test";
|
|
2
|
-
import { getModelDir, MODEL_FILES, HF_REPO, getCoreMLDownloadURL } from "../models";
|
|
3
|
-
import { join } from "path";
|
|
4
|
-
import { homedir } from "os";
|
|
5
|
-
|
|
6
|
-
describe("models", () => {
|
|
7
|
-
test("getModelDir returns correct cache path", () => {
|
|
8
|
-
const dir = getModelDir();
|
|
9
|
-
expect(dir).toBe(join(homedir(), ".cache", "parakeet", "v3"));
|
|
10
|
-
});
|
|
11
|
-
|
|
12
|
-
test("MODEL_FILES lists required files", () => {
|
|
13
|
-
expect(MODEL_FILES).toContain("encoder-model.onnx");
|
|
14
|
-
expect(MODEL_FILES).toContain("encoder-model.onnx.data");
|
|
15
|
-
expect(MODEL_FILES).toContain("decoder_joint-model.onnx");
|
|
16
|
-
expect(MODEL_FILES).toContain("nemo128.onnx");
|
|
17
|
-
expect(MODEL_FILES).toContain("vocab.txt");
|
|
18
|
-
});
|
|
19
|
-
|
|
20
|
-
test("HF_REPO points to v3 ONNX repo", () => {
|
|
21
|
-
expect(HF_REPO).toBe("istupakov/parakeet-tdt-0.6b-v3-onnx");
|
|
22
|
-
});
|
|
23
|
-
|
|
24
|
-
test("getCoreMLDownloadURL includes version and correct filename", () => {
|
|
25
|
-
const url = getCoreMLDownloadURL("0.5.0");
|
|
26
|
-
expect(url).toBe(
|
|
27
|
-
"https://github.com/drakulavich/parakeet-cli/releases/download/v0.5.0/parakeet-coreml-darwin-arm64"
|
|
28
|
-
);
|
|
29
|
-
});
|
|
30
|
-
});
|
|
@@ -1,11 +0,0 @@
|
|
|
1
|
-
import { describe, test, expect } from "bun:test";
|
|
2
|
-
import { transcribe } from "../transcribe";
|
|
3
|
-
|
|
4
|
-
describe("transcribe", () => {
|
|
5
|
-
test("returns empty string for very short audio", async () => {
|
|
6
|
-
// Audio < 0.1s (1600 samples) should return empty
|
|
7
|
-
// We can't easily test this without a fixture, so this is a smoke test
|
|
8
|
-
// that the module exports correctly
|
|
9
|
-
expect(typeof transcribe).toBe("function");
|
|
10
|
-
});
|
|
11
|
-
});
|