transcribe-cpp 0.0.0 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,20 @@
1
+ /**
2
+ * Resolve the native transcribe.cpp library. Resolution order (mirrors the
3
+ * Python provider choke point in `_library.py`):
4
+ *
5
+ * 1. TRANSCRIBE_LIBRARY — explicit path (developer escape hatch).
6
+ * 2. A per-platform npm package `@transcribe-cpp/<tuple>` (the prebuilt
7
+ * bundle wrapping the shared `transcribe-native-<tuple>` bytes).
8
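+ * 3. A source build installed by `npm run build:native` under
+ * `<package>/prebuilds/<tuple>/lib`.
+ * 4. A dev build tree (build-shared/src, build/src, …) found by walking up.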
9
+ *
10
+ * Before dlopen of a platform package, its `contract.json` is validated
11
+ * (version base-match + header_hash equals our PUBLIC_HEADER_HASH).
12
+ */
13
/** Where the native library was found and who supplied it. */
export interface Resolved {
    /** Path of the shared library to load. */
    libraryPath: string;
    /** Directory holding the library and any ggml backend modules. */
    artifactDir: string;
    /**
     * Provider package name (`@transcribe-cpp/<tuple>`), `source-build:<tuple>`
     * for a local source build, or null for env/dev-tree resolution.
     */
    provider: string | null;
}
/**
 * Locate the native library for this host.
 * Throws when the explicit override is missing, a platform package fails
 * contract validation, or no library can be found at all.
 */
export declare function resolveLibrary(): Resolved;
package/dist/loader.js ADDED
@@ -0,0 +1,140 @@
1
+ /**
2
+ * Resolve the native transcribe.cpp library. Resolution order (mirrors the
3
+ * Python provider choke point in `_library.py`):
4
+ *
5
+ * 1. TRANSCRIBE_LIBRARY — explicit path (developer escape hatch).
6
+ * 2. A per-platform npm package `@transcribe-cpp/<tuple>` (the prebuilt
7
+ * bundle wrapping the shared `transcribe-native-<tuple>` bytes).
8
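+ * 3. A source build installed by `npm run build:native` under
+ * `<package>/prebuilds/<tuple>/lib`.
+ * 4. A dev build tree (build-shared/src, build/src, …) found by walking up.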
9
+ *
10
+ * Before dlopen of a platform package, its `contract.json` is validated
11
+ * (version base-match + header_hash equals our PUBLIC_HEADER_HASH).
12
+ */
13
+ import * as fs from "node:fs";
14
+ import * as path from "node:path";
15
+ import { createRequire } from "node:module";
16
+ import { fileURLToPath } from "node:url";
17
+ import * as g from "./_generated.js";
18
+ import { OUR_VERSION, baseVersion } from "./version.js";
19
+ import { AbiError, BackendError, TranscribeError, VersionMismatch } from "./errors.js";
20
/** File name of the transcribe shared library on the host platform. */
const LIB_NAME = (() => {
    switch (process.platform) {
        case "darwin":
            return "libtranscribe.dylib";
        case "win32":
            return "transcribe.dll";
        default:
            // Every other platform uses the ELF-style name.
            return "libtranscribe.so";
    }
})();
25
/**
 * Name of the prebuilt platform package tuple for this host.
 *
 * Keys follow Node's `process.platform` / `process.arch` values (the same
 * vocabulary as npm's os/cpu fields); the value is the suffix of the
 * `@transcribe-cpp/<tuple>` package, e.g. `darwin-arm64-metal`.
 *
 * @returns {string | null} The tuple, or null when no prebuilt package is
 *   mapped for this platform/arch combination.
 */
function platformTuple() {
    const prebuiltTuples = new Map([
        ["darwin/arm64", "darwin-arm64-metal"],
        ["darwin/x64", "darwin-x64-cpu"],
        ["linux/x64", "linux-x64-cpu-vulkan"],
        ["linux/arm64", "linux-arm64-cpu-vulkan"],
        ["win32/x64", "win32-x64-cpu-vulkan"],
    ]);
    return prebuiltTuples.get(`${process.platform}/${process.arch}`) ?? null;
}
51
/**
 * Validate a bundle's `contract.json` before its library is loaded.
 *
 * @param {string} dir - Directory that should contain `contract.json`.
 * @param {string} provider - Provider name, used only in error messages.
 * @throws {BackendError} When the file is unreadable, is not valid JSON, or
 *   does not hold a JSON object.
 * @throws {AbiError} When `header_hash` differs from `g.PUBLIC_HEADER_HASH`.
 * @throws {VersionMismatch} When a `version` is present and its base version
 *   differs from `OUR_VERSION`.
 */
function validateContract(dir, provider) {
    const file = path.join(dir, "contract.json");
    let contract;
    try {
        contract = JSON.parse(fs.readFileSync(file, "utf8"));
    }
    catch (e) {
        throw new BackendError(`provider ${provider} is missing a readable contract.json at ${file}: ${String(e)}`);
    }
    // Valid JSON is not necessarily an object: `null` would otherwise surface
    // as a raw TypeError on the first property access below.
    if (contract === null || typeof contract !== "object" || Array.isArray(contract)) {
        throw new BackendError(`provider ${provider} has a malformed contract.json at ${file}: expected a JSON object`);
    }
    if (contract.header_hash !== g.PUBLIC_HEADER_HASH) {
        throw new AbiError(`provider ${provider} was built against header hash ${contract.header_hash} ` +
            `but this binding expects ${g.PUBLIC_HEADER_HASH}. Reinstall matching packages.`);
    }
    // A missing/empty version is tolerated; a present one must match. Coerce to
    // a string so a numeric version is reported as a mismatch instead of
    // crashing inside baseVersion().
    if (contract.version && baseVersion(String(contract.version)) !== OUR_VERSION) {
        throw new VersionMismatch(`provider ${provider} is version ${contract.version} but this binding is ${OUR_VERSION}. ` +
            `Pre-1.0 requires an exact base-version match.`);
    }
}
70
/**
 * Look for the installed prebuilt package `@transcribe-cpp/<tuple>`.
 *
 * @returns {{ libraryPath: string, artifactDir: string, provider: string } | null}
 *   The package's library location, or null when the host has no tuple or the
 *   package cannot be resolved.
 * @throws Whatever `validateContract` throws: a contract mismatch is fatal
 *   and never falls through to later resolution steps.
 */
function tryPlatformPackage() {
    const hostTuple = platformTuple();
    if (!hostTuple) {
        return null;
    }
    const provider = `@transcribe-cpp/${hostTuple}`;
    const localRequire = createRequire(import.meta.url);
    let manifestPath;
    try {
        manifestPath = localRequire.resolve(`${provider}/package.json`);
    }
    catch {
        // Package not installed: let the caller try the next resolution step.
        return null;
    }
    const packageDir = path.dirname(manifestPath);
    validateContract(packageDir, provider);
    return {
        libraryPath: path.join(packageDir, LIB_NAME),
        artifactDir: packageDir,
        provider,
    };
}
86
/**
 * Probe for a library installed by the source build (`npm run build:native`)
 * at `<package>/prebuilds/<tuple>/lib/<lib>`.
 *
 * The tuple is derived the same way `scripts/build-from-source.mjs` names its
 * install prefix, NOT via the prebuilt-package table: the source build exists
 * for hosts that have no prebuilt package, so gating on that table would hide
 * the build on exactly the hosts that need it. Keep the two in sync.
 *
 * @returns {{ libraryPath: string, artifactDir: string, provider: string } | null}
 *   The local build's location, or null when the platform is not one the
 *   build script supports or no library is installed.
 */
function tryLocalPrebuild() {
    const { platform, arch } = process;
    let tuple;
    if (platform === "darwin") {
        tuple = arch === "arm64" ? "darwin-arm64-metal" : "darwin-x64-cpu";
    }
    else if (platform === "linux" || platform === "win32") {
        tuple = `${platform}-${arch}-cpu-vulkan`;
    }
    else {
        // The build script refuses every other platform, so nothing can be here.
        return null;
    }
    // This module lives in <package>/dist, one level below the package root.
    const pkgRoot = path.resolve(path.dirname(fileURLToPath(import.meta.url)), "..");
    const dir = path.join(pkgRoot, "prebuilds", tuple, "lib");
    const cand = path.join(dir, LIB_NAME);
    return fs.existsSync(cand)
        ? { libraryPath: cand, artifactDir: dir, provider: `source-build:${tuple}` }
        : null;
}
99
/**
 * Find a library in a developer checkout.
 *
 * Starting from this module's directory, climb at most 12 levels looking for
 * a directory that holds both `CMakeLists.txt` and `include/transcribe.h`,
 * then probe the known build output directories beneath it in order.
 *
 * @returns {{ libraryPath: string, artifactDir: string, provider: null } | null}
 *   The first existing library, or null when no repo root or no build output
 *   is found.
 */
function tryDevTree() {
    const isRepoRoot = (candidate) => fs.existsSync(path.join(candidate, "CMakeLists.txt")) &&
        fs.existsSync(path.join(candidate, "include", "transcribe.h"));
    let current = path.dirname(fileURLToPath(import.meta.url));
    let repoRoot = null;
    for (let hops = 0; hops < 12; hops += 1) {
        if (isRepoRoot(current)) {
            repoRoot = current;
            break;
        }
        const parent = path.dirname(current);
        if (parent === current) {
            // Reached the filesystem root.
            break;
        }
        current = parent;
    }
    if (!repoRoot) {
        return null;
    }
    const buildDirs = ["build-shared/src", "build-shared/bin", "build/src", "build/bin"];
    const found = buildDirs
        .map((rel) => path.join(repoRoot, rel, LIB_NAME))
        .find((candidate) => fs.existsSync(candidate));
    return found === undefined
        ? null
        : { libraryPath: found, artifactDir: path.dirname(found), provider: null };
}
124
/**
 * Resolve the native library for this host.
 *
 * Order: the `TRANSCRIBE_LIBRARY` override, the installed platform package,
 * a local source build under `prebuilds/`, then a dev build tree.
 *
 * @returns {{ libraryPath: string, artifactDir: string, provider: string | null }}
 * @throws {TranscribeError} When `TRANSCRIBE_LIBRARY` names a missing file.
 * @throws {BackendError} When no library can be found by any step.
 */
export function resolveLibrary() {
    const override = process.env.TRANSCRIBE_LIBRARY;
    if (override) {
        // Resolve against the cwd now, the same base existsSync uses. A bare
        // file name handed to the dynamic loader is searched on the system
        // library path rather than the cwd, so it could load a different file
        // than the one that was just checked.
        const libraryPath = path.resolve(override);
        if (!fs.existsSync(libraryPath)) {
            throw new TranscribeError(`TRANSCRIBE_LIBRARY points at a missing file: ${override}`);
        }
        return { libraryPath, artifactDir: path.dirname(libraryPath), provider: null };
    }
    const resolved = tryPlatformPackage() ?? tryLocalPrebuild() ?? tryDevTree();
    if (resolved) {
        return resolved;
    }
    throw new BackendError("No transcribe.cpp native library found. Set TRANSCRIBE_LIBRARY, install a " +
        "matching @transcribe-cpp/<platform> package, run `npm run build:native` to " +
        `build from source, or build the shared library (build-shared/src/${LIB_NAME}).`);
}
@@ -0,0 +1,25 @@
1
+ /**
2
+ * Native bootstrap: resolve -> load -> bind -> verify ABI -> version-gate ->
3
+ * init backends. The single choke point both the public API and tests go
4
+ * through. Lazy and memoized: nothing loads until the first call.
5
+ */
6
+ import { type Bound } from "./ffi.js";
7
/** The bound native library plus bootstrap metadata, as returned by `native()`. */
export interface Native extends Bound {
    /** Path of the shared library that was resolved and bound. */
    libraryPath: string;
    /** Provider reported by library resolution; null for env/dev resolution. */
    provider: string | null;
    /** Value returned by `abortProto()` from `./ffi.js` at bootstrap. */
    abortProto: any;
    /** Value returned by `logProto()` from `./ffi.js` at bootstrap. */
    logProto: any;
}
13
+ export type LogHandler = (level: number, message: string) => void;
14
+ export declare function native(): Native;
15
+ /**
16
+ * Route native (and ggml) diagnostics to `handler`, or pass null to disable.
17
+ * The callback may fire from ggml worker threads; koffi marshals it to the
18
+ * event-loop thread (proven by the M2 spike), so the handler runs on the main
19
+ * thread. Exceptions thrown by the handler are swallowed (never re-enter C).
20
+ *
21
+ * The native callback is installed once during lazy bootstrap, before backend
22
+ * initialization. Later calls only swap this JS handler; they never call
23
+ * transcribe_log_set again.
24
+ */
25
+ export declare function setLogHandler(handler: LogHandler | null): void;
package/dist/native.js ADDED
@@ -0,0 +1,77 @@
1
+ /**
2
+ * Native bootstrap: resolve -> load -> bind -> verify ABI -> version-gate ->
3
+ * init backends. The single choke point both the public API and tests go
4
+ * through. Lazy and memoized: nothing loads until the first call.
5
+ */
6
+ import { resolveLibrary } from "./loader.js";
7
+ import { abortProto, bindLibrary, logProto } from "./ffi.js";
8
+ import { verifyLayouts } from "./abi.js";
9
+ import { BackendError, VersionMismatch } from "./errors.js";
10
+ import { OUR_VERSION, baseVersion } from "./version.js";
11
+ import * as g from "./_generated.js";
12
/** Memoized result of `native()`; null until a bootstrap has succeeded. */
let cached = null;
/** JS function currently receiving native log lines, or null when disabled. */
let logHandler = null;
/** Handle returned by `koffi.register` for the log thunk; null until installed. */
let logRegistered = null;
/**
 * Install the native log callback exactly once.
 *
 * The registered thunk never changes; it forwards to whatever `logHandler`
 * holds at call time, so swapping handlers needs no further native call.
 *
 * @param bound - Bound library exposing `koffi` and `F.logSet`.
 */
function ensureLogCallback(bound) {
    if (!logRegistered) {
        const dispatch = (level, msg) => {
            const target = logHandler;
            if (target) {
                try {
                    target(level, msg ?? "");
                }
                catch {
                    /* a logging handler must never propagate into native code */
                }
            }
        };
        const callbackType = bound.koffi.pointer(logProto());
        logRegistered = bound.koffi.register(dispatch, callbackType);
        bound.F.logSet(logRegistered, null);
    }
}
32
/**
 * Bootstrap the native library on first use and memoize the result.
 *
 * Steps, in order: resolve the library, bind it, verify struct layouts, gate
 * on the reported version, install the log callback, initialise backends.
 *
 * @returns The bound library extended with `libraryPath`, `provider`,
 *   `abortProto` and `logProto`.
 * @throws {VersionMismatch} When the library's base version differs from
 *   `OUR_VERSION`.
 * @throws {BackendError} When neither backend-init call returns
 *   `TRANSCRIBE_OK`.
 */
export function native() {
    if (cached) {
        return cached;
    }
    const { libraryPath, artifactDir, provider } = resolveLibrary();
    const bound = bindLibrary(libraryPath);
    verifyLayouts(bound);
    const reported = bound.F.version();
    if (baseVersion(reported) !== OUR_VERSION) {
        throw new VersionMismatch(`native library is version ${reported} but this binding is ${OUR_VERSION} ` +
            `(pre-1.0 requires an exact base-version match)`);
    }
    // transcribe_log_set is only supported as a startup-time install, so the
    // stable dispatch callback goes in before backend init can create worker
    // threads; setLogHandler later only swaps the JS target.
    ensureLogCallback(bound);
    // First try the package-local artifact directory; only if that fails fall
    // back to the library's default backend discovery.
    const packageLocal = bound.F.initBackends(artifactDir);
    const status = packageLocal === g.TRANSCRIBE_OK
        ? packageLocal
        : bound.F.initBackendsDefault();
    if (status !== g.TRANSCRIBE_OK) {
        throw new BackendError(`transcribe_init_backends found no usable compute device in ${artifactDir}: ` +
            `${bound.F.statusString(status)} (status ${status})`);
    }
    cached = {
        ...bound,
        libraryPath,
        provider,
        abortProto: abortProto(),
        logProto: logProto(),
    };
    return cached;
}
65
/**
 * Route native (and ggml) diagnostics to `handler`, or pass null to disable.
 * The callback may fire from ggml worker threads; koffi marshals it to the
 * event-loop thread (proven by the M2 spike), so the handler runs on the main
 * thread. Exceptions thrown by the handler are swallowed (never re-enter C).
 *
 * The native callback is installed once during lazy bootstrap, before backend
 * initialization. Later calls only swap this JS handler; they never call
 * transcribe_log_set again.
 *
 * @param handler - `(level, message) => void`, or null/undefined to disable.
 * @throws {TypeError} When `handler` is neither a function nor null/undefined.
 *   Without this check a non-callable value would fail on every dispatch
 *   inside the swallow-all guard, silently discarding all log output.
 */
export function setLogHandler(handler) {
    if (handler != null && typeof handler !== "function") {
        throw new TypeError(`setLogHandler expects a function or null, got ${typeof handler}`);
    }
    logHandler = handler ?? null;
}
@@ -0,0 +1,181 @@
1
+ /** Public value types for the transcribe.cpp TypeScript binding. */
2
+ import type { TranscribeError } from "./errors.js";
3
+ export type Backend = "auto" | "cpu" | "cpu_accel" | "cuda" | "vulkan" | "metal";
4
+ export type KvType = "auto" | "f32" | "f16";
5
+ export type Task = "transcribe" | "translate";
6
+ export type TimestampKind = "none" | "auto" | "segment" | "word" | "token";
7
+ export type Feature = "initial_prompt" | "temperature_fallback" | "long_form" | "cancellation" | "pnc" | "itn";
8
+ /** Mono float32 PCM at the model's native sample rate (16 kHz for v1). */
9
+ export type PcmLike = Float32Array | number[] | ArrayBuffer | Buffer;
10
+ export interface Segment {
11
+ text: string;
12
+ t0Ms: number;
13
+ t1Ms: number;
14
+ firstWord: number;
15
+ nWords: number;
16
+ firstToken: number;
17
+ nTokens: number;
18
+ }
19
+ export interface Word {
20
+ text: string;
21
+ t0Ms: number;
22
+ t1Ms: number;
23
+ segIndex: number;
24
+ firstToken: number;
25
+ nTokens: number;
26
+ }
27
+ export interface Token {
28
+ text: string;
29
+ id: number;
30
+ p: number;
31
+ t0Ms: number;
32
+ t1Ms: number;
33
+ segIndex: number;
34
+ wordIndex: number;
35
+ }
36
+ export interface Timings {
37
+ loadMs: number;
38
+ melMs: number;
39
+ encodeMs: number;
40
+ decodeMs: number;
41
+ }
42
+ export interface Capabilities {
43
+ nativeSampleRate: number;
44
+ languages: string[];
45
+ maxTimestampKind: TimestampKind;
46
+ supportsLanguageDetect: boolean;
47
+ supportsTranslate: boolean;
48
+ supportsStreaming: boolean;
49
+ supportsSpecDecode: boolean;
50
+ maxAudioMs: number;
51
+ }
52
+ export interface SessionLimits {
53
+ effectiveNCtx: number;
54
+ effectiveMaxAudioMs: number;
55
+ maxKvBytes: number;
56
+ }
57
+ export interface TranscriptionResult {
58
+ text: string;
59
+ language: string;
60
+ timestampKind: TimestampKind;
61
+ segments: Segment[];
62
+ words: Word[];
63
+ tokens: Token[];
64
+ timings: Timings;
65
+ aborted: boolean;
66
+ truncated: boolean;
67
+ }
68
+ export interface BackendInfo {
69
+ name: string;
70
+ description: string;
71
+ kind: string;
72
+ }
73
+ export interface ModelOptions {
74
+ /** "auto" (default), or an explicit backend. */
75
+ backend?: Backend;
76
+ /** GPU device ordinal for multi-GPU hosts. */
77
+ gpuDevice?: number;
78
+ }
79
+ export interface SessionOptions {
80
+ /** CPU threads for CPU-side ops; 0 = library default. */
81
+ nThreads?: number;
82
+ kvType?: KvType;
83
+ /** Context-token cap; 0 = model default. */
84
+ nCtx?: number;
85
+ }
86
+ export interface TranscribeOptions {
87
+ task?: Task;
88
+ language?: string;
89
+ targetLanguage?: string;
90
+ timestamps?: TimestampKind;
91
+ keepSpecialTags?: boolean;
92
+ /** Speculative-decode draft count; -1 = family default. */
93
+ specKDrafts?: number;
94
+ /** Cancel the run cooperatively. */
95
+ signal?: AbortSignal;
96
+ /** A run-slot family extension (e.g. whisper). */
97
+ family?: FamilyExtension;
98
+ }
99
+ /** A native compute device the runtime discovered. */
100
+ export interface DeviceInfo extends BackendInfo {
101
+ }
102
+ /** One result of a batch run: success carries the transcript, failure the error.
103
+ * On failure, `error.utteranceIndex` is set, and `error.partialResult` carries any
104
+ * recovered transcript when the failure was an abort/truncation. */
105
+ export type BatchItem = {
106
+ ok: true;
107
+ result: TranscriptionResult;
108
+ } | {
109
+ ok: false;
110
+ error: TranscribeError;
111
+ };
112
+ export type CommitPolicy = "auto" | "on_finalize" | "stable_prefix";
113
+ export type StreamState = "idle" | "active" | "finished" | "failed";
114
+ export type ExtSlot = "run" | "stream";
115
+ export interface StreamUpdate {
116
+ resultChanged: boolean;
117
+ isFinal: boolean;
118
+ revision: number;
119
+ inputReceivedMs: number;
120
+ audioCommittedMs: number;
121
+ bufferedMs: number;
122
+ committedChanged: boolean;
123
+ tentativeChanged: boolean;
124
+ }
125
+ export interface StreamText {
126
+ /** Raw model hypothesis. */
127
+ full: string;
128
+ /** Append-only, stable prefix. */
129
+ committed: string;
130
+ /** Volatile suffix. */
131
+ tentative: string;
132
+ }
133
+ export interface StreamOptions {
134
+ task?: Task;
135
+ language?: string;
136
+ targetLanguage?: string;
137
+ timestamps?: TimestampKind;
138
+ keepSpecialTags?: boolean;
139
+ commitPolicy?: CommitPolicy;
140
+ stablePrefixAgreementN?: number;
141
+ /** A stream-slot family extension (moonshine, parakeet, voxtral). */
142
+ family?: FamilyExtension;
143
+ }
144
+ export interface WhisperRunOptions {
145
+ initialPrompt?: string;
146
+ conditionOnPrevTokens?: boolean;
147
+ temperature?: number;
148
+ temperatureInc?: number;
149
+ compressionRatioThold?: number;
150
+ logprobThold?: number;
151
+ noSpeechThold?: number;
152
+ maxPrevContextTokens?: number;
153
+ seed?: number;
154
+ maxInitialTimestamp?: number;
155
+ }
156
+ export interface MoonshineStreamingOptions {
157
+ minDecodeIntervalMs?: number;
158
+ }
159
+ export interface ParakeetStreamOptions {
160
+ attContextRight?: number;
161
+ }
162
+ export interface ParakeetBufferedStreamOptions {
163
+ leftMs?: number;
164
+ chunkMs?: number;
165
+ rightMs?: number;
166
+ }
167
+ export interface VoxtralRealtimeStreamOptions {
168
+ numDelayTokens?: number;
169
+ minDecodeIntervalMs?: number;
170
+ }
171
+ export type FamilyExtension = ({
172
+ kind: "whisper";
173
+ } & WhisperRunOptions) | ({
174
+ kind: "moonshine";
175
+ } & MoonshineStreamingOptions) | ({
176
+ kind: "parakeet";
177
+ } & ParakeetStreamOptions) | ({
178
+ kind: "parakeet_buffered";
179
+ } & ParakeetBufferedStreamOptions) | ({
180
+ kind: "voxtral";
181
+ } & VoxtralRealtimeStreamOptions);
package/dist/types.js ADDED
@@ -0,0 +1,2 @@
1
+ /** Public value types for the transcribe.cpp TypeScript binding. */
2
+ export {};
@@ -0,0 +1,18 @@
1
+ /**
2
+ * This binding's own version, and the base-version helper the load gates use.
3
+ *
4
+ * `OUR_VERSION` is read from the API package's `package.json` at runtime — NOT
5
+ * from the generated FFI macros. The generators stopped emitting
6
+ * `TRANSCRIBE_VERSION_*` so a version-only bump no longer churns generated
7
+ * files or the abihash (notes/releasing.md §8 P0 #1). `package.json` is always
8
+ * present in the published tarball and sits one directory above the compiled
9
+ * `dist/` output, so a runtime `require("../package.json")` resolves it.
10
+ *
11
+ * A runtime `createRequire(...)` is used rather than `import "../package.json"`:
12
+ * tsconfig sets `rootDir: "src"` with no `resolveJsonModule`, so a static JSON
13
+ * import would break the emit layout.
14
+ */
15
+ /** Leading dotted-numeric release segment, suffix stripped: "0.0.1.post3" -> "0.0.1". */
16
+ export declare function baseVersion(v: string): string;
17
+ /** The base `MAJOR.MINOR.PATCH` this binding was built as. */
18
+ export declare const OUR_VERSION: string;
@@ -0,0 +1,30 @@
1
+ /**
2
+ * This binding's own version, and the base-version helper the load gates use.
3
+ *
4
+ * `OUR_VERSION` is read from the API package's `package.json` at runtime — NOT
5
+ * from the generated FFI macros. The generators stopped emitting
6
+ * `TRANSCRIBE_VERSION_*` so a version-only bump no longer churns generated
7
+ * files or the abihash (notes/releasing.md §8 P0 #1). `package.json` is always
8
+ * present in the published tarball and sits one directory above the compiled
9
+ * `dist/` output, so a runtime `require("../package.json")` resolves it.
10
+ *
11
+ * A runtime `createRequire(...)` is used rather than `import "../package.json"`:
12
+ * tsconfig sets `rootDir: "src"` with no `resolveJsonModule`, so a static JSON
13
+ * import would break the emit layout.
14
+ */
15
+ import { createRequire } from "node:module";
16
/**
 * Extract the leading dotted-numeric release segment of a version string,
 * dropping any suffix: "0.0.1.post3" -> "0.0.1".
 *
 * @param {string} v - Version string; surrounding whitespace is ignored.
 * @returns {string} The numeric prefix, or the trimmed input unchanged when
 *   it does not start with a digit.
 */
export function baseVersion(v) {
    const trimmed = v.trim();
    const release = trimmed.match(/^\d+(?:\.\d+)*/);
    return release === null ? trimmed : release[0];
}
21
/**
 * Read the `version` field of the package.json one directory above this
 * module, loaded through a `require` created for this module's URL.
 *
 * @returns The raw version value from package.json.
 * @throws {Error} When package.json has no (or an empty) `version`.
 */
function readPackageVersion() {
    const loadRelative = createRequire(import.meta.url);
    const { version } = loadRelative("../package.json");
    if (version) {
        return version;
    }
    throw new Error("transcribe-cpp: package.json is missing a version field");
}
29
+ /** The base `MAJOR.MINOR.PATCH` this binding was built as. */
30
+ export const OUR_VERSION = baseVersion(readPackageVersion());
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "transcribe-cpp",
3
- "version": "0.0.0",
4
- "description": "TypeScript/Node.js bindings for transcribe.cpp (pre-release name reservation placeholder)",
3
+ "version": "0.0.3",
4
+ "description": "TypeScript/Node.js bindings for transcribe.cpp — a C/C++ speech-to-text library built on ggml",
5
5
  "type": "module",
6
6
  "exports": {
7
7
  ".": {
@@ -12,7 +12,12 @@
12
12
  "main": "./dist/index.js",
13
13
  "module": "./dist/index.js",
14
14
  "types": "./dist/index.d.ts",
15
- "files": ["dist", "README.md", "LICENSE"],
15
+ "files": [
16
+ "dist",
17
+ "README.md",
18
+ "LICENSE",
19
+ "scripts/build-from-source.mjs"
20
+ ],
16
21
  "license": "MIT",
17
22
  "author": "The transcribe.cpp authors",
18
23
  "homepage": "https://github.com/handy-computer/transcribe.cpp#readme",
@@ -23,15 +28,37 @@
23
28
  "bugs": {
24
29
  "url": "https://github.com/handy-computer/transcribe.cpp/issues"
25
30
  },
26
- "keywords": ["transcription", "speech-to-text", "asr", "stt", "ggml", "whisper", "parakeet"],
31
+ "keywords": [
32
+ "transcription",
33
+ "speech-to-text",
34
+ "asr",
35
+ "stt",
36
+ "ggml",
37
+ "whisper",
38
+ "parakeet"
39
+ ],
27
40
  "engines": {
28
- "node": ">=18"
41
+ "node": ">=22"
29
42
  },
30
43
  "scripts": {
31
44
  "build": "tsc",
32
- "prepublishOnly": "bun run build"
45
+ "build:native": "node scripts/build-from-source.mjs",
46
+ "pretest": "npm run build",
47
+ "test": "node --test test/*.test.mjs",
48
+ "prepublishOnly": "npm run build"
49
+ },
50
+ "dependencies": {
51
+ "koffi": "^3.0.2"
52
+ },
53
+ "optionalDependencies": {
54
+ "@transcribe-cpp/darwin-arm64-metal": "0.0.3",
55
+ "@transcribe-cpp/darwin-x64-cpu": "0.0.3",
56
+ "@transcribe-cpp/linux-x64-cpu-vulkan": "0.0.3",
57
+ "@transcribe-cpp/linux-arm64-cpu-vulkan": "0.0.3",
58
+ "@transcribe-cpp/win32-x64-cpu-vulkan": "0.0.3"
33
59
  },
34
60
  "devDependencies": {
61
+ "@types/node": "^22.0.0",
35
62
  "typescript": "^5.6.0"
36
63
  }
37
64
  }
@@ -0,0 +1,90 @@
1
+ // Build libtranscribe from source — the universal fallback when no prebuilt
2
+ // @transcribe-cpp/<platform> package exists for the host (a new arch, a custom
3
+ // backend, a self-contained library). The result installs into
4
+ // `prebuilds/<tuple>/`, which the loader probes automatically — no env var.
5
+ //
6
+ // node scripts/build-from-source.mjs [--source <repo>] [--self-contained]
7
+ //
8
+ // --source Path to a transcribe.cpp checkout (default:
9
+ // $TRANSCRIBE_SOURCE_DIR, else walk up to find the repo).
10
+ // --self-contained Link ggml statically into one libtranscribe (no sibling
11
+ // ggml libraries). Otherwise ggml ships as sibling shared
12
+ // libs alongside libtranscribe.
13
+ //
14
+ // This drives CMake directly (the binding is FFI over a shared library, not an
15
+ // N-API addon — so cmake-js, which builds .node addons, is not the right tool).
16
+ // Requires cmake + a C/C++ toolchain (Ninja recommended). On macOS Metal is
17
+ // enabled and embedded; on Linux the default CPU backend builds (add your own
18
+ // -D flags via TRANSCRIBE_CMAKE_FLAGS for CUDA/Vulkan/etc.).
19
+
20
+ import { spawnSync } from "node:child_process";
21
+ import * as fs from "node:fs";
22
+ import * as path from "node:path";
23
+ import { fileURLToPath } from "node:url";
24
+
25
+ const PKG_ROOT = path.resolve(path.dirname(fileURLToPath(import.meta.url)), "..");
26
+
27
/**
 * Read a `--name [value]` option from `process.argv`.
 *
 * @param {string} name - Option name without the leading dashes.
 * @returns {string | true | undefined} The following argument when it is a
 *   value, `true` when the flag is last or followed by another `--option`,
 *   and `undefined` when the flag is absent.
 */
function arg(name) {
  const at = process.argv.indexOf(`--${name}`);
  if (at < 0) {
    return undefined;
  }
  const next = process.argv[at + 1];
  if (next == null || next.startsWith("--")) {
    return true;
  }
  return next;
}
31
+
32
/**
 * Name of the `prebuilds/<tuple>` directory this host's build installs into.
 *
 * @returns {string} `darwin-arm64-metal` on Apple Silicon, `darwin-x64-cpu`
 *   on any other macOS arch, and `<platform>-<arch>-cpu-vulkan` on Linux and
 *   Windows.
 * @throws {Error} On any other platform.
 */
function tuple() {
  const { platform, arch } = process;
  switch (platform) {
    case "darwin":
      return arch === "arm64" ? "darwin-arm64-metal" : "darwin-x64-cpu";
    case "linux":
    case "win32":
      return `${platform}-${arch}-cpu-vulkan`;
    default:
      throw new Error(`unsupported platform ${platform}/${arch}`);
  }
}
40
+
41
/**
 * Locate the transcribe.cpp checkout to build.
 *
 * The search starts at `--source <dir>` or `$TRANSCRIBE_SOURCE_DIR` when
 * either supplies a string, otherwise at the package root, and climbs at most
 * 12 levels until a directory holds both `CMakeLists.txt` and
 * `include/transcribe.h`.
 *
 * @returns {string} Absolute path of the source tree.
 * @throws {Error} When no such directory is found.
 */
function findSource() {
  const requested = arg("source") || process.env.TRANSCRIBE_SOURCE_DIR;
  const looksLikeRepo = (candidate) =>
    fs.existsSync(path.join(candidate, "CMakeLists.txt")) &&
    fs.existsSync(path.join(candidate, "include", "transcribe.h"));
  let current = path.resolve(typeof requested === "string" ? requested : PKG_ROOT);
  for (let hops = 0; hops < 12; hops += 1) {
    if (looksLikeRepo(current)) {
      return current;
    }
    const parent = path.dirname(current);
    if (parent === current) {
      // Reached the filesystem root.
      break;
    }
    current = parent;
  }
  throw new Error(
    "could not locate a transcribe.cpp source tree. Pass --source <repo> or set " +
      "TRANSCRIBE_SOURCE_DIR (the published npm package ships no C++ sources).",
  );
}
61
+
62
/**
 * Echo and run a command synchronously with inherited stdio.
 *
 * @param {string} cmd - Executable to run.
 * @param {string[]} args - Arguments passed to it.
 * @throws {Error} When the process cannot be run at all (for example the
 *   executable is not on PATH); the underlying error is attached as `cause`.
 * @throws {Error} When the process exits non-zero or is killed by a signal.
 */
function run(cmd, args) {
  console.log(`$ ${cmd} ${args.join(" ")}`);
  const r = spawnSync(cmd, args, { stdio: "inherit" });
  // spawnSync reports launch failures via `error` and leaves status/signal
  // null; without this check a missing tool reads as "exited with null".
  if (r.error) {
    throw new Error(`failed to run ${cmd}: ${r.error.message}`, { cause: r.error });
  }
  if (r.status !== 0) {
    throw new Error(`${cmd} exited with ${r.status ?? r.signal}`);
  }
}
69
+
70
// Script body: configure, build and install libtranscribe into
// <package>/prebuilds/<tuple>, replacing any previous install there.
const src = findSource();
const t = tuple();
const buildDir = path.join(PKG_ROOT, "build-from-source");
const prefix = path.join(PKG_ROOT, "prebuilds", t);
const selfContained = Boolean(arg("self-contained"));

console.log(`building libtranscribe for ${t}`);
console.log(`  source: ${src}`);
console.log(`  install prefix: ${prefix}${selfContained ? " (self-contained)" : ""}`);

// Ninja is recommended, not required. Respect an explicit CMAKE_GENERATOR
// (CMake reads it when -G is absent), otherwise use Ninja only when it is
// actually on PATH and fall back to CMake's default generator.
const haveNinja = spawnSync("ninja", ["--version"], { stdio: "ignore" }).status === 0;
const generator = !process.env.CMAKE_GENERATOR && haveNinja ? ["-G", "Ninja"] : [];

const flags = ["-DTRANSCRIBE_BUILD_SHARED=ON"];
if (process.platform === "darwin") flags.push("-DGGML_METAL=ON", "-DGGML_METAL_EMBED_LIBRARY=ON");
if (selfContained) flags.push("-DBUILD_SHARED_LIBS=OFF"); // ggml static -> one libtranscribe
// Split on any whitespace so tabs/newlines in the variable do not glue flags together.
if (process.env.TRANSCRIBE_CMAKE_FLAGS) flags.push(...process.env.TRANSCRIBE_CMAKE_FLAGS.split(/\s+/).filter(Boolean));

fs.rmSync(prefix, { recursive: true, force: true });
run("cmake", ["-B", buildDir, "-S", src, ...generator, ...flags]);
// --config keeps build and install on the same configuration under
// multi-config generators; single-config generators ignore it at build time.
run("cmake", ["--build", buildDir, "--target", "transcribe", "--config", "Release"]);
run("cmake", ["--install", buildDir, "--config", "Release", "--prefix", prefix]);

console.log(`\ndone. The loader will find prebuilds/${t}/lib/ automatically.`);