transcribe-cpp 0.0.0 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1,10 +1,1115 @@
1
1
  /**
2
- * TypeScript/Node.js bindings for transcribe.cpp.
2
+ * transcribe.cpp — TypeScript/Node.js bindings.
3
3
  *
4
- * This is a placeholder package that reserves the `transcribe-cpp` name on npm
5
- * while first-party bindings are developed. It ships no functionality yet.
4
+ * A koffi FFI binding over the shared native library: offline transcription and
5
+ * batch, streaming with committed/tentative text, typed per-family extensions,
6
+ * backend discovery, log routing, and cooperative cancellation.
7
+ */
8
+ import { native, setLogHandler } from "./native.js";
9
+ import * as g from "./_generated.js";
10
+ import { Aborted, Busy, exceptionForStatus, InvalidArgument, ModelLoadError, NotImplementedByModel, OutputTruncated, TranscribeError, UnsupportedRequest, } from "./errors.js";
11
+ export * from "./types.js";
12
+ export * from "./errors.js";
13
+ export { setLogHandler };
14
+ // ---- enum maps -------------------------------------------------------------
15
/** Public backend names mapped to the generated native backend constants. */
const BACKENDS = {
    auto: g.TRANSCRIBE_BACKEND_AUTO,
    cpu: g.TRANSCRIBE_BACKEND_CPU,
    metal: g.TRANSCRIBE_BACKEND_METAL,
    vulkan: g.TRANSCRIBE_BACKEND_VULKAN,
    cpu_accel: g.TRANSCRIBE_BACKEND_CPU_ACCEL,
    cuda: g.TRANSCRIBE_BACKEND_CUDA,
};
/** Public KV-cache type names mapped to the generated native constants. */
const KV_TYPES = {
    auto: g.TRANSCRIBE_KV_TYPE_AUTO,
    f32: g.TRANSCRIBE_KV_TYPE_F32,
    f16: g.TRANSCRIBE_KV_TYPE_F16,
};
/** Public task names mapped to the generated native task constants. */
const TASKS = {
    transcribe: g.TRANSCRIBE_TASK_TRANSCRIBE,
    translate: g.TRANSCRIBE_TASK_TRANSLATE,
};
/** Public timestamp granularity names mapped to the generated native constants. */
const TIMESTAMPS = {
    none: g.TRANSCRIBE_TIMESTAMPS_NONE,
    auto: g.TRANSCRIBE_TIMESTAMPS_AUTO,
    segment: g.TRANSCRIBE_TIMESTAMPS_SEGMENT,
    word: g.TRANSCRIBE_TIMESTAMPS_WORD,
    token: g.TRANSCRIBE_TIMESTAMPS_TOKEN,
};
/** Reverse of TIMESTAMPS: native constant -> public name. */
const TIMESTAMP_NAMES = {};
for (const [name, value] of Object.entries(TIMESTAMPS)) {
    TIMESTAMP_NAMES[value] = name;
}
/** Public feature names mapped to the generated native feature constants. */
const FEATURES = {
    initial_prompt: g.TRANSCRIBE_FEATURE_INITIAL_PROMPT,
    temperature_fallback: g.TRANSCRIBE_FEATURE_TEMPERATURE_FALLBACK,
    long_form: g.TRANSCRIBE_FEATURE_LONG_FORM,
    cancellation: g.TRANSCRIBE_FEATURE_CANCELLATION,
    pnc: g.TRANSCRIBE_FEATURE_PNC,
    itn: g.TRANSCRIBE_FEATURE_ITN,
};
45
+ // ---- helpers ---------------------------------------------------------------
46
/**
 * Translate a caller-supplied option name into the value stored for it in
 * `map`.
 *
 * Only the map's OWN keys are accepted. A plain `map[key]` read would also
 * resolve inherited names such as "constructor" or "toString" to members of
 * Object.prototype and hand a function to the native layer instead of
 * rejecting the option.
 *
 * @param {object} map  Option name -> value table.
 * @param {string} key  Option name supplied by the caller.
 * @param {string} what Label for the option, used in the error message.
 * @returns {*} The value stored under `key`.
 * @throws {TranscribeError} If `key` is not one of the map's own keys.
 */
function lookup(map, key, what) {
    const known = Object.hasOwn(map, key) && map[key] !== undefined;
    if (!known) {
        throw new TranscribeError(`invalid ${what} ${JSON.stringify(key)}; expected one of ${Object.keys(map).join(", ")}`);
    }
    return map[key];
}
53
/**
 * Throw the exception matching a native status code unless the status is
 * TRANSCRIBE_OK.
 *
 * @param {object} n       Native binding; `n.F.statusString` supplies the message.
 * @param {number} status  Status returned by a native call.
 * @param {string} context Description of the operation, forwarded to the exception.
 */
function check(n, status, context) {
    if (status !== g.TRANSCRIBE_OK) {
        const message = n.F.statusString(status);
        throw exceptionForStatus(status, message, context);
    }
}
58
/**
 * Build the Busy error returned when `op` is attempted while a stream holds
 * the model's compute slot.
 *
 * @param {string} op Name of the refused operation, interpolated into the message.
 * @returns {Busy}
 */
function busyError(op) {
    const message = [
        `cannot ${op}: a stream is active on this model. The C library allows one `,
        `run / batch / active stream in flight per model across all sessions — `,
        `finalize or reset the stream first, or use a separate model.`,
    ].join("");
    return new Busy(message);
}
63
+ // Native frees are queued behind the model lock (below), which resolves on a
64
+ // promise microtask. `process.exit()` terminates WITHOUT draining the microtask
65
+ // queue, so those deferred frees would never run — leaving native resources
66
+ // (model weights, session compute buffers) alive when the library's C++ static
67
+ // destructors run at process exit. On macOS >= 15 that aborts the process:
68
+ // ggml-metal's device teardown asserts every Metal buffer was freed first
69
+ // (GGML_ASSERT([rsets->data count] == 0), the residency-set collection). A
70
+ // process 'exit' handler runs synchronously and BEFORE those static destructors,
71
+ // so we flush any still-pending frees there. Every pending free registers itself
72
+ // here; it deletes itself once it has run (idempotent via the `done` guard), so
73
+ // the flush never double-frees and a clean (natural) exit finds nothing to do.
74
// Native frees that have been queued but have not run yet. Each entry removes
// itself once it has run, so a flush at exit only sees what is still pending.
const PENDING_FREES = new Set();
// Models that were loaded and not yet disposed. They are force-disposed at
// exit so their native frees land in PENDING_FREES and are flushed too.
const LIVE_MODELS = new Set();
let exitHookInstalled = false;
/**
 * Install, at most once, the process 'exit' handler that tears down native
 * resources synchronously.
 */
function ensureExitHook() {
    if (!exitHookInstalled) {
        exitHookInstalled = true;
        process.once("exit", () => {
            // 1) Force-dispose every still-live model. dispose() is synchronous
            //    and registers its session + model frees in PENDING_FREES (the
            //    deferred microtask will not run at exit, the registration does).
            //    Iterate over a copy: dispose() removes the model from
            //    LIVE_MODELS as it goes.
            Array.from(LIVE_MODELS).forEach((model) => {
                try {
                    model.dispose();
                }
                catch {
                    /* best-effort teardown */
                }
            });
            // 2) Flush every pending native free synchronously. Insertion order
            //    frees sessions before their model, honoring the C contract that
            //    a model outlives its sessions. The lease release (`after`) is
            //    intentionally skipped — the process is exiting, so only the
            //    native frees matter.
            PENDING_FREES.forEach((release) => {
                try {
                    release();
                }
                catch {
                    /* best-effort teardown; a native free cannot meaningfully fail */
                }
            });
            PENDING_FREES.clear();
        });
    }
}
114
/**
 * Queue a native free/reset on the model lock so it runs only after every
 * in-flight and already-queued call has drained, never overlapping a compute
 * on a worker thread.
 *
 * `after` (e.g. the compute-lease release) runs in the same queued slot, after
 * the native teardown, so an operation queued earlier still sees the lease
 * held until the native state is really gone. Errors thrown by the free are
 * swallowed (best-effort), and the promise returned by the lock is
 * deliberately left floating.
 *
 * The free is also recorded in PENDING_FREES so that a `process.exit()`, which
 * skips the microtask queue, can still run it from the exit hook. A run-once
 * guard keeps the two paths from freeing twice.
 *
 * @param {{run: Function}} lock Model lock used to order the teardown.
 * @param {Function} fn          The native free/reset to perform.
 * @param {Function} [after]     Optional step run after the free in the same slot.
 */
function deferFree(lock, fn, after) {
    let ran = false;
    function releaseOnce() {
        if (!ran) {
            ran = true;
            PENDING_FREES.delete(releaseOnce);
            fn();
        }
    }
    PENDING_FREES.add(releaseOnce);
    ensureExitHook();
    const queuedTeardown = async () => {
        try {
            releaseOnce();
        }
        catch {
            /* native free is infallible in practice; nothing to recover */
        }
        after?.();
    };
    void lock.run(queuedTeardown);
}
147
/**
 * Promisify `fn.async(...args, callback)`, where the callback follows the
 * Node `(err, result)` convention.
 *
 * @param {{async: Function}} fn Function object exposing an `.async` variant.
 * @param {...*} args            Arguments forwarded ahead of the callback.
 * @returns {Promise<*>} Resolves with the result, rejects with the error.
 */
function callAsync(fn, ...args) {
    return new Promise((resolve, reject) => {
        const settle = (err, res) => {
            if (err) {
                reject(err);
            }
            else {
                resolve(res);
            }
        };
        fn.async(...args, settle);
    });
}
150
/**
 * Coerce caller-supplied PCM into a Float32Array for the native call.
 *
 * - Float32Array: returned AS-IS (no copy). The buffer is borrowed across the
 *   async native call, which reads it on a worker thread, so callers must not
 *   mutate it until the promise resolves (documented on run/runBatch/feed).
 * - number[]: copied into a fresh Float32Array.
 * - ArrayBuffer, or any other view such as a Buffer: the bytes are
 *   reinterpreted as float32. This aliases the caller's memory (no copy)
 *   whenever the bytes start on a 4-byte boundary, so the same "do not mutate"
 *   rule applies. A view that starts at an unaligned byteOffset (for example
 *   `buf.subarray(1)`) cannot back a Float32Array directly, so its bytes are
 *   copied into a fresh aligned buffer instead of surfacing a RangeError.
 *
 * @param {Float32Array|number[]|ArrayBuffer|ArrayBufferView} pcm
 * @returns {Float32Array}
 * @throws {TranscribeError} If the input type is unsupported, its byte length
 *   is not a multiple of 4, or it contains no samples.
 */
function toFloat32(pcm) {
    const BYTES = Float32Array.BYTES_PER_ELEMENT;
    const misaligned = () => new TranscribeError("PCM byte length must be a multiple of 4 (float32)");
    let out;
    if (pcm instanceof Float32Array) {
        out = pcm;
    }
    else if (Array.isArray(pcm)) {
        out = Float32Array.from(pcm);
    }
    else if (pcm instanceof ArrayBuffer) {
        // Validate up front so a bad length reports the same TranscribeError as
        // the view branch rather than the constructor's raw RangeError.
        if (pcm.byteLength % BYTES !== 0) {
            throw misaligned();
        }
        out = new Float32Array(pcm);
    }
    else if (ArrayBuffer.isView(pcm)) {
        const b = pcm;
        if (b.byteLength % BYTES !== 0) {
            throw misaligned();
        }
        if (b.byteOffset % BYTES === 0) {
            out = new Float32Array(b.buffer, b.byteOffset, b.byteLength / BYTES);
        }
        else {
            // Float32Array requires a 4-byte-aligned start offset; copy the
            // bytes into a fresh buffer (offset 0) and view that instead.
            const bytes = new Uint8Array(b.buffer, b.byteOffset, b.byteLength).slice();
            out = new Float32Array(bytes.buffer);
        }
    }
    else {
        throw new TranscribeError("PCM must be a Float32Array, number[], ArrayBuffer, or Buffer");
    }
    if (out.length === 0) {
        throw new TranscribeError("PCM is empty");
    }
    return out;
}
176
/**
 * Narrow a bigint to a number; any other value passes through untouched.
 *
 * koffi decodes int64 struct fields as bigint, and they are surfaced as number
 * for ergonomics. The fields this is applied to (millisecond timestamps, kv
 * byte caps) are expected to stay well under Number.MAX_SAFE_INTEGER, so the
 * narrowing is lossless in practice. Revisit if a field can exceed 2^53.
 */
function num(v) {
    if (typeof v !== "bigint") {
        return v;
    }
    return Number(v);
}
183
/**
 * A FIFO async mutex plus the model-wide compute lease. One per Model.
 *
 * `run()` chains each callback behind the previous one, so native compute
 * calls (run, batch, stream feed/finalize) never overlap in time even when
 * koffi `.async()` moves the work onto libuv workers. The C contract is
 * stronger than that: at most one run/batch/*active stream* may be in flight
 * per model across all sessions, and an active stream occupies the model for
 * its whole lifetime — not just during a feed. `streamActive` is that lease:
 * it is claimed at stream begin and held until finalize/reset, and
 * run/batch/stream refuse (Busy) while it is set.
 */
class Mutex {
    /** True while an active stream holds the model's single compute slot. */
    streamActive = false;
    // Settles when the most recently queued callback has finished.
    #tail = Promise.resolve();
    /**
     * Queue `fn` behind everything already queued.
     *
     * @param {Function} fn Callback to run once the lock is free; may be async.
     * @returns {Promise<*>} Settles with `fn`'s outcome. The lock is released
     *   whether `fn` fulfils or rejects.
     */
    run(fn) {
        const predecessor = this.#tail;
        let unlock;
        this.#tail = new Promise((resolve) => {
            unlock = resolve;
        });
        return predecessor.then(fn).finally(unlock);
    }
}
205
+ // ---- module-level introspection -------------------------------------------
206
+ //
207
+ // Note: these (like every public entry point) trigger the lazy native bootstrap
208
+ // on first call — they dlopen the library, verify the ABI, and init backends.
209
+ // There is no way to probe the binding without loading the native library, with
210
+ // one exception: `headerHash` below is the compile-time PUBLIC_HEADER_HASH and
211
+ // is the value the binding *expects*, not one read from the loaded library.
212
/**
 * Report the native library's version and commit, plus the header hash this
 * binding was compiled against (`g.PUBLIC_HEADER_HASH`, not a value read from
 * the loaded library).
 */
export function version() {
    const { F } = native();
    return {
        version: F.version(),
        commit: F.versionCommit(),
        headerHash: g.PUBLIC_HEADER_HASH,
    };
}
216
/** Return the `libraryPath` recorded by the native binding. */
export function libraryPath() {
    const { libraryPath: loadedFrom } = native();
    return loadedFrom;
}
219
/** Native device-type constant -> public device type name. */
const DEVICE_TYPE_NAMES = Object.fromEntries([
    [g.TRANSCRIBE_DEVICE_TYPE_CPU, "cpu"],
    [g.TRANSCRIBE_DEVICE_TYPE_GPU, "gpu"],
    [g.TRANSCRIBE_DEVICE_TYPE_IGPU, "igpu"],
    [g.TRANSCRIBE_DEVICE_TYPE_ACCEL, "accel"],
]);
225
/**
 * Convert a filled transcribe_backend_device struct into a BackendInfo object.
 *
 * Missing strings become "", an unrecognized device type becomes "unknown" and
 * a missing device id becomes null. The memory_* fields go through num(); they
 * may arrive from koffi as bigint and are expected to stay below 2^53 for any
 * real device.
 *
 * @param {object} dev            Raw device struct.
 * @param {number|null} [index]   Position in the device list, if known.
 */
function deviceFromRaw(dev, index = null) {
    const {
        name,
        description,
        kind,
        device_type: rawType,
        device_id: rawId,
        memory_total: total,
        memory_free: free,
    } = dev;
    return {
        name: name ?? "",
        description: description ?? "",
        kind: kind ?? "",
        deviceType: DEVICE_TYPE_NAMES[rawType] ?? "unknown",
        deviceId: rawId ?? null,
        memoryTotal: num(total),
        memoryFree: num(free),
        index,
    };
}
240
/**
 * List every backend device the native library reports, in index order.
 *
 * @returns {object[]} One BackendInfo per device, each carrying its index.
 * @throws If reading any device returns a non-OK status (via check()).
 */
export function getAvailableBackends() {
    const lib = native();
    const { F } = lib;
    const total = F.backendDeviceCount();
    const devices = [];
    let i = 0;
    while (i < total) {
        const raw = {};
        F.backendDeviceInit(raw);
        const status = F.getBackendDevice(i, raw);
        check(lib, status, `reading backend device ${i}`);
        devices.push(deviceFromRaw(raw, i));
        i += 1;
    }
    return devices;
}
252
/**
 * Ask the native library whether the named backend is available.
 *
 * @param {string} backend One of the BACKENDS keys.
 * @throws {TranscribeError} If `backend` is not a known name (via lookup()).
 */
export function backendAvailable(backend) {
    // Load the native library first, then validate the name (original order).
    const { F } = native();
    const code = lookup(BACKENDS, backend, "backend");
    return F.backendAvailable(code);
}
256
/**
 * Bind the single-result native accessors to session handle `h`, so
 * materialize() can read a result without knowing where it came from.
 *
 * @param {object} n Native binding (`n.F` holds the functions).
 * @param {*} h      Native session handle.
 */
function singleAccessors(n, h) {
    const { F } = n;
    return {
        nSegments() {
            return F.nSegments(h);
        },
        getSegment(index, out) {
            return F.getSegment(h, index, out);
        },
        nWords() {
            return F.nWords(h);
        },
        getWord(index, out) {
            return F.getWord(h, index, out);
        },
        nTokens() {
            return F.nTokens(h);
        },
        getToken(index, out) {
            return F.getToken(h, index, out);
        },
        getTimings(out) {
            return F.getTimings(h, out);
        },
        fullText() {
            return F.fullText(h);
        },
        detectedLanguage() {
            return F.detectedLanguage(h);
        },
        returnedTimestampKind() {
            return F.returnedTimestampKind(h);
        },
    };
}
271
/**
 * Bind the batch native accessors to session handle `h` and utterance index
 * `i`, exposing the same accessor shape as singleAccessors().
 *
 * @param {object} n Native binding (`n.F` holds the functions).
 * @param {*} h      Native session handle.
 * @param {number} i Index of the utterance within the batch.
 */
function batchAccessors(n, h, i) {
    const { F } = n;
    return {
        nSegments() {
            return F.batchNSegments(h, i);
        },
        getSegment(index, out) {
            return F.batchGetSegment(h, i, index, out);
        },
        nWords() {
            return F.batchNWords(h, i);
        },
        getWord(index, out) {
            return F.batchGetWord(h, i, index, out);
        },
        nTokens() {
            return F.batchNTokens(h, i);
        },
        getToken(index, out) {
            return F.batchGetToken(h, i, index, out);
        },
        getTimings(out) {
            return F.batchGetTimings(h, i, out);
        },
        fullText() {
            return F.batchFullText(h, i);
        },
        detectedLanguage() {
            return F.batchDetectedLanguage(h, i);
        },
        returnedTimestampKind() {
            return F.batchReturnedTimestampKind(h, i);
        },
    };
}
286
/**
 * Copy a native result into a plain JS object: text, language, timestamp kind,
 * segments, words, tokens and timings.
 *
 * Segments are read first, then words, then tokens, then timings, and finally
 * the text/language/timestamp-kind getters. Millisecond fields go through
 * num() so bigint values surface as numbers.
 *
 * @param {object} n   Native binding; `n.F` supplies the struct initializers.
 * @param {object} acc Accessor set from singleAccessors() or batchAccessors().
 * @throws If any record read returns a non-OK status (via check()).
 */
function materialize(n, acc) {
    const { F } = n;
    // Read `count` records of one kind: initialize a scratch struct, fill it
    // through `read`, verify the status, then convert it to its public shape.
    const collect = (count, init, read, label, convert) => {
        const items = [];
        for (let i = 0; i < count; i++) {
            const raw = {};
            init(raw);
            check(n, read(i, raw), `reading ${label} ${i}`);
            items.push(convert(raw));
        }
        return items;
    };
    const segments = collect(acc.nSegments(), (raw) => F.segmentInit(raw), (i, raw) => acc.getSegment(i, raw), "segment", (s) => ({
        text: s.text ?? "",
        t0Ms: num(s.t0_ms),
        t1Ms: num(s.t1_ms),
        firstWord: s.first_word,
        nWords: s.n_words,
        firstToken: s.first_token,
        nTokens: s.n_tokens,
    }));
    const words = collect(acc.nWords(), (raw) => F.wordInit(raw), (i, raw) => acc.getWord(i, raw), "word", (w) => ({
        text: w.text ?? "",
        t0Ms: num(w.t0_ms),
        t1Ms: num(w.t1_ms),
        segIndex: w.seg_index,
        firstToken: w.first_token,
        nTokens: w.n_tokens,
    }));
    const tokens = collect(acc.nTokens(), (raw) => F.tokenInit(raw), (i, raw) => acc.getToken(i, raw), "token", (t) => ({
        text: t.text ?? "",
        id: t.id,
        p: t.p,
        t0Ms: num(t.t0_ms),
        t1Ms: num(t.t1_ms),
        segIndex: t.seg_index,
        wordIndex: t.word_index,
    }));
    const rawTimings = {};
    F.timingsInit(rawTimings);
    check(n, acc.getTimings(rawTimings), "reading timings");
    const timings = {
        loadMs: rawTimings.load_ms,
        melMs: rawTimings.mel_ms,
        encodeMs: rawTimings.encode_ms,
        decodeMs: rawTimings.decode_ms,
    };
    const text = acc.fullText() ?? "";
    const language = acc.detectedLanguage() ?? "";
    const timestampKind = TIMESTAMP_NAMES[acc.returnedTimestampKind()] ?? "none";
    return { text, language, timestampKind, segments, words, tokens, timings };
}
351
+ // ---- family extensions -----------------------------------------------------
352
/** Public commit-policy names mapped to the generated native constants. */
const COMMIT_POLICIES = {
    auto: g.TRANSCRIBE_STREAM_COMMIT_AUTO,
    on_finalize: g.TRANSCRIBE_STREAM_COMMIT_ON_FINALIZE,
    stable_prefix: g.TRANSCRIBE_STREAM_COMMIT_STABLE_PREFIX,
};
/** Native stream-state constant -> public state name. */
const STREAM_STATES = Object.fromEntries([
    [g.TRANSCRIBE_STREAM_IDLE, "idle"],
    [g.TRANSCRIBE_STREAM_ACTIVE, "active"],
    [g.TRANSCRIBE_STREAM_FINISHED, "finished"],
    [g.TRANSCRIBE_STREAM_FAILED, "failed"],
]);
/** Extension slot name -> native slot constant. */
const SLOT = {
    run: g.TRANSCRIBE_EXT_SLOT_RUN,
    stream: g.TRANSCRIBE_EXT_SLOT_STREAM,
};
/**
 * Registry of family extensions, keyed by the public `kind` name. Each entry
 * records the slot it is valid in, the native kind constant, the struct type
 * name, the name of the native init function, and a `map` that renames the
 * camelCase option fields to the struct's snake_case fields.
 */
const FAMILY = {
    whisper: {
        slot: "run",
        kind: g.TRANSCRIBE_EXT_KIND_WHISPER_RUN,
        type: "transcribe_whisper_run_ext",
        init: "whisperRunExtInit",
        map: ({ initialPrompt, conditionOnPrevTokens, temperature, temperatureInc, compressionRatioThold, logprobThold, noSpeechThold, maxPrevContextTokens, seed, maxInitialTimestamp, }) => ({
            initial_prompt: initialPrompt,
            condition_on_prev_tokens: conditionOnPrevTokens,
            temperature,
            temperature_inc: temperatureInc,
            compression_ratio_thold: compressionRatioThold,
            logprob_thold: logprobThold,
            no_speech_thold: noSpeechThold,
            max_prev_context_tokens: maxPrevContextTokens,
            seed,
            max_initial_timestamp: maxInitialTimestamp,
        }),
    },
    moonshine: {
        slot: "stream",
        kind: g.TRANSCRIBE_EXT_KIND_MOONSHINE_STREAMING_STREAM,
        type: "transcribe_moonshine_streaming_stream_ext",
        init: "moonshineStreamingStreamExtInit",
        map: ({ minDecodeIntervalMs }) => ({ min_decode_interval_ms: minDecodeIntervalMs }),
    },
    parakeet: {
        slot: "stream",
        kind: g.TRANSCRIBE_EXT_KIND_PARAKEET_STREAM,
        type: "transcribe_parakeet_stream_ext",
        init: "parakeetStreamExtInit",
        map: ({ attContextRight }) => ({ att_context_right: attContextRight }),
    },
    parakeet_buffered: {
        slot: "stream",
        kind: g.TRANSCRIBE_EXT_KIND_PARAKEET_BUFFERED_STREAM,
        type: "transcribe_parakeet_buffered_stream_ext",
        init: "parakeetBufferedStreamExtInit",
        map: ({ leftMs, chunkMs, rightMs }) => ({
            left_ms: leftMs,
            chunk_ms: chunkMs,
            right_ms: rightMs,
        }),
    },
    voxtral: {
        slot: "stream",
        kind: g.TRANSCRIBE_EXT_KIND_VOXTRAL_REALTIME_STREAM,
        type: "transcribe_voxtral_realtime_stream_ext",
        init: "voxtralRealtimeStreamExtInit",
        map: ({ numDelayTokens, minDecodeIntervalMs }) => ({
            num_delay_tokens: numDelayTokens,
            min_decode_interval_ms: minDecodeIntervalMs,
        }),
    },
};
418
/**
 * Build a native ext-struct buffer for a family extension and return the koffi
 * pointer to assign to `params.family`.
 *
 * Validation, in order: the kind must be a registered family (own key of
 * FAMILY, so inherited names such as "constructor" are reported as unknown
 * rather than as a slot mismatch), the family must belong to the requested
 * slot, and the model must accept the kind. The struct is then initialized by
 * the family's native init function and overlaid with every option the caller
 * actually set (undefined fields keep the native default).
 *
 * The returned buffer must be kept alive (held via the params object) until
 * the native call returns.
 *
 * @param {object} n            Native binding (F, T, koffi).
 * @param {*} modelHandle       Native model handle.
 * @param {{kind: string}} family Extension options; `kind` selects the family.
 * @param {"run"|"stream"} slot Slot of the call the extension is being attached to.
 * @returns {*} koffi buffer holding the encoded ext struct.
 * @throws {InvalidArgument} Unknown kind, or a kind used in the wrong slot.
 * @throws {UnsupportedRequest} The model does not accept this extension kind.
 */
function buildFamily(n, modelHandle, family, slot) {
    const kindName = family.kind;
    const reg = Object.hasOwn(FAMILY, kindName) ? FAMILY[kindName] : undefined;
    if (!reg)
        throw new InvalidArgument(`unknown family extension kind ${JSON.stringify(family.kind)}`);
    if (reg.slot !== slot) {
        throw new InvalidArgument(`family "${family.kind}" is a ${reg.slot} extension, not valid for a ${slot} call`);
    }
    if (!n.F.modelAcceptsExtKind(modelHandle, SLOT[reg.slot], reg.kind)) {
        throw new UnsupportedRequest(`this model does not accept the "${family.kind}" ${reg.slot} extension`);
    }
    const ext = {};
    n.F[reg.init](ext); // defaults + struct_size + kind
    for (const [k, v] of Object.entries(reg.map(family))) {
        if (v !== undefined)
            ext[k] = v;
    }
    const buf = n.koffi.alloc(n.T[reg.type], 1);
    n.koffi.encode(buf, n.T[reg.type], ext);
    return buf;
}
444
/**
 * Convert a native stream-update struct (snake_case) into the public
 * camelCase update object. The three millisecond counters go through num().
 */
function toStreamUpdate(u) {
    const {
        result_changed: resultChanged,
        is_final: isFinal,
        revision,
        input_received_ms: inputReceived,
        audio_committed_ms: audioCommitted,
        buffered_ms: buffered,
        committed_changed: committedChanged,
        tentative_changed: tentativeChanged,
    } = u;
    return {
        resultChanged,
        isFinal,
        revision,
        inputReceivedMs: num(inputReceived),
        audioCommittedMs: num(audioCommitted),
        bufferedMs: num(buffered),
        committedChanged,
        tentativeChanged,
    };
}
456
+ /**
457
+ * Module-private teardown control for each Stream, keyed by the instance. These
458
+ * ops mutate the stream's `#active`/lease state, so they must NOT be reachable
459
+ * from user code: calling the lease release on a live stream would clear the
460
+ * model-wide lease and let a sibling run/stream overlap it — the exact race the
461
+ * lease prevents. Public `@internal` is only a type hint (the method still exists
462
+ * at runtime), so the control surface lives here instead, reached only by the
463
+ * owning Session within this module. The closures capture the Stream; WeakMap
464
+ * ephemeron semantics keep that from pinning it in memory.
8
465
  */
9
- /** Version of this placeholder package. */
10
- export const version = "0.0.0";
466
const STREAM_TEARDOWN = new WeakMap();
/**
 * Module-private control surface for each Session, keyed by the instance. The
 * closures registered in the Session constructor let a Stream mark compute as
 * in flight on its session and let the session track which Stream wrapper is
 * current, without exposing those operations as public methods.
 */
const SESSION_CONTROL = new WeakMap();
// ---- Session ---------------------------------------------------------------
/**
 * One native session on a Model: offline run, batch, and stream begin.
 *
 * All compute goes through the model-wide lock, so calls are serialized across
 * every session of the model. Disposal is synchronous on the JS side (later
 * use throws immediately) while the native free is queued behind the lock.
 */
export class Session {
    #n;
    #h;
    #model; // keep the model alive while this session lives
    #lock; // shared with the model; serializes compute model-wide
    #untrack; // drop self from the model's session set
    #inFlight = null; // set while a native call runs on a worker
    #activeStream = null; // current wrapper for the session's native stream slot
    #disposed = false;
    /** @internal */
    constructor(n, model, handle, lock, untrack) {
        this.#n = n;
        this.#model = model;
        this.#h = handle;
        this.#lock = lock;
        this.#untrack = untrack;
        SESSION_CONTROL.set(this, {
            enterCompute: (kind) => {
                this.#inFlight = kind;
            },
            leaveCompute: (kind) => {
                if (this.#inFlight === kind)
                    this.#inFlight = null;
            },
            isCurrentStream: (stream) => this.#activeStream === stream,
            replaceCurrentStream: (stream) => {
                if (this.#activeStream && this.#activeStream !== stream) {
                    STREAM_TEARDOWN.get(this.#activeStream)?.invalidate();
                }
                this.#activeStream = stream;
            },
            clearCurrentStream: (stream) => {
                if (this.#activeStream === stream)
                    this.#activeStream = null;
            },
        });
    }
    /** @internal */
    get handle() {
        if (this.#disposed)
            throw new TranscribeError("session has been disposed");
        return this.#h;
    }
    /** Reads touch the session; forbidden while a worker call is in flight. */
    #assertNotComputing(what) {
        if (this.#inFlight) {
            throw new TranscribeError(`cannot read session ${what} while ${this.#inFlight} is in flight; await it first`);
        }
    }
    /** Effective limits of this session, read from the native library. */
    get limits() {
        this.#assertNotComputing("limits");
        const n = this.#n;
        const l = {};
        n.F.sessionLimitsInit(l);
        check(n, n.F.sessionGetLimits(this.handle, l), "reading session limits");
        return {
            effectiveNCtx: l.effective_n_ctx,
            effectiveMaxAudioMs: num(l.effective_max_audio_ms),
            maxKvBytes: num(l.max_kv_bytes),
        };
    }
    /**
     * Transcribe one clip. The input PCM is borrowed, not copied: native code
     * reads it on a worker thread while this runs, so do not mutate the buffer
     * (e.g. reuse a scratch array) until the returned promise resolves.
     *
     * @throws {Busy} While a stream is active on the model.
     * @throws {Aborted|OutputTruncated} With `partialResult` attached.
     */
    async run(pcm, opts = {}) {
        const n = this.#n;
        const F = n.F;
        const h = this.handle;
        const samples = toFloat32(pcm);
        const p = this.#buildRunParams(opts);
        return this.#lock.run(async () => {
            if (this.#disposed)
                throw new TranscribeError("session has been disposed");
            if (this.#lock.streamActive)
                throw busyError("run");
            const cancel = this.#installAbort(opts.signal, h);
            let status;
            this.#inFlight = "run()";
            try {
                status = await callAsync(F.run, h, samples, samples.length, p);
            }
            finally {
                this.#inFlight = null;
                cancel?.();
            }
            if (status === g.TRANSCRIBE_ERR_ABORTED || status === g.TRANSCRIBE_ERR_OUTPUT_TRUNCATED) {
                const partial = {
                    ...materialize(n, singleAccessors(n, h)),
                    aborted: F.wasAborted(h),
                    truncated: F.wasTruncated(h),
                };
                const exc = status === g.TRANSCRIBE_ERR_ABORTED
                    ? new Aborted(`run aborted`, status)
                    : new OutputTruncated(`run output truncated`, status);
                exc.partialResult = partial;
                throw exc;
            }
            check(n, status, "transcribe_run");
            return { ...materialize(n, singleAccessors(n, h)), aborted: F.wasAborted(h), truncated: F.wasTruncated(h) };
        });
    }
    /** Translate public run options into an initialized native run-params struct. */
    #buildRunParams(opts) {
        const n = this.#n;
        const p = {};
        n.F.runParamsInit(p);
        p.task = lookup(TASKS, opts.task ?? "transcribe", "task");
        p.timestamps = lookup(TIMESTAMPS, opts.timestamps ?? "none", "timestamps");
        if (opts.language !== undefined)
            p.language = opts.language;
        if (opts.targetLanguage !== undefined)
            p.target_language = opts.targetLanguage;
        if (opts.keepSpecialTags !== undefined)
            p.keep_special_tags = opts.keepSpecialTags;
        if (opts.specKDrafts !== undefined)
            p.spec_k_drafts = opts.specKDrafts;
        if (opts.family)
            p.family = buildFamily(n, this.#model.handle, opts.family, "run");
        return p;
    }
    /**
     * Offline batch transcription; each item carries its own success/failure.
     * Inputs are borrowed, not copied: native code reads them on a worker thread
     * while this runs, so do not mutate them until the returned promise resolves.
     *
     * @throws {InvalidArgument} If `pcms` is empty.
     * @throws {Busy} While a stream is active on the model.
     */
    async runBatch(pcms, opts = {}) {
        const n = this.#n;
        const F = n.F;
        const h = this.handle;
        if (pcms.length === 0)
            throw new InvalidArgument("runBatch requires at least one input");
        const arrays = pcms.map(toFloat32);
        const counts = Int32Array.from(arrays, (a) => a.length);
        const p = this.#buildRunParams(opts);
        return this.#lock.run(async () => {
            if (this.#disposed)
                throw new TranscribeError("session has been disposed");
            if (this.#lock.streamActive)
                throw busyError("runBatch");
            const cancel = this.#installAbort(opts.signal, h);
            let status;
            this.#inFlight = "runBatch()";
            try {
                status = await callAsync(F.runBatch, h, arrays, counts, arrays.length, p);
            }
            finally {
                this.#inFlight = null;
                cancel?.();
            }
            // A batch returns OK even with per-utterance failures; only a top-level
            // error (or a whole-batch abort) is fatal here.
            if (status !== g.TRANSCRIBE_OK && status !== g.TRANSCRIBE_ERR_ABORTED) {
                check(n, status, "transcribe_run_batch");
            }
            const out = [];
            for (let i = 0, c = F.batchNResults(h); i < c; i++) {
                const st = F.batchStatus(h, i);
                if (st === g.TRANSCRIBE_OK) {
                    out.push({
                        ok: true,
                        result: { ...materialize(n, batchAccessors(n, h, i)), aborted: false, truncated: false },
                    });
                }
                else {
                    const error = exceptionForStatus(st, F.statusString(st), `utterance ${i}`);
                    error.utteranceIndex = i;
                    if (st === g.TRANSCRIBE_ERR_ABORTED || st === g.TRANSCRIBE_ERR_OUTPUT_TRUNCATED) {
                        error.partialResult = {
                            ...materialize(n, batchAccessors(n, h, i)),
                            aborted: st === g.TRANSCRIBE_ERR_ABORTED,
                            truncated: st === g.TRANSCRIBE_ERR_OUTPUT_TRUNCATED,
                        };
                    }
                    out.push({ ok: false, error });
                }
            }
            return out;
        });
    }
    /** Begin a streaming session. The returned Stream owns the begin params. */
    async stream(opts = {}) {
        const n = this.#n;
        const F = n.F;
        const h = this.handle;
        const rp = this.#buildRunParams({
            task: opts.task,
            language: opts.language,
            targetLanguage: opts.targetLanguage,
            timestamps: opts.timestamps,
            keepSpecialTags: opts.keepSpecialTags,
            specKDrafts: -1,
        });
        const sp = {};
        F.streamParamsInit(sp);
        sp.commit_policy = lookup(COMMIT_POLICIES, opts.commitPolicy ?? "auto", "commitPolicy");
        if (opts.stablePrefixAgreementN !== undefined) {
            sp.stable_prefix_agreement_n = opts.stablePrefixAgreementN;
        }
        if (opts.family)
            sp.family = buildFamily(n, this.#model.handle, opts.family, "stream");
        return this.#lock.run(async () => {
            // Recheck inside the lock: dispose() may have run after we captured `h`
            // but before this queued body — don't begin a stream on a dead session.
            if (this.#disposed)
                throw new TranscribeError("session has been disposed");
            if (this.#lock.streamActive)
                throw busyError("begin a stream");
            check(n, F.streamBegin(h, rp, sp), "transcribe_stream_begin");
            this.#lock.streamActive = true; // claim the lease for the whole stream lifetime
            // The Stream holds the Session (not a raw handle) so its calls fail fast
            // once the session is disposed, and so dispose() can find and invalidate it.
            const stream = new Stream(n, this, this.#lock, [rp, sp]); // pin params until reset
            const control = SESSION_CONTROL.get(this);
            if (!control)
                throw new TranscribeError("session control is missing");
            control.replaceCurrentStream(stream);
            return stream;
        });
    }
    /**
     * Wire an AbortSignal to a native abort callback for one run. The callback is
     * installed on *this session's* handle, but install/run/uninstall is only safe
     * because every caller holds the model-wide #lock for the whole run — the lock,
     * not the per-session handle, is what guarantees no run overlaps the window
     * between setAbortCallback(cb) and setAbortCallback(null). A future change that
     * relaxes the lock must keep this install/uninstall paired within one run.
     *
     * `h` is the native handle the caller captured before queuing. The cleanup
     * must use it rather than the `handle` getter: dispose() may run while the
     * native call is still in flight, after which the getter throws. The native
     * session itself is still alive at that point because its free is queued
     * behind the lock, so clearing the callback on `h` is safe — and skipping it
     * would leak the registered koffi callback and the abort listener, and
     * would replace the finished run's outcome with a "disposed" error.
     *
     * @returns {Function|null} Cleanup to call once the run settles, or null
     *   when no signal was supplied.
     */
    #installAbort(signal, h) {
        if (!signal)
            return null;
        const n = this.#n;
        const flag = { aborted: signal.aborted };
        const onAbort = () => {
            flag.aborted = true;
        };
        signal.addEventListener("abort", onAbort, { once: true });
        let cbPtr;
        try {
            cbPtr = n.koffi.register(() => flag.aborted, n.koffi.pointer(n.abortProto));
            n.F.setAbortCallback(h, cbPtr, null);
        }
        catch (err) {
            // Installation failed half-way: undo what was set up, then rethrow.
            if (cbPtr !== undefined)
                n.koffi.unregister(cbPtr);
            signal.removeEventListener("abort", onAbort);
            throw err;
        }
        return () => {
            try {
                n.F.setAbortCallback(h, null, null);
            }
            finally {
                n.koffi.unregister(cbPtr);
                signal.removeEventListener("abort", onAbort);
            }
        };
    }
    /** Whether the native library reports the last call on this session as aborted. */
    get wasAborted() {
        this.#assertNotComputing("wasAborted");
        return this.#n.F.wasAborted(this.handle);
    }
    /** Dispose the session. Idempotent; the native free is deferred behind the lock. */
    dispose() {
        if (this.#disposed)
            return;
        this.#disposed = true;
        this.#untrack(this); // stop the model from holding a dead session
        // Deactivate any live stream NOW (its reset() no-ops, reads throw via the
        // disposed handle), but release its model lease only inside the deferred
        // teardown, after sessionFree — so a stream/run queued ahead of this can't
        // claim the slot before the native session is actually gone.
        const stream = this.#activeStream;
        this.#activeStream = null;
        const teardown = stream ? STREAM_TEARDOWN.get(stream) : undefined;
        teardown?.deactivate();
        // Free behind the model lock: a run/feed worker may still hold this handle,
        // and sessionFree mid-call is a use-after-free. Queuing on the FIFO lock
        // runs the free after any in-flight (and queued) compute drains. The JS-side
        // guard is already synchronous (#disposed/handle), so use-after-dispose
        // still throws immediately; only the native free + lease release are deferred.
        const n = this.#n;
        const h = this.#h;
        this.#h = null;
        deferFree(this.#lock, () => n.F.sessionFree(h), () => teardown?.releaseLease());
    }
    [Symbol.dispose]() {
        this.dispose();
    }
}
745
+ // ---- Stream ----------------------------------------------------------------
746
/**
 * One streaming transcription bound to a Session.
 *
 * Audio is pushed with feed() and flushed with finalize(); the `text`,
 * `state`, `revision` and `lastStatus` getters read the session's current
 * snapshot between those calls. All native compute is queued on the lock
 * handed to the constructor. A stream starts out holding that lock's
 * `streamActive` lease and gives it up at most once (see #releaseLease).
 */
export class Stream {
    /** Label passed to the session control around every native feed/finalize. */
    static #COMPUTE_LABEL = "feed()/finalize()";
    #lib; // native binding object (its `F` member holds the C entry points)
    #session; // owns the native handle; throws once disposed (no stale handle)
    #lock; // queue that serializes compute; also carries the `streamActive` lease flag
    #sessionControl; // module-private control surface of the owning session
    #keepalive; // opaque reference held while the stream is live; dropped on teardown
    #active = true; // false after reset() or after the session deactivated us
    #stale = false; // true once the session has begun a newer native stream
    #inFlight = false; // true while a feed/finalize native call runs on a worker
    #holdsLease = true; // born holding the model's compute lease (claimed at begin)

    /** @internal */
    constructor(n, session, lock, keepalive) {
        const control = SESSION_CONTROL.get(session);
        if (!control) {
            throw new TranscribeError("session control is missing");
        }
        this.#lib = n;
        this.#session = session;
        this.#lock = lock;
        this.#sessionControl = control;
        this.#keepalive = keepalive;
        // The teardown surface is published through the module-private
        // STREAM_TEARDOWN map rather than as public methods, so only code in
        // this module (the owning Session) can reach it.
        STREAM_TEARDOWN.set(this, {
            // Synchronous switch-off used on Session dispose: afterwards
            // reset() is a no-op and feed()/finalize() refuse to start.
            deactivate: () => {
                this.#active = false;
                this.#keepalive = null;
            },
            // Marks this stream as superseded; every #assertCurrent call
            // throws from then on.
            invalidate: () => {
                this.#stale = true;
                this.#keepalive = null;
            },
            // Guarded lease release, invoked by the Session from its own
            // deferred teardown.
            releaseLease: () => this.#releaseLease(),
        });
    }

    /**
     * Give up the compute lease if, and only if, this stream still holds it.
     * The `#holdsLease` flag makes the release one-shot, so a later reset()
     * or a second call cannot clear a lease someone else has since claimed.
     */
    #releaseLease() {
        if (!this.#holdsLease) {
            return;
        }
        this.#holdsLease = false;
        this.#lock.streamActive = false;
    }

    /** Throw when a newer stream has taken over the session. */
    #assertCurrent(what) {
        if (!this.#stale) {
            return;
        }
        throw new TranscribeError(`cannot ${what}: stream is no longer current for this session`);
    }

    /** Throw when the stream was reset or deactivated. */
    #assertActive() {
        if (!this.#active) {
            throw new TranscribeError("stream has been reset");
        }
    }

    /**
     * Reads are refused while a feed()/finalize() native call is running on a
     * worker thread. Awaiting the feed before reading never trips this; only
     * a read issued against an un-awaited feed/finalize does.
     */
    #assertNotFeeding(what) {
        if (!this.#inFlight) {
            return;
        }
        throw new TranscribeError(`cannot read stream ${what} while a feed()/finalize() is in flight; await it first`);
    }

    /**
     * Feed one chunk of PCM and resolve with the resulting update snapshot.
     * The converted sample buffer is handed to native code as-is while the
     * call runs, so do not mutate the chunk until the promise settles.
     *
     * Rejects if the session was disposed, the stream is stale or was reset,
     * or the native feed reports a non-OK status.
     */
    async feed(pcm) {
        const lib = this.#lib;
        const handle = this.#session.handle; // throws if the session was disposed
        this.#assertCurrent("feed");
        this.#assertActive();
        const samples = toFloat32(pcm);
        return this.#lock.run(async () => {
            const update = {};
            lib.F.streamUpdateInit(update);
            // Flag the native call as in flight so the read getters fail fast
            // instead of touching the session while a worker is using it.
            this.#inFlight = true;
            this.#sessionControl.enterCompute(Stream.#COMPUTE_LABEL);
            try {
                const status = await callAsync(lib.F.streamFeed, handle, samples, samples.length, update);
                const failed = status !== g.TRANSCRIBE_OK;
                if (failed) {
                    // A failed feed ends the active stream natively; the
                    // wrapper stays readable but the compute slot is freed.
                    this.#releaseLease();
                }
                check(lib, status, "transcribe_stream_feed");
            }
            finally {
                this.#inFlight = false;
                this.#sessionControl.leaveCompute(Stream.#COMPUTE_LABEL);
            }
            return toStreamUpdate(update);
        });
    }

    /**
     * Flush remaining audio and commit the final text. Resolves with the final
     * update snapshot; the compute lease is released whether the native call
     * succeeds or fails.
     */
    async finalize() {
        const lib = this.#lib;
        const handle = this.#session.handle; // throws if the session was disposed
        this.#assertCurrent("finalize stream");
        this.#assertActive();
        return this.#lock.run(async () => {
            const update = {};
            lib.F.streamUpdateInit(update);
            this.#inFlight = true; // same in-flight guard as feed()
            this.#sessionControl.enterCompute(Stream.#COMPUTE_LABEL);
            try {
                const status = await callAsync(lib.F.streamFinalize, handle, update);
                check(lib, status, "transcribe_stream_finalize");
            }
            finally {
                this.#inFlight = false;
                this.#sessionControl.leaveCompute(Stream.#COMPUTE_LABEL);
                // Finalize ends the active stream either way, so the lease
                // goes back regardless of the outcome.
                this.#releaseLease();
            }
            return toStreamUpdate(update);
        });
    }

    /** Current text snapshot as `{ full, committed, tentative }` strings. */
    get text() {
        const handle = this.#session.handle; // throws if the session was disposed
        this.#assertCurrent("read stream text");
        this.#assertNotFeeding("text");
        const lib = this.#lib;
        const snapshot = {};
        lib.F.streamTextInit(snapshot);
        check(lib, lib.F.streamGetText(handle, snapshot), "transcribe_stream_get_text");
        // Missing (null/undefined) fields are reported as empty strings.
        return {
            full: snapshot.full_text ?? "",
            committed: snapshot.committed_text ?? "",
            tentative: snapshot.tentative_text ?? "",
        };
    }

    /** Stream state name; `"idle"` once reset or when the native value is unknown. */
    get state() {
        const handle = this.#session.handle; // throws if the session was disposed
        this.#assertCurrent("read stream state");
        if (!this.#active) {
            // reset() reports idle right away, even though the native reset
            // may still be waiting in the queue.
            return "idle";
        }
        this.#assertNotFeeding("state");
        const code = this.#lib.F.streamGetState(handle);
        return STREAM_STATES[code] ?? "idle";
    }

    /** Revision value reported by the native stream. */
    get revision() {
        const handle = this.#session.handle; // throws if the session was disposed
        this.#assertCurrent("read stream revision");
        this.#assertNotFeeding("revision");
        return this.#lib.F.streamRevision(handle);
    }

    /**
     * The stream's recorded failure as an error object, or `null` when the
     * native last-status is OK. Worth inspecting when `state === "failed"`.
     */
    get lastStatus() {
        const handle = this.#session.handle; // throws if the session was disposed
        this.#assertCurrent("read stream lastStatus");
        this.#assertNotFeeding("lastStatus");
        const lib = this.#lib;
        const status = lib.F.streamLastStatus(handle);
        return status === g.TRANSCRIBE_OK
            ? null
            : exceptionForStatus(status, lib.F.statusString(status), "stream");
    }

    /** End the stream and return the session to idle. Idempotent. */
    reset() {
        // Nothing to do when a newer stream owns the session slot, or when
        // this one was already reset or deactivated.
        if (this.#stale || !this.#active) {
            return;
        }
        this.#active = false;
        this.#keepalive = null;
        const lib = this.#lib;
        const handle = this.#session.handle;
        // The native reset and the lease release are handed to deferFree
        // together, in that order, so they queue on the lock behind any
        // feed/finalize already waiting there. The native reset is skipped
        // when the session no longer regards this stream as its current one.
        const resetNative = () => {
            if (!this.#sessionControl.isCurrentStream(this)) {
                return;
            }
            lib.F.streamReset(handle);
            this.#sessionControl.clearCurrentStream(this);
        };
        deferFree(this.#lock, resetNative, () => this.#releaseLease());
    }

    /** Symbol.dispose hook; equivalent to reset(). */
    [Symbol.dispose]() {
        this.reset();
    }
}
947
+ // ---- Model -----------------------------------------------------------------
948
/**
 * A loaded native model.
 *
 * Obtain one with `TranscribeModel.load()`. The model hands out sessions,
 * answers capability/metadata queries, and owns a single lock shared by all
 * of its sessions so their compute is serialized. dispose() tears down every
 * tracked session first and then queues the native model free.
 */
export class TranscribeModel {
    #lib; // native binding object (its `F` member holds the C entry points)
    #modelHandle; // native model handle; nulled on dispose
    #disposed = false;
    #sessions = new Set(); // sessions created by this model and not yet disposed
    #lock = new Mutex(); // serializes compute across all sessions of this model

    constructor(n, handle) {
        this.#lib = n;
        this.#modelHandle = handle;
        // Register the model as live and make sure the exit hook is installed
        // right at construction (see ensureExitHook), so a model that is never
        // disposed is still known to the process-exit cleanup.
        LIVE_MODELS.add(this);
        ensureExitHook();
    }

    /**
     * Load a model file asynchronously.
     *
     * @param path model file path, passed through to the native loader.
     * @param opts optional `backend` (name looked up in BACKENDS) and `gpuDevice`.
     * @returns a promise for the ready TranscribeModel.
     * @throws when the native load reports a non-OK status, or ModelLoadError
     *   when it reports success but yields no handle.
     */
    static async load(path, opts = {}) {
        const lib = native();
        const params = {};
        lib.F.modelLoadParamsInit(params);
        if (opts.backend) {
            params.backend = lookup(BACKENDS, opts.backend, "backend");
        }
        if (opts.gpuDevice !== undefined) {
            params.gpu_device = opts.gpuDevice;
        }
        const slot = [null]; // out-parameter the native loader writes the handle into
        const status = await callAsync(lib.F.modelLoadFile, path, params, slot);
        check(lib, status, `loading model ${path}`);
        const loaded = slot[0];
        if (!loaded) {
            throw new ModelLoadError(`model load returned a null handle for ${path}`);
        }
        return new TranscribeModel(lib, loaded);
    }

    /** @internal Native handle; throws once the model has been disposed. */
    get handle() {
        if (this.#disposed) {
            throw new TranscribeError("model has been disposed");
        }
        return this.#modelHandle;
    }

    /**
     * Open a new session on this model.
     *
     * @param opts optional `nThreads`, `kvType` (name looked up in KV_TYPES)
     *   and `nCtx`; unspecified values keep whatever sessionParamsInit set.
     * @returns the new Session, tracked by the model until it is disposed.
     */
    createSession(opts = {}) {
        const lib = this.#lib;
        const params = {};
        lib.F.sessionParamsInit(params);
        if (opts.nThreads !== undefined) {
            params.n_threads = opts.nThreads;
        }
        if (opts.kvType) {
            params.kv_type = lookup(KV_TYPES, opts.kvType, "kvType");
        }
        if (opts.nCtx !== undefined) {
            params.n_ctx = opts.nCtx;
        }
        const slot = [null];
        const status = lib.F.sessionInit(this.handle, params, slot);
        check(lib, status, "opening session");
        const opened = slot[0];
        if (!opened) {
            throw new TranscribeError("session init returned a null handle");
        }
        // The callback lets the session remove itself from the tracking set.
        const untrack = (s) => this.#sessions.delete(s);
        const session = new Session(lib, this, opened, this.#lock, untrack);
        this.#sessions.add(session);
        return session;
    }

    /** Convenience: one session, one run, disposed after. */
    async transcribe(pcm, opts = {}) {
        const oneShot = this.createSession();
        try {
            return await oneShot.run(pcm, opts);
        }
        finally {
            oneShot.dispose(); // untracks itself from #sessions
        }
    }

    /** Snapshot of the model's capabilities, translated to camelCase fields. */
    get capabilities() {
        const lib = this.#lib;
        const raw = {};
        lib.F.capabilitiesInit(raw);
        check(lib, lib.F.modelGetCapabilities(this.handle, raw), "reading capabilities");
        const languages = this.#decodeLanguages(raw);
        return {
            nativeSampleRate: raw.native_sample_rate,
            languages,
            maxTimestampKind: TIMESTAMP_NAMES[raw.max_timestamp_kind] ?? "none",
            supportsLanguageDetect: raw.supports_language_detect,
            supportsTranslate: raw.supports_translate,
            supportsStreaming: raw.supports_streaming,
            supportsSpecDecode: raw.supports_spec_decode,
            maxAudioMs: num(raw.max_audio_ms),
        };
    }

    /**
     * Decode the language list from a raw capabilities struct. Best effort:
     * an absent/empty list, or any decode failure, yields an empty array.
     */
    #decodeLanguages(raw) {
        try {
            if (raw.languages && raw.n_languages > 0) {
                return this.#lib.koffi.decode(raw.languages, "char *", raw.n_languages);
            }
            return [];
        }
        catch {
            return [];
        }
    }

    /** Whether the model supports the named feature (name looked up in FEATURES). */
    supports(feature) {
        const code = lookup(FEATURES, feature, "feature");
        return this.#lib.F.modelSupports(this.handle, code);
    }

    /** Whether this model accepts the given family extension on its slot. */
    accepts(family) {
        const entry = FAMILY[family.kind];
        if (!entry) {
            return false; // unknown family kinds are never accepted
        }
        return this.#lib.F.modelAcceptsExtKind(this.handle, SLOT[entry.slot], entry.kind);
    }

    /**
     * Tokenize plain UTF-8 text into the model's vocabulary (no special tokens).
     *
     * @returns an Int32Array view over the token ids that were written.
     * @throws NotImplementedByModel when the native call returns the INT_MIN
     *   sentinel; TranscribeError when four attempts do not produce a result.
     */
    tokenize(text) {
        const { F } = this.#lib;
        const UNSUPPORTED = -(2 ** 31); // INT_MIN sentinel from the native side
        let capacity = Math.max(16, text.length + 16);
        let attemptsLeft = 4;
        while (attemptsLeft-- > 0) {
            const ids = new Int32Array(capacity);
            const written = F.tokenize(this.handle, text, ids, capacity);
            if (written === UNSUPPORTED) {
                throw new NotImplementedByModel("this model's tokenizer does not support encode");
            }
            if (written >= 0) {
                return ids.subarray(0, written);
            }
            // Negative result: the buffer was too small and -written is the
            // count needed, so retry with exactly that capacity.
            capacity = -written;
        }
        throw new TranscribeError("tokenize did not converge");
    }

    /** Architecture string reported by the native model ("" when absent). */
    get arch() {
        const value = this.#lib.F.modelArch(this.handle);
        return value ?? "";
    }

    /** Variant string reported by the native model ("" when absent). */
    get variant() {
        const value = this.#lib.F.modelVariant(this.handle);
        return value ?? "";
    }

    /** Backend string reported by the native model ("" when absent). */
    get backend() {
        const value = this.#lib.F.modelBackend(this.handle);
        return value ?? "";
    }

    /** The compute device this model is running on. `memoryFree` is a live
     * snapshot, so read this again to poll how much device memory is left
     * after the model loaded. */
    get device() {
        const lib = this.#lib;
        const raw = {};
        lib.F.backendDeviceInit(raw);
        check(lib, lib.F.modelGetDevice(this.handle, raw), "reading model device");
        return deviceFromRaw(raw);
    }

    /**
     * Dispose the model and every session still tracked by it. Idempotent.
     * The native frees are queued on the model lock rather than run inline.
     */
    dispose() {
        if (this.#disposed) {
            return;
        }
        this.#disposed = true;
        LIVE_MODELS.delete(this); // its frees are now queued in PENDING_FREES
        // Iterate over a copy: each session untracks itself from #sessions as
        // it is disposed and queues its own native free on the shared lock.
        Array.from(this.#sessions).forEach((session) => session.dispose());
        this.#sessions.clear();
        // modelFree is queued last so the FIFO lock runs it after every
        // session free, and behind any compute still in flight.
        const lib = this.#lib;
        const doomed = this.#modelHandle;
        this.#modelHandle = null;
        deferFree(this.#lock, () => lib.F.modelFree(doomed));
    }

    /** Symbol.dispose hook; equivalent to dispose(). */
    [Symbol.dispose]() {
        this.dispose();
    }
}
1104
/**
 * One-shot transcription.
 *
 * @param model either an already loaded TranscribeModel, which is reused and
 *   left open, or anything else, which is handed to `TranscribeModel.load`
 *   as the model path.
 * @param pcm audio passed through to `TranscribeModel#transcribe`.
 * @param opts options; when a model is loaded here the same object is given
 *   to both the load call and the transcribe call.
 * @returns a promise for the transcription result.
 */
export async function transcribe(model, pcm, opts = {}) {
    const alreadyLoaded = model instanceof TranscribeModel;
    if (!alreadyLoaded) {
        // We loaded it, so we dispose it, whether the run succeeds or throws.
        const temporary = await TranscribeModel.load(model, opts);
        try {
            return await temporary.transcribe(pcm, opts);
        }
        finally {
            temporary.dispose();
        }
    }
    return model.transcribe(pcm, opts);
}