inferis-ml 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,543 @@
1
+ import { M as MainToWorkerMessage, W as WorkerToMainMessage, C as CapabilityReport, S as SerializedError, a as ModelState, b as ModelEntry, D as Device, c as ScheduledTask, T as TaskPriority, P as PoolConfig, d as ModelLoadConfig, e as ModelHandle, L as LoadProgressEvent } from './types-Y6Ytjh7U.cjs';
2
+ export { I as InferenceOptions, f as LoadedModel, g as ModelAdapter, h as ModelAdapterFactory, i as WasmCapability, j as WebGpuCapability, k as WorkerPoolInterface } from './types-Y6Ytjh7U.cjs';
3
+
4
+ type RoleChangeCallback = (role: 'leader' | 'follower') => void;
5
+ /**
6
+ * Leader election via Web Locks API.
7
+ *
8
+ * @remarks
9
+ * One tab holds the lock and acts as the "leader" — it owns the worker pool.
10
+ * All other tabs are "followers" and proxy their requests through BroadcastChannel.
11
+ *
12
+ * When the leader tab closes, the lock is released automatically by the browser,
13
+ * and the next tab in the lock queue is promoted to leader.
14
+ *
15
+ * Web Locks + BroadcastChannel combined coverage: ~96% of modern browsers.
16
+ *
17
+ * iOS Safari and older Android Chrome do not support SharedWorker but DO support
18
+ * Web Locks, which makes this tier-2 strategy a reliable cross-mobile fallback.
19
+ */
20
+ declare class LeaderElection {
21
+ private role;
22
+ private readonly listeners;
23
+ private abortController;
24
+ /**
25
+ * Start leader election. Resolves once the role is determined.
26
+ * The lock is held for the lifetime of the tab.
27
+ */
28
+ start(): Promise<'leader' | 'follower'>;
29
+ private holdLock;
30
+ /**
31
+ * Release the lock and stop the election. Used for cleanup.
32
+ */
33
+ stop(): void;
34
+ /**
35
+ * Subscribe to role changes.
36
+ * @returns unsubscribe function
37
+ */
38
+ onRoleChange(callback: RoleChangeCallback): () => void;
39
+ get currentRole(): 'leader' | 'follower' | 'unknown';
40
+ get isLeader(): boolean;
41
+ private setRole;
42
+ /**
43
+ * Check if the Web Locks API is available.
44
+ */
45
+ static isSupported(): boolean;
46
+ isSupported(): boolean;
47
+ }
48
+
49
+ type MessageListener = (msg: WorkerToMainMessage) => void;
50
+ /**
51
+ * Bridge to a SharedWorker.
52
+ *
53
+ * @remarks
54
+ * SharedWorker allows all tabs from the same origin to share one worker process.
55
+ * One worker holds all model instances, eliminating per-tab model duplication.
56
+ *
57
+ * Tier 1 coordination (~58% browser coverage):
58
+ * - Chrome 4+, Edge 79+, Firefox 29+, Safari 16+
59
+ * - NOT supported on iOS Safari < 16 or Android Chrome
60
+ *
61
+ * If SharedWorker is unavailable, fall back to LeaderElection (tier 2)
62
+ * or per-tab dedicated workers (tier 3).
63
+ */
64
+ declare class SharedWorkerBridge {
65
+ private readonly port;
66
+ private readonly listeners;
67
+ constructor(workerUrl: URL | string);
68
+ /**
69
+ * Send a message to the shared worker.
70
+ */
71
+ postMessage(msg: MainToWorkerMessage, transfer?: Transferable[]): void;
72
+ /**
73
+ * Subscribe to incoming messages from the shared worker.
74
+ * @returns unsubscribe function
75
+ */
76
+ on(listener: MessageListener): () => void;
77
+ /**
78
+ * Disconnect from the shared worker. The worker itself stays alive
79
+ * as long as other tabs are connected.
80
+ */
81
+ disconnect(): void;
82
+ /**
83
+ * Check if SharedWorker is available in the current environment.
84
+ */
85
+ static isSupported(): boolean;
86
+ }
87
+
88
+ type TabChannelMessage = {
89
+ type: 'leader-elected';
90
+ tabId: string;
91
+ } | {
92
+ type: 'leader-gone';
93
+ tabId: string;
94
+ } | {
95
+ type: 'request';
96
+ tabId: string;
97
+ reqId: string;
98
+ payload: unknown;
99
+ } | {
100
+ type: 'response';
101
+ reqId: string;
102
+ payload: unknown;
103
+ error?: {
104
+ message: string;
105
+ name: string;
106
+ };
107
+ } | {
108
+ type: 'stream-chunk';
109
+ reqId: string;
110
+ chunk: unknown;
111
+ } | {
112
+ type: 'stream-end';
113
+ reqId: string;
114
+ } | {
115
+ type: 'stream-error';
116
+ reqId: string;
117
+ error: {
118
+ message: string;
119
+ name: string;
120
+ };
121
+ };
122
+ type TabChannelListener = (msg: TabChannelMessage) => void;
123
+ /**
124
+ * Thin wrapper over BroadcastChannel for cross-tab coordination.
125
+ *
126
+ * @remarks
127
+ * BroadcastChannel has ~96% browser coverage and works on both desktop
128
+ * and mobile. Messages are NOT delivered to the sender tab.
129
+ */
130
+ declare class TabChannel {
131
+ private readonly channel;
132
+ private readonly listeners;
133
+ constructor();
134
+ /**
135
+ * Broadcast a message to all other tabs.
136
+ */
137
+ send(msg: TabChannelMessage): void;
138
+ /**
139
+ * Subscribe to incoming messages.
140
+ * @returns unsubscribe function
141
+ */
142
+ on(listener: TabChannelListener): () => void;
143
+ /**
144
+ * Close the channel and remove all listeners.
145
+ */
146
+ close(): void;
147
+ /**
148
+ * Check if BroadcastChannel is available in the current environment.
149
+ */
150
+ static isSupported(): boolean;
151
+ }
152
+
153
+ /**
154
+ * LRU-based memory budget tracker.
155
+ *
156
+ * Tracks approximate memory usage across loaded models.
157
+ * When a new model load would exceed the configured budget,
158
+ * evicts least-recently-used models to make room.
159
+ */
160
+ declare class MemoryBudget {
161
+ private readonly maxMB;
162
+ private usedMB;
163
+ private readonly lruOrder;
164
+ private readonly modelSizes;
165
+ constructor(maxMB: number);
166
+ /** Total memory budget in MB. */
167
+ get totalMB(): number;
168
+ /** Currently allocated memory in MB. */
169
+ get allocatedMB(): number;
170
+ /** Remaining available memory in MB. */
171
+ get availableMB(): number;
172
+ /**
173
+ * Determine which model IDs must be evicted to fit `requiredMB`.
174
+ * Does NOT perform the eviction itself — caller is responsible for
175
+ * actually unloading the returned models before calling `allocate()`.
176
+ *
177
+ * @returns Array of model IDs to evict (LRU order), or null if
178
+ * `requiredMB` exceeds the total budget (impossible to fit).
179
+ */
180
+ planEviction(requiredMB: number): string[] | null;
181
+ /**
182
+ * Allocate memory for a model. Call after the model is loaded.
183
+ * Updates LRU order.
184
+ */
185
+ allocate(modelId: string, memoryMB: number): void;
186
+ /**
187
+ * Release memory for a model. Call when model is unloaded.
188
+ */
189
+ release(modelId: string): void;
190
+ /**
191
+ * Mark a model as recently used, moving it to the back of the LRU queue.
192
+ */
193
+ touch(modelId: string): void;
194
+ /**
195
+ * Check whether allocating `requiredMB` would stay within budget
196
+ * (i.e., requires no evictions).
197
+ */
198
+ fits(requiredMB: number): boolean;
199
+ /** Return LRU-ordered list of tracked model IDs (oldest first). */
200
+ get lruList(): readonly string[];
201
+ }
202
+
203
+ /**
204
+ * Detect browser capabilities for AI inference.
205
+ * Result is cached after first call.
206
+ *
207
+ * @example
208
+ * const caps = await detectCapabilities();
209
+ * if (caps.webgpu.supported) {
210
+ * console.log('GPU vendor:', caps.webgpu.adapter?.vendor);
211
+ * }
212
+ */
213
+ declare function detectCapabilities(): Promise<CapabilityReport>;
214
+ /** Clear the cached capability report. Useful for testing. */
215
+ declare function clearCapabilitiesCache(): void;
216
+
217
+ declare class InferisError extends Error {
218
+ readonly code: string;
219
+ constructor(message: string, code: string);
220
+ static fromSerialized(err: SerializedError): InferisError;
221
+ serialize(): SerializedError;
222
+ }
223
+ declare class ModelLoadError extends InferisError {
224
+ readonly modelId: string;
225
+ constructor(modelId: string, message: string);
226
+ }
227
+ declare class ModelNotReadyError extends InferisError {
228
+ readonly modelId: string;
229
+ constructor(modelId: string, state: string);
230
+ }
231
+ declare class ModelDisposedError extends InferisError {
232
+ readonly modelId: string;
233
+ constructor(modelId: string);
234
+ }
235
+ declare class InferenceError extends InferisError {
236
+ readonly modelId: string;
237
+ constructor(modelId: string, message: string);
238
+ }
239
+ declare class BudgetExceededError extends InferisError {
240
+ readonly requestedMB: number;
241
+ readonly budgetMB: number;
242
+ constructor(requestedMB: number, budgetMB: number);
243
+ }
244
+ declare class TaskTimeoutError extends InferisError {
245
+ readonly reqId: string;
246
+ constructor(reqId: string, timeoutMs: number);
247
+ }
248
+ declare class WorkerError extends InferisError {
249
+ readonly workerId: number;
250
+ constructor(workerId: number, message: string);
251
+ }
252
+ declare class DeviceLostError extends InferisError {
253
+ readonly modelId: string;
254
+ readonly reason: string;
255
+ constructor(modelId: string, reason: string);
256
+ }
257
+ declare class InvalidStateTransitionError extends InferisError {
258
+ constructor(from: string, to: string);
259
+ }
260
+
261
+ /**
262
+ * Validate and apply a lifecycle state transition.
263
+ *
264
+ * @throws {InvalidStateTransitionError} if the transition is not allowed.
265
+ */
266
+ declare function transition(from: ModelState, to: ModelState): ModelState;
267
+ /**
268
+ * Check if a state transition is valid without throwing.
269
+ */
270
+ declare function canTransition(from: ModelState, to: ModelState): boolean;
271
+ /**
272
+ * Check if a model in the given state can accept inference tasks.
273
+ */
274
+ declare function isAcceptingInference(state: ModelState): boolean;
275
+ /**
276
+ * Check if a model in the given state is considered terminal (no further transitions possible).
277
+ */
278
+ declare function isTerminal(state: ModelState): boolean;
279
+
280
+ /**
281
+ * ModelRegistry tracks all model entries across workers.
282
+ *
283
+ * A model ID is composed of `task:modelName` to allow the same underlying
284
+ * model to be used for different tasks without collision.
285
+ */
286
+ declare class ModelRegistry {
287
+ private readonly models;
288
+ /**
289
+ * Compose a canonical model ID from task and config.
290
+ */
291
+ static makeId(task: string, modelName: string): string;
292
+ /**
293
+ * Register a new model entry with IDLE state.
294
+ */
295
+ register(id: string, task: string, config: Record<string, unknown>): ModelEntry;
296
+ /**
297
+ * Get a model entry by ID.
298
+ */
299
+ get(id: string): ModelEntry | undefined;
300
+ /**
301
+ * Check if a model entry exists.
302
+ */
303
+ has(id: string): boolean;
304
+ /**
305
+ * Update the state of a model entry, notifying all subscribers.
306
+ */
307
+ setState(id: string, state: ModelState): void;
308
+ /**
309
+ * Update the device, memory, and worker assignment after a successful load.
310
+ */
311
+ setLoaded(id: string, device: Device, memoryMB: number, workerId: number): void;
312
+ /**
313
+ * Clear the worker assignment on unload.
314
+ */
315
+ setUnloaded(id: string): void;
316
+ /**
317
+ * Add a state change listener to a model entry.
318
+ * @returns unsubscribe function
319
+ */
320
+ subscribe(id: string, listener: (state: ModelState) => void): () => void;
321
+ /**
322
+ * Remove a model entry completely.
323
+ */
324
+ delete(id: string): void;
325
+ /**
326
+ * Return all model entries in a given state.
327
+ */
328
+ byState(state: ModelState): ModelEntry[];
329
+ /**
330
+ * Return all model entries assigned to a specific worker.
331
+ */
332
+ byWorker(workerId: number): ModelEntry[];
333
+ /** Number of registered model entries. */
334
+ get size(): number;
335
+ /** All model entries (read-only view for scheduling). */
336
+ get entries(): ReadonlyMap<string, ModelEntry>;
337
+ }
338
+
339
+ /**
340
+ * Task scheduler with priority queue and model affinity.
341
+ *
342
+ * Model affinity: when dispatching a task, the scheduler prefers workers
343
+ * that already have the required model loaded, avoiding redundant re-loads.
344
+ */
345
+ declare class Scheduler {
346
+ private readonly queues;
347
+ private readonly workerLoad;
348
+ private readonly workerModels;
349
+ /**
350
+ * Register a worker with the scheduler.
351
+ */
352
+ addWorker(workerId: number): void;
353
+ /**
354
+ * Remove a worker from the scheduler.
355
+ * Rejects any queued tasks targeting only this worker (affinity-pinned).
356
+ */
357
+ removeWorker(workerId: number): void;
358
+ /**
359
+ * Notify scheduler that a worker has loaded a model.
360
+ * Used for affinity tracking.
361
+ */
362
+ notifyModelLoaded(workerId: number, modelId: string): void;
363
+ /**
364
+ * Notify scheduler that a worker has unloaded a model.
365
+ */
366
+ notifyModelUnloaded(workerId: number, modelId: string): void;
367
+ /**
368
+ * Enqueue a task. It will be dispatched to the best available worker.
369
+ * If all workers are busy, the task waits in the priority queue.
370
+ */
371
+ enqueue(task: ScheduledTask, _models?: ReadonlyMap<string, ModelEntry>): void;
372
+ /**
373
+ * Notify scheduler that a worker has completed a task.
374
+ * Dispatches the next queued task to the now-free worker, if any.
375
+ */
376
+ notifyTaskComplete(workerId: number, _models?: ReadonlyMap<string, ModelEntry>): void;
377
+ /**
378
+ * Attempt to dispatch the highest-priority queued task to a specific worker.
379
+ */
380
+ private drainNext;
381
+ /**
382
+ * Pick the best available worker for a given model.
383
+ * Prefers workers with the model already loaded (affinity).
384
+ * Falls back to the least-loaded worker.
385
+ * Returns null if all workers are saturated (load >= concurrencyPerWorker).
386
+ */
387
+ pickWorker(modelId: string, concurrencyPerWorker?: number): number | null;
388
+ private dispatch;
389
+ private findAffinityTask;
390
+ /** Return current queue depth across all priorities. */
391
+ get queueDepth(): number;
392
+ /** Return current load for a worker. */
393
+ workerLoadFor(workerId: number): number;
394
+ /** Return all registered worker IDs. */
395
+ get workerIds(): number[];
396
+ /** Clear all worker state (called on pool termination). */
397
+ reset(): void;
398
+ /** Return the effective priority weight for sorting. */
399
+ static priorityWeight(p: TaskPriority): number;
400
+ }
401
+
402
+ /**
403
+ * WorkerPool — main orchestrator.
404
+ *
405
+ * Manages a pool of dedicated Web Workers, routes inference tasks,
406
+ * tracks model lifecycle, and enforces memory budgets.
407
+ *
408
+ * @example
409
+ * ```ts
410
+ * const pool = await createPool({ adapter: transformersAdapter() });
411
+ * const model = await pool.load('feature-extraction', { model: 'Xenova/...' });
412
+ * const result = await model.run(['Hello world']);
413
+ * await model.dispose();
414
+ * await pool.terminate();
415
+ * ```
416
+ */
417
+ declare class WorkerPool {
418
+ private readonly workers;
419
+ private readonly registry;
420
+ private readonly scheduler;
421
+ private readonly budget;
422
+ private readonly inferenceWaiters;
423
+ private readonly pending;
424
+ private readonly config;
425
+ private readonly caps;
426
+ private readonly resolvedDevice;
427
+ private nextWorkerId;
428
+ private nextReqId;
429
+ private terminated;
430
+ private constructor();
431
+ /**
432
+ * Create and initialize a WorkerPool.
433
+ * Spawns `maxWorkers` dedicated workers and detects browser capabilities.
434
+ */
435
+ static create(config: PoolConfig): Promise<WorkerPool>;
436
+ private spawnWorkers;
437
+ private spawnWorker;
438
+ private handleWorkerMessage;
439
+ private readonly reqIdToModelId;
440
+ /**
441
+ * Load a model. If it is already loaded with the same config, returns the existing handle.
442
+ * Performs memory budget check and eviction before loading.
443
+ */
444
+ load<TOutput = unknown>(task: string, config: ModelLoadConfig): Promise<ModelHandle<TOutput>>;
445
+ private disposeModel;
446
+ private makeHandle;
447
+ /**
448
+ * Waits until the model transitions to `ready`, or rejects if it reaches a
449
+ * terminal/non-recoverable state (error, disposed, unloading).
450
+ * Respects the provided AbortSignal.
451
+ *
452
+ * When the model is `inferring`, uses a priority queue so that higher-priority
453
+ * callers are unblocked before lower-priority ones.
454
+ * When the model is `loading`, falls back to a plain state subscription.
455
+ */
456
+ private waitForReady;
457
+ /**
458
+ * Unblocks the highest-priority waiter queued for the given model.
459
+ * Called after a model transitions back to `ready`.
460
+ */
461
+ private drainInferenceWaiter;
462
+ private runInference;
463
+ private streamInference;
464
+ private setRequestTimeout;
465
+ private clearPendingTimeout;
466
+ private handleWorkerCrash;
467
+ /**
468
+ * Gracefully terminate all workers and dispose all models.
469
+ */
470
+ terminate(): Promise<void>;
471
+ /**
472
+ * Return snapshot of detected browser capabilities.
473
+ */
474
+ capabilities(): CapabilityReport;
475
+ private resolveDevice;
476
+ /** @internal */
477
+ get _registry(): ModelRegistry;
478
+ /** @internal */
479
+ get _budget(): MemoryBudget;
480
+ /** @internal */
481
+ get _scheduler(): Scheduler;
482
+ }
483
+ /**
484
+ * Create and initialize a WorkerPool.
485
+ *
486
+ * @example
487
+ * ```ts
488
+ * import { createPool } from 'inferis-ml';
489
+ * import { transformersAdapter } from 'inferis-ml/adapters/transformers';
490
+ *
491
+ * const pool = await createPool({ adapter: transformersAdapter() });
492
+ * ```
493
+ */
494
+ declare function createPool(config: PoolConfig): Promise<WorkerPool>;
495
+
496
+ type ProgressListener = (event: LoadProgressEvent) => void;
497
+ /**
498
+ * Simple progress event emitter for model load operations.
499
+ * Wraps the raw progress callback into a subscribable interface.
500
+ */
501
+ declare class ProgressEmitter {
502
+ private readonly listeners;
503
+ /**
504
+ * Subscribe to progress events.
505
+ * @returns unsubscribe function
506
+ */
507
+ on(listener: ProgressListener): () => void;
508
+ /**
509
+ * Emit a progress event to all subscribers.
510
+ */
511
+ emit(event: LoadProgressEvent): void;
512
+ /** Remove all listeners. */
513
+ clear(): void;
514
+ /** Number of active listeners. */
515
+ get listenerCount(): number;
516
+ }
517
+
518
+ /**
519
+ * Async iterator adapter for ReadableStream.
520
+ *
521
+ * Allows `for await (const token of stream)` syntax when the runtime
522
+ * does not natively support ReadableStream async iteration.
523
+ *
524
+ * @example
525
+ * ```ts
526
+ * const stream = model.stream({ messages: [...] });
527
+ * for await (const token of readableToAsyncIter(stream)) {
528
+ * process.stdout.write(token);
529
+ * }
530
+ * ```
531
+ */
532
+ declare function readableToAsyncIter<T>(stream: ReadableStream<T>): AsyncIterable<T>;
533
+ /**
534
+ * Collect all chunks from a ReadableStream into an array.
535
+ * Useful for non-streaming consumers.
536
+ */
537
+ declare function collectStream<T>(stream: ReadableStream<T>): Promise<T[]>;
538
+ /**
539
+ * Collect all chunks from a ReadableStream and join them as a string.
540
+ */
541
+ declare function collectStreamText(stream: ReadableStream<string>): Promise<string>;
542
+
543
+ export { BudgetExceededError, CapabilityReport, Device, DeviceLostError, InferenceError, InferisError, InvalidStateTransitionError, LeaderElection, LoadProgressEvent, MemoryBudget, ModelDisposedError, ModelHandle, ModelLoadConfig, ModelLoadError, ModelNotReadyError, ModelRegistry, ModelState, PoolConfig, ProgressEmitter, Scheduler, SharedWorkerBridge, TabChannel, TaskTimeoutError, WorkerError, WorkerPool, canTransition, clearCapabilitiesCache, collectStream, collectStreamText, createPool, detectCapabilities, isAcceptingInference, isTerminal, readableToAsyncIter, transition };