inferis-ml 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +664 -0
- package/dist/adapters/onnx.cjs +2 -0
- package/dist/adapters/onnx.cjs.map +1 -0
- package/dist/adapters/onnx.d.cts +28 -0
- package/dist/adapters/onnx.d.ts +28 -0
- package/dist/adapters/onnx.js +2 -0
- package/dist/adapters/onnx.js.map +1 -0
- package/dist/adapters/transformers.cjs +2 -0
- package/dist/adapters/transformers.cjs.map +1 -0
- package/dist/adapters/transformers.d.cts +29 -0
- package/dist/adapters/transformers.d.ts +29 -0
- package/dist/adapters/transformers.js +2 -0
- package/dist/adapters/transformers.js.map +1 -0
- package/dist/adapters/web-llm.cjs +2 -0
- package/dist/adapters/web-llm.cjs.map +1 -0
- package/dist/adapters/web-llm.d.cts +31 -0
- package/dist/adapters/web-llm.d.ts +31 -0
- package/dist/adapters/web-llm.js +2 -0
- package/dist/adapters/web-llm.js.map +1 -0
- package/dist/index.cjs +2 -0
- package/dist/index.cjs.map +1 -0
- package/dist/index.d.cts +543 -0
- package/dist/index.d.ts +543 -0
- package/dist/index.js +2 -0
- package/dist/index.js.map +1 -0
- package/dist/types-Y6Ytjh7U.d.cts +271 -0
- package/dist/types-Y6Ytjh7U.d.ts +271 -0
- package/dist/worker/dedicated.worker.cjs +2 -0
- package/dist/worker/dedicated.worker.cjs.map +1 -0
- package/dist/worker/dedicated.worker.d.cts +17 -0
- package/dist/worker/dedicated.worker.d.ts +17 -0
- package/dist/worker/dedicated.worker.js +502 -0
- package/dist/worker/dedicated.worker.js.map +1 -0
- package/dist/worker/shared.worker.js +460 -0
- package/package.json +103 -0
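Each adapter above ships as its own subpath entry in both ESM (`.js`) and CommonJS (`.cjs`) form, with matching `.d.ts`/`.d.cts` declarations. As a quick orientation before the per-file diffs, here is a sketch of how a consumer wires one up, based on the import paths used in the JSDoc examples below (the `exports` map in package.json is not shown in this excerpt, so the subpaths are assumed):

```ts
// Sketch only: subpaths assumed from the JSDoc examples in the diffs below.
import { createPool } from 'inferis-ml';
import { transformersAdapter } from 'inferis-ml/adapters/transformers';

const pool = await createPool({ adapter: transformersAdapter() });
const model = await pool.load('feature-extraction', {
  model: 'mixedbread-ai/mxbai-embed-xsmall-v1',
});
```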
package/dist/adapters/onnx.d.cts
ADDED

@@ -0,0 +1,28 @@
import { h as ModelAdapterFactory } from '../types-Y6Ytjh7U.cjs';

/**
 * Adapter for onnxruntime-web.
 *
 * @remarks
 * Provides low-level access to ONNX model inference.
 * Use for custom models not supported by transformers.js or web-llm.
 * Input must be pre-processed `OrtTensor` instances.
 *
 * @example
 * ```ts
 * import { createPool } from 'inferis-ml';
 * import { onnxAdapter } from 'inferis-ml/adapters/onnx';
 * import * as ort from 'onnxruntime-web';
 *
 * const pool = await createPool({ adapter: onnxAdapter() });
 * const model = await pool.load('custom', {
 *   model: 'https://example.com/model.onnx',
 * });
 *
 * const input = new ort.Tensor('float32', data, [1, 3, 224, 224]);
 * const output = await model.run({ feeds: { input } });
 * ```
 */
declare function onnxAdapter(): ModelAdapterFactory;

export { onnxAdapter };
package/dist/adapters/onnx.d.ts
ADDED

@@ -0,0 +1,28 @@
import { h as ModelAdapterFactory } from '../types-Y6Ytjh7U.js';

/**
 * Adapter for onnxruntime-web.
 *
 * @remarks
 * Provides low-level access to ONNX model inference.
 * Use for custom models not supported by transformers.js or web-llm.
 * Input must be pre-processed `OrtTensor` instances.
 *
 * @example
 * ```ts
 * import { createPool } from 'inferis-ml';
 * import { onnxAdapter } from 'inferis-ml/adapters/onnx';
 * import * as ort from 'onnxruntime-web';
 *
 * const pool = await createPool({ adapter: onnxAdapter() });
 * const model = await pool.load('custom', {
 *   model: 'https://example.com/model.onnx',
 * });
 *
 * const input = new ort.Tensor('float32', data, [1, 3, 224, 224]);
 * const output = await model.run({ feeds: { input } });
 * ```
 */
declare function onnxAdapter(): ModelAdapterFactory;

export { onnxAdapter };
package/dist/adapters/onnx.js
ADDED

@@ -0,0 +1,2 @@
function c(){return {name:"onnx",async create(){let d=await import('onnxruntime-web');return {name:"onnx",estimateMemoryMB(n,e){return e.estimatedMemoryMB??50},async load(n,e,a,s){let o=e.model;s({loaded:0,phase:"downloading",total:0});let t=a==="webgpu"?["webgpu","wasm"]:["wasm"],r=await d.InferenceSession.create(o,{executionProviders:t,graphOptimizationLevel:"all"});return s({loaded:1,phase:"done",total:1}),{instance:r,memoryMB:e.estimatedMemoryMB??50}},async run(n,e,a){let s=n.instance,{feeds:o,outputNames:t}=e;return await s.run(o,t)},async stream(n,e,a,s){let o=n.instance,{feeds:t,outputNames:r}=e,i=await o.run(t,r,s);a(i);},async unload(n){await n.instance.release?.();}}}}}export{c as onnxAdapter};//# sourceMappingURL=onnx.js.map
//# sourceMappingURL=onnx.js.map
package/dist/adapters/onnx.js.map
ADDED

@@ -0,0 +1 @@
{"version":3,"sources":["../../src/adapters/onnx.ts"],"names":["onnxAdapter","ort","_task","config","device","onProgress","modelUrl","executionProviders","session","model","input","_options","feeds","outputNames","onChunk","options","result"],"mappings":"AAyCO,SAASA,GAAmC,CACjD,OAAO,CACL,IAAA,CAAM,OAEN,MAAM,MAAA,EAAgC,CAEpC,IAAMC,EAAM,MAAM,OAAO,iBAAiB,CAAA,CAE1C,OAAO,CACL,IAAA,CAAM,MAAA,CAEN,gBAAA,CAAiBC,EAAeC,CAAAA,CAAyC,CACvE,OAAQA,CAAAA,CAAO,mBAA4C,EAC7D,CAAA,CAEA,MAAM,IAAA,CACJD,EACAC,CAAAA,CACAC,CAAAA,CACAC,EACsB,CACtB,IAAMC,EAAWH,CAAAA,CAAO,KAAA,CAExBE,CAAAA,CAAW,CAAE,OAAQ,CAAA,CAAG,KAAA,CAAO,aAAA,CAAe,KAAA,CAAO,CAAE,CAAC,CAAA,CAExD,IAAME,CAAAA,CAAqBH,IAAW,QAAA,CAClC,CAAC,QAAA,CAAU,MAAM,EACjB,CAAC,MAAM,CAAA,CAELI,CAAAA,CAA4B,MAAMP,CAAAA,CAAI,gBAAA,CAAiB,MAAA,CAAOK,CAAAA,CAAU,CAC5E,kBAAA,CAAAC,CAAAA,CACA,sBAAA,CAAwB,KAC1B,CAAC,CAAA,CAED,OAAAF,EAAW,CAAE,MAAA,CAAQ,EAAG,KAAA,CAAO,MAAA,CAAQ,KAAA,CAAO,CAAE,CAAC,CAAA,CAE1C,CACL,QAAA,CAAUG,CAAAA,CACV,SAAWL,CAAAA,CAAO,iBAAA,EAA4C,EAChE,CACF,EAEA,MAAM,GAAA,CAAIM,CAAAA,CAAoBC,CAAAA,CAAgBC,EAAsC,CAClF,IAAMH,CAAAA,CAAUC,CAAAA,CAAM,SAChB,CAAE,KAAA,CAAAG,CAAAA,CAAO,WAAA,CAAAC,CAAY,CAAA,CAAIH,CAAAA,CAG/B,OADgB,MAAMF,EAAQ,GAAA,CAAII,CAAAA,CAAOC,CAAW,CAEtD,CAAA,CAEA,MAAM,MAAA,CACJJ,CAAAA,CACAC,CAAAA,CACAI,CAAAA,CACAC,EACe,CAGf,IAAMP,CAAAA,CAAUC,CAAAA,CAAM,SAChB,CAAE,KAAA,CAAAG,CAAAA,CAAO,WAAA,CAAAC,CAAY,CAAA,CAAIH,CAAAA,CACzBM,CAAAA,CAAS,MAAMR,EAAQ,GAAA,CAAII,CAAAA,CAAOC,CAAAA,CAAaE,CAAO,EAC5DD,CAAAA,CAAQE,CAAM,EAChB,CAAA,CAEA,MAAM,MAAA,CAAOP,CAAAA,CAAmC,CAE9C,MADgBA,EAAM,QAAA,CACR,OAAA,KAChB,CACF,CACF,CACF,CACF","file":"onnx.js","sourcesContent":["import type {\n Device,\n LoadedModel,\n LoadProgressEvent,\n ModelAdapter,\n ModelAdapterFactory,\n} from '../core/types.js';\n\n// eslint-disable-next-line ts/no-explicit-any\ntype InferenceSession = any;\n// eslint-disable-next-line ts/no-explicit-any\ntype Tensor = any;\n\ninterface OnnxInput {\n feeds: Record<string, Tensor>;\n outputNames?: string[];\n}\n\n/**\n * Adapter for onnxruntime-web.\n *\n * @remarks\n * Provides low-level access to ONNX model inference.\n * Use for custom models not supported by transformers.js or web-llm.\n * Input must be pre-processed `OrtTensor` instances.\n *\n * @example\n * ```ts\n * import { createPool } from 'inferis-ml';\n * import { onnxAdapter } from 'inferis-ml/adapters/onnx';\n * import * as ort from 'onnxruntime-web';\n *\n * const pool = await createPool({ adapter: onnxAdapter() });\n * const model = await pool.load('custom', {\n * model: 'https://example.com/model.onnx',\n * });\n *\n * const input = new ort.Tensor('float32', data, [1, 3, 224, 224]);\n * const output = await model.run({ feeds: { input } });\n * ```\n */\nexport function onnxAdapter(): ModelAdapterFactory {\n return {\n name: 'onnx',\n\n async create(): Promise<ModelAdapter> {\n // @ts-expect-error - optional peer dependency, resolved at runtime inside worker\n const ort = await import('onnxruntime-web');\n\n return {\n name: 'onnx',\n\n estimateMemoryMB(_task: string, config: Record<string, unknown>): number {\n return (config.estimatedMemoryMB as number | undefined) ?? 50;\n },\n\n async load(\n _task: string,\n config: Record<string, unknown>,\n device: Device,\n onProgress: (event: LoadProgressEvent) => void,\n ): Promise<LoadedModel> {\n const modelUrl = config.model as string;\n\n onProgress({ loaded: 0, phase: 'downloading', total: 0 });\n\n const executionProviders = device === 'webgpu'\n ? 
['webgpu', 'wasm'] as const\n : ['wasm'] as const;\n\n const session: InferenceSession = await ort.InferenceSession.create(modelUrl, {\n executionProviders,\n graphOptimizationLevel: 'all',\n });\n\n onProgress({ loaded: 1, phase: 'done', total: 1 });\n\n return {\n instance: session,\n memoryMB: (config.estimatedMemoryMB as number | undefined) ?? 50,\n };\n },\n\n async run(model: LoadedModel, input: unknown, _options?: unknown): Promise<unknown> {\n const session = model.instance as InferenceSession;\n const { feeds, outputNames } = input as OnnxInput;\n\n const results = await session.run(feeds, outputNames);\n return results;\n },\n\n async stream(\n model: LoadedModel,\n input: unknown,\n onChunk: (chunk: unknown) => void,\n options?: unknown,\n ): Promise<void> {\n // ONNX Runtime Web doesn't natively support streaming inference.\n // Run full inference and emit the result as a single chunk.\n const session = model.instance as InferenceSession;\n const { feeds, outputNames } = input as OnnxInput;\n const result = await session.run(feeds, outputNames, options);\n onChunk(result);\n },\n\n async unload(model: LoadedModel): Promise<void> {\n const session = model.instance as InferenceSession;\n await session.release?.();\n },\n };\n },\n };\n}\n"]}
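Worth noting from the bundled source above: ONNX Runtime Web does not natively support streaming inference, so this adapter's `stream` runs one full inference and emits the whole result as a single chunk. A consumer-side sketch, reusing `model` and `input` from the @example and assuming the pool handle's `stream()` returns an async-iterable `ReadableStream` (as the web-llm example further down suggests):

```ts
// Sketch: with the onnx adapter, "streaming" yields exactly one chunk,
// the complete session.run() output map, not incremental deltas.
for await (const chunk of model.stream({ feeds: { input } })) {
  console.log(chunk);
}
```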
package/dist/adapters/transformers.cjs
ADDED

@@ -0,0 +1,2 @@
'use strict';function k(){return {name:"transformers",async create(){let{pipeline:r,TextStreamer:t,env:p}=await import('@huggingface/transformers');return p.backends.onnx.wasm.proxy=false,{name:"transformers",estimateMemoryMB(n,e){return e.estimatedMemoryMB??100},async load(n,e,o,a){let i=e.model,c=e.dtype??"fp32",d=new Map,w=await r(n,i,{device:o==="webgpu"?"webgpu":"wasm",dtype:c,progress_callback:s=>{if(s.status==="progress"){let m=s.file??"unknown",y=s.total??0;d.set(m,y),a({loaded:s.loaded??0,phase:m,total:y});}else s.status==="initiate"?a({loaded:0,phase:"initiate",total:0}):s.status==="done"&&a({loaded:1,phase:"done",total:1});}}),l=0;for(let s of d.values())l+=s;return {instance:w,memoryMB:Math.ceil(l/(1024*1024))||100}},async run(n,e,o){let a=n.instance,i=Array.isArray(e)?await a(...e,o??{}):await a(e,o??{});return u(i)},async stream(n,e,o,a){let i=n.instance,c=new t(i.tokenizer,{callback_function:d=>o(d),skip_prompt:true,skip_special_tokens:true});await i(e,{...a??{},streamer:c});},async unload(n){await n.instance.dispose?.();}}}}}function u(r){if(r===null||typeof r!="object")return r;if(Array.isArray(r))return r.map(u);let t=r;return typeof t.data<"u"&&Array.isArray(t.dims)?{data:Array.from(t.data),dims:t.dims,size:t.size,type:t.type}:Object.fromEntries(Object.entries(t).map(([p,n])=>[p,u(n)]))}exports.serializeOutput=u;exports.transformersAdapter=k;//# sourceMappingURL=transformers.cjs.map
//# sourceMappingURL=transformers.cjs.map
package/dist/adapters/transformers.cjs.map
ADDED

@@ -0,0 +1 @@
{"version":3,"sources":["../../src/adapters/transformers.ts"],"names":["transformersAdapter","pipeline","TextStreamer","env","_task","config","task","device","onProgress","model","dtype","fileTotals","pipe","info","file","total","totalBytes","size","input","options","result","serializeOutput","onChunk","streamer","text","value","obj","k","v"],"mappings":"aA8BO,SAASA,GAA2C,CACzD,OAAO,CACL,IAAA,CAAM,eAEN,MAAM,MAAA,EAAgC,CAGpC,GAAM,CAAE,QAAA,CAAAC,CAAAA,CAAU,YAAA,CAAAC,CAAAA,CAAc,IAAAC,CAAI,CAAA,CAAI,MAAM,OAAO,2BAA2B,CAAA,CAGhF,OAAAA,CAAAA,CAAI,QAAA,CAAS,KAAK,IAAA,CAAK,KAAA,CAAQ,KAAA,CAExB,CACL,KAAM,cAAA,CAEN,gBAAA,CAAiBC,CAAAA,CAAeC,CAAAA,CAAyC,CACvE,OAAQA,CAAAA,CAAO,iBAAA,EAA4C,GAC7D,EAEA,MAAM,IAAA,CACJC,CAAAA,CACAD,CAAAA,CACAE,EACAC,CAAAA,CACsB,CACtB,IAAMC,CAAAA,CAAQJ,EAAO,KAAA,CACfK,CAAAA,CAASL,CAAAA,CAAO,KAAA,EAAgC,OAEhDM,CAAAA,CAAa,IAAI,GAAA,CAEjBC,CAAAA,CAAO,MAAMX,CAAAA,CAASK,CAAAA,CAAwCG,CAAAA,CAAO,CACzE,OAAQF,CAAAA,GAAW,QAAA,CAAW,QAAA,CAAW,MAAA,CACzC,MAAAG,CAAAA,CACA,iBAAA,CAAoBG,GAAkC,CACpD,GAAIA,EAAK,MAAA,GAAW,UAAA,CAAY,CAC9B,IAAMC,EAAQD,CAAAA,CAAK,IAAA,EAAmB,SAAA,CAChCE,CAAAA,CAASF,EAAK,KAAA,EAAoB,CAAA,CACxCF,CAAAA,CAAW,GAAA,CAAIG,EAAMC,CAAK,CAAA,CAC1BP,CAAAA,CAAW,CACT,OAASK,CAAAA,CAAK,MAAA,EAAqB,CAAA,CACnC,KAAA,CAAOC,EACP,KAAA,CAAAC,CACF,CAAC,EACH,MACSF,CAAAA,CAAK,MAAA,GAAW,UAAA,CACvBL,CAAAA,CAAW,CAAE,MAAA,CAAQ,CAAA,CAAG,KAAA,CAAO,UAAA,CAAY,MAAO,CAAE,CAAC,CAAA,CAE9CK,CAAAA,CAAK,SAAW,MAAA,EACvBL,CAAAA,CAAW,CAAE,MAAA,CAAQ,EAAG,KAAA,CAAO,MAAA,CAAQ,KAAA,CAAO,CAAE,CAAC,EAErD,CACF,CAAC,CAAA,CAEGQ,EAAa,CAAA,CACjB,IAAA,IAAWC,CAAAA,IAAQN,CAAAA,CAAW,QAAO,CAAGK,CAAAA,EAAcC,CAAAA,CAEtD,OAAO,CACL,QAAA,CAAUL,CAAAA,CACV,QAAA,CAAU,IAAA,CAAK,KAAKI,CAAAA,EAAc,IAAA,CAAO,KAAK,CAAA,EAAK,GACrD,CACF,CAAA,CAEA,MAAM,GAAA,CAAIP,CAAAA,CAAoBS,EAAgBC,CAAAA,CAAqC,CACjF,IAAMP,CAAAA,CAAOH,EAAM,QAAA,CACbW,CAAAA,CAAS,KAAA,CAAM,OAAA,CAAQF,CAAK,CAAA,CAC9B,MAAMN,CAAAA,CAAK,GAAIM,EAAmCC,CAAAA,EAAW,EAAE,CAAA,CAC/D,MAAMP,CAAAA,CAAKM,CAAAA,CAAOC,CAAAA,EAAW,EAAE,CAAA,CACnC,OAAOE,CAAAA,CAAgBD,CAAM,CAC/B,CAAA,CAEA,MAAM,MAAA,CACJX,CAAAA,CACAS,EACAI,CAAAA,CACAH,CAAAA,CACe,CACf,IAAMP,EAAOH,CAAAA,CAAM,QAAA,CAEbc,CAAAA,CAAW,IAAIrB,EAAaU,CAAAA,CAAK,SAAA,CAAW,CAChD,iBAAA,CAAoBY,GAAiBF,CAAAA,CAAQE,CAAI,CAAA,CACjD,WAAA,CAAa,KACb,mBAAA,CAAqB,IACvB,CAAC,CAAA,CAED,MAAMZ,CAAAA,CAAKM,CAAAA,CAAO,CAChB,GAAIC,GAAqB,EAAC,CAC1B,QAAA,CAAAI,CACF,CAAC,EACH,CAAA,CAEA,MAAM,MAAA,CAAOd,CAAAA,CAAmC,CAE9C,MADaA,CAAAA,CAAM,QAAA,CACR,OAAA,KACb,CACF,CACF,CACF,CACF,CAMO,SAASY,CAAAA,CAAgBI,CAAAA,CAAyB,CACvD,GAAIA,CAAAA,GAAU,IAAA,EAAQ,OAAOA,CAAAA,EAAU,SACrC,OAAOA,CAAAA,CAET,GAAI,KAAA,CAAM,QAAQA,CAAK,CAAA,CACrB,OAAOA,CAAAA,CAAM,IAAIJ,CAAe,CAAA,CAElC,IAAMK,CAAAA,CAAMD,EAEZ,OAAI,OAAOC,CAAAA,CAAI,IAAA,CAAS,KAAe,KAAA,CAAM,OAAA,CAAQA,CAAAA,CAAI,IAAI,EACpD,CACL,IAAA,CAAM,KAAA,CAAM,IAAA,CAAKA,EAAI,IAAyB,CAAA,CAC9C,IAAA,CAAMA,CAAAA,CAAI,KACV,IAAA,CAAMA,CAAAA,CAAI,IAAA,CACV,IAAA,CAAMA,EAAI,IACZ,CAAA,CAGK,MAAA,CAAO,WAAA,CACZ,OAAO,OAAA,CAAQA,CAAG,CAAA,CAAE,GAAA,CAAI,CAAC,CAACC,CAAAA,CAAGC,CAAC,CAAA,GAAM,CAACD,CAAAA,CAAGN,CAAAA,CAAgBO,CAAC,CAAC,CAAC,CAC7D,CACF","file":"transformers.cjs","sourcesContent":["import type {\n Device,\n LoadedModel,\n LoadProgressEvent,\n ModelAdapter,\n ModelAdapterFactory,\n} from '../core/types.js';\n\n// eslint-disable-next-line ts/no-explicit-any\ntype AnyPipeline = any;\n\n/**\n * Adapter for @huggingface/transformers v3+.\n *\n * @remarks\n * The adapter is instantiated INSIDE the worker via `create()`.\n * The heavy `@huggingface/transformers` library is dynamically imported\n * inside the worker, keeping the main thread bundle 
lightweight.\n *\n * @example\n * ```ts\n * import { createPool } from 'inferis-ml';\n * import { transformersAdapter } from 'inferis-ml/adapters/transformers';\n *\n * const pool = await createPool({ adapter: transformersAdapter() });\n * const model = await pool.load<number[][]>('feature-extraction', {\n * model: 'mixedbread-ai/mxbai-embed-xsmall-v1',\n * });\n * ```\n */\nexport function transformersAdapter(): ModelAdapterFactory {\n return {\n name: 'transformers',\n\n async create(): Promise<ModelAdapter> {\n // Dynamic import — runs inside the worker only\n // @ts-expect-error - optional peer dependency, resolved at runtime inside worker\n const { pipeline, TextStreamer, env } = await import('@huggingface/transformers');\n\n // Use ONNX WASM backend with optimizations\n env.backends.onnx.wasm.proxy = false;\n\n return {\n name: 'transformers',\n\n estimateMemoryMB(_task: string, config: Record<string, unknown>): number {\n return (config.estimatedMemoryMB as number | undefined) ?? 100;\n },\n\n async load(\n task: string,\n config: Record<string, unknown>,\n device: Device,\n onProgress: (event: LoadProgressEvent) => void,\n ): Promise<LoadedModel> {\n const model = config.model as string;\n const dtype = (config.dtype as string | undefined) ?? 'fp32';\n\n const fileTotals = new Map<string, number>();\n\n const pipe = await pipeline(task as Parameters<typeof pipeline>[0], model, {\n device: device === 'webgpu' ? 'webgpu' : 'wasm',\n dtype,\n progress_callback: (info: Record<string, unknown>) => {\n if (info.status === 'progress') {\n const file = (info.file as string) ?? 'unknown';\n const total = (info.total as number) ?? 0;\n fileTotals.set(file, total);\n onProgress({\n loaded: (info.loaded as number) ?? 0,\n phase: file,\n total,\n });\n }\n else if (info.status === 'initiate') {\n onProgress({ loaded: 0, phase: 'initiate', total: 0 });\n }\n else if (info.status === 'done') {\n onProgress({ loaded: 1, phase: 'done', total: 1 });\n }\n },\n });\n\n let totalBytes = 0;\n for (const size of fileTotals.values()) totalBytes += size;\n\n return {\n instance: pipe,\n memoryMB: Math.ceil(totalBytes / (1024 * 1024)) || 100,\n };\n },\n\n async run(model: LoadedModel, input: unknown, options?: unknown): Promise<unknown> {\n const pipe = model.instance as AnyPipeline;\n const result = Array.isArray(input)\n ? await pipe(...(input as [unknown, ...unknown[]]), options ?? {})\n : await pipe(input, options ?? {});\n return serializeOutput(result);\n },\n\n async stream(\n model: LoadedModel,\n input: unknown,\n onChunk: (chunk: unknown) => void,\n options?: unknown,\n ): Promise<void> {\n const pipe = model.instance as AnyPipeline;\n\n const streamer = new TextStreamer(pipe.tokenizer, {\n callback_function: (text: string) => onChunk(text),\n skip_prompt: true,\n skip_special_tokens: true,\n });\n\n await pipe(input, {\n ...(options as object ?? 
{}),\n streamer,\n });\n },\n\n async unload(model: LoadedModel): Promise<void> {\n const pipe = model.instance as AnyPipeline;\n await pipe.dispose?.();\n },\n };\n },\n };\n}\n\n/**\n * Recursively converts Tensor class instances and typed arrays to plain\n * structured-clone-compatible objects so they can cross the worker boundary.\n */\nexport function serializeOutput(value: unknown): unknown {\n if (value === null || typeof value !== 'object')\n return value;\n\n if (Array.isArray(value))\n return value.map(serializeOutput);\n\n const obj = value as Record<string, unknown>;\n\n if (typeof obj.data !== 'undefined' && Array.isArray(obj.dims)) {\n return {\n data: Array.from(obj.data as ArrayLike<number>),\n dims: obj.dims as number[],\n size: obj.size,\n type: obj.type,\n };\n }\n\n return Object.fromEntries(\n Object.entries(obj).map(([k, v]) => [k, serializeOutput(v)]),\n );\n}\n"]}
package/dist/adapters/transformers.d.cts
ADDED

@@ -0,0 +1,29 @@
import { h as ModelAdapterFactory } from '../types-Y6Ytjh7U.cjs';

/**
 * Adapter for @huggingface/transformers v3+.
 *
 * @remarks
 * The adapter is instantiated INSIDE the worker via `create()`.
 * The heavy `@huggingface/transformers` library is dynamically imported
 * inside the worker, keeping the main thread bundle lightweight.
 *
 * @example
 * ```ts
 * import { createPool } from 'inferis-ml';
 * import { transformersAdapter } from 'inferis-ml/adapters/transformers';
 *
 * const pool = await createPool({ adapter: transformersAdapter() });
 * const model = await pool.load<number[][]>('feature-extraction', {
 *   model: 'mixedbread-ai/mxbai-embed-xsmall-v1',
 * });
 * ```
 */
declare function transformersAdapter(): ModelAdapterFactory;
/**
 * Recursively converts Tensor class instances and typed arrays to plain
 * structured-clone-compatible objects so they can cross the worker boundary.
 */
declare function serializeOutput(value: unknown): unknown;

export { serializeOutput, transformersAdapter };
package/dist/adapters/transformers.d.ts
ADDED

@@ -0,0 +1,29 @@
import { h as ModelAdapterFactory } from '../types-Y6Ytjh7U.js';

/**
 * Adapter for @huggingface/transformers v3+.
 *
 * @remarks
 * The adapter is instantiated INSIDE the worker via `create()`.
 * The heavy `@huggingface/transformers` library is dynamically imported
 * inside the worker, keeping the main thread bundle lightweight.
 *
 * @example
 * ```ts
 * import { createPool } from 'inferis-ml';
 * import { transformersAdapter } from 'inferis-ml/adapters/transformers';
 *
 * const pool = await createPool({ adapter: transformersAdapter() });
 * const model = await pool.load<number[][]>('feature-extraction', {
 *   model: 'mixedbread-ai/mxbai-embed-xsmall-v1',
 * });
 * ```
 */
declare function transformersAdapter(): ModelAdapterFactory;
/**
 * Recursively converts Tensor class instances and typed arrays to plain
 * structured-clone-compatible objects so they can cross the worker boundary.
 */
declare function serializeOutput(value: unknown): unknown;

export { serializeOutput, transformersAdapter };
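The `serializeOutput` helper exists because pipeline outputs contain `Tensor` instances backed by typed arrays, which are awkward to consume after crossing the worker boundary. Per the source embedded in the maps above, any object with a `data` field and an array-valued `dims` is flattened into a plain structured-clone-friendly object. A sketch of the effect (input values chosen to be exactly representable in float32):

```ts
// Sketch of serializeOutput's tensor handling, per the bundled source:
// the typed-array `data` becomes a plain number[]; dims/size/type copy over.
const tensorLike = {
  data: new Float32Array([0.5, 0.25]),
  dims: [1, 2],
  size: 2,
  type: 'float32',
};
// serializeOutput(tensorLike)
// => { data: [0.5, 0.25], dims: [1, 2], size: 2, type: 'float32' }
```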
package/dist/adapters/transformers.js
ADDED

@@ -0,0 +1,2 @@
function k(){return {name:"transformers",async create(){let{pipeline:r,TextStreamer:t,env:p}=await import('@huggingface/transformers');return p.backends.onnx.wasm.proxy=false,{name:"transformers",estimateMemoryMB(n,e){return e.estimatedMemoryMB??100},async load(n,e,o,a){let i=e.model,c=e.dtype??"fp32",d=new Map,w=await r(n,i,{device:o==="webgpu"?"webgpu":"wasm",dtype:c,progress_callback:s=>{if(s.status==="progress"){let m=s.file??"unknown",y=s.total??0;d.set(m,y),a({loaded:s.loaded??0,phase:m,total:y});}else s.status==="initiate"?a({loaded:0,phase:"initiate",total:0}):s.status==="done"&&a({loaded:1,phase:"done",total:1});}}),l=0;for(let s of d.values())l+=s;return {instance:w,memoryMB:Math.ceil(l/(1024*1024))||100}},async run(n,e,o){let a=n.instance,i=Array.isArray(e)?await a(...e,o??{}):await a(e,o??{});return u(i)},async stream(n,e,o,a){let i=n.instance,c=new t(i.tokenizer,{callback_function:d=>o(d),skip_prompt:true,skip_special_tokens:true});await i(e,{...a??{},streamer:c});},async unload(n){await n.instance.dispose?.();}}}}}function u(r){if(r===null||typeof r!="object")return r;if(Array.isArray(r))return r.map(u);let t=r;return typeof t.data<"u"&&Array.isArray(t.dims)?{data:Array.from(t.data),dims:t.dims,size:t.size,type:t.type}:Object.fromEntries(Object.entries(t).map(([p,n])=>[p,u(n)]))}export{u as serializeOutput,k as transformersAdapter};//# sourceMappingURL=transformers.js.map
//# sourceMappingURL=transformers.js.map
package/dist/adapters/transformers.js.map
ADDED

@@ -0,0 +1 @@
{"version":3,"sources":["../../src/adapters/transformers.ts"],"names":["transformersAdapter","pipeline","TextStreamer","env","_task","config","task","device","onProgress","model","dtype","fileTotals","pipe","info","file","total","totalBytes","size","input","options","result","serializeOutput","onChunk","streamer","text","value","obj","k","v"],"mappings":"AA8BO,SAASA,GAA2C,CACzD,OAAO,CACL,IAAA,CAAM,eAEN,MAAM,MAAA,EAAgC,CAGpC,GAAM,CAAE,QAAA,CAAAC,CAAAA,CAAU,YAAA,CAAAC,CAAAA,CAAc,IAAAC,CAAI,CAAA,CAAI,MAAM,OAAO,2BAA2B,CAAA,CAGhF,OAAAA,CAAAA,CAAI,QAAA,CAAS,KAAK,IAAA,CAAK,KAAA,CAAQ,KAAA,CAExB,CACL,KAAM,cAAA,CAEN,gBAAA,CAAiBC,CAAAA,CAAeC,CAAAA,CAAyC,CACvE,OAAQA,CAAAA,CAAO,iBAAA,EAA4C,GAC7D,EAEA,MAAM,IAAA,CACJC,CAAAA,CACAD,CAAAA,CACAE,EACAC,CAAAA,CACsB,CACtB,IAAMC,CAAAA,CAAQJ,EAAO,KAAA,CACfK,CAAAA,CAASL,CAAAA,CAAO,KAAA,EAAgC,OAEhDM,CAAAA,CAAa,IAAI,GAAA,CAEjBC,CAAAA,CAAO,MAAMX,CAAAA,CAASK,CAAAA,CAAwCG,CAAAA,CAAO,CACzE,OAAQF,CAAAA,GAAW,QAAA,CAAW,QAAA,CAAW,MAAA,CACzC,MAAAG,CAAAA,CACA,iBAAA,CAAoBG,GAAkC,CACpD,GAAIA,EAAK,MAAA,GAAW,UAAA,CAAY,CAC9B,IAAMC,EAAQD,CAAAA,CAAK,IAAA,EAAmB,SAAA,CAChCE,CAAAA,CAASF,EAAK,KAAA,EAAoB,CAAA,CACxCF,CAAAA,CAAW,GAAA,CAAIG,EAAMC,CAAK,CAAA,CAC1BP,CAAAA,CAAW,CACT,OAASK,CAAAA,CAAK,MAAA,EAAqB,CAAA,CACnC,KAAA,CAAOC,EACP,KAAA,CAAAC,CACF,CAAC,EACH,MACSF,CAAAA,CAAK,MAAA,GAAW,UAAA,CACvBL,CAAAA,CAAW,CAAE,MAAA,CAAQ,CAAA,CAAG,KAAA,CAAO,UAAA,CAAY,MAAO,CAAE,CAAC,CAAA,CAE9CK,CAAAA,CAAK,SAAW,MAAA,EACvBL,CAAAA,CAAW,CAAE,MAAA,CAAQ,EAAG,KAAA,CAAO,MAAA,CAAQ,KAAA,CAAO,CAAE,CAAC,EAErD,CACF,CAAC,CAAA,CAEGQ,EAAa,CAAA,CACjB,IAAA,IAAWC,CAAAA,IAAQN,CAAAA,CAAW,QAAO,CAAGK,CAAAA,EAAcC,CAAAA,CAEtD,OAAO,CACL,QAAA,CAAUL,CAAAA,CACV,QAAA,CAAU,IAAA,CAAK,KAAKI,CAAAA,EAAc,IAAA,CAAO,KAAK,CAAA,EAAK,GACrD,CACF,CAAA,CAEA,MAAM,GAAA,CAAIP,CAAAA,CAAoBS,EAAgBC,CAAAA,CAAqC,CACjF,IAAMP,CAAAA,CAAOH,EAAM,QAAA,CACbW,CAAAA,CAAS,KAAA,CAAM,OAAA,CAAQF,CAAK,CAAA,CAC9B,MAAMN,CAAAA,CAAK,GAAIM,EAAmCC,CAAAA,EAAW,EAAE,CAAA,CAC/D,MAAMP,CAAAA,CAAKM,CAAAA,CAAOC,CAAAA,EAAW,EAAE,CAAA,CACnC,OAAOE,CAAAA,CAAgBD,CAAM,CAC/B,CAAA,CAEA,MAAM,MAAA,CACJX,CAAAA,CACAS,EACAI,CAAAA,CACAH,CAAAA,CACe,CACf,IAAMP,EAAOH,CAAAA,CAAM,QAAA,CAEbc,CAAAA,CAAW,IAAIrB,EAAaU,CAAAA,CAAK,SAAA,CAAW,CAChD,iBAAA,CAAoBY,GAAiBF,CAAAA,CAAQE,CAAI,CAAA,CACjD,WAAA,CAAa,KACb,mBAAA,CAAqB,IACvB,CAAC,CAAA,CAED,MAAMZ,CAAAA,CAAKM,CAAAA,CAAO,CAChB,GAAIC,GAAqB,EAAC,CAC1B,QAAA,CAAAI,CACF,CAAC,EACH,CAAA,CAEA,MAAM,MAAA,CAAOd,CAAAA,CAAmC,CAE9C,MADaA,CAAAA,CAAM,QAAA,CACR,OAAA,KACb,CACF,CACF,CACF,CACF,CAMO,SAASY,CAAAA,CAAgBI,CAAAA,CAAyB,CACvD,GAAIA,CAAAA,GAAU,IAAA,EAAQ,OAAOA,CAAAA,EAAU,SACrC,OAAOA,CAAAA,CAET,GAAI,KAAA,CAAM,QAAQA,CAAK,CAAA,CACrB,OAAOA,CAAAA,CAAM,IAAIJ,CAAe,CAAA,CAElC,IAAMK,CAAAA,CAAMD,EAEZ,OAAI,OAAOC,CAAAA,CAAI,IAAA,CAAS,KAAe,KAAA,CAAM,OAAA,CAAQA,CAAAA,CAAI,IAAI,EACpD,CACL,IAAA,CAAM,KAAA,CAAM,IAAA,CAAKA,EAAI,IAAyB,CAAA,CAC9C,IAAA,CAAMA,CAAAA,CAAI,KACV,IAAA,CAAMA,CAAAA,CAAI,IAAA,CACV,IAAA,CAAMA,EAAI,IACZ,CAAA,CAGK,MAAA,CAAO,WAAA,CACZ,OAAO,OAAA,CAAQA,CAAG,CAAA,CAAE,GAAA,CAAI,CAAC,CAACC,CAAAA,CAAGC,CAAC,CAAA,GAAM,CAACD,CAAAA,CAAGN,CAAAA,CAAgBO,CAAC,CAAC,CAAC,CAC7D,CACF","file":"transformers.js","sourcesContent":["import type {\n Device,\n LoadedModel,\n LoadProgressEvent,\n ModelAdapter,\n ModelAdapterFactory,\n} from '../core/types.js';\n\n// eslint-disable-next-line ts/no-explicit-any\ntype AnyPipeline = any;\n\n/**\n * Adapter for @huggingface/transformers v3+.\n *\n * @remarks\n * The adapter is instantiated INSIDE the worker via `create()`.\n * The heavy `@huggingface/transformers` library is dynamically imported\n * inside the worker, keeping the main thread bundle 
lightweight.\n *\n * @example\n * ```ts\n * import { createPool } from 'inferis-ml';\n * import { transformersAdapter } from 'inferis-ml/adapters/transformers';\n *\n * const pool = await createPool({ adapter: transformersAdapter() });\n * const model = await pool.load<number[][]>('feature-extraction', {\n * model: 'mixedbread-ai/mxbai-embed-xsmall-v1',\n * });\n * ```\n */\nexport function transformersAdapter(): ModelAdapterFactory {\n return {\n name: 'transformers',\n\n async create(): Promise<ModelAdapter> {\n // Dynamic import — runs inside the worker only\n // @ts-expect-error - optional peer dependency, resolved at runtime inside worker\n const { pipeline, TextStreamer, env } = await import('@huggingface/transformers');\n\n // Use ONNX WASM backend with optimizations\n env.backends.onnx.wasm.proxy = false;\n\n return {\n name: 'transformers',\n\n estimateMemoryMB(_task: string, config: Record<string, unknown>): number {\n return (config.estimatedMemoryMB as number | undefined) ?? 100;\n },\n\n async load(\n task: string,\n config: Record<string, unknown>,\n device: Device,\n onProgress: (event: LoadProgressEvent) => void,\n ): Promise<LoadedModel> {\n const model = config.model as string;\n const dtype = (config.dtype as string | undefined) ?? 'fp32';\n\n const fileTotals = new Map<string, number>();\n\n const pipe = await pipeline(task as Parameters<typeof pipeline>[0], model, {\n device: device === 'webgpu' ? 'webgpu' : 'wasm',\n dtype,\n progress_callback: (info: Record<string, unknown>) => {\n if (info.status === 'progress') {\n const file = (info.file as string) ?? 'unknown';\n const total = (info.total as number) ?? 0;\n fileTotals.set(file, total);\n onProgress({\n loaded: (info.loaded as number) ?? 0,\n phase: file,\n total,\n });\n }\n else if (info.status === 'initiate') {\n onProgress({ loaded: 0, phase: 'initiate', total: 0 });\n }\n else if (info.status === 'done') {\n onProgress({ loaded: 1, phase: 'done', total: 1 });\n }\n },\n });\n\n let totalBytes = 0;\n for (const size of fileTotals.values()) totalBytes += size;\n\n return {\n instance: pipe,\n memoryMB: Math.ceil(totalBytes / (1024 * 1024)) || 100,\n };\n },\n\n async run(model: LoadedModel, input: unknown, options?: unknown): Promise<unknown> {\n const pipe = model.instance as AnyPipeline;\n const result = Array.isArray(input)\n ? await pipe(...(input as [unknown, ...unknown[]]), options ?? {})\n : await pipe(input, options ?? {});\n return serializeOutput(result);\n },\n\n async stream(\n model: LoadedModel,\n input: unknown,\n onChunk: (chunk: unknown) => void,\n options?: unknown,\n ): Promise<void> {\n const pipe = model.instance as AnyPipeline;\n\n const streamer = new TextStreamer(pipe.tokenizer, {\n callback_function: (text: string) => onChunk(text),\n skip_prompt: true,\n skip_special_tokens: true,\n });\n\n await pipe(input, {\n ...(options as object ?? 
{}),\n streamer,\n });\n },\n\n async unload(model: LoadedModel): Promise<void> {\n const pipe = model.instance as AnyPipeline;\n await pipe.dispose?.();\n },\n };\n },\n };\n}\n\n/**\n * Recursively converts Tensor class instances and typed arrays to plain\n * structured-clone-compatible objects so they can cross the worker boundary.\n */\nexport function serializeOutput(value: unknown): unknown {\n if (value === null || typeof value !== 'object')\n return value;\n\n if (Array.isArray(value))\n return value.map(serializeOutput);\n\n const obj = value as Record<string, unknown>;\n\n if (typeof obj.data !== 'undefined' && Array.isArray(obj.dims)) {\n return {\n data: Array.from(obj.data as ArrayLike<number>),\n dims: obj.dims as number[],\n size: obj.size,\n type: obj.type,\n };\n }\n\n return Object.fromEntries(\n Object.entries(obj).map(([k, v]) => [k, serializeOutput(v)]),\n );\n}\n"]}
package/dist/adapters/web-llm.cjs
ADDED

@@ -0,0 +1,2 @@
'use strict';function m(){return {name:"web-llm",async create(){let{CreateMLCEngine:c}=await import('@mlc-ai/web-llm');return {name:"web-llm",estimateMemoryMB(n,e){return e.estimatedMemoryMB??2e3},async load(n,e,a,t){let s=e.model;return {instance:await c(s,{initProgressCallback:o=>{t({loaded:o.progress,phase:o.text,total:1});}}),memoryMB:e.estimatedMemoryMB??2e3}},async run(n,e,a){let t=n.instance,s=e;return (await t.chat.completions.create({...a??{},messages:s.messages,stream:false})).choices[0]?.message?.content??""},async stream(n,e,a,t){let s=n.instance,r=e,o=await s.chat.completions.create({...t??{},messages:r.messages,stream:true});for await(let d of o){let i=d.choices[0]?.delta?.content;i&&a(i);}},async unload(n){await n.instance.unload?.();}}}}}exports.webLlmAdapter=m;//# sourceMappingURL=web-llm.cjs.map
//# sourceMappingURL=web-llm.cjs.map
package/dist/adapters/web-llm.cjs.map
ADDED

@@ -0,0 +1 @@
{"version":3,"sources":["../../src/adapters/web-llm.ts"],"names":["webLlmAdapter","CreateMLCEngine","_task","config","_device","onProgress","modelId","info","model","input","options","engine","chatInput","onChunk","asyncChunks","chunk","delta"],"mappings":"aAiDO,SAASA,GAAqC,CACnD,OAAO,CACL,IAAA,CAAM,UAEN,MAAM,MAAA,EAAgC,CAEpC,GAAM,CAAE,eAAA,CAAAC,CAAgB,EAAI,MAAM,OAAO,iBAAiB,CAAA,CAE1D,OAAO,CACL,IAAA,CAAM,UAEN,gBAAA,CAAiBC,CAAAA,CAAeC,EAAyC,CACvE,OAAQA,EAAO,iBAAA,EAA4C,GAC7D,CAAA,CAEA,MAAM,KACJD,CAAAA,CACAC,CAAAA,CACAC,EACAC,CAAAA,CACsB,CACtB,IAAMC,CAAAA,CAAUH,CAAAA,CAAO,KAAA,CAYvB,OAAO,CACL,QAAA,CAXwB,MAAMF,EAAgBK,CAAAA,CAAS,CACvD,qBAAuBC,CAAAA,EAA6C,CAClEF,CAAAA,CAAW,CACT,OAAQE,CAAAA,CAAK,QAAA,CACb,MAAOA,CAAAA,CAAK,IAAA,CACZ,MAAO,CACT,CAAC,EACH,CACF,CAAC,CAAA,CAIC,QAAA,CAAWJ,EAAO,iBAAA,EAA4C,GAChE,CACF,CAAA,CAEA,MAAM,GAAA,CAAIK,CAAAA,CAAoBC,EAAgBC,CAAAA,CAAqC,CACjF,IAAMC,CAAAA,CAASH,CAAAA,CAAM,SACfI,CAAAA,CAAYH,CAAAA,CAMlB,OAAA,CALiB,MAAME,EAAO,IAAA,CAAK,WAAA,CAAY,OAAO,CACpD,GAAID,GAAqB,EAAC,CAC1B,QAAA,CAAUE,CAAAA,CAAU,SACpB,MAAA,CAAQ,KACV,CAAC,CAAA,EACe,OAAA,CAAQ,CAAC,CAAA,EAAG,OAAA,EAAS,OAAA,EAAW,EAClD,EAEA,MAAM,MAAA,CACJJ,EACAC,CAAAA,CACAI,CAAAA,CACAH,EACe,CACf,IAAMC,CAAAA,CAASH,CAAAA,CAAM,SACfI,CAAAA,CAAYH,CAAAA,CAEZK,EAAc,MAAMH,CAAAA,CAAO,KAAK,WAAA,CAAY,MAAA,CAAO,CACvD,GAAID,GAAqB,EAAC,CAC1B,SAAUE,CAAAA,CAAU,QAAA,CACpB,OAAQ,IACV,CAAC,CAAA,CAED,UAAA,IAAiBG,KAASD,CAAAA,CAAa,CACrC,IAAME,CAAAA,CAAQD,CAAAA,CAAM,QAAQ,CAAC,CAAA,EAAG,KAAA,EAAO,OAAA,CACnCC,GACFH,CAAAA,CAAQG,CAAK,EACjB,CACF,CAAA,CAEA,MAAM,MAAA,CAAOR,CAAAA,CAAmC,CAE9C,MADeA,EAAM,QAAA,CACR,MAAA,KACf,CACF,CACF,CACF,CACF","file":"web-llm.cjs","sourcesContent":["import type {\n Device,\n LoadedModel,\n LoadProgressEvent,\n ModelAdapter,\n ModelAdapterFactory,\n} from '../core/types.js';\n\n// eslint-disable-next-line ts/no-explicit-any\ntype MLCEngine = any;\n\ninterface ChatMessage {\n role: 'system' | 'user' | 'assistant';\n content: string;\n}\n\ninterface ChatInput {\n messages: ChatMessage[];\n temperature?: number;\n top_p?: number;\n max_tokens?: number;\n}\n\n/**\n * Adapter for @mlc-ai/web-llm.\n *\n * @remarks\n * Optimized for LLM text generation with WebGPU acceleration.\n * Streaming is natively supported via the OpenAI-compatible chat API.\n *\n * @example\n * ```ts\n * import { createPool } from 'inferis-ml';\n * import { webLlmAdapter } from 'inferis-ml/adapters/web-llm';\n *\n * const pool = await createPool({\n * adapter: webLlmAdapter(),\n * defaultDevice: 'webgpu',\n * maxWorkers: 1,\n * });\n *\n * const llm = await pool.load<string>('text-generation', {\n * model: 'Llama-3.1-8B-Instruct-q4f32_1-MLC',\n * });\n *\n * const stream = llm.stream({ messages: [{ role: 'user', content: 'Hello' }] });\n * for await (const token of stream) { ... }\n * ```\n */\nexport function webLlmAdapter(): ModelAdapterFactory {\n return {\n name: 'web-llm',\n\n async create(): Promise<ModelAdapter> {\n // @ts-expect-error - optional peer dependency, resolved at runtime inside worker\n const { CreateMLCEngine } = await import('@mlc-ai/web-llm');\n\n return {\n name: 'web-llm',\n\n estimateMemoryMB(_task: string, config: Record<string, unknown>): number {\n return (config.estimatedMemoryMB as number | undefined) ?? 
2000;\n },\n\n async load(\n _task: string,\n config: Record<string, unknown>,\n _device: Device,\n onProgress: (event: LoadProgressEvent) => void,\n ): Promise<LoadedModel> {\n const modelId = config.model as string;\n\n const engine: MLCEngine = await CreateMLCEngine(modelId, {\n initProgressCallback: (info: { progress: number; text: string }) => {\n onProgress({\n loaded: info.progress,\n phase: info.text,\n total: 1,\n });\n },\n });\n\n return {\n instance: engine,\n memoryMB: (config.estimatedMemoryMB as number | undefined) ?? 2000,\n };\n },\n\n async run(model: LoadedModel, input: unknown, options?: unknown): Promise<unknown> {\n const engine = model.instance as MLCEngine;\n const chatInput = input as ChatInput;\n const response = await engine.chat.completions.create({\n ...(options as object ?? {}),\n messages: chatInput.messages,\n stream: false,\n });\n return response.choices[0]?.message?.content ?? '';\n },\n\n async stream(\n model: LoadedModel,\n input: unknown,\n onChunk: (chunk: unknown) => void,\n options?: unknown,\n ): Promise<void> {\n const engine = model.instance as MLCEngine;\n const chatInput = input as ChatInput;\n\n const asyncChunks = await engine.chat.completions.create({\n ...(options as object ?? {}),\n messages: chatInput.messages,\n stream: true,\n });\n\n for await (const chunk of asyncChunks) {\n const delta = chunk.choices[0]?.delta?.content;\n if (delta)\n onChunk(delta);\n }\n },\n\n async unload(model: LoadedModel): Promise<void> {\n const engine = model.instance as MLCEngine;\n await engine.unload?.();\n },\n };\n },\n };\n}\n"]}
package/dist/adapters/web-llm.d.cts
ADDED

@@ -0,0 +1,31 @@
import { h as ModelAdapterFactory } from '../types-Y6Ytjh7U.cjs';

/**
 * Adapter for @mlc-ai/web-llm.
 *
 * @remarks
 * Optimized for LLM text generation with WebGPU acceleration.
 * Streaming is natively supported via the OpenAI-compatible chat API.
 *
 * @example
 * ```ts
 * import { createPool } from 'inferis-ml';
 * import { webLlmAdapter } from 'inferis-ml/adapters/web-llm';
 *
 * const pool = await createPool({
 *   adapter: webLlmAdapter(),
 *   defaultDevice: 'webgpu',
 *   maxWorkers: 1,
 * });
 *
 * const llm = await pool.load<string>('text-generation', {
 *   model: 'Llama-3.1-8B-Instruct-q4f32_1-MLC',
 * });
 *
 * const stream = llm.stream({ messages: [{ role: 'user', content: 'Hello' }] });
 * for await (const token of stream) { ... }
 * ```
 */
declare function webLlmAdapter(): ModelAdapterFactory;

export { webLlmAdapter };
package/dist/adapters/web-llm.d.ts
ADDED

@@ -0,0 +1,31 @@
import { h as ModelAdapterFactory } from '../types-Y6Ytjh7U.js';

/**
 * Adapter for @mlc-ai/web-llm.
 *
 * @remarks
 * Optimized for LLM text generation with WebGPU acceleration.
 * Streaming is natively supported via the OpenAI-compatible chat API.
 *
 * @example
 * ```ts
 * import { createPool } from 'inferis-ml';
 * import { webLlmAdapter } from 'inferis-ml/adapters/web-llm';
 *
 * const pool = await createPool({
 *   adapter: webLlmAdapter(),
 *   defaultDevice: 'webgpu',
 *   maxWorkers: 1,
 * });
 *
 * const llm = await pool.load<string>('text-generation', {
 *   model: 'Llama-3.1-8B-Instruct-q4f32_1-MLC',
 * });
 *
 * const stream = llm.stream({ messages: [{ role: 'user', content: 'Hello' }] });
 * for await (const token of stream) { ... }
 * ```
 */
declare function webLlmAdapter(): ModelAdapterFactory;

export { webLlmAdapter };
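The bundled source also defines a `ChatInput` shape (`messages` plus optional `temperature`, `top_p`, `max_tokens`), and the non-streaming `run` returns the first choice's message content, falling back to `''`. A sketch to complement the streaming @example above; the second argument is assumed to pass through into the chat completion request, since the source spreads it there:

```ts
// Sketch of a non-streaming call against the handle from the @example.
// Options in the second argument are spread into the completion request.
const reply = await llm.run(
  { messages: [{ role: 'user', content: 'Hello' }] },
  { max_tokens: 64 },
);
console.log(reply); // choices[0]?.message?.content ?? ''
```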
package/dist/adapters/web-llm.js
ADDED

@@ -0,0 +1,2 @@
function m(){return {name:"web-llm",async create(){let{CreateMLCEngine:c}=await import('@mlc-ai/web-llm');return {name:"web-llm",estimateMemoryMB(n,e){return e.estimatedMemoryMB??2e3},async load(n,e,a,t){let s=e.model;return {instance:await c(s,{initProgressCallback:o=>{t({loaded:o.progress,phase:o.text,total:1});}}),memoryMB:e.estimatedMemoryMB??2e3}},async run(n,e,a){let t=n.instance,s=e;return (await t.chat.completions.create({...a??{},messages:s.messages,stream:false})).choices[0]?.message?.content??""},async stream(n,e,a,t){let s=n.instance,r=e,o=await s.chat.completions.create({...t??{},messages:r.messages,stream:true});for await(let d of o){let i=d.choices[0]?.delta?.content;i&&a(i);}},async unload(n){await n.instance.unload?.();}}}}}export{m as webLlmAdapter};//# sourceMappingURL=web-llm.js.map
//# sourceMappingURL=web-llm.js.map
package/dist/adapters/web-llm.js.map
ADDED

@@ -0,0 +1 @@
{"version":3,"sources":["../../src/adapters/web-llm.ts"],"names":["webLlmAdapter","CreateMLCEngine","_task","config","_device","onProgress","modelId","info","model","input","options","engine","chatInput","onChunk","asyncChunks","chunk","delta"],"mappings":"AAiDO,SAASA,GAAqC,CACnD,OAAO,CACL,IAAA,CAAM,UAEN,MAAM,MAAA,EAAgC,CAEpC,GAAM,CAAE,eAAA,CAAAC,CAAgB,EAAI,MAAM,OAAO,iBAAiB,CAAA,CAE1D,OAAO,CACL,IAAA,CAAM,UAEN,gBAAA,CAAiBC,CAAAA,CAAeC,EAAyC,CACvE,OAAQA,EAAO,iBAAA,EAA4C,GAC7D,CAAA,CAEA,MAAM,KACJD,CAAAA,CACAC,CAAAA,CACAC,EACAC,CAAAA,CACsB,CACtB,IAAMC,CAAAA,CAAUH,CAAAA,CAAO,KAAA,CAYvB,OAAO,CACL,QAAA,CAXwB,MAAMF,EAAgBK,CAAAA,CAAS,CACvD,qBAAuBC,CAAAA,EAA6C,CAClEF,CAAAA,CAAW,CACT,OAAQE,CAAAA,CAAK,QAAA,CACb,MAAOA,CAAAA,CAAK,IAAA,CACZ,MAAO,CACT,CAAC,EACH,CACF,CAAC,CAAA,CAIC,QAAA,CAAWJ,EAAO,iBAAA,EAA4C,GAChE,CACF,CAAA,CAEA,MAAM,GAAA,CAAIK,CAAAA,CAAoBC,EAAgBC,CAAAA,CAAqC,CACjF,IAAMC,CAAAA,CAASH,CAAAA,CAAM,SACfI,CAAAA,CAAYH,CAAAA,CAMlB,OAAA,CALiB,MAAME,EAAO,IAAA,CAAK,WAAA,CAAY,OAAO,CACpD,GAAID,GAAqB,EAAC,CAC1B,QAAA,CAAUE,CAAAA,CAAU,SACpB,MAAA,CAAQ,KACV,CAAC,CAAA,EACe,OAAA,CAAQ,CAAC,CAAA,EAAG,OAAA,EAAS,OAAA,EAAW,EAClD,EAEA,MAAM,MAAA,CACJJ,EACAC,CAAAA,CACAI,CAAAA,CACAH,EACe,CACf,IAAMC,CAAAA,CAASH,CAAAA,CAAM,SACfI,CAAAA,CAAYH,CAAAA,CAEZK,EAAc,MAAMH,CAAAA,CAAO,KAAK,WAAA,CAAY,MAAA,CAAO,CACvD,GAAID,GAAqB,EAAC,CAC1B,SAAUE,CAAAA,CAAU,QAAA,CACpB,OAAQ,IACV,CAAC,CAAA,CAED,UAAA,IAAiBG,KAASD,CAAAA,CAAa,CACrC,IAAME,CAAAA,CAAQD,CAAAA,CAAM,QAAQ,CAAC,CAAA,EAAG,KAAA,EAAO,OAAA,CACnCC,GACFH,CAAAA,CAAQG,CAAK,EACjB,CACF,CAAA,CAEA,MAAM,MAAA,CAAOR,CAAAA,CAAmC,CAE9C,MADeA,EAAM,QAAA,CACR,MAAA,KACf,CACF,CACF,CACF,CACF","file":"web-llm.js","sourcesContent":["import type {\n Device,\n LoadedModel,\n LoadProgressEvent,\n ModelAdapter,\n ModelAdapterFactory,\n} from '../core/types.js';\n\n// eslint-disable-next-line ts/no-explicit-any\ntype MLCEngine = any;\n\ninterface ChatMessage {\n role: 'system' | 'user' | 'assistant';\n content: string;\n}\n\ninterface ChatInput {\n messages: ChatMessage[];\n temperature?: number;\n top_p?: number;\n max_tokens?: number;\n}\n\n/**\n * Adapter for @mlc-ai/web-llm.\n *\n * @remarks\n * Optimized for LLM text generation with WebGPU acceleration.\n * Streaming is natively supported via the OpenAI-compatible chat API.\n *\n * @example\n * ```ts\n * import { createPool } from 'inferis-ml';\n * import { webLlmAdapter } from 'inferis-ml/adapters/web-llm';\n *\n * const pool = await createPool({\n * adapter: webLlmAdapter(),\n * defaultDevice: 'webgpu',\n * maxWorkers: 1,\n * });\n *\n * const llm = await pool.load<string>('text-generation', {\n * model: 'Llama-3.1-8B-Instruct-q4f32_1-MLC',\n * });\n *\n * const stream = llm.stream({ messages: [{ role: 'user', content: 'Hello' }] });\n * for await (const token of stream) { ... }\n * ```\n */\nexport function webLlmAdapter(): ModelAdapterFactory {\n return {\n name: 'web-llm',\n\n async create(): Promise<ModelAdapter> {\n // @ts-expect-error - optional peer dependency, resolved at runtime inside worker\n const { CreateMLCEngine } = await import('@mlc-ai/web-llm');\n\n return {\n name: 'web-llm',\n\n estimateMemoryMB(_task: string, config: Record<string, unknown>): number {\n return (config.estimatedMemoryMB as number | undefined) ?? 
2000;\n },\n\n async load(\n _task: string,\n config: Record<string, unknown>,\n _device: Device,\n onProgress: (event: LoadProgressEvent) => void,\n ): Promise<LoadedModel> {\n const modelId = config.model as string;\n\n const engine: MLCEngine = await CreateMLCEngine(modelId, {\n initProgressCallback: (info: { progress: number; text: string }) => {\n onProgress({\n loaded: info.progress,\n phase: info.text,\n total: 1,\n });\n },\n });\n\n return {\n instance: engine,\n memoryMB: (config.estimatedMemoryMB as number | undefined) ?? 2000,\n };\n },\n\n async run(model: LoadedModel, input: unknown, options?: unknown): Promise<unknown> {\n const engine = model.instance as MLCEngine;\n const chatInput = input as ChatInput;\n const response = await engine.chat.completions.create({\n ...(options as object ?? {}),\n messages: chatInput.messages,\n stream: false,\n });\n return response.choices[0]?.message?.content ?? '';\n },\n\n async stream(\n model: LoadedModel,\n input: unknown,\n onChunk: (chunk: unknown) => void,\n options?: unknown,\n ): Promise<void> {\n const engine = model.instance as MLCEngine;\n const chatInput = input as ChatInput;\n\n const asyncChunks = await engine.chat.completions.create({\n ...(options as object ?? {}),\n messages: chatInput.messages,\n stream: true,\n });\n\n for await (const chunk of asyncChunks) {\n const delta = chunk.choices[0]?.delta?.content;\n if (delta)\n onChunk(delta);\n }\n },\n\n async unload(model: LoadedModel): Promise<void> {\n const engine = model.instance as MLCEngine;\n await engine.unload?.();\n },\n };\n },\n };\n}\n"]}
package/dist/index.cjs
ADDED

@@ -0,0 +1,2 @@
'use strict';var _documentCurrentScript=typeof document!=='undefined'?document.currentScript:null;var E="inferis:leader",S=class o{role="unknown";listeners=new Set;abortController=null;async start(){if(!this.isSupported())return this.setRole("leader"),"leader";this.abortController=new AbortController;let e=false;return new Promise(t=>{let r=setTimeout(()=>{e||(e=true,this.setRole("leader"),t("leader"));},5e3);navigator.locks.request(E,{signal:this.abortController.signal},async()=>{e||(e=true,clearTimeout(r),this.setRole("leader"),t("leader")),await this.holdLock();}).catch(n=>{n.name;}),navigator.locks.query().then(n=>{if(e)return;let s=n.held?.some(i=>i.name===E)??false,d=n.pending?.some(i=>i.name===E)??false;(s||d)&&(e=true,clearTimeout(r),this.setRole("follower"),t("follower"));}).catch(()=>{});})}holdLock(){return new Promise(e=>{this.abortController&&this.abortController.signal.addEventListener("abort",()=>e(),{once:true});})}stop(){this.abortController?.abort(),this.abortController=null;}onRoleChange(e){return this.listeners.add(e),()=>this.listeners.delete(e)}get currentRole(){return this.role}get isLeader(){return this.role==="leader"}setRole(e){this.role=e;for(let t of this.listeners)try{t(e);}catch{}}static isSupported(){return typeof navigator<"u"&&"locks"in navigator}isSupported(){return o.isSupported()}};var q=class{port;listeners=new Set;constructor(e){let t=new SharedWorker(e,{type:"module"});this.port=t.port,t.onerror=r=>{console.error("[SharedWorkerBridge] worker error:",r.message);},this.port.onmessage=r=>{for(let n of this.listeners)try{n(r.data);}catch{}},this.port.onmessageerror=r=>{console.error("[SharedWorkerBridge] port deserialization error:",r);},this.port.start();}postMessage(e,t){t?.length?this.port.postMessage(e,t):this.port.postMessage(e);}on(e){return this.listeners.add(e),()=>this.listeners.delete(e)}disconnect(){this.listeners.clear(),this.port.close();}static isSupported(){return typeof SharedWorker<"u"}};var $="inferis:bus",j=new Set(["leader-elected","leader-gone","request","response","stream-chunk","stream-end","stream-error"]);function H(o){return typeof o=="object"&&o!==null&&"type"in o&&typeof o.type=="string"&&j.has(o.type)}var R=class{channel;listeners=new Set;constructor(){this.channel=new BroadcastChannel($),this.channel.onmessage=e=>{if(H(e.data))for(let t of this.listeners)try{t(e.data);}catch{}};}send(e){this.channel.postMessage(e);}on(e){return this.listeners.add(e),()=>this.listeners.delete(e)}close(){this.listeners.clear(),this.channel.close();}static isSupported(){return typeof BroadcastChannel<"u"}};var m=class{maxMB;usedMB=0;lruOrder=[];modelSizes=new Map;constructor(e){if(e<=0)throw new RangeError("maxMB must be > 0");this.maxMB=e;}get totalMB(){return this.maxMB}get allocatedMB(){return this.usedMB}get availableMB(){return this.maxMB-this.usedMB}planEviction(e){if(e>this.maxMB)return null;if(e<=this.availableMB)return [];let t=[],r=0;for(let n of this.lruOrder){if(r>=e-this.availableMB)break;let s=this.modelSizes.get(n);s!==void 0&&(t.push(n),r+=s);}return r>=e-this.availableMB?t:null}allocate(e,t){this.modelSizes.has(e)&&this.release(e),this.modelSizes.set(e,t),this.usedMB+=t,this.touch(e);}release(e){let t=this.modelSizes.get(e);if(t===void 0)return;this.modelSizes.delete(e),this.usedMB=Math.max(0,this.usedMB-t);let r=this.lruOrder.indexOf(e);r!==-1&&this.lruOrder.splice(r,1);}touch(e){let t=this.lruOrder.indexOf(e);t!==-1&&this.lruOrder.splice(t,1),this.lruOrder.push(e);}fits(e){return e<=this.availableMB}get lruList(){return 
this.lruOrder}};var K=new Uint8Array([0,97,115,109,1,0,0,0,1,5,1,96,0,1,123,3,2,1,0,10,10,1,8,0,65,0,253,15,0,0,11]),G=new Uint8Array([0,97,115,109,1,0,0,0,1,4,1,96,0,0,3,2,1,0,5,4,1,3,1,1,10,11,1,9,0,254,1,2,0,65,0,11]);function Y(){return typeof WebAssembly>"u"?{simd:false,supported:false,threads:false}:{simd:WebAssembly.validate(K),supported:true,threads:WebAssembly.validate(G)}}async function Z(){if(typeof navigator>"u"||!("gpu"in navigator))return {adapter:null,isFallback:false,limits:null,supported:false};let o=navigator.gpu;if(o==null)return {adapter:null,isFallback:false,limits:null,supported:false};try{let e=await o.requestAdapter();if(!e)return {adapter:null,isFallback:!1,limits:null,supported:!1};let t=await e.requestAdapterInfo?.()??{},r=e.isFallbackAdapter??!1;return {adapter:{architecture:t.architecture??"",description:t.description??"",device:t.device??"",vendor:t.vendor??""},isFallback:r,limits:{maxBufferSize:e.limits?.maxBufferSize??0,maxStorageBufferBindingSize:e.limits?.maxStorageBufferBindingSize??0},supported:!0}}catch{return {adapter:null,isFallback:false,limits:null,supported:false}}}function V(){return typeof SharedWorker<"u"}function X(){return typeof BroadcastChannel<"u"}function J(){return typeof navigator<"u"&&"locks"in navigator}var b=null;async function A(){if(b)return b;let[o,e]=await Promise.all([Z(),Promise.resolve(Y())]);return b=Object.freeze({broadcastChannel:X(),hardwareConcurrency:typeof navigator<"u"?navigator.hardwareConcurrency??1:1,sharedWorker:V(),wasm:e,webgpu:o,webLocks:J()}),b}function Q(){b=null;}var u=class o extends Error{code;constructor(e,t){super(e),this.name="InferisError",this.code=t;}static fromSerialized(e){let t=new o(e.message,e.code??"UNKNOWN");return t.name=e.name,e.stack&&(t.stack=e.stack),t}serialize(){let e={code:this.code,message:this.message,name:this.name};return this.stack!==void 0?{...e,stack:this.stack}:e}},L=class extends u{modelId;constructor(e,t){super(t,"MODEL_LOAD_ERROR"),this.name="ModelLoadError",this.modelId=e;}},f=class extends u{modelId;constructor(e,t){super(`Model "${e}" is not ready (state: ${t})`,"MODEL_NOT_READY"),this.name="ModelNotReadyError",this.modelId=e;}},C=class extends u{modelId;constructor(e){super(`Model "${e}" has been disposed`,"MODEL_DISPOSED"),this.name="ModelDisposedError",this.modelId=e;}},W=class extends u{modelId;constructor(e,t){super(t,"INFERENCE_ERROR"),this.name="InferenceError",this.modelId=e;}},v=class extends u{requestedMB;budgetMB;constructor(e,t){super(`Cannot load model: requested ${e}MB exceeds memory budget ${t}MB`,"BUDGET_EXCEEDED"),this.name="BudgetExceededError",this.budgetMB=t,this.requestedMB=e;}},M=class extends u{reqId;constructor(e,t){super(`Task "${e}" timed out after ${t}ms`,"TASK_TIMEOUT"),this.name="TaskTimeoutError",this.reqId=e;}},P=class extends u{workerId;constructor(e,t){super(t,"WORKER_ERROR"),this.name="WorkerError",this.workerId=e;}},O=class extends u{modelId;reason;constructor(e,t){super(`GPU device lost for model "${e}": ${t}`,"DEVICE_LOST"),this.name="DeviceLostError",this.modelId=e,this.reason=t;}},w=class extends u{constructor(e,t){super(`Invalid state transition: ${e} -> ${t}`,"INVALID_STATE_TRANSITION"),this.name="InvalidStateTransitionError";}};var D={disposed:new Set,error:new Set(["loading","disposed"]),idle:new Set(["loading","disposed"]),inferring:new Set(["inferring","ready","error","unloading"]),loading:new Set(["ready","error","disposed"]),ready:new Set(["inferring","unloading","error"]),unloading:new Set(["disposed"])};function 
p(o,e){if(!D[o].has(e))throw new w(o,e);return e}function k(o,e){return D[o].has(e)}function I(o){return o==="ready"}function ee(o){return o==="disposed"}var y=class{models=new Map;static makeId(e,t){return `${e}:${t}`}register(e,t,r){if(this.models.has(e))return this.models.get(e);let n={config:r,device:"wasm",id:e,memoryMB:0,state:"idle",stateListeners:new Set,task:t,workerId:null};return this.models.set(e,n),n}get(e){return this.models.get(e)}has(e){return this.models.has(e)}setState(e,t){let r=this.models.get(e);if(r){r.state=t;for(let n of r.stateListeners)try{n(t);}catch{}}}setLoaded(e,t,r,n){let s=this.models.get(e);s&&(s.device=t,s.memoryMB=r,s.workerId=n);}setUnloaded(e){let t=this.models.get(e);t&&(t.workerId=null,t.memoryMB=0);}subscribe(e,t){let r=this.models.get(e);return r?(r.stateListeners.add(t),()=>r.stateListeners.delete(t)):()=>{}}delete(e){let t=this.models.get(e);t&&(t.stateListeners.clear(),this.models.delete(e));}byState(e){return [...this.models.values()].filter(t=>t.state===e)}byWorker(e){return [...this.models.values()].filter(t=>t.workerId===e)}get size(){return this.models.size}get entries(){return this.models}};var te={high:2,low:0,normal:1},T=class{queues=new Map([["high",[]],["low",[]],["normal",[]]]);workerLoad=new Map;workerModels=new Map;addWorker(e){this.workerLoad.set(e,0),this.workerModels.set(e,new Set);}removeWorker(e){this.workerLoad.delete(e),this.workerModels.delete(e);}notifyModelLoaded(e,t){this.workerModels.get(e)?.add(t);}notifyModelUnloaded(e,t){this.workerModels.get(e)?.delete(t);}enqueue(e,t){let r=this.pickWorker(e.modelId);r!==null?this.dispatch(e,r):this.queues.get(e.priority).push(e);}notifyTaskComplete(e,t){let r=this.workerLoad.get(e);r!==void 0&&this.workerLoad.set(e,Math.max(0,r-1)),this.drainNext(e);}drainNext(e){for(let t of ["high","normal","low"]){let r=this.queues.get(t);if(r.length===0)continue;let n=this.findAffinityTask(e,r),s=n!==-1?n:0,[d]=r.splice(s,1);this.dispatch(d,e);return}}pickWorker(e,t=4){let r=null,n=null,s=1/0;for(let[d,i]of this.workerLoad)i>=t||(this.workerModels.get(d)?.has(e)&&(r===null||i<(this.workerLoad.get(r)??1/0))&&(r=d),i<s&&(s=i,n=d));return r??n}dispatch(e,t){let r=this.workerLoad.get(t)??0;this.workerLoad.set(t,r+1),e.execute(t);}findAffinityTask(e,t){let r=this.workerModels.get(e);return r?t.findIndex(n=>r.has(n.modelId)):-1}get queueDepth(){let e=0;for(let t of this.queues.values())e+=t.length;return e}workerLoadFor(e){return this.workerLoad.get(e)??0}get workerIds(){return [...this.workerLoad.keys()]}reset(){this.workerLoad.clear(),this.workerModels.clear();}static priorityWeight(e){return te[e]}};var re=typeof navigator<"u"?Math.max(1,(navigator.hardwareConcurrency??2)-1):1,ne=2048,se=12e4,N={high:0,low:2,normal:1},x=class o{workers=new Map;registry=new y;scheduler=new T;budget;inferenceWaiters=new Map;pending=new Map;config;caps;resolvedDevice;nextWorkerId=0;nextReqId=0;terminated=false;constructor(e,t){this.config=e,this.caps=t,this.budget=new m(e.maxMemoryMB),this.resolvedDevice=this.resolveDevice(e.defaultDevice,t);}static async create(e){let t=await A(),r={adapter:e.adapter,crossTab:e.crossTab??false,defaultDevice:e.defaultDevice??"auto",maxMemoryMB:e.maxMemoryMB??ne,maxWorkers:e.maxWorkers??re,taskTimeout:e.taskTimeout??se,workerUrl:e.workerUrl??new URL("./worker/dedicated.worker.js",(typeof document === 'undefined' ? 
require('u' + 'rl').pathToFileURL(__filename).href : (_documentCurrentScript && _documentCurrentScript.tagName.toUpperCase() === 'SCRIPT' && _documentCurrentScript.src || new URL('index.cjs', document.baseURI).href)))},n=new o(r,t);return await n.spawnWorkers(),n}async spawnWorkers(){for(let e=0;e<this.config.maxWorkers;e++)await this.spawnWorker();}async spawnWorker(){let e=this.nextWorkerId++,t=new Worker(this.config.workerUrl,{type:"module"});t.onmessage=n=>{this.handleWorkerMessage(e,n.data);},t.onerror=n=>{console.error(`[inferis] Worker ${e} error:`,n.message),this.handleWorkerCrash(e);};let r={id:e,worker:t};return this.workers.set(e,r),this.scheduler.addWorker(e),t.postMessage({device:this.resolvedDevice,type:"__init__"}),r}handleWorkerMessage(e,t){switch(t.type){case "load-progress":{let r=this.reqIdToModelId.get(t.reqId);if(r){let n=this.registry.get(r);n?.config.onProgress&&n.config.onProgress(t.progress);}break}case "load-complete":{let r=this.reqIdToModelId.get(t.reqId);if(r){let s=this.registry.get(r);s&&(this.registry.setLoaded(r,this.resolvedDevice,t.memoryMB,e),this.budget.allocate(r,t.memoryMB),this.scheduler.notifyModelLoaded(e,r),this.registry.setState(r,p(s.state,"ready"))),this.reqIdToModelId.delete(t.reqId);}let n=this.pending.get(t.reqId);n&&(this.clearPendingTimeout(t.reqId),this.pending.delete(t.reqId),n.resolve(void 0)),this.scheduler.notifyTaskComplete(e);break}case "load-error":{if(t.reqId==="__init__"){console.error("[inferis] Worker init failed:",t.error);break}let r=this.reqIdToModelId.get(t.reqId);if(r){let s=this.registry.get(r);s&&this.registry.setState(r,p(s.state,"error")),this.reqIdToModelId.delete(t.reqId);}let n=this.pending.get(t.reqId);n&&(this.clearPendingTimeout(t.reqId),this.pending.delete(t.reqId),n.reject(u.fromSerialized(t.error))),this.scheduler.notifyTaskComplete(e);break}case "unload-complete":{let r=this.reqIdToModelId.get(t.reqId);r&&(this.budget.release(r),this.scheduler.notifyModelUnloaded(e,r),this.registry.setUnloaded(r),this.registry.setState(r,"disposed"),this.registry.delete(r),this.reqIdToModelId.delete(t.reqId));let n=this.pending.get(t.reqId);n&&(this.clearPendingTimeout(t.reqId),this.pending.delete(t.reqId),n.resolve(void 0));break}case "unload-error":{let r=this.reqIdToModelId.get(t.reqId);r&&(this.budget.release(r),this.scheduler.notifyModelUnloaded(e,r),this.registry.setUnloaded(r),this.registry.setState(r,"error"),this.registry.delete(r),this.reqIdToModelId.delete(t.reqId));let n=this.pending.get(t.reqId);n&&(this.clearPendingTimeout(t.reqId),this.pending.delete(t.reqId),n.reject(u.fromSerialized(t.error)));break}case "run-result":{let r=this.pending.get(t.reqId);r&&(this.clearPendingTimeout(t.reqId),this.pending.delete(t.reqId),r.resolve(t.output));let n=this.reqIdToModelId.get(t.reqId);if(n){let s=this.registry.get(n);s&&s.state==="inferring"&&this.registry.setState(n,p(s.state,"ready")),this.reqIdToModelId.delete(t.reqId);}this.scheduler.notifyTaskComplete(e),n&&this.drainInferenceWaiter(n);break}case "run-error":{let r=this.pending.get(t.reqId);r&&(this.clearPendingTimeout(t.reqId),this.pending.delete(t.reqId),r.reject(u.fromSerialized(t.error)));let n=this.reqIdToModelId.get(t.reqId);if(n){let s=this.registry.get(n);s&&s.state==="inferring"&&this.registry.setState(n,p(s.state,"ready")),this.reqIdToModelId.delete(t.reqId);}this.scheduler.notifyTaskComplete(e),n&&this.drainInferenceWaiter(n);break}case "stream-chunk":{this.pending.get(t.reqId)?.streamController?.enqueue(t.chunk);break}case "stream-end":{let 
r=this.pending.get(t.reqId);r&&(this.clearPendingTimeout(t.reqId),this.pending.delete(t.reqId),r.streamController?.close());let n=this.reqIdToModelId.get(t.reqId);if(n){let s=this.registry.get(n);s&&s.state==="inferring"&&this.registry.setState(n,p(s.state,"ready")),this.reqIdToModelId.delete(t.reqId);}this.scheduler.notifyTaskComplete(e),n&&this.drainInferenceWaiter(n);break}case "stream-error":{let r=this.pending.get(t.reqId);r&&(this.clearPendingTimeout(t.reqId),this.pending.delete(t.reqId),r.streamController?.error(u.fromSerialized(t.error)));let n=this.reqIdToModelId.get(t.reqId);if(n){let s=this.registry.get(n);s&&s.state==="inferring"&&this.registry.setState(n,p(s.state,"ready")),this.reqIdToModelId.delete(t.reqId);}this.scheduler.notifyTaskComplete(e),n&&this.drainInferenceWaiter(n);break}case "device-lost":{let r=this.registry.get(t.modelId);r&&k(r.state,"error")&&this.registry.setState(t.modelId,"error");break}}}reqIdToModelId=new Map;async load(e,t){if(this.terminated)throw new u("Pool has been terminated","POOL_TERMINATED");let r=y.makeId(e,t.model),n=this.registry.get(r);if(n){let a=n.state;if(a==="ready"||a==="loading"||a==="inferring")return this.makeHandle(r)}let s=this.config.adapter,d=t.estimatedMemoryMB??100;if(!t.estimatedMemoryMB){let a=await s.create().catch(()=>null);try{d=a?.estimateMemoryMB(e,t)??100;}finally{await a?.unload({instance:null,memoryMB:0}).catch(()=>{});}}let i=this.budget.planEviction(d);if(i===null)throw new v(d,this.config.maxMemoryMB);for(let a of i)await this.disposeModel(a);return this.registry.register(r,e,t),this.registry.setState(r,p("idle","loading")),await new Promise((a,c)=>{let l=`${this.nextReqId++}`;this.reqIdToModelId.set(l,r),this.pending.set(l,{reject:c,resolve:a}),this.setRequestTimeout(l,this.config.taskTimeout),this.scheduler.enqueue({enqueuedAt:Date.now(),execute:h=>{let g=this.workers.get(h);if(!g){c(new u("Worker not found","WORKER_NOT_FOUND"));return}try{let{estimatedMemoryMB:_,onProgress:ae,...F}=t;g.worker.postMessage({config:F,device:this.resolvedDevice,modelId:r,reqId:l,task:e,type:"load-model"});}catch{this.clearPendingTimeout(l),this.pending.delete(l),this.reqIdToModelId.delete(l),c(new u("Input cannot be serialized for worker transfer","SERIALIZATION_ERROR"));}},modelId:r,priority:"high",reject:c,reqId:l},this.registry.entries);}),this.makeHandle(r)}async disposeModel(e){let t=this.registry.get(e);if(!t||t.state==="disposed"||!k(t.state,"unloading"))return;if(this.registry.setState(e,"unloading"),t.workerId===null){this.budget.release(e),this.registry.delete(e);return}let r=this.workers.get(t.workerId);if(!r){this.budget.release(e),this.registry.delete(e);return}let n=`${this.nextReqId++}`;this.reqIdToModelId.set(n,e),await new Promise((s,d)=>{this.pending.set(n,{reject:d,resolve:s}),this.setRequestTimeout(n,1e4);try{r.worker.postMessage({modelId:e,reqId:n,type:"unload-model"});}catch{this.clearPendingTimeout(n),this.pending.delete(n),this.reqIdToModelId.delete(n),d(new u("Input cannot be serialized for worker transfer","SERIALIZATION_ERROR"));}});}makeHandle(e){let t=this.registry,r=this.disposeModel.bind(this),n=this.runInference.bind(this),s=this.streamInference.bind(this);return {get device(){return t.get(e)?.device??"wasm"},get id(){return e},get memoryMB(){return t.get(e)?.memoryMB??0},get state(){return t.get(e)?.state??"disposed"},dispose:()=>r(e),onStateChange(d){return t.subscribe(e,d)},run(d,i){return n(e,d,i)},stream(d,i){return s(e,d,i)}}}waitForReady(e,t,r){return new 
Promise((n,s)=>{if(r?.aborted){s(Object.assign(new Error("AbortError"),{name:"AbortError"}));return}if(this.registry.get(e)?.state==="loading"){let l=()=>{h(),s(Object.assign(new Error("AbortError"),{name:"AbortError"}));},h=this.registry.subscribe(e,g=>{g==="ready"?(h(),r?.removeEventListener("abort",l),n()):g!=="inferring"&&g!=="loading"&&(h(),r?.removeEventListener("abort",l),s(new f(e,g)));});r?.addEventListener("abort",l,{once:true});return}let i={enqueuedAt:Date.now(),onAbort:void 0,priority:t,reject:s,resolve:n,signal:r,unsub:void 0},a=this.inferenceWaiters.get(e)??[];a.push(i),a.sort((l,h)=>N[l.priority]-N[h.priority]||l.enqueuedAt-h.enqueuedAt),this.inferenceWaiters.set(e,a);let c=this.registry.subscribe(e,l=>{if(l!=="inferring"&&l!=="loading"&&l!=="ready"){i.unsub=void 0;let h=this.inferenceWaiters.get(e);if(h){let g=h.indexOf(i);g>=0&&h.splice(g,1);}c(),i.signal&&i.onAbort&&i.signal.removeEventListener("abort",i.onAbort),s(new f(e,l));}});if(i.unsub=c,r){let l=()=>{let h=this.inferenceWaiters.get(e);if(h){let g=h.indexOf(i);g>=0&&h.splice(g,1);}i.unsub?.(),i.unsub=void 0,s(Object.assign(new Error("AbortError"),{name:"AbortError"}));};i.onAbort=l,r.addEventListener("abort",l,{once:true});}})}drainInferenceWaiter(e){let t=this.inferenceWaiters.get(e);if(!t||t.length===0)return;let[r]=t.splice(0,1);r.unsub?.(),r.signal&&r.onAbort&&r.signal.removeEventListener("abort",r.onAbort),r.resolve();}async runInference(e,t,r){if(!this.registry.get(e))throw new u(`Model "${e}" not found`,"MODEL_NOT_FOUND");for(;!I(this.registry.get(e).state);){let s=this.registry.get(e).state;if(s==="inferring"||s==="loading")await this.waitForReady(e,r?.priority??"normal",r?.signal);else throw new f(e,s)}if(r?.signal?.aborted)throw Object.assign(new Error("AbortError"),{name:"AbortError"});let n=this.registry.get(e);return this.registry.setState(e,p(n.state,"inferring")),this.budget.touch(e),new Promise((s,d)=>{let i=`${this.nextReqId++}`;this.reqIdToModelId.set(i,e);let a=()=>{(n.workerId!==null?this.workers.get(n.workerId):void 0)?.worker.postMessage({reqId:i,type:"abort"});};r?.signal?.addEventListener("abort",a,{once:true}),this.pending.set(i,{onAbort:a,reject:c=>{r?.signal?.removeEventListener("abort",a),d(c);},resolve:c=>{r?.signal?.removeEventListener("abort",a),s(c);},signal:r?.signal}),this.setRequestTimeout(i,this.config.taskTimeout),this.scheduler.enqueue({enqueuedAt:Date.now(),execute:c=>{let l=this.workers.get(c);if(l)try{let{priority:h,signal:g,..._}=r??{};l.worker.postMessage({input:t,modelId:e,options:_,reqId:i,type:"run"});}catch{this.clearPendingTimeout(i),this.pending.delete(i),this.reqIdToModelId.delete(i),r?.signal?.removeEventListener("abort",a),d(new u("Input cannot be serialized for worker transfer","SERIALIZATION_ERROR"));}},modelId:e,priority:r?.priority??"normal",reject:c=>{r?.signal?.removeEventListener("abort",a),d(c);},reqId:i},this.registry.entries);})}streamInference(e,t,r){if(!this.registry.get(e))return new ReadableStream({start(s){s.error(new u(`Model "${e}" not found`,"MODEL_NOT_FOUND"));}});let n="";return new ReadableStream({start:async s=>{for(;!I(this.registry.get(e).state);){let a=this.registry.get(e).state;if(a==="inferring"||a==="loading")try{await this.waitForReady(e,r?.priority??"normal",r?.signal);}catch(c){s.error(c);return}else {s.error(new f(e,a));return}}let d=this.registry.get(e);this.registry.setState(e,p(d.state,"inferring")),this.budget.touch(e),n=`${this.nextReqId++}`,this.reqIdToModelId.set(n,e);let i=()=>{let 
a=this.registry.get(e);(a?.workerId!=null?this.workers.get(a.workerId):void 0)?.worker.postMessage({reqId:n,type:"abort"});};r?.signal?.addEventListener("abort",i,{once:true}),this.pending.set(n,{onAbort:i,reject:a=>{r?.signal?.removeEventListener("abort",i),s.error(a);},resolve:()=>{},signal:r?.signal,streamController:s}),this.setRequestTimeout(n,this.config.taskTimeout),this.scheduler.enqueue({enqueuedAt:Date.now(),execute:a=>{let c=this.workers.get(a);if(c)try{let{priority:l,signal:h,...g}=r??{};c.worker.postMessage({input:t,modelId:e,options:g,reqId:n,type:"run-stream"});}catch{this.clearPendingTimeout(n),this.pending.delete(n),this.reqIdToModelId.delete(n),r?.signal?.removeEventListener("abort",i),s.error(new u("Input cannot be serialized for worker transfer","SERIALIZATION_ERROR"));}},modelId:e,priority:r?.priority??"normal",reject:a=>{r?.signal?.removeEventListener("abort",i),s.error(a);},reqId:n},this.registry.entries);},cancel:()=>{if(!n)return;let s=this.registry.get(e);(s?.workerId!=null?this.workers.get(s.workerId):void 0)?.worker.postMessage({reqId:n,type:"abort"});let i=this.pending.get(n);if(i){i.signal&&i.onAbort&&i.signal.removeEventListener("abort",i.onAbort);try{i.streamController?.close();}catch{}this.clearPendingTimeout(n),this.pending.delete(n),this.reqIdToModelId.delete(n);}}})}setRequestTimeout(e,t){let r=setTimeout(()=>{let s=this.pending.get(e);s&&(s.signal&&s.onAbort&&s.signal.removeEventListener("abort",s.onAbort),this.pending.delete(e),this.reqIdToModelId.delete(e),s.reject(new M(e,t)));},t),n=this.pending.get(e);n&&(n.timeoutId=r);}clearPendingTimeout(e){let t=this.pending.get(e);t?.timeoutId!==void 0&&clearTimeout(t.timeoutId);}handleWorkerCrash(e){this.scheduler.removeWorker(e),this.workers.delete(e);let t=new Set;for(let r of this.registry.byWorker(e))k(r.state,"error")&&this.registry.setState(r.id,"error"),t.add(r.id);for(let[r,n]of this.pending){let s=this.reqIdToModelId.get(r);s&&t.has(s)&&(this.clearPendingTimeout(r),n.signal&&n.onAbort&&n.signal.removeEventListener("abort",n.onAbort),this.pending.delete(r),this.reqIdToModelId.delete(r),n.reject(new u("Worker crashed","WORKER_CRASHED")));}}async terminate(){this.terminated=true;for(let[e,t]of this.pending)clearTimeout(t.timeoutId),t.reject(new u("Pool terminated","POOL_TERMINATED")),this.pending.delete(e),this.reqIdToModelId.delete(e);for(let e of this.workers.values())e.worker.terminate();this.workers.clear(),this.scheduler.reset();}capabilities(){return this.caps}resolveDevice(e,t){return e==="auto"?t.webgpu.supported&&!t.webgpu.isFallback?"webgpu":"wasm":e}get _registry(){return this.registry}get _budget(){return this.budget}get _scheduler(){return this.scheduler}};async function ie(o){return x.create(o)}var B=class{listeners=new Set;on(e){return this.listeners.add(e),()=>this.listeners.delete(e)}emit(e){for(let t of this.listeners)try{t(e);}catch{}}clear(){this.listeners.clear();}get listenerCount(){return this.listeners.size}};function z(o){return Symbol.asyncIterator in o?o:{[Symbol.asyncIterator](){let e=o.getReader();return {async next(){let{done:t,value:r}=await e.read();return t?{done:true,value:void 0}:{done:false,value:r}},async return(){return await e.cancel(),{done:true,value:void 0}},async throw(t){throw await e.cancel(t),t}}}}}async function U(o){let e=[];for await(let t of z(o))e.push(t);return e}async function oe(o){return (await 
U(o)).join("")}exports.BudgetExceededError=v;exports.DeviceLostError=O;exports.InferenceError=W;exports.InferisError=u;exports.InvalidStateTransitionError=w;exports.LeaderElection=S;exports.MemoryBudget=m;exports.ModelDisposedError=C;exports.ModelLoadError=L;exports.ModelNotReadyError=f;exports.ModelRegistry=y;exports.ProgressEmitter=B;exports.Scheduler=T;exports.SharedWorkerBridge=q;exports.TabChannel=R;exports.TaskTimeoutError=M;exports.WorkerError=P;exports.WorkerPool=x;exports.canTransition=k;exports.clearCapabilitiesCache=Q;exports.collectStream=U;exports.collectStreamText=oe;exports.createPool=ie;exports.detectCapabilities=A;exports.isAcceptingInference=I;exports.isTerminal=ee;exports.readableToAsyncIter=z;exports.transition=p;
|
|
2
|
+
//# sourceMappingURL=index.cjs.map
|
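The bundle tail above wires the registry, scheduler, and memory budget into `WorkerPool` behind `createPool`, and exports the stream helpers `readableToAsyncIter` and `collectStreamText`. A minimal usage sketch of that exported surface — the `transformersAdapter` export name and the model id are illustrative assumptions, not taken from this diff; only the import paths, pool options, and handle methods come from the code above:

```ts
import { collectStreamText, createPool, readableToAsyncIter } from 'inferis-ml';
// Assumed export name for the transformers.js adapter; verify against its d.ts.
import { transformersAdapter } from 'inferis-ml/adapters/transformers';

const pool = await createPool({
  adapter: transformersAdapter(),
  maxMemoryMB: 2048, // matches the default budget seen in the bundle
});

// load(task, config) registers the model, plans evictions if the budget
// would be exceeded, and resolves once a worker reports load-complete.
const model = await pool.load('text-generation', {
  model: 'Xenova/distilgpt2', // illustrative model id
  onProgress: (p) => console.log('load progress', p),
});

// stream() returns a plain ReadableStream; readableToAsyncIter adapts it
// for for-await consumption, cancelling the stream on early exit.
for await (const token of readableToAsyncIter(model.stream('Hello'))) {
  console.log(token);
}

// collectStreamText drains a stream and joins the chunks into one string.
const text = await collectStreamText(model.stream('Hello again'));
console.log(text);

await model.dispose();  // unloads the model and releases its budget
await pool.terminate(); // rejects pending work and terminates the workers
```

Cancellation flows the same way in both paths: `run()` and `stream()` accept an `AbortSignal`, and cancelling the returned `ReadableStream` posts an abort to the owning worker, as the `streamInference` handler above shows.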