@inbrowser/model 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +50 -0
- package/LICENSE +21 -0
- package/README.md +63 -0
- package/dist/adapters/agent.d.ts +19 -0
- package/dist/adapters/agent.d.ts.map +1 -0
- package/dist/adapters/agent.js +96 -0
- package/dist/adapters/agent.js.map +1 -0
- package/dist/adapters/relay.d.ts +17 -0
- package/dist/adapters/relay.d.ts.map +1 -0
- package/dist/adapters/relay.js +90 -0
- package/dist/adapters/relay.js.map +1 -0
- package/dist/engine.d.ts +35 -0
- package/dist/engine.d.ts.map +1 -0
- package/dist/engine.js +353 -0
- package/dist/engine.js.map +1 -0
- package/dist/index.d.ts +21 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +20 -0
- package/dist/index.js.map +1 -0
- package/dist/parse-tool-calls.d.ts +49 -0
- package/dist/parse-tool-calls.d.ts.map +1 -0
- package/dist/parse-tool-calls.js +115 -0
- package/dist/parse-tool-calls.js.map +1 -0
- package/dist/presets.d.ts +93 -0
- package/dist/presets.d.ts.map +1 -0
- package/dist/presets.js +191 -0
- package/dist/presets.js.map +1 -0
- package/dist/think.d.ts +57 -0
- package/dist/think.d.ts.map +1 -0
- package/dist/think.js +138 -0
- package/dist/think.js.map +1 -0
- package/dist/types.d.ts +291 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +17 -0
- package/dist/types.js.map +1 -0
- package/dist/worker.d.ts +62 -0
- package/dist/worker.d.ts.map +1 -0
- package/dist/worker.js +493 -0
- package/dist/worker.js.map +1 -0
- package/package.json +65 -0
package/dist/types.d.ts
ADDED
|
@@ -0,0 +1,291 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Public types for `@inbrowser/model`.
|
|
3
|
+
*
|
|
4
|
+
* Vocabulary anchor:
|
|
5
|
+
* - `ModelRef` — bare locator (HF Hub repo id + revision).
|
|
6
|
+
* - `ModelPreset` — locator + static config (dtype, backend,
|
|
7
|
+
* capabilities, optional chat template).
|
|
8
|
+
* - `Engine` — runtime object owning a loaded model.
|
|
9
|
+
* Spread a preset into `createEngine` to get one.
|
|
10
|
+
*
|
|
11
|
+
* The Engine speaks a narrow `EngineEvent` vocabulary. Adapters in
|
|
12
|
+
* `./adapters/*` translate to the relay's `InferenceEvent` or the
|
|
13
|
+
* agent's `ChatEvent` shape. Cloud-only concepts (cost, signatures)
|
|
14
|
+
* are deliberately absent here.
|
|
15
|
+
*/
|
|
16
|
+
/**
|
|
17
|
+
* HF Hub repo id + optional revision. Pin a revision for
|
|
18
|
+
* reproducibility — `main` drifts.
|
|
19
|
+
*/
|
|
20
|
+
export interface ModelRef {
|
|
21
|
+
modelId: string;
|
|
22
|
+
revision?: string;
|
|
23
|
+
}
|
|
24
|
+
/**
|
|
25
|
+
* ONNX Runtime Web execution backend.
|
|
26
|
+
*
|
|
27
|
+
* - `'webgpu'` — WebGPU compute pipeline. Required for Gemma 4 perf.
|
|
28
|
+
* - `'wasm'` — SIMD CPU fallback. Always available, much slower.
|
|
29
|
+
* - `'auto'` — Probe `navigator.gpu`; fall back to wasm if absent.
|
|
30
|
+
*/
|
|
31
|
+
export type Backend = 'auto' | 'webgpu' | 'wasm';
|
|
32
|
+
/**
|
|
33
|
+
* Weight/activation precision selection.
|
|
34
|
+
*
|
|
35
|
+
* - `q4f16` — 4-bit int weights, fp16 activations. Recommended
|
|
36
|
+
* default for Gemma 4 on WebGPU.
|
|
37
|
+
* - `q8` — 8-bit int weights. Larger, sometimes higher quality.
|
|
38
|
+
* - `fp16` — half precision throughout.
|
|
39
|
+
* - `fp32` — full precision (rarely useful in-browser).
|
|
40
|
+
*/
|
|
41
|
+
export type Dtype = 'q4f16' | 'q8' | 'fp16' | 'fp32';
|
|
42
|
+
/**
|
|
43
|
+
* Static, pre-load capability declaration. Lives on `ModelPreset`
|
|
44
|
+
* so consumers can interrogate before paying load cost.
|
|
45
|
+
*/
|
|
46
|
+
export interface EngineCapabilities {
|
|
47
|
+
supportsTools: boolean;
|
|
48
|
+
supportsVision: boolean;
|
|
49
|
+
supportsAudio: boolean;
|
|
50
|
+
/** Context window in tokens. */
|
|
51
|
+
contextWindow: number;
|
|
52
|
+
/** Whether the model emits thinking traces when prompted. */
|
|
53
|
+
supportsThinking: boolean;
|
|
54
|
+
/**
|
|
55
|
+
* When `supportsThinking` is true and the consumer enables thinking
|
|
56
|
+
* via `GenerateOpts.enableThinking`, the model emits reasoning
|
|
57
|
+
* inside these tags. Models vary:
|
|
58
|
+
*
|
|
59
|
+
* - DeepSeek R1 / R1 Distill: `<think>…</think>` (literal text).
|
|
60
|
+
* The model emits both open and close tags during generation.
|
|
61
|
+
* - Gemma 4 / Gemma 3n: only the closing tag `<channel|>` is emitted.
|
|
62
|
+
* The chat template's `add_generation_prompt` pre-fills the
|
|
63
|
+
* `<|channel>thought\n` open marker into the prompt, so
|
|
64
|
+
* generation starts inside thinking. Configure with
|
|
65
|
+
* `implicitOpen: true`. The Gemma end-of-turn marker `<turn|>`
|
|
66
|
+
* leaks when `skip_special_tokens: false` is set; strip via
|
|
67
|
+
* `stripTokens: ['<turn|>']`.
|
|
68
|
+
*
|
|
69
|
+
* Consumers thread these into `splitThinking()` to route reasoning
|
|
70
|
+
* to a dedicated UI surface. The full shape matches
|
|
71
|
+
* `ThinkingSplitOpts` so the preset can be spread directly.
|
|
72
|
+
*/
|
|
73
|
+
thinkingTags?: {
|
|
74
|
+
openTag: string;
|
|
75
|
+
closeTag: string;
|
|
76
|
+
/** When true, the stream starts inside thinking (no open tag in output). */
|
|
77
|
+
implicitOpen?: boolean;
|
|
78
|
+
/** Literal substrings to strip from `token` events (structural leak tokens). */
|
|
79
|
+
stripTokens?: ReadonlyArray<string>;
|
|
80
|
+
};
|
|
81
|
+
}
|
|
82
|
+
/**
|
|
83
|
+
* A fully-specified model configuration. Spread into `createEngine`
|
|
84
|
+
* along with optional hooks. Authored via `definePreset` for
|
|
85
|
+
* compile-time completeness checks; community presets can be exported
|
|
86
|
+
* the same way.
|
|
87
|
+
*/
|
|
88
|
+
export interface ModelPreset {
|
|
89
|
+
model: ModelRef;
|
|
90
|
+
dtype: Dtype;
|
|
91
|
+
backend: Backend;
|
|
92
|
+
capabilities: EngineCapabilities;
|
|
93
|
+
/**
|
|
94
|
+
* Optional override of the chat template. The default uses the
|
|
95
|
+
* tokenizer's bundled template (`apply_chat_template`); override
|
|
96
|
+
* only when a model ships a broken or missing template.
|
|
97
|
+
*/
|
|
98
|
+
chatTemplate?: (messages: ReadonlyArray<EngineMessage>) => string;
|
|
99
|
+
}
|
|
100
|
+
/**
|
|
101
|
+
* Granular progress for the three observable phases of cold start, plus a terminal `ready` marker.
|
|
102
|
+
*
|
|
103
|
+
* - `fetch` — weights flowing from HF Hub (or `weightsBaseUrl`) into
|
|
104
|
+
* the browser Cache API. Cached after first run.
|
|
105
|
+
* - `init` — ONNX Runtime compiling the graph for `backend`.
|
|
106
|
+
* Per-page-load (not cached across reloads today).
|
|
107
|
+
* - `warmup` — first forward pass primes WebGPU pipelines + kernel
|
|
108
|
+
* caches. Once-per-engine-instance.
|
|
109
|
+
* - `ready` — terminal phase; safe to `generate`.
|
|
110
|
+
*/
|
|
111
|
+
export type LoadProgress = {
|
|
112
|
+
phase: 'fetch';
|
|
113
|
+
file: string;
|
|
114
|
+
loadedBytes: number;
|
|
115
|
+
totalBytes: number;
|
|
116
|
+
} | {
|
|
117
|
+
phase: 'init';
|
|
118
|
+
backend: Backend;
|
|
119
|
+
} | {
|
|
120
|
+
phase: 'warmup';
|
|
121
|
+
tokensGenerated: number;
|
|
122
|
+
} | {
|
|
123
|
+
phase: 'ready';
|
|
124
|
+
};
|
|
125
|
+
export type EngineState = 'idle' | 'loading' | 'ready' | 'error' | 'disposed';
|
|
126
|
+
/**
|
|
127
|
+
* Inline media for multimodal models. Gemma 4 accepts audio; future
|
|
128
|
+
* presets may set `supportsVision: true` and accept images.
|
|
129
|
+
*/
|
|
130
|
+
export type MediaPart = {
|
|
131
|
+
kind: 'image';
|
|
132
|
+
data: Blob | ArrayBuffer;
|
|
133
|
+
mimeType: string;
|
|
134
|
+
} | {
|
|
135
|
+
kind: 'audio';
|
|
136
|
+
data: Blob | ArrayBuffer;
|
|
137
|
+
mimeType: string;
|
|
138
|
+
};
|
|
139
|
+
/**
|
|
140
|
+
* Engine-side chat message. Symmetric with agent's `NormalizedMessage`
|
|
141
|
+
* minus tool fields — messages carry no tool data (tool declarations go via `GenerateOpts.tools`). Tool-use
|
|
142
|
+
* polyfilling lives upstream in `@inbrowser/agent`.
|
|
143
|
+
*/
|
|
144
|
+
export interface EngineMessage {
|
|
145
|
+
role: 'system' | 'user' | 'assistant';
|
|
146
|
+
text: string;
|
|
147
|
+
media?: ReadonlyArray<MediaPart>;
|
|
148
|
+
}
|
|
149
|
+
export interface GenerateOpts {
|
|
150
|
+
maxNewTokens?: number;
|
|
151
|
+
temperature?: number;
|
|
152
|
+
topP?: number;
|
|
153
|
+
topK?: number;
|
|
154
|
+
/** Stop sequences enforced post-tokenization. */
|
|
155
|
+
stop?: ReadonlyArray<string>;
|
|
156
|
+
/** Caller-side cancellation. Aborting stops the decode loop. */
|
|
157
|
+
signal?: AbortSignal;
|
|
158
|
+
/**
|
|
159
|
+
* Tool declarations advertised to the model. Only honored when the
|
|
160
|
+
* active preset declares `capabilities.supportsTools: true`. When
|
|
161
|
+
* provided, the engine threads them through the tokenizer's chat
|
|
162
|
+
* template (`apply_chat_template({ messages, tools })`) and wraps
|
|
163
|
+
* the output stream with a tool-call parser so `kind: 'tool_call'`
|
|
164
|
+
* events are emitted when the model invokes a tool.
|
|
165
|
+
*/
|
|
166
|
+
tools?: ReadonlyArray<ToolSpec>;
|
|
167
|
+
/**
|
|
168
|
+
* Opt into the model's thinking mode. Only honored when the active
|
|
169
|
+
* preset declares `capabilities.supportsThinking: true`. When set:
|
|
170
|
+
*
|
|
171
|
+
* 1. The engine passes `enable_thinking: true` to
|
|
172
|
+
* `apply_chat_template` so the model's template renders its
|
|
173
|
+
* thinking-mode preamble.
|
|
174
|
+
* 2. When the preset also declares `capabilities.thinkingTags`,
|
|
175
|
+
* the engine sets `skip_special_tokens: false` on the
|
|
176
|
+
* TextStreamer so the channel markers reach the output stream
|
|
177
|
+
* (Gemma 4 family uses special tokens for this; DeepSeek uses
|
|
178
|
+
* literal text).
|
|
179
|
+
*
|
|
180
|
+
* The reasoning text is still emitted as `kind: 'token'` events
|
|
181
|
+
* from the engine. Consumers wrap with `splitThinking()` (using
|
|
182
|
+
* `capabilities.thinkingTags` when present) to route it to a
|
|
183
|
+
* dedicated UI surface.
|
|
184
|
+
*/
|
|
185
|
+
enableThinking?: boolean;
|
|
186
|
+
}
|
|
187
|
+
/**
|
|
188
|
+
* Tool declaration shape. Matches the OpenAI function-calling format
|
|
189
|
+
* that most modern chat templates (Qwen 2/3, DeepSeek R1, Llama 3.2+,
|
|
190
|
+
* Mistral v0.3+) accept directly.
|
|
191
|
+
*/
|
|
192
|
+
export interface ToolSpec {
|
|
193
|
+
type: 'function';
|
|
194
|
+
function: {
|
|
195
|
+
name: string;
|
|
196
|
+
description: string;
|
|
197
|
+
parameters: unknown;
|
|
198
|
+
};
|
|
199
|
+
}
|
|
200
|
+
/**
|
|
201
|
+
* Engine's narrow event vocabulary. Adapters translate to the wider
|
|
202
|
+
* shapes consumers expect (`InferenceEvent`, `ChatEvent`). No cost,
|
|
203
|
+
* no thoughtSignature, no cloud-specific extension fields.
|
|
204
|
+
*
|
|
205
|
+
* - `token` — decoded text per decode step.
|
|
206
|
+
* - `thinking` — content the model emitted inside a reasoning-tag
|
|
207
|
+
* wrapper (e.g., `<think>…</think>` for DeepSeek R1).
|
|
208
|
+
* The engine itself never produces this kind; it's
|
|
209
|
+
* emitted by `splitThinking()` (see `./think.ts`) when
|
|
210
|
+
* a consumer wraps the engine's stream. The variant
|
|
211
|
+
* lives on `EngineEvent` so a single `switch (kind)`
|
|
212
|
+
* handles both wrapped and raw streams.
|
|
213
|
+
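* - `tool_call` — a tool invocation parsed from the output stream; emitted only when `GenerateOpts.tools` is provided.
* - `usage` — terminal accounting, once per stream.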
|
|
214
|
+
* - `error` — `recoverable` distinguishes retryable transients
|
|
215
|
+
* (decode hiccup) from terminal failures (OOM, dispose).
|
|
216
|
+
*/
|
|
217
|
+
export type EngineEvent = {
|
|
218
|
+
kind: 'token';
|
|
219
|
+
text: string;
|
|
220
|
+
} | {
|
|
221
|
+
kind: 'thinking';
|
|
222
|
+
text: string;
|
|
223
|
+
} | {
|
|
224
|
+
kind: 'tool_call';
|
|
225
|
+
/** Locally-generated id; the engine doesn't get one from the model. */
|
|
226
|
+
id: string;
|
|
227
|
+
/** Tool name as the model wrote it (may not be in the registered set — caller validates). */
|
|
228
|
+
name: string;
|
|
229
|
+
/** Parsed args. Plain object when JSON parsing succeeds; `{ _raw: string }` when malformed. */
|
|
230
|
+
args: unknown;
|
|
231
|
+
} | {
|
|
232
|
+
kind: 'usage';
|
|
233
|
+
promptTokens: number;
|
|
234
|
+
outputTokens: number;
|
|
235
|
+
decodeMs: number;
|
|
236
|
+
} | {
|
|
237
|
+
kind: 'error';
|
|
238
|
+
message: string;
|
|
239
|
+
recoverable: boolean;
|
|
240
|
+
};
|
|
241
|
+
/**
|
|
242
|
+
* Lifecycle events that `engine.on()` can subscribe to: keys are event names, values are the handler payload types.
|
|
243
|
+
*/
|
|
244
|
+
export interface EngineEventMap {
|
|
245
|
+
state: EngineState;
|
|
246
|
+
load: LoadProgress;
|
|
247
|
+
}
|
|
248
|
+
/**
|
|
249
|
+
* The runtime engine. Holds the loaded model + tokenizer/processor.
|
|
250
|
+
* One engine per model instance; spawn multiple to run different
|
|
251
|
+
* models in parallel (memory permitting).
|
|
252
|
+
*/
|
|
253
|
+
export interface Engine {
|
|
254
|
+
readonly model: ModelRef;
|
|
255
|
+
readonly state: EngineState;
|
|
256
|
+
/** Static capabilities — equal to `preset.capabilities` post-load. */
|
|
257
|
+
readonly capabilities: EngineCapabilities;
|
|
258
|
+
/** Idempotent. Resolves once state is `'ready'`. */
|
|
259
|
+
ensureReady(): Promise<void>;
|
|
260
|
+
on<K extends keyof EngineEventMap>(event: K, handler: (value: EngineEventMap[K]) => void): () => void;
|
|
261
|
+
generate(messages: ReadonlyArray<EngineMessage>, opts?: GenerateOpts): AsyncIterable<EngineEvent>;
|
|
262
|
+
/** Release GPU buffers + tokenizer state. Engine unusable after. */
|
|
263
|
+
dispose(): Promise<void>;
|
|
264
|
+
}
|
|
265
|
+
/**
|
|
266
|
+
* Non-preset construction options. Spread alongside a `ModelPreset`
|
|
267
|
+
* into `createEngine`.
|
|
268
|
+
*/
|
|
269
|
+
export interface EngineHooks {
|
|
270
|
+
/**
|
|
271
|
+
* Base URL for weight fetches. Defaults to huggingface.co. Set for
|
|
272
|
+
* self-hosted mirrors or offline bundles; the engine appends
|
|
273
|
+
* `{modelId}/{file}`.
|
|
274
|
+
*/
|
|
275
|
+
weightsBaseUrl?: string;
|
|
276
|
+
/**
|
|
277
|
+
* Minimum reported GPU memory in MB. If the device reports less,
|
|
278
|
+
* `ensureReady()` rejects with `InsufficientMemoryError` rather than
|
|
279
|
+
* crashing mid-load.
|
|
280
|
+
*/
|
|
281
|
+
minGpuMemoryMb?: number;
|
|
282
|
+
onLoadProgress?: (p: LoadProgress) => void;
|
|
283
|
+
}
|
|
284
|
+
/**
|
|
285
|
+
* Full argument shape for `createEngine`. A complete `ModelPreset`
|
|
286
|
+
* is required; hooks are optional.
|
|
287
|
+
*
|
|
288
|
+
* createEngine({ ...gemma4_E2B, onLoadProgress: console.log });
|
|
289
|
+
*/
|
|
290
|
+
export type CreateEngineOpts = ModelPreset & EngineHooks;
|
|
291
|
+
//# sourceMappingURL=types.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;GAcG;AAEH;;;GAGG;AACH,MAAM,WAAW,QAAQ;IACvB,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AAED;;;;;;GAMG;AACH,MAAM,MAAM,OAAO,GAAG,MAAM,GAAG,QAAQ,GAAG,MAAM,CAAC;AAEjD;;;;;;;;GAQG;AACH,MAAM,MAAM,KAAK,GAAG,OAAO,GAAG,IAAI,GAAG,MAAM,GAAG,MAAM,CAAC;AAErD;;;GAGG;AACH,MAAM,WAAW,kBAAkB;IACjC,aAAa,EAAE,OAAO,CAAC;IACvB,cAAc,EAAE,OAAO,CAAC;IACxB,aAAa,EAAE,OAAO,CAAC;IACvB,gCAAgC;IAChC,aAAa,EAAE,MAAM,CAAC;IACtB,6DAA6D;IAC7D,gBAAgB,EAAE,OAAO,CAAC;IAC1B;;;;;;;;;;;;;;;;;;OAkBG;IACH,YAAY,CAAC,EAAE;QACb,OAAO,EAAE,MAAM,CAAC;QAChB,QAAQ,EAAE,MAAM,CAAC;QACjB,4EAA4E;QAC5E,YAAY,CAAC,EAAE,OAAO,CAAC;QACvB,gFAAgF;QAChF,WAAW,CAAC,EAAE,aAAa,CAAC,MAAM,CAAC,CAAC;KACrC,CAAC;CACH;AAED;;;;;GAKG;AACH,MAAM,WAAW,WAAW;IAC1B,KAAK,EAAE,QAAQ,CAAC;IAChB,KAAK,EAAE,KAAK,CAAC;IACb,OAAO,EAAE,OAAO,CAAC;IACjB,YAAY,EAAE,kBAAkB,CAAC;IACjC;;;;OAIG;IACH,YAAY,CAAC,EAAE,CAAC,QAAQ,EAAE,aAAa,CAAC,aAAa,CAAC,KAAK,MAAM,CAAC;CACnE;AAED;;;;;;;;;;GAUG;AACH,MAAM,MAAM,YAAY,GACpB;IAAE,KAAK,EAAE,OAAO,CAAC;IAAC,IAAI,EAAE,MAAM,CAAC;IAAC,WAAW,EAAE,MAAM,CAAC;IAAC,UAAU,EAAE,MAAM,CAAA;CAAE,GACzE;IAAE,KAAK,EAAE,MAAM,CAAC;IAAC,OAAO,EAAE,OAAO,CAAA;CAAE,GACnC;IAAE,KAAK,EAAE,QAAQ,CAAC;IAAC,eAAe,EAAE,MAAM,CAAA;CAAE,GAC5C;IAAE,KAAK,EAAE,OAAO,CAAA;CAAE,CAAC;AAEvB,MAAM,MAAM,WAAW,GAAG,MAAM,GAAG,SAAS,GAAG,OAAO,GAAG,OAAO,GAAG,UAAU,CAAC;AAE9E;;;GAGG;AACH,MAAM,MAAM,SAAS,GACjB;IAAE,IAAI,EAAE,OAAO,CAAC;IAAC,IAAI,EAAE,IAAI,GAAG,WAAW,CAAC;IAAC,QAAQ,EAAE,MAAM,CAAA;CAAE,GAC7D;IAAE,IAAI,EAAE,OAAO,CAAC;IAAC,IAAI,EAAE,IAAI,GAAG,WAAW,CAAC;IAAC,QAAQ,EAAE,MAAM,CAAA;CAAE,CAAC;AAElE;;;;GAIG;AACH,MAAM,WAAW,aAAa;IAC5B,IAAI,EAAE,QAAQ,GAAG,MAAM,GAAG,WAAW,CAAC;IACtC,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,CAAC,EAAE,aAAa,CAAC,SAAS,CAAC,CAAC;CAClC;AAED,MAAM,WAAW,YAAY;IAC3B,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,iDAAiD;IACjD,IAAI,CAAC,EAAE,aAAa,CAAC,MAAM,CAAC,CAAC;IAC7B,gEA
AgE;IAChE,MAAM,CAAC,EAAE,WAAW,CAAC;IACrB;;;;;;;OAOG;IACH,KAAK,CAAC,EAAE,aAAa,CAAC,QAAQ,CAAC,CAAC;IAChC;;;;;;;;;;;;;;;;;OAiBG;IACH,cAAc,CAAC,EAAE,OAAO,CAAC;CAC1B;AAED;;;;GAIG;AACH,MAAM,WAAW,QAAQ;IACvB,IAAI,EAAE,UAAU,CAAC;IACjB,QAAQ,EAAE;QACR,IAAI,EAAE,MAAM,CAAC;QACb,WAAW,EAAE,MAAM,CAAC;QACpB,UAAU,EAAE,OAAO,CAAC;KACrB,CAAC;CACH;AAED;;;;;;;;;;;;;;;;GAgBG;AACH,MAAM,MAAM,WAAW,GACnB;IAAE,IAAI,EAAE,OAAO,CAAC;IAAC,IAAI,EAAE,MAAM,CAAA;CAAE,GAC/B;IAAE,IAAI,EAAE,UAAU,CAAC;IAAC,IAAI,EAAE,MAAM,CAAA;CAAE,GAClC;IACE,IAAI,EAAE,WAAW,CAAC;IAClB,uEAAuE;IACvE,EAAE,EAAE,MAAM,CAAC;IACX,6FAA6F;IAC7F,IAAI,EAAE,MAAM,CAAC;IACb,+FAA+F;IAC/F,IAAI,EAAE,OAAO,CAAC;CACf,GACD;IAAE,IAAI,EAAE,OAAO,CAAC;IAAC,YAAY,EAAE,MAAM,CAAC;IAAC,YAAY,EAAE,MAAM,CAAC;IAAC,QAAQ,EAAE,MAAM,CAAA;CAAE,GAC/E;IAAE,IAAI,EAAE,OAAO,CAAC;IAAC,OAAO,EAAE,MAAM,CAAC;IAAC,WAAW,EAAE,OAAO,CAAA;CAAE,CAAC;AAE7D;;GAEG;AACH,MAAM,WAAW,cAAc;IAC7B,KAAK,EAAE,WAAW,CAAC;IACnB,IAAI,EAAE,YAAY,CAAC;CACpB;AAED;;;;GAIG;AACH,MAAM,WAAW,MAAM;IACrB,QAAQ,CAAC,KAAK,EAAE,QAAQ,CAAC;IACzB,QAAQ,CAAC,KAAK,EAAE,WAAW,CAAC;IAC5B,sEAAsE;IACtE,QAAQ,CAAC,YAAY,EAAE,kBAAkB,CAAC;IAE1C,oDAAoD;IACpD,WAAW,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;IAE7B,EAAE,CAAC,CAAC,SAAS,MAAM,cAAc,EAC/B,KAAK,EAAE,CAAC,EACR,OAAO,EAAE,CAAC,KAAK,EAAE,cAAc,CAAC,CAAC,CAAC,KAAK,IAAI,GAC1C,MAAM,IAAI,CAAC;IAEd,QAAQ,CAAC,QAAQ,EAAE,aAAa,CAAC,aAAa,CAAC,EAAE,IAAI,CAAC,EAAE,YAAY,GAAG,aAAa,CAAC,WAAW,CAAC,CAAC;IAElG,oEAAoE;IACpE,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;CAC1B;AAED;;;GAGG;AACH,MAAM,WAAW,WAAW;IAC1B;;;;OAIG;IACH,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB;;;;OAIG;IACH,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,cAAc,CAAC,EAAE,CAAC,CAAC,EAAE,YAAY,KAAK,IAAI,CAAC;CAC5C;AAED;;;;;GAKG;AACH,MAAM,MAAM,gBAAgB,GAAG,WAAW,GAAG,WAAW,CAAC"}
|
package/dist/types.js
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Public types for `@inbrowser/model`.
|
|
3
|
+
*
|
|
4
|
+
* Vocabulary anchor:
|
|
5
|
+
* - `ModelRef` — bare locator (HF Hub repo id + revision).
|
|
6
|
+
* - `ModelPreset` — locator + static config (dtype, backend,
|
|
7
|
+
* capabilities, optional chat template).
|
|
8
|
+
* - `Engine` — runtime object owning a loaded model.
|
|
9
|
+
* Spread a preset into `createEngine` to get one.
|
|
10
|
+
*
|
|
11
|
+
* The Engine speaks a narrow `EngineEvent` vocabulary. Adapters in
|
|
12
|
+
* `./adapters/*` translate to the relay's `InferenceEvent` or the
|
|
13
|
+
* agent's `ChatEvent` shape. Cloud-only concepts (cost, signatures)
|
|
14
|
+
* are deliberately absent here.
|
|
15
|
+
*/
|
|
16
|
+
export {};
|
|
17
|
+
//# sourceMappingURL=types.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.js","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;GAcG"}
|
package/dist/worker.d.ts
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Worker transport — host an `Engine` inside a Web Worker; expose
|
|
3
|
+
* the same `Engine` shape on the main thread via a postMessage RPC.
|
|
4
|
+
*
|
|
5
|
+
* The key invariant: `connectWorkerEngine` returns a value that
|
|
6
|
+
* satisfies the same `Engine` interface as `createEngine`. Anything
|
|
7
|
+
* downstream — adapters, agent runtime, UI — cannot tell whether it
|
|
8
|
+
* holds a direct engine or a remote stub.
|
|
9
|
+
*
|
|
10
|
+
* RPC framing is sequence-numbered; `generate` opens a per-call
|
|
11
|
+
* stream id and the worker side fans `EngineEvent`s back tagged
|
|
12
|
+
* with that id. Backpressure today is implicit (postMessage queue);
|
|
13
|
+
* a future revision may add explicit acks.
|
|
14
|
+
*
|
|
15
|
+
* Wire shape:
|
|
16
|
+
*
|
|
17
|
+
* main → worker (ClientFrame)
|
|
18
|
+
* init — once, on connect; carries CreateEngineOpts
|
|
19
|
+
* ensure-ready — request the engine to load (seq-tracked ack/reject)
|
|
20
|
+
* generate-start — open a stream; events flow back tagged with seq
|
|
21
|
+
* generate-abort — request a stream to terminate (best effort)
|
|
22
|
+
* dispose — tear down engine + transport
|
|
23
|
+
*
|
|
24
|
+
* worker → main (HostFrame)
|
|
25
|
+
* init-ack — handshake reply (model + capabilities echoed)
|
|
26
|
+
* state — engine state transitions
|
|
27
|
+
* load — LoadProgress events
|
|
28
|
+
* ack | reject — per-seq reply for ensure-ready / dispose
|
|
29
|
+
* event — a single EngineEvent for a generate stream
|
|
30
|
+
* event-end — terminal marker for a generate stream
|
|
31
|
+
*/
|
|
32
|
+
import type { CreateEngineOpts, Engine } from './types.js';
|
|
33
|
+
export interface WorkerHostHandle {
|
|
34
|
+
dispose(): Promise<void>;
|
|
35
|
+
}
|
|
36
|
+
export interface HostEngineInWorkerOpts {
|
|
37
|
+
/**
|
|
38
|
+
* Factory the host uses to build the real engine after receiving
|
|
39
|
+
* `init` from the main thread. Default: `createEngine` from this
|
|
40
|
+
* package. Override only for testing.
|
|
41
|
+
*/
|
|
42
|
+
factory?: (opts: CreateEngineOpts) => Engine;
|
|
43
|
+
}
|
|
44
|
+
/**
|
|
45
|
+
* Install the worker-side RPC. Call from inside your worker entry:
|
|
46
|
+
*
|
|
47
|
+
* import { hostEngineInWorker } from '@inbrowser/model/worker';
|
|
48
|
+
* hostEngineInWorker(self);
|
|
49
|
+
*/
|
|
50
|
+
export declare function hostEngineInWorker(workerScope: DedicatedWorkerGlobalScope, opts?: HostEngineInWorkerOpts): WorkerHostHandle;
|
|
51
|
+
export interface ConnectWorkerEngineOpts {
|
|
52
|
+
worker: Worker;
|
|
53
|
+
engine: CreateEngineOpts;
|
|
54
|
+
/** Handshake timeout for the initial capabilities exchange. Default 10s. */
|
|
55
|
+
handshakeTimeoutMs?: number;
|
|
56
|
+
}
|
|
57
|
+
/**
|
|
58
|
+
* Connect to a worker that has called `hostEngineInWorker(self)`.
|
|
59
|
+
* Returns an `Engine` whose calls are RPC'd over postMessage.
|
|
60
|
+
*/
|
|
61
|
+
export declare function connectWorkerEngine(opts: ConnectWorkerEngineOpts): Engine;
|
|
62
|
+
//# sourceMappingURL=worker.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"worker.d.ts","sourceRoot":"","sources":["../src/worker.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA8BG;AAGH,OAAO,KAAK,EACV,gBAAgB,EAChB,MAAM,EASP,MAAM,YAAY,CAAC;AAiCpB,MAAM,WAAW,gBAAgB;IAC/B,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;CAC1B;AAED,MAAM,WAAW,sBAAsB;IACrC;;;;OAIG;IACH,OAAO,CAAC,EAAE,CAAC,IAAI,EAAE,gBAAgB,KAAK,MAAM,CAAC;CAC9C;AAED;;;;;GAKG;AACH,wBAAgB,kBAAkB,CAChC,WAAW,EAAE,0BAA0B,EACvC,IAAI,CAAC,EAAE,sBAAsB,GAC5B,gBAAgB,CA6KlB;AAID,MAAM,WAAW,uBAAuB;IACtC,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,gBAAgB,CAAC;IACzB,4EAA4E;IAC5E,kBAAkB,CAAC,EAAE,MAAM,CAAC;CAC7B;AAED;;;GAGG;AACH,wBAAgB,mBAAmB,CAAC,IAAI,EAAE,uBAAuB,GAAG,MAAM,CAyRzE"}
|