@inbrowser/model 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82) hide show
  1. package/AGENTS.md +44 -18
  2. package/README.md +128 -20
  3. package/dist/contract.d.ts +104 -0
  4. package/dist/contract.d.ts.map +1 -0
  5. package/dist/contract.js +13 -0
  6. package/dist/contract.js.map +1 -0
  7. package/dist/engine-client.d.ts +44 -0
  8. package/dist/engine-client.d.ts.map +1 -0
  9. package/dist/engine-client.js +136 -0
  10. package/dist/engine-client.js.map +1 -0
  11. package/dist/engine.d.ts.map +1 -1
  12. package/dist/engine.js +20 -10
  13. package/dist/engine.js.map +1 -1
  14. package/dist/index.d.ts +23 -8
  15. package/dist/index.d.ts.map +1 -1
  16. package/dist/index.js +44 -8
  17. package/dist/index.js.map +1 -1
  18. package/dist/presets.d.ts +10 -0
  19. package/dist/presets.d.ts.map +1 -1
  20. package/dist/presets.js +21 -0
  21. package/dist/presets.js.map +1 -1
  22. package/dist/providers/anthropic.d.ts +45 -0
  23. package/dist/providers/anthropic.d.ts.map +1 -0
  24. package/dist/providers/anthropic.js +217 -0
  25. package/dist/providers/anthropic.js.map +1 -0
  26. package/dist/providers/claude-cli.d.ts +135 -0
  27. package/dist/providers/claude-cli.d.ts.map +1 -0
  28. package/dist/providers/claude-cli.js +270 -0
  29. package/dist/providers/claude-cli.js.map +1 -0
  30. package/dist/providers/claude-code.d.ts +188 -0
  31. package/dist/providers/claude-code.d.ts.map +1 -0
  32. package/dist/providers/claude-code.js +182 -0
  33. package/dist/providers/claude-code.js.map +1 -0
  34. package/dist/providers/gemini.d.ts +32 -0
  35. package/dist/providers/gemini.d.ts.map +1 -0
  36. package/dist/providers/gemini.js +441 -0
  37. package/dist/providers/gemini.js.map +1 -0
  38. package/dist/providers/llama-server.d.ts +15 -0
  39. package/dist/providers/llama-server.d.ts.map +1 -0
  40. package/dist/providers/llama-server.js +51 -0
  41. package/dist/providers/llama-server.js.map +1 -0
  42. package/dist/providers/oai-compat.d.ts +100 -0
  43. package/dist/providers/oai-compat.d.ts.map +1 -0
  44. package/dist/providers/oai-compat.js +206 -0
  45. package/dist/providers/oai-compat.js.map +1 -0
  46. package/dist/providers/ollama.d.ts +15 -0
  47. package/dist/providers/ollama.d.ts.map +1 -0
  48. package/dist/providers/ollama.js +51 -0
  49. package/dist/providers/ollama.js.map +1 -0
  50. package/dist/providers/openrouter-oauth.d.ts +67 -0
  51. package/dist/providers/openrouter-oauth.d.ts.map +1 -0
  52. package/dist/providers/openrouter-oauth.js +84 -0
  53. package/dist/providers/openrouter-oauth.js.map +1 -0
  54. package/dist/providers/openrouter.d.ts +13 -0
  55. package/dist/providers/openrouter.d.ts.map +1 -0
  56. package/dist/providers/openrouter.js +218 -0
  57. package/dist/providers/openrouter.js.map +1 -0
  58. package/dist/providers/types.d.ts +50 -0
  59. package/dist/providers/types.d.ts.map +1 -0
  60. package/dist/providers/types.js +2 -0
  61. package/dist/providers/types.js.map +1 -0
  62. package/dist/sse.d.ts +20 -0
  63. package/dist/sse.d.ts.map +1 -0
  64. package/dist/sse.js +47 -0
  65. package/dist/sse.js.map +1 -0
  66. package/dist/types.d.ts +2 -13
  67. package/dist/types.d.ts.map +1 -1
  68. package/dist/with-retry.d.ts +27 -0
  69. package/dist/with-retry.d.ts.map +1 -0
  70. package/dist/with-retry.js +55 -0
  71. package/dist/with-retry.js.map +1 -0
  72. package/dist/worker.d.ts +1 -1
  73. package/dist/worker.js +1 -1
  74. package/package.json +9 -29
  75. package/dist/adapters/agent.d.ts +0 -19
  76. package/dist/adapters/agent.d.ts.map +0 -1
  77. package/dist/adapters/agent.js +0 -96
  78. package/dist/adapters/agent.js.map +0 -1
  79. package/dist/adapters/relay.d.ts +0 -17
  80. package/dist/adapters/relay.d.ts.map +0 -1
  81. package/dist/adapters/relay.js +0 -90
  82. package/dist/adapters/relay.js.map +0 -1
package/AGENTS.md CHANGED
@@ -2,19 +2,36 @@
2
2
 
3
3
  ## Purpose
4
4
 
5
- On-device LLM inference. Wraps `@huggingface/transformers` behind a
6
- narrow `Engine` surface so a local Gemma 4 model is a drop-in
7
- replacement for a cloud provider when consumed through the adapter
8
- subpaths.
5
+ The model layer. Two halves:
6
+
7
+ 1. **Contract + cloud providers.** `src/contract.ts` defines the one
8
+ `ModelClient` contract the whole stack shares (relay + agent both
9
+ consume it). `src/providers/*` are the cloud providers (Gemini,
10
+ OpenRouter, Anthropic, OpenAI-compatible, Ollama, llama-server, Claude-CLI, Claude-Code), each a
11
+ factory returning a `ModelClient`. `src/with-retry.ts` decorates one.
12
+ 2. **On-device engine.** Wraps `@huggingface/transformers` behind a
13
+ narrow `Engine` surface (`src/engine.ts`) that streams `EngineEvent`.
14
+
15
+ The engine is also a `ModelClient`, via `createEngineModelClient`
16
+ (`src/engine-client.ts`; exported from the root + the
17
+ `@inbrowser/model/engine-client` subpath). It wraps an `Engine`,
18
+ widening the engine's `EngineEvent` stream to the contract's
19
+ `ModelEvent`. The old engine→relay/agent adapter subpaths were removed;
20
+ this single wrapper replaces them. (The site's in-browser docs-chat
21
+ toggle that drives a local engine through the agent is a separate,
22
+ still-forthcoming piece — the adapter is the building block it needs.)
9
23
 
10
24
  ## Layering invariants
11
25
 
12
- - `src/types.ts` is the canonical type surface. Every other file in
13
- the package imports types from here.
14
- - `src/engine.ts` is the only module that holds runtime state.
15
- - `src/adapters/relay.ts` is the only place that imports from
16
- `@inbrowser/relay`. `src/adapters/agent.ts` is the only place that
17
- imports from `@inbrowser/agent`. The root barrel must not.
26
+ - `src/contract.ts` is type-only (zero runtime imports) so importing the
27
+ contract never pulls in the engine or `@huggingface/transformers`.
28
+ - `src/types.ts` is the canonical engine type surface. Engine-side files
29
+ import engine types from here.
30
+ - `src/engine.ts` is the only module that holds runtime model state.
31
+ - Each `src/providers/<name>.ts` is self-contained: it imports the
32
+ contract types and emits `ModelEvent`s. Providers do not import the
33
+ relay or the agent — the dependency points inward (relay/agent depend
34
+ on this package's contract, never the reverse).
18
35
  - `src/worker.ts` returns the same `Engine` shape `createEngine`
19
36
  returns. Consumers must not need to know which side of `postMessage`
20
37
  the engine lives on.
@@ -35,16 +52,25 @@ Use the precise terms — they show up in types, comments, and PRs:
35
52
 
36
53
  - Don't add `createGemmaEngine` / `createPhi3Engine` / sugar
37
54
  factories. New models are new `ModelPreset` entries.
38
- - Don't put tool-calling polyfill logic here. It belongs in
39
- `@inbrowser/agent` it's a property of the agent runtime, not the
40
- model.
55
+ - Don't put the agent's tool-calling polyfill logic here. The native
56
+ envelope recognition (`parseToolCalls`) is mechanical and stays; the
57
+ prompt-engineered polyfill is a strategy and belongs in
58
+ `@inbrowser/agent`.
41
59
  - Don't widen `EngineEvent` with cloud-only concepts (cost,
42
- thoughtSignature). Translate at the adapter boundary.
60
+ thoughtSignature). Translate at the `createEngineModelClient`
61
+ boundary (`src/engine-client.ts`), not in the engine.
62
+ - Don't re-introduce provider exports into `@inbrowser/relay` — the
63
+ providers live here now and the relay consumes them as
64
+ `ModelClientFactory`s.
43
65
  - Don't make `@huggingface/transformers` a regular dependency. It's
44
- a peer dep; consumers control the version.
66
+ a peer dep; consumers control the version. (The Claude Code Agent SDK,
67
+ used only by `claudeCodeModelClient`, is an optional peer dep.)
45
68
 
46
69
  ## Status
47
70
 
48
- POC. Types + adapter surface + worker frames are stable. The
49
- `@huggingface/transformers` wiring inside `createEngine` is the
50
- next slice.
71
+ Contract + cloud providers are the live path: relay and agent both
72
+ consume a `ModelClient` from here. The engine loads and `generate()`
73
+ streams real tokens, and the engine is now a `ModelClient` via
74
+ `createEngineModelClient` (the engine→ModelClient adapter). The next
75
+ slice is the site wiring that drives a local engine through the agent
76
+ end to end (the in-browser docs-chat toggle).
package/README.md CHANGED
@@ -1,19 +1,95 @@
1
1
  # @inbrowser/model
2
2
 
3
- On-device LLM engine. Loads ONNX models in the browser via
4
- `@huggingface/transformers` + ONNX Runtime Web (WebGPU / WASM), and
5
- exposes them behind a narrow `Engine` surface.
3
+ The model layer for the stack. It owns the one model-call contract —
4
+ `ModelClient` — plus the cloud providers that implement it and the
5
+ on-device LLM engine. `@inbrowser/relay` (transport) and
6
+ `@inbrowser/agent` (runtime) both consume a `ModelClient`, so this is the
7
+ single shared definition of "an LLM" for everything downstream.
6
8
 
7
- > **Status: POC stub.** Types, presets, adapter surface, and worker
8
- > RPC frames are in place. The `@huggingface/transformers` wiring
9
- > inside `createEngine` is not yet implemented — `generate()` yields
10
- > an `error` event today. See `src/engine.ts`.
9
+ Two halves, one package:
11
10
 
12
- ## One-liner
11
+ - **The contract + cloud providers.** `@inbrowser/model`
12
+ defines `ModelClient` / `ModelRequest` / `ModelEvent`. The cloud
13
+ providers (`geminiModelClient`, `openrouterModelClient`,
14
+ `anthropicModelClient`, `openaiCompatModelClient`, `ollamaModelClient`,
15
+ `llamaServerModelClient`, `claudeCliModelClient`, `claudeCodeModelClient`)
16
+ are factories that each return a `ModelClient`. `withRetry` decorates one.
17
+ - **The on-device engine.** `createEngine` loads ONNX models in the
18
+ browser via `@huggingface/transformers` + ONNX Runtime Web (WebGPU /
19
+ WASM) and exposes them behind a narrow `Engine` surface that streams
20
+ `EngineEvent`s.
21
+
22
+ > **Status.** Contract + cloud providers are the live integration path:
23
+ > relay and agent both consume a `ModelClient`. `createEngine` loads a
24
+ > model through `@huggingface/transformers` and `generate()` streams real
25
+ > tokens (the end-to-end load path runs in `examples/local-llm-poc`,
26
+ > headless-verified). The engine is now a `ModelClient` too, via
27
+ > `createEngineModelClient` (root),
28
+ > which widens the engine's `EngineEvent` stream to the contract's
29
+ > `ModelEvent`. The old `@inbrowser/model/relay` and
30
+ > `@inbrowser/model/agent` adapter subpaths have been removed.
31
+ > Known gaps: `GenerateOpts.stop` sequences are accepted but not yet
32
+ > enforced, and the site's in-browser docs-chat path that drives a local
33
+ > engine through the agent is still forthcoming (the adapter exists; the
34
+ > site toggle does not).
35
+
36
+ ## A cloud model as a `ModelClient`
37
+
38
+ ```ts
39
+ import { geminiModelClient } from '@inbrowser/model';
40
+
41
+ const client = geminiModelClient({ apiKey: process.env.GEMINI_KEY, model: 'gemini-3.5-flash' });
42
+
43
+ for await (const evt of client.chat(
44
+ {
45
+ messages: [{ role: 'user', text: 'Explain WebGPU in one paragraph.' }],
46
+ tools: [],
47
+ toolUseEnabled: false,
48
+ },
49
+ new AbortController().signal,
50
+ )) {
51
+ if (evt.kind === 'text') process.stdout.write(evt.text);
52
+ else if (evt.kind === 'usage') console.error(evt.usage);
53
+ }
54
+ ```
55
+
56
+ The turn ends when the iterable returns; a `usage` event (or a terminal
57
+ `error` event) is the last thing emitted. There is no `turn_complete`
58
+ event.
59
+
60
+ ## A local OpenAI-compatible server
61
+
62
+ Ollama, llama.cpp's `llama-server`, vLLM, LM Studio, LocalAI, and friends all
63
+ expose the same OpenAI `POST /v1/chat/completions` wire shape. One generic
64
+ factory talks to any of them; two named presets carry the right defaults for
65
+ the common local servers:
66
+
67
+ ```ts
68
+ import {
69
+ openaiCompatModelClient, // any OAI server — set baseUrl (or endpoint)
70
+ ollamaModelClient, // preset: defaults to http://localhost:11434, no auth
71
+ llamaServerModelClient, // preset: defaults to http://localhost:8080
72
+ } from '@inbrowser/model';
73
+
74
+ // Generic: point at any OAI-compatible server. `apiKey` becomes a Bearer token.
75
+ const vllm = openaiCompatModelClient({ baseUrl: 'http://gpu.local:8000', model: 'qwen2.5' });
76
+
77
+ // llama.cpp llama-server. `--api-key` is optional; pass it as `apiKey`.
78
+ const llama = llamaServerModelClient({ model: 'qwen2.5-coder', apiKey: process.env.LLAMA_KEY });
79
+ ```
80
+
81
+ > **Tool calling on `llama-server` needs `--jinja`.** The server only honors the
82
+ > OpenAI `tools` array when launched with `--jinja` (so it applies a tool-aware
83
+ > chat template); without it, tool calls never stream back. Auth is off unless
84
+ > you start it with `--api-key KEY`.
85
+
86
+ The presets delegate to `openaiCompatModelClient`; reach for the generic factory
87
+ directly for any server without a named preset.
88
+
89
+ ## An on-device model via the engine
13
90
 
14
91
  ```ts
15
- import { createEngine } from '@inbrowser/model';
16
- import { gemma4_E2B } from '@inbrowser/model/presets';
92
+ import { createEngine, gemma4_E2B } from '@inbrowser/model';
17
93
 
18
94
  const engine = createEngine(gemma4_E2B);
19
95
  await engine.ensureReady();
@@ -25,17 +101,48 @@ for await (const evt of engine.generate([
25
101
  }
26
102
  ```
27
103
 
104
+ The engine speaks `EngineEvent` (`token` / `thinking` / `tool_call` /
105
+ `usage` / `error`), not `ModelEvent`. To use it as a `ModelClient` —
106
+ e.g. to hand it to the agent — wrap it with `createEngineModelClient`:
107
+
108
+ ```ts
109
+ import { createEngine, createEngineModelClient, smollm2_360m } from '@inbrowser/model';
110
+
111
+ const engine = createEngine(smollm2_360m);
112
+ const client = createEngineModelClient(engine); // a ModelClient
113
+
114
+ for await (const evt of client.chat(
115
+ { messages: [{ role: 'user', text: 'Hello' }], tools: [], toolUseEnabled: false },
116
+ new AbortController().signal,
117
+ )) {
118
+ if (evt.kind === 'text') process.stdout.write(evt.text);
119
+ }
120
+ ```
121
+
122
+ The adapter maps `token` → `text`, folds the engine's terminal `usage`
123
+ into a `ModelEvent` `usage`, passes `tool_call`s through (no signature),
124
+ and drops the engine-only extras (`decodeMs`, `recoverable`). Wiring a
125
+ local model into the docs-chat site through the agent is forthcoming;
126
+ the `createEngineModelClient` building block it needs now exists.
127
+
28
128
  ## Surface
29
129
 
130
+ Everything is imported from the package root `@inbrowser/model`.
131
+
30
132
  | Export | What it gives you |
31
133
  |---|---|
32
- | `createEngine(preset)` | Runtime `Engine` owns load state + decode loop |
134
+ | `ModelClient`, `ModelRequest`, `ModelEvent`, `ModelMessage`, `ModelUsage`, `ToolSpec`, `ReasoningEffort` | The shared contract (type-only) |
135
+ | `geminiModelClient`, `openrouterModelClient`, `anthropicModelClient`, `openaiCompatModelClient`, `ollamaModelClient`, `llamaServerModelClient`, `claudeCliModelClient`, `claudeCodeModelClient` | Cloud + local provider factories; each returns a `ModelClient` |
136
+ | `OpenAiCompatConfig`, `OllamaConfig`, `LlamaServerConfig` | Config shapes for the OpenAI-compatible factory and its local presets |
137
+ | `withRetry(client, opts?)` | Decorator that retries transient upstream errors while nothing has streamed |
138
+ | `CloudProviderConfig`, `ModelClientFactory` | Shared provider config + the factory type the relay routes on |
139
+ | `createEngine(preset)` | Runtime `Engine` — owns load state + decode loop, streams `EngineEvent` |
140
+ | `createEngineModelClient(engine, id?)` | Wraps an `Engine` as a `ModelClient` (maps `EngineEvent` → `ModelEvent`) |
33
141
  | `definePreset(p)` | Type-safe identity helper for community presets |
34
- | `ModelPreset`, `Engine`, `EngineEvent`, | Public types |
35
- | `@inbrowser/model/presets` | `gemma4_E2B`, `gemma4_E4B` |
36
- | `@inbrowser/model/relay` | `createLocalInferenceProvider(engine)` relay `InferenceProvider` |
37
- | `@inbrowser/model/agent` | `createLocalLlmClient(engine, id)` agent `LlmClient` |
38
- | `@inbrowser/model/worker` | `hostEngineInWorker(self)` + `connectWorkerEngine(opts)` |
142
+ | `parseToolCalls`, `splitThinking` | Stream transformers over an `EngineEvent` stream |
143
+ | `ModelPreset`, `Engine`, `EngineEvent` | Public engine types |
144
+ | `gemma4_E2B`, `gemma4_E4B`, `qwen2_5_coder_1_5b`, `qwen3_1_7b`, `deepseek_r1_qwen_1_5b`, `smollm2_360m` | The six bundled presets |
145
+ | `hostEngineInWorker(self)`, `connectWorkerEngine(opts)` | Worker host/connect helpers |
39
146
 
40
147
  ## Vocabulary anchor
41
148
 
@@ -54,10 +161,11 @@ for await (const evt of engine.generate([
54
161
  - One factory (`createEngine`), many presets. No `createGemmaEngine`.
55
162
  - `capabilities` is on the preset, not the engine — interrogable
56
163
  pre-load (`gemma4_E2B.capabilities.contextWindow`).
57
- - `EngineEvent` is narrower than `InferenceEvent`/`ChatEvent`.
58
- Adapters widen.
59
- - Worker subpath returns the same `Engine` shape; the agent runtime
60
- cannot tell whether it holds a direct or remote engine.
164
+ - `EngineEvent` is narrower than the contract's `ModelEvent` (no
165
+ cost, no `thoughtSignature`). `createEngineModelClient` is the place
166
+ that widens it — translate at that boundary, not in the engine.
167
+ - Worker subpath returns the same `Engine` shape; a consumer cannot
168
+ tell whether it holds a direct or remote engine.
61
169
  - Tool calling is not native to Gemma 4. The polyfill (prompt-engineered
62
170
  tool calling + structured-output parsing) lives in `@inbrowser/agent`,
63
171
  not here.
@@ -0,0 +1,104 @@
1
+ /**
2
+ * `@inbrowser/model/contract` — the one model-call contract for the stack.
3
+ *
4
+ * A `ModelClient` is anything that, given a `ModelRequest`, streams `ModelEvent`s:
5
+ * the cloud providers, the on-device engine, and any adapter all implement it.
6
+ * Both `@inbrowser/relay` (transport) and `@inbrowser/agent` (runtime) consume a
7
+ * `ModelClient`, so this is the single shared LLM contract.
8
+ *
9
+ * This module is TYPE-ONLY (zero runtime imports), so importing the contract
10
+ * never pulls in the on-device engine or `@huggingface/transformers`.
11
+ */
12
+ /** A turn of the conversation handed to a model. */
13
+ export interface ModelMessage {
14
+ role: 'system' | 'user' | 'assistant' | 'tool';
15
+ text?: string;
16
+ /** Tool calls the assistant made (assistant turns). */
17
+ toolCalls?: {
18
+ id: string;
19
+ name: string;
20
+ args: unknown;
21
+ signature?: string;
22
+ }[];
23
+ /** The call this message answers (tool-result turns). */
24
+ toolCallId?: string;
25
+ /** Tool name (tool-result turns). */
26
+ name?: string;
27
+ /** Serialized tool result (tool-result turns). */
28
+ resultJson?: string;
29
+ }
30
+ /**
31
+ * Tool declaration in the OAI function-calling shape that modern chat templates
32
+ * accept directly (Qwen, DeepSeek, Llama 3.2+, etc.). Cloud providers that speak
33
+ * a different wire shape translate internally.
34
+ */
35
+ export interface ToolSpec {
36
+ type: 'function';
37
+ function: {
38
+ name: string;
39
+ description: string;
40
+ parameters: unknown;
41
+ };
42
+ }
43
+ export type ReasoningEffort = 'off' | 'low' | 'medium' | 'high';
44
+ /** A single model call. */
45
+ export interface ModelRequest {
46
+ messages: ModelMessage[];
47
+ tools: ToolSpec[];
48
+ /** Whether tool use is enabled this turn (cheaper than checking tools.length). */
49
+ toolUseEnabled: boolean;
50
+ temperature?: number;
51
+ topP?: number;
52
+ topK?: number;
53
+ reasoningEffort?: ReasoningEffort;
54
+ }
55
+ /** Token + cost accounting for one turn. */
56
+ export interface ModelUsage {
57
+ promptTokens: number;
58
+ outputTokens: number;
59
+ cachedTokens?: number;
60
+ /** Reasoning tokens, when a provider reports them. */
61
+ reasoningTokens?: number;
62
+ /** Real dollar cost, when a provider reports it (e.g. OpenRouter). */
63
+ costUsd?: number;
64
+ }
65
+ /**
66
+ * One streamed item from a model call.
67
+ *
68
+ * The turn ends when the async iterable returns. On a normal end a `usage` event
69
+ * MUST be emitted before the return (it carries the final accounting); there is
70
+ * no separate terminal event. The exception is `error`, which is itself terminal:
71
+ * after an `error` event the iterable returns with no `usage` event. Consumers
72
+ * can therefore rely on exactly one of {a `usage` event, an `error` event} per
73
+ * turn.
74
+ */
75
+ export type ModelEvent = {
76
+ kind: 'text';
77
+ text: string;
78
+ } | {
79
+ kind: 'thinking';
80
+ text: string;
81
+ } | {
82
+ kind: 'tool_call';
83
+ id: string;
84
+ name: string;
85
+ args: unknown;
86
+ signature?: string;
87
+ } | {
88
+ kind: 'usage';
89
+ usage: ModelUsage;
90
+ } | {
91
+ kind: 'error';
92
+ message: string;
93
+ };
94
+ /**
95
+ * The one model-call contract. Implemented by the cloud providers and the
96
+ * on-device engine; consumed by the relay (transport) and the agent (runtime).
97
+ */
98
+ export interface ModelClient {
99
+ /** Stable id for metrics + provenance, e.g. `gemini:gemini-3.5-flash`. */
100
+ readonly id: string;
101
+ readonly supportsTools: boolean;
102
+ chat(req: ModelRequest, signal: AbortSignal): AsyncIterable<ModelEvent>;
103
+ }
104
+ //# sourceMappingURL=contract.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"contract.d.ts","sourceRoot":"","sources":["../src/contract.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAEH,oDAAoD;AACpD,MAAM,WAAW,YAAY;IAC3B,IAAI,EAAE,QAAQ,GAAG,MAAM,GAAG,WAAW,GAAG,MAAM,CAAC;IAC/C,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,uDAAuD;IACvD,SAAS,CAAC,EAAE;QAAE,EAAE,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,OAAO,CAAC;QAAC,SAAS,CAAC,EAAE,MAAM,CAAA;KAAE,EAAE,CAAC;IAC9E,yDAAyD;IACzD,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,qCAAqC;IACrC,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,kDAAkD;IAClD,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB;AAED;;;;GAIG;AACH,MAAM,WAAW,QAAQ;IACvB,IAAI,EAAE,UAAU,CAAC;IACjB,QAAQ,EAAE;QACR,IAAI,EAAE,MAAM,CAAC;QACb,WAAW,EAAE,MAAM,CAAC;QACpB,UAAU,EAAE,OAAO,CAAC;KACrB,CAAC;CACH;AAED,MAAM,MAAM,eAAe,GAAG,KAAK,GAAG,KAAK,GAAG,QAAQ,GAAG,MAAM,CAAC;AAEhE,2BAA2B;AAC3B,MAAM,WAAW,YAAY;IAC3B,QAAQ,EAAE,YAAY,EAAE,CAAC;IACzB,KAAK,EAAE,QAAQ,EAAE,CAAC;IAClB,kFAAkF;IAClF,cAAc,EAAE,OAAO,CAAC;IACxB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,eAAe,CAAC,EAAE,eAAe,CAAC;CACnC;AAED,4CAA4C;AAC5C,MAAM,WAAW,UAAU;IACzB,YAAY,EAAE,MAAM,CAAC;IACrB,YAAY,EAAE,MAAM,CAAC;IACrB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,sDAAsD;IACtD,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,sEAAsE;IACtE,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB;AAED;;;;;;;;;GASG;AACH,MAAM,MAAM,UAAU,GAClB;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,MAAM,CAAA;CAAE,GAC9B;IAAE,IAAI,EAAE,UAAU,CAAC;IAAC,IAAI,EAAE,MAAM,CAAA;CAAE,GAClC;IAAE,IAAI,EAAE,WAAW,CAAC;IAAC,EAAE,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,OAAO,CAAC;IAAC,SAAS,CAAC,EAAE,MAAM,CAAA;CAAE,GAClF;IAAE,IAAI,EAAE,OAAO,CAAC;IAAC,KAAK,EAAE,UAAU,CAAA;CAAE,GACpC;IAAE,IAAI,EAAE,OAAO,CAAC;IAAC,OAAO,EAAE,MAAM,CAAA;CAAE,CAAC;AAEvC;;;GAGG;AACH,MAAM,WAAW,WAAW;IAC1B,0EAA0E;IAC1E,QAAQ,CAAC,EAAE,EAAE,MAAM,CAAC;IACpB,QAAQ,CAAC,aAAa,EAAE,OAAO,CAAC;IAChC,IAAI,CAAC,GAAG,EAAE,YAAY,EAAE,MAAM,EAAE,WAAW,GAAG,aAAa,CAAC,UAAU,CAAC,CAAC;CACzE"}
@@ -0,0 +1,13 @@
1
+ /**
2
+ * `@inbrowser/model/contract` — the one model-call contract for the stack.
3
+ *
4
+ * A `ModelClient` is anything that, given a `ModelRequest`, streams `ModelEvent`s:
5
+ * the cloud providers, the on-device engine, and any adapter all implement it.
6
+ * Both `@inbrowser/relay` (transport) and `@inbrowser/agent` (runtime) consume a
7
+ * `ModelClient`, so this is the single shared LLM contract.
8
+ *
9
+ * This module is TYPE-ONLY (zero runtime imports), so importing the contract
10
+ * never pulls in the on-device engine or `@huggingface/transformers`.
11
+ */
12
+ export {};
13
+ //# sourceMappingURL=contract.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"contract.js","sourceRoot":"","sources":["../src/contract.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG"}
@@ -0,0 +1,44 @@
1
+ /**
2
+ * `createEngineModelClient` — wraps an on-device `Engine` as a `ModelClient`.
3
+ *
4
+ * This is the adapter that lets the on-device engine plug into the same
5
+ * `ModelClient` contract the cloud providers (and the relay + agent) speak.
6
+ * Without it the engine can only be driven directly via its `EngineEvent`
7
+ * stream; with it the engine is just another `ModelClient` the agent/relay
8
+ * can route to.
9
+ *
10
+ * The mapping is deliberately lossless in the directions that matter and
11
+ * drops the cloud-irrelevant engine extras:
12
+ *
13
+ * - `EngineEvent.token` → `{ kind: 'text', text }`
14
+ * - `EngineEvent.thinking` → `{ kind: 'thinking', text }`
15
+ * - `EngineEvent.tool_call` → `{ kind: 'tool_call', id, name, args }`
16
+ * (the engine emits no signature — omitted)
17
+ * - `EngineEvent.usage` → `{ kind: 'usage', usage: { promptTokens,
18
+ * outputTokens } }` (`decodeMs` is dropped)
19
+ * - `EngineEvent.error` → `{ kind: 'error', message }`
20
+ * (`recoverable` is dropped)
21
+ *
22
+ * The engine already emits exactly one terminal `usage` (success) or `error`
23
+ * (failure) before its stream returns, so the contract's "exactly one of
24
+ * {usage, error} per turn" invariant carries straight through — this adapter
25
+ * synthesizes nothing.
26
+ *
27
+ * This module has runtime code (it constructs a `ModelClient` at runtime
28
+ * and imports engine types), so it lives on the engine surface, NOT in the
29
+ * type-only `./contract` module.
30
+ */
31
+ import type { ModelClient } from './contract.js';
32
+ import type { Engine } from './types.js';
33
+ /**
34
+ * Wrap an `Engine` as a `ModelClient`.
35
+ *
36
+ * @param engine The on-device engine to drive.
37
+ * @param id Stable id for metrics + provenance. Defaults to
38
+ * `local:${engine.model.modelId}` when the engine exposes a model id,
39
+ * else `'local'`. The engine has no preset id of its own — `engine.model`
40
+ * is a bare `ModelRef` (HF Hub `modelId`), which is the most stable handle
41
+ * available.
42
+ */
43
+ export declare function createEngineModelClient(engine: Engine, id?: string): ModelClient;
44
+ //# sourceMappingURL=engine-client.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"engine-client.d.ts","sourceRoot":"","sources":["../src/engine-client.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA6BG;AAEH,OAAO,KAAK,EAAE,WAAW,EAA0C,MAAM,eAAe,CAAC;AACzF,OAAO,KAAK,EAAE,MAAM,EAAiB,MAAM,YAAY,CAAC;AAExD;;;;;;;;;GASG;AACH,wBAAgB,uBAAuB,CAAC,MAAM,EAAE,MAAM,EAAE,EAAE,CAAC,EAAE,MAAM,GAAG,WAAW,CAkBhF"}
@@ -0,0 +1,136 @@
1
+ /**
2
+ * `createEngineModelClient` — wraps an on-device `Engine` as a `ModelClient`.
3
+ *
4
+ * This is the adapter that lets the on-device engine plug into the same
5
+ * `ModelClient` contract the cloud providers (and the relay + agent) speak.
6
+ * Without it the engine can only be driven directly via its `EngineEvent`
7
+ * stream; with it the engine is just another `ModelClient` the agent/relay
8
+ * can route to.
9
+ *
10
+ * The mapping is deliberately lossless in the directions that matter and
11
+ * drops the cloud-irrelevant engine extras:
12
+ *
13
+ * - `EngineEvent.token` → `{ kind: 'text', text }`
14
+ * - `EngineEvent.thinking` → `{ kind: 'thinking', text }`
15
+ * - `EngineEvent.tool_call` → `{ kind: 'tool_call', id, name, args }`
16
+ * (the engine emits no signature — omitted)
17
+ * - `EngineEvent.usage` → `{ kind: 'usage', usage: { promptTokens,
18
+ * outputTokens } }` (`decodeMs` is dropped)
19
+ * - `EngineEvent.error` → `{ kind: 'error', message }`
20
+ * (`recoverable` is dropped)
21
+ *
22
+ * The engine already emits exactly one terminal `usage` (success) or `error`
23
+ * (failure) before its stream returns, so the contract's "exactly one of
24
+ * {usage, error} per turn" invariant carries straight through — this adapter
25
+ * synthesizes nothing.
26
+ *
27
+ * This module has runtime code (it constructs a `ModelClient` at runtime
28
+ * and imports engine types), so it lives on the engine surface, NOT in the
29
+ * type-only `./contract` module.
30
+ */
31
/**
 * Adapt an on-device `Engine` to the `ModelClient` contract.
 *
 * The returned client exposes:
 *
 * - `id` — the caller-supplied `id` when it is neither `null` nor
 *   `undefined`; otherwise `local:<modelId>` when `engine.model.modelId` is
 *   truthy, else the bare string `'local'`.
 * - `supportsTools` — copied from `engine.capabilities.supportsTools` once,
 *   at construction time.
 * - `chat(req, signal)` — flattens `req.messages`, calls `engine.generate`,
 *   and returns the engine's stream translated into `ModelEvent`s.
 *
 * Only `temperature`, `topP`, `topK`, the abort `signal` and (when
 * `req.toolUseEnabled` is truthy) `req.tools` are forwarded to the engine;
 * no other request field is read here.
 *
 * @param engine The on-device engine to drive.
 * @param id Optional stable id for metrics + provenance.
 * @returns A `ModelClient` backed by `engine`.
 */
export function createEngineModelClient(engine, id) {
    let clientId = id;
    if (clientId === undefined || clientId === null) {
        clientId = engine.model?.modelId ? `local:${engine.model.modelId}` : 'local';
    }
    const supportsTools = engine.capabilities.supportsTools;
    return {
        id: clientId,
        supportsTools,
        chat(req, signal) {
            const prompt = toEngineMessages(req.messages);
            // Tools are only handed to the engine when the turn enables tool use.
            let tools;
            if (req.toolUseEnabled) {
                tools = req.tools;
            }
            const events = engine.generate(prompt, {
                tools,
                temperature: req.temperature,
                topP: req.topP,
                topK: req.topK,
                signal,
            });
            return mapEvents(events);
        },
    };
}
59
/**
 * Convert contract `ModelMessage`s into the plain role + text messages that
 * are handed to `engine.generate`.
 *
 * Tool round-trips are rendered as text rather than dropped:
 *
 * - A `role: 'tool'` message becomes a `user` message reading
 *   `Tool <name> result: <body>`. The name falls back to `'tool'`; the body
 *   is `resultJson`, else `text`, else the empty string.
 * - An `assistant` message with a non-empty `toolCalls` list keeps its text
 *   (when truthy) and gains one `Tool call: <name>(<args>)` line per call,
 *   all joined with newlines.
 * - Any other message passes through as `{ role, text }`, with a missing
 *   text replaced by `''`.
 *
 * @param messages The request's messages, in conversation order.
 * @returns A new array with one entry per input message, in the same order.
 */
function toEngineMessages(messages) {
    const flatten = (msg) => {
        if (msg.role === 'tool') {
            const label = msg.name ?? 'tool';
            const payload = msg.resultJson ?? msg.text ?? '';
            return { role: 'user', text: `Tool ${label} result: ${payload}` };
        }
        const hasCalls = msg.role === 'assistant' && msg.toolCalls && msg.toolCalls.length > 0;
        if (!hasCalls) {
            // system / user / plain assistant: role and text are copied as-is.
            return { role: msg.role, text: msg.text ?? '' };
        }
        // Assistant text (if any) comes first, then one line per tool call.
        const rendered = msg.text ? [msg.text] : [];
        for (const call of msg.toolCalls) {
            rendered.push(`Tool call: ${call.name}(${stringifyArgs(call.args)})`);
        }
        return { role: 'assistant', text: rendered.join('\n') };
    };
    const flattened = [];
    for (const msg of messages) {
        flattened.push(flatten(msg));
    }
    return flattened;
}
98
/**
 * Render tool-call arguments as text for a flattened `Tool call:` line.
 *
 * Strings are returned unchanged. `null` and `undefined` render as `{}`.
 * Everything else is JSON-encoded; when JSON encoding is not possible the
 * value's `String()` form is used instead, so the result is always a string.
 *
 * @param args The tool-call arguments, of any type.
 * @returns A string representation of `args`.
 */
function stringifyArgs(args) {
    if (typeof args === 'string') {
        return args;
    }
    try {
        // JSON.stringify does not throw for functions, symbols, or objects
        // whose toJSON() yields undefined — it returns undefined. Only accept
        // an actual string so the caller never interpolates "undefined".
        const json = JSON.stringify(args ?? {});
        if (typeof json === 'string') {
            return json;
        }
    }
    catch {
        // Circular structures and BigInt values throw; use String() below.
    }
    return String(args);
}
108
/**
 * Re-emit an engine event stream as contract `ModelEvent`s.
 *
 * Per event kind:
 *
 * - `token`     → `{ kind: 'text', text }`
 * - `thinking`  → `{ kind: 'thinking', text }`
 * - `tool_call` → `{ kind: 'tool_call', id, name, args }` (no signature)
 * - `usage`     → `{ kind: 'usage', usage: { promptTokens, outputTokens } }`
 * - `error`     → `{ kind: 'error', message }`
 *
 * Only the fields listed above are copied; any other field on the engine
 * event (e.g. `decodeMs`, `recoverable`) is left out. Events of any other
 * kind produce no output.
 *
 * @param source Async iterable of engine events.
 * @yields The translated events, in source order.
 */
async function* mapEvents(source) {
    for await (const event of source) {
        const kind = event.kind;
        if (kind === 'token') {
            yield { kind: 'text', text: event.text };
        }
        else if (kind === 'thinking') {
            yield { kind: 'thinking', text: event.text };
        }
        else if (kind === 'tool_call') {
            // The engine emits no signature, so none is attached.
            yield { kind: 'tool_call', id: event.id, name: event.name, args: event.args };
        }
        else if (kind === 'usage') {
            // `decodeMs` is not part of the contract's usage shape.
            const usage = { promptTokens: event.promptTokens, outputTokens: event.outputTokens };
            yield { kind: 'usage', usage };
        }
        else if (kind === 'error') {
            // `recoverable` is not part of the contract's error event.
            yield { kind: 'error', message: event.message };
        }
    }
}
136
+ //# sourceMappingURL=engine-client.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"engine-client.js","sourceRoot":"","sources":["../src/engine-client.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA6BG;AAKH;;;;;;;;;GASG;AACH,MAAM,UAAU,uBAAuB,CAAC,MAAc,EAAE,EAAW;IACjE,MAAM,UAAU,GAAG,EAAE,IAAI,CAAC,MAAM,CAAC,KAAK,EAAE,OAAO,CAAC,CAAC,CAAC,SAAS,MAAM,CAAC,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC;IAE7F,OAAO;QACL,EAAE,EAAE,UAAU;QACd,aAAa,EAAE,MAAM,CAAC,YAAY,CAAC,aAAa;QAChD,IAAI,CAAC,GAAiB,EAAE,MAAmB;YACzC,MAAM,cAAc,GAAG,gBAAgB,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;YACtD,MAAM,MAAM,GAAG,MAAM,CAAC,QAAQ,CAAC,cAAc,EAAE;gBAC7C,KAAK,EAAE,GAAG,CAAC,cAAc,CAAC,CAAC,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS;gBACjD,WAAW,EAAE,GAAG,CAAC,WAAW;gBAC5B,IAAI,EAAE,GAAG,CAAC,IAAI;gBACd,IAAI,EAAE,GAAG,CAAC,IAAI;gBACd,MAAM;aACP,CAAC,CAAC;YACH,OAAO,SAAS,CAAC,MAAM,CAAC,CAAC;QAC3B,CAAC;KACF,CAAC;AACJ,CAAC;AAED;;;;;;;;;;;;;;GAcG;AACH,SAAS,gBAAgB,CAAC,QAAqC;IAC7D,MAAM,GAAG,GAAoB,EAAE,CAAC;IAChC,KAAK,MAAM,CAAC,IAAI,QAAQ,EAAE,CAAC;QACzB,IAAI,CAAC,CAAC,IAAI,KAAK,MAAM,EAAE,CAAC;YACtB,MAAM,IAAI,GAAG,CAAC,CAAC,IAAI,IAAI,MAAM,CAAC;YAC9B,MAAM,IAAI,GAAG,CAAC,CAAC,UAAU,IAAI,CAAC,CAAC,IAAI,IAAI,EAAE,CAAC;YAC1C,GAAG,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,QAAQ,IAAI,YAAY,IAAI,EAAE,EAAE,CAAC,CAAC;YACjE,SAAS;QACX,CAAC;QAED,IAAI,CAAC,CAAC,IAAI,KAAK,WAAW,IAAI,CAAC,CAAC,SAAS,IAAI,CAAC,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACpE,MAAM,KAAK,GAAa,EAAE,CAAC;YAC3B,IAAI,CAAC,CAAC,IAAI;gBAAE,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;YAC/B,KAAK,MAAM,IAAI,IAAI,CAAC,CAAC,SAAS,EAAE,CAAC;gBAC/B,KAAK,CAAC,IAAI,CAAC,cAAc,IAAI,CAAC,IAAI,IAAI,aAAa,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;YACrE,CAAC;YACD,GAAG,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,WAAW,EAAE,IAAI,EAAE,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YACxD,SAAS;QACX,CAAC;QAED,kEAAkE;QAClE,GAAG,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,IAAI,EAAE,EAAE,CAAC,CAAC;IACjD,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED,SAAS,aAAa,CAAC,IAAa;IAClC,IAAI,OAAO,IAAI,KAAK,QAAQ;QAAE,OAAO,IAAI,CAAC;IAC1C,IA
AI,CAAC;QACH,OAAO,IAAI,CAAC,SAAS,CAAC,IAAI,IAAI,EAAE,CAAC,CAAC;IACpC,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,MAAM,CAAC,IAAI,CAAC,CAAC;IACtB,CAAC;AACH,CAAC;AAED,sEAAsE;AACtE,KAAK,SAAS,CAAC,CAAC,SAAS,CACvB,MAAuD;IAEvD,IAAI,KAAK,EAAE,MAAM,EAAE,IAAI,MAAM,EAAE,CAAC;QAC9B,QAAQ,EAAE,CAAC,IAAI,EAAE,CAAC;YAChB,KAAK,OAAO;gBACV,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,EAAE,CAAC,IAAI,EAAE,CAAC;gBACtC,MAAM;YACR,KAAK,UAAU;gBACb,MAAM,EAAE,IAAI,EAAE,UAAU,EAAE,IAAI,EAAE,EAAE,CAAC,IAAI,EAAE,CAAC;gBAC1C,MAAM;YACR,KAAK,WAAW;gBACd,0CAA0C;gBAC1C,MAAM,EAAE,IAAI,EAAE,WAAW,EAAE,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE,IAAI,EAAE,EAAE,CAAC,IAAI,EAAE,IAAI,EAAE,EAAE,CAAC,IAAI,EAAE,CAAC;gBACrE,MAAM;YACR,KAAK,OAAO;gBACV,8DAA8D;gBAC9D,MAAM;oBACJ,IAAI,EAAE,OAAO;oBACb,KAAK,EAAE,EAAE,YAAY,EAAE,EAAE,CAAC,YAAY,EAAE,YAAY,EAAE,EAAE,CAAC,YAAY,EAAE;iBACxE,CAAC;gBACF,MAAM;YACR,KAAK,OAAO;gBACV,sEAAsE;gBACtE,MAAM,EAAE,IAAI,EAAE,OAAO,EAAE,OAAO,EAAE,EAAE,CAAC,OAAO,EAAE,CAAC;gBAC7C,MAAM;QACV,CAAC;IACH,CAAC;AACH,CAAC"}
@@ -1 +1 @@
1
- {"version":3,"file":"engine.d.ts","sourceRoot":"","sources":["../src/engine.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;GAyBG;AAaH,OAAO,KAAK,EAEV,gBAAgB,EAChB,MAAM,EAUP,MAAM,YAAY,CAAC;AAEpB,wBAAgB,YAAY,CAAC,IAAI,EAAE,gBAAgB,GAAG,MAAM,CAqS3D;AAED;;;;GAIG;AACH,wBAAgB,YAAY,CAAC,CAAC,SAAS,OAAO,YAAY,EAAE,WAAW,EAAE,CAAC,EAAE,CAAC,GAAG,CAAC,CAEhF"}
1
+ {"version":3,"file":"engine.d.ts","sourceRoot":"","sources":["../src/engine.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;GAyBG;AAeH,OAAO,KAAK,EAEV,gBAAgB,EAChB,MAAM,EAUP,MAAM,YAAY,CAAC;AAEpB,wBAAgB,YAAY,CAAC,IAAI,EAAE,gBAAgB,GAAG,MAAM,CA4S3D;AAED;;;;GAIG;AACH,wBAAgB,YAAY,CAAC,CAAC,SAAS,OAAO,YAAY,EAAE,WAAW,EAAE,CAAC,EAAE,CAAC,GAAG,CAAC,CAEhF"}
package/dist/engine.js CHANGED
@@ -24,8 +24,15 @@
24
24
  * honored — needs a `StoppingCriteria` adapter. Tracked as a
25
25
  * follow-up.
26
26
  */
27
- import { AutoModelForCausalLM, AutoTokenizer, TextStreamer, env as transformersEnv, } from '@huggingface/transformers';
28
27
  import { parseToolCalls } from './parse-tool-calls.js';
28
// Lazy-load the heavy transformers runtime so importing @inbrowser/model (e.g.
// for a cloud provider) never statically bundles ONNX/WASM. The chunk is
// fetched only when an engine actually loads or generates.
//
// Holds the in-flight or settled import promise, or `null` when no import
// has been started (or the last attempt failed).
let transformersModule = null;
/**
 * Return the shared promise for the `@huggingface/transformers` module.
 *
 * Concurrent and later callers receive the same promise. If the import
 * rejects, the cache is cleared so the next call starts a fresh import
 * instead of being handed the same rejected promise again; the caller of
 * the failed attempt still observes the rejection.
 *
 * @returns {Promise<object>} Promise for the module namespace object.
 */
function loadTransformers() {
    if (transformersModule == null) {
        const pending = import('@huggingface/transformers');
        transformersModule = pending;
        // Do not keep a failed load cached. The identity check avoids
        // clobbering a newer attempt that may already have replaced this one.
        pending.catch(() => {
            if (transformersModule === pending) {
                transformersModule = null;
            }
        });
    }
    return transformersModule;
}
29
36
  export function createEngine(opts) {
30
37
  const model = opts.model;
31
38
  const capabilities = opts.capabilities;
@@ -79,16 +86,17 @@ export function createEngine(opts) {
79
86
  if (loadPromise)
80
87
  return loadPromise;
81
88
  setState('loading');
82
- // `weightsBaseUrl` overrides the HF Hub origin for self-hosted
83
- // mirrors. Transformers.js exposes this as the global
84
- // `env.remoteHost`; we set it process-wide before load. Documented
85
- // limitation: with multiple engines spanning different remotes,
86
- // the last one to load wins. Realistic use case (one app, one
87
- // mirror) is unaffected.
88
- if (opts.weightsBaseUrl) {
89
- transformersEnv.remoteHost = opts.weightsBaseUrl;
90
- }
91
89
  loadPromise = (async () => {
90
+ const { AutoTokenizer, AutoModelForCausalLM, env: transformersEnv, } = await loadTransformers();
91
+ // `weightsBaseUrl` overrides the HF Hub origin for self-hosted
92
+ // mirrors. Transformers.js exposes this as the global
93
+ // `env.remoteHost`; we set it process-wide before load. Documented
94
+ // limitation: with multiple engines spanning different remotes,
95
+ // the last one to load wins. Realistic use case (one app, one
96
+ // mirror) is unaffected.
97
+ if (opts.weightsBaseUrl) {
98
+ transformersEnv.remoteHost = opts.weightsBaseUrl;
99
+ }
92
100
  // AutoTokenizer (not AutoProcessor): text-only models like
93
101
  // SmolLM2 ship no preprocessor_config.json and AutoProcessor
94
102
  // 404s on them. Multimodal models (e.g., Gemma 4 audio) still
@@ -200,6 +208,8 @@ export function createEngine(opts) {
200
208
  // channels but inconsistent emission (Gemma 4 family — see
201
209
  // presets.ts) deliberately omit `thinkingTags` to take this path.
202
210
  const preserveSpecialTokens = useThinking && capabilities.thinkingTags !== undefined;
211
+ // Cached after `ensureReady` above already loaded it — resolves instantly.
212
+ const { TextStreamer } = await loadTransformers();
203
213
  const streamer = new TextStreamer(tokenizer, {
204
214
  skip_prompt: true,
205
215
  ...(preserveSpecialTokens ? { skip_special_tokens: false } : {}),