@inbrowser/model 0.1.1 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +44 -18
- package/README.md +129 -20
- package/dist/contract.d.ts +104 -0
- package/dist/contract.d.ts.map +1 -0
- package/dist/contract.js +13 -0
- package/dist/contract.js.map +1 -0
- package/dist/engine-client.d.ts +44 -0
- package/dist/engine-client.d.ts.map +1 -0
- package/dist/engine-client.js +136 -0
- package/dist/engine-client.js.map +1 -0
- package/dist/engine.d.ts.map +1 -1
- package/dist/engine.js +20 -10
- package/dist/engine.js.map +1 -1
- package/dist/index.d.ts +25 -8
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +46 -8
- package/dist/index.js.map +1 -1
- package/dist/presets.d.ts +10 -0
- package/dist/presets.d.ts.map +1 -1
- package/dist/presets.js +21 -0
- package/dist/presets.js.map +1 -1
- package/dist/providers/anthropic.d.ts +45 -0
- package/dist/providers/anthropic.d.ts.map +1 -0
- package/dist/providers/anthropic.js +217 -0
- package/dist/providers/anthropic.js.map +1 -0
- package/dist/providers/claude-cli.d.ts +135 -0
- package/dist/providers/claude-cli.d.ts.map +1 -0
- package/dist/providers/claude-cli.js +270 -0
- package/dist/providers/claude-cli.js.map +1 -0
- package/dist/providers/claude-code.d.ts +188 -0
- package/dist/providers/claude-code.d.ts.map +1 -0
- package/dist/providers/claude-code.js +182 -0
- package/dist/providers/claude-code.js.map +1 -0
- package/dist/providers/gemini.d.ts +32 -0
- package/dist/providers/gemini.d.ts.map +1 -0
- package/dist/providers/gemini.js +441 -0
- package/dist/providers/gemini.js.map +1 -0
- package/dist/providers/llama-server.d.ts +15 -0
- package/dist/providers/llama-server.d.ts.map +1 -0
- package/dist/providers/llama-server.js +51 -0
- package/dist/providers/llama-server.js.map +1 -0
- package/dist/providers/oai-compat.d.ts +113 -0
- package/dist/providers/oai-compat.d.ts.map +1 -0
- package/dist/providers/oai-compat.js +257 -0
- package/dist/providers/oai-compat.js.map +1 -0
- package/dist/providers/ollama.d.ts +15 -0
- package/dist/providers/ollama.d.ts.map +1 -0
- package/dist/providers/ollama.js +51 -0
- package/dist/providers/ollama.js.map +1 -0
- package/dist/providers/openrouter-oauth.d.ts +67 -0
- package/dist/providers/openrouter-oauth.d.ts.map +1 -0
- package/dist/providers/openrouter-oauth.js +84 -0
- package/dist/providers/openrouter-oauth.js.map +1 -0
- package/dist/providers/openrouter.d.ts +16 -0
- package/dist/providers/openrouter.d.ts.map +1 -0
- package/dist/providers/openrouter.js +27 -0
- package/dist/providers/openrouter.js.map +1 -0
- package/dist/providers/requesty.d.ts +16 -0
- package/dist/providers/requesty.d.ts.map +1 -0
- package/dist/providers/requesty.js +27 -0
- package/dist/providers/requesty.js.map +1 -0
- package/dist/providers/types.d.ts +50 -0
- package/dist/providers/types.d.ts.map +1 -0
- package/dist/providers/types.js +2 -0
- package/dist/providers/types.js.map +1 -0
- package/dist/sse.d.ts +20 -0
- package/dist/sse.d.ts.map +1 -0
- package/dist/sse.js +47 -0
- package/dist/sse.js.map +1 -0
- package/dist/types.d.ts +2 -13
- package/dist/types.d.ts.map +1 -1
- package/dist/usage.d.ts +6 -0
- package/dist/usage.d.ts.map +1 -0
- package/dist/usage.js +55 -0
- package/dist/usage.js.map +1 -0
- package/dist/with-retry.d.ts +27 -0
- package/dist/with-retry.d.ts.map +1 -0
- package/dist/with-retry.js +55 -0
- package/dist/with-retry.js.map +1 -0
- package/dist/worker.d.ts +1 -1
- package/dist/worker.js +1 -1
- package/package.json +14 -30
- package/dist/adapters/agent.d.ts +0 -19
- package/dist/adapters/agent.d.ts.map +0 -1
- package/dist/adapters/agent.js +0 -96
- package/dist/adapters/agent.js.map +0 -1
- package/dist/adapters/relay.d.ts +0 -17
- package/dist/adapters/relay.d.ts.map +0 -1
- package/dist/adapters/relay.js +0 -90
- package/dist/adapters/relay.js.map +0 -1
package/AGENTS.md
CHANGED
|
@@ -2,19 +2,36 @@
|
|
|
2
2
|
|
|
3
3
|
## Purpose
|
|
4
4
|
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
5
|
+
The model layer. Two halves:
|
|
6
|
+
|
|
7
|
+
1. **Contract + cloud providers.** `src/contract.ts` defines the one
|
|
8
|
+
`ModelClient` contract the whole stack shares (relay + agent both
|
|
9
|
+
consume it). `src/providers/*` are the cloud providers (Gemini,
|
|
10
|
+
OpenRouter, Requesty, Anthropic, OpenAI-compatible, Ollama, llama-server, Claude-CLI, Claude-Code), each a
|
|
11
|
+
factory returning a `ModelClient`. `src/with-retry.ts` decorates one.
|
|
12
|
+
2. **On-device engine.** Wraps `@huggingface/transformers` behind a
|
|
13
|
+
narrow `Engine` surface (`src/engine.ts`) that streams `EngineEvent`.
|
|
14
|
+
|
|
15
|
+
The engine is also a `ModelClient`, via `createEngineModelClient`
|
|
16
|
+
(`src/engine-client.ts`; exported from the root + the
|
|
17
|
+
`@inbrowser/model/engine-client` subpath). It wraps an `Engine`,
|
|
18
|
+
widening the engine's `EngineEvent` stream to the contract's
|
|
19
|
+
`ModelEvent`. The old engine→relay/agent adapter subpaths were removed;
|
|
20
|
+
this single wrapper replaces them. (The site's in-browser docs-chat
|
|
21
|
+
toggle that drives a local engine through the agent is a separate,
|
|
22
|
+
still-forthcoming piece — the adapter is the building block it needs.)
|
|
9
23
|
|
|
10
24
|
## Layering invariants
|
|
11
25
|
|
|
12
|
-
- `src/
|
|
13
|
-
|
|
14
|
-
- `src/
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
26
|
+
- `src/contract.ts` is type-only (zero runtime imports) so importing the
|
|
27
|
+
contract never pulls in the engine or `@huggingface/transformers`.
|
|
28
|
+
- `src/types.ts` is the canonical engine type surface. Engine-side files
|
|
29
|
+
import engine types from here.
|
|
30
|
+
- `src/engine.ts` is the only module that holds runtime model state.
|
|
31
|
+
- Each `src/providers/<name>.ts` is self-contained: it imports the
|
|
32
|
+
contract types and emits `ModelEvent`s. Providers do not import the
|
|
33
|
+
relay or the agent — the dependency points inward (relay/agent depend
|
|
34
|
+
on this package's contract, never the reverse).
|
|
18
35
|
- `src/worker.ts` returns the same `Engine` shape `createEngine`
|
|
19
36
|
returns. Consumers must not need to know which side of `postMessage`
|
|
20
37
|
the engine lives on.
|
|
@@ -35,16 +52,25 @@ Use the precise terms — they show up in types, comments, and PRs:
|
|
|
35
52
|
|
|
36
53
|
- Don't add `createGemmaEngine` / `createPhi3Engine` / sugar
|
|
37
54
|
factories. New models are new `ModelPreset` entries.
|
|
38
|
-
- Don't put tool-calling polyfill logic here.
|
|
39
|
-
|
|
40
|
-
|
|
55
|
+
- Don't put the agent's tool-calling polyfill logic here. The native
|
|
56
|
+
envelope recognition (`parseToolCalls`) is mechanical and stays; the
|
|
57
|
+
prompt-engineered polyfill is a strategy and belongs in
|
|
58
|
+
`@inbrowser/agent`.
|
|
41
59
|
- Don't widen `EngineEvent` with cloud-only concepts (cost,
|
|
42
|
-
thoughtSignature). Translate at the
|
|
60
|
+
thoughtSignature). Translate at the `createEngineModelClient`
|
|
61
|
+
boundary (`src/engine-client.ts`), not in the engine.
|
|
62
|
+
- Don't re-introduce provider exports into `@inbrowser/relay` — the
|
|
63
|
+
providers live here now and the relay consumes them as
|
|
64
|
+
`ModelClientFactory`s.
|
|
43
65
|
- Don't make `@huggingface/transformers` a regular dependency. It's
|
|
44
|
-
a peer dep; consumers control the version.
|
|
66
|
+
a peer dep; consumers control the version. (The Claude Code Agent SDK,
|
|
67
|
+
used only by `claudeCodeModelClient`, is an optional peer dep.)
|
|
45
68
|
|
|
46
69
|
## Status
|
|
47
70
|
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
71
|
+
Contract + cloud providers are the live path: relay and agent both
|
|
72
|
+
consume a `ModelClient` from here. The engine loads and `generate()`
|
|
73
|
+
streams real tokens, and the engine is now a `ModelClient` via
|
|
74
|
+
`createEngineModelClient` (the engine→ModelClient adapter). The next
|
|
75
|
+
slice is the site wiring that drives a local engine through the agent
|
|
76
|
+
end to end (the in-browser docs-chat toggle).
|
package/README.md
CHANGED
|
@@ -1,19 +1,96 @@
|
|
|
1
1
|
# @inbrowser/model
|
|
2
2
|
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
3
|
+
The model layer for the stack. It owns the one model-call contract —
|
|
4
|
+
`ModelClient` — plus the cloud providers that implement it and the
|
|
5
|
+
on-device LLM engine. `@inbrowser/relay` (transport) and
|
|
6
|
+
`@inbrowser/agent` (runtime) both consume a `ModelClient`, so this is the
|
|
7
|
+
single shared definition of "an LLM" for everything downstream.
|
|
6
8
|
|
|
7
|
-
|
|
8
|
-
> RPC frames are in place. The `@huggingface/transformers` wiring
|
|
9
|
-
> inside `createEngine` is not yet implemented — `generate()` yields
|
|
10
|
-
> an `error` event today. See `src/engine.ts`.
|
|
9
|
+
Two halves, one package:
|
|
11
10
|
|
|
12
|
-
|
|
11
|
+
- **The contract + cloud providers.** `@inbrowser/model`
|
|
12
|
+
defines `ModelClient` / `ModelRequest` / `ModelEvent`. The cloud
|
|
13
|
+
providers (`geminiModelClient`, `openrouterModelClient`,
|
|
14
|
+
`requestyModelClient`, `anthropicModelClient`, `openaiCompatModelClient`,
|
|
15
|
+
`ollamaModelClient`, `llamaServerModelClient`, `claudeCliModelClient`,
|
|
16
|
+
`claudeCodeModelClient`) are factories that each return a `ModelClient`.
|
|
17
|
+
`withRetry` decorates one.
|
|
18
|
+
- **The on-device engine.** `createEngine` loads ONNX models in the
|
|
19
|
+
browser via `@huggingface/transformers` + ONNX Runtime Web (WebGPU /
|
|
20
|
+
WASM) and exposes them behind a narrow `Engine` surface that streams
|
|
21
|
+
`EngineEvent`s.
|
|
22
|
+
|
|
23
|
+
> **Status.** Contract + cloud providers are the live integration path:
|
|
24
|
+
> relay and agent both consume a `ModelClient`. `createEngine` loads a
|
|
25
|
+
> model through `@huggingface/transformers` and `generate()` streams real
|
|
26
|
+
> tokens (the end-to-end load path runs in `examples/local-llm-poc`,
|
|
27
|
+
> headless-verified). The engine is now a `ModelClient` too, via
|
|
28
|
+
> `createEngineModelClient` (exported from the package root),
|
|
29
|
+
> which widens the engine's `EngineEvent` stream to the contract's
|
|
30
|
+
> `ModelEvent`. The old `@inbrowser/model/relay` and
|
|
31
|
+
> `@inbrowser/model/agent` adapter subpaths have been removed.
|
|
32
|
+
> Known gaps: `GenerateOpts.stop` sequences are accepted but not yet
|
|
33
|
+
> enforced, and the site's in-browser docs-chat path that drives a local
|
|
34
|
+
> engine through the agent is still forthcoming (the adapter exists; the
|
|
35
|
+
> site toggle does not).
|
|
36
|
+
|
|
37
|
+
## A cloud model as a `ModelClient`
|
|
38
|
+
|
|
39
|
+
```ts
|
|
40
|
+
import { geminiModelClient } from '@inbrowser/model';
|
|
41
|
+
|
|
42
|
+
const client = geminiModelClient({ apiKey: process.env.GEMINI_KEY, model: 'gemini-3.5-flash' });
|
|
43
|
+
|
|
44
|
+
for await (const evt of client.chat(
|
|
45
|
+
{
|
|
46
|
+
messages: [{ role: 'user', text: 'Explain WebGPU in one paragraph.' }],
|
|
47
|
+
tools: [],
|
|
48
|
+
toolUseEnabled: false,
|
|
49
|
+
},
|
|
50
|
+
new AbortController().signal,
|
|
51
|
+
)) {
|
|
52
|
+
if (evt.kind === 'text') process.stdout.write(evt.text);
|
|
53
|
+
else if (evt.kind === 'usage') console.error(evt.usage);
|
|
54
|
+
}
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
The turn ends when the iterable returns; a `usage` event (or a terminal
|
|
58
|
+
`error` event) is the last thing emitted. There is no `turn_complete`
|
|
59
|
+
event.
|
|
60
|
+
|
|
61
|
+
## A local OpenAI-compatible server
|
|
62
|
+
|
|
63
|
+
Ollama, llama.cpp's `llama-server`, vLLM, LM Studio, LocalAI, and friends all
|
|
64
|
+
expose the same OpenAI `POST /v1/chat/completions` wire shape. One generic
|
|
65
|
+
factory talks to any of them; two named presets carry the right defaults for
|
|
66
|
+
the common local servers:
|
|
67
|
+
|
|
68
|
+
```ts
|
|
69
|
+
import {
|
|
70
|
+
openaiCompatModelClient, // any OAI server — set baseUrl (or endpoint)
|
|
71
|
+
ollamaModelClient, // preset: defaults to http://localhost:11434, no auth
|
|
72
|
+
llamaServerModelClient, // preset: defaults to http://localhost:8080
|
|
73
|
+
} from '@inbrowser/model';
|
|
74
|
+
|
|
75
|
+
// Generic: point at any OAI-compatible server. `apiKey` becomes a Bearer token.
|
|
76
|
+
const vllm = openaiCompatModelClient({ baseUrl: 'http://gpu.local:8000', model: 'qwen2.5' });
|
|
77
|
+
|
|
78
|
+
// llama.cpp llama-server. `--api-key` is optional; pass it as `apiKey`.
|
|
79
|
+
const llama = llamaServerModelClient({ model: 'qwen2.5-coder', apiKey: process.env.LLAMA_KEY });
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
> **Tool calling on `llama-server` needs `--jinja`.** The server only honors the
|
|
83
|
+
> OpenAI `tools` array when launched with `--jinja` (so it applies a tool-aware
|
|
84
|
+
> chat template); without it, tool calls never stream back. Auth is off unless
|
|
85
|
+
> you start it with `--api-key KEY`.
|
|
86
|
+
|
|
87
|
+
The presets delegate to `openaiCompatModelClient`; reach for the generic factory
|
|
88
|
+
directly for any server without a named preset.
|
|
89
|
+
|
|
90
|
+
## An on-device model via the engine
|
|
13
91
|
|
|
14
92
|
```ts
|
|
15
|
-
import { createEngine } from '@inbrowser/model';
|
|
16
|
-
import { gemma4_E2B } from '@inbrowser/model/presets';
|
|
93
|
+
import { createEngine, gemma4_E2B } from '@inbrowser/model';
|
|
17
94
|
|
|
18
95
|
const engine = createEngine(gemma4_E2B);
|
|
19
96
|
await engine.ensureReady();
|
|
@@ -25,17 +102,48 @@ for await (const evt of engine.generate([
|
|
|
25
102
|
}
|
|
26
103
|
```
|
|
27
104
|
|
|
105
|
+
The engine speaks `EngineEvent` (`token` / `thinking` / `tool_call` /
|
|
106
|
+
`usage` / `error`), not `ModelEvent`. To use it as a `ModelClient` —
|
|
107
|
+
e.g. to hand it to the agent — wrap it with `createEngineModelClient`:
|
|
108
|
+
|
|
109
|
+
```ts
|
|
110
|
+
import { createEngine, createEngineModelClient, smollm2_360m } from '@inbrowser/model';
|
|
111
|
+
|
|
112
|
+
const engine = createEngine(smollm2_360m);
|
|
113
|
+
const client = createEngineModelClient(engine); // a ModelClient
|
|
114
|
+
|
|
115
|
+
for await (const evt of client.chat(
|
|
116
|
+
{ messages: [{ role: 'user', text: 'Hello' }], tools: [], toolUseEnabled: false },
|
|
117
|
+
new AbortController().signal,
|
|
118
|
+
)) {
|
|
119
|
+
if (evt.kind === 'text') process.stdout.write(evt.text);
|
|
120
|
+
}
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
The adapter maps `token` → `text`, folds the engine's terminal `usage`
|
|
124
|
+
into a `ModelEvent` `usage`, passes `tool_call`s through (no signature),
|
|
125
|
+
and drops the engine-only extras (`decodeMs`, `recoverable`). Wiring a
|
|
126
|
+
local model into the docs-chat site through the agent is forthcoming;
|
|
127
|
+
the `createEngineModelClient` building block it needs now exists.
|
|
128
|
+
|
|
28
129
|
## Surface
|
|
29
130
|
|
|
131
|
+
Everything is imported from the package root `@inbrowser/model`.
|
|
132
|
+
|
|
30
133
|
| Export | What it gives you |
|
|
31
134
|
|---|---|
|
|
32
|
-
| `
|
|
135
|
+
| `ModelClient`, `ModelRequest`, `ModelEvent`, `ModelMessage`, `ModelUsage`, `ToolSpec`, `ReasoningEffort` | The shared contract (type-only) |
|
|
136
|
+
| `geminiModelClient`, `openrouterModelClient`, `requestyModelClient`, `anthropicModelClient`, `openaiCompatModelClient`, `ollamaModelClient`, `llamaServerModelClient`, `claudeCliModelClient`, `claudeCodeModelClient` | Cloud + local provider factories; each returns a `ModelClient` |
|
|
137
|
+
| `OpenAiCompatConfig`, `OllamaConfig`, `LlamaServerConfig` | Config shapes for the OpenAI-compatible factory and its local presets |
|
|
138
|
+
| `withRetry(client, opts?)` | Decorator that retries transient upstream errors while nothing has streamed |
|
|
139
|
+
| `CloudProviderConfig`, `ModelClientFactory` | Shared provider config + the factory type the relay routes on |
|
|
140
|
+
| `createEngine(preset)` | Runtime `Engine` — owns load state + decode loop, streams `EngineEvent` |
|
|
141
|
+
| `createEngineModelClient(engine, id?)` | Wraps an `Engine` as a `ModelClient` (maps `EngineEvent` → `ModelEvent`) |
|
|
33
142
|
| `definePreset(p)` | Type-safe identity helper for community presets |
|
|
34
|
-
| `
|
|
35
|
-
|
|
|
36
|
-
|
|
|
37
|
-
|
|
|
38
|
-
| `@inbrowser/model/worker` | `hostEngineInWorker(self)` + `connectWorkerEngine(opts)` |
|
|
143
|
+
| `parseToolCalls`, `splitThinking` | Stream transformers over an `EngineEvent` stream |
|
|
144
|
+
| `ModelPreset`, `Engine`, `EngineEvent`, … | Public engine types |
|
|
145
|
+
| `gemma4_E2B`, `gemma4_E4B`, `qwen2_5_coder_1_5b`, `qwen3_1_7b`, `deepseek_r1_qwen_1_5b`, `smollm2_360m` | The six bundled presets |
|
|
146
|
+
| `hostEngineInWorker(self)`, `connectWorkerEngine(opts)` | Worker host/connect helpers |
|
|
39
147
|
|
|
40
148
|
## Vocabulary anchor
|
|
41
149
|
|
|
@@ -54,10 +162,11 @@ for await (const evt of engine.generate([
|
|
|
54
162
|
- One factory (`createEngine`), many presets. No `createGemmaEngine`.
|
|
55
163
|
- `capabilities` is on the preset, not the engine — interrogable
|
|
56
164
|
pre-load (`gemma4_E2B.capabilities.contextWindow`).
|
|
57
|
-
- `EngineEvent` is narrower than `
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
165
|
+
- `EngineEvent` is narrower than the contract's `ModelEvent` (no
|
|
166
|
+
cost, no `thoughtSignature`). `createEngineModelClient` is the place
|
|
167
|
+
that widens it — translate at that boundary, not in the engine.
|
|
168
|
+
- Worker subpath returns the same `Engine` shape; a consumer cannot
|
|
169
|
+
tell whether it holds a direct or remote engine.
|
|
61
170
|
- Tool calling is not native to Gemma 4. The polyfill (prompt-engineered
|
|
62
171
|
tool calling + structured-output parsing) lives in `@inbrowser/agent`,
|
|
63
172
|
not here.
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* `@inbrowser/model/contract` — the one model-call contract for the stack.
|
|
3
|
+
*
|
|
4
|
+
* A `ModelClient` is anything that, given a `ModelRequest`, streams `ModelEvent`s:
|
|
5
|
+
* the cloud providers, the on-device engine, and any adapter all implement it.
|
|
6
|
+
* Both `@inbrowser/relay` (transport) and `@inbrowser/agent` (runtime) consume a
|
|
7
|
+
* `ModelClient`, so this is the single shared LLM contract.
|
|
8
|
+
*
|
|
9
|
+
* This module is TYPE-ONLY (zero runtime imports), so importing the contract
|
|
10
|
+
* never pulls in the on-device engine or `@huggingface/transformers`.
|
|
11
|
+
*/
|
|
12
|
+
/** A turn of the conversation handed to a model. */
|
|
13
|
+
export interface ModelMessage {
|
|
14
|
+
role: 'system' | 'user' | 'assistant' | 'tool';
|
|
15
|
+
text?: string;
|
|
16
|
+
/** Tool calls the assistant made (assistant turns). */
|
|
17
|
+
toolCalls?: {
|
|
18
|
+
id: string;
|
|
19
|
+
name: string;
|
|
20
|
+
args: unknown;
|
|
21
|
+
signature?: string;
|
|
22
|
+
}[];
|
|
23
|
+
/** The call this message answers (tool-result turns). */
|
|
24
|
+
toolCallId?: string;
|
|
25
|
+
/** Tool name (tool-result turns). */
|
|
26
|
+
name?: string;
|
|
27
|
+
/** Serialized tool result (tool-result turns). */
|
|
28
|
+
resultJson?: string;
|
|
29
|
+
}
|
|
30
|
+
/**
|
|
31
|
+
* Tool declaration in the OAI function-calling shape that modern chat templates
|
|
32
|
+
* accept directly (Qwen, DeepSeek, Llama 3.2+, etc.). Cloud providers that speak
|
|
33
|
+
* a different wire shape translate internally.
|
|
34
|
+
*/
|
|
35
|
+
export interface ToolSpec {
|
|
36
|
+
type: 'function';
|
|
37
|
+
function: {
|
|
38
|
+
name: string;
|
|
39
|
+
description: string;
|
|
40
|
+
parameters: unknown;
|
|
41
|
+
};
|
|
42
|
+
}
|
|
43
|
+
export type ReasoningEffort = 'off' | 'low' | 'medium' | 'high';
|
|
44
|
+
/** A single model call. */
|
|
45
|
+
export interface ModelRequest {
|
|
46
|
+
messages: ModelMessage[];
|
|
47
|
+
tools: ToolSpec[];
|
|
48
|
+
/** Whether tool use is enabled this turn (cheaper than checking tools.length). */
|
|
49
|
+
toolUseEnabled: boolean;
|
|
50
|
+
temperature?: number;
|
|
51
|
+
topP?: number;
|
|
52
|
+
topK?: number;
|
|
53
|
+
reasoningEffort?: ReasoningEffort;
|
|
54
|
+
}
|
|
55
|
+
/** Token + cost accounting for one turn. */
|
|
56
|
+
export interface ModelUsage {
|
|
57
|
+
promptTokens: number;
|
|
58
|
+
outputTokens: number;
|
|
59
|
+
cachedTokens?: number;
|
|
60
|
+
/** Reasoning tokens, when a provider reports them. */
|
|
61
|
+
reasoningTokens?: number;
|
|
62
|
+
/** Real dollar cost, when a provider reports it (e.g. OpenRouter). */
|
|
63
|
+
costUsd?: number;
|
|
64
|
+
}
|
|
65
|
+
/**
|
|
66
|
+
* One streamed item from a model call.
|
|
67
|
+
*
|
|
68
|
+
* The turn ends when the async iterable returns. On a normal end a `usage` event
|
|
69
|
+
* MUST be emitted before the return (it carries the final accounting); there is
|
|
70
|
+
* no separate terminal event. The exception is `error`, which is itself terminal:
|
|
71
|
+
* after an `error` event the iterable returns with no `usage` event. Consumers
|
|
72
|
+
* can therefore rely on exactly one of {a `usage` event, an `error` event} per
|
|
73
|
+
* turn.
|
|
74
|
+
*/
|
|
75
|
+
export type ModelEvent = {
|
|
76
|
+
kind: 'text';
|
|
77
|
+
text: string;
|
|
78
|
+
} | {
|
|
79
|
+
kind: 'thinking';
|
|
80
|
+
text: string;
|
|
81
|
+
} | {
|
|
82
|
+
kind: 'tool_call';
|
|
83
|
+
id: string;
|
|
84
|
+
name: string;
|
|
85
|
+
args: unknown;
|
|
86
|
+
signature?: string;
|
|
87
|
+
} | {
|
|
88
|
+
kind: 'usage';
|
|
89
|
+
usage: ModelUsage;
|
|
90
|
+
} | {
|
|
91
|
+
kind: 'error';
|
|
92
|
+
message: string;
|
|
93
|
+
};
|
|
94
|
+
/**
|
|
95
|
+
* The one model-call contract. Implemented by the cloud providers and the
|
|
96
|
+
* on-device engine; consumed by the relay (transport) and the agent (runtime).
|
|
97
|
+
*/
|
|
98
|
+
export interface ModelClient {
|
|
99
|
+
/** Stable id for metrics + provenance, e.g. `gemini:gemini-3.5-flash`. */
|
|
100
|
+
readonly id: string;
|
|
101
|
+
readonly supportsTools: boolean;
|
|
102
|
+
chat(req: ModelRequest, signal: AbortSignal): AsyncIterable<ModelEvent>;
|
|
103
|
+
}
|
|
104
|
+
//# sourceMappingURL=contract.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"contract.d.ts","sourceRoot":"","sources":["../src/contract.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAEH,oDAAoD;AACpD,MAAM,WAAW,YAAY;IAC3B,IAAI,EAAE,QAAQ,GAAG,MAAM,GAAG,WAAW,GAAG,MAAM,CAAC;IAC/C,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,uDAAuD;IACvD,SAAS,CAAC,EAAE;QAAE,EAAE,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,OAAO,CAAC;QAAC,SAAS,CAAC,EAAE,MAAM,CAAA;KAAE,EAAE,CAAC;IAC9E,yDAAyD;IACzD,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,qCAAqC;IACrC,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,kDAAkD;IAClD,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB;AAED;;;;GAIG;AACH,MAAM,WAAW,QAAQ;IACvB,IAAI,EAAE,UAAU,CAAC;IACjB,QAAQ,EAAE;QACR,IAAI,EAAE,MAAM,CAAC;QACb,WAAW,EAAE,MAAM,CAAC;QACpB,UAAU,EAAE,OAAO,CAAC;KACrB,CAAC;CACH;AAED,MAAM,MAAM,eAAe,GAAG,KAAK,GAAG,KAAK,GAAG,QAAQ,GAAG,MAAM,CAAC;AAEhE,2BAA2B;AAC3B,MAAM,WAAW,YAAY;IAC3B,QAAQ,EAAE,YAAY,EAAE,CAAC;IACzB,KAAK,EAAE,QAAQ,EAAE,CAAC;IAClB,kFAAkF;IAClF,cAAc,EAAE,OAAO,CAAC;IACxB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,eAAe,CAAC,EAAE,eAAe,CAAC;CACnC;AAED,4CAA4C;AAC5C,MAAM,WAAW,UAAU;IACzB,YAAY,EAAE,MAAM,CAAC;IACrB,YAAY,EAAE,MAAM,CAAC;IACrB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,sDAAsD;IACtD,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,sEAAsE;IACtE,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB;AAED;;;;;;;;;GASG;AACH,MAAM,MAAM,UAAU,GAClB;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,MAAM,CAAA;CAAE,GAC9B;IAAE,IAAI,EAAE,UAAU,CAAC;IAAC,IAAI,EAAE,MAAM,CAAA;CAAE,GAClC;IAAE,IAAI,EAAE,WAAW,CAAC;IAAC,EAAE,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,OAAO,CAAC;IAAC,SAAS,CAAC,EAAE,MAAM,CAAA;CAAE,GAClF;IAAE,IAAI,EAAE,OAAO,CAAC;IAAC,KAAK,EAAE,UAAU,CAAA;CAAE,GACpC;IAAE,IAAI,EAAE,OAAO,CAAC;IAAC,OAAO,EAAE,MAAM,CAAA;CAAE,CAAC;AAEvC;;;GAGG;AACH,MAAM,WAAW,WAAW;IAC1B,0EAA0E;IAC1E,QAAQ,CAAC,EAAE,EAAE,MAAM,CAAC;IACpB,QAAQ,CAAC,aAAa,EAAE,OAAO,CAAC;IAChC,IAAI,CAAC,GAAG,EAAE,YAAY,EAAE,MAAM,EAAE,WAAW,GAAG,aAAa,CAAC,UAAU,CAAC,CAAC;CACzE"}
|
package/dist/contract.js
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* `@inbrowser/model/contract` — the one model-call contract for the stack.
|
|
3
|
+
*
|
|
4
|
+
* A `ModelClient` is anything that, given a `ModelRequest`, streams `ModelEvent`s:
|
|
5
|
+
* the cloud providers, the on-device engine, and any adapter all implement it.
|
|
6
|
+
* Both `@inbrowser/relay` (transport) and `@inbrowser/agent` (runtime) consume a
|
|
7
|
+
* `ModelClient`, so this is the single shared LLM contract.
|
|
8
|
+
*
|
|
9
|
+
* This module is TYPE-ONLY (zero runtime imports), so importing the contract
|
|
10
|
+
* never pulls in the on-device engine or `@huggingface/transformers`.
|
|
11
|
+
*/
|
|
12
|
+
export {};
|
|
13
|
+
//# sourceMappingURL=contract.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"contract.js","sourceRoot":"","sources":["../src/contract.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG"}
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* `createEngineModelClient` — wraps an on-device `Engine` as a `ModelClient`.
|
|
3
|
+
*
|
|
4
|
+
* This is the adapter that lets the on-device engine plug into the same
|
|
5
|
+
* `ModelClient` contract the cloud providers (and the relay + agent) speak.
|
|
6
|
+
* Without it the engine can only be driven directly via its `EngineEvent`
|
|
7
|
+
* stream; with it the engine is just another `ModelClient` the agent/relay
|
|
8
|
+
* can route to.
|
|
9
|
+
*
|
|
10
|
+
* The mapping is deliberately lossless in the directions that matter and
|
|
11
|
+
* drops the cloud-irrelevant engine extras:
|
|
12
|
+
*
|
|
13
|
+
* - `EngineEvent.token` → `{ kind: 'text', text }`
|
|
14
|
+
* - `EngineEvent.thinking` → `{ kind: 'thinking', text }`
|
|
15
|
+
* - `EngineEvent.tool_call` → `{ kind: 'tool_call', id, name, args }`
|
|
16
|
+
* (the engine emits no signature — omitted)
|
|
17
|
+
* - `EngineEvent.usage` → `{ kind: 'usage', usage: { promptTokens,
|
|
18
|
+
* outputTokens } }` (`decodeMs` is dropped)
|
|
19
|
+
* - `EngineEvent.error` → `{ kind: 'error', message }`
|
|
20
|
+
* (`recoverable` is dropped)
|
|
21
|
+
*
|
|
22
|
+
* The engine already emits exactly one terminal `usage` (success) or `error`
|
|
23
|
+
* (failure) before its stream returns, so the contract's "exactly one of
|
|
24
|
+
* {usage, error} per turn" invariant carries straight through — this adapter
|
|
25
|
+
* synthesizes nothing.
|
|
26
|
+
*
|
|
27
|
+
* This module has runtime code (it constructs a `ModelClient` at runtime
|
|
28
|
+
* and imports engine types), so it lives on the engine surface, NOT in the
|
|
29
|
+
* type-only `./contract` module.
|
|
30
|
+
*/
|
|
31
|
+
import type { ModelClient } from './contract.js';
|
|
32
|
+
import type { Engine } from './types.js';
|
|
33
|
+
/**
|
|
34
|
+
* Wrap an `Engine` as a `ModelClient`.
|
|
35
|
+
*
|
|
36
|
+
* @param engine The on-device engine to drive.
|
|
37
|
+
* @param id Stable id for metrics + provenance. Defaults to
|
|
38
|
+
* `local:${engine.model.modelId}` when the engine exposes a model id,
|
|
39
|
+
* else `'local'`. The engine has no preset id of its own — `engine.model`
|
|
40
|
+
* is a bare `ModelRef` (HF Hub `modelId`), which is the most stable handle
|
|
41
|
+
* available.
|
|
42
|
+
*/
|
|
43
|
+
export declare function createEngineModelClient(engine: Engine, id?: string): ModelClient;
|
|
44
|
+
//# sourceMappingURL=engine-client.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"engine-client.d.ts","sourceRoot":"","sources":["../src/engine-client.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA6BG;AAEH,OAAO,KAAK,EAAE,WAAW,EAA0C,MAAM,eAAe,CAAC;AACzF,OAAO,KAAK,EAAE,MAAM,EAAiB,MAAM,YAAY,CAAC;AAExD;;;;;;;;;GASG;AACH,wBAAgB,uBAAuB,CAAC,MAAM,EAAE,MAAM,EAAE,EAAE,CAAC,EAAE,MAAM,GAAG,WAAW,CAkBhF"}
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* `createEngineModelClient` — wraps an on-device `Engine` as a `ModelClient`.
|
|
3
|
+
*
|
|
4
|
+
* This is the adapter that lets the on-device engine plug into the same
|
|
5
|
+
* `ModelClient` contract the cloud providers (and the relay + agent) speak.
|
|
6
|
+
* Without it the engine can only be driven directly via its `EngineEvent`
|
|
7
|
+
* stream; with it the engine is just another `ModelClient` the agent/relay
|
|
8
|
+
* can route to.
|
|
9
|
+
*
|
|
10
|
+
* The mapping is deliberately lossless in the directions that matter and
|
|
11
|
+
* drops the cloud-irrelevant engine extras:
|
|
12
|
+
*
|
|
13
|
+
* - `EngineEvent.token` → `{ kind: 'text', text }`
|
|
14
|
+
* - `EngineEvent.thinking` → `{ kind: 'thinking', text }`
|
|
15
|
+
* - `EngineEvent.tool_call` → `{ kind: 'tool_call', id, name, args }`
|
|
16
|
+
* (the engine emits no signature — omitted)
|
|
17
|
+
* - `EngineEvent.usage` → `{ kind: 'usage', usage: { promptTokens,
|
|
18
|
+
* outputTokens } }` (`decodeMs` is dropped)
|
|
19
|
+
* - `EngineEvent.error` → `{ kind: 'error', message }`
|
|
20
|
+
* (`recoverable` is dropped)
|
|
21
|
+
*
|
|
22
|
+
* The engine already emits exactly one terminal `usage` (success) or `error`
|
|
23
|
+
* (failure) before its stream returns, so the contract's "exactly one of
|
|
24
|
+
* {usage, error} per turn" invariant carries straight through — this adapter
|
|
25
|
+
* synthesizes nothing.
|
|
26
|
+
*
|
|
27
|
+
* This module has runtime code (it constructs a `ModelClient` at runtime
|
|
28
|
+
* and imports engine types), so it lives on the engine surface, NOT in the
|
|
29
|
+
* type-only `./contract` module.
|
|
30
|
+
*/
|
|
31
|
+
/**
 * Adapt an on-device `Engine` to the `ModelClient` contract.
 *
 * @param engine The engine whose `generate()` stream backs every `chat()` call.
 * @param id Optional stable id for metrics + provenance. When it is `null` or
 *   `undefined` the id falls back to `local:${engine.model.modelId}` if the
 *   engine exposes a truthy model id, and to `'local'` otherwise.
 * @returns A `ModelClient` whose `supportsTools` mirrors
 *   `engine.capabilities.supportsTools` and whose `chat()` forwards the
 *   flattened messages, the sampling options and the abort signal to
 *   `engine.generate()`, translating the resulting stream with `mapEvents`.
 */
export function createEngineModelClient(engine, id) {
    let clientId = id;
    if (clientId == null) {
        // Only consult the engine when the caller supplied no id.
        const hubModelId = engine.model?.modelId;
        clientId = hubModelId ? `local:${engine.model.modelId}` : 'local';
    }
    return {
        id: clientId,
        supportsTools: engine.capabilities.supportsTools,
        chat(req, signal) {
            const prompt = toEngineMessages(req.messages);
            // Tool specs are forwarded only when the request enables tool use.
            const generateOpts = {
                tools: req.toolUseEnabled ? req.tools : undefined,
                temperature: req.temperature,
                topP: req.topP,
                topK: req.topK,
                signal,
            };
            return mapEvents(engine.generate(prompt, generateOpts));
        },
    };
}
|
|
59
|
+
/**
 * Convert the contract's `ModelMessage[]` into `{ role, text }` entries for
 * the engine, rendering tool traffic as plain text:
 *
 *   - `role: 'tool'` becomes a `user` message reading
 *     `Tool ${name} result: ${body}`, where `name` falls back to `'tool'`
 *     and `body` is `resultJson`, else `text`, else an empty string.
 *   - An `assistant` message with a non-empty `toolCalls` keeps its text
 *     (when non-empty) and appends one `Tool call: ${name}(${args})` line per
 *     call, joined with newlines.
 *   - Any other message keeps its role, with `text` defaulting to `''`.
 *
 * Only each call's `name` and `args` reach the flattened text.
 */
function toEngineMessages(messages) {
    const flatten = (msg) => {
        if (msg.role === 'tool') {
            const label = msg.name ?? 'tool';
            const payload = msg.resultJson ?? msg.text ?? '';
            return { role: 'user', text: `Tool ${label} result: ${payload}` };
        }
        const carriesCalls = msg.role === 'assistant' && Boolean(msg.toolCalls) && msg.toolCalls.length > 0;
        if (!carriesCalls) {
            // system / user / plain assistant: role + text straight through.
            return { role: msg.role, text: msg.text ?? '' };
        }
        const callLines = [...msg.toolCalls].map((call) => `Tool call: ${call.name}(${stringifyArgs(call.args)})`);
        const lines = msg.text ? [msg.text, ...callLines] : callLines;
        return { role: 'assistant', text: lines.join('\n') };
    };
    return [...messages].map((msg) => flatten(msg));
}
|
|
98
|
+
/**
 * Render tool-call arguments as text for embedding in a flattened prompt line.
 *
 * - Strings are returned unchanged.
 * - `null` / `undefined` are rendered as `{}`.
 * - Everything else is JSON-encoded; when encoding throws (cycles, BigInt) or
 *   produces no output, the value's `String(...)` form is used instead.
 *
 * @param args Tool-call arguments of any type.
 * @returns A string representation of `args`.
 */
function stringifyArgs(args) {
    if (typeof args === 'string') {
        return args;
    }
    try {
        // JSON.stringify returns `undefined` (not a string) for functions,
        // symbols, and a `toJSON()` that yields `undefined`; fall back so the
        // helper always hands back a string.
        return JSON.stringify(args ?? {}) ?? String(args);
    }
    catch {
        // Unserializable input (circular structure, BigInt, ...).
        return String(args);
    }
}
|
|
108
|
+
/**
 * Re-emit an engine `EngineEvent` stream as contract `ModelEvent`s.
 *
 * Recognised kinds are `token` (emitted as `text`), `thinking`, `tool_call`,
 * `usage` and `error`; an event of any other kind produces no output.
 * Iteration continues until the source is exhausted.
 *
 * @param source Async iterable of engine events.
 * @returns An async generator of the translated events.
 */
async function* mapEvents(source) {
    for await (const event of source) {
        const { kind } = event;
        if (kind === 'token') {
            yield { kind: 'text', text: event.text };
        }
        else if (kind === 'thinking') {
            yield { kind: 'thinking', text: event.text };
        }
        else if (kind === 'tool_call') {
            // The engine provides no signature, so none is forwarded.
            yield { kind: 'tool_call', id: event.id, name: event.name, args: event.args };
        }
        else if (kind === 'usage') {
            // Only the token counts are forwarded; `decodeMs` is left out.
            const usage = { promptTokens: event.promptTokens, outputTokens: event.outputTokens };
            yield { kind: 'usage', usage };
        }
        else if (kind === 'error') {
            // Only the message is forwarded; `recoverable` is left out.
            yield { kind: 'error', message: event.message };
        }
    }
}
|
|
136
|
+
//# sourceMappingURL=engine-client.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"engine-client.js","sourceRoot":"","sources":["../src/engine-client.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA6BG;AAKH;;;;;;;;;GASG;AACH,MAAM,UAAU,uBAAuB,CAAC,MAAc,EAAE,EAAW;IACjE,MAAM,UAAU,GAAG,EAAE,IAAI,CAAC,MAAM,CAAC,KAAK,EAAE,OAAO,CAAC,CAAC,CAAC,SAAS,MAAM,CAAC,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC;IAE7F,OAAO;QACL,EAAE,EAAE,UAAU;QACd,aAAa,EAAE,MAAM,CAAC,YAAY,CAAC,aAAa;QAChD,IAAI,CAAC,GAAiB,EAAE,MAAmB;YACzC,MAAM,cAAc,GAAG,gBAAgB,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;YACtD,MAAM,MAAM,GAAG,MAAM,CAAC,QAAQ,CAAC,cAAc,EAAE;gBAC7C,KAAK,EAAE,GAAG,CAAC,cAAc,CAAC,CAAC,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS;gBACjD,WAAW,EAAE,GAAG,CAAC,WAAW;gBAC5B,IAAI,EAAE,GAAG,CAAC,IAAI;gBACd,IAAI,EAAE,GAAG,CAAC,IAAI;gBACd,MAAM;aACP,CAAC,CAAC;YACH,OAAO,SAAS,CAAC,MAAM,CAAC,CAAC;QAC3B,CAAC;KACF,CAAC;AACJ,CAAC;AAED;;;;;;;;;;;;;;GAcG;AACH,SAAS,gBAAgB,CAAC,QAAqC;IAC7D,MAAM,GAAG,GAAoB,EAAE,CAAC;IAChC,KAAK,MAAM,CAAC,IAAI,QAAQ,EAAE,CAAC;QACzB,IAAI,CAAC,CAAC,IAAI,KAAK,MAAM,EAAE,CAAC;YACtB,MAAM,IAAI,GAAG,CAAC,CAAC,IAAI,IAAI,MAAM,CAAC;YAC9B,MAAM,IAAI,GAAG,CAAC,CAAC,UAAU,IAAI,CAAC,CAAC,IAAI,IAAI,EAAE,CAAC;YAC1C,GAAG,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,QAAQ,IAAI,YAAY,IAAI,EAAE,EAAE,CAAC,CAAC;YACjE,SAAS;QACX,CAAC;QAED,IAAI,CAAC,CAAC,IAAI,KAAK,WAAW,IAAI,CAAC,CAAC,SAAS,IAAI,CAAC,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACpE,MAAM,KAAK,GAAa,EAAE,CAAC;YAC3B,IAAI,CAAC,CAAC,IAAI;gBAAE,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;YAC/B,KAAK,MAAM,IAAI,IAAI,CAAC,CAAC,SAAS,EAAE,CAAC;gBAC/B,KAAK,CAAC,IAAI,CAAC,cAAc,IAAI,CAAC,IAAI,IAAI,aAAa,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;YACrE,CAAC;YACD,GAAG,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,WAAW,EAAE,IAAI,EAAE,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YACxD,SAAS;QACX,CAAC;QAED,kEAAkE;QAClE,GAAG,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,IAAI,EAAE,EAAE,CAAC,CAAC;IACjD,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED,SAAS,aAAa,CAAC,IAAa;IAClC,IAAI,OAAO,IAAI,KAAK,QAAQ;QAAE,OAAO,IAAI,CAAC;IAC1C,IAAI
,CAAC;QACH,OAAO,IAAI,CAAC,SAAS,CAAC,IAAI,IAAI,EAAE,CAAC,CAAC;IACpC,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,MAAM,CAAC,IAAI,CAAC,CAAC;IACtB,CAAC;AACH,CAAC;AAED,sEAAsE;AACtE,KAAK,SAAS,CAAC,CAAC,SAAS,CACvB,MAAuD;IAEvD,IAAI,KAAK,EAAE,MAAM,EAAE,IAAI,MAAM,EAAE,CAAC;QAC9B,QAAQ,EAAE,CAAC,IAAI,EAAE,CAAC;YAChB,KAAK,OAAO;gBACV,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,EAAE,CAAC,IAAI,EAAE,CAAC;gBACtC,MAAM;YACR,KAAK,UAAU;gBACb,MAAM,EAAE,IAAI,EAAE,UAAU,EAAE,IAAI,EAAE,EAAE,CAAC,IAAI,EAAE,CAAC;gBAC1C,MAAM;YACR,KAAK,WAAW;gBACd,0CAA0C;gBAC1C,MAAM,EAAE,IAAI,EAAE,WAAW,EAAE,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE,IAAI,EAAE,EAAE,CAAC,IAAI,EAAE,IAAI,EAAE,EAAE,CAAC,IAAI,EAAE,CAAC;gBACrE,MAAM;YACR,KAAK,OAAO;gBACV,8DAA8D;gBAC9D,MAAM;oBACJ,IAAI,EAAE,OAAO;oBACb,KAAK,EAAE,EAAE,YAAY,EAAE,EAAE,CAAC,YAAY,EAAE,YAAY,EAAE,EAAE,CAAC,YAAY,EAAE;iBACxE,CAAC;gBACF,MAAM;YACR,KAAK,OAAO;gBACV,sEAAsE;gBACtE,MAAM,EAAE,IAAI,EAAE,OAAO,EAAE,OAAO,EAAE,EAAE,CAAC,OAAO,EAAE,CAAC;gBAC7C,MAAM;QACV,CAAC;IACH,CAAC;AACH,CAAC"}
|
package/dist/engine.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"engine.d.ts","sourceRoot":"","sources":["../src/engine.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;GAyBG;
|
|
1
|
+
{"version":3,"file":"engine.d.ts","sourceRoot":"","sources":["../src/engine.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;GAyBG;AAeH,OAAO,KAAK,EAEV,gBAAgB,EAChB,MAAM,EAUP,MAAM,YAAY,CAAC;AAEpB,wBAAgB,YAAY,CAAC,IAAI,EAAE,gBAAgB,GAAG,MAAM,CA4S3D;AAED;;;;GAIG;AACH,wBAAgB,YAAY,CAAC,CAAC,SAAS,OAAO,YAAY,EAAE,WAAW,EAAE,CAAC,EAAE,CAAC,GAAG,CAAC,CAEhF"}
|
package/dist/engine.js
CHANGED
|
@@ -24,8 +24,15 @@
|
|
|
24
24
|
* honored — needs a `StoppingCriteria` adapter. Tracked as a
|
|
25
25
|
* follow-up.
|
|
26
26
|
*/
|
|
27
|
-
import { AutoModelForCausalLM, AutoTokenizer, TextStreamer, env as transformersEnv, } from '@huggingface/transformers';
|
|
28
27
|
import { parseToolCalls } from './parse-tool-calls.js';
|
|
28
|
+
// Lazy-load the heavy transformers runtime so importing @inbrowser/model (e.g.
// for a cloud provider) never statically bundles ONNX/WASM. The chunk is
// fetched only when an engine actually loads or generates.
let transformersModule = null;
/**
 * Return the shared promise for the `@huggingface/transformers` module,
 * starting the dynamic import on first use.
 *
 * While a load is in flight, or after it has succeeded, every caller receives
 * the same promise. If the import rejects, the cached promise is cleared so a
 * later call starts a fresh import instead of replaying the old failure.
 *
 * @returns {Promise<object>} Promise resolving to the module namespace.
 */
function loadTransformers() {
    if (transformersModule === null) {
        const pending = import('@huggingface/transformers').catch((err) => {
            // Forget the failed attempt, but only if it is still the cached one.
            if (transformersModule === pending) {
                transformersModule = null;
            }
            throw err;
        });
        transformersModule = pending;
    }
    return transformersModule;
}
|
|
29
36
|
export function createEngine(opts) {
|
|
30
37
|
const model = opts.model;
|
|
31
38
|
const capabilities = opts.capabilities;
|
|
@@ -79,16 +86,17 @@ export function createEngine(opts) {
|
|
|
79
86
|
if (loadPromise)
|
|
80
87
|
return loadPromise;
|
|
81
88
|
setState('loading');
|
|
82
|
-
// `weightsBaseUrl` overrides the HF Hub origin for self-hosted
|
|
83
|
-
// mirrors. Transformers.js exposes this as the global
|
|
84
|
-
// `env.remoteHost`; we set it process-wide before load. Documented
|
|
85
|
-
// limitation: with multiple engines spanning different remotes,
|
|
86
|
-
// the last one to load wins. Realistic use case (one app, one
|
|
87
|
-
// mirror) is unaffected.
|
|
88
|
-
if (opts.weightsBaseUrl) {
|
|
89
|
-
transformersEnv.remoteHost = opts.weightsBaseUrl;
|
|
90
|
-
}
|
|
91
89
|
loadPromise = (async () => {
|
|
90
|
+
const { AutoTokenizer, AutoModelForCausalLM, env: transformersEnv, } = await loadTransformers();
|
|
91
|
+
// `weightsBaseUrl` overrides the HF Hub origin for self-hosted
|
|
92
|
+
// mirrors. Transformers.js exposes this as the global
|
|
93
|
+
// `env.remoteHost`; we set it process-wide before load. Documented
|
|
94
|
+
// limitation: with multiple engines spanning different remotes,
|
|
95
|
+
// the last one to load wins. Realistic use case (one app, one
|
|
96
|
+
// mirror) is unaffected.
|
|
97
|
+
if (opts.weightsBaseUrl) {
|
|
98
|
+
transformersEnv.remoteHost = opts.weightsBaseUrl;
|
|
99
|
+
}
|
|
92
100
|
// AutoTokenizer (not AutoProcessor): text-only models like
|
|
93
101
|
// SmolLM2 ship no preprocessor_config.json and AutoProcessor
|
|
94
102
|
// 404s on them. Multimodal models (e.g., Gemma 4 audio) still
|
|
@@ -200,6 +208,8 @@ export function createEngine(opts) {
|
|
|
200
208
|
// channels but inconsistent emission (Gemma 4 family — see
|
|
201
209
|
// presets.ts) deliberately omit `thinkingTags` to take this path.
|
|
202
210
|
const preserveSpecialTokens = useThinking && capabilities.thinkingTags !== undefined;
|
|
211
|
+
// Cached after `ensureReady` above already loaded it — resolves instantly.
|
|
212
|
+
const { TextStreamer } = await loadTransformers();
|
|
203
213
|
const streamer = new TextStreamer(tokenizer, {
|
|
204
214
|
skip_prompt: true,
|
|
205
215
|
...(preserveSpecialTokens ? { skip_special_tokens: false } : {}),
|