@inbrowser/model 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +50 -0
- package/LICENSE +21 -0
- package/README.md +63 -0
- package/dist/adapters/agent.d.ts +19 -0
- package/dist/adapters/agent.d.ts.map +1 -0
- package/dist/adapters/agent.js +96 -0
- package/dist/adapters/agent.js.map +1 -0
- package/dist/adapters/relay.d.ts +17 -0
- package/dist/adapters/relay.d.ts.map +1 -0
- package/dist/adapters/relay.js +90 -0
- package/dist/adapters/relay.js.map +1 -0
- package/dist/engine.d.ts +35 -0
- package/dist/engine.d.ts.map +1 -0
- package/dist/engine.js +353 -0
- package/dist/engine.js.map +1 -0
- package/dist/index.d.ts +21 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +20 -0
- package/dist/index.js.map +1 -0
- package/dist/parse-tool-calls.d.ts +49 -0
- package/dist/parse-tool-calls.d.ts.map +1 -0
- package/dist/parse-tool-calls.js +115 -0
- package/dist/parse-tool-calls.js.map +1 -0
- package/dist/presets.d.ts +93 -0
- package/dist/presets.d.ts.map +1 -0
- package/dist/presets.js +191 -0
- package/dist/presets.js.map +1 -0
- package/dist/think.d.ts +57 -0
- package/dist/think.d.ts.map +1 -0
- package/dist/think.js +138 -0
- package/dist/think.js.map +1 -0
- package/dist/types.d.ts +291 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +17 -0
- package/dist/types.js.map +1 -0
- package/dist/worker.d.ts +62 -0
- package/dist/worker.d.ts.map +1 -0
- package/dist/worker.js +493 -0
- package/dist/worker.js.map +1 -0
- package/package.json +65 -0
package/AGENTS.md
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
# Agent context for `@inbrowser/model`
|
|
2
|
+
|
|
3
|
+
## Purpose
|
|
4
|
+
|
|
5
|
+
On-device LLM inference. Wraps `@huggingface/transformers` behind a
|
|
6
|
+
narrow `Engine` surface so a local Gemma 4 model is a drop-in
|
|
7
|
+
replacement for a cloud provider when consumed through the adapter
|
|
8
|
+
subpaths.
|
|
9
|
+
|
|
10
|
+
## Layering invariants
|
|
11
|
+
|
|
12
|
+
- `src/types.ts` is the canonical type surface. Every other file in
|
|
13
|
+
the package imports types from here.
|
|
14
|
+
- `src/engine.ts` is the only module that holds runtime state.
|
|
15
|
+
- `src/adapters/relay.ts` is the only place that imports from
|
|
16
|
+
`@inbrowser/relay`. `src/adapters/agent.ts` is the only place that
|
|
17
|
+
imports from `@inbrowser/agent`. The root barrel must not.
|
|
18
|
+
- `src/worker.ts` returns the same `Engine` shape `createEngine`
|
|
19
|
+
returns. Consumers must not need to know which side of `postMessage`
|
|
20
|
+
the engine lives on.
|
|
21
|
+
|
|
22
|
+
## Vocabulary
|
|
23
|
+
|
|
24
|
+
Use the precise terms — they show up in types, comments, and PRs:
|
|
25
|
+
|
|
26
|
+
- **ModelRef** (locator) vs **ModelPreset** (locator + static config)
|
|
27
|
+
vs **Engine** (loaded runtime).
|
|
28
|
+
- **Backend**: WebGPU / WASM, the ORT execution provider. Not "GPU
|
|
29
|
+
mode."
|
|
30
|
+
- **`dtype`**: precision selection. Not "model size."
|
|
31
|
+
- **Cold start** = fetch + init + warmup. Three distinct phases,
|
|
32
|
+
each with its own `LoadProgress` variant.
|
|
33
|
+
|
|
34
|
+
## Don't
|
|
35
|
+
|
|
36
|
+
- Don't add `createGemmaEngine` / `createPhi3Engine` / sugar
|
|
37
|
+
factories. New models are new `ModelPreset` entries.
|
|
38
|
+
- Don't put tool-calling polyfill logic here. It belongs in
|
|
39
|
+
`@inbrowser/agent` — it's a property of the agent runtime, not the
|
|
40
|
+
model.
|
|
41
|
+
- Don't widen `EngineEvent` with cloud-only concepts (cost,
|
|
42
|
+
thoughtSignature). Translate at the adapter boundary.
|
|
43
|
+
- Don't make `@huggingface/transformers` a regular dependency. It's
|
|
44
|
+
a peer dep; consumers control the version.
|
|
45
|
+
|
|
46
|
+
## Status
|
|
47
|
+
|
|
48
|
+
POC. Types + adapter surface + worker frames are stable. The
|
|
49
|
+
`@huggingface/transformers` wiring inside `createEngine` is the
|
|
50
|
+
next slice.
|
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 David East
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
# @inbrowser/model
|
|
2
|
+
|
|
3
|
+
On-device LLM engine. Loads ONNX models in the browser via
|
|
4
|
+
`@huggingface/transformers` + ONNX Runtime Web (WebGPU / WASM), and
|
|
5
|
+
exposes them behind a narrow `Engine` surface.
|
|
6
|
+
|
|
7
|
+
> **Status: POC stub.** Types, presets, adapter surface, and worker
|
|
8
|
+
> RPC frames are in place. The `@huggingface/transformers` wiring
|
|
9
|
+
> inside `createEngine` is not yet implemented — `generate()` yields
|
|
10
|
+
> an `error` event today. See `src/engine.ts`.
|
|
11
|
+
|
|
12
|
+
## One-liner
|
|
13
|
+
|
|
14
|
+
```ts
|
|
15
|
+
import { createEngine } from '@inbrowser/model';
|
|
16
|
+
import { gemma4_E2B } from '@inbrowser/model/presets';
|
|
17
|
+
|
|
18
|
+
const engine = createEngine(gemma4_E2B);
|
|
19
|
+
await engine.ensureReady();
|
|
20
|
+
|
|
21
|
+
for await (const evt of engine.generate([
|
|
22
|
+
{ role: 'user', text: 'Explain WebGPU in one paragraph.' },
|
|
23
|
+
])) {
|
|
24
|
+
if (evt.kind === 'token') document.body.append(evt.text);
|
|
25
|
+
}
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
## Surface
|
|
29
|
+
|
|
30
|
+
| Export | What it gives you |
|
|
31
|
+
|---|---|
|
|
32
|
+
| `createEngine(preset)` | Runtime `Engine` — owns load state + decode loop |
|
|
33
|
+
| `definePreset(p)` | Type-safe identity helper for community presets |
|
|
34
|
+
| `ModelPreset`, `Engine`, `EngineEvent`, … | Public types |
|
|
35
|
+
| `@inbrowser/model/presets` | `gemma4_E2B`, `gemma4_E4B` |
|
|
36
|
+
| `@inbrowser/model/relay` | `createLocalInferenceProvider(engine)` → relay `InferenceProvider` |
|
|
37
|
+
| `@inbrowser/model/agent` | `createLocalLlmClient(engine, id)` → agent `LlmClient` |
|
|
38
|
+
| `@inbrowser/model/worker` | `hostEngineInWorker(self)` + `connectWorkerEngine(opts)` |
|
|
39
|
+
|
|
40
|
+
## Vocabulary anchor
|
|
41
|
+
|
|
42
|
+
- **ONNX** — model file format. **ONNX Runtime Web** is the execution
|
|
43
|
+
engine (`onnxruntime-web`); WebGPU and WASM are its **backends**.
|
|
44
|
+
- **`dtype`** — weight/activation precision selection (`q4f16`, `q8`,
|
|
45
|
+
`fp16`, `fp32`). Distinct from parameter count.
|
|
46
|
+
- **`ModelRef`** — bare locator (HF Hub `modelId` + `revision`).
|
|
47
|
+
- **`ModelPreset`** — locator + dtype + backend + capabilities. Static.
|
|
48
|
+
- **`Engine`** — runtime object owning a loaded model. Dynamic.
|
|
49
|
+
- **Cold start** — fetch + init + warmup. **Warm decode** — subsequent
|
|
50
|
+
calls on a ready engine.
|
|
51
|
+
|
|
52
|
+
## Design notes
|
|
53
|
+
|
|
54
|
+
- One factory (`createEngine`), many presets. No `createGemmaEngine`.
|
|
55
|
+
- `capabilities` is on the preset, not the engine — interrogable
|
|
56
|
+
pre-load (`gemma4_E2B.capabilities.contextWindow`).
|
|
57
|
+
- `EngineEvent` is narrower than `InferenceEvent`/`ChatEvent`.
|
|
58
|
+
Adapters widen.
|
|
59
|
+
- Worker subpath returns the same `Engine` shape; the agent runtime
|
|
60
|
+
cannot tell whether it holds a direct or remote engine.
|
|
61
|
+
- Tool calling is not native to Gemma 4. The polyfill (prompt-engineered
|
|
62
|
+
tool calling + structured-output parsing) lives in `@inbrowser/agent`,
|
|
63
|
+
not here.
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Adapt an on-device `Engine` to `@inbrowser/agent`'s `LlmClient`.
|
|
3
|
+
* The agent runtime then drives a local model identically to a
|
|
4
|
+
* cloud provider — same `chat(req, signal) → AsyncIterable<ChatEvent>`
|
|
5
|
+
* surface.
|
|
6
|
+
*
|
|
7
|
+
* Tool calling: when `req.toolUseEnabled` is true and the engine
|
|
8
|
+
* doesn't natively support tools, the adapter declines (yields a
|
|
9
|
+
* single `error` event). The runtime can layer a tool-use polyfill
|
|
10
|
+
* (`withToolUsePolyfill`) over this client to lift it into a
|
|
11
|
+
* tool-capable one via prompt-engineered tool calling.
|
|
12
|
+
*
|
|
13
|
+
* `@inbrowser/agent` is a peer dep; this subpath is the only point
|
|
14
|
+
* in `@inbrowser/model` that imports from it.
|
|
15
|
+
*/
|
|
16
|
+
import type { LlmClient } from '@inbrowser/agent';
|
|
17
|
+
import type { Engine } from '../types.js';
|
|
18
|
+
/**
 * Wrap an on-device `Engine` in an `LlmClient`.
 *
 * @param engine - The engine that will serve every `chat` call.
 * @param id - Identifier exposed as the returned client's `id`.
 * @returns An `LlmClient` backed by `engine`.
 */
export declare function createLocalLlmClient(engine: Engine, id: string): LlmClient;
|
|
19
|
+
//# sourceMappingURL=agent.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"agent.d.ts","sourceRoot":"","sources":["../../src/adapters/agent.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;GAcG;AAEH,OAAO,KAAK,EAA0B,SAAS,EAAqB,MAAM,kBAAkB,CAAC;AAC7F,OAAO,KAAK,EAAE,MAAM,EAAiB,MAAM,aAAa,CAAC;AAEzD,wBAAgB,oBAAoB,CAAC,MAAM,EAAE,MAAM,EAAE,EAAE,EAAE,MAAM,GAAG,SAAS,CAQ1E"}
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Adapt an on-device `Engine` to `@inbrowser/agent`'s `LlmClient`.
|
|
3
|
+
* The agent runtime then drives a local model identically to a
|
|
4
|
+
* cloud provider — same `chat(req, signal) → AsyncIterable<ChatEvent>`
|
|
5
|
+
* surface.
|
|
6
|
+
*
|
|
7
|
+
* Tool calling: when `req.toolUseEnabled` is true and the engine
|
|
8
|
+
* doesn't natively support tools, the adapter declines (yields a
|
|
9
|
+
* single `error` event). The runtime can layer a tool-use polyfill
|
|
10
|
+
* (`withToolUsePolyfill`) over this client to lift it into a
|
|
11
|
+
* tool-capable one via prompt-engineered tool calling.
|
|
12
|
+
*
|
|
13
|
+
* `@inbrowser/agent` is a peer dep; this subpath is the only point
|
|
14
|
+
* in `@inbrowser/model` that imports from it.
|
|
15
|
+
*/
|
|
16
|
+
export function createLocalLlmClient(engine, id) {
|
|
17
|
+
return {
|
|
18
|
+
id,
|
|
19
|
+
supportsTools: engine.capabilities.supportsTools,
|
|
20
|
+
chat(req, signal) {
|
|
21
|
+
return drive(engine, req, signal);
|
|
22
|
+
},
|
|
23
|
+
};
|
|
24
|
+
}
|
|
25
|
+
async function* drive(engine, req, signal) {
|
|
26
|
+
if (req.toolUseEnabled && !engine.capabilities.supportsTools) {
|
|
27
|
+
yield {
|
|
28
|
+
kind: 'error',
|
|
29
|
+
message: 'engine does not natively support tools — pick a tools-capable preset (see capabilities.supportsTools)',
|
|
30
|
+
};
|
|
31
|
+
return;
|
|
32
|
+
}
|
|
33
|
+
const messages = toEngineMessages(req.messages);
|
|
34
|
+
let promptTokens = 0;
|
|
35
|
+
let completionTokens = 0;
|
|
36
|
+
// Forward `req.tools` only when the request opts into tool use.
|
|
37
|
+
// The engine itself gates on `capabilities.supportsTools`; passing
|
|
38
|
+
// tools to a non-tools-capable engine is a no-op.
|
|
39
|
+
const tools = req.toolUseEnabled && req.tools.length > 0
|
|
40
|
+
? req.tools.map((t) => ({
|
|
41
|
+
type: 'function',
|
|
42
|
+
function: { name: t.name, description: t.description, parameters: t.parameters },
|
|
43
|
+
}))
|
|
44
|
+
: undefined;
|
|
45
|
+
for await (const evt of engine.generate(messages, { signal, ...(tools ? { tools } : {}) })) {
|
|
46
|
+
if (evt.kind === 'token') {
|
|
47
|
+
yield { kind: 'text', chunk: evt.text };
|
|
48
|
+
continue;
|
|
49
|
+
}
|
|
50
|
+
if (evt.kind === 'thinking') {
|
|
51
|
+
// ChatEvent has its own 'thinking' kind — pass through. The
|
|
52
|
+
// engine only emits this when the caller wrapped with
|
|
53
|
+
// splitThinking() upstream.
|
|
54
|
+
yield { kind: 'thinking', chunk: evt.text };
|
|
55
|
+
continue;
|
|
56
|
+
}
|
|
57
|
+
if (evt.kind === 'tool_call') {
|
|
58
|
+
yield {
|
|
59
|
+
kind: 'tool_call',
|
|
60
|
+
id: evt.id,
|
|
61
|
+
name: evt.name,
|
|
62
|
+
args: evt.args,
|
|
63
|
+
};
|
|
64
|
+
continue;
|
|
65
|
+
}
|
|
66
|
+
if (evt.kind === 'usage') {
|
|
67
|
+
promptTokens = evt.promptTokens;
|
|
68
|
+
completionTokens = evt.outputTokens;
|
|
69
|
+
continue;
|
|
70
|
+
}
|
|
71
|
+
yield { kind: 'error', message: evt.message };
|
|
72
|
+
return;
|
|
73
|
+
}
|
|
74
|
+
yield {
|
|
75
|
+
kind: 'turn_complete',
|
|
76
|
+
usage: { promptTokens, completionTokens },
|
|
77
|
+
details: { requestedModel: engine.model.modelId },
|
|
78
|
+
};
|
|
79
|
+
}
|
|
80
|
+
/**
 * Convert agent chat messages into the engine's `{ role, text }` messages.
 *
 * - `tool` messages become a `user` message whose text is
 *   `[tool <name> result]` followed by a newline and the result JSON;
 *   a missing `name` or `resultJson` is rendered as an empty string.
 * - `system`, `user` and `assistant` messages keep their role. A missing
 *   `text` is normalised to `''`, matching the relay adapter's converter,
 *   so the engine never receives `text: undefined`.
 * - Messages with any other role are dropped.
 *
 * @param messages - Iterable of agent messages.
 * @returns A new array of engine messages, in input order.
 */
function toEngineMessages(messages) {
  const out = [];
  for (const m of messages) {
    if (m.role === 'tool') {
      out.push({
        role: 'user',
        text: `[tool ${m.name ?? ''} result]\n${m.resultJson ?? ''}`,
      });
      continue;
    }
    if (m.role === 'system' || m.role === 'user' || m.role === 'assistant') {
      // `?? ''` guards messages that carry no text (same rule as relay.js).
      out.push({ role: m.role, text: m.text ?? '' });
    }
  }
  return out;
}
|
|
96
|
+
//# sourceMappingURL=agent.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"agent.js","sourceRoot":"","sources":["../../src/adapters/agent.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;GAcG;AAKH,MAAM,UAAU,oBAAoB,CAAC,MAAc,EAAE,EAAU;IAC7D,OAAO;QACL,EAAE;QACF,aAAa,EAAE,MAAM,CAAC,YAAY,CAAC,aAAa;QAChD,IAAI,CAAC,GAAgB,EAAE,MAAmB;YACxC,OAAO,KAAK,CAAC,MAAM,EAAE,GAAG,EAAE,MAAM,CAAC,CAAC;QACpC,CAAC;KACF,CAAC;AACJ,CAAC;AAED,KAAK,SAAS,CAAC,CAAC,KAAK,CACnB,MAAc,EACd,GAAgB,EAChB,MAAmB;IAEnB,IAAI,GAAG,CAAC,cAAc,IAAI,CAAC,MAAM,CAAC,YAAY,CAAC,aAAa,EAAE,CAAC;QAC7D,MAAM;YACJ,IAAI,EAAE,OAAO;YACb,OAAO,EACL,uGAAuG;SAC1G,CAAC;QACF,OAAO;IACT,CAAC;IAED,MAAM,QAAQ,GAAG,gBAAgB,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;IAChD,IAAI,YAAY,GAAG,CAAC,CAAC;IACrB,IAAI,gBAAgB,GAAG,CAAC,CAAC;IAEzB,gEAAgE;IAChE,mEAAmE;IACnE,kDAAkD;IAClD,MAAM,KAAK,GACT,GAAG,CAAC,cAAc,IAAI,GAAG,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC;QACxC,CAAC,CAAC,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;YACpB,IAAI,EAAE,UAAmB;YACzB,QAAQ,EAAE,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,WAAW,EAAE,CAAC,CAAC,WAAW,EAAE,UAAU,EAAE,CAAC,CAAC,UAAU,EAAE;SACjF,CAAC,CAAC;QACL,CAAC,CAAC,SAAS,CAAC;IAEhB,IAAI,KAAK,EAAE,MAAM,GAAG,IAAI,MAAM,CAAC,QAAQ,CAAC,QAAQ,EAAE,EAAE,MAAM,EAAE,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC;QAC3F,IAAI,GAAG,CAAC,IAAI,KAAK,OAAO,EAAE,CAAC;YACzB,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,KAAK,EAAE,GAAG,CAAC,IAAI,EAAE,CAAC;YACxC,SAAS;QACX,CAAC;QACD,IAAI,GAAG,CAAC,IAAI,KAAK,UAAU,EAAE,CAAC;YAC5B,4DAA4D;YAC5D,sDAAsD;YACtD,4BAA4B;YAC5B,MAAM,EAAE,IAAI,EAAE,UAAU,EAAE,KAAK,EAAE,GAAG,CAAC,IAAI,EAAE,CAAC;YAC5C,SAAS;QACX,CAAC;QACD,IAAI,GAAG,CAAC,IAAI,KAAK,WAAW,EAAE,CAAC;YAC7B,MAAM;gBACJ,IAAI,EAAE,WAAW;gBACjB,EAAE,EAAE,GAAG,CAAC,EAAE;gBACV,IAAI,EAAE,GAAG,CAAC,IAAI;gBACd,IAAI,EAAE,GAAG,CAAC,IAAI;aACf,CAAC;YACF,SAAS;QACX,CAAC;QACD,IAAI,GAAG,CAAC,IAAI,KAAK,OAAO,EAAE,CAAC;YACzB,YAAY,GAAG,GAAG,CAAC,YAAY,CAAC;YAChC,gBAAgB,GAAG,GAAG,CAAC,YAAY,CAAC;YACpC,SAAS;QACX,CAAC;QACD,MAAM,EAAE,IAAI,EAAE,OAAO,EAAE,OAAO,EAAE,GAAG,CAAC,OAAO,EAAE,CAAC;QAC9C,OAAO;IACT,CAAC;IAED,MAAM;QACJ,IAAI,
EAAE,eAAe;QACrB,KAAK,EAAE,EAAE,YAAY,EAAE,gBAAgB,EAAE;QACzC,OAAO,EAAE,EAAE,cAAc,EAAE,MAAM,CAAC,KAAK,CAAC,OAAO,EAAE;KAClD,CAAC;AACJ,CAAC;AAED,SAAS,gBAAgB,CAAC,QAA0C;IAClE,MAAM,GAAG,GAAoB,EAAE,CAAC;IAChC,KAAK,MAAM,CAAC,IAAI,QAAQ,EAAE,CAAC;QACzB,IAAI,CAAC,CAAC,IAAI,KAAK,MAAM,EAAE,CAAC;YACtB,GAAG,CAAC,IAAI,CAAC;gBACP,IAAI,EAAE,MAAM;gBACZ,IAAI,EAAE,SAAS,CAAC,CAAC,IAAI,IAAI,EAAE,aAAa,CAAC,CAAC,UAAU,IAAI,EAAE,EAAE;aAC7D,CAAC,CAAC;YACH,SAAS;QACX,CAAC;QACD,IAAI,CAAC,CAAC,IAAI,KAAK,QAAQ,IAAI,CAAC,CAAC,IAAI,KAAK,MAAM,IAAI,CAAC,CAAC,IAAI,KAAK,WAAW,EAAE,CAAC;YACvE,GAAG,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;QAC3C,CAAC;IACH,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC"}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Adapt an on-device `Engine` to `@inbrowser/relay`'s
|
|
3
|
+
* `InferenceProvider`. Lets the relay's existing handlers, durable
|
|
4
|
+
* storage, and SSE wire format treat a local Gemma model
|
|
5
|
+
* indistinguishably from Gemini-over-HTTP.
|
|
6
|
+
*
|
|
7
|
+
* `NormalizedRequest` fields with no on-device analogue (`apiKey`,
|
|
8
|
+
* `provider`, `model` routing) are ignored — the engine is already
|
|
9
|
+
* bound to a single model at construction time.
|
|
10
|
+
*
|
|
11
|
+
* `@inbrowser/relay` is a peer dep; this subpath is the only point
|
|
12
|
+
* in `@inbrowser/model` that imports from it.
|
|
13
|
+
*/
|
|
14
|
+
import type { InferenceProvider } from '@inbrowser/relay';
|
|
15
|
+
import type { Engine } from '../types.js';
|
|
16
|
+
/**
 * Wrap an on-device `Engine` in a relay `InferenceProvider`.
 *
 * @param engine - The engine that will serve every request.
 * @returns An `InferenceProvider` backed by `engine`.
 */
export declare function createLocalInferenceProvider(engine: Engine): InferenceProvider;
|
|
17
|
+
//# sourceMappingURL=relay.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"relay.d.ts","sourceRoot":"","sources":["../../src/adapters/relay.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;GAYG;AAEH,OAAO,KAAK,EAEV,iBAAiB,EAGlB,MAAM,kBAAkB,CAAC;AAC1B,OAAO,KAAK,EAAE,MAAM,EAAiB,MAAM,aAAa,CAAC;AAEzD,wBAAgB,4BAA4B,CAAC,MAAM,EAAE,MAAM,GAAG,iBAAiB,CA2D9E"}
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Adapt an on-device `Engine` to `@inbrowser/relay`'s
|
|
3
|
+
* `InferenceProvider`. Lets the relay's existing handlers, durable
|
|
4
|
+
* storage, and SSE wire format treat a local Gemma model
|
|
5
|
+
* indistinguishably from Gemini-over-HTTP.
|
|
6
|
+
*
|
|
7
|
+
* `NormalizedRequest` fields with no on-device analogue (`apiKey`,
|
|
8
|
+
* `provider`, `model` routing) are ignored — the engine is already
|
|
9
|
+
* bound to a single model at construction time.
|
|
10
|
+
*
|
|
11
|
+
* `@inbrowser/relay` is a peer dep; this subpath is the only point
|
|
12
|
+
* in `@inbrowser/model` that imports from it.
|
|
13
|
+
*/
|
|
14
|
+
export function createLocalInferenceProvider(engine) {
|
|
15
|
+
return async function* (req) {
|
|
16
|
+
const messages = toEngineMessages(req.messages);
|
|
17
|
+
const startedAt = performance.now();
|
|
18
|
+
// Forward declared tools to the engine when the request brings
|
|
19
|
+
// any. The engine itself gates emission on the preset's
|
|
20
|
+
// capabilities.supportsTools — passing tools to a non-tools
|
|
21
|
+
// preset is a no-op rather than an error so callers can be
|
|
22
|
+
// backend-agnostic about which preset they bound to.
|
|
23
|
+
const tools = req.tools.length > 0
|
|
24
|
+
? req.tools.map((t) => ({
|
|
25
|
+
type: 'function',
|
|
26
|
+
function: { name: t.name, description: t.description, parameters: t.parameters },
|
|
27
|
+
}))
|
|
28
|
+
: undefined;
|
|
29
|
+
for await (const evt of engine.generate(messages, {
|
|
30
|
+
temperature: req.temperature,
|
|
31
|
+
topP: req.topP,
|
|
32
|
+
topK: req.topK,
|
|
33
|
+
...(tools ? { tools } : {}),
|
|
34
|
+
...(req.signal ? { signal: req.signal } : {}),
|
|
35
|
+
})) {
|
|
36
|
+
if (evt.kind === 'token') {
|
|
37
|
+
yield { kind: 'text', chunk: evt.text };
|
|
38
|
+
continue;
|
|
39
|
+
}
|
|
40
|
+
if (evt.kind === 'thinking') {
|
|
41
|
+
// InferenceEvent has its own 'thinking' kind — pass through.
|
|
42
|
+
// The engine only emits this when the caller wrapped with
|
|
43
|
+
// splitThinking() upstream.
|
|
44
|
+
yield { kind: 'thinking', chunk: evt.text };
|
|
45
|
+
continue;
|
|
46
|
+
}
|
|
47
|
+
if (evt.kind === 'tool_call') {
|
|
48
|
+
yield {
|
|
49
|
+
kind: 'tool_call',
|
|
50
|
+
callId: evt.id,
|
|
51
|
+
name: evt.name,
|
|
52
|
+
args: evt.args,
|
|
53
|
+
};
|
|
54
|
+
continue;
|
|
55
|
+
}
|
|
56
|
+
if (evt.kind === 'usage') {
|
|
57
|
+
yield {
|
|
58
|
+
kind: 'usage',
|
|
59
|
+
promptTokens: evt.promptTokens,
|
|
60
|
+
outputTokens: evt.outputTokens,
|
|
61
|
+
};
|
|
62
|
+
continue;
|
|
63
|
+
}
|
|
64
|
+
yield { kind: 'error', message: evt.message };
|
|
65
|
+
return;
|
|
66
|
+
}
|
|
67
|
+
void startedAt;
|
|
68
|
+
};
|
|
69
|
+
}
|
|
70
|
+
/**
 * Convert relay request messages into the engine's `{ role, text }` messages.
 *
 * The messages produced here never carry a `tool` role. Each tool turn is
 * emitted as its own `user` message containing `[tool <name> result]`, a
 * newline, and the result JSON, so the model keeps the context while the
 * structured tool-call/result pairing is dropped. `system`, `user` and
 * `assistant` messages keep their role, with a missing `text` normalised
 * to `''`. Messages with any other role are skipped.
 *
 * @param messages - Iterable of relay messages.
 * @returns A new array of engine messages, in input order.
 */
function toEngineMessages(messages) {
  const converted = [];
  for (const msg of messages) {
    switch (msg.role) {
      case 'tool': {
        const label = msg.name ?? '';
        const payload = msg.resultJson ?? '';
        converted.push({ role: 'user', text: `[tool ${label} result]\n${payload}` });
        break;
      }
      case 'system':
      case 'user':
      case 'assistant':
        converted.push({ role: msg.role, text: msg.text ?? '' });
        break;
      default:
        // Unknown roles have no engine equivalent and are ignored.
        break;
    }
  }
  return converted;
}
|
|
90
|
+
//# sourceMappingURL=relay.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"relay.js","sourceRoot":"","sources":["../../src/adapters/relay.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;GAYG;AAUH,MAAM,UAAU,4BAA4B,CAAC,MAAc;IACzD,OAAO,KAAK,SAAS,CAAC,EAAE,GAAsB;QAC5C,MAAM,QAAQ,GAAG,gBAAgB,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;QAChD,MAAM,SAAS,GAAG,WAAW,CAAC,GAAG,EAAE,CAAC;QAEpC,+DAA+D;QAC/D,wDAAwD;QACxD,4DAA4D;QAC5D,2DAA2D;QAC3D,qDAAqD;QACrD,MAAM,KAAK,GACT,GAAG,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC;YAClB,CAAC,CAAC,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;gBACpB,IAAI,EAAE,UAAmB;gBACzB,QAAQ,EAAE,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,WAAW,EAAE,CAAC,CAAC,WAAW,EAAE,UAAU,EAAE,CAAC,CAAC,UAAU,EAAE;aACjF,CAAC,CAAC;YACL,CAAC,CAAC,SAAS,CAAC;QAEhB,IAAI,KAAK,EAAE,MAAM,GAAG,IAAI,MAAM,CAAC,QAAQ,CAAC,QAAQ,EAAE;YAChD,WAAW,EAAE,GAAG,CAAC,WAAW;YAC5B,IAAI,EAAE,GAAG,CAAC,IAAI;YACd,IAAI,EAAE,GAAG,CAAC,IAAI;YACd,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;YAC3B,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,GAAG,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;SAC9C,CAAC,EAAE,CAAC;YACH,IAAI,GAAG,CAAC,IAAI,KAAK,OAAO,EAAE,CAAC;gBACzB,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,KAAK,EAAE,GAAG,CAAC,IAAI,EAAE,CAAC;gBACxC,SAAS;YACX,CAAC;YACD,IAAI,GAAG,CAAC,IAAI,KAAK,UAAU,EAAE,CAAC;gBAC5B,6DAA6D;gBAC7D,0DAA0D;gBAC1D,4BAA4B;gBAC5B,MAAM,EAAE,IAAI,EAAE,UAAU,EAAE,KAAK,EAAE,GAAG,CAAC,IAAI,EAAE,CAAC;gBAC5C,SAAS;YACX,CAAC;YACD,IAAI,GAAG,CAAC,IAAI,KAAK,WAAW,EAAE,CAAC;gBAC7B,MAAM;oBACJ,IAAI,EAAE,WAAW;oBACjB,MAAM,EAAE,GAAG,CAAC,EAAE;oBACd,IAAI,EAAE,GAAG,CAAC,IAAI;oBACd,IAAI,EAAE,GAAG,CAAC,IAAI;iBACf,CAAC;gBACF,SAAS;YACX,CAAC;YACD,IAAI,GAAG,CAAC,IAAI,KAAK,OAAO,EAAE,CAAC;gBACzB,MAAM;oBACJ,IAAI,EAAE,OAAO;oBACb,YAAY,EAAE,GAAG,CAAC,YAAY;oBAC9B,YAAY,EAAE,GAAG,CAAC,YAAY;iBAC/B,CAAC;gBACF,SAAS;YACX,CAAC;YACD,MAAM,EAAE,IAAI,EAAE,OAAO,EAAE,OAAO,EAAE,GAAG,CAAC,OAAO,EAAE,CAAC;YAC9C,OAAO;QACT,CAAC;QAED,KAAK,SAAS,CAAC;IACjB,CAAC,CAAC;AACJ,CAAC;AAED,SAAS,gBAAgB,CAAC,QAA0C;IAClE,MAAM,GAAG,GAAoB,EAAE,CAAC;IAChC,KAAK,MAAM,CAAC,IAAI,QAAQ,EAAE,CAAC;QACzB,kEAAkE;QAClE,mEAAmE
;QACnE,+DAA+D;QAC/D,sEAAsE;QACtE,IAAI,CAAC,CAAC,IAAI,KAAK,MAAM,EAAE,CAAC;YACtB,GAAG,CAAC,IAAI,CAAC;gBACP,IAAI,EAAE,MAAM;gBACZ,IAAI,EAAE,SAAS,CAAC,CAAC,IAAI,IAAI,EAAE,aAAa,CAAC,CAAC,UAAU,IAAI,EAAE,EAAE;aAC7D,CAAC,CAAC;YACH,SAAS;QACX,CAAC;QACD,IAAI,CAAC,CAAC,IAAI,KAAK,QAAQ,IAAI,CAAC,CAAC,IAAI,KAAK,MAAM,IAAI,CAAC,CAAC,IAAI,KAAK,WAAW,EAAE,CAAC;YACvE,GAAG,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,IAAI,EAAE,EAAE,CAAC,CAAC;QACjD,CAAC;IACH,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC"}
|
package/dist/engine.d.ts
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* `createEngine` — on-device LLM engine implementation.
|
|
3
|
+
*
|
|
4
|
+
* Wires `@huggingface/transformers` v4 to the narrow `Engine` surface
|
|
5
|
+
* declared in `./types.ts`:
|
|
6
|
+
*
|
|
7
|
+
* - `ensureReady()` loads the `AutoProcessor` + `AutoModelForCausalLM`
|
|
8
|
+
* pair from the configured HF Hub repo, mapping the runtime's
|
|
9
|
+
* `ProgressInfo` stream into `LoadProgress` events.
|
|
10
|
+
* - `generate()` applies the model's bundled chat template, sets up
|
|
11
|
+
* a `TextStreamer` whose `callback_function` pushes tokens into
|
|
12
|
+
* an async-iterator queue, and drives `model.generate()`. Yields
|
|
13
|
+
* `{ kind: 'token' }` per decoded chunk, then a terminal `usage`
|
|
14
|
+
* event with the engine's local accounting.
|
|
15
|
+
*
|
|
16
|
+
* Backend mapping: `Backend` ('auto' | 'webgpu' | 'wasm') passes
|
|
17
|
+
* straight through to Transformers.js's `DeviceType`. `dtype` is
|
|
18
|
+
* forwarded unchanged.
|
|
19
|
+
*
|
|
20
|
+
* Tool calling: the engine itself is toolless. The agent-side
|
|
21
|
+
* polyfill lives in `@inbrowser/agent` (see AGENTS.md).
|
|
22
|
+
*
|
|
23
|
+
* Stop sequences: `GenerateOpts.stop` is accepted but not yet
|
|
24
|
+
* honored — needs a `StoppingCriteria` adapter. Tracked as a
|
|
25
|
+
* follow-up.
|
|
26
|
+
*/
|
|
27
|
+
import type { CreateEngineOpts, Engine } from './types.js';
|
|
28
|
+
/**
 * Create an on-device `Engine`.
 *
 * @param opts - Construction options; see `CreateEngineOpts` in `./types.js`.
 * @returns The `Engine` described by `opts`.
 */
export declare function createEngine(opts: CreateEngineOpts): Engine;
|
|
29
|
+
/**
|
|
30
|
+
* Type-safe preset authoring. Identity at runtime; the value of this
|
|
31
|
+
* helper is purely the compile-time completeness check it enforces
|
|
32
|
+
* on caller-defined presets.
|
|
33
|
+
*/
|
|
34
|
+
// `p` is the caller-defined preset; the return type is the same `P`, so the
// literal types of the preset's fields are preserved for the caller.
export declare function definePreset<P extends import('./types.js').ModelPreset>(p: P): P;
|
|
35
|
+
//# sourceMappingURL=engine.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"engine.d.ts","sourceRoot":"","sources":["../src/engine.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;GAyBG;AAaH,OAAO,KAAK,EAEV,gBAAgB,EAChB,MAAM,EAUP,MAAM,YAAY,CAAC;AAEpB,wBAAgB,YAAY,CAAC,IAAI,EAAE,gBAAgB,GAAG,MAAM,CAqS3D;AAED;;;;GAIG;AACH,wBAAgB,YAAY,CAAC,CAAC,SAAS,OAAO,YAAY,EAAE,WAAW,EAAE,CAAC,EAAE,CAAC,GAAG,CAAC,CAEhF"}
|