@simulatte/doppler 0.1.0 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +27 -23
- package/package.json +3 -5
package/README.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# @simulatte/doppler
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
Inference and training on raw WebGPU. Pure JS + WGSL.
|
|
4
4
|
|
|
5
5
|
**[Live Demo](https://d4da.com)** · **[npm](https://www.npmjs.com/package/@simulatte/doppler)** · **[simulatte.world](https://simulatte.world)**
|
|
6
6
|
|
|
@@ -10,7 +10,7 @@ Browser-native inference engine for local AI workloads.
|
|
|
10
10
|
npm install @simulatte/doppler
|
|
11
11
|
```
|
|
12
12
|
|
|
13
|
-
## Quick
|
|
13
|
+
## Quick start
|
|
14
14
|
|
|
15
15
|
```js
|
|
16
16
|
import { doppler } from '@simulatte/doppler';
|
|
@@ -22,38 +22,35 @@ for await (const token of model.generate('Hello, world')) {
|
|
|
22
22
|
}
|
|
23
23
|
```
|
|
24
24
|
|
|
25
|
-
|
|
25
|
+
Tokens stream from a native `AsyncGenerator`. See [more examples](#more-examples) below or the full [API contract](docs/doppler-api-contract.md).
|
|
26
26
|
|
|
27
|
-
##
|
|
27
|
+
## Why Doppler
|
|
28
28
|
|
|
29
|
-
|
|
30
|
-
- Native `for await` streaming — not callbacks
|
|
31
|
-
- Sharded weight loading via OPFS
|
|
32
|
-
- LoRA adapter hot-swap at runtime
|
|
33
|
-
- Quantized model support (Q4K, Q8, F16)
|
|
34
|
-
- Multi-model with independent instances
|
|
35
|
-
- Kernel hot-swap (prefill/decode paths)
|
|
36
|
-
- Reproducible benchmark tooling
|
|
37
|
-
- Auditable kernel execution tracing
|
|
29
|
+
**JS → WGSL → WebGPU.** Direct JavaScript orchestration into native WebGPU kernels, avoiding ONNX runtimes, WASM blobs, and bridge layers.
|
|
38
30
|
|
|
39
|
-
|
|
31
|
+
**`for await` streaming.** Generation uses a native `AsyncGenerator` that fits normal app control flow.
|
|
40
32
|
|
|
41
|
-
-
|
|
42
|
-
- Firefox (behind flag, WebGPU support varies)
|
|
43
|
-
- Safari (WebGPU support in progress)
|
|
33
|
+
**LoRA hot-swap.** Swap adapters at runtime without reloading the base model.
|
|
44
34
|
|
|
45
|
-
|
|
35
|
+
**Independent model instances.** Run multiple models concurrently. Each owns its pipeline, buffers, and KV cache.
|
|
46
36
|
|
|
47
37
|
## Evidence
|
|
48
38
|
|
|
49
|
-
Lower is better, comparing per-phase latency by workload.
|
|
50
|
-
|
|
51
39
|

|
|
52
40
|
|
|
53
|
-
Snapshot
|
|
54
|
-
- [g3-p064-d064-t0-k1.
|
|
41
|
+
Snapshot artifacts:
|
|
42
|
+
- [g3-1b-p064-d064-t0-k1.compare.json](benchmarks/vendors/fixtures/g3-1b-p064-d064-t0-k1.compare.json)
|
|
43
|
+
- [lfm2-5-1-2b-p064-d064-t0-k1.compare.json](benchmarks/vendors/fixtures/lfm2-5-1-2b-p064-d064-t0-k1.compare.json)
|
|
44
|
+
|
|
45
|
+
## Under the hood
|
|
46
|
+
|
|
47
|
+
- Sharded weight loading via OPFS moves multi-GB weights into VRAM without blocking the main thread.
|
|
48
|
+
- Quantized inference paths (Q4K, Q8, F16) support practical model sizes on consumer GPUs.
|
|
49
|
+
- Kernel hot-swap between prefill and decode paths.
|
|
50
|
+
- Config-driven runtime keeps presets, kernel-path selection, and sampling explicit.
|
|
51
|
+
- Reproducible benchmarks expose deterministic knobs and auditable kernel traces.
|
|
55
52
|
|
|
56
|
-
## More
|
|
53
|
+
## More examples
|
|
57
54
|
|
|
58
55
|
```js
|
|
59
56
|
// Non-streaming
|
|
@@ -80,6 +77,13 @@ for await (const token of doppler('Hello', { model: 'gemma-3-1b' })) {
|
|
|
80
77
|
- Runtime config contract: [docs/config.md](docs/config.md)
|
|
81
78
|
- Architecture: [docs/architecture.md](docs/architecture.md)
|
|
82
79
|
|
|
80
|
+
## Environment requirements
|
|
81
|
+
|
|
82
|
+
- WebGPU-capable browser runtime is required.
|
|
83
|
+
- Chrome / Edge 113+ supported.
|
|
84
|
+
- Firefox support varies (typically behind a flag).
|
|
85
|
+
- Safari support is evolving.
|
|
86
|
+
|
|
83
87
|
## License
|
|
84
88
|
|
|
85
89
|
Apache License 2.0 (`Apache-2.0`). See [LICENSE](LICENSE) and [NOTICE](NOTICE).
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@simulatte/doppler",
|
|
3
|
-
"version": "0.1.0",
|
|
3
|
+
"version": "0.1.2",
|
|
4
4
|
"description": "Browser-native WebGPU inference engine for local intent and inference loops",
|
|
5
5
|
"main": "src/index.js",
|
|
6
6
|
"types": "src/index.d.ts",
|
|
@@ -134,11 +134,9 @@
|
|
|
134
134
|
"tools/convert-safetensors-node.js"
|
|
135
135
|
],
|
|
136
136
|
"devDependencies": {
|
|
137
|
+
"@huggingface/transformers": "^3.8.1",
|
|
137
138
|
"jest": "^30.2.0",
|
|
139
|
+
"onnxruntime-web": "^1.24.1",
|
|
138
140
|
"playwright": "^1.58.2"
|
|
139
|
-
},
|
|
140
|
-
"dependencies": {
|
|
141
|
-
"@huggingface/transformers": "^3.8.1",
|
|
142
|
-
"onnxruntime-web": "^1.24.1"
|
|
143
141
|
}
|
|
144
142
|
}
|