@simulatte/doppler 0.1.3 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +11 -5
- package/package.json +27 -4
- package/src/client/doppler-api.browser.d.ts +1 -0
- package/src/client/doppler-api.browser.js +288 -0
- package/src/client/doppler-api.d.ts +80 -0
- package/src/client/doppler-api.js +298 -0
- package/src/client/doppler-provider/types.js +1 -1
- package/src/client/doppler-registry.d.ts +23 -0
- package/src/client/doppler-registry.js +88 -0
- package/src/client/doppler-registry.json +39 -0
- package/src/config/execution-contract-check.d.ts +82 -0
- package/src/config/execution-contract-check.js +317 -0
- package/src/config/execution-v0-contract-check.d.ts +94 -0
- package/src/config/execution-v0-contract-check.js +251 -0
- package/src/config/execution-v0-graph-contract-check.d.ts +20 -0
- package/src/config/execution-v0-graph-contract-check.js +64 -0
- package/src/config/kernel-path-contract-check.d.ts +76 -0
- package/src/config/kernel-path-contract-check.js +479 -0
- package/src/config/kernel-path-loader.d.ts +16 -0
- package/src/config/kernel-path-loader.js +54 -0
- package/src/config/kernels/kernel-ref-digests.js +12 -0
- package/src/config/kernels/registry.json +556 -0
- package/src/config/loader.js +90 -67
- package/src/config/merge-contract-check.d.ts +16 -0
- package/src/config/merge-contract-check.js +321 -0
- package/src/config/merge-helpers.d.ts +58 -0
- package/src/config/merge-helpers.js +54 -0
- package/src/config/merge.js +3 -6
- package/src/config/presets/models/janus-text.json +27 -0
- package/src/config/quantization-contract-check.d.ts +12 -0
- package/src/config/quantization-contract-check.js +91 -0
- package/src/config/required-inference-fields-contract-check.d.ts +24 -0
- package/src/config/required-inference-fields-contract-check.js +231 -0
- package/src/config/schema/browser-suite-metrics.schema.d.ts +17 -0
- package/src/config/schema/browser-suite-metrics.schema.js +46 -0
- package/src/config/schema/conversion-report.schema.d.ts +40 -0
- package/src/config/schema/conversion-report.schema.js +108 -0
- package/src/config/schema/doppler.schema.js +12 -18
- package/src/config/schema/index.d.ts +22 -0
- package/src/config/schema/index.js +18 -0
- package/src/converter/core.d.ts +10 -0
- package/src/converter/core.js +49 -11
- package/src/converter/parsers/diffusion.js +63 -3
- package/src/converter/tokenizer-utils.js +17 -3
- package/src/formats/rdrr/validation.js +13 -0
- package/src/gpu/kernels/depthwise_conv2d.d.ts +29 -0
- package/src/gpu/kernels/depthwise_conv2d.js +98 -0
- package/src/gpu/kernels/depthwise_conv2d.wgsl +58 -0
- package/src/gpu/kernels/depthwise_conv2d_f16.wgsl +62 -0
- package/src/gpu/kernels/grouped_pointwise_conv2d.d.ts +27 -0
- package/src/gpu/kernels/grouped_pointwise_conv2d.js +92 -0
- package/src/gpu/kernels/grouped_pointwise_conv2d.wgsl +47 -0
- package/src/gpu/kernels/grouped_pointwise_conv2d_f16.wgsl +51 -0
- package/src/gpu/kernels/index.d.ts +30 -0
- package/src/gpu/kernels/index.js +25 -0
- package/src/gpu/kernels/relu.d.ts +18 -0
- package/src/gpu/kernels/relu.js +45 -0
- package/src/gpu/kernels/relu.wgsl +21 -0
- package/src/gpu/kernels/relu_f16.wgsl +23 -0
- package/src/gpu/kernels/repeat_channels.d.ts +21 -0
- package/src/gpu/kernels/repeat_channels.js +60 -0
- package/src/gpu/kernels/repeat_channels.wgsl +29 -0
- package/src/gpu/kernels/repeat_channels_f16.wgsl +31 -0
- package/src/gpu/kernels/sana_linear_attention.d.ts +27 -0
- package/src/gpu/kernels/sana_linear_attention.js +122 -0
- package/src/gpu/kernels/sana_linear_attention_apply.wgsl +44 -0
- package/src/gpu/kernels/sana_linear_attention_apply_f16.wgsl +47 -0
- package/src/gpu/kernels/sana_linear_attention_summary.wgsl +47 -0
- package/src/gpu/kernels/sana_linear_attention_summary_f16.wgsl +49 -0
- package/src/index-browser.d.ts +1 -0
- package/src/index-browser.js +2 -1
- package/src/index.d.ts +1 -0
- package/src/index.js +2 -1
- package/src/inference/browser-harness.js +164 -38
- package/src/inference/pipelines/diffusion/init.js +14 -0
- package/src/inference/pipelines/diffusion/pipeline.js +206 -77
- package/src/inference/pipelines/diffusion/sana-transformer.d.ts +53 -0
- package/src/inference/pipelines/diffusion/sana-transformer.js +738 -0
- package/src/inference/pipelines/diffusion/scheduler.d.ts +17 -1
- package/src/inference/pipelines/diffusion/scheduler.js +91 -3
- package/src/inference/pipelines/diffusion/text-encoder-gpu.d.ts +6 -4
- package/src/inference/pipelines/diffusion/text-encoder-gpu.js +270 -0
- package/src/inference/pipelines/diffusion/text-encoder.js +18 -1
- package/src/inference/pipelines/diffusion/types.d.ts +4 -0
- package/src/inference/pipelines/diffusion/vae.js +782 -78
- package/src/inference/pipelines/text/config.d.ts +5 -0
- package/src/inference/pipelines/text/config.js +1 -1
- package/src/inference/pipelines/text/execution-v0.js +141 -101
- package/src/inference/pipelines/text/init.js +41 -10
- package/src/inference/pipelines/text.js +7 -1
- package/src/rules/execution-rules-contract-check.d.ts +17 -0
- package/src/rules/execution-rules-contract-check.js +245 -0
- package/src/rules/kernels/depthwise-conv2d.rules.json +6 -0
- package/src/rules/kernels/grouped-pointwise-conv2d.rules.json +6 -0
- package/src/rules/kernels/relu.rules.json +6 -0
- package/src/rules/kernels/repeat-channels.rules.json +6 -0
- package/src/rules/kernels/sana-linear-attention.rules.json +6 -0
- package/src/rules/layer-pattern-contract-check.d.ts +17 -0
- package/src/rules/layer-pattern-contract-check.js +231 -0
- package/src/rules/rule-registry.d.ts +28 -0
- package/src/rules/rule-registry.js +38 -0
- package/src/tooling/conversion-config-materializer.d.ts +24 -0
- package/src/tooling/conversion-config-materializer.js +99 -0
- package/src/tooling/lean-execution-contract-runner.d.ts +43 -0
- package/src/tooling/lean-execution-contract-runner.js +158 -0
- package/src/tooling/lean-execution-contract.d.ts +16 -0
- package/src/tooling/lean-execution-contract.js +81 -0
- package/src/tooling/node-convert.d.ts +10 -0
- package/src/tooling/node-converter.js +59 -0
- package/src/tooling/node-webgpu.js +30 -9
- package/src/version.d.ts +2 -0
- package/src/version.js +2 -0
- package/tools/convert-safetensors-node.js +47 -0
- package/tools/doppler-cli.js +167 -6
package/README.md
CHANGED
|
@@ -15,14 +15,14 @@ npm install @simulatte/doppler
|
|
|
15
15
|
```js
|
|
16
16
|
import { doppler } from '@simulatte/doppler';
|
|
17
17
|
|
|
18
|
-
const model = await doppler.load('
|
|
18
|
+
const model = await doppler.load('gemma3-270m');
|
|
19
19
|
|
|
20
20
|
for await (const token of model.generate('Hello, world')) {
|
|
21
21
|
process.stdout.write(token);
|
|
22
22
|
}
|
|
23
23
|
```
|
|
24
24
|
|
|
25
|
-
Tokens stream from a native `AsyncGenerator`. See [more examples](#more-examples) below or the
|
|
25
|
+
Registry IDs resolve to hosted RDRR artifacts from `Clocksmith/rdrr` by default. Tokens stream from a native `AsyncGenerator`. See [more examples](#more-examples) below or the canonical [Root API guide](docs/api/root.md).
|
|
26
26
|
|
|
27
27
|
## Why Doppler
|
|
28
28
|
|
|
@@ -56,16 +56,21 @@ Snapshot artifacts:
|
|
|
56
56
|
// Non-streaming
|
|
57
57
|
const text = await model.generateText('Explain WebGPU in one sentence');
|
|
58
58
|
|
|
59
|
+
// Load with progress logging
|
|
60
|
+
const modelWithProgress = await doppler.load('gemma3-270m', {
|
|
61
|
+
onProgress: ({ message }) => console.log(`[doppler] ${message}`),
|
|
62
|
+
});
|
|
63
|
+
|
|
59
64
|
// Chat
|
|
60
65
|
const reply = await model.chatText([
|
|
61
66
|
{ role: 'user', content: 'Write a dispatch that outruns its own light cone' },
|
|
62
67
|
]);
|
|
63
68
|
|
|
64
69
|
// LoRA hot-swap
|
|
65
|
-
await model.loadLoRA('oneshift-twoshift-redshift-blueshift');
|
|
70
|
+
await model.loadLoRA('https://example.com/adapters/oneshift-twoshift-redshift-blueshift/manifest.json');
|
|
66
71
|
|
|
67
72
|
// Convenience shorthand (caches model automatically)
|
|
68
|
-
for await (const token of doppler('Hello', { model: '
|
|
73
|
+
for await (const token of doppler('Hello', { model: 'gemma3-270m' })) {
|
|
69
74
|
process.stdout.write(token);
|
|
70
75
|
}
|
|
71
76
|
```
|
|
@@ -79,7 +84,8 @@ for await (const token of doppler('Hello', { model: 'gemma-3-1b' })) {
|
|
|
79
84
|
|
|
80
85
|
## Environment requirements
|
|
81
86
|
|
|
82
|
-
- WebGPU
|
|
87
|
+
- WebGPU is required.
|
|
88
|
+
- Supported runtimes: WebGPU-capable browsers, or Node with a WebGPU provider.
|
|
83
89
|
- Chrome / Edge 113+ supported.
|
|
84
90
|
- Firefox support varies (typically behind a flag).
|
|
85
91
|
- Safari support is evolving.
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@simulatte/doppler",
|
|
3
|
-
"version": "0.1.
|
|
3
|
+
"version": "0.1.5",
|
|
4
4
|
"description": "Browser-native WebGPU inference engine for local intent and inference loops",
|
|
5
5
|
"main": "src/index.js",
|
|
6
6
|
"types": "src/index.d.ts",
|
|
@@ -14,6 +14,18 @@
|
|
|
14
14
|
"manifest-refresh": "node tools/refresh-converted-manifest.js",
|
|
15
15
|
"debug": "node tools/doppler-cli.js debug",
|
|
16
16
|
"bench": "node tools/doppler-cli.js bench",
|
|
17
|
+
"lean:check": "./lean/check.sh",
|
|
18
|
+
"lean:execution-contract": "node tools/lean-execution-contract.js",
|
|
19
|
+
"lean:execution-contract:sweep": "node tools/lean-execution-contract-sweep.js",
|
|
20
|
+
"lean:execution-contract:configs": "node tools/lean-execution-contract-config-sweep.js",
|
|
21
|
+
"ci:lean:execution-contract": "node tools/lean-execution-contract-sweep.js --root models",
|
|
22
|
+
"ci:lean:execution-contract:configs": "node tools/lean-execution-contract-config-sweep.js --config-root tools/configs/conversion --manifest-root models --require-manifest-match",
|
|
23
|
+
"contracts:check": "node tools/check-contract-artifacts.js",
|
|
24
|
+
"contracts:summary": "node tools/check-contract-artifacts.js --json",
|
|
25
|
+
"contracts:check:lean": "node tools/check-contract-artifacts.js --with-lean",
|
|
26
|
+
"contracts:summary:lean": "node tools/check-contract-artifacts.js --json --with-lean",
|
|
27
|
+
"ci:contracts:check": "node tools/check-contract-artifacts.js --with-lean --lean-require-manifest-match",
|
|
28
|
+
"reports:convert:summary": "node tools/summarize-conversion-reports.js",
|
|
17
29
|
"bench:chart": "node ./benchmarks/vendors/compare-chart.js",
|
|
18
30
|
"bench:chart:readme": "node ./benchmarks/vendors/compare-chart.js --preset readme-evidence",
|
|
19
31
|
"bench:architecture:chart": "node ./benchmarks/vendors/generate-architecture-overview-svg.js",
|
|
@@ -39,6 +51,8 @@
|
|
|
39
51
|
"agents:freshness:strict": "node tools/verify-agent-freshness.js --strict",
|
|
40
52
|
"conflicts:check": "node tools/check-merge-markers.js",
|
|
41
53
|
"imports:check:browser": "node tools/check-browser-import-graph.js",
|
|
54
|
+
"api:docs:sync": "node tools/sync-api-docs.js",
|
|
55
|
+
"api:docs:check": "node tools/sync-api-docs.js --check",
|
|
42
56
|
"verify:model": "node tools/doppler-cli.js verify",
|
|
43
57
|
"onboarding:check": "node tools/onboarding-tooling.js check",
|
|
44
58
|
"onboarding:check:strict": "node tools/onboarding-tooling.js check --strict",
|
|
@@ -51,8 +65,11 @@
|
|
|
51
65
|
"verify": "node tools/run-registry-verify.js",
|
|
52
66
|
"registry:sync:scripts": "node tools/sync-registry-scripts.js",
|
|
53
67
|
"registry:sync:scripts:check": "node tools/sync-registry-scripts.js --check",
|
|
68
|
+
"registry:hf:check": "node tools/check-hf-registry.js",
|
|
69
|
+
"registry:publish:hf": "node tools/publish-hf-registry-model.js",
|
|
54
70
|
"support:matrix:sync": "node tools/sync-model-support-matrix.js",
|
|
55
71
|
"support:matrix:check": "node tools/sync-model-support-matrix.js --check",
|
|
72
|
+
"ci:catalog:check": "npm run registry:sync:scripts:check && npm run support:matrix:check && npm run registry:hf:check",
|
|
56
73
|
"external:rdrr:index": "node tools/sync-external-rdrr-index.js",
|
|
57
74
|
"external:rdrr:index:check": "node tools/sync-external-rdrr-index.js --check",
|
|
58
75
|
"verify:embeddinggemma-300m": "node tools/run-registry-verify.js embeddinggemma-300m",
|
|
@@ -63,6 +80,7 @@
|
|
|
63
80
|
"verify:google-embeddinggemma-300m": "node tools/run-registry-verify.js google-embeddinggemma-300m",
|
|
64
81
|
"verify:google-embeddinggemma-300m-wq4k-ef16": "node tools/run-registry-verify.js google-embeddinggemma-300m-wq4k-ef16",
|
|
65
82
|
"verify:google-gemma-3-270m-it": "node tools/run-registry-verify.js google-gemma-3-270m-it",
|
|
83
|
+
"verify:google-translategemma-4b-it": "node tools/run-registry-verify.js google-translategemma-4b-it",
|
|
66
84
|
"verify:qwen-3-5-0-8b": "node tools/run-registry-verify.js qwen-3-5-0-8b",
|
|
67
85
|
"verify:qwen-3-5-0-8b-wq4k-ef16-hf16-f16": "node tools/run-registry-verify.js qwen-3-5-0-8b-wq4k-ef16-hf16-f16",
|
|
68
86
|
"verify:qwen-3-5-2b": "node tools/run-registry-verify.js qwen-3-5-2b",
|
|
@@ -70,7 +88,10 @@
|
|
|
70
88
|
"verify:qwen-qwen3.5-0.8b": "node tools/run-registry-verify.js qwen-qwen3.5-0.8b",
|
|
71
89
|
"verify:qwen-qwen3.5-2b": "node tools/run-registry-verify.js qwen-qwen3.5-2b",
|
|
72
90
|
"verify:qwen3-0.8b": "node tools/run-registry-verify.js qwen3-0.8b",
|
|
73
|
-
"verify:qwen3-2b": "node tools/run-registry-verify.js qwen3-2b"
|
|
91
|
+
"verify:qwen3-2b": "node tools/run-registry-verify.js qwen3-2b",
|
|
92
|
+
"verify:translategemma": "node tools/run-registry-verify.js translategemma",
|
|
93
|
+
"verify:translategemma-4b": "node tools/run-registry-verify.js translategemma-4b",
|
|
94
|
+
"verify:translategemma-4b-it-wq4k-ef16-hf16": "node tools/run-registry-verify.js translategemma-4b-it-wq4k-ef16-hf16"
|
|
74
95
|
},
|
|
75
96
|
"exports": {
|
|
76
97
|
".": {
|
|
@@ -100,8 +121,7 @@
|
|
|
100
121
|
"./energy": {
|
|
101
122
|
"types": "./src/energy/index.d.ts",
|
|
102
123
|
"import": "./src/energy/index.js"
|
|
103
|
-
}
|
|
104
|
-
"./*": "./src/*"
|
|
124
|
+
}
|
|
105
125
|
},
|
|
106
126
|
"repository": {
|
|
107
127
|
"type": "git",
|
|
@@ -138,5 +158,8 @@
|
|
|
138
158
|
"jest": "^30.2.0",
|
|
139
159
|
"onnxruntime-web": "^1.24.1",
|
|
140
160
|
"playwright": "^1.58.2"
|
|
161
|
+
},
|
|
162
|
+
"optionalDependencies": {
|
|
163
|
+
"@simulatte/webgpu-doe": "0.1.x"
|
|
141
164
|
}
|
|
142
165
|
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
// Browser entry re-exports the full public API surface type declarations.
// NOTE(review): the '.d.ts' specifier only resolves with `allowArbitraryExtensions`
// enabled; './doppler-api.js' is the conventional form — confirm tsconfig.
export * from './doppler-api.d.ts';
|
|
@@ -0,0 +1,288 @@
|
|
|
1
|
+
import { loadLoRAFromManifest, loadLoRAFromUrl } from '../adapters/lora-loader.js';
|
|
2
|
+
import { log } from '../debug/index.js';
|
|
3
|
+
import { getManifestUrl, parseManifest } from '../formats/rdrr/index.js';
|
|
4
|
+
import { createPipeline } from '../generation/index.js';
|
|
5
|
+
import { getKernelCapabilities } from '../gpu/device.js';
|
|
6
|
+
import { formatChatMessages } from '../inference/pipelines/text/chat-format.js';
|
|
7
|
+
import { buildQuickstartModelBaseUrl, listQuickstartModels, resolveQuickstartModel } from './doppler-registry.js';
|
|
8
|
+
|
|
9
|
+
// Caches backing the doppler() convenience API: fully-loaded model handles
// keyed by resolved modelId, plus in-flight load promises so concurrent
// callers for the same model share a single load instead of racing.
const convenienceModelCache = new Map();
const inFlightLoadCache = new Map();
|
|
11
|
+
|
|
12
|
+
function emitLoadProgress(callback, phase, percent, message) {
|
|
13
|
+
if (typeof callback !== 'function') return;
|
|
14
|
+
callback({ phase, percent, message });
|
|
15
|
+
}
|
|
16
|
+
|
|
17
|
+
async function ensureWebGPUAvailable() {
|
|
18
|
+
if (typeof globalThis.navigator !== 'undefined' && globalThis.navigator?.gpu) {
|
|
19
|
+
return;
|
|
20
|
+
}
|
|
21
|
+
throw new Error('WebGPU is unavailable. Run in a WebGPU-capable browser.');
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
// Build the default Node-side progress handler: trims each event's message
// and forwards non-empty ones to the shared debug logger under 'doppler'.
export function createDefaultNodeLoadProgressLogger() {
  return (event) => {
    let message = '';
    if (typeof event?.message === 'string') {
      message = event.message.trim();
    }
    if (message) {
      log.info('doppler', message);
    }
  };
}
|
|
31
|
+
|
|
32
|
+
export function resolveLoadProgressHandlers(options = {}) {
|
|
33
|
+
const onProgress = typeof options?.onProgress === 'function' ? options.onProgress : null;
|
|
34
|
+
if (onProgress) {
|
|
35
|
+
return {
|
|
36
|
+
userProgress: onProgress,
|
|
37
|
+
pipelineProgress: onProgress,
|
|
38
|
+
};
|
|
39
|
+
}
|
|
40
|
+
return {
|
|
41
|
+
userProgress: null,
|
|
42
|
+
pipelineProgress: null,
|
|
43
|
+
};
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
// Download and parse the RDRR manifest located under `baseUrl`.
// Rejects with the HTTP status code when the fetch is not OK.
async function fetchManifestFromBaseUrl(baseUrl) {
  const manifestUrl = getManifestUrl(baseUrl);
  const response = await fetch(manifestUrl);
  if (!response.ok) {
    throw new Error(`Failed to fetch manifest from ${baseUrl}: ${response.status}`);
  }
  const body = await response.text();
  return parseManifest(body);
}
|
|
53
|
+
|
|
54
|
+
// Normalize a `doppler.load` argument into { modelId, baseUrl, manifest }.
// Accepted shapes: a quickstart registry id (string), { url }, or
// { manifest, baseUrl? }. Anything else rejects with a usage error.
async function resolveModelSource(model) {
  if (typeof model === 'string') {
    const entry = await resolveQuickstartModel(model);
    return {
      modelId: entry.modelId,
      baseUrl: buildQuickstartModelBaseUrl(entry),
      manifest: null,
    };
  }

  const isObject = Boolean(model) && typeof model === 'object';

  if (isObject && typeof model.url === 'string' && model.url.trim().length > 0) {
    const url = model.url.trim();
    // A direct URL doubles as the cache key (modelId) and the base URL.
    return { modelId: url, baseUrl: url, manifest: null };
  }

  if (isObject && model.manifest && typeof model.manifest === 'object') {
    const { manifest } = model;
    const hasId = typeof manifest.modelId === 'string' && manifest.modelId.length > 0;
    const hasBase = typeof model.baseUrl === 'string' && model.baseUrl.length > 0;
    return {
      modelId: hasId ? manifest.modelId : 'manifest',
      baseUrl: hasBase ? model.baseUrl : null,
      manifest,
    };
  }

  throw new Error('doppler.load expects a quickstart registry id, { url }, or { manifest, baseUrl? }.');
}
|
|
83
|
+
|
|
84
|
+
function countTokens(pipeline, text) {
|
|
85
|
+
if (!text || typeof text !== 'string') return 0;
|
|
86
|
+
try {
|
|
87
|
+
return pipeline?.tokenizer?.encode(text)?.length ?? 0;
|
|
88
|
+
} catch {
|
|
89
|
+
return 0;
|
|
90
|
+
}
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
// Reconstruct the prompt text that chat generation would have produced so
// prompt-token usage can be estimated. When the manifest's chat template is
// disabled, no template type is passed; when formatting fails for any reason
// the messages' contents are newline-joined as a fallback.
function resolveChatPromptForUsage(pipeline, messages) {
  const chatTemplate = pipeline?.manifest?.inference?.chatTemplate;
  let templateType = null;
  if (chatTemplate?.enabled !== false) {
    templateType = chatTemplate?.type ?? null;
  }
  try {
    return formatChatMessages(messages, templateType);
  } catch {
    const parts = messages.map((message) => String(message?.content ?? ''));
    return parts.join('\n');
  }
}
|
|
103
|
+
|
|
104
|
+
async function collectText(iterable) {
|
|
105
|
+
let output = '';
|
|
106
|
+
for await (const token of iterable) {
|
|
107
|
+
output += token;
|
|
108
|
+
}
|
|
109
|
+
return output;
|
|
110
|
+
}
|
|
111
|
+
|
|
112
|
+
// Wrap a loaded pipeline in the stable public handle returned by
// doppler.load(): streaming/non-streaming generation and chat, LoRA
// management, lifecycle control, and read-only metadata accessors.
function createModelHandle(pipeline, resolved) {
  // Non-streaming chat: collects the stream, then estimates token usage by
  // re-deriving the templated prompt text and encoding both sides.
  const chatText = async (messages, options = {}) => {
    const content = await collectText(pipeline.generate(messages, options));
    const promptText = resolveChatPromptForUsage(pipeline, messages);
    const promptTokens = countTokens(pipeline, promptText);
    const completionTokens = countTokens(pipeline, content);
    return {
      content,
      usage: {
        promptTokens,
        completionTokens,
        totalTokens: promptTokens + completionTokens,
      },
    };
  };

  return {
    // Streaming generation: yields tokens straight from the pipeline.
    generate(prompt, options = {}) {
      return pipeline.generate(prompt, options);
    },
    // Non-streaming generation: resolves with the concatenated output.
    async generateText(prompt, options = {}) {
      return collectText(pipeline.generate(prompt, options));
    },
    // Streaming chat: messages flow through the same pipeline entry point.
    chat(messages, options = {}) {
      return pipeline.generate(messages, options);
    },
    chatText,
    // Attach a LoRA adapter from a manifest URL (string) or manifest object.
    async loadLoRA(adapter) {
      const lora = typeof adapter === 'string'
        ? await loadLoRAFromUrl(adapter)
        : await loadLoRAFromManifest(adapter);
      pipeline.setLoRAAdapter(lora);
    },
    async unloadLoRA() {
      pipeline.setLoRAAdapter(null);
    },
    async unload() {
      await pipeline.unload();
    },
    get activeLoRA() {
      return pipeline.getActiveLoRA()?.name ?? null;
    },
    get loaded() {
      return pipeline.isLoaded === true;
    },
    get modelId() {
      return resolved.modelId;
    },
    get manifest() {
      return pipeline.manifest;
    },
    get deviceInfo() {
      return getKernelCapabilities()?.adapterInfo ?? null;
    },
    // Escape hatch for KV-cache reuse workflows.
    advanced: {
      prefillKV(prompt, options = {}) {
        return pipeline.prefillKVOnly(prompt, options);
      },
      generateWithPrefixKV(prefix, prompt, options = {}) {
        return pipeline.generateWithPrefixKV(prefix, prompt, options);
      },
    },
  };
}
|
|
174
|
+
|
|
175
|
+
// Resolve, fetch, and initialize a model, reporting coarse progress through
// the optional options.onProgress callback. Progress bands: resolve 5%,
// manifest 15%, weight loading 25-99%, ready 100%.
// Returns the public model handle.
export async function load(model, options = {}) {
  const { userProgress, pipelineProgress } = resolveLoadProgressHandlers(options);

  emitLoadProgress(userProgress, 'resolve', 5, 'Resolving model');
  const resolved = await resolveModelSource(model);
  await ensureWebGPUAvailable();

  emitLoadProgress(userProgress, 'manifest', 15, 'Fetching manifest');
  const manifest = resolved.manifest ?? await fetchManifestFromBaseUrl(resolved.baseUrl);

  emitLoadProgress(userProgress, 'load', 25, 'Loading weights');
  // Clamp pipeline percentages into the 25-99 band so only the final
  // 'ready' event reports 100.
  const forwardPipelineProgress = pipelineProgress
    ? (progress) => {
        const percent = Math.max(25, Math.min(99, Math.round(progress.percent)));
        emitLoadProgress(pipelineProgress, 'load', percent, progress.message || 'Loading weights');
      }
    : undefined;

  const pipeline = await createPipeline(manifest, {
    baseUrl: resolved.baseUrl ?? undefined,
    runtimeConfig: options.runtimeConfig,
    onProgress: forwardPipelineProgress,
  });

  emitLoadProgress(userProgress, 'ready', 100, 'Model ready');
  return createModelHandle(pipeline, resolved);
}
|
|
202
|
+
|
|
203
|
+
// Shared cache lookup for the doppler() convenience API. Returns a live
// cached handle when one exists, drops handles that were unloaded elsewhere,
// and otherwise starts (or joins) a single in-flight load per model id.
async function getCachedModel(model, options = {}) {
  const resolved = await resolveModelSource(model);
  const cacheKey = resolved.modelId;

  const cached = convenienceModelCache.get(cacheKey);
  if (cached) {
    if (cached.loaded) {
      return cached;
    }
    // Stale entry (pipeline unloaded): evict so the load below replaces it.
    convenienceModelCache.delete(cacheKey);
  }

  let pending = inFlightLoadCache.get(cacheKey);
  if (!pending) {
    pending = load(model, options)
      .then((instance) => {
        convenienceModelCache.set(cacheKey, instance);
        inFlightLoadCache.delete(cacheKey);
        return instance;
      })
      .catch((error) => {
        // Clear the in-flight slot so a later call can retry the load.
        inFlightLoadCache.delete(cacheKey);
        throw error;
      });
    inFlightLoadCache.set(cacheKey, pending);
  }
  return pending;
}
|
|
225
|
+
|
|
226
|
+
// Implementation of the doppler() shorthand: validates call options, loads
// (or reuses) the cached model, then streams generation tokens. Options that
// affect loading must go through doppler.load() so the cache stays coherent.
async function* dopplerGenerate(prompt, options = {}) {
  const hasModel = Boolean(options) && typeof options === 'object' && options.model != null;
  if (!hasModel) {
    throw new Error('doppler() requires options.model.');
  }
  if (options.runtimeConfig !== undefined || options.runtimePreset !== undefined) {
    throw new Error('doppler() does not accept load-affecting options. Use doppler.load(model, options) instead.');
  }
  const model = await getCachedModel(options.model, { onProgress: options.onProgress });
  yield* model.generate(prompt, options);
}
|
|
236
|
+
|
|
237
|
+
/**
 * Public doppler() namespace: callable shorthand for streaming generation,
 * plus cache-aware helpers attached as properties below.
 */
export function doppler(prompt, options) {
  return dopplerGenerate(prompt, options);
}

doppler.load = load;

/**
 * Non-streaming shorthand: generate and resolve with the full completion
 * text. Requires options.model; the model handle is cached between calls.
 */
doppler.text = async function text(prompt, options = {}) {
  if (!options || typeof options !== 'object' || options.model == null) {
    throw new Error('doppler.text() requires options.model.');
  }
  const model = await getCachedModel(options.model, { onProgress: options.onProgress });
  return model.generateText(prompt, options);
};

/**
 * Streaming chat shorthand. Validation happens synchronously so bad calls
 * throw immediately instead of on first iteration of the returned generator.
 */
doppler.chat = function chat(messages, options = {}) {
  if (!options || typeof options !== 'object' || options.model == null) {
    throw new Error('doppler.chat() requires options.model.');
  }
  return (async function* run() {
    const model = await getCachedModel(options.model, { onProgress: options.onProgress });
    yield* model.chat(messages, options);
  }());
};

/** Non-streaming chat shorthand resolving with { content, usage }. */
doppler.chatText = async function chatText(messages, options = {}) {
  if (!options || typeof options !== 'object' || options.model == null) {
    throw new Error('doppler.chatText() requires options.model.');
  }
  const model = await getCachedModel(options.model, { onProgress: options.onProgress });
  return model.chatText(messages, options);
};

/**
 * Unload and drop one cached model. Resolves true when a cached handle was
 * evicted, false when nothing was cached for that model.
 */
doppler.evict = async function evict(model) {
  const resolved = await resolveModelSource(model);
  const cacheKey = resolved.modelId;
  const cached = convenienceModelCache.get(cacheKey);
  if (!cached) return false;
  await cached.unload();
  convenienceModelCache.delete(cacheKey);
  return true;
};

/** Unload and drop every cached model; individual unload failures are tolerated. */
doppler.evictAll = async function evictAll() {
  const cachedModels = Array.from(convenienceModelCache.values());
  convenienceModelCache.clear();
  await Promise.allSettled(cachedModels.map((model) => model.unload()));
};

/**
 * List quickstart registry models by primary alias, falling back to the raw
 * model id for entries without aliases.
 */
doppler.listModels = async function listModels() {
  const models = await listQuickstartModels();
  // Fix: guard entries without an aliases array — `entry.aliases[0]` would
  // throw a TypeError and make the whole listing fail.
  return models.map((entry) => entry.aliases?.[0] || entry.modelId);
};
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
import type { RDRRManifest } from '../formats/rdrr/index.js';
|
|
2
|
+
import type { GenerateOptions, KVCacheSnapshot } from '../generation/index.js';
|
|
3
|
+
import type { ChatMessage } from '../inference/pipelines/text/chat-format.js';
|
|
4
|
+
import type { LoRAManifest } from '../adapters/lora-loader.js';
|
|
5
|
+
|
|
6
|
+
/** Progress event emitted during doppler.load(). */
export interface DopplerLoadProgress {
  /** Coarse load stage; 'ready' is always the final event. */
  phase: 'resolve' | 'manifest' | 'load' | 'ready';
  /** 0-100; weight loading is reported within the 25-99 band. */
  percent: number;
  message: string;
}

/** Options accepted by doppler.load(). */
export interface DopplerLoadOptions {
  /** Receives every load progress event. */
  onProgress?: (event: DopplerLoadProgress) => void;
  runtimeConfig?: Record<string, unknown>;
}

/** Options for the doppler() shorthand calls; extends generation options. */
export interface DopplerCallOptions extends GenerateOptions {
  /** Quickstart registry id or direct artifact URL. Required. */
  model: string | { url: string };
  onProgress?: (event: DopplerLoadProgress) => void;
}

/** Result of chatText(): full completion plus estimated token usage. */
export interface DopplerChatResponse {
  content: string;
  usage: {
    promptTokens: number;
    completionTokens: number;
    totalTokens: number;
  };
}

/** Handle returned by doppler.load(); wraps a loaded inference pipeline. */
export interface DopplerModel {
  /** Streaming generation: yields output tokens as they are produced. */
  generate(prompt: string, options?: GenerateOptions): AsyncGenerator<string, void, void>;
  /** Non-streaming generation: resolves with the concatenated output. */
  generateText(prompt: string, options?: GenerateOptions): Promise<string>;
  /** Streaming chat over a message list. */
  chat(messages: ChatMessage[], options?: GenerateOptions): AsyncGenerator<string, void, void>;
  /** Non-streaming chat; includes estimated token usage. */
  chatText(messages: ChatMessage[], options?: GenerateOptions): Promise<DopplerChatResponse>;
  /** Attach a LoRA adapter from a manifest URL or manifest object. */
  loadLoRA(adapter: string | LoRAManifest): Promise<void>;
  unloadLoRA(): Promise<void>;
  /** Release GPU resources; the handle's `loaded` flag turns false. */
  unload(): Promise<void>;
  readonly activeLoRA: string | null;
  readonly loaded: boolean;
  readonly modelId: string;
  readonly manifest: RDRRManifest;
  readonly deviceInfo: Record<string, unknown> | null;
  /** KV-cache reuse escape hatch for advanced prefix-sharing workflows. */
  readonly advanced: {
    prefillKV(prompt: string, options?: GenerateOptions): Promise<KVCacheSnapshot>;
    generateWithPrefixKV(
      prefix: KVCacheSnapshot,
      prompt: string,
      options?: GenerateOptions
    ): AsyncGenerator<string, void, void>;
  };
}

/** Callable doppler namespace: shorthand generation plus cache management. */
export interface DopplerNamespace {
  (prompt: string, options: DopplerCallOptions): AsyncGenerator<string, void, void>;
  load(
    model: string | { url: string } | { manifest: RDRRManifest; baseUrl?: string },
    options?: DopplerLoadOptions
  ): Promise<DopplerModel>;
  text(prompt: string, options: DopplerCallOptions): Promise<string>;
  chat(messages: ChatMessage[], options: DopplerCallOptions): AsyncGenerator<string, void, void>;
  chatText(messages: ChatMessage[], options: DopplerCallOptions): Promise<DopplerChatResponse>;
  /** Resolves true when a cached handle existed and was unloaded. */
  evict(model: string | { url: string }): Promise<boolean>;
  evictAll(): Promise<void>;
  /** Quickstart registry ids (primary alias per entry). */
  listModels(): Promise<string[]>;
}

export declare function load(
  model: string | { url: string } | { manifest: RDRRManifest; baseUrl?: string },
  options?: DopplerLoadOptions
): Promise<DopplerModel>;

/** Default Node progress handler: logs trimmed non-empty messages. */
export declare function createDefaultNodeLoadProgressLogger(): (event: DopplerLoadProgress) => void;

/** Normalizes DopplerLoadOptions into the user/pipeline progress sink pair. */
export declare function resolveLoadProgressHandlers(options?: DopplerLoadOptions): {
  userProgress: ((event: DopplerLoadProgress) => void) | null;
  pipelineProgress: ((event: DopplerLoadProgress) => void) | null;
};

export declare const doppler: DopplerNamespace;
|