@simulatte/doppler 0.1.4 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (103) hide show
  1. package/README.md +4 -3
  2. package/package.json +25 -4
  3. package/src/client/doppler-api.browser.d.ts +1 -0
  4. package/src/client/doppler-api.browser.js +288 -0
  5. package/src/client/doppler-api.js +1 -1
  6. package/src/client/doppler-provider/types.js +1 -1
  7. package/src/config/execution-contract-check.d.ts +33 -0
  8. package/src/config/execution-contract-check.js +72 -0
  9. package/src/config/execution-v0-contract-check.d.ts +94 -0
  10. package/src/config/execution-v0-contract-check.js +251 -0
  11. package/src/config/execution-v0-graph-contract-check.d.ts +20 -0
  12. package/src/config/execution-v0-graph-contract-check.js +64 -0
  13. package/src/config/kernel-path-contract-check.d.ts +76 -0
  14. package/src/config/kernel-path-contract-check.js +479 -0
  15. package/src/config/kernel-path-loader.d.ts +16 -0
  16. package/src/config/kernel-path-loader.js +54 -0
  17. package/src/config/kernels/kernel-ref-digests.js +12 -0
  18. package/src/config/kernels/registry.json +556 -0
  19. package/src/config/loader.js +50 -46
  20. package/src/config/merge-contract-check.d.ts +16 -0
  21. package/src/config/merge-contract-check.js +321 -0
  22. package/src/config/merge-helpers.d.ts +58 -0
  23. package/src/config/merge-helpers.js +54 -0
  24. package/src/config/merge.js +3 -6
  25. package/src/config/presets/models/janus-text.json +2 -0
  26. package/src/config/quantization-contract-check.d.ts +12 -0
  27. package/src/config/quantization-contract-check.js +91 -0
  28. package/src/config/required-inference-fields-contract-check.d.ts +24 -0
  29. package/src/config/required-inference-fields-contract-check.js +231 -0
  30. package/src/config/schema/browser-suite-metrics.schema.d.ts +17 -0
  31. package/src/config/schema/browser-suite-metrics.schema.js +46 -0
  32. package/src/config/schema/conversion-report.schema.d.ts +40 -0
  33. package/src/config/schema/conversion-report.schema.js +108 -0
  34. package/src/config/schema/doppler.schema.js +12 -18
  35. package/src/config/schema/index.d.ts +22 -0
  36. package/src/config/schema/index.js +18 -0
  37. package/src/converter/core.d.ts +10 -0
  38. package/src/converter/core.js +27 -2
  39. package/src/converter/parsers/diffusion.js +63 -3
  40. package/src/gpu/kernels/depthwise_conv2d.d.ts +29 -0
  41. package/src/gpu/kernels/depthwise_conv2d.js +98 -0
  42. package/src/gpu/kernels/depthwise_conv2d.wgsl +58 -0
  43. package/src/gpu/kernels/depthwise_conv2d_f16.wgsl +62 -0
  44. package/src/gpu/kernels/grouped_pointwise_conv2d.d.ts +27 -0
  45. package/src/gpu/kernels/grouped_pointwise_conv2d.js +92 -0
  46. package/src/gpu/kernels/grouped_pointwise_conv2d.wgsl +47 -0
  47. package/src/gpu/kernels/grouped_pointwise_conv2d_f16.wgsl +51 -0
  48. package/src/gpu/kernels/index.d.ts +30 -0
  49. package/src/gpu/kernels/index.js +25 -0
  50. package/src/gpu/kernels/relu.d.ts +18 -0
  51. package/src/gpu/kernels/relu.js +45 -0
  52. package/src/gpu/kernels/relu.wgsl +21 -0
  53. package/src/gpu/kernels/relu_f16.wgsl +23 -0
  54. package/src/gpu/kernels/repeat_channels.d.ts +21 -0
  55. package/src/gpu/kernels/repeat_channels.js +60 -0
  56. package/src/gpu/kernels/repeat_channels.wgsl +29 -0
  57. package/src/gpu/kernels/repeat_channels_f16.wgsl +31 -0
  58. package/src/gpu/kernels/sana_linear_attention.d.ts +27 -0
  59. package/src/gpu/kernels/sana_linear_attention.js +122 -0
  60. package/src/gpu/kernels/sana_linear_attention_apply.wgsl +44 -0
  61. package/src/gpu/kernels/sana_linear_attention_apply_f16.wgsl +47 -0
  62. package/src/gpu/kernels/sana_linear_attention_summary.wgsl +47 -0
  63. package/src/gpu/kernels/sana_linear_attention_summary_f16.wgsl +49 -0
  64. package/src/index-browser.d.ts +1 -1
  65. package/src/index-browser.js +2 -2
  66. package/src/index.js +1 -1
  67. package/src/inference/browser-harness.js +62 -22
  68. package/src/inference/pipelines/diffusion/init.js +14 -0
  69. package/src/inference/pipelines/diffusion/pipeline.js +206 -77
  70. package/src/inference/pipelines/diffusion/sana-transformer.d.ts +53 -0
  71. package/src/inference/pipelines/diffusion/sana-transformer.js +738 -0
  72. package/src/inference/pipelines/diffusion/scheduler.d.ts +17 -1
  73. package/src/inference/pipelines/diffusion/scheduler.js +91 -3
  74. package/src/inference/pipelines/diffusion/text-encoder-gpu.d.ts +6 -4
  75. package/src/inference/pipelines/diffusion/text-encoder-gpu.js +270 -0
  76. package/src/inference/pipelines/diffusion/text-encoder.js +18 -1
  77. package/src/inference/pipelines/diffusion/types.d.ts +4 -0
  78. package/src/inference/pipelines/diffusion/vae.js +782 -78
  79. package/src/inference/pipelines/text/config.d.ts +5 -0
  80. package/src/inference/pipelines/text/config.js +1 -1
  81. package/src/inference/pipelines/text/execution-v0.js +14 -93
  82. package/src/rules/execution-rules-contract-check.d.ts +17 -0
  83. package/src/rules/execution-rules-contract-check.js +245 -0
  84. package/src/rules/kernels/depthwise-conv2d.rules.json +6 -0
  85. package/src/rules/kernels/grouped-pointwise-conv2d.rules.json +6 -0
  86. package/src/rules/kernels/relu.rules.json +6 -0
  87. package/src/rules/kernels/repeat-channels.rules.json +6 -0
  88. package/src/rules/kernels/sana-linear-attention.rules.json +6 -0
  89. package/src/rules/layer-pattern-contract-check.d.ts +17 -0
  90. package/src/rules/layer-pattern-contract-check.js +231 -0
  91. package/src/rules/rule-registry.d.ts +28 -0
  92. package/src/rules/rule-registry.js +38 -0
  93. package/src/tooling/conversion-config-materializer.d.ts +24 -0
  94. package/src/tooling/conversion-config-materializer.js +99 -0
  95. package/src/tooling/lean-execution-contract-runner.d.ts +43 -0
  96. package/src/tooling/lean-execution-contract-runner.js +158 -0
  97. package/src/tooling/node-convert.d.ts +10 -0
  98. package/src/tooling/node-converter.js +59 -0
  99. package/src/tooling/node-webgpu.js +9 -9
  100. package/src/version.d.ts +2 -0
  101. package/src/version.js +2 -0
  102. package/tools/convert-safetensors-node.js +47 -0
  103. package/tools/doppler-cli.js +115 -1
package/README.md CHANGED
@@ -22,7 +22,7 @@ for await (const token of model.generate('Hello, world')) {
22
22
  }
23
23
  ```
24
24
 
25
- Registry IDs resolve to hosted RDRR artifacts from `Clocksmith/rdrr` by default. Tokens stream from a native `AsyncGenerator`. See [more examples](#more-examples) below or the full [API contract](docs/doppler-api-contract.md).
25
+ Registry IDs resolve to hosted RDRR artifacts from `Clocksmith/rdrr` by default. Tokens stream from a native `AsyncGenerator`. See [more examples](#more-examples) below or the canonical [Root API guide](docs/api/root.md).
26
26
 
27
27
  ## Why Doppler
28
28
 
@@ -67,7 +67,7 @@ const reply = await model.chatText([
67
67
  ]);
68
68
 
69
69
  // LoRA hot-swap
70
- await model.loadLoRA('oneshift-twoshift-redshift-blueshift');
70
+ await model.loadLoRA('https://example.com/adapters/oneshift-twoshift-redshift-blueshift/manifest.json');
71
71
 
72
72
  // Convenience shorthand (caches model automatically)
73
73
  for await (const token of doppler('Hello', { model: 'gemma3-270m' })) {
@@ -84,7 +84,8 @@ for await (const token of doppler('Hello', { model: 'gemma3-270m' })) {
84
84
 
85
85
  ## Environment requirements
86
86
 
87
- - WebGPU-capable browser runtime is required.
87
+ - WebGPU is required.
88
+ - Supported runtimes: WebGPU-capable browsers, or Node with a WebGPU provider.
88
89
  - Chrome / Edge 113+ supported.
89
90
  - Firefox support varies (typically behind a flag).
90
91
  - Safari support is evolving.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@simulatte/doppler",
3
- "version": "0.1.4",
3
+ "version": "0.1.5",
4
4
  "description": "Browser-native WebGPU inference engine for local intent and inference loops",
5
5
  "main": "src/index.js",
6
6
  "types": "src/index.d.ts",
@@ -16,6 +16,16 @@
16
16
  "bench": "node tools/doppler-cli.js bench",
17
17
  "lean:check": "./lean/check.sh",
18
18
  "lean:execution-contract": "node tools/lean-execution-contract.js",
19
+ "lean:execution-contract:sweep": "node tools/lean-execution-contract-sweep.js",
20
+ "lean:execution-contract:configs": "node tools/lean-execution-contract-config-sweep.js",
21
+ "ci:lean:execution-contract": "node tools/lean-execution-contract-sweep.js --root models",
22
+ "ci:lean:execution-contract:configs": "node tools/lean-execution-contract-config-sweep.js --config-root tools/configs/conversion --manifest-root models --require-manifest-match",
23
+ "contracts:check": "node tools/check-contract-artifacts.js",
24
+ "contracts:summary": "node tools/check-contract-artifacts.js --json",
25
+ "contracts:check:lean": "node tools/check-contract-artifacts.js --with-lean",
26
+ "contracts:summary:lean": "node tools/check-contract-artifacts.js --json --with-lean",
27
+ "ci:contracts:check": "node tools/check-contract-artifacts.js --with-lean --lean-require-manifest-match",
28
+ "reports:convert:summary": "node tools/summarize-conversion-reports.js",
19
29
  "bench:chart": "node ./benchmarks/vendors/compare-chart.js",
20
30
  "bench:chart:readme": "node ./benchmarks/vendors/compare-chart.js --preset readme-evidence",
21
31
  "bench:architecture:chart": "node ./benchmarks/vendors/generate-architecture-overview-svg.js",
@@ -41,6 +51,8 @@
41
51
  "agents:freshness:strict": "node tools/verify-agent-freshness.js --strict",
42
52
  "conflicts:check": "node tools/check-merge-markers.js",
43
53
  "imports:check:browser": "node tools/check-browser-import-graph.js",
54
+ "api:docs:sync": "node tools/sync-api-docs.js",
55
+ "api:docs:check": "node tools/sync-api-docs.js --check",
44
56
  "verify:model": "node tools/doppler-cli.js verify",
45
57
  "onboarding:check": "node tools/onboarding-tooling.js check",
46
58
  "onboarding:check:strict": "node tools/onboarding-tooling.js check --strict",
@@ -53,8 +65,11 @@
53
65
  "verify": "node tools/run-registry-verify.js",
54
66
  "registry:sync:scripts": "node tools/sync-registry-scripts.js",
55
67
  "registry:sync:scripts:check": "node tools/sync-registry-scripts.js --check",
68
+ "registry:hf:check": "node tools/check-hf-registry.js",
69
+ "registry:publish:hf": "node tools/publish-hf-registry-model.js",
56
70
  "support:matrix:sync": "node tools/sync-model-support-matrix.js",
57
71
  "support:matrix:check": "node tools/sync-model-support-matrix.js --check",
72
+ "ci:catalog:check": "npm run registry:sync:scripts:check && npm run support:matrix:check && npm run registry:hf:check",
58
73
  "external:rdrr:index": "node tools/sync-external-rdrr-index.js",
59
74
  "external:rdrr:index:check": "node tools/sync-external-rdrr-index.js --check",
60
75
  "verify:embeddinggemma-300m": "node tools/run-registry-verify.js embeddinggemma-300m",
@@ -65,6 +80,7 @@
65
80
  "verify:google-embeddinggemma-300m": "node tools/run-registry-verify.js google-embeddinggemma-300m",
66
81
  "verify:google-embeddinggemma-300m-wq4k-ef16": "node tools/run-registry-verify.js google-embeddinggemma-300m-wq4k-ef16",
67
82
  "verify:google-gemma-3-270m-it": "node tools/run-registry-verify.js google-gemma-3-270m-it",
83
+ "verify:google-translategemma-4b-it": "node tools/run-registry-verify.js google-translategemma-4b-it",
68
84
  "verify:qwen-3-5-0-8b": "node tools/run-registry-verify.js qwen-3-5-0-8b",
69
85
  "verify:qwen-3-5-0-8b-wq4k-ef16-hf16-f16": "node tools/run-registry-verify.js qwen-3-5-0-8b-wq4k-ef16-hf16-f16",
70
86
  "verify:qwen-3-5-2b": "node tools/run-registry-verify.js qwen-3-5-2b",
@@ -72,7 +88,10 @@
72
88
  "verify:qwen-qwen3.5-0.8b": "node tools/run-registry-verify.js qwen-qwen3.5-0.8b",
73
89
  "verify:qwen-qwen3.5-2b": "node tools/run-registry-verify.js qwen-qwen3.5-2b",
74
90
  "verify:qwen3-0.8b": "node tools/run-registry-verify.js qwen3-0.8b",
75
- "verify:qwen3-2b": "node tools/run-registry-verify.js qwen3-2b"
91
+ "verify:qwen3-2b": "node tools/run-registry-verify.js qwen3-2b",
92
+ "verify:translategemma": "node tools/run-registry-verify.js translategemma",
93
+ "verify:translategemma-4b": "node tools/run-registry-verify.js translategemma-4b",
94
+ "verify:translategemma-4b-it-wq4k-ef16-hf16": "node tools/run-registry-verify.js translategemma-4b-it-wq4k-ef16-hf16"
76
95
  },
77
96
  "exports": {
78
97
  ".": {
@@ -102,8 +121,7 @@
102
121
  "./energy": {
103
122
  "types": "./src/energy/index.d.ts",
104
123
  "import": "./src/energy/index.js"
105
- },
106
- "./*": "./src/*"
124
+ }
107
125
  },
108
126
  "repository": {
109
127
  "type": "git",
@@ -140,5 +158,8 @@
140
158
  "jest": "^30.2.0",
141
159
  "onnxruntime-web": "^1.24.1",
142
160
  "playwright": "^1.58.2"
161
+ },
162
+ "optionalDependencies": {
163
+ "@simulatte/webgpu-doe": "0.1.x"
143
164
  }
144
165
  }
@@ -0,0 +1 @@
1
+ export * from './doppler-api.d.ts';
@@ -0,0 +1,288 @@
1
+ import { loadLoRAFromManifest, loadLoRAFromUrl } from '../adapters/lora-loader.js';
2
+ import { log } from '../debug/index.js';
3
+ import { getManifestUrl, parseManifest } from '../formats/rdrr/index.js';
4
+ import { createPipeline } from '../generation/index.js';
5
+ import { getKernelCapabilities } from '../gpu/device.js';
6
+ import { formatChatMessages } from '../inference/pipelines/text/chat-format.js';
7
+ import { buildQuickstartModelBaseUrl, listQuickstartModels, resolveQuickstartModel } from './doppler-registry.js';
8
+
9
+ const convenienceModelCache = new Map();
10
+ const inFlightLoadCache = new Map();
11
+
12
+ function emitLoadProgress(callback, phase, percent, message) {
13
+ if (typeof callback !== 'function') return;
14
+ callback({ phase, percent, message });
15
+ }
16
+
17
+ async function ensureWebGPUAvailable() {
18
+ if (typeof globalThis.navigator !== 'undefined' && globalThis.navigator?.gpu) {
19
+ return;
20
+ }
21
+ throw new Error('WebGPU is unavailable. Run in a WebGPU-capable browser.');
22
+ }
23
+
24
+ export function createDefaultNodeLoadProgressLogger() {
25
+ return (event) => {
26
+ const message = typeof event?.message === 'string' ? event.message.trim() : '';
27
+ if (!message) return;
28
+ log.info('doppler', message);
29
+ };
30
+ }
31
+
32
+ export function resolveLoadProgressHandlers(options = {}) {
33
+ const onProgress = typeof options?.onProgress === 'function' ? options.onProgress : null;
34
+ if (onProgress) {
35
+ return {
36
+ userProgress: onProgress,
37
+ pipelineProgress: onProgress,
38
+ };
39
+ }
40
+ return {
41
+ userProgress: null,
42
+ pipelineProgress: null,
43
+ };
44
+ }
45
+
46
+ async function fetchManifestFromBaseUrl(baseUrl) {
47
+ const response = await fetch(getManifestUrl(baseUrl));
48
+ if (!response.ok) {
49
+ throw new Error(`Failed to fetch manifest from ${baseUrl}: ${response.status}`);
50
+ }
51
+ return parseManifest(await response.text());
52
+ }
53
+
54
+ async function resolveModelSource(model) {
55
+ if (typeof model === 'string') {
56
+ const entry = await resolveQuickstartModel(model);
57
+ return {
58
+ modelId: entry.modelId,
59
+ baseUrl: buildQuickstartModelBaseUrl(entry),
60
+ manifest: null,
61
+ };
62
+ }
63
+ if (model && typeof model === 'object' && typeof model.url === 'string' && model.url.trim().length > 0) {
64
+ return {
65
+ modelId: model.url.trim(),
66
+ baseUrl: model.url.trim(),
67
+ manifest: null,
68
+ };
69
+ }
70
+ if (model && typeof model === 'object' && model.manifest && typeof model.manifest === 'object') {
71
+ const manifest = model.manifest;
72
+ const modelId = typeof manifest.modelId === 'string' && manifest.modelId.length > 0
73
+ ? manifest.modelId
74
+ : 'manifest';
75
+ return {
76
+ modelId,
77
+ baseUrl: typeof model.baseUrl === 'string' && model.baseUrl.length > 0 ? model.baseUrl : null,
78
+ manifest,
79
+ };
80
+ }
81
+ throw new Error('doppler.load expects a quickstart registry id, { url }, or { manifest, baseUrl? }.');
82
+ }
83
+
84
+ function countTokens(pipeline, text) {
85
+ if (!text || typeof text !== 'string') return 0;
86
+ try {
87
+ return pipeline?.tokenizer?.encode(text)?.length ?? 0;
88
+ } catch {
89
+ return 0;
90
+ }
91
+ }
92
+
93
+ function resolveChatPromptForUsage(pipeline, messages) {
94
+ const templateType = pipeline?.manifest?.inference?.chatTemplate?.enabled === false
95
+ ? null
96
+ : (pipeline?.manifest?.inference?.chatTemplate?.type ?? null);
97
+ try {
98
+ return formatChatMessages(messages, templateType);
99
+ } catch {
100
+ return messages.map((message) => String(message?.content ?? '')).join('\n');
101
+ }
102
+ }
103
+
104
+ async function collectText(iterable) {
105
+ let output = '';
106
+ for await (const token of iterable) {
107
+ output += token;
108
+ }
109
+ return output;
110
+ }
111
+
112
+ function createModelHandle(pipeline, resolved) {
113
+ return {
114
+ generate(prompt, options = {}) {
115
+ return pipeline.generate(prompt, options);
116
+ },
117
+ async generateText(prompt, options = {}) {
118
+ return collectText(pipeline.generate(prompt, options));
119
+ },
120
+ chat(messages, options = {}) {
121
+ return pipeline.generate(messages, options);
122
+ },
123
+ async chatText(messages, options = {}) {
124
+ const content = await collectText(pipeline.generate(messages, options));
125
+ const promptText = resolveChatPromptForUsage(pipeline, messages);
126
+ const promptTokens = countTokens(pipeline, promptText);
127
+ const completionTokens = countTokens(pipeline, content);
128
+ return {
129
+ content,
130
+ usage: {
131
+ promptTokens,
132
+ completionTokens,
133
+ totalTokens: promptTokens + completionTokens,
134
+ },
135
+ };
136
+ },
137
+ async loadLoRA(adapter) {
138
+ const lora = typeof adapter === 'string'
139
+ ? await loadLoRAFromUrl(adapter)
140
+ : await loadLoRAFromManifest(adapter);
141
+ pipeline.setLoRAAdapter(lora);
142
+ },
143
+ async unloadLoRA() {
144
+ pipeline.setLoRAAdapter(null);
145
+ },
146
+ async unload() {
147
+ await pipeline.unload();
148
+ },
149
+ get activeLoRA() {
150
+ return pipeline.getActiveLoRA()?.name ?? null;
151
+ },
152
+ get loaded() {
153
+ return pipeline.isLoaded === true;
154
+ },
155
+ get modelId() {
156
+ return resolved.modelId;
157
+ },
158
+ get manifest() {
159
+ return pipeline.manifest;
160
+ },
161
+ get deviceInfo() {
162
+ return getKernelCapabilities()?.adapterInfo ?? null;
163
+ },
164
+ advanced: {
165
+ prefillKV(prompt, options = {}) {
166
+ return pipeline.prefillKVOnly(prompt, options);
167
+ },
168
+ generateWithPrefixKV(prefix, prompt, options = {}) {
169
+ return pipeline.generateWithPrefixKV(prefix, prompt, options);
170
+ },
171
+ },
172
+ };
173
+ }
174
+
175
+ export async function load(model, options = {}) {
176
+ const { userProgress, pipelineProgress } = resolveLoadProgressHandlers(options);
177
+
178
+ emitLoadProgress(userProgress, 'resolve', 5, 'Resolving model');
179
+ const resolved = await resolveModelSource(model);
180
+ await ensureWebGPUAvailable();
181
+
182
+ emitLoadProgress(userProgress, 'manifest', 15, 'Fetching manifest');
183
+ const manifest = resolved.manifest ?? await fetchManifestFromBaseUrl(resolved.baseUrl);
184
+
185
+ emitLoadProgress(userProgress, 'load', 25, 'Loading weights');
186
+ const pipeline = await createPipeline(manifest, {
187
+ baseUrl: resolved.baseUrl ?? undefined,
188
+ runtimeConfig: options.runtimeConfig,
189
+ onProgress: pipelineProgress
190
+ ? (progress) => emitLoadProgress(
191
+ pipelineProgress,
192
+ 'load',
193
+ Math.max(25, Math.min(99, Math.round(progress.percent))),
194
+ progress.message || 'Loading weights'
195
+ )
196
+ : undefined,
197
+ });
198
+
199
+ emitLoadProgress(userProgress, 'ready', 100, 'Model ready');
200
+ return createModelHandle(pipeline, resolved);
201
+ }
202
+
203
+ async function getCachedModel(model, options = {}) {
204
+ const resolved = await resolveModelSource(model);
205
+ const cacheKey = resolved.modelId;
206
+ const cached = convenienceModelCache.get(cacheKey);
207
+ if (cached?.loaded) {
208
+ return cached;
209
+ }
210
+ if (cached && !cached.loaded) {
211
+ convenienceModelCache.delete(cacheKey);
212
+ }
213
+ if (!inFlightLoadCache.has(cacheKey)) {
214
+ inFlightLoadCache.set(cacheKey, load(model, options).then((instance) => {
215
+ convenienceModelCache.set(cacheKey, instance);
216
+ inFlightLoadCache.delete(cacheKey);
217
+ return instance;
218
+ }).catch((error) => {
219
+ inFlightLoadCache.delete(cacheKey);
220
+ throw error;
221
+ }));
222
+ }
223
+ return inFlightLoadCache.get(cacheKey);
224
+ }
225
+
226
+ async function* dopplerGenerate(prompt, options = {}) {
227
+ if (!options || typeof options !== 'object' || options.model == null) {
228
+ throw new Error('doppler() requires options.model.');
229
+ }
230
+ if (options.runtimeConfig !== undefined || options.runtimePreset !== undefined) {
231
+ throw new Error('doppler() does not accept load-affecting options. Use doppler.load(model, options) instead.');
232
+ }
233
+ const model = await getCachedModel(options.model, { onProgress: options.onProgress });
234
+ yield* model.generate(prompt, options);
235
+ }
236
+
237
+ export function doppler(prompt, options) {
238
+ return dopplerGenerate(prompt, options);
239
+ }
240
+
241
+ doppler.load = load;
242
+
243
+ doppler.text = async function text(prompt, options = {}) {
244
+ if (!options || typeof options !== 'object' || options.model == null) {
245
+ throw new Error('doppler.text() requires options.model.');
246
+ }
247
+ const model = await getCachedModel(options.model, { onProgress: options.onProgress });
248
+ return model.generateText(prompt, options);
249
+ };
250
+
251
+ doppler.chat = function chat(messages, options = {}) {
252
+ if (!options || typeof options !== 'object' || options.model == null) {
253
+ throw new Error('doppler.chat() requires options.model.');
254
+ }
255
+ return (async function* run() {
256
+ const model = await getCachedModel(options.model, { onProgress: options.onProgress });
257
+ yield* model.chat(messages, options);
258
+ }());
259
+ };
260
+
261
+ doppler.chatText = async function chatText(messages, options = {}) {
262
+ if (!options || typeof options !== 'object' || options.model == null) {
263
+ throw new Error('doppler.chatText() requires options.model.');
264
+ }
265
+ const model = await getCachedModel(options.model, { onProgress: options.onProgress });
266
+ return model.chatText(messages, options);
267
+ };
268
+
269
+ doppler.evict = async function evict(model) {
270
+ const resolved = await resolveModelSource(model);
271
+ const cacheKey = resolved.modelId;
272
+ const cached = convenienceModelCache.get(cacheKey);
273
+ if (!cached) return false;
274
+ await cached.unload();
275
+ convenienceModelCache.delete(cacheKey);
276
+ return true;
277
+ };
278
+
279
+ doppler.evictAll = async function evictAll() {
280
+ const cachedModels = Array.from(convenienceModelCache.values());
281
+ convenienceModelCache.clear();
282
+ await Promise.allSettled(cachedModels.map((model) => model.unload()));
283
+ };
284
+
285
+ doppler.listModels = async function listModels() {
286
+ const models = await listQuickstartModels();
287
+ return models.map((entry) => entry.aliases[0] || entry.modelId);
288
+ };
@@ -4,7 +4,6 @@ import { getManifestUrl, parseManifest } from '../formats/rdrr/index.js';
4
4
  import { createPipeline } from '../generation/index.js';
5
5
  import { getKernelCapabilities } from '../gpu/device.js';
6
6
  import { formatChatMessages } from '../inference/pipelines/text/chat-format.js';
7
- import { bootstrapNodeWebGPU } from '../tooling/node-webgpu.js';
8
7
  import { buildQuickstartModelBaseUrl, listQuickstartModels, resolveQuickstartModel } from './doppler-registry.js';
9
8
 
10
9
  const convenienceModelCache = new Map();
@@ -21,6 +20,7 @@ async function ensureWebGPUAvailable() {
21
20
  return;
22
21
  }
23
22
  if (isNodeRuntime()) {
23
+ const { bootstrapNodeWebGPU } = await import('../tooling/node-webgpu.js');
24
24
  const result = await bootstrapNodeWebGPU();
25
25
  if (result.ok && globalThis.navigator?.gpu) {
26
26
  return;
@@ -1,4 +1,4 @@
1
- export const DOPPLER_PROVIDER_VERSION = '0.1.0';
1
+ export { DOPPLER_PROVIDER_VERSION } from '../../version.js';
2
2
 
3
3
  export const DopplerCapabilities = {
4
4
  available: false,
@@ -1,3 +1,10 @@
1
+ import type {
2
+ ExecutionV0ContractArtifact,
3
+ } from './execution-v0-contract-check.js';
4
+ import type {
5
+ ExecutionV0GraphContractArtifact,
6
+ } from './execution-v0-graph-contract-check.js';
7
+
1
8
  export interface ExecutionContractStepFacts {
2
9
  id: string;
3
10
  phase: 'prefill' | 'decode' | 'both';
@@ -34,6 +41,28 @@ export interface ManifestExecutionContractValidationResult extends ExecutionCont
34
41
  facts: ExecutionContractFacts;
35
42
  }
36
43
 
44
+ export interface ExecutionContractArtifact {
45
+ schemaVersion: 1;
46
+ source: 'doppler';
47
+ ok: boolean;
48
+ checks: ExecutionContractCheckResult[];
49
+ errors: string[];
50
+ session: ExecutionContractSessionFacts | null;
51
+ steps: {
52
+ total: number;
53
+ attention: number;
54
+ attentionPhases: {
55
+ prefill: number;
56
+ decode: number;
57
+ both: number;
58
+ };
59
+ } | null;
60
+ executionV0?: {
61
+ kernelProfiles: ExecutionV0ContractArtifact | null;
62
+ graph: ExecutionV0GraphContractArtifact | null;
63
+ };
64
+ }
65
+
37
66
  export declare function sanitizeLeanModuleName(value: unknown): string;
38
67
 
39
68
  export declare function extractExecutionContractFacts(
@@ -47,3 +76,7 @@ export declare function validateExecutionContractFacts(
47
76
  export declare function validateManifestExecutionContract(
48
77
  manifest: Record<string, unknown>
49
78
  ): ManifestExecutionContractValidationResult;
79
+
80
+ export declare function buildExecutionContractArtifact(
81
+ manifest: Record<string, unknown>
82
+ ): ExecutionContractArtifact | null;
@@ -1,4 +1,7 @@
1
1
  import { DEFAULT_BATCHING_DEFAULTS, DEFAULT_GENERATION_CONFIG } from './schema/inference-defaults.schema.js';
2
+ import { buildExecutionV0ContractArtifact } from './execution-v0-contract-check.js';
3
+ import { buildExecutionV0GraphContractArtifact } from './execution-v0-graph-contract-check.js';
4
+ import { EXECUTION_V0_SCHEMA_ID } from './schema/execution-v0.schema.js';
2
5
  import { DEFAULT_KVCACHE_CONFIG } from './schema/kvcache.schema.js';
3
6
 
4
7
  const KV_LAYOUTS = new Set(['contiguous', 'paged', 'tiered', 'bdpa']);
@@ -243,3 +246,72 @@ export function validateManifestExecutionContract(manifest) {
243
246
  facts,
244
247
  };
245
248
  }
249
+
250
+ export function buildExecutionContractArtifact(manifest) {
251
+ if (!manifest || typeof manifest !== 'object') {
252
+ return null;
253
+ }
254
+ if (manifest.modelType === 'diffusion' || manifest.modelType === 'energy') {
255
+ return null;
256
+ }
257
+ if (!manifest.architecture || !manifest.inference || typeof manifest.inference !== 'object') {
258
+ return null;
259
+ }
260
+ try {
261
+ const evaluation = validateManifestExecutionContract(manifest);
262
+ const attentionPhaseCounts = { prefill: 0, decode: 0, both: 0 };
263
+ for (const step of evaluation.facts.steps) {
264
+ if (step.opClass !== 'attention') continue;
265
+ if (Object.prototype.hasOwnProperty.call(attentionPhaseCounts, step.phase)) {
266
+ attentionPhaseCounts[step.phase] += 1;
267
+ }
268
+ }
269
+ const executionV0 =
270
+ manifest?.inference?.schema === EXECUTION_V0_SCHEMA_ID
271
+ ? {
272
+ kernelProfiles: buildExecutionV0ContractArtifact(manifest.inference, {
273
+ modelId: evaluation.facts.modelId,
274
+ }),
275
+ graph: buildExecutionV0GraphContractArtifact({
276
+ modelId: evaluation.facts.modelId,
277
+ numLayers: manifest?.architecture?.numLayers,
278
+ manifestInference: manifest.inference,
279
+ }),
280
+ }
281
+ : null;
282
+ const nestedChecks = [];
283
+ const nestedErrors = [];
284
+ if (executionV0?.kernelProfiles) {
285
+ nestedChecks.push(...executionV0.kernelProfiles.checks);
286
+ nestedErrors.push(...executionV0.kernelProfiles.errors);
287
+ }
288
+ if (executionV0?.graph) {
289
+ nestedChecks.push(...executionV0.graph.checks);
290
+ nestedErrors.push(...executionV0.graph.errors);
291
+ }
292
+ return {
293
+ schemaVersion: 1,
294
+ source: 'doppler',
295
+ ok: evaluation.ok && nestedErrors.length === 0,
296
+ checks: [...evaluation.checks, ...nestedChecks],
297
+ errors: [...evaluation.errors, ...nestedErrors],
298
+ session: evaluation.facts.session,
299
+ steps: {
300
+ total: evaluation.facts.steps.length,
301
+ attention: attentionPhaseCounts.prefill + attentionPhaseCounts.decode + attentionPhaseCounts.both,
302
+ attentionPhases: attentionPhaseCounts,
303
+ },
304
+ ...(executionV0 ? { executionV0 } : {}),
305
+ };
306
+ } catch (error) {
307
+ return {
308
+ schemaVersion: 1,
309
+ source: 'doppler',
310
+ ok: false,
311
+ checks: [],
312
+ errors: [error instanceof Error ? error.message : String(error)],
313
+ session: null,
314
+ steps: null,
315
+ };
316
+ }
317
+ }
@@ -0,0 +1,94 @@
1
+ import type {
2
+ ExecutionV0ComputeDefaultsSchema,
3
+ ExecutionV0KernelProfileSchema,
4
+ ExecutionV0KernelRefSchema,
5
+ ExecutionV0KVIO,
6
+ ExecutionV0PrecisionSchema,
7
+ ExecutionV0SessionDefaultsSchema,
8
+ ExecutionV0StepSchema,
9
+ } from './schema/execution-v0.schema.js';
10
+
11
+ export interface ExecutionV0ContractCheckResult {
12
+ id: string;
13
+ ok: boolean;
14
+ }
15
+
16
+ export interface ExecutionV0ContractPerStep {
17
+ precision: {
18
+ inputDtype: string | null;
19
+ mathDtype: string | null;
20
+ accumDtype: string | null;
21
+ outputDtype: string | null;
22
+ };
23
+ precisionSources: {
24
+ inputDtype: 'manifest' | 'kernelProfile' | 'sessionDefault' | 'derived';
25
+ mathDtype: 'manifest' | 'kernelProfile' | 'sessionDefault' | 'derived';
26
+ accumDtype: 'manifest' | 'kernelProfile' | 'sessionDefault' | 'derived';
27
+ outputDtype: 'manifest' | 'kernelProfile' | 'sessionDefault' | 'derived';
28
+ };
29
+ resolvedPrecision?: {
30
+ inputDtype: 'f16' | 'f32' | null;
31
+ mathDtype: 'f16' | 'f32';
32
+ accumDtype: 'f16' | 'f32';
33
+ outputDtype: 'f16' | 'f32';
34
+ };
35
+ kvIO?: ExecutionV0KVIO;
36
+ kvIOSource?: 'manifest' | 'kernelProfile' | 'sessionDefault';
37
+ }
38
+
39
+ export interface ExecutionV0ContractArtifact {
40
+ schemaVersion: 1;
41
+ source: 'doppler';
42
+ ok: boolean;
43
+ checks: ExecutionV0ContractCheckResult[];
44
+ errors: string[];
45
+ stats: {
46
+ kernelProfiles: number;
47
+ pinnedSteps: number;
48
+ };
49
+ perStep: Record<string, ExecutionV0ContractPerStep>;
50
+ }
51
+
52
+ export declare function normalizeExecutionV0Dtype(value: unknown, label: string): 'f16' | 'f32';
53
+ export declare function buildExecutionV0KernelProfileKey(
54
+ kernelRef: ExecutionV0KernelRefSchema | null | undefined
55
+ ): string | null;
56
+ export declare function indexExecutionV0KernelProfiles(
57
+ sessionDefaults?: Partial<ExecutionV0SessionDefaultsSchema> | null
58
+ ): Map<string, ExecutionV0KernelProfileSchema>;
59
+ export declare function resolveExecutionV0KernelProfile(
60
+ profileIndex: Map<string, ExecutionV0KernelProfileSchema>,
61
+ step: Partial<ExecutionV0StepSchema>
62
+ ): ExecutionV0KernelProfileSchema | null;
63
+ export declare function resolveExecutionV0Precision(
64
+ step: Partial<ExecutionV0StepSchema>,
65
+ profile: ExecutionV0KernelProfileSchema | null,
66
+ sessionDefaults?: Partial<ExecutionV0SessionDefaultsSchema> | null
67
+ ): {
68
+ precision: {
69
+ inputDtype: 'f16' | 'f32' | null;
70
+ mathDtype: 'f16' | 'f32';
71
+ accumDtype: 'f16' | 'f32';
72
+ outputDtype: 'f16' | 'f32';
73
+ };
74
+ sources: {
75
+ inputDtype: 'manifest' | 'kernelProfile' | 'sessionDefault' | 'derived';
76
+ mathDtype: 'manifest' | 'kernelProfile' | 'sessionDefault' | 'derived';
77
+ accumDtype: 'manifest' | 'kernelProfile' | 'sessionDefault' | 'derived';
78
+ outputDtype: 'manifest' | 'kernelProfile' | 'sessionDefault' | 'derived';
79
+ };
80
+ };
81
+ export declare function resolveExecutionV0KVIO(
82
+ step: Partial<ExecutionV0StepSchema>,
83
+ profile: ExecutionV0KernelProfileSchema | null,
84
+ sessionDefaults?: Partial<ExecutionV0SessionDefaultsSchema> | null
85
+ ): {
86
+ value: ExecutionV0KVIO;
87
+ source: 'manifest' | 'kernelProfile' | 'sessionDefault';
88
+ };
89
+ export declare function buildExecutionV0ContractArtifact(
90
+ manifestInference: Record<string, unknown> | null | undefined,
91
+ options?: {
92
+ modelId?: string;
93
+ }
94
+ ): ExecutionV0ContractArtifact | null;