@simulatte/doppler 0.1.6 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (316) hide show
  1. package/CHANGELOG.md +126 -0
  2. package/README.md +16 -23
  3. package/package.json +14 -1
  4. package/src/adapters/adapter-registry.js +12 -1
  5. package/src/adapters/lora-loader.js +23 -6
  6. package/src/bridge/extension-client.d.ts +5 -0
  7. package/src/bridge/extension-client.js +40 -0
  8. package/src/bridge/index.d.ts +2 -1
  9. package/src/bridge/index.js +6 -4
  10. package/src/browser/browser-converter.js +26 -1
  11. package/src/browser/file-picker.js +6 -0
  12. package/src/browser/safetensors-parser-browser.js +84 -1
  13. package/src/browser/shard-io-browser.js +2 -2
  14. package/src/browser/tensor-source-download.js +8 -2
  15. package/src/browser/tensor-source-http.d.ts +1 -0
  16. package/src/browser/tensor-source-http.js +5 -1
  17. package/src/client/doppler-api.browser.js +20 -4
  18. package/src/client/doppler-api.js +19 -3
  19. package/src/client/doppler-provider/generation.js +12 -0
  20. package/src/client/doppler-provider/model-manager.d.ts +10 -0
  21. package/src/client/doppler-provider/model-manager.js +91 -19
  22. package/src/client/doppler-provider/source-runtime.d.ts +2 -1
  23. package/src/client/doppler-provider/source-runtime.js +132 -13
  24. package/src/client/doppler-registry.json +8 -7
  25. package/src/config/backward-registry-loader.js +17 -2
  26. package/src/config/execution-v0-contract-check.js +113 -15
  27. package/src/config/kernel-path-contract-check.js +57 -29
  28. package/src/config/kernel-path-loader.js +5 -36
  29. package/src/config/kernels/kernel-ref-digests.js +1 -1
  30. package/src/config/kernels/registry.js +14 -1
  31. package/src/config/kernels/registry.json +7 -5
  32. package/src/config/loader.d.ts +1 -1
  33. package/src/config/loader.js +12 -2
  34. package/src/config/merge-contract-check.js +59 -4
  35. package/src/config/merge-helpers.js +128 -7
  36. package/src/config/merge.d.ts +1 -0
  37. package/src/config/merge.js +10 -0
  38. package/src/config/param-validator.js +47 -2
  39. package/src/config/presets/kernel-paths/{gemma2-q4k-dequant-f32a.json → gemma2-q4k-dequant-f32a-nosubgroups.json} +3 -3
  40. package/src/config/presets/kernel-paths/gemma3-f16-fused-f32a-online-streamingprefill.json +223 -0
  41. package/src/config/presets/kernel-paths/{gemma3-q4k-dequant-f32a.json → gemma3-q4k-dequant-f32a-nosubgroups.json} +3 -3
  42. package/src/config/presets/kernel-paths/registry.json +29 -8
  43. package/src/config/presets/models/gemma2.json +2 -2
  44. package/src/config/presets/runtime/experiments/bench/gemma3-bench-q4k.json +1 -1
  45. package/src/config/presets/runtime/experiments/debug/gemma3-debug-q4k.json +1 -1
  46. package/src/config/presets/runtime/experiments/verify/gemma3-verify.json +1 -1
  47. package/src/config/presets/runtime/kernels/dequant-f16-q4k.json +6 -13
  48. package/src/config/presets/runtime/kernels/dequant-f32-q4k.json +6 -13
  49. package/src/config/presets/runtime/kernels/embeddinggemma-q4k-dequant-f32a.json +37 -0
  50. package/src/config/presets/runtime/kernels/fused-q4k.json +6 -13
  51. package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f16a.json +33 -0
  52. package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f32a-nosubgroups.json +33 -0
  53. package/src/config/presets/runtime/kernels/gemma2-q4k-fused-f32a.json +33 -0
  54. package/src/config/presets/runtime/kernels/safe-q4k.json +6 -13
  55. package/src/config/presets/runtime/platform/metal-apple-q4k.json +1 -1
  56. package/src/config/runtime.js +6 -1
  57. package/src/config/schema/debug.schema.d.ts +5 -0
  58. package/src/config/schema/doppler.schema.js +16 -21
  59. package/src/config/schema/inference-defaults.schema.js +3 -3
  60. package/src/config/schema/kernel-path.schema.d.ts +5 -1
  61. package/src/config/schema/kernel-thresholds.schema.js +12 -4
  62. package/src/config/schema/manifest.schema.d.ts +2 -1
  63. package/src/config/schema/manifest.schema.js +16 -3
  64. package/src/config/training-defaults.js +30 -22
  65. package/src/converter/conversion-plan.js +94 -9
  66. package/src/converter/core.d.ts +7 -0
  67. package/src/converter/core.js +14 -9
  68. package/src/converter/execution-v0-manifest.js +4 -1
  69. package/src/converter/index.d.ts +1 -0
  70. package/src/converter/index.js +1 -0
  71. package/src/converter/manifest-inference.js +43 -12
  72. package/src/converter/parsers/diffusion.js +0 -3
  73. package/src/converter/quantization-info.js +35 -15
  74. package/src/converter/shard-packer.d.ts +1 -1
  75. package/src/converter/shard-packer.js +4 -1
  76. package/src/debug/config.js +123 -11
  77. package/src/debug/signals.js +7 -1
  78. package/src/debug/tensor.d.ts +2 -0
  79. package/src/debug/tensor.js +13 -2
  80. package/src/distribution/p2p-control-plane.js +52 -12
  81. package/src/distribution/p2p-observability.js +43 -7
  82. package/src/distribution/p2p-webrtc-browser.js +20 -0
  83. package/src/distribution/shard-delivery.js +77 -26
  84. package/src/formats/gguf/types.js +33 -16
  85. package/src/formats/rdrr/groups.d.ts +12 -4
  86. package/src/formats/rdrr/groups.js +3 -6
  87. package/src/formats/rdrr/parsing.js +39 -2
  88. package/src/formats/rdrr/types.d.ts +2 -1
  89. package/src/gpu/command-recorder.js +86 -61
  90. package/src/gpu/device.d.ts +1 -0
  91. package/src/gpu/device.js +73 -19
  92. package/src/gpu/kernel-tuner/benchmarks.js +326 -316
  93. package/src/gpu/kernel-tuner/cache.js +71 -4
  94. package/src/gpu/kernel-tuner/tuner.js +22 -4
  95. package/src/gpu/kernels/attention.js +15 -34
  96. package/src/gpu/kernels/backward/adam.js +62 -58
  97. package/src/gpu/kernels/backward/attention_backward.js +257 -169
  98. package/src/gpu/kernels/backward/conv2d_backward.js +14 -1
  99. package/src/gpu/kernels/cast.js +191 -149
  100. package/src/gpu/kernels/check-stop.js +33 -44
  101. package/src/gpu/kernels/conv2d.js +27 -17
  102. package/src/gpu/kernels/cross_entropy_loss.js +21 -15
  103. package/src/gpu/kernels/depthwise_conv2d.js +36 -26
  104. package/src/gpu/kernels/dequant.js +178 -126
  105. package/src/gpu/kernels/energy.d.ts +3 -21
  106. package/src/gpu/kernels/energy.js +111 -88
  107. package/src/gpu/kernels/feature-check.js +1 -1
  108. package/src/gpu/kernels/fused_ffn.js +84 -65
  109. package/src/gpu/kernels/fused_matmul_residual.js +56 -33
  110. package/src/gpu/kernels/fused_matmul_rmsnorm.js +62 -45
  111. package/src/gpu/kernels/gather.js +33 -15
  112. package/src/gpu/kernels/gelu.js +19 -11
  113. package/src/gpu/kernels/grouped_pointwise_conv2d.js +33 -23
  114. package/src/gpu/kernels/groupnorm.js +34 -23
  115. package/src/gpu/kernels/kv-quantize.js +5 -2
  116. package/src/gpu/kernels/layernorm.js +35 -19
  117. package/src/gpu/kernels/logit-merge.js +5 -3
  118. package/src/gpu/kernels/matmul.js +58 -39
  119. package/src/gpu/kernels/modulate.js +23 -15
  120. package/src/gpu/kernels/moe.js +221 -175
  121. package/src/gpu/kernels/pixel_shuffle.js +22 -14
  122. package/src/gpu/kernels/relu.js +18 -10
  123. package/src/gpu/kernels/repeat_channels.js +25 -17
  124. package/src/gpu/kernels/residual.js +37 -27
  125. package/src/gpu/kernels/rmsnorm.js +57 -41
  126. package/src/gpu/kernels/rope.js +3 -0
  127. package/src/gpu/kernels/sample.js +27 -38
  128. package/src/gpu/kernels/sana_linear_attention.js +18 -10
  129. package/src/gpu/kernels/scale.js +18 -11
  130. package/src/gpu/kernels/shader-cache.js +4 -2
  131. package/src/gpu/kernels/silu.js +120 -72
  132. package/src/gpu/kernels/softmax.js +44 -25
  133. package/src/gpu/kernels/split_qkv.js +23 -13
  134. package/src/gpu/kernels/transpose.js +18 -10
  135. package/src/gpu/kernels/transpose.wgsl +5 -3
  136. package/src/gpu/kernels/upsample2d.js +21 -13
  137. package/src/gpu/kernels/utils.js +20 -13
  138. package/src/gpu/partitioned-buffer-pool.js +10 -2
  139. package/src/gpu/perf-guards.js +2 -9
  140. package/src/gpu/profiler.js +27 -22
  141. package/src/gpu/readback-utils.d.ts +16 -0
  142. package/src/gpu/readback-utils.js +41 -0
  143. package/src/gpu/submit-tracker.js +13 -0
  144. package/src/gpu/uniform-cache.d.ts +1 -0
  145. package/src/gpu/uniform-cache.js +30 -9
  146. package/src/hotswap/intent-bundle.js +6 -0
  147. package/src/hotswap/manifest.d.ts +10 -1
  148. package/src/hotswap/manifest.js +12 -2
  149. package/src/hotswap/runtime.js +30 -8
  150. package/src/index-browser.d.ts +44 -0
  151. package/src/index-browser.js +14 -0
  152. package/src/inference/browser-harness-contract-helpers.d.ts +5 -0
  153. package/src/inference/browser-harness-contract-helpers.js +28 -0
  154. package/src/inference/browser-harness-diffusion-energy-suites.d.ts +2 -0
  155. package/src/inference/browser-harness-diffusion-energy-suites.js +269 -0
  156. package/src/inference/browser-harness-model-helpers.d.ts +16 -0
  157. package/src/inference/browser-harness-model-helpers.js +217 -0
  158. package/src/inference/browser-harness-report-helpers.d.ts +7 -0
  159. package/src/inference/browser-harness-report-helpers.js +42 -0
  160. package/src/inference/browser-harness-runtime-helpers.d.ts +61 -0
  161. package/src/inference/browser-harness-runtime-helpers.js +415 -0
  162. package/src/inference/browser-harness-suite-helpers.d.ts +28 -0
  163. package/src/inference/browser-harness-suite-helpers.js +268 -0
  164. package/src/inference/browser-harness-text-helpers.d.ts +27 -0
  165. package/src/inference/browser-harness-text-helpers.js +788 -0
  166. package/src/inference/browser-harness.d.ts +6 -0
  167. package/src/inference/browser-harness.js +130 -1996
  168. package/src/inference/kv-cache/base.js +140 -94
  169. package/src/inference/kv-cache/tiered.js +5 -3
  170. package/src/inference/moe-router.js +88 -56
  171. package/src/inference/multi-model-network.js +5 -3
  172. package/src/inference/network-evolution.d.ts +11 -2
  173. package/src/inference/network-evolution.js +20 -21
  174. package/src/inference/pipelines/context.d.ts +3 -0
  175. package/src/inference/pipelines/context.js +142 -2
  176. package/src/inference/pipelines/diffusion/helpers.js +7 -2
  177. package/src/inference/pipelines/diffusion/pipeline.js +2 -1
  178. package/src/inference/pipelines/diffusion/sd3-transformer.js +10 -10
  179. package/src/inference/pipelines/diffusion/vae.js +3 -7
  180. package/src/inference/pipelines/energy/pipeline.js +27 -21
  181. package/src/inference/pipelines/energy/quintel.d.ts +5 -0
  182. package/src/inference/pipelines/energy/quintel.js +11 -0
  183. package/src/inference/pipelines/energy-head/row-head-pipeline.js +17 -13
  184. package/src/inference/pipelines/structured/json-head-pipeline.js +26 -11
  185. package/src/inference/pipelines/text/attention/projections.js +151 -101
  186. package/src/inference/pipelines/text/attention/record.js +62 -8
  187. package/src/inference/pipelines/text/attention/run.js +62 -8
  188. package/src/inference/pipelines/text/config.js +3 -4
  189. package/src/inference/pipelines/text/embed.js +2 -8
  190. package/src/inference/pipelines/text/execution-plan.js +41 -19
  191. package/src/inference/pipelines/text/execution-v0-contract-helpers.d.ts +59 -0
  192. package/src/inference/pipelines/text/execution-v0-contract-helpers.js +937 -0
  193. package/src/inference/pipelines/text/execution-v0-runtime-builders.d.ts +15 -0
  194. package/src/inference/pipelines/text/execution-v0-runtime-builders.js +279 -0
  195. package/src/inference/pipelines/text/execution-v0.js +62 -1013
  196. package/src/inference/pipelines/text/generator-steps.d.ts +46 -0
  197. package/src/inference/pipelines/text/generator-steps.js +298 -207
  198. package/src/inference/pipelines/text/generator.js +6 -23
  199. package/src/inference/pipelines/text/init.js +78 -20
  200. package/src/inference/pipelines/text/kernel-path-auto-select.js +2 -0
  201. package/src/inference/pipelines/text/kernel-trace.d.ts +2 -0
  202. package/src/inference/pipelines/text/kernel-trace.js +6 -0
  203. package/src/inference/pipelines/text/layer.js +3 -9
  204. package/src/inference/pipelines/text/linear-attention.d.ts +10 -0
  205. package/src/inference/pipelines/text/linear-attention.js +80 -6
  206. package/src/inference/pipelines/text/logits/gpu.js +10 -5
  207. package/src/inference/pipelines/text/logits/index.js +10 -11
  208. package/src/inference/pipelines/text/logits/utils.d.ts +7 -0
  209. package/src/inference/pipelines/text/logits/utils.js +9 -0
  210. package/src/inference/pipelines/text/lora-apply.js +50 -32
  211. package/src/inference/pipelines/text/model-load.js +279 -104
  212. package/src/inference/pipelines/text/moe-cache.js +5 -4
  213. package/src/inference/pipelines/text/moe-cpu-gptoss.js +74 -69
  214. package/src/inference/pipelines/text/moe-cpu.js +42 -38
  215. package/src/inference/pipelines/text/moe-gpu.js +110 -86
  216. package/src/inference/pipelines/text/ops.js +90 -90
  217. package/src/inference/pipelines/text/probes.js +9 -9
  218. package/src/inference/pipelines/text/weights.js +17 -7
  219. package/src/inference/pipelines/text.js +13 -1
  220. package/src/inference/speculative.d.ts +2 -2
  221. package/src/inference/speculative.js +4 -18
  222. package/src/inference/test-harness.d.ts +1 -1
  223. package/src/inference/test-harness.js +15 -5
  224. package/src/inference/tokenizer.d.ts +0 -5
  225. package/src/inference/tokenizer.js +4 -23
  226. package/src/inference/tokenizers/bpe.js +9 -0
  227. package/src/inference/tokenizers/bundled.js +20 -0
  228. package/src/inference/tokenizers/sentencepiece.js +12 -0
  229. package/src/loader/doppler-loader.js +38 -22
  230. package/src/loader/dtype-utils.js +3 -44
  231. package/src/loader/embedding-loader.js +7 -3
  232. package/src/loader/experts/expert-cache.js +13 -6
  233. package/src/loader/experts/expert-loader.js +10 -6
  234. package/src/loader/final-weights-loader.js +8 -4
  235. package/src/loader/layer-loader.js +2 -1
  236. package/src/loader/loader-state.js +2 -2
  237. package/src/loader/memory-monitor.js +8 -0
  238. package/src/loader/multi-model-loader.d.ts +14 -0
  239. package/src/loader/multi-model-loader.js +70 -24
  240. package/src/loader/shard-cache.js +81 -12
  241. package/src/loader/shard-resolver.js +25 -3
  242. package/src/loader/tensors/tensor-loader.js +209 -144
  243. package/src/loader/tensors/tensor-reader.js +76 -19
  244. package/src/loader/weight-downcast.js +1 -1
  245. package/src/memory/buffer-pool.d.ts +9 -1
  246. package/src/memory/buffer-pool.js +109 -44
  247. package/src/memory/unified-detect.js +1 -1
  248. package/src/rules/inference/kernel-path.rules.json +24 -8
  249. package/src/rules/rule-registry.js +25 -1
  250. package/src/storage/backends/opfs-store.js +68 -24
  251. package/src/storage/downloader.js +364 -83
  252. package/src/storage/index.d.ts +3 -0
  253. package/src/storage/index.js +3 -0
  254. package/src/storage/preflight.d.ts +2 -2
  255. package/src/storage/preflight.js +24 -2
  256. package/src/storage/quickstart-downloader.js +11 -5
  257. package/src/storage/registry.js +10 -4
  258. package/src/storage/reports.js +1 -1
  259. package/src/storage/shard-manager.d.ts +15 -1
  260. package/src/storage/shard-manager.js +51 -3
  261. package/src/storage/source-artifact-store.d.ts +52 -0
  262. package/src/storage/source-artifact-store.js +234 -0
  263. package/src/tooling/command-api-constants.d.ts +9 -0
  264. package/src/tooling/command-api-constants.js +9 -0
  265. package/src/tooling/command-api-family-normalizers.d.ts +9 -0
  266. package/src/tooling/command-api-family-normalizers.js +343 -0
  267. package/src/tooling/command-api-helpers.d.ts +25 -0
  268. package/src/tooling/command-api-helpers.js +262 -0
  269. package/src/tooling/command-api.js +16 -602
  270. package/src/tooling/command-envelope.js +4 -1
  271. package/src/tooling/command-runner-shared.js +52 -18
  272. package/src/tooling/lean-execution-contract.js +150 -3
  273. package/src/tooling/node-browser-command-runner.js +161 -271
  274. package/src/tooling/node-command-runner.js +29 -3
  275. package/src/tooling/node-converter.js +27 -1
  276. package/src/tooling/node-source-runtime.d.ts +1 -1
  277. package/src/tooling/node-source-runtime.js +84 -3
  278. package/src/tooling/node-webgpu.js +24 -21
  279. package/src/tooling/opfs-cache.js +21 -4
  280. package/src/tooling/runtime-input-composition.d.ts +38 -0
  281. package/src/tooling/runtime-input-composition.js +86 -0
  282. package/src/tooling/source-runtime-bundle.d.ts +40 -5
  283. package/src/tooling/source-runtime-bundle.js +261 -34
  284. package/src/tooling/source-runtime-materializer.d.ts +6 -0
  285. package/src/tooling/source-runtime-materializer.js +93 -0
  286. package/src/training/attention-backward.js +32 -17
  287. package/src/training/autograd.js +80 -52
  288. package/src/training/checkpoint-watch.d.ts +2 -1
  289. package/src/training/checkpoint-watch.js +39 -6
  290. package/src/training/checkpoint.js +40 -11
  291. package/src/training/clip.js +2 -1
  292. package/src/training/datasets/token-batch.js +20 -8
  293. package/src/training/distillation/checkpoint-watch.js +1 -0
  294. package/src/training/distillation/student-fixture.d.ts +22 -0
  295. package/src/training/distillation/student-fixture.js +846 -0
  296. package/src/training/distillation/suite-data.d.ts +45 -0
  297. package/src/training/distillation/suite-data.js +189 -0
  298. package/src/training/lora-pipeline.js +4 -7
  299. package/src/training/lora.js +26 -12
  300. package/src/training/loss.js +5 -6
  301. package/src/training/objectives/cross_entropy.js +2 -5
  302. package/src/training/objectives/distill_kd.js +4 -8
  303. package/src/training/objectives/distill_triplet.js +4 -8
  304. package/src/training/objectives/ul_stage2_base.js +4 -8
  305. package/src/training/operator-command.js +2 -0
  306. package/src/training/optimizer.js +19 -7
  307. package/src/training/runner.js +2 -1
  308. package/src/training/suite.js +18 -978
  309. package/src/training/tensor-factory.d.ts +9 -0
  310. package/src/training/tensor-factory.js +13 -0
  311. package/src/training/trainer.js +3 -5
  312. package/src/training/ul_dataset.js +3 -5
  313. package/src/training/workloads.js +70 -79
  314. package/src/version.js +1 -1
  315. package/tools/convert-safetensors-node.js +22 -16
  316. package/tools/doppler-cli.js +44 -25
@@ -3,7 +3,7 @@
3
3
  import { getDevice, hasFeature, FEATURES } from './device.js';
4
4
  import { allowReadback, trackAllocation } from './perf-guards.js';
5
5
  import { getUniformCache } from './uniform-cache.js';
6
- import { isBufferActive, releaseBuffer } from '../memory/buffer-pool.js';
6
+ import { isBufferActive, releaseBuffer, discardBuffer } from '../memory/buffer-pool.js';
7
7
  import { log } from '../debug/index.js';
8
8
  import { getRuntimeConfig } from '../config/runtime.js';
9
9
 
@@ -93,6 +93,9 @@ export class CommandRecorder {
93
93
 
94
94
 
95
95
  #initProfiling() {
96
+ let querySet = null;
97
+ let queryBuffer = null;
98
+ let readbackBuffer = null;
96
99
  try {
97
100
  const runtimeProfiler = getRuntimeConfig().shared?.debug?.profiler;
98
101
  if (!runtimeProfiler) {
@@ -119,25 +122,31 @@ export class CommandRecorder {
119
122
  didLogQueryFallback = true;
120
123
  }
121
124
 
122
- this.#querySet = this.device.createQuerySet({
125
+ querySet = this.device.createQuerySet({
123
126
  type: 'timestamp',
124
127
  count: this.#queryCapacity,
125
128
  });
126
129
 
127
130
  // Buffer to hold query results (8 bytes per timestamp = BigUint64)
128
- this.#queryBuffer = this.device.createBuffer({
131
+ queryBuffer = this.device.createBuffer({
129
132
  label: `${this.label}_query_buffer`,
130
133
  size: this.#queryCapacity * 8,
131
134
  usage: GPUBufferUsage.QUERY_RESOLVE | GPUBufferUsage.COPY_SRC,
132
135
  });
133
136
 
134
137
  // Readback buffer
135
- this.#readbackBuffer = this.device.createBuffer({
138
+ readbackBuffer = this.device.createBuffer({
136
139
  label: `${this.label}_readback_buffer`,
137
140
  size: this.#queryCapacity * 8,
138
141
  usage: GPUBufferUsage.MAP_READ | GPUBufferUsage.COPY_DST,
139
142
  });
143
+ this.#querySet = querySet;
144
+ this.#queryBuffer = queryBuffer;
145
+ this.#readbackBuffer = readbackBuffer;
140
146
  } catch (e) {
147
+ readbackBuffer?.destroy();
148
+ queryBuffer?.destroy();
149
+ querySet?.destroy();
141
150
  log.warn('CommandRecorder', `Failed to initialize profiling: ${e}`);
142
151
  this.#profilingEnabled = false;
143
152
  }
@@ -277,39 +286,57 @@ export class CommandRecorder {
277
286
  }
278
287
  }
279
288
 
289
+ #finalizeTrackedBuffers(buffersToDestroy, buffersToRelease, discardPooled) {
290
+ for (const buffer of buffersToDestroy) {
291
+ buffer.destroy();
292
+ }
293
+ for (const buffer of buffersToRelease) {
294
+ if (discardPooled) {
295
+ discardBuffer(buffer);
296
+ } else {
297
+ releaseBuffer(buffer);
298
+ }
299
+ }
300
+ getUniformCache().flushPendingDestruction();
301
+ }
302
+
303
+ #takeTrackedBuffers() {
304
+ const buffersToDestroy = this.#tempBuffers;
305
+ const buffersToRelease = this.#pooledBuffers;
306
+ this.#tempBuffers = [];
307
+ this.#pooledBuffers = [];
308
+ this.#tempBufferSet.clear();
309
+ this.#pooledBufferSet.clear();
310
+ return { buffersToDestroy, buffersToRelease };
311
+ }
312
+
280
313
 
281
314
  submit() {
282
315
  if (this.#submitted) {
283
316
  throw new Error('[CommandRecorder] Already submitted');
284
317
  }
285
318
 
286
- // Submit commands
287
319
  const submitStart = performance.now();
288
- this.device.queue.submit([this.#encoder.finish()]);
320
+ const { buffersToDestroy, buffersToRelease } = this.#takeTrackedBuffers();
321
+ try {
322
+ this.device.queue.submit([this.#encoder.finish()]);
323
+ } catch (error) {
324
+ this.#submitted = true;
325
+ this.#submitStartMs = submitStart;
326
+ this.#finalizeTrackedBuffers(buffersToDestroy, buffersToRelease, false);
327
+ this.#destroyProfilingResources();
328
+ throw error;
329
+ }
330
+
289
331
  this.#submitted = true;
290
332
  this.#submitStartMs = submitStart;
291
333
 
292
- const buffersToDestroy = this.#tempBuffers;
293
- const buffersToRelease = this.#pooledBuffers;
294
- this.#tempBuffers = [];
295
- this.#pooledBuffers = [];
296
- this.#tempBufferSet.clear();
297
- this.#pooledBufferSet.clear();
298
-
299
334
  this.#cleanupPromise = this.device.queue.onSubmittedWorkDone().then(() => {
300
335
  this.#submitLatencyMs = performance.now() - submitStart;
301
- // Destroy buffers created directly by the recorder
302
- for (const buffer of buffersToDestroy) {
303
- buffer.destroy();
304
- }
305
- // Release pooled buffers back to the pool
306
- for (const buffer of buffersToRelease) {
307
- releaseBuffer(buffer);
308
- }
309
- // Safe to destroy evicted uniform buffers now that GPU work is complete
310
- getUniformCache().flushPendingDestruction();
336
+ this.#finalizeTrackedBuffers(buffersToDestroy, buffersToRelease, false);
311
337
  }).catch((err) => {
312
338
  log.warn('CommandRecorder', `Deferred cleanup failed: ${ (err).message}`);
339
+ this.#finalizeTrackedBuffers(buffersToDestroy, buffersToRelease, true);
313
340
  });
314
341
  }
315
342
 
@@ -370,55 +397,53 @@ export class CommandRecorder {
370
397
  }
371
398
 
372
399
  if (this.#profileEntries.length === 0) {
400
+ this.#destroyProfilingResources();
373
401
  return {};
374
402
  }
375
403
 
376
- // Wait for GPU work to complete
377
- await this.device.queue.onSubmittedWorkDone();
404
+ let mapped = false;
378
405
 
379
- // Resolve queries to buffer
380
- const maxIndex = Math.max(...this.#profileEntries.map(e => e.endQueryIndex)) + 1;
381
- const resolveEncoder = this.device.createCommandEncoder({ label: 'profile_resolve' });
382
- resolveEncoder.resolveQuerySet(this.#querySet, 0, maxIndex, this.#queryBuffer, 0);
383
- resolveEncoder.copyBufferToBuffer(this.#queryBuffer, 0, this.#readbackBuffer, 0, maxIndex * 8);
384
- this.device.queue.submit([resolveEncoder.finish()]);
385
-
386
- if (!allowReadback('CommandRecorder.resolveProfileTimings')) {
387
- return null;
388
- }
389
-
390
- // Read back timestamps
391
- await this.#readbackBuffer.mapAsync(GPUMapMode.READ);
392
- const timestamps = new BigUint64Array(this.#readbackBuffer.getMappedRange());
406
+ try {
407
+ await this.device.queue.onSubmittedWorkDone();
393
408
 
394
- // Aggregate timings by label
395
-
396
- const timings = {};
409
+ const maxIndex = Math.max(...this.#profileEntries.map(e => e.endQueryIndex)) + 1;
410
+ const resolveEncoder = this.device.createCommandEncoder({ label: 'profile_resolve' });
411
+ resolveEncoder.resolveQuerySet(this.#querySet, 0, maxIndex, this.#queryBuffer, 0);
412
+ resolveEncoder.copyBufferToBuffer(this.#queryBuffer, 0, this.#readbackBuffer, 0, maxIndex * 8);
413
+ this.device.queue.submit([resolveEncoder.finish()]);
397
414
 
398
- for (const entry of this.#profileEntries) {
399
- const startNs = timestamps[entry.startQueryIndex];
400
- const endNs = timestamps[entry.endQueryIndex];
401
- const durationMs = Number(endNs - startNs) / 1_000_000;
415
+ if (!allowReadback('CommandRecorder.resolveProfileTimings')) {
416
+ return null;
417
+ }
402
418
 
403
- // Skip invalid timings
404
- if (durationMs < 0 || durationMs > 60000) {
405
- continue;
419
+ await this.#readbackBuffer.mapAsync(GPUMapMode.READ);
420
+ mapped = true;
421
+ const timestamps = new BigUint64Array(this.#readbackBuffer.getMappedRange());
422
+ const timings = {};
423
+
424
+ for (const entry of this.#profileEntries) {
425
+ const startNs = timestamps[entry.startQueryIndex];
426
+ const endNs = timestamps[entry.endQueryIndex];
427
+ const durationMs = Number(endNs - startNs) / 1_000_000;
428
+
429
+ if (durationMs < 0 || durationMs > 60000) {
430
+ continue;
431
+ }
432
+
433
+ if (timings[entry.label] !== undefined) {
434
+ timings[entry.label] += durationMs;
435
+ } else {
436
+ timings[entry.label] = durationMs;
437
+ }
406
438
  }
407
439
 
408
- // Aggregate by label
409
- if (timings[entry.label] !== undefined) {
410
- timings[entry.label] += durationMs;
411
- } else {
412
- timings[entry.label] = durationMs;
440
+ return timings;
441
+ } finally {
442
+ if (mapped && this.#readbackBuffer) {
443
+ this.#readbackBuffer.unmap();
413
444
  }
445
+ this.#destroyProfilingResources();
414
446
  }
415
-
416
- this.#readbackBuffer.unmap();
417
-
418
- // Clean up profiling resources after use
419
- this.#destroyProfilingResources();
420
-
421
- return timings;
422
447
  }
423
448
 
424
449
 
@@ -82,6 +82,7 @@ export function initDevice(): Promise<GPUDevice>;
82
82
 
83
83
  /**
84
84
  * Register an externally created GPU device for pipeline use.
85
+ * The active device epoch advances and loss handling is attached to the device.
85
86
  */
86
87
  export function setDevice(
87
88
  device: GPUDevice | null,
package/src/gpu/device.js CHANGED
@@ -28,16 +28,47 @@ function advanceDeviceEpoch() {
28
28
  deviceEpoch += 1;
29
29
  }
30
30
 
31
+ function clearActiveDeviceState() {
32
+ gpuDevice = null;
33
+ kernelCapabilities = null;
34
+ resolvedPlatformConfig = null;
35
+ platformInitialized = false;
36
+ }
37
+
31
38
  function isValidGPUBuffer(value) {
32
39
  if (!value) {
33
40
  return false;
34
41
  }
42
+ if (value.__dopplerFakeGPUBuffer === true) {
43
+ return true;
44
+ }
45
+ if (
46
+ typeof value === 'object'
47
+ && value.constructor?.name === 'FakeBuffer'
48
+ && typeof value.size === 'number'
49
+ && typeof value.usage === 'number'
50
+ && typeof value.destroy === 'function'
51
+ ) {
52
+ return true;
53
+ }
35
54
  if (typeof GPUBuffer === 'undefined') {
36
55
  return true;
37
56
  }
38
57
  return value instanceof GPUBuffer;
39
58
  }
40
59
 
60
+ function isUsableGPUDevice(device) {
61
+ return !!(
62
+ device
63
+ && typeof device.createBuffer === 'function'
64
+ && typeof device.createBindGroup === 'function'
65
+ && typeof device.createCommandEncoder === 'function'
66
+ && typeof device.createShaderModule === 'function'
67
+ && device.queue
68
+ && typeof device.queue.submit === 'function'
69
+ );
70
+ }
71
+
41
72
  function describeBindGroupBufferValue(value) {
42
73
  if (value === null) return 'null';
43
74
  if (value === undefined) return 'undefined';
@@ -84,6 +115,39 @@ function wrapDeviceCreateBindGroup(device) {
84
115
  return device;
85
116
  }
86
117
 
118
+ function registerDeviceLostHandler(device) {
119
+ if (!device || device.__dopplerLossHandlerRegistered) {
120
+ return device;
121
+ }
122
+
123
+ if (device.lost && typeof device.lost.then === 'function') {
124
+ const trackedDevice = device;
125
+ device.lost.then((info) => {
126
+ if (gpuDevice !== trackedDevice) {
127
+ return;
128
+ }
129
+ log.error('GPU', 'Device lost: ' + info.message + ', Reason: ' + info.reason);
130
+ clearActiveDeviceState();
131
+ advanceDeviceEpoch();
132
+ }).catch((error) => {
133
+ if (gpuDevice !== trackedDevice) {
134
+ return;
135
+ }
136
+ log.warn('GPU', 'Device lost handler failed: ' + (error?.message ?? error));
137
+ clearActiveDeviceState();
138
+ advanceDeviceEpoch();
139
+ });
140
+ }
141
+
142
+ Object.defineProperty(device, '__dopplerLossHandlerRegistered', {
143
+ value: true,
144
+ configurable: true,
145
+ enumerable: false,
146
+ writable: false,
147
+ });
148
+ return device;
149
+ }
150
+
87
151
 
88
152
  export const FEATURES = ({
89
153
  SHADER_F16: 'shader-f16',
@@ -219,7 +283,11 @@ async function initializePlatformAndRegistry(adapter) {
219
283
  export async function initDevice() {
220
284
  // Return cached device if available
221
285
  if (gpuDevice) {
222
- return gpuDevice;
286
+ if (isUsableGPUDevice(gpuDevice)) {
287
+ return gpuDevice;
288
+ }
289
+ clearActiveDeviceState();
290
+ advanceDeviceEpoch();
223
291
  }
224
292
 
225
293
  if (!isWebGPUAvailable()) {
@@ -258,18 +326,9 @@ export async function initDevice() {
258
326
  throw createDopplerError(ERROR_CODES.GPU_DEVICE_FAILED, 'Failed to create WebGPU device');
259
327
  }
260
328
  wrapDeviceCreateBindGroup(gpuDevice);
329
+ registerDeviceLostHandler(gpuDevice);
261
330
  advanceDeviceEpoch();
262
331
 
263
- // Set up device lost handler
264
- gpuDevice.lost.then((info) => {
265
- log.error('GPU', 'Device lost: ' + info.message + ', Reason: ' + info.reason);
266
- gpuDevice = null;
267
- kernelCapabilities = null;
268
- resolvedPlatformConfig = null;
269
- platformInitialized = false;
270
- advanceDeviceEpoch();
271
- });
272
-
273
332
  // Wrap queue for submit tracking (when enabled)
274
333
  wrapQueueForTracking(gpuDevice.queue);
275
334
 
@@ -301,16 +360,14 @@ export async function initDevice() {
301
360
 
302
361
  export function setDevice(device, options = {}) {
303
362
  if (!device) {
304
- gpuDevice = null;
305
- kernelCapabilities = null;
306
- resolvedPlatformConfig = null;
307
- platformInitialized = false;
363
+ clearActiveDeviceState();
308
364
  advanceDeviceEpoch();
309
365
  return;
310
366
  }
311
367
 
312
368
  gpuDevice = device;
313
369
  wrapDeviceCreateBindGroup(gpuDevice);
370
+ registerDeviceLostHandler(gpuDevice);
314
371
  advanceDeviceEpoch();
315
372
  wrapQueueForTracking(gpuDevice.queue);
316
373
 
@@ -372,10 +429,7 @@ export function isPlatformInitialized() {
372
429
  export function destroyDevice() {
373
430
  if (gpuDevice) {
374
431
  gpuDevice.destroy();
375
- gpuDevice = null;
376
- kernelCapabilities = null;
377
- resolvedPlatformConfig = null;
378
- platformInitialized = false;
432
+ clearActiveDeviceState();
379
433
  advanceDeviceEpoch();
380
434
  }
381
435
  }