@simulatte/doppler 0.1.6 → 0.1.7

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (316)
  1. package/CHANGELOG.md +126 -0
  2. package/README.md +16 -23
  3. package/package.json +14 -1
  4. package/src/adapters/adapter-registry.js +12 -1
  5. package/src/adapters/lora-loader.js +23 -6
  6. package/src/bridge/extension-client.d.ts +5 -0
  7. package/src/bridge/extension-client.js +40 -0
  8. package/src/bridge/index.d.ts +2 -1
  9. package/src/bridge/index.js +6 -4
  10. package/src/browser/browser-converter.js +26 -1
  11. package/src/browser/file-picker.js +6 -0
  12. package/src/browser/safetensors-parser-browser.js +84 -1
  13. package/src/browser/shard-io-browser.js +2 -2
  14. package/src/browser/tensor-source-download.js +8 -2
  15. package/src/browser/tensor-source-http.d.ts +1 -0
  16. package/src/browser/tensor-source-http.js +5 -1
  17. package/src/client/doppler-api.browser.js +20 -4
  18. package/src/client/doppler-api.js +19 -3
  19. package/src/client/doppler-provider/generation.js +12 -0
  20. package/src/client/doppler-provider/model-manager.d.ts +10 -0
  21. package/src/client/doppler-provider/model-manager.js +91 -19
  22. package/src/client/doppler-provider/source-runtime.d.ts +2 -1
  23. package/src/client/doppler-provider/source-runtime.js +132 -13
  24. package/src/client/doppler-registry.json +8 -7
  25. package/src/config/backward-registry-loader.js +17 -2
  26. package/src/config/execution-v0-contract-check.js +113 -15
  27. package/src/config/kernel-path-contract-check.js +57 -29
  28. package/src/config/kernel-path-loader.js +5 -36
  29. package/src/config/kernels/kernel-ref-digests.js +1 -1
  30. package/src/config/kernels/registry.js +14 -1
  31. package/src/config/kernels/registry.json +7 -5
  32. package/src/config/loader.d.ts +1 -1
  33. package/src/config/loader.js +12 -2
  34. package/src/config/merge-contract-check.js +59 -4
  35. package/src/config/merge-helpers.js +128 -7
  36. package/src/config/merge.d.ts +1 -0
  37. package/src/config/merge.js +10 -0
  38. package/src/config/param-validator.js +47 -2
  39. package/src/config/presets/kernel-paths/{gemma2-q4k-dequant-f32a.json → gemma2-q4k-dequant-f32a-nosubgroups.json} +3 -3
  40. package/src/config/presets/kernel-paths/gemma3-f16-fused-f32a-online-streamingprefill.json +223 -0
  41. package/src/config/presets/kernel-paths/{gemma3-q4k-dequant-f32a.json → gemma3-q4k-dequant-f32a-nosubgroups.json} +3 -3
  42. package/src/config/presets/kernel-paths/registry.json +29 -8
  43. package/src/config/presets/models/gemma2.json +2 -2
  44. package/src/config/presets/runtime/experiments/bench/gemma3-bench-q4k.json +1 -1
  45. package/src/config/presets/runtime/experiments/debug/gemma3-debug-q4k.json +1 -1
  46. package/src/config/presets/runtime/experiments/verify/gemma3-verify.json +1 -1
  47. package/src/config/presets/runtime/kernels/dequant-f16-q4k.json +6 -13
  48. package/src/config/presets/runtime/kernels/dequant-f32-q4k.json +6 -13
  49. package/src/config/presets/runtime/kernels/embeddinggemma-q4k-dequant-f32a.json +37 -0
  50. package/src/config/presets/runtime/kernels/fused-q4k.json +6 -13
  51. package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f16a.json +33 -0
  52. package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f32a-nosubgroups.json +33 -0
  53. package/src/config/presets/runtime/kernels/gemma2-q4k-fused-f32a.json +33 -0
  54. package/src/config/presets/runtime/kernels/safe-q4k.json +6 -13
  55. package/src/config/presets/runtime/platform/metal-apple-q4k.json +1 -1
  56. package/src/config/runtime.js +6 -1
  57. package/src/config/schema/debug.schema.d.ts +5 -0
  58. package/src/config/schema/doppler.schema.js +16 -21
  59. package/src/config/schema/inference-defaults.schema.js +3 -3
  60. package/src/config/schema/kernel-path.schema.d.ts +5 -1
  61. package/src/config/schema/kernel-thresholds.schema.js +12 -4
  62. package/src/config/schema/manifest.schema.d.ts +2 -1
  63. package/src/config/schema/manifest.schema.js +16 -3
  64. package/src/config/training-defaults.js +30 -22
  65. package/src/converter/conversion-plan.js +94 -9
  66. package/src/converter/core.d.ts +7 -0
  67. package/src/converter/core.js +14 -9
  68. package/src/converter/execution-v0-manifest.js +4 -1
  69. package/src/converter/index.d.ts +1 -0
  70. package/src/converter/index.js +1 -0
  71. package/src/converter/manifest-inference.js +43 -12
  72. package/src/converter/parsers/diffusion.js +0 -3
  73. package/src/converter/quantization-info.js +35 -15
  74. package/src/converter/shard-packer.d.ts +1 -1
  75. package/src/converter/shard-packer.js +4 -1
  76. package/src/debug/config.js +123 -11
  77. package/src/debug/signals.js +7 -1
  78. package/src/debug/tensor.d.ts +2 -0
  79. package/src/debug/tensor.js +13 -2
  80. package/src/distribution/p2p-control-plane.js +52 -12
  81. package/src/distribution/p2p-observability.js +43 -7
  82. package/src/distribution/p2p-webrtc-browser.js +20 -0
  83. package/src/distribution/shard-delivery.js +77 -26
  84. package/src/formats/gguf/types.js +33 -16
  85. package/src/formats/rdrr/groups.d.ts +12 -4
  86. package/src/formats/rdrr/groups.js +3 -6
  87. package/src/formats/rdrr/parsing.js +39 -2
  88. package/src/formats/rdrr/types.d.ts +2 -1
  89. package/src/gpu/command-recorder.js +86 -61
  90. package/src/gpu/device.d.ts +1 -0
  91. package/src/gpu/device.js +73 -19
  92. package/src/gpu/kernel-tuner/benchmarks.js +326 -316
  93. package/src/gpu/kernel-tuner/cache.js +71 -4
  94. package/src/gpu/kernel-tuner/tuner.js +22 -4
  95. package/src/gpu/kernels/attention.js +15 -34
  96. package/src/gpu/kernels/backward/adam.js +62 -58
  97. package/src/gpu/kernels/backward/attention_backward.js +257 -169
  98. package/src/gpu/kernels/backward/conv2d_backward.js +14 -1
  99. package/src/gpu/kernels/cast.js +191 -149
  100. package/src/gpu/kernels/check-stop.js +33 -44
  101. package/src/gpu/kernels/conv2d.js +27 -17
  102. package/src/gpu/kernels/cross_entropy_loss.js +21 -15
  103. package/src/gpu/kernels/depthwise_conv2d.js +36 -26
  104. package/src/gpu/kernels/dequant.js +178 -126
  105. package/src/gpu/kernels/energy.d.ts +3 -21
  106. package/src/gpu/kernels/energy.js +111 -88
  107. package/src/gpu/kernels/feature-check.js +1 -1
  108. package/src/gpu/kernels/fused_ffn.js +84 -65
  109. package/src/gpu/kernels/fused_matmul_residual.js +56 -33
  110. package/src/gpu/kernels/fused_matmul_rmsnorm.js +62 -45
  111. package/src/gpu/kernels/gather.js +33 -15
  112. package/src/gpu/kernels/gelu.js +19 -11
  113. package/src/gpu/kernels/grouped_pointwise_conv2d.js +33 -23
  114. package/src/gpu/kernels/groupnorm.js +34 -23
  115. package/src/gpu/kernels/kv-quantize.js +5 -2
  116. package/src/gpu/kernels/layernorm.js +35 -19
  117. package/src/gpu/kernels/logit-merge.js +5 -3
  118. package/src/gpu/kernels/matmul.js +58 -39
  119. package/src/gpu/kernels/modulate.js +23 -15
  120. package/src/gpu/kernels/moe.js +221 -175
  121. package/src/gpu/kernels/pixel_shuffle.js +22 -14
  122. package/src/gpu/kernels/relu.js +18 -10
  123. package/src/gpu/kernels/repeat_channels.js +25 -17
  124. package/src/gpu/kernels/residual.js +37 -27
  125. package/src/gpu/kernels/rmsnorm.js +57 -41
  126. package/src/gpu/kernels/rope.js +3 -0
  127. package/src/gpu/kernels/sample.js +27 -38
  128. package/src/gpu/kernels/sana_linear_attention.js +18 -10
  129. package/src/gpu/kernels/scale.js +18 -11
  130. package/src/gpu/kernels/shader-cache.js +4 -2
  131. package/src/gpu/kernels/silu.js +120 -72
  132. package/src/gpu/kernels/softmax.js +44 -25
  133. package/src/gpu/kernels/split_qkv.js +23 -13
  134. package/src/gpu/kernels/transpose.js +18 -10
  135. package/src/gpu/kernels/transpose.wgsl +5 -3
  136. package/src/gpu/kernels/upsample2d.js +21 -13
  137. package/src/gpu/kernels/utils.js +20 -13
  138. package/src/gpu/partitioned-buffer-pool.js +10 -2
  139. package/src/gpu/perf-guards.js +2 -9
  140. package/src/gpu/profiler.js +27 -22
  141. package/src/gpu/readback-utils.d.ts +16 -0
  142. package/src/gpu/readback-utils.js +41 -0
  143. package/src/gpu/submit-tracker.js +13 -0
  144. package/src/gpu/uniform-cache.d.ts +1 -0
  145. package/src/gpu/uniform-cache.js +30 -9
  146. package/src/hotswap/intent-bundle.js +6 -0
  147. package/src/hotswap/manifest.d.ts +10 -1
  148. package/src/hotswap/manifest.js +12 -2
  149. package/src/hotswap/runtime.js +30 -8
  150. package/src/index-browser.d.ts +44 -0
  151. package/src/index-browser.js +14 -0
  152. package/src/inference/browser-harness-contract-helpers.d.ts +5 -0
  153. package/src/inference/browser-harness-contract-helpers.js +28 -0
  154. package/src/inference/browser-harness-diffusion-energy-suites.d.ts +2 -0
  155. package/src/inference/browser-harness-diffusion-energy-suites.js +269 -0
  156. package/src/inference/browser-harness-model-helpers.d.ts +16 -0
  157. package/src/inference/browser-harness-model-helpers.js +217 -0
  158. package/src/inference/browser-harness-report-helpers.d.ts +7 -0
  159. package/src/inference/browser-harness-report-helpers.js +42 -0
  160. package/src/inference/browser-harness-runtime-helpers.d.ts +61 -0
  161. package/src/inference/browser-harness-runtime-helpers.js +415 -0
  162. package/src/inference/browser-harness-suite-helpers.d.ts +28 -0
  163. package/src/inference/browser-harness-suite-helpers.js +268 -0
  164. package/src/inference/browser-harness-text-helpers.d.ts +27 -0
  165. package/src/inference/browser-harness-text-helpers.js +788 -0
  166. package/src/inference/browser-harness.d.ts +6 -0
  167. package/src/inference/browser-harness.js +130 -1996
  168. package/src/inference/kv-cache/base.js +140 -94
  169. package/src/inference/kv-cache/tiered.js +5 -3
  170. package/src/inference/moe-router.js +88 -56
  171. package/src/inference/multi-model-network.js +5 -3
  172. package/src/inference/network-evolution.d.ts +11 -2
  173. package/src/inference/network-evolution.js +20 -21
  174. package/src/inference/pipelines/context.d.ts +3 -0
  175. package/src/inference/pipelines/context.js +142 -2
  176. package/src/inference/pipelines/diffusion/helpers.js +7 -2
  177. package/src/inference/pipelines/diffusion/pipeline.js +2 -1
  178. package/src/inference/pipelines/diffusion/sd3-transformer.js +10 -10
  179. package/src/inference/pipelines/diffusion/vae.js +3 -7
  180. package/src/inference/pipelines/energy/pipeline.js +27 -21
  181. package/src/inference/pipelines/energy/quintel.d.ts +5 -0
  182. package/src/inference/pipelines/energy/quintel.js +11 -0
  183. package/src/inference/pipelines/energy-head/row-head-pipeline.js +17 -13
  184. package/src/inference/pipelines/structured/json-head-pipeline.js +26 -11
  185. package/src/inference/pipelines/text/attention/projections.js +151 -101
  186. package/src/inference/pipelines/text/attention/record.js +62 -8
  187. package/src/inference/pipelines/text/attention/run.js +62 -8
  188. package/src/inference/pipelines/text/config.js +3 -4
  189. package/src/inference/pipelines/text/embed.js +2 -8
  190. package/src/inference/pipelines/text/execution-plan.js +41 -19
  191. package/src/inference/pipelines/text/execution-v0-contract-helpers.d.ts +59 -0
  192. package/src/inference/pipelines/text/execution-v0-contract-helpers.js +937 -0
  193. package/src/inference/pipelines/text/execution-v0-runtime-builders.d.ts +15 -0
  194. package/src/inference/pipelines/text/execution-v0-runtime-builders.js +279 -0
  195. package/src/inference/pipelines/text/execution-v0.js +62 -1013
  196. package/src/inference/pipelines/text/generator-steps.d.ts +46 -0
  197. package/src/inference/pipelines/text/generator-steps.js +298 -207
  198. package/src/inference/pipelines/text/generator.js +6 -23
  199. package/src/inference/pipelines/text/init.js +78 -20
  200. package/src/inference/pipelines/text/kernel-path-auto-select.js +2 -0
  201. package/src/inference/pipelines/text/kernel-trace.d.ts +2 -0
  202. package/src/inference/pipelines/text/kernel-trace.js +6 -0
  203. package/src/inference/pipelines/text/layer.js +3 -9
  204. package/src/inference/pipelines/text/linear-attention.d.ts +10 -0
  205. package/src/inference/pipelines/text/linear-attention.js +80 -6
  206. package/src/inference/pipelines/text/logits/gpu.js +10 -5
  207. package/src/inference/pipelines/text/logits/index.js +10 -11
  208. package/src/inference/pipelines/text/logits/utils.d.ts +7 -0
  209. package/src/inference/pipelines/text/logits/utils.js +9 -0
  210. package/src/inference/pipelines/text/lora-apply.js +50 -32
  211. package/src/inference/pipelines/text/model-load.js +279 -104
  212. package/src/inference/pipelines/text/moe-cache.js +5 -4
  213. package/src/inference/pipelines/text/moe-cpu-gptoss.js +74 -69
  214. package/src/inference/pipelines/text/moe-cpu.js +42 -38
  215. package/src/inference/pipelines/text/moe-gpu.js +110 -86
  216. package/src/inference/pipelines/text/ops.js +90 -90
  217. package/src/inference/pipelines/text/probes.js +9 -9
  218. package/src/inference/pipelines/text/weights.js +17 -7
  219. package/src/inference/pipelines/text.js +13 -1
  220. package/src/inference/speculative.d.ts +2 -2
  221. package/src/inference/speculative.js +4 -18
  222. package/src/inference/test-harness.d.ts +1 -1
  223. package/src/inference/test-harness.js +15 -5
  224. package/src/inference/tokenizer.d.ts +0 -5
  225. package/src/inference/tokenizer.js +4 -23
  226. package/src/inference/tokenizers/bpe.js +9 -0
  227. package/src/inference/tokenizers/bundled.js +20 -0
  228. package/src/inference/tokenizers/sentencepiece.js +12 -0
  229. package/src/loader/doppler-loader.js +38 -22
  230. package/src/loader/dtype-utils.js +3 -44
  231. package/src/loader/embedding-loader.js +7 -3
  232. package/src/loader/experts/expert-cache.js +13 -6
  233. package/src/loader/experts/expert-loader.js +10 -6
  234. package/src/loader/final-weights-loader.js +8 -4
  235. package/src/loader/layer-loader.js +2 -1
  236. package/src/loader/loader-state.js +2 -2
  237. package/src/loader/memory-monitor.js +8 -0
  238. package/src/loader/multi-model-loader.d.ts +14 -0
  239. package/src/loader/multi-model-loader.js +70 -24
  240. package/src/loader/shard-cache.js +81 -12
  241. package/src/loader/shard-resolver.js +25 -3
  242. package/src/loader/tensors/tensor-loader.js +209 -144
  243. package/src/loader/tensors/tensor-reader.js +76 -19
  244. package/src/loader/weight-downcast.js +1 -1
  245. package/src/memory/buffer-pool.d.ts +9 -1
  246. package/src/memory/buffer-pool.js +109 -44
  247. package/src/memory/unified-detect.js +1 -1
  248. package/src/rules/inference/kernel-path.rules.json +24 -8
  249. package/src/rules/rule-registry.js +25 -1
  250. package/src/storage/backends/opfs-store.js +68 -24
  251. package/src/storage/downloader.js +364 -83
  252. package/src/storage/index.d.ts +3 -0
  253. package/src/storage/index.js +3 -0
  254. package/src/storage/preflight.d.ts +2 -2
  255. package/src/storage/preflight.js +24 -2
  256. package/src/storage/quickstart-downloader.js +11 -5
  257. package/src/storage/registry.js +10 -4
  258. package/src/storage/reports.js +1 -1
  259. package/src/storage/shard-manager.d.ts +15 -1
  260. package/src/storage/shard-manager.js +51 -3
  261. package/src/storage/source-artifact-store.d.ts +52 -0
  262. package/src/storage/source-artifact-store.js +234 -0
  263. package/src/tooling/command-api-constants.d.ts +9 -0
  264. package/src/tooling/command-api-constants.js +9 -0
  265. package/src/tooling/command-api-family-normalizers.d.ts +9 -0
  266. package/src/tooling/command-api-family-normalizers.js +343 -0
  267. package/src/tooling/command-api-helpers.d.ts +25 -0
  268. package/src/tooling/command-api-helpers.js +262 -0
  269. package/src/tooling/command-api.js +16 -602
  270. package/src/tooling/command-envelope.js +4 -1
  271. package/src/tooling/command-runner-shared.js +52 -18
  272. package/src/tooling/lean-execution-contract.js +150 -3
  273. package/src/tooling/node-browser-command-runner.js +161 -271
  274. package/src/tooling/node-command-runner.js +29 -3
  275. package/src/tooling/node-converter.js +27 -1
  276. package/src/tooling/node-source-runtime.d.ts +1 -1
  277. package/src/tooling/node-source-runtime.js +84 -3
  278. package/src/tooling/node-webgpu.js +24 -21
  279. package/src/tooling/opfs-cache.js +21 -4
  280. package/src/tooling/runtime-input-composition.d.ts +38 -0
  281. package/src/tooling/runtime-input-composition.js +86 -0
  282. package/src/tooling/source-runtime-bundle.d.ts +40 -5
  283. package/src/tooling/source-runtime-bundle.js +261 -34
  284. package/src/tooling/source-runtime-materializer.d.ts +6 -0
  285. package/src/tooling/source-runtime-materializer.js +93 -0
  286. package/src/training/attention-backward.js +32 -17
  287. package/src/training/autograd.js +80 -52
  288. package/src/training/checkpoint-watch.d.ts +2 -1
  289. package/src/training/checkpoint-watch.js +39 -6
  290. package/src/training/checkpoint.js +40 -11
  291. package/src/training/clip.js +2 -1
  292. package/src/training/datasets/token-batch.js +20 -8
  293. package/src/training/distillation/checkpoint-watch.js +1 -0
  294. package/src/training/distillation/student-fixture.d.ts +22 -0
  295. package/src/training/distillation/student-fixture.js +846 -0
  296. package/src/training/distillation/suite-data.d.ts +45 -0
  297. package/src/training/distillation/suite-data.js +189 -0
  298. package/src/training/lora-pipeline.js +4 -7
  299. package/src/training/lora.js +26 -12
  300. package/src/training/loss.js +5 -6
  301. package/src/training/objectives/cross_entropy.js +2 -5
  302. package/src/training/objectives/distill_kd.js +4 -8
  303. package/src/training/objectives/distill_triplet.js +4 -8
  304. package/src/training/objectives/ul_stage2_base.js +4 -8
  305. package/src/training/operator-command.js +2 -0
  306. package/src/training/optimizer.js +19 -7
  307. package/src/training/runner.js +2 -1
  308. package/src/training/suite.js +18 -978
  309. package/src/training/tensor-factory.d.ts +9 -0
  310. package/src/training/tensor-factory.js +13 -0
  311. package/src/training/trainer.js +3 -5
  312. package/src/training/ul_dataset.js +3 -5
  313. package/src/training/workloads.js +70 -79
  314. package/src/version.js +1 -1
  315. package/tools/convert-safetensors-node.js +22 -16
  316. package/tools/doppler-cli.js +44 -25
@@ -1,13 +1,26 @@
1
1
 
2
2
 
3
3
  import { getDevice } from '../device.js';
4
- import { acquireBuffer } from '../../memory/buffer-pool.js';
4
+ import { acquireBuffer, releaseBuffer } from '../../memory/buffer-pool.js';
5
5
  import { createTensor, dtypeBytes } from '../tensor.js';
6
6
  import { WORKGROUP_SIZES } from './constants.js';
7
7
  import { dispatch, recordDispatch } from './dispatch.js';
8
8
  import { getPipelineFast, createUniformBufferWithView } from './utils.js';
9
9
  import { selectRuleValue } from './rule-registry.js';
10
10
 
11
+ function destroyAfterSubmit(device, buffer) {
12
+ if (!buffer) {
13
+ return;
14
+ }
15
+ device.queue.onSubmittedWorkDone()
16
+ .then(() => {
17
+ buffer.destroy();
18
+ })
19
+ .catch(() => {
20
+ buffer.destroy();
21
+ });
22
+ }
23
+
11
24
  function canUseF16(input) {
12
25
  return input.dtype === 'f16';
13
26
  }
@@ -47,6 +60,12 @@ function createSiLUBindGroupEntries(uniformBuffer, input, output, gate) {
47
60
  ];
48
61
  }
49
62
 
63
+ function cleanupRunResources(uniformBuffer, ownedOutput) {
64
+ if (ownedOutput) {
65
+ releaseBuffer(ownedOutput);
66
+ }
67
+ }
68
+
50
69
  function planSiLUDispatch(device, size, useVec4) {
51
70
  const maxPerDim = Number.isFinite(device?.limits?.maxComputeWorkgroupsPerDimension)
52
71
  ? device.limits.maxComputeWorkgroupsPerDimension
@@ -97,6 +116,7 @@ export async function runSiLU(
97
116
  const inferredSize = size || (input.buffer.size / bytesPerElement);
98
117
  const outputSize = inferredSize * bytesPerElement;
99
118
  const output = outputBuffer || acquireBuffer(outputSize, undefined, 'silu_output');
119
+ const ownedOutput = outputBuffer ? null : output;
100
120
  const dispatchPlan = planSiLUDispatch(device, inferredSize, useVec4);
101
121
 
102
122
  // Create uniform buffer
@@ -116,17 +136,21 @@ export async function runSiLU(
116
136
  // Create bind group using helper
117
137
  const entries = createSiLUBindGroupEntries(uniformBuffer, input, output, gate);
118
138
 
119
- const bindGroup = device.createBindGroup({
120
- label: 'silu_bind_group',
121
- layout: pipeline.getBindGroupLayout(0),
122
- entries,
123
- });
124
-
125
- dispatch(device, pipeline, bindGroup, dispatchPlan.workgroups, 'silu');
126
-
127
- uniformBuffer.destroy();
128
-
129
- return createTensor(output, input.dtype, [inferredSize], 'silu_output');
139
+ try {
140
+ const bindGroup = device.createBindGroup({
141
+ label: 'silu_bind_group',
142
+ layout: pipeline.getBindGroupLayout(0),
143
+ entries,
144
+ });
145
+
146
+ dispatch(device, pipeline, bindGroup, dispatchPlan.workgroups, 'silu');
147
+ return createTensor(output, input.dtype, [inferredSize], 'silu_output');
148
+ } catch (error) {
149
+ cleanupRunResources(null, ownedOutput);
150
+ throw error;
151
+ } finally {
152
+ destroyAfterSubmit(device, uniformBuffer);
153
+ }
130
154
  }
131
155
 
132
156
 
@@ -148,6 +172,7 @@ export async function runSwiGLURowsplitBias(
148
172
  const bytesPerElement = dtypeBytes(input.dtype);
149
173
  const outputSize = numTokens * dim * bytesPerElement;
150
174
  const output = outputBuffer || acquireBuffer(outputSize, undefined, 'swiglu_output');
175
+ const ownedOutput = outputBuffer ? null : output;
151
176
 
152
177
  // Create uniform buffer
153
178
  const uniformBuffer = createUniformBufferWithView(
@@ -164,23 +189,27 @@ export async function runSwiGLURowsplitBias(
164
189
  );
165
190
 
166
191
  // Create bind group
167
- const bindGroup = device.createBindGroup({
168
- label: 'swiglu_bind_group',
169
- layout: pipeline.getBindGroupLayout(0),
170
- entries: [
171
- { binding: 0, resource: { buffer: uniformBuffer } },
172
- { binding: 1, resource: { buffer: input.buffer } },
173
- { binding: 2, resource: { buffer: bias.buffer } },
174
- { binding: 3, resource: { buffer: output } },
175
- ],
176
- });
177
-
178
- const workgroups = Math.ceil((numTokens * dim) / WORKGROUP_SIZES.DEFAULT);
179
- dispatch(device, pipeline, bindGroup, workgroups, 'swiglu');
180
-
181
- uniformBuffer.destroy();
182
-
183
- return createTensor(output, input.dtype, [numTokens, dim], 'swiglu_output');
192
+ try {
193
+ const bindGroup = device.createBindGroup({
194
+ label: 'swiglu_bind_group',
195
+ layout: pipeline.getBindGroupLayout(0),
196
+ entries: [
197
+ { binding: 0, resource: { buffer: uniformBuffer } },
198
+ { binding: 1, resource: { buffer: input.buffer } },
199
+ { binding: 2, resource: { buffer: bias.buffer } },
200
+ { binding: 3, resource: { buffer: output } },
201
+ ],
202
+ });
203
+
204
+ const workgroups = Math.ceil((numTokens * dim) / WORKGROUP_SIZES.DEFAULT);
205
+ dispatch(device, pipeline, bindGroup, workgroups, 'swiglu');
206
+ return createTensor(output, input.dtype, [numTokens, dim], 'swiglu_output');
207
+ } catch (error) {
208
+ cleanupRunResources(null, ownedOutput);
209
+ throw error;
210
+ } finally {
211
+ destroyAfterSubmit(device, uniformBuffer);
212
+ }
184
213
  }
185
214
 
186
215
 
@@ -202,6 +231,7 @@ export async function runSiLURowSplit(
202
231
 
203
232
  const outputSize = numTokens * dim * bytesPerElement;
204
233
  const output = outputBuffer || acquireBuffer(outputSize, undefined, 'silu_rowsplit_output');
234
+ const ownedOutput = outputBuffer ? null : output;
205
235
 
206
236
  // Create uniform buffer
207
237
  const uniformBuffer = createUniformBufferWithView(
@@ -218,24 +248,28 @@ export async function runSiLURowSplit(
218
248
  );
219
249
 
220
250
  // Bind group: provide a dummy gate buffer to satisfy the fixed layout
221
- const gateBuffer = input.buffer;
222
- const bindGroup = device.createBindGroup({
223
- label: 'silu_rowsplit_bind_group',
224
- layout: pipeline.getBindGroupLayout(0),
225
- entries: [
226
- { binding: 0, resource: { buffer: uniformBuffer } },
227
- { binding: 1, resource: { buffer: input.buffer } },
228
- { binding: 2, resource: { buffer: output } },
229
- { binding: 3, resource: { buffer: gateBuffer } },
230
- ],
231
- });
232
-
233
- const workgroups = [Math.ceil(dim / WORKGROUP_SIZES.DEFAULT), numTokens, 1];
234
- dispatch(device, pipeline, bindGroup, workgroups, 'silu_rowsplit');
235
-
236
- uniformBuffer.destroy();
237
-
238
- return createTensor(output, input.dtype, [numTokens, dim], 'silu_rowsplit_output');
251
+ try {
252
+ const gateBuffer = input.buffer;
253
+ const bindGroup = device.createBindGroup({
254
+ label: 'silu_rowsplit_bind_group',
255
+ layout: pipeline.getBindGroupLayout(0),
256
+ entries: [
257
+ { binding: 0, resource: { buffer: uniformBuffer } },
258
+ { binding: 1, resource: { buffer: input.buffer } },
259
+ { binding: 2, resource: { buffer: output } },
260
+ { binding: 3, resource: { buffer: gateBuffer } },
261
+ ],
262
+ });
263
+
264
+ const workgroups = [Math.ceil(dim / WORKGROUP_SIZES.DEFAULT), numTokens, 1];
265
+ dispatch(device, pipeline, bindGroup, workgroups, 'silu_rowsplit');
266
+ return createTensor(output, input.dtype, [numTokens, dim], 'silu_rowsplit_output');
267
+ } catch (error) {
268
+ cleanupRunResources(null, ownedOutput);
269
+ throw error;
270
+ } finally {
271
+ uniformBuffer.destroy();
272
+ }
239
273
  }
240
274
 
241
275
 
@@ -258,6 +292,7 @@ export async function recordSiLURowSplit(
258
292
 
259
293
  const outputSize = numTokens * dim * bytesPerElement;
260
294
  const output = outputBuffer || acquireBuffer(outputSize, undefined, 'silu_rowsplit_output');
295
+ const ownedOutput = outputBuffer ? null : output;
261
296
 
262
297
  // Uniform buffer
263
298
  const uniformBuffer = createUniformBufferWithView(
@@ -272,22 +307,28 @@ export async function recordSiLURowSplit(
272
307
  recorder
273
308
  );
274
309
 
275
- const gateBuffer = input.buffer;
276
- const bindGroup = device.createBindGroup({
277
- label: 'silu_rowsplit_bind_group',
278
- layout: pipeline.getBindGroupLayout(0),
279
- entries: [
280
- { binding: 0, resource: { buffer: uniformBuffer } },
281
- { binding: 1, resource: { buffer: input.buffer } },
282
- { binding: 2, resource: { buffer: output } },
283
- { binding: 3, resource: { buffer: gateBuffer } },
284
- ],
285
- });
286
-
287
- const workgroups = [Math.ceil(dim / WORKGROUP_SIZES.DEFAULT), numTokens, 1];
288
- recordDispatch(recorder, pipeline, bindGroup, workgroups, 'silu_rowsplit');
289
-
290
- return createTensor(output, input.dtype, [numTokens, dim], 'silu_rowsplit_output');
310
+ try {
311
+ const gateBuffer = input.buffer;
312
+ const bindGroup = device.createBindGroup({
313
+ label: 'silu_rowsplit_bind_group',
314
+ layout: pipeline.getBindGroupLayout(0),
315
+ entries: [
316
+ { binding: 0, resource: { buffer: uniformBuffer } },
317
+ { binding: 1, resource: { buffer: input.buffer } },
318
+ { binding: 2, resource: { buffer: output } },
319
+ { binding: 3, resource: { buffer: gateBuffer } },
320
+ ],
321
+ });
322
+
323
+ const workgroups = [Math.ceil(dim / WORKGROUP_SIZES.DEFAULT), numTokens, 1];
324
+ recordDispatch(recorder, pipeline, bindGroup, workgroups, 'silu_rowsplit');
325
+ return createTensor(output, input.dtype, [numTokens, dim], 'silu_rowsplit_output');
326
+ } catch (error) {
327
+ if (ownedOutput) {
328
+ releaseBuffer(ownedOutput);
329
+ }
330
+ throw error;
331
+ }
291
332
  }
292
333
 
293
334
 
@@ -328,6 +369,7 @@ export async function recordSiLU(
328
369
  const inferredSize = size || (input.buffer.size / bytesPerElement);
329
370
  const outputSize = inferredSize * bytesPerElement;
330
371
  const output = outputBuffer || acquireBuffer(outputSize, undefined, 'silu_output');
372
+ const ownedOutput = outputBuffer ? null : output;
331
373
  const dispatchPlan = planSiLUDispatch(device, inferredSize, false);
332
374
 
333
375
  // Uniform buffer
@@ -346,13 +388,19 @@ export async function recordSiLU(
346
388
  // Create bind group using helper
347
389
  const entries = createSiLUBindGroupEntries(uniformBuffer, input, output, gate);
348
390
 
349
- const bindGroup = device.createBindGroup({
350
- label: 'silu_bind_group',
351
- layout: pipeline.getBindGroupLayout(0),
352
- entries,
353
- });
354
-
355
- recordDispatch(recorder, pipeline, bindGroup, dispatchPlan.workgroups, 'silu');
356
-
357
- return createTensor(output, input.dtype, [inferredSize], 'silu_output');
391
+ try {
392
+ const bindGroup = device.createBindGroup({
393
+ label: 'silu_bind_group',
394
+ layout: pipeline.getBindGroupLayout(0),
395
+ entries,
396
+ });
397
+
398
+ recordDispatch(recorder, pipeline, bindGroup, dispatchPlan.workgroups, 'silu');
399
+ return createTensor(output, input.dtype, [inferredSize], 'silu_output');
400
+ } catch (error) {
401
+ if (ownedOutput) {
402
+ releaseBuffer(ownedOutput);
403
+ }
404
+ throw error;
405
+ }
358
406
  }
@@ -1,6 +1,6 @@
1
1
 
2
2
  import { getKernelCapabilities } from '../device.js';
3
- import { acquireBuffer } from '../../memory/buffer-pool.js';
3
+ import { acquireBuffer, releaseBuffer } from '../../memory/buffer-pool.js';
4
4
  import { createTensor } from '../tensor.js';
5
5
  import { unifiedKernelWrapper } from './utils.js';
6
6
  import { createPipeline, createUniformBufferWithView, createBindGroupWithValidation } from './utils.js';
@@ -20,23 +20,34 @@ function selectSoftmaxVariant(innerSize) {
20
20
 
21
21
  async function _softmax(target, input, axis, options = {}) {
22
22
  const { batchSize = 1, size, seqLen, temperature = 1.0, outputBuffer = null } = options;
23
+ if (input.dtype !== 'f32') {
24
+ throw new Error(`Softmax requires f32 input, got ${input.dtype}.`);
25
+ }
23
26
 
24
- const bytesPerElement = input.dtype === 'f16' ? 2 : 4;
27
+ const bytesPerElement = 4;
25
28
  const inferredSize = size || seqLen || (input.buffer.size / (batchSize * bytesPerElement));
26
29
  const variant = selectSoftmaxVariant(inferredSize);
27
30
  trace.kernels(`Softmax: size=${inferredSize}, variant=${variant}`);
28
31
 
29
32
  const outputSize = batchSize * inferredSize * bytesPerElement;
30
33
  const output = outputBuffer || acquireBuffer(outputSize, undefined, 'softmax_output');
34
+ const ownedOutput = outputBuffer ? null : output;
35
+
36
+ try {
37
+ await unifiedKernelWrapper(
38
+ 'softmax', target, variant,
39
+ [input, output],
40
+ { inner_size: inferredSize, outer_size: batchSize, temperature },
41
+ batchSize
42
+ );
43
+ } catch (error) {
44
+ if (ownedOutput) {
45
+ releaseBuffer(ownedOutput);
46
+ }
47
+ throw error;
48
+ }
31
49
 
32
- await unifiedKernelWrapper(
33
- 'softmax', target, variant,
34
- [input, output],
35
- { inner_size: inferredSize, outer_size: batchSize, temperature },
36
- batchSize
37
- );
38
-
39
- return createTensor(output, input.dtype, [batchSize, inferredSize], 'softmax_output');
50
+ return createTensor(output, 'f32', [batchSize, inferredSize], 'softmax_output');
40
51
  }
41
52
 
42
53
  export async function runSoftmax(input, axis, options = {}) {
@@ -76,6 +87,7 @@ export async function runSoftmaxTopK(logits, numTokens, numExperts, topK, option
76
87
 
77
88
  const indices = acquireBuffer(indicesSize, undefined, 'softmax_topk_indices');
78
89
  const weights = acquireBuffer(weightsSize, undefined, 'softmax_topk_weights');
90
+ let completed = false;
79
91
 
80
92
  const uniformBuffer = createUniformBufferWithView(
81
93
  'softmax_topk_uniforms', 16,
@@ -88,19 +100,26 @@ export async function runSoftmaxTopK(logits, numTokens, numExperts, topK, option
88
100
  null, device
89
101
  );
90
102
 
91
- const bindGroup = await createBindGroupWithValidation(device, {
92
- label: 'softmax_topk_bind_group',
93
- layout: pipeline.getBindGroupLayout(0),
94
- entries: [
95
- { binding: 0, resource: { buffer: uniformBuffer } },
96
- { binding: 1, resource: { buffer: logits } },
97
- { binding: 2, resource: { buffer: indices } },
98
- { binding: 3, resource: { buffer: weights } },
99
- ],
100
- }, `topk:${variant}`);
101
-
102
- dispatchKernel(null, pipeline, bindGroup, numTokens, 'softmax_topk');
103
- uniformBuffer.destroy();
104
-
105
- return { indices, weights };
103
+ try {
104
+ const bindGroup = await createBindGroupWithValidation(device, {
105
+ label: 'softmax_topk_bind_group',
106
+ layout: pipeline.getBindGroupLayout(0),
107
+ entries: [
108
+ { binding: 0, resource: { buffer: uniformBuffer } },
109
+ { binding: 1, resource: { buffer: logits } },
110
+ { binding: 2, resource: { buffer: indices } },
111
+ { binding: 3, resource: { buffer: weights } },
112
+ ],
113
+ }, `topk:${variant}`);
114
+
115
+ dispatchKernel(null, pipeline, bindGroup, numTokens, 'softmax_topk');
116
+ completed = true;
117
+ return { indices, weights };
118
+ } finally {
119
+ uniformBuffer.destroy();
120
+ if (!completed) {
121
+ releaseBuffer(indices);
122
+ releaseBuffer(weights);
123
+ }
124
+ }
106
125
  }
@@ -1,5 +1,5 @@
1
1
 
2
- import { acquireBuffer } from '../../memory/buffer-pool.js';
2
+ import { acquireBuffer, releaseBuffer } from '../../memory/buffer-pool.js';
3
3
  import { createTensor, dtypeBytes } from '../tensor.js';
4
4
  import { WORKGROUP_SIZES } from './constants.js';
5
5
  import { unifiedKernelWrapper } from './utils.js';
@@ -7,6 +7,9 @@ import { selectRuleValue } from './rule-registry.js';
7
7
 
8
8
  async function _splitQKV(target, qkvTensor, options) {
9
9
  const { numTokens, qSize, kSize, vSize, qTensor = null, kTensor = null, vTensor = null } = options;
10
+ const ownsQ = qTensor == null;
11
+ const ownsK = kTensor == null;
12
+ const ownsV = vTensor == null;
10
13
 
11
14
  const outputDtype = qkvTensor.dtype;
12
15
  const pipelineVariant = selectRuleValue('splitQkv', 'variant', { outputDtype });
@@ -18,18 +21,25 @@ async function _splitQKV(target, qkvTensor, options) {
18
21
 
19
22
  const totalElements = numTokens * (qSize + kSize + vSize);
20
23
 
21
- await unifiedKernelWrapper(
22
- 'split_qkv', target, pipelineVariant,
23
- [qkvTensor, qBuffer, kBuffer, vBuffer],
24
- { num_tokens: numTokens, q_size: qSize, k_size: kSize, v_size: vSize },
25
- Math.ceil(totalElements / WORKGROUP_SIZES.DEFAULT)
26
- );
27
-
28
- const Q = qTensor || createTensor(qBuffer, outputDtype, [numTokens, qSize], 'Q');
29
- const K = kTensor || createTensor(kBuffer, outputDtype, [numTokens, kSize], 'K');
30
- const V = vTensor || createTensor(vBuffer, outputDtype, [numTokens, vSize], 'V');
31
-
32
- return { Q, K, V };
24
+ try {
25
+ await unifiedKernelWrapper(
26
+ 'split_qkv', target, pipelineVariant,
27
+ [qkvTensor, qBuffer, kBuffer, vBuffer],
28
+ { num_tokens: numTokens, q_size: qSize, k_size: kSize, v_size: vSize },
29
+ Math.ceil(totalElements / WORKGROUP_SIZES.DEFAULT)
30
+ );
31
+
32
+ const Q = qTensor || createTensor(qBuffer, outputDtype, [numTokens, qSize], 'Q');
33
+ const K = kTensor || createTensor(kBuffer, outputDtype, [numTokens, kSize], 'K');
34
+ const V = vTensor || createTensor(vBuffer, outputDtype, [numTokens, vSize], 'V');
35
+
36
+ return { Q, K, V };
37
+ } catch (error) {
38
+ if (ownsQ) releaseBuffer(qBuffer);
39
+ if (ownsK) releaseBuffer(kBuffer);
40
+ if (ownsV) releaseBuffer(vBuffer);
41
+ throw error;
42
+ }
33
43
  }
34
44
 
35
45
  export async function runSplitQKV(qkvTensor, options) {
@@ -1,4 +1,4 @@
1
- import { acquireBuffer } from '../../memory/buffer-pool.js';
1
+ import { acquireBuffer, releaseBuffer } from '../../memory/buffer-pool.js';
2
2
  import { createTensor, dtypeBytes } from '../tensor.js';
3
3
  import { WORKGROUP_SIZES } from './constants.js';
4
4
  import { unifiedKernelWrapper } from './utils.js';
@@ -20,18 +20,26 @@ async function _transpose(target, input, rows, cols, options = {}) {
20
20
  const bytesPerElement = dtypeBytes(input.dtype);
21
21
  const outputSize = rows * cols * bytesPerElement;
22
22
  const outputBuf = outputBuffer || acquireBuffer(outputSize, undefined, 'transpose_output');
23
+ const ownedOutput = outputBuffer ? null : outputBuf;
23
24
  const dispatchPlan = planTransposeDispatch(target, cols);
24
25
 
25
- await unifiedKernelWrapper(
26
- 'transpose',
27
- target,
28
- 'default',
29
- [input, outputBuf],
30
- { rows, cols, _pad0: dispatchPlan.dispatchStride, _pad1: 0 },
31
- [dispatchPlan.workgroups[0], rows, 1]
32
- );
26
+ try {
27
+ await unifiedKernelWrapper(
28
+ 'transpose',
29
+ target,
30
+ 'default',
31
+ [input, outputBuf],
32
+ { rows, cols, _pad0: dispatchPlan.dispatchStride, _pad1: 0 },
33
+ [dispatchPlan.workgroups[0], rows, 1]
34
+ );
33
35
 
34
- return createTensor(outputBuf, input.dtype, [cols, rows], 'transpose_output');
36
+ return createTensor(outputBuf, input.dtype, [cols, rows], 'transpose_output');
37
+ } catch (error) {
38
+ if (ownedOutput) {
39
+ releaseBuffer(ownedOutput);
40
+ }
41
+ throw error;
42
+ }
35
43
  }
36
44
 
37
45
  export async function runTranspose(input, rows, cols, options = {}) {
@@ -20,11 +20,13 @@ struct Uniforms {
20
20
  @compute @workgroup_size(WORKGROUP_SIZE, 1, 1)
21
21
  fn main(@builtin(global_invocation_id) gid: vec3<u32>) {
22
22
  let dispatch_stride = max(u._pad0, 1u);
23
- let row = gid.y;
24
- let col = gid.x + row * dispatch_stride;
25
- if (row >= u.rows || col >= u.cols) {
23
+ let linear_idx = gid.y * dispatch_stride + gid.x;
24
+ let total = u.rows * u.cols;
25
+ if (linear_idx >= total) {
26
26
  return;
27
27
  }
28
+ let row = linear_idx / u.cols;
29
+ let col = linear_idx % u.cols;
28
30
  let idx = row * u.cols + col;
29
31
  let out_idx = col * u.rows + row;
30
32
  output[out_idx] = input[idx];
@@ -1,4 +1,4 @@
1
- import { acquireBuffer } from '../../memory/buffer-pool.js';
1
+ import { acquireBuffer, releaseBuffer } from '../../memory/buffer-pool.js';
2
2
  import { createTensor, dtypeBytes } from '../tensor.js';
3
3
  import { unifiedKernelWrapper } from './utils.js';
4
4
  import { selectRuleValue } from './rule-registry.js';
@@ -35,19 +35,27 @@ async function _upsample2d(target, input, options = {}) {
35
35
  const bytesPerElement = dtypeBytes(input.dtype);
36
36
  const outputSize = channels * outHeight * outWidth * bytesPerElement;
37
37
  const output = outputBuffer || acquireBuffer(outputSize, undefined, 'upsample2d_output');
38
+ const ownedOutput = outputBuffer ? null : output;
38
39
 
39
- await unifiedKernelWrapper(
40
- 'upsample2d', target, selectUpsample2DVariant(input.dtype === 'f16'),
41
- [input, output],
42
- {
43
- channels, in_height: resolvedHeight, in_width: resolvedWidth,
44
- out_height: outHeight, out_width: outWidth, scale,
45
- _pad0: 0, _pad1: 0,
46
- },
47
- [Math.ceil(outSpatial / WORKGROUP_SIZES.DEFAULT), channels, 1]
48
- );
49
-
50
- return createTensor(output, input.dtype, [channels, outHeight, outWidth], 'upsample2d_output');
40
+ try {
41
+ await unifiedKernelWrapper(
42
+ 'upsample2d', target, selectUpsample2DVariant(input.dtype === 'f16'),
43
+ [input, output],
44
+ {
45
+ channels, in_height: resolvedHeight, in_width: resolvedWidth,
46
+ out_height: outHeight, out_width: outWidth, scale,
47
+ _pad0: 0, _pad1: 0,
48
+ },
49
+ [Math.ceil(outSpatial / WORKGROUP_SIZES.DEFAULT), channels, 1]
50
+ );
51
+
52
+ return createTensor(output, input.dtype, [channels, outHeight, outWidth], 'upsample2d_output');
53
+ } catch (error) {
54
+ if (ownedOutput) {
55
+ releaseBuffer(ownedOutput);
56
+ }
57
+ throw error;
58
+ }
51
59
  }
52
60
 
53
61
  export async function runUpsample2D(input, options = {}) {
@@ -137,21 +137,28 @@ export async function unifiedKernelWrapper(opName, target, variant, bindings, un
137
137
  });
138
138
  }
139
139
 
140
- const bindGroup = device.createBindGroup({
141
- label: `${opName}_bind_group`,
142
- layout: pipeline.getBindGroupLayout(0),
143
- entries: bindGroupEntries,
144
- });
145
-
146
- if (workgroups && typeof workgroups === 'object' && workgroups.indirectBuffer) {
147
- const indirectOffset = workgroups.indirectOffset ?? 0;
148
- if (recorder) {
149
- recordDispatchIndirect(recorder, pipeline, bindGroup, workgroups.indirectBuffer, indirectOffset, opName);
140
+ try {
141
+ const bindGroup = device.createBindGroup({
142
+ label: `${opName}_bind_group`,
143
+ layout: pipeline.getBindGroupLayout(0),
144
+ entries: bindGroupEntries,
145
+ });
146
+
147
+ if (workgroups && typeof workgroups === 'object' && workgroups.indirectBuffer) {
148
+ const indirectOffset = workgroups.indirectOffset ?? 0;
149
+ if (recorder) {
150
+ recordDispatchIndirect(recorder, pipeline, bindGroup, workgroups.indirectBuffer, indirectOffset, opName);
151
+ } else {
152
+ dispatchIndirect(device, pipeline, bindGroup, workgroups.indirectBuffer, indirectOffset, opName);
153
+ }
150
154
  } else {
151
- dispatchIndirect(device, pipeline, bindGroup, workgroups.indirectBuffer, indirectOffset, opName);
155
+ dispatchKernel(target, pipeline, bindGroup, workgroups, opName);
156
+ }
157
+ } catch (error) {
158
+ if (!recorder) {
159
+ uniformBuffer.destroy();
152
160
  }
153
- } else {
154
- dispatchKernel(target, pipeline, bindGroup, workgroups, opName);
161
+ throw error;
155
162
  }
156
163
 
157
164
  if (!recorder) {
@@ -11,10 +11,13 @@ export class PartitionedBufferPool {
11
11
 
12
12
  #expertPools;
13
13
 
14
+ #bufferOwners;
15
+
14
16
 
15
17
  constructor(partitions, schemaConfig = getRuntimeConfig().shared.bufferPool) {
16
18
  this.#sharedPool = new BufferPool(false, schemaConfig);
17
19
  this.#expertPools = new Map();
20
+ this.#bufferOwners = new WeakMap();
18
21
  for (const partition of partitions) {
19
22
  this.#expertPools.set(partition.id, new BufferPool(false, schemaConfig));
20
23
  }
@@ -28,12 +31,17 @@ export class PartitionedBufferPool {
28
31
  label
29
32
  ) {
30
33
  const pool = this.#expertPools.get(partitionId) || this.#sharedPool;
31
- return pool.acquire(size, usage, label);
34
+ const buffer = pool.acquire(size, usage, label);
35
+ this.#bufferOwners.set(buffer, pool);
36
+ return buffer;
32
37
  }
33
38
 
34
39
 
35
40
  release(partitionId, buffer) {
36
- const pool = this.#expertPools.get(partitionId) || this.#sharedPool;
41
+ const pool = this.#bufferOwners.get(buffer)
42
+ || this.#expertPools.get(partitionId)
43
+ || this.#sharedPool;
44
+ this.#bufferOwners.delete(buffer);
37
45
  pool.release(buffer);
38
46
  }
39
47
 
@@ -1,15 +1,8 @@
1
1
 
2
-
3
2
  import { log, trace } from '../debug/index.js';
3
+ import { DEFAULT_PERF_GUARDS_CONFIG } from '../config/schema/debug.schema.js';
4
4
 
5
- // Initial config uses inline defaults; caller should configure via configurePerfGuards()
6
- let config = {
7
- allowGPUReadback: true,
8
- trackSubmitCount: false,
9
- trackAllocations: false,
10
- logExpensiveOps: false,
11
- strictMode: false,
12
- };
5
+ let config = { ...DEFAULT_PERF_GUARDS_CONFIG };
13
6
 
14
7
 
15
8
  let counters = {