@simulatte/doppler 0.1.6 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (316)
  1. package/CHANGELOG.md +126 -0
  2. package/README.md +16 -23
  3. package/package.json +14 -1
  4. package/src/adapters/adapter-registry.js +12 -1
  5. package/src/adapters/lora-loader.js +23 -6
  6. package/src/bridge/extension-client.d.ts +5 -0
  7. package/src/bridge/extension-client.js +40 -0
  8. package/src/bridge/index.d.ts +2 -1
  9. package/src/bridge/index.js +6 -4
  10. package/src/browser/browser-converter.js +26 -1
  11. package/src/browser/file-picker.js +6 -0
  12. package/src/browser/safetensors-parser-browser.js +84 -1
  13. package/src/browser/shard-io-browser.js +2 -2
  14. package/src/browser/tensor-source-download.js +8 -2
  15. package/src/browser/tensor-source-http.d.ts +1 -0
  16. package/src/browser/tensor-source-http.js +5 -1
  17. package/src/client/doppler-api.browser.js +20 -4
  18. package/src/client/doppler-api.js +19 -3
  19. package/src/client/doppler-provider/generation.js +12 -0
  20. package/src/client/doppler-provider/model-manager.d.ts +10 -0
  21. package/src/client/doppler-provider/model-manager.js +91 -19
  22. package/src/client/doppler-provider/source-runtime.d.ts +2 -1
  23. package/src/client/doppler-provider/source-runtime.js +132 -13
  24. package/src/client/doppler-registry.json +8 -7
  25. package/src/config/backward-registry-loader.js +17 -2
  26. package/src/config/execution-v0-contract-check.js +113 -15
  27. package/src/config/kernel-path-contract-check.js +57 -29
  28. package/src/config/kernel-path-loader.js +5 -36
  29. package/src/config/kernels/kernel-ref-digests.js +1 -1
  30. package/src/config/kernels/registry.js +14 -1
  31. package/src/config/kernels/registry.json +7 -5
  32. package/src/config/loader.d.ts +1 -1
  33. package/src/config/loader.js +12 -2
  34. package/src/config/merge-contract-check.js +59 -4
  35. package/src/config/merge-helpers.js +128 -7
  36. package/src/config/merge.d.ts +1 -0
  37. package/src/config/merge.js +10 -0
  38. package/src/config/param-validator.js +47 -2
  39. package/src/config/presets/kernel-paths/{gemma2-q4k-dequant-f32a.json → gemma2-q4k-dequant-f32a-nosubgroups.json} +3 -3
  40. package/src/config/presets/kernel-paths/gemma3-f16-fused-f32a-online-streamingprefill.json +223 -0
  41. package/src/config/presets/kernel-paths/{gemma3-q4k-dequant-f32a.json → gemma3-q4k-dequant-f32a-nosubgroups.json} +3 -3
  42. package/src/config/presets/kernel-paths/registry.json +29 -8
  43. package/src/config/presets/models/gemma2.json +2 -2
  44. package/src/config/presets/runtime/experiments/bench/gemma3-bench-q4k.json +1 -1
  45. package/src/config/presets/runtime/experiments/debug/gemma3-debug-q4k.json +1 -1
  46. package/src/config/presets/runtime/experiments/verify/gemma3-verify.json +1 -1
  47. package/src/config/presets/runtime/kernels/dequant-f16-q4k.json +6 -13
  48. package/src/config/presets/runtime/kernels/dequant-f32-q4k.json +6 -13
  49. package/src/config/presets/runtime/kernels/embeddinggemma-q4k-dequant-f32a.json +37 -0
  50. package/src/config/presets/runtime/kernels/fused-q4k.json +6 -13
  51. package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f16a.json +33 -0
  52. package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f32a-nosubgroups.json +33 -0
  53. package/src/config/presets/runtime/kernels/gemma2-q4k-fused-f32a.json +33 -0
  54. package/src/config/presets/runtime/kernels/safe-q4k.json +6 -13
  55. package/src/config/presets/runtime/platform/metal-apple-q4k.json +1 -1
  56. package/src/config/runtime.js +6 -1
  57. package/src/config/schema/debug.schema.d.ts +5 -0
  58. package/src/config/schema/doppler.schema.js +16 -21
  59. package/src/config/schema/inference-defaults.schema.js +3 -3
  60. package/src/config/schema/kernel-path.schema.d.ts +5 -1
  61. package/src/config/schema/kernel-thresholds.schema.js +12 -4
  62. package/src/config/schema/manifest.schema.d.ts +2 -1
  63. package/src/config/schema/manifest.schema.js +16 -3
  64. package/src/config/training-defaults.js +30 -22
  65. package/src/converter/conversion-plan.js +94 -9
  66. package/src/converter/core.d.ts +7 -0
  67. package/src/converter/core.js +14 -9
  68. package/src/converter/execution-v0-manifest.js +4 -1
  69. package/src/converter/index.d.ts +1 -0
  70. package/src/converter/index.js +1 -0
  71. package/src/converter/manifest-inference.js +43 -12
  72. package/src/converter/parsers/diffusion.js +0 -3
  73. package/src/converter/quantization-info.js +35 -15
  74. package/src/converter/shard-packer.d.ts +1 -1
  75. package/src/converter/shard-packer.js +4 -1
  76. package/src/debug/config.js +123 -11
  77. package/src/debug/signals.js +7 -1
  78. package/src/debug/tensor.d.ts +2 -0
  79. package/src/debug/tensor.js +13 -2
  80. package/src/distribution/p2p-control-plane.js +52 -12
  81. package/src/distribution/p2p-observability.js +43 -7
  82. package/src/distribution/p2p-webrtc-browser.js +20 -0
  83. package/src/distribution/shard-delivery.js +77 -26
  84. package/src/formats/gguf/types.js +33 -16
  85. package/src/formats/rdrr/groups.d.ts +12 -4
  86. package/src/formats/rdrr/groups.js +3 -6
  87. package/src/formats/rdrr/parsing.js +39 -2
  88. package/src/formats/rdrr/types.d.ts +2 -1
  89. package/src/gpu/command-recorder.js +86 -61
  90. package/src/gpu/device.d.ts +1 -0
  91. package/src/gpu/device.js +73 -19
  92. package/src/gpu/kernel-tuner/benchmarks.js +326 -316
  93. package/src/gpu/kernel-tuner/cache.js +71 -4
  94. package/src/gpu/kernel-tuner/tuner.js +22 -4
  95. package/src/gpu/kernels/attention.js +15 -34
  96. package/src/gpu/kernels/backward/adam.js +62 -58
  97. package/src/gpu/kernels/backward/attention_backward.js +257 -169
  98. package/src/gpu/kernels/backward/conv2d_backward.js +14 -1
  99. package/src/gpu/kernels/cast.js +191 -149
  100. package/src/gpu/kernels/check-stop.js +33 -44
  101. package/src/gpu/kernels/conv2d.js +27 -17
  102. package/src/gpu/kernels/cross_entropy_loss.js +21 -15
  103. package/src/gpu/kernels/depthwise_conv2d.js +36 -26
  104. package/src/gpu/kernels/dequant.js +178 -126
  105. package/src/gpu/kernels/energy.d.ts +3 -21
  106. package/src/gpu/kernels/energy.js +111 -88
  107. package/src/gpu/kernels/feature-check.js +1 -1
  108. package/src/gpu/kernels/fused_ffn.js +84 -65
  109. package/src/gpu/kernels/fused_matmul_residual.js +56 -33
  110. package/src/gpu/kernels/fused_matmul_rmsnorm.js +62 -45
  111. package/src/gpu/kernels/gather.js +33 -15
  112. package/src/gpu/kernels/gelu.js +19 -11
  113. package/src/gpu/kernels/grouped_pointwise_conv2d.js +33 -23
  114. package/src/gpu/kernels/groupnorm.js +34 -23
  115. package/src/gpu/kernels/kv-quantize.js +5 -2
  116. package/src/gpu/kernels/layernorm.js +35 -19
  117. package/src/gpu/kernels/logit-merge.js +5 -3
  118. package/src/gpu/kernels/matmul.js +58 -39
  119. package/src/gpu/kernels/modulate.js +23 -15
  120. package/src/gpu/kernels/moe.js +221 -175
  121. package/src/gpu/kernels/pixel_shuffle.js +22 -14
  122. package/src/gpu/kernels/relu.js +18 -10
  123. package/src/gpu/kernels/repeat_channels.js +25 -17
  124. package/src/gpu/kernels/residual.js +37 -27
  125. package/src/gpu/kernels/rmsnorm.js +57 -41
  126. package/src/gpu/kernels/rope.js +3 -0
  127. package/src/gpu/kernels/sample.js +27 -38
  128. package/src/gpu/kernels/sana_linear_attention.js +18 -10
  129. package/src/gpu/kernels/scale.js +18 -11
  130. package/src/gpu/kernels/shader-cache.js +4 -2
  131. package/src/gpu/kernels/silu.js +120 -72
  132. package/src/gpu/kernels/softmax.js +44 -25
  133. package/src/gpu/kernels/split_qkv.js +23 -13
  134. package/src/gpu/kernels/transpose.js +18 -10
  135. package/src/gpu/kernels/transpose.wgsl +5 -3
  136. package/src/gpu/kernels/upsample2d.js +21 -13
  137. package/src/gpu/kernels/utils.js +20 -13
  138. package/src/gpu/partitioned-buffer-pool.js +10 -2
  139. package/src/gpu/perf-guards.js +2 -9
  140. package/src/gpu/profiler.js +27 -22
  141. package/src/gpu/readback-utils.d.ts +16 -0
  142. package/src/gpu/readback-utils.js +41 -0
  143. package/src/gpu/submit-tracker.js +13 -0
  144. package/src/gpu/uniform-cache.d.ts +1 -0
  145. package/src/gpu/uniform-cache.js +30 -9
  146. package/src/hotswap/intent-bundle.js +6 -0
  147. package/src/hotswap/manifest.d.ts +10 -1
  148. package/src/hotswap/manifest.js +12 -2
  149. package/src/hotswap/runtime.js +30 -8
  150. package/src/index-browser.d.ts +44 -0
  151. package/src/index-browser.js +14 -0
  152. package/src/inference/browser-harness-contract-helpers.d.ts +5 -0
  153. package/src/inference/browser-harness-contract-helpers.js +28 -0
  154. package/src/inference/browser-harness-diffusion-energy-suites.d.ts +2 -0
  155. package/src/inference/browser-harness-diffusion-energy-suites.js +269 -0
  156. package/src/inference/browser-harness-model-helpers.d.ts +16 -0
  157. package/src/inference/browser-harness-model-helpers.js +217 -0
  158. package/src/inference/browser-harness-report-helpers.d.ts +7 -0
  159. package/src/inference/browser-harness-report-helpers.js +42 -0
  160. package/src/inference/browser-harness-runtime-helpers.d.ts +61 -0
  161. package/src/inference/browser-harness-runtime-helpers.js +415 -0
  162. package/src/inference/browser-harness-suite-helpers.d.ts +28 -0
  163. package/src/inference/browser-harness-suite-helpers.js +268 -0
  164. package/src/inference/browser-harness-text-helpers.d.ts +27 -0
  165. package/src/inference/browser-harness-text-helpers.js +788 -0
  166. package/src/inference/browser-harness.d.ts +6 -0
  167. package/src/inference/browser-harness.js +130 -1996
  168. package/src/inference/kv-cache/base.js +140 -94
  169. package/src/inference/kv-cache/tiered.js +5 -3
  170. package/src/inference/moe-router.js +88 -56
  171. package/src/inference/multi-model-network.js +5 -3
  172. package/src/inference/network-evolution.d.ts +11 -2
  173. package/src/inference/network-evolution.js +20 -21
  174. package/src/inference/pipelines/context.d.ts +3 -0
  175. package/src/inference/pipelines/context.js +142 -2
  176. package/src/inference/pipelines/diffusion/helpers.js +7 -2
  177. package/src/inference/pipelines/diffusion/pipeline.js +2 -1
  178. package/src/inference/pipelines/diffusion/sd3-transformer.js +10 -10
  179. package/src/inference/pipelines/diffusion/vae.js +3 -7
  180. package/src/inference/pipelines/energy/pipeline.js +27 -21
  181. package/src/inference/pipelines/energy/quintel.d.ts +5 -0
  182. package/src/inference/pipelines/energy/quintel.js +11 -0
  183. package/src/inference/pipelines/energy-head/row-head-pipeline.js +17 -13
  184. package/src/inference/pipelines/structured/json-head-pipeline.js +26 -11
  185. package/src/inference/pipelines/text/attention/projections.js +151 -101
  186. package/src/inference/pipelines/text/attention/record.js +62 -8
  187. package/src/inference/pipelines/text/attention/run.js +62 -8
  188. package/src/inference/pipelines/text/config.js +3 -4
  189. package/src/inference/pipelines/text/embed.js +2 -8
  190. package/src/inference/pipelines/text/execution-plan.js +41 -19
  191. package/src/inference/pipelines/text/execution-v0-contract-helpers.d.ts +59 -0
  192. package/src/inference/pipelines/text/execution-v0-contract-helpers.js +937 -0
  193. package/src/inference/pipelines/text/execution-v0-runtime-builders.d.ts +15 -0
  194. package/src/inference/pipelines/text/execution-v0-runtime-builders.js +279 -0
  195. package/src/inference/pipelines/text/execution-v0.js +62 -1013
  196. package/src/inference/pipelines/text/generator-steps.d.ts +46 -0
  197. package/src/inference/pipelines/text/generator-steps.js +298 -207
  198. package/src/inference/pipelines/text/generator.js +6 -23
  199. package/src/inference/pipelines/text/init.js +78 -20
  200. package/src/inference/pipelines/text/kernel-path-auto-select.js +2 -0
  201. package/src/inference/pipelines/text/kernel-trace.d.ts +2 -0
  202. package/src/inference/pipelines/text/kernel-trace.js +6 -0
  203. package/src/inference/pipelines/text/layer.js +3 -9
  204. package/src/inference/pipelines/text/linear-attention.d.ts +10 -0
  205. package/src/inference/pipelines/text/linear-attention.js +80 -6
  206. package/src/inference/pipelines/text/logits/gpu.js +10 -5
  207. package/src/inference/pipelines/text/logits/index.js +10 -11
  208. package/src/inference/pipelines/text/logits/utils.d.ts +7 -0
  209. package/src/inference/pipelines/text/logits/utils.js +9 -0
  210. package/src/inference/pipelines/text/lora-apply.js +50 -32
  211. package/src/inference/pipelines/text/model-load.js +279 -104
  212. package/src/inference/pipelines/text/moe-cache.js +5 -4
  213. package/src/inference/pipelines/text/moe-cpu-gptoss.js +74 -69
  214. package/src/inference/pipelines/text/moe-cpu.js +42 -38
  215. package/src/inference/pipelines/text/moe-gpu.js +110 -86
  216. package/src/inference/pipelines/text/ops.js +90 -90
  217. package/src/inference/pipelines/text/probes.js +9 -9
  218. package/src/inference/pipelines/text/weights.js +17 -7
  219. package/src/inference/pipelines/text.js +13 -1
  220. package/src/inference/speculative.d.ts +2 -2
  221. package/src/inference/speculative.js +4 -18
  222. package/src/inference/test-harness.d.ts +1 -1
  223. package/src/inference/test-harness.js +15 -5
  224. package/src/inference/tokenizer.d.ts +0 -5
  225. package/src/inference/tokenizer.js +4 -23
  226. package/src/inference/tokenizers/bpe.js +9 -0
  227. package/src/inference/tokenizers/bundled.js +20 -0
  228. package/src/inference/tokenizers/sentencepiece.js +12 -0
  229. package/src/loader/doppler-loader.js +38 -22
  230. package/src/loader/dtype-utils.js +3 -44
  231. package/src/loader/embedding-loader.js +7 -3
  232. package/src/loader/experts/expert-cache.js +13 -6
  233. package/src/loader/experts/expert-loader.js +10 -6
  234. package/src/loader/final-weights-loader.js +8 -4
  235. package/src/loader/layer-loader.js +2 -1
  236. package/src/loader/loader-state.js +2 -2
  237. package/src/loader/memory-monitor.js +8 -0
  238. package/src/loader/multi-model-loader.d.ts +14 -0
  239. package/src/loader/multi-model-loader.js +70 -24
  240. package/src/loader/shard-cache.js +81 -12
  241. package/src/loader/shard-resolver.js +25 -3
  242. package/src/loader/tensors/tensor-loader.js +209 -144
  243. package/src/loader/tensors/tensor-reader.js +76 -19
  244. package/src/loader/weight-downcast.js +1 -1
  245. package/src/memory/buffer-pool.d.ts +9 -1
  246. package/src/memory/buffer-pool.js +109 -44
  247. package/src/memory/unified-detect.js +1 -1
  248. package/src/rules/inference/kernel-path.rules.json +24 -8
  249. package/src/rules/rule-registry.js +25 -1
  250. package/src/storage/backends/opfs-store.js +68 -24
  251. package/src/storage/downloader.js +364 -83
  252. package/src/storage/index.d.ts +3 -0
  253. package/src/storage/index.js +3 -0
  254. package/src/storage/preflight.d.ts +2 -2
  255. package/src/storage/preflight.js +24 -2
  256. package/src/storage/quickstart-downloader.js +11 -5
  257. package/src/storage/registry.js +10 -4
  258. package/src/storage/reports.js +1 -1
  259. package/src/storage/shard-manager.d.ts +15 -1
  260. package/src/storage/shard-manager.js +51 -3
  261. package/src/storage/source-artifact-store.d.ts +52 -0
  262. package/src/storage/source-artifact-store.js +234 -0
  263. package/src/tooling/command-api-constants.d.ts +9 -0
  264. package/src/tooling/command-api-constants.js +9 -0
  265. package/src/tooling/command-api-family-normalizers.d.ts +9 -0
  266. package/src/tooling/command-api-family-normalizers.js +343 -0
  267. package/src/tooling/command-api-helpers.d.ts +25 -0
  268. package/src/tooling/command-api-helpers.js +262 -0
  269. package/src/tooling/command-api.js +16 -602
  270. package/src/tooling/command-envelope.js +4 -1
  271. package/src/tooling/command-runner-shared.js +52 -18
  272. package/src/tooling/lean-execution-contract.js +150 -3
  273. package/src/tooling/node-browser-command-runner.js +161 -271
  274. package/src/tooling/node-command-runner.js +29 -3
  275. package/src/tooling/node-converter.js +27 -1
  276. package/src/tooling/node-source-runtime.d.ts +1 -1
  277. package/src/tooling/node-source-runtime.js +84 -3
  278. package/src/tooling/node-webgpu.js +24 -21
  279. package/src/tooling/opfs-cache.js +21 -4
  280. package/src/tooling/runtime-input-composition.d.ts +38 -0
  281. package/src/tooling/runtime-input-composition.js +86 -0
  282. package/src/tooling/source-runtime-bundle.d.ts +40 -5
  283. package/src/tooling/source-runtime-bundle.js +261 -34
  284. package/src/tooling/source-runtime-materializer.d.ts +6 -0
  285. package/src/tooling/source-runtime-materializer.js +93 -0
  286. package/src/training/attention-backward.js +32 -17
  287. package/src/training/autograd.js +80 -52
  288. package/src/training/checkpoint-watch.d.ts +2 -1
  289. package/src/training/checkpoint-watch.js +39 -6
  290. package/src/training/checkpoint.js +40 -11
  291. package/src/training/clip.js +2 -1
  292. package/src/training/datasets/token-batch.js +20 -8
  293. package/src/training/distillation/checkpoint-watch.js +1 -0
  294. package/src/training/distillation/student-fixture.d.ts +22 -0
  295. package/src/training/distillation/student-fixture.js +846 -0
  296. package/src/training/distillation/suite-data.d.ts +45 -0
  297. package/src/training/distillation/suite-data.js +189 -0
  298. package/src/training/lora-pipeline.js +4 -7
  299. package/src/training/lora.js +26 -12
  300. package/src/training/loss.js +5 -6
  301. package/src/training/objectives/cross_entropy.js +2 -5
  302. package/src/training/objectives/distill_kd.js +4 -8
  303. package/src/training/objectives/distill_triplet.js +4 -8
  304. package/src/training/objectives/ul_stage2_base.js +4 -8
  305. package/src/training/operator-command.js +2 -0
  306. package/src/training/optimizer.js +19 -7
  307. package/src/training/runner.js +2 -1
  308. package/src/training/suite.js +18 -978
  309. package/src/training/tensor-factory.d.ts +9 -0
  310. package/src/training/tensor-factory.js +13 -0
  311. package/src/training/trainer.js +3 -5
  312. package/src/training/ul_dataset.js +3 -5
  313. package/src/training/workloads.js +70 -79
  314. package/src/version.js +1 -1
  315. package/tools/convert-safetensors-node.js +22 -16
  316. package/tools/doppler-cli.js +44 -25
@@ -15,10 +15,14 @@ import { KERNEL_CONFIGS } from '../../../gpu/kernels/kernel-configs.js';
15
15
  import { resolveCapabilityKernelPathRef, resolveKernelPathPolicy } from './kernel-path-auto-select.js';
16
16
  import { initTokenizer } from './init.js';
17
17
  import { selectRuleValue } from '../../../rules/rule-registry.js';
18
+ import { mergeRuntimeValues } from '../../../config/runtime-merge.js';
18
19
  import {
19
20
  DEFAULT_BATCHING_DEFAULTS,
21
+ DEFAULT_COMPUTE_DEFAULTS,
20
22
  DEFAULT_GENERATION_CONFIG,
21
23
  } from '../../../config/schema/inference-defaults.schema.js';
24
+ import { DEFAULT_KVCACHE_CONFIG } from '../../../config/schema/kvcache.schema.js';
25
+ import { DEFAULT_EXECUTION_V0_SESSION_DEFAULTS } from '../../../config/schema/execution-v0.schema.js';
22
26
 
23
27
  function validateKernelWarmupMode(mode) {
24
28
  if (mode !== 'parallel' && mode !== 'sequential') {
@@ -48,23 +52,97 @@ function normalizeBoolean(value) {
48
52
  return typeof value === 'boolean' ? value : null;
49
53
  }
50
54
 
55
+ function parseManifestDecodeLoopOptionalPositiveInt(value, label, modelId) {
56
+ if (value === undefined) {
57
+ return undefined;
58
+ }
59
+ if (value === null) {
60
+ return null;
61
+ }
62
+ const normalized = normalizePositiveInt(value);
63
+ if (normalized == null) {
64
+ throw new Error(
65
+ `Manifest "${modelId}" inference.sessionDefaults.decodeLoop.${label} must be a positive integer or null.`
66
+ );
67
+ }
68
+ return normalized;
69
+ }
70
+
71
+ function parseManifestDecodeLoopOptionalBoolean(value, label, modelId) {
72
+ if (value === undefined) {
73
+ return undefined;
74
+ }
75
+ if (typeof value !== 'boolean') {
76
+ throw new Error(
77
+ `Manifest "${modelId}" inference.sessionDefaults.decodeLoop.${label} must be a boolean when provided.`
78
+ );
79
+ }
80
+ return value;
81
+ }
82
+
83
+ function requireGlobalBatchingDefault(value, label) {
84
+ const normalized = normalizePositiveInt(value);
85
+ if (normalized == null) {
86
+ throw new Error(`${label} must be a positive integer.`);
87
+ }
88
+ return normalized;
89
+ }
90
+
91
+ function requireGlobalStopCheckMode(value, label) {
92
+ const normalized = normalizeStopCheckMode(value);
93
+ if (normalized == null) {
94
+ throw new Error(`${label} must be "batch" or "per-token".`);
95
+ }
96
+ return normalized;
97
+ }
98
+
51
99
  const GLOBAL_DEFAULT_BATCHING = Object.freeze({
52
- batchSize: normalizePositiveInt(DEFAULT_BATCHING_DEFAULTS.batchSize) ?? 4,
53
- stopCheckMode: normalizeStopCheckMode(DEFAULT_BATCHING_DEFAULTS.stopCheckMode) ?? 'batch',
54
- readbackInterval: normalizeReadbackInterval(DEFAULT_BATCHING_DEFAULTS.readbackInterval) ?? 1,
100
+ batchSize: requireGlobalBatchingDefault(
101
+ DEFAULT_BATCHING_DEFAULTS.batchSize,
102
+ 'DEFAULT_BATCHING_DEFAULTS.batchSize'
103
+ ),
104
+ stopCheckMode: requireGlobalStopCheckMode(
105
+ DEFAULT_BATCHING_DEFAULTS.stopCheckMode,
106
+ 'DEFAULT_BATCHING_DEFAULTS.stopCheckMode'
107
+ ),
108
+ readbackInterval: requireGlobalBatchingDefault(
109
+ DEFAULT_BATCHING_DEFAULTS.readbackInterval,
110
+ 'DEFAULT_BATCHING_DEFAULTS.readbackInterval'
111
+ ),
112
+ ringTokens: requireGlobalBatchingDefault(
113
+ DEFAULT_BATCHING_DEFAULTS.ringTokens,
114
+ 'DEFAULT_BATCHING_DEFAULTS.ringTokens'
115
+ ),
116
+ ringStop: requireGlobalBatchingDefault(
117
+ DEFAULT_BATCHING_DEFAULTS.ringStop,
118
+ 'DEFAULT_BATCHING_DEFAULTS.ringStop'
119
+ ),
120
+ ringStaging: requireGlobalBatchingDefault(
121
+ DEFAULT_BATCHING_DEFAULTS.ringStaging,
122
+ 'DEFAULT_BATCHING_DEFAULTS.ringStaging'
123
+ ),
55
124
  });
56
125
 
57
126
  const GLOBAL_DEFAULT_GENERATION = Object.freeze({
58
127
  disableCommandBatching: DEFAULT_GENERATION_CONFIG.disableCommandBatching === true,
59
128
  });
60
129
 
130
+ const GLOBAL_DEFAULT_KERNEL_PATH_DTYPES = Object.freeze({
131
+ activationDtype: DEFAULT_COMPUTE_DEFAULTS.activationDtype,
132
+ kvDtype: DEFAULT_KVCACHE_CONFIG.kvDtype,
133
+ outputDtype: DEFAULT_EXECUTION_V0_SESSION_DEFAULTS.compute.defaults.outputDtype,
134
+ });
135
+
61
136
  function isRuntimeBatchingAtGlobalDefaults(batching) {
62
137
  if (!batching || typeof batching !== 'object') {
63
138
  return false;
64
139
  }
65
140
  return normalizePositiveInt(batching.batchSize) === GLOBAL_DEFAULT_BATCHING.batchSize
66
141
  && normalizeStopCheckMode(batching.stopCheckMode) === GLOBAL_DEFAULT_BATCHING.stopCheckMode
67
- && normalizeReadbackInterval(batching.readbackInterval) === GLOBAL_DEFAULT_BATCHING.readbackInterval;
142
+ && normalizeReadbackInterval(batching.readbackInterval) === GLOBAL_DEFAULT_BATCHING.readbackInterval
143
+ && normalizeReadbackInterval(batching.ringTokens) === GLOBAL_DEFAULT_BATCHING.ringTokens
144
+ && normalizeReadbackInterval(batching.ringStop) === GLOBAL_DEFAULT_BATCHING.ringStop
145
+ && normalizeReadbackInterval(batching.ringStaging) === GLOBAL_DEFAULT_BATCHING.ringStaging;
68
146
  }
69
147
 
70
148
  function isRuntimeGenerationAtGlobalDefaults(generation) {
@@ -74,98 +152,127 @@ function isRuntimeGenerationAtGlobalDefaults(generation) {
74
152
  return (generation.disableCommandBatching === true) === GLOBAL_DEFAULT_GENERATION.disableCommandBatching;
75
153
  }
76
154
 
77
- function resolveModelBatchingDefaults(manifest, modelConfig) {
78
- const presetId = String(manifest?.inference?.presetId ?? '').trim().toLowerCase();
79
- const modelType = String(manifest?.modelType ?? '').trim().toLowerCase();
80
- return selectRuleValue('inference', 'execution', 'modelBatchingDefaults', {
81
- modelId: manifest?.modelId ?? null,
82
- presetId: presetId || null,
83
- modelType: modelType || null,
84
- numLayers: Number(modelConfig?.numLayers ?? 0),
85
- hiddenSize: Number(modelConfig?.hiddenSize ?? 0),
86
- });
155
+ function requireManifestDecodeLoopPositiveInt(value, label, modelId) {
156
+ const normalized = normalizePositiveInt(value);
157
+ if (normalized == null) {
158
+ throw new Error(`Manifest "${modelId}" inference.sessionDefaults.decodeLoop.${label} must be a positive integer.`);
159
+ }
160
+ return normalized;
161
+ }
162
+
163
+ function requireManifestDecodeLoopStopCheckMode(value, modelId) {
164
+ const normalized = normalizeStopCheckMode(value);
165
+ if (normalized == null) {
166
+ throw new Error(
167
+ `Manifest "${modelId}" inference.sessionDefaults.decodeLoop.stopCheckMode must be "batch" or "per-token".`
168
+ );
169
+ }
170
+ return normalized;
87
171
  }
88
172
 
89
- function resolveManifestDecodeLoopDefaults(manifest) {
173
+ function buildManifestDecodeLoopRuntimePatch(manifest) {
90
174
  const decodeLoop = manifest?.inference?.sessionDefaults?.decodeLoop;
91
- if (!decodeLoop || typeof decodeLoop !== 'object') {
175
+ if (decodeLoop == null) {
92
176
  return null;
93
177
  }
94
- const batchSize = normalizePositiveInt(decodeLoop.batchSize);
95
- const stopCheckMode = normalizeStopCheckMode(decodeLoop.stopCheckMode);
96
- const readbackInterval = normalizeReadbackInterval(decodeLoop.readbackInterval);
97
- const disableCommandBatching = normalizeBoolean(decodeLoop.disableCommandBatching);
98
- if (batchSize == null || stopCheckMode == null || readbackInterval == null) {
99
- return null;
178
+ const modelId = String(manifest?.modelId ?? 'unknown').trim() || 'unknown';
179
+ if (typeof decodeLoop !== 'object') {
180
+ throw new Error(
181
+ `Manifest "${modelId}" inference.sessionDefaults.decodeLoop must be an object when provided.`
182
+ );
100
183
  }
101
- return {
184
+ const batchSize = requireManifestDecodeLoopPositiveInt(decodeLoop.batchSize, 'batchSize', modelId);
185
+ const stopCheckMode = requireManifestDecodeLoopStopCheckMode(decodeLoop.stopCheckMode, modelId);
186
+ const readbackInterval = requireManifestDecodeLoopPositiveInt(
187
+ decodeLoop.readbackInterval,
188
+ 'readbackInterval',
189
+ modelId
190
+ );
191
+ const disableCommandBatching = parseManifestDecodeLoopOptionalBoolean(
192
+ decodeLoop.disableCommandBatching,
193
+ 'disableCommandBatching',
194
+ modelId
195
+ );
196
+
197
+ const batchingPatch = {
102
198
  batchSize,
103
199
  stopCheckMode,
104
200
  readbackInterval,
105
- ...(disableCommandBatching == null ? {} : { disableCommandBatching }),
201
+ };
202
+ const ringTokens = parseManifestDecodeLoopOptionalPositiveInt(
203
+ decodeLoop.ringTokens,
204
+ 'ringTokens',
205
+ modelId
206
+ );
207
+ if (ringTokens !== undefined) {
208
+ batchingPatch.ringTokens = ringTokens;
209
+ }
210
+ const ringStop = parseManifestDecodeLoopOptionalPositiveInt(
211
+ decodeLoop.ringStop,
212
+ 'ringStop',
213
+ modelId
214
+ );
215
+ if (ringStop !== undefined) {
216
+ batchingPatch.ringStop = ringStop;
217
+ }
218
+ const ringStaging = parseManifestDecodeLoopOptionalPositiveInt(
219
+ decodeLoop.ringStaging,
220
+ 'ringStaging',
221
+ modelId
222
+ );
223
+ if (ringStaging !== undefined) {
224
+ batchingPatch.ringStaging = ringStaging;
225
+ }
226
+
227
+ return {
228
+ batching: batchingPatch,
229
+ generation: disableCommandBatching == null
230
+ ? null
231
+ : { disableCommandBatching: disableCommandBatching === true },
106
232
  };
107
233
  }
108
234
 
109
235
  export function applyModelBatchingRuntimeDefaults(runtimeConfig, manifest, modelConfig) {
236
+ void modelConfig;
110
237
  const batching = runtimeConfig?.inference?.batching;
111
238
  const generation = runtimeConfig?.inference?.generation;
112
239
  const runtimeBatchingAtDefaults = isRuntimeBatchingAtGlobalDefaults(batching);
113
240
  const runtimeGenerationAtDefaults = isRuntimeGenerationAtGlobalDefaults(generation);
114
241
 
115
- const defaults = resolveManifestDecodeLoopDefaults(manifest)
116
- ?? resolveModelBatchingDefaults(manifest, modelConfig);
117
- if (!defaults || typeof defaults !== 'object') {
242
+ const patch = buildManifestDecodeLoopRuntimePatch(manifest);
243
+ if (!patch) {
118
244
  return runtimeConfig;
119
245
  }
120
246
 
121
- let nextBatching = batching;
122
- let appliedBatching = false;
123
- if (runtimeBatchingAtDefaults) {
124
- const nextBatchSize = normalizePositiveInt(defaults.batchSize);
125
- const nextStopCheckMode = normalizeStopCheckMode(defaults.stopCheckMode);
126
- const nextReadbackInterval = normalizeReadbackInterval(defaults.readbackInterval);
127
- if (nextBatchSize != null && nextStopCheckMode != null && nextReadbackInterval != null) {
128
- nextBatching = {
129
- ...batching,
130
- batchSize: nextBatchSize,
131
- stopCheckMode: nextStopCheckMode,
132
- readbackInterval: nextReadbackInterval,
133
- };
134
- appliedBatching = true;
135
- }
136
- }
137
-
138
- const shouldApplyDisableCommandBatching = runtimeGenerationAtDefaults
139
- && normalizeBoolean(defaults.disableCommandBatching) != null;
140
- const nextGeneration = shouldApplyDisableCommandBatching
141
- ? {
142
- ...generation,
143
- disableCommandBatching: defaults.disableCommandBatching === true,
144
- }
145
- : generation;
146
-
147
- if (!appliedBatching && !shouldApplyDisableCommandBatching) {
148
- return runtimeConfig;
247
+ const runtimeDisableCommandBatching = generation?.disableCommandBatching === true;
248
+ const manifestDisableCommandBatching = patch.generation?.disableCommandBatching === true;
249
+ if (!runtimeBatchingAtDefaults) {
250
+ throw new Error(
251
+ 'Manifest decodeLoop defaults cannot be merged after runtime batching overrides were already resolved. ' +
252
+ 'Set runtime.inference.batching explicitly to the desired final values, or remove manifest.inference.sessionDefaults.decodeLoop.'
253
+ );
149
254
  }
150
-
151
- if (appliedBatching || shouldApplyDisableCommandBatching) {
152
- log.info(
153
- 'Pipeline',
154
- `Model defaults applied (${manifest?.inference?.presetId ?? 'unknown'}): ` +
155
- `batchSize=${nextBatching.batchSize}, stopCheckMode=${nextBatching.stopCheckMode}, ` +
156
- `readbackInterval=${nextBatching.readbackInterval}, ` +
157
- `disableCommandBatching=${nextGeneration.disableCommandBatching === true}`
255
+ if (patch.generation && !runtimeGenerationAtDefaults && runtimeDisableCommandBatching !== manifestDisableCommandBatching) {
256
+ throw new Error(
257
+ 'Manifest decodeLoop.disableCommandBatching conflicts with runtime.inference.generation.disableCommandBatching. ' +
258
+ 'Choose one explicit source of truth.'
158
259
  );
159
260
  }
160
261
 
161
- return {
162
- ...runtimeConfig,
262
+ const nextRuntimeConfig = mergeRuntimeValues(runtimeConfig, {
163
263
  inference: {
164
- ...runtimeConfig.inference,
165
- ...(appliedBatching ? { batching: nextBatching } : {}),
166
- ...(shouldApplyDisableCommandBatching ? { generation: nextGeneration } : {}),
264
+ batching: patch.batching,
265
+ ...(patch.generation ? { generation: patch.generation } : {}),
167
266
  },
168
- };
267
+ });
268
+ log.info(
269
+ 'Pipeline',
270
+ `Manifest decodeLoop applied (${manifest?.modelId ?? 'unknown'}): ` +
271
+ `batchSize=${patch.batching.batchSize}, stopCheckMode=${patch.batching.stopCheckMode}, ` +
272
+ `readbackInterval=${patch.batching.readbackInterval}, ` +
273
+ `disableCommandBatching=${patch.generation?.disableCommandBatching === true}`
274
+ );
275
+ return nextRuntimeConfig;
169
276
  }
170
277
 
171
278
  export async function runKernelWarmup(options) {
@@ -206,7 +313,7 @@ function normalizeKernelPathSourceHint(value) {
206
313
  function resolveKernelPathSource(runtimeConfigKernelPath, runtimeKernelPathSourceHint, modelKernelPath) {
207
314
  if (runtimeConfigKernelPath) {
208
315
  const sourceHint = normalizeKernelPathSourceHint(runtimeKernelPathSourceHint);
209
- if (sourceHint === 'execution-v0') return 'execution-v0';
316
+ if (sourceHint !== 'none') return sourceHint;
210
317
  return 'config';
211
318
  }
212
319
  if (modelKernelPath) return 'model';
@@ -334,7 +441,7 @@ function assertKernelPathFeatureCompatibility(
334
441
 
335
442
  if (kernelPathSource === 'execution-v0' && typeof effectiveKernelPathRef !== 'string') {
336
443
  const remediation = policyAllowsSource
337
- ? 'Execution-v0 inline kernel paths are not auto-remapped yet. Use subgroup/f16-compatible execution steps, or set runtime.inference.kernelPath to a compatible string preset (for example "gemma2-q4k-dequant-f32a").'
444
+ ? 'Execution-v0 inline kernel paths are not auto-remapped yet. Use subgroup/f16-compatible execution steps, or set runtime.inference.kernelPath to a compatible string preset (for example "gemma2-q4k-dequant-f32a-nosubgroups").'
338
445
  : 'Enable runtime.inference.kernelPathPolicy.sourceScope to include "execution-v0", then use compatible execution steps or a compatible preset id.';
339
446
  throw new Error(
340
447
  `[ExecutionV0] Inline kernelPath requires unsupported GPU features. ` +
@@ -366,6 +473,55 @@ function normalizeKernelDtype(value) {
366
473
  });
367
474
  }
368
475
 
476
+ function buildKernelPathDtypeContract(resolvedKernelPath) {
477
+ if (!resolvedKernelPath) {
478
+ return null;
479
+ }
480
+ const activationDtype = normalizeKernelDtype(getKernelPathActivationDtype(resolvedKernelPath));
481
+ const outputDtype = normalizeKernelDtype(
482
+ getKernelPathOutputDtype(resolvedKernelPath) ?? activationDtype
483
+ );
484
+ const kvDtype = normalizeKernelDtype(getKernelPathKVDtype(resolvedKernelPath) ?? activationDtype);
485
+ if (!activationDtype && !outputDtype && !kvDtype) {
486
+ return null;
487
+ }
488
+ return {
489
+ activationDtype,
490
+ outputDtype,
491
+ kvDtype,
492
+ };
493
+ }
494
+
495
+ function isGlobalKernelPathDtypeDefault(currentValue, key) {
496
+ if (currentValue == null) {
497
+ return true;
498
+ }
499
+ return currentValue === GLOBAL_DEFAULT_KERNEL_PATH_DTYPES[key];
500
+ }
501
+
502
+ function describeKernelPathDtypeMismatch(contract, current) {
503
+ const mismatches = [];
504
+ if (contract.activationDtype && current.activationDtype !== contract.activationDtype) {
505
+ mismatches.push(
506
+ `runtime.inference.compute.activationDtype=${current.activationDtype ?? 'unset'} ` +
507
+ `(expected ${contract.activationDtype})`
508
+ );
509
+ }
510
+ if (contract.kvDtype && current.kvDtype !== contract.kvDtype) {
511
+ mismatches.push(
512
+ `runtime.inference.kvcache.kvDtype=${current.kvDtype ?? 'unset'} ` +
513
+ `(expected ${contract.kvDtype})`
514
+ );
515
+ }
516
+ if (contract.outputDtype && current.outputDtype !== contract.outputDtype) {
517
+ mismatches.push(
518
+ `runtime.inference.session.compute.defaults.outputDtype=${current.outputDtype ?? 'unset'} ` +
519
+ `(expected ${contract.outputDtype})`
520
+ );
521
+ }
522
+ return mismatches;
523
+ }
524
+
369
525
  function assertManifestKernelPathDtypeCompatibility(manifest, resolvedKernelPath, kernelPathSource) {
370
526
  if (!resolvedKernelPath) return;
371
527
  if (kernelPathSource === 'config') return;
@@ -376,16 +532,6 @@ function assertManifestKernelPathDtypeCompatibility(manifest, resolvedKernelPath
376
532
  if (!manifestCompute || !kernelActivation) return;
377
533
  if (manifestCompute === kernelActivation) return;
378
534
 
379
- const presetId = String(manifest?.inference?.presetId ?? '').trim().toLowerCase();
380
- if (presetId === 'lfm2' && manifestCompute === 'f32' && kernelActivation === 'f16') {
381
- log.warn(
382
- 'Pipeline',
383
- `Manifest "${manifest?.modelId ?? 'unknown'}" uses quantizationInfo.compute=f32 ` +
384
- `with kernelPath activationDtype=f16 (${resolvedKernelPath.id}); continuing for LFM2 mixed-precision compatibility.`
385
- );
386
- return;
387
- }
388
-
389
535
  throw new Error(
390
536
  `Manifest kernel path dtype mismatch for "${manifest?.modelId ?? 'unknown'}": ` +
391
537
  `quantizationInfo.compute=${manifestCompute} but ` +
@@ -402,17 +548,45 @@ function getKernelCapabilitiesSafe() {
402
548
  }
403
549
  }
404
550
 
405
- function applyKernelPathRuntimeDtypeOverrides(resolvedKernelPath, runtimeConfig) {
406
- const kernelPathActivationDtype = getKernelPathActivationDtype(resolvedKernelPath);
407
- const kernelPathOutputDtype = getKernelPathOutputDtype(resolvedKernelPath) ?? kernelPathActivationDtype;
408
- const kernelPathKVDtype = getKernelPathKVDtype(resolvedKernelPath);
409
- if (!kernelPathActivationDtype && !kernelPathOutputDtype && !kernelPathKVDtype) {
551
+ function applyKernelPathRuntimeDtypeContract(resolvedKernelPath, runtimeConfig, kernelPathSource, modelId) {
552
+ const contract = buildKernelPathDtypeContract(resolvedKernelPath);
553
+ if (!contract) {
410
554
  return runtimeConfig;
411
555
  }
412
556
 
413
- const currentActivation = runtimeConfig.inference.compute.activationDtype;
414
- const currentKV = runtimeConfig.inference.kvcache.kvDtype;
415
- const currentOutput = runtimeConfig.inference?.session?.compute?.defaults?.outputDtype;
557
+ const current = {
558
+ activationDtype: normalizeKernelDtype(runtimeConfig.inference?.compute?.activationDtype),
559
+ kvDtype: normalizeKernelDtype(runtimeConfig.inference?.kvcache?.kvDtype),
560
+ outputDtype: normalizeKernelDtype(runtimeConfig.inference?.session?.compute?.defaults?.outputDtype),
561
+ };
562
+ const mismatches = describeKernelPathDtypeMismatch(contract, current);
563
+ if (mismatches.length === 0) {
564
+ return runtimeConfig;
565
+ }
566
+
567
+ if (kernelPathSource === 'config' || kernelPathSource === 'execution-v0') {
568
+ throw new Error(
569
+ `KernelPath "${resolvedKernelPath?.id ?? 'unknown'}" selected from ${kernelPathSource} ` +
570
+ `requires explicit matching runtime dtypes for "${modelId}". ` +
571
+ `Mismatches: ${mismatches.join('; ')}. ` +
572
+ 'Set runtime.inference.compute.activationDtype, runtime.inference.kvcache.kvDtype, ' +
573
+ 'and runtime.inference.session.compute.defaults.outputDtype to match the kernel path.'
574
+ );
575
+ }
576
+
577
+ const canApplyManifestDefaults = (
578
+ (contract.activationDtype == null || isGlobalKernelPathDtypeDefault(current.activationDtype, 'activationDtype'))
579
+ && (contract.kvDtype == null || isGlobalKernelPathDtypeDefault(current.kvDtype, 'kvDtype'))
580
+ && (contract.outputDtype == null || isGlobalKernelPathDtypeDefault(current.outputDtype, 'outputDtype'))
581
+ );
582
+ if (!canApplyManifestDefaults) {
583
+ throw new Error(
584
+ `Manifest/model kernelPath "${resolvedKernelPath?.id ?? 'unknown'}" for "${modelId}" ` +
585
+ `conflicts with runtime dtype overrides. Mismatches: ${mismatches.join('; ')}. ` +
586
+ 'Either remove the runtime dtype override or set it to match the kernel path.'
587
+ );
588
+ }
589
+
416
590
  const nextInference = {
417
591
  ...runtimeConfig.inference,
418
592
  compute: { ...runtimeConfig.inference.compute },
@@ -420,37 +594,33 @@ function applyKernelPathRuntimeDtypeOverrides(resolvedKernelPath, runtimeConfig)
420
594
  };
421
595
  const dtypeChanges = [];
422
596
 
423
- if (kernelPathActivationDtype && currentActivation !== kernelPathActivationDtype) {
424
- nextInference.compute.activationDtype = kernelPathActivationDtype;
425
- dtypeChanges.push(`activation=${currentActivation}->${kernelPathActivationDtype}`);
597
+ if (contract.activationDtype && current.activationDtype !== contract.activationDtype) {
598
+ nextInference.compute.activationDtype = contract.activationDtype;
599
+ dtypeChanges.push(`activation=${current.activationDtype ?? 'unset'}->${contract.activationDtype}`);
426
600
  }
427
601
 
428
- if (kernelPathKVDtype && currentKV !== kernelPathKVDtype) {
429
- nextInference.kvcache.kvDtype = kernelPathKVDtype;
430
- dtypeChanges.push(`kv=${currentKV}->${kernelPathKVDtype}`);
602
+ if (contract.kvDtype && current.kvDtype !== contract.kvDtype) {
603
+ nextInference.kvcache.kvDtype = contract.kvDtype;
604
+ dtypeChanges.push(`kv=${current.kvDtype ?? 'unset'}->${contract.kvDtype}`);
431
605
  }
432
606
 
433
- if (kernelPathOutputDtype && currentOutput !== kernelPathOutputDtype) {
607
+ if (contract.outputDtype && current.outputDtype !== contract.outputDtype) {
434
608
  nextInference.session = {
435
609
  ...(nextInference.session ?? {}),
436
610
  compute: {
437
611
  ...(nextInference.session?.compute ?? {}),
438
612
  defaults: {
439
613
  ...(nextInference.session?.compute?.defaults ?? {}),
440
- outputDtype: kernelPathOutputDtype,
614
+ outputDtype: contract.outputDtype,
441
615
  },
442
616
  },
443
617
  };
444
- dtypeChanges.push(`session.outputDtype=${currentOutput ?? 'undefined'}->${kernelPathOutputDtype}`);
445
- }
446
-
447
- if (dtypeChanges.length === 0) {
448
- return runtimeConfig;
618
+ dtypeChanges.push(`session.outputDtype=${current.outputDtype ?? 'unset'}->${contract.outputDtype}`);
449
619
  }
450
620
 
451
621
  log.info(
452
622
  'Pipeline',
453
- `KernelPath ${resolvedKernelPath?.id ?? 'unknown'} runtime dtype overrides: ${dtypeChanges.join(', ')}`
623
+ `KernelPath ${resolvedKernelPath?.id ?? 'unknown'} applied manifest/model runtime dtype defaults: ${dtypeChanges.join(', ')}`
454
624
  );
455
625
  return { ...runtimeConfig, inference: nextInference };
456
626
  }
@@ -521,7 +691,12 @@ export function resolveKernelPathState(options) {
521
691
  log.info('Pipeline', 'KernelPath: none (no kernel path configured)');
522
692
  }
523
693
 
524
- const nextRuntimeConfig = applyKernelPathRuntimeDtypeOverrides(resolvedKernelPath, runtimeConfig);
694
+ const nextRuntimeConfig = applyKernelPathRuntimeDtypeContract(
695
+ resolvedKernelPath,
696
+ runtimeConfig,
697
+ kernelPathSource,
698
+ String(manifest?.modelId ?? 'unknown').trim() || 'unknown'
699
+ );
525
700
  return {
526
701
  resolvedKernelPath,
527
702
  kernelPathSource,
@@ -1,5 +1,6 @@
1
1
  import { getRuntimeConfig } from '../../../config/runtime.js';
2
2
  import { QK_K } from '../../../config/schema/index.js';
3
+ import { releaseBuffer } from '../../../memory/buffer-pool.js';
3
4
 
4
5
  const dequantCache = new Map();
5
6
  let dequantCacheMaxEntriesOverride = null;
@@ -73,8 +74,8 @@ export function setCachedDequant(layerIdx, expertIdx, outputDtype, gateUp, down)
73
74
  if (oldestKey) {
74
75
  const evicted = dequantCache.get(oldestKey);
75
76
  if (evicted) {
76
- evicted.gateUp.destroy();
77
- evicted.down.destroy();
77
+ releaseBuffer(evicted.gateUp);
78
+ releaseBuffer(evicted.down);
78
79
  }
79
80
  dequantCache.delete(oldestKey);
80
81
  }
@@ -85,8 +86,8 @@ export function setCachedDequant(layerIdx, expertIdx, outputDtype, gateUp, down)
85
86
 
86
87
  export function clearDequantCache() {
87
88
  for (const cached of dequantCache.values()) {
88
- cached.gateUp.destroy();
89
- cached.down.destroy();
89
+ releaseBuffer(cached.gateUp);
90
+ releaseBuffer(cached.down);
90
91
  }
91
92
  dequantCache.clear();
92
93
  dequantCacheHits = 0;