npm - @simulatte/doppler - Versions diffs - 0.1.5 → 0.1.7 - Mend

@simulatte/doppler 0.1.5 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (392)

package/CHANGELOG.md +126 -0
package/README.md +25 -17
package/package.json +20 -4
package/src/adapters/adapter-registry.js +12 -1
package/src/adapters/lora-loader.js +23 -6
package/src/bridge/extension-client.d.ts +5 -0
package/src/bridge/extension-client.js +40 -0
package/src/bridge/index.d.ts +2 -1
package/src/bridge/index.js +6 -4
package/src/browser/browser-converter.js +26 -1
package/src/browser/file-picker.js +6 -0
package/src/browser/safetensors-parser-browser.js +84 -1
package/src/browser/shard-io-browser.js +2 -2
package/src/browser/tensor-source-download.js +8 -2
package/src/browser/tensor-source-http.d.ts +1 -0
package/src/browser/tensor-source-http.js +5 -1
package/src/client/doppler-api.browser.js +20 -4
package/src/client/doppler-api.js +19 -3
package/src/client/doppler-provider/generation.js +12 -0
package/src/client/doppler-provider/model-manager.d.ts +10 -0
package/src/client/doppler-provider/model-manager.js +91 -19
package/src/client/doppler-provider/source-runtime.d.ts +2 -1
package/src/client/doppler-provider/source-runtime.js +132 -13
package/src/client/doppler-registry.json +8 -7
package/src/config/backward-registry-loader.js +17 -2
package/src/config/execution-v0-contract-check.js +113 -15
package/src/config/kernel-path-contract-check.js +57 -29
package/src/config/kernel-path-loader.js +5 -36
package/src/config/kernels/kernel-ref-digests.js +39 -39
package/src/config/kernels/registry.js +14 -1
package/src/config/kernels/registry.json +49 -7
package/src/config/loader.d.ts +1 -1
package/src/config/loader.js +43 -4
package/src/config/merge-contract-check.js +59 -4
package/src/config/merge-helpers.js +128 -7
package/src/config/merge.d.ts +1 -0
package/src/config/merge.js +28 -0
package/src/config/param-validator.js +47 -2
package/src/config/presets/kernel-paths/{gemma2-q4k-dequant-f32a.json → gemma2-q4k-dequant-f32a-nosubgroups.json} +3 -3
package/src/config/presets/kernel-paths/gemma3-f16-fused-f32a-online-streamingprefill.json +223 -0
package/src/config/presets/kernel-paths/{gemma3-q4k-dequant-f32a.json → gemma3-q4k-dequant-f32a-nosubgroups.json} +3 -3
package/src/config/presets/kernel-paths/registry.json +29 -8
package/src/config/presets/models/gemma2.json +2 -2
package/src/config/presets/models/qwen3.json +9 -2
package/src/config/presets/models/transformer.json +5 -0
package/src/config/presets/runtime/experiments/bench/gemma3-bench-q4k.json +1 -1
package/src/config/presets/runtime/experiments/debug/gemma3-debug-q4k.json +1 -1
package/src/config/presets/runtime/experiments/verify/gemma3-verify.json +1 -1
package/src/config/presets/runtime/kernels/dequant-f16-q4k.json +6 -13
package/src/config/presets/runtime/kernels/dequant-f32-q4k.json +6 -13
package/src/config/presets/runtime/kernels/embeddinggemma-q4k-dequant-f32a.json +37 -0
package/src/config/presets/runtime/kernels/fused-q4k.json +6 -13
package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f16a.json +33 -0
package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f32a-nosubgroups.json +33 -0
package/src/config/presets/runtime/kernels/gemma2-q4k-fused-f32a.json +33 -0
package/src/config/presets/runtime/kernels/safe-q4k.json +6 -13
package/src/config/presets/runtime/platform/metal-apple-q4k.json +1 -1
package/src/config/required-inference-fields-contract-check.js +6 -0
package/src/config/runtime.js +6 -1
package/src/config/schema/debug.schema.d.ts +5 -0
package/src/config/schema/doppler.schema.js +16 -21
package/src/config/schema/inference-defaults.schema.js +6 -3
package/src/config/schema/inference.schema.d.ts +9 -0
package/src/config/schema/kernel-path.schema.d.ts +11 -1
package/src/config/schema/kernel-thresholds.schema.js +12 -4
package/src/config/schema/manifest.schema.d.ts +8 -1
package/src/config/schema/manifest.schema.js +19 -3
package/src/config/training-defaults.js +30 -22
package/src/converter/conversion-plan.js +94 -9
package/src/converter/core.d.ts +7 -0
package/src/converter/core.js +14 -9
package/src/converter/execution-v0-manifest.js +4 -1
package/src/converter/index.d.ts +1 -0
package/src/converter/index.js +1 -0
package/src/converter/manifest-inference.js +43 -12
package/src/converter/parsers/diffusion.js +0 -3
package/src/converter/quantization-info.js +35 -15
package/src/converter/rope-config.js +42 -0
package/src/converter/shard-packer.d.ts +1 -1
package/src/converter/shard-packer.js +4 -1
package/src/debug/config.js +123 -11
package/src/debug/signals.js +7 -1
package/src/debug/tensor.d.ts +2 -0
package/src/debug/tensor.js +13 -2
package/src/distribution/p2p-control-plane.js +52 -12
package/src/distribution/p2p-observability.js +43 -7
package/src/distribution/p2p-webrtc-browser.js +20 -0
package/src/distribution/shard-delivery.js +77 -26
package/src/formats/gguf/types.js +33 -16
package/src/formats/rdrr/groups.d.ts +12 -4
package/src/formats/rdrr/groups.js +3 -6
package/src/formats/rdrr/parsing.js +39 -2
package/src/formats/rdrr/types.d.ts +2 -1
package/src/gpu/command-recorder.js +86 -61
package/src/gpu/device.d.ts +1 -0
package/src/gpu/device.js +131 -19
package/src/gpu/kernel-tuner/benchmarks.js +326 -316
package/src/gpu/kernel-tuner/cache.js +71 -4
package/src/gpu/kernel-tuner/tuner.js +22 -4
package/src/gpu/kernels/attention.js +113 -34
package/src/gpu/kernels/backward/adam.js +62 -58
package/src/gpu/kernels/backward/attention_backward.js +257 -169
package/src/gpu/kernels/backward/conv2d_backward.js +14 -1
package/src/gpu/kernels/bias_add.wgsl +8 -6
package/src/gpu/kernels/bias_add_f16.wgsl +8 -5
package/src/gpu/kernels/cast.js +191 -149
package/src/gpu/kernels/check-stop.js +33 -44
package/src/gpu/kernels/conv2d.js +27 -17
package/src/gpu/kernels/conv2d.wgsl +7 -8
package/src/gpu/kernels/conv2d_f16.wgsl +7 -8
package/src/gpu/kernels/cross_entropy_loss.js +21 -15
package/src/gpu/kernels/depthwise_conv2d.js +37 -26
package/src/gpu/kernels/depthwise_conv2d.wgsl +6 -9
package/src/gpu/kernels/depthwise_conv2d_f16.wgsl +6 -9
package/src/gpu/kernels/dequant.js +178 -126
package/src/gpu/kernels/energy.d.ts +3 -21
package/src/gpu/kernels/energy.js +111 -88
package/src/gpu/kernels/feature-check.js +1 -1
package/src/gpu/kernels/fused_ffn.js +84 -65
package/src/gpu/kernels/fused_matmul_residual.js +56 -33
package/src/gpu/kernels/fused_matmul_rmsnorm.js +62 -45
package/src/gpu/kernels/gather.js +33 -15
package/src/gpu/kernels/gelu.js +19 -11
package/src/gpu/kernels/grouped_pointwise_conv2d.js +34 -23
package/src/gpu/kernels/grouped_pointwise_conv2d.wgsl +6 -9
package/src/gpu/kernels/grouped_pointwise_conv2d_f16.wgsl +6 -9
package/src/gpu/kernels/groupnorm.js +34 -23
package/src/gpu/kernels/kv-quantize.js +5 -2
package/src/gpu/kernels/layernorm.js +35 -19
package/src/gpu/kernels/logit-merge.js +5 -3
package/src/gpu/kernels/matmul.js +83 -39
package/src/gpu/kernels/modulate.js +23 -15
package/src/gpu/kernels/moe.js +221 -175
package/src/gpu/kernels/pixel_shuffle.js +22 -14
package/src/gpu/kernels/pixel_shuffle.wgsl +4 -5
package/src/gpu/kernels/pixel_shuffle_f16.wgsl +4 -5
package/src/gpu/kernels/relu.js +31 -10
package/src/gpu/kernels/relu.wgsl +2 -1
package/src/gpu/kernels/relu_f16.wgsl +2 -1
package/src/gpu/kernels/repeat_channels.js +25 -17
package/src/gpu/kernels/repeat_channels.wgsl +4 -5
package/src/gpu/kernels/repeat_channels_f16.wgsl +4 -5
package/src/gpu/kernels/residual.js +69 -23
package/src/gpu/kernels/residual.wgsl +6 -3
package/src/gpu/kernels/residual_f16.wgsl +2 -1
package/src/gpu/kernels/residual_f16_vec4.wgsl +2 -1
package/src/gpu/kernels/residual_vec4.wgsl +2 -1
package/src/gpu/kernels/rmsnorm.js +96 -28
package/src/gpu/kernels/rmsnorm.wgsl +14 -6
package/src/gpu/kernels/rmsnorm_f16.wgsl +10 -2
package/src/gpu/kernels/rope.d.ts +2 -0
package/src/gpu/kernels/rope.js +14 -1
package/src/gpu/kernels/rope.wgsl +56 -40
package/src/gpu/kernels/sample.js +27 -38
package/src/gpu/kernels/sana_linear_attention.js +19 -12
package/src/gpu/kernels/sana_linear_attention_apply.wgsl +4 -5
package/src/gpu/kernels/sana_linear_attention_apply_f16.wgsl +4 -5
package/src/gpu/kernels/sana_linear_attention_summary.wgsl +4 -0
package/src/gpu/kernels/sana_linear_attention_summary_f16.wgsl +4 -0
package/src/gpu/kernels/scale.js +18 -11
package/src/gpu/kernels/shader-cache.js +4 -2
package/src/gpu/kernels/silu.d.ts +1 -0
package/src/gpu/kernels/silu.js +148 -82
package/src/gpu/kernels/silu.wgsl +19 -9
package/src/gpu/kernels/silu_f16.wgsl +19 -9
package/src/gpu/kernels/softmax.js +44 -25
package/src/gpu/kernels/split_qkv.js +23 -13
package/src/gpu/kernels/transpose.js +31 -10
package/src/gpu/kernels/transpose.wgsl +6 -5
package/src/gpu/kernels/upsample2d.js +22 -13
package/src/gpu/kernels/upsample2d.wgsl +6 -9
package/src/gpu/kernels/upsample2d_f16.wgsl +6 -9
package/src/gpu/kernels/utils.js +35 -13
package/src/gpu/partitioned-buffer-pool.js +10 -2
package/src/gpu/perf-guards.js +2 -9
package/src/gpu/profiler.js +27 -22
package/src/gpu/readback-utils.d.ts +16 -0
package/src/gpu/readback-utils.js +41 -0
package/src/gpu/submit-tracker.js +13 -0
package/src/gpu/uniform-cache.d.ts +1 -0
package/src/gpu/uniform-cache.js +30 -9
package/src/hotswap/intent-bundle.js +6 -0
package/src/hotswap/manifest.d.ts +10 -1
package/src/hotswap/manifest.js +12 -2
package/src/hotswap/runtime.js +30 -8
package/src/index-browser.d.ts +44 -0
package/src/index-browser.js +14 -0
package/src/inference/browser-harness-contract-helpers.d.ts +5 -0
package/src/inference/browser-harness-contract-helpers.js +28 -0
package/src/inference/browser-harness-diffusion-energy-suites.d.ts +2 -0
package/src/inference/browser-harness-diffusion-energy-suites.js +269 -0
package/src/inference/browser-harness-model-helpers.d.ts +16 -0
package/src/inference/browser-harness-model-helpers.js +217 -0
package/src/inference/browser-harness-report-helpers.d.ts +7 -0
package/src/inference/browser-harness-report-helpers.js +42 -0
package/src/inference/browser-harness-runtime-helpers.d.ts +61 -0
package/src/inference/browser-harness-runtime-helpers.js +415 -0
package/src/inference/browser-harness-suite-helpers.d.ts +28 -0
package/src/inference/browser-harness-suite-helpers.js +268 -0
package/src/inference/browser-harness-text-helpers.d.ts +27 -0
package/src/inference/browser-harness-text-helpers.js +788 -0
package/src/inference/browser-harness.d.ts +6 -0
package/src/inference/browser-harness.js +130 -1950
package/src/inference/kv-cache/base.js +140 -94
package/src/inference/kv-cache/tiered.js +5 -3
package/src/inference/moe-router.js +88 -56
package/src/inference/multi-model-network.js +5 -3
package/src/inference/network-evolution.d.ts +11 -2
package/src/inference/network-evolution.js +20 -21
package/src/inference/pipelines/context.d.ts +3 -0
package/src/inference/pipelines/context.js +142 -2
package/src/inference/pipelines/diffusion/helpers.js +7 -2
package/src/inference/pipelines/diffusion/pipeline.js +17 -7
package/src/inference/pipelines/diffusion/sd3-transformer.js +10 -10
package/src/inference/pipelines/diffusion/text-encoder-gpu.d.ts +5 -0
package/src/inference/pipelines/diffusion/text-encoder-gpu.js +27 -15
package/src/inference/pipelines/diffusion/vae.js +3 -7
package/src/inference/pipelines/energy/pipeline.js +27 -21
package/src/inference/pipelines/energy/quintel.d.ts +5 -0
package/src/inference/pipelines/energy/quintel.js +11 -0
package/src/inference/pipelines/energy-head/row-head-pipeline.js +17 -13
package/src/inference/pipelines/structured/json-head-pipeline.js +26 -11
package/src/inference/pipelines/text/attention/projections.js +151 -101
package/src/inference/pipelines/text/attention/record.js +73 -10
package/src/inference/pipelines/text/attention/run.js +73 -10
package/src/inference/pipelines/text/chat-format.js +25 -1
package/src/inference/pipelines/text/config.d.ts +4 -0
package/src/inference/pipelines/text/config.js +71 -5
package/src/inference/pipelines/text/embed.js +2 -8
package/src/inference/pipelines/text/execution-plan.js +64 -50
package/src/inference/pipelines/text/execution-v0-contract-helpers.d.ts +59 -0
package/src/inference/pipelines/text/execution-v0-contract-helpers.js +937 -0
package/src/inference/pipelines/text/execution-v0-runtime-builders.d.ts +15 -0
package/src/inference/pipelines/text/execution-v0-runtime-builders.js +279 -0
package/src/inference/pipelines/text/execution-v0.js +78 -1002
package/src/inference/pipelines/text/ffn/standard.js +3 -0
package/src/inference/pipelines/text/generator-steps.d.ts +46 -0
package/src/inference/pipelines/text/generator-steps.js +298 -207
package/src/inference/pipelines/text/generator.js +6 -23
package/src/inference/pipelines/text/init.d.ts +4 -0
package/src/inference/pipelines/text/init.js +134 -29
package/src/inference/pipelines/text/kernel-path-auto-select.js +2 -0
package/src/inference/pipelines/text/kernel-trace.d.ts +2 -0
package/src/inference/pipelines/text/kernel-trace.js +6 -0
package/src/inference/pipelines/text/layer.js +14 -9
package/src/inference/pipelines/text/linear-attention.d.ts +10 -0
package/src/inference/pipelines/text/linear-attention.js +80 -6
package/src/inference/pipelines/text/logits/gpu.js +10 -5
package/src/inference/pipelines/text/logits/index.js +10 -11
package/src/inference/pipelines/text/logits/utils.d.ts +7 -0
package/src/inference/pipelines/text/logits/utils.js +9 -0
package/src/inference/pipelines/text/lora-apply.js +50 -32
package/src/inference/pipelines/text/model-load.js +279 -104
package/src/inference/pipelines/text/moe-cache.js +5 -4
package/src/inference/pipelines/text/moe-cpu-gptoss.js +74 -69
package/src/inference/pipelines/text/moe-cpu.js +42 -38
package/src/inference/pipelines/text/moe-gpu.js +110 -86
package/src/inference/pipelines/text/ops.js +90 -90
package/src/inference/pipelines/text/probes.js +9 -9
package/src/inference/pipelines/text/weights.js +17 -7
package/src/inference/pipelines/text.js +17 -1
package/src/inference/speculative.d.ts +2 -2
package/src/inference/speculative.js +4 -18
package/src/inference/test-harness.d.ts +1 -1
package/src/inference/test-harness.js +15 -5
package/src/inference/tokenizer.d.ts +0 -5
package/src/inference/tokenizer.js +4 -23
package/src/inference/tokenizers/bpe.js +9 -0
package/src/inference/tokenizers/bundled.js +176 -33
package/src/inference/tokenizers/sentencepiece.js +12 -0
package/src/loader/doppler-loader.js +38 -22
package/src/loader/dtype-utils.js +3 -44
package/src/loader/embedding-loader.js +7 -3
package/src/loader/experts/expert-cache.js +13 -6
package/src/loader/experts/expert-loader.js +10 -6
package/src/loader/final-weights-loader.js +8 -4
package/src/loader/layer-loader.js +2 -1
package/src/loader/loader-state.js +2 -2
package/src/loader/memory-monitor.js +8 -0
package/src/loader/multi-model-loader.d.ts +14 -0
package/src/loader/multi-model-loader.js +70 -24
package/src/loader/shard-cache.js +81 -12
package/src/loader/shard-resolver.js +25 -3
package/src/loader/tensors/tensor-loader.js +209 -144
package/src/loader/tensors/tensor-reader.js +76 -19
package/src/loader/weight-downcast.js +1 -1
package/src/memory/buffer-pool.d.ts +9 -1
package/src/memory/buffer-pool.js +109 -44
package/src/memory/unified-detect.js +1 -1
package/src/rules/inference/kernel-path.rules.json +24 -8
package/src/rules/rule-registry.js +25 -1
package/src/rules/tooling/command-runtime.rules.json +18 -0
package/src/storage/backends/opfs-store.js +68 -24
package/src/storage/downloader.js +364 -83
package/src/storage/index.d.ts +3 -0
package/src/storage/index.js +3 -0
package/src/storage/preflight.d.ts +2 -2
package/src/storage/preflight.js +24 -2
package/src/storage/quickstart-downloader.js +11 -5
package/src/storage/registry.js +10 -4
package/src/storage/reports.js +1 -1
package/src/storage/shard-manager.d.ts +15 -1
package/src/storage/shard-manager.js +51 -3
package/src/storage/source-artifact-store.d.ts +52 -0
package/src/storage/source-artifact-store.js +234 -0
package/src/tooling/command-api-constants.d.ts +9 -0
package/src/tooling/command-api-constants.js +9 -0
package/src/tooling/command-api-family-normalizers.d.ts +9 -0
package/src/tooling/command-api-family-normalizers.js +343 -0
package/src/tooling/command-api-helpers.d.ts +25 -0
package/src/tooling/command-api-helpers.js +262 -0
package/src/tooling/command-api.d.ts +27 -1
package/src/tooling/command-api.js +26 -473
package/src/tooling/command-envelope.js +4 -1
package/src/tooling/command-runner-shared.js +52 -18
package/src/tooling/lean-execution-contract.js +150 -3
package/src/tooling/node-browser-command-runner.d.ts +4 -0
package/src/tooling/node-browser-command-runner.js +218 -273
package/src/tooling/node-command-runner.js +44 -3
package/src/tooling/node-converter.js +27 -1
package/src/tooling/node-source-runtime.d.ts +1 -1
package/src/tooling/node-source-runtime.js +84 -3
package/src/tooling/node-webgpu.js +30 -105
package/src/tooling/opfs-cache.js +21 -4
package/src/tooling/runtime-input-composition.d.ts +38 -0
package/src/tooling/runtime-input-composition.js +86 -0
package/src/tooling/source-runtime-bundle.d.ts +40 -5
package/src/tooling/source-runtime-bundle.js +261 -34
package/src/tooling/source-runtime-materializer.d.ts +6 -0
package/src/tooling/source-runtime-materializer.js +93 -0
package/src/training/attention-backward.js +32 -17
package/src/training/autograd.js +80 -52
package/src/training/checkpoint-watch.d.ts +8 -0
package/src/training/checkpoint-watch.js +139 -0
package/src/training/checkpoint.d.ts +6 -1
package/src/training/checkpoint.js +46 -7
package/src/training/clip.js +2 -1
package/src/training/datasets/token-batch.js +20 -8
package/src/training/distillation/artifacts.d.ts +71 -0
package/src/training/distillation/artifacts.js +132 -0
package/src/training/distillation/checkpoint-watch.d.ts +10 -0
package/src/training/distillation/checkpoint-watch.js +58 -0
package/src/training/distillation/dataset.d.ts +59 -0
package/src/training/distillation/dataset.js +337 -0
package/src/training/distillation/eval.d.ts +34 -0
package/src/training/distillation/eval.js +310 -0
package/src/training/distillation/index.d.ts +29 -0
package/src/training/distillation/index.js +29 -0
package/src/training/distillation/runtime.d.ts +20 -0
package/src/training/distillation/runtime.js +121 -0
package/src/training/distillation/scoreboard.d.ts +6 -0
package/src/training/distillation/scoreboard.js +8 -0
package/src/training/distillation/stage-a.d.ts +45 -0
package/src/training/distillation/stage-a.js +338 -0
package/src/training/distillation/stage-b.d.ts +24 -0
package/src/training/distillation/stage-b.js +20 -0
package/src/training/distillation/student-fixture.d.ts +22 -0
package/src/training/distillation/student-fixture.js +846 -0
package/src/training/distillation/suite-data.d.ts +45 -0
package/src/training/distillation/suite-data.js +189 -0
package/src/training/index.d.ts +10 -0
package/src/training/index.js +10 -0
package/src/training/lora-pipeline.d.ts +40 -0
package/src/training/lora-pipeline.js +793 -0
package/src/training/lora.js +26 -12
package/src/training/loss.js +5 -6
package/src/training/objectives/cross_entropy.js +2 -5
package/src/training/objectives/distill_kd.js +4 -8
package/src/training/objectives/distill_triplet.js +4 -8
package/src/training/objectives/ul_stage2_base.js +4 -8
package/src/training/operator-artifacts.d.ts +62 -0
package/src/training/operator-artifacts.js +140 -0
package/src/training/operator-command.d.ts +5 -0
package/src/training/operator-command.js +455 -0
package/src/training/operator-eval.d.ts +48 -0
package/src/training/operator-eval.js +230 -0
package/src/training/operator-scoreboard.d.ts +5 -0
package/src/training/operator-scoreboard.js +44 -0
package/src/training/optimizer.js +19 -7
package/src/training/runner.d.ts +52 -0
package/src/training/runner.js +31 -5
package/src/training/suite.d.ts +112 -0
package/src/training/suite.js +24 -984
package/src/training/tensor-factory.d.ts +9 -0
package/src/training/tensor-factory.js +13 -0
package/src/training/trainer.js +3 -5
package/src/training/ul_dataset.js +3 -5
package/src/training/workloads.d.ts +164 -0
package/src/training/workloads.js +530 -0
package/src/version.js +1 -1
package/tools/convert-safetensors-node.js +22 -16
package/tools/doppler-cli.js +179 -63

package/src/training/suite.js (changed)

@@ -3,18 +3,11 @@ import { setPlatformsBaseUrl } from '../config/platforms/loader.js';
 import { setRegistryUrl } from '../config/kernels/registry.js';
 import { createTrainingConfig } from '../config/training-defaults.js';
 import {
-  runAttention,
-  castF16ToF32,
-  runGather,
   runMatmul,
   runResidualAdd,
-  runRMSNorm,
-  runRoPE,
-  runSiLURowSplit,
 } from '../gpu/kernels/index.js';
 import { createTensor } from '../gpu/tensor.js';
 import { acquireBuffer, uploadData, releaseBuffer } from '../memory/buffer-pool.js';
-import { getBufferDtype, getWeightDtype, isCpuWeightBuffer, isWeightBuffer } from '../gpu/weight-buffer.js';
 import { OpType } from './autograd.js';
 import { AdamOptimizer } from './optimizer.js';
 import { TrainingRunner } from './runner.js';
@@ -25,6 +18,16 @@ import { exportLoRAAdapter } from './export.js';
 import { sha256Hex } from '../utils/sha256.js';
 import { computeSampleStats } from '../debug/stats.js';
 import { parseJsonl } from './datasets/jsonl.js';
+import {
+  buildDistillCandidatePrompt,
+  buildDistillPrompt,
+  encodeDistillRow,
+  normalizeDistillDatasetPath,
+  normalizeOptionalString,
+  resolveDistillDataScope,
+  summarizeDirectionCounts,
+} from './distillation/suite-data.js';
+import { createDistillStudentRuntimeModelFixture } from './distillation/student-fixture.js';
 import { initializeInference } from '../inference/test-harness.js';
 import { createPipeline } from '../inference/pipelines/text.js';
 import { parseManifest } from '../formats/rdrr/index.js';
@@ -128,195 +131,7 @@ function isNodeRuntime() {
   return typeof process !== 'undefined' && !!process.versions?.node;
 }
-function normalizeOptionalString(value) {
-  if (value === undefined || value === null) return null;
-  const trimmed = String(value).trim();
-  return trimmed || null;
-}
-function normalizeDistillDatasetPath(value) {
-  return normalizeOptionalString(value);
-}
-function normalizeLangCode(value) {
-  const normalized = normalizeOptionalString(value);
-  if (!normalized) return null;
-  const compact = normalized.toLowerCase().replace(/_/g, '-');
-  if (compact.startsWith('en')) return 'en';
-  if (compact.startsWith('es')) return 'es';
-  return compact;
-}
-function normalizePairDirection(value) {
-  const pair = normalizeOptionalString(value);
-  if (!pair) return null;
-  const normalized = pair.toLowerCase().replace(/_/g, '-').replace(/\s+/g, '');
-  const parts = normalized.includes('->')
-    ? normalized.split('->').filter(Boolean)
-    : normalized.split('-').filter(Boolean);
-  if (parts.length !== 2) return null;
-  return `${normalizeLangCode(parts[0]) || parts[0]}->${normalizeLangCode(parts[1]) || parts[1]}`;
-}
-function normalizeOptionalStringArray(value) {
-  if (value === undefined || value === null) return null;
-  const list = Array.isArray(value)
-    ? value
-    : (typeof value === 'string' ? value.split(',') : null);
-  if (!Array.isArray(list)) return null;
-  const normalized = list
-    .map((entry) => normalizeOptionalString(entry))
-    .filter(Boolean);
-  return normalized.length > 0 ? normalized : null;
-}
-function normalizeDistillLanguageAllowlist(value) {
-  const list = normalizeOptionalStringArray(value);
-  if (!list) return null;
-  const normalized = list
-    .map((entry) => normalizeLangCode(entry))
-    .filter(Boolean);
-  if (normalized.length === 0) return null;
-  return [...new Set(normalized)];
-}
-function normalizeDistillPairAllowlist(value) {
-  const list = normalizeOptionalStringArray(value);
-  if (!list) return null;
-  const normalized = list
-    .map((entry) => normalizePairDirection(entry))
-    .filter(Boolean);
-  if (normalized.length === 0) return null;
-  return [...new Set(normalized)];
-}
-function resolveDistillDataScope(options = {}, trainingConfig = null) {
-  const distillConfig = trainingConfig?.distill || {};
-  const sourceLangs = normalizeDistillLanguageAllowlist(
-    options.distillSourceLangs ?? distillConfig.sourceLangs ?? null
-  );
-  const targetLangs = normalizeDistillLanguageAllowlist(
-    options.distillTargetLangs ?? distillConfig.targetLangs ?? null
-  );
-  const pairAllowlist = normalizeDistillPairAllowlist(
-    options.distillPairAllowlist ?? distillConfig.pairAllowlist ?? null
-  );
-  const strictPairContract = (
-    options.strictPairContract === true
-    || distillConfig.strictPairContract === true
-  );
-  return {
-    sourceLangs,
-    targetLangs,
-    pairAllowlist,
-    sourceLangSet: sourceLangs ? new Set(sourceLangs) : null,
-    targetLangSet: targetLangs ? new Set(targetLangs) : null,
-    pairAllowlistSet: pairAllowlist ? new Set(pairAllowlist) : null,
-    strictPairContract,
-  };
-}
-function resolveDistillDirection(record) {
-  const pairDirection = normalizePairDirection(record?.pair);
-  if (pairDirection) return pairDirection;
-  const srcLang = normalizeLangCode(record?.src_lang);
-  const tgtLang = normalizeLangCode(record?.tgt_lang || record?.lang);
-  if (srcLang && tgtLang) {
-    return `${srcLang}->${tgtLang}`;
-  }
-  return null;
-}
-function resolveStringCandidate(record, keys) {
-  for (const key of keys) {
-    const value = normalizeOptionalString(record?.[key]);
-    if (value) return value;
-  }
-  return null;
-}
-function encodeDistillRow(record, index, scope = null) {
-  if (!record || typeof record !== 'object') return null;
-  const source = resolveStringCandidate(record, ['source', 'query']);
-  const targetPos = resolveStringCandidate(record, ['target_pos', 'target', 'pos']);
-  const targetNeg = resolveStringCandidate(record, ['target_neg', 'neg']);
-  if (!source || !targetPos) return null;
-  const sourceLangRaw = normalizeLangCode(record?.src_lang);
-  const targetLangRaw = normalizeLangCode(record?.tgt_lang || record?.lang);
-  const pairDirection = normalizePairDirection(record?.pair);
-  const sourceTargetDirection = (
-    sourceLangRaw && targetLangRaw
-      ? `${sourceLangRaw}->${targetLangRaw}`
-      : null
-  );
-  if (scope?.strictPairContract === true) {
-    if (!sourceLangRaw || !targetLangRaw) {
-      throw new Error('strictPairContract requires src_lang and tgt_lang/lang on each row.');
-    }
-    if (!pairDirection) {
-      throw new Error('strictPairContract requires pair on each row.');
-    }
-    if (pairDirection !== sourceTargetDirection) {
-      throw new Error(`pair "${record?.pair}" does not match src/tgt "${sourceLangRaw}-${targetLangRaw}".`);
-    }
-  }
-  const direction = pairDirection || sourceTargetDirection || resolveDistillDirection(record) || 'unknown';
-  const [directionSourceLang, directionTargetLang] = String(direction).split('->');
-  const sourceLang = sourceLangRaw || normalizeLangCode(directionSourceLang);
-  const targetLang = targetLangRaw || normalizeLangCode(directionTargetLang);
-  if (scope?.sourceLangSet && (!sourceLang || !scope.sourceLangSet.has(sourceLang))) {
-    return null;
-  }
-  if (scope?.targetLangSet && (!targetLang || !scope.targetLangSet.has(targetLang))) {
-    return null;
-  }
-  if (scope?.pairAllowlistSet && !scope.pairAllowlistSet.has(direction)) {
-    return null;
-  }
-  return {
-    index,
-    direction,
-    sourceLang: sourceLang || null,
-    targetLang: targetLang || null,
-    source,
-    targetPos,
-    targetNeg: targetNeg || null,
-  };
-}
-function summarizeDirectionCounts(samples) {
-  const counts = {};
-  for (const sample of samples) {
-    const key = sample?.direction || 'unknown';
-    counts[key] = (counts[key] || 0) + 1;
-  }
-  return counts;
-}
-function resolveLanguageName(langCode) {
-  const normalized = normalizeLangCode(langCode);
-  if (normalized === 'en') return 'English';
-  if (normalized === 'es') return 'Spanish';
-  return normalized || 'target';
-}
-function buildDistillPrompt(sample) {
-  const direction = String(sample?.direction || '').trim();
-  const [srcCodeRaw, tgtCodeRaw] = direction.split('->');
-  const srcCode = normalizeLangCode(srcCodeRaw) || srcCodeRaw || 'source';
-  const tgtCode = normalizeLangCode(tgtCodeRaw) || tgtCodeRaw || 'target';
-  const srcName = resolveLanguageName(srcCode);
-  const tgtName = resolveLanguageName(tgtCode);
-  const source = String(sample?.source || '').trim();
-  return `Translate from ${srcName} to ${tgtName}:\n${source}\nTranslation:`;
-}
-function buildDistillCandidatePrompt(sample, candidate) {
-  const base = buildDistillPrompt(sample);
-  const text = String(candidate || '').trim();
-  return text ? `${base} ${text}` : base;
-}
+export { buildDistillPrompt, resolveDistillDataScope };
 function toFiniteNumber(value, fallback) {
   const parsed = Number(value);
@@ -328,7 +143,7 @@ function clampDistillTopK(value) {
   return Math.max(2, Math.min(256, parsed));
 }
-function normalizeDistillStudentGraphMode(value) {
+export function normalizeDistillStudentGraphMode(value) {
   const normalized = normalizeOptionalString(value);
   if (!normalized) return DISTILL_STUDENT_GRAPH_FULL;
   const compact = normalized.toLowerCase().replace(/[-\s]/g, '_');
@@ -605,7 +420,7 @@ function createDistillTensorDataset(samples, options = {}) {
   };
 }
-async function loadDistillDatasetFromJsonl(datasetPath, scopeOptions = null) {
+export async function loadDistillDatasetFromJsonl(datasetPath, scopeOptions = null) {
   const normalizedPath = normalizeDistillDatasetPath(datasetPath);
   if (!normalizedPath) return null;
   if (!isNodeRuntime()) {
@@ -820,7 +635,7 @@ async function initializeInferenceFromStore(modelId) {
   return { pipeline, manifest };
 }
-async function loadDistillModelHandle(modelRef, role, loadOptions = {}) {
+export async function loadDistillModelHandle(modelRef, role, loadOptions = {}) {
   const normalizedRef = normalizeOptionalString(modelRef);
   if (!normalizedRef) {
     throw new Error(`Distill ${role} model reference is required.`);
@@ -876,7 +691,7 @@ function resolveDistillModelRefs(options = {}, trainingConfig = null) {
   };
 }
-async function createDistillRuntimeContext(options = {}, trainingConfig = null) {
+export async function createDistillRuntimeContext(options = {}, trainingConfig = null) {
   const { teacherModelRef, studentModelRef } = resolveDistillModelRefs(options, trainingConfig);
   if (!teacherModelRef || !studentModelRef) {
     throw new Error('Distill stage requires teacherModelId and studentModelId.');
@@ -967,7 +782,7 @@ async function ensureTrainingGpuRuntime() {
   await initDevice();
 }
-function createToyModelFixture(overrides = {}) {
+export function createToyModelFixture(overrides = {}) {
   const config = createTrainingConfig({
     ...overrides,
     training: {
@@ -1040,770 +855,7 @@ function createToyModelFixture(overrides = {}) {
   };
 }
-function resolveTensorDtype(value, fallback = 'f32') {
-  const dtype = isWeightBuffer(value)
-    ? value.dtype
-    : (value?.dtype || getWeightDtype(value) || null);
-  const normalized = String(dtype || '').toLowerCase();
-  return normalized === 'f16' ? 'f16' : (normalized === 'f32' ? 'f32' : fallback);
-}
-async function ensureTrainableTensor(value, shape, label, ownedTrainables = null) {
-  if (!value) {
-    throw new Error(`Distill full-graph student missing required weight "${label}".`);
-  }
-  const registerOwned = (tensor) => {
-    if (ownedTrainables instanceof Set && tensor?.buffer instanceof GPUBuffer) {
-      ownedTrainables.add(tensor);
-    }
-    return tensor;
-  };
-  if (isWeightBuffer(value)) {
-    if (value.dtype === 'f32') {
-      return value;
-    }
-    if (value.dtype === 'f16') {
-      const sourceShape = Array.isArray(value.shape) && value.shape.length > 0 ? value.shape : [...shape];
-      const source = createTensor(value.buffer, 'f16', sourceShape, `${label}_source_f16`);
-      const promoted = await castF16ToF32(source);
-      return registerOwned(createTensor(promoted.buffer, 'f32', sourceShape, `${label}_trainable_f32`));
-    }
-    throw new Error(`Distill full-graph student weight "${label}" uses unsupported dtype "${value.dtype}".`);
-  }
-  if (value instanceof GPUBuffer) {
-    const sourceShape = [...shape];
-    const rawDtype = String(getBufferDtype(value) || 'f32').toLowerCase();
-    const dtype = rawDtype === 'f16' ? 'f16' : 'f32';
-    const tensor = createTensor(value, dtype, sourceShape, label);
-    if (dtype === 'f16') {
-      const promoted = await castF16ToF32(tensor);
-      return registerOwned(createTensor(promoted.buffer, 'f32', sourceShape, `${label}_trainable_f32`));
-    }
-    return tensor;
-  }
-  if (isCpuWeightBuffer(value)) {
-    const sourceShape = Array.isArray(value.shape) && value.shape.length > 0 ? value.shape : [...shape];
-    const dtype = resolveTensorDtype(value, 'f32');
-    if (dtype === 'f32') {
-      const tensor = makeTensorFromFloat32(value.data, sourceShape, `${label}_cpu_f32`);
-      return registerOwned(tensor);
-    }
-    if (dtype === 'f16') {
-      let raw = null;
-      if (value.data instanceof Uint16Array) {
-        raw = value.data;
-      } else if (ArrayBuffer.isView(value.data)) {
-        raw = new Uint16Array(
-          value.data.buffer,
-          value.data.byteOffset,
-          Math.floor(value.data.byteLength / 2)
-        );
-      } else if (value.data instanceof ArrayBuffer) {
-        raw = new Uint16Array(value.data);
-      }
-      if (!raw) {
-        throw new Error(`Distill full-graph student weight "${label}" has non-typed f16 CPU data.`);
-      }
-      const source = makeTensorFromF16Bits(raw, sourceShape, `${label}_cpu_f16`);
-      const promoted = await castF16ToF32(source);
-      releaseTensor(source);
-      return registerOwned(createTensor(promoted.buffer, 'f32', sourceShape, `${label}_trainable_f32`));
-    }
-    throw new Error(`Distill full-graph student weight "${label}" has unsupported CPU dtype "${dtype}".`);
-  }
-  if (value.buffer instanceof GPUBuffer) {
-    const resolvedShape = Array.isArray(value.shape) && value.shape.length > 0 ? value.shape : [...shape];
-    const tensor = createTensor(
-      value.buffer,
-      resolveTensorDtype(value, 'f32'),
-      resolvedShape,
-      label
-    );
-    if (tensor.dtype === 'f16') {
-      const promoted = await castF16ToF32(tensor);
-      return registerOwned(createTensor(promoted.buffer, 'f32', resolvedShape, `${label}_trainable_f32`));
-    }
-    return tensor;
-  }
-  throw new Error(`Distill full-graph student weight "${label}" is not GPU-resident.`);
-}
-async function ensureNormTensor(value, hiddenSize, label, ownedTrainables = null) {
-  return ensureTrainableTensor(value, [hiddenSize], label, ownedTrainables);
-}
-function hasTensorPayload(value) {
-  if (!value) return false;
-  if (value instanceof GPUBuffer) return true;
-  if (isWeightBuffer(value) || isCpuWeightBuffer(value)) return true;
-  if (value?.buffer instanceof GPUBuffer) return true;
-  if (ArrayBuffer.isView(value) || Array.isArray(value)) return true;
-  return false;
-}
-async function fuseGateUpTensors(gateTensor, upTensor, intermediateSize, hiddenSize, label, ownedTrainables = null) {
-  const device = getDevice();
-  if (!device) {
-    throw new Error('Distill full-graph student requires active GPU device.');
-  }
-  if (gateTensor?.dtype !== 'f32' || upTensor?.dtype !== 'f32') {
-    throw new Error(`Distill fused gate_up expects f32 tensors for "${label}".`);
-  }
-  const expectedRows = intermediateSize;
-  const expectedCols = hiddenSize;
-  const gateRows = Number.isFinite(gateTensor?.shape?.[0]) ? gateTensor.shape[0] : 0;
-  const gateCols = Number.isFinite(gateTensor?.shape?.[1]) ? gateTensor.shape[1] : 0;
-  const upRows = Number.isFinite(upTensor?.shape?.[0]) ? upTensor.shape[0] : 0;
-  const upCols = Number.isFinite(upTensor?.shape?.[1]) ? upTensor.shape[1] : 0;
-  if (gateRows !== expectedRows || gateCols !== expectedCols || upRows !== expectedRows || upCols !== expectedCols) {
-    throw new Error(
-      `Distill gate/up shape mismatch for "${label}": gate=[${gateRows},${gateCols}] up=[${upRows},${upCols}] ` +
-      `expected=[${expectedRows},${expectedCols}]`
-    );
-  }
-  const rowBytes = expectedCols * 4;
-  const blockBytes = expectedRows * rowBytes;
-  const fusedBuffer = acquireBuffer(blockBytes * 2, undefined, `${label}_fused`);
-  const encoder = device.createCommandEncoder();
-  encoder.copyBufferToBuffer(gateTensor.buffer, 0, fusedBuffer, 0, blockBytes);
-  encoder.copyBufferToBuffer(upTensor.buffer, 0, fusedBuffer, blockBytes, blockBytes);
-  device.queue.submit([encoder.finish()]);
-  const fused = createTensor(fusedBuffer, 'f32', [expectedRows * 2, expectedCols], `${label}_fused`);
-  if (ownedTrainables instanceof Set) {
-    ownedTrainables.add(fused);
-  }
-  return fused;
-}
-function resolvePhasePrompts(batch, phase) {
-  const distill = batch?.distill || {};
-  const prompts = phase === 'positive'
-    ? distill.tripletPositivePrompts
-    : (phase === 'negative' ? distill.tripletNegativePrompts : distill.prompts);
-  if (!Array.isArray(prompts) || prompts.length === 0) {
-    throw new Error(`Distill student fixture requires distill prompts for phase "${phase}".`);
-  }
-  return prompts;
-}
-function createRowSliceTensor(inputTensor, rows, cols, rowIndex, label) {
-  const device = getDevice();
-  if (!device) {
-    throw new Error('Distill full-graph student requires active GPU device.');
-  }
-  const dtype = inputTensor?.dtype === 'f16' ? 'f16' : 'f32';
-  const bytesPerElement = dtype === 'f16' ? 2 : 4;
-  const rowBytes = cols * bytesPerElement;
-  const clampedRow = Math.max(0, Math.min(rows - 1, rowIndex));
-  const outputBuffer = acquireBuffer(rowBytes, undefined, label);
-  const encoder = device.createCommandEncoder();
-  encoder.copyBufferToBuffer(
-    inputTensor.buffer,
-    clampedRow * rowBytes,
-    outputBuffer,
-    0,
-    rowBytes
-  );
-  device.queue.submit([encoder.finish()]);
-  return createTensor(outputBuffer, dtype, [1, cols], label);
-}
-function createDistillStudentProjectionModelFixture(overrides = {}, options = {}) {
-  const distillRuntime = options.distillRuntime && typeof options.distillRuntime === 'object'
-    ? options.distillRuntime
-    : null;
-  if (!distillRuntime?.studentPipeline) {
-    throw new Error('Distill student fixture requires distillRuntime.studentPipeline.');
-  }
-  const outputDim = clampDistillTopK(
-    options.outputDim
-    ?? options.inputDim
-    ?? DISTILL_ADAPTER_TOP_K
-  );
-  const inferredEmbeddingDim = Math.floor(
-    Number(distillRuntime.studentPipeline?.modelConfig?.hiddenSize)
-  );
-  const embeddingDim = Number.isInteger(options.embeddingDim) && options.embeddingDim > 0
-    ? options.embeddingDim
-    : (Number.isFinite(inferredEmbeddingDim) && inferredEmbeddingDim > 0
-      ? inferredEmbeddingDim
-      : outputDim);
-  const config = createTrainingConfig({
-    ...overrides,
-    training: {
-      enabled: true,
-      lossScaling: { enabled: false },
-      gradient: { maxNorm: 0 },
-      ...(overrides.training || {}),
-    },
-  });
-  const projectionWeights = new Float32Array(embeddingDim * outputDim);
-  const projectionWeight = makeTensorFromFloat32(
-    projectionWeights,
-    [embeddingDim, outputDim],
-    'distill_student_head_weight'
-  );
-  const temporaryInputs = new Set();
-  async function projectEmbeddingInput(inputTensor, tape) {
-    const rows = Number.isFinite(inputTensor?.shape?.[0]) ? inputTensor.shape[0] : 1;
-    return tape.record(
-      OpType.MATMUL,
-      (a, b) => runMatmul(a, b, rows, outputDim, embeddingDim, { transposeB: false }),
-      [inputTensor, projectionWeight],
-      { M: rows, N: outputDim, K: embeddingDim, transposeB: false }
-    );
-  }
-  async function buildStudentEmbeddingInput(batch, phase = 'anchor') {
-    const distill = batch?.distill || {};
-    const prompts = phase === 'positive'
-      ? distill.tripletPositivePrompts
-      : (phase === 'negative' ? distill.tripletNegativePrompts : distill.prompts);
-    if (!Array.isArray(prompts) || prompts.length === 0) {
-      throw new Error(`Distill student fixture requires distill prompts for phase "${phase}".`);
-    }
-    const rows = prompts.length;
-    const features = new Float32Array(rows * embeddingDim);
-    for (let row = 0; row < rows; row += 1) {
-      const prompt = String(prompts[row] || '').trim();
-      const studentResult = await distillRuntime.studentPipeline.prefillWithEmbedding(prompt, {
-        useChatTemplate: false,
-        embeddingMode: 'last',
-      });
-      try {
-        const studentEmbedding = toFloat32Array(studentResult?.embedding, 'student embedding');
-        const rowOffset = row * embeddingDim;
-        const copyCount = Math.min(embeddingDim, studentEmbedding.length);
-        features.set(studentEmbedding.subarray(0, copyCount), rowOffset);
-      } finally {
-        disposePrefillSnapshot(studentResult);
-        distillRuntime.studentPipeline.reset();
-      }
-    }
-    const inputTensor = makeTensorFromFloat32(
-      features,
-      [rows, embeddingDim],
-      `distill_student_${phase}_embedding`
-    );
-    temporaryInputs.add(inputTensor);
-    return inputTensor;
-  }
-  const model = {
-    async forward(inputTensor, tape) {
-      return projectEmbeddingInput(inputTensor, tape);
-    },
-    async forwardDistill(batch, tape, forwardOptions = {}) {
-      const requestedPhase = String(forwardOptions?.phase || 'anchor').trim();
-      const phase = requestedPhase === 'positive'
-        ? 'positive'
-        : (requestedPhase === 'negative' ? 'negative' : 'anchor');
-      const inputTensor = await buildStudentEmbeddingInput(batch, phase);
-      const logits = await projectEmbeddingInput(inputTensor, tape);
-      return { logits };
-    },
-    cleanupDistillStep() {
-      for (const tensor of temporaryInputs) {
-        releaseTensor(tensor);
-      }
-      temporaryInputs.clear();
-    },
-    loraParams() {
-      return [projectionWeight];
-    },
-    paramGroups() {
-      return {
-        encoder: [],
-        prior: [],
-        decoder: [],
-        base: [projectionWeight],
-        lora: [projectionWeight],
-      };
-    },
-  };
-  return {
-    config,
-    model,
-    outputDim,
-    embeddingDim,
-    cleanup() {
-      model.cleanupDistillStep();
-      releaseTensor(projectionWeight);
-    },
-  };
-}
-async function createDistillStudentTransformerModelFixture(overrides = {}, options = {}) {
-  const distillRuntime = options.distillRuntime && typeof options.distillRuntime === 'object'
-    ? options.distillRuntime
-    : null;
-  const studentPipeline = distillRuntime?.studentPipeline || null;
-  if (!studentPipeline?.modelConfig || !(studentPipeline.weights instanceof Map)) {
-    throw new Error('Distill full-graph student fixture requires loaded student pipeline weights.');
-  }
-  const modelConfig = studentPipeline.modelConfig;
-  const hiddenSize = Math.max(1, Math.floor(Number(modelConfig.hiddenSize) || 0));
-  const intermediateSize = Math.max(1, Math.floor(Number(modelConfig.intermediateSize) || 0));
-  const numLayers = Math.max(1, Math.floor(Number(modelConfig.numLayers) || 0));
-  const numHeads = Math.max(1, Math.floor(Number(modelConfig.numHeads) || 0));
-  const numKVHeads = Math.max(1, Math.floor(Number(modelConfig.numKVHeads || numHeads) || 0));
-  const headDim = Math.max(1, Math.floor(Number(modelConfig.headDim) || 0));
-  const vocabSize = Math.max(1, Math.floor(Number(modelConfig.vocabSize) || 0));
-  const rmsNormEps = Number.isFinite(modelConfig.rmsNormEps) ? modelConfig.rmsNormEps : 1e-6;
-  const hiddenActivation = String(modelConfig.hiddenActivation || 'silu').toLowerCase();
-  const swigluLimit = Number.isFinite(modelConfig.swigluLimit) ? modelConfig.swigluLimit : 0;
-  const useEmbeddingTranspose = modelConfig.embeddingTranspose === true;
-  const tieWordEmbeddings = modelConfig.useTiedEmbeddings === true;
-  const config = createTrainingConfig({
-    ...overrides,
-    training: {
-      enabled: true,
-      lossScaling: { enabled: false },
-      gradient: { maxNorm: 0 },
-      ...(overrides.training || {}),
-    },
-  });
-  const ownedTrainables = new Set();
-  const embeddingWeight = await ensureTrainableTensor(
-    studentPipeline.weights.get('embed'),
-    [vocabSize, hiddenSize],
-    'embed',
-    ownedTrainables
-  );
-  const lmHeadWeight = tieWordEmbeddings
-    ? embeddingWeight
-    : await ensureTrainableTensor(
-      studentPipeline.weights.get('lm_head'),
-      [vocabSize, hiddenSize],
-      'lm_head',
-      ownedTrainables
-    );
-  const finalNormWeight = await ensureNormTensor(
-    studentPipeline.weights.get('final_norm'),
-    hiddenSize,
-    'final_norm',
-    ownedTrainables
-  );
-  const ropeDim = Math.max(1, Math.floor(headDim / 2));
-  const ropeRows = Math.max(1, Math.floor(Number(modelConfig.maxSeqLen) || 1));
-  const ropeCos = await ensureTrainableTensor(
-    createTensor(studentPipeline.ropeFreqsCos, 'f32', [ropeRows, ropeDim], 'rope_cos'),
-    [ropeRows, ropeDim],
-    'rope_cos',
-    ownedTrainables
-  );
-  const ropeSin = await ensureTrainableTensor(
-    createTensor(studentPipeline.ropeFreqsSin, 'f32', [ropeRows, ropeDim], 'rope_sin'),
-    [ropeRows, ropeDim],
-    'rope_sin',
-    ownedTrainables
-  );
-  const layerParams = [];
-  const layers = [];
-  for (let layerIdx = 0; layerIdx < numLayers; layerIdx += 1) {
-    const layerWeights = studentPipeline.weights.get(`layer_${layerIdx}`);
-    if (!layerWeights) {
-      throw new Error(`Distill full-graph student missing layer_${layerIdx} weights.`);
-    }
-    const gateUpWeight = layerWeights.gateUp || layerWeights.ffnGateUp || null;
-    let layerGateUp = null;
-    if (hasTensorPayload(gateUpWeight)) {
-      layerGateUp = await ensureTrainableTensor(
-        gateUpWeight,
-        [intermediateSize * 2, hiddenSize],
-        `layer_${layerIdx}.ffn_gate_up`,
-        ownedTrainables
-      );
-    } else {
-      const gateWeight = layerWeights.gate || layerWeights.ffnGate || null;
-      const upWeight = layerWeights.up || layerWeights.ffnUp || null;
-      if (!hasTensorPayload(gateWeight) || !hasTensorPayload(upWeight)) {
-        throw new Error(
-          `Distill full-graph student missing gate/up projections on layer ${layerIdx}.`
-        );
-      }
-      const gateTensor = await ensureTrainableTensor(
-        gateWeight,
-        [intermediateSize, hiddenSize],
-        `layer_${layerIdx}.ffn_gate`,
-        ownedTrainables
-      );
-      const upTensor = await ensureTrainableTensor(
-        upWeight,
-        [intermediateSize, hiddenSize],
-        `layer_${layerIdx}.ffn_up`,
-        ownedTrainables
-      );
-      layerGateUp = await fuseGateUpTensors(
-        gateTensor,
-        upTensor,
-        intermediateSize,
-        hiddenSize,
-        `layer_${layerIdx}.ffn_gate_up`,
-        ownedTrainables
-      );
-    }
-    const layer = {
-      inputNorm: await ensureNormTensor(
-        layerWeights.inputNorm,
-        hiddenSize,
-        `layer_${layerIdx}.input_norm`,
-        ownedTrainables
-      ),
-      qProj: await ensureTrainableTensor(
-        layerWeights.qProj,
-        [numHeads * headDim, hiddenSize],
-        `layer_${layerIdx}.q_proj`,
-        ownedTrainables
-      ),
-      kProj: await ensureTrainableTensor(
-        layerWeights.kProj,
-        [numKVHeads * headDim, hiddenSize],
-        `layer_${layerIdx}.k_proj`,
-        ownedTrainables
-      ),
-      vProj: await ensureTrainableTensor(
-        layerWeights.vProj,
-        [numKVHeads * headDim, hiddenSize],
-        `layer_${layerIdx}.v_proj`,
-        ownedTrainables
-      ),
-      oProj: await ensureTrainableTensor(
-        layerWeights.oProj,
-        [hiddenSize, hiddenSize],
-        `layer_${layerIdx}.o_proj`,
-        ownedTrainables
-      ),
-      postAttentionNorm: layerWeights.postAttentionNorm
-        ? await ensureNormTensor(
-          layerWeights.postAttentionNorm,
-          hiddenSize,
-          `layer_${layerIdx}.post_attention_norm`,
-          ownedTrainables
-        )
-        : null,
-      gateUp: layerGateUp,
-      down: await ensureTrainableTensor(
-        layerWeights.down || layerWeights.ffnDown,
-        [hiddenSize, intermediateSize],
-        `layer_${layerIdx}.ffn_down`,
-        ownedTrainables
-      ),
-    };
-    layers.push(layer);
-    layerParams.push(layer.inputNorm, layer.qProj, layer.kProj, layer.vProj, layer.oProj, layer.gateUp, layer.down);
-    if (layer.postAttentionNorm) {
-      layerParams.push(layer.postAttentionNorm);
-    }
-  }
-  const encoderParams = [embeddingWeight, ...layerParams];
-  const decoderParams = [finalNormWeight, lmHeadWeight];
-  const baseParams = [...encoderParams, ...decoderParams];
-  const temporaryInputs = new Set();
-  async function buildPromptTokens(prompt) {
-    const normalized = String(prompt || '').trim();
-    if (!normalized) {
-      throw new Error('Distill full-graph student prompt is empty.');
-    }
-    const tokenIds = studentPipeline.tokenizer.encode(normalized);
-    if (!Array.isArray(tokenIds) || tokenIds.length === 0) {
-      throw new Error('Distill full-graph student tokenizer produced no tokens.');
-    }
-    const tokenTensor = makeTensorFromUint32(
-      tokenIds,
-      [tokenIds.length],
-      'distill_student_prompt_tokens'
-    );
-    temporaryInputs.add(tokenTensor);
-    return { tokenTensor, seqLen: tokenIds.length };
-  }
-  async function runTransformerPrompt(prompt, tape) {
-    const { tokenTensor, seqLen } = await buildPromptTokens(prompt);
-    let hidden = await tape.record(
-      OpType.EMBED,
-      (indices, embeddings) => runGather(
-        indices,
-        embeddings,
-        seqLen,
-        hiddenSize,
-        vocabSize,
-        {
-          embeddingDtype: resolveTensorDtype(embeddingWeight, 'f32'),
-          outputDtype: 'f32',
-          transpose: useEmbeddingTranspose,
-        }
-      ),
-      [tokenTensor, embeddingWeight],
-      {
-        numTokens: seqLen,
-        hiddenSize,
-        vocabSize,
-        transpose: useEmbeddingTranspose,
-        indexOffset: 0,
-      }
-    );
-    for (let layerIdx = 0; layerIdx < layers.length; layerIdx += 1) {
-      const layer = layers[layerIdx];
-      const normed = await tape.record(
-        OpType.RMSNORM,
-        (x, gamma) => runRMSNorm(x, gamma, rmsNormEps, {
-          batchSize: seqLen,
-          hiddenSize,
-          rmsNormWeightOffset: modelConfig.rmsNormWeightOffset === true,
-        }),
-        [hidden, layer.inputNorm],
-        { numTokens: seqLen, hiddenSize, eps: rmsNormEps }
-      );
-      const q2d = await tape.record(
-        OpType.MATMUL,
-        (x, w) => runMatmul(x, w, seqLen, numHeads * headDim, hiddenSize, {
-          transposeB: 'auto',
-          outputDtype: 'f32',
-        }),
-        [normed, layer.qProj],
-        { M: seqLen, N: numHeads * headDim, K: hiddenSize, transposeB: 'auto' }
-      );
-      const k2d = await tape.record(
-        OpType.MATMUL,
-        (x, w) => runMatmul(x, w, seqLen, numKVHeads * headDim, hiddenSize, {
-          transposeB: 'auto',
-          outputDtype: 'f32',
-        }),
-        [normed, layer.kProj],
-        { M: seqLen, N: numKVHeads * headDim, K: hiddenSize, transposeB: 'auto' }
-      );
-      const v2d = await tape.record(
-        OpType.MATMUL,
-        (x, w) => runMatmul(x, w, seqLen, numKVHeads * headDim, hiddenSize, {
-          transposeB: 'auto',
-          outputDtype: 'f32',
-        }),
-        [normed, layer.vProj],
-        { M: seqLen, N: numKVHeads * headDim, K: hiddenSize, transposeB: 'auto' }
-      );
-      const q3d = createTensor(q2d.buffer, q2d.dtype, [seqLen, numHeads, headDim], `layer_${layerIdx}_q`);
-      const k3d = createTensor(k2d.buffer, k2d.dtype, [seqLen, numKVHeads, headDim], `layer_${layerIdx}_k`);
-      const v3d = createTensor(v2d.buffer, v2d.dtype, [seqLen, numKVHeads, headDim], `layer_${layerIdx}_v`);
-      const qRope = await tape.record(
-        OpType.ROPE,
-        (q, cos, sin) => runRoPE(q, cos, sin, seqLen, { numHeads, headDim, startPos: 0 }),
-        [q3d, ropeCos, ropeSin],
-        { seqLen, numHeads, headDim, startPos: 0 }
-      );
-      const kRope = await tape.record(
-        OpType.ROPE,
-        (k, cos, sin) => runRoPE(k, cos, sin, seqLen, { numHeads: numKVHeads, headDim, startPos: 0 }),
-        [k3d, ropeCos, ropeSin],
-        { seqLen, numHeads: numKVHeads, headDim, startPos: 0 }
-      );
-      const attention = await tape.record(
-        OpType.ATTENTION,
-        (q, k, v) => runAttention(q, k, v, null, numHeads, headDim, {
-          seqLen,
-          kvLen: seqLen,
-          numKVHeads,
-          causal: true,
-          startPos: 0,
-          scale: 1 / Math.sqrt(headDim),
-        }),
-        [qRope, kRope, v3d],
-        { seqLen, numHeads, headDim, scale: 1 / Math.sqrt(headDim), causal: true, recomputeForward: true }
-      );
-      const attention2d = createTensor(
-        attention.buffer,
-        attention.dtype,
-        [seqLen, hiddenSize],
-        `layer_${layerIdx}_attn_2d`
-      );
-      const attentionOutput = await tape.record(
-        OpType.MATMUL,
-        (x, w) => runMatmul(x, w, seqLen, hiddenSize, hiddenSize, {
-          transposeB: 'auto',
-          outputDtype: 'f32',
-        }),
-        [attention2d, layer.oProj],
-        { M: seqLen, N: hiddenSize, K: hiddenSize, transposeB: 'auto' }
-      );
-      const postAttention = await tape.record(
-        OpType.RESIDUAL_ADD,
-        (a, b) => runResidualAdd(a, b, seqLen * hiddenSize),
-        [attentionOutput, hidden],
-        { size: seqLen * hiddenSize }
-      );
-      const ffnInput = layer.postAttentionNorm
-        ? await tape.record(
-          OpType.RMSNORM,
-          (x, gamma) => runRMSNorm(x, gamma, rmsNormEps, {
-            batchSize: seqLen,
-            hiddenSize,
-            rmsNormWeightOffset: modelConfig.rmsNormWeightOffset === true,
-          }),
-          [postAttention, layer.postAttentionNorm],
-          { numTokens: seqLen, hiddenSize, eps: rmsNormEps }
-        )
-        : postAttention;
-      const gateUp = await tape.record(
-        OpType.MATMUL,
-        (x, w) => runMatmul(x, w, seqLen, intermediateSize * 2, hiddenSize, {
-          transposeB: 'auto',
-          outputDtype: 'f32',
-        }),
-        [ffnInput, layer.gateUp],
-        { M: seqLen, N: intermediateSize * 2, K: hiddenSize, transposeB: 'auto' }
-      );
-      const activated = await tape.record(
-        OpType.SILU_ROWSPLIT,
-        (x) => runSiLURowSplit(x, {
-          numTokens: seqLen,
-          dim: intermediateSize,
-          activation: hiddenActivation === 'gelu' ? 'gelu' : 'silu',
-          swigluLimit: hiddenActivation === 'gelu' ? null : swigluLimit,
-        }),
-        [gateUp],
-        {
-          numTokens: seqLen,
-          dim: intermediateSize,
-          activation: hiddenActivation === 'gelu' ? 'gelu' : 'silu',
-          swigluLimit: hiddenActivation === 'gelu' ? 0 : swigluLimit,
-        }
-      );
-      const ffnOutput = await tape.record(
-        OpType.MATMUL,
-        (x, w) => runMatmul(x, w, seqLen, hiddenSize, intermediateSize, {
-          transposeB: 'auto',
-          outputDtype: 'f32',
-        }),
-        [activated, layer.down],
-        { M: seqLen, N: hiddenSize, K: intermediateSize, transposeB: 'auto' }
-      );
-      hidden = await tape.record(
-        OpType.RESIDUAL_ADD,
-        (a, b) => runResidualAdd(a, b, seqLen * hiddenSize),
-        [ffnOutput, postAttention],
-        { size: seqLen * hiddenSize }
-      );
-    }
-    const finalHidden = await tape.record(
-      OpType.RMSNORM,
-      (x, gamma) => runRMSNorm(x, gamma, rmsNormEps, {
-        batchSize: seqLen,
-        hiddenSize,
-        rmsNormWeightOffset: modelConfig.rmsNormWeightOffset === true,
-      }),
-      [hidden, finalNormWeight],
-      { numTokens: seqLen, hiddenSize, eps: rmsNormEps }
-    );
-    const lastHidden = await tape.record(
-      OpType.ROW_SLICE,
-      (x) => createRowSliceTensor(x, seqLen, hiddenSize, seqLen - 1, 'distill_last_hidden'),
-      [finalHidden],
-      { rows: seqLen, cols: hiddenSize, rowIndex: seqLen - 1 }
-    );
-    return tape.record(
-      OpType.MATMUL,
-      (x, w) => runMatmul(x, w, 1, vocabSize, hiddenSize, {
-        transposeB: 'auto',
-        outputDtype: 'f32',
-      }),
-      [lastHidden, lmHeadWeight],
-      { M: 1, N: vocabSize, K: hiddenSize, transposeB: 'auto' }
-    );
-  }
-  const model = {
-    async forward(inputTensor, tape) {
-      return tape.record(
-        OpType.MATMUL,
-        (x, w) => runMatmul(x, w, 1, vocabSize, hiddenSize, {
-          transposeB: 'auto',
-          outputDtype: 'f32',
-        }),
-        [inputTensor, lmHeadWeight],
-        { M: 1, N: vocabSize, K: hiddenSize, transposeB: 'auto' }
-      );
-    },
-    async forwardDistill(batch, tape, forwardOptions = {}) {
-      const requestedPhase = String(forwardOptions?.phase || 'anchor').trim();
-      const phase = requestedPhase === 'positive'
-        ? 'positive'
-        : (requestedPhase === 'negative' ? 'negative' : 'anchor');
-      const prompts = resolvePhasePrompts(batch, phase);
-      if (prompts.length !== 1) {
-        throw new Error(
-          `Distill full-graph student currently requires batchSize=1, got ${prompts.length}.`
-        );
-      }
-      const logits = await runTransformerPrompt(prompts[0], tape);
-      return { logits };
-    },
-    cleanupDistillStep() {
-      for (const tensor of temporaryInputs) {
-        releaseTensor(tensor);
-      }
-      temporaryInputs.clear();
-    },
-    loraParams() {
-      return decoderParams;
-    },
-    paramGroups() {
-      return {
-        encoder: encoderParams,
-        prior: [],
-        decoder: decoderParams,
-        base: baseParams,
-        lora: [],
-      };
-    },
-  };
-  return {
-    config,
-    model,
-    outputDim: vocabSize,
-    embeddingDim: hiddenSize,
-    cleanup() {
-      model.cleanupDistillStep();
-      for (const tensor of ownedTrainables) {
-        releaseTensor(tensor);
-      }
-      ownedTrainables.clear();
-    },
-  };
-}
-async function createDistillStudentRuntimeModelFixture(overrides = {}, options = {}) {
-  const distillRuntime = options.distillRuntime && typeof options.distillRuntime === 'object'
-    ? options.distillRuntime
-    : null;
-  const graphMode = normalizeDistillStudentGraphMode(
-    options.studentGraphMode
-    ?? distillRuntime?.studentGraphMode
-    ?? overrides?.training?.distill?.studentGraphMode
-  );
-  if (graphMode === DISTILL_STUDENT_GRAPH_PROJECTION) {
-    return createDistillStudentProjectionModelFixture(overrides, options);
-  }
-  return createDistillStudentTransformerModelFixture(overrides, options);
-}
+export { createDistillStudentRuntimeModelFixture };
 async function runRunnerSmokeTest() {
   const fixture = createToyModelFixture();
@@ -2085,7 +1137,7 @@ function buildUlTrainingOverrides(options = {}) {
   };
 }
-function buildDistillTrainingOverrides(options = {}) {
+export function buildDistillTrainingOverrides(options = {}) {
   const trainingConfig = normalizeTrainingConfigOverride(options.trainingConfig);
   const explicitStage = normalizeTrainingStage(options.trainingStage || trainingConfig?.distill?.stage);
   const distillEnabled = isDistillStage(explicitStage) || trainingConfig?.distill?.enabled === true;
@@ -2160,22 +1212,6 @@ async function computeNodeFileHash(filePath) {
   };
 }
-async function resolveIsolatedArtifactDir(explicitDir, prefix) {
-  const normalized = normalizeOptionalString(explicitDir);
-  if (normalized) {
-    return normalized;
-  }
-  if (!(typeof process !== 'undefined' && process.versions?.node)) {
-    return null;
-  }
-  const [{ mkdtemp }, { tmpdir }, { join }] = await Promise.all([
-    import('node:fs/promises'),
-    import('node:os'),
-    import('node:path'),
-  ]);
-  return mkdtemp(join(tmpdir(), `doppler-${prefix}-`));
-}
 async function runUlStageTest(stage, options = {}) {
   const ulTraining = buildUlTrainingOverrides({
     ...options,
@@ -2198,7 +1234,9 @@ async function runUlStageTest(stage, options = {}) {
         }
       },
     };
-    const ulArtifactDir = await resolveIsolatedArtifactDir(options.ulArtifactDir, 'ul');
+    const ulArtifactDir = normalizeOptionalString(options.ulArtifactDir)
+      || normalizeOptionalString(fixture.config.training?.ul?.artifactDir)
+      || 'reports/training/ul';
     const metrics = await runner.run(fixture.model, dataset, {
       epochs: 1,
       batchSize: 1,
@@ -2374,7 +1412,9 @@ async function runDistillStageTest(stage, options = {}) {
       distillRuntime,
     });
     const distillRunStartMs = performance.now();
-    const distillArtifactDir = await resolveIsolatedArtifactDir(options.distillArtifactDir, 'distill');
+    const distillArtifactDir = normalizeOptionalString(options.distillArtifactDir)
+      || normalizeOptionalString(fixture.config.training?.distill?.artifactDir)
+      || 'reports/training/distill';
     const metrics = await runner.run(fixture.model, dataset, {
       epochs: 1,
       batchSize: 1,