@simulatte/doppler 0.1.6 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +126 -0
- package/README.md +16 -23
- package/package.json +14 -1
- package/src/adapters/adapter-registry.js +12 -1
- package/src/adapters/lora-loader.js +23 -6
- package/src/bridge/extension-client.d.ts +5 -0
- package/src/bridge/extension-client.js +40 -0
- package/src/bridge/index.d.ts +2 -1
- package/src/bridge/index.js +6 -4
- package/src/browser/browser-converter.js +26 -1
- package/src/browser/file-picker.js +6 -0
- package/src/browser/safetensors-parser-browser.js +84 -1
- package/src/browser/shard-io-browser.js +2 -2
- package/src/browser/tensor-source-download.js +8 -2
- package/src/browser/tensor-source-http.d.ts +1 -0
- package/src/browser/tensor-source-http.js +5 -1
- package/src/client/doppler-api.browser.js +20 -4
- package/src/client/doppler-api.js +19 -3
- package/src/client/doppler-provider/generation.js +12 -0
- package/src/client/doppler-provider/model-manager.d.ts +10 -0
- package/src/client/doppler-provider/model-manager.js +91 -19
- package/src/client/doppler-provider/source-runtime.d.ts +2 -1
- package/src/client/doppler-provider/source-runtime.js +132 -13
- package/src/client/doppler-registry.json +8 -7
- package/src/config/backward-registry-loader.js +17 -2
- package/src/config/execution-v0-contract-check.js +113 -15
- package/src/config/kernel-path-contract-check.js +57 -29
- package/src/config/kernel-path-loader.js +5 -36
- package/src/config/kernels/kernel-ref-digests.js +1 -1
- package/src/config/kernels/registry.js +14 -1
- package/src/config/kernels/registry.json +7 -5
- package/src/config/loader.d.ts +1 -1
- package/src/config/loader.js +12 -2
- package/src/config/merge-contract-check.js +59 -4
- package/src/config/merge-helpers.js +128 -7
- package/src/config/merge.d.ts +1 -0
- package/src/config/merge.js +10 -0
- package/src/config/param-validator.js +47 -2
- package/src/config/presets/kernel-paths/{gemma2-q4k-dequant-f32a.json → gemma2-q4k-dequant-f32a-nosubgroups.json} +3 -3
- package/src/config/presets/kernel-paths/gemma3-f16-fused-f32a-online-streamingprefill.json +223 -0
- package/src/config/presets/kernel-paths/{gemma3-q4k-dequant-f32a.json → gemma3-q4k-dequant-f32a-nosubgroups.json} +3 -3
- package/src/config/presets/kernel-paths/registry.json +29 -8
- package/src/config/presets/models/gemma2.json +2 -2
- package/src/config/presets/runtime/experiments/bench/gemma3-bench-q4k.json +1 -1
- package/src/config/presets/runtime/experiments/debug/gemma3-debug-q4k.json +1 -1
- package/src/config/presets/runtime/experiments/verify/gemma3-verify.json +1 -1
- package/src/config/presets/runtime/kernels/dequant-f16-q4k.json +6 -13
- package/src/config/presets/runtime/kernels/dequant-f32-q4k.json +6 -13
- package/src/config/presets/runtime/kernels/embeddinggemma-q4k-dequant-f32a.json +37 -0
- package/src/config/presets/runtime/kernels/fused-q4k.json +6 -13
- package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f16a.json +33 -0
- package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f32a-nosubgroups.json +33 -0
- package/src/config/presets/runtime/kernels/gemma2-q4k-fused-f32a.json +33 -0
- package/src/config/presets/runtime/kernels/safe-q4k.json +6 -13
- package/src/config/presets/runtime/platform/metal-apple-q4k.json +1 -1
- package/src/config/runtime.js +6 -1
- package/src/config/schema/debug.schema.d.ts +5 -0
- package/src/config/schema/doppler.schema.js +16 -21
- package/src/config/schema/inference-defaults.schema.js +3 -3
- package/src/config/schema/kernel-path.schema.d.ts +5 -1
- package/src/config/schema/kernel-thresholds.schema.js +12 -4
- package/src/config/schema/manifest.schema.d.ts +2 -1
- package/src/config/schema/manifest.schema.js +16 -3
- package/src/config/training-defaults.js +30 -22
- package/src/converter/conversion-plan.js +94 -9
- package/src/converter/core.d.ts +7 -0
- package/src/converter/core.js +14 -9
- package/src/converter/execution-v0-manifest.js +4 -1
- package/src/converter/index.d.ts +1 -0
- package/src/converter/index.js +1 -0
- package/src/converter/manifest-inference.js +43 -12
- package/src/converter/parsers/diffusion.js +0 -3
- package/src/converter/quantization-info.js +35 -15
- package/src/converter/shard-packer.d.ts +1 -1
- package/src/converter/shard-packer.js +4 -1
- package/src/debug/config.js +123 -11
- package/src/debug/signals.js +7 -1
- package/src/debug/tensor.d.ts +2 -0
- package/src/debug/tensor.js +13 -2
- package/src/distribution/p2p-control-plane.js +52 -12
- package/src/distribution/p2p-observability.js +43 -7
- package/src/distribution/p2p-webrtc-browser.js +20 -0
- package/src/distribution/shard-delivery.js +77 -26
- package/src/formats/gguf/types.js +33 -16
- package/src/formats/rdrr/groups.d.ts +12 -4
- package/src/formats/rdrr/groups.js +3 -6
- package/src/formats/rdrr/parsing.js +39 -2
- package/src/formats/rdrr/types.d.ts +2 -1
- package/src/gpu/command-recorder.js +86 -61
- package/src/gpu/device.d.ts +1 -0
- package/src/gpu/device.js +73 -19
- package/src/gpu/kernel-tuner/benchmarks.js +326 -316
- package/src/gpu/kernel-tuner/cache.js +71 -4
- package/src/gpu/kernel-tuner/tuner.js +22 -4
- package/src/gpu/kernels/attention.js +15 -34
- package/src/gpu/kernels/backward/adam.js +62 -58
- package/src/gpu/kernels/backward/attention_backward.js +257 -169
- package/src/gpu/kernels/backward/conv2d_backward.js +14 -1
- package/src/gpu/kernels/cast.js +191 -149
- package/src/gpu/kernels/check-stop.js +33 -44
- package/src/gpu/kernels/conv2d.js +27 -17
- package/src/gpu/kernels/cross_entropy_loss.js +21 -15
- package/src/gpu/kernels/depthwise_conv2d.js +36 -26
- package/src/gpu/kernels/dequant.js +178 -126
- package/src/gpu/kernels/energy.d.ts +3 -21
- package/src/gpu/kernels/energy.js +111 -88
- package/src/gpu/kernels/feature-check.js +1 -1
- package/src/gpu/kernels/fused_ffn.js +84 -65
- package/src/gpu/kernels/fused_matmul_residual.js +56 -33
- package/src/gpu/kernels/fused_matmul_rmsnorm.js +62 -45
- package/src/gpu/kernels/gather.js +33 -15
- package/src/gpu/kernels/gelu.js +19 -11
- package/src/gpu/kernels/grouped_pointwise_conv2d.js +33 -23
- package/src/gpu/kernels/groupnorm.js +34 -23
- package/src/gpu/kernels/kv-quantize.js +5 -2
- package/src/gpu/kernels/layernorm.js +35 -19
- package/src/gpu/kernels/logit-merge.js +5 -3
- package/src/gpu/kernels/matmul.js +58 -39
- package/src/gpu/kernels/modulate.js +23 -15
- package/src/gpu/kernels/moe.js +221 -175
- package/src/gpu/kernels/pixel_shuffle.js +22 -14
- package/src/gpu/kernels/relu.js +18 -10
- package/src/gpu/kernels/repeat_channels.js +25 -17
- package/src/gpu/kernels/residual.js +37 -27
- package/src/gpu/kernels/rmsnorm.js +57 -41
- package/src/gpu/kernels/rope.js +3 -0
- package/src/gpu/kernels/sample.js +27 -38
- package/src/gpu/kernels/sana_linear_attention.js +18 -10
- package/src/gpu/kernels/scale.js +18 -11
- package/src/gpu/kernels/shader-cache.js +4 -2
- package/src/gpu/kernels/silu.js +120 -72
- package/src/gpu/kernels/softmax.js +44 -25
- package/src/gpu/kernels/split_qkv.js +23 -13
- package/src/gpu/kernels/transpose.js +18 -10
- package/src/gpu/kernels/transpose.wgsl +5 -3
- package/src/gpu/kernels/upsample2d.js +21 -13
- package/src/gpu/kernels/utils.js +20 -13
- package/src/gpu/partitioned-buffer-pool.js +10 -2
- package/src/gpu/perf-guards.js +2 -9
- package/src/gpu/profiler.js +27 -22
- package/src/gpu/readback-utils.d.ts +16 -0
- package/src/gpu/readback-utils.js +41 -0
- package/src/gpu/submit-tracker.js +13 -0
- package/src/gpu/uniform-cache.d.ts +1 -0
- package/src/gpu/uniform-cache.js +30 -9
- package/src/hotswap/intent-bundle.js +6 -0
- package/src/hotswap/manifest.d.ts +10 -1
- package/src/hotswap/manifest.js +12 -2
- package/src/hotswap/runtime.js +30 -8
- package/src/index-browser.d.ts +44 -0
- package/src/index-browser.js +14 -0
- package/src/inference/browser-harness-contract-helpers.d.ts +5 -0
- package/src/inference/browser-harness-contract-helpers.js +28 -0
- package/src/inference/browser-harness-diffusion-energy-suites.d.ts +2 -0
- package/src/inference/browser-harness-diffusion-energy-suites.js +269 -0
- package/src/inference/browser-harness-model-helpers.d.ts +16 -0
- package/src/inference/browser-harness-model-helpers.js +217 -0
- package/src/inference/browser-harness-report-helpers.d.ts +7 -0
- package/src/inference/browser-harness-report-helpers.js +42 -0
- package/src/inference/browser-harness-runtime-helpers.d.ts +61 -0
- package/src/inference/browser-harness-runtime-helpers.js +415 -0
- package/src/inference/browser-harness-suite-helpers.d.ts +28 -0
- package/src/inference/browser-harness-suite-helpers.js +268 -0
- package/src/inference/browser-harness-text-helpers.d.ts +27 -0
- package/src/inference/browser-harness-text-helpers.js +788 -0
- package/src/inference/browser-harness.d.ts +6 -0
- package/src/inference/browser-harness.js +130 -1996
- package/src/inference/kv-cache/base.js +140 -94
- package/src/inference/kv-cache/tiered.js +5 -3
- package/src/inference/moe-router.js +88 -56
- package/src/inference/multi-model-network.js +5 -3
- package/src/inference/network-evolution.d.ts +11 -2
- package/src/inference/network-evolution.js +20 -21
- package/src/inference/pipelines/context.d.ts +3 -0
- package/src/inference/pipelines/context.js +142 -2
- package/src/inference/pipelines/diffusion/helpers.js +7 -2
- package/src/inference/pipelines/diffusion/pipeline.js +2 -1
- package/src/inference/pipelines/diffusion/sd3-transformer.js +10 -10
- package/src/inference/pipelines/diffusion/vae.js +3 -7
- package/src/inference/pipelines/energy/pipeline.js +27 -21
- package/src/inference/pipelines/energy/quintel.d.ts +5 -0
- package/src/inference/pipelines/energy/quintel.js +11 -0
- package/src/inference/pipelines/energy-head/row-head-pipeline.js +17 -13
- package/src/inference/pipelines/structured/json-head-pipeline.js +26 -11
- package/src/inference/pipelines/text/attention/projections.js +151 -101
- package/src/inference/pipelines/text/attention/record.js +62 -8
- package/src/inference/pipelines/text/attention/run.js +62 -8
- package/src/inference/pipelines/text/config.js +3 -4
- package/src/inference/pipelines/text/embed.js +2 -8
- package/src/inference/pipelines/text/execution-plan.js +41 -19
- package/src/inference/pipelines/text/execution-v0-contract-helpers.d.ts +59 -0
- package/src/inference/pipelines/text/execution-v0-contract-helpers.js +937 -0
- package/src/inference/pipelines/text/execution-v0-runtime-builders.d.ts +15 -0
- package/src/inference/pipelines/text/execution-v0-runtime-builders.js +279 -0
- package/src/inference/pipelines/text/execution-v0.js +62 -1013
- package/src/inference/pipelines/text/generator-steps.d.ts +46 -0
- package/src/inference/pipelines/text/generator-steps.js +298 -207
- package/src/inference/pipelines/text/generator.js +6 -23
- package/src/inference/pipelines/text/init.js +78 -20
- package/src/inference/pipelines/text/kernel-path-auto-select.js +2 -0
- package/src/inference/pipelines/text/kernel-trace.d.ts +2 -0
- package/src/inference/pipelines/text/kernel-trace.js +6 -0
- package/src/inference/pipelines/text/layer.js +3 -9
- package/src/inference/pipelines/text/linear-attention.d.ts +10 -0
- package/src/inference/pipelines/text/linear-attention.js +80 -6
- package/src/inference/pipelines/text/logits/gpu.js +10 -5
- package/src/inference/pipelines/text/logits/index.js +10 -11
- package/src/inference/pipelines/text/logits/utils.d.ts +7 -0
- package/src/inference/pipelines/text/logits/utils.js +9 -0
- package/src/inference/pipelines/text/lora-apply.js +50 -32
- package/src/inference/pipelines/text/model-load.js +279 -104
- package/src/inference/pipelines/text/moe-cache.js +5 -4
- package/src/inference/pipelines/text/moe-cpu-gptoss.js +74 -69
- package/src/inference/pipelines/text/moe-cpu.js +42 -38
- package/src/inference/pipelines/text/moe-gpu.js +110 -86
- package/src/inference/pipelines/text/ops.js +90 -90
- package/src/inference/pipelines/text/probes.js +9 -9
- package/src/inference/pipelines/text/weights.js +17 -7
- package/src/inference/pipelines/text.js +13 -1
- package/src/inference/speculative.d.ts +2 -2
- package/src/inference/speculative.js +4 -18
- package/src/inference/test-harness.d.ts +1 -1
- package/src/inference/test-harness.js +15 -5
- package/src/inference/tokenizer.d.ts +0 -5
- package/src/inference/tokenizer.js +4 -23
- package/src/inference/tokenizers/bpe.js +9 -0
- package/src/inference/tokenizers/bundled.js +20 -0
- package/src/inference/tokenizers/sentencepiece.js +12 -0
- package/src/loader/doppler-loader.js +38 -22
- package/src/loader/dtype-utils.js +3 -44
- package/src/loader/embedding-loader.js +7 -3
- package/src/loader/experts/expert-cache.js +13 -6
- package/src/loader/experts/expert-loader.js +10 -6
- package/src/loader/final-weights-loader.js +8 -4
- package/src/loader/layer-loader.js +2 -1
- package/src/loader/loader-state.js +2 -2
- package/src/loader/memory-monitor.js +8 -0
- package/src/loader/multi-model-loader.d.ts +14 -0
- package/src/loader/multi-model-loader.js +70 -24
- package/src/loader/shard-cache.js +81 -12
- package/src/loader/shard-resolver.js +25 -3
- package/src/loader/tensors/tensor-loader.js +209 -144
- package/src/loader/tensors/tensor-reader.js +76 -19
- package/src/loader/weight-downcast.js +1 -1
- package/src/memory/buffer-pool.d.ts +9 -1
- package/src/memory/buffer-pool.js +109 -44
- package/src/memory/unified-detect.js +1 -1
- package/src/rules/inference/kernel-path.rules.json +24 -8
- package/src/rules/rule-registry.js +25 -1
- package/src/storage/backends/opfs-store.js +68 -24
- package/src/storage/downloader.js +364 -83
- package/src/storage/index.d.ts +3 -0
- package/src/storage/index.js +3 -0
- package/src/storage/preflight.d.ts +2 -2
- package/src/storage/preflight.js +24 -2
- package/src/storage/quickstart-downloader.js +11 -5
- package/src/storage/registry.js +10 -4
- package/src/storage/reports.js +1 -1
- package/src/storage/shard-manager.d.ts +15 -1
- package/src/storage/shard-manager.js +51 -3
- package/src/storage/source-artifact-store.d.ts +52 -0
- package/src/storage/source-artifact-store.js +234 -0
- package/src/tooling/command-api-constants.d.ts +9 -0
- package/src/tooling/command-api-constants.js +9 -0
- package/src/tooling/command-api-family-normalizers.d.ts +9 -0
- package/src/tooling/command-api-family-normalizers.js +343 -0
- package/src/tooling/command-api-helpers.d.ts +25 -0
- package/src/tooling/command-api-helpers.js +262 -0
- package/src/tooling/command-api.js +16 -602
- package/src/tooling/command-envelope.js +4 -1
- package/src/tooling/command-runner-shared.js +52 -18
- package/src/tooling/lean-execution-contract.js +150 -3
- package/src/tooling/node-browser-command-runner.js +161 -271
- package/src/tooling/node-command-runner.js +29 -3
- package/src/tooling/node-converter.js +27 -1
- package/src/tooling/node-source-runtime.d.ts +1 -1
- package/src/tooling/node-source-runtime.js +84 -3
- package/src/tooling/node-webgpu.js +24 -21
- package/src/tooling/opfs-cache.js +21 -4
- package/src/tooling/runtime-input-composition.d.ts +38 -0
- package/src/tooling/runtime-input-composition.js +86 -0
- package/src/tooling/source-runtime-bundle.d.ts +40 -5
- package/src/tooling/source-runtime-bundle.js +261 -34
- package/src/tooling/source-runtime-materializer.d.ts +6 -0
- package/src/tooling/source-runtime-materializer.js +93 -0
- package/src/training/attention-backward.js +32 -17
- package/src/training/autograd.js +80 -52
- package/src/training/checkpoint-watch.d.ts +2 -1
- package/src/training/checkpoint-watch.js +39 -6
- package/src/training/checkpoint.js +40 -11
- package/src/training/clip.js +2 -1
- package/src/training/datasets/token-batch.js +20 -8
- package/src/training/distillation/checkpoint-watch.js +1 -0
- package/src/training/distillation/student-fixture.d.ts +22 -0
- package/src/training/distillation/student-fixture.js +846 -0
- package/src/training/distillation/suite-data.d.ts +45 -0
- package/src/training/distillation/suite-data.js +189 -0
- package/src/training/lora-pipeline.js +4 -7
- package/src/training/lora.js +26 -12
- package/src/training/loss.js +5 -6
- package/src/training/objectives/cross_entropy.js +2 -5
- package/src/training/objectives/distill_kd.js +4 -8
- package/src/training/objectives/distill_triplet.js +4 -8
- package/src/training/objectives/ul_stage2_base.js +4 -8
- package/src/training/operator-command.js +2 -0
- package/src/training/optimizer.js +19 -7
- package/src/training/runner.js +2 -1
- package/src/training/suite.js +18 -978
- package/src/training/tensor-factory.d.ts +9 -0
- package/src/training/tensor-factory.js +13 -0
- package/src/training/trainer.js +3 -5
- package/src/training/ul_dataset.js +3 -5
- package/src/training/workloads.js +70 -79
- package/src/version.js +1 -1
- package/tools/convert-safetensors-node.js +22 -16
- package/tools/doppler-cli.js +44 -25
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
import { getDevice } from '../../gpu/device.js';
|
|
4
4
|
import { allowReadback } from '../../gpu/perf-guards.js';
|
|
5
5
|
import { log } from '../../debug/index.js';
|
|
6
|
+
import { readBuffer } from '../../memory/buffer-pool.js';
|
|
6
7
|
import {
|
|
7
8
|
isContiguousLayer,
|
|
8
9
|
isPagedLayer,
|
|
@@ -815,8 +816,52 @@ export class KVCache {
|
|
|
815
816
|
}
|
|
816
817
|
}
|
|
817
818
|
|
|
819
|
+
_destroyGpuBuffer(buffer) {
|
|
820
|
+
if (!buffer) return;
|
|
821
|
+
try {
|
|
822
|
+
buffer.destroy();
|
|
823
|
+
} catch {
|
|
824
|
+
// Ignore already-destroyed buffers during rollback.
|
|
825
|
+
}
|
|
826
|
+
}
|
|
827
|
+
|
|
828
|
+
_snapshotLayerGpuState(layer) {
|
|
829
|
+
return {
|
|
830
|
+
keysGPU: layer.keysGPU ?? null,
|
|
831
|
+
valuesGPU: layer.valuesGPU ?? null,
|
|
832
|
+
pageTableGPU: layer.pageTableGPU ?? null,
|
|
833
|
+
};
|
|
834
|
+
}
|
|
835
|
+
|
|
836
|
+
_rollbackMigratedLayers(snapshots) {
|
|
837
|
+
for (let l = 0; l < this.numLayers; l++) {
|
|
838
|
+
const layer = this.layers[l];
|
|
839
|
+
const snapshot = snapshots[l];
|
|
840
|
+
if (!snapshot) continue;
|
|
841
|
+
|
|
842
|
+
if (layer.keysGPU && layer.keysGPU !== snapshot.keysGPU) {
|
|
843
|
+
this._destroyGpuBuffer(layer.keysGPU);
|
|
844
|
+
}
|
|
845
|
+
if (layer.valuesGPU && layer.valuesGPU !== snapshot.valuesGPU) {
|
|
846
|
+
this._destroyGpuBuffer(layer.valuesGPU);
|
|
847
|
+
}
|
|
848
|
+
if (layer.pageTableGPU && layer.pageTableGPU !== snapshot.pageTableGPU) {
|
|
849
|
+
this._destroyGpuBuffer(layer.pageTableGPU);
|
|
850
|
+
}
|
|
851
|
+
|
|
852
|
+
layer.keysGPU = snapshot.keysGPU;
|
|
853
|
+
layer.valuesGPU = snapshot.valuesGPU;
|
|
854
|
+
if ('pageTableGPU' in layer) {
|
|
855
|
+
layer.pageTableGPU = snapshot.pageTableGPU;
|
|
856
|
+
}
|
|
857
|
+
}
|
|
858
|
+
}
|
|
859
|
+
|
|
818
860
|
|
|
819
861
|
_migrateToGPU(device) {
|
|
862
|
+
const snapshots = this.layers.map((layer) => this._snapshotLayerGpuState(layer));
|
|
863
|
+
|
|
864
|
+
try {
|
|
820
865
|
if (this.layout === 'paged') {
|
|
821
866
|
log.info('KVCache', `Migrating ${this.currentSeqLen} positions to GPU (paged)...`);
|
|
822
867
|
const numPages = Math.ceil(this.maxSeqLen / this.pageSize);
|
|
@@ -826,56 +871,66 @@ export class KVCache {
|
|
|
826
871
|
|
|
827
872
|
for (let l = 0; l < this.numLayers; l++) {
|
|
828
873
|
const layer = (this.layers[l]);
|
|
829
|
-
|
|
830
|
-
|
|
831
|
-
|
|
874
|
+
let keysGPU = null;
|
|
875
|
+
let valuesGPU = null;
|
|
876
|
+
let pageTableGPU = null;
|
|
877
|
+
try {
|
|
878
|
+
keysGPU = device.createBuffer({
|
|
832
879
|
label: `kv_cache_keys_paged_layer_${l}`,
|
|
833
880
|
size: bytesPerLayer,
|
|
834
881
|
usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST | GPUBufferUsage.COPY_SRC,
|
|
835
882
|
});
|
|
836
|
-
|
|
837
|
-
if (!layer.valuesGPU) {
|
|
838
|
-
layer.valuesGPU = device.createBuffer({
|
|
883
|
+
valuesGPU = device.createBuffer({
|
|
839
884
|
label: `kv_cache_values_paged_layer_${l}`,
|
|
840
885
|
size: bytesPerLayer,
|
|
841
886
|
usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST | GPUBufferUsage.COPY_SRC,
|
|
842
887
|
});
|
|
843
|
-
}
|
|
844
888
|
|
|
845
|
-
|
|
846
|
-
|
|
847
|
-
|
|
848
|
-
|
|
889
|
+
if (!layer.pageTable) {
|
|
890
|
+
layer.pageTable = new Uint32Array(numPages);
|
|
891
|
+
for (let i = 0; i < numPages; i++) {
|
|
892
|
+
layer.pageTable[i] = i;
|
|
893
|
+
}
|
|
849
894
|
}
|
|
850
|
-
|
|
851
|
-
if (!layer.pageTableGPU) {
|
|
852
|
-
layer.pageTableGPU = device.createBuffer({
|
|
895
|
+
pageTableGPU = device.createBuffer({
|
|
853
896
|
label: `kv_cache_page_table_layer_${l}`,
|
|
854
897
|
size: pageTableBytes,
|
|
855
898
|
usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST,
|
|
856
899
|
});
|
|
857
|
-
|
|
858
|
-
|
|
859
|
-
|
|
860
|
-
|
|
861
|
-
|
|
862
|
-
|
|
863
|
-
|
|
864
|
-
|
|
865
|
-
|
|
866
|
-
|
|
867
|
-
|
|
868
|
-
|
|
869
|
-
|
|
870
|
-
|
|
871
|
-
|
|
872
|
-
|
|
873
|
-
|
|
874
|
-
|
|
875
|
-
|
|
876
|
-
|
|
900
|
+
device.queue.writeBuffer(pageTableGPU, 0, layer.pageTable);
|
|
901
|
+
|
|
902
|
+
const allocatedPages = layer.allocatedPages ?? 0;
|
|
903
|
+
if (allocatedPages > 0) {
|
|
904
|
+
const pageElems = this.pageSize * this.kvSize;
|
|
905
|
+
const pageBytes = pageElems * this.bytesPerElem;
|
|
906
|
+
for (let p = 0; p < allocatedPages; p++) {
|
|
907
|
+
const keyPage = layer.keyPages?.[p];
|
|
908
|
+
const valuePage = layer.valuePages?.[p];
|
|
909
|
+
if (!keyPage || !valuePage) continue;
|
|
910
|
+
const byteOffset = p * pageBytes;
|
|
911
|
+
if (this.kvDtype === 'f16') {
|
|
912
|
+
const keysF16 = f32ToF16Array(keyPage);
|
|
913
|
+
const valuesF16 = f32ToF16Array(valuePage);
|
|
914
|
+
device.queue.writeBuffer(keysGPU, byteOffset, keysF16);
|
|
915
|
+
device.queue.writeBuffer(valuesGPU, byteOffset, valuesF16);
|
|
916
|
+
} else {
|
|
917
|
+
device.queue.writeBuffer(keysGPU, byteOffset, keyPage);
|
|
918
|
+
device.queue.writeBuffer(valuesGPU, byteOffset, valuePage);
|
|
919
|
+
}
|
|
877
920
|
}
|
|
878
921
|
}
|
|
922
|
+
|
|
923
|
+
this._destroyGpuBuffer(layer.keysGPU);
|
|
924
|
+
this._destroyGpuBuffer(layer.valuesGPU);
|
|
925
|
+
this._destroyGpuBuffer(layer.pageTableGPU);
|
|
926
|
+
layer.keysGPU = keysGPU;
|
|
927
|
+
layer.valuesGPU = valuesGPU;
|
|
928
|
+
layer.pageTableGPU = pageTableGPU;
|
|
929
|
+
} catch (error) {
|
|
930
|
+
this._destroyGpuBuffer(keysGPU);
|
|
931
|
+
this._destroyGpuBuffer(valuesGPU);
|
|
932
|
+
this._destroyGpuBuffer(pageTableGPU);
|
|
933
|
+
throw error;
|
|
879
934
|
}
|
|
880
935
|
}
|
|
881
936
|
|
|
@@ -890,53 +945,64 @@ export class KVCache {
|
|
|
890
945
|
|
|
891
946
|
for (let l = 0; l < this.numLayers; l++) {
|
|
892
947
|
const layer = (this.layers[l]);
|
|
893
|
-
|
|
894
|
-
|
|
895
|
-
|
|
896
|
-
|
|
948
|
+
let keysGPU = null;
|
|
949
|
+
let valuesGPU = null;
|
|
950
|
+
try {
|
|
951
|
+
keysGPU = device.createBuffer({
|
|
897
952
|
label: `kv_cache_keys_layer_${l}`,
|
|
898
953
|
size: bytesPerLayer,
|
|
899
954
|
usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST | GPUBufferUsage.COPY_SRC,
|
|
900
955
|
});
|
|
901
|
-
|
|
902
|
-
if (!layer.valuesGPU) {
|
|
903
|
-
layer.valuesGPU = device.createBuffer({
|
|
956
|
+
valuesGPU = device.createBuffer({
|
|
904
957
|
label: `kv_cache_values_layer_${l}`,
|
|
905
958
|
size: bytesPerLayer,
|
|
906
959
|
usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST | GPUBufferUsage.COPY_SRC,
|
|
907
960
|
});
|
|
908
|
-
}
|
|
909
961
|
|
|
910
|
-
|
|
911
|
-
|
|
912
|
-
|
|
913
|
-
|
|
914
|
-
|
|
915
|
-
|
|
916
|
-
|
|
917
|
-
|
|
918
|
-
|
|
919
|
-
|
|
920
|
-
|
|
921
|
-
|
|
922
|
-
|
|
923
|
-
|
|
924
|
-
|
|
925
|
-
|
|
926
|
-
|
|
927
|
-
|
|
928
|
-
|
|
929
|
-
|
|
930
|
-
|
|
931
|
-
|
|
932
|
-
|
|
933
|
-
|
|
962
|
+
// Upload existing CPU data to GPU
|
|
963
|
+
const usedElems = layer.seqLen * this.kvSize;
|
|
964
|
+
const usedSize = usedElems * this.bytesPerElem;
|
|
965
|
+
if (usedSize > 0) {
|
|
966
|
+
if (this.kvDtype === 'f16') {
|
|
967
|
+
const keysF16 = f32ToF16Array(layer.keys.subarray(0, usedElems));
|
|
968
|
+
const valuesF16 = f32ToF16Array(layer.values.subarray(0, usedElems));
|
|
969
|
+
device.queue.writeBuffer(keysGPU, 0, keysF16);
|
|
970
|
+
device.queue.writeBuffer(valuesGPU, 0, valuesF16);
|
|
971
|
+
} else {
|
|
972
|
+
device.queue.writeBuffer(
|
|
973
|
+
keysGPU,
|
|
974
|
+
0,
|
|
975
|
+
layer.keys.buffer,
|
|
976
|
+
layer.keys.byteOffset,
|
|
977
|
+
usedSize
|
|
978
|
+
);
|
|
979
|
+
device.queue.writeBuffer(
|
|
980
|
+
valuesGPU,
|
|
981
|
+
0,
|
|
982
|
+
layer.values.buffer,
|
|
983
|
+
layer.values.byteOffset,
|
|
984
|
+
usedSize
|
|
985
|
+
);
|
|
986
|
+
}
|
|
934
987
|
}
|
|
988
|
+
|
|
989
|
+
this._destroyGpuBuffer(layer.keysGPU);
|
|
990
|
+
this._destroyGpuBuffer(layer.valuesGPU);
|
|
991
|
+
layer.keysGPU = keysGPU;
|
|
992
|
+
layer.valuesGPU = valuesGPU;
|
|
993
|
+
} catch (error) {
|
|
994
|
+
this._destroyGpuBuffer(keysGPU);
|
|
995
|
+
this._destroyGpuBuffer(valuesGPU);
|
|
996
|
+
throw error;
|
|
935
997
|
}
|
|
936
998
|
}
|
|
937
999
|
|
|
938
1000
|
this.useGPU = true;
|
|
939
1001
|
log.info('KVCache', 'Migration complete');
|
|
1002
|
+
} catch (error) {
|
|
1003
|
+
this._rollbackMigratedLayers(snapshots);
|
|
1004
|
+
throw error;
|
|
1005
|
+
}
|
|
940
1006
|
}
|
|
941
1007
|
|
|
942
1008
|
|
|
@@ -962,44 +1028,24 @@ export class KVCache {
|
|
|
962
1028
|
layer.values = new Float32Array(sizePerLayer);
|
|
963
1029
|
}
|
|
964
1030
|
|
|
965
|
-
|
|
966
|
-
|
|
967
|
-
|
|
968
|
-
|
|
969
|
-
});
|
|
970
|
-
const valuesStaging = device.createBuffer({
|
|
971
|
-
size: usedSize,
|
|
972
|
-
usage: GPUBufferUsage.MAP_READ | GPUBufferUsage.COPY_DST,
|
|
973
|
-
});
|
|
974
|
-
|
|
975
|
-
// Copy from GPU cache to staging
|
|
976
|
-
const encoder = device.createCommandEncoder({ label: 'kv_cache_sync' });
|
|
977
|
-
encoder.copyBufferToBuffer(layer.keysGPU, 0, keysStaging, 0, usedSize);
|
|
978
|
-
encoder.copyBufferToBuffer(layer.valuesGPU, 0, valuesStaging, 0, usedSize);
|
|
979
|
-
device.queue.submit([encoder.finish()]);
|
|
980
|
-
|
|
981
|
-
// Map and copy to CPU arrays
|
|
982
|
-
await keysStaging.mapAsync(GPUMapMode.READ);
|
|
983
|
-
await valuesStaging.mapAsync(GPUMapMode.READ);
|
|
1031
|
+
const [keysBytes, valuesBytes] = await Promise.all([
|
|
1032
|
+
readBuffer(layer.keysGPU, usedSize),
|
|
1033
|
+
readBuffer(layer.valuesGPU, usedSize),
|
|
1034
|
+
]);
|
|
984
1035
|
|
|
985
1036
|
if (this.kvDtype === 'f16') {
|
|
986
|
-
const keysRaw = new Uint16Array(
|
|
987
|
-
const valuesRaw = new Uint16Array(
|
|
1037
|
+
const keysRaw = new Uint16Array(keysBytes);
|
|
1038
|
+
const valuesRaw = new Uint16Array(valuesBytes);
|
|
988
1039
|
const keysData = f16ToF32Array(keysRaw);
|
|
989
1040
|
const valuesData = f16ToF32Array(valuesRaw);
|
|
990
1041
|
layer.keys.set(keysData);
|
|
991
1042
|
layer.values.set(valuesData);
|
|
992
1043
|
} else {
|
|
993
|
-
const keysData = new Float32Array(
|
|
994
|
-
const valuesData = new Float32Array(
|
|
1044
|
+
const keysData = new Float32Array(keysBytes);
|
|
1045
|
+
const valuesData = new Float32Array(valuesBytes);
|
|
995
1046
|
layer.keys.set(keysData);
|
|
996
1047
|
layer.values.set(valuesData);
|
|
997
1048
|
}
|
|
998
|
-
|
|
999
|
-
keysStaging.unmap();
|
|
1000
|
-
valuesStaging.unmap();
|
|
1001
|
-
keysStaging.destroy();
|
|
1002
|
-
valuesStaging.destroy();
|
|
1003
1049
|
}
|
|
1004
1050
|
}
|
|
1005
1051
|
|
|
@@ -60,7 +60,7 @@ export class TieredKVCache {
|
|
|
60
60
|
: (tiering.mode === 'int4' ? 'int4' : 'none');
|
|
61
61
|
this.compression = tiering.compression ?? { mode: defaultCompressionMode, blockSize: 1 };
|
|
62
62
|
|
|
63
|
-
this.gating = tiering.gating ?? { mode: '
|
|
63
|
+
this.gating = tiering.gating ?? { mode: 'force_off', minAluBwRatio: 0.0 };
|
|
64
64
|
|
|
65
65
|
this.currentSeqLen = 0;
|
|
66
66
|
|
|
@@ -145,8 +145,10 @@ export class TieredKVCache {
|
|
|
145
145
|
if (gating?.mode === 'force_off') return 'none';
|
|
146
146
|
if (gating?.mode === 'force_on') return requested;
|
|
147
147
|
if (gating?.mode === 'auto' && gating.minAluBwRatio > 0) {
|
|
148
|
-
|
|
149
|
-
|
|
148
|
+
throw new Error(
|
|
149
|
+
'TieredKVCache auto compression gating requires an explicit measured ALU/BW ratio. ' +
|
|
150
|
+
'Use gating.mode="force_on"/"force_off" or set minAluBwRatio to 0.'
|
|
151
|
+
);
|
|
150
152
|
}
|
|
151
153
|
return requested;
|
|
152
154
|
}
|
|
@@ -8,6 +8,9 @@ import { createTensor } from '../gpu/tensor.js';
|
|
|
8
8
|
import { f16ToF32Array } from './kv-cache/types.js';
|
|
9
9
|
import { selectRuleValue } from '../rules/rule-registry.js';
|
|
10
10
|
|
|
11
|
+
function isGpuBufferInstance(value) {
|
|
12
|
+
return typeof GPUBuffer !== 'undefined' && value instanceof GPUBuffer;
|
|
13
|
+
}
|
|
11
14
|
|
|
12
15
|
|
|
13
16
|
|
|
@@ -84,6 +87,12 @@ export class MoERouter {
|
|
|
84
87
|
|
|
85
88
|
|
|
86
89
|
loadWeights(weights, bias = null) {
|
|
90
|
+
if (this._gateBiasGPU) {
|
|
91
|
+
this._gateBiasGPU.destroy();
|
|
92
|
+
}
|
|
93
|
+
if (this._gateWeightGPU) {
|
|
94
|
+
this._gateWeightGPU.destroy();
|
|
95
|
+
}
|
|
87
96
|
this.gateWeight = weights;
|
|
88
97
|
this.gateBias = bias;
|
|
89
98
|
// Clear cached GPU uploads when swapping router parameters (e.g., per-layer routers).
|
|
@@ -91,13 +100,27 @@ export class MoERouter {
|
|
|
91
100
|
this._gateWeightGPU = null;
|
|
92
101
|
}
|
|
93
102
|
|
|
103
|
+
destroy() {
|
|
104
|
+
if (isGpuBufferInstance(this._gateBiasGPU)) {
|
|
105
|
+
this._gateBiasGPU.destroy();
|
|
106
|
+
}
|
|
107
|
+
if (isGpuBufferInstance(this._gateWeightGPU)) {
|
|
108
|
+
this._gateWeightGPU.destroy();
|
|
109
|
+
}
|
|
110
|
+
this._gateBiasGPU = null;
|
|
111
|
+
this._gateWeightGPU = null;
|
|
112
|
+
this.gateWeight = null;
|
|
113
|
+
this.gateBias = null;
|
|
114
|
+
this._biasAddPipelines.clear();
|
|
115
|
+
}
|
|
116
|
+
|
|
94
117
|
|
|
95
118
|
computeRouterLogitsCPU(hiddenStates, numTokens) {
|
|
96
119
|
if (!this.gateWeight) {
|
|
97
120
|
throw new Error('Router gate weights not loaded');
|
|
98
121
|
}
|
|
99
122
|
|
|
100
|
-
if (this.gateWeight
|
|
123
|
+
if (isGpuBufferInstance(this.gateWeight) || isWeightBuffer(this.gateWeight)) {
|
|
101
124
|
throw new Error('Gate weights are on GPU, use computeRouterLogitsGPU instead');
|
|
102
125
|
}
|
|
103
126
|
|
|
@@ -140,13 +163,18 @@ export class MoERouter {
|
|
|
140
163
|
if (!gateWeightBuffer) {
|
|
141
164
|
throw new Error('Router gate weights not loaded');
|
|
142
165
|
}
|
|
143
|
-
if (!isWeightBuffer(gateWeightBuffer) && !(gateWeightBuffer
|
|
166
|
+
if (!isWeightBuffer(gateWeightBuffer) && !isGpuBufferInstance(gateWeightBuffer)) {
|
|
144
167
|
const uploaded = device.createBuffer({
|
|
145
168
|
label: 'moe_gate_weight',
|
|
146
169
|
size: gateWeightBuffer.byteLength,
|
|
147
170
|
usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST,
|
|
148
171
|
});
|
|
149
|
-
|
|
172
|
+
try {
|
|
173
|
+
device.queue.writeBuffer(uploaded, 0, gateWeightBuffer);
|
|
174
|
+
} catch (error) {
|
|
175
|
+
uploaded.destroy();
|
|
176
|
+
throw error;
|
|
177
|
+
}
|
|
150
178
|
this._gateWeightGPU = uploaded;
|
|
151
179
|
this.gateWeight = uploaded;
|
|
152
180
|
gateWeightBuffer = uploaded;
|
|
@@ -186,7 +214,7 @@ export class MoERouter {
|
|
|
186
214
|
|
|
187
215
|
|
|
188
216
|
async _getGateBiasBuffer(device) {
|
|
189
|
-
if (this.gateBias
|
|
217
|
+
if (isGpuBufferInstance(this.gateBias)) return this.gateBias;
|
|
190
218
|
if (this._gateBiasGPU) return this._gateBiasGPU;
|
|
191
219
|
|
|
192
220
|
if (!(this.gateBias instanceof Float32Array)) {
|
|
@@ -198,7 +226,12 @@ export class MoERouter {
|
|
|
198
226
|
size: this.gateBias.byteLength,
|
|
199
227
|
usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST,
|
|
200
228
|
});
|
|
201
|
-
|
|
229
|
+
try {
|
|
230
|
+
device.queue.writeBuffer(buf, 0, this.gateBias);
|
|
231
|
+
} catch (error) {
|
|
232
|
+
buf.destroy();
|
|
233
|
+
throw error;
|
|
234
|
+
}
|
|
202
235
|
this._gateBiasGPU = buf;
|
|
203
236
|
return buf;
|
|
204
237
|
}
|
|
@@ -206,7 +239,7 @@ export class MoERouter {
|
|
|
206
239
|
|
|
207
240
|
_inferBiasDtype(bias) {
|
|
208
241
|
if (bias instanceof Float32Array) return 'f32';
|
|
209
|
-
if (bias
|
|
242
|
+
if (isGpuBufferInstance(bias)) {
|
|
210
243
|
const bytesPerElement = Math.round(bias.size / this.numExperts);
|
|
211
244
|
return selectRuleValue('inference', 'dtype', 'f16OrF32FromBytes', { bytesPerElement });
|
|
212
245
|
}
|
|
@@ -276,65 +309,64 @@ export class MoERouter {
|
|
|
276
309
|
size: 16,
|
|
277
310
|
usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST,
|
|
278
311
|
});
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
const encoder = device.createCommandEncoder({ label: 'moe_router_bias_add_encoder' });
|
|
291
|
-
const pass = encoder.beginComputePass({ label: 'moe_router_bias_add_pass' });
|
|
292
|
-
pass.setPipeline(pipeline);
|
|
293
|
-
pass.setBindGroup(0, bindGroup);
|
|
294
|
-
const total = numTokens * this.numExperts;
|
|
295
|
-
pass.dispatchWorkgroups(Math.ceil(total / 256));
|
|
296
|
-
pass.end();
|
|
297
|
-
device.queue.submit([encoder.finish()]);
|
|
312
|
+
try {
|
|
313
|
+
device.queue.writeBuffer(uniformBuffer, 0, uniformData);
|
|
314
|
+
|
|
315
|
+
const bindGroup = device.createBindGroup({
|
|
316
|
+
layout: pipeline.getBindGroupLayout(0),
|
|
317
|
+
entries: [
|
|
318
|
+
{ binding: 0, resource: { buffer: uniformBuffer } },
|
|
319
|
+
{ binding: 1, resource: { buffer: logits } },
|
|
320
|
+
{ binding: 2, resource: { buffer: bias } },
|
|
321
|
+
],
|
|
322
|
+
});
|
|
298
323
|
|
|
299
|
-
|
|
324
|
+
const encoder = device.createCommandEncoder({ label: 'moe_router_bias_add_encoder' });
|
|
325
|
+
const pass = encoder.beginComputePass({ label: 'moe_router_bias_add_pass' });
|
|
326
|
+
pass.setPipeline(pipeline);
|
|
327
|
+
pass.setBindGroup(0, bindGroup);
|
|
328
|
+
const total = numTokens * this.numExperts;
|
|
329
|
+
pass.dispatchWorkgroups(Math.ceil(total / 256));
|
|
330
|
+
pass.end();
|
|
331
|
+
device.queue.submit([encoder.finish()]);
|
|
332
|
+
} finally {
|
|
333
|
+
uniformBuffer.destroy();
|
|
334
|
+
}
|
|
300
335
|
}
|
|
301
336
|
|
|
302
337
|
|
|
303
338
|
async routeGPU(hiddenStates, numTokens) {
|
|
304
339
|
// Compute router logits on GPU
|
|
305
340
|
const logitsBuffer = await this.computeRouterLogitsGPU(hiddenStates, numTokens);
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
this.
|
|
329
|
-
this.loadBalanceStats.expertCounts[idx]++;
|
|
341
|
+
try {
|
|
342
|
+
const logitsData = await readBuffer(logitsBuffer);
|
|
343
|
+
const logits = this.lastLogitsDtype === 'f16'
|
|
344
|
+
? f16ToF32Array(new Uint16Array(logitsData))
|
|
345
|
+
: new Float32Array(logitsData);
|
|
346
|
+
|
|
347
|
+
const selections = [];
|
|
348
|
+
this.activeExperts.clear();
|
|
349
|
+
|
|
350
|
+
for (let t = 0; t < numTokens; t++) {
|
|
351
|
+
const tokenLogits = logits.subarray(
|
|
352
|
+
t * this.numExperts,
|
|
353
|
+
(t + 1) * this.numExperts
|
|
354
|
+
);
|
|
355
|
+
|
|
356
|
+
const selection = this.selectExpertsForToken(tokenLogits);
|
|
357
|
+
selections.push(selection);
|
|
358
|
+
|
|
359
|
+
for (const idx of selection.indices) {
|
|
360
|
+
this.activeExperts.add(idx);
|
|
361
|
+
this.loadBalanceStats.expertCounts[idx]++;
|
|
362
|
+
}
|
|
363
|
+
this.loadBalanceStats.totalTokens++;
|
|
330
364
|
}
|
|
331
|
-
this.loadBalanceStats.totalTokens++;
|
|
332
|
-
}
|
|
333
|
-
|
|
334
|
-
// Clean up logits buffer
|
|
335
|
-
releaseBuffer(logitsBuffer);
|
|
336
365
|
|
|
337
|
-
|
|
366
|
+
return selections;
|
|
367
|
+
} finally {
|
|
368
|
+
releaseBuffer(logitsBuffer);
|
|
369
|
+
}
|
|
338
370
|
}
|
|
339
371
|
|
|
340
372
|
|
|
@@ -4,9 +4,10 @@ import { ExpertRouter } from './expert-router.js';
|
|
|
4
4
|
import { MultiModelRecorder } from '../gpu/multi-model-recorder.js';
|
|
5
5
|
import { applyRepetitionPenalty, sample, getTopK } from './pipelines/text/sampling.js';
|
|
6
6
|
import { finalizeLogits, extractLastPositionLogits } from './pipelines/text/logits/index.js';
|
|
7
|
+
import { readBufferWithCleanup } from './pipelines/text/logits/utils.js';
|
|
7
8
|
import { isStopToken } from './pipelines/text/init.js';
|
|
8
9
|
import { mergeMultipleLogits } from '../gpu/kernels/logit-merge.js';
|
|
9
|
-
import { releaseBuffer
|
|
10
|
+
import { releaseBuffer } from '../memory/buffer-pool.js';
|
|
10
11
|
|
|
11
12
|
const MIN_AGREEMENT_WEIGHT = 1e-4;
|
|
12
13
|
|
|
@@ -478,8 +479,9 @@ export class MultiModelNetwork {
|
|
|
478
479
|
if (canMergeOnGpu) {
|
|
479
480
|
const buffers = voterResults.map((result) => result.logitsBuffer);
|
|
480
481
|
const mergedBuffer = await mergeMultipleLogits(buffers, rawVocabSize, normalizedWeights, 1.0);
|
|
481
|
-
const mergedData = await
|
|
482
|
-
|
|
482
|
+
const mergedData = await readBufferWithCleanup(mergedBuffer, rawVocabSize * 4, () => {
|
|
483
|
+
releaseBuffer(mergedBuffer);
|
|
484
|
+
});
|
|
483
485
|
const rawMerged = new Float32Array(mergedData);
|
|
484
486
|
const finalized = await finalizeLogits(
|
|
485
487
|
rawMerged,
|
|
@@ -35,12 +35,21 @@ export interface EvolutionConfig {
|
|
|
35
35
|
generations?: number;
|
|
36
36
|
eliteCount?: number;
|
|
37
37
|
mutationRate?: number;
|
|
38
|
+
random: () => number;
|
|
38
39
|
evaluate: (genome: NetworkGenome) => Promise<number>;
|
|
39
40
|
randomGenome: () => NetworkGenome;
|
|
40
41
|
}
|
|
41
42
|
|
|
42
|
-
export declare const mutateGenome: (
|
|
43
|
+
export declare const mutateGenome: (
|
|
44
|
+
genome: NetworkGenome,
|
|
45
|
+
mutationRate?: number,
|
|
46
|
+
random?: (() => number) | null
|
|
47
|
+
) => NetworkGenome;
|
|
43
48
|
|
|
44
|
-
export declare const crossoverGenome: (
|
|
49
|
+
export declare const crossoverGenome: (
|
|
50
|
+
a: NetworkGenome,
|
|
51
|
+
b: NetworkGenome,
|
|
52
|
+
random?: (() => number) | null
|
|
53
|
+
) => NetworkGenome;
|
|
45
54
|
|
|
46
55
|
export declare function evolveNetwork(config: EvolutionConfig): Promise<NetworkGenome>;
|