@simulatte/doppler 0.1.6 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +126 -0
- package/README.md +16 -23
- package/package.json +14 -1
- package/src/adapters/adapter-registry.js +12 -1
- package/src/adapters/lora-loader.js +23 -6
- package/src/bridge/extension-client.d.ts +5 -0
- package/src/bridge/extension-client.js +40 -0
- package/src/bridge/index.d.ts +2 -1
- package/src/bridge/index.js +6 -4
- package/src/browser/browser-converter.js +26 -1
- package/src/browser/file-picker.js +6 -0
- package/src/browser/safetensors-parser-browser.js +84 -1
- package/src/browser/shard-io-browser.js +2 -2
- package/src/browser/tensor-source-download.js +8 -2
- package/src/browser/tensor-source-http.d.ts +1 -0
- package/src/browser/tensor-source-http.js +5 -1
- package/src/client/doppler-api.browser.js +20 -4
- package/src/client/doppler-api.js +19 -3
- package/src/client/doppler-provider/generation.js +12 -0
- package/src/client/doppler-provider/model-manager.d.ts +10 -0
- package/src/client/doppler-provider/model-manager.js +91 -19
- package/src/client/doppler-provider/source-runtime.d.ts +2 -1
- package/src/client/doppler-provider/source-runtime.js +132 -13
- package/src/client/doppler-registry.json +8 -7
- package/src/config/backward-registry-loader.js +17 -2
- package/src/config/execution-v0-contract-check.js +113 -15
- package/src/config/kernel-path-contract-check.js +57 -29
- package/src/config/kernel-path-loader.js +5 -36
- package/src/config/kernels/kernel-ref-digests.js +1 -1
- package/src/config/kernels/registry.js +14 -1
- package/src/config/kernels/registry.json +7 -5
- package/src/config/loader.d.ts +1 -1
- package/src/config/loader.js +12 -2
- package/src/config/merge-contract-check.js +59 -4
- package/src/config/merge-helpers.js +128 -7
- package/src/config/merge.d.ts +1 -0
- package/src/config/merge.js +10 -0
- package/src/config/param-validator.js +47 -2
- package/src/config/presets/kernel-paths/{gemma2-q4k-dequant-f32a.json → gemma2-q4k-dequant-f32a-nosubgroups.json} +3 -3
- package/src/config/presets/kernel-paths/gemma3-f16-fused-f32a-online-streamingprefill.json +223 -0
- package/src/config/presets/kernel-paths/{gemma3-q4k-dequant-f32a.json → gemma3-q4k-dequant-f32a-nosubgroups.json} +3 -3
- package/src/config/presets/kernel-paths/registry.json +29 -8
- package/src/config/presets/models/gemma2.json +2 -2
- package/src/config/presets/runtime/experiments/bench/gemma3-bench-q4k.json +1 -1
- package/src/config/presets/runtime/experiments/debug/gemma3-debug-q4k.json +1 -1
- package/src/config/presets/runtime/experiments/verify/gemma3-verify.json +1 -1
- package/src/config/presets/runtime/kernels/dequant-f16-q4k.json +6 -13
- package/src/config/presets/runtime/kernels/dequant-f32-q4k.json +6 -13
- package/src/config/presets/runtime/kernels/embeddinggemma-q4k-dequant-f32a.json +37 -0
- package/src/config/presets/runtime/kernels/fused-q4k.json +6 -13
- package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f16a.json +33 -0
- package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f32a-nosubgroups.json +33 -0
- package/src/config/presets/runtime/kernels/gemma2-q4k-fused-f32a.json +33 -0
- package/src/config/presets/runtime/kernels/safe-q4k.json +6 -13
- package/src/config/presets/runtime/platform/metal-apple-q4k.json +1 -1
- package/src/config/runtime.js +6 -1
- package/src/config/schema/debug.schema.d.ts +5 -0
- package/src/config/schema/doppler.schema.js +16 -21
- package/src/config/schema/inference-defaults.schema.js +3 -3
- package/src/config/schema/kernel-path.schema.d.ts +5 -1
- package/src/config/schema/kernel-thresholds.schema.js +12 -4
- package/src/config/schema/manifest.schema.d.ts +2 -1
- package/src/config/schema/manifest.schema.js +16 -3
- package/src/config/training-defaults.js +30 -22
- package/src/converter/conversion-plan.js +94 -9
- package/src/converter/core.d.ts +7 -0
- package/src/converter/core.js +14 -9
- package/src/converter/execution-v0-manifest.js +4 -1
- package/src/converter/index.d.ts +1 -0
- package/src/converter/index.js +1 -0
- package/src/converter/manifest-inference.js +43 -12
- package/src/converter/parsers/diffusion.js +0 -3
- package/src/converter/quantization-info.js +35 -15
- package/src/converter/shard-packer.d.ts +1 -1
- package/src/converter/shard-packer.js +4 -1
- package/src/debug/config.js +123 -11
- package/src/debug/signals.js +7 -1
- package/src/debug/tensor.d.ts +2 -0
- package/src/debug/tensor.js +13 -2
- package/src/distribution/p2p-control-plane.js +52 -12
- package/src/distribution/p2p-observability.js +43 -7
- package/src/distribution/p2p-webrtc-browser.js +20 -0
- package/src/distribution/shard-delivery.js +77 -26
- package/src/formats/gguf/types.js +33 -16
- package/src/formats/rdrr/groups.d.ts +12 -4
- package/src/formats/rdrr/groups.js +3 -6
- package/src/formats/rdrr/parsing.js +39 -2
- package/src/formats/rdrr/types.d.ts +2 -1
- package/src/gpu/command-recorder.js +86 -61
- package/src/gpu/device.d.ts +1 -0
- package/src/gpu/device.js +73 -19
- package/src/gpu/kernel-tuner/benchmarks.js +326 -316
- package/src/gpu/kernel-tuner/cache.js +71 -4
- package/src/gpu/kernel-tuner/tuner.js +22 -4
- package/src/gpu/kernels/attention.js +15 -34
- package/src/gpu/kernels/backward/adam.js +62 -58
- package/src/gpu/kernels/backward/attention_backward.js +257 -169
- package/src/gpu/kernels/backward/conv2d_backward.js +14 -1
- package/src/gpu/kernels/cast.js +191 -149
- package/src/gpu/kernels/check-stop.js +33 -44
- package/src/gpu/kernels/conv2d.js +27 -17
- package/src/gpu/kernels/cross_entropy_loss.js +21 -15
- package/src/gpu/kernels/depthwise_conv2d.js +36 -26
- package/src/gpu/kernels/dequant.js +178 -126
- package/src/gpu/kernels/energy.d.ts +3 -21
- package/src/gpu/kernels/energy.js +111 -88
- package/src/gpu/kernels/feature-check.js +1 -1
- package/src/gpu/kernels/fused_ffn.js +84 -65
- package/src/gpu/kernels/fused_matmul_residual.js +56 -33
- package/src/gpu/kernels/fused_matmul_rmsnorm.js +62 -45
- package/src/gpu/kernels/gather.js +33 -15
- package/src/gpu/kernels/gelu.js +19 -11
- package/src/gpu/kernels/grouped_pointwise_conv2d.js +33 -23
- package/src/gpu/kernels/groupnorm.js +34 -23
- package/src/gpu/kernels/kv-quantize.js +5 -2
- package/src/gpu/kernels/layernorm.js +35 -19
- package/src/gpu/kernels/logit-merge.js +5 -3
- package/src/gpu/kernels/matmul.js +58 -39
- package/src/gpu/kernels/modulate.js +23 -15
- package/src/gpu/kernels/moe.js +221 -175
- package/src/gpu/kernels/pixel_shuffle.js +22 -14
- package/src/gpu/kernels/relu.js +18 -10
- package/src/gpu/kernels/repeat_channels.js +25 -17
- package/src/gpu/kernels/residual.js +37 -27
- package/src/gpu/kernels/rmsnorm.js +57 -41
- package/src/gpu/kernels/rope.js +3 -0
- package/src/gpu/kernels/sample.js +27 -38
- package/src/gpu/kernels/sana_linear_attention.js +18 -10
- package/src/gpu/kernels/scale.js +18 -11
- package/src/gpu/kernels/shader-cache.js +4 -2
- package/src/gpu/kernels/silu.js +120 -72
- package/src/gpu/kernels/softmax.js +44 -25
- package/src/gpu/kernels/split_qkv.js +23 -13
- package/src/gpu/kernels/transpose.js +18 -10
- package/src/gpu/kernels/transpose.wgsl +5 -3
- package/src/gpu/kernels/upsample2d.js +21 -13
- package/src/gpu/kernels/utils.js +20 -13
- package/src/gpu/partitioned-buffer-pool.js +10 -2
- package/src/gpu/perf-guards.js +2 -9
- package/src/gpu/profiler.js +27 -22
- package/src/gpu/readback-utils.d.ts +16 -0
- package/src/gpu/readback-utils.js +41 -0
- package/src/gpu/submit-tracker.js +13 -0
- package/src/gpu/uniform-cache.d.ts +1 -0
- package/src/gpu/uniform-cache.js +30 -9
- package/src/hotswap/intent-bundle.js +6 -0
- package/src/hotswap/manifest.d.ts +10 -1
- package/src/hotswap/manifest.js +12 -2
- package/src/hotswap/runtime.js +30 -8
- package/src/index-browser.d.ts +44 -0
- package/src/index-browser.js +14 -0
- package/src/inference/browser-harness-contract-helpers.d.ts +5 -0
- package/src/inference/browser-harness-contract-helpers.js +28 -0
- package/src/inference/browser-harness-diffusion-energy-suites.d.ts +2 -0
- package/src/inference/browser-harness-diffusion-energy-suites.js +269 -0
- package/src/inference/browser-harness-model-helpers.d.ts +16 -0
- package/src/inference/browser-harness-model-helpers.js +217 -0
- package/src/inference/browser-harness-report-helpers.d.ts +7 -0
- package/src/inference/browser-harness-report-helpers.js +42 -0
- package/src/inference/browser-harness-runtime-helpers.d.ts +61 -0
- package/src/inference/browser-harness-runtime-helpers.js +415 -0
- package/src/inference/browser-harness-suite-helpers.d.ts +28 -0
- package/src/inference/browser-harness-suite-helpers.js +268 -0
- package/src/inference/browser-harness-text-helpers.d.ts +27 -0
- package/src/inference/browser-harness-text-helpers.js +788 -0
- package/src/inference/browser-harness.d.ts +6 -0
- package/src/inference/browser-harness.js +130 -1996
- package/src/inference/kv-cache/base.js +140 -94
- package/src/inference/kv-cache/tiered.js +5 -3
- package/src/inference/moe-router.js +88 -56
- package/src/inference/multi-model-network.js +5 -3
- package/src/inference/network-evolution.d.ts +11 -2
- package/src/inference/network-evolution.js +20 -21
- package/src/inference/pipelines/context.d.ts +3 -0
- package/src/inference/pipelines/context.js +142 -2
- package/src/inference/pipelines/diffusion/helpers.js +7 -2
- package/src/inference/pipelines/diffusion/pipeline.js +2 -1
- package/src/inference/pipelines/diffusion/sd3-transformer.js +10 -10
- package/src/inference/pipelines/diffusion/vae.js +3 -7
- package/src/inference/pipelines/energy/pipeline.js +27 -21
- package/src/inference/pipelines/energy/quintel.d.ts +5 -0
- package/src/inference/pipelines/energy/quintel.js +11 -0
- package/src/inference/pipelines/energy-head/row-head-pipeline.js +17 -13
- package/src/inference/pipelines/structured/json-head-pipeline.js +26 -11
- package/src/inference/pipelines/text/attention/projections.js +151 -101
- package/src/inference/pipelines/text/attention/record.js +62 -8
- package/src/inference/pipelines/text/attention/run.js +62 -8
- package/src/inference/pipelines/text/config.js +3 -4
- package/src/inference/pipelines/text/embed.js +2 -8
- package/src/inference/pipelines/text/execution-plan.js +41 -19
- package/src/inference/pipelines/text/execution-v0-contract-helpers.d.ts +59 -0
- package/src/inference/pipelines/text/execution-v0-contract-helpers.js +937 -0
- package/src/inference/pipelines/text/execution-v0-runtime-builders.d.ts +15 -0
- package/src/inference/pipelines/text/execution-v0-runtime-builders.js +279 -0
- package/src/inference/pipelines/text/execution-v0.js +62 -1013
- package/src/inference/pipelines/text/generator-steps.d.ts +46 -0
- package/src/inference/pipelines/text/generator-steps.js +298 -207
- package/src/inference/pipelines/text/generator.js +6 -23
- package/src/inference/pipelines/text/init.js +78 -20
- package/src/inference/pipelines/text/kernel-path-auto-select.js +2 -0
- package/src/inference/pipelines/text/kernel-trace.d.ts +2 -0
- package/src/inference/pipelines/text/kernel-trace.js +6 -0
- package/src/inference/pipelines/text/layer.js +3 -9
- package/src/inference/pipelines/text/linear-attention.d.ts +10 -0
- package/src/inference/pipelines/text/linear-attention.js +80 -6
- package/src/inference/pipelines/text/logits/gpu.js +10 -5
- package/src/inference/pipelines/text/logits/index.js +10 -11
- package/src/inference/pipelines/text/logits/utils.d.ts +7 -0
- package/src/inference/pipelines/text/logits/utils.js +9 -0
- package/src/inference/pipelines/text/lora-apply.js +50 -32
- package/src/inference/pipelines/text/model-load.js +279 -104
- package/src/inference/pipelines/text/moe-cache.js +5 -4
- package/src/inference/pipelines/text/moe-cpu-gptoss.js +74 -69
- package/src/inference/pipelines/text/moe-cpu.js +42 -38
- package/src/inference/pipelines/text/moe-gpu.js +110 -86
- package/src/inference/pipelines/text/ops.js +90 -90
- package/src/inference/pipelines/text/probes.js +9 -9
- package/src/inference/pipelines/text/weights.js +17 -7
- package/src/inference/pipelines/text.js +13 -1
- package/src/inference/speculative.d.ts +2 -2
- package/src/inference/speculative.js +4 -18
- package/src/inference/test-harness.d.ts +1 -1
- package/src/inference/test-harness.js +15 -5
- package/src/inference/tokenizer.d.ts +0 -5
- package/src/inference/tokenizer.js +4 -23
- package/src/inference/tokenizers/bpe.js +9 -0
- package/src/inference/tokenizers/bundled.js +20 -0
- package/src/inference/tokenizers/sentencepiece.js +12 -0
- package/src/loader/doppler-loader.js +38 -22
- package/src/loader/dtype-utils.js +3 -44
- package/src/loader/embedding-loader.js +7 -3
- package/src/loader/experts/expert-cache.js +13 -6
- package/src/loader/experts/expert-loader.js +10 -6
- package/src/loader/final-weights-loader.js +8 -4
- package/src/loader/layer-loader.js +2 -1
- package/src/loader/loader-state.js +2 -2
- package/src/loader/memory-monitor.js +8 -0
- package/src/loader/multi-model-loader.d.ts +14 -0
- package/src/loader/multi-model-loader.js +70 -24
- package/src/loader/shard-cache.js +81 -12
- package/src/loader/shard-resolver.js +25 -3
- package/src/loader/tensors/tensor-loader.js +209 -144
- package/src/loader/tensors/tensor-reader.js +76 -19
- package/src/loader/weight-downcast.js +1 -1
- package/src/memory/buffer-pool.d.ts +9 -1
- package/src/memory/buffer-pool.js +109 -44
- package/src/memory/unified-detect.js +1 -1
- package/src/rules/inference/kernel-path.rules.json +24 -8
- package/src/rules/rule-registry.js +25 -1
- package/src/storage/backends/opfs-store.js +68 -24
- package/src/storage/downloader.js +364 -83
- package/src/storage/index.d.ts +3 -0
- package/src/storage/index.js +3 -0
- package/src/storage/preflight.d.ts +2 -2
- package/src/storage/preflight.js +24 -2
- package/src/storage/quickstart-downloader.js +11 -5
- package/src/storage/registry.js +10 -4
- package/src/storage/reports.js +1 -1
- package/src/storage/shard-manager.d.ts +15 -1
- package/src/storage/shard-manager.js +51 -3
- package/src/storage/source-artifact-store.d.ts +52 -0
- package/src/storage/source-artifact-store.js +234 -0
- package/src/tooling/command-api-constants.d.ts +9 -0
- package/src/tooling/command-api-constants.js +9 -0
- package/src/tooling/command-api-family-normalizers.d.ts +9 -0
- package/src/tooling/command-api-family-normalizers.js +343 -0
- package/src/tooling/command-api-helpers.d.ts +25 -0
- package/src/tooling/command-api-helpers.js +262 -0
- package/src/tooling/command-api.js +16 -602
- package/src/tooling/command-envelope.js +4 -1
- package/src/tooling/command-runner-shared.js +52 -18
- package/src/tooling/lean-execution-contract.js +150 -3
- package/src/tooling/node-browser-command-runner.js +161 -271
- package/src/tooling/node-command-runner.js +29 -3
- package/src/tooling/node-converter.js +27 -1
- package/src/tooling/node-source-runtime.d.ts +1 -1
- package/src/tooling/node-source-runtime.js +84 -3
- package/src/tooling/node-webgpu.js +24 -21
- package/src/tooling/opfs-cache.js +21 -4
- package/src/tooling/runtime-input-composition.d.ts +38 -0
- package/src/tooling/runtime-input-composition.js +86 -0
- package/src/tooling/source-runtime-bundle.d.ts +40 -5
- package/src/tooling/source-runtime-bundle.js +261 -34
- package/src/tooling/source-runtime-materializer.d.ts +6 -0
- package/src/tooling/source-runtime-materializer.js +93 -0
- package/src/training/attention-backward.js +32 -17
- package/src/training/autograd.js +80 -52
- package/src/training/checkpoint-watch.d.ts +2 -1
- package/src/training/checkpoint-watch.js +39 -6
- package/src/training/checkpoint.js +40 -11
- package/src/training/clip.js +2 -1
- package/src/training/datasets/token-batch.js +20 -8
- package/src/training/distillation/checkpoint-watch.js +1 -0
- package/src/training/distillation/student-fixture.d.ts +22 -0
- package/src/training/distillation/student-fixture.js +846 -0
- package/src/training/distillation/suite-data.d.ts +45 -0
- package/src/training/distillation/suite-data.js +189 -0
- package/src/training/lora-pipeline.js +4 -7
- package/src/training/lora.js +26 -12
- package/src/training/loss.js +5 -6
- package/src/training/objectives/cross_entropy.js +2 -5
- package/src/training/objectives/distill_kd.js +4 -8
- package/src/training/objectives/distill_triplet.js +4 -8
- package/src/training/objectives/ul_stage2_base.js +4 -8
- package/src/training/operator-command.js +2 -0
- package/src/training/optimizer.js +19 -7
- package/src/training/runner.js +2 -1
- package/src/training/suite.js +18 -978
- package/src/training/tensor-factory.d.ts +9 -0
- package/src/training/tensor-factory.js +13 -0
- package/src/training/trainer.js +3 -5
- package/src/training/ul_dataset.js +3 -5
- package/src/training/workloads.js +70 -79
- package/src/version.js +1 -1
- package/tools/convert-safetensors-node.js +22 -16
- package/tools/doppler-cli.js +44 -25
|
@@ -16,6 +16,21 @@ import { selectRuleValue } from '../../rules/rule-registry.js';
|
|
|
16
16
|
|
|
17
17
|
let loggedF32UpcastNonMatmul = false;
|
|
18
18
|
|
|
19
|
+
function isGpuBufferInstance(value) {
|
|
20
|
+
return typeof GPUBuffer !== 'undefined' && value instanceof GPUBuffer;
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
function isReleasableBuffer(value) {
|
|
24
|
+
return typeof value === 'object' && value !== null && 'size' in value;
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
function releaseOwnedGpuBuffer(buffer, owned) {
|
|
28
|
+
if (!owned || !isReleasableBuffer(buffer)) {
|
|
29
|
+
return;
|
|
30
|
+
}
|
|
31
|
+
releaseBuffer(buffer);
|
|
32
|
+
}
|
|
33
|
+
|
|
19
34
|
function logF32UpcastNonMatmul(name, numElements, bufferSize) {
|
|
20
35
|
if (loggedF32UpcastNonMatmul) {
|
|
21
36
|
return;
|
|
@@ -152,66 +167,80 @@ export function convertF16ToF32CPU(f16Data) {
|
|
|
152
167
|
|
|
153
168
|
export async function loadQ4KFused(shardData, location, name) {
|
|
154
169
|
const device = getDevice();
|
|
155
|
-
const
|
|
170
|
+
const ownsBuffer = !isGpuBufferInstance(shardData);
|
|
171
|
+
const buffer = isGpuBufferInstance(shardData)
|
|
156
172
|
? shardData
|
|
157
173
|
: acquireAlignedBuffer(location.size, `q4k_${name}`);
|
|
158
|
-
|
|
159
|
-
|
|
174
|
+
try {
|
|
175
|
+
if (ownsBuffer) {
|
|
176
|
+
writeBufferAligned(device, buffer, shardData);
|
|
177
|
+
}
|
|
178
|
+
return {
|
|
179
|
+
data: createWeightBuffer(buffer, 'q4k', 'row', location.shape, name),
|
|
180
|
+
allocatedBuffers: [buffer],
|
|
181
|
+
};
|
|
182
|
+
} catch (error) {
|
|
183
|
+
releaseOwnedGpuBuffer(buffer, ownsBuffer);
|
|
184
|
+
throw error;
|
|
160
185
|
}
|
|
161
|
-
|
|
162
|
-
return {
|
|
163
|
-
data: createWeightBuffer(buffer, 'q4k', 'row', location.shape, name),
|
|
164
|
-
allocatedBuffers: [buffer],
|
|
165
|
-
};
|
|
166
186
|
}
|
|
167
187
|
|
|
168
188
|
|
|
169
189
|
export async function loadQ4KDequant(shardData, location, name, config) {
|
|
170
190
|
const device = getDevice();
|
|
171
|
-
|
|
191
|
+
let ownsQuantBuffer = !isGpuBufferInstance(shardData);
|
|
192
|
+
const quantBuffer = isGpuBufferInstance(shardData)
|
|
172
193
|
? shardData
|
|
173
194
|
: acquireAlignedBuffer(location.size, `quant_${name}`);
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
// Check if this is a 2D matrix with K (columns) not aligned to QK_K (256).
|
|
181
|
-
// If so, we need row-wise dequant to produce proper row-major output.
|
|
182
|
-
const is2DMatrix = Array.isArray(location.shape) && location.shape.length === 2;
|
|
183
|
-
const K = is2DMatrix ? location.shape[1] : 0;
|
|
184
|
-
const needsRowwise = is2DMatrix && K > 0 && K % QK_K !== 0;
|
|
195
|
+
let dequantized = null;
|
|
196
|
+
try {
|
|
197
|
+
if (ownsQuantBuffer) {
|
|
198
|
+
writeBufferAligned(device, quantBuffer, shardData);
|
|
199
|
+
}
|
|
185
200
|
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
const
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
201
|
+
const outputDtype = getQ4KOutputDtype(location, config);
|
|
202
|
+
|
|
203
|
+
const is2DMatrix = Array.isArray(location.shape) && location.shape.length === 2;
|
|
204
|
+
const K = is2DMatrix ? location.shape[1] : 0;
|
|
205
|
+
const needsRowwise = is2DMatrix && K > 0 && K % QK_K !== 0;
|
|
206
|
+
|
|
207
|
+
let dequantizedTensor;
|
|
208
|
+
if (needsRowwise) {
|
|
209
|
+
const rows = location.shape[0];
|
|
210
|
+
debugTrace.loader(
|
|
211
|
+
`Dequantizing ${name} (row-wise): [${rows},${K}], K not 256-aligned, ` +
|
|
212
|
+
`outputDtype=${outputDtype}`
|
|
213
|
+
);
|
|
214
|
+
dequantizedTensor = await dequantizeRowwise(quantBuffer, rows, K, { outputDtype });
|
|
215
|
+
} else {
|
|
216
|
+
const numBlocks = Math.ceil(location.size / Q4K_BLOCK_BYTES);
|
|
217
|
+
debugTrace.loader(
|
|
218
|
+
`Dequantizing ${name}: size=${location.size}, numBlocks=${numBlocks}, ` +
|
|
219
|
+
`outputDtype=${outputDtype}, expectedOutput=${numBlocks * QK_K * (outputDtype === 'f16' ? 2 : 4)}`
|
|
220
|
+
);
|
|
221
|
+
dequantizedTensor = await dequantize(quantBuffer, numBlocks, { outputDtype });
|
|
222
|
+
}
|
|
223
|
+
dequantized = dequantizedTensor.buffer;
|
|
203
224
|
|
|
204
|
-
|
|
205
|
-
|
|
225
|
+
debugTrace.loader(`Dequantized ${name}: resultSize=${dequantized.size}`);
|
|
226
|
+
releaseOwnedGpuBuffer(quantBuffer, ownsQuantBuffer);
|
|
227
|
+
ownsQuantBuffer = false;
|
|
206
228
|
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
const dtype = outputDtype;
|
|
229
|
+
const layout = getWeightLayout(location, config);
|
|
230
|
+
const dtype = outputDtype;
|
|
210
231
|
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
232
|
+
return {
|
|
233
|
+
data: createWeightBuffer(dequantized, dtype, layout, location.shape, name),
|
|
234
|
+
allocatedBuffers: [dequantized],
|
|
235
|
+
};
|
|
236
|
+
} catch (error) {
|
|
237
|
+
if (isReleasableBuffer(dequantized)) {
|
|
238
|
+
releaseBuffer(dequantized);
|
|
239
|
+
}
|
|
240
|
+
throw error;
|
|
241
|
+
} finally {
|
|
242
|
+
releaseOwnedGpuBuffer(quantBuffer, ownsQuantBuffer);
|
|
243
|
+
}
|
|
215
244
|
}
|
|
216
245
|
|
|
217
246
|
|
|
@@ -219,97 +248,119 @@ export async function loadQ6K(shardData, location, name) {
|
|
|
219
248
|
const device = getDevice();
|
|
220
249
|
|
|
221
250
|
debugTrace.loader(`Loading Q6_K tensor "${name}", size=${location.size}`);
|
|
222
|
-
|
|
251
|
+
let ownsQuantBuffer = !isGpuBufferInstance(shardData);
|
|
252
|
+
const quantBuffer = isGpuBufferInstance(shardData)
|
|
223
253
|
? shardData
|
|
224
254
|
: acquireAlignedBuffer(location.size, `quant_${name}`);
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
255
|
+
let dequantized = null;
|
|
256
|
+
try {
|
|
257
|
+
if (ownsQuantBuffer) {
|
|
258
|
+
writeBufferAligned(device, quantBuffer, shardData);
|
|
259
|
+
}
|
|
228
260
|
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
261
|
+
const numBlocks = Math.floor(location.size / Q6K_BLOCK_BYTES);
|
|
262
|
+
debugTrace.loader(
|
|
263
|
+
`Dequantizing Q6_K ${name}: size=${location.size}, numBlocks=${numBlocks}, ` +
|
|
264
|
+
`expectedOutput=${numBlocks * 256 * 2} (f16)`
|
|
265
|
+
);
|
|
234
266
|
|
|
235
|
-
|
|
236
|
-
|
|
267
|
+
const dequantizedTensor = await dequantizeQ6K(quantBuffer, numBlocks, { outputDtype: 'f16' });
|
|
268
|
+
dequantized = dequantizedTensor.buffer;
|
|
237
269
|
|
|
238
|
-
|
|
239
|
-
|
|
270
|
+
debugTrace.loader(`Dequantized Q6_K ${name}: resultSize=${dequantized.size}`);
|
|
271
|
+
releaseOwnedGpuBuffer(quantBuffer, ownsQuantBuffer);
|
|
272
|
+
ownsQuantBuffer = false;
|
|
273
|
+
|
|
274
|
+
const isMatmulWeight = shouldDequantizeToF16(location);
|
|
275
|
+
if (isMatmulWeight) {
|
|
276
|
+
return {
|
|
277
|
+
data: createWeightBuffer(dequantized, 'f16', 'row', location.shape, name),
|
|
278
|
+
allocatedBuffers: [dequantized],
|
|
279
|
+
};
|
|
280
|
+
}
|
|
240
281
|
|
|
241
|
-
const isMatmulWeight = shouldDequantizeToF16(location);
|
|
242
|
-
if (isMatmulWeight) {
|
|
243
282
|
return {
|
|
244
|
-
data:
|
|
283
|
+
data: applyBufferLayout(dequantized, location, 'f16'),
|
|
245
284
|
allocatedBuffers: [dequantized],
|
|
246
285
|
};
|
|
286
|
+
} catch (error) {
|
|
287
|
+
if (isReleasableBuffer(dequantized)) {
|
|
288
|
+
releaseBuffer(dequantized);
|
|
289
|
+
}
|
|
290
|
+
throw error;
|
|
291
|
+
} finally {
|
|
292
|
+
releaseOwnedGpuBuffer(quantBuffer, ownsQuantBuffer);
|
|
247
293
|
}
|
|
248
|
-
|
|
249
|
-
return {
|
|
250
|
-
data: applyBufferLayout(dequantized, location, 'f16'),
|
|
251
|
-
allocatedBuffers: [dequantized],
|
|
252
|
-
};
|
|
253
294
|
}
|
|
254
295
|
|
|
255
296
|
|
|
256
297
|
export async function loadBF16(shardData, location, name, config) {
|
|
257
298
|
const device = getDevice();
|
|
258
|
-
|
|
299
|
+
let ownsSrcBuffer = !isGpuBufferInstance(shardData);
|
|
300
|
+
const srcBuffer = isGpuBufferInstance(shardData)
|
|
259
301
|
? shardData
|
|
260
302
|
: acquireAlignedBuffer(location.size, `${name}_bf16`);
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
const caps = config.gpuCapabilities || getKernelCapabilities();
|
|
267
|
-
const isMatmulWeight = shouldDequantizeToF16(location);
|
|
303
|
+
let resultBuffer = null;
|
|
304
|
+
try {
|
|
305
|
+
if (ownsSrcBuffer) {
|
|
306
|
+
writeBufferAligned(device, srcBuffer, shardData);
|
|
307
|
+
}
|
|
268
308
|
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
const
|
|
272
|
-
releaseBuffer(srcBuffer);
|
|
273
|
-
debugTrace.loader(`BF16->F16 for matmul weight: ${name} (${numElements} elements)`);
|
|
309
|
+
const numElements = location.size / 2;
|
|
310
|
+
const caps = config.gpuCapabilities || getKernelCapabilities();
|
|
311
|
+
const isMatmulWeight = shouldDequantizeToF16(location);
|
|
274
312
|
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
data: createWeightBuffer(f16Tensor.buffer, 'f16', layout, location.shape, name),
|
|
282
|
-
allocatedBuffers: [f16Tensor.buffer],
|
|
283
|
-
};
|
|
284
|
-
}
|
|
313
|
+
if (caps?.hasF16 && isMatmulWeight) {
|
|
314
|
+
const f16Tensor = await runBF16ToF16(srcBuffer, [numElements], name);
|
|
315
|
+
resultBuffer = f16Tensor.buffer;
|
|
316
|
+
releaseOwnedGpuBuffer(srcBuffer, ownsSrcBuffer);
|
|
317
|
+
ownsSrcBuffer = false;
|
|
318
|
+
debugTrace.loader(`BF16->F16 for matmul weight: ${name} (${numElements} elements)`);
|
|
285
319
|
|
|
286
|
-
// Standard path: BF16 -> F32
|
|
287
|
-
const dstBuffer = await convertBF16ToF32GPU(srcBuffer, numElements, name);
|
|
288
|
-
releaseBuffer(srcBuffer);
|
|
289
|
-
|
|
290
|
-
if (dstBuffer instanceof GPUBuffer) {
|
|
291
|
-
if (isMatmulWeight) {
|
|
292
|
-
|
|
293
320
|
const layout = selectRuleValue('loader', 'weights', 'weightLayout', {
|
|
294
321
|
layout: location.layout ?? null,
|
|
295
322
|
useColumnWise: false,
|
|
296
323
|
});
|
|
297
324
|
return {
|
|
298
|
-
data: createWeightBuffer(
|
|
325
|
+
data: createWeightBuffer(f16Tensor.buffer, 'f16', layout, location.shape, name),
|
|
326
|
+
allocatedBuffers: [f16Tensor.buffer],
|
|
327
|
+
};
|
|
328
|
+
}
|
|
329
|
+
|
|
330
|
+
const dstBuffer = await convertBF16ToF32GPU(srcBuffer, numElements, name);
|
|
331
|
+
resultBuffer = dstBuffer;
|
|
332
|
+
releaseOwnedGpuBuffer(srcBuffer, ownsSrcBuffer);
|
|
333
|
+
ownsSrcBuffer = false;
|
|
334
|
+
|
|
335
|
+
if (isGpuBufferInstance(dstBuffer)) {
|
|
336
|
+
if (isMatmulWeight) {
|
|
337
|
+
const layout = selectRuleValue('loader', 'weights', 'weightLayout', {
|
|
338
|
+
layout: location.layout ?? null,
|
|
339
|
+
useColumnWise: false,
|
|
340
|
+
});
|
|
341
|
+
return {
|
|
342
|
+
data: createWeightBuffer(dstBuffer, 'f32', layout, location.shape, name),
|
|
343
|
+
allocatedBuffers: [dstBuffer],
|
|
344
|
+
};
|
|
345
|
+
}
|
|
346
|
+
return {
|
|
347
|
+
data: applyBufferLayout(dstBuffer, location, 'f32'),
|
|
299
348
|
allocatedBuffers: [dstBuffer],
|
|
300
349
|
};
|
|
301
350
|
}
|
|
351
|
+
|
|
302
352
|
return {
|
|
303
|
-
data:
|
|
304
|
-
allocatedBuffers: [
|
|
353
|
+
data: dstBuffer,
|
|
354
|
+
allocatedBuffers: [],
|
|
305
355
|
};
|
|
356
|
+
} catch (error) {
|
|
357
|
+
if (isReleasableBuffer(resultBuffer)) {
|
|
358
|
+
releaseBuffer(resultBuffer);
|
|
359
|
+
}
|
|
360
|
+
throw error;
|
|
361
|
+
} finally {
|
|
362
|
+
releaseOwnedGpuBuffer(srcBuffer, ownsSrcBuffer);
|
|
306
363
|
}
|
|
307
|
-
|
|
308
|
-
// Float32Array returned (shouldn't happen in GPU path)
|
|
309
|
-
return {
|
|
310
|
-
data: dstBuffer,
|
|
311
|
-
allocatedBuffers: [],
|
|
312
|
-
};
|
|
313
364
|
}
|
|
314
365
|
|
|
315
366
|
|
|
@@ -318,55 +369,69 @@ export async function loadFloat(shardData, location, name, config) {
|
|
|
318
369
|
throw new Error('Tensor load config is required.');
|
|
319
370
|
}
|
|
320
371
|
const device = getDevice();
|
|
321
|
-
|
|
372
|
+
let ownsBuffer = !isGpuBufferInstance(shardData);
|
|
373
|
+
const buffer = isGpuBufferInstance(shardData)
|
|
322
374
|
? shardData
|
|
323
375
|
: acquireAlignedBuffer(location.size, name);
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
locationDtype: location.dtype,
|
|
330
|
-
});
|
|
331
|
-
const layout = selectRuleValue('loader', 'weights', 'weightLayout', {
|
|
332
|
-
layout: location.layout ?? null,
|
|
333
|
-
useColumnWise: false,
|
|
334
|
-
});
|
|
335
|
-
const isMatmulWeight = shouldDequantizeToF16(location);
|
|
376
|
+
let resultBuffer = null;
|
|
377
|
+
try {
|
|
378
|
+
if (ownsBuffer) {
|
|
379
|
+
writeBufferAligned(device, buffer, shardData);
|
|
380
|
+
}
|
|
336
381
|
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
382
|
+
const dtype = selectRuleValue('loader', 'weights', 'floatLocationDtype', {
|
|
383
|
+
locationDtype: location.dtype,
|
|
384
|
+
});
|
|
385
|
+
const layout = selectRuleValue('loader', 'weights', 'weightLayout', {
|
|
386
|
+
layout: location.layout ?? null,
|
|
387
|
+
useColumnWise: false,
|
|
388
|
+
});
|
|
389
|
+
const isMatmulWeight = shouldDequantizeToF16(location);
|
|
344
390
|
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
if (config.allowF32UpcastNonMatmul === false) {
|
|
391
|
+
if (isMatmulWeight) {
|
|
392
|
+
ownsBuffer = false;
|
|
348
393
|
return {
|
|
349
|
-
data:
|
|
394
|
+
data: createWeightBuffer(buffer, dtype, layout, location.shape, name),
|
|
350
395
|
allocatedBuffers: [buffer],
|
|
351
396
|
};
|
|
352
397
|
}
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
398
|
+
|
|
399
|
+
if (dtype === 'f16') {
|
|
400
|
+
if (config.allowF32UpcastNonMatmul === false) {
|
|
401
|
+
ownsBuffer = false;
|
|
402
|
+
return {
|
|
403
|
+
data: applyBufferLayout(buffer, location, 'f16'),
|
|
404
|
+
allocatedBuffers: [buffer],
|
|
405
|
+
};
|
|
406
|
+
}
|
|
407
|
+
const numElements = location.shape.reduce((a, b) => a * b, 1);
|
|
408
|
+
logF32UpcastNonMatmul(name, numElements, buffer.size);
|
|
409
|
+
debugTrace.loader(`F16->F32 upcast for non-matmul: ${name} (${numElements} elements, bufSize=${buffer.size})`);
|
|
410
|
+
const inputTensor = createTensor(buffer, 'f16', [numElements], `${name}_f16`);
|
|
411
|
+
const f32Tensor = await castF16ToF32(inputTensor);
|
|
412
|
+
resultBuffer = f32Tensor.buffer;
|
|
413
|
+
debugTrace.loader(`F16->F32 complete: ${name} resultSize=${f32Tensor.buffer.size}`);
|
|
414
|
+
releaseOwnedGpuBuffer(buffer, ownsBuffer);
|
|
415
|
+
ownsBuffer = false;
|
|
416
|
+
return {
|
|
417
|
+
data: applyBufferLayout(f32Tensor.buffer, location, 'f32'),
|
|
418
|
+
allocatedBuffers: [f32Tensor.buffer],
|
|
419
|
+
};
|
|
420
|
+
}
|
|
421
|
+
|
|
422
|
+
ownsBuffer = false;
|
|
360
423
|
return {
|
|
361
|
-
data: applyBufferLayout(
|
|
362
|
-
allocatedBuffers: [
|
|
424
|
+
data: applyBufferLayout(buffer, location, dtype),
|
|
425
|
+
allocatedBuffers: [buffer],
|
|
363
426
|
};
|
|
427
|
+
} catch (error) {
|
|
428
|
+
if (isReleasableBuffer(resultBuffer)) {
|
|
429
|
+
releaseBuffer(resultBuffer);
|
|
430
|
+
}
|
|
431
|
+
throw error;
|
|
432
|
+
} finally {
|
|
433
|
+
releaseOwnedGpuBuffer(buffer, ownsBuffer);
|
|
364
434
|
}
|
|
365
|
-
|
|
366
|
-
return {
|
|
367
|
-
data: applyBufferLayout(buffer, location, dtype),
|
|
368
|
-
allocatedBuffers: [buffer],
|
|
369
|
-
};
|
|
370
435
|
}
|
|
371
436
|
|
|
372
437
|
// ============================================================================
|
|
@@ -2,30 +2,84 @@
|
|
|
2
2
|
|
|
3
3
|
import { trace } from '../../debug/index.js';
|
|
4
4
|
|
|
5
|
+
function resolveSpanShardIndex(span, name, spanIndex) {
|
|
6
|
+
const shardIndex = typeof span?.shardIndex === 'number'
|
|
7
|
+
? span.shardIndex
|
|
8
|
+
: span?.shard;
|
|
9
|
+
if (!Number.isInteger(shardIndex) || shardIndex < 0) {
|
|
10
|
+
throw new Error(
|
|
11
|
+
`[DopplerLoader] Tensor "${name}" span[${spanIndex}] has invalid shard index.`
|
|
12
|
+
);
|
|
13
|
+
}
|
|
14
|
+
return shardIndex;
|
|
15
|
+
}
|
|
16
|
+
|
|
17
|
+
function validateSpanField(value, field, name, spanIndex) {
|
|
18
|
+
if (!Number.isInteger(value) || value < 0) {
|
|
19
|
+
throw new Error(
|
|
20
|
+
`[DopplerLoader] Tensor "${name}" span[${spanIndex}] has invalid ${field}.`
|
|
21
|
+
);
|
|
22
|
+
}
|
|
23
|
+
return value;
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
function getLocationSpans(location) {
|
|
27
|
+
if (!Array.isArray(location?.spans) || location.spans.length === 0) {
|
|
28
|
+
return null;
|
|
29
|
+
}
|
|
30
|
+
return location.spans;
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
function resolveLocationShardIndex(location, name) {
|
|
34
|
+
const shardIndex = typeof location?.shardIndex === 'number'
|
|
35
|
+
? location.shardIndex
|
|
36
|
+
: location?.shard;
|
|
37
|
+
if (!Number.isInteger(shardIndex) || shardIndex < 0) {
|
|
38
|
+
throw new Error(`[DopplerLoader] Tensor "${name}" has invalid shard index.`);
|
|
39
|
+
}
|
|
40
|
+
return shardIndex;
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
function validateLocationField(location, field, name) {
|
|
44
|
+
const value = location?.[field];
|
|
45
|
+
if (!Number.isInteger(value) || value < 0) {
|
|
46
|
+
throw new Error(`[DopplerLoader] Tensor "${name}" has invalid ${field}.`);
|
|
47
|
+
}
|
|
48
|
+
return value;
|
|
49
|
+
}
|
|
5
50
|
|
|
6
51
|
export async function assembleShardData(location, name, loadShard, loadShardRange = null) {
|
|
7
|
-
|
|
8
|
-
|
|
52
|
+
const spans = getLocationSpans(location);
|
|
53
|
+
if (spans) {
|
|
54
|
+
trace.loader(`Assembling tensor "${name}" from ${spans.length} spans`);
|
|
9
55
|
|
|
10
|
-
const chunks = await Promise.all(
|
|
56
|
+
const chunks = await Promise.all(spans.map(async (span, spanIndex) => {
|
|
57
|
+
const shardIndex = resolveSpanShardIndex(span, name, spanIndex);
|
|
58
|
+
const offset = validateSpanField(span.offset, 'offset', name, spanIndex);
|
|
59
|
+
const size = validateSpanField(span.size, 'size', name, spanIndex);
|
|
11
60
|
if (loadShardRange) {
|
|
12
|
-
const data = await loadShardRange(
|
|
13
|
-
if (
|
|
61
|
+
const data = await loadShardRange(shardIndex, offset, size);
|
|
62
|
+
if (size > data.byteLength) {
|
|
14
63
|
throw new Error(
|
|
15
|
-
`[DopplerLoader] Shard ${
|
|
64
|
+
`[DopplerLoader] Shard ${shardIndex} too small for tensor "${name}" span.`
|
|
16
65
|
);
|
|
17
66
|
}
|
|
18
|
-
return new Uint8Array(data, 0,
|
|
67
|
+
return new Uint8Array(data, 0, size);
|
|
19
68
|
}
|
|
20
|
-
const data = await loadShard(
|
|
21
|
-
if (
|
|
69
|
+
const data = await loadShard(shardIndex);
|
|
70
|
+
if (offset + size > data.byteLength) {
|
|
22
71
|
throw new Error(
|
|
23
|
-
`[DopplerLoader] Shard ${
|
|
72
|
+
`[DopplerLoader] Shard ${shardIndex} too small for tensor "${name}" span.`
|
|
24
73
|
);
|
|
25
74
|
}
|
|
26
|
-
return new Uint8Array(data,
|
|
75
|
+
return new Uint8Array(data, offset, size);
|
|
27
76
|
}));
|
|
28
77
|
const totalSize = chunks.reduce((s, c) => s + c.length, 0);
|
|
78
|
+
if (Number.isInteger(location?.size) && totalSize !== location.size) {
|
|
79
|
+
throw new Error(
|
|
80
|
+
`[DopplerLoader] Tensor "${name}" spans total ${totalSize} bytes, expected ${location.size}.`
|
|
81
|
+
);
|
|
82
|
+
}
|
|
29
83
|
const combined = new Uint8Array(totalSize);
|
|
30
84
|
let offset = 0;
|
|
31
85
|
for (const chunk of chunks) {
|
|
@@ -36,21 +90,24 @@ export async function assembleShardData(location, name, loadShard, loadShardRang
|
|
|
36
90
|
}
|
|
37
91
|
|
|
38
92
|
// Single shard - use view to avoid copying
|
|
93
|
+
const shardIndex = resolveLocationShardIndex(location, name);
|
|
94
|
+
const offset = validateLocationField(location, 'offset', name);
|
|
95
|
+
const size = validateLocationField(location, 'size', name);
|
|
39
96
|
if (loadShardRange) {
|
|
40
|
-
const slice = await loadShardRange(
|
|
41
|
-
if (
|
|
97
|
+
const slice = await loadShardRange(shardIndex, offset, size);
|
|
98
|
+
if (size > slice.byteLength) {
|
|
42
99
|
throw new Error(
|
|
43
|
-
`[DopplerLoader] Shard ${
|
|
100
|
+
`[DopplerLoader] Shard ${shardIndex} too small for tensor "${name}" (offset=${offset}, size=${size}, shard=${slice.byteLength})`
|
|
44
101
|
);
|
|
45
102
|
}
|
|
46
|
-
return new Uint8Array(slice, 0,
|
|
103
|
+
return new Uint8Array(slice, 0, size);
|
|
47
104
|
}
|
|
48
105
|
|
|
49
|
-
const fullShard = await loadShard(
|
|
50
|
-
if (
|
|
106
|
+
const fullShard = await loadShard(shardIndex);
|
|
107
|
+
if (offset + size > fullShard.byteLength) {
|
|
51
108
|
throw new Error(
|
|
52
|
-
`[DopplerLoader] Shard ${
|
|
109
|
+
`[DopplerLoader] Shard ${shardIndex} too small for tensor "${name}" (offset=${offset}, size=${size}, shard=${fullShard.byteLength})`
|
|
53
110
|
);
|
|
54
111
|
}
|
|
55
|
-
return new Uint8Array(fullShard,
|
|
112
|
+
return new Uint8Array(fullShard, offset, size);
|
|
56
113
|
}
|
|
@@ -80,6 +80,12 @@ export declare class BufferPool {
|
|
|
80
80
|
*/
|
|
81
81
|
release(buffer: GPUBuffer): void;
|
|
82
82
|
|
|
83
|
+
/**
|
|
84
|
+
* Force-dispose an active buffer instead of returning it to the pool.
|
|
85
|
+
* Use for error paths where the buffer contents or device state may be invalid.
|
|
86
|
+
*/
|
|
87
|
+
discard(buffer: GPUBuffer): void;
|
|
88
|
+
|
|
83
89
|
/**
|
|
84
90
|
* Check if a buffer is currently tracked as active by the pool
|
|
85
91
|
*/
|
|
@@ -159,7 +165,8 @@ export declare class BufferPool {
|
|
|
159
165
|
}
|
|
160
166
|
|
|
161
167
|
/**
|
|
162
|
-
* Get the global buffer pool
|
|
168
|
+
* Get the global buffer pool for the current device epoch.
|
|
169
|
+
* If the active device has changed or was lost, a fresh global pool is created.
|
|
163
170
|
*/
|
|
164
171
|
export function getBufferPool(): BufferPool;
|
|
165
172
|
|
|
@@ -179,6 +186,7 @@ export declare const createUploadBuffer: (size: number) => GPUBuffer;
|
|
|
179
186
|
export declare const createUniformBuffer: (size: number) => GPUBuffer;
|
|
180
187
|
export declare const acquireBuffer: (size: number, usage?: GPUBufferUsageFlags, label?: string) => GPUBuffer;
|
|
181
188
|
export declare const releaseBuffer: (buffer: GPUBuffer) => void;
|
|
189
|
+
export declare const discardBuffer: (buffer: GPUBuffer) => void;
|
|
182
190
|
export declare const isBufferActive: (buffer: GPUBuffer) => boolean;
|
|
183
191
|
export declare const getBufferRequestedSize: (buffer: GPUBuffer) => number;
|
|
184
192
|
export declare const uploadData: (buffer: GPUBuffer, data: ArrayBuffer | ArrayBufferView, offset?: number) => void;
|