@simulatte/doppler 0.1.6 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +126 -0
- package/README.md +16 -23
- package/package.json +14 -1
- package/src/adapters/adapter-registry.js +12 -1
- package/src/adapters/lora-loader.js +23 -6
- package/src/bridge/extension-client.d.ts +5 -0
- package/src/bridge/extension-client.js +40 -0
- package/src/bridge/index.d.ts +2 -1
- package/src/bridge/index.js +6 -4
- package/src/browser/browser-converter.js +26 -1
- package/src/browser/file-picker.js +6 -0
- package/src/browser/safetensors-parser-browser.js +84 -1
- package/src/browser/shard-io-browser.js +2 -2
- package/src/browser/tensor-source-download.js +8 -2
- package/src/browser/tensor-source-http.d.ts +1 -0
- package/src/browser/tensor-source-http.js +5 -1
- package/src/client/doppler-api.browser.js +20 -4
- package/src/client/doppler-api.js +19 -3
- package/src/client/doppler-provider/generation.js +12 -0
- package/src/client/doppler-provider/model-manager.d.ts +10 -0
- package/src/client/doppler-provider/model-manager.js +91 -19
- package/src/client/doppler-provider/source-runtime.d.ts +2 -1
- package/src/client/doppler-provider/source-runtime.js +132 -13
- package/src/client/doppler-registry.json +8 -7
- package/src/config/backward-registry-loader.js +17 -2
- package/src/config/execution-v0-contract-check.js +113 -15
- package/src/config/kernel-path-contract-check.js +57 -29
- package/src/config/kernel-path-loader.js +5 -36
- package/src/config/kernels/kernel-ref-digests.js +1 -1
- package/src/config/kernels/registry.js +14 -1
- package/src/config/kernels/registry.json +7 -5
- package/src/config/loader.d.ts +1 -1
- package/src/config/loader.js +12 -2
- package/src/config/merge-contract-check.js +59 -4
- package/src/config/merge-helpers.js +128 -7
- package/src/config/merge.d.ts +1 -0
- package/src/config/merge.js +10 -0
- package/src/config/param-validator.js +47 -2
- package/src/config/presets/kernel-paths/{gemma2-q4k-dequant-f32a.json → gemma2-q4k-dequant-f32a-nosubgroups.json} +3 -3
- package/src/config/presets/kernel-paths/gemma3-f16-fused-f32a-online-streamingprefill.json +223 -0
- package/src/config/presets/kernel-paths/{gemma3-q4k-dequant-f32a.json → gemma3-q4k-dequant-f32a-nosubgroups.json} +3 -3
- package/src/config/presets/kernel-paths/registry.json +29 -8
- package/src/config/presets/models/gemma2.json +2 -2
- package/src/config/presets/runtime/experiments/bench/gemma3-bench-q4k.json +1 -1
- package/src/config/presets/runtime/experiments/debug/gemma3-debug-q4k.json +1 -1
- package/src/config/presets/runtime/experiments/verify/gemma3-verify.json +1 -1
- package/src/config/presets/runtime/kernels/dequant-f16-q4k.json +6 -13
- package/src/config/presets/runtime/kernels/dequant-f32-q4k.json +6 -13
- package/src/config/presets/runtime/kernels/embeddinggemma-q4k-dequant-f32a.json +37 -0
- package/src/config/presets/runtime/kernels/fused-q4k.json +6 -13
- package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f16a.json +33 -0
- package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f32a-nosubgroups.json +33 -0
- package/src/config/presets/runtime/kernels/gemma2-q4k-fused-f32a.json +33 -0
- package/src/config/presets/runtime/kernels/safe-q4k.json +6 -13
- package/src/config/presets/runtime/platform/metal-apple-q4k.json +1 -1
- package/src/config/runtime.js +6 -1
- package/src/config/schema/debug.schema.d.ts +5 -0
- package/src/config/schema/doppler.schema.js +16 -21
- package/src/config/schema/inference-defaults.schema.js +3 -3
- package/src/config/schema/kernel-path.schema.d.ts +5 -1
- package/src/config/schema/kernel-thresholds.schema.js +12 -4
- package/src/config/schema/manifest.schema.d.ts +2 -1
- package/src/config/schema/manifest.schema.js +16 -3
- package/src/config/training-defaults.js +30 -22
- package/src/converter/conversion-plan.js +94 -9
- package/src/converter/core.d.ts +7 -0
- package/src/converter/core.js +14 -9
- package/src/converter/execution-v0-manifest.js +4 -1
- package/src/converter/index.d.ts +1 -0
- package/src/converter/index.js +1 -0
- package/src/converter/manifest-inference.js +43 -12
- package/src/converter/parsers/diffusion.js +0 -3
- package/src/converter/quantization-info.js +35 -15
- package/src/converter/shard-packer.d.ts +1 -1
- package/src/converter/shard-packer.js +4 -1
- package/src/debug/config.js +123 -11
- package/src/debug/signals.js +7 -1
- package/src/debug/tensor.d.ts +2 -0
- package/src/debug/tensor.js +13 -2
- package/src/distribution/p2p-control-plane.js +52 -12
- package/src/distribution/p2p-observability.js +43 -7
- package/src/distribution/p2p-webrtc-browser.js +20 -0
- package/src/distribution/shard-delivery.js +77 -26
- package/src/formats/gguf/types.js +33 -16
- package/src/formats/rdrr/groups.d.ts +12 -4
- package/src/formats/rdrr/groups.js +3 -6
- package/src/formats/rdrr/parsing.js +39 -2
- package/src/formats/rdrr/types.d.ts +2 -1
- package/src/gpu/command-recorder.js +86 -61
- package/src/gpu/device.d.ts +1 -0
- package/src/gpu/device.js +73 -19
- package/src/gpu/kernel-tuner/benchmarks.js +326 -316
- package/src/gpu/kernel-tuner/cache.js +71 -4
- package/src/gpu/kernel-tuner/tuner.js +22 -4
- package/src/gpu/kernels/attention.js +15 -34
- package/src/gpu/kernels/backward/adam.js +62 -58
- package/src/gpu/kernels/backward/attention_backward.js +257 -169
- package/src/gpu/kernels/backward/conv2d_backward.js +14 -1
- package/src/gpu/kernels/cast.js +191 -149
- package/src/gpu/kernels/check-stop.js +33 -44
- package/src/gpu/kernels/conv2d.js +27 -17
- package/src/gpu/kernels/cross_entropy_loss.js +21 -15
- package/src/gpu/kernels/depthwise_conv2d.js +36 -26
- package/src/gpu/kernels/dequant.js +178 -126
- package/src/gpu/kernels/energy.d.ts +3 -21
- package/src/gpu/kernels/energy.js +111 -88
- package/src/gpu/kernels/feature-check.js +1 -1
- package/src/gpu/kernels/fused_ffn.js +84 -65
- package/src/gpu/kernels/fused_matmul_residual.js +56 -33
- package/src/gpu/kernels/fused_matmul_rmsnorm.js +62 -45
- package/src/gpu/kernels/gather.js +33 -15
- package/src/gpu/kernels/gelu.js +19 -11
- package/src/gpu/kernels/grouped_pointwise_conv2d.js +33 -23
- package/src/gpu/kernels/groupnorm.js +34 -23
- package/src/gpu/kernels/kv-quantize.js +5 -2
- package/src/gpu/kernels/layernorm.js +35 -19
- package/src/gpu/kernels/logit-merge.js +5 -3
- package/src/gpu/kernels/matmul.js +58 -39
- package/src/gpu/kernels/modulate.js +23 -15
- package/src/gpu/kernels/moe.js +221 -175
- package/src/gpu/kernels/pixel_shuffle.js +22 -14
- package/src/gpu/kernels/relu.js +18 -10
- package/src/gpu/kernels/repeat_channels.js +25 -17
- package/src/gpu/kernels/residual.js +37 -27
- package/src/gpu/kernels/rmsnorm.js +57 -41
- package/src/gpu/kernels/rope.js +3 -0
- package/src/gpu/kernels/sample.js +27 -38
- package/src/gpu/kernels/sana_linear_attention.js +18 -10
- package/src/gpu/kernels/scale.js +18 -11
- package/src/gpu/kernels/shader-cache.js +4 -2
- package/src/gpu/kernels/silu.js +120 -72
- package/src/gpu/kernels/softmax.js +44 -25
- package/src/gpu/kernels/split_qkv.js +23 -13
- package/src/gpu/kernels/transpose.js +18 -10
- package/src/gpu/kernels/transpose.wgsl +5 -3
- package/src/gpu/kernels/upsample2d.js +21 -13
- package/src/gpu/kernels/utils.js +20 -13
- package/src/gpu/partitioned-buffer-pool.js +10 -2
- package/src/gpu/perf-guards.js +2 -9
- package/src/gpu/profiler.js +27 -22
- package/src/gpu/readback-utils.d.ts +16 -0
- package/src/gpu/readback-utils.js +41 -0
- package/src/gpu/submit-tracker.js +13 -0
- package/src/gpu/uniform-cache.d.ts +1 -0
- package/src/gpu/uniform-cache.js +30 -9
- package/src/hotswap/intent-bundle.js +6 -0
- package/src/hotswap/manifest.d.ts +10 -1
- package/src/hotswap/manifest.js +12 -2
- package/src/hotswap/runtime.js +30 -8
- package/src/index-browser.d.ts +44 -0
- package/src/index-browser.js +14 -0
- package/src/inference/browser-harness-contract-helpers.d.ts +5 -0
- package/src/inference/browser-harness-contract-helpers.js +28 -0
- package/src/inference/browser-harness-diffusion-energy-suites.d.ts +2 -0
- package/src/inference/browser-harness-diffusion-energy-suites.js +269 -0
- package/src/inference/browser-harness-model-helpers.d.ts +16 -0
- package/src/inference/browser-harness-model-helpers.js +217 -0
- package/src/inference/browser-harness-report-helpers.d.ts +7 -0
- package/src/inference/browser-harness-report-helpers.js +42 -0
- package/src/inference/browser-harness-runtime-helpers.d.ts +61 -0
- package/src/inference/browser-harness-runtime-helpers.js +415 -0
- package/src/inference/browser-harness-suite-helpers.d.ts +28 -0
- package/src/inference/browser-harness-suite-helpers.js +268 -0
- package/src/inference/browser-harness-text-helpers.d.ts +27 -0
- package/src/inference/browser-harness-text-helpers.js +788 -0
- package/src/inference/browser-harness.d.ts +6 -0
- package/src/inference/browser-harness.js +130 -1996
- package/src/inference/kv-cache/base.js +140 -94
- package/src/inference/kv-cache/tiered.js +5 -3
- package/src/inference/moe-router.js +88 -56
- package/src/inference/multi-model-network.js +5 -3
- package/src/inference/network-evolution.d.ts +11 -2
- package/src/inference/network-evolution.js +20 -21
- package/src/inference/pipelines/context.d.ts +3 -0
- package/src/inference/pipelines/context.js +142 -2
- package/src/inference/pipelines/diffusion/helpers.js +7 -2
- package/src/inference/pipelines/diffusion/pipeline.js +2 -1
- package/src/inference/pipelines/diffusion/sd3-transformer.js +10 -10
- package/src/inference/pipelines/diffusion/vae.js +3 -7
- package/src/inference/pipelines/energy/pipeline.js +27 -21
- package/src/inference/pipelines/energy/quintel.d.ts +5 -0
- package/src/inference/pipelines/energy/quintel.js +11 -0
- package/src/inference/pipelines/energy-head/row-head-pipeline.js +17 -13
- package/src/inference/pipelines/structured/json-head-pipeline.js +26 -11
- package/src/inference/pipelines/text/attention/projections.js +151 -101
- package/src/inference/pipelines/text/attention/record.js +62 -8
- package/src/inference/pipelines/text/attention/run.js +62 -8
- package/src/inference/pipelines/text/config.js +3 -4
- package/src/inference/pipelines/text/embed.js +2 -8
- package/src/inference/pipelines/text/execution-plan.js +41 -19
- package/src/inference/pipelines/text/execution-v0-contract-helpers.d.ts +59 -0
- package/src/inference/pipelines/text/execution-v0-contract-helpers.js +937 -0
- package/src/inference/pipelines/text/execution-v0-runtime-builders.d.ts +15 -0
- package/src/inference/pipelines/text/execution-v0-runtime-builders.js +279 -0
- package/src/inference/pipelines/text/execution-v0.js +62 -1013
- package/src/inference/pipelines/text/generator-steps.d.ts +46 -0
- package/src/inference/pipelines/text/generator-steps.js +298 -207
- package/src/inference/pipelines/text/generator.js +6 -23
- package/src/inference/pipelines/text/init.js +78 -20
- package/src/inference/pipelines/text/kernel-path-auto-select.js +2 -0
- package/src/inference/pipelines/text/kernel-trace.d.ts +2 -0
- package/src/inference/pipelines/text/kernel-trace.js +6 -0
- package/src/inference/pipelines/text/layer.js +3 -9
- package/src/inference/pipelines/text/linear-attention.d.ts +10 -0
- package/src/inference/pipelines/text/linear-attention.js +80 -6
- package/src/inference/pipelines/text/logits/gpu.js +10 -5
- package/src/inference/pipelines/text/logits/index.js +10 -11
- package/src/inference/pipelines/text/logits/utils.d.ts +7 -0
- package/src/inference/pipelines/text/logits/utils.js +9 -0
- package/src/inference/pipelines/text/lora-apply.js +50 -32
- package/src/inference/pipelines/text/model-load.js +279 -104
- package/src/inference/pipelines/text/moe-cache.js +5 -4
- package/src/inference/pipelines/text/moe-cpu-gptoss.js +74 -69
- package/src/inference/pipelines/text/moe-cpu.js +42 -38
- package/src/inference/pipelines/text/moe-gpu.js +110 -86
- package/src/inference/pipelines/text/ops.js +90 -90
- package/src/inference/pipelines/text/probes.js +9 -9
- package/src/inference/pipelines/text/weights.js +17 -7
- package/src/inference/pipelines/text.js +13 -1
- package/src/inference/speculative.d.ts +2 -2
- package/src/inference/speculative.js +4 -18
- package/src/inference/test-harness.d.ts +1 -1
- package/src/inference/test-harness.js +15 -5
- package/src/inference/tokenizer.d.ts +0 -5
- package/src/inference/tokenizer.js +4 -23
- package/src/inference/tokenizers/bpe.js +9 -0
- package/src/inference/tokenizers/bundled.js +20 -0
- package/src/inference/tokenizers/sentencepiece.js +12 -0
- package/src/loader/doppler-loader.js +38 -22
- package/src/loader/dtype-utils.js +3 -44
- package/src/loader/embedding-loader.js +7 -3
- package/src/loader/experts/expert-cache.js +13 -6
- package/src/loader/experts/expert-loader.js +10 -6
- package/src/loader/final-weights-loader.js +8 -4
- package/src/loader/layer-loader.js +2 -1
- package/src/loader/loader-state.js +2 -2
- package/src/loader/memory-monitor.js +8 -0
- package/src/loader/multi-model-loader.d.ts +14 -0
- package/src/loader/multi-model-loader.js +70 -24
- package/src/loader/shard-cache.js +81 -12
- package/src/loader/shard-resolver.js +25 -3
- package/src/loader/tensors/tensor-loader.js +209 -144
- package/src/loader/tensors/tensor-reader.js +76 -19
- package/src/loader/weight-downcast.js +1 -1
- package/src/memory/buffer-pool.d.ts +9 -1
- package/src/memory/buffer-pool.js +109 -44
- package/src/memory/unified-detect.js +1 -1
- package/src/rules/inference/kernel-path.rules.json +24 -8
- package/src/rules/rule-registry.js +25 -1
- package/src/storage/backends/opfs-store.js +68 -24
- package/src/storage/downloader.js +364 -83
- package/src/storage/index.d.ts +3 -0
- package/src/storage/index.js +3 -0
- package/src/storage/preflight.d.ts +2 -2
- package/src/storage/preflight.js +24 -2
- package/src/storage/quickstart-downloader.js +11 -5
- package/src/storage/registry.js +10 -4
- package/src/storage/reports.js +1 -1
- package/src/storage/shard-manager.d.ts +15 -1
- package/src/storage/shard-manager.js +51 -3
- package/src/storage/source-artifact-store.d.ts +52 -0
- package/src/storage/source-artifact-store.js +234 -0
- package/src/tooling/command-api-constants.d.ts +9 -0
- package/src/tooling/command-api-constants.js +9 -0
- package/src/tooling/command-api-family-normalizers.d.ts +9 -0
- package/src/tooling/command-api-family-normalizers.js +343 -0
- package/src/tooling/command-api-helpers.d.ts +25 -0
- package/src/tooling/command-api-helpers.js +262 -0
- package/src/tooling/command-api.js +16 -602
- package/src/tooling/command-envelope.js +4 -1
- package/src/tooling/command-runner-shared.js +52 -18
- package/src/tooling/lean-execution-contract.js +150 -3
- package/src/tooling/node-browser-command-runner.js +161 -271
- package/src/tooling/node-command-runner.js +29 -3
- package/src/tooling/node-converter.js +27 -1
- package/src/tooling/node-source-runtime.d.ts +1 -1
- package/src/tooling/node-source-runtime.js +84 -3
- package/src/tooling/node-webgpu.js +24 -21
- package/src/tooling/opfs-cache.js +21 -4
- package/src/tooling/runtime-input-composition.d.ts +38 -0
- package/src/tooling/runtime-input-composition.js +86 -0
- package/src/tooling/source-runtime-bundle.d.ts +40 -5
- package/src/tooling/source-runtime-bundle.js +261 -34
- package/src/tooling/source-runtime-materializer.d.ts +6 -0
- package/src/tooling/source-runtime-materializer.js +93 -0
- package/src/training/attention-backward.js +32 -17
- package/src/training/autograd.js +80 -52
- package/src/training/checkpoint-watch.d.ts +2 -1
- package/src/training/checkpoint-watch.js +39 -6
- package/src/training/checkpoint.js +40 -11
- package/src/training/clip.js +2 -1
- package/src/training/datasets/token-batch.js +20 -8
- package/src/training/distillation/checkpoint-watch.js +1 -0
- package/src/training/distillation/student-fixture.d.ts +22 -0
- package/src/training/distillation/student-fixture.js +846 -0
- package/src/training/distillation/suite-data.d.ts +45 -0
- package/src/training/distillation/suite-data.js +189 -0
- package/src/training/lora-pipeline.js +4 -7
- package/src/training/lora.js +26 -12
- package/src/training/loss.js +5 -6
- package/src/training/objectives/cross_entropy.js +2 -5
- package/src/training/objectives/distill_kd.js +4 -8
- package/src/training/objectives/distill_triplet.js +4 -8
- package/src/training/objectives/ul_stage2_base.js +4 -8
- package/src/training/operator-command.js +2 -0
- package/src/training/optimizer.js +19 -7
- package/src/training/runner.js +2 -1
- package/src/training/suite.js +18 -978
- package/src/training/tensor-factory.d.ts +9 -0
- package/src/training/tensor-factory.js +13 -0
- package/src/training/trainer.js +3 -5
- package/src/training/ul_dataset.js +3 -5
- package/src/training/workloads.js +70 -79
- package/src/version.js +1 -1
- package/tools/convert-safetensors-node.js +22 -16
- package/tools/doppler-cli.js +44 -25
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
import { getDevice, hasFeature, FEATURES } from './device.js';
|
|
4
4
|
import { allowReadback, trackAllocation } from './perf-guards.js';
|
|
5
5
|
import { getUniformCache } from './uniform-cache.js';
|
|
6
|
-
import { isBufferActive, releaseBuffer } from '../memory/buffer-pool.js';
|
|
6
|
+
import { isBufferActive, releaseBuffer, discardBuffer } from '../memory/buffer-pool.js';
|
|
7
7
|
import { log } from '../debug/index.js';
|
|
8
8
|
import { getRuntimeConfig } from '../config/runtime.js';
|
|
9
9
|
|
|
@@ -93,6 +93,9 @@ export class CommandRecorder {
|
|
|
93
93
|
|
|
94
94
|
|
|
95
95
|
#initProfiling() {
|
|
96
|
+
let querySet = null;
|
|
97
|
+
let queryBuffer = null;
|
|
98
|
+
let readbackBuffer = null;
|
|
96
99
|
try {
|
|
97
100
|
const runtimeProfiler = getRuntimeConfig().shared?.debug?.profiler;
|
|
98
101
|
if (!runtimeProfiler) {
|
|
@@ -119,25 +122,31 @@ export class CommandRecorder {
|
|
|
119
122
|
didLogQueryFallback = true;
|
|
120
123
|
}
|
|
121
124
|
|
|
122
|
-
|
|
125
|
+
querySet = this.device.createQuerySet({
|
|
123
126
|
type: 'timestamp',
|
|
124
127
|
count: this.#queryCapacity,
|
|
125
128
|
});
|
|
126
129
|
|
|
127
130
|
// Buffer to hold query results (8 bytes per timestamp = BigUint64)
|
|
128
|
-
|
|
131
|
+
queryBuffer = this.device.createBuffer({
|
|
129
132
|
label: `${this.label}_query_buffer`,
|
|
130
133
|
size: this.#queryCapacity * 8,
|
|
131
134
|
usage: GPUBufferUsage.QUERY_RESOLVE | GPUBufferUsage.COPY_SRC,
|
|
132
135
|
});
|
|
133
136
|
|
|
134
137
|
// Readback buffer
|
|
135
|
-
|
|
138
|
+
readbackBuffer = this.device.createBuffer({
|
|
136
139
|
label: `${this.label}_readback_buffer`,
|
|
137
140
|
size: this.#queryCapacity * 8,
|
|
138
141
|
usage: GPUBufferUsage.MAP_READ | GPUBufferUsage.COPY_DST,
|
|
139
142
|
});
|
|
143
|
+
this.#querySet = querySet;
|
|
144
|
+
this.#queryBuffer = queryBuffer;
|
|
145
|
+
this.#readbackBuffer = readbackBuffer;
|
|
140
146
|
} catch (e) {
|
|
147
|
+
readbackBuffer?.destroy();
|
|
148
|
+
queryBuffer?.destroy();
|
|
149
|
+
querySet?.destroy();
|
|
141
150
|
log.warn('CommandRecorder', `Failed to initialize profiling: ${e}`);
|
|
142
151
|
this.#profilingEnabled = false;
|
|
143
152
|
}
|
|
@@ -277,39 +286,57 @@ export class CommandRecorder {
|
|
|
277
286
|
}
|
|
278
287
|
}
|
|
279
288
|
|
|
289
|
+
#finalizeTrackedBuffers(buffersToDestroy, buffersToRelease, discardPooled) {
|
|
290
|
+
for (const buffer of buffersToDestroy) {
|
|
291
|
+
buffer.destroy();
|
|
292
|
+
}
|
|
293
|
+
for (const buffer of buffersToRelease) {
|
|
294
|
+
if (discardPooled) {
|
|
295
|
+
discardBuffer(buffer);
|
|
296
|
+
} else {
|
|
297
|
+
releaseBuffer(buffer);
|
|
298
|
+
}
|
|
299
|
+
}
|
|
300
|
+
getUniformCache().flushPendingDestruction();
|
|
301
|
+
}
|
|
302
|
+
|
|
303
|
+
#takeTrackedBuffers() {
|
|
304
|
+
const buffersToDestroy = this.#tempBuffers;
|
|
305
|
+
const buffersToRelease = this.#pooledBuffers;
|
|
306
|
+
this.#tempBuffers = [];
|
|
307
|
+
this.#pooledBuffers = [];
|
|
308
|
+
this.#tempBufferSet.clear();
|
|
309
|
+
this.#pooledBufferSet.clear();
|
|
310
|
+
return { buffersToDestroy, buffersToRelease };
|
|
311
|
+
}
|
|
312
|
+
|
|
280
313
|
|
|
281
314
|
submit() {
|
|
282
315
|
if (this.#submitted) {
|
|
283
316
|
throw new Error('[CommandRecorder] Already submitted');
|
|
284
317
|
}
|
|
285
318
|
|
|
286
|
-
// Submit commands
|
|
287
319
|
const submitStart = performance.now();
|
|
288
|
-
this
|
|
320
|
+
const { buffersToDestroy, buffersToRelease } = this.#takeTrackedBuffers();
|
|
321
|
+
try {
|
|
322
|
+
this.device.queue.submit([this.#encoder.finish()]);
|
|
323
|
+
} catch (error) {
|
|
324
|
+
this.#submitted = true;
|
|
325
|
+
this.#submitStartMs = submitStart;
|
|
326
|
+
this.#finalizeTrackedBuffers(buffersToDestroy, buffersToRelease, false);
|
|
327
|
+
this.#destroyProfilingResources();
|
|
328
|
+
throw error;
|
|
329
|
+
}
|
|
330
|
+
|
|
289
331
|
this.#submitted = true;
|
|
290
332
|
this.#submitStartMs = submitStart;
|
|
291
333
|
|
|
292
|
-
const buffersToDestroy = this.#tempBuffers;
|
|
293
|
-
const buffersToRelease = this.#pooledBuffers;
|
|
294
|
-
this.#tempBuffers = [];
|
|
295
|
-
this.#pooledBuffers = [];
|
|
296
|
-
this.#tempBufferSet.clear();
|
|
297
|
-
this.#pooledBufferSet.clear();
|
|
298
|
-
|
|
299
334
|
this.#cleanupPromise = this.device.queue.onSubmittedWorkDone().then(() => {
|
|
300
335
|
this.#submitLatencyMs = performance.now() - submitStart;
|
|
301
|
-
|
|
302
|
-
for (const buffer of buffersToDestroy) {
|
|
303
|
-
buffer.destroy();
|
|
304
|
-
}
|
|
305
|
-
// Release pooled buffers back to the pool
|
|
306
|
-
for (const buffer of buffersToRelease) {
|
|
307
|
-
releaseBuffer(buffer);
|
|
308
|
-
}
|
|
309
|
-
// Safe to destroy evicted uniform buffers now that GPU work is complete
|
|
310
|
-
getUniformCache().flushPendingDestruction();
|
|
336
|
+
this.#finalizeTrackedBuffers(buffersToDestroy, buffersToRelease, false);
|
|
311
337
|
}).catch((err) => {
|
|
312
338
|
log.warn('CommandRecorder', `Deferred cleanup failed: ${ (err).message}`);
|
|
339
|
+
this.#finalizeTrackedBuffers(buffersToDestroy, buffersToRelease, true);
|
|
313
340
|
});
|
|
314
341
|
}
|
|
315
342
|
|
|
@@ -370,55 +397,53 @@ export class CommandRecorder {
|
|
|
370
397
|
}
|
|
371
398
|
|
|
372
399
|
if (this.#profileEntries.length === 0) {
|
|
400
|
+
this.#destroyProfilingResources();
|
|
373
401
|
return {};
|
|
374
402
|
}
|
|
375
403
|
|
|
376
|
-
|
|
377
|
-
await this.device.queue.onSubmittedWorkDone();
|
|
404
|
+
let mapped = false;
|
|
378
405
|
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
const resolveEncoder = this.device.createCommandEncoder({ label: 'profile_resolve' });
|
|
382
|
-
resolveEncoder.resolveQuerySet(this.#querySet, 0, maxIndex, this.#queryBuffer, 0);
|
|
383
|
-
resolveEncoder.copyBufferToBuffer(this.#queryBuffer, 0, this.#readbackBuffer, 0, maxIndex * 8);
|
|
384
|
-
this.device.queue.submit([resolveEncoder.finish()]);
|
|
385
|
-
|
|
386
|
-
if (!allowReadback('CommandRecorder.resolveProfileTimings')) {
|
|
387
|
-
return null;
|
|
388
|
-
}
|
|
389
|
-
|
|
390
|
-
// Read back timestamps
|
|
391
|
-
await this.#readbackBuffer.mapAsync(GPUMapMode.READ);
|
|
392
|
-
const timestamps = new BigUint64Array(this.#readbackBuffer.getMappedRange());
|
|
406
|
+
try {
|
|
407
|
+
await this.device.queue.onSubmittedWorkDone();
|
|
393
408
|
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
409
|
+
const maxIndex = Math.max(...this.#profileEntries.map(e => e.endQueryIndex)) + 1;
|
|
410
|
+
const resolveEncoder = this.device.createCommandEncoder({ label: 'profile_resolve' });
|
|
411
|
+
resolveEncoder.resolveQuerySet(this.#querySet, 0, maxIndex, this.#queryBuffer, 0);
|
|
412
|
+
resolveEncoder.copyBufferToBuffer(this.#queryBuffer, 0, this.#readbackBuffer, 0, maxIndex * 8);
|
|
413
|
+
this.device.queue.submit([resolveEncoder.finish()]);
|
|
397
414
|
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
const durationMs = Number(endNs - startNs) / 1_000_000;
|
|
415
|
+
if (!allowReadback('CommandRecorder.resolveProfileTimings')) {
|
|
416
|
+
return null;
|
|
417
|
+
}
|
|
402
418
|
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
419
|
+
await this.#readbackBuffer.mapAsync(GPUMapMode.READ);
|
|
420
|
+
mapped = true;
|
|
421
|
+
const timestamps = new BigUint64Array(this.#readbackBuffer.getMappedRange());
|
|
422
|
+
const timings = {};
|
|
423
|
+
|
|
424
|
+
for (const entry of this.#profileEntries) {
|
|
425
|
+
const startNs = timestamps[entry.startQueryIndex];
|
|
426
|
+
const endNs = timestamps[entry.endQueryIndex];
|
|
427
|
+
const durationMs = Number(endNs - startNs) / 1_000_000;
|
|
428
|
+
|
|
429
|
+
if (durationMs < 0 || durationMs > 60000) {
|
|
430
|
+
continue;
|
|
431
|
+
}
|
|
432
|
+
|
|
433
|
+
if (timings[entry.label] !== undefined) {
|
|
434
|
+
timings[entry.label] += durationMs;
|
|
435
|
+
} else {
|
|
436
|
+
timings[entry.label] = durationMs;
|
|
437
|
+
}
|
|
406
438
|
}
|
|
407
439
|
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
timings[entry.label] = durationMs;
|
|
440
|
+
return timings;
|
|
441
|
+
} finally {
|
|
442
|
+
if (mapped && this.#readbackBuffer) {
|
|
443
|
+
this.#readbackBuffer.unmap();
|
|
413
444
|
}
|
|
445
|
+
this.#destroyProfilingResources();
|
|
414
446
|
}
|
|
415
|
-
|
|
416
|
-
this.#readbackBuffer.unmap();
|
|
417
|
-
|
|
418
|
-
// Clean up profiling resources after use
|
|
419
|
-
this.#destroyProfilingResources();
|
|
420
|
-
|
|
421
|
-
return timings;
|
|
422
447
|
}
|
|
423
448
|
|
|
424
449
|
|
package/src/gpu/device.d.ts
CHANGED
|
@@ -82,6 +82,7 @@ export function initDevice(): Promise<GPUDevice>;
|
|
|
82
82
|
|
|
83
83
|
/**
|
|
84
84
|
* Register an externally created GPU device for pipeline use.
|
|
85
|
+
* The active device epoch advances and loss handling is attached to the device.
|
|
85
86
|
*/
|
|
86
87
|
export function setDevice(
|
|
87
88
|
device: GPUDevice | null,
|
package/src/gpu/device.js
CHANGED
|
@@ -28,16 +28,47 @@ function advanceDeviceEpoch() {
|
|
|
28
28
|
deviceEpoch += 1;
|
|
29
29
|
}
|
|
30
30
|
|
|
31
|
+
function clearActiveDeviceState() {
|
|
32
|
+
gpuDevice = null;
|
|
33
|
+
kernelCapabilities = null;
|
|
34
|
+
resolvedPlatformConfig = null;
|
|
35
|
+
platformInitialized = false;
|
|
36
|
+
}
|
|
37
|
+
|
|
31
38
|
function isValidGPUBuffer(value) {
|
|
32
39
|
if (!value) {
|
|
33
40
|
return false;
|
|
34
41
|
}
|
|
42
|
+
if (value.__dopplerFakeGPUBuffer === true) {
|
|
43
|
+
return true;
|
|
44
|
+
}
|
|
45
|
+
if (
|
|
46
|
+
typeof value === 'object'
|
|
47
|
+
&& value.constructor?.name === 'FakeBuffer'
|
|
48
|
+
&& typeof value.size === 'number'
|
|
49
|
+
&& typeof value.usage === 'number'
|
|
50
|
+
&& typeof value.destroy === 'function'
|
|
51
|
+
) {
|
|
52
|
+
return true;
|
|
53
|
+
}
|
|
35
54
|
if (typeof GPUBuffer === 'undefined') {
|
|
36
55
|
return true;
|
|
37
56
|
}
|
|
38
57
|
return value instanceof GPUBuffer;
|
|
39
58
|
}
|
|
40
59
|
|
|
60
|
+
function isUsableGPUDevice(device) {
|
|
61
|
+
return !!(
|
|
62
|
+
device
|
|
63
|
+
&& typeof device.createBuffer === 'function'
|
|
64
|
+
&& typeof device.createBindGroup === 'function'
|
|
65
|
+
&& typeof device.createCommandEncoder === 'function'
|
|
66
|
+
&& typeof device.createShaderModule === 'function'
|
|
67
|
+
&& device.queue
|
|
68
|
+
&& typeof device.queue.submit === 'function'
|
|
69
|
+
);
|
|
70
|
+
}
|
|
71
|
+
|
|
41
72
|
function describeBindGroupBufferValue(value) {
|
|
42
73
|
if (value === null) return 'null';
|
|
43
74
|
if (value === undefined) return 'undefined';
|
|
@@ -84,6 +115,39 @@ function wrapDeviceCreateBindGroup(device) {
|
|
|
84
115
|
return device;
|
|
85
116
|
}
|
|
86
117
|
|
|
118
|
+
function registerDeviceLostHandler(device) {
|
|
119
|
+
if (!device || device.__dopplerLossHandlerRegistered) {
|
|
120
|
+
return device;
|
|
121
|
+
}
|
|
122
|
+
|
|
123
|
+
if (device.lost && typeof device.lost.then === 'function') {
|
|
124
|
+
const trackedDevice = device;
|
|
125
|
+
device.lost.then((info) => {
|
|
126
|
+
if (gpuDevice !== trackedDevice) {
|
|
127
|
+
return;
|
|
128
|
+
}
|
|
129
|
+
log.error('GPU', 'Device lost: ' + info.message + ', Reason: ' + info.reason);
|
|
130
|
+
clearActiveDeviceState();
|
|
131
|
+
advanceDeviceEpoch();
|
|
132
|
+
}).catch((error) => {
|
|
133
|
+
if (gpuDevice !== trackedDevice) {
|
|
134
|
+
return;
|
|
135
|
+
}
|
|
136
|
+
log.warn('GPU', 'Device lost handler failed: ' + (error?.message ?? error));
|
|
137
|
+
clearActiveDeviceState();
|
|
138
|
+
advanceDeviceEpoch();
|
|
139
|
+
});
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
Object.defineProperty(device, '__dopplerLossHandlerRegistered', {
|
|
143
|
+
value: true,
|
|
144
|
+
configurable: true,
|
|
145
|
+
enumerable: false,
|
|
146
|
+
writable: false,
|
|
147
|
+
});
|
|
148
|
+
return device;
|
|
149
|
+
}
|
|
150
|
+
|
|
87
151
|
|
|
88
152
|
export const FEATURES = ({
|
|
89
153
|
SHADER_F16: 'shader-f16',
|
|
@@ -219,7 +283,11 @@ async function initializePlatformAndRegistry(adapter) {
|
|
|
219
283
|
export async function initDevice() {
|
|
220
284
|
// Return cached device if available
|
|
221
285
|
if (gpuDevice) {
|
|
222
|
-
|
|
286
|
+
if (isUsableGPUDevice(gpuDevice)) {
|
|
287
|
+
return gpuDevice;
|
|
288
|
+
}
|
|
289
|
+
clearActiveDeviceState();
|
|
290
|
+
advanceDeviceEpoch();
|
|
223
291
|
}
|
|
224
292
|
|
|
225
293
|
if (!isWebGPUAvailable()) {
|
|
@@ -258,18 +326,9 @@ export async function initDevice() {
|
|
|
258
326
|
throw createDopplerError(ERROR_CODES.GPU_DEVICE_FAILED, 'Failed to create WebGPU device');
|
|
259
327
|
}
|
|
260
328
|
wrapDeviceCreateBindGroup(gpuDevice);
|
|
329
|
+
registerDeviceLostHandler(gpuDevice);
|
|
261
330
|
advanceDeviceEpoch();
|
|
262
331
|
|
|
263
|
-
// Set up device lost handler
|
|
264
|
-
gpuDevice.lost.then((info) => {
|
|
265
|
-
log.error('GPU', 'Device lost: ' + info.message + ', Reason: ' + info.reason);
|
|
266
|
-
gpuDevice = null;
|
|
267
|
-
kernelCapabilities = null;
|
|
268
|
-
resolvedPlatformConfig = null;
|
|
269
|
-
platformInitialized = false;
|
|
270
|
-
advanceDeviceEpoch();
|
|
271
|
-
});
|
|
272
|
-
|
|
273
332
|
// Wrap queue for submit tracking (when enabled)
|
|
274
333
|
wrapQueueForTracking(gpuDevice.queue);
|
|
275
334
|
|
|
@@ -301,16 +360,14 @@ export async function initDevice() {
|
|
|
301
360
|
|
|
302
361
|
export function setDevice(device, options = {}) {
|
|
303
362
|
if (!device) {
|
|
304
|
-
|
|
305
|
-
kernelCapabilities = null;
|
|
306
|
-
resolvedPlatformConfig = null;
|
|
307
|
-
platformInitialized = false;
|
|
363
|
+
clearActiveDeviceState();
|
|
308
364
|
advanceDeviceEpoch();
|
|
309
365
|
return;
|
|
310
366
|
}
|
|
311
367
|
|
|
312
368
|
gpuDevice = device;
|
|
313
369
|
wrapDeviceCreateBindGroup(gpuDevice);
|
|
370
|
+
registerDeviceLostHandler(gpuDevice);
|
|
314
371
|
advanceDeviceEpoch();
|
|
315
372
|
wrapQueueForTracking(gpuDevice.queue);
|
|
316
373
|
|
|
@@ -372,10 +429,7 @@ export function isPlatformInitialized() {
|
|
|
372
429
|
export function destroyDevice() {
|
|
373
430
|
if (gpuDevice) {
|
|
374
431
|
gpuDevice.destroy();
|
|
375
|
-
|
|
376
|
-
kernelCapabilities = null;
|
|
377
|
-
resolvedPlatformConfig = null;
|
|
378
|
-
platformInitialized = false;
|
|
432
|
+
clearActiveDeviceState();
|
|
379
433
|
advanceDeviceEpoch();
|
|
380
434
|
}
|
|
381
435
|
}
|