@simulatte/doppler 0.1.6 → 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +145 -0
- package/README.md +16 -23
- package/package.json +30 -32
- package/src/adapters/adapter-registry.js +12 -1
- package/src/adapters/lora-loader.js +23 -6
- package/src/bridge/extension-client.d.ts +5 -0
- package/src/bridge/extension-client.js +40 -0
- package/src/bridge/index.d.ts +2 -1
- package/src/bridge/index.js +6 -4
- package/src/browser/browser-converter.js +31 -1
- package/src/browser/file-picker.js +6 -0
- package/src/browser/safetensors-parser-browser.js +84 -1
- package/src/browser/shard-io-browser.js +2 -2
- package/src/browser/tensor-source-download.js +8 -2
- package/src/browser/tensor-source-http.d.ts +1 -0
- package/src/browser/tensor-source-http.js +5 -1
- package/src/client/doppler-api.browser.js +20 -4
- package/src/client/doppler-api.js +19 -3
- package/src/client/doppler-provider/generation.js +12 -0
- package/src/client/doppler-provider/model-manager.d.ts +10 -0
- package/src/client/doppler-provider/model-manager.js +91 -19
- package/src/client/doppler-provider/source-runtime.d.ts +2 -1
- package/src/client/doppler-provider/source-runtime.js +132 -13
- package/src/client/doppler-registry.json +5 -20
- package/src/config/backward-registry-loader.js +17 -2
- package/src/config/execution-v0-contract-check.js +113 -15
- package/src/config/kernel-path-contract-check.js +57 -29
- package/src/config/kernel-path-loader.d.ts +5 -0
- package/src/config/kernel-path-loader.js +18 -36
- package/src/config/kernels/kernel-ref-digests.js +1 -1
- package/src/config/kernels/registry.js +14 -1
- package/src/config/kernels/registry.json +81 -5
- package/src/config/loader.d.ts +1 -1
- package/src/config/loader.js +15 -2
- package/src/config/merge-contract-check.js +66 -4
- package/src/config/merge-helpers.js +128 -7
- package/src/config/merge.d.ts +1 -0
- package/src/config/merge.js +10 -0
- package/src/config/param-validator.js +47 -2
- package/src/config/presets/kernel-paths/{gemma2-q4k-dequant-f32a.json → gemma2-q4k-dequant-f32a-nosubgroups.json} +3 -3
- package/src/config/presets/kernel-paths/gemma3-f16-fused-f32a-online-streamingprefill.json +223 -0
- package/src/config/presets/kernel-paths/{gemma3-q4k-dequant-f32a.json → gemma3-q4k-dequant-f32a-nosubgroups.json} +3 -3
- package/src/config/presets/kernel-paths/gemma3-q4k-dequant-f32w-f32a-online.json +56 -0
- package/src/config/presets/kernel-paths/lfm2-q4k-dequant-f32a-nosubgroups.json +61 -0
- package/src/config/presets/kernel-paths/registry.json +43 -8
- package/src/config/presets/models/gemma2.json +3 -2
- package/src/config/presets/models/gemma3.json +2 -0
- package/src/config/presets/models/qwen3.json +4 -3
- package/src/config/presets/models/qwen3_5.json +16 -0
- package/src/config/presets/runtime/experiments/bench/gemma3-bench-q4k.json +1 -1
- package/src/config/presets/runtime/experiments/debug/gemma3-debug-q4k.json +1 -1
- package/src/config/presets/runtime/experiments/verify/gemma3-verify.json +1 -1
- package/src/config/presets/runtime/kernels/dequant-f16-q4k.json +6 -13
- package/src/config/presets/runtime/kernels/dequant-f32-q4k.json +6 -13
- package/src/config/presets/runtime/kernels/embeddinggemma-q4k-dequant-f32a.json +37 -0
- package/src/config/presets/runtime/kernels/fused-q4k.json +6 -13
- package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f16a.json +33 -0
- package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f32a-nosubgroups.json +33 -0
- package/src/config/presets/runtime/kernels/gemma2-q4k-fused-f32a.json +33 -0
- package/src/config/presets/runtime/kernels/safe-q4k.json +6 -13
- package/src/config/presets/runtime/model/qwen3-5-layer-probe.json +52 -0
- package/src/config/presets/runtime/model/qwen3-5-linear-attn-debug.json +90 -0
- package/src/config/presets/runtime/platform/metal-apple-q4k.json +1 -1
- package/src/config/runtime.js +6 -1
- package/src/config/schema/conversion.schema.d.ts +1 -0
- package/src/config/schema/debug.schema.d.ts +5 -0
- package/src/config/schema/doppler.schema.js +16 -21
- package/src/config/schema/inference-defaults.schema.js +3 -3
- package/src/config/schema/kernel-path.schema.d.ts +5 -1
- package/src/config/schema/kernel-thresholds.schema.js +12 -4
- package/src/config/schema/manifest.schema.d.ts +3 -2
- package/src/config/schema/manifest.schema.js +17 -4
- package/src/config/schema/storage.schema.js +1 -1
- package/src/config/training-defaults.js +30 -22
- package/src/converter/conversion-plan.js +104 -11
- package/src/converter/core.d.ts +7 -0
- package/src/converter/core.js +16 -9
- package/src/converter/execution-v0-manifest.js +4 -1
- package/src/converter/index.d.ts +1 -0
- package/src/converter/index.js +1 -0
- package/src/converter/manifest-inference.js +50 -29
- package/src/converter/parsers/diffusion.js +0 -3
- package/src/converter/parsers/transformer.js +4 -0
- package/src/converter/quantization-info.js +40 -16
- package/src/converter/quantizer.js +19 -12
- package/src/converter/rope-config.js +8 -6
- package/src/converter/shard-packer.d.ts +1 -1
- package/src/converter/shard-packer.js +4 -1
- package/src/converter/tokenizer-utils.d.ts +1 -0
- package/src/converter/tokenizer-utils.js +4 -1
- package/src/debug/config.js +123 -11
- package/src/debug/reference/hf_qwen35_linear_attn_debug.py +268 -0
- package/src/debug/signals.js +7 -1
- package/src/debug/tensor.d.ts +2 -0
- package/src/debug/tensor.js +13 -2
- package/src/distribution/p2p-control-plane.js +52 -12
- package/src/distribution/p2p-observability.js +43 -7
- package/src/distribution/p2p-webrtc-browser.js +20 -0
- package/src/distribution/shard-delivery.js +83 -27
- package/src/formats/gguf/types.js +33 -16
- package/src/formats/rdrr/groups.d.ts +12 -4
- package/src/formats/rdrr/groups.js +3 -6
- package/src/formats/rdrr/parsing.d.ts +4 -0
- package/src/formats/rdrr/parsing.js +53 -3
- package/src/formats/rdrr/types.d.ts +2 -1
- package/src/gpu/command-recorder.js +86 -61
- package/src/gpu/device.d.ts +1 -0
- package/src/gpu/device.js +73 -19
- package/src/gpu/kernel-tuner/benchmarks.js +326 -316
- package/src/gpu/kernel-tuner/cache.js +71 -4
- package/src/gpu/kernel-tuner/tuner.js +22 -4
- package/src/gpu/kernels/attention.js +15 -34
- package/src/gpu/kernels/backward/adam.js +62 -58
- package/src/gpu/kernels/backward/attention_backward.js +257 -169
- package/src/gpu/kernels/backward/conv2d_backward.js +14 -1
- package/src/gpu/kernels/cast.js +191 -149
- package/src/gpu/kernels/check-stop.js +33 -44
- package/src/gpu/kernels/conv2d.js +27 -17
- package/src/gpu/kernels/cross_entropy_loss.js +21 -15
- package/src/gpu/kernels/depthwise_conv2d.js +36 -26
- package/src/gpu/kernels/dequant.js +178 -126
- package/src/gpu/kernels/energy.d.ts +3 -21
- package/src/gpu/kernels/energy.js +111 -88
- package/src/gpu/kernels/feature-check.js +1 -1
- package/src/gpu/kernels/fused_ffn.js +84 -65
- package/src/gpu/kernels/fused_matmul_residual.js +56 -33
- package/src/gpu/kernels/fused_matmul_rmsnorm.js +62 -45
- package/src/gpu/kernels/gather.js +33 -15
- package/src/gpu/kernels/gelu.js +19 -11
- package/src/gpu/kernels/grouped_pointwise_conv2d.js +33 -23
- package/src/gpu/kernels/groupnorm.js +34 -23
- package/src/gpu/kernels/index.d.ts +8 -0
- package/src/gpu/kernels/index.js +6 -0
- package/src/gpu/kernels/kv-quantize.js +5 -2
- package/src/gpu/kernels/layernorm.js +35 -19
- package/src/gpu/kernels/logit-merge.js +5 -3
- package/src/gpu/kernels/matmul-selection.js +47 -4
- package/src/gpu/kernels/matmul.d.ts +2 -0
- package/src/gpu/kernels/matmul.js +59 -40
- package/src/gpu/kernels/modulate.js +23 -15
- package/src/gpu/kernels/moe.js +221 -175
- package/src/gpu/kernels/pixel_shuffle.js +22 -14
- package/src/gpu/kernels/relu.js +18 -10
- package/src/gpu/kernels/repeat_channels.js +25 -17
- package/src/gpu/kernels/residual.js +37 -27
- package/src/gpu/kernels/rmsnorm.js +66 -43
- package/src/gpu/kernels/rope.js +3 -0
- package/src/gpu/kernels/sample.js +27 -38
- package/src/gpu/kernels/sana_linear_attention.js +18 -10
- package/src/gpu/kernels/scale.js +18 -11
- package/src/gpu/kernels/shader-cache.js +4 -2
- package/src/gpu/kernels/silu.js +120 -72
- package/src/gpu/kernels/softmax.js +44 -25
- package/src/gpu/kernels/split_qg.d.ts +50 -0
- package/src/gpu/kernels/split_qg.js +46 -0
- package/src/gpu/kernels/split_qg.wgsl +58 -0
- package/src/gpu/kernels/split_qg_f16.wgsl +62 -0
- package/src/gpu/kernels/split_qkv.js +23 -13
- package/src/gpu/kernels/transpose.js +18 -10
- package/src/gpu/kernels/transpose.wgsl +5 -3
- package/src/gpu/kernels/upsample2d.js +21 -13
- package/src/gpu/kernels/utils.js +20 -13
- package/src/gpu/partitioned-buffer-pool.js +10 -2
- package/src/gpu/perf-guards.js +2 -9
- package/src/gpu/profiler.js +27 -22
- package/src/gpu/readback-utils.d.ts +16 -0
- package/src/gpu/readback-utils.js +41 -0
- package/src/gpu/submit-tracker.js +13 -0
- package/src/gpu/uniform-cache.d.ts +1 -0
- package/src/gpu/uniform-cache.js +30 -9
- package/src/gpu/weight-buffer.d.ts +1 -1
- package/src/gpu/weight-buffer.js +1 -1
- package/src/hotswap/intent-bundle.js +6 -0
- package/src/hotswap/manifest.d.ts +10 -1
- package/src/hotswap/manifest.js +12 -2
- package/src/hotswap/runtime.js +30 -8
- package/src/index-browser.d.ts +44 -0
- package/src/index-browser.js +14 -0
- package/src/inference/browser-harness-contract-helpers.d.ts +5 -0
- package/src/inference/browser-harness-contract-helpers.js +28 -0
- package/src/inference/browser-harness-diffusion-energy-suites.d.ts +2 -0
- package/src/inference/browser-harness-diffusion-energy-suites.js +269 -0
- package/src/inference/browser-harness-model-helpers.d.ts +16 -0
- package/src/inference/browser-harness-model-helpers.js +217 -0
- package/src/inference/browser-harness-report-helpers.d.ts +7 -0
- package/src/inference/browser-harness-report-helpers.js +42 -0
- package/src/inference/browser-harness-runtime-helpers.d.ts +61 -0
- package/src/inference/browser-harness-runtime-helpers.js +415 -0
- package/src/inference/browser-harness-suite-helpers.d.ts +28 -0
- package/src/inference/browser-harness-suite-helpers.js +268 -0
- package/src/inference/browser-harness-text-helpers.d.ts +27 -0
- package/src/inference/browser-harness-text-helpers.js +788 -0
- package/src/inference/browser-harness.d.ts +8 -0
- package/src/inference/browser-harness.js +149 -1996
- package/src/inference/kv-cache/base.js +140 -94
- package/src/inference/kv-cache/tiered.js +5 -3
- package/src/inference/moe-router.js +88 -56
- package/src/inference/multi-model-network.js +5 -3
- package/src/inference/network-evolution.d.ts +11 -2
- package/src/inference/network-evolution.js +20 -21
- package/src/inference/pipelines/context.d.ts +3 -0
- package/src/inference/pipelines/context.js +142 -2
- package/src/inference/pipelines/diffusion/helpers.js +10 -2
- package/src/inference/pipelines/diffusion/pipeline.js +2 -1
- package/src/inference/pipelines/diffusion/sd3-transformer.js +10 -10
- package/src/inference/pipelines/diffusion/text-encoder-gpu.js +8 -2
- package/src/inference/pipelines/diffusion/vae.js +3 -7
- package/src/inference/pipelines/energy/pipeline.js +27 -21
- package/src/inference/pipelines/energy/quintel.d.ts +5 -0
- package/src/inference/pipelines/energy/quintel.js +11 -0
- package/src/inference/pipelines/energy-head/row-head-pipeline.js +17 -13
- package/src/inference/pipelines/structured/json-head-pipeline.js +26 -11
- package/src/inference/pipelines/text/attention/output-projection.d.ts +12 -0
- package/src/inference/pipelines/text/attention/output-projection.js +8 -0
- package/src/inference/pipelines/text/attention/projections.d.ts +10 -1
- package/src/inference/pipelines/text/attention/projections.js +192 -112
- package/src/inference/pipelines/text/attention/record.js +77 -14
- package/src/inference/pipelines/text/attention/run.js +112 -14
- package/src/inference/pipelines/text/config.js +17 -4
- package/src/inference/pipelines/text/embed.js +2 -8
- package/src/inference/pipelines/text/execution-plan.js +46 -23
- package/src/inference/pipelines/text/execution-v0-contract-helpers.d.ts +59 -0
- package/src/inference/pipelines/text/execution-v0-contract-helpers.js +937 -0
- package/src/inference/pipelines/text/execution-v0-runtime-builders.d.ts +15 -0
- package/src/inference/pipelines/text/execution-v0-runtime-builders.js +279 -0
- package/src/inference/pipelines/text/execution-v0.js +62 -1013
- package/src/inference/pipelines/text/generator-runtime.js +5 -0
- package/src/inference/pipelines/text/generator-steps.d.ts +52 -0
- package/src/inference/pipelines/text/generator-steps.js +340 -221
- package/src/inference/pipelines/text/generator.js +56 -40
- package/src/inference/pipelines/text/init.d.ts +13 -0
- package/src/inference/pipelines/text/init.js +94 -25
- package/src/inference/pipelines/text/kernel-path-auto-select.js +2 -0
- package/src/inference/pipelines/text/kernel-trace.d.ts +2 -0
- package/src/inference/pipelines/text/kernel-trace.js +6 -0
- package/src/inference/pipelines/text/layer.js +4 -9
- package/src/inference/pipelines/text/linear-attention.d.ts +15 -0
- package/src/inference/pipelines/text/linear-attention.js +113 -9
- package/src/inference/pipelines/text/logits/gpu.js +12 -7
- package/src/inference/pipelines/text/logits/index.d.ts +6 -1
- package/src/inference/pipelines/text/logits/index.js +13 -12
- package/src/inference/pipelines/text/logits/utils.d.ts +7 -0
- package/src/inference/pipelines/text/logits/utils.js +9 -0
- package/src/inference/pipelines/text/lora-apply.js +50 -32
- package/src/inference/pipelines/text/model-load.js +282 -104
- package/src/inference/pipelines/text/moe-cache.js +5 -4
- package/src/inference/pipelines/text/moe-cpu-gptoss.js +74 -69
- package/src/inference/pipelines/text/moe-cpu.js +42 -38
- package/src/inference/pipelines/text/moe-gpu.js +110 -86
- package/src/inference/pipelines/text/ops.js +90 -90
- package/src/inference/pipelines/text/probes.js +9 -9
- package/src/inference/pipelines/text/sampling.js +52 -6
- package/src/inference/pipelines/text/weights.js +17 -7
- package/src/inference/pipelines/text.js +13 -1
- package/src/inference/speculative.d.ts +2 -2
- package/src/inference/speculative.js +4 -18
- package/src/inference/test-harness.d.ts +1 -1
- package/src/inference/test-harness.js +17 -7
- package/src/inference/tokenizer.d.ts +0 -5
- package/src/inference/tokenizer.js +4 -23
- package/src/inference/tokenizers/bpe.js +9 -0
- package/src/inference/tokenizers/bundled.js +20 -0
- package/src/inference/tokenizers/sentencepiece.js +12 -0
- package/src/loader/doppler-loader.js +38 -22
- package/src/loader/dtype-utils.js +3 -44
- package/src/loader/embedding-loader.js +7 -3
- package/src/loader/experts/expert-cache.js +13 -6
- package/src/loader/experts/expert-loader.js +10 -6
- package/src/loader/final-weights-loader.js +10 -4
- package/src/loader/layer-loader.js +2 -1
- package/src/loader/loader-state.js +2 -2
- package/src/loader/memory-monitor.js +8 -0
- package/src/loader/multi-model-loader.d.ts +14 -0
- package/src/loader/multi-model-loader.js +70 -24
- package/src/loader/shard-cache.js +84 -14
- package/src/loader/shard-resolver.js +25 -3
- package/src/loader/tensors/tensor-loader.js +214 -144
- package/src/loader/tensors/tensor-reader.js +76 -19
- package/src/loader/weight-downcast.js +1 -1
- package/src/memory/buffer-pool.d.ts +9 -1
- package/src/memory/buffer-pool.js +109 -44
- package/src/memory/unified-detect.js +1 -1
- package/src/rules/inference/dtype.rules.json +5 -0
- package/src/rules/inference/kernel-path.rules.json +24 -8
- package/src/rules/kernels/split-qg.rules.json +6 -0
- package/src/rules/rule-registry.js +27 -1
- package/src/storage/backends/opfs-store.js +68 -24
- package/src/storage/downloader.js +365 -83
- package/src/storage/index.d.ts +3 -0
- package/src/storage/index.js +3 -0
- package/src/storage/preflight.d.ts +2 -2
- package/src/storage/preflight.js +24 -2
- package/src/storage/quickstart-downloader.js +11 -5
- package/src/storage/registry.js +10 -4
- package/src/storage/reports.js +1 -1
- package/src/storage/shard-manager.d.ts +15 -1
- package/src/storage/shard-manager.js +55 -6
- package/src/storage/source-artifact-store.d.ts +52 -0
- package/src/storage/source-artifact-store.js +234 -0
- package/src/tooling/command-api-constants.d.ts +9 -0
- package/src/tooling/command-api-constants.js +9 -0
- package/src/tooling/command-api-family-normalizers.d.ts +9 -0
- package/src/tooling/command-api-family-normalizers.js +343 -0
- package/src/tooling/command-api-helpers.d.ts +25 -0
- package/src/tooling/command-api-helpers.js +262 -0
- package/src/tooling/command-api.js +16 -602
- package/src/tooling/command-envelope.js +4 -1
- package/src/tooling/command-runner-shared.js +52 -18
- package/src/tooling/conversion-config-materializer.js +3 -5
- package/src/tooling/lean-execution-contract.js +150 -3
- package/src/tooling/node-browser-command-runner.js +161 -271
- package/src/tooling/node-command-runner.js +29 -3
- package/src/tooling/node-converter.js +30 -1
- package/src/tooling/node-source-runtime.d.ts +1 -1
- package/src/tooling/node-source-runtime.js +120 -3
- package/src/tooling/node-webgpu.js +24 -21
- package/src/tooling/opfs-cache.js +21 -4
- package/src/tooling/runtime-input-composition.d.ts +38 -0
- package/src/tooling/runtime-input-composition.js +86 -0
- package/src/tooling/source-runtime-bundle.d.ts +40 -5
- package/src/tooling/source-runtime-bundle.js +261 -34
- package/src/tooling/source-runtime-materializer.d.ts +6 -0
- package/src/tooling/source-runtime-materializer.js +93 -0
- package/src/training/attention-backward.js +32 -17
- package/src/training/autograd.js +80 -52
- package/src/training/checkpoint-watch.d.ts +2 -1
- package/src/training/checkpoint-watch.js +39 -6
- package/src/training/checkpoint.js +40 -11
- package/src/training/clip.js +2 -1
- package/src/training/datasets/token-batch.js +20 -8
- package/src/training/distillation/checkpoint-watch.js +1 -0
- package/src/training/distillation/student-fixture.d.ts +22 -0
- package/src/training/distillation/student-fixture.js +846 -0
- package/src/training/distillation/suite-data.d.ts +45 -0
- package/src/training/distillation/suite-data.js +189 -0
- package/src/training/lora-pipeline.js +4 -7
- package/src/training/lora.js +26 -12
- package/src/training/loss.js +5 -6
- package/src/training/objectives/cross_entropy.js +2 -5
- package/src/training/objectives/distill_kd.js +4 -8
- package/src/training/objectives/distill_triplet.js +4 -8
- package/src/training/objectives/ul_stage2_base.js +4 -8
- package/src/training/operator-command.js +2 -0
- package/src/training/optimizer.js +19 -7
- package/src/training/runner.js +2 -1
- package/src/training/suite.js +18 -978
- package/src/training/tensor-factory.d.ts +9 -0
- package/src/training/tensor-factory.js +13 -0
- package/src/training/trainer.js +3 -5
- package/src/training/ul_dataset.js +3 -5
- package/src/training/workloads.js +70 -79
- package/src/types/model.d.ts +5 -0
- package/src/version.js +1 -1
- package/tools/convert-safetensors-node.js +22 -16
- package/tools/doppler-cli.js +50 -26
|
@@ -2,6 +2,10 @@
|
|
|
2
2
|
import { DEFAULT_QUANTIZATION_DEFAULTS, DEFAULT_Q4K_LAYOUT } from '../config/index.js';
|
|
3
3
|
import { classifyTensorRole } from '../formats/rdrr/index.js';
|
|
4
4
|
|
|
5
|
+
// Default quantization tag when no explicit dtype is provided.
|
|
6
|
+
// F16 is the canonical unquantized storage format for WebGPU inference.
|
|
7
|
+
const DEFAULT_QUANT_TAG = 'f16';
|
|
8
|
+
|
|
5
9
|
// Quantization tag aliases mapped to canonical names.
|
|
6
10
|
// Add new aliases here rather than adding if/else branches.
|
|
7
11
|
const QUANT_TAG_ALIASES = {
|
|
@@ -47,7 +51,7 @@ const QUANT_TAG_ALIASES = {
|
|
|
47
51
|
};
|
|
48
52
|
|
|
49
53
|
export function normalizeQuantTag(value) {
|
|
50
|
-
if (!value) return
|
|
54
|
+
if (!value) return DEFAULT_QUANT_TAG;
|
|
51
55
|
const lower = value.toLowerCase();
|
|
52
56
|
return QUANT_TAG_ALIASES[lower] ?? lower;
|
|
53
57
|
}
|
|
@@ -91,6 +95,7 @@ export function buildVariantTag(info) {
|
|
|
91
95
|
const weights = info.weights;
|
|
92
96
|
const embeddings = info.embeddings ?? weights;
|
|
93
97
|
const lmHead = info.lmHead ?? embeddings;
|
|
98
|
+
const compute = info.compute ? normalizeQuantTag(info.compute) : null;
|
|
94
99
|
const experts = info.experts ?? null;
|
|
95
100
|
const layout = info.layout ?? null;
|
|
96
101
|
|
|
@@ -100,30 +105,42 @@ export function buildVariantTag(info) {
|
|
|
100
105
|
? `${weights}${layout === 'row' ? '' : '-col'}`
|
|
101
106
|
: weights;
|
|
102
107
|
|
|
103
|
-
const parts = [
|
|
108
|
+
const parts = [weightTag];
|
|
109
|
+
const groupedRolesByDtype = new Map();
|
|
110
|
+
const GROUPED_ROLE_ORDER = ['e', 'h', 'a'];
|
|
104
111
|
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
112
|
+
const addGroupedRole = (role, dtype) => {
|
|
113
|
+
if (!dtype || dtype === weights) return;
|
|
114
|
+
const existing = groupedRolesByDtype.get(dtype) ?? [];
|
|
115
|
+
if (!existing.includes(role)) {
|
|
116
|
+
existing.push(role);
|
|
117
|
+
groupedRolesByDtype.set(dtype, existing);
|
|
118
|
+
}
|
|
119
|
+
};
|
|
108
120
|
|
|
109
|
-
|
|
110
|
-
|
|
121
|
+
addGroupedRole('e', embeddings);
|
|
122
|
+
addGroupedRole('h', lmHead);
|
|
123
|
+
addGroupedRole('a', compute);
|
|
124
|
+
|
|
125
|
+
for (const [dtype, roles] of groupedRolesByDtype.entries()) {
|
|
126
|
+
const orderedRoles = GROUPED_ROLE_ORDER.filter((role) => roles.includes(role));
|
|
127
|
+
parts.push(`${orderedRoles.join('')}${dtype}`);
|
|
111
128
|
}
|
|
112
129
|
|
|
113
130
|
if (experts && experts !== weights) {
|
|
114
131
|
parts.push(`x${experts}`);
|
|
115
132
|
}
|
|
116
133
|
|
|
117
|
-
if (info.vision) {
|
|
134
|
+
if (info.vision && info.vision !== weights) {
|
|
118
135
|
parts.push(`v${info.vision}`);
|
|
119
136
|
}
|
|
120
|
-
if (info.audio) {
|
|
121
|
-
parts.push(`
|
|
137
|
+
if (info.audio && info.audio !== weights) {
|
|
138
|
+
parts.push(`audio${info.audio}`);
|
|
122
139
|
}
|
|
123
|
-
if (info.tts) {
|
|
124
|
-
parts.push(`
|
|
140
|
+
if (info.tts && info.tts !== weights) {
|
|
141
|
+
parts.push(`tts${info.tts}`);
|
|
125
142
|
}
|
|
126
|
-
if (info.projector) {
|
|
143
|
+
if (info.projector && info.projector !== weights) {
|
|
127
144
|
parts.push(`p${info.projector}`);
|
|
128
145
|
}
|
|
129
146
|
|
|
@@ -167,9 +184,16 @@ const Q4K_LAYOUT_ALIASES = {
|
|
|
167
184
|
};
|
|
168
185
|
|
|
169
186
|
export function normalizeQ4KLayout(value) {
|
|
170
|
-
if (
|
|
171
|
-
const lower = String(value).toLowerCase().replace(/_/g, '');
|
|
172
|
-
|
|
187
|
+
if (value == null) return null;
|
|
188
|
+
const lower = String(value).trim().toLowerCase().replace(/_/g, '');
|
|
189
|
+
if (!lower) return null;
|
|
190
|
+
const normalized = Q4K_LAYOUT_ALIASES[lower];
|
|
191
|
+
if (!normalized) {
|
|
192
|
+
throw new Error(
|
|
193
|
+
`converter.quantization.q4kLayout must be "row" or "col"; got ${JSON.stringify(value)}.`
|
|
194
|
+
);
|
|
195
|
+
}
|
|
196
|
+
return normalized;
|
|
173
197
|
}
|
|
174
198
|
|
|
175
199
|
export function buildQuantizationInfo(
|
|
@@ -74,9 +74,10 @@ function findMinMax(data, offset, length) {
|
|
|
74
74
|
return { min, max };
|
|
75
75
|
}
|
|
76
76
|
|
|
77
|
-
|
|
77
|
+
function quantizeQ4KBlockWithValidLength(data, offset, validLength = QK_K) {
|
|
78
78
|
const block = new Uint8Array(QK4_K_BLOCK_SIZE);
|
|
79
79
|
const blockView = new DataView(block.buffer);
|
|
80
|
+
const clampedValidLength = Math.max(0, Math.min(QK_K, Math.trunc(validLength)));
|
|
80
81
|
|
|
81
82
|
const scales = new Float32Array(8);
|
|
82
83
|
const minOffsets = new Float32Array(8);
|
|
@@ -84,14 +85,22 @@ export function quantizeQ4KBlock(data, offset) {
|
|
|
84
85
|
|
|
85
86
|
for (let sb = 0; sb < 8; sb++) {
|
|
86
87
|
const sbOffset = offset + sb * 32;
|
|
87
|
-
const
|
|
88
|
+
const subblockStart = sb * 32;
|
|
89
|
+
const validInSubblock = Math.max(0, Math.min(32, clampedValidLength - subblockStart));
|
|
90
|
+
if (validInSubblock === 0) {
|
|
91
|
+
scales[sb] = 0;
|
|
92
|
+
minOffsets[sb] = 0;
|
|
93
|
+
continue;
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
const { min, max } = findMinMax(data, sbOffset, validInSubblock);
|
|
88
97
|
|
|
89
98
|
minOffsets[sb] = -min;
|
|
90
99
|
const range = max - min;
|
|
91
100
|
scales[sb] = range > 0 ? range / 15 : 0;
|
|
92
101
|
|
|
93
102
|
const invScale = scales[sb] > 0 ? 1 / scales[sb] : 0;
|
|
94
|
-
for (let i = 0; i <
|
|
103
|
+
for (let i = 0; i < validInSubblock; i++) {
|
|
95
104
|
const val = data[sbOffset + i];
|
|
96
105
|
let q = Math.round((val - min) * invScale);
|
|
97
106
|
q = Math.max(0, Math.min(15, q));
|
|
@@ -155,6 +164,10 @@ export function quantizeQ4KBlock(data, offset) {
|
|
|
155
164
|
return block;
|
|
156
165
|
}
|
|
157
166
|
|
|
167
|
+
export function quantizeQ4KBlock(data, offset) {
|
|
168
|
+
return quantizeQ4KBlockWithValidLength(data, offset, QK_K);
|
|
169
|
+
}
|
|
170
|
+
|
|
158
171
|
function dequantizeQ4KBlock(block) {
|
|
159
172
|
const blockView = new DataView(block.buffer, block.byteOffset);
|
|
160
173
|
const result = new Float32Array(256);
|
|
@@ -245,22 +258,16 @@ export function quantizeToQ4KMRowWise(data, shape) {
|
|
|
245
258
|
}
|
|
246
259
|
|
|
247
260
|
const blocksPerRow = Math.ceil(cols / QK_K);
|
|
248
|
-
const paddedColsPerRow = blocksPerRow * QK_K;
|
|
249
261
|
const totalBlocks = rows * blocksPerRow;
|
|
250
262
|
|
|
251
263
|
const quantized = new Uint8Array(totalBlocks * QK4_K_BLOCK_SIZE);
|
|
252
264
|
|
|
253
265
|
for (let row = 0; row < rows; row++) {
|
|
254
|
-
// Extract and pad this row
|
|
255
|
-
const rowData = new Float32Array(paddedColsPerRow);
|
|
256
|
-
const srcOffset = row * cols;
|
|
257
|
-
for (let c = 0; c < cols; c++) {
|
|
258
|
-
rowData[c] = data[srcOffset + c];
|
|
259
|
-
}
|
|
260
|
-
|
|
261
266
|
// Quantize each block in this row
|
|
262
267
|
for (let b = 0; b < blocksPerRow; b++) {
|
|
263
|
-
const
|
|
268
|
+
const validLength = Math.max(0, Math.min(QK_K, cols - b * QK_K));
|
|
269
|
+
const srcOffset = row * cols + b * QK_K;
|
|
270
|
+
const block = quantizeQ4KBlockWithValidLength(data, srcOffset, validLength);
|
|
264
271
|
const dstOffset = (row * blocksPerRow + b) * QK4_K_BLOCK_SIZE;
|
|
265
272
|
quantized.set(block, dstOffset);
|
|
266
273
|
}
|
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
import { DEFAULT_MANIFEST_INFERENCE } from '../config/schema/index.js';
|
|
2
|
+
|
|
1
3
|
function asObject(value) {
|
|
2
4
|
if (value == null || typeof value !== 'object' || Array.isArray(value)) {
|
|
3
5
|
return null;
|
|
@@ -50,7 +52,7 @@ function resolveScalingConfig(ropeScalingConfig, options = {}) {
|
|
|
50
52
|
}
|
|
51
53
|
return {
|
|
52
54
|
ropeScalingType: null,
|
|
53
|
-
ropeScalingFactor:
|
|
55
|
+
ropeScalingFactor: DEFAULT_MANIFEST_INFERENCE.rope.ropeScalingFactor,
|
|
54
56
|
yarnBetaFast: null,
|
|
55
57
|
yarnBetaSlow: null,
|
|
56
58
|
yarnOriginalMaxPos: null,
|
|
@@ -58,7 +60,7 @@ function resolveScalingConfig(ropeScalingConfig, options = {}) {
|
|
|
58
60
|
}
|
|
59
61
|
|
|
60
62
|
let ropeScalingType = scalingType;
|
|
61
|
-
let ropeScalingFactor =
|
|
63
|
+
let ropeScalingFactor = DEFAULT_MANIFEST_INFERENCE.rope.ropeScalingFactor;
|
|
62
64
|
let yarnBetaFast = null;
|
|
63
65
|
let yarnBetaSlow = null;
|
|
64
66
|
let yarnOriginalMaxPos = null;
|
|
@@ -110,7 +112,7 @@ function hasScalingDirective(ropeScalingConfig) {
|
|
|
110
112
|
function hasMeaningfulScalingConfig(resolvedScaling) {
|
|
111
113
|
if (!resolvedScaling) return false;
|
|
112
114
|
return resolvedScaling.ropeScalingType != null
|
|
113
|
-
|| resolvedScaling.ropeScalingFactor !==
|
|
115
|
+
|| resolvedScaling.ropeScalingFactor !== DEFAULT_MANIFEST_INFERENCE.rope.ropeScalingFactor
|
|
114
116
|
|| resolvedScaling.yarnBetaFast != null
|
|
115
117
|
|| resolvedScaling.yarnBetaSlow != null
|
|
116
118
|
|| resolvedScaling.yarnOriginalMaxPos != null;
|
|
@@ -159,7 +161,7 @@ export function buildRoPEConfig(presetInference, config) {
|
|
|
159
161
|
?? null,
|
|
160
162
|
ropeScalingFactor: presetRoPE.ropeScalingFactor
|
|
161
163
|
?? presetAttn?.ropeScalingFactor // Deprecated location
|
|
162
|
-
??
|
|
164
|
+
?? DEFAULT_MANIFEST_INFERENCE.rope.ropeScalingFactor,
|
|
163
165
|
yarnBetaFast: presetRoPE.yarnBetaFast ?? null,
|
|
164
166
|
yarnBetaSlow: presetRoPE.yarnBetaSlow ?? null,
|
|
165
167
|
yarnOriginalMaxPos: presetRoPE.yarnOriginalMaxPos ?? null,
|
|
@@ -223,7 +225,7 @@ export function buildRoPEConfig(presetInference, config) {
|
|
|
223
225
|
?? asFiniteNumber(flatRoPEParameters?.rope_theta)
|
|
224
226
|
?? asFiniteNumber(config.rope_theta)
|
|
225
227
|
?? presetInference.rope?.ropeTheta
|
|
226
|
-
??
|
|
228
|
+
?? DEFAULT_MANIFEST_INFERENCE.rope.ropeTheta;
|
|
227
229
|
|
|
228
230
|
// For Gemma 3, local sliding attention theta comes from rope_parameters.sliding_attention.
|
|
229
231
|
const ropeLocalTheta = asFiniteNumber(slidingAttentionRoPE?.rope_theta)
|
|
@@ -232,7 +234,7 @@ export function buildRoPEConfig(presetInference, config) {
|
|
|
232
234
|
|
|
233
235
|
const mropeInterleaved = asBoolean(flatRoPEParameters?.mrope_interleaved)
|
|
234
236
|
?? presetInference.rope?.mropeInterleaved
|
|
235
|
-
??
|
|
237
|
+
?? DEFAULT_MANIFEST_INFERENCE.rope.mropeInterleaved;
|
|
236
238
|
const mropeSection = asNumberArray(flatRoPEParameters?.mrope_section)
|
|
237
239
|
?? presetInference.rope?.mropeSection
|
|
238
240
|
?? null;
|
|
@@ -399,7 +399,10 @@ function bytesToHex(bytes) {
|
|
|
399
399
|
}
|
|
400
400
|
|
|
401
401
|
|
|
402
|
-
export function sortTensorsByGroup(tensors, modelType
|
|
402
|
+
export function sortTensorsByGroup(tensors, modelType) {
|
|
403
|
+
if (typeof modelType !== 'string' || modelType.trim().length === 0) {
|
|
404
|
+
throw new Error('sortTensorsByGroup requires an explicit modelType.');
|
|
405
|
+
}
|
|
403
406
|
return [...tensors].sort((a, b) => {
|
|
404
407
|
const groupA = classifyTensor(a.name, modelType);
|
|
405
408
|
const groupB = classifyTensor(b.name, modelType);
|
|
@@ -1,6 +1,8 @@
|
|
|
1
|
-
export function resolveEosTokenId({ config, tokenizer, tokenizerJson }) {
|
|
1
|
+
export function resolveEosTokenId({ config, generationConfig, tokenizer, tokenizerJson }) {
|
|
2
2
|
const nestedTextConfig = getNestedTextConfig(config);
|
|
3
3
|
const candidateSources = [
|
|
4
|
+
generationConfig?.eos_token_id,
|
|
5
|
+
generationConfig?.eos_token_ids,
|
|
4
6
|
tokenizer?.eosTokenId,
|
|
5
7
|
tokenizer?.eos_token_id,
|
|
6
8
|
tokenizerJson?.specialTokens?.eos,
|
|
@@ -19,6 +21,7 @@ export function resolveEosTokenId({ config, tokenizer, tokenizerJson }) {
|
|
|
19
21
|
}
|
|
20
22
|
|
|
21
23
|
const eosTokenStringCandidates = [
|
|
24
|
+
generationConfig?.eos_token,
|
|
22
25
|
tokenizer?.eosToken,
|
|
23
26
|
tokenizer?.eos_token,
|
|
24
27
|
tokenizerJson?.specialTokens?.eos_token,
|
package/src/debug/config.js
CHANGED
|
@@ -50,6 +50,108 @@ const originalConsoleInfo = console.info;
|
|
|
50
50
|
const originalConsoleWarn = console.warn;
|
|
51
51
|
let warnedBenchmarkMode = false;
|
|
52
52
|
|
|
53
|
+
function requirePlainObject(value, label) {
|
|
54
|
+
if (!value || typeof value !== 'object' || Array.isArray(value)) {
|
|
55
|
+
throw new Error(`${label} must be an object when provided.`);
|
|
56
|
+
}
|
|
57
|
+
return value;
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
function requireNonNegativeIntegerArray(value, label) {
|
|
61
|
+
if (!Array.isArray(value)) {
|
|
62
|
+
throw new Error(`${label} must be an array of non-negative integers when provided.`);
|
|
63
|
+
}
|
|
64
|
+
return value.map((entry, index) => {
|
|
65
|
+
const parsed = Number(entry);
|
|
66
|
+
if (!Number.isInteger(parsed) || parsed < 0) {
|
|
67
|
+
throw new Error(`${label}[${index}] must be a non-negative integer.`);
|
|
68
|
+
}
|
|
69
|
+
return parsed;
|
|
70
|
+
});
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
function requireNonNegativeInteger(value, label) {
|
|
74
|
+
const parsed = Number(value);
|
|
75
|
+
if (!Number.isInteger(parsed) || parsed < 0) {
|
|
76
|
+
throw new Error(`${label} must be a non-negative integer when provided.`);
|
|
77
|
+
}
|
|
78
|
+
return parsed;
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
function requireBoolean(value, label) {
|
|
82
|
+
if (typeof value !== 'boolean') {
|
|
83
|
+
throw new Error(`${label} must be a boolean when provided.`);
|
|
84
|
+
}
|
|
85
|
+
return value;
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
function normalizeLogLevel(level) {
|
|
89
|
+
if (typeof level !== 'string' || !level.trim()) {
|
|
90
|
+
throw new Error('setLogLevel(level) requires a non-empty log level string.');
|
|
91
|
+
}
|
|
92
|
+
return level.trim().toLowerCase();
|
|
93
|
+
}
|
|
94
|
+
|
|
95
|
+
function normalizeTraceCategories(categories) {
|
|
96
|
+
if (typeof categories === 'string') {
|
|
97
|
+
const values = categories
|
|
98
|
+
.split(',')
|
|
99
|
+
.map((value) => value.trim())
|
|
100
|
+
.filter(Boolean);
|
|
101
|
+
if (values.length === 0) {
|
|
102
|
+
throw new Error('setTrace(categories) requires at least one trace category.');
|
|
103
|
+
}
|
|
104
|
+
return values;
|
|
105
|
+
}
|
|
106
|
+
if (Array.isArray(categories) && categories.length > 0) {
|
|
107
|
+
return categories.map((value, index) => {
|
|
108
|
+
if (typeof value !== 'string' || !value.trim()) {
|
|
109
|
+
throw new Error(`setTrace(categories)[${index}] must be a non-empty string.`);
|
|
110
|
+
}
|
|
111
|
+
return value.trim();
|
|
112
|
+
});
|
|
113
|
+
}
|
|
114
|
+
throw new Error(
|
|
115
|
+
'setTrace(categories) requires false, a comma-delimited string, or a non-empty string array.'
|
|
116
|
+
);
|
|
117
|
+
}
|
|
118
|
+
|
|
119
|
+
function validateTraceCategoryToken(token) {
|
|
120
|
+
if (token === 'all') {
|
|
121
|
+
return;
|
|
122
|
+
}
|
|
123
|
+
const value = token.startsWith('-') ? token.slice(1) : token;
|
|
124
|
+
if (!TRACE_CATEGORIES.includes(value)) {
|
|
125
|
+
throw new Error(
|
|
126
|
+
`Unknown trace category "${token}". Allowed categories: all, ${TRACE_CATEGORIES.join(', ')}.`
|
|
127
|
+
);
|
|
128
|
+
}
|
|
129
|
+
}
|
|
130
|
+
|
|
131
|
+
function normalizeTraceOptions(options) {
|
|
132
|
+
if (options == null) {
|
|
133
|
+
return {};
|
|
134
|
+
}
|
|
135
|
+
const normalized = requirePlainObject(options, 'setTrace(options)');
|
|
136
|
+
return {
|
|
137
|
+
...(normalized.layers === undefined ? {} : {
|
|
138
|
+
layers: requireNonNegativeIntegerArray(normalized.layers, 'setTrace(options).layers'),
|
|
139
|
+
}),
|
|
140
|
+
...(normalized.maxDecodeSteps === undefined ? {} : {
|
|
141
|
+
maxDecodeSteps: requireNonNegativeInteger(
|
|
142
|
+
normalized.maxDecodeSteps,
|
|
143
|
+
'setTrace(options).maxDecodeSteps'
|
|
144
|
+
),
|
|
145
|
+
}),
|
|
146
|
+
...(normalized.breakOnAnomaly === undefined ? {} : {
|
|
147
|
+
breakOnAnomaly: requireBoolean(
|
|
148
|
+
normalized.breakOnAnomaly,
|
|
149
|
+
'setTrace(options).breakOnAnomaly'
|
|
150
|
+
),
|
|
151
|
+
}),
|
|
152
|
+
};
|
|
153
|
+
}
|
|
154
|
+
|
|
53
155
|
export function setLogLevel(level) {
|
|
54
156
|
const levelMap = {
|
|
55
157
|
debug: LOG_LEVELS.DEBUG,
|
|
@@ -59,8 +161,14 @@ export function setLogLevel(level) {
|
|
|
59
161
|
error: LOG_LEVELS.ERROR,
|
|
60
162
|
silent: LOG_LEVELS.SILENT,
|
|
61
163
|
};
|
|
62
|
-
|
|
63
|
-
|
|
164
|
+
const normalizedLevel = normalizeLogLevel(level);
|
|
165
|
+
if (!Object.prototype.hasOwnProperty.call(levelMap, normalizedLevel)) {
|
|
166
|
+
throw new Error(
|
|
167
|
+
`Unknown log level "${level}". Allowed levels: ${Object.keys(levelMap).join(', ')}.`
|
|
168
|
+
);
|
|
169
|
+
}
|
|
170
|
+
currentLogLevel = levelMap[normalizedLevel];
|
|
171
|
+
console.log(`[Doppler] Log level set to: ${normalizedLevel.toUpperCase()}`);
|
|
64
172
|
}
|
|
65
173
|
|
|
66
174
|
export function getLogLevel() {
|
|
@@ -77,9 +185,11 @@ export function setTrace(categories, options) {
|
|
|
77
185
|
return;
|
|
78
186
|
}
|
|
79
187
|
|
|
80
|
-
const catArray =
|
|
81
|
-
|
|
82
|
-
|
|
188
|
+
const catArray = normalizeTraceCategories(categories);
|
|
189
|
+
const traceOptions = normalizeTraceOptions(options);
|
|
190
|
+
for (const cat of catArray) {
|
|
191
|
+
validateTraceCategoryToken(cat);
|
|
192
|
+
}
|
|
83
193
|
|
|
84
194
|
enabledTraceCategories.clear();
|
|
85
195
|
|
|
@@ -101,14 +211,14 @@ export function setTrace(categories, options) {
|
|
|
101
211
|
}
|
|
102
212
|
}
|
|
103
213
|
|
|
104
|
-
if (
|
|
105
|
-
traceLayerFilter =
|
|
214
|
+
if (traceOptions.layers !== undefined) {
|
|
215
|
+
traceLayerFilter = traceOptions.layers;
|
|
106
216
|
}
|
|
107
|
-
if (
|
|
108
|
-
traceMaxDecodeSteps =
|
|
217
|
+
if (traceOptions.maxDecodeSteps !== undefined) {
|
|
218
|
+
traceMaxDecodeSteps = traceOptions.maxDecodeSteps;
|
|
109
219
|
}
|
|
110
|
-
if (
|
|
111
|
-
traceBreakOnAnomaly =
|
|
220
|
+
if (traceOptions.breakOnAnomaly !== undefined) {
|
|
221
|
+
traceBreakOnAnomaly = traceOptions.breakOnAnomaly;
|
|
112
222
|
}
|
|
113
223
|
|
|
114
224
|
const enabled = [...enabledTraceCategories].join(',') || 'none';
|
|
@@ -184,11 +294,13 @@ export function setSilentMode(enabled) {
|
|
|
184
294
|
console.log = noop;
|
|
185
295
|
console.debug = noop;
|
|
186
296
|
console.info = noop;
|
|
297
|
+
console.warn = noop;
|
|
187
298
|
originalConsoleLog('[Doppler] Silent mode enabled - logging silenced');
|
|
188
299
|
} else {
|
|
189
300
|
console.log = originalConsoleLog;
|
|
190
301
|
console.debug = originalConsoleDebug;
|
|
191
302
|
console.info = originalConsoleInfo;
|
|
303
|
+
console.warn = originalConsoleWarn;
|
|
192
304
|
console.log('[Doppler] Silent mode disabled - logging restored');
|
|
193
305
|
}
|
|
194
306
|
}
|