@simulatte/doppler 0.1.5 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +126 -0
- package/README.md +25 -17
- package/package.json +20 -4
- package/src/adapters/adapter-registry.js +12 -1
- package/src/adapters/lora-loader.js +23 -6
- package/src/bridge/extension-client.d.ts +5 -0
- package/src/bridge/extension-client.js +40 -0
- package/src/bridge/index.d.ts +2 -1
- package/src/bridge/index.js +6 -4
- package/src/browser/browser-converter.js +26 -1
- package/src/browser/file-picker.js +6 -0
- package/src/browser/safetensors-parser-browser.js +84 -1
- package/src/browser/shard-io-browser.js +2 -2
- package/src/browser/tensor-source-download.js +8 -2
- package/src/browser/tensor-source-http.d.ts +1 -0
- package/src/browser/tensor-source-http.js +5 -1
- package/src/client/doppler-api.browser.js +20 -4
- package/src/client/doppler-api.js +19 -3
- package/src/client/doppler-provider/generation.js +12 -0
- package/src/client/doppler-provider/model-manager.d.ts +10 -0
- package/src/client/doppler-provider/model-manager.js +91 -19
- package/src/client/doppler-provider/source-runtime.d.ts +2 -1
- package/src/client/doppler-provider/source-runtime.js +132 -13
- package/src/client/doppler-registry.json +8 -7
- package/src/config/backward-registry-loader.js +17 -2
- package/src/config/execution-v0-contract-check.js +113 -15
- package/src/config/kernel-path-contract-check.js +57 -29
- package/src/config/kernel-path-loader.js +5 -36
- package/src/config/kernels/kernel-ref-digests.js +39 -39
- package/src/config/kernels/registry.js +14 -1
- package/src/config/kernels/registry.json +49 -7
- package/src/config/loader.d.ts +1 -1
- package/src/config/loader.js +43 -4
- package/src/config/merge-contract-check.js +59 -4
- package/src/config/merge-helpers.js +128 -7
- package/src/config/merge.d.ts +1 -0
- package/src/config/merge.js +28 -0
- package/src/config/param-validator.js +47 -2
- package/src/config/presets/kernel-paths/{gemma2-q4k-dequant-f32a.json → gemma2-q4k-dequant-f32a-nosubgroups.json} +3 -3
- package/src/config/presets/kernel-paths/gemma3-f16-fused-f32a-online-streamingprefill.json +223 -0
- package/src/config/presets/kernel-paths/{gemma3-q4k-dequant-f32a.json → gemma3-q4k-dequant-f32a-nosubgroups.json} +3 -3
- package/src/config/presets/kernel-paths/registry.json +29 -8
- package/src/config/presets/models/gemma2.json +2 -2
- package/src/config/presets/models/qwen3.json +9 -2
- package/src/config/presets/models/transformer.json +5 -0
- package/src/config/presets/runtime/experiments/bench/gemma3-bench-q4k.json +1 -1
- package/src/config/presets/runtime/experiments/debug/gemma3-debug-q4k.json +1 -1
- package/src/config/presets/runtime/experiments/verify/gemma3-verify.json +1 -1
- package/src/config/presets/runtime/kernels/dequant-f16-q4k.json +6 -13
- package/src/config/presets/runtime/kernels/dequant-f32-q4k.json +6 -13
- package/src/config/presets/runtime/kernels/embeddinggemma-q4k-dequant-f32a.json +37 -0
- package/src/config/presets/runtime/kernels/fused-q4k.json +6 -13
- package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f16a.json +33 -0
- package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f32a-nosubgroups.json +33 -0
- package/src/config/presets/runtime/kernels/gemma2-q4k-fused-f32a.json +33 -0
- package/src/config/presets/runtime/kernels/safe-q4k.json +6 -13
- package/src/config/presets/runtime/platform/metal-apple-q4k.json +1 -1
- package/src/config/required-inference-fields-contract-check.js +6 -0
- package/src/config/runtime.js +6 -1
- package/src/config/schema/debug.schema.d.ts +5 -0
- package/src/config/schema/doppler.schema.js +16 -21
- package/src/config/schema/inference-defaults.schema.js +6 -3
- package/src/config/schema/inference.schema.d.ts +9 -0
- package/src/config/schema/kernel-path.schema.d.ts +11 -1
- package/src/config/schema/kernel-thresholds.schema.js +12 -4
- package/src/config/schema/manifest.schema.d.ts +8 -1
- package/src/config/schema/manifest.schema.js +19 -3
- package/src/config/training-defaults.js +30 -22
- package/src/converter/conversion-plan.js +94 -9
- package/src/converter/core.d.ts +7 -0
- package/src/converter/core.js +14 -9
- package/src/converter/execution-v0-manifest.js +4 -1
- package/src/converter/index.d.ts +1 -0
- package/src/converter/index.js +1 -0
- package/src/converter/manifest-inference.js +43 -12
- package/src/converter/parsers/diffusion.js +0 -3
- package/src/converter/quantization-info.js +35 -15
- package/src/converter/rope-config.js +42 -0
- package/src/converter/shard-packer.d.ts +1 -1
- package/src/converter/shard-packer.js +4 -1
- package/src/debug/config.js +123 -11
- package/src/debug/signals.js +7 -1
- package/src/debug/tensor.d.ts +2 -0
- package/src/debug/tensor.js +13 -2
- package/src/distribution/p2p-control-plane.js +52 -12
- package/src/distribution/p2p-observability.js +43 -7
- package/src/distribution/p2p-webrtc-browser.js +20 -0
- package/src/distribution/shard-delivery.js +77 -26
- package/src/formats/gguf/types.js +33 -16
- package/src/formats/rdrr/groups.d.ts +12 -4
- package/src/formats/rdrr/groups.js +3 -6
- package/src/formats/rdrr/parsing.js +39 -2
- package/src/formats/rdrr/types.d.ts +2 -1
- package/src/gpu/command-recorder.js +86 -61
- package/src/gpu/device.d.ts +1 -0
- package/src/gpu/device.js +131 -19
- package/src/gpu/kernel-tuner/benchmarks.js +326 -316
- package/src/gpu/kernel-tuner/cache.js +71 -4
- package/src/gpu/kernel-tuner/tuner.js +22 -4
- package/src/gpu/kernels/attention.js +113 -34
- package/src/gpu/kernels/backward/adam.js +62 -58
- package/src/gpu/kernels/backward/attention_backward.js +257 -169
- package/src/gpu/kernels/backward/conv2d_backward.js +14 -1
- package/src/gpu/kernels/bias_add.wgsl +8 -6
- package/src/gpu/kernels/bias_add_f16.wgsl +8 -5
- package/src/gpu/kernels/cast.js +191 -149
- package/src/gpu/kernels/check-stop.js +33 -44
- package/src/gpu/kernels/conv2d.js +27 -17
- package/src/gpu/kernels/conv2d.wgsl +7 -8
- package/src/gpu/kernels/conv2d_f16.wgsl +7 -8
- package/src/gpu/kernels/cross_entropy_loss.js +21 -15
- package/src/gpu/kernels/depthwise_conv2d.js +37 -26
- package/src/gpu/kernels/depthwise_conv2d.wgsl +6 -9
- package/src/gpu/kernels/depthwise_conv2d_f16.wgsl +6 -9
- package/src/gpu/kernels/dequant.js +178 -126
- package/src/gpu/kernels/energy.d.ts +3 -21
- package/src/gpu/kernels/energy.js +111 -88
- package/src/gpu/kernels/feature-check.js +1 -1
- package/src/gpu/kernels/fused_ffn.js +84 -65
- package/src/gpu/kernels/fused_matmul_residual.js +56 -33
- package/src/gpu/kernels/fused_matmul_rmsnorm.js +62 -45
- package/src/gpu/kernels/gather.js +33 -15
- package/src/gpu/kernels/gelu.js +19 -11
- package/src/gpu/kernels/grouped_pointwise_conv2d.js +34 -23
- package/src/gpu/kernels/grouped_pointwise_conv2d.wgsl +6 -9
- package/src/gpu/kernels/grouped_pointwise_conv2d_f16.wgsl +6 -9
- package/src/gpu/kernels/groupnorm.js +34 -23
- package/src/gpu/kernels/kv-quantize.js +5 -2
- package/src/gpu/kernels/layernorm.js +35 -19
- package/src/gpu/kernels/logit-merge.js +5 -3
- package/src/gpu/kernels/matmul.js +83 -39
- package/src/gpu/kernels/modulate.js +23 -15
- package/src/gpu/kernels/moe.js +221 -175
- package/src/gpu/kernels/pixel_shuffle.js +22 -14
- package/src/gpu/kernels/pixel_shuffle.wgsl +4 -5
- package/src/gpu/kernels/pixel_shuffle_f16.wgsl +4 -5
- package/src/gpu/kernels/relu.js +31 -10
- package/src/gpu/kernels/relu.wgsl +2 -1
- package/src/gpu/kernels/relu_f16.wgsl +2 -1
- package/src/gpu/kernels/repeat_channels.js +25 -17
- package/src/gpu/kernels/repeat_channels.wgsl +4 -5
- package/src/gpu/kernels/repeat_channels_f16.wgsl +4 -5
- package/src/gpu/kernels/residual.js +69 -23
- package/src/gpu/kernels/residual.wgsl +6 -3
- package/src/gpu/kernels/residual_f16.wgsl +2 -1
- package/src/gpu/kernels/residual_f16_vec4.wgsl +2 -1
- package/src/gpu/kernels/residual_vec4.wgsl +2 -1
- package/src/gpu/kernels/rmsnorm.js +96 -28
- package/src/gpu/kernels/rmsnorm.wgsl +14 -6
- package/src/gpu/kernels/rmsnorm_f16.wgsl +10 -2
- package/src/gpu/kernels/rope.d.ts +2 -0
- package/src/gpu/kernels/rope.js +14 -1
- package/src/gpu/kernels/rope.wgsl +56 -40
- package/src/gpu/kernels/sample.js +27 -38
- package/src/gpu/kernels/sana_linear_attention.js +19 -12
- package/src/gpu/kernels/sana_linear_attention_apply.wgsl +4 -5
- package/src/gpu/kernels/sana_linear_attention_apply_f16.wgsl +4 -5
- package/src/gpu/kernels/sana_linear_attention_summary.wgsl +4 -0
- package/src/gpu/kernels/sana_linear_attention_summary_f16.wgsl +4 -0
- package/src/gpu/kernels/scale.js +18 -11
- package/src/gpu/kernels/shader-cache.js +4 -2
- package/src/gpu/kernels/silu.d.ts +1 -0
- package/src/gpu/kernels/silu.js +148 -82
- package/src/gpu/kernels/silu.wgsl +19 -9
- package/src/gpu/kernels/silu_f16.wgsl +19 -9
- package/src/gpu/kernels/softmax.js +44 -25
- package/src/gpu/kernels/split_qkv.js +23 -13
- package/src/gpu/kernels/transpose.js +31 -10
- package/src/gpu/kernels/transpose.wgsl +6 -5
- package/src/gpu/kernels/upsample2d.js +22 -13
- package/src/gpu/kernels/upsample2d.wgsl +6 -9
- package/src/gpu/kernels/upsample2d_f16.wgsl +6 -9
- package/src/gpu/kernels/utils.js +35 -13
- package/src/gpu/partitioned-buffer-pool.js +10 -2
- package/src/gpu/perf-guards.js +2 -9
- package/src/gpu/profiler.js +27 -22
- package/src/gpu/readback-utils.d.ts +16 -0
- package/src/gpu/readback-utils.js +41 -0
- package/src/gpu/submit-tracker.js +13 -0
- package/src/gpu/uniform-cache.d.ts +1 -0
- package/src/gpu/uniform-cache.js +30 -9
- package/src/hotswap/intent-bundle.js +6 -0
- package/src/hotswap/manifest.d.ts +10 -1
- package/src/hotswap/manifest.js +12 -2
- package/src/hotswap/runtime.js +30 -8
- package/src/index-browser.d.ts +44 -0
- package/src/index-browser.js +14 -0
- package/src/inference/browser-harness-contract-helpers.d.ts +5 -0
- package/src/inference/browser-harness-contract-helpers.js +28 -0
- package/src/inference/browser-harness-diffusion-energy-suites.d.ts +2 -0
- package/src/inference/browser-harness-diffusion-energy-suites.js +269 -0
- package/src/inference/browser-harness-model-helpers.d.ts +16 -0
- package/src/inference/browser-harness-model-helpers.js +217 -0
- package/src/inference/browser-harness-report-helpers.d.ts +7 -0
- package/src/inference/browser-harness-report-helpers.js +42 -0
- package/src/inference/browser-harness-runtime-helpers.d.ts +61 -0
- package/src/inference/browser-harness-runtime-helpers.js +415 -0
- package/src/inference/browser-harness-suite-helpers.d.ts +28 -0
- package/src/inference/browser-harness-suite-helpers.js +268 -0
- package/src/inference/browser-harness-text-helpers.d.ts +27 -0
- package/src/inference/browser-harness-text-helpers.js +788 -0
- package/src/inference/browser-harness.d.ts +6 -0
- package/src/inference/browser-harness.js +130 -1950
- package/src/inference/kv-cache/base.js +140 -94
- package/src/inference/kv-cache/tiered.js +5 -3
- package/src/inference/moe-router.js +88 -56
- package/src/inference/multi-model-network.js +5 -3
- package/src/inference/network-evolution.d.ts +11 -2
- package/src/inference/network-evolution.js +20 -21
- package/src/inference/pipelines/context.d.ts +3 -0
- package/src/inference/pipelines/context.js +142 -2
- package/src/inference/pipelines/diffusion/helpers.js +7 -2
- package/src/inference/pipelines/diffusion/pipeline.js +17 -7
- package/src/inference/pipelines/diffusion/sd3-transformer.js +10 -10
- package/src/inference/pipelines/diffusion/text-encoder-gpu.d.ts +5 -0
- package/src/inference/pipelines/diffusion/text-encoder-gpu.js +27 -15
- package/src/inference/pipelines/diffusion/vae.js +3 -7
- package/src/inference/pipelines/energy/pipeline.js +27 -21
- package/src/inference/pipelines/energy/quintel.d.ts +5 -0
- package/src/inference/pipelines/energy/quintel.js +11 -0
- package/src/inference/pipelines/energy-head/row-head-pipeline.js +17 -13
- package/src/inference/pipelines/structured/json-head-pipeline.js +26 -11
- package/src/inference/pipelines/text/attention/projections.js +151 -101
- package/src/inference/pipelines/text/attention/record.js +73 -10
- package/src/inference/pipelines/text/attention/run.js +73 -10
- package/src/inference/pipelines/text/chat-format.js +25 -1
- package/src/inference/pipelines/text/config.d.ts +4 -0
- package/src/inference/pipelines/text/config.js +71 -5
- package/src/inference/pipelines/text/embed.js +2 -8
- package/src/inference/pipelines/text/execution-plan.js +64 -50
- package/src/inference/pipelines/text/execution-v0-contract-helpers.d.ts +59 -0
- package/src/inference/pipelines/text/execution-v0-contract-helpers.js +937 -0
- package/src/inference/pipelines/text/execution-v0-runtime-builders.d.ts +15 -0
- package/src/inference/pipelines/text/execution-v0-runtime-builders.js +279 -0
- package/src/inference/pipelines/text/execution-v0.js +78 -1002
- package/src/inference/pipelines/text/ffn/standard.js +3 -0
- package/src/inference/pipelines/text/generator-steps.d.ts +46 -0
- package/src/inference/pipelines/text/generator-steps.js +298 -207
- package/src/inference/pipelines/text/generator.js +6 -23
- package/src/inference/pipelines/text/init.d.ts +4 -0
- package/src/inference/pipelines/text/init.js +134 -29
- package/src/inference/pipelines/text/kernel-path-auto-select.js +2 -0
- package/src/inference/pipelines/text/kernel-trace.d.ts +2 -0
- package/src/inference/pipelines/text/kernel-trace.js +6 -0
- package/src/inference/pipelines/text/layer.js +14 -9
- package/src/inference/pipelines/text/linear-attention.d.ts +10 -0
- package/src/inference/pipelines/text/linear-attention.js +80 -6
- package/src/inference/pipelines/text/logits/gpu.js +10 -5
- package/src/inference/pipelines/text/logits/index.js +10 -11
- package/src/inference/pipelines/text/logits/utils.d.ts +7 -0
- package/src/inference/pipelines/text/logits/utils.js +9 -0
- package/src/inference/pipelines/text/lora-apply.js +50 -32
- package/src/inference/pipelines/text/model-load.js +279 -104
- package/src/inference/pipelines/text/moe-cache.js +5 -4
- package/src/inference/pipelines/text/moe-cpu-gptoss.js +74 -69
- package/src/inference/pipelines/text/moe-cpu.js +42 -38
- package/src/inference/pipelines/text/moe-gpu.js +110 -86
- package/src/inference/pipelines/text/ops.js +90 -90
- package/src/inference/pipelines/text/probes.js +9 -9
- package/src/inference/pipelines/text/weights.js +17 -7
- package/src/inference/pipelines/text.js +17 -1
- package/src/inference/speculative.d.ts +2 -2
- package/src/inference/speculative.js +4 -18
- package/src/inference/test-harness.d.ts +1 -1
- package/src/inference/test-harness.js +15 -5
- package/src/inference/tokenizer.d.ts +0 -5
- package/src/inference/tokenizer.js +4 -23
- package/src/inference/tokenizers/bpe.js +9 -0
- package/src/inference/tokenizers/bundled.js +176 -33
- package/src/inference/tokenizers/sentencepiece.js +12 -0
- package/src/loader/doppler-loader.js +38 -22
- package/src/loader/dtype-utils.js +3 -44
- package/src/loader/embedding-loader.js +7 -3
- package/src/loader/experts/expert-cache.js +13 -6
- package/src/loader/experts/expert-loader.js +10 -6
- package/src/loader/final-weights-loader.js +8 -4
- package/src/loader/layer-loader.js +2 -1
- package/src/loader/loader-state.js +2 -2
- package/src/loader/memory-monitor.js +8 -0
- package/src/loader/multi-model-loader.d.ts +14 -0
- package/src/loader/multi-model-loader.js +70 -24
- package/src/loader/shard-cache.js +81 -12
- package/src/loader/shard-resolver.js +25 -3
- package/src/loader/tensors/tensor-loader.js +209 -144
- package/src/loader/tensors/tensor-reader.js +76 -19
- package/src/loader/weight-downcast.js +1 -1
- package/src/memory/buffer-pool.d.ts +9 -1
- package/src/memory/buffer-pool.js +109 -44
- package/src/memory/unified-detect.js +1 -1
- package/src/rules/inference/kernel-path.rules.json +24 -8
- package/src/rules/rule-registry.js +25 -1
- package/src/rules/tooling/command-runtime.rules.json +18 -0
- package/src/storage/backends/opfs-store.js +68 -24
- package/src/storage/downloader.js +364 -83
- package/src/storage/index.d.ts +3 -0
- package/src/storage/index.js +3 -0
- package/src/storage/preflight.d.ts +2 -2
- package/src/storage/preflight.js +24 -2
- package/src/storage/quickstart-downloader.js +11 -5
- package/src/storage/registry.js +10 -4
- package/src/storage/reports.js +1 -1
- package/src/storage/shard-manager.d.ts +15 -1
- package/src/storage/shard-manager.js +51 -3
- package/src/storage/source-artifact-store.d.ts +52 -0
- package/src/storage/source-artifact-store.js +234 -0
- package/src/tooling/command-api-constants.d.ts +9 -0
- package/src/tooling/command-api-constants.js +9 -0
- package/src/tooling/command-api-family-normalizers.d.ts +9 -0
- package/src/tooling/command-api-family-normalizers.js +343 -0
- package/src/tooling/command-api-helpers.d.ts +25 -0
- package/src/tooling/command-api-helpers.js +262 -0
- package/src/tooling/command-api.d.ts +27 -1
- package/src/tooling/command-api.js +26 -473
- package/src/tooling/command-envelope.js +4 -1
- package/src/tooling/command-runner-shared.js +52 -18
- package/src/tooling/lean-execution-contract.js +150 -3
- package/src/tooling/node-browser-command-runner.d.ts +4 -0
- package/src/tooling/node-browser-command-runner.js +218 -273
- package/src/tooling/node-command-runner.js +44 -3
- package/src/tooling/node-converter.js +27 -1
- package/src/tooling/node-source-runtime.d.ts +1 -1
- package/src/tooling/node-source-runtime.js +84 -3
- package/src/tooling/node-webgpu.js +30 -105
- package/src/tooling/opfs-cache.js +21 -4
- package/src/tooling/runtime-input-composition.d.ts +38 -0
- package/src/tooling/runtime-input-composition.js +86 -0
- package/src/tooling/source-runtime-bundle.d.ts +40 -5
- package/src/tooling/source-runtime-bundle.js +261 -34
- package/src/tooling/source-runtime-materializer.d.ts +6 -0
- package/src/tooling/source-runtime-materializer.js +93 -0
- package/src/training/attention-backward.js +32 -17
- package/src/training/autograd.js +80 -52
- package/src/training/checkpoint-watch.d.ts +8 -0
- package/src/training/checkpoint-watch.js +139 -0
- package/src/training/checkpoint.d.ts +6 -1
- package/src/training/checkpoint.js +46 -7
- package/src/training/clip.js +2 -1
- package/src/training/datasets/token-batch.js +20 -8
- package/src/training/distillation/artifacts.d.ts +71 -0
- package/src/training/distillation/artifacts.js +132 -0
- package/src/training/distillation/checkpoint-watch.d.ts +10 -0
- package/src/training/distillation/checkpoint-watch.js +58 -0
- package/src/training/distillation/dataset.d.ts +59 -0
- package/src/training/distillation/dataset.js +337 -0
- package/src/training/distillation/eval.d.ts +34 -0
- package/src/training/distillation/eval.js +310 -0
- package/src/training/distillation/index.d.ts +29 -0
- package/src/training/distillation/index.js +29 -0
- package/src/training/distillation/runtime.d.ts +20 -0
- package/src/training/distillation/runtime.js +121 -0
- package/src/training/distillation/scoreboard.d.ts +6 -0
- package/src/training/distillation/scoreboard.js +8 -0
- package/src/training/distillation/stage-a.d.ts +45 -0
- package/src/training/distillation/stage-a.js +338 -0
- package/src/training/distillation/stage-b.d.ts +24 -0
- package/src/training/distillation/stage-b.js +20 -0
- package/src/training/distillation/student-fixture.d.ts +22 -0
- package/src/training/distillation/student-fixture.js +846 -0
- package/src/training/distillation/suite-data.d.ts +45 -0
- package/src/training/distillation/suite-data.js +189 -0
- package/src/training/index.d.ts +10 -0
- package/src/training/index.js +10 -0
- package/src/training/lora-pipeline.d.ts +40 -0
- package/src/training/lora-pipeline.js +793 -0
- package/src/training/lora.js +26 -12
- package/src/training/loss.js +5 -6
- package/src/training/objectives/cross_entropy.js +2 -5
- package/src/training/objectives/distill_kd.js +4 -8
- package/src/training/objectives/distill_triplet.js +4 -8
- package/src/training/objectives/ul_stage2_base.js +4 -8
- package/src/training/operator-artifacts.d.ts +62 -0
- package/src/training/operator-artifacts.js +140 -0
- package/src/training/operator-command.d.ts +5 -0
- package/src/training/operator-command.js +455 -0
- package/src/training/operator-eval.d.ts +48 -0
- package/src/training/operator-eval.js +230 -0
- package/src/training/operator-scoreboard.d.ts +5 -0
- package/src/training/operator-scoreboard.js +44 -0
- package/src/training/optimizer.js +19 -7
- package/src/training/runner.d.ts +52 -0
- package/src/training/runner.js +31 -5
- package/src/training/suite.d.ts +112 -0
- package/src/training/suite.js +24 -984
- package/src/training/tensor-factory.d.ts +9 -0
- package/src/training/tensor-factory.js +13 -0
- package/src/training/trainer.js +3 -5
- package/src/training/ul_dataset.js +3 -5
- package/src/training/workloads.d.ts +164 -0
- package/src/training/workloads.js +530 -0
- package/src/version.js +1 -1
- package/tools/convert-safetensors-node.js +22 -16
- package/tools/doppler-cli.js +179 -63
|
@@ -17,6 +17,8 @@ import { sanitizeModelId } from './core.js';
|
|
|
17
17
|
import { classifyTensorRole } from '../formats/rdrr/index.js';
|
|
18
18
|
import { selectRuleValue } from '../rules/rule-registry.js';
|
|
19
19
|
import { buildKernelRefFromKernelEntry, isKernelRefBoundToKernel } from '../config/kernels/kernel-ref.js';
|
|
20
|
+
import { mergeLayeredShallowObjects } from '../config/merge-helpers.js';
|
|
21
|
+
import { buildExecutionV0ContractArtifact } from '../config/execution-v0-contract-check.js';
|
|
20
22
|
|
|
21
23
|
const KNOWN_MODEL_PRESETS = new Set(listPresets());
|
|
22
24
|
const CONVERSION_SUPPORTED_PRESETS = [...KNOWN_MODEL_PRESETS]
|
|
@@ -179,9 +181,6 @@ export function validateDefaultKernelPath(inference, context = {}) {
|
|
|
179
181
|
&& expectedComputeDtype !== kernelActivationDtype
|
|
180
182
|
) {
|
|
181
183
|
const presetId = context?.presetId ?? 'unknown';
|
|
182
|
-
if (presetId === 'lfm2' && expectedComputeDtype === 'f32' && kernelActivationDtype === 'f16') {
|
|
183
|
-
return;
|
|
184
|
-
}
|
|
185
184
|
throw new Error(
|
|
186
185
|
`Invalid defaultKernelPath "${inference.defaultKernelPath}" for preset "${presetId}" ` +
|
|
187
186
|
`(weights=${quantizationInfo?.weights ?? 'unknown'}, compute=${expectedComputeDtype}, ` +
|
|
@@ -208,6 +207,61 @@ function cloneJson(value) {
|
|
|
208
207
|
return JSON.parse(JSON.stringify(value));
|
|
209
208
|
}
|
|
210
209
|
|
|
210
|
+
function mergeExecutionV0SessionDefaults(baseSessionDefaults, overrideSessionDefaults) {
|
|
211
|
+
if (!overrideSessionDefaults) {
|
|
212
|
+
return cloneJson(baseSessionDefaults);
|
|
213
|
+
}
|
|
214
|
+
const base = cloneJson(baseSessionDefaults ?? {});
|
|
215
|
+
const override = cloneJson(overrideSessionDefaults);
|
|
216
|
+
const baseCompute = base.compute ?? {};
|
|
217
|
+
const overrideCompute = override.compute ?? {};
|
|
218
|
+
|
|
219
|
+
return {
|
|
220
|
+
...base,
|
|
221
|
+
...override,
|
|
222
|
+
compute: {
|
|
223
|
+
...baseCompute,
|
|
224
|
+
...overrideCompute,
|
|
225
|
+
defaults: mergeLayeredShallowObjects(
|
|
226
|
+
baseCompute.defaults ?? {},
|
|
227
|
+
overrideCompute.defaults ?? {}
|
|
228
|
+
),
|
|
229
|
+
kernelProfiles: Object.prototype.hasOwnProperty.call(overrideCompute, 'kernelProfiles')
|
|
230
|
+
? overrideCompute.kernelProfiles
|
|
231
|
+
: baseCompute.kernelProfiles,
|
|
232
|
+
},
|
|
233
|
+
kvcache: Object.prototype.hasOwnProperty.call(override, 'kvcache')
|
|
234
|
+
? (
|
|
235
|
+
override.kvcache === null
|
|
236
|
+
? null
|
|
237
|
+
: mergeLayeredShallowObjects(base.kvcache ?? {}, override.kvcache ?? {})
|
|
238
|
+
)
|
|
239
|
+
: base.kvcache,
|
|
240
|
+
decodeLoop: Object.prototype.hasOwnProperty.call(override, 'decodeLoop')
|
|
241
|
+
? (
|
|
242
|
+
override.decodeLoop === null
|
|
243
|
+
? null
|
|
244
|
+
: mergeLayeredShallowObjects(base.decodeLoop ?? {}, override.decodeLoop ?? {})
|
|
245
|
+
)
|
|
246
|
+
: base.decodeLoop,
|
|
247
|
+
};
|
|
248
|
+
}
|
|
249
|
+
|
|
250
|
+
function assertExecutionV0ConversionContract(manifestInference, modelId) {
|
|
251
|
+
if (!manifestInference?.execution) {
|
|
252
|
+
return;
|
|
253
|
+
}
|
|
254
|
+
const artifact = buildExecutionV0ContractArtifact(manifestInference, {
|
|
255
|
+
modelId: modelId ?? 'converted-model',
|
|
256
|
+
});
|
|
257
|
+
if (!artifact?.ok) {
|
|
258
|
+
const detail = artifact?.errors?.join(' ') ?? 'unknown execution-v0 contract error';
|
|
259
|
+
throw new Error(
|
|
260
|
+
`converterConfig.inference produced an invalid execution-v0 contract: ${detail}`
|
|
261
|
+
);
|
|
262
|
+
}
|
|
263
|
+
}
|
|
264
|
+
|
|
211
265
|
function readConverterSessionDefaultsOverride(converterConfig) {
|
|
212
266
|
const raw = converterConfig?.inference?.sessionDefaults;
|
|
213
267
|
if (raw == null) return null;
|
|
@@ -219,6 +273,26 @@ function readConverterSessionDefaultsOverride(converterConfig) {
|
|
|
219
273
|
return cloneJson(raw);
|
|
220
274
|
}
|
|
221
275
|
|
|
276
|
+
function assertNonExecutionSessionDefaults(manifestInference) {
|
|
277
|
+
const sessionDefaults = manifestInference?.sessionDefaults;
|
|
278
|
+
if (sessionDefaults == null) {
|
|
279
|
+
return;
|
|
280
|
+
}
|
|
281
|
+
if (typeof sessionDefaults !== 'object' || Array.isArray(sessionDefaults)) {
|
|
282
|
+
throw new Error(
|
|
283
|
+
'converterConfig.inference.sessionDefaults must resolve to an object for non-execution manifests.'
|
|
284
|
+
);
|
|
285
|
+
}
|
|
286
|
+
const keys = Object.keys(sessionDefaults);
|
|
287
|
+
const invalidKeys = keys.filter((key) => key !== 'decodeLoop');
|
|
288
|
+
if (invalidKeys.length > 0) {
|
|
289
|
+
throw new Error(
|
|
290
|
+
'converterConfig.inference.sessionDefaults may only set decodeLoop unless ' +
|
|
291
|
+
'converterConfig.inference.execution is present.'
|
|
292
|
+
);
|
|
293
|
+
}
|
|
294
|
+
}
|
|
295
|
+
|
|
222
296
|
function readConverterExecutionOverride(converterConfig) {
|
|
223
297
|
const raw = converterConfig?.inference?.execution;
|
|
224
298
|
if (raw == null) return null;
|
|
@@ -331,10 +405,10 @@ function applyConverterInferenceOverrides(manifestInference, converterConfig, co
|
|
|
331
405
|
manifestInference.defaultKernelPath = overrideKernelPath;
|
|
332
406
|
}
|
|
333
407
|
const sessionDefaults = readConverterSessionDefaultsOverride(converterConfig);
|
|
408
|
+
const execution = readConverterExecutionOverride(converterConfig);
|
|
334
409
|
if (sessionDefaults) {
|
|
335
410
|
manifestInference.sessionDefaults = sessionDefaults;
|
|
336
411
|
}
|
|
337
|
-
const execution = readConverterExecutionOverride(converterConfig);
|
|
338
412
|
if (execution) {
|
|
339
413
|
manifestInference.execution = execution;
|
|
340
414
|
}
|
|
@@ -351,17 +425,28 @@ function applyConverterInferenceOverrides(manifestInference, converterConfig, co
|
|
|
351
425
|
const generatedExecution = buildExecutionV0FromKernelPath(manifestInference.defaultKernelPath);
|
|
352
426
|
if (generatedExecution) {
|
|
353
427
|
manifestInference.execution = generatedExecution.execution;
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
428
|
+
manifestInference.sessionDefaults = mergeExecutionV0SessionDefaults(
|
|
429
|
+
generatedExecution.sessionDefaults,
|
|
430
|
+
manifestInference.sessionDefaults
|
|
431
|
+
);
|
|
357
432
|
manifestInference.schema = generatedExecution.schema;
|
|
358
433
|
}
|
|
359
434
|
}
|
|
360
435
|
|
|
361
|
-
if (
|
|
436
|
+
if (execution && !manifestInference.sessionDefaults) {
|
|
437
|
+
throw new Error(
|
|
438
|
+
'converterConfig.inference.execution requires converterConfig.inference.sessionDefaults.'
|
|
439
|
+
);
|
|
440
|
+
}
|
|
441
|
+
|
|
442
|
+
if (manifestInference.execution) {
|
|
362
443
|
manifestInference.schema = EXECUTION_V0_SCHEMA_ID;
|
|
444
|
+
} else {
|
|
445
|
+
assertNonExecutionSessionDefaults(manifestInference);
|
|
446
|
+
manifestInference.schema = null;
|
|
363
447
|
}
|
|
364
448
|
validateDefaultKernelPath(manifestInference, context);
|
|
449
|
+
assertExecutionV0ConversionContract(manifestInference, context?.modelId ?? context?.presetId);
|
|
365
450
|
}
|
|
366
451
|
|
|
367
452
|
export function resolveConversionPlan(options) {
|
|
@@ -418,7 +503,7 @@ export function resolveConversionPlan(options) {
|
|
|
418
503
|
if (!presetOverride && isLikelyEmbeddingGemma(rawConfig, architectureHint)) {
|
|
419
504
|
presetId = 'embeddinggemma';
|
|
420
505
|
}
|
|
421
|
-
if (presetId
|
|
506
|
+
if (!presetId) {
|
|
422
507
|
throw buildUnknownFamilyError(architectureHint, rawConfig, options?.includePresetOverrideHint === true);
|
|
423
508
|
}
|
|
424
509
|
const preset = resolvePreset(presetId);
|
package/src/converter/core.d.ts
CHANGED
|
@@ -168,6 +168,13 @@ export declare const RDRR_VERSION: number;
|
|
|
168
168
|
*/
|
|
169
169
|
export declare function sanitizeModelId(name: string): string | null;
|
|
170
170
|
|
|
171
|
+
/**
|
|
172
|
+
* Resolve bundled tokenizer vocab size from Hugging Face tokenizer.json payloads.
|
|
173
|
+
*/
|
|
174
|
+
export declare function resolveBundledTokenizerVocabSize(
|
|
175
|
+
tokenizerJson: Record<string, unknown> | null | undefined
|
|
176
|
+
): number;
|
|
177
|
+
|
|
171
178
|
/**
|
|
172
179
|
* Format bytes for human-readable display
|
|
173
180
|
*/
|
package/src/converter/core.js
CHANGED
|
@@ -26,6 +26,7 @@ import { buildManifestRequiredInferenceFieldsArtifact } from '../config/required
|
|
|
26
26
|
import { buildManifestInference, inferEmbeddingOutputConfig } from './manifest-inference.js';
|
|
27
27
|
import { resolveEosTokenId } from './tokenizer-utils.js';
|
|
28
28
|
import {
|
|
29
|
+
normalizeQ4KLayout,
|
|
29
30
|
resolveManifestQuantization,
|
|
30
31
|
resolveEffectiveQuantizationInfo,
|
|
31
32
|
} from './quantization-info.js';
|
|
@@ -122,11 +123,6 @@ function bf16ToFloat32(value) {
|
|
|
122
123
|
return view.getFloat32(0, true);
|
|
123
124
|
}
|
|
124
125
|
|
|
125
|
-
function normalizeQ4KLayout(value) {
|
|
126
|
-
const normalized = String(value || '').trim().toLowerCase();
|
|
127
|
-
return normalized === 'col' ? 'col' : 'row';
|
|
128
|
-
}
|
|
129
|
-
|
|
130
126
|
function normalizeTensorName(tensor) {
|
|
131
127
|
const name = tensor?.name;
|
|
132
128
|
return typeof name === 'string' ? name : '';
|
|
@@ -495,6 +491,17 @@ function buildSentencepieceTokenizer(tokenizerConfig, rawConfig, architecture, m
|
|
|
495
491
|
return tokenizer;
|
|
496
492
|
}
|
|
497
493
|
|
|
494
|
+
export function resolveBundledTokenizerVocabSize(tokenizerJson) {
|
|
495
|
+
const vocab = tokenizerJson?.model?.vocab;
|
|
496
|
+
if (Array.isArray(vocab)) {
|
|
497
|
+
return vocab.length;
|
|
498
|
+
}
|
|
499
|
+
if (vocab && typeof vocab === 'object') {
|
|
500
|
+
return Object.keys(vocab).length;
|
|
501
|
+
}
|
|
502
|
+
return 0;
|
|
503
|
+
}
|
|
504
|
+
|
|
498
505
|
|
|
499
506
|
export function sanitizeModelId(name) {
|
|
500
507
|
const sanitized = name
|
|
@@ -988,7 +995,7 @@ export function createManifest(
|
|
|
988
995
|
inference = { ...DEFAULT_MANIFEST_INFERENCE, presetId: 'diffusion' };
|
|
989
996
|
} else {
|
|
990
997
|
const presetId = detectPreset(rawConfig, model.architecture);
|
|
991
|
-
if (presetId
|
|
998
|
+
if (!presetId) {
|
|
992
999
|
const modelType = rawConfig.model_type ?? 'unknown';
|
|
993
1000
|
throw new Error(
|
|
994
1001
|
`Unknown model family: architecture="${model.architecture || 'unknown'}", model_type="${modelType}"\n\n` +
|
|
@@ -1070,9 +1077,7 @@ export function createManifest(
|
|
|
1070
1077
|
// Include tokenizer if available
|
|
1071
1078
|
if (model.tokenizerJson) {
|
|
1072
1079
|
const tokenizer = model.tokenizerJson;
|
|
1073
|
-
const vocabSize =
|
|
1074
|
-
tokenizer.model?.vocab?.length ||
|
|
1075
|
-
Object.keys(tokenizer.model?.vocab || {}).length;
|
|
1080
|
+
const vocabSize = resolveBundledTokenizerVocabSize(tokenizer);
|
|
1076
1081
|
if (!vocabSize) {
|
|
1077
1082
|
throw new Error('Tokenizer vocab is missing or empty');
|
|
1078
1083
|
}
|
|
@@ -104,7 +104,10 @@ function buildKernelProfiles(steps) {
|
|
|
104
104
|
}
|
|
105
105
|
|
|
106
106
|
function buildSessionDefaults(kernelPath) {
|
|
107
|
-
const activationDtype = normalizeKernelDtype(getKernelPathActivationDtype(kernelPath))
|
|
107
|
+
const activationDtype = normalizeKernelDtype(getKernelPathActivationDtype(kernelPath));
|
|
108
|
+
if (!activationDtype) {
|
|
109
|
+
throw new Error('execution-v0 manifest: kernel path is missing activationDtype.');
|
|
110
|
+
}
|
|
108
111
|
const outputDtype = normalizeKernelDtype(getKernelPathOutputDtype(kernelPath)) ?? activationDtype;
|
|
109
112
|
const kvDtype = normalizeKernelDtype(getKernelPathKVDtype(kernelPath)) ?? activationDtype;
|
|
110
113
|
return {
|
package/src/converter/index.d.ts
CHANGED
package/src/converter/index.js
CHANGED
|
@@ -268,6 +268,19 @@ function resolveQueryPreAttnScalar(preset, modelConfig, headDim) {
|
|
|
268
268
|
return Math.sqrt(headDim);
|
|
269
269
|
}
|
|
270
270
|
|
|
271
|
+
function detectRmsNormWeightOffset(presetInference, modelConfig, defaults) {
|
|
272
|
+
const modelType = normalizeLayerTypeName(modelConfig?.model_type);
|
|
273
|
+
if (modelType === 'qwen3_5' || modelType === 'qwen3_5_text') {
|
|
274
|
+
return true;
|
|
275
|
+
}
|
|
276
|
+
|
|
277
|
+
if (typeof presetInference?.normalization?.rmsNormWeightOffset === 'boolean') {
|
|
278
|
+
return presetInference.normalization.rmsNormWeightOffset;
|
|
279
|
+
}
|
|
280
|
+
|
|
281
|
+
return defaults.normalization.rmsNormWeightOffset;
|
|
282
|
+
}
|
|
283
|
+
|
|
271
284
|
// Build normalization config with auto-detection from tensor names.
|
|
272
285
|
// Priority: auto-detected > preset > default
|
|
273
286
|
function buildNormalizationConfig(presetInference, modelConfig, defaults, tensorNames) {
|
|
@@ -278,7 +291,7 @@ function buildNormalizationConfig(presetInference, modelConfig, defaults, tensor
|
|
|
278
291
|
modelConfig.rms_norm_eps ??
|
|
279
292
|
modelConfig.attentionLayerNormRMSEpsilon ??
|
|
280
293
|
defaults.normalization.rmsNormEps,
|
|
281
|
-
rmsNormWeightOffset: presetInference
|
|
294
|
+
rmsNormWeightOffset: detectRmsNormWeightOffset(presetInference, modelConfig, defaults),
|
|
282
295
|
// For norm flags: auto-detected > preset > default
|
|
283
296
|
postAttentionNorm: detected.postAttentionNorm ?? presetInference.normalization?.postAttentionNorm ?? defaults.normalization.postAttentionNorm,
|
|
284
297
|
preFeedforwardNorm: detected.preFeedforwardNorm ?? presetInference.normalization?.preFeedforwardNorm ?? defaults.normalization.preFeedforwardNorm,
|
|
@@ -303,26 +316,44 @@ function resolveKernelPathFromPreset(presetInference, quantizationInfo, q4kLayou
|
|
|
303
316
|
}
|
|
304
317
|
|
|
305
318
|
const weightKey = normalizeKernelDtype(quantizationInfo?.weights);
|
|
306
|
-
const computeKey = normalizeKernelDtype(quantizationInfo?.compute)
|
|
307
|
-
|
|
308
|
-
const entry =
|
|
319
|
+
const computeKey = normalizeKernelDtype(quantizationInfo?.compute);
|
|
320
|
+
const hasWeightEntry = weightKey != null && Object.prototype.hasOwnProperty.call(kernelPaths, weightKey);
|
|
321
|
+
const entry = hasWeightEntry ? kernelPaths[weightKey] : kernelPaths.default;
|
|
322
|
+
const weightLabel = weightKey ? `.${weightKey}` : '';
|
|
309
323
|
let resolved = null;
|
|
324
|
+
if (entry == null) {
|
|
325
|
+
return presetInference?.kernelPath ?? null;
|
|
326
|
+
}
|
|
327
|
+
|
|
310
328
|
if (typeof entry === 'string') {
|
|
311
329
|
resolved = entry;
|
|
312
|
-
} else if (entry && computeKey && entry
|
|
330
|
+
} else if (entry && computeKey && Object.prototype.hasOwnProperty.call(entry, computeKey)) {
|
|
313
331
|
resolved = entry[computeKey];
|
|
314
|
-
} else if (entry && entry.default) {
|
|
332
|
+
} else if (entry && typeof entry === 'object' && !Array.isArray(entry) && Object.prototype.hasOwnProperty.call(entry, 'default')) {
|
|
315
333
|
resolved = entry.default;
|
|
334
|
+
} else if (entry && typeof entry === 'object' && !Array.isArray(entry) && !computeKey) {
|
|
335
|
+
throw new Error(
|
|
336
|
+
`Preset kernelPaths${weightLabel} requires quantizationInfo.compute ` +
|
|
337
|
+
'to resolve a compute-specific defaultKernelPath.'
|
|
338
|
+
);
|
|
339
|
+
} else if (entry && typeof entry === 'object' && !Array.isArray(entry)) {
|
|
340
|
+
throw new Error(
|
|
341
|
+
`Preset kernelPaths${weightLabel} is missing compute "${computeKey}". ` +
|
|
342
|
+
'Add an explicit compute-specific mapping or default instead of relying on JS fallbacks.'
|
|
343
|
+
);
|
|
316
344
|
} else {
|
|
317
|
-
|
|
345
|
+
throw new Error(
|
|
346
|
+
`Preset kernelPaths${weightLabel} must resolve to a string or object.`
|
|
347
|
+
);
|
|
318
348
|
}
|
|
319
349
|
|
|
320
|
-
//
|
|
321
|
-
//
|
|
350
|
+
// Column-wise Q4K must be mapped explicitly in preset JSON; JS must not
|
|
351
|
+
// rewrite kernel-path ids to infer policy.
|
|
322
352
|
if (resolved && q4kLayout === 'col' && resolved.includes('-fused-')) {
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
353
|
+
throw new Error(
|
|
354
|
+
`Preset kernelPaths${weightKey ? `.${weightKey}` : ''} resolved fused kernel path "${resolved}" ` +
|
|
355
|
+
'for q4k layout "col". Add an explicit dequant kernel path mapping to the preset instead of relying on JS rewrites.'
|
|
356
|
+
);
|
|
326
357
|
}
|
|
327
358
|
|
|
328
359
|
return resolved;
|
|
@@ -261,9 +261,6 @@ export async function parseDiffusionModel(adapter) {
|
|
|
261
261
|
}
|
|
262
262
|
const configSuffix = defaultConfigPath(componentId);
|
|
263
263
|
const config = await readJson(configSuffix, `${componentId} config`);
|
|
264
|
-
if (componentId === 'transformer' && config && !config.weight_format) {
|
|
265
|
-
config.weight_format = 'diffusers';
|
|
266
|
-
}
|
|
267
264
|
diffusionConfig.components[componentId] = {
|
|
268
265
|
...(diffusionConfig.components[componentId] || {}),
|
|
269
266
|
config,
|
|
@@ -91,6 +91,7 @@ export function buildVariantTag(info) {
|
|
|
91
91
|
const weights = info.weights;
|
|
92
92
|
const embeddings = info.embeddings ?? weights;
|
|
93
93
|
const lmHead = info.lmHead ?? embeddings;
|
|
94
|
+
const compute = info.compute ? normalizeQuantTag(info.compute) : null;
|
|
94
95
|
const experts = info.experts ?? null;
|
|
95
96
|
const layout = info.layout ?? null;
|
|
96
97
|
|
|
@@ -100,30 +101,42 @@ export function buildVariantTag(info) {
|
|
|
100
101
|
? `${weights}${layout === 'row' ? '' : '-col'}`
|
|
101
102
|
: weights;
|
|
102
103
|
|
|
103
|
-
const parts = [
|
|
104
|
+
const parts = [weightTag];
|
|
105
|
+
const groupedRolesByDtype = new Map();
|
|
106
|
+
const GROUPED_ROLE_ORDER = ['e', 'h', 'a'];
|
|
104
107
|
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
+
const addGroupedRole = (role, dtype) => {
|
|
109
|
+
if (!dtype || dtype === weights) return;
|
|
110
|
+
const existing = groupedRolesByDtype.get(dtype) ?? [];
|
|
111
|
+
if (!existing.includes(role)) {
|
|
112
|
+
existing.push(role);
|
|
113
|
+
groupedRolesByDtype.set(dtype, existing);
|
|
114
|
+
}
|
|
115
|
+
};
|
|
108
116
|
|
|
109
|
-
|
|
110
|
-
|
|
117
|
+
addGroupedRole('e', embeddings);
|
|
118
|
+
addGroupedRole('h', lmHead);
|
|
119
|
+
addGroupedRole('a', compute);
|
|
120
|
+
|
|
121
|
+
for (const [dtype, roles] of groupedRolesByDtype.entries()) {
|
|
122
|
+
const orderedRoles = GROUPED_ROLE_ORDER.filter((role) => roles.includes(role));
|
|
123
|
+
parts.push(`${orderedRoles.join('')}${dtype}`);
|
|
111
124
|
}
|
|
112
125
|
|
|
113
126
|
if (experts && experts !== weights) {
|
|
114
127
|
parts.push(`x${experts}`);
|
|
115
128
|
}
|
|
116
129
|
|
|
117
|
-
if (info.vision) {
|
|
130
|
+
if (info.vision && info.vision !== weights) {
|
|
118
131
|
parts.push(`v${info.vision}`);
|
|
119
132
|
}
|
|
120
|
-
if (info.audio) {
|
|
121
|
-
parts.push(`
|
|
133
|
+
if (info.audio && info.audio !== weights) {
|
|
134
|
+
parts.push(`audio${info.audio}`);
|
|
122
135
|
}
|
|
123
|
-
if (info.tts) {
|
|
124
|
-
parts.push(`
|
|
136
|
+
if (info.tts && info.tts !== weights) {
|
|
137
|
+
parts.push(`tts${info.tts}`);
|
|
125
138
|
}
|
|
126
|
-
if (info.projector) {
|
|
139
|
+
if (info.projector && info.projector !== weights) {
|
|
127
140
|
parts.push(`p${info.projector}`);
|
|
128
141
|
}
|
|
129
142
|
|
|
@@ -167,9 +180,16 @@ const Q4K_LAYOUT_ALIASES = {
|
|
|
167
180
|
};
|
|
168
181
|
|
|
169
182
|
export function normalizeQ4KLayout(value) {
|
|
170
|
-
if (
|
|
171
|
-
const lower = String(value).toLowerCase().replace(/_/g, '');
|
|
172
|
-
|
|
183
|
+
if (value == null) return null;
|
|
184
|
+
const lower = String(value).trim().toLowerCase().replace(/_/g, '');
|
|
185
|
+
if (!lower) return null;
|
|
186
|
+
const normalized = Q4K_LAYOUT_ALIASES[lower];
|
|
187
|
+
if (!normalized) {
|
|
188
|
+
throw new Error(
|
|
189
|
+
`converter.quantization.q4kLayout must be "row" or "col"; got ${JSON.stringify(value)}.`
|
|
190
|
+
);
|
|
191
|
+
}
|
|
192
|
+
return normalized;
|
|
173
193
|
}
|
|
174
194
|
|
|
175
195
|
export function buildQuantizationInfo(
|
|
@@ -6,10 +6,26 @@ function asObject(value) {
|
|
|
6
6
|
}
|
|
7
7
|
|
|
8
8
|
function asFiniteNumber(value) {
|
|
9
|
+
if (value == null || value === '') {
|
|
10
|
+
return null;
|
|
11
|
+
}
|
|
9
12
|
const parsed = Number(value);
|
|
10
13
|
return Number.isFinite(parsed) ? parsed : null;
|
|
11
14
|
}
|
|
12
15
|
|
|
16
|
+
function asBoolean(value) {
|
|
17
|
+
return typeof value === 'boolean' ? value : null;
|
|
18
|
+
}
|
|
19
|
+
|
|
20
|
+
function asNumberArray(value) {
|
|
21
|
+
if (!Array.isArray(value)) return null;
|
|
22
|
+
const normalized = value.map((entry) => asFiniteNumber(entry));
|
|
23
|
+
if (normalized.some((entry) => entry == null || entry <= 0)) {
|
|
24
|
+
return null;
|
|
25
|
+
}
|
|
26
|
+
return normalized.map((entry) => Math.trunc(entry));
|
|
27
|
+
}
|
|
28
|
+
|
|
13
29
|
function normalizeRoPEType(value) {
|
|
14
30
|
if (typeof value !== 'string') return null;
|
|
15
31
|
const normalized = value.trim().toLowerCase();
|
|
@@ -125,6 +141,13 @@ function failOnConflictingScaling(sourceLabel, canonicalScaling, candidateScalin
|
|
|
125
141
|
export function buildRoPEConfig(presetInference, config) {
|
|
126
142
|
const ropeScaling = asObject(config.rope_scaling);
|
|
127
143
|
const ropeParameters = asObject(config.rope_parameters);
|
|
144
|
+
const flatRoPEParameters = (
|
|
145
|
+
ropeParameters
|
|
146
|
+
&& !asObject(ropeParameters.full_attention)
|
|
147
|
+
&& !asObject(ropeParameters.sliding_attention)
|
|
148
|
+
)
|
|
149
|
+
? ropeParameters
|
|
150
|
+
: null;
|
|
128
151
|
const fullAttentionRoPE = asObject(ropeParameters?.full_attention);
|
|
129
152
|
const slidingAttentionRoPE = asObject(ropeParameters?.sliding_attention);
|
|
130
153
|
const presetRoPE = presetInference.rope ?? {};
|
|
@@ -164,6 +187,11 @@ export function buildRoPEConfig(presetInference, config) {
|
|
|
164
187
|
strictMissingTypeAndFactor: false,
|
|
165
188
|
sourceLabel: 'HF config rope_parameters.full_attention',
|
|
166
189
|
});
|
|
190
|
+
} else if (flatRoPEParameters) {
|
|
191
|
+
globalScaling = resolveScalingConfig(flatRoPEParameters, {
|
|
192
|
+
strictMissingTypeAndFactor: false,
|
|
193
|
+
sourceLabel: 'HF config rope_parameters',
|
|
194
|
+
});
|
|
167
195
|
}
|
|
168
196
|
|
|
169
197
|
const hasPresetLocalScaling = presetRoPE.ropeLocalScalingType !== undefined
|
|
@@ -192,6 +220,7 @@ export function buildRoPEConfig(presetInference, config) {
|
|
|
192
220
|
// HF config is source of truth for ropeTheta when provided:
|
|
193
221
|
// prefer rope_parameters.full_attention.rope_theta, then rope_theta.
|
|
194
222
|
const ropeTheta = asFiniteNumber(fullAttentionRoPE?.rope_theta)
|
|
223
|
+
?? asFiniteNumber(flatRoPEParameters?.rope_theta)
|
|
195
224
|
?? asFiniteNumber(config.rope_theta)
|
|
196
225
|
?? presetInference.rope?.ropeTheta
|
|
197
226
|
?? 10000;
|
|
@@ -201,9 +230,22 @@ export function buildRoPEConfig(presetInference, config) {
|
|
|
201
230
|
?? presetInference.rope?.ropeLocalTheta
|
|
202
231
|
?? null;
|
|
203
232
|
|
|
233
|
+
const mropeInterleaved = asBoolean(flatRoPEParameters?.mrope_interleaved)
|
|
234
|
+
?? presetInference.rope?.mropeInterleaved
|
|
235
|
+
?? false;
|
|
236
|
+
const mropeSection = asNumberArray(flatRoPEParameters?.mrope_section)
|
|
237
|
+
?? presetInference.rope?.mropeSection
|
|
238
|
+
?? null;
|
|
239
|
+
const partialRotaryFactor = asFiniteNumber(flatRoPEParameters?.partial_rotary_factor)
|
|
240
|
+
?? asFiniteNumber(presetInference.rope?.partialRotaryFactor)
|
|
241
|
+
?? null;
|
|
242
|
+
|
|
204
243
|
return {
|
|
205
244
|
ropeTheta,
|
|
206
245
|
ropeLocalTheta,
|
|
246
|
+
mropeInterleaved,
|
|
247
|
+
mropeSection,
|
|
248
|
+
partialRotaryFactor,
|
|
207
249
|
ropeScalingType: globalScaling.ropeScalingType,
|
|
208
250
|
ropeScalingFactor: globalScaling.ropeScalingFactor,
|
|
209
251
|
yarnBetaFast: globalScaling.yarnBetaFast,
|
|
@@ -399,7 +399,10 @@ function bytesToHex(bytes) {
|
|
|
399
399
|
}
|
|
400
400
|
|
|
401
401
|
|
|
402
|
-
export function sortTensorsByGroup(tensors, modelType
|
|
402
|
+
export function sortTensorsByGroup(tensors, modelType) {
|
|
403
|
+
if (typeof modelType !== 'string' || modelType.trim().length === 0) {
|
|
404
|
+
throw new Error('sortTensorsByGroup requires an explicit modelType.');
|
|
405
|
+
}
|
|
403
406
|
return [...tensors].sort((a, b) => {
|
|
404
407
|
const groupA = classifyTensor(a.name, modelType);
|
|
405
408
|
const groupB = classifyTensor(b.name, modelType);
|