@simulatte/doppler 0.1.6 → 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +145 -0
- package/README.md +16 -23
- package/package.json +30 -32
- package/src/adapters/adapter-registry.js +12 -1
- package/src/adapters/lora-loader.js +23 -6
- package/src/bridge/extension-client.d.ts +5 -0
- package/src/bridge/extension-client.js +40 -0
- package/src/bridge/index.d.ts +2 -1
- package/src/bridge/index.js +6 -4
- package/src/browser/browser-converter.js +31 -1
- package/src/browser/file-picker.js +6 -0
- package/src/browser/safetensors-parser-browser.js +84 -1
- package/src/browser/shard-io-browser.js +2 -2
- package/src/browser/tensor-source-download.js +8 -2
- package/src/browser/tensor-source-http.d.ts +1 -0
- package/src/browser/tensor-source-http.js +5 -1
- package/src/client/doppler-api.browser.js +20 -4
- package/src/client/doppler-api.js +19 -3
- package/src/client/doppler-provider/generation.js +12 -0
- package/src/client/doppler-provider/model-manager.d.ts +10 -0
- package/src/client/doppler-provider/model-manager.js +91 -19
- package/src/client/doppler-provider/source-runtime.d.ts +2 -1
- package/src/client/doppler-provider/source-runtime.js +132 -13
- package/src/client/doppler-registry.json +5 -20
- package/src/config/backward-registry-loader.js +17 -2
- package/src/config/execution-v0-contract-check.js +113 -15
- package/src/config/kernel-path-contract-check.js +57 -29
- package/src/config/kernel-path-loader.d.ts +5 -0
- package/src/config/kernel-path-loader.js +18 -36
- package/src/config/kernels/kernel-ref-digests.js +1 -1
- package/src/config/kernels/registry.js +14 -1
- package/src/config/kernels/registry.json +81 -5
- package/src/config/loader.d.ts +1 -1
- package/src/config/loader.js +15 -2
- package/src/config/merge-contract-check.js +66 -4
- package/src/config/merge-helpers.js +128 -7
- package/src/config/merge.d.ts +1 -0
- package/src/config/merge.js +10 -0
- package/src/config/param-validator.js +47 -2
- package/src/config/presets/kernel-paths/{gemma2-q4k-dequant-f32a.json → gemma2-q4k-dequant-f32a-nosubgroups.json} +3 -3
- package/src/config/presets/kernel-paths/gemma3-f16-fused-f32a-online-streamingprefill.json +223 -0
- package/src/config/presets/kernel-paths/{gemma3-q4k-dequant-f32a.json → gemma3-q4k-dequant-f32a-nosubgroups.json} +3 -3
- package/src/config/presets/kernel-paths/gemma3-q4k-dequant-f32w-f32a-online.json +56 -0
- package/src/config/presets/kernel-paths/lfm2-q4k-dequant-f32a-nosubgroups.json +61 -0
- package/src/config/presets/kernel-paths/registry.json +43 -8
- package/src/config/presets/models/gemma2.json +3 -2
- package/src/config/presets/models/gemma3.json +2 -0
- package/src/config/presets/models/qwen3.json +4 -3
- package/src/config/presets/models/qwen3_5.json +16 -0
- package/src/config/presets/runtime/experiments/bench/gemma3-bench-q4k.json +1 -1
- package/src/config/presets/runtime/experiments/debug/gemma3-debug-q4k.json +1 -1
- package/src/config/presets/runtime/experiments/verify/gemma3-verify.json +1 -1
- package/src/config/presets/runtime/kernels/dequant-f16-q4k.json +6 -13
- package/src/config/presets/runtime/kernels/dequant-f32-q4k.json +6 -13
- package/src/config/presets/runtime/kernels/embeddinggemma-q4k-dequant-f32a.json +37 -0
- package/src/config/presets/runtime/kernels/fused-q4k.json +6 -13
- package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f16a.json +33 -0
- package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f32a-nosubgroups.json +33 -0
- package/src/config/presets/runtime/kernels/gemma2-q4k-fused-f32a.json +33 -0
- package/src/config/presets/runtime/kernels/safe-q4k.json +6 -13
- package/src/config/presets/runtime/model/qwen3-5-layer-probe.json +52 -0
- package/src/config/presets/runtime/model/qwen3-5-linear-attn-debug.json +90 -0
- package/src/config/presets/runtime/platform/metal-apple-q4k.json +1 -1
- package/src/config/runtime.js +6 -1
- package/src/config/schema/conversion.schema.d.ts +1 -0
- package/src/config/schema/debug.schema.d.ts +5 -0
- package/src/config/schema/doppler.schema.js +16 -21
- package/src/config/schema/inference-defaults.schema.js +3 -3
- package/src/config/schema/kernel-path.schema.d.ts +5 -1
- package/src/config/schema/kernel-thresholds.schema.js +12 -4
- package/src/config/schema/manifest.schema.d.ts +3 -2
- package/src/config/schema/manifest.schema.js +17 -4
- package/src/config/schema/storage.schema.js +1 -1
- package/src/config/training-defaults.js +30 -22
- package/src/converter/conversion-plan.js +104 -11
- package/src/converter/core.d.ts +7 -0
- package/src/converter/core.js +16 -9
- package/src/converter/execution-v0-manifest.js +4 -1
- package/src/converter/index.d.ts +1 -0
- package/src/converter/index.js +1 -0
- package/src/converter/manifest-inference.js +50 -29
- package/src/converter/parsers/diffusion.js +0 -3
- package/src/converter/parsers/transformer.js +4 -0
- package/src/converter/quantization-info.js +40 -16
- package/src/converter/quantizer.js +19 -12
- package/src/converter/rope-config.js +8 -6
- package/src/converter/shard-packer.d.ts +1 -1
- package/src/converter/shard-packer.js +4 -1
- package/src/converter/tokenizer-utils.d.ts +1 -0
- package/src/converter/tokenizer-utils.js +4 -1
- package/src/debug/config.js +123 -11
- package/src/debug/reference/hf_qwen35_linear_attn_debug.py +268 -0
- package/src/debug/signals.js +7 -1
- package/src/debug/tensor.d.ts +2 -0
- package/src/debug/tensor.js +13 -2
- package/src/distribution/p2p-control-plane.js +52 -12
- package/src/distribution/p2p-observability.js +43 -7
- package/src/distribution/p2p-webrtc-browser.js +20 -0
- package/src/distribution/shard-delivery.js +83 -27
- package/src/formats/gguf/types.js +33 -16
- package/src/formats/rdrr/groups.d.ts +12 -4
- package/src/formats/rdrr/groups.js +3 -6
- package/src/formats/rdrr/parsing.d.ts +4 -0
- package/src/formats/rdrr/parsing.js +53 -3
- package/src/formats/rdrr/types.d.ts +2 -1
- package/src/gpu/command-recorder.js +86 -61
- package/src/gpu/device.d.ts +1 -0
- package/src/gpu/device.js +73 -19
- package/src/gpu/kernel-tuner/benchmarks.js +326 -316
- package/src/gpu/kernel-tuner/cache.js +71 -4
- package/src/gpu/kernel-tuner/tuner.js +22 -4
- package/src/gpu/kernels/attention.js +15 -34
- package/src/gpu/kernels/backward/adam.js +62 -58
- package/src/gpu/kernels/backward/attention_backward.js +257 -169
- package/src/gpu/kernels/backward/conv2d_backward.js +14 -1
- package/src/gpu/kernels/cast.js +191 -149
- package/src/gpu/kernels/check-stop.js +33 -44
- package/src/gpu/kernels/conv2d.js +27 -17
- package/src/gpu/kernels/cross_entropy_loss.js +21 -15
- package/src/gpu/kernels/depthwise_conv2d.js +36 -26
- package/src/gpu/kernels/dequant.js +178 -126
- package/src/gpu/kernels/energy.d.ts +3 -21
- package/src/gpu/kernels/energy.js +111 -88
- package/src/gpu/kernels/feature-check.js +1 -1
- package/src/gpu/kernels/fused_ffn.js +84 -65
- package/src/gpu/kernels/fused_matmul_residual.js +56 -33
- package/src/gpu/kernels/fused_matmul_rmsnorm.js +62 -45
- package/src/gpu/kernels/gather.js +33 -15
- package/src/gpu/kernels/gelu.js +19 -11
- package/src/gpu/kernels/grouped_pointwise_conv2d.js +33 -23
- package/src/gpu/kernels/groupnorm.js +34 -23
- package/src/gpu/kernels/index.d.ts +8 -0
- package/src/gpu/kernels/index.js +6 -0
- package/src/gpu/kernels/kv-quantize.js +5 -2
- package/src/gpu/kernels/layernorm.js +35 -19
- package/src/gpu/kernels/logit-merge.js +5 -3
- package/src/gpu/kernels/matmul-selection.js +47 -4
- package/src/gpu/kernels/matmul.d.ts +2 -0
- package/src/gpu/kernels/matmul.js +59 -40
- package/src/gpu/kernels/modulate.js +23 -15
- package/src/gpu/kernels/moe.js +221 -175
- package/src/gpu/kernels/pixel_shuffle.js +22 -14
- package/src/gpu/kernels/relu.js +18 -10
- package/src/gpu/kernels/repeat_channels.js +25 -17
- package/src/gpu/kernels/residual.js +37 -27
- package/src/gpu/kernels/rmsnorm.js +66 -43
- package/src/gpu/kernels/rope.js +3 -0
- package/src/gpu/kernels/sample.js +27 -38
- package/src/gpu/kernels/sana_linear_attention.js +18 -10
- package/src/gpu/kernels/scale.js +18 -11
- package/src/gpu/kernels/shader-cache.js +4 -2
- package/src/gpu/kernels/silu.js +120 -72
- package/src/gpu/kernels/softmax.js +44 -25
- package/src/gpu/kernels/split_qg.d.ts +50 -0
- package/src/gpu/kernels/split_qg.js +46 -0
- package/src/gpu/kernels/split_qg.wgsl +58 -0
- package/src/gpu/kernels/split_qg_f16.wgsl +62 -0
- package/src/gpu/kernels/split_qkv.js +23 -13
- package/src/gpu/kernels/transpose.js +18 -10
- package/src/gpu/kernels/transpose.wgsl +5 -3
- package/src/gpu/kernels/upsample2d.js +21 -13
- package/src/gpu/kernels/utils.js +20 -13
- package/src/gpu/partitioned-buffer-pool.js +10 -2
- package/src/gpu/perf-guards.js +2 -9
- package/src/gpu/profiler.js +27 -22
- package/src/gpu/readback-utils.d.ts +16 -0
- package/src/gpu/readback-utils.js +41 -0
- package/src/gpu/submit-tracker.js +13 -0
- package/src/gpu/uniform-cache.d.ts +1 -0
- package/src/gpu/uniform-cache.js +30 -9
- package/src/gpu/weight-buffer.d.ts +1 -1
- package/src/gpu/weight-buffer.js +1 -1
- package/src/hotswap/intent-bundle.js +6 -0
- package/src/hotswap/manifest.d.ts +10 -1
- package/src/hotswap/manifest.js +12 -2
- package/src/hotswap/runtime.js +30 -8
- package/src/index-browser.d.ts +44 -0
- package/src/index-browser.js +14 -0
- package/src/inference/browser-harness-contract-helpers.d.ts +5 -0
- package/src/inference/browser-harness-contract-helpers.js +28 -0
- package/src/inference/browser-harness-diffusion-energy-suites.d.ts +2 -0
- package/src/inference/browser-harness-diffusion-energy-suites.js +269 -0
- package/src/inference/browser-harness-model-helpers.d.ts +16 -0
- package/src/inference/browser-harness-model-helpers.js +217 -0
- package/src/inference/browser-harness-report-helpers.d.ts +7 -0
- package/src/inference/browser-harness-report-helpers.js +42 -0
- package/src/inference/browser-harness-runtime-helpers.d.ts +61 -0
- package/src/inference/browser-harness-runtime-helpers.js +415 -0
- package/src/inference/browser-harness-suite-helpers.d.ts +28 -0
- package/src/inference/browser-harness-suite-helpers.js +268 -0
- package/src/inference/browser-harness-text-helpers.d.ts +27 -0
- package/src/inference/browser-harness-text-helpers.js +788 -0
- package/src/inference/browser-harness.d.ts +8 -0
- package/src/inference/browser-harness.js +149 -1996
- package/src/inference/kv-cache/base.js +140 -94
- package/src/inference/kv-cache/tiered.js +5 -3
- package/src/inference/moe-router.js +88 -56
- package/src/inference/multi-model-network.js +5 -3
- package/src/inference/network-evolution.d.ts +11 -2
- package/src/inference/network-evolution.js +20 -21
- package/src/inference/pipelines/context.d.ts +3 -0
- package/src/inference/pipelines/context.js +142 -2
- package/src/inference/pipelines/diffusion/helpers.js +10 -2
- package/src/inference/pipelines/diffusion/pipeline.js +2 -1
- package/src/inference/pipelines/diffusion/sd3-transformer.js +10 -10
- package/src/inference/pipelines/diffusion/text-encoder-gpu.js +8 -2
- package/src/inference/pipelines/diffusion/vae.js +3 -7
- package/src/inference/pipelines/energy/pipeline.js +27 -21
- package/src/inference/pipelines/energy/quintel.d.ts +5 -0
- package/src/inference/pipelines/energy/quintel.js +11 -0
- package/src/inference/pipelines/energy-head/row-head-pipeline.js +17 -13
- package/src/inference/pipelines/structured/json-head-pipeline.js +26 -11
- package/src/inference/pipelines/text/attention/output-projection.d.ts +12 -0
- package/src/inference/pipelines/text/attention/output-projection.js +8 -0
- package/src/inference/pipelines/text/attention/projections.d.ts +10 -1
- package/src/inference/pipelines/text/attention/projections.js +192 -112
- package/src/inference/pipelines/text/attention/record.js +77 -14
- package/src/inference/pipelines/text/attention/run.js +112 -14
- package/src/inference/pipelines/text/config.js +17 -4
- package/src/inference/pipelines/text/embed.js +2 -8
- package/src/inference/pipelines/text/execution-plan.js +46 -23
- package/src/inference/pipelines/text/execution-v0-contract-helpers.d.ts +59 -0
- package/src/inference/pipelines/text/execution-v0-contract-helpers.js +937 -0
- package/src/inference/pipelines/text/execution-v0-runtime-builders.d.ts +15 -0
- package/src/inference/pipelines/text/execution-v0-runtime-builders.js +279 -0
- package/src/inference/pipelines/text/execution-v0.js +62 -1013
- package/src/inference/pipelines/text/generator-runtime.js +5 -0
- package/src/inference/pipelines/text/generator-steps.d.ts +52 -0
- package/src/inference/pipelines/text/generator-steps.js +340 -221
- package/src/inference/pipelines/text/generator.js +56 -40
- package/src/inference/pipelines/text/init.d.ts +13 -0
- package/src/inference/pipelines/text/init.js +94 -25
- package/src/inference/pipelines/text/kernel-path-auto-select.js +2 -0
- package/src/inference/pipelines/text/kernel-trace.d.ts +2 -0
- package/src/inference/pipelines/text/kernel-trace.js +6 -0
- package/src/inference/pipelines/text/layer.js +4 -9
- package/src/inference/pipelines/text/linear-attention.d.ts +15 -0
- package/src/inference/pipelines/text/linear-attention.js +113 -9
- package/src/inference/pipelines/text/logits/gpu.js +12 -7
- package/src/inference/pipelines/text/logits/index.d.ts +6 -1
- package/src/inference/pipelines/text/logits/index.js +13 -12
- package/src/inference/pipelines/text/logits/utils.d.ts +7 -0
- package/src/inference/pipelines/text/logits/utils.js +9 -0
- package/src/inference/pipelines/text/lora-apply.js +50 -32
- package/src/inference/pipelines/text/model-load.js +282 -104
- package/src/inference/pipelines/text/moe-cache.js +5 -4
- package/src/inference/pipelines/text/moe-cpu-gptoss.js +74 -69
- package/src/inference/pipelines/text/moe-cpu.js +42 -38
- package/src/inference/pipelines/text/moe-gpu.js +110 -86
- package/src/inference/pipelines/text/ops.js +90 -90
- package/src/inference/pipelines/text/probes.js +9 -9
- package/src/inference/pipelines/text/sampling.js +52 -6
- package/src/inference/pipelines/text/weights.js +17 -7
- package/src/inference/pipelines/text.js +13 -1
- package/src/inference/speculative.d.ts +2 -2
- package/src/inference/speculative.js +4 -18
- package/src/inference/test-harness.d.ts +1 -1
- package/src/inference/test-harness.js +17 -7
- package/src/inference/tokenizer.d.ts +0 -5
- package/src/inference/tokenizer.js +4 -23
- package/src/inference/tokenizers/bpe.js +9 -0
- package/src/inference/tokenizers/bundled.js +20 -0
- package/src/inference/tokenizers/sentencepiece.js +12 -0
- package/src/loader/doppler-loader.js +38 -22
- package/src/loader/dtype-utils.js +3 -44
- package/src/loader/embedding-loader.js +7 -3
- package/src/loader/experts/expert-cache.js +13 -6
- package/src/loader/experts/expert-loader.js +10 -6
- package/src/loader/final-weights-loader.js +10 -4
- package/src/loader/layer-loader.js +2 -1
- package/src/loader/loader-state.js +2 -2
- package/src/loader/memory-monitor.js +8 -0
- package/src/loader/multi-model-loader.d.ts +14 -0
- package/src/loader/multi-model-loader.js +70 -24
- package/src/loader/shard-cache.js +84 -14
- package/src/loader/shard-resolver.js +25 -3
- package/src/loader/tensors/tensor-loader.js +214 -144
- package/src/loader/tensors/tensor-reader.js +76 -19
- package/src/loader/weight-downcast.js +1 -1
- package/src/memory/buffer-pool.d.ts +9 -1
- package/src/memory/buffer-pool.js +109 -44
- package/src/memory/unified-detect.js +1 -1
- package/src/rules/inference/dtype.rules.json +5 -0
- package/src/rules/inference/kernel-path.rules.json +24 -8
- package/src/rules/kernels/split-qg.rules.json +6 -0
- package/src/rules/rule-registry.js +27 -1
- package/src/storage/backends/opfs-store.js +68 -24
- package/src/storage/downloader.js +365 -83
- package/src/storage/index.d.ts +3 -0
- package/src/storage/index.js +3 -0
- package/src/storage/preflight.d.ts +2 -2
- package/src/storage/preflight.js +24 -2
- package/src/storage/quickstart-downloader.js +11 -5
- package/src/storage/registry.js +10 -4
- package/src/storage/reports.js +1 -1
- package/src/storage/shard-manager.d.ts +15 -1
- package/src/storage/shard-manager.js +55 -6
- package/src/storage/source-artifact-store.d.ts +52 -0
- package/src/storage/source-artifact-store.js +234 -0
- package/src/tooling/command-api-constants.d.ts +9 -0
- package/src/tooling/command-api-constants.js +9 -0
- package/src/tooling/command-api-family-normalizers.d.ts +9 -0
- package/src/tooling/command-api-family-normalizers.js +343 -0
- package/src/tooling/command-api-helpers.d.ts +25 -0
- package/src/tooling/command-api-helpers.js +262 -0
- package/src/tooling/command-api.js +16 -602
- package/src/tooling/command-envelope.js +4 -1
- package/src/tooling/command-runner-shared.js +52 -18
- package/src/tooling/conversion-config-materializer.js +3 -5
- package/src/tooling/lean-execution-contract.js +150 -3
- package/src/tooling/node-browser-command-runner.js +161 -271
- package/src/tooling/node-command-runner.js +29 -3
- package/src/tooling/node-converter.js +30 -1
- package/src/tooling/node-source-runtime.d.ts +1 -1
- package/src/tooling/node-source-runtime.js +120 -3
- package/src/tooling/node-webgpu.js +24 -21
- package/src/tooling/opfs-cache.js +21 -4
- package/src/tooling/runtime-input-composition.d.ts +38 -0
- package/src/tooling/runtime-input-composition.js +86 -0
- package/src/tooling/source-runtime-bundle.d.ts +40 -5
- package/src/tooling/source-runtime-bundle.js +261 -34
- package/src/tooling/source-runtime-materializer.d.ts +6 -0
- package/src/tooling/source-runtime-materializer.js +93 -0
- package/src/training/attention-backward.js +32 -17
- package/src/training/autograd.js +80 -52
- package/src/training/checkpoint-watch.d.ts +2 -1
- package/src/training/checkpoint-watch.js +39 -6
- package/src/training/checkpoint.js +40 -11
- package/src/training/clip.js +2 -1
- package/src/training/datasets/token-batch.js +20 -8
- package/src/training/distillation/checkpoint-watch.js +1 -0
- package/src/training/distillation/student-fixture.d.ts +22 -0
- package/src/training/distillation/student-fixture.js +846 -0
- package/src/training/distillation/suite-data.d.ts +45 -0
- package/src/training/distillation/suite-data.js +189 -0
- package/src/training/lora-pipeline.js +4 -7
- package/src/training/lora.js +26 -12
- package/src/training/loss.js +5 -6
- package/src/training/objectives/cross_entropy.js +2 -5
- package/src/training/objectives/distill_kd.js +4 -8
- package/src/training/objectives/distill_triplet.js +4 -8
- package/src/training/objectives/ul_stage2_base.js +4 -8
- package/src/training/operator-command.js +2 -0
- package/src/training/optimizer.js +19 -7
- package/src/training/runner.js +2 -1
- package/src/training/suite.js +18 -978
- package/src/training/tensor-factory.d.ts +9 -0
- package/src/training/tensor-factory.js +13 -0
- package/src/training/trainer.js +3 -5
- package/src/training/ul_dataset.js +3 -5
- package/src/training/workloads.js +70 -79
- package/src/types/model.d.ts +5 -0
- package/src/version.js +1 -1
- package/tools/convert-safetensors-node.js +22 -16
- package/tools/doppler-cli.js +50 -26
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { MB } from './units.schema.js';
|
|
2
|
+
import { validateRequiredInferenceFields } from '../../inference/pipelines/text/config.js';
|
|
2
3
|
|
|
3
4
|
// =============================================================================
|
|
4
5
|
// Hash & Versioning
|
|
@@ -39,7 +40,7 @@ export const DEFAULT_MANIFEST_INFERENCE = {
|
|
|
39
40
|
schema: null,
|
|
40
41
|
presetId: null,
|
|
41
42
|
attention: {
|
|
42
|
-
queryPreAttnScalar:
|
|
43
|
+
queryPreAttnScalar: 64, // headDim for standard 64-dim heads; attnScale = 1/sqrt(scalar)
|
|
43
44
|
attnLogitSoftcapping: null, // No softcapping (null = disabled)
|
|
44
45
|
slidingWindow: null, // Full attention (null = no sliding window)
|
|
45
46
|
queryKeyNorm: false,
|
|
@@ -70,9 +71,9 @@ export const DEFAULT_MANIFEST_INFERENCE = {
|
|
|
70
71
|
ropeLocalScalingType: null, // Local scaling policy (null = no scaling)
|
|
71
72
|
ropeLocalScalingFactor: 1.0,
|
|
72
73
|
// YARN parameters - only relevant when ropeScalingType='yarn'
|
|
73
|
-
yarnBetaFast:
|
|
74
|
-
yarnBetaSlow:
|
|
75
|
-
yarnOriginalMaxPos:
|
|
74
|
+
yarnBetaFast: null,
|
|
75
|
+
yarnBetaSlow: null,
|
|
76
|
+
yarnOriginalMaxPos: null,
|
|
76
77
|
// Local YARN parameters - only relevant when ropeLocalScalingType='yarn'
|
|
77
78
|
ropeLocalYarnBetaFast: null,
|
|
78
79
|
ropeLocalYarnBetaSlow: null,
|
|
@@ -124,6 +125,18 @@ export function validateManifestInference(
|
|
|
124
125
|
`Please re-convert the model using the latest converter.`
|
|
125
126
|
);
|
|
126
127
|
}
|
|
128
|
+
|
|
129
|
+
if (manifest.modelType === 'diffusion' || manifest.modelType === 'energy') {
|
|
130
|
+
return;
|
|
131
|
+
}
|
|
132
|
+
|
|
133
|
+
const inference = typeof structuredClone === 'function'
|
|
134
|
+
? structuredClone(manifest.inference)
|
|
135
|
+
: JSON.parse(JSON.stringify(manifest.inference));
|
|
136
|
+
validateRequiredInferenceFields(
|
|
137
|
+
inference,
|
|
138
|
+
manifest.modelId ?? 'unknown'
|
|
139
|
+
);
|
|
127
140
|
}
|
|
128
141
|
|
|
129
142
|
export function hasInferenceConfig(
|
|
@@ -35,7 +35,7 @@ export const DEFAULT_STORAGE_ALIGNMENT_CONFIG = {
|
|
|
35
35
|
export const DEFAULT_STORAGE_BACKEND_CONFIG = {
|
|
36
36
|
backend: 'auto', // auto | opfs | indexeddb | memory
|
|
37
37
|
opfs: {
|
|
38
|
-
useSyncAccessHandle:
|
|
38
|
+
useSyncAccessHandle: false,
|
|
39
39
|
maxConcurrentHandles: 2,
|
|
40
40
|
},
|
|
41
41
|
indexeddb: {
|
|
@@ -2,9 +2,17 @@ import { createDopplerConfig, DEFAULT_TRAINING_SETTINGS } from './schema/index.j
|
|
|
2
2
|
import { validateDistillTrainingConfig } from './schema/distill-training.schema.js';
|
|
3
3
|
import { validateUlTrainingConfig } from './schema/ul-training.schema.js';
|
|
4
4
|
|
|
5
|
+
function cloneConfigTree(value) {
|
|
6
|
+
if (typeof structuredClone === 'function') {
|
|
7
|
+
return structuredClone(value);
|
|
8
|
+
}
|
|
9
|
+
return JSON.parse(JSON.stringify(value));
|
|
10
|
+
}
|
|
11
|
+
|
|
5
12
|
function mergeTrainingSettings(base, overrides) {
|
|
13
|
+
const baseConfig = cloneConfigTree(base);
|
|
6
14
|
if (!overrides) {
|
|
7
|
-
const merged =
|
|
15
|
+
const merged = baseConfig;
|
|
8
16
|
validateDistillTrainingConfig(merged.distill);
|
|
9
17
|
validateUlTrainingConfig(merged.ul);
|
|
10
18
|
if (merged.distill.enabled === true && merged.ul.enabled === true) {
|
|
@@ -14,42 +22,42 @@ function mergeTrainingSettings(base, overrides) {
|
|
|
14
22
|
}
|
|
15
23
|
|
|
16
24
|
const merged = {
|
|
17
|
-
enabled: overrides.enabled ??
|
|
18
|
-
lora: { ...
|
|
25
|
+
enabled: overrides.enabled ?? baseConfig.enabled,
|
|
26
|
+
lora: { ...baseConfig.lora, ...overrides.lora },
|
|
19
27
|
optimizer: {
|
|
20
|
-
...
|
|
28
|
+
...baseConfig.optimizer,
|
|
21
29
|
...overrides.optimizer,
|
|
22
|
-
scheduler: { ...
|
|
30
|
+
scheduler: { ...baseConfig.optimizer.scheduler, ...overrides.optimizer?.scheduler },
|
|
23
31
|
},
|
|
24
|
-
gradient: { ...
|
|
25
|
-
precision: { ...
|
|
26
|
-
attention: { ...
|
|
32
|
+
gradient: { ...baseConfig.gradient, ...overrides.gradient },
|
|
33
|
+
precision: { ...baseConfig.precision, ...overrides.precision },
|
|
34
|
+
attention: { ...baseConfig.attention, ...overrides.attention },
|
|
27
35
|
telemetry: {
|
|
28
|
-
...
|
|
36
|
+
...baseConfig.telemetry,
|
|
29
37
|
...overrides.telemetry,
|
|
30
38
|
alerts: {
|
|
31
|
-
...
|
|
39
|
+
...baseConfig.telemetry.alerts,
|
|
32
40
|
...overrides.telemetry?.alerts,
|
|
33
41
|
thresholds: {
|
|
34
|
-
...
|
|
42
|
+
...baseConfig.telemetry.alerts.thresholds,
|
|
35
43
|
...overrides.telemetry?.alerts?.thresholds,
|
|
36
44
|
},
|
|
37
45
|
},
|
|
38
46
|
},
|
|
39
|
-
lossScaling: { ...
|
|
47
|
+
lossScaling: { ...baseConfig.lossScaling, ...overrides.lossScaling },
|
|
40
48
|
distill: {
|
|
41
|
-
...
|
|
49
|
+
...baseConfig.distill,
|
|
42
50
|
...overrides.distill,
|
|
43
|
-
freeze: { ...
|
|
51
|
+
freeze: { ...baseConfig.distill.freeze, ...overrides.distill?.freeze },
|
|
44
52
|
},
|
|
45
53
|
ul: {
|
|
46
|
-
...
|
|
54
|
+
...baseConfig.ul,
|
|
47
55
|
...overrides.ul,
|
|
48
|
-
noiseSchedule: { ...
|
|
49
|
-
priorAlignment: { ...
|
|
50
|
-
decoderSigmoidWeight: { ...
|
|
51
|
-
lossWeights: { ...
|
|
52
|
-
freeze: { ...
|
|
56
|
+
noiseSchedule: { ...baseConfig.ul.noiseSchedule, ...overrides.ul?.noiseSchedule },
|
|
57
|
+
priorAlignment: { ...baseConfig.ul.priorAlignment, ...overrides.ul?.priorAlignment },
|
|
58
|
+
decoderSigmoidWeight: { ...baseConfig.ul.decoderSigmoidWeight, ...overrides.ul?.decoderSigmoidWeight },
|
|
59
|
+
lossWeights: { ...baseConfig.ul.lossWeights, ...overrides.ul?.lossWeights },
|
|
60
|
+
freeze: { ...baseConfig.ul.freeze, ...overrides.ul?.freeze },
|
|
53
61
|
},
|
|
54
62
|
};
|
|
55
63
|
validateDistillTrainingConfig(merged.distill);
|
|
@@ -74,7 +82,7 @@ export function createTrainingConfig(overrides = {}) {
|
|
|
74
82
|
|
|
75
83
|
export const DEFAULT_TRAINING_CONFIG = createTrainingConfig();
|
|
76
84
|
|
|
77
|
-
let trainingConfig =
|
|
85
|
+
let trainingConfig = createTrainingConfig();
|
|
78
86
|
|
|
79
87
|
export function getTrainingConfig() {
|
|
80
88
|
return trainingConfig;
|
|
@@ -86,6 +94,6 @@ export function setTrainingConfig(overrides) {
|
|
|
86
94
|
}
|
|
87
95
|
|
|
88
96
|
export function resetTrainingConfig() {
|
|
89
|
-
trainingConfig =
|
|
97
|
+
trainingConfig = createTrainingConfig();
|
|
90
98
|
return trainingConfig;
|
|
91
99
|
}
|
|
@@ -17,6 +17,8 @@ import { sanitizeModelId } from './core.js';
|
|
|
17
17
|
import { classifyTensorRole } from '../formats/rdrr/index.js';
|
|
18
18
|
import { selectRuleValue } from '../rules/rule-registry.js';
|
|
19
19
|
import { buildKernelRefFromKernelEntry, isKernelRefBoundToKernel } from '../config/kernels/kernel-ref.js';
|
|
20
|
+
import { mergeLayeredShallowObjects } from '../config/merge-helpers.js';
|
|
21
|
+
import { buildExecutionV0ContractArtifact } from '../config/execution-v0-contract-check.js';
|
|
20
22
|
|
|
21
23
|
const KNOWN_MODEL_PRESETS = new Set(listPresets());
|
|
22
24
|
const CONVERSION_SUPPORTED_PRESETS = [...KNOWN_MODEL_PRESETS]
|
|
@@ -115,7 +117,10 @@ function isLikelyEmbeddingGemma(rawConfig, architectureHint) {
|
|
|
115
117
|
|
|
116
118
|
export function inferSourceWeightQuantization(tensors) {
|
|
117
119
|
if (!Array.isArray(tensors) || tensors.length === 0) {
|
|
118
|
-
|
|
120
|
+
throw new Error(
|
|
121
|
+
'Cannot infer source weight quantization: no tensors provided. ' +
|
|
122
|
+
'Set converterConfig.quantization.weights explicitly.'
|
|
123
|
+
);
|
|
119
124
|
}
|
|
120
125
|
const weightTensors = [];
|
|
121
126
|
for (const tensor of tensors) {
|
|
@@ -126,7 +131,12 @@ export function inferSourceWeightQuantization(tensors) {
|
|
|
126
131
|
weightTensors.push({ name, dtype });
|
|
127
132
|
}
|
|
128
133
|
const dtypes = new Set(weightTensors.map((tensor) => tensor.dtype));
|
|
129
|
-
if (dtypes.size === 0)
|
|
134
|
+
if (dtypes.size === 0) {
|
|
135
|
+
throw new Error(
|
|
136
|
+
'Cannot infer source weight quantization: no recognizable weight dtypes found. ' +
|
|
137
|
+
'Set converterConfig.quantization.weights explicitly.'
|
|
138
|
+
);
|
|
139
|
+
}
|
|
130
140
|
if (dtypes.size > 1) {
|
|
131
141
|
const detail = Array.from(dtypes)
|
|
132
142
|
.sort()
|
|
@@ -179,9 +189,6 @@ export function validateDefaultKernelPath(inference, context = {}) {
|
|
|
179
189
|
&& expectedComputeDtype !== kernelActivationDtype
|
|
180
190
|
) {
|
|
181
191
|
const presetId = context?.presetId ?? 'unknown';
|
|
182
|
-
if (presetId === 'lfm2' && expectedComputeDtype === 'f32' && kernelActivationDtype === 'f16') {
|
|
183
|
-
return;
|
|
184
|
-
}
|
|
185
192
|
throw new Error(
|
|
186
193
|
`Invalid defaultKernelPath "${inference.defaultKernelPath}" for preset "${presetId}" ` +
|
|
187
194
|
`(weights=${quantizationInfo?.weights ?? 'unknown'}, compute=${expectedComputeDtype}, ` +
|
|
@@ -208,6 +215,61 @@ function cloneJson(value) {
|
|
|
208
215
|
return JSON.parse(JSON.stringify(value));
|
|
209
216
|
}
|
|
210
217
|
|
|
218
|
+
function mergeExecutionV0SessionDefaults(baseSessionDefaults, overrideSessionDefaults) {
|
|
219
|
+
if (!overrideSessionDefaults) {
|
|
220
|
+
return cloneJson(baseSessionDefaults);
|
|
221
|
+
}
|
|
222
|
+
const base = cloneJson(baseSessionDefaults ?? {});
|
|
223
|
+
const override = cloneJson(overrideSessionDefaults);
|
|
224
|
+
const baseCompute = base.compute ?? {};
|
|
225
|
+
const overrideCompute = override.compute ?? {};
|
|
226
|
+
|
|
227
|
+
return {
|
|
228
|
+
...base,
|
|
229
|
+
...override,
|
|
230
|
+
compute: {
|
|
231
|
+
...baseCompute,
|
|
232
|
+
...overrideCompute,
|
|
233
|
+
defaults: mergeLayeredShallowObjects(
|
|
234
|
+
baseCompute.defaults ?? {},
|
|
235
|
+
overrideCompute.defaults ?? {}
|
|
236
|
+
),
|
|
237
|
+
kernelProfiles: Object.prototype.hasOwnProperty.call(overrideCompute, 'kernelProfiles')
|
|
238
|
+
? overrideCompute.kernelProfiles
|
|
239
|
+
: baseCompute.kernelProfiles,
|
|
240
|
+
},
|
|
241
|
+
kvcache: Object.prototype.hasOwnProperty.call(override, 'kvcache')
|
|
242
|
+
? (
|
|
243
|
+
override.kvcache === null
|
|
244
|
+
? null
|
|
245
|
+
: mergeLayeredShallowObjects(base.kvcache ?? {}, override.kvcache ?? {})
|
|
246
|
+
)
|
|
247
|
+
: base.kvcache,
|
|
248
|
+
decodeLoop: Object.prototype.hasOwnProperty.call(override, 'decodeLoop')
|
|
249
|
+
? (
|
|
250
|
+
override.decodeLoop === null
|
|
251
|
+
? null
|
|
252
|
+
: mergeLayeredShallowObjects(base.decodeLoop ?? {}, override.decodeLoop ?? {})
|
|
253
|
+
)
|
|
254
|
+
: base.decodeLoop,
|
|
255
|
+
};
|
|
256
|
+
}
|
|
257
|
+
|
|
258
|
+
function assertExecutionV0ConversionContract(manifestInference, modelId) {
|
|
259
|
+
if (!manifestInference?.execution) {
|
|
260
|
+
return;
|
|
261
|
+
}
|
|
262
|
+
const artifact = buildExecutionV0ContractArtifact(manifestInference, {
|
|
263
|
+
modelId: modelId ?? 'converted-model',
|
|
264
|
+
});
|
|
265
|
+
if (!artifact?.ok) {
|
|
266
|
+
const detail = artifact?.errors?.join(' ') ?? 'unknown execution-v0 contract error';
|
|
267
|
+
throw new Error(
|
|
268
|
+
`converterConfig.inference produced an invalid execution-v0 contract: ${detail}`
|
|
269
|
+
);
|
|
270
|
+
}
|
|
271
|
+
}
|
|
272
|
+
|
|
211
273
|
function readConverterSessionDefaultsOverride(converterConfig) {
|
|
212
274
|
const raw = converterConfig?.inference?.sessionDefaults;
|
|
213
275
|
if (raw == null) return null;
|
|
@@ -219,6 +281,26 @@ function readConverterSessionDefaultsOverride(converterConfig) {
|
|
|
219
281
|
return cloneJson(raw);
|
|
220
282
|
}
|
|
221
283
|
|
|
284
|
+
function assertNonExecutionSessionDefaults(manifestInference) {
|
|
285
|
+
const sessionDefaults = manifestInference?.sessionDefaults;
|
|
286
|
+
if (sessionDefaults == null) {
|
|
287
|
+
return;
|
|
288
|
+
}
|
|
289
|
+
if (typeof sessionDefaults !== 'object' || Array.isArray(sessionDefaults)) {
|
|
290
|
+
throw new Error(
|
|
291
|
+
'converterConfig.inference.sessionDefaults must resolve to an object for non-execution manifests.'
|
|
292
|
+
);
|
|
293
|
+
}
|
|
294
|
+
const keys = Object.keys(sessionDefaults);
|
|
295
|
+
const invalidKeys = keys.filter((key) => key !== 'decodeLoop');
|
|
296
|
+
if (invalidKeys.length > 0) {
|
|
297
|
+
throw new Error(
|
|
298
|
+
'converterConfig.inference.sessionDefaults may only set decodeLoop unless ' +
|
|
299
|
+
'converterConfig.inference.execution is present.'
|
|
300
|
+
);
|
|
301
|
+
}
|
|
302
|
+
}
|
|
303
|
+
|
|
222
304
|
function readConverterExecutionOverride(converterConfig) {
|
|
223
305
|
const raw = converterConfig?.inference?.execution;
|
|
224
306
|
if (raw == null) return null;
|
|
@@ -331,10 +413,10 @@ function applyConverterInferenceOverrides(manifestInference, converterConfig, co
|
|
|
331
413
|
manifestInference.defaultKernelPath = overrideKernelPath;
|
|
332
414
|
}
|
|
333
415
|
const sessionDefaults = readConverterSessionDefaultsOverride(converterConfig);
|
|
416
|
+
const execution = readConverterExecutionOverride(converterConfig);
|
|
334
417
|
if (sessionDefaults) {
|
|
335
418
|
manifestInference.sessionDefaults = sessionDefaults;
|
|
336
419
|
}
|
|
337
|
-
const execution = readConverterExecutionOverride(converterConfig);
|
|
338
420
|
if (execution) {
|
|
339
421
|
manifestInference.execution = execution;
|
|
340
422
|
}
|
|
@@ -351,17 +433,28 @@ function applyConverterInferenceOverrides(manifestInference, converterConfig, co
|
|
|
351
433
|
const generatedExecution = buildExecutionV0FromKernelPath(manifestInference.defaultKernelPath);
|
|
352
434
|
if (generatedExecution) {
|
|
353
435
|
manifestInference.execution = generatedExecution.execution;
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
436
|
+
manifestInference.sessionDefaults = mergeExecutionV0SessionDefaults(
|
|
437
|
+
generatedExecution.sessionDefaults,
|
|
438
|
+
manifestInference.sessionDefaults
|
|
439
|
+
);
|
|
357
440
|
manifestInference.schema = generatedExecution.schema;
|
|
358
441
|
}
|
|
359
442
|
}
|
|
360
443
|
|
|
361
|
-
if (
|
|
444
|
+
if (execution && !manifestInference.sessionDefaults) {
|
|
445
|
+
throw new Error(
|
|
446
|
+
'converterConfig.inference.execution requires converterConfig.inference.sessionDefaults.'
|
|
447
|
+
);
|
|
448
|
+
}
|
|
449
|
+
|
|
450
|
+
if (manifestInference.execution) {
|
|
362
451
|
manifestInference.schema = EXECUTION_V0_SCHEMA_ID;
|
|
452
|
+
} else {
|
|
453
|
+
assertNonExecutionSessionDefaults(manifestInference);
|
|
454
|
+
manifestInference.schema = null;
|
|
363
455
|
}
|
|
364
456
|
validateDefaultKernelPath(manifestInference, context);
|
|
457
|
+
assertExecutionV0ConversionContract(manifestInference, context?.modelId ?? context?.presetId);
|
|
365
458
|
}
|
|
366
459
|
|
|
367
460
|
export function resolveConversionPlan(options) {
|
|
@@ -418,7 +511,7 @@ export function resolveConversionPlan(options) {
|
|
|
418
511
|
if (!presetOverride && isLikelyEmbeddingGemma(rawConfig, architectureHint)) {
|
|
419
512
|
presetId = 'embeddinggemma';
|
|
420
513
|
}
|
|
421
|
-
if (presetId
|
|
514
|
+
if (!presetId) {
|
|
422
515
|
throw buildUnknownFamilyError(architectureHint, rawConfig, options?.includePresetOverrideHint === true);
|
|
423
516
|
}
|
|
424
517
|
const preset = resolvePreset(presetId);
|
package/src/converter/core.d.ts
CHANGED
|
@@ -168,6 +168,13 @@ export declare const RDRR_VERSION: number;
|
|
|
168
168
|
*/
|
|
169
169
|
export declare function sanitizeModelId(name: string): string | null;
|
|
170
170
|
|
|
171
|
+
/**
 * Resolve bundled tokenizer vocab size from Hugging Face tokenizer.json payloads.
 *
 * Reads `model.vocab` from the payload: an array contributes its length, any
 * other object contributes its own enumerable key count.
 *
 * @param tokenizerJson - Parsed tokenizer.json payload; may be null or undefined.
 * @returns The number of vocab entries, or 0 when `model.vocab` is absent or
 *   is neither an array nor an object.
 */
export declare function resolveBundledTokenizerVocabSize(
  tokenizerJson: Record<string, unknown> | null | undefined
): number;
|
|
177
|
+
|
|
171
178
|
/**
|
|
172
179
|
* Format bytes for human-readable display
|
|
173
180
|
*/
|
package/src/converter/core.js
CHANGED
|
@@ -26,6 +26,7 @@ import { buildManifestRequiredInferenceFieldsArtifact } from '../config/required
|
|
|
26
26
|
import { buildManifestInference, inferEmbeddingOutputConfig } from './manifest-inference.js';
|
|
27
27
|
import { resolveEosTokenId } from './tokenizer-utils.js';
|
|
28
28
|
import {
|
|
29
|
+
normalizeQ4KLayout,
|
|
29
30
|
resolveManifestQuantization,
|
|
30
31
|
resolveEffectiveQuantizationInfo,
|
|
31
32
|
} from './quantization-info.js';
|
|
@@ -122,11 +123,6 @@ function bf16ToFloat32(value) {
|
|
|
122
123
|
return view.getFloat32(0, true);
|
|
123
124
|
}
|
|
124
125
|
|
|
125
|
-
function normalizeQ4KLayout(value) {
|
|
126
|
-
const normalized = String(value || '').trim().toLowerCase();
|
|
127
|
-
return normalized === 'col' ? 'col' : 'row';
|
|
128
|
-
}
|
|
129
|
-
|
|
130
126
|
function normalizeTensorName(tensor) {
|
|
131
127
|
const name = tensor?.name;
|
|
132
128
|
return typeof name === 'string' ? name : '';
|
|
@@ -495,6 +491,17 @@ function buildSentencepieceTokenizer(tokenizerConfig, rawConfig, architecture, m
|
|
|
495
491
|
return tokenizer;
|
|
496
492
|
}
|
|
497
493
|
|
|
494
|
+
/**
 * Count the vocab entries carried by a tokenizer.json payload.
 *
 * @param {object | null | undefined} tokenizerJson - Parsed tokenizer.json payload.
 * @returns {number} Length of `model.vocab` when it is an array, its own
 *   enumerable key count when it is another object, otherwise 0.
 */
export function resolveBundledTokenizerVocabSize(tokenizerJson) {
  const entries = tokenizerJson?.model?.vocab;

  // typeof null is 'object', so rule it out before the type check.
  if (entries === null || typeof entries !== 'object') {
    return 0;
  }

  return Array.isArray(entries) ? entries.length : Object.keys(entries).length;
}
|
|
504
|
+
|
|
498
505
|
|
|
499
506
|
export function sanitizeModelId(name) {
|
|
500
507
|
const sanitized = name
|
|
@@ -976,6 +983,7 @@ export function createManifest(
|
|
|
976
983
|
isDiffusion ? 'diffusion' : extractArchitecture(model.config, model.ggufConfig)
|
|
977
984
|
);
|
|
978
985
|
const rawConfig = model.config || {};
|
|
986
|
+
const generationConfig = model.generationConfig ?? null;
|
|
979
987
|
const resolvedArchitecture = isDiffusion
|
|
980
988
|
? architecture
|
|
981
989
|
: resolveIntermediateSizeFromTensors(architecture, model, tensorLocations, rawConfig, modelId);
|
|
@@ -988,7 +996,7 @@ export function createManifest(
|
|
|
988
996
|
inference = { ...DEFAULT_MANIFEST_INFERENCE, presetId: 'diffusion' };
|
|
989
997
|
} else {
|
|
990
998
|
const presetId = detectPreset(rawConfig, model.architecture);
|
|
991
|
-
if (presetId
|
|
999
|
+
if (!presetId) {
|
|
992
1000
|
const modelType = rawConfig.model_type ?? 'unknown';
|
|
993
1001
|
throw new Error(
|
|
994
1002
|
`Unknown model family: architecture="${model.architecture || 'unknown'}", model_type="${modelType}"\n\n` +
|
|
@@ -1030,6 +1038,7 @@ export function createManifest(
|
|
|
1030
1038
|
? null
|
|
1031
1039
|
: resolveEosTokenId({
|
|
1032
1040
|
config: rawConfig,
|
|
1041
|
+
generationConfig,
|
|
1033
1042
|
tokenizer: model.tokenizer ?? model.tokenizerConfig ?? null,
|
|
1034
1043
|
tokenizerJson: model.tokenizerJson ?? null,
|
|
1035
1044
|
});
|
|
@@ -1070,9 +1079,7 @@ export function createManifest(
|
|
|
1070
1079
|
// Include tokenizer if available
|
|
1071
1080
|
if (model.tokenizerJson) {
|
|
1072
1081
|
const tokenizer = model.tokenizerJson;
|
|
1073
|
-
const vocabSize =
|
|
1074
|
-
tokenizer.model?.vocab?.length ||
|
|
1075
|
-
Object.keys(tokenizer.model?.vocab || {}).length;
|
|
1082
|
+
const vocabSize = resolveBundledTokenizerVocabSize(tokenizer);
|
|
1076
1083
|
if (!vocabSize) {
|
|
1077
1084
|
throw new Error('Tokenizer vocab is missing or empty');
|
|
1078
1085
|
}
|
|
@@ -104,7 +104,10 @@ function buildKernelProfiles(steps) {
|
|
|
104
104
|
}
|
|
105
105
|
|
|
106
106
|
function buildSessionDefaults(kernelPath) {
|
|
107
|
-
const activationDtype = normalizeKernelDtype(getKernelPathActivationDtype(kernelPath))
|
|
107
|
+
const activationDtype = normalizeKernelDtype(getKernelPathActivationDtype(kernelPath));
|
|
108
|
+
if (!activationDtype) {
|
|
109
|
+
throw new Error('execution-v0 manifest: kernel path is missing activationDtype.');
|
|
110
|
+
}
|
|
108
111
|
const outputDtype = normalizeKernelDtype(getKernelPathOutputDtype(kernelPath)) ?? activationDtype;
|
|
109
112
|
const kvDtype = normalizeKernelDtype(getKernelPathKVDtype(kernelPath)) ?? activationDtype;
|
|
110
113
|
return {
|
package/src/converter/index.d.ts
CHANGED
package/src/converter/index.js
CHANGED
|
@@ -240,16 +240,6 @@ function detectAttentionOutputGate(presetInference, modelConfig, defaults) {
|
|
|
240
240
|
return modelConfig.attn_output_gate;
|
|
241
241
|
}
|
|
242
242
|
|
|
243
|
-
const modelType = normalizeLayerTypeName(modelConfig?.model_type);
|
|
244
|
-
const hasLinearAttentionLayers = Array.isArray(modelConfig?.layer_types)
|
|
245
|
-
&& modelConfig.layer_types.some((entry) => normalizeCustomLayerType(entry) === 'linear_attention');
|
|
246
|
-
if (
|
|
247
|
-
hasLinearAttentionLayers
|
|
248
|
-
&& (modelType === 'qwen2' || modelType === 'qwen3_5' || modelType === 'qwen3_5_text')
|
|
249
|
-
) {
|
|
250
|
-
return true;
|
|
251
|
-
}
|
|
252
|
-
|
|
253
243
|
return defaults.attention.attentionOutputGate;
|
|
254
244
|
}
|
|
255
245
|
|
|
@@ -259,13 +249,23 @@ function resolveQueryPreAttnScalar(preset, modelConfig, headDim) {
|
|
|
259
249
|
return explicit;
|
|
260
250
|
}
|
|
261
251
|
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
252
|
+
// Standard attention scaling: attnScale = 1/sqrt(queryPreAttnScalar).
|
|
253
|
+
// For standard transformers queryPreAttnScalar = headDim, giving 1/sqrt(headDim).
|
|
254
|
+
// Preset may override for non-standard models.
|
|
255
|
+
const presetScalar = Number(preset?.inference?.attention?.queryPreAttnScalar);
|
|
256
|
+
if (Number.isFinite(presetScalar) && presetScalar > 0) {
|
|
257
|
+
return presetScalar;
|
|
258
|
+
}
|
|
259
|
+
|
|
260
|
+
return headDim;
|
|
261
|
+
}
|
|
262
|
+
|
|
263
|
+
/**
 * Pick the RMSNorm weight-offset flag for the manifest.
 *
 * The preset value wins only when it is an actual boolean; any other value
 * (including a missing one) defers to `defaults.normalization`.
 *
 * @param {object | null | undefined} presetInference - Preset inference config.
 * @param {object} modelConfig - Accepted for signature parity; not read here.
 * @param {object} defaults - Defaults object; must expose `normalization.rmsNormWeightOffset`.
 * @returns {*} The preset boolean, or whatever `defaults.normalization.rmsNormWeightOffset` holds.
 */
function detectRmsNormWeightOffset(presetInference, modelConfig, defaults) {
  const presetFlag = presetInference?.normalization?.rmsNormWeightOffset;
  return typeof presetFlag === 'boolean'
    ? presetFlag
    : defaults.normalization.rmsNormWeightOffset;
}
|
|
270
270
|
|
|
271
271
|
// Build normalization config with auto-detection from tensor names.
|
|
@@ -278,7 +278,7 @@ function buildNormalizationConfig(presetInference, modelConfig, defaults, tensor
|
|
|
278
278
|
modelConfig.rms_norm_eps ??
|
|
279
279
|
modelConfig.attentionLayerNormRMSEpsilon ??
|
|
280
280
|
defaults.normalization.rmsNormEps,
|
|
281
|
-
rmsNormWeightOffset: presetInference
|
|
281
|
+
rmsNormWeightOffset: detectRmsNormWeightOffset(presetInference, modelConfig, defaults),
|
|
282
282
|
// For norm flags: auto-detected > preset > default
|
|
283
283
|
postAttentionNorm: detected.postAttentionNorm ?? presetInference.normalization?.postAttentionNorm ?? defaults.normalization.postAttentionNorm,
|
|
284
284
|
preFeedforwardNorm: detected.preFeedforwardNorm ?? presetInference.normalization?.preFeedforwardNorm ?? defaults.normalization.preFeedforwardNorm,
|
|
@@ -303,26 +303,44 @@ function resolveKernelPathFromPreset(presetInference, quantizationInfo, q4kLayou
|
|
|
303
303
|
}
|
|
304
304
|
|
|
305
305
|
const weightKey = normalizeKernelDtype(quantizationInfo?.weights);
|
|
306
|
-
const computeKey = normalizeKernelDtype(quantizationInfo?.compute)
|
|
307
|
-
|
|
308
|
-
const entry =
|
|
306
|
+
const computeKey = normalizeKernelDtype(quantizationInfo?.compute);
|
|
307
|
+
const hasWeightEntry = weightKey != null && Object.prototype.hasOwnProperty.call(kernelPaths, weightKey);
|
|
308
|
+
const entry = hasWeightEntry ? kernelPaths[weightKey] : kernelPaths.default;
|
|
309
|
+
const weightLabel = weightKey ? `.${weightKey}` : '';
|
|
309
310
|
let resolved = null;
|
|
311
|
+
if (entry == null) {
|
|
312
|
+
return presetInference?.kernelPath ?? null;
|
|
313
|
+
}
|
|
314
|
+
|
|
310
315
|
if (typeof entry === 'string') {
|
|
311
316
|
resolved = entry;
|
|
312
|
-
} else if (entry && computeKey && entry
|
|
317
|
+
} else if (entry && computeKey && Object.prototype.hasOwnProperty.call(entry, computeKey)) {
|
|
313
318
|
resolved = entry[computeKey];
|
|
314
|
-
} else if (entry && entry.default) {
|
|
319
|
+
} else if (entry && typeof entry === 'object' && !Array.isArray(entry) && Object.prototype.hasOwnProperty.call(entry, 'default')) {
|
|
315
320
|
resolved = entry.default;
|
|
321
|
+
} else if (entry && typeof entry === 'object' && !Array.isArray(entry) && !computeKey) {
|
|
322
|
+
throw new Error(
|
|
323
|
+
`Preset kernelPaths${weightLabel} requires quantizationInfo.compute ` +
|
|
324
|
+
'to resolve a compute-specific defaultKernelPath.'
|
|
325
|
+
);
|
|
326
|
+
} else if (entry && typeof entry === 'object' && !Array.isArray(entry)) {
|
|
327
|
+
throw new Error(
|
|
328
|
+
`Preset kernelPaths${weightLabel} is missing compute "${computeKey}". ` +
|
|
329
|
+
'Add an explicit compute-specific mapping or default instead of relying on JS fallbacks.'
|
|
330
|
+
);
|
|
316
331
|
} else {
|
|
317
|
-
|
|
332
|
+
throw new Error(
|
|
333
|
+
`Preset kernelPaths${weightLabel} must resolve to a string or object.`
|
|
334
|
+
);
|
|
318
335
|
}
|
|
319
336
|
|
|
320
|
-
//
|
|
321
|
-
//
|
|
337
|
+
// Column-wise Q4K must be mapped explicitly in preset JSON; JS must not
|
|
338
|
+
// rewrite kernel-path ids to infer policy.
|
|
322
339
|
if (resolved && q4kLayout === 'col' && resolved.includes('-fused-')) {
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
340
|
+
throw new Error(
|
|
341
|
+
`Preset kernelPaths${weightKey ? `.${weightKey}` : ''} resolved fused kernel path "${resolved}" ` +
|
|
342
|
+
'for q4k layout "col". Add an explicit dequant kernel path mapping to the preset instead of relying on JS rewrites.'
|
|
343
|
+
);
|
|
326
344
|
}
|
|
327
345
|
|
|
328
346
|
return resolved;
|
|
@@ -354,8 +372,8 @@ export function buildManifestInference(preset, config, headDim = 64, quantizatio
|
|
|
354
372
|
queryPreAttnScalar: resolveQueryPreAttnScalar(preset, modelConfig, headDim),
|
|
355
373
|
attnLogitSoftcapping: presetInference.attention?.attnLogitSoftcapping ??
|
|
356
374
|
modelConfig.attn_logit_softcapping ?? defaults.attention.attnLogitSoftcapping,
|
|
357
|
-
slidingWindow:
|
|
358
|
-
|
|
375
|
+
slidingWindow: modelConfig.sliding_window ??
|
|
376
|
+
presetInference.attention?.slidingWindow ?? defaults.attention.slidingWindow,
|
|
359
377
|
queryKeyNorm: presetInference.attention?.queryKeyNorm ?? defaults.attention.queryKeyNorm,
|
|
360
378
|
attentionOutputGate: detectAttentionOutputGate(presetInference, modelConfig, defaults),
|
|
361
379
|
causal: detectedCausalAttention ?? presetInference.attention?.causal ?? defaults.attention.causal,
|
|
@@ -428,6 +446,9 @@ export function buildManifestInference(preset, config, headDim = 64, quantizatio
|
|
|
428
446
|
);
|
|
429
447
|
}
|
|
430
448
|
globalPattern = null;
|
|
449
|
+
// Default offset 0 means first global layer at index 0 (most common pattern).
|
|
450
|
+
// This is the every_n pattern default, distinct from layerPattern.offset=null
|
|
451
|
+
// which means "not applicable" in the schema.
|
|
431
452
|
offset = (
|
|
432
453
|
detectEveryNOffsetFromLayerTypes(modelConfig.layer_types, period)
|
|
433
454
|
?? normalizeEveryNOffset(presetPattern.offset, period)
|
|
@@ -261,9 +261,6 @@ export async function parseDiffusionModel(adapter) {
|
|
|
261
261
|
}
|
|
262
262
|
const configSuffix = defaultConfigPath(componentId);
|
|
263
263
|
const config = await readJson(configSuffix, `${componentId} config`);
|
|
264
|
-
if (componentId === 'transformer' && config && !config.weight_format) {
|
|
265
|
-
config.weight_format = 'diffusers';
|
|
266
|
-
}
|
|
267
264
|
diffusionConfig.components[componentId] = {
|
|
268
265
|
...(diffusionConfig.components[componentId] || {}),
|
|
269
266
|
config,
|
|
@@ -7,6 +7,9 @@ export async function parseTransformerModel(adapter) {
|
|
|
7
7
|
} = adapter;
|
|
8
8
|
|
|
9
9
|
const config = await readJson('config.json', 'config.json');
|
|
10
|
+
const generationConfig = await fileExists('generation_config.json')
|
|
11
|
+
? await readJson('generation_config.json', 'generation_config.json')
|
|
12
|
+
: null;
|
|
10
13
|
const architectureHint = config.architectures?.[0] ?? config.model_type ?? '';
|
|
11
14
|
|
|
12
15
|
let tensors = null;
|
|
@@ -19,6 +22,7 @@ export async function parseTransformerModel(adapter) {
|
|
|
19
22
|
|
|
20
23
|
return {
|
|
21
24
|
config,
|
|
25
|
+
generationConfig,
|
|
22
26
|
tensors,
|
|
23
27
|
architectureHint,
|
|
24
28
|
};
|