@simulatte/doppler 0.1.5 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +23 -8
- package/package.json +7 -4
- package/src/config/kernels/kernel-ref-digests.js +39 -39
- package/src/config/kernels/registry.json +42 -2
- package/src/config/loader.js +31 -2
- package/src/config/merge.js +18 -0
- package/src/config/presets/models/qwen3.json +9 -2
- package/src/config/presets/models/transformer.json +5 -0
- package/src/config/required-inference-fields-contract-check.js +6 -0
- package/src/config/schema/inference-defaults.schema.js +3 -0
- package/src/config/schema/inference.schema.d.ts +9 -0
- package/src/config/schema/kernel-path.schema.d.ts +6 -0
- package/src/config/schema/manifest.schema.d.ts +6 -0
- package/src/config/schema/manifest.schema.js +3 -0
- package/src/converter/rope-config.js +42 -0
- package/src/gpu/device.js +58 -0
- package/src/gpu/kernels/attention.js +98 -0
- package/src/gpu/kernels/bias_add.wgsl +8 -6
- package/src/gpu/kernels/bias_add_f16.wgsl +8 -5
- package/src/gpu/kernels/conv2d.js +1 -1
- package/src/gpu/kernels/conv2d.wgsl +7 -8
- package/src/gpu/kernels/conv2d_f16.wgsl +7 -8
- package/src/gpu/kernels/depthwise_conv2d.js +2 -1
- package/src/gpu/kernels/depthwise_conv2d.wgsl +6 -9
- package/src/gpu/kernels/depthwise_conv2d_f16.wgsl +6 -9
- package/src/gpu/kernels/grouped_pointwise_conv2d.js +2 -1
- package/src/gpu/kernels/grouped_pointwise_conv2d.wgsl +6 -9
- package/src/gpu/kernels/grouped_pointwise_conv2d_f16.wgsl +6 -9
- package/src/gpu/kernels/matmul.js +25 -0
- package/src/gpu/kernels/pixel_shuffle.js +1 -1
- package/src/gpu/kernels/pixel_shuffle.wgsl +4 -5
- package/src/gpu/kernels/pixel_shuffle_f16.wgsl +4 -5
- package/src/gpu/kernels/relu.js +15 -2
- package/src/gpu/kernels/relu.wgsl +2 -1
- package/src/gpu/kernels/relu_f16.wgsl +2 -1
- package/src/gpu/kernels/repeat_channels.js +1 -1
- package/src/gpu/kernels/repeat_channels.wgsl +4 -5
- package/src/gpu/kernels/repeat_channels_f16.wgsl +4 -5
- package/src/gpu/kernels/residual.js +44 -8
- package/src/gpu/kernels/residual.wgsl +6 -3
- package/src/gpu/kernels/residual_f16.wgsl +2 -1
- package/src/gpu/kernels/residual_f16_vec4.wgsl +2 -1
- package/src/gpu/kernels/residual_vec4.wgsl +2 -1
- package/src/gpu/kernels/rmsnorm.js +58 -6
- package/src/gpu/kernels/rmsnorm.wgsl +14 -6
- package/src/gpu/kernels/rmsnorm_f16.wgsl +10 -2
- package/src/gpu/kernels/rope.d.ts +2 -0
- package/src/gpu/kernels/rope.js +11 -1
- package/src/gpu/kernels/rope.wgsl +56 -40
- package/src/gpu/kernels/sana_linear_attention.js +1 -2
- package/src/gpu/kernels/sana_linear_attention_apply.wgsl +4 -5
- package/src/gpu/kernels/sana_linear_attention_apply_f16.wgsl +4 -5
- package/src/gpu/kernels/sana_linear_attention_summary.wgsl +4 -0
- package/src/gpu/kernels/sana_linear_attention_summary_f16.wgsl +4 -0
- package/src/gpu/kernels/silu.d.ts +1 -0
- package/src/gpu/kernels/silu.js +32 -14
- package/src/gpu/kernels/silu.wgsl +19 -9
- package/src/gpu/kernels/silu_f16.wgsl +19 -9
- package/src/gpu/kernels/transpose.js +15 -2
- package/src/gpu/kernels/transpose.wgsl +5 -6
- package/src/gpu/kernels/upsample2d.js +2 -1
- package/src/gpu/kernels/upsample2d.wgsl +6 -9
- package/src/gpu/kernels/upsample2d_f16.wgsl +6 -9
- package/src/gpu/kernels/utils.js +16 -1
- package/src/inference/browser-harness.js +47 -1
- package/src/inference/pipelines/diffusion/pipeline.js +15 -6
- package/src/inference/pipelines/diffusion/text-encoder-gpu.d.ts +5 -0
- package/src/inference/pipelines/diffusion/text-encoder-gpu.js +27 -15
- package/src/inference/pipelines/text/attention/record.js +11 -2
- package/src/inference/pipelines/text/attention/run.js +11 -2
- package/src/inference/pipelines/text/chat-format.js +25 -1
- package/src/inference/pipelines/text/config.d.ts +4 -0
- package/src/inference/pipelines/text/config.js +68 -1
- package/src/inference/pipelines/text/execution-plan.js +23 -31
- package/src/inference/pipelines/text/execution-v0.js +29 -2
- package/src/inference/pipelines/text/ffn/standard.js +3 -0
- package/src/inference/pipelines/text/init.d.ts +4 -0
- package/src/inference/pipelines/text/init.js +56 -9
- package/src/inference/pipelines/text/layer.js +11 -0
- package/src/inference/pipelines/text.js +4 -0
- package/src/inference/tokenizers/bundled.js +156 -33
- package/src/rules/tooling/command-runtime.rules.json +18 -0
- package/src/tooling/command-api.d.ts +27 -1
- package/src/tooling/command-api.js +142 -3
- package/src/tooling/node-browser-command-runner.d.ts +4 -0
- package/src/tooling/node-browser-command-runner.js +58 -3
- package/src/tooling/node-command-runner.js +15 -0
- package/src/tooling/node-webgpu.js +9 -87
- package/src/training/checkpoint-watch.d.ts +7 -0
- package/src/training/checkpoint-watch.js +106 -0
- package/src/training/checkpoint.d.ts +6 -1
- package/src/training/checkpoint.js +12 -2
- package/src/training/distillation/artifacts.d.ts +71 -0
- package/src/training/distillation/artifacts.js +132 -0
- package/src/training/distillation/checkpoint-watch.d.ts +10 -0
- package/src/training/distillation/checkpoint-watch.js +57 -0
- package/src/training/distillation/dataset.d.ts +59 -0
- package/src/training/distillation/dataset.js +337 -0
- package/src/training/distillation/eval.d.ts +34 -0
- package/src/training/distillation/eval.js +310 -0
- package/src/training/distillation/index.d.ts +29 -0
- package/src/training/distillation/index.js +29 -0
- package/src/training/distillation/runtime.d.ts +20 -0
- package/src/training/distillation/runtime.js +121 -0
- package/src/training/distillation/scoreboard.d.ts +6 -0
- package/src/training/distillation/scoreboard.js +8 -0
- package/src/training/distillation/stage-a.d.ts +45 -0
- package/src/training/distillation/stage-a.js +338 -0
- package/src/training/distillation/stage-b.d.ts +24 -0
- package/src/training/distillation/stage-b.js +20 -0
- package/src/training/index.d.ts +10 -0
- package/src/training/index.js +10 -0
- package/src/training/lora-pipeline.d.ts +40 -0
- package/src/training/lora-pipeline.js +796 -0
- package/src/training/operator-artifacts.d.ts +62 -0
- package/src/training/operator-artifacts.js +140 -0
- package/src/training/operator-command.d.ts +5 -0
- package/src/training/operator-command.js +453 -0
- package/src/training/operator-eval.d.ts +48 -0
- package/src/training/operator-eval.js +230 -0
- package/src/training/operator-scoreboard.d.ts +5 -0
- package/src/training/operator-scoreboard.js +44 -0
- package/src/training/runner.d.ts +52 -0
- package/src/training/runner.js +29 -4
- package/src/training/suite.d.ts +112 -0
- package/src/training/suite.js +9 -9
- package/src/training/workloads.d.ts +164 -0
- package/src/training/workloads.js +539 -0
- package/src/version.js +1 -1
- package/tools/doppler-cli.js +137 -40
package/README.md
CHANGED
|
@@ -22,7 +22,7 @@ for await (const token of model.generate('Hello, world')) {
|
|
|
22
22
|
}
|
|
23
23
|
```
|
|
24
24
|
|
|
25
|
-
Registry IDs resolve to hosted RDRR artifacts from `Clocksmith/rdrr` by default. Tokens stream from a native `AsyncGenerator`. See [more examples](#more-examples) below or the canonical [Root API guide](docs/api/root.md).
|
|
25
|
+
Registry IDs resolve to hosted RDRR artifacts from `Clocksmith/rdrr` by default. Tokens stream from a native `AsyncGenerator`. See [more examples](#more-examples) below or the canonical [Root API guide](https://github.com/clocksmith/doppler/blob/main/docs/api/root.md).
|
|
26
26
|
|
|
27
27
|
## Why Doppler
|
|
28
28
|
|
|
@@ -36,11 +36,11 @@ Registry IDs resolve to hosted RDRR artifacts from `Clocksmith/rdrr` by default.
|
|
|
36
36
|
|
|
37
37
|
## Evidence
|
|
38
38
|
|
|
39
|
-

|
|
39
|
+

|
|
40
40
|
|
|
41
41
|
Snapshot artifacts:
|
|
42
|
-
- [g3-1b-p064-d064-t0-k1.compare.json](benchmarks/vendors/fixtures/g3-1b-p064-d064-t0-k1.compare.json)
|
|
43
|
-
- [lfm2-5-1-2b-p064-d064-t0-k1.compare.json](benchmarks/vendors/fixtures/lfm2-5-1-2b-p064-d064-t0-k1.compare.json)
|
|
42
|
+
- [g3-1b-p064-d064-t0-k1.compare.json](https://github.com/clocksmith/doppler/blob/main/benchmarks/vendors/fixtures/g3-1b-p064-d064-t0-k1.compare.json)
|
|
43
|
+
- [lfm2-5-1-2b-p064-d064-t0-k1.compare.json](https://github.com/clocksmith/doppler/blob/main/benchmarks/vendors/fixtures/lfm2-5-1-2b-p064-d064-t0-k1.compare.json)
|
|
44
44
|
|
|
45
45
|
## Under the hood
|
|
46
46
|
|
|
@@ -77,10 +77,25 @@ for await (const token of doppler('Hello', { model: 'gemma3-270m' })) {
|
|
|
77
77
|
|
|
78
78
|
## Documentation
|
|
79
79
|
|
|
80
|
-
- Docs index (canonical navigation): [docs/INDEX.md](docs/INDEX.md)
|
|
81
|
-
- First-run workflow: [docs/getting-started.md](docs/getting-started.md)
|
|
82
|
-
- Runtime config contract: [docs/config.md](docs/config.md)
|
|
83
|
-
- Architecture: [docs/architecture.md](docs/architecture.md)
|
|
80
|
+
- Docs index (canonical navigation): [docs/INDEX.md](https://github.com/clocksmith/doppler/blob/main/docs/INDEX.md)
|
|
81
|
+
- First-run workflow: [docs/getting-started.md](https://github.com/clocksmith/doppler/blob/main/docs/getting-started.md)
|
|
82
|
+
- Runtime config contract: [docs/config.md](https://github.com/clocksmith/doppler/blob/main/docs/config.md)
|
|
83
|
+
- Architecture: [docs/architecture.md](https://github.com/clocksmith/doppler/blob/main/docs/architecture.md)
|
|
84
|
+
- Generated model support table: [docs/model-support-matrix.md](https://github.com/clocksmith/doppler/blob/main/docs/model-support-matrix.md)
|
|
85
|
+
|
|
86
|
+
## Current model support
|
|
87
|
+
|
|
88
|
+
Verified right now:
|
|
89
|
+
- `gemma-3-270m-it-wq4k-ef16-hf16`
|
|
90
|
+
- `gemma-3-1b-it-wq4k-ef16-hf16`
|
|
91
|
+
- `google-embeddinggemma-300m-wq4k-ef16`
|
|
92
|
+
- `translategemma-4b-it-wq4k-ef16-hf16`
|
|
93
|
+
|
|
94
|
+
Known failing right now:
|
|
95
|
+
- `qwen-3-5-0-8b-wq4k-ef16-hf16-f16`
|
|
96
|
+
- `qwen-3-5-2b-wq4k-ef16-hf16-f16`
|
|
97
|
+
|
|
98
|
+
For the generated status table, including `loads but unverified` and `everything else`, see [docs/model-support-matrix.md](https://github.com/clocksmith/doppler/blob/main/docs/model-support-matrix.md).
|
|
84
99
|
|
|
85
100
|
## Environment requirements
|
|
86
101
|
|
package/package.json
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@simulatte/doppler",
|
|
3
|
-
"version": "0.1.
|
|
3
|
+
"version": "0.1.6",
|
|
4
4
|
"description": "Browser-native WebGPU inference engine for local intent and inference loops",
|
|
5
5
|
"main": "src/index.js",
|
|
6
6
|
"types": "src/index.d.ts",
|
|
7
7
|
"type": "module",
|
|
8
8
|
"bin": {
|
|
9
|
-
"doppler": "
|
|
9
|
+
"doppler": "tools/doppler-cli.js"
|
|
10
10
|
},
|
|
11
11
|
"scripts": {
|
|
12
12
|
"cli": "node tools/doppler-cli.js",
|
|
@@ -125,7 +125,7 @@
|
|
|
125
125
|
},
|
|
126
126
|
"repository": {
|
|
127
127
|
"type": "git",
|
|
128
|
-
"url": "https://github.com/clocksmith/doppler.git"
|
|
128
|
+
"url": "git+https://github.com/clocksmith/doppler.git"
|
|
129
129
|
},
|
|
130
130
|
"homepage": "https://github.com/clocksmith/doppler",
|
|
131
131
|
"keywords": [
|
|
@@ -142,6 +142,9 @@
|
|
|
142
142
|
"url": "https://github.com/clocksmith/doppler/issues"
|
|
143
143
|
},
|
|
144
144
|
"license": "Apache-2.0",
|
|
145
|
+
"publishConfig": {
|
|
146
|
+
"access": "public"
|
|
147
|
+
},
|
|
145
148
|
"files": [
|
|
146
149
|
"src",
|
|
147
150
|
"src/gpu/kernels/*.wgsl",
|
|
@@ -160,6 +163,6 @@
|
|
|
160
163
|
"playwright": "^1.58.2"
|
|
161
164
|
},
|
|
162
165
|
"optionalDependencies": {
|
|
163
|
-
"@simulatte/webgpu
|
|
166
|
+
"@simulatte/webgpu": "0.1.x"
|
|
164
167
|
}
|
|
165
168
|
}
|
|
@@ -49,16 +49,16 @@ export const KERNEL_REF_CONTENT_DIGESTS = Object.freeze({
|
|
|
49
49
|
"backward/upsample2d_backward.wgsl#main": "3f3a27fc880d3c4cba49889cafb4be66d30132cfeec9c2105751cc12a5a44ae3",
|
|
50
50
|
"bf16_to_f16.wgsl#main": "91c009d291a205fc42e7b45942e58630442eb67f8cb4bacc2e54160fa5f25c35",
|
|
51
51
|
"bf16_to_f32.wgsl#main": "2c5c08476d40aeb64287b8b31d1ef088c63e8bb4568bf7b7c5faf4a4ed493576",
|
|
52
|
-
"bias_add_f16.wgsl#main": "
|
|
53
|
-
"bias_add.wgsl#main": "
|
|
52
|
+
"bias_add_f16.wgsl#main": "f4c5b6e6495bcaae4582c3eb11c7fe39466a9625bbe57eede116c24db71682f6",
|
|
53
|
+
"bias_add.wgsl#main": "e7d3b4a5f9efc8b0569c56bcdeb63e921fa6d1d006901deabc734cab40346715",
|
|
54
54
|
"cast_f16_to_f32.wgsl#main": "98a0b31fcba2bad945e9f8522d643ae75135bfdf6b39a824565f57d5d4fd965d",
|
|
55
55
|
"cast_f32_to_f16.wgsl#main": "0b669e7812c09a2f44e219548e11ca0dfd8af921d30017e2a5c79d54f792542f",
|
|
56
56
|
"clamp.wgsl#main": "9d8039a590d102133636d67de8ae76ca8e9129bf220c48f7a08e9d82fcc48bad",
|
|
57
|
-
"conv2d_f16.wgsl#main": "
|
|
58
|
-
"conv2d.wgsl#main": "
|
|
57
|
+
"conv2d_f16.wgsl#main": "1e26a7e45e434fc45bdcd1913b0b97d2ed7e80b3c0bb1beae3d762ee457841c7",
|
|
58
|
+
"conv2d.wgsl#main": "140ed68bf1554ebfadbad7bcf1dfdff7f95aff18588459205f14b05cb432eee2",
|
|
59
59
|
"cross_entropy_loss.wgsl#main": "5a48087bdec94184432c90ce5b345e1eadbdfcb13b9793ecee8052bc7392239c",
|
|
60
|
-
"depthwise_conv2d_f16.wgsl#main": "
|
|
61
|
-
"depthwise_conv2d.wgsl#main": "
|
|
60
|
+
"depthwise_conv2d_f16.wgsl#main": "f7f093a7e6623ed17a675bac729149e94718aece916416966eaf03c1d6939f2a",
|
|
61
|
+
"depthwise_conv2d.wgsl#main": "cf14cb40d282ad4d4fab160109b97eaeaf12aab62579b73324ac485ac75155b0",
|
|
62
62
|
"dequant_f16_out_vec4.wgsl#main_vec4": "61c20e6c71c1c8421b4ec202dbd26292a6300587bd44c314f2a6c6d9d9442c3a",
|
|
63
63
|
"dequant_f16_out.wgsl#main": "94d61843d56f9a3bbc6b7c2b95dc6ecbba3f6a262b2c4086a076f69a8c38ccae",
|
|
64
64
|
"dequant_f16_rowwise.wgsl#main": "f5bf7cef950b52d65cee6121dbaa176244d3221045b3b6386b3be47f23ce17dc",
|
|
@@ -118,8 +118,8 @@ export const KERNEL_REF_CONTENT_DIGESTS = Object.freeze({
|
|
|
118
118
|
"gelu.wgsl#main": "a9007ea08aaff98f9be08f1e0490a6bcf252883eac5513de876ab9ce918865e6",
|
|
119
119
|
"gptoss_mxfp4_expert_fused.wgsl#main_expert": "3159e8cd81da13f909cf905e6d35307fefe1dcbbdf1b2b8e8ff0ce923bd71180",
|
|
120
120
|
"gptoss_router_topk.wgsl#softmax_topk": "86e4ea709c0c0084d09c6a4cd07710dc14f380e03f91b8ed9ec871b310be49f1",
|
|
121
|
-
"grouped_pointwise_conv2d_f16.wgsl#main": "
|
|
122
|
-
"grouped_pointwise_conv2d.wgsl#main": "
|
|
121
|
+
"grouped_pointwise_conv2d_f16.wgsl#main": "578a0fcb3362864feb0de0e800b2df49e66e42db4360a08189e5f815ead944c4",
|
|
122
|
+
"grouped_pointwise_conv2d.wgsl#main": "9cf77937b10dd96f3aedc1793183ef2fff05d075fac3884fad5cd5869d0d9181",
|
|
123
123
|
"groupnorm_apply_f16.wgsl#main": "cfd850b87944ac1c03ba7bd98136db556dadd8a70611e351d82d297299a7cd02",
|
|
124
124
|
"groupnorm_apply.wgsl#main": "b09b8f2f57dcdfa1a0366daa30d3910feb134204652c711d2ba564e566b5a334",
|
|
125
125
|
"groupnorm_stats_f16.wgsl#main": "fb76f78ce668ea8459110335698fe4b09a2425fc71deed3bab67efd7641c3199",
|
|
@@ -155,33 +155,33 @@ export const KERNEL_REF_CONTENT_DIGESTS = Object.freeze({
|
|
|
155
155
|
"moe_gather.wgsl#gather_single_pass": "8dbd0c38a323ba6c73af3ecaad297d79d861b817152e6e2c12fcd9db6f767f1e",
|
|
156
156
|
"moe_gather.wgsl#gather_tokens": "58e40a63a39c2f99c47c75dee71767f98482217afd74607cee1b7efc2c85738b",
|
|
157
157
|
"moe_offsets.wgsl#build_offsets": "3ea004145fa234659408cdeb0d4d802adff1037c9c5c03af146b3734cc69dd27",
|
|
158
|
-
"pixel_shuffle_f16.wgsl#main": "
|
|
159
|
-
"pixel_shuffle.wgsl#main": "
|
|
160
|
-
"relu_f16.wgsl#main": "
|
|
161
|
-
"relu.wgsl#main": "
|
|
162
|
-
"repeat_channels_f16.wgsl#main": "
|
|
163
|
-
"repeat_channels.wgsl#main": "
|
|
164
|
-
"residual_f16_vec4.wgsl#add_vec4": "
|
|
165
|
-
"residual_f16.wgsl#main": "
|
|
166
|
-
"residual_vec4.wgsl#add_vec4": "
|
|
167
|
-
"residual.wgsl#main": "
|
|
168
|
-
"rmsnorm_f16.wgsl#main": "
|
|
169
|
-
"rmsnorm_f16.wgsl#rmsnorm_small_f16": "
|
|
170
|
-
"rmsnorm.wgsl#main": "
|
|
171
|
-
"rmsnorm.wgsl#main_cached": "
|
|
172
|
-
"rmsnorm.wgsl#main_small": "
|
|
173
|
-
"rmsnorm.wgsl#main_small_subgroup": "
|
|
174
|
-
"rmsnorm.wgsl#main_subgroup": "
|
|
158
|
+
"pixel_shuffle_f16.wgsl#main": "1d1202cdaa8f7a94e015c5e2212bc98881d00d548a12fe7a8e91c4e17f2ba723",
|
|
159
|
+
"pixel_shuffle.wgsl#main": "91c017f0642132229768a2be6c8d47ad26d486f5d40e7dbf9e2349c847e527b8",
|
|
160
|
+
"relu_f16.wgsl#main": "baac3a33693e5a72e60c7ec9b4a4dbcec10a83ca1fac2972988b07a75780f14a",
|
|
161
|
+
"relu.wgsl#main": "52ffa66f9898a4f291758ae93055000cb1eb488c22a993e6b0568a29d9d3ad28",
|
|
162
|
+
"repeat_channels_f16.wgsl#main": "6eee5f9f74d8c17a71f6f422b503bbfb872350a9486ec0065fd1f67718311883",
|
|
163
|
+
"repeat_channels.wgsl#main": "bce0cb33beed50ce388b2d069961ae0fbe361dd4930b9344cb066b3390475a51",
|
|
164
|
+
"residual_f16_vec4.wgsl#add_vec4": "bf8cd304a1d4c5500143b0bc52d0236a7e8bbc4cc2d51d54ef0d4fce209f503b",
|
|
165
|
+
"residual_f16.wgsl#main": "3ca5c562fb5baf495e31e662f85fb26863f8f6d4ad29bb119c484e2ffdee7791",
|
|
166
|
+
"residual_vec4.wgsl#add_vec4": "f2f30d2dec9d90062bf5fce1f141951e8e6b54f56698b87ffb9bf6662c8acce2",
|
|
167
|
+
"residual.wgsl#main": "f1abd88c959c5d8dd27b9353d487e37b2a96850ed9d90c365212e260399cc2a7",
|
|
168
|
+
"rmsnorm_f16.wgsl#main": "7ae20c01b0453306504f777c4a8de37364a8b45bef3c569b0572c7863740a6bd",
|
|
169
|
+
"rmsnorm_f16.wgsl#rmsnorm_small_f16": "e9ce0a7427831e4d1280691eb9ca0daab55d917d4f0d9975d4bcd7e8fe960941",
|
|
170
|
+
"rmsnorm.wgsl#main": "f516b3e4bde2015f2a207c3ca5b8c9820c7809fa8f8d0786f90c568e0f1ac077",
|
|
171
|
+
"rmsnorm.wgsl#main_cached": "bcae48e93d63e11701386850559fec5d4924128ad9d6ac1de27d1b8c34fc3dff",
|
|
172
|
+
"rmsnorm.wgsl#main_small": "95c65dcb443717c821c44de87dc89cdd4f6da97e08cccf8a9526f5cdd19dd33d",
|
|
173
|
+
"rmsnorm.wgsl#main_small_subgroup": "8ff72e445b662b1820be25a594fb0558007bfca7e50d2d1bc915df5774a76f6a",
|
|
174
|
+
"rmsnorm.wgsl#main_subgroup": "0aac7dd6455bf8f3d11917ec87fa71bb5fa9ef4f8e5bba02dfbfd11b92ccd01f",
|
|
175
175
|
"rope_f16.wgsl#main": "a87f2964b77e851a2fbcc88305adeecaf8eba372291d83a71b817c8ef3da5c58",
|
|
176
176
|
"rope_f16.wgsl#rope_compute_freqs": "c7aa2cb50420ab2709b20e0a33e93ab6aa4f50d2fa8d9f79b0bfcfefb2f7abab",
|
|
177
177
|
"rope_f16.wgsl#rope_ntk_scaled": "46d2574b46539b289371c0c37a0b4e2fb21279134126b36a1fc30b98523905dd",
|
|
178
178
|
"rope_f16.wgsl#rope_qk": "3bc01e167dc3cc5397bd7751e493311b4d3f5c6c0f6fad30234a740bb4c4507b",
|
|
179
179
|
"rope_f16.wgsl#rope_yarn": "9b788dd05a1598aadcba5b0218d1666ce75faadbe32b71ee1def65ec23fb7dfe",
|
|
180
|
-
"rope.wgsl#main": "
|
|
181
|
-
"rope.wgsl#rope_compute_freqs": "
|
|
182
|
-
"rope.wgsl#rope_ntk_scaled": "
|
|
183
|
-
"rope.wgsl#rope_qk": "
|
|
184
|
-
"rope.wgsl#rope_yarn": "
|
|
180
|
+
"rope.wgsl#main": "4c803ad5e0dd065d5572c7aecc1def277c43884dcc02f22a9676914c10111400",
|
|
181
|
+
"rope.wgsl#rope_compute_freqs": "c9338316a31c8d467acbf8d512cb9616ee902d2619fa9187639f8ff5d78414ac",
|
|
182
|
+
"rope.wgsl#rope_ntk_scaled": "818f89865a3d1d6f2d49f671ac882d0fde9709702160a1ae8d9a8ef113afb511",
|
|
183
|
+
"rope.wgsl#rope_qk": "3d773c8b8c400142edc8a4111afb04a2bf75bdb109b2d41cbe5afdb72a959772",
|
|
184
|
+
"rope.wgsl#rope_yarn": "cb00e1cf87fac198dcf0fb0d4e2d5f6f99d2fed6dff0a089a96bb459917851d2",
|
|
185
185
|
"sample_f16.wgsl#argmax": "30b9f199b49352e5aff91b7aa8016edb423ce33f77481c3a7bc184251856fb27",
|
|
186
186
|
"sample_f16.wgsl#argmax_reduce": "a3ca27fc50b10c36c1676bdd5dbfe5edc67850cdd5c1af7a1d3ad70f830dd8a7",
|
|
187
187
|
"sample_f16.wgsl#find_topk_phase1": "24e47e5ced28af802959e350ff0a6eec6b9a26f89fb38e222990eeaffb16bd36",
|
|
@@ -194,10 +194,10 @@ export const KERNEL_REF_CONTENT_DIGESTS = Object.freeze({
|
|
|
194
194
|
"sample.wgsl#find_topk_phase2": "940b216e605d22096da5aca65950a8030866fc5a39e7fdf484d69a832de1b63a",
|
|
195
195
|
"sample.wgsl#sample_single_pass": "4412357e84113ee2f1bc0dc8bf89e314c2ab482c89c14ca016ea9949d16a9d0c",
|
|
196
196
|
"sample.wgsl#softmax_and_sample": "7172c60e76430fbe130e530e3564b569b45eccf193987b32d6f52bd6bbcc9f08",
|
|
197
|
-
"sana_linear_attention_apply_f16.wgsl#main": "
|
|
198
|
-
"sana_linear_attention_apply.wgsl#main": "
|
|
199
|
-
"sana_linear_attention_summary_f16.wgsl#main": "
|
|
200
|
-
"sana_linear_attention_summary.wgsl#main": "
|
|
197
|
+
"sana_linear_attention_apply_f16.wgsl#main": "4a7426ce67eccfb70956feeae84275f4d3cc586c50e8442c07eb69993b378ab5",
|
|
198
|
+
"sana_linear_attention_apply.wgsl#main": "5f69e0bc1d9e2df5a61e13bd819313c8f7ff5dfc4b7d78e71d5152dc23b6a86c",
|
|
199
|
+
"sana_linear_attention_summary_f16.wgsl#main": "3abb736ead999485b5dac9c6b534143b464cfd0b5300c5e03c56cec03c8fa48e",
|
|
200
|
+
"sana_linear_attention_summary.wgsl#main": "be9c1fe861dcb5ea46927749764267656a69160bc8b732c6eb1a1bcb0c075589",
|
|
201
201
|
"scale.wgsl#main": "44ec481452b586307957163e3d65c9d02561d3f2f3db633f906f5488b1ea1ca4",
|
|
202
202
|
"scale.wgsl#main_inplace": "020824c7118a59c461ce81f1c2cd01b7c2a3f1aab326392b7d48d4448a0c2ed1",
|
|
203
203
|
"scatter_add_dynamic_f16_weights.wgsl#scatter_add_dynamic": "42799e745bc445b199b1cbc384bc12bb9372ed1599af3260a803cefc8dd35497",
|
|
@@ -206,8 +206,8 @@ export const KERNEL_REF_CONTENT_DIGESTS = Object.freeze({
|
|
|
206
206
|
"scatter_add_vec4.wgsl#scatter_add_vec4": "247c4f23129cdfbb19593b17c5833d85048da117d77141c74bc4e16e691d94e1",
|
|
207
207
|
"scatter_add.wgsl#main": "dea947b8014e9b674e4fec8f15fac6c926e8a3a4d8eff104b953d77f35a1ac35",
|
|
208
208
|
"scatter_add.wgsl#scatter_add_accumulate": "561800af22dedae63f1abe69b757b0ef6c7832a2bff228c2262e0b7111d89247",
|
|
209
|
-
"silu_f16.wgsl#main": "
|
|
210
|
-
"silu.wgsl#main": "
|
|
209
|
+
"silu_f16.wgsl#main": "867634b20dcb75969e001966836892a2b7e01782b0028d94779c6ec21c254ae0",
|
|
210
|
+
"silu.wgsl#main": "7b52d30fb741beef2dbf728e0c4ecffe5b08d9661d63c306caecb4cb3ced85e5",
|
|
211
211
|
"softmax_subgroup.wgsl#main_subgroup": "88472c0dab5f81c5f045f0ee79c4c3bb484791a4a2b84af398c019851438f091",
|
|
212
212
|
"softmax_subgroup.wgsl#softmax_small_subgroup": "5d7bd1b698910a437197bf6c8b7f8b259036dd006ad5470f767b539dba8538f8",
|
|
213
213
|
"softmax.wgsl#main": "45c5876806b442222d7e190e595f55a0079bae82e07d37586996c1a63790bb7a",
|
|
@@ -220,7 +220,7 @@ export const KERNEL_REF_CONTENT_DIGESTS = Object.freeze({
|
|
|
220
220
|
"topk.wgsl#main": "a18763303cd18e8a020e647f8a52f65403526849faf835d9f9394f634c3c97eb",
|
|
221
221
|
"topk.wgsl#softmax_topk": "95ff3517da909e4bd4d0ff8d85b619bd250522943aeb9276375edc59f67e9604",
|
|
222
222
|
"topk.wgsl#topk_2_small": "289eaa5c4f005e0aaf37dfe5343aeda30d9ab3929979dbf0cc3553f23e136807",
|
|
223
|
-
"transpose.wgsl#main": "
|
|
224
|
-
"upsample2d_f16.wgsl#main": "
|
|
225
|
-
"upsample2d.wgsl#main": "
|
|
223
|
+
"transpose.wgsl#main": "002bce09c48b63ab5017d83f42233340011ac6fc20dae9cd08e3095ae5bf72b2",
|
|
224
|
+
"upsample2d_f16.wgsl#main": "43cee5f2503cb4b6caea45e9842f8961ce313b02eb8ed23a97d6967113ce521c",
|
|
225
|
+
"upsample2d.wgsl#main": "6de9172ad3d6940dd3c94470a105755a33760e66a84d6e9e96ec4d6a07dc4a25"
|
|
226
226
|
});
|
|
@@ -1826,7 +1826,7 @@
|
|
|
1826
1826
|
}
|
|
1827
1827
|
],
|
|
1828
1828
|
"baseUniforms": {
|
|
1829
|
-
"size":
|
|
1829
|
+
"size": 32,
|
|
1830
1830
|
"fields": [
|
|
1831
1831
|
{
|
|
1832
1832
|
"name": "hidden_size",
|
|
@@ -1847,6 +1847,26 @@
|
|
|
1847
1847
|
"name": "has_residual",
|
|
1848
1848
|
"type": "u32",
|
|
1849
1849
|
"offset": 12
|
|
1850
|
+
},
|
|
1851
|
+
{
|
|
1852
|
+
"name": "token_stride",
|
|
1853
|
+
"type": "u32",
|
|
1854
|
+
"offset": 16
|
|
1855
|
+
},
|
|
1856
|
+
{
|
|
1857
|
+
"name": "_pad0",
|
|
1858
|
+
"type": "u32",
|
|
1859
|
+
"offset": 20
|
|
1860
|
+
},
|
|
1861
|
+
{
|
|
1862
|
+
"name": "_pad1",
|
|
1863
|
+
"type": "u32",
|
|
1864
|
+
"offset": 24
|
|
1865
|
+
},
|
|
1866
|
+
{
|
|
1867
|
+
"name": "_pad2",
|
|
1868
|
+
"type": "u32",
|
|
1869
|
+
"offset": 28
|
|
1850
1870
|
}
|
|
1851
1871
|
]
|
|
1852
1872
|
},
|
|
@@ -3637,7 +3657,7 @@
|
|
|
3637
3657
|
}
|
|
3638
3658
|
],
|
|
3639
3659
|
"baseUniforms": {
|
|
3640
|
-
"size":
|
|
3660
|
+
"size": 32,
|
|
3641
3661
|
"fields": [
|
|
3642
3662
|
{
|
|
3643
3663
|
"name": "num_tokens",
|
|
@@ -4117,6 +4137,26 @@
|
|
|
4117
4137
|
"name": "bias_offset",
|
|
4118
4138
|
"type": "u32",
|
|
4119
4139
|
"offset": 12
|
|
4140
|
+
},
|
|
4141
|
+
{
|
|
4142
|
+
"name": "token_stride",
|
|
4143
|
+
"type": "u32",
|
|
4144
|
+
"offset": 16
|
|
4145
|
+
},
|
|
4146
|
+
{
|
|
4147
|
+
"name": "_pad0",
|
|
4148
|
+
"type": "u32",
|
|
4149
|
+
"offset": 20
|
|
4150
|
+
},
|
|
4151
|
+
{
|
|
4152
|
+
"name": "_pad1",
|
|
4153
|
+
"type": "u32",
|
|
4154
|
+
"offset": 24
|
|
4155
|
+
},
|
|
4156
|
+
{
|
|
4157
|
+
"name": "_pad2",
|
|
4158
|
+
"type": "u32",
|
|
4159
|
+
"offset": 28
|
|
4120
4160
|
}
|
|
4121
4161
|
]
|
|
4122
4162
|
},
|
package/src/config/loader.js
CHANGED
|
@@ -326,6 +326,8 @@ function assertArchitecture(manifest, architecture) {
|
|
|
326
326
|
|
|
327
327
|
function extractArchitectureFromConfig(config) {
|
|
328
328
|
const nestedTextConfig = getNestedTextConfig(config);
|
|
329
|
+
const topLevelRoPEParameters = getFlatRoPEParameters(config);
|
|
330
|
+
const nestedRoPEParameters = getFlatRoPEParameters(nestedTextConfig);
|
|
329
331
|
return {
|
|
330
332
|
numLayers: config.num_hidden_layers ?? nestedTextConfig?.num_hidden_layers ?? config.n_layer ?? config.blockCount,
|
|
331
333
|
hiddenSize: config.hidden_size ?? nestedTextConfig?.hidden_size ?? config.n_embd ?? config.embeddingLength,
|
|
@@ -335,13 +337,20 @@ function extractArchitectureFromConfig(config) {
|
|
|
335
337
|
headDim: config.head_dim ?? nestedTextConfig?.head_dim,
|
|
336
338
|
vocabSize: config.vocab_size ?? nestedTextConfig?.vocab_size ?? config.vocabSize,
|
|
337
339
|
maxSeqLen: config.max_position_embeddings ?? nestedTextConfig?.max_position_embeddings ?? config.n_positions ?? config.contextLength,
|
|
338
|
-
ropeTheta:
|
|
340
|
+
ropeTheta: topLevelRoPEParameters?.rope_theta
|
|
341
|
+
?? nestedRoPEParameters?.rope_theta
|
|
342
|
+
?? config.rope_theta
|
|
343
|
+
?? nestedTextConfig?.rope_theta
|
|
344
|
+
?? config.ropeFreqBase,
|
|
339
345
|
rmsNormEps: config.rms_norm_eps ?? nestedTextConfig?.rms_norm_eps ?? config.attentionLayerNormRMSEpsilon,
|
|
340
346
|
};
|
|
341
347
|
}
|
|
342
348
|
|
|
343
349
|
function extractInferenceFromConfig(config) {
|
|
344
350
|
const nestedTextConfig = getNestedTextConfig(config);
|
|
351
|
+
const topLevelRoPEParameters = getFlatRoPEParameters(config);
|
|
352
|
+
const nestedRoPEParameters = getFlatRoPEParameters(nestedTextConfig);
|
|
353
|
+
const ropeParameters = nestedRoPEParameters ?? topLevelRoPEParameters;
|
|
345
354
|
return {
|
|
346
355
|
attention: {
|
|
347
356
|
slidingWindow: config.sliding_window ?? nestedTextConfig?.sliding_window,
|
|
@@ -355,7 +364,13 @@ function extractInferenceFromConfig(config) {
|
|
|
355
364
|
},
|
|
356
365
|
pipeline: config.pipeline ?? nestedTextConfig?.pipeline,
|
|
357
366
|
rope: {
|
|
358
|
-
ropeTheta:
|
|
367
|
+
ropeTheta: ropeParameters?.rope_theta
|
|
368
|
+
?? config.rope_theta
|
|
369
|
+
?? nestedTextConfig?.rope_theta
|
|
370
|
+
?? config.ropeFreqBase,
|
|
371
|
+
mropeInterleaved: ropeParameters?.mrope_interleaved,
|
|
372
|
+
mropeSection: Array.isArray(ropeParameters?.mrope_section) ? ropeParameters.mrope_section : undefined,
|
|
373
|
+
partialRotaryFactor: ropeParameters?.partial_rotary_factor,
|
|
359
374
|
ropeScalingType: config.rope_scaling_type ?? nestedTextConfig?.rope_scaling_type,
|
|
360
375
|
ropeScalingFactor: config.rope_scaling_factor ?? nestedTextConfig?.rope_scaling_factor,
|
|
361
376
|
},
|
|
@@ -375,6 +390,20 @@ function getNestedTextConfig(config) {
|
|
|
375
390
|
return null;
|
|
376
391
|
}
|
|
377
392
|
|
|
393
|
+
function getFlatRoPEParameters(config) {
|
|
394
|
+
if (!config || typeof config !== 'object' || Array.isArray(config)) {
|
|
395
|
+
return null;
|
|
396
|
+
}
|
|
397
|
+
const ropeParameters = config.rope_parameters;
|
|
398
|
+
if (!ropeParameters || typeof ropeParameters !== 'object' || Array.isArray(ropeParameters)) {
|
|
399
|
+
return null;
|
|
400
|
+
}
|
|
401
|
+
if (ropeParameters.full_attention || ropeParameters.sliding_attention) {
|
|
402
|
+
return null;
|
|
403
|
+
}
|
|
404
|
+
return ropeParameters;
|
|
405
|
+
}
|
|
406
|
+
|
|
378
407
|
function extractTokenizerFromManifest(manifest) {
|
|
379
408
|
if (!manifest.tokenizer) return {};
|
|
380
409
|
|
package/src/config/merge.js
CHANGED
|
@@ -152,6 +152,24 @@ function mergeRoPE(
|
|
|
152
152
|
runtime?.ropeLocalTheta,
|
|
153
153
|
sources
|
|
154
154
|
),
|
|
155
|
+
mropeInterleaved: overlay(
|
|
156
|
+
`${prefix}.mropeInterleaved`,
|
|
157
|
+
manifest.mropeInterleaved,
|
|
158
|
+
runtime?.mropeInterleaved,
|
|
159
|
+
sources
|
|
160
|
+
),
|
|
161
|
+
mropeSection: overlay(
|
|
162
|
+
`${prefix}.mropeSection`,
|
|
163
|
+
manifest.mropeSection,
|
|
164
|
+
runtime?.mropeSection,
|
|
165
|
+
sources
|
|
166
|
+
),
|
|
167
|
+
partialRotaryFactor: overlay(
|
|
168
|
+
`${prefix}.partialRotaryFactor`,
|
|
169
|
+
manifest.partialRotaryFactor,
|
|
170
|
+
runtime?.partialRotaryFactor,
|
|
171
|
+
sources
|
|
172
|
+
),
|
|
155
173
|
ropeScalingType: overlay(
|
|
156
174
|
`${prefix}.ropeScalingType`,
|
|
157
175
|
manifest.ropeScalingType,
|
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
"extends": "transformer",
|
|
5
5
|
|
|
6
6
|
"architecture": {
|
|
7
|
-
"ropeTheta":
|
|
7
|
+
"ropeTheta": 10000000
|
|
8
8
|
},
|
|
9
9
|
|
|
10
10
|
"inference": {
|
|
@@ -12,6 +12,9 @@
|
|
|
12
12
|
"slidingWindow": null,
|
|
13
13
|
"queryKeyNorm": true
|
|
14
14
|
},
|
|
15
|
+
"output": {
|
|
16
|
+
"scaleEmbeddings": false
|
|
17
|
+
},
|
|
15
18
|
"normalization": {
|
|
16
19
|
"rmsNormWeightOffset": false,
|
|
17
20
|
"rmsNormEps": 1e-6
|
|
@@ -21,7 +24,10 @@
|
|
|
21
24
|
"enabled": true
|
|
22
25
|
},
|
|
23
26
|
"rope": {
|
|
24
|
-
"ropeTheta":
|
|
27
|
+
"ropeTheta": 10000000,
|
|
28
|
+
"mropeInterleaved": true,
|
|
29
|
+
"mropeSection": [11, 11, 10],
|
|
30
|
+
"partialRotaryFactor": 0.25
|
|
25
31
|
}
|
|
26
32
|
},
|
|
27
33
|
|
|
@@ -34,6 +40,7 @@
|
|
|
34
40
|
|
|
35
41
|
"detection": {
|
|
36
42
|
"architecturePatterns": ["qwen3", "qwen3_5", "Qwen3ForCausalLM", "Qwen3_5ForCausalLM", "Qwen2ForCausalLM"],
|
|
43
|
+
"modelTypePatterns": ["qwen3_5", "qwen3_5_text", "qwen2"],
|
|
37
44
|
"configPatterns": {
|
|
38
45
|
"model_type": "qwen2"
|
|
39
46
|
}
|
|
@@ -50,6 +50,9 @@ function createValidInferenceFixture() {
|
|
|
50
50
|
ropeScalingFactor: 1.0,
|
|
51
51
|
ropeScalingType: null,
|
|
52
52
|
ropeLocalTheta: null,
|
|
53
|
+
mropeInterleaved: false,
|
|
54
|
+
mropeSection: null,
|
|
55
|
+
partialRotaryFactor: null,
|
|
53
56
|
yarnBetaFast: null,
|
|
54
57
|
yarnBetaSlow: null,
|
|
55
58
|
yarnOriginalMaxPos: null,
|
|
@@ -94,6 +97,9 @@ const FIELD_CASES = Object.freeze([
|
|
|
94
97
|
{ kind: 'nonNullable', path: ['rope', 'ropeScalingFactor'], message: 'rope.ropeScalingFactor is required' },
|
|
95
98
|
{ kind: 'nullable', path: ['rope', 'ropeScalingType'], message: 'rope.ropeScalingType must be explicitly set' },
|
|
96
99
|
{ kind: 'nullable', path: ['rope', 'ropeLocalTheta'], message: 'rope.ropeLocalTheta must be explicitly set' },
|
|
100
|
+
{ kind: 'nonNullable', path: ['rope', 'mropeInterleaved'], message: 'rope.mropeInterleaved is required' },
|
|
101
|
+
{ kind: 'nullable', path: ['rope', 'mropeSection'], message: 'rope.mropeSection must be explicitly set' },
|
|
102
|
+
{ kind: 'nullable', path: ['rope', 'partialRotaryFactor'], message: 'rope.partialRotaryFactor must be explicitly set' },
|
|
97
103
|
{ kind: 'nullable', path: ['rope', 'yarnBetaFast'], message: 'rope.yarnBetaFast must be explicitly set' },
|
|
98
104
|
{ kind: 'nullable', path: ['rope', 'yarnBetaSlow'], message: 'rope.yarnBetaSlow must be explicitly set' },
|
|
99
105
|
{ kind: 'nullable', path: ['rope', 'yarnOriginalMaxPos'], message: 'rope.yarnOriginalMaxPos must be explicitly set' },
|
|
@@ -165,6 +165,9 @@ export const DEFAULT_PRESET_INFERENCE_CONFIG = {
|
|
|
165
165
|
rope: {
|
|
166
166
|
ropeTheta: 10000,
|
|
167
167
|
ropeLocalTheta: null,
|
|
168
|
+
mropeInterleaved: false,
|
|
169
|
+
mropeSection: null,
|
|
170
|
+
partialRotaryFactor: null,
|
|
168
171
|
ropeScalingType: null,
|
|
169
172
|
ropeScalingFactor: 1.0,
|
|
170
173
|
ropeLocalScalingType: null,
|
|
@@ -18,6 +18,15 @@ export interface RoPEConfigSchema {
|
|
|
18
18
|
/** Local RoPE theta for sliding window layers (Gemma 3 uses 10000) */
|
|
19
19
|
ropeLocalTheta?: number;
|
|
20
20
|
|
|
21
|
+
/** Apply adjacent-pair rotary layout instead of rotate-half layout. */
|
|
22
|
+
mropeInterleaved?: boolean;
|
|
23
|
+
|
|
24
|
+
/** mRoPE section sizes before the Qwen doubling step. */
|
|
25
|
+
mropeSection?: number[] | null;
|
|
26
|
+
|
|
27
|
+
/** Fraction of the head dimension that participates in rotary embedding. */
|
|
28
|
+
partialRotaryFactor?: number | null;
|
|
29
|
+
|
|
21
30
|
/** RoPE scaling type */
|
|
22
31
|
ropeScalingType?: 'linear' | 'dynamic' | 'yarn' | null;
|
|
23
32
|
|
|
@@ -105,6 +105,12 @@ export interface KernelPathSchema {
|
|
|
105
105
|
/** KV cache dtype for this path; defaults to activationDtype when omitted. */
|
|
106
106
|
kvDtype?: string;
|
|
107
107
|
|
|
108
|
+
/**
|
|
109
|
+
* Explicit widening target used by the finiteness fallback execution plan.
|
|
110
|
+
* Required for inline/generated kernel paths that do not have a stable registry id.
|
|
111
|
+
*/
|
|
112
|
+
finitenessFallbackKernelPathId?: string;
|
|
113
|
+
|
|
108
114
|
/**
|
|
109
115
|
* Prefill phase kernel sequence (M > 1).
|
|
110
116
|
* If not specified, uses decode with batched variants.
|
|
@@ -217,6 +217,12 @@ export interface ManifestRoPESchema {
|
|
|
217
217
|
ropeTheta: number;
|
|
218
218
|
/** Local theta for sliding window layers (null = same as ropeTheta) */
|
|
219
219
|
ropeLocalTheta: number | null;
|
|
220
|
+
/** Use adjacent-pair rotary layout instead of rotate-half layout. */
|
|
221
|
+
mropeInterleaved: boolean;
|
|
222
|
+
/** mRoPE section sizes before the Qwen doubling step. */
|
|
223
|
+
mropeSection: number[] | null;
|
|
224
|
+
/** Fraction of the head dimension that participates in rotary embedding. */
|
|
225
|
+
partialRotaryFactor: number | null;
|
|
220
226
|
/** RoPE scaling type (null = no scaling, 'linear', 'dynamic', 'yarn') */
|
|
221
227
|
ropeScalingType: string | null;
|
|
222
228
|
/** RoPE scaling factor (1.0 if no scaling) */
|
|
@@ -62,6 +62,9 @@ export const DEFAULT_MANIFEST_INFERENCE = {
|
|
|
62
62
|
rope: {
|
|
63
63
|
ropeTheta: 10000,
|
|
64
64
|
ropeLocalTheta: null, // Same as ropeTheta (null = use ropeTheta)
|
|
65
|
+
mropeInterleaved: false,
|
|
66
|
+
mropeSection: null,
|
|
67
|
+
partialRotaryFactor: null,
|
|
65
68
|
ropeScalingType: null, // No scaling (null = disabled)
|
|
66
69
|
ropeScalingFactor: 1.0,
|
|
67
70
|
ropeLocalScalingType: null, // Local scaling policy (null = no scaling)
|
|
@@ -6,10 +6,26 @@ function asObject(value) {
|
|
|
6
6
|
}
|
|
7
7
|
|
|
8
8
|
/**
 * Coerce a raw config value to a finite number.
 *
 * Only numbers and numeric strings are accepted. Everything else, including
 * null/undefined, booleans, arrays and objects, yields `null` so callers can
 * fall through to the next source with `??`.
 *
 * @param {unknown} value - Raw value read from a config object.
 * @returns {number | null} The finite number, or `null` when the value is
 *   absent, blank, non-numeric, NaN or infinite.
 */
function asFiniteNumber(value) {
  if (typeof value === 'string') {
    // Number('') and Number('   ') are both 0; a blank string means "not set".
    if (value.trim() === '') {
      return null;
    }
  } else if (typeof value !== 'number') {
    // Number(true) === 1 and Number([]) === 0 would otherwise masquerade as
    // real settings; null/undefined also land here.
    return null;
  }
  const parsed = Number(value);
  return Number.isFinite(parsed) ? parsed : null;
}
|
|
12
15
|
|
|
16
|
+
/**
 * Pass through strict booleans; map every other value to `null`.
 *
 * No truthiness coercion is applied, so strings such as 'true' and numbers
 * such as 1 are rejected.
 *
 * @param {unknown} value - Raw value read from a config object.
 * @returns {boolean | null} `value` when it is exactly `true` or `false`,
 *   otherwise `null`.
 */
function asBoolean(value) {
  if (value === true || value === false) {
    return value;
  }
  return null;
}
|
|
19
|
+
|
|
20
|
+
/**
 * Normalize an array of section sizes to positive integers.
 *
 * Each entry is parsed with `asFiniteNumber`, truncated toward zero, and then
 * validated. Truncation happens before the positivity check so that an entry
 * such as 0.5 is rejected instead of becoming a zero-sized section.
 *
 * @param {unknown} value - Raw value read from a config object.
 * @returns {number[] | null} A new array of positive integers, or `null` when
 *   `value` is not an array or any entry is missing, non-numeric, or truncates
 *   to a value <= 0. An empty array is returned as an empty array.
 */
function asNumberArray(value) {
  if (!Array.isArray(value)) return null;
  const sections = [];
  for (const entry of value) {
    const parsed = asFiniteNumber(entry);
    if (parsed == null) {
      return null;
    }
    const whole = Math.trunc(parsed);
    if (whole <= 0) {
      return null;
    }
    sections.push(whole);
  }
  return sections;
}
|
|
28
|
+
|
|
13
29
|
function normalizeRoPEType(value) {
|
|
14
30
|
if (typeof value !== 'string') return null;
|
|
15
31
|
const normalized = value.trim().toLowerCase();
|
|
@@ -125,6 +141,13 @@ function failOnConflictingScaling(sourceLabel, canonicalScaling, candidateScalin
|
|
|
125
141
|
export function buildRoPEConfig(presetInference, config) {
|
|
126
142
|
const ropeScaling = asObject(config.rope_scaling);
|
|
127
143
|
const ropeParameters = asObject(config.rope_parameters);
|
|
144
|
+
const flatRoPEParameters = (
|
|
145
|
+
ropeParameters
|
|
146
|
+
&& !asObject(ropeParameters.full_attention)
|
|
147
|
+
&& !asObject(ropeParameters.sliding_attention)
|
|
148
|
+
)
|
|
149
|
+
? ropeParameters
|
|
150
|
+
: null;
|
|
128
151
|
const fullAttentionRoPE = asObject(ropeParameters?.full_attention);
|
|
129
152
|
const slidingAttentionRoPE = asObject(ropeParameters?.sliding_attention);
|
|
130
153
|
const presetRoPE = presetInference.rope ?? {};
|
|
@@ -164,6 +187,11 @@ export function buildRoPEConfig(presetInference, config) {
|
|
|
164
187
|
strictMissingTypeAndFactor: false,
|
|
165
188
|
sourceLabel: 'HF config rope_parameters.full_attention',
|
|
166
189
|
});
|
|
190
|
+
} else if (flatRoPEParameters) {
|
|
191
|
+
globalScaling = resolveScalingConfig(flatRoPEParameters, {
|
|
192
|
+
strictMissingTypeAndFactor: false,
|
|
193
|
+
sourceLabel: 'HF config rope_parameters',
|
|
194
|
+
});
|
|
167
195
|
}
|
|
168
196
|
|
|
169
197
|
const hasPresetLocalScaling = presetRoPE.ropeLocalScalingType !== undefined
|
|
@@ -192,6 +220,7 @@ export function buildRoPEConfig(presetInference, config) {
|
|
|
192
220
|
// HF config is source of truth for ropeTheta when provided:
|
|
193
221
|
// prefer rope_parameters.full_attention.rope_theta, then flat rope_parameters.rope_theta, then rope_theta.
|
|
194
222
|
const ropeTheta = asFiniteNumber(fullAttentionRoPE?.rope_theta)
|
|
223
|
+
?? asFiniteNumber(flatRoPEParameters?.rope_theta)
|
|
195
224
|
?? asFiniteNumber(config.rope_theta)
|
|
196
225
|
?? presetInference.rope?.ropeTheta
|
|
197
226
|
?? 10000;
|
|
@@ -201,9 +230,22 @@ export function buildRoPEConfig(presetInference, config) {
|
|
|
201
230
|
?? presetInference.rope?.ropeLocalTheta
|
|
202
231
|
?? null;
|
|
203
232
|
|
|
233
|
+
const mropeInterleaved = asBoolean(flatRoPEParameters?.mrope_interleaved)
|
|
234
|
+
?? presetInference.rope?.mropeInterleaved
|
|
235
|
+
?? false;
|
|
236
|
+
const mropeSection = asNumberArray(flatRoPEParameters?.mrope_section)
|
|
237
|
+
?? presetInference.rope?.mropeSection
|
|
238
|
+
?? null;
|
|
239
|
+
const partialRotaryFactor = asFiniteNumber(flatRoPEParameters?.partial_rotary_factor)
|
|
240
|
+
?? asFiniteNumber(presetInference.rope?.partialRotaryFactor)
|
|
241
|
+
?? null;
|
|
242
|
+
|
|
204
243
|
return {
|
|
205
244
|
ropeTheta,
|
|
206
245
|
ropeLocalTheta,
|
|
246
|
+
mropeInterleaved,
|
|
247
|
+
mropeSection,
|
|
248
|
+
partialRotaryFactor,
|
|
207
249
|
ropeScalingType: globalScaling.ropeScalingType,
|
|
208
250
|
ropeScalingFactor: globalScaling.ropeScalingFactor,
|
|
209
251
|
yarnBetaFast: globalScaling.yarnBetaFast,
|