@simulatte/doppler 0.1.5 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (130) hide show
  1. package/README.md +23 -8
  2. package/package.json +7 -4
  3. package/src/config/kernels/kernel-ref-digests.js +39 -39
  4. package/src/config/kernels/registry.json +42 -2
  5. package/src/config/loader.js +31 -2
  6. package/src/config/merge.js +18 -0
  7. package/src/config/presets/models/qwen3.json +9 -2
  8. package/src/config/presets/models/transformer.json +5 -0
  9. package/src/config/required-inference-fields-contract-check.js +6 -0
  10. package/src/config/schema/inference-defaults.schema.js +3 -0
  11. package/src/config/schema/inference.schema.d.ts +9 -0
  12. package/src/config/schema/kernel-path.schema.d.ts +6 -0
  13. package/src/config/schema/manifest.schema.d.ts +6 -0
  14. package/src/config/schema/manifest.schema.js +3 -0
  15. package/src/converter/rope-config.js +42 -0
  16. package/src/gpu/device.js +58 -0
  17. package/src/gpu/kernels/attention.js +98 -0
  18. package/src/gpu/kernels/bias_add.wgsl +8 -6
  19. package/src/gpu/kernels/bias_add_f16.wgsl +8 -5
  20. package/src/gpu/kernels/conv2d.js +1 -1
  21. package/src/gpu/kernels/conv2d.wgsl +7 -8
  22. package/src/gpu/kernels/conv2d_f16.wgsl +7 -8
  23. package/src/gpu/kernels/depthwise_conv2d.js +2 -1
  24. package/src/gpu/kernels/depthwise_conv2d.wgsl +6 -9
  25. package/src/gpu/kernels/depthwise_conv2d_f16.wgsl +6 -9
  26. package/src/gpu/kernels/grouped_pointwise_conv2d.js +2 -1
  27. package/src/gpu/kernels/grouped_pointwise_conv2d.wgsl +6 -9
  28. package/src/gpu/kernels/grouped_pointwise_conv2d_f16.wgsl +6 -9
  29. package/src/gpu/kernels/matmul.js +25 -0
  30. package/src/gpu/kernels/pixel_shuffle.js +1 -1
  31. package/src/gpu/kernels/pixel_shuffle.wgsl +4 -5
  32. package/src/gpu/kernels/pixel_shuffle_f16.wgsl +4 -5
  33. package/src/gpu/kernels/relu.js +15 -2
  34. package/src/gpu/kernels/relu.wgsl +2 -1
  35. package/src/gpu/kernels/relu_f16.wgsl +2 -1
  36. package/src/gpu/kernels/repeat_channels.js +1 -1
  37. package/src/gpu/kernels/repeat_channels.wgsl +4 -5
  38. package/src/gpu/kernels/repeat_channels_f16.wgsl +4 -5
  39. package/src/gpu/kernels/residual.js +44 -8
  40. package/src/gpu/kernels/residual.wgsl +6 -3
  41. package/src/gpu/kernels/residual_f16.wgsl +2 -1
  42. package/src/gpu/kernels/residual_f16_vec4.wgsl +2 -1
  43. package/src/gpu/kernels/residual_vec4.wgsl +2 -1
  44. package/src/gpu/kernels/rmsnorm.js +58 -6
  45. package/src/gpu/kernels/rmsnorm.wgsl +14 -6
  46. package/src/gpu/kernels/rmsnorm_f16.wgsl +10 -2
  47. package/src/gpu/kernels/rope.d.ts +2 -0
  48. package/src/gpu/kernels/rope.js +11 -1
  49. package/src/gpu/kernels/rope.wgsl +56 -40
  50. package/src/gpu/kernels/sana_linear_attention.js +1 -2
  51. package/src/gpu/kernels/sana_linear_attention_apply.wgsl +4 -5
  52. package/src/gpu/kernels/sana_linear_attention_apply_f16.wgsl +4 -5
  53. package/src/gpu/kernels/sana_linear_attention_summary.wgsl +4 -0
  54. package/src/gpu/kernels/sana_linear_attention_summary_f16.wgsl +4 -0
  55. package/src/gpu/kernels/silu.d.ts +1 -0
  56. package/src/gpu/kernels/silu.js +32 -14
  57. package/src/gpu/kernels/silu.wgsl +19 -9
  58. package/src/gpu/kernels/silu_f16.wgsl +19 -9
  59. package/src/gpu/kernels/transpose.js +15 -2
  60. package/src/gpu/kernels/transpose.wgsl +5 -6
  61. package/src/gpu/kernels/upsample2d.js +2 -1
  62. package/src/gpu/kernels/upsample2d.wgsl +6 -9
  63. package/src/gpu/kernels/upsample2d_f16.wgsl +6 -9
  64. package/src/gpu/kernels/utils.js +16 -1
  65. package/src/inference/browser-harness.js +47 -1
  66. package/src/inference/pipelines/diffusion/pipeline.js +15 -6
  67. package/src/inference/pipelines/diffusion/text-encoder-gpu.d.ts +5 -0
  68. package/src/inference/pipelines/diffusion/text-encoder-gpu.js +27 -15
  69. package/src/inference/pipelines/text/attention/record.js +11 -2
  70. package/src/inference/pipelines/text/attention/run.js +11 -2
  71. package/src/inference/pipelines/text/chat-format.js +25 -1
  72. package/src/inference/pipelines/text/config.d.ts +4 -0
  73. package/src/inference/pipelines/text/config.js +68 -1
  74. package/src/inference/pipelines/text/execution-plan.js +23 -31
  75. package/src/inference/pipelines/text/execution-v0.js +29 -2
  76. package/src/inference/pipelines/text/ffn/standard.js +3 -0
  77. package/src/inference/pipelines/text/init.d.ts +4 -0
  78. package/src/inference/pipelines/text/init.js +56 -9
  79. package/src/inference/pipelines/text/layer.js +11 -0
  80. package/src/inference/pipelines/text.js +4 -0
  81. package/src/inference/tokenizers/bundled.js +156 -33
  82. package/src/rules/tooling/command-runtime.rules.json +18 -0
  83. package/src/tooling/command-api.d.ts +27 -1
  84. package/src/tooling/command-api.js +142 -3
  85. package/src/tooling/node-browser-command-runner.d.ts +4 -0
  86. package/src/tooling/node-browser-command-runner.js +58 -3
  87. package/src/tooling/node-command-runner.js +15 -0
  88. package/src/tooling/node-webgpu.js +9 -87
  89. package/src/training/checkpoint-watch.d.ts +7 -0
  90. package/src/training/checkpoint-watch.js +106 -0
  91. package/src/training/checkpoint.d.ts +6 -1
  92. package/src/training/checkpoint.js +12 -2
  93. package/src/training/distillation/artifacts.d.ts +71 -0
  94. package/src/training/distillation/artifacts.js +132 -0
  95. package/src/training/distillation/checkpoint-watch.d.ts +10 -0
  96. package/src/training/distillation/checkpoint-watch.js +57 -0
  97. package/src/training/distillation/dataset.d.ts +59 -0
  98. package/src/training/distillation/dataset.js +337 -0
  99. package/src/training/distillation/eval.d.ts +34 -0
  100. package/src/training/distillation/eval.js +310 -0
  101. package/src/training/distillation/index.d.ts +29 -0
  102. package/src/training/distillation/index.js +29 -0
  103. package/src/training/distillation/runtime.d.ts +20 -0
  104. package/src/training/distillation/runtime.js +121 -0
  105. package/src/training/distillation/scoreboard.d.ts +6 -0
  106. package/src/training/distillation/scoreboard.js +8 -0
  107. package/src/training/distillation/stage-a.d.ts +45 -0
  108. package/src/training/distillation/stage-a.js +338 -0
  109. package/src/training/distillation/stage-b.d.ts +24 -0
  110. package/src/training/distillation/stage-b.js +20 -0
  111. package/src/training/index.d.ts +10 -0
  112. package/src/training/index.js +10 -0
  113. package/src/training/lora-pipeline.d.ts +40 -0
  114. package/src/training/lora-pipeline.js +796 -0
  115. package/src/training/operator-artifacts.d.ts +62 -0
  116. package/src/training/operator-artifacts.js +140 -0
  117. package/src/training/operator-command.d.ts +5 -0
  118. package/src/training/operator-command.js +453 -0
  119. package/src/training/operator-eval.d.ts +48 -0
  120. package/src/training/operator-eval.js +230 -0
  121. package/src/training/operator-scoreboard.d.ts +5 -0
  122. package/src/training/operator-scoreboard.js +44 -0
  123. package/src/training/runner.d.ts +52 -0
  124. package/src/training/runner.js +29 -4
  125. package/src/training/suite.d.ts +112 -0
  126. package/src/training/suite.js +9 -9
  127. package/src/training/workloads.d.ts +164 -0
  128. package/src/training/workloads.js +539 -0
  129. package/src/version.js +1 -1
  130. package/tools/doppler-cli.js +137 -40
package/README.md CHANGED
@@ -22,7 +22,7 @@ for await (const token of model.generate('Hello, world')) {
22
22
  }
23
23
  ```
24
24
 
25
- Registry IDs resolve to hosted RDRR artifacts from `Clocksmith/rdrr` by default. Tokens stream from a native `AsyncGenerator`. See [more examples](#more-examples) below or the canonical [Root API guide](docs/api/root.md).
25
+ Registry IDs resolve to hosted RDRR artifacts from `Clocksmith/rdrr` by default. Tokens stream from a native `AsyncGenerator`. See [more examples](#more-examples) below or the canonical [Root API guide](https://github.com/clocksmith/doppler/blob/main/docs/api/root.md).
26
26
 
27
27
  ## Why Doppler
28
28
 
@@ -36,11 +36,11 @@ Registry IDs resolve to hosted RDRR artifacts from `Clocksmith/rdrr` by default.
36
36
 
37
37
  ## Evidence
38
38
 
39
- ![Phase-latency comparison on one workload across models](benchmarks/vendors/results/compare_1b_multi-workload_favorable_phases.svg)
39
+ ![Phase-latency comparison on one workload across models](https://raw.githubusercontent.com/clocksmith/doppler/main/benchmarks/vendors/results/compare_1b_multi-workload_favorable_phases.svg)
40
40
 
41
41
  Snapshot artifacts:
42
- - [g3-1b-p064-d064-t0-k1.compare.json](benchmarks/vendors/fixtures/g3-1b-p064-d064-t0-k1.compare.json)
43
- - [lfm2-5-1-2b-p064-d064-t0-k1.compare.json](benchmarks/vendors/fixtures/lfm2-5-1-2b-p064-d064-t0-k1.compare.json)
42
+ - [g3-1b-p064-d064-t0-k1.compare.json](https://github.com/clocksmith/doppler/blob/main/benchmarks/vendors/fixtures/g3-1b-p064-d064-t0-k1.compare.json)
43
+ - [lfm2-5-1-2b-p064-d064-t0-k1.compare.json](https://github.com/clocksmith/doppler/blob/main/benchmarks/vendors/fixtures/lfm2-5-1-2b-p064-d064-t0-k1.compare.json)
44
44
 
45
45
  ## Under the hood
46
46
 
@@ -77,10 +77,25 @@ for await (const token of doppler('Hello', { model: 'gemma3-270m' })) {
77
77
 
78
78
  ## Documentation
79
79
 
80
- - Docs index (canonical navigation): [docs/INDEX.md](docs/INDEX.md)
81
- - First-run workflow: [docs/getting-started.md](docs/getting-started.md)
82
- - Runtime config contract: [docs/config.md](docs/config.md)
83
- - Architecture: [docs/architecture.md](docs/architecture.md)
80
+ - Docs index (canonical navigation): [docs/INDEX.md](https://github.com/clocksmith/doppler/blob/main/docs/INDEX.md)
81
+ - First-run workflow: [docs/getting-started.md](https://github.com/clocksmith/doppler/blob/main/docs/getting-started.md)
82
+ - Runtime config contract: [docs/config.md](https://github.com/clocksmith/doppler/blob/main/docs/config.md)
83
+ - Architecture: [docs/architecture.md](https://github.com/clocksmith/doppler/blob/main/docs/architecture.md)
84
+ - Generated model support table: [docs/model-support-matrix.md](https://github.com/clocksmith/doppler/blob/main/docs/model-support-matrix.md)
85
+
86
+ ## Current model support
87
+
88
+ Verified right now:
89
+ - `gemma-3-270m-it-wq4k-ef16-hf16`
90
+ - `gemma-3-1b-it-wq4k-ef16-hf16`
91
+ - `google-embeddinggemma-300m-wq4k-ef16`
92
+ - `translategemma-4b-it-wq4k-ef16-hf16`
93
+
94
+ Known failing right now:
95
+ - `qwen-3-5-0-8b-wq4k-ef16-hf16-f16`
96
+ - `qwen-3-5-2b-wq4k-ef16-hf16-f16`
97
+
98
+ For the generated status table, including `loads but unverified` and `everything else`, see [docs/model-support-matrix.md](https://github.com/clocksmith/doppler/blob/main/docs/model-support-matrix.md).
84
99
 
85
100
  ## Environment requirements
86
101
 
package/package.json CHANGED
@@ -1,12 +1,12 @@
1
1
  {
2
2
  "name": "@simulatte/doppler",
3
- "version": "0.1.5",
3
+ "version": "0.1.6",
4
4
  "description": "Browser-native WebGPU inference engine for local intent and inference loops",
5
5
  "main": "src/index.js",
6
6
  "types": "src/index.d.ts",
7
7
  "type": "module",
8
8
  "bin": {
9
- "doppler": "./tools/doppler-cli.js"
9
+ "doppler": "tools/doppler-cli.js"
10
10
  },
11
11
  "scripts": {
12
12
  "cli": "node tools/doppler-cli.js",
@@ -125,7 +125,7 @@
125
125
  },
126
126
  "repository": {
127
127
  "type": "git",
128
- "url": "https://github.com/clocksmith/doppler.git"
128
+ "url": "git+https://github.com/clocksmith/doppler.git"
129
129
  },
130
130
  "homepage": "https://github.com/clocksmith/doppler",
131
131
  "keywords": [
@@ -142,6 +142,9 @@
142
142
  "url": "https://github.com/clocksmith/doppler/issues"
143
143
  },
144
144
  "license": "Apache-2.0",
145
+ "publishConfig": {
146
+ "access": "public"
147
+ },
145
148
  "files": [
146
149
  "src",
147
150
  "src/gpu/kernels/*.wgsl",
@@ -160,6 +163,6 @@
160
163
  "playwright": "^1.58.2"
161
164
  },
162
165
  "optionalDependencies": {
163
- "@simulatte/webgpu-doe": "0.1.x"
166
+ "@simulatte/webgpu": "0.1.x"
164
167
  }
165
168
  }
@@ -49,16 +49,16 @@ export const KERNEL_REF_CONTENT_DIGESTS = Object.freeze({
49
49
  "backward/upsample2d_backward.wgsl#main": "3f3a27fc880d3c4cba49889cafb4be66d30132cfeec9c2105751cc12a5a44ae3",
50
50
  "bf16_to_f16.wgsl#main": "91c009d291a205fc42e7b45942e58630442eb67f8cb4bacc2e54160fa5f25c35",
51
51
  "bf16_to_f32.wgsl#main": "2c5c08476d40aeb64287b8b31d1ef088c63e8bb4568bf7b7c5faf4a4ed493576",
52
- "bias_add_f16.wgsl#main": "8ec47faa1a52190616b431ca8bbd7b1e657ef13720e35e787e4df16b65817b03",
53
- "bias_add.wgsl#main": "d15e3f38379e5a7b393ab88c8a1a6eae13bbc8fec4348267d891ce5b7e8bb963",
52
+ "bias_add_f16.wgsl#main": "f4c5b6e6495bcaae4582c3eb11c7fe39466a9625bbe57eede116c24db71682f6",
53
+ "bias_add.wgsl#main": "e7d3b4a5f9efc8b0569c56bcdeb63e921fa6d1d006901deabc734cab40346715",
54
54
  "cast_f16_to_f32.wgsl#main": "98a0b31fcba2bad945e9f8522d643ae75135bfdf6b39a824565f57d5d4fd965d",
55
55
  "cast_f32_to_f16.wgsl#main": "0b669e7812c09a2f44e219548e11ca0dfd8af921d30017e2a5c79d54f792542f",
56
56
  "clamp.wgsl#main": "9d8039a590d102133636d67de8ae76ca8e9129bf220c48f7a08e9d82fcc48bad",
57
- "conv2d_f16.wgsl#main": "aa139e9f0270873acbc1c4b3cbacff4d224cae7247b520ec129a4f068eb6ed59",
58
- "conv2d.wgsl#main": "484a676692d2b8097daeefe42e2296a1f8b3ef11abfd7b41df6cdcdf16b7a8fd",
57
+ "conv2d_f16.wgsl#main": "1e26a7e45e434fc45bdcd1913b0b97d2ed7e80b3c0bb1beae3d762ee457841c7",
58
+ "conv2d.wgsl#main": "140ed68bf1554ebfadbad7bcf1dfdff7f95aff18588459205f14b05cb432eee2",
59
59
  "cross_entropy_loss.wgsl#main": "5a48087bdec94184432c90ce5b345e1eadbdfcb13b9793ecee8052bc7392239c",
60
- "depthwise_conv2d_f16.wgsl#main": "d5d8d195b1449e39715340af4a0759da4b44b54f6a3cfbdfa6abe743b0f1d002",
61
- "depthwise_conv2d.wgsl#main": "e5da160f505e18508619b78ba30f9bde0c84689a166df06cb59ef0e6591c6faf",
60
+ "depthwise_conv2d_f16.wgsl#main": "f7f093a7e6623ed17a675bac729149e94718aece916416966eaf03c1d6939f2a",
61
+ "depthwise_conv2d.wgsl#main": "cf14cb40d282ad4d4fab160109b97eaeaf12aab62579b73324ac485ac75155b0",
62
62
  "dequant_f16_out_vec4.wgsl#main_vec4": "61c20e6c71c1c8421b4ec202dbd26292a6300587bd44c314f2a6c6d9d9442c3a",
63
63
  "dequant_f16_out.wgsl#main": "94d61843d56f9a3bbc6b7c2b95dc6ecbba3f6a262b2c4086a076f69a8c38ccae",
64
64
  "dequant_f16_rowwise.wgsl#main": "f5bf7cef950b52d65cee6121dbaa176244d3221045b3b6386b3be47f23ce17dc",
@@ -118,8 +118,8 @@ export const KERNEL_REF_CONTENT_DIGESTS = Object.freeze({
118
118
  "gelu.wgsl#main": "a9007ea08aaff98f9be08f1e0490a6bcf252883eac5513de876ab9ce918865e6",
119
119
  "gptoss_mxfp4_expert_fused.wgsl#main_expert": "3159e8cd81da13f909cf905e6d35307fefe1dcbbdf1b2b8e8ff0ce923bd71180",
120
120
  "gptoss_router_topk.wgsl#softmax_topk": "86e4ea709c0c0084d09c6a4cd07710dc14f380e03f91b8ed9ec871b310be49f1",
121
- "grouped_pointwise_conv2d_f16.wgsl#main": "11bcaefc5929b2e3c1ba338ebea6a28d2cac26553be8b00f51bfddbabf513be7",
122
- "grouped_pointwise_conv2d.wgsl#main": "c0d5cdec0743b4ee337a8df95bda442e617c1678e3d1b6e20ec692d500ede50d",
121
+ "grouped_pointwise_conv2d_f16.wgsl#main": "578a0fcb3362864feb0de0e800b2df49e66e42db4360a08189e5f815ead944c4",
122
+ "grouped_pointwise_conv2d.wgsl#main": "9cf77937b10dd96f3aedc1793183ef2fff05d075fac3884fad5cd5869d0d9181",
123
123
  "groupnorm_apply_f16.wgsl#main": "cfd850b87944ac1c03ba7bd98136db556dadd8a70611e351d82d297299a7cd02",
124
124
  "groupnorm_apply.wgsl#main": "b09b8f2f57dcdfa1a0366daa30d3910feb134204652c711d2ba564e566b5a334",
125
125
  "groupnorm_stats_f16.wgsl#main": "fb76f78ce668ea8459110335698fe4b09a2425fc71deed3bab67efd7641c3199",
@@ -155,33 +155,33 @@ export const KERNEL_REF_CONTENT_DIGESTS = Object.freeze({
155
155
  "moe_gather.wgsl#gather_single_pass": "8dbd0c38a323ba6c73af3ecaad297d79d861b817152e6e2c12fcd9db6f767f1e",
156
156
  "moe_gather.wgsl#gather_tokens": "58e40a63a39c2f99c47c75dee71767f98482217afd74607cee1b7efc2c85738b",
157
157
  "moe_offsets.wgsl#build_offsets": "3ea004145fa234659408cdeb0d4d802adff1037c9c5c03af146b3734cc69dd27",
158
- "pixel_shuffle_f16.wgsl#main": "57903a9c19cecc56371b2198402745127115680d266c3ce609201be9119aa359",
159
- "pixel_shuffle.wgsl#main": "845b88700b1b46d18cde6f2ec11bb89512c90d7e148763e74ce2a4173fd99b21",
160
- "relu_f16.wgsl#main": "fc6134aabe43081b42ce8507d8f374092d0f2e03316aa42c25dd50229dc0ee40",
161
- "relu.wgsl#main": "ca2c9bfa0acb9ece3b7e67de5209e00e553602b3917d23aca10338c1e6f01e27",
162
- "repeat_channels_f16.wgsl#main": "e7e4d9164752e782d482db40256d0d86d96f784aa7debdb72faf3261b9bdd737",
163
- "repeat_channels.wgsl#main": "ad0e34925c8c1173b9f0d92fa6e3808d039f82b3d9ad943b0a75b213ee1776e5",
164
- "residual_f16_vec4.wgsl#add_vec4": "30e9226fb6636e2f01e65b1dc8e93c8e849a87acec6215342fc114996da1ed41",
165
- "residual_f16.wgsl#main": "d392433f3065d1caf68b033219f4ffacf022dc1f90fc3cf3fd620e4ba49f3219",
166
- "residual_vec4.wgsl#add_vec4": "ef011d1683e62887db712da563e783d12fdc80c152955661137d2dca612d7d6a",
167
- "residual.wgsl#main": "1fc456b14e2fb2bc9627107b4e51e7a2098f723b5ba6ab5542cd9455af99f423",
168
- "rmsnorm_f16.wgsl#main": "21d6d702cfcb8d653d8e105bbbc522e85704fa5628b865a7d36cde338ca5c779",
169
- "rmsnorm_f16.wgsl#rmsnorm_small_f16": "10803373bce7d5d5eb0939821e629e0f3fc4e8d27cdddab89f14a9db02e83603",
170
- "rmsnorm.wgsl#main": "c529986befb29a04b94d89744585923a7cef82baf4b2b0a243aa2431618622cc",
171
- "rmsnorm.wgsl#main_cached": "828d30dd8b6137457375b9f7b446a6bbdc1df826ce4d9b1818f6d1abe7adc9c5",
172
- "rmsnorm.wgsl#main_small": "2baf3a1a1a1e982cf9ffcddd3646655503536148c4a3ce9938646bc05cf701b5",
173
- "rmsnorm.wgsl#main_small_subgroup": "909b52d4dfcdd780f531afeb6964885adc90552487f36f131aa349bcc54fb495",
174
- "rmsnorm.wgsl#main_subgroup": "545ebdd1e6e3ab9790a17a4dd5501b12dbb7bc0073787f9a4f5821752a75991c",
158
+ "pixel_shuffle_f16.wgsl#main": "1d1202cdaa8f7a94e015c5e2212bc98881d00d548a12fe7a8e91c4e17f2ba723",
159
+ "pixel_shuffle.wgsl#main": "91c017f0642132229768a2be6c8d47ad26d486f5d40e7dbf9e2349c847e527b8",
160
+ "relu_f16.wgsl#main": "baac3a33693e5a72e60c7ec9b4a4dbcec10a83ca1fac2972988b07a75780f14a",
161
+ "relu.wgsl#main": "52ffa66f9898a4f291758ae93055000cb1eb488c22a993e6b0568a29d9d3ad28",
162
+ "repeat_channels_f16.wgsl#main": "6eee5f9f74d8c17a71f6f422b503bbfb872350a9486ec0065fd1f67718311883",
163
+ "repeat_channels.wgsl#main": "bce0cb33beed50ce388b2d069961ae0fbe361dd4930b9344cb066b3390475a51",
164
+ "residual_f16_vec4.wgsl#add_vec4": "bf8cd304a1d4c5500143b0bc52d0236a7e8bbc4cc2d51d54ef0d4fce209f503b",
165
+ "residual_f16.wgsl#main": "3ca5c562fb5baf495e31e662f85fb26863f8f6d4ad29bb119c484e2ffdee7791",
166
+ "residual_vec4.wgsl#add_vec4": "f2f30d2dec9d90062bf5fce1f141951e8e6b54f56698b87ffb9bf6662c8acce2",
167
+ "residual.wgsl#main": "f1abd88c959c5d8dd27b9353d487e37b2a96850ed9d90c365212e260399cc2a7",
168
+ "rmsnorm_f16.wgsl#main": "7ae20c01b0453306504f777c4a8de37364a8b45bef3c569b0572c7863740a6bd",
169
+ "rmsnorm_f16.wgsl#rmsnorm_small_f16": "e9ce0a7427831e4d1280691eb9ca0daab55d917d4f0d9975d4bcd7e8fe960941",
170
+ "rmsnorm.wgsl#main": "f516b3e4bde2015f2a207c3ca5b8c9820c7809fa8f8d0786f90c568e0f1ac077",
171
+ "rmsnorm.wgsl#main_cached": "bcae48e93d63e11701386850559fec5d4924128ad9d6ac1de27d1b8c34fc3dff",
172
+ "rmsnorm.wgsl#main_small": "95c65dcb443717c821c44de87dc89cdd4f6da97e08cccf8a9526f5cdd19dd33d",
173
+ "rmsnorm.wgsl#main_small_subgroup": "8ff72e445b662b1820be25a594fb0558007bfca7e50d2d1bc915df5774a76f6a",
174
+ "rmsnorm.wgsl#main_subgroup": "0aac7dd6455bf8f3d11917ec87fa71bb5fa9ef4f8e5bba02dfbfd11b92ccd01f",
175
175
  "rope_f16.wgsl#main": "a87f2964b77e851a2fbcc88305adeecaf8eba372291d83a71b817c8ef3da5c58",
176
176
  "rope_f16.wgsl#rope_compute_freqs": "c7aa2cb50420ab2709b20e0a33e93ab6aa4f50d2fa8d9f79b0bfcfefb2f7abab",
177
177
  "rope_f16.wgsl#rope_ntk_scaled": "46d2574b46539b289371c0c37a0b4e2fb21279134126b36a1fc30b98523905dd",
178
178
  "rope_f16.wgsl#rope_qk": "3bc01e167dc3cc5397bd7751e493311b4d3f5c6c0f6fad30234a740bb4c4507b",
179
179
  "rope_f16.wgsl#rope_yarn": "9b788dd05a1598aadcba5b0218d1666ce75faadbe32b71ee1def65ec23fb7dfe",
180
- "rope.wgsl#main": "b639fe8a54508115c82c13c923bfea89f59c6e15a5bef66bfc34e12f0ab4e32f",
181
- "rope.wgsl#rope_compute_freqs": "3d3a25df6036c9e87f6270bd6e1f67ffcd5cf3a1aac7cea19b1935fdb6ea6046",
182
- "rope.wgsl#rope_ntk_scaled": "14b044c70e8f7bc775606a88298fefbf1181024f45775497cf2350682879fcd4",
183
- "rope.wgsl#rope_qk": "d84c973564235347fa16b13552b5a6d7ab524f4ac9b65f0504fec07568b2f032",
184
- "rope.wgsl#rope_yarn": "00b473ce1d86f16444d948739da39e834e21d4db30ab09b0df7502d2e4c511b2",
180
+ "rope.wgsl#main": "4c803ad5e0dd065d5572c7aecc1def277c43884dcc02f22a9676914c10111400",
181
+ "rope.wgsl#rope_compute_freqs": "c9338316a31c8d467acbf8d512cb9616ee902d2619fa9187639f8ff5d78414ac",
182
+ "rope.wgsl#rope_ntk_scaled": "818f89865a3d1d6f2d49f671ac882d0fde9709702160a1ae8d9a8ef113afb511",
183
+ "rope.wgsl#rope_qk": "3d773c8b8c400142edc8a4111afb04a2bf75bdb109b2d41cbe5afdb72a959772",
184
+ "rope.wgsl#rope_yarn": "cb00e1cf87fac198dcf0fb0d4e2d5f6f99d2fed6dff0a089a96bb459917851d2",
185
185
  "sample_f16.wgsl#argmax": "30b9f199b49352e5aff91b7aa8016edb423ce33f77481c3a7bc184251856fb27",
186
186
  "sample_f16.wgsl#argmax_reduce": "a3ca27fc50b10c36c1676bdd5dbfe5edc67850cdd5c1af7a1d3ad70f830dd8a7",
187
187
  "sample_f16.wgsl#find_topk_phase1": "24e47e5ced28af802959e350ff0a6eec6b9a26f89fb38e222990eeaffb16bd36",
@@ -194,10 +194,10 @@ export const KERNEL_REF_CONTENT_DIGESTS = Object.freeze({
194
194
  "sample.wgsl#find_topk_phase2": "940b216e605d22096da5aca65950a8030866fc5a39e7fdf484d69a832de1b63a",
195
195
  "sample.wgsl#sample_single_pass": "4412357e84113ee2f1bc0dc8bf89e314c2ab482c89c14ca016ea9949d16a9d0c",
196
196
  "sample.wgsl#softmax_and_sample": "7172c60e76430fbe130e530e3564b569b45eccf193987b32d6f52bd6bbcc9f08",
197
- "sana_linear_attention_apply_f16.wgsl#main": "e47366b94d40c4388e631b5bf93f8d61ef4e52cc65ffcd3b08d9d170616bb138",
198
- "sana_linear_attention_apply.wgsl#main": "59cad7974c644fd910af776ad85a9a2c43c00492d4d1152fdc8373ecbb8bba18",
199
- "sana_linear_attention_summary_f16.wgsl#main": "e3c040bb6469d37fc78eb22c1cc3e0456301607e461bbcdf5365a583c5d260d2",
200
- "sana_linear_attention_summary.wgsl#main": "20c7ecdbcd1c73c0f9937c3cdac07b4b6edfe8618bf6f66281806343fd41b122",
197
+ "sana_linear_attention_apply_f16.wgsl#main": "4a7426ce67eccfb70956feeae84275f4d3cc586c50e8442c07eb69993b378ab5",
198
+ "sana_linear_attention_apply.wgsl#main": "5f69e0bc1d9e2df5a61e13bd819313c8f7ff5dfc4b7d78e71d5152dc23b6a86c",
199
+ "sana_linear_attention_summary_f16.wgsl#main": "3abb736ead999485b5dac9c6b534143b464cfd0b5300c5e03c56cec03c8fa48e",
200
+ "sana_linear_attention_summary.wgsl#main": "be9c1fe861dcb5ea46927749764267656a69160bc8b732c6eb1a1bcb0c075589",
201
201
  "scale.wgsl#main": "44ec481452b586307957163e3d65c9d02561d3f2f3db633f906f5488b1ea1ca4",
202
202
  "scale.wgsl#main_inplace": "020824c7118a59c461ce81f1c2cd01b7c2a3f1aab326392b7d48d4448a0c2ed1",
203
203
  "scatter_add_dynamic_f16_weights.wgsl#scatter_add_dynamic": "42799e745bc445b199b1cbc384bc12bb9372ed1599af3260a803cefc8dd35497",
@@ -206,8 +206,8 @@ export const KERNEL_REF_CONTENT_DIGESTS = Object.freeze({
206
206
  "scatter_add_vec4.wgsl#scatter_add_vec4": "247c4f23129cdfbb19593b17c5833d85048da117d77141c74bc4e16e691d94e1",
207
207
  "scatter_add.wgsl#main": "dea947b8014e9b674e4fec8f15fac6c926e8a3a4d8eff104b953d77f35a1ac35",
208
208
  "scatter_add.wgsl#scatter_add_accumulate": "561800af22dedae63f1abe69b757b0ef6c7832a2bff228c2262e0b7111d89247",
209
- "silu_f16.wgsl#main": "66a87710c1a680f9bf8d01f804709ecd2ce6a99bf2ee82a5f7e199e48eca6e29",
210
- "silu.wgsl#main": "e833f98b090845ada7b9ae8edc68cd6cfe823bd1d5ae7a6401f32b6e3bb6ee75",
209
+ "silu_f16.wgsl#main": "867634b20dcb75969e001966836892a2b7e01782b0028d94779c6ec21c254ae0",
210
+ "silu.wgsl#main": "7b52d30fb741beef2dbf728e0c4ecffe5b08d9661d63c306caecb4cb3ced85e5",
211
211
  "softmax_subgroup.wgsl#main_subgroup": "88472c0dab5f81c5f045f0ee79c4c3bb484791a4a2b84af398c019851438f091",
212
212
  "softmax_subgroup.wgsl#softmax_small_subgroup": "5d7bd1b698910a437197bf6c8b7f8b259036dd006ad5470f767b539dba8538f8",
213
213
  "softmax.wgsl#main": "45c5876806b442222d7e190e595f55a0079bae82e07d37586996c1a63790bb7a",
@@ -220,7 +220,7 @@ export const KERNEL_REF_CONTENT_DIGESTS = Object.freeze({
220
220
  "topk.wgsl#main": "a18763303cd18e8a020e647f8a52f65403526849faf835d9f9394f634c3c97eb",
221
221
  "topk.wgsl#softmax_topk": "95ff3517da909e4bd4d0ff8d85b619bd250522943aeb9276375edc59f67e9604",
222
222
  "topk.wgsl#topk_2_small": "289eaa5c4f005e0aaf37dfe5343aeda30d9ab3929979dbf0cc3553f23e136807",
223
- "transpose.wgsl#main": "3f80f9db1b150f453a9abb62300bfef35e744ec6a8c774c7b64f2ad6088204e5",
224
- "upsample2d_f16.wgsl#main": "961f876852d9f4f1d3b224f39cdae3fa434378db12f7903d23a8dab6824644d4",
225
- "upsample2d.wgsl#main": "9ff2d864c58a6776b7e03221bc6a7409e9e646fbabe7d9a33f4e06ed62120e43",
223
+ "transpose.wgsl#main": "002bce09c48b63ab5017d83f42233340011ac6fc20dae9cd08e3095ae5bf72b2",
224
+ "upsample2d_f16.wgsl#main": "43cee5f2503cb4b6caea45e9842f8961ce313b02eb8ed23a97d6967113ce521c",
225
+ "upsample2d.wgsl#main": "6de9172ad3d6940dd3c94470a105755a33760e66a84d6e9e96ec4d6a07dc4a25"
226
226
  });
@@ -1826,7 +1826,7 @@
1826
1826
  }
1827
1827
  ],
1828
1828
  "baseUniforms": {
1829
- "size": 16,
1829
+ "size": 32,
1830
1830
  "fields": [
1831
1831
  {
1832
1832
  "name": "hidden_size",
@@ -1847,6 +1847,26 @@
1847
1847
  "name": "has_residual",
1848
1848
  "type": "u32",
1849
1849
  "offset": 12
1850
+ },
1851
+ {
1852
+ "name": "token_stride",
1853
+ "type": "u32",
1854
+ "offset": 16
1855
+ },
1856
+ {
1857
+ "name": "_pad0",
1858
+ "type": "u32",
1859
+ "offset": 20
1860
+ },
1861
+ {
1862
+ "name": "_pad1",
1863
+ "type": "u32",
1864
+ "offset": 24
1865
+ },
1866
+ {
1867
+ "name": "_pad2",
1868
+ "type": "u32",
1869
+ "offset": 28
1850
1870
  }
1851
1871
  ]
1852
1872
  },
@@ -3637,7 +3657,7 @@
3637
3657
  }
3638
3658
  ],
3639
3659
  "baseUniforms": {
3640
- "size": 16,
3660
+ "size": 32,
3641
3661
  "fields": [
3642
3662
  {
3643
3663
  "name": "num_tokens",
@@ -4117,6 +4137,26 @@
4117
4137
  "name": "bias_offset",
4118
4138
  "type": "u32",
4119
4139
  "offset": 12
4140
+ },
4141
+ {
4142
+ "name": "token_stride",
4143
+ "type": "u32",
4144
+ "offset": 16
4145
+ },
4146
+ {
4147
+ "name": "_pad0",
4148
+ "type": "u32",
4149
+ "offset": 20
4150
+ },
4151
+ {
4152
+ "name": "_pad1",
4153
+ "type": "u32",
4154
+ "offset": 24
4155
+ },
4156
+ {
4157
+ "name": "_pad2",
4158
+ "type": "u32",
4159
+ "offset": 28
4120
4160
  }
4121
4161
  ]
4122
4162
  },
@@ -326,6 +326,8 @@ function assertArchitecture(manifest, architecture) {
326
326
 
327
327
  function extractArchitectureFromConfig(config) {
328
328
  const nestedTextConfig = getNestedTextConfig(config);
329
+ const topLevelRoPEParameters = getFlatRoPEParameters(config);
330
+ const nestedRoPEParameters = getFlatRoPEParameters(nestedTextConfig);
329
331
  return {
330
332
  numLayers: config.num_hidden_layers ?? nestedTextConfig?.num_hidden_layers ?? config.n_layer ?? config.blockCount,
331
333
  hiddenSize: config.hidden_size ?? nestedTextConfig?.hidden_size ?? config.n_embd ?? config.embeddingLength,
@@ -335,13 +337,20 @@ function extractArchitectureFromConfig(config) {
335
337
  headDim: config.head_dim ?? nestedTextConfig?.head_dim,
336
338
  vocabSize: config.vocab_size ?? nestedTextConfig?.vocab_size ?? config.vocabSize,
337
339
  maxSeqLen: config.max_position_embeddings ?? nestedTextConfig?.max_position_embeddings ?? config.n_positions ?? config.contextLength,
338
- ropeTheta: config.rope_theta ?? nestedTextConfig?.rope_theta ?? config.ropeFreqBase,
340
+ ropeTheta: topLevelRoPEParameters?.rope_theta
341
+ ?? nestedRoPEParameters?.rope_theta
342
+ ?? config.rope_theta
343
+ ?? nestedTextConfig?.rope_theta
344
+ ?? config.ropeFreqBase,
339
345
  rmsNormEps: config.rms_norm_eps ?? nestedTextConfig?.rms_norm_eps ?? config.attentionLayerNormRMSEpsilon,
340
346
  };
341
347
  }
342
348
 
343
349
  function extractInferenceFromConfig(config) {
344
350
  const nestedTextConfig = getNestedTextConfig(config);
351
+ const topLevelRoPEParameters = getFlatRoPEParameters(config);
352
+ const nestedRoPEParameters = getFlatRoPEParameters(nestedTextConfig);
353
+ const ropeParameters = nestedRoPEParameters ?? topLevelRoPEParameters;
345
354
  return {
346
355
  attention: {
347
356
  slidingWindow: config.sliding_window ?? nestedTextConfig?.sliding_window,
@@ -355,7 +364,13 @@ function extractInferenceFromConfig(config) {
355
364
  },
356
365
  pipeline: config.pipeline ?? nestedTextConfig?.pipeline,
357
366
  rope: {
358
- ropeTheta: config.rope_theta ?? nestedTextConfig?.rope_theta ?? config.ropeFreqBase,
367
+ ropeTheta: ropeParameters?.rope_theta
368
+ ?? config.rope_theta
369
+ ?? nestedTextConfig?.rope_theta
370
+ ?? config.ropeFreqBase,
371
+ mropeInterleaved: ropeParameters?.mrope_interleaved,
372
+ mropeSection: Array.isArray(ropeParameters?.mrope_section) ? ropeParameters.mrope_section : undefined,
373
+ partialRotaryFactor: ropeParameters?.partial_rotary_factor,
359
374
  ropeScalingType: config.rope_scaling_type ?? nestedTextConfig?.rope_scaling_type,
360
375
  ropeScalingFactor: config.rope_scaling_factor ?? nestedTextConfig?.rope_scaling_factor,
361
376
  },
@@ -375,6 +390,20 @@ function getNestedTextConfig(config) {
375
390
  return null;
376
391
  }
377
392
 
393
+ function getFlatRoPEParameters(config) {
394
+ if (!config || typeof config !== 'object' || Array.isArray(config)) {
395
+ return null;
396
+ }
397
+ const ropeParameters = config.rope_parameters;
398
+ if (!ropeParameters || typeof ropeParameters !== 'object' || Array.isArray(ropeParameters)) {
399
+ return null;
400
+ }
401
+ if (ropeParameters.full_attention || ropeParameters.sliding_attention) {
402
+ return null;
403
+ }
404
+ return ropeParameters;
405
+ }
406
+
378
407
  function extractTokenizerFromManifest(manifest) {
379
408
  if (!manifest.tokenizer) return {};
380
409
 
@@ -152,6 +152,24 @@ function mergeRoPE(
152
152
  runtime?.ropeLocalTheta,
153
153
  sources
154
154
  ),
155
+ mropeInterleaved: overlay(
156
+ `${prefix}.mropeInterleaved`,
157
+ manifest.mropeInterleaved,
158
+ runtime?.mropeInterleaved,
159
+ sources
160
+ ),
161
+ mropeSection: overlay(
162
+ `${prefix}.mropeSection`,
163
+ manifest.mropeSection,
164
+ runtime?.mropeSection,
165
+ sources
166
+ ),
167
+ partialRotaryFactor: overlay(
168
+ `${prefix}.partialRotaryFactor`,
169
+ manifest.partialRotaryFactor,
170
+ runtime?.partialRotaryFactor,
171
+ sources
172
+ ),
155
173
  ropeScalingType: overlay(
156
174
  `${prefix}.ropeScalingType`,
157
175
  manifest.ropeScalingType,
@@ -4,7 +4,7 @@
4
4
  "extends": "transformer",
5
5
 
6
6
  "architecture": {
7
- "ropeTheta": 1000000
7
+ "ropeTheta": 10000000
8
8
  },
9
9
 
10
10
  "inference": {
@@ -12,6 +12,9 @@
12
12
  "slidingWindow": null,
13
13
  "queryKeyNorm": true
14
14
  },
15
+ "output": {
16
+ "scaleEmbeddings": false
17
+ },
15
18
  "normalization": {
16
19
  "rmsNormWeightOffset": false,
17
20
  "rmsNormEps": 1e-6
@@ -21,7 +24,10 @@
21
24
  "enabled": true
22
25
  },
23
26
  "rope": {
24
- "ropeTheta": 1000000
27
+ "ropeTheta": 10000000,
28
+ "mropeInterleaved": true,
29
+ "mropeSection": [11, 11, 10],
30
+ "partialRotaryFactor": 0.25
25
31
  }
26
32
  },
27
33
 
@@ -34,6 +40,7 @@
34
40
 
35
41
  "detection": {
36
42
  "architecturePatterns": ["qwen3", "qwen3_5", "Qwen3ForCausalLM", "Qwen3_5ForCausalLM", "Qwen2ForCausalLM"],
43
+ "modelTypePatterns": ["qwen3_5", "qwen3_5_text", "qwen2"],
37
44
  "configPatterns": {
38
45
  "model_type": "qwen2"
39
46
  }
@@ -21,6 +21,11 @@
21
21
  "activation": "silu",
22
22
  "gatedActivation": true
23
23
  },
24
+ "rope": {
25
+ "mropeInterleaved": false,
26
+ "mropeSection": null,
27
+ "partialRotaryFactor": null
28
+ },
24
29
  "output": {
25
30
  "finalLogitSoftcapping": null,
26
31
  "tieWordEmbeddings": false
@@ -50,6 +50,9 @@ function createValidInferenceFixture() {
50
50
  ropeScalingFactor: 1.0,
51
51
  ropeScalingType: null,
52
52
  ropeLocalTheta: null,
53
+ mropeInterleaved: false,
54
+ mropeSection: null,
55
+ partialRotaryFactor: null,
53
56
  yarnBetaFast: null,
54
57
  yarnBetaSlow: null,
55
58
  yarnOriginalMaxPos: null,
@@ -94,6 +97,9 @@ const FIELD_CASES = Object.freeze([
94
97
  { kind: 'nonNullable', path: ['rope', 'ropeScalingFactor'], message: 'rope.ropeScalingFactor is required' },
95
98
  { kind: 'nullable', path: ['rope', 'ropeScalingType'], message: 'rope.ropeScalingType must be explicitly set' },
96
99
  { kind: 'nullable', path: ['rope', 'ropeLocalTheta'], message: 'rope.ropeLocalTheta must be explicitly set' },
100
+ { kind: 'nonNullable', path: ['rope', 'mropeInterleaved'], message: 'rope.mropeInterleaved is required' },
101
+ { kind: 'nullable', path: ['rope', 'mropeSection'], message: 'rope.mropeSection must be explicitly set' },
102
+ { kind: 'nullable', path: ['rope', 'partialRotaryFactor'], message: 'rope.partialRotaryFactor must be explicitly set' },
97
103
  { kind: 'nullable', path: ['rope', 'yarnBetaFast'], message: 'rope.yarnBetaFast must be explicitly set' },
98
104
  { kind: 'nullable', path: ['rope', 'yarnBetaSlow'], message: 'rope.yarnBetaSlow must be explicitly set' },
99
105
  { kind: 'nullable', path: ['rope', 'yarnOriginalMaxPos'], message: 'rope.yarnOriginalMaxPos must be explicitly set' },
@@ -165,6 +165,9 @@ export const DEFAULT_PRESET_INFERENCE_CONFIG = {
165
165
  rope: {
166
166
  ropeTheta: 10000,
167
167
  ropeLocalTheta: null,
168
+ mropeInterleaved: false,
169
+ mropeSection: null,
170
+ partialRotaryFactor: null,
168
171
  ropeScalingType: null,
169
172
  ropeScalingFactor: 1.0,
170
173
  ropeLocalScalingType: null,
@@ -18,6 +18,15 @@ export interface RoPEConfigSchema {
18
18
  /** Local RoPE theta for sliding window layers (Gemma 3 uses 10000) */
19
19
  ropeLocalTheta?: number;
20
20
 
21
+ /** Apply adjacent-pair rotary layout instead of rotate-half layout. */
22
+ mropeInterleaved?: boolean;
23
+
24
+ /** mRoPE section sizes before the Qwen doubling step. */
25
+ mropeSection?: number[] | null;
26
+
27
+ /** Fraction of the head dimension that participates in rotary embedding. */
28
+ partialRotaryFactor?: number | null;
29
+
21
30
  /** RoPE scaling type */
22
31
  ropeScalingType?: 'linear' | 'dynamic' | 'yarn' | null;
23
32
 
@@ -105,6 +105,12 @@ export interface KernelPathSchema {
105
105
  /** KV cache dtype for this path; defaults to activationDtype when omitted. */
106
106
  kvDtype?: string;
107
107
 
108
+ /**
109
+ * Explicit widening target used by the finiteness fallback execution plan.
110
+ * Required for inline/generated kernel paths that do not have a stable registry id.
111
+ */
112
+ finitenessFallbackKernelPathId?: string;
113
+
108
114
  /**
109
115
  * Prefill phase kernel sequence (M > 1).
110
116
  * If not specified, uses decode with batched variants.
@@ -217,6 +217,12 @@ export interface ManifestRoPESchema {
217
217
  ropeTheta: number;
218
218
  /** Local theta for sliding window layers (null = same as ropeTheta) */
219
219
  ropeLocalTheta: number | null;
220
+ /** Use adjacent-pair rotary layout instead of rotate-half layout. */
221
+ mropeInterleaved: boolean;
222
+ /** mRoPE section sizes before the Qwen doubling step. */
223
+ mropeSection: number[] | null;
224
+ /** Fraction of the head dimension that participates in rotary embedding. */
225
+ partialRotaryFactor: number | null;
220
226
  /** RoPE scaling type (null = no scaling, 'linear', 'dynamic', 'yarn') */
221
227
  ropeScalingType: string | null;
222
228
  /** RoPE scaling factor (1.0 if no scaling) */
@@ -62,6 +62,9 @@ export const DEFAULT_MANIFEST_INFERENCE = {
62
62
  rope: {
63
63
  ropeTheta: 10000,
64
64
  ropeLocalTheta: null, // Same as ropeTheta (null = use ropeTheta)
65
+ mropeInterleaved: false,
66
+ mropeSection: null,
67
+ partialRotaryFactor: null,
65
68
  ropeScalingType: null, // No scaling (null = disabled)
66
69
  ropeScalingFactor: 1.0,
67
70
  ropeLocalScalingType: null, // Local scaling policy (null = no scaling)
@@ -6,10 +6,26 @@ function asObject(value) {
6
6
  }
7
7
 
8
8
  function asFiniteNumber(value) {
9
+ if (value == null || value === '') {
10
+ return null;
11
+ }
9
12
  const parsed = Number(value);
10
13
  return Number.isFinite(parsed) ? parsed : null;
11
14
  }
12
15
 
16
/**
 * Pass through genuine boolean primitives and reject everything else.
 *
 * @param {unknown} value - Raw value read from a config object.
 * @returns {boolean | null} `value` when it is exactly `true` or `false`,
 *   otherwise null (no truthy/falsy coercion is performed).
 */
function asBoolean(value) {
  if (value === true || value === false) {
    return value;
  }
  return null;
}
19
+
20
/**
 * Normalize a list of positive integer sizes.
 *
 * Each entry is coerced with `asFiniteNumber`, truncated toward zero, and must
 * still be strictly positive afterwards. The whole list is rejected if any
 * single entry is invalid.
 *
 * @param {unknown} value - Raw value read from a config object.
 * @returns {number[] | null} Truncated positive integers, or null when the
 *   input is not an array, is empty, or contains an entry that is missing,
 *   non-numeric, or not at least 1 after truncation.
 */
function asNumberArray(value) {
  // An empty list carries no sizes; returning null lets callers fall back
  // through `??` instead of keeping a meaningless [].
  if (!Array.isArray(value) || value.length === 0) {
    return null;
  }
  const sizes = [];
  for (const entry of value) {
    const parsed = asFiniteNumber(entry);
    if (parsed == null) {
      return null;
    }
    // Validate after truncation: 0.5 is > 0 but truncates to a zero size.
    const size = Math.trunc(parsed);
    if (size <= 0) {
      return null;
    }
    sizes.push(size);
  }
  return sizes;
}
28
+
13
29
  function normalizeRoPEType(value) {
14
30
  if (typeof value !== 'string') return null;
15
31
  const normalized = value.trim().toLowerCase();
@@ -125,6 +141,13 @@ function failOnConflictingScaling(sourceLabel, canonicalScaling, candidateScalin
125
141
  export function buildRoPEConfig(presetInference, config) {
126
142
  const ropeScaling = asObject(config.rope_scaling);
127
143
  const ropeParameters = asObject(config.rope_parameters);
144
+ const flatRoPEParameters = (
145
+ ropeParameters
146
+ && !asObject(ropeParameters.full_attention)
147
+ && !asObject(ropeParameters.sliding_attention)
148
+ )
149
+ ? ropeParameters
150
+ : null;
128
151
  const fullAttentionRoPE = asObject(ropeParameters?.full_attention);
129
152
  const slidingAttentionRoPE = asObject(ropeParameters?.sliding_attention);
130
153
  const presetRoPE = presetInference.rope ?? {};
@@ -164,6 +187,11 @@ export function buildRoPEConfig(presetInference, config) {
164
187
  strictMissingTypeAndFactor: false,
165
188
  sourceLabel: 'HF config rope_parameters.full_attention',
166
189
  });
190
+ } else if (flatRoPEParameters) {
191
+ globalScaling = resolveScalingConfig(flatRoPEParameters, {
192
+ strictMissingTypeAndFactor: false,
193
+ sourceLabel: 'HF config rope_parameters',
194
+ });
167
195
  }
168
196
 
169
197
  const hasPresetLocalScaling = presetRoPE.ropeLocalScalingType !== undefined
@@ -192,6 +220,7 @@ export function buildRoPEConfig(presetInference, config) {
192
220
  // HF config is source of truth for ropeTheta when provided:
193
221
  // prefer rope_parameters.full_attention.rope_theta, then flat rope_parameters.rope_theta, then rope_theta.
194
222
  const ropeTheta = asFiniteNumber(fullAttentionRoPE?.rope_theta)
223
+ ?? asFiniteNumber(flatRoPEParameters?.rope_theta)
195
224
  ?? asFiniteNumber(config.rope_theta)
196
225
  ?? presetInference.rope?.ropeTheta
197
226
  ?? 10000;
@@ -201,9 +230,22 @@ export function buildRoPEConfig(presetInference, config) {
201
230
  ?? presetInference.rope?.ropeLocalTheta
202
231
  ?? null;
203
232
 
233
+ const mropeInterleaved = asBoolean(flatRoPEParameters?.mrope_interleaved)
234
+ ?? presetInference.rope?.mropeInterleaved
235
+ ?? false;
236
+ const mropeSection = asNumberArray(flatRoPEParameters?.mrope_section)
237
+ ?? presetInference.rope?.mropeSection
238
+ ?? null;
239
+ const partialRotaryFactor = asFiniteNumber(flatRoPEParameters?.partial_rotary_factor)
240
+ ?? asFiniteNumber(presetInference.rope?.partialRotaryFactor)
241
+ ?? null;
242
+
204
243
  return {
205
244
  ropeTheta,
206
245
  ropeLocalTheta,
246
+ mropeInterleaved,
247
+ mropeSection,
248
+ partialRotaryFactor,
207
249
  ropeScalingType: globalScaling.ropeScalingType,
208
250
  ropeScalingFactor: globalScaling.ropeScalingFactor,
209
251
  yarnBetaFast: globalScaling.yarnBetaFast,