@simulatte/doppler 0.1.4 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (199)
  1. package/README.md +26 -10
  2. package/package.json +30 -6
  3. package/src/client/doppler-api.browser.d.ts +1 -0
  4. package/src/client/doppler-api.browser.js +288 -0
  5. package/src/client/doppler-api.js +1 -1
  6. package/src/client/doppler-provider/types.js +1 -1
  7. package/src/config/execution-contract-check.d.ts +33 -0
  8. package/src/config/execution-contract-check.js +72 -0
  9. package/src/config/execution-v0-contract-check.d.ts +94 -0
  10. package/src/config/execution-v0-contract-check.js +251 -0
  11. package/src/config/execution-v0-graph-contract-check.d.ts +20 -0
  12. package/src/config/execution-v0-graph-contract-check.js +64 -0
  13. package/src/config/kernel-path-contract-check.d.ts +76 -0
  14. package/src/config/kernel-path-contract-check.js +479 -0
  15. package/src/config/kernel-path-loader.d.ts +16 -0
  16. package/src/config/kernel-path-loader.js +54 -0
  17. package/src/config/kernels/kernel-ref-digests.js +39 -27
  18. package/src/config/kernels/registry.json +598 -2
  19. package/src/config/loader.js +81 -48
  20. package/src/config/merge-contract-check.d.ts +16 -0
  21. package/src/config/merge-contract-check.js +321 -0
  22. package/src/config/merge-helpers.d.ts +58 -0
  23. package/src/config/merge-helpers.js +54 -0
  24. package/src/config/merge.js +21 -6
  25. package/src/config/presets/models/janus-text.json +2 -0
  26. package/src/config/presets/models/qwen3.json +9 -2
  27. package/src/config/presets/models/transformer.json +5 -0
  28. package/src/config/quantization-contract-check.d.ts +12 -0
  29. package/src/config/quantization-contract-check.js +91 -0
  30. package/src/config/required-inference-fields-contract-check.d.ts +24 -0
  31. package/src/config/required-inference-fields-contract-check.js +237 -0
  32. package/src/config/schema/browser-suite-metrics.schema.d.ts +17 -0
  33. package/src/config/schema/browser-suite-metrics.schema.js +46 -0
  34. package/src/config/schema/conversion-report.schema.d.ts +40 -0
  35. package/src/config/schema/conversion-report.schema.js +108 -0
  36. package/src/config/schema/doppler.schema.js +12 -18
  37. package/src/config/schema/index.d.ts +22 -0
  38. package/src/config/schema/index.js +18 -0
  39. package/src/config/schema/inference-defaults.schema.js +3 -0
  40. package/src/config/schema/inference.schema.d.ts +9 -0
  41. package/src/config/schema/kernel-path.schema.d.ts +6 -0
  42. package/src/config/schema/manifest.schema.d.ts +6 -0
  43. package/src/config/schema/manifest.schema.js +3 -0
  44. package/src/converter/core.d.ts +10 -0
  45. package/src/converter/core.js +27 -2
  46. package/src/converter/parsers/diffusion.js +63 -3
  47. package/src/converter/rope-config.js +42 -0
  48. package/src/gpu/device.js +58 -0
  49. package/src/gpu/kernels/attention.js +98 -0
  50. package/src/gpu/kernels/bias_add.wgsl +8 -6
  51. package/src/gpu/kernels/bias_add_f16.wgsl +8 -5
  52. package/src/gpu/kernels/conv2d.js +1 -1
  53. package/src/gpu/kernels/conv2d.wgsl +7 -8
  54. package/src/gpu/kernels/conv2d_f16.wgsl +7 -8
  55. package/src/gpu/kernels/depthwise_conv2d.d.ts +29 -0
  56. package/src/gpu/kernels/depthwise_conv2d.js +99 -0
  57. package/src/gpu/kernels/depthwise_conv2d.wgsl +55 -0
  58. package/src/gpu/kernels/depthwise_conv2d_f16.wgsl +59 -0
  59. package/src/gpu/kernels/grouped_pointwise_conv2d.d.ts +27 -0
  60. package/src/gpu/kernels/grouped_pointwise_conv2d.js +93 -0
  61. package/src/gpu/kernels/grouped_pointwise_conv2d.wgsl +44 -0
  62. package/src/gpu/kernels/grouped_pointwise_conv2d_f16.wgsl +48 -0
  63. package/src/gpu/kernels/index.d.ts +30 -0
  64. package/src/gpu/kernels/index.js +25 -0
  65. package/src/gpu/kernels/matmul.js +25 -0
  66. package/src/gpu/kernels/pixel_shuffle.js +1 -1
  67. package/src/gpu/kernels/pixel_shuffle.wgsl +4 -5
  68. package/src/gpu/kernels/pixel_shuffle_f16.wgsl +4 -5
  69. package/src/gpu/kernels/relu.d.ts +18 -0
  70. package/src/gpu/kernels/relu.js +58 -0
  71. package/src/gpu/kernels/relu.wgsl +22 -0
  72. package/src/gpu/kernels/relu_f16.wgsl +24 -0
  73. package/src/gpu/kernels/repeat_channels.d.ts +21 -0
  74. package/src/gpu/kernels/repeat_channels.js +60 -0
  75. package/src/gpu/kernels/repeat_channels.wgsl +28 -0
  76. package/src/gpu/kernels/repeat_channels_f16.wgsl +30 -0
  77. package/src/gpu/kernels/residual.js +44 -8
  78. package/src/gpu/kernels/residual.wgsl +6 -3
  79. package/src/gpu/kernels/residual_f16.wgsl +2 -1
  80. package/src/gpu/kernels/residual_f16_vec4.wgsl +2 -1
  81. package/src/gpu/kernels/residual_vec4.wgsl +2 -1
  82. package/src/gpu/kernels/rmsnorm.js +58 -6
  83. package/src/gpu/kernels/rmsnorm.wgsl +14 -6
  84. package/src/gpu/kernels/rmsnorm_f16.wgsl +10 -2
  85. package/src/gpu/kernels/rope.d.ts +2 -0
  86. package/src/gpu/kernels/rope.js +11 -1
  87. package/src/gpu/kernels/rope.wgsl +56 -40
  88. package/src/gpu/kernels/sana_linear_attention.d.ts +27 -0
  89. package/src/gpu/kernels/sana_linear_attention.js +121 -0
  90. package/src/gpu/kernels/sana_linear_attention_apply.wgsl +43 -0
  91. package/src/gpu/kernels/sana_linear_attention_apply_f16.wgsl +46 -0
  92. package/src/gpu/kernels/sana_linear_attention_summary.wgsl +51 -0
  93. package/src/gpu/kernels/sana_linear_attention_summary_f16.wgsl +53 -0
  94. package/src/gpu/kernels/silu.d.ts +1 -0
  95. package/src/gpu/kernels/silu.js +32 -14
  96. package/src/gpu/kernels/silu.wgsl +19 -9
  97. package/src/gpu/kernels/silu_f16.wgsl +19 -9
  98. package/src/gpu/kernels/transpose.js +15 -2
  99. package/src/gpu/kernels/transpose.wgsl +5 -6
  100. package/src/gpu/kernels/upsample2d.js +2 -1
  101. package/src/gpu/kernels/upsample2d.wgsl +6 -9
  102. package/src/gpu/kernels/upsample2d_f16.wgsl +6 -9
  103. package/src/gpu/kernels/utils.js +16 -1
  104. package/src/index-browser.d.ts +1 -1
  105. package/src/index-browser.js +2 -2
  106. package/src/index.js +1 -1
  107. package/src/inference/browser-harness.js +109 -23
  108. package/src/inference/pipelines/diffusion/init.js +14 -0
  109. package/src/inference/pipelines/diffusion/pipeline.js +215 -77
  110. package/src/inference/pipelines/diffusion/sana-transformer.d.ts +53 -0
  111. package/src/inference/pipelines/diffusion/sana-transformer.js +738 -0
  112. package/src/inference/pipelines/diffusion/scheduler.d.ts +17 -1
  113. package/src/inference/pipelines/diffusion/scheduler.js +91 -3
  114. package/src/inference/pipelines/diffusion/text-encoder-gpu.d.ts +11 -4
  115. package/src/inference/pipelines/diffusion/text-encoder-gpu.js +282 -0
  116. package/src/inference/pipelines/diffusion/text-encoder.js +18 -1
  117. package/src/inference/pipelines/diffusion/types.d.ts +4 -0
  118. package/src/inference/pipelines/diffusion/vae.js +782 -78
  119. package/src/inference/pipelines/text/attention/record.js +11 -2
  120. package/src/inference/pipelines/text/attention/run.js +11 -2
  121. package/src/inference/pipelines/text/chat-format.js +25 -1
  122. package/src/inference/pipelines/text/config.d.ts +9 -0
  123. package/src/inference/pipelines/text/config.js +69 -2
  124. package/src/inference/pipelines/text/execution-plan.js +23 -31
  125. package/src/inference/pipelines/text/execution-v0.js +43 -95
  126. package/src/inference/pipelines/text/ffn/standard.js +3 -0
  127. package/src/inference/pipelines/text/init.d.ts +4 -0
  128. package/src/inference/pipelines/text/init.js +56 -9
  129. package/src/inference/pipelines/text/layer.js +11 -0
  130. package/src/inference/pipelines/text.js +4 -0
  131. package/src/inference/tokenizers/bundled.js +156 -33
  132. package/src/rules/execution-rules-contract-check.d.ts +17 -0
  133. package/src/rules/execution-rules-contract-check.js +245 -0
  134. package/src/rules/kernels/depthwise-conv2d.rules.json +6 -0
  135. package/src/rules/kernels/grouped-pointwise-conv2d.rules.json +6 -0
  136. package/src/rules/kernels/relu.rules.json +6 -0
  137. package/src/rules/kernels/repeat-channels.rules.json +6 -0
  138. package/src/rules/kernels/sana-linear-attention.rules.json +6 -0
  139. package/src/rules/layer-pattern-contract-check.d.ts +17 -0
  140. package/src/rules/layer-pattern-contract-check.js +231 -0
  141. package/src/rules/rule-registry.d.ts +28 -0
  142. package/src/rules/rule-registry.js +38 -0
  143. package/src/rules/tooling/command-runtime.rules.json +18 -0
  144. package/src/tooling/command-api.d.ts +27 -1
  145. package/src/tooling/command-api.js +142 -3
  146. package/src/tooling/conversion-config-materializer.d.ts +24 -0
  147. package/src/tooling/conversion-config-materializer.js +99 -0
  148. package/src/tooling/lean-execution-contract-runner.d.ts +43 -0
  149. package/src/tooling/lean-execution-contract-runner.js +158 -0
  150. package/src/tooling/node-browser-command-runner.d.ts +4 -0
  151. package/src/tooling/node-browser-command-runner.js +58 -3
  152. package/src/tooling/node-command-runner.js +15 -0
  153. package/src/tooling/node-convert.d.ts +10 -0
  154. package/src/tooling/node-converter.js +59 -0
  155. package/src/tooling/node-webgpu.js +11 -89
  156. package/src/training/checkpoint-watch.d.ts +7 -0
  157. package/src/training/checkpoint-watch.js +106 -0
  158. package/src/training/checkpoint.d.ts +6 -1
  159. package/src/training/checkpoint.js +12 -2
  160. package/src/training/distillation/artifacts.d.ts +71 -0
  161. package/src/training/distillation/artifacts.js +132 -0
  162. package/src/training/distillation/checkpoint-watch.d.ts +10 -0
  163. package/src/training/distillation/checkpoint-watch.js +57 -0
  164. package/src/training/distillation/dataset.d.ts +59 -0
  165. package/src/training/distillation/dataset.js +337 -0
  166. package/src/training/distillation/eval.d.ts +34 -0
  167. package/src/training/distillation/eval.js +310 -0
  168. package/src/training/distillation/index.d.ts +29 -0
  169. package/src/training/distillation/index.js +29 -0
  170. package/src/training/distillation/runtime.d.ts +20 -0
  171. package/src/training/distillation/runtime.js +121 -0
  172. package/src/training/distillation/scoreboard.d.ts +6 -0
  173. package/src/training/distillation/scoreboard.js +8 -0
  174. package/src/training/distillation/stage-a.d.ts +45 -0
  175. package/src/training/distillation/stage-a.js +338 -0
  176. package/src/training/distillation/stage-b.d.ts +24 -0
  177. package/src/training/distillation/stage-b.js +20 -0
  178. package/src/training/index.d.ts +10 -0
  179. package/src/training/index.js +10 -0
  180. package/src/training/lora-pipeline.d.ts +40 -0
  181. package/src/training/lora-pipeline.js +796 -0
  182. package/src/training/operator-artifacts.d.ts +62 -0
  183. package/src/training/operator-artifacts.js +140 -0
  184. package/src/training/operator-command.d.ts +5 -0
  185. package/src/training/operator-command.js +453 -0
  186. package/src/training/operator-eval.d.ts +48 -0
  187. package/src/training/operator-eval.js +230 -0
  188. package/src/training/operator-scoreboard.d.ts +5 -0
  189. package/src/training/operator-scoreboard.js +44 -0
  190. package/src/training/runner.d.ts +52 -0
  191. package/src/training/runner.js +29 -4
  192. package/src/training/suite.d.ts +112 -0
  193. package/src/training/suite.js +9 -9
  194. package/src/training/workloads.d.ts +164 -0
  195. package/src/training/workloads.js +539 -0
  196. package/src/version.d.ts +2 -0
  197. package/src/version.js +2 -0
  198. package/tools/convert-safetensors-node.js +47 -0
  199. package/tools/doppler-cli.js +252 -41
@@ -1826,7 +1826,7 @@
1826
1826
  }
1827
1827
  ],
1828
1828
  "baseUniforms": {
1829
- "size": 16,
1829
+ "size": 32,
1830
1830
  "fields": [
1831
1831
  {
1832
1832
  "name": "hidden_size",
@@ -1847,6 +1847,26 @@
1847
1847
  "name": "has_residual",
1848
1848
  "type": "u32",
1849
1849
  "offset": 12
1850
+ },
1851
+ {
1852
+ "name": "token_stride",
1853
+ "type": "u32",
1854
+ "offset": 16
1855
+ },
1856
+ {
1857
+ "name": "_pad0",
1858
+ "type": "u32",
1859
+ "offset": 20
1860
+ },
1861
+ {
1862
+ "name": "_pad1",
1863
+ "type": "u32",
1864
+ "offset": 24
1865
+ },
1866
+ {
1867
+ "name": "_pad2",
1868
+ "type": "u32",
1869
+ "offset": 28
1850
1870
  }
1851
1871
  ]
1852
1872
  },
@@ -3637,7 +3657,7 @@
3637
3657
  }
3638
3658
  ],
3639
3659
  "baseUniforms": {
3640
- "size": 16,
3660
+ "size": 32,
3641
3661
  "fields": [
3642
3662
  {
3643
3663
  "name": "num_tokens",
@@ -4117,6 +4137,26 @@
4117
4137
  "name": "bias_offset",
4118
4138
  "type": "u32",
4119
4139
  "offset": 12
4140
+ },
4141
+ {
4142
+ "name": "token_stride",
4143
+ "type": "u32",
4144
+ "offset": 16
4145
+ },
4146
+ {
4147
+ "name": "_pad0",
4148
+ "type": "u32",
4149
+ "offset": 20
4150
+ },
4151
+ {
4152
+ "name": "_pad1",
4153
+ "type": "u32",
4154
+ "offset": 24
4155
+ },
4156
+ {
4157
+ "name": "_pad2",
4158
+ "type": "u32",
4159
+ "offset": 28
4120
4160
  }
4121
4161
  ]
4122
4162
  },
@@ -5537,6 +5577,228 @@
5537
5577
  }
5538
5578
  }
5539
5579
  },
5580
+ "depthwise_conv2d": {
5581
+ "description": "Depthwise Conv2D (NCHW)",
5582
+ "baseBindings": [
5583
+ {
5584
+ "index": 0,
5585
+ "name": "uniforms",
5586
+ "type": "uniform"
5587
+ },
5588
+ {
5589
+ "index": 1,
5590
+ "name": "input",
5591
+ "type": "read-only-storage"
5592
+ },
5593
+ {
5594
+ "index": 2,
5595
+ "name": "weight",
5596
+ "type": "read-only-storage"
5597
+ },
5598
+ {
5599
+ "index": 3,
5600
+ "name": "bias",
5601
+ "type": "read-only-storage"
5602
+ },
5603
+ {
5604
+ "index": 4,
5605
+ "name": "output",
5606
+ "type": "storage"
5607
+ }
5608
+ ],
5609
+ "baseUniforms": {
5610
+ "size": 48,
5611
+ "fields": [
5612
+ {
5613
+ "name": "channels",
5614
+ "type": "u32",
5615
+ "offset": 0
5616
+ },
5617
+ {
5618
+ "name": "height",
5619
+ "type": "u32",
5620
+ "offset": 4
5621
+ },
5622
+ {
5623
+ "name": "width",
5624
+ "type": "u32",
5625
+ "offset": 8
5626
+ },
5627
+ {
5628
+ "name": "out_height",
5629
+ "type": "u32",
5630
+ "offset": 12
5631
+ },
5632
+ {
5633
+ "name": "out_width",
5634
+ "type": "u32",
5635
+ "offset": 16
5636
+ },
5637
+ {
5638
+ "name": "kernel_h",
5639
+ "type": "u32",
5640
+ "offset": 20
5641
+ },
5642
+ {
5643
+ "name": "kernel_w",
5644
+ "type": "u32",
5645
+ "offset": 24
5646
+ },
5647
+ {
5648
+ "name": "stride",
5649
+ "type": "u32",
5650
+ "offset": 28
5651
+ },
5652
+ {
5653
+ "name": "pad",
5654
+ "type": "u32",
5655
+ "offset": 32
5656
+ },
5657
+ {
5658
+ "name": "_pad0",
5659
+ "type": "u32",
5660
+ "offset": 36
5661
+ },
5662
+ {
5663
+ "name": "_pad1",
5664
+ "type": "u32",
5665
+ "offset": 40
5666
+ },
5667
+ {
5668
+ "name": "_pad2",
5669
+ "type": "u32",
5670
+ "offset": 44
5671
+ }
5672
+ ]
5673
+ },
5674
+ "variants": {
5675
+ "default": {
5676
+ "wgsl": "depthwise_conv2d.wgsl",
5677
+ "entryPoint": "main",
5678
+ "workgroup": [
5679
+ 256,
5680
+ 1,
5681
+ 1
5682
+ ],
5683
+ "requires": [],
5684
+ "outputDtype": "f32"
5685
+ },
5686
+ "default_f16": {
5687
+ "wgsl": "depthwise_conv2d_f16.wgsl",
5688
+ "entryPoint": "main",
5689
+ "workgroup": [
5690
+ 256,
5691
+ 1,
5692
+ 1
5693
+ ],
5694
+ "requires": [
5695
+ "shader-f16"
5696
+ ],
5697
+ "outputDtype": "f16"
5698
+ }
5699
+ }
5700
+ },
5701
+ "grouped_pointwise_conv2d": {
5702
+ "description": "Grouped Pointwise Conv2D (NCHW)",
5703
+ "baseBindings": [
5704
+ {
5705
+ "index": 0,
5706
+ "name": "uniforms",
5707
+ "type": "uniform"
5708
+ },
5709
+ {
5710
+ "index": 1,
5711
+ "name": "input",
5712
+ "type": "read-only-storage"
5713
+ },
5714
+ {
5715
+ "index": 2,
5716
+ "name": "weight",
5717
+ "type": "read-only-storage"
5718
+ },
5719
+ {
5720
+ "index": 3,
5721
+ "name": "bias",
5722
+ "type": "read-only-storage"
5723
+ },
5724
+ {
5725
+ "index": 4,
5726
+ "name": "output",
5727
+ "type": "storage"
5728
+ }
5729
+ ],
5730
+ "baseUniforms": {
5731
+ "size": 32,
5732
+ "fields": [
5733
+ {
5734
+ "name": "in_channels",
5735
+ "type": "u32",
5736
+ "offset": 0
5737
+ },
5738
+ {
5739
+ "name": "out_channels",
5740
+ "type": "u32",
5741
+ "offset": 4
5742
+ },
5743
+ {
5744
+ "name": "height",
5745
+ "type": "u32",
5746
+ "offset": 8
5747
+ },
5748
+ {
5749
+ "name": "width",
5750
+ "type": "u32",
5751
+ "offset": 12
5752
+ },
5753
+ {
5754
+ "name": "groups",
5755
+ "type": "u32",
5756
+ "offset": 16
5757
+ },
5758
+ {
5759
+ "name": "_pad0",
5760
+ "type": "u32",
5761
+ "offset": 20
5762
+ },
5763
+ {
5764
+ "name": "_pad1",
5765
+ "type": "u32",
5766
+ "offset": 24
5767
+ },
5768
+ {
5769
+ "name": "_pad2",
5770
+ "type": "u32",
5771
+ "offset": 28
5772
+ }
5773
+ ]
5774
+ },
5775
+ "variants": {
5776
+ "default": {
5777
+ "wgsl": "grouped_pointwise_conv2d.wgsl",
5778
+ "entryPoint": "main",
5779
+ "workgroup": [
5780
+ 256,
5781
+ 1,
5782
+ 1
5783
+ ],
5784
+ "requires": [],
5785
+ "outputDtype": "f32"
5786
+ },
5787
+ "default_f16": {
5788
+ "wgsl": "grouped_pointwise_conv2d_f16.wgsl",
5789
+ "entryPoint": "main",
5790
+ "workgroup": [
5791
+ 256,
5792
+ 1,
5793
+ 1
5794
+ ],
5795
+ "requires": [
5796
+ "shader-f16"
5797
+ ],
5798
+ "outputDtype": "f16"
5799
+ }
5800
+ }
5801
+ },
5540
5802
  "groupnorm_stats": {
5541
5803
  "description": "GroupNorm stats (mean/var) for each group",
5542
5804
  "baseBindings": [
@@ -6424,6 +6686,340 @@
6424
6686
  }
6425
6687
  }
6426
6688
  },
6689
+ "sana_linear_attention_summary": {
6690
+ "description": "Sana linear attention summary stage",
6691
+ "baseBindings": [
6692
+ {
6693
+ "index": 0,
6694
+ "name": "uniforms",
6695
+ "type": "uniform"
6696
+ },
6697
+ {
6698
+ "index": 1,
6699
+ "name": "query",
6700
+ "type": "read-only-storage"
6701
+ },
6702
+ {
6703
+ "index": 2,
6704
+ "name": "key",
6705
+ "type": "read-only-storage"
6706
+ },
6707
+ {
6708
+ "index": 3,
6709
+ "name": "value",
6710
+ "type": "read-only-storage"
6711
+ },
6712
+ {
6713
+ "index": 4,
6714
+ "name": "summary",
6715
+ "type": "storage"
6716
+ }
6717
+ ],
6718
+ "baseUniforms": {
6719
+ "size": 24,
6720
+ "fields": [
6721
+ {
6722
+ "name": "num_heads",
6723
+ "type": "u32",
6724
+ "offset": 0
6725
+ },
6726
+ {
6727
+ "name": "head_dim",
6728
+ "type": "u32",
6729
+ "offset": 4
6730
+ },
6731
+ {
6732
+ "name": "num_tokens",
6733
+ "type": "u32",
6734
+ "offset": 8
6735
+ },
6736
+ {
6737
+ "name": "hidden_size",
6738
+ "type": "u32",
6739
+ "offset": 12
6740
+ },
6741
+ {
6742
+ "name": "_pad0",
6743
+ "type": "u32",
6744
+ "offset": 16
6745
+ },
6746
+ {
6747
+ "name": "_pad1",
6748
+ "type": "u32",
6749
+ "offset": 20
6750
+ }
6751
+ ]
6752
+ },
6753
+ "variants": {
6754
+ "default": {
6755
+ "wgsl": "sana_linear_attention_summary.wgsl",
6756
+ "entryPoint": "main",
6757
+ "workgroup": [
6758
+ 256,
6759
+ 1,
6760
+ 1
6761
+ ],
6762
+ "requires": [],
6763
+ "outputDtype": "f32"
6764
+ },
6765
+ "default_f16": {
6766
+ "wgsl": "sana_linear_attention_summary_f16.wgsl",
6767
+ "entryPoint": "main",
6768
+ "workgroup": [
6769
+ 256,
6770
+ 1,
6771
+ 1
6772
+ ],
6773
+ "requires": [
6774
+ "shader-f16"
6775
+ ],
6776
+ "outputDtype": "f32"
6777
+ }
6778
+ }
6779
+ },
6780
+ "sana_linear_attention_apply": {
6781
+ "description": "Sana linear attention apply stage",
6782
+ "baseBindings": [
6783
+ {
6784
+ "index": 0,
6785
+ "name": "uniforms",
6786
+ "type": "uniform"
6787
+ },
6788
+ {
6789
+ "index": 1,
6790
+ "name": "query",
6791
+ "type": "read-only-storage"
6792
+ },
6793
+ {
6794
+ "index": 2,
6795
+ "name": "summary",
6796
+ "type": "read-only-storage"
6797
+ },
6798
+ {
6799
+ "index": 3,
6800
+ "name": "output",
6801
+ "type": "storage"
6802
+ }
6803
+ ],
6804
+ "baseUniforms": {
6805
+ "size": 32,
6806
+ "fields": [
6807
+ {
6808
+ "name": "num_heads",
6809
+ "type": "u32",
6810
+ "offset": 0
6811
+ },
6812
+ {
6813
+ "name": "head_dim",
6814
+ "type": "u32",
6815
+ "offset": 4
6816
+ },
6817
+ {
6818
+ "name": "num_tokens",
6819
+ "type": "u32",
6820
+ "offset": 8
6821
+ },
6822
+ {
6823
+ "name": "hidden_size",
6824
+ "type": "u32",
6825
+ "offset": 12
6826
+ },
6827
+ {
6828
+ "name": "eps",
6829
+ "type": "f32",
6830
+ "offset": 16
6831
+ },
6832
+ {
6833
+ "name": "_pad0",
6834
+ "type": "u32",
6835
+ "offset": 20
6836
+ },
6837
+ {
6838
+ "name": "_pad1",
6839
+ "type": "u32",
6840
+ "offset": 24
6841
+ },
6842
+ {
6843
+ "name": "_pad2",
6844
+ "type": "u32",
6845
+ "offset": 28
6846
+ }
6847
+ ]
6848
+ },
6849
+ "variants": {
6850
+ "default": {
6851
+ "wgsl": "sana_linear_attention_apply.wgsl",
6852
+ "entryPoint": "main",
6853
+ "workgroup": [
6854
+ 256,
6855
+ 1,
6856
+ 1
6857
+ ],
6858
+ "requires": [],
6859
+ "outputDtype": "f32"
6860
+ },
6861
+ "default_f16": {
6862
+ "wgsl": "sana_linear_attention_apply_f16.wgsl",
6863
+ "entryPoint": "main",
6864
+ "workgroup": [
6865
+ 256,
6866
+ 1,
6867
+ 1
6868
+ ],
6869
+ "requires": [
6870
+ "shader-f16"
6871
+ ],
6872
+ "outputDtype": "f16"
6873
+ }
6874
+ }
6875
+ },
6876
+ "repeat_channels": {
6877
+ "description": "Repeat channels along the NCHW channel axis",
6878
+ "baseBindings": [
6879
+ {
6880
+ "index": 0,
6881
+ "name": "uniforms",
6882
+ "type": "uniform"
6883
+ },
6884
+ {
6885
+ "index": 1,
6886
+ "name": "input",
6887
+ "type": "read-only-storage"
6888
+ },
6889
+ {
6890
+ "index": 2,
6891
+ "name": "output",
6892
+ "type": "storage"
6893
+ }
6894
+ ],
6895
+ "baseUniforms": {
6896
+ "size": 20,
6897
+ "fields": [
6898
+ {
6899
+ "name": "in_channels",
6900
+ "type": "u32",
6901
+ "offset": 0
6902
+ },
6903
+ {
6904
+ "name": "height",
6905
+ "type": "u32",
6906
+ "offset": 4
6907
+ },
6908
+ {
6909
+ "name": "width",
6910
+ "type": "u32",
6911
+ "offset": 8
6912
+ },
6913
+ {
6914
+ "name": "repeats",
6915
+ "type": "u32",
6916
+ "offset": 12
6917
+ },
6918
+ {
6919
+ "name": "_pad0",
6920
+ "type": "u32",
6921
+ "offset": 16
6922
+ }
6923
+ ]
6924
+ },
6925
+ "variants": {
6926
+ "default": {
6927
+ "wgsl": "repeat_channels.wgsl",
6928
+ "entryPoint": "main",
6929
+ "workgroup": [
6930
+ 256,
6931
+ 1,
6932
+ 1
6933
+ ],
6934
+ "requires": [],
6935
+ "outputDtype": "f32"
6936
+ },
6937
+ "default_f16": {
6938
+ "wgsl": "repeat_channels_f16.wgsl",
6939
+ "entryPoint": "main",
6940
+ "workgroup": [
6941
+ 256,
6942
+ 1,
6943
+ 1
6944
+ ],
6945
+ "requires": [
6946
+ "shader-f16"
6947
+ ],
6948
+ "outputDtype": "f16"
6949
+ }
6950
+ }
6951
+ },
6952
+ "relu": {
6953
+ "description": "ReLU activation",
6954
+ "baseBindings": [
6955
+ {
6956
+ "index": 0,
6957
+ "name": "uniforms",
6958
+ "type": "uniform"
6959
+ },
6960
+ {
6961
+ "index": 1,
6962
+ "name": "input",
6963
+ "type": "read-only-storage"
6964
+ },
6965
+ {
6966
+ "index": 2,
6967
+ "name": "output",
6968
+ "type": "storage"
6969
+ }
6970
+ ],
6971
+ "baseUniforms": {
6972
+ "size": 16,
6973
+ "fields": [
6974
+ {
6975
+ "name": "size",
6976
+ "type": "u32",
6977
+ "offset": 0
6978
+ },
6979
+ {
6980
+ "name": "_pad0",
6981
+ "type": "u32",
6982
+ "offset": 4
6983
+ },
6984
+ {
6985
+ "name": "_pad1",
6986
+ "type": "u32",
6987
+ "offset": 8
6988
+ },
6989
+ {
6990
+ "name": "_pad2",
6991
+ "type": "u32",
6992
+ "offset": 12
6993
+ }
6994
+ ]
6995
+ },
6996
+ "variants": {
6997
+ "default": {
6998
+ "wgsl": "relu.wgsl",
6999
+ "entryPoint": "main",
7000
+ "workgroup": [
7001
+ 256,
7002
+ 1,
7003
+ 1
7004
+ ],
7005
+ "requires": [],
7006
+ "outputDtype": "f32"
7007
+ },
7008
+ "default_f16": {
7009
+ "wgsl": "relu_f16.wgsl",
7010
+ "entryPoint": "main",
7011
+ "workgroup": [
7012
+ 256,
7013
+ 1,
7014
+ 1
7015
+ ],
7016
+ "requires": [
7017
+ "shader-f16"
7018
+ ],
7019
+ "outputDtype": "f16"
7020
+ }
7021
+ }
7022
+ },
6427
7023
  "conv2d_backward_input": {
6428
7024
  "description": "Conv2D backward (input gradient)",
6429
7025
  "baseBindings": [