@simulatte/doppler 0.1.4 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +26 -10
- package/package.json +30 -6
- package/src/client/doppler-api.browser.d.ts +1 -0
- package/src/client/doppler-api.browser.js +288 -0
- package/src/client/doppler-api.js +1 -1
- package/src/client/doppler-provider/types.js +1 -1
- package/src/config/execution-contract-check.d.ts +33 -0
- package/src/config/execution-contract-check.js +72 -0
- package/src/config/execution-v0-contract-check.d.ts +94 -0
- package/src/config/execution-v0-contract-check.js +251 -0
- package/src/config/execution-v0-graph-contract-check.d.ts +20 -0
- package/src/config/execution-v0-graph-contract-check.js +64 -0
- package/src/config/kernel-path-contract-check.d.ts +76 -0
- package/src/config/kernel-path-contract-check.js +479 -0
- package/src/config/kernel-path-loader.d.ts +16 -0
- package/src/config/kernel-path-loader.js +54 -0
- package/src/config/kernels/kernel-ref-digests.js +39 -27
- package/src/config/kernels/registry.json +598 -2
- package/src/config/loader.js +81 -48
- package/src/config/merge-contract-check.d.ts +16 -0
- package/src/config/merge-contract-check.js +321 -0
- package/src/config/merge-helpers.d.ts +58 -0
- package/src/config/merge-helpers.js +54 -0
- package/src/config/merge.js +21 -6
- package/src/config/presets/models/janus-text.json +2 -0
- package/src/config/presets/models/qwen3.json +9 -2
- package/src/config/presets/models/transformer.json +5 -0
- package/src/config/quantization-contract-check.d.ts +12 -0
- package/src/config/quantization-contract-check.js +91 -0
- package/src/config/required-inference-fields-contract-check.d.ts +24 -0
- package/src/config/required-inference-fields-contract-check.js +237 -0
- package/src/config/schema/browser-suite-metrics.schema.d.ts +17 -0
- package/src/config/schema/browser-suite-metrics.schema.js +46 -0
- package/src/config/schema/conversion-report.schema.d.ts +40 -0
- package/src/config/schema/conversion-report.schema.js +108 -0
- package/src/config/schema/doppler.schema.js +12 -18
- package/src/config/schema/index.d.ts +22 -0
- package/src/config/schema/index.js +18 -0
- package/src/config/schema/inference-defaults.schema.js +3 -0
- package/src/config/schema/inference.schema.d.ts +9 -0
- package/src/config/schema/kernel-path.schema.d.ts +6 -0
- package/src/config/schema/manifest.schema.d.ts +6 -0
- package/src/config/schema/manifest.schema.js +3 -0
- package/src/converter/core.d.ts +10 -0
- package/src/converter/core.js +27 -2
- package/src/converter/parsers/diffusion.js +63 -3
- package/src/converter/rope-config.js +42 -0
- package/src/gpu/device.js +58 -0
- package/src/gpu/kernels/attention.js +98 -0
- package/src/gpu/kernels/bias_add.wgsl +8 -6
- package/src/gpu/kernels/bias_add_f16.wgsl +8 -5
- package/src/gpu/kernels/conv2d.js +1 -1
- package/src/gpu/kernels/conv2d.wgsl +7 -8
- package/src/gpu/kernels/conv2d_f16.wgsl +7 -8
- package/src/gpu/kernels/depthwise_conv2d.d.ts +29 -0
- package/src/gpu/kernels/depthwise_conv2d.js +99 -0
- package/src/gpu/kernels/depthwise_conv2d.wgsl +55 -0
- package/src/gpu/kernels/depthwise_conv2d_f16.wgsl +59 -0
- package/src/gpu/kernels/grouped_pointwise_conv2d.d.ts +27 -0
- package/src/gpu/kernels/grouped_pointwise_conv2d.js +93 -0
- package/src/gpu/kernels/grouped_pointwise_conv2d.wgsl +44 -0
- package/src/gpu/kernels/grouped_pointwise_conv2d_f16.wgsl +48 -0
- package/src/gpu/kernels/index.d.ts +30 -0
- package/src/gpu/kernels/index.js +25 -0
- package/src/gpu/kernels/matmul.js +25 -0
- package/src/gpu/kernels/pixel_shuffle.js +1 -1
- package/src/gpu/kernels/pixel_shuffle.wgsl +4 -5
- package/src/gpu/kernels/pixel_shuffle_f16.wgsl +4 -5
- package/src/gpu/kernels/relu.d.ts +18 -0
- package/src/gpu/kernels/relu.js +58 -0
- package/src/gpu/kernels/relu.wgsl +22 -0
- package/src/gpu/kernels/relu_f16.wgsl +24 -0
- package/src/gpu/kernels/repeat_channels.d.ts +21 -0
- package/src/gpu/kernels/repeat_channels.js +60 -0
- package/src/gpu/kernels/repeat_channels.wgsl +28 -0
- package/src/gpu/kernels/repeat_channels_f16.wgsl +30 -0
- package/src/gpu/kernels/residual.js +44 -8
- package/src/gpu/kernels/residual.wgsl +6 -3
- package/src/gpu/kernels/residual_f16.wgsl +2 -1
- package/src/gpu/kernels/residual_f16_vec4.wgsl +2 -1
- package/src/gpu/kernels/residual_vec4.wgsl +2 -1
- package/src/gpu/kernels/rmsnorm.js +58 -6
- package/src/gpu/kernels/rmsnorm.wgsl +14 -6
- package/src/gpu/kernels/rmsnorm_f16.wgsl +10 -2
- package/src/gpu/kernels/rope.d.ts +2 -0
- package/src/gpu/kernels/rope.js +11 -1
- package/src/gpu/kernels/rope.wgsl +56 -40
- package/src/gpu/kernels/sana_linear_attention.d.ts +27 -0
- package/src/gpu/kernels/sana_linear_attention.js +121 -0
- package/src/gpu/kernels/sana_linear_attention_apply.wgsl +43 -0
- package/src/gpu/kernels/sana_linear_attention_apply_f16.wgsl +46 -0
- package/src/gpu/kernels/sana_linear_attention_summary.wgsl +51 -0
- package/src/gpu/kernels/sana_linear_attention_summary_f16.wgsl +53 -0
- package/src/gpu/kernels/silu.d.ts +1 -0
- package/src/gpu/kernels/silu.js +32 -14
- package/src/gpu/kernels/silu.wgsl +19 -9
- package/src/gpu/kernels/silu_f16.wgsl +19 -9
- package/src/gpu/kernels/transpose.js +15 -2
- package/src/gpu/kernels/transpose.wgsl +5 -6
- package/src/gpu/kernels/upsample2d.js +2 -1
- package/src/gpu/kernels/upsample2d.wgsl +6 -9
- package/src/gpu/kernels/upsample2d_f16.wgsl +6 -9
- package/src/gpu/kernels/utils.js +16 -1
- package/src/index-browser.d.ts +1 -1
- package/src/index-browser.js +2 -2
- package/src/index.js +1 -1
- package/src/inference/browser-harness.js +109 -23
- package/src/inference/pipelines/diffusion/init.js +14 -0
- package/src/inference/pipelines/diffusion/pipeline.js +215 -77
- package/src/inference/pipelines/diffusion/sana-transformer.d.ts +53 -0
- package/src/inference/pipelines/diffusion/sana-transformer.js +738 -0
- package/src/inference/pipelines/diffusion/scheduler.d.ts +17 -1
- package/src/inference/pipelines/diffusion/scheduler.js +91 -3
- package/src/inference/pipelines/diffusion/text-encoder-gpu.d.ts +11 -4
- package/src/inference/pipelines/diffusion/text-encoder-gpu.js +282 -0
- package/src/inference/pipelines/diffusion/text-encoder.js +18 -1
- package/src/inference/pipelines/diffusion/types.d.ts +4 -0
- package/src/inference/pipelines/diffusion/vae.js +782 -78
- package/src/inference/pipelines/text/attention/record.js +11 -2
- package/src/inference/pipelines/text/attention/run.js +11 -2
- package/src/inference/pipelines/text/chat-format.js +25 -1
- package/src/inference/pipelines/text/config.d.ts +9 -0
- package/src/inference/pipelines/text/config.js +69 -2
- package/src/inference/pipelines/text/execution-plan.js +23 -31
- package/src/inference/pipelines/text/execution-v0.js +43 -95
- package/src/inference/pipelines/text/ffn/standard.js +3 -0
- package/src/inference/pipelines/text/init.d.ts +4 -0
- package/src/inference/pipelines/text/init.js +56 -9
- package/src/inference/pipelines/text/layer.js +11 -0
- package/src/inference/pipelines/text.js +4 -0
- package/src/inference/tokenizers/bundled.js +156 -33
- package/src/rules/execution-rules-contract-check.d.ts +17 -0
- package/src/rules/execution-rules-contract-check.js +245 -0
- package/src/rules/kernels/depthwise-conv2d.rules.json +6 -0
- package/src/rules/kernels/grouped-pointwise-conv2d.rules.json +6 -0
- package/src/rules/kernels/relu.rules.json +6 -0
- package/src/rules/kernels/repeat-channels.rules.json +6 -0
- package/src/rules/kernels/sana-linear-attention.rules.json +6 -0
- package/src/rules/layer-pattern-contract-check.d.ts +17 -0
- package/src/rules/layer-pattern-contract-check.js +231 -0
- package/src/rules/rule-registry.d.ts +28 -0
- package/src/rules/rule-registry.js +38 -0
- package/src/rules/tooling/command-runtime.rules.json +18 -0
- package/src/tooling/command-api.d.ts +27 -1
- package/src/tooling/command-api.js +142 -3
- package/src/tooling/conversion-config-materializer.d.ts +24 -0
- package/src/tooling/conversion-config-materializer.js +99 -0
- package/src/tooling/lean-execution-contract-runner.d.ts +43 -0
- package/src/tooling/lean-execution-contract-runner.js +158 -0
- package/src/tooling/node-browser-command-runner.d.ts +4 -0
- package/src/tooling/node-browser-command-runner.js +58 -3
- package/src/tooling/node-command-runner.js +15 -0
- package/src/tooling/node-convert.d.ts +10 -0
- package/src/tooling/node-converter.js +59 -0
- package/src/tooling/node-webgpu.js +11 -89
- package/src/training/checkpoint-watch.d.ts +7 -0
- package/src/training/checkpoint-watch.js +106 -0
- package/src/training/checkpoint.d.ts +6 -1
- package/src/training/checkpoint.js +12 -2
- package/src/training/distillation/artifacts.d.ts +71 -0
- package/src/training/distillation/artifacts.js +132 -0
- package/src/training/distillation/checkpoint-watch.d.ts +10 -0
- package/src/training/distillation/checkpoint-watch.js +57 -0
- package/src/training/distillation/dataset.d.ts +59 -0
- package/src/training/distillation/dataset.js +337 -0
- package/src/training/distillation/eval.d.ts +34 -0
- package/src/training/distillation/eval.js +310 -0
- package/src/training/distillation/index.d.ts +29 -0
- package/src/training/distillation/index.js +29 -0
- package/src/training/distillation/runtime.d.ts +20 -0
- package/src/training/distillation/runtime.js +121 -0
- package/src/training/distillation/scoreboard.d.ts +6 -0
- package/src/training/distillation/scoreboard.js +8 -0
- package/src/training/distillation/stage-a.d.ts +45 -0
- package/src/training/distillation/stage-a.js +338 -0
- package/src/training/distillation/stage-b.d.ts +24 -0
- package/src/training/distillation/stage-b.js +20 -0
- package/src/training/index.d.ts +10 -0
- package/src/training/index.js +10 -0
- package/src/training/lora-pipeline.d.ts +40 -0
- package/src/training/lora-pipeline.js +796 -0
- package/src/training/operator-artifacts.d.ts +62 -0
- package/src/training/operator-artifacts.js +140 -0
- package/src/training/operator-command.d.ts +5 -0
- package/src/training/operator-command.js +453 -0
- package/src/training/operator-eval.d.ts +48 -0
- package/src/training/operator-eval.js +230 -0
- package/src/training/operator-scoreboard.d.ts +5 -0
- package/src/training/operator-scoreboard.js +44 -0
- package/src/training/runner.d.ts +52 -0
- package/src/training/runner.js +29 -4
- package/src/training/suite.d.ts +112 -0
- package/src/training/suite.js +9 -9
- package/src/training/workloads.d.ts +164 -0
- package/src/training/workloads.js +539 -0
- package/src/version.d.ts +2 -0
- package/src/version.js +2 -0
- package/tools/convert-safetensors-node.js +47 -0
- package/tools/doppler-cli.js +252 -41
|
@@ -1826,7 +1826,7 @@
|
|
|
1826
1826
|
}
|
|
1827
1827
|
],
|
|
1828
1828
|
"baseUniforms": {
|
|
1829
|
-
"size":
|
|
1829
|
+
"size": 32,
|
|
1830
1830
|
"fields": [
|
|
1831
1831
|
{
|
|
1832
1832
|
"name": "hidden_size",
|
|
@@ -1847,6 +1847,26 @@
|
|
|
1847
1847
|
"name": "has_residual",
|
|
1848
1848
|
"type": "u32",
|
|
1849
1849
|
"offset": 12
|
|
1850
|
+
},
|
|
1851
|
+
{
|
|
1852
|
+
"name": "token_stride",
|
|
1853
|
+
"type": "u32",
|
|
1854
|
+
"offset": 16
|
|
1855
|
+
},
|
|
1856
|
+
{
|
|
1857
|
+
"name": "_pad0",
|
|
1858
|
+
"type": "u32",
|
|
1859
|
+
"offset": 20
|
|
1860
|
+
},
|
|
1861
|
+
{
|
|
1862
|
+
"name": "_pad1",
|
|
1863
|
+
"type": "u32",
|
|
1864
|
+
"offset": 24
|
|
1865
|
+
},
|
|
1866
|
+
{
|
|
1867
|
+
"name": "_pad2",
|
|
1868
|
+
"type": "u32",
|
|
1869
|
+
"offset": 28
|
|
1850
1870
|
}
|
|
1851
1871
|
]
|
|
1852
1872
|
},
|
|
@@ -3637,7 +3657,7 @@
|
|
|
3637
3657
|
}
|
|
3638
3658
|
],
|
|
3639
3659
|
"baseUniforms": {
|
|
3640
|
-
"size":
|
|
3660
|
+
"size": 32,
|
|
3641
3661
|
"fields": [
|
|
3642
3662
|
{
|
|
3643
3663
|
"name": "num_tokens",
|
|
@@ -4117,6 +4137,26 @@
|
|
|
4117
4137
|
"name": "bias_offset",
|
|
4118
4138
|
"type": "u32",
|
|
4119
4139
|
"offset": 12
|
|
4140
|
+
},
|
|
4141
|
+
{
|
|
4142
|
+
"name": "token_stride",
|
|
4143
|
+
"type": "u32",
|
|
4144
|
+
"offset": 16
|
|
4145
|
+
},
|
|
4146
|
+
{
|
|
4147
|
+
"name": "_pad0",
|
|
4148
|
+
"type": "u32",
|
|
4149
|
+
"offset": 20
|
|
4150
|
+
},
|
|
4151
|
+
{
|
|
4152
|
+
"name": "_pad1",
|
|
4153
|
+
"type": "u32",
|
|
4154
|
+
"offset": 24
|
|
4155
|
+
},
|
|
4156
|
+
{
|
|
4157
|
+
"name": "_pad2",
|
|
4158
|
+
"type": "u32",
|
|
4159
|
+
"offset": 28
|
|
4120
4160
|
}
|
|
4121
4161
|
]
|
|
4122
4162
|
},
|
|
@@ -5537,6 +5577,228 @@
|
|
|
5537
5577
|
}
|
|
5538
5578
|
}
|
|
5539
5579
|
},
|
|
5580
|
+
"depthwise_conv2d": {
|
|
5581
|
+
"description": "Depthwise Conv2D (NCHW)",
|
|
5582
|
+
"baseBindings": [
|
|
5583
|
+
{
|
|
5584
|
+
"index": 0,
|
|
5585
|
+
"name": "uniforms",
|
|
5586
|
+
"type": "uniform"
|
|
5587
|
+
},
|
|
5588
|
+
{
|
|
5589
|
+
"index": 1,
|
|
5590
|
+
"name": "input",
|
|
5591
|
+
"type": "read-only-storage"
|
|
5592
|
+
},
|
|
5593
|
+
{
|
|
5594
|
+
"index": 2,
|
|
5595
|
+
"name": "weight",
|
|
5596
|
+
"type": "read-only-storage"
|
|
5597
|
+
},
|
|
5598
|
+
{
|
|
5599
|
+
"index": 3,
|
|
5600
|
+
"name": "bias",
|
|
5601
|
+
"type": "read-only-storage"
|
|
5602
|
+
},
|
|
5603
|
+
{
|
|
5604
|
+
"index": 4,
|
|
5605
|
+
"name": "output",
|
|
5606
|
+
"type": "storage"
|
|
5607
|
+
}
|
|
5608
|
+
],
|
|
5609
|
+
"baseUniforms": {
|
|
5610
|
+
"size": 48,
|
|
5611
|
+
"fields": [
|
|
5612
|
+
{
|
|
5613
|
+
"name": "channels",
|
|
5614
|
+
"type": "u32",
|
|
5615
|
+
"offset": 0
|
|
5616
|
+
},
|
|
5617
|
+
{
|
|
5618
|
+
"name": "height",
|
|
5619
|
+
"type": "u32",
|
|
5620
|
+
"offset": 4
|
|
5621
|
+
},
|
|
5622
|
+
{
|
|
5623
|
+
"name": "width",
|
|
5624
|
+
"type": "u32",
|
|
5625
|
+
"offset": 8
|
|
5626
|
+
},
|
|
5627
|
+
{
|
|
5628
|
+
"name": "out_height",
|
|
5629
|
+
"type": "u32",
|
|
5630
|
+
"offset": 12
|
|
5631
|
+
},
|
|
5632
|
+
{
|
|
5633
|
+
"name": "out_width",
|
|
5634
|
+
"type": "u32",
|
|
5635
|
+
"offset": 16
|
|
5636
|
+
},
|
|
5637
|
+
{
|
|
5638
|
+
"name": "kernel_h",
|
|
5639
|
+
"type": "u32",
|
|
5640
|
+
"offset": 20
|
|
5641
|
+
},
|
|
5642
|
+
{
|
|
5643
|
+
"name": "kernel_w",
|
|
5644
|
+
"type": "u32",
|
|
5645
|
+
"offset": 24
|
|
5646
|
+
},
|
|
5647
|
+
{
|
|
5648
|
+
"name": "stride",
|
|
5649
|
+
"type": "u32",
|
|
5650
|
+
"offset": 28
|
|
5651
|
+
},
|
|
5652
|
+
{
|
|
5653
|
+
"name": "pad",
|
|
5654
|
+
"type": "u32",
|
|
5655
|
+
"offset": 32
|
|
5656
|
+
},
|
|
5657
|
+
{
|
|
5658
|
+
"name": "_pad0",
|
|
5659
|
+
"type": "u32",
|
|
5660
|
+
"offset": 36
|
|
5661
|
+
},
|
|
5662
|
+
{
|
|
5663
|
+
"name": "_pad1",
|
|
5664
|
+
"type": "u32",
|
|
5665
|
+
"offset": 40
|
|
5666
|
+
},
|
|
5667
|
+
{
|
|
5668
|
+
"name": "_pad2",
|
|
5669
|
+
"type": "u32",
|
|
5670
|
+
"offset": 44
|
|
5671
|
+
}
|
|
5672
|
+
]
|
|
5673
|
+
},
|
|
5674
|
+
"variants": {
|
|
5675
|
+
"default": {
|
|
5676
|
+
"wgsl": "depthwise_conv2d.wgsl",
|
|
5677
|
+
"entryPoint": "main",
|
|
5678
|
+
"workgroup": [
|
|
5679
|
+
256,
|
|
5680
|
+
1,
|
|
5681
|
+
1
|
|
5682
|
+
],
|
|
5683
|
+
"requires": [],
|
|
5684
|
+
"outputDtype": "f32"
|
|
5685
|
+
},
|
|
5686
|
+
"default_f16": {
|
|
5687
|
+
"wgsl": "depthwise_conv2d_f16.wgsl",
|
|
5688
|
+
"entryPoint": "main",
|
|
5689
|
+
"workgroup": [
|
|
5690
|
+
256,
|
|
5691
|
+
1,
|
|
5692
|
+
1
|
|
5693
|
+
],
|
|
5694
|
+
"requires": [
|
|
5695
|
+
"shader-f16"
|
|
5696
|
+
],
|
|
5697
|
+
"outputDtype": "f16"
|
|
5698
|
+
}
|
|
5699
|
+
}
|
|
5700
|
+
},
|
|
5701
|
+
"grouped_pointwise_conv2d": {
|
|
5702
|
+
"description": "Grouped Pointwise Conv2D (NCHW)",
|
|
5703
|
+
"baseBindings": [
|
|
5704
|
+
{
|
|
5705
|
+
"index": 0,
|
|
5706
|
+
"name": "uniforms",
|
|
5707
|
+
"type": "uniform"
|
|
5708
|
+
},
|
|
5709
|
+
{
|
|
5710
|
+
"index": 1,
|
|
5711
|
+
"name": "input",
|
|
5712
|
+
"type": "read-only-storage"
|
|
5713
|
+
},
|
|
5714
|
+
{
|
|
5715
|
+
"index": 2,
|
|
5716
|
+
"name": "weight",
|
|
5717
|
+
"type": "read-only-storage"
|
|
5718
|
+
},
|
|
5719
|
+
{
|
|
5720
|
+
"index": 3,
|
|
5721
|
+
"name": "bias",
|
|
5722
|
+
"type": "read-only-storage"
|
|
5723
|
+
},
|
|
5724
|
+
{
|
|
5725
|
+
"index": 4,
|
|
5726
|
+
"name": "output",
|
|
5727
|
+
"type": "storage"
|
|
5728
|
+
}
|
|
5729
|
+
],
|
|
5730
|
+
"baseUniforms": {
|
|
5731
|
+
"size": 32,
|
|
5732
|
+
"fields": [
|
|
5733
|
+
{
|
|
5734
|
+
"name": "in_channels",
|
|
5735
|
+
"type": "u32",
|
|
5736
|
+
"offset": 0
|
|
5737
|
+
},
|
|
5738
|
+
{
|
|
5739
|
+
"name": "out_channels",
|
|
5740
|
+
"type": "u32",
|
|
5741
|
+
"offset": 4
|
|
5742
|
+
},
|
|
5743
|
+
{
|
|
5744
|
+
"name": "height",
|
|
5745
|
+
"type": "u32",
|
|
5746
|
+
"offset": 8
|
|
5747
|
+
},
|
|
5748
|
+
{
|
|
5749
|
+
"name": "width",
|
|
5750
|
+
"type": "u32",
|
|
5751
|
+
"offset": 12
|
|
5752
|
+
},
|
|
5753
|
+
{
|
|
5754
|
+
"name": "groups",
|
|
5755
|
+
"type": "u32",
|
|
5756
|
+
"offset": 16
|
|
5757
|
+
},
|
|
5758
|
+
{
|
|
5759
|
+
"name": "_pad0",
|
|
5760
|
+
"type": "u32",
|
|
5761
|
+
"offset": 20
|
|
5762
|
+
},
|
|
5763
|
+
{
|
|
5764
|
+
"name": "_pad1",
|
|
5765
|
+
"type": "u32",
|
|
5766
|
+
"offset": 24
|
|
5767
|
+
},
|
|
5768
|
+
{
|
|
5769
|
+
"name": "_pad2",
|
|
5770
|
+
"type": "u32",
|
|
5771
|
+
"offset": 28
|
|
5772
|
+
}
|
|
5773
|
+
]
|
|
5774
|
+
},
|
|
5775
|
+
"variants": {
|
|
5776
|
+
"default": {
|
|
5777
|
+
"wgsl": "grouped_pointwise_conv2d.wgsl",
|
|
5778
|
+
"entryPoint": "main",
|
|
5779
|
+
"workgroup": [
|
|
5780
|
+
256,
|
|
5781
|
+
1,
|
|
5782
|
+
1
|
|
5783
|
+
],
|
|
5784
|
+
"requires": [],
|
|
5785
|
+
"outputDtype": "f32"
|
|
5786
|
+
},
|
|
5787
|
+
"default_f16": {
|
|
5788
|
+
"wgsl": "grouped_pointwise_conv2d_f16.wgsl",
|
|
5789
|
+
"entryPoint": "main",
|
|
5790
|
+
"workgroup": [
|
|
5791
|
+
256,
|
|
5792
|
+
1,
|
|
5793
|
+
1
|
|
5794
|
+
],
|
|
5795
|
+
"requires": [
|
|
5796
|
+
"shader-f16"
|
|
5797
|
+
],
|
|
5798
|
+
"outputDtype": "f16"
|
|
5799
|
+
}
|
|
5800
|
+
}
|
|
5801
|
+
},
|
|
5540
5802
|
"groupnorm_stats": {
|
|
5541
5803
|
"description": "GroupNorm stats (mean/var) for each group",
|
|
5542
5804
|
"baseBindings": [
|
|
@@ -6424,6 +6686,340 @@
|
|
|
6424
6686
|
}
|
|
6425
6687
|
}
|
|
6426
6688
|
},
|
|
6689
|
+
"sana_linear_attention_summary": {
|
|
6690
|
+
"description": "Sana linear attention summary stage",
|
|
6691
|
+
"baseBindings": [
|
|
6692
|
+
{
|
|
6693
|
+
"index": 0,
|
|
6694
|
+
"name": "uniforms",
|
|
6695
|
+
"type": "uniform"
|
|
6696
|
+
},
|
|
6697
|
+
{
|
|
6698
|
+
"index": 1,
|
|
6699
|
+
"name": "query",
|
|
6700
|
+
"type": "read-only-storage"
|
|
6701
|
+
},
|
|
6702
|
+
{
|
|
6703
|
+
"index": 2,
|
|
6704
|
+
"name": "key",
|
|
6705
|
+
"type": "read-only-storage"
|
|
6706
|
+
},
|
|
6707
|
+
{
|
|
6708
|
+
"index": 3,
|
|
6709
|
+
"name": "value",
|
|
6710
|
+
"type": "read-only-storage"
|
|
6711
|
+
},
|
|
6712
|
+
{
|
|
6713
|
+
"index": 4,
|
|
6714
|
+
"name": "summary",
|
|
6715
|
+
"type": "storage"
|
|
6716
|
+
}
|
|
6717
|
+
],
|
|
6718
|
+
"baseUniforms": {
|
|
6719
|
+
"size": 24,
|
|
6720
|
+
"fields": [
|
|
6721
|
+
{
|
|
6722
|
+
"name": "num_heads",
|
|
6723
|
+
"type": "u32",
|
|
6724
|
+
"offset": 0
|
|
6725
|
+
},
|
|
6726
|
+
{
|
|
6727
|
+
"name": "head_dim",
|
|
6728
|
+
"type": "u32",
|
|
6729
|
+
"offset": 4
|
|
6730
|
+
},
|
|
6731
|
+
{
|
|
6732
|
+
"name": "num_tokens",
|
|
6733
|
+
"type": "u32",
|
|
6734
|
+
"offset": 8
|
|
6735
|
+
},
|
|
6736
|
+
{
|
|
6737
|
+
"name": "hidden_size",
|
|
6738
|
+
"type": "u32",
|
|
6739
|
+
"offset": 12
|
|
6740
|
+
},
|
|
6741
|
+
{
|
|
6742
|
+
"name": "_pad0",
|
|
6743
|
+
"type": "u32",
|
|
6744
|
+
"offset": 16
|
|
6745
|
+
},
|
|
6746
|
+
{
|
|
6747
|
+
"name": "_pad1",
|
|
6748
|
+
"type": "u32",
|
|
6749
|
+
"offset": 20
|
|
6750
|
+
}
|
|
6751
|
+
]
|
|
6752
|
+
},
|
|
6753
|
+
"variants": {
|
|
6754
|
+
"default": {
|
|
6755
|
+
"wgsl": "sana_linear_attention_summary.wgsl",
|
|
6756
|
+
"entryPoint": "main",
|
|
6757
|
+
"workgroup": [
|
|
6758
|
+
256,
|
|
6759
|
+
1,
|
|
6760
|
+
1
|
|
6761
|
+
],
|
|
6762
|
+
"requires": [],
|
|
6763
|
+
"outputDtype": "f32"
|
|
6764
|
+
},
|
|
6765
|
+
"default_f16": {
|
|
6766
|
+
"wgsl": "sana_linear_attention_summary_f16.wgsl",
|
|
6767
|
+
"entryPoint": "main",
|
|
6768
|
+
"workgroup": [
|
|
6769
|
+
256,
|
|
6770
|
+
1,
|
|
6771
|
+
1
|
|
6772
|
+
],
|
|
6773
|
+
"requires": [
|
|
6774
|
+
"shader-f16"
|
|
6775
|
+
],
|
|
6776
|
+
"outputDtype": "f32"
|
|
6777
|
+
}
|
|
6778
|
+
}
|
|
6779
|
+
},
|
|
6780
|
+
"sana_linear_attention_apply": {
|
|
6781
|
+
"description": "Sana linear attention apply stage",
|
|
6782
|
+
"baseBindings": [
|
|
6783
|
+
{
|
|
6784
|
+
"index": 0,
|
|
6785
|
+
"name": "uniforms",
|
|
6786
|
+
"type": "uniform"
|
|
6787
|
+
},
|
|
6788
|
+
{
|
|
6789
|
+
"index": 1,
|
|
6790
|
+
"name": "query",
|
|
6791
|
+
"type": "read-only-storage"
|
|
6792
|
+
},
|
|
6793
|
+
{
|
|
6794
|
+
"index": 2,
|
|
6795
|
+
"name": "summary",
|
|
6796
|
+
"type": "read-only-storage"
|
|
6797
|
+
},
|
|
6798
|
+
{
|
|
6799
|
+
"index": 3,
|
|
6800
|
+
"name": "output",
|
|
6801
|
+
"type": "storage"
|
|
6802
|
+
}
|
|
6803
|
+
],
|
|
6804
|
+
"baseUniforms": {
|
|
6805
|
+
"size": 32,
|
|
6806
|
+
"fields": [
|
|
6807
|
+
{
|
|
6808
|
+
"name": "num_heads",
|
|
6809
|
+
"type": "u32",
|
|
6810
|
+
"offset": 0
|
|
6811
|
+
},
|
|
6812
|
+
{
|
|
6813
|
+
"name": "head_dim",
|
|
6814
|
+
"type": "u32",
|
|
6815
|
+
"offset": 4
|
|
6816
|
+
},
|
|
6817
|
+
{
|
|
6818
|
+
"name": "num_tokens",
|
|
6819
|
+
"type": "u32",
|
|
6820
|
+
"offset": 8
|
|
6821
|
+
},
|
|
6822
|
+
{
|
|
6823
|
+
"name": "hidden_size",
|
|
6824
|
+
"type": "u32",
|
|
6825
|
+
"offset": 12
|
|
6826
|
+
},
|
|
6827
|
+
{
|
|
6828
|
+
"name": "eps",
|
|
6829
|
+
"type": "f32",
|
|
6830
|
+
"offset": 16
|
|
6831
|
+
},
|
|
6832
|
+
{
|
|
6833
|
+
"name": "_pad0",
|
|
6834
|
+
"type": "u32",
|
|
6835
|
+
"offset": 20
|
|
6836
|
+
},
|
|
6837
|
+
{
|
|
6838
|
+
"name": "_pad1",
|
|
6839
|
+
"type": "u32",
|
|
6840
|
+
"offset": 24
|
|
6841
|
+
},
|
|
6842
|
+
{
|
|
6843
|
+
"name": "_pad2",
|
|
6844
|
+
"type": "u32",
|
|
6845
|
+
"offset": 28
|
|
6846
|
+
}
|
|
6847
|
+
]
|
|
6848
|
+
},
|
|
6849
|
+
"variants": {
|
|
6850
|
+
"default": {
|
|
6851
|
+
"wgsl": "sana_linear_attention_apply.wgsl",
|
|
6852
|
+
"entryPoint": "main",
|
|
6853
|
+
"workgroup": [
|
|
6854
|
+
256,
|
|
6855
|
+
1,
|
|
6856
|
+
1
|
|
6857
|
+
],
|
|
6858
|
+
"requires": [],
|
|
6859
|
+
"outputDtype": "f32"
|
|
6860
|
+
},
|
|
6861
|
+
"default_f16": {
|
|
6862
|
+
"wgsl": "sana_linear_attention_apply_f16.wgsl",
|
|
6863
|
+
"entryPoint": "main",
|
|
6864
|
+
"workgroup": [
|
|
6865
|
+
256,
|
|
6866
|
+
1,
|
|
6867
|
+
1
|
|
6868
|
+
],
|
|
6869
|
+
"requires": [
|
|
6870
|
+
"shader-f16"
|
|
6871
|
+
],
|
|
6872
|
+
"outputDtype": "f16"
|
|
6873
|
+
}
|
|
6874
|
+
}
|
|
6875
|
+
},
|
|
6876
|
+
"repeat_channels": {
|
|
6877
|
+
"description": "Repeat channels along the NCHW channel axis",
|
|
6878
|
+
"baseBindings": [
|
|
6879
|
+
{
|
|
6880
|
+
"index": 0,
|
|
6881
|
+
"name": "uniforms",
|
|
6882
|
+
"type": "uniform"
|
|
6883
|
+
},
|
|
6884
|
+
{
|
|
6885
|
+
"index": 1,
|
|
6886
|
+
"name": "input",
|
|
6887
|
+
"type": "read-only-storage"
|
|
6888
|
+
},
|
|
6889
|
+
{
|
|
6890
|
+
"index": 2,
|
|
6891
|
+
"name": "output",
|
|
6892
|
+
"type": "storage"
|
|
6893
|
+
}
|
|
6894
|
+
],
|
|
6895
|
+
"baseUniforms": {
|
|
6896
|
+
"size": 20,
|
|
6897
|
+
"fields": [
|
|
6898
|
+
{
|
|
6899
|
+
"name": "in_channels",
|
|
6900
|
+
"type": "u32",
|
|
6901
|
+
"offset": 0
|
|
6902
|
+
},
|
|
6903
|
+
{
|
|
6904
|
+
"name": "height",
|
|
6905
|
+
"type": "u32",
|
|
6906
|
+
"offset": 4
|
|
6907
|
+
},
|
|
6908
|
+
{
|
|
6909
|
+
"name": "width",
|
|
6910
|
+
"type": "u32",
|
|
6911
|
+
"offset": 8
|
|
6912
|
+
},
|
|
6913
|
+
{
|
|
6914
|
+
"name": "repeats",
|
|
6915
|
+
"type": "u32",
|
|
6916
|
+
"offset": 12
|
|
6917
|
+
},
|
|
6918
|
+
{
|
|
6919
|
+
"name": "_pad0",
|
|
6920
|
+
"type": "u32",
|
|
6921
|
+
"offset": 16
|
|
6922
|
+
}
|
|
6923
|
+
]
|
|
6924
|
+
},
|
|
6925
|
+
"variants": {
|
|
6926
|
+
"default": {
|
|
6927
|
+
"wgsl": "repeat_channels.wgsl",
|
|
6928
|
+
"entryPoint": "main",
|
|
6929
|
+
"workgroup": [
|
|
6930
|
+
256,
|
|
6931
|
+
1,
|
|
6932
|
+
1
|
|
6933
|
+
],
|
|
6934
|
+
"requires": [],
|
|
6935
|
+
"outputDtype": "f32"
|
|
6936
|
+
},
|
|
6937
|
+
"default_f16": {
|
|
6938
|
+
"wgsl": "repeat_channels_f16.wgsl",
|
|
6939
|
+
"entryPoint": "main",
|
|
6940
|
+
"workgroup": [
|
|
6941
|
+
256,
|
|
6942
|
+
1,
|
|
6943
|
+
1
|
|
6944
|
+
],
|
|
6945
|
+
"requires": [
|
|
6946
|
+
"shader-f16"
|
|
6947
|
+
],
|
|
6948
|
+
"outputDtype": "f16"
|
|
6949
|
+
}
|
|
6950
|
+
}
|
|
6951
|
+
},
|
|
6952
|
+
"relu": {
|
|
6953
|
+
"description": "ReLU activation",
|
|
6954
|
+
"baseBindings": [
|
|
6955
|
+
{
|
|
6956
|
+
"index": 0,
|
|
6957
|
+
"name": "uniforms",
|
|
6958
|
+
"type": "uniform"
|
|
6959
|
+
},
|
|
6960
|
+
{
|
|
6961
|
+
"index": 1,
|
|
6962
|
+
"name": "input",
|
|
6963
|
+
"type": "read-only-storage"
|
|
6964
|
+
},
|
|
6965
|
+
{
|
|
6966
|
+
"index": 2,
|
|
6967
|
+
"name": "output",
|
|
6968
|
+
"type": "storage"
|
|
6969
|
+
}
|
|
6970
|
+
],
|
|
6971
|
+
"baseUniforms": {
|
|
6972
|
+
"size": 16,
|
|
6973
|
+
"fields": [
|
|
6974
|
+
{
|
|
6975
|
+
"name": "size",
|
|
6976
|
+
"type": "u32",
|
|
6977
|
+
"offset": 0
|
|
6978
|
+
},
|
|
6979
|
+
{
|
|
6980
|
+
"name": "_pad0",
|
|
6981
|
+
"type": "u32",
|
|
6982
|
+
"offset": 4
|
|
6983
|
+
},
|
|
6984
|
+
{
|
|
6985
|
+
"name": "_pad1",
|
|
6986
|
+
"type": "u32",
|
|
6987
|
+
"offset": 8
|
|
6988
|
+
},
|
|
6989
|
+
{
|
|
6990
|
+
"name": "_pad2",
|
|
6991
|
+
"type": "u32",
|
|
6992
|
+
"offset": 12
|
|
6993
|
+
}
|
|
6994
|
+
]
|
|
6995
|
+
},
|
|
6996
|
+
"variants": {
|
|
6997
|
+
"default": {
|
|
6998
|
+
"wgsl": "relu.wgsl",
|
|
6999
|
+
"entryPoint": "main",
|
|
7000
|
+
"workgroup": [
|
|
7001
|
+
256,
|
|
7002
|
+
1,
|
|
7003
|
+
1
|
|
7004
|
+
],
|
|
7005
|
+
"requires": [],
|
|
7006
|
+
"outputDtype": "f32"
|
|
7007
|
+
},
|
|
7008
|
+
"default_f16": {
|
|
7009
|
+
"wgsl": "relu_f16.wgsl",
|
|
7010
|
+
"entryPoint": "main",
|
|
7011
|
+
"workgroup": [
|
|
7012
|
+
256,
|
|
7013
|
+
1,
|
|
7014
|
+
1
|
|
7015
|
+
],
|
|
7016
|
+
"requires": [
|
|
7017
|
+
"shader-f16"
|
|
7018
|
+
],
|
|
7019
|
+
"outputDtype": "f16"
|
|
7020
|
+
}
|
|
7021
|
+
}
|
|
7022
|
+
},
|
|
6427
7023
|
"conv2d_backward_input": {
|
|
6428
7024
|
"description": "Conv2D backward (input gradient)",
|
|
6429
7025
|
"baseBindings": [
|