@simulatte/doppler 0.1.3 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114) hide show
  1. package/README.md +11 -5
  2. package/package.json +27 -4
  3. package/src/client/doppler-api.browser.d.ts +1 -0
  4. package/src/client/doppler-api.browser.js +288 -0
  5. package/src/client/doppler-api.d.ts +80 -0
  6. package/src/client/doppler-api.js +298 -0
  7. package/src/client/doppler-provider/types.js +1 -1
  8. package/src/client/doppler-registry.d.ts +23 -0
  9. package/src/client/doppler-registry.js +88 -0
  10. package/src/client/doppler-registry.json +39 -0
  11. package/src/config/execution-contract-check.d.ts +82 -0
  12. package/src/config/execution-contract-check.js +317 -0
  13. package/src/config/execution-v0-contract-check.d.ts +94 -0
  14. package/src/config/execution-v0-contract-check.js +251 -0
  15. package/src/config/execution-v0-graph-contract-check.d.ts +20 -0
  16. package/src/config/execution-v0-graph-contract-check.js +64 -0
  17. package/src/config/kernel-path-contract-check.d.ts +76 -0
  18. package/src/config/kernel-path-contract-check.js +479 -0
  19. package/src/config/kernel-path-loader.d.ts +16 -0
  20. package/src/config/kernel-path-loader.js +54 -0
  21. package/src/config/kernels/kernel-ref-digests.js +12 -0
  22. package/src/config/kernels/registry.json +556 -0
  23. package/src/config/loader.js +90 -67
  24. package/src/config/merge-contract-check.d.ts +16 -0
  25. package/src/config/merge-contract-check.js +321 -0
  26. package/src/config/merge-helpers.d.ts +58 -0
  27. package/src/config/merge-helpers.js +54 -0
  28. package/src/config/merge.js +3 -6
  29. package/src/config/presets/models/janus-text.json +27 -0
  30. package/src/config/quantization-contract-check.d.ts +12 -0
  31. package/src/config/quantization-contract-check.js +91 -0
  32. package/src/config/required-inference-fields-contract-check.d.ts +24 -0
  33. package/src/config/required-inference-fields-contract-check.js +231 -0
  34. package/src/config/schema/browser-suite-metrics.schema.d.ts +17 -0
  35. package/src/config/schema/browser-suite-metrics.schema.js +46 -0
  36. package/src/config/schema/conversion-report.schema.d.ts +40 -0
  37. package/src/config/schema/conversion-report.schema.js +108 -0
  38. package/src/config/schema/doppler.schema.js +12 -18
  39. package/src/config/schema/index.d.ts +22 -0
  40. package/src/config/schema/index.js +18 -0
  41. package/src/converter/core.d.ts +10 -0
  42. package/src/converter/core.js +49 -11
  43. package/src/converter/parsers/diffusion.js +63 -3
  44. package/src/converter/tokenizer-utils.js +17 -3
  45. package/src/formats/rdrr/validation.js +13 -0
  46. package/src/gpu/kernels/depthwise_conv2d.d.ts +29 -0
  47. package/src/gpu/kernels/depthwise_conv2d.js +98 -0
  48. package/src/gpu/kernels/depthwise_conv2d.wgsl +58 -0
  49. package/src/gpu/kernels/depthwise_conv2d_f16.wgsl +62 -0
  50. package/src/gpu/kernels/grouped_pointwise_conv2d.d.ts +27 -0
  51. package/src/gpu/kernels/grouped_pointwise_conv2d.js +92 -0
  52. package/src/gpu/kernels/grouped_pointwise_conv2d.wgsl +47 -0
  53. package/src/gpu/kernels/grouped_pointwise_conv2d_f16.wgsl +51 -0
  54. package/src/gpu/kernels/index.d.ts +30 -0
  55. package/src/gpu/kernels/index.js +25 -0
  56. package/src/gpu/kernels/relu.d.ts +18 -0
  57. package/src/gpu/kernels/relu.js +45 -0
  58. package/src/gpu/kernels/relu.wgsl +21 -0
  59. package/src/gpu/kernels/relu_f16.wgsl +23 -0
  60. package/src/gpu/kernels/repeat_channels.d.ts +21 -0
  61. package/src/gpu/kernels/repeat_channels.js +60 -0
  62. package/src/gpu/kernels/repeat_channels.wgsl +29 -0
  63. package/src/gpu/kernels/repeat_channels_f16.wgsl +31 -0
  64. package/src/gpu/kernels/sana_linear_attention.d.ts +27 -0
  65. package/src/gpu/kernels/sana_linear_attention.js +122 -0
  66. package/src/gpu/kernels/sana_linear_attention_apply.wgsl +44 -0
  67. package/src/gpu/kernels/sana_linear_attention_apply_f16.wgsl +47 -0
  68. package/src/gpu/kernels/sana_linear_attention_summary.wgsl +47 -0
  69. package/src/gpu/kernels/sana_linear_attention_summary_f16.wgsl +49 -0
  70. package/src/index-browser.d.ts +1 -0
  71. package/src/index-browser.js +2 -1
  72. package/src/index.d.ts +1 -0
  73. package/src/index.js +2 -1
  74. package/src/inference/browser-harness.js +164 -38
  75. package/src/inference/pipelines/diffusion/init.js +14 -0
  76. package/src/inference/pipelines/diffusion/pipeline.js +206 -77
  77. package/src/inference/pipelines/diffusion/sana-transformer.d.ts +53 -0
  78. package/src/inference/pipelines/diffusion/sana-transformer.js +738 -0
  79. package/src/inference/pipelines/diffusion/scheduler.d.ts +17 -1
  80. package/src/inference/pipelines/diffusion/scheduler.js +91 -3
  81. package/src/inference/pipelines/diffusion/text-encoder-gpu.d.ts +6 -4
  82. package/src/inference/pipelines/diffusion/text-encoder-gpu.js +270 -0
  83. package/src/inference/pipelines/diffusion/text-encoder.js +18 -1
  84. package/src/inference/pipelines/diffusion/types.d.ts +4 -0
  85. package/src/inference/pipelines/diffusion/vae.js +782 -78
  86. package/src/inference/pipelines/text/config.d.ts +5 -0
  87. package/src/inference/pipelines/text/config.js +1 -1
  88. package/src/inference/pipelines/text/execution-v0.js +141 -101
  89. package/src/inference/pipelines/text/init.js +41 -10
  90. package/src/inference/pipelines/text.js +7 -1
  91. package/src/rules/execution-rules-contract-check.d.ts +17 -0
  92. package/src/rules/execution-rules-contract-check.js +245 -0
  93. package/src/rules/kernels/depthwise-conv2d.rules.json +6 -0
  94. package/src/rules/kernels/grouped-pointwise-conv2d.rules.json +6 -0
  95. package/src/rules/kernels/relu.rules.json +6 -0
  96. package/src/rules/kernels/repeat-channels.rules.json +6 -0
  97. package/src/rules/kernels/sana-linear-attention.rules.json +6 -0
  98. package/src/rules/layer-pattern-contract-check.d.ts +17 -0
  99. package/src/rules/layer-pattern-contract-check.js +231 -0
  100. package/src/rules/rule-registry.d.ts +28 -0
  101. package/src/rules/rule-registry.js +38 -0
  102. package/src/tooling/conversion-config-materializer.d.ts +24 -0
  103. package/src/tooling/conversion-config-materializer.js +99 -0
  104. package/src/tooling/lean-execution-contract-runner.d.ts +43 -0
  105. package/src/tooling/lean-execution-contract-runner.js +158 -0
  106. package/src/tooling/lean-execution-contract.d.ts +16 -0
  107. package/src/tooling/lean-execution-contract.js +81 -0
  108. package/src/tooling/node-convert.d.ts +10 -0
  109. package/src/tooling/node-converter.js +59 -0
  110. package/src/tooling/node-webgpu.js +30 -9
  111. package/src/version.d.ts +2 -0
  112. package/src/version.js +2 -0
  113. package/tools/convert-safetensors-node.js +47 -0
  114. package/tools/doppler-cli.js +167 -6
@@ -1,6 +1,8 @@
1
1
  import { DEFAULT_ENTRY } from './schema/kernel-path.schema.js';
2
2
  import { KERNEL_CONFIGS } from '../gpu/kernels/utils.js';
3
+ import { selectByRules } from '../gpu/kernels/rule-matcher.js';
3
4
  import { loadJson } from '../utils/load-json.js';
5
+ import { buildKernelPathContractArtifact } from './kernel-path-contract-check.js';
4
6
 
5
7
  // =============================================================================
6
8
  // Built-in Kernel Paths (imported at build time)
@@ -77,6 +79,11 @@ const KERNEL_PATH_REGISTRY_INDEX = new Map(
77
79
  );
78
80
 
79
81
  const KERNEL_PATH_REGISTRY = Object.create(null);
82
+ const KERNEL_PATH_RULES = await loadJson(
83
+ '../rules/inference/kernel-path.rules.json',
84
+ import.meta.url,
85
+ 'Failed to load kernel path rules'
86
+ );
80
87
 
81
88
  const resolveKernelPathConfig = (id, chain = new Set()) => {
82
89
  if (KERNEL_PATH_REGISTRY[id] !== undefined) {
@@ -117,6 +124,42 @@ for (const entry of KERNEL_PATH_REGISTRY_ENTRIES) {
117
124
  resolveKernelPathConfig(entry.id);
118
125
  }
119
126
 
127
+ const KERNEL_PATH_FINITENESS_FALLBACK_MAPPINGS = KERNEL_PATH_REGISTRY_ENTRIES
128
+ .map((entry) => {
129
+ const fallbackKernelPathId = selectByRules(
130
+ Array.isArray(KERNEL_PATH_RULES?.finitenessFallback) ? KERNEL_PATH_RULES.finitenessFallback : [],
131
+ { kernelPathId: entry.id }
132
+ );
133
+ if (typeof fallbackKernelPathId !== 'string' || fallbackKernelPathId.length === 0) {
134
+ return null;
135
+ }
136
+ return {
137
+ primaryKernelPathId: entry.id,
138
+ fallbackKernelPathId,
139
+ primaryActivationDtype: KERNEL_PATH_REGISTRY[entry.id]?.activationDtype ?? null,
140
+ fallbackActivationDtype: KERNEL_PATH_REGISTRY[fallbackKernelPathId]?.activationDtype ?? null,
141
+ };
142
+ })
143
+ .filter(Boolean);
144
+
145
+ const KERNEL_PATH_CONTRACT_ARTIFACT = buildKernelPathContractArtifact(
146
+ {
147
+ registryId: 'builtin-kernel-paths',
148
+ entries: KERNEL_PATH_REGISTRY_ENTRIES,
149
+ fallbackMappings: KERNEL_PATH_FINITENESS_FALLBACK_MAPPINGS,
150
+ fallbackRules: Array.isArray(KERNEL_PATH_RULES?.finitenessFallback)
151
+ ? KERNEL_PATH_RULES.finitenessFallback
152
+ : [],
153
+ autoSelectRules: Array.isArray(KERNEL_PATH_RULES?.autoSelect)
154
+ ? KERNEL_PATH_RULES.autoSelect
155
+ : [],
156
+ }
157
+ );
158
+
159
+ if (!KERNEL_PATH_CONTRACT_ARTIFACT.ok) {
160
+ throw new Error(KERNEL_PATH_CONTRACT_ARTIFACT.errors[0]);
161
+ }
162
+
120
163
  // =============================================================================
121
164
  // Public API
122
165
  // =============================================================================
@@ -129,6 +172,17 @@ export function listKernelPaths() {
129
172
  return Object.keys(KERNEL_PATH_REGISTRY);
130
173
  }
131
174
 
175
+ export function getKernelPathContractArtifact() {
176
+ return {
177
+ schemaVersion: KERNEL_PATH_CONTRACT_ARTIFACT.schemaVersion,
178
+ source: KERNEL_PATH_CONTRACT_ARTIFACT.source,
179
+ ok: KERNEL_PATH_CONTRACT_ARTIFACT.ok,
180
+ checks: KERNEL_PATH_CONTRACT_ARTIFACT.checks.map((entry) => ({ ...entry })),
181
+ errors: [...KERNEL_PATH_CONTRACT_ARTIFACT.errors],
182
+ stats: { ...KERNEL_PATH_CONTRACT_ARTIFACT.stats },
183
+ };
184
+ }
185
+
132
186
  export function resolveKernelPath(ref) {
133
187
  if (typeof ref === 'string') {
134
188
  const path = getKernelPath(ref);
@@ -57,6 +57,8 @@ export const KERNEL_REF_CONTENT_DIGESTS = Object.freeze({
57
57
  "conv2d_f16.wgsl#main": "aa139e9f0270873acbc1c4b3cbacff4d224cae7247b520ec129a4f068eb6ed59",
58
58
  "conv2d.wgsl#main": "484a676692d2b8097daeefe42e2296a1f8b3ef11abfd7b41df6cdcdf16b7a8fd",
59
59
  "cross_entropy_loss.wgsl#main": "5a48087bdec94184432c90ce5b345e1eadbdfcb13b9793ecee8052bc7392239c",
60
+ "depthwise_conv2d_f16.wgsl#main": "d5d8d195b1449e39715340af4a0759da4b44b54f6a3cfbdfa6abe743b0f1d002",
61
+ "depthwise_conv2d.wgsl#main": "e5da160f505e18508619b78ba30f9bde0c84689a166df06cb59ef0e6591c6faf",
60
62
  "dequant_f16_out_vec4.wgsl#main_vec4": "61c20e6c71c1c8421b4ec202dbd26292a6300587bd44c314f2a6c6d9d9442c3a",
61
63
  "dequant_f16_out.wgsl#main": "94d61843d56f9a3bbc6b7c2b95dc6ecbba3f6a262b2c4086a076f69a8c38ccae",
62
64
  "dequant_f16_rowwise.wgsl#main": "f5bf7cef950b52d65cee6121dbaa176244d3221045b3b6386b3be47f23ce17dc",
@@ -116,6 +118,8 @@ export const KERNEL_REF_CONTENT_DIGESTS = Object.freeze({
116
118
  "gelu.wgsl#main": "a9007ea08aaff98f9be08f1e0490a6bcf252883eac5513de876ab9ce918865e6",
117
119
  "gptoss_mxfp4_expert_fused.wgsl#main_expert": "3159e8cd81da13f909cf905e6d35307fefe1dcbbdf1b2b8e8ff0ce923bd71180",
118
120
  "gptoss_router_topk.wgsl#softmax_topk": "86e4ea709c0c0084d09c6a4cd07710dc14f380e03f91b8ed9ec871b310be49f1",
121
+ "grouped_pointwise_conv2d_f16.wgsl#main": "11bcaefc5929b2e3c1ba338ebea6a28d2cac26553be8b00f51bfddbabf513be7",
122
+ "grouped_pointwise_conv2d.wgsl#main": "c0d5cdec0743b4ee337a8df95bda442e617c1678e3d1b6e20ec692d500ede50d",
119
123
  "groupnorm_apply_f16.wgsl#main": "cfd850b87944ac1c03ba7bd98136db556dadd8a70611e351d82d297299a7cd02",
120
124
  "groupnorm_apply.wgsl#main": "b09b8f2f57dcdfa1a0366daa30d3910feb134204652c711d2ba564e566b5a334",
121
125
  "groupnorm_stats_f16.wgsl#main": "fb76f78ce668ea8459110335698fe4b09a2425fc71deed3bab67efd7641c3199",
@@ -153,6 +157,10 @@ export const KERNEL_REF_CONTENT_DIGESTS = Object.freeze({
153
157
  "moe_offsets.wgsl#build_offsets": "3ea004145fa234659408cdeb0d4d802adff1037c9c5c03af146b3734cc69dd27",
154
158
  "pixel_shuffle_f16.wgsl#main": "57903a9c19cecc56371b2198402745127115680d266c3ce609201be9119aa359",
155
159
  "pixel_shuffle.wgsl#main": "845b88700b1b46d18cde6f2ec11bb89512c90d7e148763e74ce2a4173fd99b21",
160
+ "relu_f16.wgsl#main": "fc6134aabe43081b42ce8507d8f374092d0f2e03316aa42c25dd50229dc0ee40",
161
+ "relu.wgsl#main": "ca2c9bfa0acb9ece3b7e67de5209e00e553602b3917d23aca10338c1e6f01e27",
162
+ "repeat_channels_f16.wgsl#main": "e7e4d9164752e782d482db40256d0d86d96f784aa7debdb72faf3261b9bdd737",
163
+ "repeat_channels.wgsl#main": "ad0e34925c8c1173b9f0d92fa6e3808d039f82b3d9ad943b0a75b213ee1776e5",
156
164
  "residual_f16_vec4.wgsl#add_vec4": "30e9226fb6636e2f01e65b1dc8e93c8e849a87acec6215342fc114996da1ed41",
157
165
  "residual_f16.wgsl#main": "d392433f3065d1caf68b033219f4ffacf022dc1f90fc3cf3fd620e4ba49f3219",
158
166
  "residual_vec4.wgsl#add_vec4": "ef011d1683e62887db712da563e783d12fdc80c152955661137d2dca612d7d6a",
@@ -186,6 +194,10 @@ export const KERNEL_REF_CONTENT_DIGESTS = Object.freeze({
186
194
  "sample.wgsl#find_topk_phase2": "940b216e605d22096da5aca65950a8030866fc5a39e7fdf484d69a832de1b63a",
187
195
  "sample.wgsl#sample_single_pass": "4412357e84113ee2f1bc0dc8bf89e314c2ab482c89c14ca016ea9949d16a9d0c",
188
196
  "sample.wgsl#softmax_and_sample": "7172c60e76430fbe130e530e3564b569b45eccf193987b32d6f52bd6bbcc9f08",
197
+ "sana_linear_attention_apply_f16.wgsl#main": "e47366b94d40c4388e631b5bf93f8d61ef4e52cc65ffcd3b08d9d170616bb138",
198
+ "sana_linear_attention_apply.wgsl#main": "59cad7974c644fd910af776ad85a9a2c43c00492d4d1152fdc8373ecbb8bba18",
199
+ "sana_linear_attention_summary_f16.wgsl#main": "e3c040bb6469d37fc78eb22c1cc3e0456301607e461bbcdf5365a583c5d260d2",
200
+ "sana_linear_attention_summary.wgsl#main": "20c7ecdbcd1c73c0f9937c3cdac07b4b6edfe8618bf6f66281806343fd41b122",
189
201
  "scale.wgsl#main": "44ec481452b586307957163e3d65c9d02561d3f2f3db633f906f5488b1ea1ca4",
190
202
  "scale.wgsl#main_inplace": "020824c7118a59c461ce81f1c2cd01b7c2a3f1aab326392b7d48d4448a0c2ed1",
191
203
  "scatter_add_dynamic_f16_weights.wgsl#scatter_add_dynamic": "42799e745bc445b199b1cbc384bc12bb9372ed1599af3260a803cefc8dd35497",
@@ -5537,6 +5537,228 @@
5537
5537
  }
5538
5538
  }
5539
5539
  },
5540
+ "depthwise_conv2d": {
5541
+ "description": "Depthwise Conv2D (NCHW)",
5542
+ "baseBindings": [
5543
+ {
5544
+ "index": 0,
5545
+ "name": "uniforms",
5546
+ "type": "uniform"
5547
+ },
5548
+ {
5549
+ "index": 1,
5550
+ "name": "input",
5551
+ "type": "read-only-storage"
5552
+ },
5553
+ {
5554
+ "index": 2,
5555
+ "name": "weight",
5556
+ "type": "read-only-storage"
5557
+ },
5558
+ {
5559
+ "index": 3,
5560
+ "name": "bias",
5561
+ "type": "read-only-storage"
5562
+ },
5563
+ {
5564
+ "index": 4,
5565
+ "name": "output",
5566
+ "type": "storage"
5567
+ }
5568
+ ],
5569
+ "baseUniforms": {
5570
+ "size": 48,
5571
+ "fields": [
5572
+ {
5573
+ "name": "channels",
5574
+ "type": "u32",
5575
+ "offset": 0
5576
+ },
5577
+ {
5578
+ "name": "height",
5579
+ "type": "u32",
5580
+ "offset": 4
5581
+ },
5582
+ {
5583
+ "name": "width",
5584
+ "type": "u32",
5585
+ "offset": 8
5586
+ },
5587
+ {
5588
+ "name": "out_height",
5589
+ "type": "u32",
5590
+ "offset": 12
5591
+ },
5592
+ {
5593
+ "name": "out_width",
5594
+ "type": "u32",
5595
+ "offset": 16
5596
+ },
5597
+ {
5598
+ "name": "kernel_h",
5599
+ "type": "u32",
5600
+ "offset": 20
5601
+ },
5602
+ {
5603
+ "name": "kernel_w",
5604
+ "type": "u32",
5605
+ "offset": 24
5606
+ },
5607
+ {
5608
+ "name": "stride",
5609
+ "type": "u32",
5610
+ "offset": 28
5611
+ },
5612
+ {
5613
+ "name": "pad",
5614
+ "type": "u32",
5615
+ "offset": 32
5616
+ },
5617
+ {
5618
+ "name": "_pad0",
5619
+ "type": "u32",
5620
+ "offset": 36
5621
+ },
5622
+ {
5623
+ "name": "_pad1",
5624
+ "type": "u32",
5625
+ "offset": 40
5626
+ },
5627
+ {
5628
+ "name": "_pad2",
5629
+ "type": "u32",
5630
+ "offset": 44
5631
+ }
5632
+ ]
5633
+ },
5634
+ "variants": {
5635
+ "default": {
5636
+ "wgsl": "depthwise_conv2d.wgsl",
5637
+ "entryPoint": "main",
5638
+ "workgroup": [
5639
+ 256,
5640
+ 1,
5641
+ 1
5642
+ ],
5643
+ "requires": [],
5644
+ "outputDtype": "f32"
5645
+ },
5646
+ "default_f16": {
5647
+ "wgsl": "depthwise_conv2d_f16.wgsl",
5648
+ "entryPoint": "main",
5649
+ "workgroup": [
5650
+ 256,
5651
+ 1,
5652
+ 1
5653
+ ],
5654
+ "requires": [
5655
+ "shader-f16"
5656
+ ],
5657
+ "outputDtype": "f16"
5658
+ }
5659
+ }
5660
+ },
5661
+ "grouped_pointwise_conv2d": {
5662
+ "description": "Grouped Pointwise Conv2D (NCHW)",
5663
+ "baseBindings": [
5664
+ {
5665
+ "index": 0,
5666
+ "name": "uniforms",
5667
+ "type": "uniform"
5668
+ },
5669
+ {
5670
+ "index": 1,
5671
+ "name": "input",
5672
+ "type": "read-only-storage"
5673
+ },
5674
+ {
5675
+ "index": 2,
5676
+ "name": "weight",
5677
+ "type": "read-only-storage"
5678
+ },
5679
+ {
5680
+ "index": 3,
5681
+ "name": "bias",
5682
+ "type": "read-only-storage"
5683
+ },
5684
+ {
5685
+ "index": 4,
5686
+ "name": "output",
5687
+ "type": "storage"
5688
+ }
5689
+ ],
5690
+ "baseUniforms": {
5691
+ "size": 32,
5692
+ "fields": [
5693
+ {
5694
+ "name": "in_channels",
5695
+ "type": "u32",
5696
+ "offset": 0
5697
+ },
5698
+ {
5699
+ "name": "out_channels",
5700
+ "type": "u32",
5701
+ "offset": 4
5702
+ },
5703
+ {
5704
+ "name": "height",
5705
+ "type": "u32",
5706
+ "offset": 8
5707
+ },
5708
+ {
5709
+ "name": "width",
5710
+ "type": "u32",
5711
+ "offset": 12
5712
+ },
5713
+ {
5714
+ "name": "groups",
5715
+ "type": "u32",
5716
+ "offset": 16
5717
+ },
5718
+ {
5719
+ "name": "_pad0",
5720
+ "type": "u32",
5721
+ "offset": 20
5722
+ },
5723
+ {
5724
+ "name": "_pad1",
5725
+ "type": "u32",
5726
+ "offset": 24
5727
+ },
5728
+ {
5729
+ "name": "_pad2",
5730
+ "type": "u32",
5731
+ "offset": 28
5732
+ }
5733
+ ]
5734
+ },
5735
+ "variants": {
5736
+ "default": {
5737
+ "wgsl": "grouped_pointwise_conv2d.wgsl",
5738
+ "entryPoint": "main",
5739
+ "workgroup": [
5740
+ 256,
5741
+ 1,
5742
+ 1
5743
+ ],
5744
+ "requires": [],
5745
+ "outputDtype": "f32"
5746
+ },
5747
+ "default_f16": {
5748
+ "wgsl": "grouped_pointwise_conv2d_f16.wgsl",
5749
+ "entryPoint": "main",
5750
+ "workgroup": [
5751
+ 256,
5752
+ 1,
5753
+ 1
5754
+ ],
5755
+ "requires": [
5756
+ "shader-f16"
5757
+ ],
5758
+ "outputDtype": "f16"
5759
+ }
5760
+ }
5761
+ },
5540
5762
  "groupnorm_stats": {
5541
5763
  "description": "GroupNorm stats (mean/var) for each group",
5542
5764
  "baseBindings": [
@@ -6424,6 +6646,340 @@
6424
6646
  }
6425
6647
  }
6426
6648
  },
6649
+ "sana_linear_attention_summary": {
6650
+ "description": "Sana linear attention summary stage",
6651
+ "baseBindings": [
6652
+ {
6653
+ "index": 0,
6654
+ "name": "uniforms",
6655
+ "type": "uniform"
6656
+ },
6657
+ {
6658
+ "index": 1,
6659
+ "name": "query",
6660
+ "type": "read-only-storage"
6661
+ },
6662
+ {
6663
+ "index": 2,
6664
+ "name": "key",
6665
+ "type": "read-only-storage"
6666
+ },
6667
+ {
6668
+ "index": 3,
6669
+ "name": "value",
6670
+ "type": "read-only-storage"
6671
+ },
6672
+ {
6673
+ "index": 4,
6674
+ "name": "summary",
6675
+ "type": "storage"
6676
+ }
6677
+ ],
6678
+ "baseUniforms": {
6679
+ "size": 24,
6680
+ "fields": [
6681
+ {
6682
+ "name": "num_heads",
6683
+ "type": "u32",
6684
+ "offset": 0
6685
+ },
6686
+ {
6687
+ "name": "head_dim",
6688
+ "type": "u32",
6689
+ "offset": 4
6690
+ },
6691
+ {
6692
+ "name": "num_tokens",
6693
+ "type": "u32",
6694
+ "offset": 8
6695
+ },
6696
+ {
6697
+ "name": "hidden_size",
6698
+ "type": "u32",
6699
+ "offset": 12
6700
+ },
6701
+ {
6702
+ "name": "_pad0",
6703
+ "type": "u32",
6704
+ "offset": 16
6705
+ },
6706
+ {
6707
+ "name": "_pad1",
6708
+ "type": "u32",
6709
+ "offset": 20
6710
+ }
6711
+ ]
6712
+ },
6713
+ "variants": {
6714
+ "default": {
6715
+ "wgsl": "sana_linear_attention_summary.wgsl",
6716
+ "entryPoint": "main",
6717
+ "workgroup": [
6718
+ 256,
6719
+ 1,
6720
+ 1
6721
+ ],
6722
+ "requires": [],
6723
+ "outputDtype": "f32"
6724
+ },
6725
+ "default_f16": {
6726
+ "wgsl": "sana_linear_attention_summary_f16.wgsl",
6727
+ "entryPoint": "main",
6728
+ "workgroup": [
6729
+ 256,
6730
+ 1,
6731
+ 1
6732
+ ],
6733
+ "requires": [
6734
+ "shader-f16"
6735
+ ],
6736
+ "outputDtype": "f32"
6737
+ }
6738
+ }
6739
+ },
6740
+ "sana_linear_attention_apply": {
6741
+ "description": "Sana linear attention apply stage",
6742
+ "baseBindings": [
6743
+ {
6744
+ "index": 0,
6745
+ "name": "uniforms",
6746
+ "type": "uniform"
6747
+ },
6748
+ {
6749
+ "index": 1,
6750
+ "name": "query",
6751
+ "type": "read-only-storage"
6752
+ },
6753
+ {
6754
+ "index": 2,
6755
+ "name": "summary",
6756
+ "type": "read-only-storage"
6757
+ },
6758
+ {
6759
+ "index": 3,
6760
+ "name": "output",
6761
+ "type": "storage"
6762
+ }
6763
+ ],
6764
+ "baseUniforms": {
6765
+ "size": 32,
6766
+ "fields": [
6767
+ {
6768
+ "name": "num_heads",
6769
+ "type": "u32",
6770
+ "offset": 0
6771
+ },
6772
+ {
6773
+ "name": "head_dim",
6774
+ "type": "u32",
6775
+ "offset": 4
6776
+ },
6777
+ {
6778
+ "name": "num_tokens",
6779
+ "type": "u32",
6780
+ "offset": 8
6781
+ },
6782
+ {
6783
+ "name": "hidden_size",
6784
+ "type": "u32",
6785
+ "offset": 12
6786
+ },
6787
+ {
6788
+ "name": "eps",
6789
+ "type": "f32",
6790
+ "offset": 16
6791
+ },
6792
+ {
6793
+ "name": "_pad0",
6794
+ "type": "u32",
6795
+ "offset": 20
6796
+ },
6797
+ {
6798
+ "name": "_pad1",
6799
+ "type": "u32",
6800
+ "offset": 24
6801
+ },
6802
+ {
6803
+ "name": "_pad2",
6804
+ "type": "u32",
6805
+ "offset": 28
6806
+ }
6807
+ ]
6808
+ },
6809
+ "variants": {
6810
+ "default": {
6811
+ "wgsl": "sana_linear_attention_apply.wgsl",
6812
+ "entryPoint": "main",
6813
+ "workgroup": [
6814
+ 256,
6815
+ 1,
6816
+ 1
6817
+ ],
6818
+ "requires": [],
6819
+ "outputDtype": "f32"
6820
+ },
6821
+ "default_f16": {
6822
+ "wgsl": "sana_linear_attention_apply_f16.wgsl",
6823
+ "entryPoint": "main",
6824
+ "workgroup": [
6825
+ 256,
6826
+ 1,
6827
+ 1
6828
+ ],
6829
+ "requires": [
6830
+ "shader-f16"
6831
+ ],
6832
+ "outputDtype": "f16"
6833
+ }
6834
+ }
6835
+ },
6836
+ "repeat_channels": {
6837
+ "description": "Repeat channels along the NCHW channel axis",
6838
+ "baseBindings": [
6839
+ {
6840
+ "index": 0,
6841
+ "name": "uniforms",
6842
+ "type": "uniform"
6843
+ },
6844
+ {
6845
+ "index": 1,
6846
+ "name": "input",
6847
+ "type": "read-only-storage"
6848
+ },
6849
+ {
6850
+ "index": 2,
6851
+ "name": "output",
6852
+ "type": "storage"
6853
+ }
6854
+ ],
6855
+ "baseUniforms": {
6856
+ "size": 20,
6857
+ "fields": [
6858
+ {
6859
+ "name": "in_channels",
6860
+ "type": "u32",
6861
+ "offset": 0
6862
+ },
6863
+ {
6864
+ "name": "height",
6865
+ "type": "u32",
6866
+ "offset": 4
6867
+ },
6868
+ {
6869
+ "name": "width",
6870
+ "type": "u32",
6871
+ "offset": 8
6872
+ },
6873
+ {
6874
+ "name": "repeats",
6875
+ "type": "u32",
6876
+ "offset": 12
6877
+ },
6878
+ {
6879
+ "name": "_pad0",
6880
+ "type": "u32",
6881
+ "offset": 16
6882
+ }
6883
+ ]
6884
+ },
6885
+ "variants": {
6886
+ "default": {
6887
+ "wgsl": "repeat_channels.wgsl",
6888
+ "entryPoint": "main",
6889
+ "workgroup": [
6890
+ 256,
6891
+ 1,
6892
+ 1
6893
+ ],
6894
+ "requires": [],
6895
+ "outputDtype": "f32"
6896
+ },
6897
+ "default_f16": {
6898
+ "wgsl": "repeat_channels_f16.wgsl",
6899
+ "entryPoint": "main",
6900
+ "workgroup": [
6901
+ 256,
6902
+ 1,
6903
+ 1
6904
+ ],
6905
+ "requires": [
6906
+ "shader-f16"
6907
+ ],
6908
+ "outputDtype": "f16"
6909
+ }
6910
+ }
6911
+ },
6912
+ "relu": {
6913
+ "description": "ReLU activation",
6914
+ "baseBindings": [
6915
+ {
6916
+ "index": 0,
6917
+ "name": "uniforms",
6918
+ "type": "uniform"
6919
+ },
6920
+ {
6921
+ "index": 1,
6922
+ "name": "input",
6923
+ "type": "read-only-storage"
6924
+ },
6925
+ {
6926
+ "index": 2,
6927
+ "name": "output",
6928
+ "type": "storage"
6929
+ }
6930
+ ],
6931
+ "baseUniforms": {
6932
+ "size": 16,
6933
+ "fields": [
6934
+ {
6935
+ "name": "size",
6936
+ "type": "u32",
6937
+ "offset": 0
6938
+ },
6939
+ {
6940
+ "name": "_pad0",
6941
+ "type": "u32",
6942
+ "offset": 4
6943
+ },
6944
+ {
6945
+ "name": "_pad1",
6946
+ "type": "u32",
6947
+ "offset": 8
6948
+ },
6949
+ {
6950
+ "name": "_pad2",
6951
+ "type": "u32",
6952
+ "offset": 12
6953
+ }
6954
+ ]
6955
+ },
6956
+ "variants": {
6957
+ "default": {
6958
+ "wgsl": "relu.wgsl",
6959
+ "entryPoint": "main",
6960
+ "workgroup": [
6961
+ 256,
6962
+ 1,
6963
+ 1
6964
+ ],
6965
+ "requires": [],
6966
+ "outputDtype": "f32"
6967
+ },
6968
+ "default_f16": {
6969
+ "wgsl": "relu_f16.wgsl",
6970
+ "entryPoint": "main",
6971
+ "workgroup": [
6972
+ 256,
6973
+ 1,
6974
+ 1
6975
+ ],
6976
+ "requires": [
6977
+ "shader-f16"
6978
+ ],
6979
+ "outputDtype": "f16"
6980
+ }
6981
+ }
6982
+ },
6427
6983
  "conv2d_backward_input": {
6428
6984
  "description": "Conv2D backward (input gradient)",
6429
6985
  "baseBindings": [