@simulatte/doppler 0.1.3 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +11 -5
- package/package.json +27 -4
- package/src/client/doppler-api.browser.d.ts +1 -0
- package/src/client/doppler-api.browser.js +288 -0
- package/src/client/doppler-api.d.ts +80 -0
- package/src/client/doppler-api.js +298 -0
- package/src/client/doppler-provider/types.js +1 -1
- package/src/client/doppler-registry.d.ts +23 -0
- package/src/client/doppler-registry.js +88 -0
- package/src/client/doppler-registry.json +39 -0
- package/src/config/execution-contract-check.d.ts +82 -0
- package/src/config/execution-contract-check.js +317 -0
- package/src/config/execution-v0-contract-check.d.ts +94 -0
- package/src/config/execution-v0-contract-check.js +251 -0
- package/src/config/execution-v0-graph-contract-check.d.ts +20 -0
- package/src/config/execution-v0-graph-contract-check.js +64 -0
- package/src/config/kernel-path-contract-check.d.ts +76 -0
- package/src/config/kernel-path-contract-check.js +479 -0
- package/src/config/kernel-path-loader.d.ts +16 -0
- package/src/config/kernel-path-loader.js +54 -0
- package/src/config/kernels/kernel-ref-digests.js +12 -0
- package/src/config/kernels/registry.json +556 -0
- package/src/config/loader.js +90 -67
- package/src/config/merge-contract-check.d.ts +16 -0
- package/src/config/merge-contract-check.js +321 -0
- package/src/config/merge-helpers.d.ts +58 -0
- package/src/config/merge-helpers.js +54 -0
- package/src/config/merge.js +3 -6
- package/src/config/presets/models/janus-text.json +27 -0
- package/src/config/quantization-contract-check.d.ts +12 -0
- package/src/config/quantization-contract-check.js +91 -0
- package/src/config/required-inference-fields-contract-check.d.ts +24 -0
- package/src/config/required-inference-fields-contract-check.js +231 -0
- package/src/config/schema/browser-suite-metrics.schema.d.ts +17 -0
- package/src/config/schema/browser-suite-metrics.schema.js +46 -0
- package/src/config/schema/conversion-report.schema.d.ts +40 -0
- package/src/config/schema/conversion-report.schema.js +108 -0
- package/src/config/schema/doppler.schema.js +12 -18
- package/src/config/schema/index.d.ts +22 -0
- package/src/config/schema/index.js +18 -0
- package/src/converter/core.d.ts +10 -0
- package/src/converter/core.js +49 -11
- package/src/converter/parsers/diffusion.js +63 -3
- package/src/converter/tokenizer-utils.js +17 -3
- package/src/formats/rdrr/validation.js +13 -0
- package/src/gpu/kernels/depthwise_conv2d.d.ts +29 -0
- package/src/gpu/kernels/depthwise_conv2d.js +98 -0
- package/src/gpu/kernels/depthwise_conv2d.wgsl +58 -0
- package/src/gpu/kernels/depthwise_conv2d_f16.wgsl +62 -0
- package/src/gpu/kernels/grouped_pointwise_conv2d.d.ts +27 -0
- package/src/gpu/kernels/grouped_pointwise_conv2d.js +92 -0
- package/src/gpu/kernels/grouped_pointwise_conv2d.wgsl +47 -0
- package/src/gpu/kernels/grouped_pointwise_conv2d_f16.wgsl +51 -0
- package/src/gpu/kernels/index.d.ts +30 -0
- package/src/gpu/kernels/index.js +25 -0
- package/src/gpu/kernels/relu.d.ts +18 -0
- package/src/gpu/kernels/relu.js +45 -0
- package/src/gpu/kernels/relu.wgsl +21 -0
- package/src/gpu/kernels/relu_f16.wgsl +23 -0
- package/src/gpu/kernels/repeat_channels.d.ts +21 -0
- package/src/gpu/kernels/repeat_channels.js +60 -0
- package/src/gpu/kernels/repeat_channels.wgsl +29 -0
- package/src/gpu/kernels/repeat_channels_f16.wgsl +31 -0
- package/src/gpu/kernels/sana_linear_attention.d.ts +27 -0
- package/src/gpu/kernels/sana_linear_attention.js +122 -0
- package/src/gpu/kernels/sana_linear_attention_apply.wgsl +44 -0
- package/src/gpu/kernels/sana_linear_attention_apply_f16.wgsl +47 -0
- package/src/gpu/kernels/sana_linear_attention_summary.wgsl +47 -0
- package/src/gpu/kernels/sana_linear_attention_summary_f16.wgsl +49 -0
- package/src/index-browser.d.ts +1 -0
- package/src/index-browser.js +2 -1
- package/src/index.d.ts +1 -0
- package/src/index.js +2 -1
- package/src/inference/browser-harness.js +164 -38
- package/src/inference/pipelines/diffusion/init.js +14 -0
- package/src/inference/pipelines/diffusion/pipeline.js +206 -77
- package/src/inference/pipelines/diffusion/sana-transformer.d.ts +53 -0
- package/src/inference/pipelines/diffusion/sana-transformer.js +738 -0
- package/src/inference/pipelines/diffusion/scheduler.d.ts +17 -1
- package/src/inference/pipelines/diffusion/scheduler.js +91 -3
- package/src/inference/pipelines/diffusion/text-encoder-gpu.d.ts +6 -4
- package/src/inference/pipelines/diffusion/text-encoder-gpu.js +270 -0
- package/src/inference/pipelines/diffusion/text-encoder.js +18 -1
- package/src/inference/pipelines/diffusion/types.d.ts +4 -0
- package/src/inference/pipelines/diffusion/vae.js +782 -78
- package/src/inference/pipelines/text/config.d.ts +5 -0
- package/src/inference/pipelines/text/config.js +1 -1
- package/src/inference/pipelines/text/execution-v0.js +141 -101
- package/src/inference/pipelines/text/init.js +41 -10
- package/src/inference/pipelines/text.js +7 -1
- package/src/rules/execution-rules-contract-check.d.ts +17 -0
- package/src/rules/execution-rules-contract-check.js +245 -0
- package/src/rules/kernels/depthwise-conv2d.rules.json +6 -0
- package/src/rules/kernels/grouped-pointwise-conv2d.rules.json +6 -0
- package/src/rules/kernels/relu.rules.json +6 -0
- package/src/rules/kernels/repeat-channels.rules.json +6 -0
- package/src/rules/kernels/sana-linear-attention.rules.json +6 -0
- package/src/rules/layer-pattern-contract-check.d.ts +17 -0
- package/src/rules/layer-pattern-contract-check.js +231 -0
- package/src/rules/rule-registry.d.ts +28 -0
- package/src/rules/rule-registry.js +38 -0
- package/src/tooling/conversion-config-materializer.d.ts +24 -0
- package/src/tooling/conversion-config-materializer.js +99 -0
- package/src/tooling/lean-execution-contract-runner.d.ts +43 -0
- package/src/tooling/lean-execution-contract-runner.js +158 -0
- package/src/tooling/lean-execution-contract.d.ts +16 -0
- package/src/tooling/lean-execution-contract.js +81 -0
- package/src/tooling/node-convert.d.ts +10 -0
- package/src/tooling/node-converter.js +59 -0
- package/src/tooling/node-webgpu.js +30 -9
- package/src/version.d.ts +2 -0
- package/src/version.js +2 -0
- package/tools/convert-safetensors-node.js +47 -0
- package/tools/doppler-cli.js +167 -6
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
import { DEFAULT_ENTRY } from './schema/kernel-path.schema.js';
|
|
2
2
|
import { KERNEL_CONFIGS } from '../gpu/kernels/utils.js';
|
|
3
|
+
import { selectByRules } from '../gpu/kernels/rule-matcher.js';
|
|
3
4
|
import { loadJson } from '../utils/load-json.js';
|
|
5
|
+
import { buildKernelPathContractArtifact } from './kernel-path-contract-check.js';
|
|
4
6
|
|
|
5
7
|
// =============================================================================
|
|
6
8
|
// Built-in Kernel Paths (imported at build time)
|
|
@@ -77,6 +79,11 @@ const KERNEL_PATH_REGISTRY_INDEX = new Map(
|
|
|
77
79
|
);
|
|
78
80
|
|
|
79
81
|
const KERNEL_PATH_REGISTRY = Object.create(null);
|
|
82
|
+
const KERNEL_PATH_RULES = await loadJson(
|
|
83
|
+
'../rules/inference/kernel-path.rules.json',
|
|
84
|
+
import.meta.url,
|
|
85
|
+
'Failed to load kernel path rules'
|
|
86
|
+
);
|
|
80
87
|
|
|
81
88
|
const resolveKernelPathConfig = (id, chain = new Set()) => {
|
|
82
89
|
if (KERNEL_PATH_REGISTRY[id] !== undefined) {
|
|
@@ -117,6 +124,42 @@ for (const entry of KERNEL_PATH_REGISTRY_ENTRIES) {
|
|
|
117
124
|
resolveKernelPathConfig(entry.id);
|
|
118
125
|
}
|
|
119
126
|
|
|
127
|
+
const KERNEL_PATH_FINITENESS_FALLBACK_MAPPINGS = KERNEL_PATH_REGISTRY_ENTRIES
|
|
128
|
+
.map((entry) => {
|
|
129
|
+
const fallbackKernelPathId = selectByRules(
|
|
130
|
+
Array.isArray(KERNEL_PATH_RULES?.finitenessFallback) ? KERNEL_PATH_RULES.finitenessFallback : [],
|
|
131
|
+
{ kernelPathId: entry.id }
|
|
132
|
+
);
|
|
133
|
+
if (typeof fallbackKernelPathId !== 'string' || fallbackKernelPathId.length === 0) {
|
|
134
|
+
return null;
|
|
135
|
+
}
|
|
136
|
+
return {
|
|
137
|
+
primaryKernelPathId: entry.id,
|
|
138
|
+
fallbackKernelPathId,
|
|
139
|
+
primaryActivationDtype: KERNEL_PATH_REGISTRY[entry.id]?.activationDtype ?? null,
|
|
140
|
+
fallbackActivationDtype: KERNEL_PATH_REGISTRY[fallbackKernelPathId]?.activationDtype ?? null,
|
|
141
|
+
};
|
|
142
|
+
})
|
|
143
|
+
.filter(Boolean);
|
|
144
|
+
|
|
145
|
+
const KERNEL_PATH_CONTRACT_ARTIFACT = buildKernelPathContractArtifact(
|
|
146
|
+
{
|
|
147
|
+
registryId: 'builtin-kernel-paths',
|
|
148
|
+
entries: KERNEL_PATH_REGISTRY_ENTRIES,
|
|
149
|
+
fallbackMappings: KERNEL_PATH_FINITENESS_FALLBACK_MAPPINGS,
|
|
150
|
+
fallbackRules: Array.isArray(KERNEL_PATH_RULES?.finitenessFallback)
|
|
151
|
+
? KERNEL_PATH_RULES.finitenessFallback
|
|
152
|
+
: [],
|
|
153
|
+
autoSelectRules: Array.isArray(KERNEL_PATH_RULES?.autoSelect)
|
|
154
|
+
? KERNEL_PATH_RULES.autoSelect
|
|
155
|
+
: [],
|
|
156
|
+
}
|
|
157
|
+
);
|
|
158
|
+
|
|
159
|
+
if (!KERNEL_PATH_CONTRACT_ARTIFACT.ok) {
|
|
160
|
+
throw new Error(KERNEL_PATH_CONTRACT_ARTIFACT.errors[0]);
|
|
161
|
+
}
|
|
162
|
+
|
|
120
163
|
// =============================================================================
|
|
121
164
|
// Public API
|
|
122
165
|
// =============================================================================
|
|
@@ -129,6 +172,17 @@ export function listKernelPaths() {
|
|
|
129
172
|
return Object.keys(KERNEL_PATH_REGISTRY);
|
|
130
173
|
}
|
|
131
174
|
|
|
175
|
+
export function getKernelPathContractArtifact() {
|
|
176
|
+
return {
|
|
177
|
+
schemaVersion: KERNEL_PATH_CONTRACT_ARTIFACT.schemaVersion,
|
|
178
|
+
source: KERNEL_PATH_CONTRACT_ARTIFACT.source,
|
|
179
|
+
ok: KERNEL_PATH_CONTRACT_ARTIFACT.ok,
|
|
180
|
+
checks: KERNEL_PATH_CONTRACT_ARTIFACT.checks.map((entry) => ({ ...entry })),
|
|
181
|
+
errors: [...KERNEL_PATH_CONTRACT_ARTIFACT.errors],
|
|
182
|
+
stats: { ...KERNEL_PATH_CONTRACT_ARTIFACT.stats },
|
|
183
|
+
};
|
|
184
|
+
}
|
|
185
|
+
|
|
132
186
|
export function resolveKernelPath(ref) {
|
|
133
187
|
if (typeof ref === 'string') {
|
|
134
188
|
const path = getKernelPath(ref);
|
|
@@ -57,6 +57,8 @@ export const KERNEL_REF_CONTENT_DIGESTS = Object.freeze({
|
|
|
57
57
|
"conv2d_f16.wgsl#main": "aa139e9f0270873acbc1c4b3cbacff4d224cae7247b520ec129a4f068eb6ed59",
|
|
58
58
|
"conv2d.wgsl#main": "484a676692d2b8097daeefe42e2296a1f8b3ef11abfd7b41df6cdcdf16b7a8fd",
|
|
59
59
|
"cross_entropy_loss.wgsl#main": "5a48087bdec94184432c90ce5b345e1eadbdfcb13b9793ecee8052bc7392239c",
|
|
60
|
+
"depthwise_conv2d_f16.wgsl#main": "d5d8d195b1449e39715340af4a0759da4b44b54f6a3cfbdfa6abe743b0f1d002",
|
|
61
|
+
"depthwise_conv2d.wgsl#main": "e5da160f505e18508619b78ba30f9bde0c84689a166df06cb59ef0e6591c6faf",
|
|
60
62
|
"dequant_f16_out_vec4.wgsl#main_vec4": "61c20e6c71c1c8421b4ec202dbd26292a6300587bd44c314f2a6c6d9d9442c3a",
|
|
61
63
|
"dequant_f16_out.wgsl#main": "94d61843d56f9a3bbc6b7c2b95dc6ecbba3f6a262b2c4086a076f69a8c38ccae",
|
|
62
64
|
"dequant_f16_rowwise.wgsl#main": "f5bf7cef950b52d65cee6121dbaa176244d3221045b3b6386b3be47f23ce17dc",
|
|
@@ -116,6 +118,8 @@ export const KERNEL_REF_CONTENT_DIGESTS = Object.freeze({
|
|
|
116
118
|
"gelu.wgsl#main": "a9007ea08aaff98f9be08f1e0490a6bcf252883eac5513de876ab9ce918865e6",
|
|
117
119
|
"gptoss_mxfp4_expert_fused.wgsl#main_expert": "3159e8cd81da13f909cf905e6d35307fefe1dcbbdf1b2b8e8ff0ce923bd71180",
|
|
118
120
|
"gptoss_router_topk.wgsl#softmax_topk": "86e4ea709c0c0084d09c6a4cd07710dc14f380e03f91b8ed9ec871b310be49f1",
|
|
121
|
+
"grouped_pointwise_conv2d_f16.wgsl#main": "11bcaefc5929b2e3c1ba338ebea6a28d2cac26553be8b00f51bfddbabf513be7",
|
|
122
|
+
"grouped_pointwise_conv2d.wgsl#main": "c0d5cdec0743b4ee337a8df95bda442e617c1678e3d1b6e20ec692d500ede50d",
|
|
119
123
|
"groupnorm_apply_f16.wgsl#main": "cfd850b87944ac1c03ba7bd98136db556dadd8a70611e351d82d297299a7cd02",
|
|
120
124
|
"groupnorm_apply.wgsl#main": "b09b8f2f57dcdfa1a0366daa30d3910feb134204652c711d2ba564e566b5a334",
|
|
121
125
|
"groupnorm_stats_f16.wgsl#main": "fb76f78ce668ea8459110335698fe4b09a2425fc71deed3bab67efd7641c3199",
|
|
@@ -153,6 +157,10 @@ export const KERNEL_REF_CONTENT_DIGESTS = Object.freeze({
|
|
|
153
157
|
"moe_offsets.wgsl#build_offsets": "3ea004145fa234659408cdeb0d4d802adff1037c9c5c03af146b3734cc69dd27",
|
|
154
158
|
"pixel_shuffle_f16.wgsl#main": "57903a9c19cecc56371b2198402745127115680d266c3ce609201be9119aa359",
|
|
155
159
|
"pixel_shuffle.wgsl#main": "845b88700b1b46d18cde6f2ec11bb89512c90d7e148763e74ce2a4173fd99b21",
|
|
160
|
+
"relu_f16.wgsl#main": "fc6134aabe43081b42ce8507d8f374092d0f2e03316aa42c25dd50229dc0ee40",
|
|
161
|
+
"relu.wgsl#main": "ca2c9bfa0acb9ece3b7e67de5209e00e553602b3917d23aca10338c1e6f01e27",
|
|
162
|
+
"repeat_channels_f16.wgsl#main": "e7e4d9164752e782d482db40256d0d86d96f784aa7debdb72faf3261b9bdd737",
|
|
163
|
+
"repeat_channels.wgsl#main": "ad0e34925c8c1173b9f0d92fa6e3808d039f82b3d9ad943b0a75b213ee1776e5",
|
|
156
164
|
"residual_f16_vec4.wgsl#add_vec4": "30e9226fb6636e2f01e65b1dc8e93c8e849a87acec6215342fc114996da1ed41",
|
|
157
165
|
"residual_f16.wgsl#main": "d392433f3065d1caf68b033219f4ffacf022dc1f90fc3cf3fd620e4ba49f3219",
|
|
158
166
|
"residual_vec4.wgsl#add_vec4": "ef011d1683e62887db712da563e783d12fdc80c152955661137d2dca612d7d6a",
|
|
@@ -186,6 +194,10 @@ export const KERNEL_REF_CONTENT_DIGESTS = Object.freeze({
|
|
|
186
194
|
"sample.wgsl#find_topk_phase2": "940b216e605d22096da5aca65950a8030866fc5a39e7fdf484d69a832de1b63a",
|
|
187
195
|
"sample.wgsl#sample_single_pass": "4412357e84113ee2f1bc0dc8bf89e314c2ab482c89c14ca016ea9949d16a9d0c",
|
|
188
196
|
"sample.wgsl#softmax_and_sample": "7172c60e76430fbe130e530e3564b569b45eccf193987b32d6f52bd6bbcc9f08",
|
|
197
|
+
"sana_linear_attention_apply_f16.wgsl#main": "e47366b94d40c4388e631b5bf93f8d61ef4e52cc65ffcd3b08d9d170616bb138",
|
|
198
|
+
"sana_linear_attention_apply.wgsl#main": "59cad7974c644fd910af776ad85a9a2c43c00492d4d1152fdc8373ecbb8bba18",
|
|
199
|
+
"sana_linear_attention_summary_f16.wgsl#main": "e3c040bb6469d37fc78eb22c1cc3e0456301607e461bbcdf5365a583c5d260d2",
|
|
200
|
+
"sana_linear_attention_summary.wgsl#main": "20c7ecdbcd1c73c0f9937c3cdac07b4b6edfe8618bf6f66281806343fd41b122",
|
|
189
201
|
"scale.wgsl#main": "44ec481452b586307957163e3d65c9d02561d3f2f3db633f906f5488b1ea1ca4",
|
|
190
202
|
"scale.wgsl#main_inplace": "020824c7118a59c461ce81f1c2cd01b7c2a3f1aab326392b7d48d4448a0c2ed1",
|
|
191
203
|
"scatter_add_dynamic_f16_weights.wgsl#scatter_add_dynamic": "42799e745bc445b199b1cbc384bc12bb9372ed1599af3260a803cefc8dd35497",
|
|
@@ -5537,6 +5537,228 @@
|
|
|
5537
5537
|
}
|
|
5538
5538
|
}
|
|
5539
5539
|
},
|
|
5540
|
+
"depthwise_conv2d": {
|
|
5541
|
+
"description": "Depthwise Conv2D (NCHW)",
|
|
5542
|
+
"baseBindings": [
|
|
5543
|
+
{
|
|
5544
|
+
"index": 0,
|
|
5545
|
+
"name": "uniforms",
|
|
5546
|
+
"type": "uniform"
|
|
5547
|
+
},
|
|
5548
|
+
{
|
|
5549
|
+
"index": 1,
|
|
5550
|
+
"name": "input",
|
|
5551
|
+
"type": "read-only-storage"
|
|
5552
|
+
},
|
|
5553
|
+
{
|
|
5554
|
+
"index": 2,
|
|
5555
|
+
"name": "weight",
|
|
5556
|
+
"type": "read-only-storage"
|
|
5557
|
+
},
|
|
5558
|
+
{
|
|
5559
|
+
"index": 3,
|
|
5560
|
+
"name": "bias",
|
|
5561
|
+
"type": "read-only-storage"
|
|
5562
|
+
},
|
|
5563
|
+
{
|
|
5564
|
+
"index": 4,
|
|
5565
|
+
"name": "output",
|
|
5566
|
+
"type": "storage"
|
|
5567
|
+
}
|
|
5568
|
+
],
|
|
5569
|
+
"baseUniforms": {
|
|
5570
|
+
"size": 48,
|
|
5571
|
+
"fields": [
|
|
5572
|
+
{
|
|
5573
|
+
"name": "channels",
|
|
5574
|
+
"type": "u32",
|
|
5575
|
+
"offset": 0
|
|
5576
|
+
},
|
|
5577
|
+
{
|
|
5578
|
+
"name": "height",
|
|
5579
|
+
"type": "u32",
|
|
5580
|
+
"offset": 4
|
|
5581
|
+
},
|
|
5582
|
+
{
|
|
5583
|
+
"name": "width",
|
|
5584
|
+
"type": "u32",
|
|
5585
|
+
"offset": 8
|
|
5586
|
+
},
|
|
5587
|
+
{
|
|
5588
|
+
"name": "out_height",
|
|
5589
|
+
"type": "u32",
|
|
5590
|
+
"offset": 12
|
|
5591
|
+
},
|
|
5592
|
+
{
|
|
5593
|
+
"name": "out_width",
|
|
5594
|
+
"type": "u32",
|
|
5595
|
+
"offset": 16
|
|
5596
|
+
},
|
|
5597
|
+
{
|
|
5598
|
+
"name": "kernel_h",
|
|
5599
|
+
"type": "u32",
|
|
5600
|
+
"offset": 20
|
|
5601
|
+
},
|
|
5602
|
+
{
|
|
5603
|
+
"name": "kernel_w",
|
|
5604
|
+
"type": "u32",
|
|
5605
|
+
"offset": 24
|
|
5606
|
+
},
|
|
5607
|
+
{
|
|
5608
|
+
"name": "stride",
|
|
5609
|
+
"type": "u32",
|
|
5610
|
+
"offset": 28
|
|
5611
|
+
},
|
|
5612
|
+
{
|
|
5613
|
+
"name": "pad",
|
|
5614
|
+
"type": "u32",
|
|
5615
|
+
"offset": 32
|
|
5616
|
+
},
|
|
5617
|
+
{
|
|
5618
|
+
"name": "_pad0",
|
|
5619
|
+
"type": "u32",
|
|
5620
|
+
"offset": 36
|
|
5621
|
+
},
|
|
5622
|
+
{
|
|
5623
|
+
"name": "_pad1",
|
|
5624
|
+
"type": "u32",
|
|
5625
|
+
"offset": 40
|
|
5626
|
+
},
|
|
5627
|
+
{
|
|
5628
|
+
"name": "_pad2",
|
|
5629
|
+
"type": "u32",
|
|
5630
|
+
"offset": 44
|
|
5631
|
+
}
|
|
5632
|
+
]
|
|
5633
|
+
},
|
|
5634
|
+
"variants": {
|
|
5635
|
+
"default": {
|
|
5636
|
+
"wgsl": "depthwise_conv2d.wgsl",
|
|
5637
|
+
"entryPoint": "main",
|
|
5638
|
+
"workgroup": [
|
|
5639
|
+
256,
|
|
5640
|
+
1,
|
|
5641
|
+
1
|
|
5642
|
+
],
|
|
5643
|
+
"requires": [],
|
|
5644
|
+
"outputDtype": "f32"
|
|
5645
|
+
},
|
|
5646
|
+
"default_f16": {
|
|
5647
|
+
"wgsl": "depthwise_conv2d_f16.wgsl",
|
|
5648
|
+
"entryPoint": "main",
|
|
5649
|
+
"workgroup": [
|
|
5650
|
+
256,
|
|
5651
|
+
1,
|
|
5652
|
+
1
|
|
5653
|
+
],
|
|
5654
|
+
"requires": [
|
|
5655
|
+
"shader-f16"
|
|
5656
|
+
],
|
|
5657
|
+
"outputDtype": "f16"
|
|
5658
|
+
}
|
|
5659
|
+
}
|
|
5660
|
+
},
|
|
5661
|
+
"grouped_pointwise_conv2d": {
|
|
5662
|
+
"description": "Grouped Pointwise Conv2D (NCHW)",
|
|
5663
|
+
"baseBindings": [
|
|
5664
|
+
{
|
|
5665
|
+
"index": 0,
|
|
5666
|
+
"name": "uniforms",
|
|
5667
|
+
"type": "uniform"
|
|
5668
|
+
},
|
|
5669
|
+
{
|
|
5670
|
+
"index": 1,
|
|
5671
|
+
"name": "input",
|
|
5672
|
+
"type": "read-only-storage"
|
|
5673
|
+
},
|
|
5674
|
+
{
|
|
5675
|
+
"index": 2,
|
|
5676
|
+
"name": "weight",
|
|
5677
|
+
"type": "read-only-storage"
|
|
5678
|
+
},
|
|
5679
|
+
{
|
|
5680
|
+
"index": 3,
|
|
5681
|
+
"name": "bias",
|
|
5682
|
+
"type": "read-only-storage"
|
|
5683
|
+
},
|
|
5684
|
+
{
|
|
5685
|
+
"index": 4,
|
|
5686
|
+
"name": "output",
|
|
5687
|
+
"type": "storage"
|
|
5688
|
+
}
|
|
5689
|
+
],
|
|
5690
|
+
"baseUniforms": {
|
|
5691
|
+
"size": 32,
|
|
5692
|
+
"fields": [
|
|
5693
|
+
{
|
|
5694
|
+
"name": "in_channels",
|
|
5695
|
+
"type": "u32",
|
|
5696
|
+
"offset": 0
|
|
5697
|
+
},
|
|
5698
|
+
{
|
|
5699
|
+
"name": "out_channels",
|
|
5700
|
+
"type": "u32",
|
|
5701
|
+
"offset": 4
|
|
5702
|
+
},
|
|
5703
|
+
{
|
|
5704
|
+
"name": "height",
|
|
5705
|
+
"type": "u32",
|
|
5706
|
+
"offset": 8
|
|
5707
|
+
},
|
|
5708
|
+
{
|
|
5709
|
+
"name": "width",
|
|
5710
|
+
"type": "u32",
|
|
5711
|
+
"offset": 12
|
|
5712
|
+
},
|
|
5713
|
+
{
|
|
5714
|
+
"name": "groups",
|
|
5715
|
+
"type": "u32",
|
|
5716
|
+
"offset": 16
|
|
5717
|
+
},
|
|
5718
|
+
{
|
|
5719
|
+
"name": "_pad0",
|
|
5720
|
+
"type": "u32",
|
|
5721
|
+
"offset": 20
|
|
5722
|
+
},
|
|
5723
|
+
{
|
|
5724
|
+
"name": "_pad1",
|
|
5725
|
+
"type": "u32",
|
|
5726
|
+
"offset": 24
|
|
5727
|
+
},
|
|
5728
|
+
{
|
|
5729
|
+
"name": "_pad2",
|
|
5730
|
+
"type": "u32",
|
|
5731
|
+
"offset": 28
|
|
5732
|
+
}
|
|
5733
|
+
]
|
|
5734
|
+
},
|
|
5735
|
+
"variants": {
|
|
5736
|
+
"default": {
|
|
5737
|
+
"wgsl": "grouped_pointwise_conv2d.wgsl",
|
|
5738
|
+
"entryPoint": "main",
|
|
5739
|
+
"workgroup": [
|
|
5740
|
+
256,
|
|
5741
|
+
1,
|
|
5742
|
+
1
|
|
5743
|
+
],
|
|
5744
|
+
"requires": [],
|
|
5745
|
+
"outputDtype": "f32"
|
|
5746
|
+
},
|
|
5747
|
+
"default_f16": {
|
|
5748
|
+
"wgsl": "grouped_pointwise_conv2d_f16.wgsl",
|
|
5749
|
+
"entryPoint": "main",
|
|
5750
|
+
"workgroup": [
|
|
5751
|
+
256,
|
|
5752
|
+
1,
|
|
5753
|
+
1
|
|
5754
|
+
],
|
|
5755
|
+
"requires": [
|
|
5756
|
+
"shader-f16"
|
|
5757
|
+
],
|
|
5758
|
+
"outputDtype": "f16"
|
|
5759
|
+
}
|
|
5760
|
+
}
|
|
5761
|
+
},
|
|
5540
5762
|
"groupnorm_stats": {
|
|
5541
5763
|
"description": "GroupNorm stats (mean/var) for each group",
|
|
5542
5764
|
"baseBindings": [
|
|
@@ -6424,6 +6646,340 @@
|
|
|
6424
6646
|
}
|
|
6425
6647
|
}
|
|
6426
6648
|
},
|
|
6649
|
+
"sana_linear_attention_summary": {
|
|
6650
|
+
"description": "Sana linear attention summary stage",
|
|
6651
|
+
"baseBindings": [
|
|
6652
|
+
{
|
|
6653
|
+
"index": 0,
|
|
6654
|
+
"name": "uniforms",
|
|
6655
|
+
"type": "uniform"
|
|
6656
|
+
},
|
|
6657
|
+
{
|
|
6658
|
+
"index": 1,
|
|
6659
|
+
"name": "query",
|
|
6660
|
+
"type": "read-only-storage"
|
|
6661
|
+
},
|
|
6662
|
+
{
|
|
6663
|
+
"index": 2,
|
|
6664
|
+
"name": "key",
|
|
6665
|
+
"type": "read-only-storage"
|
|
6666
|
+
},
|
|
6667
|
+
{
|
|
6668
|
+
"index": 3,
|
|
6669
|
+
"name": "value",
|
|
6670
|
+
"type": "read-only-storage"
|
|
6671
|
+
},
|
|
6672
|
+
{
|
|
6673
|
+
"index": 4,
|
|
6674
|
+
"name": "summary",
|
|
6675
|
+
"type": "storage"
|
|
6676
|
+
}
|
|
6677
|
+
],
|
|
6678
|
+
"baseUniforms": {
|
|
6679
|
+
"size": 24,
|
|
6680
|
+
"fields": [
|
|
6681
|
+
{
|
|
6682
|
+
"name": "num_heads",
|
|
6683
|
+
"type": "u32",
|
|
6684
|
+
"offset": 0
|
|
6685
|
+
},
|
|
6686
|
+
{
|
|
6687
|
+
"name": "head_dim",
|
|
6688
|
+
"type": "u32",
|
|
6689
|
+
"offset": 4
|
|
6690
|
+
},
|
|
6691
|
+
{
|
|
6692
|
+
"name": "num_tokens",
|
|
6693
|
+
"type": "u32",
|
|
6694
|
+
"offset": 8
|
|
6695
|
+
},
|
|
6696
|
+
{
|
|
6697
|
+
"name": "hidden_size",
|
|
6698
|
+
"type": "u32",
|
|
6699
|
+
"offset": 12
|
|
6700
|
+
},
|
|
6701
|
+
{
|
|
6702
|
+
"name": "_pad0",
|
|
6703
|
+
"type": "u32",
|
|
6704
|
+
"offset": 16
|
|
6705
|
+
},
|
|
6706
|
+
{
|
|
6707
|
+
"name": "_pad1",
|
|
6708
|
+
"type": "u32",
|
|
6709
|
+
"offset": 20
|
|
6710
|
+
}
|
|
6711
|
+
]
|
|
6712
|
+
},
|
|
6713
|
+
"variants": {
|
|
6714
|
+
"default": {
|
|
6715
|
+
"wgsl": "sana_linear_attention_summary.wgsl",
|
|
6716
|
+
"entryPoint": "main",
|
|
6717
|
+
"workgroup": [
|
|
6718
|
+
256,
|
|
6719
|
+
1,
|
|
6720
|
+
1
|
|
6721
|
+
],
|
|
6722
|
+
"requires": [],
|
|
6723
|
+
"outputDtype": "f32"
|
|
6724
|
+
},
|
|
6725
|
+
"default_f16": {
|
|
6726
|
+
"wgsl": "sana_linear_attention_summary_f16.wgsl",
|
|
6727
|
+
"entryPoint": "main",
|
|
6728
|
+
"workgroup": [
|
|
6729
|
+
256,
|
|
6730
|
+
1,
|
|
6731
|
+
1
|
|
6732
|
+
],
|
|
6733
|
+
"requires": [
|
|
6734
|
+
"shader-f16"
|
|
6735
|
+
],
|
|
6736
|
+
"outputDtype": "f32"
|
|
6737
|
+
}
|
|
6738
|
+
}
|
|
6739
|
+
},
|
|
6740
|
+
"sana_linear_attention_apply": {
|
|
6741
|
+
"description": "Sana linear attention apply stage",
|
|
6742
|
+
"baseBindings": [
|
|
6743
|
+
{
|
|
6744
|
+
"index": 0,
|
|
6745
|
+
"name": "uniforms",
|
|
6746
|
+
"type": "uniform"
|
|
6747
|
+
},
|
|
6748
|
+
{
|
|
6749
|
+
"index": 1,
|
|
6750
|
+
"name": "query",
|
|
6751
|
+
"type": "read-only-storage"
|
|
6752
|
+
},
|
|
6753
|
+
{
|
|
6754
|
+
"index": 2,
|
|
6755
|
+
"name": "summary",
|
|
6756
|
+
"type": "read-only-storage"
|
|
6757
|
+
},
|
|
6758
|
+
{
|
|
6759
|
+
"index": 3,
|
|
6760
|
+
"name": "output",
|
|
6761
|
+
"type": "storage"
|
|
6762
|
+
}
|
|
6763
|
+
],
|
|
6764
|
+
"baseUniforms": {
|
|
6765
|
+
"size": 32,
|
|
6766
|
+
"fields": [
|
|
6767
|
+
{
|
|
6768
|
+
"name": "num_heads",
|
|
6769
|
+
"type": "u32",
|
|
6770
|
+
"offset": 0
|
|
6771
|
+
},
|
|
6772
|
+
{
|
|
6773
|
+
"name": "head_dim",
|
|
6774
|
+
"type": "u32",
|
|
6775
|
+
"offset": 4
|
|
6776
|
+
},
|
|
6777
|
+
{
|
|
6778
|
+
"name": "num_tokens",
|
|
6779
|
+
"type": "u32",
|
|
6780
|
+
"offset": 8
|
|
6781
|
+
},
|
|
6782
|
+
{
|
|
6783
|
+
"name": "hidden_size",
|
|
6784
|
+
"type": "u32",
|
|
6785
|
+
"offset": 12
|
|
6786
|
+
},
|
|
6787
|
+
{
|
|
6788
|
+
"name": "eps",
|
|
6789
|
+
"type": "f32",
|
|
6790
|
+
"offset": 16
|
|
6791
|
+
},
|
|
6792
|
+
{
|
|
6793
|
+
"name": "_pad0",
|
|
6794
|
+
"type": "u32",
|
|
6795
|
+
"offset": 20
|
|
6796
|
+
},
|
|
6797
|
+
{
|
|
6798
|
+
"name": "_pad1",
|
|
6799
|
+
"type": "u32",
|
|
6800
|
+
"offset": 24
|
|
6801
|
+
},
|
|
6802
|
+
{
|
|
6803
|
+
"name": "_pad2",
|
|
6804
|
+
"type": "u32",
|
|
6805
|
+
"offset": 28
|
|
6806
|
+
}
|
|
6807
|
+
]
|
|
6808
|
+
},
|
|
6809
|
+
"variants": {
|
|
6810
|
+
"default": {
|
|
6811
|
+
"wgsl": "sana_linear_attention_apply.wgsl",
|
|
6812
|
+
"entryPoint": "main",
|
|
6813
|
+
"workgroup": [
|
|
6814
|
+
256,
|
|
6815
|
+
1,
|
|
6816
|
+
1
|
|
6817
|
+
],
|
|
6818
|
+
"requires": [],
|
|
6819
|
+
"outputDtype": "f32"
|
|
6820
|
+
},
|
|
6821
|
+
"default_f16": {
|
|
6822
|
+
"wgsl": "sana_linear_attention_apply_f16.wgsl",
|
|
6823
|
+
"entryPoint": "main",
|
|
6824
|
+
"workgroup": [
|
|
6825
|
+
256,
|
|
6826
|
+
1,
|
|
6827
|
+
1
|
|
6828
|
+
],
|
|
6829
|
+
"requires": [
|
|
6830
|
+
"shader-f16"
|
|
6831
|
+
],
|
|
6832
|
+
"outputDtype": "f16"
|
|
6833
|
+
}
|
|
6834
|
+
}
|
|
6835
|
+
},
|
|
6836
|
+
"repeat_channels": {
|
|
6837
|
+
"description": "Repeat channels along the NCHW channel axis",
|
|
6838
|
+
"baseBindings": [
|
|
6839
|
+
{
|
|
6840
|
+
"index": 0,
|
|
6841
|
+
"name": "uniforms",
|
|
6842
|
+
"type": "uniform"
|
|
6843
|
+
},
|
|
6844
|
+
{
|
|
6845
|
+
"index": 1,
|
|
6846
|
+
"name": "input",
|
|
6847
|
+
"type": "read-only-storage"
|
|
6848
|
+
},
|
|
6849
|
+
{
|
|
6850
|
+
"index": 2,
|
|
6851
|
+
"name": "output",
|
|
6852
|
+
"type": "storage"
|
|
6853
|
+
}
|
|
6854
|
+
],
|
|
6855
|
+
"baseUniforms": {
|
|
6856
|
+
"size": 20,
|
|
6857
|
+
"fields": [
|
|
6858
|
+
{
|
|
6859
|
+
"name": "in_channels",
|
|
6860
|
+
"type": "u32",
|
|
6861
|
+
"offset": 0
|
|
6862
|
+
},
|
|
6863
|
+
{
|
|
6864
|
+
"name": "height",
|
|
6865
|
+
"type": "u32",
|
|
6866
|
+
"offset": 4
|
|
6867
|
+
},
|
|
6868
|
+
{
|
|
6869
|
+
"name": "width",
|
|
6870
|
+
"type": "u32",
|
|
6871
|
+
"offset": 8
|
|
6872
|
+
},
|
|
6873
|
+
{
|
|
6874
|
+
"name": "repeats",
|
|
6875
|
+
"type": "u32",
|
|
6876
|
+
"offset": 12
|
|
6877
|
+
},
|
|
6878
|
+
{
|
|
6879
|
+
"name": "_pad0",
|
|
6880
|
+
"type": "u32",
|
|
6881
|
+
"offset": 16
|
|
6882
|
+
}
|
|
6883
|
+
]
|
|
6884
|
+
},
|
|
6885
|
+
"variants": {
|
|
6886
|
+
"default": {
|
|
6887
|
+
"wgsl": "repeat_channels.wgsl",
|
|
6888
|
+
"entryPoint": "main",
|
|
6889
|
+
"workgroup": [
|
|
6890
|
+
256,
|
|
6891
|
+
1,
|
|
6892
|
+
1
|
|
6893
|
+
],
|
|
6894
|
+
"requires": [],
|
|
6895
|
+
"outputDtype": "f32"
|
|
6896
|
+
},
|
|
6897
|
+
"default_f16": {
|
|
6898
|
+
"wgsl": "repeat_channels_f16.wgsl",
|
|
6899
|
+
"entryPoint": "main",
|
|
6900
|
+
"workgroup": [
|
|
6901
|
+
256,
|
|
6902
|
+
1,
|
|
6903
|
+
1
|
|
6904
|
+
],
|
|
6905
|
+
"requires": [
|
|
6906
|
+
"shader-f16"
|
|
6907
|
+
],
|
|
6908
|
+
"outputDtype": "f16"
|
|
6909
|
+
}
|
|
6910
|
+
}
|
|
6911
|
+
},
|
|
6912
|
+
"relu": {
|
|
6913
|
+
"description": "ReLU activation",
|
|
6914
|
+
"baseBindings": [
|
|
6915
|
+
{
|
|
6916
|
+
"index": 0,
|
|
6917
|
+
"name": "uniforms",
|
|
6918
|
+
"type": "uniform"
|
|
6919
|
+
},
|
|
6920
|
+
{
|
|
6921
|
+
"index": 1,
|
|
6922
|
+
"name": "input",
|
|
6923
|
+
"type": "read-only-storage"
|
|
6924
|
+
},
|
|
6925
|
+
{
|
|
6926
|
+
"index": 2,
|
|
6927
|
+
"name": "output",
|
|
6928
|
+
"type": "storage"
|
|
6929
|
+
}
|
|
6930
|
+
],
|
|
6931
|
+
"baseUniforms": {
|
|
6932
|
+
"size": 16,
|
|
6933
|
+
"fields": [
|
|
6934
|
+
{
|
|
6935
|
+
"name": "size",
|
|
6936
|
+
"type": "u32",
|
|
6937
|
+
"offset": 0
|
|
6938
|
+
},
|
|
6939
|
+
{
|
|
6940
|
+
"name": "_pad0",
|
|
6941
|
+
"type": "u32",
|
|
6942
|
+
"offset": 4
|
|
6943
|
+
},
|
|
6944
|
+
{
|
|
6945
|
+
"name": "_pad1",
|
|
6946
|
+
"type": "u32",
|
|
6947
|
+
"offset": 8
|
|
6948
|
+
},
|
|
6949
|
+
{
|
|
6950
|
+
"name": "_pad2",
|
|
6951
|
+
"type": "u32",
|
|
6952
|
+
"offset": 12
|
|
6953
|
+
}
|
|
6954
|
+
]
|
|
6955
|
+
},
|
|
6956
|
+
"variants": {
|
|
6957
|
+
"default": {
|
|
6958
|
+
"wgsl": "relu.wgsl",
|
|
6959
|
+
"entryPoint": "main",
|
|
6960
|
+
"workgroup": [
|
|
6961
|
+
256,
|
|
6962
|
+
1,
|
|
6963
|
+
1
|
|
6964
|
+
],
|
|
6965
|
+
"requires": [],
|
|
6966
|
+
"outputDtype": "f32"
|
|
6967
|
+
},
|
|
6968
|
+
"default_f16": {
|
|
6969
|
+
"wgsl": "relu_f16.wgsl",
|
|
6970
|
+
"entryPoint": "main",
|
|
6971
|
+
"workgroup": [
|
|
6972
|
+
256,
|
|
6973
|
+
1,
|
|
6974
|
+
1
|
|
6975
|
+
],
|
|
6976
|
+
"requires": [
|
|
6977
|
+
"shader-f16"
|
|
6978
|
+
],
|
|
6979
|
+
"outputDtype": "f16"
|
|
6980
|
+
}
|
|
6981
|
+
}
|
|
6982
|
+
},
|
|
6427
6983
|
"conv2d_backward_input": {
|
|
6428
6984
|
"description": "Conv2D backward (input gradient)",
|
|
6429
6985
|
"baseBindings": [
|