@simulatte/doppler 0.1.3 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114)
  1. package/README.md +11 -5
  2. package/package.json +27 -4
  3. package/src/client/doppler-api.browser.d.ts +1 -0
  4. package/src/client/doppler-api.browser.js +288 -0
  5. package/src/client/doppler-api.d.ts +80 -0
  6. package/src/client/doppler-api.js +298 -0
  7. package/src/client/doppler-provider/types.js +1 -1
  8. package/src/client/doppler-registry.d.ts +23 -0
  9. package/src/client/doppler-registry.js +88 -0
  10. package/src/client/doppler-registry.json +39 -0
  11. package/src/config/execution-contract-check.d.ts +82 -0
  12. package/src/config/execution-contract-check.js +317 -0
  13. package/src/config/execution-v0-contract-check.d.ts +94 -0
  14. package/src/config/execution-v0-contract-check.js +251 -0
  15. package/src/config/execution-v0-graph-contract-check.d.ts +20 -0
  16. package/src/config/execution-v0-graph-contract-check.js +64 -0
  17. package/src/config/kernel-path-contract-check.d.ts +76 -0
  18. package/src/config/kernel-path-contract-check.js +479 -0
  19. package/src/config/kernel-path-loader.d.ts +16 -0
  20. package/src/config/kernel-path-loader.js +54 -0
  21. package/src/config/kernels/kernel-ref-digests.js +12 -0
  22. package/src/config/kernels/registry.json +556 -0
  23. package/src/config/loader.js +90 -67
  24. package/src/config/merge-contract-check.d.ts +16 -0
  25. package/src/config/merge-contract-check.js +321 -0
  26. package/src/config/merge-helpers.d.ts +58 -0
  27. package/src/config/merge-helpers.js +54 -0
  28. package/src/config/merge.js +3 -6
  29. package/src/config/presets/models/janus-text.json +27 -0
  30. package/src/config/quantization-contract-check.d.ts +12 -0
  31. package/src/config/quantization-contract-check.js +91 -0
  32. package/src/config/required-inference-fields-contract-check.d.ts +24 -0
  33. package/src/config/required-inference-fields-contract-check.js +231 -0
  34. package/src/config/schema/browser-suite-metrics.schema.d.ts +17 -0
  35. package/src/config/schema/browser-suite-metrics.schema.js +46 -0
  36. package/src/config/schema/conversion-report.schema.d.ts +40 -0
  37. package/src/config/schema/conversion-report.schema.js +108 -0
  38. package/src/config/schema/doppler.schema.js +12 -18
  39. package/src/config/schema/index.d.ts +22 -0
  40. package/src/config/schema/index.js +18 -0
  41. package/src/converter/core.d.ts +10 -0
  42. package/src/converter/core.js +49 -11
  43. package/src/converter/parsers/diffusion.js +63 -3
  44. package/src/converter/tokenizer-utils.js +17 -3
  45. package/src/formats/rdrr/validation.js +13 -0
  46. package/src/gpu/kernels/depthwise_conv2d.d.ts +29 -0
  47. package/src/gpu/kernels/depthwise_conv2d.js +98 -0
  48. package/src/gpu/kernels/depthwise_conv2d.wgsl +58 -0
  49. package/src/gpu/kernels/depthwise_conv2d_f16.wgsl +62 -0
  50. package/src/gpu/kernels/grouped_pointwise_conv2d.d.ts +27 -0
  51. package/src/gpu/kernels/grouped_pointwise_conv2d.js +92 -0
  52. package/src/gpu/kernels/grouped_pointwise_conv2d.wgsl +47 -0
  53. package/src/gpu/kernels/grouped_pointwise_conv2d_f16.wgsl +51 -0
  54. package/src/gpu/kernels/index.d.ts +30 -0
  55. package/src/gpu/kernels/index.js +25 -0
  56. package/src/gpu/kernels/relu.d.ts +18 -0
  57. package/src/gpu/kernels/relu.js +45 -0
  58. package/src/gpu/kernels/relu.wgsl +21 -0
  59. package/src/gpu/kernels/relu_f16.wgsl +23 -0
  60. package/src/gpu/kernels/repeat_channels.d.ts +21 -0
  61. package/src/gpu/kernels/repeat_channels.js +60 -0
  62. package/src/gpu/kernels/repeat_channels.wgsl +29 -0
  63. package/src/gpu/kernels/repeat_channels_f16.wgsl +31 -0
  64. package/src/gpu/kernels/sana_linear_attention.d.ts +27 -0
  65. package/src/gpu/kernels/sana_linear_attention.js +122 -0
  66. package/src/gpu/kernels/sana_linear_attention_apply.wgsl +44 -0
  67. package/src/gpu/kernels/sana_linear_attention_apply_f16.wgsl +47 -0
  68. package/src/gpu/kernels/sana_linear_attention_summary.wgsl +47 -0
  69. package/src/gpu/kernels/sana_linear_attention_summary_f16.wgsl +49 -0
  70. package/src/index-browser.d.ts +1 -0
  71. package/src/index-browser.js +2 -1
  72. package/src/index.d.ts +1 -0
  73. package/src/index.js +2 -1
  74. package/src/inference/browser-harness.js +164 -38
  75. package/src/inference/pipelines/diffusion/init.js +14 -0
  76. package/src/inference/pipelines/diffusion/pipeline.js +206 -77
  77. package/src/inference/pipelines/diffusion/sana-transformer.d.ts +53 -0
  78. package/src/inference/pipelines/diffusion/sana-transformer.js +738 -0
  79. package/src/inference/pipelines/diffusion/scheduler.d.ts +17 -1
  80. package/src/inference/pipelines/diffusion/scheduler.js +91 -3
  81. package/src/inference/pipelines/diffusion/text-encoder-gpu.d.ts +6 -4
  82. package/src/inference/pipelines/diffusion/text-encoder-gpu.js +270 -0
  83. package/src/inference/pipelines/diffusion/text-encoder.js +18 -1
  84. package/src/inference/pipelines/diffusion/types.d.ts +4 -0
  85. package/src/inference/pipelines/diffusion/vae.js +782 -78
  86. package/src/inference/pipelines/text/config.d.ts +5 -0
  87. package/src/inference/pipelines/text/config.js +1 -1
  88. package/src/inference/pipelines/text/execution-v0.js +141 -101
  89. package/src/inference/pipelines/text/init.js +41 -10
  90. package/src/inference/pipelines/text.js +7 -1
  91. package/src/rules/execution-rules-contract-check.d.ts +17 -0
  92. package/src/rules/execution-rules-contract-check.js +245 -0
  93. package/src/rules/kernels/depthwise-conv2d.rules.json +6 -0
  94. package/src/rules/kernels/grouped-pointwise-conv2d.rules.json +6 -0
  95. package/src/rules/kernels/relu.rules.json +6 -0
  96. package/src/rules/kernels/repeat-channels.rules.json +6 -0
  97. package/src/rules/kernels/sana-linear-attention.rules.json +6 -0
  98. package/src/rules/layer-pattern-contract-check.d.ts +17 -0
  99. package/src/rules/layer-pattern-contract-check.js +231 -0
  100. package/src/rules/rule-registry.d.ts +28 -0
  101. package/src/rules/rule-registry.js +38 -0
  102. package/src/tooling/conversion-config-materializer.d.ts +24 -0
  103. package/src/tooling/conversion-config-materializer.js +99 -0
  104. package/src/tooling/lean-execution-contract-runner.d.ts +43 -0
  105. package/src/tooling/lean-execution-contract-runner.js +158 -0
  106. package/src/tooling/lean-execution-contract.d.ts +16 -0
  107. package/src/tooling/lean-execution-contract.js +81 -0
  108. package/src/tooling/node-convert.d.ts +10 -0
  109. package/src/tooling/node-converter.js +59 -0
  110. package/src/tooling/node-webgpu.js +30 -9
  111. package/src/version.d.ts +2 -0
  112. package/src/version.js +2 -0
  113. package/tools/convert-safetensors-node.js +47 -0
  114. package/tools/doppler-cli.js +167 -6
@@ -0,0 +1,122 @@
1
+ import { getDevice } from '../device.js';
2
+ import { acquireBuffer, releaseBuffer } from '../../memory/buffer-pool.js';
3
+ import { createTensor, dtypeBytes } from '../tensor.js';
4
+ import { unifiedKernelWrapper } from './utils.js';
5
+ import { selectRuleValue } from './rule-registry.js';
6
+ import { WORKGROUP_SIZES } from './constants.js';
7
+
8
/**
 * Looks up the kernel variant for Sana linear attention in the rule registry.
 *
 * @param {boolean} isF16 - Whether the query tensor dtype is 'f16'.
 * @returns {*} Whatever `selectRuleValue` yields for the
 *   'sanaLinearAttention' / 'variant' rule given `{ isF16 }`.
 */
function selectSanaLinearAttentionVariant(isF16) {
  const ruleContext = { isF16 };
  return selectRuleValue('sanaLinearAttention', 'variant', ruleContext);
}
11
+
12
/**
 * Dispatches the 'sana_linear_attention_summary' kernel.
 *
 * One invocation is requested per summary element, i.e.
 * num_heads * (head_dim + 1) * head_dim elements, rounded up to whole
 * workgroups of WORKGROUP_SIZES.DEFAULT.
 *
 * @param {*} target - Forwarded unchanged to `unifiedKernelWrapper`.
 * @param {*} query - Query tensor/buffer (first binding).
 * @param {*} key - Key tensor/buffer (second binding).
 * @param {*} value - Value tensor/buffer (third binding).
 * @param {*} summaryBuffer - Destination buffer for the summary (fourth binding).
 * @param {{num_heads: number, head_dim: number, num_tokens: number, hidden_size: number}} uniforms
 * @param {*} variant - Kernel variant selected by the caller.
 * @returns {Promise<void>}
 */
async function runSummary(target, query, key, value, summaryBuffer, uniforms, variant) {
  const { num_heads, head_dim, num_tokens, hidden_size } = uniforms;

  // (head_dim + 1) rows of head_dim columns per head.
  const elementCount = num_heads * (head_dim + 1) * head_dim;
  const workgroupCount = Math.ceil(elementCount / WORKGROUP_SIZES.DEFAULT);

  const bindings = [query, key, value, summaryBuffer];
  // Key order mirrors the field order of the shader's Uniforms struct.
  const kernelUniforms = {
    num_heads,
    head_dim,
    num_tokens,
    hidden_size,
    _pad0: 0,
    _pad1: 0,
  };

  await unifiedKernelWrapper(
    'sana_linear_attention_summary',
    target,
    variant,
    bindings,
    kernelUniforms,
    workgroupCount
  );
}
30
+
31
/**
 * Dispatches the 'sana_linear_attention_apply' kernel.
 *
 * One invocation is requested per output element
 * (num_tokens * hidden_size), rounded up to whole workgroups of
 * WORKGROUP_SIZES.DEFAULT.
 *
 * @param {*} target - Forwarded unchanged to `unifiedKernelWrapper`.
 * @param {*} query - Query tensor/buffer (first binding).
 * @param {*} summaryBuffer - Summary produced by the summary pass (second binding).
 * @param {*} outputBuffer - Destination buffer (third binding).
 * @param {{num_heads: number, head_dim: number, num_tokens: number, hidden_size: number, eps: number}} uniforms
 * @param {*} variant - Kernel variant selected by the caller.
 * @returns {Promise<void>}
 */
async function runApply(target, query, summaryBuffer, outputBuffer, uniforms, variant) {
  const { num_heads, head_dim, num_tokens, hidden_size, eps } = uniforms;

  const elementCount = num_tokens * hidden_size;
  const workgroupCount = Math.ceil(elementCount / WORKGROUP_SIZES.DEFAULT);

  const bindings = [query, summaryBuffer, outputBuffer];
  // Key order mirrors the field order of the shader's Uniforms struct.
  const kernelUniforms = {
    num_heads,
    head_dim,
    num_tokens,
    hidden_size,
    eps,
    _pad0: 0,
    _pad1: 0,
    _pad2: 0,
  };

  await unifiedKernelWrapper(
    'sana_linear_attention_apply',
    target,
    variant,
    bindings,
    kernelUniforms,
    workgroupCount
  );
}
51
+
52
/**
 * Shared implementation behind the run/record entry points: runs the summary
 * pass followed by the apply pass and wraps the result in a tensor.
 *
 * @param {*} target - A recorder (detected by a `beginComputePass` method) or
 *   null/other. Its `device` is used when present, otherwise `getDevice()`.
 * @param {*} query - Query tensor; `dtype` picks the variant and output dtype,
 *   `shape` supplies the defaults for `numTokens` / `hiddenSize`.
 * @param {*} key - Key tensor, forwarded to the summary pass.
 * @param {*} value - Value tensor, forwarded to the summary pass.
 * @param {object} [options]
 * @param {number} options.numHeads - Number of heads (required).
 * @param {number} options.headDim - Per-head dimension (required).
 * @param {number} [options.numTokens=query.shape[0]]
 * @param {number} [options.hiddenSize=query.shape[1]] - Must equal numHeads * headDim.
 * @param {number} [options.eps=1e-15] - Added to the denominator in the apply pass.
 * @param {*} [options.outputBuffer=null] - Caller-owned output; acquired from the pool when omitted.
 * @param {*} [options.summaryBuffer=null] - Caller-owned summary scratch; acquired from the pool when omitted.
 * @returns {Promise<*>} Tensor of shape [numTokens, hiddenSize] with the query's dtype.
 * @throws {Error} When no device is available, a dimension is missing or
 *   non-finite, or hiddenSize != numHeads * headDim. Errors from the kernel
 *   passes are rethrown after buffers acquired here have been handed back.
 */
async function _sanaLinearAttention(target, query, key, value, options = {}) {
  const recorder = target && typeof target.beginComputePass === 'function' ? target : null;
  const device = target?.device || getDevice();
  if (!device) {
    throw new Error('SanaLinearAttention requires a WebGPU device.');
  }

  const {
    numHeads,
    headDim,
    numTokens = query.shape?.[0],
    hiddenSize = query.shape?.[1],
    eps = 1e-15,
    outputBuffer = null,
    summaryBuffer = null,
  } = options;

  if (
    !Number.isFinite(numHeads) ||
    !Number.isFinite(headDim) ||
    !Number.isFinite(numTokens) ||
    !Number.isFinite(hiddenSize)
  ) {
    throw new Error('SanaLinearAttention requires numHeads, headDim, numTokens, and hiddenSize.');
  }
  if (hiddenSize !== numHeads * headDim) {
    throw new Error(`SanaLinearAttention hiddenSize mismatch: ${hiddenSize} != ${numHeads} * ${headDim}`);
  }

  const isF16 = query.dtype === 'f16';
  const variant = selectSanaLinearAttentionVariant(isF16);

  // Hands back a buffer that this call acquired: a recorder takes ownership of
  // it as a temporary, otherwise it goes straight back to the pool.
  const discardOwnedBuffer = (buffer) => {
    if (recorder) {
      recorder.trackTemporaryBuffer(buffer);
    } else {
      releaseBuffer(buffer);
    }
  };

  // The summary is always f32: (headDim + 1) rows of headDim columns per head.
  const temporarySummary = summaryBuffer || acquireBuffer(
    numHeads * (headDim + 1) * headDim * Float32Array.BYTES_PER_ELEMENT,
    undefined,
    'sana_linear_attention_summary'
  );

  let output = null;
  try {
    output = outputBuffer || acquireBuffer(
      numTokens * hiddenSize * dtypeBytes(query.dtype),
      undefined,
      'sana_linear_attention_output'
    );

    const uniforms = {
      num_heads: numHeads,
      head_dim: headDim,
      num_tokens: numTokens,
      hidden_size: hiddenSize,
      eps,
    };

    await runSummary(target, query, key, value, temporarySummary, uniforms, variant);
    await runApply(target, query, temporarySummary, output, uniforms, variant);
  } catch (error) {
    // On failure nothing is returned to the caller, so an output buffer that
    // was acquired here would otherwise never be released.
    if (!outputBuffer && output) {
      discardOwnedBuffer(output);
    }
    throw error;
  } finally {
    // The summary scratch is only needed for the two passes; give it back on
    // both the success and the failure path unless the caller supplied it.
    if (!summaryBuffer) {
      discardOwnedBuffer(temporarySummary);
    }
  }

  return createTensor(output, query.dtype, [numTokens, hiddenSize], 'sana_linear_attention_output');
}
115
+
116
/**
 * Runs Sana linear attention without a recorder (a null target is passed to
 * the shared implementation).
 *
 * @param {*} query - Query tensor.
 * @param {*} key - Key tensor.
 * @param {*} value - Value tensor.
 * @param {object} [options] - Forwarded unchanged to `_sanaLinearAttention`.
 * @returns {Promise<*>} The tensor produced by `_sanaLinearAttention`.
 */
export async function runSanaLinearAttention(query, key, value, options = {}) {
  const noRecorder = null;
  return _sanaLinearAttention(noRecorder, query, key, value, options);
}
119
+
120
/**
 * Runs Sana linear attention with the given recorder as the target of the
 * shared implementation.
 *
 * @param {*} recorder - Passed as the `target` of `_sanaLinearAttention`.
 * @param {*} query - Query tensor.
 * @param {*} key - Key tensor.
 * @param {*} value - Value tensor.
 * @param {object} [options] - Forwarded unchanged to `_sanaLinearAttention`.
 * @returns {Promise<*>} The tensor produced by `_sanaLinearAttention`.
 */
export async function recordSanaLinearAttention(recorder, query, key, value, options = {}) {
  const target = recorder;
  return _sanaLinearAttention(target, query, key, value, options);
}
@@ -0,0 +1,44 @@
1
// Apply pass of Sana linear attention (f32 query/output).
//
// One invocation per output element. For element (token, head, dim) it dots
// relu(query[token, head, :]) with two rows of that head's summary block:
// row `dim` gives the numerator and row `head_dim` gives the denominator.
// The output is numerator / (denominator + eps).

override WORKGROUP_SIZE: u32 = 256u;

struct Uniforms {
    num_heads: u32,
    head_dim: u32,
    num_tokens: u32,
    hidden_size: u32,
    eps: f32,
    _pad0: u32,
    _pad1: u32,
    _pad2: u32,
}

@group(0) @binding(0) var<uniform> u: Uniforms;
@group(0) @binding(1) var<storage, read> query: array<f32>;
@group(0) @binding(2) var<storage, read> summary: array<f32>;
@group(0) @binding(3) var<storage, read_write> output: array<f32>;

@compute @workgroup_size(WORKGROUP_SIZE, 1, 1)
fn main(@builtin(global_invocation_id) gid: vec3<u32>) {
    let flat = gid.x;
    // Invocations past the last output element have nothing to do.
    if (flat >= u.num_tokens * u.hidden_size) {
        return;
    }

    // Decompose the flat index into (token, head, dim-within-head).
    let token = flat / u.hidden_size;
    let channel = flat - token * u.hidden_size;
    let head = channel / u.head_dim;
    let out_dim = channel - head * u.head_dim;

    // Each head owns (head_dim + 1) summary rows of head_dim columns.
    let summary_base = head * (u.head_dim + 1u) * u.head_dim;
    let numerator_row = summary_base + out_dim * u.head_dim;
    let denominator_row = summary_base + u.head_dim * u.head_dim;
    let query_base = token * u.hidden_size + head * u.head_dim;

    var numerator: f32 = 0.0;
    var denominator: f32 = 0.0;
    for (var k: u32 = 0u; k < u.head_dim; k++) {
        // ReLU on the query before it is used.
        let q = max(query[query_base + k], 0.0);
        numerator += summary[numerator_row + k] * q;
        denominator += summary[denominator_row + k] * q;
    }

    output[flat] = numerator / (denominator + u.eps);
}
@@ -0,0 +1,47 @@
1
// Apply pass of Sana linear attention (f16 query/output, f32 summary).
//
// One invocation per output element. For element (token, head, dim) it dots
// relu(query[token, head, :]) with two rows of that head's summary block:
// row `dim` gives the numerator and row `head_dim` gives the denominator.
// Accumulation happens in f32; the quotient is clamped to +/-65504 before
// being converted to f16.

enable f16;

override WORKGROUP_SIZE: u32 = 256u;

struct Uniforms {
    num_heads: u32,
    head_dim: u32,
    num_tokens: u32,
    hidden_size: u32,
    eps: f32,
    _pad0: u32,
    _pad1: u32,
    _pad2: u32,
}

@group(0) @binding(0) var<uniform> u: Uniforms;
@group(0) @binding(1) var<storage, read> query: array<f16>;
@group(0) @binding(2) var<storage, read> summary: array<f32>;
@group(0) @binding(3) var<storage, read_write> output: array<f16>;

@compute @workgroup_size(WORKGROUP_SIZE, 1, 1)
fn main(@builtin(global_invocation_id) gid: vec3<u32>) {
    let flat = gid.x;
    // Invocations past the last output element have nothing to do.
    if (flat >= u.num_tokens * u.hidden_size) {
        return;
    }

    // Decompose the flat index into (token, head, dim-within-head).
    let token = flat / u.hidden_size;
    let channel = flat - token * u.hidden_size;
    let head = channel / u.head_dim;
    let out_dim = channel - head * u.head_dim;

    // Each head owns (head_dim + 1) summary rows of head_dim columns.
    let summary_base = head * (u.head_dim + 1u) * u.head_dim;
    let numerator_row = summary_base + out_dim * u.head_dim;
    let denominator_row = summary_base + u.head_dim * u.head_dim;
    let query_base = token * u.hidden_size + head * u.head_dim;

    var numerator: f32 = 0.0;
    var denominator: f32 = 0.0;
    for (var k: u32 = 0u; k < u.head_dim; k++) {
        // Widen to f32, then ReLU, before the query is used.
        let q = max(f32(query[query_base + k]), 0.0);
        numerator += summary[numerator_row + k] * q;
        denominator += summary[denominator_row + k] * q;
    }

    let quotient = numerator / (denominator + u.eps);
    // Keep the value inside the finite f16 range before narrowing.
    output[flat] = f16(clamp(quotient, -65504.0, 65504.0));
}
@@ -0,0 +1,47 @@
1
// Summary pass of Sana linear attention (f32 inputs, f32 summary).
//
// One invocation per summary element. Each head owns (head_dim + 1) rows of
// head_dim columns:
//   rows 0 .. head_dim-1 : sum over tokens of value[token, head, row] * relu(key[token, head, col])
//   row  head_dim        : sum over tokens of relu(key[token, head, col])
//
// NOTE(review): `query` is declared but not read by this pass; it is kept so
// the binding numbering is unchanged for the host code that supplies the bindings.

override WORKGROUP_SIZE: u32 = 256u;

struct Uniforms {
    num_heads: u32,
    head_dim: u32,
    num_tokens: u32,
    hidden_size: u32,
    _pad0: u32,
    _pad1: u32,
}

@group(0) @binding(0) var<uniform> u: Uniforms;
@group(0) @binding(1) var<storage, read> query: array<f32>;
@group(0) @binding(2) var<storage, read> key: array<f32>;
@group(0) @binding(3) var<storage, read> value: array<f32>;
@group(0) @binding(4) var<storage, read_write> summary: array<f32>;

@compute @workgroup_size(WORKGROUP_SIZE, 1, 1)
fn main(@builtin(global_invocation_id) gid: vec3<u32>) {
    let idx = gid.x;
    let rows_per_head = u.head_dim + 1u;
    let head_span = rows_per_head * u.head_dim;
    let total = u.num_heads * head_span;
    if (idx >= total) {
        return;
    }

    // Decompose the flat index into (head, row, col).
    let head = idx / head_span;
    let rem = idx - head * head_span;
    let row = rem / u.head_dim;
    let col = rem - row * u.head_dim;
    let hidden_base = head * u.head_dim;

    // The extra row uses a constant 1.0 instead of a value element. select()
    // evaluates both operands, so the value index is redirected to element 0
    // of the head for that row; otherwise the discarded load would address
    // one element past the head's slice (and past the end of the buffer for
    // the last head and last token).
    let is_ones_row = row == u.head_dim;
    let value_row = select(row, 0u, is_ones_row);

    var acc: f32 = 0.0;
    for (var token: u32 = 0u; token < u.num_tokens; token = token + 1u) {
        let token_base = token * u.hidden_size + hidden_base;
        // ReLU on the key before it is used.
        let key_value = max(key[token_base + col], 0.0);
        let value_value = select(value[token_base + value_row], 1.0, is_ones_row);
        acc = acc + value_value * key_value;
    }

    summary[idx] = acc;
}
@@ -0,0 +1,49 @@
1
// Summary pass of Sana linear attention (f16 inputs, f32 summary).
//
// One invocation per summary element. Each head owns (head_dim + 1) rows of
// head_dim columns:
//   rows 0 .. head_dim-1 : sum over tokens of value[token, head, row] * relu(key[token, head, col])
//   row  head_dim        : sum over tokens of relu(key[token, head, col])
// Inputs are widened to f32 before accumulation.
//
// NOTE(review): `query` is declared but not read by this pass; it is kept so
// the binding numbering is unchanged for the host code that supplies the bindings.

enable f16;

override WORKGROUP_SIZE: u32 = 256u;

struct Uniforms {
    num_heads: u32,
    head_dim: u32,
    num_tokens: u32,
    hidden_size: u32,
    _pad0: u32,
    _pad1: u32,
}

@group(0) @binding(0) var<uniform> u: Uniforms;
@group(0) @binding(1) var<storage, read> query: array<f16>;
@group(0) @binding(2) var<storage, read> key: array<f16>;
@group(0) @binding(3) var<storage, read> value: array<f16>;
@group(0) @binding(4) var<storage, read_write> summary: array<f32>;

@compute @workgroup_size(WORKGROUP_SIZE, 1, 1)
fn main(@builtin(global_invocation_id) gid: vec3<u32>) {
    let idx = gid.x;
    let rows_per_head = u.head_dim + 1u;
    let head_span = rows_per_head * u.head_dim;
    let total = u.num_heads * head_span;
    if (idx >= total) {
        return;
    }

    // Decompose the flat index into (head, row, col).
    let head = idx / head_span;
    let rem = idx - head * head_span;
    let row = rem / u.head_dim;
    let col = rem - row * u.head_dim;
    let hidden_base = head * u.head_dim;

    // The extra row uses a constant 1.0 instead of a value element. select()
    // evaluates both operands, so the value index is redirected to element 0
    // of the head for that row; otherwise the discarded load would address
    // one element past the head's slice (and past the end of the buffer for
    // the last head and last token).
    let is_ones_row = row == u.head_dim;
    let value_row = select(row, 0u, is_ones_row);

    var acc: f32 = 0.0;
    for (var token: u32 = 0u; token < u.num_tokens; token = token + 1u) {
        let token_base = token * u.hidden_size + hidden_base;
        // Widen to f32, then ReLU, before the key is used.
        let key_value = max(f32(key[token_base + col]), 0.0);
        let value_value = select(f32(value[token_base + value_row]), 1.0, is_ones_row);
        acc = acc + value_value * key_value;
    }

    summary[idx] = acc;
}
@@ -1,4 +1,5 @@
1
1
  export declare const DOPPLER_VERSION: string;
2
+ export { doppler } from './client/doppler-api.browser.js';
2
3
 
3
4
  export {
4
5
  DopplerLoader,
@@ -1,4 +1,5 @@
1
- export const DOPPLER_VERSION = '0.1.0';
1
+ export { DOPPLER_VERSION } from './version.js';
2
+ export { doppler } from './client/doppler-api.browser.js';
2
3
 
3
4
  // Core loaders
4
5
  export {
package/src/index.d.ts CHANGED
@@ -1,4 +1,5 @@
1
1
  export declare const DOPPLER_VERSION: string;
2
+ export { doppler } from './client/doppler-api.js';
2
3
 
3
4
  // Core loaders
4
5
  export {
package/src/index.js CHANGED
@@ -1,4 +1,5 @@
1
- export const DOPPLER_VERSION = '0.1.0';
1
+ export { DOPPLER_VERSION } from './version.js';
2
+ export { doppler } from './client/doppler-api.js';
2
3
 
3
4
  // Core loaders
4
5
  export {