@simulatte/doppler 0.1.3 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +11 -5
- package/package.json +27 -4
- package/src/client/doppler-api.browser.d.ts +1 -0
- package/src/client/doppler-api.browser.js +288 -0
- package/src/client/doppler-api.d.ts +80 -0
- package/src/client/doppler-api.js +298 -0
- package/src/client/doppler-provider/types.js +1 -1
- package/src/client/doppler-registry.d.ts +23 -0
- package/src/client/doppler-registry.js +88 -0
- package/src/client/doppler-registry.json +39 -0
- package/src/config/execution-contract-check.d.ts +82 -0
- package/src/config/execution-contract-check.js +317 -0
- package/src/config/execution-v0-contract-check.d.ts +94 -0
- package/src/config/execution-v0-contract-check.js +251 -0
- package/src/config/execution-v0-graph-contract-check.d.ts +20 -0
- package/src/config/execution-v0-graph-contract-check.js +64 -0
- package/src/config/kernel-path-contract-check.d.ts +76 -0
- package/src/config/kernel-path-contract-check.js +479 -0
- package/src/config/kernel-path-loader.d.ts +16 -0
- package/src/config/kernel-path-loader.js +54 -0
- package/src/config/kernels/kernel-ref-digests.js +12 -0
- package/src/config/kernels/registry.json +556 -0
- package/src/config/loader.js +90 -67
- package/src/config/merge-contract-check.d.ts +16 -0
- package/src/config/merge-contract-check.js +321 -0
- package/src/config/merge-helpers.d.ts +58 -0
- package/src/config/merge-helpers.js +54 -0
- package/src/config/merge.js +3 -6
- package/src/config/presets/models/janus-text.json +27 -0
- package/src/config/quantization-contract-check.d.ts +12 -0
- package/src/config/quantization-contract-check.js +91 -0
- package/src/config/required-inference-fields-contract-check.d.ts +24 -0
- package/src/config/required-inference-fields-contract-check.js +231 -0
- package/src/config/schema/browser-suite-metrics.schema.d.ts +17 -0
- package/src/config/schema/browser-suite-metrics.schema.js +46 -0
- package/src/config/schema/conversion-report.schema.d.ts +40 -0
- package/src/config/schema/conversion-report.schema.js +108 -0
- package/src/config/schema/doppler.schema.js +12 -18
- package/src/config/schema/index.d.ts +22 -0
- package/src/config/schema/index.js +18 -0
- package/src/converter/core.d.ts +10 -0
- package/src/converter/core.js +49 -11
- package/src/converter/parsers/diffusion.js +63 -3
- package/src/converter/tokenizer-utils.js +17 -3
- package/src/formats/rdrr/validation.js +13 -0
- package/src/gpu/kernels/depthwise_conv2d.d.ts +29 -0
- package/src/gpu/kernels/depthwise_conv2d.js +98 -0
- package/src/gpu/kernels/depthwise_conv2d.wgsl +58 -0
- package/src/gpu/kernels/depthwise_conv2d_f16.wgsl +62 -0
- package/src/gpu/kernels/grouped_pointwise_conv2d.d.ts +27 -0
- package/src/gpu/kernels/grouped_pointwise_conv2d.js +92 -0
- package/src/gpu/kernels/grouped_pointwise_conv2d.wgsl +47 -0
- package/src/gpu/kernels/grouped_pointwise_conv2d_f16.wgsl +51 -0
- package/src/gpu/kernels/index.d.ts +30 -0
- package/src/gpu/kernels/index.js +25 -0
- package/src/gpu/kernels/relu.d.ts +18 -0
- package/src/gpu/kernels/relu.js +45 -0
- package/src/gpu/kernels/relu.wgsl +21 -0
- package/src/gpu/kernels/relu_f16.wgsl +23 -0
- package/src/gpu/kernels/repeat_channels.d.ts +21 -0
- package/src/gpu/kernels/repeat_channels.js +60 -0
- package/src/gpu/kernels/repeat_channels.wgsl +29 -0
- package/src/gpu/kernels/repeat_channels_f16.wgsl +31 -0
- package/src/gpu/kernels/sana_linear_attention.d.ts +27 -0
- package/src/gpu/kernels/sana_linear_attention.js +122 -0
- package/src/gpu/kernels/sana_linear_attention_apply.wgsl +44 -0
- package/src/gpu/kernels/sana_linear_attention_apply_f16.wgsl +47 -0
- package/src/gpu/kernels/sana_linear_attention_summary.wgsl +47 -0
- package/src/gpu/kernels/sana_linear_attention_summary_f16.wgsl +49 -0
- package/src/index-browser.d.ts +1 -0
- package/src/index-browser.js +2 -1
- package/src/index.d.ts +1 -0
- package/src/index.js +2 -1
- package/src/inference/browser-harness.js +164 -38
- package/src/inference/pipelines/diffusion/init.js +14 -0
- package/src/inference/pipelines/diffusion/pipeline.js +206 -77
- package/src/inference/pipelines/diffusion/sana-transformer.d.ts +53 -0
- package/src/inference/pipelines/diffusion/sana-transformer.js +738 -0
- package/src/inference/pipelines/diffusion/scheduler.d.ts +17 -1
- package/src/inference/pipelines/diffusion/scheduler.js +91 -3
- package/src/inference/pipelines/diffusion/text-encoder-gpu.d.ts +6 -4
- package/src/inference/pipelines/diffusion/text-encoder-gpu.js +270 -0
- package/src/inference/pipelines/diffusion/text-encoder.js +18 -1
- package/src/inference/pipelines/diffusion/types.d.ts +4 -0
- package/src/inference/pipelines/diffusion/vae.js +782 -78
- package/src/inference/pipelines/text/config.d.ts +5 -0
- package/src/inference/pipelines/text/config.js +1 -1
- package/src/inference/pipelines/text/execution-v0.js +141 -101
- package/src/inference/pipelines/text/init.js +41 -10
- package/src/inference/pipelines/text.js +7 -1
- package/src/rules/execution-rules-contract-check.d.ts +17 -0
- package/src/rules/execution-rules-contract-check.js +245 -0
- package/src/rules/kernels/depthwise-conv2d.rules.json +6 -0
- package/src/rules/kernels/grouped-pointwise-conv2d.rules.json +6 -0
- package/src/rules/kernels/relu.rules.json +6 -0
- package/src/rules/kernels/repeat-channels.rules.json +6 -0
- package/src/rules/kernels/sana-linear-attention.rules.json +6 -0
- package/src/rules/layer-pattern-contract-check.d.ts +17 -0
- package/src/rules/layer-pattern-contract-check.js +231 -0
- package/src/rules/rule-registry.d.ts +28 -0
- package/src/rules/rule-registry.js +38 -0
- package/src/tooling/conversion-config-materializer.d.ts +24 -0
- package/src/tooling/conversion-config-materializer.js +99 -0
- package/src/tooling/lean-execution-contract-runner.d.ts +43 -0
- package/src/tooling/lean-execution-contract-runner.js +158 -0
- package/src/tooling/lean-execution-contract.d.ts +16 -0
- package/src/tooling/lean-execution-contract.js +81 -0
- package/src/tooling/node-convert.d.ts +10 -0
- package/src/tooling/node-converter.js +59 -0
- package/src/tooling/node-webgpu.js +30 -9
- package/src/version.d.ts +2 -0
- package/src/version.js +2 -0
- package/tools/convert-safetensors-node.js +47 -0
- package/tools/doppler-cli.js +167 -6
|
@@ -0,0 +1,245 @@
|
|
|
1
|
+
import { selectByRules } from '../gpu/kernels/rule-matcher.js';
|
|
2
|
+
|
|
3
|
+
/**
 * Reports whether `value` is a non-null object that is not an array.
 *
 * @param {unknown} value - Candidate value.
 * @returns {boolean} `false` for `null`, `undefined`, arrays and primitives;
 *   `true` for every other object.
 */
function isPlainObject(value) {
  if (value === null || value === undefined) {
    return false;
  }
  if (Array.isArray(value)) {
    return false;
  }
  return typeof value === 'object';
}
|
|
6
|
+
|
|
7
|
+
/**
 * Deep structural equality check between two objects.
 *
 * Both arguments must be non-null, non-array objects that expose exactly the
 * same set of own enumerable string keys. Values are compared recursively:
 * nested objects by this same rule, arrays element by element (same length,
 * same order), and everything else with strict equality (`===`).
 *
 * Array elements are compared recursively as well, so arrays that contain
 * objects or nested arrays match when they are structurally identical rather
 * than only when they hold the very same references.
 *
 * @param {unknown} actual - Value under test.
 * @param {unknown} expected - Reference value.
 * @returns {boolean} `true` only when `actual` mirrors `expected` exactly.
 */
function matchesExactObject(actual, expected) {
  const isRecord = (value) => value != null && typeof value === 'object' && !Array.isArray(value);

  // Compares one pair of values; the shape of `right` (the expected side)
  // decides which comparison applies.
  function valuesMatch(left, right) {
    if (Array.isArray(right)) {
      if (!Array.isArray(left) || left.length !== right.length) {
        return false;
      }
      for (let i = 0; i < right.length; i += 1) {
        if (!valuesMatch(left[i], right[i])) {
          return false;
        }
      }
      return true;
    }
    if (isRecord(right)) {
      if (!matchesExactObject(left, right)) {
        return false;
      }
      return true;
    }
    return left === right;
  }

  if (!isRecord(actual) || !isRecord(expected)) {
    return false;
  }
  const actualKeys = new Set(Object.keys(actual));
  const expectedKeys = Object.keys(expected);
  // Equal key counts plus "every expected key is present" means equal key sets.
  if (actualKeys.size !== expectedKeys.length) {
    return false;
  }
  for (const key of expectedKeys) {
    if (!actualKeys.has(key)) {
      return false;
    }
    if (!valuesMatch(actual[key], expected[key])) {
      return false;
    }
  }
  return true;
}
|
|
47
|
+
|
|
48
|
+
/**
 * Reference predicate for the `decodeRecorderEnabled` rules.
 *
 * @param {object} context - Flags read: `hasDevice`, `debug`,
 *   `disableCommandBatching`, `kvLayout`.
 * @returns {boolean} `true` only when `hasDevice` is exactly `true`, neither
 *   `debug` nor `disableCommandBatching` is `true`, and `kvLayout` is not
 *   `'bdpa_paged'`.
 */
function decodeRecorderSemantic(context) {
  if (context.hasDevice !== true) {
    return false;
  }
  if (context.debug === true) {
    return false;
  }
  if (context.disableCommandBatching === true) {
    return false;
  }
  return context.kvLayout !== 'bdpa_paged';
}
|
|
54
|
+
|
|
55
|
+
/**
 * Reference predicate for the `batchDecodeEnabled` rules.
 *
 * @param {object} context - Fields read: `batchSize`, `useGPU`,
 *   `gpuSamplingAvailable`, `disableMultiTokenDecode`,
 *   `disableCommandBatching`, `isBdpaPagedLayout`,
 *   `finitenessFallbackWindowOpen`.
 * @returns {boolean} `true` only when `batchSize > 1`, both `useGPU` and
 *   `gpuSamplingAvailable` are exactly `true`, and none of the four
 *   disabling flags is `true`.
 */
function batchDecodeSemantic(context) {
  if (!(context.batchSize > 1)) {
    return false;
  }
  if (context.useGPU !== true || context.gpuSamplingAvailable !== true) {
    return false;
  }
  if (context.disableMultiTokenDecode === true) {
    return false;
  }
  if (context.disableCommandBatching === true) {
    return false;
  }
  if (context.isBdpaPagedLayout === true) {
    return false;
  }
  return context.finitenessFallbackWindowOpen !== true;
}
|
|
64
|
+
|
|
65
|
+
/**
 * Builds every combination of the inputs read by the decode-recorder rules.
 *
 * The result is the cross product of `hasDevice`, `debug` and
 * `disableCommandBatching` (each `true` then `false`) with `kvLayout`
 * (`'bdpa_paged'`, `'paged'`, `null`); the last listed field varies fastest.
 *
 * @returns {Array<object>} 24 context objects.
 */
function enumerateDecodeRecorderContexts() {
  const flags = [true, false];
  const layouts = ['bdpa_paged', 'paged', null];
  return flags.flatMap((hasDevice) =>
    flags.flatMap((debug) =>
      flags.flatMap((disableCommandBatching) =>
        layouts.map((kvLayout) => ({
          hasDevice,
          debug,
          disableCommandBatching,
          kvLayout,
        }))
      )
    )
  );
}
|
|
85
|
+
|
|
86
|
+
/**
 * Builds every combination of the inputs read by the batch-decode rules.
 *
 * `batchSize` takes the values 1 and 2; each of the six boolean fields takes
 * `true` then `false`. Fields are added in declaration order, so the last
 * field (`finitenessFallbackWindowOpen`) varies fastest.
 *
 * @returns {Array<object>} 128 context objects.
 */
function enumerateBatchDecodeContexts() {
  // Extends every partial context with each option for one more field.
  const extend = (partials, key, options) =>
    partials.flatMap((partial) => options.map((option) => ({ ...partial, [key]: option })));

  const booleanFields = [
    'useGPU',
    'gpuSamplingAvailable',
    'disableMultiTokenDecode',
    'disableCommandBatching',
    'isBdpaPagedLayout',
    'finitenessFallbackWindowOpen',
  ];

  let contexts = extend([{}], 'batchSize', [1, 2]);
  for (const field of booleanFields) {
    contexts = extend(contexts, field, [true, false]);
  }
  return contexts;
}
|
|
115
|
+
|
|
116
|
+
/**
 * Verifies the literal shape of a two-rule enable/fallback rule list.
 *
 * The list must be an array of exactly two rules: the first with a `match`
 * equal to `expectedFirstMatch` and `value === true`, the second with an
 * empty `match` and `value === false`.
 *
 * @param {unknown} rules - Rule list to inspect.
 * @param {object} expectedFirstMatch - Expected `match` of the first rule.
 * @param {string} label - Rule-set name used in error messages.
 * @returns {{ ok: boolean, errors: string[] }} Outcome; a non-array or
 *   wrong-length input yields a single error and skips the per-rule checks.
 */
function checkRuleShape(rules, expectedFirstMatch, label) {
  const prefix = `[ExecutionRulesContract] ${label}`;
  const failure = (message) => ({ ok: false, errors: [message] });

  if (!Array.isArray(rules)) {
    return failure(`${prefix} must be an array.`);
  }
  if (rules.length !== 2) {
    return failure(`${prefix} must contain exactly 2 rules; got ${rules.length}.`);
  }

  const enablingRule = rules[0];
  const fallbackRule = rules[1];
  const errors = [];

  const enablingOk = matchesExactObject(enablingRule?.match, expectedFirstMatch)
    && enablingRule?.value === true;
  if (!enablingOk) {
    errors.push(`${prefix} first rule drifted from the expected enabling predicate.`);
  }

  const fallbackOk = matchesExactObject(fallbackRule?.match, {})
    && fallbackRule?.value === false;
  if (!fallbackOk) {
    errors.push(`${prefix} fallback rule must be { match: {}, value: false }.`);
  }

  return { ok: errors.length === 0, errors };
}
|
|
142
|
+
|
|
143
|
+
/**
 * Evaluates `rules` against each context and compares the selected value with
 * the reference predicate.
 *
 * Only the first mismatch is reported; evaluation stops there.
 *
 * @param {Array<object>} rules - Rule list handed to `selectByRules`.
 * @param {Array<object>} contexts - Contexts to evaluate, in order.
 * @param {(context: object) => unknown} expectedValue - Reference predicate.
 * @param {string} label - Rule-set name used in error messages.
 * @returns {{ ok: boolean, errors: string[], sampledContexts: number }}
 *   `sampledContexts` is the number of contexts supplied, regardless of
 *   whether evaluation stopped early.
 */
function checkRuleSemantics(rules, contexts, expectedValue, label) {
  const errors = [];
  for (let index = 0; index < contexts.length && errors.length === 0; index += 1) {
    const context = contexts[index];
    const actual = selectByRules(rules, context);
    const expected = expectedValue(context);
    if (actual === expected) {
      continue;
    }
    const where = `[ExecutionRulesContract] ${label} mismatched context ${JSON.stringify(context)}: `;
    const what = `expected ${JSON.stringify(expected)}, got ${JSON.stringify(actual)}.`;
    errors.push(where + what);
  }
  return {
    ok: errors.length === 0,
    errors,
    sampledContexts: contexts.length,
  };
}
|
|
162
|
+
|
|
163
|
+
/**
 * Builds the contract artifact for the inference execution rule group.
 *
 * For `decodeRecorderEnabled` and then `batchDecodeEnabled`, a shape check and
 * a semantic check are run (in that order); their errors and per-check
 * results are accumulated. The semantic check is replaced by a fixed failure
 * entry when the rule list is not an array.
 *
 * @param {object | null | undefined} ruleGroup - Object expected to expose
 *   `decodeRecorderEnabled` and `batchDecodeEnabled` rule arrays.
 * @returns {object} Artifact with `schemaVersion`, `source`, `ok`, `checks`,
 *   `errors` and `stats`; `ok` is `true` only when no error was recorded.
 */
export function buildInferenceExecutionRulesContractArtifact(ruleGroup) {
  const errors = [];
  const checks = [];

  // Runs the shape + semantic checks for one rule set and records the results.
  const evaluate = (name, rules, expectedFirstMatch, enumerateContexts, semantic) => {
    const shape = checkRuleShape(rules, expectedFirstMatch, name);
    errors.push(...shape.errors);
    checks.push({ id: `inference.execution.${name}.shape`, ok: shape.ok });

    let semantics;
    if (Array.isArray(rules)) {
      semantics = checkRuleSemantics(rules, enumerateContexts(), semantic, name);
    } else {
      semantics = {
        ok: false,
        errors: [`[ExecutionRulesContract] ${name} is unavailable for semantic check.`],
        sampledContexts: 0,
      };
    }
    errors.push(...semantics.errors);
    checks.push({ id: `inference.execution.${name}.semantics`, ok: semantics.ok });

    return {
      ruleCount: Array.isArray(rules) ? rules.length : 0,
      contextCount: semantics.sampledContexts,
    };
  };

  const decode = evaluate(
    'decodeRecorderEnabled',
    ruleGroup?.decodeRecorderEnabled,
    {
      hasDevice: true,
      debug: false,
      disableCommandBatching: false,
      kvLayout: { neq: 'bdpa_paged' },
    },
    enumerateDecodeRecorderContexts,
    decodeRecorderSemantic
  );

  const batch = evaluate(
    'batchDecodeEnabled',
    ruleGroup?.batchDecodeEnabled,
    {
      batchSize: { gt: 1 },
      useGPU: true,
      gpuSamplingAvailable: true,
      disableMultiTokenDecode: false,
      disableCommandBatching: false,
      isBdpaPagedLayout: false,
      finitenessFallbackWindowOpen: false,
    },
    enumerateBatchDecodeContexts,
    batchDecodeSemantic
  );

  return {
    schemaVersion: 1,
    source: 'doppler',
    ok: errors.length === 0,
    checks,
    errors,
    stats: {
      decodeRecorderRules: decode.ruleCount,
      batchDecodeRules: batch.ruleCount,
      decodeRecorderContexts: decode.contextCount,
      batchDecodeContexts: batch.contextCount,
    },
  };
}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
/** Result object describing the outcome of the layer-pattern contract checks. */
export interface LayerPatternContractArtifact {
  /** Artifact format version; always `1`. */
  schemaVersion: 1;
  /** Producer tag; always `'doppler'`. */
  source: 'doppler';
  /** Overall outcome flag. */
  ok: boolean;
  /** One entry per individual check that was run. */
  checks: { id: string; ok: boolean }[];
  /** Error messages collected from the checks. */
  errors: string[];
  /** Rule and context counts for the `patternKind` and `layerType` rule sets. */
  stats: {
    patternKindRules: number;
    layerTypeRules: number;
    patternKindContexts: number;
    layerTypeContexts: number;
  };
}
|
|
14
|
+
|
|
15
|
+
/**
 * Builds the layer-pattern contract artifact from a rule group.
 *
 * @param ruleGroup - Rule group object; `null` and `undefined` are accepted.
 * @returns The resulting contract artifact.
 */
export declare function buildLayerPatternContractArtifact(ruleGroup: Record<string, unknown> | null | undefined): LayerPatternContractArtifact;
|
|
@@ -0,0 +1,231 @@
|
|
|
1
|
+
import { selectByRules } from '../gpu/kernels/rule-matcher.js';
|
|
2
|
+
import { computeGlobalLayers } from '../config/schema/inference.schema.js';
|
|
3
|
+
|
|
4
|
+
/**
 * Reports whether `value` is a non-null object that is not an array.
 *
 * @param {unknown} value - Candidate value.
 * @returns {boolean} `false` for `null`, `undefined`, arrays and primitives;
 *   `true` for every other object.
 */
function isPlainObject(value) {
  const isNullish = value === null || value === undefined;
  if (isNullish || Array.isArray(value)) {
    return false;
  }
  return typeof value === 'object';
}
|
|
7
|
+
|
|
8
|
+
/**
 * Deep structural equality check between two objects.
 *
 * Both arguments must be non-null, non-array objects that expose exactly the
 * same set of own enumerable string keys. Values are compared recursively:
 * nested objects by this same rule, arrays element by element (same length,
 * same order), and everything else with strict equality (`===`).
 *
 * Arrays are compared structurally; without that, an expected array value
 * could only ever match the very same array reference.
 *
 * @param {unknown} actual - Value under test.
 * @param {unknown} expected - Reference value.
 * @returns {boolean} `true` only when `actual` mirrors `expected` exactly.
 */
function matchesExactObject(actual, expected) {
  const isRecord = (value) => value != null && typeof value === 'object' && !Array.isArray(value);

  // Compares one pair of values; the shape of `right` (the expected side)
  // decides which comparison applies.
  function valuesMatch(left, right) {
    if (Array.isArray(right)) {
      if (!Array.isArray(left) || left.length !== right.length) {
        return false;
      }
      for (let i = 0; i < right.length; i += 1) {
        if (!valuesMatch(left[i], right[i])) {
          return false;
        }
      }
      return true;
    }
    if (isRecord(right)) {
      if (!matchesExactObject(left, right)) {
        return false;
      }
      return true;
    }
    return left === right;
  }

  if (!isRecord(actual) || !isRecord(expected)) {
    return false;
  }
  const actualKeys = new Set(Object.keys(actual));
  const expectedKeys = Object.keys(expected);
  // Equal key counts plus "every expected key is present" means equal key sets.
  if (actualKeys.size !== expectedKeys.length) {
    return false;
  }
  for (const key of expectedKeys) {
    if (!actualKeys.has(key)) {
      return false;
    }
    if (!valuesMatch(actual[key], expected[key])) {
      return false;
    }
  }
  return true;
}
|
|
37
|
+
|
|
38
|
+
/**
 * Reference mapping from (`patternType`, `globalPattern`) to a pattern kind.
 *
 * @param {object} context - Fields read: `patternType`, `globalPattern`.
 * @returns {string | null} `'alternating_even'` / `'alternating_odd'` for an
 *   alternating pattern with an `'even'` / `'odd'` global pattern,
 *   `'every_n'` for the `'every_n'` pattern type, otherwise `null`.
 */
function expectedPatternKind(context) {
  const { patternType, globalPattern } = context;
  switch (patternType) {
    case 'alternating':
      if (globalPattern === 'even') {
        return 'alternating_even';
      }
      if (globalPattern === 'odd') {
        return 'alternating_odd';
      }
      return null;
    case 'every_n':
      return 'every_n';
    default:
      return null;
  }
}
|
|
50
|
+
|
|
51
|
+
/**
 * Reference mapping from a pattern kind plus layer flags to a layer type.
 *
 * @param {object} context - Fields read: `patternKind`, `isEven`, `isStride`.
 * @returns {string | null} `'full_attention'` when the flag relevant to the
 *   pattern kind selects it (`isEven` truthy for `'alternating_even'`,
 *   `isEven` falsy for `'alternating_odd'`, `isStride` truthy for
 *   `'every_n'`), `'sliding_attention'` otherwise, and `null` for any other
 *   pattern kind.
 */
function expectedLayerType(context) {
  let isFull;
  switch (context.patternKind) {
    case 'alternating_even':
      isFull = Boolean(context.isEven);
      break;
    case 'alternating_odd':
      isFull = !context.isEven;
      break;
    case 'every_n':
      isFull = Boolean(context.isStride);
      break;
    default:
      return null;
  }
  return isFull ? 'full_attention' : 'sliding_attention';
}
|
|
63
|
+
|
|
64
|
+
/**
 * Builds the cross product of pattern types and global patterns used to
 * exercise the `patternKind` rules; `globalPattern` varies fastest.
 *
 * @returns {Array<{ patternType: string | null, globalPattern: string | null }>}
 *   16 context objects.
 */
function enumeratePatternKindContexts() {
  const patternTypes = ['alternating', 'every_n', 'custom', null];
  const globalPatterns = ['even', 'odd', 'every_n', null];
  return patternTypes.flatMap((patternType) =>
    globalPatterns.map((globalPattern) => ({ patternType, globalPattern }))
  );
}
|
|
75
|
+
|
|
76
|
+
/**
 * Builds the cross product of the three pattern kinds with the `isEven` and
 * `isStride` flags (each `true` then `false`); `isStride` varies fastest.
 *
 * @returns {Array<{ patternKind: string, isEven: boolean, isStride: boolean }>}
 *   12 context objects.
 */
function enumerateLayerTypeContexts() {
  const kinds = ['alternating_even', 'alternating_odd', 'every_n'];
  const flags = [true, false];
  return kinds.flatMap((patternKind) =>
    flags.flatMap((isEven) =>
      flags.map((isStride) => ({ patternKind, isEven, isStride }))
    )
  );
}
|
|
89
|
+
|
|
90
|
+
/**
 * Verifies that a rule list matches an expected decision table entry by entry.
 *
 * @param {unknown} rules - Rule list to inspect.
 * @param {Array<{ match: object, value: unknown }>} expected - Expected rules,
 *   in order.
 * @param {string} label - Rule-set name used in error messages.
 * @returns {{ ok: boolean, errors: string[] }} Outcome; a non-array or
 *   wrong-length input yields a single error, and only the first drifting
 *   rule index is reported.
 */
function checkRuleShape(rules, expected, label) {
  const prefix = `[LayerPatternContract] ${label}`;
  const failure = (message) => ({ ok: false, errors: [message] });

  if (!Array.isArray(rules)) {
    return failure(`${prefix} must be an array.`);
  }
  if (rules.length !== expected.length) {
    return failure(`${prefix} must contain exactly ${expected.length} rules; got ${rules.length}.`);
  }

  // Index of the first rule whose match or value differs, or -1 if none does.
  const driftIndex = expected.findIndex((entry, index) => {
    const rule = rules[index];
    return !matchesExactObject(rule?.match, entry.match) || rule?.value !== entry.value;
  });

  const errors = driftIndex === -1
    ? []
    : [`${prefix} rule[${driftIndex}] drifted from the expected decision table.`];
  return { ok: errors.length === 0, errors };
}
|
|
115
|
+
|
|
116
|
+
/**
 * Evaluates `rules` against each context and compares the selected value with
 * the reference mapping.
 *
 * Only the first mismatch is reported; evaluation stops there.
 *
 * @param {Array<object>} rules - Rule list handed to `selectByRules`.
 * @param {Array<object>} contexts - Contexts to evaluate, in order.
 * @param {(context: object) => unknown} expectedValue - Reference mapping.
 * @param {string} label - Rule-set name used in error messages.
 * @returns {{ ok: boolean, errors: string[], sampledContexts: number }}
 *   `sampledContexts` is the number of contexts supplied, regardless of
 *   whether evaluation stopped early.
 */
function checkRuleSemantics(rules, contexts, expectedValue, label) {
  const errors = [];
  for (let index = 0; index < contexts.length && errors.length === 0; index += 1) {
    const context = contexts[index];
    const actual = selectByRules(rules, context);
    const expected = expectedValue(context);
    if (actual === expected) {
      continue;
    }
    const where = `[LayerPatternContract] ${label} mismatched context ${JSON.stringify(context)}: `;
    const what = `expected ${JSON.stringify(expected)}, got ${JSON.stringify(actual)}.`;
    errors.push(where + what);
  }
  return {
    ok: errors.length === 0,
    errors,
    sampledContexts: contexts.length,
  };
}
|
|
135
|
+
|
|
136
|
+
/**
 * Runs `computeGlobalLayers` on four fixed inputs and compares each result
 * with its expected index list via JSON serialization.
 *
 * @returns {{ checks: Array<{ id: string, ok: boolean }>, errors: string[] }}
 *   One check entry per case, plus one error message per failing case.
 */
function checkGlobalLayerSemantics() {
  // All cases are computed up front, in this order.
  const cases = [
    {
      id: 'inference.layerPattern.computeGlobalLayers.even',
      actual: computeGlobalLayers({ type: 'alternating', globalPattern: 'even' }, 6),
      expected: [0, 2, 4],
    },
    {
      id: 'inference.layerPattern.computeGlobalLayers.odd',
      actual: computeGlobalLayers({ type: 'alternating', globalPattern: 'odd' }, 6),
      expected: [1, 3, 5],
    },
    {
      id: 'inference.layerPattern.computeGlobalLayers.every_n_offset',
      actual: computeGlobalLayers({ type: 'every_n', period: 6, offset: 5 }, 12),
      expected: [5, 11],
    },
    {
      id: 'inference.layerPattern.computeGlobalLayers.every_n_negative_offset',
      actual: computeGlobalLayers({ type: 'every_n', period: 6, offset: -1 }, 12),
      expected: [5, 11],
    },
  ];

  const evaluated = cases.map(({ id, actual, expected }) => {
    const actualJson = JSON.stringify(actual);
    const expectedJson = JSON.stringify(expected);
    return { id, ok: actualJson === expectedJson, actualJson, expectedJson };
  });

  return {
    checks: evaluated.map(({ id, ok }) => ({ id, ok })),
    errors: evaluated
      .filter((entry) => !entry.ok)
      .map((entry) => `[LayerPatternContract] ${entry.id} expected ${entry.expectedJson}, got ${entry.actualJson}.`),
  };
}
|
|
175
|
+
|
|
176
|
+
/**
 * Builds the contract artifact for the layer-pattern rule group.
 *
 * For `patternKind` and then `layerType`, a shape check against the expected
 * decision table and a semantic check are run (in that order). The semantic
 * check is replaced by a fixed failure entry when the rule list is not an
 * array. The fixed `computeGlobalLayers` cases are appended last.
 *
 * @param {object | null | undefined} ruleGroup - Object expected to expose
 *   `patternKind` and `layerType` rule arrays.
 * @returns {object} Artifact with `schemaVersion`, `source`, `ok`, `checks`,
 *   `errors` and `stats`; `ok` is `true` only when no error was recorded.
 */
export function buildLayerPatternContractArtifact(ruleGroup) {
  const errors = [];
  const checks = [];

  // Runs the shape + semantic checks for one rule set and records the results.
  const evaluate = (name, rules, expectedTable, enumerateContexts, reference) => {
    const shape = checkRuleShape(rules, expectedTable, name);
    errors.push(...shape.errors);
    checks.push({ id: `inference.layerPattern.${name}.shape`, ok: shape.ok });

    let semantics;
    if (Array.isArray(rules)) {
      semantics = checkRuleSemantics(rules, enumerateContexts(), reference, name);
    } else {
      semantics = {
        ok: false,
        errors: [`[LayerPatternContract] ${name} is unavailable for semantic check.`],
        sampledContexts: 0,
      };
    }
    errors.push(...semantics.errors);
    checks.push({ id: `inference.layerPattern.${name}.semantics`, ok: semantics.ok });

    return {
      ruleCount: Array.isArray(rules) ? rules.length : 0,
      contextCount: semantics.sampledContexts,
    };
  };

  const patternKind = evaluate(
    'patternKind',
    ruleGroup?.patternKind,
    [
      { match: { patternType: 'alternating', globalPattern: 'even' }, value: 'alternating_even' },
      { match: { patternType: 'alternating', globalPattern: 'odd' }, value: 'alternating_odd' },
      { match: { patternType: 'every_n' }, value: 'every_n' },
      { match: {}, value: null },
    ],
    enumeratePatternKindContexts,
    expectedPatternKind
  );

  const layerType = evaluate(
    'layerType',
    ruleGroup?.layerType,
    [
      { match: { patternKind: 'alternating_even', isEven: true }, value: 'full_attention' },
      { match: { patternKind: 'alternating_even' }, value: 'sliding_attention' },
      { match: { patternKind: 'alternating_odd', isEven: false }, value: 'full_attention' },
      { match: { patternKind: 'alternating_odd' }, value: 'sliding_attention' },
      { match: { patternKind: 'every_n', isStride: true }, value: 'full_attention' },
      { match: { patternKind: 'every_n' }, value: 'sliding_attention' },
    ],
    enumerateLayerTypeContexts,
    expectedLayerType
  );

  const globalLayers = checkGlobalLayerSemantics();
  errors.push(...globalLayers.errors);
  checks.push(...globalLayers.checks);

  return {
    schemaVersion: 1,
    source: 'doppler',
    ok: errors.length === 0,
    checks,
    errors,
    stats: {
      patternKindRules: patternKind.ruleCount,
      layerTypeRules: layerType.ruleCount,
      patternKindContexts: patternKind.contextCount,
      layerTypeContexts: layerType.contextCount,
    },
  };
}
|
|
@@ -46,3 +46,31 @@ export declare function registerRuleGroup(
|
|
|
46
46
|
group: RuleGroup,
|
|
47
47
|
rules: Record<string, RuleSet>
|
|
48
48
|
): void;
|
|
49
|
+
|
|
50
|
+
/**
 * Returns the contract artifact for the inference execution rules.
 *
 * @returns Artifact with the overall outcome, per-check results, collected
 *   error messages, and rule/context counts for the decode-recorder and
 *   batch-decode rule sets.
 */
export declare function getInferenceExecutionRulesContractArtifact(): {
  schemaVersion: 1;
  source: 'doppler';
  ok: boolean;
  checks: { id: string; ok: boolean }[];
  errors: string[];
  stats: {
    decodeRecorderRules: number;
    batchDecodeRules: number;
    decodeRecorderContexts: number;
    batchDecodeContexts: number;
  };
};
|
|
63
|
+
|
|
64
|
+
/**
 * Returns the contract artifact for the inference layer-pattern rules.
 *
 * @returns Artifact with the overall outcome, per-check results, collected
 *   error messages, and rule/context counts for the `patternKind` and
 *   `layerType` rule sets.
 */
export declare function getInferenceLayerPatternContractArtifact(): {
  schemaVersion: 1;
  source: 'doppler';
  ok: boolean;
  checks: { id: string; ok: boolean }[];
  errors: string[];
  stats: {
    patternKindRules: number;
    layerTypeRules: number;
    patternKindContexts: number;
    layerTypeContexts: number;
  };
};
|
|
@@ -1,8 +1,11 @@
|
|
|
1
1
|
import { selectByRules } from '../gpu/kernels/rule-matcher.js';
|
|
2
|
+
import { buildInferenceExecutionRulesContractArtifact } from './execution-rules-contract-check.js';
|
|
3
|
+
import { buildLayerPatternContractArtifact } from './layer-pattern-contract-check.js';
|
|
2
4
|
import { loadJson } from '../utils/load-json.js';
|
|
3
5
|
|
|
4
6
|
const attentionRules = await loadJson('./kernels/attention.rules.json', import.meta.url, 'Failed to load rules');
|
|
5
7
|
const conv2dRules = await loadJson('./kernels/conv2d.rules.json', import.meta.url, 'Failed to load rules');
|
|
8
|
+
const depthwiseConv2dRules = await loadJson('./kernels/depthwise-conv2d.rules.json', import.meta.url, 'Failed to load rules');
|
|
6
9
|
const dequantRules = await loadJson('./kernels/dequant.rules.json', import.meta.url, 'Failed to load rules');
|
|
7
10
|
const energyRules = await loadJson('./kernels/energy.rules.json', import.meta.url, 'Failed to load rules');
|
|
8
11
|
const fusedFfnRules = await loadJson('./kernels/fused-ffn.rules.json', import.meta.url, 'Failed to load rules');
|
|
@@ -10,6 +13,7 @@ const fusedMatmulResidualRules = await loadJson('./kernels/fused-matmul-residual
|
|
|
10
13
|
const fusedMatmulRmsnormRules = await loadJson('./kernels/fused-matmul-rmsnorm.rules.json', import.meta.url, 'Failed to load rules');
|
|
11
14
|
const gatherRules = await loadJson('./kernels/gather.rules.json', import.meta.url, 'Failed to load rules');
|
|
12
15
|
const geluRules = await loadJson('./kernels/gelu.rules.json', import.meta.url, 'Failed to load rules');
|
|
16
|
+
const groupedPointwiseConv2dRules = await loadJson('./kernels/grouped-pointwise-conv2d.rules.json', import.meta.url, 'Failed to load rules');
|
|
13
17
|
const groupnormRules = await loadJson('./kernels/groupnorm.rules.json', import.meta.url, 'Failed to load rules');
|
|
14
18
|
const kvQuantizeRules = await loadJson('./kernels/kv_quantize.rules.json', import.meta.url, 'Failed to load rules');
|
|
15
19
|
const layernormRules = await loadJson('./kernels/layernorm.rules.json', import.meta.url, 'Failed to load rules');
|
|
@@ -18,9 +22,12 @@ const kernelMoeRules = await loadJson('./kernels/moe.rules.json', import.meta.ur
|
|
|
18
22
|
const kernelMoeGptOssRules = await loadJson('./kernels/moe.rules.gptoss.json', import.meta.url, 'Failed to load rules');
|
|
19
23
|
const modulateRules = await loadJson('./kernels/modulate.rules.json', import.meta.url, 'Failed to load rules');
|
|
20
24
|
const pixelShuffleRules = await loadJson('./kernels/pixel_shuffle.rules.json', import.meta.url, 'Failed to load rules');
|
|
25
|
+
const repeatChannelsRules = await loadJson('./kernels/repeat-channels.rules.json', import.meta.url, 'Failed to load rules');
|
|
26
|
+
const reluRules = await loadJson('./kernels/relu.rules.json', import.meta.url, 'Failed to load rules');
|
|
21
27
|
const residualRules = await loadJson('./kernels/residual.rules.json', import.meta.url, 'Failed to load rules');
|
|
22
28
|
const rmsnormRules = await loadJson('./kernels/rmsnorm.rules.json', import.meta.url, 'Failed to load rules');
|
|
23
29
|
const ropeRules = await loadJson('./kernels/rope.rules.json', import.meta.url, 'Failed to load rules');
|
|
30
|
+
const sanaLinearAttentionRules = await loadJson('./kernels/sana-linear-attention.rules.json', import.meta.url, 'Failed to load rules');
|
|
24
31
|
const sampleRules = await loadJson('./kernels/sample.rules.json', import.meta.url, 'Failed to load rules');
|
|
25
32
|
const scaleRules = await loadJson('./kernels/scale.rules.json', import.meta.url, 'Failed to load rules');
|
|
26
33
|
const siluRules = await loadJson('./kernels/silu.rules.json', import.meta.url, 'Failed to load rules');
|
|
@@ -46,6 +53,24 @@ const toolingCommandRuntimeRules = await loadJson(
|
|
|
46
53
|
import.meta.url,
|
|
47
54
|
'Failed to load rules'
|
|
48
55
|
);
|
|
56
|
+
// Build the execution-rules contract artifact once at module load and refuse
// to continue when any of its checks failed.
const INFERENCE_EXECUTION_RULES_CONTRACT_ARTIFACT =
  buildInferenceExecutionRulesContractArtifact(inferenceExecutionRules);
if (!INFERENCE_EXECUTION_RULES_CONTRACT_ARTIFACT.ok) {
  const details = INFERENCE_EXECUTION_RULES_CONTRACT_ARTIFACT.errors.join(' | ');
  throw new Error(`RuleRegistry: inference.execution rules contract failed: ${details}`);
}
|
|
65
|
+
// Build the layer-pattern contract artifact once at module load and refuse to
// continue when any of its checks failed.
const INFERENCE_LAYER_PATTERN_CONTRACT_ARTIFACT =
  buildLayerPatternContractArtifact(layerPatternRules);
if (!INFERENCE_LAYER_PATTERN_CONTRACT_ARTIFACT.ok) {
  const details = INFERENCE_LAYER_PATTERN_CONTRACT_ARTIFACT.errors.join(' | ');
  throw new Error(`RuleRegistry: inference.layerPattern rules contract failed: ${details}`);
}
|
|
49
74
|
|
|
50
75
|
const RULE_SETS = {
|
|
51
76
|
shared: {
|
|
@@ -54,6 +79,7 @@ const RULE_SETS = {
|
|
|
54
79
|
kernels: {
|
|
55
80
|
attention: attentionRules,
|
|
56
81
|
conv2d: conv2dRules,
|
|
82
|
+
depthwiseConv2d: depthwiseConv2dRules,
|
|
57
83
|
dequant: dequantRules,
|
|
58
84
|
energy: energyRules,
|
|
59
85
|
fusedFfn: fusedFfnRules,
|
|
@@ -61,6 +87,7 @@ const RULE_SETS = {
|
|
|
61
87
|
fusedMatmulRmsnorm: fusedMatmulRmsnormRules,
|
|
62
88
|
gather: gatherRules,
|
|
63
89
|
gelu: geluRules,
|
|
90
|
+
groupedPointwiseConv2d: groupedPointwiseConv2dRules,
|
|
64
91
|
groupnorm: groupnormRules,
|
|
65
92
|
kv_quantize: kvQuantizeRules,
|
|
66
93
|
layernorm: layernormRules,
|
|
@@ -69,9 +96,12 @@ const RULE_SETS = {
|
|
|
69
96
|
moeGptoss: kernelMoeGptOssRules,
|
|
70
97
|
modulate: modulateRules,
|
|
71
98
|
pixel_shuffle: pixelShuffleRules,
|
|
99
|
+
repeatChannels: repeatChannelsRules,
|
|
100
|
+
relu: reluRules,
|
|
72
101
|
residual: residualRules,
|
|
73
102
|
rmsnorm: rmsnormRules,
|
|
74
103
|
rope: ropeRules,
|
|
104
|
+
sanaLinearAttention: sanaLinearAttentionRules,
|
|
75
105
|
sample: sampleRules,
|
|
76
106
|
scale: scaleRules,
|
|
77
107
|
silu: siluRules,
|
|
@@ -133,6 +163,14 @@ export function registerRuleGroup(domain, group, rules) {
|
|
|
133
163
|
RULE_SETS[domain][group] = rules;
|
|
134
164
|
}
|
|
135
165
|
|
|
166
|
+
/**
 * Returns the module-level execution-rules contract artifact.
 *
 * @returns {object} The `INFERENCE_EXECUTION_RULES_CONTRACT_ARTIFACT` object
 *   itself (not a copy).
 */
export function getInferenceExecutionRulesContractArtifact() {
  return INFERENCE_EXECUTION_RULES_CONTRACT_ARTIFACT;
}
|
|
169
|
+
|
|
170
|
+
/**
 * Returns the module-level layer-pattern contract artifact.
 *
 * @returns {object} The `INFERENCE_LAYER_PATTERN_CONTRACT_ARTIFACT` object
 *   itself (not a copy).
 */
export function getInferenceLayerPatternContractArtifact() {
  return INFERENCE_LAYER_PATTERN_CONTRACT_ARTIFACT;
}
|
|
173
|
+
|
|
136
174
|
function resolveRuleValue(value, context) {
|
|
137
175
|
if (Array.isArray(value)) {
|
|
138
176
|
return value.map((entry) => resolveRuleValue(entry, context));
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
/**
 * Produces a list of tensor entries from a manifest object.
 *
 * @param manifest - Manifest record to read.
 * @returns One entry per tensor, carrying its `name` plus untyped `dtype`,
 *   `shape`, `role` and `layout` values.
 */
export declare function extractTensorEntriesFromManifest(manifest: Record<string, unknown>): {
  name: string;
  dtype: unknown;
  shape: unknown;
  role: unknown;
  layout: unknown;
}[];
|
|
10
|
+
|
|
11
|
+
/**
 * Resolves manifest fields from a conversion config and a manifest.
 *
 * @param conversionConfigInput - Conversion config record.
 * @param manifest - Manifest record.
 * @returns The resolved `modelId` and `modelType`, plus `architecture` and
 *   `inference` records (either may be `null`).
 */
export declare function resolveMaterializedManifestFromConversionConfig(
  conversionConfigInput: Record<string, unknown>,
  manifest: Record<string, unknown>,
): {
  modelId: string;
  modelType: string;
  architecture: Record<string, unknown> | null;
  inference: Record<string, unknown> | null;
};
|
|
20
|
+
|
|
21
|
+
/**
 * Derives a model id from a conversion config and the path it came from.
 *
 * @param configPath - Path of the conversion config.
 * @param conversionConfigInput - Conversion config record.
 * @returns The model id string.
 */
export declare function inferConversionConfigModelId(
  configPath: string,
  conversionConfigInput: Record<string, unknown>,
): string;
|