@simulatte/doppler 0.1.3 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114) hide show
  1. package/README.md +11 -5
  2. package/package.json +27 -4
  3. package/src/client/doppler-api.browser.d.ts +1 -0
  4. package/src/client/doppler-api.browser.js +288 -0
  5. package/src/client/doppler-api.d.ts +80 -0
  6. package/src/client/doppler-api.js +298 -0
  7. package/src/client/doppler-provider/types.js +1 -1
  8. package/src/client/doppler-registry.d.ts +23 -0
  9. package/src/client/doppler-registry.js +88 -0
  10. package/src/client/doppler-registry.json +39 -0
  11. package/src/config/execution-contract-check.d.ts +82 -0
  12. package/src/config/execution-contract-check.js +317 -0
  13. package/src/config/execution-v0-contract-check.d.ts +94 -0
  14. package/src/config/execution-v0-contract-check.js +251 -0
  15. package/src/config/execution-v0-graph-contract-check.d.ts +20 -0
  16. package/src/config/execution-v0-graph-contract-check.js +64 -0
  17. package/src/config/kernel-path-contract-check.d.ts +76 -0
  18. package/src/config/kernel-path-contract-check.js +479 -0
  19. package/src/config/kernel-path-loader.d.ts +16 -0
  20. package/src/config/kernel-path-loader.js +54 -0
  21. package/src/config/kernels/kernel-ref-digests.js +12 -0
  22. package/src/config/kernels/registry.json +556 -0
  23. package/src/config/loader.js +90 -67
  24. package/src/config/merge-contract-check.d.ts +16 -0
  25. package/src/config/merge-contract-check.js +321 -0
  26. package/src/config/merge-helpers.d.ts +58 -0
  27. package/src/config/merge-helpers.js +54 -0
  28. package/src/config/merge.js +3 -6
  29. package/src/config/presets/models/janus-text.json +27 -0
  30. package/src/config/quantization-contract-check.d.ts +12 -0
  31. package/src/config/quantization-contract-check.js +91 -0
  32. package/src/config/required-inference-fields-contract-check.d.ts +24 -0
  33. package/src/config/required-inference-fields-contract-check.js +231 -0
  34. package/src/config/schema/browser-suite-metrics.schema.d.ts +17 -0
  35. package/src/config/schema/browser-suite-metrics.schema.js +46 -0
  36. package/src/config/schema/conversion-report.schema.d.ts +40 -0
  37. package/src/config/schema/conversion-report.schema.js +108 -0
  38. package/src/config/schema/doppler.schema.js +12 -18
  39. package/src/config/schema/index.d.ts +22 -0
  40. package/src/config/schema/index.js +18 -0
  41. package/src/converter/core.d.ts +10 -0
  42. package/src/converter/core.js +49 -11
  43. package/src/converter/parsers/diffusion.js +63 -3
  44. package/src/converter/tokenizer-utils.js +17 -3
  45. package/src/formats/rdrr/validation.js +13 -0
  46. package/src/gpu/kernels/depthwise_conv2d.d.ts +29 -0
  47. package/src/gpu/kernels/depthwise_conv2d.js +98 -0
  48. package/src/gpu/kernels/depthwise_conv2d.wgsl +58 -0
  49. package/src/gpu/kernels/depthwise_conv2d_f16.wgsl +62 -0
  50. package/src/gpu/kernels/grouped_pointwise_conv2d.d.ts +27 -0
  51. package/src/gpu/kernels/grouped_pointwise_conv2d.js +92 -0
  52. package/src/gpu/kernels/grouped_pointwise_conv2d.wgsl +47 -0
  53. package/src/gpu/kernels/grouped_pointwise_conv2d_f16.wgsl +51 -0
  54. package/src/gpu/kernels/index.d.ts +30 -0
  55. package/src/gpu/kernels/index.js +25 -0
  56. package/src/gpu/kernels/relu.d.ts +18 -0
  57. package/src/gpu/kernels/relu.js +45 -0
  58. package/src/gpu/kernels/relu.wgsl +21 -0
  59. package/src/gpu/kernels/relu_f16.wgsl +23 -0
  60. package/src/gpu/kernels/repeat_channels.d.ts +21 -0
  61. package/src/gpu/kernels/repeat_channels.js +60 -0
  62. package/src/gpu/kernels/repeat_channels.wgsl +29 -0
  63. package/src/gpu/kernels/repeat_channels_f16.wgsl +31 -0
  64. package/src/gpu/kernels/sana_linear_attention.d.ts +27 -0
  65. package/src/gpu/kernels/sana_linear_attention.js +122 -0
  66. package/src/gpu/kernels/sana_linear_attention_apply.wgsl +44 -0
  67. package/src/gpu/kernels/sana_linear_attention_apply_f16.wgsl +47 -0
  68. package/src/gpu/kernels/sana_linear_attention_summary.wgsl +47 -0
  69. package/src/gpu/kernels/sana_linear_attention_summary_f16.wgsl +49 -0
  70. package/src/index-browser.d.ts +1 -0
  71. package/src/index-browser.js +2 -1
  72. package/src/index.d.ts +1 -0
  73. package/src/index.js +2 -1
  74. package/src/inference/browser-harness.js +164 -38
  75. package/src/inference/pipelines/diffusion/init.js +14 -0
  76. package/src/inference/pipelines/diffusion/pipeline.js +206 -77
  77. package/src/inference/pipelines/diffusion/sana-transformer.d.ts +53 -0
  78. package/src/inference/pipelines/diffusion/sana-transformer.js +738 -0
  79. package/src/inference/pipelines/diffusion/scheduler.d.ts +17 -1
  80. package/src/inference/pipelines/diffusion/scheduler.js +91 -3
  81. package/src/inference/pipelines/diffusion/text-encoder-gpu.d.ts +6 -4
  82. package/src/inference/pipelines/diffusion/text-encoder-gpu.js +270 -0
  83. package/src/inference/pipelines/diffusion/text-encoder.js +18 -1
  84. package/src/inference/pipelines/diffusion/types.d.ts +4 -0
  85. package/src/inference/pipelines/diffusion/vae.js +782 -78
  86. package/src/inference/pipelines/text/config.d.ts +5 -0
  87. package/src/inference/pipelines/text/config.js +1 -1
  88. package/src/inference/pipelines/text/execution-v0.js +141 -101
  89. package/src/inference/pipelines/text/init.js +41 -10
  90. package/src/inference/pipelines/text.js +7 -1
  91. package/src/rules/execution-rules-contract-check.d.ts +17 -0
  92. package/src/rules/execution-rules-contract-check.js +245 -0
  93. package/src/rules/kernels/depthwise-conv2d.rules.json +6 -0
  94. package/src/rules/kernels/grouped-pointwise-conv2d.rules.json +6 -0
  95. package/src/rules/kernels/relu.rules.json +6 -0
  96. package/src/rules/kernels/repeat-channels.rules.json +6 -0
  97. package/src/rules/kernels/sana-linear-attention.rules.json +6 -0
  98. package/src/rules/layer-pattern-contract-check.d.ts +17 -0
  99. package/src/rules/layer-pattern-contract-check.js +231 -0
  100. package/src/rules/rule-registry.d.ts +28 -0
  101. package/src/rules/rule-registry.js +38 -0
  102. package/src/tooling/conversion-config-materializer.d.ts +24 -0
  103. package/src/tooling/conversion-config-materializer.js +99 -0
  104. package/src/tooling/lean-execution-contract-runner.d.ts +43 -0
  105. package/src/tooling/lean-execution-contract-runner.js +158 -0
  106. package/src/tooling/lean-execution-contract.d.ts +16 -0
  107. package/src/tooling/lean-execution-contract.js +81 -0
  108. package/src/tooling/node-convert.d.ts +10 -0
  109. package/src/tooling/node-converter.js +59 -0
  110. package/src/tooling/node-webgpu.js +30 -9
  111. package/src/version.d.ts +2 -0
  112. package/src/version.js +2 -0
  113. package/tools/convert-safetensors-node.js +47 -0
  114. package/tools/doppler-cli.js +167 -6
@@ -225,6 +225,28 @@ export {
225
225
  type ConversionIOSchema,
226
226
  } from './conversion.schema.js';
227
227
 
228
+ // =============================================================================
229
+ // Browser Suite Metrics Schema
230
+ // =============================================================================
231
+ export {
232
+ type BrowserSuiteMetricsSchema,
233
+ BROWSER_SUITE_METRICS_SCHEMA_VERSION,
234
+ DEFAULT_BROWSER_SUITE_METRICS,
235
+ validateBrowserSuiteMetrics,
236
+ } from './browser-suite-metrics.schema.js';
237
+
238
+ // =============================================================================
239
+ // Conversion Report Schema
240
+ // =============================================================================
241
+ export {
242
+ type ConversionReportResultSchema,
243
+ type ConversionReportManifestSchema,
244
+ type ConversionReportSchema,
245
+ CONVERSION_REPORT_SCHEMA_VERSION,
246
+ DEFAULT_CONVERSION_REPORT,
247
+ validateConversionReport,
248
+ } from './conversion-report.schema.js';
249
+
228
250
  // =============================================================================
229
251
  // Converter Schema
230
252
  // =============================================================================
@@ -55,6 +55,24 @@ export {
55
55
  ConversionStage,
56
56
  } from './conversion.schema.js';
57
57
 
58
+ // =============================================================================
59
+ // Browser Suite Metrics Schema
60
+ // =============================================================================
61
+ export {
62
+ BROWSER_SUITE_METRICS_SCHEMA_VERSION,
63
+ DEFAULT_BROWSER_SUITE_METRICS,
64
+ validateBrowserSuiteMetrics,
65
+ } from './browser-suite-metrics.schema.js';
66
+
67
+ // =============================================================================
68
+ // Conversion Report Schema
69
+ // =============================================================================
70
+ export {
71
+ CONVERSION_REPORT_SCHEMA_VERSION,
72
+ DEFAULT_CONVERSION_REPORT,
73
+ validateConversionReport,
74
+ } from './conversion-report.schema.js';
75
+
58
76
  // =============================================================================
59
77
  // Converter Schema
60
78
  // =============================================================================
@@ -27,6 +27,12 @@ import type {
27
27
  MoEConfigSchema,
28
28
  ConversionInfoSchema,
29
29
  } from '../config/schema/index.js';
30
+ import type { ExecutionContractArtifact } from '../config/execution-contract-check.js';
31
+ import type { ExecutionV0GraphContractArtifact } from '../config/execution-v0-graph-contract-check.js';
32
+ import type {
33
+ ManifestRequiredInferenceFieldsArtifact,
34
+ RequiredInferenceFieldsContractArtifact,
35
+ } from '../config/required-inference-fields-contract-check.js';
30
36
 
31
37
  export { generateShardFilename } from '../formats/rdrr/index.js';
32
38
 
@@ -144,6 +150,10 @@ export interface ConvertResult {
144
150
  shardCount: number;
145
151
  tensorCount: number;
146
152
  totalSize: number;
153
+ executionContractArtifact: ExecutionContractArtifact | null;
154
+ executionV0GraphContractArtifact: ExecutionV0GraphContractArtifact | null;
155
+ layerPatternContractArtifact: Record<string, unknown> | null;
156
+ requiredInferenceFieldsArtifact: ManifestRequiredInferenceFieldsArtifact | RequiredInferenceFieldsContractArtifact | null;
147
157
  }
148
158
 
149
159
  /** @deprecated Use ConversionIOSchema from config/schema */
@@ -9,15 +9,20 @@ import {
9
9
  formatBytes,
10
10
  } from '../config/schema/index.js';
11
11
 
12
- import { classifyTensorRole, generateShardFilename } from '../formats/rdrr/index.js';
12
+ import { classifyTensor, classifyTensorRole, generateShardFilename } from '../formats/rdrr/index.js';
13
13
  import { log } from '../debug/index.js';
14
- import { selectRuleValue } from '../rules/rule-registry.js';
14
+ import {
15
+ getInferenceLayerPatternContractArtifact,
16
+ selectRuleValue,
17
+ } from '../rules/rule-registry.js';
15
18
  import {
16
19
  createConverterConfig,
17
20
  detectPreset,
18
21
  listPresets,
19
22
  resolvePreset,
20
23
  } from '../config/index.js';
24
+ import { buildExecutionContractArtifact } from '../config/execution-contract-check.js';
25
+ import { buildManifestRequiredInferenceFieldsArtifact } from '../config/required-inference-fields-contract-check.js';
21
26
  import { buildManifestInference, inferEmbeddingOutputConfig } from './manifest-inference.js';
22
27
  import { resolveEosTokenId } from './tokenizer-utils.js';
23
28
  import {
@@ -76,7 +81,8 @@ function resolveTokenizerField(tokenizerConfig, ...keys) {
76
81
  }
77
82
 
78
83
  function resolveTokenizerVocabSize(tokenizerConfig, rawConfig, architecture) {
79
- const configVocab = rawConfig?.vocab_size ?? rawConfig?.text_config?.vocab_size;
84
+ const nestedTextConfig = getNestedTextConfig(rawConfig);
85
+ const configVocab = rawConfig?.vocab_size ?? nestedTextConfig?.vocab_size;
80
86
  const tokenizerVocab = tokenizerConfig?.vocab_size ?? tokenizerConfig?.vocabSize;
81
87
  const archVocab = architecture?.vocabSize;
82
88
  return tokenizerVocab ?? configVocab ?? archVocab ?? null;
@@ -223,21 +229,22 @@ function toFloat32ForQ4K(tensorData, sourceDtype, tensorName) {
223
229
 
224
230
  function resolveConfigTokenId(rawConfig, key) {
225
231
  const direct = rawConfig?.[key];
226
- const nested = rawConfig?.text_config?.[key];
232
+ const nested = getNestedTextConfig(rawConfig)?.[key];
227
233
  return resolveTokenizerId(direct ?? nested);
228
234
  }
229
235
 
230
236
  function resolveConfigTokenIds(rawConfig, key) {
231
237
  const direct = rawConfig?.[key];
232
- const nested = rawConfig?.text_config?.[key];
238
+ const nested = getNestedTextConfig(rawConfig)?.[key];
233
239
  return resolveTokenizerIds(direct ?? nested);
234
240
  }
235
241
 
236
242
  function resolveMoEConfigNumber(rawConfig, ...keys) {
243
+ const nestedTextConfig = getNestedTextConfig(rawConfig);
237
244
  for (const key of keys) {
238
245
  const direct = rawConfig?.[key];
239
246
  if (Number.isFinite(direct) && direct > 0) return Number(direct);
240
- const nested = rawConfig?.text_config?.[key];
247
+ const nested = nestedTextConfig?.[key];
241
248
  if (Number.isFinite(nested) && nested > 0) return Number(nested);
242
249
  }
243
250
  return null;
@@ -317,7 +324,7 @@ function resolveIntermediateSizeFromTensors(architecture, model, tensorLocations
317
324
  if (typeof current !== 'number' || !Number.isFinite(current) || current <= 0) {
318
325
  return architecture;
319
326
  }
320
- const modelType = String(rawConfig?.model_type ?? rawConfig?.text_config?.model_type ?? '').toLowerCase();
327
+ const modelType = String(rawConfig?.model_type ?? getNestedTextConfig(rawConfig)?.model_type ?? '').toLowerCase();
321
328
  if (modelType !== 'lfm2') {
322
329
  return architecture;
323
330
  }
@@ -359,7 +366,7 @@ function resolveMoEExpertFormat(rawConfig, resolvedModelType, quantizationInfo,
359
366
  const modelType = String(
360
367
  resolvedModelType ??
361
368
  rawConfig?.model_type ??
362
- rawConfig?.text_config?.model_type ??
369
+ getNestedTextConfig(rawConfig)?.model_type ??
363
370
  ''
364
371
  ).toLowerCase();
365
372
  if (modelType.includes('gpt_oss') || modelType.includes('gpt-oss') || modelType.includes('gptoss')) {
@@ -725,9 +732,7 @@ export function extractArchitecture(config, ggufConfig) {
725
732
 
726
733
  // Try HuggingFace config first
727
734
  if (config && Object.keys(config).length > 0) {
728
- const textConfig = (
729
- config.text_config && typeof config.text_config === 'object' && !Array.isArray(config.text_config)
730
- ) ? config.text_config : null;
735
+ const textConfig = getNestedTextConfig(config);
731
736
  const fromConfig = (...keys) => {
732
737
  const values = [];
733
738
  for (const key of keys) {
@@ -860,6 +865,19 @@ export function extractArchitecture(config, ggufConfig) {
860
865
  throw new Error('Missing model config: cannot extract architecture');
861
866
  }
862
867
 
868
// Returns the nested text-model sub-config of a model config, or null when there is none.
// Lookup order: `config.text_config` first, then `config.language_config`. A candidate is only
// accepted when it is a non-array object; a non-object (or array) `config` yields null.
+ function getNestedTextConfig(config) {
869
+ if (!config || typeof config !== 'object' || Array.isArray(config)) {
870
+ return null;
871
+ }
872
// `text_config` wins when both keys are present.
+ if (config.text_config && typeof config.text_config === 'object' && !Array.isArray(config.text_config)) {
873
+ return config.text_config;
874
+ }
875
// Fallback key added in this version; previously callers read `text_config` directly.
+ if (config.language_config && typeof config.language_config === 'object' && !Array.isArray(config.language_config)) {
876
+ return config.language_config;
877
+ }
878
+ return null;
879
+ }
880
+
863
881
 
864
882
  export function buildTensorMap(tensors, shardSize) {
865
883
  if (!shardSize || shardSize <= 0) {
@@ -1115,6 +1133,7 @@ export async function convertModel(model, io, options = {}) {
1115
1133
  }
1116
1134
  const totalTensors = tensors.length;
1117
1135
  const targetQuant = String(options.quantization ?? model.quantization ?? '').trim().toLowerCase();
1136
+ const tensorGroupModelType = String(options.modelType ?? model.modelType ?? 'transformer');
1118
1137
  const q4kLayout = normalizeQ4KLayout(options.quantizationInfo?.layout);
1119
1138
  const quantizeEmbeddings = resolveQuantizeEmbeddings(
1120
1139
  options.quantizationInfo ?? null,
@@ -1238,6 +1257,7 @@ export async function convertModel(model, io, options = {}) {
1238
1257
 
1239
1258
  // Record tensor location
1240
1259
  const role = classifyTensorRole(tensor.name);
1260
+ const group = classifyTensor(tensor.name, tensorGroupModelType);
1241
1261
 
1242
1262
  if (tensorSpans.length === 1) {
1243
1263
  tensorLocations[tensor.name] = {
@@ -1247,6 +1267,7 @@ export async function convertModel(model, io, options = {}) {
1247
1267
  shape: tensor.shape,
1248
1268
  dtype: outDtype,
1249
1269
  role,
1270
+ group,
1250
1271
  ...(outLayout ? { layout: outLayout } : {}),
1251
1272
  };
1252
1273
  } else {
@@ -1256,6 +1277,7 @@ export async function convertModel(model, io, options = {}) {
1256
1277
  shape: tensor.shape,
1257
1278
  dtype: outDtype,
1258
1279
  role,
1280
+ group,
1259
1281
  ...(outLayout ? { layout: outLayout } : {}),
1260
1282
  };
1261
1283
  }
@@ -1314,11 +1336,27 @@ export async function convertModel(model, io, options = {}) {
1314
1336
  totalSize: formatBytes(totalSize),
1315
1337
  });
1316
1338
 
1339
+ const executionContractArtifact = buildExecutionContractArtifact(manifest);
1340
+ const layerPatternContractArtifact = getInferenceLayerPatternContractArtifact();
1341
+ const requiredInferenceFieldsArtifact = manifest?.modelType === 'transformer'
1342
+ && manifest?.inference
1343
+ && typeof manifest.inference === 'object'
1344
+ && manifest.inference.attention
1345
+ && typeof manifest.inference.attention === 'object'
1346
+ ? buildManifestRequiredInferenceFieldsArtifact(
1347
+ manifest?.inference ?? null,
1348
+ `${manifest?.modelId ?? modelId}.inference`
1349
+ )
1350
+ : null;
1317
1351
  return {
1318
1352
  manifest,
1319
1353
  shardCount: shards.length,
1320
1354
  tensorCount: tensors.length,
1321
1355
  totalSize,
1356
+ executionContractArtifact,
1357
+ executionV0GraphContractArtifact: executionContractArtifact?.executionV0?.graph ?? null,
1358
+ layerPatternContractArtifact,
1359
+ requiredInferenceFieldsArtifact,
1322
1360
  };
1323
1361
  }
1324
1362
 
@@ -4,6 +4,13 @@ const SD3_LAYOUT = {
4
4
  id: 'sd3',
5
5
  requiredComponents: ['transformer', 'text_encoder', 'text_encoder_2', 'text_encoder_3', 'vae', 'scheduler'],
6
6
  weightedComponents: ['transformer', 'text_encoder', 'text_encoder_2', 'text_encoder_3', 'vae'],
7
// Layout predicate evaluated by detectDiffusionLayout after the requiredComponents check.
// True only when `components` contains both 'text_encoder_2' and 'text_encoder_3' and the
// transformer entry's class name is exactly 'SD3Transformer2DModel'.
+ matches(modelIndex, components) {
8
+ return (
9
+ components.has('text_encoder_2') &&
10
+ components.has('text_encoder_3') &&
11
+ getComponentClassName(modelIndex?.transformer) === 'SD3Transformer2DModel'
12
+ );
13
+ },
7
14
  tokenizerSpecs: [
8
15
  {
9
16
  modelIndexKey: 'tokenizer',
@@ -66,6 +73,10 @@ const FLUX_LAYOUT = {
66
73
  id: 'flux',
67
74
  requiredComponents: ['transformer', 'text_encoder', 'vae', 'scheduler'],
68
75
  weightedComponents: ['transformer', 'text_encoder', 'vae'],
76
// Layout predicate: true when the transformer entry's class name is a string that starts
// with "Flux" (case-insensitive). A missing/unrecognised entry yields null and therefore false.
+ matches(modelIndex) {
77
+ const transformerClass = getComponentClassName(modelIndex?.transformer);
78
+ return typeof transformerClass === 'string' && /^Flux/i.test(transformerClass);
79
+ },
69
80
  tokenizerSpecs: [
70
81
  {
71
82
  modelIndexKey: 'tokenizer',
@@ -91,7 +102,39 @@ const FLUX_LAYOUT = {
91
102
  ],
92
103
  };
93
104
 
94
- const LAYOUTS = [SD3_LAYOUT, FLUX_LAYOUT];
105
// Diffusion layout descriptor for Sana pipelines (id 'sana').
// Selected by detectDiffusionLayout when all requiredComponents are present and matches() is true.
+ const SANA_LAYOUT = {
106
+ id: 'sana',
107
// 'tokenizer' is required to be listed in the model index, but parseDiffusionModel skips it
// when reading per-component configs (see the `componentId === 'tokenizer'` guard in this diff).
+ requiredComponents: ['transformer', 'text_encoder', 'tokenizer', 'vae', 'scheduler'],
108
+ weightedComponents: ['transformer', 'text_encoder', 'vae'],
109
// Requires exact class names for both the transformer and the text encoder.
+ matches(modelIndex) {
110
+ return (
111
+ getComponentClassName(modelIndex?.transformer) === 'SanaTransformer2DModel' &&
112
+ getComponentClassName(modelIndex?.text_encoder) === 'Gemma2Model'
113
+ );
114
+ },
115
// Single bundled tokenizer attached to the text encoder; only tokenizer.json is mandatory.
+ tokenizerSpecs: [
116
+ {
117
+ modelIndexKey: 'tokenizer',
118
+ componentId: 'text_encoder',
119
+ type: 'bundled',
120
+ assets: [
121
+ { suffix: 'tokenizer/tokenizer.json', targetName: 'tokenizer_tokenizer.json', kind: 'text', required: true },
122
+ { suffix: 'tokenizer/tokenizer_config.json', targetName: 'tokenizer_config.json', kind: 'text', required: false },
123
+ { suffix: 'tokenizer/special_tokens_map.json', targetName: 'tokenizer_special_tokens_map.json', kind: 'text', required: false },
124
+ { suffix: 'tokenizer/tokenizer.model', targetName: 'tokenizer_tokenizer.model', kind: 'binary', required: false },
125
+ ],
126
// File names below match the targetName values of the assets above.
+ config: {
127
+ type: 'bundled',
128
+ tokenizerFile: 'tokenizer_tokenizer.json',
129
+ configFile: 'tokenizer_config.json',
130
+ specialTokensFile: 'tokenizer_special_tokens_map.json',
131
+ sentencePieceFile: 'tokenizer_tokenizer.model',
132
+ },
133
+ },
134
+ ],
135
+ };
136
+
137
+ const LAYOUTS = [SD3_LAYOUT, FLUX_LAYOUT, SANA_LAYOUT];
95
138
 
96
139
  function toAbortError(message = 'Cancelled') {
97
140
  if (typeof DOMException === 'function') {
@@ -112,12 +155,26 @@ function listModelComponents(modelIndex) {
112
155
  return Object.keys(modelIndex || {}).filter((key) => !key.startsWith('_'));
113
156
  }
114
157
 
158
// Extracts a component's class name from a model-index entry, or returns null.
// Accepts two entry shapes: an array whose second element is a string (returns that element),
// or an object with a string `_class_name` property. Anything else yields null.
+ function getComponentClassName(componentEntry) {
159
// Array form — presumably the [library, className] pairs of a model_index.json; TODO confirm.
+ if (Array.isArray(componentEntry) && componentEntry.length >= 2 && typeof componentEntry[1] === 'string') {
160
+ return componentEntry[1];
161
+ }
162
+ if (componentEntry && typeof componentEntry === 'object' && typeof componentEntry._class_name === 'string') {
163
+ return componentEntry._class_name;
164
+ }
165
+ return null;
166
+ }
167
+
115
168
  export function detectDiffusionLayout(modelIndex) {
116
169
  const components = new Set(listModelComponents(modelIndex));
117
170
  for (const layout of LAYOUTS) {
118
- if (layout.requiredComponents.every((component) => components.has(component))) {
119
- return layout;
171
+ if (!layout.requiredComponents.every((component) => components.has(component))) {
172
+ continue;
120
173
  }
174
+ if (typeof layout.matches === 'function' && !layout.matches(modelIndex, components)) {
175
+ continue;
176
+ }
177
+ return layout;
121
178
  }
122
179
  const listed = [...components].sort().join(', ') || '(none)';
123
180
  const expected = LAYOUTS
@@ -199,6 +256,9 @@ export async function parseDiffusionModel(adapter) {
199
256
  const tensors = [];
200
257
 
201
258
  for (const componentId of layout.requiredComponents) {
259
+ if (componentId === 'tokenizer') {
260
+ continue;
261
+ }
202
262
  const configSuffix = defaultConfigPath(componentId);
203
263
  const config = await readJson(configSuffix, `${componentId} config`);
204
264
  if (componentId === 'transformer' && config && !config.weight_format) {
@@ -1,4 +1,5 @@
1
1
  export function resolveEosTokenId({ config, tokenizer, tokenizerJson }) {
2
+ const nestedTextConfig = getNestedTextConfig(config);
2
3
  const candidateSources = [
3
4
  tokenizer?.eosTokenId,
4
5
  tokenizer?.eos_token_id,
@@ -7,9 +8,9 @@ export function resolveEosTokenId({ config, tokenizer, tokenizerJson }) {
7
8
  tokenizerJson?.special_tokens?.eos,
8
9
  tokenizerJson?.special_tokens?.eos_token_id,
9
10
  config?.eos_token_id,
10
- config?.text_config?.eos_token_id,
11
+ nestedTextConfig?.eos_token_id,
11
12
  config?.eos_token_ids,
12
- config?.text_config?.eos_token_ids,
13
+ nestedTextConfig?.eos_token_ids,
13
14
  ];
14
15
 
15
16
  for (const candidate of candidateSources) {
@@ -23,7 +24,7 @@ export function resolveEosTokenId({ config, tokenizer, tokenizerJson }) {
23
24
  tokenizerJson?.specialTokens?.eos_token,
24
25
  tokenizerJson?.special_tokens?.eos_token,
25
26
  config?.eos_token,
26
- config?.text_config?.eos_token,
27
+ nestedTextConfig?.eos_token,
27
28
  ];
28
29
 
29
30
  for (const candidate of eosTokenStringCandidates) {
@@ -48,6 +49,19 @@ export function resolveEosTokenId({ config, tokenizer, tokenizerJson }) {
48
49
  throw new Error('Missing eos_token_id. Provide eos_token_id in config or tokenizer metadata.');
49
50
  }
50
51
 
52
// Returns the nested text-model sub-config used by resolveEosTokenId, or null.
// Checks `config.text_config` first and `config.language_config` second; each must be a
// non-array object to be returned. A non-object or array `config` yields null.
// NOTE(review): an identical helper is added in another hunk of this diff — consider sharing one.
+ function getNestedTextConfig(config) {
53
+ if (!config || typeof config !== 'object' || Array.isArray(config)) {
54
+ return null;
55
+ }
56
+ if (config.text_config && typeof config.text_config === 'object' && !Array.isArray(config.text_config)) {
57
+ return config.text_config;
58
+ }
59
+ if (config.language_config && typeof config.language_config === 'object' && !Array.isArray(config.language_config)) {
60
+ return config.language_config;
61
+ }
62
+ return null;
63
+ }
64
+
51
65
  function normalizeEosTokenId(value) {
52
66
  if (Array.isArray(value)) {
53
67
  if (value.length === 0 || value.some((id) => typeof id !== 'number')) {
@@ -1,4 +1,5 @@
1
1
  import { validateTensorConfigConsistency } from './tensor-config-validator.js';
2
+ import { validateManifestExecutionContract } from '../../config/execution-contract-check.js';
2
3
 
3
4
  export function validateManifest(manifest) {
4
5
  const errors = [];
@@ -196,5 +197,17 @@ export function validateManifest(manifest) {
196
197
  }
197
198
  }
198
199
 
200
+ if (!isDiffusion && !isEnergy && errors.length === 0) {
201
+ try {
202
+ const executionContract = validateManifestExecutionContract(manifest);
203
+ for (const error of executionContract.errors) {
204
+ errors.push(error);
205
+ }
206
+ } catch (error) {
207
+ const message = error instanceof Error ? error.message : String(error);
208
+ errors.push(`[ExecutionContract] ${message}`);
209
+ }
210
+ }
211
+
199
212
  return { valid: errors.length === 0, errors, warnings };
200
213
  }
@@ -0,0 +1,29 @@
1
+ import type { Tensor } from '../tensor.js';
2
+ import type { CommandRecorder } from '../command-recorder.js';
3
+ import type { OutputBufferOptions } from './types.js';
4
+ import type { WeightBuffer } from '../weight-buffer.js';
5
+
6
// Options for the depthwise 2-D convolution kernel.
// channels/height/width describe the input; kernelH/kernelW the per-channel filter size.
// The implementation in this diff throws unless all five are finite numbers, and defaults
// `stride` to 1 and `pad` to 0 when they are omitted.
+ export interface DepthwiseConv2DOptions extends OutputBufferOptions {
7
+ channels: number;
8
+ height: number;
9
+ width: number;
10
+ kernelH: number;
11
+ kernelW: number;
12
// Same stride is applied on both spatial axes.
+ stride?: number;
13
// Same padding is applied on both spatial axes (counted twice per axis in the output size).
+ pad?: number;
14
+ }
15
+
16
// Runs the depthwise convolution without a command recorder.
// Resolves to a tensor of shape [channels, outHeight, outWidth] with the input's dtype.
// When `bias` is null the implementation substitutes a temporary zero-filled buffer.
+ export declare function runDepthwiseConv2D(
17
+ input: Tensor,
18
+ weight: GPUBuffer | WeightBuffer,
19
+ bias: GPUBuffer | WeightBuffer | null,
20
+ options: DepthwiseConv2DOptions
21
+ ): Promise<Tensor>;
22
+
23
// Recorder variant of runDepthwiseConv2D: the same computation, issued through `recorder`.
// Any temporary zero-bias buffer is handed to recorder.trackTemporaryBuffer instead of
// being released immediately.
+ export declare function recordDepthwiseConv2D(
24
+ recorder: CommandRecorder,
25
+ input: Tensor,
26
+ weight: GPUBuffer | WeightBuffer,
27
+ bias: GPUBuffer | WeightBuffer | null,
28
+ options: DepthwiseConv2DOptions
29
+ ): Promise<Tensor>;
@@ -0,0 +1,98 @@
1
+ import { getDevice } from '../device.js';
2
+ import { acquireBuffer, releaseBuffer } from '../../memory/buffer-pool.js';
3
+ import { createTensor, dtypeBytes } from '../tensor.js';
4
+ import { getBuffer } from '../weight-buffer.js';
5
+ import { unifiedKernelWrapper } from './utils.js';
6
+ import { selectRuleValue } from './rule-registry.js';
7
+ import { WORKGROUP_SIZES } from './constants.js';
8
+
9
// Looks up the kernel variant for the 'depthwiseConv2d' rule set, keyed on whether the input is f16.
+ function selectDepthwiseConv2DVariant(isF16) {
10
+ return selectRuleValue('depthwiseConv2d', 'variant', { isF16 });
11
+ }
12
+
13
// Shared implementation behind runDepthwiseConv2D / recordDepthwiseConv2D.
//
// target  - command recorder (detected by a beginComputePass method) or null for immediate mode.
// input   - tensor whose dtype selects the f16/f32 variant and the output dtype.
// weight  - per-channel filter buffer; bias may be null, in which case zeros are used.
// options - channels/height/width/kernelH/kernelW (required), stride (default 1), pad (default 0),
//           outputBuffer (optional pre-allocated destination).
//
// Returns a tensor of shape [channels, outHeight, outWidth] where
//   out = floor((dim + 2*pad - kernel) / stride) + 1.
// Throws when a required dimension is not finite or the computed output size is <= 0.
+ async function _depthwiseConv2D(target, input, weight, bias, options = {}) {
14
+ const recorder = target && typeof target.beginComputePass === 'function' ? target : null;
15
+ const device = target?.device || getDevice();
16
+ const {
17
+ channels,
18
+ height,
19
+ width,
20
+ kernelH,
21
+ kernelW,
22
+ stride = 1,
23
+ pad = 0,
24
+ outputBuffer = null,
25
+ } = options;
26
+
27
+ if (
28
+ !Number.isFinite(channels) ||
29
+ !Number.isFinite(height) ||
30
+ !Number.isFinite(width) ||
31
+ !Number.isFinite(kernelH) ||
32
+ !Number.isFinite(kernelW)
33
+ ) {
34
+ throw new Error('DepthwiseConv2D requires explicit dimensions.');
35
+ }
36
+
37
// NOTE(review): stride and pad are not validated. stride = 0 makes these Infinity or NaN,
// and neither trips the `<= 0` guard below — consider rejecting non-positive stride.
+ const outHeight = Math.floor((height + pad * 2 - kernelH) / stride) + 1;
38
+ const outWidth = Math.floor((width + pad * 2 - kernelW) / stride) + 1;
39
+ if (outHeight <= 0 || outWidth <= 0) {
40
+ throw new Error(`DepthwiseConv2D invalid output size: ${outHeight}x${outWidth}`);
41
+ }
42
+
43
+ const isF16 = input.dtype === 'f16';
44
+ const variant = selectDepthwiseConv2DVariant(isF16);
45
+ const bytesPerElement = dtypeBytes(input.dtype);
46
+ const outputSize = channels * outHeight * outWidth * bytesPerElement;
47
// A caller-supplied outputBuffer is used as-is; its size is not checked here.
+ const output = outputBuffer || acquireBuffer(outputSize, undefined, 'depthwise_conv2d_output');
48
+
49
+ const weightBuffer = getBuffer(weight);
50
+ let biasBuffer = getBuffer(bias);
51
+ let tempBias = null;
52
// No bias supplied: bind a temporary zero-filled buffer so the shader can always read bias[channel].
+ if (!biasBuffer) {
53
+ const biasSize = channels * bytesPerElement;
54
+ tempBias = acquireBuffer(biasSize, undefined, 'depthwise_conv2d_bias_zero');
55
+ biasBuffer = tempBias;
56
// Rounded up to a multiple of 4 bytes — presumably to satisfy queue.writeBuffer's size rule.
// NOTE(review): paddedSize can exceed biasSize; assumes acquireBuffer returns a buffer at
// least that large — confirm against the buffer pool.
+ const paddedSize = Math.ceil(biasSize / 4) * 4;
57
+ device.queue.writeBuffer(biasBuffer, 0, new Uint8Array(paddedSize));
58
+ }
59
+
60
+ await unifiedKernelWrapper(
61
+ 'depthwise_conv2d',
62
+ target,
63
+ variant,
64
+ [input, weightBuffer, biasBuffer, output],
65
// Uniform keys follow the field order of the WGSL `Uniforms` struct.
// NOTE(review): the struct in this diff also declares `_pad2`, which is not supplied here —
// confirm unifiedKernelWrapper sizes/pads the uniform buffer accordingly.
+ {
66
+ channels,
67
+ height,
68
+ width,
69
+ out_height: outHeight,
70
+ out_width: outWidth,
71
+ kernel_h: kernelH,
72
+ kernel_w: kernelW,
73
+ stride,
74
+ pad,
75
+ _pad0: 0,
76
+ _pad1: 0,
77
+ },
78
// One invocation per output element, dispatched along a single axis.
+ Math.ceil((channels * outHeight * outWidth) / WORKGROUP_SIZES.DEFAULT)
79
+ );
80
+
81
// With a recorder the temp buffer is handed over for tracking; otherwise it is released now.
+ if (tempBias) {
82
+ if (recorder) {
83
+ recorder.trackTemporaryBuffer(tempBias);
84
+ } else {
85
+ releaseBuffer(tempBias);
86
+ }
87
+ }
88
+
89
+ return createTensor(output, input.dtype, [channels, outHeight, outWidth], 'depthwise_conv2d_output');
90
+ }
91
+
92
// Immediate-mode entry point: delegates to _depthwiseConv2D with a null target (no recorder).
+ export async function runDepthwiseConv2D(input, weight, bias, options = {}) {
93
+ return _depthwiseConv2D(null, input, weight, bias, options);
94
+ }
95
+
96
// Recorder entry point: delegates to _depthwiseConv2D, passing `recorder` as the target.
+ export async function recordDepthwiseConv2D(recorder, input, weight, bias, options = {}) {
97
+ return _depthwiseConv2D(recorder, input, weight, bias, options);
98
+ }
@@ -0,0 +1,58 @@
1
+ override WORKGROUP_SIZE: u32 = 256u;
2
+
3
// Kernel parameters bound at @group(0) @binding(0). Twelve u32 fields (48 bytes).
// The first eleven fields correspond, in order, to the keys of the uniform object built in
// depthwise_conv2d.js; `_pad2` has no counterpart there.
+ struct Uniforms {
4
+ channels: u32,
5
+ height: u32,
6
+ width: u32,
7
+ out_height: u32,
8
+ out_width: u32,
9
+ kernel_h: u32,
10
+ kernel_w: u32,
11
+ stride: u32,
12
+ pad: u32,
13
// Unused by main(); presumably trailing padding — TODO confirm the intended buffer size.
+ _pad0: u32,
14
+ _pad1: u32,
15
+ _pad2: u32,
16
+ }
17
+
18
+ @group(0) @binding(0) var<uniform> u: Uniforms;
19
+ @group(0) @binding(1) var<storage, read> input: array<f32>;
20
+ @group(0) @binding(2) var<storage, read> weight: array<f32>;
21
+ @group(0) @binding(3) var<storage, read> bias: array<f32>;
22
+ @group(0) @binding(4) var<storage, read_write> output: array<f32>;
23
+
24
// Depthwise 2-D convolution, f32. One invocation computes one output element.
// `idx` is a flat index over channels x out_height x out_width; invocations past the end return.
// Each channel is convolved only with its own kernel_h x kernel_w filter, and taps that fall
// outside the input are skipped (equivalent to zero padding).
+ @compute @workgroup_size(WORKGROUP_SIZE, 1, 1)
25
+ fn main(@builtin(global_invocation_id) gid: vec3<u32>) {
26
+ let idx = gid.x;
27
+ let out_spatial = u.out_height * u.out_width;
28
+ let out_size = u.channels * out_spatial;
29
+ if (idx >= out_size) {
30
+ return;
31
+ }
32
+
33
// Decompose the flat index into (channel, out_y, out_x).
+ let channel = idx / out_spatial;
34
+ let rem = idx - channel * out_spatial;
35
+ let out_y = rem / u.out_width;
36
+ let out_x = rem - out_y * u.out_width;
37
+
38
// Accumulator starts at the channel's bias.
+ var sum: f32 = bias[channel];
39
+ let pad = i32(u.pad);
40
+
41
+ for (var ky: u32 = 0u; ky < u.kernel_h; ky = ky + 1u) {
42
// Signed arithmetic so that positions inside the top/left padding become negative.
+ let in_y = i32(out_y * u.stride + ky) - pad;
43
+ if (in_y < 0 || in_y >= i32(u.height)) {
44
+ continue;
45
+ }
46
+ for (var kx: u32 = 0u; kx < u.kernel_w; kx = kx + 1u) {
47
+ let in_x = i32(out_x * u.stride + kx) - pad;
48
+ if (in_x < 0 || in_x >= i32(u.width)) {
49
+ continue;
50
+ }
51
// Input is indexed channel-major: (channel, y, x); weights as (channel, ky, kx).
+ let input_idx = (channel * u.height + u32(in_y)) * u.width + u32(in_x);
52
+ let weight_idx = ((channel * u.kernel_h + ky) * u.kernel_w + kx);
53
+ sum = sum + input[input_idx] * weight[weight_idx];
54
+ }
55
+ }
56
+
57
+ output[idx] = sum;
58
+ }
@@ -0,0 +1,62 @@
1
+ // Depthwise Conv2D Kernel (NCHW, f16)
2
+
3
+ enable f16;
4
+
5
+ override WORKGROUP_SIZE: u32 = 256u;
6
+
7
// Kernel parameters bound at @group(0) @binding(0). Same twelve-u32 layout (48 bytes) as the
// f32 variant of this kernel.
+ struct Uniforms {
8
+ channels: u32,
9
+ height: u32,
10
+ width: u32,
11
+ out_height: u32,
12
+ out_width: u32,
13
+ kernel_h: u32,
14
+ kernel_w: u32,
15
+ stride: u32,
16
+ pad: u32,
17
// Unused by main(); presumably trailing padding — TODO confirm the intended buffer size.
+ _pad0: u32,
18
+ _pad1: u32,
19
+ _pad2: u32,
20
+ }
21
+
22
+ @group(0) @binding(0) var<uniform> u: Uniforms;
23
+ @group(0) @binding(1) var<storage, read> input: array<f16>;
24
+ @group(0) @binding(2) var<storage, read> weight: array<f16>;
25
+ @group(0) @binding(3) var<storage, read> bias: array<f16>;
26
+ @group(0) @binding(4) var<storage, read_write> output: array<f16>;
27
+
28
// Depthwise 2-D convolution, f16 storage. One invocation computes one output element.
// Inputs, weights and bias are read as f16 but accumulated in f32; the sum is converted back
// to f16 only when it is stored. Taps outside the input are skipped (zero padding).
+ @compute @workgroup_size(WORKGROUP_SIZE, 1, 1)
29
+ fn main(@builtin(global_invocation_id) gid: vec3<u32>) {
30
+ let idx = gid.x;
31
+ let out_spatial = u.out_height * u.out_width;
32
+ let out_size = u.channels * out_spatial;
33
+ if (idx >= out_size) {
34
+ return;
35
+ }
36
+
37
// Decompose the flat index into (channel, out_y, out_x).
+ let channel = idx / out_spatial;
38
+ let rem = idx - channel * out_spatial;
39
+ let out_y = rem / u.out_width;
40
+ let out_x = rem - out_y * u.out_width;
41
+
42
// f32 accumulator seeded with the channel's bias.
+ var sum: f32 = f32(bias[channel]);
43
+ let pad = i32(u.pad);
44
+
45
+ for (var ky: u32 = 0u; ky < u.kernel_h; ky = ky + 1u) {
46
// Signed arithmetic so that positions inside the top/left padding become negative.
+ let in_y = i32(out_y * u.stride + ky) - pad;
47
+ if (in_y < 0 || in_y >= i32(u.height)) {
48
+ continue;
49
+ }
50
+ for (var kx: u32 = 0u; kx < u.kernel_w; kx = kx + 1u) {
51
+ let in_x = i32(out_x * u.stride + kx) - pad;
52
+ if (in_x < 0 || in_x >= i32(u.width)) {
53
+ continue;
54
+ }
55
// Input is indexed channel-major: (channel, y, x); weights as (channel, ky, kx).
+ let input_idx = (channel * u.height + u32(in_y)) * u.width + u32(in_x);
56
+ let weight_idx = ((channel * u.kernel_h + ky) * u.kernel_w + kx);
57
+ sum = sum + f32(input[input_idx]) * f32(weight[weight_idx]);
58
+ }
59
+ }
60
+
61
+ output[idx] = f16(sum);
62
+ }