@simulatte/doppler 0.1.3 → 0.1.5

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (114)
  1. package/README.md +11 -5
  2. package/package.json +27 -4
  3. package/src/client/doppler-api.browser.d.ts +1 -0
  4. package/src/client/doppler-api.browser.js +288 -0
  5. package/src/client/doppler-api.d.ts +80 -0
  6. package/src/client/doppler-api.js +298 -0
  7. package/src/client/doppler-provider/types.js +1 -1
  8. package/src/client/doppler-registry.d.ts +23 -0
  9. package/src/client/doppler-registry.js +88 -0
  10. package/src/client/doppler-registry.json +39 -0
  11. package/src/config/execution-contract-check.d.ts +82 -0
  12. package/src/config/execution-contract-check.js +317 -0
  13. package/src/config/execution-v0-contract-check.d.ts +94 -0
  14. package/src/config/execution-v0-contract-check.js +251 -0
  15. package/src/config/execution-v0-graph-contract-check.d.ts +20 -0
  16. package/src/config/execution-v0-graph-contract-check.js +64 -0
  17. package/src/config/kernel-path-contract-check.d.ts +76 -0
  18. package/src/config/kernel-path-contract-check.js +479 -0
  19. package/src/config/kernel-path-loader.d.ts +16 -0
  20. package/src/config/kernel-path-loader.js +54 -0
  21. package/src/config/kernels/kernel-ref-digests.js +12 -0
  22. package/src/config/kernels/registry.json +556 -0
  23. package/src/config/loader.js +90 -67
  24. package/src/config/merge-contract-check.d.ts +16 -0
  25. package/src/config/merge-contract-check.js +321 -0
  26. package/src/config/merge-helpers.d.ts +58 -0
  27. package/src/config/merge-helpers.js +54 -0
  28. package/src/config/merge.js +3 -6
  29. package/src/config/presets/models/janus-text.json +27 -0
  30. package/src/config/quantization-contract-check.d.ts +12 -0
  31. package/src/config/quantization-contract-check.js +91 -0
  32. package/src/config/required-inference-fields-contract-check.d.ts +24 -0
  33. package/src/config/required-inference-fields-contract-check.js +231 -0
  34. package/src/config/schema/browser-suite-metrics.schema.d.ts +17 -0
  35. package/src/config/schema/browser-suite-metrics.schema.js +46 -0
  36. package/src/config/schema/conversion-report.schema.d.ts +40 -0
  37. package/src/config/schema/conversion-report.schema.js +108 -0
  38. package/src/config/schema/doppler.schema.js +12 -18
  39. package/src/config/schema/index.d.ts +22 -0
  40. package/src/config/schema/index.js +18 -0
  41. package/src/converter/core.d.ts +10 -0
  42. package/src/converter/core.js +49 -11
  43. package/src/converter/parsers/diffusion.js +63 -3
  44. package/src/converter/tokenizer-utils.js +17 -3
  45. package/src/formats/rdrr/validation.js +13 -0
  46. package/src/gpu/kernels/depthwise_conv2d.d.ts +29 -0
  47. package/src/gpu/kernels/depthwise_conv2d.js +98 -0
  48. package/src/gpu/kernels/depthwise_conv2d.wgsl +58 -0
  49. package/src/gpu/kernels/depthwise_conv2d_f16.wgsl +62 -0
  50. package/src/gpu/kernels/grouped_pointwise_conv2d.d.ts +27 -0
  51. package/src/gpu/kernels/grouped_pointwise_conv2d.js +92 -0
  52. package/src/gpu/kernels/grouped_pointwise_conv2d.wgsl +47 -0
  53. package/src/gpu/kernels/grouped_pointwise_conv2d_f16.wgsl +51 -0
  54. package/src/gpu/kernels/index.d.ts +30 -0
  55. package/src/gpu/kernels/index.js +25 -0
  56. package/src/gpu/kernels/relu.d.ts +18 -0
  57. package/src/gpu/kernels/relu.js +45 -0
  58. package/src/gpu/kernels/relu.wgsl +21 -0
  59. package/src/gpu/kernels/relu_f16.wgsl +23 -0
  60. package/src/gpu/kernels/repeat_channels.d.ts +21 -0
  61. package/src/gpu/kernels/repeat_channels.js +60 -0
  62. package/src/gpu/kernels/repeat_channels.wgsl +29 -0
  63. package/src/gpu/kernels/repeat_channels_f16.wgsl +31 -0
  64. package/src/gpu/kernels/sana_linear_attention.d.ts +27 -0
  65. package/src/gpu/kernels/sana_linear_attention.js +122 -0
  66. package/src/gpu/kernels/sana_linear_attention_apply.wgsl +44 -0
  67. package/src/gpu/kernels/sana_linear_attention_apply_f16.wgsl +47 -0
  68. package/src/gpu/kernels/sana_linear_attention_summary.wgsl +47 -0
  69. package/src/gpu/kernels/sana_linear_attention_summary_f16.wgsl +49 -0
  70. package/src/index-browser.d.ts +1 -0
  71. package/src/index-browser.js +2 -1
  72. package/src/index.d.ts +1 -0
  73. package/src/index.js +2 -1
  74. package/src/inference/browser-harness.js +164 -38
  75. package/src/inference/pipelines/diffusion/init.js +14 -0
  76. package/src/inference/pipelines/diffusion/pipeline.js +206 -77
  77. package/src/inference/pipelines/diffusion/sana-transformer.d.ts +53 -0
  78. package/src/inference/pipelines/diffusion/sana-transformer.js +738 -0
  79. package/src/inference/pipelines/diffusion/scheduler.d.ts +17 -1
  80. package/src/inference/pipelines/diffusion/scheduler.js +91 -3
  81. package/src/inference/pipelines/diffusion/text-encoder-gpu.d.ts +6 -4
  82. package/src/inference/pipelines/diffusion/text-encoder-gpu.js +270 -0
  83. package/src/inference/pipelines/diffusion/text-encoder.js +18 -1
  84. package/src/inference/pipelines/diffusion/types.d.ts +4 -0
  85. package/src/inference/pipelines/diffusion/vae.js +782 -78
  86. package/src/inference/pipelines/text/config.d.ts +5 -0
  87. package/src/inference/pipelines/text/config.js +1 -1
  88. package/src/inference/pipelines/text/execution-v0.js +141 -101
  89. package/src/inference/pipelines/text/init.js +41 -10
  90. package/src/inference/pipelines/text.js +7 -1
  91. package/src/rules/execution-rules-contract-check.d.ts +17 -0
  92. package/src/rules/execution-rules-contract-check.js +245 -0
  93. package/src/rules/kernels/depthwise-conv2d.rules.json +6 -0
  94. package/src/rules/kernels/grouped-pointwise-conv2d.rules.json +6 -0
  95. package/src/rules/kernels/relu.rules.json +6 -0
  96. package/src/rules/kernels/repeat-channels.rules.json +6 -0
  97. package/src/rules/kernels/sana-linear-attention.rules.json +6 -0
  98. package/src/rules/layer-pattern-contract-check.d.ts +17 -0
  99. package/src/rules/layer-pattern-contract-check.js +231 -0
  100. package/src/rules/rule-registry.d.ts +28 -0
  101. package/src/rules/rule-registry.js +38 -0
  102. package/src/tooling/conversion-config-materializer.d.ts +24 -0
  103. package/src/tooling/conversion-config-materializer.js +99 -0
  104. package/src/tooling/lean-execution-contract-runner.d.ts +43 -0
  105. package/src/tooling/lean-execution-contract-runner.js +158 -0
  106. package/src/tooling/lean-execution-contract.d.ts +16 -0
  107. package/src/tooling/lean-execution-contract.js +81 -0
  108. package/src/tooling/node-convert.d.ts +10 -0
  109. package/src/tooling/node-converter.js +59 -0
  110. package/src/tooling/node-webgpu.js +30 -9
  111. package/src/version.d.ts +2 -0
  112. package/src/version.js +2 -0
  113. package/tools/convert-safetensors-node.js +47 -0
  114. package/tools/doppler-cli.js +167 -6
@@ -15,10 +15,16 @@ import {
15
15
  getActiveKernelPathSource,
16
16
  getActiveKernelPathPolicy,
17
17
  } from '../config/kernel-path-loader.js';
18
- import { selectRuleValue } from '../rules/rule-registry.js';
18
+ import {
19
+ getInferenceLayerPatternContractArtifact,
20
+ selectRuleValue,
21
+ } from '../rules/rule-registry.js';
19
22
  import { mergeRuntimeValues } from '../config/runtime-merge.js';
20
23
  import { isPlainObject } from '../utils/plain-object.js';
24
+ import { validateBrowserSuiteMetrics } from '../config/schema/browser-suite-metrics.schema.js';
21
25
  import { validateTrainingMetricsReport } from '../config/schema/training-metrics.schema.js';
26
+ import { buildExecutionContractArtifact } from '../config/execution-contract-check.js';
27
+ import { buildManifestRequiredInferenceFieldsArtifact } from '../config/required-inference-fields-contract-check.js';
22
28
 
23
29
  const TRAINING_SUITE_MODULE_PATH = '../training/suite.js';
24
30
  const NODE_SOURCE_RUNTIME_MODULE_PATH = '../tooling/node-source-runtime.js';
@@ -41,6 +47,29 @@ async function runTrainingBenchSuite(options = {}) {
41
47
  return module.runTrainingBenchSuite(options);
42
48
  }
43
49
 
50
+ function buildSuiteContractMetrics(suite, baseMetrics, manifest) {
51
+ const executionContractArtifact = buildExecutionContractArtifact(manifest);
52
+ const executionV0GraphContractArtifact = executionContractArtifact?.executionV0?.graph ?? null;
53
+ const layerPatternContractArtifact = getInferenceLayerPatternContractArtifact();
54
+ const requiredInferenceFieldsArtifact = manifest?.modelType === 'transformer'
55
+ && isPlainObject(manifest?.inference?.attention)
56
+ ? buildManifestRequiredInferenceFieldsArtifact(
57
+ manifest?.inference ?? null,
58
+ `${manifest?.modelId ?? 'unknown'}.inference`
59
+ )
60
+ : null;
61
+ return validateBrowserSuiteMetrics({
62
+ ...baseMetrics,
63
+ schemaVersion: 1,
64
+ source: 'doppler',
65
+ suite,
66
+ ...(executionContractArtifact ? { executionContractArtifact } : {}),
67
+ executionV0GraphContractArtifact,
68
+ layerPatternContractArtifact,
69
+ requiredInferenceFieldsArtifact,
70
+ });
71
+ }
72
+
44
73
  function parseReportTimestamp(rawTimestamp, label = 'timestamp') {
45
74
  if (rawTimestamp == null) {
46
75
  return null;
@@ -958,6 +987,22 @@ async function runKernelSuite(options = {}) {
958
987
 
959
988
 
960
989
  const DEFAULT_HARNESS_PROMPT = 'Summarize this input in one sentence.';
990
+ const DEFAULT_RUNTIME_PLACEHOLDER_PROMPT = 'Hello from Doppler.';
991
+ const DEFAULT_TRANSLATEGEMMA_PROMPT = Object.freeze({
992
+ messages: Object.freeze([
993
+ Object.freeze({
994
+ role: 'user',
995
+ content: Object.freeze([
996
+ Object.freeze({
997
+ type: 'text',
998
+ source_lang_code: 'en',
999
+ target_lang_code: 'fr',
1000
+ text: 'Hello world.',
1001
+ }),
1002
+ ]),
1003
+ }),
1004
+ ]),
1005
+ });
961
1006
  const DEFAULT_HARNESS_MAX_TOKENS = 32;
962
1007
  const EMBEDDING_PREVIEW_LENGTH = 16;
963
1008
  const EMBEDDING_SEMANTIC_MIN_RETRIEVAL_TOP1 = 0.67;
@@ -1205,24 +1250,90 @@ function isStructuredPromptInput(value) {
1205
1250
  return Array.isArray(value) || (value != null && typeof value === 'object');
1206
1251
  }
1207
1252
 
1208
- function resolveGenerationPromptInput(runtimeConfig, runOverrides = null) {
1253
+ function clonePromptInput(promptInput) {
1254
+ if (!isStructuredPromptInput(promptInput)) {
1255
+ return promptInput;
1256
+ }
1257
+ if (typeof structuredClone === 'function') {
1258
+ return structuredClone(promptInput);
1259
+ }
1260
+ return JSON.parse(JSON.stringify(promptInput));
1261
+ }
1262
+
1263
+ function resolvePromptTemplateType(source) {
1264
+ const sourceTemplateType = asText(source?.chatTemplateType);
1265
+ if (sourceTemplateType) {
1266
+ return sourceTemplateType;
1267
+ }
1268
+ const modelConfigTemplateType = asText(source?.modelConfig?.chatTemplateType);
1269
+ if (modelConfigTemplateType) {
1270
+ return modelConfigTemplateType;
1271
+ }
1272
+ return asText(source?.manifest?.inference?.chatTemplate?.type);
1273
+ }
1274
+
1275
+ function buildDefaultGenerationPrompt(templateType) {
1276
+ if (templateType === 'translategemma') {
1277
+ return clonePromptInput(DEFAULT_TRANSLATEGEMMA_PROMPT);
1278
+ }
1279
+ return DEFAULT_HARNESS_PROMPT;
1280
+ }
1281
+
1282
+ function shouldPreferModelDefaultPrompt(runtimePrompt, templateType) {
1283
+ if (templateType !== 'translategemma') {
1284
+ return false;
1285
+ }
1286
+ if (typeof runtimePrompt !== 'string') {
1287
+ return false;
1288
+ }
1289
+ return runtimePrompt.trim() === DEFAULT_RUNTIME_PLACEHOLDER_PROMPT;
1290
+ }
1291
+
1292
+ function describePromptInput(promptInput) {
1293
+ if (typeof promptInput === 'string') {
1294
+ return promptInput.trim() || DEFAULT_HARNESS_PROMPT;
1295
+ }
1296
+ const firstMessage = Array.isArray(promptInput?.messages)
1297
+ ? promptInput.messages[0]
1298
+ : null;
1299
+ const firstContent = Array.isArray(firstMessage?.content)
1300
+ ? firstMessage.content[0]
1301
+ : null;
1302
+ const sourceLang = asText(firstContent?.source_lang_code);
1303
+ const targetLang = asText(firstContent?.target_lang_code);
1304
+ const text = asText(firstContent?.text);
1305
+ if (sourceLang && targetLang) {
1306
+ return `${sourceLang} -> ${targetLang}: ${text || '[non-text request]'}`;
1307
+ }
1308
+ try {
1309
+ return JSON.stringify(promptInput);
1310
+ } catch {
1311
+ return '[structured prompt]';
1312
+ }
1313
+ }
1314
+
1315
+ function resolveGenerationPromptInput(runtimeConfig, runOverrides = null, source = null) {
1316
+ const templateType = resolvePromptTemplateType(source);
1209
1317
  const overridePrompt = runOverrides?.prompt;
1210
1318
  if (typeof overridePrompt === 'string' && overridePrompt.trim()) {
1211
1319
  return overridePrompt.trim();
1212
1320
  }
1213
1321
  if (isStructuredPromptInput(overridePrompt)) {
1214
- return overridePrompt;
1322
+ return clonePromptInput(overridePrompt);
1215
1323
  }
1216
1324
 
1217
1325
  const runtimePrompt = runtimeConfig?.inference?.prompt;
1326
+ if (shouldPreferModelDefaultPrompt(runtimePrompt, templateType)) {
1327
+ return buildDefaultGenerationPrompt(templateType);
1328
+ }
1218
1329
  if (typeof runtimePrompt === 'string' && runtimePrompt.trim()) {
1219
1330
  return runtimePrompt.trim();
1220
1331
  }
1221
1332
  if (isStructuredPromptInput(runtimePrompt)) {
1222
- return runtimePrompt;
1333
+ return clonePromptInput(runtimePrompt);
1223
1334
  }
1224
1335
 
1225
- return DEFAULT_HARNESS_PROMPT;
1336
+ return buildDefaultGenerationPrompt(templateType);
1226
1337
  }
1227
1338
 
1228
1339
  function resolveMaxTokens(runtimeConfig) {
@@ -1233,7 +1344,7 @@ function resolveMaxTokens(runtimeConfig) {
1233
1344
  return DEFAULT_HARNESS_MAX_TOKENS;
1234
1345
  }
1235
1346
 
1236
- function resolveBenchmarkRunSettings(runtimeConfig) {
1347
+ function resolveBenchmarkRunSettings(runtimeConfig, source = null) {
1237
1348
  const benchConfig = runtimeConfig?.shared?.benchmark?.run || {};
1238
1349
  const runtimeSampling = isPlainObject(runtimeConfig?.inference?.sampling)
1239
1350
  ? runtimeConfig.inference.sampling
@@ -1241,9 +1352,9 @@ function resolveBenchmarkRunSettings(runtimeConfig) {
1241
1352
  const benchSampling = isPlainObject(benchConfig?.sampling)
1242
1353
  ? benchConfig.sampling
1243
1354
  : {};
1244
- const prompt = typeof benchConfig.customPrompt === 'string' && benchConfig.customPrompt.trim()
1355
+ const promptInput = typeof benchConfig.customPrompt === 'string' && benchConfig.customPrompt.trim()
1245
1356
  ? benchConfig.customPrompt.trim()
1246
- : resolvePrompt(runtimeConfig);
1357
+ : resolveGenerationPromptInput(runtimeConfig, null, source);
1247
1358
  const maxTokens = Number.isFinite(benchConfig.maxNewTokens)
1248
1359
  ? Math.max(1, Math.floor(benchConfig.maxNewTokens))
1249
1360
  : resolveMaxTokens(runtimeConfig);
@@ -1251,7 +1362,8 @@ function resolveBenchmarkRunSettings(runtimeConfig) {
1251
1362
  return {
1252
1363
  warmupRuns: Math.max(0, Math.floor(benchConfig.warmupRuns ?? 0)),
1253
1364
  timedRuns: Math.max(1, Math.floor(benchConfig.timedRuns ?? 1)),
1254
- prompt,
1365
+ prompt: promptInput,
1366
+ promptLabel: describePromptInput(promptInput),
1255
1367
  maxTokens,
1256
1368
  sampling: {
1257
1369
  ...runtimeSampling,
@@ -1465,7 +1577,8 @@ function isCoherentOutput(tokens, output) {
1465
1577
  async function runGeneration(pipeline, runtimeConfig, runOverrides = null) {
1466
1578
  const tokens = [];
1467
1579
  const tokenIds = [];
1468
- const promptInput = resolveGenerationPromptInput(runtimeConfig, runOverrides);
1580
+ const promptInput = resolveGenerationPromptInput(runtimeConfig, runOverrides, pipeline);
1581
+ const promptLabel = describePromptInput(promptInput);
1469
1582
  const useChatTemplate = runOverrides?.useChatTemplate
1470
1583
  ?? runtimeConfig?.inference?.chatTemplate?.enabled
1471
1584
  ?? (isStructuredPromptInput(promptInput) ? true : undefined);
@@ -1532,7 +1645,8 @@ async function runGeneration(pipeline, runtimeConfig, runOverrides = null) {
1532
1645
  : null;
1533
1646
 
1534
1647
  return {
1535
- prompt: promptInput,
1648
+ prompt: promptLabel,
1649
+ promptInput,
1536
1650
  maxTokens,
1537
1651
  tokens,
1538
1652
  tokenIds,
@@ -1739,6 +1853,11 @@ async function runInferenceSuite(options = {}) {
1739
1853
  source: 'doppler',
1740
1854
  prefillSemantics: 'internal_prefill_phase',
1741
1855
  });
1856
+ const metricsWithContracts = buildSuiteContractMetrics(
1857
+ options.suiteName || 'inference',
1858
+ metrics,
1859
+ harness.manifest
1860
+ );
1742
1861
  return {
1743
1862
  ...summary,
1744
1863
  modelId: options.modelId || harness.manifest?.modelId || 'unknown',
@@ -1756,7 +1875,7 @@ async function runInferenceSuite(options = {}) {
1756
1875
  timing,
1757
1876
  timingDiagnostics,
1758
1877
  output,
1759
- metrics,
1878
+ metrics: metricsWithContracts,
1760
1879
  memoryStats,
1761
1880
  deviceInfo: resolveDeviceInfo(),
1762
1881
  pipeline: options.keepPipeline ? harness.pipeline : null,
@@ -1766,9 +1885,9 @@ async function runInferenceSuite(options = {}) {
1766
1885
  async function runBenchSuite(options = {}) {
1767
1886
  const startTime = performance.now();
1768
1887
  const runtimeConfig = getRuntimeConfig();
1769
- const benchRun = resolveBenchmarkRunSettings(runtimeConfig);
1770
- const warmupRuns = benchRun.warmupRuns;
1771
- const timedRuns = benchRun.timedRuns;
1888
+ const defaultBenchRun = resolveBenchmarkRunSettings(runtimeConfig);
1889
+ const warmupRuns = defaultBenchRun.warmupRuns;
1890
+ const timedRuns = defaultBenchRun.timedRuns;
1772
1891
  const cacheMode = normalizeCacheMode(options.cacheMode);
1773
1892
  const loadMode = normalizeLoadMode(options.loadMode, !options.modelUrl);
1774
1893
  const workloadType = normalizeWorkloadType(options.workloadType);
@@ -1776,7 +1895,7 @@ async function runBenchSuite(options = {}) {
1776
1895
  if (workloadType === 'training') {
1777
1896
  const trainingBench = await runTrainingBenchSuite({
1778
1897
  ...options,
1779
- benchRun,
1898
+ benchRun: defaultBenchRun,
1780
1899
  workloadType,
1781
1900
  });
1782
1901
  const trainingReport = trainingBench?.metrics?.trainingMetricsReport;
@@ -1858,6 +1977,7 @@ async function runBenchSuite(options = {}) {
1858
1977
  }
1859
1978
 
1860
1979
  const harness = await initializeSuiteModel(options);
1980
+ const benchRun = resolveBenchmarkRunSettings(runtimeConfig, harness.pipeline ?? harness);
1861
1981
  const modelType = harness.manifest?.modelType || 'transformer';
1862
1982
  const safeModelLoadMs = toTimingNumber(harness.modelLoadMs, 0);
1863
1983
 
@@ -1927,7 +2047,7 @@ async function runBenchSuite(options = {}) {
1927
2047
  validRuns: durations.length,
1928
2048
  invalidRuns,
1929
2049
  invalidRatePct: Number((timedRuns > 0 ? (invalidRuns / timedRuns) * 100 : 0).toFixed(2)),
1930
- prompt: benchRun.prompt,
2050
+ prompt: benchRun.promptLabel,
1931
2051
  embeddingDim: Math.round(embeddingDims.reduce((a, b) => a + b, 0) / (embeddingDims.length || 1)),
1932
2052
  nonFiniteValues: totalNonFiniteValues,
1933
2053
  firstTimedEmbeddingMs: Number((firstTimedEmbeddingMs ?? 0).toFixed(2)),
@@ -2058,7 +2178,7 @@ async function runBenchSuite(options = {}) {
2058
2178
  metrics = {
2059
2179
  warmupRuns,
2060
2180
  timedRuns,
2061
- prompt: benchRun.prompt,
2181
+ prompt: benchRun.promptLabel,
2062
2182
  maxTokens: benchRun.maxTokens,
2063
2183
  decodeTokensPerSec: sampleTimingNumber(decodeTokensPerSecStats, 'median'),
2064
2184
  avgTokensGenerated: Math.round(tokensGeneratedStats.mean),
@@ -2132,6 +2252,7 @@ async function runBenchSuite(options = {}) {
2132
2252
  source: 'doppler',
2133
2253
  prefillSemantics: 'internal_prefill_phase',
2134
2254
  });
2255
+ const metricsWithContracts = buildSuiteContractMetrics('bench', metrics, harness.manifest);
2135
2256
  return {
2136
2257
  ...summary,
2137
2258
  modelId: options.modelId || harness.manifest?.modelId || 'unknown',
@@ -2149,7 +2270,7 @@ async function runBenchSuite(options = {}) {
2149
2270
  timing,
2150
2271
  timingDiagnostics,
2151
2272
  output,
2152
- metrics,
2273
+ metrics: metricsWithContracts,
2153
2274
  memoryStats,
2154
2275
  deviceInfo: resolveDeviceInfo(),
2155
2276
  pipeline: options.keepPipeline ? harness.pipeline : null,
@@ -2310,25 +2431,9 @@ async function runDiffusionSuite(options = {}) {
2310
2431
  source: 'doppler',
2311
2432
  prefillSemantics: 'internal_prefill_phase',
2312
2433
  });
2313
-
2314
- return {
2315
- ...summary,
2316
- modelId: options.modelId || harness.manifest?.modelId || 'unknown',
2317
- cacheMode,
2318
- loadMode,
2319
- env: {
2320
- library: 'doppler',
2321
- runtime: 'browser',
2322
- device: 'webgpu',
2323
- browserUserAgent: typeof navigator !== 'undefined' ? (navigator.userAgent || null) : null,
2324
- browserPlatform: typeof navigator !== 'undefined' ? (navigator.platform || null) : null,
2325
- browserLanguage: typeof navigator !== 'undefined' ? (navigator.language || null) : null,
2326
- browserVendor: typeof navigator !== 'undefined' ? (navigator.vendor || null) : null,
2327
- },
2328
- timing,
2329
- timingDiagnostics,
2330
- output,
2331
- metrics: {
2434
+ const metricsWithContracts = buildSuiteContractMetrics(
2435
+ 'diffusion',
2436
+ {
2332
2437
  warmupRuns,
2333
2438
  timedRuns,
2334
2439
  width,
@@ -2353,6 +2458,27 @@ async function runDiffusionSuite(options = {}) {
2353
2458
  gpu: gpuStats,
2354
2459
  performanceArtifact: diffusionPerformanceArtifact,
2355
2460
  },
2461
+ harness.manifest
2462
+ );
2463
+
2464
+ return {
2465
+ ...summary,
2466
+ modelId: options.modelId || harness.manifest?.modelId || 'unknown',
2467
+ cacheMode,
2468
+ loadMode,
2469
+ env: {
2470
+ library: 'doppler',
2471
+ runtime: 'browser',
2472
+ device: 'webgpu',
2473
+ browserUserAgent: typeof navigator !== 'undefined' ? (navigator.userAgent || null) : null,
2474
+ browserPlatform: typeof navigator !== 'undefined' ? (navigator.platform || null) : null,
2475
+ browserLanguage: typeof navigator !== 'undefined' ? (navigator.language || null) : null,
2476
+ browserVendor: typeof navigator !== 'undefined' ? (navigator.vendor || null) : null,
2477
+ },
2478
+ timing,
2479
+ timingDiagnostics,
2480
+ output,
2481
+ metrics: metricsWithContracts,
2356
2482
  memoryStats,
2357
2483
  deviceInfo: resolveDeviceInfo(),
2358
2484
  pipeline: options.keepPipeline ? harness.pipeline : null,
@@ -1,5 +1,7 @@
1
1
  import { DEFAULT_DIFFUSION_CONFIG } from '../../../config/schema/index.js';
2
2
 
3
+ const SUPPORTED_DIFFUSION_RUNTIME_LAYOUTS = new Set(['sd3', 'flux', 'sana']);
4
+
3
5
  function mergeSection(base, override) {
4
6
  if (!override) return { ...base };
5
7
  return { ...base, ...override };
@@ -38,6 +40,9 @@ function resolveSchedulerType(modelScheduler, runtimeScheduler) {
38
40
  if (modelClass === 'FlowMatchEulerDiscreteScheduler') {
39
41
  return 'flowmatch_euler';
40
42
  }
43
+ if (modelClass === 'SCMScheduler') {
44
+ return 'scm';
45
+ }
41
46
  if (modelClass === 'EulerDiscreteScheduler') {
42
47
  return 'euler';
43
48
  }
@@ -58,6 +63,8 @@ function mergeSchedulerConfig(modelConfig, runtimeScheduler) {
58
63
  type,
59
64
  numTrainTimesteps: modelScheduler.num_train_timesteps ?? runtimeScheduler.numTrainTimesteps,
60
65
  shift: modelScheduler.shift ?? runtimeScheduler.shift,
66
+ predictionType: modelScheduler.prediction_type ?? runtimeScheduler.predictionType,
67
+ sigmaData: modelScheduler.sigma_data ?? runtimeScheduler.sigmaData,
61
68
  };
62
69
  }
63
70
 
@@ -95,6 +102,13 @@ export function initializeDiffusion(manifest, runtimeConfig) {
95
102
  }
96
103
  throw new Error('Diffusion manifest missing config.diffusion model contract.');
97
104
  }
105
+ const layout = modelConfig.layout;
106
+ if (layout && !SUPPORTED_DIFFUSION_RUNTIME_LAYOUTS.has(layout)) {
107
+ throw new Error(
108
+ `Diffusion layout "${layout}" is recognized in the manifest, but the GPU runtime is not implemented yet. ` +
109
+ 'Supported runtime layouts: sd3, flux, sana.'
110
+ );
111
+ }
98
112
 
99
113
  const runtimeBase = mergeDiffusionConfig(DEFAULT_DIFFUSION_CONFIG, runtimeConfig?.inference?.diffusion);
100
114
  const runtime = {