@simulatte/doppler 0.1.3 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +11 -5
- package/package.json +27 -4
- package/src/client/doppler-api.browser.d.ts +1 -0
- package/src/client/doppler-api.browser.js +288 -0
- package/src/client/doppler-api.d.ts +80 -0
- package/src/client/doppler-api.js +298 -0
- package/src/client/doppler-provider/types.js +1 -1
- package/src/client/doppler-registry.d.ts +23 -0
- package/src/client/doppler-registry.js +88 -0
- package/src/client/doppler-registry.json +39 -0
- package/src/config/execution-contract-check.d.ts +82 -0
- package/src/config/execution-contract-check.js +317 -0
- package/src/config/execution-v0-contract-check.d.ts +94 -0
- package/src/config/execution-v0-contract-check.js +251 -0
- package/src/config/execution-v0-graph-contract-check.d.ts +20 -0
- package/src/config/execution-v0-graph-contract-check.js +64 -0
- package/src/config/kernel-path-contract-check.d.ts +76 -0
- package/src/config/kernel-path-contract-check.js +479 -0
- package/src/config/kernel-path-loader.d.ts +16 -0
- package/src/config/kernel-path-loader.js +54 -0
- package/src/config/kernels/kernel-ref-digests.js +12 -0
- package/src/config/kernels/registry.json +556 -0
- package/src/config/loader.js +90 -67
- package/src/config/merge-contract-check.d.ts +16 -0
- package/src/config/merge-contract-check.js +321 -0
- package/src/config/merge-helpers.d.ts +58 -0
- package/src/config/merge-helpers.js +54 -0
- package/src/config/merge.js +3 -6
- package/src/config/presets/models/janus-text.json +27 -0
- package/src/config/quantization-contract-check.d.ts +12 -0
- package/src/config/quantization-contract-check.js +91 -0
- package/src/config/required-inference-fields-contract-check.d.ts +24 -0
- package/src/config/required-inference-fields-contract-check.js +231 -0
- package/src/config/schema/browser-suite-metrics.schema.d.ts +17 -0
- package/src/config/schema/browser-suite-metrics.schema.js +46 -0
- package/src/config/schema/conversion-report.schema.d.ts +40 -0
- package/src/config/schema/conversion-report.schema.js +108 -0
- package/src/config/schema/doppler.schema.js +12 -18
- package/src/config/schema/index.d.ts +22 -0
- package/src/config/schema/index.js +18 -0
- package/src/converter/core.d.ts +10 -0
- package/src/converter/core.js +49 -11
- package/src/converter/parsers/diffusion.js +63 -3
- package/src/converter/tokenizer-utils.js +17 -3
- package/src/formats/rdrr/validation.js +13 -0
- package/src/gpu/kernels/depthwise_conv2d.d.ts +29 -0
- package/src/gpu/kernels/depthwise_conv2d.js +98 -0
- package/src/gpu/kernels/depthwise_conv2d.wgsl +58 -0
- package/src/gpu/kernels/depthwise_conv2d_f16.wgsl +62 -0
- package/src/gpu/kernels/grouped_pointwise_conv2d.d.ts +27 -0
- package/src/gpu/kernels/grouped_pointwise_conv2d.js +92 -0
- package/src/gpu/kernels/grouped_pointwise_conv2d.wgsl +47 -0
- package/src/gpu/kernels/grouped_pointwise_conv2d_f16.wgsl +51 -0
- package/src/gpu/kernels/index.d.ts +30 -0
- package/src/gpu/kernels/index.js +25 -0
- package/src/gpu/kernels/relu.d.ts +18 -0
- package/src/gpu/kernels/relu.js +45 -0
- package/src/gpu/kernels/relu.wgsl +21 -0
- package/src/gpu/kernels/relu_f16.wgsl +23 -0
- package/src/gpu/kernels/repeat_channels.d.ts +21 -0
- package/src/gpu/kernels/repeat_channels.js +60 -0
- package/src/gpu/kernels/repeat_channels.wgsl +29 -0
- package/src/gpu/kernels/repeat_channels_f16.wgsl +31 -0
- package/src/gpu/kernels/sana_linear_attention.d.ts +27 -0
- package/src/gpu/kernels/sana_linear_attention.js +122 -0
- package/src/gpu/kernels/sana_linear_attention_apply.wgsl +44 -0
- package/src/gpu/kernels/sana_linear_attention_apply_f16.wgsl +47 -0
- package/src/gpu/kernels/sana_linear_attention_summary.wgsl +47 -0
- package/src/gpu/kernels/sana_linear_attention_summary_f16.wgsl +49 -0
- package/src/index-browser.d.ts +1 -0
- package/src/index-browser.js +2 -1
- package/src/index.d.ts +1 -0
- package/src/index.js +2 -1
- package/src/inference/browser-harness.js +164 -38
- package/src/inference/pipelines/diffusion/init.js +14 -0
- package/src/inference/pipelines/diffusion/pipeline.js +206 -77
- package/src/inference/pipelines/diffusion/sana-transformer.d.ts +53 -0
- package/src/inference/pipelines/diffusion/sana-transformer.js +738 -0
- package/src/inference/pipelines/diffusion/scheduler.d.ts +17 -1
- package/src/inference/pipelines/diffusion/scheduler.js +91 -3
- package/src/inference/pipelines/diffusion/text-encoder-gpu.d.ts +6 -4
- package/src/inference/pipelines/diffusion/text-encoder-gpu.js +270 -0
- package/src/inference/pipelines/diffusion/text-encoder.js +18 -1
- package/src/inference/pipelines/diffusion/types.d.ts +4 -0
- package/src/inference/pipelines/diffusion/vae.js +782 -78
- package/src/inference/pipelines/text/config.d.ts +5 -0
- package/src/inference/pipelines/text/config.js +1 -1
- package/src/inference/pipelines/text/execution-v0.js +141 -101
- package/src/inference/pipelines/text/init.js +41 -10
- package/src/inference/pipelines/text.js +7 -1
- package/src/rules/execution-rules-contract-check.d.ts +17 -0
- package/src/rules/execution-rules-contract-check.js +245 -0
- package/src/rules/kernels/depthwise-conv2d.rules.json +6 -0
- package/src/rules/kernels/grouped-pointwise-conv2d.rules.json +6 -0
- package/src/rules/kernels/relu.rules.json +6 -0
- package/src/rules/kernels/repeat-channels.rules.json +6 -0
- package/src/rules/kernels/sana-linear-attention.rules.json +6 -0
- package/src/rules/layer-pattern-contract-check.d.ts +17 -0
- package/src/rules/layer-pattern-contract-check.js +231 -0
- package/src/rules/rule-registry.d.ts +28 -0
- package/src/rules/rule-registry.js +38 -0
- package/src/tooling/conversion-config-materializer.d.ts +24 -0
- package/src/tooling/conversion-config-materializer.js +99 -0
- package/src/tooling/lean-execution-contract-runner.d.ts +43 -0
- package/src/tooling/lean-execution-contract-runner.js +158 -0
- package/src/tooling/lean-execution-contract.d.ts +16 -0
- package/src/tooling/lean-execution-contract.js +81 -0
- package/src/tooling/node-convert.d.ts +10 -0
- package/src/tooling/node-converter.js +59 -0
- package/src/tooling/node-webgpu.js +30 -9
- package/src/version.d.ts +2 -0
- package/src/version.js +2 -0
- package/tools/convert-safetensors-node.js +47 -0
- package/tools/doppler-cli.js +167 -6
|
@@ -15,10 +15,16 @@ import {
|
|
|
15
15
|
getActiveKernelPathSource,
|
|
16
16
|
getActiveKernelPathPolicy,
|
|
17
17
|
} from '../config/kernel-path-loader.js';
|
|
18
|
-
import {
|
|
18
|
+
import {
|
|
19
|
+
getInferenceLayerPatternContractArtifact,
|
|
20
|
+
selectRuleValue,
|
|
21
|
+
} from '../rules/rule-registry.js';
|
|
19
22
|
import { mergeRuntimeValues } from '../config/runtime-merge.js';
|
|
20
23
|
import { isPlainObject } from '../utils/plain-object.js';
|
|
24
|
+
import { validateBrowserSuiteMetrics } from '../config/schema/browser-suite-metrics.schema.js';
|
|
21
25
|
import { validateTrainingMetricsReport } from '../config/schema/training-metrics.schema.js';
|
|
26
|
+
import { buildExecutionContractArtifact } from '../config/execution-contract-check.js';
|
|
27
|
+
import { buildManifestRequiredInferenceFieldsArtifact } from '../config/required-inference-fields-contract-check.js';
|
|
22
28
|
|
|
23
29
|
const TRAINING_SUITE_MODULE_PATH = '../training/suite.js';
|
|
24
30
|
const NODE_SOURCE_RUNTIME_MODULE_PATH = '../tooling/node-source-runtime.js';
|
|
@@ -41,6 +47,29 @@ async function runTrainingBenchSuite(options = {}) {
|
|
|
41
47
|
return module.runTrainingBenchSuite(options);
|
|
42
48
|
}
|
|
43
49
|
|
|
50
|
+
function buildSuiteContractMetrics(suite, baseMetrics, manifest) {
|
|
51
|
+
const executionContractArtifact = buildExecutionContractArtifact(manifest);
|
|
52
|
+
const executionV0GraphContractArtifact = executionContractArtifact?.executionV0?.graph ?? null;
|
|
53
|
+
const layerPatternContractArtifact = getInferenceLayerPatternContractArtifact();
|
|
54
|
+
const requiredInferenceFieldsArtifact = manifest?.modelType === 'transformer'
|
|
55
|
+
&& isPlainObject(manifest?.inference?.attention)
|
|
56
|
+
? buildManifestRequiredInferenceFieldsArtifact(
|
|
57
|
+
manifest?.inference ?? null,
|
|
58
|
+
`${manifest?.modelId ?? 'unknown'}.inference`
|
|
59
|
+
)
|
|
60
|
+
: null;
|
|
61
|
+
return validateBrowserSuiteMetrics({
|
|
62
|
+
...baseMetrics,
|
|
63
|
+
schemaVersion: 1,
|
|
64
|
+
source: 'doppler',
|
|
65
|
+
suite,
|
|
66
|
+
...(executionContractArtifact ? { executionContractArtifact } : {}),
|
|
67
|
+
executionV0GraphContractArtifact,
|
|
68
|
+
layerPatternContractArtifact,
|
|
69
|
+
requiredInferenceFieldsArtifact,
|
|
70
|
+
});
|
|
71
|
+
}
|
|
72
|
+
|
|
44
73
|
function parseReportTimestamp(rawTimestamp, label = 'timestamp') {
|
|
45
74
|
if (rawTimestamp == null) {
|
|
46
75
|
return null;
|
|
@@ -958,6 +987,22 @@ async function runKernelSuite(options = {}) {
|
|
|
958
987
|
|
|
959
988
|
|
|
960
989
|
const DEFAULT_HARNESS_PROMPT = 'Summarize this input in one sentence.';
|
|
990
|
+
const DEFAULT_RUNTIME_PLACEHOLDER_PROMPT = 'Hello from Doppler.';
|
|
991
|
+
const DEFAULT_TRANSLATEGEMMA_PROMPT = Object.freeze({
|
|
992
|
+
messages: Object.freeze([
|
|
993
|
+
Object.freeze({
|
|
994
|
+
role: 'user',
|
|
995
|
+
content: Object.freeze([
|
|
996
|
+
Object.freeze({
|
|
997
|
+
type: 'text',
|
|
998
|
+
source_lang_code: 'en',
|
|
999
|
+
target_lang_code: 'fr',
|
|
1000
|
+
text: 'Hello world.',
|
|
1001
|
+
}),
|
|
1002
|
+
]),
|
|
1003
|
+
}),
|
|
1004
|
+
]),
|
|
1005
|
+
});
|
|
961
1006
|
const DEFAULT_HARNESS_MAX_TOKENS = 32;
|
|
962
1007
|
const EMBEDDING_PREVIEW_LENGTH = 16;
|
|
963
1008
|
const EMBEDDING_SEMANTIC_MIN_RETRIEVAL_TOP1 = 0.67;
|
|
@@ -1205,24 +1250,90 @@ function isStructuredPromptInput(value) {
|
|
|
1205
1250
|
return Array.isArray(value) || (value != null && typeof value === 'object');
|
|
1206
1251
|
}
|
|
1207
1252
|
|
|
1208
|
-
function
|
|
1253
|
+
function clonePromptInput(promptInput) {
|
|
1254
|
+
if (!isStructuredPromptInput(promptInput)) {
|
|
1255
|
+
return promptInput;
|
|
1256
|
+
}
|
|
1257
|
+
if (typeof structuredClone === 'function') {
|
|
1258
|
+
return structuredClone(promptInput);
|
|
1259
|
+
}
|
|
1260
|
+
return JSON.parse(JSON.stringify(promptInput));
|
|
1261
|
+
}
|
|
1262
|
+
|
|
1263
|
+
function resolvePromptTemplateType(source) {
|
|
1264
|
+
const sourceTemplateType = asText(source?.chatTemplateType);
|
|
1265
|
+
if (sourceTemplateType) {
|
|
1266
|
+
return sourceTemplateType;
|
|
1267
|
+
}
|
|
1268
|
+
const modelConfigTemplateType = asText(source?.modelConfig?.chatTemplateType);
|
|
1269
|
+
if (modelConfigTemplateType) {
|
|
1270
|
+
return modelConfigTemplateType;
|
|
1271
|
+
}
|
|
1272
|
+
return asText(source?.manifest?.inference?.chatTemplate?.type);
|
|
1273
|
+
}
|
|
1274
|
+
|
|
1275
|
+
function buildDefaultGenerationPrompt(templateType) {
|
|
1276
|
+
if (templateType === 'translategemma') {
|
|
1277
|
+
return clonePromptInput(DEFAULT_TRANSLATEGEMMA_PROMPT);
|
|
1278
|
+
}
|
|
1279
|
+
return DEFAULT_HARNESS_PROMPT;
|
|
1280
|
+
}
|
|
1281
|
+
|
|
1282
|
+
function shouldPreferModelDefaultPrompt(runtimePrompt, templateType) {
|
|
1283
|
+
if (templateType !== 'translategemma') {
|
|
1284
|
+
return false;
|
|
1285
|
+
}
|
|
1286
|
+
if (typeof runtimePrompt !== 'string') {
|
|
1287
|
+
return false;
|
|
1288
|
+
}
|
|
1289
|
+
return runtimePrompt.trim() === DEFAULT_RUNTIME_PLACEHOLDER_PROMPT;
|
|
1290
|
+
}
|
|
1291
|
+
|
|
1292
|
+
function describePromptInput(promptInput) {
|
|
1293
|
+
if (typeof promptInput === 'string') {
|
|
1294
|
+
return promptInput.trim() || DEFAULT_HARNESS_PROMPT;
|
|
1295
|
+
}
|
|
1296
|
+
const firstMessage = Array.isArray(promptInput?.messages)
|
|
1297
|
+
? promptInput.messages[0]
|
|
1298
|
+
: null;
|
|
1299
|
+
const firstContent = Array.isArray(firstMessage?.content)
|
|
1300
|
+
? firstMessage.content[0]
|
|
1301
|
+
: null;
|
|
1302
|
+
const sourceLang = asText(firstContent?.source_lang_code);
|
|
1303
|
+
const targetLang = asText(firstContent?.target_lang_code);
|
|
1304
|
+
const text = asText(firstContent?.text);
|
|
1305
|
+
if (sourceLang && targetLang) {
|
|
1306
|
+
return `${sourceLang} -> ${targetLang}: ${text || '[non-text request]'}`;
|
|
1307
|
+
}
|
|
1308
|
+
try {
|
|
1309
|
+
return JSON.stringify(promptInput);
|
|
1310
|
+
} catch {
|
|
1311
|
+
return '[structured prompt]';
|
|
1312
|
+
}
|
|
1313
|
+
}
|
|
1314
|
+
|
|
1315
|
+
function resolveGenerationPromptInput(runtimeConfig, runOverrides = null, source = null) {
|
|
1316
|
+
const templateType = resolvePromptTemplateType(source);
|
|
1209
1317
|
const overridePrompt = runOverrides?.prompt;
|
|
1210
1318
|
if (typeof overridePrompt === 'string' && overridePrompt.trim()) {
|
|
1211
1319
|
return overridePrompt.trim();
|
|
1212
1320
|
}
|
|
1213
1321
|
if (isStructuredPromptInput(overridePrompt)) {
|
|
1214
|
-
return overridePrompt;
|
|
1322
|
+
return clonePromptInput(overridePrompt);
|
|
1215
1323
|
}
|
|
1216
1324
|
|
|
1217
1325
|
const runtimePrompt = runtimeConfig?.inference?.prompt;
|
|
1326
|
+
if (shouldPreferModelDefaultPrompt(runtimePrompt, templateType)) {
|
|
1327
|
+
return buildDefaultGenerationPrompt(templateType);
|
|
1328
|
+
}
|
|
1218
1329
|
if (typeof runtimePrompt === 'string' && runtimePrompt.trim()) {
|
|
1219
1330
|
return runtimePrompt.trim();
|
|
1220
1331
|
}
|
|
1221
1332
|
if (isStructuredPromptInput(runtimePrompt)) {
|
|
1222
|
-
return runtimePrompt;
|
|
1333
|
+
return clonePromptInput(runtimePrompt);
|
|
1223
1334
|
}
|
|
1224
1335
|
|
|
1225
|
-
return
|
|
1336
|
+
return buildDefaultGenerationPrompt(templateType);
|
|
1226
1337
|
}
|
|
1227
1338
|
|
|
1228
1339
|
function resolveMaxTokens(runtimeConfig) {
|
|
@@ -1233,7 +1344,7 @@ function resolveMaxTokens(runtimeConfig) {
|
|
|
1233
1344
|
return DEFAULT_HARNESS_MAX_TOKENS;
|
|
1234
1345
|
}
|
|
1235
1346
|
|
|
1236
|
-
function resolveBenchmarkRunSettings(runtimeConfig) {
|
|
1347
|
+
function resolveBenchmarkRunSettings(runtimeConfig, source = null) {
|
|
1237
1348
|
const benchConfig = runtimeConfig?.shared?.benchmark?.run || {};
|
|
1238
1349
|
const runtimeSampling = isPlainObject(runtimeConfig?.inference?.sampling)
|
|
1239
1350
|
? runtimeConfig.inference.sampling
|
|
@@ -1241,9 +1352,9 @@ function resolveBenchmarkRunSettings(runtimeConfig) {
|
|
|
1241
1352
|
const benchSampling = isPlainObject(benchConfig?.sampling)
|
|
1242
1353
|
? benchConfig.sampling
|
|
1243
1354
|
: {};
|
|
1244
|
-
const
|
|
1355
|
+
const promptInput = typeof benchConfig.customPrompt === 'string' && benchConfig.customPrompt.trim()
|
|
1245
1356
|
? benchConfig.customPrompt.trim()
|
|
1246
|
-
:
|
|
1357
|
+
: resolveGenerationPromptInput(runtimeConfig, null, source);
|
|
1247
1358
|
const maxTokens = Number.isFinite(benchConfig.maxNewTokens)
|
|
1248
1359
|
? Math.max(1, Math.floor(benchConfig.maxNewTokens))
|
|
1249
1360
|
: resolveMaxTokens(runtimeConfig);
|
|
@@ -1251,7 +1362,8 @@ function resolveBenchmarkRunSettings(runtimeConfig) {
|
|
|
1251
1362
|
return {
|
|
1252
1363
|
warmupRuns: Math.max(0, Math.floor(benchConfig.warmupRuns ?? 0)),
|
|
1253
1364
|
timedRuns: Math.max(1, Math.floor(benchConfig.timedRuns ?? 1)),
|
|
1254
|
-
prompt,
|
|
1365
|
+
prompt: promptInput,
|
|
1366
|
+
promptLabel: describePromptInput(promptInput),
|
|
1255
1367
|
maxTokens,
|
|
1256
1368
|
sampling: {
|
|
1257
1369
|
...runtimeSampling,
|
|
@@ -1465,7 +1577,8 @@ function isCoherentOutput(tokens, output) {
|
|
|
1465
1577
|
async function runGeneration(pipeline, runtimeConfig, runOverrides = null) {
|
|
1466
1578
|
const tokens = [];
|
|
1467
1579
|
const tokenIds = [];
|
|
1468
|
-
const promptInput = resolveGenerationPromptInput(runtimeConfig, runOverrides);
|
|
1580
|
+
const promptInput = resolveGenerationPromptInput(runtimeConfig, runOverrides, pipeline);
|
|
1581
|
+
const promptLabel = describePromptInput(promptInput);
|
|
1469
1582
|
const useChatTemplate = runOverrides?.useChatTemplate
|
|
1470
1583
|
?? runtimeConfig?.inference?.chatTemplate?.enabled
|
|
1471
1584
|
?? (isStructuredPromptInput(promptInput) ? true : undefined);
|
|
@@ -1532,7 +1645,8 @@ async function runGeneration(pipeline, runtimeConfig, runOverrides = null) {
|
|
|
1532
1645
|
: null;
|
|
1533
1646
|
|
|
1534
1647
|
return {
|
|
1535
|
-
prompt:
|
|
1648
|
+
prompt: promptLabel,
|
|
1649
|
+
promptInput,
|
|
1536
1650
|
maxTokens,
|
|
1537
1651
|
tokens,
|
|
1538
1652
|
tokenIds,
|
|
@@ -1739,6 +1853,11 @@ async function runInferenceSuite(options = {}) {
|
|
|
1739
1853
|
source: 'doppler',
|
|
1740
1854
|
prefillSemantics: 'internal_prefill_phase',
|
|
1741
1855
|
});
|
|
1856
|
+
const metricsWithContracts = buildSuiteContractMetrics(
|
|
1857
|
+
options.suiteName || 'inference',
|
|
1858
|
+
metrics,
|
|
1859
|
+
harness.manifest
|
|
1860
|
+
);
|
|
1742
1861
|
return {
|
|
1743
1862
|
...summary,
|
|
1744
1863
|
modelId: options.modelId || harness.manifest?.modelId || 'unknown',
|
|
@@ -1756,7 +1875,7 @@ async function runInferenceSuite(options = {}) {
|
|
|
1756
1875
|
timing,
|
|
1757
1876
|
timingDiagnostics,
|
|
1758
1877
|
output,
|
|
1759
|
-
metrics,
|
|
1878
|
+
metrics: metricsWithContracts,
|
|
1760
1879
|
memoryStats,
|
|
1761
1880
|
deviceInfo: resolveDeviceInfo(),
|
|
1762
1881
|
pipeline: options.keepPipeline ? harness.pipeline : null,
|
|
@@ -1766,9 +1885,9 @@ async function runInferenceSuite(options = {}) {
|
|
|
1766
1885
|
async function runBenchSuite(options = {}) {
|
|
1767
1886
|
const startTime = performance.now();
|
|
1768
1887
|
const runtimeConfig = getRuntimeConfig();
|
|
1769
|
-
const
|
|
1770
|
-
const warmupRuns =
|
|
1771
|
-
const timedRuns =
|
|
1888
|
+
const defaultBenchRun = resolveBenchmarkRunSettings(runtimeConfig);
|
|
1889
|
+
const warmupRuns = defaultBenchRun.warmupRuns;
|
|
1890
|
+
const timedRuns = defaultBenchRun.timedRuns;
|
|
1772
1891
|
const cacheMode = normalizeCacheMode(options.cacheMode);
|
|
1773
1892
|
const loadMode = normalizeLoadMode(options.loadMode, !options.modelUrl);
|
|
1774
1893
|
const workloadType = normalizeWorkloadType(options.workloadType);
|
|
@@ -1776,7 +1895,7 @@ async function runBenchSuite(options = {}) {
|
|
|
1776
1895
|
if (workloadType === 'training') {
|
|
1777
1896
|
const trainingBench = await runTrainingBenchSuite({
|
|
1778
1897
|
...options,
|
|
1779
|
-
benchRun,
|
|
1898
|
+
benchRun: defaultBenchRun,
|
|
1780
1899
|
workloadType,
|
|
1781
1900
|
});
|
|
1782
1901
|
const trainingReport = trainingBench?.metrics?.trainingMetricsReport;
|
|
@@ -1858,6 +1977,7 @@ async function runBenchSuite(options = {}) {
|
|
|
1858
1977
|
}
|
|
1859
1978
|
|
|
1860
1979
|
const harness = await initializeSuiteModel(options);
|
|
1980
|
+
const benchRun = resolveBenchmarkRunSettings(runtimeConfig, harness.pipeline ?? harness);
|
|
1861
1981
|
const modelType = harness.manifest?.modelType || 'transformer';
|
|
1862
1982
|
const safeModelLoadMs = toTimingNumber(harness.modelLoadMs, 0);
|
|
1863
1983
|
|
|
@@ -1927,7 +2047,7 @@ async function runBenchSuite(options = {}) {
|
|
|
1927
2047
|
validRuns: durations.length,
|
|
1928
2048
|
invalidRuns,
|
|
1929
2049
|
invalidRatePct: Number((timedRuns > 0 ? (invalidRuns / timedRuns) * 100 : 0).toFixed(2)),
|
|
1930
|
-
prompt: benchRun.
|
|
2050
|
+
prompt: benchRun.promptLabel,
|
|
1931
2051
|
embeddingDim: Math.round(embeddingDims.reduce((a, b) => a + b, 0) / (embeddingDims.length || 1)),
|
|
1932
2052
|
nonFiniteValues: totalNonFiniteValues,
|
|
1933
2053
|
firstTimedEmbeddingMs: Number((firstTimedEmbeddingMs ?? 0).toFixed(2)),
|
|
@@ -2058,7 +2178,7 @@ async function runBenchSuite(options = {}) {
|
|
|
2058
2178
|
metrics = {
|
|
2059
2179
|
warmupRuns,
|
|
2060
2180
|
timedRuns,
|
|
2061
|
-
prompt: benchRun.
|
|
2181
|
+
prompt: benchRun.promptLabel,
|
|
2062
2182
|
maxTokens: benchRun.maxTokens,
|
|
2063
2183
|
decodeTokensPerSec: sampleTimingNumber(decodeTokensPerSecStats, 'median'),
|
|
2064
2184
|
avgTokensGenerated: Math.round(tokensGeneratedStats.mean),
|
|
@@ -2132,6 +2252,7 @@ async function runBenchSuite(options = {}) {
|
|
|
2132
2252
|
source: 'doppler',
|
|
2133
2253
|
prefillSemantics: 'internal_prefill_phase',
|
|
2134
2254
|
});
|
|
2255
|
+
const metricsWithContracts = buildSuiteContractMetrics('bench', metrics, harness.manifest);
|
|
2135
2256
|
return {
|
|
2136
2257
|
...summary,
|
|
2137
2258
|
modelId: options.modelId || harness.manifest?.modelId || 'unknown',
|
|
@@ -2149,7 +2270,7 @@ async function runBenchSuite(options = {}) {
|
|
|
2149
2270
|
timing,
|
|
2150
2271
|
timingDiagnostics,
|
|
2151
2272
|
output,
|
|
2152
|
-
metrics,
|
|
2273
|
+
metrics: metricsWithContracts,
|
|
2153
2274
|
memoryStats,
|
|
2154
2275
|
deviceInfo: resolveDeviceInfo(),
|
|
2155
2276
|
pipeline: options.keepPipeline ? harness.pipeline : null,
|
|
@@ -2310,25 +2431,9 @@ async function runDiffusionSuite(options = {}) {
|
|
|
2310
2431
|
source: 'doppler',
|
|
2311
2432
|
prefillSemantics: 'internal_prefill_phase',
|
|
2312
2433
|
});
|
|
2313
|
-
|
|
2314
|
-
|
|
2315
|
-
|
|
2316
|
-
modelId: options.modelId || harness.manifest?.modelId || 'unknown',
|
|
2317
|
-
cacheMode,
|
|
2318
|
-
loadMode,
|
|
2319
|
-
env: {
|
|
2320
|
-
library: 'doppler',
|
|
2321
|
-
runtime: 'browser',
|
|
2322
|
-
device: 'webgpu',
|
|
2323
|
-
browserUserAgent: typeof navigator !== 'undefined' ? (navigator.userAgent || null) : null,
|
|
2324
|
-
browserPlatform: typeof navigator !== 'undefined' ? (navigator.platform || null) : null,
|
|
2325
|
-
browserLanguage: typeof navigator !== 'undefined' ? (navigator.language || null) : null,
|
|
2326
|
-
browserVendor: typeof navigator !== 'undefined' ? (navigator.vendor || null) : null,
|
|
2327
|
-
},
|
|
2328
|
-
timing,
|
|
2329
|
-
timingDiagnostics,
|
|
2330
|
-
output,
|
|
2331
|
-
metrics: {
|
|
2434
|
+
const metricsWithContracts = buildSuiteContractMetrics(
|
|
2435
|
+
'diffusion',
|
|
2436
|
+
{
|
|
2332
2437
|
warmupRuns,
|
|
2333
2438
|
timedRuns,
|
|
2334
2439
|
width,
|
|
@@ -2353,6 +2458,27 @@ async function runDiffusionSuite(options = {}) {
|
|
|
2353
2458
|
gpu: gpuStats,
|
|
2354
2459
|
performanceArtifact: diffusionPerformanceArtifact,
|
|
2355
2460
|
},
|
|
2461
|
+
harness.manifest
|
|
2462
|
+
);
|
|
2463
|
+
|
|
2464
|
+
return {
|
|
2465
|
+
...summary,
|
|
2466
|
+
modelId: options.modelId || harness.manifest?.modelId || 'unknown',
|
|
2467
|
+
cacheMode,
|
|
2468
|
+
loadMode,
|
|
2469
|
+
env: {
|
|
2470
|
+
library: 'doppler',
|
|
2471
|
+
runtime: 'browser',
|
|
2472
|
+
device: 'webgpu',
|
|
2473
|
+
browserUserAgent: typeof navigator !== 'undefined' ? (navigator.userAgent || null) : null,
|
|
2474
|
+
browserPlatform: typeof navigator !== 'undefined' ? (navigator.platform || null) : null,
|
|
2475
|
+
browserLanguage: typeof navigator !== 'undefined' ? (navigator.language || null) : null,
|
|
2476
|
+
browserVendor: typeof navigator !== 'undefined' ? (navigator.vendor || null) : null,
|
|
2477
|
+
},
|
|
2478
|
+
timing,
|
|
2479
|
+
timingDiagnostics,
|
|
2480
|
+
output,
|
|
2481
|
+
metrics: metricsWithContracts,
|
|
2356
2482
|
memoryStats,
|
|
2357
2483
|
deviceInfo: resolveDeviceInfo(),
|
|
2358
2484
|
pipeline: options.keepPipeline ? harness.pipeline : null,
|
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
import { DEFAULT_DIFFUSION_CONFIG } from '../../../config/schema/index.js';
|
|
2
2
|
|
|
3
|
+
const SUPPORTED_DIFFUSION_RUNTIME_LAYOUTS = new Set(['sd3', 'flux', 'sana']);
|
|
4
|
+
|
|
3
5
|
function mergeSection(base, override) {
|
|
4
6
|
if (!override) return { ...base };
|
|
5
7
|
return { ...base, ...override };
|
|
@@ -38,6 +40,9 @@ function resolveSchedulerType(modelScheduler, runtimeScheduler) {
|
|
|
38
40
|
if (modelClass === 'FlowMatchEulerDiscreteScheduler') {
|
|
39
41
|
return 'flowmatch_euler';
|
|
40
42
|
}
|
|
43
|
+
if (modelClass === 'SCMScheduler') {
|
|
44
|
+
return 'scm';
|
|
45
|
+
}
|
|
41
46
|
if (modelClass === 'EulerDiscreteScheduler') {
|
|
42
47
|
return 'euler';
|
|
43
48
|
}
|
|
@@ -58,6 +63,8 @@ function mergeSchedulerConfig(modelConfig, runtimeScheduler) {
|
|
|
58
63
|
type,
|
|
59
64
|
numTrainTimesteps: modelScheduler.num_train_timesteps ?? runtimeScheduler.numTrainTimesteps,
|
|
60
65
|
shift: modelScheduler.shift ?? runtimeScheduler.shift,
|
|
66
|
+
predictionType: modelScheduler.prediction_type ?? runtimeScheduler.predictionType,
|
|
67
|
+
sigmaData: modelScheduler.sigma_data ?? runtimeScheduler.sigmaData,
|
|
61
68
|
};
|
|
62
69
|
}
|
|
63
70
|
|
|
@@ -95,6 +102,13 @@ export function initializeDiffusion(manifest, runtimeConfig) {
|
|
|
95
102
|
}
|
|
96
103
|
throw new Error('Diffusion manifest missing config.diffusion model contract.');
|
|
97
104
|
}
|
|
105
|
+
const layout = modelConfig.layout;
|
|
106
|
+
if (layout && !SUPPORTED_DIFFUSION_RUNTIME_LAYOUTS.has(layout)) {
|
|
107
|
+
throw new Error(
|
|
108
|
+
`Diffusion layout "${layout}" is recognized in the manifest, but the GPU runtime is not implemented yet. ` +
|
|
109
|
+
'Supported runtime layouts: sd3, flux, sana.'
|
|
110
|
+
);
|
|
111
|
+
}
|
|
98
112
|
|
|
99
113
|
const runtimeBase = mergeDiffusionConfig(DEFAULT_DIFFUSION_CONFIG, runtimeConfig?.inference?.diffusion);
|
|
100
114
|
const runtime = {
|