@simulatte/doppler 0.1.3 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +11 -5
- package/package.json +27 -4
- package/src/client/doppler-api.browser.d.ts +1 -0
- package/src/client/doppler-api.browser.js +288 -0
- package/src/client/doppler-api.d.ts +80 -0
- package/src/client/doppler-api.js +298 -0
- package/src/client/doppler-provider/types.js +1 -1
- package/src/client/doppler-registry.d.ts +23 -0
- package/src/client/doppler-registry.js +88 -0
- package/src/client/doppler-registry.json +39 -0
- package/src/config/execution-contract-check.d.ts +82 -0
- package/src/config/execution-contract-check.js +317 -0
- package/src/config/execution-v0-contract-check.d.ts +94 -0
- package/src/config/execution-v0-contract-check.js +251 -0
- package/src/config/execution-v0-graph-contract-check.d.ts +20 -0
- package/src/config/execution-v0-graph-contract-check.js +64 -0
- package/src/config/kernel-path-contract-check.d.ts +76 -0
- package/src/config/kernel-path-contract-check.js +479 -0
- package/src/config/kernel-path-loader.d.ts +16 -0
- package/src/config/kernel-path-loader.js +54 -0
- package/src/config/kernels/kernel-ref-digests.js +12 -0
- package/src/config/kernels/registry.json +556 -0
- package/src/config/loader.js +90 -67
- package/src/config/merge-contract-check.d.ts +16 -0
- package/src/config/merge-contract-check.js +321 -0
- package/src/config/merge-helpers.d.ts +58 -0
- package/src/config/merge-helpers.js +54 -0
- package/src/config/merge.js +3 -6
- package/src/config/presets/models/janus-text.json +27 -0
- package/src/config/quantization-contract-check.d.ts +12 -0
- package/src/config/quantization-contract-check.js +91 -0
- package/src/config/required-inference-fields-contract-check.d.ts +24 -0
- package/src/config/required-inference-fields-contract-check.js +231 -0
- package/src/config/schema/browser-suite-metrics.schema.d.ts +17 -0
- package/src/config/schema/browser-suite-metrics.schema.js +46 -0
- package/src/config/schema/conversion-report.schema.d.ts +40 -0
- package/src/config/schema/conversion-report.schema.js +108 -0
- package/src/config/schema/doppler.schema.js +12 -18
- package/src/config/schema/index.d.ts +22 -0
- package/src/config/schema/index.js +18 -0
- package/src/converter/core.d.ts +10 -0
- package/src/converter/core.js +49 -11
- package/src/converter/parsers/diffusion.js +63 -3
- package/src/converter/tokenizer-utils.js +17 -3
- package/src/formats/rdrr/validation.js +13 -0
- package/src/gpu/kernels/depthwise_conv2d.d.ts +29 -0
- package/src/gpu/kernels/depthwise_conv2d.js +98 -0
- package/src/gpu/kernels/depthwise_conv2d.wgsl +58 -0
- package/src/gpu/kernels/depthwise_conv2d_f16.wgsl +62 -0
- package/src/gpu/kernels/grouped_pointwise_conv2d.d.ts +27 -0
- package/src/gpu/kernels/grouped_pointwise_conv2d.js +92 -0
- package/src/gpu/kernels/grouped_pointwise_conv2d.wgsl +47 -0
- package/src/gpu/kernels/grouped_pointwise_conv2d_f16.wgsl +51 -0
- package/src/gpu/kernels/index.d.ts +30 -0
- package/src/gpu/kernels/index.js +25 -0
- package/src/gpu/kernels/relu.d.ts +18 -0
- package/src/gpu/kernels/relu.js +45 -0
- package/src/gpu/kernels/relu.wgsl +21 -0
- package/src/gpu/kernels/relu_f16.wgsl +23 -0
- package/src/gpu/kernels/repeat_channels.d.ts +21 -0
- package/src/gpu/kernels/repeat_channels.js +60 -0
- package/src/gpu/kernels/repeat_channels.wgsl +29 -0
- package/src/gpu/kernels/repeat_channels_f16.wgsl +31 -0
- package/src/gpu/kernels/sana_linear_attention.d.ts +27 -0
- package/src/gpu/kernels/sana_linear_attention.js +122 -0
- package/src/gpu/kernels/sana_linear_attention_apply.wgsl +44 -0
- package/src/gpu/kernels/sana_linear_attention_apply_f16.wgsl +47 -0
- package/src/gpu/kernels/sana_linear_attention_summary.wgsl +47 -0
- package/src/gpu/kernels/sana_linear_attention_summary_f16.wgsl +49 -0
- package/src/index-browser.d.ts +1 -0
- package/src/index-browser.js +2 -1
- package/src/index.d.ts +1 -0
- package/src/index.js +2 -1
- package/src/inference/browser-harness.js +164 -38
- package/src/inference/pipelines/diffusion/init.js +14 -0
- package/src/inference/pipelines/diffusion/pipeline.js +206 -77
- package/src/inference/pipelines/diffusion/sana-transformer.d.ts +53 -0
- package/src/inference/pipelines/diffusion/sana-transformer.js +738 -0
- package/src/inference/pipelines/diffusion/scheduler.d.ts +17 -1
- package/src/inference/pipelines/diffusion/scheduler.js +91 -3
- package/src/inference/pipelines/diffusion/text-encoder-gpu.d.ts +6 -4
- package/src/inference/pipelines/diffusion/text-encoder-gpu.js +270 -0
- package/src/inference/pipelines/diffusion/text-encoder.js +18 -1
- package/src/inference/pipelines/diffusion/types.d.ts +4 -0
- package/src/inference/pipelines/diffusion/vae.js +782 -78
- package/src/inference/pipelines/text/config.d.ts +5 -0
- package/src/inference/pipelines/text/config.js +1 -1
- package/src/inference/pipelines/text/execution-v0.js +141 -101
- package/src/inference/pipelines/text/init.js +41 -10
- package/src/inference/pipelines/text.js +7 -1
- package/src/rules/execution-rules-contract-check.d.ts +17 -0
- package/src/rules/execution-rules-contract-check.js +245 -0
- package/src/rules/kernels/depthwise-conv2d.rules.json +6 -0
- package/src/rules/kernels/grouped-pointwise-conv2d.rules.json +6 -0
- package/src/rules/kernels/relu.rules.json +6 -0
- package/src/rules/kernels/repeat-channels.rules.json +6 -0
- package/src/rules/kernels/sana-linear-attention.rules.json +6 -0
- package/src/rules/layer-pattern-contract-check.d.ts +17 -0
- package/src/rules/layer-pattern-contract-check.js +231 -0
- package/src/rules/rule-registry.d.ts +28 -0
- package/src/rules/rule-registry.js +38 -0
- package/src/tooling/conversion-config-materializer.d.ts +24 -0
- package/src/tooling/conversion-config-materializer.js +99 -0
- package/src/tooling/lean-execution-contract-runner.d.ts +43 -0
- package/src/tooling/lean-execution-contract-runner.js +158 -0
- package/src/tooling/lean-execution-contract.d.ts +16 -0
- package/src/tooling/lean-execution-contract.js +81 -0
- package/src/tooling/node-convert.d.ts +10 -0
- package/src/tooling/node-converter.js +59 -0
- package/src/tooling/node-webgpu.js +30 -9
- package/src/version.d.ts +2 -0
- package/src/version.js +2 -0
- package/tools/convert-safetensors-node.js +47 -0
- package/tools/doppler-cli.js +167 -6
|
@@ -225,6 +225,28 @@ export {
|
|
|
225
225
|
type ConversionIOSchema,
|
|
226
226
|
} from './conversion.schema.js';
|
|
227
227
|
|
|
228
|
+
// =============================================================================
|
|
229
|
+
// Browser Suite Metrics Schema
|
|
230
|
+
// =============================================================================
|
|
231
|
+
export {
|
|
232
|
+
type BrowserSuiteMetricsSchema,
|
|
233
|
+
BROWSER_SUITE_METRICS_SCHEMA_VERSION,
|
|
234
|
+
DEFAULT_BROWSER_SUITE_METRICS,
|
|
235
|
+
validateBrowserSuiteMetrics,
|
|
236
|
+
} from './browser-suite-metrics.schema.js';
|
|
237
|
+
|
|
238
|
+
// =============================================================================
|
|
239
|
+
// Conversion Report Schema
|
|
240
|
+
// =============================================================================
|
|
241
|
+
export {
|
|
242
|
+
type ConversionReportResultSchema,
|
|
243
|
+
type ConversionReportManifestSchema,
|
|
244
|
+
type ConversionReportSchema,
|
|
245
|
+
CONVERSION_REPORT_SCHEMA_VERSION,
|
|
246
|
+
DEFAULT_CONVERSION_REPORT,
|
|
247
|
+
validateConversionReport,
|
|
248
|
+
} from './conversion-report.schema.js';
|
|
249
|
+
|
|
228
250
|
// =============================================================================
|
|
229
251
|
// Converter Schema
|
|
230
252
|
// =============================================================================
|
|
@@ -55,6 +55,24 @@ export {
|
|
|
55
55
|
ConversionStage,
|
|
56
56
|
} from './conversion.schema.js';
|
|
57
57
|
|
|
58
|
+
// =============================================================================
|
|
59
|
+
// Browser Suite Metrics Schema
|
|
60
|
+
// =============================================================================
|
|
61
|
+
export {
|
|
62
|
+
BROWSER_SUITE_METRICS_SCHEMA_VERSION,
|
|
63
|
+
DEFAULT_BROWSER_SUITE_METRICS,
|
|
64
|
+
validateBrowserSuiteMetrics,
|
|
65
|
+
} from './browser-suite-metrics.schema.js';
|
|
66
|
+
|
|
67
|
+
// =============================================================================
|
|
68
|
+
// Conversion Report Schema
|
|
69
|
+
// =============================================================================
|
|
70
|
+
export {
|
|
71
|
+
CONVERSION_REPORT_SCHEMA_VERSION,
|
|
72
|
+
DEFAULT_CONVERSION_REPORT,
|
|
73
|
+
validateConversionReport,
|
|
74
|
+
} from './conversion-report.schema.js';
|
|
75
|
+
|
|
58
76
|
// =============================================================================
|
|
59
77
|
// Converter Schema
|
|
60
78
|
// =============================================================================
|
package/src/converter/core.d.ts
CHANGED
|
@@ -27,6 +27,12 @@ import type {
|
|
|
27
27
|
MoEConfigSchema,
|
|
28
28
|
ConversionInfoSchema,
|
|
29
29
|
} from '../config/schema/index.js';
|
|
30
|
+
import type { ExecutionContractArtifact } from '../config/execution-contract-check.js';
|
|
31
|
+
import type { ExecutionV0GraphContractArtifact } from '../config/execution-v0-graph-contract-check.js';
|
|
32
|
+
import type {
|
|
33
|
+
ManifestRequiredInferenceFieldsArtifact,
|
|
34
|
+
RequiredInferenceFieldsContractArtifact,
|
|
35
|
+
} from '../config/required-inference-fields-contract-check.js';
|
|
30
36
|
|
|
31
37
|
export { generateShardFilename } from '../formats/rdrr/index.js';
|
|
32
38
|
|
|
@@ -144,6 +150,10 @@ export interface ConvertResult {
|
|
|
144
150
|
shardCount: number;
|
|
145
151
|
tensorCount: number;
|
|
146
152
|
totalSize: number;
|
|
153
|
+
executionContractArtifact: ExecutionContractArtifact | null;
|
|
154
|
+
executionV0GraphContractArtifact: ExecutionV0GraphContractArtifact | null;
|
|
155
|
+
layerPatternContractArtifact: Record<string, unknown> | null;
|
|
156
|
+
requiredInferenceFieldsArtifact: ManifestRequiredInferenceFieldsArtifact | RequiredInferenceFieldsContractArtifact | null;
|
|
147
157
|
}
|
|
148
158
|
|
|
149
159
|
/** @deprecated Use ConversionIOSchema from config/schema */
|
package/src/converter/core.js
CHANGED
|
@@ -9,15 +9,20 @@ import {
|
|
|
9
9
|
formatBytes,
|
|
10
10
|
} from '../config/schema/index.js';
|
|
11
11
|
|
|
12
|
-
import { classifyTensorRole, generateShardFilename } from '../formats/rdrr/index.js';
|
|
12
|
+
import { classifyTensor, classifyTensorRole, generateShardFilename } from '../formats/rdrr/index.js';
|
|
13
13
|
import { log } from '../debug/index.js';
|
|
14
|
-
import {
|
|
14
|
+
import {
|
|
15
|
+
getInferenceLayerPatternContractArtifact,
|
|
16
|
+
selectRuleValue,
|
|
17
|
+
} from '../rules/rule-registry.js';
|
|
15
18
|
import {
|
|
16
19
|
createConverterConfig,
|
|
17
20
|
detectPreset,
|
|
18
21
|
listPresets,
|
|
19
22
|
resolvePreset,
|
|
20
23
|
} from '../config/index.js';
|
|
24
|
+
import { buildExecutionContractArtifact } from '../config/execution-contract-check.js';
|
|
25
|
+
import { buildManifestRequiredInferenceFieldsArtifact } from '../config/required-inference-fields-contract-check.js';
|
|
21
26
|
import { buildManifestInference, inferEmbeddingOutputConfig } from './manifest-inference.js';
|
|
22
27
|
import { resolveEosTokenId } from './tokenizer-utils.js';
|
|
23
28
|
import {
|
|
@@ -76,7 +81,8 @@ function resolveTokenizerField(tokenizerConfig, ...keys) {
|
|
|
76
81
|
}
|
|
77
82
|
|
|
78
83
|
function resolveTokenizerVocabSize(tokenizerConfig, rawConfig, architecture) {
|
|
79
|
-
const
|
|
84
|
+
const nestedTextConfig = getNestedTextConfig(rawConfig);
|
|
85
|
+
const configVocab = rawConfig?.vocab_size ?? nestedTextConfig?.vocab_size;
|
|
80
86
|
const tokenizerVocab = tokenizerConfig?.vocab_size ?? tokenizerConfig?.vocabSize;
|
|
81
87
|
const archVocab = architecture?.vocabSize;
|
|
82
88
|
return tokenizerVocab ?? configVocab ?? archVocab ?? null;
|
|
@@ -223,21 +229,22 @@ function toFloat32ForQ4K(tensorData, sourceDtype, tensorName) {
|
|
|
223
229
|
|
|
224
230
|
function resolveConfigTokenId(rawConfig, key) {
|
|
225
231
|
const direct = rawConfig?.[key];
|
|
226
|
-
const nested = rawConfig?.
|
|
232
|
+
const nested = getNestedTextConfig(rawConfig)?.[key];
|
|
227
233
|
return resolveTokenizerId(direct ?? nested);
|
|
228
234
|
}
|
|
229
235
|
|
|
230
236
|
function resolveConfigTokenIds(rawConfig, key) {
|
|
231
237
|
const direct = rawConfig?.[key];
|
|
232
|
-
const nested = rawConfig?.
|
|
238
|
+
const nested = getNestedTextConfig(rawConfig)?.[key];
|
|
233
239
|
return resolveTokenizerIds(direct ?? nested);
|
|
234
240
|
}
|
|
235
241
|
|
|
236
242
|
function resolveMoEConfigNumber(rawConfig, ...keys) {
|
|
243
|
+
const nestedTextConfig = getNestedTextConfig(rawConfig);
|
|
237
244
|
for (const key of keys) {
|
|
238
245
|
const direct = rawConfig?.[key];
|
|
239
246
|
if (Number.isFinite(direct) && direct > 0) return Number(direct);
|
|
240
|
-
const nested =
|
|
247
|
+
const nested = nestedTextConfig?.[key];
|
|
241
248
|
if (Number.isFinite(nested) && nested > 0) return Number(nested);
|
|
242
249
|
}
|
|
243
250
|
return null;
|
|
@@ -317,7 +324,7 @@ function resolveIntermediateSizeFromTensors(architecture, model, tensorLocations
|
|
|
317
324
|
if (typeof current !== 'number' || !Number.isFinite(current) || current <= 0) {
|
|
318
325
|
return architecture;
|
|
319
326
|
}
|
|
320
|
-
const modelType = String(rawConfig?.model_type ?? rawConfig?.
|
|
327
|
+
const modelType = String(rawConfig?.model_type ?? getNestedTextConfig(rawConfig)?.model_type ?? '').toLowerCase();
|
|
321
328
|
if (modelType !== 'lfm2') {
|
|
322
329
|
return architecture;
|
|
323
330
|
}
|
|
@@ -359,7 +366,7 @@ function resolveMoEExpertFormat(rawConfig, resolvedModelType, quantizationInfo,
|
|
|
359
366
|
const modelType = String(
|
|
360
367
|
resolvedModelType ??
|
|
361
368
|
rawConfig?.model_type ??
|
|
362
|
-
rawConfig?.
|
|
369
|
+
getNestedTextConfig(rawConfig)?.model_type ??
|
|
363
370
|
''
|
|
364
371
|
).toLowerCase();
|
|
365
372
|
if (modelType.includes('gpt_oss') || modelType.includes('gpt-oss') || modelType.includes('gptoss')) {
|
|
@@ -725,9 +732,7 @@ export function extractArchitecture(config, ggufConfig) {
|
|
|
725
732
|
|
|
726
733
|
// Try HuggingFace config first
|
|
727
734
|
if (config && Object.keys(config).length > 0) {
|
|
728
|
-
const textConfig = (
|
|
729
|
-
config.text_config && typeof config.text_config === 'object' && !Array.isArray(config.text_config)
|
|
730
|
-
) ? config.text_config : null;
|
|
735
|
+
const textConfig = getNestedTextConfig(config);
|
|
731
736
|
const fromConfig = (...keys) => {
|
|
732
737
|
const values = [];
|
|
733
738
|
for (const key of keys) {
|
|
@@ -860,6 +865,19 @@ export function extractArchitecture(config, ggufConfig) {
|
|
|
860
865
|
throw new Error('Missing model config: cannot extract architecture');
|
|
861
866
|
}
|
|
862
867
|
|
|
868
|
+
function getNestedTextConfig(config) {
|
|
869
|
+
if (!config || typeof config !== 'object' || Array.isArray(config)) {
|
|
870
|
+
return null;
|
|
871
|
+
}
|
|
872
|
+
if (config.text_config && typeof config.text_config === 'object' && !Array.isArray(config.text_config)) {
|
|
873
|
+
return config.text_config;
|
|
874
|
+
}
|
|
875
|
+
if (config.language_config && typeof config.language_config === 'object' && !Array.isArray(config.language_config)) {
|
|
876
|
+
return config.language_config;
|
|
877
|
+
}
|
|
878
|
+
return null;
|
|
879
|
+
}
|
|
880
|
+
|
|
863
881
|
|
|
864
882
|
export function buildTensorMap(tensors, shardSize) {
|
|
865
883
|
if (!shardSize || shardSize <= 0) {
|
|
@@ -1115,6 +1133,7 @@ export async function convertModel(model, io, options = {}) {
|
|
|
1115
1133
|
}
|
|
1116
1134
|
const totalTensors = tensors.length;
|
|
1117
1135
|
const targetQuant = String(options.quantization ?? model.quantization ?? '').trim().toLowerCase();
|
|
1136
|
+
const tensorGroupModelType = String(options.modelType ?? model.modelType ?? 'transformer');
|
|
1118
1137
|
const q4kLayout = normalizeQ4KLayout(options.quantizationInfo?.layout);
|
|
1119
1138
|
const quantizeEmbeddings = resolveQuantizeEmbeddings(
|
|
1120
1139
|
options.quantizationInfo ?? null,
|
|
@@ -1238,6 +1257,7 @@ export async function convertModel(model, io, options = {}) {
|
|
|
1238
1257
|
|
|
1239
1258
|
// Record tensor location
|
|
1240
1259
|
const role = classifyTensorRole(tensor.name);
|
|
1260
|
+
const group = classifyTensor(tensor.name, tensorGroupModelType);
|
|
1241
1261
|
|
|
1242
1262
|
if (tensorSpans.length === 1) {
|
|
1243
1263
|
tensorLocations[tensor.name] = {
|
|
@@ -1247,6 +1267,7 @@ export async function convertModel(model, io, options = {}) {
|
|
|
1247
1267
|
shape: tensor.shape,
|
|
1248
1268
|
dtype: outDtype,
|
|
1249
1269
|
role,
|
|
1270
|
+
group,
|
|
1250
1271
|
...(outLayout ? { layout: outLayout } : {}),
|
|
1251
1272
|
};
|
|
1252
1273
|
} else {
|
|
@@ -1256,6 +1277,7 @@ export async function convertModel(model, io, options = {}) {
|
|
|
1256
1277
|
shape: tensor.shape,
|
|
1257
1278
|
dtype: outDtype,
|
|
1258
1279
|
role,
|
|
1280
|
+
group,
|
|
1259
1281
|
...(outLayout ? { layout: outLayout } : {}),
|
|
1260
1282
|
};
|
|
1261
1283
|
}
|
|
@@ -1314,11 +1336,27 @@ export async function convertModel(model, io, options = {}) {
|
|
|
1314
1336
|
totalSize: formatBytes(totalSize),
|
|
1315
1337
|
});
|
|
1316
1338
|
|
|
1339
|
+
const executionContractArtifact = buildExecutionContractArtifact(manifest);
|
|
1340
|
+
const layerPatternContractArtifact = getInferenceLayerPatternContractArtifact();
|
|
1341
|
+
const requiredInferenceFieldsArtifact = manifest?.modelType === 'transformer'
|
|
1342
|
+
&& manifest?.inference
|
|
1343
|
+
&& typeof manifest.inference === 'object'
|
|
1344
|
+
&& manifest.inference.attention
|
|
1345
|
+
&& typeof manifest.inference.attention === 'object'
|
|
1346
|
+
? buildManifestRequiredInferenceFieldsArtifact(
|
|
1347
|
+
manifest?.inference ?? null,
|
|
1348
|
+
`${manifest?.modelId ?? modelId}.inference`
|
|
1349
|
+
)
|
|
1350
|
+
: null;
|
|
1317
1351
|
return {
|
|
1318
1352
|
manifest,
|
|
1319
1353
|
shardCount: shards.length,
|
|
1320
1354
|
tensorCount: tensors.length,
|
|
1321
1355
|
totalSize,
|
|
1356
|
+
executionContractArtifact,
|
|
1357
|
+
executionV0GraphContractArtifact: executionContractArtifact?.executionV0?.graph ?? null,
|
|
1358
|
+
layerPatternContractArtifact,
|
|
1359
|
+
requiredInferenceFieldsArtifact,
|
|
1322
1360
|
};
|
|
1323
1361
|
}
|
|
1324
1362
|
|
|
@@ -4,6 +4,13 @@ const SD3_LAYOUT = {
|
|
|
4
4
|
id: 'sd3',
|
|
5
5
|
requiredComponents: ['transformer', 'text_encoder', 'text_encoder_2', 'text_encoder_3', 'vae', 'scheduler'],
|
|
6
6
|
weightedComponents: ['transformer', 'text_encoder', 'text_encoder_2', 'text_encoder_3', 'vae'],
|
|
7
|
+
matches(modelIndex, components) {
|
|
8
|
+
return (
|
|
9
|
+
components.has('text_encoder_2') &&
|
|
10
|
+
components.has('text_encoder_3') &&
|
|
11
|
+
getComponentClassName(modelIndex?.transformer) === 'SD3Transformer2DModel'
|
|
12
|
+
);
|
|
13
|
+
},
|
|
7
14
|
tokenizerSpecs: [
|
|
8
15
|
{
|
|
9
16
|
modelIndexKey: 'tokenizer',
|
|
@@ -66,6 +73,10 @@ const FLUX_LAYOUT = {
|
|
|
66
73
|
id: 'flux',
|
|
67
74
|
requiredComponents: ['transformer', 'text_encoder', 'vae', 'scheduler'],
|
|
68
75
|
weightedComponents: ['transformer', 'text_encoder', 'vae'],
|
|
76
|
+
matches(modelIndex) {
|
|
77
|
+
const transformerClass = getComponentClassName(modelIndex?.transformer);
|
|
78
|
+
return typeof transformerClass === 'string' && /^Flux/i.test(transformerClass);
|
|
79
|
+
},
|
|
69
80
|
tokenizerSpecs: [
|
|
70
81
|
{
|
|
71
82
|
modelIndexKey: 'tokenizer',
|
|
@@ -91,7 +102,39 @@ const FLUX_LAYOUT = {
|
|
|
91
102
|
],
|
|
92
103
|
};
|
|
93
104
|
|
|
94
|
-
const
|
|
105
|
+
const SANA_LAYOUT = {
|
|
106
|
+
id: 'sana',
|
|
107
|
+
requiredComponents: ['transformer', 'text_encoder', 'tokenizer', 'vae', 'scheduler'],
|
|
108
|
+
weightedComponents: ['transformer', 'text_encoder', 'vae'],
|
|
109
|
+
matches(modelIndex) {
|
|
110
|
+
return (
|
|
111
|
+
getComponentClassName(modelIndex?.transformer) === 'SanaTransformer2DModel' &&
|
|
112
|
+
getComponentClassName(modelIndex?.text_encoder) === 'Gemma2Model'
|
|
113
|
+
);
|
|
114
|
+
},
|
|
115
|
+
tokenizerSpecs: [
|
|
116
|
+
{
|
|
117
|
+
modelIndexKey: 'tokenizer',
|
|
118
|
+
componentId: 'text_encoder',
|
|
119
|
+
type: 'bundled',
|
|
120
|
+
assets: [
|
|
121
|
+
{ suffix: 'tokenizer/tokenizer.json', targetName: 'tokenizer_tokenizer.json', kind: 'text', required: true },
|
|
122
|
+
{ suffix: 'tokenizer/tokenizer_config.json', targetName: 'tokenizer_config.json', kind: 'text', required: false },
|
|
123
|
+
{ suffix: 'tokenizer/special_tokens_map.json', targetName: 'tokenizer_special_tokens_map.json', kind: 'text', required: false },
|
|
124
|
+
{ suffix: 'tokenizer/tokenizer.model', targetName: 'tokenizer_tokenizer.model', kind: 'binary', required: false },
|
|
125
|
+
],
|
|
126
|
+
config: {
|
|
127
|
+
type: 'bundled',
|
|
128
|
+
tokenizerFile: 'tokenizer_tokenizer.json',
|
|
129
|
+
configFile: 'tokenizer_config.json',
|
|
130
|
+
specialTokensFile: 'tokenizer_special_tokens_map.json',
|
|
131
|
+
sentencePieceFile: 'tokenizer_tokenizer.model',
|
|
132
|
+
},
|
|
133
|
+
},
|
|
134
|
+
],
|
|
135
|
+
};
|
|
136
|
+
|
|
137
|
+
const LAYOUTS = [SD3_LAYOUT, FLUX_LAYOUT, SANA_LAYOUT];
|
|
95
138
|
|
|
96
139
|
function toAbortError(message = 'Cancelled') {
|
|
97
140
|
if (typeof DOMException === 'function') {
|
|
@@ -112,12 +155,26 @@ function listModelComponents(modelIndex) {
|
|
|
112
155
|
return Object.keys(modelIndex || {}).filter((key) => !key.startsWith('_'));
|
|
113
156
|
}
|
|
114
157
|
|
|
158
|
+
function getComponentClassName(componentEntry) {
|
|
159
|
+
if (Array.isArray(componentEntry) && componentEntry.length >= 2 && typeof componentEntry[1] === 'string') {
|
|
160
|
+
return componentEntry[1];
|
|
161
|
+
}
|
|
162
|
+
if (componentEntry && typeof componentEntry === 'object' && typeof componentEntry._class_name === 'string') {
|
|
163
|
+
return componentEntry._class_name;
|
|
164
|
+
}
|
|
165
|
+
return null;
|
|
166
|
+
}
|
|
167
|
+
|
|
115
168
|
export function detectDiffusionLayout(modelIndex) {
|
|
116
169
|
const components = new Set(listModelComponents(modelIndex));
|
|
117
170
|
for (const layout of LAYOUTS) {
|
|
118
|
-
if (layout.requiredComponents.every((component) => components.has(component))) {
|
|
119
|
-
|
|
171
|
+
if (!layout.requiredComponents.every((component) => components.has(component))) {
|
|
172
|
+
continue;
|
|
120
173
|
}
|
|
174
|
+
if (typeof layout.matches === 'function' && !layout.matches(modelIndex, components)) {
|
|
175
|
+
continue;
|
|
176
|
+
}
|
|
177
|
+
return layout;
|
|
121
178
|
}
|
|
122
179
|
const listed = [...components].sort().join(', ') || '(none)';
|
|
123
180
|
const expected = LAYOUTS
|
|
@@ -199,6 +256,9 @@ export async function parseDiffusionModel(adapter) {
|
|
|
199
256
|
const tensors = [];
|
|
200
257
|
|
|
201
258
|
for (const componentId of layout.requiredComponents) {
|
|
259
|
+
if (componentId === 'tokenizer') {
|
|
260
|
+
continue;
|
|
261
|
+
}
|
|
202
262
|
const configSuffix = defaultConfigPath(componentId);
|
|
203
263
|
const config = await readJson(configSuffix, `${componentId} config`);
|
|
204
264
|
if (componentId === 'transformer' && config && !config.weight_format) {
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
export function resolveEosTokenId({ config, tokenizer, tokenizerJson }) {
|
|
2
|
+
const nestedTextConfig = getNestedTextConfig(config);
|
|
2
3
|
const candidateSources = [
|
|
3
4
|
tokenizer?.eosTokenId,
|
|
4
5
|
tokenizer?.eos_token_id,
|
|
@@ -7,9 +8,9 @@ export function resolveEosTokenId({ config, tokenizer, tokenizerJson }) {
|
|
|
7
8
|
tokenizerJson?.special_tokens?.eos,
|
|
8
9
|
tokenizerJson?.special_tokens?.eos_token_id,
|
|
9
10
|
config?.eos_token_id,
|
|
10
|
-
|
|
11
|
+
nestedTextConfig?.eos_token_id,
|
|
11
12
|
config?.eos_token_ids,
|
|
12
|
-
|
|
13
|
+
nestedTextConfig?.eos_token_ids,
|
|
13
14
|
];
|
|
14
15
|
|
|
15
16
|
for (const candidate of candidateSources) {
|
|
@@ -23,7 +24,7 @@ export function resolveEosTokenId({ config, tokenizer, tokenizerJson }) {
|
|
|
23
24
|
tokenizerJson?.specialTokens?.eos_token,
|
|
24
25
|
tokenizerJson?.special_tokens?.eos_token,
|
|
25
26
|
config?.eos_token,
|
|
26
|
-
|
|
27
|
+
nestedTextConfig?.eos_token,
|
|
27
28
|
];
|
|
28
29
|
|
|
29
30
|
for (const candidate of eosTokenStringCandidates) {
|
|
@@ -48,6 +49,19 @@ export function resolveEosTokenId({ config, tokenizer, tokenizerJson }) {
|
|
|
48
49
|
throw new Error('Missing eos_token_id. Provide eos_token_id in config or tokenizer metadata.');
|
|
49
50
|
}
|
|
50
51
|
|
|
52
|
+
function getNestedTextConfig(config) {
|
|
53
|
+
if (!config || typeof config !== 'object' || Array.isArray(config)) {
|
|
54
|
+
return null;
|
|
55
|
+
}
|
|
56
|
+
if (config.text_config && typeof config.text_config === 'object' && !Array.isArray(config.text_config)) {
|
|
57
|
+
return config.text_config;
|
|
58
|
+
}
|
|
59
|
+
if (config.language_config && typeof config.language_config === 'object' && !Array.isArray(config.language_config)) {
|
|
60
|
+
return config.language_config;
|
|
61
|
+
}
|
|
62
|
+
return null;
|
|
63
|
+
}
|
|
64
|
+
|
|
51
65
|
function normalizeEosTokenId(value) {
|
|
52
66
|
if (Array.isArray(value)) {
|
|
53
67
|
if (value.length === 0 || value.some((id) => typeof id !== 'number')) {
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { validateTensorConfigConsistency } from './tensor-config-validator.js';
|
|
2
|
+
import { validateManifestExecutionContract } from '../../config/execution-contract-check.js';
|
|
2
3
|
|
|
3
4
|
export function validateManifest(manifest) {
|
|
4
5
|
const errors = [];
|
|
@@ -196,5 +197,17 @@ export function validateManifest(manifest) {
|
|
|
196
197
|
}
|
|
197
198
|
}
|
|
198
199
|
|
|
200
|
+
if (!isDiffusion && !isEnergy && errors.length === 0) {
|
|
201
|
+
try {
|
|
202
|
+
const executionContract = validateManifestExecutionContract(manifest);
|
|
203
|
+
for (const error of executionContract.errors) {
|
|
204
|
+
errors.push(error);
|
|
205
|
+
}
|
|
206
|
+
} catch (error) {
|
|
207
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
208
|
+
errors.push(`[ExecutionContract] ${message}`);
|
|
209
|
+
}
|
|
210
|
+
}
|
|
211
|
+
|
|
199
212
|
return { valid: errors.length === 0, errors, warnings };
|
|
200
213
|
}
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
import type { Tensor } from '../tensor.js';
|
|
2
|
+
import type { CommandRecorder } from '../command-recorder.js';
|
|
3
|
+
import type { OutputBufferOptions } from './types.js';
|
|
4
|
+
import type { WeightBuffer } from '../weight-buffer.js';
|
|
5
|
+
|
|
6
|
+
export interface DepthwiseConv2DOptions extends OutputBufferOptions {
|
|
7
|
+
channels: number;
|
|
8
|
+
height: number;
|
|
9
|
+
width: number;
|
|
10
|
+
kernelH: number;
|
|
11
|
+
kernelW: number;
|
|
12
|
+
stride?: number;
|
|
13
|
+
pad?: number;
|
|
14
|
+
}
|
|
15
|
+
|
|
16
|
+
export declare function runDepthwiseConv2D(
|
|
17
|
+
input: Tensor,
|
|
18
|
+
weight: GPUBuffer | WeightBuffer,
|
|
19
|
+
bias: GPUBuffer | WeightBuffer | null,
|
|
20
|
+
options: DepthwiseConv2DOptions
|
|
21
|
+
): Promise<Tensor>;
|
|
22
|
+
|
|
23
|
+
export declare function recordDepthwiseConv2D(
|
|
24
|
+
recorder: CommandRecorder,
|
|
25
|
+
input: Tensor,
|
|
26
|
+
weight: GPUBuffer | WeightBuffer,
|
|
27
|
+
bias: GPUBuffer | WeightBuffer | null,
|
|
28
|
+
options: DepthwiseConv2DOptions
|
|
29
|
+
): Promise<Tensor>;
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
import { getDevice } from '../device.js';
|
|
2
|
+
import { acquireBuffer, releaseBuffer } from '../../memory/buffer-pool.js';
|
|
3
|
+
import { createTensor, dtypeBytes } from '../tensor.js';
|
|
4
|
+
import { getBuffer } from '../weight-buffer.js';
|
|
5
|
+
import { unifiedKernelWrapper } from './utils.js';
|
|
6
|
+
import { selectRuleValue } from './rule-registry.js';
|
|
7
|
+
import { WORKGROUP_SIZES } from './constants.js';
|
|
8
|
+
|
|
9
|
+
function selectDepthwiseConv2DVariant(isF16) {
|
|
10
|
+
return selectRuleValue('depthwiseConv2d', 'variant', { isF16 });
|
|
11
|
+
}
|
|
12
|
+
|
|
13
|
+
async function _depthwiseConv2D(target, input, weight, bias, options = {}) {
|
|
14
|
+
const recorder = target && typeof target.beginComputePass === 'function' ? target : null;
|
|
15
|
+
const device = target?.device || getDevice();
|
|
16
|
+
const {
|
|
17
|
+
channels,
|
|
18
|
+
height,
|
|
19
|
+
width,
|
|
20
|
+
kernelH,
|
|
21
|
+
kernelW,
|
|
22
|
+
stride = 1,
|
|
23
|
+
pad = 0,
|
|
24
|
+
outputBuffer = null,
|
|
25
|
+
} = options;
|
|
26
|
+
|
|
27
|
+
if (
|
|
28
|
+
!Number.isFinite(channels) ||
|
|
29
|
+
!Number.isFinite(height) ||
|
|
30
|
+
!Number.isFinite(width) ||
|
|
31
|
+
!Number.isFinite(kernelH) ||
|
|
32
|
+
!Number.isFinite(kernelW)
|
|
33
|
+
) {
|
|
34
|
+
throw new Error('DepthwiseConv2D requires explicit dimensions.');
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
const outHeight = Math.floor((height + pad * 2 - kernelH) / stride) + 1;
|
|
38
|
+
const outWidth = Math.floor((width + pad * 2 - kernelW) / stride) + 1;
|
|
39
|
+
if (outHeight <= 0 || outWidth <= 0) {
|
|
40
|
+
throw new Error(`DepthwiseConv2D invalid output size: ${outHeight}x${outWidth}`);
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
const isF16 = input.dtype === 'f16';
|
|
44
|
+
const variant = selectDepthwiseConv2DVariant(isF16);
|
|
45
|
+
const bytesPerElement = dtypeBytes(input.dtype);
|
|
46
|
+
const outputSize = channels * outHeight * outWidth * bytesPerElement;
|
|
47
|
+
const output = outputBuffer || acquireBuffer(outputSize, undefined, 'depthwise_conv2d_output');
|
|
48
|
+
|
|
49
|
+
const weightBuffer = getBuffer(weight);
|
|
50
|
+
let biasBuffer = getBuffer(bias);
|
|
51
|
+
let tempBias = null;
|
|
52
|
+
if (!biasBuffer) {
|
|
53
|
+
const biasSize = channels * bytesPerElement;
|
|
54
|
+
tempBias = acquireBuffer(biasSize, undefined, 'depthwise_conv2d_bias_zero');
|
|
55
|
+
biasBuffer = tempBias;
|
|
56
|
+
const paddedSize = Math.ceil(biasSize / 4) * 4;
|
|
57
|
+
device.queue.writeBuffer(biasBuffer, 0, new Uint8Array(paddedSize));
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
await unifiedKernelWrapper(
|
|
61
|
+
'depthwise_conv2d',
|
|
62
|
+
target,
|
|
63
|
+
variant,
|
|
64
|
+
[input, weightBuffer, biasBuffer, output],
|
|
65
|
+
{
|
|
66
|
+
channels,
|
|
67
|
+
height,
|
|
68
|
+
width,
|
|
69
|
+
out_height: outHeight,
|
|
70
|
+
out_width: outWidth,
|
|
71
|
+
kernel_h: kernelH,
|
|
72
|
+
kernel_w: kernelW,
|
|
73
|
+
stride,
|
|
74
|
+
pad,
|
|
75
|
+
_pad0: 0,
|
|
76
|
+
_pad1: 0,
|
|
77
|
+
},
|
|
78
|
+
Math.ceil((channels * outHeight * outWidth) / WORKGROUP_SIZES.DEFAULT)
|
|
79
|
+
);
|
|
80
|
+
|
|
81
|
+
if (tempBias) {
|
|
82
|
+
if (recorder) {
|
|
83
|
+
recorder.trackTemporaryBuffer(tempBias);
|
|
84
|
+
} else {
|
|
85
|
+
releaseBuffer(tempBias);
|
|
86
|
+
}
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
return createTensor(output, input.dtype, [channels, outHeight, outWidth], 'depthwise_conv2d_output');
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
export async function runDepthwiseConv2D(input, weight, bias, options = {}) {
|
|
93
|
+
return _depthwiseConv2D(null, input, weight, bias, options);
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
/**
 * Runs the depthwise Conv2D kernel on behalf of a caller-supplied recorder.
 *
 * Delegates to `_depthwiseConv2D`, handing it `recorder` as the first
 * argument and forwarding the remaining arguments untouched.
 *
 * @param recorder Forwarded in the recorder slot of `_depthwiseConv2D`.
 * @param input Forwarded as the convolution input.
 * @param weight Forwarded as the convolution weights.
 * @param bias Forwarded as the bias argument.
 * @param options Forwarded as-is; defaults to an empty object.
 * @returns Promise resolving to whatever `_depthwiseConv2D` returns.
 */
export async function recordDepthwiseConv2D(recorder, input, weight, bias, options = {}) {
  const pending = _depthwiseConv2D(recorder, input, weight, bias, options);
  return pending;
}
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
// Depthwise Conv2D kernel, f32 storage.
//
// Each invocation produces exactly one output element. Input and output are
// indexed channel-major ((channel * height + y) * width + x); weights are
// indexed as (channel * kernel_h + ky) * kernel_w + kx, i.e. one filter per
// channel. Taps that land outside the input are skipped, which is equivalent
// to zero padding.

// Invocations per workgroup along X; may be overridden at pipeline creation.
override WORKGROUP_SIZE: u32 = 256u;

// Twelve u32 fields (48 bytes).
struct Uniforms {
    channels: u32,
    height: u32,      // input rows
    width: u32,       // input columns
    out_height: u32,  // output rows
    out_width: u32,   // output columns
    kernel_h: u32,
    kernel_w: u32,
    stride: u32,      // same step is applied on both axes
    pad: u32,         // same padding is applied on both axes
    // The three fields below are never read by this shader; presumably they
    // only round out the struct size.
    // NOTE(review): confirm the host-side uniform writer also fills _pad2.
    _pad0: u32,
    _pad1: u32,
    _pad2: u32,
}

@group(0) @binding(0) var<uniform> u: Uniforms;
@group(0) @binding(1) var<storage, read> input: array<f32>;
@group(0) @binding(2) var<storage, read> weight: array<f32>;
@group(0) @binding(3) var<storage, read> bias: array<f32>;
@group(0) @binding(4) var<storage, read_write> output: array<f32>;

@compute @workgroup_size(WORKGROUP_SIZE, 1, 1)
fn main(@builtin(global_invocation_id) gid: vec3<u32>) {
    let flat = gid.x;
    let plane = u.out_height * u.out_width;

    // Surplus invocations past the end of the output have nothing to do.
    if (flat >= u.channels * plane) {
        return;
    }

    // Decompose the flat output index into (channel, oy, ox).
    let c = flat / plane;
    let within = flat % plane;
    let oy = within / u.out_width;
    let ox = within % u.out_width;

    let pad_i = i32(u.pad);
    let rows = i32(u.height);
    let cols = i32(u.width);

    // The accumulator starts from this channel's bias.
    var acc: f32 = bias[c];

    for (var ky: u32 = 0u; ky < u.kernel_h; ky += 1u) {
        let src_y = i32(oy * u.stride + ky) - pad_i;
        if (src_y >= 0 && src_y < rows) {
            // Offsets of the current input row and filter row.
            let in_row = (c * u.height + u32(src_y)) * u.width;
            let w_row = (c * u.kernel_h + ky) * u.kernel_w;
            for (var kx: u32 = 0u; kx < u.kernel_w; kx += 1u) {
                let src_x = i32(ox * u.stride + kx) - pad_i;
                if (src_x >= 0 && src_x < cols) {
                    acc = acc + input[in_row + u32(src_x)] * weight[w_row + kx];
                }
            }
        }
    }

    output[flat] = acc;
}
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
// Depthwise Conv2D Kernel (NCHW, f16)
//
// Each invocation produces exactly one output element. Storage buffers hold
// f16 values; the running sum is kept in f32 and converted to f16 only when
// it is stored. Taps that land outside the input are skipped, which is
// equivalent to zero padding.

enable f16;

// Invocations per workgroup along X; may be overridden at pipeline creation.
override WORKGROUP_SIZE: u32 = 256u;

// Twelve u32 fields (48 bytes).
struct Uniforms {
    channels: u32,
    height: u32,      // input rows
    width: u32,       // input columns
    out_height: u32,  // output rows
    out_width: u32,   // output columns
    kernel_h: u32,
    kernel_w: u32,
    stride: u32,      // same step is applied on both axes
    pad: u32,         // same padding is applied on both axes
    // The three fields below are never read by this shader; presumably they
    // only round out the struct size.
    // NOTE(review): confirm the host-side uniform writer also fills _pad2.
    _pad0: u32,
    _pad1: u32,
    _pad2: u32,
}

@group(0) @binding(0) var<uniform> u: Uniforms;
@group(0) @binding(1) var<storage, read> input: array<f16>;
@group(0) @binding(2) var<storage, read> weight: array<f16>;
@group(0) @binding(3) var<storage, read> bias: array<f16>;
@group(0) @binding(4) var<storage, read_write> output: array<f16>;

@compute @workgroup_size(WORKGROUP_SIZE, 1, 1)
fn main(@builtin(global_invocation_id) gid: vec3<u32>) {
    let flat = gid.x;
    let plane = u.out_height * u.out_width;

    // Surplus invocations past the end of the output have nothing to do.
    if (flat >= u.channels * plane) {
        return;
    }

    // Decompose the flat output index into (channel, oy, ox).
    let c = flat / plane;
    let within = flat % plane;
    let oy = within / u.out_width;
    let ox = within % u.out_width;

    let pad_i = i32(u.pad);
    let rows = i32(u.height);
    let cols = i32(u.width);

    // The accumulator starts from this channel's bias, widened to f32.
    var acc: f32 = f32(bias[c]);

    for (var ky: u32 = 0u; ky < u.kernel_h; ky += 1u) {
        let src_y = i32(oy * u.stride + ky) - pad_i;
        if (src_y >= 0 && src_y < rows) {
            // Offsets of the current input row and filter row.
            let in_row = (c * u.height + u32(src_y)) * u.width;
            let w_row = (c * u.kernel_h + ky) * u.kernel_w;
            for (var kx: u32 = 0u; kx < u.kernel_w; kx += 1u) {
                let src_x = i32(ox * u.stride + kx) - pad_i;
                if (src_x >= 0 && src_x < cols) {
                    acc = acc + f32(input[in_row + u32(src_x)]) * f32(weight[w_row + kx]);
                }
            }
        }
    }

    // Narrow back to the storage type on write.
    output[flat] = f16(acc);
}
|