@simulatte/doppler 0.1.3 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +11 -5
- package/package.json +27 -4
- package/src/client/doppler-api.browser.d.ts +1 -0
- package/src/client/doppler-api.browser.js +288 -0
- package/src/client/doppler-api.d.ts +80 -0
- package/src/client/doppler-api.js +298 -0
- package/src/client/doppler-provider/types.js +1 -1
- package/src/client/doppler-registry.d.ts +23 -0
- package/src/client/doppler-registry.js +88 -0
- package/src/client/doppler-registry.json +39 -0
- package/src/config/execution-contract-check.d.ts +82 -0
- package/src/config/execution-contract-check.js +317 -0
- package/src/config/execution-v0-contract-check.d.ts +94 -0
- package/src/config/execution-v0-contract-check.js +251 -0
- package/src/config/execution-v0-graph-contract-check.d.ts +20 -0
- package/src/config/execution-v0-graph-contract-check.js +64 -0
- package/src/config/kernel-path-contract-check.d.ts +76 -0
- package/src/config/kernel-path-contract-check.js +479 -0
- package/src/config/kernel-path-loader.d.ts +16 -0
- package/src/config/kernel-path-loader.js +54 -0
- package/src/config/kernels/kernel-ref-digests.js +12 -0
- package/src/config/kernels/registry.json +556 -0
- package/src/config/loader.js +90 -67
- package/src/config/merge-contract-check.d.ts +16 -0
- package/src/config/merge-contract-check.js +321 -0
- package/src/config/merge-helpers.d.ts +58 -0
- package/src/config/merge-helpers.js +54 -0
- package/src/config/merge.js +3 -6
- package/src/config/presets/models/janus-text.json +27 -0
- package/src/config/quantization-contract-check.d.ts +12 -0
- package/src/config/quantization-contract-check.js +91 -0
- package/src/config/required-inference-fields-contract-check.d.ts +24 -0
- package/src/config/required-inference-fields-contract-check.js +231 -0
- package/src/config/schema/browser-suite-metrics.schema.d.ts +17 -0
- package/src/config/schema/browser-suite-metrics.schema.js +46 -0
- package/src/config/schema/conversion-report.schema.d.ts +40 -0
- package/src/config/schema/conversion-report.schema.js +108 -0
- package/src/config/schema/doppler.schema.js +12 -18
- package/src/config/schema/index.d.ts +22 -0
- package/src/config/schema/index.js +18 -0
- package/src/converter/core.d.ts +10 -0
- package/src/converter/core.js +49 -11
- package/src/converter/parsers/diffusion.js +63 -3
- package/src/converter/tokenizer-utils.js +17 -3
- package/src/formats/rdrr/validation.js +13 -0
- package/src/gpu/kernels/depthwise_conv2d.d.ts +29 -0
- package/src/gpu/kernels/depthwise_conv2d.js +98 -0
- package/src/gpu/kernels/depthwise_conv2d.wgsl +58 -0
- package/src/gpu/kernels/depthwise_conv2d_f16.wgsl +62 -0
- package/src/gpu/kernels/grouped_pointwise_conv2d.d.ts +27 -0
- package/src/gpu/kernels/grouped_pointwise_conv2d.js +92 -0
- package/src/gpu/kernels/grouped_pointwise_conv2d.wgsl +47 -0
- package/src/gpu/kernels/grouped_pointwise_conv2d_f16.wgsl +51 -0
- package/src/gpu/kernels/index.d.ts +30 -0
- package/src/gpu/kernels/index.js +25 -0
- package/src/gpu/kernels/relu.d.ts +18 -0
- package/src/gpu/kernels/relu.js +45 -0
- package/src/gpu/kernels/relu.wgsl +21 -0
- package/src/gpu/kernels/relu_f16.wgsl +23 -0
- package/src/gpu/kernels/repeat_channels.d.ts +21 -0
- package/src/gpu/kernels/repeat_channels.js +60 -0
- package/src/gpu/kernels/repeat_channels.wgsl +29 -0
- package/src/gpu/kernels/repeat_channels_f16.wgsl +31 -0
- package/src/gpu/kernels/sana_linear_attention.d.ts +27 -0
- package/src/gpu/kernels/sana_linear_attention.js +122 -0
- package/src/gpu/kernels/sana_linear_attention_apply.wgsl +44 -0
- package/src/gpu/kernels/sana_linear_attention_apply_f16.wgsl +47 -0
- package/src/gpu/kernels/sana_linear_attention_summary.wgsl +47 -0
- package/src/gpu/kernels/sana_linear_attention_summary_f16.wgsl +49 -0
- package/src/index-browser.d.ts +1 -0
- package/src/index-browser.js +2 -1
- package/src/index.d.ts +1 -0
- package/src/index.js +2 -1
- package/src/inference/browser-harness.js +164 -38
- package/src/inference/pipelines/diffusion/init.js +14 -0
- package/src/inference/pipelines/diffusion/pipeline.js +206 -77
- package/src/inference/pipelines/diffusion/sana-transformer.d.ts +53 -0
- package/src/inference/pipelines/diffusion/sana-transformer.js +738 -0
- package/src/inference/pipelines/diffusion/scheduler.d.ts +17 -1
- package/src/inference/pipelines/diffusion/scheduler.js +91 -3
- package/src/inference/pipelines/diffusion/text-encoder-gpu.d.ts +6 -4
- package/src/inference/pipelines/diffusion/text-encoder-gpu.js +270 -0
- package/src/inference/pipelines/diffusion/text-encoder.js +18 -1
- package/src/inference/pipelines/diffusion/types.d.ts +4 -0
- package/src/inference/pipelines/diffusion/vae.js +782 -78
- package/src/inference/pipelines/text/config.d.ts +5 -0
- package/src/inference/pipelines/text/config.js +1 -1
- package/src/inference/pipelines/text/execution-v0.js +141 -101
- package/src/inference/pipelines/text/init.js +41 -10
- package/src/inference/pipelines/text.js +7 -1
- package/src/rules/execution-rules-contract-check.d.ts +17 -0
- package/src/rules/execution-rules-contract-check.js +245 -0
- package/src/rules/kernels/depthwise-conv2d.rules.json +6 -0
- package/src/rules/kernels/grouped-pointwise-conv2d.rules.json +6 -0
- package/src/rules/kernels/relu.rules.json +6 -0
- package/src/rules/kernels/repeat-channels.rules.json +6 -0
- package/src/rules/kernels/sana-linear-attention.rules.json +6 -0
- package/src/rules/layer-pattern-contract-check.d.ts +17 -0
- package/src/rules/layer-pattern-contract-check.js +231 -0
- package/src/rules/rule-registry.d.ts +28 -0
- package/src/rules/rule-registry.js +38 -0
- package/src/tooling/conversion-config-materializer.d.ts +24 -0
- package/src/tooling/conversion-config-materializer.js +99 -0
- package/src/tooling/lean-execution-contract-runner.d.ts +43 -0
- package/src/tooling/lean-execution-contract-runner.js +158 -0
- package/src/tooling/lean-execution-contract.d.ts +16 -0
- package/src/tooling/lean-execution-contract.js +81 -0
- package/src/tooling/node-convert.d.ts +10 -0
- package/src/tooling/node-converter.js +59 -0
- package/src/tooling/node-webgpu.js +30 -9
- package/src/version.d.ts +2 -0
- package/src/version.js +2 -0
- package/tools/convert-safetensors-node.js +47 -0
- package/tools/doppler-cli.js +167 -6
|
@@ -210,6 +210,11 @@ export interface ManifestWithInference {
|
|
|
210
210
|
*/
|
|
211
211
|
export function hasManifestInference(manifest: Manifest): manifest is Manifest & { inference: ManifestInferenceSchema };
|
|
212
212
|
|
|
213
|
+
export function validateRequiredInferenceFields(
|
|
214
|
+
inf: ManifestInferenceSchema,
|
|
215
|
+
modelId: string
|
|
216
|
+
): void;
|
|
217
|
+
|
|
213
218
|
/**
|
|
214
219
|
* Convert MergedConfig to ParsedModelConfig.
|
|
215
220
|
*/
|
|
@@ -1,4 +1,12 @@
|
|
|
1
1
|
import { mergeRuntimeValues } from '../../../config/runtime-merge.js';
|
|
2
|
+
import {
|
|
3
|
+
buildExecutionV0KernelProfileKey,
|
|
4
|
+
indexExecutionV0KernelProfiles,
|
|
5
|
+
normalizeExecutionV0Dtype,
|
|
6
|
+
resolveExecutionV0KernelProfile,
|
|
7
|
+
resolveExecutionV0KVIO,
|
|
8
|
+
resolveExecutionV0Precision,
|
|
9
|
+
} from '../../../config/execution-v0-contract-check.js';
|
|
2
10
|
import {
|
|
3
11
|
EXECUTION_V0_SCHEMA_ID,
|
|
4
12
|
DEFAULT_EXECUTION_V0_POLICIES,
|
|
@@ -59,13 +67,9 @@ function cloneJson(value) {
|
|
|
59
67
|
return JSON.parse(JSON.stringify(value));
|
|
60
68
|
}
|
|
61
69
|
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
throw new Error(`[ExecutionV0] ${label} must be "f16" or "f32"; got "${value}"`);
|
|
66
|
-
}
|
|
67
|
-
return normalized;
|
|
68
|
-
}
|
|
70
|
+
const normalizeDtype = normalizeExecutionV0Dtype;
|
|
71
|
+
const resolvePrecision = resolveExecutionV0Precision;
|
|
72
|
+
const resolveKVIO = resolveExecutionV0KVIO;
|
|
69
73
|
|
|
70
74
|
function normalizePhase(value, label) {
|
|
71
75
|
const normalized = String(value ?? '').trim().toLowerCase();
|
|
@@ -83,6 +87,17 @@ function normalizeSection(value, label) {
|
|
|
83
87
|
return normalized;
|
|
84
88
|
}
|
|
85
89
|
|
|
90
|
+
function normalizeKVLayout(value, label) {
|
|
91
|
+
if (value == null) {
|
|
92
|
+
return null;
|
|
93
|
+
}
|
|
94
|
+
const normalized = String(value).trim().toLowerCase();
|
|
95
|
+
if (!normalized) {
|
|
96
|
+
return null;
|
|
97
|
+
}
|
|
98
|
+
return normalized;
|
|
99
|
+
}
|
|
100
|
+
|
|
86
101
|
function assertKernelRef(kernelRef, label) {
|
|
87
102
|
if (!kernelRef) return;
|
|
88
103
|
if (typeof kernelRef.id !== 'string' || kernelRef.id.trim().length === 0) {
|
|
@@ -106,10 +121,7 @@ function stepHasLayer(step, layerIdx) {
|
|
|
106
121
|
return step.layers.includes(layerIdx);
|
|
107
122
|
}
|
|
108
123
|
|
|
109
|
-
|
|
110
|
-
if (!kernelRef) return null;
|
|
111
|
-
return `${kernelRef.id}|${kernelRef.version}|${kernelRef.digest}`;
|
|
112
|
-
}
|
|
124
|
+
const buildKernelProfileKey = buildExecutionV0KernelProfileKey;
|
|
113
125
|
|
|
114
126
|
function normalizeSlot(value, label) {
|
|
115
127
|
if (typeof value !== 'string' || value.trim().length === 0) {
|
|
@@ -201,90 +213,10 @@ function hasDefinedPath(root, pathSegments) {
|
|
|
201
213
|
return current !== undefined;
|
|
202
214
|
}
|
|
203
215
|
|
|
204
|
-
|
|
205
|
-
const byKey = new Map();
|
|
206
|
-
const profiles = sessionDefaults?.compute?.kernelProfiles ?? [];
|
|
207
|
-
for (const profile of profiles) {
|
|
208
|
-
assertKernelRef(profile.kernelRef, 'sessionDefaults.compute.kernelProfiles[].kernelRef');
|
|
209
|
-
byKey.set(buildKernelProfileKey(profile.kernelRef), profile);
|
|
210
|
-
}
|
|
211
|
-
return byKey;
|
|
212
|
-
}
|
|
216
|
+
const indexKernelProfiles = indexExecutionV0KernelProfiles;
|
|
213
217
|
|
|
214
218
|
function resolveProfile(profileIndex, step) {
|
|
215
|
-
|
|
216
|
-
if (!key) return null;
|
|
217
|
-
return profileIndex.get(key) ?? null;
|
|
218
|
-
}
|
|
219
|
-
|
|
220
|
-
function resolvePrecision(step, profile, sessionDefaults) {
|
|
221
|
-
const defaults = sessionDefaults.compute.defaults;
|
|
222
|
-
const precision = {
|
|
223
|
-
inputDtype: step.precision?.inputDtype
|
|
224
|
-
?? profile?.precision?.inputDtype
|
|
225
|
-
?? null,
|
|
226
|
-
mathDtype: step.precision?.mathDtype
|
|
227
|
-
?? profile?.precision?.mathDtype
|
|
228
|
-
?? defaults.mathDtype,
|
|
229
|
-
accumDtype: step.precision?.accumDtype
|
|
230
|
-
?? profile?.precision?.accumDtype
|
|
231
|
-
?? defaults.accumDtype,
|
|
232
|
-
outputDtype: step.precision?.outputDtype
|
|
233
|
-
?? profile?.precision?.outputDtype
|
|
234
|
-
?? defaults.outputDtype,
|
|
235
|
-
};
|
|
236
|
-
const sources = {
|
|
237
|
-
inputDtype: step.precision?.inputDtype != null
|
|
238
|
-
? 'manifest'
|
|
239
|
-
: profile?.precision?.inputDtype != null
|
|
240
|
-
? 'kernelProfile'
|
|
241
|
-
: 'derived',
|
|
242
|
-
mathDtype: step.precision?.mathDtype != null
|
|
243
|
-
? 'manifest'
|
|
244
|
-
: profile?.precision?.mathDtype != null
|
|
245
|
-
? 'kernelProfile'
|
|
246
|
-
: 'sessionDefault',
|
|
247
|
-
accumDtype: step.precision?.accumDtype != null
|
|
248
|
-
? 'manifest'
|
|
249
|
-
: profile?.precision?.accumDtype != null
|
|
250
|
-
? 'kernelProfile'
|
|
251
|
-
: 'sessionDefault',
|
|
252
|
-
outputDtype: step.precision?.outputDtype != null
|
|
253
|
-
? 'manifest'
|
|
254
|
-
: profile?.precision?.outputDtype != null
|
|
255
|
-
? 'kernelProfile'
|
|
256
|
-
: 'sessionDefault',
|
|
257
|
-
};
|
|
258
|
-
return { precision, sources };
|
|
259
|
-
}
|
|
260
|
-
|
|
261
|
-
function resolveKVIO(step, profile, sessionDefaults) {
|
|
262
|
-
if (step.kvIO) {
|
|
263
|
-
return {
|
|
264
|
-
value: {
|
|
265
|
-
readDtype: normalizeDtype(step.kvIO.readDtype, `${step.id}.kvIO.readDtype`),
|
|
266
|
-
writeDtype: normalizeDtype(step.kvIO.writeDtype, `${step.id}.kvIO.writeDtype`),
|
|
267
|
-
},
|
|
268
|
-
source: 'manifest',
|
|
269
|
-
};
|
|
270
|
-
}
|
|
271
|
-
if (profile?.kvIO) {
|
|
272
|
-
return {
|
|
273
|
-
value: {
|
|
274
|
-
readDtype: normalizeDtype(profile.kvIO.readDtype, `${step.id}.profile.kvIO.readDtype`),
|
|
275
|
-
writeDtype: normalizeDtype(profile.kvIO.writeDtype, `${step.id}.profile.kvIO.writeDtype`),
|
|
276
|
-
},
|
|
277
|
-
source: 'kernelProfile',
|
|
278
|
-
};
|
|
279
|
-
}
|
|
280
|
-
const kvDtype = normalizeDtype(
|
|
281
|
-
sessionDefaults?.kvcache?.kvDtype ?? sessionDefaults.compute.defaults.activationDtype,
|
|
282
|
-
`${step.id}.sessionDefaults.kvcache.kvDtype`
|
|
283
|
-
);
|
|
284
|
-
return {
|
|
285
|
-
value: { readDtype: kvDtype, writeDtype: kvDtype },
|
|
286
|
-
source: 'sessionDefault',
|
|
287
|
-
};
|
|
219
|
+
return resolveExecutionV0KernelProfile(profileIndex, step);
|
|
288
220
|
}
|
|
289
221
|
|
|
290
222
|
function validateStepShape(step, index) {
|
|
@@ -704,12 +636,20 @@ function normalizeRuntimeSessionForExecutionV0(runtimeSession, manifestInference
|
|
|
704
636
|
const manifestProfiles = manifestSessionDefaults?.compute?.kernelProfiles;
|
|
705
637
|
const hasManifestProfiles = Array.isArray(manifestProfiles) && manifestProfiles.length > 0;
|
|
706
638
|
const manifestComputeDefaults = manifestSessionDefaults?.compute?.defaults ?? null;
|
|
639
|
+
const hasManifestKVCache = manifestSessionDefaults?.kvcache != null;
|
|
640
|
+
const hasManifestDecodeLoop = manifestSessionDefaults?.decodeLoop != null;
|
|
707
641
|
|
|
708
|
-
if (!runtimeSession ||
|
|
642
|
+
if (!runtimeSession || typeof runtimeSession !== 'object') {
|
|
709
643
|
return runtimeSession;
|
|
710
644
|
}
|
|
711
645
|
|
|
712
|
-
let compute = runtimeSession.compute;
|
|
646
|
+
let compute = runtimeSession.compute ?? null;
|
|
647
|
+
let kvcache = Object.prototype.hasOwnProperty.call(runtimeSession, 'kvcache')
|
|
648
|
+
? runtimeSession.kvcache
|
|
649
|
+
: undefined;
|
|
650
|
+
let decodeLoop = Object.prototype.hasOwnProperty.call(runtimeSession, 'decodeLoop')
|
|
651
|
+
? runtimeSession.decodeLoop
|
|
652
|
+
: undefined;
|
|
713
653
|
let changed = false;
|
|
714
654
|
|
|
715
655
|
// Strip preset compute dtype defaults when manifest provides model-specific values.
|
|
@@ -736,19 +676,39 @@ function normalizeRuntimeSessionForExecutionV0(runtimeSession, manifestInference
|
|
|
736
676
|
}
|
|
737
677
|
}
|
|
738
678
|
|
|
679
|
+
// Strip preset nulls so manifest session defaults can win.
|
|
680
|
+
if (kvcache === null && hasManifestKVCache) {
|
|
681
|
+
kvcache = undefined;
|
|
682
|
+
changed = true;
|
|
683
|
+
}
|
|
684
|
+
|
|
685
|
+
if (decodeLoop === null && hasManifestDecodeLoop) {
|
|
686
|
+
decodeLoop = undefined;
|
|
687
|
+
changed = true;
|
|
688
|
+
}
|
|
689
|
+
|
|
739
690
|
if (!changed) {
|
|
740
691
|
return runtimeSession;
|
|
741
692
|
}
|
|
742
693
|
|
|
694
|
+
const nextRuntimeSession = { ...runtimeSession };
|
|
743
695
|
if (!compute) {
|
|
744
|
-
|
|
745
|
-
|
|
696
|
+
delete nextRuntimeSession.compute;
|
|
697
|
+
} else {
|
|
698
|
+
nextRuntimeSession.compute = compute;
|
|
699
|
+
}
|
|
700
|
+
if (kvcache === undefined) {
|
|
701
|
+
delete nextRuntimeSession.kvcache;
|
|
702
|
+
} else {
|
|
703
|
+
nextRuntimeSession.kvcache = kvcache;
|
|
704
|
+
}
|
|
705
|
+
if (decodeLoop === undefined) {
|
|
706
|
+
delete nextRuntimeSession.decodeLoop;
|
|
707
|
+
} else {
|
|
708
|
+
nextRuntimeSession.decodeLoop = decodeLoop;
|
|
746
709
|
}
|
|
747
710
|
|
|
748
|
-
return {
|
|
749
|
-
...runtimeSession,
|
|
750
|
-
compute,
|
|
751
|
-
};
|
|
711
|
+
return Object.keys(nextRuntimeSession).length === 0 ? {} : nextRuntimeSession;
|
|
752
712
|
}
|
|
753
713
|
|
|
754
714
|
function validatePhaseBoundaryCompatibility(options) {
|
|
@@ -790,6 +750,25 @@ function validatePhaseBoundaryCompatibility(options) {
|
|
|
790
750
|
}
|
|
791
751
|
}
|
|
792
752
|
|
|
753
|
+
function assertKVLayoutExecutionCompatibility(steps, sessionDefaults) {
|
|
754
|
+
const kvLayout = normalizeKVLayout(sessionDefaults?.kvcache?.layout, 'sessionDefaults.kvcache.layout');
|
|
755
|
+
if (kvLayout !== 'bdpa') {
|
|
756
|
+
return;
|
|
757
|
+
}
|
|
758
|
+
const incompatibleStep = steps.find((step) => (
|
|
759
|
+
step?.op === 'attention'
|
|
760
|
+
&& isPhaseMatch(normalizePhase(step.phase, `${step.id}.phase`), 'prefill')
|
|
761
|
+
));
|
|
762
|
+
if (!incompatibleStep) {
|
|
763
|
+
return;
|
|
764
|
+
}
|
|
765
|
+
throw new Error(
|
|
766
|
+
`[ExecutionV0] sessionDefaults.kvcache.layout="bdpa" is decode-only, ` +
|
|
767
|
+
`but step "${incompatibleStep.id}" declares prefill attention. ` +
|
|
768
|
+
'Use a non-BDPA KV layout for prefill-capable models or remove prefill attention from the execution contract.'
|
|
769
|
+
);
|
|
770
|
+
}
|
|
771
|
+
|
|
793
772
|
function toKernelPathStep(step) {
|
|
794
773
|
if (step.op === 'cast') return null;
|
|
795
774
|
if (!step.kernel) return null;
|
|
@@ -818,6 +797,65 @@ function buildLayerPhaseSteps(steps, phase, layerIdx) {
|
|
|
818
797
|
.filter((step) => step != null);
|
|
819
798
|
}
|
|
820
799
|
|
|
800
|
+
function getInlineKernelPathSteps(path) {
|
|
801
|
+
return [
|
|
802
|
+
...(path?.preLayer ?? []),
|
|
803
|
+
...(path?.decode?.steps ?? []),
|
|
804
|
+
...(path?.prefill?.steps ?? []),
|
|
805
|
+
...(path?.postLayer ?? []),
|
|
806
|
+
...(path?.sampling ?? []),
|
|
807
|
+
...(path?.layerOverrides?.flatMap((override) => override.steps ?? []) ?? []),
|
|
808
|
+
];
|
|
809
|
+
}
|
|
810
|
+
|
|
811
|
+
function assertInlineKernelPathSessionCompatibility(path, sessionDefaults) {
|
|
812
|
+
if (!path) {
|
|
813
|
+
return;
|
|
814
|
+
}
|
|
815
|
+
const activationDtype = normalizeDtype(
|
|
816
|
+
path.activationDtype ?? sessionDefaults?.compute?.defaults?.activationDtype ?? 'f16',
|
|
817
|
+
'inlineKernelPath.activationDtype'
|
|
818
|
+
);
|
|
819
|
+
const kvDtype = normalizeDtype(
|
|
820
|
+
path.kvDtype ?? sessionDefaults?.kvcache?.kvDtype ?? activationDtype,
|
|
821
|
+
'inlineKernelPath.kvDtype'
|
|
822
|
+
);
|
|
823
|
+
|
|
824
|
+
for (const step of getInlineKernelPathSteps(path)) {
|
|
825
|
+
const kernel = String(step?.kernel ?? '').trim();
|
|
826
|
+
if (!kernel.startsWith('attention')) {
|
|
827
|
+
continue;
|
|
828
|
+
}
|
|
829
|
+
if (kernel.includes('_f16kv')) {
|
|
830
|
+
if (activationDtype !== 'f32' || kvDtype !== 'f16') {
|
|
831
|
+
throw new Error(
|
|
832
|
+
`[ExecutionV0] Inline kernelPath attention kernel "${kernel}" requires ` +
|
|
833
|
+
`activationDtype="f32" and kvcache.kvDtype="f16", but resolved ` +
|
|
834
|
+
`activationDtype="${activationDtype}" and kvcache.kvDtype="${kvDtype}".`
|
|
835
|
+
);
|
|
836
|
+
}
|
|
837
|
+
continue;
|
|
838
|
+
}
|
|
839
|
+
if (kernel.includes('_f16')) {
|
|
840
|
+
if (activationDtype !== 'f16' || kvDtype !== 'f16') {
|
|
841
|
+
throw new Error(
|
|
842
|
+
`[ExecutionV0] Inline kernelPath attention kernel "${kernel}" requires ` +
|
|
843
|
+
`activationDtype="f16" and kvcache.kvDtype="f16", but resolved ` +
|
|
844
|
+
`activationDtype="${activationDtype}" and kvcache.kvDtype="${kvDtype}".`
|
|
845
|
+
);
|
|
846
|
+
}
|
|
847
|
+
continue;
|
|
848
|
+
}
|
|
849
|
+
if (activationDtype !== 'f32' || kvDtype !== 'f32') {
|
|
850
|
+
throw new Error(
|
|
851
|
+
`[ExecutionV0] Inline kernelPath attention kernel "${kernel}" requires ` +
|
|
852
|
+
`activationDtype="f32" and kvcache.kvDtype="f32", but resolved ` +
|
|
853
|
+
`activationDtype="${activationDtype}" and kvcache.kvDtype="${kvDtype}".`
|
|
854
|
+
);
|
|
855
|
+
}
|
|
856
|
+
}
|
|
857
|
+
}
|
|
858
|
+
|
|
821
859
|
function buildInlineKernelPath(steps, sessionDefaults, modelId, numLayers) {
|
|
822
860
|
const activationDtype = normalizeDtype(
|
|
823
861
|
sessionDefaults?.compute?.defaults?.activationDtype ?? 'f16',
|
|
@@ -884,6 +922,7 @@ function buildInlineKernelPath(steps, sessionDefaults, modelId, numLayers) {
|
|
|
884
922
|
path.sampling = sampling;
|
|
885
923
|
}
|
|
886
924
|
|
|
925
|
+
assertInlineKernelPathSessionCompatibility(path, sessionDefaults);
|
|
887
926
|
return path;
|
|
888
927
|
}
|
|
889
928
|
|
|
@@ -1003,6 +1042,7 @@ export function compileExecutionV0(options = {}) {
|
|
|
1003
1042
|
const patchedSteps = applyExecutionPatchAtomic(baseSteps, runtimeInference.executionPatch ?? null);
|
|
1004
1043
|
patchedSteps.forEach(validateStepShape);
|
|
1005
1044
|
validateUniqueStepIds(patchedSteps);
|
|
1045
|
+
assertKVLayoutExecutionCompatibility(patchedSteps, resolvedSession);
|
|
1006
1046
|
const runtimePatchMeta = indexRuntimePatchMeta(runtimeInference.executionPatch ?? null);
|
|
1007
1047
|
|
|
1008
1048
|
const manifestSessionDefaults = manifestInference.sessionDefaults ?? {};
|
|
@@ -49,6 +49,35 @@ function isRDRRManifest(manifest) {
|
|
|
49
49
|
return manifest !== null && typeof manifest === 'object' && Array.isArray( (manifest).shards);
|
|
50
50
|
}
|
|
51
51
|
|
|
52
|
+
function normalizeBaseUrl(baseUrl) {
|
|
53
|
+
if (typeof baseUrl !== 'string' || baseUrl.trim().length === 0) {
|
|
54
|
+
return null;
|
|
55
|
+
}
|
|
56
|
+
return baseUrl.replace(/\/$/, '');
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
function createRemoteStorageContext(baseUrl, manifest) {
|
|
60
|
+
const root = normalizeBaseUrl(baseUrl);
|
|
61
|
+
if (!root || !isRDRRManifest(manifest)) {
|
|
62
|
+
return null;
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
return {
|
|
66
|
+
async loadShard(index) {
|
|
67
|
+
const shard = manifest.shards[index];
|
|
68
|
+
const filename = shard?.filename;
|
|
69
|
+
if (!filename) {
|
|
70
|
+
throw new Error(`Manifest shard ${index} is missing filename.`);
|
|
71
|
+
}
|
|
72
|
+
const response = await fetch(`${root}/${filename.replace(/^\/+/, '')}`);
|
|
73
|
+
if (!response.ok) {
|
|
74
|
+
throw new Error(`Failed to fetch shard ${index} from ${root}: ${response.status}`);
|
|
75
|
+
}
|
|
76
|
+
return new Uint8Array(await response.arrayBuffer());
|
|
77
|
+
},
|
|
78
|
+
};
|
|
79
|
+
}
|
|
80
|
+
|
|
52
81
|
|
|
53
82
|
function resolveQ4KConfig(
|
|
54
83
|
manifest,
|
|
@@ -505,10 +534,12 @@ export async function initTokenizer(manifest, options = {}) {
|
|
|
505
534
|
|
|
506
535
|
|
|
507
536
|
export async function loadWeights(manifest, modelConfig, options = {}) {
|
|
508
|
-
const {
|
|
537
|
+
const { onProgress, loadingConfig, baseUrl } = options;
|
|
538
|
+
const runtimeStorageContext = options.storageContext
|
|
539
|
+
?? createRemoteStorageContext(baseUrl, manifest);
|
|
509
540
|
const verifyHashes = (
|
|
510
|
-
typeof
|
|
511
|
-
?
|
|
541
|
+
typeof runtimeStorageContext?.verifyHashes === 'boolean'
|
|
542
|
+
? runtimeStorageContext.verifyHashes
|
|
512
543
|
: options.verifyHashes
|
|
513
544
|
) ?? loadingConfig?.shardCache?.verifyHashes;
|
|
514
545
|
if (verifyHashes == null) {
|
|
@@ -536,31 +567,31 @@ export async function loadWeights(manifest, modelConfig, options = {}) {
|
|
|
536
567
|
}
|
|
537
568
|
|
|
538
569
|
// Configure custom shard loader if provided (Native Bridge or direct-source bundle)
|
|
539
|
-
const hasLoadShard = typeof
|
|
540
|
-
const hasLoadShardRange = typeof
|
|
541
|
-
const hasStreamShardRange = typeof
|
|
570
|
+
const hasLoadShard = typeof runtimeStorageContext?.loadShard === 'function';
|
|
571
|
+
const hasLoadShardRange = typeof runtimeStorageContext?.loadShardRange === 'function';
|
|
572
|
+
const hasStreamShardRange = typeof runtimeStorageContext?.streamShardRange === 'function';
|
|
542
573
|
if (hasLoadShard || hasLoadShardRange) {
|
|
543
574
|
log.debug('Pipeline', 'Using custom shard loader (Native Bridge or external)');
|
|
544
575
|
|
|
545
576
|
const loadShard = async (index) => {
|
|
546
577
|
if (hasLoadShard) {
|
|
547
|
-
const data = await
|
|
578
|
+
const data = await runtimeStorageContext.loadShard(index);
|
|
548
579
|
return toUint8Array(data, 'storageContext.loadShard');
|
|
549
580
|
}
|
|
550
|
-
const rangeData = await
|
|
581
|
+
const rangeData = await runtimeStorageContext.loadShardRange(index, 0, null);
|
|
551
582
|
return toUint8Array(rangeData, 'storageContext.loadShardRange');
|
|
552
583
|
};
|
|
553
584
|
|
|
554
585
|
const loadShardRange = hasLoadShardRange
|
|
555
586
|
? async (index, offset, length = null) => {
|
|
556
|
-
const data = await
|
|
587
|
+
const data = await runtimeStorageContext.loadShardRange(index, offset, length);
|
|
557
588
|
return toArrayBuffer(data, 'storageContext.loadShardRange');
|
|
558
589
|
}
|
|
559
590
|
: null;
|
|
560
591
|
|
|
561
592
|
const streamShardRange = hasStreamShardRange
|
|
562
593
|
? async function* (index, offset = 0, length = null, streamOptions = {}) {
|
|
563
|
-
for await (const chunk of
|
|
594
|
+
for await (const chunk of runtimeStorageContext.streamShardRange(index, offset, length, streamOptions)) {
|
|
564
595
|
yield toUint8Array(chunk, 'storageContext.streamShardRange');
|
|
565
596
|
}
|
|
566
597
|
}
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
|
|
2
|
-
import { getDevice } from '../../gpu/device.js';
|
|
2
|
+
import { getDevice, initDevice } from '../../gpu/device.js';
|
|
3
3
|
import { getBufferPool as getGlobalBufferPool } from '../../memory/buffer-pool.js';
|
|
4
4
|
import { log } from '../../debug/index.js';
|
|
5
5
|
import { configurePerfGuards } from '../../gpu/perf-guards.js';
|
|
@@ -85,6 +85,12 @@ export class InferencePipeline extends PipelineState {
|
|
|
85
85
|
applyPipelineDebugConfig(sharedDebug?.pipeline);
|
|
86
86
|
configurePerfGuards(sharedDebug?.perfGuards);
|
|
87
87
|
|
|
88
|
+
if (!this.gpuContext?.device && typeof globalThis.navigator !== 'undefined' && globalThis.navigator?.gpu) {
|
|
89
|
+
const device = await initDevice();
|
|
90
|
+
this.gpuContext = { device };
|
|
91
|
+
this.useGPU = true;
|
|
92
|
+
}
|
|
93
|
+
|
|
88
94
|
this.emulation = await initEmulation(this.runtimeConfig);
|
|
89
95
|
|
|
90
96
|
this.debug = sharedDebug?.pipeline?.enabled === true;
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
export interface InferenceExecutionRulesContractArtifact {
|
|
2
|
+
schemaVersion: 1;
|
|
3
|
+
source: 'doppler';
|
|
4
|
+
ok: boolean;
|
|
5
|
+
checks: Array<{ id: string; ok: boolean }>;
|
|
6
|
+
errors: string[];
|
|
7
|
+
stats: {
|
|
8
|
+
decodeRecorderRules: number;
|
|
9
|
+
batchDecodeRules: number;
|
|
10
|
+
decodeRecorderContexts: number;
|
|
11
|
+
batchDecodeContexts: number;
|
|
12
|
+
};
|
|
13
|
+
}
|
|
14
|
+
|
|
15
|
+
export declare function buildInferenceExecutionRulesContractArtifact(
|
|
16
|
+
ruleGroup: Record<string, unknown> | null | undefined
|
|
17
|
+
): InferenceExecutionRulesContractArtifact;
|