@simulatte/doppler 0.1.3 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114) hide show
  1. package/README.md +11 -5
  2. package/package.json +27 -4
  3. package/src/client/doppler-api.browser.d.ts +1 -0
  4. package/src/client/doppler-api.browser.js +288 -0
  5. package/src/client/doppler-api.d.ts +80 -0
  6. package/src/client/doppler-api.js +298 -0
  7. package/src/client/doppler-provider/types.js +1 -1
  8. package/src/client/doppler-registry.d.ts +23 -0
  9. package/src/client/doppler-registry.js +88 -0
  10. package/src/client/doppler-registry.json +39 -0
  11. package/src/config/execution-contract-check.d.ts +82 -0
  12. package/src/config/execution-contract-check.js +317 -0
  13. package/src/config/execution-v0-contract-check.d.ts +94 -0
  14. package/src/config/execution-v0-contract-check.js +251 -0
  15. package/src/config/execution-v0-graph-contract-check.d.ts +20 -0
  16. package/src/config/execution-v0-graph-contract-check.js +64 -0
  17. package/src/config/kernel-path-contract-check.d.ts +76 -0
  18. package/src/config/kernel-path-contract-check.js +479 -0
  19. package/src/config/kernel-path-loader.d.ts +16 -0
  20. package/src/config/kernel-path-loader.js +54 -0
  21. package/src/config/kernels/kernel-ref-digests.js +12 -0
  22. package/src/config/kernels/registry.json +556 -0
  23. package/src/config/loader.js +90 -67
  24. package/src/config/merge-contract-check.d.ts +16 -0
  25. package/src/config/merge-contract-check.js +321 -0
  26. package/src/config/merge-helpers.d.ts +58 -0
  27. package/src/config/merge-helpers.js +54 -0
  28. package/src/config/merge.js +3 -6
  29. package/src/config/presets/models/janus-text.json +27 -0
  30. package/src/config/quantization-contract-check.d.ts +12 -0
  31. package/src/config/quantization-contract-check.js +91 -0
  32. package/src/config/required-inference-fields-contract-check.d.ts +24 -0
  33. package/src/config/required-inference-fields-contract-check.js +231 -0
  34. package/src/config/schema/browser-suite-metrics.schema.d.ts +17 -0
  35. package/src/config/schema/browser-suite-metrics.schema.js +46 -0
  36. package/src/config/schema/conversion-report.schema.d.ts +40 -0
  37. package/src/config/schema/conversion-report.schema.js +108 -0
  38. package/src/config/schema/doppler.schema.js +12 -18
  39. package/src/config/schema/index.d.ts +22 -0
  40. package/src/config/schema/index.js +18 -0
  41. package/src/converter/core.d.ts +10 -0
  42. package/src/converter/core.js +49 -11
  43. package/src/converter/parsers/diffusion.js +63 -3
  44. package/src/converter/tokenizer-utils.js +17 -3
  45. package/src/formats/rdrr/validation.js +13 -0
  46. package/src/gpu/kernels/depthwise_conv2d.d.ts +29 -0
  47. package/src/gpu/kernels/depthwise_conv2d.js +98 -0
  48. package/src/gpu/kernels/depthwise_conv2d.wgsl +58 -0
  49. package/src/gpu/kernels/depthwise_conv2d_f16.wgsl +62 -0
  50. package/src/gpu/kernels/grouped_pointwise_conv2d.d.ts +27 -0
  51. package/src/gpu/kernels/grouped_pointwise_conv2d.js +92 -0
  52. package/src/gpu/kernels/grouped_pointwise_conv2d.wgsl +47 -0
  53. package/src/gpu/kernels/grouped_pointwise_conv2d_f16.wgsl +51 -0
  54. package/src/gpu/kernels/index.d.ts +30 -0
  55. package/src/gpu/kernels/index.js +25 -0
  56. package/src/gpu/kernels/relu.d.ts +18 -0
  57. package/src/gpu/kernels/relu.js +45 -0
  58. package/src/gpu/kernels/relu.wgsl +21 -0
  59. package/src/gpu/kernels/relu_f16.wgsl +23 -0
  60. package/src/gpu/kernels/repeat_channels.d.ts +21 -0
  61. package/src/gpu/kernels/repeat_channels.js +60 -0
  62. package/src/gpu/kernels/repeat_channels.wgsl +29 -0
  63. package/src/gpu/kernels/repeat_channels_f16.wgsl +31 -0
  64. package/src/gpu/kernels/sana_linear_attention.d.ts +27 -0
  65. package/src/gpu/kernels/sana_linear_attention.js +122 -0
  66. package/src/gpu/kernels/sana_linear_attention_apply.wgsl +44 -0
  67. package/src/gpu/kernels/sana_linear_attention_apply_f16.wgsl +47 -0
  68. package/src/gpu/kernels/sana_linear_attention_summary.wgsl +47 -0
  69. package/src/gpu/kernels/sana_linear_attention_summary_f16.wgsl +49 -0
  70. package/src/index-browser.d.ts +1 -0
  71. package/src/index-browser.js +2 -1
  72. package/src/index.d.ts +1 -0
  73. package/src/index.js +2 -1
  74. package/src/inference/browser-harness.js +164 -38
  75. package/src/inference/pipelines/diffusion/init.js +14 -0
  76. package/src/inference/pipelines/diffusion/pipeline.js +206 -77
  77. package/src/inference/pipelines/diffusion/sana-transformer.d.ts +53 -0
  78. package/src/inference/pipelines/diffusion/sana-transformer.js +738 -0
  79. package/src/inference/pipelines/diffusion/scheduler.d.ts +17 -1
  80. package/src/inference/pipelines/diffusion/scheduler.js +91 -3
  81. package/src/inference/pipelines/diffusion/text-encoder-gpu.d.ts +6 -4
  82. package/src/inference/pipelines/diffusion/text-encoder-gpu.js +270 -0
  83. package/src/inference/pipelines/diffusion/text-encoder.js +18 -1
  84. package/src/inference/pipelines/diffusion/types.d.ts +4 -0
  85. package/src/inference/pipelines/diffusion/vae.js +782 -78
  86. package/src/inference/pipelines/text/config.d.ts +5 -0
  87. package/src/inference/pipelines/text/config.js +1 -1
  88. package/src/inference/pipelines/text/execution-v0.js +141 -101
  89. package/src/inference/pipelines/text/init.js +41 -10
  90. package/src/inference/pipelines/text.js +7 -1
  91. package/src/rules/execution-rules-contract-check.d.ts +17 -0
  92. package/src/rules/execution-rules-contract-check.js +245 -0
  93. package/src/rules/kernels/depthwise-conv2d.rules.json +6 -0
  94. package/src/rules/kernels/grouped-pointwise-conv2d.rules.json +6 -0
  95. package/src/rules/kernels/relu.rules.json +6 -0
  96. package/src/rules/kernels/repeat-channels.rules.json +6 -0
  97. package/src/rules/kernels/sana-linear-attention.rules.json +6 -0
  98. package/src/rules/layer-pattern-contract-check.d.ts +17 -0
  99. package/src/rules/layer-pattern-contract-check.js +231 -0
  100. package/src/rules/rule-registry.d.ts +28 -0
  101. package/src/rules/rule-registry.js +38 -0
  102. package/src/tooling/conversion-config-materializer.d.ts +24 -0
  103. package/src/tooling/conversion-config-materializer.js +99 -0
  104. package/src/tooling/lean-execution-contract-runner.d.ts +43 -0
  105. package/src/tooling/lean-execution-contract-runner.js +158 -0
  106. package/src/tooling/lean-execution-contract.d.ts +16 -0
  107. package/src/tooling/lean-execution-contract.js +81 -0
  108. package/src/tooling/node-convert.d.ts +10 -0
  109. package/src/tooling/node-converter.js +59 -0
  110. package/src/tooling/node-webgpu.js +30 -9
  111. package/src/version.d.ts +2 -0
  112. package/src/version.js +2 -0
  113. package/tools/convert-safetensors-node.js +47 -0
  114. package/tools/doppler-cli.js +167 -6
@@ -210,6 +210,11 @@ export interface ManifestWithInference {
210
210
  */
211
211
  export function hasManifestInference(manifest: Manifest): manifest is Manifest & { inference: ManifestInferenceSchema };
212
212
 
213
+ export function validateRequiredInferenceFields(
214
+ inf: ManifestInferenceSchema,
215
+ modelId: string
216
+ ): void;
217
+
213
218
  /**
214
219
  * Convert MergedConfig to ParsedModelConfig.
215
220
  */
@@ -129,7 +129,7 @@ export function hasManifestInference(manifest) {
129
129
  }
130
130
 
131
131
 
132
- function validateRequiredInferenceFields(inf, modelId) {
132
+ export function validateRequiredInferenceFields(inf, modelId) {
133
133
 
134
134
  const errors = [];
135
135
 
@@ -1,4 +1,12 @@
1
1
  import { mergeRuntimeValues } from '../../../config/runtime-merge.js';
2
+ import {
3
+ buildExecutionV0KernelProfileKey,
4
+ indexExecutionV0KernelProfiles,
5
+ normalizeExecutionV0Dtype,
6
+ resolveExecutionV0KernelProfile,
7
+ resolveExecutionV0KVIO,
8
+ resolveExecutionV0Precision,
9
+ } from '../../../config/execution-v0-contract-check.js';
2
10
  import {
3
11
  EXECUTION_V0_SCHEMA_ID,
4
12
  DEFAULT_EXECUTION_V0_POLICIES,
@@ -59,13 +67,9 @@ function cloneJson(value) {
59
67
  return JSON.parse(JSON.stringify(value));
60
68
  }
61
69
 
62
- function normalizeDtype(value, label) {
63
- const normalized = String(value ?? '').trim().toLowerCase();
64
- if (normalized !== 'f16' && normalized !== 'f32') {
65
- throw new Error(`[ExecutionV0] ${label} must be "f16" or "f32"; got "${value}"`);
66
- }
67
- return normalized;
68
- }
70
+ const normalizeDtype = normalizeExecutionV0Dtype;
71
+ const resolvePrecision = resolveExecutionV0Precision;
72
+ const resolveKVIO = resolveExecutionV0KVIO;
69
73
 
70
74
  function normalizePhase(value, label) {
71
75
  const normalized = String(value ?? '').trim().toLowerCase();
@@ -83,6 +87,17 @@ function normalizeSection(value, label) {
83
87
  return normalized;
84
88
  }
85
89
 
90
+ function normalizeKVLayout(value, label) {
91
+ if (value == null) {
92
+ return null;
93
+ }
94
+ const normalized = String(value).trim().toLowerCase();
95
+ if (!normalized) {
96
+ return null;
97
+ }
98
+ return normalized;
99
+ }
100
+
86
101
  function assertKernelRef(kernelRef, label) {
87
102
  if (!kernelRef) return;
88
103
  if (typeof kernelRef.id !== 'string' || kernelRef.id.trim().length === 0) {
@@ -106,10 +121,7 @@ function stepHasLayer(step, layerIdx) {
106
121
  return step.layers.includes(layerIdx);
107
122
  }
108
123
 
109
- function buildKernelProfileKey(kernelRef) {
110
- if (!kernelRef) return null;
111
- return `${kernelRef.id}|${kernelRef.version}|${kernelRef.digest}`;
112
- }
124
+ const buildKernelProfileKey = buildExecutionV0KernelProfileKey;
113
125
 
114
126
  function normalizeSlot(value, label) {
115
127
  if (typeof value !== 'string' || value.trim().length === 0) {
@@ -201,90 +213,10 @@ function hasDefinedPath(root, pathSegments) {
201
213
  return current !== undefined;
202
214
  }
203
215
 
204
- function indexKernelProfiles(sessionDefaults) {
205
- const byKey = new Map();
206
- const profiles = sessionDefaults?.compute?.kernelProfiles ?? [];
207
- for (const profile of profiles) {
208
- assertKernelRef(profile.kernelRef, 'sessionDefaults.compute.kernelProfiles[].kernelRef');
209
- byKey.set(buildKernelProfileKey(profile.kernelRef), profile);
210
- }
211
- return byKey;
212
- }
216
+ const indexKernelProfiles = indexExecutionV0KernelProfiles;
213
217
 
214
218
  function resolveProfile(profileIndex, step) {
215
- const key = buildKernelProfileKey(step.kernelRef);
216
- if (!key) return null;
217
- return profileIndex.get(key) ?? null;
218
- }
219
-
220
- function resolvePrecision(step, profile, sessionDefaults) {
221
- const defaults = sessionDefaults.compute.defaults;
222
- const precision = {
223
- inputDtype: step.precision?.inputDtype
224
- ?? profile?.precision?.inputDtype
225
- ?? null,
226
- mathDtype: step.precision?.mathDtype
227
- ?? profile?.precision?.mathDtype
228
- ?? defaults.mathDtype,
229
- accumDtype: step.precision?.accumDtype
230
- ?? profile?.precision?.accumDtype
231
- ?? defaults.accumDtype,
232
- outputDtype: step.precision?.outputDtype
233
- ?? profile?.precision?.outputDtype
234
- ?? defaults.outputDtype,
235
- };
236
- const sources = {
237
- inputDtype: step.precision?.inputDtype != null
238
- ? 'manifest'
239
- : profile?.precision?.inputDtype != null
240
- ? 'kernelProfile'
241
- : 'derived',
242
- mathDtype: step.precision?.mathDtype != null
243
- ? 'manifest'
244
- : profile?.precision?.mathDtype != null
245
- ? 'kernelProfile'
246
- : 'sessionDefault',
247
- accumDtype: step.precision?.accumDtype != null
248
- ? 'manifest'
249
- : profile?.precision?.accumDtype != null
250
- ? 'kernelProfile'
251
- : 'sessionDefault',
252
- outputDtype: step.precision?.outputDtype != null
253
- ? 'manifest'
254
- : profile?.precision?.outputDtype != null
255
- ? 'kernelProfile'
256
- : 'sessionDefault',
257
- };
258
- return { precision, sources };
259
- }
260
-
261
- function resolveKVIO(step, profile, sessionDefaults) {
262
- if (step.kvIO) {
263
- return {
264
- value: {
265
- readDtype: normalizeDtype(step.kvIO.readDtype, `${step.id}.kvIO.readDtype`),
266
- writeDtype: normalizeDtype(step.kvIO.writeDtype, `${step.id}.kvIO.writeDtype`),
267
- },
268
- source: 'manifest',
269
- };
270
- }
271
- if (profile?.kvIO) {
272
- return {
273
- value: {
274
- readDtype: normalizeDtype(profile.kvIO.readDtype, `${step.id}.profile.kvIO.readDtype`),
275
- writeDtype: normalizeDtype(profile.kvIO.writeDtype, `${step.id}.profile.kvIO.writeDtype`),
276
- },
277
- source: 'kernelProfile',
278
- };
279
- }
280
- const kvDtype = normalizeDtype(
281
- sessionDefaults?.kvcache?.kvDtype ?? sessionDefaults.compute.defaults.activationDtype,
282
- `${step.id}.sessionDefaults.kvcache.kvDtype`
283
- );
284
- return {
285
- value: { readDtype: kvDtype, writeDtype: kvDtype },
286
- source: 'sessionDefault',
287
- };
219
+ return resolveExecutionV0KernelProfile(profileIndex, step);
288
220
  }
289
221
 
290
222
  function validateStepShape(step, index) {
@@ -704,12 +636,20 @@ function normalizeRuntimeSessionForExecutionV0(runtimeSession, manifestInference
704
636
  const manifestProfiles = manifestSessionDefaults?.compute?.kernelProfiles;
705
637
  const hasManifestProfiles = Array.isArray(manifestProfiles) && manifestProfiles.length > 0;
706
638
  const manifestComputeDefaults = manifestSessionDefaults?.compute?.defaults ?? null;
639
+ const hasManifestKVCache = manifestSessionDefaults?.kvcache != null;
640
+ const hasManifestDecodeLoop = manifestSessionDefaults?.decodeLoop != null;
707
641
 
708
- if (!runtimeSession || !runtimeSession.compute) {
642
+ if (!runtimeSession || typeof runtimeSession !== 'object') {
709
643
  return runtimeSession;
710
644
  }
711
645
 
712
- let compute = runtimeSession.compute;
646
+ let compute = runtimeSession.compute ?? null;
647
+ let kvcache = Object.prototype.hasOwnProperty.call(runtimeSession, 'kvcache')
648
+ ? runtimeSession.kvcache
649
+ : undefined;
650
+ let decodeLoop = Object.prototype.hasOwnProperty.call(runtimeSession, 'decodeLoop')
651
+ ? runtimeSession.decodeLoop
652
+ : undefined;
713
653
  let changed = false;
714
654
 
715
655
  // Strip preset compute dtype defaults when manifest provides model-specific values.
@@ -736,19 +676,39 @@ function normalizeRuntimeSessionForExecutionV0(runtimeSession, manifestInference
736
676
  }
737
677
  }
738
678
 
679
+ // Strip preset nulls so manifest session defaults can win.
680
+ if (kvcache === null && hasManifestKVCache) {
681
+ kvcache = undefined;
682
+ changed = true;
683
+ }
684
+
685
+ if (decodeLoop === null && hasManifestDecodeLoop) {
686
+ decodeLoop = undefined;
687
+ changed = true;
688
+ }
689
+
739
690
  if (!changed) {
740
691
  return runtimeSession;
741
692
  }
742
693
 
694
+ const nextRuntimeSession = { ...runtimeSession };
743
695
  if (!compute) {
744
- const { compute: _removed, ...rest } = runtimeSession;
745
- return Object.keys(rest).length === 0 ? {} : rest;
696
+ delete nextRuntimeSession.compute;
697
+ } else {
698
+ nextRuntimeSession.compute = compute;
699
+ }
700
+ if (kvcache === undefined) {
701
+ delete nextRuntimeSession.kvcache;
702
+ } else {
703
+ nextRuntimeSession.kvcache = kvcache;
704
+ }
705
+ if (decodeLoop === undefined) {
706
+ delete nextRuntimeSession.decodeLoop;
707
+ } else {
708
+ nextRuntimeSession.decodeLoop = decodeLoop;
746
709
  }
747
710
 
748
- return {
749
- ...runtimeSession,
750
- compute,
751
- };
711
+ return Object.keys(nextRuntimeSession).length === 0 ? {} : nextRuntimeSession;
752
712
  }
753
713
 
754
714
  function validatePhaseBoundaryCompatibility(options) {
@@ -790,6 +750,25 @@ function validatePhaseBoundaryCompatibility(options) {
790
750
  }
791
751
  }
792
752
 
753
+ function assertKVLayoutExecutionCompatibility(steps, sessionDefaults) {
754
+ const kvLayout = normalizeKVLayout(sessionDefaults?.kvcache?.layout, 'sessionDefaults.kvcache.layout');
755
+ if (kvLayout !== 'bdpa') {
756
+ return;
757
+ }
758
+ const incompatibleStep = steps.find((step) => (
759
+ step?.op === 'attention'
760
+ && isPhaseMatch(normalizePhase(step.phase, `${step.id}.phase`), 'prefill')
761
+ ));
762
+ if (!incompatibleStep) {
763
+ return;
764
+ }
765
+ throw new Error(
766
+ `[ExecutionV0] sessionDefaults.kvcache.layout="bdpa" is decode-only, ` +
767
+ `but step "${incompatibleStep.id}" declares prefill attention. ` +
768
+ 'Use a non-BDPA KV layout for prefill-capable models or remove prefill attention from the execution contract.'
769
+ );
770
+ }
771
+
793
772
  function toKernelPathStep(step) {
794
773
  if (step.op === 'cast') return null;
795
774
  if (!step.kernel) return null;
@@ -818,6 +797,65 @@ function buildLayerPhaseSteps(steps, phase, layerIdx) {
818
797
  .filter((step) => step != null);
819
798
  }
820
799
 
800
+ function getInlineKernelPathSteps(path) {
801
+ return [
802
+ ...(path?.preLayer ?? []),
803
+ ...(path?.decode?.steps ?? []),
804
+ ...(path?.prefill?.steps ?? []),
805
+ ...(path?.postLayer ?? []),
806
+ ...(path?.sampling ?? []),
807
+ ...(path?.layerOverrides?.flatMap((override) => override.steps ?? []) ?? []),
808
+ ];
809
+ }
810
+
811
+ function assertInlineKernelPathSessionCompatibility(path, sessionDefaults) {
812
+ if (!path) {
813
+ return;
814
+ }
815
+ const activationDtype = normalizeDtype(
816
+ path.activationDtype ?? sessionDefaults?.compute?.defaults?.activationDtype ?? 'f16',
817
+ 'inlineKernelPath.activationDtype'
818
+ );
819
+ const kvDtype = normalizeDtype(
820
+ path.kvDtype ?? sessionDefaults?.kvcache?.kvDtype ?? activationDtype,
821
+ 'inlineKernelPath.kvDtype'
822
+ );
823
+
824
+ for (const step of getInlineKernelPathSteps(path)) {
825
+ const kernel = String(step?.kernel ?? '').trim();
826
+ if (!kernel.startsWith('attention')) {
827
+ continue;
828
+ }
829
+ if (kernel.includes('_f16kv')) {
830
+ if (activationDtype !== 'f32' || kvDtype !== 'f16') {
831
+ throw new Error(
832
+ `[ExecutionV0] Inline kernelPath attention kernel "${kernel}" requires ` +
833
+ `activationDtype="f32" and kvcache.kvDtype="f16", but resolved ` +
834
+ `activationDtype="${activationDtype}" and kvcache.kvDtype="${kvDtype}".`
835
+ );
836
+ }
837
+ continue;
838
+ }
839
+ if (kernel.includes('_f16')) {
840
+ if (activationDtype !== 'f16' || kvDtype !== 'f16') {
841
+ throw new Error(
842
+ `[ExecutionV0] Inline kernelPath attention kernel "${kernel}" requires ` +
843
+ `activationDtype="f16" and kvcache.kvDtype="f16", but resolved ` +
844
+ `activationDtype="${activationDtype}" and kvcache.kvDtype="${kvDtype}".`
845
+ );
846
+ }
847
+ continue;
848
+ }
849
+ if (activationDtype !== 'f32' || kvDtype !== 'f32') {
850
+ throw new Error(
851
+ `[ExecutionV0] Inline kernelPath attention kernel "${kernel}" requires ` +
852
+ `activationDtype="f32" and kvcache.kvDtype="f32", but resolved ` +
853
+ `activationDtype="${activationDtype}" and kvcache.kvDtype="${kvDtype}".`
854
+ );
855
+ }
856
+ }
857
+ }
858
+
821
859
  function buildInlineKernelPath(steps, sessionDefaults, modelId, numLayers) {
822
860
  const activationDtype = normalizeDtype(
823
861
  sessionDefaults?.compute?.defaults?.activationDtype ?? 'f16',
@@ -884,6 +922,7 @@ function buildInlineKernelPath(steps, sessionDefaults, modelId, numLayers) {
884
922
  path.sampling = sampling;
885
923
  }
886
924
 
925
+ assertInlineKernelPathSessionCompatibility(path, sessionDefaults);
887
926
  return path;
888
927
  }
889
928
 
@@ -1003,6 +1042,7 @@ export function compileExecutionV0(options = {}) {
1003
1042
  const patchedSteps = applyExecutionPatchAtomic(baseSteps, runtimeInference.executionPatch ?? null);
1004
1043
  patchedSteps.forEach(validateStepShape);
1005
1044
  validateUniqueStepIds(patchedSteps);
1045
+ assertKVLayoutExecutionCompatibility(patchedSteps, resolvedSession);
1006
1046
  const runtimePatchMeta = indexRuntimePatchMeta(runtimeInference.executionPatch ?? null);
1007
1047
 
1008
1048
  const manifestSessionDefaults = manifestInference.sessionDefaults ?? {};
@@ -49,6 +49,35 @@ function isRDRRManifest(manifest) {
49
49
  return manifest !== null && typeof manifest === 'object' && Array.isArray( (manifest).shards);
50
50
  }
51
51
 
52
+ function normalizeBaseUrl(baseUrl) {
53
+ if (typeof baseUrl !== 'string' || baseUrl.trim().length === 0) {
54
+ return null;
55
+ }
56
+ return baseUrl.replace(/\/$/, '');
57
+ }
58
+
59
+ function createRemoteStorageContext(baseUrl, manifest) {
60
+ const root = normalizeBaseUrl(baseUrl);
61
+ if (!root || !isRDRRManifest(manifest)) {
62
+ return null;
63
+ }
64
+
65
+ return {
66
+ async loadShard(index) {
67
+ const shard = manifest.shards[index];
68
+ const filename = shard?.filename;
69
+ if (!filename) {
70
+ throw new Error(`Manifest shard ${index} is missing filename.`);
71
+ }
72
+ const response = await fetch(`${root}/${filename.replace(/^\/+/, '')}`);
73
+ if (!response.ok) {
74
+ throw new Error(`Failed to fetch shard ${index} from ${root}: ${response.status}`);
75
+ }
76
+ return new Uint8Array(await response.arrayBuffer());
77
+ },
78
+ };
79
+ }
80
+
52
81
 
53
82
  function resolveQ4KConfig(
54
83
  manifest,
@@ -505,10 +534,12 @@ export async function initTokenizer(manifest, options = {}) {
505
534
 
506
535
 
507
536
  export async function loadWeights(manifest, modelConfig, options = {}) {
508
- const { storageContext, onProgress, loadingConfig, baseUrl } = options;
537
+ const { onProgress, loadingConfig, baseUrl } = options;
538
+ const runtimeStorageContext = options.storageContext
539
+ ?? createRemoteStorageContext(baseUrl, manifest);
509
540
  const verifyHashes = (
510
- typeof storageContext?.verifyHashes === 'boolean'
511
- ? storageContext.verifyHashes
541
+ typeof runtimeStorageContext?.verifyHashes === 'boolean'
542
+ ? runtimeStorageContext.verifyHashes
512
543
  : options.verifyHashes
513
544
  ) ?? loadingConfig?.shardCache?.verifyHashes;
514
545
  if (verifyHashes == null) {
@@ -536,31 +567,31 @@ export async function loadWeights(manifest, modelConfig, options = {}) {
536
567
  }
537
568
 
538
569
  // Configure custom shard loader if provided (Native Bridge or direct-source bundle)
539
- const hasLoadShard = typeof storageContext?.loadShard === 'function';
540
- const hasLoadShardRange = typeof storageContext?.loadShardRange === 'function';
541
- const hasStreamShardRange = typeof storageContext?.streamShardRange === 'function';
570
+ const hasLoadShard = typeof runtimeStorageContext?.loadShard === 'function';
571
+ const hasLoadShardRange = typeof runtimeStorageContext?.loadShardRange === 'function';
572
+ const hasStreamShardRange = typeof runtimeStorageContext?.streamShardRange === 'function';
542
573
  if (hasLoadShard || hasLoadShardRange) {
543
574
  log.debug('Pipeline', 'Using custom shard loader (Native Bridge or external)');
544
575
 
545
576
  const loadShard = async (index) => {
546
577
  if (hasLoadShard) {
547
- const data = await storageContext.loadShard(index);
578
+ const data = await runtimeStorageContext.loadShard(index);
548
579
  return toUint8Array(data, 'storageContext.loadShard');
549
580
  }
550
- const rangeData = await storageContext.loadShardRange(index, 0, null);
581
+ const rangeData = await runtimeStorageContext.loadShardRange(index, 0, null);
551
582
  return toUint8Array(rangeData, 'storageContext.loadShardRange');
552
583
  };
553
584
 
554
585
  const loadShardRange = hasLoadShardRange
555
586
  ? async (index, offset, length = null) => {
556
- const data = await storageContext.loadShardRange(index, offset, length);
587
+ const data = await runtimeStorageContext.loadShardRange(index, offset, length);
557
588
  return toArrayBuffer(data, 'storageContext.loadShardRange');
558
589
  }
559
590
  : null;
560
591
 
561
592
  const streamShardRange = hasStreamShardRange
562
593
  ? async function* (index, offset = 0, length = null, streamOptions = {}) {
563
- for await (const chunk of storageContext.streamShardRange(index, offset, length, streamOptions)) {
594
+ for await (const chunk of runtimeStorageContext.streamShardRange(index, offset, length, streamOptions)) {
564
595
  yield toUint8Array(chunk, 'storageContext.streamShardRange');
565
596
  }
566
597
  }
@@ -1,5 +1,5 @@
1
1
 
2
- import { getDevice } from '../../gpu/device.js';
2
+ import { getDevice, initDevice } from '../../gpu/device.js';
3
3
  import { getBufferPool as getGlobalBufferPool } from '../../memory/buffer-pool.js';
4
4
  import { log } from '../../debug/index.js';
5
5
  import { configurePerfGuards } from '../../gpu/perf-guards.js';
@@ -85,6 +85,12 @@ export class InferencePipeline extends PipelineState {
85
85
  applyPipelineDebugConfig(sharedDebug?.pipeline);
86
86
  configurePerfGuards(sharedDebug?.perfGuards);
87
87
 
88
+ if (!this.gpuContext?.device && typeof globalThis.navigator !== 'undefined' && globalThis.navigator?.gpu) {
89
+ const device = await initDevice();
90
+ this.gpuContext = { device };
91
+ this.useGPU = true;
92
+ }
93
+
88
94
  this.emulation = await initEmulation(this.runtimeConfig);
89
95
 
90
96
  this.debug = sharedDebug?.pipeline?.enabled === true;
@@ -0,0 +1,17 @@
1
+ export interface InferenceExecutionRulesContractArtifact {
2
+ schemaVersion: 1;
3
+ source: 'doppler';
4
+ ok: boolean;
5
+ checks: Array<{ id: string; ok: boolean }>;
6
+ errors: string[];
7
+ stats: {
8
+ decodeRecorderRules: number;
9
+ batchDecodeRules: number;
10
+ decodeRecorderContexts: number;
11
+ batchDecodeContexts: number;
12
+ };
13
+ }
14
+
15
+ export declare function buildInferenceExecutionRulesContractArtifact(
16
+ ruleGroup: Record<string, unknown> | null | undefined
17
+ ): InferenceExecutionRulesContractArtifact;