@simulatte/doppler 0.1.6 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +126 -0
- package/README.md +16 -23
- package/package.json +14 -1
- package/src/adapters/adapter-registry.js +12 -1
- package/src/adapters/lora-loader.js +23 -6
- package/src/bridge/extension-client.d.ts +5 -0
- package/src/bridge/extension-client.js +40 -0
- package/src/bridge/index.d.ts +2 -1
- package/src/bridge/index.js +6 -4
- package/src/browser/browser-converter.js +26 -1
- package/src/browser/file-picker.js +6 -0
- package/src/browser/safetensors-parser-browser.js +84 -1
- package/src/browser/shard-io-browser.js +2 -2
- package/src/browser/tensor-source-download.js +8 -2
- package/src/browser/tensor-source-http.d.ts +1 -0
- package/src/browser/tensor-source-http.js +5 -1
- package/src/client/doppler-api.browser.js +20 -4
- package/src/client/doppler-api.js +19 -3
- package/src/client/doppler-provider/generation.js +12 -0
- package/src/client/doppler-provider/model-manager.d.ts +10 -0
- package/src/client/doppler-provider/model-manager.js +91 -19
- package/src/client/doppler-provider/source-runtime.d.ts +2 -1
- package/src/client/doppler-provider/source-runtime.js +132 -13
- package/src/client/doppler-registry.json +8 -7
- package/src/config/backward-registry-loader.js +17 -2
- package/src/config/execution-v0-contract-check.js +113 -15
- package/src/config/kernel-path-contract-check.js +57 -29
- package/src/config/kernel-path-loader.js +5 -36
- package/src/config/kernels/kernel-ref-digests.js +1 -1
- package/src/config/kernels/registry.js +14 -1
- package/src/config/kernels/registry.json +7 -5
- package/src/config/loader.d.ts +1 -1
- package/src/config/loader.js +12 -2
- package/src/config/merge-contract-check.js +59 -4
- package/src/config/merge-helpers.js +128 -7
- package/src/config/merge.d.ts +1 -0
- package/src/config/merge.js +10 -0
- package/src/config/param-validator.js +47 -2
- package/src/config/presets/kernel-paths/{gemma2-q4k-dequant-f32a.json → gemma2-q4k-dequant-f32a-nosubgroups.json} +3 -3
- package/src/config/presets/kernel-paths/gemma3-f16-fused-f32a-online-streamingprefill.json +223 -0
- package/src/config/presets/kernel-paths/{gemma3-q4k-dequant-f32a.json → gemma3-q4k-dequant-f32a-nosubgroups.json} +3 -3
- package/src/config/presets/kernel-paths/registry.json +29 -8
- package/src/config/presets/models/gemma2.json +2 -2
- package/src/config/presets/runtime/experiments/bench/gemma3-bench-q4k.json +1 -1
- package/src/config/presets/runtime/experiments/debug/gemma3-debug-q4k.json +1 -1
- package/src/config/presets/runtime/experiments/verify/gemma3-verify.json +1 -1
- package/src/config/presets/runtime/kernels/dequant-f16-q4k.json +6 -13
- package/src/config/presets/runtime/kernels/dequant-f32-q4k.json +6 -13
- package/src/config/presets/runtime/kernels/embeddinggemma-q4k-dequant-f32a.json +37 -0
- package/src/config/presets/runtime/kernels/fused-q4k.json +6 -13
- package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f16a.json +33 -0
- package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f32a-nosubgroups.json +33 -0
- package/src/config/presets/runtime/kernels/gemma2-q4k-fused-f32a.json +33 -0
- package/src/config/presets/runtime/kernels/safe-q4k.json +6 -13
- package/src/config/presets/runtime/platform/metal-apple-q4k.json +1 -1
- package/src/config/runtime.js +6 -1
- package/src/config/schema/debug.schema.d.ts +5 -0
- package/src/config/schema/doppler.schema.js +16 -21
- package/src/config/schema/inference-defaults.schema.js +3 -3
- package/src/config/schema/kernel-path.schema.d.ts +5 -1
- package/src/config/schema/kernel-thresholds.schema.js +12 -4
- package/src/config/schema/manifest.schema.d.ts +2 -1
- package/src/config/schema/manifest.schema.js +16 -3
- package/src/config/training-defaults.js +30 -22
- package/src/converter/conversion-plan.js +94 -9
- package/src/converter/core.d.ts +7 -0
- package/src/converter/core.js +14 -9
- package/src/converter/execution-v0-manifest.js +4 -1
- package/src/converter/index.d.ts +1 -0
- package/src/converter/index.js +1 -0
- package/src/converter/manifest-inference.js +43 -12
- package/src/converter/parsers/diffusion.js +0 -3
- package/src/converter/quantization-info.js +35 -15
- package/src/converter/shard-packer.d.ts +1 -1
- package/src/converter/shard-packer.js +4 -1
- package/src/debug/config.js +123 -11
- package/src/debug/signals.js +7 -1
- package/src/debug/tensor.d.ts +2 -0
- package/src/debug/tensor.js +13 -2
- package/src/distribution/p2p-control-plane.js +52 -12
- package/src/distribution/p2p-observability.js +43 -7
- package/src/distribution/p2p-webrtc-browser.js +20 -0
- package/src/distribution/shard-delivery.js +77 -26
- package/src/formats/gguf/types.js +33 -16
- package/src/formats/rdrr/groups.d.ts +12 -4
- package/src/formats/rdrr/groups.js +3 -6
- package/src/formats/rdrr/parsing.js +39 -2
- package/src/formats/rdrr/types.d.ts +2 -1
- package/src/gpu/command-recorder.js +86 -61
- package/src/gpu/device.d.ts +1 -0
- package/src/gpu/device.js +73 -19
- package/src/gpu/kernel-tuner/benchmarks.js +326 -316
- package/src/gpu/kernel-tuner/cache.js +71 -4
- package/src/gpu/kernel-tuner/tuner.js +22 -4
- package/src/gpu/kernels/attention.js +15 -34
- package/src/gpu/kernels/backward/adam.js +62 -58
- package/src/gpu/kernels/backward/attention_backward.js +257 -169
- package/src/gpu/kernels/backward/conv2d_backward.js +14 -1
- package/src/gpu/kernels/cast.js +191 -149
- package/src/gpu/kernels/check-stop.js +33 -44
- package/src/gpu/kernels/conv2d.js +27 -17
- package/src/gpu/kernels/cross_entropy_loss.js +21 -15
- package/src/gpu/kernels/depthwise_conv2d.js +36 -26
- package/src/gpu/kernels/dequant.js +178 -126
- package/src/gpu/kernels/energy.d.ts +3 -21
- package/src/gpu/kernels/energy.js +111 -88
- package/src/gpu/kernels/feature-check.js +1 -1
- package/src/gpu/kernels/fused_ffn.js +84 -65
- package/src/gpu/kernels/fused_matmul_residual.js +56 -33
- package/src/gpu/kernels/fused_matmul_rmsnorm.js +62 -45
- package/src/gpu/kernels/gather.js +33 -15
- package/src/gpu/kernels/gelu.js +19 -11
- package/src/gpu/kernels/grouped_pointwise_conv2d.js +33 -23
- package/src/gpu/kernels/groupnorm.js +34 -23
- package/src/gpu/kernels/kv-quantize.js +5 -2
- package/src/gpu/kernels/layernorm.js +35 -19
- package/src/gpu/kernels/logit-merge.js +5 -3
- package/src/gpu/kernels/matmul.js +58 -39
- package/src/gpu/kernels/modulate.js +23 -15
- package/src/gpu/kernels/moe.js +221 -175
- package/src/gpu/kernels/pixel_shuffle.js +22 -14
- package/src/gpu/kernels/relu.js +18 -10
- package/src/gpu/kernels/repeat_channels.js +25 -17
- package/src/gpu/kernels/residual.js +37 -27
- package/src/gpu/kernels/rmsnorm.js +57 -41
- package/src/gpu/kernels/rope.js +3 -0
- package/src/gpu/kernels/sample.js +27 -38
- package/src/gpu/kernels/sana_linear_attention.js +18 -10
- package/src/gpu/kernels/scale.js +18 -11
- package/src/gpu/kernels/shader-cache.js +4 -2
- package/src/gpu/kernels/silu.js +120 -72
- package/src/gpu/kernels/softmax.js +44 -25
- package/src/gpu/kernels/split_qkv.js +23 -13
- package/src/gpu/kernels/transpose.js +18 -10
- package/src/gpu/kernels/transpose.wgsl +5 -3
- package/src/gpu/kernels/upsample2d.js +21 -13
- package/src/gpu/kernels/utils.js +20 -13
- package/src/gpu/partitioned-buffer-pool.js +10 -2
- package/src/gpu/perf-guards.js +2 -9
- package/src/gpu/profiler.js +27 -22
- package/src/gpu/readback-utils.d.ts +16 -0
- package/src/gpu/readback-utils.js +41 -0
- package/src/gpu/submit-tracker.js +13 -0
- package/src/gpu/uniform-cache.d.ts +1 -0
- package/src/gpu/uniform-cache.js +30 -9
- package/src/hotswap/intent-bundle.js +6 -0
- package/src/hotswap/manifest.d.ts +10 -1
- package/src/hotswap/manifest.js +12 -2
- package/src/hotswap/runtime.js +30 -8
- package/src/index-browser.d.ts +44 -0
- package/src/index-browser.js +14 -0
- package/src/inference/browser-harness-contract-helpers.d.ts +5 -0
- package/src/inference/browser-harness-contract-helpers.js +28 -0
- package/src/inference/browser-harness-diffusion-energy-suites.d.ts +2 -0
- package/src/inference/browser-harness-diffusion-energy-suites.js +269 -0
- package/src/inference/browser-harness-model-helpers.d.ts +16 -0
- package/src/inference/browser-harness-model-helpers.js +217 -0
- package/src/inference/browser-harness-report-helpers.d.ts +7 -0
- package/src/inference/browser-harness-report-helpers.js +42 -0
- package/src/inference/browser-harness-runtime-helpers.d.ts +61 -0
- package/src/inference/browser-harness-runtime-helpers.js +415 -0
- package/src/inference/browser-harness-suite-helpers.d.ts +28 -0
- package/src/inference/browser-harness-suite-helpers.js +268 -0
- package/src/inference/browser-harness-text-helpers.d.ts +27 -0
- package/src/inference/browser-harness-text-helpers.js +788 -0
- package/src/inference/browser-harness.d.ts +6 -0
- package/src/inference/browser-harness.js +130 -1996
- package/src/inference/kv-cache/base.js +140 -94
- package/src/inference/kv-cache/tiered.js +5 -3
- package/src/inference/moe-router.js +88 -56
- package/src/inference/multi-model-network.js +5 -3
- package/src/inference/network-evolution.d.ts +11 -2
- package/src/inference/network-evolution.js +20 -21
- package/src/inference/pipelines/context.d.ts +3 -0
- package/src/inference/pipelines/context.js +142 -2
- package/src/inference/pipelines/diffusion/helpers.js +7 -2
- package/src/inference/pipelines/diffusion/pipeline.js +2 -1
- package/src/inference/pipelines/diffusion/sd3-transformer.js +10 -10
- package/src/inference/pipelines/diffusion/vae.js +3 -7
- package/src/inference/pipelines/energy/pipeline.js +27 -21
- package/src/inference/pipelines/energy/quintel.d.ts +5 -0
- package/src/inference/pipelines/energy/quintel.js +11 -0
- package/src/inference/pipelines/energy-head/row-head-pipeline.js +17 -13
- package/src/inference/pipelines/structured/json-head-pipeline.js +26 -11
- package/src/inference/pipelines/text/attention/projections.js +151 -101
- package/src/inference/pipelines/text/attention/record.js +62 -8
- package/src/inference/pipelines/text/attention/run.js +62 -8
- package/src/inference/pipelines/text/config.js +3 -4
- package/src/inference/pipelines/text/embed.js +2 -8
- package/src/inference/pipelines/text/execution-plan.js +41 -19
- package/src/inference/pipelines/text/execution-v0-contract-helpers.d.ts +59 -0
- package/src/inference/pipelines/text/execution-v0-contract-helpers.js +937 -0
- package/src/inference/pipelines/text/execution-v0-runtime-builders.d.ts +15 -0
- package/src/inference/pipelines/text/execution-v0-runtime-builders.js +279 -0
- package/src/inference/pipelines/text/execution-v0.js +62 -1013
- package/src/inference/pipelines/text/generator-steps.d.ts +46 -0
- package/src/inference/pipelines/text/generator-steps.js +298 -207
- package/src/inference/pipelines/text/generator.js +6 -23
- package/src/inference/pipelines/text/init.js +78 -20
- package/src/inference/pipelines/text/kernel-path-auto-select.js +2 -0
- package/src/inference/pipelines/text/kernel-trace.d.ts +2 -0
- package/src/inference/pipelines/text/kernel-trace.js +6 -0
- package/src/inference/pipelines/text/layer.js +3 -9
- package/src/inference/pipelines/text/linear-attention.d.ts +10 -0
- package/src/inference/pipelines/text/linear-attention.js +80 -6
- package/src/inference/pipelines/text/logits/gpu.js +10 -5
- package/src/inference/pipelines/text/logits/index.js +10 -11
- package/src/inference/pipelines/text/logits/utils.d.ts +7 -0
- package/src/inference/pipelines/text/logits/utils.js +9 -0
- package/src/inference/pipelines/text/lora-apply.js +50 -32
- package/src/inference/pipelines/text/model-load.js +279 -104
- package/src/inference/pipelines/text/moe-cache.js +5 -4
- package/src/inference/pipelines/text/moe-cpu-gptoss.js +74 -69
- package/src/inference/pipelines/text/moe-cpu.js +42 -38
- package/src/inference/pipelines/text/moe-gpu.js +110 -86
- package/src/inference/pipelines/text/ops.js +90 -90
- package/src/inference/pipelines/text/probes.js +9 -9
- package/src/inference/pipelines/text/weights.js +17 -7
- package/src/inference/pipelines/text.js +13 -1
- package/src/inference/speculative.d.ts +2 -2
- package/src/inference/speculative.js +4 -18
- package/src/inference/test-harness.d.ts +1 -1
- package/src/inference/test-harness.js +15 -5
- package/src/inference/tokenizer.d.ts +0 -5
- package/src/inference/tokenizer.js +4 -23
- package/src/inference/tokenizers/bpe.js +9 -0
- package/src/inference/tokenizers/bundled.js +20 -0
- package/src/inference/tokenizers/sentencepiece.js +12 -0
- package/src/loader/doppler-loader.js +38 -22
- package/src/loader/dtype-utils.js +3 -44
- package/src/loader/embedding-loader.js +7 -3
- package/src/loader/experts/expert-cache.js +13 -6
- package/src/loader/experts/expert-loader.js +10 -6
- package/src/loader/final-weights-loader.js +8 -4
- package/src/loader/layer-loader.js +2 -1
- package/src/loader/loader-state.js +2 -2
- package/src/loader/memory-monitor.js +8 -0
- package/src/loader/multi-model-loader.d.ts +14 -0
- package/src/loader/multi-model-loader.js +70 -24
- package/src/loader/shard-cache.js +81 -12
- package/src/loader/shard-resolver.js +25 -3
- package/src/loader/tensors/tensor-loader.js +209 -144
- package/src/loader/tensors/tensor-reader.js +76 -19
- package/src/loader/weight-downcast.js +1 -1
- package/src/memory/buffer-pool.d.ts +9 -1
- package/src/memory/buffer-pool.js +109 -44
- package/src/memory/unified-detect.js +1 -1
- package/src/rules/inference/kernel-path.rules.json +24 -8
- package/src/rules/rule-registry.js +25 -1
- package/src/storage/backends/opfs-store.js +68 -24
- package/src/storage/downloader.js +364 -83
- package/src/storage/index.d.ts +3 -0
- package/src/storage/index.js +3 -0
- package/src/storage/preflight.d.ts +2 -2
- package/src/storage/preflight.js +24 -2
- package/src/storage/quickstart-downloader.js +11 -5
- package/src/storage/registry.js +10 -4
- package/src/storage/reports.js +1 -1
- package/src/storage/shard-manager.d.ts +15 -1
- package/src/storage/shard-manager.js +51 -3
- package/src/storage/source-artifact-store.d.ts +52 -0
- package/src/storage/source-artifact-store.js +234 -0
- package/src/tooling/command-api-constants.d.ts +9 -0
- package/src/tooling/command-api-constants.js +9 -0
- package/src/tooling/command-api-family-normalizers.d.ts +9 -0
- package/src/tooling/command-api-family-normalizers.js +343 -0
- package/src/tooling/command-api-helpers.d.ts +25 -0
- package/src/tooling/command-api-helpers.js +262 -0
- package/src/tooling/command-api.js +16 -602
- package/src/tooling/command-envelope.js +4 -1
- package/src/tooling/command-runner-shared.js +52 -18
- package/src/tooling/lean-execution-contract.js +150 -3
- package/src/tooling/node-browser-command-runner.js +161 -271
- package/src/tooling/node-command-runner.js +29 -3
- package/src/tooling/node-converter.js +27 -1
- package/src/tooling/node-source-runtime.d.ts +1 -1
- package/src/tooling/node-source-runtime.js +84 -3
- package/src/tooling/node-webgpu.js +24 -21
- package/src/tooling/opfs-cache.js +21 -4
- package/src/tooling/runtime-input-composition.d.ts +38 -0
- package/src/tooling/runtime-input-composition.js +86 -0
- package/src/tooling/source-runtime-bundle.d.ts +40 -5
- package/src/tooling/source-runtime-bundle.js +261 -34
- package/src/tooling/source-runtime-materializer.d.ts +6 -0
- package/src/tooling/source-runtime-materializer.js +93 -0
- package/src/training/attention-backward.js +32 -17
- package/src/training/autograd.js +80 -52
- package/src/training/checkpoint-watch.d.ts +2 -1
- package/src/training/checkpoint-watch.js +39 -6
- package/src/training/checkpoint.js +40 -11
- package/src/training/clip.js +2 -1
- package/src/training/datasets/token-batch.js +20 -8
- package/src/training/distillation/checkpoint-watch.js +1 -0
- package/src/training/distillation/student-fixture.d.ts +22 -0
- package/src/training/distillation/student-fixture.js +846 -0
- package/src/training/distillation/suite-data.d.ts +45 -0
- package/src/training/distillation/suite-data.js +189 -0
- package/src/training/lora-pipeline.js +4 -7
- package/src/training/lora.js +26 -12
- package/src/training/loss.js +5 -6
- package/src/training/objectives/cross_entropy.js +2 -5
- package/src/training/objectives/distill_kd.js +4 -8
- package/src/training/objectives/distill_triplet.js +4 -8
- package/src/training/objectives/ul_stage2_base.js +4 -8
- package/src/training/operator-command.js +2 -0
- package/src/training/optimizer.js +19 -7
- package/src/training/runner.js +2 -1
- package/src/training/suite.js +18 -978
- package/src/training/tensor-factory.d.ts +9 -0
- package/src/training/tensor-factory.js +13 -0
- package/src/training/trainer.js +3 -5
- package/src/training/ul_dataset.js +3 -5
- package/src/training/workloads.js +70 -79
- package/src/version.js +1 -1
- package/tools/convert-safetensors-node.js +22 -16
- package/tools/doppler-cli.js +44 -25
|
@@ -3,6 +3,7 @@ import path from 'node:path';
|
|
|
3
3
|
import {
|
|
4
4
|
HEADER_READ_SIZE,
|
|
5
5
|
createConverterConfig,
|
|
6
|
+
DEFAULT_EXECUTION_V0_SESSION_DEFAULTS,
|
|
6
7
|
} from '../config/schema/index.js';
|
|
7
8
|
import { extractArchitecture } from '../converter/core.js';
|
|
8
9
|
import {
|
|
@@ -15,6 +16,7 @@ import { parseTransformerModel } from '../converter/parsers/transformer.js';
|
|
|
15
16
|
import { parseGGUFHeader } from '../formats/gguf/types.js';
|
|
16
17
|
import { parseSafetensorsHeader } from '../formats/safetensors/types.js';
|
|
17
18
|
import { log } from '../debug/index.js';
|
|
19
|
+
import { computeHash } from '../storage/shard-manager.js';
|
|
18
20
|
import {
|
|
19
21
|
buildSourceRuntimeBundle,
|
|
20
22
|
createSourceStorageContext,
|
|
@@ -33,6 +35,13 @@ const SOURCE_RUNTIME_EXECUTION_OVERRIDE = {
|
|
|
33
35
|
steps: [],
|
|
34
36
|
};
|
|
35
37
|
|
|
38
|
+
function cloneExecutionV0SessionDefaults() {
|
|
39
|
+
if (typeof structuredClone === 'function') {
|
|
40
|
+
return structuredClone(DEFAULT_EXECUTION_V0_SESSION_DEFAULTS);
|
|
41
|
+
}
|
|
42
|
+
return JSON.parse(JSON.stringify(DEFAULT_EXECUTION_V0_SESSION_DEFAULTS));
|
|
43
|
+
}
|
|
44
|
+
|
|
36
45
|
function toArrayBuffer(value, label) {
|
|
37
46
|
if (value instanceof ArrayBuffer) {
|
|
38
47
|
return value;
|
|
@@ -105,6 +114,16 @@ async function readJson(filePath, label) {
|
|
|
105
114
|
}
|
|
106
115
|
}
|
|
107
116
|
|
|
117
|
+
async function readFileBytes(filePath, label) {
|
|
118
|
+
try {
|
|
119
|
+
const bytes = await fs.readFile(filePath);
|
|
120
|
+
return bytes.buffer.slice(bytes.byteOffset, bytes.byteOffset + bytes.byteLength);
|
|
121
|
+
} catch (error) {
|
|
122
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
123
|
+
throw new Error(`Failed to read ${label} "${filePath}": ${message}`);
|
|
124
|
+
}
|
|
125
|
+
}
|
|
126
|
+
|
|
108
127
|
async function readRange(filePath, offset, length) {
|
|
109
128
|
if (!Number.isFinite(offset) || !Number.isFinite(length) || length <= 0) {
|
|
110
129
|
return new ArrayBuffer(0);
|
|
@@ -206,6 +225,37 @@ async function parseSafetensorsInput(inputDir) {
|
|
|
206
225
|
const stats = await getPathStats(sourcePath, `source shard (${sourcePath})`);
|
|
207
226
|
sourceFiles.push({ path: sourcePath, size: Number(stats.size) });
|
|
208
227
|
}
|
|
228
|
+
const auxiliaryFiles = [
|
|
229
|
+
{ path: configPath, size: Number((await getPathStats(configPath, 'config.json')).size), kind: 'config' },
|
|
230
|
+
...(hasIndex
|
|
231
|
+
? [{
|
|
232
|
+
path: path.join(inputDir, 'model.safetensors.index.json'),
|
|
233
|
+
size: Number((await getPathStats(path.join(inputDir, 'model.safetensors.index.json'), 'model.safetensors.index.json')).size),
|
|
234
|
+
kind: 'safetensors_index',
|
|
235
|
+
}]
|
|
236
|
+
: []),
|
|
237
|
+
...(tokenizerJson
|
|
238
|
+
? [{
|
|
239
|
+
path: tokenizerJsonPath,
|
|
240
|
+
size: Number((await getPathStats(tokenizerJsonPath, 'tokenizer.json')).size),
|
|
241
|
+
kind: 'tokenizer_json',
|
|
242
|
+
}]
|
|
243
|
+
: []),
|
|
244
|
+
...(tokenizerConfig
|
|
245
|
+
? [{
|
|
246
|
+
path: tokenizerConfigPath,
|
|
247
|
+
size: Number((await getPathStats(tokenizerConfigPath, 'tokenizer_config.json')).size),
|
|
248
|
+
kind: 'tokenizer_config',
|
|
249
|
+
}]
|
|
250
|
+
: []),
|
|
251
|
+
...(hasTokenizerModel
|
|
252
|
+
? [{
|
|
253
|
+
path: tokenizerModelPath,
|
|
254
|
+
size: Number((await getPathStats(tokenizerModelPath, 'tokenizer.model')).size),
|
|
255
|
+
kind: 'tokenizer_model',
|
|
256
|
+
}]
|
|
257
|
+
: []),
|
|
258
|
+
];
|
|
209
259
|
|
|
210
260
|
return {
|
|
211
261
|
sourceKind: 'safetensors',
|
|
@@ -220,8 +270,10 @@ async function parseSafetensorsInput(inputDir) {
|
|
|
220
270
|
tokenizerConfig,
|
|
221
271
|
tokenizerModelName: hasTokenizerModel ? 'tokenizer.model' : null,
|
|
222
272
|
tokenizerJsonPath: tokenizerJsonPath,
|
|
273
|
+
tokenizerConfigPath: tokenizerConfigPath,
|
|
223
274
|
tokenizerModelPath: hasTokenizerModel ? tokenizerModelPath : null,
|
|
224
275
|
sourceFiles,
|
|
276
|
+
auxiliaryFiles,
|
|
225
277
|
};
|
|
226
278
|
}
|
|
227
279
|
|
|
@@ -283,8 +335,10 @@ async function parseGgufInput(ggufPath) {
|
|
|
283
335
|
tokenizerConfig: null,
|
|
284
336
|
tokenizerModelName: null,
|
|
285
337
|
tokenizerJsonPath: null,
|
|
338
|
+
tokenizerConfigPath: null,
|
|
286
339
|
tokenizerModelPath: null,
|
|
287
340
|
sourceFiles: [{ path: ggufPath, size: fileSize }],
|
|
341
|
+
auxiliaryFiles: [],
|
|
288
342
|
};
|
|
289
343
|
}
|
|
290
344
|
|
|
@@ -357,11 +411,29 @@ function buildNodeFileReaders() {
|
|
|
357
411
|
};
|
|
358
412
|
}
|
|
359
413
|
|
|
414
|
+
async function addHashesToFileEntries(entries, hashAlgorithm) {
|
|
415
|
+
const normalized = [];
|
|
416
|
+
for (const entry of Array.isArray(entries) ? entries : []) {
|
|
417
|
+
const filePath = normalizePath(entry?.path);
|
|
418
|
+
if (!filePath) continue;
|
|
419
|
+
const bytes = await readFileBytes(filePath, `source asset (${filePath})`);
|
|
420
|
+
normalized.push({
|
|
421
|
+
...entry,
|
|
422
|
+
path: filePath,
|
|
423
|
+
size: Number.isFinite(entry?.size) ? Math.max(0, Math.floor(Number(entry.size))) : bytes.byteLength,
|
|
424
|
+
hash: await computeHash(new Uint8Array(bytes), hashAlgorithm),
|
|
425
|
+
hashAlgorithm,
|
|
426
|
+
});
|
|
427
|
+
}
|
|
428
|
+
return normalized;
|
|
429
|
+
}
|
|
430
|
+
|
|
360
431
|
export async function resolveNodeSourceRuntimeBundle(options = {}) {
|
|
361
432
|
const inputPath = normalizePath(options.inputPath);
|
|
362
433
|
if (!inputPath) {
|
|
363
434
|
throw new Error('node source runtime: inputPath is required.');
|
|
364
435
|
}
|
|
436
|
+
const verifyHashes = options.verifyHashes === true;
|
|
365
437
|
const resolvedInputPath = path.resolve(inputPath);
|
|
366
438
|
const stats = await getPathStats(resolvedInputPath, 'inputPath');
|
|
367
439
|
|
|
@@ -405,6 +477,7 @@ export async function resolveNodeSourceRuntimeBundle(options = {}) {
|
|
|
405
477
|
modelBaseId: options.modelId || null,
|
|
406
478
|
},
|
|
407
479
|
inference: {
|
|
480
|
+
sessionDefaults: cloneExecutionV0SessionDefaults(),
|
|
408
481
|
execution: SOURCE_RUNTIME_EXECUTION_OVERRIDE,
|
|
409
482
|
},
|
|
410
483
|
});
|
|
@@ -425,22 +498,30 @@ export async function resolveNodeSourceRuntimeBundle(options = {}) {
|
|
|
425
498
|
parsed.sourceKind,
|
|
426
499
|
parsed.sourcePathForModelId
|
|
427
500
|
);
|
|
501
|
+
const hashAlgorithm = converterConfig.manifest.hashAlgorithm;
|
|
502
|
+
const sourceFiles = await addHashesToFileEntries(parsed.sourceFiles, hashAlgorithm);
|
|
503
|
+
const auxiliaryFiles = await addHashesToFileEntries(parsed.auxiliaryFiles, hashAlgorithm);
|
|
428
504
|
const { manifest, shardSources } = await buildSourceRuntimeBundle({
|
|
429
505
|
modelId,
|
|
430
506
|
modelName: modelId,
|
|
431
507
|
modelType: plan.modelType,
|
|
508
|
+
sourceKind: parsed.sourceKind,
|
|
432
509
|
architecture: parsed.architecture,
|
|
433
510
|
architectureHint: parsed.architectureHint,
|
|
434
511
|
rawConfig: parsed.config,
|
|
435
512
|
inference: plan.manifestInference,
|
|
436
513
|
tensors: parsed.tensors,
|
|
437
|
-
sourceFiles
|
|
514
|
+
sourceFiles,
|
|
515
|
+
auxiliaryFiles,
|
|
438
516
|
sourceQuantization: parsed.sourceQuantization,
|
|
439
517
|
quantizationInfo: plan.quantizationInfo,
|
|
440
|
-
hashAlgorithm
|
|
518
|
+
hashAlgorithm,
|
|
441
519
|
tokenizerJson: parsed.tokenizerJson,
|
|
442
520
|
tokenizerConfig: parsed.tokenizerConfig,
|
|
443
521
|
tokenizerModelName: parsed.tokenizerModelName,
|
|
522
|
+
tokenizerJsonPath: parsed.tokenizerJsonPath,
|
|
523
|
+
tokenizerConfigPath: parsed.tokenizerConfigPath,
|
|
524
|
+
tokenizerModelPath: parsed.tokenizerModelPath,
|
|
444
525
|
});
|
|
445
526
|
|
|
446
527
|
const readers = buildNodeFileReaders();
|
|
@@ -452,7 +533,7 @@ export async function resolveNodeSourceRuntimeBundle(options = {}) {
|
|
|
452
533
|
readBinary: readers.readBinary,
|
|
453
534
|
tokenizerJsonPath: parsed.tokenizerJsonPath,
|
|
454
535
|
tokenizerModelPath: parsed.tokenizerModelPath,
|
|
455
|
-
verifyHashes
|
|
536
|
+
verifyHashes,
|
|
456
537
|
});
|
|
457
538
|
|
|
458
539
|
log.info(
|
|
@@ -3,9 +3,12 @@ import { dirname, isAbsolute, resolve } from 'node:path';
|
|
|
3
3
|
import { fileURLToPath, pathToFileURL } from 'node:url';
|
|
4
4
|
|
|
5
5
|
const DEFAULT_DOE_PROVIDER_CREATE_ARGS = 'enable-dawn-features=allow_unsafe_apis';
|
|
6
|
+
const DOE_PROVIDER_CREATE_ARGS_ENV = 'FAWN_WEBGPU_CREATE_ARGS';
|
|
6
7
|
|
|
7
8
|
function hasNavigatorGpu() {
|
|
8
|
-
return typeof globalThis.navigator !== 'undefined'
|
|
9
|
+
return typeof globalThis.navigator !== 'undefined'
|
|
10
|
+
&& !!globalThis.navigator?.gpu
|
|
11
|
+
&& typeof globalThis.navigator.gpu.requestAdapter === 'function';
|
|
9
12
|
}
|
|
10
13
|
|
|
11
14
|
function hasGpuEnums() {
|
|
@@ -51,18 +54,12 @@ function resolveDefaultWebgpuModuleSpecifiers() {
|
|
|
51
54
|
return ['@simulatte/webgpu', 'webgpu'];
|
|
52
55
|
}
|
|
53
56
|
|
|
54
|
-
function
|
|
57
|
+
function resolveExplicitWebgpuModuleSpecifier() {
|
|
55
58
|
const fromEnv = process.env.DOPPLER_NODE_WEBGPU_MODULE;
|
|
56
59
|
if (typeof fromEnv === 'string' && fromEnv.trim().length > 0) {
|
|
57
|
-
return
|
|
58
|
-
explicit: true,
|
|
59
|
-
specifiers: [resolveCandidateModuleSpecifier(fromEnv.trim())],
|
|
60
|
-
};
|
|
60
|
+
return resolveCandidateModuleSpecifier(fromEnv.trim());
|
|
61
61
|
}
|
|
62
|
-
return
|
|
63
|
-
explicit: false,
|
|
64
|
-
specifiers: resolveDefaultWebgpuModuleSpecifiers(),
|
|
65
|
-
};
|
|
62
|
+
return null;
|
|
66
63
|
}
|
|
67
64
|
|
|
68
65
|
function isDoeWebgpuSpecifier(specifier) {
|
|
@@ -76,15 +73,15 @@ function isDoeWebgpuSpecifier(specifier) {
|
|
|
76
73
|
|
|
77
74
|
async function importWithProviderOverride(specifier) {
|
|
78
75
|
const shouldApplyCreateArgsDefault = isDoeWebgpuSpecifier(specifier)
|
|
79
|
-
&& !(typeof process.env
|
|
76
|
+
&& !(typeof process.env[DOE_PROVIDER_CREATE_ARGS_ENV] === 'string' && process.env[DOE_PROVIDER_CREATE_ARGS_ENV].trim().length > 0);
|
|
80
77
|
if (!shouldApplyCreateArgsDefault) {
|
|
81
78
|
return import(specifier);
|
|
82
79
|
}
|
|
83
|
-
process.env
|
|
80
|
+
process.env[DOE_PROVIDER_CREATE_ARGS_ENV] = DEFAULT_DOE_PROVIDER_CREATE_ARGS;
|
|
84
81
|
try {
|
|
85
82
|
return await import(specifier);
|
|
86
83
|
} finally {
|
|
87
|
-
delete process.env
|
|
84
|
+
delete process.env[DOE_PROVIDER_CREATE_ARGS_ENV];
|
|
88
85
|
}
|
|
89
86
|
}
|
|
90
87
|
|
|
@@ -237,27 +234,33 @@ function installWebgpuFromModule(mod) {
|
|
|
237
234
|
}
|
|
238
235
|
|
|
239
236
|
export async function bootstrapNodeWebGPU() {
|
|
237
|
+
const explicitSpecifier = resolveExplicitWebgpuModuleSpecifier();
|
|
238
|
+
if (explicitSpecifier) {
|
|
239
|
+
try {
|
|
240
|
+
const mod = await importWithProviderOverride(explicitSpecifier);
|
|
241
|
+
if (installWebgpuFromModule(mod)) {
|
|
242
|
+
return { ok: true, provider: explicitSpecifier };
|
|
243
|
+
}
|
|
244
|
+
return { ok: false, provider: explicitSpecifier };
|
|
245
|
+
} catch {
|
|
246
|
+
return { ok: false, provider: explicitSpecifier };
|
|
247
|
+
}
|
|
248
|
+
}
|
|
249
|
+
|
|
240
250
|
if (hasNavigatorGpu() && hasGpuEnums()) {
|
|
241
251
|
return { ok: true, provider: 'pre-installed' };
|
|
242
252
|
}
|
|
243
253
|
|
|
244
|
-
const
|
|
245
|
-
for (const specifier of specifiers) {
|
|
254
|
+
for (const specifier of resolveDefaultWebgpuModuleSpecifiers()) {
|
|
246
255
|
let mod;
|
|
247
256
|
try {
|
|
248
257
|
mod = await importWithProviderOverride(specifier);
|
|
249
258
|
} catch {
|
|
250
|
-
if (explicit) {
|
|
251
|
-
return { ok: false, provider: null };
|
|
252
|
-
}
|
|
253
259
|
continue;
|
|
254
260
|
}
|
|
255
261
|
if (installWebgpuFromModule(mod)) {
|
|
256
262
|
return { ok: true, provider: specifier };
|
|
257
263
|
}
|
|
258
|
-
if (explicit) {
|
|
259
|
-
return { ok: false, provider: null };
|
|
260
|
-
}
|
|
261
264
|
}
|
|
262
265
|
|
|
263
266
|
return { ok: false, provider: null };
|
|
@@ -8,6 +8,10 @@ import { downloadModel } from '../storage/downloader.js';
|
|
|
8
8
|
import { isOPFSAvailable } from '../storage/quota.js';
|
|
9
9
|
import { parseManifest, getManifestUrl } from '../formats/rdrr/index.js';
|
|
10
10
|
import { log } from '../debug/index.js';
|
|
11
|
+
import {
|
|
12
|
+
resolveSourceArtifact,
|
|
13
|
+
verifyStoredSourceArtifact,
|
|
14
|
+
} from '../storage/source-artifact-store.js';
|
|
11
15
|
|
|
12
16
|
const MODULE = 'OPFSCache';
|
|
13
17
|
|
|
@@ -43,6 +47,7 @@ function hasSameShardSet(aManifest, bManifest) {
|
|
|
43
47
|
}
|
|
44
48
|
|
|
45
49
|
function buildManifestFingerprint(manifest) {
|
|
50
|
+
const sourceArtifactFingerprint = resolveSourceArtifact(manifest)?.fingerprint ?? null;
|
|
46
51
|
const inference = manifest?.inference ?? {};
|
|
47
52
|
const layerPattern = inference?.layerPattern ?? {};
|
|
48
53
|
const quantizationInfo = manifest?.quantizationInfo ?? {};
|
|
@@ -75,6 +80,7 @@ function buildManifestFingerprint(manifest) {
|
|
|
75
80
|
},
|
|
76
81
|
},
|
|
77
82
|
shards,
|
|
83
|
+
sourceArtifactFingerprint,
|
|
78
84
|
});
|
|
79
85
|
}
|
|
80
86
|
|
|
@@ -119,16 +125,27 @@ export async function ensureModelCached(modelId, modelBaseUrl) {
|
|
|
119
125
|
if (!cachedManifestText || !cachedManifest) {
|
|
120
126
|
log.warn(MODULE, `Cache miss: "${modelId}" has no readable manifest in OPFS; re-importing`);
|
|
121
127
|
} else {
|
|
128
|
+
const cachedSourceArtifact = resolveSourceArtifact(cachedManifest);
|
|
129
|
+
const sourceIntegrity = cachedSourceArtifact
|
|
130
|
+
? await verifyStoredSourceArtifact(cachedManifest, { checkHashes: false })
|
|
131
|
+
: null;
|
|
132
|
+
const sourceIntegrityValid = !sourceIntegrity || sourceIntegrity.valid;
|
|
133
|
+
if (sourceIntegrity && !sourceIntegrity.valid) {
|
|
134
|
+
log.warn(
|
|
135
|
+
MODULE,
|
|
136
|
+
`Cache stale: "${modelId}" direct-source assets are incomplete (${sourceIntegrity.missingFiles.join(', ')})`
|
|
137
|
+
);
|
|
138
|
+
}
|
|
122
139
|
const cachedFingerprint = buildManifestFingerprint(cachedManifest);
|
|
123
140
|
const remoteFingerprint = buildManifestFingerprint(remoteManifest);
|
|
124
|
-
if (cachedFingerprint === remoteFingerprint) {
|
|
141
|
+
if (sourceIntegrityValid && cachedFingerprint === remoteFingerprint) {
|
|
125
142
|
log.info(MODULE, `Cache hit: "${modelId}"`);
|
|
126
143
|
return { cached: true, fromCache: true, modelId, error: null };
|
|
127
144
|
}
|
|
128
145
|
|
|
129
146
|
const sameShards = hasSameShardSet(cachedManifest, remoteManifest);
|
|
130
147
|
const sameHashAlgorithm = (cachedManifest?.hashAlgorithm ?? null) === (remoteManifest?.hashAlgorithm ?? null);
|
|
131
|
-
if (sameShards && sameHashAlgorithm) {
|
|
148
|
+
if (sourceIntegrityValid && sameShards && sameHashAlgorithm) {
|
|
132
149
|
await openModelStore(modelId);
|
|
133
150
|
await saveManifest(remoteManifestText);
|
|
134
151
|
log.info(MODULE, `Cache manifest refreshed: "${modelId}" (shards unchanged)`);
|
|
@@ -138,8 +155,8 @@ export async function ensureModelCached(modelId, modelBaseUrl) {
|
|
|
138
155
|
}
|
|
139
156
|
} catch (error) {
|
|
140
157
|
const message = toErrorMessage(error);
|
|
141
|
-
log.warn(MODULE, `Cache validation
|
|
142
|
-
return { cached:
|
|
158
|
+
log.warn(MODULE, `Cache validation failed (${message}); refusing cached model "${modelId}"`);
|
|
159
|
+
return { cached: false, fromCache: false, modelId, error: message };
|
|
143
160
|
}
|
|
144
161
|
}
|
|
145
162
|
} catch (error) {
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
export interface RuntimeCompositionBridge {
|
|
2
|
+
getRuntimeConfig: () => Record<string, unknown> | null;
|
|
3
|
+
setRuntimeConfig: (runtimeConfig: Record<string, unknown> | null) => void;
|
|
4
|
+
}
|
|
5
|
+
|
|
6
|
+
export interface RuntimeInputCompositionHandlers {
|
|
7
|
+
loadRuntimeConfigFromRef?: (
|
|
8
|
+
ref: string,
|
|
9
|
+
options?: Record<string, unknown>
|
|
10
|
+
) => Promise<Record<string, unknown> | null>;
|
|
11
|
+
applyRuntimePreset?: (
|
|
12
|
+
runtimePreset: string,
|
|
13
|
+
options?: Record<string, unknown>
|
|
14
|
+
) => Promise<void>;
|
|
15
|
+
applyRuntimeConfigFromUrl?: (
|
|
16
|
+
runtimeConfigUrl: string,
|
|
17
|
+
options?: Record<string, unknown>
|
|
18
|
+
) => Promise<void>;
|
|
19
|
+
}
|
|
20
|
+
|
|
21
|
+
export interface OrderedRuntimeInputs {
|
|
22
|
+
configChain?: string[] | null;
|
|
23
|
+
runtimePreset?: string | null;
|
|
24
|
+
runtimeConfigUrl?: string | null;
|
|
25
|
+
runtimeConfig?: Record<string, unknown> | null;
|
|
26
|
+
runtimeContractPatch?: Record<string, unknown> | null | (() => Record<string, unknown> | null);
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
export declare function resolveRuntimeFromConfig(
|
|
30
|
+
config: Record<string, unknown> | null | undefined
|
|
31
|
+
): Record<string, unknown> | null;
|
|
32
|
+
|
|
33
|
+
export declare function applyOrderedRuntimeInputs(
|
|
34
|
+
runtimeBridge: RuntimeCompositionBridge,
|
|
35
|
+
inputs?: OrderedRuntimeInputs,
|
|
36
|
+
handlers?: RuntimeInputCompositionHandlers,
|
|
37
|
+
options?: Record<string, unknown>
|
|
38
|
+
): Promise<void>;
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
import { mergeRuntimeValues } from '../config/runtime-merge.js';
|
|
2
|
+
|
|
3
|
+
export function resolveRuntimeFromConfig(config) {
|
|
4
|
+
if (!config || typeof config !== 'object') return null;
|
|
5
|
+
if (config.runtime && typeof config.runtime === 'object') return config.runtime;
|
|
6
|
+
if (config.shared || config.loading || config.inference || config.emulation) return config;
|
|
7
|
+
return null;
|
|
8
|
+
}
|
|
9
|
+
|
|
10
|
+
function mergeRuntimePatch(runtimeBridge, patch) {
|
|
11
|
+
if (!patch) return;
|
|
12
|
+
const mergedRuntime = mergeRuntimeValues(runtimeBridge.getRuntimeConfig(), patch);
|
|
13
|
+
runtimeBridge.setRuntimeConfig(mergedRuntime);
|
|
14
|
+
}
|
|
15
|
+
|
|
16
|
+
function requireRuntimeBridge(runtimeBridge) {
|
|
17
|
+
if (!runtimeBridge?.setRuntimeConfig) {
|
|
18
|
+
throw new Error('runtime bridge must provide setRuntimeConfig().');
|
|
19
|
+
}
|
|
20
|
+
if (typeof runtimeBridge.getRuntimeConfig !== 'function') {
|
|
21
|
+
throw new Error('runtime bridge must provide getRuntimeConfig().');
|
|
22
|
+
}
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
async function applyConfigChain(configChain, runtimeBridge, loadRuntimeConfigFromRef, options) {
|
|
26
|
+
if (!Array.isArray(configChain) || configChain.length === 0) {
|
|
27
|
+
return;
|
|
28
|
+
}
|
|
29
|
+
if (typeof loadRuntimeConfigFromRef !== 'function') {
|
|
30
|
+
throw new Error('runtime input composition does not support configChain on this surface.');
|
|
31
|
+
}
|
|
32
|
+
for (const ref of configChain) {
|
|
33
|
+
const loaded = await loadRuntimeConfigFromRef(ref, options);
|
|
34
|
+
const runtime = resolveRuntimeFromConfig(loaded);
|
|
35
|
+
if (!runtime) {
|
|
36
|
+
throw new Error(`Loaded runtime config "${ref}" is missing runtime fields.`);
|
|
37
|
+
}
|
|
38
|
+
mergeRuntimePatch(runtimeBridge, runtime);
|
|
39
|
+
}
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
async function applyRuntimePreset(runtimePreset, applyPreset, options) {
|
|
43
|
+
if (!runtimePreset) {
|
|
44
|
+
return;
|
|
45
|
+
}
|
|
46
|
+
if (typeof applyPreset !== 'function') {
|
|
47
|
+
throw new Error('runtime input composition does not support runtimePreset on this surface.');
|
|
48
|
+
}
|
|
49
|
+
await applyPreset(runtimePreset, options);
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
async function applyRuntimeConfigUrl(runtimeConfigUrl, applyConfigFromUrl, options) {
|
|
53
|
+
if (!runtimeConfigUrl) {
|
|
54
|
+
return;
|
|
55
|
+
}
|
|
56
|
+
if (typeof applyConfigFromUrl !== 'function') {
|
|
57
|
+
throw new Error('runtime input composition does not support runtimeConfigUrl on this surface.');
|
|
58
|
+
}
|
|
59
|
+
await applyConfigFromUrl(runtimeConfigUrl, options);
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
export async function applyOrderedRuntimeInputs(runtimeBridge, inputs = {}, handlers = {}, options = {}) {
|
|
63
|
+
requireRuntimeBridge(runtimeBridge);
|
|
64
|
+
|
|
65
|
+
await applyConfigChain(
|
|
66
|
+
inputs.configChain,
|
|
67
|
+
runtimeBridge,
|
|
68
|
+
handlers.loadRuntimeConfigFromRef,
|
|
69
|
+
options
|
|
70
|
+
);
|
|
71
|
+
await applyRuntimePreset(inputs.runtimePreset, handlers.applyRuntimePreset, options);
|
|
72
|
+
await applyRuntimeConfigUrl(inputs.runtimeConfigUrl, handlers.applyRuntimeConfigFromUrl, options);
|
|
73
|
+
|
|
74
|
+
if (inputs.runtimeConfig) {
|
|
75
|
+
const runtime = resolveRuntimeFromConfig(inputs.runtimeConfig);
|
|
76
|
+
if (!runtime) {
|
|
77
|
+
throw new Error('runtimeConfig is missing runtime fields');
|
|
78
|
+
}
|
|
79
|
+
mergeRuntimePatch(runtimeBridge, runtime);
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
const runtimeContractPatch = typeof inputs.runtimeContractPatch === 'function'
|
|
83
|
+
? inputs.runtimeContractPatch()
|
|
84
|
+
: (inputs.runtimeContractPatch ?? null);
|
|
85
|
+
mergeRuntimePatch(runtimeBridge, runtimeContractPatch);
|
|
86
|
+
}
|
|
@@ -3,6 +3,8 @@ import type { RDRRManifest } from '../formats/rdrr/index.js';
|
|
|
3
3
|
export declare const DIRECT_SOURCE_RUNTIME_MODE: 'direct-source';
|
|
4
4
|
export declare const DIRECT_SOURCE_RUNTIME_SCHEMA_VERSION: 1;
|
|
5
5
|
export declare const DIRECT_SOURCE_RUNTIME_SCHEMA: 'direct-source/v1';
|
|
6
|
+
export declare const DIRECT_SOURCE_PATH_RUNTIME_LOCAL: 'runtime-local';
|
|
7
|
+
export declare const DIRECT_SOURCE_PATH_ARTIFACT_RELATIVE: 'artifact-relative';
|
|
6
8
|
|
|
7
9
|
export interface SourceRuntimeTensor {
|
|
8
10
|
name: string;
|
|
@@ -17,6 +19,9 @@ export interface SourceRuntimeTensor {
|
|
|
17
19
|
export interface SourceRuntimeFile {
|
|
18
20
|
path: string;
|
|
19
21
|
size: number;
|
|
22
|
+
hash?: string | null;
|
|
23
|
+
hashAlgorithm?: string | null;
|
|
24
|
+
kind?: string | null;
|
|
20
25
|
}
|
|
21
26
|
|
|
22
27
|
export interface SourceRuntimeShardSource {
|
|
@@ -24,18 +29,40 @@ export interface SourceRuntimeShardSource {
|
|
|
24
29
|
path: string;
|
|
25
30
|
filename: string;
|
|
26
31
|
size: number;
|
|
32
|
+
hash: string;
|
|
33
|
+
hashAlgorithm: string;
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
export interface SourceRuntimeTokenizerMetadata {
|
|
37
|
+
jsonPath: string | null;
|
|
38
|
+
configPath: string | null;
|
|
39
|
+
modelPath: string | null;
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
export interface SourceRuntimeMetadata {
|
|
43
|
+
mode: 'direct-source';
|
|
44
|
+
schema: 'direct-source/v1';
|
|
45
|
+
schemaVersion: 1;
|
|
46
|
+
sourceKind: string | null;
|
|
47
|
+
hashAlgorithm: string;
|
|
48
|
+
pathSemantics: 'runtime-local' | 'artifact-relative';
|
|
49
|
+
sourceFiles: SourceRuntimeShardSource[];
|
|
50
|
+
auxiliaryFiles: SourceRuntimeFile[];
|
|
51
|
+
tokenizer: SourceRuntimeTokenizerMetadata;
|
|
27
52
|
}
|
|
28
53
|
|
|
29
54
|
export interface BuildSourceRuntimeBundleOptions {
|
|
30
55
|
modelId: string;
|
|
31
56
|
modelName?: string | null;
|
|
32
57
|
modelType: string;
|
|
58
|
+
sourceKind?: string | null;
|
|
33
59
|
architecture: Record<string, unknown> | string | null;
|
|
34
60
|
architectureHint?: string | null;
|
|
35
61
|
rawConfig?: Record<string, unknown> | null;
|
|
36
62
|
inference: Record<string, unknown>;
|
|
37
63
|
tensors: SourceRuntimeTensor[];
|
|
38
64
|
sourceFiles?: SourceRuntimeFile[] | null;
|
|
65
|
+
auxiliaryFiles?: SourceRuntimeFile[] | null;
|
|
39
66
|
resolveSourceSize?: ((path: string) => Promise<number> | number) | null;
|
|
40
67
|
sourceQuantization?: string | null;
|
|
41
68
|
quantizationInfo?: Record<string, unknown> | null;
|
|
@@ -44,6 +71,9 @@ export interface BuildSourceRuntimeBundleOptions {
|
|
|
44
71
|
tokenizerJson?: Record<string, unknown> | null;
|
|
45
72
|
tokenizerConfig?: Record<string, unknown> | null;
|
|
46
73
|
tokenizerModelName?: string | null;
|
|
74
|
+
tokenizerJsonPath?: string | null;
|
|
75
|
+
tokenizerConfigPath?: string | null;
|
|
76
|
+
tokenizerModelPath?: string | null;
|
|
47
77
|
eosTokenId?: number | number[] | null;
|
|
48
78
|
convertedAt?: string | null;
|
|
49
79
|
conversionInfo?: Record<string, unknown> | null;
|
|
@@ -58,9 +88,13 @@ export declare function buildSourceRuntimeBundle(
|
|
|
58
88
|
options: BuildSourceRuntimeBundleOptions
|
|
59
89
|
): Promise<BuildSourceRuntimeBundleResult>;
|
|
60
90
|
|
|
91
|
+
export declare function getSourceRuntimeMetadata(
|
|
92
|
+
manifest: RDRRManifest | Record<string, unknown> | null | undefined
|
|
93
|
+
): SourceRuntimeMetadata | null;
|
|
94
|
+
|
|
61
95
|
export interface CreateSourceStorageContextOptions {
|
|
62
96
|
manifest: RDRRManifest;
|
|
63
|
-
shardSources
|
|
97
|
+
shardSources?: SourceRuntimeShardSource[] | null;
|
|
64
98
|
readRange: (
|
|
65
99
|
path: string,
|
|
66
100
|
offset: number,
|
|
@@ -75,23 +109,24 @@ export interface CreateSourceStorageContextOptions {
|
|
|
75
109
|
readText?: (path: string) => Promise<string | Record<string, unknown> | null | undefined>;
|
|
76
110
|
readBinary?: (path: string) => Promise<ArrayBuffer | Uint8Array>;
|
|
77
111
|
tokenizerJsonPath?: string | null;
|
|
112
|
+
tokenizerConfigPath?: string | null;
|
|
78
113
|
tokenizerModelPath?: string | null;
|
|
79
114
|
verifyHashes?: boolean;
|
|
80
115
|
}
|
|
81
116
|
|
|
82
117
|
export interface SourceStorageContext {
|
|
83
118
|
loadShard: (index: number) => Promise<ArrayBuffer | Uint8Array>;
|
|
84
|
-
loadShardRange: (
|
|
119
|
+
loadShardRange: ((
|
|
85
120
|
index: number,
|
|
86
121
|
offset?: number,
|
|
87
122
|
length?: number | null
|
|
88
|
-
) => Promise<ArrayBuffer | Uint8Array
|
|
89
|
-
streamShardRange: (
|
|
123
|
+
) => Promise<ArrayBuffer | Uint8Array>) | null;
|
|
124
|
+
streamShardRange: ((
|
|
90
125
|
index: number,
|
|
91
126
|
offset?: number,
|
|
92
127
|
length?: number | null,
|
|
93
128
|
options?: { chunkBytes?: number }
|
|
94
|
-
) => AsyncIterable<Uint8Array
|
|
129
|
+
) => AsyncIterable<Uint8Array>) | null;
|
|
95
130
|
loadTokenizerJson: (() => Promise<Record<string, unknown> | null>) | null;
|
|
96
131
|
loadTokenizerModel: ((pathHint?: string) => Promise<ArrayBuffer | null>) | null;
|
|
97
132
|
verifyHashes: boolean;
|