@simulatte/doppler 0.1.5 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +126 -0
- package/README.md +25 -17
- package/package.json +20 -4
- package/src/adapters/adapter-registry.js +12 -1
- package/src/adapters/lora-loader.js +23 -6
- package/src/bridge/extension-client.d.ts +5 -0
- package/src/bridge/extension-client.js +40 -0
- package/src/bridge/index.d.ts +2 -1
- package/src/bridge/index.js +6 -4
- package/src/browser/browser-converter.js +26 -1
- package/src/browser/file-picker.js +6 -0
- package/src/browser/safetensors-parser-browser.js +84 -1
- package/src/browser/shard-io-browser.js +2 -2
- package/src/browser/tensor-source-download.js +8 -2
- package/src/browser/tensor-source-http.d.ts +1 -0
- package/src/browser/tensor-source-http.js +5 -1
- package/src/client/doppler-api.browser.js +20 -4
- package/src/client/doppler-api.js +19 -3
- package/src/client/doppler-provider/generation.js +12 -0
- package/src/client/doppler-provider/model-manager.d.ts +10 -0
- package/src/client/doppler-provider/model-manager.js +91 -19
- package/src/client/doppler-provider/source-runtime.d.ts +2 -1
- package/src/client/doppler-provider/source-runtime.js +132 -13
- package/src/client/doppler-registry.json +8 -7
- package/src/config/backward-registry-loader.js +17 -2
- package/src/config/execution-v0-contract-check.js +113 -15
- package/src/config/kernel-path-contract-check.js +57 -29
- package/src/config/kernel-path-loader.js +5 -36
- package/src/config/kernels/kernel-ref-digests.js +39 -39
- package/src/config/kernels/registry.js +14 -1
- package/src/config/kernels/registry.json +49 -7
- package/src/config/loader.d.ts +1 -1
- package/src/config/loader.js +43 -4
- package/src/config/merge-contract-check.js +59 -4
- package/src/config/merge-helpers.js +128 -7
- package/src/config/merge.d.ts +1 -0
- package/src/config/merge.js +28 -0
- package/src/config/param-validator.js +47 -2
- package/src/config/presets/kernel-paths/{gemma2-q4k-dequant-f32a.json → gemma2-q4k-dequant-f32a-nosubgroups.json} +3 -3
- package/src/config/presets/kernel-paths/gemma3-f16-fused-f32a-online-streamingprefill.json +223 -0
- package/src/config/presets/kernel-paths/{gemma3-q4k-dequant-f32a.json → gemma3-q4k-dequant-f32a-nosubgroups.json} +3 -3
- package/src/config/presets/kernel-paths/registry.json +29 -8
- package/src/config/presets/models/gemma2.json +2 -2
- package/src/config/presets/models/qwen3.json +9 -2
- package/src/config/presets/models/transformer.json +5 -0
- package/src/config/presets/runtime/experiments/bench/gemma3-bench-q4k.json +1 -1
- package/src/config/presets/runtime/experiments/debug/gemma3-debug-q4k.json +1 -1
- package/src/config/presets/runtime/experiments/verify/gemma3-verify.json +1 -1
- package/src/config/presets/runtime/kernels/dequant-f16-q4k.json +6 -13
- package/src/config/presets/runtime/kernels/dequant-f32-q4k.json +6 -13
- package/src/config/presets/runtime/kernels/embeddinggemma-q4k-dequant-f32a.json +37 -0
- package/src/config/presets/runtime/kernels/fused-q4k.json +6 -13
- package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f16a.json +33 -0
- package/src/config/presets/runtime/kernels/gemma2-q4k-dequant-f32a-nosubgroups.json +33 -0
- package/src/config/presets/runtime/kernels/gemma2-q4k-fused-f32a.json +33 -0
- package/src/config/presets/runtime/kernels/safe-q4k.json +6 -13
- package/src/config/presets/runtime/platform/metal-apple-q4k.json +1 -1
- package/src/config/required-inference-fields-contract-check.js +6 -0
- package/src/config/runtime.js +6 -1
- package/src/config/schema/debug.schema.d.ts +5 -0
- package/src/config/schema/doppler.schema.js +16 -21
- package/src/config/schema/inference-defaults.schema.js +6 -3
- package/src/config/schema/inference.schema.d.ts +9 -0
- package/src/config/schema/kernel-path.schema.d.ts +11 -1
- package/src/config/schema/kernel-thresholds.schema.js +12 -4
- package/src/config/schema/manifest.schema.d.ts +8 -1
- package/src/config/schema/manifest.schema.js +19 -3
- package/src/config/training-defaults.js +30 -22
- package/src/converter/conversion-plan.js +94 -9
- package/src/converter/core.d.ts +7 -0
- package/src/converter/core.js +14 -9
- package/src/converter/execution-v0-manifest.js +4 -1
- package/src/converter/index.d.ts +1 -0
- package/src/converter/index.js +1 -0
- package/src/converter/manifest-inference.js +43 -12
- package/src/converter/parsers/diffusion.js +0 -3
- package/src/converter/quantization-info.js +35 -15
- package/src/converter/rope-config.js +42 -0
- package/src/converter/shard-packer.d.ts +1 -1
- package/src/converter/shard-packer.js +4 -1
- package/src/debug/config.js +123 -11
- package/src/debug/signals.js +7 -1
- package/src/debug/tensor.d.ts +2 -0
- package/src/debug/tensor.js +13 -2
- package/src/distribution/p2p-control-plane.js +52 -12
- package/src/distribution/p2p-observability.js +43 -7
- package/src/distribution/p2p-webrtc-browser.js +20 -0
- package/src/distribution/shard-delivery.js +77 -26
- package/src/formats/gguf/types.js +33 -16
- package/src/formats/rdrr/groups.d.ts +12 -4
- package/src/formats/rdrr/groups.js +3 -6
- package/src/formats/rdrr/parsing.js +39 -2
- package/src/formats/rdrr/types.d.ts +2 -1
- package/src/gpu/command-recorder.js +86 -61
- package/src/gpu/device.d.ts +1 -0
- package/src/gpu/device.js +131 -19
- package/src/gpu/kernel-tuner/benchmarks.js +326 -316
- package/src/gpu/kernel-tuner/cache.js +71 -4
- package/src/gpu/kernel-tuner/tuner.js +22 -4
- package/src/gpu/kernels/attention.js +113 -34
- package/src/gpu/kernels/backward/adam.js +62 -58
- package/src/gpu/kernels/backward/attention_backward.js +257 -169
- package/src/gpu/kernels/backward/conv2d_backward.js +14 -1
- package/src/gpu/kernels/bias_add.wgsl +8 -6
- package/src/gpu/kernels/bias_add_f16.wgsl +8 -5
- package/src/gpu/kernels/cast.js +191 -149
- package/src/gpu/kernels/check-stop.js +33 -44
- package/src/gpu/kernels/conv2d.js +27 -17
- package/src/gpu/kernels/conv2d.wgsl +7 -8
- package/src/gpu/kernels/conv2d_f16.wgsl +7 -8
- package/src/gpu/kernels/cross_entropy_loss.js +21 -15
- package/src/gpu/kernels/depthwise_conv2d.js +37 -26
- package/src/gpu/kernels/depthwise_conv2d.wgsl +6 -9
- package/src/gpu/kernels/depthwise_conv2d_f16.wgsl +6 -9
- package/src/gpu/kernels/dequant.js +178 -126
- package/src/gpu/kernels/energy.d.ts +3 -21
- package/src/gpu/kernels/energy.js +111 -88
- package/src/gpu/kernels/feature-check.js +1 -1
- package/src/gpu/kernels/fused_ffn.js +84 -65
- package/src/gpu/kernels/fused_matmul_residual.js +56 -33
- package/src/gpu/kernels/fused_matmul_rmsnorm.js +62 -45
- package/src/gpu/kernels/gather.js +33 -15
- package/src/gpu/kernels/gelu.js +19 -11
- package/src/gpu/kernels/grouped_pointwise_conv2d.js +34 -23
- package/src/gpu/kernels/grouped_pointwise_conv2d.wgsl +6 -9
- package/src/gpu/kernels/grouped_pointwise_conv2d_f16.wgsl +6 -9
- package/src/gpu/kernels/groupnorm.js +34 -23
- package/src/gpu/kernels/kv-quantize.js +5 -2
- package/src/gpu/kernels/layernorm.js +35 -19
- package/src/gpu/kernels/logit-merge.js +5 -3
- package/src/gpu/kernels/matmul.js +83 -39
- package/src/gpu/kernels/modulate.js +23 -15
- package/src/gpu/kernels/moe.js +221 -175
- package/src/gpu/kernels/pixel_shuffle.js +22 -14
- package/src/gpu/kernels/pixel_shuffle.wgsl +4 -5
- package/src/gpu/kernels/pixel_shuffle_f16.wgsl +4 -5
- package/src/gpu/kernels/relu.js +31 -10
- package/src/gpu/kernels/relu.wgsl +2 -1
- package/src/gpu/kernels/relu_f16.wgsl +2 -1
- package/src/gpu/kernels/repeat_channels.js +25 -17
- package/src/gpu/kernels/repeat_channels.wgsl +4 -5
- package/src/gpu/kernels/repeat_channels_f16.wgsl +4 -5
- package/src/gpu/kernels/residual.js +69 -23
- package/src/gpu/kernels/residual.wgsl +6 -3
- package/src/gpu/kernels/residual_f16.wgsl +2 -1
- package/src/gpu/kernels/residual_f16_vec4.wgsl +2 -1
- package/src/gpu/kernels/residual_vec4.wgsl +2 -1
- package/src/gpu/kernels/rmsnorm.js +96 -28
- package/src/gpu/kernels/rmsnorm.wgsl +14 -6
- package/src/gpu/kernels/rmsnorm_f16.wgsl +10 -2
- package/src/gpu/kernels/rope.d.ts +2 -0
- package/src/gpu/kernels/rope.js +14 -1
- package/src/gpu/kernels/rope.wgsl +56 -40
- package/src/gpu/kernels/sample.js +27 -38
- package/src/gpu/kernels/sana_linear_attention.js +19 -12
- package/src/gpu/kernels/sana_linear_attention_apply.wgsl +4 -5
- package/src/gpu/kernels/sana_linear_attention_apply_f16.wgsl +4 -5
- package/src/gpu/kernels/sana_linear_attention_summary.wgsl +4 -0
- package/src/gpu/kernels/sana_linear_attention_summary_f16.wgsl +4 -0
- package/src/gpu/kernels/scale.js +18 -11
- package/src/gpu/kernels/shader-cache.js +4 -2
- package/src/gpu/kernels/silu.d.ts +1 -0
- package/src/gpu/kernels/silu.js +148 -82
- package/src/gpu/kernels/silu.wgsl +19 -9
- package/src/gpu/kernels/silu_f16.wgsl +19 -9
- package/src/gpu/kernels/softmax.js +44 -25
- package/src/gpu/kernels/split_qkv.js +23 -13
- package/src/gpu/kernels/transpose.js +31 -10
- package/src/gpu/kernels/transpose.wgsl +6 -5
- package/src/gpu/kernels/upsample2d.js +22 -13
- package/src/gpu/kernels/upsample2d.wgsl +6 -9
- package/src/gpu/kernels/upsample2d_f16.wgsl +6 -9
- package/src/gpu/kernels/utils.js +35 -13
- package/src/gpu/partitioned-buffer-pool.js +10 -2
- package/src/gpu/perf-guards.js +2 -9
- package/src/gpu/profiler.js +27 -22
- package/src/gpu/readback-utils.d.ts +16 -0
- package/src/gpu/readback-utils.js +41 -0
- package/src/gpu/submit-tracker.js +13 -0
- package/src/gpu/uniform-cache.d.ts +1 -0
- package/src/gpu/uniform-cache.js +30 -9
- package/src/hotswap/intent-bundle.js +6 -0
- package/src/hotswap/manifest.d.ts +10 -1
- package/src/hotswap/manifest.js +12 -2
- package/src/hotswap/runtime.js +30 -8
- package/src/index-browser.d.ts +44 -0
- package/src/index-browser.js +14 -0
- package/src/inference/browser-harness-contract-helpers.d.ts +5 -0
- package/src/inference/browser-harness-contract-helpers.js +28 -0
- package/src/inference/browser-harness-diffusion-energy-suites.d.ts +2 -0
- package/src/inference/browser-harness-diffusion-energy-suites.js +269 -0
- package/src/inference/browser-harness-model-helpers.d.ts +16 -0
- package/src/inference/browser-harness-model-helpers.js +217 -0
- package/src/inference/browser-harness-report-helpers.d.ts +7 -0
- package/src/inference/browser-harness-report-helpers.js +42 -0
- package/src/inference/browser-harness-runtime-helpers.d.ts +61 -0
- package/src/inference/browser-harness-runtime-helpers.js +415 -0
- package/src/inference/browser-harness-suite-helpers.d.ts +28 -0
- package/src/inference/browser-harness-suite-helpers.js +268 -0
- package/src/inference/browser-harness-text-helpers.d.ts +27 -0
- package/src/inference/browser-harness-text-helpers.js +788 -0
- package/src/inference/browser-harness.d.ts +6 -0
- package/src/inference/browser-harness.js +130 -1950
- package/src/inference/kv-cache/base.js +140 -94
- package/src/inference/kv-cache/tiered.js +5 -3
- package/src/inference/moe-router.js +88 -56
- package/src/inference/multi-model-network.js +5 -3
- package/src/inference/network-evolution.d.ts +11 -2
- package/src/inference/network-evolution.js +20 -21
- package/src/inference/pipelines/context.d.ts +3 -0
- package/src/inference/pipelines/context.js +142 -2
- package/src/inference/pipelines/diffusion/helpers.js +7 -2
- package/src/inference/pipelines/diffusion/pipeline.js +17 -7
- package/src/inference/pipelines/diffusion/sd3-transformer.js +10 -10
- package/src/inference/pipelines/diffusion/text-encoder-gpu.d.ts +5 -0
- package/src/inference/pipelines/diffusion/text-encoder-gpu.js +27 -15
- package/src/inference/pipelines/diffusion/vae.js +3 -7
- package/src/inference/pipelines/energy/pipeline.js +27 -21
- package/src/inference/pipelines/energy/quintel.d.ts +5 -0
- package/src/inference/pipelines/energy/quintel.js +11 -0
- package/src/inference/pipelines/energy-head/row-head-pipeline.js +17 -13
- package/src/inference/pipelines/structured/json-head-pipeline.js +26 -11
- package/src/inference/pipelines/text/attention/projections.js +151 -101
- package/src/inference/pipelines/text/attention/record.js +73 -10
- package/src/inference/pipelines/text/attention/run.js +73 -10
- package/src/inference/pipelines/text/chat-format.js +25 -1
- package/src/inference/pipelines/text/config.d.ts +4 -0
- package/src/inference/pipelines/text/config.js +71 -5
- package/src/inference/pipelines/text/embed.js +2 -8
- package/src/inference/pipelines/text/execution-plan.js +64 -50
- package/src/inference/pipelines/text/execution-v0-contract-helpers.d.ts +59 -0
- package/src/inference/pipelines/text/execution-v0-contract-helpers.js +937 -0
- package/src/inference/pipelines/text/execution-v0-runtime-builders.d.ts +15 -0
- package/src/inference/pipelines/text/execution-v0-runtime-builders.js +279 -0
- package/src/inference/pipelines/text/execution-v0.js +78 -1002
- package/src/inference/pipelines/text/ffn/standard.js +3 -0
- package/src/inference/pipelines/text/generator-steps.d.ts +46 -0
- package/src/inference/pipelines/text/generator-steps.js +298 -207
- package/src/inference/pipelines/text/generator.js +6 -23
- package/src/inference/pipelines/text/init.d.ts +4 -0
- package/src/inference/pipelines/text/init.js +134 -29
- package/src/inference/pipelines/text/kernel-path-auto-select.js +2 -0
- package/src/inference/pipelines/text/kernel-trace.d.ts +2 -0
- package/src/inference/pipelines/text/kernel-trace.js +6 -0
- package/src/inference/pipelines/text/layer.js +14 -9
- package/src/inference/pipelines/text/linear-attention.d.ts +10 -0
- package/src/inference/pipelines/text/linear-attention.js +80 -6
- package/src/inference/pipelines/text/logits/gpu.js +10 -5
- package/src/inference/pipelines/text/logits/index.js +10 -11
- package/src/inference/pipelines/text/logits/utils.d.ts +7 -0
- package/src/inference/pipelines/text/logits/utils.js +9 -0
- package/src/inference/pipelines/text/lora-apply.js +50 -32
- package/src/inference/pipelines/text/model-load.js +279 -104
- package/src/inference/pipelines/text/moe-cache.js +5 -4
- package/src/inference/pipelines/text/moe-cpu-gptoss.js +74 -69
- package/src/inference/pipelines/text/moe-cpu.js +42 -38
- package/src/inference/pipelines/text/moe-gpu.js +110 -86
- package/src/inference/pipelines/text/ops.js +90 -90
- package/src/inference/pipelines/text/probes.js +9 -9
- package/src/inference/pipelines/text/weights.js +17 -7
- package/src/inference/pipelines/text.js +17 -1
- package/src/inference/speculative.d.ts +2 -2
- package/src/inference/speculative.js +4 -18
- package/src/inference/test-harness.d.ts +1 -1
- package/src/inference/test-harness.js +15 -5
- package/src/inference/tokenizer.d.ts +0 -5
- package/src/inference/tokenizer.js +4 -23
- package/src/inference/tokenizers/bpe.js +9 -0
- package/src/inference/tokenizers/bundled.js +176 -33
- package/src/inference/tokenizers/sentencepiece.js +12 -0
- package/src/loader/doppler-loader.js +38 -22
- package/src/loader/dtype-utils.js +3 -44
- package/src/loader/embedding-loader.js +7 -3
- package/src/loader/experts/expert-cache.js +13 -6
- package/src/loader/experts/expert-loader.js +10 -6
- package/src/loader/final-weights-loader.js +8 -4
- package/src/loader/layer-loader.js +2 -1
- package/src/loader/loader-state.js +2 -2
- package/src/loader/memory-monitor.js +8 -0
- package/src/loader/multi-model-loader.d.ts +14 -0
- package/src/loader/multi-model-loader.js +70 -24
- package/src/loader/shard-cache.js +81 -12
- package/src/loader/shard-resolver.js +25 -3
- package/src/loader/tensors/tensor-loader.js +209 -144
- package/src/loader/tensors/tensor-reader.js +76 -19
- package/src/loader/weight-downcast.js +1 -1
- package/src/memory/buffer-pool.d.ts +9 -1
- package/src/memory/buffer-pool.js +109 -44
- package/src/memory/unified-detect.js +1 -1
- package/src/rules/inference/kernel-path.rules.json +24 -8
- package/src/rules/rule-registry.js +25 -1
- package/src/rules/tooling/command-runtime.rules.json +18 -0
- package/src/storage/backends/opfs-store.js +68 -24
- package/src/storage/downloader.js +364 -83
- package/src/storage/index.d.ts +3 -0
- package/src/storage/index.js +3 -0
- package/src/storage/preflight.d.ts +2 -2
- package/src/storage/preflight.js +24 -2
- package/src/storage/quickstart-downloader.js +11 -5
- package/src/storage/registry.js +10 -4
- package/src/storage/reports.js +1 -1
- package/src/storage/shard-manager.d.ts +15 -1
- package/src/storage/shard-manager.js +51 -3
- package/src/storage/source-artifact-store.d.ts +52 -0
- package/src/storage/source-artifact-store.js +234 -0
- package/src/tooling/command-api-constants.d.ts +9 -0
- package/src/tooling/command-api-constants.js +9 -0
- package/src/tooling/command-api-family-normalizers.d.ts +9 -0
- package/src/tooling/command-api-family-normalizers.js +343 -0
- package/src/tooling/command-api-helpers.d.ts +25 -0
- package/src/tooling/command-api-helpers.js +262 -0
- package/src/tooling/command-api.d.ts +27 -1
- package/src/tooling/command-api.js +26 -473
- package/src/tooling/command-envelope.js +4 -1
- package/src/tooling/command-runner-shared.js +52 -18
- package/src/tooling/lean-execution-contract.js +150 -3
- package/src/tooling/node-browser-command-runner.d.ts +4 -0
- package/src/tooling/node-browser-command-runner.js +218 -273
- package/src/tooling/node-command-runner.js +44 -3
- package/src/tooling/node-converter.js +27 -1
- package/src/tooling/node-source-runtime.d.ts +1 -1
- package/src/tooling/node-source-runtime.js +84 -3
- package/src/tooling/node-webgpu.js +30 -105
- package/src/tooling/opfs-cache.js +21 -4
- package/src/tooling/runtime-input-composition.d.ts +38 -0
- package/src/tooling/runtime-input-composition.js +86 -0
- package/src/tooling/source-runtime-bundle.d.ts +40 -5
- package/src/tooling/source-runtime-bundle.js +261 -34
- package/src/tooling/source-runtime-materializer.d.ts +6 -0
- package/src/tooling/source-runtime-materializer.js +93 -0
- package/src/training/attention-backward.js +32 -17
- package/src/training/autograd.js +80 -52
- package/src/training/checkpoint-watch.d.ts +8 -0
- package/src/training/checkpoint-watch.js +139 -0
- package/src/training/checkpoint.d.ts +6 -1
- package/src/training/checkpoint.js +46 -7
- package/src/training/clip.js +2 -1
- package/src/training/datasets/token-batch.js +20 -8
- package/src/training/distillation/artifacts.d.ts +71 -0
- package/src/training/distillation/artifacts.js +132 -0
- package/src/training/distillation/checkpoint-watch.d.ts +10 -0
- package/src/training/distillation/checkpoint-watch.js +58 -0
- package/src/training/distillation/dataset.d.ts +59 -0
- package/src/training/distillation/dataset.js +337 -0
- package/src/training/distillation/eval.d.ts +34 -0
- package/src/training/distillation/eval.js +310 -0
- package/src/training/distillation/index.d.ts +29 -0
- package/src/training/distillation/index.js +29 -0
- package/src/training/distillation/runtime.d.ts +20 -0
- package/src/training/distillation/runtime.js +121 -0
- package/src/training/distillation/scoreboard.d.ts +6 -0
- package/src/training/distillation/scoreboard.js +8 -0
- package/src/training/distillation/stage-a.d.ts +45 -0
- package/src/training/distillation/stage-a.js +338 -0
- package/src/training/distillation/stage-b.d.ts +24 -0
- package/src/training/distillation/stage-b.js +20 -0
- package/src/training/distillation/student-fixture.d.ts +22 -0
- package/src/training/distillation/student-fixture.js +846 -0
- package/src/training/distillation/suite-data.d.ts +45 -0
- package/src/training/distillation/suite-data.js +189 -0
- package/src/training/index.d.ts +10 -0
- package/src/training/index.js +10 -0
- package/src/training/lora-pipeline.d.ts +40 -0
- package/src/training/lora-pipeline.js +793 -0
- package/src/training/lora.js +26 -12
- package/src/training/loss.js +5 -6
- package/src/training/objectives/cross_entropy.js +2 -5
- package/src/training/objectives/distill_kd.js +4 -8
- package/src/training/objectives/distill_triplet.js +4 -8
- package/src/training/objectives/ul_stage2_base.js +4 -8
- package/src/training/operator-artifacts.d.ts +62 -0
- package/src/training/operator-artifacts.js +140 -0
- package/src/training/operator-command.d.ts +5 -0
- package/src/training/operator-command.js +455 -0
- package/src/training/operator-eval.d.ts +48 -0
- package/src/training/operator-eval.js +230 -0
- package/src/training/operator-scoreboard.d.ts +5 -0
- package/src/training/operator-scoreboard.js +44 -0
- package/src/training/optimizer.js +19 -7
- package/src/training/runner.d.ts +52 -0
- package/src/training/runner.js +31 -5
- package/src/training/suite.d.ts +112 -0
- package/src/training/suite.js +24 -984
- package/src/training/tensor-factory.d.ts +9 -0
- package/src/training/tensor-factory.js +13 -0
- package/src/training/trainer.js +3 -5
- package/src/training/ul_dataset.js +3 -5
- package/src/training/workloads.d.ts +164 -0
- package/src/training/workloads.js +530 -0
- package/src/version.js +1 -1
- package/tools/convert-safetensors-node.js +22 -16
- package/tools/doppler-cli.js +179 -63
|
@@ -175,103 +175,103 @@ export async function doConv(
|
|
|
175
175
|
}
|
|
176
176
|
|
|
177
177
|
// Use the first 2x hidden projection channels as a gated conv-state projection.
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
swigluLimit: options.swigluLimit ?? null,
|
|
198
|
-
label: `${label}.activation`,
|
|
199
|
-
layerIdx,
|
|
200
|
-
}, recorder);
|
|
201
|
-
|
|
202
|
-
if (recorder) {
|
|
203
|
-
recorder.trackTemporaryBuffer(inProj.buffer);
|
|
204
|
-
} else {
|
|
205
|
-
releaseBuffer(inProj.buffer);
|
|
206
|
-
}
|
|
207
|
-
|
|
208
|
-
// Optional generic conv2d stage when explicit shape metadata is provided.
|
|
209
|
-
// LFM2 depthwise conv kernels use model-specific packing, so this path is best-effort only.
|
|
210
|
-
let convInput = activated;
|
|
211
|
-
if (convKernel && options.conv2d && options.conv2d.enabled === true) {
|
|
212
|
-
const convTensorInput = createTensor(activated.buffer, activated.dtype, [
|
|
213
|
-
options.conv2d.inChannels,
|
|
214
|
-
options.conv2d.height,
|
|
215
|
-
options.conv2d.width,
|
|
216
|
-
], `${label}.conv_input`);
|
|
217
|
-
const convOptions = {
|
|
218
|
-
inChannels: options.conv2d.inChannels,
|
|
219
|
-
outChannels: options.conv2d.outChannels,
|
|
220
|
-
height: options.conv2d.height,
|
|
221
|
-
width: options.conv2d.width,
|
|
222
|
-
kernelH: options.conv2d.kernelH,
|
|
223
|
-
kernelW: options.conv2d.kernelW,
|
|
224
|
-
stride: options.conv2d.stride ?? 1,
|
|
225
|
-
pad: options.conv2d.pad ?? 0,
|
|
226
|
-
};
|
|
227
|
-
const convResult = recorder
|
|
228
|
-
? await recordConv2D(recorder, convTensorInput, convKernel, null, convOptions)
|
|
229
|
-
: await runConv2D(convTensorInput, convKernel, null, convOptions);
|
|
230
|
-
convInput = createTensor(
|
|
231
|
-
convResult.buffer,
|
|
232
|
-
convResult.dtype,
|
|
233
|
-
[numTokens, hiddenSize],
|
|
234
|
-
`${label}.conv_output`
|
|
178
|
+
let inProj = null;
|
|
179
|
+
let activated = null;
|
|
180
|
+
let convInput = null;
|
|
181
|
+
let outProj = null;
|
|
182
|
+
try {
|
|
183
|
+
inProj = await doMatmul(
|
|
184
|
+
inputTensor,
|
|
185
|
+
convInProj,
|
|
186
|
+
numTokens,
|
|
187
|
+
hiddenSize * 2,
|
|
188
|
+
hiddenSize,
|
|
189
|
+
{
|
|
190
|
+
transposeB: 'auto',
|
|
191
|
+
label: `${label}.in_proj`,
|
|
192
|
+
layerIdx,
|
|
193
|
+
kernelPath,
|
|
194
|
+
role: 'conv_in_proj',
|
|
195
|
+
},
|
|
196
|
+
recorder
|
|
235
197
|
);
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
198
|
+
activated = await doSiLURowSplit(inProj, {
|
|
199
|
+
numTokens,
|
|
200
|
+
dim: hiddenSize,
|
|
201
|
+
activation: 'silu',
|
|
202
|
+
swigluLimit: options.swigluLimit ?? null,
|
|
203
|
+
label: `${label}.activation`,
|
|
204
|
+
layerIdx,
|
|
205
|
+
}, recorder);
|
|
206
|
+
|
|
207
|
+
releaseOrTrack(recorder, inProj.buffer);
|
|
208
|
+
inProj = null;
|
|
209
|
+
|
|
210
|
+
convInput = activated;
|
|
211
|
+
if (convKernel && options.conv2d && options.conv2d.enabled === true) {
|
|
212
|
+
const convTensorInput = createTensor(activated.buffer, activated.dtype, [
|
|
213
|
+
options.conv2d.inChannels,
|
|
214
|
+
options.conv2d.height,
|
|
215
|
+
options.conv2d.width,
|
|
216
|
+
], `${label}.conv_input`);
|
|
217
|
+
const convOptions = {
|
|
218
|
+
inChannels: options.conv2d.inChannels,
|
|
219
|
+
outChannels: options.conv2d.outChannels,
|
|
220
|
+
height: options.conv2d.height,
|
|
221
|
+
width: options.conv2d.width,
|
|
222
|
+
kernelH: options.conv2d.kernelH,
|
|
223
|
+
kernelW: options.conv2d.kernelW,
|
|
224
|
+
stride: options.conv2d.stride ?? 1,
|
|
225
|
+
pad: options.conv2d.pad ?? 0,
|
|
226
|
+
};
|
|
227
|
+
const convResult = recorder
|
|
228
|
+
? await recordConv2D(recorder, convTensorInput, convKernel, null, convOptions)
|
|
229
|
+
: await runConv2D(convTensorInput, convKernel, null, convOptions);
|
|
230
|
+
convInput = createTensor(
|
|
231
|
+
convResult.buffer,
|
|
232
|
+
convResult.dtype,
|
|
233
|
+
[numTokens, hiddenSize],
|
|
234
|
+
`${label}.conv_output`
|
|
235
|
+
);
|
|
236
|
+
releaseOrTrack(recorder, activated.buffer);
|
|
237
|
+
activated = null;
|
|
240
238
|
}
|
|
241
|
-
}
|
|
242
239
|
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
240
|
+
outProj = await doMatmul(
|
|
241
|
+
convInput,
|
|
242
|
+
convOutProj,
|
|
243
|
+
numTokens,
|
|
244
|
+
hiddenSize,
|
|
245
|
+
hiddenSize,
|
|
246
|
+
{
|
|
247
|
+
transposeB: 'auto',
|
|
248
|
+
label: `${label}.out_proj`,
|
|
249
|
+
layerIdx,
|
|
250
|
+
kernelPath,
|
|
251
|
+
role: 'conv_out_proj',
|
|
252
|
+
},
|
|
253
|
+
recorder
|
|
254
|
+
);
|
|
258
255
|
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
} else {
|
|
263
|
-
|
|
256
|
+
if (convInput && (!activated || convInput.buffer !== activated.buffer)) {
|
|
257
|
+
releaseOrTrack(recorder, convInput.buffer);
|
|
258
|
+
convInput = null;
|
|
259
|
+
} else if (activated) {
|
|
260
|
+
releaseOrTrack(recorder, activated.buffer);
|
|
261
|
+
activated = null;
|
|
264
262
|
}
|
|
265
|
-
} else if (recorder) {
|
|
266
|
-
recorder.trackTemporaryBuffer(activated.buffer);
|
|
267
|
-
} else {
|
|
268
|
-
releaseBuffer(activated.buffer);
|
|
269
|
-
}
|
|
270
263
|
|
|
271
|
-
|
|
272
|
-
|
|
264
|
+
if (kernelTrace.enabled && !recorder) {
|
|
265
|
+
await traceStep('conv', label, layerIdx, outProj.buffer, [numTokens, hiddenSize]);
|
|
266
|
+
}
|
|
267
|
+
return outProj;
|
|
268
|
+
} catch (error) {
|
|
269
|
+
if (outProj) releaseOrTrack(recorder, outProj.buffer);
|
|
270
|
+
if (convInput && (!activated || convInput.buffer !== activated.buffer)) releaseOrTrack(recorder, convInput.buffer);
|
|
271
|
+
if (activated) releaseOrTrack(recorder, activated.buffer);
|
|
272
|
+
if (inProj) releaseOrTrack(recorder, inProj.buffer);
|
|
273
|
+
throw error;
|
|
273
274
|
}
|
|
274
|
-
return outProj;
|
|
275
275
|
}
|
|
276
276
|
|
|
277
277
|
export async function doCast(input, toDtype, recorder) {
|
|
@@ -4,6 +4,7 @@ import { trace } from '../../../debug/index.js';
|
|
|
4
4
|
import { getDevice } from '../../../gpu/device.js';
|
|
5
5
|
import { allowReadback } from '../../../gpu/perf-guards.js';
|
|
6
6
|
import { f16ToF32 } from '../../../loader/dtype-utils.js';
|
|
7
|
+
import { readBufferSlice } from '../../../memory/buffer-pool.js';
|
|
7
8
|
|
|
8
9
|
|
|
9
10
|
const STAGE_DEFAULT_CATEGORY = {
|
|
@@ -11,6 +12,11 @@ const STAGE_DEFAULT_CATEGORY = {
|
|
|
11
12
|
// Attention stages (per-layer)
|
|
12
13
|
attn_input: 'attn',
|
|
13
14
|
attn_normed: 'attn',
|
|
15
|
+
linear_qkv_proj: 'attn',
|
|
16
|
+
linear_z_proj: 'attn',
|
|
17
|
+
linear_a_proj: 'attn',
|
|
18
|
+
linear_b_proj: 'attn',
|
|
19
|
+
linear_core_out: 'attn',
|
|
14
20
|
q_proj: 'attn',
|
|
15
21
|
k_proj: 'attn',
|
|
16
22
|
v_proj: 'attn',
|
|
@@ -139,22 +145,16 @@ export async function runProbes(stage, buffer, options) {
|
|
|
139
145
|
const alignedOffset = Math.floor(byteOffset / 4) * 4;
|
|
140
146
|
const offsetWithinRead = byteOffset - alignedOffset;
|
|
141
147
|
const readSize = 4; // Always read 4 bytes (aligned)
|
|
142
|
-
const
|
|
143
|
-
const enc = (device).createCommandEncoder();
|
|
144
|
-
enc.copyBufferToBuffer( (buffer), alignedOffset, staging, 0, readSize);
|
|
145
|
-
(device).queue.submit([enc.finish()]);
|
|
146
|
-
await staging.mapAsync(GPUMapMode.READ);
|
|
148
|
+
const readback = await readBufferSlice(buffer, alignedOffset, readSize);
|
|
147
149
|
let value;
|
|
148
150
|
if (dtype === 'f16') {
|
|
149
151
|
// offsetWithinRead is 0 or 2 for F16 - extract correct u16
|
|
150
|
-
const u16Array = new Uint16Array(
|
|
152
|
+
const u16Array = new Uint16Array(readback);
|
|
151
153
|
const u16Index = offsetWithinRead / 2;
|
|
152
154
|
value = f16ToF32(u16Array[u16Index]);
|
|
153
155
|
} else {
|
|
154
|
-
value = new Float32Array(
|
|
156
|
+
value = new Float32Array(readback)[0];
|
|
155
157
|
}
|
|
156
|
-
staging.unmap();
|
|
157
|
-
staging.destroy();
|
|
158
158
|
values.push(`${dimIdx}=${value.toFixed(4)}`);
|
|
159
159
|
}
|
|
160
160
|
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
|
|
2
2
|
|
|
3
3
|
import { getDevice } from '../../../gpu/device.js';
|
|
4
|
-
import { acquireBuffer } from '../../../memory/buffer-pool.js';
|
|
4
|
+
import { acquireBuffer, releaseBuffer } from '../../../memory/buffer-pool.js';
|
|
5
5
|
import { log } from '../../../debug/index.js';
|
|
6
6
|
import { isWeightBuffer, isCpuWeightBuffer, tagBufferDtype } from '../../../gpu/weight-buffer.js';
|
|
7
7
|
|
|
@@ -53,9 +53,14 @@ export function getWeightBuffer(weight, label) {
|
|
|
53
53
|
}
|
|
54
54
|
|
|
55
55
|
const buf = acquireBuffer(data.byteLength, undefined, label);
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
56
|
+
try {
|
|
57
|
+
device.queue.writeBuffer(buf, 0, ( (data)));
|
|
58
|
+
tagBufferDtype(buf, bufferDtype);
|
|
59
|
+
return buf;
|
|
60
|
+
} catch (error) {
|
|
61
|
+
releaseBuffer(buf);
|
|
62
|
+
throw error;
|
|
63
|
+
}
|
|
59
64
|
}
|
|
60
65
|
|
|
61
66
|
|
|
@@ -92,9 +97,14 @@ export function getNormWeightBuffer(weight, label, config, debugFlags) {
|
|
|
92
97
|
}
|
|
93
98
|
|
|
94
99
|
const buf = acquireBuffer(data.byteLength, undefined, label);
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
100
|
+
try {
|
|
101
|
+
device.queue.writeBuffer(buf, 0, ( (data)));
|
|
102
|
+
tagBufferDtype(buf, 'f32');
|
|
103
|
+
return buf;
|
|
104
|
+
} catch (error) {
|
|
105
|
+
releaseBuffer(buf);
|
|
106
|
+
throw error;
|
|
107
|
+
}
|
|
98
108
|
}
|
|
99
109
|
|
|
100
110
|
|
|
@@ -6,7 +6,7 @@ import { configurePerfGuards } from '../../gpu/perf-guards.js';
|
|
|
6
6
|
import { MoERouter } from '../moe-router.js';
|
|
7
7
|
import { DecodeBufferManager } from '../decode-buffers.js';
|
|
8
8
|
import { DecodeRing } from '../decode-ring.js';
|
|
9
|
-
import { applyPipelineContexts } from './context.js';
|
|
9
|
+
import { applyPipelineContexts, restorePipelineContexts } from './context.js';
|
|
10
10
|
import { createInitializedPipeline } from './factory.js';
|
|
11
11
|
|
|
12
12
|
// Pipeline sub-modules
|
|
@@ -44,6 +44,11 @@ import { getDopplerLoader } from '../../loader/doppler-loader.js';
|
|
|
44
44
|
import { registerPipeline, getPipelineFactory } from './registry.js';
|
|
45
45
|
import { selectRuleValue } from '../../rules/rule-registry.js';
|
|
46
46
|
|
|
47
|
+
function destroyMoERouter(router) {
|
|
48
|
+
if (router && typeof router.destroy === 'function') {
|
|
49
|
+
router.destroy();
|
|
50
|
+
}
|
|
51
|
+
}
|
|
47
52
|
|
|
48
53
|
|
|
49
54
|
// ============================================================================
|
|
@@ -102,6 +107,8 @@ export class InferencePipeline extends PipelineState {
|
|
|
102
107
|
this.manifest = manifest;
|
|
103
108
|
this.decodeRing?.release();
|
|
104
109
|
this.linearAttentionRuntime = resetLinearAttentionRuntime(this.linearAttentionRuntime);
|
|
110
|
+
destroyMoERouter(this.moeRouter);
|
|
111
|
+
this.moeRouter = null;
|
|
105
112
|
|
|
106
113
|
const executionV0Runtime = applyExecutionV0RuntimeConfig({
|
|
107
114
|
runtimeConfig: this.runtimeConfig,
|
|
@@ -299,9 +306,13 @@ export class InferencePipeline extends PipelineState {
|
|
|
299
306
|
const maxSeqLen = config.maxSeqLen;
|
|
300
307
|
const ropeBuffers = await initRoPEFrequencies({
|
|
301
308
|
headDim: config.headDim,
|
|
309
|
+
rotaryDim: config.ropeRotaryDim,
|
|
302
310
|
maxSeqLen,
|
|
303
311
|
ropeTheta: config.ropeTheta,
|
|
304
312
|
ropeLocalTheta: config.ropeLocalTheta,
|
|
313
|
+
mropeInterleaved: config.ropeInterleaved,
|
|
314
|
+
mropeSection: config.mropeSection,
|
|
315
|
+
partialRotaryFactor: config.partialRotaryFactor,
|
|
305
316
|
ropeScale: config.ropeScale,
|
|
306
317
|
ropeLocalScale: config.ropeLocalScale,
|
|
307
318
|
ropeScalingType: config.ropeScalingType,
|
|
@@ -486,12 +497,15 @@ export class InferencePipeline extends PipelineState {
|
|
|
486
497
|
this.expertWeights.clear();
|
|
487
498
|
this.linearAttentionRuntime = resetLinearAttentionRuntime(this.linearAttentionRuntime);
|
|
488
499
|
this.lora = null;
|
|
500
|
+
destroyMoERouter(this.moeRouter);
|
|
501
|
+
this.moeRouter = null;
|
|
489
502
|
if (this.finitenessBuffer) {
|
|
490
503
|
this.finitenessBuffer.destroy();
|
|
491
504
|
this.finitenessBuffer = null;
|
|
492
505
|
}
|
|
493
506
|
this.isLoaded = false;
|
|
494
507
|
this.currentSeqLen = 0;
|
|
508
|
+
restorePipelineContexts(this);
|
|
495
509
|
log.info('Pipeline', 'Unloaded');
|
|
496
510
|
}
|
|
497
511
|
|
|
@@ -529,6 +543,8 @@ export class InferencePipeline extends PipelineState {
|
|
|
529
543
|
releaseGPUResources() {
|
|
530
544
|
this.decodeBuffers?.release();
|
|
531
545
|
this.decodeRing?.release();
|
|
546
|
+
destroyMoERouter(this.moeRouter);
|
|
547
|
+
this.moeRouter = null;
|
|
532
548
|
if (this.finitenessBuffer) {
|
|
533
549
|
this.finitenessBuffer.destroy();
|
|
534
550
|
this.finitenessBuffer = null;
|
|
@@ -66,8 +66,8 @@ export interface SpeculativeConfig {
|
|
|
66
66
|
enableTreeDraft: boolean;
|
|
67
67
|
/** Temperature for draft sampling */
|
|
68
68
|
temperature: number;
|
|
69
|
-
/**
|
|
70
|
-
randomSeed
|
|
69
|
+
/** Deterministic seed for speculative sampling */
|
|
70
|
+
randomSeed: number;
|
|
71
71
|
}
|
|
72
72
|
|
|
73
73
|
/**
|
|
@@ -10,22 +10,6 @@ function createRng(seed) {
|
|
|
10
10
|
};
|
|
11
11
|
}
|
|
12
12
|
|
|
13
|
-
function createUnseededRng() {
|
|
14
|
-
let fallbackState = ((Date.now() >>> 0) ^ 0xa341316c) >>> 0;
|
|
15
|
-
return () => {
|
|
16
|
-
const cryptoApi = typeof globalThis !== 'undefined' ? globalThis.crypto : null;
|
|
17
|
-
if (cryptoApi && typeof cryptoApi.getRandomValues === 'function') {
|
|
18
|
-
const random = new Uint32Array(1);
|
|
19
|
-
cryptoApi.getRandomValues(random);
|
|
20
|
-
return random[0] / 4294967296;
|
|
21
|
-
}
|
|
22
|
-
fallbackState = (fallbackState + 0x6d2b79f5) | 0;
|
|
23
|
-
let t = Math.imul(fallbackState ^ (fallbackState >>> 15), 1 | fallbackState);
|
|
24
|
-
t = (t + Math.imul(t ^ (t >>> 7), 61 | t)) ^ t;
|
|
25
|
-
return ((t ^ (t >>> 14)) >>> 0) / 4294967296;
|
|
26
|
-
};
|
|
27
|
-
}
|
|
28
|
-
|
|
29
13
|
function coerceLogitsVector(value, label) {
|
|
30
14
|
if (value instanceof Float32Array) {
|
|
31
15
|
if (value.length === 0) {
|
|
@@ -110,6 +94,9 @@ export class SpeculativeDecoder {
|
|
|
110
94
|
if (config.temperature == null) {
|
|
111
95
|
throw new Error('SpeculativeDecoder requires temperature.');
|
|
112
96
|
}
|
|
97
|
+
if (!Number.isFinite(config.randomSeed)) {
|
|
98
|
+
throw new Error('SpeculativeDecoder requires randomSeed.');
|
|
99
|
+
}
|
|
113
100
|
|
|
114
101
|
assertTemperature(config.temperature, 'temperature');
|
|
115
102
|
this.numDraftTokens = config.numDraftTokens;
|
|
@@ -117,8 +104,7 @@ export class SpeculativeDecoder {
|
|
|
117
104
|
this.enableTreeDraft = config.enableTreeDraft;
|
|
118
105
|
this.temperature = config.temperature;
|
|
119
106
|
|
|
120
|
-
|
|
121
|
-
this.random = seed === null ? createUnseededRng() : createRng(seed);
|
|
107
|
+
this.random = createRng(Math.floor(config.randomSeed));
|
|
122
108
|
}
|
|
123
109
|
|
|
124
110
|
setDraftModel(model) {
|
|
@@ -74,7 +74,7 @@ export interface InitializeResult {
|
|
|
74
74
|
/**
|
|
75
75
|
* Discover available models from the catalog.json endpoint.
|
|
76
76
|
*
|
|
77
|
-
* @param fallbackModels -
|
|
77
|
+
* @param fallbackModels - Explicit fallback models to use when catalog fetch is unavailable
|
|
78
78
|
* @returns Array of model info objects
|
|
79
79
|
*/
|
|
80
80
|
export declare function discoverModels(
|
|
@@ -25,7 +25,7 @@ import {
|
|
|
25
25
|
|
|
26
26
|
|
|
27
27
|
export async function discoverModels(
|
|
28
|
-
fallbackModels
|
|
28
|
+
fallbackModels
|
|
29
29
|
) {
|
|
30
30
|
try {
|
|
31
31
|
const resp = await fetch('/models/catalog.json');
|
|
@@ -40,10 +40,13 @@ export async function discoverModels(
|
|
|
40
40
|
}));
|
|
41
41
|
}
|
|
42
42
|
}
|
|
43
|
-
} catch (e) {
|
|
44
|
-
|
|
43
|
+
} catch (e) {}
|
|
44
|
+
|
|
45
|
+
if (Array.isArray(fallbackModels) && fallbackModels.length > 0) {
|
|
46
|
+
return fallbackModels.map((id) => ({ id, name: id }));
|
|
45
47
|
}
|
|
46
|
-
|
|
48
|
+
|
|
49
|
+
throw new Error('discoverModels: failed to fetch /models/catalog.json and no explicit fallback model list was provided.');
|
|
47
50
|
}
|
|
48
51
|
|
|
49
52
|
// ============================================================================
|
|
@@ -238,7 +241,13 @@ export async function initializeInference(modelUrl, options = {}) {
|
|
|
238
241
|
onProgress('hotswap', 0.05, 'Loading hot-swap manifest...');
|
|
239
242
|
log(`Hot-swap: loading manifest ${hotSwapConfig.manifestUrl}`);
|
|
240
243
|
const hotSwapManifest = await fetchHotSwapManifest(hotSwapConfig.manifestUrl);
|
|
241
|
-
const verification = await verifyHotSwapManifest(hotSwapManifest, hotSwapConfig
|
|
244
|
+
const verification = await verifyHotSwapManifest(hotSwapManifest, hotSwapConfig, {
|
|
245
|
+
source: {
|
|
246
|
+
kind: 'remote',
|
|
247
|
+
isLocal: false,
|
|
248
|
+
url: hotSwapConfig.manifestUrl,
|
|
249
|
+
},
|
|
250
|
+
});
|
|
242
251
|
if (!verification.ok) {
|
|
243
252
|
throw new Error(`Hot-swap manifest rejected: ${verification.reason}`);
|
|
244
253
|
}
|
|
@@ -309,6 +318,7 @@ export async function initializeInference(modelUrl, options = {}) {
|
|
|
309
318
|
const pipeline = await createPipeline( ( (manifest)), {
|
|
310
319
|
storage: { loadShard },
|
|
311
320
|
gpu: { device },
|
|
321
|
+
runtime,
|
|
312
322
|
baseUrl: modelUrl,
|
|
313
323
|
onProgress: ( progress) => {
|
|
314
324
|
const pct = 0.2 + progress.percent * 0.8;
|
|
@@ -46,11 +46,6 @@ export declare class Tokenizer {
|
|
|
46
46
|
*/
|
|
47
47
|
initialize(manifest: ModelManifest, options?: TokenizerInitOptions): Promise<void>;
|
|
48
48
|
|
|
49
|
-
/**
|
|
50
|
-
* Infer HuggingFace model ID from manifest architecture
|
|
51
|
-
*/
|
|
52
|
-
private _inferHuggingFaceModel(manifest: ModelManifest): string | null;
|
|
53
|
-
|
|
54
49
|
/**
|
|
55
50
|
* Encode text to token IDs
|
|
56
51
|
*/
|
|
@@ -130,14 +130,12 @@ export class Tokenizer {
|
|
|
130
130
|
);
|
|
131
131
|
}
|
|
132
132
|
|
|
133
|
-
let hfModel = tokenizerConfig.hfModel;
|
|
133
|
+
let hfModel = tokenizerConfig.hfModel ?? tokenizerConfig.modelId ?? null;
|
|
134
134
|
const allowArchFallback = tokenizerConfig.allowArchFallback === true;
|
|
135
135
|
if (allowArchFallback && !hfModel) {
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
log.warn('Tokenizer', `Using inferred HuggingFace model: ${inferred}`);
|
|
140
|
-
}
|
|
136
|
+
throw new Error(
|
|
137
|
+
`[Tokenizer] tokenizer.allowArchFallback requires explicit tokenizer.hfModel or tokenizer.modelId for model "${modelId}".`
|
|
138
|
+
);
|
|
141
139
|
}
|
|
142
140
|
|
|
143
141
|
if (hfModel) {
|
|
@@ -212,23 +210,6 @@ export class Tokenizer {
|
|
|
212
210
|
|
|
213
211
|
this.config = tokenizerConfig;
|
|
214
212
|
}
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
_inferHuggingFaceModel(manifest) {
|
|
218
|
-
const tokenizer = manifest?.tokenizer ?? {};
|
|
219
|
-
if (typeof tokenizer.modelId === 'string' && tokenizer.modelId.length > 0) {
|
|
220
|
-
return tokenizer.modelId;
|
|
221
|
-
}
|
|
222
|
-
if (typeof tokenizer.hfModel === 'string' && tokenizer.hfModel.length > 0) {
|
|
223
|
-
return tokenizer.hfModel;
|
|
224
|
-
}
|
|
225
|
-
if (typeof manifest?.modelId === 'string' && manifest.modelId.length > 0) {
|
|
226
|
-
return manifest.modelId;
|
|
227
|
-
}
|
|
228
|
-
return null;
|
|
229
|
-
}
|
|
230
|
-
|
|
231
|
-
|
|
232
213
|
encode(text) {
|
|
233
214
|
if (!this.backend) {
|
|
234
215
|
throw new Error('Tokenizer not initialized');
|
|
@@ -21,8 +21,17 @@ export class BPETokenizer extends BaseTokenizer {
|
|
|
21
21
|
});
|
|
22
22
|
}
|
|
23
23
|
|
|
24
|
+
#resetState() {
|
|
25
|
+
this.#vocab.clear();
|
|
26
|
+
this.#reverseVocab.clear();
|
|
27
|
+
this.#merges = [];
|
|
28
|
+
this.#mergeRanks.clear();
|
|
29
|
+
this.vocabSize = 0;
|
|
30
|
+
}
|
|
31
|
+
|
|
24
32
|
|
|
25
33
|
load(vocab, merges) {
|
|
34
|
+
this.#resetState();
|
|
26
35
|
// Build vocab maps
|
|
27
36
|
for (const [token, id] of Object.entries(vocab)) {
|
|
28
37
|
this.#vocab.set(token, id);
|