@simulatte/doppler 0.1.4 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +26 -10
- package/package.json +30 -6
- package/src/client/doppler-api.browser.d.ts +1 -0
- package/src/client/doppler-api.browser.js +288 -0
- package/src/client/doppler-api.js +1 -1
- package/src/client/doppler-provider/types.js +1 -1
- package/src/config/execution-contract-check.d.ts +33 -0
- package/src/config/execution-contract-check.js +72 -0
- package/src/config/execution-v0-contract-check.d.ts +94 -0
- package/src/config/execution-v0-contract-check.js +251 -0
- package/src/config/execution-v0-graph-contract-check.d.ts +20 -0
- package/src/config/execution-v0-graph-contract-check.js +64 -0
- package/src/config/kernel-path-contract-check.d.ts +76 -0
- package/src/config/kernel-path-contract-check.js +479 -0
- package/src/config/kernel-path-loader.d.ts +16 -0
- package/src/config/kernel-path-loader.js +54 -0
- package/src/config/kernels/kernel-ref-digests.js +39 -27
- package/src/config/kernels/registry.json +598 -2
- package/src/config/loader.js +81 -48
- package/src/config/merge-contract-check.d.ts +16 -0
- package/src/config/merge-contract-check.js +321 -0
- package/src/config/merge-helpers.d.ts +58 -0
- package/src/config/merge-helpers.js +54 -0
- package/src/config/merge.js +21 -6
- package/src/config/presets/models/janus-text.json +2 -0
- package/src/config/presets/models/qwen3.json +9 -2
- package/src/config/presets/models/transformer.json +5 -0
- package/src/config/quantization-contract-check.d.ts +12 -0
- package/src/config/quantization-contract-check.js +91 -0
- package/src/config/required-inference-fields-contract-check.d.ts +24 -0
- package/src/config/required-inference-fields-contract-check.js +237 -0
- package/src/config/schema/browser-suite-metrics.schema.d.ts +17 -0
- package/src/config/schema/browser-suite-metrics.schema.js +46 -0
- package/src/config/schema/conversion-report.schema.d.ts +40 -0
- package/src/config/schema/conversion-report.schema.js +108 -0
- package/src/config/schema/doppler.schema.js +12 -18
- package/src/config/schema/index.d.ts +22 -0
- package/src/config/schema/index.js +18 -0
- package/src/config/schema/inference-defaults.schema.js +3 -0
- package/src/config/schema/inference.schema.d.ts +9 -0
- package/src/config/schema/kernel-path.schema.d.ts +6 -0
- package/src/config/schema/manifest.schema.d.ts +6 -0
- package/src/config/schema/manifest.schema.js +3 -0
- package/src/converter/core.d.ts +10 -0
- package/src/converter/core.js +27 -2
- package/src/converter/parsers/diffusion.js +63 -3
- package/src/converter/rope-config.js +42 -0
- package/src/gpu/device.js +58 -0
- package/src/gpu/kernels/attention.js +98 -0
- package/src/gpu/kernels/bias_add.wgsl +8 -6
- package/src/gpu/kernels/bias_add_f16.wgsl +8 -5
- package/src/gpu/kernels/conv2d.js +1 -1
- package/src/gpu/kernels/conv2d.wgsl +7 -8
- package/src/gpu/kernels/conv2d_f16.wgsl +7 -8
- package/src/gpu/kernels/depthwise_conv2d.d.ts +29 -0
- package/src/gpu/kernels/depthwise_conv2d.js +99 -0
- package/src/gpu/kernels/depthwise_conv2d.wgsl +55 -0
- package/src/gpu/kernels/depthwise_conv2d_f16.wgsl +59 -0
- package/src/gpu/kernels/grouped_pointwise_conv2d.d.ts +27 -0
- package/src/gpu/kernels/grouped_pointwise_conv2d.js +93 -0
- package/src/gpu/kernels/grouped_pointwise_conv2d.wgsl +44 -0
- package/src/gpu/kernels/grouped_pointwise_conv2d_f16.wgsl +48 -0
- package/src/gpu/kernels/index.d.ts +30 -0
- package/src/gpu/kernels/index.js +25 -0
- package/src/gpu/kernels/matmul.js +25 -0
- package/src/gpu/kernels/pixel_shuffle.js +1 -1
- package/src/gpu/kernels/pixel_shuffle.wgsl +4 -5
- package/src/gpu/kernels/pixel_shuffle_f16.wgsl +4 -5
- package/src/gpu/kernels/relu.d.ts +18 -0
- package/src/gpu/kernels/relu.js +58 -0
- package/src/gpu/kernels/relu.wgsl +22 -0
- package/src/gpu/kernels/relu_f16.wgsl +24 -0
- package/src/gpu/kernels/repeat_channels.d.ts +21 -0
- package/src/gpu/kernels/repeat_channels.js +60 -0
- package/src/gpu/kernels/repeat_channels.wgsl +28 -0
- package/src/gpu/kernels/repeat_channels_f16.wgsl +30 -0
- package/src/gpu/kernels/residual.js +44 -8
- package/src/gpu/kernels/residual.wgsl +6 -3
- package/src/gpu/kernels/residual_f16.wgsl +2 -1
- package/src/gpu/kernels/residual_f16_vec4.wgsl +2 -1
- package/src/gpu/kernels/residual_vec4.wgsl +2 -1
- package/src/gpu/kernels/rmsnorm.js +58 -6
- package/src/gpu/kernels/rmsnorm.wgsl +14 -6
- package/src/gpu/kernels/rmsnorm_f16.wgsl +10 -2
- package/src/gpu/kernels/rope.d.ts +2 -0
- package/src/gpu/kernels/rope.js +11 -1
- package/src/gpu/kernels/rope.wgsl +56 -40
- package/src/gpu/kernels/sana_linear_attention.d.ts +27 -0
- package/src/gpu/kernels/sana_linear_attention.js +121 -0
- package/src/gpu/kernels/sana_linear_attention_apply.wgsl +43 -0
- package/src/gpu/kernels/sana_linear_attention_apply_f16.wgsl +46 -0
- package/src/gpu/kernels/sana_linear_attention_summary.wgsl +51 -0
- package/src/gpu/kernels/sana_linear_attention_summary_f16.wgsl +53 -0
- package/src/gpu/kernels/silu.d.ts +1 -0
- package/src/gpu/kernels/silu.js +32 -14
- package/src/gpu/kernels/silu.wgsl +19 -9
- package/src/gpu/kernels/silu_f16.wgsl +19 -9
- package/src/gpu/kernels/transpose.js +15 -2
- package/src/gpu/kernels/transpose.wgsl +5 -6
- package/src/gpu/kernels/upsample2d.js +2 -1
- package/src/gpu/kernels/upsample2d.wgsl +6 -9
- package/src/gpu/kernels/upsample2d_f16.wgsl +6 -9
- package/src/gpu/kernels/utils.js +16 -1
- package/src/index-browser.d.ts +1 -1
- package/src/index-browser.js +2 -2
- package/src/index.js +1 -1
- package/src/inference/browser-harness.js +109 -23
- package/src/inference/pipelines/diffusion/init.js +14 -0
- package/src/inference/pipelines/diffusion/pipeline.js +215 -77
- package/src/inference/pipelines/diffusion/sana-transformer.d.ts +53 -0
- package/src/inference/pipelines/diffusion/sana-transformer.js +738 -0
- package/src/inference/pipelines/diffusion/scheduler.d.ts +17 -1
- package/src/inference/pipelines/diffusion/scheduler.js +91 -3
- package/src/inference/pipelines/diffusion/text-encoder-gpu.d.ts +11 -4
- package/src/inference/pipelines/diffusion/text-encoder-gpu.js +282 -0
- package/src/inference/pipelines/diffusion/text-encoder.js +18 -1
- package/src/inference/pipelines/diffusion/types.d.ts +4 -0
- package/src/inference/pipelines/diffusion/vae.js +782 -78
- package/src/inference/pipelines/text/attention/record.js +11 -2
- package/src/inference/pipelines/text/attention/run.js +11 -2
- package/src/inference/pipelines/text/chat-format.js +25 -1
- package/src/inference/pipelines/text/config.d.ts +9 -0
- package/src/inference/pipelines/text/config.js +69 -2
- package/src/inference/pipelines/text/execution-plan.js +23 -31
- package/src/inference/pipelines/text/execution-v0.js +43 -95
- package/src/inference/pipelines/text/ffn/standard.js +3 -0
- package/src/inference/pipelines/text/init.d.ts +4 -0
- package/src/inference/pipelines/text/init.js +56 -9
- package/src/inference/pipelines/text/layer.js +11 -0
- package/src/inference/pipelines/text.js +4 -0
- package/src/inference/tokenizers/bundled.js +156 -33
- package/src/rules/execution-rules-contract-check.d.ts +17 -0
- package/src/rules/execution-rules-contract-check.js +245 -0
- package/src/rules/kernels/depthwise-conv2d.rules.json +6 -0
- package/src/rules/kernels/grouped-pointwise-conv2d.rules.json +6 -0
- package/src/rules/kernels/relu.rules.json +6 -0
- package/src/rules/kernels/repeat-channels.rules.json +6 -0
- package/src/rules/kernels/sana-linear-attention.rules.json +6 -0
- package/src/rules/layer-pattern-contract-check.d.ts +17 -0
- package/src/rules/layer-pattern-contract-check.js +231 -0
- package/src/rules/rule-registry.d.ts +28 -0
- package/src/rules/rule-registry.js +38 -0
- package/src/rules/tooling/command-runtime.rules.json +18 -0
- package/src/tooling/command-api.d.ts +27 -1
- package/src/tooling/command-api.js +142 -3
- package/src/tooling/conversion-config-materializer.d.ts +24 -0
- package/src/tooling/conversion-config-materializer.js +99 -0
- package/src/tooling/lean-execution-contract-runner.d.ts +43 -0
- package/src/tooling/lean-execution-contract-runner.js +158 -0
- package/src/tooling/node-browser-command-runner.d.ts +4 -0
- package/src/tooling/node-browser-command-runner.js +58 -3
- package/src/tooling/node-command-runner.js +15 -0
- package/src/tooling/node-convert.d.ts +10 -0
- package/src/tooling/node-converter.js +59 -0
- package/src/tooling/node-webgpu.js +11 -89
- package/src/training/checkpoint-watch.d.ts +7 -0
- package/src/training/checkpoint-watch.js +106 -0
- package/src/training/checkpoint.d.ts +6 -1
- package/src/training/checkpoint.js +12 -2
- package/src/training/distillation/artifacts.d.ts +71 -0
- package/src/training/distillation/artifacts.js +132 -0
- package/src/training/distillation/checkpoint-watch.d.ts +10 -0
- package/src/training/distillation/checkpoint-watch.js +57 -0
- package/src/training/distillation/dataset.d.ts +59 -0
- package/src/training/distillation/dataset.js +337 -0
- package/src/training/distillation/eval.d.ts +34 -0
- package/src/training/distillation/eval.js +310 -0
- package/src/training/distillation/index.d.ts +29 -0
- package/src/training/distillation/index.js +29 -0
- package/src/training/distillation/runtime.d.ts +20 -0
- package/src/training/distillation/runtime.js +121 -0
- package/src/training/distillation/scoreboard.d.ts +6 -0
- package/src/training/distillation/scoreboard.js +8 -0
- package/src/training/distillation/stage-a.d.ts +45 -0
- package/src/training/distillation/stage-a.js +338 -0
- package/src/training/distillation/stage-b.d.ts +24 -0
- package/src/training/distillation/stage-b.js +20 -0
- package/src/training/index.d.ts +10 -0
- package/src/training/index.js +10 -0
- package/src/training/lora-pipeline.d.ts +40 -0
- package/src/training/lora-pipeline.js +796 -0
- package/src/training/operator-artifacts.d.ts +62 -0
- package/src/training/operator-artifacts.js +140 -0
- package/src/training/operator-command.d.ts +5 -0
- package/src/training/operator-command.js +453 -0
- package/src/training/operator-eval.d.ts +48 -0
- package/src/training/operator-eval.js +230 -0
- package/src/training/operator-scoreboard.d.ts +5 -0
- package/src/training/operator-scoreboard.js +44 -0
- package/src/training/runner.d.ts +52 -0
- package/src/training/runner.js +29 -4
- package/src/training/suite.d.ts +112 -0
- package/src/training/suite.js +9 -9
- package/src/training/workloads.d.ts +164 -0
- package/src/training/workloads.js +539 -0
- package/src/version.d.ts +2 -0
- package/src/version.js +2 -0
- package/tools/convert-safetensors-node.js +47 -0
- package/tools/doppler-cli.js +252 -41
|
@@ -259,6 +259,8 @@ export async function processLayerGPU(layerIdx, inputBuffer, numTokens, isPrefil
|
|
|
259
259
|
attentionOutputGate: config.attentionOutputGate,
|
|
260
260
|
causalAttention: config.causalAttention,
|
|
261
261
|
rmsNormWeightOffset: config.rmsNormWeightOffset,
|
|
262
|
+
ropeRotaryDim: config.ropeRotaryDim,
|
|
263
|
+
ropeInterleaved: config.ropeInterleaved,
|
|
262
264
|
tokenIds: context.currentTokenIds ?? null,
|
|
263
265
|
kernelPath: context.kernelPath ?? null,
|
|
264
266
|
disableRoPE,
|
|
@@ -661,6 +663,8 @@ async function processLayerPlanGPU(layerIdx, inputBuffer, numTokens, isPrefill,
|
|
|
661
663
|
attentionOutputGate: config.attentionOutputGate,
|
|
662
664
|
causalAttention: config.causalAttention,
|
|
663
665
|
rmsNormWeightOffset: config.rmsNormWeightOffset,
|
|
666
|
+
ropeRotaryDim: config.ropeRotaryDim,
|
|
667
|
+
ropeInterleaved: config.ropeInterleaved,
|
|
664
668
|
tokenIds: context.currentTokenIds ?? null,
|
|
665
669
|
skipInputNorm: step.skipInputNorm === true,
|
|
666
670
|
activationDtype,
|
|
@@ -690,6 +694,7 @@ async function processLayerPlanGPU(layerIdx, inputBuffer, numTokens, isPrefill,
|
|
|
690
694
|
hiddenSize,
|
|
691
695
|
probes: context.debugProbes,
|
|
692
696
|
recorder,
|
|
697
|
+
dtype: outputDtype,
|
|
693
698
|
});
|
|
694
699
|
}
|
|
695
700
|
break;
|
|
@@ -733,6 +738,7 @@ async function processLayerPlanGPU(layerIdx, inputBuffer, numTokens, isPrefill,
|
|
|
733
738
|
hiddenSize,
|
|
734
739
|
probes: context.debugProbes,
|
|
735
740
|
recorder,
|
|
741
|
+
dtype: outputDtype,
|
|
736
742
|
});
|
|
737
743
|
}
|
|
738
744
|
break;
|
|
@@ -767,6 +773,7 @@ async function processLayerPlanGPU(layerIdx, inputBuffer, numTokens, isPrefill,
|
|
|
767
773
|
hiddenSize,
|
|
768
774
|
probes: context.debugProbes,
|
|
769
775
|
recorder,
|
|
776
|
+
dtype: outputDtype,
|
|
770
777
|
});
|
|
771
778
|
}
|
|
772
779
|
break;
|
|
@@ -801,6 +808,7 @@ async function processLayerPlanGPU(layerIdx, inputBuffer, numTokens, isPrefill,
|
|
|
801
808
|
hiddenSize,
|
|
802
809
|
probes: context.debugProbes,
|
|
803
810
|
recorder,
|
|
811
|
+
dtype: outputDtype,
|
|
804
812
|
});
|
|
805
813
|
}
|
|
806
814
|
break;
|
|
@@ -825,6 +833,7 @@ async function processLayerPlanGPU(layerIdx, inputBuffer, numTokens, isPrefill,
|
|
|
825
833
|
hiddenSize,
|
|
826
834
|
probes: context.debugProbes,
|
|
827
835
|
recorder,
|
|
836
|
+
dtype: outputDtype,
|
|
828
837
|
});
|
|
829
838
|
}
|
|
830
839
|
break;
|
|
@@ -851,6 +860,7 @@ async function processLayerPlanGPU(layerIdx, inputBuffer, numTokens, isPrefill,
|
|
|
851
860
|
hiddenSize,
|
|
852
861
|
probes: context.debugProbes,
|
|
853
862
|
recorder,
|
|
863
|
+
dtype: toDtype,
|
|
854
864
|
});
|
|
855
865
|
}
|
|
856
866
|
break;
|
|
@@ -880,6 +890,7 @@ async function processLayerPlanGPU(layerIdx, inputBuffer, numTokens, isPrefill,
|
|
|
880
890
|
hiddenSize,
|
|
881
891
|
probes: context.debugProbes,
|
|
882
892
|
recorder,
|
|
893
|
+
dtype: getSlotDtype('state') ?? activationDtype,
|
|
883
894
|
});
|
|
884
895
|
|
|
885
896
|
const computeConfig = context.runtimeComputeConfig ?? null;
|
|
@@ -299,9 +299,13 @@ export class InferencePipeline extends PipelineState {
|
|
|
299
299
|
const maxSeqLen = config.maxSeqLen;
|
|
300
300
|
const ropeBuffers = await initRoPEFrequencies({
|
|
301
301
|
headDim: config.headDim,
|
|
302
|
+
rotaryDim: config.ropeRotaryDim,
|
|
302
303
|
maxSeqLen,
|
|
303
304
|
ropeTheta: config.ropeTheta,
|
|
304
305
|
ropeLocalTheta: config.ropeLocalTheta,
|
|
306
|
+
mropeInterleaved: config.ropeInterleaved,
|
|
307
|
+
mropeSection: config.mropeSection,
|
|
308
|
+
partialRotaryFactor: config.partialRotaryFactor,
|
|
305
309
|
ropeScale: config.ropeScale,
|
|
306
310
|
ropeLocalScale: config.ropeLocalScale,
|
|
307
311
|
ropeScalingType: config.ropeScalingType,
|
|
@@ -64,6 +64,68 @@ function resolveSpecialTokens(specialTokensRaw, fallbackTokens, vocab) {
|
|
|
64
64
|
return resolved;
|
|
65
65
|
}
|
|
66
66
|
|
|
67
|
+
function resolveByteLevelPretokenizerConfig(preTokenizer) {
|
|
68
|
+
if (!preTokenizer || typeof preTokenizer !== 'object') {
|
|
69
|
+
return {
|
|
70
|
+
useByteLevel: false,
|
|
71
|
+
addPrefixSpace: null,
|
|
72
|
+
};
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
if (preTokenizer.type === 'ByteLevel') {
|
|
76
|
+
return {
|
|
77
|
+
useByteLevel: true,
|
|
78
|
+
addPrefixSpace: preTokenizer.add_prefix_space === true,
|
|
79
|
+
};
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
if (preTokenizer.type === 'Sequence' && Array.isArray(preTokenizer.pretokenizers)) {
|
|
83
|
+
for (const entry of preTokenizer.pretokenizers) {
|
|
84
|
+
const resolved = resolveByteLevelPretokenizerConfig(entry);
|
|
85
|
+
if (resolved.useByteLevel) {
|
|
86
|
+
return resolved;
|
|
87
|
+
}
|
|
88
|
+
}
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
return {
|
|
92
|
+
useByteLevel: false,
|
|
93
|
+
addPrefixSpace: null,
|
|
94
|
+
};
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
function registerAddedTokens(addedTokens, vocab, reverseVocab, patterns, specialTokenIds, derivedSpecialTokens = null) {
|
|
98
|
+
let maxId = -1;
|
|
99
|
+
for (const token of addedTokens) {
|
|
100
|
+
const content = token?.content;
|
|
101
|
+
const id = typeof token?.id === 'number' ? token.id : parseInt(token?.id, 10);
|
|
102
|
+
if (!Number.isFinite(id) || !content) continue;
|
|
103
|
+
if (!vocab.has(content)) {
|
|
104
|
+
vocab.set(content, id);
|
|
105
|
+
reverseVocab.set(id, content);
|
|
106
|
+
}
|
|
107
|
+
if (id > maxId) maxId = id;
|
|
108
|
+
if (content.length > 1) {
|
|
109
|
+
patterns.push({ content, id });
|
|
110
|
+
}
|
|
111
|
+
if (token.special) {
|
|
112
|
+
specialTokenIds.add(id);
|
|
113
|
+
if (derivedSpecialTokens) {
|
|
114
|
+
if (derivedSpecialTokens.bos == null && (content === '<bos>' || content === '<s>' || content.includes('bos'))) {
|
|
115
|
+
derivedSpecialTokens.bos = id;
|
|
116
|
+
} else if (derivedSpecialTokens.eos == null && (content === '<eos>' || content === '</s>' || content.includes('eos'))) {
|
|
117
|
+
derivedSpecialTokens.eos = id;
|
|
118
|
+
} else if (derivedSpecialTokens.pad == null && (content === '<pad>' || content.includes('pad'))) {
|
|
119
|
+
derivedSpecialTokens.pad = id;
|
|
120
|
+
} else if (derivedSpecialTokens.unk == null && (content === '<unk>' || content.includes('unk'))) {
|
|
121
|
+
derivedSpecialTokens.unk = id;
|
|
122
|
+
}
|
|
123
|
+
}
|
|
124
|
+
}
|
|
125
|
+
}
|
|
126
|
+
return maxId;
|
|
127
|
+
}
|
|
128
|
+
|
|
67
129
|
|
|
68
130
|
export class TransformersTokenizer extends BaseTokenizer {
|
|
69
131
|
|
|
@@ -156,6 +218,10 @@ export class BundledTokenizer extends BaseTokenizer {
|
|
|
156
218
|
|
|
157
219
|
#byteDecoder = null;
|
|
158
220
|
|
|
221
|
+
#byteEncoder = null;
|
|
222
|
+
|
|
223
|
+
#useByteLevelEncoding = false;
|
|
224
|
+
|
|
159
225
|
|
|
160
226
|
constructor(config = {}) {
|
|
161
227
|
// BundledTokenizer gets vocabSize from load(), so defer validation
|
|
@@ -199,9 +265,20 @@ export class BundledTokenizer extends BaseTokenizer {
|
|
|
199
265
|
}
|
|
200
266
|
|
|
201
267
|
this.#byteDecoder = new Map();
|
|
268
|
+
this.#byteEncoder = new Map();
|
|
202
269
|
for (let i = 0; i < base.length; i++) {
|
|
203
270
|
this.#byteDecoder.set(String.fromCodePoint(chars[i]), base[i]);
|
|
271
|
+
this.#byteEncoder.set(base[i], String.fromCodePoint(chars[i]));
|
|
272
|
+
}
|
|
273
|
+
}
|
|
274
|
+
|
|
275
|
+
#encodeByteLevelText(text) {
|
|
276
|
+
const bytes = new TextEncoder().encode(text);
|
|
277
|
+
let out = '';
|
|
278
|
+
for (const byte of bytes) {
|
|
279
|
+
out += this.#byteEncoder?.get(byte) ?? String.fromCharCode(byte);
|
|
204
280
|
}
|
|
281
|
+
return out;
|
|
205
282
|
}
|
|
206
283
|
|
|
207
284
|
|
|
@@ -290,30 +367,16 @@ export class BundledTokenizer extends BaseTokenizer {
|
|
|
290
367
|
eos: null,
|
|
291
368
|
unk: null,
|
|
292
369
|
};
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
specialTokenIds.add(id);
|
|
304
|
-
if (content.length > 1) {
|
|
305
|
-
specialTokenPatterns.push({ content, id });
|
|
306
|
-
}
|
|
307
|
-
if (derivedSpecialTokens.bos == null && (content === '<bos>' || content === '<s>' || content.includes('bos'))) {
|
|
308
|
-
derivedSpecialTokens.bos = id;
|
|
309
|
-
} else if (derivedSpecialTokens.eos == null && (content === '<eos>' || content === '</s>' || content.includes('eos'))) {
|
|
310
|
-
derivedSpecialTokens.eos = id;
|
|
311
|
-
} else if (derivedSpecialTokens.pad == null && (content === '<pad>' || content.includes('pad'))) {
|
|
312
|
-
derivedSpecialTokens.pad = id;
|
|
313
|
-
} else if (derivedSpecialTokens.unk == null && (content === '<unk>' || content.includes('unk'))) {
|
|
314
|
-
derivedSpecialTokens.unk = id;
|
|
315
|
-
}
|
|
316
|
-
}
|
|
370
|
+
const addedMaxId = registerAddedTokens(
|
|
371
|
+
addedTokens,
|
|
372
|
+
this.#vocab,
|
|
373
|
+
this.#reverseVocab,
|
|
374
|
+
specialTokenPatterns,
|
|
375
|
+
specialTokenIds,
|
|
376
|
+
derivedSpecialTokens
|
|
377
|
+
);
|
|
378
|
+
if (addedMaxId > maxId) {
|
|
379
|
+
maxId = addedMaxId;
|
|
317
380
|
}
|
|
318
381
|
|
|
319
382
|
const specialTokensRaw = hf.special_tokens_map || hf.specialTokens || hf.special_tokens || null;
|
|
@@ -351,6 +414,7 @@ export class BundledTokenizer extends BaseTokenizer {
|
|
|
351
414
|
|
|
352
415
|
// Handle behavior flags (use HF config if present, else runtime defaults)
|
|
353
416
|
const runtimeDefaults = getRuntimeConfig().inference.tokenizer;
|
|
417
|
+
const byteLevelPretokenizer = resolveByteLevelPretokenizerConfig(hf.pre_tokenizer);
|
|
354
418
|
const configuredAddBosToken = this.addBosToken;
|
|
355
419
|
const configuredAddEosToken = this.addEosToken;
|
|
356
420
|
this.addBosToken =
|
|
@@ -378,9 +442,16 @@ export class BundledTokenizer extends BaseTokenizer {
|
|
|
378
442
|
// - runtime config addSpacePrefix (user override or null for auto-detect)
|
|
379
443
|
const decoderPrepend = hf.decoder?.prepend_scheme === 'always' || hf.decoder?.add_prefix_space === true;
|
|
380
444
|
const normalizerPrepend = hf.normalizer?.prepend_scheme === 'always' || hf.normalizer?.add_prefix_space === true;
|
|
445
|
+
this.#useByteLevelEncoding = byteLevelPretokenizer.useByteLevel;
|
|
381
446
|
const runtimeSpacePrefix = runtimeDefaults.addSpacePrefix;
|
|
382
447
|
// Use explicit runtime config if set (non-null), otherwise auto-detect from tokenizer.json
|
|
383
|
-
this.#addSpacePrefix = runtimeSpacePrefix
|
|
448
|
+
this.#addSpacePrefix = runtimeSpacePrefix
|
|
449
|
+
?? byteLevelPretokenizer.addPrefixSpace
|
|
450
|
+
?? model.add_prefix_space
|
|
451
|
+
?? model.add_dummy_prefix
|
|
452
|
+
?? decoderPrepend
|
|
453
|
+
?? normalizerPrepend
|
|
454
|
+
?? false;
|
|
384
455
|
log.debug('Tokenizer', `addSpacePrefix=${this.#addSpacePrefix} (runtime=${runtimeSpacePrefix}, model=${model.add_prefix_space ?? model.add_dummy_prefix}, decoder=${decoderPrepend}, normalizer=${normalizerPrepend})`);
|
|
385
456
|
|
|
386
457
|
// Detect space prefix style by checking which WORD tokens exist in vocab
|
|
@@ -469,11 +540,47 @@ export class BundledTokenizer extends BaseTokenizer {
|
|
|
469
540
|
this.#tokenTypes = tokenizerJson.tokenTypes;
|
|
470
541
|
}
|
|
471
542
|
|
|
543
|
+
let maxId = -1;
|
|
544
|
+
for (const id of this.#vocab.values()) {
|
|
545
|
+
if (Number.isFinite(id) && id > maxId) {
|
|
546
|
+
maxId = id;
|
|
547
|
+
}
|
|
548
|
+
}
|
|
549
|
+
|
|
550
|
+
const addedTokens = Array.isArray(tokenizerJson.added_tokens) ? tokenizerJson.added_tokens : [];
|
|
551
|
+
const tokenPatterns = [];
|
|
552
|
+
const specialTokenIds = new Set();
|
|
553
|
+
const derivedSpecialTokens = {
|
|
554
|
+
pad: null,
|
|
555
|
+
bos: null,
|
|
556
|
+
eos: null,
|
|
557
|
+
unk: null,
|
|
558
|
+
};
|
|
559
|
+
const addedMaxId = registerAddedTokens(
|
|
560
|
+
addedTokens,
|
|
561
|
+
this.#vocab,
|
|
562
|
+
this.#reverseVocab,
|
|
563
|
+
tokenPatterns,
|
|
564
|
+
specialTokenIds,
|
|
565
|
+
derivedSpecialTokens
|
|
566
|
+
);
|
|
567
|
+
if (addedMaxId > maxId) {
|
|
568
|
+
maxId = addedMaxId;
|
|
569
|
+
}
|
|
570
|
+
|
|
472
571
|
// Set special tokens - support both camelCase and snake_case formats
|
|
473
572
|
const specialTokensRaw = (tokenizerJson.specialTokens || (tokenizerJson).special_tokens);
|
|
474
|
-
this.specialTokens = resolveSpecialTokens(
|
|
573
|
+
this.specialTokens = resolveSpecialTokens(
|
|
574
|
+
specialTokensRaw,
|
|
575
|
+
{
|
|
576
|
+
...derivedSpecialTokens,
|
|
577
|
+
...this.specialTokens,
|
|
578
|
+
},
|
|
579
|
+
this.#vocab
|
|
580
|
+
);
|
|
475
581
|
log.debug('Tokenizer', `Special tokens: BOS=${this.specialTokens.bos}, EOS=${this.specialTokens.eos}`);
|
|
476
|
-
this.#specialTokenIds =
|
|
582
|
+
this.#specialTokenIds = specialTokenIds;
|
|
583
|
+
this.#specialTokenPatterns = tokenPatterns;
|
|
477
584
|
const builtinSpecials = [
|
|
478
585
|
this.specialTokens.pad,
|
|
479
586
|
this.specialTokens.bos,
|
|
@@ -485,8 +592,13 @@ export class BundledTokenizer extends BaseTokenizer {
|
|
|
485
592
|
this.#specialTokenIds.add(id);
|
|
486
593
|
}
|
|
487
594
|
}
|
|
595
|
+
this.#specialTokenPatterns.sort((a, b) => b.content.length - a.content.length);
|
|
596
|
+
if (maxId >= 0) {
|
|
597
|
+
this.vocabSize = Math.max(this.vocabSize, maxId + 1);
|
|
598
|
+
}
|
|
488
599
|
|
|
489
600
|
const runtimeDefaults = getRuntimeConfig().inference.tokenizer;
|
|
601
|
+
const byteLevelPretokenizer = resolveByteLevelPretokenizerConfig(tokenizerJson.pre_tokenizer);
|
|
490
602
|
const configuredAddBosToken = this.addBosToken;
|
|
491
603
|
const configuredAddEosToken = this.addEosToken;
|
|
492
604
|
this.addBosToken =
|
|
@@ -505,9 +617,11 @@ export class BundledTokenizer extends BaseTokenizer {
|
|
|
505
617
|
if (this.addEosToken && this.specialTokens.eos == null) {
|
|
506
618
|
throw new Error('[Tokenizer] addEosToken is enabled but eos token is missing.');
|
|
507
619
|
}
|
|
620
|
+
this.#useByteLevelEncoding = byteLevelPretokenizer.useByteLevel;
|
|
508
621
|
// NOTE: Default to FALSE - first word shouldn't get space prefix
|
|
509
622
|
// Space prefixes are only for words that follow a space in original text
|
|
510
|
-
this.#addSpacePrefix = tokenizerJson.addSpacePrefix === true
|
|
623
|
+
this.#addSpacePrefix = tokenizerJson.addSpacePrefix === true
|
|
624
|
+
|| byteLevelPretokenizer.addPrefixSpace === true;
|
|
511
625
|
|
|
512
626
|
// Detect space prefix style based on vocab tokens
|
|
513
627
|
// GPT-style uses 'Ġ' (U+0120), SentencePiece uses '▁' (U+2581)
|
|
@@ -548,7 +662,8 @@ export class BundledTokenizer extends BaseTokenizer {
|
|
|
548
662
|
ids.push(this.specialTokens.bos);
|
|
549
663
|
}
|
|
550
664
|
|
|
551
|
-
// Split text around
|
|
665
|
+
// Split text around literal added tokens and special tokens, then tokenize
|
|
666
|
+
// the remaining plain-text segments normally.
|
|
552
667
|
const segments = this.#splitOnSpecialTokens(text);
|
|
553
668
|
for (const seg of segments) {
|
|
554
669
|
if (seg.isSpecial && seg.id !== undefined) {
|
|
@@ -690,11 +805,19 @@ export class BundledTokenizer extends BaseTokenizer {
|
|
|
690
805
|
if (text.length === 0) return [];
|
|
691
806
|
|
|
692
807
|
let normalized = text;
|
|
693
|
-
|
|
694
|
-
|
|
808
|
+
let prefixed;
|
|
809
|
+
if (this.#useByteLevelEncoding) {
|
|
810
|
+
if (this.#addSpacePrefix && !normalized.startsWith(' ')) {
|
|
811
|
+
normalized = ` ${normalized}`;
|
|
812
|
+
}
|
|
813
|
+
prefixed = this.#encodeByteLevelText(normalized);
|
|
814
|
+
} else {
|
|
815
|
+
if (this.#addSpacePrefix && !normalized.startsWith(' ')) {
|
|
816
|
+
normalized = ` ${normalized}`;
|
|
817
|
+
}
|
|
818
|
+
const sp = this.#spacePrefixChar;
|
|
819
|
+
prefixed = normalized.replace(/ /g, sp);
|
|
695
820
|
}
|
|
696
|
-
const sp = this.#spacePrefixChar;
|
|
697
|
-
const prefixed = normalized.replace(/ /g, sp);
|
|
698
821
|
|
|
699
822
|
if (this.#mergeRanks.size === 0) {
|
|
700
823
|
return this.#encodeBPEGreedy(prefixed);
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
export interface InferenceExecutionRulesContractArtifact {
|
|
2
|
+
schemaVersion: 1;
|
|
3
|
+
source: 'doppler';
|
|
4
|
+
ok: boolean;
|
|
5
|
+
checks: Array<{ id: string; ok: boolean }>;
|
|
6
|
+
errors: string[];
|
|
7
|
+
stats: {
|
|
8
|
+
decodeRecorderRules: number;
|
|
9
|
+
batchDecodeRules: number;
|
|
10
|
+
decodeRecorderContexts: number;
|
|
11
|
+
batchDecodeContexts: number;
|
|
12
|
+
};
|
|
13
|
+
}
|
|
14
|
+
|
|
15
|
+
export declare function buildInferenceExecutionRulesContractArtifact(
|
|
16
|
+
ruleGroup: Record<string, unknown> | null | undefined
|
|
17
|
+
): InferenceExecutionRulesContractArtifact;
|
|
@@ -0,0 +1,245 @@
|
|
|
1
|
+
import { selectByRules } from '../gpu/kernels/rule-matcher.js';
|
|
2
|
+
|
|
3
|
+
function isPlainObject(value) {
|
|
4
|
+
return value != null && typeof value === 'object' && !Array.isArray(value);
|
|
5
|
+
}
|
|
6
|
+
|
|
7
|
+
function matchesExactObject(actual, expected) {
|
|
8
|
+
if (!isPlainObject(actual) || !isPlainObject(expected)) {
|
|
9
|
+
return false;
|
|
10
|
+
}
|
|
11
|
+
const actualKeys = Object.keys(actual).sort();
|
|
12
|
+
const expectedKeys = Object.keys(expected).sort();
|
|
13
|
+
if (actualKeys.length !== expectedKeys.length) {
|
|
14
|
+
return false;
|
|
15
|
+
}
|
|
16
|
+
for (let i = 0; i < actualKeys.length; i += 1) {
|
|
17
|
+
if (actualKeys[i] !== expectedKeys[i]) {
|
|
18
|
+
return false;
|
|
19
|
+
}
|
|
20
|
+
}
|
|
21
|
+
for (const key of expectedKeys) {
|
|
22
|
+
const expectedValue = expected[key];
|
|
23
|
+
const actualValue = actual[key];
|
|
24
|
+
if (isPlainObject(expectedValue)) {
|
|
25
|
+
if (!matchesExactObject(actualValue, expectedValue)) {
|
|
26
|
+
return false;
|
|
27
|
+
}
|
|
28
|
+
continue;
|
|
29
|
+
}
|
|
30
|
+
if (Array.isArray(expectedValue)) {
|
|
31
|
+
if (!Array.isArray(actualValue) || actualValue.length !== expectedValue.length) {
|
|
32
|
+
return false;
|
|
33
|
+
}
|
|
34
|
+
for (let i = 0; i < expectedValue.length; i += 1) {
|
|
35
|
+
if (actualValue[i] !== expectedValue[i]) {
|
|
36
|
+
return false;
|
|
37
|
+
}
|
|
38
|
+
}
|
|
39
|
+
continue;
|
|
40
|
+
}
|
|
41
|
+
if (actualValue !== expectedValue) {
|
|
42
|
+
return false;
|
|
43
|
+
}
|
|
44
|
+
}
|
|
45
|
+
return true;
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
function decodeRecorderSemantic(context) {
|
|
49
|
+
return context.hasDevice === true
|
|
50
|
+
&& context.debug !== true
|
|
51
|
+
&& context.disableCommandBatching !== true
|
|
52
|
+
&& context.kvLayout !== 'bdpa_paged';
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
function batchDecodeSemantic(context) {
|
|
56
|
+
return context.batchSize > 1
|
|
57
|
+
&& context.useGPU === true
|
|
58
|
+
&& context.gpuSamplingAvailable === true
|
|
59
|
+
&& context.disableMultiTokenDecode !== true
|
|
60
|
+
&& context.disableCommandBatching !== true
|
|
61
|
+
&& context.isBdpaPagedLayout !== true
|
|
62
|
+
&& context.finitenessFallbackWindowOpen !== true;
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
function enumerateDecodeRecorderContexts() {
|
|
66
|
+
const values = [true, false];
|
|
67
|
+
const kvLayouts = ['bdpa_paged', 'paged', null];
|
|
68
|
+
const contexts = [];
|
|
69
|
+
for (const hasDevice of values) {
|
|
70
|
+
for (const debug of values) {
|
|
71
|
+
for (const disableCommandBatching of values) {
|
|
72
|
+
for (const kvLayout of kvLayouts) {
|
|
73
|
+
contexts.push({
|
|
74
|
+
hasDevice,
|
|
75
|
+
debug,
|
|
76
|
+
disableCommandBatching,
|
|
77
|
+
kvLayout,
|
|
78
|
+
});
|
|
79
|
+
}
|
|
80
|
+
}
|
|
81
|
+
}
|
|
82
|
+
}
|
|
83
|
+
return contexts;
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
function enumerateBatchDecodeContexts() {
|
|
87
|
+
const values = [true, false];
|
|
88
|
+
const batchSizes = [1, 2];
|
|
89
|
+
const contexts = [];
|
|
90
|
+
for (const batchSize of batchSizes) {
|
|
91
|
+
for (const useGPU of values) {
|
|
92
|
+
for (const gpuSamplingAvailable of values) {
|
|
93
|
+
for (const disableMultiTokenDecode of values) {
|
|
94
|
+
for (const disableCommandBatching of values) {
|
|
95
|
+
for (const isBdpaPagedLayout of values) {
|
|
96
|
+
for (const finitenessFallbackWindowOpen of values) {
|
|
97
|
+
contexts.push({
|
|
98
|
+
batchSize,
|
|
99
|
+
useGPU,
|
|
100
|
+
gpuSamplingAvailable,
|
|
101
|
+
disableMultiTokenDecode,
|
|
102
|
+
disableCommandBatching,
|
|
103
|
+
isBdpaPagedLayout,
|
|
104
|
+
finitenessFallbackWindowOpen,
|
|
105
|
+
});
|
|
106
|
+
}
|
|
107
|
+
}
|
|
108
|
+
}
|
|
109
|
+
}
|
|
110
|
+
}
|
|
111
|
+
}
|
|
112
|
+
}
|
|
113
|
+
return contexts;
|
|
114
|
+
}
|
|
115
|
+
|
|
116
|
+
/**
 * Verifies that a rule list has the exact two-rule layout this contract expects.
 *
 * The list must be an array of exactly two rules:
 *   1. an enabling rule whose `match` equals `expectedFirstMatch` (as judged by
 *      `matchesExactObject`) and whose `value` is strictly `true`;
 *   2. a fallback rule whose `match` equals `{}` and whose `value` is strictly `false`.
 *
 * A non-array or wrong-length input short-circuits with a single error; otherwise
 * both rules are checked and each may contribute one error.
 *
 * @param {*} rules Candidate rule list.
 * @param {object} expectedFirstMatch Match predicate the first rule must carry.
 * @param {string} label Rule-set name interpolated into error messages.
 * @returns {{ ok: boolean, errors: string[] }} `ok` is true only when no error was recorded.
 */
function checkRuleShape(rules, expectedFirstMatch, label) {
  if (!Array.isArray(rules)) {
    return {
      ok: false,
      errors: [`[ExecutionRulesContract] ${label} must be an array.`],
    };
  }
  if (rules.length !== 2) {
    return {
      ok: false,
      errors: [`[ExecutionRulesContract] ${label} must contain exactly 2 rules; got ${rules.length}.`],
    };
  }

  const enablingRule = rules[0];
  const fallbackRule = rules[1];
  const errors = [];

  // The match comparison runs first and the value test only runs when it passes.
  const enablingIntact =
    matchesExactObject(enablingRule?.match, expectedFirstMatch) && enablingRule?.value === true;
  if (!enablingIntact) {
    errors.push(`[ExecutionRulesContract] ${label} first rule drifted from the expected enabling predicate.`);
  }

  const fallbackIntact =
    matchesExactObject(fallbackRule?.match, {}) && fallbackRule?.value === false;
  if (!fallbackIntact) {
    errors.push(`[ExecutionRulesContract] ${label} fallback rule must be { match: {}, value: false }.`);
  }

  return {
    ok: errors.length === 0,
    errors,
  };
}
|
|
142
|
+
|
|
143
|
+
/**
 * Compares the value selected by `rules` against a reference predicate for each context.
 *
 * Contexts are visited in iteration order. For each one, `selectByRules(rules, context)`
 * is compared with `expectedValue(context)` using strict equality. Checking stops at
 * the first mismatch, so at most one error is reported.
 *
 * @param {*} rules Rule list handed to `selectByRules`.
 * @param {Array<object>} contexts Contexts to probe.
 * @param {(context: object) => *} expectedValue Reference implementation of the rule.
 * @param {string} label Rule-set name interpolated into the error message.
 * @returns {{ ok: boolean, errors: string[], sampledContexts: number }}
 *   `sampledContexts` is always `contexts.length`, even when checking stopped early.
 */
function checkRuleSemantics(rules, contexts, expectedValue, label) {
  for (const candidate of contexts) {
    const selected = selectByRules(rules, candidate);
    const wanted = expectedValue(candidate);
    if (selected === wanted) {
      continue;
    }
    // Only the first divergence is reported; later contexts are not evaluated.
    return {
      ok: false,
      errors: [
        `[ExecutionRulesContract] ${label} mismatched context ${JSON.stringify(candidate)}: ` +
          `expected ${JSON.stringify(wanted)}, got ${JSON.stringify(selected)}.`,
      ],
      sampledContexts: contexts.length,
    };
  }
  return {
    ok: true,
    errors: [],
    sampledContexts: contexts.length,
  };
}
|
|
162
|
+
|
|
163
|
+
/**
 * Builds the contract artifact for the inference execution rule group.
 *
 * Two rule sets are read from `ruleGroup`: `decodeRecorderEnabled` and
 * `batchDecodeEnabled`. Each one gets a shape check (`checkRuleShape`) followed by
 * a semantic check (`checkRuleSemantics`) over its enumerated contexts. When a rule
 * set is not an array the semantic check is skipped and recorded as failed.
 *
 * Checks and errors are emitted in a fixed order: decode shape, decode semantics,
 * batch shape, batch semantics.
 *
 * @param {object | null | undefined} ruleGroup Rule group holding the two rule sets.
 * @returns {{
 *   schemaVersion: number,
 *   source: string,
 *   ok: boolean,
 *   checks: Array<{ id: string, ok: boolean }>,
 *   errors: string[],
 *   stats: {
 *     decodeRecorderRules: number,
 *     batchDecodeRules: number,
 *     decodeRecorderContexts: number,
 *     batchDecodeContexts: number,
 *   },
 * }} Artifact whose `ok` is true only when no error was collected.
 */
export function buildInferenceExecutionRulesContractArtifact(ruleGroup) {
  // One entry per rule set, in the order their checks must appear in the artifact.
  const targets = [
    {
      label: 'decodeRecorderEnabled',
      rules: ruleGroup?.decodeRecorderEnabled,
      enablingMatch: {
        hasDevice: true,
        debug: false,
        disableCommandBatching: false,
        kvLayout: { neq: 'bdpa_paged' },
      },
      shapeCheckId: 'inference.execution.decodeRecorderEnabled.shape',
      semanticsCheckId: 'inference.execution.decodeRecorderEnabled.semantics',
      unavailableError: '[ExecutionRulesContract] decodeRecorderEnabled is unavailable for semantic check.',
      listContexts: enumerateDecodeRecorderContexts,
      expectedValue: decodeRecorderSemantic,
    },
    {
      label: 'batchDecodeEnabled',
      rules: ruleGroup?.batchDecodeEnabled,
      enablingMatch: {
        batchSize: { gt: 1 },
        useGPU: true,
        gpuSamplingAvailable: true,
        disableMultiTokenDecode: false,
        disableCommandBatching: false,
        isBdpaPagedLayout: false,
        finitenessFallbackWindowOpen: false,
      },
      shapeCheckId: 'inference.execution.batchDecodeEnabled.shape',
      semanticsCheckId: 'inference.execution.batchDecodeEnabled.semantics',
      unavailableError: '[ExecutionRulesContract] batchDecodeEnabled is unavailable for semantic check.',
      listContexts: enumerateBatchDecodeContexts,
      expectedValue: batchDecodeSemantic,
    },
  ];

  const errors = [];
  const checks = [];
  const summaries = [];

  for (const target of targets) {
    const { label, rules, listContexts, expectedValue } = target;

    const shape = checkRuleShape(rules, target.enablingMatch, label);
    errors.push(...shape.errors);
    checks.push({
      id: target.shapeCheckId,
      ok: shape.ok,
    });

    // Contexts are only enumerated when there is a rule array to evaluate.
    const semantics = Array.isArray(rules)
      ? checkRuleSemantics(rules, listContexts(), expectedValue, label)
      : { ok: false, errors: [target.unavailableError], sampledContexts: 0 };
    errors.push(...semantics.errors);
    checks.push({
      id: target.semanticsCheckId,
      ok: semantics.ok,
    });

    summaries.push({
      ruleCount: Array.isArray(rules) ? rules.length : 0,
      sampledContexts: semantics.sampledContexts,
    });
  }

  const [decodeSummary, batchSummary] = summaries;
  return {
    schemaVersion: 1,
    source: 'doppler',
    ok: errors.length === 0,
    checks,
    errors,
    stats: {
      decodeRecorderRules: decodeSummary.ruleCount,
      batchDecodeRules: batchSummary.ruleCount,
      decodeRecorderContexts: decodeSummary.sampledContexts,
      batchDecodeContexts: batchSummary.sampledContexts,
    },
  };
}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
/**
 * Shape of the artifact returned by `buildLayerPatternContractArtifact`.
 */
export interface LayerPatternContractArtifact {
  /** Artifact schema version; `1` is the only declared value. */
  schemaVersion: 1;
  /** Producer tag; always the literal `'doppler'`. */
  source: 'doppler';
  /** Overall result flag (presumably true when `errors` is empty; confirm against the implementation). */
  ok: boolean;
  /** One entry per individual check, identified by `id`. */
  checks: Array<{ id: string; ok: boolean }>;
  /** Error messages collected while building the artifact. */
  errors: string[];
  /** Numeric counters reported alongside the checks. */
  stats: {
    patternKindRules: number;
    layerTypeRules: number;
    patternKindContexts: number;
    layerTypeContexts: number;
  };
}
|
|
14
|
+
|
|
15
|
+
/**
 * Builds the layer-pattern contract artifact for a rule group.
 *
 * @param ruleGroup - Rule group to check; `null` and `undefined` are accepted by the signature.
 * @returns The resulting {@link LayerPatternContractArtifact}.
 */
export declare function buildLayerPatternContractArtifact(
  ruleGroup: Record<string, unknown> | null | undefined
): LayerPatternContractArtifact;
|