@simulatte/doppler 0.1.4 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (199) hide show
  1. package/README.md +26 -10
  2. package/package.json +30 -6
  3. package/src/client/doppler-api.browser.d.ts +1 -0
  4. package/src/client/doppler-api.browser.js +288 -0
  5. package/src/client/doppler-api.js +1 -1
  6. package/src/client/doppler-provider/types.js +1 -1
  7. package/src/config/execution-contract-check.d.ts +33 -0
  8. package/src/config/execution-contract-check.js +72 -0
  9. package/src/config/execution-v0-contract-check.d.ts +94 -0
  10. package/src/config/execution-v0-contract-check.js +251 -0
  11. package/src/config/execution-v0-graph-contract-check.d.ts +20 -0
  12. package/src/config/execution-v0-graph-contract-check.js +64 -0
  13. package/src/config/kernel-path-contract-check.d.ts +76 -0
  14. package/src/config/kernel-path-contract-check.js +479 -0
  15. package/src/config/kernel-path-loader.d.ts +16 -0
  16. package/src/config/kernel-path-loader.js +54 -0
  17. package/src/config/kernels/kernel-ref-digests.js +39 -27
  18. package/src/config/kernels/registry.json +598 -2
  19. package/src/config/loader.js +81 -48
  20. package/src/config/merge-contract-check.d.ts +16 -0
  21. package/src/config/merge-contract-check.js +321 -0
  22. package/src/config/merge-helpers.d.ts +58 -0
  23. package/src/config/merge-helpers.js +54 -0
  24. package/src/config/merge.js +21 -6
  25. package/src/config/presets/models/janus-text.json +2 -0
  26. package/src/config/presets/models/qwen3.json +9 -2
  27. package/src/config/presets/models/transformer.json +5 -0
  28. package/src/config/quantization-contract-check.d.ts +12 -0
  29. package/src/config/quantization-contract-check.js +91 -0
  30. package/src/config/required-inference-fields-contract-check.d.ts +24 -0
  31. package/src/config/required-inference-fields-contract-check.js +237 -0
  32. package/src/config/schema/browser-suite-metrics.schema.d.ts +17 -0
  33. package/src/config/schema/browser-suite-metrics.schema.js +46 -0
  34. package/src/config/schema/conversion-report.schema.d.ts +40 -0
  35. package/src/config/schema/conversion-report.schema.js +108 -0
  36. package/src/config/schema/doppler.schema.js +12 -18
  37. package/src/config/schema/index.d.ts +22 -0
  38. package/src/config/schema/index.js +18 -0
  39. package/src/config/schema/inference-defaults.schema.js +3 -0
  40. package/src/config/schema/inference.schema.d.ts +9 -0
  41. package/src/config/schema/kernel-path.schema.d.ts +6 -0
  42. package/src/config/schema/manifest.schema.d.ts +6 -0
  43. package/src/config/schema/manifest.schema.js +3 -0
  44. package/src/converter/core.d.ts +10 -0
  45. package/src/converter/core.js +27 -2
  46. package/src/converter/parsers/diffusion.js +63 -3
  47. package/src/converter/rope-config.js +42 -0
  48. package/src/gpu/device.js +58 -0
  49. package/src/gpu/kernels/attention.js +98 -0
  50. package/src/gpu/kernels/bias_add.wgsl +8 -6
  51. package/src/gpu/kernels/bias_add_f16.wgsl +8 -5
  52. package/src/gpu/kernels/conv2d.js +1 -1
  53. package/src/gpu/kernels/conv2d.wgsl +7 -8
  54. package/src/gpu/kernels/conv2d_f16.wgsl +7 -8
  55. package/src/gpu/kernels/depthwise_conv2d.d.ts +29 -0
  56. package/src/gpu/kernels/depthwise_conv2d.js +99 -0
  57. package/src/gpu/kernels/depthwise_conv2d.wgsl +55 -0
  58. package/src/gpu/kernels/depthwise_conv2d_f16.wgsl +59 -0
  59. package/src/gpu/kernels/grouped_pointwise_conv2d.d.ts +27 -0
  60. package/src/gpu/kernels/grouped_pointwise_conv2d.js +93 -0
  61. package/src/gpu/kernels/grouped_pointwise_conv2d.wgsl +44 -0
  62. package/src/gpu/kernels/grouped_pointwise_conv2d_f16.wgsl +48 -0
  63. package/src/gpu/kernels/index.d.ts +30 -0
  64. package/src/gpu/kernels/index.js +25 -0
  65. package/src/gpu/kernels/matmul.js +25 -0
  66. package/src/gpu/kernels/pixel_shuffle.js +1 -1
  67. package/src/gpu/kernels/pixel_shuffle.wgsl +4 -5
  68. package/src/gpu/kernels/pixel_shuffle_f16.wgsl +4 -5
  69. package/src/gpu/kernels/relu.d.ts +18 -0
  70. package/src/gpu/kernels/relu.js +58 -0
  71. package/src/gpu/kernels/relu.wgsl +22 -0
  72. package/src/gpu/kernels/relu_f16.wgsl +24 -0
  73. package/src/gpu/kernels/repeat_channels.d.ts +21 -0
  74. package/src/gpu/kernels/repeat_channels.js +60 -0
  75. package/src/gpu/kernels/repeat_channels.wgsl +28 -0
  76. package/src/gpu/kernels/repeat_channels_f16.wgsl +30 -0
  77. package/src/gpu/kernels/residual.js +44 -8
  78. package/src/gpu/kernels/residual.wgsl +6 -3
  79. package/src/gpu/kernels/residual_f16.wgsl +2 -1
  80. package/src/gpu/kernels/residual_f16_vec4.wgsl +2 -1
  81. package/src/gpu/kernels/residual_vec4.wgsl +2 -1
  82. package/src/gpu/kernels/rmsnorm.js +58 -6
  83. package/src/gpu/kernels/rmsnorm.wgsl +14 -6
  84. package/src/gpu/kernels/rmsnorm_f16.wgsl +10 -2
  85. package/src/gpu/kernels/rope.d.ts +2 -0
  86. package/src/gpu/kernels/rope.js +11 -1
  87. package/src/gpu/kernels/rope.wgsl +56 -40
  88. package/src/gpu/kernels/sana_linear_attention.d.ts +27 -0
  89. package/src/gpu/kernels/sana_linear_attention.js +121 -0
  90. package/src/gpu/kernels/sana_linear_attention_apply.wgsl +43 -0
  91. package/src/gpu/kernels/sana_linear_attention_apply_f16.wgsl +46 -0
  92. package/src/gpu/kernels/sana_linear_attention_summary.wgsl +51 -0
  93. package/src/gpu/kernels/sana_linear_attention_summary_f16.wgsl +53 -0
  94. package/src/gpu/kernels/silu.d.ts +1 -0
  95. package/src/gpu/kernels/silu.js +32 -14
  96. package/src/gpu/kernels/silu.wgsl +19 -9
  97. package/src/gpu/kernels/silu_f16.wgsl +19 -9
  98. package/src/gpu/kernels/transpose.js +15 -2
  99. package/src/gpu/kernels/transpose.wgsl +5 -6
  100. package/src/gpu/kernels/upsample2d.js +2 -1
  101. package/src/gpu/kernels/upsample2d.wgsl +6 -9
  102. package/src/gpu/kernels/upsample2d_f16.wgsl +6 -9
  103. package/src/gpu/kernels/utils.js +16 -1
  104. package/src/index-browser.d.ts +1 -1
  105. package/src/index-browser.js +2 -2
  106. package/src/index.js +1 -1
  107. package/src/inference/browser-harness.js +109 -23
  108. package/src/inference/pipelines/diffusion/init.js +14 -0
  109. package/src/inference/pipelines/diffusion/pipeline.js +215 -77
  110. package/src/inference/pipelines/diffusion/sana-transformer.d.ts +53 -0
  111. package/src/inference/pipelines/diffusion/sana-transformer.js +738 -0
  112. package/src/inference/pipelines/diffusion/scheduler.d.ts +17 -1
  113. package/src/inference/pipelines/diffusion/scheduler.js +91 -3
  114. package/src/inference/pipelines/diffusion/text-encoder-gpu.d.ts +11 -4
  115. package/src/inference/pipelines/diffusion/text-encoder-gpu.js +282 -0
  116. package/src/inference/pipelines/diffusion/text-encoder.js +18 -1
  117. package/src/inference/pipelines/diffusion/types.d.ts +4 -0
  118. package/src/inference/pipelines/diffusion/vae.js +782 -78
  119. package/src/inference/pipelines/text/attention/record.js +11 -2
  120. package/src/inference/pipelines/text/attention/run.js +11 -2
  121. package/src/inference/pipelines/text/chat-format.js +25 -1
  122. package/src/inference/pipelines/text/config.d.ts +9 -0
  123. package/src/inference/pipelines/text/config.js +69 -2
  124. package/src/inference/pipelines/text/execution-plan.js +23 -31
  125. package/src/inference/pipelines/text/execution-v0.js +43 -95
  126. package/src/inference/pipelines/text/ffn/standard.js +3 -0
  127. package/src/inference/pipelines/text/init.d.ts +4 -0
  128. package/src/inference/pipelines/text/init.js +56 -9
  129. package/src/inference/pipelines/text/layer.js +11 -0
  130. package/src/inference/pipelines/text.js +4 -0
  131. package/src/inference/tokenizers/bundled.js +156 -33
  132. package/src/rules/execution-rules-contract-check.d.ts +17 -0
  133. package/src/rules/execution-rules-contract-check.js +245 -0
  134. package/src/rules/kernels/depthwise-conv2d.rules.json +6 -0
  135. package/src/rules/kernels/grouped-pointwise-conv2d.rules.json +6 -0
  136. package/src/rules/kernels/relu.rules.json +6 -0
  137. package/src/rules/kernels/repeat-channels.rules.json +6 -0
  138. package/src/rules/kernels/sana-linear-attention.rules.json +6 -0
  139. package/src/rules/layer-pattern-contract-check.d.ts +17 -0
  140. package/src/rules/layer-pattern-contract-check.js +231 -0
  141. package/src/rules/rule-registry.d.ts +28 -0
  142. package/src/rules/rule-registry.js +38 -0
  143. package/src/rules/tooling/command-runtime.rules.json +18 -0
  144. package/src/tooling/command-api.d.ts +27 -1
  145. package/src/tooling/command-api.js +142 -3
  146. package/src/tooling/conversion-config-materializer.d.ts +24 -0
  147. package/src/tooling/conversion-config-materializer.js +99 -0
  148. package/src/tooling/lean-execution-contract-runner.d.ts +43 -0
  149. package/src/tooling/lean-execution-contract-runner.js +158 -0
  150. package/src/tooling/node-browser-command-runner.d.ts +4 -0
  151. package/src/tooling/node-browser-command-runner.js +58 -3
  152. package/src/tooling/node-command-runner.js +15 -0
  153. package/src/tooling/node-convert.d.ts +10 -0
  154. package/src/tooling/node-converter.js +59 -0
  155. package/src/tooling/node-webgpu.js +11 -89
  156. package/src/training/checkpoint-watch.d.ts +7 -0
  157. package/src/training/checkpoint-watch.js +106 -0
  158. package/src/training/checkpoint.d.ts +6 -1
  159. package/src/training/checkpoint.js +12 -2
  160. package/src/training/distillation/artifacts.d.ts +71 -0
  161. package/src/training/distillation/artifacts.js +132 -0
  162. package/src/training/distillation/checkpoint-watch.d.ts +10 -0
  163. package/src/training/distillation/checkpoint-watch.js +57 -0
  164. package/src/training/distillation/dataset.d.ts +59 -0
  165. package/src/training/distillation/dataset.js +337 -0
  166. package/src/training/distillation/eval.d.ts +34 -0
  167. package/src/training/distillation/eval.js +310 -0
  168. package/src/training/distillation/index.d.ts +29 -0
  169. package/src/training/distillation/index.js +29 -0
  170. package/src/training/distillation/runtime.d.ts +20 -0
  171. package/src/training/distillation/runtime.js +121 -0
  172. package/src/training/distillation/scoreboard.d.ts +6 -0
  173. package/src/training/distillation/scoreboard.js +8 -0
  174. package/src/training/distillation/stage-a.d.ts +45 -0
  175. package/src/training/distillation/stage-a.js +338 -0
  176. package/src/training/distillation/stage-b.d.ts +24 -0
  177. package/src/training/distillation/stage-b.js +20 -0
  178. package/src/training/index.d.ts +10 -0
  179. package/src/training/index.js +10 -0
  180. package/src/training/lora-pipeline.d.ts +40 -0
  181. package/src/training/lora-pipeline.js +796 -0
  182. package/src/training/operator-artifacts.d.ts +62 -0
  183. package/src/training/operator-artifacts.js +140 -0
  184. package/src/training/operator-command.d.ts +5 -0
  185. package/src/training/operator-command.js +453 -0
  186. package/src/training/operator-eval.d.ts +48 -0
  187. package/src/training/operator-eval.js +230 -0
  188. package/src/training/operator-scoreboard.d.ts +5 -0
  189. package/src/training/operator-scoreboard.js +44 -0
  190. package/src/training/runner.d.ts +52 -0
  191. package/src/training/runner.js +29 -4
  192. package/src/training/suite.d.ts +112 -0
  193. package/src/training/suite.js +9 -9
  194. package/src/training/workloads.d.ts +164 -0
  195. package/src/training/workloads.js +539 -0
  196. package/src/version.d.ts +2 -0
  197. package/src/version.js +2 -0
  198. package/tools/convert-safetensors-node.js +47 -0
  199. package/tools/doppler-cli.js +252 -41
@@ -259,6 +259,8 @@ export async function processLayerGPU(layerIdx, inputBuffer, numTokens, isPrefil
259
259
  attentionOutputGate: config.attentionOutputGate,
260
260
  causalAttention: config.causalAttention,
261
261
  rmsNormWeightOffset: config.rmsNormWeightOffset,
262
+ ropeRotaryDim: config.ropeRotaryDim,
263
+ ropeInterleaved: config.ropeInterleaved,
262
264
  tokenIds: context.currentTokenIds ?? null,
263
265
  kernelPath: context.kernelPath ?? null,
264
266
  disableRoPE,
@@ -661,6 +663,8 @@ async function processLayerPlanGPU(layerIdx, inputBuffer, numTokens, isPrefill,
661
663
  attentionOutputGate: config.attentionOutputGate,
662
664
  causalAttention: config.causalAttention,
663
665
  rmsNormWeightOffset: config.rmsNormWeightOffset,
666
+ ropeRotaryDim: config.ropeRotaryDim,
667
+ ropeInterleaved: config.ropeInterleaved,
664
668
  tokenIds: context.currentTokenIds ?? null,
665
669
  skipInputNorm: step.skipInputNorm === true,
666
670
  activationDtype,
@@ -690,6 +694,7 @@ async function processLayerPlanGPU(layerIdx, inputBuffer, numTokens, isPrefill,
690
694
  hiddenSize,
691
695
  probes: context.debugProbes,
692
696
  recorder,
697
+ dtype: outputDtype,
693
698
  });
694
699
  }
695
700
  break;
@@ -733,6 +738,7 @@ async function processLayerPlanGPU(layerIdx, inputBuffer, numTokens, isPrefill,
733
738
  hiddenSize,
734
739
  probes: context.debugProbes,
735
740
  recorder,
741
+ dtype: outputDtype,
736
742
  });
737
743
  }
738
744
  break;
@@ -767,6 +773,7 @@ async function processLayerPlanGPU(layerIdx, inputBuffer, numTokens, isPrefill,
767
773
  hiddenSize,
768
774
  probes: context.debugProbes,
769
775
  recorder,
776
+ dtype: outputDtype,
770
777
  });
771
778
  }
772
779
  break;
@@ -801,6 +808,7 @@ async function processLayerPlanGPU(layerIdx, inputBuffer, numTokens, isPrefill,
801
808
  hiddenSize,
802
809
  probes: context.debugProbes,
803
810
  recorder,
811
+ dtype: outputDtype,
804
812
  });
805
813
  }
806
814
  break;
@@ -825,6 +833,7 @@ async function processLayerPlanGPU(layerIdx, inputBuffer, numTokens, isPrefill,
825
833
  hiddenSize,
826
834
  probes: context.debugProbes,
827
835
  recorder,
836
+ dtype: outputDtype,
828
837
  });
829
838
  }
830
839
  break;
@@ -851,6 +860,7 @@ async function processLayerPlanGPU(layerIdx, inputBuffer, numTokens, isPrefill,
851
860
  hiddenSize,
852
861
  probes: context.debugProbes,
853
862
  recorder,
863
+ dtype: toDtype,
854
864
  });
855
865
  }
856
866
  break;
@@ -880,6 +890,7 @@ async function processLayerPlanGPU(layerIdx, inputBuffer, numTokens, isPrefill,
880
890
  hiddenSize,
881
891
  probes: context.debugProbes,
882
892
  recorder,
893
+ dtype: getSlotDtype('state') ?? activationDtype,
883
894
  });
884
895
 
885
896
  const computeConfig = context.runtimeComputeConfig ?? null;
@@ -299,9 +299,13 @@ export class InferencePipeline extends PipelineState {
299
299
  const maxSeqLen = config.maxSeqLen;
300
300
  const ropeBuffers = await initRoPEFrequencies({
301
301
  headDim: config.headDim,
302
+ rotaryDim: config.ropeRotaryDim,
302
303
  maxSeqLen,
303
304
  ropeTheta: config.ropeTheta,
304
305
  ropeLocalTheta: config.ropeLocalTheta,
306
+ mropeInterleaved: config.ropeInterleaved,
307
+ mropeSection: config.mropeSection,
308
+ partialRotaryFactor: config.partialRotaryFactor,
305
309
  ropeScale: config.ropeScale,
306
310
  ropeLocalScale: config.ropeLocalScale,
307
311
  ropeScalingType: config.ropeScalingType,
@@ -64,6 +64,68 @@ function resolveSpecialTokens(specialTokensRaw, fallbackTokens, vocab) {
64
64
  return resolved;
65
65
  }
66
66
 
67
+ function resolveByteLevelPretokenizerConfig(preTokenizer) {
68
+ if (!preTokenizer || typeof preTokenizer !== 'object') {
69
+ return {
70
+ useByteLevel: false,
71
+ addPrefixSpace: null,
72
+ };
73
+ }
74
+
75
+ if (preTokenizer.type === 'ByteLevel') {
76
+ return {
77
+ useByteLevel: true,
78
+ addPrefixSpace: preTokenizer.add_prefix_space === true,
79
+ };
80
+ }
81
+
82
+ if (preTokenizer.type === 'Sequence' && Array.isArray(preTokenizer.pretokenizers)) {
83
+ for (const entry of preTokenizer.pretokenizers) {
84
+ const resolved = resolveByteLevelPretokenizerConfig(entry);
85
+ if (resolved.useByteLevel) {
86
+ return resolved;
87
+ }
88
+ }
89
+ }
90
+
91
+ return {
92
+ useByteLevel: false,
93
+ addPrefixSpace: null,
94
+ };
95
+ }
96
+
97
+ function registerAddedTokens(addedTokens, vocab, reverseVocab, patterns, specialTokenIds, derivedSpecialTokens = null) {
98
+ let maxId = -1;
99
+ for (const token of addedTokens) {
100
+ const content = token?.content;
101
+ const id = typeof token?.id === 'number' ? token.id : parseInt(token?.id, 10);
102
+ if (!Number.isFinite(id) || !content) continue;
103
+ if (!vocab.has(content)) {
104
+ vocab.set(content, id);
105
+ reverseVocab.set(id, content);
106
+ }
107
+ if (id > maxId) maxId = id;
108
+ if (content.length > 1) {
109
+ patterns.push({ content, id });
110
+ }
111
+ if (token.special) {
112
+ specialTokenIds.add(id);
113
+ if (derivedSpecialTokens) {
114
+ if (derivedSpecialTokens.bos == null && (content === '<bos>' || content === '<s>' || content.includes('bos'))) {
115
+ derivedSpecialTokens.bos = id;
116
+ } else if (derivedSpecialTokens.eos == null && (content === '<eos>' || content === '</s>' || content.includes('eos'))) {
117
+ derivedSpecialTokens.eos = id;
118
+ } else if (derivedSpecialTokens.pad == null && (content === '<pad>' || content.includes('pad'))) {
119
+ derivedSpecialTokens.pad = id;
120
+ } else if (derivedSpecialTokens.unk == null && (content === '<unk>' || content.includes('unk'))) {
121
+ derivedSpecialTokens.unk = id;
122
+ }
123
+ }
124
+ }
125
+ }
126
+ return maxId;
127
+ }
128
+
67
129
 
68
130
  export class TransformersTokenizer extends BaseTokenizer {
69
131
 
@@ -156,6 +218,10 @@ export class BundledTokenizer extends BaseTokenizer {
156
218
 
157
219
  #byteDecoder = null;
158
220
 
221
+ #byteEncoder = null;
222
+
223
+ #useByteLevelEncoding = false;
224
+
159
225
 
160
226
  constructor(config = {}) {
161
227
  // BundledTokenizer gets vocabSize from load(), so defer validation
@@ -199,9 +265,20 @@ export class BundledTokenizer extends BaseTokenizer {
199
265
  }
200
266
 
201
267
  this.#byteDecoder = new Map();
268
+ this.#byteEncoder = new Map();
202
269
  for (let i = 0; i < base.length; i++) {
203
270
  this.#byteDecoder.set(String.fromCodePoint(chars[i]), base[i]);
271
+ this.#byteEncoder.set(base[i], String.fromCodePoint(chars[i]));
272
+ }
273
+ }
274
+
275
+ #encodeByteLevelText(text) {
276
+ const bytes = new TextEncoder().encode(text);
277
+ let out = '';
278
+ for (const byte of bytes) {
279
+ out += this.#byteEncoder?.get(byte) ?? String.fromCharCode(byte);
204
280
  }
281
+ return out;
205
282
  }
206
283
 
207
284
 
@@ -290,30 +367,16 @@ export class BundledTokenizer extends BaseTokenizer {
290
367
  eos: null,
291
368
  unk: null,
292
369
  };
293
- for (const token of addedTokens) {
294
- const content = token.content;
295
- const id = typeof token.id === 'number' ? token.id : parseInt( (token.id), 10);
296
- if (!Number.isFinite(id) || !content) continue;
297
- if (!this.#vocab.has(content)) {
298
- this.#vocab.set(content, id);
299
- this.#reverseVocab.set(id, content);
300
- }
301
- if (id > maxId) maxId = id;
302
- if (token.special) {
303
- specialTokenIds.add(id);
304
- if (content.length > 1) {
305
- specialTokenPatterns.push({ content, id });
306
- }
307
- if (derivedSpecialTokens.bos == null && (content === '<bos>' || content === '<s>' || content.includes('bos'))) {
308
- derivedSpecialTokens.bos = id;
309
- } else if (derivedSpecialTokens.eos == null && (content === '<eos>' || content === '</s>' || content.includes('eos'))) {
310
- derivedSpecialTokens.eos = id;
311
- } else if (derivedSpecialTokens.pad == null && (content === '<pad>' || content.includes('pad'))) {
312
- derivedSpecialTokens.pad = id;
313
- } else if (derivedSpecialTokens.unk == null && (content === '<unk>' || content.includes('unk'))) {
314
- derivedSpecialTokens.unk = id;
315
- }
316
- }
370
+ const addedMaxId = registerAddedTokens(
371
+ addedTokens,
372
+ this.#vocab,
373
+ this.#reverseVocab,
374
+ specialTokenPatterns,
375
+ specialTokenIds,
376
+ derivedSpecialTokens
377
+ );
378
+ if (addedMaxId > maxId) {
379
+ maxId = addedMaxId;
317
380
  }
318
381
 
319
382
  const specialTokensRaw = hf.special_tokens_map || hf.specialTokens || hf.special_tokens || null;
@@ -351,6 +414,7 @@ export class BundledTokenizer extends BaseTokenizer {
351
414
 
352
415
  // Handle behavior flags (use HF config if present, else runtime defaults)
353
416
  const runtimeDefaults = getRuntimeConfig().inference.tokenizer;
417
+ const byteLevelPretokenizer = resolveByteLevelPretokenizerConfig(hf.pre_tokenizer);
354
418
  const configuredAddBosToken = this.addBosToken;
355
419
  const configuredAddEosToken = this.addEosToken;
356
420
  this.addBosToken =
@@ -378,9 +442,16 @@ export class BundledTokenizer extends BaseTokenizer {
378
442
  // - runtime config addSpacePrefix (user override or null for auto-detect)
379
443
  const decoderPrepend = hf.decoder?.prepend_scheme === 'always' || hf.decoder?.add_prefix_space === true;
380
444
  const normalizerPrepend = hf.normalizer?.prepend_scheme === 'always' || hf.normalizer?.add_prefix_space === true;
445
+ this.#useByteLevelEncoding = byteLevelPretokenizer.useByteLevel;
381
446
  const runtimeSpacePrefix = runtimeDefaults.addSpacePrefix;
382
447
  // Use explicit runtime config if set (non-null), otherwise auto-detect from tokenizer.json
383
- this.#addSpacePrefix = runtimeSpacePrefix ?? model.add_prefix_space ?? model.add_dummy_prefix ?? decoderPrepend ?? normalizerPrepend ?? false;
448
+ this.#addSpacePrefix = runtimeSpacePrefix
449
+ ?? byteLevelPretokenizer.addPrefixSpace
450
+ ?? model.add_prefix_space
451
+ ?? model.add_dummy_prefix
452
+ ?? decoderPrepend
453
+ ?? normalizerPrepend
454
+ ?? false;
384
455
  log.debug('Tokenizer', `addSpacePrefix=${this.#addSpacePrefix} (runtime=${runtimeSpacePrefix}, model=${model.add_prefix_space ?? model.add_dummy_prefix}, decoder=${decoderPrepend}, normalizer=${normalizerPrepend})`);
385
456
 
386
457
  // Detect space prefix style by checking which WORD tokens exist in vocab
@@ -469,11 +540,47 @@ export class BundledTokenizer extends BaseTokenizer {
469
540
  this.#tokenTypes = tokenizerJson.tokenTypes;
470
541
  }
471
542
 
543
+ let maxId = -1;
544
+ for (const id of this.#vocab.values()) {
545
+ if (Number.isFinite(id) && id > maxId) {
546
+ maxId = id;
547
+ }
548
+ }
549
+
550
+ const addedTokens = Array.isArray(tokenizerJson.added_tokens) ? tokenizerJson.added_tokens : [];
551
+ const tokenPatterns = [];
552
+ const specialTokenIds = new Set();
553
+ const derivedSpecialTokens = {
554
+ pad: null,
555
+ bos: null,
556
+ eos: null,
557
+ unk: null,
558
+ };
559
+ const addedMaxId = registerAddedTokens(
560
+ addedTokens,
561
+ this.#vocab,
562
+ this.#reverseVocab,
563
+ tokenPatterns,
564
+ specialTokenIds,
565
+ derivedSpecialTokens
566
+ );
567
+ if (addedMaxId > maxId) {
568
+ maxId = addedMaxId;
569
+ }
570
+
472
571
  // Set special tokens - support both camelCase and snake_case formats
473
572
  const specialTokensRaw = (tokenizerJson.specialTokens || (tokenizerJson).special_tokens);
474
- this.specialTokens = resolveSpecialTokens(specialTokensRaw, this.specialTokens, this.#vocab);
573
+ this.specialTokens = resolveSpecialTokens(
574
+ specialTokensRaw,
575
+ {
576
+ ...derivedSpecialTokens,
577
+ ...this.specialTokens,
578
+ },
579
+ this.#vocab
580
+ );
475
581
  log.debug('Tokenizer', `Special tokens: BOS=${this.specialTokens.bos}, EOS=${this.specialTokens.eos}`);
476
- this.#specialTokenIds = new Set();
582
+ this.#specialTokenIds = specialTokenIds;
583
+ this.#specialTokenPatterns = tokenPatterns;
477
584
  const builtinSpecials = [
478
585
  this.specialTokens.pad,
479
586
  this.specialTokens.bos,
@@ -485,8 +592,13 @@ export class BundledTokenizer extends BaseTokenizer {
485
592
  this.#specialTokenIds.add(id);
486
593
  }
487
594
  }
595
+ this.#specialTokenPatterns.sort((a, b) => b.content.length - a.content.length);
596
+ if (maxId >= 0) {
597
+ this.vocabSize = Math.max(this.vocabSize, maxId + 1);
598
+ }
488
599
 
489
600
  const runtimeDefaults = getRuntimeConfig().inference.tokenizer;
601
+ const byteLevelPretokenizer = resolveByteLevelPretokenizerConfig(tokenizerJson.pre_tokenizer);
490
602
  const configuredAddBosToken = this.addBosToken;
491
603
  const configuredAddEosToken = this.addEosToken;
492
604
  this.addBosToken =
@@ -505,9 +617,11 @@ export class BundledTokenizer extends BaseTokenizer {
505
617
  if (this.addEosToken && this.specialTokens.eos == null) {
506
618
  throw new Error('[Tokenizer] addEosToken is enabled but eos token is missing.');
507
619
  }
620
+ this.#useByteLevelEncoding = byteLevelPretokenizer.useByteLevel;
508
621
  // NOTE: Default to FALSE - first word shouldn't get space prefix
509
622
  // Space prefixes are only for words that follow a space in original text
510
- this.#addSpacePrefix = tokenizerJson.addSpacePrefix === true;
623
+ this.#addSpacePrefix = tokenizerJson.addSpacePrefix === true
624
+ || byteLevelPretokenizer.addPrefixSpace === true;
511
625
 
512
626
  // Detect space prefix style based on vocab tokens
513
627
  // GPT-style uses 'Ġ' (U+0120), SentencePiece uses '▁' (U+2581)
@@ -548,7 +662,8 @@ export class BundledTokenizer extends BaseTokenizer {
548
662
  ids.push(this.specialTokens.bos);
549
663
  }
550
664
 
551
- // Split text around special tokens and tokenize each segment
665
+ // Split text around literal added tokens and special tokens, then tokenize
666
+ // the remaining plain-text segments normally.
552
667
  const segments = this.#splitOnSpecialTokens(text);
553
668
  for (const seg of segments) {
554
669
  if (seg.isSpecial && seg.id !== undefined) {
@@ -690,11 +805,19 @@ export class BundledTokenizer extends BaseTokenizer {
690
805
  if (text.length === 0) return [];
691
806
 
692
807
  let normalized = text;
693
- if (this.#addSpacePrefix && !normalized.startsWith(' ')) {
694
- normalized = ` ${normalized}`;
808
+ let prefixed;
809
+ if (this.#useByteLevelEncoding) {
810
+ if (this.#addSpacePrefix && !normalized.startsWith(' ')) {
811
+ normalized = ` ${normalized}`;
812
+ }
813
+ prefixed = this.#encodeByteLevelText(normalized);
814
+ } else {
815
+ if (this.#addSpacePrefix && !normalized.startsWith(' ')) {
816
+ normalized = ` ${normalized}`;
817
+ }
818
+ const sp = this.#spacePrefixChar;
819
+ prefixed = normalized.replace(/ /g, sp);
695
820
  }
696
- const sp = this.#spacePrefixChar;
697
- const prefixed = normalized.replace(/ /g, sp);
698
821
 
699
822
  if (this.#mergeRanks.size === 0) {
700
823
  return this.#encodeBPEGreedy(prefixed);
@@ -0,0 +1,17 @@
1
+ export interface InferenceExecutionRulesContractArtifact {
2
+ schemaVersion: 1;
3
+ source: 'doppler';
4
+ ok: boolean;
5
+ checks: Array<{ id: string; ok: boolean }>;
6
+ errors: string[];
7
+ stats: {
8
+ decodeRecorderRules: number;
9
+ batchDecodeRules: number;
10
+ decodeRecorderContexts: number;
11
+ batchDecodeContexts: number;
12
+ };
13
+ }
14
+
15
+ export declare function buildInferenceExecutionRulesContractArtifact(
16
+ ruleGroup: Record<string, unknown> | null | undefined
17
+ ): InferenceExecutionRulesContractArtifact;
@@ -0,0 +1,245 @@
1
+ import { selectByRules } from '../gpu/kernels/rule-matcher.js';
2
+
3
+ function isPlainObject(value) {
4
+ return value != null && typeof value === 'object' && !Array.isArray(value);
5
+ }
6
+
7
+ function matchesExactObject(actual, expected) {
8
+ if (!isPlainObject(actual) || !isPlainObject(expected)) {
9
+ return false;
10
+ }
11
+ const actualKeys = Object.keys(actual).sort();
12
+ const expectedKeys = Object.keys(expected).sort();
13
+ if (actualKeys.length !== expectedKeys.length) {
14
+ return false;
15
+ }
16
+ for (let i = 0; i < actualKeys.length; i += 1) {
17
+ if (actualKeys[i] !== expectedKeys[i]) {
18
+ return false;
19
+ }
20
+ }
21
+ for (const key of expectedKeys) {
22
+ const expectedValue = expected[key];
23
+ const actualValue = actual[key];
24
+ if (isPlainObject(expectedValue)) {
25
+ if (!matchesExactObject(actualValue, expectedValue)) {
26
+ return false;
27
+ }
28
+ continue;
29
+ }
30
+ if (Array.isArray(expectedValue)) {
31
+ if (!Array.isArray(actualValue) || actualValue.length !== expectedValue.length) {
32
+ return false;
33
+ }
34
+ for (let i = 0; i < expectedValue.length; i += 1) {
35
+ if (actualValue[i] !== expectedValue[i]) {
36
+ return false;
37
+ }
38
+ }
39
+ continue;
40
+ }
41
+ if (actualValue !== expectedValue) {
42
+ return false;
43
+ }
44
+ }
45
+ return true;
46
+ }
47
+
48
+ function decodeRecorderSemantic(context) {
49
+ return context.hasDevice === true
50
+ && context.debug !== true
51
+ && context.disableCommandBatching !== true
52
+ && context.kvLayout !== 'bdpa_paged';
53
+ }
54
+
55
+ function batchDecodeSemantic(context) {
56
+ return context.batchSize > 1
57
+ && context.useGPU === true
58
+ && context.gpuSamplingAvailable === true
59
+ && context.disableMultiTokenDecode !== true
60
+ && context.disableCommandBatching !== true
61
+ && context.isBdpaPagedLayout !== true
62
+ && context.finitenessFallbackWindowOpen !== true;
63
+ }
64
+
65
+ function enumerateDecodeRecorderContexts() {
66
+ const values = [true, false];
67
+ const kvLayouts = ['bdpa_paged', 'paged', null];
68
+ const contexts = [];
69
+ for (const hasDevice of values) {
70
+ for (const debug of values) {
71
+ for (const disableCommandBatching of values) {
72
+ for (const kvLayout of kvLayouts) {
73
+ contexts.push({
74
+ hasDevice,
75
+ debug,
76
+ disableCommandBatching,
77
+ kvLayout,
78
+ });
79
+ }
80
+ }
81
+ }
82
+ }
83
+ return contexts;
84
+ }
85
+
86
+ function enumerateBatchDecodeContexts() {
87
+ const values = [true, false];
88
+ const batchSizes = [1, 2];
89
+ const contexts = [];
90
+ for (const batchSize of batchSizes) {
91
+ for (const useGPU of values) {
92
+ for (const gpuSamplingAvailable of values) {
93
+ for (const disableMultiTokenDecode of values) {
94
+ for (const disableCommandBatching of values) {
95
+ for (const isBdpaPagedLayout of values) {
96
+ for (const finitenessFallbackWindowOpen of values) {
97
+ contexts.push({
98
+ batchSize,
99
+ useGPU,
100
+ gpuSamplingAvailable,
101
+ disableMultiTokenDecode,
102
+ disableCommandBatching,
103
+ isBdpaPagedLayout,
104
+ finitenessFallbackWindowOpen,
105
+ });
106
+ }
107
+ }
108
+ }
109
+ }
110
+ }
111
+ }
112
+ }
113
+ return contexts;
114
+ }
115
+
116
+ function checkRuleShape(rules, expectedFirstMatch, label) {
117
+ if (!Array.isArray(rules)) {
118
+ return {
119
+ ok: false,
120
+ errors: [`[ExecutionRulesContract] ${label} must be an array.`],
121
+ };
122
+ }
123
+ if (rules.length !== 2) {
124
+ return {
125
+ ok: false,
126
+ errors: [`[ExecutionRulesContract] ${label} must contain exactly 2 rules; got ${rules.length}.`],
127
+ };
128
+ }
129
+ const [firstRule, secondRule] = rules;
130
+ const errors = [];
131
+ if (!matchesExactObject(firstRule?.match, expectedFirstMatch) || firstRule?.value !== true) {
132
+ errors.push(`[ExecutionRulesContract] ${label} first rule drifted from the expected enabling predicate.`);
133
+ }
134
+ if (!matchesExactObject(secondRule?.match, {}) || secondRule?.value !== false) {
135
+ errors.push(`[ExecutionRulesContract] ${label} fallback rule must be { match: {}, value: false }.`);
136
+ }
137
+ return {
138
+ ok: errors.length === 0,
139
+ errors,
140
+ };
141
+ }
142
+
143
+ function checkRuleSemantics(rules, contexts, expectedValue, label) {
144
+ const errors = [];
145
+ for (const context of contexts) {
146
+ const actual = selectByRules(rules, context);
147
+ const expected = expectedValue(context);
148
+ if (actual !== expected) {
149
+ errors.push(
150
+ `[ExecutionRulesContract] ${label} mismatched context ${JSON.stringify(context)}: ` +
151
+ `expected ${JSON.stringify(expected)}, got ${JSON.stringify(actual)}.`
152
+ );
153
+ break;
154
+ }
155
+ }
156
+ return {
157
+ ok: errors.length === 0,
158
+ errors,
159
+ sampledContexts: contexts.length,
160
+ };
161
+ }
162
+
163
+ export function buildInferenceExecutionRulesContractArtifact(ruleGroup) {
164
+ const errors = [];
165
+ const checks = [];
166
+ const decodeRules = ruleGroup?.decodeRecorderEnabled;
167
+ const batchRules = ruleGroup?.batchDecodeEnabled;
168
+
169
+ const decodeShape = checkRuleShape(
170
+ decodeRules,
171
+ {
172
+ hasDevice: true,
173
+ debug: false,
174
+ disableCommandBatching: false,
175
+ kvLayout: { neq: 'bdpa_paged' },
176
+ },
177
+ 'decodeRecorderEnabled'
178
+ );
179
+ errors.push(...decodeShape.errors);
180
+ checks.push({
181
+ id: 'inference.execution.decodeRecorderEnabled.shape',
182
+ ok: decodeShape.ok,
183
+ });
184
+
185
+ const decodeSemantics = Array.isArray(decodeRules)
186
+ ? checkRuleSemantics(
187
+ decodeRules,
188
+ enumerateDecodeRecorderContexts(),
189
+ decodeRecorderSemantic,
190
+ 'decodeRecorderEnabled'
191
+ )
192
+ : { ok: false, errors: ['[ExecutionRulesContract] decodeRecorderEnabled is unavailable for semantic check.'], sampledContexts: 0 };
193
+ errors.push(...decodeSemantics.errors);
194
+ checks.push({
195
+ id: 'inference.execution.decodeRecorderEnabled.semantics',
196
+ ok: decodeSemantics.ok,
197
+ });
198
+
199
+ const batchShape = checkRuleShape(
200
+ batchRules,
201
+ {
202
+ batchSize: { gt: 1 },
203
+ useGPU: true,
204
+ gpuSamplingAvailable: true,
205
+ disableMultiTokenDecode: false,
206
+ disableCommandBatching: false,
207
+ isBdpaPagedLayout: false,
208
+ finitenessFallbackWindowOpen: false,
209
+ },
210
+ 'batchDecodeEnabled'
211
+ );
212
+ errors.push(...batchShape.errors);
213
+ checks.push({
214
+ id: 'inference.execution.batchDecodeEnabled.shape',
215
+ ok: batchShape.ok,
216
+ });
217
+
218
+ const batchSemantics = Array.isArray(batchRules)
219
+ ? checkRuleSemantics(
220
+ batchRules,
221
+ enumerateBatchDecodeContexts(),
222
+ batchDecodeSemantic,
223
+ 'batchDecodeEnabled'
224
+ )
225
+ : { ok: false, errors: ['[ExecutionRulesContract] batchDecodeEnabled is unavailable for semantic check.'], sampledContexts: 0 };
226
+ errors.push(...batchSemantics.errors);
227
+ checks.push({
228
+ id: 'inference.execution.batchDecodeEnabled.semantics',
229
+ ok: batchSemantics.ok,
230
+ });
231
+
232
+ return {
233
+ schemaVersion: 1,
234
+ source: 'doppler',
235
+ ok: errors.length === 0,
236
+ checks,
237
+ errors,
238
+ stats: {
239
+ decodeRecorderRules: Array.isArray(decodeRules) ? decodeRules.length : 0,
240
+ batchDecodeRules: Array.isArray(batchRules) ? batchRules.length : 0,
241
+ decodeRecorderContexts: decodeSemantics.sampledContexts,
242
+ batchDecodeContexts: batchSemantics.sampledContexts,
243
+ },
244
+ };
245
+ }
@@ -0,0 +1,6 @@
1
+ {
2
+ "variant": [
3
+ { "match": { "isF16": true }, "value": "default_f16" },
4
+ { "match": {}, "value": "default" }
5
+ ]
6
+ }
@@ -0,0 +1,6 @@
1
+ {
2
+ "variant": [
3
+ { "match": { "isF16": true }, "value": "default_f16" },
4
+ { "match": {}, "value": "default" }
5
+ ]
6
+ }
@@ -0,0 +1,6 @@
1
+ {
2
+ "variant": [
3
+ { "match": { "dtype": "f16" }, "value": "default_f16" },
4
+ { "match": {}, "value": "default" }
5
+ ]
6
+ }
@@ -0,0 +1,6 @@
1
+ {
2
+ "variant": [
3
+ { "match": { "dtype": "f16" }, "value": "default_f16" },
4
+ { "match": {}, "value": "default" }
5
+ ]
6
+ }
@@ -0,0 +1,6 @@
1
+ {
2
+ "variant": [
3
+ { "match": { "isF16": true }, "value": "default_f16" },
4
+ { "match": {}, "value": "default" }
5
+ ]
6
+ }
@@ -0,0 +1,17 @@
1
+ export interface LayerPatternContractArtifact {
2
+ schemaVersion: 1;
3
+ source: 'doppler';
4
+ ok: boolean;
5
+ checks: Array<{ id: string; ok: boolean }>;
6
+ errors: string[];
7
+ stats: {
8
+ patternKindRules: number;
9
+ layerTypeRules: number;
10
+ patternKindContexts: number;
11
+ layerTypeContexts: number;
12
+ };
13
+ }
14
+
15
+ export declare function buildLayerPatternContractArtifact(
16
+ ruleGroup: Record<string, unknown> | null | undefined
17
+ ): LayerPatternContractArtifact;