npm - @simulatte/doppler - Versions diffs - 0.1.5 → 0.1.6 - Mend

@simulatte/doppler 0.1.5 → 0.1.6

This diff shows the changes in content between publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the differences between the package versions as they appear in their respective public registries.

Files changed (130) [hide / show]

package/README.md +23 -8
package/package.json +7 -4
package/src/config/kernels/kernel-ref-digests.js +39 -39
package/src/config/kernels/registry.json +42 -2
package/src/config/loader.js +31 -2
package/src/config/merge.js +18 -0
package/src/config/presets/models/qwen3.json +9 -2
package/src/config/presets/models/transformer.json +5 -0
package/src/config/required-inference-fields-contract-check.js +6 -0
package/src/config/schema/inference-defaults.schema.js +3 -0
package/src/config/schema/inference.schema.d.ts +9 -0
package/src/config/schema/kernel-path.schema.d.ts +6 -0
package/src/config/schema/manifest.schema.d.ts +6 -0
package/src/config/schema/manifest.schema.js +3 -0
package/src/converter/rope-config.js +42 -0
package/src/gpu/device.js +58 -0
package/src/gpu/kernels/attention.js +98 -0
package/src/gpu/kernels/bias_add.wgsl +8 -6
package/src/gpu/kernels/bias_add_f16.wgsl +8 -5
package/src/gpu/kernels/conv2d.js +1 -1
package/src/gpu/kernels/conv2d.wgsl +7 -8
package/src/gpu/kernels/conv2d_f16.wgsl +7 -8
package/src/gpu/kernels/depthwise_conv2d.js +2 -1
package/src/gpu/kernels/depthwise_conv2d.wgsl +6 -9
package/src/gpu/kernels/depthwise_conv2d_f16.wgsl +6 -9
package/src/gpu/kernels/grouped_pointwise_conv2d.js +2 -1
package/src/gpu/kernels/grouped_pointwise_conv2d.wgsl +6 -9
package/src/gpu/kernels/grouped_pointwise_conv2d_f16.wgsl +6 -9
package/src/gpu/kernels/matmul.js +25 -0
package/src/gpu/kernels/pixel_shuffle.js +1 -1
package/src/gpu/kernels/pixel_shuffle.wgsl +4 -5
package/src/gpu/kernels/pixel_shuffle_f16.wgsl +4 -5
package/src/gpu/kernels/relu.js +15 -2
package/src/gpu/kernels/relu.wgsl +2 -1
package/src/gpu/kernels/relu_f16.wgsl +2 -1
package/src/gpu/kernels/repeat_channels.js +1 -1
package/src/gpu/kernels/repeat_channels.wgsl +4 -5
package/src/gpu/kernels/repeat_channels_f16.wgsl +4 -5
package/src/gpu/kernels/residual.js +44 -8
package/src/gpu/kernels/residual.wgsl +6 -3
package/src/gpu/kernels/residual_f16.wgsl +2 -1
package/src/gpu/kernels/residual_f16_vec4.wgsl +2 -1
package/src/gpu/kernels/residual_vec4.wgsl +2 -1
package/src/gpu/kernels/rmsnorm.js +58 -6
package/src/gpu/kernels/rmsnorm.wgsl +14 -6
package/src/gpu/kernels/rmsnorm_f16.wgsl +10 -2
package/src/gpu/kernels/rope.d.ts +2 -0
package/src/gpu/kernels/rope.js +11 -1
package/src/gpu/kernels/rope.wgsl +56 -40
package/src/gpu/kernels/sana_linear_attention.js +1 -2
package/src/gpu/kernels/sana_linear_attention_apply.wgsl +4 -5
package/src/gpu/kernels/sana_linear_attention_apply_f16.wgsl +4 -5
package/src/gpu/kernels/sana_linear_attention_summary.wgsl +4 -0
package/src/gpu/kernels/sana_linear_attention_summary_f16.wgsl +4 -0
package/src/gpu/kernels/silu.d.ts +1 -0
package/src/gpu/kernels/silu.js +32 -14
package/src/gpu/kernels/silu.wgsl +19 -9
package/src/gpu/kernels/silu_f16.wgsl +19 -9
package/src/gpu/kernels/transpose.js +15 -2
package/src/gpu/kernels/transpose.wgsl +5 -6
package/src/gpu/kernels/upsample2d.js +2 -1
package/src/gpu/kernels/upsample2d.wgsl +6 -9
package/src/gpu/kernels/upsample2d_f16.wgsl +6 -9
package/src/gpu/kernels/utils.js +16 -1
package/src/inference/browser-harness.js +47 -1
package/src/inference/pipelines/diffusion/pipeline.js +15 -6
package/src/inference/pipelines/diffusion/text-encoder-gpu.d.ts +5 -0
package/src/inference/pipelines/diffusion/text-encoder-gpu.js +27 -15
package/src/inference/pipelines/text/attention/record.js +11 -2
package/src/inference/pipelines/text/attention/run.js +11 -2
package/src/inference/pipelines/text/chat-format.js +25 -1
package/src/inference/pipelines/text/config.d.ts +4 -0
package/src/inference/pipelines/text/config.js +68 -1
package/src/inference/pipelines/text/execution-plan.js +23 -31
package/src/inference/pipelines/text/execution-v0.js +29 -2
package/src/inference/pipelines/text/ffn/standard.js +3 -0
package/src/inference/pipelines/text/init.d.ts +4 -0
package/src/inference/pipelines/text/init.js +56 -9
package/src/inference/pipelines/text/layer.js +11 -0
package/src/inference/pipelines/text.js +4 -0
package/src/inference/tokenizers/bundled.js +156 -33
package/src/rules/tooling/command-runtime.rules.json +18 -0
package/src/tooling/command-api.d.ts +27 -1
package/src/tooling/command-api.js +142 -3
package/src/tooling/node-browser-command-runner.d.ts +4 -0
package/src/tooling/node-browser-command-runner.js +58 -3
package/src/tooling/node-command-runner.js +15 -0
package/src/tooling/node-webgpu.js +9 -87
package/src/training/checkpoint-watch.d.ts +7 -0
package/src/training/checkpoint-watch.js +106 -0
package/src/training/checkpoint.d.ts +6 -1
package/src/training/checkpoint.js +12 -2
package/src/training/distillation/artifacts.d.ts +71 -0
package/src/training/distillation/artifacts.js +132 -0
package/src/training/distillation/checkpoint-watch.d.ts +10 -0
package/src/training/distillation/checkpoint-watch.js +57 -0
package/src/training/distillation/dataset.d.ts +59 -0
package/src/training/distillation/dataset.js +337 -0
package/src/training/distillation/eval.d.ts +34 -0
package/src/training/distillation/eval.js +310 -0
package/src/training/distillation/index.d.ts +29 -0
package/src/training/distillation/index.js +29 -0
package/src/training/distillation/runtime.d.ts +20 -0
package/src/training/distillation/runtime.js +121 -0
package/src/training/distillation/scoreboard.d.ts +6 -0
package/src/training/distillation/scoreboard.js +8 -0
package/src/training/distillation/stage-a.d.ts +45 -0
package/src/training/distillation/stage-a.js +338 -0
package/src/training/distillation/stage-b.d.ts +24 -0
package/src/training/distillation/stage-b.js +20 -0
package/src/training/index.d.ts +10 -0
package/src/training/index.js +10 -0
package/src/training/lora-pipeline.d.ts +40 -0
package/src/training/lora-pipeline.js +796 -0
package/src/training/operator-artifacts.d.ts +62 -0
package/src/training/operator-artifacts.js +140 -0
package/src/training/operator-command.d.ts +5 -0
package/src/training/operator-command.js +453 -0
package/src/training/operator-eval.d.ts +48 -0
package/src/training/operator-eval.js +230 -0
package/src/training/operator-scoreboard.d.ts +5 -0
package/src/training/operator-scoreboard.js +44 -0
package/src/training/runner.d.ts +52 -0
package/src/training/runner.js +29 -4
package/src/training/suite.d.ts +112 -0
package/src/training/suite.js +9 -9
package/src/training/workloads.d.ts +164 -0
package/src/training/workloads.js +539 -0
package/src/version.js +1 -1
package/tools/doppler-cli.js +137 -40

package/src/inference/pipelines/text/execution-plan.js CHANGED Viewed

@@ -42,56 +42,48 @@ function resolveFallbackActivationDtype(primaryActivationDtype) {
 function resolveFallbackKernelPath(primaryKernelPath) {
   const primaryKernelPathId = primaryKernelPath?.id ?? null;
   if (!primaryKernelPathId) {
-    return {
-      kernelPath: null,
-      kernelPathId: null,
-      kernelPathSource: 'none',
-    };
+    throw new Error(
+      '[ExecutionPlan] F16 finiteness fallback requires a primary kernel path with a stable id. ' +
+      'Add a registered kernelPath id and a finiteness fallback rule.'
+    );
   }
-  const primaryKernelPathIsObject = typeof primaryKernelPath === 'object' && primaryKernelPath !== null;
+  const explicitFallbackKernelPathId = typeof primaryKernelPath?.finitenessFallbackKernelPathId === 'string'
+    && primaryKernelPath.finitenessFallbackKernelPathId.length > 0
+    ? primaryKernelPath.finitenessFallbackKernelPathId
+    : null;
-  const fallbackKernelPathId = selectRuleValue(
+  const fallbackKernelPathId = explicitFallbackKernelPathId ?? selectRuleValue(
     'inference',
     'kernelPath',
     'finitenessFallback',
     { kernelPathId: primaryKernelPathId }
   );
-  const resolvedKernelPathId = typeof fallbackKernelPathId === 'string' && fallbackKernelPathId.length > 0
-    ? fallbackKernelPathId
-    : primaryKernelPathId;
-  const kernelPathSource = resolvedKernelPathId === primaryKernelPathId ? 'self' : 'rule';
+  if (typeof fallbackKernelPathId !== 'string' || fallbackKernelPathId.length === 0) {
+    throw new Error(
+      `[ExecutionPlan] Missing finiteness fallback kernel path mapping for "${primaryKernelPathId}". ` +
+      'Add an explicit rule in src/rules/inference/kernel-path.rules.json.'
+    );
+  }
-  if (kernelPathSource === 'self') {
-    log.warn(
-      'Pipeline',
-      `[ExecutionPlan] No finiteness fallback kernel path mapping for "${primaryKernelPathId}"; using primary kernel path.`
+  if (fallbackKernelPathId === primaryKernelPathId) {
+    throw new Error(
+      `[ExecutionPlan] Invalid finiteness fallback mapping for "${primaryKernelPathId}": ` +
+      `fallback kernel path resolves to itself. Add an explicit widening path.`
     );
   }
   try {
-    const kernelPath = resolveKernelPath(resolvedKernelPathId);
+    const kernelPath = resolveKernelPath(fallbackKernelPathId);
     return {
       kernelPath,
-      kernelPathId: resolvedKernelPathId,
-      kernelPathSource,
+      kernelPathId: fallbackKernelPathId,
+      kernelPathSource: 'rule',
     };
   } catch (error) {
-    if (primaryKernelPathIsObject) {
-      log.warn(
-        'Pipeline',
-        `[ExecutionPlan] Failed to resolve finiteness fallback kernel path "${resolvedKernelPathId}" ` +
-        `for "${primaryKernelPathId}", using inline kernel path as fallback. ${error?.message || error}`
-      );
-      return {
-        kernelPath: primaryKernelPath,
-        kernelPathId: primaryKernelPathId,
-        kernelPathSource,
-      };
-    }
     throw new Error(
-      `[ExecutionPlan] Failed to resolve finiteness fallback kernel path "${resolvedKernelPathId}" ` +
+      `[ExecutionPlan] Failed to resolve finiteness fallback kernel path "${fallbackKernelPathId}" ` +
       `(from "${primaryKernelPathId}"): ${error?.message || error}`
     );
   }

package/src/inference/pipelines/text/execution-v0.js CHANGED Viewed

@@ -7,6 +7,7 @@ import {
   resolveExecutionV0KVIO,
   resolveExecutionV0Precision,
 } from '../../../config/execution-v0-contract-check.js';
+import { selectRuleValue } from '../../../rules/rule-registry.js';
 import {
   EXECUTION_V0_SCHEMA_ID,
   DEFAULT_EXECUTION_V0_POLICIES,
@@ -856,7 +857,7 @@ function assertInlineKernelPathSessionCompatibility(path, sessionDefaults) {
   }
 }
-function buildInlineKernelPath(steps, sessionDefaults, modelId, numLayers) {
+function buildInlineKernelPath(steps, sessionDefaults, modelId, numLayers, finitenessFallbackKernelPathId = null) {
   const activationDtype = normalizeDtype(
     sessionDefaults?.compute?.defaults?.activationDtype ?? 'f16',
     'sessionDefaults.compute.defaults.activationDtype'
@@ -877,6 +878,9 @@ function buildInlineKernelPath(steps, sessionDefaults, modelId, numLayers) {
     description: 'Generated from manifest.inference.execution.steps',
     activationDtype,
     kvDtype,
+    ...(typeof finitenessFallbackKernelPathId === 'string' && finitenessFallbackKernelPathId.length > 0
+      ? { finitenessFallbackKernelPathId }
+      : {}),
     decode: {
       steps: decodeSteps.length > 0 ? decodeSteps : prefillSteps,
     },
@@ -1107,7 +1111,26 @@ export function compileExecutionV0(options = {}) {
     ...resolvedDecodeSteps.filter((step) => step.phase === 'decode'),
   ];
-  const kernelPath = buildInlineKernelPath(patchedSteps, resolvedSession, modelId, numLayers);
+  const defaultKernelPathId = typeof manifestInference.defaultKernelPath === 'string'
+    && manifestInference.defaultKernelPath.trim().length > 0
+    ? manifestInference.defaultKernelPath.trim()
+    : null;
+  const finitenessFallbackKernelPathId = defaultKernelPathId
+    ? selectRuleValue(
+      'inference',
+      'kernelPath',
+      'finitenessFallback',
+      { kernelPathId: defaultKernelPathId }
+    )
+    : null;
+  const kernelPath = buildInlineKernelPath(
+    patchedSteps,
+    resolvedSession,
+    modelId,
+    numLayers,
+    finitenessFallbackKernelPathId
+  );
   const layerPipeline = buildLayerPipelineFromExecution(resolvedSteps);
   const sessionPatch = buildSessionRuntimePatch(resolvedSession);
   const modelOverrides = buildModelRuntimeOverrides(manifestInference);
@@ -1162,6 +1185,10 @@ export function applyExecutionV0RuntimeConfig(options = {}) {
   }
   const runtimeInferencePatch = { ...executionV0State.runtimeInferencePatch };
+  if (runtimeInference.kernelPath !== undefined) {
+    delete runtimeInferencePatch.kernelPath;
+    delete runtimeInferencePatch.kernelPathSource;
+  }
   if (runtimeInferencePatch.modelOverrides) {
     runtimeInferencePatch.modelOverrides = mergeRuntimeValues(
       runtimeInferencePatch.modelOverrides,

package/src/inference/pipelines/text/ffn/standard.js CHANGED Viewed

@@ -42,6 +42,7 @@ export async function processFFNStandard(
     hiddenSize,
     probes: context.debugProbes,
     recorder,
+    dtype: normedTensor.dtype,
   });
   // 2. FFN
@@ -58,6 +59,7 @@ export async function processFFNStandard(
     hiddenSize,
     probes: context.debugProbes,
     recorder,
+    dtype: ffnOutput.dtype,
   });
   // 3. Residual add
@@ -72,6 +74,7 @@ export async function processFFNStandard(
     hiddenSize,
     probes: context.debugProbes,
     recorder,
+    dtype: output.dtype,
   });
   if (normedTensor !== postAttn) {

package/src/inference/pipelines/text/init.d.ts CHANGED Viewed

@@ -71,9 +71,13 @@ export interface PipelineContexts {
  */
 export interface RoPEConfig {
   headDim: number;
+  rotaryDim?: number;
   maxSeqLen: number;
   ropeTheta: number;
   ropeLocalTheta?: number | null;
+  mropeInterleaved?: boolean;
+  mropeSection?: number[] | null;
+  partialRotaryFactor?: number | null;
   ropeScale: number;
   ropeLocalScale?: number;
   ropeScalingType?: string | null;

package/src/inference/pipelines/text/init.js CHANGED Viewed

@@ -206,13 +206,45 @@ function isSameRoPEScalingConfig(
       === (rightScaling?.original_max_position_embeddings ?? null);
 }
+function resolveRotaryDim(headDim, rotaryDim, partialRotaryFactor) {
+  if (rotaryDim != null) {
+    if (!Number.isFinite(rotaryDim) || rotaryDim <= 0 || (rotaryDim % 2) !== 0) {
+      throw new Error(`RoPE rotary dim must be a positive even integer; got "${rotaryDim}".`);
+    }
+    if (rotaryDim > headDim) {
+      throw new Error(`RoPE rotary dim ${rotaryDim} cannot exceed headDim ${headDim}.`);
+    }
+    return rotaryDim;
+  }
+  if (partialRotaryFactor == null) {
+    return headDim;
+  }
+  if (!Number.isFinite(partialRotaryFactor) || partialRotaryFactor <= 0 || partialRotaryFactor > 1) {
+    throw new Error(
+      `RoPE partialRotaryFactor must be a number in (0, 1]; got "${partialRotaryFactor}".`
+    );
+  }
+  const resolved = Math.trunc(headDim * partialRotaryFactor);
+  if (resolved <= 0 || (resolved % 2) !== 0) {
+    throw new Error(
+      `RoPE partialRotaryFactor=${partialRotaryFactor} with headDim=${headDim} resolves ` +
+      `to rotaryDim=${resolved}, but rotaryDim must be a positive even integer.`
+    );
+  }
+  return resolved;
+}
 export async function initRoPEFrequencies(config, useGPU) {
   const {
     headDim,
+    rotaryDim,
     maxSeqLen,
     ropeTheta,
     ropeLocalTheta,
+    mropeInterleaved,
+    mropeSection,
+    partialRotaryFactor,
     ropeScale,
     ropeLocalScale,
     ropeScalingType,
@@ -230,14 +262,23 @@ export async function initRoPEFrequencies(config, useGPU) {
   const resolvedLocalTheta = ropeLocalTheta ?? ropeTheta;
   const resolvedLocalScalingType = ropeLocalScalingType ?? ropeScalingType;
   const resolvedLocalScaling = ropeLocalScaling ?? ropeScaling;
+  const resolvedRotaryDim = resolveRotaryDim(headDim, rotaryDim, partialRotaryFactor);
+  const halfDim = resolvedRotaryDim / 2;
+  if (mropeInterleaved === true && Array.isArray(mropeSection)) {
+    const expandedDim = mropeSection.reduce((sum, entry) => sum + entry, 0) * 2;
+    if (expandedDim !== resolvedRotaryDim) {
+      throw new Error(
+        `RoPE mropeSection expands to ${expandedDim} dims, but rotaryDim is ${resolvedRotaryDim}.`
+      );
+    }
+  }
-  const halfDim = headDim / 2;
   const isYarn = ropeScalingType === 'yarn';
   const isLocalYarn = resolvedLocalScalingType === 'yarn';
   // Compute global (full_attention) frequencies
   const globalFreqs = computeRoPEFreqsForTheta(
-    ropeTheta, headDim, maxSeqLen, ropeScale, ropeScalingType, ropeScaling
+    ropeTheta, resolvedRotaryDim, maxSeqLen, ropeScale, ropeScalingType, ropeScaling
   );
   // Compute local (sliding_attention) frequencies if different from global.
@@ -256,7 +297,7 @@ export async function initRoPEFrequencies(config, useGPU) {
   if (hasDistinctLocalTheta || hasDistinctLocalScaling) {
     localFreqs = computeRoPEFreqsForTheta(
       resolvedLocalTheta,
-      headDim,
+      resolvedRotaryDim,
       maxSeqLen,
       resolvedLocalScale,
       resolvedLocalScalingType,
@@ -303,9 +344,10 @@ export async function initRoPEFrequencies(config, useGPU) {
     log.debug(
       'Pipeline',
-      `RoPE frequencies initialized (GPU): ${maxSeqLen} positions, dim=${halfDim}, headDim=${headDim}, ` +
+      `RoPE frequencies initialized (GPU): ${maxSeqLen} positions, dim=${halfDim}, headDim=${headDim}, rotaryDim=${resolvedRotaryDim}, ` +
       `theta=${ropeTheta}${hasDistinctLocalTheta ? `, localTheta=${resolvedLocalTheta}` : ''}, ` +
-      `scaling=${ropeScalingType ?? 'none'}:${ropeScale}${hasDistinctLocalScaling ? `, localScaling=${resolvedLocalScalingType ?? 'none'}:${resolvedLocalScale}` : ''}`
+      `scaling=${ropeScalingType ?? 'none'}:${ropeScale}${hasDistinctLocalScaling ? `, localScaling=${resolvedLocalScalingType ?? 'none'}:${resolvedLocalScale}` : ''}, ` +
+      `interleaved=${mropeInterleaved === true}`
     );
     return {
@@ -318,9 +360,10 @@ export async function initRoPEFrequencies(config, useGPU) {
   log.debug(
     'Pipeline',
-    `RoPE frequencies initialized (CPU): ${maxSeqLen} positions, dim=${halfDim}, headDim=${headDim}, ` +
+    `RoPE frequencies initialized (CPU): ${maxSeqLen} positions, dim=${halfDim}, headDim=${headDim}, rotaryDim=${resolvedRotaryDim}, ` +
     `theta=${ropeTheta}${hasDistinctLocalTheta ? `, localTheta=${resolvedLocalTheta}` : ''}, ` +
-    `scaling=${ropeScalingType ?? 'none'}:${ropeScale}${hasDistinctLocalScaling ? `, localScaling=${resolvedLocalScalingType ?? 'none'}:${resolvedLocalScale}` : ''}`
+    `scaling=${ropeScalingType ?? 'none'}:${ropeScale}${hasDistinctLocalScaling ? `, localScaling=${resolvedLocalScalingType ?? 'none'}:${resolvedLocalScale}` : ''}, ` +
+    `interleaved=${mropeInterleaved === true}`
   );
   return {
@@ -688,6 +731,10 @@ function applyChatMLTemplate(prompt) {
   return `<|im_start|>user\n${prompt}<|im_end|>\n<|im_start|>assistant\n`;
 }
+function applyQwenTemplate(prompt) {
+  return `<|im_start|>user\n${prompt}<|im_end|>\n<|im_start|>assistant\n<think>\n\n</think>\n\n`;
+}
 function applyTranslateGemmaTemplate() {
   throw new Error(
     'TranslateGemma template requires structured messages. ' +
@@ -702,7 +749,7 @@ const PROMPT_TEMPLATES = {
   'llama3': applyHeaderBasedTemplate,
   'gpt-oss': applyChannelBasedTemplate,
   'chatml': applyChatMLTemplate,
-  'qwen': applyChatMLTemplate,  // Qwen uses ChatML format
+  'qwen': applyQwenTemplate,
   'translategemma': applyTranslateGemmaTemplate,
 };
@@ -721,7 +768,7 @@ export function applyChatTemplate(prompt, templateType) {
 export const applyGemmaChatTemplate = applyTurnBasedTemplate;
 export const applyLlama3ChatTemplate = applyHeaderBasedTemplate;
 export const applyGptOssChatTemplate = applyChannelBasedTemplate;
-export const applyQwenChatTemplate = applyChatMLTemplate;
+export const applyQwenChatTemplate = applyQwenTemplate;
 export function isStopToken(token, stopTokenIds, eosTokenId) {

package/src/inference/pipelines/text/layer.js CHANGED Viewed

@@ -259,6 +259,8 @@ export async function processLayerGPU(layerIdx, inputBuffer, numTokens, isPrefil
       attentionOutputGate: config.attentionOutputGate,
       causalAttention: config.causalAttention,
       rmsNormWeightOffset: config.rmsNormWeightOffset,
+      ropeRotaryDim: config.ropeRotaryDim,
+      ropeInterleaved: config.ropeInterleaved,
       tokenIds: context.currentTokenIds ?? null,
       kernelPath: context.kernelPath ?? null,
       disableRoPE,
@@ -661,6 +663,8 @@ async function processLayerPlanGPU(layerIdx, inputBuffer, numTokens, isPrefill,
             attentionOutputGate: config.attentionOutputGate,
             causalAttention: config.causalAttention,
             rmsNormWeightOffset: config.rmsNormWeightOffset,
+            ropeRotaryDim: config.ropeRotaryDim,
+            ropeInterleaved: config.ropeInterleaved,
             tokenIds: context.currentTokenIds ?? null,
             skipInputNorm: step.skipInputNorm === true,
             activationDtype,
@@ -690,6 +694,7 @@ async function processLayerPlanGPU(layerIdx, inputBuffer, numTokens, isPrefill,
               hiddenSize,
               probes: context.debugProbes,
               recorder,
+              dtype: outputDtype,
             });
           }
           break;
@@ -733,6 +738,7 @@ async function processLayerPlanGPU(layerIdx, inputBuffer, numTokens, isPrefill,
               hiddenSize,
               probes: context.debugProbes,
               recorder,
+              dtype: outputDtype,
             });
           }
           break;
@@ -767,6 +773,7 @@ async function processLayerPlanGPU(layerIdx, inputBuffer, numTokens, isPrefill,
               hiddenSize,
               probes: context.debugProbes,
               recorder,
+              dtype: outputDtype,
             });
           }
           break;
@@ -801,6 +808,7 @@ async function processLayerPlanGPU(layerIdx, inputBuffer, numTokens, isPrefill,
               hiddenSize,
               probes: context.debugProbes,
               recorder,
+              dtype: outputDtype,
             });
           }
           break;
@@ -825,6 +833,7 @@ async function processLayerPlanGPU(layerIdx, inputBuffer, numTokens, isPrefill,
               hiddenSize,
               probes: context.debugProbes,
               recorder,
+              dtype: outputDtype,
             });
           }
           break;
@@ -851,6 +860,7 @@ async function processLayerPlanGPU(layerIdx, inputBuffer, numTokens, isPrefill,
               hiddenSize,
               probes: context.debugProbes,
               recorder,
+              dtype: toDtype,
             });
           }
           break;
@@ -880,6 +890,7 @@ async function processLayerPlanGPU(layerIdx, inputBuffer, numTokens, isPrefill,
     hiddenSize,
     probes: context.debugProbes,
     recorder,
+    dtype: getSlotDtype('state') ?? activationDtype,
   });
   const computeConfig = context.runtimeComputeConfig ?? null;

package/src/inference/pipelines/text.js CHANGED Viewed

@@ -299,9 +299,13 @@ export class InferencePipeline extends PipelineState {
     const maxSeqLen = config.maxSeqLen;
     const ropeBuffers = await initRoPEFrequencies({
       headDim: config.headDim,
+      rotaryDim: config.ropeRotaryDim,
       maxSeqLen,
       ropeTheta: config.ropeTheta,
       ropeLocalTheta: config.ropeLocalTheta,
+      mropeInterleaved: config.ropeInterleaved,
+      mropeSection: config.mropeSection,
+      partialRotaryFactor: config.partialRotaryFactor,
       ropeScale: config.ropeScale,
       ropeLocalScale: config.ropeLocalScale,
       ropeScalingType: config.ropeScalingType,

package/src/inference/tokenizers/bundled.js CHANGED Viewed

@@ -64,6 +64,68 @@ function resolveSpecialTokens(specialTokensRaw, fallbackTokens, vocab) {
   return resolved;
 }
+function resolveByteLevelPretokenizerConfig(preTokenizer) {
+  if (!preTokenizer || typeof preTokenizer !== 'object') {
+    return {
+      useByteLevel: false,
+      addPrefixSpace: null,
+    };
+  }
+  if (preTokenizer.type === 'ByteLevel') {
+    return {
+      useByteLevel: true,
+      addPrefixSpace: preTokenizer.add_prefix_space === true,
+    };
+  }
+  if (preTokenizer.type === 'Sequence' && Array.isArray(preTokenizer.pretokenizers)) {
+    for (const entry of preTokenizer.pretokenizers) {
+      const resolved = resolveByteLevelPretokenizerConfig(entry);
+      if (resolved.useByteLevel) {
+        return resolved;
+      }
+    }
+  }
+  return {
+    useByteLevel: false,
+    addPrefixSpace: null,
+  };
+}
+function registerAddedTokens(addedTokens, vocab, reverseVocab, patterns, specialTokenIds, derivedSpecialTokens = null) {
+  let maxId = -1;
+  for (const token of addedTokens) {
+    const content = token?.content;
+    const id = typeof token?.id === 'number' ? token.id : parseInt(token?.id, 10);
+    if (!Number.isFinite(id) || !content) continue;
+    if (!vocab.has(content)) {
+      vocab.set(content, id);
+      reverseVocab.set(id, content);
+    }
+    if (id > maxId) maxId = id;
+    if (content.length > 1) {
+      patterns.push({ content, id });
+    }
+    if (token.special) {
+      specialTokenIds.add(id);
+      if (derivedSpecialTokens) {
+        if (derivedSpecialTokens.bos == null && (content === '<bos>' || content === '<s>' || content.includes('bos'))) {
+          derivedSpecialTokens.bos = id;
+        } else if (derivedSpecialTokens.eos == null && (content === '<eos>' || content === '</s>' || content.includes('eos'))) {
+          derivedSpecialTokens.eos = id;
+        } else if (derivedSpecialTokens.pad == null && (content === '<pad>' || content.includes('pad'))) {
+          derivedSpecialTokens.pad = id;
+        } else if (derivedSpecialTokens.unk == null && (content === '<unk>' || content.includes('unk'))) {
+          derivedSpecialTokens.unk = id;
+        }
+      }
+    }
+  }
+  return maxId;
+}
 export class TransformersTokenizer extends BaseTokenizer {
@@ -156,6 +218,10 @@ export class BundledTokenizer extends BaseTokenizer {
   #byteDecoder = null;
+  #byteEncoder = null;
+  #useByteLevelEncoding = false;
   constructor(config = {}) {
     // BundledTokenizer gets vocabSize from load(), so defer validation
@@ -199,9 +265,20 @@ export class BundledTokenizer extends BaseTokenizer {
     }
     this.#byteDecoder = new Map();
+    this.#byteEncoder = new Map();
     for (let i = 0; i < base.length; i++) {
       this.#byteDecoder.set(String.fromCodePoint(chars[i]), base[i]);
+      this.#byteEncoder.set(base[i], String.fromCodePoint(chars[i]));
+    }
+  }
+  #encodeByteLevelText(text) {
+    const bytes = new TextEncoder().encode(text);
+    let out = '';
+    for (const byte of bytes) {
+      out += this.#byteEncoder?.get(byte) ?? String.fromCharCode(byte);
     }
+    return out;
   }
@@ -290,30 +367,16 @@ export class BundledTokenizer extends BaseTokenizer {
       eos: null,
       unk: null,
     };
-    for (const token of addedTokens) {
-      const content = token.content;
-      const id = typeof token.id === 'number' ? token.id : parseInt( (token.id), 10);
-      if (!Number.isFinite(id) || !content) continue;
-      if (!this.#vocab.has(content)) {
-        this.#vocab.set(content, id);
-        this.#reverseVocab.set(id, content);
-      }
-      if (id > maxId) maxId = id;
-      if (token.special) {
-        specialTokenIds.add(id);
-        if (content.length > 1) {
-          specialTokenPatterns.push({ content, id });
-        }
-        if (derivedSpecialTokens.bos == null && (content === '<bos>' || content === '<s>' || content.includes('bos'))) {
-          derivedSpecialTokens.bos = id;
-        } else if (derivedSpecialTokens.eos == null && (content === '<eos>' || content === '</s>' || content.includes('eos'))) {
-          derivedSpecialTokens.eos = id;
-        } else if (derivedSpecialTokens.pad == null && (content === '<pad>' || content.includes('pad'))) {
-          derivedSpecialTokens.pad = id;
-        } else if (derivedSpecialTokens.unk == null && (content === '<unk>' || content.includes('unk'))) {
-          derivedSpecialTokens.unk = id;
-        }
-      }
+    const addedMaxId = registerAddedTokens(
+      addedTokens,
+      this.#vocab,
+      this.#reverseVocab,
+      specialTokenPatterns,
+      specialTokenIds,
+      derivedSpecialTokens
+    );
+    if (addedMaxId > maxId) {
+      maxId = addedMaxId;
     }
     const specialTokensRaw = hf.special_tokens_map || hf.specialTokens || hf.special_tokens || null;
@@ -351,6 +414,7 @@ export class BundledTokenizer extends BaseTokenizer {
     // Handle behavior flags (use HF config if present, else runtime defaults)
     const runtimeDefaults = getRuntimeConfig().inference.tokenizer;
+    const byteLevelPretokenizer = resolveByteLevelPretokenizerConfig(hf.pre_tokenizer);
     const configuredAddBosToken = this.addBosToken;
     const configuredAddEosToken = this.addEosToken;
     this.addBosToken =
@@ -378,9 +442,16 @@ export class BundledTokenizer extends BaseTokenizer {
     // - runtime config addSpacePrefix (user override or null for auto-detect)
     const decoderPrepend = hf.decoder?.prepend_scheme === 'always' || hf.decoder?.add_prefix_space === true;
     const normalizerPrepend = hf.normalizer?.prepend_scheme === 'always' || hf.normalizer?.add_prefix_space === true;
+    this.#useByteLevelEncoding = byteLevelPretokenizer.useByteLevel;
     const runtimeSpacePrefix = runtimeDefaults.addSpacePrefix;
     // Use explicit runtime config if set (non-null), otherwise auto-detect from tokenizer.json
-    this.#addSpacePrefix = runtimeSpacePrefix ?? model.add_prefix_space ?? model.add_dummy_prefix ?? decoderPrepend ?? normalizerPrepend ?? false;
+    this.#addSpacePrefix = runtimeSpacePrefix
+      ?? byteLevelPretokenizer.addPrefixSpace
+      ?? model.add_prefix_space
+      ?? model.add_dummy_prefix
+      ?? decoderPrepend
+      ?? normalizerPrepend
+      ?? false;
     log.debug('Tokenizer', `addSpacePrefix=${this.#addSpacePrefix} (runtime=${runtimeSpacePrefix}, model=${model.add_prefix_space ?? model.add_dummy_prefix}, decoder=${decoderPrepend}, normalizer=${normalizerPrepend})`);
     // Detect space prefix style by checking which WORD tokens exist in vocab
@@ -469,11 +540,47 @@ export class BundledTokenizer extends BaseTokenizer {
       this.#tokenTypes = tokenizerJson.tokenTypes;
     }
+    let maxId = -1;
+    for (const id of this.#vocab.values()) {
+      if (Number.isFinite(id) && id > maxId) {
+        maxId = id;
+      }
+    }
+    const addedTokens = Array.isArray(tokenizerJson.added_tokens) ? tokenizerJson.added_tokens : [];
+    const tokenPatterns = [];
+    const specialTokenIds = new Set();
+    const derivedSpecialTokens = {
+      pad: null,
+      bos: null,
+      eos: null,
+      unk: null,
+    };
+    const addedMaxId = registerAddedTokens(
+      addedTokens,
+      this.#vocab,
+      this.#reverseVocab,
+      tokenPatterns,
+      specialTokenIds,
+      derivedSpecialTokens
+    );
+    if (addedMaxId > maxId) {
+      maxId = addedMaxId;
+    }
     // Set special tokens - support both camelCase and snake_case formats
     const specialTokensRaw =  (tokenizerJson.specialTokens ||  (tokenizerJson).special_tokens);
-    this.specialTokens = resolveSpecialTokens(specialTokensRaw, this.specialTokens, this.#vocab);
+    this.specialTokens = resolveSpecialTokens(
+      specialTokensRaw,
+      {
+        ...derivedSpecialTokens,
+        ...this.specialTokens,
+      },
+      this.#vocab
+    );
     log.debug('Tokenizer', `Special tokens: BOS=${this.specialTokens.bos}, EOS=${this.specialTokens.eos}`);
-    this.#specialTokenIds = new Set();
+    this.#specialTokenIds = specialTokenIds;
+    this.#specialTokenPatterns = tokenPatterns;
     const builtinSpecials = [
       this.specialTokens.pad,
       this.specialTokens.bos,
@@ -485,8 +592,13 @@ export class BundledTokenizer extends BaseTokenizer {
         this.#specialTokenIds.add(id);
       }
     }
+    this.#specialTokenPatterns.sort((a, b) => b.content.length - a.content.length);
+    if (maxId >= 0) {
+      this.vocabSize = Math.max(this.vocabSize, maxId + 1);
+    }
     const runtimeDefaults = getRuntimeConfig().inference.tokenizer;
+    const byteLevelPretokenizer = resolveByteLevelPretokenizerConfig(tokenizerJson.pre_tokenizer);
     const configuredAddBosToken = this.addBosToken;
     const configuredAddEosToken = this.addEosToken;
     this.addBosToken =
@@ -505,9 +617,11 @@ export class BundledTokenizer extends BaseTokenizer {
     if (this.addEosToken && this.specialTokens.eos == null) {
       throw new Error('[Tokenizer] addEosToken is enabled but eos token is missing.');
     }
+    this.#useByteLevelEncoding = byteLevelPretokenizer.useByteLevel;
     // NOTE: Default to FALSE - first word shouldn't get space prefix
     // Space prefixes are only for words that follow a space in original text
-    this.#addSpacePrefix = tokenizerJson.addSpacePrefix === true;
+    this.#addSpacePrefix = tokenizerJson.addSpacePrefix === true
+      || byteLevelPretokenizer.addPrefixSpace === true;
     // Detect space prefix style based on vocab tokens
     // GPT-style uses 'Ġ' (U+0120), SentencePiece uses '▁' (U+2581)
@@ -548,7 +662,8 @@ export class BundledTokenizer extends BaseTokenizer {
       ids.push(this.specialTokens.bos);
     }
-    // Split text around special tokens and tokenize each segment
+    // Split text around literal added tokens and special tokens, then tokenize
+    // the remaining plain-text segments normally.
     const segments = this.#splitOnSpecialTokens(text);
     for (const seg of segments) {
       if (seg.isSpecial && seg.id !== undefined) {
@@ -690,11 +805,19 @@ export class BundledTokenizer extends BaseTokenizer {
     if (text.length === 0) return [];
     let normalized = text;
-    if (this.#addSpacePrefix && !normalized.startsWith(' ')) {
-      normalized = ` ${normalized}`;
+    let prefixed;
+    if (this.#useByteLevelEncoding) {
+      if (this.#addSpacePrefix && !normalized.startsWith(' ')) {
+        normalized = ` ${normalized}`;
+      }
+      prefixed = this.#encodeByteLevelText(normalized);
+    } else {
+      if (this.#addSpacePrefix && !normalized.startsWith(' ')) {
+        normalized = ` ${normalized}`;
+      }
+      const sp = this.#spacePrefixChar;
+      prefixed = normalized.replace(/ /g, sp);
     }
-    const sp = this.#spacePrefixChar;
-    const prefixed = normalized.replace(/ /g, sp);
     if (this.#mergeRanks.size === 0) {
       return this.#encodeBPEGreedy(prefixed);