npm - @simulatte/doppler - Versions diffs - 0.1.5 → 0.1.6 - Mend

@simulatte/doppler 0.1.5 → 0.1.6

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (130)

package/README.md +23 -8
package/package.json +7 -4
package/src/config/kernels/kernel-ref-digests.js +39 -39
package/src/config/kernels/registry.json +42 -2
package/src/config/loader.js +31 -2
package/src/config/merge.js +18 -0
package/src/config/presets/models/qwen3.json +9 -2
package/src/config/presets/models/transformer.json +5 -0
package/src/config/required-inference-fields-contract-check.js +6 -0
package/src/config/schema/inference-defaults.schema.js +3 -0
package/src/config/schema/inference.schema.d.ts +9 -0
package/src/config/schema/kernel-path.schema.d.ts +6 -0
package/src/config/schema/manifest.schema.d.ts +6 -0
package/src/config/schema/manifest.schema.js +3 -0
package/src/converter/rope-config.js +42 -0
package/src/gpu/device.js +58 -0
package/src/gpu/kernels/attention.js +98 -0
package/src/gpu/kernels/bias_add.wgsl +8 -6
package/src/gpu/kernels/bias_add_f16.wgsl +8 -5
package/src/gpu/kernels/conv2d.js +1 -1
package/src/gpu/kernels/conv2d.wgsl +7 -8
package/src/gpu/kernels/conv2d_f16.wgsl +7 -8
package/src/gpu/kernels/depthwise_conv2d.js +2 -1
package/src/gpu/kernels/depthwise_conv2d.wgsl +6 -9
package/src/gpu/kernels/depthwise_conv2d_f16.wgsl +6 -9
package/src/gpu/kernels/grouped_pointwise_conv2d.js +2 -1
package/src/gpu/kernels/grouped_pointwise_conv2d.wgsl +6 -9
package/src/gpu/kernels/grouped_pointwise_conv2d_f16.wgsl +6 -9
package/src/gpu/kernels/matmul.js +25 -0
package/src/gpu/kernels/pixel_shuffle.js +1 -1
package/src/gpu/kernels/pixel_shuffle.wgsl +4 -5
package/src/gpu/kernels/pixel_shuffle_f16.wgsl +4 -5
package/src/gpu/kernels/relu.js +15 -2
package/src/gpu/kernels/relu.wgsl +2 -1
package/src/gpu/kernels/relu_f16.wgsl +2 -1
package/src/gpu/kernels/repeat_channels.js +1 -1
package/src/gpu/kernels/repeat_channels.wgsl +4 -5
package/src/gpu/kernels/repeat_channels_f16.wgsl +4 -5
package/src/gpu/kernels/residual.js +44 -8
package/src/gpu/kernels/residual.wgsl +6 -3
package/src/gpu/kernels/residual_f16.wgsl +2 -1
package/src/gpu/kernels/residual_f16_vec4.wgsl +2 -1
package/src/gpu/kernels/residual_vec4.wgsl +2 -1
package/src/gpu/kernels/rmsnorm.js +58 -6
package/src/gpu/kernels/rmsnorm.wgsl +14 -6
package/src/gpu/kernels/rmsnorm_f16.wgsl +10 -2
package/src/gpu/kernels/rope.d.ts +2 -0
package/src/gpu/kernels/rope.js +11 -1
package/src/gpu/kernels/rope.wgsl +56 -40
package/src/gpu/kernels/sana_linear_attention.js +1 -2
package/src/gpu/kernels/sana_linear_attention_apply.wgsl +4 -5
package/src/gpu/kernels/sana_linear_attention_apply_f16.wgsl +4 -5
package/src/gpu/kernels/sana_linear_attention_summary.wgsl +4 -0
package/src/gpu/kernels/sana_linear_attention_summary_f16.wgsl +4 -0
package/src/gpu/kernels/silu.d.ts +1 -0
package/src/gpu/kernels/silu.js +32 -14
package/src/gpu/kernels/silu.wgsl +19 -9
package/src/gpu/kernels/silu_f16.wgsl +19 -9
package/src/gpu/kernels/transpose.js +15 -2
package/src/gpu/kernels/transpose.wgsl +5 -6
package/src/gpu/kernels/upsample2d.js +2 -1
package/src/gpu/kernels/upsample2d.wgsl +6 -9
package/src/gpu/kernels/upsample2d_f16.wgsl +6 -9
package/src/gpu/kernels/utils.js +16 -1
package/src/inference/browser-harness.js +47 -1
package/src/inference/pipelines/diffusion/pipeline.js +15 -6
package/src/inference/pipelines/diffusion/text-encoder-gpu.d.ts +5 -0
package/src/inference/pipelines/diffusion/text-encoder-gpu.js +27 -15
package/src/inference/pipelines/text/attention/record.js +11 -2
package/src/inference/pipelines/text/attention/run.js +11 -2
package/src/inference/pipelines/text/chat-format.js +25 -1
package/src/inference/pipelines/text/config.d.ts +4 -0
package/src/inference/pipelines/text/config.js +68 -1
package/src/inference/pipelines/text/execution-plan.js +23 -31
package/src/inference/pipelines/text/execution-v0.js +29 -2
package/src/inference/pipelines/text/ffn/standard.js +3 -0
package/src/inference/pipelines/text/init.d.ts +4 -0
package/src/inference/pipelines/text/init.js +56 -9
package/src/inference/pipelines/text/layer.js +11 -0
package/src/inference/pipelines/text.js +4 -0
package/src/inference/tokenizers/bundled.js +156 -33
package/src/rules/tooling/command-runtime.rules.json +18 -0
package/src/tooling/command-api.d.ts +27 -1
package/src/tooling/command-api.js +142 -3
package/src/tooling/node-browser-command-runner.d.ts +4 -0
package/src/tooling/node-browser-command-runner.js +58 -3
package/src/tooling/node-command-runner.js +15 -0
package/src/tooling/node-webgpu.js +9 -87
package/src/training/checkpoint-watch.d.ts +7 -0
package/src/training/checkpoint-watch.js +106 -0
package/src/training/checkpoint.d.ts +6 -1
package/src/training/checkpoint.js +12 -2
package/src/training/distillation/artifacts.d.ts +71 -0
package/src/training/distillation/artifacts.js +132 -0
package/src/training/distillation/checkpoint-watch.d.ts +10 -0
package/src/training/distillation/checkpoint-watch.js +57 -0
package/src/training/distillation/dataset.d.ts +59 -0
package/src/training/distillation/dataset.js +337 -0
package/src/training/distillation/eval.d.ts +34 -0
package/src/training/distillation/eval.js +310 -0
package/src/training/distillation/index.d.ts +29 -0
package/src/training/distillation/index.js +29 -0
package/src/training/distillation/runtime.d.ts +20 -0
package/src/training/distillation/runtime.js +121 -0
package/src/training/distillation/scoreboard.d.ts +6 -0
package/src/training/distillation/scoreboard.js +8 -0
package/src/training/distillation/stage-a.d.ts +45 -0
package/src/training/distillation/stage-a.js +338 -0
package/src/training/distillation/stage-b.d.ts +24 -0
package/src/training/distillation/stage-b.js +20 -0
package/src/training/index.d.ts +10 -0
package/src/training/index.js +10 -0
package/src/training/lora-pipeline.d.ts +40 -0
package/src/training/lora-pipeline.js +796 -0
package/src/training/operator-artifacts.d.ts +62 -0
package/src/training/operator-artifacts.js +140 -0
package/src/training/operator-command.d.ts +5 -0
package/src/training/operator-command.js +453 -0
package/src/training/operator-eval.d.ts +48 -0
package/src/training/operator-eval.js +230 -0
package/src/training/operator-scoreboard.d.ts +5 -0
package/src/training/operator-scoreboard.js +44 -0
package/src/training/runner.d.ts +52 -0
package/src/training/runner.js +29 -4
package/src/training/suite.d.ts +112 -0
package/src/training/suite.js +9 -9
package/src/training/workloads.d.ts +164 -0
package/src/training/workloads.js +539 -0
package/src/version.js +1 -1
package/tools/doppler-cli.js +137 -40

package/src/gpu/kernels/transpose.js CHANGED Viewed

@@ -3,19 +3,32 @@ import { createTensor, dtypeBytes } from '../tensor.js';
 import { WORKGROUP_SIZES } from './constants.js';
 import { unifiedKernelWrapper } from './utils.js';
+function planTransposeDispatch(target, cols) {
+  const device = target?.device;
+  const maxPerDim = Number.isFinite(device?.limits?.maxComputeWorkgroupsPerDimension)
+    ? device.limits.maxComputeWorkgroupsPerDimension
+    : 65535;
+  const dispatchStride = Math.min(cols, maxPerDim * WORKGROUP_SIZES.DEFAULT);
+  return {
+    dispatchStride,
+    workgroups: [Math.ceil(dispatchStride / WORKGROUP_SIZES.DEFAULT), 1, 1],
+  };
+}
 async function _transpose(target, input, rows, cols, options = {}) {
   const { outputBuffer = null } = options;
   const bytesPerElement = dtypeBytes(input.dtype);
   const outputSize = rows * cols * bytesPerElement;
   const outputBuf = outputBuffer || acquireBuffer(outputSize, undefined, 'transpose_output');
+  const dispatchPlan = planTransposeDispatch(target, cols);
   await unifiedKernelWrapper(
     'transpose',
     target,
     'default',
     [input, outputBuf],
-    { rows, cols },
-    Math.ceil((rows * cols) / WORKGROUP_SIZES.DEFAULT)
+    { rows, cols, _pad0: dispatchPlan.dispatchStride, _pad1: 0 },
+    [dispatchPlan.workgroups[0], rows, 1]
   );
   return createTensor(outputBuf, input.dtype, [cols, rows], 'transpose_output');

package/src/gpu/kernels/transpose.wgsl CHANGED Viewed

@@ -19,14 +19,13 @@ struct Uniforms {
 @compute @workgroup_size(WORKGROUP_SIZE, 1, 1)
 fn main(@builtin(global_invocation_id) gid: vec3<u32>) {
-    let idx = gid.x;
-    let total = u.rows * u.cols;
-    if (idx >= total) {
+    let dispatch_stride = max(u._pad0, 1u);
+    let row = gid.y;
+    let col = gid.x + row * dispatch_stride;
+    if (row >= u.rows || col >= u.cols) {
         return;
     }
-    let row = idx / u.cols;
-    let col = idx % u.cols;
+    let idx = row * u.cols + col;
     let out_idx = col * u.rows + row;
     output[out_idx] = input[idx];
 }

package/src/gpu/kernels/upsample2d.js CHANGED Viewed

@@ -31,6 +31,7 @@ async function _upsample2d(target, input, options = {}) {
   const outHeight = resolvedHeight * scale;
   const outWidth = resolvedWidth * scale;
+  const outSpatial = outHeight * outWidth;
   const bytesPerElement = dtypeBytes(input.dtype);
   const outputSize = channels * outHeight * outWidth * bytesPerElement;
   const output = outputBuffer || acquireBuffer(outputSize, undefined, 'upsample2d_output');
@@ -43,7 +44,7 @@ async function _upsample2d(target, input, options = {}) {
       out_height: outHeight, out_width: outWidth, scale,
       _pad0: 0, _pad1: 0,
     },
-    Math.ceil((channels * outHeight * outWidth) / WORKGROUP_SIZES.DEFAULT)
+    [Math.ceil(outSpatial / WORKGROUP_SIZES.DEFAULT), channels, 1]
   );
   return createTensor(output, input.dtype, [channels, outHeight, outWidth], 'upsample2d_output');

package/src/gpu/kernels/upsample2d.wgsl CHANGED Viewed

@@ -19,19 +19,16 @@ struct Uniforms {
 @compute @workgroup_size(WORKGROUP_SIZE, 1, 1)
 fn main(@builtin(global_invocation_id) gid: vec3<u32>) {
-    let idx = gid.x;
     let out_spatial = u.out_height * u.out_width;
-    let total = u.channels * out_spatial;
-    if (idx >= total) {
+    let spatial_idx = gid.x;
+    let channel = gid.y;
+    if (spatial_idx >= out_spatial || channel >= u.channels) {
         return;
     }
-    let channel = idx / out_spatial;
-    let rem = idx - channel * out_spatial;
-    let out_y = rem / u.out_width;
-    let out_x = rem - out_y * u.out_width;
+    let out_y = spatial_idx / u.out_width;
+    let out_x = spatial_idx - out_y * u.out_width;
     let in_y = out_y / u.scale;
     let in_x = out_x / u.scale;
     let in_idx = (channel * u.in_height + in_y) * u.in_width + in_x;
-    output[idx] = input[in_idx];
+    output[channel * out_spatial + spatial_idx] = input[in_idx];
 }

package/src/gpu/kernels/upsample2d_f16.wgsl CHANGED Viewed

@@ -23,19 +23,16 @@ struct Uniforms {
 @compute @workgroup_size(WORKGROUP_SIZE, 1, 1)
 fn main(@builtin(global_invocation_id) gid: vec3<u32>) {
-    let idx = gid.x;
     let out_spatial = u.out_height * u.out_width;
-    let total = u.channels * out_spatial;
-    if (idx >= total) {
+    let spatial_idx = gid.x;
+    let channel = gid.y;
+    if (spatial_idx >= out_spatial || channel >= u.channels) {
         return;
     }
-    let channel = idx / out_spatial;
-    let rem = idx - channel * out_spatial;
-    let out_y = rem / u.out_width;
-    let out_x = rem - out_y * u.out_width;
+    let out_y = spatial_idx / u.out_width;
+    let out_x = spatial_idx - out_y * u.out_width;
     let in_y = out_y / u.scale;
     let in_x = out_x / u.scale;
     let in_idx = (channel * u.in_height + in_y) * u.in_width + in_x;
-    output[idx] = input[in_idx];
+    output[channel * out_spatial + spatial_idx] = input[in_idx];
 }

package/src/gpu/kernels/utils.js CHANGED Viewed

@@ -116,9 +116,24 @@ export async function unifiedKernelWrapper(opName, target, variant, bindings, un
       index = config.variantMetadata.outputBinding;
     }
+    const buffer = binding?.buffer || binding;
+    const isGpuBuffer = buffer && (
+      typeof GPUBuffer === 'undefined'
+        ? true
+        : buffer instanceof GPUBuffer
+    );
+    if (!isGpuBuffer) {
+      const bindingLabel = binding?.label ?? buffer?.label ?? 'unknown';
+      const bufferType = buffer === null ? 'null' : buffer === undefined ? 'undefined' : buffer.constructor?.name || typeof buffer;
+      throw new Error(
+        `Kernel "${opName}/${variant}" binding "${bindingConfig.name}" (index ${index}) requires a GPUBuffer ` +
+        `(label=${bindingLabel}, type=${bufferType}).`
+      );
+    }
     bindGroupEntries.push({
       binding: index,
-      resource: { buffer: binding?.buffer || binding }
+      resource: { buffer }
     });
   }

package/src/inference/browser-harness.js CHANGED Viewed

@@ -929,6 +929,9 @@ async function resolveHarnessOverride(options = {}) {
 async function initializeSuiteModel(options = {}) {
   if (options.harnessOverride) {
+    if (options.runtime?.runtimeConfig) {
+      setRuntimeConfig(options.runtime.runtimeConfig);
+    }
     return resolveHarnessOverride(options);
   }
   const loadStart = performance.now();
@@ -988,6 +991,14 @@ async function runKernelSuite(options = {}) {
 const DEFAULT_HARNESS_PROMPT = 'Summarize this input in one sentence.';
 const DEFAULT_RUNTIME_PLACEHOLDER_PROMPT = 'Hello from Doppler.';
+const DEFAULT_QWEN_PROMPT = Object.freeze({
+  messages: Object.freeze([
+    Object.freeze({
+      role: 'user',
+      content: 'Answer in one short sentence: What color is the sky on a clear day?',
+    }),
+  ]),
+});
 const DEFAULT_TRANSLATEGEMMA_PROMPT = Object.freeze({
   messages: Object.freeze([
     Object.freeze({
@@ -1273,6 +1284,9 @@ function resolvePromptTemplateType(source) {
 }
 function buildDefaultGenerationPrompt(templateType) {
+  if (templateType === 'qwen') {
+    return clonePromptInput(DEFAULT_QWEN_PROMPT);
+  }
   if (templateType === 'translategemma') {
     return clonePromptInput(DEFAULT_TRANSLATEGEMMA_PROMPT);
   }
@@ -1280,7 +1294,7 @@ function buildDefaultGenerationPrompt(templateType) {
 }
 function shouldPreferModelDefaultPrompt(runtimePrompt, templateType) {
-  if (templateType !== 'translategemma') {
+  if (templateType !== 'translategemma' && templateType !== 'qwen') {
     return false;
   }
   if (typeof runtimePrompt !== 'string') {
@@ -1289,6 +1303,31 @@ function shouldPreferModelDefaultPrompt(runtimePrompt, templateType) {
   return runtimePrompt.trim() === DEFAULT_RUNTIME_PLACEHOLDER_PROMPT;
 }
+function assertPromptContract(runtimePrompt, templateType, source = 'runtime.inference.prompt') {
+  if (templateType !== 'translategemma') {
+    return;
+  }
+  if (runtimePrompt === undefined || runtimePrompt === null) {
+    return;
+  }
+  if (typeof runtimePrompt === 'string') {
+    const trimmed = runtimePrompt.trim();
+    if (!trimmed || trimmed === DEFAULT_RUNTIME_PLACEHOLDER_PROMPT) {
+      return;
+    }
+    throw new Error(
+      `TranslateGemma harness prompt contract violation: ${source} must be ` +
+      '{ messages: [...] } with source_lang_code/target_lang_code blocks, not a plain string.'
+    );
+  }
+  if (!isStructuredPromptInput(runtimePrompt)) {
+    throw new Error(
+      `TranslateGemma harness prompt contract violation: ${source} must be ` +
+      '{ messages: [...] } with source_lang_code/target_lang_code blocks.'
+    );
+  }
+}
 function describePromptInput(promptInput) {
   if (typeof promptInput === 'string') {
     return promptInput.trim() || DEFAULT_HARNESS_PROMPT;
@@ -1305,6 +1344,11 @@ function describePromptInput(promptInput) {
   if (sourceLang && targetLang) {
     return `${sourceLang} -> ${targetLang}: ${text || '[non-text request]'}`;
   }
+  const stringContent = asText(firstMessage?.content);
+  if (stringContent) {
+    const role = asText(firstMessage?.role) || 'user';
+    return `${role}: ${stringContent}`;
+  }
   try {
     return JSON.stringify(promptInput);
   } catch {
@@ -1315,6 +1359,7 @@ function describePromptInput(promptInput) {
 function resolveGenerationPromptInput(runtimeConfig, runOverrides = null, source = null) {
   const templateType = resolvePromptTemplateType(source);
   const overridePrompt = runOverrides?.prompt;
+  assertPromptContract(overridePrompt, templateType, 'runOverrides.prompt');
   if (typeof overridePrompt === 'string' && overridePrompt.trim()) {
     return overridePrompt.trim();
   }
@@ -1323,6 +1368,7 @@ function resolveGenerationPromptInput(runtimeConfig, runOverrides = null, source
   }
   const runtimePrompt = runtimeConfig?.inference?.prompt;
+  assertPromptContract(runtimePrompt, templateType, 'runtimeConfig.inference.prompt');
   if (shouldPreferModelDefaultPrompt(runtimePrompt, templateType)) {
     return buildDefaultGenerationPrompt(templateType);
   }

package/src/inference/pipelines/diffusion/pipeline.js CHANGED Viewed

@@ -52,6 +52,18 @@ function generateLatents(width, height, channels, latentScale, seed) {
   return { latents, latentWidth, latentHeight };
 }
+function generateNoiseVector(size, seed) {
+  if (!Number.isFinite(size) || size <= 0) {
+    throw new Error(`generateNoiseVector requires a positive size, got ${size}.`);
+  }
+  const out = new Float32Array(size);
+  const rand = createRng(seed ?? createRandomSeed());
+  for (let i = 0; i < size; i++) {
+    out[i] = sampleNormal(rand);
+  }
+  return out;
+}
 function extractTokenSet(tokensByEncoder, key) {
   const output = {};
   for (const [name, entry] of Object.entries(tokensByEncoder || {})) {
@@ -195,13 +207,10 @@ async function applySchedulerStep(latentsTensor, scheduler, stepIndex, timestep,
     const isFinalStep = stepIndex + 1 >= scheduler.timesteps.length - 1;
     const noise = isFinalStep
       ? null
-      : generateLatents(
-          runtime.latent.width,
-          runtime.latent.height,
-          runtime.latent.channels,
-          runtime.latent.scale,
+      : generateNoiseVector(
+          sample.length,
           (options.seedBase ?? createRandomSeed()) + stepIndex + 1
-        ).latents;
+        );
     const step = stepScmScheduler(scheduler, modelOutput, timestep, sample, stepIndex, noise);
     return createLatentTensor(step.prevSample, [...latentsTensor.shape], runtime);
   }

package/src/inference/pipelines/diffusion/text-encoder-gpu.d.ts CHANGED Viewed

@@ -80,3 +80,8 @@ export declare function projectContext(
 ): Promise<Tensor>;
 export declare function assertClipHiddenActivationSupported(config: { hidden_act?: string }): void;
+export declare function resolveGemma2WeightRoot(
+  weights: Map<string, any>,
+  prefix?: string
+): string;

package/src/inference/pipelines/diffusion/text-encoder-gpu.js CHANGED Viewed

@@ -723,8 +723,19 @@ function buildGemma2LayerTypes(layerCount, slidingWindow) {
   ));
 }
-function getGemma2LayerWeight(weights, prefix, layerIdx, suffix, required = true) {
-  const key = `${prefix}.model.layers.${layerIdx}.${suffix}`;
+export function resolveGemma2WeightRoot(weights, prefix = 'text_encoder') {
+  const nestedRoot = `${prefix}.model`;
+  if (weights?.has(`${nestedRoot}.embed_tokens.weight`)) {
+    return nestedRoot;
+  }
+  if (weights?.has(`${prefix}.embed_tokens.weight`)) {
+    return prefix;
+  }
+  return nestedRoot;
+}
+function getGemma2LayerWeight(weights, weightRoot, layerIdx, suffix, required = true) {
+  const key = `${weightRoot}.layers.${layerIdx}.${suffix}`;
   const weight = weights.get(key) || null;
   if (!weight && required) {
     throw new Error(`Missing Gemma2 diffusion weight "${key}".`);
@@ -805,8 +816,9 @@ async function runGemma2TextEncoder(tokens, weightsEntry, config, runtime, optio
   const tokenIds = normalizeTokens(tokens, options.maxLength ?? resolved.maxPositionEmbeddings, padTokenId);
   const numTokens = tokenIds.length;
   const tokenBuffer = createDiffusionIndexBuffer(device, tokenIds, `${prefix}_tokens`);
+  const weightRoot = resolveGemma2WeightRoot(weights, prefix);
-  const embedKey = `${prefix}.model.embed_tokens.weight`;
+  const embedKey = `${weightRoot}.embed_tokens.weight`;
   const embedWeight = expectDiffusionWeight(
     weights.get(embedKey),
     embedKey
@@ -837,16 +849,16 @@ async function runGemma2TextEncoder(tokens, weightsEntry, config, runtime, optio
   const layerWeights = new Map();
   for (let layerIdx = 0; layerIdx < resolved.numLayers; layerIdx++) {
     layerWeights.set(`layer_${layerIdx}`, {
-      inputNorm: getGemma2LayerWeight(weights, prefix, layerIdx, 'input_layernorm.weight'),
-      qProj: getGemma2LayerWeight(weights, prefix, layerIdx, 'self_attn.q_proj.weight'),
-      kProj: getGemma2LayerWeight(weights, prefix, layerIdx, 'self_attn.k_proj.weight'),
-      vProj: getGemma2LayerWeight(weights, prefix, layerIdx, 'self_attn.v_proj.weight'),
-      oProj: getGemma2LayerWeight(weights, prefix, layerIdx, 'self_attn.o_proj.weight'),
-      postAttentionNorm: getGemma2LayerWeight(weights, prefix, layerIdx, 'post_attention_layernorm.weight'),
-      preFeedforwardNorm: getGemma2LayerWeight(weights, prefix, layerIdx, 'pre_feedforward_layernorm.weight'),
-      gate: getGemma2LayerWeight(weights, prefix, layerIdx, 'mlp.gate_proj.weight'),
-      up: getGemma2LayerWeight(weights, prefix, layerIdx, 'mlp.up_proj.weight'),
-      down: getGemma2LayerWeight(weights, prefix, layerIdx, 'mlp.down_proj.weight'),
+      inputNorm: getGemma2LayerWeight(weights, weightRoot, layerIdx, 'input_layernorm.weight'),
+      qProj: getGemma2LayerWeight(weights, weightRoot, layerIdx, 'self_attn.q_proj.weight'),
+      kProj: getGemma2LayerWeight(weights, weightRoot, layerIdx, 'self_attn.k_proj.weight'),
+      vProj: getGemma2LayerWeight(weights, weightRoot, layerIdx, 'self_attn.v_proj.weight'),
+      oProj: getGemma2LayerWeight(weights, weightRoot, layerIdx, 'self_attn.o_proj.weight'),
+      postAttentionNorm: getGemma2LayerWeight(weights, weightRoot, layerIdx, 'post_attention_layernorm.weight'),
+      preFeedforwardNorm: getGemma2LayerWeight(weights, weightRoot, layerIdx, 'pre_feedforward_layernorm.weight'),
+      gate: getGemma2LayerWeight(weights, weightRoot, layerIdx, 'mlp.gate_proj.weight'),
+      up: getGemma2LayerWeight(weights, weightRoot, layerIdx, 'mlp.up_proj.weight'),
+      down: getGemma2LayerWeight(weights, weightRoot, layerIdx, 'mlp.down_proj.weight'),
     });
   }
@@ -910,10 +922,10 @@ async function runGemma2TextEncoder(tokens, weightsEntry, config, runtime, optio
       numTokens * resolved.hiddenSize,
       context
     );
-    hidden = createTensor(output.buffer, output.dtype, [numTokens, resolved.hiddenSize], `gemma2_layer_${layerIdx}`);
+    hidden = createTensor(output, activationDtype, [numTokens, resolved.hiddenSize], `gemma2_layer_${layerIdx}`);
   }
-  const finalNormKey = `${prefix}.model.norm.weight`;
+  const finalNormKey = `${weightRoot}.norm.weight`;
   const finalNorm = expectDiffusionWeight(weights.get(finalNormKey), finalNormKey);
   const final = await ops.rmsNorm(hidden, getBuffer(finalNorm), resolved.rmsNormEps, {
     batchSize: numTokens,

package/src/inference/pipelines/text/attention/record.js CHANGED Viewed

@@ -182,10 +182,18 @@ export async function recordLayerAttentionGPU(
   // 3. RoPE (modifies tensor in-place)
   if (!disableRoPE && state.ropeFreqsCos && state.ropeFreqsSin) {
     await recordRoPE(recorder, qTensor, state.ropeFreqsCos, state.ropeFreqsSin, numTokens, {
-      numHeads, headDim, startPos: currentSeqLen,
+      numHeads,
+      headDim,
+      rotaryDim: config.ropeRotaryDim,
+      interleaved: config.ropeInterleaved,
+      startPos: currentSeqLen,
     });
     await recordRoPE(recorder, kTensor, state.ropeFreqsCos, state.ropeFreqsSin, numTokens, {
-      numHeads: numKVHeads, headDim, startPos: currentSeqLen,
+      numHeads: numKVHeads,
+      headDim,
+      rotaryDim: config.ropeRotaryDim,
+      interleaved: config.ropeInterleaved,
+      startPos: currentSeqLen,
     });
   }
@@ -502,6 +510,7 @@ export async function recordLayerAttentionGPU(
       size: numTokens * numHeads * headDim,
       gate: qGateTensor,
       gateActivation: 'sigmoid',
+      inputActivation: 'identity',
       swigluLimit: null,
     });
     recorder.trackTemporaryBuffer(attnOutput.buffer);

package/src/inference/pipelines/text/attention/run.js CHANGED Viewed

@@ -299,10 +299,18 @@ export async function runLayerAttentionGPU(
   if (!disableRoPE && state.ropeFreqsCos && state.ropeFreqsSin) {
     await runRoPE(qTensor, state.ropeFreqsCos, state.ropeFreqsSin, numTokens, {
-      numHeads, headDim, startPos: currentSeqLen,
+      numHeads,
+      headDim,
+      rotaryDim: config.ropeRotaryDim,
+      interleaved: config.ropeInterleaved,
+      startPos: currentSeqLen,
     });
     await runRoPE(kTensor, state.ropeFreqsCos, state.ropeFreqsSin, numTokens, {
-      numHeads: numKVHeads, headDim, startPos: currentSeqLen,
+      numHeads: numKVHeads,
+      headDim,
+      rotaryDim: config.ropeRotaryDim,
+      interleaved: config.ropeInterleaved,
+      startPos: currentSeqLen,
     });
     // Trace RoPE outputs
@@ -690,6 +698,7 @@ export async function runLayerAttentionGPU(
       size: numTokens * numHeads * headDim,
       gate: qGateTensor,
       gateActivation: 'sigmoid',
+      inputActivation: 'identity',
       swigluLimit: null,
     });
     releaseBuffer(attnOutput.buffer);

package/src/inference/pipelines/text/chat-format.js CHANGED Viewed

@@ -224,6 +224,29 @@ function formatChatML(messages) {
   return parts.join('');
 }
+function formatQwen(messages) {
+  // Qwen 3.5 chat format is ChatML-like, but the generation prelude includes
+  // an explicit empty thinking block before assistant output.
+  const parts = [];
+  for (const [index, message] of messages.entries()) {
+    const role = normalizeChatRole(message?.role);
+    assertSupportedChatRole(role, 'Qwen', index);
+    if (role === 'system' && index !== 0) {
+      throw new Error('Qwen template requires any system message to appear first.');
+    }
+    const content = normalizeChatMessageContent(message?.content);
+    if (role === 'system') {
+      parts.push(`<|im_start|>system\n${content}<|im_end|>\n`);
+    } else if (role === 'user') {
+      parts.push(`<|im_start|>user\n${content}<|im_end|>\n`);
+    } else if (role === 'assistant') {
+      parts.push(`<|im_start|>assistant\n${content}<|im_end|>\n`);
+    }
+  }
+  parts.push('<|im_start|>assistant\n<think>\n\n</think>\n\n');
+  return parts.join('');
+}
 function formatTranslateGemmaUserPrompt(content) {
   if (!Array.isArray(content) || content.length !== 1) {
     throw new Error(
@@ -345,7 +368,7 @@ const CHAT_FORMATTERS = {
   'llama3': formatHeaderBased,
   'gpt-oss': formatChannelBased,
   'chatml': formatChatML,
-  'qwen': formatChatML,
+  'qwen': formatQwen,
   'translategemma': formatTranslateGemma,
 };
@@ -363,4 +386,5 @@ export function formatChatMessages(messages, templateType) {
 export const formatGemmaChat = formatTurnBased;
 export const formatLlama3Chat = formatHeaderBased;
 export const formatGptOssChat = formatChannelBased;
+export const formatQwenChat = formatQwen;
 export const formatTranslateGemmaChat = formatTranslateGemma;

package/src/inference/pipelines/text/config.d.ts CHANGED Viewed

@@ -148,6 +148,10 @@ export interface ParsedModelConfig {
   slidingWindow: number | null;
   ropeTheta: number;
   ropeLocalTheta: number | null;
+  ropeRotaryDim: number;
+  ropeInterleaved: boolean;
+  mropeSection: number[] | null;
+  partialRotaryFactor: number | null;
   ropeScale: number;
   ropeLocalScale: number;
   ropeScalingType: string | null;

package/src/inference/pipelines/text/config.js CHANGED Viewed

@@ -21,6 +21,28 @@ function assertSupportedRuntimeModelType(manifest) {
   );
 }
+function resolveRotaryDim(headDim, partialRotaryFactor, modelId) {
+  if (partialRotaryFactor == null) {
+    return headDim;
+  }
+  if (typeof partialRotaryFactor !== 'number' || Number.isNaN(partialRotaryFactor)) {
+    throw new Error(`Manifest "${modelId}" has invalid rope.partialRotaryFactor.`);
+  }
+  if (partialRotaryFactor <= 0 || partialRotaryFactor > 1) {
+    throw new Error(
+      `Manifest "${modelId}" requires 0 < rope.partialRotaryFactor <= 1; got ${partialRotaryFactor}.`
+    );
+  }
+  const rotaryDim = Math.trunc(headDim * partialRotaryFactor);
+  if (rotaryDim <= 0 || (rotaryDim % 2) !== 0) {
+    throw new Error(
+      `Manifest "${modelId}" resolves rope rotary dim ${rotaryDim} from headDim=${headDim} ` +
+      `and partialRotaryFactor=${partialRotaryFactor}, but rotary dim must be a positive even integer.`
+    );
+  }
+  return rotaryDim;
+}
 export function getStopTokenIds(manifest) {
   const eosTokenId = manifest?.eos_token_id;
   if (Array.isArray(eosTokenId)) return eosTokenId;
@@ -130,7 +152,14 @@ export function hasManifestInference(manifest) {
 export function validateRequiredInferenceFields(inf, modelId) {
+  inf = inf ?? {};
+  inf.attention = inf.attention ?? {};
+  inf.normalization = inf.normalization ?? {};
+  inf.ffn = inf.ffn ?? {};
+  inf.rope = inf.rope ?? {};
+  inf.output = inf.output ?? {};
+  inf.layerPattern = inf.layerPattern ?? {};
+  inf.chatTemplate = inf.chatTemplate ?? {};
   const errors = [];
   // Attention fields - non-nullable required
@@ -201,6 +230,20 @@ export function validateRequiredInferenceFields(inf, modelId) {
   if (inf.rope.ropeLocalTheta === undefined) {
     errors.push('rope.ropeLocalTheta must be explicitly set (null for no local theta, or number)');
   }
+  if (inf.rope.mropeInterleaved == null) {
+    errors.push('rope.mropeInterleaved is required');
+  }
+  if (inf.rope.mropeSection === undefined) {
+    errors.push('rope.mropeSection must be explicitly set (null when unused, or an array of positive integers)');
+  }
+  if (inf.rope.partialRotaryFactor === undefined) {
+    errors.push('rope.partialRotaryFactor must be explicitly set (null when unused, or a number in (0, 1])');
+  } else {
+    const factor = inf.rope.partialRotaryFactor;
+    if (factor !== null && (typeof factor !== 'number' || Number.isNaN(factor) || factor <= 0 || factor > 1)) {
+      errors.push('rope.partialRotaryFactor must be a number in (0, 1] or null');
+    }
+  }
   // Output fields - non-nullable required
   if (inf.output.tieWordEmbeddings == null) {
@@ -458,6 +501,26 @@ export function toParsedConfigFromMerged(merged, manifest) {
   const ropeScalingType = inf.rope.ropeScalingType;
   const ropeLocalScale = inf.rope.ropeLocalScalingFactor ?? ropeScale;
   const ropeLocalScalingType = inf.rope.ropeLocalScalingType ?? ropeScalingType;
+  const partialRotaryFactor = inf.rope.partialRotaryFactor;
+  const ropeInterleaved = inf.rope.mropeInterleaved === true;
+  const mropeSection = Array.isArray(inf.rope.mropeSection)
+    ? inf.rope.mropeSection.map((entry) => Math.trunc(Number(entry)))
+    : null;
+  const ropeRotaryDim = resolveRotaryDim(arch.headDim, partialRotaryFactor, merged.modelId);
+  if (mropeSection && mropeSection.some((entry) => !Number.isFinite(entry) || entry <= 0)) {
+    throw new Error(
+      `Manifest "${merged.modelId}" has invalid rope.mropeSection; expected positive integers.`
+    );
+  }
+  if (ropeInterleaved && mropeSection) {
+    const doubledMropeDim = mropeSection.reduce((sum, entry) => sum + entry, 0) * 2;
+    if (doubledMropeDim !== ropeRotaryDim) {
+      throw new Error(
+        `Manifest "${merged.modelId}" declares rope.mropeSection=${JSON.stringify(mropeSection)}, ` +
+        `which expands to rotary dim ${doubledMropeDim}, but the resolved rotary dim is ${ropeRotaryDim}.`
+      );
+    }
+  }
   // Build ropeScaling object from manifest values if scaling is enabled
   // Include YARN params when present
@@ -532,6 +595,10 @@ export function toParsedConfigFromMerged(merged, manifest) {
     slidingWindow: inf.attention.slidingWindow,
     ropeTheta: inf.rope.ropeTheta,
     ropeLocalTheta: inf.rope.ropeLocalTheta,
+    ropeRotaryDim,
+    ropeInterleaved,
+    mropeSection,
+    partialRotaryFactor,
     ropeScale,
     ropeLocalScale,
     ropeScalingType,