npm - @simulatte/doppler - Versions diffs - 0.1.7 → 0.1.8 - Mend

@simulatte/doppler 0.1.7 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (88)

package/CHANGELOG.md +19 -0
package/package.json +21 -36
package/src/browser/browser-converter.js +5 -0
package/src/client/doppler-registry.json +1 -17
package/src/config/kernel-path-loader.d.ts +5 -0
package/src/config/kernel-path-loader.js +13 -0
package/src/config/kernels/registry.json +74 -0
package/src/config/loader.js +3 -0
package/src/config/merge-contract-check.js +7 -0
package/src/config/presets/kernel-paths/gemma3-q4k-dequant-f32w-f32a-online.json +56 -0
package/src/config/presets/kernel-paths/lfm2-q4k-dequant-f32a-nosubgroups.json +61 -0
package/src/config/presets/kernel-paths/registry.json +14 -0
package/src/config/presets/models/gemma2.json +2 -1
package/src/config/presets/models/gemma3.json +2 -0
package/src/config/presets/models/qwen3.json +4 -3
package/src/config/presets/models/qwen3_5.json +16 -0
package/src/config/presets/runtime/model/qwen3-5-layer-probe.json +52 -0
package/src/config/presets/runtime/model/qwen3-5-linear-attn-debug.json +90 -0
package/src/config/schema/conversion.schema.d.ts +1 -0
package/src/config/schema/manifest.schema.d.ts +1 -1
package/src/config/schema/manifest.schema.js +1 -1
package/src/config/schema/storage.schema.js +1 -1
package/src/converter/conversion-plan.js +10 -2
package/src/converter/core.js +2 -0
package/src/converter/manifest-inference.js +12 -22
package/src/converter/parsers/transformer.js +4 -0
package/src/converter/quantization-info.js +5 -1
package/src/converter/quantizer.js +19 -12
package/src/converter/rope-config.js +8 -6
package/src/converter/tokenizer-utils.d.ts +1 -0
package/src/converter/tokenizer-utils.js +4 -1
package/src/debug/reference/hf_qwen35_linear_attn_debug.py +268 -0
package/src/distribution/shard-delivery.js +6 -1
package/src/formats/rdrr/parsing.d.ts +4 -0
package/src/formats/rdrr/parsing.js +14 -1
package/src/gpu/kernels/index.d.ts +8 -0
package/src/gpu/kernels/index.js +6 -0
package/src/gpu/kernels/matmul-selection.js +47 -4
package/src/gpu/kernels/matmul.d.ts +2 -0
package/src/gpu/kernels/matmul.js +1 -1
package/src/gpu/kernels/rmsnorm.js +9 -2
package/src/gpu/kernels/split_qg.d.ts +50 -0
package/src/gpu/kernels/split_qg.js +46 -0
package/src/gpu/kernels/split_qg.wgsl +58 -0
package/src/gpu/kernels/split_qg_f16.wgsl +62 -0
package/src/gpu/weight-buffer.d.ts +1 -1
package/src/gpu/weight-buffer.js +1 -1
package/src/inference/browser-harness.d.ts +2 -0
package/src/inference/browser-harness.js +20 -1
package/src/inference/pipelines/diffusion/helpers.js +3 -0
package/src/inference/pipelines/diffusion/text-encoder-gpu.js +8 -2
package/src/inference/pipelines/text/attention/output-projection.d.ts +12 -0
package/src/inference/pipelines/text/attention/output-projection.js +8 -0
package/src/inference/pipelines/text/attention/projections.d.ts +10 -1
package/src/inference/pipelines/text/attention/projections.js +41 -11
package/src/inference/pipelines/text/attention/record.js +15 -6
package/src/inference/pipelines/text/attention/run.js +50 -6
package/src/inference/pipelines/text/config.js +14 -0
package/src/inference/pipelines/text/execution-plan.js +5 -4
package/src/inference/pipelines/text/generator-runtime.js +5 -0
package/src/inference/pipelines/text/generator-steps.d.ts +6 -0
package/src/inference/pipelines/text/generator-steps.js +43 -15
package/src/inference/pipelines/text/generator.js +50 -17
package/src/inference/pipelines/text/init.d.ts +13 -0
package/src/inference/pipelines/text/init.js +16 -5
package/src/inference/pipelines/text/layer.js +1 -0
package/src/inference/pipelines/text/linear-attention.d.ts +5 -0
package/src/inference/pipelines/text/linear-attention.js +33 -3
package/src/inference/pipelines/text/logits/gpu.js +2 -2
package/src/inference/pipelines/text/logits/index.d.ts +6 -1
package/src/inference/pipelines/text/logits/index.js +3 -1
package/src/inference/pipelines/text/model-load.js +3 -0
package/src/inference/pipelines/text/sampling.js +52 -6
package/src/inference/test-harness.js +2 -2
package/src/loader/final-weights-loader.js +2 -0
package/src/loader/shard-cache.js +3 -2
package/src/loader/tensors/tensor-loader.js +6 -1
package/src/rules/inference/dtype.rules.json +5 -0
package/src/rules/inference/kernel-path.rules.json +2 -2
package/src/rules/kernels/split-qg.rules.json +6 -0
package/src/rules/rule-registry.js +2 -0
package/src/storage/downloader.js +2 -1
package/src/storage/shard-manager.js +4 -3
package/src/tooling/conversion-config-materializer.js +3 -5
package/src/tooling/node-converter.js +3 -0
package/src/tooling/node-source-runtime.js +36 -0
package/src/types/model.d.ts +5 -0
package/tools/doppler-cli.js +6 -1

package/src/tooling/conversion-config-materializer.js CHANGED Viewed

@@ -2,6 +2,7 @@ import path from 'node:path';
 import { createConverterConfig } from '../config/schema/index.js';
 import { resolveConversionPlan } from '../converter/conversion-plan.js';
+import { normalizeQuantTag } from '../converter/quantization-info.js';
 function toSafeString(value) {
   if (typeof value !== 'string') return '';
@@ -10,10 +11,7 @@ function toSafeString(value) {
 }
 function normalizeQuantizationTag(value) {
-  const raw = toSafeString(value).toUpperCase();
-  if (!raw) return 'f16';
-  if (raw === 'Q4_K_M' || raw === 'Q4_K') return 'q4k';
-  return raw.toLowerCase();
+  return normalizeQuantTag(toSafeString(value));
 }
 function resolveArchitectureHint(architecture) {
@@ -37,7 +35,7 @@ function extractSourceQuantization(manifest) {
   if (explicitWeights) return explicitWeights;
   const explicitQuant = toSafeString(manifest?.quantization);
   if (explicitQuant) return explicitQuant;
-  return 'f16';
+  return normalizeQuantTag(null);
 }
 function buildRefreshRawConfig(manifest) {

package/src/tooling/node-converter.js CHANGED Viewed

@@ -875,6 +875,7 @@ export async function convertSafetensorsDirectory(options) {
   let sourceQuantization = null;
   let tokenizerJson = null;
   let tokenizerConfig = null;
+  let generationConfig = null;
   let hasTokenizerModel = false;
   let tokenizerModelPath = null;
   let diffusionAuxFiles = [];
@@ -1101,6 +1102,7 @@ export async function convertSafetensorsDirectory(options) {
       },
     });
     config = parsedTransformer.config;
+    generationConfig = parsedTransformer.generationConfig ?? null;
     tensors = parsedTransformer.tensors;
     architectureHint = parsedTransformer.architectureHint;
     architecture = extractArchitecture(config, null);
@@ -1169,6 +1171,7 @@ export async function convertSafetensorsDirectory(options) {
     quantization: targetQuantization,
     tokenizerJson,
     tokenizerConfig,
+    generationConfig,
     tokenizerModel: hasTokenizerModel ? 'tokenizer.model' : null,
   };

package/src/tooling/node-source-runtime.js CHANGED Viewed

@@ -411,6 +411,39 @@ function buildNodeFileReaders() {
   };
 }
+// Source dtype → compute precision mapping for source-runtime inference.
+// BF16/F32 sources require f32 compute (BF16 has no native WebGPU support).
+// Quantized formats require f32 compute for dequantization accuracy.
+// F16 sources can use f16 compute directly.
+const SOURCE_QUANT_COMPUTE_MAP = {
+  'F16': 'f16',
+  'BF16': 'f32',
+  'F32': 'f32',
+  'Q4_K': 'f32',
+  'Q4_K_M': 'f32',
+  'Q6_K': 'f32',
+};
+const SOURCE_COMPUTE_DEFAULT = 'f16';
+function resolveSourceRuntimeComputePrecision(tensors, sourceQuantization) {
+  const dtypes = new Set();
+  for (const tensor of Array.isArray(tensors) ? tensors : []) {
+    const dtype = String(tensor?.dtype || '').trim().toUpperCase();
+    if (dtype) {
+      dtypes.add(dtype);
+    }
+  }
+  // If any tensor requires f32 compute, use f32 for all.
+  for (const dtype of dtypes) {
+    if (SOURCE_QUANT_COMPUTE_MAP[dtype] === 'f32') {
+      return 'f32';
+    }
+  }
+  const normalized = String(sourceQuantization || '').trim().toUpperCase();
+  return SOURCE_QUANT_COMPUTE_MAP[normalized] ?? SOURCE_COMPUTE_DEFAULT;
+}
 async function addHashesToFileEntries(entries, hashAlgorithm) {
   const normalized = [];
   for (const entry of Array.isArray(entries) ? entries : []) {
@@ -473,6 +506,9 @@ export async function resolveNodeSourceRuntimeBundle(options = {}) {
   assertSupportedSourceDtypes(parsed.tensors, parsed.sourceKind);
   const converterConfig = createConverterConfig({
+    quantization: {
+      computePrecision: resolveSourceRuntimeComputePrecision(parsed.tensors, parsed.sourceQuantization),
+    },
     output: {
       modelBaseId: options.modelId || null,
     },

package/src/types/model.d.ts CHANGED Viewed

@@ -9,7 +9,11 @@ export type ModelArchitecture =
   | 'gemma'
   | 'gemma2'
   | 'gemma3'
+  | 'embeddinggemma'
   | 'functiongemma'
+  | 'janus_text'
+  | 'lfm2'
+  | 'modernbert'
   | 'qwen2'
   | 'qwen3'
   | 'phi3'
@@ -19,6 +23,7 @@ export type ModelArchitecture =
   | 'deepseek'
   | 'mamba'
   | 'kimi_k2'
+  | 'translategemma'
   | 'transformer';
 /** Attention type variants */

package/tools/doppler-cli.js CHANGED Viewed

@@ -1,5 +1,6 @@
 #!/usr/bin/env node
+import { existsSync } from 'node:fs';
 import fs from 'node:fs/promises';
 import path from 'node:path';
 import { fileURLToPath, pathToFileURL } from 'node:url';
@@ -13,7 +14,8 @@ import { createToolingErrorEnvelope } from '../src/tooling/command-envelope.js';
 const NODE_WEBGPU_INCOMPLETE_MESSAGE = 'node command: WebGPU runtime is incomplete in Node';
 const CLI_POLICY_PATH = fileURLToPath(new URL('./configs/cli/doppler-cli-policy.json', import.meta.url));
-const DEFAULT_EXTERNAL_MODELS_ROOT = process.env.DOPPLER_EXTERNAL_MODELS_ROOT || '/media/x/models';
+const DEFAULT_EXTERNAL_MODELS_ROOT = process.env.DOPPLER_EXTERNAL_MODELS_ROOT
+  || (existsSync('/Volumes/models') ? '/Volumes/models' : '/media/x/models');
 const DEFAULT_EXTERNAL_RDRR_ROOT = path.join(DEFAULT_EXTERNAL_MODELS_ROOT, 'rdrr');
 const DEFAULT_CLI_POLICY = {
   defaults: {
@@ -1260,6 +1262,9 @@ function printMetricsSummary(result) {
       `prefill=${formatNumber(metrics.prefillTokensPerSec)} ` +
       `decode=${formatNumber(metrics.decodeTokensPerSec)}`
     );
+    if (typeof result.output === 'string' && result.output.length > 0) {
+      console.log(`[output] ${quoteOneLine(result.output)}`);
+    }
     printExecutionContractSummary(result);
     printExecutionV0GraphSummary(metrics.executionV0GraphContractArtifact);
     return;