npm - mambacode.js - Versions diffs - 1.0.0 → 1.0.2 - Mend

mambacode.js 1.0.0 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (72)

package/README.md +198 -76
package/dist/index.d.ts +19 -0
package/dist/index.d.ts.map +1 -0
package/dist/index.js +18 -0
package/dist/index.js.map +1 -0
package/dist/kernels/activations.d.ts +3 -0
package/dist/kernels/activations.d.ts.map +1 -0
package/dist/kernels/activations.js +87 -0
package/dist/kernels/activations.js.map +1 -0
package/dist/kernels/conv1d.d.ts +3 -0
package/dist/kernels/conv1d.d.ts.map +1 -0
package/dist/kernels/conv1d.js +152 -0
package/dist/kernels/conv1d.js.map +1 -0
package/dist/kernels/linear_projection.d.ts +3 -0
package/dist/kernels/linear_projection.d.ts.map +1 -0
package/dist/kernels/linear_projection.js +219 -0
package/dist/kernels/linear_projection.js.map +1 -0
package/dist/kernels/selective_scan.d.ts +3 -0
package/dist/kernels/selective_scan.d.ts.map +1 -0
package/dist/kernels/selective_scan.js +348 -0
package/dist/kernels/selective_scan.js.map +1 -0
package/dist/kernels/weight_update.d.ts +3 -0
package/dist/kernels/weight_update.d.ts.map +1 -0
package/dist/kernels/weight_update.js +119 -0
package/dist/kernels/weight_update.js.map +1 -0
package/dist/model/mamba_block.d.ts +64 -0
package/dist/model/mamba_block.d.ts.map +1 -0
package/dist/model/mamba_block.js +309 -0
package/dist/model/mamba_block.js.map +1 -0
package/dist/model/mamba_model.d.ts +66 -0
package/dist/model/mamba_model.d.ts.map +1 -0
package/dist/model/mamba_model.js +289 -0
package/dist/model/mamba_model.js.map +1 -0
package/dist/tokenizer/bpe.d.ts +29 -0
package/dist/tokenizer/bpe.d.ts.map +1 -0
package/dist/tokenizer/bpe.js +164 -0
package/dist/tokenizer/bpe.js.map +1 -0
package/dist/training/autograd.d.ts +27 -0
package/dist/training/autograd.d.ts.map +1 -0
package/dist/training/autograd.js +120 -0
package/dist/training/autograd.js.map +1 -0
package/dist/training/trainer.d.ts +37 -0
package/dist/training/trainer.d.ts.map +1 -0
package/dist/training/trainer.js +183 -0
package/dist/training/trainer.js.map +1 -0
package/dist/utils/gpu_utils.d.ts +21 -0
package/dist/utils/gpu_utils.d.ts.map +1 -0
package/dist/utils/gpu_utils.js +111 -0
package/dist/utils/gpu_utils.js.map +1 -0
package/dist/utils/quantization.d.ts +26 -0
package/dist/utils/quantization.d.ts.map +1 -0
package/dist/utils/quantization.js +116 -0
package/dist/utils/quantization.js.map +1 -0
package/package.json +43 -18
package/src/index.ts +61 -0
package/src/kernels/{activations.js → activations.ts} +2 -2
package/src/kernels/{linear_projection.js → linear_projection.ts} +2 -2
package/src/kernels/{selective_scan.js → selective_scan.ts} +2 -2
package/src/kernels/{weight_update.js → weight_update.ts} +2 -2
package/src/model/{mamba_block.js → mamba_block.ts} +134 -170
package/src/model/{mamba_model.js → mamba_model.ts} +165 -121
package/src/tokenizer/bpe.ts +186 -0
package/src/training/autograd.ts +135 -0
package/src/training/{trainer.js → trainer.ts} +79 -161
package/src/utils/gpu_utils.ts +147 -0
package/src/utils/quantization.ts +154 -0
package/src/index.js +0 -89
package/src/tokenizer/bpe.js +0 -256
package/src/training/autograd.js +0 -221
package/src/utils/gpu_utils.js +0 -217
package/src/utils/quantization.js +0 -215
package/src/kernels/{conv1d.js → conv1d.ts} +0 -0

package/src/training/autograd.js DELETED Viewed

@@ -1,221 +0,0 @@
-/**
- * autograd.js – Lightweight tape-based automatic differentiation engine.
- *
- * Design
- * ------
- * Every differentiable GPU operation appends an entry to a global "tape"
- * (a reverse-mode AD record).  During the backward pass we replay the tape
- * in reverse, dispatching backward GPU kernels that accumulate gradients
- * into per-parameter gradient buffers.
- *
- * A "Tensor" in this context is a thin wrapper that holds:
- *   - a GPUBuffer (the data)
- *   - shape metadata
- *   - an optional gradient GPUBuffer
- *   - a reference to the tape node that produced it
- *
- * The tape stores closures so that complex operations (selective scan,
- * conv, linear) can have their own custom backward logic.
- */
-/** @type {TapeEntry[]} */
-let _tape = [];
-let _gradEnabled = true;
-/**
- * @typedef {Object} TapeEntry
- * @property {() => void} backward  – closure that computes and accumulates gradients
- */
-/**
- * Tensor – wraps a GPUBuffer with shape, gradient, and autograd metadata.
- */
-export class Tensor {
-    /**
-     * @param {GPUBuffer}   data     – GPU buffer holding the tensor values (FP32)
-     * @param {number[]}    shape    – dimensions, e.g. [batch, seqLen, dInner]
-     * @param {boolean}     [requiresGrad=false]
-     */
-    constructor(data, shape, requiresGrad = false) {
-        this.data         = data;
-        this.shape        = shape;
-        this.numel        = shape.reduce((a, b) => a * b, 1);
-        this.requiresGrad = requiresGrad;
-        this.grad         = null;   // GPUBuffer, populated during backward()
-        this._gradFn      = null;   // tape node index
-    }
-    /** Number of bytes occupied by this tensor (FP32). */
-    get byteSize() { return this.numel * 4; }
-    /**
-     * Manually zero-out the gradient buffer (keeps the GPUBuffer allocated).
-     * @param {GPUDevice} device
-     */
-    zeroGrad(device) {
-        if (this.grad) {
-            device.queue.writeBuffer(this.grad, 0, new Float32Array(this.numel));
-        }
-    }
-    /** Free GPU memory for both data and grad buffers. */
-    destroy() {
-        this.data?.destroy();
-        this.grad?.destroy();
-        this.data = null;
-        this.grad = null;
-    }
-}
-// ─── Tape control ─────────────────────────────────────────────────────────────
-/** Start recording operations onto the tape. */
-export function enableGrad()  { _gradEnabled = true;  }
-/** Stop recording (inference-only mode). */
-export function noGrad()      { _gradEnabled = false; }
-/** Clear the tape without running backward. */
-export function clearTape()   { _tape = []; }
-/**
- * Register a backward closure onto the tape.
- * Called internally by differentiable operations.
- *
- * @param {() => void} backwardFn
- * @returns {number} tape index (for reference by the output Tensor)
- */
-export function recordOperation(backwardFn) {
-    if (!_gradEnabled) return -1;
-    _tape.push({ backward: backwardFn });
-    return _tape.length - 1;
-}
-// ─── Backward pass ────────────────────────────────────────────────────────────
-/**
- * Run the backward pass by replaying the tape in reverse.
- * Gradients accumulate into the `.grad` GPUBuffers of leaf tensors.
- *
- * After backward() the tape is cleared automatically.
- */
-export async function backward() {
-    for (let i = _tape.length - 1; i >= 0; i--) {
-        await _tape[i].backward();
-    }
-    clearTape();
-}
-// ─── Gradient buffer management ───────────────────────────────────────────────
-/**
- * Ensure a Tensor has an allocated (zeroed) gradient buffer.
- *
- * @param {GPUDevice} device
- * @param {Tensor}    tensor
- */
-export function ensureGradBuffer(device, tensor) {
-    if (!tensor.grad) {
-        tensor.grad = device.createBuffer({
-            size  : tensor.byteSize,
-            usage : GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST | GPUBufferUsage.COPY_SRC,
-        });
-        // Zero-init
-        device.queue.writeBuffer(tensor.grad, 0, new Float32Array(tensor.numel));
-    }
-}
-/**
- * Allocate gradient buffers for a list of tensors.
- *
- * @param {GPUDevice}  device
- * @param {Tensor[]}   tensors
- */
-export function allocateGradients(device, tensors) {
-    for (const t of tensors) {
-        if (t.requiresGrad) ensureGradBuffer(device, t);
-    }
-}
-/**
- * Zero all gradient buffers in-place (GPU write).
- *
- * @param {GPUDevice}  device
- * @param {Tensor[]}   tensors
- */
-export function zeroGradients(device, tensors) {
-    for (const t of tensors) {
-        if (t.grad) {
-            device.queue.writeBuffer(t.grad, 0, new Float32Array(t.numel));
-        }
-    }
-}
-// ─── Loss helpers ─────────────────────────────────────────────────────────────
-/**
- * Create a single-element "1.0" gradient buffer to seed the backward pass.
- * (Equivalent to calling loss.backward() with grad=1.)
- *
- * @param {GPUDevice} device
- * @returns {GPUBuffer}  – single-element FP32 buffer containing 1.0
- */
-export function onesLikeScalar(device) {
-    const buf = device.createBuffer({
-        size  : 4,
-        usage : GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST,
-        mappedAtCreation: true,
-    });
-    new Float32Array(buf.getMappedRange()).set([1.0]);
-    buf.unmap();
-    return buf;
-}
-/**
- * Cross-entropy loss (computed on CPU after reading back logits).
- * Returns a scalar JS number.
- *
- * @param {Float32Array} logits    – (vocabSize,)
- * @param {number}       targetId  – correct token index
- * @returns {number}
- */
-export function crossEntropyLoss(logits, targetId) {
-    // Numerically stable softmax
-    let maxLogit = -Infinity;
-    for (let i = 0; i < logits.length; i++) {
-        if (logits[i] > maxLogit) maxLogit = logits[i];
-    }
-    let sumExp = 0;
-    for (let i = 0; i < logits.length; i++) {
-        sumExp += Math.exp(logits[i] - maxLogit);
-    }
-    const logSumExp = Math.log(sumExp) + maxLogit;
-    return logSumExp - logits[targetId];
-}
-/**
- * Gradient of the cross-entropy loss w.r.t. logits.
- * Returns a Float32Array of shape (vocabSize,).
- *
- * @param {Float32Array} logits
- * @param {number}       targetId
- * @returns {Float32Array}
- */
-export function crossEntropyGrad(logits, targetId) {
-    let maxLogit = -Infinity;
-    for (let i = 0; i < logits.length; i++) {
-        if (logits[i] > maxLogit) maxLogit = logits[i];
-    }
-    let sumExp = 0;
-    const exp_shifted = new Float32Array(logits.length);
-    for (let i = 0; i < logits.length; i++) {
-        exp_shifted[i] = Math.exp(logits[i] - maxLogit);
-        sumExp += exp_shifted[i];
-    }
-    const probs = new Float32Array(logits.length);
-    for (let i = 0; i < logits.length; i++) {
-        probs[i] = exp_shifted[i] / sumExp;
-    }
-    probs[targetId] -= 1.0;   // dL/d logit_i = prob_i - 1{i==target}
-    return probs;
-}

package/src/utils/gpu_utils.js DELETED Viewed

@@ -1,217 +0,0 @@
-/**
- * gpu_utils.js – WebGPU device management and buffer helpers.
- *
- * Provides thin, consistent wrappers around the WebGPU API so that
- * the rest of MambaCode.js never calls navigator.gpu directly.
- */
-/**
- * Initialise WebGPU and return the { device, adapter } pair.
- *
- * @param {{ powerPreference?: 'high-performance'|'low-power' }} [opts]
- * @returns {Promise<{ device: GPUDevice, adapter: GPUAdapter }>}
- */
-export async function initWebGPU(opts = {}) {
-    if (typeof navigator === 'undefined' || !navigator.gpu) {
-        throw new Error(
-            'WebGPU is not available in this environment. ' +
-            'Use Chrome 113+, Edge 113+, or Firefox Nightly with WebGPU enabled.'
-        );
-    }
-    const adapter = await navigator.gpu.requestAdapter({
-        powerPreference: opts.powerPreference ?? 'high-performance',
-    });
-    if (!adapter) {
-        throw new Error('Failed to acquire a GPUAdapter. Your GPU may not support WebGPU.');
-    }
-    // Request a device, capping requested limits to what the adapter supports.
-    const adapterLimits = adapter.limits;
-    const requested3GB  = 3 * 1024 * 1024 * 1024;
-    const device = await adapter.requestDevice({
-        requiredLimits: {
-            maxBufferSize: Math.min(
-                requested3GB,
-                adapterLimits.maxBufferSize
-            ),
-            maxStorageBufferBindingSize: Math.min(
-                requested3GB,
-                adapterLimits.maxStorageBufferBindingSize
-            ),
-            maxComputeInvocationsPerWorkgroup: Math.min(
-                256,
-                adapterLimits.maxComputeInvocationsPerWorkgroup
-            ),
-        },
-    });
-    device.lost.then((info) => {
-        console.error('WebGPU device lost:', info.message);
-    });
-    return { device, adapter };
-}
-// ─── Buffer factory helpers ───────────────────────────────────────────────────
-const UNIFORM = GPUBufferUsage?.UNIFORM  ?? 0x40;
-const STORAGE = GPUBufferUsage?.STORAGE  ?? 0x80;
-const COPY_SRC = GPUBufferUsage?.COPY_SRC ?? 0x04;
-const COPY_DST = GPUBufferUsage?.COPY_DST ?? 0x08;
-const MAP_READ = GPUBufferUsage?.MAP_READ ?? 0x01;
-/**
- * Create a GPU storage buffer pre-filled with Float32 data.
- *
- * @param {GPUDevice} device
- * @param {Float32Array|number[]} data
- * @param {boolean} [readable=false]  Also attach COPY_SRC so it can be read back.
- * @returns {GPUBuffer}
- */
-export function createStorageBuffer(device, data, readable = false) {
-    const arr    = data instanceof Float32Array ? data : new Float32Array(data);
-    const usage  = STORAGE | COPY_DST | (readable ? COPY_SRC : 0);
-    const buffer = device.createBuffer({ size: arr.byteLength, usage, mappedAtCreation: true });
-    new Float32Array(buffer.getMappedRange()).set(arr);
-    buffer.unmap();
-    return buffer;
-}
-/**
- * Create a GPU storage buffer of `byteSize` bytes, zeroed.
- *
- * @param {GPUDevice} device
- * @param {number} byteSize
- * @param {boolean} [readable=false]
- * @returns {GPUBuffer}
- */
-export function createEmptyStorageBuffer(device, byteSize, readable = false) {
-    const usage = STORAGE | COPY_DST | (readable ? COPY_SRC : 0);
-    return device.createBuffer({ size: byteSize, usage });
-}
-/**
- * Create a uniform buffer for a plain-old-data struct.
- * The caller must supply a correctly-packed ArrayBuffer / TypedArray.
- *
- * @param {GPUDevice} device
- * @param {ArrayBuffer|TypedArray} data
- * @returns {GPUBuffer}
- */
-export function createUniformBuffer(device, data) {
-    const bytes  = ArrayBuffer.isView(data) ? data.buffer : data;
-    const buffer = device.createBuffer({
-        size  : bytes.byteLength,
-        usage : UNIFORM | COPY_DST,
-        mappedAtCreation: true,
-    });
-    new Uint8Array(buffer.getMappedRange()).set(new Uint8Array(bytes));
-    buffer.unmap();
-    return buffer;
-}
-/**
- * Read back a GPU storage buffer to a Float32Array (async, for debugging/eval).
- *
- * @param {GPUDevice}  device
- * @param {GPUBuffer}  srcBuffer   Must have COPY_SRC usage.
- * @param {number}     byteSize
- * @returns {Promise<Float32Array>}
- */
-export async function readBuffer(device, srcBuffer, byteSize) {
-    const stagingBuffer = device.createBuffer({
-        size  : byteSize,
-        usage : MAP_READ | COPY_DST,
-    });
-    const encoder = device.createCommandEncoder();
-    encoder.copyBufferToBuffer(srcBuffer, 0, stagingBuffer, 0, byteSize);
-    device.queue.submit([encoder.finish()]);
-    await stagingBuffer.mapAsync(GPUMapMode?.READ ?? 0x01);
-    const result = new Float32Array(stagingBuffer.getMappedRange().slice(0));
-    stagingBuffer.unmap();
-    stagingBuffer.destroy();
-    return result;
-}
-/**
- * Upload a Float32Array to an existing GPU buffer.
- *
- * @param {GPUDevice}    device
- * @param {GPUBuffer}    buffer   Must have COPY_DST usage.
- * @param {Float32Array} data
- * @param {number}       [byteOffset=0]
- */
-export function uploadBuffer(device, buffer, data, byteOffset = 0) {
-    device.queue.writeBuffer(buffer, byteOffset, data);
-}
-// ─── Pipeline / Shader helpers ────────────────────────────────────────────────
-/**
- * Compile a WGSL compute shader and return a GPUComputePipeline.
- *
- * @param {GPUDevice} device
- * @param {string}    wgslSource
- * @param {string}    entryPoint
- * @returns {GPUComputePipeline}
- */
-export function createComputePipeline(device, wgslSource, entryPoint) {
-    const shaderModule = device.createShaderModule({ code: wgslSource });
-    return device.createComputePipeline({
-        layout : 'auto',
-        compute: { module: shaderModule, entryPoint },
-    });
-}
-/**
- * Build a GPUBindGroup from an array of GPUBuffer bindings.
- *
- * @param {GPUDevice}           device
- * @param {GPUComputePipeline}  pipeline
- * @param {GPUBuffer[]}         buffers   Ordered list matching @binding(i).
- * @param {number}              [groupIndex=0]
- * @returns {GPUBindGroup}
- */
-export function createBindGroup(device, pipeline, buffers, groupIndex = 0) {
-    const entries = buffers.map((buf, i) => ({
-        binding : i,
-        resource: { buffer: buf },
-    }));
-    return device.createBindGroup({
-        layout : pipeline.getBindGroupLayout(groupIndex),
-        entries,
-    });
-}
-/**
- * Encode and submit one compute dispatch in a single call (does not wait for the GPU to finish).
- *
- * @param {GPUDevice}           device
- * @param {GPUComputePipeline}  pipeline
- * @param {GPUBindGroup}        bindGroup
- * @param {[number, number, number]} workgroups  [x, y, z]
- */
-export function dispatchKernel(device, pipeline, bindGroup, workgroups) {
-    const encoder = device.createCommandEncoder();
-    const pass    = encoder.beginComputePass();
-    pass.setPipeline(pipeline);
-    pass.setBindGroup(0, bindGroup);
-    pass.dispatchWorkgroups(...workgroups);
-    pass.end();
-    device.queue.submit([encoder.finish()]);
-}
-/**
- * Ceil-divide helper: returns Math.ceil(a / b) (floating-point divide, then round up).
- *
- * @param {number} a
- * @param {number} b
- * @returns {number}
- */
-export function cdiv(a, b) {
-    return Math.ceil(a / b);
-}

package/src/utils/quantization.js DELETED Viewed

@@ -1,215 +0,0 @@
-/**
- * quantization.js – FP16 and Int8 quantization utilities.
- *
- * MambaCode.js supports two quantization modes to reduce VRAM usage:
- *   • FP16  – weights stored as 16-bit floats (halves memory vs FP32)
- *   • Int8  – non-critical activations quantized to signed 8-bit integers
- *
- * All quantization/dequantization happens in JavaScript; the GPU kernels
- * always operate on FP32 tensors internally (dequantized on upload).
- */
-// ─── FP16 Utilities ──────────────────────────────────────────────────────────
-/**
- * Convert a 32-bit float to a 16-bit IEEE 754 float (represented as Uint16).
- * Uses bit manipulation to avoid the need for a Float16Array (not available in all runtimes).
- *
- * @param {number} val  – 32-bit float
- * @returns {number}    – 16-bit float packed as an integer (0–65535)
- */
-export function floatToFp16(val) {
-    const buf = new ArrayBuffer(4);
-    const f32 = new Float32Array(buf);
-    const u32 = new Uint32Array(buf);
-    f32[0] = val;
-    const bits = u32[0];
-    const sign     = (bits >>> 31) & 0x1;
-    const exponent = (bits >>> 23) & 0xFF;
-    const mantissa =  bits         & 0x7FFFFF;
-    if (exponent === 255) {
-        // Inf / NaN
-        return (sign << 15) | 0x7C00 | (mantissa ? 0x200 : 0);
-    }
-    const expAdj = exponent - 127 + 15;  // re-bias from 127 to 15
-    if (expAdj >= 31) {
-        // Overflow → Inf
-        return (sign << 15) | 0x7C00;
-    }
-    if (expAdj <= 0) {
-        // Underflow or denormal
-        if (expAdj < -10) { return sign << 15; }  // flush to zero
-        const shift = 14 - expAdj;
-        return (sign << 15) | ((mantissa | 0x800000) >> shift);
-    }
-    return (sign << 15) | (expAdj << 10) | (mantissa >> 13);
-}
-/**
- * Convert a 16-bit FP16 integer to a 32-bit float.
- *
- * @param {number} val – Uint16 representation of an FP16 value
- * @returns {number}   – JavaScript number (float64, but semantically float32)
- */
-export function fp16ToFloat(val) {
-    const sign     = (val >>> 15) & 0x1;
-    const exponent = (val >>> 10) & 0x1F;
-    const mantissa =  val         & 0x3FF;
-    if (exponent === 0) {
-        // Denormal or zero
-        const f = mantissa / 1024.0;
-        return sign ? -f : f;
-    }
-    if (exponent === 31) {
-        // Inf / NaN
-        return sign ? -Infinity : (mantissa ? NaN : Infinity);
-    }
-    const expUnbiased = exponent - 15;
-    const f = (1 + mantissa / 1024.0) * Math.pow(2, expUnbiased);
-    return sign ? -f : f;
-}
-/**
- * Quantize a Float32Array to FP16 (stored as Uint16Array).
- *
- * @param {Float32Array} f32
- * @returns {Uint16Array}
- */
-export function quantizeFp16(f32) {
-    const out = new Uint16Array(f32.length);
-    for (let i = 0; i < f32.length; i++) {
-        out[i] = floatToFp16(f32[i]);
-    }
-    return out;
-}
-/**
- * Dequantize a Uint16Array (FP16) back to Float32Array.
- *
- * @param {Uint16Array} fp16
- * @returns {Float32Array}
- */
-export function dequantizeFp16(fp16) {
-    const out = new Float32Array(fp16.length);
-    for (let i = 0; i < fp16.length; i++) {
-        out[i] = fp16ToFloat(fp16[i]);
-    }
-    return out;
-}
-// ─── Int8 Quantization ───────────────────────────────────────────────────────
-/**
- * Symmetric per-tensor Int8 quantization.
- * Quantization: q = round(x / scale),  scale = max(|x|) / 127
- *
- * @param {Float32Array} f32
- * @returns {{ data: Int8Array, scale: number }}
- */
-export function quantizeInt8(f32) {
-    let maxAbs = 0;
-    for (let i = 0; i < f32.length; i++) {
-        const a = Math.abs(f32[i]);
-        if (a > maxAbs) maxAbs = a;
-    }
-    const scale = maxAbs / 127.0 || 1.0;  // avoid division by zero
-    const data  = new Int8Array(f32.length);
-    for (let i = 0; i < f32.length; i++) {
-        data[i] = Math.max(-128, Math.min(127, Math.round(f32[i] / scale)));
-    }
-    return { data, scale };
-}
-/**
- * Dequantize an Int8Array back to Float32Array.
- *
- * @param {Int8Array} int8
- * @param {number}    scale
- * @returns {Float32Array}
- */
-export function dequantizeInt8(int8, scale) {
-    const out = new Float32Array(int8.length);
-    for (let i = 0; i < int8.length; i++) {
-        out[i] = int8[i] * scale;
-    }
-    return out;
-}
-/**
- * Per-channel Int8 quantization (useful for weight matrices).
- * Each output channel gets its own scale factor for better accuracy.
- *
- * @param {Float32Array} f32          – Flat weight tensor, row-major
- * @param {number}       numChannels  – Number of output channels (rows)
- * @returns {{ data: Int8Array, scales: Float32Array }}
- */
-export function quantizeInt8PerChannel(f32, numChannels) {
-    const channelSize = f32.length / numChannels;
-    const scales = new Float32Array(numChannels);
-    const data   = new Int8Array(f32.length);
-    for (let c = 0; c < numChannels; c++) {
-        let maxAbs = 0;
-        const base = c * channelSize;
-        for (let j = 0; j < channelSize; j++) {
-            const a = Math.abs(f32[base + j]);
-            if (a > maxAbs) maxAbs = a;
-        }
-        scales[c] = maxAbs / 127.0 || 1.0;
-        for (let j = 0; j < channelSize; j++) {
-            data[base + j] = Math.max(-128, Math.min(127,
-                Math.round(f32[base + j] / scales[c])
-            ));
-        }
-    }
-    return { data, scales };
-}
-/**
- * Dequantize per-channel Int8 data.
- *
- * @param {Int8Array}    int8
- * @param {Float32Array} scales
- * @param {number}       numChannels
- * @returns {Float32Array}
- */
-export function dequantizeInt8PerChannel(int8, scales, numChannels) {
-    const channelSize = int8.length / numChannels;
-    const out = new Float32Array(int8.length);
-    for (let c = 0; c < numChannels; c++) {
-        const base = c * channelSize;
-        for (let j = 0; j < channelSize; j++) {
-            out[base + j] = int8[base + j] * scales[c];
-        }
-    }
-    return out;
-}
-/**
- * Estimate memory usage for a weight tensor under different precisions.
- *
- * @param {number} numElements
- * @returns {{ fp32: number, fp16: number, int8: number }}  – bytes
- */
-export function estimateMemory(numElements) {
-    return {
-        fp32: numElements * 4,
-        fp16: numElements * 2,
-        int8: numElements * 1,
-    };
-}

package/src/kernels/{conv1d.js → conv1d.ts} RENAMED Viewed

File without changes