scalar-autograd 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) 2025 Mattias Fagerlund
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
package/Losses.ts ADDED
@@ -0,0 +1,144 @@
+ import { Value } from "./Value";
+ import { V } from "./V";
+
+ /**
+  * Throws an error if outputs and targets length do not match.
+  * @param outputs Array of output Values.
+  * @param targets Array of target Values.
+  */
+ function checkLengthMatch(outputs: Value[], targets: Value[]): void {
+   if (outputs.length !== targets.length) {
+     throw new Error('Outputs and targets must have the same length');
+   }
+ }
+
+ /**
+  * Computes mean squared error (MSE) loss between outputs and targets.
+  * @param outputs Array of Value predictions.
+  * @param targets Array of Value targets.
+  * @returns Mean squared error as a Value.
+  */
+ export function mse(outputs: Value[], targets: Value[]): Value {
+   if (!Array.isArray(outputs) || !Array.isArray(targets)) throw new TypeError('mse expects Value[] for both arguments.');
+   checkLengthMatch(outputs, targets);
+   if (!outputs.length) return new Value(0);
+   const diffs = outputs.map((out, i) => out.sub(targets[i]).square());
+   return Value.mean(diffs);
+ }
+
+ /**
+  * Computes mean absolute error (MAE) loss between outputs and targets.
+  * @param outputs Array of Value predictions.
+  * @param targets Array of Value targets.
+  * @returns Mean absolute error as a Value.
+  */
+ export function mae(outputs: Value[], targets: Value[]): Value {
+   if (!Array.isArray(outputs) || !Array.isArray(targets)) throw new TypeError('mae expects Value[] for both arguments.');
+   checkLengthMatch(outputs, targets);
+   if (!outputs.length) return new Value(0);
+   const diffs = outputs.map((out, i) => out.sub(targets[i]).abs());
+   return Value.mean(diffs);
+ }
+
+ const EPS = 1e-12;
+
+ /**
+  * Computes binary cross-entropy loss between predicted outputs and targets.
+  * Outputs are expected to already be sigmoid probabilities.
+  * @param outputs Array of Value predictions (expected in (0,1)).
+  * @param targets Array of Value targets (typically 0 or 1).
+  * @returns Binary cross-entropy loss as a Value.
+  */
+ export function binaryCrossEntropy(outputs: Value[], targets: Value[]): Value {
+   if (!Array.isArray(outputs) || !Array.isArray(targets)) throw new TypeError('binaryCrossEntropy expects Value[] for both arguments.');
+   checkLengthMatch(outputs, targets);
+   if (!outputs.length) return new Value(0);
+   const eps = EPS;
+   const one = new Value(1);
+   const losses = outputs.map((out, i) => {
+     const t = targets[i];
+     const outClamped = out.clamp(eps, 1 - eps); // sigmoid outputs lie in (0,1)
+     return t.mul(outClamped.log()).add(one.sub(t).mul(one.sub(outClamped).log()));
+   });
+   return Value.mean(losses).mul(-1);
+ }
+
+ /**
+  * Computes categorical cross-entropy loss between outputs (logits) and integer target classes.
+  * @param outputs Array of Value logits for each class.
+  * @param targets Array of integer class indices (0-based, one per sample).
+  * @returns Categorical cross-entropy loss as a Value.
+  */
+ export function categoricalCrossEntropy(outputs: Value[], targets: number[]): Value {
+   // targets: integer encoded class indices
+   if (!Array.isArray(outputs) || !Array.isArray(targets)) throw new TypeError('categoricalCrossEntropy expects Value[] and number[].');
+   if (!outputs.length || !targets.length) return new Value(0);
+   if (targets.some(t => typeof t !== 'number' || !isFinite(t) || t < 0 || t >= outputs.length || Math.floor(t) !== t)) {
+     throw new Error('Target indices must be valid integers in [0, outputs.length)');
+   }
+   const eps = EPS;
+   const maxLogit = outputs.reduce((a, b) => a.data > b.data ? a : b);
+   const exps = outputs.map(out => out.sub(maxLogit).exp());
+   const sumExp = Value.sum(exps).add(eps);
+   const softmax = exps.map(e => e.div(sumExp));
+   const picked = targets.map(t => softmax[t]);
+   return Value.mean(picked.map(sm => sm.add(eps).log().mul(-1)));
+ }
+
+ /**
+  * Computes Huber loss between outputs and targets.
+  * Combines quadratic loss for small residuals and linear loss for large residuals.
+  * @param outputs Array of Value predictions.
+  * @param targets Array of Value targets.
+  * @param delta Threshold at which to switch from quadratic to linear (default: 1.0).
+  * @returns Huber loss as a Value.
+  */
+ export function huber(outputs: Value[], targets: Value[], delta = 1.0): Value {
+   if (!Array.isArray(outputs) || !Array.isArray(targets)) throw new TypeError('huber expects Value[] for both arguments.');
+   checkLengthMatch(outputs, targets);
+   if (!outputs.length) return new Value(0);
+
+   const deltaValue = new Value(delta);
+   const half = new Value(0.5);
+
+   const losses = outputs.map((out, i) => {
+     const residual = V.abs(V.sub(out, targets[i]));
+     const condition = V.lt(residual, deltaValue);
+
+     const quadraticLoss = V.mul(half, V.square(residual));
+     const linearLoss = V.mul(deltaValue, V.sub(residual, V.mul(half, deltaValue)));
+
+     return V.ifThenElse(condition, quadraticLoss, linearLoss);
+   });
+
+   return V.mean(losses);
+ }
+
+ /**
+  * Computes Tukey (biweight) loss between outputs and targets.
+  * This robust loss function saturates for large residuals.
+  *
+  * @param outputs Array of Value predictions.
+  * @param targets Array of Value targets.
+  * @param c Threshold constant (typically 4.685).
+  * @returns Tukey loss as a Value.
+  */
+ export function tukey(outputs: Value[], targets: Value[], c: number = 4.685): Value {
+   checkLengthMatch(outputs, targets);
+   const c2_over_6 = (c * c) / 6;
+   const cValue = V.C(c);
+   const c2_over_6_Value = V.C(c2_over_6);
+
+   const losses = outputs.map((out, i) => {
+     const diff = V.abs(V.sub(out, targets[i]));
+     const inlier = V.lte(diff, cValue);
+     const rc = V.div(diff, cValue);
+     const rc2 = V.square(rc);
+     const oneMinusRC2 = V.sub(1, rc2);
+     const inner = V.pow(oneMinusRC2, 3);
+     const inlierLoss = V.mul(c2_over_6_Value, V.sub(1, inner));
+     return V.ifThenElse(inlier, inlierLoss, c2_over_6_Value);
+   });
+   return V.mean(losses);
+ }
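For orientation, here is a minimal sketch of how these loss helpers compose with the rest of the package. It is illustrative only: the one-parameter model and data are made up, and it assumes the `V`/`Value` API behaves as described in the README (numbers auto-wrapped, `.backward()` populating `.grad`).

```typescript
import { V } from './V';
import { mse, huber } from './Losses';

// Hypothetical one-parameter model: pred = w * x (data invented for illustration)
const w = V.W(0.5, 'w');
const xs = [1, 2, 3];
const ys = [2, 4, 6]; // underlying relation: y = 2x

const preds = xs.map(x => V.mul(w, V.C(x)));
const targets = ys.map(y => V.C(y));

// Each loss returns a single Value node that can be backpropagated through.
const loss = mse(preds, targets);         // mean of squared residuals
const robust = huber(preds, targets, 1);  // quadratic near 0, linear beyond delta = 1
console.log(loss.data, robust.data);

loss.backward();
console.log(w.grad); // d(loss)/dw, consumed by an optimizer step
```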
package/Optimizers.ts ADDED
@@ -0,0 +1,222 @@
+ // Optimizers.ts
+
+ import { Value } from "./Value";
+
+ /**
+  * Abstract base class for all optimizers.
+  * Ensures only requiresGrad parameters are optimized.
+  */
+ export abstract class Optimizer {
+   protected trainables: Value[];
+   public learningRate: number;
+
+   /**
+    * Constructs an Optimizer.
+    * @param trainables Array of Value parameters to optimize.
+    * @param learningRate Learning rate for updates.
+    */
+   constructor(trainables: Value[], learningRate: number) {
+     this.trainables = trainables.filter(v => v.requiresGrad);
+     this.learningRate = learningRate;
+   }
+
+   /**
+    * Performs a parameter update step.
+    */
+   abstract step(): void;
+
+   /**
+    * Sets grads of all trainables to zero.
+    */
+   zeroGrad(): void {
+     for (const v of this.trainables) v.grad = 0;
+   }
+
+   /**
+    * Clips the global gradient norm to stabilize training.
+    * @param maxNorm Maximum allowed norm for gradients.
+    */
+   clipGradients(maxNorm: number): void {
+     const totalNorm = Math.sqrt(
+       this.trainables.reduce((sum, v) => sum + v.grad * v.grad, 0)
+     );
+     if (totalNorm > maxNorm) {
+       const scale = maxNorm / (totalNorm + 1e-6);
+       for (const v of this.trainables) v.grad *= scale;
+     }
+   }
+ }
+
+ /**
+  * Optional arguments for basic optimizers.
+  * @property learningRate: Overrides the step size for parameter updates (default varies by optimizer).
+  * @property weightDecay: L2 regularization multiplier (default 0). Ignored by plain SGD.
+  * @property gradientClip: Maximum absolute value for per-parameter updates (default 0: no clipping).
+  */
+ interface OptimizerOptions {
+   learningRate?: number;
+   weightDecay?: number;
+   gradientClip?: number;
+ }
+
+ /**
+  * Stochastic Gradient Descent (SGD) optimizer. Accepts weightDecay and gradientClip for API consistency (both are ignored).
+  */
+ export class SGD extends Optimizer {
+   private weightDecay: number;
+   private gradientClip: number;
+   /**
+    * Constructs an SGD optimizer.
+    * @param trainables Array of Value parameters to optimize.
+    * @param opts Optional parameters (learningRate, weightDecay, gradientClip).
+    */
+   constructor(trainables: Value[], opts: OptimizerOptions = {}) {
+     super(trainables, opts.learningRate ?? 1e-2);
+     this.weightDecay = opts.weightDecay ?? 0;
+     this.gradientClip = opts.gradientClip ?? 0;
+   }
+   /**
+    * Performs a parameter update using standard SGD.
+    */
+   step(): void {
+     // Intentionally ignoring weightDecay/gradientClip for SGD
+     for (const v of this.trainables) {
+       v.data -= this.learningRate * v.grad;
+     }
+   }
+ }
+
+ /**
+  * Adam and AdamW optimizer parameters.
+  * Extends OptimizerOptions.
+  * @property beta1: Exponential decay rate for 1st moment (default 0.9).
+  * @property beta2: Exponential decay rate for 2nd moment (default 0.999).
+  * @property epsilon: Numerical stability fudge factor (default 1e-8).
+  */
+ interface AdamOptions extends OptimizerOptions {
+   beta1?: number;
+   beta2?: number;
+   epsilon?: number;
+ }
+
+ /**
+  * Adam optimizer. Supports L2 weight decay (added to the gradient) and update clipping.
+  */
+ export class Adam extends Optimizer {
+   private beta1: number;
+   private beta2: number;
+   private epsilon: number;
+   private weightDecay: number;
+   private gradientClip: number;
+   private m: Map<Value, number> = new Map();
+   private v: Map<Value, number> = new Map();
+   private stepCount: number = 0;
+   /**
+    * Constructs an Adam optimizer.
+    * @param trainables Array of Value parameters to optimize.
+    * @param opts Optional parameters (learningRate, weightDecay, gradientClip, beta1, beta2, epsilon).
+    */
+   constructor(trainables: Value[], opts: AdamOptions = {}) {
+     super(trainables, opts.learningRate ?? 0.001);
+     this.beta1 = opts.beta1 ?? 0.9;
+     this.beta2 = opts.beta2 ?? 0.999;
+     this.epsilon = opts.epsilon ?? 1e-8;
+     this.weightDecay = opts.weightDecay ?? 0;
+     this.gradientClip = opts.gradientClip ?? 0;
+     for (const v of this.trainables) {
+       this.m.set(v, 0);
+       this.v.set(v, 0);
+     }
+   }
+   /**
+    * Performs a parameter update using Adam optimization.
+    */
+   step(): void {
+     this.stepCount++;
+     for (const v of this.trainables) {
+       let grad = v.grad;
+       if (this.weightDecay > 0) grad += this.weightDecay * v.data;
+
+       let m = this.m.get(v)!;
+       let vVal = this.v.get(v)!;
+       m = this.beta1 * m + (1 - this.beta1) * grad;
+       vVal = this.beta2 * vVal + (1 - this.beta2) * grad * grad;
+
+       const mHat = m / (1 - Math.pow(this.beta1, this.stepCount));
+       const vHat = vVal / (1 - Math.pow(this.beta2, this.stepCount));
+       let update = mHat / (Math.sqrt(vHat) + this.epsilon);
+
+       if (this.gradientClip > 0) {
+         update = Math.max(-this.gradientClip, Math.min(update, this.gradientClip));
+       }
+       v.data -= this.learningRate * update;
+
+       this.m.set(v, m);
+       this.v.set(v, vVal);
+     }
+   }
+ }
+
+ /**
+  * AdamW optimizer. Supports decoupled weight decay and update clipping (same options as Adam; weightDecay defaults to 0.01).
+  */
+ export class AdamW extends Optimizer {
+   private beta1: number;
+   private beta2: number;
+   private epsilon: number;
+   private weightDecay: number;
+   private gradientClip: number;
+   private m: Map<Value, number> = new Map();
+   private v: Map<Value, number> = new Map();
+   private stepCount: number = 0;
+   /**
+    * Constructs an AdamW optimizer.
+    * @param trainables Array of Value parameters to optimize.
+    * @param opts Optional parameters (learningRate, weightDecay, gradientClip, beta1, beta2, epsilon).
+    */
+   constructor(trainables: Value[], opts: AdamOptions = {}) {
+     super(trainables, opts.learningRate ?? 0.001);
+     this.beta1 = opts.beta1 ?? 0.9;
+     this.beta2 = opts.beta2 ?? 0.999;
+     this.epsilon = opts.epsilon ?? 1e-8;
+     this.weightDecay = opts.weightDecay ?? 0.01;
+     this.gradientClip = opts.gradientClip ?? 0;
+     for (const v of this.trainables) {
+       this.m.set(v, 0);
+       this.v.set(v, 0);
+     }
+   }
+   /**
+    * Performs a parameter update using AdamW optimization (decoupled weight decay).
+    */
+   step(): void {
+     this.stepCount++;
+     for (const v of this.trainables) {
+       const grad = v.grad;
+       let m = this.m.get(v)!;
+       let vVal = this.v.get(v)!;
+       m = this.beta1 * m + (1 - this.beta1) * grad;
+       vVal = this.beta2 * vVal + (1 - this.beta2) * grad * grad;
+
+       const mHat = m / (1 - Math.pow(this.beta1, this.stepCount));
+       const vHat = vVal / (1 - Math.pow(this.beta2, this.stepCount));
+       let update = mHat / (Math.sqrt(vHat) + this.epsilon);
+       if (this.gradientClip > 0) {
+         update = Math.max(-this.gradientClip, Math.min(update, this.gradientClip));
+       }
+       // Weight decay is decoupled as in the AdamW paper:
+       v.data -= this.learningRate * update + this.learningRate * this.weightDecay * v.data;
+       this.m.set(v, m);
+       this.v.set(v, vVal);
+     }
+   }
+ }
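For context, this is roughly how the optimizers above plug into the forward/backward loop shown in the README. A minimal sketch, assuming the `V` helpers and the `mse` loss from `Losses.ts`; the toy data and hyperparameters are purely illustrative.

```typescript
import { V } from './V';
import { mse } from './Losses';
import { Adam } from './Optimizers';

// Hypothetical quadratic fit: y = a * x^2 + b (toy data follows y = 2x^2 + 1)
const a = V.W(0.1, 'a');
const b = V.W(0.0, 'b');
const data = [{ x: 1, y: 3 }, { x: 2, y: 9 }, { x: 3, y: 19 }];

const opt = new Adam([a, b], { learningRate: 0.05 });

for (let epoch = 0; epoch < 500; ++epoch) {
  const preds = data.map(d => V.add(V.mul(a, V.pow(V.C(d.x), 2)), b));
  const targets = data.map(d => V.C(d.y));
  const loss = mse(preds, targets);

  opt.zeroGrad();        // clear grads accumulated by the previous backward pass
  loss.backward();       // populate .grad on a and b
  opt.clipGradients(5);  // optional: cap the global gradient norm before stepping
  opt.step();            // Adam update with bias-corrected moments
}

console.log(a.data, b.data); // should approach 2 and 1
```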
package/README.md ADDED
@@ -0,0 +1,113 @@
+ # ScalarAutograd for TypeScript
+
+ A tiny scalar autograd engine for TypeScript/JavaScript.
+
+ ScalarAutograd enables automatic differentiation for scalar operations, similar to what you'd find in PyTorch's `autograd`, but designed for TypeScript codebases. This makes it useful for building and training small neural networks, performing optimization, or experimenting with gradient-based techniques—entirely in the browser or Node.js.
+
+ ## Features
+ - Scalar `Value` objects for tracking data, gradients, and the computation graph.
+ - Backpropagation via `.backward()` to compute gradients automatically.
+ - Clean, TypeScript-first API.
+ - Does *NOT* handle matrices or tensors, just scalars.
+
+ ## Installation
+
+ Simply copy the files in this folder into your project, or import as a local module if desired.
+
+ ## Basic Usage
+
+ ```typescript
+ import { V } from './V';
+
+ // Basic differentiation using static V API
+ const x = V.W(2.0); // differentiable variable
+ const y = V.W(3.0);
+ const z = V.add(V.mul(x, y), V.pow(x, 2)); // z = x*y + x^2
+ z.backward();
+ console.log('dz/dx:', x.grad); // Output: dz/dx = y + 2*x = 3 + 2*2 = 7
+ console.log('dz/dy:', y.grad); // Output: dz/dy = x = 2
+ ```
+
+ ## Example: Gradients of a Composite Function
+
+ ```typescript
+ const a = V.W(5);
+ const b = V.W(-3);
+ const c = V.sin(V.mul(a, b)); // f = sin(a * b)
+ c.backward();
+ console.log(a.grad, b.grad); // Gradients w.r.t. a and b
+ ```
+
+ ## Example: Solving for Parameters via Backpropagation
+
+ Here's how you can use ScalarAutograd's backpropagation and a simple optimizer to fit a linear regression model (y = 2x + 3):
+
+ ```typescript
+ import { V } from './V';
+ import { SGD } from './Optimizers';
+
+ // Initialize parameters
+ let w = V.W(Math.random(), "w");
+ let b = V.W(Math.random(), "b");
+
+ // Example data: y = 2x + 3
+ const samples = [
+   { x: 1, y: 5 },
+   { x: 2, y: 7 },
+   { x: 3, y: 9 },
+ ];
+
+ const opt = new SGD([w, b], { learningRate: 0.1 });
+
+ for (let epoch = 0; epoch < 300; ++epoch) {
+   let losses = [];
+   for (const sample of samples) {
+     const x = V.C(sample.x, "x");
+     const pred = V.add(V.mul(w, x), b);
+     const target = V.C(sample.y, "target");
+     const loss = V.pow(V.sub(pred, target), 2);
+     losses.push(loss);
+   }
+   const totalLoss = V.mean(losses);
+   opt.zeroGrad();
+   totalLoss.backward();
+   opt.step();
+   if (totalLoss.data < 1e-4) break;
+ }
+
+ console.log('Fitted w:', w.data); // ~2
+ console.log('Fitted b:', b.data); // ~3
+ ```
+
+ This pattern—forward pass, backward for gradients, and calling `optimizer.step()`—applies to more complex optimization tasks and neural networks as well!
+
+ ## API Overview
+ - **Core Value construction:**
+   - `V.C(data, label?)` — constant (non-differentiable), e.g. for data/inputs.
+   - `V.W(data, label?)` — weight/parameter (differentiable).
+ - **Operators:**
+   - Basic: `V.add(a, b)`, `V.sub(a, b)`, `V.mul(a, b)`, `V.div(a, b)`, `V.pow(a, n)`, `V.powValue(a, b)`.
+   - Reductions: `V.sum(array)`, `V.mean(array)`.
+   - Trig: `V.sin(x)`, `V.cos(x)`, `V.tan(x)`, ...
+   - Activations: `V.relu(x)`, `V.tanh(x)`, `V.sigmoid(x)`, etc.
+   - Comparison: `V.eq(a, b)`, `V.gt(a, b)`, ... (output constant Values that never carry gradients)
+ - **Backward:**
+   - `.backward()` — trigger automatic differentiation from this node.
+   - `.grad` — access the computed gradient after the backward pass.
+ - **Optimizers:**
+   - E.g. `const opt = new SGD([w, b], {learningRate: 0.01})`
+ - **Losses:**
+   - Import from `Losses.ts` (e.g. `import { mse } from './Losses'`)
+
+ All API operations work with both `Value` and raw number inputs (numbers are automatically wrapped as non-grad constants).
+
+ ## Testing
+
+ To run the test suite and verify the correctness of ScalarAutograd, execute the following command in your project directory:
+
+ ```shell
+ npm run test
+ ```
+
+ ## License
+ MIT
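One part of the API the README examples do not exercise is the comparison helpers combined with `V.ifThenElse`, which the Huber and Tukey losses use to build piecewise expressions. A small sketch, under the assumption that `ifThenElse` selects a branch from the comparison result and backpropagates through the chosen branch, mirroring its use in `Losses.ts`:

```typescript
import { V } from './V';

// Leaky-ReLU-style piecewise function: f(x) = x if x > 0, else 0.01 * x.
// Assumes V.ifThenElse(cond, a, b) picks a branch per the (constant) comparison
// result and routes gradients through that branch, as in the Huber loss.
const x = V.W(-2.0, 'x');
const positive = V.gt(x, 0);                          // constant Value, never carries grad
const f = V.ifThenElse(positive, x, V.mul(0.01, x));
f.backward();
console.log(f.data, x.grad); // expected -0.02 and 0.01 if the negative branch is active
```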
package/V.ts ADDED
Binary file
@@ -0,0 +1,60 @@
+ import { Value } from "./Value";
+
+ // Edge cases and error handling
+ describe('Value edge cases and error handling', () => {
+   it('throws on invalid numeric inputs', () => {
+     expect(() => new Value(NaN)).toThrow();
+     expect(() => new Value(Infinity)).toThrow();
+     expect(() => new Value(-Infinity)).toThrow();
+   });
+
+   it('handles gradient accumulation correctly', () => {
+     const x = new Value(2, 'x', true);
+     const y = x.mul(3);
+     const z = x.mul(4);
+     const out = y.add(z);
+     out.backward();
+     expect(x.grad).toBe(7); // 3 + 4
+   });
+
+   it('handles repeated use of same value in expression', () => {
+     const x = new Value(3, 'x', true);
+     const y = x.mul(x).mul(x); // x^3
+     y.backward();
+     expect(x.grad).toBeCloseTo(27); // 3*x^2 = 27
+   });
+
+   it('throws on division by zero', () => {
+     const a = new Value(1);
+     const b = new Value(0);
+     expect(() => a.div(b)).toThrow();
+   });
+
+   it('throws on log of negative number', () => {
+     const x = new Value(-1);
+     expect(() => x.log()).toThrow();
+   });
+
+   it('throws on negative base with fractional exponent', () => {
+     const x = new Value(-2);
+     expect(() => x.pow(0.5)).toThrow();
+   });
+ });
+
+ // Complex expressions
+ describe('Complex mathematical expressions', () => {
+   it('computes gradient of complex expression', () => {
+     const x = new Value(0.5, 'x', true);
+     const y = x.sin().mul(x.cos()).add(x.exp());
+     y.backward();
+     const expected = Math.cos(0.5)**2 - Math.sin(0.5)**2 + Math.exp(0.5);
+     expect(x.grad).toBeCloseTo(expected, 4);
+   });
+
+   it('handles nested activation functions', () => {
+     const x = new Value(0.5, 'x', true);
+     const y = x.tanh().sigmoid().relu();
+     y.backward();
+     expect(x.grad).toBeGreaterThan(0);
+   });
+ });
@@ -0,0 +1,24 @@
+ import { Value } from "./Value";
+
+ describe('Gradient flow control', () => {
+   it('stops gradient at non-requiresGrad nodes', () => {
+     const x = new Value(2, 'x', true);
+     const y = new Value(3, 'y', false);
+     const z = new Value(4, 'z', true);
+     const out = x.mul(y).add(z);
+     out.backward();
+     expect(x.grad).toBe(3);
+     expect(y.grad).toBe(0);
+     expect(z.grad).toBe(1);
+   });
+
+   it('handles detached computation graphs', () => {
+     const x = new Value(2, 'x', true);
+     const y = x.mul(3);
+     const z = new Value(y.data, 'z', true); // detached
+     const out = z.mul(4);
+     out.backward();
+     expect(z.grad).toBe(4);
+     expect(x.grad).toBe(0); // no gradient flows to x
+   });
+ });
@@ -0,0 +1,32 @@
+ import { Value } from "./Value";
+ import { mse, mae, binaryCrossEntropy, categoricalCrossEntropy } from "./Losses";
+
+ describe('Loss function edge cases', () => {
+   it('handles empty arrays', () => {
+     expect(mse([], []).data).toBe(0);
+     expect(mae([], []).data).toBe(0);
+     expect(binaryCrossEntropy([], []).data).toBe(0);
+     expect(categoricalCrossEntropy([], []).data).toBe(0);
+   });
+
+   it('throws on mismatched lengths', () => {
+     const a = [new Value(1)];
+     const b = [new Value(1), new Value(2)];
+     expect(() => mse(a, b)).toThrow();
+   });
+
+   it('handles extreme values in binary cross entropy', () => {
+     const out = new Value(0.999999, 'out', true);
+     const target = new Value(1);
+     const loss = binaryCrossEntropy([out], [target]);
+     expect(loss.data).toBeGreaterThan(0);
+     expect(loss.data).toBeLessThan(0.1);
+   });
+
+   it('throws on invalid class indices in categorical cross entropy', () => {
+     const outputs = [new Value(1), new Value(2)];
+     expect(() => categoricalCrossEntropy(outputs, [2])).toThrow();
+     expect(() => categoricalCrossEntropy(outputs, [-1])).toThrow();
+     expect(() => categoricalCrossEntropy(outputs, [1.5])).toThrow();
+   });
+ });
@@ -0,0 +1,25 @@
+ import { Value } from "./Value";
+
+ describe('Memory management', () => {
+   it('handles large computation graphs', () => {
+     let x = new Value(1, 'x', true);
+     for (let i = 0; i < 100; i++) {
+       x = x.add(1).mul(1.01);
+     }
+     expect(() => x.backward()).not.toThrow();
+   });
+
+   it('zeroGradAll handles multiple disconnected graphs', () => {
+     const x1 = new Value(1, 'x1', true);
+     const y1 = x1.mul(2);
+     const x2 = new Value(2, 'x2', true);
+     const y2 = x2.mul(3);
+
+     y1.backward();
+     y2.backward();
+
+     Value.zeroGradAll([y1, y2]);
+     expect(x1.grad).toBe(0);
+     expect(x2.grad).toBe(0);
+   });
+ });