scalar-autograd 0.1.4 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. package/dist/Losses.d.ts +51 -0
  2. package/dist/Losses.js +145 -0
  3. package/dist/Losses.spec.d.ts +1 -0
  4. package/dist/Losses.spec.js +54 -0
  5. package/dist/Optimizers.d.ts +114 -0
  6. package/dist/Optimizers.edge-cases.spec.d.ts +1 -0
  7. package/dist/Optimizers.edge-cases.spec.js +29 -0
  8. package/dist/Optimizers.js +177 -0
  9. package/dist/Optimizers.spec.d.ts +1 -0
  10. package/dist/Optimizers.spec.js +56 -0
  11. package/dist/V.d.ts +0 -0
  12. package/dist/V.js +0 -0
  13. package/dist/Value.d.ts +260 -0
  14. package/dist/Value.edge-cases.spec.d.ts +1 -0
  15. package/dist/Value.edge-cases.spec.js +54 -0
  16. package/dist/Value.grad-flow.spec.d.ts +1 -0
  17. package/dist/Value.grad-flow.spec.js +24 -0
  18. package/dist/Value.js +424 -0
  19. package/dist/Value.losses-edge-cases.spec.d.ts +1 -0
  20. package/dist/Value.losses-edge-cases.spec.js +30 -0
  21. package/dist/Value.memory.spec.d.ts +1 -0
  22. package/dist/Value.memory.spec.js +23 -0
  23. package/dist/Value.nn.spec.d.ts +1 -0
  24. package/dist/Value.nn.spec.js +111 -0
  25. package/dist/Value.spec.d.ts +1 -0
  26. package/dist/Value.spec.js +245 -0
  27. package/dist/ValueActivation.d.ts +7 -0
  28. package/dist/ValueActivation.js +34 -0
  29. package/dist/ValueArithmetic.d.ts +26 -0
  30. package/dist/ValueArithmetic.js +180 -0
  31. package/dist/ValueComparison.d.ts +10 -0
  32. package/dist/ValueComparison.js +47 -0
  33. package/dist/ValueTrig.d.ts +9 -0
  34. package/dist/ValueTrig.js +49 -0
  35. package/package.json +4 -12
  36. package/Losses.ts +0 -145
  37. package/Optimizers.ts +0 -222
  38. package/V.ts +0 -0
  39. package/Value.edge-cases.spec.ts +0 -60
  40. package/Value.grad-flow.spec.ts +0 -24
  41. package/Value.losses-edge-cases.spec.ts +0 -32
  42. package/Value.memory.spec.ts +0 -25
  43. package/Value.nn.spec.ts +0 -109
  44. package/Value.spec.ts +0 -268
  45. package/Value.ts +0 -461
  46. package/ValueActivation.ts +0 -51
  47. package/ValueArithmetic.ts +0 -272
  48. package/ValueComparison.ts +0 -85
  49. package/ValueTrig.ts +0 -70
package/Losses.ts DELETED
@@ -1,145 +0,0 @@
- import { Value } from "./Value";
- import { V } from "./V";
-
- /**
- * Throws an error if outputs and targets length do not match.
- * @param outputs Array of output Values.
- * @param targets Array of target Values.
- */
- function checkLengthMatch(outputs: Value[], targets: Value[]): void {
- if (outputs.length !== targets.length) {
- throw new Error('Outputs and targets must have the same length');
- }
- }
-
- export class Losses {
- /**
- * Computes mean squared error (MSE) loss between outputs and targets.
- * @param outputs Array of Value predictions.
- * @param targets Array of Value targets.
- * @returns Mean squared error as a Value.
- */
- public static mse(outputs: Value[], targets: Value[]): Value {
- checkLengthMatch(outputs, targets);
- if (!Array.isArray(outputs) || !Array.isArray(targets)) throw new TypeError('mse expects Value[] for both arguments.');
- if (!outputs.length) return new Value(0);
- const diffs = outputs.map((out, i) => out.sub(targets[i]).square());
- return Value.mean(diffs);
- }
-
- /**
- * Computes mean absolute error (MAE) loss between outputs and targets.
- * @param outputs Array of Value predictions.
- * @param targets Array of Value targets.
- * @returns Mean absolute error as a Value.
- */
- public static mae(outputs: Value[], targets: Value[]): Value {
- checkLengthMatch(outputs, targets);
- if (!Array.isArray(outputs) || !Array.isArray(targets)) throw new TypeError('mae expects Value[] for both arguments.');
- if (!outputs.length) return new Value(0);
- const diffs = outputs.map((out, i) => out.sub(targets[i]).abs());
- return Value.mean(diffs);
- }
-
- static EPS = 1e-12;
-
- /**
- * Computes binary cross-entropy loss between predicted outputs and targets (after sigmoid).
- * @param outputs Array of Value predictions (expected in (0,1)).
- * @param targets Array of Value targets (typically 0 or 1).
- * @returns Binary cross-entropy loss as a Value.
- */
- public static binaryCrossEntropy(outputs: Value[], targets: Value[]): Value {
- checkLengthMatch(outputs, targets);
- if (!Array.isArray(outputs) || !Array.isArray(targets)) throw new TypeError('binaryCrossEntropy expects Value[] for both arguments.');
- if (!outputs.length) return new Value(0);
- const eps = Losses.EPS;
- const one = new Value(1);
- const losses = outputs.map((out, i) => {
- const t = targets[i];
- const outClamped = out.clamp(eps, 1 - eps); // sigmoid should output (0,1)
- return t.mul(outClamped.log()).add(one.sub(t).mul(one.sub(outClamped).log()));
- });
- return Value.mean(losses).mul(-1);
- }
-
- /**
- * Computes categorical cross-entropy loss between outputs (logits) and integer target classes.
- * @param outputs Array of Value logits for each class.
- * @param targets Array of integer class indices (0-based, one per sample).
- * @returns Categorical cross-entropy loss as a Value.
- */
- public static categoricalCrossEntropy(outputs: Value[], targets: number[]): Value {
- // targets: integer encoded class indices
- if (!Array.isArray(outputs) || !Array.isArray(targets)) throw new TypeError('categoricalCrossEntropy expects Value[] and number[].');
- if (!outputs.length || !targets.length) return new Value(0);
- if (targets.some(t => typeof t !== 'number' || !isFinite(t) || t < 0 || t >= outputs.length || Math.floor(t) !== t)) {
- throw new Error('Target indices must be valid integers in [0, outputs.length)');
- }
- const eps = Losses.EPS;
- const maxLogit = outputs.reduce((a, b) => a.data > b.data ? a : b);
- const exps = outputs.map(out => out.sub(maxLogit).exp());
- const sumExp = Value.sum(exps).add(eps);
- const softmax = exps.map(e => e.div(sumExp));
- const tIndices = targets.map((t, i) => softmax[t]);
- return Value.mean(tIndices.map(sm => sm.add(eps).log().mul(-1)));
- }
-
- /**
- * Computes Huber loss between outputs and targets.
- * Combines quadratic loss for small residuals and linear loss for large residuals.
- * @param outputs Array of Value predictions.
- * @param targets Array of Value targets.
- * @param delta Threshold at which to switch from quadratic to linear (default: 1.0).
- * @returns Huber loss as a Value.
- */
- public static huber(outputs: Value[], targets: Value[], delta = 1.0): Value {
- checkLengthMatch(outputs, targets);
- if (!Array.isArray(outputs) || !Array.isArray(targets)) throw new TypeError('huber expects Value[] for both arguments.');
- if (!outputs.length) return new Value(0);
-
- const deltaValue = new Value(delta);
- const half = new Value(0.5);
-
- const losses = outputs.map((out, i) => {
- const residual = V.abs(V.sub(out, targets[i]));
- const condition = V.lt(residual, deltaValue);
-
- const quadraticLoss = V.mul(half, V.square(residual));
- const linearLoss = V.mul(deltaValue, V.sub(residual, V.mul(half, deltaValue)));
-
- return V.ifThenElse(condition, quadraticLoss, linearLoss);
- });
-
- return V.mean(losses);
- }
-
- /**
- * Computes Tukey loss between outputs and targets.
- * This robust loss function saturates for large residuals.
- *
- * @param outputs Array of Value predictions.
- * @param targets Array of Value targets.
- * @param c Threshold constant (typically 4.685).
- * @returns Tukey loss as a Value.
- */
- public static tukey(outputs: Value[], targets: Value[], c: number = 4.685): Value {
- checkLengthMatch(outputs, targets);
- const c2_over_6 = (c * c) / 6;
- const cValue = V.C(c);
- const c2_over_6_Value = V.C(c2_over_6);
-
- const losses = outputs.map((out, i) => {
- const diff = V.abs(V.sub(out, targets[i]));
- const inlier = V.lte(diff, cValue);
- const rc = V.div(diff, cValue);
- const rc2 = V.square(rc);
- const oneMinusRC2 = V.sub(1, rc2);
- const inner = V.pow(oneMinusRC2, 3);
- const inlierLoss = V.mul(c2_over_6_Value, V.sub(1, inner));
- const loss = V.ifThenElse(inlier, inlierLoss, c2_over_6_Value);
- return loss;
- });
- return V.mean(losses);
- }
- }
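
For orientation, a minimal usage sketch of the `Losses` API deleted above. This is illustrative only; the relative import paths and the `new Value(data, label?, requiresGrad?)` constructor shape are inferred from the package's own spec files shown later in this diff.

```ts
import { Value } from "./Value";
import { Losses } from "./Losses";

// Predictions are created with requiresGrad = true so that
// loss.backward() can populate their .grad fields.
const preds = [new Value(0.9, "p0", true), new Value(0.2, "p1", true)];
const targets = [new Value(1), new Value(0)];

const loss = Losses.mse(preds, targets); // ((0.9-1)^2 + (0.2-0)^2) / 2 = 0.025
loss.backward();                         // d(loss)/d(p_i) = 2 * (p_i - t_i) / n
console.log(loss.data, preds[0].grad);   // 0.025, -0.1
```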
package/Optimizers.ts DELETED
@@ -1,222 +0,0 @@
- // Optimizers.ts
-
- import { Value } from "./Value";
-
- /**
- * Abstract base class for all optimizers.
- * Ensures only requiresGrad parameters are optimized.
- */
- export abstract class Optimizer {
- protected trainables: Value[];
- public learningRate: number;
-
- /**
- * Constructs an Optimizer.
- * @param trainables Array of Value parameters to optimize.
- * @param learningRate Learning rate for updates.
- */
- constructor(trainables: Value[], learningRate: number) {
- this.trainables = trainables.filter(v => v.requiresGrad);
- this.learningRate = learningRate;
- }
-
- /**
- * Performs a parameter update step.
- */
- abstract step(): void;
-
- /**
- * Sets grads of all trainables to zero.
- */
- zeroGrad(): void {
- for (const v of this.trainables) v.grad = 0;
- }
-
- /**
- * Clips global norm of gradients as regularization.
- * @param maxNorm Maximum allowed norm for gradients.
- */
- clipGradients(maxNorm: number): void {
- const totalNorm = Math.sqrt(
- this.trainables.reduce((sum, v) => sum + v.grad * v.grad, 0)
- );
- if (totalNorm > maxNorm) {
- const scale = maxNorm / (totalNorm + 1e-6);
- for (const v of this.trainables) v.grad *= scale;
- }
- }
- }
-
- /**
- * Optional arguments for basic optimizers.
- * @property learningRate: Overrides the step size for parameter updates (default varies by optimizer).
- * @property weightDecay: L2 regularization multiplier (default 0). Ignored for plain SGD.
- * @property gradientClip: Maximum absolute value for gradient updates (default 0: no clipping).
- */
- export interface OptimizerOptions {
- learningRate?: number;
- weightDecay?: number;
- gradientClip?: number;
- }
-
- /**
- * Stochastic Gradient Descent (SGD) optimizer. Accepts weightDecay and gradientClip for API consistency (ignored).
- */
- export class SGD extends Optimizer {
- private weightDecay: number;
- private gradientClip: number;
- /**
- * Constructs an SGD optimizer.
- * @param trainables Array of Value parameters to optimize.
- * @param opts Optional parameters (learningRate, weightDecay, gradientClip).
- */
- constructor(trainables: Value[], opts: OptimizerOptions = {}) {
- super(
- trainables,
- opts.learningRate ?? 1e-2
- );
- this.weightDecay = opts.weightDecay ?? 0;
- this.gradientClip = opts.gradientClip ?? 0;
- }
- /**
- * Performs a parameter update using standard SGD.
- */
- step(): void {
- // Intentionally ignoring weightDecay/gradientClip for SGD
- for (const v of this.trainables) {
- v.data -= this.learningRate * v.grad;
- }
- }
- }
-
- /**
- * Adam and AdamW optimizer parameters.
- * Extends OptimizerOptions.
- * @property beta1: Exponential decay rate for 1st moment (default 0.9).
- * @property beta2: Exponential decay rate for 2nd moment (default 0.999).
- * @property epsilon: Numerical stability fudge factor (default 1e-8).
- */
- export interface AdamOptions extends OptimizerOptions {
- beta1?: number;
- beta2?: number;
- epsilon?: number;
- }
-
- /**
- * Adam optimizer, supports decoupled weight decay and gradient clipping.
- */
- export class Adam extends Optimizer {
- private beta1: number;
- private beta2: number;
- private epsilon: number;
- private weightDecay: number;
- private gradientClip: number;
- private m: Map<Value, number> = new Map();
- private v: Map<Value, number> = new Map();
- private stepCount: number = 0;
- /**
- * Constructs an Adam optimizer.
- * @param trainables Array of Value parameters to optimize.
- * @param opts Optional parameters (learningRate, weightDecay, gradientClip, beta1, beta2, epsilon).
- */
- constructor(
- trainables: Value[],
- opts: AdamOptions = {}
- ) {
- super(trainables, opts.learningRate ?? 0.001);
- this.beta1 = opts.beta1 ?? 0.9;
- this.beta2 = opts.beta2 ?? 0.999;
- this.epsilon = opts.epsilon ?? 1e-8;
- this.weightDecay = opts.weightDecay ?? 0;
- this.gradientClip = opts.gradientClip ?? 0;
- for (const v of this.trainables) {
- this.m.set(v, 0);
- this.v.set(v, 0);
- }
- }
- /**
- * Performs a parameter update using Adam optimization.
- */
- step(): void {
- this.stepCount++;
- for (const v of this.trainables) {
- let grad = v.grad;
- if (this.weightDecay > 0) grad += this.weightDecay * v.data;
-
- let m = this.m.get(v)!;
- let vVal = this.v.get(v)!;
- m = this.beta1 * m + (1 - this.beta1) * grad;
- vVal = this.beta2 * vVal + (1 - this.beta2) * grad * grad;
-
- const mHat = m / (1 - Math.pow(this.beta1, this.stepCount));
- const vHat = vVal / (1 - Math.pow(this.beta2, this.stepCount));
- let update = mHat / (Math.sqrt(vHat) + this.epsilon);
-
- if (this.gradientClip > 0) {
- update = Math.max(-this.gradientClip, Math.min(update, this.gradientClip));
- }
- v.data -= this.learningRate * update;
-
- this.m.set(v, m);
- this.v.set(v, vVal);
- }
- }
- }
-
- /**
- * AdamW optimizer, supports decoupled weight decay and gradient clipping (same options as Adam).
- */
- export class AdamW extends Optimizer {
- private beta1: number;
- private beta2: number;
- private epsilon: number;
- private weightDecay: number;
- private gradientClip: number;
- private m: Map<Value, number> = new Map();
- private v: Map<Value, number> = new Map();
- private stepCount: number = 0;
- /**
- * Constructs an AdamW optimizer.
- * @param trainables Array of Value parameters to optimize.
- * @param opts Optional parameters (learningRate, weightDecay, gradientClip, beta1, beta2, epsilon).
- */
- constructor(
- trainables: Value[],
- opts: AdamOptions = {}
- ) {
- super(trainables, opts.learningRate ?? 0.001);
- this.beta1 = opts.beta1 ?? 0.9;
- this.beta2 = opts.beta2 ?? 0.999;
- this.epsilon = opts.epsilon ?? 1e-8;
- this.weightDecay = opts.weightDecay ?? 0.01;
- this.gradientClip = opts.gradientClip ?? 0;
- for (const v of this.trainables) {
- this.m.set(v, 0);
- this.v.set(v, 0);
- }
- }
- /**
- * Performs a parameter update using AdamW optimization (decoupled weight decay).
- */
- step(): void {
- this.stepCount++;
- for (const v of this.trainables) {
- let grad = v.grad;
- let m = this.m.get(v)!;
- let vVal = this.v.get(v)!;
- m = this.beta1 * m + (1 - this.beta1) * grad;
- vVal = this.beta2 * vVal + (1 - this.beta2) * grad * grad;
-
- const mHat = m / (1 - Math.pow(this.beta1, this.stepCount));
- const vHat = vVal / (1 - Math.pow(this.beta2, this.stepCount));
- let update = mHat / (Math.sqrt(vHat) + this.epsilon);
- if (this.gradientClip > 0) {
- update = Math.max(-this.gradientClip, Math.min(update, this.gradientClip));
- }
- // Weight decay is decoupled as in AdamW paper:
- v.data -= this.learningRate * update + this.learningRate * this.weightDecay * v.data;
- this.m.set(v, m);
- this.v.set(v, vVal);
- }
- }
- }
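
A typical update cycle with these optimizers, sketched against the classes above. Treat it as illustrative only; the `Value`/`Losses` usage mirrors the nn spec further down in this diff.

```ts
import { Value } from "./Value";
import { Losses } from "./Losses";
import { Adam } from "./Optimizers";

const w = new Value(0.1, "w", true);
const b = new Value(0.0, "b", true);
const opt = new Adam([w, b], { learningRate: 0.01 });

for (let epoch = 0; epoch < 200; ++epoch) {
  const preds = [1, 2, 3].map(x => w.mul(new Value(x)).add(b));
  const targets = [3, 5, 7].map(y => new Value(y));
  const loss = Losses.mse(preds, targets);

  opt.zeroGrad();  // reset grads on the requiresGrad parameters
  loss.backward(); // accumulate w.grad and b.grad
  opt.step();      // bias-corrected Adam update
}
```

Note the difference visible in the two `step()` bodies: `Adam` folds `weightDecay` into the gradient (classic L2 regularization), while `AdamW` applies it directly to the weights after the adaptive update, per the AdamW paper.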
package/V.ts DELETED
Binary file
package/Value.edge-cases.spec.ts DELETED
@@ -1,60 +0,0 @@
- import { Value } from "./Value";
-
- // Edge cases and error handling
- describe('Value edge cases and error handling', () => {
- it('throws on invalid numeric inputs', () => {
- expect(() => new Value(NaN)).toThrow();
- expect(() => new Value(Infinity)).toThrow();
- expect(() => new Value(-Infinity)).toThrow();
- });
-
- it('handles gradient accumulation correctly', () => {
- const x = new Value(2, 'x', true);
- const y = x.mul(3);
- const z = x.mul(4);
- const out = y.add(z);
- out.backward();
- expect(x.grad).toBe(7); // 3 + 4
- });
-
- it('handles repeated use of same value in expression', () => {
- const x = new Value(3, 'x', true);
- const y = x.mul(x).mul(x); // x^3
- y.backward();
- expect(x.grad).toBeCloseTo(27); // 3*x^2 = 27
- });
-
- it('throws on division by zero', () => {
- const a = new Value(1);
- const b = new Value(0);
- expect(() => a.div(b)).toThrow();
- });
-
- it('throws on log of negative number', () => {
- const x = new Value(-1);
- expect(() => x.log()).toThrow();
- });
-
- it('throws on negative base with fractional exponent', () => {
- const x = new Value(-2);
- expect(() => x.pow(0.5)).toThrow();
- });
- });
-
- // Complex expressions
- describe('Complex mathematical expressions', () => {
- it('computes gradient of complex expression', () => {
- const x = new Value(0.5, 'x', true);
- const y = x.sin().mul(x.cos()).add(x.exp());
- y.backward();
- const expected = Math.cos(0.5)**2 - Math.sin(0.5)**2 + Math.exp(0.5);
- expect(x.grad).toBeCloseTo(expected, 4);
- });
-
- it('handles nested activation functions', () => {
- const x = new Value(0.5, 'x', true);
- const y = x.tanh().sigmoid().relu();
- y.backward();
- expect(x.grad).toBeGreaterThan(0);
- });
- });
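
For reference, the `expected` line in the "complex expression" test is the product rule plus the exponential derivative:

$$\frac{d}{dx}\bigl[\sin x \cos x + e^x\bigr] = \cos^2 x - \sin^2 x + e^x,$$

evaluated at x = 0.5.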
package/Value.grad-flow.spec.ts DELETED
@@ -1,24 +0,0 @@
- import { Value } from "./Value";
-
- describe('Gradient flow control', () => {
- it('stops gradient at non-requiresGrad nodes', () => {
- const x = new Value(2, 'x', true);
- const y = new Value(3, 'y', false);
- const z = new Value(4, 'z', true);
- const out = x.mul(y).add(z);
- out.backward();
- expect(x.grad).toBe(3);
- expect(y.grad).toBe(0);
- expect(z.grad).toBe(1);
- });
-
- it('handles detached computation graphs', () => {
- const x = new Value(2, 'x', true);
- const y = x.mul(3);
- const z = new Value(y.data, 'z', true); // detached
- const out = z.mul(4);
- out.backward();
- expect(z.grad).toBe(4);
- expect(x.grad).toBe(0); // no gradient flows to x
- });
- });
package/Value.losses-edge-cases.spec.ts DELETED
@@ -1,32 +0,0 @@
- import { Value } from "./Value";
- import { Losses } from "./Losses";
-
- describe('Loss function edge cases', () => {
- it('handles empty arrays', () => {
- expect(Losses.mse([], []).data).toBe(0);
- expect(Losses.mae([], []).data).toBe(0);
- expect(Losses.binaryCrossEntropy([], []).data).toBe(0);
- expect(Losses.categoricalCrossEntropy([], []).data).toBe(0);
- });
-
- it('throws on mismatched lengths', () => {
- const a = [new Value(1)];
- const b = [new Value(1), new Value(2)];
- expect(() => Losses.mse(a, b)).toThrow();
- });
-
- it('handles extreme values in binary cross entropy', () => {
- const out = new Value(0.999999, 'out', true);
- const target = new Value(1);
- const loss = Losses.binaryCrossEntropy([out], [target]);
- expect(loss.data).toBeGreaterThan(0);
- expect(loss.data).toBeLessThan(0.1);
- });
-
- it('throws on invalid class indices in categorical cross entropy', () => {
- const outputs = [new Value(1), new Value(2)];
- expect(() => Losses.categoricalCrossEntropy(outputs, [2])).toThrow();
- expect(() => Losses.categoricalCrossEntropy(outputs, [-1])).toThrow();
- expect(() => Losses.categoricalCrossEntropy(outputs, [1.5])).toThrow();
- });
- });
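
The "extreme values" expectation above can be checked by hand: with target 1, binary cross-entropy collapses to −ln(p), and the EPS clamp in `Losses.binaryCrossEntropy` keeps the log finite even as p approaches 0 or 1.

```ts
// Hand-check of the extreme-values test: for t = 1 the loss reduces
// to -ln(p), tiny for p near 1 but still strictly positive.
const p = 0.999999;
const bce = -Math.log(p);          // ~1.0000005e-6
console.log(bce > 0 && bce < 0.1); // true, matching both assertions
```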
package/Value.memory.spec.ts DELETED
@@ -1,25 +0,0 @@
- import { Value } from "./Value";
-
- describe('Memory management', () => {
- it('handles large computation graphs', () => {
- let x = new Value(1, 'x', true);
- for (let i = 0; i < 100; i++) {
- x = x.add(1).mul(1.01);
- }
- expect(() => x.backward()).not.toThrow();
- });
-
- it('zeroGradAll handles multiple disconnected graphs', () => {
- const x1 = new Value(1, 'x1', true);
- const y1 = x1.mul(2);
- const x2 = new Value(2, 'x2', true);
- const y2 = x2.mul(3);
-
- y1.backward();
- y2.backward();
-
- Value.zeroGradAll([y1, y2]);
- expect(x1.grad).toBe(0);
- expect(x2.grad).toBe(0);
- });
- });
package/Value.nn.spec.ts DELETED
@@ -1,109 +0,0 @@
- import { Value } from "./Value";
- import { SGD, Adam } from "./Optimizers";
- import { Losses } from "./Losses";
-
- describe("can train scalar neural networks on minimal problems", () => {
-
- it("1. learns linear regression (y = 2x + 3) with SGD", () => {
- let w = new Value(Math.random(), "w", true);
- let b = new Value(Math.random(), "b", true);
- const examples = [
- { x: 1, y: 5 },
- { x: 2, y: 7 },
- { x: 3, y: 9 },
- ];
- const opt = new SGD([w, b], { learningRate: 0.1 });
- for (let epoch = 0; epoch < 300; ++epoch) {
- let preds: Value[] = [];
- let targets: Value[] = [];
- for (const ex of examples) {
- const x = new Value(ex.x, "x");
- const pred = w.mul(x).add(b);
- preds.push(pred);
- targets.push(new Value(ex.y));
- }
- let loss = Losses.mse(preds, targets);
- if (loss.data < 1e-4) break;
- w.grad = 0; b.grad = 0;
- loss.backward();
- opt.step();
- }
- expect(w.data).toBeCloseTo(2, 1);
- expect(b.data).toBeCloseTo(3, 1);
- });
-
- it("2. learns quadratic fit (y = x^2) with SGD", () => {
- let a = new Value(Math.random(), "a", true);
- let b = new Value(Math.random(), "b", true);
- let c = new Value(Math.random(), "c", true);
- const examples = [
- { x: -1, y: 1 },
- { x: 0, y: 0 },
- { x: 2, y: 4 },
- { x: 3, y: 9 },
- ];
- const opt = new SGD([a, b, c], { learningRate: 0.01 });
-
- for (let epoch = 0; epoch < 400; ++epoch) {
- let preds: Value[] = [];
- let targets: Value[] = [];
- for (const ex of examples) {
- const x = new Value(ex.x);
- const pred = a.mul(x.square()).add(b.mul(x)).add(c);
- preds.push(pred);
- targets.push(new Value(ex.y));
- }
- let loss = Losses.mse(preds, targets);
- if (loss.data < 1e-4) break;
- a.grad = 0; b.grad = 0; c.grad = 0;
- loss.backward();
- opt.step();
- }
- expect(a.data).toBeCloseTo(1, 1);
- expect(Math.abs(b.data)).toBeLessThan(0.5);
- expect(Math.abs(c.data)).toBeLessThan(0.5);
- });
-
- /*
- // This is hard to get to work reliably, I believe it's a difficult problem to solve!?
- it("3. learns XOR with tiny MLP (2-2-1) with SGD", () => {
- function mlp(x1: Value, x2: Value, params: Value[]): Value {
- const [w1, w2, w3, w4, b1, b2, v1, v2, c] = params;
- const h1 = w1.mul(x1).add(w2.mul(x2)).add(b1).tanh();
- const h2 = w3.mul(x1).add(w4.mul(x2)).add(b2).tanh();
- return v1.mul(h1).add(v2.mul(h2)).add(c).sigmoid();
- }
- let params = Array.from({ length: 9 }, (_, i) => new Value(Math.random() - 0.5, "p" + i, true));
- const data = [
- { x: [0, 0], y: 0 },
- { x: [0, 1], y: 1 },
- { x: [1, 0], y: 1 },
- { x: [1, 1], y: 0 },
- ];
- const opt = new SGD(params, { learningRate: 0.01 });
- for (let epoch = 0; epoch < 5000; ++epoch) {
- let preds: Value[] = [];
- let targets: Value[] = [];
- for (const ex of data) {
- const x1 = new Value(ex.x[0]);
- const x2 = new Value(ex.x[1]);
- const pred = mlp(x1, x2, params);
- preds.push(pred);
- targets.push(new Value(ex.y));
- }
- let loss = binaryCrossEntropy(preds, targets);
- if (loss.data < 1e-3) break;
- for (const p of params) p.grad = 0;
- loss.backward();
- opt.step();
- }
- const out00 = mlp(new Value(0), new Value(0), params).data;
- const out01 = mlp(new Value(0), new Value(1), params).data;
- const out10 = mlp(new Value(1), new Value(0), params).data;
- const out11 = mlp(new Value(1), new Value(1), params).data;
- expect((out00 < 0.4 || out00 > 0.6)).toBe(true);
- expect(out01).toBeGreaterThan(0.6);
- expect(out10).toBeGreaterThan(0.6);
- expect(out11).toBeLessThan(0.4);
- });*/
- });