scalar-autograd 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) 2025 Mattias Fagerlund
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
package/Losses.ts ADDED
@@ -0,0 +1,144 @@
+ import { Value } from "./Value";
+ import { V } from "./V";
+
+ /**
+  * Throws an error if outputs and targets length do not match.
+  * @param outputs Array of output Values.
+  * @param targets Array of target Values.
+  */
+ function checkLengthMatch(outputs: Value[], targets: Value[]): void {
+   if (outputs.length !== targets.length) {
+     throw new Error('Outputs and targets must have the same length');
+   }
+ }
+
+ /**
+  * Computes mean squared error (MSE) loss between outputs and targets.
+  * @param outputs Array of Value predictions.
+  * @param targets Array of Value targets.
+  * @returns Mean squared error as a Value.
+  */
+ export function mse(outputs: Value[], targets: Value[]): Value {
+   if (!Array.isArray(outputs) || !Array.isArray(targets)) throw new TypeError('mse expects Value[] for both arguments.');
+   checkLengthMatch(outputs, targets);
+   if (!outputs.length) return new Value(0);
+   const diffs = outputs.map((out, i) => out.sub(targets[i]).square());
+   return Value.mean(diffs);
+ }
+
+ /**
+  * Computes mean absolute error (MAE) loss between outputs and targets.
+  * @param outputs Array of Value predictions.
+  * @param targets Array of Value targets.
+  * @returns Mean absolute error as a Value.
+  */
+ export function mae(outputs: Value[], targets: Value[]): Value {
+   if (!Array.isArray(outputs) || !Array.isArray(targets)) throw new TypeError('mae expects Value[] for both arguments.');
+   checkLengthMatch(outputs, targets);
+   if (!outputs.length) return new Value(0);
+   const diffs = outputs.map((out, i) => out.sub(targets[i]).abs());
+   return Value.mean(diffs);
+ }
+
+ const EPS = 1e-12;
+
+ /**
+  * Computes binary cross-entropy loss between predicted outputs and targets.
+  * Outputs are expected to already be sigmoid probabilities.
+  * @param outputs Array of Value predictions (expected in (0,1)).
+  * @param targets Array of Value targets (typically 0 or 1).
+  * @returns Binary cross-entropy loss as a Value.
+  */
+ export function binaryCrossEntropy(outputs: Value[], targets: Value[]): Value {
+   if (!Array.isArray(outputs) || !Array.isArray(targets)) throw new TypeError('binaryCrossEntropy expects Value[] for both arguments.');
+   checkLengthMatch(outputs, targets);
+   if (!outputs.length) return new Value(0);
+   const eps = EPS;
+   const one = new Value(1);
+   const losses = outputs.map((out, i) => {
+     const t = targets[i];
+     const outClamped = out.clamp(eps, 1 - eps); // sigmoid outputs lie in (0,1)
+     return t.mul(outClamped.log()).add(one.sub(t).mul(one.sub(outClamped).log()));
+   });
+   return Value.mean(losses).mul(-1);
+ }
+
+ /**
+  * Computes categorical cross-entropy loss between outputs (logits) and integer target classes.
+  * @param outputs Array of Value logits for each class.
+  * @param targets Array of integer class indices (0-based, one per sample).
+  * @returns Categorical cross-entropy loss as a Value.
+  */
+ export function categoricalCrossEntropy(outputs: Value[], targets: number[]): Value {
+   // targets: integer encoded class indices
+   if (!Array.isArray(outputs) || !Array.isArray(targets)) throw new TypeError('categoricalCrossEntropy expects Value[] and number[].');
+   if (!outputs.length || !targets.length) return new Value(0);
+   if (targets.some(t => typeof t !== 'number' || !isFinite(t) || t < 0 || t >= outputs.length || Math.floor(t) !== t)) {
+     throw new Error('Target indices must be valid integers in [0, outputs.length)');
+   }
+   const eps = EPS;
+   const maxLogit = outputs.reduce((a, b) => a.data > b.data ? a : b);
+   const exps = outputs.map(out => out.sub(maxLogit).exp());
+   const sumExp = Value.sum(exps).add(eps);
+   const softmax = exps.map(e => e.div(sumExp));
+   const picked = targets.map(t => softmax[t]);
+   return Value.mean(picked.map(sm => sm.add(eps).log().mul(-1)));
+ }
+
+ /**
+  * Computes Huber loss between outputs and targets.
+  * Combines quadratic loss for small residuals and linear loss for large residuals.
+  * @param outputs Array of Value predictions.
+  * @param targets Array of Value targets.
+  * @param delta Threshold at which to switch from quadratic to linear (default: 1.0).
+  * @returns Huber loss as a Value.
+  */
+ export function huber(outputs: Value[], targets: Value[], delta = 1.0): Value {
+   if (!Array.isArray(outputs) || !Array.isArray(targets)) throw new TypeError('huber expects Value[] for both arguments.');
+   checkLengthMatch(outputs, targets);
+   if (!outputs.length) return new Value(0);
+
+   const deltaValue = new Value(delta);
+   const half = new Value(0.5);
+
+   const losses = outputs.map((out, i) => {
+     const residual = V.abs(V.sub(out, targets[i]));
+     const condition = V.lt(residual, deltaValue);
+
+     const quadraticLoss = V.mul(half, V.square(residual));
+     const linearLoss = V.mul(deltaValue, V.sub(residual, V.mul(half, deltaValue)));
+
+     return V.ifThenElse(condition, quadraticLoss, linearLoss);
+   });
+
+   return V.mean(losses);
+ }
+
+ /**
+  * Computes Tukey (biweight) loss between outputs and targets.
+  * This robust loss function saturates for large residuals.
+  *
+  * @param outputs Array of Value predictions.
+  * @param targets Array of Value targets.
+  * @param c Threshold constant (typically 4.685).
+  * @returns Tukey loss as a Value.
+  */
+ export function tukey(outputs: Value[], targets: Value[], c: number = 4.685): Value {
+   checkLengthMatch(outputs, targets);
+   const c2_over_6 = (c * c) / 6;
+   const cValue = V.C(c);
+   const c2_over_6_Value = V.C(c2_over_6);
+
+   const losses = outputs.map((out, i) => {
+     const diff = V.abs(V.sub(out, targets[i]));
+     const inlier = V.lte(diff, cValue);
+     const rc = V.div(diff, cValue);
+     const rc2 = V.square(rc);
+     const oneMinusRC2 = V.sub(1, rc2);
+     const inner = V.pow(oneMinusRC2, 3);
+     const inlierLoss = V.mul(c2_over_6_Value, V.sub(1, inner));
+     return V.ifThenElse(inlier, inlierLoss, c2_over_6_Value);
+   });
+   return V.mean(losses);
+ }
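For orientation, here is a minimal sketch of how these loss helpers compose with the rest of the package. It is illustrative only: the one-parameter model and data are made up, and it assumes the `V`/`Value` API behaves as described in the README (numbers auto-wrapped, `.backward()` populating `.grad`).

```typescript
import { V } from './V';
import { mse, huber } from './Losses';

// Hypothetical one-parameter model: pred = w * x (data invented for illustration)
const w = V.W(0.5, 'w');
const xs = [1, 2, 3];
const ys = [2, 4, 6]; // underlying relation: y = 2x

const preds = xs.map(x => V.mul(w, V.C(x)));
const targets = ys.map(y => V.C(y));

// Each loss returns a single Value node that can be backpropagated through.
const loss = mse(preds, targets);         // mean of squared residuals
const robust = huber(preds, targets, 1);  // quadratic near 0, linear beyond delta = 1
console.log(loss.data, robust.data);

loss.backward();
console.log(w.grad); // d(loss)/dw, consumed by an optimizer step
```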
package/Optimizers.ts ADDED
@@ -0,0 +1,222 @@
+ // Optimizers.ts
+
+ import { Value } from "./Value";
+
+ /**
+  * Abstract base class for all optimizers.
+  * Ensures only requiresGrad parameters are optimized.
+  */
+ export abstract class Optimizer {
+   protected trainables: Value[];
+   public learningRate: number;
+
+   /**
+    * Constructs an Optimizer.
+    * @param trainables Array of Value parameters to optimize.
+    * @param learningRate Learning rate for updates.
+    */
+   constructor(trainables: Value[], learningRate: number) {
+     this.trainables = trainables.filter(v => v.requiresGrad);
+     this.learningRate = learningRate;
+   }
+
+   /**
+    * Performs a parameter update step.
+    */
+   abstract step(): void;
+
+   /**
+    * Sets grads of all trainables to zero.
+    */
+   zeroGrad(): void {
+     for (const v of this.trainables) v.grad = 0;
+   }
+
+   /**
+    * Clips the global gradient norm to stabilize training.
+    * @param maxNorm Maximum allowed norm for gradients.
+    */
+   clipGradients(maxNorm: number): void {
+     const totalNorm = Math.sqrt(
+       this.trainables.reduce((sum, v) => sum + v.grad * v.grad, 0)
+     );
+     if (totalNorm > maxNorm) {
+       const scale = maxNorm / (totalNorm + 1e-6);
+       for (const v of this.trainables) v.grad *= scale;
+     }
+   }
+ }
+
+ /**
+  * Optional arguments for basic optimizers.
+  * @property learningRate: Overrides the step size for parameter updates (default varies by optimizer).
+  * @property weightDecay: L2 regularization multiplier (default 0). Ignored by plain SGD.
+  * @property gradientClip: Maximum absolute value for per-parameter updates (default 0: no clipping).
+  */
+ interface OptimizerOptions {
+   learningRate?: number;
+   weightDecay?: number;
+   gradientClip?: number;
+ }
+
+ /**
+  * Stochastic Gradient Descent (SGD) optimizer. Accepts weightDecay and gradientClip for API consistency (both are ignored).
+  */
+ export class SGD extends Optimizer {
+   private weightDecay: number;
+   private gradientClip: number;
+   /**
+    * Constructs an SGD optimizer.
+    * @param trainables Array of Value parameters to optimize.
+    * @param opts Optional parameters (learningRate, weightDecay, gradientClip).
+    */
+   constructor(trainables: Value[], opts: OptimizerOptions = {}) {
+     super(trainables, opts.learningRate ?? 1e-2);
+     this.weightDecay = opts.weightDecay ?? 0;
+     this.gradientClip = opts.gradientClip ?? 0;
+   }
+   /**
+    * Performs a parameter update using standard SGD.
+    */
+   step(): void {
+     // Intentionally ignoring weightDecay/gradientClip for SGD
+     for (const v of this.trainables) {
+       v.data -= this.learningRate * v.grad;
+     }
+   }
+ }
+
+ /**
+  * Adam and AdamW optimizer parameters.
+  * Extends OptimizerOptions.
+  * @property beta1: Exponential decay rate for 1st moment (default 0.9).
+  * @property beta2: Exponential decay rate for 2nd moment (default 0.999).
+  * @property epsilon: Numerical stability fudge factor (default 1e-8).
+  */
+ interface AdamOptions extends OptimizerOptions {
+   beta1?: number;
+   beta2?: number;
+   epsilon?: number;
+ }
+
+ /**
+  * Adam optimizer. Supports L2 weight decay (added to the gradient) and update clipping.
+  */
+ export class Adam extends Optimizer {
+   private beta1: number;
+   private beta2: number;
+   private epsilon: number;
+   private weightDecay: number;
+   private gradientClip: number;
+   private m: Map<Value, number> = new Map();
+   private v: Map<Value, number> = new Map();
+   private stepCount: number = 0;
+   /**
+    * Constructs an Adam optimizer.
+    * @param trainables Array of Value parameters to optimize.
+    * @param opts Optional parameters (learningRate, weightDecay, gradientClip, beta1, beta2, epsilon).
+    */
+   constructor(trainables: Value[], opts: AdamOptions = {}) {
+     super(trainables, opts.learningRate ?? 0.001);
+     this.beta1 = opts.beta1 ?? 0.9;
+     this.beta2 = opts.beta2 ?? 0.999;
+     this.epsilon = opts.epsilon ?? 1e-8;
+     this.weightDecay = opts.weightDecay ?? 0;
+     this.gradientClip = opts.gradientClip ?? 0;
+     for (const v of this.trainables) {
+       this.m.set(v, 0);
+       this.v.set(v, 0);
+     }
+   }
+   /**
+    * Performs a parameter update using Adam optimization.
+    */
+   step(): void {
+     this.stepCount++;
+     for (const v of this.trainables) {
+       let grad = v.grad;
+       if (this.weightDecay > 0) grad += this.weightDecay * v.data;
+
+       let m = this.m.get(v)!;
+       let vVal = this.v.get(v)!;
+       m = this.beta1 * m + (1 - this.beta1) * grad;
+       vVal = this.beta2 * vVal + (1 - this.beta2) * grad * grad;
+
+       const mHat = m / (1 - Math.pow(this.beta1, this.stepCount));
+       const vHat = vVal / (1 - Math.pow(this.beta2, this.stepCount));
+       let update = mHat / (Math.sqrt(vHat) + this.epsilon);
+
+       if (this.gradientClip > 0) {
+         update = Math.max(-this.gradientClip, Math.min(update, this.gradientClip));
+       }
+       v.data -= this.learningRate * update;
+
+       this.m.set(v, m);
+       this.v.set(v, vVal);
+     }
+   }
+ }
+
+ /**
+  * AdamW optimizer. Supports decoupled weight decay and update clipping (same options as Adam; weightDecay defaults to 0.01).
+  */
+ export class AdamW extends Optimizer {
+   private beta1: number;
+   private beta2: number;
+   private epsilon: number;
+   private weightDecay: number;
+   private gradientClip: number;
+   private m: Map<Value, number> = new Map();
+   private v: Map<Value, number> = new Map();
+   private stepCount: number = 0;
+   /**
+    * Constructs an AdamW optimizer.
+    * @param trainables Array of Value parameters to optimize.
+    * @param opts Optional parameters (learningRate, weightDecay, gradientClip, beta1, beta2, epsilon).
+    */
+   constructor(trainables: Value[], opts: AdamOptions = {}) {
+     super(trainables, opts.learningRate ?? 0.001);
+     this.beta1 = opts.beta1 ?? 0.9;
+     this.beta2 = opts.beta2 ?? 0.999;
+     this.epsilon = opts.epsilon ?? 1e-8;
+     this.weightDecay = opts.weightDecay ?? 0.01;
+     this.gradientClip = opts.gradientClip ?? 0;
+     for (const v of this.trainables) {
+       this.m.set(v, 0);
+       this.v.set(v, 0);
+     }
+   }
+   /**
+    * Performs a parameter update using AdamW optimization (decoupled weight decay).
+    */
+   step(): void {
+     this.stepCount++;
+     for (const v of this.trainables) {
+       const grad = v.grad;
+       let m = this.m.get(v)!;
+       let vVal = this.v.get(v)!;
+       m = this.beta1 * m + (1 - this.beta1) * grad;
+       vVal = this.beta2 * vVal + (1 - this.beta2) * grad * grad;
+
+       const mHat = m / (1 - Math.pow(this.beta1, this.stepCount));
+       const vHat = vVal / (1 - Math.pow(this.beta2, this.stepCount));
+       let update = mHat / (Math.sqrt(vHat) + this.epsilon);
+       if (this.gradientClip > 0) {
+         update = Math.max(-this.gradientClip, Math.min(update, this.gradientClip));
+       }
+       // Weight decay is decoupled as in the AdamW paper:
+       v.data -= this.learningRate * update + this.learningRate * this.weightDecay * v.data;
+       this.m.set(v, m);
+       this.v.set(v, vVal);
+     }
+   }
+ }
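For context, this is roughly how the optimizers above plug into the forward/backward loop shown in the README. A minimal sketch, assuming the `V` helpers and the `mse` loss from `Losses.ts`; the toy data and hyperparameters are purely illustrative.

```typescript
import { V } from './V';
import { mse } from './Losses';
import { Adam } from './Optimizers';

// Hypothetical quadratic fit: y = a * x^2 + b (toy data follows y = 2x^2 + 1)
const a = V.W(0.1, 'a');
const b = V.W(0.0, 'b');
const data = [{ x: 1, y: 3 }, { x: 2, y: 9 }, { x: 3, y: 19 }];

const opt = new Adam([a, b], { learningRate: 0.05 });

for (let epoch = 0; epoch < 500; ++epoch) {
  const preds = data.map(d => V.add(V.mul(a, V.pow(V.C(d.x), 2)), b));
  const targets = data.map(d => V.C(d.y));
  const loss = mse(preds, targets);

  opt.zeroGrad();        // clear grads accumulated by the previous backward pass
  loss.backward();       // populate .grad on a and b
  opt.clipGradients(5);  // optional: cap the global gradient norm before stepping
  opt.step();            // Adam update with bias-corrected moments
}

console.log(a.data, b.data); // should approach 2 and 1
```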
package/README.md ADDED
@@ -0,0 +1,113 @@
+ # ScalarAutograd for TypeScript
+
+ A tiny scalar autograd engine for TypeScript/JavaScript.
+
+ ScalarAutograd enables automatic differentiation for scalar operations, similar to what you'd find in PyTorch's `autograd`, but designed for TypeScript codebases. This makes it useful for building and training small neural networks, performing optimization, or experimenting with gradient-based techniques—entirely in the browser or Node.js.
+
+ ## Features
+ - Scalar `Value` objects for tracking data, gradients, and the computation graph.
+ - Backpropagation via `.backward()` to compute gradients automatically.
+ - Clean, TypeScript-first API.
+ - Does *NOT* handle matrices or tensors, just scalars.
+
+ ## Installation
+
+ Simply copy the files in this folder into your project, or import as a local module if desired.
+
+ ## Basic Usage
+
+ ```typescript
+ import { V } from './V';
+
+ // Basic differentiation using static V API
+ const x = V.W(2.0); // differentiable variable
+ const y = V.W(3.0);
+ const z = V.add(V.mul(x, y), V.pow(x, 2)); // z = x*y + x^2
+ z.backward();
+ console.log('dz/dx:', x.grad); // Output: dz/dx = y + 2*x = 3 + 2*2 = 7
+ console.log('dz/dy:', y.grad); // Output: dz/dy = x = 2
+ ```
+
+ ## Example: Gradients of a Composite Function
+
+ ```typescript
+ const a = V.W(5);
+ const b = V.W(-3);
+ const c = V.sin(V.mul(a, b)); // f = sin(a * b)
+ c.backward();
+ console.log(a.grad, b.grad); // Gradients w.r.t. a and b
+ ```
+
+ ## Example: Solving for Parameters via Backpropagation
+
+ Here's how you can use ScalarAutograd's backpropagation and a simple optimizer to fit a linear regression model (y = 2x + 3):
+
+ ```typescript
+ import { V } from './V';
+ import { SGD } from './Optimizers';
+
+ // Initialize parameters
+ let w = V.W(Math.random(), "w");
+ let b = V.W(Math.random(), "b");
+
+ // Example data: y = 2x + 3
+ const samples = [
+   { x: 1, y: 5 },
+   { x: 2, y: 7 },
+   { x: 3, y: 9 },
+ ];
+
+ const opt = new SGD([w, b], { learningRate: 0.1 });
+
+ for (let epoch = 0; epoch < 300; ++epoch) {
+   let losses = [];
+   for (const sample of samples) {
+     const x = V.C(sample.x, "x");
+     const pred = V.add(V.mul(w, x), b);
+     const target = V.C(sample.y, "target");
+     const loss = V.pow(V.sub(pred, target), 2);
+     losses.push(loss);
+   }
+   const totalLoss = V.mean(losses);
+   opt.zeroGrad();
+   totalLoss.backward();
+   opt.step();
+   if (totalLoss.data < 1e-4) break;
+ }
+
+ console.log('Fitted w:', w.data); // ~2
+ console.log('Fitted b:', b.data); // ~3
+ ```
+
+ This pattern—forward pass, backward for gradients, and calling `optimizer.step()`—applies to more complex optimization tasks and neural networks as well!
+
+ ## API Overview
+ - **Core Value construction:**
+   - `V.C(data, label?)` — constant (non-differentiable), e.g. for data/inputs.
+   - `V.W(data, label?)` — weight/parameter (differentiable).
+ - **Operators:**
+   - Basic: `V.add(a, b)`, `V.sub(a, b)`, `V.mul(a, b)`, `V.div(a, b)`, `V.pow(a, n)`, `V.powValue(a, b)`.
+   - Reductions: `V.sum(array)`, `V.mean(array)`.
+   - Trig: `V.sin(x)`, `V.cos(x)`, `V.tan(x)`, ...
+   - Activations: `V.relu(x)`, `V.tanh(x)`, `V.sigmoid(x)`, etc.
+   - Comparison: `V.eq(a, b)`, `V.gt(a, b)`, ... (output constant Values that never carry gradients)
+ - **Backward:**
+   - `.backward()` — trigger automatic differentiation from this node.
+   - `.grad` — access the computed gradient after the backward pass.
+ - **Optimizers:**
+   - E.g. `const opt = new SGD([w, b], {learningRate: 0.01})`
+ - **Losses:**
+   - Import from `Losses.ts` (e.g. `import { mse } from './Losses'`)
+
+ All API operations work with both `Value` and raw number inputs (numbers are automatically wrapped as non-grad constants).
+
+ ## Testing
+
+ To run the test suite and verify the correctness of ScalarAutograd, execute the following command in your project directory:
+
+ ```shell
+ npm run test
+ ```
+
+ ## License
+ MIT
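One part of the API the README examples do not exercise is the comparison helpers combined with `V.ifThenElse`, which the Huber and Tukey losses use to build piecewise expressions. A small sketch, under the assumption that `ifThenElse` selects a branch from the comparison result and backpropagates through the chosen branch, mirroring its use in `Losses.ts`:

```typescript
import { V } from './V';

// Leaky-ReLU-style piecewise function: f(x) = x if x > 0, else 0.01 * x.
// Assumes V.ifThenElse(cond, a, b) picks a branch per the (constant) comparison
// result and routes gradients through that branch, as in the Huber loss.
const x = V.W(-2.0, 'x');
const positive = V.gt(x, 0);                          // constant Value, never carries grad
const f = V.ifThenElse(positive, x, V.mul(0.01, x));
f.backward();
console.log(f.data, x.grad); // expected -0.02 and 0.01 if the negative branch is active
```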
package/V.ts ADDED
Binary file
@@ -0,0 +1,60 @@
+ import { Value } from "./Value";
+
+ // Edge cases and error handling
+ describe('Value edge cases and error handling', () => {
+   it('throws on invalid numeric inputs', () => {
+     expect(() => new Value(NaN)).toThrow();
+     expect(() => new Value(Infinity)).toThrow();
+     expect(() => new Value(-Infinity)).toThrow();
+   });
+
+   it('handles gradient accumulation correctly', () => {
+     const x = new Value(2, 'x', true);
+     const y = x.mul(3);
+     const z = x.mul(4);
+     const out = y.add(z);
+     out.backward();
+     expect(x.grad).toBe(7); // 3 + 4
+   });
+
+   it('handles repeated use of same value in expression', () => {
+     const x = new Value(3, 'x', true);
+     const y = x.mul(x).mul(x); // x^3
+     y.backward();
+     expect(x.grad).toBeCloseTo(27); // 3*x^2 = 27
+   });
+
+   it('throws on division by zero', () => {
+     const a = new Value(1);
+     const b = new Value(0);
+     expect(() => a.div(b)).toThrow();
+   });
+
+   it('throws on log of negative number', () => {
+     const x = new Value(-1);
+     expect(() => x.log()).toThrow();
+   });
+
+   it('throws on negative base with fractional exponent', () => {
+     const x = new Value(-2);
+     expect(() => x.pow(0.5)).toThrow();
+   });
+ });
+
+ // Complex expressions
+ describe('Complex mathematical expressions', () => {
+   it('computes gradient of complex expression', () => {
+     const x = new Value(0.5, 'x', true);
+     const y = x.sin().mul(x.cos()).add(x.exp());
+     y.backward();
+     const expected = Math.cos(0.5)**2 - Math.sin(0.5)**2 + Math.exp(0.5);
+     expect(x.grad).toBeCloseTo(expected, 4);
+   });
+
+   it('handles nested activation functions', () => {
+     const x = new Value(0.5, 'x', true);
+     const y = x.tanh().sigmoid().relu();
+     y.backward();
+     expect(x.grad).toBeGreaterThan(0);
+   });
+ });
@@ -0,0 +1,24 @@
+ import { Value } from "./Value";
+
+ describe('Gradient flow control', () => {
+   it('stops gradient at non-requiresGrad nodes', () => {
+     const x = new Value(2, 'x', true);
+     const y = new Value(3, 'y', false);
+     const z = new Value(4, 'z', true);
+     const out = x.mul(y).add(z);
+     out.backward();
+     expect(x.grad).toBe(3);
+     expect(y.grad).toBe(0);
+     expect(z.grad).toBe(1);
+   });
+
+   it('handles detached computation graphs', () => {
+     const x = new Value(2, 'x', true);
+     const y = x.mul(3);
+     const z = new Value(y.data, 'z', true); // detached
+     const out = z.mul(4);
+     out.backward();
+     expect(z.grad).toBe(4);
+     expect(x.grad).toBe(0); // no gradient flows to x
+   });
+ });
@@ -0,0 +1,32 @@
+ import { Value } from "./Value";
+ import { mse, mae, binaryCrossEntropy, categoricalCrossEntropy } from "./Losses";
+
+ describe('Loss function edge cases', () => {
+   it('handles empty arrays', () => {
+     expect(mse([], []).data).toBe(0);
+     expect(mae([], []).data).toBe(0);
+     expect(binaryCrossEntropy([], []).data).toBe(0);
+     expect(categoricalCrossEntropy([], []).data).toBe(0);
+   });
+
+   it('throws on mismatched lengths', () => {
+     const a = [new Value(1)];
+     const b = [new Value(1), new Value(2)];
+     expect(() => mse(a, b)).toThrow();
+   });
+
+   it('handles extreme values in binary cross entropy', () => {
+     const out = new Value(0.999999, 'out', true);
+     const target = new Value(1);
+     const loss = binaryCrossEntropy([out], [target]);
+     expect(loss.data).toBeGreaterThan(0);
+     expect(loss.data).toBeLessThan(0.1);
+   });
+
+   it('throws on invalid class indices in categorical cross entropy', () => {
+     const outputs = [new Value(1), new Value(2)];
+     expect(() => categoricalCrossEntropy(outputs, [2])).toThrow();
+     expect(() => categoricalCrossEntropy(outputs, [-1])).toThrow();
+     expect(() => categoricalCrossEntropy(outputs, [1.5])).toThrow();
+   });
+ });
@@ -0,0 +1,25 @@
+ import { Value } from "./Value";
+
+ describe('Memory management', () => {
+   it('handles large computation graphs', () => {
+     let x = new Value(1, 'x', true);
+     for (let i = 0; i < 100; i++) {
+       x = x.add(1).mul(1.01);
+     }
+     expect(() => x.backward()).not.toThrow();
+   });
+
+   it('zeroGradAll handles multiple disconnected graphs', () => {
+     const x1 = new Value(1, 'x1', true);
+     const y1 = x1.mul(2);
+     const x2 = new Value(2, 'x2', true);
+     const y2 = x2.mul(3);
+
+     y1.backward();
+     y2.backward();
+
+     Value.zeroGradAll([y1, y2]);
+     expect(x1.grad).toBe(0);
+     expect(x2.grad).toBe(0);
+   });
+ });