scalar-autograd 0.1.0 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Losses.ts +119 -118
- package/Optimizers.ts +2 -2
- package/README.md +8 -0
- package/Value.losses-edge-cases.spec.ts +10 -10
- package/Value.nn.spec.ts +3 -3
- package/Value.ts +5 -1
- package/package.json +1 -1
package/Losses.ts
CHANGED
|
@@ -12,133 +12,134 @@ function checkLengthMatch(outputs: Value[], targets: Value[]): void {
|
|
|
12
12
|
}
|
|
13
13
|
}
|
|
14
14
|
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
/**
|
|
30
|
-
* Computes mean absolute error (MAE) loss between outputs and targets.
|
|
31
|
-
* @param outputs Array of Value predictions.
|
|
32
|
-
* @param targets Array of Value targets.
|
|
33
|
-
* @returns Mean absolute error as a Value.
|
|
34
|
-
*/
|
|
35
|
-
export function mae(outputs: Value[], targets: Value[]): Value {
|
|
36
|
-
checkLengthMatch(outputs, targets);
|
|
37
|
-
if (!Array.isArray(outputs) || !Array.isArray(targets)) throw new TypeError('mae expects Value[] for both arguments.');
|
|
38
|
-
if (!outputs.length) return new Value(0);
|
|
39
|
-
const diffs = outputs.map((out, i) => out.sub(targets[i]).abs());
|
|
40
|
-
return Value.mean(diffs);
|
|
41
|
-
}
|
|
15
|
+
export class Losses {
|
|
16
|
+
/**
|
|
17
|
+
* Computes mean squared error (MSE) loss between outputs and targets.
|
|
18
|
+
* @param outputs Array of Value predictions.
|
|
19
|
+
* @param targets Array of Value targets.
|
|
20
|
+
* @returns Mean squared error as a Value.
|
|
21
|
+
*/
|
|
22
|
+
public static mse(outputs: Value[], targets: Value[]): Value {
|
|
23
|
+
checkLengthMatch(outputs, targets);
|
|
24
|
+
if (!Array.isArray(outputs) || !Array.isArray(targets)) throw new TypeError('mse expects Value[] for both arguments.');
|
|
25
|
+
if (!outputs.length) return new Value(0);
|
|
26
|
+
const diffs = outputs.map((out, i) => out.sub(targets[i]).square());
|
|
27
|
+
return Value.mean(diffs);
|
|
28
|
+
}
|
|
42
29
|
|
|
43
|
-
|
|
30
|
+
/**
|
|
31
|
+
* Computes mean absolute error (MAE) loss between outputs and targets.
|
|
32
|
+
* @param outputs Array of Value predictions.
|
|
33
|
+
* @param targets Array of Value targets.
|
|
34
|
+
* @returns Mean absolute error as a Value.
|
|
35
|
+
*/
|
|
36
|
+
public static mae(outputs: Value[], targets: Value[]): Value {
|
|
37
|
+
checkLengthMatch(outputs, targets);
|
|
38
|
+
if (!Array.isArray(outputs) || !Array.isArray(targets)) throw new TypeError('mae expects Value[] for both arguments.');
|
|
39
|
+
if (!outputs.length) return new Value(0);
|
|
40
|
+
const diffs = outputs.map((out, i) => out.sub(targets[i]).abs());
|
|
41
|
+
return Value.mean(diffs);
|
|
42
|
+
}
|
|
44
43
|
|
|
45
|
-
|
|
46
|
-
* Computes binary cross-entropy loss between predicted outputs and targets (after sigmoid).
|
|
47
|
-
* @param outputs Array of Value predictions (expected in (0,1)).
|
|
48
|
-
* @param targets Array of Value targets (typically 0 or 1).
|
|
49
|
-
* @returns Binary cross-entropy loss as a Value.
|
|
50
|
-
*/
|
|
51
|
-
export function binaryCrossEntropy(outputs: Value[], targets: Value[]): Value {
|
|
52
|
-
checkLengthMatch(outputs, targets);
|
|
53
|
-
if (!Array.isArray(outputs) || !Array.isArray(targets)) throw new TypeError('binaryCrossEntropy expects Value[] for both arguments.');
|
|
54
|
-
if (!outputs.length) return new Value(0);
|
|
55
|
-
const eps = EPS;
|
|
56
|
-
const one = new Value(1);
|
|
57
|
-
const losses = outputs.map((out, i) => {
|
|
58
|
-
const t = targets[i];
|
|
59
|
-
const outClamped = out.clamp(eps, 1 - eps); // sigmoid should output (0,1)
|
|
60
|
-
return t.mul(outClamped.log()).add(one.sub(t).mul(one.sub(outClamped).log()));
|
|
61
|
-
});
|
|
62
|
-
return Value.mean(losses).mul(-1);
|
|
63
|
-
}
|
|
44
|
+
static EPS = 1e-12;
|
|
64
45
|
|
|
65
|
-
/**
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
46
|
+
/**
|
|
47
|
+
* Computes binary cross-entropy loss between predicted outputs and targets (after sigmoid).
|
|
48
|
+
* @param outputs Array of Value predictions (expected in (0,1)).
|
|
49
|
+
* @param targets Array of Value targets (typically 0 or 1).
|
|
50
|
+
* @returns Binary cross-entropy loss as a Value.
|
|
51
|
+
*/
|
|
52
|
+
public static binaryCrossEntropy(outputs: Value[], targets: Value[]): Value {
|
|
53
|
+
checkLengthMatch(outputs, targets);
|
|
54
|
+
if (!Array.isArray(outputs) || !Array.isArray(targets)) throw new TypeError('binaryCrossEntropy expects Value[] for both arguments.');
|
|
55
|
+
if (!outputs.length) return new Value(0);
|
|
56
|
+
const eps = Losses.EPS;
|
|
57
|
+
const one = new Value(1);
|
|
58
|
+
const losses = outputs.map((out, i) => {
|
|
59
|
+
const t = targets[i];
|
|
60
|
+
const outClamped = out.clamp(eps, 1 - eps); // sigmoid should output (0,1)
|
|
61
|
+
return t.mul(outClamped.log()).add(one.sub(t).mul(one.sub(outClamped).log()));
|
|
62
|
+
});
|
|
63
|
+
return Value.mean(losses).mul(-1);
|
|
77
64
|
}
|
|
78
|
-
const eps = EPS;
|
|
79
|
-
const maxLogit = outputs.reduce((a, b) => a.data > b.data ? a : b);
|
|
80
|
-
const exps = outputs.map(out => out.sub(maxLogit).exp());
|
|
81
|
-
const sumExp = Value.sum(exps).add(eps);
|
|
82
|
-
const softmax = exps.map(e => e.div(sumExp));
|
|
83
|
-
const tIndices = targets.map((t, i) => softmax[t]);
|
|
84
|
-
return Value.mean(tIndices.map(sm => sm.add(eps).log().mul(-1)));
|
|
85
|
-
}
|
|
86
65
|
|
|
87
|
-
/**
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
const
|
|
105
|
-
const
|
|
66
|
+
/**
|
|
67
|
+
* Computes categorical cross-entropy loss between outputs (logits) and integer target classes.
|
|
68
|
+
* @param outputs Array of Value logits for each class.
|
|
69
|
+
* @param targets Array of integer class indices (0-based, one per sample).
|
|
70
|
+
* @returns Categorical cross-entropy loss as a Value.
|
|
71
|
+
*/
|
|
72
|
+
public static categoricalCrossEntropy(outputs: Value[], targets: number[]): Value {
|
|
73
|
+
// targets: integer encoded class indices
|
|
74
|
+
if (!Array.isArray(outputs) || !Array.isArray(targets)) throw new TypeError('categoricalCrossEntropy expects Value[] and number[].');
|
|
75
|
+
if (!outputs.length || !targets.length) return new Value(0);
|
|
76
|
+
if (targets.some(t => typeof t !== 'number' || !isFinite(t) || t < 0 || t >= outputs.length || Math.floor(t) !== t)) {
|
|
77
|
+
throw new Error('Target indices must be valid integers in [0, outputs.length)');
|
|
78
|
+
}
|
|
79
|
+
const eps = Losses.EPS;
|
|
80
|
+
const maxLogit = outputs.reduce((a, b) => a.data > b.data ? a : b);
|
|
81
|
+
const exps = outputs.map(out => out.sub(maxLogit).exp());
|
|
82
|
+
const sumExp = Value.sum(exps).add(eps);
|
|
83
|
+
const softmax = exps.map(e => e.div(sumExp));
|
|
84
|
+
const tIndices = targets.map((t, i) => softmax[t]);
|
|
85
|
+
return Value.mean(tIndices.map(sm => sm.add(eps).log().mul(-1)));
|
|
86
|
+
}
|
|
106
87
|
|
|
107
|
-
|
|
108
|
-
|
|
88
|
+
/**
|
|
89
|
+
* Computes Huber loss between outputs and targets.
|
|
90
|
+
* Combines quadratic loss for small residuals and linear loss for large residuals.
|
|
91
|
+
* @param outputs Array of Value predictions.
|
|
92
|
+
* @param targets Array of Value targets.
|
|
93
|
+
* @param delta Threshold at which to switch from quadratic to linear (default: 1.0).
|
|
94
|
+
* @returns Huber loss as a Value.
|
|
95
|
+
*/
|
|
96
|
+
public static huber(outputs: Value[], targets: Value[], delta = 1.0): Value {
|
|
97
|
+
checkLengthMatch(outputs, targets);
|
|
98
|
+
if (!Array.isArray(outputs) || !Array.isArray(targets)) throw new TypeError('huber expects Value[] for both arguments.');
|
|
99
|
+
if (!outputs.length) return new Value(0);
|
|
100
|
+
|
|
101
|
+
const deltaValue = new Value(delta);
|
|
102
|
+
const half = new Value(0.5);
|
|
103
|
+
|
|
104
|
+
const losses = outputs.map((out, i) => {
|
|
105
|
+
const residual = V.abs(V.sub(out, targets[i]));
|
|
106
|
+
const condition = V.lt(residual, deltaValue);
|
|
109
107
|
|
|
110
|
-
|
|
111
|
-
|
|
108
|
+
const quadraticLoss = V.mul(half, V.square(residual));
|
|
109
|
+
const linearLoss = V.mul(deltaValue, V.sub(residual, V.mul(half, deltaValue)));
|
|
112
110
|
|
|
113
|
-
|
|
114
|
-
}
|
|
111
|
+
return V.ifThenElse(condition, quadraticLoss, linearLoss);
|
|
112
|
+
});
|
|
115
113
|
|
|
114
|
+
return V.mean(losses);
|
|
115
|
+
}
|
|
116
116
|
|
|
117
|
-
/**
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
117
|
+
/**
|
|
118
|
+
* Computes Tukey loss between outputs and targets.
|
|
119
|
+
* This robust loss function saturates for large residuals.
|
|
120
|
+
*
|
|
121
|
+
* @param outputs Array of Value predictions.
|
|
122
|
+
* @param targets Array of Value targets.
|
|
123
|
+
* @param c Threshold constant (typically 4.685).
|
|
124
|
+
* @returns Tukey loss as a Value.
|
|
125
|
+
*/
|
|
126
|
+
public static tukey(outputs: Value[], targets: Value[], c: number = 4.685): Value {
|
|
127
|
+
checkLengthMatch(outputs, targets);
|
|
128
|
+
const c2_over_6 = (c * c) / 6;
|
|
129
|
+
const cValue = V.C(c);
|
|
130
|
+
const c2_over_6_Value = V.C(c2_over_6);
|
|
131
131
|
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
132
|
+
const losses = outputs.map((out, i) => {
|
|
133
|
+
const diff = V.abs(V.sub(out, targets[i]));
|
|
134
|
+
const inlier = V.lte(diff, cValue);
|
|
135
|
+
const rc = V.div(diff, cValue);
|
|
136
|
+
const rc2 = V.square(rc);
|
|
137
|
+
const oneMinusRC2 = V.sub(1, rc2);
|
|
138
|
+
const inner = V.pow(oneMinusRC2, 3);
|
|
139
|
+
const inlierLoss = V.mul(c2_over_6_Value, V.sub(1, inner));
|
|
140
|
+
const loss = V.ifThenElse(inlier, inlierLoss, c2_over_6_Value);
|
|
141
|
+
return loss;
|
|
142
|
+
});
|
|
143
|
+
return V.mean(losses);
|
|
144
|
+
}
|
|
144
145
|
}
|
package/Optimizers.ts
CHANGED
|
@@ -53,7 +53,7 @@ export abstract class Optimizer {
|
|
|
53
53
|
* @property weightDecay: L2 regularization multiplier (default 0). Ignored for plain SGD.
|
|
54
54
|
* @property gradientClip: Maximum absolute value for gradient updates (default 0: no clipping).
|
|
55
55
|
*/
|
|
56
|
-
interface OptimizerOptions {
|
|
56
|
+
export interface OptimizerOptions {
|
|
57
57
|
learningRate?: number;
|
|
58
58
|
weightDecay?: number;
|
|
59
59
|
gradientClip?: number;
|
|
@@ -96,7 +96,7 @@ export class SGD extends Optimizer {
|
|
|
96
96
|
* @property beta2: Exponential decay rate for 2nd moment (default 0.999).
|
|
97
97
|
* @property epsilon: Numerical stability fudge factor (default 1e-8).
|
|
98
98
|
*/
|
|
99
|
-
interface AdamOptions extends OptimizerOptions {
|
|
99
|
+
export interface AdamOptions extends OptimizerOptions {
|
|
100
100
|
beta1?: number;
|
|
101
101
|
beta2?: number;
|
|
102
102
|
epsilon?: number;
|
package/README.md
CHANGED
|
@@ -101,6 +101,7 @@ This pattern—forward pass, backward for gradients, and calling `optimizer.step
|
|
|
101
101
|
|
|
102
102
|
All API operations work with both `Value` and raw number inputs (numbers are automatically wrapped as non-grad constants).
|
|
103
103
|
|
|
104
|
+
|
|
104
105
|
## Testing
|
|
105
106
|
|
|
106
107
|
To run the test suite and verify the correctness of ScalarAutograd, execute the following command in your project directory:
|
|
@@ -109,5 +110,12 @@ To run the test suite and verify the correctness of ScalarAutograd, execute the
|
|
|
109
110
|
npm run test
|
|
110
111
|
```
|
|
111
112
|
|
|
113
|
+
## Deploy to npm
|
|
114
|
+
|
|
115
|
+
Start "Git Bash" terminal
|
|
116
|
+
Type ./release.sh
|
|
117
|
+
|
|
118
|
+
---
|
|
119
|
+
|
|
112
120
|
## License
|
|
113
121
|
MIT
|
package/Value.losses-edge-cases.spec.ts
CHANGED
|
@@ -1,32 +1,32 @@
|
|
|
1
1
|
import { Value } from "./Value";
|
|
2
|
-
import {
|
|
2
|
+
import { Losses } from "./Losses";
|
|
3
3
|
|
|
4
4
|
describe('Loss function edge cases', () => {
|
|
5
5
|
it('handles empty arrays', () => {
|
|
6
|
-
expect(mse([], []).data).toBe(0);
|
|
7
|
-
expect(mae([], []).data).toBe(0);
|
|
8
|
-
expect(binaryCrossEntropy([], []).data).toBe(0);
|
|
9
|
-
expect(categoricalCrossEntropy([], []).data).toBe(0);
|
|
6
|
+
expect(Losses.mse([], []).data).toBe(0);
|
|
7
|
+
expect(Losses.mae([], []).data).toBe(0);
|
|
8
|
+
expect(Losses.binaryCrossEntropy([], []).data).toBe(0);
|
|
9
|
+
expect(Losses.categoricalCrossEntropy([], []).data).toBe(0);
|
|
10
10
|
});
|
|
11
11
|
|
|
12
12
|
it('throws on mismatched lengths', () => {
|
|
13
13
|
const a = [new Value(1)];
|
|
14
14
|
const b = [new Value(1), new Value(2)];
|
|
15
|
-
expect(() => mse(a, b)).toThrow();
|
|
15
|
+
expect(() => Losses.mse(a, b)).toThrow();
|
|
16
16
|
});
|
|
17
17
|
|
|
18
18
|
it('handles extreme values in binary cross entropy', () => {
|
|
19
19
|
const out = new Value(0.999999, 'out', true);
|
|
20
20
|
const target = new Value(1);
|
|
21
|
-
const loss = binaryCrossEntropy([out], [target]);
|
|
21
|
+
const loss = Losses.binaryCrossEntropy([out], [target]);
|
|
22
22
|
expect(loss.data).toBeGreaterThan(0);
|
|
23
23
|
expect(loss.data).toBeLessThan(0.1);
|
|
24
24
|
});
|
|
25
25
|
|
|
26
26
|
it('throws on invalid class indices in categorical cross entropy', () => {
|
|
27
27
|
const outputs = [new Value(1), new Value(2)];
|
|
28
|
-
expect(() => categoricalCrossEntropy(outputs, [2])).toThrow();
|
|
29
|
-
expect(() => categoricalCrossEntropy(outputs, [-1])).toThrow();
|
|
30
|
-
expect(() => categoricalCrossEntropy(outputs, [1.5])).toThrow();
|
|
28
|
+
expect(() => Losses.categoricalCrossEntropy(outputs, [2])).toThrow();
|
|
29
|
+
expect(() => Losses.categoricalCrossEntropy(outputs, [-1])).toThrow();
|
|
30
|
+
expect(() => Losses.categoricalCrossEntropy(outputs, [1.5])).toThrow();
|
|
31
31
|
});
|
|
32
32
|
});
|
package/Value.nn.spec.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { Value } from "./Value";
|
|
2
2
|
import { SGD, Adam } from "./Optimizers";
|
|
3
|
-
import {
|
|
3
|
+
import { Losses } from "./Losses";
|
|
4
4
|
|
|
5
5
|
describe("can train scalar neural networks on minimal problems", () => {
|
|
6
6
|
|
|
@@ -22,7 +22,7 @@ describe("can train scalar neural networks on minimal problems", () => {
|
|
|
22
22
|
preds.push(pred);
|
|
23
23
|
targets.push(new Value(ex.y));
|
|
24
24
|
}
|
|
25
|
-
let loss = mse(preds, targets);
|
|
25
|
+
let loss = Losses.mse(preds, targets);
|
|
26
26
|
if (loss.data < 1e-4) break;
|
|
27
27
|
w.grad = 0; b.grad = 0;
|
|
28
28
|
loss.backward();
|
|
@@ -53,7 +53,7 @@ describe("can train scalar neural networks on minimal problems", () => {
|
|
|
53
53
|
preds.push(pred);
|
|
54
54
|
targets.push(new Value(ex.y));
|
|
55
55
|
}
|
|
56
|
-
let loss = mse(preds, targets);
|
|
56
|
+
let loss = Losses.mse(preds, targets);
|
|
57
57
|
if (loss.data < 1e-4) break;
|
|
58
58
|
a.grad = 0; b.grad = 0; c.grad = 0;
|
|
59
59
|
loss.backward();
|
package/Value.ts
CHANGED
|
@@ -1,7 +1,11 @@
|
|
|
1
|
-
type BackwardFn = () => void;
|
|
1
|
+
export type BackwardFn = () => void;
|
|
2
|
+
export { V } from './V';
|
|
3
|
+
export { Optimizer, SGD, Adam, AdamW, OptimizerOptions, AdamOptions } from './Optimizers';
|
|
4
|
+
export { Losses } from './Losses';
|
|
2
5
|
|
|
3
6
|
const EPS = 1e-12;
|
|
4
7
|
|
|
8
|
+
|
|
5
9
|
import { ValueTrig } from './ValueTrig';
|
|
6
10
|
import { ValueActivation } from './ValueActivation';
|
|
7
11
|
import { ValueArithmetic } from './ValueArithmetic';
|