catniff 0.5.5 → 0.5.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -3
- package/dist/core.d.ts +4 -0
- package/dist/core.js +118 -27
- package/package.json +1 -1
package/README.md
CHANGED

```diff
@@ -120,10 +120,11 @@ All available APIs are in [`./src/`](./src/) if you want to dig deeper.
 
 ## Todos
 
-*
-* More tensor ops.
-* GPU acceleration.
+* More general tensor ops.
 * More general neural net APIs.
+* GPU acceleration.
+* Comprehensive caching.
+* Bug fixes.
 * More detailed documentation.
 * Code refactoring.
 * Proper tests.
```
package/dist/core.d.ts
CHANGED

```diff
@@ -40,6 +40,9 @@ export declare class Tensor {
     elementWiseSelfDAG(op: (a: number) => number, thisGrad?: (self: Tensor, outGrad: Tensor) => Tensor): Tensor;
     static forceTensor(value: TensorValue | Tensor): Tensor;
     static addGrad(tensor: Tensor, accumGrad: Tensor): void;
+    isContiguous(): boolean;
+    contiguous(): Tensor;
+    reshape(newShape: readonly number[]): Tensor;
     squeeze(dims?: number[] | number): Tensor;
     unsqueeze(dim: number): Tensor;
     sum(dims?: number[] | number, keepDims?: boolean): Tensor;
```
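The three new declarations add explicit layout control (contiguity checks, packing, and reshaping). A rough usage sketch, assuming `Tensor` is exported from the package entry and that the constructor accepts a flat value array with `shape`/`strides` options the way the internal calls in `core.js` below do; this is an illustration, not taken from catniff's documentation:

```js
const { Tensor } = require("catniff"); // assumption: Tensor is re-exported from the package entry

const t = new Tensor([1, 2, 3, 4, 5, 6], { shape: [2, 3], strides: [3, 1] }); // row-major 2x3
const r = t.reshape([3, 2]);    // same 6 elements, reinterpreted as 3x2
const v = t.t();                // if t() returns a strided view, it is no longer contiguous
console.log(v.isContiguous());  // expected: false for such a view
const c = v.contiguous();       // materializes a packed, row-major copy of the view
```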
```diff
@@ -143,6 +146,7 @@ export declare class Tensor {
     swapaxes: (dim1: number, dim2: number) => Tensor;
     swapdims: (dim1: number, dim2: number) => Tensor;
     t(): Tensor;
+    permute(dims: number[]): Tensor;
     dot(other: TensorValue | Tensor): Tensor;
     mm(other: TensorValue | Tensor): Tensor;
     bmm(other: TensorValue | Tensor): Tensor;
```
package/dist/core.js
CHANGED

```diff
@@ -201,9 +201,9 @@ class Tensor {
         if (out.requiresGrad) {
             out.gradFn = () => {
                 // Disable gradient collecting of gradients themselves
-                const outGrad = out.grad
-                const selfNoGrad = this.
-                const otherNoGrad = other.
+                const outGrad = out.grad;
+                const selfNoGrad = this.detach();
+                const otherNoGrad = other.detach();
                 if (this.requiresGrad)
                     Tensor.addGrad(this, thisGrad(selfNoGrad, otherNoGrad, outGrad));
                 if (other.requiresGrad)
@@ -222,8 +222,8 @@
         if (out.requiresGrad) {
             out.gradFn = () => {
                 // Disable gradient collecting of gradients themselves
-                const outGrad = out.grad
-                const selfNoGrad = this.
+                const outGrad = out.grad;
+                const selfNoGrad = this.detach();
                 if (this.requiresGrad)
                     Tensor.addGrad(this, thisGrad(selfNoGrad, outGrad));
             };
```
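In 0.5.6 these gradFn helpers detach both operands before invoking the `thisGrad` callback, so the gradient arithmetic is not itself recorded for differentiation. As a standalone illustration (plain arrays, not library code) of what such a callback computes for an element-wise product, combining the detached operands with the upstream gradient via the product rule:

```js
// For z = x ⊙ y (element-wise): dL/dx = outGrad ⊙ y and dL/dy = outGrad ⊙ x.
const x = [1, 2, 3];
const y = [4, 5, 6];
const outGrad = [0.1, 0.2, 0.3];              // upstream dL/dz
const dx = outGrad.map((g, i) => g * y[i]);   // [0.4, 1.0, 1.8]
const dy = outGrad.map((g, i) => g * x[i]);   // [0.1, 0.4, 0.9]
console.log(dx, dy);
```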
```diff
@@ -261,6 +261,64 @@ class Tensor {
             tensor.grad = tensor.grad.add(squeezedGrad);
         }
     }
+    // Contiguity-related ops
+    isContiguous() {
+        const expectedStrides = Tensor.getStrides(this.shape);
+        if (expectedStrides.length !== this.strides.length) {
+            return false;
+        }
+        for (let i = 0; i < this.strides.length; i++) {
+            if (this.strides[i] !== expectedStrides[i]) {
+                return false;
+            }
+        }
+        return true;
+    }
+    contiguous() {
+        // Check if scalar
+        if (typeof this.value === "number")
+            return this;
+        // Check if already contiguous
+        if (this.isContiguous())
+            return this;
+        const outputStrides = Tensor.getStrides(this.shape);
+        const outputSize = Tensor.shapeToSize(this.shape);
+        const outputValue = new Array(outputSize);
+        for (let index = 0; index < outputSize; index++) {
+            const outputCoords = Tensor.indexToCoords(index, outputStrides);
+            const originalIndex = Tensor.coordsToIndex(outputCoords, this.strides);
+            outputValue[index] = this.value[originalIndex];
+        }
+        const out = new Tensor(outputValue, { shape: this.shape, strides: outputStrides });
+        // Gradient flow back to the original tensor
+        if (this.requiresGrad) {
+            out.requiresGrad = true;
+            out.children.push(this);
+            out.gradFn = () => {
+                Tensor.addGrad(this, out.grad);
+            };
+        }
+        return out;
+    }
+    reshape(newShape) {
+        // Verify shape size
+        const originalSize = Tensor.shapeToSize(this.shape);
+        const outputSize = Tensor.shapeToSize(newShape);
+        if (originalSize !== outputSize) {
+            throw new Error("Cannot reshape: incompatible sizes");
+        }
+        const outputStrides = Tensor.getStrides(newShape);
+        const out = new Tensor(this.contiguous().value, { shape: newShape, strides: outputStrides });
+        // Gradient reshaped and flow back to the original tensor
+        if (this.requiresGrad) {
+            out.requiresGrad = true;
+            out.children.push(this);
+            out.gradFn = () => {
+                Tensor.addGrad(this, out.grad.reshape(this.shape));
+            };
+        }
+        return out;
+    }
     // Tensor squeeze
     squeeze(dims) {
         if (typeof this.value === "number")
```
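`contiguous()` rebuilds a packed buffer by walking every output index, converting it to coordinates using fresh row-major strides, and mapping those coordinates back to a flat index with the tensor's existing strides. A standalone sketch of that arithmetic, with local helpers standing in for `Tensor.getStrides` / `indexToCoords` / `coordsToIndex` (assumed here to be the usual row-major formulas):

```js
const getStrides = (shape) => {
    const strides = new Array(shape.length).fill(1);
    for (let i = shape.length - 2; i >= 0; i--) strides[i] = strides[i + 1] * shape[i + 1];
    return strides;
};
const indexToCoords = (index, strides) =>
    strides.map(s => { const c = Math.floor(index / s); index -= c * s; return c; });
const coordsToIndex = (coords, strides) =>
    coords.reduce((acc, c, i) => acc + c * strides[i], 0);

// A 2x3 view over a column-major buffer (strides [1, 2]) gets repacked row-major:
const value = [1, 4, 2, 5, 3, 6];          // buffer of [[1, 2, 3], [4, 5, 6]] stored column-major
const shape = [2, 3];
const viewStrides = [1, 2];
const packedStrides = getStrides(shape);   // [3, 1]
const packed = new Array(6);
for (let i = 0; i < 6; i++) {
    const coords = indexToCoords(i, packedStrides);
    packed[i] = value[coordsToIndex(coords, viewStrides)];
}
console.log(packed);                       // [1, 2, 3, 4, 5, 6]
```

`reshape()` then reuses this: it packs the data via `contiguous()` and pairs it with the new shape's row-major strides, while its gradFn reshapes the gradient back to the original shape.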
```diff
@@ -302,7 +360,7 @@ class Tensor {
             out.requiresGrad = true;
             out.children.push(this);
             out.gradFn = () => {
-                let restoredGrad = out.grad
+                let restoredGrad = out.grad;
                 for (let i = dims.length - 1; i >= 0; i--) {
                     restoredGrad = restoredGrad.unsqueeze(dims[i]);
                 }
@@ -338,7 +396,7 @@ class Tensor {
             out.requiresGrad = true;
             out.children.push(this);
             out.gradFn = () => {
-                Tensor.addGrad(this, out.grad.
+                Tensor.addGrad(this, out.grad.squeeze(dim));
             };
         }
         return out;
```
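Both hunks finish the backward bookkeeping for squeeze and unsqueeze: squeeze's gradFn re-inserts the removed size-1 dims (in reverse order) and unsqueeze's gradFn squeezes the dim it added, so the gradient always matches the input's shape. A shape-level standalone sketch of that round trip:

```js
const squeezeShape = (shape, dims) => shape.filter((_, i) => !dims.includes(i));
const unsqueezeShape = (shape, dim) => [...shape.slice(0, dim), 1, ...shape.slice(dim)];

const original = [2, 1, 3, 1];
const dims = [1, 3];
const squeezed = squeezeShape(original, dims);        // [2, 3]
let gradShape = squeezed;
for (let i = dims.length - 1; i >= 0; i--) {
    gradShape = unsqueezeShape(gradShape, dims[i]);   // [2, 3, 1], then [2, 1, 3, 1]
}
console.log(gradShape);                               // matches the original shape again
```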
```diff
@@ -397,7 +455,7 @@ class Tensor {
             out.children.push(this);
             out.gradFn = () => {
                 const localGrad = new Tensor(gradValue, { shape: gradShape, strides: gradStrides });
-                Tensor.addGrad(this, out.grad.
+                Tensor.addGrad(this, out.grad.mul(localGrad));
             };
         }
         return keepDims ? out : out.squeeze(dims);
@@ -454,7 +512,7 @@ class Tensor {
                     gradValue[realFlatIndex] = outputValue[outFlatIndex] / this.value[realFlatIndex];
                 }
                 const localGrad = new Tensor(gradValue, { shape: gradShape, strides: gradStrides });
-                Tensor.addGrad(this, out.grad.
+                Tensor.addGrad(this, out.grad.mul(localGrad));
             };
         }
         return keepDims ? out : out.squeeze(dims);
@@ -518,7 +576,7 @@ class Tensor {
                     gradValue[realFlatIndex] = 1 / outputFeeders[outFlatIndex];
                 }
                 const localGrad = new Tensor(gradValue, { shape: gradShape, strides: gradStrides });
-                Tensor.addGrad(this, out.grad.
+                Tensor.addGrad(this, out.grad.mul(localGrad));
             };
         }
         return keepDims ? out : out.squeeze(dims);
@@ -588,7 +646,7 @@ class Tensor {
                     gradValue[realFlatIndex] = outputValue[outFlatIndex] === originalValue[realFlatIndex] ? 1 / shareCounts[outFlatIndex] : 0;
                 }
                 const localGrad = new Tensor(gradValue, { shape: gradShape, strides: gradStrides });
-                Tensor.addGrad(this, out.grad.
+                Tensor.addGrad(this, out.grad.mul(localGrad));
             };
         }
         return keepDims ? out : out.squeeze(dims);
@@ -658,7 +716,7 @@ class Tensor {
                     gradValue[realFlatIndex] = outputValue[outFlatIndex] === originalValue[realFlatIndex] ? 1 / shareCounts[outFlatIndex] : 0;
                 }
                 const localGrad = new Tensor(gradValue, { shape: gradShape, strides: gradStrides });
-                Tensor.addGrad(this, out.grad.
+                Tensor.addGrad(this, out.grad.mul(localGrad));
             };
         }
         return keepDims ? out : out.squeeze(dims);
```
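The five hunks above all complete the same truncated statement: the upstream gradient is multiplied element-wise by a precomputed local gradient (`out.grad.mul(localGrad)`). The local gradient differs per reduction; the product case, visible in the second of these hunks as `outputValue / this.value`, is sketched below with plain numbers (the mean case uses `1 / outputFeeders`, and max/min split the gradient evenly among the winning elements):

```js
// prod backward: for p = x0 * x1 * ... * xn, dp/dx_i = p / x_i,
// so dL/dx_i = (dL/dp) * (p / x_i) — the "out.grad.mul(localGrad)" pattern.
const x = [2, 3, 4];
const p = x.reduce((a, b) => a * b, 1);       // 24
const localGrad = x.map(v => p / v);          // [12, 8, 6]
const upstream = 0.5;                         // dL/dp
const dx = localGrad.map(g => upstream * g);  // [6, 4, 3]
console.log(dx);
```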
```diff
@@ -727,8 +785,8 @@ class Tensor {
             out.requiresGrad = true;
             out.children.push(this);
             out.gradFn = () => {
-                const upstreamGrad = out.grad
-                const softmaxOutput = out.
+                const upstreamGrad = out.grad;
+                const softmaxOutput = out.detach();
                 // Compute element-wise product: ∂L/∂σᵢ × σᵢ
                 const gradTimesOutput = upstreamGrad.mul(softmaxOutput);
                 // Sum over softmax dimensions: Σᵢ(∂L/∂σᵢ × σᵢ)
```
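This hunk completes the softmax gradFn, whose comments describe multiplying the upstream gradient by the softmax output and summing over the softmax dimensions — presumably building the standard Jacobian-vector product dL/dxᵢ = σᵢ(gᵢ − Σⱼ gⱼσⱼ). A standalone 1-D check of that formula (independent of catniff):

```js
// Softmax backward in one dimension: dx_i = s_i * (g_i - Σ_j g_j * s_j).
const x = [1, 2, 3];
const expx = x.map(Math.exp);
const Z = expx.reduce((a, b) => a + b, 0);
const s = expx.map(e => e / Z);                              // softmax output σ
const g = [0.1, -0.2, 0.3];                                  // upstream dL/dσ
const gDotS = g.reduce((acc, gi, i) => acc + gi * s[i], 0);  // Σ_j g_j σ_j
const dx = s.map((si, i) => si * (g[i] - gDotS));            // dL/dx
console.log(dx);
```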
```diff
@@ -1096,7 +1154,7 @@ class Tensor {
         if (this.requiresGrad) {
             out.children.push(this);
             out.gradFn = () => {
-                Tensor.addGrad(this, out.grad.
+                Tensor.addGrad(this, out.grad.transpose(dim1, dim2));
             };
         }
         return out;
@@ -1111,6 +1169,39 @@ class Tensor {
         }
         return this.transpose(0, 1);
     }
+    // Permute
+    permute(dims) {
+        if (dims.length !== this.shape.length) {
+            throw new Error("Permutation must specify all dimensions");
+        }
+        // Compute new shape and strides
+        const newShape = new Array(dims.length);
+        const newStrides = new Array(dims.length);
+        for (let index = 0; index < dims.length; index++) {
+            const dim = dims[index];
+            newShape[index] = this.shape[dim];
+            newStrides[index] = this.strides[dim];
+        }
+        const out = new Tensor(this.value, {
+            shape: newShape,
+            strides: newStrides
+        });
+        if (this.requiresGrad) {
+            out.requiresGrad = true;
+            out.children.push(this);
+            out.gradFn = () => {
+                // Compute inverse permutation
+                const inverseAxes = new Array(dims.length);
+                for (let i = 0; i < dims.length; i++) {
+                    inverseAxes[dims[i]] = i;
+                }
+                // Permute gradient back to original order
+                const permutedGrad = out.grad.permute(inverseAxes);
+                Tensor.addGrad(this, permutedGrad);
+            };
+        }
+        return out;
+    }
     // 1D tensor dot product
     dot(other) {
         other = Tensor.forceTensor(other);
```
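The new `permute` returns a strided view (shape and strides reindexed by `dims` over the same underlying buffer), and its gradFn routes the gradient back through the inverse permutation computed as `inverseAxes[dims[i]] = i`. A standalone sketch showing that this inverse really undoes the reordering:

```js
const dims = [2, 0, 1];                        // forward axis order
const inverse = new Array(dims.length);
for (let i = 0; i < dims.length; i++) inverse[dims[i]] = i;   // [1, 2, 0]

const applyPerm = (axes, order) => order.map(d => axes[d]);
const shape = [4, 5, 6];
const permuted = applyPerm(shape, dims);       // [6, 4, 5]
const restored = applyPerm(permuted, inverse); // [4, 5, 6] — original order again
console.log(inverse, permuted, restored);
```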
```diff
@@ -1138,9 +1229,9 @@ class Tensor {
         if (out.requiresGrad) {
             out.gradFn = () => {
                 // Disable gradient collecting of gradients themselves
-                const outGrad = out.grad
-                const selfNoGrad = this.
-                const otherNoGrad = other.
+                const outGrad = out.grad;
+                const selfNoGrad = this.detach();
+                const otherNoGrad = other.detach();
                 if (this.requiresGrad)
                     Tensor.addGrad(this, outGrad.mul(otherNoGrad));
                 if (other.requiresGrad)
@@ -1193,9 +1284,9 @@ class Tensor {
         if (out.requiresGrad) {
             out.gradFn = () => {
                 // Disable gradient collecting of gradients themselves
-                const outGrad = out.grad
-                const selfNoGrad = this.
-                const otherNoGrad = other.
+                const outGrad = out.grad;
+                const selfNoGrad = this.detach();
+                const otherNoGrad = other.detach();
                 if (this.requiresGrad)
                     Tensor.addGrad(this, outGrad.mm(otherNoGrad.t()));
                 if (other.requiresGrad)
@@ -1251,9 +1342,9 @@ class Tensor {
         if (out.requiresGrad) {
             out.gradFn = () => {
                 // Disable gradient collecting of gradients themselves
-                const outGrad = out.grad
-                const selfNoGrad = this.
-                const otherNoGrad = other.
+                const outGrad = out.grad;
+                const selfNoGrad = this.detach();
+                const otherNoGrad = other.detach();
                 if (this.requiresGrad)
                     Tensor.addGrad(this, outGrad.bmm(otherNoGrad.transpose(1, 2)));
                 if (other.requiresGrad)
@@ -1350,9 +1441,9 @@ class Tensor {
         if (out.requiresGrad) {
             out.gradFn = () => {
                 other = other;
-                const outGrad = out.grad
-                const selfNoGrad = self.
-                const otherNoGrad = other.
+                const outGrad = out.grad;
+                const selfNoGrad = self.detach();
+                const otherNoGrad = other.detach();
                 if (this.requiresGrad)
                     Tensor.addGrad(this, outGrad.matmul(otherNoGrad.transpose(lastDim - 1, lastDim)));
                 if (other.requiresGrad)
```
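The remaining hunks apply the same detach-and-semicolon completion inside the dot, mm, bmm and matmul gradFns. The mm hunk shows the left-operand rule `outGrad.mm(otherNoGrad.t())`, i.e. dL/dA = dL/dC · Bᵀ for C = A·B; the right-operand line is cut off in this diff, but it is presumably the symmetric Aᵀ · dL/dC. A standalone numeric sketch of both rules with nested arrays:

```js
const matmul = (A, B) =>
    A.map(row => B[0].map((_, j) => row.reduce((acc, a, k) => acc + a * B[k][j], 0)));
const transpose = (M) => M[0].map((_, j) => M.map(row => row[j]));

const A = [[1, 2], [3, 4]];
const B = [[5, 6], [7, 8]];
const dC = [[1, 0], [0, 1]];                 // upstream dL/dC (identity for readability)
console.log(matmul(dC, transpose(B)));       // dL/dA = dC · Bᵀ = [[5, 7], [6, 8]]
console.log(matmul(transpose(A), dC));       // dL/dB = Aᵀ · dC = [[1, 3], [2, 4]]
```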