catniff 0.5.10 → 0.6.0
- package/dist/core.d.ts +38 -12
- package/dist/core.js +375 -504
- package/package.json +1 -1
package/dist/core.d.ts
CHANGED
@@ -3,6 +3,8 @@ export type TensorValue = number | TensorValue[];
 export interface TensorOptions {
     shape?: readonly number[];
     strides?: readonly number[];
+    offset?: number;
+    numel?: number;
     grad?: Tensor;
     requiresGrad?: boolean;
     gradFn?: Function;
@@ -13,6 +15,8 @@ export declare class Tensor {
     value: number[] | number;
     readonly shape: readonly number[];
     readonly strides: readonly number[];
+    offset: number;
+    numel: number;
     grad?: Tensor;
     requiresGrad: boolean;
     gradFn: Function;
@@ -40,19 +44,46 @@ export declare class Tensor {
     elementWiseSelfDAG(op: (a: number) => number, thisGrad?: (self: Tensor, outGrad: Tensor) => Tensor): Tensor;
     handleOther(other: Tensor | TensorValue): Tensor;
     static addGrad(tensor: Tensor, accumGrad: Tensor): void;
+    static normalizeDims(dims: number[], numDims: number): number[];
     isContiguous(): boolean;
     contiguous(): Tensor;
     reshape(newShape: readonly number[]): Tensor;
+    transpose(dim1: number, dim2: number): Tensor;
+    swapaxes: (dim1: number, dim2: number) => Tensor;
+    swapdims: (dim1: number, dim2: number) => Tensor;
+    t(): Tensor;
+    permute(dims: number[]): Tensor;
+    slice(ranges: number[][]): Tensor;
     squeeze(dims?: number[] | number): Tensor;
     unsqueeze(dim: number): Tensor;
+    static reduce(tensor: Tensor, dims: number[] | number | undefined, keepDims: boolean, config: {
+        identity: number;
+        operation: (accumulator: number, value: number) => number;
+        needsCounters?: boolean;
+        postProcess?: (options: {
+            values: number[];
+            counters?: number[];
+        }) => void;
+        needsShareCounts?: boolean;
+        gradientFn: (options: {
+            outputValue: number[];
+            originalValue: number[];
+            counters: number[];
+            shareCounts: number[];
+            realIndex: number;
+            outIndex: number;
+        }) => number;
+    }): Tensor;
     sum(dims?: number[] | number, keepDims?: boolean): Tensor;
     prod(dims?: number[] | number, keepDims?: boolean): Tensor;
     mean(dims?: number[] | number, keepDims?: boolean): Tensor;
     max(dims?: number[] | number, keepDims?: boolean): Tensor;
     min(dims?: number[] | number, keepDims?: boolean): Tensor;
+    all(dims?: number[] | number, keepDims?: boolean): Tensor;
+    any(dims?: number[] | number, keepDims?: boolean): Tensor;
     var(dims?: number[] | number, keepDims?: boolean): Tensor;
     std(dims?: number[] | number, keepDims?: boolean): Tensor;
-    softmax(
+    softmax(dim?: number): Tensor;
     add(other: TensorValue | Tensor): Tensor;
     sub(other: TensorValue | Tensor): Tensor;
     subtract: (other: TensorValue | Tensor) => Tensor;
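The new static `reduce` declaration above folds every reduction (sum, prod, mean, max, min, all, any) into one configurable kernel. As a minimal standalone sketch of the config contract — the toy `reduce1d` helper below is illustrative only, not a catniff API — a mean can be expressed as a sum plus a counter-based post-process:

    // Sketch: the config contract behind Tensor.reduce, applied to a flat array.
    interface ReduceConfig {
        identity: number;                                   // accumulator start value
        operation: (accumulator: number, value: number) => number;
        needsCounters?: boolean;                            // track element counts (mean)
        postProcess?: (options: { values: number[]; counters?: number[] }) => void;
    }

    // Hypothetical 1-D reducer demonstrating the semantics.
    function reduce1d(values: number[], config: ReduceConfig): number {
        let acc = config.identity;
        const counters = [0];
        for (const v of values) {
            acc = config.operation(acc, v);
            counters[0]++;
        }
        const out = [acc];
        if (config.needsCounters && config.postProcess) {
            config.postProcess({ values: out, counters }); // e.g. divide by count for mean
        }
        return out[0];
    }

    // mean([1, 2, 3]) === 2
    const mean = reduce1d([1, 2, 3], {
        identity: 0,
        operation: (a, b) => a + b,
        needsCounters: true,
        postProcess: ({ values, counters }) => { values[0] /= counters![0]; }
    });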
@@ -142,28 +173,23 @@ export declare class Tensor {
     erf(): Tensor;
     erfc(): Tensor;
     erfinv(): Tensor;
-    transpose(dim1: number, dim2: number): Tensor;
-    swapaxes: (dim1: number, dim2: number) => Tensor;
-    swapdims: (dim1: number, dim2: number) => Tensor;
-    t(): Tensor;
-    permute(dims: number[]): Tensor;
     dot(other: TensorValue | Tensor): Tensor;
     mm(other: TensorValue | Tensor): Tensor;
     bmm(other: TensorValue | Tensor): Tensor;
     mv(other: TensorValue | Tensor): Tensor;
     matmul(other: TensorValue | Tensor): Tensor;
     dropout(rate: number): Tensor;
-    static full(shape: number[], num: number, options?: TensorOptions): Tensor;
+    static full(shape: readonly number[], num: number, options?: TensorOptions): Tensor;
     static fullLike(tensor: Tensor, num: number, options?: TensorOptions): Tensor;
-    static ones(shape?: number[], options?: TensorOptions): Tensor;
+    static ones(shape?: readonly number[], options?: TensorOptions): Tensor;
     static onesLike(tensor: Tensor, options?: TensorOptions): Tensor;
-    static zeros(shape?: number[], options?: TensorOptions): Tensor;
+    static zeros(shape?: readonly number[], options?: TensorOptions): Tensor;
     static zerosLike(tensor: Tensor, options?: TensorOptions): Tensor;
-    static rand(shape?: number[], options?: TensorOptions): Tensor;
+    static rand(shape?: readonly number[], options?: TensorOptions): Tensor;
     static randLike(tensor: Tensor, options?: TensorOptions): Tensor;
-    static randn(shape?: number[], options?: TensorOptions): Tensor;
+    static randn(shape?: readonly number[], options?: TensorOptions): Tensor;
     static randnLike(tensor: Tensor, options?: TensorOptions): Tensor;
-    static randint(shape: number[], low: number, high: number, options?: TensorOptions): Tensor;
+    static randint(shape: readonly number[], low: number, high: number, options?: TensorOptions): Tensor;
     static randintLike(tensor: Tensor, low: number, high: number, options?: TensorOptions): Tensor;
     static normal(shape: number[], mean: number, stdDev: number, options?: TensorOptions): Tensor;
     static uniform(shape: number[], low: number, high: number, options?: TensorOptions): Tensor;
package/dist/core.js
CHANGED
@@ -6,6 +6,8 @@ class Tensor {
     value;
     shape;
     strides;
+    offset;
+    numel;
     grad;
     requiresGrad;
     gradFn;
@@ -13,14 +15,19 @@ class Tensor {
     device;
     static training = false;
     constructor(value, options = {}) {
+        // Storage
         this.value = Tensor.flatten(value);
+        // Tensor metadata
         this.shape = options.shape || Tensor.getShape(value);
         this.strides = options.strides || Tensor.getStrides(this.shape);
+        this.offset = options.offset || 0;
+        this.numel = options.numel || Tensor.shapeToSize(this.shape);
+        this.device = options.device || "cpu";
+        // Autograd data
         this.grad = options.grad;
         this.requiresGrad = options.requiresGrad ?? false;
         this.gradFn = options.gradFn || (() => { });
         this.children = options.children || [];
-        this.device = options.device || "cpu";
         // Move to device in-place
         this.to_(this.device);
     }
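The constructor now records `offset` (where the view starts in the flat buffer) and `numel` (element count) next to `shape` and `strides`. A minimal sketch of the indexing arithmetic these fields support — `flatIndex` is illustrative, not a catniff API:

    // flatIndex = offset + sum(coords[i] * strides[i])
    function flatIndex(coords: number[], strides: readonly number[], offset: number): number {
        let index = offset;
        for (let i = 0; i < coords.length; i++) {
            index += coords[i] * strides[i];
        }
        return index;
    }

    // A 2x3 row-major view has strides [3, 1]; element (1, 2) of an
    // un-offset view lives at 0 + 1*3 + 2*1 = 5 in the flat buffer.
    flatIndex([1, 2], [3, 1], 0); // 5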
@@ -164,22 +171,28 @@ class Tensor {
             // Convert the coordinates to 1D index of flattened B with respect to B's shape
             const indexB = Tensor.coordsToUnbroadcastedIndex(coordsOutput, paddedBShape, paddedBStrides);
             // Calculate with op
-            outputValue[i] = op(tA.value[indexA], tB.value[indexB]);
+            outputValue[i] = op(tA.value[indexA + tA.offset], tB.value[indexB + tB.offset]);
         }
         return new Tensor(outputValue, {
             shape: outputShape,
-            strides: outputStrides
+            strides: outputStrides,
+            numel: outputSize
         });
     }
     // Utility for self-inflicting element-wise ops
     static elementWiseSelf(tA, op) {
         if (typeof tA.value === "number")
             return new Tensor(op(tA.value));
-        const
-
-
+        const outputShape = tA.shape;
+        const outputStrides = Tensor.getStrides(outputShape);
+        const outputSize = tA.numel;
+        const outputValue = new Array(outputSize);
+        for (let index = 0; index < outputSize; index++) {
+            const outputCoords = Tensor.indexToCoords(index, outputStrides);
+            const originalIndex = tA.offset + Tensor.coordsToIndex(outputCoords, tA.strides);
+            outputValue[index] = op(tA.value[originalIndex]);
         }
-        return new Tensor(
+        return new Tensor(outputValue, { shape: outputShape, strides: outputStrides, numel: tA.numel });
     }
     // Utility to do element-wise operation and build a dag node with another tensor
     elementWiseABDAG(other, op, thisGrad = () => new Tensor(0), otherGrad = () => new Tensor(0)) {
@@ -260,6 +273,19 @@ class Tensor {
             tensor.grad = tensor.grad.add(squeezedGrad);
         }
     }
+    static normalizeDims(dims, numDims) {
+        for (let index = 0; index < dims.length; index++) {
+            // Handle negative indices
+            if (dims[index] < 0) {
+                dims[index] += numDims;
+            }
+            // If dimension out of bound, throw error
+            if (dims[index] >= numDims || dims[index] < 0) {
+                throw new Error("Dimensions do not exist");
+            }
+        }
+        return dims;
+    }
     // Contiguity-related ops
     isContiguous() {
         const expectedStrides = Tensor.getStrides(this.shape);
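Given the implementation above, negative axes wrap once against the rank and anything still out of range throws. Expected behavior, shown as a sketch:

    // Rank 3: axis -1 resolves to 2, axis 0 stays 0.
    Tensor.normalizeDims([-1, 0], 3); // [2, 0]
    // Axis 3 is out of range for rank 3 and throws "Dimensions do not exist".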
@@ -281,14 +307,14 @@ class Tensor {
         if (this.isContiguous())
             return this;
         const outputStrides = Tensor.getStrides(this.shape);
-        const outputSize =
+        const outputSize = this.numel;
         const outputValue = new Array(outputSize);
         for (let index = 0; index < outputSize; index++) {
             const outputCoords = Tensor.indexToCoords(index, outputStrides);
             const originalIndex = Tensor.coordsToIndex(outputCoords, this.strides);
-            outputValue[index] = this.value[originalIndex];
+            outputValue[index] = this.value[this.offset + originalIndex];
         }
-        const out = new Tensor(outputValue, { shape: this.shape, strides: outputStrides });
+        const out = new Tensor(outputValue, { shape: this.shape, strides: outputStrides, numel: outputSize });
         // Gradient flow back to the original tensor
         if (this.requiresGrad) {
             out.requiresGrad = true;
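`contiguous()` now reads through `this.offset`, so views created by the new transpose/slice ops repack correctly. A hedged usage sketch (the import path is assumed, not confirmed by this diff):

    import { Tensor } from "catniff"; // assumed entry point

    const v = Tensor.rand([2, 3]).transpose(0, 1); // view: shape [3, 2], swapped strides
    v.isContiguous();                              // false - strides are not row-major
    const c = v.contiguous();                      // copies into fresh row-major storage
    c.isContiguous();                              // true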
@@ -301,13 +327,13 @@ class Tensor {
     }
     reshape(newShape) {
         // Verify shape size
-        const originalSize =
+        const originalSize = this.numel;
         const outputSize = Tensor.shapeToSize(newShape);
         if (originalSize !== outputSize) {
             throw new Error("Cannot reshape: incompatible sizes");
         }
         const outputStrides = Tensor.getStrides(newShape);
-        const out = new Tensor(this.contiguous().value, { shape: newShape, strides: outputStrides });
+        const out = new Tensor(this.contiguous().value, { shape: newShape, strides: outputStrides, numel: outputSize });
         // Gradient reshaped and flow back to the original tensor
         if (this.requiresGrad) {
             out.requiresGrad = true;
@@ -318,6 +344,168 @@ class Tensor {
         }
         return out;
     }
+    // Transpose
+    transpose(dim1, dim2) {
+        // Handle negative indices
+        if (dim1 < 0) {
+            dim1 += this.shape.length;
+        }
+        if (dim2 < 0) {
+            dim2 += this.shape.length;
+        }
+        // If dimension out of bound, throw error
+        if (dim1 >= this.shape.length || dim2 >= this.shape.length || dim1 < 0 || dim2 < 0) {
+            throw new Error("Dimensions do not exist to transpose");
+        }
+        // If same dimension, return view
+        if (dim1 === dim2)
+            return this;
+        // Create new shape and strides by swapping
+        const newShape = [...this.shape];
+        const newStrides = [...this.strides];
+        [newShape[dim1], newShape[dim2]] = [newShape[dim2], newShape[dim1]];
+        [newStrides[dim1], newStrides[dim2]] = [newStrides[dim2], newStrides[dim1]];
+        // Create new tensor with same data but swapped shape/strides
+        const out = new Tensor(this.value, {
+            shape: newShape,
+            strides: newStrides,
+            offset: this.offset,
+            numel: this.numel,
+            device: this.device
+        });
+        out.requiresGrad = this.requiresGrad;
+        // Handle gradient if needed
+        if (this.requiresGrad) {
+            out.children.push(this);
+            out.gradFn = () => {
+                Tensor.addGrad(this, out.grad.transpose(dim1, dim2));
+            };
+        }
+        return out;
+    }
+    swapaxes = this.transpose;
+    swapdims = this.transpose;
+    // Transpose 2D
+    t() {
+        // Verify matrix shape
+        if (this.shape.length !== 2) {
+            throw new Error("Input is not a matrix");
+        }
+        return this.transpose(0, 1);
+    }
+    // Permute
+    permute(dims) {
+        dims = Tensor.normalizeDims(dims, this.shape.length);
+        if (dims.length !== this.shape.length) {
+            throw new Error("Permutation must specify all dimensions");
+        }
+        // Compute new shape and strides
+        const newShape = new Array(dims.length);
+        const newStrides = new Array(dims.length);
+        for (let index = 0; index < dims.length; index++) {
+            const dim = dims[index];
+            newShape[index] = this.shape[dim];
+            newStrides[index] = this.strides[dim];
+        }
+        const out = new Tensor(this.value, {
+            shape: newShape,
+            strides: newStrides,
+            offset: this.offset,
+            numel: this.numel,
+            device: this.device
+        });
+        if (this.requiresGrad) {
+            out.requiresGrad = true;
+            out.children.push(this);
+            out.gradFn = () => {
+                // Compute inverse permutation
+                const inverseAxes = new Array(dims.length);
+                for (let i = 0; i < dims.length; i++) {
+                    inverseAxes[dims[i]] = i;
+                }
+                // Permute gradient back to original order
+                const permutedGrad = out.grad.permute(inverseAxes);
+                Tensor.addGrad(this, permutedGrad);
+            };
+        }
+        return out;
+    }
+    // Tensor slicing
+    slice(ranges) {
+        // Handle scalars
+        if (typeof this.value === "number")
+            return this;
+        const newShape = [];
+        const newStrides = [];
+        let newOffset = this.offset || 0;
+        // Pad ranges to match tensor dimensions
+        const paddedRanges = [...ranges];
+        while (paddedRanges.length < this.shape.length) {
+            paddedRanges.push([]);
+        }
+        for (let i = 0; i < this.shape.length; i++) {
+            const range = paddedRanges[i] || [];
+            const dimSize = this.shape[i];
+            const stride = this.strides[i];
+            // Default values
+            let start = range[0] ?? 0;
+            let end = range[1] ?? dimSize;
+            let step = range[2] ?? 1;
+            // Handle negative indices
+            if (start < 0)
+                start += dimSize;
+            if (end < 0)
+                end += dimSize;
+            // Clamp to valid range
+            start = Math.max(0, Math.min(start, dimSize));
+            end = Math.max(0, Math.min(end, dimSize));
+            // Calculate new dimension size
+            const newDimSize = step > 0
+                ? Math.max(0, Math.ceil((end - start) / step))
+                : Math.max(0, Math.ceil((start - end) / Math.abs(step)));
+            newShape.push(newDimSize);
+            newStrides.push(stride * step);
+            newOffset += start * stride;
+        }
+        const out = new Tensor(this.value, {
+            shape: newShape,
+            strides: newStrides,
+            offset: newOffset,
+            device: this.device
+        });
+        if (this.requiresGrad) {
+            out.requiresGrad = true;
+            out.children.push(this);
+            out.gradFn = () => {
+                // Create zero tensor of original shape
+                const zeroGrad = Tensor.zerosLike(this);
+                // Upstream grad
+                const outGrad = out.grad;
+                const totalElements = outGrad.numel;
+                for (let i = 0; i < totalElements; i++) {
+                    // Convert flat index to coordinates in sliced tensor
+                    const slicedCoords = Tensor.indexToCoords(i, outGrad.strides);
+                    // Map back to original coordinates
+                    const originalCoords = new Array(slicedCoords.length);
+                    for (let dim = 0; dim < slicedCoords.length; dim++) {
+                        const coord = slicedCoords[dim];
+                        const range = paddedRanges[dim] || [];
+                        const start = range[0] ?? 0;
+                        const step = range[2] ?? 1;
+                        const normalizedStart = start < 0 ? start + this.shape[dim] : start;
+                        originalCoords[dim] = normalizedStart + coord * step;
+                    }
+                    // Get flat indices with offsets
+                    const srcIndex = Tensor.coordsToIndex(slicedCoords, outGrad.strides) + outGrad.offset;
+                    const targetIndex = Tensor.coordsToIndex(originalCoords, zeroGrad.strides) + zeroGrad.offset;
+                    // Accumulate gradient
+                    zeroGrad.value[targetIndex] += outGrad.value[srcIndex];
+                }
+                Tensor.addGrad(this, zeroGrad);
+            };
+        }
+        return out;
+    }
     // Tensor squeeze
     squeeze(dims) {
         if (typeof this.value === "number")
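All three additions are view operations: `transpose` and `permute` only swap `shape`/`strides` entries, and `slice` additionally advances `offset` by start * stride per dimension, so no data is copied until `contiguous()` forces it. A hedged usage sketch (import path assumed):

    import { Tensor } from "catniff";    // assumed entry point

    const t = Tensor.rand([2, 3, 4]);
    t.permute([2, 0, 1]);                // view with shape [4, 2, 3]
    t.transpose(-2, -1);                 // negative axes now allowed: shape [2, 4, 3]
    t.slice([[0, 1], [1, 3]]);           // shape [1, 2, 4]; offset moves into the buffer
    Tensor.rand([3, 5]).t();             // 2-D shorthand: shape [5, 3]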
@@ -334,6 +522,7 @@ class Tensor {
                 }
             }
         }
+        dims = Tensor.normalizeDims(dims, this.shape.length);
         // Remove size-1 dims only
         const outShape = [], outStrides = [];
         for (let index = 0; index < this.shape.length; index++) {
@@ -348,10 +537,11 @@ class Tensor {
                 outStrides.push(stride);
             }
         }
-        const outValue = outShape.length === 0 ? this.value[
+        const outValue = outShape.length === 0 ? this.value[this.offset] : this.value;
         const out = new Tensor(outValue, {
             shape: outShape,
             strides: outStrides,
+            offset: this.offset,
             device: this.device
         });
         // Set up gradient if needed
@@ -370,6 +560,10 @@ class Tensor {
     }
     // Tensor unsqueeze - adds dimension of size 1 at specified position
    unsqueeze(dim) {
+        // Handle negative indices
+        if (dim < 0) {
+            dim += this.shape.length;
+        }
         let thisValue = this.value;
         if (typeof thisValue === "number") {
             thisValue = [thisValue];
@@ -389,7 +583,12 @@ class Tensor {
             newDimStride = this.strides[dim] * this.shape[dim];
         }
         newStrides.splice(dim, 0, newDimStride);
-        const out = new Tensor(thisValue, {
+        const out = new Tensor(thisValue, {
+            shape: newShape,
+            strides: newStrides,
+            offset: this.offset,
+            device: this.device
+        });
         // Set up gradient if needed
         if (this.requiresGrad) {
             out.requiresGrad = true;
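Note the negative-index handling above resolves against the current rank (`dim += this.shape.length`), not rank + 1 as `torch.unsqueeze` does. A worked example of the behavior the shown code implies:

    // shape [2, 3]: dim -1 resolves to 2 - 1 = 1, so unsqueeze(-1) yields shape [2, 1, 3],
    // while unsqueeze(0) yields shape [1, 2, 3].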
@@ -400,325 +599,146 @@ class Tensor {
         }
         return out;
     }
-    //
-
-        if (typeof
-            return
+    // Generic reduction operation handler
+    static reduce(tensor, dims, keepDims, config) {
+        if (typeof tensor.value === "number")
+            return tensor;
         if (typeof dims === "undefined") {
-            dims = Array.from({ length:
+            dims = Array.from({ length: tensor.shape.length }, (_, index) => index);
         }
         if (Array.isArray(dims)) {
-
+            dims = Tensor.normalizeDims(dims, tensor.shape.length);
             const sortedDims = dims.sort((a, b) => b - a);
-            let reducedThis =
+            let reducedThis = tensor;
             for (let i = 0; i < sortedDims.length; i++) {
-                reducedThis =
+                reducedThis = Tensor.reduce(reducedThis, sortedDims[i], true, config);
             }
             return keepDims ? reducedThis : reducedThis.squeeze(dims);
         }
-
-        const outputShape = this.shape.map((dim, i) => dims === i ? 1 : dim);
+        const outputShape = tensor.shape.map((dim, i) => dims === i ? 1 : dim);
         const outputStrides = Tensor.getStrides(outputShape);
         const outputSize = Tensor.shapeToSize(outputShape);
-        const outputValue = new Array(outputSize).fill(
-        const
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        // Mark for gradient if needed
-        if (this.requiresGrad) {
-            gradValue[realFlatIndex] = 1;
+        const outputValue = new Array(outputSize).fill(config.identity);
+        const outputCounters = config.needsCounters ? new Array(outputSize).fill(0) : [];
+        const originalSize = tensor.numel;
+        const originalValue = tensor.value;
+        const linearStrides = Tensor.getStrides(tensor.shape);
+        // Forward pass
+        for (let flatIndex = 0; flatIndex < originalSize; flatIndex++) {
+            // Convert linear index to coordinates using contiguous strides
+            const coords = Tensor.indexToCoords(flatIndex, linearStrides);
+            // Convert coordinates to actual strided index
+            const realFlatIndex = Tensor.coordsToIndex(coords, tensor.strides) + tensor.offset;
+            // Convert coords to reduced index
+            coords[dims] = 0;
+            const outFlatIndex = Tensor.coordsToIndex(coords, outputStrides);
+            // Apply op
+            outputValue[outFlatIndex] = config.operation(outputValue[outFlatIndex], originalValue[realFlatIndex]);
+            // Count el if needed
+            if (config.needsCounters) {
+                outputCounters[outFlatIndex]++;
             }
         }
-
-
-
-        }
-
-
+        // Post-process if needed (e.g., divide by count for mean)
+        if (config.postProcess) {
+            config.postProcess({ values: outputValue, counters: outputCounters });
+        }
+        const out = new Tensor(outputValue, { shape: outputShape, strides: outputStrides, numel: outputSize });
+        // Gradient setup
+        if (tensor.requiresGrad) {
             out.requiresGrad = true;
-            out.children.push(
+            out.children.push(tensor);
             out.gradFn = () => {
-
-
+                let shareCounts = [];
+                if (config.needsShareCounts) {
+                    shareCounts = new Array(outputSize).fill(0);
+                    for (let flatIndex = 0; flatIndex < originalSize; flatIndex++) {
+                        // Convert linear index to coordinates using contiguous strides
+                        const coords = Tensor.indexToCoords(flatIndex, linearStrides);
+                        // Convert coordinates to actual strided index
+                        const realFlatIndex = Tensor.coordsToIndex(coords, tensor.strides) + tensor.offset;
+                        // Convert coords to reduced index
+                        coords[dims] = 0;
+                        const outFlatIndex = Tensor.coordsToIndex(coords, outputStrides);
+                        // We collect how many elements share the same max value first
+                        shareCounts[outFlatIndex] += outputValue[outFlatIndex] === originalValue[realFlatIndex] ? 1 : 0;
+                    }
+                }
+                const gradValue = new Array(originalSize);
+                for (let flatIndex = 0; flatIndex < originalSize; flatIndex++) {
+                    // Convert linear index to coordinates using contiguous strides
+                    const coords = Tensor.indexToCoords(flatIndex, linearStrides);
+                    // Convert coordinates to actual strided index
+                    const realFlatIndex = Tensor.coordsToIndex(coords, tensor.strides) + tensor.offset;
+                    // Convert coords to reduced index
+                    coords[dims] = 0;
+                    const outFlatIndex = Tensor.coordsToIndex(coords, outputStrides);
+                    gradValue[flatIndex] = config.gradientFn({
+                        outputValue,
+                        originalValue: tensor.value,
+                        counters: outputCounters,
+                        shareCounts,
+                        realIndex: realFlatIndex,
+                        outIndex: outFlatIndex
+                    });
+                }
+                const localGrad = new Tensor(gradValue, { shape: tensor.shape, numel: tensor.numel });
+                Tensor.addGrad(tensor, out.grad.mul(localGrad));
             };
         }
         return keepDims ? out : out.squeeze(dims);
     }
-    //
+    // Simplified reduction operations
+    sum(dims, keepDims = false) {
+        return Tensor.reduce(this, dims, keepDims, {
+            identity: 0,
+            operation: (a, b) => a + b,
+            gradientFn: ({}) => 1
+        });
+    }
     prod(dims, keepDims = false) {
-
-
-
-
-        }
-        if (Array.isArray(dims)) {
-            // Sort in descending order
-            const sortedDims = dims.sort((a, b) => b - a);
-            let reducedThis = this;
-            for (let i = 0; i < sortedDims.length; i++) {
-                reducedThis = reducedThis.prod(sortedDims[i], true);
-            }
-            return keepDims ? reducedThis : reducedThis.squeeze(dims);
-        }
-        // Dims that are reduced now have size-1
-        const outputShape = this.shape.map((dim, i) => dims === i ? 1 : dim);
-        const outputStrides = Tensor.getStrides(outputShape);
-        const outputSize = Tensor.shapeToSize(outputShape);
-        const outputValue = new Array(outputSize).fill(1);
-        const originalSize = Tensor.shapeToSize(this.shape);
-        // Calculate new value after multiplying
-        for (let realFlatIndex = 0; realFlatIndex < originalSize; realFlatIndex++) {
-            const coords = Tensor.indexToCoords(realFlatIndex, this.strides);
-            // Force 0 on reduced axes to collapse into size-1 dims
-            const outCoords = coords.map((val, i) => dims === i ? 0 : val);
-            // Convert output coordinates to flat index
-            const outFlatIndex = Tensor.coordsToIndex(outCoords, outputStrides);
-            // Multiply into product
-            outputValue[outFlatIndex] *= this.value[realFlatIndex];
-        }
-        const out = new Tensor(outputValue, {
-            shape: outputShape,
-            strides: outputStrides
+        return Tensor.reduce(this, dims, keepDims, {
+            identity: 1,
+            operation: (a, b) => a * b,
+            gradientFn: ({ outputValue, originalValue, realIndex, outIndex }) => outputValue[outIndex] / originalValue[realIndex]
         });
-        // Set up gradient if needed
-        if (this.requiresGrad) {
-            out.requiresGrad = true;
-            out.children.push(this);
-            out.gradFn = () => {
-                const gradShape = this.shape, gradStrides = this.strides, gradValue = new Array(originalSize).fill(0);
-                for (let realFlatIndex = 0; realFlatIndex < originalSize; realFlatIndex++) {
-                    const coords = Tensor.indexToCoords(realFlatIndex, this.strides);
-                    // Force 0 on reduced axes to collapse into size-1 dims
-                    const outCoords = coords.map((val, i) => dims === i ? 0 : val);
-                    // Convert output coordinates to flat index
-                    const outFlatIndex = Tensor.coordsToIndex(outCoords, outputStrides);
-                    // Grad is the product of other elements of the same axis, which is product of all els divided by the current value
-                    gradValue[realFlatIndex] = outputValue[outFlatIndex] / this.value[realFlatIndex];
-                }
-                const localGrad = new Tensor(gradValue, { shape: gradShape, strides: gradStrides });
-                Tensor.addGrad(this, out.grad.mul(localGrad));
-            };
-        }
-        return keepDims ? out : out.squeeze(dims);
     }
-    // Tensor mean reduction
     mean(dims, keepDims = false) {
-
-
-
-
-
-
-
-            const sortedDims = dims.sort((a, b) => b - a);
-            let reducedThis = this;
-            for (let i = 0; i < sortedDims.length; i++) {
-                reducedThis = reducedThis.mean(sortedDims[i], true);
-            }
-            return keepDims ? reducedThis : reducedThis.squeeze(dims);
-        }
-        // Dims that are reduced now have size-1
-        const outputShape = this.shape.map((dim, i) => dims === i ? 1 : dim);
-        const outputStrides = Tensor.getStrides(outputShape);
-        const outputSize = Tensor.shapeToSize(outputShape);
-        const outputValue = new Array(outputSize).fill(0);
-        const outputFeeders = new Array(outputSize).fill(0);
-        const originalSize = Tensor.shapeToSize(this.shape);
-        // Calculate sums and how many elements contribute to specific positions
-        for (let realFlatIndex = 0; realFlatIndex < originalSize; realFlatIndex++) {
-            const coords = Tensor.indexToCoords(realFlatIndex, this.strides);
-            // Force 0 on reduced axes to collapse into size-1 dims
-            const outCoords = coords.map((val, i) => dims === i ? 0 : val);
-            // Convert output coordinates to flat index
-            const outFlatIndex = Tensor.coordsToIndex(outCoords, outputStrides);
-            // Calculate sum and contributors to the sum
-            outputValue[outFlatIndex] += this.value[realFlatIndex];
-            outputFeeders[outFlatIndex]++;
-        }
-        // Calculate mean by dividing sum by the number of contributors to the position
-        for (let index = 0; index < outputSize; index++) {
-            outputValue[index] /= outputFeeders[index];
-        }
-        const out = new Tensor(outputValue, {
-            shape: outputShape,
-            strides: outputStrides
-        });
-        // Set up gradient if needed
-        if (this.requiresGrad) {
-            out.requiresGrad = true;
-            out.children.push(this);
-            out.gradFn = () => {
-                const gradShape = this.shape, gradStrides = this.strides, gradValue = new Array(originalSize).fill(0);
-                // Calculate grad by assigning 1 divided by the number of contributors to the position
-                for (let realFlatIndex = 0; realFlatIndex < originalSize; realFlatIndex++) {
-                    const coords = Tensor.indexToCoords(realFlatIndex, this.strides);
-                    // Force 0 on reduced axes to collapse into size-1 dims
-                    const outCoords = coords.map((val, i) => dims === i ? 0 : val);
-                    // Convert output coordinates to flat index
-                    const outFlatIndex = Tensor.coordsToIndex(outCoords, outputStrides);
-                    // Mean = 1/n * (el1 + el2 + ... + eln) so grad = 1/n
-                    gradValue[realFlatIndex] = 1 / outputFeeders[outFlatIndex];
+        return Tensor.reduce(this, dims, keepDims, {
+            identity: 0,
+            operation: (a, b) => a + b,
+            needsCounters: true,
+            postProcess: ({ values, counters }) => {
+                for (let i = 0; i < values.length; i++) {
+                    values[i] /= counters[i];
                 }
-
-
-
-        }
-        return keepDims ? out : out.squeeze(dims);
+            },
+            gradientFn: ({ counters, outIndex }) => 1 / counters[outIndex]
+        });
     }
-    // Tensor maximum reduction
     max(dims, keepDims = false) {
-
-
-
-
-
-        if (Array.isArray(dims)) {
-            // Sort in descending order
-            const sortedDims = dims.sort((a, b) => b - a);
-            let reducedThis = this;
-            for (let i = 0; i < sortedDims.length; i++) {
-                reducedThis = reducedThis.max(sortedDims[i], true);
-            }
-            return keepDims ? reducedThis : reducedThis.squeeze(dims);
-        }
-        // Dims that are reduced now have size-1
-        const outputShape = this.shape.map((dim, i) => dims === i ? 1 : dim);
-        const outputStrides = Tensor.getStrides(outputShape);
-        const outputSize = Tensor.shapeToSize(outputShape);
-        const outputValue = new Array(outputSize).fill(-Infinity);
-        const originalSize = Tensor.shapeToSize(this.shape);
-        // Calculate maximum values of axes
-        for (let realFlatIndex = 0; realFlatIndex < originalSize; realFlatIndex++) {
-            const coords = Tensor.indexToCoords(realFlatIndex, this.strides);
-            // Force 0 on reduced axes to collapse into size-1 dims
-            const outCoords = coords.map((val, i) => dims === i ? 0 : val);
-            // Convert output coordinates to flat index
-            const outFlatIndex = Tensor.coordsToIndex(outCoords, outputStrides);
-            // Get max over time
-            if (this.value[realFlatIndex] > outputValue[outFlatIndex]) {
-                outputValue[outFlatIndex] = this.value[realFlatIndex];
-            }
-        }
-        const out = new Tensor(outputValue, {
-            shape: outputShape,
-            strides: outputStrides
+        return Tensor.reduce(this, dims, keepDims, {
+            identity: -Infinity,
+            operation: (a, b) => Math.max(a, b),
+            needsShareCounts: true,
+            gradientFn: ({ outputValue, originalValue, shareCounts, realIndex, outIndex }) => outputValue[outIndex] === originalValue[realIndex] ? 1 / shareCounts[outIndex] : 0
         });
-        // Set up gradient if needed
-        if (this.requiresGrad) {
-            out.requiresGrad = true;
-            out.children.push(this);
-            out.gradFn = () => {
-                const gradShape = this.shape, gradStrides = this.strides, gradValue = new Array(originalSize).fill(0);
-                const shareCounts = new Array(outputSize).fill(0);
-                const originalValue = this.value;
-                for (let realFlatIndex = 0; realFlatIndex < originalSize; realFlatIndex++) {
-                    const coords = Tensor.indexToCoords(realFlatIndex, this.strides);
-                    // Force 0 on reduced axes to collapse into size-1 dims
-                    const outCoords = coords.map((val, i) => dims === i ? 0 : val);
-                    // Convert output coordinates to flat index
-                    const outFlatIndex = Tensor.coordsToIndex(outCoords, outputStrides);
-                    // We collect how many elements share the same max value first
-                    shareCounts[outFlatIndex] += outputValue[outFlatIndex] === originalValue[realFlatIndex] ? 1 : 0;
-                }
-                for (let realFlatIndex = 0; realFlatIndex < originalSize; realFlatIndex++) {
-                    const coords = Tensor.indexToCoords(realFlatIndex, this.strides);
-                    // Force 0 on reduced axes to collapse into size-1 dims
-                    const outCoords = coords.map((val, i) => dims === i ? 0 : val);
-                    // Convert output coordinates to flat index
-                    const outFlatIndex = Tensor.coordsToIndex(outCoords, outputStrides);
-                    // Here we share the grad between the elements that share the same max value
-                    gradValue[realFlatIndex] = outputValue[outFlatIndex] === originalValue[realFlatIndex] ? 1 / shareCounts[outFlatIndex] : 0;
-                }
-                const localGrad = new Tensor(gradValue, { shape: gradShape, strides: gradStrides });
-                Tensor.addGrad(this, out.grad.mul(localGrad));
-            };
-        }
-        return keepDims ? out : out.squeeze(dims);
     }
-    // Tensor minimum reduction
     min(dims, keepDims = false) {
-
-
-
-
-
-        if (Array.isArray(dims)) {
-            // Sort in descending order
-            const sortedDims = dims.sort((a, b) => b - a);
-            let reducedThis = this;
-            for (let i = 0; i < sortedDims.length; i++) {
-                reducedThis = reducedThis.min(sortedDims[i], true);
-            }
-            return keepDims ? reducedThis : reducedThis.squeeze(dims);
-        }
-        // Dims that are reduced now have size-1
-        const outputShape = this.shape.map((dim, i) => dims === i ? 1 : dim);
-        const outputStrides = Tensor.getStrides(outputShape);
-        const outputSize = Tensor.shapeToSize(outputShape);
-        const outputValue = new Array(outputSize).fill(Infinity);
-        const originalSize = Tensor.shapeToSize(this.shape);
-        // Calculate minimum values of axes
-        for (let realFlatIndex = 0; realFlatIndex < originalSize; realFlatIndex++) {
-            const coords = Tensor.indexToCoords(realFlatIndex, this.strides);
-            // Force 0 on reduced axes to collapse into size-1 dims
-            const outCoords = coords.map((val, i) => dims === i ? 0 : val);
-            // Convert output coordinates to flat index
-            const outFlatIndex = Tensor.coordsToIndex(outCoords, outputStrides);
-            // Get min over time
-            if (this.value[realFlatIndex] < outputValue[outFlatIndex]) {
-                outputValue[outFlatIndex] = this.value[realFlatIndex];
-            }
-        }
-        const out = new Tensor(outputValue, {
-            shape: outputShape,
-            strides: outputStrides
+        return Tensor.reduce(this, dims, keepDims, {
+            identity: Infinity,
+            operation: (a, b) => Math.min(a, b),
+            needsShareCounts: true,
+            gradientFn: ({ outputValue, originalValue, shareCounts, realIndex, outIndex }) => outputValue[outIndex] === originalValue[realIndex] ? 1 / shareCounts[outIndex] : 0
         });
-
-
-
-
-
-
-
-
-        for (let realFlatIndex = 0; realFlatIndex < originalSize; realFlatIndex++) {
-            const coords = Tensor.indexToCoords(realFlatIndex, this.strides);
-            // Force 0 on reduced axes to collapse into size-1 dims
-            const outCoords = coords.map((val, i) => dims === i ? 0 : val);
-            // Convert output coordinates to flat index
-            const outFlatIndex = Tensor.coordsToIndex(outCoords, outputStrides);
-            // We collect how many elements share the same min value first
-            shareCounts[outFlatIndex] += outputValue[outFlatIndex] === originalValue[realFlatIndex] ? 1 : 0;
-        }
-        for (let realFlatIndex = 0; realFlatIndex < originalSize; realFlatIndex++) {
-            const coords = Tensor.indexToCoords(realFlatIndex, this.strides);
-            // Force 0 on reduced axes to collapse into size-1 dims
-            const outCoords = coords.map((val, i) => dims === i ? 0 : val);
-            // Convert output coordinates to flat index
-            const outFlatIndex = Tensor.coordsToIndex(outCoords, outputStrides);
-            // Here we share the grad between the elements that share the same min value
-            gradValue[realFlatIndex] = outputValue[outFlatIndex] === originalValue[realFlatIndex] ? 1 / shareCounts[outFlatIndex] : 0;
-        }
-        const localGrad = new Tensor(gradValue, { shape: gradShape, strides: gradStrides });
-        Tensor.addGrad(this, out.grad.mul(localGrad));
-        };
-        }
-        return keepDims ? out : out.squeeze(dims);
+    }
+    // Tensor all condition reduction
+    all(dims, keepDims = false) {
+        return this.min(dims, keepDims).ne(0);
+    }
+    // Tensor any condition reduction
+    any(dims, keepDims = false) {
+        return this.max(dims, keepDims).ne(0);
     }
     // Tensor variance reduction
     var(dims, keepDims = false) {
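The `needsShareCounts` path gives `max`/`min` their tie-splitting subgradient: a first pass counts how many input elements equal the reduced extreme, then `gradientFn` returns 1/shareCounts for each of them and 0 elsewhere. A worked example:

    // x = [3, 1, 3]; x.max() = 3 and two elements tie at the maximum.
    // shareCounts = [2], so the local gradient is [1/2, 0, 1/2]:
    // the upstream gradient is split evenly between the tied elements,
    // and their contributions sum to exactly 1.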
@@ -730,75 +750,18 @@ class Tensor {
     std(dims, keepDims = false) {
         return this.var(dims, keepDims).sqrt();
     }
-    // Tensor
-    softmax(
+    // Tensor softmax
+    softmax(dim = -1) {
         if (typeof this.value === "number")
             return this;
-
-
-
-
-
-
-
-
-                reducedThis = reducedThis.softmax(sortedDims[i]);
-            }
-            return reducedThis;
-        }
-        // Dims that are reduced now have size-1
-        const expSumShape = this.shape.map((dim, i) => dims === i ? 1 : dim);
-        const expSumStrides = Tensor.getStrides(expSumShape);
-        const expSumSize = Tensor.shapeToSize(expSumShape);
-        const expSumValue = new Array(expSumSize).fill(0);
-        const outputShape = this.shape;
-        const outputStrides = this.strides;
-        const outputSize = Tensor.shapeToSize(outputShape);
-        const outputValue = new Array(outputSize);
-        // Calculate sums of e^xi over axes
-        for (let realFlatIndex = 0; realFlatIndex < outputSize; realFlatIndex++) {
-            const coords = Tensor.indexToCoords(realFlatIndex, outputStrides);
-            // Force 0 on reduced axes to collapse into size-1 dims
-            const expSumCoords = coords.map((val, i) => dims === i ? 0 : val);
-            // Convert exp sum coordinates to flat index
-            const expSumFlatIndex = Tensor.coordsToIndex(expSumCoords, expSumStrides);
-            // Add e^x to the sum cache
-            expSumValue[expSumFlatIndex] += Math.exp(this.value[realFlatIndex]);
-        }
-        // Calculate e^xi / sum over axes
-        for (let realFlatIndex = 0; realFlatIndex < outputSize; realFlatIndex++) {
-            const coords = Tensor.indexToCoords(realFlatIndex, outputStrides);
-            // Force 0 on reduced axes to collapse into size-1 dims
-            const expSumCoords = coords.map((val, i) => dims === i ? 0 : val);
-            // Convert exp sum coordinates to flat index
-            const expSumFlatIndex = Tensor.coordsToIndex(expSumCoords, expSumStrides);
-            // Calculate e^xi / sum
-            outputValue[realFlatIndex] = Math.exp(this.value[realFlatIndex]) / expSumValue[expSumFlatIndex];
-        }
-        const out = new Tensor(outputValue, {
-            shape: outputShape,
-            strides: outputStrides
-        });
-        // Set up gradient if needed
-        if (this.requiresGrad) {
-            out.requiresGrad = true;
-            out.children.push(this);
-            out.gradFn = () => {
-                const upstreamGrad = out.grad;
-                const softmaxOutput = out.detach();
-                // Compute element-wise product: ∂L/∂σᵢ × σᵢ
-                const gradTimesOutput = upstreamGrad.mul(softmaxOutput);
-                // Sum over softmax dimensions: Σᵢ(∂L/∂σᵢ × σᵢ)
-                const sumGradOutput = gradTimesOutput.sum(dims, true); // keepDims=true for broadcasting
-                // Apply softmax gradient formula:
-                // ∂L/∂zⱼ = (∂L/∂σⱼ × σⱼ) - (σⱼ × Σᵢ(∂L/∂σᵢ × σᵢ))
-                const term1 = upstreamGrad.mul(softmaxOutput); // ∂L/∂σⱼ × σⱼ
-                const term2 = softmaxOutput.mul(sumGradOutput); // σⱼ × Σᵢ(∂L/∂σᵢ × σᵢ)
-                const localGrad = term1.sub(term2);
-                Tensor.addGrad(this, localGrad);
-            };
-        }
-        return out;
+        // Handle negative indexing
+        if (dim < 0)
+            dim = this.shape.length + dim;
+        const maxVals = this.max(dim, true);
+        const shifted = this.sub(maxVals);
+        const expVals = shifted.exp();
+        const sumExp = expVals.sum(dim, true);
+        return expVals.div(sumExp);
     }
     // Tensor element-wise addition
     add(other) {
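The rewrite composes softmax from existing autograd ops and applies the standard max-subtraction trick: softmax(x) = softmax(x - max(x)), which avoids overflow in `exp` without changing the result. A hedged usage sketch (import path assumed):

    import { Tensor } from "catniff";   // assumed entry point

    const logits = new Tensor([1000, 1001, 1002]);
    // Naive exp(1000) overflows to Infinity; after shifting by the max the
    // inputs become [-2, -1, 0] and the result is finite:
    logits.softmax(-1);                 // approx. [0.0900, 0.2447, 0.6652]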
@@ -1131,76 +1094,6 @@ class Tensor {
     erfinv() {
         return this.elementWiseSelfDAG((a) => (0, utils_1.erfinv)(a), (self, outGrad) => outGrad.mul(self.erfinv().square().exp().mul(Math.sqrt(Math.PI) / 2)));
     }
-    // Transpose
-    transpose(dim1, dim2) {
-        // If dimension out of bound, throw error
-        if (dim1 >= this.shape.length || dim2 >= this.shape.length || dim1 < 0 || dim2 < 0) {
-            throw new Error("Dimensions do not exist to tranpose");
-        }
-        // If same dimension, return copy
-        if (dim1 === dim2) {
-            return new Tensor(this.value, { shape: this.shape, strides: this.strides });
-        }
-        // Create new shape and strides by swapping
-        const newShape = [...this.shape];
-        const newStrides = [...this.strides];
-        [newShape[dim1], newShape[dim2]] = [newShape[dim2], newShape[dim1]];
-        [newStrides[dim1], newStrides[dim2]] = [newStrides[dim2], newStrides[dim1]];
-        // Create new tensor with same data but swapped shape/strides
-        const out = new Tensor(this.value, { shape: newShape, strides: newStrides, device: this.device });
-        out.requiresGrad = this.requiresGrad;
-        // Handle gradient if needed
-        if (this.requiresGrad) {
-            out.children.push(this);
-            out.gradFn = () => {
-                Tensor.addGrad(this, out.grad.transpose(dim1, dim2));
-            };
-        }
-        return out;
-    }
-    swapaxes = this.transpose;
-    swapdims = this.transpose;
-    // Transpose 2D
-    t() {
-        // Verify matrix shape
-        if (this.shape.length !== 2) {
-            throw new Error("Input is not a matrix");
-        }
-        return this.transpose(0, 1);
-    }
-    // Permute
-    permute(dims) {
-        if (dims.length !== this.shape.length) {
-            throw new Error("Permutation must specify all dimensions");
-        }
-        // Compute new shape and strides
-        const newShape = new Array(dims.length);
-        const newStrides = new Array(dims.length);
-        for (let index = 0; index < dims.length; index++) {
-            const dim = dims[index];
-            newShape[index] = this.shape[dim];
-            newStrides[index] = this.strides[dim];
-        }
-        const out = new Tensor(this.value, {
-            shape: newShape,
-            strides: newStrides
-        });
-        if (this.requiresGrad) {
-            out.requiresGrad = true;
-            out.children.push(this);
-            out.gradFn = () => {
-                // Compute inverse permutation
-                const inverseAxes = new Array(dims.length);
-                for (let i = 0; i < dims.length; i++) {
-                    inverseAxes[dims[i]] = i;
-                }
-                // Permute gradient back to original order
-                const permutedGrad = out.grad.permute(inverseAxes);
-                Tensor.addGrad(this, permutedGrad);
-            };
-        }
-        return out;
-    }
     // 1D tensor dot product
     dot(other) {
         other = this.handleOther(other);
@@ -1208,36 +1101,7 @@ class Tensor {
         if (this.shape.length !== 1 || other.shape.length !== 1) {
             throw new Error("Inputs are not 1D tensors");
         }
-
-        const vectLen = this.shape[0];
-        const vectA = this.value;
-        const vectB = other.value;
-        let sum = 0;
-        for (let index = 0; index < vectLen; index++) {
-            sum += vectA[index] * vectB[index];
-        }
-        const out = new Tensor(sum);
-        if (this.requiresGrad) {
-            out.requiresGrad = true;
-            out.children.push(this);
-        }
-        if (other.requiresGrad) {
-            out.requiresGrad = true;
-            out.children.push(other);
-        }
-        if (out.requiresGrad) {
-            out.gradFn = () => {
-                // Disable gradient collecting of gradients themselves
-                const outGrad = out.grad;
-                const selfNoGrad = this.detach();
-                const otherNoGrad = other.detach();
-                if (this.requiresGrad)
-                    Tensor.addGrad(this, outGrad.mul(otherNoGrad));
-                if (other.requiresGrad)
-                    Tensor.addGrad(other, outGrad.mul(selfNoGrad));
-            };
-        }
-        return out;
+        return this.mul(other).sum();
     }
     // Matrix multiplication
     mm(other) {
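Because a · b = sum_i(a_i * b_i), delegating to `mul().sum()` removes roughly thirty lines of hand-wired DAG code: the element-wise multiply already contributes outGrad·b to a and outGrad·a to b, and `sum` passes the upstream gradient straight through. A worked check:

    // a = [1, 2, 3], b = [4, 5, 6]
    // a.mul(b) -> [4, 10, 18]; .sum() -> 32, which equals a . b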
@@ -1266,12 +1130,12 @@ class Tensor {
                 for (let k = 0; k < matACols; k++) {
                     // Tensor values are 1D arrays so we have to get real index using strides
                     matC[i * matCStrides[0] + j * matCStrides[1]] +=
-                        matA[i * matAStrides[0] + k * matAStrides[1]] *
-                            matB[k * matBStrides[0] + j * matBStrides[1]];
+                        matA[i * matAStrides[0] + k * matAStrides[1] + this.offset] *
+                            matB[k * matBStrides[0] + j * matBStrides[1] + other.offset];
                 }
             }
         }
-        const out = new Tensor(matC, { shape: matCShape, strides: matCStrides });
+        const out = new Tensor(matC, { shape: matCShape, strides: matCStrides, numel: matCSize });
         if (this.requiresGrad) {
             out.requiresGrad = true;
             out.children.push(this);
@@ -1323,13 +1187,13 @@ class Tensor {
                     for (let k = 0; k < batchACols; k++) {
                         // Tensor values are 1D arrays so we have to get real index using strides
                         batchC[q * batchCStrides[0] + i * batchCStrides[1] + j * batchCStrides[2]] +=
-                            batchA[q * batchAStrides[0] + i * batchAStrides[1] + k * batchAStrides[2]] *
-                                batchB[q * batchBStrides[0] + k * batchBStrides[1] + j * batchBStrides[2]];
+                            batchA[q * batchAStrides[0] + i * batchAStrides[1] + k * batchAStrides[2] + this.offset] *
+                                batchB[q * batchBStrides[0] + k * batchBStrides[1] + j * batchBStrides[2] + other.offset];
                     }
                 }
             }
         }
-        const out = new Tensor(batchC, { shape: batchCShape, strides: batchCStrides });
+        const out = new Tensor(batchC, { shape: batchCShape, strides: batchCStrides, numel: batchCSize });
         if (this.requiresGrad) {
             out.requiresGrad = true;
             out.children.push(this);
@@ -1402,7 +1266,7 @@ class Tensor {
         const otherOffsetShape = otherShape.slice(0, -2);
         const selfOffsetStrides = selfStrides.slice(0, -2);
         const otherOffsetStrides = otherStrides.slice(0, -2);
-        //
+        // Base offset data
         const offsetShape = Tensor.broadcastShapes(selfOffsetShape, otherOffsetShape);
         const offsetSize = Tensor.shapeToSize(offsetShape);
         const offsetStrides = Tensor.getStrides(offsetShape);
@@ -1411,10 +1275,11 @@ class Tensor {
         const outputStrides = Tensor.getStrides(outputShape);
         const outputSize = Tensor.shapeToSize(outputShape);
         const outputValue = new Array(outputSize).fill(0);
+        const outputOffsetStrides = outputStrides.slice(0, -2);
         // Loop through outer dims and do matmul on two outer-most dims
         for (let index = 0; index < offsetSize; index++) {
             const coords = Tensor.indexToCoords(index, offsetStrides);
-            const offset = Tensor.coordsToIndex(coords,
+            const offset = Tensor.coordsToIndex(coords, outputOffsetStrides);
             const selfOffset = Tensor.coordsToUnbroadcastedIndex(coords, selfOffsetShape, selfOffsetStrides);
             const otherOffset = Tensor.coordsToUnbroadcastedIndex(coords, otherOffsetShape, otherOffsetStrides);
             for (let i = 0; i < batchARows; i++) {
@@ -1423,12 +1288,12 @@ class Tensor {
                         const outputIdx = offset + i * outputStrides[lastDim - 1] + j * outputStrides[lastDim];
                         const selfIdx = selfOffset + i * selfStrides[lastDim - 1] + k * selfStrides[lastDim];
                         const otherIdx = otherOffset + k * otherStrides[lastDim - 1] + j * otherStrides[lastDim];
-                        outputValue[outputIdx] += batchA[selfIdx] * batchB[otherIdx];
+                        outputValue[outputIdx] += batchA[selfIdx + this.offset] * batchB[otherIdx + other.offset];
                     }
                 }
             }
         }
-        const out = new Tensor(outputValue, { shape: outputShape, strides: outputStrides });
+        const out = new Tensor(outputValue, { shape: outputShape, strides: outputStrides, numel: outputSize });
         if (this.requiresGrad) {
             out.requiresGrad = true;
             out.children.push(this);
@@ -1444,9 +1309,9 @@ class Tensor {
             const selfNoGrad = self.detach();
             const otherNoGrad = other.detach();
             if (this.requiresGrad)
-                Tensor.addGrad(this, outGrad.matmul(otherNoGrad.transpose(
+                Tensor.addGrad(this, outGrad.matmul(otherNoGrad.transpose(-2, -1)));
             if (other.requiresGrad)
-                Tensor.addGrad(other, selfNoGrad.transpose(
+                Tensor.addGrad(other, selfNoGrad.transpose(-2, -1).matmul(outGrad));
             };
         }
         return out;
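The negative axes make the backward pass batch-agnostic: `transpose(-2, -1)` swaps the trailing matrix dims whatever the batch rank. For C = A·B the shapes line up as follows (a sketch of the chain rule, not new code in the diff):

    // A: [..., n, k], B: [..., k, m], C = A.matmul(B): [..., n, m]
    // dL/dA = outGrad.matmul(B.transpose(-2, -1))   // [..., n, m] x [..., m, k] -> [..., n, k]
    // dL/dB = A.transpose(-2, -1).matmul(outGrad)   // [..., k, n] x [..., n, m] -> [..., k, m]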
@@ -1468,15 +1333,15 @@ class Tensor {
             return new Tensor(num, options);
         const outputSize = Tensor.shapeToSize(shape);
         const outputValue = new Array(outputSize).fill(num);
-        return new Tensor(outputValue, { shape, ...options });
+        return new Tensor(outputValue, { shape, numel: outputSize, ...options });
     }
     // Utility to create a new tensor with shape of another tensor, filled with a number
     static fullLike(tensor, num, options = {}) {
         if (typeof tensor.value === "number")
             return new Tensor(num, options);
-        return new Tensor(new Array(tensor.
+        return new Tensor(new Array(tensor.numel).fill(num), {
             shape: tensor.shape,
-
+            numel: tensor.numel,
             device: tensor.device,
             ...options
         });
@@ -1487,15 +1352,15 @@ class Tensor {
             return new Tensor(1, options);
         const outputSize = Tensor.shapeToSize(shape);
         const outputValue = new Array(outputSize).fill(1);
-        return new Tensor(outputValue, { shape, ...options });
+        return new Tensor(outputValue, { shape, numel: outputSize, ...options });
     }
     // Utility to create a new tensor with shape of another tensor, filled with 1
     static onesLike(tensor, options = {}) {
         if (typeof tensor.value === "number")
             return new Tensor(1, options);
-        return new Tensor(new Array(tensor.
+        return new Tensor(new Array(tensor.numel).fill(1), {
             shape: tensor.shape,
-
+            numel: tensor.numel,
             device: tensor.device,
             ...options
         });
@@ -1506,15 +1371,15 @@ class Tensor {
             return new Tensor(0, options);
         const outputSize = Tensor.shapeToSize(shape);
         const outputValue = new Array(outputSize).fill(0);
-        return new Tensor(outputValue, { shape, ...options });
+        return new Tensor(outputValue, { shape, numel: outputSize, ...options });
     }
     // Utility to create a new tensor with shape of another tensor, filled with 0
     static zerosLike(tensor, options = {}) {
         if (typeof tensor.value === "number")
             return new Tensor(0, options);
-        return new Tensor(new Array(tensor.
+        return new Tensor(new Array(tensor.numel).fill(0), {
             shape: tensor.shape,
-
+            numel: tensor.numel,
             device: tensor.device,
             ...options
         });
@@ -1528,19 +1393,19 @@ class Tensor {
         for (let index = 0; index < outputValue.length; index++) {
             outputValue[index] = (0, utils_1.randUniform)();
         }
-        return new Tensor(outputValue, { shape, ...options });
+        return new Tensor(outputValue, { shape, numel: outputSize, ...options });
     }
     // Utility to create a new tensor with shape of another tensor, filled with a random number with uniform distribution from 0 to 1
     static randLike(tensor, options = {}) {
         if (typeof tensor.value === "number")
             return new Tensor((0, utils_1.randUniform)(), options);
-        const outputValue = new Array(tensor.
+        const outputValue = new Array(tensor.numel);
         for (let index = 0; index < outputValue.length; index++) {
             outputValue[index] = (0, utils_1.randUniform)();
         }
         return new Tensor(outputValue, {
             shape: tensor.shape,
-
+            numel: tensor.numel,
             device: tensor.device,
             ...options
         });
@@ -1554,19 +1419,19 @@ class Tensor {
         for (let index = 0; index < outputValue.length; index++) {
             outputValue[index] = (0, utils_1.randNormal)();
         }
-        return new Tensor(outputValue, { shape, ...options });
+        return new Tensor(outputValue, { shape, numel: outputSize, ...options });
     }
     // Utility to create a new tensor with shape of another tensor, filled with a random number with normal distribution of mean=0 and stddev=1
     static randnLike(tensor, options = {}) {
         if (typeof tensor.value === "number")
             return new Tensor((0, utils_1.randNormal)(), options);
-        const outputValue = new Array(tensor.
+        const outputValue = new Array(tensor.numel);
         for (let index = 0; index < outputValue.length; index++) {
             outputValue[index] = (0, utils_1.randNormal)();
         }
         return new Tensor(outputValue, {
             shape: tensor.shape,
-
+            numel: tensor.numel,
             device: tensor.device,
             ...options
         });
@@ -1580,19 +1445,19 @@ class Tensor {
         for (let index = 0; index < outputValue.length; index++) {
             outputValue[index] = (0, utils_1.randInt)(low, high);
         }
-        return new Tensor(outputValue, { shape, ...options });
+        return new Tensor(outputValue, { shape, numel: outputSize, ...options });
     }
     // Utility to create a new tensor with shape of another tensor, filled with a random integer between low and high
     static randintLike(tensor, low, high, options = {}) {
         if (typeof tensor.value === "number")
             return new Tensor((0, utils_1.randInt)(low, high), options);
-        const outputValue = new Array(tensor.
+        const outputValue = new Array(tensor.numel);
         for (let index = 0; index < outputValue.length; index++) {
             outputValue[index] = (0, utils_1.randInt)(low, high);
         }
         return new Tensor(outputValue, {
             shape: tensor.shape,
-
+            numel: tensor.numel,
             device: tensor.device,
             ...options
         });
@@ -1606,7 +1471,7 @@ class Tensor {
         for (let index = 0; index < outputValue.length; index++) {
             outputValue[index] = (0, utils_1.randNormal)(mean, stdDev);
         }
-        return new Tensor(outputValue, { shape, ...options });
+        return new Tensor(outputValue, { shape, numel: outputSize, ...options });
     }
     // Utility to create a new tensor filled with a random number with uniform distribution from low to high
     static uniform(shape, low, high, options = {}) {
@@ -1617,7 +1482,7 @@ class Tensor {
         for (let index = 0; index < outputValue.length; index++) {
             outputValue[index] = (0, utils_1.randUniform)(low, high);
         }
-        return new Tensor(outputValue, { shape, ...options });
+        return new Tensor(outputValue, { shape, numel: outputSize, ...options });
     }
     // Reverse-mode autodiff call
     backward(options = {}) {
@@ -1666,13 +1531,15 @@ class Tensor {
             }
             return result;
         }
-        return buildNested(this.value, this.shape, this.strides);
+        return buildNested(this.value, this.shape, this.strides, this.offset);
     }
     // Returns a view of the tensor with gradient turned on/off and detaches from autograd
     withGrad(requiresGrad) {
         return new Tensor(this.value, {
             shape: this.shape,
             strides: this.strides,
+            offset: this.offset,
+            numel: this.numel,
             device: this.device,
             requiresGrad
         });
@@ -1682,6 +1549,8 @@ class Tensor {
         return new Tensor(this.value, {
             shape: this.shape,
             strides: this.strides,
+            offset: this.offset,
+            numel: this.numel,
             device: this.device,
             requiresGrad: false
         });
@@ -1691,6 +1560,8 @@ class Tensor {
         return new Tensor(typeof this.value === "number" ? this.value : [...this.value], {
             shape: this.shape,
             strides: this.strides,
+            offset: this.offset,
+            numel: this.numel,
             requiresGrad: this.requiresGrad
         });
     }