catniff 0.6.7 → 0.6.9
This diff shows the changes between publicly released versions of the package, as they appear in its public registry, and is provided for informational purposes only.
- package/dist/core.d.ts +27 -25
- package/dist/core.js +56 -3
- package/dist/optim.js +10 -10
- package/package.json +1 -1
package/dist/core.d.ts
CHANGED
@@ -1,8 +1,8 @@
 import { Backend } from "./backend";
 export type TensorValue = number | TensorValue[];
 export interface TensorOptions {
-    shape?:
-    strides?:
+    shape?: number[];
+    strides?: number[];
     offset?: number;
     numel?: number;
     grad?: Tensor;
@@ -13,8 +13,8 @@ export interface TensorOptions {
 }
 export declare class Tensor {
     value: number[] | number;
-
-
+    shape: number[];
+    strides: number[];
     offset: number;
     numel: number;
     grad?: Tensor;
@@ -24,20 +24,20 @@ export declare class Tensor {
     device: string;
     static training: boolean;
     constructor(value: TensorValue, options?: TensorOptions);
-    static
-    static getShape(tensor: TensorValue):
-    static getStrides(shape:
-    static padShape(stridesA:
-
-
-
-
+    static flattenValue(tensor: TensorValue): number[] | number;
+    static getShape(tensor: TensorValue): number[];
+    static getStrides(shape: number[]): number[];
+    static padShape(stridesA: number[], stridesB: number[], shapeA: number[], shapeB: number[]): [
+        number[],
+        number[],
+        number[],
+        number[]
     ];
-    static broadcastShapes(shapeA:
-    static indexToCoords(index: number, strides:
-    static coordsToUnbroadcastedIndex(coords: number[], shape:
-    static coordsToIndex(coords: number[], strides:
-    static shapeToSize(shape:
+    static broadcastShapes(shapeA: number[], shapeB: number[]): number[];
+    static indexToCoords(index: number, strides: number[]): number[];
+    static coordsToUnbroadcastedIndex(coords: number[], shape: number[], strides: number[]): number;
+    static coordsToIndex(coords: number[], strides: number[]): number;
+    static shapeToSize(shape: number[]): number;
     static elementWiseAB(tA: Tensor, tB: Tensor, op: (tA: number, tB: number) => number): Tensor;
     static elementWiseSelf(tA: Tensor, op: (tA: number) => number): Tensor;
     elementWiseABDAG(other: TensorValue | Tensor, op: (a: number, b: number) => number, thisGrad?: (self: Tensor, other: Tensor, outGrad: Tensor) => Tensor, otherGrad?: (self: Tensor, other: Tensor, outGrad: Tensor) => Tensor): Tensor;
@@ -47,8 +47,9 @@ export declare class Tensor {
     static normalizeDims(dims: number[], numDims: number): number[];
     isContiguous(): boolean;
     contiguous(): Tensor;
-    view(newShape:
-    reshape(newShape:
+    view(newShape: number[]): Tensor;
+    reshape(newShape: number[]): Tensor;
+    flatten(startDim?: number, endDim?: number): Tensor;
     transpose(dim1: number, dim2: number): Tensor;
     swapaxes: (dim1: number, dim2: number) => Tensor;
     swapdims: (dim1: number, dim2: number) => Tensor;
@@ -57,6 +58,7 @@ export declare class Tensor {
     indexWithArray(indices: number[]): Tensor;
     index(indices: Tensor | TensorValue): Tensor;
     slice(ranges: number[][]): Tensor;
+    chunk(chunks: number, dim?: number): Tensor[];
     squeeze(dims?: number[] | number): Tensor;
     unsqueeze(dim: number): Tensor;
     static reduce(tensor: Tensor, dims: number[] | number | undefined, keepDims: boolean, config: {
@@ -185,17 +187,17 @@ export declare class Tensor {
     triu(diagonal?: number): Tensor;
     tril(diagonal?: number): Tensor;
     maskedFill(mask: Tensor | TensorValue, value: number): Tensor;
-    static full(shape:
+    static full(shape: number[], num: number, options?: TensorOptions): Tensor;
     static fullLike(tensor: Tensor, num: number, options?: TensorOptions): Tensor;
-    static ones(shape?:
+    static ones(shape?: number[], options?: TensorOptions): Tensor;
     static onesLike(tensor: Tensor, options?: TensorOptions): Tensor;
-    static zeros(shape?:
+    static zeros(shape?: number[], options?: TensorOptions): Tensor;
     static zerosLike(tensor: Tensor, options?: TensorOptions): Tensor;
-    static rand(shape?:
+    static rand(shape?: number[], options?: TensorOptions): Tensor;
     static randLike(tensor: Tensor, options?: TensorOptions): Tensor;
-    static randn(shape?:
+    static randn(shape?: number[], options?: TensorOptions): Tensor;
     static randnLike(tensor: Tensor, options?: TensorOptions): Tensor;
-    static randint(shape:
+    static randint(shape: number[], low: number, high: number, options?: TensorOptions): Tensor;
     static randintLike(tensor: Tensor, low: number, high: number, options?: TensorOptions): Tensor;
     static randperm(n: number, options?: TensorOptions): Tensor;
     static normal(shape: number[], mean: number, stdDev: number, options?: TensorOptions): Tensor;
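The declarations add two instance methods, `flatten` and `chunk`, and pin shape-related parameters to concrete `number[]` types. A short sketch exercising the updated surface; the `catniff` import path is an assumption, and the comments only restate the signatures shown above:

```ts
import { Tensor } from "catniff"; // assumed package entry point

// Creation ops declare their shape parameters as number[]:
const a = Tensor.zeros([2, 3]);
const r = Tensor.randint([2, 3], 0, 10);
// Methods newly declared in this version range:
const flat: Tensor = a.flatten();      // collapse all dims
const parts: Tensor[] = a.chunk(2, 1); // split dim 1 into two pieces
```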
package/dist/core.js
CHANGED
@@ -16,7 +16,7 @@ class Tensor {
     static training = false;
     constructor(value, options = {}) {
         // Storage
-        this.value = Tensor.
+        this.value = Tensor.flattenValue(value);
         // Tensor metadata
         this.shape = options.shape || Tensor.getShape(value);
         this.strides = options.strides || Tensor.getStrides(this.shape);
@@ -32,7 +32,7 @@ class Tensor {
         this.to_(this.device);
     }
     // Utility to flatten an nD array to be 1D
-    static
+    static flattenValue(tensor) {
         // Handle scalar tensors
         if (typeof tensor === "number")
             return tensor;
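The constructor now routes raw input through `Tensor.flattenValue`, the utility the comment above describes: it flattens an nD nested array into flat 1D storage and passes scalars through unchanged. A minimal standalone sketch of that behavior (a hypothetical re-implementation, not the package's code):

```ts
type TensorValue = number | TensorValue[];

// Flatten an nD nested array to 1D; scalar inputs stay scalar.
function flattenValue(tensor: TensorValue): number[] | number {
    if (typeof tensor === "number") return tensor;
    return tensor.flatMap((v) => {
        const flat = flattenValue(v);
        return typeof flat === "number" ? [flat] : flat;
    });
}

console.log(flattenValue([[1, 2], [3, 4]])); // [1, 2, 3, 4]
console.log(flattenValue(7));                // 7
```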
@@ -377,6 +377,40 @@ class Tensor {
         }
         return out;
     }
+    flatten(startDim = 0, endDim = -1) {
+        // Handle negative indices
+        if (startDim < 0) {
+            startDim += this.shape.length;
+        }
+        if (endDim < 0) {
+            endDim += this.shape.length;
+        }
+        // If dimension out of bound, throw error
+        if (startDim >= this.shape.length || endDim >= this.shape.length || startDim < 0 || endDim < 0) {
+            throw new Error("Dimensions do not exist to flatten");
+        }
+        const newShape = [];
+        let middleSize = 1;
+        for (let index = 0; index < this.shape.length; index++) {
+            // Keep dims before startDim
+            if (index < startDim) {
+                newShape.push(this.shape[index]);
+            }
+            // Multiply dims from startDim to endDim
+            if (index >= startDim && index <= endDim) {
+                middleSize *= this.shape[index];
+            }
+            // Push new flatten middle
+            if (index === endDim) {
+                newShape.push(middleSize);
+            }
+            // Keep dims after endDim
+            if (index > endDim) {
+                newShape.push(this.shape[index]);
+            }
+        }
+        return this.reshape(newShape);
+    }
     // Transpose
     transpose(dim1, dim2) {
         // Handle negative indices
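The new `flatten(startDim, endDim)` collapses the dimensions from `startDim` through `endDim` (inclusive) into a single dimension, leaves the surrounding dimensions untouched, and delegates the data movement to `reshape`. A usage sketch, with the import path again assumed:

```ts
import { Tensor } from "catniff"; // assumed package entry point

const t = Tensor.rand([2, 3, 4, 5]);
console.log(t.flatten().shape);      // [120]       (default: collapse everything)
console.log(t.flatten(1, 2).shape);  // [2, 12, 5]  (3 * 4 merged into 12)
console.log(t.flatten(0, -2).shape); // [24, 5]     (negative endDim counts from the back)
```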
@@ -605,6 +639,25 @@ class Tensor {
         }
         return out;
     }
+    // Tensor chunk
+    chunk(chunks, dim = 0) {
+        // Handle negative indices
+        if (dim < 0) {
+            dim += this.shape.length;
+        }
+        const sliceOpt = new Array(this.shape.length);
+        for (let index = 0; index < sliceOpt.length; index++) {
+            sliceOpt[index] = [];
+        }
+        const dimSize = this.shape[dim];
+        const chunkDimSize = Math.ceil(dimSize / chunks);
+        const results = [];
+        for (let index = 0; index < dimSize; index += chunkDimSize) {
+            sliceOpt[dim] = [index, Math.min(index + chunkDimSize, dimSize)];
+            results.push(this.slice(sliceOpt));
+        }
+        return results;
+    }
     // Tensor squeeze
     squeeze(dims) {
         if (typeof this.value === "number")
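`chunk` splits a tensor into up to `chunks` pieces along `dim` by repeated slicing. Each piece spans `ceil(dimSize / chunks)` elements, so the last piece may be smaller when the dimension does not divide evenly. For example (same assumed import):

```ts
import { Tensor } from "catniff"; // assumed package entry point

const x = Tensor.rand([5, 4]);
const parts = x.chunk(3); // chunk size = ceil(5 / 3) = 2 along dim 0
console.log(parts.map((p) => p.shape)); // [[2, 4], [2, 4], [1, 4]]
```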
@@ -1114,7 +1167,7 @@ class Tensor {
     mish() {
         return this.elementWiseSelfDAG((a) => a * Math.tanh(Math.log1p(Math.exp(a))), (self, outGrad) => {
             const tanhSoftPlus = self.exp().add(1).log().tanh();
-            // tanh(softplus(x)) + x * (1 - tanh
+            // tanh(softplus(x)) + x * (1 - tanh^2(softplus(x))) * sigmoid(x)
             const derivative = tanhSoftPlus.add(self.mul(tanhSoftPlus.square().neg().add(1)).mul(self.sigmoid()));
             return outGrad.mul(derivative);
         });
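The completed comment now states the full derivative that the next line computes. With $\operatorname{softplus}(x) = \ln(1 + e^x)$, $\sigma$ the logistic sigmoid, and using $\operatorname{sech}^2 = 1 - \tanh^2$ and $\frac{d}{dx}\operatorname{softplus}(x) = \sigma(x)$, this is the standard mish derivative:

$$\frac{d}{dx}\,x\tanh(\operatorname{softplus}(x)) = \tanh(\operatorname{softplus}(x)) + x\bigl(1 - \tanh^2(\operatorname{softplus}(x))\bigr)\,\sigma(x)$$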
package/dist/optim.js
CHANGED
@@ -108,17 +108,17 @@ class Adam extends BaseOptimizer {
                velocityBuffer = core_1.Tensor.zerosLike(grad); // Initialize with zeros (same shape as grad)
                this.velocityBuffers.set(param, velocityBuffer);
            }
-           // Update biased first moment estimate: m_t =
+           // Update biased first moment estimate: m_t = beta1 * m_{t-1} + (1 - beta1) * g_t
            momentumBuffer = momentumBuffer.mul(beta1).add(grad.mul(1 - beta1));
            this.momentumBuffers.set(param, momentumBuffer);
-           // Update biased second moment estimate: v_t =
+           // Update biased second moment estimate: v_t = beta2 * v_{t-1} + (1 - beta2) * g_t^2
            velocityBuffer = velocityBuffer.mul(beta2).add(grad.pow(2).mul(1 - beta2));
            this.velocityBuffers.set(param, velocityBuffer);
-           // Compute bias-corrected first moment:
+           // Compute bias-corrected first moment: m_hat_t = m_t / (1 - beta1^t)
            const correctedMomentum = momentumBuffer.div(biasCorrection1);
-           // Compute bias-corrected second moment:
+           // Compute bias-corrected second moment: v_hat_t = v_t / (1 - beta2^t)
            const correctedVelocity = velocityBuffer.div(biasCorrection2);
-           // Update parameters:
+           // Update parameters: theta_t = theta_{t-1} - alpha * m_hat_t / (sqrt(v_hat_t) + epsilon)
            const denom = correctedVelocity.sqrt().add(this.eps);
            const stepSize = correctedMomentum.div(denom).mul(this.lr);
            const newParam = detachedParam.sub(stepSize);
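The filled-in comments spell out the Adam recurrences that the adjacent code implements, in the usual notation (here `biasCorrection1` and `biasCorrection2` correspond to $1-\beta_1^t$ and $1-\beta_2^t$):

$$m_t = \beta_1 m_{t-1} + (1-\beta_1)\,g_t \qquad v_t = \beta_2 v_{t-1} + (1-\beta_2)\,g_t^2$$

$$\hat m_t = \frac{m_t}{1-\beta_1^t} \qquad \hat v_t = \frac{v_t}{1-\beta_2^t} \qquad \theta_t = \theta_{t-1} - \alpha\,\frac{\hat m_t}{\sqrt{\hat v_t}+\epsilon}$$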
@@ -166,17 +166,17 @@ class AdamW extends BaseOptimizer {
                velocityBuffer = core_1.Tensor.zerosLike(grad); // Initialize with zeros (same shape as grad)
                this.velocityBuffers.set(param, velocityBuffer);
            }
-           // Update biased first moment estimate: m_t =
+           // Update biased first moment estimate: m_t = beta1 * m_{t-1} + (1 - beta1) * g_t
            momentumBuffer = momentumBuffer.mul(beta1).add(grad.mul(1 - beta1));
            this.momentumBuffers.set(param, momentumBuffer);
-           // Update biased second moment estimate: v_t =
+           // Update biased second moment estimate: v_t = beta2 * v_{t-1} + (1 - beta2) * g_t^2
            velocityBuffer = velocityBuffer.mul(beta2).add(grad.pow(2).mul(1 - beta2));
            this.velocityBuffers.set(param, velocityBuffer);
-           // Compute bias-corrected first moment:
+           // Compute bias-corrected first moment: m_hat_t = m_t / (1 - beta1^t)
            const correctedMomentum = momentumBuffer.div(biasCorrection1);
-           // Compute bias-corrected second moment:
+           // Compute bias-corrected second moment: v_hat_t = v_t / (1 - beta2^t)
            const correctedVelocity = velocityBuffer.div(biasCorrection2);
-           // Update parameters:
+           // Update parameters: theta_t = theta_{t-1} - alpha * m_hat_t / (sqrt(v_hat_t) + epsilon)
            const denom = correctedVelocity.sqrt().add(this.eps);
            const stepSize = correctedMomentum.div(denom).mul(this.lr);
            const newParam = detachedParam.sub(stepSize);
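The AdamW hunk receives the same comment completions; its moment and bias-correction steps are identical to Adam's. For context, what distinguishes AdamW is decoupled weight decay, applied directly to the parameter rather than folded into the gradient (Loshchilov & Hutter):

$$\theta_t = \theta_{t-1} - \alpha\left(\frac{\hat m_t}{\sqrt{\hat v_t}+\epsilon} + \lambda\,\theta_{t-1}\right)$$

The decay step itself falls outside the lines shown in this diff.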