catniff 0.6.8 → 0.6.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/core.d.ts +26 -25
- package/dist/core.js +37 -3
- package/dist/optim.js +10 -10
- package/package.json +1 -1
package/dist/core.d.ts
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
import { Backend } from "./backend";
|
|
2
2
|
export type TensorValue = number | TensorValue[];
|
|
3
3
|
export interface TensorOptions {
|
|
4
|
-
shape?:
|
|
5
|
-
strides?:
|
|
4
|
+
shape?: number[];
|
|
5
|
+
strides?: number[];
|
|
6
6
|
offset?: number;
|
|
7
7
|
numel?: number;
|
|
8
8
|
grad?: Tensor;
|
|
@@ -13,8 +13,8 @@ export interface TensorOptions {
|
|
|
13
13
|
}
|
|
14
14
|
export declare class Tensor {
|
|
15
15
|
value: number[] | number;
|
|
16
|
-
|
|
17
|
-
|
|
16
|
+
shape: number[];
|
|
17
|
+
strides: number[];
|
|
18
18
|
offset: number;
|
|
19
19
|
numel: number;
|
|
20
20
|
grad?: Tensor;
|
|
@@ -24,20 +24,20 @@ export declare class Tensor {
|
|
|
24
24
|
device: string;
|
|
25
25
|
static training: boolean;
|
|
26
26
|
constructor(value: TensorValue, options?: TensorOptions);
|
|
27
|
-
static
|
|
28
|
-
static getShape(tensor: TensorValue):
|
|
29
|
-
static getStrides(shape:
|
|
30
|
-
static padShape(stridesA:
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
27
|
+
static flattenValue(tensor: TensorValue): number[] | number;
|
|
28
|
+
static getShape(tensor: TensorValue): number[];
|
|
29
|
+
static getStrides(shape: number[]): number[];
|
|
30
|
+
static padShape(stridesA: number[], stridesB: number[], shapeA: number[], shapeB: number[]): [
|
|
31
|
+
number[],
|
|
32
|
+
number[],
|
|
33
|
+
number[],
|
|
34
|
+
number[]
|
|
35
35
|
];
|
|
36
|
-
static broadcastShapes(shapeA:
|
|
37
|
-
static indexToCoords(index: number, strides:
|
|
38
|
-
static coordsToUnbroadcastedIndex(coords: number[], shape:
|
|
39
|
-
static coordsToIndex(coords: number[], strides:
|
|
40
|
-
static shapeToSize(shape:
|
|
36
|
+
static broadcastShapes(shapeA: number[], shapeB: number[]): number[];
|
|
37
|
+
static indexToCoords(index: number, strides: number[]): number[];
|
|
38
|
+
static coordsToUnbroadcastedIndex(coords: number[], shape: number[], strides: number[]): number;
|
|
39
|
+
static coordsToIndex(coords: number[], strides: number[]): number;
|
|
40
|
+
static shapeToSize(shape: number[]): number;
|
|
41
41
|
static elementWiseAB(tA: Tensor, tB: Tensor, op: (tA: number, tB: number) => number): Tensor;
|
|
42
42
|
static elementWiseSelf(tA: Tensor, op: (tA: number) => number): Tensor;
|
|
43
43
|
elementWiseABDAG(other: TensorValue | Tensor, op: (a: number, b: number) => number, thisGrad?: (self: Tensor, other: Tensor, outGrad: Tensor) => Tensor, otherGrad?: (self: Tensor, other: Tensor, outGrad: Tensor) => Tensor): Tensor;
|
|
@@ -47,8 +47,9 @@ export declare class Tensor {
|
|
|
47
47
|
static normalizeDims(dims: number[], numDims: number): number[];
|
|
48
48
|
isContiguous(): boolean;
|
|
49
49
|
contiguous(): Tensor;
|
|
50
|
-
view(newShape:
|
|
51
|
-
reshape(newShape:
|
|
50
|
+
view(newShape: number[]): Tensor;
|
|
51
|
+
reshape(newShape: number[]): Tensor;
|
|
52
|
+
flatten(startDim?: number, endDim?: number): Tensor;
|
|
52
53
|
transpose(dim1: number, dim2: number): Tensor;
|
|
53
54
|
swapaxes: (dim1: number, dim2: number) => Tensor;
|
|
54
55
|
swapdims: (dim1: number, dim2: number) => Tensor;
|
|
@@ -186,17 +187,17 @@ export declare class Tensor {
|
|
|
186
187
|
triu(diagonal?: number): Tensor;
|
|
187
188
|
tril(diagonal?: number): Tensor;
|
|
188
189
|
maskedFill(mask: Tensor | TensorValue, value: number): Tensor;
|
|
189
|
-
static full(shape:
|
|
190
|
+
static full(shape: number[], num: number, options?: TensorOptions): Tensor;
|
|
190
191
|
static fullLike(tensor: Tensor, num: number, options?: TensorOptions): Tensor;
|
|
191
|
-
static ones(shape?:
|
|
192
|
+
static ones(shape?: number[], options?: TensorOptions): Tensor;
|
|
192
193
|
static onesLike(tensor: Tensor, options?: TensorOptions): Tensor;
|
|
193
|
-
static zeros(shape?:
|
|
194
|
+
static zeros(shape?: number[], options?: TensorOptions): Tensor;
|
|
194
195
|
static zerosLike(tensor: Tensor, options?: TensorOptions): Tensor;
|
|
195
|
-
static rand(shape?:
|
|
196
|
+
static rand(shape?: number[], options?: TensorOptions): Tensor;
|
|
196
197
|
static randLike(tensor: Tensor, options?: TensorOptions): Tensor;
|
|
197
|
-
static randn(shape?:
|
|
198
|
+
static randn(shape?: number[], options?: TensorOptions): Tensor;
|
|
198
199
|
static randnLike(tensor: Tensor, options?: TensorOptions): Tensor;
|
|
199
|
-
static randint(shape:
|
|
200
|
+
static randint(shape: number[], low: number, high: number, options?: TensorOptions): Tensor;
|
|
200
201
|
static randintLike(tensor: Tensor, low: number, high: number, options?: TensorOptions): Tensor;
|
|
201
202
|
static randperm(n: number, options?: TensorOptions): Tensor;
|
|
202
203
|
static normal(shape: number[], mean: number, stdDev: number, options?: TensorOptions): Tensor;
|
package/dist/core.js
CHANGED
|
@@ -16,7 +16,7 @@ class Tensor {
|
|
|
16
16
|
static training = false;
|
|
17
17
|
constructor(value, options = {}) {
|
|
18
18
|
// Storage
|
|
19
|
-
this.value = Tensor.
|
|
19
|
+
this.value = Tensor.flattenValue(value);
|
|
20
20
|
// Tensor metadata
|
|
21
21
|
this.shape = options.shape || Tensor.getShape(value);
|
|
22
22
|
this.strides = options.strides || Tensor.getStrides(this.shape);
|
|
@@ -32,7 +32,7 @@ class Tensor {
|
|
|
32
32
|
this.to_(this.device);
|
|
33
33
|
}
|
|
34
34
|
// Utility to flatten an nD array to be 1D
|
|
35
|
-
static
|
|
35
|
+
static flattenValue(tensor) {
|
|
36
36
|
// Handle scalar tensors
|
|
37
37
|
if (typeof tensor === "number")
|
|
38
38
|
return tensor;
|
|
@@ -377,6 +377,40 @@ class Tensor {
|
|
|
377
377
|
}
|
|
378
378
|
return out;
|
|
379
379
|
}
|
|
380
|
+
flatten(startDim = 0, endDim = -1) {
|
|
381
|
+
// Handle negative indices
|
|
382
|
+
if (startDim < 0) {
|
|
383
|
+
startDim += this.shape.length;
|
|
384
|
+
}
|
|
385
|
+
if (endDim < 0) {
|
|
386
|
+
endDim += this.shape.length;
|
|
387
|
+
}
|
|
388
|
+
// If dimension out of bound, throw error
|
|
389
|
+
if (startDim >= this.shape.length || endDim >= this.shape.length || startDim < 0 || endDim < 0) {
|
|
390
|
+
throw new Error("Dimensions do not exist to flatten");
|
|
391
|
+
}
|
|
392
|
+
const newShape = [];
|
|
393
|
+
let middleSize = 1;
|
|
394
|
+
for (let index = 0; index < this.shape.length; index++) {
|
|
395
|
+
// Keep dims before startDim
|
|
396
|
+
if (index < startDim) {
|
|
397
|
+
newShape.push(this.shape[index]);
|
|
398
|
+
}
|
|
399
|
+
// Multiply dims from startDim to endDim
|
|
400
|
+
if (index >= startDim && index <= endDim) {
|
|
401
|
+
middleSize *= this.shape[index];
|
|
402
|
+
}
|
|
403
|
+
// Push new flatten middle
|
|
404
|
+
if (index === endDim) {
|
|
405
|
+
newShape.push(middleSize);
|
|
406
|
+
}
|
|
407
|
+
// Keep dims after endDim
|
|
408
|
+
if (index > endDim) {
|
|
409
|
+
newShape.push(this.shape[index]);
|
|
410
|
+
}
|
|
411
|
+
}
|
|
412
|
+
return this.reshape(newShape);
|
|
413
|
+
}
|
|
380
414
|
// Transpose
|
|
381
415
|
transpose(dim1, dim2) {
|
|
382
416
|
// Handle negative indices
|
|
@@ -1133,7 +1167,7 @@ class Tensor {
|
|
|
1133
1167
|
mish() {
|
|
1134
1168
|
return this.elementWiseSelfDAG((a) => a * Math.tanh(Math.log1p(Math.exp(a))), (self, outGrad) => {
|
|
1135
1169
|
const tanhSoftPlus = self.exp().add(1).log().tanh();
|
|
1136
|
-
// tanh(softplus(x)) + x * (1 - tanh
|
|
1170
|
+
// tanh(softplus(x)) + x * (1 - tanh^2(softplus(x))) * sigmoid(x)
|
|
1137
1171
|
const derivative = tanhSoftPlus.add(self.mul(tanhSoftPlus.square().neg().add(1)).mul(self.sigmoid()));
|
|
1138
1172
|
return outGrad.mul(derivative);
|
|
1139
1173
|
});
|
package/dist/optim.js
CHANGED
|
@@ -108,17 +108,17 @@ class Adam extends BaseOptimizer {
|
|
|
108
108
|
velocityBuffer = core_1.Tensor.zerosLike(grad); // Initialize with zeros (same shape as grad)
|
|
109
109
|
this.velocityBuffers.set(param, velocityBuffer);
|
|
110
110
|
}
|
|
111
|
-
// Update biased first moment estimate: m_t =
|
|
111
|
+
// Update biased first moment estimate: m_t = beta1 * m_{t-1} + (1 - beta1) * g_t
|
|
112
112
|
momentumBuffer = momentumBuffer.mul(beta1).add(grad.mul(1 - beta1));
|
|
113
113
|
this.momentumBuffers.set(param, momentumBuffer);
|
|
114
|
-
// Update biased second moment estimate: v_t =
|
|
114
|
+
// Update biased second moment estimate: v_t = beta2 * v_{t-1} + (1 - beta2) * g_t^2
|
|
115
115
|
velocityBuffer = velocityBuffer.mul(beta2).add(grad.pow(2).mul(1 - beta2));
|
|
116
116
|
this.velocityBuffers.set(param, velocityBuffer);
|
|
117
|
-
// Compute bias-corrected first moment:
|
|
117
|
+
// Compute bias-corrected first moment: m_hat_t = m_t / (1 - beta1^t)
|
|
118
118
|
const correctedMomentum = momentumBuffer.div(biasCorrection1);
|
|
119
|
-
// Compute bias-corrected second moment:
|
|
119
|
+
// Compute bias-corrected second moment: v_hat_t = v_t / (1 - beta2^t)
|
|
120
120
|
const correctedVelocity = velocityBuffer.div(biasCorrection2);
|
|
121
|
-
// Update parameters:
|
|
121
|
+
// Update parameters: theta_t = theta_{t-1} - alpha * m_hat_t / (sqrt(v_hat_t) + epsilon)
|
|
122
122
|
const denom = correctedVelocity.sqrt().add(this.eps);
|
|
123
123
|
const stepSize = correctedMomentum.div(denom).mul(this.lr);
|
|
124
124
|
const newParam = detachedParam.sub(stepSize);
|
|
@@ -166,17 +166,17 @@ class AdamW extends BaseOptimizer {
|
|
|
166
166
|
velocityBuffer = core_1.Tensor.zerosLike(grad); // Initialize with zeros (same shape as grad)
|
|
167
167
|
this.velocityBuffers.set(param, velocityBuffer);
|
|
168
168
|
}
|
|
169
|
-
// Update biased first moment estimate: m_t =
|
|
169
|
+
// Update biased first moment estimate: m_t = beta1 * m_{t-1} + (1 - beta1) * g_t
|
|
170
170
|
momentumBuffer = momentumBuffer.mul(beta1).add(grad.mul(1 - beta1));
|
|
171
171
|
this.momentumBuffers.set(param, momentumBuffer);
|
|
172
|
-
// Update biased second moment estimate: v_t =
|
|
172
|
+
// Update biased second moment estimate: v_t = beta2 * v_{t-1} + (1 - beta2) * g_t^2
|
|
173
173
|
velocityBuffer = velocityBuffer.mul(beta2).add(grad.pow(2).mul(1 - beta2));
|
|
174
174
|
this.velocityBuffers.set(param, velocityBuffer);
|
|
175
|
-
// Compute bias-corrected first moment:
|
|
175
|
+
// Compute bias-corrected first moment: m_hat_t = m_t / (1 - beta1^t)
|
|
176
176
|
const correctedMomentum = momentumBuffer.div(biasCorrection1);
|
|
177
|
-
// Compute bias-corrected second moment:
|
|
177
|
+
// Compute bias-corrected second moment: v_hat_t = v_t / (1 - beta2^t)
|
|
178
178
|
const correctedVelocity = velocityBuffer.div(biasCorrection2);
|
|
179
|
-
// Update parameters:
|
|
179
|
+
// Update parameters: theta_t = theta_{t-1} - alpha * m_hat_t / (sqrt(v_hat_t) + epsilon)
|
|
180
180
|
const denom = correctedVelocity.sqrt().add(this.eps);
|
|
181
181
|
const stepSize = correctedMomentum.div(denom).mul(this.lr);
|
|
182
182
|
const newParam = detachedParam.sub(stepSize);
|