catniff 0.6.3 → 0.6.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +20 -2
- package/dist/core.d.ts +4 -0
- package/dist/core.js +62 -0
- package/dist/nn.d.ts +13 -0
- package/dist/nn.js +52 -0
- package/dist/utils.d.ts +1 -0
- package/dist/utils.js +12 -0
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -76,9 +76,9 @@ optim.step();
|
|
|
76
76
|
console.log("Updated weight:", w.data); // Should move toward 3.0
|
|
77
77
|
```
|
|
78
78
|
|
|
79
|
-
## Neural networks
|
|
79
|
+
## Neural networks & Deep learning
|
|
80
80
|
|
|
81
|
-
There are built-in neural network constructs in Catniff as well:
|
|
81
|
+
There are built-in neural network constructs in Catniff as well, from simple prebuilt nn layers:
|
|
82
82
|
```js
|
|
83
83
|
const { Tensor, nn } = require("catniff");
|
|
84
84
|
|
|
@@ -102,6 +102,24 @@ gruCell.forward(b, c);
|
|
|
102
102
|
lstmCell.forward(b, c, c);
|
|
103
103
|
```
|
|
104
104
|
|
|
105
|
+
to more advanced constructs like normalization, embedding, and attention:
|
|
106
|
+
```js
|
|
107
|
+
// 1. Embedding: tokens -> vectors
|
|
108
|
+
const embedding = new nn.Embedding(100, 64);
|
|
109
|
+
const tokens = new Tensor([[1, 5, 23], [8, 2, 15]]);
|
|
110
|
+
const embedded = embedding.forward(tokens);
|
|
111
|
+
|
|
112
|
+
// 2. Self-Attention
|
|
113
|
+
const attention = new nn.MultiheadAttention(64, 8, 0.1);
|
|
114
|
+
const [output, weights] = attention.forward(embedded, embedded, embedded);
|
|
115
|
+
|
|
116
|
+
// 3. Layer Normalization
|
|
117
|
+
const layerNorm = new nn.LayerNorm(64);
|
|
118
|
+
const normalized = layerNorm.forward(output);
|
|
119
|
+
|
|
120
|
+
console.log(normalized.val());
|
|
121
|
+
```
|
|
122
|
+
|
|
105
123
|
And it can still do much more, check out the docs and examples below for more information.
|
|
106
124
|
|
|
107
125
|
## Documentation
|
package/dist/core.d.ts
CHANGED
|
@@ -182,6 +182,9 @@ export declare class Tensor {
|
|
|
182
182
|
mv(other: TensorValue | Tensor): Tensor;
|
|
183
183
|
matmul(other: TensorValue | Tensor): Tensor;
|
|
184
184
|
dropout(rate: number): Tensor;
|
|
185
|
+
triu(diagonal?: number): Tensor;
|
|
186
|
+
tril(diagonal?: number): Tensor;
|
|
187
|
+
maskedFill(mask: Tensor | TensorValue, value: number): Tensor;
|
|
185
188
|
static full(shape: readonly number[], num: number, options?: TensorOptions): Tensor;
|
|
186
189
|
static fullLike(tensor: Tensor, num: number, options?: TensorOptions): Tensor;
|
|
187
190
|
static ones(shape?: readonly number[], options?: TensorOptions): Tensor;
|
|
@@ -194,6 +197,7 @@ export declare class Tensor {
|
|
|
194
197
|
static randnLike(tensor: Tensor, options?: TensorOptions): Tensor;
|
|
195
198
|
static randint(shape: readonly number[], low: number, high: number, options?: TensorOptions): Tensor;
|
|
196
199
|
static randintLike(tensor: Tensor, low: number, high: number, options?: TensorOptions): Tensor;
|
|
200
|
+
static randperm(n: number, options?: TensorOptions): Tensor;
|
|
197
201
|
static normal(shape: number[], mean: number, stdDev: number, options?: TensorOptions): Tensor;
|
|
198
202
|
static uniform(shape: number[], low: number, high: number, options?: TensorOptions): Tensor;
|
|
199
203
|
static arange(start: number, stop?: number, step?: number, options?: TensorOptions): Tensor;
|
package/dist/core.js
CHANGED
|
@@ -1417,6 +1417,59 @@ class Tensor {
|
|
|
1417
1417
|
const mask = uniform.lt(keepRate);
|
|
1418
1418
|
return this.mul(mask).div(keepRate);
|
|
1419
1419
|
}
|
|
1420
|
+
// Get the upper triangular part with respect to main diagonal
|
|
1421
|
+
triu(diagonal = 0) {
|
|
1422
|
+
if (this.shape.length < 2) {
|
|
1423
|
+
throw new Error("triu requires at least 2 dimensions");
|
|
1424
|
+
}
|
|
1425
|
+
const maskShape = this.shape.slice(-2);
|
|
1426
|
+
const maskStrides = Tensor.getStrides(maskShape);
|
|
1427
|
+
const maskSize = Tensor.shapeToSize(maskShape);
|
|
1428
|
+
const maskValue = new Array(maskSize).fill(1);
|
|
1429
|
+
const [rows, cols] = maskShape;
|
|
1430
|
+
for (let i = 0; i < rows; i++) {
|
|
1431
|
+
const maxJ = Math.min(cols, i + diagonal);
|
|
1432
|
+
for (let j = 0; j < maxJ; j++) {
|
|
1433
|
+
maskValue[i * maskStrides[0] + j * maskStrides[1]] = 0;
|
|
1434
|
+
}
|
|
1435
|
+
}
|
|
1436
|
+
const mask = new Tensor(maskValue, {
|
|
1437
|
+
shape: maskShape,
|
|
1438
|
+
strides: maskStrides,
|
|
1439
|
+
numel: maskSize,
|
|
1440
|
+
device: this.device
|
|
1441
|
+
});
|
|
1442
|
+
return this.mul(mask);
|
|
1443
|
+
}
|
|
1444
|
+
// Get the lower triangular part with respect to main diagonal
|
|
1445
|
+
tril(diagonal = 0) {
|
|
1446
|
+
if (this.shape.length < 2) {
|
|
1447
|
+
throw new Error("triu requires at least 2 dimensions");
|
|
1448
|
+
}
|
|
1449
|
+
const maskShape = this.shape.slice(-2);
|
|
1450
|
+
const maskStrides = Tensor.getStrides(maskShape);
|
|
1451
|
+
const maskSize = Tensor.shapeToSize(maskShape);
|
|
1452
|
+
const maskValue = new Array(maskSize).fill(0);
|
|
1453
|
+
const [rows, cols] = maskShape;
|
|
1454
|
+
for (let i = 0; i < rows; i++) {
|
|
1455
|
+
const maxJ = Math.min(cols, i + diagonal + 1);
|
|
1456
|
+
for (let j = 0; j < maxJ; j++) {
|
|
1457
|
+
maskValue[i * maskStrides[0] + j * maskStrides[1]] = 1;
|
|
1458
|
+
}
|
|
1459
|
+
}
|
|
1460
|
+
const mask = new Tensor(maskValue, {
|
|
1461
|
+
shape: maskShape,
|
|
1462
|
+
strides: maskStrides,
|
|
1463
|
+
numel: maskSize,
|
|
1464
|
+
device: this.device
|
|
1465
|
+
});
|
|
1466
|
+
return this.mul(mask);
|
|
1467
|
+
}
|
|
1468
|
+
// Fill specific positions of this tensor with a value through a mask
|
|
1469
|
+
maskedFill(mask, value) {
|
|
1470
|
+
mask = this.handleOther(mask);
|
|
1471
|
+
return this.mul(mask.logicalNot()).add(mask.mul(value));
|
|
1472
|
+
}
|
|
1420
1473
|
// Utility to create a new tensor filled with a number
|
|
1421
1474
|
static full(shape, num, options = {}) {
|
|
1422
1475
|
if (shape.length === 0)
|
|
@@ -1552,6 +1605,15 @@ class Tensor {
|
|
|
1552
1605
|
...options
|
|
1553
1606
|
});
|
|
1554
1607
|
}
|
|
1608
|
+
// Utility to create a new tensor filled with integers from 0 to n, randomly shuffled
|
|
1609
|
+
static randperm(n, options = {}) {
|
|
1610
|
+
const outputValue = new Array(n);
|
|
1611
|
+
for (let i = 0; i < n; i++) {
|
|
1612
|
+
outputValue[i] = i;
|
|
1613
|
+
}
|
|
1614
|
+
(0, utils_1.fyShuffle)(outputValue);
|
|
1615
|
+
return new Tensor(outputValue, { shape: [n], numel: n, ...options });
|
|
1616
|
+
}
|
|
1555
1617
|
// Utility to create a new tensor filled with a random number with normal distribution of custom mean and stddev
|
|
1556
1618
|
static normal(shape, mean, stdDev, options = {}) {
|
|
1557
1619
|
if (shape.length === 0)
|
package/dist/nn.d.ts
CHANGED
|
@@ -62,6 +62,18 @@ declare class Embedding {
|
|
|
62
62
|
constructor(numEmbeddings: number, embeddingDim: number, device: string);
|
|
63
63
|
forward(input: Tensor | TensorValue): Tensor;
|
|
64
64
|
}
|
|
65
|
+
declare class MultiheadAttention {
|
|
66
|
+
qProjection: Linear;
|
|
67
|
+
kProjection: Linear;
|
|
68
|
+
vProjection: Linear;
|
|
69
|
+
oProjection: Linear;
|
|
70
|
+
embedDim: number;
|
|
71
|
+
numHeads: number;
|
|
72
|
+
headDim: number;
|
|
73
|
+
dropout: number;
|
|
74
|
+
constructor(embedDim: number, numHeads: number, dropout?: number, bias?: boolean, device?: string);
|
|
75
|
+
forward(query: Tensor, key: Tensor, value: Tensor, needWeights?: boolean, attnMask?: Tensor, averageAttnWeights?: boolean): [Tensor, Tensor | undefined];
|
|
76
|
+
}
|
|
65
77
|
export interface StateDict {
|
|
66
78
|
[key: string]: any;
|
|
67
79
|
}
|
|
@@ -72,6 +84,7 @@ export declare const nn: {
|
|
|
72
84
|
LSTMCell: typeof LSTMCell;
|
|
73
85
|
LayerNorm: typeof LayerNorm;
|
|
74
86
|
Embedding: typeof Embedding;
|
|
87
|
+
MultiheadAttention: typeof MultiheadAttention;
|
|
75
88
|
state: {
|
|
76
89
|
getParameters(model: any, visited?: WeakSet<object>): Tensor[];
|
|
77
90
|
moveParameters(model: any, device: string): void;
|
package/dist/nn.js
CHANGED
|
@@ -197,6 +197,57 @@ class Embedding {
|
|
|
197
197
|
return this.weight.index(input);
|
|
198
198
|
}
|
|
199
199
|
}
|
|
200
|
+
class MultiheadAttention {
|
|
201
|
+
qProjection;
|
|
202
|
+
kProjection;
|
|
203
|
+
vProjection;
|
|
204
|
+
oProjection;
|
|
205
|
+
embedDim;
|
|
206
|
+
numHeads;
|
|
207
|
+
headDim;
|
|
208
|
+
dropout;
|
|
209
|
+
constructor(embedDim, numHeads, dropout = 0, bias = true, device) {
|
|
210
|
+
this.qProjection = new exports.nn.Linear(embedDim, embedDim, bias, device);
|
|
211
|
+
this.kProjection = new exports.nn.Linear(embedDim, embedDim, bias, device);
|
|
212
|
+
this.vProjection = new exports.nn.Linear(embedDim, embedDim, bias, device);
|
|
213
|
+
this.oProjection = new exports.nn.Linear(embedDim, embedDim, bias, device);
|
|
214
|
+
this.embedDim = embedDim;
|
|
215
|
+
this.numHeads = numHeads;
|
|
216
|
+
this.headDim = Math.floor(embedDim / numHeads);
|
|
217
|
+
this.dropout = dropout;
|
|
218
|
+
}
|
|
219
|
+
forward(query, key, value, needWeights = true, attnMask, averageAttnWeights = true) {
|
|
220
|
+
// Batch-first
|
|
221
|
+
const [batchSize, targetLen, embedDim] = query.shape;
|
|
222
|
+
const sourceLen = key.shape[1];
|
|
223
|
+
let Q = this.qProjection.forward(query); // (batchSize, targetLen, embedDim)
|
|
224
|
+
let K = this.kProjection.forward(key); // (batchSize, sourceLen, embedDim)
|
|
225
|
+
let V = this.vProjection.forward(value); // (batchSize, sourceLen, embedDim)
|
|
226
|
+
// (batchSize, numHeads, targetLen/sourceLen, headDim)
|
|
227
|
+
Q = Q.reshape([batchSize, targetLen, this.numHeads, this.headDim]).transpose(1, 2);
|
|
228
|
+
K = K.reshape([batchSize, sourceLen, this.numHeads, this.headDim]).transpose(1, 2);
|
|
229
|
+
V = V.reshape([batchSize, sourceLen, this.numHeads, this.headDim]).transpose(1, 2);
|
|
230
|
+
// Attention scores
|
|
231
|
+
let scores = Q.matmul(K.transpose(-2, -1)).div(Math.sqrt(this.headDim));
|
|
232
|
+
// Apply attention mask if specified
|
|
233
|
+
if (attnMask) {
|
|
234
|
+
scores = scores.maskedFill(attnMask, -Infinity);
|
|
235
|
+
}
|
|
236
|
+
// Calculate attention weights
|
|
237
|
+
let attnWeights = scores.softmax().dropout(this.dropout);
|
|
238
|
+
// Apply attention to values
|
|
239
|
+
let attnOutput = attnWeights.matmul(V); // (batchSize, numHeads, targetLen, headDim)
|
|
240
|
+
// (batchSize, targetLen, embedDim)
|
|
241
|
+
attnOutput = attnOutput.transpose(1, 2).reshape([batchSize, targetLen, embedDim]);
|
|
242
|
+
// Output
|
|
243
|
+
const output = this.oProjection.forward(attnOutput);
|
|
244
|
+
// Average weights if needed
|
|
245
|
+
if (averageAttnWeights) {
|
|
246
|
+
attnWeights = attnWeights.mean(1);
|
|
247
|
+
}
|
|
248
|
+
return [output, needWeights ? attnWeights : undefined];
|
|
249
|
+
}
|
|
250
|
+
}
|
|
200
251
|
const state = {
|
|
201
252
|
getParameters(model, visited = new WeakSet()) {
|
|
202
253
|
if (visited.has(model))
|
|
@@ -266,5 +317,6 @@ exports.nn = {
|
|
|
266
317
|
LSTMCell,
|
|
267
318
|
LayerNorm,
|
|
268
319
|
Embedding,
|
|
320
|
+
MultiheadAttention,
|
|
269
321
|
state
|
|
270
322
|
};
|
package/dist/utils.d.ts
CHANGED
|
@@ -4,3 +4,4 @@ export declare function erfinv(x: number): number;
|
|
|
4
4
|
export declare function randUniform(low?: number, high?: number): number;
|
|
5
5
|
export declare function randNormal(mean?: number, stdDev?: number): number;
|
|
6
6
|
export declare function randInt(low: number, high: number): number;
|
|
7
|
+
export declare function fyShuffle(array: any[]): void;
|
package/dist/utils.js
CHANGED
|
@@ -6,6 +6,7 @@ exports.erfinv = erfinv;
|
|
|
6
6
|
exports.randUniform = randUniform;
|
|
7
7
|
exports.randNormal = randNormal;
|
|
8
8
|
exports.randInt = randInt;
|
|
9
|
+
exports.fyShuffle = fyShuffle;
|
|
9
10
|
// Error function using Abramowitz and Stegun approximation
|
|
10
11
|
function erf(x) {
|
|
11
12
|
const a1 = 0.254829592;
|
|
@@ -36,15 +37,26 @@ function erfinv(x) {
|
|
|
36
37
|
const sign = x >= 0 ? 1 : -1;
|
|
37
38
|
return sign * Math.sqrt(-part1 + Math.sqrt(part1 * part1 - part2));
|
|
38
39
|
}
|
|
40
|
+
// Generate a random number with uniform distribution
|
|
39
41
|
function randUniform(low = 0, high = 1) {
|
|
40
42
|
return Math.random() * (high - low) + low;
|
|
41
43
|
}
|
|
44
|
+
// Generate a random number with normal distribution
|
|
42
45
|
function randNormal(mean = 0, stdDev = 1) {
|
|
43
46
|
const u = 1 - Math.random();
|
|
44
47
|
const v = 1 - Math.random();
|
|
45
48
|
const z = Math.sqrt(-2.0 * Math.log(u)) * Math.cos(2.0 * Math.PI * v);
|
|
46
49
|
return z * stdDev + mean;
|
|
47
50
|
}
|
|
51
|
+
// Generate a random integer
|
|
48
52
|
function randInt(low, high) {
|
|
49
53
|
return Math.floor(Math.random() * (high - low) + low);
|
|
50
54
|
}
|
|
55
|
+
// Randomly shuffle an array with fisher-yates algorithm
|
|
56
|
+
function fyShuffle(array) {
|
|
57
|
+
for (let i = array.length - 1; i > 0; i--) {
|
|
58
|
+
const j = Math.floor(Math.random() * (i + 1));
|
|
59
|
+
[array[i], array[j]] = [array[j], array[i]];
|
|
60
|
+
}
|
|
61
|
+
}
|
|
62
|
+
;
|