catniff 0.6.3 → 0.6.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -76,9 +76,9 @@ optim.step();
  console.log("Updated weight:", w.data); // Should move toward 3.0
  ```
 
- ## Neural networks
+ ## Neural networks & Deep learning
 
- There are built-in neural network constructs in Catniff as well:
+ There are built-in neural network constructs in Catniff as well, from simple prebuilt nn layers:
  ```js
  const { Tensor, nn } = require("catniff");
 
@@ -102,6 +102,24 @@ gruCell.forward(b, c);
  lstmCell.forward(b, c, c);
  ```
 
+ to more advanced constructs like normalization, embedding, and attention:
+ ```js
+ // 1. Embedding: tokens -> vectors
+ const embedding = new nn.Embedding(100, 64);
+ const tokens = new Tensor([[1, 5, 23], [8, 2, 15]]);
+ const embedded = embedding.forward(tokens);
+
+ // 2. Self-Attention
+ const attention = new nn.MultiheadAttention(64, 8, 0.1);
+ const [output, weights] = attention.forward(embedded, embedded, embedded);
+
+ // 3. Layer Normalization
+ const layerNorm = new nn.LayerNorm(64);
+ const normalized = layerNorm.forward(output);
+
+ console.log(normalized.val());
+ ```
+
  And it can still do much more, check out the docs and examples below for more information.
 
  ## Documentation
package/dist/core.d.ts CHANGED
@@ -182,6 +182,9 @@ export declare class Tensor {
  mv(other: TensorValue | Tensor): Tensor;
  matmul(other: TensorValue | Tensor): Tensor;
  dropout(rate: number): Tensor;
+ triu(diagonal?: number): Tensor;
+ tril(diagonal?: number): Tensor;
+ maskedFill(mask: Tensor | TensorValue, value: number): Tensor;
  static full(shape: readonly number[], num: number, options?: TensorOptions): Tensor;
  static fullLike(tensor: Tensor, num: number, options?: TensorOptions): Tensor;
  static ones(shape?: readonly number[], options?: TensorOptions): Tensor;
@@ -194,6 +197,7 @@ export declare class Tensor {
  static randnLike(tensor: Tensor, options?: TensorOptions): Tensor;
  static randint(shape: readonly number[], low: number, high: number, options?: TensorOptions): Tensor;
  static randintLike(tensor: Tensor, low: number, high: number, options?: TensorOptions): Tensor;
+ static randperm(n: number, options?: TensorOptions): Tensor;
  static normal(shape: number[], mean: number, stdDev: number, options?: TensorOptions): Tensor;
  static uniform(shape: number[], low: number, high: number, options?: TensorOptions): Tensor;
  static arange(start: number, stop?: number, step?: number, options?: TensorOptions): Tensor;
package/dist/core.js CHANGED
@@ -1417,6 +1417,59 @@ class Tensor {
  const mask = uniform.lt(keepRate);
  return this.mul(mask).div(keepRate);
  }
+ // Get the upper triangular part with respect to main diagonal
+ triu(diagonal = 0) {
+ if (this.shape.length < 2) {
+ throw new Error("triu requires at least 2 dimensions");
+ }
+ const maskShape = this.shape.slice(-2);
+ const maskStrides = Tensor.getStrides(maskShape);
+ const maskSize = Tensor.shapeToSize(maskShape);
+ const maskValue = new Array(maskSize).fill(1);
+ const [rows, cols] = maskShape;
+ for (let i = 0; i < rows; i++) {
+ const maxJ = Math.min(cols, i + diagonal);
+ for (let j = 0; j < maxJ; j++) {
+ maskValue[i * maskStrides[0] + j * maskStrides[1]] = 0;
+ }
+ }
+ const mask = new Tensor(maskValue, {
+ shape: maskShape,
+ strides: maskStrides,
+ numel: maskSize,
+ device: this.device
+ });
+ return this.mul(mask);
+ }
+ // Get the lower triangular part with respect to main diagonal
+ tril(diagonal = 0) {
+ if (this.shape.length < 2) {
+ throw new Error("tril requires at least 2 dimensions");
+ }
+ const maskShape = this.shape.slice(-2);
+ const maskStrides = Tensor.getStrides(maskShape);
+ const maskSize = Tensor.shapeToSize(maskShape);
+ const maskValue = new Array(maskSize).fill(0);
+ const [rows, cols] = maskShape;
+ for (let i = 0; i < rows; i++) {
+ const maxJ = Math.min(cols, i + diagonal + 1);
+ for (let j = 0; j < maxJ; j++) {
+ maskValue[i * maskStrides[0] + j * maskStrides[1]] = 1;
+ }
+ }
+ const mask = new Tensor(maskValue, {
+ shape: maskShape,
+ strides: maskStrides,
+ numel: maskSize,
+ device: this.device
+ });
+ return this.mul(mask);
+ }
+ // Fill specific positions of this tensor with a value through a mask
+ maskedFill(mask, value) {
+ mask = this.handleOther(mask);
+ return this.mul(mask.logicalNot()).add(mask.mul(value));
+ }
  // Utility to create a new tensor filled with a number
  static full(shape, num, options = {}) {
  if (shape.length === 0)
@@ -1552,6 +1605,15 @@ class Tensor {
  ...options
  });
  }
+ // Utility to create a new tensor filled with integers from 0 to n, randomly shuffled
+ static randperm(n, options = {}) {
+ const outputValue = new Array(n);
+ for (let i = 0; i < n; i++) {
+ outputValue[i] = i;
+ }
+ (0, utils_1.fyShuffle)(outputValue);
+ return new Tensor(outputValue, { shape: [n], numel: n, ...options });
+ }
  // Utility to create a new tensor filled with a random number with normal distribution of custom mean and stddev
  static normal(shape, mean, stdDev, options = {}) {
  if (shape.length === 0)
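
The new `triu`, `tril`, `maskedFill`, and `randperm` helpers above are the masking and shuffling primitives used elsewhere in this release. A minimal usage sketch, assuming the `require`/`val()` conventions shown in the README (outputs in comments are illustrative):

```js
const { Tensor } = require("catniff");

// Triangular parts of a 3x3 matrix (main diagonal by default)
const m = new Tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]]);
console.log(m.triu().val());   // entries below the diagonal zeroed out
console.log(m.tril(-1).val()); // only entries strictly below the diagonal kept

// maskedFill: positions where the mask is 1 are replaced with the given value
const mask = new Tensor([[0, 1, 1], [0, 0, 1], [0, 0, 0]]);
console.log(m.maskedFill(mask, 0).val());

// randperm: the integers 0..n-1 in random order
console.log(Tensor.randperm(5).val()); // e.g. [3, 0, 4, 1, 2]
```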
package/dist/nn.d.ts CHANGED
@@ -62,6 +62,18 @@ declare class Embedding {
  constructor(numEmbeddings: number, embeddingDim: number, device: string);
  forward(input: Tensor | TensorValue): Tensor;
  }
+ declare class MultiheadAttention {
+ qProjection: Linear;
+ kProjection: Linear;
+ vProjection: Linear;
+ oProjection: Linear;
+ embedDim: number;
+ numHeads: number;
+ headDim: number;
+ dropout: number;
+ constructor(embedDim: number, numHeads: number, dropout?: number, bias?: boolean, device?: string);
+ forward(query: Tensor, key: Tensor, value: Tensor, needWeights?: boolean, attnMask?: Tensor, averageAttnWeights?: boolean): [Tensor, Tensor | undefined];
+ }
  export interface StateDict {
  [key: string]: any;
  }
@@ -72,6 +84,7 @@ export declare const nn: {
  LSTMCell: typeof LSTMCell;
  LayerNorm: typeof LayerNorm;
  Embedding: typeof Embedding;
+ MultiheadAttention: typeof MultiheadAttention;
  state: {
  getParameters(model: any, visited?: WeakSet<object>): Tensor[];
  moveParameters(model: any, device: string): void;
package/dist/nn.js CHANGED
@@ -197,6 +197,57 @@ class Embedding {
  return this.weight.index(input);
  }
  }
+ class MultiheadAttention {
+ qProjection;
+ kProjection;
+ vProjection;
+ oProjection;
+ embedDim;
+ numHeads;
+ headDim;
+ dropout;
+ constructor(embedDim, numHeads, dropout = 0, bias = true, device) {
+ this.qProjection = new exports.nn.Linear(embedDim, embedDim, bias, device);
+ this.kProjection = new exports.nn.Linear(embedDim, embedDim, bias, device);
+ this.vProjection = new exports.nn.Linear(embedDim, embedDim, bias, device);
+ this.oProjection = new exports.nn.Linear(embedDim, embedDim, bias, device);
+ this.embedDim = embedDim;
+ this.numHeads = numHeads;
+ this.headDim = Math.floor(embedDim / numHeads);
+ this.dropout = dropout;
+ }
+ forward(query, key, value, needWeights = true, attnMask, averageAttnWeights = true) {
+ // Batch-first
+ const [batchSize, targetLen, embedDim] = query.shape;
+ const sourceLen = key.shape[1];
+ let Q = this.qProjection.forward(query); // (batchSize, targetLen, embedDim)
+ let K = this.kProjection.forward(key); // (batchSize, sourceLen, embedDim)
+ let V = this.vProjection.forward(value); // (batchSize, sourceLen, embedDim)
+ // (batchSize, numHeads, targetLen/sourceLen, headDim)
+ Q = Q.reshape([batchSize, targetLen, this.numHeads, this.headDim]).transpose(1, 2);
+ K = K.reshape([batchSize, sourceLen, this.numHeads, this.headDim]).transpose(1, 2);
+ V = V.reshape([batchSize, sourceLen, this.numHeads, this.headDim]).transpose(1, 2);
+ // Attention scores
+ let scores = Q.matmul(K.transpose(-2, -1)).div(Math.sqrt(this.headDim));
+ // Apply attention mask if specified
+ if (attnMask) {
+ scores = scores.maskedFill(attnMask, -Infinity);
+ }
+ // Calculate attention weights
+ let attnWeights = scores.softmax().dropout(this.dropout);
+ // Apply attention to values
+ let attnOutput = attnWeights.matmul(V); // (batchSize, numHeads, targetLen, headDim)
+ // (batchSize, targetLen, embedDim)
+ attnOutput = attnOutput.transpose(1, 2).reshape([batchSize, targetLen, embedDim]);
+ // Output
+ const output = this.oProjection.forward(attnOutput);
+ // Average weights if needed
+ if (averageAttnWeights) {
+ attnWeights = attnWeights.mean(1);
+ }
+ return [output, needWeights ? attnWeights : undefined];
+ }
+ }
  const state = {
  getParameters(model, visited = new WeakSet()) {
  if (visited.has(model))
@@ -266,5 +317,6 @@ exports.nn = {
  LSTMCell,
  LayerNorm,
  Embedding,
+ MultiheadAttention,
  state
  };
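
Putting the pieces together, here is a short sketch of how `MultiheadAttention` and the new masking helpers might be combined for causal self-attention. The 0/1 mask convention (1 marks positions filled with `-Infinity`) follows `maskedFill` above, and `Tensor.uniform`/`Tensor.ones` follow the static creators declared in `core.d.ts`; broadcasting a `(seqLen, seqLen)` mask over the per-head score tensor is an assumption:

```js
const { Tensor, nn } = require("catniff");

const embedDim = 64, numHeads = 8, seqLen = 10, batchSize = 2;
const attention = new nn.MultiheadAttention(embedDim, numHeads);

// Batch-first input: (batchSize, seqLen, embedDim)
const x = Tensor.uniform([batchSize, seqLen, embedDim], -1, 1);

// Causal mask: 1 strictly above the main diagonal hides future positions
const causalMask = Tensor.ones([seqLen, seqLen]).triu(1);

// Self-attention: query, key, and value are the same tensor
const [output, weights] = attention.forward(x, x, x, true, causalMask);
console.log(output.shape); // expected: [2, 10, 64]
```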
package/dist/utils.d.ts CHANGED
@@ -4,3 +4,4 @@ export declare function erfinv(x: number): number;
  export declare function randUniform(low?: number, high?: number): number;
  export declare function randNormal(mean?: number, stdDev?: number): number;
  export declare function randInt(low: number, high: number): number;
+ export declare function fyShuffle(array: any[]): void;
package/dist/utils.js CHANGED
@@ -6,6 +6,7 @@ exports.erfinv = erfinv;
  exports.randUniform = randUniform;
  exports.randNormal = randNormal;
  exports.randInt = randInt;
+ exports.fyShuffle = fyShuffle;
  // Error function using Abramowitz and Stegun approximation
  function erf(x) {
  const a1 = 0.254829592;
@@ -36,15 +37,26 @@ function erfinv(x) {
  const sign = x >= 0 ? 1 : -1;
  return sign * Math.sqrt(-part1 + Math.sqrt(part1 * part1 - part2));
  }
+ // Generate a random number with uniform distribution
  function randUniform(low = 0, high = 1) {
  return Math.random() * (high - low) + low;
  }
+ // Generate a random number with normal distribution
  function randNormal(mean = 0, stdDev = 1) {
  const u = 1 - Math.random();
  const v = 1 - Math.random();
  const z = Math.sqrt(-2.0 * Math.log(u)) * Math.cos(2.0 * Math.PI * v);
  return z * stdDev + mean;
  }
+ // Generate a random integer
  function randInt(low, high) {
  return Math.floor(Math.random() * (high - low) + low);
  }
+ // Randomly shuffle an array with fisher-yates algorithm
+ function fyShuffle(array) {
+ for (let i = array.length - 1; i > 0; i--) {
+ const j = Math.floor(Math.random() * (i + 1));
+ [array[i], array[j]] = [array[j], array[i]];
+ }
+ }
+ ;
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "catniff",
- "version": "0.6.3",
+ "version": "0.6.5",
  "description": "A small Torch-like deep learning framework for Javascript",
  "main": "index.js",
  "scripts": {