catniff 0.5.5 → 0.5.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
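In summary, 0.5.5 → 0.5.7 adds stride-aware `isContiguous()`, `contiguous()`, `reshape()`, and `permute()` methods to `Tensor`; switches gradient closures from `withGrad(false)` to `detach()`; moves `LSTMCell` from a top-level export into the `nn` namespace and adds a new `nn.LayerNorm` module; fixes a `this`-binding bug in the recursive `nn.state.getStateDict`/`loadStateDict` calls; and bumps the package version.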
package/README.md CHANGED
@@ -120,10 +120,11 @@ All available APIs are in [`./src/`](./src/) if you want to dig deeper.
 
 ## Todos
 
-* Bug fixes.
-* More tensor ops.
-* GPU acceleration.
+* More general tensor ops.
 * More general neural net APIs.
+* GPU acceleration.
+* Comprehensive caching.
+* Bug fixes.
 * More detailed documentation.
 * Code refactoring.
 * Proper tests.
package/dist/core.d.ts CHANGED
@@ -40,6 +40,9 @@ export declare class Tensor {
     elementWiseSelfDAG(op: (a: number) => number, thisGrad?: (self: Tensor, outGrad: Tensor) => Tensor): Tensor;
     static forceTensor(value: TensorValue | Tensor): Tensor;
     static addGrad(tensor: Tensor, accumGrad: Tensor): void;
+    isContiguous(): boolean;
+    contiguous(): Tensor;
+    reshape(newShape: readonly number[]): Tensor;
     squeeze(dims?: number[] | number): Tensor;
     unsqueeze(dim: number): Tensor;
     sum(dims?: number[] | number, keepDims?: boolean): Tensor;
@@ -143,6 +146,7 @@ export declare class Tensor {
     swapaxes: (dim1: number, dim2: number) => Tensor;
     swapdims: (dim1: number, dim2: number) => Tensor;
     t(): Tensor;
+    permute(dims: number[]): Tensor;
     dot(other: TensorValue | Tensor): Tensor;
     mm(other: TensorValue | Tensor): Tensor;
     bmm(other: TensorValue | Tensor): Tensor;
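The four declarations added above give `Tensor` stride-aware shape manipulation. A minimal usage sketch, assuming the package's Torch-like construction from nested arrays and a top-level `Tensor` export (not part of this diff):

```js
const { Tensor } = require("catniff");

// Freshly constructed tensors are laid out row-major, hence contiguous
const t = new Tensor([[1, 2, 3], [4, 5, 6]]); // shape [2, 3]
console.log(t.isContiguous()); // true

// t() swaps the two axes by reordering strides, not data,
// so the transpose is a non-contiguous view over the same buffer
const tt = t.t(); // shape [3, 2]
console.log(tt.isContiguous()); // false

// contiguous() materializes the view into row-major storage;
// reshape() requires the element counts to match (3 * 2 = 2 * 3)
const r = tt.contiguous().reshape([2, 3]);
```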
package/dist/core.js CHANGED
@@ -201,9 +201,9 @@ class Tensor {
         if (out.requiresGrad) {
             out.gradFn = () => {
                 // Disable gradient collecting of gradients themselves
-                const outGrad = out.grad.withGrad(false);
-                const selfNoGrad = this.withGrad(false);
-                const otherNoGrad = other.withGrad(false);
+                const outGrad = out.grad;
+                const selfNoGrad = this.detach();
+                const otherNoGrad = other.detach();
                 if (this.requiresGrad)
                     Tensor.addGrad(this, thisGrad(selfNoGrad, otherNoGrad, outGrad));
                 if (other.requiresGrad)
@@ -222,8 +222,8 @@ class Tensor {
         if (out.requiresGrad) {
             out.gradFn = () => {
                 // Disable gradient collecting of gradients themselves
-                const outGrad = out.grad.withGrad(false);
-                const selfNoGrad = this.withGrad(false);
+                const outGrad = out.grad;
+                const selfNoGrad = this.detach();
                 if (this.requiresGrad)
                     Tensor.addGrad(this, thisGrad(selfNoGrad, outGrad));
             };
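This hunk and the matching ones below make two related changes inside every `gradFn` closure: the upstream `out.grad` is used as-is, and the forward operands are passed through `detach()` (mirroring PyTorch's naming) instead of `withGrad(false)`. The point of both spellings is the same: tensors fed into gradient arithmetic must not record autograd history, or each backward pass would keep growing the graph. A self-contained toy illustration of that contract (not catniff code):

```js
// Toy autograd node: if gradient math ran on requiresGrad inputs,
// every backward pass would append new nodes to the graph.
class Node {
    constructor(value, requiresGrad = false) {
        this.value = value;
        this.requiresGrad = requiresGrad;
        this.children = [];
    }
    detach() {
        // Same value, no autograd bookkeeping -- the contract gradFn relies on
        return new Node(this.value, false);
    }
    mul(other) {
        const out = new Node(this.value * other.value, this.requiresGrad || other.requiresGrad);
        if (out.requiresGrad) out.children.push(this, other);
        return out;
    }
}
const a = new Node(2, true), b = new Node(3, true);
console.log(a.mul(b).children.length);                   // 2 -- graph grows
console.log(a.detach().mul(b.detach()).children.length); // 0 -- grad math stays off-graph
```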
@@ -261,6 +261,64 @@ class Tensor {
             tensor.grad = tensor.grad.add(squeezedGrad);
         }
     }
+    // Contiguity-related ops
+    isContiguous() {
+        const expectedStrides = Tensor.getStrides(this.shape);
+        if (expectedStrides.length !== this.strides.length) {
+            return false;
+        }
+        for (let i = 0; i < this.strides.length; i++) {
+            if (this.strides[i] !== expectedStrides[i]) {
+                return false;
+            }
+        }
+        return true;
+    }
+    contiguous() {
+        // Check if scalar
+        if (typeof this.value === "number")
+            return this;
+        // Check if already contiguous
+        if (this.isContiguous())
+            return this;
+        const outputStrides = Tensor.getStrides(this.shape);
+        const outputSize = Tensor.shapeToSize(this.shape);
+        const outputValue = new Array(outputSize);
+        for (let index = 0; index < outputSize; index++) {
+            const outputCoords = Tensor.indexToCoords(index, outputStrides);
+            const originalIndex = Tensor.coordsToIndex(outputCoords, this.strides);
+            outputValue[index] = this.value[originalIndex];
+        }
+        const out = new Tensor(outputValue, { shape: this.shape, strides: outputStrides });
+        // Gradient flow back to the original tensor
+        if (this.requiresGrad) {
+            out.requiresGrad = true;
+            out.children.push(this);
+            out.gradFn = () => {
+                Tensor.addGrad(this, out.grad);
+            };
+        }
+        return out;
+    }
+    reshape(newShape) {
+        // Verify shape size
+        const originalSize = Tensor.shapeToSize(this.shape);
+        const outputSize = Tensor.shapeToSize(newShape);
+        if (originalSize !== outputSize) {
+            throw new Error("Cannot reshape: incompatible sizes");
+        }
+        const outputStrides = Tensor.getStrides(newShape);
+        const out = new Tensor(this.contiguous().value, { shape: newShape, strides: outputStrides });
+        // Gradient reshaped and flow back to the original tensor
+        if (this.requiresGrad) {
+            out.requiresGrad = true;
+            out.children.push(this);
+            out.gradFn = () => {
+                Tensor.addGrad(this, out.grad.reshape(this.shape));
+            };
+        }
+        return out;
+    }
     // Tensor squeeze
     squeeze(dims) {
         if (typeof this.value === "number")
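`isContiguous()` compares the tensor's stored strides against the canonical row-major strides for its shape (what `Tensor.getStrides` evidently computes), and `contiguous()` gathers elements through the coordinate round-trip into a fresh row-major buffer. A standalone sketch of the stride arithmetic, with an illustrative helper name:

```js
// Row-major strides for a shape: the last dimension moves fastest.
// For shape [2, 3], strides = [3, 1], so element (i, j) lives at i*3 + j*1.
function rowMajorStrides(shape) {
    const strides = new Array(shape.length);
    let step = 1;
    for (let d = shape.length - 1; d >= 0; d--) {
        strides[d] = step;
        step *= shape[d];
    }
    return strides;
}

console.log(rowMajorStrides([2, 3])); // [3, 1]
console.log(rowMajorStrides([3, 2])); // [2, 1]
```

A transposed `[2, 3]` tensor keeps strides `[1, 3]` for its new shape `[3, 2]`, which differs from the canonical `[2, 1]`, so `isContiguous()` correctly reports `false` and `contiguous()` copies the data.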
@@ -302,7 +360,7 @@ class Tensor {
             out.requiresGrad = true;
             out.children.push(this);
             out.gradFn = () => {
-                let restoredGrad = out.grad.withGrad(false);
+                let restoredGrad = out.grad;
                 for (let i = dims.length - 1; i >= 0; i--) {
                     restoredGrad = restoredGrad.unsqueeze(dims[i]);
                 }
@@ -338,7 +396,7 @@ class Tensor {
             out.requiresGrad = true;
             out.children.push(this);
             out.gradFn = () => {
-                Tensor.addGrad(this, out.grad.withGrad(false).squeeze(dim));
+                Tensor.addGrad(this, out.grad.squeeze(dim));
             };
         }
         return out;
@@ -397,7 +455,7 @@ class Tensor {
             out.children.push(this);
             out.gradFn = () => {
                 const localGrad = new Tensor(gradValue, { shape: gradShape, strides: gradStrides });
-                Tensor.addGrad(this, out.grad.withGrad(false).mul(localGrad));
+                Tensor.addGrad(this, out.grad.mul(localGrad));
             };
         }
         return keepDims ? out : out.squeeze(dims);
@@ -454,7 +512,7 @@ class Tensor {
                     gradValue[realFlatIndex] = outputValue[outFlatIndex] / this.value[realFlatIndex];
                 }
                 const localGrad = new Tensor(gradValue, { shape: gradShape, strides: gradStrides });
-                Tensor.addGrad(this, out.grad.withGrad(false).mul(localGrad));
+                Tensor.addGrad(this, out.grad.mul(localGrad));
             };
         }
         return keepDims ? out : out.squeeze(dims);
@@ -518,7 +576,7 @@ class Tensor {
                     gradValue[realFlatIndex] = 1 / outputFeeders[outFlatIndex];
                 }
                 const localGrad = new Tensor(gradValue, { shape: gradShape, strides: gradStrides });
-                Tensor.addGrad(this, out.grad.withGrad(false).mul(localGrad));
+                Tensor.addGrad(this, out.grad.mul(localGrad));
             };
         }
         return keepDims ? out : out.squeeze(dims);
@@ -588,7 +646,7 @@ class Tensor {
                     gradValue[realFlatIndex] = outputValue[outFlatIndex] === originalValue[realFlatIndex] ? 1 / shareCounts[outFlatIndex] : 0;
                 }
                 const localGrad = new Tensor(gradValue, { shape: gradShape, strides: gradStrides });
-                Tensor.addGrad(this, out.grad.withGrad(false).mul(localGrad));
+                Tensor.addGrad(this, out.grad.mul(localGrad));
             };
         }
         return keepDims ? out : out.squeeze(dims);
@@ -658,7 +716,7 @@ class Tensor {
                     gradValue[realFlatIndex] = outputValue[outFlatIndex] === originalValue[realFlatIndex] ? 1 / shareCounts[outFlatIndex] : 0;
                 }
                 const localGrad = new Tensor(gradValue, { shape: gradShape, strides: gradStrides });
-                Tensor.addGrad(this, out.grad.withGrad(false).mul(localGrad));
+                Tensor.addGrad(this, out.grad.mul(localGrad));
             };
         }
         return keepDims ? out : out.squeeze(dims);
@@ -727,8 +785,8 @@ class Tensor {
             out.requiresGrad = true;
             out.children.push(this);
             out.gradFn = () => {
-                const upstreamGrad = out.grad.withGrad(false);
-                const softmaxOutput = out.withGrad(false);
+                const upstreamGrad = out.grad;
+                const softmaxOutput = out.detach();
                 // Compute element-wise product: ∂L/∂σᵢ × σᵢ
                 const gradTimesOutput = upstreamGrad.mul(softmaxOutput);
                 // Sum over softmax dimensions: Σᵢ(∂L/∂σᵢ × σᵢ)
@@ -1096,7 +1154,7 @@ class Tensor {
         if (this.requiresGrad) {
             out.children.push(this);
             out.gradFn = () => {
-                Tensor.addGrad(this, out.grad.withGrad(false).transpose(dim1, dim2));
+                Tensor.addGrad(this, out.grad.transpose(dim1, dim2));
             };
         }
         return out;
@@ -1111,6 +1169,39 @@ class Tensor {
         }
         return this.transpose(0, 1);
     }
+    // Permute
+    permute(dims) {
+        if (dims.length !== this.shape.length) {
+            throw new Error("Permutation must specify all dimensions");
+        }
+        // Compute new shape and strides
+        const newShape = new Array(dims.length);
+        const newStrides = new Array(dims.length);
+        for (let index = 0; index < dims.length; index++) {
+            const dim = dims[index];
+            newShape[index] = this.shape[dim];
+            newStrides[index] = this.strides[dim];
+        }
+        const out = new Tensor(this.value, {
+            shape: newShape,
+            strides: newStrides
+        });
+        if (this.requiresGrad) {
+            out.requiresGrad = true;
+            out.children.push(this);
+            out.gradFn = () => {
+                // Compute inverse permutation
+                const inverseAxes = new Array(dims.length);
+                for (let i = 0; i < dims.length; i++) {
+                    inverseAxes[dims[i]] = i;
+                }
+                // Permute gradient back to original order
+                const permutedGrad = out.grad.permute(inverseAxes);
+                Tensor.addGrad(this, permutedGrad);
+            };
+        }
+        return out;
+    }
     // 1D tensor dot product
     dot(other) {
         other = Tensor.forceTensor(other);
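`permute` is a zero-copy view: it reorders `shape` and `strides` without touching `value`, and its backward pass routes the gradient through the inverse permutation so axes line up with the original order again. The inverse satisfies `inverse[dims[i]] = i`, as in this standalone check:

```js
// Permuting shape [2, 3, 4] by [2, 0, 1] yields shape [4, 2, 3];
// permuting that result by the inverse restores [2, 3, 4].
const dims = [2, 0, 1];
const inverse = new Array(dims.length);
for (let i = 0; i < dims.length; i++) inverse[dims[i]] = i;
console.log(inverse); // [1, 2, 0]
```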
@@ -1138,9 +1229,9 @@ class Tensor {
         if (out.requiresGrad) {
             out.gradFn = () => {
                 // Disable gradient collecting of gradients themselves
-                const outGrad = out.grad.withGrad(false);
-                const selfNoGrad = this.withGrad(false);
-                const otherNoGrad = other.withGrad(false);
+                const outGrad = out.grad;
+                const selfNoGrad = this.detach();
+                const otherNoGrad = other.detach();
                 if (this.requiresGrad)
                     Tensor.addGrad(this, outGrad.mul(otherNoGrad));
                 if (other.requiresGrad)
@@ -1193,9 +1284,9 @@ class Tensor {
         if (out.requiresGrad) {
             out.gradFn = () => {
                 // Disable gradient collecting of gradients themselves
-                const outGrad = out.grad.withGrad(false);
-                const selfNoGrad = this.withGrad(false);
-                const otherNoGrad = other.withGrad(false);
+                const outGrad = out.grad;
+                const selfNoGrad = this.detach();
+                const otherNoGrad = other.detach();
                 if (this.requiresGrad)
                     Tensor.addGrad(this, outGrad.mm(otherNoGrad.t()));
                 if (other.requiresGrad)
@@ -1251,9 +1342,9 @@ class Tensor {
         if (out.requiresGrad) {
             out.gradFn = () => {
                 // Disable gradient collecting of gradients themselves
-                const outGrad = out.grad.withGrad(false);
-                const selfNoGrad = this.withGrad(false);
-                const otherNoGrad = other.withGrad(false);
+                const outGrad = out.grad;
+                const selfNoGrad = this.detach();
+                const otherNoGrad = other.detach();
                 if (this.requiresGrad)
                     Tensor.addGrad(this, outGrad.bmm(otherNoGrad.transpose(1, 2)));
                 if (other.requiresGrad)
@@ -1350,9 +1441,9 @@ class Tensor {
         if (out.requiresGrad) {
             out.gradFn = () => {
                 other = other;
-                const outGrad = out.grad.withGrad(false);
-                const selfNoGrad = self.withGrad(false);
-                const otherNoGrad = other.withGrad(false);
+                const outGrad = out.grad;
+                const selfNoGrad = self.detach();
+                const otherNoGrad = other.detach();
                 if (this.requiresGrad)
                     Tensor.addGrad(this, outGrad.matmul(otherNoGrad.transpose(lastDim - 1, lastDim)));
                 if (other.requiresGrad)
package/dist/nn.d.ts CHANGED
@@ -29,7 +29,7 @@ declare class GRUCell {
     constructor(inputSize: number, hiddenSize: number, bias?: boolean, device?: string);
     forward(input: Tensor | TensorValue, hidden: Tensor | TensorValue): Tensor;
 }
-export declare class LSTMCell {
+declare class LSTMCell {
     weightII: Tensor;
     weightIF: Tensor;
     weightIG: Tensor;
@@ -49,6 +49,14 @@ export declare class LSTMCell {
     constructor(inputSize: number, hiddenSize: number, bias?: boolean, device?: string);
     forward(input: Tensor | TensorValue, hidden: Tensor | TensorValue, cell: Tensor | TensorValue): [Tensor, Tensor];
 }
+declare class LayerNorm {
+    weight?: Tensor;
+    bias?: Tensor;
+    eps: number;
+    normalizedShape: number[];
+    constructor(normalizedShape: number | number[], eps?: number, elementwiseAffine?: boolean, bias?: boolean, device?: string);
+    forward(input: Tensor | TensorValue): Tensor;
+}
 interface StateDict {
     [key: string]: any;
 }
@@ -56,6 +64,8 @@ export declare const nn: {
     Linear: typeof Linear;
     RNNCell: typeof RNNCell;
     GRUCell: typeof GRUCell;
+    LSTMCell: typeof LSTMCell;
+    LayerNorm: typeof LayerNorm;
     state: {
         getParameters(model: any, visited?: WeakSet<object>): Tensor[];
         getStateDict(model: any, prefix?: string, visited?: WeakSet<object>): StateDict;
package/dist/nn.js CHANGED
@@ -1,6 +1,6 @@
 "use strict";
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.nn = exports.LSTMCell = void 0;
+exports.nn = void 0;
 const core_1 = require("./core");
 function linearTransform(input, weight, bias) {
     let output = input.matmul(weight.t());
@@ -144,7 +144,51 @@ class LSTMCell {
         return [h, c];
     }
 }
-exports.LSTMCell = LSTMCell;
+class LayerNorm {
+    weight;
+    bias;
+    eps;
+    normalizedShape;
+    constructor(normalizedShape, eps = 1e-5, elementwiseAffine = true, bias = true, device) {
+        this.eps = eps;
+        this.normalizedShape = Array.isArray(normalizedShape) ? normalizedShape : [normalizedShape];
+        if (this.normalizedShape.length === 0) {
+            throw new Error("Normalized shape cannot be empty");
+        }
+        if (elementwiseAffine) {
+            this.weight = core_1.Tensor.ones(this.normalizedShape, { requiresGrad: true, device });
+            if (bias) {
+                this.bias = core_1.Tensor.zeros(this.normalizedShape, { requiresGrad: true, device });
+            }
+        }
+    }
+    forward(input) {
+        input = core_1.Tensor.forceTensor(input);
+        // Normalize over the specified dimensions
+        const normalizedDims = this.normalizedShape.length;
+        const startDim = input.shape.length - normalizedDims;
+        if (startDim < 0) {
+            throw new Error("Input does not have enough dims to normalize");
+        }
+        const dims = [];
+        for (let i = 0; i < normalizedDims; i++) {
+            if (input.shape[startDim + i] !== this.normalizedShape[i]) {
+                throw new Error(`Shape mismatch at dim ${startDim + i}: expected ${this.normalizedShape[i]}, got ${input.shape[startDim + i]}`);
+            }
+            dims.push(startDim + i);
+        }
+        const mean = input.mean(dims, true);
+        const variance = input.sub(mean).pow(2).mean(dims, true);
+        let normalized = input.sub(mean).div(variance.add(this.eps).sqrt());
+        if (this.weight) {
+            normalized = normalized.mul(this.weight);
+        }
+        if (this.bias) {
+            normalized = normalized.add(this.bias);
+        }
+        return normalized;
+    }
+}
 const state = {
     getParameters(model, visited = new WeakSet()) {
         if (visited.has(model))
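The new `LayerNorm` follows PyTorch's semantics: it normalizes over the trailing `normalizedShape` dimensions with the biased variance, then applies the optional elementwise affine `weight`/`bias`. A hedged usage sketch, again assuming construction from nested arrays and top-level `Tensor`/`nn` exports:

```js
const { Tensor, nn } = require("catniff");

// Normalize the last dimension (size 4) of a [2, 4] input
const ln = new nn.LayerNorm(4);
const x = new Tensor([[1, 2, 3, 4], [2, 4, 6, 8]]);
const y = ln.forward(x);
// Each row of y now has roughly zero mean and unit variance,
// scaled by ln.weight (initialized to ones) and shifted by ln.bias (zeros).
```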
@@ -178,7 +222,7 @@ const state = {
             stateDict[fullKey] = value.val();
         }
         else if (typeof value === "object" && value !== null) {
-            Object.assign(stateDict, this.getStateDict(value, fullKey, visited));
+            Object.assign(stateDict, state.getStateDict(value, fullKey, visited));
         }
     }
     return stateDict;
@@ -196,7 +240,7 @@ const state = {
             value.replace(new core_1.Tensor(stateDict[fullKey], { device: value.device }));
         }
         else if (typeof value === "object" && value !== null) {
-            this.loadStateDict(value, stateDict, fullKey, visited);
+            state.loadStateDict(value, stateDict, fullKey, visited);
         }
     }
 }
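The two hunks above fix a latent `this`-binding bug: the recursive calls previously went through `this`, which is only correct while the methods are invoked as `state.getStateDict(...)`. If either function is destructured or passed as a callback, `this` becomes `undefined` in strict mode and the recursion throws. Referencing the `state` object directly makes the recursion binding-independent; the failure mode in miniature:

```js
"use strict";
const obj = {
    label: "demo",
    whoAmI() { return this; }
};
console.log(obj.whoAmI() === obj); // true -- called through the object
const { whoAmI } = obj;
console.log(whoAmI());             // undefined -- detached call loses `this`
```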
@@ -205,5 +249,7 @@ exports.nn = {
205
249
  Linear,
206
250
  RNNCell,
207
251
  GRUCell,
252
+ LSTMCell,
253
+ LayerNorm,
208
254
  state
209
255
  };
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "catniff",
-  "version": "0.5.5",
+  "version": "0.5.7",
   "description": "A small Torch-like deep learning framework for Javascript",
   "main": "index.js",
   "scripts": {