openarchx-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. openarchx/__init__.py +11 -0
  2. openarchx/core/tensor.py +179 -0
  3. openarchx/cuda/__init__.py +27 -0
  4. openarchx/cuda/cuda_ops.py +296 -0
  5. openarchx/layers/activations.py +63 -0
  6. openarchx/layers/base.py +40 -0
  7. openarchx/layers/cnn.py +145 -0
  8. openarchx/layers/transformer.py +131 -0
  9. openarchx/nn/__init__.py +26 -0
  10. openarchx/nn/activations.py +127 -0
  11. openarchx/nn/containers.py +174 -0
  12. openarchx/nn/dropout.py +121 -0
  13. openarchx/nn/layers.py +338 -0
  14. openarchx/nn/losses.py +156 -0
  15. openarchx/nn/module.py +18 -0
  16. openarchx/nn/padding.py +120 -0
  17. openarchx/nn/pooling.py +340 -0
  18. openarchx/nn/rnn.py +226 -0
  19. openarchx/nn/transformers.py +187 -0
  20. openarchx/optimizers/adam.py +49 -0
  21. openarchx/optimizers/adaptive.py +63 -0
  22. openarchx/optimizers/base.py +24 -0
  23. openarchx/optimizers/modern.py +98 -0
  24. openarchx/optimizers/optx.py +91 -0
  25. openarchx/optimizers/sgd.py +63 -0
  26. openarchx/quantum/circuit.py +92 -0
  27. openarchx/quantum/gates.py +126 -0
  28. openarchx/utils/__init__.py +50 -0
  29. openarchx/utils/data.py +229 -0
  30. openarchx/utils/huggingface.py +288 -0
  31. openarchx/utils/losses.py +21 -0
  32. openarchx/utils/model_io.py +553 -0
  33. openarchx/utils/pytorch.py +420 -0
  34. openarchx/utils/tensorflow.py +467 -0
  35. openarchx/utils/transforms.py +259 -0
  36. openarchx-0.1.0.dist-info/METADATA +180 -0
  37. openarchx-0.1.0.dist-info/RECORD +43 -0
  38. openarchx-0.1.0.dist-info/WHEEL +5 -0
  39. openarchx-0.1.0.dist-info/licenses/LICENSE +21 -0
  40. openarchx-0.1.0.dist-info/top_level.txt +2 -0
  41. tests/__init__.py +1 -0
  42. tests/test_cuda_ops.py +205 -0
  43. tests/test_integrations.py +236 -0
openarchx/nn/padding.py ADDED
@@ -0,0 +1,120 @@
+ import numpy as np
+ from ..core.tensor import Tensor
+ from .module import Module
+
+ class _ConstantPadNd(Module):
+     def __init__(self, value):
+         super().__init__()
+         self.value = value
+
+     def _pad_array(self, x, pad_width):
+         return np.pad(x.data, pad_width, mode='constant', constant_values=self.value)
+
+ class ConstantPad1d(_ConstantPadNd):
+     def __init__(self, padding, value=0):
+         super().__init__(value)
+         self.padding = padding if isinstance(padding, tuple) else (padding, padding)
+
+     def forward(self, x):
+         pad_width = ((0, 0),) * (len(x.data.shape) - 1) + (self.padding,)
+         return Tensor(self._pad_array(x, pad_width), requires_grad=True)
+
+ class ConstantPad2d(_ConstantPadNd):
+     def __init__(self, padding, value=0):
+         super().__init__(value)
+         if isinstance(padding, int):
+             self.padding = ((padding, padding), (padding, padding))
+         elif len(padding) == 2:
+             self.padding = ((padding[0], padding[0]), (padding[1], padding[1]))
+         else:
+             self.padding = ((padding[0], padding[1]), (padding[2], padding[3]))
+
+     def forward(self, x):
+         pad_width = ((0, 0), (0, 0)) + self.padding
+         return Tensor(self._pad_array(x, pad_width), requires_grad=True)
+
+ class ConstantPad3d(_ConstantPadNd):
+     def __init__(self, padding, value=0):
+         super().__init__(value)
+         if isinstance(padding, int):
+             self.padding = ((padding, padding), (padding, padding), (padding, padding))
+         elif len(padding) == 3:
+             self.padding = ((padding[0], padding[0]),
+                             (padding[1], padding[1]),
+                             (padding[2], padding[2]))
+         else:
+             self.padding = ((padding[0], padding[1]),
+                             (padding[2], padding[3]),
+                             (padding[4], padding[5]))
+
+     def forward(self, x):
+         pad_width = ((0, 0), (0, 0)) + self.padding
+         return Tensor(self._pad_array(x, pad_width), requires_grad=True)
+
+ class ReflectionPad1d(Module):
+     def __init__(self, padding):
+         super().__init__()
+         self.padding = padding if isinstance(padding, tuple) else (padding, padding)
+
+     def forward(self, x):
+         pad_width = ((0, 0),) * (len(x.data.shape) - 1) + (self.padding,)
+         return Tensor(np.pad(x.data, pad_width, mode='reflect'), requires_grad=True)
+
+ class ReflectionPad2d(Module):
+     def __init__(self, padding):
+         super().__init__()
+         if isinstance(padding, int):
+             self.padding = ((padding, padding), (padding, padding))
+         elif len(padding) == 2:
+             self.padding = ((padding[0], padding[0]), (padding[1], padding[1]))
+         else:
+             self.padding = ((padding[0], padding[1]), (padding[2], padding[3]))
+
+     def forward(self, x):
+         pad_width = ((0, 0), (0, 0)) + self.padding
+         return Tensor(np.pad(x.data, pad_width, mode='reflect'), requires_grad=True)
+
+ class ReplicationPad1d(Module):
+     def __init__(self, padding):
+         super().__init__()
+         self.padding = padding if isinstance(padding, tuple) else (padding, padding)
+
+     def forward(self, x):
+         pad_width = ((0, 0),) * (len(x.data.shape) - 1) + (self.padding,)
+         return Tensor(np.pad(x.data, pad_width, mode='edge'), requires_grad=True)
+
+ class ReplicationPad2d(Module):
+     def __init__(self, padding):
+         super().__init__()
+         if isinstance(padding, int):
+             self.padding = ((padding, padding), (padding, padding))
+         elif len(padding) == 2:
+             self.padding = ((padding[0], padding[0]), (padding[1], padding[1]))
+         else:
+             self.padding = ((padding[0], padding[1]), (padding[2], padding[3]))
+
+     def forward(self, x):
+         pad_width = ((0, 0), (0, 0)) + self.padding
+         return Tensor(np.pad(x.data, pad_width, mode='edge'), requires_grad=True)
+
+ class ReplicationPad3d(Module):
+     def __init__(self, padding):
+         super().__init__()
+         if isinstance(padding, int):
+             self.padding = ((padding, padding), (padding, padding), (padding, padding))
+         elif len(padding) == 3:
+             self.padding = ((padding[0], padding[0]),
+                             (padding[1], padding[1]),
+                             (padding[2], padding[2]))
+         else:
+             self.padding = ((padding[0], padding[1]),
+                             (padding[2], padding[3]),
+                             (padding[4], padding[5]))
+
+     def forward(self, x):
+         pad_width = ((0, 0), (0, 0)) + self.padding
+         return Tensor(np.pad(x.data, pad_width, mode='edge'), requires_grad=True)
+
+ class ZeroPad2d(ConstantPad2d):
+     def __init__(self, padding):
+         super().__init__(padding, value=0)
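For orientation, a minimal usage sketch of the padding modules above. This is a hypothetical snippet, not part of the package diff; it assumes Tensor(data, requires_grad=...) wraps a NumPy array as in openarchx/core/tensor.py, and that modules are callable the way the Linear layers in openarchx/nn/rnn.py are. Note that a 4-tuple passed to ConstantPad2d is applied as ((pad[0], pad[1]) on height, (pad[2], pad[3]) on width), which differs from torch.nn.ConstantPad2d's (left, right, top, bottom) order.

    # hypothetical usage sketch (not part of the package diff)
    import numpy as np
    from openarchx.core.tensor import Tensor
    from openarchx.nn.padding import ConstantPad2d, ReflectionPad2d

    x = Tensor(np.arange(16.0).reshape(1, 1, 4, 4), requires_grad=True)  # (N, C, H, W)

    pad = ConstantPad2d((1, 1, 2, 2), value=0)   # (1, 1) on height, (2, 2) on width
    print(pad(x).data.shape)                     # (1, 1, 6, 8)

    refl = ReflectionPad2d(1)                    # reflect requires padding < input size
    print(refl(x).data.shape)                    # (1, 1, 6, 6)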
openarchx/nn/pooling.py ADDED
@@ -0,0 +1,340 @@
+ import numpy as np
+ from ..core.tensor import Tensor
+ from .module import Module
+
+ class MaxPool1d(Module):
+     def __init__(self, kernel_size, stride=None, padding=0):
+         super().__init__()
+         self.kernel_size = kernel_size
+         self.stride = stride if stride is not None else kernel_size
+         self.padding = padding
+
+     def forward(self, x):
+         batch_size, channels, length = x.data.shape
+         # Zero-pad the last dimension if requested
+         if self.padding > 0:
+             x_padded = np.pad(x.data, ((0, 0), (0, 0), (self.padding, self.padding)), mode='constant')
+         else:
+             x_padded = x.data
+         out_length = (length + 2 * self.padding - self.kernel_size) // self.stride + 1
+         out = np.zeros((batch_size, channels, out_length))
+         # Slide the window and take the max of each region
+         for i in range(out_length):
+             start = i * self.stride
+             out[:, :, i] = np.max(x_padded[:, :, start:start + self.kernel_size], axis=2)
+         return Tensor(out, requires_grad=True)
+
+ class MaxPool2d(Module):
+     def __init__(self, kernel_size, stride=None, padding=0):
+         super().__init__()
+         self.kernel_size = kernel_size if isinstance(kernel_size, tuple) else (kernel_size, kernel_size)
+         self.stride = stride if stride is not None else self.kernel_size
+         self.stride = self.stride if isinstance(self.stride, tuple) else (self.stride, self.stride)
+         self.padding = padding if isinstance(padding, tuple) else (padding, padding)
+
+     def forward(self, x):
+         batch_size, channels, height, width = x.data.shape
+         pad_h, pad_w = self.padding
+         stride_h, stride_w = self.stride
+         kernel_h, kernel_w = self.kernel_size
+
+         # Add padding if needed
+         if pad_h > 0 or pad_w > 0:
+             x_padded = np.pad(x.data, ((0, 0), (0, 0), (pad_h, pad_h), (pad_w, pad_w)), mode='constant')
+         else:
+             x_padded = x.data
+
+         # Calculate output dimensions
+         out_height = (height + 2 * pad_h - kernel_h) // stride_h + 1
+         out_width = (width + 2 * pad_w - kernel_w) // stride_w + 1
+
+         # Prepare output array
+         out = np.zeros((batch_size, channels, out_height, out_width))
+
+         # Perform max pooling
+         for b in range(batch_size):
+             for c in range(channels):
+                 for h in range(out_height):
+                     for w in range(out_width):
+                         h_start = h * stride_h
+                         w_start = w * stride_w
+                         h_end = h_start + kernel_h
+                         w_end = w_start + kernel_w
+
+                         pool_region = x_padded[b, c, h_start:h_end, w_start:w_end]
+                         out[b, c, h, w] = np.max(pool_region)
+
+         return Tensor(out, requires_grad=True)
+
+ class AvgPool1d(Module):
+     def __init__(self, kernel_size, stride=None, padding=0):
+         super().__init__()
+         self.kernel_size = kernel_size
+         self.stride = stride if stride is not None else kernel_size
+         self.padding = padding
+
+     def forward(self, x):
+         batch_size, channels, length = x.data.shape
+         # Zero-pad the last dimension if requested
+         if self.padding > 0:
+             x_padded = np.pad(x.data, ((0, 0), (0, 0), (self.padding, self.padding)), mode='constant')
+         else:
+             x_padded = x.data
+         out_length = (length + 2 * self.padding - self.kernel_size) // self.stride + 1
+         out = np.zeros((batch_size, channels, out_length))
+         # Slide the window and average each region
+         for i in range(out_length):
+             start = i * self.stride
+             out[:, :, i] = np.mean(x_padded[:, :, start:start + self.kernel_size], axis=2)
+         return Tensor(out, requires_grad=True)
+
+ class AvgPool2d(Module):
+     def __init__(self, kernel_size, stride=None, padding=0):
+         super().__init__()
+         self.kernel_size = kernel_size if isinstance(kernel_size, tuple) else (kernel_size, kernel_size)
+         self.stride = stride if stride is not None else self.kernel_size
+         self.stride = self.stride if isinstance(self.stride, tuple) else (self.stride, self.stride)
+         self.padding = padding if isinstance(padding, tuple) else (padding, padding)
+
+     def forward(self, x):
+         batch_size, channels, height, width = x.data.shape
+         pad_h, pad_w = self.padding
+         stride_h, stride_w = self.stride
+         kernel_h, kernel_w = self.kernel_size
+
+         # Add padding if needed
+         if pad_h > 0 or pad_w > 0:
+             x_padded = np.pad(x.data, ((0, 0), (0, 0), (pad_h, pad_h), (pad_w, pad_w)), mode='constant')
+         else:
+             x_padded = x.data
+
+         # Calculate output dimensions
+         out_height = (height + 2 * pad_h - kernel_h) // stride_h + 1
+         out_width = (width + 2 * pad_w - kernel_w) // stride_w + 1
+
+         # Prepare output array
+         out = np.zeros((batch_size, channels, out_height, out_width))
+
+         # Perform average pooling
+         for b in range(batch_size):
+             for c in range(channels):
+                 for h in range(out_height):
+                     for w in range(out_width):
+                         h_start = h * stride_h
+                         w_start = w * stride_w
+                         h_end = h_start + kernel_h
+                         w_end = w_start + kernel_w
+
+                         pool_region = x_padded[b, c, h_start:h_end, w_start:w_end]
+                         out[b, c, h, w] = np.mean(pool_region)
+
+         return Tensor(out, requires_grad=True)
+
+ class AdaptiveAvgPool2d(Module):
+     def __init__(self, output_size):
+         super().__init__()
+         self.output_size = output_size if isinstance(output_size, tuple) else (output_size, output_size)
+
+     def forward(self, x):
+         batch_size, channels, height, width = x.data.shape
+         out_h, out_w = self.output_size
+
+         # Calculate the kernel and stride sizes
+         stride_h = height // out_h
+         stride_w = width // out_w
+         kernel_h = height - (out_h - 1) * stride_h
+         kernel_w = width - (out_w - 1) * stride_w
+
+         # Prepare output array
+         out = np.zeros((batch_size, channels, out_h, out_w))
+
+         # Perform adaptive average pooling
+         for b in range(batch_size):
+             for c in range(channels):
+                 for h in range(out_h):
+                     for w in range(out_w):
+                         h_start = h * stride_h
+                         w_start = w * stride_w
+                         h_end = min(h_start + kernel_h, height)
+                         w_end = min(w_start + kernel_w, width)
+
+                         pool_region = x.data[b, c, h_start:h_end, w_start:w_end]
+                         out[b, c, h, w] = np.mean(pool_region)
+
+         return Tensor(out, requires_grad=True)
+
+ class AdaptiveMaxPool2d(Module):
+     def __init__(self, output_size):
+         super().__init__()
+         self.output_size = output_size if isinstance(output_size, tuple) else (output_size, output_size)
+
+     def forward(self, x):
+         batch_size, channels, height, width = x.data.shape
+         out_h, out_w = self.output_size
+
+         # Calculate the kernel and stride sizes
+         stride_h = height // out_h
+         stride_w = width // out_w
+         kernel_h = height - (out_h - 1) * stride_h
+         kernel_w = width - (out_w - 1) * stride_w
+
+         # Prepare output array
+         out = np.zeros((batch_size, channels, out_h, out_w))
+
+         # Perform adaptive max pooling
+         for b in range(batch_size):
+             for c in range(channels):
+                 for h in range(out_h):
+                     for w in range(out_w):
+                         h_start = h * stride_h
+                         w_start = w * stride_w
+                         h_end = min(h_start + kernel_h, height)
+                         w_end = min(w_start + kernel_w, width)
+
+                         pool_region = x.data[b, c, h_start:h_end, w_start:w_end]
+                         out[b, c, h, w] = np.max(pool_region)
+
+         return Tensor(out, requires_grad=True)
+
+ class FractionalMaxPool2d(Module):
+     def __init__(self, kernel_size, output_size=None, output_ratio=None, return_indices=False):
+         super().__init__()
+         self.kernel_size = kernel_size if isinstance(kernel_size, tuple) else (kernel_size, kernel_size)
+         self.output_size = output_size
+         self.output_ratio = output_ratio
+         self.return_indices = return_indices
+
+     def forward(self, x):
+         batch_size, channels, height, width = x.data.shape
+
+         if self.output_size is not None:
+             out_h, out_w = self.output_size
+         else:
+             out_h = int(height * self.output_ratio[0])
+             out_w = int(width * self.output_ratio[1])
+
+         # Generate evenly spaced pooling regions (deterministic stand-in for the random offsets of fractional pooling)
+         h_indices = np.linspace(0, height - self.kernel_size[0], out_h, dtype=int)
+         w_indices = np.linspace(0, width - self.kernel_size[1], out_w, dtype=int)
+
+         out = np.zeros((batch_size, channels, out_h, out_w))
+         indices = np.zeros((batch_size, channels, out_h, out_w, 2), dtype=int) if self.return_indices else None
+
+         for b in range(batch_size):
+             for c in range(channels):
+                 for i, h_idx in enumerate(h_indices):
+                     for j, w_idx in enumerate(w_indices):
+                         region = x.data[b, c,
+                                         h_idx:h_idx + self.kernel_size[0],
+                                         w_idx:w_idx + self.kernel_size[1]]
+                         out[b, c, i, j] = np.max(region)
+                         if self.return_indices:
+                             max_idx = np.unravel_index(np.argmax(region), region.shape)
+                             indices[b, c, i, j] = [h_idx + max_idx[0], w_idx + max_idx[1]]
+
+         if self.return_indices:
+             return Tensor(out, requires_grad=True), indices
+         return Tensor(out, requires_grad=True)
+
+ class FractionalMaxPool3d(Module):
+     def __init__(self, kernel_size, output_size=None, output_ratio=None, return_indices=False):
+         super().__init__()
+         self.kernel_size = kernel_size if isinstance(kernel_size, tuple) else (kernel_size, kernel_size, kernel_size)
+         self.output_size = output_size
+         self.output_ratio = output_ratio
+         self.return_indices = return_indices
+
+     def forward(self, x):
+         batch_size, channels, depth, height, width = x.data.shape
+
+         if self.output_size is not None:
+             out_d, out_h, out_w = self.output_size
+         else:
+             out_d = int(depth * self.output_ratio[0])
+             out_h = int(height * self.output_ratio[1])
+             out_w = int(width * self.output_ratio[2])
+
+         # Generate evenly spaced pooling regions (deterministic stand-in for the random offsets of fractional pooling)
+         d_indices = np.linspace(0, depth - self.kernel_size[0], out_d, dtype=int)
+         h_indices = np.linspace(0, height - self.kernel_size[1], out_h, dtype=int)
+         w_indices = np.linspace(0, width - self.kernel_size[2], out_w, dtype=int)
+
+         out = np.zeros((batch_size, channels, out_d, out_h, out_w))
+         indices = np.zeros((batch_size, channels, out_d, out_h, out_w, 3), dtype=int) if self.return_indices else None
+
+         for b in range(batch_size):
+             for c in range(channels):
+                 for i, d_idx in enumerate(d_indices):
+                     for j, h_idx in enumerate(h_indices):
+                         for k, w_idx in enumerate(w_indices):
+                             region = x.data[b, c,
+                                             d_idx:d_idx + self.kernel_size[0],
+                                             h_idx:h_idx + self.kernel_size[1],
+                                             w_idx:w_idx + self.kernel_size[2]]
+                             out[b, c, i, j, k] = np.max(region)
+                             if self.return_indices:
+                                 max_idx = np.unravel_index(np.argmax(region), region.shape)
+                                 indices[b, c, i, j, k] = [d_idx + max_idx[0],
+                                                           h_idx + max_idx[1],
+                                                           w_idx + max_idx[2]]
+
+         if self.return_indices:
+             return Tensor(out, requires_grad=True), indices
+         return Tensor(out, requires_grad=True)
+
+ class LPPool1d(Module):
+     def __init__(self, norm_type, kernel_size, stride=None):
+         super().__init__()
+         self.norm_type = norm_type
+         self.kernel_size = kernel_size
+         self.stride = stride if stride is not None else kernel_size
+
+     def forward(self, x):
+         batch_size, channels, length = x.data.shape
+         out_length = (length - self.kernel_size) // self.stride + 1
+
+         out = np.zeros((batch_size, channels, out_length))
+
+         for b in range(batch_size):
+             for c in range(channels):
+                 for i in range(out_length):
+                     start_idx = i * self.stride
+                     end_idx = start_idx + self.kernel_size
+                     region = x.data[b, c, start_idx:end_idx]
+                     out[b, c, i] = np.power(np.sum(np.power(np.abs(region), self.norm_type)),
+                                             1.0 / self.norm_type)
+
+         return Tensor(out, requires_grad=True)
+
+ class LPPool2d(Module):
+     def __init__(self, norm_type, kernel_size, stride=None):
+         super().__init__()
+         self.norm_type = norm_type
+         self.kernel_size = kernel_size if isinstance(kernel_size, tuple) else (kernel_size, kernel_size)
+         self.stride = stride if stride is not None else self.kernel_size
+
+     def forward(self, x):
+         batch_size, channels, height, width = x.data.shape
+         stride_h, stride_w = self.stride if isinstance(self.stride, tuple) else (self.stride, self.stride)
+         kernel_h, kernel_w = self.kernel_size
+
+         out_height = (height - kernel_h) // stride_h + 1
+         out_width = (width - kernel_w) // stride_w + 1
+
+         out = np.zeros((batch_size, channels, out_height, out_width))
+
+         for b in range(batch_size):
+             for c in range(channels):
+                 for i in range(out_height):
+                     for j in range(out_width):
+                         start_h = i * stride_h
+                         start_w = j * stride_w
+                         region = x.data[b, c,
+                                         start_h:start_h + kernel_h,
+                                         start_w:start_w + kernel_w]
+                         out[b, c, i, j] = np.power(
+                             np.sum(np.power(np.abs(region), self.norm_type)),
+                             1.0 / self.norm_type
+                         )
+
+         return Tensor(out, requires_grad=True)
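A shape sanity check for the pooling modules above. This is a hypothetical snippet with the same Tensor/callable assumptions as the padding example. The strided pools follow the usual output-size formula out = (in + 2*pad - kernel) // stride + 1; the LP pools reduce each window to (sum(|x|**p)) ** (1/p).

    # hypothetical usage sketch (not part of the package diff)
    import numpy as np
    from openarchx.core.tensor import Tensor
    from openarchx.nn.pooling import MaxPool2d, AdaptiveAvgPool2d, LPPool2d

    x = Tensor(np.random.randn(2, 3, 8, 8), requires_grad=True)

    pool = MaxPool2d(kernel_size=2, stride=2)    # (8 + 0 - 2) // 2 + 1 = 4
    print(pool(x).data.shape)                    # (2, 3, 4, 4)

    gap = AdaptiveAvgPool2d((1, 1))              # output size is fixed regardless of input resolution
    print(gap(x).data.shape)                     # (2, 3, 1, 1)

    lp = LPPool2d(norm_type=2, kernel_size=2)    # stride defaults to kernel_size
    print(lp(x).data.shape)                      # (2, 3, 4, 4)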
openarchx/nn/rnn.py ADDED
@@ -0,0 +1,226 @@
+ import numpy as np
+ from ..core.tensor import Tensor
+ from .module import Module
+ from .layers import Linear
+
+ class RNNCell(Module):
+     def __init__(self, input_size, hidden_size, bias=True, nonlinearity="tanh"):
+         super().__init__()
+         self.input_size = input_size
+         self.hidden_size = hidden_size
+         self.nonlinearity = nonlinearity
+
+         self.ih = Linear(input_size, hidden_size, bias=bias)
+         self.hh = Linear(hidden_size, hidden_size, bias=bias)
+
+     def forward(self, x, h=None):
+         if h is None:
+             h = Tensor(np.zeros((x.data.shape[0], self.hidden_size)), requires_grad=True)
+
+         hidden = self.ih(x) + self.hh(h)
+         if self.nonlinearity == "tanh":
+             hidden = Tensor(np.tanh(hidden.data), requires_grad=True)
+         else:  # relu
+             hidden = Tensor(np.maximum(0, hidden.data), requires_grad=True)
+         return hidden
+
+ class LSTMCell(Module):
+     def __init__(self, input_size, hidden_size, bias=True):
+         super().__init__()
+         self.input_size = input_size
+         self.hidden_size = hidden_size
+
+         self.ih = Linear(input_size, 4 * hidden_size, bias=bias)
+         self.hh = Linear(hidden_size, 4 * hidden_size, bias=bias)
+
+     def forward(self, x, state=None):
+         if state is None:
+             h = Tensor(np.zeros((x.data.shape[0], self.hidden_size)), requires_grad=True)
+             c = Tensor(np.zeros((x.data.shape[0], self.hidden_size)), requires_grad=True)
+         else:
+             h, c = state
+
+         gates = self.ih(x) + self.hh(h)
+
+         # Split gates
+         i, f, g, o = np.split(gates.data, 4, axis=1)
+
+         # Apply activations
+         i = 1 / (1 + np.exp(-i))  # input gate
+         f = 1 / (1 + np.exp(-f))  # forget gate
+         g = np.tanh(g)            # cell gate
+         o = 1 / (1 + np.exp(-o))  # output gate
+
+         # Update cell state
+         c = Tensor(f * c.data + i * g, requires_grad=True)
+         # Compute output
+         h = Tensor(o * np.tanh(c.data), requires_grad=True)
+
+         return h, c
+
+ class GRUCell(Module):
+     def __init__(self, input_size, hidden_size, bias=True):
+         super().__init__()
+         self.input_size = input_size
+         self.hidden_size = hidden_size
+
+         self.ih = Linear(input_size, 3 * hidden_size, bias=bias)
+         self.hh = Linear(hidden_size, 3 * hidden_size, bias=bias)
+
+     def forward(self, x, h=None):
+         if h is None:
+             h = Tensor(np.zeros((x.data.shape[0], self.hidden_size)), requires_grad=True)
+
+         gi = self.ih(x)
+         gh = self.hh(h)
+
+         # Split gates
+         i_r, i_z, i_n = np.split(gi.data, 3, axis=1)
+         h_r, h_z, h_n = np.split(gh.data, 3, axis=1)
+
+         r = 1 / (1 + np.exp(-(i_r + h_r)))  # reset gate
+         z = 1 / (1 + np.exp(-(i_z + h_z)))  # update gate
+         n = np.tanh(i_n + r * h_n)          # new gate
+
+         h = Tensor((1 - z) * n + z * h.data, requires_grad=True)
+         return h
+
+ class RNN(Module):
+     def __init__(self, input_size, hidden_size, num_layers=1, bias=True,
+                  nonlinearity="tanh", bidirectional=False):
+         super().__init__()
+         self.input_size = input_size
+         self.hidden_size = hidden_size
+         self.num_layers = num_layers
+         self.bidirectional = bidirectional
+
+         self.cells = []
+         for layer in range(num_layers):
+             layer_input_size = input_size if layer == 0 else hidden_size * (2 if bidirectional else 1)
+             self.cells.append(RNNCell(layer_input_size, hidden_size, bias, nonlinearity))
+             if bidirectional:
+                 self.cells.append(RNNCell(layer_input_size, hidden_size, bias, nonlinearity))
+
+     def forward(self, x, h=None):
+         # x has shape (batch, seq_len, input_size); note the reverse-direction cells below also read the sequence in forward time order, a simplification of a true bidirectional scan
+         seq_len = x.data.shape[1]
+         batch_size = x.data.shape[0]
+         num_directions = 2 if self.bidirectional else 1
+
+         if h is None:
+             h = [Tensor(np.zeros((batch_size, self.hidden_size)), requires_grad=True)
+                  for _ in range(self.num_layers * num_directions)]
+
+         output = []
+         for t in range(seq_len):
+             x_t = Tensor(x.data[:, t, :], requires_grad=True)
+
+             for layer in range(self.num_layers):
+                 idx = layer * num_directions
+                 h[idx] = self.cells[idx](x_t, h[idx])
+                 if self.bidirectional:
+                     h[idx + 1] = self.cells[idx + 1](x_t, h[idx + 1])
+
+                 # Prepare input for next layer
+                 if self.bidirectional:
+                     x_t = Tensor(np.concatenate([h[idx].data, h[idx + 1].data], axis=1), requires_grad=True)
+                 else:
+                     x_t = h[idx]
+
+             output.append(x_t.data)
+
+         # Stack outputs along sequence dimension
+         output = Tensor(np.stack(output, axis=1), requires_grad=True)
+         return output, h
+
+ class LSTM(Module):
+     def __init__(self, input_size, hidden_size, num_layers=1, bias=True, bidirectional=False):
+         super().__init__()
+         self.input_size = input_size
+         self.hidden_size = hidden_size
+         self.num_layers = num_layers
+         self.bidirectional = bidirectional
+
+         self.cells = []
+         for layer in range(num_layers):
+             layer_input_size = input_size if layer == 0 else hidden_size * (2 if bidirectional else 1)
+             self.cells.append(LSTMCell(layer_input_size, hidden_size, bias))
+             if bidirectional:
+                 self.cells.append(LSTMCell(layer_input_size, hidden_size, bias))
+
+     def forward(self, x, state=None):
+         seq_len = x.data.shape[1]
+         batch_size = x.data.shape[0]
+         num_directions = 2 if self.bidirectional else 1
+
+         if state is None:
+             h = [Tensor(np.zeros((batch_size, self.hidden_size)), requires_grad=True)
+                  for _ in range(self.num_layers * num_directions)]
+             c = [Tensor(np.zeros((batch_size, self.hidden_size)), requires_grad=True)
+                  for _ in range(self.num_layers * num_directions)]
+         else:
+             h, c = state
+
+         output = []
+         for t in range(seq_len):
+             x_t = Tensor(x.data[:, t, :], requires_grad=True)
+
+             for layer in range(self.num_layers):
+                 idx = layer * num_directions
+                 h[idx], c[idx] = self.cells[idx](x_t, (h[idx], c[idx]))
+                 if self.bidirectional:
+                     h[idx + 1], c[idx + 1] = self.cells[idx + 1](x_t, (h[idx + 1], c[idx + 1]))
+
+                 if self.bidirectional:
+                     x_t = Tensor(np.concatenate([h[idx].data, h[idx + 1].data], axis=1), requires_grad=True)
+                 else:
+                     x_t = h[idx]
+
+             output.append(x_t.data)
+
+         output = Tensor(np.stack(output, axis=1), requires_grad=True)
+         return output, (h, c)
+
+ class GRU(Module):
+     def __init__(self, input_size, hidden_size, num_layers=1, bias=True, bidirectional=False):
+         super().__init__()
+         self.input_size = input_size
+         self.hidden_size = hidden_size
+         self.num_layers = num_layers
+         self.bidirectional = bidirectional
+
+         self.cells = []
+         for layer in range(num_layers):
+             layer_input_size = input_size if layer == 0 else hidden_size * (2 if bidirectional else 1)
+             self.cells.append(GRUCell(layer_input_size, hidden_size, bias))
+             if bidirectional:
+                 self.cells.append(GRUCell(layer_input_size, hidden_size, bias))
+
+     def forward(self, x, h=None):
+         seq_len = x.data.shape[1]
+         batch_size = x.data.shape[0]
+         num_directions = 2 if self.bidirectional else 1
+
+         if h is None:
+             h = [Tensor(np.zeros((batch_size, self.hidden_size)), requires_grad=True)
+                  for _ in range(self.num_layers * num_directions)]
+
+         output = []
+         for t in range(seq_len):
+             x_t = Tensor(x.data[:, t, :], requires_grad=True)
+
+             for layer in range(self.num_layers):
+                 idx = layer * num_directions
+                 h[idx] = self.cells[idx](x_t, h[idx])
+                 if self.bidirectional:
+                     h[idx + 1] = self.cells[idx + 1](x_t, h[idx + 1])
+
+                 if self.bidirectional:
+                     x_t = Tensor(np.concatenate([h[idx].data, h[idx + 1].data], axis=1), requires_grad=True)
+                 else:
+                     x_t = h[idx]
+
+             output.append(x_t.data)
+
+         output = Tensor(np.stack(output, axis=1), requires_grad=True)
+         return output, h
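To close, a minimal recurrent forward pass through the LSTM above. This is a hypothetical snippet with the same assumptions as the earlier examples; weights come from whatever initialization the Linear layer in openarchx/nn/layers.py uses. output stacks the top layer's hidden state at every time step, while h holds the final hidden state of each layer.

    # hypothetical usage sketch (not part of the package diff)
    import numpy as np
    from openarchx.core.tensor import Tensor
    from openarchx.nn.rnn import LSTM

    x = Tensor(np.random.randn(4, 10, 16), requires_grad=True)  # (batch, seq_len, input_size)

    lstm = LSTM(input_size=16, hidden_size=32, num_layers=2)
    output, (h, c) = lstm(x)
    print(output.data.shape)  # (4, 10, 32)
    print(h[-1].data.shape)   # (4, 32), final hidden state of the top layer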