openarchx 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openarchx/__init__.py +11 -0
- openarchx/core/tensor.py +179 -0
- openarchx/cuda/__init__.py +27 -0
- openarchx/cuda/cuda_ops.py +296 -0
- openarchx/layers/activations.py +63 -0
- openarchx/layers/base.py +40 -0
- openarchx/layers/cnn.py +145 -0
- openarchx/layers/transformer.py +131 -0
- openarchx/nn/__init__.py +26 -0
- openarchx/nn/activations.py +127 -0
- openarchx/nn/containers.py +174 -0
- openarchx/nn/dropout.py +121 -0
- openarchx/nn/layers.py +338 -0
- openarchx/nn/losses.py +156 -0
- openarchx/nn/module.py +18 -0
- openarchx/nn/padding.py +120 -0
- openarchx/nn/pooling.py +318 -0
- openarchx/nn/rnn.py +226 -0
- openarchx/nn/transformers.py +187 -0
- openarchx/optimizers/adam.py +49 -0
- openarchx/optimizers/adaptive.py +63 -0
- openarchx/optimizers/base.py +24 -0
- openarchx/optimizers/modern.py +98 -0
- openarchx/optimizers/optx.py +91 -0
- openarchx/optimizers/sgd.py +63 -0
- openarchx/quantum/circuit.py +92 -0
- openarchx/quantum/gates.py +126 -0
- openarchx/utils/__init__.py +50 -0
- openarchx/utils/data.py +229 -0
- openarchx/utils/huggingface.py +288 -0
- openarchx/utils/losses.py +21 -0
- openarchx/utils/model_io.py +553 -0
- openarchx/utils/pytorch.py +420 -0
- openarchx/utils/tensorflow.py +467 -0
- openarchx/utils/transforms.py +259 -0
- openarchx-0.1.0.dist-info/METADATA +180 -0
- openarchx-0.1.0.dist-info/RECORD +43 -0
- openarchx-0.1.0.dist-info/WHEEL +5 -0
- openarchx-0.1.0.dist-info/licenses/LICENSE +21 -0
- openarchx-0.1.0.dist-info/top_level.txt +2 -0
- tests/__init__.py +1 -0
- tests/test_cuda_ops.py +205 -0
- tests/test_integrations.py +236 -0
openarchx/nn/dropout.py
ADDED
@@ -0,0 +1,121 @@
import numpy as np
from ..core.tensor import Tensor
from .module import Module

class Dropout(Module):
    def __init__(self, p=0.5, inplace=False):
        super().__init__()
        self.p = p
        self.inplace = inplace
        self.training = True

    def forward(self, x):
        if not self.training or self.p == 0:
            return x

        mask = np.random.random(x.data.shape) > self.p
        if self.inplace:
            x.data = x.data * mask / (1 - self.p)
            return x
        return Tensor(x.data * mask / (1 - self.p), requires_grad=True)

class Dropout2d(Module):
    def __init__(self, p=0.5, inplace=False):
        super().__init__()
        self.p = p
        self.inplace = inplace
        self.training = True

    def forward(self, x):
        if not self.training or self.p == 0:
            return x

        # Create mask for entire channels
        mask = np.random.random((x.data.shape[0], x.data.shape[1], 1, 1)) > self.p
        mask = np.broadcast_to(mask, x.data.shape)

        if self.inplace:
            x.data = x.data * mask / (1 - self.p)
            return x
        return Tensor(x.data * mask / (1 - self.p), requires_grad=True)

class Dropout3d(Module):
    def __init__(self, p=0.5, inplace=False):
        super().__init__()
        self.p = p
        self.inplace = inplace
        self.training = True

    def forward(self, x):
        if not self.training or self.p == 0:
            return x

        # Create mask for entire 3D feature maps
        mask = np.random.random((x.data.shape[0], x.data.shape[1], 1, 1, 1)) > self.p
        mask = np.broadcast_to(mask, x.data.shape)

        if self.inplace:
            x.data = x.data * mask / (1 - self.p)
            return x
        return Tensor(x.data * mask / (1 - self.p), requires_grad=True)

class AlphaDropout(Module):
    def __init__(self, p=0.5, inplace=False):
        super().__init__()
        self.p = p
        self.inplace = inplace
        self.training = True
        # SELU parameters
        self.alpha = 1.6732632423543772848170429916717
        self.scale = 1.0507009873554804934193349852946

        self.alpha_p = -self.alpha * self.scale

    def forward(self, x):
        if not self.training or self.p == 0:
            return x

        # Keep mean and variance the same during training and evaluation
        mask = np.random.random(x.data.shape) > self.p

        # Calculate the affine transformation parameters
        a = ((1 - self.p) + self.p * self.alpha_p ** 2) ** (-0.5)
        b = -a * self.p * self.alpha_p

        if self.inplace:
            x.data = mask * x.data + (1 - mask) * self.alpha_p
            x.data = a * x.data + b
            return x
        return Tensor(a * (mask * x.data + (1 - mask) * self.alpha_p) + b, requires_grad=True)

class FeatureAlphaDropout(Module):
    def __init__(self, p=0.5, inplace=False):
        super().__init__()
        self.p = p
        self.inplace = inplace
        self.training = True
        # SELU parameters
        self.alpha = 1.6732632423543772848170429916717
        self.scale = 1.0507009873554804934193349852946

        self.alpha_p = -self.alpha * self.scale

    def forward(self, x):
        if not self.training or self.p == 0:
            return x

        # Create mask for entire features
        shape = list(x.data.shape)
        shape[1:] = [1] * (len(shape) - 1)
        mask = np.random.random(shape) > self.p
        mask = np.broadcast_to(mask, x.data.shape)

        # Calculate the affine transformation parameters
        a = ((1 - self.p) + self.p * self.alpha_p ** 2) ** (-0.5)
        b = -a * self.p * self.alpha_p

        if self.inplace:
            x.data = mask * x.data + (1 - mask) * self.alpha_p
            x.data = a * x.data + b
            return x
        return Tensor(a * (mask * x.data + (1 - mask) * self.alpha_p) + b, requires_grad=True)
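A minimal usage sketch of the dropout modules above (not part of the package diff), relying only on what the code shows: the Tensor class wraps a NumPy array as .data, and Module.__call__ dispatches to forward. With inplace=False a fresh Tensor is returned and kept activations are rescaled by 1/(1 - p). The alpha-dropout variants replace dropped entries with alpha_p = -alpha * scale and then apply the affine map a*x + b with a = ((1 - p) + p*alpha_p**2)**(-0.5) and b = -a*p*alpha_p, which per the code's own comment is meant to keep the activation mean and variance unchanged.

import numpy as np
from openarchx.core.tensor import Tensor
from openarchx.nn.dropout import Dropout, Dropout2d

# Element-wise dropout on a (batch, features) activation
x = Tensor(np.random.randn(4, 16), requires_grad=True)
drop = Dropout(p=0.3)
y = drop(x)                    # ~30% of entries zeroed, survivors scaled by 1 / (1 - 0.3)

# Channel-wise dropout on a (batch, channels, height, width) feature map
feat = Tensor(np.random.randn(4, 8, 28, 28), requires_grad=True)
drop2d = Dropout2d(p=0.5)
out = drop2d(feat)             # whole channels are dropped together

# Each module tracks its own training flag; clear it for evaluation
drop.training = False
assert drop(x) is x            # forward is a no-op outside training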
openarchx/nn/layers.py
ADDED
@@ -0,0 +1,338 @@
import numpy as np
from ..core.tensor import Tensor
from .module import Module

class Linear(Module):
    def __init__(self, in_features, out_features, bias=True):
        super().__init__()
        self.in_features = in_features
        self.out_features = out_features
        scale = np.sqrt(2.0 / in_features)
        self.weight = Tensor(np.random.normal(0, scale, (in_features, out_features)), requires_grad=True)
        self.bias = Tensor(np.zeros(out_features), requires_grad=True) if bias else None

    def forward(self, x):
        if not isinstance(x, Tensor):
            x = Tensor(x, requires_grad=True)
        out = x @ self.weight
        if self.bias is not None:
            out = out + self.bias
        return out

class Conv1d(Module):
    def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, bias=True):
        super().__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size if isinstance(kernel_size, tuple) else (kernel_size,)
        self.stride = stride if isinstance(stride, tuple) else (stride,)
        self.padding = padding if isinstance(padding, tuple) else (padding,)

        scale = np.sqrt(2.0 / (in_channels * kernel_size))
        self.weight = Tensor(
            np.random.normal(0, scale, (out_channels, in_channels, kernel_size)),
            requires_grad=True
        )
        self.bias = Tensor(np.zeros(out_channels), requires_grad=True) if bias else None

    def forward(self, x):
        # Implementation for 1D convolution
        pass  # TODO: Implement actual convolution

class Conv2d(Module):
    def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, bias=True):
        super().__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size if isinstance(kernel_size, tuple) else (kernel_size, kernel_size)
        self.stride = stride if isinstance(stride, tuple) else (stride, stride)
        self.padding = padding if isinstance(padding, tuple) else (padding, padding)

        scale = np.sqrt(2.0 / (in_channels * kernel_size * kernel_size))
        self.weight = Tensor(
            np.random.normal(0, scale, (out_channels, in_channels, *self.kernel_size)),
            requires_grad=True
        )
        self.bias = Tensor(np.zeros(out_channels), requires_grad=True) if bias else None

    def forward(self, x):
        # Implementation for 2D convolution
        pass  # TODO: Implement actual convolution

class BatchNorm1d(Module):
    def __init__(self, num_features, eps=1e-5, momentum=0.1):
        super().__init__()
        self.num_features = num_features
        self.eps = eps
        self.momentum = momentum

        self.gamma = Tensor(np.ones(num_features), requires_grad=True)
        self.beta = Tensor(np.zeros(num_features), requires_grad=True)
        self.running_mean = Tensor(np.zeros(num_features), requires_grad=False)
        self.running_var = Tensor(np.ones(num_features), requires_grad=False)

    def forward(self, x):
        if self.training:
            mean = x.mean(axis=0)
            var = x.var(axis=0)

            self.running_mean = (1 - self.momentum) * self.running_mean + self.momentum * mean
            self.running_var = (1 - self.momentum) * self.running_var + self.momentum * var
        else:
            mean = self.running_mean
            var = self.running_var

        x_norm = (x - mean) / np.sqrt(var + self.eps)
        return self.gamma * x_norm + self.beta

class LayerNorm(Module):
    def __init__(self, normalized_shape, eps=1e-5):
        super().__init__()
        self.normalized_shape = normalized_shape
        self.eps = eps
        self.gamma = Tensor(np.ones(normalized_shape), requires_grad=True)
        self.beta = Tensor(np.zeros(normalized_shape), requires_grad=True)

    def forward(self, x):
        mean = x.mean(axis=-1, keepdims=True)
        var = x.var(axis=-1, keepdims=True)
        return self.gamma * (x - mean) / np.sqrt(var + self.eps) + self.beta

class Embedding(Module):
    def __init__(self, num_embeddings, embedding_dim):
        super().__init__()
        self.num_embeddings = num_embeddings
        self.embedding_dim = embedding_dim
        self.weight = Tensor(
            np.random.normal(0, 0.02, (num_embeddings, embedding_dim)),
            requires_grad=True
        )

    def forward(self, x):
        return self.weight[x]

class ConvTranspose1d(Module):
    def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, output_padding=0, bias=True):
        super().__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size if isinstance(kernel_size, tuple) else (kernel_size,)
        self.stride = stride if isinstance(stride, tuple) else (stride,)
        self.padding = padding if isinstance(padding, tuple) else (padding,)
        self.output_padding = output_padding if isinstance(output_padding, tuple) else (output_padding,)

        scale = np.sqrt(2.0 / (in_channels * kernel_size))
        self.weight = Tensor(
            np.random.normal(0, scale, (in_channels, out_channels, *self.kernel_size)),
            requires_grad=True
        )
        if bias:
            self.bias = Tensor(np.zeros(out_channels), requires_grad=True)
        else:
            self.bias = None

    def forward(self, x):
        # TODO: Implement actual transposed convolution
        pass

class ConvTranspose2d(Module):
    def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, output_padding=0, bias=True):
        super().__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size if isinstance(kernel_size, tuple) else (kernel_size, kernel_size)
        self.stride = stride if isinstance(stride, tuple) else (stride, stride)
        self.padding = padding if isinstance(padding, tuple) else (padding, padding)
        self.output_padding = output_padding if isinstance(output_padding, tuple) else (output_padding, output_padding)

        scale = np.sqrt(2.0 / (in_channels * kernel_size * kernel_size))
        self.weight = Tensor(
            np.random.normal(0, scale, (in_channels, out_channels, *self.kernel_size)),
            requires_grad=True
        )
        if bias:
            self.bias = Tensor(np.zeros(out_channels), requires_grad=True)
        else:
            self.bias = None

    def forward(self, x):
        # TODO: Implement actual transposed convolution
        pass

class ConvTranspose3d(Module):
    def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, output_padding=0, bias=True):
        super().__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size if isinstance(kernel_size, tuple) else (kernel_size, kernel_size, kernel_size)
        self.stride = stride if isinstance(stride, tuple) else (stride, stride, stride)
        self.padding = padding if isinstance(padding, tuple) else (padding, padding, padding)
        self.output_padding = output_padding if isinstance(output_padding, tuple) else (output_padding, output_padding, output_padding)

        scale = np.sqrt(2.0 / (in_channels * kernel_size * kernel_size * kernel_size))
        self.weight = Tensor(
            np.random.normal(0, scale, (in_channels, out_channels, *self.kernel_size)),
            requires_grad=True
        )
        if bias:
            self.bias = Tensor(np.zeros(out_channels), requires_grad=True)
        else:
            self.bias = None

    def forward(self, x):
        # TODO: Implement actual transposed convolution
        pass

class InstanceNorm1d(Module):
    def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=True):
        super().__init__()
        self.num_features = num_features
        self.eps = eps
        self.momentum = momentum
        self.affine = affine

        if affine:
            self.weight = Tensor(np.ones(num_features), requires_grad=True)
            self.bias = Tensor(np.zeros(num_features), requires_grad=True)

    def forward(self, x):
        mean = np.mean(x.data, axis=(2,), keepdims=True)
        var = np.var(x.data, axis=(2,), keepdims=True)

        x_norm = (x.data - mean) / np.sqrt(var + self.eps)

        if self.affine:
            x_norm = self.weight.data.reshape(-1, 1) * x_norm + self.bias.data.reshape(-1, 1)

        return Tensor(x_norm, requires_grad=True)

class InstanceNorm2d(Module):
    def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=True):
        super().__init__()
        self.num_features = num_features
        self.eps = eps
        self.momentum = momentum
        self.affine = affine

        if affine:
            self.weight = Tensor(np.ones(num_features), requires_grad=True)
            self.bias = Tensor(np.zeros(num_features), requires_grad=True)

    def forward(self, x):
        mean = np.mean(x.data, axis=(2, 3), keepdims=True)
        var = np.var(x.data, axis=(2, 3), keepdims=True)

        x_norm = (x.data - mean) / np.sqrt(var + self.eps)

        if self.affine:
            x_norm = self.weight.data.reshape(-1, 1, 1) * x_norm + self.bias.data.reshape(-1, 1, 1)

        return Tensor(x_norm, requires_grad=True)

class InstanceNorm3d(Module):
    def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=True):
        super().__init__()
        self.num_features = num_features
        self.eps = eps
        self.momentum = momentum
        self.affine = affine

        if affine:
            self.weight = Tensor(np.ones(num_features), requires_grad=True)
            self.bias = Tensor(np.zeros(num_features), requires_grad=True)

    def forward(self, x):
        mean = np.mean(x.data, axis=(2, 3, 4), keepdims=True)
        var = np.var(x.data, axis=(2, 3, 4), keepdims=True)

        x_norm = (x.data - mean) / np.sqrt(var + self.eps)

        if self.affine:
            x_norm = self.weight.data.reshape(-1, 1, 1, 1) * x_norm + self.bias.data.reshape(-1, 1, 1, 1)

        return Tensor(x_norm, requires_grad=True)

class GroupNorm(Module):
    def __init__(self, num_groups, num_channels, eps=1e-5, affine=True):
        super().__init__()
        self.num_groups = num_groups
        self.num_channels = num_channels
        self.eps = eps
        self.affine = affine

        if affine:
            self.weight = Tensor(np.ones(num_channels), requires_grad=True)
            self.bias = Tensor(np.zeros(num_channels), requires_grad=True)

    def forward(self, x):
        N, C, *spatial = x.data.shape
        x_reshaped = x.data.reshape(N, self.num_groups, -1)

        mean = np.mean(x_reshaped, axis=2, keepdims=True)
        var = np.var(x_reshaped, axis=2, keepdims=True)

        x_norm = (x_reshaped - mean) / np.sqrt(var + self.eps)
        x_norm = x_norm.reshape(N, C, *spatial)

        if self.affine:
            shape = (1, -1) + (1,) * len(spatial)
            x_norm = self.weight.data.reshape(shape) * x_norm + self.bias.data.reshape(shape)

        return Tensor(x_norm, requires_grad=True)

class LocalResponseNorm(Module):
    def __init__(self, size, alpha=1e-4, beta=0.75, k=1.0):
        super().__init__()
        self.size = size
        self.alpha = alpha
        self.beta = beta
        self.k = k

    def forward(self, x):
        N, C, *spatial = x.data.shape
        half_size = self.size // 2

        square = x.data ** 2
        for i in range(C):
            start = max(0, i - half_size)
            end = min(C, i + half_size + 1)
            scale = self.k + self.alpha * np.sum(square[:, start:end], axis=1, keepdims=True)
            x.data[:, i] /= scale ** self.beta

        return Tensor(x.data, requires_grad=True)

class EmbeddingBag(Module):
    def __init__(self, num_embeddings, embedding_dim, mode='mean'):
        super().__init__()
        self.num_embeddings = num_embeddings
        self.embedding_dim = embedding_dim
        self.mode = mode
        self.weight = Tensor(
            np.random.normal(0, 0.02, (num_embeddings, embedding_dim)),
            requires_grad=True
        )

    def forward(self, x, offsets=None):
        embeddings = self.weight.data[x.data]
        if offsets is None:
            # Treat entire input as a single bag
            if self.mode == 'mean':
                return Tensor(np.mean(embeddings, axis=1), requires_grad=True)
            elif self.mode == 'sum':
                return Tensor(np.sum(embeddings, axis=1), requires_grad=True)
            else:  # max
                return Tensor(np.max(embeddings, axis=1), requires_grad=True)

        # Handle multiple bags using offsets
        result = []
        for i in range(len(offsets) - 1):
            start, end = offsets[i:i+2]
            if self.mode == 'mean':
                bag = np.mean(embeddings[start:end], axis=0)
            elif self.mode == 'sum':
                bag = np.sum(embeddings[start:end], axis=0)
            else:  # max
                bag = np.max(embeddings[start:end], axis=0)
            result.append(bag)

        return Tensor(np.stack(result), requires_grad=True)
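The convolution and transposed-convolution classes above are stubs (their forward methods are pass with a TODO), so a usage sketch is limited to the implemented layers. This is illustrative only and assumes the package's Tensor class implements the @ and + operators that Linear.forward relies on; GroupNorm only needs the .data attribute shown above.

import numpy as np
from openarchx.core.tensor import Tensor
from openarchx.nn.layers import Linear, GroupNorm

# Fully-connected layer; weights are drawn with He-style scaling sqrt(2 / in_features)
x = Tensor(np.random.randn(32, 128), requires_grad=True)
fc = Linear(128, 64)
h = fc(x)                          # x @ weight + bias -> shape (32, 64)

# Group normalization over an (N, C, H, W) feature map, 4 channels per group
feat = Tensor(np.random.randn(8, 16, 4, 4), requires_grad=True)
gn = GroupNorm(num_groups=4, num_channels=16)
normed = gn(feat)                  # per-group mean/variance, then per-channel affine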
openarchx/nn/losses.py
ADDED
@@ -0,0 +1,156 @@
import numpy as np
from ..core.tensor import Tensor
from .module import Module

class MSELoss(Module):
    def __init__(self, reduction='mean'):
        super().__init__()
        self.reduction = reduction

    def forward(self, pred, target):
        if not isinstance(target, Tensor):
            target = Tensor(target, requires_grad=False)

        loss = (pred - target) ** 2
        if self.reduction == 'mean':
            return Tensor(np.mean(loss.data), requires_grad=True)
        elif self.reduction == 'sum':
            return Tensor(np.sum(loss.data), requires_grad=True)
        return loss

class CrossEntropyLoss(Module):
    def __init__(self, reduction='mean'):
        super().__init__()
        self.reduction = reduction

    def forward(self, pred, target):
        if not isinstance(target, Tensor):
            target = Tensor(target, requires_grad=False)

        # Apply log softmax
        max_val = np.max(pred.data, axis=1, keepdims=True)
        exp_x = np.exp(pred.data - max_val)
        softmax = exp_x / np.sum(exp_x, axis=1, keepdims=True)
        log_softmax = np.log(softmax + 1e-10)

        # Compute cross entropy
        batch_size = pred.data.shape[0]
        loss = -np.sum(target.data * log_softmax) / batch_size if self.reduction == 'mean' \
            else -np.sum(target.data * log_softmax)

        return Tensor(loss, requires_grad=True)

class BCELoss(Module):
    def __init__(self, reduction='mean'):
        super().__init__()
        self.reduction = reduction

    def forward(self, pred, target):
        if not isinstance(target, Tensor):
            target = Tensor(target, requires_grad=False)

        eps = 1e-12
        loss = -(target.data * np.log(pred.data + eps) + (1 - target.data) * np.log(1 - pred.data + eps))

        if self.reduction == 'mean':
            return Tensor(np.mean(loss), requires_grad=True)
        elif self.reduction == 'sum':
            return Tensor(np.sum(loss), requires_grad=True)
        return Tensor(loss, requires_grad=True)

class BCEWithLogitsLoss(Module):
    def __init__(self, reduction='mean'):
        super().__init__()
        self.reduction = reduction

    def forward(self, pred, target):
        if not isinstance(target, Tensor):
            target = Tensor(target, requires_grad=False)

        # Compute sigmoid and BCE in a numerically stable way
        max_val = np.maximum(0, pred.data)
        loss = pred.data - pred.data * target.data + max_val + \
            np.log(np.exp(-max_val) + np.exp(pred.data - max_val))

        if self.reduction == 'mean':
            return Tensor(np.mean(loss), requires_grad=True)
        elif self.reduction == 'sum':
            return Tensor(np.sum(loss), requires_grad=True)
        return Tensor(loss, requires_grad=True)

class L1Loss(Module):
    def __init__(self, reduction='mean'):
        super().__init__()
        self.reduction = reduction

    def forward(self, pred, target):
        if not isinstance(target, Tensor):
            target = Tensor(target, requires_grad=False)

        loss = np.abs(pred.data - target.data)
        if self.reduction == 'mean':
            return Tensor(np.mean(loss), requires_grad=True)
        elif self.reduction == 'sum':
            return Tensor(np.sum(loss), requires_grad=True)
        return Tensor(loss, requires_grad=True)

class SmoothL1Loss(Module):
    def __init__(self, reduction='mean', beta=1.0):
        super().__init__()
        self.reduction = reduction
        self.beta = beta

    def forward(self, pred, target):
        if not isinstance(target, Tensor):
            target = Tensor(target, requires_grad=False)

        diff = np.abs(pred.data - target.data)
        loss = np.where(diff < self.beta, 0.5 * diff ** 2 / self.beta, diff - 0.5 * self.beta)

        if self.reduction == 'mean':
            return Tensor(np.mean(loss), requires_grad=True)
        elif self.reduction == 'sum':
            return Tensor(np.sum(loss), requires_grad=True)
        return Tensor(loss, requires_grad=True)

class KLDivLoss(Module):
    def __init__(self, reduction='mean', log_target=False):
        super().__init__()
        self.reduction = reduction
        self.log_target = log_target

    def forward(self, pred, target):
        if not isinstance(target, Tensor):
            target = Tensor(target, requires_grad=False)

        if self.log_target:
            loss = np.exp(target.data) * (target.data - pred.data)
        else:
            loss = target.data * (np.log(target.data + 1e-10) - pred.data)

        if self.reduction == 'mean':
            return Tensor(np.mean(loss), requires_grad=True)
        elif self.reduction == 'sum':
            return Tensor(np.sum(loss), requires_grad=True)
        return Tensor(loss, requires_grad=True)

class CosineEmbeddingLoss(Module):
    def __init__(self, margin=0.0, reduction='mean'):
        super().__init__()
        self.margin = margin
        self.reduction = reduction

    def forward(self, x1, x2, y):
        if not isinstance(y, Tensor):
            y = Tensor(y, requires_grad=False)

        cos_sim = np.sum(x1.data * x2.data, axis=1) / \
            (np.sqrt(np.sum(x1.data ** 2, axis=1)) * np.sqrt(np.sum(x2.data ** 2, axis=1)) + 1e-10)

        loss = np.where(y.data == 1, 1 - cos_sim, np.maximum(0, cos_sim - self.margin))

        if self.reduction == 'mean':
            return Tensor(np.mean(loss), requires_grad=True)
        elif self.reduction == 'sum':
            return Tensor(np.sum(loss), requires_grad=True)
        return Tensor(loss, requires_grad=True)
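A short sketch of the loss modules above, illustrative only; it relies on what the code shows (raw NumPy targets are wrapped into Tensors, and the arithmetic runs on the .data arrays). Note that this CrossEntropyLoss multiplies the target by the row-wise log-softmax, so it expects one-hot or probability targets rather than integer class indices.

import numpy as np
from openarchx.core.tensor import Tensor
from openarchx.nn.losses import L1Loss, CrossEntropyLoss

# L1 / mean absolute error; a plain NumPy target is wrapped into a Tensor internally
pred = Tensor(np.random.randn(8, 1), requires_grad=True)
target = np.random.randn(8, 1)
l1 = L1Loss(reduction='mean')
loss = l1(pred, target)                   # scalar Tensor: np.mean(|pred - target|)

# Cross entropy on logits with one-hot targets, averaged over the batch
logits = Tensor(np.random.randn(8, 10), requires_grad=True)
one_hot = np.eye(10)[np.random.randint(0, 10, size=8)]
ce = CrossEntropyLoss()
loss = ce(logits, one_hot)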
openarchx/nn/module.py
ADDED
@@ -0,0 +1,18 @@
class Module:
    def __init__(self):
        self._parameters = []

    def parameters(self):
        params = []
        for attr in self.__dict__.values():
            if isinstance(attr, Module):
                params.extend(attr.parameters())
            elif hasattr(attr, 'parameters'):
                params.extend(attr.parameters())
        return params + self._parameters

    def forward(self, *args, **kwargs):
        raise NotImplementedError

    def __call__(self, *args, **kwargs):
        return self.forward(*args, **kwargs)