Enilnets 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- Enilnets/__init__.py +33 -0
- Enilnets/activations.py +39 -0
- Enilnets/backward.py +121 -0
- Enilnets/base.py +41 -0
- Enilnets/forward.py +101 -0
- Enilnets/io.py +48 -0
- Enilnets/layers.py +34 -0
- Enilnets/loss.py +43 -0
- Enilnets/optimizer.py +96 -0
- Enilnets/reinforce.py +25 -0
- Enilnets/train.py +54 -0
- Enilnets/weight_init.py +49 -0
- enilnets-1.0.0.dist-info/METADATA +7 -0
- enilnets-1.0.0.dist-info/RECORD +17 -0
- enilnets-1.0.0.dist-info/WHEEL +5 -0
- enilnets-1.0.0.dist-info/licenses/LICENCE +18 -0
- enilnets-1.0.0.dist-info/top_level.txt +1 -0
Enilnets/__init__.py
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
from .base import NeuralNet
|
|
2
|
+
from .layers import add_dense, add_sparse, add_conv2d, add_flatten, add_maxpool2d, add_avgpool2d, add_batchnorm, add_dropout
|
|
3
|
+
from .forward import Forward
|
|
4
|
+
from .backward import Backward
|
|
5
|
+
from .optimizer import update
|
|
6
|
+
from .train import TrainBatch, Train, compute_accuracy
|
|
7
|
+
from .reinforce import Reinforce
|
|
8
|
+
from .loss import ComputeLoss
|
|
9
|
+
from .io import Save, Load
|
|
10
|
+
|
|
11
|
+
# Import-time banner: emitted once, when the package is first imported.
print("Loading Modules....")

# The network's behaviour is implemented as free functions (each taking the
# network as its first argument) in the sibling modules imported above.
# Binding them as class attributes makes them callable as ordinary methods,
# e.g. ``net.add_dense(...)`` or ``net.Forward(x)``.

# Layer-construction methods.
NeuralNet.add_dense = add_dense
NeuralNet.add_sparse = add_sparse
NeuralNet.add_conv2d = add_conv2d
NeuralNet.add_flatten = add_flatten
NeuralNet.add_maxpool2d = add_maxpool2d
NeuralNet.add_avgpool2d = add_avgpool2d
NeuralNet.add_batchnorm = add_batchnorm
NeuralNet.add_dropout = add_dropout

# Inference / training passes. ``predict`` is an alias of ``Forward``.
NeuralNet.Forward = Forward
NeuralNet.predict = Forward
NeuralNet.Backward = Backward
NeuralNet.update = update
NeuralNet.TrainBatch = TrainBatch
NeuralNet.Train = Train
NeuralNet.compute_accuracy = compute_accuracy
NeuralNet.Reinforce = Reinforce
NeuralNet.ComputeLoss = ComputeLoss

# Persistence.
NeuralNet.Save = Save
NeuralNet.Load = Load

# Second import-time banner, printed after all methods are attached.
print("Enilnets Library Ready!")
|
Enilnets/activations.py
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
|
|
3
|
+
def activate(name, x):
    """Apply the activation function called *name* element-wise to *x*.

    Args:
        name: One of "relu", "leakyrelu", "elu", "selu", "gelu", "swish",
            "sigmoid", "tanh" or "softmax". Any other value (e.g. "linear")
            selects the identity.
        x: Array of pre-activations. Softmax normalises over the last axis.

    Returns:
        The activated values (``x`` itself for the identity).
    """
    if name == "relu":
        return np.maximum(0, x)
    if name == "leakyrelu":
        return np.where(x > 0, x, 0.01 * x)
    if name == "elu":
        # np.where evaluates both branches, so the exponential must only ever
        # see non-positive values or it overflows for large positive x.
        return np.where(x > 0, x, np.expm1(np.minimum(x, 0)))
    if name == "selu":
        alpha = 1.6732632423543772848170429916717
        scale = 1.0507009873554804934193349852946
        return scale * np.where(x > 0, x, alpha * np.expm1(np.minimum(x, 0)))
    if name == "gelu":
        # tanh approximation of GELU.
        return 0.5 * x * (1 + np.tanh(np.sqrt(2 / np.pi) * (x + 0.044715 * x**3)))
    if name == "swish":
        # x * sigmoid(x); the exponent is clipped exactly like "sigmoid"
        # below so very negative inputs do not overflow np.exp.
        return x / (1.0 + np.exp(-np.clip(x, -500, 500)))
    if name == "sigmoid":
        return 1.0 / (1.0 + np.exp(-np.clip(x, -500, 500)))
    if name == "tanh":
        return np.tanh(x)
    if name == "softmax":
        # Subtract the row maximum for numerical stability.
        e_x = np.exp(x - np.max(x, axis=-1, keepdims=True))
        return e_x / np.sum(e_x, axis=-1, keepdims=True)
    return x
|
|
19
|
+
|
|
20
|
+
def derivative(name, x):
    """Return the element-wise derivative of activation *name* at *x*.

    Args:
        name: Activation name, as accepted by the forward activation
            function. Unknown names (including "linear" and "softmax") yield
            a derivative of one everywhere.
        x: NumPy array of pre-activation values.

    Returns:
        Array with the same shape as ``x``.
    """
    if name == "relu":
        return (x > 0).astype(np.float64)
    if name == "leakyrelu":
        return np.where(x > 0, 1.0, 0.01)
    if name == "elu":
        # Clamp the exponent: np.where evaluates both branches, so a large
        # positive x would otherwise overflow np.exp.
        return np.where(x > 0, 1.0, np.exp(np.minimum(x, 0)))
    if name == "selu":
        alpha = 1.6732632423543772848170429916717
        scale = 1.0507009873554804934193349852946
        return scale * np.where(x > 0, 1.0, alpha * np.exp(np.minimum(x, 0)))
    if name == "gelu":
        # Exact derivative of the tanh approximation used in the forward
        # pass: f(x) = 0.5 * x * (1 + tanh(u)), u = c * (x + 0.044715 x^3).
        c = np.sqrt(2 / np.pi)
        t = np.tanh(c * (x + 0.044715 * x**3))
        du_dx = c * (1 + 3 * 0.044715 * x**2)
        return 0.5 * (1 + t) + 0.5 * x * (1 - t**2) * du_dx
    if name == "swish":
        # Clipped like "sigmoid" so np.exp cannot overflow.
        s = 1.0 / (1.0 + np.exp(-np.clip(x, -500, 500)))
        return s + x * s * (1 - s)
    if name == "sigmoid":
        s = 1.0 / (1.0 + np.exp(-np.clip(x, -500, 500)))
        return s * (1 - s)
    if name == "tanh":
        return 1 - np.tanh(x) ** 2
    return np.ones_like(x)
|
Enilnets/backward.py
ADDED
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
from .activations import derivative
|
|
3
|
+
from .forward import im2col
|
|
4
|
+
|
|
5
|
+
def maxpool2d_backward(delta, x, p):
    """Route the pooled gradient back to the max position(s) of each window.

    Args:
        delta: Gradient w.r.t. the pooling output, shape (B, C, H // p, W // p).
        x: Input that was pooled, shape (B, C, H, W).
        p: Pool size (non-overlapping p x p windows).

    Returns:
        Gradient w.r.t. ``x`` (same shape and dtype as ``x``). Rows/columns
        beyond the last full window receive zero. When several entries of a
        window tie for the maximum the gradient is shared equally among them.
    """
    B, C, H, W = x.shape
    H_b, W_b = H // p, W // p
    H_trim, W_trim = H_b * p, W_b * p

    # reshape (rather than as_strided with strides borrowed from ``x``) stays
    # correct when ``x`` is a non-contiguous view whose strides differ from
    # those of the freshly allocated ``dx``.
    blocks = x[:, :, :H_trim, :W_trim].reshape(B, C, H_b, p, W_b, p)

    block_max = blocks.max(axis=(3, 5), keepdims=True)
    mask = (blocks == block_max).astype(np.float64)
    # Normalise so tied maxima split the gradient instead of duplicating it.
    mask = mask / np.maximum(mask.sum(axis=(3, 5), keepdims=True), 1e-12)

    delta_expanded = delta[:, :, :H_b, :W_b][:, :, :, None, :, None]

    dx = np.zeros_like(x)
    dx[:, :, :H_trim, :W_trim] = (mask * delta_expanded).reshape(B, C, H_trim, W_trim)
    return dx
|
|
29
|
+
|
|
30
|
+
def avgpool2d_backward(delta, x, p):
    """Spread each pooled gradient evenly over its p x p input window.

    Args:
        delta: Gradient w.r.t. the pooling output, shape (B, C, H // p, W // p).
        x: Input that was pooled, shape (B, C, H, W); only its shape and
            dtype are used.
        p: Pool size.

    Returns:
        Gradient w.r.t. ``x``; positions outside the last full window are 0.
    """
    B, C, H, W = x.shape
    H_b, W_b = H // p, W // p
    H_trim, W_trim = H_b * p, W_b * p

    # Every cell of a window contributed 1 / p^2 of the average.
    share = delta[:, :, :H_b, :W_b][:, :, :, None, :, None] / (p * p)
    tiled = np.broadcast_to(share, (B, C, H_b, p, W_b, p))

    dx = np.zeros_like(x)
    dx[:, :, :H_trim, :W_trim] = tiled.reshape(B, C, H_trim, W_trim)
    return dx
|
|
44
|
+
|
|
45
|
+
def batchnorm_backward(dout, cache):
    """Back-propagate through a batch-normalisation step.

    Args:
        dout: Gradient w.r.t. the batch-norm output, shape (N, features).
        cache: Tuple ``(x, x_norm, mean, var, gamma, epsilon)`` saved by the
            training-mode forward pass.

    Returns:
        Tuple ``(dx, dgamma, dbeta)``: gradients w.r.t. the input, the scale
        and the shift.
    """
    x, x_norm, mean, var, gamma, epsilon = cache
    n_rows = x.shape[0]
    centered = x - mean
    std = np.sqrt(var + epsilon)

    # Parameter gradients.
    dgamma = np.sum(dout * x_norm, axis=0)
    dbeta = np.sum(dout, axis=0)

    # Input gradient via the normalised activations, variance and mean.
    dx_norm = dout * gamma
    dvar = np.sum(dx_norm * centered * -0.5 * (var + epsilon) ** (-1.5), axis=0)
    dmean = np.sum(dx_norm * -1.0 / std, axis=0)
    dx = dx_norm / std + dvar * 2.0 * centered / n_rows + dmean / n_rows
    return dx, dgamma, dbeta
|
|
55
|
+
|
|
56
|
+
def conv2d_backward_input(delta, weights, input_shape):
    """Compute the gradient of a stride-1, unpadded convolution w.r.t. its input.

    Implemented as a "full" correlation of the output gradient with the
    spatially flipped kernels.

    Args:
        delta: Gradient w.r.t. the convolution output, shape (B, F, out_h, out_w).
        weights: Kernels, shape (F, C, K, K).
        input_shape: Shape of the convolution input, (B, C, H, W).

    Returns:
        Gradient w.r.t. the input, shape (B, C, H, W).
    """
    batch, _, _, _ = delta.shape
    _, in_ch, k, _ = weights.shape
    height, width = input_shape[2], input_shape[3]

    # Zero-pad by K - 1 so the sliding window visits every input position.
    border = k - 1
    padded = np.pad(delta, [(0, 0), (0, 0), (border, border), (border, border)], mode="constant")
    patches = im2col(padded, k, k)

    # Flip kernels spatially and swap the in/out channel axes.
    flipped = weights[:, :, ::-1, ::-1]
    kernel_matrix = flipped.transpose(1, 0, 2, 3).reshape(in_ch, -1)

    grad = np.dot(patches, kernel_matrix.T)
    return grad.reshape(batch, height, width, in_ch).transpose(0, 3, 1, 2)
|
|
67
|
+
|
|
68
|
+
def Backward(self, targets):
    """Back-propagate the error for the most recent forward pass.

    Fills ``self.deltas`` with one gradient per layer. For dense, sparse and
    conv2d layers the entry is the gradient w.r.t. the layer's
    pre-activation; for every other layer it is the gradient w.r.t. the
    layer's output. Batch-norm layers additionally receive ``"d_gamma"`` and
    ``"d_beta"`` entries in their layer dict.

    Args:
        targets: Target values; a 1-D input is treated as a single sample.

    Raises:
        ValueError: If a batch-norm layer after the first has no cache,
            i.e. the forward pass was not run in training mode.
    """
    targets = np.asarray(targets, dtype=np.float64)
    if targets.ndim == 1:
        targets = targets.reshape(1, -1)
    batch_size = targets.shape[0]

    n_layers = len(self.layers)
    self.deltas = [None] * n_layers
    out = self.outputs[-1]
    last = self.layers[-1]
    if last.get("activation") == "softmax":
        # Softmax output uses the combined (output - target) gradient.
        delta = (out - targets) / batch_size
    else:
        z_last = self.pre_activations[-1]
        activation_input = z_last if z_last is not None else out
        delta = (out - targets) * derivative(last.get("activation", "linear"), activation_input) / batch_size
    self.deltas[-1] = delta

    for idx in reversed(range(n_layers - 1)):
        curr = self.layers[idx]
        nxt = self.layers[idx + 1]
        next_delta = self.deltas[idx + 1]
        # outputs[idx + 1] is the output of layer idx == the input of nxt.
        layer_out = self.outputs[idx + 1]
        kind = nxt["type"]

        if kind in ("dense", "sparse"):
            err = np.dot(next_delta, nxt["weights"])
        elif kind == "flatten":
            err = next_delta.reshape(layer_out.shape)
        elif kind == "conv2d":
            err = conv2d_backward_input(next_delta, nxt["weights"], layer_out.shape)
        elif kind == "maxpool2d":
            err = maxpool2d_backward(next_delta, layer_out, nxt["p"])
        elif kind == "avgpool2d":
            err = avgpool2d_backward(next_delta, layer_out, nxt["p"])
        elif kind == "dropout":
            mask = nxt.get("mask")
            rate = nxt.get("rate", 0.0)
            if mask is None or rate == 0.0:
                err = next_delta
            elif rate >= 1.0:
                # Everything was dropped in the forward pass; dividing by
                # (1 - rate) here would produce NaN/inf gradients.
                err = np.zeros_like(next_delta)
            else:
                err = next_delta * mask / (1.0 - rate)
        elif kind == "batchnorm":
            flat = next_delta.reshape(layer_out.shape[0], -1)
            cache = self.batchnorm_cache[idx + 1]
            if cache is None:
                raise ValueError("BatchNorm cache is None. Ensure Forward(training=True) was called before Backward.")
            err_flat, dgamma, dbeta = batchnorm_backward(flat, cache)
            nxt["d_gamma"] = dgamma
            nxt["d_beta"] = dbeta
            err = err_flat.reshape(layer_out.shape)
        else:
            err = np.zeros_like(layer_out)

        if curr["type"] in ("dense", "sparse", "conv2d"):
            z = self.pre_activations[idx + 1]
            activation_input = z if z is not None else layer_out
            self.deltas[idx] = err * derivative(curr.get("activation", "linear"), activation_input)
        else:
            self.deltas[idx] = err

    # The loop only derives batch-norm parameter gradients for layers that
    # act as a "next" layer, so a batch-norm layer at index 0 would never
    # learn its gamma/beta. Handle it here; skipped when no training cache
    # exists so an inference-mode forward pass does not start raising.
    first = self.layers[0]
    if first["type"] == "batchnorm":
        cache = self.batchnorm_cache[0] if self.batchnorm_cache else None
        if cache is not None:
            flat = self.deltas[0].reshape(self.outputs[1].shape[0], -1)
            _, dgamma, dbeta = batchnorm_backward(flat, cache)
            first["d_gamma"] = dgamma
            first["d_beta"] = dbeta
|
Enilnets/base.py
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
|
|
3
|
+
class NeuralNet:
    """Container for a sequential network: layer dicts plus training state."""

    def __init__(self, learning_rate=0.001, optimizer="adam", l2_lambda=0.01, momentum=0.9):
        """Create an empty network.

        Args:
            learning_rate: Step size used by the optimizer.
            optimizer: Optimizer name; stored lower-cased.
            l2_lambda: L2 regularisation strength.
            momentum: Momentum coefficient.
        """
        # Architecture and hyper-parameters.
        self.layers = []
        self.learning_rate = learning_rate
        self.optimizer_type = optimizer.lower()
        self.l2_lambda = l2_lambda
        self.momentum = momentum

        # Per-pass caches and optimizer bookkeeping.
        self.outputs = []
        self.pre_activations = []
        self.batchnorm_cache = []
        self.deltas = []
        self.opt_state = []
        self.t = 0

    def summary(self):
        """Print one line per layer followed by the total parameter count."""
        rule = "=" * 60
        print("Model Summary")
        print(rule)
        print(f"Optimizer: {self.optimizer_type.upper()} | LR: {self.learning_rate} | L2: {self.l2_lambda}")
        print(rule)

        total_params = 0
        for i, layer in enumerate(self.layers):
            layer_type = layer["type"]
            params = 0
            if layer_type in ("dense", "sparse"):
                params = layer["weights"].size + layer["bias"].size
                line = f"Layer {i}: {layer_type.upper()} - Input: {layer['weights'].shape[1]}, Output: {layer['weights'].shape[0]}, Params: {params}"
            elif layer_type == "conv2d":
                params = layer["weights"].size + layer["bias"].size
                line = f"Layer {i}: {layer_type.upper()} - In_ch: {layer['in_ch']}, Out_ch: {layer['out_ch']}, Kernel: {layer['k']}x{layer['k']}, Params: {params}"
            elif layer_type == "batchnorm":
                params = layer["gamma"].size + layer["beta"].size
                line = f"Layer {i}: {layer_type.upper()} - Features: {layer['num_features']}, Params: {params}"
            else:
                # Parameter-free layers only report their type.
                line = f"Layer {i}: {layer_type.upper()}"
            total_params += params
            print(line)

        print(f"Total Parameters: {total_params}")
        print(rule)
|
Enilnets/forward.py
ADDED
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
from .activations import activate
|
|
3
|
+
|
|
4
|
+
def im2col(input_data, filter_h, filter_w, stride=1, pad=0):
    """Unfold sliding windows of a 4-D batch into rows of a 2-D matrix.

    Args:
        input_data: Array of shape (N, C, H, W).
        filter_h: Window height.
        filter_w: Window width.
        stride: Step between consecutive windows.
        pad: Zero padding added on each side of H and W.

    Returns:
        Array of shape (N * out_h * out_w, C * filter_h * filter_w); each row
        holds one window, ordered channel-major.
    """
    batch, channels, height, width = input_data.shape
    out_h = (height + 2 * pad - filter_h) // stride + 1
    out_w = (width + 2 * pad - filter_w) // stride + 1

    padded = np.pad(input_data, [(0, 0), (0, 0), (pad, pad), (pad, pad)], mode='constant')
    s_n, s_c, s_h, s_w = padded.strides

    # View of every window without copying: the two trailing axes step
    # through window origins, the two middle axes walk inside a window.
    windows = np.lib.stride_tricks.as_strided(
        padded,
        shape=(batch, channels, filter_h, filter_w, out_h, out_w),
        strides=(s_n, s_c, s_h, s_w, s_h * stride, s_w * stride),
    )
    return windows.transpose(0, 4, 5, 1, 2, 3).reshape(batch * out_h * out_w, -1)
|
|
16
|
+
|
|
17
|
+
def batchnorm_forward(x, layer, training):
    """Apply batch normalisation to a 2-D batch.

    Args:
        x: Array of shape (N, features).
        layer: Batch-norm layer dict holding "gamma", "beta", "running_mean"
            and "running_var" (and optionally "epsilon" / "momentum").
        training: When true, normalise with batch statistics and update the
            running statistics stored in ``layer``; otherwise normalise with
            the running statistics.

    Returns:
        Tuple ``(out, cache)``. ``cache`` is
        ``(x, x_norm, mean, variance, gamma, epsilon)`` in training mode and
        ``None`` in inference mode.
    """
    epsilon = layer.get("epsilon", 1e-5)
    momentum = layer.get("momentum", 0.1)

    if not training:
        x_norm = (x - layer["running_mean"]) / np.sqrt(layer["running_var"] + epsilon)
        return layer["gamma"] * x_norm + layer["beta"], None

    mean = np.mean(x, axis=0)
    variance = np.var(x, axis=0)
    x_norm = (x - mean) / np.sqrt(variance + epsilon)
    out = layer["gamma"] * x_norm + layer["beta"]

    # Exponential moving average of the batch statistics.
    keep = 1 - momentum
    layer["running_mean"] = keep * layer["running_mean"] + momentum * mean
    layer["running_var"] = keep * layer["running_var"] + momentum * variance

    return out, (x, x_norm, mean, variance, layer["gamma"], epsilon)
|
|
33
|
+
|
|
34
|
+
def Forward(self, inputs, training=False, dropout_rate=0.0):
    """Run the network on *inputs* and cache intermediate results.

    After the call ``self.outputs`` holds the input followed by every layer
    output, ``self.pre_activations`` holds ``None`` followed by one entry per
    layer (the pre-activation for dense/sparse/conv2d, else ``None``) and
    ``self.batchnorm_cache`` holds one entry per layer.

    Args:
        inputs: Array-like. 1-D input becomes a batch of one row; 3-D input
            gets a leading batch axis of size one.
        training: Enables dropout and batch statistics for batch-norm.
        dropout_rate: Rate used for dropout layers that define no "rate".

    Returns:
        The output of the final layer.

    Raises:
        ValueError: If a layer has an unknown "type".
    """
    x = np.asarray(inputs, dtype=np.float64)
    if x.ndim == 1:
        x = x.reshape(1, -1)
    elif x.ndim == 3:
        x = x.reshape(1, *x.shape)

    self.outputs = [x]
    self.pre_activations = [None]
    self.batchnorm_cache = []

    for layer in self.layers:
        kind = layer["type"]
        prev = self.outputs[-1]
        z = None       # pre-activation, only set by weighted layers
        cache = None   # only set by batch-norm in training mode

        if kind in ("dense", "sparse"):
            z = np.dot(prev, layer["weights"].T) + layer["bias"]
            result = activate(layer["activation"], z)
        elif kind == "conv2d":
            B, C, H, W = prev.shape
            F, _, K, _ = layer["weights"].shape
            out_h, out_w = H - K + 1, W - K + 1
            col = im2col(prev, K, K)
            kernel_matrix = layer["weights"].reshape(F, -1)
            conv = np.dot(col, kernel_matrix.T).reshape(B, out_h, out_w, F).transpose(0, 3, 1, 2)
            z = conv + layer["bias"][None, :, None, None]
            result = activate(layer["activation"], z)
        elif kind == "flatten":
            result = prev.reshape(prev.shape[0], -1)
        elif kind == "maxpool2d":
            B, C, H, W = prev.shape
            p = layer["p"]
            rows, cols = H // p, W // p
            # Trailing rows/columns that do not fill a window are dropped.
            result = prev[:, :, : rows * p, : cols * p].reshape(B, C, rows, p, cols, p).max(axis=(3, 5))
        elif kind == "avgpool2d":
            B, C, H, W = prev.shape
            p = layer["p"]
            rows, cols = H // p, W // p
            result = prev[:, :, : rows * p, : cols * p].reshape(B, C, rows, p, cols, p).mean(axis=(3, 5))
        elif kind == "batchnorm":
            flat = prev.reshape(prev.shape[0], -1)
            normalized, cache = batchnorm_forward(flat, layer, training)
            result = normalized.reshape(prev.shape)
        elif kind == "dropout":
            rate = layer.get("rate", dropout_rate)
            mask = None
            result = prev
            if training and rate > 0:
                if rate >= 1.0:
                    mask = np.zeros_like(prev, dtype=np.float64)
                    result = np.zeros_like(prev)
                else:
                    # Inverted dropout: rescale survivors by 1 / (1 - rate).
                    mask = (np.random.rand(*prev.shape) > rate).astype(np.float64)
                    result = prev * mask / (1.0 - rate)
            layer["mask"] = mask
        else:
            raise ValueError(f"Unknown layer type: {layer['type']}")

        self.pre_activations.append(z)
        self.batchnorm_cache.append(cache)
        self.outputs.append(result)

    return self.outputs[-1]
|
Enilnets/io.py
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import pickle
|
|
3
|
+
import os
|
|
4
|
+
import numpy as np
|
|
5
|
+
|
|
6
|
+
def _numpy_encoder(obj):
|
|
7
|
+
if isinstance(obj, np.ndarray):
|
|
8
|
+
return obj.tolist()
|
|
9
|
+
raise TypeError(f"Object of type {type(obj)} is not JSON serializable")
|
|
10
|
+
|
|
11
|
+
def Save(self, file):
    """Write the network's layers and hyper-parameters to *file*.

    A ``.pkl`` extension (case-insensitive) selects pickle; any other
    extension writes JSON, with NumPy arrays converted to lists.

    Args:
        file: Destination path.
    """
    payload = dict(
        version=2,
        layers=self.layers,
        optimizer=self.optimizer_type,
        learning_rate=self.learning_rate,
        l2_lambda=self.l2_lambda,
        momentum=self.momentum,
        t=self.t,
    )
    _, extension = os.path.splitext(file)
    if extension.lower() == ".pkl":
        with open(file, "wb") as handle:
            pickle.dump(payload, handle)
        return
    with open(file, "w") as handle:
        json.dump(payload, handle, default=_numpy_encoder)
|
|
28
|
+
|
|
29
|
+
def Load(self, file):
    """Restore layers and hyper-parameters previously written by ``Save``.

    A ``.pkl`` extension (case-insensitive) selects pickle; anything else is
    read as JSON. Optimizer state is reset. Hyper-parameters missing from the
    file keep their current values.

    Args:
        file: Path of the saved model.
    """
    ext = os.path.splitext(file)[1].lower()
    if ext == ".pkl":
        # NOTE(security): unpickling executes arbitrary code embedded in the
        # file. Only load pickle models from trusted sources.
        with open(file, "rb") as f:
            raw = pickle.load(f)
    else:
        with open(file, "r") as f:
            raw = json.load(f)

    array_keys = (
        "weights", "bias", "mask", "gamma", "beta",
        "running_mean", "running_var", "d_gamma", "d_beta",
    )
    self.layers = []
    for layer in raw.get("layers", []):
        for key in array_keys:
            # Skip absent keys and explicit nulls (e.g. a dropout mask saved
            # after an inference pass); converting None would yield array(nan).
            if layer.get(key) is not None:
                layer[key] = np.array(layer[key], dtype=np.float64)
        self.layers.append(layer)

    self.opt_state = []
    self.t = raw.get("t", 0)
    self.learning_rate = raw.get("learning_rate", self.learning_rate)
    self.optimizer_type = raw.get("optimizer", self.optimizer_type)
    self.l2_lambda = raw.get("l2_lambda", self.l2_lambda)
    self.momentum = raw.get("momentum", self.momentum)
|
Enilnets/layers.py
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
from .weight_init import init_weights, init_conv_weights
|
|
3
|
+
|
|
4
|
+
def add_dense(self, n_in, n_out, activation="relu", init_method="xavier_uniform"):
    """Append a fully connected layer mapping ``n_in`` inputs to ``n_out`` outputs."""
    weights, bias = init_weights(n_in, n_out, method=init_method)
    layer = {
        "type": "dense",
        "weights": weights,
        "bias": bias,
        "activation": activation,
    }
    self.layers.append(layer)
|
|
7
|
+
|
|
8
|
+
def add_sparse(self, n_in, n_out, connectivity=0.5, activation="relu", init_method="xavier_uniform"):
    """Append a dense-style layer whose weights are randomly masked.

    Each connection is kept when a uniform random draw falls below
    ``connectivity``; the binary mask is stored alongside the masked weights.
    """
    weights, bias = init_weights(n_in, n_out, method=init_method)
    keep = np.random.rand(n_out, n_in) < connectivity
    mask = keep.astype(np.float64)
    layer = {
        "type": "sparse",
        "weights": weights * mask,
        "bias": bias,
        "mask": mask,
        "activation": activation,
    }
    self.layers.append(layer)
|
|
12
|
+
|
|
13
|
+
def add_conv2d(self, in_ch, out_ch, k, activation="relu", init_method="he_normal"):
    """Append a 2-D convolution layer with ``out_ch`` square kernels of size ``k``."""
    weights, bias = init_conv_weights(in_ch, out_ch, k, method=init_method)
    layer = {
        "type": "conv2d",
        "weights": weights,
        "bias": bias,
        "in_ch": in_ch,
        "out_ch": out_ch,
        "k": k,
        "activation": activation,
    }
    self.layers.append(layer)
|
|
16
|
+
|
|
17
|
+
def add_flatten(self):
    """Append a parameter-free flatten layer."""
    layer = {"type": "flatten"}
    self.layers.append(layer)
|
|
19
|
+
|
|
20
|
+
def add_maxpool2d(self, pool_size=2):
    """Append a max-pooling layer with square windows of side ``pool_size``."""
    layer = {"type": "maxpool2d", "p": pool_size}
    self.layers.append(layer)
|
|
22
|
+
|
|
23
|
+
def add_avgpool2d(self, pool_size=2):
    """Append an average-pooling layer with square windows of side ``pool_size``."""
    layer = {"type": "avgpool2d", "p": pool_size}
    self.layers.append(layer)
|
|
25
|
+
|
|
26
|
+
def add_batchnorm(self, num_features, epsilon=1e-5, momentum=0.1):
    """Append a batch-normalisation layer over ``num_features`` features.

    Running mean/variance start at 0/1 and the learnable scale/shift
    (gamma/beta) start at 1/0.
    """
    def zeros():
        return np.zeros(num_features, dtype=np.float64)

    def ones():
        return np.ones(num_features, dtype=np.float64)

    layer = {
        "type": "batchnorm",
        "num_features": num_features,
        "epsilon": epsilon,
        "momentum": momentum,
        "running_mean": zeros(),
        "running_var": ones(),
        "gamma": ones(),
        "beta": zeros(),
    }
    self.layers.append(layer)
|
|
32
|
+
|
|
33
|
+
def add_dropout(self, rate=0.5):
    """Append a dropout layer that drops activations with probability ``rate``."""
    layer = {"type": "dropout", "rate": rate}
    self.layers.append(layer)
|
Enilnets/loss.py
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
|
|
3
|
+
def ComputeLoss(self, output, target, function="mse", reduction="mean", **kwargs):
    """Compute a loss between *output* and *target*.

    Args:
        output: Predictions (array-like).
        target: Targets (array-like), broadcast-compatible with ``output``.
        function: "mse", "mae", "huber", "smooth_l1", "binary_cross_entropy",
            "cross_entropy" / "categorical_cross_entropy", "focal" or "hinge".
        reduction: "mean", "sum", or anything else to get the unreduced
            element-wise loss array. For the categorical cross-entropy,
            "mean" divides the summed loss by the size of the first axis
            (the batch) rather than by the element count.
        **kwargs: ``delta`` for "huber"; ``alpha`` and ``gamma`` for "focal".

    Returns:
        A float for "mean"/"sum" reductions, otherwise the loss array.

    Raises:
        ValueError: If ``function`` is not recognised.
    """
    o = np.asarray(output, dtype=np.float64)
    t = np.asarray(target, dtype=np.float64)
    if function == "mse":
        loss = (o - t) ** 2
    elif function == "mae":
        loss = np.abs(o - t)
    elif function == "huber":
        delta = kwargs.get("delta", 1.0)
        diff = np.abs(o - t)
        loss = np.where(diff < delta, 0.5 * diff**2, delta * (diff - 0.5 * delta))
    elif function == "smooth_l1":
        diff = np.abs(o - t)
        loss = np.where(diff < 1, 0.5 * diff**2, diff - 0.5)
    elif function == "binary_cross_entropy":
        o = np.clip(o, 1e-12, 1 - 1e-12)
        loss = -(t * np.log(o) + (1 - t) * np.log(1 - o))
    elif function in ("cross_entropy", "categorical_cross_entropy"):
        o = np.clip(o, 1e-12, 1.0)
        loss = -t * np.log(o)
        # Per-sample mean: sum over classes, average over the batch axis.
        if reduction == "mean":
            return float(np.sum(loss) / o.shape[0])
        if reduction == "sum":
            return float(np.sum(loss))
        return loss
    elif function == "focal":
        alpha = kwargs.get("alpha", 0.25)
        gamma = kwargs.get("gamma", 2.0)
        # Clip on both sides: log(1 - o) must stay finite when o == 1.
        o = np.clip(o, 1e-12, 1 - 1e-12)
        # pt is the probability assigned to the true class; the focal
        # modulating factor (1 - pt) ** gamma down-weights easy examples of
        # BOTH classes.
        pt = o * t + (1 - o) * (1 - t)
        modulator = (1 - pt) ** gamma
        loss = -modulator * (alpha * t * np.log(o) + (1 - alpha) * (1 - t) * np.log(1 - o))
    elif function == "hinge":
        loss = np.maximum(0, 1 - t * o)
    else:
        raise ValueError(f"Unknown loss function: {function}")

    if reduction == "mean":
        return float(np.mean(loss))
    if reduction == "sum":
        return float(np.sum(loss))
    return loss
|
Enilnets/optimizer.py
ADDED
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
from .forward import im2col
|
|
3
|
+
|
|
4
|
+
def update(self):
    """Apply one optimizer step to every trainable layer.

    Uses the gradients left behind by the backward pass (``self.deltas`` and
    the batch-norm "d_gamma"/"d_beta" entries) together with the cached
    layer inputs in ``self.outputs``. ``self.optimizer_type`` selects "sgd"
    (with momentum), "rmsprop" or "adagrad"; any other value means Adam.
    L2 regularisation is added to weight gradients only.
    """
    self.t += 1
    b1, b2, eps = 0.9, 0.999, 1e-8

    # Lazily create one optimizer-state slot per layer.
    if not self.opt_state:
        for layer in self.layers:
            kind = layer["type"]
            if kind in ("dense", "sparse", "conv2d"):
                w, b = layer["weights"], layer["bias"]
                slot = {
                    "mw": np.zeros_like(w),
                    "vw": np.zeros_like(w),
                    "mb": np.zeros_like(b),
                    "vb": np.zeros_like(b),
                    "vgw": np.zeros_like(w),
                    "vgb": np.zeros_like(b),
                }
            elif kind == "batchnorm":
                slot = {
                    "mg": np.zeros_like(layer["gamma"]),
                    "vg": np.zeros_like(layer["gamma"]),
                    "mb": np.zeros_like(layer["beta"]),
                    "vb": np.zeros_like(layer["beta"]),
                }
            else:
                slot = None
            self.opt_state.append(slot)

    def step(layer, state, name, grad, m_key, v_key, vel_key):
        # Update layer[name] in place; m_key/v_key name the first/second
        # moment buffers and vel_key the SGD momentum buffer.
        if self.optimizer_type == "sgd":
            state[vel_key] = self.momentum * state[vel_key] - self.learning_rate * grad
            layer[name] += state[vel_key]
        elif self.optimizer_type == "rmsprop":
            state[v_key] = b2 * state[v_key] + (1 - b2) * (grad ** 2)
            layer[name] -= self.learning_rate * grad / (np.sqrt(state[v_key]) + eps)
        elif self.optimizer_type == "adagrad":
            state[v_key] += grad ** 2
            layer[name] -= self.learning_rate * grad / (np.sqrt(state[v_key]) + eps)
        else:
            # Adam with bias-corrected moments.
            state[m_key] = b1 * state[m_key] + (1 - b1) * grad
            state[v_key] = b2 * state[v_key] + (1 - b2) * (grad ** 2)
            layer[name] -= self.learning_rate * (state[m_key] / (1 - b1 ** self.t)) / (np.sqrt(state[v_key] / (1 - b2 ** self.t)) + eps)

    for idx, layer in enumerate(self.layers):
        state = self.opt_state[idx]
        kind = layer["type"]

        if kind == "batchnorm":
            grad_gamma = layer.get("d_gamma", np.zeros_like(layer["gamma"]))
            grad_beta = layer.get("d_beta", np.zeros_like(layer["beta"]))
            # Batch-norm reuses its first-moment buffers as SGD velocity.
            step(layer, state, "gamma", grad_gamma, "mg", "vg", "mg")
            step(layer, state, "beta", grad_beta, "mb", "vb", "mb")
            continue

        if kind in ("dense", "sparse"):
            grad_w = np.dot(self.deltas[idx].T, self.outputs[idx])
            grad_b = np.sum(self.deltas[idx], axis=0)
            if kind == "sparse":
                grad_w *= layer["mask"]
        elif kind == "conv2d":
            K = layer["k"]
            col = im2col(self.outputs[idx], K, K)
            delta_flat = self.deltas[idx].transpose(0, 2, 3, 1).reshape(-1, layer["weights"].shape[0])
            grad_w = np.dot(delta_flat.T, col).reshape(layer["weights"].shape)
            grad_b = np.sum(self.deltas[idx], axis=(0, 2, 3))
        else:
            continue

        # L2 penalty on weights (masked for sparse layers), not on biases.
        grad_w = grad_w + self.l2_lambda * layer["weights"] * layer.get("mask", 1.0)

        step(layer, state, "weights", grad_w, "mw", "vw", "vgw")
        step(layer, state, "bias", grad_b, "mb", "vb", "vgb")
|
Enilnets/reinforce.py
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import copy
|
|
2
|
+
import numpy as np
|
|
3
|
+
|
|
4
|
+
def Reinforce(self, inputs, score_fn, noise=0.05, tries=10, sigma=1.0):
    """Random-search the weights for a higher score.

    Each try perturbs a fresh copy of the starting weights and biases with
    Gaussian noise of standard deviation ``sigma * noise`` and keeps the
    perturbed network only if it scores strictly higher than the best so far.

    Args:
        inputs: Inputs fed to ``self.Forward`` for scoring.
        score_fn: Callable mapping the network output to a comparable score.
        noise: Base noise scale.
        tries: Number of perturbations; at least one is always attempted.
        sigma: Multiplier applied to ``noise``.

    Returns:
        The best score found; ``self.layers`` is set to the matching layers.
    """
    inputs = np.asarray(inputs, dtype=np.float64)
    best_score = score_fn(self.Forward(inputs))
    best_layers = copy.deepcopy(self.layers)
    base_layers = copy.deepcopy(self.layers)
    spread = sigma * noise
    attempts = max(1, tries)

    for _ in range(attempts):
        trial = copy.deepcopy(base_layers)
        for layer in trial:
            if "weights" not in layer:
                continue
            layer["weights"] += np.random.normal(0, spread, layer["weights"].shape)
            if layer["type"] == "sparse":
                # Keep pruned connections at zero.
                layer["weights"] *= layer["mask"]
            layer["bias"] += np.random.normal(0, spread, layer["bias"].shape)

        self.layers = trial
        trial_score = score_fn(self.Forward(inputs))
        if trial_score > best_score:
            best_score, best_layers = trial_score, copy.deepcopy(trial)

    self.layers = best_layers
    return best_score
|
Enilnets/train.py
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
|
|
3
|
+
def TrainBatch(self, xs, ys, loss_function=None, **loss_kwargs):
    """Run one forward/backward/update cycle on a single batch.

    Args:
        xs: Batch inputs.
        ys: Batch targets.
        loss_function: Loss name; when ``None`` it is "cross_entropy" if the
            last layer's activation is softmax and "mse" otherwise.
        **loss_kwargs: Extra arguments forwarded to ``ComputeLoss``.

    Returns:
        Tuple ``(loss, out)`` with the loss value and the forward output
        computed before the weights were updated.
    """
    out = self.Forward(xs, training=True)
    if loss_function is None:
        ends_in_softmax = self.layers[-1].get("activation") == "softmax"
        loss_function = "cross_entropy" if ends_in_softmax else "mse"
    loss = self.ComputeLoss(out, ys, loss_function, **loss_kwargs)
    self.Backward(ys)
    self.update()
    return loss, out
|
|
11
|
+
|
|
12
|
+
def compute_accuracy(self, predictions, targets):
    """Return the fraction of predictions that match the targets.

    Args:
        predictions: Array-like of model outputs. When the last axis has more
            than one entry the predicted class is the arg-max along axis 1;
            otherwise outputs above 0.5 count as class 1.
        targets: Array-like of one-hot rows (multi-class) or 0/1 labels
            (binary).

    Returns:
        Mean of the element-wise matches, as a NumPy float.
    """
    # Accept plain Python sequences as well as arrays, like the forward pass
    # and the loss do.
    predictions = np.asarray(predictions)
    targets = np.asarray(targets)
    if predictions.shape[-1] > 1:  # Multi-class
        pred_classes = np.argmax(predictions, axis=1)
        true_classes = np.argmax(targets, axis=1)
    else:  # Binary
        pred_classes = (predictions > 0.5).astype(int).flatten()
        true_classes = targets.flatten()
    return np.mean(pred_classes == true_classes)
|
|
20
|
+
|
|
21
|
+
def Train(self, X_train, Y_train, epochs=10, batch_size=32, X_val=None, Y_val=None, loss_function=None, verbose=True, **loss_kwargs):
    """Train with shuffled mini-batches and record per-epoch metrics.

    Args:
        X_train: Training inputs, indexable by an index array (first axis =
            samples).
        Y_train: Training targets aligned with ``X_train``.
        epochs: Number of passes over the data.
        batch_size: Mini-batch size; the final batch may be smaller.
        X_val: Optional validation inputs.
        Y_val: Optional validation targets; validation runs only when both
            are given.
        loss_function: Loss name, or ``None`` to pick "cross_entropy" for a
            softmax output layer and "mse" otherwise.
        verbose: Print a progress line after each epoch.
        **loss_kwargs: Extra arguments forwarded to the loss.

    Returns:
        Dict with lists "loss", "accuracy", "val_loss" and "val_accuracy"
        (the validation lists stay empty without validation data).
    """
    history = {"loss": [], "val_loss": [], "accuracy": [], "val_accuracy": []}
    n_samples = X_train.shape[0]
    has_validation = X_val is not None and Y_val is not None

    for epoch in range(epochs):
        order = np.random.permutation(n_samples)
        X_shuffled, Y_shuffled = X_train[order], Y_train[order]

        loss_sum = 0.0
        acc_sum = 0.0
        seen = 0
        for start in range(0, n_samples, batch_size):
            stop = start + batch_size
            X_batch = X_shuffled[start:stop]
            Y_batch = Y_shuffled[start:stop]
            loss, preds = self.TrainBatch(X_batch, Y_batch, loss_function=loss_function, **loss_kwargs)
            # Weight by the actual batch size so a short last batch does not
            # skew the epoch averages.
            count = X_batch.shape[0]
            loss_sum += loss * count
            acc_sum += self.compute_accuracy(preds, Y_batch) * count
            seen += count

        avg_loss = loss_sum / seen
        avg_acc = acc_sum / seen
        history["loss"].append(avg_loss)
        history["accuracy"].append(avg_acc)

        if not has_validation:
            if verbose:
                print(f"Epoch {epoch+1}/{epochs} - loss: {avg_loss:.4f} - acc: {avg_acc:.4f}")
            continue

        val_pred = self.Forward(X_val)
        if loss_function is not None:
            val_loss_name = loss_function
        elif self.layers[-1].get("activation") == "softmax":
            val_loss_name = "cross_entropy"
        else:
            val_loss_name = "mse"
        val_loss = self.ComputeLoss(val_pred, Y_val, val_loss_name, **loss_kwargs)
        val_acc = self.compute_accuracy(val_pred, Y_val)
        history["val_loss"].append(val_loss)
        history["val_accuracy"].append(val_acc)
        if verbose:
            print(f"Epoch {epoch+1}/{epochs} - loss: {avg_loss:.4f} - acc: {avg_acc:.4f} - val_loss: {val_loss:.4f} - val_acc: {val_acc:.4f}")

    return history
|
Enilnets/weight_init.py
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
|
|
3
|
+
def init_weights(n_in, n_out, method="xavier_uniform"):
    """Build the weight matrix and bias vector of a fully connected layer.

    Args:
        n_in: Number of input units (columns of the weight matrix).
        n_out: Number of output units (rows of the weight matrix).
        method: One of ``"xavier_uniform"``, ``"xavier_normal"``,
            ``"he_uniform"``, ``"he_normal"``, ``"normal"`` or
            ``"orthogonal"``.

    Returns:
        A ``(weights, bias)`` tuple: ``weights`` is a float64 array of shape
        ``(n_out, n_in)`` and ``bias`` is a float64 zero vector of length
        ``n_out``.

    Raises:
        ValueError: If ``method`` is not one of the supported names.
    """
    shape = (n_out, n_in)

    if method in ("xavier_uniform", "he_uniform"):
        # Xavier scales by fan_in + fan_out, He by fan_in alone.
        fan = n_in + n_out if method == "xavier_uniform" else n_in
        bound = np.sqrt(6 / fan)
        weights = np.random.uniform(-bound, bound, shape).astype(np.float64)
    elif method in ("xavier_normal", "he_normal"):
        fan = n_in + n_out if method == "xavier_normal" else n_in
        sigma = np.sqrt(2 / fan)
        weights = np.random.normal(0, sigma, shape).astype(np.float64)
    elif method == "normal":
        # Fixed standard deviation, independent of the layer size.
        weights = np.random.normal(0, 0.1, shape).astype(np.float64)
    elif method == "orthogonal":
        # Replace the singular values of a Gaussian matrix by ones.
        gaussian = np.random.normal(0, 1, shape).astype(np.float64)
        left, _, right_t = np.linalg.svd(gaussian, full_matrices=False)
        weights = left @ right_t
    else:
        raise ValueError(f"Unknown initialization method: {method}")

    bias = np.zeros(n_out, dtype=np.float64)
    return weights, bias
|
|
26
|
+
|
|
27
|
+
def init_conv_weights(in_ch, out_ch, k, method="he_normal"):
    """Build the kernel tensor and bias vector of a square 2-D convolution.

    Args:
        in_ch: Number of input channels.
        out_ch: Number of output channels (filters).
        k: Side length of the square kernel.
        method: One of ``"xavier_uniform"``, ``"xavier_normal"``,
            ``"he_uniform"``, ``"he_normal"``, ``"normal"`` or
            ``"orthogonal"``.

    Returns:
        A ``(weights, bias)`` tuple: ``weights`` is a float64 array of shape
        ``(out_ch, in_ch, k, k)`` and ``bias`` is a float64 zero vector of
        length ``out_ch``.

    Raises:
        ValueError: If ``method`` is not one of the supported names.
    """
    shape = (out_ch, in_ch, k, k)
    receptive_field = k * k
    fan_in = in_ch * receptive_field
    # The receptive field is counted on both sides, as is conventional for
    # convolution kernels; using out_ch alone makes the Xavier scale too large.
    fan_out = out_ch * receptive_field

    if method == "xavier_uniform":
        limit = np.sqrt(6 / (fan_in + fan_out))
        w = np.random.uniform(-limit, limit, shape).astype(np.float64)
    elif method == "xavier_normal":
        std = np.sqrt(2 / (fan_in + fan_out))
        w = np.random.normal(0, std, shape).astype(np.float64)
    elif method == "he_uniform":
        limit = np.sqrt(6 / fan_in)
        w = np.random.uniform(-limit, limit, shape).astype(np.float64)
    elif method == "he_normal":
        std = np.sqrt(2 / fan_in)
        w = np.random.normal(0, std, shape).astype(np.float64)
    elif method == "normal":
        # Fixed standard deviation, independent of the kernel size.
        w = np.random.normal(0, 0.1, shape).astype(np.float64)
    elif method == "orthogonal":
        # Orthogonalise the flattened (out_ch, in_ch * k * k) filter matrix by
        # replacing its singular values with ones, then restore kernel layout.
        flat = np.random.normal(0, 1, (out_ch, fan_in)).astype(np.float64)
        u, _, vt = np.linalg.svd(flat, full_matrices=False)
        w = (u @ vt).reshape(shape)
    else:
        raise ValueError(f"Unknown initialization method: {method}")

    b = np.zeros(out_ch, dtype=np.float64)
    return w, b
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
Enilnets/__init__.py,sha256=toKZal5qObYjtglQ8Phdn5sssZKxdMw66_8oJV4b2p8,1073
|
|
2
|
+
Enilnets/activations.py,sha256=PfpE-BdBKyjTFWKO9nzOvBHTkrusudnxRfoy8YLDZsM,1700
|
|
3
|
+
Enilnets/backward.py,sha256=bWvyXQrX1PhhVIbWynb0ru3fQsg1ZUwjmE5ogDXbgvQ,5158
|
|
4
|
+
Enilnets/base.py,sha256=39dPV_vaD0TP0wTqf6BzqggBrC8Ul8VwtCi6lrarGj0,1836
|
|
5
|
+
Enilnets/forward.py,sha256=rmWFDrTVayYhovZjbiaWFIzYrV6AgySk_vnirseoHkk,4443
|
|
6
|
+
Enilnets/io.py,sha256=OIaBSe6yUuZohxtBn5qLj1nV2FFhGDufw-0CVfLqo3c,1538
|
|
7
|
+
Enilnets/layers.py,sha256=4cQHzahtkMjWkeS_hQSl2mGIlJy8-Cpy24RSu5EQrzE,1806
|
|
8
|
+
Enilnets/loss.py,sha256=44dlcEwZPhwISqJTb-k8UK4k2oalhBv2rfrpDU5X-x0,1651
|
|
9
|
+
Enilnets/optimizer.py,sha256=YDFWXh19b1nVTLT-Ukl8tPe9yyWo1kHxIC99xBK_glc,5283
|
|
10
|
+
Enilnets/reinforce.py,sha256=u9OnVzfBKVOA2Cnyy4v_dFbQmECZRH5AAINtlNZqZpc,971
|
|
11
|
+
Enilnets/train.py,sha256=pS5-h35vRmrcyQVx_ei21uzXQjN8WD68iK-4RYnArfI,2683
|
|
12
|
+
Enilnets/weight_init.py,sha256=QucqaxYj1kB6ibe3xcFCxJIRHuW3oM_ciNkgzuI5ERI,2268
|
|
13
|
+
enilnets-1.0.0.dist-info/licenses/LICENCE,sha256=7ZZBXgnxbZqeBtsHi9mj5xlGBsseLliqp_5VHwD0Xq0,941
|
|
14
|
+
enilnets-1.0.0.dist-info/METADATA,sha256=ZDiJPbFaPWM2WIi-2rcfZUfPLejlF7MObC34LuLBuS8,147
|
|
15
|
+
enilnets-1.0.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
16
|
+
enilnets-1.0.0.dist-info/top_level.txt,sha256=1ABcCejrnRjpAeLRwS_hMad_TDgW3JmLece8UkuRsmg,9
|
|
17
|
+
enilnets-1.0.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
Copyright (c) 2023-2026 Marcel-Philipp Walter Stauch
|
|
2
|
+
|
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
4
|
+
of this software and associated documentation files (the "Software"), to use,
|
|
5
|
+
copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
|
6
|
+
the Software for any purpose.
|
|
7
|
+
|
|
8
|
+
The above copyright notice and the following attribution notice must be
|
|
9
|
+
included in all copies or substantial portions of the Software:
|
|
10
|
+
|
|
11
|
+
"Original work by Marcel-Philipp Walter Stauch"
|
|
12
|
+
|
|
13
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
14
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
15
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
16
|
+
AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
|
17
|
+
ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
|
18
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
Enilnets
|