Enilnets 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- enilnets-1.0.0/Enilnets/__init__.py +33 -0
- enilnets-1.0.0/Enilnets/activations.py +39 -0
- enilnets-1.0.0/Enilnets/backward.py +121 -0
- enilnets-1.0.0/Enilnets/base.py +41 -0
- enilnets-1.0.0/Enilnets/forward.py +101 -0
- enilnets-1.0.0/Enilnets/io.py +48 -0
- enilnets-1.0.0/Enilnets/layers.py +34 -0
- enilnets-1.0.0/Enilnets/loss.py +43 -0
- enilnets-1.0.0/Enilnets/optimizer.py +96 -0
- enilnets-1.0.0/Enilnets/reinforce.py +25 -0
- enilnets-1.0.0/Enilnets/train.py +54 -0
- enilnets-1.0.0/Enilnets/weight_init.py +49 -0
- enilnets-1.0.0/Enilnets.egg-info/PKG-INFO +7 -0
- enilnets-1.0.0/Enilnets.egg-info/SOURCES.txt +20 -0
- enilnets-1.0.0/Enilnets.egg-info/dependency_links.txt +1 -0
- enilnets-1.0.0/Enilnets.egg-info/requires.txt +1 -0
- enilnets-1.0.0/Enilnets.egg-info/top_level.txt +1 -0
- enilnets-1.0.0/LICENCE +18 -0
- enilnets-1.0.0/PKG-INFO +7 -0
- enilnets-1.0.0/README.md +992 -0
- enilnets-1.0.0/setup.cfg +4 -0
- enilnets-1.0.0/setup.py +10 -0
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
from .base import NeuralNet
|
|
2
|
+
from .layers import add_dense, add_sparse, add_conv2d, add_flatten, add_maxpool2d, add_avgpool2d, add_batchnorm, add_dropout
|
|
3
|
+
from .forward import Forward
|
|
4
|
+
from .backward import Backward
|
|
5
|
+
from .optimizer import update
|
|
6
|
+
from .train import TrainBatch, Train, compute_accuracy
|
|
7
|
+
from .reinforce import Reinforce
|
|
8
|
+
from .loss import ComputeLoss
|
|
9
|
+
from .io import Save, Load
|
|
10
|
+
|
|
11
|
+
print("Loading Modules....")

# Bind the free functions imported above onto NeuralNet so they can be
# called as methods.  ``predict`` is an alias for ``Forward``.
for _attr, _impl in (
    ("add_dense", add_dense),
    ("add_sparse", add_sparse),
    ("add_conv2d", add_conv2d),
    ("add_flatten", add_flatten),
    ("add_maxpool2d", add_maxpool2d),
    ("add_avgpool2d", add_avgpool2d),
    ("add_batchnorm", add_batchnorm),
    ("add_dropout", add_dropout),
    ("Forward", Forward),
    ("predict", Forward),
    ("Backward", Backward),
    ("update", update),
    ("TrainBatch", TrainBatch),
    ("Train", Train),
    ("compute_accuracy", compute_accuracy),
    ("Reinforce", Reinforce),
    ("ComputeLoss", ComputeLoss),
    ("Save", Save),
    ("Load", Load),
):
    setattr(NeuralNet, _attr, _impl)
del _attr, _impl  # keep the loop temporaries out of the package namespace

print("Enilnets Library Ready!")
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
|
|
3
|
+
def activate(name, x):
    """Apply the activation function called *name* element-wise to *x*.

    Supported names: ``relu``, ``leakyrelu``, ``elu``, ``selu``, ``gelu``
    (tanh approximation), ``swish``, ``sigmoid``, ``tanh`` and ``softmax``
    (normalised over the last axis).  Any other name returns *x* unchanged,
    i.e. behaves as a linear activation.

    The exponentials are only evaluated on bounded arguments so that large
    magnitude inputs neither overflow nor emit NumPy warnings.
    """
    if name == "relu":
        return np.maximum(0, x)
    if name == "leakyrelu":
        return np.where(x > 0, x, 0.01 * x)
    if name == "elu":
        # expm1 on the non-positive part only: no overflow for large x and
        # no cancellation error for x close to zero.
        return np.where(x > 0, x, np.expm1(np.minimum(x, 0)))
    if name == "selu":
        alpha = 1.6732632423543772848170429916717
        scale = 1.0507009873554804934193349852946
        return scale * np.where(x > 0, x, alpha * np.expm1(np.minimum(x, 0)))
    if name == "gelu":
        return 0.5 * x * (1 + np.tanh(np.sqrt(2 / np.pi) * (x + 0.044715 * x**3)))
    if name == "swish":
        # Same clipping as the sigmoid branch to keep exp() finite.
        return x * 1.0 / (1.0 + np.exp(-np.clip(x, -500, 500)))
    if name == "sigmoid":
        return 1.0 / (1.0 + np.exp(-np.clip(x, -500, 500)))
    if name == "tanh":
        return np.tanh(x)
    if name == "softmax":
        # Subtract the row maximum for numerical stability.
        e_x = np.exp(x - np.max(x, axis=-1, keepdims=True))
        return e_x / np.sum(e_x, axis=-1, keepdims=True)
    return x
|
|
19
|
+
|
|
20
|
+
def derivative(name, x):
    """Return the element-wise derivative of activation *name* at pre-activation *x*.

    Supported names: ``relu``, ``leakyrelu``, ``elu``, ``selu``, ``gelu``,
    ``swish``, ``sigmoid`` and ``tanh``.  Every other name (including
    ``softmax`` and ``linear``) yields an array of ones shaped like *x*.

    The ``gelu`` branch differentiates the tanh approximation
    ``0.5 * x * (1 + tanh(c * (x + 0.044715 * x**3)))`` so that the gradient
    matches that forward formula.
    """
    if name == "relu":
        return (x > 0).astype(np.float64)
    if name == "leakyrelu":
        return np.where(x > 0, 1.0, 0.01)
    if name == "elu":
        # exp() is only needed for x <= 0; clamp to avoid overflow elsewhere.
        return np.where(x > 0, 1.0, np.exp(np.minimum(x, 0)))
    if name == "selu":
        alpha = 1.6732632423543772848170429916717
        scale = 1.0507009873554804934193349852946
        return scale * np.where(x > 0, 1.0, alpha * np.exp(np.minimum(x, 0)))
    if name == "gelu":
        c = np.sqrt(2 / np.pi)
        t = np.tanh(c * (x + 0.044715 * x**3))
        inner_grad = c * (1 + 3 * 0.044715 * x**2)
        # product rule on 0.5 * x * (1 + tanh(u(x)))
        return 0.5 * (1 + t) + 0.5 * x * (1 - t**2) * inner_grad
    if name == "swish":
        s = 1.0 / (1.0 + np.exp(-np.clip(x, -500, 500)))
        return s + x * s * (1 - s)
    if name == "sigmoid":
        s = 1.0 / (1.0 + np.exp(-np.clip(x, -500, 500)))
        return s * (1 - s)
    if name == "tanh":
        return 1 - np.tanh(x) ** 2
    return np.ones_like(x)
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
from .activations import derivative
|
|
3
|
+
from .forward import im2col
|
|
4
|
+
|
|
5
|
+
def maxpool2d_backward(delta, x, p):
    """Route the pooled gradient *delta* back to the inputs of a max-pool.

    Parameters
    ----------
    delta : gradient with respect to the pooled output; only the leading
        ``H // p`` by ``W // p`` spatial entries are used.
    x : the 4-D array that was pooled, unpacked as ``(B, C, H, W)``.
    p : pooling window size (also used as the stride).

    Returns an array shaped like *x*.  Each window's gradient goes to the
    position of its maximum; ties share the gradient equally.  Rows and
    columns beyond the last full window receive zero.

    The windows are formed with ``reshape`` rather than by re-using the
    strides of *x* on a freshly allocated array, so *x* may be any view
    (sliced, reversed, transposed) without risk of out-of-bounds writes.
    """
    B, C, H, W = x.shape
    H_b, W_b = H // p, W // p
    H_trim, W_trim = H_b * p, W_b * p

    # (B, C, H_b, p, W_b, p): axes 3 and 5 index positions inside a window.
    blocks = x[:, :, :H_trim, :W_trim].reshape(B, C, H_b, p, W_b, p)
    block_max = blocks.max(axis=(3, 5), keepdims=True)
    mask = (blocks == block_max).astype(np.float64)
    # Normalise so that tied maxima split the incoming gradient.
    mask = mask / np.maximum(mask.sum(axis=(3, 5), keepdims=True), 1e-12)

    delta_expanded = delta[:, :, :H_b, :W_b][:, :, :, None, :, None]

    dx = np.zeros_like(x)
    dx[:, :, :H_trim, :W_trim] = (mask * delta_expanded).reshape(B, C, H_trim, W_trim)
    return dx
|
|
29
|
+
|
|
30
|
+
def avgpool2d_backward(delta, x, p):
    """Spread the pooled gradient *delta* evenly over each average-pool window.

    *x* is only consulted for its ``(B, C, H, W)`` shape and dtype.  Every
    input position inside a full ``p`` x ``p`` window receives
    ``delta / (p * p)`` of that window; trailing rows and columns that do not
    fill a window stay at zero.
    """
    _, _, height, width = x.shape
    rows, cols = height // p, width // p
    covered_h, covered_w = rows * p, cols * p

    share = delta[:, :, :rows, :cols] / (p * p)

    grad = np.zeros_like(x)
    # One strided assignment per offset inside the window.
    for dy in range(p):
        for dx_off in range(p):
            grad[:, :, dy:covered_h:p, dx_off:covered_w:p] = share
    return grad
|
|
44
|
+
|
|
45
|
+
def batchnorm_backward(dout, cache):
    """Back-propagate through a batch-normalisation step.

    *cache* is the tuple ``(x, x_norm, mean, var, gamma, epsilon)``.
    Reductions run over axis 0.  Returns ``(dx, dgamma, dbeta)``: the
    gradient with respect to the normalised input, the scale and the shift.
    """
    x, x_norm, mean, var, gamma, epsilon = cache
    n_rows = x.shape[0]
    centered = x - mean
    std = np.sqrt(var + epsilon)

    # Parameter gradients.
    grad_beta = np.sum(dout, axis=0)
    grad_gamma = np.sum(dout * x_norm, axis=0)

    # Gradient flowing into the normalised activations, then into the
    # batch statistics.
    grad_norm = dout * gamma
    grad_var = np.sum(grad_norm * centered * -0.5 * (var + epsilon) ** (-1.5), axis=0)
    grad_mean = np.sum(grad_norm * -1.0 / std, axis=0)

    grad_x = grad_norm / std + grad_var * 2.0 * centered / n_rows + grad_mean / n_rows
    return grad_x, grad_gamma, grad_beta
|
|
55
|
+
|
|
56
|
+
def conv2d_backward_input(delta, weights, input_shape):
    """Gradient of a stride-1, unpadded 2-D convolution with respect to its input.

    *delta* is unpacked as ``(B, F, out_h, out_w)`` and *weights* as
    ``(F, C, K, K)``.  The result has shape
    ``(B, C, input_shape[2], input_shape[3])`` and is obtained by correlating
    the zero-padded *delta* with the spatially flipped kernels.
    """
    n_batch, _, _, _ = delta.shape
    _, in_ch, ksize, _ = weights.shape
    height, width = input_shape[2], input_shape[3]

    # Pad by K-1 on every side so each input pixel sees all windows that
    # covered it in the forward pass.
    border = (ksize - 1, ksize - 1)
    padded = np.pad(delta, [(0, 0), (0, 0), border, border], mode="constant")
    patches = im2col(padded, ksize, ksize)

    # Flip the kernels spatially and swap the filter/channel axes.
    flipped = weights[:, :, ::-1, ::-1].transpose(1, 0, 2, 3)
    kernels = flipped.reshape(in_ch, -1)

    grad = np.dot(patches, kernels.T)
    return grad.reshape(n_batch, height, width, in_ch).transpose(0, 3, 1, 2)
|
|
67
|
+
|
|
68
|
+
def Backward(self, targets):
    """Back-propagate the error for the most recent ``Forward`` pass.

    *targets* is converted to a float64 array; a 1-D target is treated as a
    single sample.  The output-layer delta is ``(out - targets) / batch``
    for a softmax output, otherwise it is additionally multiplied by the
    derivative of the output activation.

    Results are stored on the instance:

    * ``self.deltas[i]`` - the error signal for layer ``i`` (for
      dense/sparse/conv2d layers it already includes the activation
      derivative);
    * ``layer["d_gamma"]`` / ``layer["d_beta"]`` on every batchnorm layer.

    Raises ``ValueError`` when a batchnorm layer after the first one has no
    cache, i.e. the forward pass was not run with ``training=True``.
    """
    targets = np.asarray(targets, dtype=np.float64)
    if targets.ndim == 1:
        targets = targets.reshape(1, -1)
    batch_size = targets.shape[0]
    self.deltas = [None] * len(self.layers)
    out = self.outputs[-1]
    last = self.layers[-1]
    if last.get("activation") == "softmax":
        delta = (out - targets) / batch_size
    else:
        activation_input = self.pre_activations[-1] if self.pre_activations[-1] is not None else out
        delta = (out - targets) * derivative(last.get("activation", "linear"), activation_input) / batch_size
    self.deltas[-1] = delta

    for l in reversed(range(len(self.layers) - 1)):
        curr = self.layers[l]
        nxt = self.layers[l + 1]
        next_delta = self.deltas[l + 1]
        # outputs[l + 1] is the output of layer l, i.e. the input of nxt.
        nxt_input = self.outputs[l + 1]

        if nxt["type"] in ("dense", "sparse"):
            err = np.dot(next_delta, nxt["weights"])
        elif nxt["type"] == "flatten":
            err = next_delta.reshape(nxt_input.shape)
        elif nxt["type"] == "conv2d":
            err = conv2d_backward_input(next_delta, nxt["weights"], nxt_input.shape)
        elif nxt["type"] == "maxpool2d":
            err = maxpool2d_backward(next_delta, nxt_input, nxt["p"])
        elif nxt["type"] == "avgpool2d":
            err = avgpool2d_backward(next_delta, nxt_input, nxt["p"])
        elif nxt["type"] == "dropout":
            mask = nxt.get("mask")
            rate = nxt.get("rate", 0.0)
            if mask is None or rate == 0.0:
                err = next_delta
            elif rate >= 1.0:
                # Everything was dropped in the forward pass; dividing the
                # all-zero mask by (1 - rate) == 0 would produce NaN.
                err = np.zeros_like(next_delta)
            else:
                err = next_delta * mask / (1.0 - rate)
        elif nxt["type"] == "batchnorm":
            flat = next_delta.reshape(nxt_input.shape[0], -1)
            cache = self.batchnorm_cache[l + 1]
            if cache is None:
                raise ValueError("BatchNorm cache is None. Ensure Forward(training=True) was called before Backward.")
            err_flat, dgamma, dbeta = batchnorm_backward(flat, cache)
            nxt["d_gamma"] = dgamma
            nxt["d_beta"] = dbeta
            err = err_flat.reshape(nxt_input.shape)
        else:
            err = np.zeros_like(nxt_input)

        if curr["type"] in ("dense", "sparse", "conv2d"):
            activation_input = self.pre_activations[l + 1] if self.pre_activations[l + 1] is not None else nxt_input
            self.deltas[l] = err * derivative(curr.get("activation", "linear"), activation_input)
        else:
            self.deltas[l] = err

    # The loop above only computes batchnorm parameter gradients while the
    # layer is in the "next" position, which never happens for index 0.
    first = self.layers[0]
    if first["type"] == "batchnorm" and self.batchnorm_cache and self.batchnorm_cache[0] is not None:
        flat = self.deltas[0].reshape(self.outputs[0].shape[0], -1)
        _, first["d_gamma"], first["d_beta"] = batchnorm_backward(flat, self.batchnorm_cache[0])
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
|
|
3
|
+
class NeuralNet:
    """Container for a layer list, optimiser settings and per-pass buffers.

    The class itself only provides construction and ``summary``.
    """

    def __init__(self, learning_rate=0.001, optimizer="adam", l2_lambda=0.01, momentum=0.9):
        """Store the hyper-parameters and start with an empty network."""
        # Hyper-parameters (the optimiser name is normalised to lower case).
        self.learning_rate = learning_rate
        self.optimizer_type = optimizer.lower()
        self.l2_lambda = l2_lambda
        self.momentum = momentum

        # Architecture.
        self.layers = []

        # Buffers and optimiser bookkeeping.
        self.outputs = []
        self.pre_activations = []
        self.batchnorm_cache = []
        self.deltas = []
        self.opt_state = []
        self.t = 0

    def summary(self):
        """Print one line per layer plus the total trainable parameter count."""
        rule = "=" * 60
        print("Model Summary")
        print(rule)
        print(f"Optimizer: {self.optimizer_type.upper()} | LR: {self.learning_rate} | L2: {self.l2_lambda}")
        print(rule)

        grand_total = 0
        for index, layer in enumerate(self.layers):
            kind = layer["type"]
            line = f"Layer {index}: {kind.upper()}"
            count = 0
            if kind in ("dense", "sparse"):
                count = layer["weights"].size + layer["bias"].size
                line += f" - Input: {layer['weights'].shape[1]}, Output: {layer['weights'].shape[0]}, Params: {count}"
            elif kind == "conv2d":
                count = layer["weights"].size + layer["bias"].size
                line += f" - In_ch: {layer['in_ch']}, Out_ch: {layer['out_ch']}, Kernel: {layer['k']}x{layer['k']}, Params: {count}"
            elif kind == "batchnorm":
                count = layer["gamma"].size + layer["beta"].size
                line += f" - Features: {layer['num_features']}, Params: {count}"
            grand_total += count
            print(line)

        print(f"Total Parameters: {grand_total}")
        print(rule)
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
from .activations import activate
|
|
3
|
+
|
|
4
|
+
def im2col(input_data, filter_h, filter_w, stride=1, pad=0):
    """Unfold sliding windows of a 4-D array into rows of a 2-D matrix.

    *input_data* is unpacked as ``(N, C, H, W)`` and zero-padded by *pad* on
    both spatial axes.  The result has one row per (sample, out_y, out_x)
    position and ``C * filter_h * filter_w`` columns ordered as
    (channel, filter row, filter column).
    """
    n, c, h, w = input_data.shape
    out_h = (h + 2 * pad - filter_h) // stride + 1
    out_w = (w + 2 * pad - filter_w) // stride + 1
    padded = np.pad(input_data, [(0, 0), (0, 0), (pad, pad), (pad, pad)], mode='constant')

    windows = np.empty((n, c, filter_h, filter_w, out_h, out_w), dtype=padded.dtype)
    for fy in range(filter_h):
        y_stop = fy + stride * out_h
        for fx in range(filter_w):
            x_stop = fx + stride * out_w
            # All windows' (fy, fx) element in one strided slice.
            windows[:, :, fy, fx, :, :] = padded[:, :, fy:y_stop:stride, fx:x_stop:stride]

    return windows.transpose(0, 4, 5, 1, 2, 3).reshape(n * out_h * out_w, -1)
|
|
16
|
+
|
|
17
|
+
def batchnorm_forward(x, layer, training):
    """Apply batch normalisation described by the *layer* dict to *x*.

    In training mode the statistics are taken over axis 0 of *x*, the
    layer's ``running_mean`` / ``running_var`` are updated in place of the
    old entries, and a cache tuple
    ``(x, x_norm, mean, variance, gamma, epsilon)`` is returned for the
    backward pass.  Otherwise the stored running statistics are used and the
    cache is ``None``.

    Returns ``(out, cache)``.
    """
    eps = layer.get("epsilon", 1e-5)
    blend = layer.get("momentum", 0.1)

    if not training:
        normed = (x - layer["running_mean"]) / np.sqrt(layer["running_var"] + eps)
        return layer["gamma"] * normed + layer["beta"], None

    batch_mean = np.mean(x, axis=0)
    batch_var = np.var(x, axis=0)
    normed = (x - batch_mean) / np.sqrt(batch_var + eps)
    result = layer["gamma"] * normed + layer["beta"]

    # Exponential moving averages used later at inference time.
    layer["running_mean"] = (1 - blend) * layer["running_mean"] + blend * batch_mean
    layer["running_var"] = (1 - blend) * layer["running_var"] + blend * batch_var

    return result, (x, normed, batch_mean, batch_var, layer["gamma"], eps)
|
|
33
|
+
|
|
34
|
+
def Forward(self, inputs, training=False, dropout_rate=0.0):
    """Run *inputs* through every layer and return the final output.

    *inputs* is converted to float64.  A 1-D input becomes a single-row
    batch and a 3-D input gets a leading batch axis of length one.

    For every layer one entry is appended to ``self.outputs`` (which starts
    with the input itself), ``self.pre_activations`` (``None`` except for
    dense/sparse/conv2d layers) and ``self.batchnorm_cache`` (``None``
    except for batchnorm layers in training mode).  Dropout layers store
    their mask, or ``None`` when inactive, under ``layer["mask"]``;
    *dropout_rate* is only used for dropout layers lacking a ``"rate"``.

    Raises ``ValueError`` for an unknown layer type.
    """
    batch = np.asarray(inputs, dtype=np.float64)
    if batch.ndim == 1:
        batch = batch.reshape(1, -1)
    elif batch.ndim == 3:
        batch = batch.reshape(1, *batch.shape)

    self.outputs = [batch]
    self.pre_activations = [None]
    self.batchnorm_cache = []

    for layer in self.layers:
        kind = layer["type"]
        current = self.outputs[-1]
        pre_act = None
        bn_cache = None

        if kind in ("dense", "sparse"):
            pre_act = np.dot(current, layer["weights"].T) + layer["bias"]
            result = activate(layer["activation"], pre_act)
        elif kind == "conv2d":
            n_batch, _, height, width = current.shape
            n_filters, _, ksize, _ = layer["weights"].shape
            out_h, out_w = height - ksize + 1, width - ksize + 1
            patches = im2col(current, ksize, ksize)
            kernels = layer["weights"].reshape(n_filters, -1)
            response = np.dot(patches, kernels.T).reshape(n_batch, out_h, out_w, n_filters)
            pre_act = response.transpose(0, 3, 1, 2) + layer["bias"][None, :, None, None]
            result = activate(layer["activation"], pre_act)
        elif kind == "flatten":
            result = current.reshape(current.shape[0], -1)
        elif kind in ("maxpool2d", "avgpool2d"):
            n_batch, channels, height, width = current.shape
            p = layer["p"]
            # Drop trailing rows/columns that do not fill a window, then
            # expose each window on axes 3 and 5.
            tiles = current[:, :, : height // p * p, : width // p * p].reshape(
                n_batch, channels, height // p, p, width // p, p
            )
            if kind == "maxpool2d":
                result = tiles.max(axis=(3, 5))
            else:
                result = tiles.mean(axis=(3, 5))
        elif kind == "batchnorm":
            flat = current.reshape(current.shape[0], -1)
            normalized, bn_cache = batchnorm_forward(flat, layer, training)
            result = normalized.reshape(current.shape)
        elif kind == "dropout":
            rate = layer.get("rate", dropout_rate)
            keep = None
            result = current
            if training and rate > 0:
                if rate >= 1.0:
                    keep = np.zeros_like(current, dtype=np.float64)
                    result = np.zeros_like(current)
                else:
                    keep = (np.random.rand(*current.shape) > rate).astype(np.float64)
                    # Inverted dropout: rescale the surviving activations.
                    result = current * keep / (1.0 - rate)
            layer["mask"] = keep
        else:
            raise ValueError(f"Unknown layer type: {layer['type']}")

        self.pre_activations.append(pre_act)
        self.batchnorm_cache.append(bn_cache)
        self.outputs.append(result)

    return self.outputs[-1]
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import pickle
|
|
3
|
+
import os
|
|
4
|
+
import numpy as np
|
|
5
|
+
|
|
6
|
+
def _numpy_encoder(obj):
|
|
7
|
+
if isinstance(obj, np.ndarray):
|
|
8
|
+
return obj.tolist()
|
|
9
|
+
raise TypeError(f"Object of type {type(obj)} is not JSON serializable")
|
|
10
|
+
|
|
11
|
+
def Save(self, file):
    """Write the model's layers and optimiser settings to *file*.

    A ``.pkl`` extension (case-insensitive) selects a binary pickle; every
    other path is written as JSON, with NumPy values converted by
    ``_numpy_encoder``.
    """
    payload = dict(
        version=2,
        layers=self.layers,
        optimizer=self.optimizer_type,
        learning_rate=self.learning_rate,
        l2_lambda=self.l2_lambda,
        momentum=self.momentum,
        t=self.t,
    )

    suffix = os.path.splitext(file)[1].lower()
    if suffix != ".pkl":
        with open(file, "w") as sink:
            json.dump(payload, sink, default=_numpy_encoder)
        return

    with open(file, "wb") as sink:
        pickle.dump(payload, sink)
|
|
28
|
+
|
|
29
|
+
def Load(self, file):
    """Restore layers and optimiser settings previously written by ``Save``.

    A ``.pkl`` extension (case-insensitive) is read with ``pickle``; any
    other path is parsed as JSON.  Array-valued layer entries are converted
    to float64 arrays; entries that are absent or ``None`` (e.g. the mask of
    an inactive dropout layer) are left untouched.  ``self.opt_state`` is
    reset, and settings missing from the file keep their current values.

    NOTE(security): ``pickle.load`` can execute arbitrary code.  Only load
    ``.pkl`` files from a trusted source.
    """
    ext = os.path.splitext(file)[1].lower()
    if ext == ".pkl":
        with open(file, "rb") as f:
            raw = pickle.load(f)
    else:
        with open(file, "r") as f:
            raw = json.load(f)

    # d_gamma / d_beta are included so that gradients serialised to JSON
    # come back as arrays rather than plain lists.
    array_keys = (
        "weights", "bias", "mask", "gamma", "beta",
        "running_mean", "running_var", "d_gamma", "d_beta",
    )
    self.layers = []
    for layer in raw.get("layers", []):
        for key in array_keys:
            # Converting None would yield a NaN array that later passes
            # "is not None" checks, so skip it.
            if layer.get(key) is not None:
                layer[key] = np.array(layer[key], dtype=np.float64)
        self.layers.append(layer)

    self.opt_state = []
    self.t = raw.get("t", 0)
    self.learning_rate = raw.get("learning_rate", self.learning_rate)
    self.optimizer_type = raw.get("optimizer", self.optimizer_type)
    self.l2_lambda = raw.get("l2_lambda", self.l2_lambda)
    self.momentum = raw.get("momentum", self.momentum)
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
from .weight_init import init_weights, init_conv_weights
|
|
3
|
+
|
|
4
|
+
def add_dense(self, n_in, n_out, activation="relu", init_method="xavier_uniform"):
    """Append a fully connected layer with freshly initialised parameters."""
    weights, bias = init_weights(n_in, n_out, method=init_method)
    spec = {
        "type": "dense",
        "weights": weights,
        "bias": bias,
        "activation": activation,
    }
    self.layers.append(spec)
|
|
7
|
+
|
|
8
|
+
def add_sparse(self, n_in, n_out, connectivity=0.5, activation="relu", init_method="xavier_uniform"):
    """Append a dense-style layer whose weights are gated by a random 0/1 mask.

    Each entry of the ``(n_out, n_in)`` mask is 1 where a uniform random
    draw falls below *connectivity*; the stored weights are pre-multiplied
    by that mask and the mask is kept on the layer.
    """
    weights, bias = init_weights(n_in, n_out, method=init_method)
    keep = np.random.rand(n_out, n_in) < connectivity
    mask = keep.astype(np.float64)
    spec = {
        "type": "sparse",
        "weights": weights * mask,
        "bias": bias,
        "mask": mask,
        "activation": activation,
    }
    self.layers.append(spec)
|
|
12
|
+
|
|
13
|
+
def add_conv2d(self, in_ch, out_ch, k, activation="relu", init_method="he_normal"):
    """Append a 2-D convolution layer with a ``k`` x ``k`` kernel."""
    kernels, bias = init_conv_weights(in_ch, out_ch, k, method=init_method)
    spec = {
        "type": "conv2d",
        "weights": kernels,
        "bias": bias,
        "in_ch": in_ch,
        "out_ch": out_ch,
        "k": k,
        "activation": activation,
    }
    self.layers.append(spec)
|
|
16
|
+
|
|
17
|
+
def add_flatten(self):
    """Append a parameter-free flatten layer."""
    spec = dict(type="flatten")
    self.layers.append(spec)
|
|
19
|
+
|
|
20
|
+
def add_maxpool2d(self, pool_size=2):
    """Append a max-pooling layer; *pool_size* is stored under ``"p"``."""
    spec = dict(type="maxpool2d", p=pool_size)
    self.layers.append(spec)
|
|
22
|
+
|
|
23
|
+
def add_avgpool2d(self, pool_size=2):
    """Append an average-pooling layer; *pool_size* is stored under ``"p"``."""
    spec = dict(type="avgpool2d", p=pool_size)
    self.layers.append(spec)
|
|
25
|
+
|
|
26
|
+
def add_batchnorm(self, num_features, epsilon=1e-5, momentum=0.1):
    """Append a batch-normalisation layer over *num_features* features.

    ``gamma`` and ``running_var`` start at one, ``beta`` and
    ``running_mean`` at zero; all four are float64 vectors.
    """
    def filled(value):
        return np.full(num_features, value, dtype=np.float64)

    spec = {
        "type": "batchnorm",
        "num_features": num_features,
        "epsilon": epsilon,
        "momentum": momentum,
        "running_mean": filled(0.0),
        "running_var": filled(1.0),
        "gamma": filled(1.0),
        "beta": filled(0.0),
    }
    self.layers.append(spec)
|
|
32
|
+
|
|
33
|
+
def add_dropout(self, rate=0.5):
    """Append a dropout layer that stores *rate* under ``"rate"``."""
    spec = dict(type="dropout", rate=rate)
    self.layers.append(spec)
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
|
|
3
|
+
def ComputeLoss(self, output, target, function="mse", reduction="mean", **kwargs):
    """Evaluate a loss between *output* and *target*.

    Parameters
    ----------
    output, target : array-likes, converted to float64.
    function : one of ``mse``, ``mae``, ``huber`` (kwarg ``delta``,
        default 1.0), ``smooth_l1``, ``binary_cross_entropy``,
        ``cross_entropy`` / ``categorical_cross_entropy``, ``focal``
        (kwargs ``alpha`` default 0.25 and ``gamma`` default 2.0) or
        ``hinge``.
    reduction : ``"mean"`` or ``"sum"`` return a Python float; any other
        value returns the unreduced element-wise loss array.

    For the categorical cross-entropy, ``"mean"`` averages over samples
    (rows) rather than over every element; a 1-D *output* counts as a
    single sample.

    Raises ``ValueError`` for an unknown *function*.
    """
    o = np.asarray(output, dtype=np.float64)
    t = np.asarray(target, dtype=np.float64)
    mean_over_samples = False

    if function == "mse":
        loss = (o - t) ** 2
    elif function == "mae":
        loss = np.abs(o - t)
    elif function == "huber":
        delta = kwargs.get("delta", 1.0)
        diff = np.abs(o - t)
        loss = np.where(diff < delta, 0.5 * diff**2, delta * (diff - 0.5 * delta))
    elif function == "smooth_l1":
        diff = np.abs(o - t)
        loss = np.where(diff < 1, 0.5 * diff**2, diff - 0.5)
    elif function == "binary_cross_entropy":
        o = np.clip(o, 1e-12, 1 - 1e-12)
        loss = -(t * np.log(o) + (1 - t) * np.log(1 - o))
    elif function in ("cross_entropy", "categorical_cross_entropy"):
        o = np.clip(o, 1e-12, 1.0)
        loss = -t * np.log(o)
        mean_over_samples = True
    elif function == "focal":
        alpha = kwargs.get("alpha", 0.25)
        gamma = kwargs.get("gamma", 2.0)
        # Clip on both sides: log(1 - o) is evaluated below, and 0 * -inf
        # would turn the loss into NaN.
        o = np.clip(o, 1e-12, 1 - 1e-12)
        pt = o * t + (1 - o) * (1 - t)
        # Both classes are modulated by (1 - pt) ** gamma so that confident,
        # correct predictions are down-weighted.
        focus = (1 - pt) ** gamma
        loss = -(alpha * t * focus * np.log(o) + (1 - alpha) * (1 - t) * focus * np.log(1 - o))
    elif function == "hinge":
        loss = np.maximum(0, 1 - t * o)
    else:
        raise ValueError(f"Unknown loss function: {function}")

    if reduction == "mean":
        if mean_over_samples:
            n_samples = o.shape[0] if o.ndim > 1 else 1
            return float(np.sum(loss) / n_samples)
        return float(np.mean(loss))
    if reduction == "sum":
        return float(np.sum(loss))
    return loss
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
from .forward import im2col
|
|
3
|
+
|
|
4
|
+
def update(self):
    """Apply one optimiser step to every trainable layer.

    Increments ``self.t``, lazily creates ``self.opt_state`` (one entry per
    layer, ``None`` for layers without parameters) and then updates
    ``weights``/``bias`` of dense, sparse and conv2d layers and
    ``gamma``/``beta`` of batchnorm layers in place.  Gradients come from
    ``self.deltas`` and ``self.outputs``; batchnorm gradients are read from
    the layer's ``d_gamma``/``d_beta`` entries (zeros when absent).  An L2
    term ``l2_lambda * weights`` is added to weight gradients only.

    ``self.optimizer_type`` selects ``sgd`` (with momentum), ``rmsprop`` or
    ``adagrad``; any other value uses Adam.
    """
    self.t += 1
    b1, b2, eps = 0.9, 0.999, 1e-8
    mode = self.optimizer_type

    def fresh_slots(layer):
        kind = layer["type"]
        if kind in ("dense", "sparse", "conv2d"):
            return {
                "mw": np.zeros_like(layer["weights"]),
                "vw": np.zeros_like(layer["weights"]),
                "mb": np.zeros_like(layer["bias"]),
                "vb": np.zeros_like(layer["bias"]),
                "vgw": np.zeros_like(layer["weights"]),
                "vgb": np.zeros_like(layer["bias"]),
            }
        if kind == "batchnorm":
            return {
                "mg": np.zeros_like(layer["gamma"]),
                "vg": np.zeros_like(layer["gamma"]),
                "mb": np.zeros_like(layer["beta"]),
                "vb": np.zeros_like(layer["beta"]),
            }
        return None

    def step(layer, name, grad, slots, m_key, v_key, vel_key):
        # One parameter tensor, one optimiser rule.
        if mode == "sgd":
            slots[vel_key] = self.momentum * slots[vel_key] - self.learning_rate * grad
            layer[name] += slots[vel_key]
        elif mode == "rmsprop":
            slots[v_key] = b2 * slots[v_key] + (1 - b2) * (grad ** 2)
            layer[name] -= self.learning_rate * grad / (np.sqrt(slots[v_key]) + eps)
        elif mode == "adagrad":
            slots[v_key] += grad ** 2
            layer[name] -= self.learning_rate * grad / (np.sqrt(slots[v_key]) + eps)
        else:  # adam
            slots[m_key] = b1 * slots[m_key] + (1 - b1) * grad
            slots[v_key] = b2 * slots[v_key] + (1 - b2) * (grad ** 2)
            layer[name] -= self.learning_rate * (slots[m_key] / (1 - b1 ** self.t)) / (np.sqrt(slots[v_key] / (1 - b2 ** self.t)) + eps)

    if not self.opt_state:
        for layer in self.layers:
            self.opt_state.append(fresh_slots(layer))

    for index, layer in enumerate(self.layers):
        slots = self.opt_state[index]
        kind = layer["type"]

        if kind == "batchnorm":
            grad_gamma = layer.get("d_gamma", np.zeros_like(layer["gamma"]))
            grad_beta = layer.get("d_beta", np.zeros_like(layer["beta"]))
            # For batchnorm the SGD velocity lives in the "mg"/"mb" slots.
            step(layer, "gamma", grad_gamma, slots, "mg", "vg", "mg")
            step(layer, "beta", grad_beta, slots, "mb", "vb", "mb")
            continue

        if kind in ("dense", "sparse"):
            grad_w = np.dot(self.deltas[index].T, self.outputs[index])
            grad_b = np.sum(self.deltas[index], axis=0)
            if kind == "sparse":
                grad_w *= layer["mask"]
        elif kind == "conv2d":
            ksize = layer["k"]
            patches = im2col(self.outputs[index], ksize, ksize)
            delta_rows = self.deltas[index].transpose(0, 2, 3, 1).reshape(-1, layer["weights"].shape[0])
            grad_w = np.dot(delta_rows.T, patches).reshape(layer["weights"].shape)
            grad_b = np.sum(self.deltas[index], axis=(0, 2, 3))
        else:
            continue

        # L2 penalty (restricted to connected weights for sparse layers).
        grad_w = grad_w + self.l2_lambda * layer["weights"] * layer.get("mask", 1.0)

        step(layer, "weights", grad_w, slots, "mw", "vw", "vgw")
        step(layer, "bias", grad_b, slots, "mb", "vb", "vgb")
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import copy
|
|
2
|
+
import numpy as np
|
|
3
|
+
|
|
4
|
+
def Reinforce(self, inputs, score_fn, noise=0.05, tries=10, sigma=1.0):
    """Random-search improvement step driven by a user-supplied score.

    Starting from the current parameters, ``max(1, tries)`` candidates are
    built by adding Gaussian noise with standard deviation
    ``sigma * noise`` to the ``weights`` and ``bias`` of every layer that
    has weights (sparse layers are re-masked).  Each candidate is scored
    with ``score_fn(self.Forward(inputs))``; the highest-scoring parameter
    set, possibly the original one, is installed in ``self.layers``.

    Returns the best score.

    If ``Forward`` or *score_fn* raises, the best parameters found so far
    are restored before the exception propagates, so the model is never
    left holding a half-evaluated random candidate.
    """
    inputs = np.asarray(inputs, dtype=np.float64)
    best_score = score_fn(self.Forward(inputs))
    best_layers = copy.deepcopy(self.layers)
    base_layers = copy.deepcopy(self.layers)
    spread = sigma * noise

    try:
        for _ in range(max(1, tries)):
            candidate = copy.deepcopy(base_layers)
            for layer in candidate:
                if "weights" in layer:
                    layer["weights"] += np.random.normal(0, spread, layer["weights"].shape)
                    if layer["type"] == "sparse":
                        layer["weights"] *= layer["mask"]
                    layer["bias"] += np.random.normal(0, spread, layer["bias"].shape)
            self.layers = candidate
            score = score_fn(self.Forward(inputs))
            if score > best_score:
                best_score = score
                # candidate is rebuilt from base_layers on the next
                # iteration, so it can be kept without another copy.
                best_layers = candidate
    finally:
        self.layers = best_layers
    return best_score
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
|
|
3
|
+
def TrainBatch(self, xs, ys, loss_function=None, **loss_kwargs):
    """Run one forward/backward/update cycle on a single batch.

    When *loss_function* is ``None`` it defaults to ``"cross_entropy"`` if
    the last layer's activation is softmax and to ``"mse"`` otherwise.
    Extra keyword arguments are forwarded to ``ComputeLoss``.

    Returns ``(loss, predictions)`` where the predictions come from the
    training-mode forward pass made before the parameters were updated.
    """
    predictions = self.Forward(xs, training=True)

    chosen = loss_function
    if chosen is None:
        ends_in_softmax = self.layers[-1].get("activation") == "softmax"
        chosen = "cross_entropy" if ends_in_softmax else "mse"

    batch_loss = self.ComputeLoss(predictions, ys, chosen, **loss_kwargs)
    self.Backward(ys)
    self.update()
    return batch_loss, predictions
|
|
11
|
+
|
|
12
|
+
def compute_accuracy(self, predictions, targets):
    """Return the fraction of predictions that match the targets.

    Both arguments may be arrays or nested lists.  When the last axis of
    *predictions* has more than one entry the problem is treated as
    multi-class and the arg-max over that last axis is compared between
    predictions and (one-hot) targets.  Otherwise predictions are
    thresholded at 0.5 and compared with the flattened targets.
    """
    predictions = np.asarray(predictions)
    targets = np.asarray(targets)
    if predictions.shape[-1] > 1:  # Multi-class
        # Reduce over the same axis that was just inspected.
        pred_classes = np.argmax(predictions, axis=-1)
        true_classes = np.argmax(targets, axis=-1)
    else:  # Binary
        pred_classes = (predictions > 0.5).astype(int).flatten()
        true_classes = targets.flatten()
    return np.mean(pred_classes == true_classes)
|
|
20
|
+
|
|
21
|
+
def Train(self, X_train, Y_train, epochs=10, batch_size=32, X_val=None, Y_val=None, loss_function=None, verbose=True, **loss_kwargs):
    """Train for *epochs* passes over the data using shuffled mini-batches.

    Parameters
    ----------
    X_train, Y_train : arrays or nested lists, indexed along axis 0.
    epochs, batch_size : number of passes and mini-batch size.
    X_val, Y_val : optional validation data, evaluated after every epoch
        when both are given.
    loss_function : forwarded to ``TrainBatch``; for validation ``None``
        resolves to ``"cross_entropy"`` when the last layer uses softmax and
        to ``"mse"`` otherwise.
    verbose : print one progress line per epoch.
    **loss_kwargs : forwarded to the loss computation.

    Returns a history dict with the per-epoch lists ``loss``, ``accuracy``,
    ``val_loss`` and ``val_accuracy`` (the last two stay empty without
    validation data).

    Raises ``ValueError`` if an epoch processes no samples (empty training
    set or a negative *batch_size*).
    """
    # Accept plain Python sequences as well as arrays.
    X_train = np.asarray(X_train)
    Y_train = np.asarray(Y_train)
    has_validation = X_val is not None and Y_val is not None
    if has_validation:
        X_val = np.asarray(X_val)
        Y_val = np.asarray(Y_val)

    history = {"loss": [], "val_loss": [], "accuracy": [], "val_accuracy": []}
    n_samples = X_train.shape[0]
    for epoch in range(epochs):
        indices = np.random.permutation(n_samples)
        X_shuffled = X_train[indices]
        Y_shuffled = Y_train[indices]
        epoch_loss = 0.0
        epoch_acc = 0.0
        total_samples = 0
        for i in range(0, n_samples, batch_size):
            X_batch = X_shuffled[i:i+batch_size]
            Y_batch = Y_shuffled[i:i+batch_size]
            loss, preds = self.TrainBatch(X_batch, Y_batch, loss_function=loss_function, **loss_kwargs)
            batch_size_actual = X_batch.shape[0]
            # Weight by batch length so a short final batch is not over-counted.
            epoch_loss += loss * batch_size_actual
            epoch_acc += self.compute_accuracy(preds, Y_batch) * batch_size_actual
            total_samples += batch_size_actual
        if total_samples == 0:
            raise ValueError("Train needs at least one training sample and a positive batch_size")
        avg_loss = epoch_loss / total_samples
        avg_acc = epoch_acc / total_samples
        history["loss"].append(avg_loss)
        history["accuracy"].append(avg_acc)
        if has_validation:
            val_pred = self.Forward(X_val)
            val_loss_name = loss_function
            if val_loss_name is None:
                val_loss_name = "cross_entropy" if self.layers[-1].get("activation") == "softmax" else "mse"
            val_loss = self.ComputeLoss(val_pred, Y_val, val_loss_name, **loss_kwargs)
            val_acc = self.compute_accuracy(val_pred, Y_val)
            history["val_loss"].append(val_loss)
            history["val_accuracy"].append(val_acc)
            if verbose:
                print(f"Epoch {epoch+1}/{epochs} - loss: {avg_loss:.4f} - acc: {avg_acc:.4f} - val_loss: {val_loss:.4f} - val_acc: {val_acc:.4f}")
        elif verbose:
            print(f"Epoch {epoch+1}/{epochs} - loss: {avg_loss:.4f} - acc: {avg_acc:.4f}")
    return history
|