Enilnets 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,33 @@
1
+ from .base import NeuralNet
2
+ from .layers import add_dense, add_sparse, add_conv2d, add_flatten, add_maxpool2d, add_avgpool2d, add_batchnorm, add_dropout
3
+ from .forward import Forward
4
+ from .backward import Backward
5
+ from .optimizer import update
6
+ from .train import TrainBatch, Train, compute_accuracy
7
+ from .reinforce import Reinforce
8
+ from .loss import ComputeLoss
9
+ from .io import Save, Load
10
+
11
print("Loading Modules....")

# The NeuralNet class is assembled at import time: each public method lives in
# its own module as a plain function taking ``self`` and is attached to the
# class here. ``predict`` is an alias of ``Forward``.
_NEURALNET_METHODS = (
    ("add_dense", add_dense),
    ("add_sparse", add_sparse),
    ("add_conv2d", add_conv2d),
    ("add_flatten", add_flatten),
    ("add_maxpool2d", add_maxpool2d),
    ("add_avgpool2d", add_avgpool2d),
    ("add_batchnorm", add_batchnorm),
    ("add_dropout", add_dropout),
    ("Forward", Forward),
    ("predict", Forward),
    ("Backward", Backward),
    ("update", update),
    ("TrainBatch", TrainBatch),
    ("Train", Train),
    ("compute_accuracy", compute_accuracy),
    ("Reinforce", Reinforce),
    ("ComputeLoss", ComputeLoss),
    ("Save", Save),
    ("Load", Load),
)

for _method_name, _method in _NEURALNET_METHODS:
    setattr(NeuralNet, _method_name, _method)

print("Enilnets Library Ready!")
@@ -0,0 +1,39 @@
1
+ import numpy as np
2
+
3
def activate(name, x):
    """Apply the activation function called ``name`` element-wise to ``x``.

    Supported names: "relu", "leakyrelu", "elu", "selu", "gelu" (tanh
    approximation), "swish", "sigmoid", "tanh" and "softmax" (normalised over
    the last axis). Any other name returns ``x`` unchanged.

    The exponentials are only evaluated on arguments that cannot overflow:
    ``np.where`` computes both branches, so ELU/SELU feed ``min(x, 0)`` to
    ``exp``, and swish clips its argument the same way sigmoid does.
    """
    if name == "relu":
        return np.maximum(0, x)
    if name == "leakyrelu":
        return np.where(x > 0, x, 0.01 * x)
    if name == "elu":
        # exp() is only meaningful for x <= 0; clamp so positive inputs do not overflow.
        return np.where(x > 0, x, np.exp(np.minimum(x, 0)) - 1)
    if name == "selu":
        alpha = 1.6732632423543772848170429916717
        scale = 1.0507009873554804934193349852946
        return scale * np.where(x > 0, x, alpha * (np.exp(np.minimum(x, 0)) - 1))
    if name == "gelu":
        return 0.5 * x * (1 + np.tanh(np.sqrt(2 / np.pi) * (x + 0.044715 * x**3)))
    if name == "swish":
        # Same clipping as the sigmoid branch keeps exp() finite for very negative x.
        return x * 1.0 / (1.0 + np.exp(-np.clip(x, -500, 500)))
    if name == "sigmoid":
        return 1.0 / (1.0 + np.exp(-np.clip(x, -500, 500)))
    if name == "tanh":
        return np.tanh(x)
    if name == "softmax":
        # Subtracting the row maximum leaves the result unchanged but avoids overflow.
        e_x = np.exp(x - np.max(x, axis=-1, keepdims=True))
        return e_x / np.sum(e_x, axis=-1, keepdims=True)
    return x
19
+
20
def derivative(name, x):
    """Return the element-wise derivative of activation ``name`` at pre-activation ``x``.

    Names without a dedicated branch (for example "linear" or "softmax")
    yield an array of ones shaped like ``x``.

    The GELU branch differentiates the tanh approximation
    ``0.5 * x * (1 + tanh(c * (x + 0.044715 * x**3)))`` exactly, so the result
    is the true gradient of that formula. Exponentials are evaluated only on
    arguments that cannot overflow.
    """
    if name == "relu":
        return (x > 0).astype(np.float64)
    if name == "leakyrelu":
        return np.where(x > 0, 1.0, 0.01)
    if name == "elu":
        # np.where evaluates both branches; clamp so exp() never sees large positive x.
        return np.where(x > 0, 1.0, np.exp(np.minimum(x, 0)))
    if name == "selu":
        alpha = 1.6732632423543772848170429916717
        scale = 1.0507009873554804934193349852946
        return scale * np.where(x > 0, 1.0, alpha * np.exp(np.minimum(x, 0)))
    if name == "gelu":
        c = np.sqrt(2 / np.pi)
        t = np.tanh(c * (x + 0.044715 * x**3))
        # Product rule on 0.5 * x * (1 + t), with dt/dx = (1 - t^2) * c * (1 + 3 * 0.044715 * x^2).
        return 0.5 * (1 + t) + 0.5 * x * (1 - t**2) * c * (1 + 3 * 0.044715 * x**2)
    if name == "swish":
        s = 1.0 / (1.0 + np.exp(-np.clip(x, -500, 500)))
        return s + x * s * (1 - s)
    if name == "sigmoid":
        s = 1.0 / (1.0 + np.exp(-np.clip(x, -500, 500)))
        return s * (1 - s)
    if name == "tanh":
        return 1 - np.tanh(x) ** 2
    return np.ones_like(x)
@@ -0,0 +1,121 @@
1
+ import numpy as np
2
+ from .activations import derivative
3
+ from .forward import im2col
4
+
5
def maxpool2d_backward(delta, x, p):
    """Route the gradient of a ``p`` x ``p`` max-pool back to its input ``x``.

    Args:
        delta: gradient with respect to the pooled output; only its first
            ``H // p`` rows and ``W // p`` columns are used.
        x: the 4-D array that was pooled, unpacked as (B, C, H, W).
        p: pooling window size (also the stride).

    Returns:
        An array shaped like ``x``. Within each window the gradient goes to
        the maximum element; ties share it equally. Rows/columns beyond the
        last full window receive zero.

    The windows are formed with ``reshape`` rather than by applying ``x``'s
    strides to the output buffer, so the function is safe when ``x`` is a
    non-contiguous view.
    """
    B, C, H, W = x.shape
    H_b, W_b = H // p, W // p
    H_trim, W_trim = H_b * p, W_b * p

    # (B, C, H_b, p, W_b, p): axes 3 and 5 index positions inside each window.
    x_blocks = x[:, :, :H_trim, :W_trim].reshape(B, C, H_b, p, W_b, p)

    x_max = x_blocks.max(axis=(3, 5), keepdims=True)
    mask = (x_blocks == x_max).astype(np.float64)
    # Split the gradient evenly between tied maxima.
    mask_sum = mask.sum(axis=(3, 5), keepdims=True)
    mask = mask / np.maximum(mask_sum, 1e-12)

    delta_expanded = delta[:, :, :H_b, :W_b][:, :, :, None, :, None]

    dx = np.zeros_like(x)
    dx[:, :, :H_trim, :W_trim] = (mask * delta_expanded).reshape(B, C, H_trim, W_trim)
    return dx
29
+
30
def avgpool2d_backward(delta, x, p):
    """Spread the gradient of a ``p`` x ``p`` average-pool back over its input.

    Every input cell covered by a full window receives ``delta / (p * p)``
    from that window; trailing rows/columns outside the last full window stay
    zero. The result has the shape (and dtype) of ``x``.
    """
    batch, channels, height, width = x.shape
    n_rows, n_cols = height // p, width // p

    grad_input = np.zeros_like(x)
    per_cell = delta[:, :, :n_rows, :n_cols] / (p * p)
    # Repeat each window's share p times along both spatial axes.
    tiled = np.repeat(np.repeat(per_cell, p, axis=2), p, axis=3)
    grad_input[:, :, : n_rows * p, : n_cols * p] = tiled
    return grad_input
44
+
45
def batchnorm_backward(dout, cache):
    """Backward pass of batch normalisation over axis 0.

    ``cache`` is the tuple ``(x, x_norm, mean, var, gamma, epsilon)``.
    Returns ``(dx, dgamma, dbeta)``: the gradient with respect to the input
    and to the scale and shift parameters.
    """
    x, x_norm, mean, var, gamma, epsilon = cache
    n_rows = x.shape[0]
    centered = x - mean
    std = np.sqrt(var + epsilon)

    # Parameter gradients are plain reductions over the batch axis.
    grad_beta = np.sum(dout, axis=0)
    grad_gamma = np.sum(dout * x_norm, axis=0)

    # Chain rule through x_norm = (x - mean) / sqrt(var + epsilon).
    grad_norm = dout * gamma
    grad_var = np.sum(grad_norm * centered * -0.5 * (var + epsilon) ** (-1.5), axis=0)
    grad_mean = np.sum(grad_norm * -1.0 / std, axis=0)
    grad_x = grad_norm / std + grad_var * 2.0 * centered / n_rows + grad_mean / n_rows
    return grad_x, grad_gamma, grad_beta
55
+
56
def conv2d_backward_input(delta, weights, input_shape):
    """Gradient of a stride-1, unpadded 2-D convolution with respect to its input.

    ``delta`` is the 4-D gradient at the convolution output and ``weights``
    the 4-D kernel array unpacked as (F, C, K, K). The result is reshaped to
    ``(batch, C, input_shape[2], input_shape[3])``.

    Implemented as a "full" correlation: ``delta`` is zero-padded by K - 1 on
    each spatial side and multiplied against the spatially flipped kernels
    with their filter/channel axes swapped.
    """
    batch, _, _, _ = delta.shape
    _, in_channels, kernel, _ = weights.shape
    height, width = input_shape[2], input_shape[3]

    border = (kernel - 1, kernel - 1)
    delta_padded = np.pad(delta, [(0, 0), (0, 0), border, border], mode="constant")
    patches = im2col(delta_padded, kernel, kernel)

    flipped = weights[:, :, ::-1, ::-1]
    kernel_matrix = flipped.transpose(1, 0, 2, 3).reshape(in_channels, -1)

    grad_rows = np.dot(patches, kernel_matrix.T)
    return grad_rows.reshape(batch, height, width, in_channels).transpose(0, 3, 1, 2)
67
+
68
def _backprop_through_layer(self, index, next_delta):
    """Return the gradient at the input of ``self.layers[index]`` given ``next_delta`` at its output.

    For batchnorm layers this also stores the parameter gradients on the layer
    dict as "d_gamma" and "d_beta".
    """
    layer = self.layers[index]
    layer_input = self.outputs[index]
    kind = layer["type"]

    if kind in ("dense", "sparse"):
        return np.dot(next_delta, layer["weights"])
    if kind == "flatten":
        return next_delta.reshape(layer_input.shape)
    if kind == "conv2d":
        return conv2d_backward_input(next_delta, layer["weights"], layer_input.shape)
    if kind == "maxpool2d":
        return maxpool2d_backward(next_delta, layer_input, layer["p"])
    if kind == "avgpool2d":
        return avgpool2d_backward(next_delta, layer_input, layer["p"])
    if kind == "dropout":
        mask = layer.get("mask")
        rate = layer.get("rate", 0.0)
        if mask is None or rate == 0.0:
            return next_delta
        if rate >= 1.0:
            # Everything was dropped in the forward pass; dividing by
            # (1 - rate) would produce NaN/inf, the true gradient is zero.
            return np.zeros_like(next_delta)
        return next_delta * mask / (1.0 - rate)
    if kind == "batchnorm":
        flat = next_delta.reshape(layer_input.shape[0], -1)
        cache = self.batchnorm_cache[index]
        if cache is None:
            raise ValueError("BatchNorm cache is None. Ensure Forward(training=True) was called before Backward.")
        err_flat, dgamma, dbeta = batchnorm_backward(flat, cache)
        layer["d_gamma"] = dgamma
        layer["d_beta"] = dbeta
        return err_flat.reshape(layer_input.shape)
    return np.zeros_like(layer_input)


def Backward(self, targets):
    """Back-propagate the error for ``targets`` and fill ``self.deltas``.

    Must follow a ``Forward`` call, because it reads ``self.outputs``,
    ``self.pre_activations`` and ``self.batchnorm_cache``. A 1-D ``targets``
    is treated as a single sample.

    The output delta is ``(out - targets) / batch_size``, additionally
    multiplied by the activation derivative unless the last layer's
    activation is "softmax". After the call ``self.deltas[l]`` holds the
    gradient at the pre-activation of layer ``l`` (dense/sparse/conv2d) or at
    its output (all other layer types).

    Raises:
        ValueError: if a batchnorm layer has no cache, i.e. the preceding
            forward pass did not run with ``training=True``.
    """
    targets = np.asarray(targets, dtype=np.float64)
    if targets.ndim == 1:
        targets = targets.reshape(1, -1)
    batch_size = targets.shape[0]
    self.deltas = [None] * len(self.layers)
    out = self.outputs[-1]
    last = self.layers[-1]
    if last.get("activation") == "softmax":
        delta = (out - targets) / batch_size
    else:
        activation_input = self.pre_activations[-1] if self.pre_activations[-1] is not None else out
        delta = (out - targets) * derivative(last.get("activation", "linear"), activation_input) / batch_size
    self.deltas[-1] = delta

    for l in reversed(range(len(self.layers) - 1)):
        curr = self.layers[l]
        err = _backprop_through_layer(self, l + 1, self.deltas[l + 1])

        if curr["type"] in ("dense", "sparse", "conv2d"):
            # pre_activations is offset by one: index 0 belongs to the raw input.
            activation_input = self.pre_activations[l + 1] if self.pre_activations[l + 1] is not None else self.outputs[l + 1]
            self.deltas[l] = err * derivative(curr.get("activation", "linear"), activation_input)
        else:
            self.deltas[l] = err

    # The loop only walks through layers 1..n-1, so a batchnorm placed first
    # would never get its parameter gradients; compute them here.
    if self.layers[0]["type"] == "batchnorm":
        _backprop_through_layer(self, 0, self.deltas[0])
@@ -0,0 +1,41 @@
1
+ import numpy as np
2
+
3
class NeuralNet:
    """Container for a layer stack plus optimizer settings and per-pass buffers.

    The layer-building, forward/backward, training and I/O methods are
    attached to this class by the package's import-time setup; this
    definition only holds state and the ``summary`` printer.
    """

    def __init__(self, learning_rate=0.001, optimizer="adam", l2_lambda=0.01, momentum=0.9):
        """Store hyper-parameters (optimizer name is lower-cased) and create empty buffers."""
        # Model definition: a list of layer dicts.
        self.layers = []

        # Hyper-parameters.
        self.learning_rate = learning_rate
        self.optimizer_type = optimizer.lower()
        self.l2_lambda = l2_lambda
        self.momentum = momentum

        # Per-pass buffers and optimizer bookkeeping.
        self.outputs = []
        self.pre_activations = []
        self.batchnorm_cache = []
        self.deltas = []
        self.opt_state = []
        self.t = 0

    def summary(self):
        """Print one line per layer and the total number of trainable parameters."""
        rule = "=" * 60
        print("Model Summary")
        print(rule)
        print(f"Optimizer: {self.optimizer_type.upper()} | LR: {self.learning_rate} | L2: {self.l2_lambda}")
        print(rule)

        total_params = 0
        for i, layer in enumerate(self.layers):
            layer_type = layer["type"]
            params = 0
            if layer_type in ("dense", "sparse"):
                params = layer["weights"].size + layer["bias"].size
                line = f"Layer {i}: {layer_type.upper()} - Input: {layer['weights'].shape[1]}, Output: {layer['weights'].shape[0]}, Params: {params}"
            elif layer_type == "conv2d":
                params = layer["weights"].size + layer["bias"].size
                line = f"Layer {i}: {layer_type.upper()} - In_ch: {layer['in_ch']}, Out_ch: {layer['out_ch']}, Kernel: {layer['k']}x{layer['k']}, Params: {params}"
            elif layer_type == "batchnorm":
                params = layer["gamma"].size + layer["beta"].size
                line = f"Layer {i}: {layer_type.upper()} - Features: {layer['num_features']}, Params: {params}"
            else:
                # Layers without trainable parameters only show their type.
                line = f"Layer {i}: {layer_type.upper()}"
            total_params += params
            print(line)

        print(f"Total Parameters: {total_params}")
        print(rule)
@@ -0,0 +1,101 @@
1
+ import numpy as np
2
+ from .activations import activate
3
+
4
def im2col(input_data, filter_h, filter_w, stride=1, pad=0):
    """Unfold sliding windows of a 4-D array into the rows of a 2-D matrix.

    ``input_data`` is unpacked as (N, C, H, W) and zero-padded by ``pad`` on
    both spatial axes. The result has one row per (sample, output row, output
    column) and ``C * filter_h * filter_w`` columns, ordered channel first,
    then kernel row, then kernel column.
    """
    batch, channels, height, width = input_data.shape
    out_h = (height + 2 * pad - filter_h) // stride + 1
    out_w = (width + 2 * pad - filter_w) // stride + 1

    spatial_pad = (pad, pad)
    padded = np.pad(input_data, [(0, 0), (0, 0), spatial_pad, spatial_pad], mode='constant')

    step_n, step_c, step_h, step_w = padded.strides
    # Strided view: axes 2-3 walk inside a window, axes 4-5 walk over window origins.
    windows = np.lib.stride_tricks.as_strided(
        padded,
        shape=(batch, channels, filter_h, filter_w, out_h, out_w),
        strides=(step_n, step_c, step_h, step_w, step_h * stride, step_w * stride),
    )

    by_position = windows.transpose(0, 4, 5, 1, 2, 3)
    return by_position.reshape(batch * out_h * out_w, -1)
16
+
17
def batchnorm_forward(x, layer, training):
    """Normalise ``x`` over axis 0 and apply the layer's gamma/beta.

    In training mode the batch statistics are used, the layer's
    "running_mean"/"running_var" entries are updated in place of the old
    values (exponential moving average with the layer's "momentum", default
    0.1), and a cache tuple for the backward pass is returned. In inference
    mode the running statistics are used and the cache is ``None``.

    Returns:
        ``(out, cache)``.
    """
    eps = layer.get("epsilon", 1e-5)
    mix = layer.get("momentum", 0.1)

    if not training:
        normalized = (x - layer["running_mean"]) / np.sqrt(layer["running_var"] + eps)
        return layer["gamma"] * normalized + layer["beta"], None

    batch_mean = np.mean(x, axis=0)
    batch_var = np.var(x, axis=0)
    normalized = (x - batch_mean) / np.sqrt(batch_var + eps)
    result = layer["gamma"] * normalized + layer["beta"]

    # Blend the batch statistics into the running estimates used at inference.
    layer["running_mean"] = (1 - mix) * layer["running_mean"] + mix * batch_mean
    layer["running_var"] = (1 - mix) * layer["running_var"] + mix * batch_var

    return result, (x, normalized, batch_mean, batch_var, layer["gamma"], eps)
33
+
34
def Forward(self, inputs, training=False, dropout_rate=0.0):
    """Run ``inputs`` through every layer and return the final output.

    A 1-D input is treated as one flat sample and a 3-D input as one image;
    both get a leading batch axis. The call rebuilds ``self.outputs`` (input
    first, then one entry per layer), ``self.pre_activations`` (aligned with
    ``self.outputs``; ``None`` where a layer has no activation) and
    ``self.batchnorm_cache`` (one entry per layer).

    ``training`` switches batchnorm to batch statistics and enables dropout.
    ``dropout_rate`` is only used for dropout layers that carry no "rate".

    Raises:
        ValueError: for a layer whose "type" is not recognised.
    """
    data = np.asarray(inputs, dtype=np.float64)
    if data.ndim == 1:
        data = data.reshape(1, -1)
    elif data.ndim == 3:
        data = data.reshape(1, *data.shape)

    self.outputs = [data]
    self.pre_activations = [None]
    self.batchnorm_cache = []

    for layer in self.layers:
        current = self.outputs[-1]
        kind = layer["type"]
        pre_act = None
        bn_cache = None

        if kind in ("dense", "sparse"):
            pre_act = np.dot(current, layer["weights"].T) + layer["bias"]
            result = activate(layer["activation"], pre_act)
        elif kind == "conv2d":
            batch, _, height, width = current.shape
            n_filters, _, kernel, _ = layer["weights"].shape
            out_h = height - kernel + 1
            out_w = width - kernel + 1
            patches = im2col(current, kernel, kernel)
            kernel_matrix = layer["weights"].reshape(n_filters, -1)
            response = np.dot(patches, kernel_matrix.T)
            response = response.reshape(batch, out_h, out_w, n_filters).transpose(0, 3, 1, 2)
            pre_act = response + layer["bias"][None, :, None, None]
            result = activate(layer["activation"], pre_act)
        elif kind == "flatten":
            result = current.reshape(current.shape[0], -1)
        elif kind in ("maxpool2d", "avgpool2d"):
            p = layer["p"]
            batch, channels, height, width = current.shape
            # Drop trailing rows/columns that do not fill a whole window.
            trimmed = current[:, :, : height // p * p, : width // p * p]
            blocks = trimmed.reshape(batch, channels, height // p, p, width // p, p)
            if kind == "maxpool2d":
                result = blocks.max(axis=(3, 5))
            else:
                result = blocks.mean(axis=(3, 5))
        elif kind == "batchnorm":
            flat = current.reshape(current.shape[0], -1)
            normalized, bn_cache = batchnorm_forward(flat, layer, training)
            result = normalized.reshape(current.shape)
        elif kind == "dropout":
            rate = layer.get("rate", dropout_rate)
            result = current
            keep_mask = None
            if training and rate > 0:
                if rate >= 1.0:
                    # Dropping everything: avoid the division by (1 - rate).
                    keep_mask = np.zeros_like(current, dtype=np.float64)
                    result = np.zeros_like(current)
                else:
                    keep_mask = (np.random.rand(*current.shape) > rate).astype(np.float64)
                    result = current * keep_mask / (1.0 - rate)
            # The mask is stored on the layer for the backward pass (None when inactive).
            layer["mask"] = keep_mask
        else:
            raise ValueError(f"Unknown layer type: {layer['type']}")

        self.pre_activations.append(pre_act)
        self.batchnorm_cache.append(bn_cache)
        self.outputs.append(result)

    return self.outputs[-1]
@@ -0,0 +1,48 @@
1
+ import json
2
+ import pickle
3
+ import os
4
+ import numpy as np
5
+
6
+ def _numpy_encoder(obj):
7
+ if isinstance(obj, np.ndarray):
8
+ return obj.tolist()
9
+ raise TypeError(f"Object of type {type(obj)} is not JSON serializable")
10
+
11
def Save(self, file):
    """Write the layers and optimizer settings of the network to ``file``.

    A path ending in ".pkl" (case-insensitive) is written with ``pickle``;
    any other extension is written as JSON, with NumPy values converted by
    ``_numpy_encoder``. Optimizer moment buffers are not saved.
    """
    snapshot = dict(
        version=2,
        layers=self.layers,
        optimizer=self.optimizer_type,
        learning_rate=self.learning_rate,
        l2_lambda=self.l2_lambda,
        momentum=self.momentum,
        t=self.t,
    )

    _, suffix = os.path.splitext(file)
    if suffix.lower() != ".pkl":
        with open(file, "w") as handle:
            json.dump(snapshot, handle, default=_numpy_encoder)
        return

    with open(file, "wb") as handle:
        pickle.dump(snapshot, handle)
28
+
29
def Load(self, file):
    """Restore layers and optimizer settings previously written by ``Save``.

    A ".pkl" path (case-insensitive) is read with ``pickle``; anything else
    is parsed as JSON. Array-valued layer entries are converted to float64
    arrays, while entries that are missing or ``None`` (for example the mask
    of a dropout layer saved after an inference pass) are left untouched
    instead of being turned into a NaN array. Settings absent from the file
    keep their current values. The optimizer state is reset.
    """
    ext = os.path.splitext(file)[1].lower()
    if ext == ".pkl":
        # SECURITY: unpickling can execute arbitrary code. Only load .pkl
        # files from a trusted source; prefer the JSON format otherwise.
        with open(file, "rb") as f:
            raw = pickle.load(f)
    else:
        with open(file, "r") as f:
            raw = json.load(f)

    array_keys = ("weights", "bias", "mask", "gamma", "beta", "running_mean", "running_var")
    layers = []
    for layer in raw.get("layers", []):
        for key in array_keys:
            if layer.get(key) is not None:
                layer[key] = np.array(layer[key], dtype=np.float64)
        layers.append(layer)

    self.layers = layers
    self.opt_state = []
    self.t = raw.get("t", 0)
    self.learning_rate = raw.get("learning_rate", self.learning_rate)
    self.optimizer_type = raw.get("optimizer", self.optimizer_type)
    self.l2_lambda = raw.get("l2_lambda", self.l2_lambda)
    self.momentum = raw.get("momentum", self.momentum)
@@ -0,0 +1,34 @@
1
+ import numpy as np
2
+ from .weight_init import init_weights, init_conv_weights
3
+
4
def add_dense(self, n_in, n_out, activation="relu", init_method="xavier_uniform"):
    """Append a fully connected layer with weights from ``init_weights``."""
    weights, bias = init_weights(n_in, n_out, method=init_method)
    layer = {"type": "dense", "weights": weights, "bias": bias, "activation": activation}
    self.layers.append(layer)
7
+
8
def add_sparse(self, n_in, n_out, connectivity=0.5, activation="relu", init_method="xavier_uniform"):
    """Append a dense-like layer whose weights are masked by a fixed random 0/1 pattern.

    Each of the ``n_out * n_in`` connections is kept when a uniform random
    draw falls below ``connectivity``; the mask is stored on the layer
    alongside the already-masked weights.
    """
    weights, bias = init_weights(n_in, n_out, method=init_method)
    keep = np.random.rand(n_out, n_in) < connectivity
    mask = keep.astype(np.float64)
    layer = {
        "type": "sparse",
        "weights": weights * mask,
        "bias": bias,
        "mask": mask,
        "activation": activation,
    }
    self.layers.append(layer)
12
+
13
def add_conv2d(self, in_ch, out_ch, k, activation="relu", init_method="he_normal"):
    """Append a 2-D convolution layer with a ``k`` x ``k`` kernel from ``init_conv_weights``."""
    weights, bias = init_conv_weights(in_ch, out_ch, k, method=init_method)
    layer = {
        "type": "conv2d",
        "weights": weights,
        "bias": bias,
        "in_ch": in_ch,
        "out_ch": out_ch,
        "k": k,
        "activation": activation,
    }
    self.layers.append(layer)
16
+
17
def add_flatten(self):
    """Append a parameter-free flatten layer."""
    layer = {"type": "flatten"}
    self.layers.append(layer)
19
+
20
def add_maxpool2d(self, pool_size=2):
    """Append a max-pooling layer; ``pool_size`` is stored under the key "p"."""
    layer = {"type": "maxpool2d", "p": pool_size}
    self.layers.append(layer)
22
+
23
def add_avgpool2d(self, pool_size=2):
    """Append an average-pooling layer; ``pool_size`` is stored under the key "p"."""
    layer = {"type": "avgpool2d", "p": pool_size}
    self.layers.append(layer)
25
+
26
def add_batchnorm(self, num_features, epsilon=1e-5, momentum=0.1):
    """Append a batch-normalisation layer over ``num_features`` features.

    Running mean and beta start at zero; running variance and gamma start at
    one. All four are float64 vectors of length ``num_features``.
    """
    zeros = lambda: np.zeros(num_features, dtype=np.float64)
    ones = lambda: np.ones(num_features, dtype=np.float64)
    layer = {
        "type": "batchnorm",
        "num_features": num_features,
        "epsilon": epsilon,
        "momentum": momentum,
        "running_mean": zeros(),
        "running_var": ones(),
        "gamma": ones(),
        "beta": zeros(),
    }
    self.layers.append(layer)
32
+
33
def add_dropout(self, rate=0.5):
    """Append a dropout layer with the given drop ``rate``."""
    layer = {"type": "dropout", "rate": rate}
    self.layers.append(layer)
@@ -0,0 +1,43 @@
1
+ import numpy as np
2
+
3
def ComputeLoss(self, output, target, function="mse", reduction="mean", **kwargs):
    """Compute a loss between ``output`` and ``target``.

    Args:
        output, target: array-likes, converted to float64.
        function: one of "mse", "mae", "huber" (kwarg ``delta``, default
            1.0), "smooth_l1", "binary_cross_entropy", "cross_entropy" /
            "categorical_cross_entropy", "focal" (kwargs ``alpha`` default
            0.25 and ``gamma`` default 2.0) or "hinge".
        reduction: "mean" or "sum" return a float; any other value returns
            the unreduced element-wise loss array.

    For the categorical cross-entropy the "mean" reduction divides the summed
    loss by the size of the first axis; every other loss averages over all
    elements.

    The focal loss clips predictions to ``[1e-12, 1 - 1e-12]`` so that
    ``log(1 - o)`` stays finite, and weights both the positive and the
    negative term by ``(1 - pt) ** gamma``, where ``pt`` is the probability
    assigned to the true class.

    Raises:
        ValueError: if ``function`` is not a known loss name.
    """
    o = np.asarray(output, dtype=np.float64)
    t = np.asarray(target, dtype=np.float64)
    if function == "mse":
        loss = (o - t) ** 2
    elif function == "mae":
        loss = np.abs(o - t)
    elif function == "huber":
        delta = kwargs.get("delta", 1.0)
        diff = np.abs(o - t)
        loss = np.where(diff < delta, 0.5 * diff**2, delta * (diff - 0.5 * delta))
    elif function == "smooth_l1":
        diff = np.abs(o - t)
        loss = np.where(diff < 1, 0.5 * diff**2, diff - 0.5)
    elif function == "binary_cross_entropy":
        o = np.clip(o, 1e-12, 1 - 1e-12)
        loss = -(t * np.log(o) + (1 - t) * np.log(1 - o))
    elif function in ("cross_entropy", "categorical_cross_entropy"):
        o = np.clip(o, 1e-12, 1.0)
        loss = -t * np.log(o)
        if reduction == "mean":
            # Per-sample mean: classes are summed, samples are averaged.
            return float(np.sum(loss) / o.shape[0])
        if reduction == "sum":
            return float(np.sum(loss))
        return loss
    elif function == "focal":
        alpha = kwargs.get("alpha", 0.25)
        gamma = kwargs.get("gamma", 2.0)
        # Clip both ends: log(o) and log(1 - o) are both evaluated below.
        o = np.clip(o, 1e-12, 1 - 1e-12)
        pt = o * t + (1 - o) * (1 - t)
        # Down-weight well-classified examples of either class.
        modulating = (1 - pt) ** gamma
        loss = -modulating * (alpha * t * np.log(o) + (1 - alpha) * (1 - t) * np.log(1 - o))
    elif function == "hinge":
        loss = np.maximum(0, 1 - t * o)
    else:
        raise ValueError(f"Unknown loss function: {function}")

    if reduction == "mean":
        return float(np.mean(loss))
    if reduction == "sum":
        return float(np.sum(loss))
    return loss
@@ -0,0 +1,96 @@
1
+ import numpy as np
2
+ from .forward import im2col
3
+
4
def _new_optimizer_state(layer):
    """Return zeroed optimizer buffers for ``layer``, or ``None`` if it has no parameters."""
    if layer["type"] in ("dense", "sparse", "conv2d"):
        return {
            "mw": np.zeros_like(layer["weights"]),
            "vw": np.zeros_like(layer["weights"]),
            "mb": np.zeros_like(layer["bias"]),
            "vb": np.zeros_like(layer["bias"]),
            "vgw": np.zeros_like(layer["weights"]),
            "vgb": np.zeros_like(layer["bias"]),
        }
    if layer["type"] == "batchnorm":
        return {
            "mg": np.zeros_like(layer["gamma"]),
            "vg": np.zeros_like(layer["gamma"]),
            "mb": np.zeros_like(layer["beta"]),
            "vb": np.zeros_like(layer["beta"]),
        }
    return None


def _optimizer_step(self, param, grad, state, velocity_key, m_key, v_key):
    """Update ``param`` in place from ``grad`` using the configured optimizer.

    ``velocity_key`` names the SGD momentum buffer in ``state``; ``m_key`` and
    ``v_key`` name the first/second moment buffers used by Adam (``v_key``
    alone by RMSprop and Adagrad).
    """
    b1, b2, eps = 0.9, 0.999, 1e-8
    lr = self.learning_rate
    if self.optimizer_type == "sgd":
        state[velocity_key] = self.momentum * state[velocity_key] - lr * grad
        param += state[velocity_key]
    elif self.optimizer_type == "rmsprop":
        state[v_key] = b2 * state[v_key] + (1 - b2) * (grad ** 2)
        param -= lr * grad / (np.sqrt(state[v_key]) + eps)
    elif self.optimizer_type == "adagrad":
        state[v_key] += grad ** 2
        param -= lr * grad / (np.sqrt(state[v_key]) + eps)
    else:  # adam (also the fallback for unrecognised optimizer names)
        state[m_key] = b1 * state[m_key] + (1 - b1) * grad
        state[v_key] = b2 * state[v_key] + (1 - b2) * (grad ** 2)
        # Bias-corrected moment estimates; self.t counts update() calls.
        m_hat = state[m_key] / (1 - b1 ** self.t)
        v_hat = state[v_key] / (1 - b2 ** self.t)
        param -= lr * m_hat / (np.sqrt(v_hat) + eps)


def update(self):
    """Apply one optimizer step to every trainable layer.

    Uses ``self.deltas`` (from ``Backward``) and ``self.outputs`` (from
    ``Forward``) to form the weight and bias gradients of dense, sparse and
    conv2d layers, adds the L2 term ``l2_lambda * weights`` to the weight
    gradient, and reads the "d_gamma"/"d_beta" entries that ``Backward``
    stores on batchnorm layers. Parameters are modified in place.

    The per-layer optimizer buffers are (re)created whenever their count does
    not match the number of layers, so adding layers after a previous update
    does not break indexing; in that case all buffers restart from zero.
    """
    self.t += 1

    if len(self.opt_state) != len(self.layers):
        self.opt_state = [_new_optimizer_state(layer) for layer in self.layers]

    for l, layer in enumerate(self.layers):
        state = self.opt_state[l]
        kind = layer["type"]

        if kind == "batchnorm":
            grad_gamma = layer.get("d_gamma", np.zeros_like(layer["gamma"]))
            grad_beta = layer.get("d_beta", np.zeros_like(layer["beta"]))
            _optimizer_step(self, layer["gamma"], grad_gamma, state, "mg", "mg", "vg")
            _optimizer_step(self, layer["beta"], grad_beta, state, "mb", "mb", "vb")
            continue

        if kind in ("dense", "sparse"):
            # self.outputs[l] is the input that layer l received.
            grad_w = np.dot(self.deltas[l].T, self.outputs[l])
            grad_b = np.sum(self.deltas[l], axis=0)
            if kind == "sparse":
                # Pruned connections must not receive gradient.
                grad_w *= layer["mask"]
        elif kind == "conv2d":
            K = layer["k"]
            col = im2col(self.outputs[l], K, K)
            delta_flat = self.deltas[l].transpose(0, 2, 3, 1).reshape(-1, layer["weights"].shape[0])
            grad_w = np.dot(delta_flat.T, col).reshape(layer["weights"].shape)
            grad_b = np.sum(self.deltas[l], axis=(0, 2, 3))
        else:
            continue

        # L2 regularisation (masked for sparse layers so pruned weights stay zero).
        grad_w = grad_w + self.l2_lambda * layer["weights"] * layer.get("mask", 1.0)

        _optimizer_step(self, layer["weights"], grad_w, state, "vgw", "mw", "vw")
        _optimizer_step(self, layer["bias"], grad_b, state, "vgb", "mb", "vb")
@@ -0,0 +1,25 @@
1
+ import copy
2
+ import numpy as np
3
+
4
def Reinforce(self, inputs, score_fn, noise=0.05, tries=10, sigma=1.0):
    """Random-search step: keep the best of ``tries`` noisy copies of the current layers.

    Each candidate is a deep copy of the layers as they were on entry, with
    Gaussian noise of standard deviation ``sigma * noise`` added to the
    weights and bias of every layer that has weights (sparse layers are
    re-masked). A candidate replaces the best one only if
    ``score_fn(self.Forward(inputs))`` is strictly higher. At least one
    candidate is always tried.

    Returns:
        The best score found; ``self.layers`` is set to the matching layers.
    """
    inputs = np.asarray(inputs, dtype=np.float64)
    top_score = score_fn(self.Forward(inputs))
    top_layers = copy.deepcopy(self.layers)
    origin = copy.deepcopy(self.layers)

    remaining = max(1, tries)
    while remaining > 0:
        remaining -= 1
        trial = copy.deepcopy(origin)
        for entry in trial:
            if "weights" not in entry:
                continue
            entry["weights"] += np.random.normal(0, sigma * noise, entry["weights"].shape)
            if entry["type"] == "sparse":
                # Keep pruned connections at zero after the perturbation.
                entry["weights"] *= entry["mask"]
            entry["bias"] += np.random.normal(0, sigma * noise, entry["bias"].shape)

        # Forward reads self.layers, so the candidate is installed before scoring.
        self.layers = trial
        trial_score = score_fn(self.Forward(inputs))
        if trial_score > top_score:
            top_score = trial_score
            top_layers = copy.deepcopy(trial)

    self.layers = top_layers
    return top_score
@@ -0,0 +1,54 @@
1
+ import numpy as np
2
+
3
def TrainBatch(self, xs, ys, loss_function=None, **loss_kwargs):
    """Run one training step (forward, loss, backward, update) on a single batch.

    When ``loss_function`` is ``None`` it defaults to "cross_entropy" if the
    last layer's activation is "softmax" and to "mse" otherwise.

    Returns:
        ``(loss, out)``: the batch loss and the network output computed
        before the parameter update.
    """
    predictions = self.Forward(xs, training=True)

    chosen_loss = loss_function
    if chosen_loss is None:
        if self.layers[-1].get("activation") == "softmax":
            chosen_loss = "cross_entropy"
        else:
            chosen_loss = "mse"

    batch_loss = self.ComputeLoss(predictions, ys, chosen_loss, **loss_kwargs)
    self.Backward(ys)
    self.update()
    return batch_loss, predictions
11
+
12
def compute_accuracy(self, predictions, targets):
    """Return the fraction of samples whose predicted class equals the target class.

    Accepts any array-like input. When the last axis of ``predictions`` has
    more than one entry the data is treated as multi-class and both
    ``predictions`` and ``targets`` are reduced with ``argmax`` over axis 1.
    Otherwise predictions are thresholded at 0.5 and compared with the
    flattened targets.
    """
    predictions = np.asarray(predictions)
    targets = np.asarray(targets)
    if predictions.shape[-1] > 1:  # Multi-class
        pred_classes = np.argmax(predictions, axis=1)
        true_classes = np.argmax(targets, axis=1)
    else:  # Binary
        pred_classes = (predictions > 0.5).astype(int).flatten()
        true_classes = targets.flatten()
    return np.mean(pred_classes == true_classes)
20
+
21
def Train(self, X_train, Y_train, epochs=10, batch_size=32, X_val=None, Y_val=None, loss_function=None, verbose=True, **loss_kwargs):
    """Train with shuffled mini-batches and return the per-epoch history.

    Args:
        X_train, Y_train: array-likes indexed along their first axis.
        epochs: number of passes over the training data.
        batch_size: samples per ``TrainBatch`` call (the last batch of an
            epoch may be smaller).
        X_val, Y_val: optional validation data, evaluated after every epoch
            with a non-training forward pass.
        loss_function: loss name forwarded to ``TrainBatch``; when ``None``
            the validation loss uses "cross_entropy" for a softmax output
            layer and "mse" otherwise.
        verbose: print one progress line per epoch.
        **loss_kwargs: forwarded to the loss computation.

    Returns:
        Dict with the lists "loss", "val_loss", "accuracy" and
        "val_accuracy"; the validation lists stay empty without validation
        data. Training loss and accuracy are sample-weighted batch averages.

    Raises:
        ValueError: if the training set is empty or ``batch_size`` is not
            positive.
    """
    X_train = np.asarray(X_train)
    Y_train = np.asarray(Y_train)
    n_samples = X_train.shape[0]
    if n_samples == 0:
        raise ValueError("X_train must contain at least one sample")
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer")

    has_validation = X_val is not None and Y_val is not None
    if has_validation:
        Y_val = np.asarray(Y_val)

    history = {"loss": [], "val_loss": [], "accuracy": [], "val_accuracy": []}
    for epoch in range(epochs):
        indices = np.random.permutation(n_samples)
        X_shuffled = X_train[indices]
        Y_shuffled = Y_train[indices]
        epoch_loss = 0.0
        epoch_acc = 0.0
        total_samples = 0
        for i in range(0, n_samples, batch_size):
            X_batch = X_shuffled[i:i+batch_size]
            Y_batch = Y_shuffled[i:i+batch_size]
            loss, preds = self.TrainBatch(X_batch, Y_batch, loss_function=loss_function, **loss_kwargs)
            # Weight by the real batch size so a short final batch is not over-counted.
            batch_size_actual = X_batch.shape[0]
            epoch_loss += loss * batch_size_actual
            epoch_acc += self.compute_accuracy(preds, Y_batch) * batch_size_actual
            total_samples += batch_size_actual
        avg_loss = epoch_loss / total_samples
        avg_acc = epoch_acc / total_samples
        history["loss"].append(avg_loss)
        history["accuracy"].append(avg_acc)

        if has_validation:
            val_pred = self.Forward(X_val)
            val_loss_function = loss_function
            if val_loss_function is None:
                val_loss_function = "cross_entropy" if self.layers[-1].get("activation") == "softmax" else "mse"
            val_loss = self.ComputeLoss(val_pred, Y_val, val_loss_function, **loss_kwargs)
            val_acc = self.compute_accuracy(val_pred, Y_val)
            history["val_loss"].append(val_loss)
            history["val_accuracy"].append(val_acc)
            if verbose:
                print(f"Epoch {epoch+1}/{epochs} - loss: {avg_loss:.4f} - acc: {avg_acc:.4f} - val_loss: {val_loss:.4f} - val_acc: {val_acc:.4f}")
        elif verbose:
            print(f"Epoch {epoch+1}/{epochs} - loss: {avg_loss:.4f} - acc: {avg_acc:.4f}")
    return history