ilovetools 0.2.15__tar.gz → 0.2.17__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ilovetools-0.2.15/ilovetools.egg-info → ilovetools-0.2.17}/PKG-INFO +2 -2
- {ilovetools-0.2.15 → ilovetools-0.2.17}/ilovetools/__init__.py +2 -2
- ilovetools-0.2.17/ilovetools/ml/normalization.py +523 -0
- ilovetools-0.2.17/ilovetools/ml/optimizers.py +731 -0
- {ilovetools-0.2.15 → ilovetools-0.2.17/ilovetools.egg-info}/PKG-INFO +2 -2
- {ilovetools-0.2.15 → ilovetools-0.2.17}/ilovetools.egg-info/SOURCES.txt +4 -0
- {ilovetools-0.2.15 → ilovetools-0.2.17}/pyproject.toml +2 -2
- {ilovetools-0.2.15 → ilovetools-0.2.17}/setup.py +2 -2
- ilovetools-0.2.17/tests/test_normalization.py +439 -0
- ilovetools-0.2.17/tests/test_optimizers.py +328 -0
- {ilovetools-0.2.15 → ilovetools-0.2.17}/LICENSE +0 -0
- {ilovetools-0.2.15 → ilovetools-0.2.17}/MANIFEST.in +0 -0
- {ilovetools-0.2.15 → ilovetools-0.2.17}/README.md +0 -0
- {ilovetools-0.2.15 → ilovetools-0.2.17}/ilovetools/ai/__init__.py +0 -0
- {ilovetools-0.2.15 → ilovetools-0.2.17}/ilovetools/ai/embeddings.py +0 -0
- {ilovetools-0.2.15 → ilovetools-0.2.17}/ilovetools/ai/inference.py +0 -0
- {ilovetools-0.2.15 → ilovetools-0.2.17}/ilovetools/ai/llm_helpers.py +0 -0
- {ilovetools-0.2.15 → ilovetools-0.2.17}/ilovetools/audio/__init__.py +0 -0
- {ilovetools-0.2.15 → ilovetools-0.2.17}/ilovetools/automation/__init__.py +0 -0
- {ilovetools-0.2.15 → ilovetools-0.2.17}/ilovetools/automation/file_organizer.py +0 -0
- {ilovetools-0.2.15 → ilovetools-0.2.17}/ilovetools/conversion/__init__.py +0 -0
- {ilovetools-0.2.15 → ilovetools-0.2.17}/ilovetools/conversion/config_converter.py +0 -0
- {ilovetools-0.2.15 → ilovetools-0.2.17}/ilovetools/conversion/config_converter_fixed_header.py +0 -0
- {ilovetools-0.2.15 → ilovetools-0.2.17}/ilovetools/data/__init__.py +0 -0
- {ilovetools-0.2.15 → ilovetools-0.2.17}/ilovetools/data/feature_engineering.py +0 -0
- {ilovetools-0.2.15 → ilovetools-0.2.17}/ilovetools/data/preprocessing.py +0 -0
- {ilovetools-0.2.15 → ilovetools-0.2.17}/ilovetools/database/__init__.py +0 -0
- {ilovetools-0.2.15 → ilovetools-0.2.17}/ilovetools/datetime/__init__.py +0 -0
- {ilovetools-0.2.15 → ilovetools-0.2.17}/ilovetools/email/__init__.py +0 -0
- {ilovetools-0.2.15 → ilovetools-0.2.17}/ilovetools/email/template_engine.py +0 -0
- {ilovetools-0.2.15 → ilovetools-0.2.17}/ilovetools/files/__init__.py +0 -0
- {ilovetools-0.2.15 → ilovetools-0.2.17}/ilovetools/image/__init__.py +0 -0
- {ilovetools-0.2.15 → ilovetools-0.2.17}/ilovetools/ml/__init__.py +0 -0
- {ilovetools-0.2.15 → ilovetools-0.2.17}/ilovetools/ml/activations.py +0 -0
- {ilovetools-0.2.15 → ilovetools-0.2.17}/ilovetools/ml/anomaly_detection.py +0 -0
- {ilovetools-0.2.15 → ilovetools-0.2.17}/ilovetools/ml/clustering.py +0 -0
- {ilovetools-0.2.15 → ilovetools-0.2.17}/ilovetools/ml/cross_validation.py +0 -0
- {ilovetools-0.2.15 → ilovetools-0.2.17}/ilovetools/ml/dimensionality.py +0 -0
- {ilovetools-0.2.15 → ilovetools-0.2.17}/ilovetools/ml/ensemble.py +0 -0
- {ilovetools-0.2.15 → ilovetools-0.2.17}/ilovetools/ml/feature_selection.py +0 -0
- {ilovetools-0.2.15 → ilovetools-0.2.17}/ilovetools/ml/gradient_descent.py +0 -0
- {ilovetools-0.2.15 → ilovetools-0.2.17}/ilovetools/ml/imbalanced.py +0 -0
- {ilovetools-0.2.15 → ilovetools-0.2.17}/ilovetools/ml/interpretation.py +0 -0
- {ilovetools-0.2.15 → ilovetools-0.2.17}/ilovetools/ml/loss_functions.py +0 -0
- {ilovetools-0.2.15 → ilovetools-0.2.17}/ilovetools/ml/metrics.py +0 -0
- {ilovetools-0.2.15 → ilovetools-0.2.17}/ilovetools/ml/neural_network.py +0 -0
- {ilovetools-0.2.15 → ilovetools-0.2.17}/ilovetools/ml/pipeline.py +0 -0
- {ilovetools-0.2.15 → ilovetools-0.2.17}/ilovetools/ml/regularization.py +0 -0
- {ilovetools-0.2.15 → ilovetools-0.2.17}/ilovetools/ml/timeseries.py +0 -0
- {ilovetools-0.2.15 → ilovetools-0.2.17}/ilovetools/ml/tuning.py +0 -0
- {ilovetools-0.2.15 → ilovetools-0.2.17}/ilovetools/security/__init__.py +0 -0
- {ilovetools-0.2.15 → ilovetools-0.2.17}/ilovetools/security/password_checker.py +0 -0
- {ilovetools-0.2.15 → ilovetools-0.2.17}/ilovetools/text/__init__.py +0 -0
- {ilovetools-0.2.15 → ilovetools-0.2.17}/ilovetools/utils/__init__.py +0 -0
- {ilovetools-0.2.15 → ilovetools-0.2.17}/ilovetools/utils/cache_system.py +0 -0
- {ilovetools-0.2.15 → ilovetools-0.2.17}/ilovetools/utils/logger.py +0 -0
- {ilovetools-0.2.15 → ilovetools-0.2.17}/ilovetools/utils/rate_limiter.py +0 -0
- {ilovetools-0.2.15 → ilovetools-0.2.17}/ilovetools/utils/retry.py +0 -0
- {ilovetools-0.2.15 → ilovetools-0.2.17}/ilovetools/validation/__init__.py +0 -0
- {ilovetools-0.2.15 → ilovetools-0.2.17}/ilovetools/validation/data_validator.py +0 -0
- {ilovetools-0.2.15 → ilovetools-0.2.17}/ilovetools/web/__init__.py +0 -0
- {ilovetools-0.2.15 → ilovetools-0.2.17}/ilovetools/web/scraper.py +0 -0
- {ilovetools-0.2.15 → ilovetools-0.2.17}/ilovetools/web/url_shortener.py +0 -0
- {ilovetools-0.2.15 → ilovetools-0.2.17}/ilovetools.egg-info/dependency_links.txt +0 -0
- {ilovetools-0.2.15 → ilovetools-0.2.17}/ilovetools.egg-info/requires.txt +0 -0
- {ilovetools-0.2.15 → ilovetools-0.2.17}/ilovetools.egg-info/top_level.txt +0 -0
- {ilovetools-0.2.15 → ilovetools-0.2.17}/requirements.txt +0 -0
- {ilovetools-0.2.15 → ilovetools-0.2.17}/setup.cfg +0 -0
- {ilovetools-0.2.15 → ilovetools-0.2.17}/tests/__init__.py +0 -0
- {ilovetools-0.2.15 → ilovetools-0.2.17}/tests/test_activations.py +0 -0
- {ilovetools-0.2.15 → ilovetools-0.2.17}/tests/test_gradient_descent.py +0 -0
- {ilovetools-0.2.15 → ilovetools-0.2.17}/tests/test_loss_functions.py +0 -0
- {ilovetools-0.2.15 → ilovetools-0.2.17}/tests/test_neural_network.py +0 -0
- {ilovetools-0.2.15 → ilovetools-0.2.17}/tests/test_regularization.py +0 -0
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ilovetools
-Version: 0.2.15
+Version: 0.2.17
 Summary: A comprehensive Python utility library with modular tools for AI/ML, data processing, and daily programming needs
 Home-page: https://github.com/AliMehdi512/ilovetools
 Author: Ali Mehdi
@@ -11,7 +11,7 @@ Project-URL: Repository, https://github.com/AliMehdi512/ilovetools
 Project-URL: Issues, https://github.com/AliMehdi512/ilovetools/issues
 Project-URL: Bug Reports, https://github.com/AliMehdi512/ilovetools/issues
 Project-URL: Source, https://github.com/AliMehdi512/ilovetools
-Keywords: utilities,tools,ai,ml,data-processing,automation,
+Keywords: utilities,tools,ai,ml,data-processing,automation,batch-normalization,layer-normalization,group-normalization,instance-normalization,weight-normalization,deep-learning,transformers
 Classifier: Development Status :: 3 - Alpha
 Classifier: Intended Audience :: Developers
 Classifier: Topic :: Software Development :: Libraries :: Python Modules
@@ -0,0 +1,523 @@ (new file: ilovetools/ml/normalization.py)
"""
Normalization Techniques for Neural Networks

This module provides various normalization techniques used in deep learning:
- Batch Normalization
- Layer Normalization
- Group Normalization
- Instance Normalization
- Weight Normalization

All normalization functions support both training and inference modes.
"""

import numpy as np
from typing import Tuple, Optional


# ============================================================================
# BATCH NORMALIZATION
# ============================================================================

def batch_normalization(
    x: np.ndarray,
    gamma: np.ndarray,
    beta: np.ndarray,
    running_mean: Optional[np.ndarray] = None,
    running_var: Optional[np.ndarray] = None,
    training: bool = True,
    momentum: float = 0.9,
    epsilon: float = 1e-5
) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """
    Batch Normalization

    Normalizes activations across the batch dimension. Reduces internal covariate shift
    and allows higher learning rates.

    Formula: y = gamma * (x - mean) / sqrt(var + epsilon) + beta

    Args:
        x: Input tensor of shape (N, D) or (N, C, H, W)
        gamma: Scale parameter of shape (D,) or (C,)
        beta: Shift parameter of shape (D,) or (C,)
        running_mean: Running mean for inference (updated during training)
        running_var: Running variance for inference (updated during training)
        training: Whether in training mode
        momentum: Momentum for running statistics (default: 0.9)
        epsilon: Small constant for numerical stability (default: 1e-5)

    Returns:
        Tuple of (normalized_output, updated_running_mean, updated_running_var)

    Example:
        >>> x = np.random.randn(32, 64)  # Batch of 32, 64 features
        >>> gamma = np.ones(64)
        >>> beta = np.zeros(64)
        >>> out, mean, var = batch_normalization(x, gamma, beta, training=True)
        >>> print(out.shape)  # (32, 64)
    """
    if running_mean is None:
        running_mean = np.zeros(gamma.shape)
    if running_var is None:
        running_var = np.ones(gamma.shape)

    if training:
        # Compute batch statistics
        if x.ndim == 2:
            # Fully connected layer: (N, D)
            batch_mean = np.mean(x, axis=0)
            batch_var = np.var(x, axis=0)
        elif x.ndim == 4:
            # Convolutional layer: (N, C, H, W)
            batch_mean = np.mean(x, axis=(0, 2, 3), keepdims=True)
            batch_var = np.var(x, axis=(0, 2, 3), keepdims=True)
        else:
            raise ValueError(f"Unsupported input shape: {x.shape}")

        # Normalize
        x_normalized = (x - batch_mean) / np.sqrt(batch_var + epsilon)

        # Update running statistics
        running_mean = momentum * running_mean + (1 - momentum) * batch_mean.squeeze()
        running_var = momentum * running_var + (1 - momentum) * batch_var.squeeze()
    else:
        # Use running statistics for inference
        if x.ndim == 2:
            x_normalized = (x - running_mean) / np.sqrt(running_var + epsilon)
        elif x.ndim == 4:
            mean_reshaped = running_mean.reshape(1, -1, 1, 1)
            var_reshaped = running_var.reshape(1, -1, 1, 1)
            x_normalized = (x - mean_reshaped) / np.sqrt(var_reshaped + epsilon)

    # Scale and shift
    if x.ndim == 2:
        out = gamma * x_normalized + beta
    elif x.ndim == 4:
        gamma_reshaped = gamma.reshape(1, -1, 1, 1)
        beta_reshaped = beta.reshape(1, -1, 1, 1)
        out = gamma_reshaped * x_normalized + beta_reshaped

    return out, running_mean, running_var
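
As an illustrative aside (a minimal usage sketch, not part of the file added in this diff, assuming the package is installed and importable as ilovetools), batch_normalization is meant to be called once per step in training mode to update the running statistics, which are then passed back in at inference time:

import numpy as np
from ilovetools.ml.normalization import batch_normalization

x = np.random.randn(32, 64)                 # batch of 32 samples, 64 features
gamma, beta = np.ones(64), np.zeros(64)

# Training step: batch statistics are used, running statistics are updated
out, run_mean, run_var = batch_normalization(x, gamma, beta, training=True)

# Inference step: reuse the running statistics collected during training
x_test = np.random.randn(4, 64)
out_test, _, _ = batch_normalization(
    x_test, gamma, beta,
    running_mean=run_mean, running_var=run_var,
    training=False,
)
print(out.shape, out_test.shape)            # (32, 64) (4, 64)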

def batch_norm_forward(
    x: np.ndarray,
    gamma: np.ndarray,
    beta: np.ndarray,
    bn_params: dict
) -> Tuple[np.ndarray, dict]:
    """
    Forward pass for batch normalization with cache

    Args:
        x: Input data
        gamma: Scale parameter
        beta: Shift parameter
        bn_params: Dictionary with keys:
            - mode: 'train' or 'test'
            - eps: Epsilon for numerical stability
            - momentum: Momentum for running statistics
            - running_mean: Running mean
            - running_var: Running variance

    Returns:
        Tuple of (output, cache)

    Example:
        >>> x = np.random.randn(32, 64)
        >>> gamma = np.ones(64)
        >>> beta = np.zeros(64)
        >>> bn_params = {
        ...     'mode': 'train',
        ...     'eps': 1e-5,
        ...     'momentum': 0.9,
        ...     'running_mean': np.zeros(64),
        ...     'running_var': np.ones(64)
        ... }
        >>> out, cache = batch_norm_forward(x, gamma, beta, bn_params)
    """
    mode = bn_params.get('mode', 'train')
    eps = bn_params.get('eps', 1e-5)
    momentum = bn_params.get('momentum', 0.9)

    running_mean = bn_params.get('running_mean', np.zeros(gamma.shape))
    running_var = bn_params.get('running_var', np.ones(gamma.shape))

    training = (mode == 'train')

    out, running_mean, running_var = batch_normalization(
        x, gamma, beta, running_mean, running_var,
        training=training, momentum=momentum, epsilon=eps
    )

    # Update running statistics in params
    bn_params['running_mean'] = running_mean
    bn_params['running_var'] = running_var

    # Cache for backward pass
    cache = {
        'x': x,
        'gamma': gamma,
        'beta': beta,
        'eps': eps,
        'x_normalized': (x - running_mean) / np.sqrt(running_var + eps),
        'mean': running_mean,
        'var': running_var
    }

    return out, cache


# ============================================================================
# LAYER NORMALIZATION
# ============================================================================

def layer_normalization(
    x: np.ndarray,
    gamma: np.ndarray,
    beta: np.ndarray,
    epsilon: float = 1e-5
) -> np.ndarray:
    """
    Layer Normalization

    Normalizes activations across the feature dimension. Independent of batch size,
    making it suitable for RNNs and Transformers.

    Formula: y = gamma * (x - mean) / sqrt(var + epsilon) + beta

    Args:
        x: Input tensor of shape (N, D) or (N, L, D)
        gamma: Scale parameter of shape (D,)
        beta: Shift parameter of shape (D,)
        epsilon: Small constant for numerical stability (default: 1e-5)

    Returns:
        Normalized output of same shape as input

    Example:
        >>> x = np.random.randn(32, 512)  # Batch of 32, 512 features
        >>> gamma = np.ones(512)
        >>> beta = np.zeros(512)
        >>> out = layer_normalization(x, gamma, beta)
        >>> print(out.shape)  # (32, 512)

        >>> # For sequences (Transformers)
        >>> x = np.random.randn(32, 10, 512)  # Batch 32, seq len 10, dim 512
        >>> out = layer_normalization(x, gamma, beta)
        >>> print(out.shape)  # (32, 10, 512)
    """
    # Compute mean and variance across feature dimension
    if x.ndim == 2:
        # (N, D) - Fully connected
        mean = np.mean(x, axis=1, keepdims=True)
        var = np.var(x, axis=1, keepdims=True)
    elif x.ndim == 3:
        # (N, L, D) - Sequence data (Transformers)
        mean = np.mean(x, axis=2, keepdims=True)
        var = np.var(x, axis=2, keepdims=True)
    else:
        raise ValueError(f"Unsupported input shape: {x.shape}")

    # Normalize
    x_normalized = (x - mean) / np.sqrt(var + epsilon)

    # Scale and shift
    out = gamma * x_normalized + beta

    return out
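
A quick sanity sketch for layer_normalization (illustrative only, assuming the package is installed): with identity gamma and zero beta, every position of the output should have roughly zero mean and unit standard deviation over the feature dimension.

import numpy as np
from ilovetools.ml.normalization import layer_normalization

x = np.random.randn(32, 10, 512)              # (batch, seq_len, d_model), Transformer-style
gamma, beta = np.ones(512), np.zeros(512)

out = layer_normalization(x, gamma, beta)
# Each position normalized independently of the batch
print(np.allclose(out.mean(axis=-1), 0.0, atol=1e-6))   # True
print(np.allclose(out.std(axis=-1), 1.0, atol=1e-3))    # True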

def layer_norm_forward(
    x: np.ndarray,
    gamma: np.ndarray,
    beta: np.ndarray,
    epsilon: float = 1e-5
) -> Tuple[np.ndarray, dict]:
    """
    Forward pass for layer normalization with cache

    Args:
        x: Input data
        gamma: Scale parameter
        beta: Shift parameter
        epsilon: Epsilon for numerical stability

    Returns:
        Tuple of (output, cache)

    Example:
        >>> x = np.random.randn(32, 512)
        >>> gamma = np.ones(512)
        >>> beta = np.zeros(512)
        >>> out, cache = layer_norm_forward(x, gamma, beta)
    """
    # Compute statistics
    if x.ndim == 2:
        mean = np.mean(x, axis=1, keepdims=True)
        var = np.var(x, axis=1, keepdims=True)
    elif x.ndim == 3:
        mean = np.mean(x, axis=2, keepdims=True)
        var = np.var(x, axis=2, keepdims=True)
    else:
        raise ValueError(f"Unsupported input shape: {x.shape}")

    # Normalize
    x_normalized = (x - mean) / np.sqrt(var + epsilon)

    # Scale and shift
    out = gamma * x_normalized + beta

    # Cache for backward pass
    cache = {
        'x': x,
        'gamma': gamma,
        'beta': beta,
        'eps': epsilon,
        'x_normalized': x_normalized,
        'mean': mean,
        'var': var
    }

    return out, cache


# ============================================================================
# GROUP NORMALIZATION
# ============================================================================

def group_normalization(
    x: np.ndarray,
    gamma: np.ndarray,
    beta: np.ndarray,
    num_groups: int = 32,
    epsilon: float = 1e-5
) -> np.ndarray:
    """
    Group Normalization

    Divides channels into groups and normalizes within each group.
    Works well with small batch sizes.

    Args:
        x: Input tensor of shape (N, C, H, W)
        gamma: Scale parameter of shape (C,)
        beta: Shift parameter of shape (C,)
        num_groups: Number of groups (default: 32)
        epsilon: Small constant for numerical stability

    Returns:
        Normalized output of same shape as input

    Example:
        >>> x = np.random.randn(8, 64, 32, 32)  # Small batch
        >>> gamma = np.ones(64)
        >>> beta = np.zeros(64)
        >>> out = group_normalization(x, gamma, beta, num_groups=32)
        >>> print(out.shape)  # (8, 64, 32, 32)
    """
    N, C, H, W = x.shape

    if C % num_groups != 0:
        raise ValueError(f"Number of channels ({C}) must be divisible by num_groups ({num_groups})")

    # Reshape to (N, num_groups, C // num_groups, H, W)
    x_grouped = x.reshape(N, num_groups, C // num_groups, H, W)

    # Compute mean and variance per group
    mean = np.mean(x_grouped, axis=(2, 3, 4), keepdims=True)
    var = np.var(x_grouped, axis=(2, 3, 4), keepdims=True)

    # Normalize
    x_normalized = (x_grouped - mean) / np.sqrt(var + epsilon)

    # Reshape back
    x_normalized = x_normalized.reshape(N, C, H, W)

    # Scale and shift
    gamma_reshaped = gamma.reshape(1, -1, 1, 1)
    beta_reshaped = beta.reshape(1, -1, 1, 1)
    out = gamma_reshaped * x_normalized + beta_reshaped

    return out
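
An illustrative check of the grouping logic (not part of the diff): with one channel per group, group normalization reduces to instance normalization, so the two helpers should agree numerically on the same input.

import numpy as np
from ilovetools.ml.normalization import group_normalization, instance_normalization

x = np.random.randn(8, 64, 32, 32)
gamma, beta = np.ones(64), np.zeros(64)

gn = group_normalization(x, gamma, beta, num_groups=64)   # one channel per group
inorm = instance_normalization(x, gamma, beta)
print(np.allclose(gn, inorm))                             # True: GN with C groups matches InstanceNorm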

# ============================================================================
# INSTANCE NORMALIZATION
# ============================================================================

def instance_normalization(
    x: np.ndarray,
    gamma: np.ndarray,
    beta: np.ndarray,
    epsilon: float = 1e-5
) -> np.ndarray:
    """
    Instance Normalization

    Normalizes each sample independently. Used in style transfer and GANs.

    Args:
        x: Input tensor of shape (N, C, H, W)
        gamma: Scale parameter of shape (C,)
        beta: Shift parameter of shape (C,)
        epsilon: Small constant for numerical stability

    Returns:
        Normalized output of same shape as input

    Example:
        >>> x = np.random.randn(8, 64, 32, 32)
        >>> gamma = np.ones(64)
        >>> beta = np.zeros(64)
        >>> out = instance_normalization(x, gamma, beta)
        >>> print(out.shape)  # (8, 64, 32, 32)
    """
    N, C, H, W = x.shape

    # Compute mean and variance per instance per channel
    mean = np.mean(x, axis=(2, 3), keepdims=True)
    var = np.var(x, axis=(2, 3), keepdims=True)

    # Normalize
    x_normalized = (x - mean) / np.sqrt(var + epsilon)

    # Scale and shift
    gamma_reshaped = gamma.reshape(1, -1, 1, 1)
    beta_reshaped = beta.reshape(1, -1, 1, 1)
    out = gamma_reshaped * x_normalized + beta_reshaped

    return out


# ============================================================================
# WEIGHT NORMALIZATION
# ============================================================================

def weight_normalization(
    w: np.ndarray,
    g: Optional[np.ndarray] = None,
    axis: int = 0
) -> Tuple[np.ndarray, np.ndarray]:
    """
    Weight Normalization

    Decouples the magnitude and direction of weight vectors.
    Faster than batch normalization.

    Formula: w = g * (v / ||v||)

    Args:
        w: Weight matrix
        g: Magnitude parameter (if None, computed from w)
        axis: Axis along which to normalize (default: 0)

    Returns:
        Tuple of (normalized_weights, magnitude)

    Example:
        >>> w = np.random.randn(512, 256)  # Weight matrix
        >>> w_norm, g = weight_normalization(w)
        >>> print(w_norm.shape)  # (512, 256)
        >>> print(g.shape)  # (512,) or (256,) depending on axis
    """
    # Compute norm along specified axis
    norm = np.linalg.norm(w, axis=axis, keepdims=True)

    # Normalize direction
    v = w / (norm + 1e-8)

    # Compute or use provided magnitude
    if g is None:
        g = norm.squeeze()

    # Reconstruct weights
    if axis == 0:
        w_normalized = g.reshape(-1, 1) * v
    else:
        w_normalized = g.reshape(1, -1) * v

    return w_normalized, g
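
The decomposition described in the docstring, w = g * (v / ||v||), can also be checked directly in plain NumPy (an illustrative sketch of the reparameterization itself, independent of the helper above): splitting a weight matrix into per-column magnitudes and unit-norm directions and recombining them recovers the original matrix.

import numpy as np

w = np.random.randn(512, 256)

# Decompose: per-column magnitude g and unit-norm direction v
g = np.linalg.norm(w, axis=0, keepdims=True)   # shape (1, 256)
v = w / g                                      # columns now have unit norm

# Recombine: w == g * (v / ||v||), and ||v|| is already 1
w_reconstructed = g * v
print(np.allclose(w, w_reconstructed))         # True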

# ============================================================================
# UTILITY FUNCTIONS
# ============================================================================

def create_normalization_params(
    num_features: int,
    norm_type: str = 'batch'
) -> dict:
    """
    Create parameters for normalization layers

    Args:
        num_features: Number of features/channels
        norm_type: Type of normalization ('batch', 'layer', 'group', 'instance')

    Returns:
        Dictionary with initialized parameters

    Example:
        >>> params = create_normalization_params(64, 'batch')
        >>> print(params.keys())
        dict_keys(['gamma', 'beta', 'running_mean', 'running_var'])
    """
    params = {
        'gamma': np.ones(num_features),
        'beta': np.zeros(num_features)
    }

    if norm_type == 'batch':
        params['running_mean'] = np.zeros(num_features)
        params['running_var'] = np.ones(num_features)

    return params


def apply_normalization(
    x: np.ndarray,
    norm_type: str,
    gamma: np.ndarray,
    beta: np.ndarray,
    **kwargs
) -> np.ndarray:
    """
    Apply normalization by type name

    Args:
        x: Input tensor
        norm_type: Type of normalization
        gamma: Scale parameter
        beta: Shift parameter
        **kwargs: Additional arguments for specific normalization

    Returns:
        Normalized output

    Example:
        >>> x = np.random.randn(32, 64)
        >>> gamma = np.ones(64)
        >>> beta = np.zeros(64)
        >>> out = apply_normalization(x, 'layer', gamma, beta)
    """
    if norm_type == 'batch':
        out, _, _ = batch_normalization(x, gamma, beta, **kwargs)
        return out
    elif norm_type == 'layer':
        return layer_normalization(x, gamma, beta, **kwargs)
    elif norm_type == 'group':
        return group_normalization(x, gamma, beta, **kwargs)
    elif norm_type == 'instance':
        return instance_normalization(x, gamma, beta, **kwargs)
    else:
        raise ValueError(f"Unknown normalization type: {norm_type}")


# Aliases for convenience
batchnorm = batch_normalization
layernorm = layer_normalization
groupnorm = group_normalization
instancenorm = instance_normalization
weightnorm = weight_normalization
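
Finally, a short sketch of the dispatch helper and the aliases (illustrative, assuming the package is installed): apply_normalization routes to the specific implementation by name, and the lowercase aliases point at the same functions, so both paths produce identical output.

import numpy as np
from ilovetools.ml.normalization import apply_normalization, layernorm

x = np.random.randn(32, 64)
gamma, beta = np.ones(64), np.zeros(64)

out_named = apply_normalization(x, 'layer', gamma, beta)
out_alias = layernorm(x, gamma, beta)
print(np.allclose(out_named, out_alias))   # True: same underlying function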