ilovetools 0.2.19__tar.gz → 0.2.21__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ilovetools-0.2.19/ilovetools.egg-info → ilovetools-0.2.21}/PKG-INFO +2 -2
- {ilovetools-0.2.19 → ilovetools-0.2.21}/ilovetools/__init__.py +2 -2
- ilovetools-0.2.21/ilovetools/ml/normalization_advanced.py +471 -0
- ilovetools-0.2.21/ilovetools/ml/rnn.py +498 -0
- {ilovetools-0.2.19 → ilovetools-0.2.21/ilovetools.egg-info}/PKG-INFO +2 -2
- {ilovetools-0.2.19 → ilovetools-0.2.21}/ilovetools.egg-info/SOURCES.txt +5 -1
- {ilovetools-0.2.19 → ilovetools-0.2.21}/pyproject.toml +2 -2
- {ilovetools-0.2.19 → ilovetools-0.2.21}/setup.py +2 -2
- ilovetools-0.2.21/tests/test_normalization_advanced.py +375 -0
- ilovetools-0.2.21/tests/test_rnn.py +419 -0
- {ilovetools-0.2.19 → ilovetools-0.2.21}/LICENSE +0 -0
- {ilovetools-0.2.19 → ilovetools-0.2.21}/MANIFEST.in +0 -0
- {ilovetools-0.2.19 → ilovetools-0.2.21}/README.md +0 -0
- {ilovetools-0.2.19 → ilovetools-0.2.21}/ilovetools/ai/__init__.py +0 -0
- {ilovetools-0.2.19 → ilovetools-0.2.21}/ilovetools/ai/embeddings.py +0 -0
- {ilovetools-0.2.19 → ilovetools-0.2.21}/ilovetools/ai/inference.py +0 -0
- {ilovetools-0.2.19 → ilovetools-0.2.21}/ilovetools/ai/llm_helpers.py +0 -0
- {ilovetools-0.2.19 → ilovetools-0.2.21}/ilovetools/audio/__init__.py +0 -0
- {ilovetools-0.2.19 → ilovetools-0.2.21}/ilovetools/automation/__init__.py +0 -0
- {ilovetools-0.2.19 → ilovetools-0.2.21}/ilovetools/automation/file_organizer.py +0 -0
- {ilovetools-0.2.19 → ilovetools-0.2.21}/ilovetools/conversion/__init__.py +0 -0
- {ilovetools-0.2.19 → ilovetools-0.2.21}/ilovetools/conversion/config_converter.py +0 -0
- {ilovetools-0.2.19 → ilovetools-0.2.21}/ilovetools/conversion/config_converter_fixed_header.py +0 -0
- {ilovetools-0.2.19 → ilovetools-0.2.21}/ilovetools/data/__init__.py +0 -0
- {ilovetools-0.2.19 → ilovetools-0.2.21}/ilovetools/data/feature_engineering.py +0 -0
- {ilovetools-0.2.19 → ilovetools-0.2.21}/ilovetools/data/preprocessing.py +0 -0
- {ilovetools-0.2.19 → ilovetools-0.2.21}/ilovetools/database/__init__.py +0 -0
- {ilovetools-0.2.19 → ilovetools-0.2.21}/ilovetools/datetime/__init__.py +0 -0
- {ilovetools-0.2.19 → ilovetools-0.2.21}/ilovetools/email/__init__.py +0 -0
- {ilovetools-0.2.19 → ilovetools-0.2.21}/ilovetools/email/template_engine.py +0 -0
- {ilovetools-0.2.19 → ilovetools-0.2.21}/ilovetools/files/__init__.py +0 -0
- {ilovetools-0.2.19 → ilovetools-0.2.21}/ilovetools/image/__init__.py +0 -0
- {ilovetools-0.2.19 → ilovetools-0.2.21}/ilovetools/ml/__init__.py +0 -0
- {ilovetools-0.2.19 → ilovetools-0.2.21}/ilovetools/ml/activations.py +0 -0
- {ilovetools-0.2.19 → ilovetools-0.2.21}/ilovetools/ml/anomaly_detection.py +0 -0
- {ilovetools-0.2.19 → ilovetools-0.2.21}/ilovetools/ml/attention.py +0 -0
- {ilovetools-0.2.19 → ilovetools-0.2.21}/ilovetools/ml/clustering.py +0 -0
- {ilovetools-0.2.19 → ilovetools-0.2.21}/ilovetools/ml/cnn.py +0 -0
- {ilovetools-0.2.19 → ilovetools-0.2.21}/ilovetools/ml/cross_validation.py +0 -0
- {ilovetools-0.2.19 → ilovetools-0.2.21}/ilovetools/ml/dimensionality.py +0 -0
- {ilovetools-0.2.19 → ilovetools-0.2.21}/ilovetools/ml/ensemble.py +0 -0
- {ilovetools-0.2.19 → ilovetools-0.2.21}/ilovetools/ml/feature_selection.py +0 -0
- {ilovetools-0.2.19 → ilovetools-0.2.21}/ilovetools/ml/gradient_descent.py +0 -0
- {ilovetools-0.2.19 → ilovetools-0.2.21}/ilovetools/ml/imbalanced.py +0 -0
- {ilovetools-0.2.19 → ilovetools-0.2.21}/ilovetools/ml/interpretation.py +0 -0
- {ilovetools-0.2.19 → ilovetools-0.2.21}/ilovetools/ml/loss_functions.py +0 -0
- {ilovetools-0.2.19 → ilovetools-0.2.21}/ilovetools/ml/metrics.py +0 -0
- {ilovetools-0.2.19 → ilovetools-0.2.21}/ilovetools/ml/neural_network.py +0 -0
- {ilovetools-0.2.19 → ilovetools-0.2.21}/ilovetools/ml/normalization.py +0 -0
- {ilovetools-0.2.19 → ilovetools-0.2.21}/ilovetools/ml/optimizers.py +0 -0
- {ilovetools-0.2.19 → ilovetools-0.2.21}/ilovetools/ml/pipeline.py +0 -0
- {ilovetools-0.2.19 → ilovetools-0.2.21}/ilovetools/ml/regularization.py +0 -0
- {ilovetools-0.2.19 → ilovetools-0.2.21}/ilovetools/ml/timeseries.py +0 -0
- {ilovetools-0.2.19 → ilovetools-0.2.21}/ilovetools/ml/tuning.py +0 -0
- {ilovetools-0.2.19 → ilovetools-0.2.21}/ilovetools/security/__init__.py +0 -0
- {ilovetools-0.2.19 → ilovetools-0.2.21}/ilovetools/security/password_checker.py +0 -0
- {ilovetools-0.2.19 → ilovetools-0.2.21}/ilovetools/text/__init__.py +0 -0
- {ilovetools-0.2.19 → ilovetools-0.2.21}/ilovetools/utils/__init__.py +0 -0
- {ilovetools-0.2.19 → ilovetools-0.2.21}/ilovetools/utils/cache_system.py +0 -0
- {ilovetools-0.2.19 → ilovetools-0.2.21}/ilovetools/utils/logger.py +0 -0
- {ilovetools-0.2.19 → ilovetools-0.2.21}/ilovetools/utils/rate_limiter.py +0 -0
- {ilovetools-0.2.19 → ilovetools-0.2.21}/ilovetools/utils/retry.py +0 -0
- {ilovetools-0.2.19 → ilovetools-0.2.21}/ilovetools/validation/__init__.py +0 -0
- {ilovetools-0.2.19 → ilovetools-0.2.21}/ilovetools/validation/data_validator.py +0 -0
- {ilovetools-0.2.19 → ilovetools-0.2.21}/ilovetools/web/__init__.py +0 -0
- {ilovetools-0.2.19 → ilovetools-0.2.21}/ilovetools/web/scraper.py +0 -0
- {ilovetools-0.2.19 → ilovetools-0.2.21}/ilovetools/web/url_shortener.py +0 -0
- {ilovetools-0.2.19 → ilovetools-0.2.21}/ilovetools.egg-info/dependency_links.txt +0 -0
- {ilovetools-0.2.19 → ilovetools-0.2.21}/ilovetools.egg-info/requires.txt +0 -0
- {ilovetools-0.2.19 → ilovetools-0.2.21}/ilovetools.egg-info/top_level.txt +0 -0
- {ilovetools-0.2.19 → ilovetools-0.2.21}/requirements.txt +0 -0
- {ilovetools-0.2.19 → ilovetools-0.2.21}/setup.cfg +0 -0
- {ilovetools-0.2.19 → ilovetools-0.2.21}/tests/__init__.py +0 -0
- {ilovetools-0.2.19 → ilovetools-0.2.21}/tests/test_activations.py +0 -0
- {ilovetools-0.2.19 → ilovetools-0.2.21}/tests/test_attention.py +0 -0
- {ilovetools-0.2.19 → ilovetools-0.2.21}/tests/test_cnn.py +0 -0
- {ilovetools-0.2.19 → ilovetools-0.2.21}/tests/test_gradient_descent.py +0 -0
- {ilovetools-0.2.19 → ilovetools-0.2.21}/tests/test_loss_functions.py +0 -0
- {ilovetools-0.2.19 → ilovetools-0.2.21}/tests/test_neural_network.py +0 -0
- {ilovetools-0.2.19 → ilovetools-0.2.21}/tests/test_normalization.py +0 -0
- {ilovetools-0.2.19 → ilovetools-0.2.21}/tests/test_optimizers.py +0 -0
- {ilovetools-0.2.19 → ilovetools-0.2.21}/tests/test_regularization.py +0 -0
PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ilovetools
-Version: 0.2.19
+Version: 0.2.21
 Summary: A comprehensive Python utility library with modular tools for AI/ML, data processing, and daily programming needs
 Home-page: https://github.com/AliMehdi512/ilovetools
 Author: Ali Mehdi
@@ -11,7 +11,7 @@ Project-URL: Repository, https://github.com/AliMehdi512/ilovetools
 Project-URL: Issues, https://github.com/AliMehdi512/ilovetools/issues
 Project-URL: Bug Reports, https://github.com/AliMehdi512/ilovetools/issues
 Project-URL: Source, https://github.com/AliMehdi512/ilovetools
-Keywords: utilities,tools,ai,ml,data-processing,automation,
+Keywords: utilities,tools,ai,ml,data-processing,automation,batch-normalization,layer-normalization,normalization-techniques,deep-learning
 Classifier: Development Status :: 3 - Alpha
 Classifier: Intended Audience :: Developers
 Classifier: Topic :: Software Development :: Libraries :: Python Modules

ilovetools-0.2.21/ilovetools/ml/normalization_advanced.py
@@ -0,0 +1,471 @@
+"""
+Advanced Normalization Techniques
+
+This module provides advanced normalization methods:
+- Batch Normalization (BatchNorm)
+- Layer Normalization (LayerNorm)
+- Instance Normalization (InstanceNorm)
+- Group Normalization (GroupNorm)
+- Weight Normalization
+- Spectral Normalization
+
+All operations support batched inputs and are optimized for deep learning.
+"""
+
+import numpy as np
+from typing import Tuple, Optional
+
+
+# ============================================================================
+# BATCH NORMALIZATION
+# ============================================================================
+
+def batch_norm_forward(
+    x: np.ndarray,
+    gamma: np.ndarray,
+    beta: np.ndarray,
+    running_mean: Optional[np.ndarray] = None,
+    running_var: Optional[np.ndarray] = None,
+    momentum: float = 0.9,
+    eps: float = 1e-5,
+    training: bool = True
+) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
+    """
+    Batch Normalization forward pass
+
+    Normalizes across the batch dimension. Used in CNNs.
+
+    Formula:
+        μ_B = (1/m) Σ x_i
+        σ²_B = (1/m) Σ (x_i - μ_B)²
+        x̂ = (x - μ_B) / √(σ²_B + ε)
+        y = γ x̂ + β
+
+    Args:
+        x: Input tensor, shape (batch, channels, height, width) or (batch, features)
+        gamma: Scale parameter, shape (channels,) or (features,)
+        beta: Shift parameter, shape (channels,) or (features,)
+        running_mean: Running mean for inference
+        running_var: Running variance for inference
+        momentum: Momentum for running statistics
+        eps: Small constant for numerical stability
+        training: Whether in training mode
+
+    Returns:
+        Tuple of (output, updated_running_mean, updated_running_var)
+
+    Example:
+        >>> # For CNNs
+        >>> x = np.random.randn(32, 64, 28, 28)  # (batch, channels, H, W)
+        >>> gamma = np.ones(64)
+        >>> beta = np.zeros(64)
+        >>> output, mean, var = batch_norm_forward(x, gamma, beta, training=True)
+        >>> print(output.shape)  # (32, 64, 28, 28)
+
+        >>> # For fully connected
+        >>> x = np.random.randn(32, 256)  # (batch, features)
+        >>> gamma = np.ones(256)
+        >>> beta = np.zeros(256)
+        >>> output, mean, var = batch_norm_forward(x, gamma, beta, training=True)
+        >>> print(output.shape)  # (32, 256)
+    """
+    if training:
+        # Calculate batch statistics
+        if x.ndim == 4:  # Conv: (batch, channels, height, width)
+            # Mean and variance across batch, height, width
+            axes = (0, 2, 3)
+            mean = np.mean(x, axis=axes, keepdims=True)
+            var = np.var(x, axis=axes, keepdims=True)
+
+            # Squeeze for running stats
+            mean_squeeze = np.squeeze(mean, axis=(0, 2, 3))
+            var_squeeze = np.squeeze(var, axis=(0, 2, 3))
+        else:  # FC: (batch, features)
+            axes = 0
+            mean = np.mean(x, axis=axes, keepdims=True)
+            var = np.var(x, axis=axes, keepdims=True)
+
+            mean_squeeze = np.squeeze(mean, axis=0)
+            var_squeeze = np.squeeze(var, axis=0)
+
+        # Update running statistics
+        if running_mean is None:
+            running_mean = mean_squeeze
+        else:
+            running_mean = momentum * running_mean + (1 - momentum) * mean_squeeze
+
+        if running_var is None:
+            running_var = var_squeeze
+        else:
+            running_var = momentum * running_var + (1 - momentum) * var_squeeze
+
+        # Normalize
+        x_normalized = (x - mean) / np.sqrt(var + eps)
+    else:
+        # Use running statistics for inference
+        if running_mean is None or running_var is None:
+            raise ValueError("Running statistics required for inference mode")
+
+        if x.ndim == 4:
+            mean = running_mean.reshape(1, -1, 1, 1)
+            var = running_var.reshape(1, -1, 1, 1)
+        else:
+            mean = running_mean.reshape(1, -1)
+            var = running_var.reshape(1, -1)
+
+        x_normalized = (x - mean) / np.sqrt(var + eps)
+
+    # Scale and shift
+    if x.ndim == 4:
+        gamma = gamma.reshape(1, -1, 1, 1)
+        beta = beta.reshape(1, -1, 1, 1)
+    else:
+        gamma = gamma.reshape(1, -1)
+        beta = beta.reshape(1, -1)
+
+    output = gamma * x_normalized + beta
+
+    return output, running_mean, running_var
+
+
+# ============================================================================
+# LAYER NORMALIZATION
+# ============================================================================
+
+def layer_norm_forward(
+    x: np.ndarray,
+    gamma: np.ndarray,
+    beta: np.ndarray,
+    eps: float = 1e-5
+) -> np.ndarray:
+    """
+    Layer Normalization forward pass
+
+    Normalizes across the feature dimension. Used in Transformers and RNNs.
+
+    Formula:
+        μ = (1/H) Σ x_i
+        σ² = (1/H) Σ (x_i - μ)²
+        x̂ = (x - μ) / √(σ² + ε)
+        y = γ x̂ + β
+
+    Args:
+        x: Input tensor, shape (batch, features) or (batch, seq_len, features)
+        gamma: Scale parameter, shape (features,)
+        beta: Shift parameter, shape (features,)
+        eps: Small constant for numerical stability
+
+    Returns:
+        Normalized output with same shape as input
+
+    Example:
+        >>> # For Transformers
+        >>> x = np.random.randn(32, 10, 512)  # (batch, seq_len, features)
+        >>> gamma = np.ones(512)
+        >>> beta = np.zeros(512)
+        >>> output = layer_norm_forward(x, gamma, beta)
+        >>> print(output.shape)  # (32, 10, 512)
+
+        >>> # For fully connected
+        >>> x = np.random.randn(32, 256)  # (batch, features)
+        >>> gamma = np.ones(256)
+        >>> beta = np.zeros(256)
+        >>> output = layer_norm_forward(x, gamma, beta)
+        >>> print(output.shape)  # (32, 256)
+    """
+    # Calculate mean and variance across feature dimension
+    if x.ndim == 3:  # (batch, seq_len, features)
+        axes = -1
+        mean = np.mean(x, axis=axes, keepdims=True)
+        var = np.var(x, axis=axes, keepdims=True)
+    elif x.ndim == 2:  # (batch, features)
+        axes = -1
+        mean = np.mean(x, axis=axes, keepdims=True)
+        var = np.var(x, axis=axes, keepdims=True)
+    else:
+        raise ValueError(f"Unsupported input shape: {x.shape}")
+
+    # Normalize
+    x_normalized = (x - mean) / np.sqrt(var + eps)
+
+    # Scale and shift
+    output = gamma * x_normalized + beta
+
+    return output
+
+
+# ============================================================================
+# INSTANCE NORMALIZATION
+# ============================================================================
+
+def instance_norm_forward(
+    x: np.ndarray,
+    gamma: np.ndarray,
+    beta: np.ndarray,
+    eps: float = 1e-5
+) -> np.ndarray:
+    """
+    Instance Normalization forward pass
+
+    Normalizes each sample and channel independently. Used in style transfer.
+
+    Args:
+        x: Input tensor, shape (batch, channels, height, width)
+        gamma: Scale parameter, shape (channels,)
+        beta: Shift parameter, shape (channels,)
+        eps: Small constant for numerical stability
+
+    Returns:
+        Normalized output with same shape as input
+
+    Example:
+        >>> x = np.random.randn(32, 64, 28, 28)  # (batch, channels, H, W)
+        >>> gamma = np.ones(64)
+        >>> beta = np.zeros(64)
+        >>> output = instance_norm_forward(x, gamma, beta)
+        >>> print(output.shape)  # (32, 64, 28, 28)
+    """
+    if x.ndim != 4:
+        raise ValueError("Instance normalization requires 4D input (batch, channels, H, W)")
+
+    # Calculate mean and variance per instance, per channel
+    # Normalize across spatial dimensions (H, W)
+    axes = (2, 3)
+    mean = np.mean(x, axis=axes, keepdims=True)
+    var = np.var(x, axis=axes, keepdims=True)
+
+    # Normalize
+    x_normalized = (x - mean) / np.sqrt(var + eps)
+
+    # Scale and shift
+    gamma = gamma.reshape(1, -1, 1, 1)
+    beta = beta.reshape(1, -1, 1, 1)
+    output = gamma * x_normalized + beta
+
+    return output
+
+
+# ============================================================================
+# GROUP NORMALIZATION
+# ============================================================================
+
+def group_norm_forward(
+    x: np.ndarray,
+    gamma: np.ndarray,
+    beta: np.ndarray,
+    num_groups: int = 32,
+    eps: float = 1e-5
+) -> np.ndarray:
+    """
+    Group Normalization forward pass
+
+    Divides channels into groups and normalizes within each group.
+    Good for small batch sizes.
+
+    Args:
+        x: Input tensor, shape (batch, channels, height, width)
+        gamma: Scale parameter, shape (channels,)
+        beta: Shift parameter, shape (channels,)
+        num_groups: Number of groups to divide channels into
+        eps: Small constant for numerical stability
+
+    Returns:
+        Normalized output with same shape as input
+
+    Example:
+        >>> x = np.random.randn(32, 64, 28, 28)  # (batch, channels, H, W)
+        >>> gamma = np.ones(64)
+        >>> beta = np.zeros(64)
+        >>> output = group_norm_forward(x, gamma, beta, num_groups=32)
+        >>> print(output.shape)  # (32, 64, 28, 28)
+    """
+    if x.ndim != 4:
+        raise ValueError("Group normalization requires 4D input (batch, channels, H, W)")
+
+    batch_size, channels, height, width = x.shape
+
+    if channels % num_groups != 0:
+        raise ValueError(f"Number of channels ({channels}) must be divisible by num_groups ({num_groups})")
+
+    # Reshape to separate groups
+    x_grouped = x.reshape(batch_size, num_groups, channels // num_groups, height, width)
+
+    # Calculate mean and variance per group
+    axes = (2, 3, 4)
+    mean = np.mean(x_grouped, axis=axes, keepdims=True)
+    var = np.var(x_grouped, axis=axes, keepdims=True)
+
+    # Normalize
+    x_normalized = (x_grouped - mean) / np.sqrt(var + eps)
+
+    # Reshape back
+    x_normalized = x_normalized.reshape(batch_size, channels, height, width)
+
+    # Scale and shift
+    gamma = gamma.reshape(1, -1, 1, 1)
+    beta = beta.reshape(1, -1, 1, 1)
+    output = gamma * x_normalized + beta
+
+    return output
+
+
+# ============================================================================
+# WEIGHT NORMALIZATION
+# ============================================================================
+
+def weight_norm(
+    weight: np.ndarray,
+    dim: int = 0
+) -> Tuple[np.ndarray, np.ndarray]:
+    """
+    Weight Normalization
+
+    Reparameterizes weight vectors to decouple magnitude and direction.
+
+    Formula:
+        w = g * (v / ||v||)
+
+    Args:
+        weight: Weight tensor
+        dim: Dimension along which to compute norm
+
+    Returns:
+        Tuple of (normalized_weight, norm)
+
+    Example:
+        >>> weight = np.random.randn(64, 128)  # (out_features, in_features)
+        >>> w_normalized, g = weight_norm(weight, dim=1)
+        >>> print(w_normalized.shape)  # (64, 128)
+        >>> print(g.shape)  # (64, 1)
+    """
+    # Calculate norm along specified dimension
+    norm = np.linalg.norm(weight, axis=dim, keepdims=True)
+
+    # Normalize
+    normalized_weight = weight / (norm + 1e-8)
+
+    return normalized_weight, norm
+
+
+# ============================================================================
+# SPECTRAL NORMALIZATION
+# ============================================================================
+
+def spectral_norm(
+    weight: np.ndarray,
+    num_iterations: int = 1
+) -> np.ndarray:
+    """
+    Spectral Normalization
+
+    Normalizes weight matrix by its largest singular value.
+    Used in GANs for training stability.
+
+    Args:
+        weight: Weight tensor, shape (out_features, in_features)
+        num_iterations: Number of power iterations
+
+    Returns:
+        Spectrally normalized weight
+
+    Example:
+        >>> weight = np.random.randn(64, 128)
+        >>> w_normalized = spectral_norm(weight, num_iterations=1)
+        >>> print(w_normalized.shape)  # (64, 128)
+    """
+    # Reshape weight to 2D if needed
+    original_shape = weight.shape
+    if weight.ndim > 2:
+        weight = weight.reshape(weight.shape[0], -1)
+
+    # Power iteration to estimate largest singular value
+    u = np.random.randn(weight.shape[0])
+    u = u / np.linalg.norm(u)
+
+    for _ in range(num_iterations):
+        v = np.dot(weight.T, u)
+        v = v / np.linalg.norm(v)
+        u = np.dot(weight, v)
+        u = u / np.linalg.norm(u)
+
+    # Calculate spectral norm (largest singular value)
+    sigma = np.dot(u, np.dot(weight, v))
+
+    # Normalize by spectral norm
+    normalized_weight = weight / sigma
+
+    # Reshape back to original shape
+    normalized_weight = normalized_weight.reshape(original_shape)
+
+    return normalized_weight
+
+
+# ============================================================================
+# UTILITY FUNCTIONS
+# ============================================================================
+
+def initialize_norm_params(
+    num_features: int
+) -> Tuple[np.ndarray, np.ndarray]:
+    """
+    Initialize normalization parameters
+
+    Args:
+        num_features: Number of features/channels
+
+    Returns:
+        Tuple of (gamma, beta)
+        - gamma: Scale parameter initialized to 1
+        - beta: Shift parameter initialized to 0
+
+    Example:
+        >>> gamma, beta = initialize_norm_params(256)
+        >>> print(gamma.shape, beta.shape)  # (256,) (256,)
+    """
+    gamma = np.ones(num_features)
+    beta = np.zeros(num_features)
+    return gamma, beta
+
+
+def compute_norm_stats(
+    x: np.ndarray,
+    norm_type: str = 'batch'
+) -> Tuple[np.ndarray, np.ndarray]:
+    """
+    Compute normalization statistics
+
+    Args:
+        x: Input tensor
+        norm_type: Type of normalization ('batch', 'layer', 'instance', 'group')
+
+    Returns:
+        Tuple of (mean, variance)
+
+    Example:
+        >>> x = np.random.randn(32, 64, 28, 28)
+        >>> mean, var = compute_norm_stats(x, norm_type='batch')
+        >>> print(mean.shape, var.shape)
+    """
+    if norm_type == 'batch':
+        if x.ndim == 4:
+            axes = (0, 2, 3)
+        else:
+            axes = 0
+    elif norm_type == 'layer':
+        axes = -1
+    elif norm_type == 'instance':
+        axes = (2, 3)
+    else:
+        raise ValueError(f"Unknown norm_type: {norm_type}")
+
+    mean = np.mean(x, axis=axes, keepdims=True)
+    var = np.var(x, axis=axes, keepdims=True)
+
+    return mean, var
+
+
+# Aliases for convenience
+batch_norm = batch_norm_forward
+layer_norm = layer_norm_forward
+instance_norm = instance_norm_forward
+group_norm = group_norm_forward