ilovetools 0.2.24__tar.gz → 0.2.25__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ilovetools-0.2.24/ilovetools.egg-info → ilovetools-0.2.25}/PKG-INFO +2 -2
- ilovetools-0.2.25/ilovetools/ml/weight_init.py +564 -0
- {ilovetools-0.2.24 → ilovetools-0.2.25/ilovetools.egg-info}/PKG-INFO +2 -2
- {ilovetools-0.2.24 → ilovetools-0.2.25}/ilovetools.egg-info/SOURCES.txt +2 -0
- {ilovetools-0.2.24 → ilovetools-0.2.25}/pyproject.toml +2 -2
- {ilovetools-0.2.24 → ilovetools-0.2.25}/setup.py +2 -2
- ilovetools-0.2.25/tests/test_weight_init.py +539 -0
- {ilovetools-0.2.24 → ilovetools-0.2.25}/LICENSE +0 -0
- {ilovetools-0.2.24 → ilovetools-0.2.25}/MANIFEST.in +0 -0
- {ilovetools-0.2.24 → ilovetools-0.2.25}/README.md +0 -0
- {ilovetools-0.2.24 → ilovetools-0.2.25}/ilovetools/__init__.py +0 -0
- {ilovetools-0.2.24 → ilovetools-0.2.25}/ilovetools/ai/__init__.py +0 -0
- {ilovetools-0.2.24 → ilovetools-0.2.25}/ilovetools/ai/embeddings.py +0 -0
- {ilovetools-0.2.24 → ilovetools-0.2.25}/ilovetools/ai/inference.py +0 -0
- {ilovetools-0.2.24 → ilovetools-0.2.25}/ilovetools/ai/llm_helpers.py +0 -0
- {ilovetools-0.2.24 → ilovetools-0.2.25}/ilovetools/audio/__init__.py +0 -0
- {ilovetools-0.2.24 → ilovetools-0.2.25}/ilovetools/automation/__init__.py +0 -0
- {ilovetools-0.2.24 → ilovetools-0.2.25}/ilovetools/automation/file_organizer.py +0 -0
- {ilovetools-0.2.24 → ilovetools-0.2.25}/ilovetools/conversion/__init__.py +0 -0
- {ilovetools-0.2.24 → ilovetools-0.2.25}/ilovetools/conversion/config_converter.py +0 -0
- {ilovetools-0.2.24 → ilovetools-0.2.25}/ilovetools/conversion/config_converter_fixed_header.py +0 -0
- {ilovetools-0.2.24 → ilovetools-0.2.25}/ilovetools/data/__init__.py +0 -0
- {ilovetools-0.2.24 → ilovetools-0.2.25}/ilovetools/data/feature_engineering.py +0 -0
- {ilovetools-0.2.24 → ilovetools-0.2.25}/ilovetools/data/preprocessing.py +0 -0
- {ilovetools-0.2.24 → ilovetools-0.2.25}/ilovetools/database/__init__.py +0 -0
- {ilovetools-0.2.24 → ilovetools-0.2.25}/ilovetools/datetime/__init__.py +0 -0
- {ilovetools-0.2.24 → ilovetools-0.2.25}/ilovetools/email/__init__.py +0 -0
- {ilovetools-0.2.24 → ilovetools-0.2.25}/ilovetools/email/template_engine.py +0 -0
- {ilovetools-0.2.24 → ilovetools-0.2.25}/ilovetools/files/__init__.py +0 -0
- {ilovetools-0.2.24 → ilovetools-0.2.25}/ilovetools/image/__init__.py +0 -0
- {ilovetools-0.2.24 → ilovetools-0.2.25}/ilovetools/ml/__init__.py +0 -0
- {ilovetools-0.2.24 → ilovetools-0.2.25}/ilovetools/ml/activations.py +0 -0
- {ilovetools-0.2.24 → ilovetools-0.2.25}/ilovetools/ml/anomaly_detection.py +0 -0
- {ilovetools-0.2.24 → ilovetools-0.2.25}/ilovetools/ml/attention.py +0 -0
- {ilovetools-0.2.24 → ilovetools-0.2.25}/ilovetools/ml/clustering.py +0 -0
- {ilovetools-0.2.24 → ilovetools-0.2.25}/ilovetools/ml/cnn.py +0 -0
- {ilovetools-0.2.24 → ilovetools-0.2.25}/ilovetools/ml/cross_validation.py +0 -0
- {ilovetools-0.2.24 → ilovetools-0.2.25}/ilovetools/ml/dimensionality.py +0 -0
- {ilovetools-0.2.24 → ilovetools-0.2.25}/ilovetools/ml/ensemble.py +0 -0
- {ilovetools-0.2.24 → ilovetools-0.2.25}/ilovetools/ml/feature_selection.py +0 -0
- {ilovetools-0.2.24 → ilovetools-0.2.25}/ilovetools/ml/gradient_descent.py +0 -0
- {ilovetools-0.2.24 → ilovetools-0.2.25}/ilovetools/ml/imbalanced.py +0 -0
- {ilovetools-0.2.24 → ilovetools-0.2.25}/ilovetools/ml/interpretation.py +0 -0
- {ilovetools-0.2.24 → ilovetools-0.2.25}/ilovetools/ml/loss_functions.py +0 -0
- {ilovetools-0.2.24 → ilovetools-0.2.25}/ilovetools/ml/lr_schedulers.py +0 -0
- {ilovetools-0.2.24 → ilovetools-0.2.25}/ilovetools/ml/metrics.py +0 -0
- {ilovetools-0.2.24 → ilovetools-0.2.25}/ilovetools/ml/neural_network.py +0 -0
- {ilovetools-0.2.24 → ilovetools-0.2.25}/ilovetools/ml/normalization.py +0 -0
- {ilovetools-0.2.24 → ilovetools-0.2.25}/ilovetools/ml/normalization_advanced.py +0 -0
- {ilovetools-0.2.24 → ilovetools-0.2.25}/ilovetools/ml/optimizers.py +0 -0
- {ilovetools-0.2.24 → ilovetools-0.2.25}/ilovetools/ml/pipeline.py +0 -0
- {ilovetools-0.2.24 → ilovetools-0.2.25}/ilovetools/ml/positional_encoding.py +0 -0
- {ilovetools-0.2.24 → ilovetools-0.2.25}/ilovetools/ml/regularization.py +0 -0
- {ilovetools-0.2.24 → ilovetools-0.2.25}/ilovetools/ml/rnn.py +0 -0
- {ilovetools-0.2.24 → ilovetools-0.2.25}/ilovetools/ml/timeseries.py +0 -0
- {ilovetools-0.2.24 → ilovetools-0.2.25}/ilovetools/ml/tuning.py +0 -0
- {ilovetools-0.2.24 → ilovetools-0.2.25}/ilovetools/security/__init__.py +0 -0
- {ilovetools-0.2.24 → ilovetools-0.2.25}/ilovetools/security/password_checker.py +0 -0
- {ilovetools-0.2.24 → ilovetools-0.2.25}/ilovetools/text/__init__.py +0 -0
- {ilovetools-0.2.24 → ilovetools-0.2.25}/ilovetools/utils/__init__.py +0 -0
- {ilovetools-0.2.24 → ilovetools-0.2.25}/ilovetools/utils/cache_system.py +0 -0
- {ilovetools-0.2.24 → ilovetools-0.2.25}/ilovetools/utils/logger.py +0 -0
- {ilovetools-0.2.24 → ilovetools-0.2.25}/ilovetools/utils/rate_limiter.py +0 -0
- {ilovetools-0.2.24 → ilovetools-0.2.25}/ilovetools/utils/retry.py +0 -0
- {ilovetools-0.2.24 → ilovetools-0.2.25}/ilovetools/validation/__init__.py +0 -0
- {ilovetools-0.2.24 → ilovetools-0.2.25}/ilovetools/validation/data_validator.py +0 -0
- {ilovetools-0.2.24 → ilovetools-0.2.25}/ilovetools/web/__init__.py +0 -0
- {ilovetools-0.2.24 → ilovetools-0.2.25}/ilovetools/web/scraper.py +0 -0
- {ilovetools-0.2.24 → ilovetools-0.2.25}/ilovetools/web/url_shortener.py +0 -0
- {ilovetools-0.2.24 → ilovetools-0.2.25}/ilovetools.egg-info/dependency_links.txt +0 -0
- {ilovetools-0.2.24 → ilovetools-0.2.25}/ilovetools.egg-info/requires.txt +0 -0
- {ilovetools-0.2.24 → ilovetools-0.2.25}/ilovetools.egg-info/top_level.txt +0 -0
- {ilovetools-0.2.24 → ilovetools-0.2.25}/requirements.txt +0 -0
- {ilovetools-0.2.24 → ilovetools-0.2.25}/setup.cfg +0 -0
- {ilovetools-0.2.24 → ilovetools-0.2.25}/tests/__init__.py +0 -0
- {ilovetools-0.2.24 → ilovetools-0.2.25}/tests/test_activations.py +0 -0
- {ilovetools-0.2.24 → ilovetools-0.2.25}/tests/test_attention.py +0 -0
- {ilovetools-0.2.24 → ilovetools-0.2.25}/tests/test_cnn.py +0 -0
- {ilovetools-0.2.24 → ilovetools-0.2.25}/tests/test_gradient_descent.py +0 -0
- {ilovetools-0.2.24 → ilovetools-0.2.25}/tests/test_loss_functions.py +0 -0
- {ilovetools-0.2.24 → ilovetools-0.2.25}/tests/test_lr_schedulers.py +0 -0
- {ilovetools-0.2.24 → ilovetools-0.2.25}/tests/test_neural_network.py +0 -0
- {ilovetools-0.2.24 → ilovetools-0.2.25}/tests/test_normalization.py +0 -0
- {ilovetools-0.2.24 → ilovetools-0.2.25}/tests/test_normalization_advanced.py +0 -0
- {ilovetools-0.2.24 → ilovetools-0.2.25}/tests/test_optimizers.py +0 -0
- {ilovetools-0.2.24 → ilovetools-0.2.25}/tests/test_positional_encoding.py +0 -0
- {ilovetools-0.2.24 → ilovetools-0.2.25}/tests/test_pypi_installation.py +0 -0
- {ilovetools-0.2.24 → ilovetools-0.2.25}/tests/test_regularization.py +0 -0
- {ilovetools-0.2.24 → ilovetools-0.2.25}/tests/test_rnn.py +0 -0
- {ilovetools-0.2.24 → ilovetools-0.2.25}/tests/verify_positional_encoding.py +0 -0

{ilovetools-0.2.24/ilovetools.egg-info → ilovetools-0.2.25}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ilovetools
-Version: 0.2.24
+Version: 0.2.25
 Summary: A comprehensive Python utility library with modular tools for AI/ML, data processing, and daily programming needs
 Home-page: https://github.com/AliMehdi512/ilovetools
 Author: Ali Mehdi
@@ -11,7 +11,7 @@ Project-URL: Repository, https://github.com/AliMehdi512/ilovetools
 Project-URL: Issues, https://github.com/AliMehdi512/ilovetools/issues
 Project-URL: Bug Reports, https://github.com/AliMehdi512/ilovetools/issues
 Project-URL: Source, https://github.com/AliMehdi512/ilovetools
-Keywords: utilities,tools,ai,ml,data-processing,automation,
+Keywords: utilities,tools,ai,ml,data-processing,automation,weight-initialization,xavier-initialization,he-initialization,kaiming-initialization,deep-learning,neural-networks
 Classifier: Development Status :: 3 - Alpha
 Classifier: Intended Audience :: Developers
 Classifier: Topic :: Software Development :: Libraries :: Python Modules

ilovetools-0.2.25/ilovetools/ml/weight_init.py (new file, 564 added lines)

"""
Weight Initialization Techniques Module

This module provides comprehensive implementations of weight initialization
strategies for training deep neural networks effectively.

Features:
- Xavier/Glorot Initialization (Uniform & Normal)
- He/Kaiming Initialization (Uniform & Normal)
- LeCun Initialization (Uniform & Normal)
- Orthogonal Initialization
- Identity Initialization
- Sparse Initialization
- Variance Scaling (Generalized)
- Constant Initialization
- Uniform Initialization
- Normal Initialization

Author: Ali Mehdi
License: MIT
"""

import numpy as np
from typing import Tuple, Optional, Union


# ============================================================================
# XAVIER/GLOROT INITIALIZATION
# ============================================================================

def xavier_uniform(shape: Tuple[int, ...], gain: float = 1.0) -> np.ndarray:
    """
    Xavier/Glorot Uniform Initialization

    Samples weights from uniform distribution U(-a, a) where:
        a = gain * sqrt(6 / (fan_in + fan_out))

    Best for: sigmoid, tanh activations

    Args:
        shape: Shape of weight tensor (e.g., (input_size, output_size))
        gain: Scaling factor (default: 1.0)

    Returns:
        Initialized weight array

    Reference:
        "Understanding the difficulty of training deep feedforward neural networks"
        - Glorot & Bengio (2010)
    """
    fan_in, fan_out = _calculate_fan_in_fan_out(shape)
    std = gain * np.sqrt(6.0 / (fan_in + fan_out))
    return np.random.uniform(-std, std, size=shape)


def xavier_normal(shape: Tuple[int, ...], gain: float = 1.0) -> np.ndarray:
    """
    Xavier/Glorot Normal Initialization

    Samples weights from normal distribution N(0, std^2) where:
        std = gain * sqrt(2 / (fan_in + fan_out))

    Best for: sigmoid, tanh activations

    Args:
        shape: Shape of weight tensor
        gain: Scaling factor (default: 1.0)

    Returns:
        Initialized weight array
    """
    fan_in, fan_out = _calculate_fan_in_fan_out(shape)
    std = gain * np.sqrt(2.0 / (fan_in + fan_out))
    return np.random.normal(0, std, size=shape)


# ============================================================================
# HE/KAIMING INITIALIZATION
# ============================================================================

def he_uniform(shape: Tuple[int, ...], gain: float = np.sqrt(2.0)) -> np.ndarray:
    """
    He/Kaiming Uniform Initialization

    Samples weights from uniform distribution U(-a, a) where:
        a = gain * sqrt(3 / fan_in)

    Best for: ReLU, LeakyReLU, PReLU activations

    Args:
        shape: Shape of weight tensor
        gain: Scaling factor (default: sqrt(2) for ReLU)

    Returns:
        Initialized weight array

    Reference:
        "Delving Deep into Rectifiers: Surpassing Human-Level Performance"
        - He et al. (2015)
    """
    fan_in, _ = _calculate_fan_in_fan_out(shape)
    std = gain * np.sqrt(3.0 / fan_in)
    return np.random.uniform(-std, std, size=shape)


def he_normal(shape: Tuple[int, ...], gain: float = np.sqrt(2.0)) -> np.ndarray:
    """
    He/Kaiming Normal Initialization

    Samples weights from normal distribution N(0, std^2) where:
        std = gain * sqrt(1 / fan_in)

    Best for: ReLU, LeakyReLU, PReLU activations

    Args:
        shape: Shape of weight tensor
        gain: Scaling factor (default: sqrt(2) for ReLU)

    Returns:
        Initialized weight array
    """
    fan_in, _ = _calculate_fan_in_fan_out(shape)
    std = gain / np.sqrt(fan_in)
    return np.random.normal(0, std, size=shape)


# ============================================================================
# LECUN INITIALIZATION
# ============================================================================

def lecun_uniform(shape: Tuple[int, ...]) -> np.ndarray:
    """
    LeCun Uniform Initialization

    Samples weights from uniform distribution U(-a, a) where:
        a = sqrt(3 / fan_in)

    Best for: SELU activation (self-normalizing networks)

    Args:
        shape: Shape of weight tensor

    Returns:
        Initialized weight array

    Reference:
        "Efficient BackProp" - LeCun et al. (1998)
    """
    fan_in, _ = _calculate_fan_in_fan_out(shape)
    std = np.sqrt(3.0 / fan_in)
    return np.random.uniform(-std, std, size=shape)


def lecun_normal(shape: Tuple[int, ...]) -> np.ndarray:
    """
    LeCun Normal Initialization

    Samples weights from normal distribution N(0, std^2) where:
        std = sqrt(1 / fan_in)

    Best for: SELU activation (self-normalizing networks)

    Args:
        shape: Shape of weight tensor

    Returns:
        Initialized weight array
    """
    fan_in, _ = _calculate_fan_in_fan_out(shape)
    std = 1.0 / np.sqrt(fan_in)
    return np.random.normal(0, std, size=shape)


# ============================================================================
# ORTHOGONAL INITIALIZATION
# ============================================================================

def orthogonal(shape: Tuple[int, ...], gain: float = 1.0) -> np.ndarray:
    """
    Orthogonal Initialization

    Initializes weights as (semi-)orthogonal matrix.
    Useful for RNNs and very deep networks.

    Args:
        shape: Shape of weight tensor
        gain: Scaling factor (default: 1.0)

    Returns:
        Initialized weight array

    Reference:
        "Exact solutions to the nonlinear dynamics of learning in deep linear neural networks"
        - Saxe et al. (2013)
    """
    if len(shape) < 2:
        raise ValueError("Orthogonal initialization requires at least 2D tensor")

    # Flatten to 2D
    rows = shape[0]
    cols = np.prod(shape[1:])
    flat_shape = (rows, cols)

    # Generate random matrix
    a = np.random.normal(0, 1, flat_shape)

    # QR decomposition
    q, r = np.linalg.qr(a)

    # Make Q uniform
    d = np.diag(r)
    q *= np.sign(d)

    # Scale by gain
    q *= gain

    # Reshape to original shape
    return q.reshape(shape)


# ============================================================================
# VARIANCE SCALING INITIALIZATION
# ============================================================================

def variance_scaling(
    shape: Tuple[int, ...],
    scale: float = 1.0,
    mode: str = 'fan_in',
    distribution: str = 'normal'
) -> np.ndarray:
    """
    Variance Scaling Initialization (Generalized)

    Flexible initialization that generalizes Xavier and He methods.

    Args:
        shape: Shape of weight tensor
        scale: Scaling factor
        mode: 'fan_in', 'fan_out', or 'fan_avg'
        distribution: 'normal' or 'uniform'

    Returns:
        Initialized weight array

    Examples:
        Xavier Normal: variance_scaling(shape, scale=1.0, mode='fan_avg')
        He Normal: variance_scaling(shape, scale=2.0, mode='fan_in')
    """
    fan_in, fan_out = _calculate_fan_in_fan_out(shape)

    if mode == 'fan_in':
        denominator = fan_in
    elif mode == 'fan_out':
        denominator = fan_out
    elif mode == 'fan_avg':
        denominator = (fan_in + fan_out) / 2.0
    else:
        raise ValueError(f"Invalid mode: {mode}")

    variance = scale / denominator

    if distribution == 'normal':
        std = np.sqrt(variance)
        return np.random.normal(0, std, size=shape)
    elif distribution == 'uniform':
        limit = np.sqrt(3.0 * variance)
        return np.random.uniform(-limit, limit, size=shape)
    else:
        raise ValueError(f"Invalid distribution: {distribution}")


# ============================================================================
# SPARSE INITIALIZATION
# ============================================================================

def sparse(shape: Tuple[int, ...], sparsity: float = 0.1, std: float = 0.01) -> np.ndarray:
    """
    Sparse Initialization

    Initializes weights with specified sparsity (fraction of zeros).
    Non-zero weights are sampled from N(0, std^2).

    Args:
        shape: Shape of weight tensor
        sparsity: Fraction of weights to set to zero (0 to 1)
        std: Standard deviation for non-zero weights

    Returns:
        Initialized weight array
    """
    if not 0 <= sparsity <= 1:
        raise ValueError("Sparsity must be between 0 and 1")

    weights = np.random.normal(0, std, size=shape)
    mask = np.random.random(shape) < sparsity
    weights[mask] = 0
    return weights


# ============================================================================
# IDENTITY INITIALIZATION
# ============================================================================

def identity(shape: Tuple[int, ...], gain: float = 1.0) -> np.ndarray:
    """
    Identity Initialization

    Initializes weights as identity matrix (or close to it).
    Useful for residual connections and skip connections.

    Args:
        shape: Shape of weight tensor (must be square or have square leading dims)
        gain: Scaling factor

    Returns:
        Initialized weight array
    """
    if len(shape) < 2:
        raise ValueError("Identity initialization requires at least 2D tensor")

    if shape[0] != shape[1]:
        raise ValueError("Identity initialization requires square matrix")

    weights = np.eye(shape[0], shape[1]) * gain

    # If more dimensions, tile the identity
    if len(shape) > 2:
        weights = np.tile(weights.reshape(shape[0], shape[1], 1),
                          (1, 1) + shape[2:])

    return weights


# ============================================================================
# SIMPLE INITIALIZATIONS
# ============================================================================

def constant(shape: Tuple[int, ...], value: float = 0.0) -> np.ndarray:
    """
    Constant Initialization

    Initializes all weights to a constant value.

    Args:
        shape: Shape of weight tensor
        value: Constant value (default: 0.0)

    Returns:
        Initialized weight array
    """
    return np.full(shape, value, dtype=np.float32)


def uniform(shape: Tuple[int, ...], low: float = -0.05, high: float = 0.05) -> np.ndarray:
    """
    Uniform Initialization

    Samples weights uniformly from [low, high].

    Args:
        shape: Shape of weight tensor
        low: Lower bound
        high: Upper bound

    Returns:
        Initialized weight array
    """
    return np.random.uniform(low, high, size=shape)


def normal(shape: Tuple[int, ...], mean: float = 0.0, std: float = 0.01) -> np.ndarray:
    """
    Normal Initialization

    Samples weights from normal distribution N(mean, std^2).

    Args:
        shape: Shape of weight tensor
        mean: Mean of distribution
        std: Standard deviation

    Returns:
        Initialized weight array
    """
    return np.random.normal(mean, std, size=shape)


# ============================================================================
# UTILITY FUNCTIONS
# ============================================================================

def _calculate_fan_in_fan_out(shape: Tuple[int, ...]) -> Tuple[int, int]:
    """
    Calculate fan_in and fan_out for a weight tensor

    Args:
        shape: Shape of weight tensor

    Returns:
        Tuple of (fan_in, fan_out)
    """
    if len(shape) < 2:
        raise ValueError("Weight tensor must have at least 2 dimensions")

    if len(shape) == 2:
        # Fully connected layer
        fan_in = shape[0]
        fan_out = shape[1]
    else:
        # Convolutional layer: (out_channels, in_channels, kernel_h, kernel_w)
        receptive_field_size = np.prod(shape[2:])
        fan_in = shape[1] * receptive_field_size
        fan_out = shape[0] * receptive_field_size

    return fan_in, fan_out


def calculate_gain(activation: str) -> float:
    """
    Calculate recommended gain for different activation functions

    Args:
        activation: Name of activation function

    Returns:
        Recommended gain value
    """
    gains = {
        'linear': 1.0,
        'sigmoid': 1.0,
        'tanh': 5.0 / 3.0,
        'relu': np.sqrt(2.0),
        'leaky_relu': np.sqrt(2.0 / (1 + 0.01**2)),
        'selu': 1.0,
    }

    activation = activation.lower()
    if activation not in gains:
        raise ValueError(f"Unknown activation: {activation}")

    return gains[activation]


def get_initializer(
    method: str,
    shape: Tuple[int, ...],
    **kwargs
) -> np.ndarray:
    """
    Factory function to get initializer by name

    Args:
        method: Name of initialization method
        shape: Shape of weight tensor
        **kwargs: Additional method-specific arguments

    Returns:
        Initialized weight array
    """
    initializers = {
        'xavier_uniform': xavier_uniform,
        'xavier_normal': xavier_normal,
        'glorot_uniform': xavier_uniform,
        'glorot_normal': xavier_normal,
        'he_uniform': he_uniform,
        'he_normal': he_normal,
        'kaiming_uniform': he_uniform,
        'kaiming_normal': he_normal,
        'lecun_uniform': lecun_uniform,
        'lecun_normal': lecun_normal,
        'orthogonal': orthogonal,
        'identity': identity,
        'sparse': sparse,
        'constant': constant,
        'uniform': uniform,
        'normal': normal,
        'variance_scaling': variance_scaling,
    }

    method = method.lower()
    if method not in initializers:
        raise ValueError(f"Unknown initialization method: {method}")

    return initializers[method](shape, **kwargs)


# ============================================================================
# WEIGHT INITIALIZER CLASS
# ============================================================================

class WeightInitializer:
    """
    Weight Initializer Class

    Provides a convenient interface for weight initialization
    with support for different methods and configurations.
    """

    def __init__(self, method: str = 'xavier_normal', **kwargs):
        """
        Initialize WeightInitializer

        Args:
            method: Initialization method name
            **kwargs: Method-specific parameters
        """
        self.method = method
        self.kwargs = kwargs

    def initialize(self, shape: Tuple[int, ...]) -> np.ndarray:
        """
        Initialize weights with specified shape

        Args:
            shape: Shape of weight tensor

        Returns:
            Initialized weight array
        """
        return get_initializer(self.method, shape, **self.kwargs)

    def __call__(self, shape: Tuple[int, ...]) -> np.ndarray:
        """Allow calling instance as function"""
        return self.initialize(shape)


# ============================================================================
# ALIASES FOR CONVENIENCE
# ============================================================================

glorot_uniform = xavier_uniform
glorot_normal = xavier_normal
kaiming_uniform = he_uniform
kaiming_normal = he_normal


__all__ = [
    # Xavier/Glorot
    'xavier_uniform',
    'xavier_normal',
    'glorot_uniform',
    'glorot_normal',
    # He/Kaiming
    'he_uniform',
    'he_normal',
    'kaiming_uniform',
    'kaiming_normal',
    # LeCun
    'lecun_uniform',
    'lecun_normal',
    # Advanced
    'orthogonal',
    'identity',
    'sparse',
    'variance_scaling',
    # Simple
    'constant',
    'uniform',
    'normal',
    # Utilities
    'calculate_gain',
    'get_initializer',
    'WeightInitializer',
]
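
For reference, a minimal usage sketch of the API introduced above; this sketch is not part of the diff. It assumes ilovetools 0.2.25 is installed and that the module is importable as ilovetools.ml.weight_init (matching the path registered in SOURCES.txt below); it only calls functions defined in the new file.

# Usage sketch (assumption: ilovetools 0.2.25 installed, module path as in SOURCES.txt)
import numpy as np
from ilovetools.ml import weight_init as wi

shape = (256, 128)                                 # (fan_in, fan_out) for a dense layer

w_relu = wi.he_normal(shape)                       # std = gain / sqrt(fan_in), gain defaults to sqrt(2)
w_tanh = wi.xavier_uniform(shape, gain=wi.calculate_gain('tanh'))

w_named = wi.get_initializer('kaiming_uniform', shape)   # factory lookup by name
init = wi.WeightInitializer('orthogonal', gain=1.0)      # reusable, callable initializer
w_orth = init((128, 128))

# Rough empirical check of the variance claim in the he_normal docstring
print(round(float(w_relu.std()), 3), round(float(np.sqrt(2.0 / shape[0])), 3))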

{ilovetools-0.2.24 → ilovetools-0.2.25/ilovetools.egg-info}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ilovetools
-Version: 0.2.24
+Version: 0.2.25
 Summary: A comprehensive Python utility library with modular tools for AI/ML, data processing, and daily programming needs
 Home-page: https://github.com/AliMehdi512/ilovetools
 Author: Ali Mehdi
@@ -11,7 +11,7 @@ Project-URL: Repository, https://github.com/AliMehdi512/ilovetools
 Project-URL: Issues, https://github.com/AliMehdi512/ilovetools/issues
 Project-URL: Bug Reports, https://github.com/AliMehdi512/ilovetools/issues
 Project-URL: Source, https://github.com/AliMehdi512/ilovetools
-Keywords: utilities,tools,ai,ml,data-processing,automation,
+Keywords: utilities,tools,ai,ml,data-processing,automation,weight-initialization,xavier-initialization,he-initialization,kaiming-initialization,deep-learning,neural-networks
 Classifier: Development Status :: 3 - Alpha
 Classifier: Intended Audience :: Developers
 Classifier: Topic :: Software Development :: Libraries :: Python Modules

{ilovetools-0.2.24 → ilovetools-0.2.25}/ilovetools.egg-info/SOURCES.txt

@@ -55,6 +55,7 @@ ilovetools/ml/regularization.py
 ilovetools/ml/rnn.py
 ilovetools/ml/timeseries.py
 ilovetools/ml/tuning.py
+ilovetools/ml/weight_init.py
 ilovetools/security/__init__.py
 ilovetools/security/password_checker.py
 ilovetools/text/__init__.py
@@ -83,4 +84,5 @@ tests/test_positional_encoding.py
 tests/test_pypi_installation.py
 tests/test_regularization.py
 tests/test_rnn.py
+tests/test_weight_init.py
 tests/verify_positional_encoding.py

{ilovetools-0.2.24 → ilovetools-0.2.25}/pyproject.toml

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "ilovetools"
-version = "0.2.24"
+version = "0.2.25"
 description = "A comprehensive Python utility library with modular tools for AI/ML, data processing, and daily programming needs"
 readme = "README.md"
 requires-python = ">=3.8"
@@ -12,7 +12,7 @@ license = "MIT"
 authors = [
     {name = "Ali Mehdi", email = "ali.mehdi.dev579@gmail.com"}
 ]
-keywords = ["utilities", "tools", "ai", "ml", "data-processing", "automation", "
+keywords = ["utilities", "tools", "ai", "ml", "data-processing", "automation", "weight-initialization", "xavier-initialization", "he-initialization", "kaiming-initialization", "deep-learning", "neural-networks"]
 classifiers = [
     "Development Status :: 3 - Alpha",
     "Intended Audience :: Developers",

{ilovetools-0.2.24 → ilovetools-0.2.25}/setup.py

@@ -5,7 +5,7 @@ with open("README.md", "r", encoding="utf-8") as fh:
 
 setup(
     name="ilovetools",
-    version="0.2.24",
+    version="0.2.25",
     author="Ali Mehdi",
     author_email="ali.mehdi.dev579@gmail.com",
     description="A comprehensive Python utility library with modular tools for AI/ML, data processing, and daily programming needs",
@@ -57,7 +57,7 @@ setup(
             "soundfile>=0.12.0",
         ],
     },
-    keywords="utilities, tools, ai, ml, data-processing, automation, python-library, neural-networks,
+    keywords="utilities, tools, ai, ml, data-processing, automation, python-library, neural-networks, weight-initialization, xavier-initialization, he-initialization, kaiming-initialization, deep-learning",
    project_urls={
        "Bug Reports": "https://github.com/AliMehdi512/ilovetools/issues",
        "Source": "https://github.com/AliMehdi512/ilovetools",