noshot 11.0.0__py3-none-any.whl → 13.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- noshot/data/DLE FSD BDA/DLE/DLE 1 (Json)/1. DNN (Image Classification).ipynb +389 -0
- noshot/data/DLE FSD BDA/DLE/DLE 1 (Json)/2. DNN vs CNN.ipynb +516 -0
- noshot/data/DLE FSD BDA/DLE/DLE 1 (Json)/3. CNN (Object Detecrion).ipynb +259 -0
- noshot/data/DLE FSD BDA/DLE/DLE 1 (Json)/4. FCN (Image Segmentaion).ipynb +274 -0
- noshot/data/DLE FSD BDA/DLE/DLE 2 (tim stan s)/1.1 DNN (Pytorch).ipynb +164 -0
- noshot/data/DLE FSD BDA/DLE/DLE 2 (tim stan s)/1.2 DNN (Tensorflow).ipynb +94 -0
- noshot/data/DLE FSD BDA/DLE/DLE 2 (tim stan s)/1.3 DNN (Image Classification).ipynb +134 -0
- noshot/data/DLE FSD BDA/DLE/DLE 2 (tim stan s)/2.1 DNN vs CNN.ipynb +127 -0
- noshot/data/DLE FSD BDA/DLE/DLE 2 (tim stan s)/2.2 DNN vs CNN.ipynb +123 -0
- noshot/data/DLE FSD BDA/DLE/DLE 2 (tim stan s)/4. FCNN (Image Segmentation).ipynb +108 -0
- noshot/data/DLE FSD BDA/DLE/DLE 2 (tim stan s)/Lab Excercise (Training DNN).ipynb +646 -0
- noshot/data/DLE FSD BDA/DLE/DLE 2 (tim stan s)/Load-Images.ipynb +553 -0
- noshot/data/DLE FSD BDA/DLE/DLE 3 (sonic boy)/Ex1.ipynb +216 -0
- noshot/data/DLE FSD BDA/DLE/DLE 3 (sonic boy)/Ex2.ipynb +195 -0
- noshot/data/DLE FSD BDA/DLE/DLE 3 (sonic boy)/Ex3.ipynb +427 -0
- noshot/data/DLE FSD BDA/DLE/DLE 3 (sonic boy)/Ex4.ipynb +186 -0
- noshot/data/DLE FSD BDA/DLE/DLE 4 (senior)/Exp01/DNN Ex No 1.ipynb +398 -0
- noshot/data/DLE FSD BDA/DLE/DLE 4 (senior)/Exp01/Ex No 1 Build in dataset.ipynb +171 -0
- noshot/data/DLE FSD BDA/DLE/DLE 4 (senior)/Exp01/Exp1-Short-DL_ANN_ImageClassification.ipynb +401 -0
- noshot/data/DLE FSD BDA/DLE/DLE 4 (senior)/Exp01/OR GATE .ipynb +8511 -0
- noshot/data/DLE FSD BDA/DLE/DLE 4 (senior)/Exp02/Exp2-Short-DL_CNN_ImageClassification.ipynb +737 -0
- noshot/data/DLE FSD BDA/DLE/DLE 4 (senior)/Exp03/DL-Ex3-RNN.ipynb +591 -0
- noshot/data/DLE FSD BDA/DLE/DLE 4 (senior)/Exp04/Ex no 4.ipynb +551 -0
- noshot/main.py +3 -3
- {noshot-11.0.0.dist-info → noshot-13.0.0.dist-info}/METADATA +1 -1
- noshot-13.0.0.dist-info/RECORD +32 -0
- noshot/data/ML TS XAI/ML/CNN(Image_for_Folders_5).ipynb +0 -201
- noshot/data/ML TS XAI/ML/CNN(Image_form_Folder_2).ipynb +0 -201
- noshot/data/ML TS XAI/ML/Json Codes/ML LAB CIA 2.ipynb +0 -409
- noshot/data/ML TS XAI/ML/ML 1/1. EDA-PCA (Balance Scale Dataset).ipynb +0 -147
- noshot/data/ML TS XAI/ML/ML 1/1. EDA-PCA (Rice Dataset).ipynb +0 -181
- noshot/data/ML TS XAI/ML/ML 1/10. HMM Veterbi.ipynb +0 -152
- noshot/data/ML TS XAI/ML/ML 1/2. KNN (Balance Scale Dataset).ipynb +0 -117
- noshot/data/ML TS XAI/ML/ML 1/2. KNN (Iris Dataset).ipynb +0 -156
- noshot/data/ML TS XAI/ML/ML 1/2. KNN (Sobar-72 Dataset).ipynb +0 -215
- noshot/data/ML TS XAI/ML/ML 1/3. LDA (Balance Scale Dataset).ipynb +0 -78
- noshot/data/ML TS XAI/ML/ML 1/3. LDA (NPHA Doctor Visits Dataset).ipynb +0 -114
- noshot/data/ML TS XAI/ML/ML 1/4. Linear Regression (Machine Dataset).ipynb +0 -115
- noshot/data/ML TS XAI/ML/ML 1/4. Linear Regression (Real Estate Dataset).ipynb +0 -146
- noshot/data/ML TS XAI/ML/ML 1/5. Logistic Regression (Magic04 Dataset).ipynb +0 -130
- noshot/data/ML TS XAI/ML/ML 1/5. Logistic Regression (Wine Dataset).ipynb +0 -112
- noshot/data/ML TS XAI/ML/ML 1/6. Naive Bayes Classifier (Agaricus Lepiota Dataset).ipynb +0 -118
- noshot/data/ML TS XAI/ML/ML 1/6. Naive Bayes Classifier (Wine Dataset).ipynb +0 -89
- noshot/data/ML TS XAI/ML/ML 1/7. SVM (Rice Dataset).ipynb +0 -120
- noshot/data/ML TS XAI/ML/ML 1/8. FeedForward NN (Sobar72 Dataset).ipynb +0 -262
- noshot/data/ML TS XAI/ML/ML 1/9. CNN (Cifar10 Dataset).ipynb +0 -156
- noshot/data/ML TS XAI/ML/ML 2/1. PCA.ipynb +0 -162
- noshot/data/ML TS XAI/ML/ML 2/10. CNN.ipynb +0 -100
- noshot/data/ML TS XAI/ML/ML 2/11. HMM.ipynb +0 -336
- noshot/data/ML TS XAI/ML/ML 2/2. KNN.ipynb +0 -149
- noshot/data/ML TS XAI/ML/ML 2/3. LDA.ipynb +0 -132
- noshot/data/ML TS XAI/ML/ML 2/4. Linear Regression.ipynb +0 -86
- noshot/data/ML TS XAI/ML/ML 2/5. Logistic Regression.ipynb +0 -115
- noshot/data/ML TS XAI/ML/ML 2/6. Naive Bayes (Titanic).ipynb +0 -196
- noshot/data/ML TS XAI/ML/ML 2/6. Naive Bayes (Wine).ipynb +0 -98
- noshot/data/ML TS XAI/ML/ML 2/7. SVM Linear.ipynb +0 -109
- noshot/data/ML TS XAI/ML/ML 2/8. SVM Non-Linear.ipynb +0 -195
- noshot/data/ML TS XAI/ML/ML 2/9. FNN With Regularization.ipynb +0 -189
- noshot/data/ML TS XAI/ML/ML 2/9. FNN Without Regularization.ipynb +0 -197
- noshot/data/ML TS XAI/ML/ML 2/All in One Lab CIA 1 Q.ipynb +0 -1087
- noshot/data/ML TS XAI/ML/ML 3 (Latest)/1. PCA EDA.ipynb +0 -274
- noshot/data/ML TS XAI/ML/ML 3 (Latest)/10. CNN.ipynb +0 -170
- noshot/data/ML TS XAI/ML/ML 3 (Latest)/11. HMM 2.ipynb +0 -1087
- noshot/data/ML TS XAI/ML/ML 3 (Latest)/11. HMM 3.ipynb +0 -178
- noshot/data/ML TS XAI/ML/ML 3 (Latest)/11. HMM 4.ipynb +0 -185
- noshot/data/ML TS XAI/ML/ML 3 (Latest)/11. HMM.ipynb +0 -106
- noshot/data/ML TS XAI/ML/ML 3 (Latest)/2. KNN.ipynb +0 -177
- noshot/data/ML TS XAI/ML/ML 3 (Latest)/3. LDA.ipynb +0 -195
- noshot/data/ML TS XAI/ML/ML 3 (Latest)/4. Linear Regression.ipynb +0 -267
- noshot/data/ML TS XAI/ML/ML 3 (Latest)/5. Logistic Regression.ipynb +0 -104
- noshot/data/ML TS XAI/ML/ML 3 (Latest)/6. Bayesian Classifier.ipynb +0 -109
- noshot/data/ML TS XAI/ML/ML 3 (Latest)/7. SVM.ipynb +0 -220
- noshot/data/ML TS XAI/ML/ML 3 (Latest)/8. MLP.ipynb +0 -99
- noshot/data/ML TS XAI/ML/ML 3 (Latest)/9. Ridge - Lasso.ipynb +0 -211
- noshot/data/ML TS XAI/ML/ML 3 (Latest)/9. Ridge Lasso 2.ipynb +0 -99
- noshot/data/ML TS XAI/ML/ML 3 (Latest)/Image Load Example.ipynb +0 -118
- noshot/data/ML TS XAI/ML/ML 3 (Latest)/Updated_Untitled.ipynb +0 -603
- noshot/data/ML TS XAI/ML/ML Lab AllinOne.ipynb +0 -961
- noshot/data/ML TS XAI/ML/ML Lab H Sec/1. Iris Dataset (Softmax vs Sigmoid).ipynb +0 -231
- noshot/data/ML TS XAI/ML/ML Lab H Sec/2. Student Dataset (Overfit vs Regularized).ipynb +0 -269
- noshot/data/ML TS XAI/ML/ML Lab H Sec/3. Insurance Target Categorical (Overfit vs Regularized).ipynb +0 -274
- noshot/data/ML TS XAI/ML/ML Lab H Sec/3. Insurance Target Numerical (Overfit vs Regularized).ipynb +0 -263
- noshot/data/ML TS XAI/ML/ML Lab H Sec/4. Smart House System HMM.ipynb +0 -198
- noshot/data/ML TS XAI/ML/ML Lab H Sec/5. Fraud Detection System HMM.ipynb +0 -201
- noshot/data/ML TS XAI/ML/ML Lab H Sec/insurance.csv +0 -1339
- noshot/data/ML TS XAI/ML/ML Lab H Sec/iris1.data +0 -151
- noshot/data/ML TS XAI/ML/ML Lab H Sec/student-mat.csv +0 -396
- noshot/data/ML TS XAI/ML/ML Lab H Sec/student-por.csv +0 -650
- noshot/data/ML TS XAI/ML/Rolls Royce AllinOne.ipynb +0 -691
- noshot-11.0.0.dist-info/RECORD +0 -72
- {noshot-11.0.0.dist-info → noshot-13.0.0.dist-info}/WHEEL +0 -0
- {noshot-11.0.0.dist-info → noshot-13.0.0.dist-info}/licenses/LICENSE.txt +0 -0
- {noshot-11.0.0.dist-info → noshot-13.0.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,646 @@
|
|
1
|
+
{
|
2
|
+
"cells": [
|
3
|
+
{
|
4
|
+
"cell_type": "raw",
|
5
|
+
"id": "fbc03c64-ef16-4473-b581-fc27a0fc5e37",
|
6
|
+
"metadata": {},
|
7
|
+
"source": [
|
8
|
+
"1. Print the pre-activated values and post-activated values of a neural network\n",
|
9
|
+
"2. Check the correctness of backpropagation\n",
|
10
|
+
"3. Standardise the values\n",
|
11
|
+
"4. Normalise the values by keeping min as 10 and max as 50\n",
|
12
|
+
"5. Initialize the weights using normal and uniform xavier initialization\n",
|
13
|
+
"5. Apply the variants of ReLU for the preactivated values and print the results\n",
|
14
|
+
"6. Apply exponential decay and inverse time decay for adjusting the initial learning rate\n",
|
15
|
+
"7. Implement a feed-forward neural network with different optimizers (SGD, Adagrad, RMSProp, RMSProp with Nesterov momentum, AdaDelta and Adam) and compare the results\n",
|
16
|
+
"8. Clip the gradients using value & norm based clipping\n",
|
17
|
+
"9. Implement Hessian-Free Optimization (HFO) by computing the Hessian-vector product (HVP) using PyTorch’s autograd, Tensorflow's tf.GradientTape (nested) and solve the Newton system Hv=−∇f using the Conjugate Gradient (CG) method.\n",
|
18
|
+
"10. Compute the Hessian matrix of a scalar function and determine if a critical point is a saddle point, local minimum, or local maximum by analyzing the eigenvalues of the Hessian"
|
19
|
+
]
|
20
|
+
},
|
21
|
+
{
|
22
|
+
"cell_type": "code",
|
23
|
+
"execution_count": null,
|
24
|
+
"id": "489759c2",
|
25
|
+
"metadata": {},
|
26
|
+
"outputs": [],
|
27
|
+
"source": [
|
28
|
+
"import numpy as np\n",
|
29
|
+
"import torch\n",
|
30
|
+
"import torch.optim as optim\n",
|
31
|
+
"import tensorflow as tf\n",
|
32
|
+
"from sklearn.preprocessing import StandardScaler, MinMaxScaler\n",
|
33
|
+
"from sklearn.datasets import make_regression\n",
|
34
|
+
"import matplotlib.pyplot as plt"
|
35
|
+
]
|
36
|
+
},
|
37
|
+
{
|
38
|
+
"cell_type": "code",
|
39
|
+
"execution_count": null,
|
40
|
+
"id": "7e92ecaa",
|
41
|
+
"metadata": {},
|
42
|
+
"outputs": [],
|
43
|
+
"source": [
|
44
|
+
"np.random.seed(42)\n",
|
45
|
+
"torch.manual_seed(42)\n",
|
46
|
+
"tf.random.set_seed(42)\n",
|
47
|
+
"\n",
|
48
|
+
"X = np.random.randn(10, 3)\n",
|
49
|
+
"y = np.random.randn(10, 1)"
|
50
|
+
]
|
51
|
+
},
|
52
|
+
{
|
53
|
+
"cell_type": "raw",
|
54
|
+
"id": "d4c1f591-424f-44d0-bc84-c04f8839fc8f",
|
55
|
+
"metadata": {},
|
56
|
+
"source": [
|
57
|
+
"1. Print pre-activated and post-activated values"
|
58
|
+
]
|
59
|
+
},
|
60
|
+
{
|
61
|
+
"cell_type": "code",
|
62
|
+
"execution_count": null,
|
63
|
+
"id": "7388227e",
|
64
|
+
"metadata": {},
|
65
|
+
"outputs": [],
|
66
|
+
"source": [
|
67
|
+
"def print_activations(X, W, b, activation='relu'):\n",
|
68
|
+
" pre_activation = np.dot(X, W) + b\n",
|
69
|
+
" print(\"\\n1. Pre-activated values:\\n\", pre_activation)\n",
|
70
|
+
" \n",
|
71
|
+
" if activation == 'relu':\n",
|
72
|
+
" post_activation = np.maximum(0, pre_activation)\n",
|
73
|
+
" elif activation == 'sigmoid':\n",
|
74
|
+
" post_activation = 1 / (1 + np.exp(-pre_activation))\n",
|
75
|
+
" elif activation == 'tanh':\n",
|
76
|
+
" post_activation = np.tanh(pre_activation)\n",
|
77
|
+
" else:\n",
|
78
|
+
" raise ValueError(\"Unsupported activation function\")\n",
|
79
|
+
" \n",
|
80
|
+
" print(\"\\nPost-activated values:\\n\", post_activation)\n",
|
81
|
+
" return pre_activation, post_activation"
|
82
|
+
]
|
83
|
+
},
|
84
|
+
{
|
85
|
+
"cell_type": "code",
|
86
|
+
"execution_count": null,
|
87
|
+
"id": "6bcc03bf",
|
88
|
+
"metadata": {},
|
89
|
+
"outputs": [],
|
90
|
+
"source": [
|
91
|
+
"W = np.random.randn(3, 4)\n",
|
92
|
+
"b = np.random.randn(4)\n",
|
93
|
+
"pre_act, post_act = print_activations(X, W, b, activation='relu')"
|
94
|
+
]
|
95
|
+
},
|
96
|
+
{
|
97
|
+
"cell_type": "raw",
|
98
|
+
"id": "4cfb185b-a048-4806-879e-ad5af75b2299",
|
99
|
+
"metadata": {},
|
100
|
+
"source": [
|
101
|
+
"2. Check backpropagation correctness"
|
102
|
+
]
|
103
|
+
},
|
104
|
+
{
|
105
|
+
"cell_type": "code",
|
106
|
+
"execution_count": null,
|
107
|
+
"id": "ca501ee2",
|
108
|
+
"metadata": {},
|
109
|
+
"outputs": [],
|
110
|
+
"source": [
|
111
|
+
"def check_backprop():\n",
|
112
|
+
" print(\"\\n2. Checking backpropagation...\")\n",
|
113
|
+
" \n",
|
114
|
+
" model = torch.nn.Sequential(\n",
|
115
|
+
" torch.nn.Linear(2, 3),\n",
|
116
|
+
" torch.nn.Tanh(),\n",
|
117
|
+
" torch.nn.Linear(3, 1)\n",
|
118
|
+
" ).double()\n",
|
119
|
+
" \n",
|
120
|
+
" X = torch.randn(5, 2, dtype=torch.double, requires_grad=True)\n",
|
121
|
+
" y = torch.randn(5, 1, dtype=torch.double)\n",
|
122
|
+
" \n",
|
123
|
+
" test_input = torch.autograd.gradcheck(\n",
|
124
|
+
" lambda x: torch.nn.functional.mse_loss(model(x), y),\n",
|
125
|
+
" X,\n",
|
126
|
+
" eps=1e-6,\n",
|
127
|
+
" atol=1e-4,\n",
|
128
|
+
" rtol=1e-4,\n",
|
129
|
+
" raise_exception=False\n",
|
130
|
+
" )\n",
|
131
|
+
" print(\"Input gradient check passed:\", test_input)\n",
|
132
|
+
" \n",
|
133
|
+
" for name, param in model.named_parameters():\n",
|
134
|
+
" if param.requires_grad:\n",
|
135
|
+
" def func(input):\n",
|
136
|
+
" with torch.no_grad():\n",
|
137
|
+
" old_data = param.data.clone()\n",
|
138
|
+
" param.data.copy_(input)\n",
|
139
|
+
" output = model(X)\n",
|
140
|
+
" loss = torch.nn.functional.mse_loss(output, y)\n",
|
141
|
+
" with torch.no_grad():\n",
|
142
|
+
" param.data.copy_(old_data)\n",
|
143
|
+
" return loss\n",
|
144
|
+
" \n",
|
145
|
+
" test_param = torch.autograd.gradcheck(\n",
|
146
|
+
" func,\n",
|
147
|
+
" param.data.clone().requires_grad_(True),\n",
|
148
|
+
" eps=1e-6,\n",
|
149
|
+
" atol=1e-4,\n",
|
150
|
+
" rtol=1e-4,\n",
|
151
|
+
" raise_exception=False\n",
|
152
|
+
" )\n",
|
153
|
+
" print(f\"Parameter {name} gradient check passed:\", test_param)"
|
154
|
+
]
|
155
|
+
},
|
156
|
+
{
|
157
|
+
"cell_type": "code",
|
158
|
+
"execution_count": null,
|
159
|
+
"id": "b2f0082f",
|
160
|
+
"metadata": {},
|
161
|
+
"outputs": [],
|
162
|
+
"source": [
|
163
|
+
"check_backprop()"
|
164
|
+
]
|
165
|
+
},
|
166
|
+
{
|
167
|
+
"cell_type": "raw",
|
168
|
+
"id": "b5a4397a-fd32-4a34-a14d-8c777be04812",
|
169
|
+
"metadata": {},
|
170
|
+
"source": [
|
171
|
+
"3. Standardize values"
|
172
|
+
]
|
173
|
+
},
|
174
|
+
{
|
175
|
+
"cell_type": "code",
|
176
|
+
"execution_count": null,
|
177
|
+
"id": "75ec2b4b",
|
178
|
+
"metadata": {},
|
179
|
+
"outputs": [],
|
180
|
+
"source": [
|
181
|
+
"def standardize_data(X):\n",
|
182
|
+
" print(\"\\n3. Standardizing data...\")\n",
|
183
|
+
" scaler = StandardScaler()\n",
|
184
|
+
" X_std = scaler.fit_transform(X)\n",
|
185
|
+
" print(\"Mean after standardization:\", X_std.mean(axis=0))\n",
|
186
|
+
" print(\"Std after standardization:\", X_std.std(axis=0))\n",
|
187
|
+
" return X_std"
|
188
|
+
]
|
189
|
+
},
|
190
|
+
{
|
191
|
+
"cell_type": "code",
|
192
|
+
"execution_count": null,
|
193
|
+
"id": "af896e94",
|
194
|
+
"metadata": {},
|
195
|
+
"outputs": [],
|
196
|
+
"source": [
|
197
|
+
"X_std = standardize_data(X)"
|
198
|
+
]
|
199
|
+
},
|
200
|
+
{
|
201
|
+
"cell_type": "raw",
|
202
|
+
"id": "116879b1-151c-4bcd-8ace-cab5d5daaa2f",
|
203
|
+
"metadata": {},
|
204
|
+
"source": [
|
205
|
+
"4. Normalize values (min=10, max=50)"
|
206
|
+
]
|
207
|
+
},
|
208
|
+
{
|
209
|
+
"cell_type": "code",
|
210
|
+
"execution_count": null,
|
211
|
+
"id": "4d77159d",
|
212
|
+
"metadata": {},
|
213
|
+
"outputs": [],
|
214
|
+
"source": [
|
215
|
+
"def normalize_data(X):\n",
|
216
|
+
" print(\"\\n4. Normalizing data (min=10, max=50)...\")\n",
|
217
|
+
" scaler = MinMaxScaler(feature_range=(10, 50))\n",
|
218
|
+
" X_norm = scaler.fit_transform(X)\n",
|
219
|
+
" print(\"Min after normalization:\", X_norm.min(axis=0))\n",
|
220
|
+
" print(\"Max after normalization:\", X_norm.max(axis=0))\n",
|
221
|
+
" return X_norm"
|
222
|
+
]
|
223
|
+
},
|
224
|
+
{
|
225
|
+
"cell_type": "code",
|
226
|
+
"execution_count": null,
|
227
|
+
"id": "d0d4596e",
|
228
|
+
"metadata": {},
|
229
|
+
"outputs": [],
|
230
|
+
"source": [
|
231
|
+
"X_norm = normalize_data(X)"
|
232
|
+
]
|
233
|
+
},
|
234
|
+
{
|
235
|
+
"cell_type": "raw",
|
236
|
+
"id": "3e39dd60-9f56-48be-9d9f-2106a8209ea4",
|
237
|
+
"metadata": {},
|
238
|
+
"source": [
|
239
|
+
"5. Xavier initialization"
|
240
|
+
]
|
241
|
+
},
|
242
|
+
{
|
243
|
+
"cell_type": "code",
|
244
|
+
"execution_count": null,
|
245
|
+
"id": "2132e8d4",
|
246
|
+
"metadata": {},
|
247
|
+
"outputs": [],
|
248
|
+
"source": [
|
249
|
+
"def xavier_init(n_input, n_output):\n",
|
250
|
+
" print(\"\\n5. Xavier initialization...\")\n",
|
251
|
+
" std_normal = np.sqrt(2.0 / (n_input + n_output))\n",
|
252
|
+
" weights_normal = np.random.normal(0, std_normal, (n_input, n_output))\n",
|
253
|
+
" \n",
|
254
|
+
" limit = np.sqrt(6.0 / (n_input + n_output))\n",
|
255
|
+
" weights_uniform = np.random.uniform(-limit, limit, (n_input, n_output))\n",
|
256
|
+
" \n",
|
257
|
+
" print(\"Xavier Normal weights mean/std:\", weights_normal.mean(), weights_normal.std())\n",
|
258
|
+
" print(\"Xavier Uniform weights min/max:\", weights_uniform.min(), weights_uniform.max())\n",
|
259
|
+
" return weights_normal, weights_uniform"
|
260
|
+
]
|
261
|
+
},
|
262
|
+
{
|
263
|
+
"cell_type": "code",
|
264
|
+
"execution_count": null,
|
265
|
+
"id": "96b7c91b",
|
266
|
+
"metadata": {},
|
267
|
+
"outputs": [],
|
268
|
+
"source": [
|
269
|
+
"weights_normal, weights_uniform = xavier_init(3, 4)"
|
270
|
+
]
|
271
|
+
},
|
272
|
+
{
|
273
|
+
"cell_type": "raw",
|
274
|
+
"id": "2273476f-e51c-48a0-9426-cf8624b6e963",
|
275
|
+
"metadata": {},
|
276
|
+
"source": [
|
277
|
+
"5. ReLU variants"
|
278
|
+
]
|
279
|
+
},
|
280
|
+
{
|
281
|
+
"cell_type": "code",
|
282
|
+
"execution_count": null,
|
283
|
+
"id": "e4805aa6",
|
284
|
+
"metadata": {},
|
285
|
+
"outputs": [],
|
286
|
+
"source": [
|
287
|
+
"def apply_relu_variants(pre_activation):\n",
|
288
|
+
" print(\"\\n5. Applying ReLU variants...\")\n",
|
289
|
+
" relu = np.maximum(0, pre_activation)\n",
|
290
|
+
" \n",
|
291
|
+
" leaky = np.where(pre_activation > 0, pre_activation, pre_activation * 0.01)\n",
|
292
|
+
" \n",
|
293
|
+
" parametric = np.where(pre_activation > 0, pre_activation, pre_activation * 0.25)\n",
|
294
|
+
" \n",
|
295
|
+
" elu = np.where(pre_activation > 0, pre_activation, 1.0 * (np.exp(pre_activation) - 1))\n",
|
296
|
+
"\n",
|
297
|
+
" swish = pre_activation * (1 / (1 + np.exp(-pre_activation)))\n",
|
298
|
+
" \n",
|
299
|
+
" print(\"Original values:\\n\", pre_activation)\n",
|
300
|
+
" print(\"\\nReLU:\\n\", relu)\n",
|
301
|
+
" print(\"\\nLeaky ReLU:\\n\", leaky)\n",
|
302
|
+
" print(\"\\nParametric ReLU:\\n\", parametric)\n",
|
303
|
+
" print(\"\\nELU:\\n\", elu)\n",
|
304
|
+
" print(\"\\nSwish:\\n\", swish)\n",
|
305
|
+
" \n",
|
306
|
+
" return {'relu': relu, 'leaky': leaky, 'parametric': parametric, 'elu': elu, 'swish': swish}"
|
307
|
+
]
|
308
|
+
},
|
309
|
+
{
|
310
|
+
"cell_type": "code",
|
311
|
+
"execution_count": null,
|
312
|
+
"id": "d91f44d1",
|
313
|
+
"metadata": {},
|
314
|
+
"outputs": [],
|
315
|
+
"source": [
|
316
|
+
"relu_results = apply_relu_variants(pre_act)"
|
317
|
+
]
|
318
|
+
},
|
319
|
+
{
|
320
|
+
"cell_type": "raw",
|
321
|
+
"id": "6db6a2dc-1cf5-4f96-ba2b-e6d7ad219c38",
|
322
|
+
"metadata": {},
|
323
|
+
"source": [
|
324
|
+
"6. Learning rate decay"
|
325
|
+
]
|
326
|
+
},
|
327
|
+
{
|
328
|
+
"cell_type": "code",
|
329
|
+
"execution_count": null,
|
330
|
+
"id": "f6903dab",
|
331
|
+
"metadata": {},
|
332
|
+
"outputs": [],
|
333
|
+
"source": [
|
334
|
+
"def learning_rate_decay(initial_lr=0.1, decay_type='exponential', steps=100):\n",
|
335
|
+
" print(f\"\\n6. {decay_type} learning rate decay over {steps} steps:\")\n",
|
336
|
+
" \n",
|
337
|
+
" lr_values = []\n",
|
338
|
+
" for global_step in range(steps):\n",
|
339
|
+
" if decay_type == 'exponential':\n",
|
340
|
+
" decay_rate = 0.96\n",
|
341
|
+
" lr = initial_lr * decay_rate ** (global_step / (steps/10))\n",
|
342
|
+
" elif decay_type == 'inverse_time':\n",
|
343
|
+
" decay_rate = 1.0\n",
|
344
|
+
" lr = initial_lr / (1 + decay_rate * global_step / (steps/10))\n",
|
345
|
+
" else:\n",
|
346
|
+
" raise ValueError(\"Unsupported decay type\")\n",
|
347
|
+
" lr_values.append(lr)\n",
|
348
|
+
" \n",
|
349
|
+
" plt.figure(figsize=(10, 5))\n",
|
350
|
+
" plt.plot(lr_values)\n",
|
351
|
+
" plt.title(f\"{decay_type} Learning Rate Decay\")\n",
|
352
|
+
" plt.xlabel(\"Step\")\n",
|
353
|
+
" plt.ylabel(\"Learning Rate\")\n",
|
354
|
+
" plt.grid()\n",
|
355
|
+
" plt.show()\n",
|
356
|
+
" \n",
|
357
|
+
" return lr_values"
|
358
|
+
]
|
359
|
+
},
|
360
|
+
{
|
361
|
+
"cell_type": "code",
|
362
|
+
"execution_count": null,
|
363
|
+
"id": "759ef663",
|
364
|
+
"metadata": {},
|
365
|
+
"outputs": [],
|
366
|
+
"source": [
|
367
|
+
"lr_exp = learning_rate_decay(decay_type='exponential')\n",
|
368
|
+
"lr_inv = learning_rate_decay(decay_type='inverse_time')"
|
369
|
+
]
|
370
|
+
},
|
371
|
+
{
|
372
|
+
"cell_type": "raw",
|
373
|
+
"id": "865922ec-db01-4b34-b848-c1971f00ef62",
|
374
|
+
"metadata": {},
|
375
|
+
"source": [
|
376
|
+
"7. Neural network with different optimizers"
|
377
|
+
]
|
378
|
+
},
|
379
|
+
{
|
380
|
+
"cell_type": "code",
|
381
|
+
"execution_count": null,
|
382
|
+
"id": "75a65fab",
|
383
|
+
"metadata": {},
|
384
|
+
"outputs": [],
|
385
|
+
"source": [
|
386
|
+
"def compare_optimizers():\n",
|
387
|
+
" print(\"\\n7. Comparing optimizers...\")\n",
|
388
|
+
" \n",
|
389
|
+
" X, y = make_regression(n_samples=100, n_features=5, noise=0.1, random_state=42)\n",
|
390
|
+
" X = X.astype(np.float32)\n",
|
391
|
+
" y = y.astype(np.float32).reshape(-1, 1)\n",
|
392
|
+
"\n",
|
393
|
+
" X_tensor = torch.tensor(X, dtype=torch.float32)\n",
|
394
|
+
" y_tensor = torch.tensor(y, dtype=torch.float32)\n",
|
395
|
+
" \n",
|
396
|
+
" def create_model():\n",
|
397
|
+
" return torch.nn.Sequential(\n",
|
398
|
+
" torch.nn.Linear(5, 10),\n",
|
399
|
+
" torch.nn.ReLU(),\n",
|
400
|
+
" torch.nn.Linear(10, 1)\n",
|
401
|
+
" )\n",
|
402
|
+
"\n",
|
403
|
+
" optimizers = {\n",
|
404
|
+
" 'SGD': optim.SGD,\n",
|
405
|
+
" 'Adagrad': optim.Adagrad,\n",
|
406
|
+
" 'RMSprop': optim.RMSprop,\n",
|
407
|
+
" 'Adam': optim.Adam,\n",
|
408
|
+
" 'Adadelta': optim.Adadelta\n",
|
409
|
+
" }\n",
|
410
|
+
" \n",
|
411
|
+
" n_epochs = 200\n",
|
412
|
+
" results = {}\n",
|
413
|
+
" \n",
|
414
|
+
" plt.figure(figsize=(12, 6))\n",
|
415
|
+
" \n",
|
416
|
+
" for name, opt_class in optimizers.items():\n",
|
417
|
+
" model = create_model()\n",
|
418
|
+
" optimizer = opt_class(model.parameters(), lr=0.01)\n",
|
419
|
+
" \n",
|
420
|
+
" losses = []\n",
|
421
|
+
" for epoch in range(n_epochs):\n",
|
422
|
+
" optimizer.zero_grad()\n",
|
423
|
+
" y_pred = model(X_tensor)\n",
|
424
|
+
" loss = torch.nn.functional.mse_loss(y_pred, y_tensor)\n",
|
425
|
+
" loss.backward()\n",
|
426
|
+
" optimizer.step()\n",
|
427
|
+
" losses.append(loss.item())\n",
|
428
|
+
" \n",
|
429
|
+
" results[name] = losses\n",
|
430
|
+
" plt.plot(losses, label=name)\n",
|
431
|
+
" print(f\"{name}: Final loss = {losses[-1]:.4f}\")\n",
|
432
|
+
" \n",
|
433
|
+
" plt.title(\"Optimizer Comparison\")\n",
|
434
|
+
" plt.xlabel(\"Epoch\")\n",
|
435
|
+
" plt.ylabel(\"Loss\")\n",
|
436
|
+
" plt.legend()\n",
|
437
|
+
" plt.grid()\n",
|
438
|
+
" plt.show()\n",
|
439
|
+
" \n",
|
440
|
+
" return results"
|
441
|
+
]
|
442
|
+
},
|
443
|
+
{
|
444
|
+
"cell_type": "code",
|
445
|
+
"execution_count": null,
|
446
|
+
"id": "b09a56d5",
|
447
|
+
"metadata": {},
|
448
|
+
"outputs": [],
|
449
|
+
"source": [
|
450
|
+
"optimizer_results = compare_optimizers()"
|
451
|
+
]
|
452
|
+
},
|
453
|
+
{
|
454
|
+
"cell_type": "raw",
|
455
|
+
"id": "4bbeda82-1dc3-4964-868e-2cce413cd927",
|
456
|
+
"metadata": {},
|
457
|
+
"source": [
|
458
|
+
"8. Gradient clipping"
|
459
|
+
]
|
460
|
+
},
|
461
|
+
{
|
462
|
+
"cell_type": "code",
|
463
|
+
"execution_count": null,
|
464
|
+
"id": "7b281491",
|
465
|
+
"metadata": {},
|
466
|
+
"outputs": [],
|
467
|
+
"source": [
|
468
|
+
"def clip_gradients(gradients, method='value', threshold=1.0):\n",
|
469
|
+
" print(f\"\\n8. Gradient clipping ({method} with threshold={threshold})...\")\n",
|
470
|
+
" \n",
|
471
|
+
" if method == 'value':\n",
|
472
|
+
" clipped = [np.clip(g, -threshold, threshold) for g in gradients]\n",
|
473
|
+
" elif method == 'norm':\n",
|
474
|
+
" global_norm = np.sqrt(sum(np.sum(g**2) for g in gradients))\n",
|
475
|
+
" scale = threshold / max(global_norm, threshold)\n",
|
476
|
+
" clipped = [g * scale for g in gradients]\n",
|
477
|
+
" else:\n",
|
478
|
+
" raise ValueError(\"Unsupported clipping method\")\n",
|
479
|
+
" \n",
|
480
|
+
" print(\"Original gradient norms:\", [np.linalg.norm(g) for g in gradients])\n",
|
481
|
+
" print(\"Clipped gradient norms:\", [np.linalg.norm(g) for g in clipped])\n",
|
482
|
+
" \n",
|
483
|
+
" return clipped"
|
484
|
+
]
|
485
|
+
},
|
486
|
+
{
|
487
|
+
"cell_type": "code",
|
488
|
+
"execution_count": null,
|
489
|
+
"id": "c09dd387",
|
490
|
+
"metadata": {},
|
491
|
+
"outputs": [],
|
492
|
+
"source": [
|
493
|
+
"gradients = [np.random.randn(3,4), np.random.randn(4,1)]\n",
|
494
|
+
"clipped_value = clip_gradients(gradients, method='value', threshold=1.0)\n",
|
495
|
+
"clipped_norm = clip_gradients(gradients, method='norm', threshold=1.0)"
|
496
|
+
]
|
497
|
+
},
|
498
|
+
{
|
499
|
+
"cell_type": "raw",
|
500
|
+
"id": "6680e526-6710-481c-bccf-a0ab4b643eac",
|
501
|
+
"metadata": {},
|
502
|
+
"source": [
|
503
|
+
"9. Hessian-Free Optimization"
|
504
|
+
]
|
505
|
+
},
|
506
|
+
{
|
507
|
+
"cell_type": "code",
|
508
|
+
"execution_count": null,
|
509
|
+
"id": "500ee068",
|
510
|
+
"metadata": {},
|
511
|
+
"outputs": [],
|
512
|
+
"source": [
|
513
|
+
"def hessian_free_optimization():\n",
|
514
|
+
" print(\"\\n9. Hessian-Free Optimization...\")\n",
|
515
|
+
" \n",
|
516
|
+
" print(\"PyTorch implementation:\")\n",
|
517
|
+
" x = torch.randn(3, requires_grad=True, dtype=torch.double)\n",
|
518
|
+
" \n",
|
519
|
+
" def f(x):\n",
|
520
|
+
" return x @ torch.diag(torch.tensor([1.0, 2.0, 3.0], dtype=torch.double)) @ x\n",
|
521
|
+
" \n",
|
522
|
+
" grad = torch.autograd.grad(f(x), x, create_graph=True)[0]\n",
|
523
|
+
" \n",
|
524
|
+
" def hvp(v):\n",
|
525
|
+
" return torch.autograd.grad(grad @ v, x, retain_graph=True)[0]\n",
|
526
|
+
" \n",
|
527
|
+
" def cg_solve(A, b, max_iter=10, tol=1e-6):\n",
|
528
|
+
" x = torch.zeros_like(b)\n",
|
529
|
+
" r = b - A(x)\n",
|
530
|
+
" p = r.clone()\n",
|
531
|
+
" rsold = r @ r\n",
|
532
|
+
" \n",
|
533
|
+
" for i in range(max_iter):\n",
|
534
|
+
" Ap = A(p)\n",
|
535
|
+
" alpha = rsold / (p @ Ap)\n",
|
536
|
+
" x = x + alpha * p\n",
|
537
|
+
" r = r - alpha * Ap\n",
|
538
|
+
" rsnew = r @ r\n",
|
539
|
+
" if torch.sqrt(rsnew) < tol:\n",
|
540
|
+
" break\n",
|
541
|
+
" p = r + (rsnew / rsold) * p\n",
|
542
|
+
" rsold = rsnew\n",
|
543
|
+
" \n",
|
544
|
+
" return x\n",
|
545
|
+
" \n",
|
546
|
+
" v = cg_solve(hvp, -grad)\n",
|
547
|
+
" print(\"Solution v:\", v.detach().numpy())"
|
548
|
+
]
|
549
|
+
},
|
550
|
+
{
|
551
|
+
"cell_type": "code",
|
552
|
+
"execution_count": null,
|
553
|
+
"id": "5401b781",
|
554
|
+
"metadata": {},
|
555
|
+
"outputs": [],
|
556
|
+
"source": [
|
557
|
+
"hessian_free_optimization()"
|
558
|
+
]
|
559
|
+
},
|
560
|
+
{
|
561
|
+
"cell_type": "raw",
|
562
|
+
"id": "fe14e90e-d4a2-4d37-a27c-94b632a8c5a1",
|
563
|
+
"metadata": {},
|
564
|
+
"source": [
|
565
|
+
"10. Hessian matrix analysis"
|
566
|
+
]
|
567
|
+
},
|
568
|
+
{
|
569
|
+
"cell_type": "code",
|
570
|
+
"execution_count": null,
|
571
|
+
"id": "cbc5032d",
|
572
|
+
"metadata": {},
|
573
|
+
"outputs": [],
|
574
|
+
"source": [
|
575
|
+
"def hessian_analysis():\n",
|
576
|
+
" print(\"\\n10. Hessian matrix analysis...\")\n",
|
577
|
+
" \n",
|
578
|
+
" def f(x):\n",
|
579
|
+
" return x[0]**2 + x[1]**3 - x[1]**2\n",
|
580
|
+
" \n",
|
581
|
+
" critical_point = np.array([0, 2/3])\n",
|
582
|
+
" \n",
|
583
|
+
" eps = 1e-5\n",
|
584
|
+
" hessian = np.zeros((2, 2))\n",
|
585
|
+
" \n",
|
586
|
+
" for i in range(2):\n",
|
587
|
+
" for j in range(2):\n",
|
588
|
+
" def partial_derivative(x):\n",
|
589
|
+
" x_plus = x.copy()\n",
|
590
|
+
" x_plus[j] += eps\n",
|
591
|
+
" x_minus = x.copy()\n",
|
592
|
+
" x_minus[j] -= eps\n",
|
593
|
+
" return (f(x_plus) - f(x_minus)) / (2 * eps)\n",
|
594
|
+
" \n",
|
595
|
+
" x_plus = critical_point.copy()\n",
|
596
|
+
" x_plus[i] += eps\n",
|
597
|
+
" x_minus = critical_point.copy()\n",
|
598
|
+
" x_minus[i] -= eps\n",
|
599
|
+
" hessian[i,j] = (partial_derivative(x_plus) - partial_derivative(x_minus)) / (2 * eps)\n",
|
600
|
+
" \n",
|
601
|
+
" print(\"Hessian matrix:\\n\", hessian)\n",
|
602
|
+
" \n",
|
603
|
+
" eigenvalues = np.linalg.eigvals(hessian)\n",
|
604
|
+
" print(\"Eigenvalues:\", eigenvalues)\n",
|
605
|
+
" \n",
|
606
|
+
" if all(eig > 0 for eig in eigenvalues):\n",
|
607
|
+
" print(\"Conclusion: Local minimum\")\n",
|
608
|
+
" elif all(eig < 0 for eig in eigenvalues):\n",
|
609
|
+
" print(\"Conclusion: Local maximum\")\n",
|
610
|
+
" else:\n",
|
611
|
+
" print(\"Conclusion: Saddle point\")"
|
612
|
+
]
|
613
|
+
},
|
614
|
+
{
|
615
|
+
"cell_type": "code",
|
616
|
+
"execution_count": null,
|
617
|
+
"id": "8a2503c5",
|
618
|
+
"metadata": {},
|
619
|
+
"outputs": [],
|
620
|
+
"source": [
|
621
|
+
"hessian_analysis()"
|
622
|
+
]
|
623
|
+
}
|
624
|
+
],
|
625
|
+
"metadata": {
|
626
|
+
"kernelspec": {
|
627
|
+
"display_name": "Python 3 (ipykernel)",
|
628
|
+
"language": "python",
|
629
|
+
"name": "python3"
|
630
|
+
},
|
631
|
+
"language_info": {
|
632
|
+
"codemirror_mode": {
|
633
|
+
"name": "ipython",
|
634
|
+
"version": 3
|
635
|
+
},
|
636
|
+
"file_extension": ".py",
|
637
|
+
"mimetype": "text/x-python",
|
638
|
+
"name": "python",
|
639
|
+
"nbconvert_exporter": "python",
|
640
|
+
"pygments_lexer": "ipython3",
|
641
|
+
"version": "3.12.4"
|
642
|
+
}
|
643
|
+
},
|
644
|
+
"nbformat": 4,
|
645
|
+
"nbformat_minor": 5
|
646
|
+
}
|