noshot 11.0.0__py3-none-any.whl → 13.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93) hide show
  1. noshot/data/DLE FSD BDA/DLE/DLE 1 (Json)/1. DNN (Image Classification).ipynb +389 -0
  2. noshot/data/DLE FSD BDA/DLE/DLE 1 (Json)/2. DNN vs CNN.ipynb +516 -0
  3. noshot/data/DLE FSD BDA/DLE/DLE 1 (Json)/3. CNN (Object Detecrion).ipynb +259 -0
  4. noshot/data/DLE FSD BDA/DLE/DLE 1 (Json)/4. FCN (Image Segmentaion).ipynb +274 -0
  5. noshot/data/DLE FSD BDA/DLE/DLE 2 (tim stan s)/1.1 DNN (Pytorch).ipynb +164 -0
  6. noshot/data/DLE FSD BDA/DLE/DLE 2 (tim stan s)/1.2 DNN (Tensorflow).ipynb +94 -0
  7. noshot/data/DLE FSD BDA/DLE/DLE 2 (tim stan s)/1.3 DNN (Image Classification).ipynb +134 -0
  8. noshot/data/DLE FSD BDA/DLE/DLE 2 (tim stan s)/2.1 DNN vs CNN.ipynb +127 -0
  9. noshot/data/DLE FSD BDA/DLE/DLE 2 (tim stan s)/2.2 DNN vs CNN.ipynb +123 -0
  10. noshot/data/DLE FSD BDA/DLE/DLE 2 (tim stan s)/4. FCNN (Image Segmentation).ipynb +108 -0
  11. noshot/data/DLE FSD BDA/DLE/DLE 2 (tim stan s)/Lab Excercise (Training DNN).ipynb +646 -0
  12. noshot/data/DLE FSD BDA/DLE/DLE 2 (tim stan s)/Load-Images.ipynb +553 -0
  13. noshot/data/DLE FSD BDA/DLE/DLE 3 (sonic boy)/Ex1.ipynb +216 -0
  14. noshot/data/DLE FSD BDA/DLE/DLE 3 (sonic boy)/Ex2.ipynb +195 -0
  15. noshot/data/DLE FSD BDA/DLE/DLE 3 (sonic boy)/Ex3.ipynb +427 -0
  16. noshot/data/DLE FSD BDA/DLE/DLE 3 (sonic boy)/Ex4.ipynb +186 -0
  17. noshot/data/DLE FSD BDA/DLE/DLE 4 (senior)/Exp01/DNN Ex No 1.ipynb +398 -0
  18. noshot/data/DLE FSD BDA/DLE/DLE 4 (senior)/Exp01/Ex No 1 Build in dataset.ipynb +171 -0
  19. noshot/data/DLE FSD BDA/DLE/DLE 4 (senior)/Exp01/Exp1-Short-DL_ANN_ImageClassification.ipynb +401 -0
  20. noshot/data/DLE FSD BDA/DLE/DLE 4 (senior)/Exp01/OR GATE .ipynb +8511 -0
  21. noshot/data/DLE FSD BDA/DLE/DLE 4 (senior)/Exp02/Exp2-Short-DL_CNN_ImageClassification.ipynb +737 -0
  22. noshot/data/DLE FSD BDA/DLE/DLE 4 (senior)/Exp03/DL-Ex3-RNN.ipynb +591 -0
  23. noshot/data/DLE FSD BDA/DLE/DLE 4 (senior)/Exp04/Ex no 4.ipynb +551 -0
  24. noshot/main.py +3 -3
  25. {noshot-11.0.0.dist-info → noshot-13.0.0.dist-info}/METADATA +1 -1
  26. noshot-13.0.0.dist-info/RECORD +32 -0
  27. noshot/data/ML TS XAI/ML/CNN(Image_for_Folders_5).ipynb +0 -201
  28. noshot/data/ML TS XAI/ML/CNN(Image_form_Folder_2).ipynb +0 -201
  29. noshot/data/ML TS XAI/ML/Json Codes/ML LAB CIA 2.ipynb +0 -409
  30. noshot/data/ML TS XAI/ML/ML 1/1. EDA-PCA (Balance Scale Dataset).ipynb +0 -147
  31. noshot/data/ML TS XAI/ML/ML 1/1. EDA-PCA (Rice Dataset).ipynb +0 -181
  32. noshot/data/ML TS XAI/ML/ML 1/10. HMM Veterbi.ipynb +0 -152
  33. noshot/data/ML TS XAI/ML/ML 1/2. KNN (Balance Scale Dataset).ipynb +0 -117
  34. noshot/data/ML TS XAI/ML/ML 1/2. KNN (Iris Dataset).ipynb +0 -156
  35. noshot/data/ML TS XAI/ML/ML 1/2. KNN (Sobar-72 Dataset).ipynb +0 -215
  36. noshot/data/ML TS XAI/ML/ML 1/3. LDA (Balance Scale Dataset).ipynb +0 -78
  37. noshot/data/ML TS XAI/ML/ML 1/3. LDA (NPHA Doctor Visits Dataset).ipynb +0 -114
  38. noshot/data/ML TS XAI/ML/ML 1/4. Linear Regression (Machine Dataset).ipynb +0 -115
  39. noshot/data/ML TS XAI/ML/ML 1/4. Linear Regression (Real Estate Dataset).ipynb +0 -146
  40. noshot/data/ML TS XAI/ML/ML 1/5. Logistic Regression (Magic04 Dataset).ipynb +0 -130
  41. noshot/data/ML TS XAI/ML/ML 1/5. Logistic Regression (Wine Dataset).ipynb +0 -112
  42. noshot/data/ML TS XAI/ML/ML 1/6. Naive Bayes Classifier (Agaricus Lepiota Dataset).ipynb +0 -118
  43. noshot/data/ML TS XAI/ML/ML 1/6. Naive Bayes Classifier (Wine Dataset).ipynb +0 -89
  44. noshot/data/ML TS XAI/ML/ML 1/7. SVM (Rice Dataset).ipynb +0 -120
  45. noshot/data/ML TS XAI/ML/ML 1/8. FeedForward NN (Sobar72 Dataset).ipynb +0 -262
  46. noshot/data/ML TS XAI/ML/ML 1/9. CNN (Cifar10 Dataset).ipynb +0 -156
  47. noshot/data/ML TS XAI/ML/ML 2/1. PCA.ipynb +0 -162
  48. noshot/data/ML TS XAI/ML/ML 2/10. CNN.ipynb +0 -100
  49. noshot/data/ML TS XAI/ML/ML 2/11. HMM.ipynb +0 -336
  50. noshot/data/ML TS XAI/ML/ML 2/2. KNN.ipynb +0 -149
  51. noshot/data/ML TS XAI/ML/ML 2/3. LDA.ipynb +0 -132
  52. noshot/data/ML TS XAI/ML/ML 2/4. Linear Regression.ipynb +0 -86
  53. noshot/data/ML TS XAI/ML/ML 2/5. Logistic Regression.ipynb +0 -115
  54. noshot/data/ML TS XAI/ML/ML 2/6. Naive Bayes (Titanic).ipynb +0 -196
  55. noshot/data/ML TS XAI/ML/ML 2/6. Naive Bayes (Wine).ipynb +0 -98
  56. noshot/data/ML TS XAI/ML/ML 2/7. SVM Linear.ipynb +0 -109
  57. noshot/data/ML TS XAI/ML/ML 2/8. SVM Non-Linear.ipynb +0 -195
  58. noshot/data/ML TS XAI/ML/ML 2/9. FNN With Regularization.ipynb +0 -189
  59. noshot/data/ML TS XAI/ML/ML 2/9. FNN Without Regularization.ipynb +0 -197
  60. noshot/data/ML TS XAI/ML/ML 2/All in One Lab CIA 1 Q.ipynb +0 -1087
  61. noshot/data/ML TS XAI/ML/ML 3 (Latest)/1. PCA EDA.ipynb +0 -274
  62. noshot/data/ML TS XAI/ML/ML 3 (Latest)/10. CNN.ipynb +0 -170
  63. noshot/data/ML TS XAI/ML/ML 3 (Latest)/11. HMM 2.ipynb +0 -1087
  64. noshot/data/ML TS XAI/ML/ML 3 (Latest)/11. HMM 3.ipynb +0 -178
  65. noshot/data/ML TS XAI/ML/ML 3 (Latest)/11. HMM 4.ipynb +0 -185
  66. noshot/data/ML TS XAI/ML/ML 3 (Latest)/11. HMM.ipynb +0 -106
  67. noshot/data/ML TS XAI/ML/ML 3 (Latest)/2. KNN.ipynb +0 -177
  68. noshot/data/ML TS XAI/ML/ML 3 (Latest)/3. LDA.ipynb +0 -195
  69. noshot/data/ML TS XAI/ML/ML 3 (Latest)/4. Linear Regression.ipynb +0 -267
  70. noshot/data/ML TS XAI/ML/ML 3 (Latest)/5. Logistic Regression.ipynb +0 -104
  71. noshot/data/ML TS XAI/ML/ML 3 (Latest)/6. Bayesian Classifier.ipynb +0 -109
  72. noshot/data/ML TS XAI/ML/ML 3 (Latest)/7. SVM.ipynb +0 -220
  73. noshot/data/ML TS XAI/ML/ML 3 (Latest)/8. MLP.ipynb +0 -99
  74. noshot/data/ML TS XAI/ML/ML 3 (Latest)/9. Ridge - Lasso.ipynb +0 -211
  75. noshot/data/ML TS XAI/ML/ML 3 (Latest)/9. Ridge Lasso 2.ipynb +0 -99
  76. noshot/data/ML TS XAI/ML/ML 3 (Latest)/Image Load Example.ipynb +0 -118
  77. noshot/data/ML TS XAI/ML/ML 3 (Latest)/Updated_Untitled.ipynb +0 -603
  78. noshot/data/ML TS XAI/ML/ML Lab AllinOne.ipynb +0 -961
  79. noshot/data/ML TS XAI/ML/ML Lab H Sec/1. Iris Dataset (Softmax vs Sigmoid).ipynb +0 -231
  80. noshot/data/ML TS XAI/ML/ML Lab H Sec/2. Student Dataset (Overfit vs Regularized).ipynb +0 -269
  81. noshot/data/ML TS XAI/ML/ML Lab H Sec/3. Insurance Target Categorical (Overfit vs Regularized).ipynb +0 -274
  82. noshot/data/ML TS XAI/ML/ML Lab H Sec/3. Insurance Target Numerical (Overfit vs Regularized).ipynb +0 -263
  83. noshot/data/ML TS XAI/ML/ML Lab H Sec/4. Smart House System HMM.ipynb +0 -198
  84. noshot/data/ML TS XAI/ML/ML Lab H Sec/5. Fraud Detection System HMM.ipynb +0 -201
  85. noshot/data/ML TS XAI/ML/ML Lab H Sec/insurance.csv +0 -1339
  86. noshot/data/ML TS XAI/ML/ML Lab H Sec/iris1.data +0 -151
  87. noshot/data/ML TS XAI/ML/ML Lab H Sec/student-mat.csv +0 -396
  88. noshot/data/ML TS XAI/ML/ML Lab H Sec/student-por.csv +0 -650
  89. noshot/data/ML TS XAI/ML/Rolls Royce AllinOne.ipynb +0 -691
  90. noshot-11.0.0.dist-info/RECORD +0 -72
  91. {noshot-11.0.0.dist-info → noshot-13.0.0.dist-info}/WHEEL +0 -0
  92. {noshot-11.0.0.dist-info → noshot-13.0.0.dist-info}/licenses/LICENSE.txt +0 -0
  93. {noshot-11.0.0.dist-info → noshot-13.0.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,646 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "raw",
5
+ "id": "fbc03c64-ef16-4473-b581-fc27a0fc5e37",
6
+ "metadata": {},
7
+ "source": [
8
+ "1. Print the pre-activated values and post-activated values of a neural network\n",
9
+ "2. Check the correctness of backpropagation\n",
10
+ "3. Standardise the values\n",
11
+ "4. Normalise the values by keeping min as 10 and max as 50\n",
12
+ "5a. Initialize the weights using normal and uniform Xavier initialization\n",
13
+ "5b. Apply the variants of ReLU for the pre-activated values and print the results\n",
14
+ "6. Apply exponential decay and inverse time decay for adjusting the initial learning rate\n",
15
+ "7. Implement a feed-forward neural network with different optimizers (SGD, Adagrad, RMSProp, RMSProp with Nesterov momentum, AdaDelta and Adam) and compare the results\n",
16
+ "8. Clip the gradients using value & norm based clipping\n",
17
+ "9. Implement Hessian-Free Optimization (HFO): compute the Hessian-vector product (HVP) using PyTorch's autograd and TensorFlow's nested tf.GradientTape, then solve the Newton system Hv = −∇f using the Conjugate Gradient (CG) method.\n",
18
+ "10. Compute the Hessian matrix of a scalar function and determine if a critical point is a saddle point, local minimum, or local maximum by analyzing the eigenvalues of the Hessian"
19
+ ]
20
+ },
21
+ {
22
+ "cell_type": "code",
23
+ "execution_count": null,
24
+ "id": "489759c2",
25
+ "metadata": {},
26
+ "outputs": [],
27
+ "source": [
28
+ "import numpy as np\n",
29
+ "import torch\n",
30
+ "import torch.optim as optim\n",
31
+ "import tensorflow as tf\n",
32
+ "from sklearn.preprocessing import StandardScaler, MinMaxScaler\n",
33
+ "from sklearn.datasets import make_regression\n",
34
+ "import matplotlib.pyplot as plt"
35
+ ]
36
+ },
37
+ {
38
+ "cell_type": "code",
39
+ "execution_count": null,
40
+ "id": "7e92ecaa",
41
+ "metadata": {},
42
+ "outputs": [],
43
+ "source": [
44
+ "np.random.seed(42)\n",
45
+ "torch.manual_seed(42)\n",
46
+ "tf.random.set_seed(42)\n",
47
+ "\n",
48
+ "X = np.random.randn(10, 3)\n",
49
+ "y = np.random.randn(10, 1)"
50
+ ]
51
+ },
52
+ {
53
+ "cell_type": "raw",
54
+ "id": "d4c1f591-424f-44d0-bc84-c04f8839fc8f",
55
+ "metadata": {},
56
+ "source": [
57
+ "1. Print pre-activated and post-activated values"
58
+ ]
59
+ },
60
+ {
61
+ "cell_type": "code",
62
+ "execution_count": null,
63
+ "id": "7388227e",
64
+ "metadata": {},
65
+ "outputs": [],
66
+ "source": [
67
+ "def print_activations(X, W, b, activation='relu'):\n",
68
+ " pre_activation = np.dot(X, W) + b\n",
69
+ " print(\"\\n1. Pre-activated values:\\n\", pre_activation)\n",
70
+ " \n",
71
+ " if activation == 'relu':\n",
72
+ " post_activation = np.maximum(0, pre_activation)\n",
73
+ " elif activation == 'sigmoid':\n",
74
+ " post_activation = 1 / (1 + np.exp(-pre_activation))\n",
75
+ " elif activation == 'tanh':\n",
76
+ " post_activation = np.tanh(pre_activation)\n",
77
+ " else:\n",
78
+ " raise ValueError(\"Unsupported activation function\")\n",
79
+ " \n",
80
+ " print(\"\\nPost-activated values:\\n\", post_activation)\n",
81
+ " return pre_activation, post_activation"
82
+ ]
83
+ },
84
+ {
85
+ "cell_type": "code",
86
+ "execution_count": null,
87
+ "id": "6bcc03bf",
88
+ "metadata": {},
89
+ "outputs": [],
90
+ "source": [
91
+ "W = np.random.randn(3, 4)\n",
92
+ "b = np.random.randn(4)\n",
93
+ "pre_act, post_act = print_activations(X, W, b, activation='relu')"
94
+ ]
95
+ },
96
+ {
97
+ "cell_type": "raw",
98
+ "id": "4cfb185b-a048-4806-879e-ad5af75b2299",
99
+ "metadata": {},
100
+ "source": [
101
+ "2. Check backpropagation correctness"
102
+ ]
103
+ },
104
+ {
105
+ "cell_type": "code",
106
+ "execution_count": null,
107
+ "id": "ca501ee2",
108
+ "metadata": {},
109
+ "outputs": [],
110
+ "source": [
111
+ "def check_backprop():\n",
112
+ " print(\"\\n2. Checking backpropagation...\")\n",
113
+ " \n",
114
+ " model = torch.nn.Sequential(\n",
115
+ " torch.nn.Linear(2, 3),\n",
116
+ " torch.nn.Tanh(),\n",
117
+ " torch.nn.Linear(3, 1)\n",
118
+ " ).double()\n",
119
+ " \n",
120
+ " X = torch.randn(5, 2, dtype=torch.double, requires_grad=True)\n",
121
+ " y = torch.randn(5, 1, dtype=torch.double)\n",
122
+ " \n",
123
+ " test_input = torch.autograd.gradcheck(\n",
124
+ " lambda x: torch.nn.functional.mse_loss(model(x), y),\n",
125
+ " X,\n",
126
+ " eps=1e-6,\n",
127
+ " atol=1e-4,\n",
128
+ " rtol=1e-4,\n",
129
+ " raise_exception=False\n",
130
+ " )\n",
131
+ " print(\"Input gradient check passed:\", test_input)\n",
132
+ " \n",
133
+ " for name, param in model.named_parameters():\n",
134
+ " if param.requires_grad:\n",
135
+ " def func(input):\n",
136
+ " with torch.no_grad():\n",
137
+ " old_data = param.data.clone()\n",
138
+ " param.data.copy_(input)\n",
139
+ " output = model(X)\n",
140
+ " loss = torch.nn.functional.mse_loss(output, y)\n",
141
+ " with torch.no_grad():\n",
142
+ " param.data.copy_(old_data)\n",
143
+ " return loss\n",
144
+ " \n",
145
+ " test_param = torch.autograd.gradcheck(\n",
146
+ " func,\n",
147
+ " param.data.clone().requires_grad_(True),\n",
148
+ " eps=1e-6,\n",
149
+ " atol=1e-4,\n",
150
+ " rtol=1e-4,\n",
151
+ " raise_exception=False\n",
152
+ " )\n",
153
+ " print(f\"Parameter {name} gradient check passed:\", test_param)"
154
+ ]
155
+ },
156
+ {
157
+ "cell_type": "code",
158
+ "execution_count": null,
159
+ "id": "b2f0082f",
160
+ "metadata": {},
161
+ "outputs": [],
162
+ "source": [
163
+ "check_backprop()"
164
+ ]
165
+ },
166
+ {
167
+ "cell_type": "raw",
168
+ "id": "b5a4397a-fd32-4a34-a14d-8c777be04812",
169
+ "metadata": {},
170
+ "source": [
171
+ "3. Standardize values"
172
+ ]
173
+ },
174
+ {
175
+ "cell_type": "code",
176
+ "execution_count": null,
177
+ "id": "75ec2b4b",
178
+ "metadata": {},
179
+ "outputs": [],
180
+ "source": [
181
+ "def standardize_data(X):\n",
182
+ " print(\"\\n3. Standardizing data...\")\n",
183
+ " scaler = StandardScaler()\n",
184
+ " X_std = scaler.fit_transform(X)\n",
185
+ " print(\"Mean after standardization:\", X_std.mean(axis=0))\n",
186
+ " print(\"Std after standardization:\", X_std.std(axis=0))\n",
187
+ " return X_std"
188
+ ]
189
+ },
190
+ {
191
+ "cell_type": "code",
192
+ "execution_count": null,
193
+ "id": "af896e94",
194
+ "metadata": {},
195
+ "outputs": [],
196
+ "source": [
197
+ "X_std = standardize_data(X)"
198
+ ]
199
+ },
200
+ {
201
+ "cell_type": "raw",
202
+ "id": "116879b1-151c-4bcd-8ace-cab5d5daaa2f",
203
+ "metadata": {},
204
+ "source": [
205
+ "4. Normalize values (min=10, max=50)"
206
+ ]
207
+ },
208
+ {
209
+ "cell_type": "code",
210
+ "execution_count": null,
211
+ "id": "4d77159d",
212
+ "metadata": {},
213
+ "outputs": [],
214
+ "source": [
215
+ "def normalize_data(X):\n",
216
+ " print(\"\\n4. Normalizing data (min=10, max=50)...\")\n",
217
+ " scaler = MinMaxScaler(feature_range=(10, 50))\n",
218
+ " X_norm = scaler.fit_transform(X)\n",
219
+ " print(\"Min after normalization:\", X_norm.min(axis=0))\n",
220
+ " print(\"Max after normalization:\", X_norm.max(axis=0))\n",
221
+ " return X_norm"
222
+ ]
223
+ },
224
+ {
225
+ "cell_type": "code",
226
+ "execution_count": null,
227
+ "id": "d0d4596e",
228
+ "metadata": {},
229
+ "outputs": [],
230
+ "source": [
231
+ "X_norm = normalize_data(X)"
232
+ ]
233
+ },
234
+ {
235
+ "cell_type": "raw",
236
+ "id": "3e39dd60-9f56-48be-9d9f-2106a8209ea4",
237
+ "metadata": {},
238
+ "source": [
239
+ "5. Xavier initialization"
240
+ ]
241
+ },
242
+ {
243
+ "cell_type": "code",
244
+ "execution_count": null,
245
+ "id": "2132e8d4",
246
+ "metadata": {},
247
+ "outputs": [],
248
+ "source": [
249
+ "def xavier_init(n_input, n_output):\n",
250
+ " print(\"\\n5. Xavier initialization...\")\n",
251
+ " std_normal = np.sqrt(2.0 / (n_input + n_output))\n",
252
+ " weights_normal = np.random.normal(0, std_normal, (n_input, n_output))\n",
253
+ " \n",
254
+ " limit = np.sqrt(6.0 / (n_input + n_output))\n",
255
+ " weights_uniform = np.random.uniform(-limit, limit, (n_input, n_output))\n",
256
+ " \n",
257
+ " print(\"Xavier Normal weights mean/std:\", weights_normal.mean(), weights_normal.std())\n",
258
+ " print(\"Xavier Uniform weights min/max:\", weights_uniform.min(), weights_uniform.max())\n",
259
+ " return weights_normal, weights_uniform"
260
+ ]
261
+ },
262
+ {
263
+ "cell_type": "code",
264
+ "execution_count": null,
265
+ "id": "96b7c91b",
266
+ "metadata": {},
267
+ "outputs": [],
268
+ "source": [
269
+ "weights_normal, weights_uniform = xavier_init(3, 4)"
270
+ ]
271
+ },
272
+ {
273
+ "cell_type": "raw",
274
+ "id": "2273476f-e51c-48a0-9426-cf8624b6e963",
275
+ "metadata": {},
276
+ "source": [
277
+ "5. ReLU variants"
278
+ ]
279
+ },
280
+ {
281
+ "cell_type": "code",
282
+ "execution_count": null,
283
+ "id": "e4805aa6",
284
+ "metadata": {},
285
+ "outputs": [],
286
+ "source": [
287
+ "def apply_relu_variants(pre_activation):\n",
288
+ " print(\"\\n5. Applying ReLU variants...\")\n",
289
+ " relu = np.maximum(0, pre_activation)\n",
290
+ " \n",
291
+ " leaky = np.where(pre_activation > 0, pre_activation, pre_activation * 0.01)\n",
292
+ " \n",
293
+ " parametric = np.where(pre_activation > 0, pre_activation, pre_activation * 0.25)\n",
294
+ " \n",
295
+ " elu = np.where(pre_activation > 0, pre_activation, 1.0 * (np.exp(pre_activation) - 1))\n",
296
+ "\n",
297
+ " swish = pre_activation * (1 / (1 + np.exp(-pre_activation)))\n",
298
+ " \n",
299
+ " print(\"Original values:\\n\", pre_activation)\n",
300
+ " print(\"\\nReLU:\\n\", relu)\n",
301
+ " print(\"\\nLeaky ReLU:\\n\", leaky)\n",
302
+ " print(\"\\nParametric ReLU:\\n\", parametric)\n",
303
+ " print(\"\\nELU:\\n\", elu)\n",
304
+ " print(\"\\nSwish:\\n\", swish)\n",
305
+ " \n",
306
+ " return {'relu': relu, 'leaky': leaky, 'parametric': parametric, 'elu': elu, 'swish': swish}"
307
+ ]
308
+ },
309
+ {
310
+ "cell_type": "code",
311
+ "execution_count": null,
312
+ "id": "d91f44d1",
313
+ "metadata": {},
314
+ "outputs": [],
315
+ "source": [
316
+ "relu_results = apply_relu_variants(pre_act)"
317
+ ]
318
+ },
319
+ {
320
+ "cell_type": "raw",
321
+ "id": "6db6a2dc-1cf5-4f96-ba2b-e6d7ad219c38",
322
+ "metadata": {},
323
+ "source": [
324
+ "6. Learning rate decay"
325
+ ]
326
+ },
327
+ {
328
+ "cell_type": "code",
329
+ "execution_count": null,
330
+ "id": "f6903dab",
331
+ "metadata": {},
332
+ "outputs": [],
333
+ "source": [
334
+ "def learning_rate_decay(initial_lr=0.1, decay_type='exponential', steps=100):\n",
335
+ " print(f\"\\n6. {decay_type} learning rate decay over {steps} steps:\")\n",
336
+ " \n",
337
+ " lr_values = []\n",
338
+ " for global_step in range(steps):\n",
339
+ " if decay_type == 'exponential':\n",
340
+ " decay_rate = 0.96\n",
341
+ " lr = initial_lr * decay_rate ** (global_step / (steps/10))\n",
342
+ " elif decay_type == 'inverse_time':\n",
343
+ " decay_rate = 1.0\n",
344
+ " lr = initial_lr / (1 + decay_rate * global_step / (steps/10))\n",
345
+ " else:\n",
346
+ " raise ValueError(\"Unsupported decay type\")\n",
347
+ " lr_values.append(lr)\n",
348
+ " \n",
349
+ " plt.figure(figsize=(10, 5))\n",
350
+ " plt.plot(lr_values)\n",
351
+ " plt.title(f\"{decay_type} Learning Rate Decay\")\n",
352
+ " plt.xlabel(\"Step\")\n",
353
+ " plt.ylabel(\"Learning Rate\")\n",
354
+ " plt.grid()\n",
355
+ " plt.show()\n",
356
+ " \n",
357
+ " return lr_values"
358
+ ]
359
+ },
360
+ {
361
+ "cell_type": "code",
362
+ "execution_count": null,
363
+ "id": "759ef663",
364
+ "metadata": {},
365
+ "outputs": [],
366
+ "source": [
367
+ "lr_exp = learning_rate_decay(decay_type='exponential')\n",
368
+ "lr_inv = learning_rate_decay(decay_type='inverse_time')"
369
+ ]
370
+ },
371
+ {
372
+ "cell_type": "raw",
373
+ "id": "865922ec-db01-4b34-b848-c1971f00ef62",
374
+ "metadata": {},
375
+ "source": [
376
+ "7. Neural network with different optimizers"
377
+ ]
378
+ },
379
+ {
380
+ "cell_type": "code",
381
+ "execution_count": null,
382
+ "id": "75a65fab",
383
+ "metadata": {},
384
+ "outputs": [],
385
+ "source": [
386
+ "def compare_optimizers():\n",
387
+ " print(\"\\n7. Comparing optimizers...\")\n",
388
+ " \n",
389
+ " X, y = make_regression(n_samples=100, n_features=5, noise=0.1, random_state=42)\n",
390
+ " X = X.astype(np.float32)\n",
391
+ " y = y.astype(np.float32).reshape(-1, 1)\n",
392
+ "\n",
393
+ " X_tensor = torch.tensor(X, dtype=torch.float32)\n",
394
+ " y_tensor = torch.tensor(y, dtype=torch.float32)\n",
395
+ " \n",
396
+ " def create_model():\n",
397
+ " return torch.nn.Sequential(\n",
398
+ " torch.nn.Linear(5, 10),\n",
399
+ " torch.nn.ReLU(),\n",
400
+ " torch.nn.Linear(10, 1)\n",
401
+ " )\n",
402
+ "\n",
403
+ " optimizers = {\n",
404
+ " 'SGD': optim.SGD,\n",
405
+ " 'Adagrad': optim.Adagrad,\n",
406
+ " 'RMSprop': optim.RMSprop,\n",
407
+ " 'Adam': optim.Adam,\n",
408
+ " 'Adadelta': optim.Adadelta\n",
409
+ " }\n",
410
+ " \n",
411
+ " n_epochs = 200\n",
412
+ " results = {}\n",
413
+ " \n",
414
+ " plt.figure(figsize=(12, 6))\n",
415
+ " \n",
416
+ " for name, opt_class in optimizers.items():\n",
417
+ " model = create_model()\n",
418
+ " optimizer = opt_class(model.parameters(), lr=0.01)\n",
419
+ " \n",
420
+ " losses = []\n",
421
+ " for epoch in range(n_epochs):\n",
422
+ " optimizer.zero_grad()\n",
423
+ " y_pred = model(X_tensor)\n",
424
+ " loss = torch.nn.functional.mse_loss(y_pred, y_tensor)\n",
425
+ " loss.backward()\n",
426
+ " optimizer.step()\n",
427
+ " losses.append(loss.item())\n",
428
+ " \n",
429
+ " results[name] = losses\n",
430
+ " plt.plot(losses, label=name)\n",
431
+ " print(f\"{name}: Final loss = {losses[-1]:.4f}\")\n",
432
+ " \n",
433
+ " plt.title(\"Optimizer Comparison\")\n",
434
+ " plt.xlabel(\"Epoch\")\n",
435
+ " plt.ylabel(\"Loss\")\n",
436
+ " plt.legend()\n",
437
+ " plt.grid()\n",
438
+ " plt.show()\n",
439
+ " \n",
440
+ " return results"
441
+ ]
442
+ },
443
+ {
444
+ "cell_type": "code",
445
+ "execution_count": null,
446
+ "id": "b09a56d5",
447
+ "metadata": {},
448
+ "outputs": [],
449
+ "source": [
450
+ "optimizer_results = compare_optimizers()"
451
+ ]
452
+ },
453
+ {
454
+ "cell_type": "raw",
455
+ "id": "4bbeda82-1dc3-4964-868e-2cce413cd927",
456
+ "metadata": {},
457
+ "source": [
458
+ "8. Gradient clipping"
459
+ ]
460
+ },
461
+ {
462
+ "cell_type": "code",
463
+ "execution_count": null,
464
+ "id": "7b281491",
465
+ "metadata": {},
466
+ "outputs": [],
467
+ "source": [
468
+ "def clip_gradients(gradients, method='value', threshold=1.0):\n",
469
+ " print(f\"\\n8. Gradient clipping ({method} with threshold={threshold})...\")\n",
470
+ " \n",
471
+ " if method == 'value':\n",
472
+ " clipped = [np.clip(g, -threshold, threshold) for g in gradients]\n",
473
+ " elif method == 'norm':\n",
474
+ " global_norm = np.sqrt(sum(np.sum(g**2) for g in gradients))\n",
475
+ " scale = threshold / max(global_norm, threshold)\n",
476
+ " clipped = [g * scale for g in gradients]\n",
477
+ " else:\n",
478
+ " raise ValueError(\"Unsupported clipping method\")\n",
479
+ " \n",
480
+ " print(\"Original gradient norms:\", [np.linalg.norm(g) for g in gradients])\n",
481
+ " print(\"Clipped gradient norms:\", [np.linalg.norm(g) for g in clipped])\n",
482
+ " \n",
483
+ " return clipped"
484
+ ]
485
+ },
486
+ {
487
+ "cell_type": "code",
488
+ "execution_count": null,
489
+ "id": "c09dd387",
490
+ "metadata": {},
491
+ "outputs": [],
492
+ "source": [
493
+ "gradients = [np.random.randn(3,4), np.random.randn(4,1)]\n",
494
+ "clipped_value = clip_gradients(gradients, method='value', threshold=1.0)\n",
495
+ "clipped_norm = clip_gradients(gradients, method='norm', threshold=1.0)"
496
+ ]
497
+ },
498
+ {
499
+ "cell_type": "raw",
500
+ "id": "6680e526-6710-481c-bccf-a0ab4b643eac",
501
+ "metadata": {},
502
+ "source": [
503
+ "9. Hessian-Free Optimization"
504
+ ]
505
+ },
506
+ {
507
+ "cell_type": "code",
508
+ "execution_count": null,
509
+ "id": "500ee068",
510
+ "metadata": {},
511
+ "outputs": [],
512
+ "source": [
513
+ "def hessian_free_optimization():\n",
514
+ " print(\"\\n9. Hessian-Free Optimization...\")\n",
515
+ " \n",
516
+ " print(\"PyTorch implementation:\")\n",
517
+ " x = torch.randn(3, requires_grad=True, dtype=torch.double)\n",
518
+ " \n",
519
+ " def f(x):\n",
520
+ " return x @ torch.diag(torch.tensor([1.0, 2.0, 3.0], dtype=torch.double)) @ x\n",
521
+ " \n",
522
+ " grad = torch.autograd.grad(f(x), x, create_graph=True)[0]\n",
523
+ " \n",
524
+ " def hvp(v):\n",
525
+ " return torch.autograd.grad(grad @ v, x, retain_graph=True)[0]\n",
526
+ " \n",
527
+ " def cg_solve(A, b, max_iter=10, tol=1e-6):\n",
528
+ " x = torch.zeros_like(b)\n",
529
+ " r = b - A(x)\n",
530
+ " p = r.clone()\n",
531
+ " rsold = r @ r\n",
532
+ " \n",
533
+ " for i in range(max_iter):\n",
534
+ " Ap = A(p)\n",
535
+ " alpha = rsold / (p @ Ap)\n",
536
+ " x = x + alpha * p\n",
537
+ " r = r - alpha * Ap\n",
538
+ " rsnew = r @ r\n",
539
+ " if torch.sqrt(rsnew) < tol:\n",
540
+ " break\n",
541
+ " p = r + (rsnew / rsold) * p\n",
542
+ " rsold = rsnew\n",
543
+ " \n",
544
+ " return x\n",
545
+ " \n",
546
+ " v = cg_solve(hvp, -grad)\n",
547
+ " print(\"Solution v:\", v.detach().numpy())"
548
+ ]
549
+ },
550
+ {
551
+ "cell_type": "code",
552
+ "execution_count": null,
553
+ "id": "5401b781",
554
+ "metadata": {},
555
+ "outputs": [],
556
+ "source": [
557
+ "hessian_free_optimization()"
558
+ ]
559
+ },
560
+ {
561
+ "cell_type": "raw",
562
+ "id": "fe14e90e-d4a2-4d37-a27c-94b632a8c5a1",
563
+ "metadata": {},
564
+ "source": [
565
+ "10. Hessian matrix analysis"
566
+ ]
567
+ },
568
+ {
569
+ "cell_type": "code",
570
+ "execution_count": null,
571
+ "id": "cbc5032d",
572
+ "metadata": {},
573
+ "outputs": [],
574
+ "source": [
575
+ "def hessian_analysis():\n",
576
+ " print(\"\\n10. Hessian matrix analysis...\")\n",
577
+ " \n",
578
+ " def f(x):\n",
579
+ " return x[0]**2 + x[1]**3 - x[1]**2\n",
580
+ " \n",
581
+ " critical_point = np.array([0, 2/3])\n",
582
+ " \n",
583
+ " eps = 1e-5\n",
584
+ " hessian = np.zeros((2, 2))\n",
585
+ " \n",
586
+ " for i in range(2):\n",
587
+ " for j in range(2):\n",
588
+ " def partial_derivative(x):\n",
589
+ " x_plus = x.copy()\n",
590
+ " x_plus[j] += eps\n",
591
+ " x_minus = x.copy()\n",
592
+ " x_minus[j] -= eps\n",
593
+ " return (f(x_plus) - f(x_minus)) / (2 * eps)\n",
594
+ " \n",
595
+ " x_plus = critical_point.copy()\n",
596
+ " x_plus[i] += eps\n",
597
+ " x_minus = critical_point.copy()\n",
598
+ " x_minus[i] -= eps\n",
599
+ " hessian[i,j] = (partial_derivative(x_plus) - partial_derivative(x_minus)) / (2 * eps)\n",
600
+ " \n",
601
+ " print(\"Hessian matrix:\\n\", hessian)\n",
602
+ " \n",
603
+ " eigenvalues = np.linalg.eigvals(hessian)\n",
604
+ " print(\"Eigenvalues:\", eigenvalues)\n",
605
+ " \n",
606
+ " if all(eig > 0 for eig in eigenvalues):\n",
607
+ " print(\"Conclusion: Local minimum\")\n",
608
+ " elif all(eig < 0 for eig in eigenvalues):\n",
609
+ " print(\"Conclusion: Local maximum\")\n",
610
+ " else:\n",
611
+ " print(\"Conclusion: Saddle point\")"
612
+ ]
613
+ },
614
+ {
615
+ "cell_type": "code",
616
+ "execution_count": null,
617
+ "id": "8a2503c5",
618
+ "metadata": {},
619
+ "outputs": [],
620
+ "source": [
621
+ "hessian_analysis()"
622
+ ]
623
+ }
624
+ ],
625
+ "metadata": {
626
+ "kernelspec": {
627
+ "display_name": "Python 3 (ipykernel)",
628
+ "language": "python",
629
+ "name": "python3"
630
+ },
631
+ "language_info": {
632
+ "codemirror_mode": {
633
+ "name": "ipython",
634
+ "version": 3
635
+ },
636
+ "file_extension": ".py",
637
+ "mimetype": "text/x-python",
638
+ "name": "python",
639
+ "nbconvert_exporter": "python",
640
+ "pygments_lexer": "ipython3",
641
+ "version": "3.12.4"
642
+ }
643
+ },
644
+ "nbformat": 4,
645
+ "nbformat_minor": 5
646
+ }