noshot 12.0.0__py3-none-any.whl → 14.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. noshot/data/DLE FSD BDA/BDA/BDA Lab/Ex1/input.txt +1 -0
  2. noshot/data/DLE FSD BDA/BDA/BDA Lab/Ex1/mapper.py +6 -0
  3. noshot/data/DLE FSD BDA/BDA/BDA Lab/Ex1/reducer.py +22 -0
  4. noshot/data/DLE FSD BDA/BDA/BDA Lab/Ex2/Weatherdataset.csv +200 -0
  5. noshot/data/DLE FSD BDA/BDA/BDA Lab/Ex2/mapper.py +20 -0
  6. noshot/data/DLE FSD BDA/BDA/BDA Lab/Ex2/reducer.py +32 -0
  7. noshot/data/DLE FSD BDA/BDA/BDA Lab/Ex3/BF_Map.py +11 -0
  8. noshot/data/DLE FSD BDA/BDA/BDA Lab/Ex3/BF_Red.py +30 -0
  9. noshot/data/DLE FSD BDA/BDA/BDA Lab/Ex3/bloom_filter.py +71 -0
  10. noshot/data/DLE FSD BDA/BDA/BDA Lab/Ex3/bloom_filter_mapper.py +71 -0
  11. noshot/data/DLE FSD BDA/BDA/BDA Lab/Ex3/bloom_filter_reducer.py +71 -0
  12. noshot/data/DLE FSD BDA/BDA/BDA Lab/Ex3/weblog.csv +100 -0
  13. noshot/data/DLE FSD BDA/BDA/BDA Lab/Ex4/FMA_mapper.py +14 -0
  14. noshot/data/DLE FSD BDA/BDA/BDA Lab/Ex4/FMA_reducer.py +14 -0
  15. noshot/data/DLE FSD BDA/BDA/BDA Lab/Ex4/Tweets.csv +92 -0
  16. noshot/data/DLE FSD BDA/BDA/BDA Lab/Instructions.txt +56 -0
  17. noshot/data/DLE FSD BDA/BDA/BDA Lab.iso +0 -0
  18. noshot/data/DLE FSD BDA/DLE/DLE 2 (tim stan s)/1.1 DNN (Pytorch).ipynb +164 -0
  19. noshot/data/DLE FSD BDA/DLE/DLE 2 (tim stan s)/1.2 DNN (Tensorflow).ipynb +94 -0
  20. noshot/data/DLE FSD BDA/DLE/DLE 2 (tim stan s)/1.3 DNN (Image Classification).ipynb +134 -0
  21. noshot/data/DLE FSD BDA/DLE/DLE 2 (tim stan s)/2.1 DNN vs CNN.ipynb +127 -0
  22. noshot/data/DLE FSD BDA/DLE/DLE 2 (tim stan s)/2.2 DNN vs CNN.ipynb +123 -0
  23. noshot/data/DLE FSD BDA/DLE/DLE 2 (tim stan s)/3 Bounding Boxes.ipynb +109 -0
  24. noshot/data/DLE FSD BDA/DLE/DLE 2 (tim stan s)/4. FCNN (Image Segmentation).ipynb +108 -0
  25. noshot/data/DLE FSD BDA/DLE/DLE 2 (tim stan s)/Lab Excercise (Training DNN).ipynb +646 -0
  26. noshot/data/DLE FSD BDA/DLE/DLE 2 (tim stan s)/Load-Images.ipynb +553 -0
  27. noshot/data/DLE FSD BDA/DLE/DLE 3 (sonic boy)/Ex1.ipynb +216 -0
  28. noshot/data/DLE FSD BDA/DLE/DLE 3 (sonic boy)/Ex2.ipynb +195 -0
  29. noshot/data/DLE FSD BDA/DLE/DLE 3 (sonic boy)/Ex3.ipynb +427 -0
  30. noshot/data/DLE FSD BDA/DLE/DLE 3 (sonic boy)/Ex4.ipynb +186 -0
  31. noshot/data/DLE FSD BDA/DLE/DLE 4 (senior)/Exp01/DNN Ex No 1.ipynb +398 -0
  32. noshot/data/DLE FSD BDA/DLE/DLE 4 (senior)/Exp01/Ex No 1 Build in dataset.ipynb +171 -0
  33. noshot/data/DLE FSD BDA/DLE/DLE 4 (senior)/Exp01/Exp1-Short-DL_ANN_ImageClassification.ipynb +401 -0
  34. noshot/data/DLE FSD BDA/DLE/DLE 4 (senior)/Exp01/OR GATE .ipynb +8511 -0
  35. noshot/data/DLE FSD BDA/DLE/DLE 4 (senior)/Exp02/Exp2-Short-DL_CNN_ImageClassification.ipynb +737 -0
  36. noshot/data/DLE FSD BDA/DLE/DLE 4 (senior)/Exp03/DL-Ex3-RNN.ipynb +591 -0
  37. noshot/data/DLE FSD BDA/DLE/DLE 4 (senior)/Exp04/Ex no 4.ipynb +551 -0
  38. {noshot-12.0.0.dist-info → noshot-14.0.0.dist-info}/METADATA +1 -1
  39. noshot-14.0.0.dist-info/RECORD +50 -0
  40. noshot-12.0.0.dist-info/RECORD +0 -13
  41. /noshot/data/DLE FSD BDA/DLE/{1. DNN (Image Classification).ipynb → DLE 1 (Json)/1. DNN (Image Classification).ipynb} +0 -0
  42. /noshot/data/DLE FSD BDA/DLE/{2. DNN vs CNN.ipynb → DLE 1 (Json)/2. DNN vs CNN.ipynb} +0 -0
  43. /noshot/data/DLE FSD BDA/DLE/{3. CNN (Object Detecrion).ipynb → DLE 1 (Json)/3. CNN (Object Detecrion).ipynb} +0 -0
  44. /noshot/data/DLE FSD BDA/DLE/{4. FCN (Image Segmentaion).ipynb → DLE 1 (Json)/4. FCN (Image Segmentaion).ipynb} +0 -0
  45. {noshot-12.0.0.dist-info → noshot-14.0.0.dist-info}/WHEEL +0 -0
  46. {noshot-12.0.0.dist-info → noshot-14.0.0.dist-info}/licenses/LICENSE.txt +0 -0
  47. {noshot-12.0.0.dist-info → noshot-14.0.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,646 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "raw",
5
+ "id": "fbc03c64-ef16-4473-b581-fc27a0fc5e37",
6
+ "metadata": {},
7
+ "source": [
8
+ "1. Print the pre-activated values and post-activated values of a neural network\n",
9
+ "2. Check the correctness of backpropagation\n",
10
+ "3. Standardise the values\n",
11
+ "4. Normalise the values by keeping min as 10 and max as 50\n",
12
+ "5. Initialize the weights using normal and uniform xavier initialization\n",
13
+ "5. Apply the variants of ReLU for the preactivated values and print the results\n",
14
+ "6. Apply exponential decay and inverse time decay for adjusting the initial learning rate\n",
15
+ "7. Implement a feed-forward neural network with different optimizers (SGD, Adagrad, RMSProp, RMSProp with Nesterov momentum, AdaDelta and Adam) and compare the results\n",
16
+ "8. Clip the gradients using value & norm based clipping\n",
17
+ "9. Implement Hessian-Free Optimization (HFO): compute the Hessian-vector product (HVP) using PyTorch's autograd and TensorFlow's nested tf.GradientTape, then solve the Newton system Hv = −∇f using the Conjugate Gradient (CG) method.\n",
18
+ "10. Compute the Hessian matrix of a scalar function and determine if a critical point is a saddle point, local minimum, or local maximum by analyzing the eigenvalues of the Hessian"
19
+ ]
20
+ },
21
+ {
22
+ "cell_type": "code",
23
+ "execution_count": null,
24
+ "id": "489759c2",
25
+ "metadata": {},
26
+ "outputs": [],
27
+ "source": [
28
+ "import numpy as np\n",
29
+ "import torch\n",
30
+ "import torch.optim as optim\n",
31
+ "import tensorflow as tf\n",
32
+ "from sklearn.preprocessing import StandardScaler, MinMaxScaler\n",
33
+ "from sklearn.datasets import make_regression\n",
34
+ "import matplotlib.pyplot as plt"
35
+ ]
36
+ },
37
+ {
38
+ "cell_type": "code",
39
+ "execution_count": null,
40
+ "id": "7e92ecaa",
41
+ "metadata": {},
42
+ "outputs": [],
43
+ "source": [
44
+ "np.random.seed(42)\n",
45
+ "torch.manual_seed(42)\n",
46
+ "tf.random.set_seed(42)\n",
47
+ "\n",
48
+ "X = np.random.randn(10, 3)\n",
49
+ "y = np.random.randn(10, 1)"
50
+ ]
51
+ },
52
+ {
53
+ "cell_type": "raw",
54
+ "id": "d4c1f591-424f-44d0-bc84-c04f8839fc8f",
55
+ "metadata": {},
56
+ "source": [
57
+ "1. Print pre-activated and post-activated values"
58
+ ]
59
+ },
60
+ {
61
+ "cell_type": "code",
62
+ "execution_count": null,
63
+ "id": "7388227e",
64
+ "metadata": {},
65
+ "outputs": [],
66
+ "source": [
def print_activations(X, W, b, activation='relu'):
    """Compute one dense layer and print its pre- and post-activation values.

    Args:
        X: Input array; must be compatible with ``np.dot(X, W)``.
        W: Weight array.
        b: Bias, broadcast-added to ``np.dot(X, W)``.
        activation: One of ``'relu'``, ``'sigmoid'`` or ``'tanh'``.

    Returns:
        Tuple ``(pre_activation, post_activation)``.

    Raises:
        ValueError: If ``activation`` is not one of the supported names. The
            pre-activation values have already been printed at that point.
    """
    pre_activation = np.dot(X, W) + b
    print("\n1. Pre-activated values:\n", pre_activation)

    if activation == 'relu':
        post_activation = np.maximum(0, pre_activation)
    elif activation == 'sigmoid':
        # exp(-|z|) is always in (0, 1], so it cannot overflow the way
        # exp(-z) does for large negative z. The two branches are the same
        # sigmoid written for z >= 0 and z < 0 respectively.
        decay = np.exp(-np.abs(pre_activation))
        post_activation = np.where(
            pre_activation >= 0,
            1 / (1 + decay),
            decay / (1 + decay),
        )
    elif activation == 'tanh':
        post_activation = np.tanh(pre_activation)
    else:
        raise ValueError("Unsupported activation function")

    print("\nPost-activated values:\n", post_activation)
    return pre_activation, post_activation
82
+ ]
83
+ },
84
+ {
85
+ "cell_type": "code",
86
+ "execution_count": null,
87
+ "id": "6bcc03bf",
88
+ "metadata": {},
89
+ "outputs": [],
90
+ "source": [
91
+ "W = np.random.randn(3, 4)\n",
92
+ "b = np.random.randn(4)\n",
93
+ "pre_act, post_act = print_activations(X, W, b, activation='relu')"
94
+ ]
95
+ },
96
+ {
97
+ "cell_type": "raw",
98
+ "id": "4cfb185b-a048-4806-879e-ad5af75b2299",
99
+ "metadata": {},
100
+ "source": [
101
+ "2. Check backpropagation correctness"
102
+ ]
103
+ },
104
+ {
105
+ "cell_type": "code",
106
+ "execution_count": null,
107
+ "id": "ca501ee2",
108
+ "metadata": {},
109
+ "outputs": [],
110
+ "source": [
def check_backprop():
    """Numerically verify autograd gradients of a small tanh MLP.

    Builds a double-precision ``Linear(2, 3) -> Tanh -> Linear(3, 1)`` model
    with random inputs and targets, then runs ``torch.autograd.gradcheck`` on
    the MSE loss, first with respect to the input and then with respect to
    each parameter. The outcome of every check is printed; nothing is
    returned.
    """
    # Local import: only this function needs it.
    from torch.func import functional_call

    print("\n2. Checking backpropagation...")

    model = torch.nn.Sequential(
        torch.nn.Linear(2, 3),
        torch.nn.Tanh(),
        torch.nn.Linear(3, 1)
    ).double()

    X = torch.randn(5, 2, dtype=torch.double, requires_grad=True)
    y = torch.randn(5, 1, dtype=torch.double)

    test_input = torch.autograd.gradcheck(
        lambda x: torch.nn.functional.mse_loss(model(x), y),
        X,
        eps=1e-6,
        atol=1e-4,
        rtol=1e-4,
        raise_exception=False
    )
    print("Input gradient check passed:", test_input)

    # The parameter checks differentiate with respect to the parameters only,
    # so the input is used as a constant.
    X_fixed = X.detach()

    for name, param in model.named_parameters():
        if not param.requires_grad:
            continue

        def loss_for(candidate, name=name):
            # Run the model with `candidate` substituted for this parameter
            # so the loss stays connected to the gradcheck input. Copying the
            # candidate into `param.data` would cut that connection and make
            # the analytical gradient zero.
            output = functional_call(model, {name: candidate}, (X_fixed,))
            return torch.nn.functional.mse_loss(output, y)

        test_param = torch.autograd.gradcheck(
            loss_for,
            param.detach().clone().requires_grad_(True),
            eps=1e-6,
            atol=1e-4,
            rtol=1e-4,
            raise_exception=False
        )
        print(f"Parameter {name} gradient check passed:", test_param)
154
+ ]
155
+ },
156
+ {
157
+ "cell_type": "code",
158
+ "execution_count": null,
159
+ "id": "b2f0082f",
160
+ "metadata": {},
161
+ "outputs": [],
162
+ "source": [
163
+ "check_backprop()"
164
+ ]
165
+ },
166
+ {
167
+ "cell_type": "raw",
168
+ "id": "b5a4397a-fd32-4a34-a14d-8c777be04812",
169
+ "metadata": {},
170
+ "source": [
171
+ "3. Standardize values"
172
+ ]
173
+ },
174
+ {
175
+ "cell_type": "code",
176
+ "execution_count": null,
177
+ "id": "75ec2b4b",
178
+ "metadata": {},
179
+ "outputs": [],
180
+ "source": [
def standardize_data(X):
    """Fit a ``StandardScaler`` on ``X``, transform it and report the result.

    Prints the per-column mean and standard deviation of the transformed
    data.

    Args:
        X: 2-D array-like accepted by ``StandardScaler.fit_transform``.

    Returns:
        The transformed array.
    """
    print("\n3. Standardizing data...")
    scaled = StandardScaler().fit_transform(X)
    column_means = scaled.mean(axis=0)
    column_stds = scaled.std(axis=0)
    print("Mean after standardization:", column_means)
    print("Std after standardization:", column_stds)
    return scaled
188
+ ]
189
+ },
190
+ {
191
+ "cell_type": "code",
192
+ "execution_count": null,
193
+ "id": "af896e94",
194
+ "metadata": {},
195
+ "outputs": [],
196
+ "source": [
197
+ "X_std = standardize_data(X)"
198
+ ]
199
+ },
200
+ {
201
+ "cell_type": "raw",
202
+ "id": "116879b1-151c-4bcd-8ace-cab5d5daaa2f",
203
+ "metadata": {},
204
+ "source": [
205
+ "4. Normalize values (min=10, max=50)"
206
+ ]
207
+ },
208
+ {
209
+ "cell_type": "code",
210
+ "execution_count": null,
211
+ "id": "4d77159d",
212
+ "metadata": {},
213
+ "outputs": [],
214
+ "source": [
def normalize_data(X):
    """Rescale ``X`` with a ``MinMaxScaler`` whose feature range is (10, 50).

    Prints the per-column minimum and maximum of the transformed data.

    Args:
        X: 2-D array-like accepted by ``MinMaxScaler.fit_transform``.

    Returns:
        The transformed array.
    """
    print("\n4. Normalizing data (min=10, max=50)...")
    rescaled = MinMaxScaler(feature_range=(10, 50)).fit_transform(X)
    column_mins = rescaled.min(axis=0)
    column_maxs = rescaled.max(axis=0)
    print("Min after normalization:", column_mins)
    print("Max after normalization:", column_maxs)
    return rescaled
222
+ ]
223
+ },
224
+ {
225
+ "cell_type": "code",
226
+ "execution_count": null,
227
+ "id": "d0d4596e",
228
+ "metadata": {},
229
+ "outputs": [],
230
+ "source": [
231
+ "X_norm = normalize_data(X)"
232
+ ]
233
+ },
234
+ {
235
+ "cell_type": "raw",
236
+ "id": "3e39dd60-9f56-48be-9d9f-2106a8209ea4",
237
+ "metadata": {},
238
+ "source": [
239
+ "5. Xavier initialization"
240
+ ]
241
+ },
242
+ {
243
+ "cell_type": "code",
244
+ "execution_count": null,
245
+ "id": "2132e8d4",
246
+ "metadata": {},
247
+ "outputs": [],
248
+ "source": [
def xavier_init(n_input, n_output):
    """Draw Xavier (Glorot) normal and uniform weight matrices.

    The normal draw uses standard deviation ``sqrt(2 / (n_input + n_output))``
    and the uniform draw uses bounds ``±sqrt(6 / (n_input + n_output))``.
    Both use NumPy's global random state, normal first and uniform second.

    Args:
        n_input: Number of rows of each weight matrix.
        n_output: Number of columns of each weight matrix.

    Returns:
        Tuple ``(weights_normal, weights_uniform)``, each of shape
        ``(n_input, n_output)``.
    """
    print("\n5. Xavier initialization...")
    fan_total = n_input + n_output
    shape = (n_input, n_output)

    sigma = np.sqrt(2.0 / fan_total)
    normal_draw = np.random.normal(0, sigma, shape)

    bound = np.sqrt(6.0 / fan_total)
    uniform_draw = np.random.uniform(-bound, bound, shape)

    print("Xavier Normal weights mean/std:", normal_draw.mean(), normal_draw.std())
    print("Xavier Uniform weights min/max:", uniform_draw.min(), uniform_draw.max())
    return normal_draw, uniform_draw
260
+ ]
261
+ },
262
+ {
263
+ "cell_type": "code",
264
+ "execution_count": null,
265
+ "id": "96b7c91b",
266
+ "metadata": {},
267
+ "outputs": [],
268
+ "source": [
269
+ "weights_normal, weights_uniform = xavier_init(3, 4)"
270
+ ]
271
+ },
272
+ {
273
+ "cell_type": "raw",
274
+ "id": "2273476f-e51c-48a0-9426-cf8624b6e963",
275
+ "metadata": {},
276
+ "source": [
277
+ "5. ReLU variants"
278
+ ]
279
+ },
280
+ {
281
+ "cell_type": "code",
282
+ "execution_count": null,
283
+ "id": "e4805aa6",
284
+ "metadata": {},
285
+ "outputs": [],
286
+ "source": [
def apply_relu_variants(pre_activation, leaky_slope=0.01, prelu_slope=0.25, elu_alpha=1.0):
    """Apply ReLU, Leaky ReLU, Parametric ReLU, ELU and Swish and print each result.

    Args:
        pre_activation: Array-like of pre-activation values.
        leaky_slope: Negative-side slope for Leaky ReLU.
        prelu_slope: Negative-side slope used for the Parametric ReLU output
            (a fixed value here, not a learned one).
        elu_alpha: Scale of the negative branch of ELU.

    Returns:
        Dict with keys ``'relu'``, ``'leaky'``, ``'parametric'``, ``'elu'``
        and ``'swish'`` mapping to the activated arrays.
    """
    print("\n5. Applying ReLU variants...")
    z = np.asarray(pre_activation)
    positive = z > 0

    relu = np.maximum(0, z)

    leaky = np.where(positive, z, z * leaky_slope)

    parametric = np.where(positive, z, z * prelu_slope)

    # np.where evaluates both branches for every element, so the exponent is
    # clamped to <= 0 to keep large positive inputs from overflowing.
    elu = np.where(positive, z, elu_alpha * (np.exp(np.minimum(z, 0)) - 1))

    # Sigmoid computed from exp(-|z|), which lies in (0, 1] and therefore
    # cannot overflow for large negative inputs.
    decay = np.exp(-np.abs(z))
    sigmoid = np.where(z >= 0, 1 / (1 + decay), decay / (1 + decay))
    swish = z * sigmoid

    print("Original values:\n", z)
    print("\nReLU:\n", relu)
    print("\nLeaky ReLU:\n", leaky)
    print("\nParametric ReLU:\n", parametric)
    print("\nELU:\n", elu)
    print("\nSwish:\n", swish)

    return {'relu': relu, 'leaky': leaky, 'parametric': parametric, 'elu': elu, 'swish': swish}
307
+ ]
308
+ },
309
+ {
310
+ "cell_type": "code",
311
+ "execution_count": null,
312
+ "id": "d91f44d1",
313
+ "metadata": {},
314
+ "outputs": [],
315
+ "source": [
316
+ "relu_results = apply_relu_variants(pre_act)"
317
+ ]
318
+ },
319
+ {
320
+ "cell_type": "raw",
321
+ "id": "6db6a2dc-1cf5-4f96-ba2b-e6d7ad219c38",
322
+ "metadata": {},
323
+ "source": [
324
+ "6. Learning rate decay"
325
+ ]
326
+ },
327
+ {
328
+ "cell_type": "code",
329
+ "execution_count": null,
330
+ "id": "f6903dab",
331
+ "metadata": {},
332
+ "outputs": [],
333
+ "source": [
def learning_rate_decay(initial_lr=0.1, decay_type='exponential', steps=100):
    """Compute and plot a decayed learning-rate schedule.

    The decay interval is ``steps / 10``. ``'exponential'`` uses a decay rate
    of 0.96 and ``'inverse_time'`` uses a decay rate of 1.0.

    Args:
        initial_lr: Learning rate at step 0.
        decay_type: ``'exponential'`` or ``'inverse_time'``.
        steps: Number of steps to evaluate.

    Returns:
        List with the learning rate at each step.

    Raises:
        ValueError: If ``decay_type`` is unsupported; raised when the first
            step is evaluated.
    """
    print(f"\n6. {decay_type} learning rate decay over {steps} steps:")

    def rate_at(global_step):
        # Schedule value for a single step; rejects unknown decay types.
        if decay_type == 'exponential':
            decay_rate = 0.96
            return initial_lr * decay_rate ** (global_step / (steps/10))
        if decay_type == 'inverse_time':
            decay_rate = 1.0
            return initial_lr / (1 + decay_rate * global_step / (steps/10))
        raise ValueError("Unsupported decay type")

    lr_values = [rate_at(global_step) for global_step in range(steps)]

    plt.figure(figsize=(10, 5))
    plt.plot(lr_values)
    plt.title(f"{decay_type} Learning Rate Decay")
    plt.xlabel("Step")
    plt.ylabel("Learning Rate")
    plt.grid()
    plt.show()

    return lr_values
358
+ ]
359
+ },
360
+ {
361
+ "cell_type": "code",
362
+ "execution_count": null,
363
+ "id": "759ef663",
364
+ "metadata": {},
365
+ "outputs": [],
366
+ "source": [
367
+ "lr_exp = learning_rate_decay(decay_type='exponential')\n",
368
+ "lr_inv = learning_rate_decay(decay_type='inverse_time')"
369
+ ]
370
+ },
371
+ {
372
+ "cell_type": "raw",
373
+ "id": "865922ec-db01-4b34-b848-c1971f00ef62",
374
+ "metadata": {},
375
+ "source": [
376
+ "7. Neural network with different optimizers"
377
+ ]
378
+ },
379
+ {
380
+ "cell_type": "code",
381
+ "execution_count": null,
382
+ "id": "75a65fab",
383
+ "metadata": {},
384
+ "outputs": [],
385
+ "source": [
def compare_optimizers():
    """Train the same small regression network with several optimizers.

    A synthetic 5-feature regression problem (100 samples) is fitted for 200
    full-batch epochs with SGD, Adagrad, RMSprop, Adam and Adadelta, all at
    ``lr=0.01``. The loss curves are plotted on one figure and each
    optimizer's final loss is printed.

    Every run starts from the same initial weights, so differences between
    the curves come from the optimizer and not from the random
    initialisation.

    Returns:
        Dict mapping optimizer name to its list of per-epoch MSE losses.
    """
    print("\n7. Comparing optimizers...")

    X, y = make_regression(n_samples=100, n_features=5, noise=0.1, random_state=42)
    X = X.astype(np.float32)
    y = y.astype(np.float32).reshape(-1, 1)

    X_tensor = torch.tensor(X, dtype=torch.float32)
    y_tensor = torch.tensor(y, dtype=torch.float32)

    def create_model():
        return torch.nn.Sequential(
            torch.nn.Linear(5, 10),
            torch.nn.ReLU(),
            torch.nn.Linear(10, 1)
        )

    # Snapshot one random initialisation and reuse it for every optimizer.
    initial_state = {
        key: value.clone() for key, value in create_model().state_dict().items()
    }

    optimizers = {
        'SGD': optim.SGD,
        'Adagrad': optim.Adagrad,
        'RMSprop': optim.RMSprop,
        'Adam': optim.Adam,
        'Adadelta': optim.Adadelta
    }

    n_epochs = 200
    results = {}

    plt.figure(figsize=(12, 6))

    for name, opt_class in optimizers.items():
        model = create_model()
        model.load_state_dict(initial_state)
        optimizer = opt_class(model.parameters(), lr=0.01)

        losses = []
        for epoch in range(n_epochs):
            optimizer.zero_grad()
            y_pred = model(X_tensor)
            loss = torch.nn.functional.mse_loss(y_pred, y_tensor)
            loss.backward()
            optimizer.step()
            losses.append(loss.item())

        results[name] = losses
        plt.plot(losses, label=name)
        print(f"{name}: Final loss = {losses[-1]:.4f}")

    plt.title("Optimizer Comparison")
    plt.xlabel("Epoch")
    plt.ylabel("Loss")
    plt.legend()
    plt.grid()
    plt.show()

    return results
441
+ ]
442
+ },
443
+ {
444
+ "cell_type": "code",
445
+ "execution_count": null,
446
+ "id": "b09a56d5",
447
+ "metadata": {},
448
+ "outputs": [],
449
+ "source": [
450
+ "optimizer_results = compare_optimizers()"
451
+ ]
452
+ },
453
+ {
454
+ "cell_type": "raw",
455
+ "id": "4bbeda82-1dc3-4964-868e-2cce413cd927",
456
+ "metadata": {},
457
+ "source": [
458
+ "8. Gradient clipping"
459
+ ]
460
+ },
461
+ {
462
+ "cell_type": "code",
463
+ "execution_count": null,
464
+ "id": "7b281491",
465
+ "metadata": {},
466
+ "outputs": [],
467
+ "source": [
def clip_gradients(gradients, method='value', threshold=1.0):
    """Clip a list of gradient arrays by value or by global norm.

    ``'value'`` clamps every element to ``[-threshold, threshold]``.
    ``'norm'`` scales all arrays by ``threshold / max(global_norm, threshold)``,
    where ``global_norm`` is the L2 norm over every element of every array.
    The per-array norms before and after clipping are printed.

    Args:
        gradients: List of NumPy arrays.
        method: ``'value'`` or ``'norm'``.
        threshold: Clipping threshold.

    Returns:
        List of clipped arrays, in the same order as ``gradients``.

    Raises:
        ValueError: If ``method`` is not ``'value'`` or ``'norm'``.
    """
    print(f"\n8. Gradient clipping ({method} with threshold={threshold})...")

    if method == 'value':
        clipped = []
        for grad in gradients:
            clipped.append(np.clip(grad, -threshold, threshold))
    elif method == 'norm':
        squared_total = 0
        for grad in gradients:
            squared_total = squared_total + np.sum(grad**2)
        global_norm = np.sqrt(squared_total)
        # The factor is 1 when the global norm is already within the threshold.
        scale = threshold / max(global_norm, threshold)
        clipped = []
        for grad in gradients:
            clipped.append(grad * scale)
    else:
        raise ValueError("Unsupported clipping method")

    norms_before = [np.linalg.norm(grad) for grad in gradients]
    norms_after = [np.linalg.norm(grad) for grad in clipped]
    print("Original gradient norms:", norms_before)
    print("Clipped gradient norms:", norms_after)

    return clipped
484
+ ]
485
+ },
486
+ {
487
+ "cell_type": "code",
488
+ "execution_count": null,
489
+ "id": "c09dd387",
490
+ "metadata": {},
491
+ "outputs": [],
492
+ "source": [
493
+ "gradients = [np.random.randn(3,4), np.random.randn(4,1)]\n",
494
+ "clipped_value = clip_gradients(gradients, method='value', threshold=1.0)\n",
495
+ "clipped_norm = clip_gradients(gradients, method='norm', threshold=1.0)"
496
+ ]
497
+ },
498
+ {
499
+ "cell_type": "raw",
500
+ "id": "6680e526-6710-481c-bccf-a0ab4b643eac",
501
+ "metadata": {},
502
+ "source": [
503
+ "9. Hessian-Free Optimization"
504
+ ]
505
+ },
506
+ {
507
+ "cell_type": "code",
508
+ "execution_count": null,
509
+ "id": "500ee068",
510
+ "metadata": {},
511
+ "outputs": [],
512
+ "source": [
def hessian_free_optimization():
    """Solve the Newton system ``H v = -grad f`` without forming the Hessian.

    Uses the quadratic ``f(x) = x^T diag(1, 2, 3) x`` at a random point.
    Hessian-vector products come from a second autograd pass through the
    gradient, and the linear system is solved with Conjugate Gradient (CG).
    The solution is printed; nothing is returned.
    """
    print("\n9. Hessian-Free Optimization...")

    print("PyTorch implementation:")
    x = torch.randn(3, requires_grad=True, dtype=torch.double)

    def f(x):
        return x @ torch.diag(torch.tensor([1.0, 2.0, 3.0], dtype=torch.double)) @ x

    # create_graph=True keeps the gradient differentiable for the second pass.
    grad = torch.autograd.grad(f(x), x, create_graph=True)[0]

    def hvp(v):
        # v must be treated as a constant. If it carried a graph back to x,
        # differentiating grad @ v would add a (dv/dx)^T grad term and the
        # result would no longer be H @ v.
        return torch.autograd.grad(grad @ v.detach(), x, retain_graph=True)[0]

    def cg_solve(A, b, max_iter=10, tol=1e-6):
        # Conjugate Gradient for A(x) = b, where A is a matrix-free operator.
        solution = torch.zeros_like(b)
        r = b - A(solution)
        p = r.clone()
        rsold = r @ r
        if torch.sqrt(rsold) < tol:
            # Already converged (e.g. zero gradient); avoids 0/0 below.
            return solution

        for _ in range(max_iter):
            Ap = A(p)
            curvature = p @ Ap
            if curvature <= 0:
                # CG assumes positive curvature; stop rather than divide by
                # zero or step along a negative-curvature direction.
                break
            alpha = rsold / curvature
            solution = solution + alpha * p
            r = r - alpha * Ap
            rsnew = r @ r
            if torch.sqrt(rsnew) < tol:
                break
            p = r + (rsnew / rsold) * p
            rsold = rsnew

        return solution

    # The right-hand side is detached so every CG vector is graph-free.
    v = cg_solve(hvp, -grad.detach())
    print("Solution v:", v.detach().numpy())
548
+ ]
549
+ },
550
+ {
551
+ "cell_type": "code",
552
+ "execution_count": null,
553
+ "id": "5401b781",
554
+ "metadata": {},
555
+ "outputs": [],
556
+ "source": [
557
+ "hessian_free_optimization()"
558
+ ]
559
+ },
560
+ {
561
+ "cell_type": "raw",
562
+ "id": "fe14e90e-d4a2-4d37-a27c-94b632a8c5a1",
563
+ "metadata": {},
564
+ "source": [
565
+ "10. Hessian matrix analysis"
566
+ ]
567
+ },
568
+ {
569
+ "cell_type": "code",
570
+ "execution_count": null,
571
+ "id": "cbc5032d",
572
+ "metadata": {},
573
+ "outputs": [],
574
+ "source": [
def hessian_analysis(f=None, critical_point=None, eps=1e-5, tol=1e-4):
    """Classify a critical point from the eigenvalues of a numerical Hessian.

    The Hessian is estimated with central finite differences, printed along
    with its eigenvalues, and the point is classified as a local minimum,
    local maximum, saddle point, or inconclusive.

    Args:
        f: Scalar function of a 1-D array. Defaults to
            ``x[0]**2 + x[1]**3 - x[1]**2``.
        critical_point: Point at which to evaluate the Hessian. Defaults to
            ``[0, 2/3]``.
        eps: Finite-difference step.
        tol: Eigenvalues with magnitude at or below this are treated as zero,
            because finite-difference noise makes exact sign tests unreliable.

    Returns:
        None. Results are printed.
    """
    print("\n10. Hessian matrix analysis...")

    if f is None:
        def f(x):
            return x[0]**2 + x[1]**3 - x[1]**2

    if critical_point is None:
        critical_point = [0, 2/3]

    # Float copy so the in-place +/- eps shifts work for integer input too.
    point = np.asarray(critical_point, dtype=float)
    n = point.size

    def shifted(i, j, di, dj):
        # f evaluated at the point displaced by di along axis i and dj along axis j.
        x = point.copy()
        x[i] += di
        x[j] += dj
        return f(x)

    hessian = np.zeros((n, n))
    for i in range(n):
        for j in range(i, n):
            second = (
                shifted(i, j, eps, eps)
                - shifted(i, j, eps, -eps)
                - shifted(i, j, -eps, eps)
                + shifted(i, j, -eps, -eps)
            ) / (4 * eps**2)
            # Fill both triangles so the matrix is exactly symmetric.
            hessian[i, j] = hessian[j, i] = second

    print("Hessian matrix:\n", hessian)

    # eigvalsh returns real eigenvalues for a symmetric matrix.
    eigenvalues = np.linalg.eigvalsh(hessian)
    print("Eigenvalues:", eigenvalues)

    has_positive = bool(np.any(eigenvalues > tol))
    has_negative = bool(np.any(eigenvalues < -tol))

    if has_positive and has_negative:
        print("Conclusion: Saddle point")
    elif np.all(eigenvalues > tol):
        print("Conclusion: Local minimum")
    elif np.all(eigenvalues < -tol):
        print("Conclusion: Local maximum")
    else:
        # At least one eigenvalue is numerically zero and the rest share a
        # sign, so the second-derivative test cannot decide.
        print("Conclusion: Inconclusive (degenerate Hessian)")
612
+ ]
613
+ },
614
+ {
615
+ "cell_type": "code",
616
+ "execution_count": null,
617
+ "id": "8a2503c5",
618
+ "metadata": {},
619
+ "outputs": [],
620
+ "source": [
621
+ "hessian_analysis()"
622
+ ]
623
+ }
624
+ ],
625
+ "metadata": {
626
+ "kernelspec": {
627
+ "display_name": "Python 3 (ipykernel)",
628
+ "language": "python",
629
+ "name": "python3"
630
+ },
631
+ "language_info": {
632
+ "codemirror_mode": {
633
+ "name": "ipython",
634
+ "version": 3
635
+ },
636
+ "file_extension": ".py",
637
+ "mimetype": "text/x-python",
638
+ "name": "python",
639
+ "nbconvert_exporter": "python",
640
+ "pygments_lexer": "ipython3",
641
+ "version": "3.12.4"
642
+ }
643
+ },
644
+ "nbformat": 4,
645
+ "nbformat_minor": 5
646
+ }