gradboard 1.0.0__py3-none-any.whl → 1.0.2__py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of gradboard might be problematic. Click here for more details.
- gradboard/optimiser.py +1 -1
- gradboard/scheduler.py +3 -13
- {gradboard-1.0.0.dist-info → gradboard-1.0.2.dist-info}/METADATA +1 -1
- gradboard-1.0.2.dist-info/RECORD +8 -0
- gradboard-1.0.0.dist-info/RECORD +0 -8
- {gradboard-1.0.0.dist-info → gradboard-1.0.2.dist-info}/LICENSE +0 -0
- {gradboard-1.0.0.dist-info → gradboard-1.0.2.dist-info}/WHEEL +0 -0
gradboard/optimiser.py
CHANGED
|
@@ -129,7 +129,7 @@ class AdamS(Optimizer):
|
|
|
129
129
|
return loss
|
|
130
130
|
|
|
131
131
|
|
|
132
|
-
def get_optimiser(model, optimiser=AdamW, lr=
|
|
132
|
+
def get_optimiser(model, optimiser=AdamW, lr=1e-3, weight_decay=1e-2):
|
|
133
133
|
"""
|
|
134
134
|
Defaults are from one of the presets from the accompanying repo to Hassani
|
|
135
135
|
et al. (2023) "Escaping the Big Data Paradigm with Compact Transformers",
|
gradboard/scheduler.py
CHANGED
|
@@ -159,20 +159,10 @@ class PASS:
|
|
|
159
159
|
"""
|
|
160
160
|
range_test_results = self._smoothed_range_test(self.range_test_results)
|
|
161
161
|
minimum = min(range_test_results, key=lambda x: x[1])
|
|
162
|
-
min_loss = minimum[1]
|
|
163
|
-
points_left_of_min = [p for p in range_test_results if p[0] < minimum[0]]
|
|
164
|
-
highest_point_left_of_min = max(points_left_of_min, key=lambda x: x[1])
|
|
165
|
-
loss_difference = highest_point_left_of_min[1] - minimum[1]
|
|
166
|
-
cool_point_loss = min_loss + 0.8 * loss_difference
|
|
167
|
-
max_lr_loss = min_loss + 0.2 * loss_difference
|
|
168
162
|
for r in range_test_results:
|
|
169
|
-
if r[
|
|
170
|
-
self.
|
|
171
|
-
|
|
172
|
-
for r in range_test_results:
|
|
173
|
-
if r[1] <= max_lr_loss:
|
|
174
|
-
self.max_lr = r[0]
|
|
175
|
-
break
|
|
163
|
+
if (r[0] < minimum[0]) and (r[1] > minimum[1]):
|
|
164
|
+
self.max_lr = r[0] / 2
|
|
165
|
+
self.cool_point = self.max_lr * 0.1
|
|
176
166
|
print("High LR", self.max_lr)
|
|
177
167
|
print("Cool point", self.cool_point)
|
|
178
168
|
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
gradboard/__init__.py,sha256=57AkHusYwLCsusiVnajH5pMFKioRCj-3IjF9qpdOzE0,69
|
|
2
|
+
gradboard/cycles.py,sha256=iGEW3Rlp-JNyQZLfpaDXxUCAcEV01ANjLF-Fnhug-qA,10120
|
|
3
|
+
gradboard/optimiser.py,sha256=h5nntenAhD8E9-Ayt17r7AK42S7O-XdhsrxedDd_4co,6201
|
|
4
|
+
gradboard/scheduler.py,sha256=icIDEcem0jwB-NORUsaZvFNjOOn52R5g8OFBD4CYzsE,6279
|
|
5
|
+
gradboard-1.0.2.dist-info/LICENSE,sha256=0BAzJE5BqQ7Iixp_AFdB2W1uO-HCRX-Qfun8PHt6yVM,1073
|
|
6
|
+
gradboard-1.0.2.dist-info/METADATA,sha256=ZVjXU2FrPsPtuX1-OPXlZY97F4E0aaRGpOk3Uhps8BQ,2173
|
|
7
|
+
gradboard-1.0.2.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
|
|
8
|
+
gradboard-1.0.2.dist-info/RECORD,,
|
gradboard-1.0.0.dist-info/RECORD
DELETED
|
@@ -1,8 +0,0 @@
|
|
|
1
|
-
gradboard/__init__.py,sha256=57AkHusYwLCsusiVnajH5pMFKioRCj-3IjF9qpdOzE0,69
|
|
2
|
-
gradboard/cycles.py,sha256=iGEW3Rlp-JNyQZLfpaDXxUCAcEV01ANjLF-Fnhug-qA,10120
|
|
3
|
-
gradboard/optimiser.py,sha256=34FyFvavFLpaZhfC_nBD-WJ-hzQ05wCTdZMudhMVmbU,6201
|
|
4
|
-
gradboard/scheduler.py,sha256=R0lnCYJc3oKh3WR_5A29EfF3hkERqHEyplhYgON_k8M,6732
|
|
5
|
-
gradboard-1.0.0.dist-info/LICENSE,sha256=0BAzJE5BqQ7Iixp_AFdB2W1uO-HCRX-Qfun8PHt6yVM,1073
|
|
6
|
-
gradboard-1.0.0.dist-info/METADATA,sha256=jN1U9BRiebBQ08IG0pzkjq498Z_x6QmBogkctgR0iuk,2173
|
|
7
|
-
gradboard-1.0.0.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
|
|
8
|
-
gradboard-1.0.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|