ista-daslab-optimizers 1.1.6__tar.gz → 1.1.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ista_daslab_optimizers-1.1.6/ista_daslab_optimizers.egg-info → ista_daslab_optimizers-1.1.7}/PKG-INFO +26 -10
- {ista_daslab_optimizers-1.1.6 → ista_daslab_optimizers-1.1.7}/README.md +23 -8
- {ista_daslab_optimizers-1.1.6 → ista_daslab_optimizers-1.1.7}/ista_daslab_optimizers/__init__.py +2 -0
- ista_daslab_optimizers-1.1.7/ista_daslab_optimizers/ista_optimizer/__init__.py +5 -0
- ista_daslab_optimizers-1.1.7/ista_daslab_optimizers/ista_optimizer/ista_optimizer.py +36 -0
- {ista_daslab_optimizers-1.1.6 → ista_daslab_optimizers-1.1.7}/ista_daslab_optimizers/tools.py +2 -1
- {ista_daslab_optimizers-1.1.6 → ista_daslab_optimizers-1.1.7/ista_daslab_optimizers.egg-info}/PKG-INFO +26 -10
- {ista_daslab_optimizers-1.1.6 → ista_daslab_optimizers-1.1.7}/ista_daslab_optimizers.egg-info/SOURCES.txt +2 -0
- {ista_daslab_optimizers-1.1.6 → ista_daslab_optimizers-1.1.7}/ista_daslab_optimizers.egg-info/requires.txt +1 -0
- {ista_daslab_optimizers-1.1.6 → ista_daslab_optimizers-1.1.7}/pyproject.toml +3 -1
- {ista_daslab_optimizers-1.1.6 → ista_daslab_optimizers-1.1.7}/LICENSE +0 -0
- {ista_daslab_optimizers-1.1.6 → ista_daslab_optimizers-1.1.7}/MANIFEST.in +0 -0
- {ista_daslab_optimizers-1.1.6 → ista_daslab_optimizers-1.1.7}/ista_daslab_optimizers/acdc/__init__.py +0 -0
- {ista_daslab_optimizers-1.1.6 → ista_daslab_optimizers-1.1.7}/ista_daslab_optimizers/acdc/acdc.py +0 -0
- {ista_daslab_optimizers-1.1.6 → ista_daslab_optimizers-1.1.7}/ista_daslab_optimizers/acdc/wd_scheduler.py +0 -0
- {ista_daslab_optimizers-1.1.6 → ista_daslab_optimizers-1.1.7}/ista_daslab_optimizers/dense_mfac/__init__.py +0 -0
- {ista_daslab_optimizers-1.1.6 → ista_daslab_optimizers-1.1.7}/ista_daslab_optimizers/dense_mfac/dense_core_mfac.py +0 -0
- {ista_daslab_optimizers-1.1.6 → ista_daslab_optimizers-1.1.7}/ista_daslab_optimizers/dense_mfac/dense_mfac.py +0 -0
- {ista_daslab_optimizers-1.1.6 → ista_daslab_optimizers-1.1.7}/ista_daslab_optimizers/micro_adam/__init__.py +0 -0
- {ista_daslab_optimizers-1.1.6 → ista_daslab_optimizers-1.1.7}/ista_daslab_optimizers/micro_adam/micro_adam.py +0 -0
- {ista_daslab_optimizers-1.1.6 → ista_daslab_optimizers-1.1.7}/ista_daslab_optimizers/sparse_mfac/__init__.py +0 -0
- {ista_daslab_optimizers-1.1.6 → ista_daslab_optimizers-1.1.7}/ista_daslab_optimizers/sparse_mfac/sparse_core_mfac_w_ef.py +0 -0
- {ista_daslab_optimizers-1.1.6 → ista_daslab_optimizers-1.1.7}/ista_daslab_optimizers/sparse_mfac/sparse_mfac.py +0 -0
- {ista_daslab_optimizers-1.1.6 → ista_daslab_optimizers-1.1.7}/ista_daslab_optimizers.egg-info/dependency_links.txt +0 -0
- {ista_daslab_optimizers-1.1.6 → ista_daslab_optimizers-1.1.7}/ista_daslab_optimizers.egg-info/top_level.txt +0 -0
- {ista_daslab_optimizers-1.1.6 → ista_daslab_optimizers-1.1.7}/kernels/dense_mfac/dense_mfac.cpp +0 -0
- {ista_daslab_optimizers-1.1.6 → ista_daslab_optimizers-1.1.7}/kernels/dense_mfac/dense_mfac_kernel.cu +0 -0
- {ista_daslab_optimizers-1.1.6 → ista_daslab_optimizers-1.1.7}/kernels/micro_adam/micro_adam.cpp +0 -0
- {ista_daslab_optimizers-1.1.6 → ista_daslab_optimizers-1.1.7}/kernels/micro_adam/micro_adam_asymm_block_quant.cu +0 -0
- {ista_daslab_optimizers-1.1.6 → ista_daslab_optimizers-1.1.7}/kernels/micro_adam/micro_adam_asymm_block_quant_inv.cu +0 -0
- {ista_daslab_optimizers-1.1.6 → ista_daslab_optimizers-1.1.7}/kernels/micro_adam/micro_adam_update.cu +0 -0
- {ista_daslab_optimizers-1.1.6 → ista_daslab_optimizers-1.1.7}/kernels/sparse_mfac/sparse_mfac.cpp +0 -0
- {ista_daslab_optimizers-1.1.6 → ista_daslab_optimizers-1.1.7}/kernels/sparse_mfac/sparse_mfac_LCG_kernel.cu +0 -0
- {ista_daslab_optimizers-1.1.6 → ista_daslab_optimizers-1.1.7}/kernels/sparse_mfac/sparse_mfac_SP_kernel.cu +0 -0
- {ista_daslab_optimizers-1.1.6 → ista_daslab_optimizers-1.1.7}/kernels/sparse_mfac_pruner/sparse_mfac_pruner.cpp +0 -0
- {ista_daslab_optimizers-1.1.6 → ista_daslab_optimizers-1.1.7}/kernels/sparse_mfac_pruner/sparse_mfac_pruner.cu +0 -0
- {ista_daslab_optimizers-1.1.6 → ista_daslab_optimizers-1.1.7}/kernels/tools/tools.cpp +0 -0
- {ista_daslab_optimizers-1.1.6 → ista_daslab_optimizers-1.1.7}/kernels/tools/tools_kernel.cu +0 -0
- {ista_daslab_optimizers-1.1.6 → ista_daslab_optimizers-1.1.7}/kernels/utils.h +0 -0
- {ista_daslab_optimizers-1.1.6 → ista_daslab_optimizers-1.1.7}/setup.cfg +0 -0
- {ista_daslab_optimizers-1.1.6 → ista_daslab_optimizers-1.1.7}/setup.py +0 -0
{ista_daslab_optimizers-1.1.6/ista_daslab_optimizers.egg-info → ista_daslab_optimizers-1.1.7}/PKG-INFO
RENAMED
@@ -1,6 +1,6 @@
-Metadata-Version: 2.
+Metadata-Version: 2.1
 Name: ista_daslab_optimizers
-Version: 1.1.6
+Version: 1.1.7
 Summary: Deep Learning optimizers developed in the Distributed Algorithms and Systems group (DASLab) @ Institute of Science and Technology Austria (ISTA)
 Author-email: Ionut-Vlad Modoranu <ionut-vlad.modoranu@ist.ac.at>
 Maintainer-email: Ionut-Vlad Modoranu <ionut-vlad.modoranu@ist.ac.at>
@@ -222,6 +222,7 @@ Requires-Dist: gpustat
 Requires-Dist: timm
 Requires-Dist: einops
 Requires-Dist: psutil
+Requires-Dist: fast-hadamard-transform
 
 # ISTA DAS Lab Optimization Algorithms Package
 This repository contains optimization algorithms for Deep Learning developed by
@@ -240,6 +241,9 @@ The repository contains code for the following optimizers published by DASLab @
 - **MicroAdam**:
   - paper: [MicroAdam: Accurate Adaptive Optimization with Low Space Overhead and Provable Convergence](https://arxiv.org/abs/2405.15593)
   - official repository: [GitHub](https://github.com/IST-DASLab/MicroAdam)
+- **Trion / DCT-AdamW**:
+  - paper: [FFT-based Dynamic Subspace Selection for Low-Rank Adaptive Optimization of Large Language Models](https://arxiv.org/abs/2505.17967v3)
+  - code: [GitHub](https://github.com/IST-DASLab/ISTA-DASLab-Optimizers/tree/main/ista_daslab_optimizers/fft_low_rank)
 
 ### Installation
 To use the latest stable version of this repository, you can install via pip:
@@ -261,7 +265,8 @@ source install.sh
 
 ## How to use optimizers?
 
-In this repository we provide a minimal working example for CIFAR-10 for optimizers `acdc`,
+In this repository we provide a minimal working example for CIFAR-10 for optimizers `acdc`,
+`dense_mfac`, `sparse_mfac` and `micro_adam`:
 ```shell
 cd examples/cifar10
 OPTIMIZER=micro_adam # or any other optimizer listed above
@@ -291,18 +296,29 @@ optimizer = MicroAdam(
 # Versions summary:
 
 ---
+- **1.1.7** @ October 8th, 2025:
+  - added code for `Trion & DCT-AdamW`
 - **1.1.6** @ February 19th, 2025:
-  - do not update the parameters that have `None` gradient in method `update_model` from `tools.py`.
+  - do not update the parameters that have `None` gradient in method `update_model` from `tools.py`.
+    This is useful when using M-FAC for models with more than one classification head in the Continual Learning framework.
 - **1.1.5** @ February 19th, 2025:
-  - adapted `DenseMFAC` for a model with multiple classification heads for Continual Learning where
+  - adapted `DenseMFAC` for a model with multiple classification heads for Continual Learning where
+    we have one feature extractor block and a list of classification heads. The issue was related to
+    the model size, which included the feature extractor backbone and all classification heads, but
+    in practice only one classification head will be used for training and inference. This caused some
+    size mismatch errors at runtime in the `DenseCoreMFAC` module because the gradient at runtime had
+    fewer entries than the entire model. When using `DenseMFAC` for such settings, set `optimizer.model_size`
+    to the correct size after calling the constructor and the `DenseCoreMFAC` object will be created
+    automatically in the `step` function.
 - **1.1.3** @ September 5th, 2024:
   - allow using `SparseCoreMFACwithEF` separately by importing it in `sparse_mfac.__init__.py`
 - **1.1.2** @ August 1st, 2024:
-  - ***[1.1.0]:*** added support to densify the final update: introduced parameter alpha that controls
-    (EF) to be integrated into the update to make it dense. Finally, the
-    the expense of another call to `Qinv` and `Q` (and
-
-
+  - ***[1.1.0]:*** added support to densify the final update: introduced parameter alpha that controls
+    the fraction of error feedback (EF) to be integrated into the update to make it dense. Finally, the
+    fraction alpha will be discarded from the EF at the expense of another call to `Qinv` and `Q` (and
+    implicitly quantization statistics computation).
+  - ***[1.0.2]:*** added FSDP-compatible implementation by initializing the parameter states in the
+    `update_step` method instead of MicroAdam constructor
 - **1.0.1** @ June 27th, 2024:
   - removed version in dependencies to avoid conflicts with llm-foundry
 - **1.0.0** @ June 20th, 2024:
{ista_daslab_optimizers-1.1.6 → ista_daslab_optimizers-1.1.7}/README.md
RENAMED
@@ -15,6 +15,9 @@ The repository contains code for the following optimizers published by DASLab @
 - **MicroAdam**:
   - paper: [MicroAdam: Accurate Adaptive Optimization with Low Space Overhead and Provable Convergence](https://arxiv.org/abs/2405.15593)
   - official repository: [GitHub](https://github.com/IST-DASLab/MicroAdam)
+- **Trion / DCT-AdamW**:
+  - paper: [FFT-based Dynamic Subspace Selection for Low-Rank Adaptive Optimization of Large Language Models](https://arxiv.org/abs/2505.17967v3)
+  - code: [GitHub](https://github.com/IST-DASLab/ISTA-DASLab-Optimizers/tree/main/ista_daslab_optimizers/fft_low_rank)
 
 ### Installation
 To use the latest stable version of this repository, you can install via pip:
@@ -36,7 +39,8 @@ source install.sh
 
 ## How to use optimizers?
 
-In this repository we provide a minimal working example for CIFAR-10 for optimizers `acdc`,
+In this repository we provide a minimal working example for CIFAR-10 for optimizers `acdc`,
+`dense_mfac`, `sparse_mfac` and `micro_adam`:
 ```shell
 cd examples/cifar10
 OPTIMIZER=micro_adam # or any other optimizer listed above
@@ -66,18 +70,29 @@ optimizer = MicroAdam(
 # Versions summary:
 
 ---
+- **1.1.7** @ October 8th, 2025:
+  - added code for `Trion & DCT-AdamW`
 - **1.1.6** @ February 19th, 2025:
-  - do not update the parameters that have `None` gradient in method `update_model` from `tools.py`.
+  - do not update the parameters that have `None` gradient in method `update_model` from `tools.py`.
+    This is useful when using M-FAC for models with more than one classification head in the Continual Learning framework.
 - **1.1.5** @ February 19th, 2025:
-  - adapted `DenseMFAC` for a model with multiple classification heads for Continual Learning where
+  - adapted `DenseMFAC` for a model with multiple classification heads for Continual Learning where
+    we have one feature extractor block and a list of classification heads. The issue was related to
+    the model size, which included the feature extractor backbone and all classification heads, but
+    in practice only one classification head will be used for training and inference. This caused some
+    size mismatch errors at runtime in the `DenseCoreMFAC` module because the gradient at runtime had
+    fewer entries than the entire model. When using `DenseMFAC` for such settings, set `optimizer.model_size`
+    to the correct size after calling the constructor and the `DenseCoreMFAC` object will be created
+    automatically in the `step` function.
 - **1.1.3** @ September 5th, 2024:
   - allow using `SparseCoreMFACwithEF` separately by importing it in `sparse_mfac.__init__.py`
 - **1.1.2** @ August 1st, 2024:
-  - ***[1.1.0]:*** added support to densify the final update: introduced parameter alpha that controls
-    (EF) to be integrated into the update to make it dense. Finally, the
-    the expense of another call to `Qinv` and `Q` (and
-
-
+  - ***[1.1.0]:*** added support to densify the final update: introduced parameter alpha that controls
+    the fraction of error feedback (EF) to be integrated into the update to make it dense. Finally, the
+    fraction alpha will be discarded from the EF at the expense of another call to `Qinv` and `Q` (and
+    implicitly quantization statistics computation).
+  - ***[1.0.2]:*** added FSDP-compatible implementation by initializing the parameter states in the
+    `update_step` method instead of MicroAdam constructor
 - **1.0.1** @ June 27th, 2024:
   - removed version in dependencies to avoid conflicts with llm-foundry
 - **1.0.0** @ June 20th, 2024:
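The 1.1.5 and 1.1.6 notes in the README changelog above describe a Continual Learning workaround: build `DenseMFAC` over the backbone plus the single active classification head, then set `optimizer.model_size` after construction so that `DenseCoreMFAC` is created lazily with the right size inside `step`. A minimal sketch of that workflow follows; the import path and the `DenseMFAC` constructor arguments (`ngrads`, `lr`, `weight_decay`) are assumptions for illustration only and are not taken from this diff.

```python
# Hedged sketch of the optimizer.model_size workaround from the 1.1.5/1.1.6 changelog
# entries. Import path and constructor arguments are hypothetical.
import torch
from ista_daslab_optimizers.dense_mfac import DenseMFAC  # assumed import path

backbone = torch.nn.Linear(512, 256)                   # shared feature extractor
heads = [torch.nn.Linear(256, 10) for _ in range(5)]   # one classification head per task
active_head = heads[0]                                 # only one head trains at a time

params = list(backbone.parameters()) + list(active_head.parameters())
optimizer = DenseMFAC(params, ngrads=1024, lr=1e-3, weight_decay=0.0)  # hypothetical args

# Per the changelog: set model_size to the number of parameters that actually receive
# gradients (backbone + active head), so DenseCoreMFAC is built with that size in step().
optimizer.model_size = sum(p.numel() for p in params)
```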
ista_daslab_optimizers-1.1.7/ista_daslab_optimizers/ista_optimizer/ista_optimizer.py
ADDED
@@ -0,0 +1,36 @@
+import torch
+
+class ISTAOptimizer(torch.optim.Optimizer):
+    def __init__(self, params, lr, weight_decay):
+        super().__init__(params, dict(lr=lr, weight_decay=weight_decay))
+        self.lr = lr
+        self.weight_decay = weight_decay
+        self.optim_steps = 0
+
+    def loop_params(self, check_grad=True):
+        for group in self.param_groups:
+            for p in group['params']:
+                if check_grad:
+                    if p.grad is None: continue
+                yield group, self.state[p], p
+
+    @torch.no_grad()
+    def init_optimizer_states(self):
+        raise NotImplementedError
+
+    @torch.no_grad()
+    def optimizer_step(self):
+        raise NotImplementedError
+
+    @torch.no_grad()
+    def step(self, closure=None):
+        self.optim_steps += 1
+
+        loss = None
+        if closure is not None:
+            with torch.enable_grad():
+                loss = closure()
+
+        self.optimizer_step()
+
+        return loss
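The new `ISTAOptimizer` base class above centralizes the training step: `step` increments `optim_steps`, evaluates the optional closure under `torch.enable_grad`, and delegates to `optimizer_step`, while `loop_params` yields `(group, state, param)` triples and by default skips parameters whose gradient is `None`. Below is a minimal sketch of how a subclass could plug into these hooks; the `MomentumSGD` example and its hyperparameters are illustrative only and not part of the package, and the import path is inferred from the SOURCES.txt entry added in this release.

```python
# Illustrative sketch of subclassing the new ISTAOptimizer base class.
# Only the base-class hooks come from the diff; MomentumSGD itself is hypothetical.
import torch
from ista_daslab_optimizers.ista_optimizer.ista_optimizer import ISTAOptimizer

class MomentumSGD(ISTAOptimizer):
    def __init__(self, params, lr, weight_decay=0.0, momentum=0.9):
        super().__init__(params, lr, weight_decay)
        self.momentum = momentum
        self.init_optimizer_states()

    @torch.no_grad()
    def init_optimizer_states(self):
        # check_grad=False visits every parameter, even before any backward pass
        for group, state, p in self.loop_params(check_grad=False):
            state['buf'] = torch.zeros_like(p)

    @torch.no_grad()
    def optimizer_step(self):
        # called once per step() after the closure (if any) has been evaluated
        for group, state, p in self.loop_params():  # skips params with p.grad is None
            grad = p.grad.add(p, alpha=group['weight_decay'])
            state['buf'].mul_(self.momentum).add_(grad)
            p.add_(state['buf'], alpha=-group['lr'])
```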
{ista_daslab_optimizers-1.1.6 → ista_daslab_optimizers-1.1.7}/ista_daslab_optimizers/tools.py
RENAMED
@@ -214,4 +214,5 @@ class KernelVersionsManager:
         return self.LCG_BLOCKS_THREADS[self.version_LCG][self.BLOCK_INDEX]
 
     def get_LCG_threads(self):
-        return self.LCG_BLOCKS_THREADS[self.version_LCG][self.THREAD_INDEX]
+        return self.LCG_BLOCKS_THREADS[self.version_LCG][self.THREAD_INDEX]
+
{ista_daslab_optimizers-1.1.6 → ista_daslab_optimizers-1.1.7/ista_daslab_optimizers.egg-info}/PKG-INFO
RENAMED
@@ -1,6 +1,6 @@
-Metadata-Version: 2.
+Metadata-Version: 2.1
 Name: ista_daslab_optimizers
-Version: 1.1.6
+Version: 1.1.7
 Summary: Deep Learning optimizers developed in the Distributed Algorithms and Systems group (DASLab) @ Institute of Science and Technology Austria (ISTA)
 Author-email: Ionut-Vlad Modoranu <ionut-vlad.modoranu@ist.ac.at>
 Maintainer-email: Ionut-Vlad Modoranu <ionut-vlad.modoranu@ist.ac.at>
@@ -222,6 +222,7 @@ Requires-Dist: gpustat
 Requires-Dist: timm
 Requires-Dist: einops
 Requires-Dist: psutil
+Requires-Dist: fast-hadamard-transform
 
 # ISTA DAS Lab Optimization Algorithms Package
 This repository contains optimization algorithms for Deep Learning developed by
@@ -240,6 +241,9 @@ The repository contains code for the following optimizers published by DASLab @
 - **MicroAdam**:
   - paper: [MicroAdam: Accurate Adaptive Optimization with Low Space Overhead and Provable Convergence](https://arxiv.org/abs/2405.15593)
   - official repository: [GitHub](https://github.com/IST-DASLab/MicroAdam)
+- **Trion / DCT-AdamW**:
+  - paper: [FFT-based Dynamic Subspace Selection for Low-Rank Adaptive Optimization of Large Language Models](https://arxiv.org/abs/2505.17967v3)
+  - code: [GitHub](https://github.com/IST-DASLab/ISTA-DASLab-Optimizers/tree/main/ista_daslab_optimizers/fft_low_rank)
 
 ### Installation
 To use the latest stable version of this repository, you can install via pip:
@@ -261,7 +265,8 @@ source install.sh
 
 ## How to use optimizers?
 
-In this repository we provide a minimal working example for CIFAR-10 for optimizers `acdc`,
+In this repository we provide a minimal working example for CIFAR-10 for optimizers `acdc`,
+`dense_mfac`, `sparse_mfac` and `micro_adam`:
 ```shell
 cd examples/cifar10
 OPTIMIZER=micro_adam # or any other optimizer listed above
@@ -291,18 +296,29 @@ optimizer = MicroAdam(
 # Versions summary:
 
 ---
+- **1.1.7** @ October 8th, 2025:
+  - added code for `Trion & DCT-AdamW`
 - **1.1.6** @ February 19th, 2025:
-  - do not update the parameters that have `None` gradient in method `update_model` from `tools.py`.
+  - do not update the parameters that have `None` gradient in method `update_model` from `tools.py`.
+    This is useful when using M-FAC for models with more than one classification head in the Continual Learning framework.
 - **1.1.5** @ February 19th, 2025:
-  - adapted `DenseMFAC` for a model with multiple classification heads for Continual Learning where
+  - adapted `DenseMFAC` for a model with multiple classification heads for Continual Learning where
+    we have one feature extractor block and a list of classification heads. The issue was related to
+    the model size, which included the feature extractor backbone and all classification heads, but
+    in practice only one classification head will be used for training and inference. This caused some
+    size mismatch errors at runtime in the `DenseCoreMFAC` module because the gradient at runtime had
+    fewer entries than the entire model. When using `DenseMFAC` for such settings, set `optimizer.model_size`
+    to the correct size after calling the constructor and the `DenseCoreMFAC` object will be created
+    automatically in the `step` function.
 - **1.1.3** @ September 5th, 2024:
   - allow using `SparseCoreMFACwithEF` separately by importing it in `sparse_mfac.__init__.py`
 - **1.1.2** @ August 1st, 2024:
-  - ***[1.1.0]:*** added support to densify the final update: introduced parameter alpha that controls
-    (EF) to be integrated into the update to make it dense. Finally, the
-    the expense of another call to `Qinv` and `Q` (and
-
-
+  - ***[1.1.0]:*** added support to densify the final update: introduced parameter alpha that controls
+    the fraction of error feedback (EF) to be integrated into the update to make it dense. Finally, the
+    fraction alpha will be discarded from the EF at the expense of another call to `Qinv` and `Q` (and
+    implicitly quantization statistics computation).
+  - ***[1.0.2]:*** added FSDP-compatible implementation by initializing the parameter states in the
+    `update_step` method instead of MicroAdam constructor
 - **1.0.1** @ June 27th, 2024:
   - removed version in dependencies to avoid conflicts with llm-foundry
 - **1.0.0** @ June 20th, 2024:
{ista_daslab_optimizers-1.1.6 → ista_daslab_optimizers-1.1.7}/ista_daslab_optimizers.egg-info/SOURCES.txt
RENAMED
@@ -27,6 +27,8 @@ ista_daslab_optimizers/acdc/wd_scheduler.py
 ista_daslab_optimizers/dense_mfac/__init__.py
 ista_daslab_optimizers/dense_mfac/dense_core_mfac.py
 ista_daslab_optimizers/dense_mfac/dense_mfac.py
+ista_daslab_optimizers/ista_optimizer/__init__.py
+ista_daslab_optimizers/ista_optimizer/ista_optimizer.py
 ista_daslab_optimizers/micro_adam/__init__.py
 ista_daslab_optimizers/micro_adam/micro_adam.py
 ista_daslab_optimizers/sparse_mfac/__init__.py
{ista_daslab_optimizers-1.1.6 → ista_daslab_optimizers-1.1.7}/pyproject.toml
RENAMED
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name='ista_daslab_optimizers'
-version='1.1.6'
+version='1.1.7'
 dependencies = [
     "torch", # >=2.3.1",
     "torchaudio", # >=2.3.1",
@@ -15,6 +15,8 @@ dependencies = [
     "timm", # >=1.0.3",
     "einops", # >=0.7.0",
     "psutil", # >=5.9.8",
+    "fast-hadamard-transform",
+    # "fast-hadamard-transform @ git+https://github.com/Dao-AILab/fast-hadamard-transform.git",
 ]
 requires-python = '>= 3.8'
 authors = [