ista-daslab-optimizers 1.1.2.tar.gz → 1.1.3.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ista_daslab_optimizers-1.1.2/ista_daslab_optimizers.egg-info → ista_daslab_optimizers-1.1.3}/PKG-INFO +3 -1
- {ista_daslab_optimizers-1.1.2 → ista_daslab_optimizers-1.1.3}/README.md +2 -0
- {ista_daslab_optimizers-1.1.2 → ista_daslab_optimizers-1.1.3}/ista_daslab_optimizers/micro_adam/micro_adam.py +87 -23
- ista_daslab_optimizers-1.1.3/ista_daslab_optimizers/sparse_mfac/__init__.py +7 -0
- {ista_daslab_optimizers-1.1.2 → ista_daslab_optimizers-1.1.3/ista_daslab_optimizers.egg-info}/PKG-INFO +3 -1
- {ista_daslab_optimizers-1.1.2 → ista_daslab_optimizers-1.1.3}/ista_daslab_optimizers.egg-info/SOURCES.txt +13 -2
- {ista_daslab_optimizers-1.1.2 → ista_daslab_optimizers-1.1.3}/pyproject.toml +1 -1
- ista_daslab_optimizers-1.1.2/ista_daslab_optimizers/sparse_mfac/__init__.py +0 -5
- {ista_daslab_optimizers-1.1.2 → ista_daslab_optimizers-1.1.3}/LICENSE +0 -0
- {ista_daslab_optimizers-1.1.2 → ista_daslab_optimizers-1.1.3}/MANIFEST.in +0 -0
- {ista_daslab_optimizers-1.1.2 → ista_daslab_optimizers-1.1.3}/ista_daslab_optimizers/__init__.py +0 -0
- {ista_daslab_optimizers-1.1.2 → ista_daslab_optimizers-1.1.3}/ista_daslab_optimizers/acdc/__init__.py +0 -0
- {ista_daslab_optimizers-1.1.2 → ista_daslab_optimizers-1.1.3}/ista_daslab_optimizers/acdc/acdc.py +0 -0
- {ista_daslab_optimizers-1.1.2 → ista_daslab_optimizers-1.1.3}/ista_daslab_optimizers/acdc/wd_scheduler.py +0 -0
- {ista_daslab_optimizers-1.1.2 → ista_daslab_optimizers-1.1.3}/ista_daslab_optimizers/dense_mfac/__init__.py +0 -0
- {ista_daslab_optimizers-1.1.2 → ista_daslab_optimizers-1.1.3}/ista_daslab_optimizers/dense_mfac/dense_core_mfac.py +0 -0
- {ista_daslab_optimizers-1.1.2 → ista_daslab_optimizers-1.1.3}/ista_daslab_optimizers/dense_mfac/dense_mfac.py +0 -0
- {ista_daslab_optimizers-1.1.2 → ista_daslab_optimizers-1.1.3}/ista_daslab_optimizers/micro_adam/__init__.py +0 -0
- {ista_daslab_optimizers-1.1.2 → ista_daslab_optimizers-1.1.3}/ista_daslab_optimizers/sparse_mfac/sparse_core_mfac_w_ef.py +0 -0
- {ista_daslab_optimizers-1.1.2 → ista_daslab_optimizers-1.1.3}/ista_daslab_optimizers/sparse_mfac/sparse_mfac.py +0 -0
- {ista_daslab_optimizers-1.1.2 → ista_daslab_optimizers-1.1.3}/ista_daslab_optimizers/tools.py +0 -0
- {ista_daslab_optimizers-1.1.2 → ista_daslab_optimizers-1.1.3}/ista_daslab_optimizers.egg-info/dependency_links.txt +0 -0
- {ista_daslab_optimizers-1.1.2 → ista_daslab_optimizers-1.1.3}/ista_daslab_optimizers.egg-info/requires.txt +0 -0
- {ista_daslab_optimizers-1.1.2 → ista_daslab_optimizers-1.1.3}/ista_daslab_optimizers.egg-info/top_level.txt +0 -0
- {ista_daslab_optimizers-1.1.2 → ista_daslab_optimizers-1.1.3}/kernels/dense_mfac/dense_mfac.cpp +0 -0
- {ista_daslab_optimizers-1.1.2 → ista_daslab_optimizers-1.1.3}/kernels/dense_mfac/dense_mfac_kernel.cu +0 -0
- {ista_daslab_optimizers-1.1.2 → ista_daslab_optimizers-1.1.3}/kernels/micro_adam/micro_adam.cpp +0 -0
- {ista_daslab_optimizers-1.1.2 → ista_daslab_optimizers-1.1.3}/kernels/micro_adam/micro_adam_asymm_block_quant.cu +0 -0
- {ista_daslab_optimizers-1.1.2 → ista_daslab_optimizers-1.1.3}/kernels/micro_adam/micro_adam_asymm_block_quant_inv.cu +0 -0
- {ista_daslab_optimizers-1.1.2 → ista_daslab_optimizers-1.1.3}/kernels/micro_adam/micro_adam_update.cu +0 -0
- {ista_daslab_optimizers-1.1.2 → ista_daslab_optimizers-1.1.3}/kernels/sparse_mfac/sparse_mfac.cpp +0 -0
- {ista_daslab_optimizers-1.1.2 → ista_daslab_optimizers-1.1.3}/kernels/sparse_mfac/sparse_mfac_LCG_kernel.cu +0 -0
- {ista_daslab_optimizers-1.1.2 → ista_daslab_optimizers-1.1.3}/kernels/sparse_mfac/sparse_mfac_SP_kernel.cu +0 -0
- {ista_daslab_optimizers-1.1.2 → ista_daslab_optimizers-1.1.3}/kernels/tools/tools.cpp +0 -0
- {ista_daslab_optimizers-1.1.2 → ista_daslab_optimizers-1.1.3}/kernels/tools/tools_kernel.cu +0 -0
- {ista_daslab_optimizers-1.1.2 → ista_daslab_optimizers-1.1.3}/kernels/utils.h +0 -0
- {ista_daslab_optimizers-1.1.2 → ista_daslab_optimizers-1.1.3}/setup.cfg +0 -0
- {ista_daslab_optimizers-1.1.2 → ista_daslab_optimizers-1.1.3}/setup.py +0 -0
{ista_daslab_optimizers-1.1.2/ista_daslab_optimizers.egg-info → ista_daslab_optimizers-1.1.3}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ista_daslab_optimizers
-Version: 1.1.2
+Version: 1.1.3
 Summary: Deep Learning optimizers developed in the Distributed Algorithms and Systems group (DASLab) @ Institute of Science and Technology Austria (ISTA)
 Author-email: Ionut-Vlad Modoranu <ionut-vlad.modoranu@ist.ac.at>
 Maintainer-email: Ionut-Vlad Modoranu <ionut-vlad.modoranu@ist.ac.at>

@@ -289,6 +289,8 @@ optimizer = MicroAdam(
 # Versions summary:
 
 ---
+- **1.1.3** @ September 5th, 2024:
+  - allow using `SparseCoreMFACwithEF` separately by importing it in `sparse_mfac.__init__.py`
 - **1.1.2** @ August 1st, 2024:
   - ***[1.1.0]:*** added support to densify the final update: introduced parameter alpha that controls the fraction of error feedback
 (EF) to be integrated into the update to make it dense. Finally, the fraction alpha will be discarded from the EF at
{ista_daslab_optimizers-1.1.2 → ista_daslab_optimizers-1.1.3}/README.md

@@ -64,6 +64,8 @@ optimizer = MicroAdam(
 # Versions summary:
 
 ---
+- **1.1.3** @ September 5th, 2024:
+  - allow using `SparseCoreMFACwithEF` separately by importing it in `sparse_mfac.__init__.py`
 - **1.1.2** @ August 1st, 2024:
   - ***[1.1.0]:*** added support to densify the final update: introduced parameter alpha that controls the fraction of error feedback
 (EF) to be integrated into the update to make it dense. Finally, the fraction alpha will be discarded from the EF at
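The new changelog entry only re-exports an existing class, so the practical effect is a shorter import path. A minimal sketch of the before/after imports, assuming `SparseCoreMFACwithEF` is defined in `sparse_core_mfac_w_ef.py` (the module listed in the file table above); the 1.1.2-style path is an assumption, not confirmed by this diff:

```python
# Import sketch only; assumes SparseCoreMFACwithEF lives in
# ista_daslab_optimizers/sparse_mfac/sparse_core_mfac_w_ef.py.

# Up to 1.1.2: import straight from the module file.
from ista_daslab_optimizers.sparse_mfac.sparse_core_mfac_w_ef import SparseCoreMFACwithEF

# From 1.1.3: the subpackage __init__.py re-exports the class.
from ista_daslab_optimizers.sparse_mfac import SparseCoreMFACwithEF
```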
{ista_daslab_optimizers-1.1.2 → ista_daslab_optimizers-1.1.3}/ista_daslab_optimizers/micro_adam/micro_adam.py

@@ -15,7 +15,7 @@ class MicroAdam(torch.optim.Optimizer):
         defaults = dict(lr=lr, weight_decay=weight_decay, eps=eps, alpha=alpha)
         super(MicroAdam, self).__init__(params, defaults)
 
-        assert 0 <= alpha < 1, 'Alpha must be in the [0, 1) interval'
+        assert (0 <= alpha < 1) or alpha == -2, 'Alpha must be in the [0, 1) interval or -2'
 
         self.m = m
         self.lr = lr

@@ -27,7 +27,9 @@ class MicroAdam(torch.optim.Optimizer):
         self.beta2 = betas[1]
         self.eps = eps
 
-        self.
+        self.densify_update_using_ef = (self.alpha > 0)
+        self.densify_update_using_quant_error = (self.alpha == -2)
+
         self.model_size = sum([p.numel() for group in self.param_groups for p in group['params']])
 
         self.steps = 0 # how many optimization steps were performed so far
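Read together, the assertion and the two new flags establish the `alpha` convention for this release. A standalone sketch of that convention (the helper function below is illustrative only, not part of the package API):

```python
# Mirrors the flag logic added above; the helper itself is not package API.
def densification_mode(alpha: float) -> str:
    assert (0 <= alpha < 1) or alpha == -2, 'Alpha must be in the [0, 1) interval or -2'
    if alpha == -2:
        return 'quant_error'      # densify_update_using_quant_error
    if alpha > 0:
        return 'error_feedback'   # densify_update_using_ef
    return 'sparse'               # neither flag set: plain sparse update

print(densification_mode(0.1))   # error_feedback
print(densification_mode(-2))    # quant_error
print(densification_mode(0.0))   # sparse
```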
@@ -41,8 +43,12 @@ class MicroAdam(torch.optim.Optimizer):
         self.max_floats = ista_daslab_tools.get_max_floats_for_shared_memory_per_thread_block()
         self.d_block_size = self.max_floats // 2 // int(100 / self.shared_memory_carveout)
 
-
-
+        if torch.distributed.is_initialized():
+            self.fsdp_dict_size_count = [{} for _ in range(
+                torch.distributed.get_world_size())] # key = layer size, value = how many layers of that size the model has (per worker)
+        else:
+            self.fsdp_dict_size_count = [{}]
+
         self.dict_size_count = {} # key = layer size, value = how many layers of that size the model has
         for param in self.param_groups:
             for p in param['params']:

@@ -56,7 +62,10 @@ class MicroAdam(torch.optim.Optimizer):
                 layer_size = p.numel()
                 st = self.state[p]
 
-                rank = torch.distributed.get_rank()
+                rank = torch.distributed.get_rank() if torch.distributed.is_initialized() else 0
+
+                if self.densify_update_using_quant_error:
+                    st['quant_err'] = torch.zeros_like(p)
 
                 st['blocks'] = max(1, int(math.floor(self.blocks * layer_size * self.fsdp_dict_size_count[rank][layer_size] / self.model_size)))
 

@@ -96,7 +105,7 @@ class MicroAdam(torch.optim.Optimizer):
             loss = closure()
 
         if self.steps == 1:
-            rank = torch.distributed.get_rank()
+            rank = torch.distributed.get_rank() if torch.distributed.is_initialized() else 0
             for param in self.param_groups:
                 for p in param['params']:
                     if p is not None:
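The three hunks above apply the same guard so the optimizer also works when no `torch.distributed` process group exists (plain single-GPU training). A compact sketch of the pattern; the helper names below are mine, not the package's:

```python
# Illustrative helpers only: fall back to single-process defaults when
# torch.distributed has not been initialized.
import torch.distributed as dist

def safe_rank() -> int:
    return dist.get_rank() if dist.is_initialized() else 0

def safe_world_size() -> int:
    return dist.get_world_size() if dist.is_initialized() else 1

# With the fallback, the per-worker bookkeeping collapses to one entry:
fsdp_dict_size_count = [{} for _ in range(safe_world_size())]
rank = safe_rank()
assert fsdp_dict_size_count[rank] == {}
```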
@@ -106,7 +115,7 @@ class MicroAdam(torch.optim.Optimizer):
 
         time_start = time.time()
 
-        norm_g, norm_u, norm_e, sparsity_u = 0, 0, 0, 0
+        norm_qe, norm_g, norm_u, norm_e, sparsity_u, sparsity_qe = 0, 0, 0, 0, 0, 0
 
         for group in self.param_groups:
             lr = group['lr']

@@ -119,23 +128,28 @@ class MicroAdam(torch.optim.Optimizer):
                 if p is None:
                     continue
 
-                ng, nu, ne, sp_u = self.update_step(p, lr, wd)
+                nqe, ng, nu, ne, sp_u, sp_qe = self.update_step(p, lr, wd)
+                norm_qe += nqe
                 norm_g += ng
                 norm_u += nu
                 norm_e += ne
                 sparsity_u += sp_u
+                sparsity_qe += sp_qe
 
         # torch.cuda.synchronize()
         time_end = time.time()
         elapsed_step = time_end - time_start
-        self._log(norm_g, norm_u, norm_e, sparsity_u, elapsed_step)
+        self._log(norm_qe, norm_g, norm_u, norm_e, sparsity_u, sparsity_qe, elapsed_step)
 
         return loss
 
     @torch.no_grad()
     def update_step(self, p, lr, wd):
-        norm_g, norm_u, norm_e, sp_u = 0, 0, 0, 0
+        norm_qe, norm_g, norm_u, norm_e, sp_u, sp_qe = 0, 0, 0, 0, 0, 0
 
+        # if p.grad.dtype != torch.bfloat16:
+        #     grad = p.grad.to(dtype=torch.bfloat16).reshape(-1)
+        # else:
         grad = p.grad.view(-1)
 
         if self.steps % self.log_interval == 0:
@@ -216,6 +230,48 @@ class MicroAdam(torch.optim.Optimizer):
         ##### STEP 8
         ista_daslab_micro_adam.asymm_block_quant(d, self.quant_block_size, error, min_vals, max_vals, grad) # error = Q(a, min, max)
 
+        # weight decay step
+        if wd > 0:
+            p.mul_(1 - lr * wd)
+
+        ##### NEW: densify using quant error
+        if self.densify_update_using_quant_error:
+            # When entering this if-statement, we have:
+            # - p is theta_t
+            # - p.grad is a_t (from step 6 in algorithm 1)
+            # - error is e_t+1 (from step 8 in algorithm 1)
+            #
+            # Below we have the formula to update the model parameters:
+            # [a = -1] with lr
+            #     theta_t+1 = theta_t - lr * (a_t - Qinv(e_t+1)) - lr * u_t
+            #               = theta_t - lr * a_t + lr * Qinv(e_t+1) - lr * u_t
+            #               = theta_t - lr * a_t      # STEP A below, in this if statement
+            #                 + lr * Qinv(e_t+1)      # STEP B below, in this if statement
+            #                 - lr * u_t              # this is steps 10-11
+            #
+            # [a = -2] without lr
+            #     theta_t+1 = theta_t - (a_t - Qinv(e_t+1)) - lr * u_t
+            #               = theta_t - a_t + Qinv(e_t+1) - lr * u_t
+            #               = theta_t - a_t           # STEP A below, in this if statement
+            #                 + Qinv(e_t+1)           # STEP B below, in this if statement
+            #                 - lr * u_t              # this is steps 10-11
+            quant_err = st['quant_err']
+            quant_err.zero_()
+            quant_err.add_(p.grad)
+
+            ##### STEP A
+            p.add_(p.grad, alpha=-1)
+
+            ##### STEP B
+            p.grad.zero_() # zerorize to prepare the accumulator for Qinv
+            ista_daslab_micro_adam.asymm_block_quant_inv(d, self.quant_block_size, error, min_vals, max_vals, grad, 1)
+            p.add_(p.grad)
+
+            quant_err.sub_(p.grad)
+
+            norm_qe = quant_err.norm(p=2) ** 2
+            sp_qe = (quant_err == 0).sum()
+
         ##### STEPS 10-11
         grad.zero_()
         ista_daslab_micro_adam.compute_microadam_update(blocks, # blocks
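The comment block carries the actual argument: subtracting `a_t` and then adding back `Qinv(e_t+1)` is the same as subtracting only the quantization error, which is the dense residual that "densifies" the update when `alpha == -2`. A tiny self-contained check of that algebra; the rounding quantizer below is a stand-in for the package's asymmetric block-quantization kernels, not the real `Q`/`Qinv`:

```python
# Toy numeric check of the algebra in the comments above, using a crude
# round-to-1-decimal quantizer in place of the CUDA kernels.
import torch

def Q(x):     # toy quantizer: keep one decimal
    return torch.round(x * 10)

def Qinv(q):  # toy dequantizer
    return q / 10.0

theta = torch.tensor([1.0, -2.0, 0.5])
a_t = torch.tensor([0.123, -0.456, 0.789])   # accumulator after step 6
e_next = Q(a_t)                              # e_t+1 = Q(a_t), as in step 8

# STEP A + STEP B from the hunk: theta - a_t + Qinv(e_t+1) ...
via_steps = theta - a_t + Qinv(e_next)

# ... equals subtracting only the quantization error a_t - Qinv(Q(a_t)):
quant_err = a_t - Qinv(e_next)
direct = theta - quant_err

assert torch.allclose(via_steps, direct)
print(quant_err)  # dense residual applied to the parameters
```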
@@ -237,16 +293,22 @@ class MicroAdam(torch.optim.Optimizer):
         ##### STEP 12: # side idea: only decay the weights that are update
 
         ##### if PRETRAINING #1
-        if self.
+        if self.densify_update_using_ef: # we add alpha * EF to update that is stored in grad buffer
             # p.grad += alpha * Qinv(error), alpha=0.1
             ista_daslab_micro_adam.asymm_block_quant_inv(d, self.quant_block_size, error, min_vals, max_vals, grad, self.alpha)
         ##### END IF PRETRAINING #1
 
         # if alpha > 0, then the update u=p.grad is dense now
-
+
+        # update model using MicroAdam update stored in p.grad
+        p.add_(p.grad, alpha=-lr)
+
+        if self.steps % self.log_interval == 0:
+            norm_u = grad.norm(p=2) ** 2
+            sp_u = (grad == 0).sum() # check sparsity before zerorizing
 
         ##### if PRETRAINING #2
-        if self.
+        if self.densify_update_using_ef:
             grad.zero_()
             ista_daslab_micro_adam.asymm_block_quant_inv(d, self.quant_block_size, error, min_vals, max_vals, grad, 1-self.alpha)
 
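For contrast with the new quantization-error path, this pre-existing `alpha > 0` path splits the error feedback: a fraction `alpha` is folded into the update (PRETRAINING #1) and the EF buffer is rebuilt from the remaining `1 - alpha` (PRETRAINING #2). A toy accounting sketch; plain tensors stand in for the quantized EF state, and the re-quantization of the remaining EF happens outside this hunk:

```python
# Illustrative accounting of the alpha-split described above.
import torch

alpha = 0.1
u = torch.tensor([0.0, 0.3, 0.0])       # sparse MicroAdam update (steps 10-11)
ef = torch.tensor([0.02, -0.01, 0.05])  # dequantized error feedback, Qinv(error)

dense_update = u + alpha * ef           # PRETRAINING #1: update becomes dense
ef_remaining = (1 - alpha) * ef         # PRETRAINING #2: EF keeps the rest

# No mass is lost: what left the EF is exactly what entered the update.
assert torch.allclose(dense_update - u + ef_remaining, ef)
```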
@@ -256,27 +318,29 @@ class MicroAdam(torch.optim.Optimizer):
 
         # compute error norm
         if self.steps % self.log_interval == 0:
-            norm_u = grad.norm(p=2) ** 2
-            sp_u = (grad == 0).sum() # check sparsity before zerorizing
-
             grad.zero_()
             ista_daslab_micro_adam.asymm_block_quant_inv(d, self.quant_block_size, error, min_vals, max_vals, grad, 1.0)
 
             norm_e = grad.norm(p=2) ** 2
 
-
+        # p.grad = p.grad.to(dtype=original_grad_type)
+
+        return norm_qe, norm_g, norm_u, norm_e, sp_u, sp_qe
 
-    def _log(self, norm_g, norm_u, norm_e, sparsity_u, elapsed_step):
+    def _log(self, norm_qe, norm_g, norm_u, norm_e, sparsity_u, sparsity_qe, elapsed_step):
         if self.steps % self.log_interval == 0:
-
-
-
-
+            if is_initialized():
+                sync_data = torch.tensor([norm_qe, norm_g, norm_u, norm_e, sparsity_u, sparsity_qe, elapsed_step], dtype=torch.float,
+                                         requires_grad=False).cuda() # correct, loss, size
+                all_reduce(sync_data, op=ReduceOp.SUM)
+                norm_qe, norm_g, norm_u, norm_e, sparsity_u, sparsity_qe, elapsed_step = sync_data
 
             if not is_initialized() or get_rank() == 0:
                 wandb_data = {
                     'step/optimizer_steps': self.steps,
                     'step/gpu_mem_usage': get_gpu_mem_usage(),
+                    'step/norm_quant_err': math.sqrt(norm_qe),
+                    'step/sparsity_quant_err': sparsity_qe / self.model_size * 100.,
                     'step/norm_g': math.sqrt(norm_g),
                     'step/norm_u': math.sqrt(norm_u),
                     'step/norm_error': math.sqrt(norm_e),
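The logging hunk uses a standard multi-worker pattern: pack the scalar metrics into one tensor, SUM-reduce it across ranks, and report from rank 0 only (squared norms are summed and only square-rooted at logging time, so the reduction composes). A minimal sketch of that pattern; the helper below is illustrative, skips the wandb call, and uses `.cuda()` the same way the hunk does:

```python
# Illustrative only: aggregate per-worker scalars and print from rank 0.
import torch
import torch.distributed as dist

def aggregate_and_report(metrics: dict) -> None:
    if dist.is_initialized():
        packed = torch.tensor(list(metrics.values()), dtype=torch.float).cuda()
        dist.all_reduce(packed, op=dist.ReduceOp.SUM)  # same op as the hunk above
        metrics = dict(zip(metrics.keys(), packed.tolist()))
    if not dist.is_initialized() or dist.get_rank() == 0:
        print({k: round(v, 4) for k, v in metrics.items()})

aggregate_and_report({'norm_g_sq': 1.25, 'norm_u_sq': 0.75, 'elapsed_step': 0.01})
```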
@@ -335,4 +399,4 @@ class MicroAdam(torch.optim.Optimizer):
         # st['quant_full_blocks_count'], st['d_index_quant'] = block_split(st['d'], self.quant_block_size)
         # st['error'] = torch.zeros(int(math.ceil(st['d'] / 2)), dtype=torch.uint8, device=self.device) # ceil(d/2) bytes
         # st['min_vals'] = torch.zeros(st['quant_full_blocks_count'] + 1, dtype=torch.bfloat16, device=self.device) # ceil(d/q_bsz)*2 bytes
-        # st['max_vals'] = torch.zeros(st['quant_full_blocks_count'] + 1, dtype=torch.bfloat16, device=self.device) # ceil(d/q_bsz)*2 bytes
+        # st['max_vals'] = torch.zeros(st['quant_full_blocks_count'] + 1, dtype=torch.bfloat16, device=self.device) # ceil(d/q_bsz)*2 bytes

(The removed and added lines are textually identical; the change appears to be only the trailing newline at end of file.)
{ista_daslab_optimizers-1.1.2 → ista_daslab_optimizers-1.1.3/ista_daslab_optimizers.egg-info}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ista_daslab_optimizers
-Version: 1.1.2
+Version: 1.1.3
 Summary: Deep Learning optimizers developed in the Distributed Algorithms and Systems group (DASLab) @ Institute of Science and Technology Austria (ISTA)
 Author-email: Ionut-Vlad Modoranu <ionut-vlad.modoranu@ist.ac.at>
 Maintainer-email: Ionut-Vlad Modoranu <ionut-vlad.modoranu@ist.ac.at>

@@ -289,6 +289,8 @@ optimizer = MicroAdam(
 # Versions summary:
 
 ---
+- **1.1.3** @ September 5th, 2024:
+  - allow using `SparseCoreMFACwithEF` separately by importing it in `sparse_mfac.__init__.py`
 - **1.1.2** @ August 1st, 2024:
   - ***[1.1.0]:*** added support to densify the final update: introduced parameter alpha that controls the fraction of error feedback
 (EF) to be integrated into the update to make it dense. Finally, the fraction alpha will be discarded from the EF at
{ista_daslab_optimizers-1.1.2 → ista_daslab_optimizers-1.1.3}/ista_daslab_optimizers.egg-info/SOURCES.txt

@@ -3,7 +3,6 @@ MANIFEST.in
 README.md
 pyproject.toml
 setup.py
-./kernels/utils.h
 ./kernels/dense_mfac/dense_mfac.cpp
 ./kernels/dense_mfac/dense_mfac_kernel.cu
 ./kernels/micro_adam/micro_adam.cpp

@@ -32,4 +31,16 @@ ista_daslab_optimizers/micro_adam/__init__.py
 ista_daslab_optimizers/micro_adam/micro_adam.py
 ista_daslab_optimizers/sparse_mfac/__init__.py
 ista_daslab_optimizers/sparse_mfac/sparse_core_mfac_w_ef.py
-ista_daslab_optimizers/sparse_mfac/sparse_mfac.py
+ista_daslab_optimizers/sparse_mfac/sparse_mfac.py
+kernels/utils.h
+kernels/dense_mfac/dense_mfac.cpp
+kernels/dense_mfac/dense_mfac_kernel.cu
+kernels/micro_adam/micro_adam.cpp
+kernels/micro_adam/micro_adam_asymm_block_quant.cu
+kernels/micro_adam/micro_adam_asymm_block_quant_inv.cu
+kernels/micro_adam/micro_adam_update.cu
+kernels/sparse_mfac/sparse_mfac.cpp
+kernels/sparse_mfac/sparse_mfac_LCG_kernel.cu
+kernels/sparse_mfac/sparse_mfac_SP_kernel.cu
+kernels/tools/tools.cpp
+kernels/tools/tools_kernel.cu