mmgp 3.5.3__py3-none-any.whl → 3.5.6__py3-none-any.whl
This diff shows the content of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registries.
Potentially problematic release: this version of mmgp might be problematic.
- mmgp/offload.py +21 -11
- {mmgp-3.5.3.dist-info → mmgp-3.5.6.dist-info}/METADATA +2 -2
- mmgp-3.5.6.dist-info/RECORD +9 -0
- mmgp-3.5.3.dist-info/RECORD +0 -9
- {mmgp-3.5.3.dist-info → mmgp-3.5.6.dist-info}/WHEEL +0 -0
- {mmgp-3.5.3.dist-info → mmgp-3.5.6.dist-info}/licenses/LICENSE.md +0 -0
- {mmgp-3.5.3.dist-info → mmgp-3.5.6.dist-info}/top_level.txt +0 -0
mmgp/offload.py
CHANGED
@@ -1,4 +1,4 @@
-# ------------------ Memory Management 3.5.3 for the GPU Poor by DeepBeepMeep (mmgp)------------------
+# ------------------ Memory Management 3.5.6 for the GPU Poor by DeepBeepMeep (mmgp)------------------
 #
 # This module contains multiples optimisations so that models such as Flux (and derived), Mochi, CogView, HunyuanVideo, ... can run smoothly on a 24 GB GPU limited card.
 # This a replacement for the accelerate library that should in theory manage offloading, but doesn't work properly with models that are loaded / unloaded several
@@ -642,6 +642,7 @@ def _pin_to_memory(model, model_id, partialPinning = False, pinnedPEFTLora = Tru
         else:
             length = torch.numel(p.data) * p.data.element_size()
             p.data = _move_to_pinned_tensor(p.data, current_big_tensor, offset, length)
+
         tensor_no += 1
         del p
     del dummy_pinned_tensor
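Context for this hunk: _pin_to_memory copies every parameter into one large page-locked ("pinned") buffer so that later host-to-GPU transfers are faster and can run asynchronously. The change itself only adds a blank line; the sketch below illustrates the general staging technique, with a hypothetical move_to_pinned standing in for mmgp's _move_to_pinned_tensor (whose real signature may differ):

import torch

def move_to_pinned(t, big, offset, length):
    # big: a 1-D uint8 buffer allocated once with pin_memory=True
    view = big[offset: offset + length].view(t.dtype).reshape(t.shape)
    view.copy_(t)   # stage the parameter in page-locked host memory
    return view     # pinned tensors allow fast, async host->GPU copies

big = torch.empty(1 << 20, dtype=torch.uint8, pin_memory=True)
p = torch.randn(64, 64)
length = torch.numel(p) * p.element_size()   # same size computation as the hunk
p = move_to_pinned(p, big, 0, length)
assert p.is_pinned()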
@@ -667,7 +668,7 @@ def _welcome():
     if welcome_displayed:
         return
     welcome_displayed = True
-    print(f"{BOLD}{HEADER}************ Memory Management for the GPU Poor (mmgp 3.5.3) by DeepBeepMeep ************{ENDC}{UNBOLD}")
+    print(f"{BOLD}{HEADER}************ Memory Management for the GPU Poor (mmgp 3.5.6) by DeepBeepMeep ************{ENDC}{UNBOLD}")

 def change_dtype(model, new_dtype, exclude_buffers = False):
     for submodule_name, submodule in model.named_modules():
@@ -1145,6 +1146,8 @@ def load_loras_into_model(model, lora_path, lora_multi = None, activate_all_lora
                 break
             elif diff_b != None:
                 rank = diff_b.shape[0]
+                if not hasattr(module, "bias"):
+                    pass
                 if module.bias == None:
                     msg = f"Lora '{path}': Lora Basis is defined while it doesnt exist in model '{_get_module_name(model)}'. It is likely this Lora has been made for another version of this model."
                     fail = True
@@ -1248,6 +1251,7 @@ def sync_models_loras(model, model2):
     model2._loras_scaling = model._loras_scaling

 def unload_loras_from_model(model):
+    if model is None: return
     for _, v in model._loras_model_data.items():
         v.clear()
     for _, v in model._loras_model_shortcuts.items():
@@ -2087,25 +2091,27 @@ class offload:
             if data == None:
                 continue
             diff_w , _ , diff_b, alpha = data
+            scaling = self._get_lora_scaling( loras_scaling, model, active_adapter) * alpha
+            if scaling == 0:
+                continue
             if first_weight:
-                original_weight= weight.clone() if weight != None else None
+                original_weight= weight.clone() if weight is not None else None
                 first_weight = False
             if first_bias:
-                original_bias= bias.clone() if bias != None else None
+                original_bias= bias.clone() if bias is not None else None
                 first_bias = False
-
-            if diff_w != None:
+
+            if diff_w is not None:
                 weight.add_(diff_w, alpha= scaling)
                 diff_w = None
-            if diff_b != None:
+            if diff_b is not None:
                 bias.add_(diff_b, alpha= scaling)
                 diff_b = None

         ret = func(*args, **kwargs )

-
-        if original_bias != None:
-            bias.data = original_bias
+        if original_weight is not None: weight.data = original_weight
+        if original_bias is not None: bias.data = original_bias

         return ret
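A note on the "is not None" rewrites in this hunk: torch tensors overload the comparison operators, and a bare truthiness test calls the tensor's __bool__, which raises for any tensor with more than one element. An identity check against None is the one test that can never be intercepted. A standalone illustration (not mmgp code):

import torch

weight = torch.randn(4, 4)

try:
    _ = weight.clone() if weight else None   # truthiness invokes Tensor.__bool__
except RuntimeError as e:
    print(e)   # "Boolean value of Tensor with more than one element is ambiguous"

snapshot = weight.clone() if weight is not None else None   # identity test: always safe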
@@ -2131,6 +2137,8 @@ class offload:
                 continue
             lora_A_weight, lora_B_weight, diff_b, alpha = data
             scaling = self._get_lora_scaling(loras_scaling, model, active_adapter) * alpha
+            if scaling == 0:
+                continue
             if lora_A_weight != None:
                 weight.addmm_(lora_B_weight, lora_A_weight, alpha= scaling )
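The in-place merge on the line after the new guard computes weight += scaling * (lora_B_weight @ lora_A_weight), so a scaling of 0 is a mathematical no-op; the added check skips the matrix multiply entirely. A small self-contained sketch of the same merge, using generic names rather than mmgp's internals:

import torch

out_features, in_features, rank = 8, 8, 2
weight = torch.zeros(out_features, in_features)
lora_B = torch.randn(out_features, rank)
lora_A = torch.randn(rank, in_features)
scaling = 0.5

if scaling != 0:                                  # the guard added in this release
    weight.addmm_(lora_B, lora_A, alpha=scaling)  # weight += scaling * (lora_B @ lora_A)

assert torch.allclose(weight, scaling * (lora_B @ lora_A))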
@@ -2162,6 +2170,8 @@ class offload:
             lora_A, lora_B, diff_b, alpha = data
             # dropout = self.lora_dropout[active_adapter]
             scaling = self._get_lora_scaling(loras_scaling, model, active_adapter) * alpha
+            if scaling == 0:
+                continue
             if lora_A == None:
                 result.add_(diff_b, alpha=scaling)
             else:
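This hunk is the unmerged path: instead of folding the adapter into the weights, the contribution is added to the layer's output at forward time; for a bias-only adapter (lora_A == None) that is just result += scaling * diff_b. Below is a sketch of the usual output-side application, under the common LoRA convention that lora_A maps in→rank and lora_B maps rank→out (an assumption here, not confirmed by the diff):

import torch
import torch.nn.functional as F

batch, in_features, out_features, rank = 2, 8, 8, 2
x = torch.randn(batch, in_features)
lora_A = torch.randn(rank, in_features)
lora_B = torch.randn(out_features, rank)
diff_b = torch.randn(out_features)
scaling = 0.5

result = torch.zeros(batch, out_features)   # stand-in for the base layer's output
result.add_(F.linear(F.linear(x, lora_A), lora_B), alpha=scaling)  # scaled x @ A^T @ B^T
result.add_(diff_b, alpha=scaling)          # bias diff, as in the hunk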
@@ -2193,7 +2203,7 @@ class offload:
             if len(loras_data) == 0:
                 return old_forward(*args, **kwargs)
             else:
-
+                submodule.aaa = submodule_name
                 return self._lora_linear_forward(current_model, submodule, loras_data, *args, **kwargs)
         target_fn = lora_linear_forward
     else:
{mmgp-3.5.3.dist-info → mmgp-3.5.6.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mmgp
-Version: 3.5.3
+Version: 3.5.6
 Summary: Memory Management for the GPU Poor
 Author-email: deepbeepmeep <deepbeepmeep@yahoo.com>
 Requires-Python: >=3.10
@@ -15,7 +15,7 @@ Dynamic: license-file


 <p align="center">
-    <H2>Memory Management 3.5.3 for the GPU Poor by DeepBeepMeep</H2>
+    <H2>Memory Management 3.5.6 for the GPU Poor by DeepBeepMeep</H2>
 </p>

mmgp-3.5.6.dist-info/RECORD
ADDED
@@ -0,0 +1,9 @@
+__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+mmgp/__init__.py,sha256=A9qBwyQMd1M7vshSTOBnFGP1MQvS2hXmTcTCMUcmyzE,509
+mmgp/offload.py,sha256=uoif7gOSNmWg5eqvMkmuVkTErNL6q_QJ0Lmm0QP7FLo,126305
+mmgp/safetensors2.py,sha256=4nKV13qCMabnNEB1TA_ueFbfGYYmiQ9racR_C6SsGug,18693
+mmgp-3.5.6.dist-info/licenses/LICENSE.md,sha256=DD-WIS0BkPoWJ_8hQO3J8hMP9K_1-dyrYv1YCbkxcDU,94
+mmgp-3.5.6.dist-info/METADATA,sha256=hgR8mrkLImQWNkSU3ayt78df5whCozfVqzIUvV9jo1I,16309
+mmgp-3.5.6.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+mmgp-3.5.6.dist-info/top_level.txt,sha256=waGaepj2qVfnS2yAOkaMu4r9mJaVjGbEi6AwOUogU_U,14
+mmgp-3.5.6.dist-info/RECORD,,
mmgp-3.5.3.dist-info/RECORD
DELETED
@@ -1,9 +0,0 @@
-__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-mmgp/__init__.py,sha256=A9qBwyQMd1M7vshSTOBnFGP1MQvS2hXmTcTCMUcmyzE,509
-mmgp/offload.py,sha256=owsKU30CiOVioGExG28B9F93R09mTvIoe4RwuHv_f7s,125986
-mmgp/safetensors2.py,sha256=4nKV13qCMabnNEB1TA_ueFbfGYYmiQ9racR_C6SsGug,18693
-mmgp-3.5.3.dist-info/licenses/LICENSE.md,sha256=DD-WIS0BkPoWJ_8hQO3J8hMP9K_1-dyrYv1YCbkxcDU,94
-mmgp-3.5.3.dist-info/METADATA,sha256=lPWpMmbWiXt-ZOV5dRyrBnInGBjBiVkfbFYu19aeOkw,16309
-mmgp-3.5.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-mmgp-3.5.3.dist-info/top_level.txt,sha256=waGaepj2qVfnS2yAOkaMu4r9mJaVjGbEi6AwOUogU_U,14
-mmgp-3.5.3.dist-info/RECORD,,
{mmgp-3.5.3.dist-info → mmgp-3.5.6.dist-info}/WHEEL
File without changes
{mmgp-3.5.3.dist-info → mmgp-3.5.6.dist-info}/licenses/LICENSE.md
File without changes
{mmgp-3.5.3.dist-info → mmgp-3.5.6.dist-info}/top_level.txt
File without changes