mmgp 3.5.3__tar.gz → 3.5.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mmgp might be problematic. Click here for more details.
- {mmgp-3.5.3/src/mmgp.egg-info → mmgp-3.5.5}/PKG-INFO +2 -2
- {mmgp-3.5.3 → mmgp-3.5.5}/README.md +1 -1
- {mmgp-3.5.3 → mmgp-3.5.5}/pyproject.toml +1 -1
- {mmgp-3.5.3 → mmgp-3.5.5}/src/mmgp/offload.py +15 -4
- {mmgp-3.5.3 → mmgp-3.5.5/src/mmgp.egg-info}/PKG-INFO +2 -2
- {mmgp-3.5.3 → mmgp-3.5.5}/LICENSE.md +0 -0
- {mmgp-3.5.3 → mmgp-3.5.5}/setup.cfg +0 -0
- {mmgp-3.5.3 → mmgp-3.5.5}/src/__init__.py +0 -0
- {mmgp-3.5.3 → mmgp-3.5.5}/src/mmgp/__init__.py +0 -0
- {mmgp-3.5.3 → mmgp-3.5.5}/src/mmgp/safetensors2.py +0 -0
- {mmgp-3.5.3 → mmgp-3.5.5}/src/mmgp.egg-info/SOURCES.txt +0 -0
- {mmgp-3.5.3 → mmgp-3.5.5}/src/mmgp.egg-info/dependency_links.txt +0 -0
- {mmgp-3.5.3 → mmgp-3.5.5}/src/mmgp.egg-info/requires.txt +0 -0
- {mmgp-3.5.3 → mmgp-3.5.5}/src/mmgp.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: mmgp
|
|
3
|
-
Version: 3.5.3
|
|
3
|
+
Version: 3.5.5
|
|
4
4
|
Summary: Memory Management for the GPU Poor
|
|
5
5
|
Author-email: deepbeepmeep <deepbeepmeep@yahoo.com>
|
|
6
6
|
Requires-Python: >=3.10
|
|
@@ -15,7 +15,7 @@ Dynamic: license-file
|
|
|
15
15
|
|
|
16
16
|
|
|
17
17
|
<p align="center">
|
|
18
|
-
<H2>Memory Management 3.5.3 for the GPU Poor by DeepBeepMeep</H2>
|
|
18
|
+
<H2>Memory Management 3.5.5 for the GPU Poor by DeepBeepMeep</H2>
|
|
19
19
|
</p>
|
|
20
20
|
|
|
21
21
|
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# ------------------ Memory Management 3.5.3 for the GPU Poor by DeepBeepMeep (mmgp)------------------
|
|
1
|
+
# ------------------ Memory Management 3.5.5 for the GPU Poor by DeepBeepMeep (mmgp)------------------
|
|
2
2
|
#
|
|
3
3
|
# This module contains multiples optimisations so that models such as Flux (and derived), Mochi, CogView, HunyuanVideo, ... can run smoothly on a 24 GB GPU limited card.
|
|
4
4
|
# This a replacement for the accelerate library that should in theory manage offloading, but doesn't work properly with models that are loaded / unloaded several
|
|
@@ -642,6 +642,7 @@ def _pin_to_memory(model, model_id, partialPinning = False, pinnedPEFTLora = Tru
|
|
|
642
642
|
else:
|
|
643
643
|
length = torch.numel(p.data) * p.data.element_size()
|
|
644
644
|
p.data = _move_to_pinned_tensor(p.data, current_big_tensor, offset, length)
|
|
645
|
+
|
|
645
646
|
tensor_no += 1
|
|
646
647
|
del p
|
|
647
648
|
del dummy_pinned_tensor
|
|
@@ -667,7 +668,7 @@ def _welcome():
|
|
|
667
668
|
if welcome_displayed:
|
|
668
669
|
return
|
|
669
670
|
welcome_displayed = True
|
|
670
|
-
print(f"{BOLD}{HEADER}************ Memory Management for the GPU Poor (mmgp 3.5.
|
|
671
|
+
print(f"{BOLD}{HEADER}************ Memory Management for the GPU Poor (mmgp 3.5.5) by DeepBeepMeep ************{ENDC}{UNBOLD}")
|
|
671
672
|
|
|
672
673
|
def change_dtype(model, new_dtype, exclude_buffers = False):
|
|
673
674
|
for submodule_name, submodule in model.named_modules():
|
|
@@ -1145,6 +1146,8 @@ def load_loras_into_model(model, lora_path, lora_multi = None, activate_all_lora
|
|
|
1145
1146
|
break
|
|
1146
1147
|
elif diff_b != None:
|
|
1147
1148
|
rank = diff_b.shape[0]
|
|
1149
|
+
if not hasattr(module, "bias"):
|
|
1150
|
+
pass
|
|
1148
1151
|
if module.bias == None:
|
|
1149
1152
|
msg = f"Lora '{path}': Lora Basis is defined while it doesnt exist in model '{_get_module_name(model)}'. It is likely this Lora has been made for another version of this model."
|
|
1150
1153
|
fail = True
|
|
@@ -1248,6 +1251,7 @@ def sync_models_loras(model, model2):
|
|
|
1248
1251
|
model2._loras_scaling = model._loras_scaling
|
|
1249
1252
|
|
|
1250
1253
|
def unload_loras_from_model(model):
|
|
1254
|
+
if model is None: return
|
|
1251
1255
|
for _, v in model._loras_model_data.items():
|
|
1252
1256
|
v.clear()
|
|
1253
1257
|
for _, v in model._loras_model_shortcuts.items():
|
|
@@ -2087,13 +2091,16 @@ class offload:
|
|
|
2087
2091
|
if data == None:
|
|
2088
2092
|
continue
|
|
2089
2093
|
diff_w , _ , diff_b, alpha = data
|
|
2094
|
+
scaling = self._get_lora_scaling( loras_scaling, model, active_adapter) * alpha
|
|
2095
|
+
if scaling == 0:
|
|
2096
|
+
continue
|
|
2090
2097
|
if first_weight:
|
|
2091
2098
|
original_weight= weight.clone() if weight != None else None
|
|
2092
2099
|
first_weight = False
|
|
2093
2100
|
if first_bias:
|
|
2094
2101
|
original_bias= bias.clone() if bias != None else None
|
|
2095
2102
|
first_bias = False
|
|
2096
|
-
|
|
2103
|
+
|
|
2097
2104
|
if diff_w != None:
|
|
2098
2105
|
weight.add_(diff_w, alpha= scaling)
|
|
2099
2106
|
diff_w = None
|
|
@@ -2131,6 +2138,8 @@ class offload:
|
|
|
2131
2138
|
continue
|
|
2132
2139
|
lora_A_weight, lora_B_weight, diff_b, alpha = data
|
|
2133
2140
|
scaling = self._get_lora_scaling(loras_scaling, model, active_adapter) * alpha
|
|
2141
|
+
if scaling == 0:
|
|
2142
|
+
continue
|
|
2134
2143
|
if lora_A_weight != None:
|
|
2135
2144
|
weight.addmm_(lora_B_weight, lora_A_weight, alpha= scaling )
|
|
2136
2145
|
|
|
@@ -2162,6 +2171,8 @@ class offload:
|
|
|
2162
2171
|
lora_A, lora_B, diff_b, alpha = data
|
|
2163
2172
|
# dropout = self.lora_dropout[active_adapter]
|
|
2164
2173
|
scaling = self._get_lora_scaling(loras_scaling, model, active_adapter) * alpha
|
|
2174
|
+
if scaling == 0:
|
|
2175
|
+
continue
|
|
2165
2176
|
if lora_A == None:
|
|
2166
2177
|
result.add_(diff_b, alpha=scaling)
|
|
2167
2178
|
else:
|
|
@@ -2193,7 +2204,7 @@ class offload:
|
|
|
2193
2204
|
if len(loras_data) == 0:
|
|
2194
2205
|
return old_forward(*args, **kwargs)
|
|
2195
2206
|
else:
|
|
2196
|
-
|
|
2207
|
+
submodule.aaa = submodule_name
|
|
2197
2208
|
return self._lora_linear_forward(current_model, submodule, loras_data, *args, **kwargs)
|
|
2198
2209
|
target_fn = lora_linear_forward
|
|
2199
2210
|
else:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: mmgp
|
|
3
|
-
Version: 3.5.3
|
|
3
|
+
Version: 3.5.5
|
|
4
4
|
Summary: Memory Management for the GPU Poor
|
|
5
5
|
Author-email: deepbeepmeep <deepbeepmeep@yahoo.com>
|
|
6
6
|
Requires-Python: >=3.10
|
|
@@ -15,7 +15,7 @@ Dynamic: license-file
|
|
|
15
15
|
|
|
16
16
|
|
|
17
17
|
<p align="center">
|
|
18
|
-
<H2>Memory Management 3.5.3 for the GPU Poor by DeepBeepMeep</H2>
|
|
18
|
+
<H2>Memory Management 3.5.5 for the GPU Poor by DeepBeepMeep</H2>
|
|
19
19
|
</p>
|
|
20
20
|
|
|
21
21
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|