mmgp 3.4.7__py3-none-any.whl → 3.4.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mmgp might be problematic. Click here for more details.
- mmgp/offload.py +28 -21
- {mmgp-3.4.7.dist-info → mmgp-3.4.8.dist-info}/METADATA +2 -2
- mmgp-3.4.8.dist-info/RECORD +9 -0
- {mmgp-3.4.7.dist-info → mmgp-3.4.8.dist-info}/WHEEL +1 -1
- mmgp-3.4.7.dist-info/RECORD +0 -9
- {mmgp-3.4.7.dist-info → mmgp-3.4.8.dist-info}/licenses/LICENSE.md +0 -0
- {mmgp-3.4.7.dist-info → mmgp-3.4.8.dist-info}/top_level.txt +0 -0
mmgp/offload.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# ------------------ Memory Management 3.4.7 for the GPU Poor by DeepBeepMeep (mmgp)------------------
|
|
1
|
+
# ------------------ Memory Management 3.4.8 for the GPU Poor by DeepBeepMeep (mmgp)------------------
|
|
2
2
|
#
|
|
3
3
|
# This module contains multiples optimisations so that models such as Flux (and derived), Mochi, CogView, HunyuanVideo, ... can run smoothly on a 24 GB GPU limited card.
|
|
4
4
|
# This a replacement for the accelerate library that should in theory manage offloading, but doesn't work properly with models that are loaded / unloaded several
|
|
@@ -448,9 +448,9 @@ def _pin_sd_to_memory(sd, sd_name, tied_weights = None, gig_tensor_size = BIG_TE
|
|
|
448
448
|
print(f"'{','.join(names_list)}' was partially pinned to reserved RAM: {last_big_tensor} large blocks spread across {total/ONE_MB:.2f} MB")
|
|
449
449
|
else:
|
|
450
450
|
if len(names_list) > 0:
|
|
451
|
-
print(f"'{','.join(names_list)}' was pinned entirely to reserved RAM: {last_big_tensor} large blocks spread across {total/ONE_MB:.2f} MB")
|
|
452
|
-
else:
|
|
453
451
|
print(f"'{','.join(names_list)}' were pinned entirely to reserved RAM: {last_big_tensor} large blocks spread across {total/ONE_MB:.2f} MB")
|
|
452
|
+
else:
|
|
453
|
+
print(f"'{','.join(names_list)}' was pinned entirely to reserved RAM: {last_big_tensor} large blocks spread across {total/ONE_MB:.2f} MB")
|
|
454
454
|
|
|
455
455
|
|
|
456
456
|
return
|
|
@@ -658,7 +658,7 @@ def _welcome():
|
|
|
658
658
|
if welcome_displayed:
|
|
659
659
|
return
|
|
660
660
|
welcome_displayed = True
|
|
661
|
-
print(f"{BOLD}{HEADER}************ Memory Management for the GPU Poor (mmgp 3.4.7) by DeepBeepMeep ************{ENDC}{UNBOLD}")
|
|
661
|
+
print(f"{BOLD}{HEADER}************ Memory Management for the GPU Poor (mmgp 3.4.8) by DeepBeepMeep ************{ENDC}{UNBOLD}")
|
|
662
662
|
|
|
663
663
|
def change_dtype(model, new_dtype, exclude_buffers = False):
|
|
664
664
|
for submodule_name, submodule in model.named_modules():
|
|
@@ -1226,7 +1226,7 @@ def load_loras_into_model(model, lora_path, lora_multi = None, activate_all_lora
|
|
|
1226
1226
|
|
|
1227
1227
|
model._loras_errors = errors
|
|
1228
1228
|
if not check_only:
|
|
1229
|
-
if pinnedLora:
|
|
1229
|
+
if pinnedLora and len(pinned_sd_list) > 0:
|
|
1230
1230
|
_pin_sd_to_memory(pinned_sd_list, pinned_names_list)
|
|
1231
1231
|
model._loras_adapters = adapters
|
|
1232
1232
|
if activate_all_loras:
|
|
@@ -1407,14 +1407,14 @@ def load_model_data(model, file_path: str, do_quantize = False, quantizationType
|
|
|
1407
1407
|
if not (".safetensors" in file or ".sft" in file):
|
|
1408
1408
|
if pinToMemory:
|
|
1409
1409
|
raise Exception("Pinning to memory while loading only supported for safe tensors files")
|
|
1410
|
-
state_dict = torch.load(file, weights_only=True)
|
|
1410
|
+
state_dict = torch.load(file, weights_only=True, map_location="cpu")
|
|
1411
1411
|
if "module" in state_dict:
|
|
1412
1412
|
state_dict = state_dict["module"]
|
|
1413
1413
|
|
|
1414
1414
|
else:
|
|
1415
1415
|
basename = os.path.basename(file)
|
|
1416
1416
|
|
|
1417
|
-
if "
|
|
1417
|
+
if "-of-" in basename:
|
|
1418
1418
|
metadata = None
|
|
1419
1419
|
file_parts= basename.split("-")
|
|
1420
1420
|
parts_max = int(file_parts[-1][:5])
|
|
@@ -1621,9 +1621,12 @@ class HfHook:
|
|
|
1621
1621
|
def __init__(self):
|
|
1622
1622
|
self.execution_device = "cuda"
|
|
1623
1623
|
|
|
1624
|
-
def
|
|
1625
|
-
|
|
1624
|
+
def init_hook(self, module):
|
|
1625
|
+
return module
|
|
1626
1626
|
|
|
1627
|
+
def detach_hook(self, module):
|
|
1628
|
+
return module
|
|
1629
|
+
|
|
1627
1630
|
last_offload_obj = None
|
|
1628
1631
|
class offload:
|
|
1629
1632
|
def __init__(self):
|
|
@@ -2028,7 +2031,9 @@ class offload:
|
|
|
2028
2031
|
continue
|
|
2029
2032
|
lora_A_weight, lora_B_weight, diff_b, alpha = data
|
|
2030
2033
|
scaling = self._get_lora_scaling(loras_scaling, model, active_adapter) * alpha
|
|
2031
|
-
|
|
2034
|
+
if lora_A_weight != None:
|
|
2035
|
+
weight.addmm_(lora_B_weight, lora_A_weight, alpha= scaling )
|
|
2036
|
+
|
|
2032
2037
|
if diff_b != None:
|
|
2033
2038
|
if bias == None:
|
|
2034
2039
|
bias = diff_b.clone()
|
|
@@ -2059,17 +2064,20 @@ class offload:
|
|
|
2059
2064
|
lora_A, lora_B, diff_b, alpha = data
|
|
2060
2065
|
# dropout = self.lora_dropout[active_adapter]
|
|
2061
2066
|
scaling = self._get_lora_scaling(loras_scaling, model, active_adapter) * alpha
|
|
2062
|
-
|
|
2063
|
-
|
|
2064
|
-
if training:
|
|
2065
|
-
pass
|
|
2066
|
-
# y = lora_A(dropout(x))
|
|
2067
|
+
if lora_A == None:
|
|
2068
|
+
result.add_(diff_b, alpha=scaling)
|
|
2067
2069
|
else:
|
|
2068
|
-
|
|
2069
|
-
|
|
2070
|
-
|
|
2071
|
-
|
|
2072
|
-
|
|
2070
|
+
x = x.to(lora_A.dtype)
|
|
2071
|
+
|
|
2072
|
+
if training:
|
|
2073
|
+
pass
|
|
2074
|
+
# y = lora_A(dropout(x))
|
|
2075
|
+
else:
|
|
2076
|
+
y = torch.nn.functional.linear(x, lora_A, bias=None)
|
|
2077
|
+
y = torch.nn.functional.linear(y, lora_B, bias=diff_b)
|
|
2078
|
+
y*= scaling
|
|
2079
|
+
result+= y
|
|
2080
|
+
del y
|
|
2073
2081
|
|
|
2074
2082
|
return result
|
|
2075
2083
|
|
|
@@ -2405,7 +2413,6 @@ def all(pipe_or_dict_of_modules, pinnedMemory = False, pinnedPEFTLora = False, p
|
|
|
2405
2413
|
model_dtype = getattr(current_model, "_model_dtype", None)
|
|
2406
2414
|
# if model_dtype == None:
|
|
2407
2415
|
# model_dtype = getattr(current_model, "dtype", None)
|
|
2408
|
-
|
|
2409
2416
|
for _ , m in current_model.named_modules():
|
|
2410
2417
|
ignore_dtype = hasattr(m, "_lock_dtype")
|
|
2411
2418
|
for n, p in m.named_parameters(recurse = False):
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: mmgp
|
|
3
|
-
Version: 3.4.7
|
|
3
|
+
Version: 3.4.8
|
|
4
4
|
Summary: Memory Management for the GPU Poor
|
|
5
5
|
Author-email: deepbeepmeep <deepbeepmeep@yahoo.com>
|
|
6
6
|
Requires-Python: >=3.10
|
|
@@ -15,7 +15,7 @@ Dynamic: license-file
|
|
|
15
15
|
|
|
16
16
|
|
|
17
17
|
<p align="center">
|
|
18
|
-
<H2>Memory Management 3.4.7 for the GPU Poor by DeepBeepMeep</H2>
|
|
18
|
+
<H2>Memory Management 3.4.8 for the GPU Poor by DeepBeepMeep</H2>
|
|
19
19
|
</p>
|
|
20
20
|
|
|
21
21
|
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
+
mmgp/__init__.py,sha256=A9qBwyQMd1M7vshSTOBnFGP1MQvS2hXmTcTCMUcmyzE,509
|
|
3
|
+
mmgp/offload.py,sha256=UhILpsjJdWDv0IzOeis9KMgmPzcwZFsfPU04BLk_3To,121471
|
|
4
|
+
mmgp/safetensors2.py,sha256=4nKV13qCMabnNEB1TA_ueFbfGYYmiQ9racR_C6SsGug,18693
|
|
5
|
+
mmgp-3.4.8.dist-info/licenses/LICENSE.md,sha256=HjzvY2grdtdduZclbZ46B2M-XpT4MDCxFub5ZwTWq2g,93
|
|
6
|
+
mmgp-3.4.8.dist-info/METADATA,sha256=Ux77MBs2BZl3fDw5BeJyOPZgyra7eyk4c4PFpmQGhtk,16309
|
|
7
|
+
mmgp-3.4.8.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
8
|
+
mmgp-3.4.8.dist-info/top_level.txt,sha256=waGaepj2qVfnS2yAOkaMu4r9mJaVjGbEi6AwOUogU_U,14
|
|
9
|
+
mmgp-3.4.8.dist-info/RECORD,,
|
mmgp-3.4.7.dist-info/RECORD
DELETED
|
@@ -1,9 +0,0 @@
|
|
|
1
|
-
__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
-
mmgp/__init__.py,sha256=A9qBwyQMd1M7vshSTOBnFGP1MQvS2hXmTcTCMUcmyzE,509
|
|
3
|
-
mmgp/offload.py,sha256=wf5u4qaGKYm6OTyGg4TXCa3aA0h3nuhnml7qOzn6JOY,121124
|
|
4
|
-
mmgp/safetensors2.py,sha256=4nKV13qCMabnNEB1TA_ueFbfGYYmiQ9racR_C6SsGug,18693
|
|
5
|
-
mmgp-3.4.7.dist-info/licenses/LICENSE.md,sha256=HjzvY2grdtdduZclbZ46B2M-XpT4MDCxFub5ZwTWq2g,93
|
|
6
|
-
mmgp-3.4.7.dist-info/METADATA,sha256=DztABKlGwAcKYogKuTzsOWs0he3elTFZXAkWpH4yIEU,16309
|
|
7
|
-
mmgp-3.4.7.dist-info/WHEEL,sha256=zaaOINJESkSfm_4HQVc5ssNzHCPXhJm0kEUakpsEHaU,91
|
|
8
|
-
mmgp-3.4.7.dist-info/top_level.txt,sha256=waGaepj2qVfnS2yAOkaMu4r9mJaVjGbEi6AwOUogU_U,14
|
|
9
|
-
mmgp-3.4.7.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|