mmgp-3.2.7-py3-none-any.whl → mmgp-3.3.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- mmgp/offload.py +38 -36
- {mmgp-3.2.7.dist-info → mmgp-3.3.0.dist-info}/METADATA +4 -3
- mmgp-3.3.0.dist-info/RECORD +9 -0
- {mmgp-3.2.7.dist-info → mmgp-3.3.0.dist-info}/WHEEL +1 -1
- mmgp-3.2.7.dist-info/RECORD +0 -9
- {mmgp-3.2.7.dist-info → mmgp-3.3.0.dist-info/licenses}/LICENSE.md +0 -0
- {mmgp-3.2.7.dist-info → mmgp-3.3.0.dist-info}/top_level.txt +0 -0
mmgp/offload.py
CHANGED
@@ -1,4 +1,4 @@
-# ------------------ Memory Management 3.2.7 for the GPU Poor by DeepBeepMeep (mmgp)------------------
+# ------------------ Memory Management 3.3.0 for the GPU Poor by DeepBeepMeep (mmgp)------------------
 #
 # This module contains multiples optimisations so that models such as Flux (and derived), Mochi, CogView, HunyuanVideo, ... can run smoothly on a 24 GB GPU limited card.
 # This a replacement for the accelerate library that should in theory manage offloading, but doesn't work properly with models that are loaded / unloaded several
@@ -575,7 +575,7 @@ def _welcome():
     if welcome_displayed:
         return
     welcome_displayed = True
-    print(f"{BOLD}{HEADER}************ Memory Management for the GPU Poor (mmgp 3.2.7) by DeepBeepMeep ************{ENDC}{UNBOLD}")
+    print(f"{BOLD}{HEADER}************ Memory Management for the GPU Poor (mmgp 3.2.8) by DeepBeepMeep ************{ENDC}{UNBOLD}")
 
 def _extract_num_from_str(num_in_str):
     size = len(num_in_str)
@@ -882,10 +882,11 @@ def load_loras_into_model(model, lora_path, lora_multi = None, activate_all_lora
         return source + CrLf + text
 
     def trunc(text, sz):
+        text = str(text)
         if len(text) < sz:
-            return
+            return text
         else:
-            return
+            return text[0:sz] + '...'
 
     if not isinstance(lora_path, list):
         lora_path = [lora_path]
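The `trunc` helper is the first functional change: as rendered in this view the old return statements are cut off, but the 3.3.0 version coerces its argument to `str` (so non-string values such as key lists can be passed directly) and returns either the full text or a shortened copy marked with an ellipsis. A minimal sketch of the new behavior, not the packaged code itself:

```python
def trunc(text, sz):
    text = str(text)               # new in 3.3.0: accept non-string inputs such as key lists
    if len(text) < sz:
        return text                # short enough: return unchanged
    else:
        return text[0:sz] + '...'  # otherwise cut at sz characters and mark the truncation

print(trunc(["lora_A", "lora_B"], 10))  # -> ['lora_A',...
```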
@@ -910,6 +911,9 @@ def load_loras_into_model(model, lora_path, lora_multi = None, activate_all_lora
         skip = False
         state_dict = safetensors2.torch_load_file(path)
 
+
+
+
         if preprocess_sd != None:
             state_dict = preprocess_sd(state_dict)
 
@@ -947,42 +951,34 @@ def load_loras_into_model(model, lora_path, lora_multi = None, activate_all_lora
         # tied_weights = _extract_tie_weights_from_sd(state_dict, path) # to do
 
         clean_up = False
-
-        if
+        first_key = next(iter(state_dict), None)
+        if first_key == None:
             msg = f"Empty Lora '{path}'"
             error_msg = append(error_msg, msg)
             fail = True
 
         if not fail:
-
-
-            if "alpha" in k:
-                alpha_value = state_dict.pop(k)
-                if not ( (torch.is_tensor(alpha_value) and torch.is_floating_point(alpha_value)) or isinstance(
-                    alpha_value, float
-                )):
-                    network_alphas[k] = torch.tensor( float(alpha_value.item() ) )
-
-            pos = keys[0].find(".")
-            prefix = keys[0][0:pos]
+            pos = first_key.find(".")
+            prefix = first_key[0:pos]
             if prefix not in ["diffusion_model", "transformer"]:
                 msg = f"No compatible weight was found in Lora file '{path}'. Please check that it is compatible with the Diffusers format."
                 error_msg = append(error_msg, msg)
                 fail = True
 
         if not fail:
+
             state_dict = { k[ len(prefix) + 1:]: v for k, v in state_dict.items() if k.startswith(prefix) }
-            rank = {}
             clean_up = True
 
-
-            # if "lora_B" in key:
-            #     rank[key] = val.shape[1]
+            keys = list(state_dict.keys())
 
-
-
-
-
+            lora_alphas = {}
+            for k in keys:
+                if "alpha" in k:
+                    alpha_value = state_dict.pop(k)
+                    if torch.is_tensor(alpha_value):
+                        alpha_value = float(alpha_value.item())
+                    lora_alphas[k] = alpha_value
 
         invalid_keys = []
         unexpected_keys = []
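The rewritten block above drops the old `network_alphas` handling and instead pops every `alpha` entry out of the state dict into a `lora_alphas` map, converting tensor values to plain floats. A standalone sketch of that pass, using a made-up state dict for illustration:

```python
import torch

# Hypothetical minimal Lora state dict; real files follow the same naming pattern.
state_dict = {
    "blocks.0.attn.lora_A.weight": torch.zeros(16, 64),
    "blocks.0.attn.lora_B.weight": torch.zeros(64, 16),
    "blocks.0.attn.alpha": torch.tensor(8.0),
}

lora_alphas = {}
for k in list(state_dict.keys()):
    if "alpha" in k:
        alpha_value = state_dict.pop(k)           # alpha entries are removed from the weights
        if torch.is_tensor(alpha_value):
            alpha_value = float(alpha_value.item())
        lora_alphas[k] = alpha_value              # kept separately, keyed by full name

print(lora_alphas)  # {'blocks.0.attn.alpha': 8.0}
```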
@@ -1037,14 +1033,18 @@ def load_loras_into_model(model, lora_path, lora_multi = None, activate_all_lora
             loras_module_data = dict()
             loras_model_data[module] = loras_module_data
         loras_adapter_data = loras_module_data.get(adapter_name, None)
+        lora_A = None if lora_A == None else lora_A.to(torch.bfloat16)
+        lora_B = None if lora_B == None else lora_B.to(torch.bfloat16)
         if loras_adapter_data == None:
-
+            alpha = lora_alphas.get(k[:-len("lora_X.weight")] + "alpha", 1.)
+            loras_adapter_data = [lora_A, lora_B, alpha]
             loras_module_data[adapter_name] = loras_adapter_data
         elif lora_A != None:
             loras_adapter_data[0] = lora_A
         else:
             loras_adapter_data[1] = lora_B
         lora_A, lora_B, v, loras_module_data, loras_adapter_data = None, None, None, None, None
+    lora_alphas = None
 
     if len(invalid_keys) > 0:
         msg = "Lora '{path}' contains non Lora keys '{trunc(invalid_keys,200)}'"
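Two things change in this hunk: the A/B matrices are cast to `bfloat16` before being stored, and each adapter entry becomes a `[lora_A, lora_B, alpha]` triple instead of a pair. The alpha lookup works because `"lora_A.weight"` and `"lora_B.weight"` have the same length as the `"lora_X.weight"` placeholder, so stripping that many characters from either key yields the module path. A small sketch of the key derivation (the key name is illustrative):

```python
k = "blocks.0.attn.lora_A.weight"
alpha_key = k[:-len("lora_X.weight")] + "alpha"  # "lora_X.weight" is a length-13 placeholder
print(alpha_key)                                  # -> blocks.0.attn.alpha

lora_alphas = {"blocks.0.attn.alpha": 8.0}
alpha = lora_alphas.get(alpha_key, 1.)            # missing alpha falls back to 1.0 (no extra scaling)
```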
@@ -1409,7 +1409,9 @@ def extract_models(obj = None, prefix = None):
     elif prefix[ -1:] != "/":
         prefix + "/"
 
-    for name in dir(obj):
+    for name in dir(obj):
+        if name in ["_execution_device"]:
+            continue
         element = getattr(obj,name)
         if name in ("pipeline", "pipe"):
             pipeline = element
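`extract_models` walks `dir(obj)` and calls `getattr` on every attribute; 3.3.0 now skips `_execution_device`. A plausible reason (an assumption, not stated in the diff) is that on diffusers pipelines `_execution_device` is a property whose getter runs device-placement logic that should not fire during passive introspection. A sketch of the guarded walk:

```python
def iter_attributes(obj):
    # Hypothetical helper mirroring the loop above, not mmgp's actual code.
    for name in dir(obj):
        if name in ["_execution_device"]:
            continue                      # avoid triggering property getters with side effects
        yield name, getattr(obj, name)
```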
@@ -1548,10 +1550,10 @@ class offload:
             lora_data = lora_module.get(adapter, None)
             if lora_data == None:
                 continue
-            lora_A, lora_B = lora_data
+            lora_A, lora_B, alpha = lora_data
             key = adapter + '_GPU'
             if to_GPU:
-                lora_module[key] = [lora_A.cuda(), lora_B.cuda()]
+                lora_module[key] = [lora_A.cuda(non_blocking=True), lora_B.cuda(non_blocking=True), alpha]
             elif key in lora_module:
                 del lora_module[key]
 
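Moving Lora weights to the GPU now uses `non_blocking=True`, which queues the host-to-device copy asynchronously on the current CUDA stream; the copy only truly overlaps with compute when the source tensor sits in pinned (page-locked) host memory, which mmgp's pinning options can arrange. A sketch of the pattern (assumes a CUDA device is available):

```python
import torch

lora_A = torch.zeros(16, 4096).pin_memory()  # page-locked memory enables async copies
lora_B = torch.zeros(4096, 16).pin_memory()

lora_A_gpu = lora_A.cuda(non_blocking=True)  # enqueued on the current stream, returns immediately
lora_B_gpu = lora_B.cuda(non_blocking=True)

torch.cuda.synchronize()                     # only to make this standalone example deterministic
```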
@@ -1595,8 +1597,8 @@ class offload:
             lora_data = loras_model_data.get(parent_module, None)
             if lora_data != None:
                 loras_modules[parent_module]= lora_data
-
-
+        if len(loras_modules) > 0:
+            self._move_loras(loras_active_adapters, loras_modules, True)
 
         loaded_block = self.loaded_blocks[model_id]
 
@@ -1801,8 +1803,8 @@ class offload:
             data = loras_data.get(active_adapter + '_GPU', None)
             if data == None:
                 continue
-            lora_A_weight, lora_B_weight = data
-            scaling = get_scaling(active_adapter)
+            lora_A_weight, lora_B_weight, alpha = data
+            scaling = get_scaling(active_adapter) * alpha
             weight.addmm_(lora_B_weight, lora_A_weight, alpha= scaling )
             # base_weight += scaling * lora_B_weight @ lora_A_weight
 
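With the stored alpha folded into the scaling, the in-place merge computes `W += (scaling * alpha) * (B @ A)`, since `Tensor.addmm_(mat1, mat2, alpha=s)` adds `s * (mat1 @ mat2)` to the tensor it is called on. A quick numeric check of that identity:

```python
import torch

out_f, in_f, r = 8, 8, 2
weight = torch.randn(out_f, in_f)
lora_A = torch.randn(r, in_f)    # A: (rank, in_features)
lora_B = torch.randn(out_f, r)   # B: (out_features, rank)
scaling, alpha = 0.5, 8.0        # alpha now multiplies the adapter's own scaling

expected = weight + (scaling * alpha) * (lora_B @ lora_A)
weight.addmm_(lora_B, lora_A, alpha=scaling * alpha)
print(torch.allclose(weight, expected))  # True
```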
@@ -1822,9 +1824,9 @@ class offload:
             data = loras_data.get(active_adapter + '_GPU', None)
             if data == None:
                 continue
-            lora_A, lora_B = data
+            lora_A, lora_B, alpha = data
             # dropout = self.lora_dropout[active_adapter]
-            scaling = get_scaling(active_adapter)
+            scaling = get_scaling(active_adapter) * alpha
             x = x.to(lora_A.dtype)
 
             if training:
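The non-merged path gets the same treatment: the low-rank update is computed on the activations at forward time and scaled by `get_scaling(adapter) * alpha`. A rough sketch of that computation (simplified; the real hook also handles dropout and training mode):

```python
import torch

x = torch.randn(1, 8)              # incoming activations
lora_A = torch.randn(2, 8)         # (rank, in_features)
lora_B = torch.randn(8, 2)         # (out_features, rank)
scaling = 0.5 * 8.0                # get_scaling(active_adapter) * alpha

x = x.to(lora_A.dtype)
delta = (x @ lora_A.T) @ lora_B.T  # x A^T B^T, the low-rank correction
y_lora = delta * scaling           # added to the base layer's output
```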
@@ -2248,7 +2250,7 @@ def all(pipe_or_dict_of_modules, pinnedMemory = False, pinnedPEFTLora = False, l
                 print(f"Model '{model_id}' already pinned to reserved memory")
             else:
                 _pin_to_memory(current_model, model_id, partialPinning= partialPinning, pinnedPEFTLora = pinnedPEFTLora, verboseLevel=verboseLevel)
-
+
         current_budget = model_budgets[model_id]
         cur_blocks_prefix, prev_blocks_name, cur_blocks_name,cur_blocks_seq, is_mod_seq = None, None, None, -1, False
         self.loaded_blocks[model_id] = None
{mmgp-3.2.7.dist-info → mmgp-3.3.0.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
-Metadata-Version: 2.
+Metadata-Version: 2.4
 Name: mmgp
-Version: 3.2.7
+Version: 3.3.0
 Summary: Memory Management for the GPU Poor
 Author-email: deepbeepmeep <deepbeepmeep@yahoo.com>
 License: GNU GENERAL PUBLIC LICENSE
@@ -13,10 +13,11 @@ Requires-Dist: optimum-quanto
 Requires-Dist: accelerate
 Requires-Dist: safetensors
 Requires-Dist: psutil
+Dynamic: license-file
 
 
 <p align="center">
-    <H2>Memory Management 3.2.7 for the GPU Poor by DeepBeepMeep</H2>
+    <H2>Memory Management 3.3.0 for the GPU Poor by DeepBeepMeep</H2>
 </p>
 
mmgp-3.3.0.dist-info/RECORD
ADDED

@@ -0,0 +1,9 @@
+__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+mmgp/__init__.py,sha256=A9qBwyQMd1M7vshSTOBnFGP1MQvS2hXmTcTCMUcmyzE,509
+mmgp/offload.py,sha256=xdlYbB8nKUywAAMPcfCzJmCxYHvBB5vcZgv2wEQTtbE,105329
+mmgp/safetensors2.py,sha256=DCdlRH3769CTyraAmWAB3b0XrVua7z6ygQ-OyKgJN6A,16453
+mmgp-3.3.0.dist-info/licenses/LICENSE.md,sha256=HjzvY2grdtdduZclbZ46B2M-XpT4MDCxFub5ZwTWq2g,93
+mmgp-3.3.0.dist-info/METADATA,sha256=33eB_YmC6PciTkzi_Z_gsWWzoz6RJgyLbEItFatVghk,16153
+mmgp-3.3.0.dist-info/WHEEL,sha256=1tXe9gY0PYatrMPMDd6jXqjfpz_B-Wqm32CPfRC58XU,91
+mmgp-3.3.0.dist-info/top_level.txt,sha256=waGaepj2qVfnS2yAOkaMu4r9mJaVjGbEi6AwOUogU_U,14
+mmgp-3.3.0.dist-info/RECORD,,
mmgp-3.2.7.dist-info/RECORD
DELETED
@@ -1,9 +0,0 @@
-__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-mmgp/__init__.py,sha256=A9qBwyQMd1M7vshSTOBnFGP1MQvS2hXmTcTCMUcmyzE,509
-mmgp/offload.py,sha256=6qJrxM3EPqUHC04njZetVY2sr2x9DQwh13CZIM5oLIA,105417
-mmgp/safetensors2.py,sha256=DCdlRH3769CTyraAmWAB3b0XrVua7z6ygQ-OyKgJN6A,16453
-mmgp-3.2.7.dist-info/LICENSE.md,sha256=HjzvY2grdtdduZclbZ46B2M-XpT4MDCxFub5ZwTWq2g,93
-mmgp-3.2.7.dist-info/METADATA,sha256=zu_MxYB3j6sYNqQShyKnNwJkv0_j-fO6qOHoO8PUUfY,16131
-mmgp-3.2.7.dist-info/WHEEL,sha256=52BFRY2Up02UkjOa29eZOS2VxUrpPORXg1pkohGGUS8,91
-mmgp-3.2.7.dist-info/top_level.txt,sha256=waGaepj2qVfnS2yAOkaMu4r9mJaVjGbEi6AwOUogU_U,14
-mmgp-3.2.7.dist-info/RECORD,,
{mmgp-3.2.7.dist-info → mmgp-3.3.0.dist-info/licenses}/LICENSE.md
File without changes

{mmgp-3.2.7.dist-info → mmgp-3.3.0.dist-info}/top_level.txt
File without changes