mmgp: 3.2.7-py3-none-any.whl → 3.2.8-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of mmgp might be problematic.
- mmgp/offload.py +30 -31
- {mmgp-3.2.7.dist-info → mmgp-3.2.8.dist-info}/METADATA +2 -2
- mmgp-3.2.8.dist-info/RECORD +9 -0
- {mmgp-3.2.7.dist-info → mmgp-3.2.8.dist-info}/WHEEL +1 -1
- mmgp-3.2.7.dist-info/RECORD +0 -9
- {mmgp-3.2.7.dist-info → mmgp-3.2.8.dist-info}/LICENSE.md +0 -0
- {mmgp-3.2.7.dist-info → mmgp-3.2.8.dist-info}/top_level.txt +0 -0
mmgp/offload.py
CHANGED
@@ -1,4 +1,4 @@
-# ------------------ Memory Management 3.2.7 for the GPU Poor by DeepBeepMeep (mmgp)------------------
+# ------------------ Memory Management 3.2.8 for the GPU Poor by DeepBeepMeep (mmgp)------------------
 #
 # This module contains multiples optimisations so that models such as Flux (and derived), Mochi, CogView, HunyuanVideo, ... can run smoothly on a 24 GB GPU limited card.
 # This a replacement for the accelerate library that should in theory manage offloading, but doesn't work properly with models that are loaded / unloaded several
@@ -575,7 +575,7 @@ def _welcome():
     if welcome_displayed:
         return
     welcome_displayed = True
-    print(f"{BOLD}{HEADER}************ Memory Management for the GPU Poor (mmgp 3.2.7) by DeepBeepMeep ************{ENDC}{UNBOLD}")
+    print(f"{BOLD}{HEADER}************ Memory Management for the GPU Poor (mmgp 3.2.8) by DeepBeepMeep ************{ENDC}{UNBOLD}")

 def _extract_num_from_str(num_in_str):
     size = len(num_in_str)
@@ -910,6 +910,9 @@ def load_loras_into_model(model, lora_path, lora_multi = None, activate_all_lora
         skip = False
         state_dict = safetensors2.torch_load_file(path)

+
+
+
         if preprocess_sd != None:
             state_dict = preprocess_sd(state_dict)

@@ -947,42 +950,34 @@ def load_loras_into_model(model, lora_path, lora_multi = None, activate_all_lora
         # tied_weights = _extract_tie_weights_from_sd(state_dict, path) # to do

         clean_up = False
-
-        if
+        first_key = next(iter(state_dict), None)
+        if first_key == None:
             msg = f"Empty Lora '{path}'"
             error_msg = append(error_msg, msg)
             fail = True

         if not fail:
-
-
-            if "alpha" in k:
-                alpha_value = state_dict.pop(k)
-                if not ( (torch.is_tensor(alpha_value) and torch.is_floating_point(alpha_value)) or isinstance(
-                    alpha_value, float
-                )):
-                    network_alphas[k] = torch.tensor( float(alpha_value.item() ) )
-
-            pos = keys[0].find(".")
-            prefix = keys[0][0:pos]
+            pos = first_key.find(".")
+            prefix = first_key[0:pos]
             if prefix not in ["diffusion_model", "transformer"]:
                 msg = f"No compatible weight was found in Lora file '{path}'. Please check that it is compatible with the Diffusers format."
                 error_msg = append(error_msg, msg)
                 fail = True

         if not fail:
+
             state_dict = { k[ len(prefix) + 1:]: v for k, v in state_dict.items() if k.startswith(prefix) }
-            rank = {}
             clean_up = True

-
-            # if "lora_B" in key:
-            #     rank[key] = val.shape[1]
+            keys = list(state_dict.keys())

-
-
-
-
+            lora_alphas = {}
+            for k in keys:
+                if "alpha" in k:
+                    alpha_value = state_dict.pop(k)
+                    if torch.is_tensor(alpha_value):
+                        alpha_value = float(alpha_value.item())
+                    lora_alphas[k] = alpha_value

         invalid_keys = []
         unexpected_keys = []
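For readers following the change: the rewritten block above stops folding "alpha" entries into network_alphas and instead collects them into a separate lora_alphas dict, converting tensor values to plain floats. Below is a minimal, self-contained sketch of that pattern; the state dict contents are invented for illustration and are not taken from any real Lora file.

import torch

# Invented example state dict, keys already stripped of the "transformer"/"diffusion_model" prefix.
state_dict = {
    "blocks.0.attn.lora_A.weight": torch.randn(16, 64),
    "blocks.0.attn.lora_B.weight": torch.randn(64, 16),
    "blocks.0.attn.alpha": torch.tensor(8.0),
}

# Pull every "alpha" entry out of the state dict, keeping plain floats,
# so only the lora_A / lora_B weight tensors remain to be loaded.
lora_alphas = {}
for k in list(state_dict.keys()):
    if "alpha" in k:
        alpha_value = state_dict.pop(k)
        if torch.is_tensor(alpha_value):
            alpha_value = float(alpha_value.item())
        lora_alphas[k] = alpha_value

print(lora_alphas)  # {'blocks.0.attn.alpha': 8.0}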
@@ -1037,14 +1032,18 @@ def load_loras_into_model(model, lora_path, lora_multi = None, activate_all_lora
                 loras_module_data = dict()
                 loras_model_data[module] = loras_module_data
             loras_adapter_data = loras_module_data.get(adapter_name, None)
+            lora_A = None if lora_A == None else lora_A.to(torch.bfloat16)
+            lora_B = None if lora_B == None else lora_B.to(torch.bfloat16)
             if loras_adapter_data == None:
-                loras_adapter_data = [lora_A, lora_B]
+                alpha = lora_alphas.get(k[:-len("lora_X.weight")] + "alpha", 1.)
+                loras_adapter_data = [lora_A, lora_B, alpha]
                 loras_module_data[adapter_name] = loras_adapter_data
             elif lora_A != None:
                 loras_adapter_data[0] = lora_A
             else:
                 loras_adapter_data[1] = lora_B
         lora_A, lora_B, v, loras_module_data, loras_adapter_data = None, None, None, None, None
+        lora_alphas = None

         if len(invalid_keys) > 0:
             msg = "Lora '{path}' contains non Lora keys '{trunc(invalid_keys,200)}'"
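The per-weight alpha is then looked up when a lora_A / lora_B pair is first registered, falling back to a neutral 1.0 when the file carries no alpha, and both tensors are cast to bfloat16 before being stored. A rough sketch of the key rewrite used for the lookup, with a made-up key:

# Made-up key; "lora_X.weight" is only used for its length, so the same slice
# works for both the "lora_A.weight" and "lora_B.weight" suffixes.
k = "blocks.0.attn.lora_A.weight"
lora_alphas = {"blocks.0.attn.alpha": 8.0}  # as collected earlier

alpha = lora_alphas.get(k[:-len("lora_X.weight")] + "alpha", 1.)
assert alpha == 8.0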
@@ -1548,10 +1547,10 @@ class offload:
             lora_data = lora_module.get(adapter, None)
             if lora_data == None:
                 continue
-            lora_A, lora_B = lora_data
+            lora_A, lora_B, alpha = lora_data
             key = adapter + '_GPU'
             if to_GPU:
-                lora_module[key] = [lora_A.cuda(), lora_B.cuda()]
+                lora_module[key] = [lora_A.cuda(), lora_B.cuda(), alpha]
             elif key in lora_module:
                 del lora_module[key]

@@ -1801,8 +1800,8 @@ class offload:
             data = loras_data.get(active_adapter + '_GPU', None)
             if data == None:
                 continue
-            lora_A_weight, lora_B_weight = data
-            scaling = get_scaling(active_adapter)
+            lora_A_weight, lora_B_weight, alpha = data
+            scaling = get_scaling(active_adapter) * alpha
             weight.addmm_(lora_B_weight, lora_A_weight, alpha= scaling )
             # base_weight += scaling * lora_B_weight @ lora_A_weight

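The stored alpha finally feeds into the scaling applied when a Lora is merged into a base weight: the effective factor becomes the adapter multiplier returned by get_scaling times alpha, and the merge itself is an in-place addmm_. A small numerical sketch of that operation, with arbitrary shapes and an assumed multiplier of 1.0:

import torch

out_dim, in_dim, rank = 64, 64, 16
weight = torch.zeros(out_dim, in_dim)   # base weight to merge into
lora_A = torch.randn(rank, in_dim)      # lora_A: (rank, in_features)
lora_B = torch.randn(out_dim, rank)     # lora_B: (out_features, rank)
multiplier, alpha = 1.0, 8.0            # multiplier stands in for get_scaling(adapter)

scaling = multiplier * alpha
# In-place equivalent of: weight += scaling * (lora_B @ lora_A)
weight.addmm_(lora_B, lora_A, alpha=scaling)

expected = scaling * (lora_B @ lora_A)
assert torch.allclose(weight, expected)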
@@ -1822,9 +1821,9 @@ class offload:
             data = loras_data.get(active_adapter + '_GPU', None)
             if data == None:
                 continue
-            lora_A, lora_B = data
+            lora_A, lora_B, alpha = data
             # dropout = self.lora_dropout[active_adapter]
-            scaling = get_scaling(active_adapter)
+            scaling = get_scaling(active_adapter) * alpha
             x = x.to(lora_A.dtype)

             if training:
@@ -2248,7 +2247,7 @@ def all(pipe_or_dict_of_modules, pinnedMemory = False, pinnedPEFTLora = False, l
                 print(f"Model '{model_id}' already pinned to reserved memory")
             else:
                 _pin_to_memory(current_model, model_id, partialPinning= partialPinning, pinnedPEFTLora = pinnedPEFTLora, verboseLevel=verboseLevel)
-
+
         current_budget = model_budgets[model_id]
         cur_blocks_prefix, prev_blocks_name, cur_blocks_name,cur_blocks_seq, is_mod_seq = None, None, None, -1, False
         self.loaded_blocks[model_id] = None
{mmgp-3.2.7.dist-info → mmgp-3.2.8.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: mmgp
-Version: 3.2.7
+Version: 3.2.8
 Summary: Memory Management for the GPU Poor
 Author-email: deepbeepmeep <deepbeepmeep@yahoo.com>
 License: GNU GENERAL PUBLIC LICENSE
@@ -16,7 +16,7 @@ Requires-Dist: psutil


 <p align="center">
-    <H2>Memory Management 3.2.7 for the GPU Poor by DeepBeepMeep</H2>
+    <H2>Memory Management 3.2.8 for the GPU Poor by DeepBeepMeep</H2>
 </p>


mmgp-3.2.8.dist-info/RECORD
ADDED
@@ -0,0 +1,9 @@
+__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+mmgp/__init__.py,sha256=A9qBwyQMd1M7vshSTOBnFGP1MQvS2hXmTcTCMUcmyzE,509
+mmgp/offload.py,sha256=Wwk3uV3ZJv3ApyX-vpzukOllkBOTkLwGm5qDadmqVqQ,105209
+mmgp/safetensors2.py,sha256=DCdlRH3769CTyraAmWAB3b0XrVua7z6ygQ-OyKgJN6A,16453
+mmgp-3.2.8.dist-info/LICENSE.md,sha256=HjzvY2grdtdduZclbZ46B2M-XpT4MDCxFub5ZwTWq2g,93
+mmgp-3.2.8.dist-info/METADATA,sha256=_3nE_8-UHpItfJsJsb4KUIs_WdROc68SCTNTP5lj_ho,16131
+mmgp-3.2.8.dist-info/WHEEL,sha256=beeZ86-EfXScwlR_HKu4SllMC9wUEj_8Z_4FJ3egI2w,91
+mmgp-3.2.8.dist-info/top_level.txt,sha256=waGaepj2qVfnS2yAOkaMu4r9mJaVjGbEi6AwOUogU_U,14
+mmgp-3.2.8.dist-info/RECORD,,
mmgp-3.2.7.dist-info/RECORD
DELETED
@@ -1,9 +0,0 @@
-__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-mmgp/__init__.py,sha256=A9qBwyQMd1M7vshSTOBnFGP1MQvS2hXmTcTCMUcmyzE,509
-mmgp/offload.py,sha256=6qJrxM3EPqUHC04njZetVY2sr2x9DQwh13CZIM5oLIA,105417
-mmgp/safetensors2.py,sha256=DCdlRH3769CTyraAmWAB3b0XrVua7z6ygQ-OyKgJN6A,16453
-mmgp-3.2.7.dist-info/LICENSE.md,sha256=HjzvY2grdtdduZclbZ46B2M-XpT4MDCxFub5ZwTWq2g,93
-mmgp-3.2.7.dist-info/METADATA,sha256=zu_MxYB3j6sYNqQShyKnNwJkv0_j-fO6qOHoO8PUUfY,16131
-mmgp-3.2.7.dist-info/WHEEL,sha256=52BFRY2Up02UkjOa29eZOS2VxUrpPORXg1pkohGGUS8,91
-mmgp-3.2.7.dist-info/top_level.txt,sha256=waGaepj2qVfnS2yAOkaMu4r9mJaVjGbEi6AwOUogU_U,14
-mmgp-3.2.7.dist-info/RECORD,,
{mmgp-3.2.7.dist-info → mmgp-3.2.8.dist-info}/LICENSE.md
File without changes
{mmgp-3.2.7.dist-info → mmgp-3.2.8.dist-info}/top_level.txt
File without changes