mmgp 3.4.9.tar.gz → 3.5.1.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of mmgp might be problematic.
- {mmgp-3.4.9/src/mmgp.egg-info → mmgp-3.5.1}/PKG-INFO +2 -2
- {mmgp-3.4.9 → mmgp-3.5.1}/README.md +1 -1
- {mmgp-3.4.9 → mmgp-3.5.1}/pyproject.toml +1 -1
- {mmgp-3.4.9 → mmgp-3.5.1}/src/mmgp/offload.py +41 -55
- {mmgp-3.4.9 → mmgp-3.5.1/src/mmgp.egg-info}/PKG-INFO +2 -2
- {mmgp-3.4.9 → mmgp-3.5.1}/LICENSE.md +0 -0
- {mmgp-3.4.9 → mmgp-3.5.1}/setup.cfg +0 -0
- {mmgp-3.4.9 → mmgp-3.5.1}/src/__init__.py +0 -0
- {mmgp-3.4.9 → mmgp-3.5.1}/src/mmgp/__init__.py +0 -0
- {mmgp-3.4.9 → mmgp-3.5.1}/src/mmgp/safetensors2.py +0 -0
- {mmgp-3.4.9 → mmgp-3.5.1}/src/mmgp.egg-info/SOURCES.txt +0 -0
- {mmgp-3.4.9 → mmgp-3.5.1}/src/mmgp.egg-info/dependency_links.txt +0 -0
- {mmgp-3.4.9 → mmgp-3.5.1}/src/mmgp.egg-info/requires.txt +0 -0
- {mmgp-3.4.9 → mmgp-3.5.1}/src/mmgp.egg-info/top_level.txt +0 -0
{mmgp-3.4.9/src/mmgp.egg-info → mmgp-3.5.1}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mmgp
-Version: 3.4.9
+Version: 3.5.1
 Summary: Memory Management for the GPU Poor
 Author-email: deepbeepmeep <deepbeepmeep@yahoo.com>
 Requires-Python: >=3.10
@@ -15,7 +15,7 @@ Dynamic: license-file


 <p align="center">
-<H2>Memory Management 3.4.9 for the GPU Poor by DeepBeepMeep</H2>
+<H2>Memory Management 3.5.1 for the GPU Poor by DeepBeepMeep</H2>
 </p>

{mmgp-3.4.9 → mmgp-3.5.1}/src/mmgp/offload.py

@@ -1,4 +1,4 @@
-# ------------------ Memory Management 3.4.9 for the GPU Poor by DeepBeepMeep (mmgp)------------------
+# ------------------ Memory Management 3.5.1 for the GPU Poor by DeepBeepMeep (mmgp)------------------
 #
 # This module contains multiples optimisations so that models such as Flux (and derived), Mochi, CogView, HunyuanVideo, ... can run smoothly on a 24 GB GPU limited card.
 # This a replacement for the accelerate library that should in theory manage offloading, but doesn't work properly with models that are loaded / unloaded several
@@ -658,7 +658,7 @@ def _welcome():
     if welcome_displayed:
         return
     welcome_displayed = True
-    print(f"{BOLD}{HEADER}************ Memory Management for the GPU Poor (mmgp 3.4.9) by DeepBeepMeep ************{ENDC}{UNBOLD}")
+    print(f"{BOLD}{HEADER}************ Memory Management for the GPU Poor (mmgp 3.5.1) by DeepBeepMeep ************{ENDC}{UNBOLD}")

 def change_dtype(model, new_dtype, exclude_buffers = False):
     for submodule_name, submodule in model.named_modules():
@@ -1019,33 +1019,18 @@ def load_loras_into_model(model, lora_path, lora_multi = None, activate_all_lora

     if split_linear_modules_map != None:
         new_state_dict = dict()
-
-        targets_B = { "."+k+".lora_B.weight" : k for k in split_linear_modules_map }
+        suffixes = [(".alpha", -2, False), (".lora_B.weight", -3, True), (".lora_A.weight", -3, False)]
         for module_name, module_data in state_dict.items():
-
-
-
-
-
-
-
-
-
-
-                    new_module_name = parent_module_name + "." + sub_name + ".lora_B.weight"
-                    new_state_dict[new_module_name] = subdata
-            elif any(module_name.endswith(suffix) for suffix in targets_A):
-                for suffix, target_module in targets_A.items():
-                    if module_name.endswith(suffix):
-                        break
-                parent_module_name = module_name[:-len(suffix)]
-                map = split_linear_modules_map[target_module]
-                mapped_modules = map["mapped_modules"]
-                for sub_name in mapped_modules :
-                    new_module_name = parent_module_name + "." + sub_name + ".lora_A.weight"
-                    new_state_dict[new_module_name] = module_data
-            else:
-                new_state_dict[module_name] = module_data
+            name_parts = module_name.split(".")
+            for suffix, pos, any_split in suffixes:
+                if module_name.endswith(suffix) and (map := split_linear_modules_map.get(name_parts[pos], None )) != None:
+                    parent_module_name, module_name = ".".join(name_parts[:pos]), None
+                    sub_data = torch.split(module_data, map["split_sizes"], dim=0) if any_split else [None] * len(map["mapped_modules"])
+                    for sub_name, subdata in zip(map["mapped_modules"], sub_data):
+                        new_module_name = parent_module_name + "." + sub_name + suffix
+                        new_state_dict[new_module_name] = subdata if any_split else module_data
+                    break
+            if module_name != None: new_state_dict[module_name] = module_data
         state_dict = new_state_dict
         del new_state_dict
         # tied_weights = _extract_tie_weights_from_sd(state_dict, path) # to do
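The rewritten block above replaces the per-suffix lookup dictionaries with a single suffix table and one pass over the state dict: for fused modules, `lora_B` weights are split row-wise across the sub-modules, while `lora_A` and `alpha` entries are duplicated. Here is a minimal standalone sketch of the same mechanism; the `split_lora_state_dict` name, the example map, and the shapes are hypothetical, while the `mapped_modules`/`split_sizes` keys mirror the diff.

```python
import torch

# Hypothetical map: one fused Linear "qkv" fans out to three sub-modules.
split_linear_modules_map = {
    "qkv": {"mapped_modules": ["q", "k", "v"], "split_sizes": [32, 32, 32]},
}

# Suffix table as in the diff: (key suffix, position of the fused module
# name in the dotted key, whether the tensor itself must be split row-wise).
suffixes = [(".alpha", -2, False), (".lora_B.weight", -3, True), (".lora_A.weight", -3, False)]

def split_lora_state_dict(state_dict):
    new_state_dict = {}
    for module_name, module_data in state_dict.items():
        name_parts = module_name.split(".")
        for suffix, pos, any_split in suffixes:
            if module_name.endswith(suffix) and (m := split_linear_modules_map.get(name_parts[pos])) is not None:
                parent = ".".join(name_parts[:pos])
                # lora_B rows are partitioned per sub-module; lora_A and alpha are shared.
                sub_data = torch.split(module_data, m["split_sizes"], dim=0) if any_split else [None] * len(m["mapped_modules"])
                for sub_name, subdata in zip(m["mapped_modules"], sub_data):
                    new_state_dict[parent + "." + sub_name + suffix] = subdata if any_split else module_data
                module_name = None  # mark as consumed
                break
        if module_name is not None:
            new_state_dict[module_name] = module_data
    return new_state_dict

# A fused lora_B of shape (96, r) becomes three (32, r) tensors:
sd = {"blocks.0.qkv.lora_B.weight": torch.zeros(96, 4),
      "blocks.0.qkv.lora_A.weight": torch.zeros(4, 64)}
print(sorted(split_lora_state_dict(sd)))
```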
@@ -1118,7 +1103,9 @@ def load_loras_into_model(model, lora_path, lora_multi = None, activate_all_lora
             fail = True
             break
         module_shape = module.weight.shape
+        rank = None
         if lora_A != None:
+            rank = lora_A.shape[0]
             if module_shape[1] != v.shape[1]:
                 if ignore_model_variations:
                     skip = True
@@ -1128,6 +1115,7 @@ def load_loras_into_model(model, lora_path, lora_multi = None, activate_all_lora
             fail = True
             break
         elif lora_B != None:
+            rank = lora_B.shape[1]
             if module_shape[0] != v.shape[0]:
                 if ignore_model_variations:
                     skip = True
@@ -1147,6 +1135,7 @@ def load_loras_into_model(model, lora_path, lora_multi = None, activate_all_lora
             fail = True
             break
         elif diff_b != None:
+            rank = diff_b.shape[0]
             if module.bias == None:
                 msg = f"Lora '{path}': Lora Basis is defined while it doesnt exist in model '{_get_module_name(model)}'. It is likely this Lora has been made for another version of this model."
                 fail = True
@@ -1164,25 +1153,23 @@ def load_loras_into_model(model, lora_path, lora_multi = None, activate_all_lora

         if not check_only:
             loras_module_data = loras_model_data.get(module, None)
-            if loras_module_data == None:
-                pass
             assert loras_module_data != None
             loras_adapter_data = loras_module_data.get(adapter_name, None)
-            lora_A = None if lora_A == None else lora_A.to(module.weight.dtype)
-            lora_B = None if lora_B == None else lora_B.to(module.weight.dtype)
-            diff_b = None if diff_b == None else diff_b.to(module.weight.dtype)
             if loras_adapter_data == None:
-
-                loras_adapter_data = [lora_A, lora_B, diff_b, alpha]
+                loras_adapter_data = [None, None, None, 1.]
                 loras_module_data[adapter_name] = loras_adapter_data
-
-            loras_adapter_data[0] = lora_A
+            if lora_A != None:
+                loras_adapter_data[0] = lora_A.to(module.weight.dtype)
             elif lora_B != None:
-                loras_adapter_data[1] = lora_B
+                loras_adapter_data[1] = lora_B.to(module.weight.dtype)
             else:
-                loras_adapter_data[2] = diff_b
-
-
+                loras_adapter_data[2] = diff_b.to(module.weight.dtype)
+            if rank != None:
+                alpha_key = k[:-len("lora_X.weight")] + "alpha"
+                alpha = lora_alphas.get(alpha_key, None)
+                alpha = 1. if alpha == None else alpha / rank
+                loras_adapter_data[3] = alpha
+            lora_A = lora_B = diff = diff_b = v = loras_module_data = loras_adapter_data = lora_alphas = None

     if len(invalid_keys) > 0:
         msg = f"Lora '{path}' contains non Lora keys '{trunc(invalid_keys,200)}'"
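The `rank` captured in the hunks above (from `lora_A.shape[0]`, `lora_B.shape[1]`, or `diff_b.shape[0]`) exists so the per-layer `alpha` read from the checkpoint can be normalised as `alpha / rank`, which is the usual LoRA scaling convention. A worked sketch of that arithmetic, with illustrative shapes:

```python
import torch

out_features, in_features, rank = 64, 48, 8
lora_A = torch.randn(rank, in_features)   # rank = lora_A.shape[0]
lora_B = torch.randn(out_features, rank)  # rank = lora_B.shape[1]

stored_alpha = 16.0                # the ".alpha" entry of the state dict
scaling = stored_alpha / rank      # 2.0: what the diff stores in loras_adapter_data[3]

# The effective weight update applied at inference time:
delta_w = scaling * (lora_B @ lora_A)     # shape (out_features, in_features)
assert delta_w.shape == (out_features, in_features)

# When no alpha key is present, the diff falls back to a neutral factor of 1.
fallback_scaling = 1.0
```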
@@ -1237,7 +1224,7 @@ def unload_loras_from_model(model):
     for _, v in model._loras_model_data.items():
         v.clear()

-    model._loras_active_adapters =
+    model._loras_active_adapters = []
     model._loras_scaling = dict()
     model._loras_tied_weights = dict()
     model._loras_errors = None
@@ -1248,7 +1235,7 @@ def unload_loras_from_model(model):
 def set_step_no_for_lora(model, step_no):
     model._lora_step_no = step_no

-def activate_loras(model, lora_nos, lora_multi = None ):
+def activate_loras(model, lora_nos, lora_multi = None):
     if not isinstance(lora_nos, list):
         lora_nos = [lora_nos]
     lora_nos = [str(l) for l in lora_nos]
@@ -1261,7 +1248,7 @@ def activate_loras(model, lora_nos, lora_multi = None ):
         lora_scaling_dict[no] = multi

     model._lora_step_no = 0
-    model._loras_active_adapters =
+    model._loras_active_adapters = lora_nos
     model._loras_scaling = lora_scaling_dict

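For reference, a hedged usage sketch based only on the signature and coercion visible in this diff; the `model` object and adapter numbers are illustrative:

```python
# Enable adapters 0 and 1 with per-adapter strengths; numbers are coerced
# to strings internally, so ints and strs are both accepted.
activate_loras(model, [0, 1], lora_multi=[1.0, 0.8])

# A single adapter at default strength:
activate_loras(model, 0)
```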
@@ -1287,7 +1274,7 @@ def fast_load_transformers_model(model_path: str, do_quantize = False, quantizat
         model_path = [model_path]


-    if not builtins.all(file_name.endswith(".sft") or file_name.endswith(".safetensors") for file_name in model_path):
+    if not builtins.all(file_name.endswith(".sft") or file_name.endswith(".safetensors") or file_name.endswith(".pt") for file_name in model_path):
         raise Exception("full model path to file expected")

     model_path = [ _get_model(file) for file in model_path]
@@ -1295,9 +1282,11 @@ def fast_load_transformers_model(model_path: str, do_quantize = False, quantizat
         raise Exception("Unable to find file")

     verboseLevel = _compute_verbose_level(verboseLevel)
-
-
-
+    if model_path[-1].endswith(".pt"):
+        metadata = None
+    else:
+        with safetensors2.safe_open(model_path[-1], writable_tensors =writable_tensors) as f:
+            metadata = f.metadata()

     if metadata is None:
         transformer_config = None
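Since a `.pt` checkpoint written by `torch.save()` carries no safetensors header, the new branch simply reports no metadata for it. A minimal sketch of the same dispatch using the public `safetensors` API instead of the module's internal `safetensors2` wrapper; the function name and paths are illustrative:

```python
from safetensors import safe_open

def read_checkpoint_metadata(path: str):
    """Return the safetensors header metadata, or None for .pt checkpoints."""
    if path.endswith(".pt"):
        return None  # torch.save() checkpoints have no safetensors header
    with safe_open(path, framework="pt") as f:
        return f.metadata()

meta = read_checkpoint_metadata("model.safetensors")
```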
@@ -1737,7 +1726,7 @@ class offload:
                 continue
             key = adapter + '_GPU'
             if to_GPU:
-                lora_module[key] = [None if item == None else item.cuda(non_blocking=True) for item in lora_data[ :-1] ] + lora_data[ -1:]
+                lora_module[key] = [None if item == None else item.cuda(non_blocking=True) for item in lora_data[ :-1] ] + lora_data[ -1:]
             elif key in lora_module:
                 del lora_module[key]

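The comprehension above pushes each adapter tensor to the GPU with `non_blocking=True`, keeping the trailing scalar entry (`alpha`) on the CPU. `non_blocking` only overlaps with host work when the source lives in pinned memory and the copy is later synchronised; a small sketch, assuming a CUDA device and pinned allocation (neither is shown in the diff):

```python
import torch

cpu_tensor = torch.randn(1024, 1024).pin_memory()  # pinned memory enables async H2D copies
gpu_tensor = cpu_tensor.cuda(non_blocking=True)    # returns immediately, copy runs async
# ... CPU work can overlap with the transfer here ...
torch.cuda.current_stream().synchronize()          # ensure the copy finished before use
```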
@@ -2015,7 +2004,7 @@ class offload:
         training = False

         dtype = weight.dtype
-        if weight.shape[-1] < x.shape[-2]:
+        if weight.shape[-1] < x.shape[-2]: # sum base weight and lora matrices instead of applying input on each sub lora matrice if input is too large. This will save a lot VRAM and compute
             bias = submodule.bias
             original_bias = True
             if len(active_adapters) > 0:
@@ -2023,12 +2012,10 @@ class offload:
             weight = weight.view(weight.shape) # get a persistent copy of the on the fly dequantized weights
         else:
             weight = weight.clone()
-
-
         for active_adapter in active_adapters:
             data = loras_data.get(active_adapter + '_GPU', None)
             if data == None:
-                continue
+                continue
             lora_A_weight, lora_B_weight, diff_b, alpha = data
             scaling = self._get_lora_scaling(loras_scaling, model, active_adapter) * alpha
             if lora_A_weight != None:
@@ -2042,9 +2029,7 @@ class offload:
                 bias = bias.clone()
                 original_bias = False
             bias.add_(diff_b, alpha=scaling)
-
         # base_weight += scaling * lora_B_weight @ lora_A_weight
-
         if training:
             pass
             # result = torch.nn.functional.linear(dropout(x), base_weight, bias=submodule.bias)
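The `weight.shape[-1] < x.shape[-2]` test above chooses between two algebraically equivalent ways to apply a LoRA: folding `scaling * B @ A` into the base weight once (cheaper when there are more input rows than the layer is wide), or running the input through the thin `A` and `B` factors separately (cheaper for small inputs). A sketch of the trade-off under those assumptions; the function and shapes are illustrative:

```python
import torch

def lora_linear(x, weight, lora_A, lora_B, scaling, bias=None):
    # Heuristic from the diff: if the input has more rows than the weight
    # has columns, merging the low-rank update into the weight is cheaper
    # than pushing x through both factors.
    if weight.shape[-1] < x.shape[-2]:
        merged = weight + scaling * (lora_B @ lora_A)   # one (out, in) matrix
        return torch.nn.functional.linear(x, merged, bias)
    # Otherwise keep the factored form: two thin matmuls on x.
    y = torch.nn.functional.linear(x, weight, bias)
    return y + scaling * torch.nn.functional.linear(torch.nn.functional.linear(x, lora_A), lora_B)

x = torch.randn(1, 4096, 64)          # many tokens, narrow layer -> merge path
w = torch.randn(128, 64)
A, B = torch.randn(8, 64), torch.randn(128, 8)
out = lora_linear(x, w, A, B, scaling=2.0)
```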
@@ -2093,6 +2078,7 @@ class offload:
             if len(loras_data) == 0:
                 return old_forward(*args, **kwargs)
             else:
+                # submodule.aaa = submodule_name
                 return self._lora_linear_forward(current_model, submodule, loras_data, *args, **kwargs)
         target_fn = lora_linear_forward
     else:
{mmgp-3.4.9 → mmgp-3.5.1/src/mmgp.egg-info}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mmgp
-Version: 3.4.9
+Version: 3.5.1
 Summary: Memory Management for the GPU Poor
 Author-email: deepbeepmeep <deepbeepmeep@yahoo.com>
 Requires-Python: >=3.10
@@ -15,7 +15,7 @@ Dynamic: license-file


 <p align="center">
-<H2>Memory Management 3.4.9 for the GPU Poor by DeepBeepMeep</H2>
+<H2>Memory Management 3.5.1 for the GPU Poor by DeepBeepMeep</H2>
 </p>
