mmgp 3.2.3-py3-none-any.whl → 3.2.4-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of mmgp might be problematic.
- mmgp/offload.py +41 -33
- {mmgp-3.2.3.dist-info → mmgp-3.2.4.dist-info}/METADATA +2 -2
- mmgp-3.2.4.dist-info/RECORD +9 -0
- mmgp-3.2.3.dist-info/RECORD +0 -9
- {mmgp-3.2.3.dist-info → mmgp-3.2.4.dist-info}/LICENSE.md +0 -0
- {mmgp-3.2.3.dist-info → mmgp-3.2.4.dist-info}/WHEEL +0 -0
- {mmgp-3.2.3.dist-info → mmgp-3.2.4.dist-info}/top_level.txt +0 -0
mmgp/offload.py
CHANGED
@@ -1,4 +1,4 @@
-# ------------------ Memory Management 3.2.3 for the GPU Poor by DeepBeepMeep (mmgp)------------------
+# ------------------ Memory Management 3.2.4 for the GPU Poor by DeepBeepMeep (mmgp)------------------
 #
 # This module contains multiples optimisations so that models such as Flux (and derived), Mochi, CogView, HunyuanVideo, ... can run smoothly on a 24 GB GPU limited card.
 # This a replacement for the accelerate library that should in theory manage offloading, but doesn't work properly with models that are loaded / unloaded several
@@ -479,7 +479,7 @@ def _welcome():
     if welcome_displayed:
         return
     welcome_displayed = True
-    print(f"{BOLD}{HEADER}************ Memory Management for the GPU Poor (mmgp 3.2.3) by DeepBeepMeep ************{ENDC}{UNBOLD}")
+    print(f"{BOLD}{HEADER}************ Memory Management for the GPU Poor (mmgp 3.2.4) by DeepBeepMeep ************{ENDC}{UNBOLD}")

 def _extract_num_from_str(num_in_str):
     size = len(num_in_str)
@@ -858,7 +858,7 @@ def _lora_linear_forward(self, x: torch.Tensor, *args, **kwargs) -> torch.Tensor
         result = result.to(torch_result_dtype)
     return result

-def load_loras_into_model(model, lora_path, lora_multi = None, activate_all_loras = True, split_linear_modules_map = None,verboseLevel = -1,):
+def load_loras_into_model(model, lora_path, lora_multi = None, activate_all_loras = True, split_linear_modules_map = None, preprocess_sd = None, verboseLevel = -1,):
     verboseLevel = _compute_verbose_level(verboseLevel)

     if inject_adapter_in_model == None or set_weights_and_activate_adapters == None or get_peft_kwargs == None:
@@ -877,7 +877,8 @@ def load_loras_into_model(model, lora_path, lora_multi = None, activate_all_lora
         adapter_name = str(i)

         state_dict = safetensors2.torch_load_file(path)
-
+        if preprocess_sd != None:
+            state_dict = preprocess_sd(state_dict)

         if split_linear_modules_map != None:
             new_state_dict = {}
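The new preprocess_sd hook runs on the raw LoRA state dict right after it is read from disk, before any module splitting or injection. A minimal usage sketch, assuming an already-built torch module; the list form of lora_path, the file name, and the prefix-stripping transform are illustrative assumptions, not part of this release:

    from mmgp import offload

    def strip_prefix(state_dict):
        # Hypothetical transform: drop a "diffusion_model." key prefix
        # from every entry (str.removeprefix needs Python 3.9+).
        return {k.removeprefix("diffusion_model."): v for k, v in state_dict.items()}

    offload.load_loras_into_model(
        model,                       # a torch.nn.Module built elsewhere
        ["my_lora.safetensors"],     # illustrative path list
        preprocess_sd=strip_prefix,
    )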
@@ -977,7 +978,8 @@ def load_loras_into_model(model, lora_path, lora_multi = None, activate_all_lora
     # Check only for unexpected keys.
     unexpected_keys = getattr(incompatible_keys, "unexpected_keys", None)
     if unexpected_keys:
-
+        raise Exception(f"Lora '{path}' contains invalid keys '{unexpected_keys}'")
+
     if verboseLevel >=1:
         print(f"Lora '{path}' was loaded in model '{_get_module_name(model)}'")
     if activate_all_loras:
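Where 3.2.3 tolerated unexpected keys, 3.2.4 aborts the load. A caller probing a LoRA file of uncertain layout might now guard the call; a hedged sketch, not an API prescribed by the library:

    from mmgp import offload

    try:
        offload.load_loras_into_model(model, ["my_lora.safetensors"])  # illustrative
    except Exception as exc:
        # 3.2.4 raises as soon as the checkpoint contains keys the model
        # does not expect, instead of activating a partial adapter.
        print(f"LoRA rejected: {exc}")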
@@ -1015,7 +1017,7 @@ def move_loras_to_device(model, device="cpu" ):
         if ".lora_" in k:
             m.to(device)

-def fast_load_transformers_model(model_path: str, do_quantize = False, quantizationType = qint8, pinToMemory = False, partialPinning = False, forcedConfigPath = None, modelClass=None, verboseLevel = -1):
+def fast_load_transformers_model(model_path: str, do_quantize = False, quantizationType = qint8, pinToMemory = False, partialPinning = False, forcedConfigPath = None, modelClass=None, modelPrefix = None, verboseLevel = -1):
     """
     quick version of .LoadfromPretrained of the transformers library
     used to build a model and load the corresponding weights (quantized or not)
@@ -1096,13 +1098,13 @@ def fast_load_transformers_model(model_path: str, do_quantize = False, quantizat

     model._config = transformer_config

-    load_model_data(model,model_path, do_quantize = do_quantize, quantizationType = quantizationType, pinToMemory= pinToMemory, partialPinning= partialPinning, verboseLevel=verboseLevel )
+    load_model_data(model,model_path, do_quantize = do_quantize, quantizationType = quantizationType, pinToMemory= pinToMemory, partialPinning= partialPinning, modelPrefix = modelPrefix, verboseLevel=verboseLevel )

     return model



-def load_model_data(model, file_path: str, do_quantize = False, quantizationType = qint8, pinToMemory = False, partialPinning = False, verboseLevel = -1):
+def load_model_data(model, file_path: str, do_quantize = False, quantizationType = qint8, pinToMemory = False, partialPinning = False, modelPrefix = None, verboseLevel = -1):
     """
     Load a model, detect if it has been previously quantized using quanto and do the extra setup if necessary
     """
@@ -1113,6 +1115,26 @@ def load_model_data(model, file_path: str, do_quantize = False, quantizationType
     verboseLevel = _compute_verbose_level(verboseLevel)

     model = _remove_model_wrapper(model)
+
+    def filter_state_dict(state_dict, base_model_prefix):
+        new_state_dict= {}
+        start = -1
+        for k,v in state_dict.items():
+            if k.startswith(base_model_prefix):
+
+                new_start = len(base_model_prefix)
+            else:
+                pos = k.find("." + base_model_prefix)
+                if pos < 0:
+                    continue
+                new_start = pos + len(base_model_prefix) +1
+            if start != -1 and start != new_start:
+                new_state_dict = state_dict
+                break
+            start = new_start
+            new_state_dict[k[ start:]] = v
+        return new_state_dict
+
     if not (".safetensors" in file_path or ".sft" in file_path):
         if pinToMemory:
             raise Exception("Pinning to memory while loading only supported for safe tensors files")
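The nested filter_state_dict helper strips a shared prefix from every key, whether the key starts with the prefix or contains it after an outer wrapper name; if the prefix sits at inconsistent positions across keys it gives up and returns the dict unchanged. A standalone illustration of the intended rewriting, with dummy values and behavior inferred from the hunk above:

    sd = {"model.blocks.0.weight": 0, "model.blocks.0.bias": 0}
    # filter_state_dict(sd, "model.") -> {"blocks.0.weight": 0, "blocks.0.bias": 0}

    wrapped = {"pipe.model.blocks.0.weight": 0}
    # k.find(".model.") == 4, so new_start = 4 + len("model.") + 1 == 11
    # filter_state_dict(wrapped, "model.") -> {"blocks.0.weight": 0}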
@@ -1151,6 +1173,11 @@ def load_model_data(model, file_path: str, do_quantize = False, quantizationType
             quantization_map = json.load(f)


+    # deal if we are trying to load just a sub part of a larger model
+    if modelPrefix != None:
+        base_model_prefix = modelPrefix + "."
+        state_dict = filter_state_dict(state_dict,base_model_prefix)
+        quantization_map = filter_state_dict(quantization_map,base_model_prefix)

     if quantization_map is None :
         if "quanto" in file_path and not do_quantize:
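With modelPrefix set, both the weights and the quanto quantization map are narrowed to one sub-model before loading, via the same filter_state_dict helper. A hedged sketch of pulling a single component out of a combined checkpoint; the file name and prefix are assumptions for illustration (the trailing dot is appended internally, so pass the bare prefix):

    from mmgp.offload import fast_load_transformers_model

    # Checkpoint assumed to hold several components, with the text encoder's
    # tensors keyed under "text_encoder."; modelPrefix selects that subtree.
    text_encoder = fast_load_transformers_model(
        "combined_pipeline_quanto_int8.safetensors",  # illustrative file
        modelPrefix="text_encoder",
        verboseLevel=1,
    )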
@@ -1160,32 +1187,12 @@ def load_model_data(model, file_path: str, do_quantize = False, quantizationType

     missing_keys , unexpected_keys = model.load_state_dict(state_dict, False, assign = True )
     if len(missing_keys) > 0 :
-        # if there is a key mismatch maybe we forgot to remove some prefix
-        if hasattr(model, "base_model_prefix"):
-            base_model_prefix = model.base_model_prefix + "."
-        else:
-            for k,v in state_dict.items():
-                if k.endswith(missing_keys[0]):
-                    base_model_prefix = k[:-len(missing_keys[0])]
-                    break
-
-        new_state_dict= {}
-        start = -1
+        # if there is a key mismatch maybe we forgot to remove some prefix
         for k,v in state_dict.items():
-            if k.startswith(base_model_prefix):
-
-                new_start = len(base_model_prefix)
-            else:
-                pos = k.find("." + base_model_prefix)
-                if pos < 0:
-                    continue
-                new_start = pos + len(base_model_prefix) +1
-            if start != -1 and start != new_start:
-                new_state_dict = state_dict
+            if k.endswith(missing_keys[0]):
+                base_model_prefix = k[:-len(missing_keys[0])]
                 break
-
-            new_state_dict[k[ start:]] = v
-        state_dict = new_state_dict
-        del new_state_dict
+        state_dict = filter_state_dict(state_dict,base_model_prefix)
         missing_keys , unexpected_keys = model.load_state_dict(state_dict, False, assign = True )
         del state_dict

@@ -1354,6 +1361,8 @@ class offload:

     def add_module_to_blocks(self, model_id, blocks_name, submodule, prev_block_name, submodule_name):

+        if blocks_name!=None and ".lora_" in blocks_name:
+            blocks_name = None
         entry_name = model_id if blocks_name is None else model_id + "/" + blocks_name
         if entry_name in self.blocks_of_modules:
             blocks_params = self.blocks_of_modules[entry_name]
@@ -1372,7 +1381,6 @@ class offload:
         lora_name = None
         if self.lora_parents.get(submodule, None) != None:
             lora_name = str(submodule_name[ submodule_name.rfind(".") + 1: ] )
-
         for k,p in submodule.named_parameters(recurse=False):
             param_size = 0
             ref = _get_tensor_ref(p)
{mmgp-3.2.3.dist-info → mmgp-3.2.4.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: mmgp
-Version: 3.2.3
+Version: 3.2.4
 Summary: Memory Management for the GPU Poor
 Author-email: deepbeepmeep <deepbeepmeep@yahoo.com>
 License: GNU GENERAL PUBLIC LICENSE
@@ -17,7 +17,7 @@ Requires-Dist: peft


 <p align="center">
-    <H2>Memory Management 3.2.3 for the GPU Poor by DeepBeepMeep</H2>
+    <H2>Memory Management 3.2.4 for the GPU Poor by DeepBeepMeep</H2>
 </p>

mmgp-3.2.4.dist-info/RECORD
ADDED
@@ -0,0 +1,9 @@
+__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+mmgp/__init__.py,sha256=A9qBwyQMd1M7vshSTOBnFGP1MQvS2hXmTcTCMUcmyzE,509
+mmgp/offload.py,sha256=vGxgCcWV8PQQ4JjSlYFOX57Mr9RLlvPBMOOj3f63qL4,96389
+mmgp/safetensors2.py,sha256=DCdlRH3769CTyraAmWAB3b0XrVua7z6ygQ-OyKgJN6A,16453
+mmgp-3.2.4.dist-info/LICENSE.md,sha256=HjzvY2grdtdduZclbZ46B2M-XpT4MDCxFub5ZwTWq2g,93
+mmgp-3.2.4.dist-info/METADATA,sha256=UGZ7ADvrhU5P0hS7gFgu8SHpEnzzpEgE3Ionk-I7ckw,16151
+mmgp-3.2.4.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
+mmgp-3.2.4.dist-info/top_level.txt,sha256=waGaepj2qVfnS2yAOkaMu4r9mJaVjGbEi6AwOUogU_U,14
+mmgp-3.2.4.dist-info/RECORD,,
mmgp-3.2.3.dist-info/RECORD
DELETED
@@ -1,9 +0,0 @@
-__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-mmgp/__init__.py,sha256=A9qBwyQMd1M7vshSTOBnFGP1MQvS2hXmTcTCMUcmyzE,509
-mmgp/offload.py,sha256=EeC-eSJLq8Z8K1wq7UGCzDpaW7JAL-RSFVr8fPUmtPc,95853
-mmgp/safetensors2.py,sha256=DCdlRH3769CTyraAmWAB3b0XrVua7z6ygQ-OyKgJN6A,16453
-mmgp-3.2.3.dist-info/LICENSE.md,sha256=HjzvY2grdtdduZclbZ46B2M-XpT4MDCxFub5ZwTWq2g,93
-mmgp-3.2.3.dist-info/METADATA,sha256=9Z2SIaf6fBdZDuIn8Pqqr93qXZS_tiRLU9KbKMDSuSM,16151
-mmgp-3.2.3.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
-mmgp-3.2.3.dist-info/top_level.txt,sha256=waGaepj2qVfnS2yAOkaMu4r9mJaVjGbEi6AwOUogU_U,14
-mmgp-3.2.3.dist-info/RECORD,,
{mmgp-3.2.3.dist-info → mmgp-3.2.4.dist-info}/LICENSE.md
File without changes
{mmgp-3.2.3.dist-info → mmgp-3.2.4.dist-info}/WHEEL
File without changes
{mmgp-3.2.3.dist-info → mmgp-3.2.4.dist-info}/top_level.txt
File without changes