mmgp 3.3.3.tar.gz → 3.3.4.tar.gz
Potentially problematic release: this version of mmgp has been flagged in its registry; see the package's listing for details.
- {mmgp-3.3.3/src/mmgp.egg-info → mmgp-3.3.4}/PKG-INFO +2 -2
- {mmgp-3.3.3 → mmgp-3.3.4}/README.md +1 -1
- {mmgp-3.3.3 → mmgp-3.3.4}/pyproject.toml +1 -1
- {mmgp-3.3.3 → mmgp-3.3.4}/src/mmgp/offload.py +31 -27
- {mmgp-3.3.3 → mmgp-3.3.4/src/mmgp.egg-info}/PKG-INFO +2 -2
- {mmgp-3.3.3 → mmgp-3.3.4}/LICENSE.md +0 -0
- {mmgp-3.3.3 → mmgp-3.3.4}/setup.cfg +0 -0
- {mmgp-3.3.3 → mmgp-3.3.4}/src/__init__.py +0 -0
- {mmgp-3.3.3 → mmgp-3.3.4}/src/mmgp/__init__.py +0 -0
- {mmgp-3.3.3 → mmgp-3.3.4}/src/mmgp/safetensors2.py +0 -0
- {mmgp-3.3.3 → mmgp-3.3.4}/src/mmgp.egg-info/SOURCES.txt +0 -0
- {mmgp-3.3.3 → mmgp-3.3.4}/src/mmgp.egg-info/dependency_links.txt +0 -0
- {mmgp-3.3.3 → mmgp-3.3.4}/src/mmgp.egg-info/requires.txt +0 -0
- {mmgp-3.3.3 → mmgp-3.3.4}/src/mmgp.egg-info/top_level.txt +0 -0
{mmgp-3.3.3/src/mmgp.egg-info → mmgp-3.3.4}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mmgp
-Version: 3.3.3
+Version: 3.3.4
 Summary: Memory Management for the GPU Poor
 Author-email: deepbeepmeep <deepbeepmeep@yahoo.com>
 License: GNU GENERAL PUBLIC LICENSE
@@ -17,7 +17,7 @@ Dynamic: license-file
 
 
 <p align="center">
-<H2>Memory Management 3.3.3 for the GPU Poor by DeepBeepMeep</H2>
+<H2>Memory Management 3.3.4 for the GPU Poor by DeepBeepMeep</H2>
 </p>
 
 
{mmgp-3.3.3 → mmgp-3.3.4}/src/mmgp/offload.py

@@ -1,4 +1,4 @@
-# ------------------ Memory Management 3.3.3 for the GPU Poor by DeepBeepMeep (mmgp)------------------
+# ------------------ Memory Management 3.3.4 for the GPU Poor by DeepBeepMeep (mmgp)------------------
 #
 # This module contains multiples optimisations so that models such as Flux (and derived), Mochi, CogView, HunyuanVideo, ... can run smoothly on a 24 GB GPU limited card.
 # This a replacement for the accelerate library that should in theory manage offloading, but doesn't work properly with models that are loaded / unloaded several
@@ -591,7 +591,7 @@ def _welcome():
     if welcome_displayed:
         return
     welcome_displayed = True
-    print(f"{BOLD}{HEADER}************ Memory Management for the GPU Poor (mmgp 3.3.3) by DeepBeepMeep ************{ENDC}{UNBOLD}")
+    print(f"{BOLD}{HEADER}************ Memory Management for the GPU Poor (mmgp 3.3.4) by DeepBeepMeep ************{ENDC}{UNBOLD}")
 
 def _extract_num_from_str(num_in_str):
     size = len(num_in_str)
@@ -877,17 +877,15 @@ def split_linear_modules(model, map ):
 
 def load_loras_into_model(model, lora_path, lora_multi = None, activate_all_loras = True, check_only = False, ignore_model_variations = False, pinnedLora = False, split_linear_modules_map = None, preprocess_sd = None, verboseLevel = -1,):
     verboseLevel = _compute_verbose_level(verboseLevel)
-    modules_dict = {k: v for k,v in model.named_modules()}
 
+    loras_model_data = getattr(model, "_loras_model_data", None)
+    if loras_model_data == None:
+        raise Exception(f"No Loras has been declared for this model while creating the corresponding offload object")
+
     if not check_only:
-
-
-
-        model._loras_active_adapters = loras_active_adapters
-        loras_scaling = dict()
-        model._loras_scaling = loras_scaling
-        loras_tied_weights = dict()
-        model._loras_tied_weights = loras_tied_weights
+        unload_loras_from_model(model)
+
+    modules_dict = {k: v for k,v in model.named_modules()}
 
     CrLf = '\r\n'
     error_msg = ""
@@ -927,9 +925,6 @@ def load_loras_into_model(model, lora_path, lora_multi = None, activate_all_lora
         skip = False
         state_dict = safetensors2.torch_load_file(path, writable_tensors= False)
 
-
-
-
         if preprocess_sd != None:
             state_dict = preprocess_sd(state_dict)
 
@@ -1045,9 +1040,10 @@ def load_loras_into_model(model, lora_path, lora_multi = None, activate_all_lora
                 break
             if not check_only:
                 loras_module_data = loras_model_data.get(module, None)
-
-
-
+                assert loras_module_data != None
+                # if loras_module_data == None:
+                #     loras_module_data = dict()
+                #     loras_model_data[module] = loras_module_data
                 loras_adapter_data = loras_module_data.get(adapter_name, None)
                 lora_A = None if lora_A == None else lora_A.to(torch.bfloat16)
                 lora_B = None if lora_B == None else lora_B.to(torch.bfloat16)
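The rewritten preamble makes LoRA loading fail fast: `_loras_model_data` is only attached by `offload.all` (see the hunks at 2271/2304 below), so calling `load_loras_into_model` on a model that was never declared for LoRAs now raises immediately instead of half-building state. A minimal sketch of the new behaviour, assuming mmgp 3.3.4 is installed and that `offload` exposes these module-level functions as they are used elsewhere in this file:

    import torch
    from mmgp import offload  # assumes mmgp 3.3.4 is installed

    model = torch.nn.Linear(8, 8)  # stand-in for a real transformer

    try:
        # No offload object ever declared LoRAs for `model`, so 3.3.4 fails
        # fast, before even opening the (placeholder) file:
        offload.load_loras_into_model(model, "my_lora.safetensors")
    except Exception as e:
        print(f"LoRA loading refused: {e}")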
@@ -1108,12 +1104,17 @@ def load_loras_into_model(model, lora_path, lora_multi = None, activate_all_lora
     return new_lora_path
 
 def unload_loras_from_model(model):
-    model._loras_model_data
+    for _, v in model._loras_model_data.items():
+        v.clear()
+
+    model._loras_active_adapters = set()
+    model._loras_scaling = dict()
+    model._loras_tied_weights = dict()
     model._loras_errors = None
     model._loras_adapters = None
-    model._loras_active_adapters = None
     model._loras_scaling = None
 
+
 def set_step_no_for_lora(model, step_no):
     model._lora_step_no = step_no
 
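`unload_loras_from_model` now empties each per-module adapter dict in place rather than discarding the top-level container. That matters because the Linear hooks below close over those exact dict objects. A self-contained toy (plain Python, not mmgp API) showing why clearing in place is the right move:

    # Toy model of the pattern: each forward hook closes over one per-module dict.
    loras_model_data = {}

    def make_hook(key):
        loras_data = {}
        loras_model_data[key] = loras_data
        def forward():
            # The closure reads the live contents of loras_data on every call.
            return "plain" if len(loras_data) == 0 else "lora"
        return forward

    fwd = make_hook("linear1")
    loras_model_data["linear1"]["adapter"] = 1.0
    assert fwd() == "lora"

    # unload_loras_from_model-style clearing: the closure sees the change...
    for v in loras_model_data.values():
        v.clear()
    assert fwd() == "plain"

    # ...whereas rebinding loras_model_data["linear1"] = {} would leave the
    # closure holding the old dict, and stale adapters would keep firing.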
@@ -1857,14 +1858,14 @@ class offload:
             return result
 
 
-    def hook_lora_linear(self, submodule, current_model, model_id, submodule_name):
+    def hook_lora_linear(self, submodule, current_model, model_id, loras_model_data, submodule_name):
         old_forward = submodule.forward
+
+        loras_data = {}
+        loras_model_data[submodule] = loras_data
+
         def lora_linear_forward(module, *args, **kwargs):
-
-            loras_data = None
-            if loras_model_data != None:
-                loras_data = loras_model_data.get(submodule, None)
-            if loras_data == None:
+            if len(loras_data) == 0:
                 return old_forward(*args, **kwargs)
             else:
                 return self._lora_linear_forward(current_model, submodule, loras_data, *args, **kwargs)
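The hook now registers a per-module `loras_data` dict once, at wrap time, so the wrapped forward performs a single cheap `len()` check per call instead of the old two-step dict lookup on the hot path. A standalone sketch of the same wrapping pattern in plain PyTorch; the adapter math is a hypothetical stand-in for `_lora_linear_forward`, whose body is not shown in this diff:

    import types
    import torch

    def hook_linear(submodule, loras_model_data):
        old_forward = submodule.forward

        # Register this module's (initially empty) adapter dict once, eagerly.
        loras_data = {}
        loras_model_data[submodule] = loras_data

        def lora_linear_forward(module, *args, **kwargs):
            # Hot path: one emptiness check per call.
            if len(loras_data) == 0:
                return old_forward(*args, **kwargs)
            # Hypothetical adapter application: add scaled low-rank updates.
            x = args[0]
            out = old_forward(*args, **kwargs)
            for lora_A, lora_B, scale in loras_data.values():
                out = out + scale * (x @ lora_A.t() @ lora_B.t())
            return out

        submodule.forward = types.MethodType(lora_linear_forward, submodule)

    loras_model_data = {}
    lin = torch.nn.Linear(4, 4)
    hook_linear(lin, loras_model_data)
    y = lin(torch.randn(2, 4))   # empty dict: falls through to original forward

    r = 2  # illustrative LoRA rank
    loras_model_data[lin]["demo"] = (torch.randn(r, 4), torch.randn(4, r), 0.5)
    y2 = lin(torch.randn(2, 4))  # non-empty dict: same call site takes the LoRA path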
@@ -2271,7 +2272,10 @@ def all(pipe_or_dict_of_modules, pinnedMemory = False, pinnedPEFTLora = False, p
             current_budget = model_budgets[model_id]
             cur_blocks_prefix, prev_blocks_name, cur_blocks_name,cur_blocks_seq, is_mod_seq = None, None, None, -1, False
             self.loaded_blocks[model_id] = None
-            any_lora = loras !=None and model_id in loras
+            any_lora = loras !=None and model_id in loras
+            if any_lora:
+                loras_model_data = {}
+                current_model._loras_model_data = loras_model_data
             for submodule_name, submodule in current_model.named_modules():
                 # create a fake 'accelerate' parameter so that the _execution_device property returns always "cuda"
                 # (it is queried in many pipelines even if offloading is not properly implemented)
@@ -2304,7 +2308,7 @@ def all(pipe_or_dict_of_modules, pinnedMemory = False, pinnedPEFTLora = False, p
 
             if hasattr(submodule, "forward"):
                 if any_lora and isinstance(submodule, torch.nn.Linear):
-                    submodule_method = self.hook_lora_linear(submodule, current_model, model_id, submodule_name)
+                    submodule_method = self.hook_lora_linear(submodule, current_model, model_id, loras_model_data, submodule_name)
                 else:
                     submodule_method = getattr(submodule, "forward")
                 if callable(submodule_method):
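Taken together: `all` creates one `loras_model_data` dict per LoRA-enabled model, stores it as `model._loras_model_data`, and hands it to every Linear hook, so declaring LoRAs at setup time is what later entitles a model to `load_loras_into_model`. A hedged end-to-end sketch; the `loras` keyword shape is inferred from `model_id in loras` above, and the models and file name are placeholders:

    import torch
    from mmgp import offload  # assumes mmgp 3.3.4

    # Toy stand-in; real callers pass their actual pipeline models.
    transformer = torch.nn.Sequential(torch.nn.Linear(8, 8))
    pipe = {"transformer": transformer}

    # Declaring the model id in `loras` is what makes `all` create
    # _loras_model_data and wrap each nn.Linear forward via hook_lora_linear.
    offload.all(pipe, loras=["transformer"])

    # Permitted now that the model was declared; the file name is a placeholder.
    offload.load_loras_into_model(transformer, ["style_lora.safetensors"], lora_multi=[1.0])

    # Unloading clears each per-module adapter dict in place, so every wrapped
    # forward silently falls back to the original one.
    offload.unload_loras_from_model(transformer)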
{mmgp-3.3.3 → mmgp-3.3.4/src/mmgp.egg-info}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mmgp
-Version: 3.3.3
+Version: 3.3.4
 Summary: Memory Management for the GPU Poor
 Author-email: deepbeepmeep <deepbeepmeep@yahoo.com>
 License: GNU GENERAL PUBLIC LICENSE
@@ -17,7 +17,7 @@ Dynamic: license-file
 
 
 <p align="center">
-<H2>Memory Management 3.3.3 for the GPU Poor by DeepBeepMeep</H2>
+<H2>Memory Management 3.3.4 for the GPU Poor by DeepBeepMeep</H2>
 </p>
 
 