mmgp: 3.5.6-py3-none-any.whl → 3.5.7-py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
- mmgp/offload.py +28 -22
- {mmgp-3.5.6.dist-info → mmgp-3.5.7.dist-info}/METADATA +2 -2
- mmgp-3.5.7.dist-info/RECORD +9 -0
- mmgp-3.5.6.dist-info/RECORD +0 -9
- {mmgp-3.5.6.dist-info → mmgp-3.5.7.dist-info}/WHEEL +0 -0
- {mmgp-3.5.6.dist-info → mmgp-3.5.7.dist-info}/licenses/LICENSE.md +0 -0
- {mmgp-3.5.6.dist-info → mmgp-3.5.7.dist-info}/top_level.txt +0 -0
mmgp/offload.py
CHANGED

@@ -1,4 +1,4 @@
-# ------------------ Memory Management 3.5.6 for the GPU Poor by DeepBeepMeep (mmgp)------------------
+# ------------------ Memory Management 3.5.7 for the GPU Poor by DeepBeepMeep (mmgp)------------------
 #
 # This module contains multiples optimisations so that models such as Flux (and derived), Mochi, CogView, HunyuanVideo, ... can run smoothly on a 24 GB GPU limited card.
 # This a replacement for the accelerate library that should in theory manage offloading, but doesn't work properly with models that are loaded / unloaded several

@@ -122,8 +122,6 @@ class clock:
     def format_time_gap(self):
         return f"{self.stop_time - self.start_time:.2f}s"
 
-
-
 # useful functions to move a group of tensors (to design custom offload patches)
 def move_tensors(obj, device):
     if torch.is_tensor(obj):

@@ -668,7 +666,7 @@ def _welcome():
     if welcome_displayed:
         return
     welcome_displayed = True
-    print(f"{BOLD}{HEADER}************ Memory Management for the GPU Poor (mmgp 3.5.6) by DeepBeepMeep ************{ENDC}{UNBOLD}")
+    print(f"{BOLD}{HEADER}************ Memory Management for the GPU Poor (mmgp 3.5.7) by DeepBeepMeep ************{ENDC}{UNBOLD}")
 
 def change_dtype(model, new_dtype, exclude_buffers = False):
     for submodule_name, submodule in model.named_modules():

@@ -1295,7 +1293,7 @@ def move_loras_to_device(model, device="cpu" ):
         if ".lora_" in k:
             m.to(device)
 
-def fast_load_transformers_model(model_path: str, do_quantize = False, quantizationType = qint8, pinToMemory = False, partialPinning = False, forcedConfigPath = None, defaultConfigPath = None, modelClass=None, modelPrefix = None, writable_tensors = True, verboseLevel = -1, modules = None, return_shared_modules = None, configKwargs ={}):
+def fast_load_transformers_model(model_path: str, do_quantize = False, quantizationType = qint8, pinToMemory = False, partialPinning = False, forcedConfigPath = None, defaultConfigPath = None, modelClass=None, modelPrefix = None, writable_tensors = True, verboseLevel = -1, preprocess_sd = None, modules = None, return_shared_modules = None, configKwargs ={}):
     """
     quick version of .LoadfromPretrained of the transformers library
     used to build a model and load the corresponding weights (quantized or not)

@@ -1383,13 +1381,13 @@ def fast_load_transformers_model(model_path: str, do_quantize = False, quantiza
 
     model._config = transformer_config
 
-    load_model_data(model,model_path, do_quantize = do_quantize, quantizationType = quantizationType, pinToMemory= pinToMemory, partialPinning= partialPinning, modelPrefix = modelPrefix, writable_tensors =writable_tensors, modules = modules, return_shared_modules = return_shared_modules, verboseLevel=verboseLevel )
+    load_model_data(model,model_path, do_quantize = do_quantize, quantizationType = quantizationType, pinToMemory= pinToMemory, partialPinning= partialPinning, modelPrefix = modelPrefix, writable_tensors =writable_tensors, preprocess_sd = preprocess_sd , modules = modules, return_shared_modules = return_shared_modules, verboseLevel=verboseLevel )
 
     return model
 
 
 
-def load_model_data(model, file_path, do_quantize = False, quantizationType = qint8, pinToMemory = False, partialPinning = False, modelPrefix = None, writable_tensors = True, modules = None, return_shared_modules = None, verboseLevel = -1):
+def load_model_data(model, file_path, do_quantize = False, quantizationType = qint8, pinToMemory = False, partialPinning = False, modelPrefix = None, writable_tensors = True, preprocess_sd = None, modules = None, return_shared_modules = None, verboseLevel = -1):
     """
     Load a model, detect if it has been previously quantized using quanto and do the extra setup if necessary
     """

@@ -1506,6 +1504,9 @@ def load_model_data(model, file_path, do_quantize = False, quantizationType = qi
         full_state_dict, full_quantization_map, full_tied_weights_map = None, None, None
 
     # deal if we are trying to load just a sub part of a larger model
+    if preprocess_sd != None:
+        state_dict, quantization_map = preprocess_sd(state_dict, quantization_map)
+
     if modelPrefix != None:
         base_model_prefix = modelPrefix + "."
         state_dict = filter_state_dict(state_dict,base_model_prefix)

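For reference, the call site above defines the contract of the new preprocess_sd hook: when supplied, it receives the freshly loaded state_dict and quantization_map and must return both, possibly rewritten, before any modelPrefix filtering or quantization setup runs. Below is a minimal sketch of such a callback; the "transformer." prefix being stripped and the file name in the commented call are hypothetical illustrations, not part of mmgp:

    # Illustrative preprocess_sd callback: rewrite checkpoint keys before mmgp ingests them.
    def strip_transformer_prefix(state_dict, quantization_map):
        def rename(key):
            return key[len("transformer."):] if key.startswith("transformer.") else key
        new_sd = {rename(k): v for k, v in state_dict.items()}
        new_qm = None if quantization_map is None else {rename(k): v for k, v in quantization_map.items()}
        return new_sd, new_qm

    # Hypothetical usage with the keyword added in 3.5.7:
    # model = offload.fast_load_transformers_model("model.safetensors", preprocess_sd=strip_transformer_prefix)
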
@@ -2276,7 +2277,7 @@ class offload:
         setattr(target_module, "forward", functools.update_wrapper(functools.partial(check_empty_cuda_cache, target_module), previous_method) )
 
 
-    def hook_change_module(self, target_module, model, model_id, module_id, previous_method):
+    def hook_change_module(self, target_module, model, model_id, module_id, previous_method, previous_method_name ):
         if hasattr(target_module, "_lock_dtype"):
             dtype = target_module._lock_dtype
         else:

@@ -2289,11 +2290,12 @@ class offload:
             args, kwargs = self.move_args_to_gpu(dtype, *args, **kwargs)
             return previous_method(*args, **kwargs)
 
-        if hasattr(target_module, "
+        if hasattr(target_module, "_mm_" + previous_method_name):
             return
-        setattr(target_module, "
+        setattr(target_module, "_mm_Id", model_id)
+        setattr(target_module, "_mm_" + previous_method_name, previous_method)
 
-        setattr(target_module,
+        setattr(target_module, previous_method_name, functools.update_wrapper(functools.partial(check_change_module, target_module), previous_method) )
 
         if not self.verboseLevel >=1:
             return

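The change above generalises the patch from a hard-coded forward to any method name: the original bound method is stashed on the module under _mm_<method name> (with _mm_Id recording the owning model), and the attribute is then replaced by a functools wrapper around it. A standalone sketch of that save-and-wrap pattern follows, under the assumption that a simple closure stands in for mmgp's check_change_module partial; the helper names are hypothetical:

    import functools
    import torch

    def wrap_method(module: torch.nn.Module, method_name: str, pre_hook):
        saved_attr = "_mm_" + method_name
        if hasattr(module, saved_attr):          # already wrapped, mirrors the guard above
            return
        original = getattr(module, method_name)
        setattr(module, saved_attr, original)    # keep the original so it can be restored

        def wrapper(*args, **kwargs):
            pre_hook(module)                     # e.g. move weights to the GPU first
            return original(*args, **kwargs)

        setattr(module, method_name, functools.update_wrapper(wrapper, original))

    def unwrap_method(module: torch.nn.Module, method_name: str):
        saved_attr = "_mm_" + method_name
        if hasattr(module, saved_attr):
            setattr(module, method_name, getattr(module, saved_attr))
            delattr(module, saved_attr)
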
@@ -2661,23 +2663,27 @@ def all(pipe_or_dict_of_modules, pinnedMemory = False, pinnedPEFTLora = False, p
                 cur_blocks_prefix, prev_blocks_name, cur_blocks_seq, is_mod_seq = pre, None, num, False
                 cur_blocks_name = submodule_name
                 # print(f"new block: {model_id}/{cur_blocks_name} - {submodule_name}")
-
-
-            if 
-
-
+            top_submodule = len(submodule_name.split("."))==1
+            offload_hooks = submodule._offload_hooks if hasattr(submodule, "_offload_hooks") else []
+            if len(offload_hooks) > 0:
+                pass
+            assert top_submodule or len(offload_hooks) == 0, "custom offload hooks can only be set at the of the module"
+            submodule_method_names = ["forward"] + offload_hooks
+            for submodule_method_name in submodule_method_names:
+                if not hasattr(submodule, submodule_method_name ): continue
+                if submodule_method_name == "forward" and any_lora and hasattr(submodule,"weight"):
                     submodule_method = self.hook_lora(submodule, current_model, model_id, loras_model_data, loras_model_shortcuts, submodule_name)
                 else:
-                    submodule_method = getattr(submodule, 
-                if callable(submodule_method):
-                    if 
-                        self.hook_change_module(submodule, current_model, model_id, submodule_name, submodule_method)
+                    submodule_method = getattr(submodule, submodule_method_name)
+                if callable(submodule_method):
+                    if top_submodule and cur_blocks_name is None:
+                        self.hook_change_module(submodule, current_model, model_id, submodule_name, submodule_method, submodule_method_name)
                     elif compilationInThisOne and submodule in towers_modules:
                         self.hook_preload_blocks_for_compilation(submodule, model_id, cur_blocks_name, context = submodule_name )
                     else:
                         self.hook_check_empty_cache_needed(submodule, current_model, model_id, cur_blocks_name, submodule_method, context = submodule_name )
-
-
+
+            self.add_module_to_blocks(model_id, cur_blocks_name, submodule, prev_blocks_name, submodule_name)
 
 
         self.tune_preloading(model_id, current_budget, towers_names)

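Read together with the new hook_change_module signature, the added lines let a top-level submodule opt additional entry points into the same wrapping as forward by exposing an _offload_hooks list, and the assert restricts this to un-nested submodules. A hedged sketch of what declaring such hooks might look like; the class, the method names and the commented offload.all call are illustrative assumptions, not mmgp documentation:

    import torch

    class TinyVAE(torch.nn.Module):
        def __init__(self):
            super().__init__()
            self.proj = torch.nn.Linear(8, 8)
            # Hypothetical: ask the offloader to also patch encode/decode, not only forward.
            self._offload_hooks = ["encode", "decode"]

        def forward(self, x):
            return self.proj(x)

        def encode(self, x):
            return self.proj(x)

        def decode(self, z):
            return self.proj(z)

    # vae = TinyVAE()
    # offload.all({"vae": vae})   # hooks would be applied while the modules are profiled
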
{mmgp-3.5.6.dist-info → mmgp-3.5.7.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mmgp
-Version: 3.5.6
+Version: 3.5.7
 Summary: Memory Management for the GPU Poor
 Author-email: deepbeepmeep <deepbeepmeep@yahoo.com>
 Requires-Python: >=3.10

@@ -15,7 +15,7 @@ Dynamic: license-file
 
 
 <p align="center">
-    <H2>Memory Management 3.5.6 for the GPU Poor by DeepBeepMeep</H2>
+    <H2>Memory Management 3.5.7 for the GPU Poor by DeepBeepMeep</H2>
 </p>
 
 
mmgp-3.5.7.dist-info/RECORD
ADDED

@@ -0,0 +1,9 @@
+__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+mmgp/__init__.py,sha256=A9qBwyQMd1M7vshSTOBnFGP1MQvS2hXmTcTCMUcmyzE,509
+mmgp/offload.py,sha256=SKt-EunQrH6omBFI7aNLe82GIoXBKW9y1i0HMPFrKLY,127089
+mmgp/safetensors2.py,sha256=4nKV13qCMabnNEB1TA_ueFbfGYYmiQ9racR_C6SsGug,18693
+mmgp-3.5.7.dist-info/licenses/LICENSE.md,sha256=DD-WIS0BkPoWJ_8hQO3J8hMP9K_1-dyrYv1YCbkxcDU,94
+mmgp-3.5.7.dist-info/METADATA,sha256=s420bK-WQuSZM2RpVwYjzXY-QmtIHkRbIiL9hAyV7sA,16309
+mmgp-3.5.7.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+mmgp-3.5.7.dist-info/top_level.txt,sha256=waGaepj2qVfnS2yAOkaMu4r9mJaVjGbEi6AwOUogU_U,14
+mmgp-3.5.7.dist-info/RECORD,,

mmgp-3.5.6.dist-info/RECORD
DELETED

@@ -1,9 +0,0 @@
-__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-mmgp/__init__.py,sha256=A9qBwyQMd1M7vshSTOBnFGP1MQvS2hXmTcTCMUcmyzE,509
-mmgp/offload.py,sha256=uoif7gOSNmWg5eqvMkmuVkTErNL6q_QJ0Lmm0QP7FLo,126305
-mmgp/safetensors2.py,sha256=4nKV13qCMabnNEB1TA_ueFbfGYYmiQ9racR_C6SsGug,18693
-mmgp-3.5.6.dist-info/licenses/LICENSE.md,sha256=DD-WIS0BkPoWJ_8hQO3J8hMP9K_1-dyrYv1YCbkxcDU,94
-mmgp-3.5.6.dist-info/METADATA,sha256=hgR8mrkLImQWNkSU3ayt78df5whCozfVqzIUvV9jo1I,16309
-mmgp-3.5.6.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-mmgp-3.5.6.dist-info/top_level.txt,sha256=waGaepj2qVfnS2yAOkaMu4r9mJaVjGbEi6AwOUogU_U,14
-mmgp-3.5.6.dist-info/RECORD,,

{mmgp-3.5.6.dist-info → mmgp-3.5.7.dist-info}/WHEEL
File without changes

{mmgp-3.5.6.dist-info → mmgp-3.5.7.dist-info}/licenses/LICENSE.md
File without changes

{mmgp-3.5.6.dist-info → mmgp-3.5.7.dist-info}/top_level.txt
File without changes