mmgp 3.6.4__py3-none-any.whl → 3.6.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mmgp might be problematic. Click here for more details.
- mmgp/offload.py +14 -5
- {mmgp-3.6.4.dist-info → mmgp-3.6.6.dist-info}/METADATA +1 -1
- mmgp-3.6.6.dist-info/RECORD +10 -0
- {mmgp-3.6.4.dist-info → mmgp-3.6.6.dist-info}/licenses/LICENSE.md +1 -1
- mmgp-3.6.4.dist-info/RECORD +0 -10
- {mmgp-3.6.4.dist-info → mmgp-3.6.6.dist-info}/WHEEL +0 -0
- {mmgp-3.6.4.dist-info → mmgp-3.6.6.dist-info}/top_level.txt +0 -0
mmgp/offload.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# ------------------ Memory Management 3.6.4 for the GPU Poor by DeepBeepMeep (mmgp)------------------
|
|
1
|
+
# ------------------ Memory Management 3.6.6 for the GPU Poor by DeepBeepMeep (mmgp)------------------
|
|
2
2
|
#
|
|
3
3
|
# This module contains multiples optimisations so that models such as Flux (and derived), Mochi, CogView, HunyuanVideo, ... can run smoothly on a 24 GB GPU limited card.
|
|
4
4
|
# This a replacement for the accelerate library that should in theory manage offloading, but doesn't work properly with models that are loaded / unloaded several
|
|
@@ -63,7 +63,7 @@ import json
|
|
|
63
63
|
import psutil
|
|
64
64
|
import builtins
|
|
65
65
|
from accelerate import init_empty_weights
|
|
66
|
-
|
|
66
|
+
from functools import wraps
|
|
67
67
|
import functools
|
|
68
68
|
import types
|
|
69
69
|
|
|
@@ -86,6 +86,14 @@ class QEmbedding(QModuleMixin, torch.nn.Embedding):
|
|
|
86
86
|
|
|
87
87
|
|
|
88
88
|
|
|
89
|
+
def cudacontext(device):
|
|
90
|
+
def decorator(func):
|
|
91
|
+
@wraps(func)
|
|
92
|
+
def wrapper(*args, **kwargs):
|
|
93
|
+
with torch.device(device):
|
|
94
|
+
return func(*args, **kwargs)
|
|
95
|
+
return wrapper
|
|
96
|
+
return decorator
|
|
89
97
|
|
|
90
98
|
|
|
91
99
|
shared_state = {}
|
|
@@ -689,7 +697,7 @@ def _welcome():
|
|
|
689
697
|
if welcome_displayed:
|
|
690
698
|
return
|
|
691
699
|
welcome_displayed = True
|
|
692
|
-
print(f"{BOLD}{HEADER}************ Memory Management for the GPU Poor (mmgp 3.6.4) by DeepBeepMeep ************{ENDC}{UNBOLD}")
|
|
700
|
+
print(f"{BOLD}{HEADER}************ Memory Management for the GPU Poor (mmgp 3.6.6) by DeepBeepMeep ************{ENDC}{UNBOLD}")
|
|
693
701
|
|
|
694
702
|
def change_dtype(model, new_dtype, exclude_buffers = False):
|
|
695
703
|
for submodule_name, submodule in model.named_modules():
|
|
@@ -1294,6 +1302,7 @@ def sync_models_loras(model, model2):
|
|
|
1294
1302
|
|
|
1295
1303
|
def unload_loras_from_model(model):
|
|
1296
1304
|
if model is None: return
|
|
1305
|
+
if not hasattr(model, "_loras_model_data"): return
|
|
1297
1306
|
for _, v in model._loras_model_data.items():
|
|
1298
1307
|
v.clear()
|
|
1299
1308
|
for _, v in model._loras_model_shortcuts.items():
|
|
@@ -1420,7 +1429,6 @@ def fast_load_transformers_model(model_path: str, do_quantize = False, quantiza
|
|
|
1420
1429
|
model = transfomer_class.from_config(transformer_config )
|
|
1421
1430
|
|
|
1422
1431
|
|
|
1423
|
-
torch.set_default_device('cpu')
|
|
1424
1432
|
model.eval().requires_grad_(False)
|
|
1425
1433
|
|
|
1426
1434
|
model._config = transformer_config
|
|
@@ -1431,6 +1439,7 @@ def fast_load_transformers_model(model_path: str, do_quantize = False, quantiza
|
|
|
1431
1439
|
|
|
1432
1440
|
|
|
1433
1441
|
|
|
1442
|
+
@cudacontext("cpu")
|
|
1434
1443
|
def load_model_data(model, file_path, do_quantize = False, quantizationType = qint8, pinToMemory = False, partialPinning = False, modelPrefix = None, writable_tensors = True, preprocess_sd = None, postprocess_sd = None, modules = None, return_shared_modules = None, default_dtype = torch.bfloat16, ignore_unused_weights = False, verboseLevel = -1):
|
|
1435
1444
|
"""
|
|
1436
1445
|
Load a model, detect if it has been previously quantized using quanto and do the extra setup if necessary
|
|
@@ -1486,6 +1495,7 @@ def load_model_data(model, file_path, do_quantize = False, quantizationType = qi
|
|
|
1486
1495
|
for no, file in enumerate(file_path):
|
|
1487
1496
|
quantization_map = None
|
|
1488
1497
|
tied_weights_map = None
|
|
1498
|
+
metadata = None
|
|
1489
1499
|
if not (".safetensors" in file or ".sft" in file):
|
|
1490
1500
|
if pinToMemory:
|
|
1491
1501
|
raise Exception("Pinning to memory while loading only supported for safe tensors files")
|
|
@@ -1497,7 +1507,6 @@ def load_model_data(model, file_path, do_quantize = False, quantizationType = qi
|
|
|
1497
1507
|
basename = os.path.basename(file)
|
|
1498
1508
|
|
|
1499
1509
|
if "-of-" in basename:
|
|
1500
|
-
metadata = None
|
|
1501
1510
|
file_parts= basename.split("-")
|
|
1502
1511
|
parts_max = int(file_parts[-1][:5])
|
|
1503
1512
|
state_dict = {}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
+
mmgp/__init__.py,sha256=A9qBwyQMd1M7vshSTOBnFGP1MQvS2hXmTcTCMUcmyzE,509
|
|
3
|
+
mmgp/fp8_quanto_bridge.py,sha256=VtUaD6wzo7Yn9vGY0LMtbhwt6KMWRpSWLc65bU_sfZU,21155
|
|
4
|
+
mmgp/offload.py,sha256=Zd14BTWZ-Y_rcBzrKITimNQlMezIdzTkbhxbd-ggsrY,139244
|
|
5
|
+
mmgp/safetensors2.py,sha256=zYNMprt1KoxgVALbcz6DawxsQDNNRImvgO9cYRChUiY,19028
|
|
6
|
+
mmgp-3.6.6.dist-info/licenses/LICENSE.md,sha256=DD-WIS0BkPoWJ_8hQO3J8hMP9K_1-dyrYv1YCbkxcDU,94
|
|
7
|
+
mmgp-3.6.6.dist-info/METADATA,sha256=B4y6VLFaEDAi3E59WV7fNNEFwS4HP9zT0UILUq7GOn0,16309
|
|
8
|
+
mmgp-3.6.6.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
9
|
+
mmgp-3.6.6.dist-info/top_level.txt,sha256=waGaepj2qVfnS2yAOkaMu4r9mJaVjGbEi6AwOUogU_U,14
|
|
10
|
+
mmgp-3.6.6.dist-info/RECORD,,
|
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
GNU GENERAL PUBLIC LICENSE
|
|
1
|
+
GNU GENERAL PUBLIC LICENSE
|
|
2
2
|
Version 3, 29 June 2007
|
mmgp-3.6.4.dist-info/RECORD
DELETED
|
@@ -1,10 +0,0 @@
|
|
|
1
|
-
__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
-
mmgp/__init__.py,sha256=A9qBwyQMd1M7vshSTOBnFGP1MQvS2hXmTcTCMUcmyzE,509
|
|
3
|
-
mmgp/fp8_quanto_bridge.py,sha256=VtUaD6wzo7Yn9vGY0LMtbhwt6KMWRpSWLc65bU_sfZU,21155
|
|
4
|
-
mmgp/offload.py,sha256=hRSLKfUY_7bMRS2BUv79cB4BGjub5uLhVpZuoc9AWTI,138940
|
|
5
|
-
mmgp/safetensors2.py,sha256=zYNMprt1KoxgVALbcz6DawxsQDNNRImvgO9cYRChUiY,19028
|
|
6
|
-
mmgp-3.6.4.dist-info/licenses/LICENSE.md,sha256=HjzvY2grdtdduZclbZ46B2M-XpT4MDCxFub5ZwTWq2g,93
|
|
7
|
-
mmgp-3.6.4.dist-info/METADATA,sha256=e2NavzQQfgehOKy1KRyy_I2sL3tIpePcBHMzlvFDOGE,16309
|
|
8
|
-
mmgp-3.6.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
9
|
-
mmgp-3.6.4.dist-info/top_level.txt,sha256=waGaepj2qVfnS2yAOkaMu4r9mJaVjGbEi6AwOUogU_U,14
|
|
10
|
-
mmgp-3.6.4.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|