mmgp 3.4.7-py3-none-any.whl → 3.4.9-py3-none-any.whl
- mmgp/offload.py +33 -26
- {mmgp-3.4.7.dist-info → mmgp-3.4.9.dist-info}/METADATA +2 -2
- mmgp-3.4.9.dist-info/RECORD +9 -0
- {mmgp-3.4.7.dist-info → mmgp-3.4.9.dist-info}/WHEEL +1 -1
- mmgp-3.4.7.dist-info/RECORD +0 -9
- {mmgp-3.4.7.dist-info → mmgp-3.4.9.dist-info}/licenses/LICENSE.md +0 -0
- {mmgp-3.4.7.dist-info → mmgp-3.4.9.dist-info}/top_level.txt +0 -0
mmgp/offload.py CHANGED
@@ -1,4 +1,4 @@
-# ------------------ Memory Management 3.4.7 for the GPU Poor by DeepBeepMeep (mmgp)------------------
+# ------------------ Memory Management 3.4.9 for the GPU Poor by DeepBeepMeep (mmgp)------------------
 #
 # This module contains multiples optimisations so that models such as Flux (and derived), Mochi, CogView, HunyuanVideo, ... can run smoothly on a 24 GB GPU limited card.
 # This a replacement for the accelerate library that should in theory manage offloading, but doesn't work properly with models that are loaded / unloaded several
@@ -448,9 +448,9 @@ def _pin_sd_to_memory(sd, sd_name, tied_weights = None, gig_tensor_size = BIG_TE
         print(f"'{','.join(names_list)}' was partially pinned to reserved RAM: {last_big_tensor} large blocks spread across {total/ONE_MB:.2f} MB")
     else:
         if len(names_list) > 0:
-            print(f"'{','.join(names_list)}' was pinned entirely to reserved RAM: {last_big_tensor} large blocks spread across {total/ONE_MB:.2f} MB")
-        else:
             print(f"'{','.join(names_list)}' were pinned entirely to reserved RAM: {last_big_tensor} large blocks spread across {total/ONE_MB:.2f} MB")
+        else:
+            print(f"'{','.join(names_list)}' was pinned entirely to reserved RAM: {last_big_tensor} large blocks spread across {total/ONE_MB:.2f} MB")
 
 
     return
@@ -658,7 +658,7 @@ def _welcome():
     if welcome_displayed:
         return
     welcome_displayed = True
-    print(f"{BOLD}{HEADER}************ Memory Management for the GPU Poor (mmgp 3.4.7) by DeepBeepMeep ************{ENDC}{UNBOLD}")
+    print(f"{BOLD}{HEADER}************ Memory Management for the GPU Poor (mmgp 3.4.9) by DeepBeepMeep ************{ENDC}{UNBOLD}")
 
 def change_dtype(model, new_dtype, exclude_buffers = False):
     for submodule_name, submodule in model.named_modules():
@@ -1226,7 +1226,7 @@ def load_loras_into_model(model, lora_path, lora_multi = None, activate_all_lora
 
     model._loras_errors = errors
     if not check_only:
-        if pinnedLora:
+        if pinnedLora and len(pinned_sd_list) > 0:
             _pin_sd_to_memory(pinned_sd_list, pinned_names_list)
     model._loras_adapters = adapters
     if activate_all_loras:
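A note on the guard above: _pin_sd_to_memory reserves page-locked (pinned) host RAM for whatever it is handed, so 3.4.9 only calls it when at least one LoRA state dict was actually collected. Pinned RAM is what makes the later CPU-to-GPU transfers fast and overlappable; a minimal PyTorch sketch of the mechanism (tensor and shapes hypothetical, unrelated to mmgp internals):

    import torch

    # Page-locked host memory cannot be swapped out, so CUDA can DMA from it,
    # and H2D copies may overlap with compute when non_blocking=True.
    cpu_weight = torch.randn(4096, 4096).pin_memory()
    if torch.cuda.is_available():
        gpu_weight = cpu_weight.to("cuda", non_blocking=True)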
@@ -1275,7 +1275,7 @@ def move_loras_to_device(model, device="cpu" ):
         if ".lora_" in k:
             m.to(device)
 
-def fast_load_transformers_model(model_path: str, do_quantize = False, quantizationType = qint8, pinToMemory = False, partialPinning = False, forcedConfigPath = None, modelClass=None, modelPrefix = None, writable_tensors = True, verboseLevel = -1, configKwargs ={}):
+def fast_load_transformers_model(model_path: str, do_quantize = False, quantizationType = qint8, pinToMemory = False, partialPinning = False, forcedConfigPath = None, defaultConfigPath = None, modelClass=None, modelPrefix = None, writable_tensors = True, verboseLevel = -1, configKwargs ={}):
     """
     quick version of .LoadfromPretrained of the transformers library
     used to build a model and load the corresponding weights (quantized or not)
@@ -1308,7 +1308,7 @@ def fast_load_transformers_model(model_path: str, do_quantize = False, quantizat
     if forcedConfigPath != None:
         config_fullpath = forcedConfigPath
     else:
-        config_fullpath = os.path.join(os.path.dirname(model_path[-1]), "config.json")
+        config_fullpath = os.path.join(os.path.dirname(model_path[-1]), "config.json") if defaultConfigPath == None else defaultConfigPath
 
     if not os.path.isfile(config_fullpath):
         raise Exception("a 'config.json' that describes the model is required in the directory of the model or inside the safetensor file")
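About the new parameter: forcedConfigPath still wins outright, while defaultConfigPath, when given, replaces the default of reading the config.json that sits next to the weights file. A hypothetical call (paths invented for illustration):

    from mmgp import offload

    # Keyword names follow the diffed signature; the paths are made up.
    text_encoder = offload.fast_load_transformers_model(
        "ckpts/text_encoder/model.safetensors",
        do_quantize=True,
        defaultConfigPath="configs/text_encoder.json",  # read instead of the adjacent config.json
    )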
@@ -1407,14 +1407,14 @@ def load_model_data(model, file_path: str, do_quantize = False, quantizationType
     if not (".safetensors" in file or ".sft" in file):
         if pinToMemory:
             raise Exception("Pinning to memory while loading only supported for safe tensors files")
-        state_dict = torch.load(file, weights_only=True)
+        state_dict = torch.load(file, weights_only=True, map_location="cpu")
         if "module" in state_dict:
             state_dict = state_dict["module"]
 
     else:
         basename = os.path.basename(file)
 
-        if "
+        if "-of-" in basename:
             metadata = None
             file_parts= basename.split("-")
             parts_max = int(file_parts[-1][:5])
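The added map_location="cpu" is the standard guard for checkpoints that were saved from CUDA tensors: without it, torch.load restores them onto the device recorded in the file, claiming GPU memory before the offloader can intervene, and failing outright on a CPU-only machine. A standalone sketch (file name hypothetical):

    import torch

    # weights_only=True restricts unpickling to tensor payloads;
    # map_location="cpu" overrides whatever device was recorded at save time.
    state_dict = torch.load("checkpoint.pt", weights_only=True, map_location="cpu")
    if "module" in state_dict:  # unwrap a DDP/DeepSpeed-style container
        state_dict = state_dict["module"]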
@@ -1500,11 +1500,11 @@ def load_model_data(model, file_path: str, do_quantize = False, quantizationType
 
     if do_quantize:
         if quantization_map != None and len(quantization_map) > 0 :
-            if _quantize(model, quantizationType, verboseLevel=verboseLevel, model_id=file_path):
-                quantization_map = model._quanto_map
-        else:
             if verboseLevel >=1:
                 print("Model already quantized")
+        else:
+            if _quantize(model, quantizationType, verboseLevel=verboseLevel, model_id=file_path):
+                quantization_map = model._quanto_map
 
     if pinToMemory:
         _pin_to_memory(model, file_path, partialPinning = partialPinning, verboseLevel = verboseLevel)
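The 3.4.7 branches here were inverted: a non-empty quantization_map means the checkpoint already carries quantized weights, yet the old code re-ran _quantize in exactly that case and printed "Model already quantized" when no map existed. A runnable restatement of the corrected decision order, with stand-ins for mmgp's internals:

    def maybe_quantize(model, quantization_map, do_quantize, quantize_fn, verbose_level=1):
        # Corrected order: a non-empty map means "already quantized", so only
        # report; otherwise run the quantizer and pick up the map it leaves behind.
        if do_quantize:
            if quantization_map:
                if verbose_level >= 1:
                    print("Model already quantized")
            elif quantize_fn(model):
                quantization_map = getattr(model, "_quanto_map", {})
        return quantization_map

    class _Model:  # stand-in for a real model object
        pass

    m = _Model()
    m._quanto_map = {"linear.weight": "qint8"}
    print(maybe_quantize(m, {}, True, lambda model: True))              # quantizes
    print(maybe_quantize(m, m._quanto_map, True, lambda model: True))   # only reports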
@@ -1621,9 +1621,12 @@ class HfHook:
     def __init__(self):
         self.execution_device = "cuda"
 
-    def
-
+    def init_hook(self, module):
+        return module
 
+    def detach_hook(self, module):
+        return module
+
 last_offload_obj = None
 class offload:
     def __init__(self):
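The two added no-op methods fill out the hook interface (init_hook/detach_hook) that accelerate-style hook management expects to call on attach and detach; mmgp handles placement itself, so identity implementations suffice. A self-contained sketch, assuming that calling convention:

    import torch

    class HfHook:  # minimal stand-in mirroring the diffed class
        def __init__(self):
            self.execution_device = "cuda"

        def init_hook(self, module):
            return module  # attach-time setup: nothing to do

        def detach_hook(self, module):
            return module  # detach-time teardown: nothing to do

    layer = torch.nn.Linear(8, 8)
    hook = HfHook()
    layer = hook.init_hook(layer)    # callers expect the (possibly wrapped) module back
    layer = hook.detach_hook(layer)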
@@ -2028,7 +2031,9 @@ class offload:
                 continue
             lora_A_weight, lora_B_weight, diff_b, alpha = data
             scaling = self._get_lora_scaling(loras_scaling, model, active_adapter) * alpha
-
+            if lora_A_weight != None:
+                weight.addmm_(lora_B_weight, lora_A_weight, alpha= scaling )
+
             if diff_b != None:
                 if bias == None:
                     bias = diff_b.clone()
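The new merge path folds a LoRA pair straight into the base weight in place: Tensor.addmm_(mat1, mat2, alpha=s) computes self += s * (mat1 @ mat2) with the default beta of 1, i.e. W += scaling * (B @ A), and the None check lets bias-only adapters (diff_b without A/B matrices) fall through to the bias handling below. The identity, checked standalone:

    import torch

    out_features, in_features, rank = 64, 32, 4
    W = torch.randn(out_features, in_features)   # base weight
    B = torch.randn(out_features, rank)          # lora_B_weight
    A = torch.randn(rank, in_features)           # lora_A_weight
    scaling = 0.5

    expected = W + scaling * (B @ A)
    W.addmm_(B, A, alpha=scaling)                # in place: W += scaling * (B @ A)
    assert torch.allclose(W, expected, atol=1e-5)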
@@ -2059,17 +2064,20 @@ class offload:
             lora_A, lora_B, diff_b, alpha = data
             # dropout = self.lora_dropout[active_adapter]
             scaling = self._get_lora_scaling(loras_scaling, model, active_adapter) * alpha
-
-
-            if training:
-                pass
-                # y = lora_A(dropout(x))
+            if lora_A == None:
+                result.add_(diff_b, alpha=scaling)
             else:
-
-
-
-
-
+                x = x.to(lora_A.dtype)
+
+                if training:
+                    pass
+                    # y = lora_A(dropout(x))
+                else:
+                    y = torch.nn.functional.linear(x, lora_A, bias=None)
+                    y = torch.nn.functional.linear(y, lora_B, bias=diff_b)
+                    y*= scaling
+                    result+= y
+                    del y
 
         return result
 
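The rebuilt inference branch applies an unmerged adapter as two thin matmuls through the rank bottleneck instead of materializing the full (out, in) delta; diff_b rides along as the bias of the second linear, so it is scaled together with the low-rank term, matching the bias-only path (result.add_(diff_b, alpha=scaling)) when lora_A is None. The algebra, checked standalone (shapes hypothetical):

    import torch
    import torch.nn.functional as F

    batch, in_features, out_features, rank = 2, 32, 64, 4
    x = torch.randn(batch, in_features)
    lora_A = torch.randn(rank, in_features)
    lora_B = torch.randn(out_features, rank)
    diff_b = torch.randn(out_features)
    scaling = 0.5

    y = F.linear(x, lora_A)                   # (batch, rank): project down
    y = F.linear(y, lora_B, bias=diff_b)      # (batch, out): project up, add bias diff
    y *= scaling                              # scales low-rank term and bias diff together

    ref = scaling * (x @ lora_A.T @ lora_B.T + diff_b)
    assert torch.allclose(y, ref, atol=1e-5)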
@@ -2405,7 +2413,6 @@ def all(pipe_or_dict_of_modules, pinnedMemory = False, pinnedPEFTLora = False, p
         model_dtype = getattr(current_model, "_model_dtype", None)
         # if model_dtype == None:
         #     model_dtype = getattr(current_model, "dtype", None)
-
         for _ , m in current_model.named_modules():
             ignore_dtype = hasattr(m, "_lock_dtype")
             for n, p in m.named_parameters(recurse = False):
{mmgp-3.4.7.dist-info → mmgp-3.4.9.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mmgp
-Version: 3.4.7
+Version: 3.4.9
 Summary: Memory Management for the GPU Poor
 Author-email: deepbeepmeep <deepbeepmeep@yahoo.com>
 Requires-Python: >=3.10
@@ -15,7 +15,7 @@ Dynamic: license-file
 
 
 <p align="center">
-<H2>Memory Management 3.4.7 for the GPU Poor by DeepBeepMeep</H2>
+<H2>Memory Management 3.4.9 for the GPU Poor by DeepBeepMeep</H2>
 </p>
 
 
mmgp-3.4.9.dist-info/RECORD ADDED
@@ -0,0 +1,9 @@
+__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+mmgp/__init__.py,sha256=A9qBwyQMd1M7vshSTOBnFGP1MQvS2hXmTcTCMUcmyzE,509
+mmgp/offload.py,sha256=oIicu2S5E_lBlA3wqi5RW9UIQe9vCKCka_wkTXJwlUg,121549
+mmgp/safetensors2.py,sha256=4nKV13qCMabnNEB1TA_ueFbfGYYmiQ9racR_C6SsGug,18693
+mmgp-3.4.9.dist-info/licenses/LICENSE.md,sha256=HjzvY2grdtdduZclbZ46B2M-XpT4MDCxFub5ZwTWq2g,93
+mmgp-3.4.9.dist-info/METADATA,sha256=XwnMyOWZbXeKOyaTjOOMf_6j7jcOFs46P0b4tKv7BlU,16309
+mmgp-3.4.9.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+mmgp-3.4.9.dist-info/top_level.txt,sha256=waGaepj2qVfnS2yAOkaMu4r9mJaVjGbEi6AwOUogU_U,14
+mmgp-3.4.9.dist-info/RECORD,,
mmgp-3.4.7.dist-info/RECORD DELETED
@@ -1,9 +0,0 @@
-__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-mmgp/__init__.py,sha256=A9qBwyQMd1M7vshSTOBnFGP1MQvS2hXmTcTCMUcmyzE,509
-mmgp/offload.py,sha256=wf5u4qaGKYm6OTyGg4TXCa3aA0h3nuhnml7qOzn6JOY,121124
-mmgp/safetensors2.py,sha256=4nKV13qCMabnNEB1TA_ueFbfGYYmiQ9racR_C6SsGug,18693
-mmgp-3.4.7.dist-info/licenses/LICENSE.md,sha256=HjzvY2grdtdduZclbZ46B2M-XpT4MDCxFub5ZwTWq2g,93
-mmgp-3.4.7.dist-info/METADATA,sha256=DztABKlGwAcKYogKuTzsOWs0he3elTFZXAkWpH4yIEU,16309
-mmgp-3.4.7.dist-info/WHEEL,sha256=zaaOINJESkSfm_4HQVc5ssNzHCPXhJm0kEUakpsEHaU,91
-mmgp-3.4.7.dist-info/top_level.txt,sha256=waGaepj2qVfnS2yAOkaMu4r9mJaVjGbEi6AwOUogU_U,14
-mmgp-3.4.7.dist-info/RECORD,,
{mmgp-3.4.7.dist-info → mmgp-3.4.9.dist-info}/licenses/LICENSE.md: File without changes
{mmgp-3.4.7.dist-info → mmgp-3.4.9.dist-info}/top_level.txt: File without changes