mmgp 3.5.9__py3-none-any.whl → 3.5.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mmgp might be problematic. Click here for more details.
- mmgp/offload.py +40 -11
- mmgp/safetensors2.py +13 -3
- {mmgp-3.5.9.dist-info → mmgp-3.5.11.dist-info}/METADATA +2 -2
- mmgp-3.5.11.dist-info/RECORD +9 -0
- {mmgp-3.5.9.dist-info → mmgp-3.5.11.dist-info}/licenses/LICENSE.md +1 -1
- mmgp-3.5.9.dist-info/RECORD +0 -9
- {mmgp-3.5.9.dist-info → mmgp-3.5.11.dist-info}/WHEEL +0 -0
- {mmgp-3.5.9.dist-info → mmgp-3.5.11.dist-info}/top_level.txt +0 -0
mmgp/offload.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# ------------------ Memory Management 3.5.9 for the GPU Poor by DeepBeepMeep (mmgp)------------------
|
|
1
|
+
# ------------------ Memory Management 3.5.11 for the GPU Poor by DeepBeepMeep (mmgp)------------------
|
|
2
2
|
#
|
|
3
3
|
# This module contains multiples optimisations so that models such as Flux (and derived), Mochi, CogView, HunyuanVideo, ... can run smoothly on a 24 GB GPU limited card.
|
|
4
4
|
# This a replacement for the accelerate library that should in theory manage offloading, but doesn't work properly with models that are loaded / unloaded several
|
|
@@ -66,7 +66,6 @@ from accelerate import init_empty_weights
|
|
|
66
66
|
|
|
67
67
|
import functools
|
|
68
68
|
import types
|
|
69
|
-
from functools import lru_cache
|
|
70
69
|
import torch
|
|
71
70
|
|
|
72
71
|
|
|
@@ -90,6 +89,23 @@ class QEmbedding(QModuleMixin, torch.nn.Embedding):
|
|
|
90
89
|
|
|
91
90
|
shared_state = {}
|
|
92
91
|
|
|
92
|
+
def get_cache(cache_name):
    """Return the dict registered under ``cache_name``, creating it on first use.

    All named caches live in a single registry dict stored at
    ``shared_state["_cache"]``; the registry itself is created lazily the
    first time any cache is requested.

    Args:
        cache_name: key identifying the cache inside the registry.

    Returns:
        The dict associated with ``cache_name``. Repeated calls with the same
        name return the same dict object until the registry is cleared.
    """
    all_cache = shared_state.get("_cache", None)
    if all_cache is None:
        all_cache = {}
        shared_state["_cache"] = all_cache
    # Look the cache up in the registry it is stored in. Searching
    # shared_state here would never find it, so every call would replace the
    # existing cache with a fresh empty dict and nothing would stay cached.
    cache = all_cache.get(cache_name, None)
    if cache is None:
        cache = {}
        all_cache[cache_name] = cache
    return cache
|
|
102
|
+
|
|
103
|
+
def clear_caches():
    """Empty the cache registry stored at ``shared_state["_cache"]``, if any.

    The registry dict is cleared in place and stays registered; when no
    registry has been created yet this is a no-op.
    """
    registry = shared_state.get("_cache", None)
    if registry is None:
        return
    registry.clear()
|
|
107
|
+
|
|
108
|
+
|
|
93
109
|
mmm = safetensors2.mmm
|
|
94
110
|
|
|
95
111
|
default_verboseLevel = 1
|
|
@@ -623,6 +639,7 @@ def _pin_to_memory(model, model_id, partialPinning = False, pinnedPEFTLora = Tru
|
|
|
623
639
|
total += size
|
|
624
640
|
|
|
625
641
|
current_big_tensor = big_tensors[big_tensor_no]
|
|
642
|
+
|
|
626
643
|
if is_buffer :
|
|
627
644
|
_force_load_buffer(p) # otherwise potential memory leak
|
|
628
645
|
if isinstance(p, QTensor):
|
|
@@ -671,7 +688,7 @@ def _welcome():
|
|
|
671
688
|
if welcome_displayed:
|
|
672
689
|
return
|
|
673
690
|
welcome_displayed = True
|
|
674
|
-
print(f"{BOLD}{HEADER}************ Memory Management for the GPU Poor (mmgp 3.5.
|
|
691
|
+
print(f"{BOLD}{HEADER}************ Memory Management for the GPU Poor (mmgp 3.5.11) by DeepBeepMeep ************{ENDC}{UNBOLD}")
|
|
675
692
|
|
|
676
693
|
def change_dtype(model, new_dtype, exclude_buffers = False):
|
|
677
694
|
for submodule_name, submodule in model.named_modules():
|
|
@@ -1032,7 +1049,7 @@ def load_loras_into_model(model, lora_path, lora_multi = None, activate_all_lora
|
|
|
1032
1049
|
|
|
1033
1050
|
if split_linear_modules_map != None:
|
|
1034
1051
|
new_state_dict = dict()
|
|
1035
|
-
suffixes = [(".alpha", -2, False), (".lora_B.weight", -3, True), (".lora_A.weight", -3, False)]
|
|
1052
|
+
suffixes = [(".alpha", -2, False), (".lora_B.weight", -3, True), (".lora_A.weight", -3, False), (".lora_up.weight", -3, True), (".lora_down.weight", -3, False)]
|
|
1036
1053
|
for module_name, module_data in state_dict.items():
|
|
1037
1054
|
name_parts = module_name.split(".")
|
|
1038
1055
|
for suffix, pos, any_split in suffixes:
|
|
@@ -1081,10 +1098,7 @@ def load_loras_into_model(model, lora_path, lora_multi = None, activate_all_lora
|
|
|
1081
1098
|
invalid_keys = []
|
|
1082
1099
|
unexpected_keys = []
|
|
1083
1100
|
for k, v in state_dict.items():
|
|
1084
|
-
lora_A = None
|
|
1085
|
-
lora_B = None
|
|
1086
|
-
diff_b = None
|
|
1087
|
-
diff = None
|
|
1101
|
+
lora_A = lora_B = diff_b = diff = lora_key = None
|
|
1088
1102
|
if k.endswith(".diff"):
|
|
1089
1103
|
diff = v
|
|
1090
1104
|
module_name = k[ : -5]
|
|
@@ -1179,7 +1193,7 @@ def load_loras_into_model(model, lora_path, lora_multi = None, activate_all_lora
|
|
|
1179
1193
|
loras_adapter_data[1] = lora_B.to(module.weight.dtype)
|
|
1180
1194
|
else:
|
|
1181
1195
|
loras_adapter_data[2] = diff_b.to(module.weight.dtype)
|
|
1182
|
-
if rank != None and "lora" in lora_key:
|
|
1196
|
+
if rank != None and lora_key is not None and "lora" in lora_key:
|
|
1183
1197
|
alpha_key = k[:-len(lora_key)] + "alpha"
|
|
1184
1198
|
alpha = lora_alphas.get(alpha_key, None)
|
|
1185
1199
|
if alpha is not None: loras_adapter_data[3] = alpha / rank
|
|
@@ -1309,7 +1323,7 @@ def fast_load_transformers_model(model_path: str, do_quantize = False, quantiza
|
|
|
1309
1323
|
model_path = [model_path]
|
|
1310
1324
|
|
|
1311
1325
|
|
|
1312
|
-
if not builtins.all(file_name.endswith(".sft") or file_name.endswith(".safetensors") or file_name.endswith(".pt") for file_name in model_path):
|
|
1326
|
+
if not builtins.all(file_name.endswith(".sft") or file_name.endswith(".safetensors") or file_name.endswith(".pt") or file_name.endswith(".ckpt") for file_name in model_path):
|
|
1313
1327
|
raise Exception("full model path to file expected")
|
|
1314
1328
|
|
|
1315
1329
|
model_path = [ _get_model(file) for file in model_path]
|
|
@@ -1317,7 +1331,7 @@ def fast_load_transformers_model(model_path: str, do_quantize = False, quantiza
|
|
|
1317
1331
|
raise Exception("Unable to find file")
|
|
1318
1332
|
|
|
1319
1333
|
verboseLevel = _compute_verbose_level(verboseLevel)
|
|
1320
|
-
if model_path[-1].endswith(".pt"):
|
|
1334
|
+
if model_path[-1].endswith(".pt") or model_path[-1].endswith(".ckpt"):
|
|
1321
1335
|
metadata = None
|
|
1322
1336
|
else:
|
|
1323
1337
|
with safetensors2.safe_open(model_path[-1], writable_tensors =writable_tensors) as f:
|
|
@@ -2684,6 +2698,21 @@ def all(pipe_or_dict_of_modules, pinnedMemory = False, pinnedPEFTLora = False, p
|
|
|
2684
2698
|
print(f"Model '{model_id}' already pinned to reserved memory")
|
|
2685
2699
|
else:
|
|
2686
2700
|
_pin_to_memory(current_model, model_id, partialPinning= partialPinning, pinnedPEFTLora = pinnedPEFTLora, perc_reserved_mem_max = perc_reserved_mem_max, verboseLevel=verboseLevel)
|
|
2701
|
+
# empty_tensor = torch.empty((1,))
|
|
2702
|
+
# for sub_module_name, sub_module in current_model.named_modules():
|
|
2703
|
+
# for k, p in sub_module.named_parameters(recurse=False):
|
|
2704
|
+
# if p is not None:
|
|
2705
|
+
# if isinstance(p, QTensor):
|
|
2706
|
+
# p._data.data = empty_tensor
|
|
2707
|
+
# p._scale.data = empty_tensor
|
|
2708
|
+
# else:
|
|
2709
|
+
# p.data = empty_tensor
|
|
2710
|
+
# del k
|
|
2711
|
+
# for k, v in sub_module.named_buffers(recurse=False):
|
|
2712
|
+
# del k
|
|
2713
|
+
# sub_module = None
|
|
2714
|
+
# v = None
|
|
2715
|
+
# gc.collect()
|
|
2687
2716
|
current_budget = model_budgets[model_id]
|
|
2688
2717
|
cur_blocks_prefix, prev_blocks_name, cur_blocks_name,cur_blocks_seq, is_mod_seq = None, None, None, -1, False
|
|
2689
2718
|
self.loaded_blocks[model_id] = None
|
mmgp/safetensors2.py
CHANGED
|
@@ -46,7 +46,16 @@ class MmapTracker:
|
|
|
46
46
|
file_path = os.path.join(*s)
|
|
47
47
|
self.file_path = file_path # os.path.abspath(file_path)
|
|
48
48
|
self.count = 0
|
|
49
|
-
|
|
49
|
+
key = file_path
|
|
50
|
+
i = 1
|
|
51
|
+
while True:
|
|
52
|
+
if key not in mmm:
|
|
53
|
+
mmm[key] = self
|
|
54
|
+
break
|
|
55
|
+
i +=1
|
|
56
|
+
key = key + "#" + str(i)
|
|
57
|
+
self.mmm_key = key
|
|
58
|
+
# print(f"MMAP Add: {file_path}: {mmm.keys()}")
|
|
50
59
|
|
|
51
60
|
def register(self, mmap_obj, map_id, start, size):
|
|
52
61
|
|
|
@@ -61,7 +70,8 @@ class MmapTracker:
|
|
|
61
70
|
|
|
62
71
|
print(f"MMap Manager of file '{self.file_path}' : MMap no {map_id} has been released" + text)
|
|
63
72
|
if self.count == self._already_released:
|
|
64
|
-
|
|
73
|
+
# print(f"MMAP Del: {self.file_path}: {mmm.keys()}")
|
|
74
|
+
del mmm[self.mmm_key ]
|
|
65
75
|
|
|
66
76
|
self._maps.pop(map_id, None)
|
|
67
77
|
|
|
@@ -240,7 +250,7 @@ def torch_write_file(sd, file_path, quantization_map = None, config = None, extr
|
|
|
240
250
|
t = t.view(torch.uint16)
|
|
241
251
|
elif dtype == torch.float8_e5m2 or dtype == torch.float8_e4m3fn:
|
|
242
252
|
t = t.view(torch.uint8)
|
|
243
|
-
buffer = t.numpy().tobytes()
|
|
253
|
+
buffer = t.cpu().numpy().tobytes()
|
|
244
254
|
bytes_written = writer.write(buffer)
|
|
245
255
|
assert bytes_written == size
|
|
246
256
|
i+=1
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: mmgp
|
|
3
|
-
Version: 3.5.9
|
|
3
|
+
Version: 3.5.11
|
|
4
4
|
Summary: Memory Management for the GPU Poor
|
|
5
5
|
Author-email: deepbeepmeep <deepbeepmeep@yahoo.com>
|
|
6
6
|
Requires-Python: >=3.10
|
|
@@ -15,7 +15,7 @@ Dynamic: license-file
|
|
|
15
15
|
|
|
16
16
|
|
|
17
17
|
<p align="center">
|
|
18
|
-
<H2>Memory Management 3.5.9 for the GPU Poor by DeepBeepMeep</H2>
|
|
18
|
+
<H2>Memory Management 3.5.11 for the GPU Poor by DeepBeepMeep</H2>
|
|
19
19
|
</p>
|
|
20
20
|
|
|
21
21
|
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
+
mmgp/__init__.py,sha256=A9qBwyQMd1M7vshSTOBnFGP1MQvS2hXmTcTCMUcmyzE,509
|
|
3
|
+
mmgp/offload.py,sha256=KO5wEuGNKxJPaL_ZHuGZDL8l0ZZIY_zf3yI4vBYzoFQ,131664
|
|
4
|
+
mmgp/safetensors2.py,sha256=zYNMprt1KoxgVALbcz6DawxsQDNNRImvgO9cYRChUiY,19028
|
|
5
|
+
mmgp-3.5.11.dist-info/licenses/LICENSE.md,sha256=HjzvY2grdtdduZclbZ46B2M-XpT4MDCxFub5ZwTWq2g,93
|
|
6
|
+
mmgp-3.5.11.dist-info/METADATA,sha256=-071YZvgNg093aC0OMNZT1-o3ZXu9RqTquoEzBYsPBE,16311
|
|
7
|
+
mmgp-3.5.11.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
8
|
+
mmgp-3.5.11.dist-info/top_level.txt,sha256=waGaepj2qVfnS2yAOkaMu4r9mJaVjGbEi6AwOUogU_U,14
|
|
9
|
+
mmgp-3.5.11.dist-info/RECORD,,
|
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
GNU GENERAL PUBLIC LICENSE
|
|
1
|
+
GNU GENERAL PUBLIC LICENSE
|
|
2
2
|
Version 3, 29 June 2007
|
mmgp-3.5.9.dist-info/RECORD
DELETED
|
@@ -1,9 +0,0 @@
|
|
|
1
|
-
__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
-
mmgp/__init__.py,sha256=A9qBwyQMd1M7vshSTOBnFGP1MQvS2hXmTcTCMUcmyzE,509
|
|
3
|
-
mmgp/offload.py,sha256=AViVBdUYDN42SnICeeTFa3K3JQ7a8rXB-eC2qPIY2yM,130347
|
|
4
|
-
mmgp/safetensors2.py,sha256=4nKV13qCMabnNEB1TA_ueFbfGYYmiQ9racR_C6SsGug,18693
|
|
5
|
-
mmgp-3.5.9.dist-info/licenses/LICENSE.md,sha256=DD-WIS0BkPoWJ_8hQO3J8hMP9K_1-dyrYv1YCbkxcDU,94
|
|
6
|
-
mmgp-3.5.9.dist-info/METADATA,sha256=PXpq_dDRmAQED1dTW8NKUUB_FcYb54VRqlpjqOY771Y,16309
|
|
7
|
-
mmgp-3.5.9.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
8
|
-
mmgp-3.5.9.dist-info/top_level.txt,sha256=waGaepj2qVfnS2yAOkaMu4r9mJaVjGbEi6AwOUogU_U,14
|
|
9
|
-
mmgp-3.5.9.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|