mmgp 3.4.4-py3-none-any.whl → 3.4.6-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mmgp/offload.py +22 -24
- mmgp/safetensors2.py +11 -13
- {mmgp-3.4.4.dist-info → mmgp-3.4.6.dist-info}/METADATA +3 -3
- mmgp-3.4.6.dist-info/RECORD +9 -0
- {mmgp-3.4.4.dist-info → mmgp-3.4.6.dist-info}/WHEEL +1 -1
- mmgp-3.4.4.dist-info/RECORD +0 -9
- {mmgp-3.4.4.dist-info → mmgp-3.4.6.dist-info}/licenses/LICENSE.md +0 -0
- {mmgp-3.4.4.dist-info → mmgp-3.4.6.dist-info}/top_level.txt +0 -0
mmgp/offload.py
CHANGED
@@ -1,4 +1,4 @@
-# ------------------ Memory Management 3.4.
+# ------------------ Memory Management 3.4.5 for the GPU Poor by DeepBeepMeep (mmgp)------------------
 #
 # This module contains multiples optimisations so that models such as Flux (and derived), Mochi, CogView, HunyuanVideo, ... can run smoothly on a 24 GB GPU limited card.
 # This a replacement for the accelerate library that should in theory manage offloading, but doesn't work properly with models that are loaded / unloaded several
@@ -619,7 +619,7 @@ def _welcome():
     if welcome_displayed:
         return
     welcome_displayed = True
-    print(f"{BOLD}{HEADER}************ Memory Management for the GPU Poor (mmgp 3.4.
+    print(f"{BOLD}{HEADER}************ Memory Management for the GPU Poor (mmgp 3.4.5) by DeepBeepMeep ************{ENDC}{UNBOLD}")
 
 def change_dtype(model, new_dtype, exclude_buffers = False):
     for submodule_name, submodule in model.named_modules():
@@ -1259,7 +1259,6 @@ def fast_load_transformers_model(model_path: str, do_quantize = False, quantizat
         with init_empty_weights():
             model = transfomer_class(config_obj)
 
-        model = model.base_model
 
     elif "_class_name" in transformer_config:
         class_name = transformer_config["_class_name"]
@@ -1401,7 +1400,7 @@ def load_model_data(model, file_path: str, do_quantize = False, quantizationType
                 base_model_prefix = k[:-len(missing_keys[0])]
                 break
         if base_model_prefix == None:
-            raise Exception("Missing keys: {missing_keys}")
+            raise Exception(f"Missing keys: {missing_keys}")
         state_dict = filter_state_dict(state_dict, base_model_prefix)
     missing_keys , unexpected_keys = model.load_state_dict(state_dict, False, assign = True )
     del state_dict
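Note: the fix above adds the missing f prefix. Without it, Python keeps the braces literally, so the 3.4.4 exception message never showed the actual key names. A minimal illustration (the key names are made up):

    missing_keys = ["encoder.weight", "encoder.bias"]
    print("Missing keys: {missing_keys}")   # old behaviour -> Missing keys: {missing_keys}
    print(f"Missing keys: {missing_keys}")  # fixed -> Missing keys: ['encoder.weight', 'encoder.bias']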
@@ -2030,7 +2029,7 @@ class offload:
         else:
             dtype = model._dtype
 
-        def check_change_module(module, *args, **kwargs):
+        def check_change_module(module, *args, **kwargs):
             self.ensure_model_loaded(model_id)
             # transfer leftovers inputs that were incorrectly created in the RAM (mostly due to some .device tests that returned incorrectly "cpu")
             if dtype != None:
@@ -2064,10 +2063,7 @@ class offload:
         # current_budget = 5000 * ONE_MB
         base_size = self.blocks_of_modules_sizes[model_id]
         current_budget -= base_size
-
-        if self.verboseLevel >=1:
-            print(f"Async loading plan for model '{model_id}' : minimum budget management, beside the async shuttle only base model ({(base_size)/ONE_MB:0.2f} MB) will be preloaded")
-        return
+        current_budget = max(0, current_budget)
 
         towers = []
         total_size = 0
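Note: the hunk above swaps an early return (which abandoned the rest of the preload planning) for a clamp to zero, so planning continues even when the base model alone exhausts the budget. A runnable sketch of the arithmetic, with made-up sizes:

    ONE_MB = 1024 * 1024            # same constant name as in offload.py
    base_size = 3000 * ONE_MB       # hypothetical base model size
    current_budget = 2000 * ONE_MB  # hypothetical preload budget

    current_budget -= base_size     # goes negative here: -1000 MB
    current_budget = max(0, current_budget)
    print(current_budget / ONE_MB)  # -> 0.0; the planning loop below then preloads nothing extra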
@@ -2086,25 +2082,21 @@ class offload:
             towers.append( (floors, max_floor_size, tower_size) )
             total_size += tower_size
         current_budget -= 2 * max_floor_size
-
-        if self.verboseLevel >=1:
-            print(f"Async loading plan for model '{model_id}' : minimum budget management, beside the async shuttle only the base model ({(base_size)/ONE_MB:0.2f} MB) will be preloaded")
-        return
-
+        current_budget = max(0, current_budget)
 
         for floors, max_floor_size, tower_size in towers:
             tower_budget = tower_size / total_size * current_budget
             preload_blocks_count = int( tower_budget / max_floor_size)
             preload_total += preload_blocks_count * max_floor_size
             max_blocks_fetch = max(max_floor_size, max_blocks_fetch)
-            if preload_blocks_count <= 0:
-                if self.verboseLevel >=1:
-                    print(f"Async loading plan for model '{model_id}' : minimum budget management, beside the async shuttle only the base model ({(base_size)/ONE_MB:0.2f} MB) will be preloaded")
-                return
 
             nb_blocks= len(floors)
-
-
+            if preload_blocks_count == 0:
+                space_between = 0
+                cursor = len(floors)
+            else:
+                space_between = (nb_blocks - preload_blocks_count) / preload_blocks_count
+                cursor = space_between
             first_non_preloaded = None
             prev_non_preloaded = None
             for block in floors:
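Note: the new space_between / cursor bookkeeping spreads however many blocks fit in the budget evenly across a tower's floors instead of returning early when the budget is small. The selection loop below is a simplified reconstruction under that reading (the real loop iterates over floors and also tracks first_non_preloaded / prev_non_preloaded); the counts are made up:

    nb_blocks = 10
    preload_blocks_count = 3

    if preload_blocks_count == 0:
        space_between = 0
        cursor = nb_blocks          # cursor past the end: nothing is preloaded
    else:
        space_between = (nb_blocks - preload_blocks_count) / preload_blocks_count
        cursor = space_between

    preloaded = []
    for i in range(nb_blocks):
        if i >= cursor:             # skip ~space_between blocks, preload the next one
            preloaded.append(i)
            cursor += 1 + space_between
    print(preloaded)                # -> [3, 6, 9]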
@@ -2131,7 +2123,10 @@ class offload:
         self.preloaded_blocks_per_model[model_id] = preloaded_blocks
 
         if self.verboseLevel >=1:
-
+            if preload_total == 0:
+                print(f"Async loading plan for model '{model_id}' : base size of {(preload_total+base_size)/ONE_MB:0.2f} MB will be preloaded with a {max_blocks_fetch/ONE_MB:0.2f} MB async" + (" circular" if len(towers) == 1 else "") + " shuttle")
+            else:
+                print(f"Async loading plan for model '{model_id}' : {(preload_total+base_size)/ONE_MB:0.2f} MB will be preloaded (base size of {base_size/ONE_MB:0.2f} MB + {preload_total/total_size*100:0.1f}% of recurrent layers data) with a {max_blocks_fetch/ONE_MB:0.2f} MB async" + (" circular" if len(towers) == 1 else "") + " shuttle")
 
     def release(self):
         global last_offload_obj, total_pinned_bytes
@@ -2273,7 +2268,9 @@ def all(pipe_or_dict_of_modules, pinnedMemory = False, pinnedPEFTLora = False, p
         modelPinned = (pinAllModels or model_id in modelsToPin) and not hasattr(current_model,"_already_pinned")
 
         current_model_size = 0
-        model_dtype = None
+        model_dtype = getattr(current_model, "_model_dtype", None)
+        # if model_dtype == None:
+        #     model_dtype = getattr(current_model, "dtype", None)
 
         for _ , m in current_model.named_modules():
             ignore_dtype = hasattr(m, "_lock_dtype")
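Note: getattr with a default preserves the old behaviour (None) for models that never set _model_dtype, while picking the attribute up when a loader has set it. _Dummy below is a hypothetical stand-in for current_model:

    class _Dummy:
        pass

    m = _Dummy()
    print(getattr(m, "_model_dtype", None))  # -> None, as in 3.4.4
    m._model_dtype = "bfloat16"              # placeholder for a torch dtype
    print(getattr(m, "_model_dtype", None))  # -> bfloat16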
@@ -2296,10 +2293,11 @@ def all(pipe_or_dict_of_modules, pinnedMemory = False, pinnedPEFTLora = False, p
             else:
                 if not ignore_dtype:
                     dtype = p.data.dtype
-                    if convertWeightsFloatTo != None and
+                    if convertWeightsFloatTo != None and dtype == torch.float32 :
                         # convert any left overs float32 weight to bfloat16 / float16 to divide by 2 the model memory footprint
                         dtype = convertWeightsFloatTo if model_dtype == None else model_dtype
-
+                        if dtype != torch.float32:
+                            p.data = p.data.to(dtype)
                     if model_dtype== None:
                         model_dtype = dtype
                     else:
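Note: the added lines make the downcast explicit, converting p.data once a non-float32 target dtype is chosen. A self-contained sketch of the per-parameter conversion (assumes torch is available; the real loop walks named_modules() and honours _lock_dtype):

    import torch

    p = torch.nn.Parameter(torch.zeros(4, 4, dtype=torch.float32))
    convertWeightsFloatTo = torch.bfloat16  # hypothetical target dtype

    if convertWeightsFloatTo != None and p.data.dtype == torch.float32:
        dtype = convertWeightsFloatTo
        if dtype != torch.float32:
            p.data = p.data.to(dtype)       # halves this tensor's memory footprint

    print(p.data.dtype)                     # -> torch.bfloat16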
mmgp/safetensors2.py
CHANGED
@@ -125,20 +125,18 @@ class cached_metadata:
     _cached_entry = None # ideally we should create a dict of the last n entries but one entry covers most cases
 
 def _parse_metadata(metadata):
-    if metadata == None:
-        return None
-
     new_metadata= {}
-
-
-
-
-
-
-
-
-
-
+    if metadata != None:
+        for k,v in metadata.items():
+            if k.endswith("_base64"):
+                v_decoded = json.loads(base64.b64decode(v.encode('utf8')).decode('utf8'))
+                p = k.rfind("_")
+                new_k = k[:p]
+                new_metadata[new_k]= v_decoded
+            else:
+                new_metadata[k] = v
+    if "format" not in new_metadata:
+        new_metadata["format"] = "pt"
     return new_metadata
 
 def _read_safetensors_header(path, file):
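Note: the rewritten _parse_metadata decodes any metadata key following the "<name>_base64" convention back into a Python object and guarantees a "format" entry defaulting to "pt". A round-trip sketch of that convention (the payload is made up):

    import base64, json

    config = {"quantization": "int8"}  # hypothetical payload
    stored = {"config_base64":
              base64.b64encode(json.dumps(config).encode('utf8')).decode('utf8')}

    k, v = next(iter(stored.items()))
    v_decoded = json.loads(base64.b64decode(v.encode('utf8')).decode('utf8'))
    new_k = k[:k.rfind("_")]           # strips the "_base64" suffix
    print(new_k, v_decoded)            # -> config {'quantization': 'int8'}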
{mmgp-3.4.4.dist-info → mmgp-3.4.6.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mmgp
-Version: 3.4.4
+Version: 3.4.6
 Summary: Memory Management for the GPU Poor
 Author-email: deepbeepmeep <deepbeepmeep@yahoo.com>
 Requires-Python: >=3.10
@@ -15,7 +15,7 @@ Dynamic: license-file
 
 
 <p align="center">
-    <H2>Memory Management 3.4.
+    <H2>Memory Management 3.4.6 for the GPU Poor by DeepBeepMeep</H2>
 </p>
 
 
@@ -43,7 +43,7 @@ Each profile may use a combination of the following:
 ## Sample applications that use mmgp
 It is recommended to have a look at these applications to see how mmgp was implemented in each of them:
 - Wan2GP: https://github.com/deepbeepmeep/Wan2GP :\
-An excellent text to video and image to video generator
+An excellent text to video and image to video generator that supports the best Open Source Video Architectures: Wan, Hunyuan and LTX Video
 
 - Hunyuan3D-2GP: https://github.com/deepbeepmeep/Hunyuan3D-2GP :\
 A great image to 3D and text to 3D tool by the Tencent team. Thanks to mmgp it can run with less than 6 GB of VRAM
mmgp-3.4.6.dist-info/RECORD
ADDED
@@ -0,0 +1,9 @@
+__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+mmgp/__init__.py,sha256=A9qBwyQMd1M7vshSTOBnFGP1MQvS2hXmTcTCMUcmyzE,509
+mmgp/offload.py,sha256=2oWFiDcwIx3lGOb_6_aac1zzIIF-nhP8bwOA-G9HxsU,114594
+mmgp/safetensors2.py,sha256=4nKV13qCMabnNEB1TA_ueFbfGYYmiQ9racR_C6SsGug,18693
+mmgp-3.4.6.dist-info/licenses/LICENSE.md,sha256=HjzvY2grdtdduZclbZ46B2M-XpT4MDCxFub5ZwTWq2g,93
+mmgp-3.4.6.dist-info/METADATA,sha256=kv9OfYHAAHKyiv9p9vrf4guU3tNd0I7vUgQ6xm7dkk8,16309
+mmgp-3.4.6.dist-info/WHEEL,sha256=zaaOINJESkSfm_4HQVc5ssNzHCPXhJm0kEUakpsEHaU,91
+mmgp-3.4.6.dist-info/top_level.txt,sha256=waGaepj2qVfnS2yAOkaMu4r9mJaVjGbEi6AwOUogU_U,14
+mmgp-3.4.6.dist-info/RECORD,,
mmgp-3.4.4.dist-info/RECORD
DELETED
@@ -1,9 +0,0 @@
-__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-mmgp/__init__.py,sha256=A9qBwyQMd1M7vshSTOBnFGP1MQvS2hXmTcTCMUcmyzE,509
-mmgp/offload.py,sha256=fR6ACUxT4rVIbfxM8p-bkKCbNYwZi6061yyfjzCEnlM,114769
-mmgp/safetensors2.py,sha256=zwThjxFd_wqWz0udkoD-DKmSX5x4ojmcu2wyBeuCTdU,18619
-mmgp-3.4.4.dist-info/licenses/LICENSE.md,sha256=HjzvY2grdtdduZclbZ46B2M-XpT4MDCxFub5ZwTWq2g,93
-mmgp-3.4.4.dist-info/METADATA,sha256=LrzjB5uuaIJ1UoniRNBDD0pex0UR8EvWPUdeR9nXNXg,16237
-mmgp-3.4.4.dist-info/WHEEL,sha256=0CuiUZ_p9E4cD6NyLD6UG80LBXYyiSYZOKDm5lp32xk,91
-mmgp-3.4.4.dist-info/top_level.txt,sha256=waGaepj2qVfnS2yAOkaMu4r9mJaVjGbEi6AwOUogU_U,14
-mmgp-3.4.4.dist-info/RECORD,,
{mmgp-3.4.4.dist-info → mmgp-3.4.6.dist-info}/licenses/LICENSE.md
File without changes

{mmgp-3.4.4.dist-info → mmgp-3.4.6.dist-info}/top_level.txt
File without changes