mmgp 3.4.4__tar.gz → 3.4.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This release of mmgp has been flagged as potentially problematic.
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mmgp
-Version: 3.4.4
+Version: 3.4.6
 Summary: Memory Management for the GPU Poor
 Author-email: deepbeepmeep <deepbeepmeep@yahoo.com>
 Requires-Python: >=3.10
@@ -15,7 +15,7 @@ Dynamic: license-file
 
 
 <p align="center">
-<H2>Memory Management 3.4.4 for the GPU Poor by DeepBeepMeep</H2>
+<H2>Memory Management 3.4.6 for the GPU Poor by DeepBeepMeep</H2>
 </p>
 
 
@@ -43,7 +43,7 @@ Each profile may use a combination of the following:
 ## Sample applications that use mmgp
 It is recommended to have a look at these applications to see how mmgp was implemented in each of them:
 - Wan2GP: https://github.com/deepbeepmeep/Wan2GP :\
-An excellent text to video and image to video generator by Alibaba
+An excellent text to video and image to video generator that supports the best Open Source Video Architectures: Wan, Hunyuan and LTX Video
 
 - Hunyuan3D-2GP: https://github.com/deepbeepmeep/Hunyuan3D-2GP :\
 A great image to 3D and text to 3D tool by the Tencent team. Thanks to mmgp it can run with less than 6 GB of VRAM
@@ -1,6 +1,6 @@
 
 <p align="center">
-<H2>Memory Management 3.4.4 for the GPU Poor by DeepBeepMeep</H2>
+<H2>Memory Management 3.4.6 for the GPU Poor by DeepBeepMeep</H2>
 </p>
 
 
@@ -28,7 +28,7 @@ Each profile may use a combination of the following:
 ## Sample applications that use mmgp
 It is recommended to have a look at these applications to see how mmgp was implemented in each of them:
 - Wan2GP: https://github.com/deepbeepmeep/Wan2GP :\
-An excellent text to video and image to video generator by Alibaba
+An excellent text to video and image to video generator that supports the best Open Source Video Architectures: Wan, Hunyuan and LTX Video
 
 - Hunyuan3D-2GP: https://github.com/deepbeepmeep/Hunyuan3D-2GP :\
 A great image to 3D and text to 3D tool by the Tencent team. Thanks to mmgp it can run with less than 6 GB of VRAM
@@ -1,6 +1,6 @@
 [project]
 name = "mmgp"
-version = "3.4.4"
+version = "3.4.6"
 authors = [
   { name = "deepbeepmeep", email = "deepbeepmeep@yahoo.com" },
 ]
@@ -1,4 +1,4 @@
-# ------------------ Memory Management 3.4.3 for the GPU Poor by DeepBeepMeep (mmgp)------------------
+# ------------------ Memory Management 3.4.5 for the GPU Poor by DeepBeepMeep (mmgp)------------------
 #
 # This module contains multiples optimisations so that models such as Flux (and derived), Mochi, CogView, HunyuanVideo, ... can run smoothly on a 24 GB GPU limited card.
 # This a replacement for the accelerate library that should in theory manage offloading, but doesn't work properly with models that are loaded / unloaded several
@@ -619,7 +619,7 @@ def _welcome():
     if welcome_displayed:
         return
     welcome_displayed = True
-    print(f"{BOLD}{HEADER}************ Memory Management for the GPU Poor (mmgp 3.4.3) by DeepBeepMeep ************{ENDC}{UNBOLD}")
+    print(f"{BOLD}{HEADER}************ Memory Management for the GPU Poor (mmgp 3.4.5) by DeepBeepMeep ************{ENDC}{UNBOLD}")
 
 def change_dtype(model, new_dtype, exclude_buffers = False):
     for submodule_name, submodule in model.named_modules():
@@ -1259,7 +1259,6 @@ def fast_load_transformers_model(model_path: str, do_quantize = False, quantizat
         with init_empty_weights():
             model = transfomer_class(config_obj)
 
-        model = model.base_model
 
     elif "_class_name" in transformer_config:
         class_name = transformer_config["_class_name"]
@@ -1401,7 +1400,7 @@ def load_model_data(model, file_path: str, do_quantize = False, quantizationType
                 base_model_prefix = k[:-len(missing_keys[0])]
                 break
         if base_model_prefix == None:
-            raise Exception("Missing keys: {missing_keys}")
+            raise Exception(f"Missing keys: {missing_keys}")
         state_dict = filter_state_dict(state_dict, base_model_prefix)
         missing_keys , unexpected_keys = model.load_state_dict(state_dict, False, assign = True )
         del state_dict
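The hunk above adds the missing f prefix, so the exception now interpolates the actual missing key names instead of printing the literal placeholder. A minimal standalone illustration (the key name is hypothetical, used only for the demo):

missing_keys = ["transformer.blocks.0.attn.to_q.weight"]  # hypothetical example key

print("Missing keys: {missing_keys}")   # old behaviour: prints the literal text "{missing_keys}"
print(f"Missing keys: {missing_keys}")  # new behaviour: prints the real list of missing keys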
@@ -2030,7 +2029,7 @@ class offload:
         else:
             dtype = model._dtype
 
-        def check_change_module(module, *args, **kwargs):
+        def check_change_module(module, *args, **kwargs):
            self.ensure_model_loaded(model_id)
            # transfer leftovers inputs that were incorrectly created in the RAM (mostly due to some .device tests that returned incorrectly "cpu")
            if dtype != None:
@@ -2064,10 +2063,7 @@ class offload:
         # current_budget = 5000 * ONE_MB
         base_size = self.blocks_of_modules_sizes[model_id]
         current_budget -= base_size
-        if current_budget <= 0:
-            if self.verboseLevel >=1:
-                print(f"Async loading plan for model '{model_id}' : minimum budget management, beside the async shuttle only base model ({(base_size)/ONE_MB:0.2f} MB) will be preloaded")
-            return
+        current_budget = max(0, current_budget)
 
         towers = []
         total_size = 0
@@ -2086,25 +2082,21 @@ class offload:
             towers.append( (floors, max_floor_size, tower_size) )
             total_size += tower_size
             current_budget -= 2 * max_floor_size
-            if current_budget <= 0:
-                if self.verboseLevel >=1:
-                    print(f"Async loading plan for model '{model_id}' : minimum budget management, beside the async shuttle only the base model ({(base_size)/ONE_MB:0.2f} MB) will be preloaded")
-                return
-
+            current_budget = max(0, current_budget)
 
         for floors, max_floor_size, tower_size in towers:
             tower_budget = tower_size / total_size * current_budget
             preload_blocks_count = int( tower_budget / max_floor_size)
             preload_total += preload_blocks_count * max_floor_size
             max_blocks_fetch = max(max_floor_size, max_blocks_fetch)
-            if preload_blocks_count <= 0:
-                if self.verboseLevel >=1:
-                    print(f"Async loading plan for model '{model_id}' : minimum budget management, beside the async shuttle only the base model ({(base_size)/ONE_MB:0.2f} MB) will be preloaded")
-                return
 
             nb_blocks= len(floors)
-            space_between = (nb_blocks - preload_blocks_count) / preload_blocks_count
-            cursor = space_between
+            if preload_blocks_count == 0:
+                space_between = 0
+                cursor = len(floors)
+            else:
+                space_between = (nb_blocks - preload_blocks_count) / preload_blocks_count
+                cursor = space_between
             first_non_preloaded = None
             prev_non_preloaded = None
             for block in floors:
@@ -2131,7 +2123,10 @@ class offload:
         self.preloaded_blocks_per_model[model_id] = preloaded_blocks
 
         if self.verboseLevel >=1:
-            print(f"Async loading plan for model '{model_id}' : {(preload_total+base_size)/ONE_MB:0.2f} MB will be preloaded (base size of {base_size/ONE_MB:0.2f} MB + {preload_total/total_size*100:0.1f}% of recurrent layers data) with a {max_blocks_fetch/ONE_MB:0.2f} MB async" + (" circular" if len(towers) == 1 else "") + " shuttle")
+            if preload_total == 0:
+                print(f"Async loading plan for model '{model_id}' : base size of {(preload_total+base_size)/ONE_MB:0.2f} MB will be preloaded with a {max_blocks_fetch/ONE_MB:0.2f} MB async" + (" circular" if len(towers) == 1 else "") + " shuttle")
+            else:
+                print(f"Async loading plan for model '{model_id}' : {(preload_total+base_size)/ONE_MB:0.2f} MB will be preloaded (base size of {base_size/ONE_MB:0.2f} MB + {preload_total/total_size*100:0.1f}% of recurrent layers data) with a {max_blocks_fetch/ONE_MB:0.2f} MB async" + (" circular" if len(towers) == 1 else "") + " shuttle")
 
     def release(self):
         global last_offload_obj, total_pinned_bytes
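Taken together, the three hunks above replace the early returns of 3.4.4 with budget clamping, so planning always runs to completion: a too-small budget now means "preload nothing beyond the base model" rather than aborting with the minimum-budget message. The following standalone sketch condenses that logic for a single tower of equally sized blocks; the function name, arguments and sample numbers are illustrative only and not part of mmgp's API:

ONE_MB = 1024 * 1024

def plan_single_tower(budget, base_size, max_floor_size, nb_blocks):
    # Subtract the always-resident base model, clamping instead of bailing out.
    current_budget = max(0, budget - base_size)
    # Reserve room for the async shuttle (two blocks in flight), clamp again.
    current_budget = max(0, current_budget - 2 * max_floor_size)
    # How many recurrent blocks can stay preloaded in VRAM.
    preload_blocks_count = int(current_budget / max_floor_size)
    if preload_blocks_count == 0:
        # 3.4.6 behaviour: nothing is preloaded, every block is fetched by the shuttle.
        space_between, cursor = 0, nb_blocks
    else:
        # Spread the non-preloaded blocks evenly between the preloaded ones.
        space_between = (nb_blocks - preload_blocks_count) / preload_blocks_count
        cursor = space_between
    return preload_blocks_count, space_between, cursor

# Comfortable budget: some blocks stay resident.
print(plan_single_tower(budget=8000 * ONE_MB, base_size=1500 * ONE_MB,
                        max_floor_size=300 * ONE_MB, nb_blocks=40))
# Tight budget: previously an early return, now simply zero preloaded blocks.
print(plan_single_tower(budget=1600 * ONE_MB, base_size=1500 * ONE_MB,
                        max_floor_size=300 * ONE_MB, nb_blocks=40))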
@@ -2273,7 +2268,9 @@ def all(pipe_or_dict_of_modules, pinnedMemory = False, pinnedPEFTLora = False, p
        modelPinned = (pinAllModels or model_id in modelsToPin) and not hasattr(current_model,"_already_pinned")
 
        current_model_size = 0
-       model_dtype = None
+       model_dtype = getattr(current_model, "_model_dtype", None)
+       # if model_dtype == None:
+       #     model_dtype = getattr(current_model, "dtype", None)
 
        for _ , m in current_model.named_modules():
            ignore_dtype = hasattr(m, "_lock_dtype")
@@ -2296,10 +2293,11 @@ def all(pipe_or_dict_of_modules, pinnedMemory = False, pinnedPEFTLora = False, p
                else:
                    if not ignore_dtype:
                        dtype = p.data.dtype
-                       if convertWeightsFloatTo != None and dtype == torch.float32 :
+                       if convertWeightsFloatTo != None and dtype == torch.float32 :
                            # convert any left overs float32 weight to bfloat16 / float16 to divide by 2 the model memory footprint
                            dtype = convertWeightsFloatTo if model_dtype == None else model_dtype
-                           p.data = p.data.to(dtype)
+                           if dtype != torch.float32:
+                               p.data = p.data.to(dtype)
                        if model_dtype== None:
                            model_dtype = dtype
                        else:
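The second hunk above makes the float32 down-conversion conditional: the target dtype can now come from a `_model_dtype` attribute on the model, and the `.to()` copy is skipped when that target is itself float32, avoiding a pointless conversion. A minimal sketch of the same guard on a toy module (a standalone illustration, not the `offload.all()` code path; the helper name is hypothetical):

import torch

def convert_float32_leftovers(model, convert_to=torch.bfloat16):
    # Respect an explicit per-model dtype when one was set, as the new code does.
    target = getattr(model, "_model_dtype", None) or convert_to
    for p in model.parameters():
        if p.data.dtype == torch.float32 and target != torch.float32:
            # Halve the footprint of leftover float32 weights; skip when the target is still float32.
            p.data = p.data.to(target)

m = torch.nn.Linear(8, 8)           # toy module, created in float32
convert_float32_leftovers(m)
print(next(m.parameters()).dtype)   # torch.bfloat16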
@@ -125,20 +125,18 @@ class cached_metadata:
     _cached_entry = None # ideally we should create a dict of the last n entries but one entry covers most cases
 
 def _parse_metadata(metadata):
-    if metadata == None:
-        return None
-
     new_metadata= {}
-
-    for k,v in metadata.items():
-        if k.endswith("_base64"):
-            v_decoded = json.loads(base64.b64decode(v.encode('utf8')).decode('utf8'))
-            p = k.rfind("_")
-            new_k = k[:p]
-            new_metadata[new_k]= v_decoded
-        else:
-            new_metadata[k] = v
-
+    if metadata != None:
+        for k,v in metadata.items():
+            if k.endswith("_base64"):
+                v_decoded = json.loads(base64.b64decode(v.encode('utf8')).decode('utf8'))
+                p = k.rfind("_")
+                new_k = k[:p]
+                new_metadata[new_k]= v_decoded
+            else:
+                new_metadata[k] = v
+    if "format" not in new_metadata:
+        new_metadata["format"] = "pt"
 
     return new_metadata
 
 def _read_safetensors_header(path, file):
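The rewrite of `_parse_metadata` above keeps the same decoding rule (keys ending in `_base64` hold base64-encoded JSON, stored that way because safetensors metadata values must be strings), but it no longer returns None when there is no metadata and now guarantees a default `"format": "pt"` entry. A standalone sketch with made-up sample data:

import base64, json

def parse_metadata(metadata):
    new_metadata = {}
    if metadata is not None:
        for k, v in metadata.items():
            if k.endswith("_base64"):
                # Decode base64-wrapped JSON and drop the "_base64" suffix from the key.
                decoded = json.loads(base64.b64decode(v.encode("utf8")).decode("utf8"))
                new_metadata[k[: k.rfind("_")]] = decoded
            else:
                new_metadata[k] = v
    if "format" not in new_metadata:
        new_metadata["format"] = "pt"   # default added in 3.4.6
    return new_metadata

sample = {"config_base64": base64.b64encode(json.dumps({"quantized": True}).encode()).decode()}
print(parse_metadata(sample))  # {'config': {'quantized': True}, 'format': 'pt'}
print(parse_metadata(None))    # {'format': 'pt'}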
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mmgp
-Version: 3.4.4
+Version: 3.4.6
 Summary: Memory Management for the GPU Poor
 Author-email: deepbeepmeep <deepbeepmeep@yahoo.com>
 Requires-Python: >=3.10
@@ -15,7 +15,7 @@ Dynamic: license-file
 
 
 <p align="center">
-<H2>Memory Management 3.4.4 for the GPU Poor by DeepBeepMeep</H2>
+<H2>Memory Management 3.4.6 for the GPU Poor by DeepBeepMeep</H2>
 </p>
 
 
@@ -43,7 +43,7 @@ Each profile may use a combination of the following:
 ## Sample applications that use mmgp
 It is recommended to have a look at these applications to see how mmgp was implemented in each of them:
 - Wan2GP: https://github.com/deepbeepmeep/Wan2GP :\
-An excellent text to video and image to video generator by Alibaba
+An excellent text to video and image to video generator that supports the best Open Source Video Architectures: Wan, Hunyuan and LTX Video
 
 - Hunyuan3D-2GP: https://github.com/deepbeepmeep/Hunyuan3D-2GP :\
 A great image to 3D and text to 3D tool by the Tencent team. Thanks to mmgp it can run with less than 6 GB of VRAM
File without changes
File without changes
File without changes
File without changes
File without changes