PyPI - mmgp - Versions diffs - 3.5.9__py3-none-any.whl → 3.5.11__py3-none-any.whl - Mend

mmgp 3.5.9py3-none-any.whl → 3.5.11py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of mmgp might be problematic. Click here for more details.

Files changed (8) hide show

mmgp/offload.py +40 -11
mmgp/safetensors2.py +13 -3
{mmgp-3.5.9.dist-info → mmgp-3.5.11.dist-info}/METADATA +2 -2
mmgp-3.5.11.dist-info/RECORD +9 -0
{mmgp-3.5.9.dist-info → mmgp-3.5.11.dist-info}/licenses/LICENSE.md +1 -1
mmgp-3.5.9.dist-info/RECORD +0 -9
{mmgp-3.5.9.dist-info → mmgp-3.5.11.dist-info}/WHEEL +0 -0
{mmgp-3.5.9.dist-info → mmgp-3.5.11.dist-info}/top_level.txt +0 -0

mmgp/offload.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# ------------------ Memory Management 3.5.9 for the GPU Poor by DeepBeepMeep (mmgp)------------------
+# ------------------ Memory Management 3.5.11 for the GPU Poor by DeepBeepMeep (mmgp)------------------
 #
 # This module contains multiple optimisations so that models such as Flux (and derived), Mochi, CogView, HunyuanVideo, ... can run smoothly on a GPU card limited to 24 GB.
 # This is a replacement for the accelerate library that should in theory manage offloading, but doesn't work properly with models that are loaded / unloaded several
@@ -66,7 +66,6 @@ from accelerate import init_empty_weights
 import functools
 import types
-from functools import lru_cache
 import torch
@@ -90,6 +89,23 @@ class QEmbedding(QModuleMixin, torch.nn.Embedding):
 shared_state = {}
+def get_cache(cache_name):
+    """Return the dict used as the cache called ``cache_name``, creating it on first use.
+
+    All named caches live in a registry dict stored under the ``"_cache"`` key of
+    the module-level ``shared_state``. The registry itself is created lazily.
+
+    Args:
+        cache_name: key identifying the cache inside the registry.
+
+    Returns:
+        The dict registered under ``cache_name``. Repeated calls with the same
+        name return the same dict object until the registry is cleared.
+    """
+    all_cache = shared_state.get("_cache",  None)
+    if all_cache is None:
+        all_cache = {}
+        shared_state["_cache"]=  all_cache
+    # Look the cache up in the registry where it is stored, not in shared_state
+    # itself; otherwise every call misses and overwrites the stored cache with
+    # a fresh empty dict, so nothing is ever actually cached.
+    cache = all_cache.get(cache_name, None)
+    if cache is None:
+        cache = {}
+        all_cache[cache_name] = cache
+    return cache
+def clear_caches():
+    """Drop every named cache from the registry kept in ``shared_state["_cache"]``.
+
+    Does nothing when the registry has not been created yet. Only the registry
+    is emptied: cache dicts handed out earlier are detached from it, not cleared.
+    """
+    registry = shared_state.get("_cache",  None)
+    if registry is None:
+        return
+    registry.clear()
 mmm = safetensors2.mmm
 default_verboseLevel = 1
@@ -623,6 +639,7 @@ def _pin_to_memory(model, model_id, partialPinning = False, pinnedPEFTLora = Tru
                 total += size
             current_big_tensor = big_tensors[big_tensor_no]
             if is_buffer :
                 _force_load_buffer(p) # otherwise potential memory leak
             if isinstance(p, QTensor):
@@ -671,7 +688,7 @@ def _welcome():
     if welcome_displayed:
          return
     welcome_displayed = True
-    print(f"{BOLD}{HEADER}************ Memory Management for the GPU Poor (mmgp 3.5.9) by DeepBeepMeep ************{ENDC}{UNBOLD}")
+    print(f"{BOLD}{HEADER}************ Memory Management for the GPU Poor (mmgp 3.5.11) by DeepBeepMeep ************{ENDC}{UNBOLD}")
 def change_dtype(model, new_dtype, exclude_buffers = False):
     for submodule_name, submodule in model.named_modules():
@@ -1032,7 +1049,7 @@ def load_loras_into_model(model, lora_path, lora_multi = None, activate_all_lora
         if split_linear_modules_map != None:
             new_state_dict = dict()
-            suffixes = [(".alpha", -2, False), (".lora_B.weight", -3, True), (".lora_A.weight", -3, False)]
+            suffixes = [(".alpha", -2, False), (".lora_B.weight", -3, True), (".lora_A.weight", -3, False), (".lora_up.weight", -3, True), (".lora_down.weight", -3, False)]
             for module_name, module_data in state_dict.items():
                 name_parts = module_name.split(".")
                 for suffix, pos, any_split in suffixes:
@@ -1081,10 +1098,7 @@ def load_loras_into_model(model, lora_path, lora_multi = None, activate_all_lora
             invalid_keys = []
             unexpected_keys = []
             for k, v in state_dict.items():
-                lora_A = None
-                lora_B = None
-                diff_b = None
-                diff = None
+                lora_A = lora_B = diff_b = diff = lora_key = None
                 if k.endswith(".diff"):
                     diff = v
                     module_name = k[ : -5]
@@ -1179,7 +1193,7 @@ def load_loras_into_model(model, lora_path, lora_multi = None, activate_all_lora
                         loras_adapter_data[1] = lora_B.to(module.weight.dtype)
                     else:
                         loras_adapter_data[2] = diff_b.to(module.weight.dtype)
-                    if rank != None and "lora" in lora_key:
+                    if rank != None and lora_key is not None and "lora" in lora_key:
                         alpha_key = k[:-len(lora_key)] + "alpha"
                         alpha = lora_alphas.get(alpha_key, None)
                         if alpha is not None: loras_adapter_data[3] = alpha / rank
@@ -1309,7 +1323,7 @@ def fast_load_transformers_model(model_path: str,  do_quantize = False, quantiza
         model_path = [model_path]
-    if not builtins.all(file_name.endswith(".sft") or file_name.endswith(".safetensors") or file_name.endswith(".pt") for file_name in model_path):
+    if not builtins.all(file_name.endswith(".sft") or file_name.endswith(".safetensors") or file_name.endswith(".pt") or file_name.endswith(".ckpt") for file_name in model_path):
         raise Exception("full model path to file expected")
     model_path = [ _get_model(file) for file in model_path]
@@ -1317,7 +1331,7 @@ def fast_load_transformers_model(model_path: str,  do_quantize = False, quantiza
         raise Exception("Unable to find file")
     verboseLevel = _compute_verbose_level(verboseLevel)
-    if model_path[-1].endswith(".pt"):
+    if model_path[-1].endswith(".pt") or model_path[-1].endswith(".ckpt"):
         metadata = None
     else:
         with safetensors2.safe_open(model_path[-1], writable_tensors =writable_tensors) as f:
@@ -2684,6 +2698,21 @@ def all(pipe_or_dict_of_modules, pinnedMemory = False, pinnedPEFTLora = False, p
                     print(f"Model '{model_id}' already pinned to reserved memory")
             else:
                 _pin_to_memory(current_model, model_id, partialPinning= partialPinning, pinnedPEFTLora = pinnedPEFTLora, perc_reserved_mem_max = perc_reserved_mem_max, verboseLevel=verboseLevel)
+                # empty_tensor = torch.empty((1,))
+                # for sub_module_name, sub_module  in current_model.named_modules():
+                #     for k, p in  sub_module.named_parameters(recurse=False):
+                #         if p is not None:
+                #             if isinstance(p, QTensor):
+                #                 p._data.data = empty_tensor
+                #                 p._scale.data = empty_tensor
+                #             else:
+                #                 p.data = empty_tensor
+                #             del k
+                #     for k, v in  sub_module.named_buffers(recurse=False):
+                #         del k
+                # sub_module = None
+                # v = None
+                # gc.collect()
         current_budget = model_budgets[model_id]
         cur_blocks_prefix, prev_blocks_name, cur_blocks_name,cur_blocks_seq, is_mod_seq = None, None, None, -1, False
         self.loaded_blocks[model_id] = None

mmgp/safetensors2.py CHANGED Viewed

@@ -46,7 +46,16 @@ class MmapTracker:
         file_path = os.path.join(*s)
         self.file_path = file_path # os.path.abspath(file_path)
         self.count = 0
-        mmm[file_path] = self
+        key = file_path
+        i = 1
+        while True:
+            if key not in mmm:
+                mmm[key] = self
+                break
+            i +=1
+            key = key + "#" + str(i)
+        self.mmm_key = key
+        # print(f"MMAP Add: {file_path}: {mmm.keys()}")
     def register(self, mmap_obj, map_id, start, size):
@@ -61,7 +70,8 @@ class MmapTracker:
                 print(f"MMap Manager of file '{self.file_path}' : MMap no {map_id} has been released" + text)
             if self.count == self._already_released:
-                del mmm[self.file_path]
+                # print(f"MMAP Del: {self.file_path}: {mmm.keys()}")
+                del mmm[self.mmm_key ]
             self._maps.pop(map_id, None)
@@ -240,7 +250,7 @@ def torch_write_file(sd, file_path, quantization_map = None, config = None, extr
                         t = t.view(torch.uint16)
                     elif  dtype ==  torch.float8_e5m2 or dtype ==  torch.float8_e4m3fn:
                         t = t.view(torch.uint8)
-                    buffer = t.numpy().tobytes()
+                    buffer = t.cpu().numpy().tobytes()
                     bytes_written = writer.write(buffer)
                     assert bytes_written == size
             i+=1

{mmgp-3.5.9.dist-info → mmgp-3.5.11.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mmgp
-Version: 3.5.9
+Version: 3.5.11
 Summary: Memory Management for the GPU Poor
 Author-email: deepbeepmeep <deepbeepmeep@yahoo.com>
 Requires-Python: >=3.10
@@ -15,7 +15,7 @@ Dynamic: license-file
 <p align="center">
-  <H2>Memory Management 3.5.9 for the GPU Poor by DeepBeepMeep</H2>
+  <H2>Memory Management 3.5.11 for the GPU Poor by DeepBeepMeep</H2>
 </p>

mmgp-3.5.11.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,9 @@
+__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+mmgp/__init__.py,sha256=A9qBwyQMd1M7vshSTOBnFGP1MQvS2hXmTcTCMUcmyzE,509
+mmgp/offload.py,sha256=KO5wEuGNKxJPaL_ZHuGZDL8l0ZZIY_zf3yI4vBYzoFQ,131664
+mmgp/safetensors2.py,sha256=zYNMprt1KoxgVALbcz6DawxsQDNNRImvgO9cYRChUiY,19028
+mmgp-3.5.11.dist-info/licenses/LICENSE.md,sha256=HjzvY2grdtdduZclbZ46B2M-XpT4MDCxFub5ZwTWq2g,93
+mmgp-3.5.11.dist-info/METADATA,sha256=-071YZvgNg093aC0OMNZT1-o3ZXu9RqTquoEzBYsPBE,16311
+mmgp-3.5.11.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+mmgp-3.5.11.dist-info/top_level.txt,sha256=waGaepj2qVfnS2yAOkaMu4r9mJaVjGbEi6AwOUogU_U,14
+mmgp-3.5.11.dist-info/RECORD,,

{mmgp-3.5.9.dist-info → mmgp-3.5.11.dist-info}/licenses/LICENSE.md RENAMED Viewed

@@ -1,2 +1,2 @@
-                    GNU GENERAL PUBLIC LICENSE
+                    GNU GENERAL PUBLIC LICENSE
                        Version 3, 29 June 2007

mmgp-3.5.9.dist-info/RECORD DELETED Viewed

@@ -1,9 +0,0 @@
-__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-mmgp/__init__.py,sha256=A9qBwyQMd1M7vshSTOBnFGP1MQvS2hXmTcTCMUcmyzE,509
-mmgp/offload.py,sha256=AViVBdUYDN42SnICeeTFa3K3JQ7a8rXB-eC2qPIY2yM,130347
-mmgp/safetensors2.py,sha256=4nKV13qCMabnNEB1TA_ueFbfGYYmiQ9racR_C6SsGug,18693
-mmgp-3.5.9.dist-info/licenses/LICENSE.md,sha256=DD-WIS0BkPoWJ_8hQO3J8hMP9K_1-dyrYv1YCbkxcDU,94
-mmgp-3.5.9.dist-info/METADATA,sha256=PXpq_dDRmAQED1dTW8NKUUB_FcYb54VRqlpjqOY771Y,16309
-mmgp-3.5.9.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-mmgp-3.5.9.dist-info/top_level.txt,sha256=waGaepj2qVfnS2yAOkaMu4r9mJaVjGbEi6AwOUogU_U,14
-mmgp-3.5.9.dist-info/RECORD,,

{mmgp-3.5.9.dist-info → mmgp-3.5.11.dist-info}/WHEEL RENAMED Viewed

File without changes

{mmgp-3.5.9.dist-info → mmgp-3.5.11.dist-info}/top_level.txt RENAMED Viewed

File without changes

mmgp 3.5.9__py3-none-any.whl → 3.5.11__py3-none-any.whl

Potentially problematic release.

mmgp 3.5.9py3-none-any.whl → 3.5.11py3-none-any.whl