mmgp 1.0.2.tar.gz → 1.0.3.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mmgp might be problematic.

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: mmgp
-Version: 1.0.2
+Version: 1.0.3
 Summary: Memory Management for the GPU Poor
 Author-email: deepbeepmeep <deepbeepmeep@yahoo.com>
 License: Apache License
@@ -70,5 +70,5 @@ sort_first = [
 [tool.setuptools_scm]
 write_to = "src/_version.py"
 parentdir_prefix_version = "mmgp-"
-fallback_version = "1.0.2"
+fallback_version = "1.0.3"
 version_scheme = "post-release"
@@ -12,5 +12,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
 
-__version__ = version = '1.0.2'
-__version_tuple__ = version_tuple = (1, 0, 2)
+__version__ = version = '1.0.3'
+__version_tuple__ = version_tuple = (1, 0, 3)
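
For reference, the bumped version can be confirmed at runtime once the release is installed. This is a minimal standard-library sketch, not code from the package itself:

    from importlib.metadata import version

    # Reads the installed distribution metadata; expected to report 1.0.3
    # after upgrading (e.g. pip install -U mmgp).
    print(version("mmgp"))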
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: mmgp
-Version: 1.0.2
+Version: 1.0.3
 Summary: Memory Management for the GPU Poor
 Author-email: deepbeepmeep <deepbeepmeep@yahoo.com>
 License: Apache License
@@ -135,7 +135,7 @@ class offload:
             else:
                 p.data = p.data.cuda(non_blocking=True) #
         # torch.cuda.current_stream().synchronize()
-
+    @torch.compiler.disable()
     def unload_all(self):
         for model, model_id in zip(self.active_models, self.active_models_ids):
             if not self.pinInRAM:
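
This hunk moves the @torch.compiler.disable() decorator onto unload_all itself, and the following hunks drop it from the inner hook closures. As a rough illustration of what the decorator does, here is a standalone sketch assuming PyTorch 2.x; the function names are made up and are not part of mmgp:

    import torch

    @torch.compiler.disable()
    def move_back_to_cpu(t: torch.Tensor) -> torch.Tensor:
        # Device shuffling tends to trigger graph breaks and recompiles, so it
        # is excluded from tracing and always runs eagerly.
        return t.to("cpu")

    @torch.compile
    def scaled_sum(t: torch.Tensor) -> torch.Tensor:
        y = t * 2.0
        # Calling a disabled function from compiled code inserts a graph break.
        return move_back_to_cpu(y).sum()

    print(scaled_sum(torch.randn(4)))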
@@ -198,7 +198,6 @@ class offload:
         # print(f"New cached memory after purge is {torch.cuda.memory_reserved()/1024000:0f} MB) ")

     def hook_me_light(self, target_module, forceMemoryCheck, previous_method):
-        # @torch.compiler.disable()
         def check_empty_cache(module, *args, **kwargs):
             if self.ready_to_check_mem(forceMemoryCheck):
                 self.empty_cache_if_needed()
@@ -208,7 +207,6 @@ class offload:


     def hook_me(self, target_module, model, model_id, module_id, previous_method):
-        @torch.compiler.disable()
         def check_change_module(module, *args, **kwargs):
             performEmptyCacheTest = False
             if not model_id in self.active_models_ids:
@@ -240,7 +238,7 @@ class offload:

         if module_id == None or module_id =='':
             model_name = model._get_name()
-            print(f"Hooked in model {model_name} ({model_id})")
+            print(f"Hooked in model '{model_id}' ({model_name})")


     # Not implemented yet, but why would one want to get rid of these features ?
@@ -258,26 +256,27 @@ class offload:


     @classmethod
-    def all(cls, pipe_or_dict_of_modules, quantizeTransformer = True, pinInRAM = True, compile= True, verbose = True):
+    def all(cls, pipe_or_dict_of_modules, quantizeTransformer = True, pinInRAM = True, verbose = True):
         self = cls()
         self.verbose = verbose
         self.pinned_modules_data = {}

+        # compile not working yet or slower
+        compile = False
         self.pinInRAM = pinInRAM
-
+        pipe = None
         preloadInRAM = True
         torch.set_default_device('cuda')
         if hasattr(pipe_or_dict_of_modules, "components"):
             pipe_or_dict_of_modules.to("cpu") #XXXX
             # create a fake Accelerate parameter so that lora loading doesn't change the device
             pipe_or_dict_of_modules.hf_device_map = torch.device("cuda")
+            pipe = pipe_or_dict_of_modules
             pipe_or_dict_of_modules= pipe_or_dict_of_modules.components


-
         models = {k: v for k, v in pipe_or_dict_of_modules.items() if isinstance(v, torch.nn.Module)}

-
         if quantizeTransformer:
             self.models_to_quantize = ["transformer"]
             # del models["transformer"] # to test everything but the transformer that has a much longer loading
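
Since this release removes the compile keyword from offload.all() and forces it off internally (per the "compile not working yet or slower" comment), callers that still pass compile= should now get a TypeError. A hedged usage sketch follows, assuming the class is importable as from mmgp import offload; the dict of modules is a made-up stand-in for a real pipeline, and a CUDA device is required because all() calls torch.set_default_device('cuda'):

    import torch
    from mmgp import offload  # the class shown in the hunks above

    # Tiny stand-in for pipe.components; a real diffusers-style pipeline works too.
    modules = {"transformer": torch.nn.Linear(64, 64)}

    # 1.0.2 accepted an extra keyword:
    #   offload.all(modules, quantizeTransformer=False, pinInRAM=True, compile=True)
    # 1.0.3 drops it, so the call becomes:
    offloader = offload.all(modules, quantizeTransformer=False, pinInRAM=True, verbose=True)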
@@ -389,10 +388,14 @@ class offload:
             if verbose:
                 print("Torch compilation started")
             torch._dynamo.config.cache_size_limit = 10000
+            # if pipe != None and hasattr(pipe, "__call__"):
+            #     pipe.__call__= torch.compile(pipe.__call__, mode= "max-autotune")
+
             for model_id in models:
                 current_model: torch.nn.Module = models[model_id]
-                current_model.compile()
-                #models["transformer"].compile()
+                current_model.compile(mode= "max-autotune")
+                #models["transformer"].compile()
+
             if verbose:
                 print("Torch compilation done")

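The compilation loop now passes mode="max-autotune" to nn.Module.compile(), which compiles the module in place with the same options as torch.compile. A minimal sketch outside of mmgp, assuming PyTorch 2.2+ where Module.compile() is available; the model here is illustrative only:

    import torch

    # In the hunk above this would be each pipeline module in `models`.
    model = torch.nn.Sequential(torch.nn.Linear(32, 32), torch.nn.GELU())

    # In-place compilation; "max-autotune" trades longer warmup for faster
    # kernels (it benefits most from a GPU with Triton, but also runs on CPU).
    model.compile(mode="max-autotune")

    out = model(torch.randn(2, 32))
    print(out.shape)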
5 files without changes