mmgp 3.5.11.tar.gz → 3.5.12.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of mmgp might be problematic.
- {mmgp-3.5.11/src/mmgp.egg-info → mmgp-3.5.12}/PKG-INFO +2 -2
- {mmgp-3.5.11 → mmgp-3.5.12}/README.md +1 -1
- {mmgp-3.5.11 → mmgp-3.5.12}/pyproject.toml +1 -1
- {mmgp-3.5.11 → mmgp-3.5.12}/src/mmgp/offload.py +14 -8
- {mmgp-3.5.11 → mmgp-3.5.12/src/mmgp.egg-info}/PKG-INFO +2 -2
- {mmgp-3.5.11 → mmgp-3.5.12}/LICENSE.md +0 -0
- {mmgp-3.5.11 → mmgp-3.5.12}/setup.cfg +0 -0
- {mmgp-3.5.11 → mmgp-3.5.12}/src/__init__.py +0 -0
- {mmgp-3.5.11 → mmgp-3.5.12}/src/mmgp/__init__.py +0 -0
- {mmgp-3.5.11 → mmgp-3.5.12}/src/mmgp/safetensors2.py +0 -0
- {mmgp-3.5.11 → mmgp-3.5.12}/src/mmgp.egg-info/SOURCES.txt +0 -0
- {mmgp-3.5.11 → mmgp-3.5.12}/src/mmgp.egg-info/dependency_links.txt +0 -0
- {mmgp-3.5.11 → mmgp-3.5.12}/src/mmgp.egg-info/requires.txt +0 -0
- {mmgp-3.5.11 → mmgp-3.5.12}/src/mmgp.egg-info/top_level.txt +0 -0
{mmgp-3.5.11/src/mmgp.egg-info → mmgp-3.5.12}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mmgp
-Version: 3.5.11
+Version: 3.5.12
 Summary: Memory Management for the GPU Poor
 Author-email: deepbeepmeep <deepbeepmeep@yahoo.com>
 Requires-Python: >=3.10
@@ -15,7 +15,7 @@ Dynamic: license-file
 
 
 <p align="center">
-<H2>Memory Management 3.5.11 for the GPU Poor by DeepBeepMeep</H2>
+<H2>Memory Management 3.5.12 for the GPU Poor by DeepBeepMeep</H2>
 </p>
 
 
{mmgp-3.5.11 → mmgp-3.5.12}/src/mmgp/offload.py

@@ -1,4 +1,4 @@
-# ------------------ Memory Management 3.5.11 for the GPU Poor by DeepBeepMeep (mmgp)------------------
+# ------------------ Memory Management 3.5.12 for the GPU Poor by DeepBeepMeep (mmgp)------------------
 #
 # This module contains multiples optimisations so that models such as Flux (and derived), Mochi, CogView, HunyuanVideo, ... can run smoothly on a 24 GB GPU limited card.
 # This a replacement for the accelerate library that should in theory manage offloading, but doesn't work properly with models that are loaded / unloaded several
@@ -94,7 +94,7 @@ def get_cache(cache_name):
     if all_cache is None:
         all_cache = {}
         shared_state["_cache"]= all_cache
-    cache =
+    cache = all_cache.get(cache_name, None)
     if cache is None:
         cache = {}
         all_cache[cache_name] = cache
@@ -688,7 +688,7 @@ def _welcome():
     if welcome_displayed:
         return
     welcome_displayed = True
-    print(f"{BOLD}{HEADER}************ Memory Management for the GPU Poor (mmgp 3.5.11) by DeepBeepMeep ************{ENDC}{UNBOLD}")
+    print(f"{BOLD}{HEADER}************ Memory Management for the GPU Poor (mmgp 3.5.12) by DeepBeepMeep ************{ENDC}{UNBOLD}")
 
 def change_dtype(model, new_dtype, exclude_buffers = False):
     for submodule_name, submodule in model.named_modules():
@@ -2498,7 +2498,7 @@ def {fname}(module, *args, **kwargs):
 
 
 
-def all(pipe_or_dict_of_modules, pinnedMemory = False, pinnedPEFTLora = False, partialPinning = False, loras = None, quantizeTransformer = True, extraModelsToQuantize = None, quantizationType = qint8, budgets= 0, workingVRAM = None, asyncTransfers = True, compile = False, convertWeightsFloatTo = torch.bfloat16, perc_reserved_mem_max = 0, coTenantsMap = None, verboseLevel = -1):
+def all(pipe_or_dict_of_modules, pinnedMemory = False, pinnedPEFTLora = False, partialPinning = False, loras = None, quantizeTransformer = True, extraModelsToQuantize = None, quantizationType = qint8, budgets= 0, workingVRAM = None, asyncTransfers = True, compile = False, convertWeightsFloatTo = torch.bfloat16, perc_reserved_mem_max = 0, coTenantsMap = None, vram_safety_coefficient = 0.8, verboseLevel = -1):
     """Hook to a pipeline or a group of modules in order to reduce their VRAM requirements:
     pipe_or_dict_of_modules : the pipeline object or a dictionary of modules of the model
     quantizeTransformer: set True by default will quantize on the fly the video / image model
@@ -2507,6 +2507,8 @@ def all(pipe_or_dict_of_modules, pinnedMemory = False, pinnedPEFTLora = False, p
     budgets: 0 by default (unlimited). If non 0, it corresponds to the maximum size in MB that every model will occupy at any moment
         (in fact the real usage is twice this number). It is very efficient to reduce VRAM consumption but this feature may be very slow
         if pinnedMemory is not enabled
+    vram_safety_coefficient: float between 0 and 1 (exclusive), default 0.8. Sets the maximum portion of VRAM that can be used for models.
+        Lower values provide more safety margin but may reduce performance.
     """
     self = offload()
     self.verboseLevel = verboseLevel
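The new parameter slots straight into the existing `offload.all` entry point documented above. A minimal usage sketch, assuming a diffusers-style pipeline object `pipe` (the pipeline and the 0.7 value are illustrative, not from the package):

```python
# Minimal sketch: opting into a more conservative VRAM cap via the new
# vram_safety_coefficient parameter. `pipe` is a placeholder for any
# pipeline object accepted by offload.all; other arguments keep defaults.
from mmgp import offload

offload.all(
    pipe,
    vram_safety_coefficient=0.7,  # cap models at 70% of VRAM instead of the 0.8 default
    verboseLevel=1,
)
```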
@@ -2522,7 +2524,11 @@ def all(pipe_or_dict_of_modules, pinnedMemory = False, pinnedPEFTLora = False, p
             return float(b[:-1]) * self.device_mem_capacity
         else:
             return b * ONE_MB
-
+
+    # Validate vram_safety_coefficient
+    if not isinstance(vram_safety_coefficient, float) or vram_safety_coefficient <= 0 or vram_safety_coefficient >= 1:
+        raise ValueError("vram_safety_coefficient must be a float between 0 and 1 (exclusive)")
+
     budget = 0
     if not budgets is None:
         if isinstance(budgets , dict):
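Note that the guard uses `isinstance(..., float)`, so an integer such as `1` is rejected just like an out-of-range float. A standalone restatement of the check, for illustration only:

```python
# Standalone restatement of the validation added above (illustration only,
# not the library code itself).
def check_vram_safety_coefficient(value):
    if not isinstance(value, float) or value <= 0 or value >= 1:
        raise ValueError("vram_safety_coefficient must be a float between 0 and 1 (exclusive)")

check_vram_safety_coefficient(0.8)       # passes silently
try:
    check_vram_safety_coefficient(1.0)   # rejected: must be strictly below 1
except ValueError as err:
    print(err)
```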
@@ -2667,14 +2673,14 @@ def all(pipe_or_dict_of_modules, pinnedMemory = False, pinnedPEFTLora = False, p
             model_budget = new_budget if model_budget == 0 or new_budget < model_budget else model_budget
         if model_budget > 0 and model_budget > current_model_size:
             model_budget = 0
-        coef =
+        coef = vram_safety_coefficient
         if current_model_size > coef * self.device_mem_capacity and model_budget == 0 or model_budget > coef * self.device_mem_capacity:
             if verboseLevel >= 1:
                 if model_budget == 0:
-                    print(f"Model '{model_id}' is too large ({current_model_size/ONE_MB:0.1f} MB) to fit entirely in {coef * 100}% of the VRAM (max capacity is {coef * self.device_mem_capacity/ONE_MB}) MB)")
+                    print(f"Model '{model_id}' is too large ({current_model_size/ONE_MB:0.1f} MB) to fit entirely in {coef * 100:.0f}% of the VRAM (max capacity is {coef * self.device_mem_capacity/ONE_MB:0.1f}) MB)")
                 else:
                     print(f"Budget ({budget/ONE_MB:0.1f} MB) for Model '{model_id}' is too important so that this model can fit in the VRAM (max capacity is {self.device_mem_capacity/ONE_MB}) MB)")
-                    print(f"Budget allocation for this model has been consequently reduced to the
+                    print(f"Budget allocation for this model has been consequently reduced to the {coef * 100:.0f}% of max GPU Memory ({coef * self.device_mem_capacity/ONE_MB:0.1f} MB). This may not leave enough working VRAM and you will probably need to define manually a lower budget for this model.")
             model_budget = coef * self.device_mem_capacity
 
 
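In effect the cap is simply `vram_safety_coefficient * device_mem_capacity`. A back-of-the-envelope sketch with hypothetical numbers (a 24 GB card is assumed, and `ONE_MB` is taken to be 2**20 bytes):

```python
# Illustrative arithmetic for the capping logic above; all numbers hypothetical.
ONE_MB = 1024 * 1024
device_mem_capacity = 24 * 1024 * ONE_MB   # assume a 24 GB card
vram_safety_coefficient = 0.8              # the default

cap = vram_safety_coefficient * device_mem_capacity
print(f"max model footprint: {cap / ONE_MB:0.1f} MB")   # 19660.8 MB

current_model_size = 22 * 1024 * ONE_MB    # a 22 GB model exceeds the cap,
if current_model_size > cap:               # so its budget is clamped
    model_budget = cap
```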
{mmgp-3.5.11 → mmgp-3.5.12/src/mmgp.egg-info}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mmgp
-Version: 3.5.11
+Version: 3.5.12
 Summary: Memory Management for the GPU Poor
 Author-email: deepbeepmeep <deepbeepmeep@yahoo.com>
 Requires-Python: >=3.10
@@ -15,7 +15,7 @@ Dynamic: license-file
 
 
 <p align="center">
-<H2>Memory Management 3.5.11 for the GPU Poor by DeepBeepMeep</H2>
+<H2>Memory Management 3.5.12 for the GPU Poor by DeepBeepMeep</H2>
 </p>
 
 