mmgp 3.4.8__tar.gz → 3.4.9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mmgp might be problematic. Click here for more details.
- {mmgp-3.4.8/src/mmgp.egg-info → mmgp-3.4.9}/PKG-INFO +2 -2
- {mmgp-3.4.8 → mmgp-3.4.9}/README.md +1 -1
- {mmgp-3.4.8 → mmgp-3.4.9}/pyproject.toml +1 -1
- {mmgp-3.4.8 → mmgp-3.4.9}/src/mmgp/offload.py +7 -7
- {mmgp-3.4.8 → mmgp-3.4.9/src/mmgp.egg-info}/PKG-INFO +2 -2
- {mmgp-3.4.8 → mmgp-3.4.9}/LICENSE.md +0 -0
- {mmgp-3.4.8 → mmgp-3.4.9}/setup.cfg +0 -0
- {mmgp-3.4.8 → mmgp-3.4.9}/src/__init__.py +0 -0
- {mmgp-3.4.8 → mmgp-3.4.9}/src/mmgp/__init__.py +0 -0
- {mmgp-3.4.8 → mmgp-3.4.9}/src/mmgp/safetensors2.py +0 -0
- {mmgp-3.4.8 → mmgp-3.4.9}/src/mmgp.egg-info/SOURCES.txt +0 -0
- {mmgp-3.4.8 → mmgp-3.4.9}/src/mmgp.egg-info/dependency_links.txt +0 -0
- {mmgp-3.4.8 → mmgp-3.4.9}/src/mmgp.egg-info/requires.txt +0 -0
- {mmgp-3.4.8 → mmgp-3.4.9}/src/mmgp.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: mmgp
|
|
3
|
-
Version: 3.4.8
|
|
3
|
+
Version: 3.4.9
|
|
4
4
|
Summary: Memory Management for the GPU Poor
|
|
5
5
|
Author-email: deepbeepmeep <deepbeepmeep@yahoo.com>
|
|
6
6
|
Requires-Python: >=3.10
|
|
@@ -15,7 +15,7 @@ Dynamic: license-file
|
|
|
15
15
|
|
|
16
16
|
|
|
17
17
|
<p align="center">
|
|
18
|
-
<H2>Memory Management 3.4.8 for the GPU Poor by DeepBeepMeep</H2>
|
|
18
|
+
<H2>Memory Management 3.4.9 for the GPU Poor by DeepBeepMeep</H2>
|
|
19
19
|
</p>
|
|
20
20
|
|
|
21
21
|
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# ------------------ Memory Management 3.4.8 for the GPU Poor by DeepBeepMeep (mmgp)------------------
|
|
1
|
+
# ------------------ Memory Management 3.4.9 for the GPU Poor by DeepBeepMeep (mmgp)------------------
|
|
2
2
|
#
|
|
3
3
|
# This module contains multiples optimisations so that models such as Flux (and derived), Mochi, CogView, HunyuanVideo, ... can run smoothly on a 24 GB GPU limited card.
|
|
4
4
|
# This a replacement for the accelerate library that should in theory manage offloading, but doesn't work properly with models that are loaded / unloaded several
|
|
@@ -658,7 +658,7 @@ def _welcome():
|
|
|
658
658
|
if welcome_displayed:
|
|
659
659
|
return
|
|
660
660
|
welcome_displayed = True
|
|
661
|
-
print(f"{BOLD}{HEADER}************ Memory Management for the GPU Poor (mmgp 3.4.8) by DeepBeepMeep ************{ENDC}{UNBOLD}")
|
|
661
|
+
print(f"{BOLD}{HEADER}************ Memory Management for the GPU Poor (mmgp 3.4.9) by DeepBeepMeep ************{ENDC}{UNBOLD}")
|
|
662
662
|
|
|
663
663
|
def change_dtype(model, new_dtype, exclude_buffers = False):
|
|
664
664
|
for submodule_name, submodule in model.named_modules():
|
|
@@ -1275,7 +1275,7 @@ def move_loras_to_device(model, device="cpu" ):
|
|
|
1275
1275
|
if ".lora_" in k:
|
|
1276
1276
|
m.to(device)
|
|
1277
1277
|
|
|
1278
|
-
def fast_load_transformers_model(model_path: str, do_quantize = False, quantizationType = qint8, pinToMemory = False, partialPinning = False, forcedConfigPath = None, modelClass=None, modelPrefix = None, writable_tensors = True, verboseLevel = -1, configKwargs ={}):
|
|
1278
|
+
def fast_load_transformers_model(model_path: str, do_quantize = False, quantizationType = qint8, pinToMemory = False, partialPinning = False, forcedConfigPath = None, defaultConfigPath = None, modelClass=None, modelPrefix = None, writable_tensors = True, verboseLevel = -1, configKwargs ={}):
|
|
1279
1279
|
"""
|
|
1280
1280
|
quick version of .LoadfromPretrained of the transformers library
|
|
1281
1281
|
used to build a model and load the corresponding weights (quantized or not)
|
|
@@ -1308,7 +1308,7 @@ def fast_load_transformers_model(model_path: str, do_quantize = False, quantizat
|
|
|
1308
1308
|
if forcedConfigPath != None:
|
|
1309
1309
|
config_fullpath = forcedConfigPath
|
|
1310
1310
|
else:
|
|
1311
|
-
config_fullpath = os.path.join(os.path.dirname(model_path[-1]), "config.json")
|
|
1311
|
+
config_fullpath = os.path.join(os.path.dirname(model_path[-1]), "config.json") if defaultConfigPath == None else defaultConfigPath
|
|
1312
1312
|
|
|
1313
1313
|
if not os.path.isfile(config_fullpath):
|
|
1314
1314
|
raise Exception("a 'config.json' that describes the model is required in the directory of the model or inside the safetensor file")
|
|
@@ -1500,11 +1500,11 @@ def load_model_data(model, file_path: str, do_quantize = False, quantizationType
|
|
|
1500
1500
|
|
|
1501
1501
|
if do_quantize:
|
|
1502
1502
|
if quantization_map != None and len(quantization_map) > 0 :
|
|
1503
|
-
if _quantize(model, quantizationType, verboseLevel=verboseLevel, model_id=file_path):
|
|
1504
|
-
quantization_map = model._quanto_map
|
|
1505
|
-
else:
|
|
1506
1503
|
if verboseLevel >=1:
|
|
1507
1504
|
print("Model already quantized")
|
|
1505
|
+
else:
|
|
1506
|
+
if _quantize(model, quantizationType, verboseLevel=verboseLevel, model_id=file_path):
|
|
1507
|
+
quantization_map = model._quanto_map
|
|
1508
1508
|
|
|
1509
1509
|
if pinToMemory:
|
|
1510
1510
|
_pin_to_memory(model, file_path, partialPinning = partialPinning, verboseLevel = verboseLevel)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: mmgp
|
|
3
|
-
Version: 3.4.8
|
|
3
|
+
Version: 3.4.9
|
|
4
4
|
Summary: Memory Management for the GPU Poor
|
|
5
5
|
Author-email: deepbeepmeep <deepbeepmeep@yahoo.com>
|
|
6
6
|
Requires-Python: >=3.10
|
|
@@ -15,7 +15,7 @@ Dynamic: license-file
|
|
|
15
15
|
|
|
16
16
|
|
|
17
17
|
<p align="center">
|
|
18
|
-
<H2>Memory Management 3.4.8 for the GPU Poor by DeepBeepMeep</H2>
|
|
18
|
+
<H2>Memory Management 3.4.9 for the GPU Poor by DeepBeepMeep</H2>
|
|
19
19
|
</p>
|
|
20
20
|
|
|
21
21
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|