mmgp 2.0.0__py3-none-any.whl → 2.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mmgp might be problematic.
- {mmgp-2.0.0.dist-info → mmgp-2.0.1.dist-info}/METADATA +3 -3
- mmgp-2.0.1.dist-info/RECORD +7 -0
- mmgp.py +6 -9
- mmgp-2.0.0.dist-info/RECORD +0 -7
- {mmgp-2.0.0.dist-info → mmgp-2.0.1.dist-info}/LICENSE.md +0 -0
- {mmgp-2.0.0.dist-info → mmgp-2.0.1.dist-info}/WHEEL +0 -0
- {mmgp-2.0.0.dist-info → mmgp-2.0.1.dist-info}/top_level.txt +0 -0
{mmgp-2.0.0.dist-info → mmgp-2.0.1.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: mmgp
-Version: 2.0.0
+Version: 2.0.1
 Summary: Memory Management for the GPU Poor
 Author-email: deepbeepmeep <deepbeepmeep@yahoo.com>
 License: GNU GENERAL PUBLIC LICENSE
@@ -92,13 +92,13 @@ Load the tensors data of a model in RAM of a model already initialized with no d
 
 - *fast_load_transformers_model(model_path: str)*\
 Initialize (build the model hierarchy in memory) and fast load the corresponding tensors of a 'transformers' library model.
-The advantages over the original *
+The advantages over the original *from_pretrained* method is that the full model can fit into a single file with a filename of your choosing (thefore you can have multiple 'transformers' versions of the same model in the same directory) and prequantized model are processed in a transparent way.
 Please note that you need to keep the original file transformers 'config.json' in the same directory.
 
 
 The typical workflow wil be:
 1) temporarly insert the *save_model* function just after a model has been fully loaded to save a copy of the model / quantized model.
-2) replace the full initalizing / loading logic with *fast_load_transformers_model* (if there is a
+2) replace the full initalizing / loading logic with *fast_load_transformers_model* (if there is a *from_pretrained* call to a transformers object) or only the tensor loading functions (*torch.load_model_file* and *torch.load_state_dict*) with *load_model_data after* the initializing logic.
 
 ## Special cases
 Sometime there isn't an explicit pipe object as each submodel is loaded separately in the main app. If this is the case, you need to create a dictionary that manually maps all the models.\
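The README excerpt above describes a two-step workflow around *save_model* and *fast_load_transformers_model*. The minimal sketch below illustrates it; the offload.save_model / offload.fast_load_transformers_model namespacing, the save_model signature, the checkpoint id and the file names are assumptions for illustration only (the excerpt gives just the function names and the fast_load_transformers_model(model_path: str) signature).

# Sketch of the workflow described in the excerpt above.
# Assumptions: both helpers live on the offload class of mmgp.py; the
# save_model signature, repo id and file names are placeholders.
from transformers import AutoModel

from mmgp import offload

# Step 1 (temporary): load the model the usual way once, then save a
# single-file copy under a name of your choosing.
model = AutoModel.from_pretrained("org/original-checkpoint")   # hypothetical repo id
offload.save_model(model, "my_model.safetensors")              # hypothetical file name

# Step 2: replace the from_pretrained call in the app with the fast loader.
# Keep the original transformers 'config.json' next to the saved file.
model = offload.fast_load_transformers_model("my_model.safetensors")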
mmgp-2.0.1.dist-info/RECORD
ADDED
@@ -0,0 +1,7 @@
+__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+mmgp.py,sha256=UuAUF76QIve6j2qEkYucxaOq4uo9mKptP4AdkQKr8Eg,45152
+mmgp-2.0.1.dist-info/LICENSE.md,sha256=HjzvY2grdtdduZclbZ46B2M-XpT4MDCxFub5ZwTWq2g,93
+mmgp-2.0.1.dist-info/METADATA,sha256=y-6bIJqU6FrX4NMVXheTjs7n2PeoG-kilyyULqgxnt4,8601
+mmgp-2.0.1.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
+mmgp-2.0.1.dist-info/top_level.txt,sha256=waGaepj2qVfnS2yAOkaMu4r9mJaVjGbEi6AwOUogU_U,14
+mmgp-2.0.1.dist-info/RECORD,,
mmgp.py
CHANGED
@@ -680,9 +680,9 @@ class offload:
 if (budgets!= None or budget >0) :
 self.async_transfers = True
 
-
+pinInRAM = True
 # compile not working yet or slower
-compile = False
+compile = False # True
 #quantizeTransformer = False
 #self.async_transfers = False
 self.compile = compile
@@ -804,9 +804,6 @@ class offload:
 p._data = p._data.pin_memory()
 # fix quanto bug (that seems to have been fixed since&) that allows _scale to be float32 if the original weight was float32
 # (this may cause type mismatch between dequantified bfloat16 weights and float32 scales)
-if p._scale.dtype == torch.float32:
-pass
-
 p._scale = p._scale.to(torch.bfloat16).pin_memory() if p._scale.dtype == torch.float32 else p._scale.pin_memory()
 pinned_parameters_data[p]=[p._data, p._scale]
 else:
@@ -872,13 +869,13 @@ class offload:
 # we limit this check to the first level of blocks as quering the cuda cache is time consuming
 self.hook_me_light(submodule, model_id, cur_blocks_name, submodule_method, context = submodule_name)
 
-
-
+if compile and cur_blocks_name != None and model_id == "transformer" and "_blocks" in submodule_name:
+submodule.compile(mode="reduce-overhead" ) #mode= "max-autotune"
 
 current_size = self.add_module_to_blocks(model_id, cur_blocks_name, submodule, prev_blocks_name)
 
 
-if compile:
+if compile and False:
 if verboseLevel>=1:
 print("Torch compilation started")
 torch._dynamo.config.cache_size_limit = 10000
@@ -943,7 +940,7 @@ class offload:
 info = "You have chosen a Medium speed profile that requires at least 32 GB of RAM and 24 GB of VRAM."
 return offload.all(pipe_or_dict_of_modules, pinInRAM= "transformer", modelsToQuantize= extra_mod_to_quantize , info = info, quantizeTransformer= quantizeTransformer)
 elif profile_no == profile_type.LowRAM_LowVRAM_Slow:
-info = "You have chosen the
+info = "You have chosen the Slow profile that requires at least 32 GB of RAM and 12 GB of VRAM."
 return offload.all(pipe_or_dict_of_modules, pinInRAM= "transformer", modelsToQuantize= extra_mod_to_quantize , budgets=budgets, info = info, quantizeTransformer= quantizeTransformer)
 elif profile_no == profile_type.VerylowRAM_LowVRAM_Slowest:
 budgets["transformer"] = 400
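The last hunk above touches mmgp's profile dispatcher: it completes the info string for profile_type.LowRAM_LowVRAM_Slow and routes each profile to an offload.all(...) call. As a rough usage sketch, assuming that dispatcher is exposed as offload.profile(pipe_or_dict_of_modules, profile_no) (only profile_type and offload.all appear verbatim in this diff), selecting the Slow profile for a manually mapped dictionary of models, as the 'Special cases' note above suggests, could look like this:

# Rough sketch: offload.profile and its signature are assumptions; the
# nn.Linear stand-ins only illustrate the "dictionary of models" shape.
import torch
from mmgp import offload, profile_type

pipe = {
    "transformer": torch.nn.Linear(8, 8),   # placeholder for the real transformer
    "text_encoder": torch.nn.Linear(8, 8),  # placeholder for the real text encoder
}

# Slow profile: at least 32 GB of RAM and 12 GB of VRAM per the info string above.
offload.profile(pipe, profile_no=profile_type.LowRAM_LowVRAM_Slow)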
mmgp-2.0.0.dist-info/RECORD
DELETED
@@ -1,7 +0,0 @@
-__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-mmgp.py,sha256=iJsy5WKd-X1lee37YWdFm8NrHYXa325_jAunzu7zdYM,45231
-mmgp-2.0.0.dist-info/LICENSE.md,sha256=HjzvY2grdtdduZclbZ46B2M-XpT4MDCxFub5ZwTWq2g,93
-mmgp-2.0.0.dist-info/METADATA,sha256=u2SiQXefqXAwyXkpJFwben-9n9l9z80dsbRXJpYnqMM,8609
-mmgp-2.0.0.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
-mmgp-2.0.0.dist-info/top_level.txt,sha256=waGaepj2qVfnS2yAOkaMu4r9mJaVjGbEi6AwOUogU_U,14
-mmgp-2.0.0.dist-info/RECORD,,
{mmgp-2.0.0.dist-info → mmgp-2.0.1.dist-info}/LICENSE.md
File without changes
{mmgp-2.0.0.dist-info → mmgp-2.0.1.dist-info}/WHEEL
File without changes
{mmgp-2.0.0.dist-info → mmgp-2.0.1.dist-info}/top_level.txt
File without changes