mmgp 3.3.2__py3-none-any.whl → 3.3.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mmgp might be problematic. Click here for more details.
- mmgp/offload.py +6 -30
- {mmgp-3.3.2.dist-info → mmgp-3.3.3.dist-info}/METADATA +2 -2
- mmgp-3.3.3.dist-info/RECORD +9 -0
- mmgp-3.3.2.dist-info/RECORD +0 -9
- {mmgp-3.3.2.dist-info → mmgp-3.3.3.dist-info}/WHEEL +0 -0
- {mmgp-3.3.2.dist-info → mmgp-3.3.3.dist-info}/licenses/LICENSE.md +0 -0
- {mmgp-3.3.2.dist-info → mmgp-3.3.3.dist-info}/top_level.txt +0 -0
mmgp/offload.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# ------------------ Memory Management 3.3.2 for the GPU Poor by DeepBeepMeep (mmgp)------------------
|
|
1
|
+
# ------------------ Memory Management 3.3.3 for the GPU Poor by DeepBeepMeep (mmgp)------------------
|
|
2
2
|
#
|
|
3
3
|
# This module contains multiples optimisations so that models such as Flux (and derived), Mochi, CogView, HunyuanVideo, ... can run smoothly on a 24 GB GPU limited card.
|
|
4
4
|
# This a replacement for the accelerate library that should in theory manage offloading, but doesn't work properly with models that are loaded / unloaded several
|
|
@@ -401,7 +401,7 @@ def _pin_sd_to_memory(sd, sd_name, tied_weights = None, gig_tensor_size = BIG_TE
|
|
|
401
401
|
return
|
|
402
402
|
|
|
403
403
|
|
|
404
|
-
def _pin_to_memory(model, model_id, partialPinning = False, pinnedPEFTLora = True,
|
|
404
|
+
def _pin_to_memory(model, model_id, partialPinning = False, pinnedPEFTLora = True, big_tensor_size = BIG_TENSOR_MAX_SIZE, verboseLevel = 1):
|
|
405
405
|
|
|
406
406
|
global max_pinnable_bytes, total_pinned_bytes
|
|
407
407
|
if max_pinnable_bytes > 0 and max_pinnable_bytes >= max_pinnable_bytes:
|
|
@@ -474,7 +474,7 @@ def _pin_to_memory(model, model_id, partialPinning = False, pinnedPEFTLora = Tru
|
|
|
474
474
|
length = torch.numel(p.data) * p.data.element_size()
|
|
475
475
|
|
|
476
476
|
ref_cache[ref] = (n, length)
|
|
477
|
-
if current_big_tensor_size + length >
|
|
477
|
+
if current_big_tensor_size + length > big_tensor_size and current_big_tensor_size !=0 :
|
|
478
478
|
big_tensors_sizes.append(current_big_tensor_size)
|
|
479
479
|
current_big_tensor_size = 0
|
|
480
480
|
big_tensor_no += 1
|
|
@@ -498,28 +498,11 @@ def _pin_to_memory(model, model_id, partialPinning = False, pinnedPEFTLora = Tru
|
|
|
498
498
|
big_tensors_sizes.append(current_big_tensor_size)
|
|
499
499
|
|
|
500
500
|
big_tensors = []
|
|
501
|
-
last_big_tensor = 0
|
|
502
501
|
total = 0
|
|
503
502
|
|
|
504
503
|
|
|
505
504
|
failed_planned_allocation = False
|
|
506
505
|
|
|
507
|
-
# for size in big_tensors_sizes:
|
|
508
|
-
# try:
|
|
509
|
-
# # if total > 7000 * ONE_MB:
|
|
510
|
-
# # raise Exception ("test no more reserved RAM")
|
|
511
|
-
# current_big_tensor = torch.empty( size, dtype= torch.uint8, pin_memory=True, device="cpu")
|
|
512
|
-
# big_tensors.append(current_big_tensor)
|
|
513
|
-
# except:
|
|
514
|
-
# print(f"Unable to pin more tensors for this model as the maximum reservable memory has been reached ({total/ONE_MB:.2f})")
|
|
515
|
-
# max_pinnable_bytes = total + total_pinned_bytes
|
|
516
|
-
# failed_planned_allocation = True
|
|
517
|
-
# break
|
|
518
|
-
|
|
519
|
-
# last_big_tensor += 1
|
|
520
|
-
# total += size
|
|
521
|
-
|
|
522
|
-
|
|
523
506
|
gc.collect()
|
|
524
507
|
|
|
525
508
|
last_allocated_big_tensor = -1
|
|
@@ -561,13 +544,6 @@ def _pin_to_memory(model, model_id, partialPinning = False, pinnedPEFTLora = Tru
|
|
|
561
544
|
|
|
562
545
|
total += size
|
|
563
546
|
|
|
564
|
-
# if big_tensor_no != prev_big_tensor:
|
|
565
|
-
# gc.collect()
|
|
566
|
-
# prev_big_tensor = big_tensor_no
|
|
567
|
-
# match_param, match_isbuffer = tied_weights.get(n, (None, False))
|
|
568
|
-
# if match_param != None:
|
|
569
|
-
|
|
570
|
-
# if big_tensor_no>=0 and big_tensor_no < last_big_tensor:
|
|
571
547
|
current_big_tensor = big_tensors[big_tensor_no]
|
|
572
548
|
if is_buffer :
|
|
573
549
|
_force_load_buffer(p) # otherwise potential memory leak
|
|
@@ -600,9 +576,9 @@ def _pin_to_memory(model, model_id, partialPinning = False, pinnedPEFTLora = Tru
|
|
|
600
576
|
|
|
601
577
|
if verboseLevel >=1:
|
|
602
578
|
if partialPinning or failed_planned_allocation:
|
|
603
|
-
print(f"The model was partially pinned to reserved RAM: {
|
|
579
|
+
print(f"The model was partially pinned to reserved RAM: {last_allocated_big_tensor + 1} large blocks spread across {total/ONE_MB:.2f} MB")
|
|
604
580
|
else:
|
|
605
|
-
print(f"The whole model was pinned to reserved RAM: {
|
|
581
|
+
print(f"The whole model was pinned to reserved RAM: {last_allocated_big_tensor + 1} large blocks spread across {total/ONE_MB:.2f} MB")
|
|
606
582
|
|
|
607
583
|
model._already_pinned = True
|
|
608
584
|
|
|
@@ -949,7 +925,7 @@ def load_loras_into_model(model, lora_path, lora_multi = None, activate_all_lora
|
|
|
949
925
|
continue
|
|
950
926
|
fail = False
|
|
951
927
|
skip = False
|
|
952
|
-
state_dict = safetensors2.torch_load_file(path)
|
|
928
|
+
state_dict = safetensors2.torch_load_file(path, writable_tensors= False)
|
|
953
929
|
|
|
954
930
|
|
|
955
931
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: mmgp
|
|
3
|
-
Version: 3.3.2
|
|
3
|
+
Version: 3.3.3
|
|
4
4
|
Summary: Memory Management for the GPU Poor
|
|
5
5
|
Author-email: deepbeepmeep <deepbeepmeep@yahoo.com>
|
|
6
6
|
License: GNU GENERAL PUBLIC LICENSE
|
|
@@ -17,7 +17,7 @@ Dynamic: license-file
|
|
|
17
17
|
|
|
18
18
|
|
|
19
19
|
<p align="center">
|
|
20
|
-
<H2>Memory Management 3.3.2 for the GPU Poor by DeepBeepMeep</H2>
|
|
20
|
+
<H2>Memory Management 3.3.3 for the GPU Poor by DeepBeepMeep</H2>
|
|
21
21
|
</p>
|
|
22
22
|
|
|
23
23
|
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
+
mmgp/__init__.py,sha256=A9qBwyQMd1M7vshSTOBnFGP1MQvS2hXmTcTCMUcmyzE,509
|
|
3
|
+
mmgp/offload.py,sha256=R0UbOXEGAFKd_6090o8v5CkVmJiWmHDQsww7A3-LZEU,106550
|
|
4
|
+
mmgp/safetensors2.py,sha256=rmUbBmK3Dra5prUTTRSVi6-XUFAa9Mj6B5CNPgzt9To,17333
|
|
5
|
+
mmgp-3.3.3.dist-info/licenses/LICENSE.md,sha256=HjzvY2grdtdduZclbZ46B2M-XpT4MDCxFub5ZwTWq2g,93
|
|
6
|
+
mmgp-3.3.3.dist-info/METADATA,sha256=xcODp7uhIfvy7Il1xEp8ed2VYmH1Eln-EnLy3MM4VGM,16153
|
|
7
|
+
mmgp-3.3.3.dist-info/WHEEL,sha256=DK49LOLCYiurdXXOXwGJm6U4DkHkg4lcxjhqwRa0CP4,91
|
|
8
|
+
mmgp-3.3.3.dist-info/top_level.txt,sha256=waGaepj2qVfnS2yAOkaMu4r9mJaVjGbEi6AwOUogU_U,14
|
|
9
|
+
mmgp-3.3.3.dist-info/RECORD,,
|
mmgp-3.3.2.dist-info/RECORD
DELETED
|
@@ -1,9 +0,0 @@
|
|
|
1
|
-
__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
-
mmgp/__init__.py,sha256=A9qBwyQMd1M7vshSTOBnFGP1MQvS2hXmTcTCMUcmyzE,509
|
|
3
|
-
mmgp/offload.py,sha256=43FnFfWqwhh2qz0uykqEpxb_XP9Jx8MPGzN31PExT2w,107470
|
|
4
|
-
mmgp/safetensors2.py,sha256=rmUbBmK3Dra5prUTTRSVi6-XUFAa9Mj6B5CNPgzt9To,17333
|
|
5
|
-
mmgp-3.3.2.dist-info/licenses/LICENSE.md,sha256=HjzvY2grdtdduZclbZ46B2M-XpT4MDCxFub5ZwTWq2g,93
|
|
6
|
-
mmgp-3.3.2.dist-info/METADATA,sha256=mVMLkutqhUihIeo8uo_LK71ithm84_AEaNvnyRnzmEA,16153
|
|
7
|
-
mmgp-3.3.2.dist-info/WHEEL,sha256=DK49LOLCYiurdXXOXwGJm6U4DkHkg4lcxjhqwRa0CP4,91
|
|
8
|
-
mmgp-3.3.2.dist-info/top_level.txt,sha256=waGaepj2qVfnS2yAOkaMu4r9mJaVjGbEi6AwOUogU_U,14
|
|
9
|
-
mmgp-3.3.2.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|