mmgp 3.3.2__py3-none-any.whl → 3.3.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mmgp might be problematic; the changes between versions are listed below for review.

mmgp/offload.py CHANGED
@@ -1,4 +1,4 @@
1
- # ------------------ Memory Management 3.3.2 for the GPU Poor by DeepBeepMeep (mmgp)------------------
1
+ # ------------------ Memory Management 3.3.3 for the GPU Poor by DeepBeepMeep (mmgp)------------------
2
2
  #
3
3
  # This module contains multiples optimisations so that models such as Flux (and derived), Mochi, CogView, HunyuanVideo, ... can run smoothly on a 24 GB GPU limited card.
4
4
  # This a replacement for the accelerate library that should in theory manage offloading, but doesn't work properly with models that are loaded / unloaded several
@@ -401,7 +401,7 @@ def _pin_sd_to_memory(sd, sd_name, tied_weights = None, gig_tensor_size = BIG_TE
401
401
  return
402
402
 
403
403
 
404
- def _pin_to_memory(model, model_id, partialPinning = False, pinnedPEFTLora = True, gig_tensor_size = BIG_TENSOR_MAX_SIZE, verboseLevel = 1):
404
+ def _pin_to_memory(model, model_id, partialPinning = False, pinnedPEFTLora = True, big_tensor_size = BIG_TENSOR_MAX_SIZE, verboseLevel = 1):
405
405
 
406
406
  global max_pinnable_bytes, total_pinned_bytes
407
407
  if max_pinnable_bytes > 0 and max_pinnable_bytes >= max_pinnable_bytes:
@@ -474,7 +474,7 @@ def _pin_to_memory(model, model_id, partialPinning = False, pinnedPEFTLora = Tru
474
474
  length = torch.numel(p.data) * p.data.element_size()
475
475
 
476
476
  ref_cache[ref] = (n, length)
477
- if current_big_tensor_size + length > gig_tensor_size :
477
+ if current_big_tensor_size + length > big_tensor_size and current_big_tensor_size !=0 :
478
478
  big_tensors_sizes.append(current_big_tensor_size)
479
479
  current_big_tensor_size = 0
480
480
  big_tensor_no += 1
@@ -498,28 +498,11 @@ def _pin_to_memory(model, model_id, partialPinning = False, pinnedPEFTLora = Tru
498
498
  big_tensors_sizes.append(current_big_tensor_size)
499
499
 
500
500
  big_tensors = []
501
- last_big_tensor = 0
502
501
  total = 0
503
502
 
504
503
 
505
504
  failed_planned_allocation = False
506
505
 
507
- # for size in big_tensors_sizes:
508
- # try:
509
- # # if total > 7000 * ONE_MB:
510
- # # raise Exception ("test no more reserved RAM")
511
- # current_big_tensor = torch.empty( size, dtype= torch.uint8, pin_memory=True, device="cpu")
512
- # big_tensors.append(current_big_tensor)
513
- # except:
514
- # print(f"Unable to pin more tensors for this model as the maximum reservable memory has been reached ({total/ONE_MB:.2f})")
515
- # max_pinnable_bytes = total + total_pinned_bytes
516
- # failed_planned_allocation = True
517
- # break
518
-
519
- # last_big_tensor += 1
520
- # total += size
521
-
522
-
523
506
  gc.collect()
524
507
 
525
508
  last_allocated_big_tensor = -1
@@ -561,13 +544,6 @@ def _pin_to_memory(model, model_id, partialPinning = False, pinnedPEFTLora = Tru
561
544
 
562
545
  total += size
563
546
 
564
- # if big_tensor_no != prev_big_tensor:
565
- # gc.collect()
566
- # prev_big_tensor = big_tensor_no
567
- # match_param, match_isbuffer = tied_weights.get(n, (None, False))
568
- # if match_param != None:
569
-
570
- # if big_tensor_no>=0 and big_tensor_no < last_big_tensor:
571
547
  current_big_tensor = big_tensors[big_tensor_no]
572
548
  if is_buffer :
573
549
  _force_load_buffer(p) # otherwise potential memory leak
@@ -600,9 +576,9 @@ def _pin_to_memory(model, model_id, partialPinning = False, pinnedPEFTLora = Tru
600
576
 
601
577
  if verboseLevel >=1:
602
578
  if partialPinning or failed_planned_allocation:
603
- print(f"The model was partially pinned to reserved RAM: {last_big_tensor} large blocks spread across {total/ONE_MB:.2f} MB")
579
+ print(f"The model was partially pinned to reserved RAM: {last_allocated_big_tensor + 1} large blocks spread across {total/ONE_MB:.2f} MB")
604
580
  else:
605
- print(f"The whole model was pinned to reserved RAM: {last_big_tensor} large blocks spread across {total/ONE_MB:.2f} MB")
581
+ print(f"The whole model was pinned to reserved RAM: {last_allocated_big_tensor + 1} large blocks spread across {total/ONE_MB:.2f} MB")
606
582
 
607
583
  model._already_pinned = True
608
584
 
@@ -949,7 +925,7 @@ def load_loras_into_model(model, lora_path, lora_multi = None, activate_all_lora
949
925
  continue
950
926
  fail = False
951
927
  skip = False
952
- state_dict = safetensors2.torch_load_file(path)
928
+ state_dict = safetensors2.torch_load_file(path, writable_tensors= False)
953
929
 
954
930
 
955
931
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mmgp
3
- Version: 3.3.2
3
+ Version: 3.3.3
4
4
  Summary: Memory Management for the GPU Poor
5
5
  Author-email: deepbeepmeep <deepbeepmeep@yahoo.com>
6
6
  License: GNU GENERAL PUBLIC LICENSE
@@ -17,7 +17,7 @@ Dynamic: license-file
17
17
 
18
18
 
19
19
  <p align="center">
20
- <H2>Memory Management 3.3.2 for the GPU Poor by DeepBeepMeep</H2>
20
+ <H2>Memory Management 3.3.3 for the GPU Poor by DeepBeepMeep</H2>
21
21
  </p>
22
22
 
23
23
 
@@ -0,0 +1,9 @@
1
+ __init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ mmgp/__init__.py,sha256=A9qBwyQMd1M7vshSTOBnFGP1MQvS2hXmTcTCMUcmyzE,509
3
+ mmgp/offload.py,sha256=R0UbOXEGAFKd_6090o8v5CkVmJiWmHDQsww7A3-LZEU,106550
4
+ mmgp/safetensors2.py,sha256=rmUbBmK3Dra5prUTTRSVi6-XUFAa9Mj6B5CNPgzt9To,17333
5
+ mmgp-3.3.3.dist-info/licenses/LICENSE.md,sha256=HjzvY2grdtdduZclbZ46B2M-XpT4MDCxFub5ZwTWq2g,93
6
+ mmgp-3.3.3.dist-info/METADATA,sha256=xcODp7uhIfvy7Il1xEp8ed2VYmH1Eln-EnLy3MM4VGM,16153
7
+ mmgp-3.3.3.dist-info/WHEEL,sha256=DK49LOLCYiurdXXOXwGJm6U4DkHkg4lcxjhqwRa0CP4,91
8
+ mmgp-3.3.3.dist-info/top_level.txt,sha256=waGaepj2qVfnS2yAOkaMu4r9mJaVjGbEi6AwOUogU_U,14
9
+ mmgp-3.3.3.dist-info/RECORD,,
@@ -1,9 +0,0 @@
1
- __init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- mmgp/__init__.py,sha256=A9qBwyQMd1M7vshSTOBnFGP1MQvS2hXmTcTCMUcmyzE,509
3
- mmgp/offload.py,sha256=43FnFfWqwhh2qz0uykqEpxb_XP9Jx8MPGzN31PExT2w,107470
4
- mmgp/safetensors2.py,sha256=rmUbBmK3Dra5prUTTRSVi6-XUFAa9Mj6B5CNPgzt9To,17333
5
- mmgp-3.3.2.dist-info/licenses/LICENSE.md,sha256=HjzvY2grdtdduZclbZ46B2M-XpT4MDCxFub5ZwTWq2g,93
6
- mmgp-3.3.2.dist-info/METADATA,sha256=mVMLkutqhUihIeo8uo_LK71ithm84_AEaNvnyRnzmEA,16153
7
- mmgp-3.3.2.dist-info/WHEEL,sha256=DK49LOLCYiurdXXOXwGJm6U4DkHkg4lcxjhqwRa0CP4,91
8
- mmgp-3.3.2.dist-info/top_level.txt,sha256=waGaepj2qVfnS2yAOkaMu4r9mJaVjGbEi6AwOUogU_U,14
9
- mmgp-3.3.2.dist-info/RECORD,,
File without changes