mmgp 3.1.1-py3-none-any.whl → 3.1.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mmgp might be problematic.

mmgp/offload.py CHANGED
@@ -576,7 +576,7 @@ def _quantize(model_to_quantize, weights=qint8, verboseLevel = 1, threshold = 10
     if hasattr(model_to_quantize, "_quanto_map"):
         for k, entry in model_to_quantize._quanto_map.items():
             weights = entry["weights"]
-            print(f"Model '{model_id}' is already quantized in format '{weights}'")
+            print(f"Model '{model_id}' is already quantized to format '{weights}'")
             return False
         print(f"Model '{model_id}' is already quantized")
         return False
@@ -680,7 +680,7 @@ def _quantize(model_to_quantize, weights=qint8, verboseLevel = 1, threshold = 10
 
     return True
 
-def load_loras_into_model(model, lora_path, lora_multi = None, verboseLevel = -1):
+def load_loras_into_model(model, lora_path, lora_multi = None, activate_all_loras = True, verboseLevel = -1,):
     verboseLevel = _compute_verbose_level(verboseLevel)
 
     if inject_adapter_in_model == None or set_weights_and_activate_adapters == None or get_peft_kwargs == None:
@@ -731,9 +731,6 @@ def load_loras_into_model(model, lora_path, lora_multi = None, verboseLevel = -1
 
     # is_correct_format = all("lora" in key for key in state_dict.keys())
 
-
-
-
     # check with first key if is not in peft format
     # first_key = next(iter(state_dict.keys()))
     # if "lora_A" not in first_key:
@@ -770,7 +767,17 @@ def load_loras_into_model(model, lora_path, lora_multi = None, verboseLevel = -1
         pass
     if verboseLevel >=1:
         print(f"Lora '{path}' was loaded in model '{_get_module_name(model)}'")
-    set_weights_and_activate_adapters(model,[ str(i) for i in range(len(lora_multi))], lora_multi)
+    if activate_all_loras:
+        set_weights_and_activate_adapters(model,[ str(i) for i in range(len(lora_multi))], lora_multi)
+
+def activate_loras(model, lora_nos, lora_multi = None ):
+    if not isinstance(lora_nos, list):
+        lora_nos = [lora_nos]
+    lora_nos = [str(l) for l in lora_nos]
+    if lora_multi is None:
+        lora_multi = [1. for _ in lora_nos]
+    set_weights_and_activate_adapters(model, lora_nos, lora_multi)
+
 
 def move_loras_to_device(model, device="cpu" ):
     if hasattr( model, "_lora_loadable_modules"):
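
Taken together, the new activate_all_loras flag and activate_loras helper let a caller load adapters first and decide later which ones to enable. A minimal usage sketch (the file names are hypothetical; lora_path is passed here as a list, which the per-path loop above suggests is supported):

from mmgp import offload

# Load two LoRAs but defer activation (new activate_all_loras flag).
offload.load_loras_into_model(model, ["lora_a.safetensors", "lora_b.safetensors"],
                              activate_all_loras=False)

# Adapters are registered under the names "0", "1", ...;
# enable only adapter "1", at half strength.
offload.activate_loras(model, 1, [0.5])
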
@@ -979,14 +986,13 @@ class offload:
         self.blocks_of_modules[entry_name] = blocks_params
         blocks_params_size = 0
         if blocks_name !=None:
-
             prev_entry_name = None if prev_block_name == None else model_id + "/" + prev_block_name
             self.prev_blocks_names[entry_name] = prev_entry_name
             if not prev_block_name == None:
                 self.next_blocks_names[prev_entry_name] = entry_name
 
-
         for k,p in submodule.named_parameters(recurse=False):
+
             if isinstance(p, QTensor):
                 blocks_params.append( (submodule, k, p, False ) )
 
@@ -1268,7 +1274,7 @@ class offload:
 
         if module_id == None or module_id =='':
             model_name = model._get_name()
-            print(f"Hooked in model '{model_id}' ({model_name})")
+            print(f"Hooked to model '{model_id}' ({model_name})")
 
 
 def save_model(model, file_path, do_quantize = False, quantizationType = qint8, verboseLevel = -1, config_file_path = None ):
@@ -1311,7 +1317,29 @@ def save_model(model, file_path, do_quantize = False, quantizationType = qint8,
         print(f"File '{file_path}' saved")
 
 
+def extract_models(prefix, obj):
+    pipe = {}
+    for name in dir(obj):
+        element = getattr(obj,name)
+        if name in ("pipeline", "pipe"):
+            pipeline = element
+            if hasattr(pipeline , "components") and isinstance(pipeline.components, dict):
+                for k, model in pipeline.components.items():
+                    if model != None:
+                        pipe[prefix + "/" + k ] = model
+        elif isinstance(element, torch.nn.Module):
+            if prefix + "/" + name in pipe:
+                pipe[prefix + "/_" + name ] = element
+            else:
+                pipe[prefix + "/" + name ] = element
+        elif isinstance(element, dict):
+            for k, element in element.items():
+                if hasattr(element , "pipeline"):
+                    pipe.update( extract_models(prefix + "/" + k,element ))
+
 
+    return pipe
+
 
 def all(pipe_or_dict_of_modules, pinnedMemory = False, quantizeTransformer = True, extraModelsToQuantize = None, quantizationType = qint8, budgets= 0, asyncTransfers = True, compile = False, perc_reserved_mem_max = 0, verboseLevel = -1):
     """Hook to a pipeline or a group of modules in order to reduce their VRAM requirements:
@@ -1337,6 +1365,7 @@ def all(pipe_or_dict_of_modules, pinnedMemory = False, quantizeTransformer = Tru
     if not budgets is None:
         if isinstance(budgets , dict):
             model_budgets = budgets
+            budget = budgets.get("*", 0) * ONE_MB
         else:
             budget = int(budgets) * ONE_MB
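
With this change, a budgets dict passed to offload.all honors a "*" entry as the default per-model budget (in MB) for any model not listed explicitly; a plain numeric budgets value keeps its old meaning. A minimal sketch (values illustrative):

# 400 MB for the transformer, 3000 MB for every other model in the pipe.
offload.all(pipe, budgets={"transformer": 400, "*": 3000})
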
 
@@ -1451,7 +1480,7 @@ def all(pipe_or_dict_of_modules, pinnedMemory = False, quantizeTransformer = Tru
         if model_budget > 0 and model_budget > current_model_size:
             model_budget = 0
 
-        model_budgets[model_id] = model_budget
+        model_budgets[model_id] = model_budget #/ 2 if asyncTransfers else model_budget
 
         partialPinning = False
 
@@ -1495,10 +1524,11 @@ def all(pipe_or_dict_of_modules, pinnedMemory = False, quantizeTransformer = Tru
             if not hasattr(submodule, "_hf_hook"):
                 setattr(submodule, "_hf_hook", HfHook())
 
-            if submodule_name=='':
-                continue
-
-            if current_budget > 0:
+            # if submodule_name=='':
+            #     continue
+
+
+            if current_budget > 0 and len(submodule_name) > 0:
                 if cur_blocks_prefix != None:
                     if submodule_name.startswith(cur_blocks_prefix):
                         depth_prefix = cur_blocks_prefix.split(".")
@@ -1508,7 +1538,7 @@ def all(pipe_or_dict_of_modules, pinnedMemory = False, quantizeTransformer = Tru
                         if num != cur_blocks_seq and (cur_blocks_seq == -1 or current_size > current_budget):
                             prev_blocks_name = cur_blocks_name
                             cur_blocks_name = cur_blocks_prefix + str(num)
-                            # print(f"new block: {model_id}/{cur_blocks_name} - {submodule_name}")
+                            print(f"new block: {model_id}/{cur_blocks_name} - {submodule_name}")
                             cur_blocks_seq = num
                 else:
                     cur_blocks_prefix, prev_blocks_name, cur_blocks_name,cur_blocks_seq = None, None, None, -1
@@ -1520,7 +1550,7 @@ def all(pipe_or_dict_of_modules, pinnedMemory = False, quantizeTransformer = Tru
                 elif num >=0:
                     cur_blocks_prefix, prev_blocks_name, cur_blocks_seq = pre, None, num
                     cur_blocks_name = submodule_name
-                    # print(f"new block: {model_id}/{cur_blocks_name} - {submodule_name}")
+                    print(f"new block: {model_id}/{cur_blocks_name} - {submodule_name}")
 
 
             if hasattr(submodule, "forward"):
@@ -1590,37 +1620,45 @@ def profile(pipe_or_dict_of_modules, profile_no: profile_type = profile_type.Ve
     # transformer (video or image generator) should be as small as possible not to occupy space that could be used by actual image data
     # on the other hand the text encoder should be quite large (as long as it fits in 10 GB of VRAM) to reduce sequence offloading
 
-    default_budgets = { "transformer" : 600 , "text_encoder": 3000, "text_encoder_2": 3000 }
+    budgets = {}
+    if "transformer" in modules:
+        budgets["transformer"] = 1200
+
     extraModelsToQuantize = None
     asyncTransfers = True
-    budgets = None
 
     if profile_no == profile_type.HighRAM_HighVRAM:
         pinnedMemory= True
         budgets = None
-        info = "You have chosen a profile that requires at least 48 GB of RAM and 24 GB of VRAM. Some VRAM is consuming just to make the model runs faster."
+        info = "You have chosen a profile that requires at least 48 GB of RAM and 24 GB of VRAM. Some VRAM is consumed just to make the model runs faster."
     elif profile_no == profile_type.HighRAM_LowVRAM:
         pinnedMemory= True
-        budgets = default_budgets
+        budgets["*"] = 3000
        info = "You have chosen a profile that requires at least 48 GB of RAM and 12 GB of VRAM. Some RAM is consumed to reduce VRAM consumption."
     elif profile_no == profile_type.LowRAM_HighVRAM:
         pinnedMemory= "transformer"
         extraModelsToQuantize = default_extraModelsToQuantize
+        budgets = None
         info = "You have chosen a Medium speed profile that requires at least 32 GB of RAM and 24 GB of VRAM. Some VRAM is consuming just to make the model runs faster"
     elif profile_no == profile_type.LowRAM_LowVRAM:
         pinnedMemory= "transformer"
         extraModelsToQuantize = default_extraModelsToQuantize
-        budgets=default_budgets
+        budgets["*"] = 3000
         info = "You have chosen a profile that requires at least 32 GB of RAM and 12 GB of VRAM. Some RAM is consumed to reduce VRAM consumption. "
     elif profile_no == profile_type.VerylowRAM_LowVRAM:
         pinnedMemory= False
         extraModelsToQuantize = default_extraModelsToQuantize
-        budgets=default_budgets
-        budgets["transformer"] = 400
-        asyncTransfers = False
+        budgets["*"] = 3000
+        if "transformer" in modules:
+            budgets["transformer"] = 400
+        #asyncTransfers = False
         info = "You have chosen the slowest profile that requires at least 24 GB of RAM and 10 GB of VRAM."
     else:
         raise Exception("Unknown profile")
+
+    if budgets != None and len(budgets) == 0:
+        budgets = None
+
     CrLf = '\r\n'
     kwargs = { "pinnedMemory": pinnedMemory, "extraModelsToQuantize" : extraModelsToQuantize, "budgets": budgets, "asyncTransfers" : asyncTransfers, "quantizeTransformer": quantizeTransformer }
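
Per the hunk above, the built-in profiles now express their budgets through the "*" default plus an optional per-transformer entry. As a rough equivalence, not the literal call the library makes (import style as documented for mmgp; "transformer" applies only when the pipe has one):

from mmgp import offload, profile_type

offload.profile(pipe, profile_no=profile_type.HighRAM_LowVRAM)
# now behaves roughly like:
offload.all(pipe, pinnedMemory=True, asyncTransfers=True,
            budgets={"transformer": 1200, "*": 3000})
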
 
mmgp-3.1.1.dist-info/METADATA → mmgp-3.1.3.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: mmgp
-Version: 3.1.1
+Version: 3.1.3
 Summary: Memory Management for the GPU Poor
 Author-email: deepbeepmeep <deepbeepmeep@yahoo.com>
 License: GNU GENERAL PUBLIC LICENSE
mmgp-3.1.3.dist-info/RECORD ADDED
@@ -0,0 +1,9 @@
+__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+mmgp/__init__.py,sha256=A9qBwyQMd1M7vshSTOBnFGP1MQvS2hXmTcTCMUcmyzE,509
+mmgp/offload.py,sha256=JB40Ky84Njhuf2BauLvNhH_-IS_27lhfYuLqVVhmJtA,71080
+mmgp/safetensors2.py,sha256=OkJAvENfWeb-PL0FcxS1-eYeHLbemTaNXYvNxURrzIs,16154
+mmgp-3.1.3.dist-info/LICENSE.md,sha256=HjzvY2grdtdduZclbZ46B2M-XpT4MDCxFub5ZwTWq2g,93
+mmgp-3.1.3.dist-info/METADATA,sha256=pfkzWdQKY-7wNEMN66pwUPxfmXDGZSjJpBwvYolUDb4,12708
+mmgp-3.1.3.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+mmgp-3.1.3.dist-info/top_level.txt,sha256=waGaepj2qVfnS2yAOkaMu4r9mJaVjGbEi6AwOUogU_U,14
+mmgp-3.1.3.dist-info/RECORD,,
mmgp-3.1.1.dist-info/RECORD DELETED
@@ -1,9 +0,0 @@
-__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-mmgp/__init__.py,sha256=A9qBwyQMd1M7vshSTOBnFGP1MQvS2hXmTcTCMUcmyzE,509
-mmgp/offload.py,sha256=h74eKyWVZmDM--l4KbiZYXdpkcGM8ySUgyvkFtFRtNQ,69593
-mmgp/safetensors2.py,sha256=OkJAvENfWeb-PL0FcxS1-eYeHLbemTaNXYvNxURrzIs,16154
-mmgp-3.1.1.dist-info/LICENSE.md,sha256=HjzvY2grdtdduZclbZ46B2M-XpT4MDCxFub5ZwTWq2g,93
-mmgp-3.1.1.dist-info/METADATA,sha256=wtHNzulNFaWmruVO4cGgcRuIIN2eHPHo47nkgGMOWqw,12708
-mmgp-3.1.1.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
-mmgp-3.1.1.dist-info/top_level.txt,sha256=waGaepj2qVfnS2yAOkaMu4r9mJaVjGbEi6AwOUogU_U,14
-mmgp-3.1.1.dist-info/RECORD,,