mmgp-3.1.2-py3-none-any.whl → mmgp-3.1.3-py3-none-any.whl

This diff shows the contents of publicly available package versions as released to their respective public registries. It is provided for informational purposes only.

Potentially problematic release: this version of mmgp might be problematic.

mmgp/offload.py CHANGED
@@ -986,14 +986,13 @@ class offload:
         self.blocks_of_modules[entry_name] = blocks_params
         blocks_params_size = 0
         if blocks_name !=None:
-
             prev_entry_name = None if prev_block_name == None else model_id + "/" + prev_block_name
             self.prev_blocks_names[entry_name] = prev_entry_name
             if not prev_block_name == None:
                 self.next_blocks_names[prev_entry_name] = entry_name
 
-
         for k,p in submodule.named_parameters(recurse=False):
+
             if isinstance(p, QTensor):
                 blocks_params.append( (submodule, k, p, False ) )
 
@@ -1275,7 +1274,7 @@ class offload:
 
         if module_id == None or module_id =='':
             model_name = model._get_name()
-            print(f"Hooked in model '{model_id}' ({model_name})")
+            print(f"Hooked to model '{model_id}' ({model_name})")
 
 
 def save_model(model, file_path, do_quantize = False, quantizationType = qint8, verboseLevel = -1, config_file_path = None ):
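For context, `save_model` (whose signature closes the hunk above) writes a model out to a safetensors file and can quantize it on the way. A minimal sketch of a call, assuming mmgp is installed and `offload` refers to the `mmgp.offload` module; the `Linear` stand-in is hypothetical:

import torch
from mmgp import offload

model = torch.nn.Linear(16, 16)  # hypothetical stand-in for a real transformer

# do_quantize=True quantizes with the default quantizationType (qint8)
# before the file is written.
offload.save_model(model, "model.safetensors", do_quantize=True)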
@@ -1318,8 +1317,30 @@ def save_model(model, file_path, do_quantize = False, quantizationType = qint8,
     print(f"File '{file_path}' saved")
 
 
+def extract_models(prefix, obj):
+    pipe = {}
+    for name in dir(obj):
+        element = getattr(obj,name)
+        if name in ("pipeline", "pipe"):
+            pipeline = element
+            if hasattr(pipeline , "components") and isinstance(pipeline.components, dict):
+                for k, model in pipeline.components.items():
+                    if model != None:
+                        pipe[prefix + "/" + k ] = model
+        elif isinstance(element, torch.nn.Module):
+            if prefix + "/" + name in pipe:
+                pipe[prefix + "/_" + name ] = element
+            else:
+                pipe[prefix + "/" + name ] = element
+        elif isinstance(element, dict):
+            for k, element in element.items():
+                if hasattr(element , "pipeline"):
+                    pipe.update( extract_models(prefix + "/" + k,element ))
 
 
+    return pipe
+
+
 def all(pipe_or_dict_of_modules, pinnedMemory = False, quantizeTransformer = True, extraModelsToQuantize = None, quantizationType = qint8, budgets= 0, asyncTransfers = True, compile = False, perc_reserved_mem_max = 0, verboseLevel = -1):
     """Hook to a pipeline or a group of modules in order to reduce their VRAM requirements:
     pipe_or_dict_of_modules : the pipeline object or a dictionary of modules of the model
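The new `extract_models` helper added above walks an object's attributes and collects every `torch.nn.Module` it finds: directly, through a `pipeline`/`pipe` attribute exposing a Diffusers-style `components` dict, or recursively through dicts of pipeline holders, returning a flat dict keyed `prefix/name`. A minimal sketch of a call, assuming mmgp 3.1.3 is installed; `FakePipeline` and `App` are hypothetical stand-ins:

import torch
from mmgp.offload import extract_models  # new module-level helper in 3.1.3

class FakePipeline:
    def __init__(self):
        # mimics a Diffusers pipeline: a .components dict of named modules
        self.components = {"transformer": torch.nn.Linear(4, 4),
                           "text_encoder": torch.nn.Linear(4, 4)}

class App:
    def __init__(self):
        self.pipe = FakePipeline()             # found via the "pipe" attribute
        self.upscaler = torch.nn.Linear(8, 8)  # found as a bare nn.Module

models = extract_models("app", App())
print(sorted(models))  # ['app/text_encoder', 'app/transformer', 'app/upscaler']

The resulting dict can then be handed to `all` (below) in place of a pipeline object.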
@@ -1344,6 +1365,7 @@ def all(pipe_or_dict_of_modules, pinnedMemory = False, quantizeTransformer = Tru
     if not budgets is None:
         if isinstance(budgets , dict):
             model_budgets = budgets
+            budget = budgets.get("*", 0) * ONE_MB
         else:
             budget = int(budgets) * ONE_MB
 
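The added line lets a `budgets` dictionary passed to `all` carry a `"*"` wildcard entry: it becomes the default per-model budget (in MB, scaled by `ONE_MB`) for any model without an explicit entry. A hedged sketch of the calling side; `pipe` stands for an already-built Diffusers pipeline and is deliberately not constructed here:

from mmgp import offload

budgets = {
    "transformer": 400,  # explicit per-model budget in MB
    "*": 3000,           # new in 3.1.3: default for every other model
}

# offload.all(pipe, pinnedMemory=True, budgets=budgets)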
@@ -1458,7 +1480,7 @@ def all(pipe_or_dict_of_modules, pinnedMemory = False, quantizeTransformer = Tru
         if model_budget > 0 and model_budget > current_model_size:
             model_budget = 0
 
-        model_budgets[model_id] = model_budget
+        model_budgets[model_id] = model_budget #/ 2 if asyncTransfers else model_budget
 
         partialPinning = False
 
@@ -1502,10 +1524,11 @@ def all(pipe_or_dict_of_modules, pinnedMemory = False, quantizeTransformer = Tru
             if not hasattr(submodule, "_hf_hook"):
                 setattr(submodule, "_hf_hook", HfHook())
 
-            if submodule_name=='':
-                continue
-
-            if current_budget > 0:
+            # if submodule_name=='':
+            #     continue
+
+
+            if current_budget > 0 and len(submodule_name) > 0:
                 if cur_blocks_prefix != None:
                     if submodule_name.startswith(cur_blocks_prefix):
                         depth_prefix = cur_blocks_prefix.split(".")
@@ -1515,7 +1538,7 @@ def all(pipe_or_dict_of_modules, pinnedMemory = False, quantizeTransformer = Tru
                         if num != cur_blocks_seq and (cur_blocks_seq == -1 or current_size > current_budget):
                             prev_blocks_name = cur_blocks_name
                             cur_blocks_name = cur_blocks_prefix + str(num)
-                            # print(f"new block: {model_id}/{cur_blocks_name} - {submodule_name}")
+                            print(f"new block: {model_id}/{cur_blocks_name} - {submodule_name}")
                             cur_blocks_seq = num
                     else:
                         cur_blocks_prefix, prev_blocks_name, cur_blocks_name,cur_blocks_seq = None, None, None, -1
@@ -1527,7 +1550,7 @@ def all(pipe_or_dict_of_modules, pinnedMemory = False, quantizeTransformer = Tru
                     elif num >=0:
                         cur_blocks_prefix, prev_blocks_name, cur_blocks_seq = pre, None, num
                         cur_blocks_name = submodule_name
-                        # print(f"new block: {model_id}/{cur_blocks_name} - {submodule_name}")
+                        print(f"new block: {model_id}/{cur_blocks_name} - {submodule_name}")
 
 
             if hasattr(submodule, "forward"):
@@ -1597,37 +1620,45 @@ def profile(pipe_or_dict_of_modules, profile_no: profile_type = profile_type.Ve
     # transformer (video or image generator) should be as small as possible not to occupy space that could be used by actual image data
     # on the other hand the text encoder should be quite large (as long as it fits in 10 GB of VRAM) to reduce sequence offloading
 
-    default_budgets = { "transformer" : 600 , "text_encoder": 3000, "text_encoder_2": 3000 }
+    budgets = {}
+    if "transformer" in modules:
+        budgets["transformer"] = 1200
+
     extraModelsToQuantize = None
     asyncTransfers = True
-    budgets = None
 
     if profile_no == profile_type.HighRAM_HighVRAM:
        pinnedMemory= True
        budgets = None
-       info = "You have chosen a profile that requires at least 48 GB of RAM and 24 GB of VRAM. Some VRAM is consuming just to make the model runs faster."
+       info = "You have chosen a profile that requires at least 48 GB of RAM and 24 GB of VRAM. Some VRAM is consumed just to make the model runs faster."
    elif profile_no == profile_type.HighRAM_LowVRAM:
        pinnedMemory= True
-       budgets = default_budgets
+       budgets["*"] = 3000
        info = "You have chosen a profile that requires at least 48 GB of RAM and 12 GB of VRAM. Some RAM is consumed to reduce VRAM consumption."
    elif profile_no == profile_type.LowRAM_HighVRAM:
        pinnedMemory= "transformer"
        extraModelsToQuantize = default_extraModelsToQuantize
+       budgets = None
        info = "You have chosen a Medium speed profile that requires at least 32 GB of RAM and 24 GB of VRAM. Some VRAM is consuming just to make the model runs faster"
    elif profile_no == profile_type.LowRAM_LowVRAM:
        pinnedMemory= "transformer"
        extraModelsToQuantize = default_extraModelsToQuantize
-       budgets=default_budgets
+       budgets["*"] = 3000
        info = "You have chosen a profile that requires at least 32 GB of RAM and 12 GB of VRAM. Some RAM is consumed to reduce VRAM consumption. "
    elif profile_no == profile_type.VerylowRAM_LowVRAM:
        pinnedMemory= False
        extraModelsToQuantize = default_extraModelsToQuantize
-       budgets=default_budgets
-       budgets["transformer"] = 400
+       budgets["*"] = 3000
+       if "transformer" in modules:
+           budgets["transformer"] = 400
        #asyncTransfers = False
        info = "You have chosen the slowest profile that requires at least 24 GB of RAM and 10 GB of VRAM."
    else:
        raise Exception("Unknown profile")
+
+    if budgets != None and len(budgets) == 0:
+        budgets = None
+
     CrLf = '\r\n'
     kwargs = { "pinnedMemory": pinnedMemory, "extraModelsToQuantize" : extraModelsToQuantize, "budgets": budgets, "asyncTransfers" : asyncTransfers, "quantizeTransformer": quantizeTransformer }
 
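Since the presets above now assemble their budget dicts on the fly (including the "*" wildcard), selecting a profile remains a one-liner. A minimal sketch, assuming mmgp is installed on a CUDA-capable setup; the tiny `Linear` modules are hypothetical stand-ins for a real pipeline's models:

import torch
from mmgp import offload, profile_type

# A dict of modules can be passed in place of a pipeline object.
models = {"transformer": torch.nn.Linear(16, 16),
          "text_encoder": torch.nn.Linear(16, 16)}

# VerylowRAM_LowVRAM caps "transformer" at 400 MB and the other
# models at 3000 MB through the "*" wildcard budget.
offload.profile(models, profile_no=profile_type.VerylowRAM_LowVRAM)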
mmgp-3.1.2.dist-info/METADATA → mmgp-3.1.3.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: mmgp
-Version: 3.1.2
+Version: 3.1.3
 Summary: Memory Management for the GPU Poor
 Author-email: deepbeepmeep <deepbeepmeep@yahoo.com>
 License: GNU GENERAL PUBLIC LICENSE
mmgp-3.1.3.dist-info/RECORD ADDED
@@ -0,0 +1,9 @@
+__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+mmgp/__init__.py,sha256=A9qBwyQMd1M7vshSTOBnFGP1MQvS2hXmTcTCMUcmyzE,509
+mmgp/offload.py,sha256=JB40Ky84Njhuf2BauLvNhH_-IS_27lhfYuLqVVhmJtA,71080
+mmgp/safetensors2.py,sha256=OkJAvENfWeb-PL0FcxS1-eYeHLbemTaNXYvNxURrzIs,16154
+mmgp-3.1.3.dist-info/LICENSE.md,sha256=HjzvY2grdtdduZclbZ46B2M-XpT4MDCxFub5ZwTWq2g,93
+mmgp-3.1.3.dist-info/METADATA,sha256=pfkzWdQKY-7wNEMN66pwUPxfmXDGZSjJpBwvYolUDb4,12708
+mmgp-3.1.3.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+mmgp-3.1.3.dist-info/top_level.txt,sha256=waGaepj2qVfnS2yAOkaMu4r9mJaVjGbEi6AwOUogU_U,14
+mmgp-3.1.3.dist-info/RECORD,,

mmgp-3.1.2.dist-info/RECORD REMOVED
@@ -1,9 +0,0 @@
-__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-mmgp/__init__.py,sha256=A9qBwyQMd1M7vshSTOBnFGP1MQvS2hXmTcTCMUcmyzE,509
-mmgp/offload.py,sha256=4CuoGrviBgNfKWD87JIK4FCSAV4hjWI85Ta3mL2v1Mw,69958
-mmgp/safetensors2.py,sha256=OkJAvENfWeb-PL0FcxS1-eYeHLbemTaNXYvNxURrzIs,16154
-mmgp-3.1.2.dist-info/LICENSE.md,sha256=HjzvY2grdtdduZclbZ46B2M-XpT4MDCxFub5ZwTWq2g,93
-mmgp-3.1.2.dist-info/METADATA,sha256=j938ccarHciRswKcyiMobN2M1ACrpZZPXSXjr-4Hjdk,12708
-mmgp-3.1.2.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
-mmgp-3.1.2.dist-info/top_level.txt,sha256=waGaepj2qVfnS2yAOkaMu4r9mJaVjGbEi6AwOUogU_U,14
-mmgp-3.1.2.dist-info/RECORD,,
The remaining files (LICENSE.md, WHEEL, top_level.txt) are unchanged; their RECORD hashes are identical across both versions.