mmgp 3.1.2.tar.gz → 3.1.3.tar.gz
This diff shows the changes between publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
Potentially problematic release: this version of mmgp might be problematic.
- {mmgp-3.1.2/src/mmgp.egg-info → mmgp-3.1.3}/PKG-INFO +1 -1
- {mmgp-3.1.2 → mmgp-3.1.3}/pyproject.toml +1 -1
- {mmgp-3.1.2 → mmgp-3.1.3}/src/mmgp/offload.py +48 -17
- {mmgp-3.1.2 → mmgp-3.1.3/src/mmgp.egg-info}/PKG-INFO +1 -1
- {mmgp-3.1.2 → mmgp-3.1.3}/LICENSE.md +0 -0
- {mmgp-3.1.2 → mmgp-3.1.3}/README.md +0 -0
- {mmgp-3.1.2 → mmgp-3.1.3}/setup.cfg +0 -0
- {mmgp-3.1.2 → mmgp-3.1.3}/src/__init__.py +0 -0
- {mmgp-3.1.2 → mmgp-3.1.3}/src/mmgp/__init__.py +0 -0
- {mmgp-3.1.2 → mmgp-3.1.3}/src/mmgp/safetensors2.py +0 -0
- {mmgp-3.1.2 → mmgp-3.1.3}/src/mmgp.egg-info/SOURCES.txt +0 -0
- {mmgp-3.1.2 → mmgp-3.1.3}/src/mmgp.egg-info/dependency_links.txt +0 -0
- {mmgp-3.1.2 → mmgp-3.1.3}/src/mmgp.egg-info/requires.txt +0 -0
- {mmgp-3.1.2 → mmgp-3.1.3}/src/mmgp.egg-info/top_level.txt +0 -0
@@ -986,14 +986,13 @@ class offload:
         self.blocks_of_modules[entry_name] = blocks_params
         blocks_params_size = 0
         if blocks_name !=None:
-
             prev_entry_name = None if prev_block_name == None else model_id + "/" + prev_block_name
             self.prev_blocks_names[entry_name] = prev_entry_name
             if not prev_block_name == None:
                 self.next_blocks_names[prev_entry_name] = entry_name

-
         for k,p in submodule.named_parameters(recurse=False):
+
             if isinstance(p, QTensor):
                 blocks_params.append( (submodule, k, p, False ) )

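Note on this hunk: `blocks_of_modules` collects each block's parameters, while `prev_blocks_names` / `next_blocks_names` link the blocks into a chain so the offloader knows which block precedes or follows the current one (useful for prefetching with async transfers). A minimal standalone sketch of the chaining idea; the two dict names mirror the fields above, everything else is illustrative and not mmgp code:

    # Illustrative sketch: link block names forward and backward, as the
    # entry_name/prev_entry_name bookkeeping in the hunk above does.
    prev_blocks_names = {}
    next_blocks_names = {}
    prev = None
    for name in ["model/blocks.0", "model/blocks.1", "model/blocks.2"]:
        prev_blocks_names[name] = prev        # None for the first block
        if prev is not None:
            next_blocks_names[prev] = name    # forward link used for prefetch
        prev = name
    assert next_blocks_names["model/blocks.0"] == "model/blocks.1"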
@@ -1275,7 +1274,7 @@ class offload:

        if module_id == None or module_id =='':
            model_name = model._get_name()
-           print(f"Hooked
+           print(f"Hooked to model '{model_id}' ({model_name})")


 def save_model(model, file_path, do_quantize = False, quantizationType = qint8, verboseLevel = -1, config_file_path = None ):

@@ -1318,8 +1317,30 @@ def save_model(model, file_path, do_quantize = False, quantizationType = qint8,
     print(f"File '{file_path}' saved")


+def extract_models(prefix, obj):
+    pipe = {}
+    for name in dir(obj):
+        element = getattr(obj,name)
+        if name in ("pipeline", "pipe"):
+            pipeline = element
+            if hasattr(pipeline , "components") and isinstance(pipeline.components, dict):
+                for k, model in pipeline.components.items():
+                    if model != None:
+                        pipe[prefix + "/" + k ] = model
+        elif isinstance(element, torch.nn.Module):
+            if prefix + "/" + name in pipe:
+                pipe[prefix + "/_" + name ] = element
+            else:
+                pipe[prefix + "/" + name ] = element
+        elif isinstance(element, dict):
+            for k, element in element.items():
+                if hasattr(element , "pipeline"):
+                    pipe.update( extract_models(prefix + "/" + k,element ))


+    return pipe
+
+
 def all(pipe_or_dict_of_modules, pinnedMemory = False, quantizeTransformer = True, extraModelsToQuantize = None, quantizationType = qint8, budgets= 0, asyncTransfers = True, compile = False, perc_reserved_mem_max = 0, verboseLevel = -1):
     """Hook to a pipeline or a group of modules in order to reduce their VRAM requirements:
     pipe_or_dict_of_modules : the pipeline object or a dictionary of modules of the model

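The new `extract_models` helper walks an object's attributes and gathers every model it can reach: `torch.nn.Module` attributes directly, the `components` dict of a `pipeline`/`pipe` attribute, and, recursively, dict values that themselves hold a `pipeline`. A hedged usage sketch; the `Wrapper` class is a hypothetical container, only `extract_models` itself comes from this diff:

    import torch
    from mmgp import offload

    class Wrapper:                                     # hypothetical container
        def __init__(self):
            self.text_encoder = torch.nn.Linear(8, 8)  # any nn.Module attribute
            self.transformer = torch.nn.Linear(8, 8)

    models = offload.extract_models("wrap", Wrapper())
    # e.g. {"wrap/text_encoder": Linear(...), "wrap/transformer": Linear(...)}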
@@ -1344,6 +1365,7 @@ def all(pipe_or_dict_of_modules, pinnedMemory = False, quantizeTransformer = Tru
     if not budgets is None:
         if isinstance(budgets , dict):
             model_budgets = budgets
+            budget = budgets.get("*", 0) * ONE_MB
         else:
             budget = int(budgets) * ONE_MB

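With this hunk, a `budgets` dict passed to `offload.all` may carry a `"*"` wildcard entry (in MB) that serves as the default budget for models without an explicit key; the reworked profiles further down rely on exactly this. A hedged sketch, using the documented option of passing a plain dict of modules (the toy `Linear` modules are placeholders):

    import torch
    from mmgp import offload

    pipe = {"transformer": torch.nn.Linear(8, 8),
            "text_encoder": torch.nn.Linear(8, 8)}
    # "*" is the fallback budget in MB; explicit keys override it per model.
    offload.all(pipe, budgets={"*": 3000, "transformer": 400})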
@@ -1458,7 +1480,7 @@ def all(pipe_or_dict_of_modules, pinnedMemory = False, quantizeTransformer = Tru
        if model_budget > 0 and model_budget > current_model_size:
            model_budget = 0

-       model_budgets[model_id] = model_budget
+       model_budgets[model_id] = model_budget #/ 2 if asyncTransfers else model_budget

    partialPinning = False

@@ -1502,10 +1524,11 @@ def all(pipe_or_dict_of_modules, pinnedMemory = False, quantizeTransformer = Tru
        if not hasattr(submodule, "_hf_hook"):
            setattr(submodule, "_hf_hook", HfHook())

-       if submodule_name=='':
-
-
-
+       # if submodule_name=='':
+       #     continue
+
+
+       if current_budget > 0 and len(submodule_name) > 0:
            if cur_blocks_prefix != None:
                if submodule_name.startswith(cur_blocks_prefix):
                    depth_prefix = cur_blocks_prefix.split(".")

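For context: the surrounding loop walks the model's submodules and, whenever names follow a numbered pattern such as `blocks.0...`, `blocks.1...`, it starts a new offload block once the sequence number changes and the accumulated size has exceeded the current budget; the `print` calls added in the next two hunks merely trace those boundaries. A toy sketch of the boundary test alone (illustrative names and sizes, not mmgp code):

    # Start a new block when the block index changes and the budget is spent.
    names = ["blocks.0.attn", "blocks.0.mlp", "blocks.1.attn", "blocks.1.mlp"]
    budget, current_size, cur_seq = 100, 0, -1
    for name in names:
        num = int(name.split(".")[1])
        if num != cur_seq and (cur_seq == -1 or current_size > budget):
            print(f"new block starts at {name}")
            cur_seq, current_size = num, 0
        current_size += 60                 # pretend each submodule is 60 MB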
@@ -1515,7 +1538,7 @@ def all(pipe_or_dict_of_modules, pinnedMemory = False, quantizeTransformer = Tru
                    if num != cur_blocks_seq and (cur_blocks_seq == -1 or current_size > current_budget):
                        prev_blocks_name = cur_blocks_name
                        cur_blocks_name = cur_blocks_prefix + str(num)
-
+                       print(f"new block: {model_id}/{cur_blocks_name} - {submodule_name}")
                        cur_blocks_seq = num
            else:
                cur_blocks_prefix, prev_blocks_name, cur_blocks_name,cur_blocks_seq = None, None, None, -1

@@ -1527,7 +1550,7 @@ def all(pipe_or_dict_of_modules, pinnedMemory = False, quantizeTransformer = Tru
                elif num >=0:
                    cur_blocks_prefix, prev_blocks_name, cur_blocks_seq = pre, None, num
                    cur_blocks_name = submodule_name
-
+                   print(f"new block: {model_id}/{cur_blocks_name} - {submodule_name}")


        if hasattr(submodule, "forward"):

@@ -1597,37 +1620,45 @@ def profile(pipe_or_dict_of_modules, profile_no: profile_type = profile_type.Ve
    # transformer (video or image generator) should be as small as possible not to occupy space that could be used by actual image data
    # on the other hand the text encoder should be quite large (as long as it fits in 10 GB of VRAM) to reduce sequence offloading

-
+   budgets = {}
+   if "transformer" in modules:
+       budgets["transformer"] = 1200
+
    extraModelsToQuantize = None
    asyncTransfers = True
-   budgets = None

    if profile_no == profile_type.HighRAM_HighVRAM:
        pinnedMemory= True
        budgets = None
-       info = "You have chosen a profile that requires at least 48 GB of RAM and 24 GB of VRAM. Some VRAM is
+       info = "You have chosen a profile that requires at least 48 GB of RAM and 24 GB of VRAM. Some VRAM is consumed just to make the model runs faster."
    elif profile_no == profile_type.HighRAM_LowVRAM:
        pinnedMemory= True
-       budgets =
+       budgets["*"] = 3000
        info = "You have chosen a profile that requires at least 48 GB of RAM and 12 GB of VRAM. Some RAM is consumed to reduce VRAM consumption."
    elif profile_no == profile_type.LowRAM_HighVRAM:
        pinnedMemory= "transformer"
        extraModelsToQuantize = default_extraModelsToQuantize
+       budgets = None
        info = "You have chosen a Medium speed profile that requires at least 32 GB of RAM and 24 GB of VRAM. Some VRAM is consuming just to make the model runs faster"
    elif profile_no == profile_type.LowRAM_LowVRAM:
        pinnedMemory= "transformer"
        extraModelsToQuantize = default_extraModelsToQuantize
-       budgets=
+       budgets["*"] = 3000
        info = "You have chosen a profile that requires at least 32 GB of RAM and 12 GB of VRAM. Some RAM is consumed to reduce VRAM consumption. "
    elif profile_no == profile_type.VerylowRAM_LowVRAM:
        pinnedMemory= False
        extraModelsToQuantize = default_extraModelsToQuantize
-       budgets=
-
+       budgets["*"] = 3000
+       if "transformer" in modules:
+           budgets["transformer"] = 400
        #asyncTransfers = False
        info = "You have chosen the slowest profile that requires at least 24 GB of RAM and 10 GB of VRAM."
    else:
        raise Exception("Unknown profile")
+
+   if budgets != None and len(budgets) == 0:
+       budgets = None
+
    CrLf = '\r\n'
    kwargs = { "pinnedMemory": pinnedMemory, "extraModelsToQuantize" : extraModelsToQuantize, "budgets": budgets, "asyncTransfers" : asyncTransfers, "quantizeTransformer": quantizeTransformer }

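Net effect of this last hunk: every profile now starts from a per-model `budgets` dict, seeded with `budgets["transformer"] = 1200` when the pipeline has a transformer, instead of a single scalar or `None`; the low-VRAM profiles add a 3000 MB wildcard (and VerylowRAM_LowVRAM tightens the transformer to 400 MB), and an empty dict is normalized back to `None`. A hedged usage sketch of applying a profile; the toy module dict stands in for a real pipeline, and `profile_type` is imported from the module where the diff defines it:

    import torch
    from mmgp import offload
    from mmgp.offload import profile_type

    pipe = {"transformer": torch.nn.Linear(8, 8)}   # placeholder pipeline
    # VerylowRAM_LowVRAM now resolves to roughly
    # budgets = {"*": 3000, "transformer": 400}.
    offload.profile(pipe, profile_type.VerylowRAM_LowVRAM)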