mmgp 3.1.1__py3-none-any.whl → 3.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of mmgp might be problematic.
- mmgp/offload.py +62 -24
- {mmgp-3.1.1.dist-info → mmgp-3.1.3.dist-info}/METADATA +1 -1
- mmgp-3.1.3.dist-info/RECORD +9 -0
- mmgp-3.1.1.dist-info/RECORD +0 -9
- {mmgp-3.1.1.dist-info → mmgp-3.1.3.dist-info}/LICENSE.md +0 -0
- {mmgp-3.1.1.dist-info → mmgp-3.1.3.dist-info}/WHEEL +0 -0
- {mmgp-3.1.1.dist-info → mmgp-3.1.3.dist-info}/top_level.txt +0 -0
mmgp/offload.py
CHANGED
@@ -576,7 +576,7 @@ def _quantize(model_to_quantize, weights=qint8, verboseLevel = 1, threshold = 10
     if hasattr(model_to_quantize, "_quanto_map"):
         for k, entry in model_to_quantize._quanto_map.items():
             weights = entry["weights"]
-            print(f"Model '{model_id}' is already quantized
+            print(f"Model '{model_id}' is already quantized to format '{weights}'")
             return False
         print(f"Model '{model_id}' is already quantized")
         return False
@@ -680,7 +680,7 @@ def _quantize(model_to_quantize, weights=qint8, verboseLevel = 1, threshold = 10
 
     return True
 
-def load_loras_into_model(model, lora_path, lora_multi = None, verboseLevel = -1):
+def load_loras_into_model(model, lora_path, lora_multi = None, activate_all_loras = True, verboseLevel = -1,):
     verboseLevel = _compute_verbose_level(verboseLevel)
 
     if inject_adapter_in_model == None or set_weights_and_activate_adapters == None or get_peft_kwargs == None:
@@ -731,9 +731,6 @@ def load_loras_into_model(model, lora_path, lora_multi = None, verboseLevel = -1
 
     # is_correct_format = all("lora" in key for key in state_dict.keys())
 
-
-
-
     # check with first key if is not in peft format
     # first_key = next(iter(state_dict.keys()))
     # if "lora_A" not in first_key:
@@ -770,7 +767,17 @@ def load_loras_into_model(model, lora_path, lora_multi = None, verboseLevel = -1
         pass
     if verboseLevel >=1:
         print(f"Lora '{path}' was loaded in model '{_get_module_name(model)}'")
-
+    if activate_all_loras:
+        set_weights_and_activate_adapters(model,[ str(i) for i in range(len(lora_multi))], lora_multi)
+
+def activate_loras(model, lora_nos, lora_multi = None ):
+    if not isinstance(lora_nos, list):
+        lora_nos = [lora_nos]
+    lora_nos = [str(l) for l in lora_nos]
+    if lora_multi is None:
+        lora_multi = [1. for _ in lora_nos]
+    set_weights_and_activate_adapters(model, lora_nos, lora_multi)
+
 
 def move_loras_to_device(model, device="cpu" ):
     if hasattr( model, "_lora_loadable_modules"):
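
Taken together with the signature change above, 3.1.3 decouples LoRA loading from activation: load_loras_into_model still activates every adapter by default, but with activate_all_loras=False activation can be deferred and performed selectively through the new activate_loras, which accepts a single index or a list and defaults each multiplier to 1.0. A minimal usage sketch, assuming a model already managed by mmgp and hypothetical LoRA file names (the loop over path above suggests lora_path may be a list):

    from mmgp import offload

    # Load two loras but defer activation (new in 3.1.3)
    offload.load_loras_into_model(model, ["style_a.safetensors", "style_b.safetensors"],
                                  activate_all_loras=False)

    # Later: enable only adapter "1" at half strength
    offload.activate_loras(model, 1, lora_multi=[0.5])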
@@ -979,14 +986,13 @@ class offload:
         self.blocks_of_modules[entry_name] = blocks_params
         blocks_params_size = 0
         if blocks_name !=None:
-
             prev_entry_name = None if prev_block_name == None else model_id + "/" + prev_block_name
             self.prev_blocks_names[entry_name] = prev_entry_name
             if not prev_block_name == None:
                 self.next_blocks_names[prev_entry_name] = entry_name
 
-
         for k,p in submodule.named_parameters(recurse=False):
+
             if isinstance(p, QTensor):
                 blocks_params.append( (submodule, k, p, False ) )
 
@@ -1268,7 +1274,7 @@ class offload:
 
         if module_id == None or module_id =='':
             model_name = model._get_name()
-            print(f"Hooked
+            print(f"Hooked to model '{model_id}' ({model_name})")
 
 
 def save_model(model, file_path, do_quantize = False, quantizationType = qint8, verboseLevel = -1, config_file_path = None ):
@@ -1311,7 +1317,29 @@ def save_model(model, file_path, do_quantize = False, quantizationType = qint8,
     print(f"File '{file_path}' saved")
 
 
+def extract_models(prefix, obj):
+    pipe = {}
+    for name in dir(obj):
+        element = getattr(obj,name)
+        if name in ("pipeline", "pipe"):
+            pipeline = element
+            if hasattr(pipeline , "components") and isinstance(pipeline.components, dict):
+                for k, model in pipeline.components.items():
+                    if model != None:
+                        pipe[prefix + "/" + k ] = model
+        elif isinstance(element, torch.nn.Module):
+            if prefix + "/" + name in pipe:
+                pipe[prefix + "/_" + name ] = element
+            else:
+                pipe[prefix + "/" + name ] = element
+        elif isinstance(element, dict):
+            for k, element in element.items():
+                if hasattr(element , "pipeline"):
+                    pipe.update( extract_models(prefix + "/" + k,element ))
+
 
+    return pipe
+
 
 def all(pipe_or_dict_of_modules, pinnedMemory = False, quantizeTransformer = True, extraModelsToQuantize = None, quantizationType = qint8, budgets= 0, asyncTransfers = True, compile = False, perc_reserved_mem_max = 0, verboseLevel = -1):
     """Hook to a pipeline or a group of modules in order to reduce their VRAM requirements:
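
The new extract_models helper flattens an arbitrary wrapper object into the {name: module} dict that offload.all expects: it scans dir(obj), pulls the components out of a pipeline/pipe attribute, collects bare torch.nn.Module attributes (prefixing a colliding name with an underscore), and recurses into dicts whose values carry a pipeline. A sketch of the intended use, with a hypothetical wrapper class and attribute names:

    import torch
    from mmgp import offload

    class Wrapper:
        def __init__(self, pipe, upscaler):
            self.pipe = pipe          # diffusers-style pipeline exposing .components
            self.upscaler = upscaler  # a bare torch.nn.Module

    models = offload.extract_models("main", Wrapper(pipe, upscaler))
    # -> e.g. {"main/transformer": ..., "main/text_encoder": ..., "main/upscaler": ...}
    offload.all(models)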
@@ -1337,6 +1365,7 @@ def all(pipe_or_dict_of_modules, pinnedMemory = False, quantizeTransformer = Tru
     if not budgets is None:
         if isinstance(budgets , dict):
             model_budgets = budgets
+            budget = budgets.get("*", 0) * ONE_MB
         else:
             budget = int(budgets) * ONE_MB
 
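
This one-line addition gives dict-valued budgets a "*" wildcard: the wildcard entry (in MB) seeds the default budget for models without an entry of their own, while named keys still apply per model. An illustrative call (the figures are placeholders, not recommendations):

    offload.all(pipe,
                budgets={"*": 3000,            # ~3 GB default per model
                         "transformer": 400})  # tighter cap for the transformer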
@@ -1451,7 +1480,7 @@ def all(pipe_or_dict_of_modules, pinnedMemory = False, quantizeTransformer = Tru
         if model_budget > 0 and model_budget > current_model_size:
             model_budget = 0
 
-        model_budgets[model_id] = model_budget
+        model_budgets[model_id] = model_budget #/ 2 if asyncTransfers else model_budget
 
     partialPinning = False
 
@@ -1495,10 +1524,11 @@ def all(pipe_or_dict_of_modules, pinnedMemory = False, quantizeTransformer = Tru
         if not hasattr(submodule, "_hf_hook"):
             setattr(submodule, "_hf_hook", HfHook())
 
-        if submodule_name=='':
-
-
-
+        # if submodule_name=='':
+        #     continue
+
+
+        if current_budget > 0 and len(submodule_name) > 0:
             if cur_blocks_prefix != None:
                 if submodule_name.startswith(cur_blocks_prefix):
                     depth_prefix = cur_blocks_prefix.split(".")
@@ -1508,7 +1538,7 @@ def all(pipe_or_dict_of_modules, pinnedMemory = False, quantizeTransformer = Tru
                     if num != cur_blocks_seq and (cur_blocks_seq == -1 or current_size > current_budget):
                         prev_blocks_name = cur_blocks_name
                         cur_blocks_name = cur_blocks_prefix + str(num)
-
+                        print(f"new block: {model_id}/{cur_blocks_name} - {submodule_name}")
                         cur_blocks_seq = num
                 else:
                     cur_blocks_prefix, prev_blocks_name, cur_blocks_name,cur_blocks_seq = None, None, None, -1
@@ -1520,7 +1550,7 @@ def all(pipe_or_dict_of_modules, pinnedMemory = False, quantizeTransformer = Tru
                 elif num >=0:
                     cur_blocks_prefix, prev_blocks_name, cur_blocks_seq = pre, None, num
                     cur_blocks_name = submodule_name
-
+                    print(f"new block: {model_id}/{cur_blocks_name} - {submodule_name}")
 
 
             if hasattr(submodule, "forward"):
@@ -1590,37 +1620,45 @@ def profile(pipe_or_dict_of_modules, profile_no: profile_type = profile_type.Ve
     # transformer (video or image generator) should be as small as possible not to occupy space that could be used by actual image data
     # on the other hand the text encoder should be quite large (as long as it fits in 10 GB of VRAM) to reduce sequence offloading
 
-
+    budgets = {}
+    if "transformer" in modules:
+        budgets["transformer"] = 1200
+
     extraModelsToQuantize = None
     asyncTransfers = True
-    budgets = None
 
     if profile_no == profile_type.HighRAM_HighVRAM:
        pinnedMemory= True
        budgets = None
-       info = "You have chosen a profile that requires at least 48 GB of RAM and 24 GB of VRAM. Some VRAM is
+       info = "You have chosen a profile that requires at least 48 GB of RAM and 24 GB of VRAM. Some VRAM is consumed just to make the model runs faster."
    elif profile_no == profile_type.HighRAM_LowVRAM:
        pinnedMemory= True
-       budgets =
+       budgets["*"] = 3000
        info = "You have chosen a profile that requires at least 48 GB of RAM and 12 GB of VRAM. Some RAM is consumed to reduce VRAM consumption."
    elif profile_no == profile_type.LowRAM_HighVRAM:
        pinnedMemory= "transformer"
        extraModelsToQuantize = default_extraModelsToQuantize
+       budgets = None
        info = "You have chosen a Medium speed profile that requires at least 32 GB of RAM and 24 GB of VRAM. Some VRAM is consuming just to make the model runs faster"
    elif profile_no == profile_type.LowRAM_LowVRAM:
        pinnedMemory= "transformer"
        extraModelsToQuantize = default_extraModelsToQuantize
-       budgets=
+       budgets["*"] = 3000
        info = "You have chosen a profile that requires at least 32 GB of RAM and 12 GB of VRAM. Some RAM is consumed to reduce VRAM consumption. "
    elif profile_no == profile_type.VerylowRAM_LowVRAM:
        pinnedMemory= False
        extraModelsToQuantize = default_extraModelsToQuantize
-       budgets=
-
-
+       budgets["*"] = 3000
+       if "transformer" in modules:
+           budgets["transformer"] = 400
+       #asyncTransfers = False
        info = "You have chosen the slowest profile that requires at least 24 GB of RAM and 10 GB of VRAM."
    else:
        raise Exception("Unknown profile")
+
+   if budgets != None and len(budgets) == 0:
+       budgets = None
+
    CrLf = '\r\n'
    kwargs = { "pinnedMemory": pinnedMemory, "extraModelsToQuantize" : extraModelsToQuantize, "budgets": budgets, "asyncTransfers" : asyncTransfers, "quantizeTransformer": quantizeTransformer }
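
The profile presets are now built on that wildcard mechanism instead of the truncated literals of 3.1.1: each profile starts from budgets = {"transformer": 1200} when a transformer is present, the low-VRAM profiles set budgets["*"] = 3000, VerylowRAM_LowVRAM tightens the transformer budget to 400 MB, and the high-VRAM profiles reset budgets to None (as does the final empty-dict check). Selecting a profile is unchanged; for instance, following the import style of the package README:

    from mmgp import offload, profile_type

    # Slowest preset: at least 24 GB of RAM and 10 GB of VRAM per the info message
    offload.profile(pipe, profile_type.VerylowRAM_LowVRAM)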
mmgp-3.1.3.dist-info/RECORD
ADDED
@@ -0,0 +1,9 @@
+__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+mmgp/__init__.py,sha256=A9qBwyQMd1M7vshSTOBnFGP1MQvS2hXmTcTCMUcmyzE,509
+mmgp/offload.py,sha256=JB40Ky84Njhuf2BauLvNhH_-IS_27lhfYuLqVVhmJtA,71080
+mmgp/safetensors2.py,sha256=OkJAvENfWeb-PL0FcxS1-eYeHLbemTaNXYvNxURrzIs,16154
+mmgp-3.1.3.dist-info/LICENSE.md,sha256=HjzvY2grdtdduZclbZ46B2M-XpT4MDCxFub5ZwTWq2g,93
+mmgp-3.1.3.dist-info/METADATA,sha256=pfkzWdQKY-7wNEMN66pwUPxfmXDGZSjJpBwvYolUDb4,12708
+mmgp-3.1.3.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+mmgp-3.1.3.dist-info/top_level.txt,sha256=waGaepj2qVfnS2yAOkaMu4r9mJaVjGbEi6AwOUogU_U,14
+mmgp-3.1.3.dist-info/RECORD,,
mmgp-3.1.1.dist-info/RECORD
DELETED
@@ -1,9 +0,0 @@
-__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-mmgp/__init__.py,sha256=A9qBwyQMd1M7vshSTOBnFGP1MQvS2hXmTcTCMUcmyzE,509
-mmgp/offload.py,sha256=h74eKyWVZmDM--l4KbiZYXdpkcGM8ySUgyvkFtFRtNQ,69593
-mmgp/safetensors2.py,sha256=OkJAvENfWeb-PL0FcxS1-eYeHLbemTaNXYvNxURrzIs,16154
-mmgp-3.1.1.dist-info/LICENSE.md,sha256=HjzvY2grdtdduZclbZ46B2M-XpT4MDCxFub5ZwTWq2g,93
-mmgp-3.1.1.dist-info/METADATA,sha256=wtHNzulNFaWmruVO4cGgcRuIIN2eHPHo47nkgGMOWqw,12708
-mmgp-3.1.1.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
-mmgp-3.1.1.dist-info/top_level.txt,sha256=waGaepj2qVfnS2yAOkaMu4r9mJaVjGbEi6AwOUogU_U,14
-mmgp-3.1.1.dist-info/RECORD,,
{mmgp-3.1.1.dist-info → mmgp-3.1.3.dist-info}/LICENSE.md
File without changes

{mmgp-3.1.1.dist-info → mmgp-3.1.3.dist-info}/WHEEL
File without changes

{mmgp-3.1.1.dist-info → mmgp-3.1.3.dist-info}/top_level.txt
File without changes