mmgp 3.2.2-py3-none-any.whl → 3.2.4-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mmgp might be problematic.
- mmgp/offload.py +62 -50
- {mmgp-3.2.2.dist-info → mmgp-3.2.4.dist-info}/METADATA +4 -4
- mmgp-3.2.4.dist-info/RECORD +9 -0
- mmgp-3.2.2.dist-info/RECORD +0 -9
- {mmgp-3.2.2.dist-info → mmgp-3.2.4.dist-info}/LICENSE.md +0 -0
- {mmgp-3.2.2.dist-info → mmgp-3.2.4.dist-info}/WHEEL +0 -0
- {mmgp-3.2.2.dist-info → mmgp-3.2.4.dist-info}/top_level.txt +0 -0
mmgp/offload.py
CHANGED
@@ -1,4 +1,4 @@
-# ------------------ Memory Management 3.2.2 for the GPU Poor by DeepBeepMeep (mmgp)------------------
+# ------------------ Memory Management 3.2.4 for the GPU Poor by DeepBeepMeep (mmgp)------------------
 #
 # This module contains multiples optimisations so that models such as Flux (and derived), Mochi, CogView, HunyuanVideo, ... can run smoothly on a 24 GB GPU limited card.
 # This a replacement for the accelerate library that should in theory manage offloading, but doesn't work properly with models that are loaded / unloaded several
@@ -479,7 +479,7 @@ def _welcome():
     if welcome_displayed:
         return
     welcome_displayed = True
-    print(f"{BOLD}{HEADER}************ Memory Management for the GPU Poor (mmgp 3.2.2) by DeepBeepMeep ************{ENDC}{UNBOLD}")
+    print(f"{BOLD}{HEADER}************ Memory Management for the GPU Poor (mmgp 3.2.4) by DeepBeepMeep ************{ENDC}{UNBOLD}")
 
 def _extract_num_from_str(num_in_str):
     size = len(num_in_str)
@@ -858,7 +858,7 @@ def _lora_linear_forward(self, x: torch.Tensor, *args, **kwargs) -> torch.Tensor
         result = result.to(torch_result_dtype)
     return result
 
-def load_loras_into_model(model, lora_path, lora_multi = None, activate_all_loras = True, split_linear_modules_map = None,verboseLevel = -1,):
+def load_loras_into_model(model, lora_path, lora_multi = None, activate_all_loras = True, split_linear_modules_map = None, preprocess_sd = None, verboseLevel = -1,):
     verboseLevel = _compute_verbose_level(verboseLevel)
 
     if inject_adapter_in_model == None or set_weights_and_activate_adapters == None or get_peft_kwargs == None:
@@ -877,7 +877,8 @@ def load_loras_into_model(model, lora_path, lora_multi = None, activate_all_lora
         adapter_name = str(i)
 
         state_dict = safetensors2.torch_load_file(path)
-
+        if preprocess_sd != None:
+            state_dict = preprocess_sd(state_dict)
 
         if split_linear_modules_map != None:
             new_state_dict = {}
@@ -977,7 +978,8 @@ def load_loras_into_model(model, lora_path, lora_multi = None, activate_all_lora
         # Check only for unexpected keys.
         unexpected_keys = getattr(incompatible_keys, "unexpected_keys", None)
         if unexpected_keys:
-
+            raise Exception(f"Lora '{path}' contains invalid keys '{unexpected_keys}'")
+
         if verboseLevel >=1:
             print(f"Lora '{path}' was loaded in model '{_get_module_name(model)}'")
     if activate_all_loras:
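For orientation, the new preprocess_sd argument gives the caller a chance to rewrite a LoRA state dict before mmgp matches it against the model. Below is a minimal sketch of how it might be used; the prefix being stripped, the file name and the assumption that lora_path accepts a list are illustrative only, not something shipped with the package:

    from mmgp import offload

    def strip_prefix(state_dict):
        # hypothetical preprocessing: drop a "diffusion_model." prefix from every key
        # so the LoRA keys line up with the target model's module names
        return {k.removeprefix("diffusion_model."): v for k, v in state_dict.items()}

    # model is assumed to be an already built transformer / diffusion module
    offload.load_loras_into_model(model, ["my_lora.safetensors"], preprocess_sd=strip_prefix)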
@@ -1015,7 +1017,7 @@ def move_loras_to_device(model, device="cpu" ):
         if ".lora_" in k:
             m.to(device)
 
-def fast_load_transformers_model(model_path: str, do_quantize = False, quantizationType = qint8, pinToMemory = False, partialPinning = False, forcedConfigPath = None, modelClass=None, verboseLevel = -1):
+def fast_load_transformers_model(model_path: str, do_quantize = False, quantizationType = qint8, pinToMemory = False, partialPinning = False, forcedConfigPath = None, modelClass=None, modelPrefix = None, verboseLevel = -1):
     """
     quick version of .LoadfromPretrained of the transformers library
     used to build a model and load the corresponding weights (quantized or not)
@@ -1096,13 +1098,13 @@ def fast_load_transformers_model(model_path: str, do_quantize = False, quantizat
 
     model._config = transformer_config
 
-    load_model_data(model,model_path, do_quantize = do_quantize, quantizationType = quantizationType, pinToMemory= pinToMemory, partialPinning= partialPinning, verboseLevel=verboseLevel )
+    load_model_data(model,model_path, do_quantize = do_quantize, quantizationType = quantizationType, pinToMemory= pinToMemory, partialPinning= partialPinning, modelPrefix = modelPrefix, verboseLevel=verboseLevel )
 
     return model
 
 
 
-def load_model_data(model, file_path: str, do_quantize = False, quantizationType = qint8, pinToMemory = False, partialPinning = False, verboseLevel = -1):
+def load_model_data(model, file_path: str, do_quantize = False, quantizationType = qint8, pinToMemory = False, partialPinning = False, modelPrefix = None, verboseLevel = -1):
     """
     Load a model, detect if it has been previously quantized using quanto and do the extra setup if necessary
     """
@@ -1113,6 +1115,26 @@ def load_model_data(model, file_path: str, do_quantize = False, quantizationType
     verboseLevel = _compute_verbose_level(verboseLevel)
 
     model = _remove_model_wrapper(model)
+
+    def filter_state_dict(state_dict, base_model_prefix):
+        new_state_dict= {}
+        start = -1
+        for k,v in state_dict.items():
+            if k.startswith(base_model_prefix):
+
+                new_start = len(base_model_prefix)
+            else:
+                pos = k.find("." + base_model_prefix)
+                if pos < 0:
+                    continue
+                new_start = pos + len(base_model_prefix) +1
+            if start != -1 and start != new_start:
+                new_state_dict = state_dict
+                break
+            start = new_start
+            new_state_dict[k[ start:]] = v
+        return new_state_dict
+
     if not (".safetensors" in file_path or ".sft" in file_path):
         if pinToMemory:
             raise Exception("Pinning to memory while loading only supported for safe tensors files")
@@ -1151,6 +1173,11 @@ def load_model_data(model, file_path: str, do_quantize = False, quantizationType
             quantization_map = json.load(f)
 
 
+    # deal if we are trying to load just a sub part of a larger model
+    if modelPrefix != None:
+        base_model_prefix = modelPrefix + "."
+        state_dict = filter_state_dict(state_dict,base_model_prefix)
+        quantization_map = filter_state_dict(quantization_map,base_model_prefix)
 
     if quantization_map is None :
         if "quanto" in file_path and not do_quantize:
@@ -1160,32 +1187,12 @@ def load_model_data(model, file_path: str, do_quantize = False, quantizationType
 
     missing_keys , unexpected_keys = model.load_state_dict(state_dict, False, assign = True )
     if len(missing_keys) > 0 :
-        # if there is a key mismatch maybe we forgot to remove some prefix
-        if hasattr(model, "base_model_prefix"):
-            base_model_prefix = model.base_model_prefix + "."
-        else:
-            for k,v in state_dict.items():
-                if k.endswith(missing_keys[0]):
-                    base_model_prefix = k[:-len(missing_keys[0])]
-                    break
-
-        new_state_dict= {}
-        start = -1
+        # if there is a key mismatch maybe we forgot to remove some prefix
         for k,v in state_dict.items():
-            if k.startswith(base_model_prefix):
-                new_start = len(base_model_prefix)
-            else:
-                pos = k.find("." + base_model_prefix)
-                if pos < 0:
-                    continue
-                new_start = pos + len(base_model_prefix) +1
-            if start != -1 and start != new_start:
-                new_state_dict = state_dict
+            if k.endswith(missing_keys[0]):
+                base_model_prefix = k[:-len(missing_keys[0])]
                 break
-
-            new_state_dict[k[ start:]] = v
-        state_dict = new_state_dict
-        del new_state_dict
+        state_dict = filter_state_dict(state_dict,base_model_prefix)
         missing_keys , unexpected_keys = model.load_state_dict(state_dict, False, assign = True )
         del state_dict
 
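Taken together, the hunks above let load_model_data pull a single sub-model out of a larger checkpoint: filter_state_dict keeps only the entries whose keys sit under the given prefix, strips that prefix, and skips everything else. A rough sketch of the intended call follows; the file name and prefix are hypothetical, and it assumes the checkpoint carries the config metadata fast_load_transformers_model expects:

    from mmgp import offload

    # keep only the weights stored under "transformer."; a key such as
    # "transformer.blocks.0.attn.weight" becomes "blocks.0.attn.weight",
    # and entries belonging to other sub-models are dropped
    model = offload.fast_load_transformers_model(
        "combined_checkpoint.safetensors",
        modelPrefix="transformer",
    )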
@@ -1354,6 +1361,8 @@ class offload:
 
     def add_module_to_blocks(self, model_id, blocks_name, submodule, prev_block_name, submodule_name):
 
+        if blocks_name!=None and ".lora_" in blocks_name:
+            blocks_name = None
         entry_name = model_id if blocks_name is None else model_id + "/" + blocks_name
         if entry_name in self.blocks_of_modules:
             blocks_params = self.blocks_of_modules[entry_name]
@@ -1372,7 +1381,6 @@ class offload:
         lora_name = None
         if self.lora_parents.get(submodule, None) != None:
             lora_name = str(submodule_name[ submodule_name.rfind(".") + 1: ] )
-
         for k,p in submodule.named_parameters(recurse=False):
             param_size = 0
             ref = _get_tensor_ref(p)
@@ -1457,11 +1465,10 @@ class offload:
             if tied_param != None:
                 setattr( tied_param[0], tied_param[1], q)
             del p, q
-        any_past_block = False
 
         loaded_block = self.loaded_blocks[model_id]
+
         if not preload and loaded_block != None:
-            any_past_block = True
             self.gpu_unload_blocks(model_id, loaded_block)
             if self.ready_to_check_mem():
                 self.empty_cache_if_needed()
@@ -1475,7 +1482,8 @@ class offload:
 
 
         if self.async_transfers and blocks_name != None:
-
+            prev = self.prev_blocks_names[entry_name]
+            first = prev == None or prev != loaded_block
             next_blocks_entry = self.next_blocks_names[entry_name] if entry_name in self.next_blocks_names else None
             if first:
                 if self.verboseLevel >=2:
@@ -1497,7 +1505,6 @@ class offload:
                     print(f"Loading model {entry_name} ({model_name}) in GPU")
             cpu_to_gpu(self.default_stream, self.blocks_of_modules[entry_name])
             torch.cuda.synchronize()
-
 
         if not preload:
             self.loaded_blocks[model_id] = blocks_name
@@ -1710,7 +1717,7 @@ class offload:
             current_budget -= base_size
             if current_budget <= 0:
                 if self.verboseLevel >=1:
-                    print(f"Async loading plan for model '{model_id}' :
+                    print(f"Async loading plan for model '{model_id}' : minimum budget management, beside the async shuttle only base model ({(base_size)/ONE_MB:0.2f} MB) will be preloaded")
                 return
 
             towers = []
@@ -1732,7 +1739,7 @@ class offload:
             current_budget -= 2 * max_floor_size
             if current_budget <= 0:
                 if self.verboseLevel >=1:
-                    print(f"Async loading plan for model '{model_id}' :
+                    print(f"Async loading plan for model '{model_id}' : minimum budget management, beside the async shuttle only the base model ({(base_size)/ONE_MB:0.2f} MB) will be preloaded")
                 return
 
 
@@ -1743,7 +1750,7 @@ class offload:
             max_blocks_fetch = max(max_floor_size, max_blocks_fetch)
         if preload_blocks_count <= 0:
             if self.verboseLevel >=1:
-                print(f"Async loading plan for model '{model_id}' :
+                print(f"Async loading plan for model '{model_id}' : minimum budget management, beside the async shuttle only the base model ({(base_size)/ONE_MB:0.2f} MB) will be preloaded")
             return
 
         nb_blocks= len(floors)
@@ -1821,16 +1828,20 @@ def all(pipe_or_dict_of_modules, pinnedMemory = False, quantizeTransformer = Tru
 
     windows_os = os.name == 'nt'
 
+    def get_parsed_budget(b):
+        if isinstance(b , str) and b.endswith("%"):
+            return float(b[:-1]) * self.device_mem_capacity
+        else:
+            return b * ONE_MB
+
     budget = 0
     if not budgets is None:
         if isinstance(budgets , dict):
-            model_budgets = budgets
-            budget =
+            model_budgets = { k : get_parsed_budget(b) for k , b in budgets.items() }
+            budget = model_budgets.get("*", 0)
         else:
-            budget =
+            budget = get_parsed_budget(budget)
 
-    # if (budgets!= None or budget >0) :
-    #     self.async_transfers = True
     self.async_transfers = asyncTransfers
 
 
@@ -1938,18 +1949,19 @@ def all(pipe_or_dict_of_modules, pinnedMemory = False, quantizeTransformer = Tru
             estimatesBytesToPin += current_model_size
 
 
-        model_budget = model_budgets[model_id]
+        model_budget = model_budgets[model_id] if model_id in model_budgets else budget
         if workingVRAM != None:
             model_minimumVRAM = -1
             if isinstance(workingVRAM, dict):
                 if model_id in workingVRAM:
-                    model_minimumVRAM = workingVRAM[model_id]
+                    model_minimumVRAM = get_parsed_budget(workingVRAM[model_id])
                 elif "*" in model_id in workingVRAM:
-                    model_minimumVRAM = workingVRAM["*"]
+                    model_minimumVRAM = get_parsed_budget(workingVRAM["*"])
             else:
-                model_minimumVRAM = workingVRAM
+                model_minimumVRAM = get_parsed_budget(workingVRAM)
+
             if model_minimumVRAM > 0:
-                new_budget = self.device_mem_capacity - model_minimumVRAM
+                new_budget = self.device_mem_capacity - model_minimumVRAM
                 new_budget = 1 if new_budget < 0 else new_budget
                 model_budget = new_budget if model_budget == 0 or new_budget < model_budget else model_budget
         if model_budget > 0 and model_budget > current_model_size:
{mmgp-3.2.2.dist-info → mmgp-3.2.4.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: mmgp
-Version: 3.2.2
+Version: 3.2.4
 Summary: Memory Management for the GPU Poor
 Author-email: deepbeepmeep <deepbeepmeep@yahoo.com>
 License: GNU GENERAL PUBLIC LICENSE
@@ -17,7 +17,7 @@ Requires-Dist: peft
 
 
 <p align="center">
-<H2>Memory Management 3.2.2 for the GPU Poor by DeepBeepMeep</H2>
+<H2>Memory Management 3.2.4 for the GPU Poor by DeepBeepMeep</H2>
 </p>
 
 
@@ -119,9 +119,9 @@ For example:
 - pinnedMemory: Boolean (for all models) or List of models ids to pin to RAM. Every model pinned to RAM will load much faster (up to 2 times) but this requires more RAM
 - quantizeTransformer: boolean by default True. The 'transformer' model in the pipe contains usually the video or image generator is by defaut; quantized on the fly by default to 8 bits. If you want to save time on disk and reduce the loading time, you may want to load directly a prequantized model. If you don't want to quantize the image generator, you need to set the option *quantizeTransformer* to *False* to turn off on the fly quantization.
 - extraModelsToQuantize: list of additional modelids of models to quantize on the fly. If the corresponding model is already quantized, this option will be ignored.
-- budgets: either a number in mega bytes
+- budgets: either a number in mega bytes, (for all models, if 0 unlimited budget) a string that is perecentage of the total VRAM or a dictionary that maps model ids to mega bytes : define the approximate budget in mega bytes that is allocated in VRAM for a model. Try not to allocate all the available VRAM so that the rest can be used to process the data. To define the default value in the dictionary, you may add entry named "*".
 The smaller this number, the more VRAM left for image data / longer video but also the slower because there will be lots of loading / unloading between the RAM and the VRAM. If model is too big to fit in a budget, it will be broken down in multiples parts that will be unloaded / loaded consequently. The speed of low budget can be increased (up to 2 times) by turning on the options pinnedMemory and asyncTransfers.
-- workingVRAM: either a number in mega bytes or a dictionary that maps a model ids to a number in mega bytes that corresponds to a minimum amount of VRAM that should be left for the data processed by the model. This number will prevail if it is in conflict with a too high budget defined for the same model.
+- workingVRAM: either a number in mega bytes, a string that is perecentage of the total VRAM or a dictionary that maps a model ids to a number in mega bytes that corresponds to a minimum amount of VRAM that should be left for the data processed by the model. This number will prevail if it is in conflict with a too high budget defined for the same model.
 - asyncTransfers: boolean, load to the GPU the next model part while the current part is being processed. This requires twice the budget if any is defined. This may increase speed by 20% (mostly visible on fast modern GPUs).
 - verboseLevel: number between 0 and 2 (1 by default), provides various level of feedback of the different processes
 - compile: list of model ids to compile, may accelerate up x2 depending on the type of GPU. It makes sense to compile only the model that is frequently used such as the "transformer" model in the case of video or image generation. Compilation requires Triton to be installed. Triton is available out of the box on Linux or WSL but requires to be installed with Windows: https://github.com/woct0rdho/triton-windows
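As a rough usage sketch of the budgets and workingVRAM options documented above (the pipeline variable, model ids and numbers are invented for illustration), both options now also accept percentage strings, and a "*" entry in the budgets dictionary supplies the default budget:

    from mmgp import offload

    offload.all(
        pipe,
        budgets={"transformer": 3000, "text_encoder": "30%", "*": 1000},  # MB, or a share of total VRAM
        workingVRAM={"transformer": 4000},  # keep roughly 4000 MB of VRAM free for the data being processed
        asyncTransfers=True,
    )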
mmgp-3.2.4.dist-info/RECORD
ADDED
@@ -0,0 +1,9 @@
+__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+mmgp/__init__.py,sha256=A9qBwyQMd1M7vshSTOBnFGP1MQvS2hXmTcTCMUcmyzE,509
+mmgp/offload.py,sha256=vGxgCcWV8PQQ4JjSlYFOX57Mr9RLlvPBMOOj3f63qL4,96389
+mmgp/safetensors2.py,sha256=DCdlRH3769CTyraAmWAB3b0XrVua7z6ygQ-OyKgJN6A,16453
+mmgp-3.2.4.dist-info/LICENSE.md,sha256=HjzvY2grdtdduZclbZ46B2M-XpT4MDCxFub5ZwTWq2g,93
+mmgp-3.2.4.dist-info/METADATA,sha256=UGZ7ADvrhU5P0hS7gFgu8SHpEnzzpEgE3Ionk-I7ckw,16151
+mmgp-3.2.4.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
+mmgp-3.2.4.dist-info/top_level.txt,sha256=waGaepj2qVfnS2yAOkaMu4r9mJaVjGbEi6AwOUogU_U,14
+mmgp-3.2.4.dist-info/RECORD,,
mmgp-3.2.2.dist-info/RECORD
DELETED
@@ -1,9 +0,0 @@
-__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-mmgp/__init__.py,sha256=A9qBwyQMd1M7vshSTOBnFGP1MQvS2hXmTcTCMUcmyzE,509
-mmgp/offload.py,sha256=hzirru31j78E88OIT38GJ46iMvddEFM2c3_CCn4N4K4,95676
-mmgp/safetensors2.py,sha256=DCdlRH3769CTyraAmWAB3b0XrVua7z6ygQ-OyKgJN6A,16453
-mmgp-3.2.2.dist-info/LICENSE.md,sha256=HjzvY2grdtdduZclbZ46B2M-XpT4MDCxFub5ZwTWq2g,93
-mmgp-3.2.2.dist-info/METADATA,sha256=hTjAL-soDwYbUlnD1Om7kefG8D4vaXUTjsHoQDikVQA,16054
-mmgp-3.2.2.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
-mmgp-3.2.2.dist-info/top_level.txt,sha256=waGaepj2qVfnS2yAOkaMu4r9mJaVjGbEi6AwOUogU_U,14
-mmgp-3.2.2.dist-info/RECORD,,
{mmgp-3.2.2.dist-info → mmgp-3.2.4.dist-info}/LICENSE.md
File without changes
{mmgp-3.2.2.dist-info → mmgp-3.2.4.dist-info}/WHEEL
File without changes
{mmgp-3.2.2.dist-info → mmgp-3.2.4.dist-info}/top_level.txt
File without changes