mmgp 1.0.1.tar.gz → 1.0.3.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of mmgp might be problematic.
- {mmgp-1.0.1 → mmgp-1.0.3}/PKG-INFO +1 -1
- {mmgp-1.0.1 → mmgp-1.0.3}/pyproject.toml +3 -3
- {mmgp-1.0.1/src/mmgp → mmgp-1.0.3/src}/_version.py +2 -2
- {mmgp-1.0.1 → mmgp-1.0.3}/src/mmgp.egg-info/PKG-INFO +1 -1
- {mmgp-1.0.1 → mmgp-1.0.3}/src/mmgp.egg-info/SOURCES.txt +3 -3
- mmgp-1.0.3/src/mmgp.egg-info/top_level.txt +3 -0
- {mmgp-1.0.1/src/mmgp → mmgp-1.0.3/src}/mmgp.py +13 -10
- mmgp-1.0.1/src/mmgp.egg-info/top_level.txt +0 -1
- {mmgp-1.0.1 → mmgp-1.0.3}/LICENSE.md +0 -0
- {mmgp-1.0.1 → mmgp-1.0.3}/README.md +0 -0
- {mmgp-1.0.1 → mmgp-1.0.3}/setup.cfg +0 -0
- {mmgp-1.0.1/src/mmgp → mmgp-1.0.3/src}/__init__.py +0 -0
- {mmgp-1.0.1 → mmgp-1.0.3}/src/mmgp.egg-info/dependency_links.txt +0 -0
- {mmgp-1.0.1 → mmgp-1.0.3}/src/mmgp.egg-info/requires.txt +0 -0
{mmgp-1.0.1 → mmgp-1.0.3}/pyproject.toml

@@ -37,7 +37,7 @@ line-ending = "auto"
 skip-magic-trailing-comma = false
 docstring-code-format = true
 exclude = [
-    "src/
+    "src/_version.py", # generated by setuptools_scm
 ]
 
 [tool.ruff.lint.isort]

@@ -68,7 +68,7 @@ sort_first = [
 
 # needs to be last for CI reasons
 [tool.setuptools_scm]
-write_to = "src/
+write_to = "src/_version.py"
 parentdir_prefix_version = "mmgp-"
-fallback_version = "1.0.
+fallback_version = "1.0.3"
 version_scheme = "post-release"
{mmgp-1.0.1/src/mmgp → mmgp-1.0.3/src}/mmgp.py

@@ -135,7 +135,7 @@ class offload:
             else:
                 p.data = p.data.cuda(non_blocking=True) #
         # torch.cuda.current_stream().synchronize()
-
+    @torch.compiler.disable()
     def unload_all(self):
         for model, model_id in zip(self.active_models, self.active_models_ids):
             if not self.pinInRAM:

@@ -198,7 +198,6 @@ class offload:
         # print(f"New cached memory after purge is {torch.cuda.memory_reserved()/1024000:0f} MB) ")
 
     def hook_me_light(self, target_module, forceMemoryCheck, previous_method):
-        # @torch.compiler.disable()
         def check_empty_cache(module, *args, **kwargs):
             if self.ready_to_check_mem(forceMemoryCheck):
                 self.empty_cache_if_needed()

@@ -208,7 +207,6 @@ class offload:
 
 
     def hook_me(self, target_module, model, model_id, module_id, previous_method):
-        @torch.compiler.disable()
         def check_change_module(module, *args, **kwargs):
             performEmptyCacheTest = False
             if not model_id in self.active_models_ids:

@@ -240,7 +238,7 @@ class offload:
 
         if module_id == None or module_id =='':
             model_name = model._get_name()
-            print(f"Hooked in model {
+            print(f"Hooked in model '{model_id}' ({model_name})")
 
 
         # Not implemented yet, but why would one want to get rid of these features ?

@@ -258,26 +256,27 @@ class offload:
 
 
     @classmethod
-    def all(cls, pipe_or_dict_of_modules, quantizeTransformer = True, pinInRAM = True,
+    def all(cls, pipe_or_dict_of_modules, quantizeTransformer = True, pinInRAM = True, verbose = True):
         self = cls()
         self.verbose = verbose
         self.pinned_modules_data = {}
 
+        # compile not working yet or slower
+        compile = False
         self.pinInRAM = pinInRAM
-
+        pipe = None
         preloadInRAM = True
         torch.set_default_device('cuda')
         if hasattr(pipe_or_dict_of_modules, "components"):
             pipe_or_dict_of_modules.to("cpu") #XXXX
             # create a fake Accelerate parameter so that lora loading doesn't change the device
             pipe_or_dict_of_modules.hf_device_map = torch.device("cuda")
+            pipe = pipe_or_dict_of_modules
             pipe_or_dict_of_modules= pipe_or_dict_of_modules.components
 
 
-
         models = {k: v for k, v in pipe_or_dict_of_modules.items() if isinstance(v, torch.nn.Module)}
 
-
         if quantizeTransformer:
             self.models_to_quantize = ["transformer"]
             # del models["transformer"] # to test everything but the transformer that has a much longer loading

@@ -389,10 +388,14 @@ class offload:
         if verbose:
             print("Torch compilation started")
         torch._dynamo.config.cache_size_limit = 10000
+        # if pipe != None and hasattr(pipe, "__call__"):
+        #     pipe.__call__= torch.compile(pipe.__call__, mode= "max-autotune")
+
         for model_id in models:
             current_model: torch.nn.Module = models[model_id]
-            current_model.compile()
-            #models["transformer"].compile()
+            current_model.compile(mode= "max-autotune")
+            #models["transformer"].compile()
+
         if verbose:
             print("Torch compilation done")
 
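For orientation, the hunks above change the public entry point offload.all(...) to accept a verbose keyword and switch the optional torch compilation path to mode="max-autotune". The sketch below is only an illustrative guess at how the updated call might look from user code; the diffusers pipeline class, model id, and prompt are placeholders and are not part of this diff.

# Hypothetical usage sketch for the offload.all(...) signature shown in the diff above.
# The pipeline class, model id and prompt are placeholders, not taken from the mmgp package.
import torch
from diffusers import DiffusionPipeline  # assumed host library; a plain dict of nn.Modules is also accepted per the diff
from mmgp import offload

pipe = DiffusionPipeline.from_pretrained("some/model-id", torch_dtype=torch.bfloat16)

# quantizeTransformer, pinInRAM and the new verbose keyword mirror the defaults in the diffed signature.
offload.all(pipe, quantizeTransformer=True, pinInRAM=True, verbose=True)

image = pipe("a placeholder prompt").images[0]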
mmgp-1.0.1/src/mmgp.egg-info/top_level.txt

@@ -1 +0,0 @@
-mmgp
Files without changes: LICENSE.md, README.md, setup.cfg, __init__.py, dependency_links.txt, requires.txt