mmgp 3.5.1__py3-none-any.whl → 3.5.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mmgp might be problematic.

mmgp/offload.py CHANGED
@@ -1,4 +1,4 @@
- # ------------------ Memory Management 3.5.1 for the GPU Poor by DeepBeepMeep (mmgp)------------------
+ # ------------------ Memory Management 3.5.3 for the GPU Poor by DeepBeepMeep (mmgp)------------------
  #
  # This module contains multiples optimisations so that models such as Flux (and derived), Mochi, CogView, HunyuanVideo, ... can run smoothly on a 24 GB GPU limited card.
  # This a replacement for the accelerate library that should in theory manage offloading, but doesn't work properly with models that are loaded / unloaded several
@@ -253,17 +253,17 @@ def _remove_model_wrapper(model):
  def _move_to_pinned_tensor(source_tensor, big_tensor, offset, length):
  dtype= source_tensor.dtype
  shape = source_tensor.shape
- if len(shape) == 0:
- return source_tensor
- else:
+ if len(shape) > 0 :
  t = source_tensor.view(torch.uint8)
  t = torch.reshape(t, (length,))
- # magic swap !
- big_tensor[offset: offset + length] = t
- t = big_tensor[offset: offset + length]
- t = t.view(dtype)
- t = torch.reshape(t, shape)
- assert t.is_pinned()
+ else:
+ t = source_tensor
+ # magic swap !
+ big_tensor[offset: offset + length] = t
+ t = big_tensor[offset: offset + length]
+ t = t.view(dtype)
+ t = torch.reshape(t, shape)
+ assert t.is_pinned()
  return t

  def _safetensors_load_file(file_path, writable_tensors = True):
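The rewritten _move_to_pinned_tensor above now routes zero-dimensional tensors through the same copy instead of returning them unpinned, so every parameter ends up backed by the big pinned buffer. A minimal standalone sketch of the swap pattern it relies on, with an illustrative tensor and assuming a CUDA-capable torch build (page-locked allocation needs one):

import torch

# Copy a tensor's raw bytes into a slice of one large pinned buffer, then view
# that slice back with the original dtype/shape: the result shares the pinned
# storage, so later .to("cuda", non_blocking=True) transfers can run async.
source = torch.randn(4, 8, dtype=torch.float16)        # illustrative tensor
length = source.numel() * source.element_size()        # size in bytes
big_tensor = torch.empty(length, dtype=torch.uint8, pin_memory=True)

flat = source.view(torch.uint8).reshape(length)        # reinterpret as raw bytes
big_tensor[:length] = flat                              # the "magic swap"
pinned = big_tensor[:length].view(torch.float16).reshape(source.shape)

assert pinned.is_pinned()                               # slice shares the pinned storage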
@@ -336,9 +336,8 @@ def _pin_sd_to_memory(sd, sd_name, tied_weights = None, gig_tensor_size = BIG_TE
  names_list = sd_name if isinstance(sd, list) else [sd_name]

  if max_pinnable_bytes > 0 and total_pinned_bytes >= max_pinnable_bytes:
-
  if verboseLevel>=1 :
- print(f"Unable pin data of '{','.join(names_list)}' to reserved RAM as there is no reserved RAM left")
+ print(f"Unable to pin data of '{','.join(names_list)}' to reserved RAM as there is no reserved RAM left. Transfer speed from RAM to VRAM will may be slower.")
  return


@@ -404,7 +403,7 @@ def _pin_sd_to_memory(sd, sd_name, tied_weights = None, gig_tensor_size = BIG_TE
  big_tensors.append(current_big_tensor)
  except:
  incomplete_pinning = True
- print(f"Unable to pin more tensors for '{sd_name}' as the maximum reservable memory has been reached ({total/ONE_MB:.2f})")
+ print(f"Unable to pin more tensors for '{sd_name}' as the maximum reservable memory has been reached ({total/ONE_MB:.2f}). Transfer speed from RAM to VRAM may be slower.")
  break

  last_big_tensor += 1
@@ -442,12 +441,12 @@ def _pin_sd_to_memory(sd, sd_name, tied_weights = None, gig_tensor_size = BIG_TE

  if verboseLevel >=1:
  if incomplete_pinning :
- if len(names_list) > 0:
+ if len(names_list) > 1:
  print(f"'{','.join(names_list)}' were partially pinned to reserved RAM: {last_big_tensor} large blocks spread across {total/ONE_MB:.2f} MB")
  else:
  print(f"'{','.join(names_list)}' was partially pinned to reserved RAM: {last_big_tensor} large blocks spread across {total/ONE_MB:.2f} MB")
  else:
- if len(names_list) > 0:
+ if len(names_list) > 1:
  print(f"'{','.join(names_list)}' were pinned entirely to reserved RAM: {last_big_tensor} large blocks spread across {total/ONE_MB:.2f} MB")
  else:
  print(f"'{','.join(names_list)}' was pinned entirely to reserved RAM: {last_big_tensor} large blocks spread across {total/ONE_MB:.2f} MB")
@@ -462,7 +461,7 @@ def _pin_to_memory(model, model_id, partialPinning = False, pinnedPEFTLora = Tru
  if max_pinnable_bytes > 0 and total_pinned_bytes >= max_pinnable_bytes:

  if verboseLevel>=1 :
- print(f"Unable pin data of '{model_id}' to reserved RAM as there is no reserved RAM left")
+ print(f"Unable to pin data of '{model_id}' to reserved RAM as there is no reserved RAM left. Transfer speed from RAM to VRAM may be slower.")
  return

  if partialPinning:
@@ -499,7 +498,7 @@ def _pin_to_memory(model, model_id, partialPinning = False, pinnedPEFTLora = Tru
  else:
  print(f"Pinning data of '{model_id}' to reserved RAM")

- if partialPinning and len(params_dict) == 0:
+ if len(params_dict) == 0:
  return

  ref_cache = {}
@@ -521,13 +520,22 @@ def _pin_to_memory(model, model_id, partialPinning = False, pinnedPEFTLora = Tru
  else:
  if isinstance(p, QTensor):
  if p._qtype == qint4:
+ if p._data._data.is_pinned():
+ params_dict[n] = (None, False)
+ continue
  if hasattr(p,"_scale_shift"):
  length = torch.numel(p._data._data) * p._data._data.element_size() + torch.numel(p._scale_shift) * p._scale_shift.element_size()
  else:
  length = torch.numel(p._data._data) * p._data._data.element_size() + torch.numel(p._scale) * p._scale.element_size() + torch.numel(p._shift) * p._shift.element_size()
  else:
  length = torch.numel(p._data) * p._data.element_size() + torch.numel(p._scale) * p._scale.element_size()
+ if p._data.is_pinned():
+ params_dict[n] = (None, False)
+ continue
  else:
+ if p.data.is_pinned():
+ params_dict[n] = (None, False)
+ continue
  length = torch.numel(p.data) * p.data.element_size()

  ref_cache[ref] = (n, length)
@@ -544,7 +552,7 @@ def _pin_to_memory(model, model_id, partialPinning = False, pinnedPEFTLora = Tru
  current_big_tensor_size += length

  total_tensor_bytes += length
-
+ p = None
  if verboseLevel >=1 and tied_weights_count > 0:
  if tied_weights_count == 1:
  print(f"Tied weights of {tied_weights_total/ONE_MB:0.2f} MB detected: {tied_weights_last}")
@@ -570,6 +578,7 @@ def _pin_to_memory(model, model_id, partialPinning = False, pinnedPEFTLora = Tru
  tensor_no = 0
  # prev_big_tensor = 0
  for n, (p, is_buffer) in params_dict.items():
+ if p is None: continue
  q_name = tied_weights.get(n,None)
  if q_name != None:
  q , _ = params_dict[q_name]
@@ -658,7 +667,7 @@ def _welcome():
  if welcome_displayed:
  return
  welcome_displayed = True
- print(f"{BOLD}{HEADER}************ Memory Management for the GPU Poor (mmgp 3.5.1) by DeepBeepMeep ************{ENDC}{UNBOLD}")
+ print(f"{BOLD}{HEADER}************ Memory Management for the GPU Poor (mmgp 3.5.3) by DeepBeepMeep ************{ENDC}{UNBOLD}")

  def change_dtype(model, new_dtype, exclude_buffers = False):
  for submodule_name, submodule in model.named_modules():
@@ -1220,9 +1229,29 @@ def load_loras_into_model(model, lora_path, lora_multi = None, activate_all_lora
  activate_loras(model, loras_nos, loras_multi)
  return new_lora_path

+
+ def merge_dicts(A, B):
+ for key, value in A.items():
+ if isinstance(value, dict):
+ if key not in B or not isinstance(B[key], dict):
+ B[key] = value # Copy entire dict reference from A
+ else:
+ merge_dicts(value, B[key]) # Recurse into both dicts
+ else:
+ B[key] = value # Copy non-dict value from A to B
+
+
+ def sync_models_loras(model, model2):
+ merge_dicts(model._loras_model_shortcuts , model2._loras_model_shortcuts)
+ model2._loras_active_adapters = model._loras_active_adapters
+ model2._loras_adapters = model._loras_adapters
+ model2._loras_scaling = model._loras_scaling
+
  def unload_loras_from_model(model):
  for _, v in model._loras_model_data.items():
  v.clear()
+ for _, v in model._loras_model_shortcuts.items():
+ v.clear()

  model._loras_active_adapters = []
  model._loras_scaling = dict()
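The new merge_dicts helper folds A's entries into B in place, descending into nested dicts and overwriting plain values, and sync_models_loras uses it so a second model shares the first model's LoRA shortcut tables and adapter state. A small standalone illustration of the merge behaviour (the function is repeated here for a self-contained run; the dictionaries are made up):

# merge_dicts as added above, exercised on toy nested dicts.
def merge_dicts(A, B):
    for key, value in A.items():
        if isinstance(value, dict):
            if key not in B or not isinstance(B[key], dict):
                B[key] = value              # share the whole sub-dict from A
            else:
                merge_dicts(value, B[key])  # recurse into both sides
        else:
            B[key] = value                  # plain values from A overwrite B

A = {"linear1": {"lora_A": 1}, "alpha": 0.5}
B = {"linear1": {"lora_B": 2}, "beta": 7}
merge_dicts(A, B)
assert B == {"linear1": {"lora_A": 1, "lora_B": 2}, "alpha": 0.5, "beta": 7}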
@@ -1262,7 +1291,7 @@ def move_loras_to_device(model, device="cpu" ):
  if ".lora_" in k:
  m.to(device)

- def fast_load_transformers_model(model_path: str, do_quantize = False, quantizationType = qint8, pinToMemory = False, partialPinning = False, forcedConfigPath = None, defaultConfigPath = None, modelClass=None, modelPrefix = None, writable_tensors = True, verboseLevel = -1, configKwargs ={}):
+ def fast_load_transformers_model(model_path: str, do_quantize = False, quantizationType = qint8, pinToMemory = False, partialPinning = False, forcedConfigPath = None, defaultConfigPath = None, modelClass=None, modelPrefix = None, writable_tensors = True, verboseLevel = -1, modules = None, return_shared_modules = None, configKwargs ={}):
  """
  quick version of .LoadfromPretrained of the transformers library
  used to build a model and load the corresponding weights (quantized or not)
@@ -1331,42 +1360,36 @@ def fast_load_transformers_model(model_path: str, do_quantize = False, quantizat
  model = transfomer_class(config_obj)


- elif "_class_name" in transformer_config:
- class_name = transformer_config["_class_name"]
-
+ else:
  if modelClass !=None:
  transfomer_class = modelClass
- else:
+ elif "_class_name" in transformer_config:
+ class_name = 'Transformer3DModel'
  module = __import__("diffusers")
  transfomer_class = getattr(module, class_name)
+ else:
+ raise Exception("class not defined")

  with init_empty_weights():
  model = transfomer_class.from_config(transformer_config )


  torch.set_default_device('cpu')
+ model.eval().requires_grad_(False)

  model._config = transformer_config

- load_model_data(model,model_path, do_quantize = do_quantize, quantizationType = quantizationType, pinToMemory= pinToMemory, partialPinning= partialPinning, modelPrefix = modelPrefix, writable_tensors =writable_tensors ,verboseLevel=verboseLevel )
+ load_model_data(model,model_path, do_quantize = do_quantize, quantizationType = quantizationType, pinToMemory= pinToMemory, partialPinning= partialPinning, modelPrefix = modelPrefix, writable_tensors =writable_tensors, modules = modules, return_shared_modules = return_shared_modules, verboseLevel=verboseLevel )

  return model



- def load_model_data(model, file_path: str, do_quantize = False, quantizationType = qint8, pinToMemory = False, partialPinning = False, modelPrefix = None, writable_tensors = True, verboseLevel = -1):
+ def load_model_data(model, file_path, do_quantize = False, quantizationType = qint8, pinToMemory = False, partialPinning = False, modelPrefix = None, writable_tensors = True, modules = None, return_shared_modules = None, verboseLevel = -1):
  """
  Load a model, detect if it has been previously quantized using quanto and do the extra setup if necessary
  """
- if not isinstance(file_path, list):
- file_path = [file_path]

- file_path = [ _get_model(file) for file in file_path]
- if any( file == None for file in file_path):
- raise Exception("Unable to find file")
- verboseLevel = _compute_verbose_level(verboseLevel)
-
- model = _remove_model_wrapper(model)


  def filter_state_dict(state_dict, base_model_prefix):
  new_state_dict= {}
@@ -1387,10 +1410,34 @@ def load_model_data(model, file_path: str, do_quantize = False, quantizationType
  new_state_dict[k[ start:]] = v
  return new_state_dict

+
+
+ if not isinstance(file_path, list):
+ file_path = [file_path]
+
+ file_count = len(file_path)
+ if isinstance(modules, (list,str)):
+ if isinstance(modules, str): modules = [modules]
+ file_path += modules
+ modules = None
+
+ file_path = [ _get_model(file) for file in file_path]
+ if any( file == None for file in file_path):
+ raise Exception("Unable to find file")
+ verboseLevel = _compute_verbose_level(verboseLevel)
+
+ model = _remove_model_wrapper(model)
+
+ if return_shared_modules is not None:
+ return_state_dict ={}
+ return_quantization_map ={}
+ return_shared_modules["state_dict"] = return_state_dict
+ return_shared_modules["quantization_map"] = return_quantization_map
+
  full_quantization_map = {}
  full_tied_weights_map = {}
  full_state_dict = {}
- for file in file_path:
+ for no, file in enumerate(file_path):
  quantization_map = None
  tied_weights_map = None
  if not (".safetensors" in file or ".sft" in file):
@@ -1443,6 +1490,13 @@ def load_model_data(model, file_path: str, do_quantize = False, quantizationType
  full_quantization_map.update(quantization_map)
  if tied_weights_map != None:
  full_tied_weights_map.update(tied_weights_map)
+ if return_shared_modules is not None and no >= file_count:
+ return_state_dict.update(state_dict)
+ if quantization_map is not None: return_quantization_map.update(quantization_map)
+
+ if isinstance(modules, dict) :
+ full_state_dict.update(modules["state_dict"])
+ full_quantization_map.update(modules["quantization_map"])

  state_dict, quantization_map, tied_weights_map = full_state_dict, full_quantization_map, full_tied_weights_map
  full_state_dict, full_quantization_map, full_tied_weights_map = None, None, None
@@ -1463,7 +1517,7 @@ def load_model_data(model, file_path: str, do_quantize = False, quantizationType


  missing_keys , unexpected_keys = model.load_state_dict(state_dict, False, assign = True )
- if len(missing_keys) > 0 :
+ if len(missing_keys) > 0 :
  # if there is a key mismatch maybe we forgot to remove some prefix
  base_model_prefix = None
  for k,v in state_dict.items():
@@ -1474,18 +1528,53 @@ def load_model_data(model, file_path: str, do_quantize = False, quantizationType
  raise Exception(f"Missing keys: {missing_keys}")
  state_dict = filter_state_dict(state_dict, base_model_prefix)
  missing_keys , unexpected_keys = model.load_state_dict(state_dict, False, assign = True )
+
  del state_dict
+
  if len(unexpected_keys) > 0 and verboseLevel >=2:
  print(f"Unexpected keys while loading '{file_path}': {unexpected_keys}")

  for k,p in model.named_parameters():
- if p.is_meta:
+ if p.is_meta :
  txt = f"Incompatible State Dictionary or 'Init_Empty_Weights' not set since parameter '{k}' has no data"
  raise Exception(txt)
  for k,b in model.named_buffers():
- if b.is_meta:
+ if b.is_meta :
  txt = f"Incompatible State Dictionary or 'Init_Empty_Weights' not set since buffer '{k}' has no data"
  raise Exception(txt)
+
+ if return_shared_modules is not None:
+ mods = { k : v for k,v in model.named_modules()}
+ return_parameters = {}
+ return_shared_modules["parameters"] = return_parameters
+ for k in return_state_dict:
+ if k.endswith("._data"):
+ k = k[:-6]
+ pos = k.rfind(".")
+ mod_name = k[:pos]
+ param_name = k[pos +1:]
+ mod = mods.get(mod_name, None)
+ if mod is not None:
+ p = mod._parameters.get(param_name, None)
+ if p is None: p = mod._buffers.get(param_name, None)
+ if p is not None:
+ return_parameters[k] = p
+ del mods
+
+ if isinstance(modules, dict) :
+ mods = { k : v for k,v in model.named_modules()}
+ # replace Parameter outer shell so that both models parameters are tied
+ for k, rep_p in modules["parameters"].items():
+ pos = k.rfind(".")
+ mod_name = k[:pos]
+ param_name = k[pos +1:]
+ mod = mods.get(mod_name, None)
+ if mod is not None:
+ setattr(mod, param_name, rep_p)
+ del mods
+ modules["parameters"].clear()
+ modules["state_dict"].clear()
+ rep_p = p = None

  if do_quantize:
  if quantization_map != None and len(quantization_map) > 0 :
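The modules and return_shared_modules arguments threaded through fast_load_transformers_model and load_model_data above let a second model reuse weight files already loaded for a first model: the extra files are appended to the load, their state dict and Parameter objects are captured in the return_shared_modules dict, and passing that dict back as modules merges the weights and rebinds the same Parameter objects so the tensors are tied rather than duplicated in RAM. A hypothetical usage sketch based on that reading (file names are placeholders and the calling convention is not documented in this diff):

from mmgp import offload

# Hypothetical two-step load: the first call records the shared module weights
# in "shared", the second call reuses them so both models hold tied parameters.
shared = {}
model_a = offload.fast_load_transformers_model(
    "model_a.safetensors",                        # placeholder path
    modules=["shared_text_encoder.safetensors"],  # extra weight file(s) appended to the load
    return_shared_modules=shared,                 # filled with state_dict / quantization_map / parameters
)
model_b = offload.fast_load_transformers_model(
    "model_b.safetensors",                        # placeholder path
    modules=shared,                               # dict form: merge and tie the captured parameters
)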
@@ -1500,7 +1589,7 @@ def load_model_data(model, file_path: str, do_quantize = False, quantizationType


  return
- def save_model(model, file_path, do_quantize = False, quantizationType = qint8, verboseLevel = -1, config_file_path = None ):
+ def save_model(model, file_path, do_quantize = False, quantizationType = qint8, verboseLevel = -1, config_file_path = None, filter_sd =None ):
  """save the weights of a model and quantize them if requested
  These weights can be loaded again using 'load_model_data'
  """
@@ -1541,6 +1630,24 @@ def save_model(model, file_path, do_quantize = False, quantizationType = qint8,
  cache_ref = {}
  tied_weights_map = {}
  sd = model.state_dict()
+ if filter_sd != None:
+ new_sd = {}
+ new_quantization_map = {}
+ for k_k in filter_sd:
+ for s in [".weight", ".bias", ".weight._data", ".weight._scale"]:
+ if k_k.endswith(s):
+ k_k= k_k[:-len(s)]
+ break
+ for k,v in sd.items():
+ if k.startswith(k_k):
+ new_sd[k] = v
+ if quantization_map != None:
+ for k,v in quantization_map.items():
+ if k.startswith(k_k):
+ new_quantization_map[k] = v
+ sd = new_sd
+ if quantization_map != None: quantization_map = new_quantization_map
+
  out_sd = OrderedDict()


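The new filter_sd argument of save_model restricts what gets written: each supplied key is stripped of a trailing .weight / .bias / .weight._data / .weight._scale suffix, and the remaining prefix selects which state-dict entries (and quantization-map entries) are kept. A hypothetical call, assuming text_encoder is a model previously loaded with fast_load_transformers_model; the variable and key names are placeholders:

from mmgp import offload

# Hypothetical example: keep only the tensors whose names start with the
# filtered prefixes (here the qkv projections of the first two blocks).
# text_encoder and the key names are placeholders, not part of this diff.
offload.save_model(
    text_encoder,
    "qkv_blocks_0_1.safetensors",
    filter_sd=["blocks.0.attn.qkv.weight", "blocks.1.attn.qkv.weight"],
)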
@@ -1755,6 +1862,12 @@ class offload:
  if tied_p.is_cuda:
  setattr(parent_module, n , tied_p)
  continue
+ # if hasattr(p,'_data'):
+ # if not p._data.is_pinned() or not p._scale.is_pinned():
+ # pass
+ # else:
+ # if not p.data.is_pinned():
+ # pass

  q = p.to("cuda", non_blocking=True)
  if is_buffer:
@@ -2067,10 +2180,12 @@ class offload:
  return result


- def hook_lora(self, submodule, current_model, model_id, loras_model_data, submodule_name):
+ def hook_lora(self, submodule, current_model, model_id, loras_model_data, loras_model_shortcuts, submodule_name):
  old_forward = submodule.forward

  loras_data = {}
+ assert submodule_name not in loras_model_shortcuts
+ loras_model_shortcuts[submodule_name] = loras_data
  loras_model_data[submodule] = loras_data

  if isinstance(submodule, torch.nn.Linear):
@@ -2188,7 +2303,6 @@ class offload:

  if current_budget == 0 or towers_names is None or len(towers_names) == 0 or not self.async_transfers:
  return
- # current_budget = 5000 * ONE_MB
  base_size = self.blocks_of_modules_sizes[model_id]
  current_budget -= base_size
  current_budget = max(0, current_budget)
@@ -2501,14 +2615,14 @@ def all(pipe_or_dict_of_modules, pinnedMemory = False, pinnedPEFTLora = False, p
  print(f"Model '{model_id}' already pinned to reserved memory")
  else:
  _pin_to_memory(current_model, model_id, partialPinning= partialPinning, pinnedPEFTLora = pinnedPEFTLora, perc_reserved_mem_max = perc_reserved_mem_max, verboseLevel=verboseLevel)
-
  current_budget = model_budgets[model_id]
  cur_blocks_prefix, prev_blocks_name, cur_blocks_name,cur_blocks_seq, is_mod_seq = None, None, None, -1, False
  self.loaded_blocks[model_id] = None
  any_lora = loras !=None and model_id in loras
  if any_lora:
- loras_model_data = {}
+ loras_model_data, loras_model_shortcuts = {}, {}
  current_model._loras_model_data = loras_model_data
+ current_model._loras_model_shortcuts = loras_model_shortcuts
  for submodule_name, submodule in current_model.named_modules():
  # create a fake 'accelerate' parameter so that the _execution_device property returns always "cuda"
  # (it is queried in many pipelines even if offloading is not properly implemented)
@@ -2542,7 +2656,7 @@ def all(pipe_or_dict_of_modules, pinnedMemory = False, pinnedPEFTLora = False, p
  if hasattr(submodule, "forward"):
  # if any_lora and isinstance(submodule, ( torch.nn.Linear, torch.nn.Conv3d, torch.nn.LayerNorm)):
  if any_lora and hasattr(submodule,"weight"):
- submodule_method = self.hook_lora(submodule, current_model, model_id, loras_model_data, submodule_name)
+ submodule_method = self.hook_lora(submodule, current_model, model_id, loras_model_data, loras_model_shortcuts, submodule_name)
  else:
  submodule_method = getattr(submodule, "forward")
  if callable(submodule_method):
@@ -2552,11 +2666,12 @@ def all(pipe_or_dict_of_modules, pinnedMemory = False, pinnedPEFTLora = False, p
  self.hook_preload_blocks_for_compilation(submodule, model_id, cur_blocks_name, context = submodule_name )
  else:
  self.hook_check_empty_cache_needed(submodule, current_model, model_id, cur_blocks_name, submodule_method, context = submodule_name )
-
+
  self.add_module_to_blocks(model_id, cur_blocks_name, submodule, prev_blocks_name, submodule_name)


  self.tune_preloading(model_id, current_budget, towers_names)
+ self.parameters_ref = {}


  if self.verboseLevel >=2:
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: mmgp
- Version: 3.5.1
+ Version: 3.5.3
  Summary: Memory Management for the GPU Poor
  Author-email: deepbeepmeep <deepbeepmeep@yahoo.com>
  Requires-Python: >=3.10
@@ -15,7 +15,7 @@ Dynamic: license-file


  <p align="center">
- <H2>Memory Management 3.5.1 for the GPU Poor by DeepBeepMeep</H2>
+ <H2>Memory Management 3.5.3 for the GPU Poor by DeepBeepMeep</H2>
  </p>


@@ -0,0 +1,9 @@
+ __init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ mmgp/__init__.py,sha256=A9qBwyQMd1M7vshSTOBnFGP1MQvS2hXmTcTCMUcmyzE,509
+ mmgp/offload.py,sha256=owsKU30CiOVioGExG28B9F93R09mTvIoe4RwuHv_f7s,125986
+ mmgp/safetensors2.py,sha256=4nKV13qCMabnNEB1TA_ueFbfGYYmiQ9racR_C6SsGug,18693
+ mmgp-3.5.3.dist-info/licenses/LICENSE.md,sha256=DD-WIS0BkPoWJ_8hQO3J8hMP9K_1-dyrYv1YCbkxcDU,94
+ mmgp-3.5.3.dist-info/METADATA,sha256=lPWpMmbWiXt-ZOV5dRyrBnInGBjBiVkfbFYu19aeOkw,16309
+ mmgp-3.5.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ mmgp-3.5.3.dist-info/top_level.txt,sha256=waGaepj2qVfnS2yAOkaMu4r9mJaVjGbEi6AwOUogU_U,14
+ mmgp-3.5.3.dist-info/RECORD,,
@@ -1,2 +1,2 @@
- GNU GENERAL PUBLIC LICENSE
+ GNU GENERAL PUBLIC LICENSE
  Version 3, 29 June 2007
@@ -1,9 +0,0 @@
- __init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- mmgp/__init__.py,sha256=A9qBwyQMd1M7vshSTOBnFGP1MQvS2hXmTcTCMUcmyzE,509
- mmgp/offload.py,sha256=3TpjzT7DJ2yGHIm-u2O-U2wAR_V2ZH1NqmC1bMYhfso,120962
- mmgp/safetensors2.py,sha256=4nKV13qCMabnNEB1TA_ueFbfGYYmiQ9racR_C6SsGug,18693
- mmgp-3.5.1.dist-info/licenses/LICENSE.md,sha256=HjzvY2grdtdduZclbZ46B2M-XpT4MDCxFub5ZwTWq2g,93
- mmgp-3.5.1.dist-info/METADATA,sha256=IOEJfRTedEF4lv9A9DEHJF9-kr3UK1yhlo6Nsdhca10,16309
- mmgp-3.5.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- mmgp-3.5.1.dist-info/top_level.txt,sha256=waGaepj2qVfnS2yAOkaMu4r9mJaVjGbEi6AwOUogU_U,14
- mmgp-3.5.1.dist-info/RECORD,,
File without changes