mmgp 3.4.6__py3-none-any.whl → 3.4.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of mmgp might be problematic.

mmgp/offload.py CHANGED
@@ -1,4 +1,4 @@
- # ------------------ Memory Management 3.4.5 for the GPU Poor by DeepBeepMeep (mmgp)------------------
+ # ------------------ Memory Management 3.4.7 for the GPU Poor by DeepBeepMeep (mmgp)------------------
  #
  # This module contains multiples optimisations so that models such as Flux (and derived), Mochi, CogView, HunyuanVideo, ... can run smoothly on a 24 GB GPU limited card.
  # This a replacement for the accelerate library that should in theory manage offloading, but doesn't work properly with models that are loaded / unloaded several
@@ -331,12 +331,35 @@ def _extract_tie_weights_from_sd(sd , sd_name, verboseLevel =1):

  def _pin_sd_to_memory(sd, sd_name, tied_weights = None, gig_tensor_size = BIG_TENSOR_MAX_SIZE, verboseLevel = 1):
  global max_pinnable_bytes, total_pinned_bytes
+
+
+ names_list = sd_name if isinstance(sd, list) else [sd_name]
+
  if max_pinnable_bytes > 0 and total_pinned_bytes >= max_pinnable_bytes:

  if verboseLevel>=1 :
- print(f"Unable pin data of '{sd_name}' to reserved RAM as there is no reserved RAM left")
+ print(f"Unable pin data of '{','.join(names_list)}' to reserved RAM as there is no reserved RAM left")
  return

+
+ if isinstance(sd, list):
+ new_sd = {}
+ for i, sub_sd, in enumerate(sd):
+ for k, v in sub_sd.items():
+ new_sd[str(i) + "#" + k] =v
+ sd = new_sd
+ del new_sd
+ sub_sd = None
+
+ if isinstance(tied_weights, list):
+ new_tied_weights = {}
+ for i, sub_tied_weights, in enumerate(tied_weights):
+ for k, v in sub_tied_weights.items():
+ new_tied_weights[str(i) + "#" + k] =v
+ sd = new_tied_weights
+ del new_tied_weights
+ sub_tied_weights = None
+
  current_big_tensor_size = 0
  big_tensor_no = 0
  big_tensors_sizes = []
@@ -365,11 +388,14 @@ def _pin_sd_to_memory(sd, sd_name, tied_weights = None, gig_tensor_size = BIG_TE
  big_tensors = []
  last_big_tensor = 0
  total = 0
+ incomplete_pinning = False

  try:
  dummy_pinned_tensor = torch.empty( RESERVED_RAM_MIN_AVAILABLE, dtype= torch.uint8, pin_memory=True, device="cpu")
  except:
  print("There isn't any Reserved RAM left, you may need to choose a profile with a higher number that requires less Reserved RAM or set OS env 'perc_reserved_mem_max' to a value less 0.3")
+ gc.collect()
+ torch.cuda.empty_cache()
  return

  for size in big_tensors_sizes:
@@ -377,6 +403,7 @@ def _pin_sd_to_memory(sd, sd_name, tied_weights = None, gig_tensor_size = BIG_TE
  current_big_tensor = torch.empty( size, dtype= torch.uint8, pin_memory=True, device="cpu")
  big_tensors.append(current_big_tensor)
  except:
+ incomplete_pinning = True
  print(f"Unable to pin more tensors for '{sd_name}' as the maximum reservable memory has been reached ({total/ONE_MB:.2f})")
  break

@@ -410,9 +437,21 @@ def _pin_sd_to_memory(sd, sd_name, tied_weights = None, gig_tensor_size = BIG_TE
  # global total_pinned_bytes
  # total_pinned_bytes += total
  gc.collect()
+ torch.cuda.empty_cache()
+

  if verboseLevel >=1:
- print(f"'{sd_name}' was pinned entirely to reserved RAM: {last_big_tensor} large blocks spread across {total/ONE_MB:.2f} MB")
+ if incomplete_pinning :
+ if len(names_list) > 0:
+ print(f"'{','.join(names_list)}' were partially pinned to reserved RAM: {last_big_tensor} large blocks spread across {total/ONE_MB:.2f} MB")
+ else:
+ print(f"'{','.join(names_list)}' was partially pinned to reserved RAM: {last_big_tensor} large blocks spread across {total/ONE_MB:.2f} MB")
+ else:
+ if len(names_list) > 0:
+ print(f"'{','.join(names_list)}' was pinned entirely to reserved RAM: {last_big_tensor} large blocks spread across {total/ONE_MB:.2f} MB")
+ else:
+ print(f"'{','.join(names_list)}' were pinned entirely to reserved RAM: {last_big_tensor} large blocks spread across {total/ONE_MB:.2f} MB")
+

  return

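The hunks above generalize _pin_sd_to_memory so that a list of state dicts (with matching names) can be pinned in one pass: list inputs are flattened into a single dict whose keys carry the list index as a prefix, and the summary message now distinguishes complete from partial pinning. A minimal sketch of that merge step, using hypothetical state dicts and nothing beyond what the hunk shows:

# Hypothetical LoRA state dicts; the real ones come from safetensors files.
sd_a = {"blocks.0.attn.lora_A.weight": "A0", "blocks.0.attn.lora_B.weight": "B0"}
sd_b = {"blocks.0.attn.diff_b": "bias_delta"}

merged = {}
for i, sub_sd in enumerate([sd_a, sd_b]):
    for k, v in sub_sd.items():
        merged[str(i) + "#" + k] = v  # index prefix keeps keys from different files apart

# merged now holds "0#blocks.0.attn.lora_A.weight", ..., "1#blocks.0.attn.diff_b",
# so a single call can pack every LoRA into the same pool of pinned reserved-RAM blocks.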
@@ -619,7 +658,7 @@ def _welcome():
  if welcome_displayed:
  return
  welcome_displayed = True
- print(f"{BOLD}{HEADER}************ Memory Management for the GPU Poor (mmgp 3.4.5) by DeepBeepMeep ************{ENDC}{UNBOLD}")
+ print(f"{BOLD}{HEADER}************ Memory Management for the GPU Poor (mmgp 3.4.7) by DeepBeepMeep ************{ENDC}{UNBOLD}")

  def change_dtype(model, new_dtype, exclude_buffers = False):
  for submodule_name, submodule in model.named_modules():
@@ -961,6 +1000,8 @@ def load_loras_into_model(model, lora_path, lora_multi = None, activate_all_lora
  errors = []
  adapters = {}
  adapter_no = 0
+ pinned_sd_list = []
+ pinned_names_list = []
  for i, path in enumerate(lora_path):
  adapter_name = str(adapter_no)
  error_msg = ""
@@ -1042,28 +1083,37 @@ def load_loras_into_model(model, lora_path, lora_multi = None, activate_all_lora
  invalid_keys = []
  unexpected_keys = []
  for k, v in state_dict.items():
- pos = k.rfind(".lora_")
- if pos <=0:
- invalid_keys.append(k)
- continue
- module_name = k[ : pos]
- lora_key = k[ pos+1:]
  lora_A = None
  lora_B = None
- if lora_key == "lora_A.weight":
- lora_A = v
- elif lora_key == "lora_B.weight":
- lora_B = v
+ diff_b = None
+ diff = None
+ if k.endswith(".diff"):
+ diff = v
+ module_name = k[ : -5]
+ elif k.endswith(".diff_b"):
+ diff_b = v
+ module_name = k[ : -7]
  else:
- invalid_keys.append(k)
- continue
+ pos = k.rfind(".lora_")
+ if pos <=0:
+ invalid_keys.append(k)
+ continue
+ module_name = k[ : pos]
+ lora_key = k[ pos+1:]
+ if lora_key in ("lora_A.weight", "lora_down.weight"):
+ lora_A = v
+ elif lora_key in ("lora_B.weight", "lora_up.weight"):
+ lora_B = v
+ else:
+ invalid_keys.append(k)
+ continue

  module = modules_dict.get(module_name, None)
  if module == None:
  unexpected_keys.append(k)
  continue
- if not isinstance(module, (QLinear, torch.nn.Linear)):
- msg = f"Lora '{path}' contains a non linear layer '{k}'"
+ if False: #not isinstance(module, (QLinear, torch.nn.Linear, torch.nn.Conv3d, torch.nn.LayerNorm)):
+ msg = f"Lora '{path}' contains a non supported type of layer '{k}'"
  error_msg = append(error_msg, msg)
  fail = True
  break
@@ -1077,7 +1127,7 @@ def load_loras_into_model(model, lora_path, lora_multi = None, activate_all_lora
  error_msg = append(error_msg, msg)
  fail = True
  break
- if lora_B != None:
+ elif lora_B != None:
  if module_shape[0] != v.shape[0]:
  if ignore_model_variations:
  skip = True
@@ -1086,28 +1136,56 @@ def load_loras_into_model(model, lora_path, lora_multi = None, activate_all_lora
  error_msg = append(error_msg, msg)
  fail = True
  break
+ elif diff != None:
+ lora_B = diff
+ if module_shape != v.shape:
+ if ignore_model_variations:
+ skip = True
+ else:
+ msg = f"Lora '{path}': Lora shape is not compatible with model '{_get_module_name(model)}' (model = {module_shape[0]}, lora = {v.shape[0]}). It is likely this Lora has been made for another version of this model."
+ error_msg = append(error_msg, msg)
+ fail = True
+ break
+ elif diff_b != None:
+ if module.bias == None:
+ msg = f"Lora '{path}': Lora Basis is defined while it doesnt exist in model '{_get_module_name(model)}'. It is likely this Lora has been made for another version of this model."
+ fail = True
+ break
+ else:
+ module_shape = module.bias.shape
+ if module_shape != v.shape:
+ if ignore_model_variations:
+ skip = True
+ else:
+ msg = f"Lora '{path}': Lora Basis dimension is not compatible with model '{_get_module_name(model)}' (model = {module_shape[0]}, lora Basis = {v.shape[0]}). It is likely this Lora has been made for another version of this model."
+ error_msg = append(error_msg, msg)
+ fail = True
+ break
+
  if not check_only:
  loras_module_data = loras_model_data.get(module, None)
+ if loras_module_data == None:
+ pass
  assert loras_module_data != None
- # if loras_module_data == None:
- # loras_module_data = dict()
- # loras_model_data[module] = loras_module_data
  loras_adapter_data = loras_module_data.get(adapter_name, None)
  lora_A = None if lora_A == None else lora_A.to(module.weight.dtype)
  lora_B = None if lora_B == None else lora_B.to(module.weight.dtype)
+ diff_b = None if diff_b == None else diff_b.to(module.weight.dtype)
  if loras_adapter_data == None:
  alpha = lora_alphas.get(k[:-len("lora_X.weight")] + "alpha", 1.)
- loras_adapter_data = [lora_A, lora_B, alpha]
+ loras_adapter_data = [lora_A, lora_B, diff_b, alpha]
  loras_module_data[adapter_name] = loras_adapter_data
  elif lora_A != None:
  loras_adapter_data[0] = lora_A
- else:
+ elif lora_B != None:
  loras_adapter_data[1] = lora_B
- lora_A, lora_B, v, loras_module_data, loras_adapter_data = None, None, None, None, None
+ else:
+ loras_adapter_data[2] = diff_b
+ lora_A, lora_B, diff, diff_b, v, loras_module_data, loras_adapter_data = None, None, None, None, None, None, None
  lora_alphas = None

  if len(invalid_keys) > 0:
- msg = "Lora '{path}' contains non Lora keys '{trunc(invalid_keys,200)}'"
+ msg = f"Lora '{path}' contains non Lora keys '{trunc(invalid_keys,200)}'"
  error_msg = append(error_msg, msg)
  fail = True
  if len(unexpected_keys) > 0:
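Beyond the original lora_A.weight / lora_B.weight pair, the parsing above now also accepts the lora_down.weight / lora_up.weight aliases as well as full ".diff" weight deltas and ".diff_b" bias deltas, and each adapter entry gains a fourth slot: [lora_A, lora_B, diff_b, alpha]. A standalone paraphrase of that key classification (an illustrative sketch, not the library's API):

def classify_lora_key(k: str):
    # Returns (module_name, kind) following the branching added in the hunk above.
    if k.endswith(".diff"):
        return k[:-len(".diff")], "diff"       # full weight delta
    if k.endswith(".diff_b"):
        return k[:-len(".diff_b")], "diff_b"   # bias delta
    pos = k.rfind(".lora_")
    if pos <= 0:
        return None, None                      # not a LoRA key
    suffix = k[pos + 1:]
    if suffix in ("lora_A.weight", "lora_down.weight"):
        return k[:pos], "lora_A"
    if suffix in ("lora_B.weight", "lora_up.weight"):
        return k[:pos], "lora_B"
    return None, None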
@@ -1127,7 +1205,9 @@ def load_loras_into_model(model, lora_path, lora_multi = None, activate_all_lora
  if not check_only:
  # model._loras_tied_weights[adapter_name] = tied_weights
  if pinnedLora:
- _pin_sd_to_memory(state_dict, path)
+ pinned_sd_list.append(state_dict)
+ pinned_names_list.append(path)
+ # _pin_sd_to_memory(state_dict, path)

  del state_dict

@@ -1146,6 +1226,8 @@ def load_loras_into_model(model, lora_path, lora_multi = None, activate_all_lora

  model._loras_errors = errors
  if not check_only:
+ if pinnedLora:
+ _pin_sd_to_memory(pinned_sd_list, pinned_names_list)
  model._loras_adapters = adapters
  if activate_all_loras:
  activate_loras(model, loras_nos, loras_multi)
@@ -1193,7 +1275,7 @@ def move_loras_to_device(model, device="cpu" ):
  if ".lora_" in k:
  m.to(device)

- def fast_load_transformers_model(model_path: str, do_quantize = False, quantizationType = qint8, pinToMemory = False, partialPinning = False, forcedConfigPath = None, modelClass=None, modelPrefix = None, writable_tensors = True, verboseLevel = -1):
+ def fast_load_transformers_model(model_path: str, do_quantize = False, quantizationType = qint8, pinToMemory = False, partialPinning = False, forcedConfigPath = None, modelClass=None, modelPrefix = None, writable_tensors = True, verboseLevel = -1, configKwargs ={}):
  """
  quick version of .LoadfromPretrained of the transformers library
  used to build a model and load the corresponding weights (quantized or not)
@@ -1235,6 +1317,7 @@ def fast_load_transformers_model(model_path: str, do_quantize = False, quantizat
  text = reader.read()
  transformer_config= json.loads(text)

+ transformer_config.update( configKwargs )

  if "architectures" in transformer_config:
  architectures = transformer_config["architectures"]
@@ -1254,7 +1337,6 @@ def fast_load_transformers_model(model_path: str, do_quantize = False, quantizat
  fp.close()
  config_obj = AutoConfig.from_pretrained(fp.name)
  os.remove(fp.name)
-
  #needed to keep inits of non persistent buffers
  with init_empty_weights():
  model = transfomer_class(config_obj)
@@ -1270,7 +1352,7 @@ def fast_load_transformers_model(model_path: str, do_quantize = False, quantizat
  transfomer_class = getattr(module, class_name)

  with init_empty_weights():
- model = transfomer_class.from_config(transformer_config)
+ model = transfomer_class.from_config(transformer_config )


  torch.set_default_device('cpu')
@@ -1650,10 +1732,9 @@ class offload:
  lora_data = lora_module.get(adapter, None)
  if lora_data == None:
  continue
- lora_A, lora_B, alpha = lora_data
  key = adapter + '_GPU'
  if to_GPU:
- lora_module[key] = [lora_A.cuda(non_blocking=True), lora_B.cuda(non_blocking=True), alpha]
+ lora_module[key] = [None if item == None else item.cuda(non_blocking=True) for item in lora_data[ :-1] ] + lora_data[ -1:]
  elif key in lora_module:
  del lora_module[key]

@@ -1876,27 +1957,64 @@ class offload:

  return False

- def _lora_linear_forward(self, model, submodule, loras_data, x: torch.Tensor, *args, **kwargs) -> torch.Tensor:
+ def _get_lora_scaling(self, loras_scaling, model, active_adapter):
+ scaling_list = loras_scaling[active_adapter]
+ if isinstance(scaling_list, list):
+ step_no =getattr(model, "_lora_step_no", 0)
+ return scaling_list[step_no]
+ else:
+ return float(scaling_list)

- def get_scaling(active_adapter):
- scaling_list = loras_scaling[active_adapter]
- if isinstance(scaling_list, list):
- step_no =getattr(model, "_lora_step_no", 0)
- return scaling_list[step_no]
- else:
- return float(scaling_list)

- weight = submodule.weight

- if loras_data == None:
- return torch.nn.functional.linear(x, weight, bias=submodule.bias)
+ def _lora_generic_forward(self, model, submodule, loras_data, func, *args, **kwargs) -> torch.Tensor:
+
+ weight = submodule.weight
+ bias = getattr(submodule, "bias", None)
+ original_weight = None
+ original_bias = None
+ active_adapters = model._loras_active_adapters
+ loras_scaling = model._loras_scaling
+ first_weight = True
+ first_bias = True
+ for active_adapter in active_adapters:
+ data = loras_data.get(active_adapter + '_GPU', None)
+ if data == None:
+ continue
+ diff_w , _ , diff_b, alpha = data
+ if first_weight:
+ original_weight= weight.clone() if weight != None else None
+ first_weight = False
+ if first_bias:
+ original_bias= bias.clone() if bias != None else None
+ first_bias = False
+ scaling = self._get_lora_scaling( loras_scaling, model, active_adapter) * alpha
+ if diff_w != None:
+ weight.add_(diff_w, alpha= scaling)
+ diff_w = None
+ if diff_b != None:
+ bias.add_(diff_b, alpha= scaling)
+ diff_b = None
+
+ ret = func(*args, **kwargs )
+
+ weight.data = original_weight if original_weight != None else None
+ if original_bias != None:
+ bias.data = original_bias
+
+ return ret
+

+ def _lora_linear_forward(self, model, submodule, loras_data, x: torch.Tensor, *args, **kwargs) -> torch.Tensor:
+ weight = submodule.weight
  active_adapters = model._loras_active_adapters
  loras_scaling = model._loras_scaling
  training = False

  dtype = weight.dtype
  if weight.shape[-1] < x.shape[-2] : # sum base weight and lora matrices instead of applying input on each sub lora matrice if input is too large. This will save a lot VRAM and compute
+ bias = submodule.bias
+ original_bias = True
  if len(active_adapters) > 0:
  if isinstance(submodule, QModuleMixin):
  weight = weight.view(weight.shape) # get a persistent copy of the on the fly dequantized weights
@@ -1908,16 +2026,25 @@ class offload:
  data = loras_data.get(active_adapter + '_GPU', None)
  if data == None:
  continue
- lora_A_weight, lora_B_weight, alpha = data
- scaling = get_scaling(active_adapter) * alpha
+ lora_A_weight, lora_B_weight, diff_b, alpha = data
+ scaling = self._get_lora_scaling(loras_scaling, model, active_adapter) * alpha
  weight.addmm_(lora_B_weight, lora_A_weight, alpha= scaling )
+ if diff_b != None:
+ if bias == None:
+ bias = diff_b.clone()
+ original_bias = False
+ elif original_bias:
+ bias = bias.clone()
+ original_bias = False
+ bias.add_(diff_b, alpha=scaling)
+
  # base_weight += scaling * lora_B_weight @ lora_A_weight

  if training:
  pass
  # result = torch.nn.functional.linear(dropout(x), base_weight, bias=submodule.bias)
  else:
- result = torch.nn.functional.linear(x, weight, bias=submodule.bias)
+ result = torch.nn.functional.linear(x, weight, bias=bias)

  else:
  result = torch.nn.functional.linear(x, weight, bias=submodule.bias)
@@ -1929,9 +2056,9 @@ class offload:
  data = loras_data.get(active_adapter + '_GPU', None)
  if data == None:
  continue
- lora_A, lora_B, alpha = data
+ lora_A, lora_B, diff_b, alpha = data
  # dropout = self.lora_dropout[active_adapter]
- scaling = get_scaling(active_adapter) * alpha
+ scaling = self._get_lora_scaling(loras_scaling, model, active_adapter) * alpha
  x = x.to(lora_A.dtype)

  if training:
@@ -1939,8 +2066,7 @@ class offload:
  # y = lora_A(dropout(x))
  else:
  y = torch.nn.functional.linear(x, lora_A, bias=None)
-
- y = torch.nn.functional.linear(y, lora_B, bias=None)
+ y = torch.nn.functional.linear(y, lora_B, bias=diff_b)
  y*= scaling
  result+= y
  del y
@@ -1948,19 +2074,27 @@ class offload:
  return result


- def hook_lora_linear(self, submodule, current_model, model_id, loras_model_data, submodule_name):
+ def hook_lora(self, submodule, current_model, model_id, loras_model_data, submodule_name):
  old_forward = submodule.forward

  loras_data = {}
  loras_model_data[submodule] = loras_data

- def lora_linear_forward(module, *args, **kwargs):
- if len(loras_data) == 0:
- return old_forward(*args, **kwargs)
- else:
- return self._lora_linear_forward(current_model, submodule, loras_data, *args, **kwargs)
-
- return functools.update_wrapper(functools.partial(lora_linear_forward, submodule), old_forward)
+ if isinstance(submodule, torch.nn.Linear):
+ def lora_linear_forward(module, *args, **kwargs):
+ if len(loras_data) == 0:
+ return old_forward(*args, **kwargs)
+ else:
+ return self._lora_linear_forward(current_model, submodule, loras_data, *args, **kwargs)
+ target_fn = lora_linear_forward
+ else:
+ def lora_generic_forward(module, *args, **kwargs):
+ if len(loras_data) == 0:
+ return old_forward(*args, **kwargs)
+ else:
+ return self._lora_generic_forward(current_model, submodule, loras_data, old_forward, *args, **kwargs)
+ target_fn = lora_generic_forward
+ return functools.update_wrapper(functools.partial(target_fn, submodule), old_forward)

  def ensure_model_loaded(self, model_id):
  if model_id in self.active_models_ids:
@@ -2413,8 +2547,9 @@ def all(pipe_or_dict_of_modules, pinnedMemory = False, pinnedPEFTLora = False, p


  if hasattr(submodule, "forward"):
- if any_lora and isinstance(submodule, torch.nn.Linear):
- submodule_method = self.hook_lora_linear(submodule, current_model, model_id, loras_model_data, submodule_name)
+ # if any_lora and isinstance(submodule, ( torch.nn.Linear, torch.nn.Conv3d, torch.nn.LayerNorm)):
+ if any_lora and hasattr(submodule,"weight"):
+ submodule_method = self.hook_lora(submodule, current_model, model_id, loras_model_data, submodule_name)
  else:
  submodule_method = getattr(submodule, "forward")
  if callable(submodule_method):
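fast_load_transformers_model also gains a configKwargs argument whose entries are merged into the transformer config read from disk (via transformer_config.update(configKwargs)) before the model class is instantiated. A hedged usage sketch, assuming the usual "from mmgp import offload" entry point; the checkpoint path and the overridden config key are placeholders, not values taken from this release:

from mmgp import offload

model = offload.fast_load_transformers_model(
    "ckpts/text_encoder/model.safetensors",        # placeholder path
    do_quantize=True,
    configKwargs={"attn_implementation": "sdpa"},  # placeholder override merged into the loaded config
)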
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: mmgp
- Version: 3.4.6
+ Version: 3.4.7
  Summary: Memory Management for the GPU Poor
  Author-email: deepbeepmeep <deepbeepmeep@yahoo.com>
  Requires-Python: >=3.10
@@ -15,7 +15,7 @@ Dynamic: license-file


  <p align="center">
- <H2>Memory Management 3.4.6 for the GPU Poor by DeepBeepMeep</H2>
+ <H2>Memory Management 3.4.7 for the GPU Poor by DeepBeepMeep</H2>
  </p>


@@ -0,0 +1,9 @@
+ __init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ mmgp/__init__.py,sha256=A9qBwyQMd1M7vshSTOBnFGP1MQvS2hXmTcTCMUcmyzE,509
+ mmgp/offload.py,sha256=wf5u4qaGKYm6OTyGg4TXCa3aA0h3nuhnml7qOzn6JOY,121124
+ mmgp/safetensors2.py,sha256=4nKV13qCMabnNEB1TA_ueFbfGYYmiQ9racR_C6SsGug,18693
+ mmgp-3.4.7.dist-info/licenses/LICENSE.md,sha256=HjzvY2grdtdduZclbZ46B2M-XpT4MDCxFub5ZwTWq2g,93
+ mmgp-3.4.7.dist-info/METADATA,sha256=DztABKlGwAcKYogKuTzsOWs0he3elTFZXAkWpH4yIEU,16309
+ mmgp-3.4.7.dist-info/WHEEL,sha256=zaaOINJESkSfm_4HQVc5ssNzHCPXhJm0kEUakpsEHaU,91
+ mmgp-3.4.7.dist-info/top_level.txt,sha256=waGaepj2qVfnS2yAOkaMu4r9mJaVjGbEi6AwOUogU_U,14
+ mmgp-3.4.7.dist-info/RECORD,,
@@ -1,9 +0,0 @@
1
- __init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- mmgp/__init__.py,sha256=A9qBwyQMd1M7vshSTOBnFGP1MQvS2hXmTcTCMUcmyzE,509
3
- mmgp/offload.py,sha256=2oWFiDcwIx3lGOb_6_aac1zzIIF-nhP8bwOA-G9HxsU,114594
4
- mmgp/safetensors2.py,sha256=4nKV13qCMabnNEB1TA_ueFbfGYYmiQ9racR_C6SsGug,18693
5
- mmgp-3.4.6.dist-info/licenses/LICENSE.md,sha256=HjzvY2grdtdduZclbZ46B2M-XpT4MDCxFub5ZwTWq2g,93
6
- mmgp-3.4.6.dist-info/METADATA,sha256=kv9OfYHAAHKyiv9p9vrf4guU3tNd0I7vUgQ6xm7dkk8,16309
7
- mmgp-3.4.6.dist-info/WHEEL,sha256=zaaOINJESkSfm_4HQVc5ssNzHCPXhJm0kEUakpsEHaU,91
8
- mmgp-3.4.6.dist-info/top_level.txt,sha256=waGaepj2qVfnS2yAOkaMu4r9mJaVjGbEi6AwOUogU_U,14
9
- mmgp-3.4.6.dist-info/RECORD,,
File without changes