mmgp 3.3.1__py3-none-any.whl → 3.3.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mmgp might be problematic.

mmgp/offload.py CHANGED
@@ -1,4 +1,4 @@
- # ------------------ Memory Management 3.3.1 for the GPU Poor by DeepBeepMeep (mmgp)------------------
+ # ------------------ Memory Management 3.3.3 for the GPU Poor by DeepBeepMeep (mmgp)------------------
  #
  # This module contains multiples optimisations so that models such as Flux (and derived), Mochi, CogView, HunyuanVideo, ... can run smoothly on a 24 GB GPU limited card.
  # This a replacement for the accelerate library that should in theory manage offloading, but doesn't work properly with models that are loaded / unloaded several
@@ -258,11 +258,11 @@ def _move_to_pinned_tensor(source_tensor, big_tensor, offset, length):
  assert t.is_pinned()
  return t

- def _safetensors_load_file(file_path):
+ def _safetensors_load_file(file_path, writable_tensors = True):
  from collections import OrderedDict
  sd = OrderedDict()

- with safetensors2.safe_open(file_path, framework="pt", device="cpu") as f:
+ with safetensors2.safe_open(file_path, framework="pt", device="cpu", writable_tensors =writable_tensors) as f:
  for k in f.keys():
  sd[k] = f.get_tensor(k)
  metadata = f.metadata()
@@ -401,7 +401,7 @@ def _pin_sd_to_memory(sd, sd_name, tied_weights = None, gig_tensor_size = BIG_TE
  return


- def _pin_to_memory(model, model_id, partialPinning = False, pinnedPEFTLora = True, gig_tensor_size = BIG_TENSOR_MAX_SIZE, verboseLevel = 1):
+ def _pin_to_memory(model, model_id, partialPinning = False, pinnedPEFTLora = True, big_tensor_size = BIG_TENSOR_MAX_SIZE, verboseLevel = 1):

  global max_pinnable_bytes, total_pinned_bytes
  if max_pinnable_bytes > 0 and max_pinnable_bytes >= max_pinnable_bytes:
@@ -474,7 +474,7 @@ def _pin_to_memory(model, model_id, partialPinning = False, pinnedPEFTLora = Tru
  length = torch.numel(p.data) * p.data.element_size()

  ref_cache[ref] = (n, length)
- if current_big_tensor_size + length > gig_tensor_size :
+ if current_big_tensor_size + length > big_tensor_size and current_big_tensor_size !=0 :
  big_tensors_sizes.append(current_big_tensor_size)
  current_big_tensor_size = 0
  big_tensor_no += 1
@@ -498,31 +498,14 @@ def _pin_to_memory(model, model_id, partialPinning = False, pinnedPEFTLora = Tru
  big_tensors_sizes.append(current_big_tensor_size)

  big_tensors = []
- last_big_tensor = 0
  total = 0


  failed_planned_allocation = False

- for size in big_tensors_sizes:
- try:
- # if total > 7000 * ONE_MB:
- # raise Exception ("test no more reserved RAM")
- current_big_tensor = torch.empty( size, dtype= torch.uint8, pin_memory=True, device="cpu")
- big_tensors.append(current_big_tensor)
- except:
- print(f"Unable to pin more tensors for this model as the maximum reservable memory has been reached ({total/ONE_MB:.2f})")
- max_pinnable_bytes = total + total_pinned_bytes
- failed_planned_allocation = True
- break
-
- last_big_tensor += 1
- total += size
-
-
  gc.collect()

-
+ last_allocated_big_tensor = -1
  tensor_no = 0
  # prev_big_tensor = 0
  for n, (p, is_buffer) in params_dict.items():
@@ -543,37 +526,47 @@ def _pin_to_memory(model, model_id, partialPinning = False, pinnedPEFTLora = Tru
  assert p.data.is_pinned()
  q = None
  else:
+
  big_tensor_no, offset, length = tensor_map_indexes[tensor_no]
- # if big_tensor_no != prev_big_tensor:
- # gc.collect()
- # prev_big_tensor = big_tensor_no
- # match_param, match_isbuffer = tied_weights.get(n, (None, False))
- # if match_param != None:
+ if last_allocated_big_tensor < big_tensor_no:
+ last_allocated_big_tensor += 1
+ size = big_tensors_sizes[last_allocated_big_tensor]
+ try:
+ # if total > 7000 * ONE_MB:
+ # raise Exception ("test no more reserved RAM")
+ current_big_tensor = torch.empty( size, dtype= torch.uint8, pin_memory=True, device="cpu")
+ big_tensors.append(current_big_tensor)
+ except:
+ print(f"Unable to pin more tensors for this model as the maximum reservable memory has been reached ({total/ONE_MB:.2f})")
+ max_pinnable_bytes = total + total_pinned_bytes
+ failed_planned_allocation = True
+ break

- if big_tensor_no>=0 and big_tensor_no < last_big_tensor:
- current_big_tensor = big_tensors[big_tensor_no]
- if is_buffer :
- _force_load_buffer(p) # otherwise potential memory leak
- if isinstance(p, QTensor):
- if p._qtype == qint4:
- length1 = torch.numel(p._data._data) * p._data._data.element_size()
- p._data._data = _move_to_pinned_tensor(p._data._data, current_big_tensor, offset, length1)
- if hasattr(p,"_scale_shift"):
- length2 = torch.numel(p._scale_shift) * p._scale_shift.element_size()
- p._scale_shift = _move_to_pinned_tensor(p._scale_shift, current_big_tensor, offset + length1, length2)
- else:
- length2 = torch.numel(p._scale) * p._scale.element_size()
- p._scale = _move_to_pinned_tensor(p._scale, current_big_tensor, offset + length1, length2)
- length3 = torch.numel(p._shift) * p._shift.element_size()
- p._shift = _move_to_pinned_tensor(p._shift, current_big_tensor, offset + length1 + length2, length3)
+ total += size
+
+ current_big_tensor = big_tensors[big_tensor_no]
+ if is_buffer :
+ _force_load_buffer(p) # otherwise potential memory leak
+ if isinstance(p, QTensor):
+ if p._qtype == qint4:
+ length1 = torch.numel(p._data._data) * p._data._data.element_size()
+ p._data._data = _move_to_pinned_tensor(p._data._data, current_big_tensor, offset, length1)
+ if hasattr(p,"_scale_shift"):
+ length2 = torch.numel(p._scale_shift) * p._scale_shift.element_size()
+ p._scale_shift = _move_to_pinned_tensor(p._scale_shift, current_big_tensor, offset + length1, length2)
  else:
- length1 = torch.numel(p._data) * p._data.element_size()
- p._data = _move_to_pinned_tensor(p._data, current_big_tensor, offset, length1)
  length2 = torch.numel(p._scale) * p._scale.element_size()
  p._scale = _move_to_pinned_tensor(p._scale, current_big_tensor, offset + length1, length2)
+ length3 = torch.numel(p._shift) * p._shift.element_size()
+ p._shift = _move_to_pinned_tensor(p._shift, current_big_tensor, offset + length1 + length2, length3)
  else:
- length = torch.numel(p.data) * p.data.element_size()
- p.data = _move_to_pinned_tensor(p.data, current_big_tensor, offset, length)
+ length1 = torch.numel(p._data) * p._data.element_size()
+ p._data = _move_to_pinned_tensor(p._data, current_big_tensor, offset, length1)
+ length2 = torch.numel(p._scale) * p._scale.element_size()
+ p._scale = _move_to_pinned_tensor(p._scale, current_big_tensor, offset + length1, length2)
+ else:
+ length = torch.numel(p.data) * p.data.element_size()
+ p.data = _move_to_pinned_tensor(p.data, current_big_tensor, offset, length)
  tensor_no += 1
  del p
  model._pinned_bytes = total
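
The hunk above replaces the old upfront allocation loop with lazy allocation: a pinned block is now created the first time a tensor is mapped into it, so if page-locked RAM runs out, pinning stops exactly where the budget ends instead of reserving every block in advance. A minimal standalone sketch of that pattern (the block sizes and the get_pinned_block helper are illustrative, not the package's actual code):

    import torch

    # Hypothetical pinned block sizes in bytes; mmgp derives them from the model's tensors.
    block_sizes = [64 << 20, 64 << 20, 32 << 20]
    blocks = []

    def get_pinned_block(block_no):
        """Allocate pinned CPU blocks lazily: block N is created only when a tensor needs it."""
        while len(blocks) <= block_no:
            try:
                blocks.append(torch.empty(block_sizes[len(blocks)], dtype=torch.uint8,
                                          pin_memory=True, device="cpu"))
            except RuntimeError:
                # Page-locked RAM exhausted: give up on the remaining blocks instead of failing outright.
                return None
        return blocks[block_no]
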
@@ -583,9 +576,9 @@ def _pin_to_memory(model, model_id, partialPinning = False, pinnedPEFTLora = Tru

  if verboseLevel >=1:
  if partialPinning or failed_planned_allocation:
- print(f"The model was partially pinned to reserved RAM: {last_big_tensor} large blocks spread across {total/ONE_MB:.2f} MB")
+ print(f"The model was partially pinned to reserved RAM: {last_allocated_big_tensor + 1} large blocks spread across {total/ONE_MB:.2f} MB")
  else:
- print(f"The whole model was pinned to reserved RAM: {last_big_tensor} large blocks spread across {total/ONE_MB:.2f} MB")
+ print(f"The whole model was pinned to reserved RAM: {last_allocated_big_tensor + 1} large blocks spread across {total/ONE_MB:.2f} MB")

  model._already_pinned = True

@@ -598,7 +591,7 @@ def _welcome():
  if welcome_displayed:
  return
  welcome_displayed = True
- print(f"{BOLD}{HEADER}************ Memory Management for the GPU Poor (mmgp 3.3.1) by DeepBeepMeep ************{ENDC}{UNBOLD}")
+ print(f"{BOLD}{HEADER}************ Memory Management for the GPU Poor (mmgp 3.3.3) by DeepBeepMeep ************{ENDC}{UNBOLD}")

  def _extract_num_from_str(num_in_str):
  size = len(num_in_str)
@@ -932,7 +925,7 @@ def load_loras_into_model(model, lora_path, lora_multi = None, activate_all_lora
  continue
  fail = False
  skip = False
- state_dict = safetensors2.torch_load_file(path)
+ state_dict = safetensors2.torch_load_file(path, writable_tensors= False)



@@ -1151,7 +1144,7 @@ def move_loras_to_device(model, device="cpu" ):
  if ".lora_" in k:
  m.to(device)

- def fast_load_transformers_model(model_path: str, do_quantize = False, quantizationType = qint8, pinToMemory = False, partialPinning = False, forcedConfigPath = None, modelClass=None, modelPrefix = None, verboseLevel = -1):
+ def fast_load_transformers_model(model_path: str, do_quantize = False, quantizationType = qint8, pinToMemory = False, partialPinning = False, forcedConfigPath = None, modelClass=None, modelPrefix = None, writable_tensors = True, verboseLevel = -1):
  """
  quick version of .LoadfromPretrained of the transformers library
  used to build a model and load the corresponding weights (quantized or not)
@@ -1167,7 +1160,7 @@ def fast_load_transformers_model(model_path: str, do_quantize = False, quantizat

  verboseLevel = _compute_verbose_level(verboseLevel)

- with safetensors2.safe_open(model_path) as f:
+ with safetensors2.safe_open(model_path, writable_tensors =writable_tensors) as f:
  metadata = f.metadata()

  if metadata is None:
@@ -1231,13 +1224,13 @@ def fast_load_transformers_model(model_path: str, do_quantize = False, quantizat

  model._config = transformer_config

- load_model_data(model,model_path, do_quantize = do_quantize, quantizationType = quantizationType, pinToMemory= pinToMemory, partialPinning= partialPinning, modelPrefix = modelPrefix, verboseLevel=verboseLevel )
+ load_model_data(model,model_path, do_quantize = do_quantize, quantizationType = quantizationType, pinToMemory= pinToMemory, partialPinning= partialPinning, modelPrefix = modelPrefix, writable_tensors =writable_tensors ,verboseLevel=verboseLevel )

  return model



- def load_model_data(model, file_path: str, do_quantize = False, quantizationType = qint8, pinToMemory = False, partialPinning = False, modelPrefix = None, verboseLevel = -1):
+ def load_model_data(model, file_path: str, do_quantize = False, quantizationType = qint8, pinToMemory = False, partialPinning = False, modelPrefix = None, writable_tensors = True, verboseLevel = -1):
  """
  Load a model, detect if it has been previously quantized using quanto and do the extra setup if necessary
  """
@@ -1275,7 +1268,7 @@ def load_model_data(model, file_path: str, do_quantize = False, quantizationType
  if "module" in state_dict:
  state_dict = state_dict["module"]
  else:
- state_dict, metadata = _safetensors_load_file(file_path)
+ state_dict, metadata = _safetensors_load_file(file_path, writable_tensors =writable_tensors)

  if metadata is None:
  quantization_map = None
@@ -1470,7 +1463,6 @@ class offload:
  def __init__(self):
  self.active_models = []
  self.active_models_ids = []
- self.active_subcaches = {}
  self.models = {}
  self.cotenants_map = {
  "text_encoder": ["vae", "text_encoder_2"],
@@ -1732,7 +1724,6 @@ class offload:

  self.active_models = []
  self.active_models_ids = []
- self.active_subcaches = []
  torch.cuda.empty_cache()
  gc.collect()
  self.last_reserved_mem_check = time.time()
@@ -2051,20 +2042,23 @@ class offload:
  last_offload_obj = None

  self.unload_all()
- self.default_stream = None
+ self.active_models = None
+ self.default_stream = None
+ self.transfer_stream = None
+ self.parameters_ref = None
  keys= [k for k in self.blocks_of_modules.keys()]
  for k in keys:
  del self.blocks_of_modules[k]

  self.blocks_of_modules = None

-
  for model_id, model in self.models.items():
  move_loras_to_device(model, "cpu")
  if hasattr(model, "_pinned_bytes"):
  total_pinned_bytes -= model._pinned_bytes
  if hasattr(model, "_loras_model_data"):
  unload_loras_from_model(model)
+ model = None

  self.models = None

@@ -2074,7 +2068,7 @@



- def all(pipe_or_dict_of_modules, pinnedMemory = False, pinnedPEFTLora = False, partialPinning = False, loras = None, quantizeTransformer = True, extraModelsToQuantize = None, quantizationType = qint8, budgets= 0, workingVRAM = None, asyncTransfers = True, compile = False, perc_reserved_mem_max = 0, coTenantsMap = None, verboseLevel = -1):
+ def all(pipe_or_dict_of_modules, pinnedMemory = False, pinnedPEFTLora = False, partialPinning = False, loras = None, quantizeTransformer = True, extraModelsToQuantize = None, quantizationType = qint8, budgets= 0, workingVRAM = None, asyncTransfers = True, compile = False, convertFloatToBfloat16 = True, perc_reserved_mem_max = 0, coTenantsMap = None, verboseLevel = -1):
  """Hook to a pipeline or a group of modules in order to reduce their VRAM requirements:
  pipe_or_dict_of_modules : the pipeline object or a dictionary of modules of the model
  quantizeTransformer: set True by default will quantize on the fly the video / image model
@@ -2181,7 +2175,6 @@ def all(pipe_or_dict_of_modules, pinnedMemory = False, pinnedPEFTLora = False, p
  modelPinned = (pinAllModels or model_id in modelsToPin) and not hasattr(current_model,"_already_pinned")

  current_model_size = 0
-
  for n, p in current_model.named_parameters():
  p.requires_grad = False
  if isinstance(p, QTensor):
@@ -2201,7 +2194,7 @@ def all(pipe_or_dict_of_modules, pinnedMemory = False, pinnedPEFTLora = False, p
  current_model_size += torch.numel(p._data) * p._data.element_size()

  else:
- if p.data.dtype == torch.float32:
+ if convertFloatToBfloat16 and p.data.dtype == torch.float32:
  # convert any left overs float32 weight to bloat16 to divide by 2 the model memory footprint
  p.data = p.data.to(torch.bfloat16)
  current_model_size += torch.numel(p.data) * p.data.element_size()
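
Taken together, the offload.py changes add two opt-outs to this release's public entry points: writable_tensors (load weights as read-only memory-mapped views instead of copy-on-write ones) and convertFloatToBfloat16 (skip the automatic float32 to bfloat16 conversion). A hedged usage sketch, assuming the `from mmgp import offload` entry point shown in the project README; the file path and module dictionary are placeholders:

    from mmgp import offload

    # Read-only tensors avoid copy-on-write mappings of the checkpoint.
    transformer = offload.fast_load_transformers_model("model.safetensors", writable_tensors=False)

    # convertFloatToBfloat16=False keeps any remaining float32 weights in float32.
    offload.all({"transformer": transformer}, pinnedMemory=True, convertFloatToBfloat16=False)
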
mmgp/safetensors2.py CHANGED
@@ -1,4 +1,4 @@
- # ------------------ Safetensors2 1.0 by DeepBeepMeep (mmgp)------------------
+ # ------------------ Safetensors2 1.1 by DeepBeepMeep (mmgp)------------------
  #
  # This module entirely written in Python is a replacement for the safetensor library which requires much less RAM to load models.
  # It can be conveniently used to keep a low RAM consumption when handling transit data (for instance when quantizing or transferring tensors to reserver RAM)
@@ -16,12 +16,14 @@ import safetensors
  import accelerate
  import os
  from collections import OrderedDict
+ import warnings

+ warnings.filterwarnings("ignore", ".*The given buffer is not writable, and PyTorch does not support non-writable tensors*")

  _old_torch_load_file = None
  _old_safe_open = None

-
+ all_tensors_are_read_only = False

  mmm = {}
  verboseLevel = 1
@@ -232,7 +234,7 @@ def torch_write_file(sd, file_path, quantization_map = None, config = None, extr
  class SafeTensorFile:
  """Main class for accessing safetensors files that provides memory-efficient access"""

- def __init__(self, file_path, metadata, catalog, skip_bytes, lazy_loading = True):
+ def __init__(self, file_path, metadata, catalog, skip_bytes, lazy_loading = True, writable_tensors = True):
  self._file_path = file_path
  self._metadata = metadata
  self._catalog = catalog
@@ -241,19 +243,20 @@ class SafeTensorFile:
  self.sd = None
  self.mtracker = None
  self.lazy_loading = lazy_loading
+ self.writable_tensors = writable_tensors

  @classmethod
- def load_metadata(cls, file_path, lazy_loading = True):
+ def load_metadata(cls, file_path, lazy_loading = True, writable_tensors = True):
  with open(file_path, 'rb') as f:
  catalog, metadata, skip_bytes = _read_safetensors_header(file_path, f)

- return cls(file_path, metadata, catalog, skip_bytes, lazy_loading)
+ return cls(file_path, metadata, catalog, skip_bytes, lazy_loading, writable_tensors )

- def init_tensors(self, lazyTensors = True):
+ def init_tensors(self, lazyTensors = True, writable_tensors = True):
  if self.sd is None:
  self.lazy_loading = lazyTensors
  if lazyTensors:
- self.sd = self.create_tensors_with_mmap()
+ self.sd = self.create_tensors_with_mmap(writable_tensors)
  else:
  self.sd = self.create_tensors_without_mmap()
  # else:
@@ -263,7 +266,7 @@ class SafeTensorFile:
  return self.sd


- def create_tensors_with_mmap(self):
+ def create_tensors_with_mmap(self, writable_tensors = True):

  self.mtracker = MmapTracker(self._file_path)
  import mmap
@@ -302,7 +305,7 @@ class SafeTensorFile:
  with open(self._file_path, 'rb') as f:
  i = 0
  for map_start, map_size in maps_info:
- mm = mmap.mmap(f.fileno(), map_size, offset=map_start, access=mmap.ACCESS_COPY) #.ACCESS_READ
+ mm = mmap.mmap(f.fileno(), map_size, offset=map_start, access= mmap.ACCESS_COPY if writable_tensors else mmap.ACCESS_READ)
  maps.append((mm, map_start, map_size))
  self.mtracker.register(mm, i, map_start, map_size)
  i = i+ 1
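
This access-mode switch is what writable_tensors ultimately controls: mmap.ACCESS_COPY gives private copy-on-write pages, so the resulting tensors can be modified without touching the file, while mmap.ACCESS_READ shares read-only pages, which is cheaper but makes torch.frombuffer emit the "given buffer is not writable" warning that the module now filters. A standalone sketch of the difference, using a throwaway file rather than anything from the package:

    import mmap, os, tempfile
    import torch

    # Write a small buffer to disk so it can be memory-mapped.
    path = os.path.join(tempfile.mkdtemp(), "demo.bin")
    with open(path, "wb") as f:
        f.write(bytes(range(16)))

    with open(path, "rb") as f:
        ro = mmap.mmap(f.fileno(), 16, access=mmap.ACCESS_READ)   # shared, read-only pages
        cow = mmap.mmap(f.fileno(), 16, access=mmap.ACCESS_COPY)  # private copy-on-write pages

    t_ro = torch.frombuffer(ro, dtype=torch.uint8)    # warns: buffer is not writable
    t_cow = torch.frombuffer(cow, dtype=torch.uint8)  # writable view backed by the private copy
    t_cow[0] = 255                                    # modifies the copy, never the file on disk
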
@@ -359,7 +362,7 @@ class SafeTensorFile:
  def get_tensor(self, name: str) -> torch.tensor:
  """Get a tensor by name"""
  # To do : switch to a JIT tensor creation per tensor
- self.init_tensors()
+ self.init_tensors(self.lazy_loading, writable_tensors= self.writable_tensors)
  return self.sd[name]

  def keys(self) -> List[str]:
@@ -374,7 +377,7 @@ class SafeTensorFile:

  def tensors(self) -> Dict[str, torch.tensor]:
  """Get dictionary of all tensors"""
- self.init_tensors(self.lazy_loading)
+ self.init_tensors(self.lazy_loading, writable_tensors= self.writable_tensors)
  return self.sd

  def metadata(self) -> Optional[Dict[str, str]]:
@@ -383,7 +386,7 @@ class SafeTensorFile:

  def __len__(self) -> int:
  """Get number of tensors"""
- self.init_tensors(self.lazy_loading)
+ self.init_tensors(self.lazy_loading, writable_tensors= self.writable_tensors)
  return len(self.keys())

  def __contains__(self, key: str) -> bool:
@@ -401,17 +404,22 @@ class SafeTensorFile:
  class _SafeTensorLoader:
  """Context manager for loading SafeTensorFile"""

- def __init__(self, filename: str ):
+ def __init__(self, filename: str, writable_tensors = True ):
  self.filename = Path(filename)
+ self.writable_tensors = writable_tensors
  self.sft = None
  if not self.filename.exists():
  raise FileNotFoundError(f"File not found: {filename}")

  def __enter__(self) -> SafeTensorFile:
  """Open file and return SafeTensorFile instance"""
-
+ writable_tensors = self.writable_tensors
+
+ if all_tensors_are_read_only:
+ writable_tensors = False
+
  try:
- self.sft = SafeTensorFile.load_metadata(self.filename)
+ self.sft = SafeTensorFile.load_metadata(self.filename, writable_tensors= writable_tensors)
  return self.sft

  except Exception as e:
@@ -428,14 +436,14 @@ class _SafeTensorLoader:
  pass


- def safe_open(filename: str, framework: str = "pt",device = "cpu") -> _SafeTensorLoader:
+ def safe_open(filename: str, framework: str = "pt",device = "cpu", writable_tensors = True) -> _SafeTensorLoader:
  if device != "cpu" or framework !="pt":
  return _old_safe_open(filename =filename, framework=framework, device=device)
- return _SafeTensorLoader(filename)
+ return _SafeTensorLoader(filename, writable_tensors = writable_tensors)

- def torch_load_file( filename, device = 'cpu' ) -> Dict[str, torch.Tensor]:
+ def torch_load_file( filename, device = 'cpu', writable_tensors = True) -> Dict[str, torch.Tensor]:
  sd = {}
- with safe_open(filename, framework="pt", device = device ) as f:
+ with safe_open(filename, framework="pt", device = device, writable_tensors =writable_tensors ) as f:
  for k in f.keys():
  sd[k] = f.get_tensor(k)
  return sd
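
Net effect of the safetensors2.py changes: callers can request read-only, memory-mapped tensors either per call or globally through the new all_tensors_are_read_only module flag. A usage sketch against the signatures shown above (file paths are placeholders):

    from mmgp import safetensors2

    # Per call: LoRA weights are not modified in place, so a read-only mapping is enough.
    lora_sd = safetensors2.torch_load_file("lora.safetensors", writable_tensors=False)

    # Globally: every subsequent safe_open / torch_load_file maps with ACCESS_READ.
    safetensors2.all_tensors_are_read_only = True
    with safetensors2.safe_open("model.safetensors", framework="pt", device="cpu") as f:
        first_key = next(iter(f.keys()))
        tensor = f.get_tensor(first_key)
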
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: mmgp
- Version: 3.3.1
+ Version: 3.3.3
  Summary: Memory Management for the GPU Poor
  Author-email: deepbeepmeep <deepbeepmeep@yahoo.com>
  License: GNU GENERAL PUBLIC LICENSE
@@ -17,7 +17,7 @@ Dynamic: license-file


  <p align="center">
- <H2>Memory Management 3.3.1 for the GPU Poor by DeepBeepMeep</H2>
+ <H2>Memory Management 3.3.3 for the GPU Poor by DeepBeepMeep</H2>
  </p>


@@ -0,0 +1,9 @@
+ __init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ mmgp/__init__.py,sha256=A9qBwyQMd1M7vshSTOBnFGP1MQvS2hXmTcTCMUcmyzE,509
+ mmgp/offload.py,sha256=R0UbOXEGAFKd_6090o8v5CkVmJiWmHDQsww7A3-LZEU,106550
+ mmgp/safetensors2.py,sha256=rmUbBmK3Dra5prUTTRSVi6-XUFAa9Mj6B5CNPgzt9To,17333
+ mmgp-3.3.3.dist-info/licenses/LICENSE.md,sha256=HjzvY2grdtdduZclbZ46B2M-XpT4MDCxFub5ZwTWq2g,93
+ mmgp-3.3.3.dist-info/METADATA,sha256=xcODp7uhIfvy7Il1xEp8ed2VYmH1Eln-EnLy3MM4VGM,16153
+ mmgp-3.3.3.dist-info/WHEEL,sha256=DK49LOLCYiurdXXOXwGJm6U4DkHkg4lcxjhqwRa0CP4,91
+ mmgp-3.3.3.dist-info/top_level.txt,sha256=waGaepj2qVfnS2yAOkaMu4r9mJaVjGbEi6AwOUogU_U,14
+ mmgp-3.3.3.dist-info/RECORD,,
@@ -1,5 +1,5 @@
  Wheel-Version: 1.0
- Generator: setuptools (77.0.3)
+ Generator: setuptools (78.0.2)
  Root-Is-Purelib: true
  Tag: py3-none-any

@@ -1,9 +0,0 @@
- __init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- mmgp/__init__.py,sha256=A9qBwyQMd1M7vshSTOBnFGP1MQvS2hXmTcTCMUcmyzE,509
- mmgp/offload.py,sha256=hG-gMFeHsRkjaPan_lwiTsQOctkXylJMiWhyL3KvGQA,106337
- mmgp/safetensors2.py,sha256=DCdlRH3769CTyraAmWAB3b0XrVua7z6ygQ-OyKgJN6A,16453
- mmgp-3.3.1.dist-info/licenses/LICENSE.md,sha256=HjzvY2grdtdduZclbZ46B2M-XpT4MDCxFub5ZwTWq2g,93
- mmgp-3.3.1.dist-info/METADATA,sha256=SF0kLwi8zGHF1F53ZxFDZq5bDCWE39l-A24tYeyyhHo,16153
- mmgp-3.3.1.dist-info/WHEEL,sha256=1tXe9gY0PYatrMPMDd6jXqjfpz_B-Wqm32CPfRC58XU,91
- mmgp-3.3.1.dist-info/top_level.txt,sha256=waGaepj2qVfnS2yAOkaMu4r9mJaVjGbEi6AwOUogU_U,14
- mmgp-3.3.1.dist-info/RECORD,,