mmgp 2.0.4__py3-none-any.whl → 3.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.



mmgp/safetensors2.py ADDED
@@ -0,0 +1,387 @@
+ from typing import Optional, Dict, List, Iterator, Tuple
+ from pathlib import Path
+ import torch
+ import mmap
+ import struct
+ import json
+ import base64
+ import safetensors
+ import accelerate
+ import os
+ from collections import OrderedDict
+
+
+ _old_torch_load_file = None
+ _old_safe_open = None
+
+
+
+ mmm = {}
+ verboseLevel = 1
+
+ import weakref
+
+ _map_to_dtype = { 'BF16': torch.bfloat16, 'U8': torch.uint8 , 'U16': torch.uint16, 'U32' : torch.uint32 , 'U64' : torch.uint64,
+                   'I8': torch.int8, 'I16': torch.int16, 'I32' : torch.int32 , 'I64' : torch.int64,
+                   'F64' : torch.float64, 'F32': torch.float32, 'F16': torch.float16, 'BOOL' : torch.bool, "F8_E5M2" : torch.float8_e5m2, "F8_E4M3" : torch.float8_e4m3fn }
+
+
+ class MmapTracker:
+     def __init__(self, file_path):
+         self._maps = {}
+         self._already_released = 0
+         from pathlib import Path
+         s = Path(file_path).parts
+         if len(s)>2:
+             s = s[-2:]
+             file_path = os.path.join(*s)
+         self.file_path = file_path # os.path.abspath(file_path)
+         self.count = 0
+         mmm[file_path] = self
+
+     def register(self, mmap_obj, map_id, start, size):
+
+         self.count += 1
+         def finalizer(ref):
+             self._already_released += 1
+             if verboseLevel >=2:
+                 if self.count == self._already_released:
+                     text =" (all the mmaps have been released)"
+                 else:
+                     text =f" ({self.count-self._already_released:} left)"
+
+                 print(f"MMap Manager of file '{self.file_path}' : MMap no {map_id} has been released" + text)
+             if self.count == self._already_released:
+                 del mmm[self.file_path]
+
+             self._maps.pop(map_id, None)
+
+         wr = weakref.ref(mmap_obj, finalizer)
+         self._maps[map_id] = {
+             'mmap' : wr,
+             'start': start,
+             'size': size,
+             'end': start + size
+         }
+         return wr
+
+     def get_active_maps(self):
+         return dict(self._maps)
+
+
+ class cached_metadata:
+     file_path = None
+     file_length = 0
+     file_date = None
+     catalog = None
+     metadata = None
+     skip_bytes = 0
+
+     def __init__(self, file_path, catalog, metadata, skip_bytes):
+         self.catalog = catalog
+         self.metadata = metadata
+         self.skip_bytes = skip_bytes
+         file_stats = os.stat(file_path)
+         self.file_path = os.path.abspath(file_path)
+         self.file_length = file_stats.st_size
+         self.file_date = file_stats.st_ctime
+
+     def get_metadata(self, file_path):
+         file_stats = os.stat(file_path)
+         file_length = file_stats.st_size
+         file_date = file_stats.st_ctime
+         file_path = os.path.abspath(file_path)
+         if self.file_path != file_path or self.file_length != file_length or self.file_date != file_date:
+             return None, None, None
+         return self.catalog, self.metadata, self.skip_bytes
+
+ _cached_entry = None # ideally we should create a dict of the last n entries but one entry covers most cases
+
+ def _parse_metadata(metadata):
+     if metadata == None:
+         return None
+
+     new_metadata= {}
+
+     for k,v in metadata.items():
+         if k.endswith("_base64"):
+             v_decoded = json.loads(base64.b64decode(v.encode('utf8')).decode('utf8'))
+             p = k.rfind("_")
+             new_k = k[:p]
+             new_metadata[new_k]= v_decoded
+         else:
+             new_metadata[k] = v
+
+     return new_metadata
+
+ def _read_safetensors_header(path, file):
+     global _cached_entry
+     length_of_header_bytes = file.read(8)
+     # Interpret the bytes as a little-endian unsigned 64-bit integer
+     length_of_header = struct.unpack('<Q', length_of_header_bytes)[0]
+
+     if _cached_entry != None:
+         catalog, metadata, _ = _cached_entry.get_metadata(path)
+     else:
+         catalog = None
+
+     if catalog == None:
+         header_bytes = file.read(length_of_header)
+         #catalog = json.loads(header_bytes.decode('utf-8'))
+         catalog = json.loads(header_bytes)
+         metadata = catalog.pop("__metadata__", None)
+         metadata = _parse_metadata(metadata)
+
+         _cached_entry = cached_metadata(path, catalog, metadata,length_of_header )
+     else:
+         file.seek(length_of_header, 1)
+
+     return catalog, metadata, length_of_header + 8
+
+
+ def torch_write_file(sd, file_path, quantization_map = None, config = None):
+     from collections import OrderedDict
+     sf_sd = OrderedDict()
+
+     map = { torch.bfloat16 : 'BF16' , torch.int64 : 'I64' , torch.int32 : 'I32' , torch.int16 : 'I16' , torch.int8 : 'I8' ,
+             torch.uint64 : 'U64' , torch.uint32 : 'U32' , torch.uint16 : 'U16' , torch.uint8 : 'U8' ,
+             torch.bool : 'BOOL' , torch.float64 : 'F64' , torch.float32 : 'F32' , torch.float16 : 'F16', torch.float8_e5m2 : "F8_E5M2", torch.float8_e4m3fn: "F8_E4M3" }
+     pos = 0
+     i = 0
+     mx = 1000000
+     for k , t in sd.items():
+         entry = {}
+         dtypestr= map[t.dtype]
+         entry["dtype"] = dtypestr
+         entry["shape"] = list(t.shape)
+         size = torch.numel(t) * t.element_size()
+         entry["data_offsets"] = [pos, pos + size]
+         pos += size
+         sf_sd[k] = entry
+         i+=1
+         if i==mx:
+             break
+     metadata = dict()
+     if not quantization_map is None:
+         metadata["quantization_format"] = "quanto"
+         metadata["quantization_map_base64"] = base64.b64encode(json.dumps(quantization_map, ensure_ascii=False).encode('utf8')).decode('utf8')
+
+     if not config is None:
+         metadata["config_base64"] = base64.b64encode(json.dumps(config, ensure_ascii=False).encode('utf8')).decode('utf8')
+
+     if len(metadata) > 0:
+         sf_sd["__metadata__"] = metadata
+
+     header_bytes = json.dumps(sf_sd).encode()
+     #header_bytes =json.dumps(config, ensure_ascii=False).encode('utf8')
+     size_header = len(header_bytes)
+     import struct
+
+     length_of_header_bytes = struct.pack('<Q', size_header)
+
+     empty_tensor = b'\x80\x3f'
+
+     with open(file_path, "wb") as writer:
+         bytes_written = writer.write(length_of_header_bytes)
+         bytes_written = writer.write(header_bytes)
+
+         i = 0
+         for k , t in sd.items():
+             size = torch.numel(t) * t.element_size()
+             if len(t.shape) == 0:
+                 bytes_written = writer.write(empty_tensor)
+             else:
+                 buffer = t.view(torch.uint8).numpy().tobytes()
+                 bytes_written = writer.write(buffer)
+                 assert bytes_written == size
+             i+=1
+             if i==mx:
+                 break
+
+ class SafeTensorFile:
+     """Main class for accessing safetensors files that provides memory-efficient access"""
+
+     def __init__(self, file_path, metadata, catalog, skip_bytes):
+         self._file_path = file_path
+         self._metadata = metadata
+         self._catalog = catalog
+         self._skip_bytes = skip_bytes
+         self._keys = None
+         self.sd = None
+         self.mtracker = None
+
+     @classmethod
+     def load_metadata(cls, file_path):
+         with open(file_path, 'rb') as f:
+             catalog, metadata, skip_bytes = _read_safetensors_header(file_path, f)
+
+         return cls(file_path, metadata, catalog, skip_bytes)
+
+     def init_tensors(self):
+         if self.sd is None:
+             self.sd = self.create_tensors()
+         return self.sd
+
+     def create_tensors(self):
+
+         self.mtracker = MmapTracker(self._file_path)
+         import mmap
+
+         PAGE_SIZE = mmap.ALLOCATIONGRANULARITY
+         MMAP_SIZE = 1024 * 1024 * 1024 # 1GB
+
+         # First pass: find optimal aligned map boundaries
+         skip_bytes = self._skip_bytes
+         tensor_map_indexes = []
+         maps_info = []
+         current_pos = skip_bytes
+         current_map_start = (skip_bytes // PAGE_SIZE) * PAGE_SIZE
+         current_map_size = skip_bytes - current_map_start
+         idx = 0
+         for k,v in self._catalog.items():
+             data_offsets = v["data_offsets"]
+             length = data_offsets[1]-data_offsets[0]
+             if current_map_size + length > MMAP_SIZE:
+                 maps_info.append((current_map_start, current_map_size))
+                 current_map_start = (current_pos // PAGE_SIZE) * PAGE_SIZE
+                 current_map_size = current_pos - current_map_start
+                 idx += 1
+             tensor_map_indexes.append(idx)
+             current_map_size += length
+             current_pos += length
+
+         maps_info.append((current_map_start, current_map_size))
+
+         # Second pass: create maps and tensors
+         maps = []
+         sd = OrderedDict()
+
+         current_pos = skip_bytes
+         with open(self._file_path, 'rb') as f:
+             i = 0
+             for map_start, map_size in maps_info:
+                 mm = mmap.mmap(f.fileno(), map_size, offset=map_start, access=mmap.ACCESS_COPY) #.ACCESS_READ
+                 maps.append((mm, map_start, map_size))
+                 self.mtracker.register(mm, i, map_start, map_size)
+                 i = i+ 1
+
+             iter_tensor_no = iter(tensor_map_indexes)
+             for k,v in self._catalog.items():
+                 dtypestr = v["dtype"]
+                 dtype= _map_to_dtype[dtypestr]
+                 shape = v["shape"]
+                 data_offsets = v["data_offsets"]
+                 length = data_offsets[1]-data_offsets[0]
+                 map_idx = next(iter_tensor_no)
+                 offset = current_pos - maps[map_idx][1]
+                 if len(shape) == 0:
+                     t = torch.ones((), dtype=dtype, device="cpu")
+                 else:
+                     mv = memoryview(maps[map_idx][0])[offset:offset + length]
+                     t = torch.frombuffer(mv, dtype=dtype)
+                     t = torch.reshape(t, shape)
+                     # t._mmap = maps[map_idx][0]
+                 sd[k] = t
+                 current_pos += length
+
+         return sd
+
+     def get_tensor(self, name: str) -> torch.tensor:
+         """Get a tensor by name"""
+         self.init_tensors()
+         return self.sd[name]
+
+     def keys(self) -> List[str]:
+         """Get list of tensor names"""
+         if self._keys is None:
+             self._keys = list(self._catalog)
+         return self._keys
+
+     def names(self) -> List[str]:
+         """Alias for keys()"""
+         return self.keys()
+
+     def tensors(self) -> Dict[str, torch.tensor]:
+         """Get dictionary of all tensors"""
+         self.init_tensors()
+         return self.sd
+
+     def metadata(self) -> Optional[Dict[str, str]]:
+         """Get metadata dictionary"""
+         return self._metadata
+
+     def __len__(self) -> int:
+         """Get number of tensors"""
+         self.init_tensors()
+         return len(self.keys())
+
+     def __contains__(self, key: str) -> bool:
+         """Check if tensor exists"""
+         return key in self.keys()
+
+     def __iter__(self) -> Iterator[Tuple[str, torch.tensor ]]:
+         """Iterate over (name, tensor) pairs"""
+         return ((name, self.get_tensor(name)) for name in self.keys())
+
+     def _free_resources(self):
+         del self.sd
+         del self._catalog
+
+ class _SafeTensorLoader:
+     """Context manager for loading SafeTensorFile"""
+
+     def __init__(self, filename: str):
+         self.filename = Path(filename)
+         self.sft = None
+
+         if not self.filename.exists():
+             raise FileNotFoundError(f"File not found: {filename}")
+
+     def __enter__(self) -> SafeTensorFile:
+         """Open file and return SafeTensorFile instance"""
+
+         try:
+             self.sft = SafeTensorFile.load_metadata(self.filename)
+             return self.sft
+
+         except Exception as e:
+             self.close()
+             raise Exception(f"Failed to load safetensors file: {e}") from e
+
+     def __exit__(self, exc_type, exc_val, exc_tb) -> None:
+         """Clean up resources"""
+         self.close()
+
+     def close(self) -> None:
+         if self.sft != None:
+             self.sft._free_resources()
+         pass
+
+
+ def safe_open(filename: str, framework: str = "pt",device = "cpu") -> _SafeTensorLoader:
+     if device != "cpu" or framework !="pt":
+         pass
+         return _old_safe_open(filename =filename, framework=framework, device=device)
+     return _SafeTensorLoader(filename)
+
+ def torch_load_file( filename, device = 'cpu' ) -> Dict[str, torch.Tensor]:
+     sd = {}
+     with safe_open(filename, framework="pt", device = device ) as f:
+         for k in f.keys():
+             sd[k] = f.get_tensor(k)
+     return sd
+
+ _old_torch_load_file = safetensors.torch.load_file
+ safetensors.torch.load_file = torch_load_file
+ _old_safe_open = safetensors.safe_open
+ safetensors.safe_open = safe_open
+ accelerate.utils.modeling.safe_open = safe_open
+ accelerate.utils.modeling.safe_load_file = torch_load_file
+ try:
+     import transformers
+     transformers.modeling_utils.safe_open = safe_open
+     transformers.modeling_utils.safe_load_file = torch_load_file
+ except:
+     pass
+
+
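The new file above replaces the stock safetensors loader with an mmap-backed, lazily materialized one and, at import time, monkey-patches `safetensors.torch.load_file`, `safetensors.safe_open` and the corresponding `accelerate`/`transformers` entry points. A minimal usage sketch based only on the code shown above (the file name is a placeholder):
```
import mmgp.safetensors2 as safetensors2

# Importing the module patches safetensors.torch.load_file and safetensors.safe_open,
# so existing code transparently picks up the mmap-backed loader defined above.
with safetensors2.safe_open("model.safetensors", framework="pt", device="cpu") as f:
    first_key = f.keys()[0]
    tensor = f.get_tensor(first_key)

# Or load every tensor at once, mirroring safetensors.torch.load_file.
state_dict = safetensors2.torch_load_file("model.safetensors")
```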
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: mmgp
- Version: 2.0.4
+ Version: 3.0.0
  Summary: Memory Management for the GPU Poor
  Author-email: deepbeepmeep <deepbeepmeep@yahoo.com>
  License: GNU GENERAL PUBLIC LICENSE
@@ -11,10 +11,12 @@ License-File: LICENSE.md
  Requires-Dist: torch>=2.1.0
  Requires-Dist: optimum-quanto
  Requires-Dist: accelerate
+ Requires-Dist: safetensors
+ Requires-Dist: psutil


  <p align="center">
- <H2>Memory Management 2.0 for the GPU Poor by DeepBeepMeep</H2>
+ <H2>Memory Management 3.0 for the GPU Poor by DeepBeepMeep</H2>
  </p>


@@ -26,15 +28,16 @@ Requirements:
  - VRAM: minimum 12 GB, recommended 24 GB (RTX 3090 / RTX 4090)
  - RAM: minimum 24 GB, recommended 48 GB

- This module features 5 profiles in order to able to run the model at a decent speed on a low end consumer config (32 GB of RAM and 12 VRAM) and to run it at a very good speed on a high end consumer config (48 GB of RAM and 24 GB of VRAM).
+ This module features 5 profiles in order to be able to run the model at a decent speed on a low end consumer config (32 GB of RAM and 12 GB of VRAM) and to run it at a very good speed on a high end consumer config (48 GB of RAM and 24 GB of VRAM).\
+ These RAM requirements are for Linux systems. Due to different memory management, Windows will require an extra 16 GB of RAM to run the corresponding profile.

  Each profile may use the following:
- - Smart preloading of models in RAM to reduce RAM requirements
+ - Low RAM consumption (thanks to a rewritten safetensors library) that allows low-RAM on the fly quantization
  - Smart automated loading / unloading of models in the GPU to avoid unloading models that may be needed again soon
  - Smart slicing of models to reduce memory occupied by models in the VRAM
- - Ability to pin models in reserved RAM to accelerate transfers to VRAM
+ - Ability to pin models to reserved RAM to accelerate transfers to VRAM
  - Async transfers to VRAM to avoid a pause when loading a new slice of a model
- - Automated on the fly quantization or ability to load quantized models
+ - Automated on the fly quantization or ability to load pre-quantized models

  ## Installation
  First you need to install the module in your current project with:
@@ -67,33 +70,48 @@ You can choose between 5 profiles depending on your hardware:

  By default the 'transformer' will be quantized to 8 bits for all profiles. If you don't want that you may specify the optional parameter *quantizeTransformer = False*.

+ Every parameter set automatically by a profile can be overridden with one or more of the parameters accepted by *offload.all* (see below):
+ ```
+ from mmgp import offload, profile_type
+ offload.profile(pipe, profile_type.HighRAM_LowVRAM_Fast, budgets = 1000)
+ ```
+ If you want to know which parameters are set by a specific profile you can use the parameter *verboseLevel=2*
+
  ## Alternatively you may want to create your own profile with specific parameters:

  For example:
  ```
  from mmgp import offload
- offload.all(pipe, pinInRAM=True, modelsToQuantize = ["text_encoder_2"] )
+ offload.all(pipe, pinnedMemory=True, ExtraModelsToQuantize = ["text_encoder_2"] )
  ```
- - pinInRAM: Boolean (for all models) or List of models ids to pin in RAM. Every model pinned in RAM will load much faster (4 times) but this requires more RAM
- - modelsToQuantize: list of model ids to quantize on the fly. If the corresponding model is already quantized, this option will be ignored.
+ - pinnedMemory: Boolean (for all models) or list of model ids to pin to RAM. Every model pinned to RAM will load much faster (up to 2 times) but this requires more RAM
  - quantizeTransformer: boolean, True by default. The 'transformer' model in the pipe usually contains the video or image generator and is quantized on the fly to 8 bits by default. If you want to save disk space and reduce the loading time, you may want to load a prequantized model directly. If you don't want to quantize the image generator, you need to set the option *quantizeTransformer* to *False* to turn off on the fly quantization.
- - budgets: either a number in mega bytes (for all models, if 0 unlimited budget) or a dictionary that maps model ids to mega bytes : define the budget in VRAM (in fact the real number is 2.5 this number) that is allocated in VRAM for each model. The smaller this number, the more VRAM left for image data / longer video but also the slower because there will be lots of loading / unloading between the RAM and the VRAM. Turning on the PinInRAM accelerates greatly (4x) small budgets but consumes usually 50% more RAM.
+ - extraModelsToQuantize: list of additional model ids to quantize on the fly. If the corresponding model is already quantized, this option will be ignored.
+ - budgets: either a number in megabytes (for all models; 0 means an unlimited budget) or a dictionary that maps model ids to megabytes: defines the budget in VRAM (in fact the real number is 1.5 times this number, or 2.5 if asyncTransfers are also enabled) that is allocated in VRAM for each model.
+ The smaller this number, the more VRAM is left for image data / longer videos, but the slower the processing, because there will be lots of loading / unloading between the RAM and the VRAM. If a model is too big to fit in a budget, it will be broken down into multiple parts that are unloaded / loaded in sequence. The speed of a low budget can be increased (up to 2 times) by turning on the options pinnedMemory and asyncTransfers (see the sketch after this list).
+ - asyncTransfers: boolean, load the next model part to the GPU while the current part is being processed. This requires twice the budget if any is defined. This may increase speed by 20% (mostly visible on fast modern GPUs).
+ - verboseLevel: number between 0 and 2 (1 by default), provides various levels of feedback on the different processes
+ - perc_reserved_mem_max: a float below 0.5 (or 0 for auto), may be reduced to a lower number if an out of memory error is triggered while using pinnedMemory
+ - compile (experimental): list of model ids to compile, may accelerate processing (or not) depending on the type of GPU. As of 01/01/2025 it will work only on Linux since compilation relies on Triton, which is not yet supported on Windows
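To make the parameter list above concrete, here is a minimal sketch of a custom profile call; the model id used ('transformer') comes from the examples in this README, while the budget value and the particular combination of options are illustrative assumptions, not defaults:
```
from mmgp import offload

# Hypothetical pipe whose model ids include 'transformer'.
offload.all(
    pipe,
    pinnedMemory = ["transformer"],     # pin only the transformer to RAM
    budgets = {"transformer": 3000},    # illustrative ~3 GB VRAM budget for the transformer
    asyncTransfers = True,              # overlap transfers to VRAM with compute
    verboseLevel = 2,                   # report what each step is doing
)
```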

+ If you are short on RAM and plan to work with quantized models, it is recommended to load pre-quantized models directly rather than using on the fly quantization (especially on Windows), as, due to the way safetensors works, almost twice the amount of RAM may be needed while loading the model.

  ## Going further

  The module includes several tools to package a light version of your favorite video / image generator:
  - *save_model(model, file_path, do_quantize = False, quantization_type = qint8 )*\
  Save tensors of a model already loaded in memory in a safetensors format (much faster to reload). You can save it in a quantized format (default qint8 quantization recommended).
- If the model is saved in a quantized format, an extra file that ends with '_map.json' will be created and needed to reload the model again.
+ The resulting safetensors file will contain extra fields in its metadata, such as the quantization map and the model configuration, so you will be able to move the file around without companion files such as *config.json* or *file_map.json*.
+ You will need *load_model_data* or *fast_load_transformers_model* to read the file again. You may also load it using the default *safetensors* library, however you will then need to provide in the same directory any complementary files that are usually requested (for instance *config.json*).
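For illustration, a minimal sketch of this save step, assuming the helper is exposed on the *offload* module (as the other examples in this README are); the pipeline attribute and the file name are hypothetical:
```
from mmgp import offload

# Save the already loaded image/video generator of a pipe as a single
# quantized safetensors file (qint8 is the default quantization_type).
offload.save_model(pipe.transformer, "transformer_quanto_int8.safetensors", do_quantize = True)
```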
+
+ - *load_model_data(model, file_path: str, do_quantize = False, quantization_type = qint8, pinToRAM = False, partialPin = False)*\
+ Load into RAM the tensor data of a model that has already been initialized with no data. Detects and handles quantized models saved previously with *save_model*. A model can also be quantized on the fly while being loaded. The model can also be pinned to RAM while it is loaded, which is more RAM efficient than pinning its tensors later using *offload.all* or *offload.profile*.
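A short sketch of that flow, again assuming the helper is exposed on the *offload* module and reusing the hypothetical file name from the previous example:
```
from mmgp import offload

# 'model' is assumed to have been initialized beforehand with empty weights;
# its tensors are filled from the saved file and pinned to RAM during loading.
offload.load_model_data(model, "transformer_quanto_int8.safetensors", pinToRAM = True)
```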

- - *load_model_data(model, file_path: str)*\
- Load the tensors data of a model in RAM of a model already initialized with no data. Detect and handle quantized models saved previously with save_model.
+ - *fast_load_transformers_model(model_path: str, do_quantize = False, quantization_type = qint8, pinToRAM = False, partialPin = False)*\
+ Initialize (build the model hierarchy in memory) and fast load the corresponding tensors of a 'transformers' or 'diffusers' library model.
+ The advantage over the original *from_pretrained* method is that a full model can fit into a single file with a filename of your choosing (therefore you can have multiple 'transformers' versions of the same model in the same directory) and prequantized models are processed in a transparent way.
+ Last but not least, you can also pin the whole model, or the most important part of it (partialPin = True), to RAM on the fly in a more efficient way (faster and requires less RAM) than if you did it through *offload.all* or *offload.profile*.
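As an illustration, a minimal sketch of this loader, once more assuming it is exposed on the *offload* module; the file name is a hypothetical single-file model produced with *save_model*:
```
from mmgp import offload

# Build the model hierarchy and load a (possibly prequantized) single-file
# 'transformers' / 'diffusers' model, pinning its most important tensors to RAM.
transformer = offload.fast_load_transformers_model("transformer_quanto_int8.safetensors", partialPin = True)
```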

- - *fast_load_transformers_model(model_path: str)*\
- Initialize (build the model hierarchy in memory) and fast load the corresponding tensors of a 'transformers' library model.
- The advantages over the original *from_pretrained* method is that the full model can fit into a single file with a filename of your choosing (thefore you can have multiple 'transformers' versions of the same model in the same directory) and prequantized model are processed in a transparent way.
- Please note that you need to keep the original file transformers 'config.json' in the same directory.


  The typical workflow will be:
@@ -0,0 +1,9 @@
+ __init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ mmgp/__init__.py,sha256=tpo6gl8CKe1guWxcJJ5Xwq6OUJfEeFD7Mkw2IKOrq48,592
+ mmgp/offload.py,sha256=-1Rn_XtXswjoCmUBBjdhU4e0qUqvDVVnCnjAUmGHwh8,62859
+ mmgp/safetensors2.py,sha256=blCnOF1qNJ27vqbiX5jKJxv5vVdvqEtEwdm0KXwbM68,13482
+ mmgp-3.0.0.dist-info/LICENSE.md,sha256=DD-WIS0BkPoWJ_8hQO3J8hMP9K_1-dyrYv1YCbkxcDU,94
+ mmgp-3.0.0.dist-info/METADATA,sha256=sZ0Sf1ZEXSa72qKrm5jLFRkcufxUPsZd1yu0kwdVPYE,11565
+ mmgp-3.0.0.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
+ mmgp-3.0.0.dist-info/top_level.txt,sha256=waGaepj2qVfnS2yAOkaMu4r9mJaVjGbEi6AwOUogU_U,14
+ mmgp-3.0.0.dist-info/RECORD,,
@@ -1,7 +0,0 @@
- __init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- mmgp.py,sha256=oPFAP8-l6iaFp1dewl786Esi0XSxq6ivVe2oRtL8kiE,45168
- mmgp-2.0.4.dist-info/LICENSE.md,sha256=DD-WIS0BkPoWJ_8hQO3J8hMP9K_1-dyrYv1YCbkxcDU,94
- mmgp-2.0.4.dist-info/METADATA,sha256=yNjgGUlTPtX77tlmuv0xk5bYdtYpWS4k64bj1udJ1zU,8738
- mmgp-2.0.4.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
- mmgp-2.0.4.dist-info/top_level.txt,sha256=waGaepj2qVfnS2yAOkaMu4r9mJaVjGbEi6AwOUogU_U,14
- mmgp-2.0.4.dist-info/RECORD,,