mmgp 3.0.9-py3-none-any.whl → 3.1.1-py3-none-any.whl
- mmgp/offload.py +697 -583
- mmgp/safetensors2.py +40 -30
- {mmgp-3.0.9.dist-info → mmgp-3.1.1.dist-info}/METADATA +3 -3
- mmgp-3.1.1.dist-info/RECORD +9 -0
- mmgp-3.0.9.dist-info/RECORD +0 -9
- {mmgp-3.0.9.dist-info → mmgp-3.1.1.dist-info}/LICENSE.md +0 -0
- {mmgp-3.0.9.dist-info → mmgp-3.1.1.dist-info}/WHEEL +0 -0
- {mmgp-3.0.9.dist-info → mmgp-3.1.1.dist-info}/top_level.txt +0 -0
mmgp/safetensors2.py
CHANGED
@@ -156,19 +156,32 @@ def torch_write_file(sd, file_path, quantization_map = None, config = None):
     pos = 0
     i = 0
     mx = 100000
+    metadata = dict()
     for k , t in sd.items():
-        … (8 removed lines not captured in this extract)
+        if torch.is_tensor(t):
+            entry = {}
+            dtypestr= map[t.dtype]
+            entry["dtype"] = dtypestr
+            entry["shape"] = list(t.shape)
+            size = torch.numel(t) * t.element_size()
+            if size == 0:
+                pass
+            entry["data_offsets"] = [pos, pos + size]
+            pos += size
+            sf_sd[k] = entry
+        else:
+            if isinstance(t, str):
+                metadata[k] = t
+            else:
+                try:
+                    b64 = base64.b64encode(json.dumps(t, ensure_ascii=False).encode('utf8')).decode('utf8')
+                    metadata[k + "_base64"] = b64
+                except:
+                    pass
+
         i+=1
         if i==mx:
             break
-    metadata = dict()
     if not quantization_map is None:
         metadata["quantization_format"] = "quanto"
         metadata["quantization_map_base64"] = base64.b64encode(json.dumps(quantization_map, ensure_ascii=False).encode('utf8')).decode('utf8')
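The rewritten loop above is the header-building pass of torch_write_file: tensors become safetensors header entries with dtype, shape and byte data_offsets, strings go straight into the metadata map, and any other Python object is JSON-serialized and base64-encoded so it can still live in the string-only metadata section. A self-contained sketch of that layout follows; the names build_header and DTYPE_NAMES, and the example state dict, are illustrative stand-ins rather than mmgp code.

    import base64, json
    import torch

    # Illustrative dtype-name map; safetensors2.py uses a similar "map" dict defined elsewhere in the file.
    DTYPE_NAMES = {torch.float32: "F32", torch.float16: "F16", torch.bfloat16: "BF16", torch.uint8: "U8"}

    def build_header(sd):
        # Sketch: build a safetensors-style header for a state dict.
        # Tensors get {"dtype", "shape", "data_offsets"} entries; everything else
        # goes into "__metadata__" (strings as-is, other objects JSON + base64 encoded).
        header, metadata, pos = {}, {}, 0
        for name, value in sd.items():
            if torch.is_tensor(value):
                size = value.numel() * value.element_size()
                header[name] = {
                    "dtype": DTYPE_NAMES[value.dtype],
                    "shape": list(value.shape),
                    "data_offsets": [pos, pos + size],
                }
                pos += size
            elif isinstance(value, str):
                metadata[name] = value
            else:
                # metadata values must be strings, so serialize and base64-encode
                metadata[name + "_base64"] = base64.b64encode(
                    json.dumps(value, ensure_ascii=False).encode("utf8")
                ).decode("utf8")
        if metadata:
            header["__metadata__"] = metadata
        return header

    # Example: two tensors plus a config dict
    sd = {"w": torch.zeros(2, 3), "b": torch.zeros(3, dtype=torch.float16), "config": {"rank": 8}}
    print(json.dumps(build_header(sd), indent=2))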
@@ -192,9 +205,9 @@ def torch_write_file(sd, file_path, quantization_map = None, config = None):
 
     i = 0
     for k , t in sd.items():
-        … (2 removed lines not captured in this extract)
-        if …
+        if torch.is_tensor(t):
+            size = torch.numel(t) * t.element_size()
+            if size != 0:
                 dtype = t.dtype
                 # convert in a friendly format, scalars types not supported by numpy
                 if dtype == torch.bfloat16:
@@ -202,11 +215,8 @@ def torch_write_file(sd, file_path, quantization_map = None, config = None):
                 elif dtype == torch.float8_e5m2 or dtype == torch.float8_e4m3fn:
                     t = t.view(torch.uint8)
                 buffer = t.numpy().tobytes()
-                … (2 removed lines not captured in this extract)
-                bytes_written = writer.write(buffer)
-                assert bytes_written == size
-                … (1 removed line not captured in this extract)
+                bytes_written = writer.write(buffer)
+                assert bytes_written == size
         i+=1
         if i==mx:
             break
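This second pass writes the raw tensor bytes. Since numpy has no bfloat16 or float8 representation, the diff reinterprets float8 tensors via Tensor.view(torch.uint8) before calling .numpy().tobytes(); the body of the bfloat16 branch is not captured here, but the same-width reinterpretation trick is the standard way to handle it. A minimal round-trip sketch, with tensor_to_bytes and bytes_to_tensor as illustrative helpers independent of mmgp's writer object:

    import torch

    def tensor_to_bytes(t):
        # Serialize a tensor's raw storage even for dtypes numpy does not support.
        if t.dtype == torch.bfloat16:
            t = t.view(torch.int16)       # same element width, numpy-compatible (assumed analogue of the bfloat16 branch)
        elif t.dtype in (torch.float8_e5m2, torch.float8_e4m3fn):
            t = t.view(torch.uint8)
        return t.contiguous().numpy().tobytes()

    def bytes_to_tensor(buf, dtype, shape):
        # Reverse: reinterpret the raw bytes as the original dtype.
        t = torch.frombuffer(bytearray(buf), dtype=torch.uint8)
        return t.view(dtype).reshape(shape)

    x = torch.randn(4, 4, dtype=torch.bfloat16)
    buf = tensor_to_bytes(x)
    assert len(buf) == x.numel() * x.element_size()
    assert torch.equal(bytes_to_tensor(buf, torch.bfloat16, x.shape), x)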
@@ -252,6 +262,7 @@ class SafeTensorFile:
 
         PAGE_SIZE = mmap.ALLOCATIONGRANULARITY
         MMAP_SIZE = 1024 * 1024 * 1024 # 1GB
+        # MMAP_SIZE = 256 * 1024 * 1024 # 1GB
 
         # First pass: find optimal aligned map boundaries
         skip_bytes = self._skip_bytes
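mmap offsets must be multiples of mmap.ALLOCATIONGRANULARITY, which is why the loader keeps PAGE_SIZE around and plans aligned map boundaries capped at MMAP_SIZE. A generic sketch of offset-aligned mapping follows; map_region is a hypothetical helper, and mmgp's own boundary-planning code is not shown in this hunk.

    import mmap

    def map_region(path, start, length):
        # Map [start, start + length) of a file, honoring mmap's offset alignment rule.
        granularity = mmap.ALLOCATIONGRANULARITY
        aligned_start = (start // granularity) * granularity   # round down to a legal offset
        slack = start - aligned_start                          # padding bytes before our data
        with open(path, "rb") as f:
            m = mmap.mmap(f.fileno(), slack + length, access=mmap.ACCESS_READ, offset=aligned_start)
        return m, slack                                        # data lives at m[slack:slack + length]

    # Example (hypothetical file): map 1 KiB starting 5000 bytes into the file
    # m, slack = map_region("model.safetensors", 5000, 1024)
    # payload = m[slack:slack + 1024]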
@@ -297,13 +308,12 @@ class SafeTensorFile:
             length = data_offsets[1]-data_offsets[0]
             map_idx = next(iter_tensor_no)
             offset = current_pos - maps[map_idx][1]
-            if …
-            … (5 removed lines not captured in this extract)
-            t = t.view(dtype)
+            if length == 0:
+                t = torch.empty(shape, dtype=dtype)
+            elif len(shape) == 0:
+                # don't waste a memory view for a scalar
+                t = torch.frombuffer(bytearray(maps[map_idx][0][offset:offset + length]), dtype=torch.uint8)
+                t = t.view(dtype)
             else:
                 mv = memoryview(maps[map_idx][0])[offset:offset + length]
                 t = torch.frombuffer(mv, dtype=dtype)
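The added branches cover two corner cases of reading tensors back from the mapped file: zero-length tensors, which are simply allocated empty, and scalars, whose few bytes are copied into a private bytearray so that no memoryview into the large mmap stays alive for them. A minimal sketch of the three paths, using an ordinary byte buffer in place of the mmap'd region; tensor_from_region is an illustrative helper, not mmgp code.

    import torch

    def tensor_from_region(region, shape, dtype):
        # `region` stands in for the mmap'd byte slice that holds one tensor's raw data.
        length = len(region)
        if length == 0:
            # zero-sized tensor: nothing to read, just allocate the empty shape
            return torch.empty(shape, dtype=dtype)
        if len(shape) == 0:
            # scalar: copy the few bytes instead of keeping a memory view alive
            t = torch.frombuffer(bytearray(region), dtype=torch.uint8).view(dtype)
        else:
            # general case: reinterpret the buffer in place (zero-copy on a real mmap)
            t = torch.frombuffer(memoryview(region), dtype=dtype)
        return t.reshape(shape)

    raw = bytearray(torch.arange(6, dtype=torch.float32).numpy().tobytes())
    print(tensor_from_region(raw, (2, 3), torch.float32))                               # 2x3 tensor
    print(tensor_from_region(b"", (0, 3), torch.float32))                               # empty tensor
    print(tensor_from_region(torch.tensor(7.0).numpy().tobytes(), (), torch.float32))   # scalar 7.0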
@@ -313,6 +323,7 @@ class SafeTensorFile:
             current_pos += length
 
         return sd
+
 
     def create_tensors_without_mmap(self):
         sd = OrderedDict()
@@ -326,12 +337,11 @@ class SafeTensorFile:
                 data_offsets = v["data_offsets"]
                 length = data_offsets[1]-data_offsets[0]
                 buffer = f.read(length)
-                if …
-                … (4 removed lines not captured in this extract)
-                t = t.view(dtype)
+                if length == 0:
+                    t = torch.empty(0, dtype=dtype)
+                elif len(shape) == 0:
+                    t = torch.frombuffer(bytearray(buffer), dtype=torch.uint8)
+                    t = t.view(dtype)
                 else:
                     t = torch.frombuffer(bytearray(buffer), dtype=dtype)
                 t = torch.reshape(t, shape)
{mmgp-3.0.9.dist-info → mmgp-3.1.1.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: mmgp
-Version: 3.0.9
+Version: 3.1.1
 Summary: Memory Management for the GPU Poor
 Author-email: deepbeepmeep <deepbeepmeep@yahoo.com>
 License: GNU GENERAL PUBLIC LICENSE
@@ -17,7 +17,7 @@ Requires-Dist: peft
 
 
 <p align="center">
-<H2>Memory Management 3.0… (rest of line not captured in this extract)
+<H2>Memory Management 3.1.0 for the GPU Poor by DeepBeepMeep</H2>
 </p>
 
 
@@ -100,7 +100,7 @@ For example:
 The smaller this number, the more VRAM left for image data / longer video but also the slower because there will be lots of loading / unloading between the RAM and the VRAM. If model is too big to fit in a budget, it will be broken down in multiples parts that will be unloaded / loaded consequently. The speed of low budget can be increased (up to 2 times) by turning on the options pinnedMemory and asyncTransfers.
 - asyncTransfers: boolean, load to the GPU the next model part while the current part is being processed. This requires twice the budget if any is defined. This may increase speed by 20% (mostly visible on fast modern GPUs).
 - verboseLevel: number between 0 and 2 (1 by default), provides various level of feedback of the different processes
-- compile: list of model ids to compile, may accelerate up x2 depending on the type of GPU. As of 01/01/2025 it will work only on Linux or WSL since compilation relies on Triton which is not yet supported on Windows
+- compile: list of model ids to compile, may accelerate up x2 depending on the type of GPU. It makes sens to compile only the model that is frequently used such as the "transformer" model in the case of video or image generation. As of 01/01/2025 it will work only on Linux or WSL since compilation relies on Triton which is not yet supported on Windows
 
 If you are short on RAM and plan to work with quantized models, it is recommended to load pre-quantized models direclty rather than using on the fly quantization, it will be faster and consume slightly less RAM.
 
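For context, the option names quoted in this README excerpt (pinnedMemory, asyncTransfers, verboseLevel, compile) are passed as keyword arguments to mmgp's offloading entry point. A hedged usage sketch follows; the offload.all call and the exact argument shapes are assumptions based on the excerpt, not something this diff shows.

    # Hypothetical usage sketch: the offload.all entry point and the argument shapes
    # below are assumed; only the option names come from the README excerpt above.
    from mmgp import offload

    offload.all(
        pipe,                      # a previously built diffusers-style pipeline holding the models
        pinnedMemory=True,         # pin RAM copies to speed up RAM <-> VRAM transfers
        asyncTransfers=True,       # prefetch the next model part while the current one runs
        compile=["transformer"],   # compile only the hot model (Linux / WSL only, per the note above)
        verboseLevel=1,
    )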
mmgp-3.1.1.dist-info/RECORD
ADDED
@@ -0,0 +1,9 @@
+__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+mmgp/__init__.py,sha256=A9qBwyQMd1M7vshSTOBnFGP1MQvS2hXmTcTCMUcmyzE,509
+mmgp/offload.py,sha256=h74eKyWVZmDM--l4KbiZYXdpkcGM8ySUgyvkFtFRtNQ,69593
+mmgp/safetensors2.py,sha256=OkJAvENfWeb-PL0FcxS1-eYeHLbemTaNXYvNxURrzIs,16154
+mmgp-3.1.1.dist-info/LICENSE.md,sha256=HjzvY2grdtdduZclbZ46B2M-XpT4MDCxFub5ZwTWq2g,93
+mmgp-3.1.1.dist-info/METADATA,sha256=wtHNzulNFaWmruVO4cGgcRuIIN2eHPHo47nkgGMOWqw,12708
+mmgp-3.1.1.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+mmgp-3.1.1.dist-info/top_level.txt,sha256=waGaepj2qVfnS2yAOkaMu4r9mJaVjGbEi6AwOUogU_U,14
+mmgp-3.1.1.dist-info/RECORD,,
mmgp-3.0.9.dist-info/RECORD
DELETED
@@ -1,9 +0,0 @@
-__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-mmgp/__init__.py,sha256=A9qBwyQMd1M7vshSTOBnFGP1MQvS2hXmTcTCMUcmyzE,509
-mmgp/offload.py,sha256=bYjpbAHbVX2Vf3nBJXYEc1u9B5JIYvJxv4eMS8L5Tco,64209
-mmgp/safetensors2.py,sha256=G6uzvpGauJLPEvN74MX1ib4YK0E4wzNMyrZO5wOX2k0,15812
-mmgp-3.0.9.dist-info/LICENSE.md,sha256=HjzvY2grdtdduZclbZ46B2M-XpT4MDCxFub5ZwTWq2g,93
-mmgp-3.0.9.dist-info/METADATA,sha256=0vNt8lNKfMkyBrFUN8pOfkDRf8i_jmndgH2ePIekmdg,12570
-mmgp-3.0.9.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
-mmgp-3.0.9.dist-info/top_level.txt,sha256=waGaepj2qVfnS2yAOkaMu4r9mJaVjGbEi6AwOUogU_U,14
-mmgp-3.0.9.dist-info/RECORD,,
{mmgp-3.0.9.dist-info → mmgp-3.1.1.dist-info}/LICENSE.md
File without changes
{mmgp-3.0.9.dist-info → mmgp-3.1.1.dist-info}/WHEEL
File without changes
{mmgp-3.0.9.dist-info → mmgp-3.1.1.dist-info}/top_level.txt
File without changes