mmgp 3.0.9-py3-none-any.whl → 3.1.1-py3-none-any.whl
- mmgp/offload.py +697 -583
- mmgp/safetensors2.py +40 -30
- {mmgp-3.0.9.dist-info → mmgp-3.1.1.dist-info}/METADATA +3 -3
- mmgp-3.1.1.dist-info/RECORD +9 -0
- mmgp-3.0.9.dist-info/RECORD +0 -9
- {mmgp-3.0.9.dist-info → mmgp-3.1.1.dist-info}/LICENSE.md +0 -0
- {mmgp-3.0.9.dist-info → mmgp-3.1.1.dist-info}/WHEEL +0 -0
- {mmgp-3.0.9.dist-info → mmgp-3.1.1.dist-info}/top_level.txt +0 -0
mmgp/safetensors2.py
CHANGED
@@ -156,19 +156,32 @@ def torch_write_file(sd, file_path, quantization_map = None, config = None):
     pos = 0
     i = 0
     mx = 100000
+    metadata = dict()
     for k , t in sd.items():
-        … (8 removed lines not captured in this extract)
+        if torch.is_tensor(t):
+            entry = {}
+            dtypestr= map[t.dtype]
+            entry["dtype"] = dtypestr
+            entry["shape"] = list(t.shape)
+            size = torch.numel(t) * t.element_size()
+            if size == 0:
+                pass
+            entry["data_offsets"] = [pos, pos + size]
+            pos += size
+            sf_sd[k] = entry
+        else:
+            if isinstance(t, str):
+                metadata[k] = t
+            else:
+                try:
+                    b64 = base64.b64encode(json.dumps(t, ensure_ascii=False).encode('utf8')).decode('utf8')
+                    metadata[k + "_base64"] = b64
+                except:
+                    pass
+
         i+=1
         if i==mx:
             break
-    metadata = dict()
     if not quantization_map is None:
         metadata["quantization_format"] = "quanto"
         metadata["quantization_map_base64"] = base64.b64encode(json.dumps(quantization_map, ensure_ascii=False).encode('utf8')).decode('utf8')
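The rewritten loop above is the header-building pass of torch_write_file: tensors become safetensors header entries with dtype, shape and byte data_offsets, strings go straight into the metadata map, and any other Python object is JSON-serialized and base64-encoded so it can still live in the string-only metadata section. A self-contained sketch of that layout follows; the names build_header and DTYPE_NAMES, and the example state dict, are illustrative stand-ins rather than mmgp code.

    import base64, json
    import torch

    # Illustrative dtype-name map; safetensors2.py uses a similar "map" dict defined elsewhere in the file.
    DTYPE_NAMES = {torch.float32: "F32", torch.float16: "F16", torch.bfloat16: "BF16", torch.uint8: "U8"}

    def build_header(sd):
        # Sketch: build a safetensors-style header for a state dict.
        # Tensors get {"dtype", "shape", "data_offsets"} entries; everything else
        # goes into "__metadata__" (strings as-is, other objects JSON + base64 encoded).
        header, metadata, pos = {}, {}, 0
        for name, value in sd.items():
            if torch.is_tensor(value):
                size = value.numel() * value.element_size()
                header[name] = {
                    "dtype": DTYPE_NAMES[value.dtype],
                    "shape": list(value.shape),
                    "data_offsets": [pos, pos + size],
                }
                pos += size
            elif isinstance(value, str):
                metadata[name] = value
            else:
                # metadata values must be strings, so serialize and base64-encode
                metadata[name + "_base64"] = base64.b64encode(
                    json.dumps(value, ensure_ascii=False).encode("utf8")
                ).decode("utf8")
        if metadata:
            header["__metadata__"] = metadata
        return header

    # Example: two tensors plus a config dict
    sd = {"w": torch.zeros(2, 3), "b": torch.zeros(3, dtype=torch.float16), "config": {"rank": 8}}
    print(json.dumps(build_header(sd), indent=2))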
@@ -192,9 +205,9 @@ def torch_write_file(sd, file_path, quantization_map = None, config = None):
 
     i = 0
     for k , t in sd.items():
-        … (2 removed lines not captured in this extract)
-        if …
+        if torch.is_tensor(t):
+            size = torch.numel(t) * t.element_size()
+            if size != 0:
                 dtype = t.dtype
                 # convert in a friendly format, scalars types not supported by numpy
                 if dtype == torch.bfloat16:
@@ -202,11 +215,8 @@ def torch_write_file(sd, file_path, quantization_map = None, config = None):
                 elif dtype == torch.float8_e5m2 or dtype == torch.float8_e4m3fn:
                     t = t.view(torch.uint8)
                 buffer = t.numpy().tobytes()
-                … (2 removed lines not captured in this extract)
-                bytes_written = writer.write(buffer)
-                assert bytes_written == size
-                … (1 removed line not captured in this extract)
+                bytes_written = writer.write(buffer)
+                assert bytes_written == size
         i+=1
         if i==mx:
             break
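This second pass writes the raw tensor bytes. Since numpy has no bfloat16 or float8 representation, the diff reinterprets float8 tensors via Tensor.view(torch.uint8) before calling .numpy().tobytes(); the body of the bfloat16 branch is not captured here, but the same-width reinterpretation trick is the standard way to handle it. A minimal round-trip sketch, with tensor_to_bytes and bytes_to_tensor as illustrative helpers independent of mmgp's writer object:

    import torch

    def tensor_to_bytes(t):
        # Serialize a tensor's raw storage even for dtypes numpy does not support.
        if t.dtype == torch.bfloat16:
            t = t.view(torch.int16)       # same element width, numpy-compatible (assumed analogue of the bfloat16 branch)
        elif t.dtype in (torch.float8_e5m2, torch.float8_e4m3fn):
            t = t.view(torch.uint8)
        return t.contiguous().numpy().tobytes()

    def bytes_to_tensor(buf, dtype, shape):
        # Reverse: reinterpret the raw bytes as the original dtype.
        t = torch.frombuffer(bytearray(buf), dtype=torch.uint8)
        return t.view(dtype).reshape(shape)

    x = torch.randn(4, 4, dtype=torch.bfloat16)
    buf = tensor_to_bytes(x)
    assert len(buf) == x.numel() * x.element_size()
    assert torch.equal(bytes_to_tensor(buf, torch.bfloat16, x.shape), x)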
@@ -252,6 +262,7 @@ class SafeTensorFile:
 
         PAGE_SIZE = mmap.ALLOCATIONGRANULARITY
         MMAP_SIZE = 1024 * 1024 * 1024 # 1GB
+        # MMAP_SIZE = 256 * 1024 * 1024 # 1GB
 
         # First pass: find optimal aligned map boundaries
         skip_bytes = self._skip_bytes
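mmap offsets must be multiples of mmap.ALLOCATIONGRANULARITY, which is why the loader keeps PAGE_SIZE around and plans aligned map boundaries capped at MMAP_SIZE. A generic sketch of offset-aligned mapping follows; map_region is a hypothetical helper, and mmgp's own boundary-planning code is not shown in this hunk.

    import mmap

    def map_region(path, start, length):
        # Map [start, start + length) of a file, honoring mmap's offset alignment rule.
        granularity = mmap.ALLOCATIONGRANULARITY
        aligned_start = (start // granularity) * granularity   # round down to a legal offset
        slack = start - aligned_start                          # padding bytes before our data
        with open(path, "rb") as f:
            m = mmap.mmap(f.fileno(), slack + length, access=mmap.ACCESS_READ, offset=aligned_start)
        return m, slack                                        # data lives at m[slack:slack + length]

    # Example (hypothetical file): map 1 KiB starting 5000 bytes into the file
    # m, slack = map_region("model.safetensors", 5000, 1024)
    # payload = m[slack:slack + 1024]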
@@ -297,13 +308,12 @@ class SafeTensorFile:
             length = data_offsets[1]-data_offsets[0]
             map_idx = next(iter_tensor_no)
             offset = current_pos - maps[map_idx][1]
-            if …
-            … (5 removed lines not captured in this extract)
-            t = t.view(dtype)
+            if length == 0:
+                t = torch.empty(shape, dtype=dtype)
+            elif len(shape) == 0:
+                # don't waste a memory view for a scalar
+                t = torch.frombuffer(bytearray(maps[map_idx][0][offset:offset + length]), dtype=torch.uint8)
+                t = t.view(dtype)
             else:
                 mv = memoryview(maps[map_idx][0])[offset:offset + length]
                 t = torch.frombuffer(mv, dtype=dtype)
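The added branches cover two corner cases of reading tensors back from the mapped file: zero-length tensors, which are simply allocated empty, and scalars, whose few bytes are copied into a private bytearray so that no memoryview into the large mmap stays alive for them. A minimal sketch of the three paths, using an ordinary byte buffer in place of the mmap'd region; tensor_from_region is an illustrative helper, not mmgp code.

    import torch

    def tensor_from_region(region, shape, dtype):
        # `region` stands in for the mmap'd byte slice that holds one tensor's raw data.
        length = len(region)
        if length == 0:
            # zero-sized tensor: nothing to read, just allocate the empty shape
            return torch.empty(shape, dtype=dtype)
        if len(shape) == 0:
            # scalar: copy the few bytes instead of keeping a memory view alive
            t = torch.frombuffer(bytearray(region), dtype=torch.uint8).view(dtype)
        else:
            # general case: reinterpret the buffer in place (zero-copy on a real mmap)
            t = torch.frombuffer(memoryview(region), dtype=dtype)
        return t.reshape(shape)

    raw = bytearray(torch.arange(6, dtype=torch.float32).numpy().tobytes())
    print(tensor_from_region(raw, (2, 3), torch.float32))                               # 2x3 tensor
    print(tensor_from_region(b"", (0, 3), torch.float32))                               # empty tensor
    print(tensor_from_region(torch.tensor(7.0).numpy().tobytes(), (), torch.float32))   # scalar 7.0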
@@ -313,6 +323,7 @@ class SafeTensorFile:
             current_pos += length
 
         return sd
+
 
     def create_tensors_without_mmap(self):
         sd = OrderedDict()
@@ -326,12 +337,11 @@ class SafeTensorFile:
                 data_offsets = v["data_offsets"]
                 length = data_offsets[1]-data_offsets[0]
                 buffer = f.read(length)
-                if …
-                … (4 removed lines not captured in this extract)
-                t = t.view(dtype)
+                if length == 0:
+                    t = torch.empty(0, dtype=dtype)
+                elif len(shape) == 0:
+                    t = torch.frombuffer(bytearray(buffer), dtype=torch.uint8)
+                    t = t.view(dtype)
                 else:
                     t = torch.frombuffer(bytearray(buffer), dtype=dtype)
                 t = torch.reshape(t, shape)
{mmgp-3.0.9.dist-info → mmgp-3.1.1.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: mmgp
-Version: 3.0.9
+Version: 3.1.1
 Summary: Memory Management for the GPU Poor
 Author-email: deepbeepmeep <deepbeepmeep@yahoo.com>
 License: GNU GENERAL PUBLIC LICENSE
@@ -17,7 +17,7 @@ Requires-Dist: peft
 
 
 <p align="center">
-<H2>Memory Management 3.0… (rest of line not captured in this extract)
+<H2>Memory Management 3.1.0 for the GPU Poor by DeepBeepMeep</H2>
 </p>
 
 
@@ -100,7 +100,7 @@ For example:
 The smaller this number, the more VRAM left for image data / longer video but also the slower because there will be lots of loading / unloading between the RAM and the VRAM. If model is too big to fit in a budget, it will be broken down in multiples parts that will be unloaded / loaded consequently. The speed of low budget can be increased (up to 2 times) by turning on the options pinnedMemory and asyncTransfers.
 - asyncTransfers: boolean, load to the GPU the next model part while the current part is being processed. This requires twice the budget if any is defined. This may increase speed by 20% (mostly visible on fast modern GPUs).
 - verboseLevel: number between 0 and 2 (1 by default), provides various level of feedback of the different processes
-- compile: list of model ids to compile, may accelerate up x2 depending on the type of GPU. As of 01/01/2025 it will work only on Linux or WSL since compilation relies on Triton which is not yet supported on Windows
+- compile: list of model ids to compile, may accelerate up x2 depending on the type of GPU. It makes sens to compile only the model that is frequently used such as the "transformer" model in the case of video or image generation. As of 01/01/2025 it will work only on Linux or WSL since compilation relies on Triton which is not yet supported on Windows
 
 If you are short on RAM and plan to work with quantized models, it is recommended to load pre-quantized models direclty rather than using on the fly quantization, it will be faster and consume slightly less RAM.
 
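For context, the option names quoted in this README excerpt (pinnedMemory, asyncTransfers, verboseLevel, compile) are passed as keyword arguments to mmgp's offloading entry point. A hedged usage sketch follows; the offload.all call and the exact argument shapes are assumptions based on the excerpt, not something this diff shows.

    # Hypothetical usage sketch: the offload.all entry point and the argument shapes
    # below are assumed; only the option names come from the README excerpt above.
    from mmgp import offload

    offload.all(
        pipe,                      # a previously built diffusers-style pipeline holding the models
        pinnedMemory=True,         # pin RAM copies to speed up RAM <-> VRAM transfers
        asyncTransfers=True,       # prefetch the next model part while the current one runs
        compile=["transformer"],   # compile only the hot model (Linux / WSL only, per the note above)
        verboseLevel=1,
    )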
mmgp-3.1.1.dist-info/RECORD
ADDED
@@ -0,0 +1,9 @@
+__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+mmgp/__init__.py,sha256=A9qBwyQMd1M7vshSTOBnFGP1MQvS2hXmTcTCMUcmyzE,509
+mmgp/offload.py,sha256=h74eKyWVZmDM--l4KbiZYXdpkcGM8ySUgyvkFtFRtNQ,69593
+mmgp/safetensors2.py,sha256=OkJAvENfWeb-PL0FcxS1-eYeHLbemTaNXYvNxURrzIs,16154
+mmgp-3.1.1.dist-info/LICENSE.md,sha256=HjzvY2grdtdduZclbZ46B2M-XpT4MDCxFub5ZwTWq2g,93
+mmgp-3.1.1.dist-info/METADATA,sha256=wtHNzulNFaWmruVO4cGgcRuIIN2eHPHo47nkgGMOWqw,12708
+mmgp-3.1.1.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+mmgp-3.1.1.dist-info/top_level.txt,sha256=waGaepj2qVfnS2yAOkaMu4r9mJaVjGbEi6AwOUogU_U,14
+mmgp-3.1.1.dist-info/RECORD,,
mmgp-3.0.9.dist-info/RECORD
DELETED
@@ -1,9 +0,0 @@
-__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-mmgp/__init__.py,sha256=A9qBwyQMd1M7vshSTOBnFGP1MQvS2hXmTcTCMUcmyzE,509
-mmgp/offload.py,sha256=bYjpbAHbVX2Vf3nBJXYEc1u9B5JIYvJxv4eMS8L5Tco,64209
-mmgp/safetensors2.py,sha256=G6uzvpGauJLPEvN74MX1ib4YK0E4wzNMyrZO5wOX2k0,15812
-mmgp-3.0.9.dist-info/LICENSE.md,sha256=HjzvY2grdtdduZclbZ46B2M-XpT4MDCxFub5ZwTWq2g,93
-mmgp-3.0.9.dist-info/METADATA,sha256=0vNt8lNKfMkyBrFUN8pOfkDRf8i_jmndgH2ePIekmdg,12570
-mmgp-3.0.9.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
-mmgp-3.0.9.dist-info/top_level.txt,sha256=waGaepj2qVfnS2yAOkaMu4r9mJaVjGbEi6AwOUogU_U,14
-mmgp-3.0.9.dist-info/RECORD,,
{mmgp-3.0.9.dist-info → mmgp-3.1.1.dist-info}/LICENSE.md
File without changes
{mmgp-3.0.9.dist-info → mmgp-3.1.1.dist-info}/WHEEL
File without changes
{mmgp-3.0.9.dist-info → mmgp-3.1.1.dist-info}/top_level.txt
File without changes