checkpoint-engine 0.3.2.tar.gz → 0.3.4.tar.gz

This diff compares the contents of two publicly released versions of this package, as published to one of the supported registries. It is provided for informational purposes only and reflects the packages exactly as they appear in their public registry.
Files changed (36)
  1. {checkpoint_engine-0.3.2 → checkpoint_engine-0.3.4}/PKG-INFO +1 -1
  2. {checkpoint_engine-0.3.2 → checkpoint_engine-0.3.4}/checkpoint_engine/_version.py +3 -3
  3. {checkpoint_engine-0.3.2 → checkpoint_engine-0.3.4}/checkpoint_engine/pin_memory.py +9 -1
  4. {checkpoint_engine-0.3.2 → checkpoint_engine-0.3.4}/checkpoint_engine/ps.py +12 -2
  5. {checkpoint_engine-0.3.2 → checkpoint_engine-0.3.4}/checkpoint_engine/worker.py +28 -8
  6. {checkpoint_engine-0.3.2 → checkpoint_engine-0.3.4}/checkpoint_engine.egg-info/PKG-INFO +1 -1
  7. {checkpoint_engine-0.3.2 → checkpoint_engine-0.3.4}/.github/workflows/cpu-tests.yml +0 -0
  8. {checkpoint_engine-0.3.2 → checkpoint_engine-0.3.4}/.github/workflows/pre-commit.yaml +0 -0
  9. {checkpoint_engine-0.3.2 → checkpoint_engine-0.3.4}/.github/workflows/python-publish.yml +0 -0
  10. {checkpoint_engine-0.3.2 → checkpoint_engine-0.3.4}/.gitignore +0 -0
  11. {checkpoint_engine-0.3.2 → checkpoint_engine-0.3.4}/.pre-commit-config.yaml +0 -0
  12. {checkpoint_engine-0.3.2 → checkpoint_engine-0.3.4}/LICENCE +0 -0
  13. {checkpoint_engine-0.3.2 → checkpoint_engine-0.3.4}/README.md +0 -0
  14. {checkpoint_engine-0.3.2 → checkpoint_engine-0.3.4}/checkpoint_engine/__init__.py +0 -0
  15. {checkpoint_engine-0.3.2 → checkpoint_engine-0.3.4}/checkpoint_engine/__main__.py +0 -0
  16. {checkpoint_engine-0.3.2 → checkpoint_engine-0.3.4}/checkpoint_engine/api.py +0 -0
  17. {checkpoint_engine-0.3.2 → checkpoint_engine-0.3.4}/checkpoint_engine/data_types.py +0 -0
  18. {checkpoint_engine-0.3.2 → checkpoint_engine-0.3.4}/checkpoint_engine/device_utils.py +0 -0
  19. {checkpoint_engine-0.3.2 → checkpoint_engine-0.3.4}/checkpoint_engine/p2p_store.py +0 -0
  20. {checkpoint_engine-0.3.2 → checkpoint_engine-0.3.4}/checkpoint_engine.egg-info/SOURCES.txt +0 -0
  21. {checkpoint_engine-0.3.2 → checkpoint_engine-0.3.4}/checkpoint_engine.egg-info/dependency_links.txt +0 -0
  22. {checkpoint_engine-0.3.2 → checkpoint_engine-0.3.4}/checkpoint_engine.egg-info/requires.txt +0 -0
  23. {checkpoint_engine-0.3.2 → checkpoint_engine-0.3.4}/checkpoint_engine.egg-info/top_level.txt +0 -0
  24. {checkpoint_engine-0.3.2 → checkpoint_engine-0.3.4}/docs/npu_start.md +0 -0
  25. {checkpoint_engine-0.3.2 → checkpoint_engine-0.3.4}/examples/update.py +0 -0
  26. {checkpoint_engine-0.3.2 → checkpoint_engine-0.3.4}/figures/checkpoint-engine.png +0 -0
  27. {checkpoint_engine-0.3.2 → checkpoint_engine-0.3.4}/figures/overlap-update-and-copy.png +0 -0
  28. {checkpoint_engine-0.3.2 → checkpoint_engine-0.3.4}/figures/pipeline.png +0 -0
  29. {checkpoint_engine-0.3.2 → checkpoint_engine-0.3.4}/patches/vllm_fp8.patch +0 -0
  30. {checkpoint_engine-0.3.2 → checkpoint_engine-0.3.4}/pyproject.toml +0 -0
  31. {checkpoint_engine-0.3.2 → checkpoint_engine-0.3.4}/setup.cfg +0 -0
  32. {checkpoint_engine-0.3.2 → checkpoint_engine-0.3.4}/tests/test_assign_receiver_ranks.py +0 -0
  33. {checkpoint_engine-0.3.2 → checkpoint_engine-0.3.4}/tests/test_inplace_unpin.py +0 -0
  34. {checkpoint_engine-0.3.2 → checkpoint_engine-0.3.4}/tests/test_rdma_parser.py +0 -0
  35. {checkpoint_engine-0.3.2 → checkpoint_engine-0.3.4}/tests/test_reuse_pin_memory.py +0 -0
  36. {checkpoint_engine-0.3.2 → checkpoint_engine-0.3.4}/tests/test_update.py +0 -0
--- checkpoint_engine-0.3.2/PKG-INFO
+++ checkpoint_engine-0.3.4/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: checkpoint-engine
-Version: 0.3.2
+Version: 0.3.4
 Summary: checkpoint-engine is a lightweight, decoupling and efficient weight update middleware
 Project-URL: Homepage, https://github.com/MoonshotAI/checkpoint-engine
 Project-URL: Repository, https://github.com/MoonshotAI/checkpoint-engine
--- checkpoint_engine-0.3.2/checkpoint_engine/_version.py
+++ checkpoint_engine-0.3.4/checkpoint_engine/_version.py
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
 commit_id: COMMIT_ID
 __commit_id__: COMMIT_ID
 
-__version__ = version = '0.3.2'
-__version_tuple__ = version_tuple = (0, 3, 2)
+__version__ = version = '0.3.4'
+__version_tuple__ = version_tuple = (0, 3, 4)
 
-__commit_id__ = commit_id = 'g4a73109a3'
+__commit_id__ = commit_id = 'g15446dd22'
--- checkpoint_engine-0.3.2/checkpoint_engine/pin_memory.py
+++ checkpoint_engine-0.3.4/checkpoint_engine/pin_memory.py
@@ -209,7 +209,9 @@ def _inplace_pin_memory(files: list[str], rank: int | None = None) -> list[Memor
         torch.cuda.set_device(device_index)
         cudart = torch.cuda.cudart()
         r = cudart.cudaHostRegister(t.data_ptr(), t.numel() * t.element_size(), 0)
-        assert r == 0, f"pin memory error, error code: {r}"
+        if r != 0:
+            error_msg = cudart.cudaGetErrorString(r)
+            raise RuntimeError(f"pin memory error, error code: {r}, error message: {error_msg}")
 
     # TODO: should only support /dev/shm? but we found files in disk also work?
     size = os.stat(file_path).st_size
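
Both this hunk and the matching unpin hunk in ps.py below swap a bare assert for a RuntimeError that carries the driver's message from cudaGetErrorString, which also keeps the check alive under python -O (asserts are stripped there). A minimal, self-contained sketch of the same register/unregister round trip, using only the torch.cuda.cudart() bindings visible in the diff; the pin_tensor helper name is ours, not the package's, and a CUDA-capable runtime is assumed:

import torch

def pin_tensor(t: torch.Tensor) -> None:
    # Page-lock an existing CPU tensor's allocation in place.
    # Flag 0 is cudaHostRegisterDefault.
    cudart = torch.cuda.cudart()
    r = cudart.cudaHostRegister(t.data_ptr(), t.numel() * t.element_size(), 0)
    if r != 0:
        # Same pattern as the hunk above: resolve the numeric CUDA error
        # code to a readable message instead of asserting.
        error_msg = cudart.cudaGetErrorString(r)
        raise RuntimeError(f"pin memory error, error code: {r}, error message: {error_msg}")

t = torch.empty(1 << 20, dtype=torch.uint8)  # ordinary pageable CPU tensor
pin_tensor(t)                                 # now page-locked (pinned)
torch.cuda.cudart().cudaHostUnregister(t.data_ptr())  # unpin before release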
--- checkpoint_engine-0.3.2/checkpoint_engine/pin_memory.py
+++ checkpoint_engine-0.3.4/checkpoint_engine/pin_memory.py
@@ -254,6 +256,12 @@ def _inplace_pin_memory(files: list[str], rank: int | None = None) -> list[Memor
     # Remove the file after successfully loading. This will avoid doubling the memory usage.
     # We assume files in /dev/shm/ are temporary files. So it's safe to remove them after loading.
     os.remove(file_path)
+    if not metas:
+        # TODO: should we still return this buffer?
+        assert buffer.nbytes == 0, f"buffer nbytes {buffer.nbytes} should be 0"
+        logger.warning(f"[rank{rank}] no metas found in {file_path}, skip pin memory")
+        return MemoryBuffer(buffer=buffer, size=buffer.nbytes, metas=[], manually_pinned=False)
+
     _pin(buffer)
     logger.info(
         f"[rank{rank}] inplace pin memory for file {file_path} finished, size {buffer.nbytes / 1024 / 1024:.2f}MiB"
--- checkpoint_engine-0.3.2/checkpoint_engine/ps.py
+++ checkpoint_engine-0.3.4/checkpoint_engine/ps.py
@@ -391,7 +391,11 @@ class ParameterServer:
            )
            cudart = torch.cuda.cudart()
            r = cudart.cudaHostUnregister(t.data_ptr())
-           assert r == 0, f"unpin memory error, error code: {r}"
+           if r != 0:
+               error_msg = cudart.cudaGetErrorString(r)
+               raise RuntimeError(
+                   f"unpin memory error, error code: {r}, error message: {error_msg}"
+               )
 
        # if the checkpoint is pinned by cudaHostRegister manually, we need to unpin it manually
        try:
--- checkpoint_engine-0.3.2/checkpoint_engine/ps.py
+++ checkpoint_engine-0.3.4/checkpoint_engine/ps.py
@@ -407,7 +411,13 @@ class ParameterServer:
        del self._memory_pool[checkpoint_name]
        # see https://github.com/pytorch/pytorch/blob/31d5c675394705f8a6bc767f80ae14bf4f01246b/torch/csrc/cuda/Module.cpp#L2018
        # this works by using torch>=2.5.0
-       torch._C._host_emptyCache()
+       if self.device_manager.device_type == "cuda":
+           torch._C._host_emptyCache()
+       else:
+           # torch._C._host_emptyCache() is not supported on NPU, so we call gc.collect() to empty host cache.
+           import gc
+
+           gc.collect()
 
    def gather_metas(self, checkpoint_name: str):
        """
--- checkpoint_engine-0.3.2/checkpoint_engine/worker.py
+++ checkpoint_engine-0.3.4/checkpoint_engine/worker.py
@@ -10,6 +10,9 @@ import zmq
 from checkpoint_engine.device_utils import DeviceManager, npu_generate_uuid
 
 
+_WEIGHTS_TYPE = list[tuple[str, torch.Tensor]]
+
+
 def _rebuild_ipc(handle: tuple[Callable, tuple], device_id: int | None = None) -> torch.Tensor:
     func, args = handle
     list_args = list(args)
--- checkpoint_engine-0.3.2/checkpoint_engine/worker.py
+++ checkpoint_engine-0.3.4/checkpoint_engine/worker.py
@@ -29,11 +32,9 @@ class FlattenedTensorMetadata(TypedDict):
     offset: int
 
 
-def _extract_weights(
-    payload: list[FlattenedTensorMetadata], buffer: torch.Tensor
-) -> list[tuple[str, torch.Tensor]]:
+def _extract_weights(payload: list[FlattenedTensorMetadata], buffer: torch.Tensor) -> _WEIGHTS_TYPE:
     assert buffer is not None
-    weights: list[tuple[str, torch.Tensor]] = []
+    weights: _WEIGHTS_TYPE = []
     for item in payload:
         shape = item["shape"]
         if isinstance(shape, list | tuple):
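
The hunk above is a pure refactor onto the new _WEIGHTS_TYPE alias. For orientation, a hedged sketch of what a function with this signature does, rebuilding named tensors from a flat byte buffer: the offset and shape fields come from the FlattenedTensorMetadata TypedDict shown above, while the name and dtype fields and the slicing logic are our assumptions, not worker.py's actual code:

import torch

_WEIGHTS_TYPE = list[tuple[str, torch.Tensor]]

def extract_weights(payload: list[dict], buffer: torch.Tensor) -> _WEIGHTS_TYPE:
    # buffer is a flat uint8 tensor; each payload entry describes the
    # byte range of one tensor inside it.
    weights: _WEIGHTS_TYPE = []
    for item in payload:
        shape = torch.Size(item["shape"])
        nbytes = shape.numel() * item["dtype"].itemsize
        chunk = buffer[item["offset"] : item["offset"] + nbytes]
        weights.append((item["name"], chunk.view(item["dtype"]).view(shape)))
    return weights

flat = torch.arange(6, dtype=torch.float32).view(torch.uint8)  # 24 bytes
meta = [{"name": "w", "shape": (2, 3), "dtype": torch.float32, "offset": 0}]
print(extract_weights(meta, flat))  # [('w', 2x3 float32 view of the buffer)]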
--- checkpoint_engine-0.3.2/checkpoint_engine/worker.py
+++ checkpoint_engine-0.3.4/checkpoint_engine/worker.py
@@ -166,12 +167,31 @@ class VllmColocateWorkerExtension:
            self.device = torch.device(f"npu:{self.local_rank}")
        assert self.device is not None
 
+       def _load_weights(weights: _WEIGHTS_TYPE):
+           # Load main model weights
+           self.model_runner.model.load_weights(weights)
+           # Load drafter model weights if MTP/speculative decoding is enabled
+           if (
+               getattr(self.model_runner, "drafter", None) is not None
+               and getattr(self.model_runner.drafter, "model", None) is not None
+           ):
+               self.model_runner.drafter.model.load_weights(weights=weights)
+
+       def _post_hook():
+           process_weights_after_loading(self.model_runner.model, self.model_config, self.device)
+           # Also trigger drafter model's post processing if MTP is enabled
+           if (
+               getattr(self.model_runner, "drafter", None) is not None
+               and getattr(self.model_runner.drafter, "model", None) is not None
+           ):
+               process_weights_after_loading(
+                   self.model_runner.drafter.model, self.model_config, self.device
+               )
+
        update_weights_from_ipc(
            self._zmq_ctx,
            zmq_handles[self._device_uuid],
            device_id=self.device.index,
-           run=self.model_runner.model.load_weights,
-           post_hook=lambda: process_weights_after_loading(
-               self.model_runner.model, self.model_config, self.device
-           ),
+           run=_load_weights,
+           post_hook=_post_hook,
        )
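
This is the substantive change in 0.3.4: rather than handing model.load_weights straight to update_weights_from_ipc, the worker wraps it so that a drafter model used for MTP/speculative decoding, when one is attached, receives the same weights and the same post-load processing. The wrapping pattern in isolation, with hypothetical stand-ins (EchoModel, make_run, and the SimpleNamespace runner are ours; the drafter attribute convention mirrors the hunk above):

from types import SimpleNamespace

import torch

_WEIGHTS_TYPE = list[tuple[str, torch.Tensor]]

class EchoModel:
    # Stand-in model that just records which weights it received.
    def __init__(self, tag: str) -> None:
        self.tag = tag

    def load_weights(self, weights: _WEIGHTS_TYPE) -> None:
        print(self.tag, "<-", [name for name, _ in weights])

def make_run(model_runner):
    # Same guard as the hunk above: touch the drafter only when both
    # model_runner.drafter and model_runner.drafter.model exist.
    def _load_weights(weights: _WEIGHTS_TYPE) -> None:
        model_runner.model.load_weights(weights)
        drafter = getattr(model_runner, "drafter", None)
        if drafter is not None and getattr(drafter, "model", None) is not None:
            drafter.model.load_weights(weights=weights)

    return _load_weights

runner = SimpleNamespace(
    model=EchoModel("main"),
    drafter=SimpleNamespace(model=EchoModel("drafter")),
)
make_run(runner)([("w", torch.zeros(2))])  # both models receive "w"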
--- checkpoint_engine-0.3.2/checkpoint_engine.egg-info/PKG-INFO
+++ checkpoint_engine-0.3.4/checkpoint_engine.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: checkpoint-engine
-Version: 0.3.2
+Version: 0.3.4
 Summary: checkpoint-engine is a lightweight, decoupling and efficient weight update middleware
 Project-URL: Homepage, https://github.com/MoonshotAI/checkpoint-engine
 Project-URL: Repository, https://github.com/MoonshotAI/checkpoint-engine