checkpoint-engine 0.3.0rc1__py3-none-any.whl → 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,7 @@
1
1
  import gc
2
2
  import traceback
3
3
  from collections.abc import Callable
4
+ from functools import cached_property
4
5
  from typing import TypedDict
5
6
 
6
7
  import torch
@@ -117,6 +118,21 @@ class VllmColocateWorkerExtension:
117
118
  `worker_extension_cls` argument when initializing the vLLM worker.
118
119
  """
119
120
 
121
+ @cached_property
122
+ def _device_uuid(self) -> str:
123
+ from vllm.platforms import current_platform
124
+
125
+ if current_platform.device_type == "cuda":
126
+ return current_platform.get_device_uuid(self.device.index)
127
+ elif current_platform.device_type == "npu":
128
+ return f"NPU-{npu_generate_uuid()}"
129
+ else:
130
+ raise ValueError(f"Unsupported device type: {current_platform.device_type}")
131
+
132
+ @cached_property
133
+ def _zmq_ctx(self) -> zmq.Context:
134
+ return zmq.Context()
135
+
120
136
  def update_weights_from_ipc(self, zmq_handles: dict[str, str]):
121
137
  """
122
138
  Update model weights from checkpoint-engine via IPC communication.
@@ -149,17 +165,10 @@ class VllmColocateWorkerExtension:
149
165
  if current_platform.device_type == "npu" and self.device is None:
150
166
  self.device = torch.device(f"npu:{self.local_rank}")
151
167
  assert self.device is not None
152
- if not hasattr(self, "_zmq_ctx") or self._zmq_ctx is None:
153
- self._zmq_ctx = zmq.Context()
154
- if current_platform.device_type == "cuda":
155
- device_uuid = current_platform.get_device_uuid(self.device.index)
156
- elif current_platform.device_type == "npu":
157
- device_uuid = f"NPU-{npu_generate_uuid()}"
158
- else:
159
- raise ValueError(f"Unsupported device type: {current_platform.device_type}")
168
+
160
169
  update_weights_from_ipc(
161
170
  self._zmq_ctx,
162
- zmq_handles[device_uuid],
171
+ zmq_handles[self._device_uuid],
163
172
  device_id=self.device.index,
164
173
  run=self.model_runner.model.load_weights,
165
174
  post_hook=lambda: process_weights_after_loading(
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: checkpoint-engine
3
- Version: 0.3.0rc1
3
+ Version: 0.3.1
4
4
  Summary: checkpoint-engine is a lightweight, decoupling and efficient weight update middleware
5
5
  Project-URL: Homepage, https://github.com/MoonshotAI/checkpoint-engine
6
6
  Project-URL: Repository, https://github.com/MoonshotAI/checkpoint-engine
@@ -0,0 +1,15 @@
1
+ checkpoint_engine/__init__.py,sha256=OeWxe9mxl2sZ6cW-blSTg6JbFlOMpGbBghLZtxGOqXk,942
2
+ checkpoint_engine/__main__.py,sha256=yzQlApuYo6eIOqtqM018RosyxNzXzB5a-stxUvsh-dg,709
3
+ checkpoint_engine/_version.py,sha256=gGLpQUQx-ty9SEy9PYw9OgJWWzJLBnCpfJOfzL7SjlI,704
4
+ checkpoint_engine/api.py,sha256=JDiQ4i3Gb6GoaBhlp8lNuUPaVURoFFdeGJY9ZDDGvPc,3518
5
+ checkpoint_engine/data_types.py,sha256=O9uAXjwB20iwrOHfEEQd8Y9CmaFspNJ9ks9noHqwQKk,2716
6
+ checkpoint_engine/device_utils.py,sha256=iKrof60j3CY3fStRTq3DRTt_kE1vYoEWHhAeyh0lByA,3020
7
+ checkpoint_engine/p2p_store.py,sha256=abiCDVmRISPt9QFfavHB9Jo7ZpBbSjUS1NevGuB-AVA,8721
8
+ checkpoint_engine/pin_memory.py,sha256=gpoe_z5XxbWkCvFLaXXpyUUFetBXUjsOrxBSX-ksZTw,16141
9
+ checkpoint_engine/ps.py,sha256=0d68Sqb_y3H6b5H37exMbghDJ294VKaGqoWkcKE-Ao8,40316
10
+ checkpoint_engine/worker.py,sha256=ghj9d2u8hY_U2uiOZWIN2CqRNZH6PrzujT22fHUFBWI,6879
11
+ checkpoint_engine-0.3.1.dist-info/licenses/LICENCE,sha256=D3gPmHKpGtF1yxYNhqjtBtZY_brZjDotJTzpnmClzlY,1067
12
+ checkpoint_engine-0.3.1.dist-info/METADATA,sha256=RSkQaKNs4euXk162_9AQzuNWaJQT4gMTnQ-3QJeYY_E,11559
13
+ checkpoint_engine-0.3.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
14
+ checkpoint_engine-0.3.1.dist-info/top_level.txt,sha256=66sik_1eLakLYmcllOEJzFaNbSfjsueuP0tHYEzhMSs,18
15
+ checkpoint_engine-0.3.1.dist-info/RECORD,,
@@ -1,10 +0,0 @@
1
- checkpoint_engine/__init__.py,sha256=Zj4I008kn9R6fYr0lVBzcQSnvckLpX2s1ljCOOqV1c8,87
2
- checkpoint_engine/_version.py,sha256=Ctme-brbITV9k9eCj361Q_klPsndHOTci7ZqCb_3Wk8,714
3
- checkpoint_engine/device_utils.py,sha256=iKrof60j3CY3fStRTq3DRTt_kE1vYoEWHhAeyh0lByA,3020
4
- checkpoint_engine/ps.py,sha256=xGoiy4bfRl_USj9ws9g7yUos0Gw513oouV0QbChQ3rk,70668
5
- checkpoint_engine/worker.py,sha256=f6kS1ushIXxkRCEHXM5wVofUer9OxRiVY03vmKYLzgo,6757
6
- checkpoint_engine-0.3.0rc1.dist-info/licenses/LICENCE,sha256=D3gPmHKpGtF1yxYNhqjtBtZY_brZjDotJTzpnmClzlY,1067
7
- checkpoint_engine-0.3.0rc1.dist-info/METADATA,sha256=1KjhSfes8NyRV7mF6bLmr1uGgNDqTUakS3QduK95OJY,11562
8
- checkpoint_engine-0.3.0rc1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
9
- checkpoint_engine-0.3.0rc1.dist-info/top_level.txt,sha256=66sik_1eLakLYmcllOEJzFaNbSfjsueuP0tHYEzhMSs,18
10
- checkpoint_engine-0.3.0rc1.dist-info/RECORD,,