mooncake-transfer-engine 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mooncake_transfer_engine-0.1.0/PKG-INFO +20 -0
- mooncake_transfer_engine-0.1.0/mooncake/__init__.py +3 -0
- mooncake_transfer_engine-0.1.0/mooncake/lib_so/libetcd-cpp-api.so +0 -0
- mooncake_transfer_engine-0.1.0/mooncake/mooncake_master +0 -0
- mooncake_transfer_engine-0.1.0/mooncake/mooncake_sglang_adaptor.cpython-310-x86_64-linux-gnu.so +0 -0
- mooncake_transfer_engine-0.1.0/mooncake/mooncake_vllm_adaptor.cpython-310-x86_64-linux-gnu.so +0 -0
- mooncake_transfer_engine-0.1.0/mooncake_transfer_engine.egg-info/PKG-INFO +20 -0
- mooncake_transfer_engine-0.1.0/mooncake_transfer_engine.egg-info/SOURCES.txt +13 -0
- mooncake_transfer_engine-0.1.0/mooncake_transfer_engine.egg-info/dependency_links.txt +1 -0
- mooncake_transfer_engine-0.1.0/mooncake_transfer_engine.egg-info/not-zip-safe +1 -0
- mooncake_transfer_engine-0.1.0/mooncake_transfer_engine.egg-info/top_level.txt +1 -0
- mooncake_transfer_engine-0.1.0/pyproject.toml +3 -0
- mooncake_transfer_engine-0.1.0/setup.cfg +4 -0
- mooncake_transfer_engine-0.1.0/setup.py +52 -0
- mooncake_transfer_engine-0.1.0/tests/test_distributed_object_store.py +271 -0
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: mooncake-transfer-engine
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Python binding of a Mooncake library using pybind11
|
|
5
|
+
Home-page: https://github.com/kvcache-ai/Mooncake
|
|
6
|
+
Author: Mooncake Authors
|
|
7
|
+
Project-URL: Documentation, https://github.com/kvcache-ai/Mooncake/tree/main/doc
|
|
8
|
+
Project-URL: Source, https://github.com/kvcache-ai/Mooncake
|
|
9
|
+
Project-URL: Issues, https://github.com/kvcache-ai/Mooncake/issues
|
|
10
|
+
Keywords: mooncake,data transfer,kv cache,llm inference
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Classifier: Programming Language :: C++
|
|
13
|
+
Classifier: Operating System :: POSIX :: Linux
|
|
14
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
15
|
+
Dynamic: author
|
|
16
|
+
Dynamic: classifier
|
|
17
|
+
Dynamic: home-page
|
|
18
|
+
Dynamic: keywords
|
|
19
|
+
Dynamic: project-url
|
|
20
|
+
Dynamic: summary
|
|
Binary file
|
|
Binary file
|
mooncake_transfer_engine-0.1.0/mooncake/mooncake_sglang_adaptor.cpython-310-x86_64-linux-gnu.so
ADDED
|
Binary file
|
|
Binary file
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: mooncake-transfer-engine
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Python binding of a Mooncake library using pybind11
|
|
5
|
+
Home-page: https://github.com/kvcache-ai/Mooncake
|
|
6
|
+
Author: Mooncake Authors
|
|
7
|
+
Project-URL: Documentation, https://github.com/kvcache-ai/Mooncake/tree/main/doc
|
|
8
|
+
Project-URL: Source, https://github.com/kvcache-ai/Mooncake
|
|
9
|
+
Project-URL: Issues, https://github.com/kvcache-ai/Mooncake/issues
|
|
10
|
+
Keywords: mooncake,data transfer,kv cache,llm inference
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Classifier: Programming Language :: C++
|
|
13
|
+
Classifier: Operating System :: POSIX :: Linux
|
|
14
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
15
|
+
Dynamic: author
|
|
16
|
+
Dynamic: classifier
|
|
17
|
+
Dynamic: home-page
|
|
18
|
+
Dynamic: keywords
|
|
19
|
+
Dynamic: project-url
|
|
20
|
+
Dynamic: summary
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
pyproject.toml
|
|
2
|
+
setup.py
|
|
3
|
+
mooncake/__init__.py
|
|
4
|
+
mooncake/mooncake_master
|
|
5
|
+
mooncake/mooncake_sglang_adaptor.cpython-310-x86_64-linux-gnu.so
|
|
6
|
+
mooncake/mooncake_vllm_adaptor.cpython-310-x86_64-linux-gnu.so
|
|
7
|
+
mooncake/lib_so/libetcd-cpp-api.so
|
|
8
|
+
mooncake_transfer_engine.egg-info/PKG-INFO
|
|
9
|
+
mooncake_transfer_engine.egg-info/SOURCES.txt
|
|
10
|
+
mooncake_transfer_engine.egg-info/dependency_links.txt
|
|
11
|
+
mooncake_transfer_engine.egg-info/not-zip-safe
|
|
12
|
+
mooncake_transfer_engine.egg-info/top_level.txt
|
|
13
|
+
tests/test_distributed_object_store.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
mooncake
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
import sys
|
|
2
|
+
import os
|
|
3
|
+
from setuptools import setup, find_packages
|
|
4
|
+
from setuptools.dist import Distribution
|
|
5
|
+
from wheel.bdist_wheel import bdist_wheel
|
|
6
|
+
|
|
7
|
+
class BinaryDistribution(Distribution):
    """Distribution that always reports that it contains extension modules.

    NOTE(review): presumably this makes build commands treat the package as
    platform-specific, since ``package_data`` ships prebuilt ``.so`` files
    instead of compiling sources -- confirm against setuptools behavior.
    """

    def has_ext_modules(self) -> bool:
        """Return ``True`` unconditionally."""
        return True
|
|
10
|
+
|
|
11
|
+
class CustomBdistWheel(bdist_wheel):
    """``bdist_wheel`` command that pins the wheel's platform settings."""

    def finalize_options(self):
        """Finalize the base options, then override the platform attributes.

        The overrides are applied after the base class has finalized its own
        options, in the same order as before: mark the wheel as non-pure,
        flag the platform name as explicitly supplied, and set the platform
        name to ``manylinux2014_x86_64``.
        """
        super().finalize_options()
        forced_options = {
            "root_is_pure": False,
            "plat_name_supplied": True,
            "plat_name": "manylinux2014_x86_64",
        }
        for option_name, option_value in forced_options.items():
            setattr(self, option_name, option_value)
|
|
17
|
+
|
|
18
|
+
# Minimum supported interpreter, derived from the interpreter running this
# script (major.minor). It is passed to ``python_requires`` below so that the
# declared requirement matches the Python the binaries were packaged with.
python_version = f">={sys.version_info.major}.{sys.version_info.minor}"

# Release version; can be overridden through the VERSION environment variable.
VERSION = os.environ.get("VERSION", "0.1.0")

setup(
    name="mooncake-transfer-engine",
    version=VERSION,
    # Previously ``python_version`` was computed but never used, so the
    # package advertised no Requires-Python constraint at all.
    python_requires=python_version,
    packages=find_packages(),
    # Prebuilt artifacts shipped inside the ``mooncake`` package.
    package_data={"mooncake": [
        "*.so",
        "mooncake_master",
        "lib_so/libetcd-cpp-api.so",
    ]},
    include_package_data=True,
    zip_safe=False,
    distclass=BinaryDistribution,
    cmdclass={
        'bdist_wheel': CustomBdistWheel,
    },
    author="Mooncake Authors",
    description="Python binding of a Mooncake library using pybind11",
    url="https://github.com/kvcache-ai/Mooncake",
    project_urls={
        "Documentation": "https://github.com/kvcache-ai/Mooncake/tree/main/doc",
        "Source": "https://github.com/kvcache-ai/Mooncake",
        "Issues": "https://github.com/kvcache-ai/Mooncake/issues",
    },
    keywords=["mooncake", "data transfer", "kv cache", "llm inference"],
    classifiers=[
        "Programming Language :: Python :: 3",
        "Programming Language :: C++",
        "Operating System :: POSIX :: Linux",
        "License :: OSI Approved :: Apache Software License",
    ],
)
|
|
@@ -0,0 +1,271 @@
|
|
|
1
|
+
import unittest
|
|
2
|
+
import os
|
|
3
|
+
import time
|
|
4
|
+
import threading
|
|
5
|
+
import random
|
|
6
|
+
from mooncake import MooncakeDistributedStore
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def get_client(store):
    """Configure ``store`` by calling its ``setup`` method.

    Connection settings come from environment variables, each with a
    built-in default: ``LOCAL_HOSTNAME``, ``MC_METADATA_SERVER``,
    ``PROTOCOL``, ``DEVICE_NAME`` and ``MASTER_SERVER``. The global segment
    size (3200 MB) and local buffer size (512 MB) are fixed.

    Args:
        store: Object whose ``setup`` accepts, positionally: local hostname,
            metadata server, global segment size, local buffer size,
            protocol, device name and master server address.

    Raises:
        RuntimeError: If ``store.setup`` returns a truthy return code.
    """
    mib = 1024 * 1024
    setup_args = (
        os.environ.get("LOCAL_HOSTNAME", "localhost"),
        os.environ.get("MC_METADATA_SERVER", "127.0.0.1:2379"),
        3200 * mib,  # global segment size: 3200 MB
        512 * mib,  # local buffer size: 512 MB
        os.environ.get("PROTOCOL", "tcp"),
        os.environ.get("DEVICE_NAME", "ibp6s0"),
        os.environ.get("MASTER_SERVER", "127.0.0.1:50051"),
    )

    retcode = store.setup(*setup_args)
    if not retcode:
        return
    raise RuntimeError(f"Failed to setup store client. Return code: {retcode}")
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class TestDistributedObjectStore(unittest.TestCase):
    """Integration tests for ``MooncakeDistributedStore``.

    One store instance is created in ``setUpClass`` and shared by all tests.
    The tests call ``put``, ``get``, ``getSize``, ``isExist``, ``remove`` and
    ``close`` on it and check the returned codes and payloads.
    """

    @classmethod
    def setUpClass(cls):
        """Initialize the store once for all tests."""
        cls.store = MooncakeDistributedStore()
        get_client(cls.store)

    def test_client_tear_down(self):
        """Test client tear down and re-initialization."""
        test_data = b"Hello, World!"
        key = "test_teardown_key"

        # Put data and verify teardown clears it
        self.assertEqual(self.store.put(key, test_data), 0)
        self.assertEqual(self.store.close(), 0)
        time.sleep(1)  # Allow time for teardown to complete

        # Re-initialize the store
        get_client(self.store)

        # Verify data is gone after teardown
        retrieved_data = self.store.get(key)
        self.assertEqual(retrieved_data, b"")

        # Verify store is functional after re-initialization
        self.assertEqual(self.store.put(key, test_data), 0)
        retrieved_data = self.store.get(key)
        self.assertEqual(retrieved_data, test_data)

    def test_basic_put_get_exist_operations(self):
        """Test basic Put/Get/Exist operations through the Python interface."""
        test_data = b"Hello, World!"
        key = "test_basic_key"

        # Test Put operation
        self.assertEqual(self.store.put(key, test_data), 0)

        # Verify data through Get operation
        self.assertEqual(self.store.getSize(key), len(test_data))
        retrieved_data = self.store.get(key)
        self.assertEqual(retrieved_data, test_data)

        # Put again with the same key, should succeed
        self.assertEqual(self.store.put(key, test_data), 0)

        # Remove the key
        self.assertEqual(self.store.remove(key), 0)

        # After remove, getSize is negative and get returns empty bytes
        self.assertLess(self.store.getSize(key), 0)
        empty_data = self.store.get(key)
        self.assertEqual(empty_data, b"")

        # Test isExist functionality
        test_data_2 = b"Testing exists!"
        key_2 = "test_exist_key"

        # Should not exist initially
        self.assertLess(self.store.getSize(key_2), 0)
        self.assertEqual(self.store.isExist(key_2), 0)

        # Should exist after put
        self.assertEqual(self.store.put(key_2, test_data_2), 0)
        self.assertEqual(self.store.isExist(key_2), 1)
        self.assertEqual(self.store.getSize(key_2), len(test_data_2))

        # Should not exist after remove
        self.assertEqual(self.store.remove(key_2), 0)
        self.assertLess(self.store.getSize(key_2), 0)
        self.assertEqual(self.store.isExist(key_2), 0)

    def test_concurrent_stress_with_barrier(self):
        """Test concurrent Put/Get operations with multiple threads using barrier.

        Eight workers each put, get and remove 100 values of 1MB. Barriers
        shared with the main thread separate the phases so the main thread
        can time the put and get phases. If any worker fails, it aborts the
        barriers so that no participant is left waiting forever and the
        failure is reported by the final assertion.
        """
        NUM_THREADS = 8
        VALUE_SIZE = 1024 * 1024  # 1MB
        OPERATIONS_PER_THREAD = 100

        # Create barriers for synchronization
        start_barrier = threading.Barrier(NUM_THREADS + 1)  # +1 for main thread
        put_barrier = threading.Barrier(NUM_THREADS + 1)  # Barrier after put operations
        get_barrier = threading.Barrier(NUM_THREADS + 1)  # Barrier after get operations
        barriers = (start_barrier, put_barrier, get_barrier)

        # Statistics for system-wide timing
        system_stats = {
            'put_start': 0,
            'put_end': 0,
            'get_start': 0,
            'get_end': 0
        }
        thread_exceptions = []

        def worker(thread_id):
            try:
                # Generate test data (1MB)
                test_data = os.urandom(VALUE_SIZE)
                thread_keys = [f"key_{thread_id}_{i}" for i in range(OPERATIONS_PER_THREAD)]

                # Wait for all threads to be ready
                start_barrier.wait()

                # Put operations
                for key in thread_keys:
                    result = self.store.put(key, test_data)
                    self.assertEqual(result, 0, f"Put operation failed for key {key}")

                # Wait for all threads to complete put operations
                put_barrier.wait()

                # Get operations
                for key in thread_keys:
                    retrieved_data = self.store.get(key)
                    self.assertEqual(len(retrieved_data), VALUE_SIZE,
                                     f"Retrieved data size mismatch for key {key}")
                    self.assertEqual(retrieved_data, test_data,
                                     f"Retrieved data content mismatch for key {key}")

                # Wait for all threads to complete get operations
                get_barrier.wait()

                # Remove all keys
                for key in thread_keys:
                    self.assertEqual(self.store.remove(key), 0)

            except threading.BrokenBarrierError:
                # Another participant failed and aborted the barriers; its
                # error is already recorded, so this worker just stops.
                return
            except Exception as e:
                # Record the failure first, then abort every barrier so the
                # other workers and the main thread stop waiting for a party
                # that will never arrive.
                thread_exceptions.append(f"Thread {thread_id} failed: {str(e)}")
                for barrier in barriers:
                    barrier.abort()

        # Create and start threads
        threads = []
        for i in range(NUM_THREADS):
            t = threading.Thread(target=worker, args=(i,), name=f"Worker-{i}")
            threads.append(t)
            t.start()

        try:
            # Wait for all threads to be ready and start the test
            start_barrier.wait()

            # Record put start time (monotonic clock, used only for durations)
            system_stats['put_start'] = time.perf_counter()

            # Wait for all put operations to complete
            put_barrier.wait()
            system_stats['put_end'] = time.perf_counter()

            # Record get start time
            system_stats['get_start'] = time.perf_counter()

            # Wait for all get operations to complete
            get_barrier.wait()
            system_stats['get_end'] = time.perf_counter()
        except threading.BrokenBarrierError:
            # A worker failed and aborted the barriers; the failure is
            # reported by the assertions after the join below.
            pass

        # Join all threads
        for t in threads:
            t.join()

        # Check for any exceptions
        self.assertEqual(len(thread_exceptions), 0, "\n".join(thread_exceptions))
        # Safety net: a broken barrier means the run did not complete normally.
        self.assertFalse(any(barrier.broken for barrier in barriers),
                         "A synchronization barrier was broken during the test")

        # Calculate system-wide statistics
        total_operations = NUM_THREADS * OPERATIONS_PER_THREAD
        put_duration = system_stats['put_end'] - system_stats['put_start']
        get_duration = system_stats['get_end'] - system_stats['get_start']
        total_data_size_gb = (VALUE_SIZE * total_operations) / (1024**3)

        print("\nConcurrent Stress Test Results:")
        print(f"Total threads: {NUM_THREADS}")
        print(f"Operations per thread: {OPERATIONS_PER_THREAD}")
        print(f"Total operations: {total_operations}")
        print(f"Data block size: {VALUE_SIZE/1024/1024:.2f}MB")
        print(f"Total data processed: {total_data_size_gb:.2f}GB")
        print(f"Put duration: {put_duration:.2f} seconds")
        print(f"Get duration: {get_duration:.2f} seconds")
        print(f"System Put throughput: {total_operations/put_duration:.2f} ops/sec")
        print(f"System Get throughput: {total_operations/get_duration:.2f} ops/sec")
        print(f"System Put bandwidth: {total_data_size_gb/put_duration:.2f} GB/sec")
        print(f"System Get bandwidth: {total_data_size_gb/get_duration:.2f} GB/sec")

    def test_dict_fuzz_e2e(self):
        """End-to-end fuzz test comparing distributed store behavior with dict.

        Performs 1000 random operations (put, get, remove) on a pool of 100
        keys, with random value sizes between 1 byte and 64MB. A put that
        returns -200 (not enough space) is skipped. All keys that remain in
        the reference dict are removed afterwards, even when the test fails.
        """
        # Local reference dict to simulate expected dict behavior
        reference = {}
        operations = 1000
        # Use a pool of keys to limit memory consumption
        keys_pool = [f"key_{i}" for i in range(100)]
        # Track which keys have values assigned to ensure consistency
        key_values = {}
        # Fuzz record for debugging in case of errors
        fuzz_record = []
        try:
            for i in range(operations):
                op = random.choice(["put", "get", "remove"])
                key = random.choice(keys_pool)
                if op == "put":
                    # If key already exists, use the same value to ensure consistency
                    if key in key_values:
                        value = key_values[key]
                        size = len(value)
                    else:
                        size = random.randint(1, 64 * 1024 * 1024)
                        value = os.urandom(size)
                        key_values[key] = value

                    fuzz_record.append(f"{i}: put {key} [size: {size}]")
                    error_code = self.store.put(key, value)
                    if error_code == -200:
                        # The space is not enough, continue to next operation
                        continue
                    elif error_code == 0:
                        reference[key] = value
                    else:
                        raise RuntimeError(f"Put operation failed for key {key}. Error code: {error_code}")
                elif op == "get":
                    fuzz_record.append(f"{i}: get {key}")
                    retrieved = self.store.get(key)
                    expected = reference.get(key, b"")
                    self.assertEqual(retrieved, expected)
                elif op == "remove":
                    fuzz_record.append(f"{i}: remove {key}")
                    self.store.remove(key)
                    reference.pop(key, None)
                    # Also remove from key_values to allow new value if key is reused
                    key_values.pop(key, None)
        except Exception as e:
            print(f"Error: {e}")
            print('\nFuzz record (operations so far):')
            for record in fuzz_record:
                print(record)
            # Bare raise re-raises the active exception with its traceback.
            raise
        finally:
            # Cleanup: ensure all remaining keys are removed, including after
            # a failure, so leftover values do not occupy the shared store.
            for key in reference:
                self.store.remove(key)
|
|
269
|
+
|
|
270
|
+
# Run the tests with unittest's command-line runner when executed as a script.
if __name__ == "__main__":
    unittest.main()
|