literegistry 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. literegistry-1.0.0/LICENSE +21 -0
  2. literegistry-1.0.0/MANIFEST.in +17 -0
  3. literegistry-1.0.0/PKG-INFO +212 -0
  4. literegistry-1.0.0/README.md +195 -0
  5. literegistry-1.0.0/literegistry/__init__.py +22 -0
  6. literegistry-1.0.0/literegistry/api.py +94 -0
  7. literegistry-1.0.0/literegistry/bandit.py +148 -0
  8. literegistry-1.0.0/literegistry/cli.py +60 -0
  9. literegistry-1.0.0/literegistry/client.py +226 -0
  10. literegistry-1.0.0/literegistry/consul.py +66 -0
  11. literegistry-1.0.0/literegistry/executable_wrapper.py +178 -0
  12. literegistry-1.0.0/literegistry/gateway.py +253 -0
  13. literegistry-1.0.0/literegistry/gateway_basic.py +242 -0
  14. literegistry-1.0.0/literegistry/http.py +268 -0
  15. literegistry-1.0.0/literegistry/kvstore.py +83 -0
  16. literegistry-1.0.0/literegistry/redis.py +166 -0
  17. literegistry-1.0.0/literegistry/registry.py +120 -0
  18. literegistry-1.0.0/literegistry/sglang_wrapper.py +61 -0
  19. literegistry-1.0.0/literegistry/telemetry.py +68 -0
  20. literegistry-1.0.0/literegistry/vllm_wrapper.py +64 -0
  21. literegistry-1.0.0/literegistry.egg-info/PKG-INFO +212 -0
  22. literegistry-1.0.0/literegistry.egg-info/SOURCES.txt +32 -0
  23. literegistry-1.0.0/literegistry.egg-info/dependency_links.txt +1 -0
  24. literegistry-1.0.0/literegistry.egg-info/entry_points.txt +2 -0
  25. literegistry-1.0.0/literegistry.egg-info/requires.txt +3 -0
  26. literegistry-1.0.0/literegistry.egg-info/top_level.txt +1 -0
  27. literegistry-1.0.0/requirements.txt +3 -0
  28. literegistry-1.0.0/setup.cfg +4 -0
  29. literegistry-1.0.0/setup.py +31 -0
  30. literegistry-1.0.0/tests/test_client.py +31 -0
  31. literegistry-1.0.0/tests/test_consul.py +18 -0
  32. literegistry-1.0.0/tests/test_file.py +15 -0
  33. literegistry-1.0.0/tests/test_http.py +34 -0
  34. literegistry-1.0.0/tests/test_registry.py +28 -0
@@ -0,0 +1,21 @@
1
+ MIT LICENSE
2
+ Copyright (c) 2025 Gonçalo Faria
3
+
4
+ Permission is hereby granted, free of charge, to any person obtaining a copy
5
+ of this software and associated documentation files (the "Software"), to deal
6
+ in the Software without restriction, including without limitation the rights
7
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8
+ copies of the Software, and to permit persons to whom the Software is
9
+ furnished to do so, subject to the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be included in all
12
+ copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20
+ SOFTWARE.
21
+
@@ -0,0 +1,17 @@
1
+
2
+ include README.md
3
+ include LICENSE
4
+ include requirements.txt
5
+ include setup.py
6
+
7
+ # Include all files in the package directory
8
+ recursive-include literegistry *.py
9
+ recursive-include literegistry *.txt
10
+ recursive-include literegistry *.md
11
+
12
+ # Include test files
13
+ recursive-include tests *.py
14
+
15
+
16
+
17
+
@@ -0,0 +1,212 @@
1
+ Metadata-Version: 2.1
2
+ Name: literegistry
3
+ Version: 1.0.0
4
+ Summary: Package for implementing service discovery in a really lite way.
5
+ Home-page: https://github.com/goncalorafaria/lightregistry
6
+ Author: Goncalo Faria
7
+ Author-email: gfaria@cs.washington.edu
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: License :: OSI Approved :: MIT License
10
+ Classifier: Operating System :: OS Independent
11
+ Requires-Python: >=3.6.0
12
+ Description-Content-Type: text/markdown
13
+ License-File: LICENSE
14
+ Requires-Dist: aiohttp
15
+ Requires-Dist: asyncio
16
+ Requires-Dist: redis>=4.5.0
17
+
18
+ # LiteRegistry
19
+
20
+ Lightweight service registry and discovery system for distributed model inference clusters. Built for deployments on HPC environments with load balancing and automatic failover.
21
+
22
+
23
+ ## Installation
24
+
25
+ ```bash
26
+ pip install literegistry
27
+ ```
28
+
29
+ ## Components
30
+
31
+ ### Registry (Key-Value Store)
32
+ The registry stores service metadata and health information. Choose between:
33
+ - **FileSystem**: Simple file-based storage for single-node setups
34
+ - **Redis**: Distributed storage for multi-node HPC clusters (recommended for production)
35
+
36
+ The registry tracks which model servers are available, their endpoints, and performance metrics.
37
+
38
+ ### vLLM Module
39
+ Wraps vLLM servers with automatic registry integration. When you launch vLLM through LiteRegistry, it:
40
+ - Auto-registers with the registry on startup
41
+ - Sends heartbeats to maintain active status
42
+ - Reports performance metrics
43
+
44
+ ### Gateway Server
45
+ HTTP reverse proxy that routes client requests to model servers. Features:
46
+ - OpenAI-compatible API endpoints (`/v1/completions`, `/v1/models`, `/classify`)
47
+ - Automatic load balancing based on server latency
48
+ - Model routing based on the `model` parameter in requests
49
+
50
+ ### CLI Tool
51
+ Command-line interface for monitoring your cluster:
52
+ - View registered models and server counts
53
+ - Check server health and request statistics
54
+ - Monitor latency metrics and request throughput
55
+
56
+ ### Client Library
57
+ Python API for programmatic interaction:
58
+ - `RegistryClient`: Register servers and query available models
59
+ - `RegistryHTTPClient`: Make requests with automatic failover and retry
60
+
61
+ ### How Components Work Together
62
+
63
+ ```
64
+ 1. vLLM servers register themselves:
65
+ vLLM Instance → Registry (Redis/FS)
66
+
67
+ 2. Client sends request to Gateway:
68
+ Client → Gateway Server
69
+
70
+ 3. Gateway queries Registry and routes to best server:
71
+ Gateway → Registry (get available servers)
72
+ Gateway → vLLM Instance (send request)
73
+
74
+ 4. Gateway reports metrics back:
75
+ Gateway → Registry (update latency/stats)
76
+ ```
77
+
78
+ ## HPC Cluster Deployment
79
+
80
+ Complete workflow for deploying distributed model inference:
81
+
82
+ **1. Start Redis Server**
83
+ ```bash
84
+ python -m literegistry.redis --port 6379
85
+ ```
86
+
87
+ **2. Launch vLLM Instances** (supports all standard vLLM arguments)
88
+ ```bash
89
+ python -m literegistry.vllm_wrapper \
90
+ --model "meta-llama/Llama-3.1-8B-Instruct" \
91
+ --registry redis://login-node:6379 \
92
+ --tensor-parallel-size 4
93
+ ```
94
+
95
+ **3. Start Gateway Server**
96
+ ```bash
97
+ python -m literegistry.gateway \
98
+ --registry redis://login-node:6379 \
99
+ --host 0.0.0.0 \
100
+ --port 8080
101
+ ```
102
+
103
+ **4. Monitor Cluster**
104
+ ```bash
105
+ # Summary view
106
+ python -m literegistry.cli --mode summary --registry redis://login-node:6379
107
+ ```
108
+ ## Quick Start
109
+
110
+ ### Basic Usage
111
+
112
+ ```python
113
+ from literegistry import RegistryClient, get_kvstore
114
+ import asyncio
115
+
116
+ async def main():
117
+ # Auto-detect backend (redis:// or file path)
118
+ store = get_kvstore("redis://localhost:6379")
119
+ client = RegistryClient(store, service_type="model_path")
120
+
121
+ # Register a server
122
+ await client.register(
123
+ port=8000,
124
+ metadata={"model_path": "meta-llama/Llama-3.1-8B-Instruct"}
125
+ )
126
+
127
+ # List available models
128
+ models = await client.models()
129
+ print(models)
130
+
131
+ asyncio.run(main())
132
+ ```
133
+
134
+ ### HTTP Client with Automatic Failover
135
+
136
+ ```python
137
+ from literegistry import RegistryHTTPClient
138
+
139
+ async with RegistryHTTPClient(client, "meta-llama/Llama-3.1-8B-Instruct") as http_client:
140
+ result, _ = await http_client.request_with_rotation(
141
+ "v1/completions",
142
+ {"prompt": "Hello"},
143
+ timeout=30,
144
+ max_retries=3
145
+ )
146
+ ```
147
+
148
+ ## Storage Backends
149
+
150
+ LiteRegistry supports different backends depending on your deployment:
151
+
152
+ **FileSystem** - For single-node or shared filesystem environments
153
+ ```python
154
+ from literegistry import FileSystemKVStore
155
+ store = FileSystemKVStore("registry_data")
156
+ ```
157
+ Use when: Running on a single machine or when all nodes share a filesystem (common in HPC clusters with NFS). Note: Can bottleneck with high concurrency.
158
+
159
+ **Redis** - For distributed multi-node clusters
160
+ ```python
161
+ from literegistry import RedisKVStore
162
+ store = RedisKVStore("redis://localhost:6379")
163
+ ```
164
+ Use when: Running across multiple nodes without shared storage, or when you need high-concurrency access. Recommended for production HPC deployments.
165
+
166
+ ## Advanced Usage
167
+
168
+ ### Gateway API
169
+
170
+ The gateway provides OpenAI-compatible HTTP endpoints that work with existing tools:
171
+
172
+ ```bash
173
+ # Send completion request
174
+ curl -X POST http://localhost:8080/v1/completions \
175
+ -H "Content-Type: application/json" \
176
+ -d '{"model": "meta-llama/Llama-3.1-8B-Instruct", "prompt": "Hello"}'
177
+
178
+ # List all available models
179
+ curl http://localhost:8080/v1/models
180
+
181
+ # Check gateway health
182
+ curl http://localhost:8080/health
183
+ ```
184
+
185
+ The gateway automatically routes requests to the appropriate model server based on the `model` field.
186
+
187
+ ### Batch Processing with Parallel Requests
188
+
189
+ Process multiple requests concurrently with automatic load balancing:
190
+
191
+ ```python
192
+ async with RegistryHTTPClient(client, model) as http_client:
193
+ # Process 100 requests with max 5 concurrent
194
+ results = await http_client.parallel_requests(
195
+ "v1/completions",
196
+ payloads_list,
197
+ max_parallel_requests=5,
198
+ timeout=30,
199
+ max_retries=3
200
+ )
201
+ ```
202
+
203
+ This is useful for batch inference workloads. The client handles retry logic and server rotation automatically.
204
+
205
+
206
+ ## Contributing
207
+
208
+ Contributions welcome! Please submit a Pull Request.
209
+
210
+ ## License
211
+
212
+ MIT License - see LICENSE file for details
@@ -0,0 +1,195 @@
1
+ # LiteRegistry
2
+
3
+ Lightweight service registry and discovery system for distributed model inference clusters. Built for deployments on HPC environments with load balancing and automatic failover.
4
+
5
+
6
+ ## Installation
7
+
8
+ ```bash
9
+ pip install literegistry
10
+ ```
11
+
12
+ ## Components
13
+
14
+ ### Registry (Key-Value Store)
15
+ The registry stores service metadata and health information. Choose between:
16
+ - **FileSystem**: Simple file-based storage for single-node setups
17
+ - **Redis**: Distributed storage for multi-node HPC clusters (recommended for production)
18
+
19
+ The registry tracks which model servers are available, their endpoints, and performance metrics.
20
+
21
+ ### vLLM Module
22
+ Wraps vLLM servers with automatic registry integration. When you launch vLLM through LiteRegistry, it:
23
+ - Auto-registers with the registry on startup
24
+ - Sends heartbeats to maintain active status
25
+ - Reports performance metrics
26
+
27
+ ### Gateway Server
28
+ HTTP reverse proxy that routes client requests to model servers. Features:
29
+ - OpenAI-compatible API endpoints (`/v1/completions`, `/v1/models`, `/classify`)
30
+ - Automatic load balancing based on server latency
31
+ - Model routing based on the `model` parameter in requests
32
+
33
+ ### CLI Tool
34
+ Command-line interface for monitoring your cluster:
35
+ - View registered models and server counts
36
+ - Check server health and request statistics
37
+ - Monitor latency metrics and request throughput
38
+
39
+ ### Client Library
40
+ Python API for programmatic interaction:
41
+ - `RegistryClient`: Register servers and query available models
42
+ - `RegistryHTTPClient`: Make requests with automatic failover and retry
43
+
44
+ ### How Components Work Together
45
+
46
+ ```
47
+ 1. vLLM servers register themselves:
48
+ vLLM Instance → Registry (Redis/FS)
49
+
50
+ 2. Client sends request to Gateway:
51
+ Client → Gateway Server
52
+
53
+ 3. Gateway queries Registry and routes to best server:
54
+ Gateway → Registry (get available servers)
55
+ Gateway → vLLM Instance (send request)
56
+
57
+ 4. Gateway reports metrics back:
58
+ Gateway → Registry (update latency/stats)
59
+ ```
60
+
61
+ ## HPC Cluster Deployment
62
+
63
+ Complete workflow for deploying distributed model inference:
64
+
65
+ **1. Start Redis Server**
66
+ ```bash
67
+ python -m literegistry.redis --port 6379
68
+ ```
69
+
70
+ **2. Launch vLLM Instances** (supports all standard vLLM arguments)
71
+ ```bash
72
+ python -m literegistry.vllm_wrapper \
73
+ --model "meta-llama/Llama-3.1-8B-Instruct" \
74
+ --registry redis://login-node:6379 \
75
+ --tensor-parallel-size 4
76
+ ```
77
+
78
+ **3. Start Gateway Server**
79
+ ```bash
80
+ python -m literegistry.gateway \
81
+ --registry redis://login-node:6379 \
82
+ --host 0.0.0.0 \
83
+ --port 8080
84
+ ```
85
+
86
+ **4. Monitor Cluster**
87
+ ```bash
88
+ # Summary view
89
+ python -m literegistry.cli --mode summary --registry redis://login-node:6379
90
+ ```
91
+ ## Quick Start
92
+
93
+ ### Basic Usage
94
+
95
+ ```python
96
+ from literegistry import RegistryClient, get_kvstore
97
+ import asyncio
98
+
99
+ async def main():
100
+ # Auto-detect backend (redis:// or file path)
101
+ store = get_kvstore("redis://localhost:6379")
102
+ client = RegistryClient(store, service_type="model_path")
103
+
104
+ # Register a server
105
+ await client.register(
106
+ port=8000,
107
+ metadata={"model_path": "meta-llama/Llama-3.1-8B-Instruct"}
108
+ )
109
+
110
+ # List available models
111
+ models = await client.models()
112
+ print(models)
113
+
114
+ asyncio.run(main())
115
+ ```
116
+
117
+ ### HTTP Client with Automatic Failover
118
+
119
+ ```python
120
+ from literegistry import RegistryHTTPClient
121
+
122
+ async with RegistryHTTPClient(client, "meta-llama/Llama-3.1-8B-Instruct") as http_client:
123
+ result, _ = await http_client.request_with_rotation(
124
+ "v1/completions",
125
+ {"prompt": "Hello"},
126
+ timeout=30,
127
+ max_retries=3
128
+ )
129
+ ```
130
+
131
+ ## Storage Backends
132
+
133
+ LiteRegistry supports different backends depending on your deployment:
134
+
135
+ **FileSystem** - For single-node or shared filesystem environments
136
+ ```python
137
+ from literegistry import FileSystemKVStore
138
+ store = FileSystemKVStore("registry_data")
139
+ ```
140
+ Use when: Running on a single machine or when all nodes share a filesystem (common in HPC clusters with NFS). Note: Can bottleneck with high concurrency.
141
+
142
+ **Redis** - For distributed multi-node clusters
143
+ ```python
144
+ from literegistry import RedisKVStore
145
+ store = RedisKVStore("redis://localhost:6379")
146
+ ```
147
+ Use when: Running across multiple nodes without shared storage, or when you need high-concurrency access. Recommended for production HPC deployments.
148
+
149
+ ## Advanced Usage
150
+
151
+ ### Gateway API
152
+
153
+ The gateway provides OpenAI-compatible HTTP endpoints that work with existing tools:
154
+
155
+ ```bash
156
+ # Send completion request
157
+ curl -X POST http://localhost:8080/v1/completions \
158
+ -H "Content-Type: application/json" \
159
+ -d '{"model": "meta-llama/Llama-3.1-8B-Instruct", "prompt": "Hello"}'
160
+
161
+ # List all available models
162
+ curl http://localhost:8080/v1/models
163
+
164
+ # Check gateway health
165
+ curl http://localhost:8080/health
166
+ ```
167
+
168
+ The gateway automatically routes requests to the appropriate model server based on the `model` field.
169
+
170
+ ### Batch Processing with Parallel Requests
171
+
172
+ Process multiple requests concurrently with automatic load balancing:
173
+
174
+ ```python
175
+ async with RegistryHTTPClient(client, model) as http_client:
176
+ # Process 100 requests with max 5 concurrent
177
+ results = await http_client.parallel_requests(
178
+ "v1/completions",
179
+ payloads_list,
180
+ max_parallel_requests=5,
181
+ timeout=30,
182
+ max_retries=3
183
+ )
184
+ ```
185
+
186
+ This is useful for batch inference workloads. The client handles retry logic and server rotation automatically.
187
+
188
+
189
+ ## Contributing
190
+
191
+ Contributions welcome! Please submit a Pull Request.
192
+
193
+ ## License
194
+
195
+ MIT License - see LICENSE file for details
@@ -0,0 +1,22 @@
1
+ from .registry import ServerRegistry
2
+ from .client import RegistryClient
3
+ from .kvstore import FileSystemKVStore
4
+ from .redis import RedisKVStore, start_redis_server
5
+ from .http import RegistryHTTPClient
6
+ from .api import ServiceAPI
7
+
8
# Names exported by ``from literegistry import *``. ``get_kvstore`` is
# defined further down in this module and is part of the public surface,
# so it is listed alongside the re-exported classes.
__all__ = [
    "RegistryClient",
    "ServerRegistry",
    "FileSystemKVStore",
    "RedisKVStore",
    "RegistryHTTPClient",
    "ServiceAPI",
    "start_redis_server",
    "get_kvstore",
]
17
+
18
def get_kvstore(registry):
    """Build the key-value store backend selected by ``registry``.

    A value containing ``"redis://"`` is handed to ``RedisKVStore``; any
    other value is handed to ``FileSystemKVStore``.
    """
    uses_redis = "redis://" in registry
    backend = RedisKVStore if uses_redis else FileSystemKVStore
    return backend(registry)
@@ -0,0 +1,94 @@
1
+ from literegistry import ServerRegistry, FileSystemKVStore, RedisKVStore
2
+ import asyncio
3
+ from fastapi import FastAPI, HTTPException
4
+ from typing import List, Optional, Dict, Any
5
+ import time
6
+ from threading import Thread
7
+ import socket
8
+
9
+
10
class ServiceAPI(FastAPI):
    """
    FastAPI extension that automatically handles server registration, heartbeat, and deregistration.
    """

    def __init__(
        self,
        *args,
        registry_path: str = "redis://klone-login01.hyak.local:6379",
        port: Optional[int] = None,
        hostname: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None,
        heartbeat_interval: int = 120,
        max_history: int = 3600,
        **kwargs,
    ):
        """
        Initialize ServiceAPI with automatic registration and heartbeat.

        Args:
            *args: Arguments to pass to FastAPI constructor
            registry_path: Registry location. A value containing "redis://"
                selects RedisKVStore; anything else is passed to
                FileSystemKVStore.
            port: Port number for the server
            hostname: Host name used to build the registered URL
                ("http://<hostname>"). Defaults to socket.gethostname()
                when omitted or empty.
            metadata: Server metadata for registration
            heartbeat_interval: Interval in seconds for heartbeat
            max_history: Forwarded to ServerRegistry as max_history
            **kwargs: Keyword arguments to pass to FastAPI constructor
        """
        # Function-scope import: the module header only imports ``Thread``.
        from threading import Event

        super().__init__(*args, **kwargs)

        if "redis://" in registry_path:
            store = RedisKVStore(registry_path)
        else:
            store = FileSystemKVStore(registry_path)

        self.registry_path = registry_path
        self.port = port
        # Without a fallback a missing hostname would be formatted into the
        # URL as the literal string "None".
        self.hostname = hostname or socket.gethostname()
        self.metadata = metadata or {}
        self.heartbeat_interval = heartbeat_interval
        self.registry = ServerRegistry(
            store=store,
            max_history=max_history,
        )
        self.heartbeat_thread = None
        # Set on shutdown so the heartbeat loop exits instead of running
        # until the process dies.
        self._heartbeat_stop = Event()
        self.url = f"http://{self.hostname}"

        # Register startup and shutdown events
        self._register_startup_events()
        self._register_shutdown_events()

    def _register_startup_events(self):
        """Register startup event handlers."""

        @self.on_event("startup")
        async def startup_event():
            # Register server
            await self.registry.register_server(
                url=self.url,
                port=self.port,
                metadata=self.metadata,
            )

            # Start heartbeat thread
            self._start_heartbeat_thread()

    def _register_shutdown_events(self):
        """Register shutdown event handlers."""

        @self.on_event("shutdown")
        async def shutdown_event():
            # Signal the heartbeat loop first so that no new heartbeat is
            # started once the server has been deregistered.
            self._heartbeat_stop.set()
            if self.registry:
                await self.registry.deregister()

    def _start_heartbeat_thread(self):
        """Start a daemon thread for heartbeat operations.

        The loop sends one heartbeat, then waits ``heartbeat_interval``
        seconds, until the stop event is set by the shutdown handler.
        A failing heartbeat is logged and retried on the next cycle rather
        than terminating the thread.
        """
        import logging

        logger = logging.getLogger(__name__)

        # Avoid running two loops if startup fires while one is still alive.
        if self.heartbeat_thread is not None and self.heartbeat_thread.is_alive():
            return

        # Allow a fresh start after a previous shutdown in the same process.
        self._heartbeat_stop.clear()

        def heartbeat_loop():
            while not self._heartbeat_stop.is_set():
                try:
                    # Each call runs the coroutine on a fresh event loop
                    # owned by this thread.
                    asyncio.run(self.registry.heartbeat(self.url, self.port))
                except Exception:
                    logger.exception(
                        "Heartbeat for %s (port %s) failed; retrying in %s seconds",
                        self.url,
                        self.port,
                        self.heartbeat_interval,
                    )
                # Interruptible replacement for time.sleep(): returns as
                # soon as the stop event is set.
                self._heartbeat_stop.wait(self.heartbeat_interval)

        self.heartbeat_thread = Thread(target=heartbeat_loop, daemon=True)
        self.heartbeat_thread.start()