PyPI - literegistry - Versions diffs - 1.0.0__tar.gz - Mend

literegistry 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (34) hide show

literegistry-1.0.0/LICENSE +21 -0
literegistry-1.0.0/MANIFEST.in +17 -0
literegistry-1.0.0/PKG-INFO +212 -0
literegistry-1.0.0/README.md +195 -0
literegistry-1.0.0/literegistry/__init__.py +22 -0
literegistry-1.0.0/literegistry/api.py +94 -0
literegistry-1.0.0/literegistry/bandit.py +148 -0
literegistry-1.0.0/literegistry/cli.py +60 -0
literegistry-1.0.0/literegistry/client.py +226 -0
literegistry-1.0.0/literegistry/consul.py +66 -0
literegistry-1.0.0/literegistry/executable_wrapper.py +178 -0
literegistry-1.0.0/literegistry/gateway.py +253 -0
literegistry-1.0.0/literegistry/gateway_basic.py +242 -0
literegistry-1.0.0/literegistry/http.py +268 -0
literegistry-1.0.0/literegistry/kvstore.py +83 -0
literegistry-1.0.0/literegistry/redis.py +166 -0
literegistry-1.0.0/literegistry/registry.py +120 -0
literegistry-1.0.0/literegistry/sglang_wrapper.py +61 -0
literegistry-1.0.0/literegistry/telemetry.py +68 -0
literegistry-1.0.0/literegistry/vllm_wrapper.py +64 -0
literegistry-1.0.0/literegistry.egg-info/PKG-INFO +212 -0
literegistry-1.0.0/literegistry.egg-info/SOURCES.txt +32 -0
literegistry-1.0.0/literegistry.egg-info/dependency_links.txt +1 -0
literegistry-1.0.0/literegistry.egg-info/entry_points.txt +2 -0
literegistry-1.0.0/literegistry.egg-info/requires.txt +3 -0
literegistry-1.0.0/literegistry.egg-info/top_level.txt +1 -0
literegistry-1.0.0/requirements.txt +3 -0
literegistry-1.0.0/setup.cfg +4 -0
literegistry-1.0.0/setup.py +31 -0
literegistry-1.0.0/tests/test_client.py +31 -0
literegistry-1.0.0/tests/test_consul.py +18 -0
literegistry-1.0.0/tests/test_file.py +15 -0
literegistry-1.0.0/tests/test_http.py +34 -0
literegistry-1.0.0/tests/test_registry.py +28 -0

literegistry-1.0.0/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT LICENSE
+Copyright (c) 2025 Gonçalo Faria
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

literegistry-1.0.0/MANIFEST.in ADDED Viewed

@@ -0,0 +1,17 @@
+include README.md
+include LICENSE
+include requirements.txt
+include setup.py
+# Include all files in the package directory (the package is "literegistry")
+recursive-include literegistry *.py
+recursive-include literegistry *.txt
+recursive-include literegistry *.md
+# Include test files
+recursive-include tests *.py

literegistry-1.0.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,212 @@
+Metadata-Version: 2.1
+Name: literegistry
+Version: 1.0.0
+Summary: Package for implementing service discovery in a really lite way.
+Home-page: https://github.com/goncalorafaria/lightregistry
+Author: Goncalo Faria
+Author-email: gfaria@cs.washington.edu
+Classifier: Programming Language :: Python :: 3
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Operating System :: OS Independent
+Requires-Python: >=3.6.0
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: aiohttp
+Requires-Dist: asyncio
+Requires-Dist: redis>=4.5.0
+# LiteRegistry
+Lightweight service registry and discovery system for distributed model inference clusters. Built for deployments on HPC environments with load balancing and automatic failover.
+## Installation
+```bash
+pip install literegistry
+```
+## Components
+### Registry (Key-Value Store)
+The registry stores service metadata and health information. Choose between:
+- **FileSystem**: Simple file-based storage for single-node setups
+- **Redis**: Distributed storage for multi-node HPC clusters (recommended for production)
+The registry tracks which model servers are available, their endpoints, and performance metrics.
+### vLLM Module
+Wraps vLLM servers with automatic registry integration. When you launch vLLM through LiteRegistry, it:
+- Auto-registers with the registry on startup
+- Sends heartbeats to maintain active status
+- Reports performance metrics
+### Gateway Server
+HTTP reverse proxy that routes client requests to model servers. Features:
+- OpenAI-compatible API endpoints (`/v1/completions`, `/v1/models`, `/classify`)
+- Automatic load balancing based on server latency
+- Model routing based on the `model` parameter in requests
+### CLI Tool
+Command-line interface for monitoring your cluster:
+- View registered models and server counts
+- Check server health and request statistics
+- Monitor latency metrics and request throughput
+### Client Library
+Python API for programmatic interaction:
+- `RegistryClient`: Register servers and query available models
+- `RegistryHTTPClient`: Make requests with automatic failover and retry
+### How Components Work Together
+```
+1. vLLM servers register themselves:
+   vLLM Instance → Registry (Redis/FS)
+2. Client sends request to Gateway:
+   Client → Gateway Server
+3. Gateway queries Registry and routes to best server:
+   Gateway → Registry (get available servers)
+   Gateway → vLLM Instance (send request)
+4. Gateway reports metrics back:
+   Gateway → Registry (update latency/stats)
+```
+## HPC Cluster Deployment
+Complete workflow for deploying distributed model inference:
+**1. Start Redis Server**
+```bash
+python -m literegistry.redis --port 6379
+```
+**2. Launch vLLM Instances** (supports all standard vLLM arguments)
+```bash
+python -m literegistry.vllm_wrapper \
+  --model "meta-llama/Llama-3.1-8B-Instruct" \
+  --registry redis://login-node:6379 \
+  --tensor-parallel-size 4
+```
+**3. Start Gateway Server**
+```bash
+python -m literegistry.gateway \
+  --registry redis://login-node:6379 \
+  --host 0.0.0.0 \
+  --port 8080
+```
+**4. Monitor Cluster**
+```bash
+# Summary view
+python -m literegistry.cli --mode summary --registry redis://login-node:6379
+```
+## Quick Start
+### Basic Usage
+```python
+from literegistry import RegistryClient, get_kvstore
+import asyncio
+async def main():
+    # Auto-detect backend (redis:// or file path)
+    store = get_kvstore("redis://localhost:6379")
+    client = RegistryClient(store, service_type="model_path")
+    # Register a server
+    await client.register(
+        port=8000,
+        metadata={"model_path": "meta-llama/Llama-3.1-8B-Instruct"}
+    )
+    # List available models
+    models = await client.models()
+    print(models)
+asyncio.run(main())
+```
+### HTTP Client with Automatic Failover
+```python
+from literegistry import RegistryHTTPClient
+async with RegistryHTTPClient(client, "meta-llama/Llama-3.1-8B-Instruct") as http_client:
+    result, _ = await http_client.request_with_rotation(
+        "v1/completions",
+        {"prompt": "Hello"},
+        timeout=30,
+        max_retries=3
+    )
+```
+## Storage Backends
+LiteRegistry supports different backends depending on your deployment:
+**FileSystem** - For single-node or shared filesystem environments
+```python
+from literegistry import FileSystemKVStore
+store = FileSystemKVStore("registry_data")
+```
+Use when: Running on a single machine or when all nodes share a filesystem (common in HPC clusters with NFS). Note: Can bottleneck with high concurrency.
+**Redis** - For distributed multi-node clusters
+```python
+from literegistry import RedisKVStore
+store = RedisKVStore("redis://localhost:6379")
+```
+Use when: Running across multiple nodes without shared storage, or need high-concurrency access. Recommended for production HPC deployments.
+## Advanced Usage
+### Gateway API
+The gateway provides OpenAI-compatible HTTP endpoints that work with existing tools:
+```bash
+# Send completion request
+curl -X POST http://localhost:8080/v1/completions \
+  -H "Content-Type: application/json" \
+  -d '{"model": "meta-llama/Llama-3.1-8B-Instruct", "prompt": "Hello"}'
+# List all available models
+curl http://localhost:8080/v1/models
+# Check gateway health
+curl http://localhost:8080/health
+```
+The gateway automatically routes requests to the appropriate model server based on the `model` field.
+### Batch Processing with Parallel Requests
+Process multiple requests concurrently with automatic load balancing:
+```python
+async with RegistryHTTPClient(client, model) as http_client:
+    # Process 100 requests with max 5 concurrent
+    results = await http_client.parallel_requests(
+        "v1/completions",
+        payloads_list,
+        max_parallel_requests=5,
+        timeout=30,
+        max_retries=3
+    )
+```
+This is useful for batch inference workloads. The client handles retry logic and server rotation automatically.
+## Contributing
+Contributions welcome! Please submit a Pull Request.
+## License
+MIT License - see LICENSE file for details

literegistry-1.0.0/README.md ADDED Viewed

@@ -0,0 +1,195 @@
+# LiteRegistry
+Lightweight service registry and discovery system for distributed model inference clusters. Built for deployments on HPC environments with load balancing and automatic failover.
+## Installation
+```bash
+pip install literegistry
+```
+## Components
+### Registry (Key-Value Store)
+The registry stores service metadata and health information. Choose between:
+- **FileSystem**: Simple file-based storage for single-node setups
+- **Redis**: Distributed storage for multi-node HPC clusters (recommended for production)
+The registry tracks which model servers are available, their endpoints, and performance metrics.
+### vLLM Module
+Wraps vLLM servers with automatic registry integration. When you launch vLLM through LiteRegistry, it:
+- Auto-registers with the registry on startup
+- Sends heartbeats to maintain active status
+- Reports performance metrics
+### Gateway Server
+HTTP reverse proxy that routes client requests to model servers. Features:
+- OpenAI-compatible API endpoints (`/v1/completions`, `/v1/models`, `/classify`)
+- Automatic load balancing based on server latency
+- Model routing based on the `model` parameter in requests
+### CLI Tool
+Command-line interface for monitoring your cluster:
+- View registered models and server counts
+- Check server health and request statistics
+- Monitor latency metrics and request throughput
+### Client Library
+Python API for programmatic interaction:
+- `RegistryClient`: Register servers and query available models
+- `RegistryHTTPClient`: Make requests with automatic failover and retry
+### How Components Work Together
+```
+1. vLLM servers register themselves:
+   vLLM Instance → Registry (Redis/FS)
+2. Client sends request to Gateway:
+   Client → Gateway Server
+3. Gateway queries Registry and routes to best server:
+   Gateway → Registry (get available servers)
+   Gateway → vLLM Instance (send request)
+4. Gateway reports metrics back:
+   Gateway → Registry (update latency/stats)
+```
+## HPC Cluster Deployment
+Complete workflow for deploying distributed model inference:
+**1. Start Redis Server**
+```bash
+python -m literegistry.redis --port 6379
+```
+**2. Launch vLLM Instances** (supports all standard vLLM arguments)
+```bash
+python -m literegistry.vllm_wrapper \
+  --model "meta-llama/Llama-3.1-8B-Instruct" \
+  --registry redis://login-node:6379 \
+  --tensor-parallel-size 4
+```
+**3. Start Gateway Server**
+```bash
+python -m literegistry.gateway \
+  --registry redis://login-node:6379 \
+  --host 0.0.0.0 \
+  --port 8080
+```
+**4. Monitor Cluster**
+```bash
+# Summary view
+python -m literegistry.cli --mode summary --registry redis://login-node:6379
+```
+## Quick Start
+### Basic Usage
+```python
+from literegistry import RegistryClient, get_kvstore
+import asyncio
+async def main():
+    # Auto-detect backend (redis:// or file path)
+    store = get_kvstore("redis://localhost:6379")
+    client = RegistryClient(store, service_type="model_path")
+    # Register a server
+    await client.register(
+        port=8000,
+        metadata={"model_path": "meta-llama/Llama-3.1-8B-Instruct"}
+    )
+    # List available models
+    models = await client.models()
+    print(models)
+asyncio.run(main())
+```
+### HTTP Client with Automatic Failover
+```python
+from literegistry import RegistryHTTPClient
+async with RegistryHTTPClient(client, "meta-llama/Llama-3.1-8B-Instruct") as http_client:
+    result, _ = await http_client.request_with_rotation(
+        "v1/completions",
+        {"prompt": "Hello"},
+        timeout=30,
+        max_retries=3
+    )
+```
+## Storage Backends
+LiteRegistry supports different backends depending on your deployment:
+**FileSystem** - For single-node or shared filesystem environments
+```python
+from literegistry import FileSystemKVStore
+store = FileSystemKVStore("registry_data")
+```
+Use when: Running on a single machine or when all nodes share a filesystem (common in HPC clusters with NFS). Note: Can bottleneck with high concurrency.
+**Redis** - For distributed multi-node clusters
+```python
+from literegistry import RedisKVStore
+store = RedisKVStore("redis://localhost:6379")
+```
+Use when: Running across multiple nodes without shared storage, or need high-concurrency access. Recommended for production HPC deployments.
+## Advanced Usage
+### Gateway API
+The gateway provides OpenAI-compatible HTTP endpoints that work with existing tools:
+```bash
+# Send completion request
+curl -X POST http://localhost:8080/v1/completions \
+  -H "Content-Type: application/json" \
+  -d '{"model": "meta-llama/Llama-3.1-8B-Instruct", "prompt": "Hello"}'
+# List all available models
+curl http://localhost:8080/v1/models
+# Check gateway health
+curl http://localhost:8080/health
+```
+The gateway automatically routes requests to the appropriate model server based on the `model` field.
+### Batch Processing with Parallel Requests
+Process multiple requests concurrently with automatic load balancing:
+```python
+async with RegistryHTTPClient(client, model) as http_client:
+    # Process 100 requests with max 5 concurrent
+    results = await http_client.parallel_requests(
+        "v1/completions",
+        payloads_list,
+        max_parallel_requests=5,
+        timeout=30,
+        max_retries=3
+    )
+```
+This is useful for batch inference workloads. The client handles retry logic and server rotation automatically.
+## Contributing
+Contributions welcome! Please submit a Pull Request.
+## License
+MIT License - see LICENSE file for details

literegistry-1.0.0/literegistry/__init__.py ADDED Viewed

@@ -0,0 +1,22 @@
+from .registry import ServerRegistry
+from .client import RegistryClient
+from .kvstore import FileSystemKVStore
+from .redis import RedisKVStore, start_redis_server
+from .http import RegistryHTTPClient
+from .api import ServiceAPI
+__all__ = [
+    "RegistryClient",
+    "ServerRegistry",
+    "FileSystemKVStore",
+    "RedisKVStore",
+    "RegistryHTTPClient",
+    "ServiceAPI",
+    "start_redis_server",
+]
+def get_kvstore(registry):
+    if "redis://" in registry:
+        return RedisKVStore(registry)
+    else:
+        return FileSystemKVStore(registry)

literegistry-1.0.0/literegistry/api.py ADDED Viewed

@@ -0,0 +1,94 @@
+from literegistry import ServerRegistry, FileSystemKVStore, RedisKVStore
+import asyncio
+from fastapi import FastAPI, HTTPException
+from typing import List, Optional, Dict, Any
+import time
+from threading import Thread
+import socket
+class ServiceAPI(FastAPI):
+    """
+    FastAPI extension that automatically handles server registration, heartbeat, and deregistration.
+    """
+    def __init__(
+        self,
+        *args,
+        registry_path: str = "redis://klone-login01.hyak.local:6379",# "/gscratch/ark/graf/registry", # "redis://klone-login01.hyak.local:6379"
+        port: int = None,
+        hostname: str = None,
+        metadata: Dict[str, Any] = None,
+        heartbeat_interval: int = 120,
+        max_history=3600,
+        **kwargs,
+    ):
+        """
+        Initialize RewardModelServer with automatic registration and heartbeat.
+        Args:
+            *args: Arguments to pass to FastAPI constructor
+            registry_path: Path to the registry filesystem
+            port: Port number for the server
+            metadata: Server metadata for registration
+            heartbeat_interval: Interval in seconds for heartbeat
+            **kwargs: Keyword arguments to pass to FastAPI constructor
+        """
+        super().__init__(*args, **kwargs)
+        if "redis://" in registry_path:
+            store = RedisKVStore(registry_path)
+        else:
+            store = FileSystemKVStore(registry_path)
+        self.registry_path = registry_path
+        self.port = port
+        self.hostname = hostname
+        self.metadata = metadata or {}
+        self.heartbeat_interval = heartbeat_interval
+        self.registry = ServerRegistry(
+            store=store,#RedisKVStore("redis://klone-login01.hyak.local:6379"),#FileSystemKVStore(self.registry_path),
+            max_history=max_history,
+        )
+        self.heartbeat_thread = None
+        self.url = f"http://{hostname}"
+        # Register startup and shutdown events
+        self._register_startup_events()
+        self._register_shutdown_events()
+    def _register_startup_events(self):
+        """Register startup event handlers."""
+        @self.on_event("startup")
+        async def startup_event():
+            # Register server
+            await self.registry.register_server(
+                url= self.url,
+                port=self.port,
+                metadata=self.metadata,
+            )
+            # Start heartbeat thread
+            self._start_heartbeat_thread()
+    def _register_shutdown_events(self):
+        """Register shutdown event handlers."""
+        @self.on_event("shutdown")
+        async def shutdown_event():
+            if self.registry:
+                await self.registry.deregister()
+    def _start_heartbeat_thread(self):
+        """Start a daemon thread for heartbeat operations."""
+        def heartbeat_loop():
+            while True:
+                asyncio.run(self.registry.heartbeat(self.url, self.port))
+                time.sleep(self.heartbeat_interval)
+        self.heartbeat_thread = Thread(target=heartbeat_loop, daemon=True)
+        self.heartbeat_thread.start()