gpu-worker 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,115 @@
1
+ # GPU Worker
2
+
3
+ Distributed GPU Inference Worker - Share your idle GPU computing power for LLM inference and image generation.
4
+
5
+ ## Features
6
+
7
+ - **Easy Setup**: Single command installation with automatic Python environment management
8
+ - **Multiple Engines**: Support for native Transformers, vLLM, SGLang backends
9
+ - **LLM Inference**: Run Qwen, Llama, GLM, DeepSeek and other popular models
10
+ - **Image Generation**: Support FLUX, Stable Diffusion XL and more
11
+ - **Cross-Platform**: Works on Windows, Linux, and macOS
12
+ - **Auto Configuration**: Interactive setup wizard
13
+
14
+ ## Quick Start
15
+
16
+ ### Using npx (Recommended)
17
+
18
+ ```bash
19
+ # Interactive menu
20
+ npx gpu-worker
21
+
22
+ # Or step by step
23
+ npx gpu-worker configure # Setup configuration
24
+ npx gpu-worker start # Start worker
25
+ npx gpu-worker status # Check status
26
+ ```
27
+
28
+ ### Using npm global install
29
+
30
+ ```bash
31
+ npm install -g gpu-worker
32
+ gpu-worker configure
33
+ gpu-worker start
34
+ ```
35
+
36
+ ## Requirements
37
+
38
+ - **Node.js**: >= 16.0.0
39
+ - **Python**: >= 3.9
40
+ - **GPU**: NVIDIA GPU with CUDA 11.8+ (optional, for GPU inference)
41
+ - **RAM**: 16GB+ recommended
42
+ - **Storage**: 50GB+ for model storage
43
+
44
+ ## Configuration
45
+
46
+ The worker can be configured via:
47
+
48
+ 1. **Interactive wizard**: `gpu-worker configure`
49
+ 2. **Environment variables**: Copy `.env.example` to `.env`
50
+ 3. **YAML config file**: Edit `config.yaml`
51
+
52
+ ### Key Configuration Options
53
+
54
+ | Option | Environment Variable | Description |
55
+ |--------|---------------------|-------------|
56
+ | Server URL | `GPU_SERVER_URL` | Central server address |
57
+ | Worker Name | `GPU_WORKER_NAME` | Display name for this worker |
58
+ | Region | `GPU_REGION` | Geographic region (e.g., asia-east) |
59
+ | Supported Types | `GPU_SUPPORTED_TYPES` | Task types: llm, image_gen |
60
+ | LLM Model | `GPU_LLM_MODEL` | HuggingFace model ID |
61
+
62
+ ## Supported Models
63
+
64
+ ### LLM Models
65
+
66
+ | Model | VRAM Required | Model ID |
67
+ |-------|---------------|----------|
68
+ | Qwen2.5-7B | 16GB | `Qwen/Qwen2.5-7B-Instruct` |
69
+ | Llama-3.1-8B | 18GB | `meta-llama/Llama-3.1-8B-Instruct` |
70
+ | GLM-4-9B | 20GB | `THUDM/glm-4-9b-chat` |
71
+
72
+ ### Image Generation Models
73
+
74
+ | Model | VRAM Required | Model ID |
75
+ |-------|---------------|----------|
76
+ | FLUX.1-schnell | 24GB | `black-forest-labs/FLUX.1-schnell` |
77
+ | SDXL | 12GB | `stabilityai/stable-diffusion-xl-base-1.0` |
78
+
79
+ ## High-Performance Backends
80
+
81
+ For production use, install optional high-performance backends:
82
+
83
+ ```bash
84
+ # SGLang (recommended for high throughput)
85
+ pip install "sglang[all]"
86
+
87
+ # vLLM (alternative)
88
+ pip install vllm
89
+ ```
90
+
91
+ ## Architecture
92
+
93
+ ```
94
+ ┌─────────────────┐ ┌─────────────────┐
95
+ │ Central Server │◄────│ GPU Worker │
96
+ │ (Scheduler) │ │ (This Package) │
97
+ └─────────────────┘ └─────────────────┘
98
+ │ │
99
+ │ ▼
100
+ │ ┌───────────────┐
101
+ │ │ GPU/CPU │
102
+ │ │ Inference │
103
+ └───────────────┤ Engine │
104
+ └───────────────┘
105
+ ```
106
+
107
+ ## License
108
+
109
+ MIT License - see [LICENSE](LICENSE) for details.
110
+
111
+ ## Links
112
+
113
+ - [GitHub Repository](https://github.com/Baozhi888/distributed-gpu-inference)
114
+ - [Documentation](https://github.com/Baozhi888/distributed-gpu-inference#readme)
115
+ - [Issue Tracker](https://github.com/Baozhi888/distributed-gpu-inference/issues)
package/api_client.py ADDED
@@ -0,0 +1,288 @@
1
+ """
2
+ API客户端 - 轻量版
3
+ 支持Token刷新、请求签名、远程配置获取
4
+ """
5
+ import httpx
6
+ from typing import Optional, List, Dict, Any
7
+ import logging
8
+ import time
9
+ import hashlib
10
+ import hmac
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+
15
class APIClient:
    """Client for communicating with the central scheduler server (lightweight).

    Supports worker registration/heartbeat, job fetch/complete, token refresh,
    HMAC-SHA256 request signing, and remote configuration retrieval.
    """

    def __init__(
        self,
        base_url: str,
        token: Optional[str] = None,
        timeout: int = 30,
        max_retries: int = 3
    ):
        """Create a client.

        Args:
            base_url: Server base URL; any trailing slash is stripped.
            token: Optional worker auth token, sent as ``X-Worker-Token``.
            timeout: Default request timeout in seconds.
            max_retries: Number of attempts made by ``_request_with_retry``
                (must be >= 1 for a request to be issued at all).
        """
        self.base_url = base_url.rstrip("/")
        self.token = token
        self.signing_secret: Optional[str] = None
        self.timeout = timeout
        self.max_retries = max_retries
        self.client = httpx.Client(timeout=timeout)

    def set_credentials(self, token: str, signing_secret: Optional[str] = None):
        """Set the authentication credentials used for subsequent requests."""
        self.token = token
        self.signing_secret = signing_secret

    def _headers(self, body: Optional[str] = None, path: str = "") -> dict:
        """Build request headers, including an HMAC signature when possible.

        NOTE(review): every signed-capable call site in this class invokes
        ``_headers()`` with no ``body``/``path``, so the signature branch
        below never fires even when ``signing_secret`` is set. Confirm the
        server contract before threading body/path through the callers.
        """
        headers = {"Content-Type": "application/json"}
        if self.token:
            headers["X-Worker-Token"] = self.token

        # Attach a request signature when a signing key and body are present.
        if self.signing_secret and body:
            timestamp = int(time.time())
            signature = self._sign_request("POST", path, body, timestamp)
            headers["X-Signature"] = signature
            headers["X-Timestamp"] = str(timestamp)

        return headers

    def _sign_request(
        self,
        method: str,
        path: str,
        body: Optional[str],
        timestamp: int
    ) -> str:
        """Compute HMAC-SHA256 over ``METHOD:path:sha256(body):timestamp``.

        Returns:
            Hex-encoded signature string.
        """
        body_hash = hashlib.sha256((body or "").encode()).hexdigest()
        sign_content = f"{method.upper()}:{path}:{body_hash}:{timestamp}"

        signature = hmac.new(
            self.signing_secret.encode(),
            sign_content.encode(),
            hashlib.sha256
        ).hexdigest()

        return signature

    def _request_with_retry(
        self,
        method: str,
        url: str,
        **kwargs
    ) -> httpx.Response:
        """Issue a request, retrying transient failures with exponential backoff.

        4xx responses are raised immediately (client errors will not succeed
        on retry); 5xx responses and transport errors are retried up to
        ``max_retries`` attempts.

        Raises:
            httpx.HTTPStatusError: on a 4xx response, or a 5xx after retries.
            httpx.RequestError: on a transport failure after retries.
            RuntimeError: if ``max_retries`` < 1 so no request was attempted.
        """
        last_error: Optional[Exception] = None

        for attempt in range(self.max_retries):
            try:
                response = self.client.request(method, url, **kwargs)
                response.raise_for_status()
                return response
            except httpx.HTTPStatusError as e:
                # Client errors are not transient -- do not retry.
                if 400 <= e.response.status_code < 500:
                    raise
                last_error = e
            except httpx.RequestError as e:
                last_error = e

            # Exponential backoff: 1s, 2s, 4s, ...
            if attempt < self.max_retries - 1:
                wait_time = 2 ** attempt
                logger.warning(f"Request failed, retrying in {wait_time}s...")
                time.sleep(wait_time)

        # Guard: with max_retries <= 0 the loop never runs and last_error
        # stays None; the original bare ``raise last_error`` would then be
        # ``raise None`` -- a confusing TypeError.
        if last_error is None:
            raise RuntimeError("max_retries must be >= 1; no request attempted")
        raise last_error

    def register(
        self,
        name: str,
        region: str,
        country: Optional[str] = None,
        city: Optional[str] = None,
        timezone: Optional[str] = None,
        gpu_model: Optional[str] = None,
        gpu_memory_gb: Optional[float] = None,
        gpu_count: int = 1,
        supported_types: Optional[List[str]] = None,
        direct_url: Optional[str] = None,
        supports_direct: bool = False
    ) -> dict:
        """Register this worker with the central server.

        Returns:
            The server's JSON response (typically worker id and credentials).
        """
        response = self._request_with_retry(
            "POST",
            f"{self.base_url}/api/v1/workers/register",
            json={
                "name": name,
                "region": region,
                "country": country,
                "city": city,
                "timezone": timezone,
                "gpu_model": gpu_model,
                "gpu_memory_gb": gpu_memory_gb,
                "gpu_count": gpu_count,
                "supported_types": supported_types or [],
                "direct_url": direct_url,
                "supports_direct": supports_direct
            }
        )
        return response.json()

    def heartbeat(
        self,
        worker_id: str,
        status: str,
        current_job_id: Optional[str] = None,
        gpu_memory_used_gb: Optional[float] = None,
        supported_types: Optional[List[str]] = None,
        loaded_models: Optional[List[str]] = None,
        config_version: int = 0
    ) -> dict:
        """Send a heartbeat reporting current worker state."""
        response = self._request_with_retry(
            "POST",
            f"{self.base_url}/api/v1/workers/{worker_id}/heartbeat",
            headers=self._headers(),
            json={
                "status": status,
                "current_job_id": current_job_id,
                "gpu_memory_used_gb": gpu_memory_used_gb,
                "supported_types": supported_types,
                "loaded_models": loaded_models,
                "config_version": config_version
            }
        )
        return response.json()

    def fetch_next_job(self, worker_id: str) -> Optional[dict]:
        """Fetch the next queued job for this worker.

        Returns:
            The job payload, or None when no job is available (204/404/empty).
        """
        try:
            response = self.client.get(
                f"{self.base_url}/api/v1/workers/{worker_id}/next-job",
                headers=self._headers(),
                timeout=10  # short timeout -- this is polled frequently
            )

            if response.status_code == 204:
                return None

            if response.status_code == 200:
                data = response.json()
                return data if data else None

            response.raise_for_status()

        except httpx.HTTPStatusError as e:
            # 404 means "no job", not an error.
            if e.response.status_code == 404:
                return None
            raise

        return None

    def complete_job(
        self,
        worker_id: str,
        job_id: str,
        success: bool,
        result: Optional[dict] = None,
        error: Optional[str] = None,
        processing_time_ms: Optional[int] = None
    ) -> dict:
        """Report a job as finished (successfully or with an error)."""
        response = self._request_with_retry(
            "POST",
            f"{self.base_url}/api/v1/workers/{worker_id}/jobs/{job_id}/complete",
            headers=self._headers(),
            json={
                "success": success,
                "result": result,
                "error": error,
                "processing_time_ms": processing_time_ms
            }
        )
        return response.json()

    def notify_going_offline(
        self,
        worker_id: str,
        finish_current: bool = True
    ) -> dict:
        """Notify the server this worker is about to go offline."""
        response = self._request_with_retry(
            "POST",
            f"{self.base_url}/api/v1/workers/{worker_id}/going-offline",
            headers=self._headers(),
            params={"finish_current": finish_current}
        )
        return response.json()

    def notify_offline(self, worker_id: str) -> dict:
        """Notify the server this worker has gone offline."""
        response = self._request_with_retry(
            "POST",
            f"{self.base_url}/api/v1/workers/{worker_id}/offline",
            headers=self._headers()
        )
        return response.json()

    def verify_credentials(self, worker_id: str, token: str) -> bool:
        """Return True if the given token is accepted by the server.

        Best-effort: any transport/parse failure is logged and reported as
        invalid rather than raised.
        """
        try:
            response = self.client.post(
                f"{self.base_url}/api/v1/workers/{worker_id}/verify",
                headers={"X-Worker-Token": token},
                timeout=10
            )
            return response.status_code == 200
        except Exception as e:
            logger.error(f"Credential verification error: {e}")
            return False

    def get_config(self, worker_id: str) -> Optional[Dict[str, Any]]:
        """Fetch remote worker configuration; None on any failure."""
        try:
            response = self.client.get(
                f"{self.base_url}/api/v1/workers/{worker_id}/config",
                headers=self._headers(),
                timeout=10
            )

            if response.status_code == 200:
                return response.json()

            return None

        except Exception as e:
            logger.error(f"Failed to get remote config: {e}")
            return None

    def refresh_token(
        self,
        worker_id: str,
        refresh_token: str
    ) -> Optional[Dict[str, Any]]:
        """Exchange a refresh token for new credentials; None on failure."""
        try:
            response = self._request_with_retry(
                "POST",
                f"{self.base_url}/api/v1/workers/{worker_id}/refresh-token",
                headers=self._headers(),
                json={"refresh_token": refresh_token}
            )

            if response.status_code == 200:
                return response.json()

            return None

        except Exception as e:
            logger.error(f"Token refresh error: {e}")
            return None

    def close(self):
        """Close the underlying HTTP client."""
        self.client.close()