gpu-worker 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +115 -0
- package/api_client.py +288 -0
- package/batch_processor.py +436 -0
- package/bin/gpu-worker.js +275 -0
- package/cli.py +729 -0
- package/config.2gb.yaml +32 -0
- package/config.8gb.yaml +29 -0
- package/config.example.yaml +72 -0
- package/config.py +213 -0
- package/direct_server.py +140 -0
- package/distributed/__init__.py +35 -0
- package/distributed/grpc_server.py +561 -0
- package/distributed/kv_cache.py +555 -0
- package/distributed/model_shard.py +465 -0
- package/distributed/session.py +455 -0
- package/engines/__init__.py +215 -0
- package/engines/base.py +57 -0
- package/engines/image_gen.py +83 -0
- package/engines/llm.py +97 -0
- package/engines/llm_base.py +216 -0
- package/engines/llm_sglang.py +489 -0
- package/engines/llm_vllm.py +539 -0
- package/engines/speculative.py +513 -0
- package/engines/vision.py +139 -0
- package/machine_id.py +200 -0
- package/main.py +521 -0
- package/package.json +64 -0
- package/requirements-sglang.txt +12 -0
- package/requirements-vllm.txt +15 -0
- package/requirements.txt +35 -0
- package/scripts/postinstall.js +60 -0
- package/setup.py +43 -0
package/README.md
ADDED
@@ -0,0 +1,115 @@

# GPU Worker

Distributed GPU Inference Worker - share your idle GPU computing power for LLM inference and image generation.

## Features

- **Easy Setup**: Single-command installation with automatic Python environment management
- **Multiple Engines**: Support for native Transformers, vLLM, and SGLang backends
- **LLM Inference**: Run Qwen, Llama, GLM, DeepSeek, and other popular models
- **Image Generation**: Support for FLUX, Stable Diffusion XL, and more
- **Cross-Platform**: Works on Windows, Linux, and macOS
- **Auto Configuration**: Interactive setup wizard

## Quick Start

### Using npx (Recommended)

```bash
# Interactive menu
npx gpu-worker

# Or step by step
npx gpu-worker configure   # Set up configuration
npx gpu-worker start       # Start the worker
npx gpu-worker status      # Check status
```

### Using npm global install

```bash
npm install -g gpu-worker
gpu-worker configure
gpu-worker start
```

## Requirements

- **Node.js**: >= 16.0.0
- **Python**: >= 3.9
- **GPU**: NVIDIA GPU with CUDA 11.8+ (optional, for GPU inference)
- **RAM**: 16GB+ recommended
- **Storage**: 50GB+ for model storage

## Configuration

The worker can be configured via:

1. **Interactive wizard**: `gpu-worker configure`
2. **Environment variables**: Copy `.env.example` to `.env`
3. **YAML config file**: Edit `config.yaml`

### Key Configuration Options

| Option | Environment Variable | Description |
|--------|---------------------|-------------|
| Server URL | `GPU_SERVER_URL` | Central server address |
| Worker Name | `GPU_WORKER_NAME` | Display name for this worker |
| Region | `GPU_REGION` | Geographic region (e.g., asia-east) |
| Supported Types | `GPU_SUPPORTED_TYPES` | Task types: llm, image_gen |
| LLM Model | `GPU_LLM_MODEL` | HuggingFace model ID |
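
As a sketch, a `.env` combining these options might look like the following. The values are illustrative only, and the comma-separated format for `GPU_SUPPORTED_TYPES` is an assumption:

```bash
# .env (illustrative values only)
GPU_SERVER_URL=https://scheduler.example.com   # hypothetical server address
GPU_WORKER_NAME=my-rtx4090
GPU_REGION=asia-east
GPU_SUPPORTED_TYPES=llm,image_gen              # assumed comma-separated format
GPU_LLM_MODEL=Qwen/Qwen2.5-7B-Instruct
```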
## Supported Models

### LLM Models

| Model | VRAM Required | Model ID |
|-------|---------------|----------|
| Qwen2.5-7B | 16GB | `Qwen/Qwen2.5-7B-Instruct` |
| Llama-3.1-8B | 18GB | `meta-llama/Llama-3.1-8B-Instruct` |
| GLM-4-9B | 20GB | `THUDM/glm-4-9b-chat` |

### Image Generation Models

| Model | VRAM Required | Model ID |
|-------|---------------|----------|
| FLUX.1-schnell | 24GB | `black-forest-labs/FLUX.1-schnell` |
| SDXL | 12GB | `stabilityai/stable-diffusion-xl-base-1.0` |
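
Assuming the worker reads `GPU_LLM_MODEL` at startup (see Configuration above), pinning a run to one of these models might look like:

```bash
# Illustrative: model ID from the table above, env var from the Configuration section
GPU_LLM_MODEL=Qwen/Qwen2.5-7B-Instruct npx gpu-worker start
```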
## High-Performance Backends

For production use, install optional high-performance backends:

```bash
# SGLang (recommended for high throughput)
pip install "sglang[all]"

# vLLM (alternative)
pip install vllm
```

## Architecture

```
┌─────────────────┐     ┌─────────────────┐
│ Central Server  │◄────│   GPU Worker    │
│   (Scheduler)   │     │ (This Package)  │
└─────────────────┘     └─────────────────┘
        │                        │
        │                        ▼
        │               ┌───────────────┐
        │               │    GPU/CPU    │
        │               │   Inference   │
        └───────────────┤    Engine     │
                        └───────────────┘
```

## License

MIT License - see [LICENSE](../LICENSE) for details.

## Links

- [GitHub Repository](https://github.com/Baozhi888/distributed-gpu-inference)
- [Documentation](https://github.com/Baozhi888/distributed-gpu-inference#readme)
- [Issue Tracker](https://github.com/Baozhi888/distributed-gpu-inference/issues)
package/api_client.py
ADDED
@@ -0,0 +1,288 @@

"""
API client (lightweight version).

Supports token refresh, request signing, and remote config retrieval.
"""
import httpx
from typing import Optional, List, Dict, Any
import logging
import time
import hashlib
import hmac

logger = logging.getLogger(__name__)


class APIClient:
    """Client for communicating with the central server (lightweight version)."""

    def __init__(
        self,
        base_url: str,
        token: Optional[str] = None,
        timeout: int = 30,
        max_retries: int = 3
    ):
        self.base_url = base_url.rstrip("/")
        self.token = token
        self.signing_secret: Optional[str] = None
        self.timeout = timeout
        self.max_retries = max_retries
        self.client = httpx.Client(timeout=timeout)

    def set_credentials(self, token: str, signing_secret: Optional[str] = None):
        """Set authentication credentials."""
        self.token = token
        self.signing_secret = signing_secret

    def _headers(self, body: Optional[str] = None, path: str = "") -> dict:
        """Build request headers (including the signature, if signing is enabled)."""
        headers = {"Content-Type": "application/json"}
        if self.token:
            headers["X-Worker-Token"] = self.token

        # If a signing secret is set and a body was supplied, sign the request
        if self.signing_secret and body:
            timestamp = int(time.time())
            signature = self._sign_request("POST", path, body, timestamp)
            headers["X-Signature"] = signature
            headers["X-Timestamp"] = str(timestamp)

        return headers

    def _sign_request(
        self,
        method: str,
        path: str,
        body: Optional[str],
        timestamp: int
    ) -> str:
        """Compute the request signature: HMAC-SHA256 over "METHOD:path:sha256(body):timestamp"."""
        body_hash = hashlib.sha256((body or "").encode()).hexdigest()
        sign_content = f"{method.upper()}:{path}:{body_hash}:{timestamp}"

        signature = hmac.new(
            self.signing_secret.encode(),
            sign_content.encode(),
            hashlib.sha256
        ).hexdigest()

        return signature

    def _request_with_retry(
        self,
        method: str,
        url: str,
        **kwargs
    ) -> httpx.Response:
        """Issue a request, retrying with exponential backoff."""
        last_error = None

        for attempt in range(self.max_retries):
            try:
                response = self.client.request(method, url, **kwargs)
                response.raise_for_status()
                return response
            except httpx.HTTPStatusError as e:
                # Do not retry 4xx client errors
                if 400 <= e.response.status_code < 500:
                    raise
                last_error = e
            except httpx.RequestError as e:
                last_error = e

            # Exponential backoff: 1s, 2s, 4s, ...
            if attempt < self.max_retries - 1:
                wait_time = 2 ** attempt
                logger.warning(f"Request failed, retrying in {wait_time}s...")
                time.sleep(wait_time)

        raise last_error

    def register(
        self,
        name: str,
        region: str,
        country: Optional[str] = None,
        city: Optional[str] = None,
        timezone: Optional[str] = None,
        gpu_model: Optional[str] = None,
        gpu_memory_gb: Optional[float] = None,
        gpu_count: int = 1,
        supported_types: Optional[List[str]] = None,
        direct_url: Optional[str] = None,
        supports_direct: bool = False
    ) -> dict:
        """Register this worker with the central server."""
        response = self._request_with_retry(
            "POST",
            f"{self.base_url}/api/v1/workers/register",
            json={
                "name": name,
                "region": region,
                "country": country,
                "city": city,
                "timezone": timezone,
                "gpu_model": gpu_model,
                "gpu_memory_gb": gpu_memory_gb,
                "gpu_count": gpu_count,
                "supported_types": supported_types or [],
                "direct_url": direct_url,
                "supports_direct": supports_direct
            }
        )
        return response.json()

    def heartbeat(
        self,
        worker_id: str,
        status: str,
        current_job_id: Optional[str] = None,
        gpu_memory_used_gb: Optional[float] = None,
        supported_types: Optional[List[str]] = None,
        loaded_models: Optional[List[str]] = None,
        config_version: int = 0
    ) -> dict:
        """Send a heartbeat."""
        response = self._request_with_retry(
            "POST",
            f"{self.base_url}/api/v1/workers/{worker_id}/heartbeat",
            headers=self._headers(),
            json={
                "status": status,
                "current_job_id": current_job_id,
                "gpu_memory_used_gb": gpu_memory_used_gb,
                "supported_types": supported_types,
                "loaded_models": loaded_models,
                "config_version": config_version
            }
        )
        return response.json()

    def fetch_next_job(self, worker_id: str) -> Optional[dict]:
        """Fetch the next job, if any."""
        try:
            response = self.client.get(
                f"{self.base_url}/api/v1/workers/{worker_id}/next-job",
                headers=self._headers(),
                timeout=10  # short timeout
            )

            if response.status_code == 204:
                return None

            if response.status_code == 200:
                data = response.json()
                return data if data else None

            response.raise_for_status()

        except httpx.HTTPStatusError as e:
            if e.response.status_code == 404:
                return None
            raise

        return None

    def complete_job(
        self,
        worker_id: str,
        job_id: str,
        success: bool,
        result: Optional[dict] = None,
        error: Optional[str] = None,
        processing_time_ms: Optional[int] = None
    ) -> dict:
        """Report a job as completed."""
        response = self._request_with_retry(
            "POST",
            f"{self.base_url}/api/v1/workers/{worker_id}/jobs/{job_id}/complete",
            headers=self._headers(),
            json={
                "success": success,
                "result": result,
                "error": error,
                "processing_time_ms": processing_time_ms
            }
        )
        return response.json()

    def notify_going_offline(
        self,
        worker_id: str,
        finish_current: bool = True
    ) -> dict:
        """Notify the server that this worker is about to go offline."""
        response = self._request_with_retry(
            "POST",
            f"{self.base_url}/api/v1/workers/{worker_id}/going-offline",
            headers=self._headers(),
            params={"finish_current": finish_current}
        )
        return response.json()

    def notify_offline(self, worker_id: str) -> dict:
        """Notify the server that this worker has gone offline."""
        response = self._request_with_retry(
            "POST",
            f"{self.base_url}/api/v1/workers/{worker_id}/offline",
            headers=self._headers()
        )
        return response.json()

    def verify_credentials(self, worker_id: str, token: str) -> bool:
        """Check whether the given credentials are still valid."""
        try:
            response = self.client.post(
                f"{self.base_url}/api/v1/workers/{worker_id}/verify",
                headers={"X-Worker-Token": token},
                timeout=10
            )
            return response.status_code == 200
        except Exception as e:
            logger.error(f"Credential verification error: {e}")
            return False

    def get_config(self, worker_id: str) -> Optional[Dict[str, Any]]:
        """Fetch the remote configuration."""
        try:
            response = self.client.get(
                f"{self.base_url}/api/v1/workers/{worker_id}/config",
                headers=self._headers(),
                timeout=10
            )

            if response.status_code == 200:
                return response.json()

            return None

        except Exception as e:
            logger.error(f"Failed to get remote config: {e}")
            return None

    def refresh_token(
        self,
        worker_id: str,
        refresh_token: str
    ) -> Optional[Dict[str, Any]]:
        """Refresh the worker token."""
        try:
            response = self._request_with_retry(
                "POST",
                f"{self.base_url}/api/v1/workers/{worker_id}/refresh-token",
                headers=self._headers(),
                json={"refresh_token": refresh_token}
            )

            if response.status_code == 200:
                return response.json()

            return None

        except Exception as e:
            logger.error(f"Token refresh error: {e}")
            return None

    def close(self):
        """Close the underlying HTTP client."""
        self.client.close()