gpu-worker 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +115 -0
- package/api_client.py +288 -0
- package/batch_processor.py +436 -0
- package/bin/gpu-worker.js +275 -0
- package/cli.py +729 -0
- package/config.2gb.yaml +32 -0
- package/config.8gb.yaml +29 -0
- package/config.example.yaml +72 -0
- package/config.py +213 -0
- package/direct_server.py +140 -0
- package/distributed/__init__.py +35 -0
- package/distributed/grpc_server.py +561 -0
- package/distributed/kv_cache.py +555 -0
- package/distributed/model_shard.py +465 -0
- package/distributed/session.py +455 -0
- package/engines/__init__.py +215 -0
- package/engines/base.py +57 -0
- package/engines/image_gen.py +83 -0
- package/engines/llm.py +97 -0
- package/engines/llm_base.py +216 -0
- package/engines/llm_sglang.py +489 -0
- package/engines/llm_vllm.py +539 -0
- package/engines/speculative.py +513 -0
- package/engines/vision.py +139 -0
- package/machine_id.py +200 -0
- package/main.py +521 -0
- package/package.json +64 -0
- package/requirements-sglang.txt +12 -0
- package/requirements-vllm.txt +15 -0
- package/requirements.txt +35 -0
- package/scripts/postinstall.js +60 -0
- package/setup.py +43 -0
package/machine_id.py
ADDED
|
@@ -0,0 +1,200 @@
|
|
|
1
|
+
"""
|
|
2
|
+
机器码/设备指纹生成器
|
|
3
|
+
生成唯一的硬件指纹用于识别Worker节点
|
|
4
|
+
"""
|
|
5
|
+
import hashlib
|
|
6
|
+
import platform
|
|
7
|
+
import uuid
|
|
8
|
+
import os
|
|
9
|
+
import json
|
|
10
|
+
from typing import Optional, Dict, Any
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
import logging
|
|
13
|
+
|
|
14
|
+
logger = logging.getLogger(__name__)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class MachineFingerprint:
    """Machine fingerprint generator.

    Collects OS, network and (when available) GPU identifiers, hashes them
    with SHA-256 and caches the result as JSON so that a worker node can be
    recognised across runs.
    """

    # Name of the cache file; it is created in the user's home directory
    # unless an explicit storage path is given to get_or_create().
    FINGERPRINT_FILE = ".gpu_worker_fingerprint"

    # Upper bound (seconds) for every external probe (ioreg / wmic /
    # nvidia-smi) so that a wedged tool cannot block worker start-up.
    _PROBE_TIMEOUT = 10

    @classmethod
    def generate(cls) -> Dict[str, Any]:
        """Build a fresh fingerprint from the current machine.

        Returns:
            A dict with:
              * ``machine_id``    - first 32 hex characters of the hash,
              * ``hardware_hash`` - full SHA-256 hex digest,
              * ``details``       - the raw values that were hashed,
              * ``generated_at``  - UTC timestamp in ISO-8601 form with "Z".
        """
        # NOTE(review): platform release/version and the hostname are part of
        # the hashed data, so changing any of them changes the resulting ID.
        # The field set is left untouched to keep existing IDs stable.
        fingerprint_data = {
            "platform": platform.system(),
            "platform_release": platform.release(),
            "platform_version": platform.version(),
            "architecture": platform.machine(),
            "processor": platform.processor(),
            "hostname": platform.node(),
            "mac_address": cls._get_mac_address(),
            "machine_id": cls._get_machine_id(),
        }

        # GPU information is only added when a CUDA device was detected.
        gpu_info = cls._get_gpu_info()
        if gpu_info:
            fingerprint_data["gpu"] = gpu_info

        # sort_keys makes the serialisation (and therefore the hash)
        # independent of dict insertion order.
        fingerprint_string = json.dumps(fingerprint_data, sort_keys=True)
        fingerprint_hash = hashlib.sha256(fingerprint_string.encode()).hexdigest()

        return {
            "machine_id": fingerprint_hash[:32],  # 32-character machine code
            "hardware_hash": fingerprint_hash,
            "details": fingerprint_data,
            "generated_at": cls._get_timestamp(),
        }

    @classmethod
    def _get_mac_address(cls) -> str:
        """Return ``uuid.getnode()`` formatted as ``AA:BB:CC:DD:EE:FF``.

        Returns "unknown" if the node value cannot be obtained.
        """
        try:
            mac = uuid.getnode()
            return ':'.join(('%012X' % mac)[i:i+2] for i in range(0, 12, 2))
        except Exception:
            return "unknown"

    @classmethod
    def _run_probe(cls, argv):
        """Run an external command with a timeout.

        Returns the ``subprocess.CompletedProcess`` (stdout captured as text),
        or None when the command is missing, times out or its output cannot
        be decoded.
        """
        import subprocess
        try:
            return subprocess.run(
                argv,
                capture_output=True,
                text=True,
                timeout=cls._PROBE_TIMEOUT,
            )
        except (OSError, ValueError, subprocess.SubprocessError):
            return None

    @staticmethod
    def _parse_ioreg_uuid(output: str) -> Optional[str]:
        """Extract the quoted value of the ``IOPlatformUUID`` line, if any."""
        for line in output.splitlines():
            if 'IOPlatformUUID' in line:
                parts = line.split('"')
                # The value is the last quoted token on the line.
                if len(parts) >= 2 and parts[-2]:
                    return parts[-2]
        return None

    @staticmethod
    def _parse_wmic_uuid(output: str) -> Optional[str]:
        """Extract the UUID from ``wmic csproduct get UUID`` output.

        The first non-empty line is the "UUID" column header; the next
        non-empty line is the value. Blank separator lines are skipped, since
        indexing the raw second line can yield an empty string.
        """
        rows = [row.strip() for row in output.splitlines() if row.strip()]
        if len(rows) > 1:
            return rows[1]
        return None

    @classmethod
    def _get_machine_id(cls) -> str:
        """Return an OS-level machine identifier.

        Tries, in order: ``/etc/machine-id``, the macOS ``IOPlatformUUID``,
        the Windows ``csproduct`` UUID, and finally ``uuid.getnode()`` as a
        string.
        """
        # Linux
        if os.path.exists('/etc/machine-id'):
            try:
                with open('/etc/machine-id', 'r') as f:
                    return f.read().strip()
            except OSError:
                # Unreadable file: fall through to the other sources instead
                # of aborting fingerprint generation.
                pass

        system = platform.system()

        # macOS
        if system == 'Darwin':
            result = cls._run_probe(
                ['ioreg', '-rd1', '-c', 'IOPlatformExpertDevice']
            )
            if result is not None:
                platform_uuid = cls._parse_ioreg_uuid(result.stdout)
                if platform_uuid:
                    return platform_uuid

        # Windows
        if system == 'Windows':
            result = cls._run_probe(['wmic', 'csproduct', 'get', 'UUID'])
            if result is not None:
                product_uuid = cls._parse_wmic_uuid(result.stdout)
                if product_uuid:
                    return product_uuid

        # Fallback: use the MAC-derived node value.
        return str(uuid.getnode())

    @classmethod
    def _get_gpu_info(cls) -> Optional[Dict[str, Any]]:
        """Return count, name of device 0 and UUID of the CUDA GPUs.

        Returns None when torch is not installed or reports no CUDA device.
        """
        try:
            import torch
            if torch.cuda.is_available():
                return {
                    "count": torch.cuda.device_count(),
                    "name": torch.cuda.get_device_name(0),
                    "uuid": cls._get_gpu_uuid(),
                }
        except ImportError:
            pass
        return None

    @classmethod
    def _get_gpu_uuid(cls) -> Optional[str]:
        """Return the first UUID line printed by ``nvidia-smi``, or None.

        None is returned when the tool is unavailable, times out or exits
        with a non-zero status.
        """
        result = cls._run_probe(
            ['nvidia-smi', '--query-gpu=uuid', '--format=csv,noheader']
        )
        if result is not None and result.returncode == 0:
            return result.stdout.strip().split('\n')[0]
        return None

    @classmethod
    def _get_timestamp(cls) -> str:
        """Return the current UTC time as ISO-8601 text with a "Z" suffix."""
        from datetime import datetime, timezone
        # datetime.utcnow() is deprecated; take an aware UTC time and drop
        # the tzinfo so the text keeps the "...Z" form without "+00:00".
        now = datetime.now(timezone.utc).replace(tzinfo=None)
        return now.isoformat() + 'Z'

    @classmethod
    def get_or_create(cls, storage_path: Optional[str] = None) -> Dict[str, Any]:
        """Return the cached fingerprint, creating it when necessary.

        The cached file is reused only if its ``hardware_hash`` equals the
        hash of a freshly generated fingerprint; otherwise (or when the file
        is missing or unreadable) the fresh fingerprint is saved and returned.

        Args:
            storage_path: Location of the cache file. Defaults to
                ``FINGERPRINT_FILE`` in the user's home directory.

        Returns:
            The fingerprint dict (see generate()). Failing to write the cache
            is logged and does not raise.
        """
        if storage_path is None:
            storage_path = Path.home() / cls.FINGERPRINT_FILE

        storage_path = Path(storage_path)

        # Generated once: used both to validate the cache and as the value to
        # store, so the external probes are not run twice.
        fingerprint = cls.generate()

        # Try to reuse an existing fingerprint.
        if storage_path.exists():
            try:
                with open(storage_path, 'r', encoding='utf-8') as f:
                    saved = json.load(f)
            except (OSError, ValueError) as e:
                logger.warning(f"Failed to read fingerprint: {e}")
            else:
                if not isinstance(saved, dict) or 'machine_id' not in saved:
                    # Valid JSON but not a fingerprint record; rewrite it.
                    logger.warning(
                        "Failed to read fingerprint: unexpected file contents"
                    )
                elif saved.get('hardware_hash') == fingerprint['hardware_hash']:
                    # Hash still matches the current machine: keep the cache.
                    return saved
                else:
                    logger.warning("Hardware changed, regenerating fingerprint")

        # Persist the new fingerprint (best effort).
        try:
            with open(storage_path, 'w', encoding='utf-8') as f:
                json.dump(fingerprint, f, indent=2)
            logger.info(f"Machine fingerprint saved: {fingerprint['machine_id']}")
        except (OSError, TypeError, ValueError) as e:
            logger.warning(f"Failed to save fingerprint: {e}")

        return fingerprint

    @classmethod
    def get_machine_id(cls) -> str:
        """Return only the 32-character machine code (convenience wrapper)."""
        fingerprint = cls.get_or_create()
        return fingerprint['machine_id']
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
def get_machine_id() -> str:
    """Return the unique identification code of the current machine."""
    machine_code = MachineFingerprint.get_machine_id()
    return machine_code
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
def get_full_fingerprint() -> Dict[str, Any]:
    """Return the complete fingerprint record of the current machine."""
    record = MachineFingerprint.get_or_create()
    return record
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
if __name__ == "__main__":
    # Manual check: print the cached (or newly generated) fingerprint.
    info = get_full_fingerprint()
    for label, key in (("Machine ID", "machine_id"), ("Hardware Hash", "hardware_hash")):
        print(f"{label}: {info[key]}")
    print(f"Details: {json.dumps(info['details'], indent=2)}")
|