gitinstall-1.1.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gitinstall/__init__.py +61 -0
- gitinstall/_sdk.py +541 -0
- gitinstall/academic.py +831 -0
- gitinstall/admin.html +327 -0
- gitinstall/auto_update.py +384 -0
- gitinstall/autopilot.py +349 -0
- gitinstall/badge.py +476 -0
- gitinstall/checkpoint.py +330 -0
- gitinstall/cicd.py +499 -0
- gitinstall/clawhub.html +718 -0
- gitinstall/config_schema.py +353 -0
- gitinstall/db.py +984 -0
- gitinstall/db_backend.py +445 -0
- gitinstall/dep_chain.py +337 -0
- gitinstall/dependency_audit.py +1153 -0
- gitinstall/detector.py +542 -0
- gitinstall/doctor.py +493 -0
- gitinstall/education.py +869 -0
- gitinstall/enterprise.py +802 -0
- gitinstall/error_fixer.py +953 -0
- gitinstall/event_bus.py +251 -0
- gitinstall/executor.py +577 -0
- gitinstall/feature_flags.py +138 -0
- gitinstall/fetcher.py +921 -0
- gitinstall/huggingface.py +922 -0
- gitinstall/hw_detect.py +988 -0
- gitinstall/i18n.py +664 -0
- gitinstall/installer_registry.py +362 -0
- gitinstall/knowledge_base.py +379 -0
- gitinstall/license_check.py +605 -0
- gitinstall/llm.py +569 -0
- gitinstall/log.py +236 -0
- gitinstall/main.py +1408 -0
- gitinstall/mcp_agent.py +841 -0
- gitinstall/mcp_server.py +386 -0
- gitinstall/monorepo.py +810 -0
- gitinstall/multi_source.py +425 -0
- gitinstall/onboard.py +276 -0
- gitinstall/planner.py +222 -0
- gitinstall/planner_helpers.py +323 -0
- gitinstall/planner_known_projects.py +1010 -0
- gitinstall/planner_templates.py +996 -0
- gitinstall/remote_gpu.py +633 -0
- gitinstall/resilience.py +608 -0
- gitinstall/run_tests.py +572 -0
- gitinstall/skills.py +476 -0
- gitinstall/tool_schemas.py +324 -0
- gitinstall/trending.py +279 -0
- gitinstall/uninstaller.py +415 -0
- gitinstall/validate_top100.py +607 -0
- gitinstall/watchdog.py +180 -0
- gitinstall/web.py +1277 -0
- gitinstall/web_ui.html +2277 -0
- gitinstall-1.1.0.dist-info/METADATA +275 -0
- gitinstall-1.1.0.dist-info/RECORD +59 -0
- gitinstall-1.1.0.dist-info/WHEEL +5 -0
- gitinstall-1.1.0.dist-info/entry_points.txt +3 -0
- gitinstall-1.1.0.dist-info/licenses/LICENSE +21 -0
- gitinstall-1.1.0.dist-info/top_level.txt +1 -0
gitinstall/remote_gpu.py
ADDED
@@ -0,0 +1,633 @@
"""
remote_gpu.py — Remote GPU dev-box management engine
=====================================================

Target market: remote GPU dev-box management (GPU cloud market is growing, ★★★☆☆)

Features:
1. Remote execution over SSH (key/password authentication supported)
2. Cloud GPU provider integration (Lambda Labs, RunPod, Vast.ai, AWS, GCP)
3. Remote environment probing (GPU model / VRAM / driver / CUDA)
4. Remote project installation (runs a gitinstall plan on the remote host)
5. Cost estimation & optimization suggestions
6. Multi-host parallel installation (cluster mode)
7. Port forwarding & remote Jupyter access

Zero external dependencies; pure Python standard library.
SSH is executed via the system ssh command (built into macOS/Linux).
"""

from __future__ import annotations

import json
import os
import re
import shlex
import subprocess
import time
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Optional

# ─────────────────────────────────────────────
# Data structures
# ─────────────────────────────────────────────

@dataclass
class RemoteHost:
    """Remote host configuration."""
    name: str = ""              # alias
    host: str = ""              # IP or domain name
    port: int = 22
    user: str = ""
    key_file: str = ""          # path to SSH private key
    gpu_type: str = ""          # detected GPU type
    gpu_count: int = 0
    vram_gb: float = 0.0
    cuda_version: str = ""
    python_version: str = ""
    os_info: str = ""
    status: str = "unknown"     # unknown | online | offline | busy
    provider: str = ""          # lambda | runpod | vastai | aws | gcp | custom
    cost_per_hour: float = 0.0
    tags: list[str] = field(default_factory=list)


@dataclass
class RemoteExecResult:
    """Result of a remote command execution."""
    host: str = ""
    command: str = ""
    exit_code: int = -1
    stdout: str = ""
    stderr: str = ""
    duration_sec: float = 0.0


@dataclass
class GPUProviderInfo:
    """GPU cloud provider information."""
    name: str = ""
    display_name: str = ""
    gpu_types: list[str] = field(default_factory=list)
    pricing: dict[str, float] = field(default_factory=dict)  # GPU model → $/hour
    regions: list[str] = field(default_factory=list)
    api_url: str = ""
    env_var: str = ""  # environment variable holding the API key

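For reference, a `RemoteHost` is a plain dataclass, so a self-managed box can be described directly with keyword arguments. A minimal sketch (editorial illustration, not part of the wheel; the alias, address, user, and key path are hypothetical placeholders):

from gitinstall.remote_gpu import RemoteHost

# Hypothetical dev box; 203.0.113.7 is a documentation-range address.
box = RemoteHost(
    name="trainbox",
    host="203.0.113.7",
    user="ubuntu",
    key_file="~/.ssh/id_ed25519",
    provider="custom",
)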
# ─────────────────────────────────────────────
# GPU cloud provider database
# ─────────────────────────────────────────────

_GPU_PROVIDERS: dict[str, GPUProviderInfo] = {
    "lambda": GPUProviderInfo(
        name="lambda",
        display_name="Lambda Labs",
        gpu_types=["A100-80GB", "A100-40GB", "H100-80GB", "A10-24GB", "RTX-6000-24GB"],
        pricing={
            "H100-80GB": 3.29, "A100-80GB": 1.99, "A100-40GB": 1.49,
            "A10-24GB": 0.75, "RTX-6000-24GB": 0.99,
        },
        regions=["us-west-1", "us-east-1", "us-south-1"],
        api_url="https://cloud.lambdalabs.com/api/v1",
        env_var="LAMBDA_API_KEY",
    ),
    "runpod": GPUProviderInfo(
        name="runpod",
        display_name="RunPod",
        gpu_types=["A100-80GB", "A100-40GB", "H100-80GB", "RTX-4090-24GB", "RTX-3090-24GB", "A40-48GB"],
        pricing={
            "H100-80GB": 3.89, "A100-80GB": 1.94, "A100-40GB": 1.44,
            "RTX-4090-24GB": 0.74, "RTX-3090-24GB": 0.44, "A40-48GB": 0.79,
        },
        regions=["US", "EU", "CA"],
        api_url="https://api.runpod.io/graphql",
        env_var="RUNPOD_API_KEY",
    ),
    "vastai": GPUProviderInfo(
        name="vastai",
        display_name="Vast.ai",
        gpu_types=["A100-80GB", "A100-40GB", "RTX-4090-24GB", "RTX-3090-24GB", "RTX-4080-16GB"],
        pricing={
            "A100-80GB": 1.50, "A100-40GB": 1.10,
            "RTX-4090-24GB": 0.55, "RTX-3090-24GB": 0.30, "RTX-4080-16GB": 0.40,
        },
        regions=["Worldwide (P2P)"],
        api_url="https://console.vast.ai/api/v0",
        env_var="VASTAI_API_KEY",
    ),
    "aws": GPUProviderInfo(
        name="aws",
        display_name="AWS EC2 (GPU)",
        gpu_types=["A100-40GB (p4d)", "A100-80GB (p4de)", "H100-80GB (p5)", "T4-16GB (g4dn)", "A10G-24GB (g5)"],
        pricing={
            "T4-16GB (g4dn)": 0.526, "A10G-24GB (g5)": 1.006,
            "A100-40GB (p4d)": 3.672, "A100-80GB (p4de)": 4.576, "H100-80GB (p5)": 6.672,
        },
        regions=["us-east-1", "us-west-2", "eu-west-1", "ap-northeast-1"],
        api_url="https://ec2.amazonaws.com",
        env_var="AWS_ACCESS_KEY_ID",
    ),
    "gcp": GPUProviderInfo(
        name="gcp",
        display_name="Google Cloud (GPU)",
        gpu_types=["T4-16GB", "A100-40GB", "A100-80GB", "H100-80GB", "L4-24GB"],
        pricing={
            "T4-16GB": 0.35, "L4-24GB": 0.49,
            "A100-40GB": 2.48, "A100-80GB": 3.67, "H100-80GB": 5.67,
        },
        regions=["us-central1", "us-east1", "europe-west4", "asia-east1"],
        api_url="https://compute.googleapis.com/compute/v1",
        env_var="GOOGLE_APPLICATION_CREDENTIALS",
    ),
}

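Since the pricing table is a plain dict of dataclasses, ad-hoc queries need no helper. A quick sketch (editorial illustration; `_GPU_PROVIDERS` is an internal table, and the prices are the static values above, not live quotes) that lists every A100 offering, cheapest first:

from gitinstall.remote_gpu import _GPU_PROVIDERS

# Collect (provider, gpu, $/h) tuples whose model name mentions "A100".
offers = [
    (p.display_name, gpu, price)
    for p in _GPU_PROVIDERS.values()
    for gpu, price in p.pricing.items()
    if "a100" in gpu.lower()
]
for name, gpu, price in sorted(offers, key=lambda t: t[2]):
    print(f"{name:<20} {gpu:<20} ${price}/h")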
# ─────────────────────────────────────────────
# Remote execution over SSH
# ─────────────────────────────────────────────

def ssh_exec(
    host: RemoteHost,
    command: str,
    timeout: int = 60,
    env: dict[str, str] | None = None,
) -> RemoteExecResult:
    """
    Execute a command on the remote host over SSH.

    Uses the system ssh command; no paramiko dependency.
    """
    ssh_cmd = ["ssh", "-o", "StrictHostKeyChecking=no", "-o", "ConnectTimeout=10"]

    if host.key_file:
        ssh_cmd.extend(["-i", host.key_file])
    if host.port != 22:
        ssh_cmd.extend(["-p", str(host.port)])

    target = f"{host.user}@{host.host}" if host.user else host.host

    # Build the remote command (with environment variables prefixed)
    remote_cmd = command
    if env:
        env_prefix = " ".join(f"{k}={shlex.quote(v)}" for k, v in env.items())
        remote_cmd = f"{env_prefix} {command}"

    ssh_cmd.extend([target, remote_cmd])

    start = time.monotonic()
    try:
        result = subprocess.run(
            ssh_cmd,
            capture_output=True,
            text=True,
            timeout=timeout,
        )
        duration = time.monotonic() - start

        return RemoteExecResult(
            host=host.host,
            command=command,
            exit_code=result.returncode,
            stdout=result.stdout,
            stderr=result.stderr,
            duration_sec=duration,
        )
    except subprocess.TimeoutExpired:
        return RemoteExecResult(
            host=host.host,
            command=command,
            exit_code=-1,
            stderr=f"Timeout after {timeout}s",
            duration_sec=timeout,
        )
    except FileNotFoundError:
        return RemoteExecResult(
            host=host.host,
            command=command,
            exit_code=-1,
            stderr="ssh command not found",
        )

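Note that ssh_exec never raises on failure; errors surface through exit_code and stderr, so callers branch on the result. A minimal call sketch (editorial illustration; host details hypothetical):

from gitinstall.remote_gpu import RemoteHost, ssh_exec

box = RemoteHost(host="203.0.113.7", user="ubuntu", key_file="~/.ssh/id_ed25519")
r = ssh_exec(box, "nvidia-smi -L", timeout=30, env={"LC_ALL": "C"})
if r.exit_code == 0:
    print(r.stdout)
else:
    print(f"failed on {r.host}: {r.stderr}")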
def ssh_probe(host: RemoteHost) -> RemoteHost:
    """
    Probe the remote host's environment (GPU, CUDA, Python).

    Updates the host object in place and returns it.
    """
    # Run all detection commands in a single SSH round trip
    probe_cmd = """
echo "===OS==="
uname -a 2>/dev/null || echo "unknown"
echo "===GPU==="
nvidia-smi --query-gpu=name,memory.total,driver_version --format=csv,noheader 2>/dev/null || echo "no-gpu"
echo "===CUDA==="
nvcc --version 2>/dev/null | grep -oP 'release \\K[0-9.]+' || echo "unknown"
echo "===PYTHON==="
python3 --version 2>/dev/null || python --version 2>/dev/null || echo "unknown"
echo "===DONE==="
"""
    result = ssh_exec(host, probe_cmd.strip(), timeout=15)

    if result.exit_code != 0:
        host.status = "offline"
        return host

    host.status = "online"
    output = result.stdout

    # Parse OS
    os_match = re.search(r'===OS===\n(.+)', output)
    if os_match:
        host.os_info = os_match.group(1).strip()[:100]

    # Parse GPU
    gpu_match = re.search(r'===GPU===\n(.+)', output)
    if gpu_match:
        gpu_line = gpu_match.group(1).strip()
        if gpu_line != "no-gpu":
            parts = [p.strip() for p in gpu_line.split(",")]
            host.gpu_type = parts[0] if parts else ""
            if len(parts) > 1:
                mem_str = parts[1].replace("MiB", "").strip()
                try:
                    host.vram_gb = float(mem_str) / 1024
                except ValueError:
                    pass
            # Count GPUs (nvidia-smi prints one line per GPU)
            gpu_lines = re.findall(r'(?<====GPU===\n)(.+?)(?=\n===)', output, re.DOTALL)
            if gpu_lines:
                host.gpu_count = len(gpu_lines[0].strip().splitlines())
            else:
                host.gpu_count = 1

    # Parse CUDA
    cuda_match = re.search(r'===CUDA===\n(.+)', output)
    if cuda_match:
        host.cuda_version = cuda_match.group(1).strip()

    # Parse Python
    py_match = re.search(r'===PYTHON===\n(.+)', output)
    if py_match:
        ver_str = py_match.group(1).strip()
        m = re.search(r'(\d+\.\d+\.\d+)', ver_str)
        if m:
            host.python_version = m.group(1)

    return host

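Probing a saved host and pretty-printing the result might look like this sketch (editorial illustration; "trainbox" is a hypothetical alias saved earlier via save_host):

from gitinstall.remote_gpu import get_host, ssh_probe, format_host_info

host = ssh_probe(get_host("trainbox"))  # one SSH round trip; marks status online/offline
print(format_host_info(host))           # status icon, GPU, CUDA, Python, OS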
# ─────────────────────────────────────────────
# Remote installation
# ─────────────────────────────────────────────

def remote_install(
    host: RemoteHost,
    project: str,
    install_dir: str = "~/projects",
    plan_steps: list[dict] | None = None,
) -> list[RemoteExecResult]:
    """
    Install a GitHub project on the remote host.

    If plan_steps is given, execute it directly; otherwise clone first,
    then install the same way a local install would.
    """
    results = []

    # Make sure the target directory exists
    r = ssh_exec(host, f"mkdir -p {install_dir}", timeout=10)
    results.append(r)

    if plan_steps:
        # Execute a predefined install plan
        for step in plan_steps:
            cmds = step.get("commands", [])
            for cmd in cmds:
                if isinstance(cmd, str):
                    r = ssh_exec(host, f"cd {install_dir} && {cmd}", timeout=300)
                    results.append(r)
                    if r.exit_code != 0:
                        return results  # stop on first failure
    else:
        # Default flow: clone → detect → install
        repo_name = project.split("/")[-1] if "/" in project else project
        clone_url = f"https://github.com/{project}.git"

        # Clone (or pull if the repo already exists)
        r = ssh_exec(
            host,
            f"cd {install_dir} && git clone {clone_url} 2>&1 || (cd {repo_name} && git pull)",
            timeout=120,
        )
        results.append(r)

        # Detect the project type and install
        detect_install_cmd = f"""
cd {install_dir}/{repo_name}
if [ -f requirements.txt ]; then
    python3 -m venv .venv && source .venv/bin/activate && pip install -r requirements.txt
elif [ -f setup.py ] || [ -f pyproject.toml ]; then
    python3 -m venv .venv && source .venv/bin/activate && pip install -e .
elif [ -f package.json ]; then
    npm install
elif [ -f Cargo.toml ]; then
    cargo build --release
elif [ -f go.mod ]; then
    go build ./...
elif [ -f CMakeLists.txt ]; then
    mkdir -p build && cd build && cmake .. && make -j$(nproc)
elif [ -f Makefile ]; then
    make
fi
echo "INSTALL_DONE"
"""
        r = ssh_exec(host, detect_install_cmd.strip(), timeout=600)
        results.append(r)

    return results

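Passing an explicit plan keeps the remote side deterministic; each plan command runs as `cd {install_dir} && {cmd}`. A sketch (editorial illustration; the repo and commands are placeholders for whatever plan gitinstall would produce):

from gitinstall.remote_gpu import get_host, remote_install

steps = [{"commands": [
    "git clone https://github.com/vllm-project/vllm.git || true",
    "cd vllm && python3 -m venv .venv && .venv/bin/pip install -e .",
]}]
for r in remote_install(get_host("trainbox"), "vllm-project/vllm", plan_steps=steps):
    print(r.exit_code, r.command)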
# ─────────────────────────────────────────────
# Cost estimation
# ─────────────────────────────────────────────

def estimate_cost(
    gpu_type: str,
    hours: float,
    provider: str | None = None,
) -> list[dict]:
    """
    Estimate GPU cloud usage cost.

    If no provider is specified, return a comparison across all providers.
    """
    results = []

    for pname, pinfo in _GPU_PROVIDERS.items():
        if provider and pname != provider:
            continue

        for gpu, price in pinfo.pricing.items():
            if gpu_type.lower() in gpu.lower():
                cost = price * hours
                results.append({
                    "provider": pinfo.display_name,
                    "gpu": gpu,
                    "price_per_hour": price,
                    "hours": hours,
                    "total_cost": round(cost, 2),
                    "currency": "USD",
                })

    results.sort(key=lambda x: x["total_cost"])
    return results


def recommend_gpu_provider(
    vram_needed_gb: float,
    budget_per_hour: float = 5.0,
    prefer_region: str = "",
) -> list[dict]:
    """
    Recommend GPU providers based on VRAM needs and budget.
    """
    recommendations = []
    vram_needed_gb = max(vram_needed_gb, 0.001)  # guard against division by zero in the score below

    for pname, pinfo in _GPU_PROVIDERS.items():
        for gpu, price in pinfo.pricing.items():
            if price > budget_per_hour:
                continue

            # Estimate VRAM from the GPU name
            vram = _estimate_gpu_vram(gpu)
            if vram < vram_needed_gb:
                continue

            # Region match
            region_match = not prefer_region or any(
                prefer_region.lower() in r.lower() for r in pinfo.regions
            )

            score = 100 - (price / budget_per_hour * 50) + (vram / vram_needed_gb * 30)
            if region_match:
                score += 20

            recommendations.append({
                "provider": pinfo.display_name,
                "gpu": gpu,
                "vram_gb": vram,
                "price_per_hour": price,
                "score": round(score, 1),
                "regions": pinfo.regions,
            })

    recommendations.sort(key=lambda x: x["score"], reverse=True)
    return recommendations[:10]


def _estimate_gpu_vram(gpu_name: str) -> float:
    """Estimate VRAM from the GPU name."""
    m = re.search(r'(\d+)\s*GB', gpu_name, re.IGNORECASE)
    if m:
        return float(m.group(1))

    # Common models
    vram_map = {
        "T4": 16, "A10": 24, "A10G": 24, "L4": 24,
        "A40": 48, "A100-40": 40, "A100-80": 80,
        "H100": 80, "H200": 141,
        "RTX-3090": 24, "RTX-4090": 24, "RTX-4080": 16,
        "RTX-6000": 24, "RTX-A6000": 48,
    }
    for key, vram in vram_map.items():
        if key.lower() in gpu_name.lower():
            return vram
    return 0

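Putting the two together (editorial illustration; the values are arbitrary and the costs come from the static table above, not live pricing):

from gitinstall.remote_gpu import estimate_cost, recommend_gpu_provider, format_cost_comparison

# What would 12 hours on an A100 cost at each provider?
print(format_cost_comparison(estimate_cost("A100", hours=12)))

# Best fits for a 40 GB model under $2/h, preferring EU regions.
for rec in recommend_gpu_provider(vram_needed_gb=40, budget_per_hour=2.0, prefer_region="eu"):
    print(rec["score"], rec["provider"], rec["gpu"])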
# ─────────────────────────────────────────────
# Host management
# ─────────────────────────────────────────────

_HOSTS_FILE = os.path.expanduser("~/.gitinstall/remote_hosts.json")


def save_host(host: RemoteHost) -> None:
    """Save a remote host configuration."""
    hosts = load_hosts()
    # Update in place, or append
    updated = False
    for i, h in enumerate(hosts):
        if h.get("name") == host.name or h.get("host") == host.host:
            hosts[i] = _host_to_dict(host)
            updated = True
            break
    if not updated:
        hosts.append(_host_to_dict(host))

    os.makedirs(os.path.dirname(_HOSTS_FILE), exist_ok=True)
    with open(_HOSTS_FILE, "w", encoding="utf-8") as f:
        json.dump(hosts, f, indent=2, ensure_ascii=False)


def load_hosts() -> list[dict]:
    """Load all saved remote hosts."""
    if not os.path.isfile(_HOSTS_FILE):
        return []
    try:
        with open(_HOSTS_FILE, "r", encoding="utf-8") as f:
            return json.load(f)
    except (json.JSONDecodeError, OSError):
        return []


def get_host(name_or_ip: str) -> RemoteHost:
    """Look up a saved remote host by alias or address."""
    hosts = load_hosts()
    for h in hosts:
        if h.get("name") == name_or_ip or h.get("host") == name_or_ip:
            return RemoteHost(**{k: v for k, v in h.items() if k in RemoteHost.__dataclass_fields__})
    return RemoteHost()


def _host_to_dict(host: RemoteHost) -> dict:
    return {
        "name": host.name,
        "host": host.host,
        "port": host.port,
        "user": host.user,
        "key_file": host.key_file,
        "gpu_type": host.gpu_type,
        "gpu_count": host.gpu_count,
        "vram_gb": host.vram_gb,
        "cuda_version": host.cuda_version,
        "python_version": host.python_version,
        "os_info": host.os_info,
        "status": host.status,
        "provider": host.provider,
        "cost_per_hour": host.cost_per_hour,
        "tags": host.tags,
    }

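The registry is a flat JSON file under ~/.gitinstall, so a save/lookup/refresh roundtrip is a few lines (editorial illustration; alias and address hypothetical):

from gitinstall.remote_gpu import RemoteHost, save_host, get_host, ssh_probe

save_host(RemoteHost(name="trainbox", host="203.0.113.7", user="ubuntu"))
box = get_host("trainbox")   # matches by alias or by IP/domain
save_host(ssh_probe(box))    # persist the probed GPU/CUDA/Python details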
# ─────────────────────────────────────────────
# Port forwarding
# ─────────────────────────────────────────────

def create_tunnel(
    host: RemoteHost,
    remote_port: int,
    local_port: int | None = None,
) -> dict:
    """
    Build the command for an SSH port-forwarding tunnel.

    Use cases: remote Jupyter Notebook, TensorBoard, vLLM API, etc.
    """
    local_port = local_port or remote_port

    ssh_cmd = ["ssh", "-N", "-L", f"{local_port}:localhost:{remote_port}"]

    if host.key_file:
        ssh_cmd.extend(["-i", host.key_file])
    if host.port != 22:
        ssh_cmd.extend(["-p", str(host.port)])

    target = f"{host.user}@{host.host}" if host.user else host.host
    ssh_cmd.append(target)

    return {
        "command": " ".join(ssh_cmd),
        "local_url": f"http://localhost:{local_port}",
        "description": f"Forward {host.host}:{remote_port} → localhost:{local_port}",
    }


def generate_jupyter_remote_cmd(host: RemoteHost, port: int = 8888) -> dict:
    """Generate the commands for remote Jupyter access."""
    # Start Jupyter on the remote host
    start_cmd = f"jupyter lab --no-browser --port={port} --ip=0.0.0.0"

    tunnel = create_tunnel(host, port)

    return {
        "step1_remote": f"ssh {host.user}@{host.host} '{start_cmd}'",
        "step2_tunnel": tunnel["command"],
        "step3_access": tunnel["local_url"],
        "note": "Run step1 in terminal 1, step2 in terminal 2, then open step3 in a browser",
    }

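Both helpers only build commands; nothing is executed. A driver script therefore just prints the three steps for the user to run (editorial illustration, reusing the hypothetical "trainbox" alias):

from gitinstall.remote_gpu import get_host, generate_jupyter_remote_cmd

steps = generate_jupyter_remote_cmd(get_host("trainbox"), port=8888)
for key in ("step1_remote", "step2_tunnel", "step3_access", "note"):
    print(f"{key}: {steps[key]}")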
# ─────────────────────────────────────────────
# Multi-host parallelism
# ─────────────────────────────────────────────

def parallel_probe(hosts: list[RemoteHost]) -> list[RemoteHost]:
    """
    Probe multiple hosts.

    Note: despite the name, this runs serially for simplicity.
    Production code could parallelize with concurrent.futures
    (also part of the standard library).
    """
    results = []
    for host in hosts:
        probed = ssh_probe(host)
        results.append(probed)
    return results

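A drop-in parallel variant along the lines the docstring suggests, still standard library only (editorial sketch, not part of the wheel; since each ssh_probe blocks on network I/O, threads overlap well):

from concurrent.futures import ThreadPoolExecutor

from gitinstall.remote_gpu import RemoteHost, ssh_probe

def parallel_probe_threaded(hosts: list[RemoteHost], workers: int = 8) -> list[RemoteHost]:
    # ssh_probe mutates and returns each host; map preserves input order.
    with ThreadPoolExecutor(max_workers=workers) as pool:
        return list(pool.map(ssh_probe, hosts))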
# ─────────────────────────────────────────────
# Formatted output
# ─────────────────────────────────────────────

def format_host_info(host: RemoteHost) -> str:
    """Format remote host information."""
    status_icon = {"online": "🟢", "offline": "🔴", "busy": "🟡", "unknown": "⚪"}.get(host.status, "⚪")

    lines = [
        f"{status_icon} {host.name or host.host}",
        f"   Address: {host.user}@{host.host}:{host.port}",
    ]
    if host.gpu_type:
        lines.append(f"   GPU: {host.gpu_type} × {host.gpu_count} ({host.vram_gb:.0f}GB)")
    if host.cuda_version:
        lines.append(f"   CUDA: {host.cuda_version}")
    if host.python_version:
        lines.append(f"   Python: {host.python_version}")
    if host.cost_per_hour > 0:
        lines.append(f"   Cost: ${host.cost_per_hour}/h ({host.provider})")
    if host.os_info:
        lines.append(f"   OS: {host.os_info[:60]}")

    return "\n".join(lines)


def format_cost_comparison(costs: list[dict]) -> str:
    """Format a cost comparison table."""
    if not costs:
        return "💰 No matching GPU provider found"

    lines = [
        "💰 GPU cloud cost comparison",
        "",
        f"{'Provider':<15} {'GPU':<20} {'Price ($/h)':<12} {'Total ($)':<12}",
        "─" * 60,
    ]

    for c in costs:
        lines.append(
            f"{c['provider']:<15} {c['gpu']:<20} "
            f"${c['price_per_hour']:<11.2f} ${c['total_cost']:<11.2f}"
        )

    cheapest = costs[0]
    lines.extend([
        "",
        f"💡 Cheapest: {cheapest['provider']} {cheapest['gpu']} — ${cheapest['total_cost']} / {cheapest['hours']:.1f}h",
    ])

    return "\n".join(lines)