gitinstall 1.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gitinstall/__init__.py +61 -0
- gitinstall/_sdk.py +541 -0
- gitinstall/academic.py +831 -0
- gitinstall/admin.html +327 -0
- gitinstall/auto_update.py +384 -0
- gitinstall/autopilot.py +349 -0
- gitinstall/badge.py +476 -0
- gitinstall/checkpoint.py +330 -0
- gitinstall/cicd.py +499 -0
- gitinstall/clawhub.html +718 -0
- gitinstall/config_schema.py +353 -0
- gitinstall/db.py +984 -0
- gitinstall/db_backend.py +445 -0
- gitinstall/dep_chain.py +337 -0
- gitinstall/dependency_audit.py +1153 -0
- gitinstall/detector.py +542 -0
- gitinstall/doctor.py +493 -0
- gitinstall/education.py +869 -0
- gitinstall/enterprise.py +802 -0
- gitinstall/error_fixer.py +953 -0
- gitinstall/event_bus.py +251 -0
- gitinstall/executor.py +577 -0
- gitinstall/feature_flags.py +138 -0
- gitinstall/fetcher.py +921 -0
- gitinstall/huggingface.py +922 -0
- gitinstall/hw_detect.py +988 -0
- gitinstall/i18n.py +664 -0
- gitinstall/installer_registry.py +362 -0
- gitinstall/knowledge_base.py +379 -0
- gitinstall/license_check.py +605 -0
- gitinstall/llm.py +569 -0
- gitinstall/log.py +236 -0
- gitinstall/main.py +1408 -0
- gitinstall/mcp_agent.py +841 -0
- gitinstall/mcp_server.py +386 -0
- gitinstall/monorepo.py +810 -0
- gitinstall/multi_source.py +425 -0
- gitinstall/onboard.py +276 -0
- gitinstall/planner.py +222 -0
- gitinstall/planner_helpers.py +323 -0
- gitinstall/planner_known_projects.py +1010 -0
- gitinstall/planner_templates.py +996 -0
- gitinstall/remote_gpu.py +633 -0
- gitinstall/resilience.py +608 -0
- gitinstall/run_tests.py +572 -0
- gitinstall/skills.py +476 -0
- gitinstall/tool_schemas.py +324 -0
- gitinstall/trending.py +279 -0
- gitinstall/uninstaller.py +415 -0
- gitinstall/validate_top100.py +607 -0
- gitinstall/watchdog.py +180 -0
- gitinstall/web.py +1277 -0
- gitinstall/web_ui.html +2277 -0
- gitinstall-1.1.0.dist-info/METADATA +275 -0
- gitinstall-1.1.0.dist-info/RECORD +59 -0
- gitinstall-1.1.0.dist-info/WHEEL +5 -0
- gitinstall-1.1.0.dist-info/entry_points.txt +3 -0
- gitinstall-1.1.0.dist-info/licenses/LICENSE +21 -0
- gitinstall-1.1.0.dist-info/top_level.txt +1 -0
gitinstall/fetcher.py
ADDED
|
@@ -0,0 +1,921 @@
|
|
|
1
|
+
"""
|
|
2
|
+
fetcher.py - GitHub 项目信息抓取与解析
|
|
3
|
+
=====================================
|
|
4
|
+
|
|
5
|
+
功能:
|
|
6
|
+
1. 解析各种格式的项目标识:URL / "owner/repo" / 项目名
|
|
7
|
+
2. 通过 GitHub API 或 git clone --depth 1 本地分析 获取项目信息
|
|
8
|
+
3. 下载并解析 README(支持 .md / .rst / .txt)
|
|
9
|
+
4. 提取项目类型(Python/Node/Rust/Go/Docker 等)
|
|
10
|
+
5. 提取依赖文件(requirements.txt / package.json / Cargo.toml 等)
|
|
11
|
+
|
|
12
|
+
两种模式:
|
|
13
|
+
- API 模式(默认):使用 GitHub REST API,受限 60 次/小时
|
|
14
|
+
- 本地模式(推荐):git clone --depth 1 后本地分析,无任何限制
|
|
15
|
+
|
|
16
|
+
只使用 Python 标准库,无需安装任何第三方包。
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
from __future__ import annotations
|
|
20
|
+
|
|
21
|
+
import hashlib
|
|
22
|
+
import json
|
|
23
|
+
import os
|
|
24
|
+
import re
|
|
25
|
+
import shutil
|
|
26
|
+
import subprocess
|
|
27
|
+
import tempfile
|
|
28
|
+
import time
|
|
29
|
+
import urllib.error
|
|
30
|
+
import urllib.parse
|
|
31
|
+
import urllib.request
|
|
32
|
+
from dataclasses import dataclass, field
|
|
33
|
+
from pathlib import Path, PurePosixPath
|
|
34
|
+
from typing import Optional
|
|
35
|
+
|
|
36
|
+
from log import get_logger
|
|
37
|
+
from i18n import t
|
|
38
|
+
|
|
39
|
+
logger = get_logger(__name__)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
# Directory names (compared lower-cased) whose contents are ignored when
# dependency files are collected from a local checkout.
_LOCAL_ANALYSIS_SKIP_DIRS = {
    # version control / virtual environments / tool caches
    ".git",
    ".venv",
    "venv",
    "__pycache__",
    ".mypy_cache",
    ".pytest_cache",
    # vendored third-party code
    "node_modules",
    "vendor",
    "third_party",
    "thirdparty",
    # build output
    "target",
    "dist",
    "build",
}
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
# ─────────────────────────────────────────────
|
|
50
|
+
# API 响应缓存
|
|
51
|
+
# ─────────────────────────────────────────────
|
|
52
|
+
|
|
53
|
+
def _env_int(name: str, default: int) -> int:
    """Return environment variable *name* parsed as an int, or *default*.

    A missing, empty or non-numeric value yields *default* instead of raising,
    so a typo in the environment cannot make importing this module fail.
    """
    raw = os.getenv(name, "").strip()
    try:
        return int(raw) if raw else default
    except ValueError:
        return default


# Directory holding cached API responses (one JSON file per URL).
_CACHE_DIR = Path.home() / ".cache" / "gitinstall" / "api"
# Seconds a cached response is served without a network request (default: 24 hours).
_CACHE_TTL = _env_int("GITINSTALL_CACHE_TTL", 24 * 3600)
# GITINSTALL_NO_CACHE=1/true/yes (any letter case) turns the cache off entirely.
_NO_CACHE = os.getenv("GITINSTALL_NO_CACHE", "").strip().lower() in ("1", "true", "yes")
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def _cache_path(url: str) -> Path:
    """Map a URL to its cache file: first 16 hex digits of its SHA-256 plus ``.json``."""
    digest = hashlib.sha256(url.encode()).hexdigest()
    return _CACHE_DIR / (digest[:16] + ".json")
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def _cache_read(url: str):
    """Return the cached payload for *url*, or None on a miss.

    A miss is any of: caching disabled, no cache file, an entry older than
    ``_CACHE_TTL`` seconds, or a file that cannot be read/parsed.
    """
    if _NO_CACHE:
        return None
    cache_file = _cache_path(url)
    if not cache_file.exists():
        return None
    try:
        entry = json.loads(cache_file.read_text("utf-8"))
        age = time.time() - entry.get("ts", 0)
        # An expired entry is kept on disk so its ETag can still be used for a
        # conditional request.
        return None if age > _CACHE_TTL else entry["data"]
    except Exception:
        return None
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def _cache_read_etag(url: str) -> tuple:
    """Return ``(etag, data)`` from the cache entry for *url*, ignoring its age.

    Used to build a conditional request from an expired entry. Returns
    ``(None, None)`` when caching is disabled, the file is absent, or it
    cannot be read/parsed.
    """
    nothing = (None, None)
    if _NO_CACHE:
        return nothing
    cache_file = _cache_path(url)
    if not cache_file.exists():
        return nothing
    try:
        entry = json.loads(cache_file.read_text("utf-8"))
        return entry.get("etag"), entry.get("data")
    except Exception:
        return nothing
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def _cache_write(url: str, data, etag: str = None) -> None:
    """Store *data* (and *etag*, when given) as the cache entry for *url*.

    Best effort: any failure is swallowed so caching never blocks the caller.
    Does nothing when caching is disabled.
    """
    if _NO_CACHE:
        return
    record = {"url": url, "ts": time.time(), "data": data}
    if etag:
        record["etag"] = etag
    try:
        _CACHE_DIR.mkdir(parents=True, exist_ok=True)
        target = _cache_path(url)
        serialized = json.dumps(record, ensure_ascii=False)
        target.write_text(serialized, encoding="utf-8")
    except Exception:
        pass
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
# ─────────────────────────────────────────────
|
|
113
|
+
# 数据结构
|
|
114
|
+
# ─────────────────────────────────────────────
|
|
115
|
+
|
|
116
|
+
@dataclass
class RepoInfo:
    """Installation-relevant information gathered about one repository."""

    owner: str
    repo: str
    full_name: str  # "owner/repo"
    description: str
    stars: int
    language: str  # primary language
    license: str
    default_branch: str
    readme: str  # README text
    project_type: list[str]  # e.g. ["python", "docker"]
    dependency_files: dict  # e.g. {"requirements.txt": "<file content>", ...}
    clone_url: str
    homepage: str
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
# ─────────────────────────────────────────────
|
|
134
|
+
# URL / 名称解析
|
|
135
|
+
# ─────────────────────────────────────────────
|
|
136
|
+
|
|
137
|
+
def parse_repo_identifier(identifier: str) -> tuple[str, str]:
    """
    Parse a project identifier in any supported format into ``(owner, repo)``.

    Supported formats:
    - https://github.com/comfyanonymous/ComfyUI
    - https://gitlab.com/user/project
    - https://gitee.com/user/project
    - https://bitbucket.org/user/project
    - https://codeberg.org/user/project
    - github.com/comfyanonymous/ComfyUI
    - git@github.com:owner/repo.git
    - comfyanonymous/ComfyUI
    - comfyanonymous/ComfyUI/tree/main
    - ComfyUI (bare project name; the caller is expected to search for it)

    Returns:
        ``(owner, repo)``; ``owner`` is ``""`` when only a project name was given.

    Raises:
        ValueError: if the owner or repo contains characters outside
            letters/digits/``_``/``-``/``.`` or is a path-traversal component
            (``.`` / ``..``).
    """
    identifier = identifier.strip()

    def _check(owner: str, repo: str, owner_chars: str) -> None:
        # fullmatch (not match + "$") so a trailing newline cannot slip through.
        if not re.fullmatch(owner_chars, owner) or owner in ('.', '..'):
            raise ValueError(f"无效的仓库所有者: {owner}")
        if not re.fullmatch(r'[a-zA-Z0-9_.-]+', repo) or repo in ('.', '..'):
            raise ValueError(f"无效的仓库名: {repo}")

    # URL forms (https, scheme-less and scp-like "host:owner/repo"). Hosts are
    # tried in a fixed priority order. The repo segment may contain dots
    # ("next.js"); an optional ".git" suffix is dropped and the name ends at
    # "/", whitespace, "?", "#" or end of input.
    hosts = (
        r'github\.com',
        r'gitlab\.com',
        r'bitbucket\.org',
        r'gitee\.com',
        r'codeberg\.org',
    )
    tail = r'[:/]([^/\s]+)/([^/\s?#]+?)(?:\.git)?(?:[/\s?#]|$)'
    for host in hosts:
        match = re.search(host + tail, identifier, re.IGNORECASE)
        if match:
            owner, repo = match.group(1), match.group(2)
            # URL owners may contain dots; shorthand owners below stay strict.
            _check(owner, repo, r'[a-zA-Z0-9_.-]+')
            return owner, repo

    # "owner/repo" shorthand (anything after the second segment is ignored).
    if "/" in identifier and not identifier.startswith("http"):
        owner, repo = identifier.split("/")[:2]
        # "owner/repo.git" would otherwise yield "repo.git".
        repo = repo.removesuffix(".git")
        _check(owner, repo, r'[a-zA-Z0-9_-]+')
        return owner, repo

    # Bare project name: the caller has to search for it.
    return "", identifier
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
# ─────────────────────────────────────────────
|
|
185
|
+
# GitHub API 客户端
|
|
186
|
+
# ─────────────────────────────────────────────
|
|
187
|
+
|
|
188
|
+
class GitHubFetcher:
    """
    Thin wrapper around the GitHub REST API v3.

    Public repositories need no authentication (limited to 60 requests per hour).
    Setting the GITHUB_TOKEN environment variable raises the limit to 5000 per hour.
    """

    API_BASE = "https://api.github.com"
    RAW_BASE = "https://raw.githubusercontent.com"

    def __init__(self):
        """Build the request headers; add a Bearer token when GITHUB_TOKEN is set."""
        token = os.getenv("GITHUB_TOKEN", "").strip()
        self._headers = {
            "Accept": "application/vnd.github.v3+json",
            "User-Agent": "gitinstall/1.0",
        }
        if token:
            self._headers["Authorization"] = f"Bearer {token}"

    @staticmethod
    def _retry_delay(retry_after: str, attempt: int, cap: int = 60) -> int:
        """Turn a ``Retry-After`` header value into a sleep time in seconds.

        An integer value is clamped to ``0..cap``. Anything that is not an
        integer (e.g. the HTTP-date form of the header) falls back to the
        exponential backoff ``2 ** attempt``, also limited to *cap*.
        """
        try:
            seconds = int(retry_after)
        except (TypeError, ValueError):
            seconds = 2 ** attempt
        return max(0, min(seconds, cap))

    def _get(self, url: str, timeout: int = 15, _retries: int = 2) -> Optional[dict | list | str]:
        """Send a GET request; return parsed JSON, or the raw text for non-JSON replies.

        Caching strategy:
        1. Entry younger than the TTL -> returned directly, no network traffic.
        2. Expired entry with an ETag -> conditional request (If-None-Match):
           - 304 Not Modified -> reuse the cached data and refresh its timestamp
           - 200 -> replace the cache entry
        3. No entry -> plain request.

        Rate-limit replies (403/429), 5xx replies and network errors are retried
        up to *_retries* times. When a rate-limit reply or a network error
        cannot be recovered, an expired cache entry is returned if one exists.

        Raises:
            FileNotFoundError: HTTP 404.
            PermissionError: rate limited with no cached data to fall back on.
            RuntimeError: any other HTTP error, or a network failure with no
                cached data to fall back on.
        """
        cached = _cache_read(url)
        if cached is not None:
            return cached

        # An expired entry may still carry an ETag usable for a conditional request.
        old_etag, old_data = _cache_read_etag(url)

        req = urllib.request.Request(url, headers=self._headers)
        if old_etag:
            req.add_header("If-None-Match", old_etag)

        for attempt in range(_retries + 1):
            try:
                with urllib.request.urlopen(req, timeout=timeout) as resp:
                    content_type = resp.headers.get("Content-Type", "")
                    resp_etag = resp.headers.get("ETag")
                    body = resp.read().decode("utf-8", errors="replace")
                    if "json" in content_type:
                        result = json.loads(body)
                    else:
                        result = body
                    _cache_write(url, result, etag=resp_etag)
                    return result
            except urllib.error.HTTPError as e:
                if e.code == 304 and old_data is not None:
                    # Not modified: reuse the cached data and refresh its timestamp.
                    _cache_write(url, old_data, etag=old_etag)
                    return old_data
                elif e.code == 404:
                    raise FileNotFoundError(f"GitHub 上找不到该资源:{url}") from e
                elif e.code in (403, 429):
                    # GitHub signals rate limiting with 403 or 429; honour
                    # Retry-After (capped) while retries remain.
                    retry_after = e.headers.get("Retry-After")
                    if retry_after and attempt < _retries:
                        time.sleep(self._retry_delay(retry_after, attempt))
                        continue
                    # Rate limited, but stale data is better than none.
                    if old_data is not None:
                        return old_data
                    raise PermissionError(
                        "RATELIMIT: GitHub API 频率超限。\n"
                        "设置 GITHUB_TOKEN 环境变量可提升到 5000次/小时。\n"
                        "获取 Token:https://github.com/settings/tokens"
                    ) from e
                elif e.code >= 500 and attempt < _retries:
                    time.sleep(2 ** attempt)
                    continue
                raise RuntimeError(f"GitHub API 错误 {e.code}: {url}") from e
            except urllib.error.URLError as e:
                if attempt < _retries:
                    time.sleep(2 ** attempt)
                    continue
                # Network is down, but stale data is better than none.
                if old_data is not None:
                    return old_data
                raise RuntimeError(f"网络连接失败,请检查网络:{e.reason}") from e

    def search_repo(self, query: str) -> tuple[str, str]:
        """Resolve a bare project name to ``(owner, repo)`` via the first repository search hit.

        Raises:
            FileNotFoundError: if the search returns no items.
        """
        url = f"{self.API_BASE}/search/repositories?q={urllib.parse.quote(query)}&per_page=1"
        data = self._get(url)
        items = data.get("items", []) if isinstance(data, dict) else []
        if not items:
            raise FileNotFoundError(f"在 GitHub 上找不到项目:{query}")
        repo = items[0]
        return repo["owner"]["login"], repo["name"]

    def fetch_repo_info(self, owner: str, repo: str) -> dict:
        """Return the ``/repos/{owner}/{repo}`` API response."""
        url = f"{self.API_BASE}/repos/{owner}/{repo}"
        return self._get(url)

    def _get_raw(self, url: str, timeout: int = 10) -> Optional[str]:
        """GET a raw file URL as text, with caching.

        Returns None for URLs outside the two GitHub hosts below and on any
        HTTP or network error.
        """
        # SSRF protection: only GitHub hosts are ever contacted.
        if not url.startswith(("https://raw.githubusercontent.com/", "https://api.github.com/")):
            return None
        cached = _cache_read(url)
        if cached is not None:
            return cached
        try:
            req = urllib.request.Request(url, headers=self._headers)
            with urllib.request.urlopen(req, timeout=timeout) as resp:
                text = resp.read().decode("utf-8", errors="replace")
            _cache_write(url, text)
            return text
        except (urllib.error.HTTPError, urllib.error.URLError):
            return None

    def fetch_readme(self, owner: str, repo: str, branch: str = "main") -> str:
        """
        Return the README text, or ``""`` when none can be obtained.

        The ``/readme`` API endpoint is tried first (one request); on failure
        the raw ``README.md`` of *branch* is used as a fallback.
        """
        import base64

        # 1) API endpoint: a single request for the repository's README.
        try:
            data = self._get(f"{self.API_BASE}/repos/{owner}/{repo}/readme")
            if isinstance(data, dict) and data.get("encoding") == "base64":
                return base64.b64decode(data["content"]).decode("utf-8", errors="replace")
        except (FileNotFoundError, RuntimeError, PermissionError, KeyError, ValueError):
            # KeyError/ValueError: malformed payload (missing "content", bad
            # base64 or JSON) - fall through to the raw-URL fallback.
            pass

        # 2) Fallback: README.md on the given branch.
        url = f"{self.RAW_BASE}/{owner}/{repo}/{branch}/README.md"
        text = self._get_raw(url)
        if text is not None:
            return text

        return ""  # a missing README must not block install-plan generation

    def fetch_file(self, owner: str, repo: str, path: str, branch: str = "main") -> Optional[str]:
        """Return the text of *path*, trying *branch*, then ``master``, then ``main``.

        Returns None when the file is found on none of them.
        """
        # dict.fromkeys drops duplicates but keeps order, so a branch that
        # already failed is not requested a second time.
        for b in dict.fromkeys((branch, "master", "main")):
            url = f"{self.RAW_BASE}/{owner}/{repo}/{b}/{path}"
            text = self._get_raw(url)
            if text is not None:
                return text
        return None
|
|
342
|
+
|
|
343
|
+
|
|
344
|
+
# ─────────────────────────────────────────────
|
|
345
|
+
# 项目类型识别
|
|
346
|
+
# ─────────────────────────────────────────────
|
|
347
|
+
|
|
348
|
+
def detect_project_types(
    repo_data: dict,
    readme: str,
    dependency_files: dict,
) -> list[str]:
    """
    Identify the project's technology stack.

    Three signals are combined: the ``language`` field of *repo_data*, the
    base names of the files in *dependency_files*, and keywords found in the
    lower-cased *readme*.

    Returns:
        Sorted list of type tags, e.g. ``["docker", "python", "pytorch"]``.
    """
    language_to_type = {
        "python": "python", "javascript": "node", "typescript": "node",
        "rust": "rust", "go": "go", "java": "java", "kotlin": "kotlin",
        "c++": "cpp", "c": "c", "ruby": "ruby", "php": "php",
        "c#": "dotnet", "swift": "swift", "dart": "dart",
        "scala": "scala", "shell": "shell",
        "elixir": "elixir", "erlang": "erlang", "haskell": "haskell",
        "lua": "lua", "perl": "perl", "r": "r", "julia": "julia",
        "zig": "zig", "clojure": "clojure", "nim": "nim",
        "crystal": "crystal", "hcl": "hcl",
    }
    file_to_type = {
        "requirements.txt": "python",
        "setup.py": "python",
        "setup.cfg": "python",
        "pyproject.toml": "python",
        "environment.yml": "conda",
        "Pipfile": "python",
        "package.json": "node",
        "yarn.lock": "node",
        "pnpm-lock.yaml": "node",
        "Cargo.toml": "rust",
        "go.mod": "go",
        "pom.xml": "java",
        "build.gradle": "java",
        "Dockerfile": "docker",
        "docker-compose.yml": "docker",
        "docker-compose.yaml": "docker",
        "Makefile": "make",
        "CMakeLists.txt": "cmake",
        "configure": "autotools",
        "configure.ac": "autotools",
        "Makefile.am": "autotools",
        "build.gradle.kts": "java",
        "Gemfile": "ruby",
        "composer.json": "php",
        "Package.swift": "swift",
        "mix.exs": "elixir",
        "rebar.config": "erlang",
        "pubspec.yaml": "dart",
        "build.sbt": "scala",
        "meson.build": "meson",
        "WORKSPACE": "bazel",
        "BUILD.bazel": "bazel",
        "stack.yaml": "haskell",
        "project.clj": "clojure",
        "DESCRIPTION": "r",
        "Project.toml": "julia",
        "build.zig": "zig",
        "nimble": "nim",
        "shard.yml": "crystal",
        "cpanfile": "perl",
        "Makefile.PL": "perl",
        "Build.PL": "perl",
    }
    # Files recognised by extension because their base name is project-specific.
    suffix_to_type = (
        (".cabal", "haskell"),
        (".nimble", "nim"),
        (".ino", "arduino"),
    )
    readme_keywords = {
        "pytorch": ["torch", "pytorch", "pip install torch"],
        "tensorflow": ["tensorflow", "pip install tensorflow"],
        "diffusers": ["diffusers", "stable diffusion", "stable-diffusion"],
        "ollama": ["ollama"],
        "docker": ["docker-compose", "dockerfile"],
        "comfyui": ["comfyui"],
        "gradio": ["gradio"],
        "fastapi": ["fastapi"],
        "nextjs": ["next.js", "nextjs"],
    }

    detected = set()

    # Signal 1: primary language reported for the repository.
    primary = (repo_data.get("language") or "").lower()
    if primary in language_to_type:
        detected.add(language_to_type[primary])

    # Signal 2: dependency files, matched by base name (keys may be relative paths).
    basenames = {Path(entry).name for entry in dependency_files}
    detected.update(kind for fname, kind in file_to_type.items() if fname in basenames)
    for fname in basenames:
        for suffix, kind in suffix_to_type:
            if fname.endswith(suffix):
                detected.add(kind)
                break
    if basenames & {"platformio.ini", "library.json", "library.properties"}:
        detected.add("platformio")

    # Signal 3: framework keywords (plain substring match) in the README.
    text = readme.lower()
    detected.update(
        framework
        for framework, keywords in readme_keywords.items()
        if any(keyword in text for keyword in keywords)
    )
    # conda needs word boundaries so that e.g. "secondary" does not count.
    if re.search(r'\bconda\b|\banaconda\b|\bminiconda\b', text):
        detected.add("conda")
    # The bare word "docker" (e.g. in shell commands) also counts.
    if re.search(r'\bdocker\b', text):
        detected.add("docker")

    return sorted(detected)
|
|
466
|
+
|
|
467
|
+
|
|
468
|
+
# Dependency/build files this module looks for (a set, for fast membership tests).
_KNOWN_DEP_FILES = {
    # Python
    "requirements.txt",
    "requirements-dev.txt",
    "setup.py",
    "pyproject.toml",
    # Node.js
    "package.json",
    # Rust / Go
    "Cargo.toml",
    "go.mod",
    # Docker
    "Dockerfile",
    "docker-compose.yml",
    "docker-compose.yaml",
    # Conda
    "environment.yml",
    # Java / Kotlin
    "pom.xml",
    "build.gradle",
    "build.gradle.kts",
    # Scala
    "build.sbt",
    # Ruby
    "Gemfile",
    # PHP
    "composer.json",
    # .NET / C#: .csproj/.sln are usually not in the repository root, so those
    # projects are detected through the language field instead.
    # C/C++
    "CMakeLists.txt",
    "Makefile",
    "configure",
    "configure.ac",
    "Makefile.am",
    # Swift
    "Package.swift",
    # Dart / Flutter
    "pubspec.yaml",
    # Elixir / Erlang
    "mix.exs",
    "rebar.config",
    # Haskell
    "stack.yaml",
    # Zig
    "build.zig",
    "build.zig.zon",
    ".zig-version",
    # Clojure
    "project.clj",
    # Julia
    "Project.toml",
    # R
    "DESCRIPTION",
    # Meson / Bazel
    "meson.build",
    "WORKSPACE",
    "BUILD.bazel",
    # Perl
    "cpanfile",
    "Makefile.PL",
    "Build.PL",
    # Crystal
    "shard.yml",
    # Nim: .nimble files have no fixed name, so Nim is detected through the
    # language field instead.
}
|
|
517
|
+
|
|
518
|
+
|
|
519
|
+
def extract_dependency_files(
    fetcher: GitHubFetcher,
    owner: str,
    repo: str,
    branch: str,
) -> dict:
    """
    Download the known dependency files found in the repository root.

    The root directory is listed once through the Contents API and only the
    known files that actually appear in that listing are downloaded. If the
    listing cannot be obtained, every known file name is tried instead.

    Returns:
        ``{file name: content}``, each content truncated to 15000 characters.
    """
    # Step 1: one API request for the root directory listing.
    try:
        listing = fetcher._get(
            f"{fetcher.API_BASE}/repos/{owner}/{repo}/contents/?ref={branch}"
        )
        if not isinstance(listing, list):
            raise RuntimeError("contents 返回非列表")
        present = {entry["name"] for entry in listing if entry.get("type") == "file"}
    except Exception:
        # Listing failed: fall back to probing every known file name.
        present = _KNOWN_DEP_FILES

    # Step 2: download only known files that are present.
    branch_root = f"{fetcher.RAW_BASE}/{owner}/{repo}/{branch}"
    collected = {}
    for fname in sorted(present & _KNOWN_DEP_FILES):
        body = fetcher._get_raw(f"{branch_root}/{fname}")
        if body is None:
            continue
        collected[fname] = body[:15000]
    return collected
|
|
552
|
+
|
|
553
|
+
|
|
554
|
+
# ─────────────────────────────────────────────
|
|
555
|
+
# 主入口
|
|
556
|
+
# ─────────────────────────────────────────────
|
|
557
|
+
|
|
558
|
+
def fetch_project(identifier: str) -> RepoInfo:
    """
    Fetch everything needed to plan an installation, in one call.

    Routing by platform:
    - GitHub URL / "owner/repo" / bare project name -> GitHub API (cache + ETag)
    - GitLab / Bitbucket / Gitee / Codeberg URL -> provider from ``multi_source``
    - Local path -> use fetch_project_from_path() instead

    Args:
        identifier: platform URL, "owner/repo", or bare project name.

    Returns:
        RepoInfo with the README (truncated to 15000 characters), dependency
        files, detected project types, etc.
    """
    from multi_source import detect_platform, get_provider

    platform, ms_owner, ms_repo = detect_platform(identifier)

    # ── Non-GitHub platforms: delegate to the multi_source provider ──
    if platform != "github":
        logger.info(f"🌐 检测到 {platform} 平台,使用对应 Provider...")
        provider = get_provider(platform)
        meta = provider.get_repo_metadata(ms_owner, ms_repo)
        # NOTE(review): the provider API is not visible here; a missing README
        # is normalised to "" so the slicing/lower-casing below cannot fail
        # should the provider return None.
        readme = provider.get_readme(ms_owner, ms_repo, meta.default_branch) or ""

        # Probe every known dependency file on the default branch.
        dep_files = {}
        for fname in sorted(_KNOWN_DEP_FILES):
            content = provider.get_file_content(ms_owner, ms_repo, fname, meta.default_branch)
            if content:
                dep_files[fname] = content[:15000]

        repo_data = {"language": meta.language}
        project_types = detect_project_types(repo_data, readme, dep_files)

        return RepoInfo(
            owner=meta.owner,
            repo=meta.repo,
            full_name=meta.full_name,
            description=meta.description,
            stars=meta.stars,
            language=meta.language or "Unknown",
            license=meta.license or "Unknown",
            default_branch=meta.default_branch,
            readme=readme[:15000],
            project_type=project_types,
            dependency_files=dep_files,
            clone_url=meta.clone_url,
            homepage=meta.homepage,
        )

    # ── GitHub: original flow (cache + ETag + search) ──
    fetcher = GitHubFetcher()

    # 1. Resolve owner/repo; a bare project name goes through repository search.
    owner, repo = parse_repo_identifier(identifier)
    if not owner:
        logger.info(t("fetcher.searching", repo=repo))
        owner, repo = fetcher.search_repo(repo)

    logger.info(t("fetcher.fetching_info", owner=owner, repo=repo))

    # 2. Basic repository information.
    repo_data = fetcher.fetch_repo_info(owner, repo)
    branch = repo_data.get("default_branch", "main")

    # 3. README.
    logger.info(t("fetcher.reading_readme"))
    readme = fetcher.fetch_readme(owner, repo, branch)

    # 4. Dependency files.
    logger.info(t("fetcher.detecting_deps"))
    dep_files = extract_dependency_files(fetcher, owner, repo, branch)

    # 5. Project types.
    project_types = detect_project_types(repo_data, readme, dep_files)

    return RepoInfo(
        owner=owner,
        repo=repo,
        full_name=f"{owner}/{repo}",
        description=repo_data.get("description") or "",
        stars=repo_data.get("stargazers_count", 0),
        language=repo_data.get("language") or "Unknown",
        # "license" and its "spdx_id" can each be null in the API response.
        license=(repo_data.get("license") or {}).get("spdx_id") or "Unknown",
        default_branch=branch,
        readme=readme[:15000],  # cap README length to save LLM tokens
        project_type=project_types,
        dependency_files=dep_files,
        clone_url=repo_data.get("clone_url", f"https://github.com/{owner}/{repo}.git"),
        homepage=repo_data.get("homepage") or f"https://github.com/{owner}/{repo}",
    )
|
|
651
|
+
|
|
652
|
+
|
|
653
|
+
def format_project_summary(info: RepoInfo) -> str:
    """Render a four-line, human-readable summary of *info*.

    Shows the full name, star count (with thousands separators), language,
    project types ("Unknown" when empty), the first 100 characters of the
    description, and the homepage.
    """
    star_text = f"{info.stars:,}"
    type_text = " | ".join(info.project_type)
    if not type_text:
        type_text = "Unknown"
    pieces = [
        f"📦 {info.full_name}\n",
        f" ⭐ {star_text} stars | 语言:{info.language} | 类型:{type_text}\n",
        f" 📝 {info.description[:100]}\n",
        f" 🔗 {info.homepage}",
    ]
    return "".join(pieces)
|
|
663
|
+
|
|
664
|
+
|
|
665
|
+
# ─────────────────────────────────────────────
|
|
666
|
+
# 本地模式:git clone --depth 1 分析
|
|
667
|
+
# ─────────────────────────────────────────────
|
|
668
|
+
|
|
669
|
+
def _detect_language_from_files(root: Path) -> str:
    """Guess the primary language of the tree under *root* by counting file extensions.

    Files with a dot-prefixed path component, and files whose top-level
    directory is a test/vendor directory, are not counted. Returns
    ``"Unknown"`` when no file has a recognised extension.
    """
    language_by_suffix = {
        ".py": "Python", ".js": "JavaScript", ".ts": "TypeScript",
        ".java": "Java", ".kt": "Kotlin", ".go": "Go", ".rs": "Rust",
        ".rb": "Ruby", ".php": "PHP", ".cs": "C#", ".swift": "Swift",
        ".c": "C", ".cpp": "C++", ".cc": "C++", ".cxx": "C++", ".h": "C", ".hpp": "C++",
        ".dart": "Dart", ".scala": "Scala", ".sh": "Shell",
        ".ex": "Elixir", ".exs": "Elixir", ".erl": "Erlang",
        ".hs": "Haskell", ".lua": "Lua", ".pl": "Perl", ".pm": "Perl",
        ".r": "R", ".R": "R", ".jl": "Julia", ".zig": "Zig",
        ".clj": "Clojure", ".nim": "Nim", ".cr": "Crystal",
        ".tf": "HCL", ".hcl": "HCL",
    }
    # Test and vendored code is left out so it cannot outvote the real
    # sources (the original comment likens this to GitHub Linguist).
    ignored_top_dirs = {"t", "test", "tests", "spec", "vendor", "node_modules",
                        "third_party", "thirdparty", "fixtures", "testdata"}
    tally: dict[str, int] = {}
    try:
        for entry in root.rglob("*"):
            if not entry.is_file():
                continue
            rel_parts = entry.relative_to(root).parts
            if any(part.startswith(".") for part in rel_parts):
                continue
            # Only the first path component is compared against the skip list.
            if rel_parts and rel_parts[0].lower() in ignored_top_dirs:
                continue
            language = language_by_suffix.get(entry.suffix.lower())
            if language:
                tally[language] = tally.get(language, 0) + 1
    except Exception:
        # Best effort: keep whatever was counted before the walk failed.
        pass
    if not tally:
        return "Unknown"
    return max(tally, key=tally.get)
|
|
702
|
+
|
|
703
|
+
|
|
704
|
+
def _find_readme(root: Path) -> str:
    """Return the first 15000 characters of the README in *root*, or ``""``.

    Candidate names are tried in a fixed order; a candidate that exists but
    cannot be read is skipped.
    """
    for candidate in ("README.md", "readme.md", "README.rst", "README.txt", "README"):
        readme_path = root / candidate
        if not readme_path.is_file():
            continue
        try:
            content = readme_path.read_text(encoding="utf-8", errors="replace")
            return content[:15000]
        except Exception:
            continue
    return ""
|
|
715
|
+
|
|
716
|
+
|
|
717
|
+
def _extract_local_dep_files(root: Path) -> dict[str, str]:
    """Collect dependency files from a local checkout, recursively.

    Returns:
        ``{POSIX-style path relative to root: content}``, each content
        truncated to 15000 characters. Unreadable files are skipped.
    """
    collected = {}
    for path in root.rglob("*"):
        if not path.is_file():
            continue
        rel = path.relative_to(root)
        *parents, filename = rel.parts
        wanted = filename in _KNOWN_DEP_FILES or filename.endswith((".cabal", ".nimble"))
        if not wanted:
            continue
        # Skip anything below a vendored/build/cache directory or a hidden directory.
        if any(d.lower() in _LOCAL_ANALYSIS_SKIP_DIRS for d in parents):
            continue
        if any(d.startswith(".") for d in parents):
            continue
        # Dot-files are accepted in the repository root only.
        if parents and filename.startswith("."):
            continue
        try:
            collected[rel.as_posix()] = path.read_text(encoding="utf-8", errors="replace")[:15000]
        except Exception:
            continue
    return collected
|
|
739
|
+
|
|
740
|
+
|
|
741
|
+
def _cloned_default_branch(clone_path: Path, fallback: str = "main") -> str:
    """Return the branch checked out in a fresh clone, or ``fallback`` on any failure."""
    try:
        head = subprocess.run(
            ["git", "-C", str(clone_path), "symbolic-ref", "--short", "HEAD"],
            capture_output=True, text=True, timeout=10,
        )
    except (OSError, subprocess.SubprocessError):
        return fallback
    branch = head.stdout.strip()
    return branch if head.returncode == 0 and branch else fallback


def fetch_project_local(identifier: str) -> RepoInfo:
    """
    Local mode: shallow-clone the repository (``git clone --depth 1``) into a
    temporary directory and analyse the checkout.

    Unlike the API-based path this needs only a working ``git``; the README,
    dependency files and main language are all derived from the cloned files.
    The temporary directory is always removed before returning or raising.

    Args:
        identifier: A repository URL or an "owner/repo" string, as understood
            by ``multi_source.detect_platform``.

    Returns:
        RepoInfo with README, dependency files, detected project types and
        the clone URL. ``description``, ``stars`` and ``license`` are
        placeholders because they cannot be derived from the clone here.

    Raises:
        ValueError: ``detect_platform`` returned no owner for ``identifier``.
        FileNotFoundError: git reported that the repository does not exist.
        RuntimeError: ``git clone`` failed for any other reason.
    """
    from multi_source import detect_platform

    platform, owner, repo = detect_platform(identifier)
    if not owner:
        raise ValueError(
            f"本地模式需要完整的 owner/repo 格式,无法仅通过项目名 '{repo}' 分析。\n"
            f"请使用完整格式,如:owner/{repo}"
        )

    # platform -> (display name used in messages, host used for the clone URL).
    # Unknown platforms fall back to GitHub.
    hosts = {
        "github": ("GitHub", "github.com"),
        "gitlab": ("GitLab", "gitlab.com"),
        "bitbucket": ("Bitbucket", "bitbucket.org"),
        "gitee": ("Gitee", "gitee.com"),
        "codeberg": ("Codeberg", "codeberg.org"),
    }
    site_name, host = hosts.get(platform, hosts["github"])
    clone_url = f"https://{host}/{owner}/{repo}.git"
    homepage = clone_url.removesuffix(".git")

    # Shallow clone into a throw-away directory.
    tmp_dir = tempfile.mkdtemp(prefix="gitinstall_")
    clone_path = Path(tmp_dir) / repo

    try:
        logger.info(t("fetcher.cloning", owner=owner, repo=repo))
        result = subprocess.run(
            ["git", "clone", "--depth", "1", "--single-branch", clone_url, str(clone_path)],
            capture_output=True, text=True, timeout=120,
        )
        if result.returncode != 0:
            stderr = result.stderr.strip()
            lowered = stderr.lower()
            if "not found" in lowered or "does not exist" in lowered:
                # Name the platform that was actually queried, not always GitHub.
                raise FileNotFoundError(f"{site_name} 上找不到项目:{owner}/{repo}")
            raise RuntimeError(f"git clone 失败:{stderr}")

        # Analyse the checkout.
        logger.info(t("fetcher.local_analysis"))
        readme = _find_readme(clone_path)
        dep_files = _extract_local_dep_files(clone_path)
        language = _detect_language_from_files(clone_path)

        # Minimal stand-in for the repo_data dict that detect_project_types expects.
        repo_data = {"language": language}
        project_types = detect_project_types(repo_data, readme, dep_files)

        return RepoInfo(
            owner=owner,
            repo=repo,
            full_name=f"{owner}/{repo}",
            description="",  # not available in local mode
            stars=0,  # not available in local mode
            language=language,
            license="Unknown",  # could later be detected from a LICENSE file
            # The clone checks out the remote's default branch; "main" is
            # only the fallback when that cannot be determined.
            default_branch=_cloned_default_branch(clone_path),
            readme=readme,
            project_type=project_types,
            dependency_files=dep_files,
            clone_url=clone_url,
            homepage=homepage,
        )
    finally:
        # Always remove the temporary clone.
        shutil.rmtree(tmp_dir, ignore_errors=True)
|
|
827
|
+
|
|
828
|
+
|
|
829
|
+
# ─────────────────────────────────────────────
|
|
830
|
+
# 本地路径模式:直接分析已有目录
|
|
831
|
+
# ─────────────────────────────────────────────
|
|
832
|
+
|
|
833
|
+
def is_local_path(identifier: str) -> bool:
    """Tell whether ``identifier`` denotes a local filesystem path.

    Surrounding whitespace is ignored. Recognised forms:

    - absolute POSIX paths (``/...``)
    - explicit relative paths (``.``, ``..``, ``./x``, ``../x``, ``.\\x``, ``..\\x``)
    - home-relative paths (``~``, ``~/x``, ``~\\x``)
    - Windows drive-qualified paths (``C:\\x`` or ``C:/x``)

    Bare names and ``owner/repo`` strings are not treated as paths, so they
    can still be resolved as repository identifiers.
    """
    s = identifier.strip()
    if s in {".", "..", "~"}:
        return True
    if s.startswith(("/", "./", "../", "~/", ".\\", "..\\", "~\\")):
        return True
    # Drive letter followed by a separator, e.g. "C:\proj" or "D:/proj".
    return (
        len(s) >= 3
        and s[0].isascii()
        and s[0].isalpha()
        and s[1] == ":"
        and s[2] in "\\/"
    )
|
|
843
|
+
|
|
844
|
+
|
|
845
|
+
def fetch_project_from_path(path: str) -> RepoInfo:
    """
    Analyse a project that already exists in a local directory, without
    making any network request.

    Difference from fetch_project_local():
      - fetch_project_local()     -> clones with git first (needs network)
      - fetch_project_from_path() -> reads the directory as is (fully offline)

    Owner, repository name and clone URL are taken from the first remote
    ``url = ...`` line in ``.git/config`` that has an ``owner/repo`` shape.
    Without one, the owner is ``"_local"``, the repository name is the
    directory name and the clone URL is the resolved directory path.

    Args:
        path: Local directory, absolute or relative; ``~`` is expanded.

    Returns:
        RepoInfo with README, dependency files and detected project types.
        ``license`` is ``"Detected"`` when a common license file exists at
        the top level, otherwise ``"Unknown"``.

    Raises:
        FileNotFoundError: ``path`` does not exist or is not a directory.
    """
    # Expand ~ and normalise to an absolute path.
    root = Path(path).expanduser().resolve()
    if not root.is_dir():
        raise FileNotFoundError(f"本地路径不存在或不是目录:{root}")

    logger.info(f"📂 分析本地项目:{root}")

    # Defaults used when there is no usable git remote.
    repo_name = root.name
    owner = "_local"
    clone_url = str(root)

    git_config = root / ".git" / "config"
    try:
        content = (
            git_config.read_text(encoding="utf-8", errors="replace")
            if git_config.is_file()
            else ""
        )
    except OSError:
        # Best effort: an unreadable config just keeps the defaults.
        content = ""

    # One anchored match yields URL, owner and repo together, so they always
    # come from the same line. Anchoring keeps keys such as "pushurl" from
    # matching, and the repo group allows dots (e.g. "socket.io") while still
    # dropping a trailing ".git".
    remote = re.search(
        r'^[ \t]*url\s*=\s*(\S+[:/]([^/\s]+)/([^/\s]+?)(?:\.git)?)\s*$',
        content,
        re.MULTILINE,
    )
    if remote:
        clone_url, owner, repo_name = remote.groups()

    # Analyse the directory contents.
    readme = _find_readme(root)
    dep_files = _extract_local_dep_files(root)
    language = _detect_language_from_files(root)

    # License detection only records whether a license file is present.
    license_names = ("LICENSE", "LICENSE.md", "LICENSE.txt", "LICENCE", "COPYING")
    has_license = any((root / lname).is_file() for lname in license_names)
    license_id = "Detected" if has_license else "Unknown"

    repo_data = {"language": language}
    project_types = detect_project_types(repo_data, readme, dep_files)

    return RepoInfo(
        owner=owner,
        repo=repo_name,
        full_name=f"{owner}/{repo_name}" if owner != "_local" else repo_name,
        description="",
        stars=0,
        language=language,
        license=license_id,
        default_branch="",
        readme=readme,
        project_type=project_types,
        dependency_files=dep_files,
        clone_url=clone_url,
        homepage="",
    )
|