gitinstall 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. gitinstall/__init__.py +61 -0
  2. gitinstall/_sdk.py +541 -0
  3. gitinstall/academic.py +831 -0
  4. gitinstall/admin.html +327 -0
  5. gitinstall/auto_update.py +384 -0
  6. gitinstall/autopilot.py +349 -0
  7. gitinstall/badge.py +476 -0
  8. gitinstall/checkpoint.py +330 -0
  9. gitinstall/cicd.py +499 -0
  10. gitinstall/clawhub.html +718 -0
  11. gitinstall/config_schema.py +353 -0
  12. gitinstall/db.py +984 -0
  13. gitinstall/db_backend.py +445 -0
  14. gitinstall/dep_chain.py +337 -0
  15. gitinstall/dependency_audit.py +1153 -0
  16. gitinstall/detector.py +542 -0
  17. gitinstall/doctor.py +493 -0
  18. gitinstall/education.py +869 -0
  19. gitinstall/enterprise.py +802 -0
  20. gitinstall/error_fixer.py +953 -0
  21. gitinstall/event_bus.py +251 -0
  22. gitinstall/executor.py +577 -0
  23. gitinstall/feature_flags.py +138 -0
  24. gitinstall/fetcher.py +921 -0
  25. gitinstall/huggingface.py +922 -0
  26. gitinstall/hw_detect.py +988 -0
  27. gitinstall/i18n.py +664 -0
  28. gitinstall/installer_registry.py +362 -0
  29. gitinstall/knowledge_base.py +379 -0
  30. gitinstall/license_check.py +605 -0
  31. gitinstall/llm.py +569 -0
  32. gitinstall/log.py +236 -0
  33. gitinstall/main.py +1408 -0
  34. gitinstall/mcp_agent.py +841 -0
  35. gitinstall/mcp_server.py +386 -0
  36. gitinstall/monorepo.py +810 -0
  37. gitinstall/multi_source.py +425 -0
  38. gitinstall/onboard.py +276 -0
  39. gitinstall/planner.py +222 -0
  40. gitinstall/planner_helpers.py +323 -0
  41. gitinstall/planner_known_projects.py +1010 -0
  42. gitinstall/planner_templates.py +996 -0
  43. gitinstall/remote_gpu.py +633 -0
  44. gitinstall/resilience.py +608 -0
  45. gitinstall/run_tests.py +572 -0
  46. gitinstall/skills.py +476 -0
  47. gitinstall/tool_schemas.py +324 -0
  48. gitinstall/trending.py +279 -0
  49. gitinstall/uninstaller.py +415 -0
  50. gitinstall/validate_top100.py +607 -0
  51. gitinstall/watchdog.py +180 -0
  52. gitinstall/web.py +1277 -0
  53. gitinstall/web_ui.html +2277 -0
  54. gitinstall-1.1.0.dist-info/METADATA +275 -0
  55. gitinstall-1.1.0.dist-info/RECORD +59 -0
  56. gitinstall-1.1.0.dist-info/WHEEL +5 -0
  57. gitinstall-1.1.0.dist-info/entry_points.txt +3 -0
  58. gitinstall-1.1.0.dist-info/licenses/LICENSE +21 -0
  59. gitinstall-1.1.0.dist-info/top_level.txt +1 -0
gitinstall/fetcher.py ADDED
@@ -0,0 +1,921 @@
1
+ """
2
+ fetcher.py - GitHub 项目信息抓取与解析
3
+ =====================================
4
+
5
+ 功能:
6
+ 1. 解析各种格式的项目标识:URL / "owner/repo" / 项目名
7
+ 2. 通过 GitHub API 或 git clone --depth 1 本地分析 获取项目信息
8
+ 3. 下载并解析 README(支持 .md / .rst / .txt)
9
+ 4. 提取项目类型(Python/Node/Rust/Go/Docker 等)
10
+ 5. 提取依赖文件(requirements.txt / package.json / Cargo.toml 等)
11
+
12
+ 两种模式:
13
+ - API 模式(默认):使用 GitHub REST API,受限 60 次/小时
14
+ - 本地模式(推荐):git clone --depth 1 后本地分析,无任何限制
15
+
16
+ 只使用 Python 标准库,无需安装任何第三方包。
17
+ """
18
+
19
+ from __future__ import annotations
20
+
21
+ import hashlib
22
+ import json
23
+ import os
24
+ import re
25
+ import shutil
26
+ import subprocess
27
+ import tempfile
28
+ import time
29
+ import urllib.error
30
+ import urllib.parse
31
+ import urllib.request
32
+ from dataclasses import dataclass, field
33
+ from pathlib import Path, PurePosixPath
34
+ from typing import Optional
35
+
36
+ from log import get_logger
37
+ from i18n import t
38
+
39
+ logger = get_logger(__name__)
40
+
41
+
42
# Directory names that local dependency-file discovery never looks inside.
_LOCAL_ANALYSIS_SKIP_DIRS = {
    # version control and tool caches
    ".git",
    "__pycache__",
    ".mypy_cache",
    ".pytest_cache",
    # virtual environments
    ".venv",
    "venv",
    # vendored / third-party code
    "node_modules",
    "vendor",
    "third_party",
    "thirdparty",
    # build output
    "target",
    "dist",
    "build",
}
47
+
48
+
49
+ # ─────────────────────────────────────────────
50
+ # API 响应缓存
51
+ # ─────────────────────────────────────────────
52
+
53
def _env_int(name: str, default: int) -> int:
    """Read an integer from environment variable *name*.

    Returns *default* when the variable is unset, empty, or not a valid
    integer, so a malformed value cannot make the module fail at import time.
    """
    raw = os.getenv(name, "").strip()
    if not raw:
        return default
    try:
        return int(raw)
    except ValueError:
        return default


# On-disk location of cached API responses.
_CACHE_DIR = Path.home() / ".cache" / "gitinstall" / "api"
# Cache lifetime in seconds; override with GITINSTALL_CACHE_TTL (default: 24 hours).
_CACHE_TTL = _env_int("GITINSTALL_CACHE_TTL", 24 * 3600)
# GITINSTALL_NO_CACHE=1/true/yes (any letter case) disables the cache entirely.
_NO_CACHE = os.getenv("GITINSTALL_NO_CACHE", "").strip().lower() in ("1", "true", "yes")
56
+
57
+
58
def _cache_path(url: str) -> Path:
    """Map *url* to its cache file: the first 16 hex digits of SHA-256(url) plus ``.json``."""
    digest = hashlib.sha256(url.encode())
    file_name = digest.hexdigest()[:16] + ".json"
    return _CACHE_DIR / file_name
62
+
63
+
64
def _cache_read(url: str):
    """Return the cached payload for *url*, or None on a miss.

    A miss is any of: caching disabled, no cache file, an entry older than
    ``_CACHE_TTL`` seconds, or a file that cannot be read or parsed.
    """
    if _NO_CACHE:
        return None
    cache_file = _cache_path(url)
    if not cache_file.exists():
        return None
    try:
        entry = json.loads(cache_file.read_text("utf-8"))
        age = time.time() - entry.get("ts", 0)
        if age > _CACHE_TTL:
            # Expired entries stay on disk so their ETag can still be used
            # for a conditional request.
            return None
        return entry["data"]
    except Exception:
        return None
78
+
79
+
80
def _cache_read_etag(url: str) -> tuple:
    """Return ``(etag, data)`` stored for *url*, ignoring the entry's age.

    Used to build conditional requests from expired entries. Yields
    ``(None, None)`` when caching is disabled, the file is missing, or it
    cannot be read or parsed.
    """
    nothing = (None, None)
    if _NO_CACHE:
        return nothing
    cache_file = _cache_path(url)
    if not cache_file.exists():
        return nothing
    try:
        entry = json.loads(cache_file.read_text("utf-8"))
        etag = entry.get("etag")
        data = entry.get("data")
        return etag, data
    except Exception:
        return nothing
93
+
94
+
95
def _cache_write(url: str, data, etag: Optional[str] = None) -> None:
    """Store *data* (and optionally its ETag) in the cache entry for *url*.

    The entry is written to a temporary file in the cache directory and then
    moved into place with ``os.replace``, so a concurrent reader or an
    interrupted run never sees a half-written JSON file.

    Caching is best-effort: every failure (unwritable directory,
    non-serialisable data, ...) is swallowed so the caller's main flow is
    never blocked.

    Args:
        url: Request URL; determines the cache file via ``_cache_path``.
        data: JSON-serialisable payload.
        etag: ETag header of the response, stored only when non-empty.
    """
    if _NO_CACHE:
        return
    tmp_name = None
    try:
        _CACHE_DIR.mkdir(parents=True, exist_ok=True)
        entry = {"url": url, "ts": time.time(), "data": data}
        if etag:
            entry["etag"] = etag
        payload = json.dumps(entry, ensure_ascii=False)
        with tempfile.NamedTemporaryFile(
            "w", encoding="utf-8", dir=_CACHE_DIR, suffix=".tmp", delete=False
        ) as tmp:
            tmp_name = tmp.name
            tmp.write(payload)
        os.replace(tmp_name, _cache_path(url))
        tmp_name = None  # ownership handed over to the final cache file
    except Exception:
        # Deliberate: a cache failure must never break the request itself.
        pass
    finally:
        if tmp_name is not None:
            try:
                os.unlink(tmp_name)
            except OSError:
                pass
110
+
111
+
112
+ # ─────────────────────────────────────────────
113
+ # 数据结构
114
+ # ─────────────────────────────────────────────
115
+
116
@dataclass
class RepoInfo:
    """Installation-relevant facts about one repository, as built by the fetch_* functions."""

    # --- identity ---
    owner: str
    repo: str
    full_name: str  # "owner/repo"

    # --- metadata ---
    description: str
    stars: int
    language: str  # primary language
    license: str
    default_branch: str

    # --- analysed content ---
    readme: str  # README text
    project_type: list[str]  # e.g. ["python", "docker"]
    dependency_files: dict  # file name/path -> file content

    # --- links ---
    clone_url: str
    homepage: str
131
+
132
+
133
+ # ─────────────────────────────────────────────
134
+ # URL / 名称解析
135
+ # ─────────────────────────────────────────────
136
+
137
# One compiled pattern per supported host, tried in this order.
# The repo segment may contain dots ("next.js"); an optional ".git" suffix is
# dropped, and the match ends at "/", whitespace, "?", "#" or end of input.
_REPO_URL_PATTERNS = tuple(
    re.compile(
        host + r'[:/]([^/]+)/([^/\s?#]+?)(?:\.git)?(?:[/\s?#]|$)',
        re.IGNORECASE,
    )
    for host in (
        r'github\.com',
        r'gitlab\.com',
        r'bitbucket\.org',
        r'gitee\.com',
        r'codeberg\.org',
    )
)


def parse_repo_identifier(identifier: str) -> tuple[str, str]:
    """
    Parse a project identifier and return ``(owner, repo)``.

    Accepted forms:
    - https://github.com/comfyanonymous/ComfyUI
    - https://gitlab.com/user/project
    - https://gitee.com/user/project
    - https://bitbucket.org/user/project
    - https://codeberg.org/user/project
    - github.com/comfyanonymous/ComfyUI
    - git@github.com:owner/repo.git
    - comfyanonymous/ComfyUI
    - comfyanonymous/ComfyUI/tree/main
    - ComfyUI (bare project name; returned as ``("", name)`` so the caller
      can search for it)

    Raises:
        ValueError: if the owner or repo segment contains characters outside
            the allowed set, or the repo is "." / ".." (path traversal).
    """
    identifier = identifier.strip()

    # Host-qualified URL (any supported platform).
    for pattern in _REPO_URL_PATTERNS:
        match = pattern.search(identifier)
        if match:
            owner, repo = match.group(1), match.group(2)
            # Dots are allowed in repo names, so "." / ".." must be rejected
            # explicitly to keep path traversal out.
            if repo in ('.', '..'):
                raise ValueError(f"无效的仓库名: {repo}")
            return owner, repo

    # Plain "owner/repo[/...]" form.
    if "/" in identifier and not identifier.startswith("http"):
        owner, repo = identifier.split("/")[:2]
        # owner: letters, digits, hyphen, underscore; repo additionally allows dots.
        if not re.match(r'^[a-zA-Z0-9_-]+$', owner):
            raise ValueError(f"无效的仓库所有者: {owner}")
        if not re.match(r'^[a-zA-Z0-9_.-]+$', repo) or repo in ('.', '..'):
            raise ValueError(f"无效的仓库名: {repo}")
        return owner, repo

    # Bare project name: the caller has to search for it.
    return "", identifier
182
+
183
+
184
+ # ─────────────────────────────────────────────
185
+ # GitHub API 客户端
186
+ # ─────────────────────────────────────────────
187
+
188
class GitHubFetcher:
    """
    Small client for the GitHub REST API v3 and raw.githubusercontent.com.

    Public repositories need no authentication (limited to 60 requests/hour).
    Setting the GITHUB_TOKEN environment variable sends it as a Bearer token,
    which raises the limit to 5000 requests/hour.
    """

    API_BASE = "https://api.github.com"
    RAW_BASE = "https://raw.githubusercontent.com"

    def __init__(self):
        """Build the default request headers, adding Authorization when GITHUB_TOKEN is set."""
        token = os.getenv("GITHUB_TOKEN", "").strip()
        self._headers = {
            "Accept": "application/vnd.github.v3+json",
            "User-Agent": "gitinstall/1.0",
        }
        if token:
            self._headers["Authorization"] = f"Bearer {token}"

    @staticmethod
    def _retry_after_seconds(value) -> Optional[int]:
        """Convert a Retry-After header into a wait in seconds, clamped to 0..60.

        Returns None when the header is missing or is not a plain integer
        (for example the HTTP-date form), in which case no timed retry is made.
        """
        if not value:
            return None
        try:
            seconds = int(value)
        except (TypeError, ValueError):
            return None
        return max(0, min(seconds, 60))

    def _get(self, url: str, timeout: int = 15, _retries: int = 2) -> Optional[dict | list | str]:
        """Send a GET request; return parsed JSON, or the raw text for non-JSON responses.

        Caching strategy:
        1. Entry within TTL -> returned directly, no network traffic.
        2. Entry expired but has an ETag -> conditional request (If-None-Match):
           - 304 Not Modified -> reuse the cached data and refresh its timestamp
           - 200 -> replace the cache entry
        3. No entry -> plain request.

        Rate-limit responses (403/429) are retried after a numeric Retry-After
        delay; 5xx responses and network errors are retried with exponential
        backoff. When retries are exhausted, stale cached data is returned if
        any exists.

        Raises:
            FileNotFoundError: on HTTP 404.
            PermissionError: on HTTP 403/429 with no stale cache to fall back on.
            RuntimeError: on any other HTTP error, or a network failure with
                no stale cache.
        """
        cached = _cache_read(url)
        if cached is not None:
            return cached

        # An expired entry may still carry an ETag usable for a conditional request.
        old_etag, old_data = _cache_read_etag(url)

        req = urllib.request.Request(url, headers=self._headers)
        if old_etag:
            req.add_header("If-None-Match", old_etag)

        for attempt in range(_retries + 1):
            can_retry = attempt < _retries
            try:
                with urllib.request.urlopen(req, timeout=timeout) as resp:
                    content_type = resp.headers.get("Content-Type", "")
                    resp_etag = resp.headers.get("ETag")
                    body = resp.read().decode("utf-8", errors="replace")
                result = json.loads(body) if "json" in content_type else body
                _cache_write(url, result, etag=resp_etag)
                return result
            except urllib.error.HTTPError as e:
                if e.code == 304 and old_data is not None:
                    # Unchanged on the server: reuse cached data, refresh the TTL.
                    _cache_write(url, old_data, etag=old_etag)
                    return old_data
                elif e.code == 404:
                    raise FileNotFoundError(f"GitHub 上找不到该资源:{url}") from e
                elif e.code in (403, 429):
                    # GitHub reports rate limiting with either status code.
                    wait = self._retry_after_seconds(e.headers.get("Retry-After"))
                    if wait is not None and can_retry:
                        time.sleep(wait)
                        continue
                    # Rate limited, but stale data beats no data.
                    if old_data is not None:
                        return old_data
                    raise PermissionError(
                        "RATELIMIT: GitHub API 频率超限。\n"
                        "设置 GITHUB_TOKEN 环境变量可提升到 5000次/小时。\n"
                        "获取 Token:https://github.com/settings/tokens"
                    ) from e
                elif e.code >= 500 and can_retry:
                    time.sleep(2 ** attempt)
                    continue
                raise RuntimeError(f"GitHub API 错误 {e.code}: {url}") from e
            except urllib.error.URLError as e:
                if can_retry:
                    time.sleep(2 ** attempt)
                    continue
                # Network is down, but stale data beats no data.
                if old_data is not None:
                    return old_data
                raise RuntimeError(f"网络连接失败,请检查网络:{e.reason}") from e

    def search_repo(self, query: str) -> tuple[str, str]:
        """Resolve a bare project name to ``(owner, repo)`` using the first search hit.

        Raises:
            FileNotFoundError: when the search returns no repositories.
        """
        url = f"{self.API_BASE}/search/repositories?q={urllib.parse.quote(query)}&per_page=1"
        data = self._get(url)
        items = data.get("items", []) if isinstance(data, dict) else []
        if not items:
            raise FileNotFoundError(f"在 GitHub 上找不到项目:{query}")
        top = items[0]
        return top["owner"]["login"], top["name"]

    def fetch_repo_info(self, owner: str, repo: str) -> dict:
        """Return the repository metadata from ``/repos/{owner}/{repo}``."""
        return self._get(f"{self.API_BASE}/repos/{owner}/{repo}")

    def _get_raw(self, url: str, timeout: int = 10) -> Optional[str]:
        """GET a raw file with caching; return its text, or None on any failure.

        Only URLs on raw.githubusercontent.com / api.github.com are fetched
        (SSRF guard); anything else returns None without touching the network.
        """
        if not url.startswith(("https://raw.githubusercontent.com/", "https://api.github.com/")):
            return None
        cached = _cache_read(url)
        if cached is not None:
            return cached
        try:
            req = urllib.request.Request(url, headers=self._headers)
            with urllib.request.urlopen(req, timeout=timeout) as resp:
                text = resp.read().decode("utf-8", errors="replace")
        except OSError:
            # HTTPError and URLError are OSError subclasses; this also covers
            # socket timeouts raised while reading the body.
            return None
        _cache_write(url, text)
        return text

    def fetch_readme(self, owner: str, repo: str, branch: str = "main") -> str:
        """
        Return the README text, or "" when none can be fetched.

        First tries the API ``/readme`` endpoint (one request, resolves file
        name and branch itself); on failure falls back to ``README.md`` on
        *branch* via the raw URL.
        """
        import base64

        # Attempt 1: the API readme endpoint.
        try:
            data = self._get(f"{self.API_BASE}/repos/{owner}/{repo}/readme")
            if isinstance(data, dict) and data.get("encoding") == "base64":
                return base64.b64decode(data["content"]).decode("utf-8", errors="replace")
        except (FileNotFoundError, RuntimeError, PermissionError):
            pass

        # Attempt 2 (fallback): README.md on the given branch.
        text = self._get_raw(f"{self.RAW_BASE}/{owner}/{repo}/{branch}/README.md")
        if text is not None:
            return text

        return ""  # a missing README must not block install-plan generation

    def fetch_file(self, owner: str, repo: str, path: str, branch: str = "main") -> Optional[str]:
        """Return the content of *path*, trying *branch*, then "master", then "main".

        Each distinct branch is tried once. Returns None when the file is not
        found on any of them.
        """
        # dict.fromkeys de-duplicates while keeping order, so branch="main"
        # does not cause the same URL to be requested twice.
        for candidate in dict.fromkeys((branch, "master", "main")):
            text = self._get_raw(f"{self.RAW_BASE}/{owner}/{repo}/{candidate}/{path}")
            if text is not None:
                return text
        return None
342
+
343
+
344
+ # ─────────────────────────────────────────────
345
+ # 项目类型识别
346
+ # ─────────────────────────────────────────────
347
+
348
# Repository "language" value (lower-cased) -> project type tag.
_LANGUAGE_TYPES = {
    "python": "python", "javascript": "node", "typescript": "node",
    "rust": "rust", "go": "go", "java": "java", "kotlin": "kotlin",
    "c++": "cpp", "c": "c", "ruby": "ruby", "php": "php",
    "c#": "dotnet", "swift": "swift", "dart": "dart",
    "scala": "scala", "shell": "shell",
    "elixir": "elixir", "erlang": "erlang", "haskell": "haskell",
    "lua": "lua", "perl": "perl", "r": "r", "julia": "julia",
    "zig": "zig", "clojure": "clojure", "nim": "nim",
    "crystal": "crystal", "hcl": "hcl",
}

# Exact dependency/build file name -> project type tag.
_DEP_FILE_TYPES = {
    "requirements.txt": "python",
    "setup.py": "python",
    "setup.cfg": "python",
    "pyproject.toml": "python",
    "environment.yml": "conda",
    "Pipfile": "python",
    "package.json": "node",
    "yarn.lock": "node",
    "pnpm-lock.yaml": "node",
    "Cargo.toml": "rust",
    "go.mod": "go",
    "pom.xml": "java",
    "build.gradle": "java",
    "Dockerfile": "docker",
    "docker-compose.yml": "docker",
    "docker-compose.yaml": "docker",
    "Makefile": "make",
    "CMakeLists.txt": "cmake",
    "configure": "autotools",
    "configure.ac": "autotools",
    "Makefile.am": "autotools",
    "build.gradle.kts": "java",
    "Gemfile": "ruby",
    "composer.json": "php",
    "Package.swift": "swift",
    "mix.exs": "elixir",
    "rebar.config": "erlang",
    "pubspec.yaml": "dart",
    "build.sbt": "scala",
    "meson.build": "meson",
    "WORKSPACE": "bazel",
    "BUILD.bazel": "bazel",
    "stack.yaml": "haskell",
    "project.clj": "clojure",
    "DESCRIPTION": "r",
    "Project.toml": "julia",
    "build.zig": "zig",
    "nimble": "nim",
    "shard.yml": "crystal",
    "cpanfile": "perl",
    "Makefile.PL": "perl",
    "Build.PL": "perl",
    # PlatformIO markers
    "platformio.ini": "platformio",
    "library.json": "platformio",
    "library.properties": "platformio",
}

# File-name suffix -> project type tag, for files whose stem varies per project.
_DEP_SUFFIX_TYPES = (
    (".cabal", "haskell"),
    (".nimble", "nim"),
    (".ino", "arduino"),
)

# Project type tag -> lower-case README substrings that imply it.
_README_KEYWORD_TYPES = {
    "pytorch": ["torch", "pytorch", "pip install torch"],
    "tensorflow": ["tensorflow", "pip install tensorflow"],
    "diffusers": ["diffusers", "stable diffusion", "stable-diffusion"],
    "ollama": ["ollama"],
    "docker": ["docker-compose", "dockerfile"],
    "comfyui": ["comfyui"],
    "gradio": ["gradio"],
    "fastapi": ["fastapi"],
    "nextjs": ["next.js", "nextjs"],
}


def detect_project_types(
    repo_data: dict,
    readme: str,
    dependency_files: dict,
) -> list[str]:
    """
    Identify a project's technology stack.

    Three signals are combined: the repository's primary language, the names
    of the dependency files that were found, and keywords in the README.

    Args:
        repo_data: Mapping with an optional "language" entry; None is treated
            as empty.
        readme: README text; None is treated as empty.
        dependency_files: Mapping whose keys are dependency file names or
            relative paths (only the final path component is inspected);
            None is treated as empty.

    Returns:
        Sorted list of type tags, e.g. ["docker", "python", "pytorch"].
    """
    types = set()

    # Signal 1: primary language.
    lang = ((repo_data or {}).get("language") or "").lower()
    if lang in _LANGUAGE_TYPES:
        types.add(_LANGUAGE_TYPES[lang])

    # Signal 2: dependency file names (exact names, then variable-stem suffixes).
    dep_names = {Path(fname).name for fname in (dependency_files or {})}
    for name in dep_names:
        exact = _DEP_FILE_TYPES.get(name)
        if exact is not None:
            types.add(exact)
        for suffix, ptype in _DEP_SUFFIX_TYPES:
            if name.endswith(suffix):
                types.add(ptype)
                break

    # Signal 3: README keywords (plain substring search).
    readme_lower = (readme or "").lower()
    for framework, keywords in _README_KEYWORD_TYPES.items():
        if any(kw in readme_lower for kw in keywords):
            types.add(framework)

    # conda/anaconda/miniconda need word boundaries so that e.g. "secondary"
    # (which contains "conda") does not count.
    if re.search(r'\bconda\b|\banaconda\b|\bminiconda\b', readme_lower):
        types.add("conda")
    # The bare word "docker" (docker commands) also counts as docker.
    if re.search(r'\bdocker\b', readme_lower):
        types.add("docker")

    return sorted(types)
466
+
467
+
468
+ # 我们关心的依赖文件集合(用于快速 set 查询)
469
_KNOWN_DEP_FILES = {
    # Python
    "requirements.txt",
    "requirements-dev.txt",
    "setup.py",
    "pyproject.toml",
    # Conda
    "environment.yml",
    # Node.js
    "package.json",
    # Rust
    "Cargo.toml",
    # Go
    "go.mod",
    # Docker
    "Dockerfile",
    "docker-compose.yml",
    "docker-compose.yaml",
    # Java / Kotlin
    "pom.xml",
    "build.gradle",
    "build.gradle.kts",
    # Scala
    "build.sbt",
    # Ruby
    "Gemfile",
    # PHP
    "composer.json",
    # .NET / C#: no entries (.csproj/.sln are usually not in the repository
    # root; detection relies on the language field instead)
    # C / C++
    "CMakeLists.txt",
    "Makefile",
    "configure",
    "configure.ac",
    "Makefile.am",
    # Meson / Bazel
    "meson.build",
    "WORKSPACE",
    "BUILD.bazel",
    # Swift
    "Package.swift",
    # Dart / Flutter
    "pubspec.yaml",
    # Elixir / Erlang
    "mix.exs",
    "rebar.config",
    # Haskell
    "stack.yaml",
    # Zig
    "build.zig",
    "build.zig.zon",
    ".zig-version",
    # Clojure
    "project.clj",
    # Julia
    "Project.toml",
    # R
    "DESCRIPTION",
    # Perl
    "cpanfile",
    "Makefile.PL",
    "Build.PL",
    # Crystal
    "shard.yml",
    # Nim: no fixed entry, because .nimble file names vary per project
}
517
+
518
+
519
def extract_dependency_files(
    fetcher: GitHubFetcher,
    owner: str,
    repo: str,
    branch: str,
) -> dict:
    """
    Download the known dependency files present in the repository root.

    Strategy: list the root directory once through the contents API, then
    download only those files from ``_KNOWN_DEP_FILES`` that the listing
    contains. If the listing cannot be obtained, every known file name is
    tried instead (compatibility fallback).

    Args:
        fetcher: Client used for both the listing and the raw downloads.
        owner: Repository owner.
        repo: Repository name.
        branch: Branch to read from.

    Returns:
        Mapping of file name to file content, each truncated to 15000 characters.
    """
    # 1. Root listing (a single API request).
    try:
        # The branch goes into a query string, so it must be percent-encoded
        # (branch names may contain "/", "#", "&", ...).
        ref = urllib.parse.quote(branch, safe="")
        contents = fetcher._get(
            f"{fetcher.API_BASE}/repos/{owner}/{repo}/contents/?ref={ref}"
        )
        if not isinstance(contents, list):
            raise RuntimeError("contents 返回非列表")
        root_files = {item["name"] for item in contents if item.get("type") == "file"}
    except Exception:
        # Deliberately broad: any failure of the listing (HTTP error, rate
        # limit, unexpected payload shape) falls back to probing every file.
        root_files = _KNOWN_DEP_FILES

    # 2. Download only what is both present and of interest.
    raw_base = f"{fetcher.RAW_BASE}/{owner}/{repo}/{branch}"
    result = {}
    for fname in sorted(root_files & _KNOWN_DEP_FILES):
        text = fetcher._get_raw(f"{raw_base}/{fname}")
        if text is not None:
            result[fname] = text[:15000]
    return result
552
+
553
+
554
+ # ─────────────────────────────────────────────
555
+ # 主入口
556
+ # ─────────────────────────────────────────────
557
+
558
def fetch_project(identifier: str) -> RepoInfo:
    """
    Collect everything needed to plan an installation for one project.

    Routing by platform:
    - GitHub URL / "owner/repo" / bare name -> GitHub API (cached, ETag-aware,
      with search for bare names)
    - GitLab / Bitbucket / Gitee / Codeberg URL -> the matching multi_source provider
    - Local paths are not handled here; use fetch_project_from_path().

    Args:
        identifier: Platform URL, "owner/repo", or a bare project name.

    Returns:
        RepoInfo with README (truncated to 15000 characters), dependency
        files, detected project types and basic metadata.
    """
    from multi_source import detect_platform, get_provider

    platform, ms_owner, ms_repo = detect_platform(identifier)

    # ── Non-GitHub platforms: delegate to the multi_source provider ──
    if platform != "github":
        logger.info(f"🌐 检测到 {platform} 平台,使用对应 Provider...")
        provider = get_provider(platform)
        meta = provider.get_repo_metadata(ms_owner, ms_repo)
        # NOTE(review): assumes a provider may return None for a missing
        # README; normalised to "" so the slicing below cannot fail.
        readme = provider.get_readme(ms_owner, ms_repo, meta.default_branch) or ""

        # Probe every known dependency file on the default branch.
        dep_files = {}
        for fname in sorted(_KNOWN_DEP_FILES):
            content = provider.get_file_content(ms_owner, ms_repo, fname, meta.default_branch)
            if content:
                dep_files[fname] = content[:15000]

        project_types = detect_project_types({"language": meta.language}, readme, dep_files)

        return RepoInfo(
            owner=meta.owner,
            repo=meta.repo,
            full_name=meta.full_name,
            description=meta.description or "",
            stars=meta.stars,
            language=meta.language or "Unknown",
            license=meta.license or "Unknown",
            default_branch=meta.default_branch,
            readme=readme[:15000],
            project_type=project_types,
            dependency_files=dep_files,
            clone_url=meta.clone_url,
            homepage=meta.homepage,
        )

    # ── GitHub: cached API flow with search support ──
    fetcher = GitHubFetcher()

    # 1. Resolve owner/repo (search when only a project name was given).
    owner, repo = parse_repo_identifier(identifier)
    if not owner:
        logger.info(t("fetcher.searching", repo=repo))
        owner, repo = fetcher.search_repo(repo)

    logger.info(t("fetcher.fetching_info", owner=owner, repo=repo))

    # 2. Repository metadata.
    repo_data = fetcher.fetch_repo_info(owner, repo)
    branch = repo_data.get("default_branch") or "main"

    # 3. README.
    logger.info(t("fetcher.reading_readme"))
    readme = fetcher.fetch_readme(owner, repo, branch)

    # 4. Dependency files.
    logger.info(t("fetcher.detecting_deps"))
    dep_files = extract_dependency_files(fetcher, owner, repo, branch)

    # 5. Project types.
    project_types = detect_project_types(repo_data, readme, dep_files)

    # "license" may be null, and so may its "spdx_id"; both map to "Unknown".
    license_id = (repo_data.get("license") or {}).get("spdx_id") or "Unknown"

    return RepoInfo(
        owner=owner,
        repo=repo,
        full_name=f"{owner}/{repo}",
        description=repo_data.get("description") or "",
        stars=repo_data.get("stargazers_count", 0),
        language=repo_data.get("language") or "Unknown",
        license=license_id,
        default_branch=branch,
        readme=readme[:15000],  # cap README length to save LLM tokens
        project_type=project_types,
        dependency_files=dep_files,
        clone_url=repo_data.get("clone_url", f"https://github.com/{owner}/{repo}.git"),
        homepage=repo_data.get("homepage") or f"https://github.com/{owner}/{repo}",
    )
651
+
652
+
653
def format_project_summary(info: RepoInfo) -> str:
    """Render a four-line, human-readable summary of *info*.

    Shows the full name, star count (thousands-separated), language, project
    types (joined with " | ", or "Unknown" when empty), the first 100
    characters of the description, and the homepage. A missing (None)
    description or star count is rendered as empty / 0 instead of raising.
    """
    stars = f"{info.stars or 0:,}"
    types = " | ".join(info.project_type) or "Unknown"
    description = (info.description or "")[:100]
    return (
        f"📦 {info.full_name}\n"
        f" ⭐ {stars} stars | 语言:{info.language} | 类型:{types}\n"
        f" 📝 {description}\n"
        f" 🔗 {info.homepage}"
    )
663
+
664
+
665
+ # ─────────────────────────────────────────────
666
+ # 本地模式:git clone --depth 1 分析
667
+ # ─────────────────────────────────────────────
668
+
669
def _detect_language_from_files(root: Path) -> str:
    """Guess the primary language of the tree under *root* by counting file extensions.

    Hidden files and hidden directories (names starting with ".") are
    ignored at every depth. Test/vendor directories are ignored only when
    they sit directly under *root*. Ignored directories are pruned from the
    walk, so their contents (e.g. ``.git`` objects, ``node_modules``) are
    never visited.

    Returns:
        The language with the most files, or "Unknown" when no recognised
        file is found. Ties go to the alphabetically first language so the
        result does not depend on filesystem order.
    """
    # Keys are lower-case because suffixes are lower-cased before lookup.
    ext_lang = {
        ".py": "Python", ".js": "JavaScript", ".ts": "TypeScript",
        ".java": "Java", ".kt": "Kotlin", ".go": "Go", ".rs": "Rust",
        ".rb": "Ruby", ".php": "PHP", ".cs": "C#", ".swift": "Swift",
        ".c": "C", ".cpp": "C++", ".cc": "C++", ".cxx": "C++", ".h": "C", ".hpp": "C++",
        ".dart": "Dart", ".scala": "Scala", ".sh": "Shell",
        ".ex": "Elixir", ".exs": "Elixir", ".erl": "Erlang",
        ".hs": "Haskell", ".lua": "Lua", ".pl": "Perl", ".pm": "Perl",
        ".r": "R", ".jl": "Julia", ".zig": "Zig",
        ".clj": "Clojure", ".nim": "Nim", ".cr": "Crystal",
        ".tf": "HCL", ".hcl": "HCL",
    }
    # Top-level directories whose files would skew the statistics
    # (tests, fixtures, vendored code).
    skip_top_dirs = {"t", "test", "tests", "spec", "vendor", "node_modules",
                     "third_party", "thirdparty", "fixtures", "testdata"}
    counts: dict[str, int] = {}
    try:
        at_top = True
        for dirpath, dirnames, filenames in os.walk(root):
            # Editing dirnames in place tells os.walk not to descend into them.
            dirnames[:] = [
                d for d in dirnames
                if not d.startswith(".")
                and not (at_top and d.lower() in skip_top_dirs)
            ]
            at_top = False
            for name in filenames:
                if name.startswith("."):
                    continue
                lang = ext_lang.get(Path(name).suffix.lower())
                # Only stat files that would count; skips broken symlinks etc.
                if lang and os.path.isfile(os.path.join(dirpath, name)):
                    counts[lang] = counts.get(lang, 0) + 1
    except Exception:
        # Best effort: keep whatever was counted before the failure.
        pass
    if not counts:
        return "Unknown"
    return max(sorted(counts), key=counts.get)
702
+
703
+
704
def _find_readme(root: Path) -> str:
    """Return the README text found directly under *root*, truncated to 15000 characters.

    The conventional names are tried first, in priority order. If none of
    them yields a file, any entry named "readme" or "readme.<ext>" in any
    letter case (e.g. ``Readme.md``, ``README.markdown``) is accepted,
    preferring .md, .markdown, .rst, .txt, then no extension. Unreadable
    candidates are skipped.

    Returns:
        The README text, or "" when no readable README exists.
    """
    preferred = ("README.md", "readme.md", "README.rst", "README.txt", "README")
    candidates = [root / name for name in preferred]

    # Fallback: case-insensitive scan of the directory.
    ext_rank = {".md": 0, ".markdown": 1, ".rst": 2, ".txt": 3, "": 4}
    try:
        extras = [
            entry for entry in root.iterdir()
            if entry.name.lower() == "readme" or entry.name.lower().startswith("readme.")
        ]
    except OSError:
        extras = []
    extras.sort(key=lambda entry: (ext_rank.get(entry.suffix.lower(), len(ext_rank)), entry.name))
    candidates.extend(extras)

    for candidate in candidates:
        if not candidate.is_file():
            continue
        try:
            return candidate.read_text(encoding="utf-8", errors="replace")[:15000]
        except OSError:
            continue  # unreadable: try the next candidate
    return ""
715
+
716
+
717
def _extract_local_dep_files(root: Path) -> dict[str, str]:
    """Collect dependency files from the tree under *root*, keyed by POSIX relative path.

    A file is collected when its name is in ``_KNOWN_DEP_FILES`` or ends with
    ``.cabal`` / ``.nimble``. Directories that are hidden (name starts with
    ".") or listed in ``_LOCAL_ANALYSIS_SKIP_DIRS`` (compared lower-cased)
    are pruned from the walk, so their contents are never visited. Dotfiles
    are accepted only directly under *root*.

    Directories and files are visited in sorted order, which makes the key
    order of the result deterministic.

    Returns:
        Mapping of relative path to file content, each truncated to 15000
        characters. Unreadable files are skipped.
    """
    result = {}
    at_top = True
    for dirpath, dirnames, filenames in os.walk(root):
        # Editing dirnames in place tells os.walk not to descend into them.
        dirnames[:] = sorted(
            d for d in dirnames
            if not d.startswith(".") and d.lower() not in _LOCAL_ANALYSIS_SKIP_DIRS
        )
        for name in sorted(filenames):
            if name not in _KNOWN_DEP_FILES and not name.endswith((".cabal", ".nimble")):
                continue
            if not at_top and name.startswith("."):
                continue
            path = Path(dirpath, name)
            try:
                if not path.is_file():
                    continue
                text = path.read_text(encoding="utf-8", errors="replace")
                result[path.relative_to(root).as_posix()] = text[:15000]
            except Exception:
                continue  # best effort: skip anything that cannot be read
        at_top = False
    return result
739
+
740
+
741
def _checked_out_branch(repo_dir: Path, fallback: str = "main") -> str:
    """Return the branch checked out in *repo_dir*, or *fallback* if it cannot be determined."""
    try:
        proc = subprocess.run(
            ["git", "-C", str(repo_dir), "rev-parse", "--abbrev-ref", "HEAD"],
            capture_output=True, text=True, timeout=10,
        )
    except (OSError, subprocess.SubprocessError):
        return fallback
    branch = proc.stdout.strip()
    # A detached HEAD is reported as "HEAD", which is not a branch name.
    if proc.returncode != 0 or not branch or branch == "HEAD":
        return fallback
    return branch


def fetch_project_local(identifier: str) -> RepoInfo:
    """
    Local mode: shallow-clone the repository, then analyse the working tree.

    Compared with the API mode this uses no API quota; only ``git`` and
    network access to the host are required. The clone lives in a temporary
    directory that is always removed before returning.

    Args:
        identifier: URL on any supported platform, or "owner/repo".

    Returns:
        RepoInfo with README, dependency files, project types and the
        branch the clone checked out. Description, stars and license are not
        available in this mode ("" / 0 / "Unknown").

    Raises:
        ValueError: when only a bare project name was given.
        FileNotFoundError: when git reports that the repository does not exist.
        RuntimeError: on any other ``git clone`` failure.
    """
    from multi_source import detect_platform

    platform, owner, repo = detect_platform(identifier)
    if not owner:
        raise ValueError(
            f"本地模式需要完整的 owner/repo 格式,无法仅通过项目名 '{repo}' 分析。\n"
            f"请使用完整格式,如:owner/{repo}"
        )

    # Platform -> host of the HTTPS clone URL; unknown platforms use GitHub.
    hosts = {
        "github": "github.com",
        "gitlab": "gitlab.com",
        "bitbucket": "bitbucket.org",
        "gitee": "gitee.com",
        "codeberg": "codeberg.org",
    }
    host = hosts.get(platform, "github.com")
    clone_url = f"https://{host}/{owner}/{repo}.git"
    homepage = clone_url.removesuffix(".git")

    # Shallow clone into a throw-away directory.
    tmp_dir = tempfile.mkdtemp(prefix="gitinstall_")
    clone_path = Path(tmp_dir) / repo

    try:
        logger.info(t("fetcher.cloning", owner=owner, repo=repo))
        result = subprocess.run(
            # "--" ends option parsing, so neither the URL nor the path can
            # be interpreted as a git option.
            ["git", "clone", "--depth", "1", "--single-branch", "--", clone_url, str(clone_path)],
            capture_output=True, text=True, timeout=120,
        )
        if result.returncode != 0:
            stderr = result.stderr.strip()
            if "not found" in stderr.lower() or "does not exist" in stderr.lower():
                raise FileNotFoundError(f"GitHub 上找不到项目:{owner}/{repo}")
            raise RuntimeError(f"git clone 失败:{stderr}")

        # Analyse the working tree.
        logger.info(t("fetcher.local_analysis"))
        readme = _find_readme(clone_path)
        dep_files = _extract_local_dep_files(clone_path)
        language = _detect_language_from_files(clone_path)

        # Same shape as the part of the API payload detect_project_types reads.
        project_types = detect_project_types({"language": language}, readme, dep_files)

        return RepoInfo(
            owner=owner,
            repo=repo,
            full_name=f"{owner}/{repo}",
            description="",  # not available without the API
            stars=0,  # not available without the API
            language=language,
            license="Unknown",  # could later be detected from a LICENSE file
            # The clone checks out the remote's default branch; report it
            # instead of assuming "main".
            default_branch=_checked_out_branch(clone_path),
            readme=readme,
            project_type=project_types,
            dependency_files=dep_files,
            clone_url=clone_url,
            homepage=homepage,
        )
    finally:
        # Always remove the temporary clone.
        shutil.rmtree(tmp_dir, ignore_errors=True)
827
+
828
+
829
+ # ─────────────────────────────────────────────
830
+ # 本地路径模式:直接分析已有目录
831
+ # ─────────────────────────────────────────────
832
+
833
def is_local_path(identifier: str) -> bool:
    """Return True when *identifier* looks like a filesystem path rather than a repository.

    Recognised forms (after stripping surrounding whitespace):
    - absolute POSIX paths ("/..."),
    - paths relative to the current or parent directory ("." / "./x" /
      ".." / "../x", also with backslashes),
    - home-relative paths ("~" / "~/x"),
    - Windows drive paths ("C:\\x" or "C:/x").
    """
    s = identifier.strip()
    if s in (".", "..", "~"):
        return True
    if s.startswith(("/", "./", "../", "~/", ".\\", "..\\", "~\\")):
        return True
    # Drive letter followed by a path separator.
    return re.match(r'[A-Za-z]:[\\/]', s) is not None
843
+
844
+
845
def fetch_project_from_path(path: str) -> RepoInfo:
    """
    Analyse a project that already exists on disk, without any network access.

    Difference from fetch_project_local():
    - fetch_project_local() clones first (network required);
    - fetch_project_from_path() only reads the given directory (fully offline).

    Typical uses: private projects that are not on a hosting platform,
    projects under local development, already-downloaded packages.

    Owner, repo name and clone URL are taken from a remote ``url = ...`` line
    in ``.git/config`` when one ending in ``owner/repo`` exists; otherwise
    the owner is "_local", the repo name is the directory name and the clone
    URL is the directory path.

    Args:
        path: Directory path, absolute or relative; "~" is expanded.

    Returns:
        RepoInfo with README, dependency files and project types. ``license``
        is "Detected" when a license file exists in the root, else "Unknown".

    Raises:
        FileNotFoundError: if *path* does not exist or is not a directory.
    """
    # Expand "~" and make the path absolute.
    root = Path(path).expanduser().resolve()

    if not root.is_dir():
        raise FileNotFoundError(f"本地路径不存在或不是目录:{root}")

    logger.info(f"📂 分析本地项目:{root}")

    # Defaults when no usable git remote is found.
    repo_name = root.name
    owner = "_local"
    clone_url = str(root)
    try:
        git_config = root / ".git" / "config"
        if git_config.is_file():
            content = git_config.read_text(encoding="utf-8", errors="replace")
            # One pattern captures the URL together with its owner and repo,
            # so all three always come from the same remote line. The repo
            # segment may contain dots; a trailing ".git" is dropped.
            m = re.search(
                r'url\s*=\s*(\S+[:/]([^/\s]+)/([^/\s]+?)(?:\.git)?)\s*$',
                content, re.MULTILINE,
            )
            if m:
                clone_url, owner, repo_name = m.group(1), m.group(2), m.group(3)
    except OSError:
        pass  # unreadable git config: keep the defaults

    # Analyse the directory.
    readme = _find_readme(root)
    dep_files = _extract_local_dep_files(root)
    language = _detect_language_from_files(root)

    # License: only presence of a license file is checked, not its type.
    license_names = ("LICENSE", "LICENSE.md", "LICENSE.txt", "LICENCE", "COPYING")
    has_license = any((root / lname).is_file() for lname in license_names)
    license_id = "Detected" if has_license else "Unknown"

    project_types = detect_project_types({"language": language}, readme, dep_files)

    return RepoInfo(
        owner=owner,
        repo=repo_name,
        full_name=f"{owner}/{repo_name}" if owner != "_local" else repo_name,
        description="",
        stars=0,
        language=language,
        license=license_id,
        default_branch="",
        readme=readme,
        project_type=project_types,
        dependency_files=dep_files,
        clone_url=clone_url,
        homepage="",
    )