repomap_cli-1.0.0-py3-none-any.whl

repomap/core.py ADDED
@@ -0,0 +1,730 @@
+ #!/usr/bin/env python3
+ """
+ Repo Map Core — Tree-sitter Analysis Engine (Coordinator Layer)
+ ================================================================
+ Provides scanning, parsing, graph building, and AI overview capabilities
+ for the RepoMap CLI.
+ 
+ Goal: before reading code file by file, an AI first uses this tool to build
+ a "project map": business module boundaries, core call relationships,
+ the distribution of high-density files, entry points, and so on, letting it
+ locate and understand code more efficiently.
+ 
+ Install & run (CLI mode):
+     uv run python -m repomap_cli overview --project /path/to/your/project
+ 
+ Local debugging (print the repo map directly):
+     python -m repomap_cli overview --project /path/to/your/project
+     python -m repomap_cli call-chain --project /path/to/your/project --symbol MyClassName
+ """
+ 
+ from __future__ import annotations
+ 
+ import logging
+ import os
+ import subprocess
+ import sys
+ from pathlib import Path, PurePosixPath
+ from typing import Any
+ 
+ from .ai import (
+     render_call_chain_report,
+     render_file_detail_report,
+     render_overview_report,
+ )
+ from .parser import EXT_TO_LANG, TreeSitterAdapter
+ from .ranking import EdgeBuilder, GraphAnalyzer
+ from .resolver import ImportResolver
+ from . import (
+     RepoGraph,
+     ScanStats,
+     Symbol,
+     get_incremental_cache_path,
+     serialize_symbol,
+ )
+ 
+ # ── Logging: always write to stderr; never pollute the CLI's stdout ─────────
+ logging.basicConfig(
+     level=logging.INFO,
+     format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
+     stream=sys.stderr,
+ )
+ logger = logging.getLogger("repomap")
+ 
+ DEFAULT_MAX_FILE_BYTES = 512 * 1024
+ 
+ SKIP_DIR_NAMES = {
+     ".cache",
+     ".git",
+     ".hg",
+     ".idea",
+     ".mypy_cache",
+     ".next",
+     ".nox",
+     ".nuxt",
+     ".parcel-cache",
+     ".pnpm-store",
+     ".pytest_cache",
+     ".ruff_cache",
+     ".svelte-kit",
+     ".tox",
+     ".turbo",
+     ".venv",
+     ".vscode",
+     ".yarn",
+     "__pypackages__",
+     "__pycache__",
+     "build",
+     "coverage",
+     "dist",
+     "env",
+     "ENV",
+     "node_modules",
+     "site-packages",
+     "target",
+     "venv",
+     # Third-party library directories
+     "monaco-editor",
+     "monaco",
+     "vendor",
+     "third_party",
+     "third-party",
+     "libs",
+     "external",
+ }
+ 
+ SKIP_FILE_NAMES = {
+     "package-lock.json",
+     "npm-shrinkwrap.json",
+     "bun.lock",
+     "bun.lockb",
+     "yarn.lock",
+     "pnpm-lock.yaml",
+     "Cargo.lock",
+ }
+ 
+ SUPPORTING_FILE_NAMES = {
+     "AGENTS.md",
+     "CLAUDE.md",
+     "README.md",
+     "SKILL.md",
+     "CONTRIBUTING.md",
+     "CHANGELOG.md",
+     "Makefile",
+     "Dockerfile",
+     "docker-compose.yml",
+     "compose.yml",
+     "package.json",
+     "pyproject.toml",
+     "Cargo.toml",
+     "go.mod",
+     "requirements.txt",
+     "tsconfig.json",
+     "tsconfig.app.json",
+     "tsconfig.node.json",
+     "vitest.config.ts",
+     "vitest.config.js",
+     "vite.config.ts",
+     "vite.config.js",
+     "eslint.config.js",
+     "eslint.config.mjs",
+     "pytest.ini",
+     "tox.ini",
+ }
+ 
+ SENSITIVE_SUPPORTING_FILE_NAMES = {
+     ".env",
+     ".env.local",
+     ".env.development",
+     ".env.production",
+     ".env.test",
+ }
+ 
+ 
+ # ═══════════════════════════════════════════════════════════════════════════════
+ # Core engine (coordinator layer)
+ # ═══════════════════════════════════════════════════════════════════════════════
+ 
+ 
+ class RepoMapEngine:
+     """
+     Project map engine: scan the codebase → build a symbol dependency graph →
+     run PageRank → emit an AI-friendly summary.
+ 
+     The "project map" it gives an AI includes:
+     1. Module/file distribution (which files are dense and likely core business logic)
+     2. Entry points (main/app/index, and the like)
+     3. Important symbols (high PageRank means called/imported from many places)
+     4. Call chains (who calls a given function, and what it calls)
+     """
+ 
+     IMPORT_WEIGHT = 0.35
+     CALL_WEIGHT = 0.50
+ 
+     def __init__(self, project_root: str) -> None:
+         self.project_root = Path(project_root).resolve()
+         self.ts = TreeSitterAdapter()
+         self.graph = RepoGraph()
+         # file -> mtime incremental cache (store only mtimes, not tree objects, to avoid memory leaks)
+         self._cache: dict[str, float] = {}
+         self.scan_state = "idle"
+         self.max_file_bytes = self._read_max_file_bytes()
+         self.scan_stats = ScanStats()
+         # Sub-components
+         self._resolver: ImportResolver | None = None
+         self._analyzer = GraphAnalyzer(self.graph)
+         # Route extraction results
+         self.routes: list = []
+ 
+     @staticmethod
+     def _read_max_file_bytes() -> int:
+         raw = os.getenv("REPOMAP_MAX_FILE_BYTES", str(DEFAULT_MAX_FILE_BYTES))
+         try:
+             value = int(raw)
+         except ValueError:
+             return DEFAULT_MAX_FILE_BYTES
+         return max(0, value)
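+ 
+     # REPOMAP_MAX_FILE_BYTES=1048576, for example, raises the per-file cap to
+     # 1 MiB; non-integer values fall back to the 512 KiB default, and negative
+     # values clamp to 0 (every non-empty file is then treated as oversized).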
+ 
+     # ═══════════════════════════════════════════════════════════════════════════
+     # Scan main flow
+     # ═══════════════════════════════════════════════════════════════════════════
+ 
+     def scan(self, max_files: int = 8000, max_scan_time: float = 300.0,
+              incremental: bool = False) -> None:
+         """Three-phase scan: extract symbols → build dependency edges → PageRank.
+ 
+         Args:
+             max_files: maximum number of files to scan
+             max_scan_time: scan timeout in seconds; defaults to 300 (5 minutes)
+             incremental: attempt an incremental scan, re-parsing only git-changed files
+         """
+         import time
+         start_time = time.time()
+ 
+         self.scan_state = "invalid"
+         if not self.ts.parsers:
+             raise RuntimeError(
+                 "No tree-sitter language bindings detected.\n"
+                 "Install them: pip install tree-sitter tree-sitter-python tree-sitter-javascript ..."
+             )
+ 
+         self.graph = RepoGraph()
+         self._cache = {}
+         self.scan_stats = ScanStats()
+         self.routes = []
+         self._inc_cache_loaded = False
+ 
+         # Try to load the incremental cache
+         inc_cache = None
+         if incremental:
+             inc_cache = self._load_incremental_cache_if_valid()
+         if inc_cache:
+             changed_files, deleted_files = self._git_changed_files()
+             all_candidate_files = self._list_files(max_files)
+             # Filter: keep only changed files that are still in the project
+             changed_set = set(changed_files) & set(all_candidate_files)
+             unchanged_set = set(inc_cache.files.keys()) - changed_set - set(deleted_files)
+             # Parse only the changed files
+             files_to_scan = [f for f in all_candidate_files if f in changed_set]
+             logger.info(
+                 f"Incremental scan: {len(files_to_scan)} changed, "
+                 f"{len(unchanged_set)} unchanged, {len(deleted_files)} deleted"
+             )
+             # Restore unchanged files
+             for f in sorted(unchanged_set):
+                 if f in all_candidate_files:
+                     self._restore_from_inc_cache(f, inc_cache.files[f])
+             self._inc_cache_loaded = True
+         else:
+             files_to_scan = self._list_files(max_files)
+             logger.info(f"Found {len(files_to_scan)} source files")
+ 
+         try:
+             for f in files_to_scan:
+                 # Timeout circuit-breaker check
+                 elapsed = time.time() - start_time
+                 if elapsed > max_scan_time:
+                     self.scan_stats.timeout_triggered = True
+                     logger.warning(f"Scan timed out: ran {elapsed:.1f}s, past the {max_scan_time}s limit")
+                     break
+ 
+                 try:
+                     self._process_file(f)
+                 except Exception as e:
+                     if len(self.scan_stats.failed_files) < 5:
+                         self.scan_stats.failed_files.append(f"{f}: {type(e).__name__}: {str(e)[:50]}")
+                     logger.warning(f"Failed to process file {f}: {e}")
+ 
+             self._build_edges()
+             self._analyzer = GraphAnalyzer(self.graph)
+             self._calculate_pagerank()
+             self.scan_state = "scanned"
+         except Exception:
+             self.scan_state = "invalid"
+             raise
+         finally:
+             self.scan_stats.scan_duration_ms = int((time.time() - start_time) * 1000)
+ 
+         # Save the incremental baseline after a full scan
+         if not self._inc_cache_loaded and self.scan_state == "scanned":
+             try:
+                 from .toolkit import save_incremental_cache
+                 save_incremental_cache(str(self.project_root), self)
+             except Exception as e:
+                 logger.debug(f"Failed to save incremental cache: {e}")
+ 
+         sym_count = len(self.graph.symbols)
+         edge_count = sum(len(v) for v in self.graph.outgoing.values())
+ 
+         summary_parts = [f"Scan complete — {sym_count} symbols, {edge_count} edges, {self.scan_stats.scan_duration_ms}ms"]
+         if self.scan_stats.skipped_files:
+             summary_parts.append(f", {self.scan_stats.skipped_files} skipped (unchanged)")
+         if self.scan_stats.failed_files:
+             summary_parts.append(f", {len(self.scan_stats.failed_files)} failed files")
+         if self.scan_stats.timeout_triggered:
+             summary_parts.append(", timeout triggered")
+ 
+         if self.scan_stats.failed_files or self.scan_stats.timeout_triggered:
+             logger.warning("".join(summary_parts))
+         else:
+             logger.info("".join(summary_parts))
+ 
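+     # Usage note: a later scan(incremental=True) loads the baseline saved above,
+     # restores files that git reports as unchanged, and re-parses only the rest;
+     # the baseline is discarded whenever git HEAD changes.
+ 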
+     def is_scanned(self) -> bool:
+         return self.scan_state == "scanned"
+ 
+     # ── Incremental-scan helpers ─────────────────────────────────────────────
+ 
+     def _load_incremental_cache_if_valid(self) -> Any | None:
+         """Load the incremental cache and validate it (project path + git HEAD must match)."""
+         try:
+             from .toolkit import load_incremental_cache
+             cache = load_incremental_cache(str(self.project_root))
+             if cache is None or not cache.files:
+                 return None
+             # Check whether git HEAD matches
+             try:
+                 result = subprocess.run(
+                     ["git", "rev-parse", "HEAD"],
+                     cwd=self.project_root, capture_output=True, text=True, timeout=5,
+                 )
+                 if result.returncode != 0:
+                     return None
+                 if cache.git_head and cache.git_head != result.stdout.strip():
+                     logger.debug("Incremental cache stale: git HEAD changed")
+                     return None
+             except Exception:
+                 pass
+             return cache
+         except Exception:
+             return None
+ 
+     def _git_changed_files(self) -> tuple[list[str], list[str]]:
+         """Return (modified_files, deleted_files), relative to the project root."""
+         modified, deleted = [], []
+         try:
+             # unstaged + staged modifications
+             for status_cmd in (["git", "diff", "--name-only", "HEAD"],):
+                 result = subprocess.run(
+                     status_cmd, cwd=self.project_root, capture_output=True, text=True, timeout=10,
+                 )
+                 if result.returncode == 0:
+                     for line in result.stdout.strip().split("\n"):
+                         if line:
+                             modified.append(line)
+             # deleted files
+             result = subprocess.run(
+                 ["git", "diff", "--name-only", "--diff-filter=D", "HEAD"],
+                 cwd=self.project_root, capture_output=True, text=True, timeout=10,
+             )
+             if result.returncode == 0:
+                 for line in result.stdout.strip().split("\n"):
+                     if line:
+                         deleted.append(line)
+         except Exception:
+             pass
+         return sorted(set(modified)), sorted(set(deleted))
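+ 
+     # Example (hypothetical working tree): with src/app.py edited and old.py
+     # deleted, this returns (["old.py", "src/app.py"], ["old.py"]); deletions
+     # also appear in the modified list because plain `git diff --name-only HEAD`
+     # includes them.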
+ 
+     def _restore_from_inc_cache(self, file_path: str, entry: Any) -> None:
+         """Restore a file's parse results from the incremental cache, skipping tree-sitter parsing."""
+         # Check that the file's mtime matches
+         full = self.project_root / file_path
+         if full.exists():
+             actual_mtime = full.stat().st_mtime
+             if abs(actual_mtime - entry.mtime) > 0.001:
+                 return  # mtime mismatch; do not restore
+ 
+         # Restore symbols
+         self.graph.file_symbols.setdefault(file_path, [])
+         for sym_dict in entry.symbols_json:
+             sym = Symbol(
+                 id=sym_dict["id"], name=sym_dict["name"], kind=sym_dict["kind"],
+                 file=sym_dict["file"], line=sym_dict["line"],
+                 end_line=sym_dict.get("end_line", sym_dict["line"]),
+                 col=sym_dict.get("col", 0),
+                 visibility=sym_dict.get("visibility", "private"),
+                 docstring=sym_dict.get("docstring", ""),
+                 signature=sym_dict.get("signature", ""),
+                 pagerank=sym_dict.get("pagerank", 0.0),
+             )
+             self.graph.symbols[sym.id] = sym
+             self.graph.file_symbols[file_path].append(sym.id)
+ 
+         # Restore imports
+         self.graph.file_imports[file_path] = list(entry.imports)
+ 
+         # Restore import bindings
+         from . import JSImportBinding
+         self.graph.file_import_bindings[file_path] = [
+             JSImportBinding(
+                 local_name=b["local_name"], imported_name=b["imported_name"],
+                 module=b["module"], line=b["line"], kind=b.get("kind", "named"),
+             )
+             for b in entry.import_bindings_json
+         ]
+ 
+         # Restore exports
+         from . import JSExportBinding
+         self.graph.file_exports[file_path] = [
+             JSExportBinding(
+                 exported_name=b["exported_name"], source_name=b.get("source_name"),
+                 module=b.get("module"), line=b["line"], kind=b.get("kind", "local"),
+             )
+             for b in entry.exports_json
+         ]
+ 
+         # Restore calls
+         self.graph.file_calls[file_path] = [
+             (c["name"], c["line"], c.get("kind", "direct"))
+             for c in entry.calls_json
+         ]
+ 
+         # Update the mtime cache
+         self._cache[file_path] = entry.mtime
+         self.scan_stats.processed_files += 1
+ 
+     # ── File processing ──────────────────────────────────────────────────────
+ 
+     def _list_files(self, max_files: int) -> list[str]:
+         """List files quickly with ripgrep, falling back to pathlib."""
+         rg_cmd = ["rg", "--files", "--hidden", "-g", "!**/*.min.js"]
+         for ext in sorted(EXT_TO_LANG):
+             rg_cmd.extend(["-g", f"**/*{ext}"])
+         try:
+             result = subprocess.run(
+                 rg_cmd, cwd=self.project_root,
+                 capture_output=True, text=True, timeout=30,
+             )
+             candidates = sorted(
+                 line for line in result.stdout.strip().split("\n")
+                 if line
+                 and Path(line).suffix.lower() in EXT_TO_LANG
+             )
+         except Exception:
+             # Fallback: one pass over the tree, filtering by extension
+             valid_exts = set(EXT_TO_LANG)
+             candidates = sorted(
+                 str(p.relative_to(self.project_root))
+                 for p in self.project_root.rglob("*")
+                 if p.is_file()
+                 and p.suffix.lower() in valid_exts
+             )
+ 
+         filtered_files: list[str] = []
+         for file in candidates:
+             if self._should_skip_path(file):
+                 self.scan_stats.filtered_path_files += 1
+                 continue
+             filtered_files.append(file)
+ 
+         self.scan_stats.listed_source_files = len(candidates)
+         if len(filtered_files) > max_files:
+             self.scan_stats.truncated_files = len(filtered_files) - max_files
+         selected_files = filtered_files[:max_files]
+         self.scan_stats.selected_source_files = len(selected_files)
+         return selected_files
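+ 
+     # The assembled command resembles the following (the exact glob list
+     # depends on which EXT_TO_LANG extensions are registered):
+     #     rg --files --hidden -g '!**/*.min.js' -g '**/*.js' -g '**/*.py' ...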
+ 
+     def supporting_files(self, limit: int = 8) -> list[dict[str, Any]]:
+         """List the docs, scripts, and config files worth reading first, beyond the symbol graph."""
+         rows: list[dict[str, Any]] = []
+         seen: set[str] = set()
+         for file in self._list_supporting_file_candidates():
+             if file in seen:
+                 continue
+             seen.add(file)
+             classified = self._classify_supporting_file(file)
+             if not classified:
+                 continue
+             priority, role, reason = classified
+             rows.append({"file": file, "role": role, "reason": reason, "priority": priority})
+         rows.sort(key=lambda row: (row["priority"], row["file"]))
+         return [
+             {"file": row["file"], "role": row["role"], "reason": row["reason"]}
+             for row in rows[:limit]
+         ]
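+ 
+     # Illustrative result for a hypothetical repo (reasons come from the
+     # classifier below, ordered by priority tier, then file name):
+     #     [{"file": "README.md", "role": "readme",
+     #       "reason": "User/project documentation entry point"},
+     #      {"file": "pyproject.toml", "role": "manifest",
+     #       "reason": "Dependencies, scripts, or package metadata"}]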
+ 
+     def _list_supporting_file_candidates(self) -> list[str]:
+         """Quickly list repository files for the lightweight supporting-file list; file contents are never read."""
+         try:
+             result = subprocess.run(
+                 ["rg", "--files", "--hidden", "-g", "!**/*.min.js"],
+                 cwd=self.project_root,
+                 capture_output=True,
+                 text=True,
+                 timeout=30,
+             )
+             candidates = sorted(line for line in result.stdout.strip().split("\n") if line)
+         except Exception:
+             candidates = sorted(
+                 str(p.relative_to(self.project_root))
+                 for p in self.project_root.rglob("*")
+                 if p.is_file()
+             )
+         root_context_files = [
+             name
+             for name in ("AGENTS.md", "CLAUDE.md", "README.md", "SKILL.md")
+             if (self.project_root / name).is_file()
+         ]
+         candidates = sorted(set(root_context_files + candidates))
+         return [file for file in candidates if not self._should_skip_supporting_path(file)]
+ 
+     def _should_skip_supporting_path(self, file: str) -> bool:
+         path = Path(file)
+         name = path.name
+         name_lower = name.lower()
+         if self._should_skip_path(file):
+             return True
+         if name in SENSITIVE_SUPPORTING_FILE_NAMES or name_lower.startswith(".env."):
+             return True
+         if name_lower.endswith((".pem", ".key", ".p12", ".pfx")):
+             return True
+         return False
+ 
+     @staticmethod
+     def _classify_supporting_file(file: str) -> tuple[int, str, str] | None:
+         path = PurePosixPath(file)
+         parts = path.parts
+         name = path.name
+         name_lower = name.lower()
+         suffix = path.suffix.lower()
+         depth = len(parts)
+ 
+         if name in {"AGENTS.md", "CLAUDE.md"}:
+             return 0, "agent-context", "Injected project structure, rules, and workflow context"
+         if name == "SKILL.md":
+             return 1, "skill-doc", "Skill entry doc; usually the core of a skill repo"
+         if name == "README.md":
+             return 2, "readme", "User/project documentation entry point"
+         if name in {"package.json", "pyproject.toml", "Cargo.toml", "go.mod", "requirements.txt"}:
+             return 3, "manifest", "Dependencies, scripts, or package metadata"
+         if name.startswith("tsconfig") and suffix == ".json":
+             return 4, "tooling-config", "TypeScript compiler configuration"
+         if name_lower.startswith(("vite.config", "vitest.config", "eslint.config")):
+             return 4, "tooling-config", "Build, test, or lint configuration"
+         if name in {"Makefile", "Dockerfile", "docker-compose.yml", "compose.yml"}:
+             return 5, "automation", "Build, container, or automation entry point"
+         if suffix == ".service":
+             return 5, "service", "Service deployment/startup configuration"
+         if suffix == ".sh" and (depth <= 2 or (parts and parts[0] in {"scripts", "bin"})):
+             return 6, "script", "Startup, verification, or maintenance script"
+         if suffix == ".md" and (depth <= 2 or (parts and parts[0] in {"docs", "references"})):
+             return 7, "docs", "Supplementary documentation or reference material"
+         if name in SUPPORTING_FILE_NAMES:
+             return 8, "supporting", "Project supporting file"
+         return None
+ 
+     def _should_skip_path(self, file: str) -> bool:
+         path = Path(file)
+         if path.name.endswith(".min.js"):
+             return True
+         if path.name in SKIP_FILE_NAMES:
+             return True
+         return any(part in SKIP_DIR_NAMES for part in path.parts)
+ 
+     def _should_skip_large_file(self, path: Path) -> bool:
+         if os.getenv("REPOMAP_SCAN_LARGE_FILES", "0") == "1":
+             return False
+         try:
+             return path.stat().st_size > self.max_file_bytes
+         except OSError:
+             return True
+ 
+     def _process_file(self, file: str) -> None:
+         path = self.project_root / file
+         if not path.exists():
+             return
+         if self._should_skip_large_file(path):
+             self.scan_stats.filtered_large_files += 1
+             logger.debug(f"Skip oversized file: {file}")
+             return
+ 
+         mtime = path.stat().st_mtime
+         cached_mtime = self._cache.get(file)
+         if cached_mtime == mtime:
+             self.scan_stats.skipped_files += 1
+             return  # Unchanged; reuse the cache
+ 
+         ext = Path(file).suffix.lower()
+         lang = EXT_TO_LANG.get(ext)
+         if not lang or lang not in self.ts.parsers:
+             return
+ 
+         content = path.read_bytes()
+         tree = self.ts.parse(content, lang)
+         if not tree:
+             return
+ 
+         symbols = self.ts.extract_symbols(tree, lang, file, content)
+         self.graph.file_symbols.setdefault(file, [])
+         for sym in symbols:
+             self.graph.symbols[sym.id] = sym
+             self.graph.file_symbols[file].append(sym.id)
+ 
+         imports = self.ts.extract_imports(tree, lang)
+         import_bindings = self.ts.extract_js_ts_import_bindings(content, lang, tree=tree)
+         import_modules = {module for module, _ in imports}
+         import_modules.update(binding.module for binding in import_bindings if binding.module)
+         self.graph.file_imports[file] = sorted(import_modules)
+         self.graph.file_import_bindings[file] = import_bindings
+         self.graph.file_exports[file] = self.ts.extract_js_ts_export_bindings(content, lang, tree=tree)
+         self._mark_exported_symbols(file)
+ 
+         self.graph.file_calls[file] = self.ts.extract_calls(tree, lang)
+ 
+         # Extract HTTP routes (Python/JS/TS/Rust)
+         routes = self.ts.extract_http_routes(tree, lang, file)
+         if routes:
+             self.routes.extend(routes)
+ 
+         # Release the tree object immediately to avoid memory leaks; cache only the mtime
+         del tree
+         self._cache[file] = mtime
+         self.scan_stats.processed_files += 1
+ 
+         # Purge cache entries for files that no longer exist
+         stale = [k for k in list(self._cache) if not (self.project_root / k).exists()]
+         for k in stale:
+             del self._cache[k]
+ 
+     def _mark_exported_symbols(self, file: str) -> None:
+         exported_names = {
+             binding.source_name
+             for binding in self.graph.file_exports.get(file, [])
+             if binding.module is None and binding.source_name and binding.source_name != "*"
+         }
+         if not exported_names:
+             return
+         for symbol_id in self.graph.file_symbols.get(file, []):
+             symbol = self.graph.symbols.get(symbol_id)
+             if symbol and symbol.name in exported_names:
+                 symbol.visibility = "exported"
+ 
+     # ── Edge building ────────────────────────────────────────────────────────
+ 
+     def _build_edges(self) -> None:
+         self._resolver = ImportResolver(self.project_root, self.graph)
+         edge_builder = EdgeBuilder(self.graph, self._resolver)
+         edge_builder.build_edges()
+ 
+     # ── PageRank ─────────────────────────────────────────────────────────────
+ 
+     def _calculate_pagerank(self, damping: float = 0.85, max_iter: int = 50,
+                             tol: float = 1e-6) -> None:
+         self._analyzer.calculate_pagerank(damping, max_iter, tol)
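+ 
+     # The real implementation lives in .ranking's GraphAnalyzer; as a sketch,
+     # classic PageRank power iteration over the symbol graph looks roughly
+     # like this (edge weights and sink handling in the shipped code may differ):
+     #     n = len(nodes)
+     #     rank = {v: 1.0 / n for v in nodes}
+     #     for _ in range(max_iter):
+     #         nxt = {v: (1.0 - damping) / n for v in nodes}
+     #         for src, targets in outgoing.items():
+     #             share = damping * rank[src] / max(len(targets), 1)
+     #             for dst in targets:
+     #                 nxt[dst] += share
+     #         done = sum(abs(nxt[v] - rank[v]) for v in nodes) < tol
+     #         rank = nxt
+     #         if done:
+     #             break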
+ 
+     # ═══════════════════════════════════════════════════════════════════════════
+     # Query interface (delegated to the analyzer)
+     # ═══════════════════════════════════════════════════════════════════════════
+ 
+     def query_symbol(self, name: str) -> list[Any]:
+         """Fuzzy-match symbols by name, returned in descending PageRank order."""
+         return self._analyzer.query_symbol(name)
+ 
+     def call_chain(self, symbol_id: str, direction: str = "both",
+                    max_depth: int = 3) -> dict[str, list[Any]]:
+         """
+         Return the call chain for the given symbol.
+         direction: "callers" | "callees" | "both"
+         """
+         if direction not in ("callers", "callees", "both"):
+             raise ValueError("direction must be 'callers', 'callees', or 'both'")
+         return self._analyzer.call_chain(symbol_id, direction, max_depth)
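+ 
+     # Usage sketch (result shapes depend on the analyzer; take the symbol id
+     # from a query_symbol hit rather than guessing its format):
+     #     sym = engine.query_symbol("MyClassName")[0]
+     #     chain = engine.call_chain(sym.id, direction="callers", max_depth=2)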
+ 
+     def hotspots(self, limit: int = 15) -> list[dict]:
+         """Identify high-density files."""
+         return self._analyzer.hotspots(limit)
+ 
+     def entry_points(self) -> list[str]:
+         """Identify entry-point files."""
+         return self._analyzer.entry_points()
+ 
+     def file_analysis(self) -> dict[str, dict[str, Any]]:
+         """Analyze each file's complexity and connectivity."""
+         return self._analyzer.file_analysis()
+ 
+     def module_summary(self, limit: int = 8) -> list[dict[str, Any]]:
+         """Generate a module-level summary."""
+         return self._analyzer.module_summary(limit)
+ 
+     def suggested_reading_order(self, limit: int = 8) -> list[dict[str, Any]]:
+         """Generate a suggested reading order for the AI."""
+         return self._analyzer.suggested_reading_order(limit)
+ 
+     def list_routes(self) -> list:
+         """Return the extracted HTTP routes."""
+         return self.routes
+ 
+     def summary_symbols(self, limit_files: int = 6, per_file: int = 4) -> list[dict[str, Any]]:
+         """Return the key implementation symbols suited to the overview display."""
+         return self._analyzer.summary_symbols(limit_files, per_file)
+ 
+     def _scan_summary_lines(self) -> list[str]:
+         lines = [
+             f"- Files: {self.scan_stats.processed_files}",
+             f"- Symbols: {len(self.graph.symbols)}",
+             f"- Dependency edges: {sum(len(v) for v in self.graph.outgoing.values())}",
+             f"- Filtered paths: {self.scan_stats.filtered_path_files}",
+             f"- Filtered large files: {self.scan_stats.filtered_large_files}",
+         ]
+         if self._resolver and self._resolver.import_configs:
+             lines.append(f"- Resolver configs: {len(self._resolver.import_configs)}")
+         # Timeout circuit-breaker notice
+         if self.scan_stats.timeout_triggered:
+             lines.append("- ⚠️ Scan timed out: some files were not processed; results are incomplete")
+         # Failed files (show at most 3)
+         if self.scan_stats.failed_files:
+             lines.append(f"- Failed to process: {len(self.scan_stats.failed_files)} files")
+             for ff in self.scan_stats.failed_files[:3]:
+                 lines.append(f" - {ff}")
+         return lines
+ 
+     # ═══════════════════════════════════════════════════════════════════════════
+     # AI output formatting (delegated to the ai module)
+     # ═══════════════════════════════════════════════════════════════════════════
+ 
+     def render_overview(self, max_chars: int = 16000, with_heat: bool = False,
+                         with_co_change: bool = False, granularity: str = "auto") -> str:
+         return render_overview_report(self, max_chars, with_heat=with_heat,
+                                       with_co_change=with_co_change,
+                                       granularity=granularity)
+ 
+     def render_call_chain(self, symbol_name: str, max_depth: int = 3) -> str:
+         return render_call_chain_report(self, symbol_name, max_depth)
+ 
+     def render_file_detail(self, file_path: str, max_symbols: int = 12, max_chars: int = 6000) -> str:
+         return render_file_detail_report(self, file_path, max_symbols=max_symbols, max_chars=max_chars)
+ 
+ 
+ # ═══════════════════════════════════════════════════════════════════════════════
+ # Backward-compatibility exports
+ # ═══════════════════════════════════════════════════════════════════════════════
+ 
+ # Re-export constants from the parser module to preserve compatibility
+ from .parser import QUERIES
+ 
+ __all__ = [
+     "DEFAULT_MAX_FILE_BYTES",
+     "EXT_TO_LANG",
+     "QUERIES",
+     "RepoMapEngine",
+     "SKIP_DIR_NAMES",
+     "SKIP_FILE_NAMES",
+     "TreeSitterAdapter",
+     "logger",
+ ]