repomap-cli 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
repomap/cli/cli.py ADDED
@@ -0,0 +1,2475 @@
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ import hashlib
5
+ import importlib.util
6
+ import json
7
+ import os
8
+ import subprocess
9
+ import sys
10
+ import tempfile
11
+ from datetime import datetime
12
+ from pathlib import Path, PurePosixPath
13
+ from typing import Any, Sequence
14
+
15
+ from ..ai import (
16
+ _build_query_reading_order,
17
+ _get_hot_files,
18
+ _rank_symbols_for_file,
19
+ render_impact_report,
20
+ render_query_report,
21
+ render_routes_report,
22
+ render_verify_report,
23
+ )
24
+ from ..check import RepoMapChecker
25
+ from ..core import RepoMapEngine, SKIP_DIR_NAMES, SKIP_FILE_NAMES
26
+ from ..parser import EXT_TO_LANG
27
+ from .. import (
28
+ Edge,
29
+ HttpRoute,
30
+ RepoGraph,
31
+ ScanStats,
32
+ Symbol,
33
+ get_cache_paths,
34
+ get_session_cache_path,
35
+ serialize_edge,
36
+ serialize_symbol,
37
+ )
38
+ from ..toolkit import diff_project, save_cache, scan_project
39
+ from ..topic import (
40
+ FileMatch,
41
+ TestMatch,
42
+ classify_file_role,
43
+ compute_keyword_weights,
44
+ find_related_tests,
45
+ find_untested_symbols,
46
+ is_test_like_file,
47
+ split_identifier,
48
+ topic_score,
49
+ )
50
+
51
PACKAGE_ROOT = Path(__file__).resolve().parent
PROJECT_ROOT = PACKAGE_ROOT.parent
CLI_NAME = "repomap"

# Unified exit-code semantics shared by the subcommand runners.
EXIT_SUCCESS = 0  # success, meaningful output produced
EXIT_ERROR = 1  # command execution failed
EXIT_INVALID_ARGS = 2  # invalid arguments
EXIT_NO_RESULTS = 3  # no results (query had no matches, routes empty)
# Module names bundled for the one-file build (presumably PyInstaller hidden
# imports — see the build-binary subcommand).
PYINSTALLER_BINDINGS = [
    "tree_sitter",
    "tree_sitter_python",
    "tree_sitter_javascript",
    "tree_sitter_typescript",
    "tree_sitter_go",
    "tree_sitter_rust",
    "tree_sitter_html",
    "tree_sitter_css",
    "tree_sitter_json",
    "repomap_lsp",
]

# In-process scan cache keyed by
# (project_root, max_files, max_file_bytes, REPOMAP_SCAN_LARGE_FILES, incremental),
# mapping to (fingerprint, engine).
_SCAN_CACHE: dict[tuple[str, int, int, str, bool], tuple[str, RepoMapEngine]] = {}
# Bump whenever cache semantics change, so CLI/binary runs never reuse stale
# results that would mislead reading order and call chains.
SESSION_CACHE_VERSION = 5
# Default output-size and item-count budgets for the various reports.
DEFAULT_OVERVIEW_MAX_CHARS = 16000
DEFAULT_QUERY_SYMBOL_MAX_CHARS = 4000
DEFAULT_CALL_CHAIN_MAX_CHARS = 4000
DEFAULT_FILE_DETAIL_MAX_CHARS = 6000
DEFAULT_FILE_DETAIL_MAX_SYMBOLS = 12
DEFAULT_OVERVIEW_JSON_HOTSPOTS = 8
DEFAULT_OVERVIEW_JSON_READING_ORDER = 6
DEFAULT_OVERVIEW_JSON_MODULES = 6
DEFAULT_OVERVIEW_JSON_SUMMARY_FILES = 4
DEFAULT_OVERVIEW_JSON_SYMBOLS_PER_FILE = 3
DEFAULT_OVERVIEW_JSON_SUPPORTING_FILES = 8
87
+
88
+
89
def build_parser() -> argparse.ArgumentParser:
    """Build the top-level argument parser with one subparser per subcommand.

    Scan-backed subcommands share --project/--max-files through
    `_add_project_args`; the others declare --project individually because
    they do not take --max-files.
    """
    parser = argparse.ArgumentParser(
        prog=CLI_NAME,
        description="Standalone RepoMap CLI. Former MCP capabilities are exposed as direct subcommands.",
    )
    subparsers = parser.add_subparsers(dest="command", required=True)

    scan_parser = subparsers.add_parser("scan", help="Scan a repository and print the scan summary.")
    _add_project_args(scan_parser)

    overview_parser = subparsers.add_parser("overview", help="Scan a repository and print the overview report.")
    _add_project_args(overview_parser)
    overview_parser.add_argument(
        "--max-chars",
        type=int,
        default=DEFAULT_OVERVIEW_MAX_CHARS,
        help="Maximum overview size for AI-friendly output.",
    )
    overview_parser.add_argument("--json", action="store_true", help="Print raw JSON output.")
    overview_parser.add_argument("--with-heat", action="store_true", default=False,
                                 help="Mark files changed in the last 30 days with [HOT].")
    overview_parser.add_argument("--with-co-change", action="store_true", default=False,
                                 help="Include Git co-change coupling section; disabled by default for speed.")
    overview_parser.add_argument("--granularity", choices=["full", "medium", "compact", "auto"],
                                 default="auto",
                                 help="Report granularity (default: auto, based on project size).")

    chain_parser = subparsers.add_parser("call-chain", help="Scan a repository and print a symbol call chain.")
    _add_project_args(chain_parser)
    chain_parser.add_argument("--symbol", required=True, help="Symbol name to analyze.")
    chain_parser.add_argument("--file-path", help="Disambiguate by relative file path.")
    chain_parser.add_argument("--direction", choices=["callers", "callees", "both"], default="both")
    chain_parser.add_argument("--depth", type=int, default=3, help="Traversal depth.")
    chain_parser.add_argument(
        "--max-chars",
        type=int,
        default=DEFAULT_CALL_CHAIN_MAX_CHARS,
        help="Maximum text output size.",
    )
    chain_parser.add_argument("--json", action="store_true", help="Print raw JSON output.")

    query_parser = subparsers.add_parser("query-symbol", help="Scan a repository and query matching symbols.")
    _add_project_args(query_parser)
    query_parser.add_argument("--symbol", required=True, help="Symbol name to search for.")
    query_parser.add_argument("--file-path", help="Optional relative file path filter.")
    query_parser.add_argument(
        "--max-chars",
        type=int,
        default=DEFAULT_QUERY_SYMBOL_MAX_CHARS,
        help="Maximum text output size.",
    )
    query_parser.add_argument("--with-lsp", action="store_true", help="Also query local LSP definition/reference evidence for the best match.")
    query_parser.add_argument("--lsp-timeout", type=float, default=8.0, help="Seconds to wait for LSP responses.")

    # ── query (topic keyword search) ──────────────────────────────────────────
    topic_query_parser = subparsers.add_parser("query", help="Search repository by topic keyword.")
    topic_query_parser.add_argument("--project", "-p", default=None, help="Project root path. Defaults to the current working directory.")
    topic_query_parser.add_argument("--query", "-q", required=True, help="Topic keyword.")
    topic_query_parser.add_argument("--max-files", type=int, default=20, help="Max result files (default 20).")
    topic_query_parser.add_argument("--max-symbols", type=int, default=40, help="Max result symbols (default 40).")
    topic_query_parser.add_argument("--no-tests", action="store_true")
    topic_query_parser.add_argument("--json", action="store_true")
    topic_query_parser.add_argument("--paths", help="Limit search to comma-separated directories.")
    topic_query_parser.add_argument("--exclude", help="Exclude comma-separated directories.")

    # ── impact (file-level impact analysis) ───────────────────────────────────
    impact_parser = subparsers.add_parser("impact", help="Analyze file-level change impact.")
    impact_parser.add_argument("--project", "-p", default=None, help="Project root path. Defaults to the current working directory.")
    impact_parser.add_argument("--files", required=True, nargs="+", help="Files to analyze (one or more).")
    impact_parser.add_argument("--json", action="store_true")
    impact_parser.add_argument("--max-files", type=int, default=20, help="Max affected files to show.")
    impact_parser.add_argument("--with-symbols", action="store_true", help="Include edit-planning key symbols, read-next order, and LSP availability hint.")
    impact_parser.add_argument("--depth", type=int, default=1, help="Transitive impact depth (default 1=direct, 2=one hop out).")

    verify_parser = subparsers.add_parser("verify", help="Aggregate post-edit evidence before final handoff.")
    verify_parser.add_argument("--project", "-p", default=None, help="Project root path. Defaults to the current working directory.")
    verify_parser.add_argument("--json", action="store_true", help="Print raw JSON output.")
    verify_parser.add_argument("--types", nargs="*", choices=["typescript", "rust", "python", "go", "javascript"], help="Explicit project types to check.")
    verify_parser.add_argument("--max-issues", type=int, default=50, help="Maximum issues per tool.")
    verify_parser.add_argument("--no-symbols", action="store_true", help="Skip scan-based symbol resolution for diagnostics.")
    verify_parser.add_argument("--with-lsp", action="store_true", help="Include focused LSP diagnostics for changed files.")
    verify_parser.add_argument("--no-incremental", action="store_true", help="Force full scan instead of incremental.")
    verify_parser.add_argument("--lsp-timeout", type=float, default=8.0, help="Seconds to wait for LSP responses.")
    verify_parser.add_argument("--lsp-max-files", type=int, default=20, help="Maximum changed files to open through LSP.")
    verify_parser.add_argument("--with-diff", action="store_true", help="Include graph diff when a cache baseline exists.")
    verify_parser.add_argument("--quick", action="store_true", help="Risk-only mode for current Git changes; skips compiler and LSP checks.")

    file_parser = subparsers.add_parser("file-detail", help="Scan a repository and print file detail.")
    _add_project_args(file_parser)
    file_parser.add_argument("--file-path", required=True, help="Relative file path to inspect.")
    file_parser.add_argument(
        "--max-symbols",
        type=int,
        default=DEFAULT_FILE_DETAIL_MAX_SYMBOLS,
        help="Maximum symbols to expand in text output.",
    )
    file_parser.add_argument(
        "--max-chars",
        type=int,
        default=DEFAULT_FILE_DETAIL_MAX_CHARS,
        help="Maximum text output size.",
    )

    hotspots_parser = subparsers.add_parser("hotspots", help="Scan a repository and print hotspot files.")
    _add_project_args(hotspots_parser)
    hotspots_parser.add_argument("--limit", type=int, default=15, help="Number of files to print.")

    cache_parser = subparsers.add_parser("cache", help="Prepare a graph baseline before the target edits.")
    cache_parser.add_argument("action", choices=["save"], help="Cache action. Only save is public; graph comparison reads the baseline through diff/verify --with-diff.")
    cache_parser.add_argument("--project", "-p", default=None, help="Project root path. Defaults to the current working directory.")

    diff_parser = subparsers.add_parser("diff", help="Advanced graph-only comparison against a baseline saved before the target edits.")
    diff_parser.add_argument("--project", "-p", default=None, help="Project root path. Defaults to the current working directory.")
    diff_parser.add_argument("--json", action="store_true", help="Print raw JSON output.")

    git_parser = subparsers.add_parser("git-history", help="Scan a repository and inspect symbol git history.")
    _add_project_args(git_parser)
    git_parser.add_argument("--symbol", required=True, help="Symbol name to inspect.")
    git_parser.add_argument("--file-path", help="Disambiguate by relative file path.")

    refs_parser = subparsers.add_parser("refs", help="Scan a repository and analyze references.")
    _add_project_args(refs_parser)
    refs_parser.add_argument("--symbol", help="Optional symbol name.")
    refs_parser.add_argument("--file-path", help="Disambiguate symbol analysis by relative file path.")
    refs_parser.add_argument("--json", action="store_true", help="Print raw JSON output.")
    refs_parser.add_argument("--with-lsp", action="store_true", help="Also query local LSP definition/reference evidence for the selected symbol.")
    refs_parser.add_argument("--lsp-timeout", type=float, default=8.0, help="Seconds to wait for LSP responses.")

    orphan_parser = subparsers.add_parser("orphan", help="Scan a repository and find orphaned symbols.")
    _add_project_args(orphan_parser)
    orphan_parser.add_argument("--json", action="store_true", help="Print raw JSON output.")
    orphan_parser.add_argument("--limit", type=int, default=20, help="Max candidates per confidence tier in text mode (default 20).")
    orphan_parser.add_argument("--min-confidence", type=int, default=0, help="Minimum confidence score 0-100 to include in output (default 0).")

    check_parser = subparsers.add_parser("check", help="Run compiler/static analysis diagnostics.")
    check_parser.add_argument("--project", "-p", default=None, help="Project root path. Defaults to the current working directory.")
    check_parser.add_argument(
        "--types",
        nargs="*",
        choices=["typescript", "rust", "python", "go", "javascript"],
        help="Explicit project types to check.",
    )
    check_parser.add_argument("--max-issues", type=int, default=50, help="Maximum issues per tool.")
    check_parser.add_argument("--since-commit", help="Only check files changed since the given commit.")
    # action="append" collects repeated --modified-file flags into args.modified_files.
    check_parser.add_argument("--modified-file", action="append", dest="modified_files", metavar="PATH", help="Explicit modified file path.")
    check_parser.add_argument("--no-symbols", action="store_true", help="Skip scan-based symbol resolution.")
    check_parser.add_argument("--with-lsp", action="store_true", help="Also collect diagnostics from local LSP servers for explicit files.")
    check_parser.add_argument("--lsp-timeout", type=float, default=8.0, help="Seconds to wait for LSP responses.")
    check_parser.add_argument("--lsp-max-files", type=int, default=20, help="Maximum explicit files to open through LSP.")

    diagnostics_parser = subparsers.add_parser("diagnostics", help="Focused diagnostics for explicit files from optional evidence sources.")
    diagnostics_parser.add_argument("--project", "-p", default=None, help="Project root path. Defaults to the current working directory.")
    diagnostics_parser.add_argument("--source", choices=["lsp"], default="lsp", help="Diagnostics source.")
    diagnostics_parser.add_argument("--files", nargs="+", required=True, help="Project files to check with the diagnostics source.")
    diagnostics_parser.add_argument("--json", action="store_true", help="Print raw JSON output.")
    diagnostics_parser.add_argument("--lsp-timeout", type=float, default=8.0, help="Seconds to wait for LSP responses.")
    diagnostics_parser.add_argument("--lsp-max-files", type=int, default=20, help="Maximum files to open through LSP.")

    # "lsp" has its own nested subcommand level (currently only "doctor").
    lsp_parser = subparsers.add_parser("lsp", help="Inspect local LSP server availability.")
    lsp_subparsers = lsp_parser.add_subparsers(dest="lsp_command", required=True)
    lsp_doctor_parser = lsp_subparsers.add_parser("doctor", help="Detect local LSP servers without starting analysis.")
    lsp_doctor_parser.add_argument("--project", "-p", default=None, help="Project root path. Defaults to the current working directory.")
    lsp_doctor_parser.add_argument("--json", action="store_true", help="Print raw JSON output.")

    routes_parser = subparsers.add_parser("routes", help="Extract direct HTTP/API route inventory.")
    _add_project_args(routes_parser)
    routes_parser.add_argument("--json", action="store_true", help="Print raw JSON output.")

    doctor_parser = subparsers.add_parser("doctor", help="Validate runtime and build prerequisites.")
    doctor_parser.add_argument("--project", "-p", default=None, help="Project root path. Defaults to the current working directory.")

    build_parser_cmd = subparsers.add_parser("build-binary", help="Build a one-file executable with PyInstaller.")
    build_parser_cmd.add_argument("--output", default="dist", help="Directory for the final binary.")
    build_parser_cmd.add_argument("--name", default=CLI_NAME, help="Binary file name.")

    return parser
265
+
266
+
267
+ def _add_project_args(parser: argparse.ArgumentParser) -> None:
268
+ parser.add_argument("--project", "-p", default=None, help="Project root path. Defaults to the current working directory.")
269
+ parser.add_argument("--max-files", type=int, default=8000, help="Maximum number of files to scan.")
270
+
271
+
272
+ def _prepare_argv(argv: Sequence[str] | None) -> list[str] | None:
273
+ if argv is None:
274
+ raw_args = sys.argv[1:]
275
+ else:
276
+ raw_args = list(argv)
277
+ prepared: list[str] = []
278
+ i = 0
279
+ while i < len(raw_args):
280
+ item = raw_args[i]
281
+ if item == "--modified-file" and i + 1 < len(raw_args):
282
+ prepared.append(f"--modified-file={raw_args[i + 1]}")
283
+ i += 2
284
+ continue
285
+ prepared.append(item)
286
+ i += 1
287
+ return prepared
288
+
289
+
290
def main(argv: Sequence[str] | None = None) -> int:
    """CLI entry point: parse arguments and dispatch to the matching runner.

    Returns a process exit code (see the EXIT_* constants). SystemExit from
    argparse (e.g. --help, bad flags) is converted into a return code instead
    of propagating.
    """
    parser = build_parser()
    try:
        args = parser.parse_args(_prepare_argv(argv))
    except SystemExit as exc:
        # argparse exits with an int status; None maps to 0.
        return int(exc.code or 0)

    command = args.command
    if command == "scan":
        return run_scan(args.project, args.max_files)
    if command == "overview":
        return run_overview(args.project, args.max_files, args.max_chars, args.json,
                            with_heat=getattr(args, "with_heat", False),
                            with_co_change=getattr(args, "with_co_change", False),
                            granularity=getattr(args, "granularity", "auto"))
    if command == "call-chain":
        return run_call_chain(
            args.project,
            args.max_files,
            args.symbol,
            args.file_path,
            args.direction,
            args.depth,
            args.max_chars,
            args.json,
        )
    if command == "query-symbol":
        return run_query_symbol(args.project, args.max_files, args.symbol, args.file_path, args.max_chars, args.with_lsp, args.lsp_timeout)
    if command == "query":
        # Topic query has no --max-files scan limit of its own; 8000 matches
        # the _add_project_args default used by the scan-backed commands.
        return run_query(
            args.project, 8000, args.query,
            getattr(args, "max_files", 20), getattr(args, "max_symbols", 40),
            args.no_tests, args.json, args.paths, args.exclude,
        )
    if command == "impact":
        return run_impact(
            args.project, 8000, args.files,
            getattr(args, "max_files", 20), args.json, getattr(args, "with_symbols", False),
            depth=getattr(args, "depth", 1),
            incremental=not getattr(args, "no_incremental", False),
        )
    if command == "verify":
        return run_verify(
            project=args.project,
            as_json=args.json,
            types=args.types,
            max_issues=args.max_issues,
            resolve_symbols=not args.no_symbols,
            with_lsp=args.with_lsp,
            lsp_timeout=args.lsp_timeout,
            lsp_max_files=args.lsp_max_files,
            with_diff=args.with_diff,
            quick=args.quick,
            incremental=not getattr(args, "no_incremental", False),
        )
    if command == "file-detail":
        return run_file_detail(args.project, args.max_files, args.file_path, args.max_symbols, args.max_chars)
    if command == "hotspots":
        return run_hotspots(args.project, args.max_files, args.limit)
    if command == "cache":
        return run_cache(args.project, args.action)
    if command == "diff":
        return run_diff(args.project, args.json)
    if command == "git-history":
        return run_git_history(args.project, args.max_files, args.symbol, args.file_path)
    if command == "refs":
        return run_refs(args.project, args.max_files, args.symbol, args.file_path, args.json, args.with_lsp, args.lsp_timeout)
    if command == "orphan":
        return run_orphan(args.project, args.max_files, args.json, args.limit, args.min_confidence)
    if command == "check":
        return run_check(
            project=args.project,
            types=args.types,
            max_issues=args.max_issues,
            since_commit=args.since_commit,
            modified_files=args.modified_files,
            resolve_symbols=not args.no_symbols,
            with_lsp=args.with_lsp,
            lsp_timeout=args.lsp_timeout,
            lsp_max_files=args.lsp_max_files,
        )
    if command == "diagnostics":
        return run_diagnostics(args.project, args.source, args.files, args.json, args.lsp_timeout, args.lsp_max_files)
    if command == "lsp":
        if args.lsp_command == "doctor":
            return run_lsp_doctor(args.project, args.json)
        # parser.error raises SystemExit, so this return is defensive only.
        parser.error(f"unknown lsp command: {args.lsp_command}")
        return 2
    if command == "routes":
        return run_routes(args.project, args.max_files, args.json)
    if command == "doctor":
        return run_doctor(args.project)
    if command == "build-binary":
        return run_build_binary(args.output, args.name)
    # Unreachable when subparsers are required=True; defensive fallback.
    parser.error(f"unknown command: {command}")
    return 2
386
+
387
+
388
+ def _resolve_project(project: str | None) -> str:
389
+ project_path = Path.cwd().resolve() if project is None else Path(project).expanduser().resolve()
390
+ if not project_path.is_dir():
391
+ raise ValueError(f"project path is not a directory: {project_path}")
392
+ if project is None and project_path == Path.home().resolve():
393
+ print(
394
+ f"[{CLI_NAME}] warning: default project root is your home directory: {project_path}. "
395
+ "Run from the intended project directory or pass --project explicitly.",
396
+ file=sys.stderr,
397
+ )
398
+ return str(project_path)
399
+
400
+
401
+ def _normalize_project_relative_path(project_root: str | Path, value: str, *, must_exist: bool = False) -> str:
402
+ raw = value.strip()
403
+ if not raw:
404
+ raise ValueError("path is empty")
405
+ if raw.startswith("-"):
406
+ raise ValueError(f"unsafe path starts with '-': {value}")
407
+ project_path = Path(project_root).resolve()
408
+ input_path = Path(raw).expanduser()
409
+ abs_path = input_path.resolve() if input_path.is_absolute() else (project_path / input_path).resolve()
410
+ try:
411
+ rel = abs_path.relative_to(project_path)
412
+ except ValueError:
413
+ raise ValueError(f"path is outside project: {value}") from None
414
+ if must_exist and not abs_path.exists():
415
+ raise ValueError(f"path does not exist: {value}")
416
+ rel_path = rel.as_posix()
417
+ if rel_path in ("", "."):
418
+ raise ValueError(f"path must reference a project file or subdirectory: {value}")
419
+ return rel_path
420
+
421
+
422
def _normalize_project_relative_paths(project_root: str | Path, values: list[str], *, must_exist: bool = False) -> list[str]:
    """Normalize every entry of *values* relative to *project_root*, preserving order."""
    normalized: list[str] = []
    for entry in values:
        normalized.append(_normalize_project_relative_path(project_root, entry, must_exist=must_exist))
    return normalized
424
+
425
+
426
def _normalize_path_prefix(project_root: str | Path, prefix: str) -> str:
    """Normalize a directory-prefix filter: strip trailing slashes, then resolve
    it to a project-relative path (existence not required)."""
    trimmed = prefix.rstrip("/")
    return _normalize_project_relative_path(project_root, trimmed, must_exist=False)
428
+
429
+
430
+ def _path_matches_prefix(file_path: str, prefix: str) -> bool:
431
+ return file_path == prefix or file_path.startswith(prefix.rstrip("/") + "/")
432
+
433
+
434
+ def _read_max_file_bytes() -> int:
435
+ raw = os.getenv("REPOMAP_MAX_FILE_BYTES", str(512 * 1024))
436
+ try:
437
+ value = int(raw)
438
+ except ValueError:
439
+ return 512 * 1024
440
+ return max(0, value)
441
+
442
+
443
def _iter_source_files(project_root: Path) -> list[str]:
    """Walk *project_root* and return sorted project-relative POSIX paths of
    parseable source files, skipping configured directories/files and minified JS."""
    collected: list[str] = []
    for root, dir_names, file_names in os.walk(project_root):
        # Prune skipped directories in place so os.walk never descends into them.
        dir_names[:] = [name for name in dir_names if name not in SKIP_DIR_NAMES]
        rel_root = Path(root).relative_to(project_root)
        at_top_level = str(rel_root) == "."
        for file_name in file_names:
            if Path(file_name).suffix.lower() not in EXT_TO_LANG:
                continue
            if file_name in SKIP_FILE_NAMES or file_name.endswith(".min.js"):
                continue
            rel_path = file_name if at_top_level else (rel_root / file_name).as_posix()
            # Re-check every path component against the skip list as a final guard.
            if any(part in SKIP_DIR_NAMES for part in Path(rel_path).parts):
                continue
            collected.append(rel_path)
    collected.sort()
    return collected
459
+
460
+
461
def _scan_fingerprint(project_root: str, max_files: int) -> str:
    """Compute a SHA-256 hex fingerprint of everything that shapes a scan.

    The digest covers the scan configuration (root, max_files, size limit,
    large-file env flag) plus, for each selected source file, its path,
    mtime (ns) and size — so any change that could alter scan output yields
    a different fingerprint and invalidates cached engines.

    NOTE: the update order is part of the fingerprint; do not reorder.
    """
    root = Path(project_root)
    max_file_bytes = _read_max_file_bytes()
    scan_large_files = os.getenv("REPOMAP_SCAN_LARGE_FILES", "0")
    digest = hashlib.sha256()
    digest.update(project_root.encode("utf-8"))
    digest.update(str(max_files).encode("utf-8"))
    digest.update(str(max_file_bytes).encode("utf-8"))
    digest.update(scan_large_files.encode("utf-8"))

    # Same selection the scan itself uses: sorted listing, truncated at max_files.
    selected = _iter_source_files(root)[:max_files]
    digest.update(str(len(selected)).encode("utf-8"))
    for rel_path in selected:
        path = root / rel_path
        try:
            stat = path.stat()
        except OSError:
            # Unreadable/vanished files still contribute a stable marker.
            digest.update(f"{rel_path}:missing".encode("utf-8"))
            continue
        if scan_large_files != "1" and stat.st_size > max_file_bytes:
            # Oversized files get a skip marker that includes the size, so
            # growing/shrinking across the threshold changes the fingerprint.
            digest.update(f"{rel_path}:skip:{stat.st_size}".encode("utf-8"))
            continue
        digest.update(rel_path.encode("utf-8"))
        digest.update(str(stat.st_mtime_ns).encode("utf-8"))
        digest.update(str(stat.st_size).encode("utf-8"))
    return digest.hexdigest()
487
+
488
+
489
def _scan_engine(project: str | None, max_files: int, incremental: bool = False) -> RepoMapEngine:
    """Return a scanned engine for *project*, reusing caches when valid.

    Lookup order: in-process _SCAN_CACHE, then the on-disk session cache,
    then a fresh scan (which is persisted back to disk for the next run).
    A fingerprint of the scan inputs guards every cache hit.
    """
    resolved = _resolve_project(project)
    key = (
        resolved,
        max_files,
        _read_max_file_bytes(),
        os.getenv("REPOMAP_SCAN_LARGE_FILES", "0"),
        incremental,
    )
    fingerprint = _scan_fingerprint(resolved, max_files)

    entry = _SCAN_CACHE.get(key)
    if entry is not None and entry[0] == fingerprint:
        return entry[1]

    restored = _load_session_engine(resolved, fingerprint)
    if restored is not None:
        _SCAN_CACHE[key] = (fingerprint, restored)
        print(f"[{CLI_NAME}] 从磁盘恢复会话缓存", file=sys.stderr)
        return restored

    engine = RepoMapEngine(resolved)
    engine.scan(max_files=max_files, incremental=incremental)
    _save_session_engine(resolved, fingerprint, engine)
    _SCAN_CACHE[key] = (fingerprint, engine)
    return engine
514
+
515
+
516
def _engine_to_session_payload(project_root: str, fingerprint: str, engine: RepoMapEngine) -> dict[str, Any]:
    """Serialize a scanned engine into the JSON session-cache payload.

    The structure mirrors what `_restore_engine_from_session_payload` reads:
    versioned header, scan stats, serialized symbols, outgoing edges keyed by
    source symbol id, per-file symbol/import maps, and extracted routes.
    """
    symbols = [serialize_symbol(symbol) for symbol in engine.graph.symbols.values()]
    # Only persist non-empty edge lists; incoming edges are rebuilt on restore.
    outgoing = {
        source_id: [serialize_edge(edge) for edge in edges]
        for source_id, edges in engine.graph.outgoing.items()
        if edges
    }
    return {
        "version": SESSION_CACHE_VERSION,
        "project_root": project_root,
        "fingerprint": fingerprint,
        "scan_state": engine.scan_state,
        "scan_stats": {
            "listed_source_files": engine.scan_stats.listed_source_files,
            "selected_source_files": engine.scan_stats.selected_source_files,
            "processed_files": engine.scan_stats.processed_files,
            "filtered_path_files": engine.scan_stats.filtered_path_files,
            "filtered_large_files": engine.scan_stats.filtered_large_files,
            "truncated_files": engine.scan_stats.truncated_files,
            "failed_files": list(engine.scan_stats.failed_files),
            "scan_duration_ms": engine.scan_stats.scan_duration_ms,
            "timeout_triggered": engine.scan_stats.timeout_triggered,
            "skipped_files": engine.scan_stats.skipped_files,
        },
        "symbols": symbols,
        "outgoing": outgoing,
        "file_symbols": {
            file_path: list(symbol_ids)
            for file_path, symbol_ids in engine.graph.file_symbols.items()
        },
        "file_imports": {
            file_path: list(imports)
            for file_path, imports in engine.graph.file_imports.items()
        },
        "routes": [
            _route_payload(r)
            for r in engine.routes
        ],
    }
555
+
556
+
557
def _restore_engine_from_session_payload(payload: dict[str, Any]) -> RepoMapEngine | None:
    """Rebuild a RepoMapEngine from a session-cache payload.

    Returns None when the payload version does not match, the project root is
    missing, or the restored engine was never fully scanned. Inverse of
    `_engine_to_session_payload`.
    """
    if payload.get("version") != SESSION_CACHE_VERSION:
        return None
    project_root = payload.get("project_root")
    if not project_root:
        return None

    engine = RepoMapEngine(project_root)
    graph = RepoGraph()

    for row in payload.get("symbols", []):
        # id/name/kind/file/line are required; the rest default when absent.
        symbol = Symbol(
            id=row["id"],
            name=row["name"],
            kind=row["kind"],
            file=row["file"],
            line=row["line"],
            end_line=row.get("end_line", 0),
            col=row.get("col", 0),
            visibility=row.get("visibility", "private"),
            docstring=row.get("docstring", ""),
            signature=row.get("signature", ""),
            pagerank=row.get("pagerank", 0.0),
        )
        graph.symbols[symbol.id] = symbol

    # Only outgoing edges are persisted; incoming is rebuilt here.
    for source_id, rows in payload.get("outgoing", {}).items():
        for row in rows:
            edge = Edge(
                source=row["source"],
                target=row["target"],
                weight=row.get("weight", 1.0),
                kind=row.get("kind", "call"),
            )
            graph.outgoing[source_id].append(edge)
            graph.incoming[edge.target].append(edge)

    for file_path, symbol_ids in payload.get("file_symbols", {}).items():
        graph.file_symbols[file_path].extend(symbol_ids)

    for file_path, imports in payload.get("file_imports", {}).items():
        graph.file_imports[file_path].extend(imports)

    stats_row = payload.get("scan_stats", {})
    engine.graph = graph
    engine.scan_stats = ScanStats(
        listed_source_files=stats_row.get("listed_source_files", 0),
        selected_source_files=stats_row.get("selected_source_files", 0),
        processed_files=stats_row.get("processed_files", 0),
        filtered_path_files=stats_row.get("filtered_path_files", 0),
        filtered_large_files=stats_row.get("filtered_large_files", 0),
        truncated_files=stats_row.get("truncated_files", 0),
        failed_files=list(stats_row.get("failed_files", [])),
        scan_duration_ms=stats_row.get("scan_duration_ms", 0),
        timeout_triggered=bool(stats_row.get("timeout_triggered", False)),
        skipped_files=stats_row.get("skipped_files", 0),
    )
    engine.scan_state = payload.get("scan_state", "scanned")
    # Rebuild the analyzer of the same concrete type over the restored graph.
    engine._analyzer = type(engine._analyzer)(engine.graph)
    # Restore route data.
    engine.routes = [
        HttpRoute(**r) for r in payload.get("routes", [])
    ]
    return engine if engine.scan_state == "scanned" else None
621
+
622
+
623
def _load_session_engine(project_root: str, fingerprint: str) -> RepoMapEngine | None:
    """Restore an engine from the on-disk session cache.

    Returns None on any mismatch: missing/corrupt file, different project
    root, or a stale fingerprint. A corrupt cache is a miss, never an error.
    """
    cache_path = get_session_cache_path(project_root)
    if not cache_path.exists():
        return None
    try:
        payload = json.loads(cache_path.read_text(encoding="utf-8"))
    except Exception:
        return None
    if payload.get("project_root") != project_root:
        return None
    if payload.get("fingerprint") != fingerprint:
        return None
    return _restore_engine_from_session_payload(payload)
636
+
637
+
638
def _save_session_engine(project_root: str, fingerprint: str, engine: RepoMapEngine) -> None:
    """Persist a scanned engine to the session cache via an atomic replace.

    No-op unless the engine completed a scan. All failures are swallowed:
    the session cache is an optimization, never a hard requirement.
    """
    if engine.scan_state != "scanned":
        return
    cache_path = get_session_cache_path(project_root)
    cache_path.parent.mkdir(parents=True, exist_ok=True)
    payload = _engine_to_session_payload(project_root, fingerprint, engine)
    try:
        # Write to a temp file in the same directory, then replace() so a
        # concurrent reader never observes a partially written cache file.
        with tempfile.NamedTemporaryFile(
            mode="w",
            encoding="utf-8",
            dir=cache_path.parent,
            prefix="session_scan.",
            suffix=".tmp",
            delete=False,
        ) as handle:
            json.dump(payload, handle, ensure_ascii=False, indent=2)
            tmp_path = Path(handle.name)
        tmp_path.replace(cache_path)
    except Exception:
        # Best-effort cleanup of the temp file; the locals() guard covers
        # failures that occur before tmp_path was ever assigned.
        try:
            if "tmp_path" in locals() and tmp_path.exists():
                tmp_path.unlink()
        except OSError:
            pass
662
+
663
+
664
def _select_symbol_match(
    engine: RepoMapEngine,
    symbol: str,
    *,
    file_path: str | None = None,
) -> tuple[Any | None, str | None]:
    """Resolve *symbol* to a single symbol from the engine's graph.

    Returns ``(match, None)`` on an unambiguous result, otherwise
    ``(None, message)`` where *message* is a user-facing Markdown hint
    (not found, or still ambiguous with candidates listed).
    """
    matches = engine.query_symbol(symbol)
    if not matches:
        return None, f"> 未找到符号 `{symbol}`"

    # Prefer exact-name matches over fuzzy ones whenever any exist.
    exact_matches = [item for item in matches if item.name == symbol]
    candidates = exact_matches or matches

    if file_path:
        filtered = [item for item in candidates if item.file == file_path]
        if not filtered:
            return None, f"> 未找到符号 `{symbol}` 在 `{file_path}` 中的匹配"
        candidates = filtered

    if len(candidates) == 1:
        return candidates[0], None

    # Still ambiguous: list up to 10 candidates and suggest --file-path.
    lines = [f"> 符号 `{symbol}` 存在多个候选,请用 `--file-path` 指定目标文件:"]
    for item in candidates[:10]:
        lines.append(f"- `{item.file}:{item.line}` ({item.kind})")
    if len(candidates) > 10:
        lines.append(f"- ...还有 {len(candidates) - 10} 个候选")
    lines.append(f"\n提示: 使用 `--file-path <file>` 参数来指定目标文件,例如:")
    lines.append(f"  repomap call-chain --symbol {symbol} --file-path {candidates[0].file}")
    return None, "\n".join(lines)
694
+
695
+
696
+ def _group_symbol_matches(results: list[Any], symbol: str) -> tuple[list[Any], list[Any]]:
697
+ exact = [item for item in results if item.name == symbol]
698
+ fuzzy = [item for item in results if item.name != symbol]
699
+ return exact, fuzzy
700
+
701
+
702
def _render_selected_call_chain(engine: RepoMapEngine, symbol: Any, depth: int) -> str:
    """Render a Markdown call-chain report (callers and callees) for *symbol*.

    Both directions are fetched up to *depth* hops; each direction's list is
    capped at 20 entries in the rendered output.
    """
    chain = engine.call_chain(symbol.id, "both", depth)
    lines = [
        f"## 调用链 — `{symbol.name}`\n",
        f"- **类型**: {symbol.kind}",
        f"- **位置**: `{symbol.file}:{symbol.line}`",
        # PageRank values are tiny; x1000 makes them human-readable.
        f"- **重要性**: PR={symbol.pagerank * 1000:.1f}",
    ]
    if symbol.signature:
        lines.append(f"- **签名**: `{symbol.signature}`")
    lines.append("")

    callers = chain["callers"]
    lines.append(f"### 被以下符号调用({len(callers)})\n")
    if callers:
        for caller in callers[:20]:
            lines.append(f"- `{caller.name}` ({caller.kind}) — `{caller.file}:{caller.line}`")
        if len(callers) > 20:
            lines.append(f"- …还有 {len(callers) - 20} 个")
    else:
        lines.append("- (无,可能是入口点)")

    callees = chain["callees"]
    lines.append(f"\n### 调用了以下符号({len(callees)})\n")
    if callees:
        for callee in callees[:20]:
            lines.append(f"- `{callee.name}` ({callee.kind}) — `{callee.file}:{callee.line}`")
        if len(callees) > 20:
            lines.append(f"- …还有 {len(callees) - 20} 个")
    else:
        lines.append("- (无,叶子函数)")

    return "\n".join(lines)
735
+
736
+
737
+ def _truncate_output(text: str, max_chars: int) -> str:
738
+ if max_chars <= 0 or len(text) <= max_chars:
739
+ return text
740
+ return text[:max_chars] + "\n\n…(超出字符限制,已截断)"
741
+
742
+
743
def run_scan(project: str, max_files: int) -> int:
    """`repomap scan` entry point: scan the project and print a short summary.

    Prints scan stats, entry points, and the top-5 densest files, then hints
    at the overview command. Returns 0 on success, 1 on failure (message to
    stderr).
    """
    try:
        engine = _scan_engine(project, max_files)
        hot = engine.hotspots(5)
        entry_points = engine.entry_points()
        lines = [
            f"✅ 扫描完成 — `{engine.project_root}`\n",
            *engine._scan_summary_lines(),
            f"- 入口点: {', '.join(entry_points) or '未检测到'}",
            "\n**高密度文件(Top 5)**:",
        ]
        if engine.scan_stats.truncated_files:
            # NOTE(review): the fixed insert index 6 assumes _scan_summary_lines()
            # always yields the same number of lines — confirm against core.
            lines.insert(6, f"- max_files 截断: {engine.scan_stats.truncated_files}")
        for item in hot:
            lines.append(f"  - `{item['file']}` — {item['symbol_count']} symbols ({item['risk']} risk)")
        lines.append("\n> 建议下一步调用 `repomap overview --project <path>` 获取完整项目地图。")
        print("\n".join(lines))
        return 0
    except Exception as exc:
        print(f"[{CLI_NAME}] scan failed: {exc}", file=sys.stderr)
        return 1
764
+
765
+
766
+ def _route_payload(route: HttpRoute) -> dict[str, Any]:
767
+ return {
768
+ "method": route.method,
769
+ "path": route.path,
770
+ "handler": route.handler,
771
+ "file": route.file,
772
+ "line": route.line,
773
+ "framework": route.framework,
774
+ }
775
+
776
+
777
+
778
+ def _scan_stats_payload(engine: RepoMapEngine) -> dict[str, Any]:
779
+ return {
780
+ "listed_source_files": engine.scan_stats.listed_source_files,
781
+ "selected_source_files": engine.scan_stats.selected_source_files,
782
+ "processed_files": engine.scan_stats.processed_files,
783
+ "filtered_path_files": engine.scan_stats.filtered_path_files,
784
+ "filtered_large_files": engine.scan_stats.filtered_large_files,
785
+ "truncated_files": engine.scan_stats.truncated_files,
786
+ "failed_files": list(engine.scan_stats.failed_files),
787
+ "scan_duration_ms": engine.scan_stats.scan_duration_ms,
788
+ "timeout_triggered": engine.scan_stats.timeout_triggered,
789
+ "symbol_count": len(engine.graph.symbols),
790
+ "edge_count": sum(len(edges) for edges in engine.graph.outgoing.values()),
791
+ }
792
+
793
+
794
def run_overview(project: str, max_files: int, max_chars: int, as_json: bool,
                 with_heat: bool = False, with_co_change: bool = False,
                 granularity: str = "auto") -> int:
    """`repomap overview` entry point: print the project map.

    With *as_json*, emits a structured payload (scan stats, entry points,
    hotspots, reading order, modules, summary symbols, supporting files and,
    when *with_heat*, git-hot files); otherwise renders Markdown via the
    engine. Returns 0 on success, 1 on failure.
    """
    try:
        engine = _scan_engine(project, max_files)

        if as_json:
            payload = {
                "project_root": str(engine.project_root),
                "scan_stats": _scan_stats_payload(engine),
                "entry_points": engine.entry_points(),
                "hotspots": engine.hotspots(DEFAULT_OVERVIEW_JSON_HOTSPOTS),
                "reading_order": engine.suggested_reading_order(DEFAULT_OVERVIEW_JSON_READING_ORDER),
                "modules": engine.module_summary(DEFAULT_OVERVIEW_JSON_MODULES),
                "summary_symbols": engine.summary_symbols(
                    DEFAULT_OVERVIEW_JSON_SUMMARY_FILES,
                    DEFAULT_OVERVIEW_JSON_SYMBOLS_PER_FILE,
                ),
                "supporting_files": engine.supporting_files(DEFAULT_OVERVIEW_JSON_SUPPORTING_FILES),
                # Git heat is optional and only computed when requested.
                "hot_files": list(_get_hot_files(str(engine.project_root))) if with_heat else [],
            }
            print(json.dumps(payload, ensure_ascii=False, indent=2))
            return 0
        print(engine.render_overview(max_chars, with_heat=with_heat, with_co_change=with_co_change, granularity=granularity))
        return 0
    except Exception as exc:
        print(f"[{CLI_NAME}] overview failed: {exc}", file=sys.stderr)
        return 1
822
+
823
+
824
def run_call_chain(
    project: str,
    max_files: int,
    symbol: str,
    file_path: str | None,
    direction: str,
    depth: int,
    max_chars: int,
    as_json: bool,
) -> int:
    """`repomap call-chain` entry point: show callers/callees of one symbol.

    *direction* is "callers", "callees", or "both". Returns 0 on success,
    1 on failure or when the symbol is missing/ambiguous.
    """
    try:
        engine = _scan_engine(project, max_files)
        selected, error = _select_symbol_match(engine, symbol, file_path=file_path)
        if error:
            print(error, file=sys.stderr)
            return 1
        assert selected is not None
        if as_json:
            # JSON mode always fetches both directions; *direction* is only echoed.
            chain = engine.call_chain(selected.id, "both", depth)
            payload = {
                "symbol": {
                    "id": selected.id,
                    "name": selected.name,
                    "kind": selected.kind,
                    "file": selected.file,
                    "line": selected.line,
                    "signature": selected.signature,
                    "pagerank": selected.pagerank,
                },
                "direction": direction,
                "depth": depth,
                "callers": [_format_symbol_ref(engine, item.id) for item in chain["callers"]],
                "callees": [_format_symbol_ref(engine, item.id) for item in chain["callees"]],
            }
            print(json.dumps(payload, ensure_ascii=False, indent=2))
            return 0
        if direction != "both":
            # Single-direction text mode: a flat bullet list, no caps.
            data = engine.call_chain(selected.id, direction, depth)
            lines = [f"## 调用链 — `{selected.name}`\n"]
            for item in data[direction]:
                lines.append(f"- `{item.name}` ({item.file}:{item.line})")
            print(_truncate_output("\n".join(lines), max_chars))
            return 0
        print(_truncate_output(_render_selected_call_chain(engine, selected, depth), max_chars))
        return 0
    except Exception as exc:
        print(f"[{CLI_NAME}] call-chain failed: {exc}", file=sys.stderr)
        return 1
872
+
873
+
874
def _collect_lsp_evidence_for_symbol(engine: RepoMapEngine, symbol: Any, timeout: float) -> dict[str, Any]:
    """Gather LSP definitions/references for *symbol* as a plain dict.

    The LSP bridge is imported lazily so the optional dependency is only
    touched when LSP evidence is actually requested.
    """
    from ..lsp import collect_lsp_symbol_evidence, run_result_to_dict

    run = collect_lsp_symbol_evidence(
        engine.project_root,
        symbol.file,
        symbol.line,
        symbol.name,
        timeout=timeout,
    )
    return run_result_to_dict(run)
885
+
886
+
887
def _format_lsp_evidence(evidence: dict[str, Any]) -> list[str]:
    """Render an LSP evidence dict as Markdown lines.

    Definitions are capped at 10 entries, references at 20; optional keys
    (server, reason) are only printed when present.
    """
    lines = ["", "### LSP evidence", ""]
    lines.append(f"- Status: {evidence.get('status')}")
    if evidence.get("server"):
        lines.append(f"- Server: {evidence['server']}")
    if evidence.get("reason"):
        lines.append(f"- Reason: {evidence['reason']}")
    definitions = evidence.get("definitions", [])
    references = evidence.get("references", [])
    lines.append(f"- Definitions: {len(definitions)}")
    for item in definitions[:10]:
        lines.append(f"  - `{item['file']}:{item['line']}:{item['col']}`")
    lines.append(f"- References: {len(references)}")
    for item in references[:20]:
        lines.append(f"  - `{item['file']}:{item['line']}:{item['col']}`")
    return lines
903
+
904
+
905
def run_query_symbol(
    project: str,
    max_files: int,
    symbol: str,
    file_path: str | None,
    max_chars: int,
    with_lsp: bool = False,
    lsp_timeout: float = 8.0,
) -> int:
    """`repomap query-symbol` entry point: exact + fuzzy symbol lookup.

    Returns 0 on success, EXIT_NO_RESULTS (3) when nothing matched, and 1 on
    failure. With *with_lsp*, appends LSP evidence for the best match.
    """
    try:
        engine = _scan_engine(project, max_files)
        results = engine.query_symbol(symbol)
        if file_path:
            results = [item for item in results if item.file == file_path]
        if not results:
            print(f"> 未找到匹配 `{symbol}` 的符号", file=sys.stderr)
            return EXIT_NO_RESULTS
        exact_matches, fuzzy_matches = _group_symbol_matches(results, symbol)

        lines = [f"找到 {len(results)} 个匹配结果。\n"]
        if file_path:
            lines.append(f"已按文件过滤: `{file_path}`\n")
        if len(exact_matches) > 1 and not file_path:
            lines.append(f"精确匹配有 {len(exact_matches)} 个候选,建议加 `--file-path` 锁定目标文件。\n")

        if exact_matches:
            lines.append(f"## 精确匹配 `{symbol}` ({len(exact_matches)})\n")
            for item in exact_matches[:10]:
                pr = item.pagerank * 1000
                lines.append(f"- **{item.name}** ({item.kind}) `{item.file}:{item.line}` PR={pr:.1f}")
                if item.signature:
                    lines.append(f"  - sig: `{item.signature}`")

        if fuzzy_matches:
            lines.append(f"\n## 模糊匹配 ({len(fuzzy_matches)})\n")
            for item in fuzzy_matches[:10]:
                pr = item.pagerank * 1000
                lines.append(f"- **{item.name}** ({item.kind}) `{item.file}:{item.line}` PR={pr:.1f}")
                if item.signature:
                    lines.append(f"  - sig: `{item.signature}`")

        # NOTE(review): this hint only fires when a single bucket alone exceeds
        # 10 entries; output can be truncated across buckets without triggering
        # it — confirm whether `len(results) > 10` alone was intended.
        if len(results) > 10 and (len(exact_matches) > 10 or len(fuzzy_matches) > 10):
            lines.append("\n> 结果较多,建议补 `--file-path` 缩小范围。")
        if with_lsp:
            # Use the best exact match when available, otherwise the overall best.
            selected = (exact_matches or results)[0]
            lines.extend(_format_lsp_evidence(_collect_lsp_evidence_for_symbol(engine, selected, lsp_timeout)))
        print(_truncate_output("\n".join(lines), max_chars))
        return 0
    except Exception as exc:
        print(f"[{CLI_NAME}] query-symbol failed: {exc}", file=sys.stderr)
        return 1
956
+
957
+
958
def run_file_detail(project: str, max_files: int, file_path: str, max_symbols: int, max_chars: int) -> int:
    """`repomap file-detail` entry point: print the symbols of one file.

    Returns 0 on success, 1 on failure (including a nonexistent *file_path*).
    """
    try:
        engine = _scan_engine(project, max_files)
        normalized_file_path = _normalize_project_relative_path(engine.project_root, file_path, must_exist=True)

        # Auto-tune max_symbols when the caller left it at the default value:
        # large files show up to 50 symbols, medium files show all of them.
        if max_symbols == DEFAULT_FILE_DETAIL_MAX_SYMBOLS:
            file_symbol_count = len(engine.graph.file_symbols.get(normalized_file_path, []))
            if file_symbol_count > 50:
                max_symbols = min(file_symbol_count, 50)  # cap very large files at 50 symbols
            elif file_symbol_count > 20:
                max_symbols = file_symbol_count  # medium files: show every symbol

        print(engine.render_file_detail(normalized_file_path, max_symbols=max_symbols, max_chars=max_chars))
        return 0
    except Exception as exc:
        print(f"[{CLI_NAME}] file-detail failed: {exc}", file=sys.stderr)
        return 1
976
+
977
+
978
def run_routes(project: str, max_files: int, as_json: bool) -> int:
    """`repomap routes` entry point: list detected HTTP routes.

    Returns 0 on success, 1 on failure.
    NOTE(review): the module header documents EXIT_NO_RESULTS (3) for "routes
    为空", but this function returns 0 even with an empty route list — confirm
    which behavior is intended.
    """
    try:
        engine = _scan_engine(project, max_files)
        if as_json:
            payload = {
                "command": "routes",
                "project": str(engine.project_root),
                "scanStats": _scan_stats_payload(engine),
                "routes": [_route_payload(route) for route in engine.list_routes()],
            }
            print(json.dumps(payload, ensure_ascii=False, indent=2))
            return 0
        print(render_routes_report(engine))
        return 0
    except Exception as exc:
        print(f"[{CLI_NAME}] routes failed: {exc}", file=sys.stderr)
        return 1
995
+
996
+
997
def run_hotspots(project: str, max_files: int, limit: int) -> int:
    """`repomap hotspots` entry point: rank files by symbol density.

    Returns 0 on success, 1 on failure.
    """
    try:
        engine = _scan_engine(project, max_files)
        hotspots = engine.hotspots(limit)
        # Risk labels come from engine.hotspots(); an unexpected label would
        # raise KeyError here and be caught by the broad handler below.
        risk_mark = {"high": "🔴", "medium": "🟡", "low": "🟢"}
        lines = ["## 高密度文件(符号数排名)\n"]
        for index, item in enumerate(hotspots, 1):
            lines.append(f"{index}. {risk_mark[item['risk']]} `{item['file']}` — **{item['symbol_count']}** 个符号")
        print("\n".join(lines))
        return 0
    except Exception as exc:
        print(f"[{CLI_NAME}] hotspots failed: {exc}", file=sys.stderr)
        return 1
1010
+
1011
+
1012
def run_cache(project: str, action: str) -> int:
    """`repomap cache` entry point: persist a graph baseline for later diffing.

    Only the "save" action is supported. Returns 0 on success, 2 for an
    unsupported action (invalid args), 1 on failure.
    """
    project_path = _resolve_project(project)
    if action != "save":
        print(f"[{CLI_NAME}] unsupported cache action: {action}", file=sys.stderr)
        return 2
    try:
        symbols, edges = scan_project(project_path)
        cache_path = save_cache(project_path, symbols, edges)
        print(
            "✅ Graph baseline saved for a future comparison\n"
            f"- Path: `{cache_path}`\n"
            f"- Symbols: {len(symbols)}\n"
            f"- Edges: {len(edges)}\n"
            "- Use before the target edits; saving after edits cannot prove those edits are safe."
        )
        return 0
    except Exception as exc:
        print(f"[{CLI_NAME}] cache save failed: {exc}", file=sys.stderr)
        return 1
1031
+
1032
+
1033
+
1034
def run_diff(project: str, as_json: bool) -> int:
    """`repomap diff` entry point: compare the current graph to the saved baseline.

    Prints either raw JSON or a Markdown change summary (added/removed/
    modified symbols and call-edge changes). Returns 0 on success, 1 when
    diff_project reports an error.
    """
    result = diff_project(_resolve_project(project))
    if "error" in result:
        print(result["error"], file=sys.stderr)
        return 1
    if as_json:
        print(json.dumps(result, ensure_ascii=False, indent=2))
        return 0
    lines = ["## 变更检测\n"]
    lines.append(f"**对比**: {result.get('last_scan', 'unknown')} → {result.get('scan_time', datetime.now().isoformat())}\n")
    lines.append(f"- 新增符号: {result['summary']['added']}")
    lines.append(f"- 删除符号: {result['summary']['removed']}")
    lines.append(f"- 修改符号: {result['summary']['modified']}")
    lines.append(f"- 新增调用: {result['summary']['edges_added']}")
    lines.append(f"- 删除调用: {result['summary']['edges_removed']}\n")
    if result["added_symbols"]:
        lines.append("**新增符号** (Top 10):")
        for item in result["added_symbols"][:10]:
            lines.append(f"  - `{item['name']}` ({item['file']}:{item['line']})")
    if result["call_chain_changes"]["new_calls"]:
        lines.append("\n**新增调用关系** (Top 10):")
        for change in result["call_chain_changes"]["new_calls"][:10]:
            # Symbol ids appear to follow "<file>::<name>::<line>", making
            # [-2] the symbol name — TODO confirm against the id format.
            src_name = change["from"].split("::")[-2] if "::" in change["from"] else change["from"]
            tgt_name = change["to"].split("::")[-2] if "::" in change["to"] else change["to"]
            lines.append(f"  - `{src_name}` -[{change['kind']}]-> `{tgt_name}`")
    print("\n".join(lines))
    return 0
1061
+
1062
+
1063
def run_query(
    project: str,
    max_files: int,
    query: str,
    max_result_files: int,
    max_result_symbols: int,
    no_tests: bool,
    as_json: bool,
    paths: str | None,
    exclude: str | None,
) -> int:
    """`repomap query` entry point: topic search over files and symbols.

    *paths* / *exclude* are comma-separated path prefixes narrowing the
    candidate set; *no_tests* drops test-like files from both search and
    test discovery. Prints Markdown or a JSON payload. Returns 0 on success,
    1 on failure.
    """
    try:
        engine = _scan_engine(project, max_files)
        analysis = engine.file_analysis()

        # Narrow the candidate file set (path prefixes, exclusions, tests).
        candidate_files = list(engine.graph.file_symbols.keys())
        if paths:
            allowed = {_normalize_path_prefix(engine.project_root, p) for p in paths.split(",") if p.strip()}
            candidate_files = [f for f in candidate_files if any(_path_matches_prefix(f, a) for a in allowed)]
        if exclude:
            excluded = {_normalize_path_prefix(engine.project_root, e) for e in exclude.split(",") if e.strip()}
            candidate_files = [f for f in candidate_files if not any(_path_matches_prefix(f, e) for e in excluded)]
        if no_tests:
            candidate_files = [f for f in candidate_files if not is_test_like_file(f)]

        # Keyword weighting: keywords that hit too many files get down-weighted.
        kw_weights = compute_keyword_weights(query.lower().split(), candidate_files, engine.graph)

        # Topic scoring per candidate file; only positive scores become matches.
        matches: list[FileMatch] = []
        for file_path in candidate_files:
            file_data = analysis.get(file_path, {})
            score = topic_score(query, file_path, file_data, engine.graph, keyword_weights=kw_weights)
            if score > 0:
                role = classify_file_role(file_path, engine.graph)
                reasons = _build_match_reasons(query, file_path, engine.graph)
                matches.append(FileMatch(path=file_path, role=role, score=score, reasons=reasons))

        # Highest score first; path breaks ties deterministically.
        matches.sort(key=lambda m: (-m.score, m.path))
        top_matches = matches[:max_result_files]

        # Locate tests related to the top non-test matches.
        tests: list[TestMatch] = []
        if not no_tests:
            target_files = [m.path for m in top_matches if not is_test_like_file(m.path)]
            tests = find_related_tests(target_files, engine.graph, analysis, engine.project_root)

        if as_json:
            payload = {
                "command": "query",
                "project": str(engine.project_root),
                "query": query,
                "scanStats": _scan_stats_payload(engine),
                "result": {
                    "filesConsidered": len(candidate_files),
                    "matchedFiles": len(matches),
                    "readingOrder": _build_query_reading_order(top_matches, analysis, max_result_files),
                    # Score >= 30 splits core from supporting files.
                    "coreFiles": [
                        {"path": m.path, "role": m.role, "score": m.score, "reasons": m.reasons}
                        for m in top_matches if m.score >= 30 and not is_test_like_file(m.path)
                    ],
                    "supportingFiles": [
                        {"path": m.path, "role": m.role, "score": m.score, "reasons": m.reasons}
                        for m in top_matches if m.score < 30
                    ],
                    "tests": [
                        {"testFile": t.test_file, "targetFile": t.target_file,
                         "confidence": t.confidence, "reason": t.reason}
                        for t in tests
                    ],
                    "symbols": _query_symbols_json(engine, top_matches, max_result_symbols),
                },
            }
            print(json.dumps(payload, ensure_ascii=False, indent=2))
            return 0

        print(render_query_report(engine, query, top_matches, tests, max_result_files, max_result_symbols))
        return 0
    except Exception as exc:
        print(f"[{CLI_NAME}] query failed: {exc}", file=sys.stderr)
        return 1
1145
+
1146
+
1147
def _build_match_reasons(query: str, file_path: str, graph: RepoGraph) -> list[str]:
    """Build up to three human-readable reasons why *file_path* matched *query*.

    Checks each query keyword against the full path, the file stem, and the
    stem's identifier tokens. Note: *graph* is not referenced in the body;
    it is kept for signature parity with the scoring helpers.
    """
    reasons: list[str] = []
    keywords = query.lower().split()
    path_lower = file_path.lower()
    file_name = PurePosixPath(file_path).stem.lower()
    tokens = split_identifier(PurePosixPath(file_path).stem)

    for kw in keywords:
        # A keyword in the file name also appears in the path, so a single
        # keyword can contribute two reasons here.
        if kw in path_lower:
            reasons.append(f"路径包含 {kw}")
        if kw in file_name:
            reasons.append(f"文件名命中 {kw}")
        elif any(kw in t for t in tokens):
            reasons.append(f"文件名拆分匹配 {kw}")
    return reasons[:3]
1163
+
1164
+
1165
def _query_symbols_json(
    engine: RepoMapEngine,
    matches: list[FileMatch],
    max_symbols: int,
) -> list[dict[str, Any]]:
    """Extract a capped, ranked symbol list for JSON output.

    Walks matched files in order, appending that file's ranked symbols until
    *max_symbols* entries have been collected.
    """
    result: list[dict[str, Any]] = []
    for m in matches:
        if len(result) >= max_symbols:
            break
        for sym in _rank_symbols_for_file(engine, m.path):
            if len(result) >= max_symbols:
                break
            result.append({
                "name": sym["name"],
                "kind": sym["kind"],
                "file": m.path,
                "line": sym["line"],
                "role": classify_file_role(m.path, engine.graph),
            })
    return result
1186
+
1187
+
1188
def _impact_key_symbols(engine: RepoMapEngine, target_files: list[str], limit_per_file: int = 8) -> list[dict[str, Any]]:
    """List the most important symbols per target file for the impact report.

    Symbols are ranked by PageRank (desc), then fan-in (desc), then source
    order; at most *limit_per_file* entries are emitted per file.
    """
    result: list[dict[str, Any]] = []
    for file_path in target_files:
        symbols = [
            engine.graph.symbols[sid]
            for sid in engine.graph.file_symbols.get(file_path, [])
            if sid in engine.graph.symbols
        ]
        symbols.sort(
            key=lambda symbol: (
                -symbol.pagerank,
                -len(engine.graph.incoming.get(symbol.id, [])),
                symbol.line,
                symbol.name,
            )
        )
        for symbol in symbols[:limit_per_file]:
            result.append({
                "name": symbol.name,
                "kind": symbol.kind,
                "file": symbol.file,
                "line": symbol.line,
                "pagerank": symbol.pagerank,
                "incomingCount": len(engine.graph.incoming.get(symbol.id, [])),
                "outgoingCount": len(engine.graph.outgoing.get(symbol.id, [])),
                "signature": symbol.signature,
            })
    return result
1216
+
1217
+
1218
+ def _impact_read_next(
1219
+ target_files: list[str],
1220
+ affected_list: list[tuple[str, str, str]],
1221
+ tests: list[TestMatch],
1222
+ limit: int = 10,
1223
+ ) -> list[dict[str, str]]:
1224
+ result: list[dict[str, str]] = []
1225
+ seen: set[str] = set()
1226
+
1227
+ def add(path: str, reason: str, role: str) -> None:
1228
+ if len(result) >= limit or path in seen:
1229
+ return
1230
+ seen.add(path)
1231
+ result.append({"file": path, "reason": reason, "role": role})
1232
+
1233
+ for file_path in target_files:
1234
+ add(file_path, "target file", "target")
1235
+ for file_path, why, confidence in affected_list:
1236
+ if confidence == "high":
1237
+ add(file_path, why, "affected")
1238
+ for test in tests:
1239
+ add(test.test_file, test.reason, "test")
1240
+ for file_path, why, _confidence in affected_list:
1241
+ add(file_path, why, "affected")
1242
+ return result
1243
+
1244
+
1245
def _impact_lsp_hint(project_root: str | Path, target_files: list[str]) -> dict[str, Any]:
    """Probe which LSP servers could deepen the impact analysis.

    Returns {"available", "servers", "suggestedCommands"} (plus "reason" when
    the LSP bridge itself cannot be imported). Detection is best-effort and
    deduplicated per (language, server) pair.
    """
    try:
        from ..lsp import detect_lsp_server, detection_to_dict, language_for_file
    except Exception as exc:
        return {"available": False, "servers": [], "suggestedCommands": [], "reason": str(exc)}

    servers: list[dict[str, Any]] = []
    seen: set[tuple[str, str]] = set()
    for file_path in target_files:
        language = language_for_file(file_path)
        if not language:
            continue
        detection = detect_lsp_server(project_root, language, file_path)
        # Report each (language, server) combination only once.
        key = (detection.language, detection.server_name)
        if key in seen:
            continue
        seen.add(key)
        servers.append(detection_to_dict(detection))
    available = any(server.get("status") == "available" for server in servers)
    suggested: list[str] = []
    if available and target_files:
        files_arg = " ".join(target_files)
        suggested.append(f"repomap diagnostics --project {project_root} --source lsp --files {files_arg}")
        suggested.append(f"repomap refs --project {project_root} --symbol <symbol> --file-path <file> --with-lsp")
    return {"available": available, "servers": servers, "suggestedCommands": suggested}
1270
+
1271
+
1272
def run_impact(
    project: str,
    max_files: int,
    target_files: list[str],
    max_affected_files: int,
    as_json: bool,
    with_symbols: bool = False,
    depth: int = 1,
    incremental: bool = False,
) -> int:
    """`repomap impact` entry point: estimate the blast radius of editing *target_files*.

    Direct callers of the target symbols are reported with high confidence,
    direct callees with medium, and (when *depth* > 1) transitively reached
    files with low confidence. Related tests and a three-layer risk
    assessment are included. Returns 0 on success, 1 on failure.
    """
    try:
        engine = _scan_engine(project, max_files, incremental=incremental)

        target_files = _normalize_project_relative_paths(engine.project_root, target_files)

        # Collect every symbol declared in the target files.
        target_symbols: set[str] = set()
        for f in target_files:
            for sid in engine.graph.file_symbols.get(f, []):
                target_symbols.add(sid)

        # Direct impact: incoming edges (who references us) are high
        # confidence; outgoing edges (whom we call) are medium and must not
        # overwrite an existing high-confidence entry.
        affected_files: dict[str, tuple[str, str]] = {}  # file -> (why, confidence)
        for sid in target_symbols:
            for edge in engine.graph.incoming.get(sid, []):
                caller = engine.graph.symbols.get(edge.source)
                if caller and caller.file not in target_files:
                    affected_files[caller.file] = (
                        f"引用了 {_sym_name(engine, sid)}",
                        "high",
                    )

            for edge in engine.graph.outgoing.get(sid, []):
                callee = engine.graph.symbols.get(edge.target)
                if callee and callee.file not in target_files:
                    if callee.file not in affected_files:
                        affected_files[callee.file] = (
                            f"输入文件调用了 {callee.name}(via {_sym_name(engine, sid)})",
                            "medium",
                        )

        # Transitive expansion: BFS outwards from already-affected files,
        # adding deeper files with low confidence.
        if depth > 1 and affected_files:
            processed_files = set(target_files) | set(affected_files)
            frontier: set[str] = set(affected_files)
            for current_depth in range(1, depth):
                next_frontier: set[str] = set()
                for affected_file in frontier:
                    for sid in engine.graph.file_symbols.get(affected_file, []):
                        # Who calls symbols of this affected file?
                        for edge in engine.graph.incoming.get(sid, []):
                            src_sym = engine.graph.symbols.get(edge.source)
                            if src_sym and src_sym.file not in processed_files:
                                next_frontier.add(src_sym.file)
                                if src_sym.file not in affected_files:
                                    affected_files[src_sym.file] = (
                                        f"传递影响 depth={current_depth + 1}: 调用了 {affected_file} 中的 {src_sym.name}",
                                        "low",
                                    )
                        # Whom do symbols of this affected file call?
                        for edge in engine.graph.outgoing.get(sid, []):
                            tgt_sym = engine.graph.symbols.get(edge.target)
                            if tgt_sym and tgt_sym.file not in processed_files:
                                next_frontier.add(tgt_sym.file)
                                if tgt_sym.file not in affected_files:
                                    affected_files[tgt_sym.file] = (
                                        f"传递影响 depth={current_depth + 1}: 被 {affected_file} 中的 {_sym_name(engine, sid)} 调用",
                                        "low",
                                    )
                processed_files |= next_frontier
                frontier = next_frontier
                if not frontier:
                    break

        # Related tests for the target files.
        analysis = engine.file_analysis()
        tests = find_related_tests(target_files, engine.graph, analysis, engine.project_root)

        # Three-layer risk assessment (structure / domain keywords / change type).
        risk_level, risk_notes = _assess_risk(target_files, set(affected_files), engine)

        affected_list = [(f, why, conf) for f, (why, conf) in affected_files.items()]
        # FIX: previously this list was sorted twice and the first sort's
        # result was immediately discarded by the second. One sort suffices:
        # confidence (high > medium > low), then external-call severity
        # (most-referenced first), then path for deterministic tie-breaking.
        confidence_rank = {"high": 3, "medium": 2, "low": 1}
        affected_list.sort(key=lambda x: (
            -confidence_rank.get(x[2], 0),
            -_affected_severity(x[0], engine),
            x[0],
        ))
        affected_list = affected_list[:max_affected_files]
        key_symbols = _impact_key_symbols(engine, target_files) if with_symbols else []
        read_next = _impact_read_next(target_files, affected_list, tests)
        lsp_hint = _impact_lsp_hint(engine.project_root, target_files) if with_symbols else {}

        if as_json:
            payload = {
                "schema_version": "1.0",
                "command": "impact",
                "project": str(engine.project_root),
                "scanStats": _scan_stats_payload(engine),
                "result": {
                    "inputFiles": target_files,
                    "affectedFiles": [
                        {"file": f, "why": why, "confidence": conf}
                        for f, why, conf in affected_list
                    ],
                    "tests": [
                        {"testFile": t.test_file, "targetFile": t.target_file,
                         "confidence": t.confidence, "reason": t.reason}
                        for t in tests
                    ],
                    "riskLevel": risk_level,
                    "riskNotes": risk_notes,
                    "keySymbols": key_symbols,
                    "readNext": read_next,
                    "lspHint": lsp_hint,
                },
            }
            print(json.dumps(payload, ensure_ascii=False, indent=2))
            return 0

        print(render_impact_report(
            engine,
            target_files,
            affected_list,
            tests,
            risk_level,
            risk_notes,
            key_symbols=key_symbols,
            read_next=read_next,
            lsp_hint=lsp_hint,
        ))
        return 0
    except Exception as exc:
        print(f"[{CLI_NAME}] impact failed: {exc}", file=sys.stderr)
        return 1
1413
+
1414
+
1415
+ def _affected_severity(file_path: str, engine: RepoMapEngine) -> int:
1416
+ """计算受影响文件的严重程度:文件中符号被外部调用的总次数。"""
1417
+ total = 0
1418
+ for sid in engine.graph.file_symbols.get(file_path, []):
1419
+ for edge in engine.graph.incoming.get(sid, []):
1420
+ if edge.kind == "call":
1421
+ src_sym = engine.graph.symbols.get(edge.source)
1422
+ if src_sym and src_sym.file != file_path:
1423
+ total += 1
1424
+ return total
1425
+
1426
+
1427
def _assess_risk(
    target_files: list[str],
    affected_files: set[str],
    engine: RepoMapEngine,
) -> tuple[str, list[str]]:
    """Three-layer risk model; returns (risk_level, risk_notes).

    Layers: structural coupling, risky domain keywords in the paths, and
    change type (config / type-definition files). Layer scores are summed
    and bucketed: >= 6 -> "high", >= 3 -> "medium", else "low".
    """
    risk_notes: list[str] = []
    total_score = 0

    # Layer 1: structural risk — heavily-coupled files and high-PageRank symbols.
    analysis = engine.file_analysis()
    structural_risk = 0
    for f in target_files:
        file_data = analysis.get(f, {})
        nc = file_data.get("neighbor_count", 0)
        if nc >= 10:
            structural_risk += 4
            risk_notes.append(f"`{f}` 被 {nc} 个文件关联,改动影响面很大")
        elif nc >= 5:
            structural_risk += 3
            risk_notes.append(f"`{f}` 被 {nc} 个文件关联,改动影响面大")
        for sid in engine.graph.file_symbols.get(f, []):
            sym = engine.graph.symbols.get(sid)
            if sym and sym.pagerank > 0.01:
                structural_risk += 1
                break  # one high-PageRank symbol per file is enough
    total_score += structural_risk

    # Layer 2: domain-keyword risk over the combined target + affected paths.
    domain_risk = 0
    risk_keywords_high = ["auth", "token", "session", "password", "security",
                          "migration", "database", "schema", "persistence"]
    risk_keywords_medium = ["terminal", "websocket", "pty", "input", "config",
                           "build", "deploy", "ci"]
    all_paths = " ".join(target_files + list(affected_files)).lower()
    for kw in risk_keywords_high:
        if kw in all_paths:
            domain_risk += 3
    for kw in risk_keywords_medium:
        if kw in all_paths:
            domain_risk += 1
    if domain_risk >= 6:
        risk_notes.append(f"涉及高风险领域(认证/安全/数据持久化)")
    elif domain_risk >= 3:
        risk_notes.append(f"涉及中风险领域(终端/配置/构建)")
    total_score += domain_risk

    # Layer 3: change-type risk — config and type-definition edits ripple widely.
    change_type_risk = 0
    for f in target_files:
        if is_test_like_file(f):
            pass  # test-only changes are low risk by themselves
        elif any(f.endswith(ext) for ext in [".config.ts", ".config.js", "package.json"]):
            change_type_risk += 2
            risk_notes.append(f"`{f}` 是配置文件变更,影响全局")
        elif "types" in PurePosixPath(f).parts or f.endswith(".d.ts"):
            change_type_risk += 1
            risk_notes.append(f"`{f}` 是类型定义变更,影响面大")
    total_score += change_type_risk

    level = "high" if total_score >= 6 else "medium" if total_score >= 3 else "low"
    return level, risk_notes
1489
+
1490
+
1491
+ def _parse_git_status_porcelain_paths(output: str) -> list[str]:
1492
+ paths: list[str] = []
1493
+ for line in output.splitlines():
1494
+ if not line:
1495
+ continue
1496
+ if len(line) < 4:
1497
+ continue
1498
+ path = line[3:]
1499
+ if " -> " in path:
1500
+ path = path.split(" -> ")[-1]
1501
+ path = path.strip()
1502
+ if path:
1503
+ paths.append(path)
1504
+ return paths
1505
+
1506
+
1507
def _collect_changed_files(project_root: str | Path) -> tuple[list[str], str | None]:
    """Return (changed_files, error) from `git status` for *project_root*.

    Paths from git (relative to the repo toplevel) are re-based onto
    *project_root* and returned as POSIX-style relative paths; files outside
    the project root are dropped. On git failure the error message is
    returned as the second element.
    NOTE(review): a subprocess timeout raises TimeoutExpired out of this
    function — confirm callers handle it.
    """
    project_path = Path(project_root).resolve()
    git_root_result = subprocess.run(
        ["git", "rev-parse", "--show-toplevel"],
        cwd=project_path, capture_output=True, text=True, timeout=10,
    )
    git_root = git_root_result.stdout.strip()
    if git_root_result.returncode != 0 or not git_root:
        return [], f"git root failed: {git_root_result.stderr.strip() or 'not a git repository'}"

    status = subprocess.run(
        ["git", "status", "--porcelain"],
        cwd=project_path, capture_output=True, text=True, timeout=10,
    )
    if status.returncode != 0:
        return [], f"git status failed: {status.stderr.strip() or status.stdout.strip()}"

    changed_files: list[str] = []
    for git_relative_path in _parse_git_status_porcelain_paths(status.stdout):
        # git paths are relative to the repo toplevel, which may be above
        # project_root when scanning a sub-directory of a larger repo.
        abs_path = Path(git_root, git_relative_path).resolve()
        try:
            changed_files.append(abs_path.relative_to(project_path).as_posix())
        except ValueError:
            # Changed file lives outside the project root — ignore it.
            pass
    return changed_files, None
1532
+
1533
+
1534
def _diff_risk_evidence(engine: RepoMapEngine, changed_files: list[str]) -> dict[str, Any]:
    """Build impact/risk evidence for a set of git-changed files.

    Returns a dict with the affected-file list (external callers of changed
    symbols, all high confidence), related tests, an overall risk level with
    reasons, and missing-test-coverage suggestions per changed language.
    """
    analysis = engine.file_analysis()

    # All symbols declared in the changed files.
    target_symbols: set[str] = set()
    for file_path in changed_files:
        for symbol_id in engine.graph.file_symbols.get(file_path, []):
            target_symbols.add(symbol_id)

    # Files outside the change set that reference a changed symbol.
    affected_files_dict: dict[str, tuple[str, str]] = {}
    for symbol_id in target_symbols:
        for edge in engine.graph.incoming.get(symbol_id, []):
            caller = engine.graph.symbols.get(edge.source)
            if caller and caller.file not in changed_files:
                affected_files_dict[caller.file] = (
                    f"引用了变更符号 {_sym_name(engine, symbol_id)}",
                    "high",
                )

    affected_list = [(file_path, why, confidence) for file_path, (why, confidence) in affected_files_dict.items()]
    affected_list.sort(key=lambda item: (item[2], item[0]))

    # Risk and test discovery are judged on non-test source files only.
    source_files = [file_path for file_path in changed_files if not is_test_like_file(file_path)]
    tests = find_related_tests(source_files, engine.graph, analysis, engine.project_root)
    risk_level, risk_reasons = _assess_risk(source_files, set(file_path for file_path, _, _ in affected_list), engine)

    # Flag missing test coverage per changed file extension.
    missing_checks: list[str] = []
    all_exts = set(Path(file_path).suffix for file_path in changed_files)
    if ".ts" in all_exts or ".tsx" in all_exts:
        if not any(test.test_file.endswith((".ts", ".tsx")) for test in tests):
            missing_checks.append("没有检测到前端测试文件变更,建议补充前端测试")
    if ".py" in all_exts:
        if not any(test.test_file.endswith(".py") for test in tests):
            missing_checks.append("没有检测到 Python 测试文件变更,建议补充后端测试")

    return {
        "affectedList": affected_list,
        "tests": tests,
        "riskLevel": risk_level,
        "riskReasons": risk_reasons,
        "missingChecks": missing_checks,
    }
1575
+
1576
+
1577
+
1578
+
1579
def _run_check_payload(
    project_root: str,
    types: list[str] | None,
    max_issues: int,
    modified_files: list[str] | None,
    resolve_symbols: bool,
    with_lsp: bool,
    lsp_timeout: float,
    lsp_max_files: int,
) -> dict[str, Any]:
    """Run RepoMapChecker and return its raw result payload.

    When *resolve_symbols* is set, a full project scan supplies the symbol
    map so diagnostics can be attached to known symbols.
    """
    symbols_map = _scan_engine(project_root, 8000).graph.symbols if resolve_symbols else None
    return RepoMapChecker(project_root, max_issues).check(
        types=types,
        resolve_symbols=resolve_symbols and symbols_map is not None,
        symbols_map=symbols_map,
        modified_files=modified_files,
        with_lsp=with_lsp,
        lsp_timeout=lsp_timeout,
        lsp_max_files=lsp_max_files,
    )
1603
+
1604
+
1605
+ def _verify_lsp_payload(
1606
+ project_root: str,
1607
+ changed_files: list[str],
1608
+ enabled: bool,
1609
+ timeout: float,
1610
+ max_files: int,
1611
+ ) -> dict[str, Any]:
1612
+ if not enabled:
1613
+ return {"enabled": False, "status": "skipped", "runs": [], "summary": {}}
1614
+ if not changed_files:
1615
+ return {"enabled": True, "status": "skipped", "runs": [], "summary": {}, "reason": "no changed files"}
1616
+ try:
1617
+ from ..lsp import collect_lsp_diagnostics, run_result_to_dict
1618
+
1619
+ runs = collect_lsp_diagnostics(project_root, changed_files, timeout=timeout, max_files=max_files)
1620
+ run_dicts = [run_result_to_dict(run) for run in runs]
1621
+ total_errors = sum(1 for run in runs for item in run.diagnostics if item.severity == "error")
1622
+ total_warnings = sum(1 for run in runs for item in run.diagnostics if item.severity != "error")
1623
+ failed_runs = sum(1 for run in runs if run.status in {"failed", "timeout"})
1624
+ skipped_runs = sum(1 for run in runs if run.status == "skipped")
1625
+ status = "failed" if total_errors or failed_runs else "passed"
1626
+ if skipped_runs and skipped_runs == len(runs):
1627
+ status = "skipped"
1628
+ return {
1629
+ "enabled": True,
1630
+ "status": status,
1631
+ "runs": run_dicts,
1632
+ "summary": {
1633
+ "totalErrors": total_errors,
1634
+ "totalWarnings": total_warnings,
1635
+ "failedRuns": failed_runs,
1636
+ "skippedRuns": skipped_runs,
1637
+ },
1638
+ }
1639
+ except Exception as exc:
1640
+ return {"enabled": True, "status": "failed", "runs": [], "summary": {}, "reason": str(exc)}
1641
+
1642
+
1643
+ def _verify_graph_diff_payload(project_root: str, enabled: bool, incoming_map: dict | None = None) -> dict[str, Any]:
1644
+ if not enabled:
1645
+ return {"enabled": False, "status": "skipped", "summary": {}, "breakingChanges": []}
1646
+ result = diff_project(project_root)
1647
+ if "error" in result:
1648
+ return {"enabled": True, "status": "skipped", "summary": {}, "breakingChanges": [], "reason": result["error"]}
1649
+ # 如果提供了 incoming_map,二次调用带调用者分析的 compare
1650
+ if incoming_map is not None:
1651
+ from ..toolkit import load_cache
1652
+ from .. import compare_graph_snapshots
1653
+ cache = load_cache(project_root)
1654
+ if cache:
1655
+ current_symbols, current_edges = scan_project(project_root, max_files=5000)
1656
+ enriched = compare_graph_snapshots(
1657
+ current_symbols=current_symbols, current_edges=current_edges,
1658
+ previous_symbols=cache.symbols, previous_edges=cache.edges,
1659
+ incoming_map=incoming_map,
1660
+ )
1661
+ breaking = [
1662
+ ms for ms in enriched.get("modified_symbols", [])
1663
+ if ms.get("risk") in ("HIGH", "MEDIUM") and ms.get("signature_changed")
1664
+ ]
1665
+ result["breakingChanges"] = breaking[:20]
1666
+ if "breakingChanges" not in result:
1667
+ result["breakingChanges"] = []
1668
+ summary = result.get("summary", {})
1669
+ changed = any(summary.get(key, 0) for key in ("added", "removed", "modified", "edges_added", "edges_removed"))
1670
+ result["status"] = "changed" if changed else "unchanged"
1671
+ return result
1672
+
1673
+
1674
+ def _overall_verify_status(
1675
+ changed_files: list[str],
1676
+ risk_level: str,
1677
+ missing_checks: list[str],
1678
+ check_payload: dict[str, Any],
1679
+ lsp_payload: dict[str, Any],
1680
+ graph_diff_payload: dict[str, Any],
1681
+ ) -> str:
1682
+ if check_payload.get("status") == "failed" or lsp_payload.get("status") == "failed":
1683
+ return "failed"
1684
+ if not changed_files:
1685
+ return "warning"
1686
+ if risk_level == "high" or missing_checks or graph_diff_payload.get("status") == "changed":
1687
+ return "warning"
1688
+ if check_payload.get("status") in {"warning", "unknown"}:
1689
+ return "warning"
1690
+ return "passed"
1691
+
1692
+
1693
def run_verify(
    project: str,
    as_json: bool,
    types: list[str] | None,
    max_issues: int,
    resolve_symbols: bool,
    with_lsp: bool,
    lsp_timeout: float,
    lsp_max_files: int,
    with_diff: bool,
    quick: bool = False,
    incremental: bool = False,
) -> int:
    """CLI entry: verify the current git change set against the code graph.

    Collects changed files from git, scans the project, computes risk
    evidence, and (unless *quick*) runs the compile checker and LSP
    diagnostics; optionally (*with_diff*) compares against the cached
    graph snapshot.  Prints either a JSON payload or a rendered report.
    Returns 1 when the overall status is "failed" or an error occurs,
    otherwise 0.
    """
    try:
        project_root = _resolve_project(project)
        changed_files, error = _collect_changed_files(project_root)
        if error:
            print(f"[{CLI_NAME}] verify failed: {error}", file=sys.stderr)
            return 1

        engine = _scan_engine(project_root, 8000, incremental=incremental)
        evidence = _diff_risk_evidence(engine, changed_files)

        # quick mode skips the expensive compiler/LSP passes entirely.
        if quick:
            check_payload = {"status": "skipped", "summary": {}, "runs": [], "reason": "verify --quick"}
            lsp_payload = {"enabled": False, "status": "skipped", "runs": [], "summary": {}, "reason": "verify --quick"}
        else:
            check_payload = _run_check_payload(
                project_root=project_root,
                types=types,
                max_issues=max_issues,
                modified_files=changed_files,
                resolve_symbols=resolve_symbols,
                # LSP runs separately below, so the checker skips it.
                with_lsp=False,
                lsp_timeout=lsp_timeout,
                lsp_max_files=lsp_max_files,
            )
            lsp_payload = _verify_lsp_payload(project_root, changed_files, with_lsp, lsp_timeout, lsp_max_files)

        graph_diff_payload = _verify_graph_diff_payload(
            project_root, with_diff,
            incoming_map=engine.graph.incoming if with_diff else None,
        )
        status = _overall_verify_status(
            changed_files,
            evidence["riskLevel"],
            evidence["missingChecks"],
            check_payload,
            lsp_payload,
            graph_diff_payload,
        )
        untested = find_untested_symbols(engine.graph) if not quick else []

        payload = {
            "schema_version": "1.0",
            "command": "verify",
            "project": str(engine.project_root),
            "scanStats": _scan_stats_payload(engine),
            "result": {
                "status": status,
                "changedFiles": changed_files,
                "risk": {
                    "level": evidence["riskLevel"],
                    "reasons": evidence["riskReasons"],
                    "missingChecks": evidence["missingChecks"],
                },
                "affectedFiles": [
                    {"file": file_path, "why": why, "confidence": confidence}
                    for file_path, why, confidence in evidence["affectedList"]
                ],
                "tests": [
                    {"testFile": test.test_file, "targetFile": test.target_file,
                     "confidence": test.confidence, "reason": test.reason}
                    for test in evidence["tests"]
                ],
                "untestedSymbols": untested,
                "check": {
                    "status": check_payload.get("status", "unknown"),
                    "summary": check_payload.get("summary", {}),
                    "incremental": check_payload.get("incremental", {}),
                    "runs": check_payload.get("runs", []),
                    "errorsByFile": check_payload.get("errors_by_file", {}),
                },
                "lsp": lsp_payload,
                "graphDiff": graph_diff_payload,
            },
        }
        if as_json:
            print(json.dumps(payload, ensure_ascii=False, indent=2))
        else:
            print(render_verify_report(payload))

        # Suggest next steps (on stderr) when there are no git changes.
        if not changed_files:
            print("\n> 未检测到 git 变更。", file=sys.stderr)
            if quick:
                print("> verify --quick 模式只分析 git 变更,没有变更时无法提供风险评估。", file=sys.stderr)
                print("> 建议: 先进行代码修改,然后运行 `repomap verify` 进行完整验证。", file=sys.stderr)
            else:
                print("> 建议: 使用 `repomap overview` 了解项目结构,或使用 `repomap check` 进行编译检查。", file=sys.stderr)

        return 1 if status == "failed" else 0
    except Exception as exc:
        print(f"[{CLI_NAME}] verify failed: {exc}", file=sys.stderr)
        return 1
1798
+
1799
+
1800
def run_git_history(project: str, max_files: int, symbol: str, file_path: str | None) -> int:
    """CLI entry: show git blame/log context for a single symbol.

    Resolves *symbol* (optionally disambiguated by *file_path*) in the
    scanned graph, then shells out to ``git blame`` for the defining line
    and ``git log --follow`` for recent commits touching the file.
    Returns 0 on success, 1 on failure.
    """
    try:
        engine = _scan_engine(project, max_files)
        selected, error = _select_symbol_match(engine, symbol, file_path=file_path)
        if error:
            print(error, file=sys.stderr)
            return 1
        assert selected is not None
        target = selected
        # Blame only the symbol's defining line, porcelain (-p) format.
        result = subprocess.run(
            ["git", "blame", "-L", f"{target.line},{target.line}", "-p", target.file],
            cwd=engine.project_root,
            capture_output=True,
            text=True,
            timeout=10,
            check=False,
        )
        if result.returncode != 0:
            print(
                f"📍 符号: `{target.name}`\n📁 位置: `{target.file}:{target.line}`\n\n❌ Git 信息获取失败(可能不是 git 仓库)",
                file=sys.stderr,
            )
            return 1
        # Porcelain blame output begins with "<sha> <orig> <final>" — take the sha.
        commit_hash = result.stdout.split()[0] if result.stdout else "unknown"
        file_commits = subprocess.run(
            ["git", "log", "--follow", "-10", "--format=%H|%an|%ad|%s", "--", target.file],
            cwd=engine.project_root,
            capture_output=True,
            text=True,
            timeout=10,
            check=False,
        )
        lines = [f"## Git 历史 — `{target.name}`\n"]
        lines.append(f"📍 位置: `{target.file}:{target.line}`")
        lines.append(f"🔖 当前版本: `{commit_hash[:8]}`\n")
        if file_commits.returncode == 0 and file_commits.stdout:
            lines.append("**最近提交**:")
            # Each row is "hash|author|date|subject"; show at most 5.
            for row in file_commits.stdout.strip().split("\n")[:5]:
                parts = row.split("|", 3)
                if len(parts) >= 4:
                    lines.append(f" - `[{parts[0][:8]}]` {parts[2][:10]} by {parts[1]}: {parts[3][:50]}")
        print("\n".join(lines))
        return 0
    except Exception as exc:
        print(f"[{CLI_NAME}] git-history failed: {exc}", file=sys.stderr)
        return 1
1846
+
1847
+
1848
def run_refs(
    project: str,
    max_files: int,
    symbol: str | None,
    file_path: str | None,
    as_json: bool,
    with_lsp: bool = False,
    lsp_timeout: float = 8.0,
) -> int:
    """CLI entry: reference analysis for one symbol or the whole project.

    Builds caller/callee sets from "call" edges.  With *symbol* set,
    reports who calls it and what it calls (optionally enriched with LSP
    evidence); otherwise reports project-wide entry points, orphans, and
    most-referenced symbols.  Returns 0 on success, 1 on failure.
    """
    try:
        engine = _scan_engine(project, max_files)
        symbol_ids = set(engine.graph.symbols.keys())
        # Pre-seed both maps so every known symbol has an entry, even
        # with no call edges.
        calls_out: dict[str, set[str]] = {symbol_id: set() for symbol_id in symbol_ids}
        calls_in: dict[str, set[str]] = {symbol_id: set() for symbol_id in symbol_ids}
        for source_id, edge_list in engine.graph.outgoing.items():
            for edge in edge_list:
                if edge.kind != "call":
                    continue
                calls_out.setdefault(source_id, set()).add(edge.target)
                calls_in.setdefault(edge.target, set()).add(source_id)

        if symbol:
            selected, error = _select_symbol_match(engine, symbol, file_path=file_path)
            if error:
                print(error, file=sys.stderr)
                return 1
            assert selected is not None
            sid = selected.id
            target = engine.graph.symbols[sid]
            payload = {
                "symbol": target.name,
                "id": sid,
                # Cap lists at 20 in JSON, 10 in the text rendering below.
                "called_by": [_format_symbol_ref(engine, item) for item in sorted(calls_in[sid])[:20]],
                "calls": [_format_symbol_ref(engine, item) for item in sorted(calls_out[sid])[:20]],
                "ref_count": len(calls_in[sid]),
                "is_entry": len(calls_in[sid]) == 0,
                "is_leaf": len(calls_out[sid]) == 0,
            }
            if with_lsp:
                payload["lsp"] = _collect_lsp_evidence_for_symbol(engine, target, lsp_timeout)
            if as_json:
                print(json.dumps(payload, ensure_ascii=False, indent=2))
            else:
                lines = [f"## 引用分析 — `{target.name}`\n"]
                lines.append(f"- 被引用次数: {payload['ref_count']}")
                lines.append(f"- 调用其他: {len(payload['calls'])}")
                lines.append(f"- 入口函数: {'是' if payload['is_entry'] else '否'}")
                lines.append(f"- 叶子函数: {'是' if payload['is_leaf'] else '否'}\n")
                if payload["called_by"]:
                    lines.append("**被调用** (Top 10):")
                    for row in payload["called_by"][:10]:
                        lines.append(f" - `{row['name']}` ({row['file']}:{row['line']})")
                if payload["calls"]:
                    lines.append("\n**调用** (Top 10):")
                    for row in payload["calls"][:10]:
                        lines.append(f" - `{row['name']}` ({row['file']}:{row['line']})")
                if with_lsp:
                    lines.extend(_format_lsp_evidence(payload["lsp"]))
                print("\n".join(lines))
            return 0

        # Project-wide mode: entry points have no callers, orphans have
        # neither callers nor callees.
        entries = [sid for sid in symbol_ids if len(calls_in[sid]) == 0]
        orphans = [sid for sid in symbol_ids if len(calls_in[sid]) == 0 and len(calls_out[sid]) == 0]
        ref_counts = sorted(((sid, len(calls_in[sid])) for sid in symbol_ids), key=lambda item: item[1], reverse=True)
        payload = {
            "total_symbols": len(symbol_ids),
            "entry_points": [_format_symbol_ref(engine, sid) for sid in entries],
            "orphaned_symbols": [_format_symbol_ref(engine, sid) for sid in orphans],
            "most_referenced": [
                {**_format_symbol_ref(engine, sid), "ref_count": count}
                for sid, count in ref_counts[:20]
            ],
        }
        if as_json:
            print(json.dumps(payload, ensure_ascii=False, indent=2))
            return 0
        lines = ["## 全局引用分析\n"]
        lines.append(f"- 总符号数: {payload['total_symbols']}")
        lines.append(f"- 入口函数: {len(payload['entry_points'])}")
        lines.append(f"- 孤立符号: {len(payload['orphaned_symbols'])}\n")
        lines.append("**被引用最多** (Top 10):")
        for row in payload["most_referenced"][:10]:
            lines.append(f" - `{row['name']}`: {row['ref_count']} 次引用 ({row['file']})")
        print("\n".join(lines))
        return 0
    except Exception as exc:
        print(f"[{CLI_NAME}] refs failed: {exc}", file=sys.stderr)
        return 1
1936
+
1937
+
1938
+ # Kinds that are always structural noise, never dead code.
1939
+ _ORPHAN_EXCLUDED_KINDS: set[str] = {
1940
+ "element", # HTML tags in JSX/HTML files
1941
+ "json_key", # JSON object keys in config files
1942
+ "module", # mod declarations, import wrappers
1943
+ "handler", # web route handlers (framework-dispatched)
1944
+ }
1945
+
1946
+ # File extensions that are pure config — skip orphan detection entirely.
1947
+ _ORPHAN_EXCLUDED_EXTENSIONS: set[str] = {
1948
+ ".json", ".toml", ".yaml", ".yml", ".html", ".css", ".scss", ".less",
1949
+ }
1950
+
1951
+ # Test-related path markers.
1952
+ _TEST_PATH_MARKERS: tuple[str, ...] = ("test", "spec", "e2e", "__test__", "__tests__")
1953
+
1954
+ # Base confidence by symbol kind (0-100). Higher = more likely truly dead.
1955
+ _ORPHAN_KIND_BASE: dict[str, int] = {
1956
+ "function": 60,
1957
+ "method": 60,
1958
+ "struct": 40,
1959
+ "enum": 40,
1960
+ "class": 40,
1961
+ "type": 40,
1962
+ "interface": 35,
1963
+ "anonymous_function": 30,
1964
+ "variable": 30,
1965
+ "const": 30,
1966
+ "impl": 15,
1967
+ "trait": 35,
1968
+ }
1969
+
1970
+
1971
+ def _orphan_confidence(symbol: Symbol, orphan_names: set[str]) -> int:
1972
+ """Compute a confidence score (0-100) that a symbol is truly dead code."""
1973
+ score = _ORPHAN_KIND_BASE.get(symbol.kind, 30)
1974
+
1975
+ # File-level signals
1976
+ file_lower = symbol.file.lower()
1977
+ for marker in _TEST_PATH_MARKERS:
1978
+ if marker in file_lower:
1979
+ score -= 20
1980
+ break
1981
+
1982
+ # Extension-based filtering (should already be excluded, defensive)
1983
+ if any(file_lower.endswith(ext) for ext in _ORPHAN_EXCLUDED_EXTENSIONS):
1984
+ score -= 50
1985
+
1986
+ # Name-based signals for test helpers
1987
+ name_lower = symbol.name.lower()
1988
+ if any(name_lower.startswith(prefix) for prefix in ("test_", "it_", "should_", "test")):
1989
+ score -= 30
1990
+
1991
+ # Visibility signal: private symbols are more likely truly dead
1992
+ if symbol.visibility == "private":
1993
+ score += 10
1994
+
1995
+ # Struct/impl pairing heuristics
1996
+ if symbol.kind == "impl":
1997
+ # impl block whose struct also appears as orphan → the pair might all be dead
1998
+ if symbol.name in orphan_names:
1999
+ score += 25
2000
+ elif symbol.kind in ("struct", "enum", "class", "type"):
2001
+ # Struct whose impl also appears → more likely truly dead (entire unit unused)
2002
+ if symbol.name in orphan_names:
2003
+ score += 25
2004
+
2005
+ return max(0, min(100, score))
2006
+
2007
+
2008
+ def _orphan_note(symbol: Symbol) -> str:
2009
+ """Generate a brief reason string for the confidence score."""
2010
+ reasons: list[str] = []
2011
+ file_lower = symbol.file.lower()
2012
+ for marker in _TEST_PATH_MARKERS:
2013
+ if marker in file_lower:
2014
+ reasons.append("测试文件")
2015
+ break
2016
+ name_lower = symbol.name.lower()
2017
+ if any(name_lower.startswith(prefix) for prefix in ("test_", "it_", "should_")):
2018
+ reasons.append("测试辅助函数")
2019
+ if symbol.kind == "impl":
2020
+ reasons.append("实现块(可能宏驱动)")
2021
+ if symbol.kind in ("struct", "enum", "class"):
2022
+ reasons.append("类型定义(可能反射/宏使用)")
2023
+ if not reasons:
2024
+ reasons.append("无调用者和被调用者")
2025
+ return "; ".join(reasons)
2026
+
2027
+
2028
def run_orphan(project: str, max_files: int, as_json: bool = False, limit: int = 20, min_confidence: int = 0) -> int:
    """CLI entry: dead-code candidate analysis.

    A candidate is a symbol with no call edges in or out, excluding
    entry points, exported symbols, structural kinds, and config files.
    Candidates are scored by ``_orphan_confidence`` and reported in
    high/medium/low tiers, grouped by module in the text rendering.
    Returns 0 on success, 1 on failure.
    """
    try:
        engine = _scan_engine(project, max_files)
        symbol_ids = set(engine.graph.symbols.keys())
        calls_in: dict[str, set[str]] = {symbol_id: set() for symbol_id in symbol_ids}
        calls_out: dict[str, set[str]] = {symbol_id: set() for symbol_id in symbol_ids}
        for source_id, edge_list in engine.graph.outgoing.items():
            for edge in edge_list:
                if edge.kind != "call":
                    continue
                calls_out.setdefault(source_id, set()).add(edge.target)
                calls_in.setdefault(edge.target, set()).add(source_id)

        candidates: list[Symbol] = []
        filtered_structural_count = 0
        for sid in symbol_ids:
            # Orphan = no callers AND no callees.
            if len(calls_in[sid]) == 0 and len(calls_out[sid]) == 0:
                symbol = engine.graph.symbols[sid]
                # Entry points and exported API are never dead-code candidates.
                if symbol.name in {"main", "__main__"}:
                    continue
                if symbol.visibility == "exported":
                    continue
                if symbol.kind in _ORPHAN_EXCLUDED_KINDS:
                    filtered_structural_count += 1
                    continue
                if any(symbol.file.lower().endswith(ext) for ext in _ORPHAN_EXCLUDED_EXTENSIONS):
                    filtered_structural_count += 1
                    continue
                candidates.append(symbol)

        # Build orphan name set for struct/impl pairing heuristic
        orphan_names: set[str] = {s.name for s in candidates}

        # Compute confidence for each candidate
        scored: list[dict] = []
        for symbol in candidates:
            conf = _orphan_confidence(symbol, orphan_names)
            scored.append({
                "symbol": symbol,
                "confidence": conf,
                "note": _orphan_note(symbol),
            })

        # Highest confidence first; file/line/name as deterministic tiebreak.
        scored.sort(key=lambda x: (-x["confidence"], x["symbol"].file, x["symbol"].line, x["symbol"].name))

        # Filter by min_confidence
        if min_confidence > 0:
            scored = [s for s in scored if s["confidence"] >= min_confidence]

        # Tier classification
        high = [s for s in scored if s["confidence"] >= 70]
        medium = [s for s in scored if 40 <= s["confidence"] < 70]
        low = [s for s in scored if s["confidence"] < 40]

        if as_json:
            def _to_dict(item):
                # Flatten a scored entry into a JSON-serializable record.
                sym = item["symbol"]
                return {
                    "name": sym.name,
                    "kind": sym.kind,
                    "file": sym.file,
                    "line": sym.line,
                    "confidence": item["confidence"],
                    "note": item["note"],
                    "visibility": sym.visibility,
                }

            payload = {
                "project_root": str(engine.project_root),
                "total_candidates": len(candidates),
                "filtered_structural": filtered_structural_count,
                "high_confidence": [_to_dict(s) for s in high],
                "medium_confidence": [_to_dict(s) for s in medium],
                "low_confidence": [_to_dict(s) for s in low],
            }
            print(json.dumps(payload, ensure_ascii=False, indent=2))
            return 0

        # Text output
        lines = ["## 死代码分析\n"]
        lines.append(f"总计 {len(candidates)} 候选(已过滤 module/element/json_key 等 {filtered_structural_count} 个结构元素)")
        if min_confidence > 0:
            lines.append(f"置信度阈值: {min_confidence}(已过滤低置信项)")
        lines.append("")

        def _module_for_file(file_path: str) -> str:
            # Map a file path to its reporting "module": the first path
            # component, or the first two under common source roots.
            parts = [p for p in PurePosixPath(file_path).parts if p not in ("", ".")]
            if not parts:
                return "(root)"
            if len(parts) == 1:
                return "(root)"
            if parts[0] in {"src", "app", "apps", "packages", "services", "modules", "libs", "lib", "crates"}:
                return "/".join(parts[:2]) if len(parts) > 1 else parts[0]
            return parts[0]

        def _render_tier(title: str, emoji: str, items: list[dict], max_items: int):
            # Render one confidence tier, grouped by module, biggest
            # modules first, with a per-module item budget.
            if not items:
                return []
            tier_lines = [f"### {emoji} {title} — {len(items)} 个"]
            # Group by module.
            by_module: dict[str, list[dict]] = {}
            for item in items:
                mod = _module_for_file(item["symbol"].file)
                by_module.setdefault(mod, []).append(item)
            tier_lines.append("")
            for mod in sorted(by_module, key=lambda m: -len(by_module[m])):
                mod_items = by_module[mod][:max(3, max_items // max(len(by_module), 1))]
                tier_lines.append(f"**`{mod}/`** ({len(by_module[mod])} 个)")
                for item in mod_items:
                    sym = item["symbol"]
                    tier_lines.append(f"- `{sym.name}` ({sym.kind}) `{sym.file}:{sym.line}` — {item['confidence']}% | {item['note']}")
                if len(by_module[mod]) > len(mod_items):
                    tier_lines.append(f"  …还有 {len(by_module[mod]) - len(mod_items)} 个")
                tier_lines.append("")
            return tier_lines

        lines.extend(_render_tier("高置信(建议审查)", "🔴", high, limit))
        lines.extend(_render_tier("中置信(需要确认)", "🟡", medium, limit))
        lines.extend(_render_tier("低置信(可能为活跃代码)", "🟢", low, limit))

        # Offer next-step advice when filtering produced no results.
        if not high and not medium and not low:
            if min_confidence > 0:
                lines.append(f"\n> 使用 `--min-confidence {min_confidence}` 过滤后无结果。")
                lines.append(f"> 尝试降低置信度阈值,例如: `--min-confidence {max(0, min_confidence - 20)}`")
            else:
                lines.append("\n> 未发现死代码候选。")
                lines.append("> 这可能意味着项目代码质量良好,或者需要调整分析参数。")
        else:
            if low:
                lines.append("> 使用 `--min-confidence 40` 过滤低置信项。")
            lines.append("> 不能仅据此删除,需要额外代码/业务验证。使用 `--json` 获取完整结构化输出。")
        print("\n".join(lines))
        return 0
    except Exception as exc:
        print(f"[{CLI_NAME}] orphan failed: {exc}", file=sys.stderr)
        return 1
2165
+
2166
+
2167
+ def _sym_name(engine: RepoMapEngine, sid: str) -> str:
2168
+ sym = engine.graph.symbols.get(sid)
2169
+ return sym.name if sym else "?"
2170
+
2171
+
2172
+ def _format_symbol_ref(engine: RepoMapEngine, sid: str) -> dict[str, Any]:
2173
+ symbol = engine.graph.symbols[sid]
2174
+ return {"name": symbol.name, "file": symbol.file, "line": symbol.line}
2175
+
2176
+
2177
def run_lsp_doctor(project: str, as_json: bool = False) -> int:
    """CLI entry: report which LSP servers are detectable for the project.

    Detection only inspects project-local executables, PATH, and trusted
    user tool bins — nothing is installed.  Prints JSON or a Markdown
    table.  Returns 0 on success, 1 on failure.
    """
    try:
        project_root = _resolve_project(project)
        from ..lsp import detect_lsp_servers, detection_to_dict

        detections = detect_lsp_servers(project_root)
        payload = {
            "command": "lsp doctor",
            "project": project_root,
            "lspClient": "available",
            "bundledServers": [],
            "servers": [detection_to_dict(item) for item in detections],
        }
        if as_json:
            print(json.dumps(payload, ensure_ascii=False, indent=2))
            return 0
        lines = ["## LSP Doctor\n"]
        lines.append(f"Project: `{project_root}`")
        lines.append("LSP client: available")
        lines.append("Bundled LSP servers: none")
        if not detections:
            lines.append("\nNo supported source files detected.")
        else:
            lines.append("\n| Language | Server | Status | Source | Workspace |")
            lines.append("|---|---|---|---|---|")
            for item in detections:
                status = "available" if item.status == "available" else f"missing ({item.reason or 'not found'})"
                lines.append(
                    f"| {item.language} | {item.server_name or '-'} | {status} | {item.source or '-'} | `{item.workspace_root or project_root}` |"
                )
        lines.append("\n> repomap checks project-local executables, PATH, and trusted user tool bins such as npm/pnpm/yarn/bun/pipx/uv/mason/cargo/go directories; it does not install or bundle servers.")
        print("\n".join(lines))
        return 0
    except Exception as exc:
        print(f"[{CLI_NAME}] lsp doctor failed: {exc}", file=sys.stderr)
        return 1
2213
+
2214
+
2215
def run_diagnostics(
    project: str,
    source: str,
    files: list[str],
    as_json: bool,
    lsp_timeout: float,
    lsp_max_files: int,
) -> int:
    """CLI entry: collect diagnostics for specific files via LSP.

    Only ``source == "lsp"`` is supported (exit code 2 otherwise, per
    EXIT_INVALID_ARGS).  File paths are validated and normalized relative
    to the project root.  Returns 1 when any errors or failed runs are
    found, 0 otherwise.
    """
    try:
        project_root = _resolve_project(project)
        # must_exist=True: reject paths that aren't real files in the project.
        normalized_files = _normalize_project_relative_paths(project_root, files, must_exist=True)
        if source != "lsp":
            print(f"[{CLI_NAME}] unsupported diagnostics source: {source}", file=sys.stderr)
            return 2
        from ..lsp import collect_lsp_diagnostics, run_result_to_dict

        runs = collect_lsp_diagnostics(project_root, normalized_files, timeout=lsp_timeout, max_files=lsp_max_files)
        payload = {
            "command": "diagnostics",
            "project": project_root,
            "source": source,
            "files": normalized_files,
            "runs": [run_result_to_dict(run) for run in runs],
        }
        # Non-"error" severities (warning/info/hint) all count as warnings.
        total_errors = sum(1 for run in runs for item in run.diagnostics if item.severity == "error")
        total_warnings = sum(1 for run in runs for item in run.diagnostics if item.severity != "error")
        payload["summary"] = {
            "totalErrors": total_errors,
            "totalWarnings": total_warnings,
            "failedRuns": sum(1 for run in runs if run.status in {"failed", "timeout"}),
            "skippedRuns": sum(1 for run in runs if run.status == "skipped"),
        }
        if as_json:
            print(json.dumps(payload, ensure_ascii=False, indent=2))
        else:
            print(_format_lsp_diagnostics_report(payload))
        return 1 if total_errors or payload["summary"]["failedRuns"] else 0
    except ValueError as exc:
        # Raised by path normalization for unsafe/missing files.
        print(f"[{CLI_NAME}] diagnostics failed: {exc}", file=sys.stderr)
        return 1
    except Exception as exc:
        print(f"[{CLI_NAME}] diagnostics failed: {exc}", file=sys.stderr)
        return 1
2258
+
2259
+
2260
+ def _format_lsp_diagnostics_report(payload: dict[str, Any]) -> str:
2261
+ lines = ["## LSP Diagnostics\n"]
2262
+ lines.append(f"Project: `{payload['project']}`")
2263
+ lines.append(f"Files: {len(payload.get('files', []))}")
2264
+ summary = payload.get("summary", {})
2265
+ lines.append(f"Errors: **{summary.get('totalErrors', 0)}** | Warnings: **{summary.get('totalWarnings', 0)}**")
2266
+ lines.append("")
2267
+ for run in payload.get("runs", []):
2268
+ status = run.get("status")
2269
+ lines.append(f"### {run.get('language')} / {run.get('server')} — {status}")
2270
+ if run.get("reason"):
2271
+ lines.append(f"- Reason: {run['reason']}")
2272
+ if run.get("workspaceRoot"):
2273
+ lines.append(f"- Workspace: `{run['workspaceRoot']}`")
2274
+ diagnostics = run.get("diagnostics", [])
2275
+ if diagnostics:
2276
+ for item in diagnostics[:20]:
2277
+ icon = {"error": "❌", "warning": "⚠️", "info": "ℹ️"}.get(item.get("severity"), "ℹ️")
2278
+ lines.append(f" {icon} `{item['file']}:{item['line']}:{item['col']}` [{item.get('code', '')}] {item.get('message', '')[:120]}")
2279
+ else:
2280
+ lines.append("- No diagnostics returned.")
2281
+ lines.append("")
2282
+ return "\n".join(lines)
2283
+
2284
+
2285
def run_check(
    project: str,
    types: list[str] | None,
    max_issues: int,
    since_commit: str | None,
    modified_files: list[str] | None,
    resolve_symbols: bool,
    with_lsp: bool = False,
    lsp_timeout: float = 8.0,
    lsp_max_files: int = 20,
) -> int:
    """CLI entry: run compiler/static-analysis checks and print a report.

    *modified_files* are normalized relative to the project root before
    use; *resolve_symbols* triggers a full scan so issues can be mapped
    onto known symbols.  Returns 0 for passed/warning/unknown statuses,
    1 for failures or errors.
    """
    try:
        project_root = _resolve_project(project)
        normalized_modified_files = None
        if modified_files:
            try:
                # must_exist=False: callers may pass deleted files.
                normalized_modified_files = _normalize_project_relative_paths(project_root, modified_files, must_exist=False)
            except ValueError as exc:
                print(f"[{CLI_NAME}] check failed: unsafe modified file: {exc}", file=sys.stderr)
                return 1
        symbols_map = None
        if resolve_symbols:
            engine = _scan_engine(project_root, 8000)
            symbols_map = engine.graph.symbols

        checker = RepoMapChecker(project_root, max_issues)
        result = checker.check(
            types=types,
            resolve_symbols=resolve_symbols and symbols_map is not None,
            symbols_map=symbols_map,
            since_commit=since_commit,
            modified_files=normalized_modified_files,
            with_lsp=with_lsp,
            lsp_timeout=lsp_timeout,
            lsp_max_files=lsp_max_files,
        )
        print(_format_check_report(result, max_issues))
        # Only a hard "failed" status (or an exception) yields exit code 1.
        return 0 if result.get("status") in {"passed", "warning", "unknown"} else 1
    except Exception as exc:
        print(f"[{CLI_NAME}] check failed: {exc}", file=sys.stderr)
        return 1
2326
+
2327
+
2328
+ def _format_check_report(result: dict[str, Any], max_issues: int) -> str:
2329
+ lines = ["## 编译器/静态分析诊断\n"]
2330
+ lines.append(f"**项目**: `{result['project_root']}`")
2331
+ status_label = {
2332
+ "passed": "✅ 通过",
2333
+ "warning": "⚠️ 有警告",
2334
+ "unknown": "ℹ️ 未实际运行诊断工具" if result.get("message") else "ℹ️ 未检测到支持类型",
2335
+ }.get(result["status"], "❌ 有错误")
2336
+ lines.append(f"**状态**: {status_label}")
2337
+ if result.get("message"):
2338
+ lines.append(f"**说明**: {result['message']}")
2339
+ lines.append(f"**检测类型**: {', '.join(result.get('types', [])) or '自动检测'}")
2340
+ lines.append(f"**时间**: {result['timestamp']}\n")
2341
+
2342
+ summary = result.get("summary", {})
2343
+ lines.append("### 汇总")
2344
+ lines.append(f"- 错误总数: **{summary.get('total_errors', 0)}** 🔴")
2345
+ lines.append(f"- 警告总数: **{summary.get('total_warnings', 0)}** ⚠️")
2346
+ lines.append(f"- 涉及文件: {summary.get('files_with_errors', 0)}")
2347
+ lines.append(f"- 运行工具: {summary.get('tools_run', 0)} | 跳过: {summary.get('tools_skipped', 0)}")
2348
+ if summary.get("tool_failures", 0):
2349
+ lines.append(f"- 工具执行失败: **{summary.get('tool_failures', 0)}**")
2350
+ lines.append("")
2351
+
2352
+ runs = result.get("runs", [])
2353
+ if runs:
2354
+ lines.append("### 工具执行详情\n")
2355
+ for run in runs:
2356
+ status = "⏭️ 跳过" if run.get("skipped") else ("✅ 通过" if run["exit_code"] == 0 and run["error_count"] == 0 else "❌ 失败")
2357
+ lines.append(f"**{run['tool']}** {status} ({run['duration_ms']}ms)")
2358
+ if run.get("skipped"):
2359
+ lines.append(f" - 原因: {run.get('skip_reason', '未知')}")
2360
+ else:
2361
+ lines.append(f" - 命令: `{run['command']}`")
2362
+ if run.get("exit_code", 0) != 0:
2363
+ lines.append(f" - 退出码: {run['exit_code']}")
2364
+ if run.get("tool_failure_reason"):
2365
+ lines.append(f" - 原因: {run['tool_failure_reason']}")
2366
+ excerpt = run.get("raw_excerpt") or []
2367
+ if excerpt:
2368
+ lines.append(f" - 输出: {str(excerpt[0])[:120]}")
2369
+ if run["error_count"] > 0:
2370
+ lines.append(f" - 错误: **{run['error_count']}**")
2371
+ if run["warning_count"] > 0:
2372
+ lines.append(f" - 警告: {run['warning_count']}")
2373
+ if run.get("truncated"):
2374
+ lines.append(f" - ⚠️ 结果已截断,仅显示前 {max_issues} 条")
2375
+ lines.append("")
2376
+
2377
+ errors_by_file = result.get("errors_by_file", {})
2378
+ if errors_by_file:
2379
+ lines.append("### 按文件分组的问题 (Top 10)\n")
2380
+ for file_path, issues in list(errors_by_file.items())[:10]:
2381
+ error_count = sum(1 for issue in issues if issue["severity"] == "error")
2382
+ warning_count = sum(1 for issue in issues if issue["severity"] == "warning")
2383
+ info_count = sum(1 for issue in issues if issue["severity"] == "info")
2384
+ counts = []
2385
+ if error_count:
2386
+ counts.append(f"{error_count} 错误")
2387
+ if warning_count:
2388
+ counts.append(f"{warning_count} 警告")
2389
+ if info_count:
2390
+ counts.append(f"{info_count} 信息")
2391
+ lines.append(f"**{file_path}**: {', '.join(counts)}")
2392
+ for issue in issues[:3]:
2393
+ icon = {"error": "❌", "warning": "⚠️", "info": "ℹ️"}.get(issue["severity"], "❌")
2394
+ confidence_icon = {"exact": "🎯", "line": "📍", "none": ""}.get(issue.get("symbol_confidence", "none"), "")
2395
+ symbol_info = f" {confidence_icon}`{issue['symbol']}`" if issue.get("symbol") else ""
2396
+ lines.append(f" {icon} 行{issue['line']}{symbol_info}: [{issue['code']}] {issue['message'][:50]}")
2397
+ lines.append("")
2398
+
2399
+ return "\n".join(lines)
2400
+
2401
+
2402
+ def _module_origin(module_name: str) -> str:
2403
+ spec = importlib.util.find_spec(module_name)
2404
+ if spec is None:
2405
+ return "not found"
2406
+ return spec.origin or "built-in"
2407
+
2408
+
2409
def run_doctor(project: str | None = None) -> int:
    """Print a diagnostic summary of the current runtime environment.

    Verifies that tree-sitter parser bindings (including TSX) are loadable,
    then reports module origins, packaging paths, and optional tooling.

    Args:
        project: accepted for CLI flag consistency with other subcommands;
            doctor inspects the runtime, not a project tree.

    Returns:
        0 when the environment looks healthy, 1 when parsers are missing.
    """
    from ..parser import TreeSitterAdapter

    # Keep the flag set uniform across subcommands, but tell the user that
    # doctor ignores it. (Plain string: no placeholders to interpolate.)
    if project:
        print("Note: --project is accepted for consistency but not used by doctor command.", file=sys.stderr)

    adapter = TreeSitterAdapter()
    parsers = sorted(adapter.parsers)
    pyinstaller_spec = importlib.util.find_spec("PyInstaller")
    if parsers:
        print(f"tree-sitter parsers: {', '.join(parsers)}")
    else:
        print("tree-sitter bindings are missing", file=sys.stderr)
        return 1
    # TSX is required for TypeScript/React support; treat its absence as fatal.
    if "tsx" not in adapter.parsers:
        print("TSX parser: unavailable", file=sys.stderr)
        return 1
    print(f"repomap_cli: {_module_origin('repomap_cli')}")
    print(f"repomap_parser: {_module_origin('repomap_parser')}")
    print(f"repomap_core: {_module_origin('repomap_core')}")
    print(f"tree_sitter: {_module_origin('tree_sitter')}")
    print(f"tree_sitter_typescript: {_module_origin('tree_sitter_typescript')}")
    print(f"PACKAGE_ROOT: {PACKAGE_ROOT}")
    print(f"PROJECT_ROOT: {PROJECT_ROOT}")
    print("LSP client: available")
    print("Bundled LSP servers: none")
    print("LSP server detection: run `repomap lsp doctor --project <path>`")
    # PyInstaller is only needed for the build-binary subcommand.
    if pyinstaller_spec is not None:
        print("PyInstaller: available")
    else:
        print("PyInstaller: not installed in current runtime, only required for build-binary")
    return 0
2442
+
2443
+
2444
def _pyinstaller_command(output_dir: Path, name: str) -> list[str]:
    """Build the PyInstaller argv used by ``run_build_binary``.

    Produces a one-file build that writes the executable into *output_dir*,
    keeps scratch work/spec dirs under ``output_dir/.pyinstaller``, and
    forces every tree-sitter binding in as a hidden import so the bundled
    binary can load them at runtime.
    """
    scratch = output_dir / ".pyinstaller"
    argv = [sys.executable, "-m", "PyInstaller", "--noconfirm", "--onefile"]
    argv += ["--name", name]
    argv += ["--distpath", str(output_dir)]
    argv += ["--workpath", str(scratch / "build")]
    argv += ["--specpath", str(scratch / "spec")]
    for binding in PYINSTALLER_BINDINGS:
        argv += ["--hidden-import", binding]
    # Entry point: the package's __main__ module.
    argv.append(str(PACKAGE_ROOT / "__main__.py"))
    return argv
2465
+
2466
+
2467
def run_build_binary(output: str, name: str) -> int:
    """Build a standalone executable with PyInstaller.

    Args:
        output: directory that receives the binary (created if missing).
        name: basename for the produced executable.

    Returns:
        0 on success; otherwise PyInstaller's non-zero exit code
        (falling back to 1 if the code is somehow falsy).
    """
    dist_dir = Path(output).resolve()
    dist_dir.mkdir(parents=True, exist_ok=True)
    proc = subprocess.run(_pyinstaller_command(dist_dir, name), cwd=str(PROJECT_ROOT), check=False)
    if proc.returncode == 0:
        print(f"binary ready: {dist_dir / name}")
        return 0
    print(f"[{CLI_NAME}] build failed with exit code {proc.returncode}", file=sys.stderr)
    return proc.returncode or 1