repomap-cli 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- repomap/__init__.py +320 -0
- repomap/ai.py +1108 -0
- repomap/check.py +1212 -0
- repomap/cli/__init__.py +3 -0
- repomap/cli/__main__.py +12 -0
- repomap/cli/cli.py +2475 -0
- repomap/core.py +730 -0
- repomap/lsp.py +753 -0
- repomap/parser.py +1697 -0
- repomap/ranking.py +639 -0
- repomap/resolver.py +906 -0
- repomap/toolkit.py +850 -0
- repomap/topic.py +600 -0
- repomap_cli-1.0.0.dist-info/METADATA +284 -0
- repomap_cli-1.0.0.dist-info/RECORD +18 -0
- repomap_cli-1.0.0.dist-info/WHEEL +4 -0
- repomap_cli-1.0.0.dist-info/entry_points.txt +2 -0
- repomap_cli-1.0.0.dist-info/licenses/LICENSE +21 -0
repomap/toolkit.py
ADDED
|
@@ -0,0 +1,850 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
RepoMap Toolkit - 轻量级代码分析工具
|
|
4
|
+
=====================================
|
|
5
|
+
功能:
|
|
6
|
+
1. 符号缓存持久化 (cache)
|
|
7
|
+
2. 变更检测 (diff)
|
|
8
|
+
3. Git 历史关联 (git)
|
|
9
|
+
4. 引用计数分析 (refs)
|
|
10
|
+
|
|
11
|
+
使用:
|
|
12
|
+
python -m repomap.toolkit cache --save --project /path/to/project
|
|
13
|
+
python -m repomap.toolkit diff --project /path/to/project
|
|
14
|
+
python -m repomap.toolkit git --symbol calculate_kpi --project /path/to/project
|
|
15
|
+
python -m repomap.toolkit refs --symbol calculate_kpi --project /path/to/project
|
|
16
|
+
python -m repomap.toolkit orphan --project /path/to/project
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
from __future__ import annotations
|
|
20
|
+
|
|
21
|
+
import argparse
|
|
22
|
+
import json
|
|
23
|
+
import os
|
|
24
|
+
import subprocess
|
|
25
|
+
from dataclasses import asdict, dataclass
|
|
26
|
+
from datetime import datetime
|
|
27
|
+
from pathlib import Path
|
|
28
|
+
from typing import Any
|
|
29
|
+
|
|
30
|
+
import sys
|
|
31
|
+
sys.path.insert(0, str(Path(__file__).parent))
|
|
32
|
+
from .core import RepoMapEngine
|
|
33
|
+
from . import (
|
|
34
|
+
Edge,
|
|
35
|
+
FileCacheEntry,
|
|
36
|
+
IncrementalCache,
|
|
37
|
+
Symbol,
|
|
38
|
+
compare_graph_snapshots,
|
|
39
|
+
get_cache_paths,
|
|
40
|
+
get_incremental_cache_path,
|
|
41
|
+
serialize_edge,
|
|
42
|
+
serialize_symbol,
|
|
43
|
+
)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
# ═══════════════════════════════════════════════════════════════════════════════
|
|
47
|
+
# 数据模型
|
|
48
|
+
# ═══════════════════════════════════════════════════════════════════════════════
|
|
49
|
+
|
|
50
|
+
# Version stamp stored in every symbol cache file (see ``_schema_version``).
CACHE_SCHEMA_VERSION = 1


@dataclass
class SymbolCache:
    """On-disk representation of one scan result (the symbol cache)."""

    # Serialized symbols and edges, one dict per item.
    symbols: list[dict]
    edges: list[dict]
    # ISO-formatted timestamp of the scan that produced this cache.
    scan_time: str
    # Project path the scan was run against.
    project_path: str
    # Summary counters recorded at save time.
    file_count: int
    symbol_count: int
    edge_count: int
    # Schema version of the file; defaults to the current version.
    _schema_version: int = CACHE_SCHEMA_VERSION
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
@dataclass
class GitSymbolInfo:
    """Git history information attached to a single symbol."""

    # Identifier of the symbol this record describes.
    symbol_id: str
    # Date strings for the first appearance and the latest change.
    first_seen: str
    last_modified: str
    # Number of commits and the authors involved.
    commit_count: int
    authors: list[str]
    # Recent commits, one dict per commit.
    recent_commits: list[dict]
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
@dataclass
class RefCountInfo:
    """Reference-count information for a single symbol."""

    symbol_id: str
    # Who calls this symbol.
    called_by: list[str]
    # Whom this symbol calls.
    calls: list[str]
    # How many times the symbol is referenced.
    ref_count: int
    # Entry point: not called by anyone.
    is_entry: bool
    # Leaf: does not call anyone.
    is_leaf: bool
    # Orphan: neither called nor calling, and not an entry point.
    is_orphan: bool
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
# ═══════════════════════════════════════════════════════════════════════════════
|
|
90
|
+
# 核心功能:扫描与缓存
|
|
91
|
+
# ═══════════════════════════════════════════════════════════════════════════════
|
|
92
|
+
|
|
93
|
+
def scan_project(project_path: str, max_files: int = 5000) -> tuple[list[Symbol], list[Edge]]:
    """Scan a project and return ``(symbols, edges)``.

    Args:
        project_path: Root of the project handed to ``RepoMapEngine``.
        max_files: Forwarded to ``engine.scan`` as ``max_files``.

    Returns:
        All symbols of the resulting graph, plus the outgoing edges with
        duplicates (same source id, target and kind) removed. The first
        occurrence of each edge is kept, in iteration order.
    """
    engine = RepoMapEngine(project_path)
    engine.scan(max_files=max_files)
    graph = engine.graph

    # A dict keyed by (source, target, kind) both de-duplicates and keeps
    # first-seen order.
    unique_edges: dict[tuple, Edge] = {}
    for source_id, outgoing in graph.outgoing.items():
        for edge in outgoing:
            unique_edges.setdefault((source_id, edge.target, edge.kind), edge)

    return list(graph.symbols.values()), list(unique_edges.values())
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def save_cache(project_path: str, symbols: list[Symbol], edges: list[Edge]) -> Path:
    """Persist a scan result to the project's symbol cache file.

    The payload is written to a temporary file in the cache directory and then
    moved over the real cache file with ``os.replace``, so a crash mid-write
    never leaves a truncated cache behind. If a cache already exists it is
    first copied to the "last snapshot" path.

    Args:
        project_path: Project whose cache paths are looked up.
        symbols: Symbols to serialize.
        edges: Edges to serialize.

    Returns:
        Path of the cache file that was written.

    Raises:
        Any exception raised while serializing or writing; the temporary file
        is removed before the exception propagates.
    """
    import shutil
    import tempfile

    cache_file, _, last_file = get_cache_paths(project_path)
    cache_dir = cache_file.parent
    # The temp file is created inside cache_dir, so the directory must exist.
    cache_dir.mkdir(parents=True, exist_ok=True)

    # Keep the previous cache around as the last snapshot.
    if cache_file.exists():
        shutil.copy2(cache_file, last_file)

    cache = SymbolCache(
        # Sorted so that the file content is stable for identical scans.
        symbols=sorted(
            [serialize_symbol(s) for s in symbols],
            key=lambda row: (row['file'], row['line'], row.get('end_line', row['line']), row['name'], row['kind']),
        ),
        edges=sorted(
            [serialize_edge(e) for e in edges],
            key=lambda row: (row['source'], row['target'], row['kind']),
        ),
        scan_time=datetime.now().isoformat(),
        project_path=project_path,
        file_count=len({s.file for s in symbols}),
        symbol_count=len(symbols),
        edge_count=len(edges),
    )

    temp_path: str | None = None
    try:
        with tempfile.NamedTemporaryFile(
            mode='w',
            encoding='utf-8',
            dir=cache_dir,
            prefix='.tmp_cache_',
            suffix='.json',
            delete=False,
        ) as f:
            temp_path = f.name
            json.dump(asdict(cache), f, indent=2, ensure_ascii=False)
        # Same-directory rename: replaces the old cache in one step.
        os.replace(temp_path, cache_file)
    except BaseException:
        # Also covers KeyboardInterrupt so no stray temp file is left behind.
        if temp_path is not None and os.path.exists(temp_path):
            os.unlink(temp_path)
        raise

    return cache_file
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
def load_cache(project_path: str) -> SymbolCache | None:
    """Load the symbol cache for a project.

    Returns ``None`` when the cache file does not exist, cannot be parsed,
    carries a different schema version, or cannot be turned into a
    ``SymbolCache``. A cache with a mismatching schema version or invalid
    JSON is deleted (best effort) and a notice is printed to stderr.
    """
    cache_file, _, _ = get_cache_paths(project_path)
    if not cache_file.exists():
        return None

    def _discard() -> None:
        # Removal is best effort; a failure to delete is ignored.
        try:
            os.unlink(cache_file)
        except OSError:
            pass

    try:
        with open(cache_file, 'r', encoding='utf-8') as handle:
            data = json.load(handle)
        if data.get("_schema_version") == CACHE_SCHEMA_VERSION:
            return SymbolCache(**data)
        # Schema version mismatch: drop the stale cache so it gets rebuilt.
        print(f"[repomap] Cache schema 版本不匹配 (缓存: v{data.get('_schema_version')}, 当前: v{CACHE_SCHEMA_VERSION}),自动清理旧缓存并重新扫描", file=sys.stderr)
        _discard()
        return None
    except json.JSONDecodeError:
        # The cache file is corrupted.
        print(f"[repomap] 缓存文件损坏 ({cache_file}),自动清理并重新扫描", file=sys.stderr)
        _discard()
        return None
    except Exception:
        return None
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
def load_last_snapshot(project_path: str) -> SymbolCache | None:
    """Load the previous snapshot (used for diffing).

    Returns ``None`` when the snapshot file is missing or when reading,
    parsing or constructing the ``SymbolCache`` fails for any reason.
    """
    _, _, last_file = get_cache_paths(project_path)
    if not last_file.exists():
        return None

    try:
        with open(last_file, 'r', encoding='utf-8') as snapshot:
            return SymbolCache(**json.load(snapshot))
    except Exception:
        return None
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
def save_incremental_cache(project_path: str, engine: RepoMapEngine) -> Path:
    """Save the incremental-scan baseline for a project.

    Stores, per file, the parse results held by ``engine`` (symbols, imports,
    import bindings, exports, calls, routes) together with the file's mtime,
    so that a later scan can work incrementally. Intended to be called after
    a full scan has completed.

    Args:
        project_path: Project whose incremental cache path is looked up; also
            used as the working directory for ``git rev-parse HEAD``.
        engine: Engine whose graph, routes and scan stats are persisted.

    Returns:
        Path of the baseline file that was written.

    Raises:
        Any exception raised while serializing or writing; the temporary file
        is removed before the exception propagates.
    """
    import tempfile

    cache_path = get_incremental_cache_path(project_path)
    cache_dir = cache_path.parent
    cache_dir.mkdir(parents=True, exist_ok=True)

    # Recording HEAD is best effort: no git, no repo or a timeout leave it "".
    git_head = ""
    try:
        result = subprocess.run(
            ["git", "rev-parse", "HEAD"],
            cwd=project_path, capture_output=True, text=True, timeout=5,
        )
        if result.returncode == 0:
            git_head = result.stdout.strip()
    except Exception:
        pass

    graph = engine.graph

    # Group routes by file once instead of rescanning all routes per file.
    routes_by_file: dict[str, list[dict]] = {}
    for r in engine.routes:
        routes_by_file.setdefault(r.file, []).append({
            "method": r.method, "path": r.path, "handler": r.handler,
            "file": r.file, "line": r.line, "framework": r.framework,
        })

    files: dict[str, FileCacheEntry] = {}
    for file_path, symbol_ids in graph.file_symbols.items():
        # A file that vanished (or cannot be stat'ed) is recorded with mtime 0.
        try:
            mtime = (engine.project_root / file_path).stat().st_mtime
        except OSError:
            mtime = 0.0

        files[file_path] = FileCacheEntry(
            mtime=mtime,
            symbols_json=[
                serialize_symbol(graph.symbols[sid])
                for sid in symbol_ids
                if sid in graph.symbols
            ],
            imports=graph.file_imports.get(file_path, []),
            import_bindings_json=[
                {"local_name": b.local_name, "imported_name": b.imported_name,
                 "module": b.module, "line": b.line, "kind": b.kind}
                for b in graph.file_import_bindings.get(file_path, [])
            ],
            exports_json=[
                {"exported_name": b.exported_name, "source_name": b.source_name,
                 "module": b.module, "line": b.line, "kind": b.kind}
                for b in graph.file_exports.get(file_path, [])
            ],
            # Call records shorter than three items carry no kind: use "direct".
            calls_json=[
                {"name": c[0], "line": c[1], "kind": c[2] if len(c) >= 3 else "direct"}
                for c in graph.file_calls.get(file_path, [])
            ],
            routes_json=routes_by_file.get(file_path, []),
        )

    cache = IncrementalCache(
        # NOTE(review): hash() of a str is salted per process unless
        # PYTHONHASHSEED is fixed, so this value is not stable across runs.
        # Left unchanged because the code that compares it is not visible
        # here; a hashlib digest would be stable - confirm with the consumer.
        project_root_hash=str(hash(str(engine.project_root))),
        git_head=git_head,
        files=files,
        scan_stats_json={
            "processed_files": engine.scan_stats.processed_files,
            "total_symbols": len(graph.symbols),
            "total_edges": sum(len(v) for v in graph.outgoing.values()),
        },
    )

    # Write to a temp file in the same directory, then swap it into place.
    temp_path: str | None = None
    try:
        with tempfile.NamedTemporaryFile(
            mode='w', encoding='utf-8', dir=cache_dir,
            prefix='.tmp_inc_', suffix='.json', delete=False,
        ) as f:
            temp_path = f.name
            json.dump(_inc_cache_to_dict(cache), f, indent=2, ensure_ascii=False)
        os.replace(temp_path, cache_path)
    except BaseException:
        # Also covers KeyboardInterrupt so no stray temp file is left behind.
        if temp_path is not None and os.path.exists(temp_path):
            os.unlink(temp_path)
        raise

    return cache_path
|
|
274
|
+
|
|
275
|
+
|
|
276
|
+
def load_incremental_cache(project_path: str) -> IncrementalCache | None:
    """Load the incremental-scan baseline.

    Returns:
        The baseline, or ``None`` when the file does not exist or is not
        usable: unreadable, not valid JSON, or not shaped as a JSON object
        with a ``files`` object of per-file objects. Missing fields fall back
        to empty defaults.
    """
    cache_path = get_incremental_cache_path(project_path)
    if not cache_path.exists():
        return None

    try:
        with open(cache_path, 'r', encoding='utf-8') as f:
            data = json.load(f)

        # Valid JSON of the wrong shape (list, string, ...) is an invalid
        # baseline, not a crash.
        if not isinstance(data, dict):
            return None
        raw_files = data.get("files", {})
        if not isinstance(raw_files, dict):
            return None

        files = {}
        for fp, entry_data in raw_files.items():
            if not isinstance(entry_data, dict):
                return None
            files[fp] = FileCacheEntry(
                mtime=entry_data.get("mtime", 0.0),
                symbols_json=entry_data.get("symbols_json", []),
                imports=entry_data.get("imports", []),
                import_bindings_json=entry_data.get("import_bindings_json", []),
                exports_json=entry_data.get("exports_json", []),
                calls_json=entry_data.get("calls_json", []),
                routes_json=entry_data.get("routes_json", []),
            )

        return IncrementalCache(
            project_root_hash=data.get("project_root_hash", ""),
            git_head=data.get("git_head", ""),
            files=files,
            scan_stats_json=data.get("scan_stats_json", {}),
        )
    except (OSError, ValueError, KeyError):
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError;
        # OSError covers a file that disappeared or cannot be read.
        return None
|
|
303
|
+
|
|
304
|
+
|
|
305
|
+
def _inc_cache_to_dict(cache: IncrementalCache) -> dict:
    """Convert an incremental cache object into a plain, JSON-ready dict.

    The per-file entries are flattened field by field; the other attributes
    are copied over as they are.
    """
    entry_fields = (
        "mtime",
        "symbols_json",
        "imports",
        "import_bindings_json",
        "exports_json",
        "calls_json",
        "routes_json",
    )

    payload: dict = {
        "project_root_hash": cache.project_root_hash,
        "git_head": cache.git_head,
    }
    payload["files"] = {
        path: {field: getattr(entry, field) for field in entry_fields}
        for path, entry in cache.files.items()
    }
    payload["scan_stats_json"] = cache.scan_stats_json
    return payload
|
|
323
|
+
|
|
324
|
+
|
|
325
|
+
def _symbol_to_dict(s: Symbol) -> dict:
    """Turn a symbol into a dict.

    Dataclass instances go through ``asdict``. Any other object is read
    attribute by attribute; ``signature``, ``docstring`` and ``pagerank`` are
    optional and default to ``''``, ``''`` and ``0.0``.
    """
    if hasattr(s, '__dataclass_fields__'):
        return asdict(s)

    required = ('id', 'name', 'kind', 'file', 'line', 'col', 'visibility')
    optional = (('signature', ''), ('docstring', ''), ('pagerank', 0.0))

    record = {attr: getattr(s, attr) for attr in required}
    for attr, fallback in optional:
        record[attr] = getattr(s, attr, fallback)
    return record
|
|
341
|
+
|
|
342
|
+
|
|
343
|
+
def _edge_to_dict(e: Edge) -> dict:
    """Turn an edge into a dict.

    Dataclass instances go through ``asdict``. For any other object each
    field is read with a fallback: ``None`` for ``source`` and ``target``,
    ``0.0`` for ``weight`` and ``'call'`` for ``kind``.
    """
    if hasattr(e, '__dataclass_fields__'):
        return asdict(e)

    fallbacks = (
        ('source', None),
        ('target', None),
        ('weight', 0.0),
        ('kind', 'call'),
    )
    return {attr: getattr(e, attr, fallback) for attr, fallback in fallbacks}
|
|
353
|
+
|
|
354
|
+
|
|
355
|
+
# ═══════════════════════════════════════════════════════════════════════════════
|
|
356
|
+
# 功能 1: 变更检测 (Diff)
|
|
357
|
+
# ═══════════════════════════════════════════════════════════════════════════════
|
|
358
|
+
|
|
359
|
+
def diff_project(project_path: str) -> dict:
    """Compare the cached scan with the project's current state.

    Returns:
        ``{"error": ...}`` when no usable cache exists. Otherwise a dict with
        ``scan_time`` (now), ``last_scan`` (the cached scan time) and every
        key produced by ``compare_graph_snapshots``.
    """
    # Check for the cache first: without it there is nothing to compare
    # against, so the (expensive) project scan would be wasted.
    last = load_cache(project_path)
    if last is None:
        return {"error": "没有缓存,请先运行 cache --save"}

    current_symbols, current_edges = scan_project(project_path)
    comparison = compare_graph_snapshots(
        current_symbols=current_symbols,
        current_edges=current_edges,
        previous_symbols=last.symbols,
        previous_edges=last.edges,
    )

    return {
        'scan_time': datetime.now().isoformat(),
        'last_scan': last.scan_time,
        **comparison,
    }
|
|
378
|
+
|
|
379
|
+
|
|
380
|
+
def _symbol_info(sid: str, symbol_map: dict) -> dict:
    """Return a brief summary of a symbol.

    The result always contains ``id``. When ``symbol_map`` holds a truthy
    entry for ``sid``, its ``name``, ``kind``, ``file`` and ``line``
    attributes are included as well.
    """
    summary = {'id': sid}
    found = symbol_map.get(sid)
    if found:
        for attr in ('name', 'kind', 'file', 'line'):
            summary[attr] = getattr(found, attr)
    return summary
|
|
392
|
+
|
|
393
|
+
|
|
394
|
+
# ═══════════════════════════════════════════════════════════════════════════════
|
|
395
|
+
# 功能 2: Git 历史关联
|
|
396
|
+
# ═══════════════════════════════════════════════════════════════════════════════
|
|
397
|
+
|
|
398
|
+
def get_symbol_git_history(project_path: str, symbol_name: str) -> dict | None:
    """Look up Git history information for a cached symbol.

    The symbol is resolved against the symbol cache: an exact name match is
    preferred, otherwise the first symbol whose name contains ``symbol_name``
    is used.

    Args:
        project_path: Project root; used for the cache lookup and as the
            working directory of every git command.
        symbol_name: Full or partial symbol name.

    Returns:
        ``None`` when there is no cache, no matching symbol, the symbol's file
        does not exist, or ``git blame`` fails. ``{'error': ...}`` when a git
        command times out or another exception occurs. Otherwise a dict with
        ``symbol``, ``file``, ``line`` (the symbol's line number),
        ``current_commit`` (first 8 characters of the blamed commit),
        ``authors`` (up to 5, for the whole file) and ``recent_commits``
        (up to 10, for the whole file).
    """
    # Locate the symbol's file and line through the cache.
    cache = load_cache(project_path)
    if not cache:
        return None

    exact = [s for s in cache.symbols if s['name'] == symbol_name]
    matches = exact or [s for s in cache.symbols if symbol_name in s['name']]
    if not matches:
        return None

    symbol = matches[0]
    file_path = symbol['file']
    # Kept under its own name: the loops below iterate over output lines and
    # must not overwrite the value that is returned as 'line'.
    symbol_line = symbol['line']

    if not (Path(project_path) / file_path).exists():
        return None

    def run_git(*args: str, timeout: int) -> subprocess.CompletedProcess:
        """Run one git command inside the project and capture its output."""
        return subprocess.run(
            ['git', *args],
            cwd=project_path,
            capture_output=True,
            text=True,
            timeout=timeout,
        )

    try:
        # Blame only the symbol's line to find the commit that last touched it.
        blame = run_git(
            'blame', '-L', f'{symbol_line},{symbol_line}', '-p', '--', str(file_path),
            timeout=10,
        )
        if blame.returncode != 0:
            return None

        # The first token of porcelain blame output is the commit hash.
        blame_tokens = blame.stdout.split()
        commit_hash = blame_tokens[0] if blame_tokens else 'unknown'

        # Recent commits touching the file (history is per file, not per symbol).
        log = run_git(
            'log', '--follow', '-20', '--format=%H|%an|%ad|%s', '--', str(file_path),
            timeout=10,
        )
        recent_commits = []
        if log.returncode == 0:
            for entry in log.stdout.strip().split('\n'):
                parts = entry.split('|', 3)
                if len(parts) >= 4:
                    recent_commits.append({
                        'hash': parts[0][:8],
                        'author': parts[1],
                        'date': parts[2],
                        'message': parts[3],
                    })

        # Authors of the file. HEAD is passed explicitly: without a revision
        # git shortlog reads the log from stdin when stdin is not a terminal.
        shortlog = run_git('shortlog', '-sn', 'HEAD', '--', str(file_path), timeout=5)
        authors = []
        if shortlog.returncode == 0:
            for entry in shortlog.stdout.strip().split('\n'):
                parts = entry.strip().split('\t', 1)
                if len(parts) == 2:
                    authors.append(parts[1])

        return {
            'symbol': symbol['name'],
            'file': file_path,
            'line': symbol_line,
            'current_commit': commit_hash[:8],
            'authors': authors[:5],
            'recent_commits': recent_commits[:10],
        }

    except subprocess.TimeoutExpired:
        return {'error': 'Git 操作超时'}
    except Exception as e:
        return {'error': str(e)}
|
|
497
|
+
|
|
498
|
+
|
|
499
|
+
def get_hot_symbols(project_path: str, days: int = 30) -> list[dict]:
    """List recently changed files together with the symbols they contain.

    Files are taken from the commits of the last ``days`` days and matched
    against the symbol cache by path.

    Args:
        project_path: Project root; working directory for git and key for the
            cache lookup.
        days: Size of the look-back window in days.

    Returns:
        Up to 10 dicts with ``file``, ``symbols`` (first 10 symbol names) and
        ``symbol_count``, ordered by ``symbol_count`` descending. An empty
        list when git fails, no cache exists or any error occurs.
    """
    try:
        # Walk the commit history instead of resolving HEAD@{N.days ago}:
        # that syntax goes through the local reflog, which on a fresh clone
        # does not reach back in time and produces an empty diff.
        result = subprocess.run(
            ['git', 'log', f'--since={days} days ago', '--name-only', '--pretty=format:'],
            cwd=project_path,
            capture_output=True,
            text=True,
            timeout=10,
        )
        if result.returncode != 0:
            return []

        # One path per line, blank lines between commits; keep the first
        # occurrence of each path.
        changed_files = list(dict.fromkeys(
            name.strip() for name in result.stdout.splitlines() if name.strip()
        ))

        cache = load_cache(project_path)
        if not cache:
            return []

        # Symbol names grouped by the file that defines them.
        file_symbols: dict[str, list] = {}
        for s in cache.symbols:
            file_symbols.setdefault(s['file'], []).append(s['name'])

        hot_symbols = [
            {
                'file': f,
                'symbols': file_symbols[f][:10],
                'symbol_count': len(file_symbols[f]),
            }
            for f in changed_files
            if f in file_symbols
        ]
        return sorted(hot_symbols, key=lambda x: x['symbol_count'], reverse=True)[:10]

    except Exception:
        # Best effort by design: any failure yields "no hot files".
        return []
|
|
541
|
+
|
|
542
|
+
|
|
543
|
+
# ═══════════════════════════════════════════════════════════════════════════════
|
|
544
|
+
# 功能 3: 引用计数分析
|
|
545
|
+
# ═══════════════════════════════════════════════════════════════════════════════
|
|
546
|
+
|
|
547
|
+
def analyze_refs(project_path: str, symbol_name: str | None = None) -> dict:
    """Analyze call references between cached symbols.

    Only edges whose ``kind`` is ``'call'`` (the default when the key is
    missing) are considered.

    Args:
        project_path: Project whose symbol cache is analyzed.
        symbol_name: When given, report on one symbol whose id contains this
            text. A symbol whose name equals it exactly is preferred;
            remaining ties are broken by id order. When omitted, a
            project-wide report is produced.

    Returns:
        ``{'error': ...}`` when there is no cache or no symbol matches.
        For a single symbol: ``symbol``, ``id``, ``called_by`` and ``calls``
        (up to 20 formatted references each), ``ref_count``, ``is_entry``,
        ``is_leaf``. For the whole project: ``total_symbols``,
        ``entry_points``, ``leaf_functions`` (top 20), ``orphaned_symbols``
        and ``most_referenced`` (top 20).
    """
    cache = load_cache(project_path)
    if not cache:
        return {'error': '没有缓存,请先运行 cache --save'}

    # Build the call graph. Iterating symbol_map (a dict in cache order)
    # instead of a set keeps every result below deterministic.
    symbol_map = {s['id']: s for s in cache.symbols}
    # from_id -> ids it calls
    calls_out: dict[str, set] = {sid: set() for sid in symbol_map}
    # to_id -> ids that call it
    calls_in: dict[str, set] = {sid: set() for sid in symbol_map}

    for e in cache.edges:
        if e.get('kind', 'call') != 'call':
            continue
        from_id = e.get('source', e.get('from_id'))
        to_id = e.get('target', e.get('to_id'))
        if from_id and to_id:
            if from_id in calls_out:
                calls_out[from_id].add(to_id)
            if to_id in calls_in:
                calls_in[to_id].add(from_id)

    def ref_sort_key(ref_id: str) -> tuple:
        # The other end of an edge is not necessarily a cached symbol, so
        # look it up tolerantly instead of indexing symbol_map directly.
        info = symbol_map.get(ref_id, {})
        return (info.get('file', ''), info.get('line', 0), info.get('name', ref_id))

    if symbol_name:
        # Report on one symbol.
        matches = sorted(
            (sid for sid in symbol_map if symbol_name in sid),
            key=lambda sid: (symbol_map[sid].get('name') != symbol_name, sid),
        )
        if not matches:
            return {'error': f'未找到符号: {symbol_name}'}

        sid = matches[0]
        s = symbol_map[sid]

        return {
            'symbol': s['name'],
            'id': sid,
            'called_by': [
                _format_ref(cid, symbol_map)
                for cid in sorted(calls_in[sid], key=ref_sort_key)[:20]
            ],
            'calls': [
                _format_ref(cid, symbol_map)
                for cid in sorted(calls_out[sid], key=ref_sort_key)[:20]
            ],
            'ref_count': len(calls_in[sid]),
            'is_entry': len(calls_in[sid]) == 0,
            'is_leaf': len(calls_out[sid]) == 0,
        }

    # Project-wide report.
    results = []
    for sid, info in symbol_map.items():
        ref_count = len(calls_in[sid])
        calls_out_count = len(calls_out[sid])
        is_entry = ref_count == 0
        is_leaf = calls_out_count == 0
        # Orphan: neither called nor calling, and not a recognised entry.
        is_orphan = is_entry and is_leaf and not _is_public_entry(info)

        results.append({
            'id': sid,
            'name': info['name'],
            'file': info['file'],
            'ref_count': ref_count,
            'calls_count': calls_out_count,
            'is_entry': is_entry,
            'is_leaf': is_leaf,
            'is_orphan': is_orphan,
        })

    return {
        'total_symbols': len(results),
        'entry_points': [r for r in results if r['is_entry']],
        'leaf_functions': sorted(
            [r for r in results if r['is_leaf']],
            key=lambda x: x['ref_count'], reverse=True,
        )[:20],
        'orphaned_symbols': [r for r in results if r['is_orphan']],
        'most_referenced': sorted(results, key=lambda x: x['ref_count'], reverse=True)[:20],
    }
|
|
633
|
+
|
|
634
|
+
|
|
635
|
+
def _format_ref(sid: str, symbol_map: dict) -> dict:
    """Format a reference for display.

    Returns ``name``, ``file`` and ``line`` of the symbol stored under
    ``sid``; for unknown ids or missing keys the fallbacks are ``sid``,
    ``'unknown'`` and ``0``.
    """
    info = symbol_map.get(sid, {})
    fallbacks = (('name', sid), ('file', 'unknown'), ('line', 0))
    return {key: info.get(key, fallback) for key, fallback in fallbacks}
|
|
643
|
+
|
|
644
|
+
|
|
645
|
+
def _is_public_entry(symbol: dict) -> bool:
    """Tell whether a symbol counts as a public entry point.

    Only exemptions backed by static evidence are kept, so that guesses based
    on naming do not hide dead code: the name is ``main`` or ``__main__``,
    the kind is ``handler``, or the visibility is ``exported``.
    """
    if symbol.get('name', '') in {'main', '__main__'}:
        return True
    return symbol.get('kind', '') == 'handler' or symbol.get('visibility', '') == 'exported'
|
|
657
|
+
|
|
658
|
+
|
|
659
|
+
def find_orphans(project_path: str) -> list[dict]:
    """Find dead code (orphaned symbols).

    Runs the project-wide reference analysis and returns its
    ``orphaned_symbols`` list, or an empty list when the analysis reports an
    error.
    """
    report = analyze_refs(project_path)
    return [] if 'error' in report else report.get('orphaned_symbols', [])
|
|
665
|
+
|
|
666
|
+
|
|
667
|
+
# ═══════════════════════════════════════════════════════════════════════════════
|
|
668
|
+
# CLI 入口
|
|
669
|
+
# ═══════════════════════════════════════════════════════════════════════════════
|
|
670
|
+
|
|
671
|
+
def main():
    """Command-line entry point.

    Sub-commands: ``cache`` (save or show the symbol cache), ``diff``
    (compare the cache with the current state), ``git`` (history of one
    symbol, or hot files with ``--hot``), ``refs`` (reference analysis) and
    ``orphan`` (dead-code candidates). Prints help when no command is given.
    """
    parser = argparse.ArgumentParser(
        description='RepoMap Toolkit - 轻量级代码分析工具',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
示例:
  %(prog)s cache --save --project ./my-project
  %(prog)s diff --project ./my-project
  %(prog)s git --symbol calculate_kpi --project ./my-project
  %(prog)s refs --symbol calculate_kpi --project ./my-project
  %(prog)s orphan --project ./my-project
        """
    )

    subparsers = parser.add_subparsers(dest='command', help='可用命令')

    # cache command
    cache_parser = subparsers.add_parser('cache', help='缓存管理')
    cache_parser.add_argument('--save', action='store_true', help='保存当前扫描到缓存')
    cache_parser.add_argument('--load', action='store_true', help='从缓存加载并显示')
    cache_parser.add_argument('--project', '-p', default='.', help='项目路径')

    # diff command
    diff_parser = subparsers.add_parser('diff', help='变更检测')
    diff_parser.add_argument('--project', '-p', default='.', help='项目路径')
    diff_parser.add_argument('--json', action='store_true', help='输出 JSON 格式')

    # git command
    git_parser = subparsers.add_parser('git', help='Git 历史关联')
    # Not marked required: --hot works without a symbol. The combination is
    # validated after parsing instead.
    git_parser.add_argument('--symbol', '-s', help='符号名称')
    git_parser.add_argument('--hot', action='store_true', help='显示热点文件')
    git_parser.add_argument('--days', '-d', type=int, default=30, help='统计天数')
    git_parser.add_argument('--project', '-p', default='.', help='项目路径')

    # refs command
    refs_parser = subparsers.add_parser('refs', help='引用计数分析')
    refs_parser.add_argument('--symbol', '-s', help='特定符号名称(可选)')
    refs_parser.add_argument('--project', '-p', default='.', help='项目路径')
    refs_parser.add_argument('--json', action='store_true', help='输出 JSON 格式')

    # orphan command
    orphan_parser = subparsers.add_parser('orphan', help='查找死代码')
    orphan_parser.add_argument('--project', '-p', default='.', help='项目路径')

    args = parser.parse_args()

    if not args.command:
        parser.print_help()
        return

    project_path = os.path.abspath(args.project)

    if args.command == 'cache':
        if args.save:
            print(f"📦 正在扫描项目: {project_path}")
            symbols, edges = scan_project(project_path)
            cache_path = save_cache(project_path, symbols, edges)
            print(f"✅ 缓存已保存: {cache_path}")
            print(f" 符号数: {len(symbols)}, 依赖边: {len(edges)}")
        elif args.load:
            cache = load_cache(project_path)
            if cache:
                print(f"📂 缓存信息:")
                print(f" 扫描时间: {cache.scan_time}")
                print(f" 文件数: {cache.file_count}")
                print(f" 符号数: {cache.symbol_count}")
                print(f" 依赖边: {cache.edge_count}")
            else:
                print("❌ 没有找到缓存")
        else:
            cache_parser.print_help()

    elif args.command == 'diff':
        print(f"🔍 正在对比变更: {project_path}")
        result = diff_project(project_path)

        if 'error' in result:
            print(f"❌ {result['error']}")
            return

        if args.json:
            print(json.dumps(result, indent=2, ensure_ascii=False))
        else:
            print(f"\n📊 变更摘要 ({result['last_scan']} -> {result['scan_time']})")
            print(f" 新增符号: {result['summary']['added']}")
            print(f" 删除符号: {result['summary']['removed']}")
            print(f" 修改符号: {result['summary']['modified']}")
            print(f" 新增调用: {result['summary']['edges_added']}")
            print(f" 删除调用: {result['summary']['edges_removed']}")

            if result['added_symbols']:
                print(f"\n➕ 新增符号 (Top 10):")
                for s in result['added_symbols'][:10]:
                    print(f" - {s['name']} ({s['file']}:{s['line']})")

            if result['call_chain_changes']['new_calls']:
                print(f"\n🔗 新增调用关系 (Top 10):")
                for c in result['call_chain_changes']['new_calls'][:10]:
                    # Ids containing '::' are shortened to their second-to-last part.
                    from_name = c['from'].split('::')[-2] if '::' in c['from'] else c['from']
                    to_name = c['to'].split('::')[-2] if '::' in c['to'] else c['to']
                    print(f" - {from_name} -[{c['kind']}]-> {to_name}")

    elif args.command == 'git':
        if args.hot:
            print(f"🔥 热点文件 (最近 {args.days} 天):")
            hot = get_hot_symbols(project_path, args.days)
            for item in hot:
                print(f"\n 📄 {item['file']} ({item['symbol_count']} 个符号)")
                for s in item['symbols'][:5]:
                    print(f" - {s}")
        else:
            if not args.symbol:
                # Same exit path argparse used when --symbol was required.
                git_parser.error('必须提供 --symbol (或使用 --hot)')

            print(f"📜 正在查询 Git 历史: {args.symbol}")
            result = get_symbol_git_history(project_path, args.symbol)

            if not result:
                print(f"❌ 未找到符号或 Git 信息")
                return

            if 'error' in result:
                print(f"❌ {result['error']}")
                return

            print(f"\n📍 符号位置: {result['file']}:{result['line']}")
            print(f"👤 当前版本: {result['current_commit']}")
            print(f"\n📝 相关作者: {', '.join(result['authors'])}")
            print(f"\n📅 最近提交:")
            for c in result['recent_commits'][:5]:
                print(f" [{c['hash']}] {c['date'][:10]} by {c['author']}")
                print(f" {c['message'][:60]}")

    elif args.command == 'refs':
        print(f"🔗 正在分析引用关系: {project_path}")
        result = analyze_refs(project_path, args.symbol)

        if 'error' in result:
            print(f"❌ {result['error']}")
            return

        if args.json:
            print(json.dumps(result, indent=2, ensure_ascii=False))
        elif args.symbol:
            print(f"\n📌 {result['symbol']}")
            print(f" 被引用次数: {result['ref_count']}")
            print(f" 入口函数: {'是' if result['is_entry'] else '否'}")
            print(f" 叶子函数: {'是' if result['is_leaf'] else '否'}")

            if result['called_by']:
                print(f"\n📥 被调用 ({len(result['called_by'])} 个):")
                for ref in result['called_by'][:10]:
                    print(f" - {ref['name']} ({ref['file']}:{ref['line']})")

            if result['calls']:
                print(f"\n📤 调用 ({len(result['calls'])} 个):")
                for ref in result['calls'][:10]:
                    print(f" - {ref['name']} ({ref['file']}:{ref['line']})")
        else:
            print(f"\n📊 全局引用分析")
            print(f" 总符号数: {result['total_symbols']}")
            print(f" 入口函数: {len(result['entry_points'])}")
            print(f" 死代码: {len(result['orphaned_symbols'])}")

            print(f"\n🔝 被引用最多 (Top 10):")
            for r in result['most_referenced'][:10]:
                status = "🚪" if r['is_entry'] else "🍃" if r['is_leaf'] else " "
                print(f" {status} {r['name']}: {r['ref_count']} 次引用 ({r['file']})")

    elif args.command == 'orphan':
        print(f"🧹 正在查找死代码: {project_path}")
        orphans = find_orphans(project_path)

        if orphans:
            print(f"\n⚠️ 发现 {len(orphans)} 个可疑死代码:")
            for o in orphans[:20]:
                print(f" - {o['name']} ({o['file']})")
        else:
            print("\n✅ 未发现明显死代码")


if __name__ == '__main__':
    main()
|