repomap-cli 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
repomap/__init__.py ADDED
@@ -0,0 +1,320 @@
1
+ from __future__ import annotations
2
+
3
+ import hashlib
4
+ from collections import defaultdict
5
+ from dataclasses import dataclass, field
6
+ from pathlib import Path
7
+ from typing import Any
8
+
9
+
10
# Root directory for all repomap caches; per-project subdirectories live beneath it.
CACHE_DIR = Path.home() / ".cache" / "repomap"
11
+
12
+
13
def get_project_cache_dir(project_path: str) -> Path:
    """Return (and create) the cache directory for *project_path*.

    The directory name combines the project's basename with a short hash of
    its canonical absolute path, so two projects that share a name still get
    isolated cache directories.
    """
    resolved = str(Path(project_path).expanduser().resolve())
    digest = hashlib.md5(resolved.encode()).hexdigest()[:8]
    target = CACHE_DIR / f"{Path(resolved).name}_{digest}"
    target.mkdir(parents=True, exist_ok=True)
    return target
21
+
22
+
23
def get_cache_paths(project_path: str) -> tuple[Path, Path, Path]:
    """Return the cache file paths: (symbols_cache, git_cache, last_snapshot)."""
    base = get_project_cache_dir(project_path)
    symbols, git, snapshot = (
        base / name for name in ("symbols.json", "git.json", "last_snapshot.json")
    )
    return symbols, git, snapshot
31
+
32
+
33
def get_session_cache_path(project_path: str) -> Path:
    """Return the path of the cross-process, short-lived scan cache."""
    return get_project_cache_dir(project_path) / "session_scan.json"
37
+
38
+
39
def get_incremental_cache_path(project_path: str) -> Path:
    """Return the path of the persisted incremental-scan cache."""
    return get_project_cache_dir(project_path) / "incremental.json"
43
+
44
+
45
@dataclass
class FileCacheEntry:
    """Per-file incremental cache entry — the parse results of one file from a full scan."""
    mtime: float  # file modification time recorded at scan; used to detect later changes
    symbols_json: list[dict] = field(default_factory=list)  # JSON-ready symbol rows
    imports: list[str] = field(default_factory=list)  # imported module specifiers
    import_bindings_json: list[dict] = field(default_factory=list)  # JSON-ready import-binding rows
    exports_json: list[dict] = field(default_factory=list)  # JSON-ready export-binding rows
    calls_json: list[dict] = field(default_factory=list)  # JSON-ready call-reference rows
    routes_json: list[dict] = field(default_factory=list)  # JSON-ready HTTP-route rows
55
+
56
+
57
@dataclass
class IncrementalCache:
    """Persisted baseline for incremental scans, used to identify changed files
    and restore results for unchanged files on subsequent scans."""
    project_root_hash: str = ""  # hash identifying the project root this baseline belongs to
    git_head: str = ""  # git HEAD recorded when the baseline was taken — NOTE(review): confirm against writer
    files: dict[str, FileCacheEntry] = field(default_factory=dict)  # file path -> cached parse results
    scan_stats_json: dict = field(default_factory=dict)  # JSON-ready stats of the baseline scan
64
+
65
+
66
@dataclass
class Symbol:
    """A code symbol (function / class / interface, etc.)."""

    id: str  # unique symbol identifier
    name: str
    kind: str  # symbol category; vocabulary defined by the scanner
    file: str  # path of the defining file
    line: int  # 1-based start line
    end_line: int = 0
    col: int = 0
    visibility: str = "private"  # "exported" marks externally visible symbols
    docstring: str = ""
    signature: str = ""
    pagerank: float = 0.0  # importance score over the reference graph
81
+
82
+
83
@dataclass
class Edge:
    """A directed edge in the repo graph between two symbols."""
    source: str  # source symbol id
    target: str  # target symbol id
    weight: float
    kind: str  # relation kind, e.g. "call"
89
+
90
+
91
@dataclass(frozen=True)
class JSImportBinding:
    """One imported-name binding in a JS/TS module."""
    local_name: str  # name as used inside the importing file
    imported_name: str  # name as exported by the source module
    module: str  # module specifier the name comes from
    line: int
    kind: str = "named"  # import form; defaults to a named import
98
+
99
+
100
@dataclass(frozen=True)
class JSExportBinding:
    """One exported-name binding in a JS/TS module."""
    exported_name: str
    source_name: str | None  # local name behind the export, if any
    module: str | None  # source module for re-exports, else None
    line: int
    kind: str = "local"  # export form; defaults to a locally defined export
107
+
108
+
109
@dataclass(frozen=True)
class PathAliasRule:
    """A path-alias mapping rule: one alias pattern to its candidate targets."""
    alias_pattern: str  # alias as written in imports
    target_patterns: tuple[str, ...]  # candidate filesystem patterns to resolve to
113
+
114
+
115
@dataclass
class ProjectImportConfig:
    """Project-level import-resolution settings (alias rules and base URL)."""
    config_path: str | None = None  # path of the discovered config file, if any
    config_dir: str | None = None  # directory containing the config file
    base_url: str | None = None  # base dir for non-relative imports — NOTE(review): presumably tsconfig-style baseUrl
    alias_rules: list[PathAliasRule] = field(default_factory=list)
121
+
122
+
123
@dataclass
class ScanStats:
    """Counters and timing collected during a project scan."""
    listed_source_files: int = 0
    selected_source_files: int = 0
    processed_files: int = 0
    filtered_path_files: int = 0  # files excluded by path filters
    filtered_large_files: int = 0  # files excluded for size
    truncated_files: int = 0
    failed_files: list[str] = field(default_factory=list)  # paths of failed files (only the first N recorded)
    scan_duration_ms: int = 0  # scan duration in milliseconds
    timeout_triggered: bool = False  # whether the scan-timeout circuit breaker fired
    skipped_files: int = 0  # files skipped because mtime was unchanged (incremental scan)
135
+
136
+
137
@dataclass
class HttpRoute:
    """An HTTP route definition (extracted from the AST)."""
    method: str  # GET, POST, PUT, DELETE, PATCH
    path: str  # e.g. /api/users/:id
    handler: str  # handler function name
    file: str  # file path
    line: int  # line number
    framework: str  # fastapi, flask, express, axum
146
+
147
+
148
@dataclass
class RepoGraph:
    """In-memory symbol graph of a repository: symbols, adjacency, and per-file indexes."""
    symbols: dict[str, Symbol] = field(default_factory=dict)  # symbol id -> Symbol
    outgoing: dict[str, list[Edge]] = field(default_factory=lambda: defaultdict(list))  # symbol id -> edges out
    incoming: dict[str, list[Edge]] = field(default_factory=lambda: defaultdict(list))  # symbol id -> edges in
    file_symbols: dict[str, list[str]] = field(default_factory=lambda: defaultdict(list))  # file -> symbol ids
    file_imports: dict[str, list[str]] = field(default_factory=lambda: defaultdict(list))  # file -> imported modules
    file_calls: dict[str, list[tuple[str, int]]] = field(default_factory=lambda: defaultdict(list))  # file -> (name, line)
    file_import_bindings: dict[str, list[JSImportBinding]] = field(default_factory=lambda: defaultdict(list))
    file_exports: dict[str, list[JSExportBinding]] = field(default_factory=lambda: defaultdict(list))
158
+
159
+
160
def call_reference_parts(call_ref: Any) -> tuple[str, int, str]:
    """Normalize a call reference into a (name, line, kind) triple.

    Accepts either a sequence ``(name, line[, kind])`` or any object exposing
    ``name`` / ``line`` / ``kind`` attributes. Missing or falsy pieces default
    to ``""`` / ``0`` / ``"direct"``.
    """
    if isinstance(call_ref, (list, tuple)) and len(call_ref) >= 2:
        raw_kind = call_ref[2] if len(call_ref) >= 3 else None
        return str(call_ref[0]), int(call_ref[1]), str(raw_kind or "direct")
    # Attribute-style reference (or a too-short sequence): read with defaults.
    ref_name = getattr(call_ref, "name", "")
    ref_line = getattr(call_ref, "line", 0)
    ref_kind = getattr(call_ref, "kind", "direct")
    return str(ref_name), int(ref_line), str(ref_kind or "direct")
170
+
171
+
172
def serialize_symbol(symbol: Symbol) -> dict[str, Any]:
    """Convert a Symbol into a plain, JSON-serializable dict (fixed key order)."""
    attrs = (
        "id", "name", "kind", "file", "line", "end_line",
        "col", "visibility", "signature", "docstring", "pagerank",
    )
    return {attr: getattr(symbol, attr) for attr in attrs}
186
+
187
+
188
def serialize_edge(edge: Edge) -> dict[str, Any]:
    """Convert an Edge into a plain, JSON-serializable dict."""
    return {attr: getattr(edge, attr) for attr in ("source", "target", "weight", "kind")}
195
+
196
+
197
def edge_identity_from_edge(edge: Edge) -> tuple[str, str, str] | None:
    """Identity key (source, target, kind) for an edge, or None if an endpoint is empty."""
    if edge.source and edge.target:
        return (edge.source, edge.target, edge.kind)
    return None
201
+
202
+
203
def edge_identity_from_row(row: dict[str, Any]) -> tuple[str, str, str] | None:
    """Identity key from a serialized edge row; tolerates legacy from_id/to_id keys."""
    source = row.get("source", row.get("from_id"))
    target = row.get("target", row.get("to_id"))
    if not (source and target):
        return None
    # Older rows may lack "kind"; default matches the dominant edge kind.
    return (source, target, row.get("kind", "call"))
210
+
211
+
212
def compare_graph_snapshots(
    current_symbols: list[Symbol],
    current_edges: list[Edge],
    previous_symbols: list[dict[str, Any]],
    previous_edges: list[dict[str, Any]],
    incoming_map: dict[str, list[Edge]] | None = None,
) -> dict[str, Any]:
    """Diff the current symbol/edge graph against a previously serialized snapshot.

    Args:
        current_symbols: Symbols from the current scan.
        current_edges: Edges from the current scan.
        previous_symbols: Serialized symbol rows from the prior snapshot
            (dicts shaped like serialize_symbol output).
        previous_edges: Serialized edge rows from the prior snapshot.
        incoming_map: Optional symbol-id -> incoming-edge index. When given,
            each modified symbol is annotated with affected callers and a
            risk level.

    Returns:
        A report dict with "summary", "added_symbols", "removed_symbols",
        "modified_symbols", and "call_chain_changes" (each call-change list
        capped at 20 entries).
    """
    # Index both sides by symbol id for set arithmetic and O(1) lookup.
    current_symbol_map = {symbol.id: symbol for symbol in current_symbols}
    previous_symbol_map = {row["id"]: row for row in previous_symbols}

    current_symbol_ids = set(current_symbol_map)
    previous_symbol_ids = set(previous_symbol_map)

    # Sorted so the report ordering is deterministic.
    added_symbol_ids = sorted(current_symbol_ids - previous_symbol_ids)
    removed_symbol_ids = sorted(previous_symbol_ids - current_symbol_ids)

    modified_symbols = []
    for symbol_id in sorted(current_symbol_ids & previous_symbol_ids):
        current = current_symbol_map[symbol_id]
        previous = previous_symbol_map[symbol_id]
        sig_changed = current.signature != previous.get("signature", "")
        # Location change: start line, end line (defaulting to the current
        # value tolerates old snapshots without the field), or file move.
        loc_changed = (
            current.line != previous.get("line")
            or current.end_line != previous.get("end_line", current.end_line)
            or current.file != previous.get("file")
        )
        if sig_changed or loc_changed:
            entry = {
                "id": symbol_id,
                "name": current.name,
                "file": current.file,
                "visibility": current.visibility,
                "kind": current.kind,
                "line_change": f"{previous.get('line')} -> {current.line}",
                "old_signature": previous.get("signature", ""),
                "new_signature": current.signature,
                "signature_changed": sig_changed,
            }
            # Attach caller information (only when an incoming index was supplied).
            if incoming_map:
                callers = [e for e in incoming_map.get(symbol_id, []) if e.kind == "call"]
                entry["affected_callers"] = [
                    {"symbol_id": e.source, "kind": e.kind}
                    for e in callers[:10]  # cap the listed callers at 10
                ]
                entry["affected_caller_count"] = len(callers)
                # Risk rating: exported-symbol signature change -> HIGH;
                # otherwise a signature change with callers -> MEDIUM.
                if sig_changed and entry.get("visibility") == "exported":
                    entry["risk"] = "HIGH"
                elif sig_changed and len(callers) >= 3:
                    entry["risk"] = "MEDIUM"
                else:
                    entry["risk"] = "LOW"
            modified_symbols.append(entry)

    # Build identity sets for edge diffing; entries with missing endpoints
    # yield None identities and are dropped.
    current_edge_set = {
        edge_id
        for edge in current_edges
        for edge_id in [edge_identity_from_edge(edge)]
        if edge_id is not None
    }
    previous_edge_set = {
        edge_id
        for row in previous_edges
        for edge_id in [edge_identity_from_row(row)]
        if edge_id is not None
    }

    edges_added = sorted(current_edge_set - previous_edge_set)
    edges_removed = sorted(previous_edge_set - current_edge_set)

    return {
        "summary": {
            "added": len(added_symbol_ids),
            "removed": len(removed_symbol_ids),
            "modified": len(modified_symbols),
            "edges_added": len(edges_added),
            "edges_removed": len(edges_removed),
        },
        "added_symbols": [
            {
                "id": symbol_id,
                "name": current_symbol_map[symbol_id].name,
                "file": current_symbol_map[symbol_id].file,
                "line": current_symbol_map[symbol_id].line,
            }
            for symbol_id in added_symbol_ids
        ],
        "removed_symbols": [
            {
                "id": symbol_id,
                "name": previous_symbol_map[symbol_id].get("name", symbol_id),
                "file": previous_symbol_map[symbol_id].get("file", ""),
                "line": previous_symbol_map[symbol_id].get("line", 0),
            }
            for symbol_id in removed_symbol_ids
        ],
        "modified_symbols": modified_symbols,
        "call_chain_changes": {
            # Only the first 20 edge changes of each kind are reported.
            "new_calls": [
                {"from": source, "to": target, "kind": kind}
                for source, target, kind in edges_added[:20]
            ],
            "removed_calls": [
                {"from": source, "to": target, "kind": kind}
                for source, target, kind in edges_removed[:20]
            ],
        },
    }