repomap-cli 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
repomap/check.py ADDED
@@ -0,0 +1,1212 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ RepoMap Check — 编译器/静态分析诊断模块
4
+
5
+ 自动检测项目类型并运行对应诊断工具,将结构化错误信息与符号图结合,
6
+ 帮助 AI 在修改代码后快速发现问题并定位到具体符号。
7
+
8
+ 支持:TypeScript (tsc)、Rust (cargo check)、Python (mypy/ruff)、Go (go vet/build)
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import json
14
+ import os
15
+ import re
16
+ import shutil
17
+ import subprocess
18
+ import sys
19
+ from dataclasses import dataclass, field
20
+ from pathlib import Path
21
+ from typing import Any
22
+
23
+
24
@dataclass
class DiagnosticIssue:
    """A single problem reported by one diagnostic tool."""

    tool: str  # name of the tool that produced the issue (e.g. "tsc", "ruff")
    file: str  # file path as reported by the tool
    line: int
    col: int
    severity: str  # "error" | "warning" | "info"
    code: str  # tool-specific rule or error code
    message: str
    symbol: str | None = None  # name of the associated symbol (resolved via the symbol map)
    symbol_confidence: str = "none"  # confidence of the symbol association: "exact" | "line" | "none"
    callers: list[str] = field(default_factory=list)  # names of functions that call the symbol
    suggested_fix: str | None = None  # suggested fix text, when the tool offers one
39
+
40
+
41
@dataclass
class DiagnosticResult:
    """Outcome of running one diagnostic tool."""

    tool: str
    command: str  # human-readable command line (may be abbreviated)
    exit_code: int
    duration_ms: int
    skipped: bool = False  # True when the tool was not executed
    skip_reason: str = ""  # why the tool was skipped; empty otherwise
    errors: list[DiagnosticIssue] = field(default_factory=list)
    warnings: list[DiagnosticIssue] = field(default_factory=list)
    truncated: bool = False  # True when more issues were found than were kept
    raw_excerpt: list[str] = field(default_factory=list)  # leading lines of the raw tool output
55
+
56
+
57
class ProjectDetector:
    """Infer which language toolchains a project uses from marker files."""

    @staticmethod
    def detect(project_root: Path) -> list[str]:
        """Return the sorted list of language types found under *project_root*.

        Possible entries are "typescript", "rust", "python", "go" and
        "javascript". JavaScript is only reported when no TypeScript
        configuration is present.
        """
        found: set[str] = set()

        # TypeScript: any tsconfig*.json at the project root.
        if next(project_root.glob("tsconfig*.json"), None) is not None:
            found.add("typescript")

        # Rust
        if (project_root / "Cargo.toml").exists():
            found.add("rust")

        # Python: any one of the usual packaging / requirements files.
        for marker in ["pyproject.toml", "setup.py", "setup.cfg", "requirements.txt"]:
            if (project_root / marker).exists():
                found.add("python")
                break

        # Go
        if (project_root / "go.mod").exists():
            found.add("go")

        # JavaScript is probed separately only when TypeScript was not detected.
        if "typescript" not in found and ProjectDetector._has_js_files(project_root):
            found.add("javascript")

        return sorted(found)

    @staticmethod
    def _has_js_files(project_root: Path) -> bool:
        """Return True when the project contains JS sources outside dependency/build dirs."""
        # Fast path: let ripgrep list files and inspect the first 100 entries.
        try:
            listing = subprocess.run(
                ["rg", "--files", "-g", "!node_modules/**", "-g", "!dist/**", "-g", "!build/**"],
                cwd=project_root,
                capture_output=True,
                text=True,
                timeout=5,
            )
            if listing.returncode == 0:
                head = listing.stdout.split("\n")[:100]
                if any(re.search(r"\.(mjs|cjs|js|jsx)$", entry) for entry in head):
                    return True
        except Exception:
            # Best effort only: a missing or failing rg falls through to the walk below.
            pass

        # Fallback: plain directory walk that prunes dependency and build
        # directories, so files inside node_modules alone never mark the
        # project as JavaScript.
        pruned = {"node_modules", "dist", "build", ".git", ".venv", "venv", "__pycache__"}
        js_suffixes = {".js", ".jsx", ".mjs", ".cjs"}
        for _current, sub_dirs, entries in os.walk(project_root):
            sub_dirs[:] = [d for d in sub_dirs if d not in pruned]
            for entry in entries:
                if Path(entry).suffix.lower() in js_suffixes:
                    return True
        return False
116
+
117
+
118
class GitHelper:
    """Git helpers used to discover which files changed."""

    @staticmethod
    def _diff_names(project_root: Path, *diff_args: str) -> set[str]:
        """Run ``git diff --name-only`` with *diff_args* and return the listed paths.

        ``--relative`` makes the paths relative to *project_root* (and limits
        them to that subtree) instead of the repository top level; ``-z``
        disables git's C-style quoting so names with non-ASCII characters come
        back verbatim. Any failure (git missing, not a repository, timeout,
        undecodable output) yields an empty set: change detection is best
        effort.
        """
        cmd = ["git", "diff", "--name-only", "--relative", "-z", *diff_args]
        try:
            result = subprocess.run(
                cmd,
                cwd=project_root,
                capture_output=True,
                text=True,
                timeout=10,
            )
        except (OSError, subprocess.SubprocessError, UnicodeError):
            return set()
        if result.returncode != 0:
            return set()
        return {name for name in result.stdout.split("\0") if name}

    @staticmethod
    def get_modified_files(project_root: Path, since_commit: str | None = None) -> list[str]:
        """Return the sorted, de-duplicated list of changed files.

        The result is the union of staged changes, unstaged changes and, when
        *since_commit* is given, everything that differs between that commit
        and ``HEAD``.

        Args:
            project_root: Directory in which git is executed.
            since_commit: Optional revision (e.g. ``"HEAD~1"``). Values that
                start with ``-`` are ignored so they cannot be interpreted as
                git options.

        Returns:
            Paths relative to *project_root*.
        """
        files = GitHelper._diff_names(project_root, "--cached")  # staged
        files |= GitHelper._diff_names(project_root)  # unstaged
        if since_commit and not since_commit.startswith("-"):
            # Trailing "--" keeps git from treating the revisions as pathspecs.
            files |= GitHelper._diff_names(project_root, since_commit, "HEAD", "--")
        return sorted(files)
170
+
171
+
172
class DiagnosticRunner:
    """Run per-language diagnostic tools and parse their output into issues.

    When ``modified_files`` is non-empty the runner works incrementally:
    tools that accept file arguments only receive the modified files, and the
    output of whole-project tools is filtered down to those files.
    """

    _JS_TS_SUFFIXES = (".js", ".jsx", ".mjs", ".cjs", ".ts", ".tsx")
    _ESLINT_CONFIGS = (
        ".eslintrc",
        ".eslintrc.js",
        ".eslintrc.cjs",
        ".eslintrc.json",
        "eslint.config.js",
        "eslint.config.mjs",
        "eslint.config.cjs",
    )
    # Characters rejected in file paths (shell metacharacters, backslash, NUL).
    _UNSAFE_PATH_CHARS = frozenset(";&|`$()<>\\\x00")
    _COMMAND_TIMEOUT_S = 120

    # file.ts(42,8): error TS2345: message
    _TSC_LINE = re.compile(r'^(.+)\((\d+),(\d+)\):\s+(error|warning)\s+(TS\d+):\s+(.+)$')
    # file.py:42: error: message  [code]   (column is optional)
    _MYPY_LINE = re.compile(r'^(.+\.pyi?):(\d+)(?::(\d+))?:\s*(error|warning|note):\s+(.+)$')
    # file.go:42:8: message   (column is optional)
    _GO_LINE = re.compile(r'^(.+\.go):(\d+)(?::(\d+))?:\s+(.+)$')

    def __init__(self, project_root: Path, max_items: int = 100, modified_files: list[str] | None = None):
        """Create a runner.

        Args:
            project_root: Project directory; every tool runs with it as cwd.
            max_items: Maximum number of errors (and of warnings) kept per tool.
            modified_files: Optional files for incremental checking. Entries
                are normalized to project-relative POSIX paths; entries that
                fail the safety check are dropped.
        """
        self.project_root = project_root.resolve()
        self.max_items = max_items
        self.modified_files = {
            normalized for file_path in (modified_files or [])
            if (normalized := self._normalize_safe_path(file_path)) is not None
        }

    # ------------------------------------------------------------------ paths

    def _normalize_safe_path(self, file_path: str) -> str | None:
        """Normalize a tool/CLI supplied path to a project-relative POSIX path.

        Returns None for empty paths, option-like paths (leading ``-``), paths
        containing characters from ``_UNSAFE_PATH_CHARS`` and paths that
        resolve outside the project root (or to the root itself).
        """
        if not file_path or file_path.startswith("-"):
            return None
        if self._UNSAFE_PATH_CHARS.intersection(file_path):
            return None
        input_path = Path(file_path).expanduser()
        if input_path.is_absolute():
            abs_path = input_path.resolve()
        else:
            abs_path = (self.project_root / input_path).resolve()
        try:
            rel_path = abs_path.relative_to(self.project_root).as_posix()
        except ValueError:
            return None
        if rel_path in ("", ".") or any(part == ".." for part in Path(rel_path).parts):
            return None
        return rel_path

    def _display_path(self, file_path: str) -> str:
        """Return *file_path* relative to the project when possible, else unchanged.

        ESLint and ruff report absolute paths while the other tools report
        relative ones; normalizing keeps report keys and symbol lookups
        consistent across tools.
        """
        return self._normalize_safe_path(file_path) or file_path

    def _safe_modified_files(self, suffixes: tuple[str, ...]) -> list[str]:
        """Return the sorted modified files that end with *suffixes* and still exist.

        ``git diff`` also lists deleted files; passing those to a linter makes
        the whole run fail, so they are left out here.
        """
        return sorted(
            f for f in self.modified_files
            if f.endswith(suffixes) and (self.project_root / f).is_file()
        )

    def _modified_go_packages(self) -> list[str]:
        """Return the ``./dir`` package patterns that contain modified Go files."""
        packages = set()
        for rel in self._safe_modified_files((".go",)):
            parent = Path(rel).parent.as_posix()
            packages.add("." if parent == "." else f"./{parent}")
        return sorted(packages)

    def _is_safe_path(self, file_path: str) -> bool:
        """Return True when *file_path* passes the path safety check."""
        return self._normalize_safe_path(file_path) is not None

    def _should_check_file(self, file_path: str) -> bool:
        """Return True when an issue located in *file_path* should be reported.

        Paths that fail normalization are always rejected. Without an
        incremental file list every remaining path is accepted.
        """
        normalized = self._normalize_safe_path(file_path)
        if normalized is None:
            return False
        if not self.modified_files:
            return True
        return normalized in self.modified_files

    # ---------------------------------------------------------------- helpers

    def _has_eslint_config(self) -> bool:
        """Return True when an ESLint configuration file exists at the project root."""
        return any((self.project_root / name).exists() for name in self._ESLINT_CONFIGS)

    def _has_cmd(self, cmd: str) -> bool:
        """Return True when *cmd* is available on PATH."""
        return shutil.which(cmd) is not None

    def _now_ms(self) -> int:
        """Return the current wall-clock time in milliseconds."""
        import time

        return int(time.time() * 1000)

    def _run_command(
        self, cmd: list[str], tool_name: str
    ) -> tuple[int, str, int]:
        """Run *cmd* in the project root.

        Returns:
            ``(exit_code, output, duration_ms)`` where *output* is stdout
            followed by stderr. A timeout or a failure to start the process is
            reported as exit code ``-1`` with the reason as output.
        """
        start = self._now_ms()
        try:
            result = subprocess.run(
                cmd,
                cwd=self.project_root,
                capture_output=True,
                text=True,
                timeout=self._COMMAND_TIMEOUT_S,
            )
        except subprocess.TimeoutExpired:
            return -1, "Timeout after 120s", self._now_ms() - start
        except Exception as e:
            return -1, str(e), self._now_ms() - start
        return result.returncode, result.stdout + result.stderr, self._now_ms() - start

    @staticmethod
    def _load_json_array(output: str) -> list[Any]:
        """Extract the first JSON array embedded in *output*.

        Tool output is stdout and stderr concatenated, so the JSON document
        may be followed (or preceded) by plain-text noise that would make
        ``json.loads`` fail on the whole string. Returns an empty list when no
        array can be decoded.
        """
        decoder = json.JSONDecoder()
        start = output.find("[")
        while start != -1:
            try:
                data, _end = decoder.raw_decode(output, start)
            except json.JSONDecodeError:
                start = output.find("[", start + 1)
                continue
            return data if isinstance(data, list) else []
        return []

    def _skipped(self, tool: str, command: str, reason: str) -> DiagnosticResult:
        """Build the result for a tool that was not executed."""
        return DiagnosticResult(
            tool=tool,
            command=command,
            exit_code=0,
            duration_ms=0,
            skipped=True,
            skip_reason=reason,
        )

    def _completed(
        self,
        tool: str,
        command: str,
        exit_code: int,
        duration: int,
        output: str,
        errors: list[DiagnosticIssue],
        warnings: list[DiagnosticIssue],
        excerpt_lines: int = 30,
    ) -> DiagnosticResult:
        """Build the result for an executed tool, capping issues at ``max_items``."""
        limit = self.max_items
        return DiagnosticResult(
            tool=tool,
            command=command,
            exit_code=exit_code,
            duration_ms=duration,
            errors=errors[:limit],
            warnings=warnings[:limit],
            truncated=len(errors) > limit or len(warnings) > limit,
            raw_excerpt=output.split("\n")[:excerpt_lines],
        )

    # ------------------------------------------------------------ entry point

    def run_all(self, types: list[str]) -> list[DiagnosticResult]:
        """Run every diagnostic tool that applies to the given language types."""
        results = []

        if "typescript" in types:
            results.append(self._run_tsc())
            # TypeScript projects also get ESLint when a config is present.
            if self._has_eslint_config():
                results.append(self._run_eslint())

        if "javascript" in types and "typescript" not in types:
            if self._has_eslint_config():
                results.append(self._run_eslint())
            else:
                results.append(
                    self._skipped("eslint", "skip (no eslint config)", "eslint config not found")
                )

        if "rust" in types:
            results.append(self._run_cargo_check())

        if "python" in types:
            results.append(self._run_mypy())
            results.append(self._run_ruff())

        if "go" in types:
            results.append(self._run_go_vet())
            results.append(self._run_go_build())

        return results

    # -------------------------------------------------------------- TypeScript

    def _run_tsc(self) -> DiagnosticResult:
        """Run the TypeScript compiler in check-only mode (via npx if needed)."""
        tool = "tsc"
        has_tsc = self._has_cmd("tsc")
        if not has_tsc and not self._has_cmd("npx"):
            return self._skipped(tool, "tsc --noEmit --pretty false", "tsc/npx not found")

        cmd = ["tsc", "--noEmit", "--pretty", "false"]
        if not has_tsc:
            cmd = ["npx", *cmd]

        exit_code, output, duration = self._run_command(cmd, tool)
        errors, warnings = self._parse_tsc_output(output)
        return self._completed(tool, " ".join(cmd), exit_code, duration, output, errors, warnings)

    def _parse_tsc_output(self, output: str) -> tuple[list[DiagnosticIssue], list[DiagnosticIssue]]:
        """Parse ``file(line,col): severity TSxxxx: message`` lines from tsc."""
        errors, warnings = [], []
        for line in output.split("\n"):
            match = self._TSC_LINE.match(line.strip())
            if not match:
                continue
            file_path = match.group(1)
            # tsc always checks the whole project; filter for incremental runs.
            if not self._should_check_file(file_path):
                continue
            issue = DiagnosticIssue(
                tool="tsc",
                file=file_path,
                line=int(match.group(2)),
                col=int(match.group(3)),
                severity=match.group(4),
                code=match.group(5),
                message=match.group(6),
            )
            (errors if issue.severity == "error" else warnings).append(issue)
        return errors, warnings

    # ------------------------------------------------------------------ ESLint

    def _run_eslint(self) -> DiagnosticResult:
        """Run ESLint with JSON output (via npx if needed)."""
        tool = "eslint"
        has_eslint = self._has_cmd("eslint")
        if not has_eslint and not self._has_cmd("npx"):
            return self._skipped(
                tool,
                "eslint . --ext .js,.jsx,.mjs,.cjs,.ts,.tsx --format json",
                "eslint/npx not found",
            )

        if self.modified_files:
            # Incremental: lint only the modified JS/TS files.
            target_files = self._safe_modified_files(self._JS_TS_SUFFIXES)
            if not target_files:
                return self._skipped(tool, "skip (no matching files)", "no modified JS/TS files")
            cmd = ["eslint", "--format", "json", "--", *target_files]
        else:
            cmd = [
                "eslint", ".",
                "--ext", ".js,.jsx,.mjs,.cjs,.ts,.tsx",
                "--format", "json",
            ]
        if not has_eslint:
            cmd = ["npx", *cmd]

        exit_code, output, duration = self._run_command(cmd, tool)
        errors, warnings = self._parse_eslint_output(output)
        # Long file lists are abbreviated in the reported command.
        command = " ".join(cmd[:6]) + "..." if len(cmd) > 6 else " ".join(cmd)
        return self._completed(
            tool, command, exit_code, duration, output, errors, warnings, excerpt_lines=20
        )

    def _parse_eslint_output(self, output: str) -> tuple[list[DiagnosticIssue], list[DiagnosticIssue]]:
        """Parse ESLint's JSON formatter output.

        Severity 2 becomes an error, 1 a warning; anything else is ignored.
        File paths are reported relative to the project when possible.
        """
        errors, warnings = [], []
        for record in self._load_json_array(output):
            if not isinstance(record, dict):
                continue
            file_path = self._display_path(record.get("filePath") or "")
            for msg in record.get("messages") or []:
                if not isinstance(msg, dict):
                    continue
                severity_num = msg.get("severity", 0)
                if severity_num == 2:
                    severity = "error"
                elif severity_num == 1:
                    severity = "warning"
                else:
                    continue

                fix_data = msg.get("fix")
                fix_text = fix_data.get("text") if isinstance(fix_data, dict) else None
                issue = DiagnosticIssue(
                    tool="eslint",
                    file=file_path,
                    line=msg.get("line") or 0,
                    col=msg.get("column") or 0,
                    severity=severity,
                    code=msg.get("ruleId") or "eslint",
                    message=msg.get("message") or "",
                    suggested_fix=fix_text[:200] if fix_text else None,  # cap the length
                )
                (errors if severity == "error" else warnings).append(issue)
        return errors, warnings

    # -------------------------------------------------------------------- Rust

    def _run_cargo_check(self) -> DiagnosticResult:
        """Run ``cargo check`` with JSON messages, printing progress to stderr."""
        tool = "cargo-check"
        if not self._has_cmd("cargo"):
            return self._skipped(tool, "cargo check --message-format json", "cargo not found")

        print(f"[{tool}] Running cargo check (may take a minute for large projects)...", file=sys.stderr)

        cmd = ["cargo", "check", "--message-format", "json"]
        exit_code, output, duration = self._run_command(cmd, tool)
        errors, warnings = self._parse_cargo_output(output)

        print(f"[{tool}] Completed in {duration}ms", file=sys.stderr)

        return self._completed(tool, " ".join(cmd), exit_code, duration, output, errors, warnings)

    def _parse_cargo_output(self, output: str) -> tuple[list[DiagnosticIssue], list[DiagnosticIssue]]:
        """Parse cargo's line-delimited JSON, keeping compiler errors and warnings.

        Lines that are not JSON objects (cargo's human-readable stderr) are
        skipped. The issue location is the primary span, falling back to the
        first span.
        """
        errors, warnings = [], []
        for line in output.split("\n"):
            line = line.strip()
            if not line:
                continue
            try:
                obj = json.loads(line)
            except json.JSONDecodeError:
                continue
            if not isinstance(obj, dict) or obj.get("reason") != "compiler-message":
                continue
            msg = obj.get("message") or {}
            level = msg.get("level", "")
            if level not in ("error", "warning"):
                continue

            spans = msg.get("spans") or []
            primary = next(
                (s for s in spans if s.get("is_primary")),
                spans[0] if spans else {},
            )
            file_path = primary.get("file_name", "")
            if not self._should_check_file(file_path):
                continue

            # The first "help" child message doubles as the suggested fix.
            suggested_fix = None
            for child in msg.get("children") or []:
                if child.get("level") == "help":
                    suggested_fix = child.get("message", "")[:200]
                    break

            issue = DiagnosticIssue(
                tool="cargo",
                file=file_path,
                line=primary.get("line_start", 0),
                col=primary.get("column_start", 0),
                severity=level,
                code=(msg.get("code") or {}).get("code", ""),
                message=msg.get("message", ""),
                suggested_fix=suggested_fix,
            )
            (errors if level == "error" else warnings).append(issue)
        return errors, warnings

    # ------------------------------------------------------------------ Python

    def _run_mypy(self) -> DiagnosticResult:
        """Run mypy, using the dmypy daemon for whole-project runs when available.

        The daemon is used only when the environment variable
        ``USE_DAEMON_MYPY`` is unset or "1", ``dmypy`` is on PATH and the whole
        project is being checked.
        """
        tool = "mypy"
        if not self._has_cmd("mypy") and not self._has_cmd("dmypy"):
            return self._skipped(
                tool,
                "mypy . --show-error-codes --ignore-missing-imports",
                "mypy/dmypy not found",
            )

        if self.modified_files:
            target_files = self._safe_modified_files((".py",))
            if not target_files:
                return self._skipped(tool, "skip (no matching files)", "no modified Python files")
        else:
            target_files = ["."]

        use_daemon = (
            os.getenv("USE_DAEMON_MYPY", "1") == "1"
            and self._has_cmd("dmypy")
            and target_files == ["."]
        )
        flags = [
            "--show-error-codes",
            "--hide-error-context",
            "--no-color-output",
            "--ignore-missing-imports",
        ]
        if use_daemon:
            cmd = ["dmypy", "run", "--", *flags, *target_files]
        else:
            cmd = ["mypy", *flags, "--", *target_files]

        exit_code, output, duration = self._run_command(cmd, tool)
        errors, warnings = self._parse_mypy_output(output)
        if self.modified_files:
            # mypy follows imports, so it also reports on unmodified modules.
            errors = [e for e in errors if self._should_check_file(e.file)]
            warnings = [w for w in warnings if self._should_check_file(w.file)]

        command = "dmypy run ..." if use_daemon else " ".join(cmd)
        return self._completed(tool, command, exit_code, duration, output, errors, warnings)

    def _parse_mypy_output(self, output: str) -> tuple[list[DiagnosticIssue], list[DiagnosticIssue]]:
        """Parse ``file.py:line[:col]: severity: message  [code]`` lines from mypy.

        Errors go to the first list; warnings and notes (reported with
        severity "info") go to the second.
        """
        errors, warnings = [], []
        for line in output.split("\n"):
            match = self._MYPY_LINE.match(line)
            if not match:
                continue
            msg = match.group(5)
            code_match = re.search(r'\[([^\]]+)\]\s*$', msg)
            severity = match.group(4)
            if severity == "note":
                severity = "info"

            issue = DiagnosticIssue(
                tool="mypy",
                file=match.group(1),
                line=int(match.group(2)),
                col=int(match.group(3)) if match.group(3) else 0,
                severity=severity,
                code=code_match.group(1) if code_match else "mypy",
                message=msg,
            )
            (errors if severity == "error" else warnings).append(issue)
        return errors, warnings

    def _run_ruff(self) -> DiagnosticResult:
        """Run ``ruff check`` with JSON output."""
        tool = "ruff"
        if not self._has_cmd("ruff"):
            return self._skipped(tool, "ruff check . --output-format json", "ruff not found")

        if self.modified_files:
            target_files = self._safe_modified_files((".py",))
            if not target_files:
                return self._skipped(tool, "skip (no matching files)", "no modified Python files")
            cmd = ["ruff", "check", "--output-format", "json", "--", *target_files]
        else:
            cmd = ["ruff", "check", ".", "--output-format", "json"]

        exit_code, output, duration = self._run_command(cmd, tool)
        errors, warnings = self._parse_ruff_output(output)
        command = " ".join(cmd[:5]) + "..." if len(cmd) > 5 else " ".join(cmd)
        return self._completed(
            tool, command, exit_code, duration, output, errors, warnings, excerpt_lines=20
        )

    def _parse_ruff_output(self, output: str) -> tuple[list[DiagnosticIssue], list[DiagnosticIssue]]:
        """Parse ruff's JSON output; every finding is reported as an error.

        The suggested fix is taken from ``fix.content`` (older output shape),
        else from the first non-empty ``fix.edits[].content``, else from
        ``fix.message``.
        """
        errors = []
        for item in self._load_json_array(output):
            if not isinstance(item, dict):
                continue
            loc = item.get("location") or {}

            fix = item.get("fix")
            fix = fix if isinstance(fix, dict) else {}
            fix_text = fix.get("content") or next(
                (
                    edit["content"]
                    for edit in fix.get("edits") or []
                    if isinstance(edit, dict) and edit.get("content")
                ),
                "",
            ) or fix.get("message") or ""

            errors.append(DiagnosticIssue(
                tool="ruff",
                file=self._display_path(item.get("filename") or ""),
                line=loc.get("row") or 0,
                col=loc.get("column") or 0,
                severity="error",
                code=item.get("code") or "ruff",  # code is null for syntax errors
                message=item.get("message") or "",
                suggested_fix=fix_text[:200] if fix_text else None,
            ))
        return errors, []

    # ---------------------------------------------------------------------- Go

    def _run_go_vet(self) -> DiagnosticResult:
        """Run ``go vet``.

        Incremental runs vet the packages (directories) that contain modified
        files and then filter the findings: the go tool only accepts loose
        ``.go`` files when they all live in one directory, and vetting part of
        a package produces spurious "undefined" errors.
        """
        tool = "go-vet"
        if not self._has_cmd("go"):
            return self._skipped(tool, "go vet ./...", "go not found")

        if self.modified_files:
            packages = self._modified_go_packages()
            if not packages:
                return self._skipped(tool, "skip (no matching files)", "no modified Go files")
            cmd = ["go", "vet", *packages]
        else:
            cmd = ["go", "vet", "./..."]

        exit_code, output, duration = self._run_command(cmd, tool)
        errors, _ = self._parse_go_output(output)
        if self.modified_files:
            errors = [e for e in errors if self._should_check_file(e.file)]
        return self._completed(tool, " ".join(cmd), exit_code, duration, output, errors, [])

    def _run_go_build(self) -> DiagnosticResult:
        """Run ``go build ./...``; incremental runs build everything and filter errors."""
        tool = "go-build"
        if not self._has_cmd("go"):
            return self._skipped(tool, "go build ./...", "go not found")

        if self.modified_files and not self._safe_modified_files((".go",)):
            return self._skipped(tool, "skip (no matching files)", "no modified Go files")

        # go build needs package paths, so the whole project is always built.
        cmd = ["go", "build", "./..."]
        exit_code, output, duration = self._run_command(cmd, tool)
        errors, _ = self._parse_go_output(output)
        if self.modified_files:
            errors = [e for e in errors if self._should_check_file(e.file)]
        return self._completed(tool, " ".join(cmd), exit_code, duration, output, errors, [])

    def _parse_go_output(self, output: str) -> tuple[list[DiagnosticIssue], list[DiagnosticIssue]]:
        """Parse ``file.go:line[:col]: message`` lines from go vet / go build."""
        errors = []
        for line in output.split("\n"):
            match = self._GO_LINE.match(line)
            if match:
                errors.append(DiagnosticIssue(
                    tool="go",
                    file=match.group(1),
                    line=int(match.group(2)),
                    col=int(match.group(3)) if match.group(3) else 0,
                    severity="error",
                    code="go",
                    message=match.group(4),
                ))
        return errors, []
840
+
841
+
842
+ class RepoMapChecker:
843
+ """RepoMap 诊断检查器主类"""
844
+
845
def __init__(self, project_root: str | Path, max_items: int = 100):
    """Store the resolved project root, the per-tool issue cap and a project detector."""
    self.project_root = Path(project_root).resolve()
    self.max_items = max_items
    self.detector = ProjectDetector()
    # The runner is created lazily in check() so it can receive modified_files.
850
+
851
def check(
    self,
    types: list[str] | None = None,
    resolve_symbols: bool = True,
    symbols_map: dict[str, Any] | None = None,
    since_commit: str | None = None,
    modified_files: list[str] | None = None,
    with_lsp: bool = False,
    lsp_timeout: float = 8.0,
    lsp_max_files: int = 20,
    graph: Any = None,
) -> dict[str, Any]:
    """Run the diagnostic tools and return a structured report.

    Args:
        types: Language types to check; detected automatically when empty or None.
        resolve_symbols: Whether to map issue locations to symbol names.
        symbols_map: Symbol map used for that resolution.
        since_commit: Check files changed since this commit (e.g. "HEAD~1");
            only consulted when ``modified_files`` is empty.
        modified_files: Explicit list of files to check.
        with_lsp: Also collect language-server diagnostics.
        lsp_timeout: Timeout forwarded to the LSP collection.
        lsp_max_files: File cap forwarded to the LSP collection.
        graph: Optional RepoGraph used to attach caller context to issues.

    Returns:
        The diagnostic report as a dictionary.
    """
    # Work out the incremental file list; an explicit list wins over since_commit.
    changed = modified_files
    if not modified_files and since_commit:
        changed = GitHelper.get_modified_files(self.project_root, since_commit)

    languages = types or self.detector.detect(self.project_root)

    # Nothing recognizable in the project: report "unknown" without running tools.
    if not languages:
        empty_summary = {"total_errors": 0, "total_warnings": 0, "files_with_errors": 0}
        incremental_info = {
            "enabled": changed is not None,
            "files_checked": changed or [],
        }
        return {
            "timestamp": self._get_timestamp(),
            "project_root": str(self.project_root),
            "status": "unknown",
            "message": "未检测到支持的项目类型",
            "types": [],
            "runs": [],
            "summary": empty_summary,
            "errors_by_file": {},
            "incremental": incremental_info,
        }

    # The runner is built here so it can receive the incremental file list.
    self.runner = DiagnosticRunner(self.project_root, self.max_items, changed)
    results = self.runner.run_all(languages)

    if with_lsp:
        lsp_results = self._run_lsp_diagnostics(changed, lsp_timeout, lsp_max_files)
        results.extend(lsp_results)

    # Attach symbol names (and optionally callers) to the collected issues.
    if resolve_symbols and symbols_map:
        self._resolve_symbols(results, symbols_map, graph=graph)

    return self._build_report(results, languages, changed)
915
+
916
def _run_lsp_diagnostics(
    self,
    target_files: list[str] | None,
    lsp_timeout: float,
    lsp_max_files: int,
) -> list[DiagnosticResult]:
    """Collect language-server diagnostics for *target_files*.

    Returns a single skipped result when no files are given; otherwise one
    result per run yielded by ``collect_lsp_diagnostics``. Issues with
    severity "error" are stored as errors, everything else as warnings.
    """
    if not target_files:
        no_files = DiagnosticResult(
            tool="lsp",
            command="repomap lsp diagnostics",
            exit_code=0,
            duration_ms=0,
            skipped=True,
            skip_reason="no explicit files; pass --modified-file or use diagnostics --files",
        )
        return [no_files]

    from .lsp import collect_lsp_diagnostics

    runs = collect_lsp_diagnostics(
        self.project_root, target_files, timeout=lsp_timeout, max_files=lsp_max_files
    )
    collected: list[DiagnosticResult] = []
    for run in runs:
        label = f"lsp:{run.server}"
        error_issues: list[DiagnosticIssue] = []
        other_issues: list[DiagnosticIssue] = []
        for item in run.diagnostics:
            issue = DiagnosticIssue(
                tool=label,
                file=item.file,
                line=item.line,
                col=item.col,
                severity=item.severity,
                code=item.code,
                message=item.message,
            )
            bucket = error_issues if issue.severity == "error" else other_issues
            bucket.append(issue)

        was_skipped = run.status == "skipped"
        # A failed or timed-out server counts as a non-zero exit.
        failed = run.status in {"failed", "timeout"}
        if run.command:
            command_text = " ".join(run.command)
        else:
            command_text = "repomap lsp diagnostics"
        # The reason is a skip reason for skipped runs and raw output otherwise.
        excerpt = [run.reason] if run.reason and not was_skipped else []

        collected.append(DiagnosticResult(
            tool=label,
            command=command_text,
            exit_code=1 if failed else 0,
            duration_ms=run.duration_ms,
            skipped=was_skipped,
            skip_reason=run.reason if was_skipped else "",
            errors=error_issues,
            warnings=other_issues,
            raw_excerpt=excerpt,
        ))
    return collected
963
+
964
def _get_timestamp(self) -> str:
    """Return the current UTC time as an ISO 8601 string."""
    import datetime as _dt

    current = _dt.datetime.now(tz=_dt.timezone.utc)
    return current.isoformat()
969
+
970
def _resolve_symbols(
    self, results: list[DiagnosticResult], symbols_map: dict[str, Any],
    graph: Any = None,
) -> None:
    """Attach a symbol name, a confidence level and optional callers to each issue.

    An issue is a candidate for a symbol in the same file when its line lies
    between the symbol's start line and ``max(end_line, start + 50)``; the
    50-line window covers symbols whose end line is unknown. Among the
    candidates:

    * a symbol starting exactly on the issue line wins ("exact");
    * otherwise symbols whose real ``[start, end]`` range contains the line
      are preferred over window-only matches, and the one starting latest
      (the innermost / nearest definition) is chosen ("line").

    Args:
        results: Diagnostic results whose issues are updated in place.
        symbols_map: Mapping of symbol id to a symbol given either as an
            object or a dict with ``file``, ``line``, ``end_line`` and
            ``name``.
        graph: Optional graph exposing ``symbols`` and ``incoming``; when
            given, up to five names of "call" edge sources are stored on the
            issue.
    """
    def _get_attr(obj: Any, attr: str, default: Any = None) -> Any:
        # Symbols may be plain objects or dicts.
        if hasattr(obj, attr):
            return getattr(obj, attr, default)
        if isinstance(obj, dict):
            return obj.get(attr, default)
        return default

    # file -> [(name, start_line, end_line, symbol_id)]
    file_symbols: dict[str, list[tuple[str, int, int, str]]] = {}
    for symbol_id, symbol in symbols_map.items():
        file_path = _get_attr(symbol, "file", "")
        name = _get_attr(symbol, "name", "")
        if not file_path or not name:
            # A nameless symbol can never be reported, so it must not shadow others.
            continue
        line = _get_attr(symbol, "line", 0) or 0
        # end_line may be missing or None; fall back to the start line.
        end_line = _get_attr(symbol, "end_line", line) or line
        file_symbols.setdefault(file_path, []).append((name, line, end_line, symbol_id))

    project_root = getattr(self, "project_root", None)

    def _file_key(raw: str) -> str:
        # Tools report "./x", "x" or absolute paths; symbol keys are looked up
        # verbatim first and then relative to the project root.
        key = raw[2:] if raw.startswith("./") else raw
        if key not in file_symbols and project_root is not None and Path(key).is_absolute():
            try:
                key = Path(key).relative_to(project_root).as_posix()
            except ValueError:
                pass
        return key

    for result in results:
        for issue in result.errors + result.warnings:
            best = None  # (rank, name, symbol_id, confidence)
            for name, start, end, sym_id in file_symbols.get(_file_key(issue.file), []):
                if not start <= issue.line <= max(end, start + 50):
                    continue
                if issue.line == start:
                    best = ((True, start), name, sym_id, "exact")
                    break
                rank = (issue.line <= end, start)
                if best is None or rank > best[0]:
                    best = (rank, name, sym_id, "line")

            if best is None:
                continue
            _rank, issue.symbol, symbol_id, issue.symbol_confidence = best

            # Attach the names of up to five callers of the matched symbol.
            if graph is not None and symbol_id:
                issue.callers = [
                    graph.symbols[e.source].name
                    for e in graph.incoming.get(symbol_id, [])
                    if e.kind == "call" and e.source in graph.symbols
                ][:5]
1030
+
1031
def _build_report(
    self,
    results: list[DiagnosticResult],
    types: list[str],
    modified_files: list[str] | None = None,
) -> dict[str, Any]:
    """Assemble the final diagnostic report from the per-tool results.

    Args:
        results: One ``DiagnosticResult`` per tool (run or skipped).
        types: Project/language types the check was performed for; copied
            into the report as-is.
        modified_files: Files the check was restricted to. ``None`` marks
            the report as non-incremental.

    Returns:
        A dict with ``timestamp``, ``project_root``, the overall ``status``
        ("failed" | "warning" | "unknown" | "passed"), an optional
        ``message``, the per-tool ``runs``, a ``summary`` of counters and
        ``errors_by_file`` (at most the 20 files with the most issues,
        errors and warnings combined).
    """
    total_errors = sum(len(r.errors) for r in results)
    total_warnings = sum(len(r.warnings) for r in results)

    # Group every issue (errors first, then warnings) by file.
    errors_by_file: dict[str, list[dict]] = {}
    # Tracked separately from errors_by_file: that mapping also holds
    # warning-only files, which must not be counted as "files with errors".
    files_with_errors: set[str] = set()
    for result in results:
        for issue in result.errors:
            files_with_errors.add(issue.file or "unknown")
        for issue in result.errors + result.warnings:
            file_key = issue.file or "unknown"
            errors_by_file.setdefault(file_key, []).append({
                "tool": issue.tool,
                "line": issue.line,
                "col": issue.col,
                "severity": issue.severity,
                "code": issue.code,
                "message": issue.message,
                "symbol": issue.symbol,
                "symbol_confidence": issue.symbol_confidence,
                "callers": issue.callers,
                "suggested_fix": issue.suggested_fix,
            })

    # Per-tool run details.
    runs = []
    for r in results:
        run_data: dict[str, Any] = {
            "tool": r.tool,
            "command": r.command,
            "exit_code": r.exit_code,
            "duration_ms": r.duration_ms,
            "skipped": r.skipped,
            "error_count": len(r.errors),
            "warning_count": len(r.warnings),
            "truncated": r.truncated,
        }
        if r.skip_reason:
            run_data["skip_reason"] = r.skip_reason
        # The tool exited non-zero but nothing could be parsed from its
        # output: flag it so the failure is not mistaken for a clean run.
        if not r.skipped and r.exit_code != 0 and not r.errors and not r.warnings:
            run_data["tool_failure_reason"] = "工具退出码非 0,但未解析到结构化错误"
        if not r.skipped:
            # Cap the raw output and the error list to keep the report small.
            run_data["raw_excerpt"] = list(r.raw_excerpt[:10])
            run_data["errors"] = [
                {
                    "file": e.file,
                    "line": e.line,
                    "col": e.col,
                    "code": e.code,
                    "message": e.message,
                    "symbol": e.symbol,
                    "symbol_confidence": e.symbol_confidence,
                    "callers": e.callers,
                    "suggested_fix": e.suggested_fix,
                }
                for e in r.errors[:20]
            ]
        runs.append(run_data)

    tool_failures = [r for r in results if not r.skipped and r.exit_code != 0]
    tools_run = sum(1 for r in results if not r.skipped)
    tools_skipped = sum(1 for r in results if r.skipped)

    message = ""
    if total_errors > 0 or tool_failures:
        status = "failed"
    elif total_warnings > 0:
        status = "warning"
    elif tools_run == 0 and tools_skipped > 0:
        # Every tool was skipped, so nothing was actually verified.
        status = "unknown"
        message = "检测到项目类型,但没有实际运行任何诊断工具"
    else:
        status = "passed"

    # Keep only the 20 files with the most issues; sorted() is stable, so
    # ties keep their first-seen order.
    top_files = sorted(errors_by_file.items(), key=lambda x: len(x[1]), reverse=True)[:20]

    return {
        "timestamp": self._get_timestamp(),
        "project_root": str(self.project_root),
        "status": status,
        "message": message,
        "types": types,
        "incremental": {
            "enabled": modified_files is not None,
            "files_checked": modified_files or [],
            "files_count": len(modified_files) if modified_files else 0,
        },
        "runs": runs,
        "summary": {
            "total_errors": total_errors,
            "total_warnings": total_warnings,
            "files_with_errors": len(files_with_errors),
            "tools_run": tools_run,
            "tools_skipped": tools_skipped,
            "tool_failures": len(tool_failures),
        },
        "errors_by_file": dict(top_files),
    }
1132
+
1133
+
1134
def check_project(
    project_root: str,
    types: list[str] | None = None,
    max_items: int = 100,
    symbols_map: dict[str, Any] | None = None,
    since_commit: str | None = None,
    modified_files: list[str] | None = None,
    with_lsp: bool = False,
    lsp_timeout: float = 8.0,
    lsp_max_files: int = 20,
) -> dict[str, Any]:
    """Convenience wrapper: run the project diagnostics in a single call.

    Builds a ``RepoMapChecker`` for ``project_root`` and returns whatever
    its ``check`` method returns.

    Args:
        project_root: Path of the project root directory.
        types: Language types to check; ``None`` means auto-detect.
        max_items: Maximum number of issues returned per tool.
        symbols_map: Optional symbol graph used to link errors to symbols.
            Symbol resolution is requested only when this is not ``None``.
        since_commit: Check the changes made since this commit.
        modified_files: Explicit list of files to check.
        with_lsp: Forwarded unchanged to ``RepoMapChecker.check``.
        lsp_timeout: Forwarded unchanged to ``RepoMapChecker.check``.
        lsp_max_files: Forwarded unchanged to ``RepoMapChecker.check``.

    Returns:
        The diagnostic report dictionary.
    """
    checker = RepoMapChecker(project_root, max_items)
    options: dict[str, Any] = {
        "types": types,
        "resolve_symbols": symbols_map is not None,
        "symbols_map": symbols_map,
        "since_commit": since_commit,
        "modified_files": modified_files,
        "with_lsp": with_lsp,
        "lsp_timeout": lsp_timeout,
        "lsp_max_files": lsp_max_files,
    }
    return checker.check(**options)
1170
+
1171
+
1172
def _parse_cli_args(
    argv: list[str],
) -> tuple[list[str] | None, str | None, list[str] | None]:
    """Parse the command-line arguments that follow ``<project_root>``.

    Recognised forms:
        ``--since <commit>``   check changes since the given commit
        ``--files f1 f2 ...``  explicit files (up to the next ``--`` option)
        anything else          a language type name

    Args:
        argv: The arguments after the project root (``sys.argv[2:]``).

    Returns:
        ``(types, since_commit, modified_files)``; each element is ``None``
        when the corresponding argument was not given.

    Raises:
        ValueError: If ``--since`` has no value or an unknown ``--option``
            is encountered (instead of silently treating it as a type).
    """
    types: list[str] | None = None
    since_commit: str | None = None
    modified_files: list[str] | None = None

    i = 0
    while i < len(argv):
        arg = argv[i]
        if arg == "--since":
            # A missing value used to fall through and be recorded as a
            # language type named "--since".
            if i + 1 >= len(argv) or argv[i + 1].startswith("--"):
                raise ValueError("--since requires a commit argument")
            since_commit = argv[i + 1]
            i += 2
        elif arg == "--files":
            # Accumulate so that a repeated --files does not drop earlier files.
            if modified_files is None:
                modified_files = []
            i += 1
            while i < len(argv) and not argv[i].startswith("--"):
                modified_files.append(argv[i])
                i += 1
        elif arg.startswith("--"):
            raise ValueError(f"unknown option: {arg}")
        else:
            if types is None:
                types = []
            types.append(arg)
            i += 1

    return types, since_commit, modified_files


if __name__ == "__main__":
    if len(sys.argv) < 2:
        print("Usage: python repomap_check.py <project_root> [types...]")
        print("       python repomap_check.py <project_root> --since HEAD~1")
        print("       python repomap_check.py <project_root> --files file1.py file2.py")
        print("Example: python repomap_check.py ./my-project typescript")
        sys.exit(1)

    root = sys.argv[1]

    # Parse the remaining arguments; report malformed input on stderr.
    try:
        types, since_commit, modified_files = _parse_cli_args(sys.argv[2:])
    except ValueError as exc:
        print(f"Error: {exc}", file=sys.stderr)
        sys.exit(2)

    result = check_project(
        root,
        types=types if types else None,
        since_commit=since_commit,
        modified_files=modified_files,
    )
    print(json.dumps(result, indent=2, ensure_ascii=False))