@haaaiawd/anws 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +96 -0
- package/bin/cli.js +76 -0
- package/lib/copy.js +38 -0
- package/lib/index.js +8 -0
- package/lib/init.js +139 -0
- package/lib/manifest.js +53 -0
- package/lib/output.js +74 -0
- package/lib/update.js +85 -0
- package/package.json +36 -0
- package/templates/.agent/rules/agents.md +90 -0
- package/templates/.agent/skills/build-inspector/SKILL.md +83 -0
- package/templates/.agent/skills/complexity-guard/SKILL.md +71 -0
- package/templates/.agent/skills/complexity-guard/references/anti_patterns.md +21 -0
- package/templates/.agent/skills/concept-modeler/SKILL.md +112 -0
- package/templates/.agent/skills/concept-modeler/prompts/GLOSSARY_PROMPT.md +40 -0
- package/templates/.agent/skills/concept-modeler/references/ENTITY_EXTRACTION_PROMPT.md +299 -0
- package/templates/.agent/skills/concept-modeler/scripts/glossary_gen.py +66 -0
- package/templates/.agent/skills/git-forensics/SKILL.md +74 -0
- package/templates/.agent/skills/git-forensics/references/ANALYSIS_METHODOLOGY.md +193 -0
- package/templates/.agent/skills/git-forensics/scripts/git_forensics.py +615 -0
- package/templates/.agent/skills/git-forensics/scripts/git_hotspots.py +118 -0
- package/templates/.agent/skills/report-template/SKILL.md +88 -0
- package/templates/.agent/skills/report-template/references/REPORT_TEMPLATE.md +100 -0
- package/templates/.agent/skills/runtime-inspector/SKILL.md +93 -0
- package/templates/.agent/skills/spec-writer/SKILL.md +58 -0
- package/templates/.agent/skills/spec-writer/references/prd_template.md +174 -0
- package/templates/.agent/skills/system-architect/SKILL.md +620 -0
- package/templates/.agent/skills/system-architect/references/rfc_template.md +59 -0
- package/templates/.agent/skills/system-designer/SKILL.md +439 -0
- package/templates/.agent/skills/system-designer/references/system-design-template.md +533 -0
- package/templates/.agent/skills/task-planner/SKILL.md +474 -0
- package/templates/.agent/skills/task-planner/references/TASK_TEMPLATE.md +133 -0
- package/templates/.agent/skills/tech-evaluator/SKILL.md +135 -0
- package/templates/.agent/skills/tech-evaluator/references/ADR_TEMPLATE.md +68 -0
- package/templates/.agent/workflows/blueprint.md +185 -0
- package/templates/.agent/workflows/challenge.md +467 -0
- package/templates/.agent/workflows/change.md +294 -0
- package/templates/.agent/workflows/craft.md +626 -0
- package/templates/.agent/workflows/design-system.md +497 -0
- package/templates/.agent/workflows/explore.md +307 -0
- package/templates/.agent/workflows/forge.md +354 -0
- package/templates/.agent/workflows/genesis.md +265 -0
- package/templates/.agent/workflows/scout.md +130 -0
|
@@ -0,0 +1,615 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
git_forensics.py - Git 历史取证与共改分析工具
|
|
4
|
+
|
|
5
|
+
分析 Git 提交历史,识别与目标文件存在逻辑耦合(co-change)的文件。
|
|
6
|
+
|
|
7
|
+
依赖:
|
|
8
|
+
- git (命令行工具)
|
|
9
|
+
|
|
10
|
+
调研结果:
|
|
11
|
+
- 使用 git log 获取目标文件的 commit 列表
|
|
12
|
+
- 对每个 commit,获取所有修改的文件
|
|
13
|
+
- 统计共改频率,识别隐性耦合
|
|
14
|
+
|
|
15
|
+
用法:
|
|
16
|
+
python git_forensics.py --focus ./src/auth/login.ts
|
|
17
|
+
python git_forensics.py --focus ./src/auth/login.ts --days 180 --threshold 0.5
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
import argparse
|
|
21
|
+
import json
|
|
22
|
+
import subprocess
|
|
23
|
+
import sys
|
|
24
|
+
from collections import Counter, defaultdict
|
|
25
|
+
from datetime import datetime, timedelta
|
|
26
|
+
from pathlib import Path
|
|
27
|
+
from typing import Dict, List, Optional, Tuple
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
# File-type classification table.
# Maps a category name to the substrings that categorize_file() searches for
# in a path. Categories are tried in insertion order and the first one with a
# matching substring wins; a path matching none is reported as "PRODUCTION".
FILE_CATEGORIES = {
    "TEST_FILE": ["test", "spec", "__tests__", "tests"],
    "CONFIG_FILE": ["config", ".env", "settings", ".json", ".yaml", ".yml", ".toml"],
    "DOC_FILE": [".md", ".rst", ".txt", "README", "CHANGELOG"],
}
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def check_git_available() -> bool:
    """Return True when ``git --version`` can be run successfully.

    Returns:
        False if the command exits with a non-zero status or no ``git``
        executable can be found; True otherwise.
    """
    probe = ["git", "--version"]
    try:
        subprocess.run(probe, capture_output=True, check=True)
    except (subprocess.CalledProcessError, FileNotFoundError):
        return False
    return True
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def is_git_repo(path: str) -> bool:
    """Report whether ``path`` lies inside a git repository.

    Runs ``git rev-parse --git-dir`` with ``path`` as the working directory.

    Args:
        path: Directory to probe.

    Returns:
        True if the command exits successfully. False if git reports an
        error, or if the process cannot be started at all (``path`` does not
        exist or is not a directory, or the git executable is missing).
    """
    try:
        subprocess.run(
            ["git", "rev-parse", "--git-dir"],
            capture_output=True,
            check=True,
            cwd=path,
        )
    except (subprocess.CalledProcessError, OSError):
        # OSError covers FileNotFoundError / NotADirectoryError /
        # PermissionError raised while spawning the process; a path we cannot
        # even enter is, for the caller's purposes, "not a repository".
        return False
    return True
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def check_shallow_clone(cwd: str = ".") -> Tuple[bool, int]:
    """Detect a shallow clone and count the commits reachable from HEAD.

    Shallowness is asked of git itself (``git rev-parse
    --is-shallow-repository``) so the answer is also right when ``cwd`` is a
    sub-directory of the work tree or when ``.git`` is not a plain directory.
    If git does not answer ``true``/``false`` (git releases that predate the
    option echo it back verbatim), the ``<cwd>/.git/shallow`` marker file is
    checked instead.

    Args:
        cwd: Directory inside the repository to inspect.

    Returns:
        ``(is_shallow, commit_count)``. ``commit_count`` is 0 when it cannot
        be determined (git failure, unstartable process, unparsable output).
    """

    def _git_output(*git_args: str) -> str:
        # Run one git sub-command in ``cwd`` and return its stripped stdout.
        completed = subprocess.run(
            ["git", *git_args],
            capture_output=True,
            text=True,
            check=True,
            cwd=cwd,
        )
        return completed.stdout.strip()

    try:
        answer = _git_output("rev-parse", "--is-shallow-repository")
    except (subprocess.CalledProcessError, OSError):
        answer = ""

    if answer in ("true", "false"):
        is_shallow = answer == "true"
    else:
        # No usable answer from git: fall back to the marker-file check.
        is_shallow = (Path(cwd) / ".git" / "shallow").exists()

    try:
        commit_count = int(_git_output("rev-list", "--count", "HEAD"))
    except (subprocess.CalledProcessError, OSError, ValueError):
        commit_count = 0

    return is_shallow, commit_count
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def categorize_file(
    file_path: str,
    categories: Optional[Dict[str, List[str]]] = None
) -> str:
    """Classify a path by case-insensitive substring matching.

    Args:
        file_path: Path to classify.
        categories: Mapping of category name to the substrings that identify
            it. Defaults to the module-level ``FILE_CATEGORIES`` table.

    Returns:
        The first category (in mapping order) that has a pattern occurring
        anywhere in ``file_path``, or ``"PRODUCTION"`` when nothing matches.
    """
    if categories is None:
        categories = FILE_CATEGORIES

    lower_path = file_path.lower()

    for category, patterns in categories.items():
        # Both sides are lower-cased: comparing a lower-cased path against
        # upper-case patterns such as "README" could never succeed.
        if any(pattern.lower() in lower_path for pattern in patterns):
            return category

    return "PRODUCTION"
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def get_commits_for_file(
    file_path: str,
    days: int = 180,
    cwd: str = "."
) -> List[str]:
    """List the hashes of recent commits that touched ``file_path``.

    Args:
        file_path: Path handed to ``git log`` after ``--``.
        days: Size of the look-back window, in days before now.
        cwd: Directory in which git is run.

    Returns:
        Commit hashes in the order ``git log`` prints them. When git exits
        with an error, its stderr is echoed to stderr and ``[]`` is returned.
    """
    cutoff = datetime.now() - timedelta(days=days)
    since_flag = "--since=" + cutoff.strftime("%Y-%m-%d")
    git_args = ["git", "log", since_flag, "--pretty=format:%H", "--", file_path]

    try:
        proc = subprocess.run(
            git_args,
            capture_output=True,
            text=True,
            check=True,
            cwd=cwd,
        )
    except subprocess.CalledProcessError as e:
        print(f"Error getting commits: {e.stderr}", file=sys.stderr)
        return []

    hashes = []
    for raw_line in proc.stdout.strip().split("\n"):
        cleaned = raw_line.strip()
        if cleaned:
            hashes.append(cleaned)
    return hashes
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def get_files_in_commit(commit_hash: str, cwd: str = ".") -> List[str]:
    """List the paths changed by one commit.

    Args:
        commit_hash: Revision to inspect.
        cwd: Directory in which git is run.

    Returns:
        The non-empty lines printed by ``git show --name-only``, or ``[]``
        when git exits with an error or the process cannot be started.
    """
    cmd = [
        "git",
        # Without this, git wraps paths containing non-ASCII bytes in quotes
        # and octal-escapes them, so such names would never compare equal to
        # the real path and would appear garbled in the report.
        "-c", "core.quotePath=false",
        "show",
        "--name-only",
        "--pretty=format:",
        commit_hash,
    ]

    try:
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            check=True,
            cwd=cwd,
        )
    except (subprocess.CalledProcessError, OSError):
        # OSError: missing git executable or unusable working directory.
        return []

    return [f.strip() for f in result.stdout.strip().split("\n") if f.strip()]
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
def get_file_authors(file_path: str, cwd: str = ".") -> List[str]:
    """Return the most frequent commit authors of ``file_path``.

    Args:
        file_path: Path handed to ``git log`` after ``--``.
        cwd: Directory in which git is run.

    Returns:
        At most five author names, most frequent first; ``[]`` when git
        exits with an error.
    """
    try:
        log = subprocess.run(
            ["git", "log", "--pretty=format:%an", "--", file_path],
            capture_output=True,
            text=True,
            check=True,
            cwd=cwd,
        )
    except subprocess.CalledProcessError:
        return []

    # One author name per log line; blank lines are ignored.
    tally = Counter(
        name.strip()
        for name in log.stdout.strip().split("\n")
        if name.strip()
    )
    return [name for name, _commits in tally.most_common(5)]
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
def get_last_modified(file_path: str, cwd: str = ".") -> Optional[str]:
    """Return the date of the latest commit that touched ``file_path``.

    Args:
        file_path: Path handed to ``git log`` after ``--``.
        cwd: Directory in which git is run.

    Returns:
        The first whitespace-separated token of git's ``%ci`` output for the
        newest matching commit, or None when git prints nothing or exits
        with an error.
    """
    try:
        proc = subprocess.run(
            ["git", "log", "-1", "--pretty=format:%ci", "--", file_path],
            capture_output=True,
            text=True,
            check=True,
            cwd=cwd,
        )
    except subprocess.CalledProcessError:
        return None

    stamp = proc.stdout.strip()
    if not stamp:
        return None
    return stamp.split()[0]
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
def _repo_relative_path(target_file: str, cwd: str = ".") -> str:
    """Spell ``target_file`` the way git lists paths in a commit.

    ``git show --name-only`` prints paths relative to the repository root
    with ``/`` separators, whereas the caller's path is relative to ``cwd``
    and may carry a leading ``./``. The repository-relative prefix of ``cwd``
    is prepended and the result normalized lexically. If the prefix cannot be
    obtained, only the normalization is applied.
    """
    import posixpath

    try:
        prefix = subprocess.run(
            ["git", "rev-parse", "--show-prefix"],
            capture_output=True,
            text=True,
            check=True,
            cwd=cwd,
        ).stdout.strip()
    except (subprocess.CalledProcessError, OSError):
        prefix = ""

    return posixpath.normpath(Path(prefix, target_file).as_posix())


def analyze_co_changes(
    target_file: str,
    days: int = 180,
    threshold: float = 0.5,
    cwd: str = "."
) -> Dict:
    """Find the files that change in the same commits as ``target_file``.

    Args:
        target_file: Path of the file to focus on, relative to ``cwd``.
        days: How many days of history to analyze.
        threshold: Co-change frequency from which a file is flagged
            ``MEDIUM_COUPLING`` (``HIGH_COUPLING`` is fixed at 0.7).
        cwd: Directory in which git is run.

    Returns:
        A dict with the keys ``target_file``, ``analysis_period_days``,
        ``total_commits_modifying_target``, ``co_changed_files`` (at most the
        20 most frequent co-changed paths), ``last_modified_date``,
        ``primary_authors`` and ``analysis`` (``high_risk_files`` and
        ``recommendations``).
    """
    # Commits that modified the target file.
    commits = get_commits_for_file(target_file, days, cwd)
    total_commits = len(commits)

    if total_commits == 0:
        return {
            "target_file": target_file,
            "analysis_period_days": days,
            "total_commits_modifying_target": 0,
            "co_changed_files": [],
            "last_modified_date": None,
            "primary_authors": [],
            "analysis": {
                "high_risk_files": [],
                "recommendations": ["目标文件在指定时间段内没有修改记录"]
            }
        }

    # Every spelling under which the target itself may appear in git's
    # output. Comparing only the raw argument let "./src/x.ts" slip through
    # and be reported as coupled with itself at frequency 1.0.
    target_spellings = {target_file, _repo_relative_path(target_file, cwd)}

    # Count how often each other file shares a commit with the target.
    co_change_counter = Counter()

    for commit in commits:
        for f in get_files_in_commit(commit, cwd):
            if f not in target_spellings:
                co_change_counter[f] += 1

    co_changed_files = []
    high_risk_files = []

    for file_path, count in co_change_counter.most_common(20):
        frequency = round(count / total_commits, 2)
        category = categorize_file(file_path)

        entry = {
            "file": file_path,
            "co_change_count": count,
            "frequency": frequency,
            "category": category
        }

        # Risk level: only production files count as high risk, but any
        # category can carry the coupling warning.
        if frequency >= 0.7:
            entry["warning"] = "HIGH_COUPLING"
            if category == "PRODUCTION":
                high_risk_files.append(file_path)
        elif frequency >= threshold:
            entry["warning"] = "MEDIUM_COUPLING"

        co_changed_files.append(entry)

    recommendations = generate_recommendations(target_file, co_changed_files, high_risk_files)

    # Authors and last-modified date of the target file.
    authors = get_file_authors(target_file, cwd)
    last_modified = get_last_modified(target_file, cwd)

    return {
        "target_file": target_file,
        "analysis_period_days": days,
        "total_commits_modifying_target": total_commits,
        "co_changed_files": co_changed_files,
        "last_modified_date": last_modified,
        "primary_authors": authors,
        "analysis": {
            "high_risk_files": high_risk_files,
            "recommendations": recommendations
        }
    }
|
|
295
|
+
|
|
296
|
+
|
|
297
|
+
def generate_recommendations(
    target_file: str,
    co_changed_files: List[Dict],
    high_risk_files: List[str]
) -> List[str]:
    """Turn co-change findings into human-readable advice.

    Args:
        target_file: The analyzed file (not used by the current rules).
        co_changed_files: Entries carrying at least ``category`` and
            ``frequency`` keys.
        high_risk_files: Paths already judged to be high-risk.

    Returns:
        One message per rule that fires, in the order high-risk production
        files, test files above 0.5, config files above 0.3; a single
        "nothing found" message when no rule fires.
    """

    def _matching(category: str, floor: float) -> int:
        # Number of entries of ``category`` whose frequency exceeds ``floor``.
        return sum(
            1
            for item in co_changed_files
            if item["category"] == category and item["frequency"] > floor
        )

    advice = []

    # Tightly coupled production code.
    if high_risk_files:
        advice.append(
            f"发现 {len(high_risk_files)} 个高耦合生产文件。"
            "考虑:(1) 合并为同一模块;(2) 提取公共接口;(3) 使用事件解耦。"
        )

    # Tests changing together with the code (expected).
    coupled_tests = _matching("TEST_FILE", 0.5)
    if coupled_tests:
        advice.append(
            f"检测到 {coupled_tests} 个测试文件高频共改,这是正常的测试-代码耦合。"
        )

    # Configuration changing together with the code (possibly a smell).
    if _matching("CONFIG_FILE", 0.3):
        advice.append(
            "检测到配置文件频繁与代码一起修改。考虑是否存在硬编码或配置管理问题。"
        )

    return advice or ["未发现明显的耦合问题。"]
|
|
330
|
+
|
|
331
|
+
|
|
332
|
+
def get_all_commits(days: int = 180, cwd: str = ".") -> List[str]:
    """List the hashes of every commit made within the last ``days`` days.

    Args:
        days: Size of the look-back window, in days before now.
        cwd: Directory in which git is run.

    Returns:
        Commit hashes in the order ``git log`` prints them. When git exits
        with an error, its stderr is echoed to stderr and ``[]`` is returned.
    """
    cutoff = datetime.now() - timedelta(days=days)
    since_flag = "--since=" + cutoff.strftime("%Y-%m-%d")

    try:
        proc = subprocess.run(
            ["git", "log", since_flag, "--pretty=format:%H"],
            capture_output=True,
            text=True,
            check=True,
            cwd=cwd,
        )
    except subprocess.CalledProcessError as e:
        print(f"Error getting commits: {e.stderr}", file=sys.stderr)
        return []

    hashes = []
    for raw_line in proc.stdout.strip().split("\n"):
        cleaned = raw_line.strip()
        if cleaned:
            hashes.append(cleaned)
    return hashes
|
|
355
|
+
|
|
356
|
+
|
|
357
|
+
def analyze_global_coupling(
    days: int = 180,
    threshold: float = 0.3,
    top_n: int = 20,
    cwd: str = "."
) -> Dict:
    """Rank file pairs by how often they change in the same commit.

    Args:
        days: How many days of history to analyze.
        threshold: Minimum coupling frequency for a pair to be reported.
        top_n: Maximum number of pairs to return.
        cwd: Directory in which git is run.

    Returns:
        A dict with ``analysis_type``, ``analysis_period_days``,
        ``total_commits_analyzed``, ``high_coupling_pairs`` and
        ``recommendations``; ``total_files_analyzed`` is included whenever at
        least one commit was found.
    """
    commits = get_all_commits(days, cwd)
    total_commits = len(commits)

    if not commits:
        return {
            "analysis_type": "global",
            "analysis_period_days": days,
            "total_commits_analyzed": 0,
            "high_coupling_pairs": [],
            "recommendations": ["指定时间段内没有 commit 记录"]
        }

    pair_counter = Counter()       # co-change count per (file, file) pair
    file_commit_count = Counter()  # number of commits touching each file

    print(f"分析 {total_commits} 个 commits...", file=sys.stderr)

    for position, commit in enumerate(commits, start=1):
        if position % 50 == 0:
            print(f" 进度: {position}/{total_commits}", file=sys.stderr)

        changed = get_files_in_commit(commit, cwd)
        file_commit_count.update(changed)

        # Every unordered pair of files in this commit; the pair is sorted so
        # (a, b) and (b, a) share one counter key.
        for idx, first in enumerate(changed):
            for second in changed[idx + 1:]:
                pair_counter[tuple(sorted((first, second)))] += 1

    high_coupling_pairs = []

    # Only the top_n * 3 most frequent pairs are considered as candidates.
    for (file_a, file_b), co_change_count in pair_counter.most_common(top_n * 3):
        # Coupling = shared commits / commits of the busier of the two files.
        busiest = max(file_commit_count[file_a], file_commit_count[file_b])
        frequency = round(co_change_count / busiest, 2) if busiest > 0 else 0

        if not frequency >= threshold:
            continue

        category_a = categorize_file(file_a)
        category_b = categorize_file(file_b)

        entry = {
            "file_a": file_a,
            "file_b": file_b,
            "co_change_count": co_change_count,
            "frequency": frequency,
            "category_a": category_a,
            "category_b": category_b
        }

        # A warning is attached only when both files are production code.
        both_production = category_a == "PRODUCTION" and category_b == "PRODUCTION"
        if both_production and frequency >= 0.7:
            entry["warning"] = "HIGH_COUPLING"
        elif both_production and frequency >= 0.5:
            entry["warning"] = "MEDIUM_COUPLING"

        high_coupling_pairs.append(entry)

        if len(high_coupling_pairs) >= top_n:
            break

    high_risk_total = sum(
        1 for pair in high_coupling_pairs if pair.get("warning") == "HIGH_COUPLING"
    )
    if high_risk_total:
        recommendations = [
            f"发现 {high_risk_total} 对高耦合生产文件,考虑合并或重构。"
        ]
    else:
        recommendations = ["未发现严重的耦合问题。"]

    return {
        "analysis_type": "global",
        "analysis_period_days": days,
        "total_commits_analyzed": total_commits,
        "total_files_analyzed": len(file_commit_count),
        "high_coupling_pairs": high_coupling_pairs,
        "recommendations": recommendations
    }
|
|
461
|
+
|
|
462
|
+
|
|
463
|
+
def main():
    """Command-line entry point.

    Parses the arguments, verifies that git is usable and that ``--repo`` is
    a repository (exiting with status 1 otherwise), warns on stderr about
    shallow or very small histories, runs either the single-file analysis
    (when a focus file is given) or the global pair analysis, and writes the
    result as JSON or Markdown to ``--output`` or stdout.
    """
    parser = argparse.ArgumentParser(
        description="Git 历史取证与共改分析工具"
    )
    parser.add_argument(
        # "--file" is an extra alias so that invocations written as
        # "--file <path>" are accepted instead of rejected by argparse.
        "--focus", "--entry", "--file",
        dest="focus",
        help="聚焦分析单个文件(可选,不指定则进行全局分析)"
    )
    parser.add_argument(
        "--days",
        type=int,
        default=180,
        help="分析多少天的历史 (默认: 180)"
    )
    parser.add_argument(
        "--threshold",
        type=float,
        default=0.3,
        help="耦合频率阈值 (默认: 0.3)"
    )
    parser.add_argument(
        "--top",
        type=int,
        default=20,
        help="返回 top N 高耦合文件对 (默认: 20)"
    )
    parser.add_argument(
        "--repo",
        default=".",
        help="仓库路径 (默认: 当前目录)"
    )
    parser.add_argument(
        "--format",
        choices=["json", "markdown"],
        default="json",
        help="输出格式 (默认: json)"
    )
    parser.add_argument(
        "--output", "-o",
        help="输出文件路径 (默认: stdout)"
    )

    args = parser.parse_args()

    # git must be runnable at all.
    if not check_git_available():
        print("Error: git is not available.", file=sys.stderr)
        sys.exit(1)

    # The target directory must be inside a repository.
    if not is_git_repo(args.repo):
        print(f"Error: {args.repo} is not a git repository.", file=sys.stderr)
        sys.exit(1)

    # Diagnostics go to stderr so stdout stays a clean JSON/Markdown stream.
    is_shallow, commit_count = check_shallow_clone(args.repo)
    if is_shallow:
        print(f"⚠️ Warning: This is a shallow clone with only {commit_count} commits.", file=sys.stderr)
        print(" Analysis results may be unreliable. Use 'git fetch --unshallow' for full history.", file=sys.stderr)
    elif commit_count < 50:
        print(f"⚠️ Warning: Repository has only {commit_count} commits. Results may be limited.", file=sys.stderr)

    if args.focus:
        # Focus mode: analyze a single file.
        print(f"Focus 模式:分析文件 {args.focus}", file=sys.stderr)
        result = analyze_co_changes(
            target_file=args.focus,
            days=args.days,
            threshold=args.threshold,
            cwd=args.repo
        )
    else:
        # Global mode: analyze every file pair.
        print("全局模式:分析所有文件对的耦合度", file=sys.stderr)
        result = analyze_global_coupling(
            days=args.days,
            threshold=args.threshold,
            top_n=args.top,
            cwd=args.repo
        )

    # Attach repository-level metadata to whichever result was produced.
    result["metadata"] = {
        "is_shallow_clone": is_shallow,
        "total_repo_commits": commit_count
    }

    # Render and emit the report.
    if args.format == "markdown":
        output_str = format_as_markdown(result)
    else:
        output_str = json.dumps(result, indent=2, ensure_ascii=False)

    if args.output:
        Path(args.output).write_text(output_str, encoding="utf-8")
        print(f"Output written to: {args.output}", file=sys.stderr)
    else:
        print(output_str)
|
|
564
|
+
|
|
565
|
+
|
|
566
|
+
def format_as_markdown(result: Dict) -> str:
    """Render an analysis result dict as a Markdown report.

    A result containing ``co_changed_files`` is rendered as a single-file
    (focus) report; otherwise one containing ``high_coupling_pairs`` is
    rendered as a global report. Recommendations are taken from
    ``result["analysis"]["recommendations"]`` when that list is non-empty,
    else from ``result["recommendations"]``.

    Args:
        result: Output of one of the analysis functions, optionally extended
            with a ``metadata`` entry.

    Returns:
        The report, lines joined with ``"\\n"``.
    """
    meta = result.get("metadata", {})
    out = [
        "# Git Forensics Report",
        f"**Analysis Period**: {result.get('analysis_period_days')} days | **Commits**: {meta.get('total_repo_commits')} | **Shallow**: {meta.get('is_shallow_clone')}",
        "",
    ]

    if "co_changed_files" in result:
        # Single-file analysis: one table row per co-changed file.
        out += [
            f"## Focus: `{result.get('target_file')}`",
            "",
            "### High Coupling Files",
            "| File | Frequency | Count | Category | Warning |",
            "|------|-----------|-------|----------|---------|",
        ]
        for row in result.get("co_changed_files", []):
            flag = row.get("warning", "")
            out.append(f"| `{row['file']}` | {row['frequency']} | {row['co_change_count']} | {row['category']} | {flag} |")
        out.append("")

    elif "high_coupling_pairs" in result:
        # Global analysis: one table row per file pair.
        out += [
            "## Global Coupling Analysis",
            "",
            "### Top Coupled Pairs",
            "| File A | File B | Frequency | Count | Risk |",
            "|--------|--------|-----------|-------|------|",
        ]
        for pair in result.get("high_coupling_pairs", []):
            flag = pair.get("warning", "")
            out.append(f"| `{pair['file_a']}` | `{pair['file_b']}` | {pair['frequency']} | {pair['co_change_count']} | {flag} |")
        out.append("")

    # Recommendations: nested list first, top-level list as the fallback.
    recs = result.get("analysis", {}).get("recommendations", [])
    if not recs:
        recs = result.get("recommendations", [])
    if recs:
        out.append("## Recommendations")
        out.extend(f"- {rec}" for rec in recs)

    return "\n".join(out)
|
|
612
|
+
|
|
613
|
+
|
|
614
|
+
# Run the command-line interface only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|