gitcode-insight 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
gitcode_insight/pr.py ADDED
@@ -0,0 +1,843 @@
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ GitCode PR 洞察模块
4
+ 分析指定仓库近 N 天的 PR 情况
5
+ """
6
+
7
+ import json
8
+ import time
9
+ import os
10
+ from datetime import datetime, timedelta, timezone
11
+ from typing import List, Dict, Optional
12
+ import requests
13
+
14
+ from .utils import request_with_retry
15
+
16
+
17
class GitCodePRInsight:
    """Collect pull requests of one GitCode repository and report on them.

    The analyzer downloads the pull requests created or updated within the
    last ``days`` days, derives per-PR metrics (first-review latency, merge
    duration, change size, ...), aggregates them, and writes JSON, HTML and
    Markdown reports into ``output_dir``.
    """

    # Page size requested from the pull-request list endpoint.
    PER_PAGE = 100
    # Upper bound on list pages fetched per run; older PRs beyond it are ignored.
    MAX_PAGES = 50
    # Pause between consecutive API requests, in seconds (rate-limit control).
    REQUEST_INTERVAL = 0.6

    def __init__(self, repo: str, token: str, owner: Optional[str] = None, days: int = 30,
                 output_dir: Optional[str] = None):
        """Configure the analyzer.

        Args:
            repo: Repository name (its path segment).
            token: API access token.
            owner: Organisation / owner name. When empty, the value is read
                from ``config/gitcode.json`` in the working directory.
            days: Size of the analysis window in days.
            output_dir: Directory for generated files; defaults to
                ``<cwd>/output``.
        """
        self.repo = repo
        self.token = token
        self.owner = owner or self._get_default_owner()
        self.days = days
        self.base_url = "https://api.gitcode.com/api/v5"

        if output_dir is None:
            output_dir = os.path.join(os.getcwd(), "output")
        self.output_dir = output_dir

        self.session = requests.Session()
        self.session.headers.update({"Content-Type": "application/json"})

        # Lower bound of the analysis window, kept as an ISO-8601 string (UTC).
        self.since_date = (datetime.now(timezone.utc) - timedelta(days=days)).isoformat()

    @staticmethod
    def _parse_time(value) -> Optional[datetime]:
        """Parse an ISO-8601 timestamp into an aware ``datetime``.

        Returns ``None`` for empty, non-string or malformed input. A trailing
        ``Z`` is accepted, and timestamps without an offset are treated as UTC
        so they can be compared with aware values instead of raising.
        """
        if not value or not isinstance(value, str):
            return None
        try:
            parsed = datetime.fromisoformat(value.replace("Z", "+00:00"))
        except ValueError:
            return None
        if parsed.tzinfo is None:
            parsed = parsed.replace(tzinfo=timezone.utc)
        return parsed

    @staticmethod
    def _minutes_between(start: Optional[datetime], end: Optional[datetime]) -> Optional[float]:
        """Return ``end - start`` in minutes, or ``None`` if either is missing."""
        if start is None or end is None:
            return None
        return (end - start).total_seconds() / 60

    @staticmethod
    def _count(values) -> Dict[str, int]:
        """Count non-empty values, keeping first-seen order of the keys."""
        counts: Dict[str, int] = {}
        for value in values:
            if value:
                counts[value] = counts.get(value, 0) + 1
        return counts

    @staticmethod
    def _top(counts: Dict[str, int], limit: int = 10) -> Dict[str, int]:
        """Return the ``limit`` most frequent entries, most frequent first."""
        ranked = sorted(counts.items(), key=lambda item: item[1], reverse=True)
        return dict(ranked[:limit])

    def _get_default_owner(self) -> str:
        """Read the default owner from ``config/gitcode.json`` under the cwd.

        Returns:
            The configured owner, or ``""`` when the file is missing, its JSON
            root is not an object, or ``owner`` is not a string. Malformed JSON
            still raises so that a broken config file does not go unnoticed.
        """
        config_file = os.path.join(os.getcwd(), "config", "gitcode.json")
        try:
            with open(config_file, 'r', encoding='utf-8') as f:
                config = json.load(f)
        except FileNotFoundError:
            return ""
        owner = config.get("owner") if isinstance(config, dict) else None
        return owner if isinstance(owner, str) else ""

    def get_prs(self) -> List[Dict]:
        """Fetch the pull requests created or updated inside the window.

        Pages through the list endpoint (newest first) until a short or empty
        page is returned or ``MAX_PAGES`` pages have been read. Paging cannot
        stop at the first PR created before the window, because an older PR
        may still have been updated inside it.

        Returns:
            The raw PR dictionaries, in the order the API returned them.
        """
        print(f"获取 {self.owner}/{self.repo} 近 {self.days} 天的 PR 列表...")

        url = f"{self.base_url}/repos/{self.owner}/{self.repo}/pulls"
        all_prs: List[Dict] = []

        for page in range(1, self.MAX_PAGES + 1):
            params = {
                "access_token": self.token,
                "state": "all",
                "per_page": self.PER_PAGE,
                "page": page,
                "sort": "created",
                "direction": "desc"
            }

            data = request_with_retry(self.session, url, params)
            # A failed request (None), an error payload or an empty page ends paging.
            if not isinstance(data, list) or not data:
                break

            filtered = [
                pr for pr in data
                if self._is_within_range(pr.get("created_at", ""))
                or self._is_within_range(pr.get("updated_at", ""))
            ]

            all_prs.extend(filtered)
            print(f"  第 {page} 页获取到 {len(filtered)} 条 PR")

            if len(data) < self.PER_PAGE:
                break

            time.sleep(self.REQUEST_INTERVAL)

        print(f"共获取到 {len(all_prs)} 条 PR")
        return all_prs

    def _is_within_range(self, date_str: str) -> bool:
        """Return True when ``date_str`` is at or after the window start.

        Empty or unparsable timestamps are outside the window. Timestamps
        without an offset are interpreted as UTC.
        """
        moment = self._parse_time(date_str)
        since = self._parse_time(self.since_date)
        if moment is None or since is None:
            return False
        return moment >= since

    def get_pr_comments(self, pr_number: int) -> List[Dict]:
        """Fetch the comments of one pull request.

        Only a single page of up to 100 comments is requested. Any response
        that is not a list yields an empty list.
        """
        url = f"{self.base_url}/repos/{self.owner}/{self.repo}/pulls/{pr_number}/comments"
        params = {"access_token": self.token, "per_page": 100}
        data = request_with_retry(self.session, url, params)
        return data if isinstance(data, list) else []

    def analyze_pr(self, pr: Dict) -> Dict:
        """Derive the per-PR metrics used by the reports.

        Args:
            pr: One raw PR dictionary as returned by the list endpoint.

        Returns:
            A flat dictionary with identifying fields plus:
            ``first_review_time`` (minutes from creation to the earliest
            comment by someone other than the author), ``merge_duration`` and
            ``close_duration`` (minutes), and ``open_days``. A metric is
            ``None`` when it does not apply or a timestamp cannot be parsed;
            a genuine zero is reported as ``0.0``.
        """
        pr_number = pr.get("number", "")
        created_at_str = pr.get("created_at") or ""
        merged_at = pr.get("merged_at")
        closed_at = pr.get("closed_at")
        created_time = self._parse_time(created_at_str)
        # The API may send explicit nulls, so ``or`` is used instead of defaults.
        author = pr.get("user") or {}

        comments = self.get_pr_comments(pr_number)

        # Use the earliest parseable non-author comment, not list order.
        first_review_time = None
        if created_time is not None:
            creator_id = author.get("id")
            review_moments = []
            for comment in comments:
                commenter = comment.get("user") or {}
                if commenter.get("id") == creator_id:
                    continue
                moment = self._parse_time(comment.get("created_at"))
                if moment is not None:
                    review_moments.append(moment)
            if review_moments:
                first_review_time = self._minutes_between(created_time, min(review_moments))

        merge_duration = None
        if merged_at:
            merge_duration = self._minutes_between(created_time, self._parse_time(merged_at))

        # Close duration only applies to PRs closed without being merged.
        close_duration = None
        if closed_at and not merged_at:
            close_duration = self._minutes_between(created_time, self._parse_time(closed_at))

        open_days = None
        if pr.get("state") == "open" and created_time is not None:
            open_days = (datetime.now(timezone.utc) - created_time).total_seconds() / 86400

        added_lines = pr.get("added_lines", 0) or 0
        removed_lines = pr.get("removed_lines", 0) or 0
        total_changes = added_lines + removed_lines

        assignees = [person.get("login") or "" for person in pr.get("assignees") or []]
        testers = [person.get("login") or "" for person in pr.get("testers") or []]
        labels = [label.get("name") or "" for label in pr.get("labels") or []]
        merged_by_login = (pr.get("merged_by") or {}).get("login", "")

        def rounded(value):
            # ``is not None`` keeps a real 0.0 instead of collapsing it to None.
            return round(value, 2) if value is not None else None

        return {
            "pr_number": pr_number,
            "title": pr.get("title") or "",
            "state": pr.get("state", ""),
            "draft": pr.get("draft", False),
            "locked": pr.get("locked", False),
            "created_at": created_at_str,
            "updated_at": pr.get("updated_at", ""),
            "merged_at": merged_at or "",
            "closed_at": closed_at or "",
            "creator": author.get("login", ""),
            "source_branch": pr.get("source_branch", ""),
            "target_branch": pr.get("target_branch", ""),
            "added_lines": added_lines,
            "removed_lines": removed_lines,
            "total_changes": total_changes,
            "notes_count": pr.get("notes", 0) or 0,
            "labels": ",".join(labels),
            "assignees": ",".join(assignees),
            "testers": ",".join(testers),
            "merged_by": merged_by_login,
            "mergeable": pr.get("mergeable"),
            "pipeline_status": pr.get("pipeline_status", ""),
            "html_url": pr.get("html_url", ""),
            "first_review_time": rounded(first_review_time),
            "merge_duration": rounded(merge_duration),
            "close_duration": rounded(close_duration),
            "open_days": rounded(open_days)
        }

    def calculate_insights(self, prs_data: List[Dict]) -> tuple:
        """Aggregate the per-PR records into summary statistics.

        Args:
            prs_data: Records produced by :meth:`analyze_pr`.

        Returns:
            ``(insights, prs_data)`` where ``insights`` holds the ``summary``,
            ``efficiency``, ``quality``, ``distribution`` and ``daily_trend``
            sections and ``prs_data`` is the input list, unchanged.
        """
        total = len(prs_data)

        def percent(part, whole):
            return round(part / whole * 100, 2) if whole > 0 else 0

        opened = [p for p in prs_data if p["state"] == "open"]
        merged = [p for p in prs_data if p["merged_at"]]
        closed_not_merged = [p for p in prs_data if p["state"] == "closed" and not p["merged_at"]]
        conflicts = [p for p in prs_data if p["mergeable"] is False]

        # Change-size statistics ignore PRs that report no changed lines.
        change_sizes = [p["total_changes"] for p in prs_data if p["total_changes"] > 0]
        avg_changes = sum(change_sizes) / len(change_sizes) if change_sizes else 0
        max_changes = max(change_sizes) if change_sizes else 0
        large_prs = [p for p in prs_data if p["total_changes"] > 500]

        total_notes = sum(p["notes_count"] for p in prs_data)
        total_lines = sum(p["total_changes"] for p in prs_data)
        comment_density = total_notes / total_lines if total_lines > 0 else 0

        # ``is not None`` so that zero-minute durations still count as samples.
        review_times = [p["first_review_time"] for p in prs_data if p["first_review_time"] is not None]
        avg_review_time = sum(review_times) / len(review_times) if review_times else 0

        merge_durations = [p["merge_duration"] for p in prs_data if p["merge_duration"] is not None]
        avg_merge_duration = sum(merge_durations) / len(merge_durations) if merge_durations else 0
        min_merge_duration = min(merge_durations) if merge_durations else 0
        max_merge_duration = max(merge_durations) if merge_durations else 0

        creator_dist = self._count(p["creator"] for p in prs_data)
        branch_dist = self._count(p["target_branch"] for p in prs_data)
        label_dist = self._count(label for p in prs_data for label in p["labels"].split(","))
        reviewer_dist = self._count(name for p in prs_data for name in p["assignees"].split(","))

        daily_trend: Dict[str, Dict[str, int]] = {}

        def bump(timestamp, field):
            # Records with a missing or unparsable timestamp are skipped.
            moment = self._parse_time(timestamp)
            if moment is None:
                return
            day = moment.strftime("%Y-%m-%d")
            daily_trend.setdefault(day, {"created": 0, "merged": 0, "closed": 0})[field] += 1

        for pr in prs_data:
            bump(pr["created_at"], "created")
        for pr in merged:
            bump(pr["merged_at"], "merged")
        for pr in closed_not_merged:
            bump(pr["closed_at"], "closed")

        # Share of first reviews that happened within 24 hours (1440 minutes).
        timely_reviews = len([t for t in review_times if t <= 1440])

        # CI success rate is derived from the ci_successful / ci_failed labels.
        ci_success_count = label_dist.get("ci_successful", 0)
        ci_failed_count = label_dist.get("ci_failed", 0)

        return {
            "repo": f"{self.owner}/{self.repo}",
            "analysis_period": f"近 {self.days} 天",
            "analysis_time": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            "summary": {
                "total_prs": total,
                "opened_prs": len(opened),
                "merged_prs": len(merged),
                "closed_prs": len(closed_not_merged),
                "merge_rate": percent(len(merged), total),
                "conflict_rate": percent(len(conflicts), total)
            },
            "efficiency": {
                "avg_first_review_time_minutes": round(avg_review_time, 2),
                "avg_merge_duration_hours": round(avg_merge_duration / 60, 2),
                "min_merge_duration_hours": round(min_merge_duration / 60, 2),
                "max_merge_duration_hours": round(max_merge_duration / 60, 2),
                "timely_review_rate": percent(timely_reviews, len(review_times)),
                "review_time_samples": len(review_times),
                "merge_duration_samples": len(merge_durations)
            },
            "quality": {
                "avg_change_lines": round(avg_changes, 2),
                "max_change_lines": max_changes,
                "large_pr_count": len(large_prs),
                "large_pr_rate": percent(len(large_prs), total),
                "comment_density": round(comment_density, 4),
                "ci_success_rate": percent(ci_success_count, ci_success_count + ci_failed_count)
            },
            "distribution": {
                "by_creator": self._top(creator_dist),
                "by_target_branch": self._top(branch_dist),
                "by_label": self._top(label_dist),
                "by_reviewer": self._top(reviewer_dist)
            },
            "daily_trend": dict(sorted(daily_trend.items()))
        }, prs_data

    def generate_html_report(self, insights: Dict, prs_data: List[Dict], output_file: str):
        """Write the HTML report (statistics, Chart.js charts and PR table).

        All text that originates from the API (repository, titles, logins,
        branches, URLs) is HTML-escaped, and chart data is serialised so that
        it cannot terminate the surrounding ``<script>`` element.

        Args:
            insights: The statistics dictionary from :meth:`calculate_insights`.
            prs_data: The per-PR records, listed in the given order.
            output_file: Destination path; the file is overwritten.
        """
        from html import escape  # local import: the module's import header is left as is

        summary = insights["summary"]
        efficiency = insights["efficiency"]
        quality = insights["quality"]
        distribution = insights["distribution"]
        daily_trend = insights["daily_trend"]

        def text(value) -> str:
            return escape(str(value), quote=True)

        def to_js(value) -> str:
            # "<\/" is a valid JSON/JS escape and keeps "</script>" out of the markup.
            return json.dumps(value).replace("</", "<\\/")

        def stat_cards(cards) -> str:
            return "\n".join(
                '        <div class="stat-card">\n'
                f'            <div class="stat-value">{value}</div>\n'
                f'            <div class="stat-label">{label}</div>\n'
                '        </div>'
                for value, label in cards
            )

        def dist_items(counts) -> str:
            return "\n".join(
                f'            <div class="dist-item"><span>{text(name)}</span><span>{count}</span></div>'
                for name, count in counts.items()
            )

        def bar_chart(labels, values, colors, horizontal):
            axis = "x" if horizontal else "y"
            options = {"responsive": True, "scales": {axis: {"beginAtZero": True}}}
            if horizontal:
                options["indexAxis"] = "y"
            return {
                "type": "bar",
                "data": {
                    "labels": labels,
                    "datasets": [{"label": "PR 数量", "data": values, "backgroundColor": colors}]
                },
                "options": options
            }

        def trend_series(label, field, border, fill):
            return {
                "label": label,
                "data": [daily_trend[day][field] for day in dates],
                "borderColor": border,
                "backgroundColor": fill,
                "fill": True
            }

        # Bucket the PRs by number of changed lines for the size chart.
        size_ranges = {"0-50": 0, "51-200": 0, "201-500": 0, "501-1000": 0, ">1000": 0}
        for pr in prs_data:
            size = pr["total_changes"]
            if size <= 50:
                size_ranges["0-50"] += 1
            elif size <= 200:
                size_ranges["51-200"] += 1
            elif size <= 500:
                size_ranges["201-500"] += 1
            elif size <= 1000:
                size_ranges["501-1000"] += 1
            else:
                size_ranges[">1000"] += 1

        dates = list(daily_trend.keys())
        palette = ['#3b82f6', '#10b981', '#f59e0b', '#ef4444', '#8b5cf6',
                   '#ec4899', '#06b6d4', '#84cc16', '#f97316', '#6366f1']
        charts = {
            "trendChart": {
                "type": "line",
                "data": {
                    "labels": dates,
                    "datasets": [
                        trend_series("创建", "created", "#3b82f6", "rgba(59, 130, 246, 0.1)"),
                        trend_series("合并", "merged", "#10b981", "rgba(16, 185, 129, 0.1)"),
                        trend_series("关闭", "closed", "#ef4444", "rgba(239, 68, 68, 0.1)")
                    ]
                },
                "options": {"responsive": True, "scales": {"y": {"beginAtZero": True}}}
            },
            "creatorChart": bar_chart(
                list(distribution["by_creator"].keys()),
                list(distribution["by_creator"].values()),
                palette, True),
            "branchChart": bar_chart(
                list(distribution["by_target_branch"].keys()),
                list(distribution["by_target_branch"].values()),
                "#8b5cf6", True),
            "sizeChart": bar_chart(
                list(size_ranges.keys()),
                list(size_ranges.values()),
                ['#10b981', '#3b82f6', '#f59e0b', '#ef4444', '#8b5cf6'], False)
        }

        css_rules = [
            "* { margin: 0; padding: 0; box-sizing: border-box; }",
            "body { font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif; background: #f5f7fa; color: #333; }",
            ".container { max-width: 1200px; margin: 0 auto; padding: 20px; }",
            "h1 { text-align: center; color: #1a365d; margin-bottom: 30px; padding-bottom: 15px; border-bottom: 2px solid #e2e8f0; }",
            "h2 { color: #1a365d; margin: 20px 0 15px 0; }",
            ".stats-grid { display: grid; grid-template-columns: repeat(auto-fit, minmax(140px, 1fr)); gap: 12px; margin-bottom: 25px; }",
            ".stat-card { background: white; border-radius: 8px; padding: 15px; box-shadow: 0 2px 10px rgba(0,0,0,0.1); text-align: center; }",
            ".stat-value { font-size: 26px; font-weight: bold; color: #1e40af; }",
            ".stat-label { color: #64748b; margin-top: 5px; font-size: 13px; }",
            ".charts-grid { display: grid; grid-template-columns: repeat(auto-fit, minmax(400px, 1fr)); gap: 20px; margin-bottom: 25px; }",
            ".chart-box { background: white; border-radius: 8px; padding: 20px; box-shadow: 0 2px 10px rgba(0,0,0,0.1); }",
            ".chart-title { font-size: 15px; font-weight: bold; color: #1a365d; margin-bottom: 12px; }",
            ".dist-section { background: white; border-radius: 8px; padding: 20px; box-shadow: 0 2px 10px rgba(0,0,0,0.1); margin-bottom: 20px; }",
            ".dist-item { display: flex; justify-content: space-between; padding: 8px 0; border-bottom: 1px solid #e2e8f0; }",
            ".dist-item:last-child { border-bottom: none; }",
            ".footer { text-align: center; color: #64748b; padding: 20px; font-size: 12px; }",
        ]

        overview_cards = [
            (summary["total_prs"], "总 PR 数"),
            (summary["opened_prs"], "打开中"),
            (summary["merged_prs"], "已合并"),
            (summary["closed_prs"], "已关闭(未合并)"),
            (f'{summary["merge_rate"]}%', "合并率"),
            (f'{summary["conflict_rate"]}%', "冲突率"),
        ]
        efficiency_cards = [
            (f'{efficiency["avg_first_review_time_minutes"]:.1f}', "平均首次评审(分钟)"),
            (f'{efficiency["avg_merge_duration_hours"]:.1f}', "平均合并耗时(小时)"),
            (f'{efficiency["min_merge_duration_hours"]:.1f}', "最短合并耗时(小时)"),
            (f'{efficiency["max_merge_duration_hours"]:.1f}', "最长合并耗时(小时)"),
            (f'{efficiency["timely_review_rate"]}%', "24h评审率"),
        ]
        quality_cards = [
            (f'{quality["avg_change_lines"]:.0f}', "平均变更行数"),
            (quality["max_change_lines"], "最大变更行数"),
            (quality["large_pr_count"], "大PR数(>500行)"),
            (f'{quality["large_pr_rate"]}%', "大PR占比"),
            (f'{quality["comment_density"]:.4f}', "评论密度"),
        ]
        chart_boxes = [
            [("每日 PR 趋势", "trendChart"), ("创建者分布 Top 10", "creatorChart")],
            [("目标分支分布", "branchChart"), ("代码变更规模分布", "sizeChart")],
        ]
        columns = [("PR", "left"), ("标题", "left"), ("状态", "center"), ("创建者", "left"),
                   ("目标分支", "left"), ("变更行数", "right"), ("创建时间", "center")]

        state_names = {"open": "打开", "merged": "已合并", "closed": "已关闭"}
        state_colors = {"open": "#3b82f6", "merged": "#10b981", "closed": "#ef4444"}
        cell_style = "padding: 8px; border-bottom: 1px solid #e2e8f0;"

        # PR rows keep the order of prs_data (the API returns newest first).
        table_rows = []
        for pr in prs_data:
            state = pr["state"]
            title = pr["title"]
            # Truncate first, escape second, so no HTML entity is cut in half.
            short_title = text(title[:50] + ("..." if len(title) > 50 else ""))
            created = text(pr["created_at"][:10] if pr["created_at"] else "-")
            url = text(pr["html_url"])
            number = text(pr["pr_number"])
            state_label = text(state_names.get(state, state))
            state_color = state_colors.get(state, "#64748b")
            creator = text(pr["creator"])
            branch = text(pr["target_branch"])
            changes = text(pr["total_changes"])
            table_rows.append("\n".join([
                '                <tr>',
                f'                    <td style="{cell_style}"><a href="{url}" target="_blank">#{number}</a></td>',
                f'                    <td style="{cell_style} max-width: 300px; overflow: hidden; text-overflow: ellipsis; white-space: nowrap;">{short_title}</td>',
                f'                    <td style="{cell_style} text-align: center;"><span style="color: {state_color}; font-weight: 500;">{state_label}</span></td>',
                f'                    <td style="{cell_style}">{creator}</td>',
                f'                    <td style="{cell_style}">{branch}</td>',
                f'                    <td style="{cell_style} text-align: right;">{changes}</td>',
                f'                    <td style="{cell_style} text-align: center;">{created}</td>',
                '                </tr>',
            ]))

        repo_name = text(insights["repo"])
        parts = [
            '<!DOCTYPE html>',
            '<html lang="zh-CN">',
            '<head>',
            '    <meta charset="UTF-8">',
            '    <meta name="viewport" content="width=device-width, initial-scale=1.0">',
            f'    <title>PR 洞察报告 - {repo_name}</title>',
            '    <script src="https://cdn.jsdelivr.net/npm/chart.js"></script>',
            '    <style>',
        ]
        parts.extend(f'        {rule}' for rule in css_rules)
        parts.extend([
            '    </style>',
            '</head>',
            '<body>',
            '<div class="container">',
            f'    <h1>PR 洞察报告 - {repo_name}</h1>',
            '',
        ])
        for heading, cards in (("概览统计", overview_cards),
                               ("效率指标", efficiency_cards),
                               ("质量指标", quality_cards)):
            parts.extend([
                f'    <h2>{heading}</h2>',
                '    <div class="stats-grid">',
                stat_cards(cards),
                '    </div>',
                '',
            ])
        parts.append('    <h2>趋势图表</h2>')
        for row in chart_boxes:
            parts.append('    <div class="charts-grid">')
            for chart_title, canvas_id in row:
                parts.extend([
                    '        <div class="chart-box">',
                    f'            <div class="chart-title">{chart_title}</div>',
                    f'            <canvas id="{canvas_id}"></canvas>',
                    '        </div>',
                ])
            parts.extend(['    </div>', ''])
        parts.extend([
            '    <h2>分布统计</h2>',
            '    <div class="charts-grid">',
            '        <div class="dist-section">',
            '            <div class="chart-title">创建者分布 Top 10</div>',
            dist_items(distribution["by_creator"]),
            '        </div>',
            '        <div class="dist-section">',
            '            <div class="chart-title">目标分支分布</div>',
            dist_items(distribution["by_target_branch"]),
            '        </div>',
            '    </div>',
            '',
            '    <h2>PR 列表</h2>',
            '    <div class="dist-section">',
            '        <table style="width: 100%; border-collapse: collapse; font-size: 13px;">',
            '            <thead>',
            '                <tr style="background: #f1f5f9;">',
        ])
        parts.extend(
            f'                    <th style="padding: 10px; text-align: {align}; border-bottom: 2px solid #e2e8f0;">{label}</th>'
            for label, align in columns
        )
        parts.extend([
            '                </tr>',
            '            </thead>',
            '            <tbody>',
        ])
        parts.extend(table_rows)
        parts.extend([
            '            </tbody>',
            '        </table>',
            '    </div>',
            '',
            '    <div class="footer">',
            f'        <p>分析时间: {text(insights["analysis_time"])} | 分析周期: {text(insights["analysis_period"])}</p>',
            '    </div>',
            '</div>',
            '',
            '<script>',
        ])
        parts.extend(
            f"    new Chart(document.getElementById('{canvas_id}'), {to_js(config)});"
            for canvas_id, config in charts.items()
        )
        parts.extend([
            '</script>',
            '</body>',
            '</html>',
            '',
        ])

        with open(output_file, 'w', encoding='utf-8') as f:
            f.write("\n".join(parts))

        print(f"HTML 报告: {output_file}")

    def generate_markdown_report(self, insights: Dict, prs_data: List[Dict], output_file: str):
        """Write the Markdown report (statistics tables and PR list).

        Table cells built from API text have ``|`` escaped and line breaks
        replaced by spaces, so a PR title cannot break the table layout.

        Args:
            insights: The statistics dictionary from :meth:`calculate_insights`.
            prs_data: The per-PR records, listed in the given order.
            output_file: Destination path; the file is overwritten.
        """
        summary = insights["summary"]
        efficiency = insights["efficiency"]
        quality = insights["quality"]
        distribution = insights["distribution"]
        daily_trend = insights["daily_trend"]

        def cell(value) -> str:
            return str(value).replace("|", "\\|").replace("\r", " ").replace("\n", " ")

        def two_column_table(heading, first_column, rows):
            table = ["", f"## {heading}", "", f"| {first_column} | 数量 |", "|------|------|"]
            table.extend(f"| {cell(name)} | {count} |" for name, count in rows)
            return table

        lines = [
            f"# PR 洞察报告 - {insights['repo']}",
            "",
            f"> 分析时间: {insights['analysis_time']} | 分析周期: {insights['analysis_period']}",
            "",
            "## 概览统计",
            "",
            "| 指标 | 数值 |",
            "|------|------|",
            f"| 总 PR 数 | {summary['total_prs']} |",
            f"| 打开中 | {summary['opened_prs']} |",
            f"| 已合并 | {summary['merged_prs']} |",
            f"| 已关闭(未合并) | {summary['closed_prs']} |",
            f"| 合并率 | {summary['merge_rate']}% |",
            f"| 冲突率 | {summary['conflict_rate']}% |",
            "",
            "## 效率指标",
            "",
            "| 指标 | 数值 |",
            "|------|------|",
            f"| 平均首次评审 | {efficiency['avg_first_review_time_minutes']:.1f} 分钟 |",
            f"| 平均合并耗时 | {efficiency['avg_merge_duration_hours']:.1f} 小时 |",
            f"| 最短合并耗时 | {efficiency['min_merge_duration_hours']:.1f} 小时 |",
            f"| 最长合并耗时 | {efficiency['max_merge_duration_hours']:.1f} 小时 |",
            f"| 24h评审率 | {efficiency['timely_review_rate']}% |",
            f"| 评审时间样本数 | {efficiency['review_time_samples']} |",
            f"| 合并耗时样本数 | {efficiency['merge_duration_samples']} |",
            "",
            "## 质量指标",
            "",
            "| 指标 | 数值 |",
            "|------|------|",
            f"| 平均变更行数 | {quality['avg_change_lines']:.0f} |",
            f"| 最大变更行数 | {quality['max_change_lines']} |",
            f"| 大PR数(>500行) | {quality['large_pr_count']} |",
            f"| 大PR占比 | {quality['large_pr_rate']}% |",
            f"| 评论密度 | {quality['comment_density']:.4f} |",
            "",
            "## 每日趋势",
            "",
            "| 日期 | 创建 | 合并 | 关闭 |",
            "|------|------|------|------|",
        ]
        lines.extend(
            f"| {date} | {counts['created']} | {counts['merged']} | {counts['closed']} |"
            for date, counts in sorted(daily_trend.items())
        )

        lines.extend(two_column_table("创建者分布 Top 10", "创建者", distribution["by_creator"].items()))
        lines.extend(two_column_table("目标分支分布", "分支", distribution["by_target_branch"].items()))
        lines.extend(two_column_table("标签分布 Top 10", "标签", distribution["by_label"].items()))

        lines.extend([
            "",
            "## PR 列表",
            "",
            "| PR | 标题 | 状态 | 创建者 | 目标分支 | 变更行数 | 创建时间 |",
            "|------|------|------|------|------|------|------|",
        ])
        state_names = {"open": "打开", "merged": "已合并", "closed": "已关闭"}
        # PR rows keep the order of prs_data (the API returns newest first).
        for pr in prs_data:
            state_display = state_names.get(pr["state"], pr["state"])
            title_short = pr["title"][:40] + "..." if len(pr["title"]) > 40 else pr["title"]
            created = pr["created_at"][:10] if pr["created_at"] else "-"
            lines.append(
                f"| [#{pr['pr_number']}]({pr['html_url']}) | {cell(title_short)} | {cell(state_display)} "
                f"| {cell(pr['creator'])} | {cell(pr['target_branch'])} | {pr['total_changes']} | {created} |"
            )

        with open(output_file, 'w', encoding='utf-8') as f:
            f.write("\n".join(lines) + "\n")

        print(f"Markdown 报告: {output_file}")

    def run(self) -> Dict:
        """Run the whole pipeline: fetch, analyze, aggregate and write reports.

        Returns:
            ``{"statistics": ..., "raw_data": ...}`` as written to the JSON
            file, or an empty dict when no pull request was found.
        """
        os.makedirs(self.output_dir, exist_ok=True)
        banner = "=" * 60

        print(f"\n{banner}")
        print(f"PR 洞察分析: {self.owner}/{self.repo}")
        print(f"分析周期: 近 {self.days} 天")
        print(f"{banner}\n")

        prs = self.get_prs()
        if not prs:
            print("未获取到任何 PR 数据")
            return {}

        print("\n分析 PR 详情...")
        prs_data = []
        for i, pr in enumerate(prs, 1):
            print(f"  处理 {i}/{len(prs)}: PR #{pr.get('number')}")
            prs_data.append(self.analyze_pr(pr))
            time.sleep(self.REQUEST_INTERVAL)  # one comment request per PR; stay under the rate limit

        print("\n计算洞察指标...")
        insights, raw_data = self.calculate_insights(prs_data)

        # All three outputs share one file-name stem and differ only in extension.
        stem = os.path.join(self.output_dir, f"pr_insight_{self.repo}_{self.days}d")
        json_file = stem + ".json"
        html_file = stem + ".html"
        md_file = stem + ".md"

        full_data = {
            "statistics": insights,
            "raw_data": raw_data
        }
        with open(json_file, 'w', encoding='utf-8') as f:
            json.dump(full_data, f, ensure_ascii=False, indent=2)

        self.generate_html_report(insights, prs_data, html_file)
        self.generate_markdown_report(insights, prs_data, md_file)

        summary = insights["summary"]
        efficiency = insights["efficiency"]
        quality = insights["quality"]
        print(f"\n{banner}")
        print("分析完成!")
        print(banner)
        print(f"总 PR 数: {summary['total_prs']}")
        print(f"打开中: {summary['opened_prs']}")
        print(f"已合并: {summary['merged_prs']}")
        print(f"合并率: {summary['merge_rate']}%")
        print(f"平均首次评审: {efficiency['avg_first_review_time_minutes']:.1f} 分钟")
        print(f"平均合并耗时: {efficiency['avg_merge_duration_hours']:.1f} 小时")
        print(f"最短合并耗时: {efficiency['min_merge_duration_hours']:.1f} 小时")
        print(f"最长合并耗时: {efficiency['max_merge_duration_hours']:.1f} 小时")
        print(f"平均变更行数: {quality['avg_change_lines']:.0f}")
        print(f"最大变更行数: {quality['max_change_lines']}")
        print("\n输出文件:")
        print(f"- JSON 数据: {json_file}")
        print(f"- HTML 报告: {html_file}")
        print(f"- Markdown 报告: {md_file}")
        print(f"{banner}\n")

        return full_data