git-analytics-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
git_analytics.py ADDED
@@ -0,0 +1,843 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Git Analytics - 个人代码习惯体检工具
4
+ 扫描本地 Git 仓库,生成跨项目的个人开发画像
5
+ """
6
+
7
+ import os
8
+ import json
9
+ import subprocess
10
+ import re
11
+ from datetime import datetime, timedelta
12
+ from collections import defaultdict, Counter
13
+ from pathlib import Path
14
+
15
# ============================================================
# Configuration
# ============================================================
DEFAULT_SCAN_DIR = os.path.expanduser("~/Desktop")
OUTPUT_DATA = "data.json"
OUTPUT_REPORT = "report.html"

# Low-information commit messages. Applied with re.match against the
# lower-cased commit message.
LOW_INFO_PATTERNS = [
    r'^update$',
    r'^fix$',
    r'^wip$',
    r'^temp$',
    r'^misc$',
    r'^test$',
    r'^debug$',
    r'^tmp$',
    r'^save$',
    r'^checkpoint$',
    r'^\.+$',
    r'^merge',
    r'^revert',
]

# Path fragments that mark a changed file as test-related.
# Applied with re.search (case-insensitive) against the file path.
TEST_PATTERNS = [
    r'test[/\\]',
    r'tests[/\\]',
    r'__tests__[/\\]',
    r'\.spec\.',
    r'\.test\.',
    r'_test\.',
    r'test_',
    r'pytest',
    r'jest',
    r'vitest',
    r'unittest',
]

# Path fragments that mark a changed file as documentation.
# Applied with re.search (case-insensitive) against the file path.
DOC_PATTERNS = [
    r'README',
    r'docs[/\\]',
    r'\.md$',
    r'CHANGELOG',
    r'CONTRIBUTING',
    r'\.rst$',
    r'\.txt$',
]

# Traces of AI tooling.
# Strong signal: the commit message text explicitly names an AI tool.
AI_MESSAGE_PATTERNS = [
    r'generated\s+with\s+(claude|codex|copilot|cursor|chatgpt|openai)',
    r'co-authored-by:.*(claude|codex|copilot|cursor|chatgpt|openai)',
    r'claude\s+code',
    r'codex',
    r'copilot',
    r'cursor\s+ai',
    r'chatgpt',
    r'openai',
]

# Strong signal: the commit itself touched an AI tool's config/context file.
AI_FILE_PATTERNS = [
    r'(^|[/\\])\.claude([/\\]|$)',
    r'(^|[/\\])\.cursor([/\\]|$)',
    r'(^|[/\\])\.codex([/\\]|$)',
    r'(^|[/\\])\.cursorrules$',
    r'(^|[/\\])AGENTS\.md$',
    r'(^|[/\\])CLAUDE\.md$',
    r'(^|[/\\])CODEX\.md$',
    r'(^|[/\\])\.github[/\\]copilot',
]

# Weak signal: AI tooling exists in the working tree. This only shows the
# repository has been hooked up to an AI tool; it does not mean every commit
# was written with one.
AI_WORKSPACE_PATHS = [
    'AGENTS.md',
    'CLAUDE.md',
    'CODEX.md',
    '.claude',
    '.cursor',
    '.codex',
    '.cursorrules',
]
69
+
70
+
71
+ # ============================================================
72
+ # Git 仓库发现
73
+ # ============================================================
74
def find_git_repos(scan_dir, max_depth=3):
    """Walk ``scan_dir`` and return every directory that contains a ``.git`` directory.

    Args:
        scan_dir: Directory to scan; a leading ``~`` is expanded.
        max_depth: Maximum number of path components below ``scan_dir`` at
            which a repository root is still reported (0 means only
            ``scan_dir`` itself is checked).

    Returns:
        List of repository root paths, in ``os.walk`` order.
    """
    repos = []
    scan_dir = os.path.expanduser(scan_dir)

    for root, dirs, _files in os.walk(scan_dir):
        # Depth = number of path components between scan_dir and root.
        # relpath is used instead of string replacement so the result does
        # not depend on a trailing separator in scan_dir, nor on the
        # scan_dir text happening to recur further down the path.
        rel = os.path.relpath(root, scan_dir)
        depth = 0 if rel == os.curdir else rel.count(os.sep) + 1
        if depth > max_depth:
            dirs.clear()
            continue

        if '.git' in dirs:
            repos.append(root)
            dirs.remove('.git')  # never descend into .git itself

        if depth == max_depth:
            # Children would exceed max_depth and be skipped anyway;
            # pruning here avoids listing them at all.
            dirs.clear()

    return repos
91
+
92
+
93
+ # ============================================================
94
+ # Git 数据收集
95
+ # ============================================================
96
def run_git(repo_path, args):
    """Run ``git <args>`` inside ``repo_path`` and return its stripped stdout.

    This is a best-effort helper: if the process cannot be started (git is
    missing, ``repo_path`` does not exist), times out after 30 seconds, or
    is given unusable arguments, an empty string is returned instead of
    raising. The exit status is not inspected.

    Args:
        repo_path: Working directory for the git process.
        args: Sequence of git arguments, without the leading ``git``.

    Returns:
        Captured stdout with surrounding whitespace removed, or ``""``.
    """
    try:
        result = subprocess.run(
            ['git'] + list(args),
            cwd=repo_path,
            capture_output=True,
            text=True,
            # Decode explicitly as UTF-8 and replace undecodable bytes, so
            # one odd byte in a message or path cannot discard the entire
            # output (the locale default encoding may not be UTF-8).
            encoding='utf-8',
            errors='replace',
            timeout=30,
        )
    except (OSError, ValueError, subprocess.SubprocessError):
        # OSError: git missing or bad cwd; SubprocessError covers timeouts;
        # ValueError: invalid arguments (e.g. embedded NUL).
        return ""
    return result.stdout.strip()
109
+
110
+
111
def collect_repo_data(repo_path, since=None, until=None):
    """Collect commit-level and aggregate statistics for a single repository.

    Runs ``git log`` over all refs (merge commits excluded), then one
    ``git diff-tree`` per commit to obtain the changed file names.

    Args:
        repo_path: Path of the repository working tree.
        since: Optional value passed to ``git log --since``.
        until: Optional value passed to ``git log --until``.

    Returns:
        ``None`` when git yields no parseable commits; otherwise a dict with
        the repository name/path, commit totals, time distributions
        (``hourly``/``weekly``/``monthly``/``daily_commits``), language and
        file statistics, AI signals, and a ``commits`` list holding the
        per-commit fields used for later filtering.
    """
    repo_name = os.path.basename(repo_path)

    def matches_any(patterns, text):
        """Return True if any regex in ``patterns`` is found in ``text`` (case-insensitive)."""
        return any(re.search(p, text, re.I) for p in patterns)

    # One record per commit: hash, author timestamp, subject, body.
    # Fields are separated by NUL and records terminated by 0x01, so subjects
    # containing '|' and multi-line bodies parse unambiguously. The body is
    # fetched because trailers such as "Co-Authored-By:" live there, not in
    # the subject.
    log_cmd = ['log', '--all', '--no-merges', '--format=%H%x00%at%x00%s%x00%b%x01']
    if since:
        log_cmd.append(f'--since={since}')
    if until:
        log_cmd.append(f'--until={until}')
    log_output = run_git(repo_path, log_cmd)

    if not log_output:
        return None

    commits = []
    for record in log_output.split('\x01'):
        fields = record.lstrip('\n').split('\x00', 3)
        if len(fields) < 4:
            continue
        hash_val, timestamp, subject, body = fields
        try:
            ts = int(timestamp)
            # Naive local time of the machine running the scan.
            dt = datetime.fromtimestamp(ts)
        except (ValueError, OSError, OverflowError):
            continue
        commits.append({
            'hash': hash_val,
            'timestamp': ts,
            'hour': dt.hour,
            'weekday': dt.weekday(),  # 0=Monday
            'month': dt.strftime('%Y-%m'),
            'date': dt.strftime('%Y-%m-%d'),
            'message': subject.strip(),
            'body': body.strip(),
        })

    if not commits:
        return None

    # File statistics are recorded per commit (for every collected commit)
    # so the front end can recompute metrics after filtering.
    file_extensions = Counter()
    file_changes = []

    for c in commits:
        diff_output = run_git(repo_path, [
            # Emit non-ASCII paths verbatim instead of C-quoted octal
            # escapes, which would break suffix and '$'-anchored matching.
            '-c', 'core.quotepath=false',
            'diff-tree', '--no-commit-id',
            # Without --root the initial commit reports no files at all.
            '--root',
            '-r', '--name-only', c['hash'],
        ])
        changed = [name.strip() for name in diff_output.split('\n') if name.strip()]

        file_changes.extend(changed)
        for name in changed:
            ext = Path(name).suffix.lower()
            if ext:
                file_extensions[ext] += 1

        c['changed_files'] = changed
        c['file_change_count'] = len(changed)
        c['test_files'] = sum(1 for name in changed if matches_any(TEST_PATTERNS, name))
        c['doc_files'] = sum(1 for name in changed if matches_any(DOC_PATTERNS, name))
        # Search subject and body so footer trailers are detected too.
        c['ai_message_signal'] = matches_any(
            AI_MESSAGE_PATTERNS, c['message'] + '\n' + c['body']
        )
        c['ai_file_signal'] = any(matches_any(AI_FILE_PATTERNS, name) for name in changed)
        c['ai_signal'] = c['ai_message_signal'] or c['ai_file_signal']

    # Determine the main language from the extensions of changed files.
    lang_map = {
        '.py': 'Python', '.js': 'JavaScript', '.ts': 'TypeScript',
        '.tsx': 'TypeScript', '.jsx': 'JavaScript', '.go': 'Go',
        '.rs': 'Rust', '.java': 'Java', '.cpp': 'C++', '.c': 'C',
        '.rb': 'Ruby', '.php': 'PHP', '.swift': 'Swift',
        '.kt': 'Kotlin', '.scala': 'Scala', '.sh': 'Shell',
        '.ipynb': 'Jupyter', '.md': 'Markdown', '.yaml': 'YAML',
        '.yml': 'YAML', '.json': 'JSON', '.html': 'HTML', '.css': 'CSS'
    }

    lang_counter = Counter()
    for ext, count in file_extensions.items():
        lang_counter[lang_map.get(ext, 'Other')] += count

    main_language = lang_counter.most_common(1)[0][0] if lang_counter else 'Unknown'

    def classify_commit(msg, files):
        """Classify a commit; returns ``(type, source)``.

        The message prefix is tried first (source ``'message'``); otherwise
        the changed file paths decide (source ``'files'``); otherwise the
        result is ``('other', 'unknown')``.
        """
        msg = msg.lower().strip()
        if any(msg.startswith(p) for p in ['feat', 'feature', 'add', 'new']):
            return 'feat', 'message'
        elif any(msg.startswith(p) for p in ['fix', 'bug', 'patch', 'hotfix']):
            return 'fix', 'message'
        elif any(msg.startswith(p) for p in ['doc', 'readme', 'comment']):
            return 'docs', 'message'
        elif any(msg.startswith(p) for p in ['test', 'spec']):
            return 'test', 'message'
        elif any(msg.startswith(p) for p in ['refactor', 'clean', 'restructure']):
            return 'refactor', 'message'
        elif any(msg.startswith(p) for p in ['chore', 'build', 'ci', 'deps']):
            return 'chore', 'message'

        if files:
            test_count = sum(1 for f in files if matches_any(TEST_PATTERNS, f))
            doc_count = sum(1 for f in files if matches_any(DOC_PATTERNS, f))
            config_count = sum(1 for f in files if Path(f).name.lower() in {
                'package.json', 'package-lock.json', 'pnpm-lock.yaml', 'yarn.lock',
                'requirements.txt', 'pyproject.toml', 'poetry.lock', 'dockerfile',
                'docker-compose.yml', 'makefile', 'cmakelists.txt'
            } or any(part in f.lower() for part in ['.github/', 'ci/', '.circleci/']))
            if test_count and test_count >= max(doc_count, len(files) - test_count):
                return 'test', 'files'
            if doc_count and doc_count >= max(test_count, len(files) - doc_count):
                return 'docs', 'files'
            if config_count and config_count >= len(files) / 2:
                return 'chore', 'files'

        return 'other', 'unknown'

    confidence_by_source = {'message': 'high', 'files': 'medium'}
    commit_types = Counter()
    for c in commits:
        c['type'], c['classification_source'] = classify_commit(c['message'], c['changed_files'])
        c['classification_confidence'] = confidence_by_source.get(c['classification_source'], 'low')
        c['low_info'] = any(re.match(p, c['message'].lower()) for p in LOW_INFO_PATTERNS)
        commit_types[c['type']] += 1

    # Time distributions.
    hourly = [0] * 24
    weekly = defaultdict(int)
    monthly = defaultdict(int)
    daily_commits = defaultdict(int)

    for c in commits:
        hourly[c['hour']] += 1
        weekly[c['weekday']] += 1
        monthly[c['month']] += 1
        daily_commits[c['date']] += 1

    active_days = len(daily_commits)

    # 'YYYY-MM-DD' strings sort chronologically.
    dates = sorted(daily_commits.keys())
    first_commit = dates[0] if dates else None
    last_commit = dates[-1] if dates else None

    # Repository-wide test/doc file-change totals (sum of per-commit counts).
    test_files = sum(c['test_files'] for c in commits)
    doc_files = sum(c['doc_files'] for c in commits)

    # Human-readable evidence for AI usage.
    ai_signals = []
    weak_ai_signals = []
    for c in commits:
        if c['ai_message_signal']:
            ai_signals.append(f"AI commit message: {c['message']}")
        if c['ai_file_signal']:
            ai_signals.append(f"AI commit file: {c['message']}")

    # AI-related files touched by commits: readable evidence for the
    # commit-level strong signal.
    for f in file_changes:
        if matches_any(AI_FILE_PATTERNS, f):
            ai_signals.append(f"AI file: {f}")

    # AI tooling present in the working tree is only a weak signal; it is
    # not counted as an AI commit.
    for rel_path in AI_WORKSPACE_PATHS:
        if (Path(repo_path) / rel_path).exists():
            weak_ai_signals.append(f"AI workspace: {rel_path}")

    repo_ai_detected = bool(weak_ai_signals)

    return {
        'name': repo_name,
        'path': repo_path,
        'total_commits': len(commits),
        'first_commit': first_commit,
        'last_commit': last_commit,
        'active_days': active_days,
        'main_language': main_language,
        'commit_types': dict(commit_types),
        'hourly': hourly,
        'weekly': dict(weekly),
        'monthly': dict(monthly),
        'daily_commits': dict(daily_commits),
        'file_extensions': dict(file_extensions.most_common(20)),
        'test_files': test_files,
        'doc_files': doc_files,
        'total_file_changes': len(file_changes),
        'ai_signals': ai_signals[:10],
        'weak_ai_signals': weak_ai_signals[:10],
        'repo_ai_detected': repo_ai_detected,
        'low_info_commits': sum(1 for c in commits if c['low_info']),
        'commit_messages': [c['message'] for c in commits[:50]],
        'commits': [{'ts': c['timestamp'], 'hour': c['hour'], 'weekday': c['weekday'],
                     'month': c['month'], 'type': c['type'],
                     'file_change_count': c['file_change_count'],
                     'test_files': c['test_files'],
                     'doc_files': c['doc_files'],
                     'low_info': c['low_info'],
                     'ai_signal': c['ai_signal'],
                     'repo_ai_signal': repo_ai_detected,
                     'classification_confidence': c['classification_confidence']} for c in commits]
    }
329
+
330
+
331
+ # ============================================================
332
+ # 习惯分析引擎
333
+ # ============================================================
334
def analyze_habits(all_repos):
    """Aggregate per-repository statistics into a cross-project habit profile.

    Args:
        all_repos: List of per-repository dicts in the shape returned by
            ``collect_repo_data``.

    Returns:
        A dict containing the summary totals, the 100-point
        ``habit_score`` breakdown, the six-dimension ``persona``, developer
        tags, merged time distributions, the project ranking, engineering
        health ratios, AI usage statistics and the flattened ``all_commits``
        list.
    """
    # Local import: only this function needs timezone.
    from datetime import timezone

    # ---- Totals ----
    total_commits = sum(r['total_commits'] for r in all_repos)
    total_projects = len(all_repos)

    # ---- Merged time distributions ----
    total_hourly = [0] * 24
    total_weekly = defaultdict(int)
    total_monthly = defaultdict(int)

    for r in all_repos:
        for h in range(24):
            total_hourly[h] += r['hourly'][h]
        for k, v in r['weekly'].items():
            total_weekly[k] += v
        for k, v in r['monthly'].items():
            total_monthly[k] += v

    # ---- Merged commit types ----
    total_types = Counter()
    for r in all_repos:
        for k, v in r['commit_types'].items():
            total_types[k] += v

    # ---- Merged file statistics ----
    total_test_files = sum(r['test_files'] for r in all_repos)
    total_doc_files = sum(r['doc_files'] for r in all_repos)
    total_file_changes = sum(r['total_file_changes'] for r in all_repos)

    # ---- Merged low-information commits ----
    total_low_info = sum(r['low_info_commits'] for r in all_repos)

    # ---- Merged AI signals ----
    all_ai_signals = []
    all_weak_ai_signals = []
    for r in all_repos:
        all_ai_signals.extend(r['ai_signals'])
        all_weak_ai_signals.extend(r.get('weak_ai_signals', []))

    # ---- Active days across all repositories ----
    # A calendar day with commits in several repositories must count once,
    # otherwise the per-day average is deflated. Repos without a
    # 'daily_commits' map fall back to their own 'active_days' count.
    active_dates = set()
    fallback_active_days = 0
    for r in all_repos:
        daily = r.get('daily_commits')
        if daily:
            active_dates.update(daily)
        else:
            fallback_active_days += r['active_days']
    total_active_days = len(active_dates) + fallback_active_days

    # ============================================================
    # Developer Habit Score
    # ============================================================

    # 1. Commit granularity (30 points): linear in commits per active day,
    #    capped at 4.5 commits/day.
    avg_commits_per_day = total_commits / max(total_active_days, 1)
    granularity_score = round(min(30, avg_commits_per_day / 4.5 * 30))

    # 2. Test awareness (20 points): full marks at 15% test file changes.
    test_ratio = total_test_files / max(total_file_changes, 1)
    test_score = round(min(20, test_ratio / 0.15 * 20))

    # 3. Documentation awareness (15 points): full marks at 10%.
    doc_ratio = total_doc_files / max(total_file_changes, 1)
    doc_score = round(min(15, doc_ratio / 0.10 * 15))

    # 4. Schedule regularity (20 points): share of commits made in hours
    #    22-23 and 0-4; the score reaches 0 at a 40% share.
    late_night_commits = (sum(total_hourly[h] for h in range(22, 24))
                          + sum(total_hourly[h] for h in range(0, 5)))
    night_ratio = late_night_commits / max(total_commits, 1)
    schedule_score = round(min(20, max(0, (1 - night_ratio / 0.4)) * 20))

    # 5. Project focus (15 points):
    #    Focus Index = commits in the top 3 projects / total commits.
    sorted_repos = sorted(all_repos, key=lambda x: x['total_commits'], reverse=True)
    top3_commits = sum(r['total_commits'] for r in sorted_repos[:3])
    focus_index = top3_commits / max(total_commits, 1)
    focus_score = round(min(15, focus_index / 0.7 * 15))

    total_score = granularity_score + test_score + doc_score + schedule_score + focus_score

    # ============================================================
    # Developer persona (DevPersona): six spectrum dimensions
    # ============================================================

    # 1. Time (T): D=Day, N=Night. spectrum: 0=all day, 100=all night.
    #    Uses a wider night window (hours 20-23 and 0-5) than the schedule
    #    score above; hours 6-7 belong to neither bucket.
    day_commits = sum(total_hourly[h] for h in range(8, 20))
    night_commits = (sum(total_hourly[h] for h in range(20, 24))
                     + sum(total_hourly[h] for h in range(0, 6)))
    time_spectrum = round(night_commits / max(day_commits + night_commits, 1) * 100)
    time_type = 'N' if time_spectrum > 50 else 'D'

    # 2. Rhythm (R): S=Sprint, M=Marathon. spectrum: 100 at 5 commits/day.
    rhythm_spectrum = round(min(avg_commits_per_day / 5 * 100, 100))
    rhythm_type = 'S' if rhythm_spectrum > 50 else 'M'

    # 3. Focus (F): C=Concentrated, D=Distributed.
    focus_spectrum = round(focus_index * 100)
    focus_type = 'C' if focus_spectrum > 50 else 'D'

    # 4. Style (S): P=Pioneer, G=Guardian.
    #    spectrum: 0=pure maintenance, 100=pure new features.
    feat_ratio = total_types.get('feat', 0) / max(total_commits, 1)
    fix_ratio = total_types.get('fix', 0) / max(total_commits, 1)
    refactor_ratio = total_types.get('refactor', 0) / max(total_commits, 1)
    maintenance_ratio = (fix_ratio + refactor_ratio + total_types.get('chore', 0) / max(total_commits, 1))
    style_spectrum = round(feat_ratio / max(feat_ratio + maintenance_ratio, 0.01) * 100)
    style_type = 'P' if style_spectrum > 50 else 'G'

    # 5. Engineering (E): R=Rapid, Q=Quality.
    #    spectrum: 100 when test + doc changes reach 25% of file changes.
    eng_spectrum = round(min((test_ratio + doc_ratio) / 0.25 * 100, 100))
    eng_type = 'Q' if eng_spectrum > 50 else 'R'

    # 6. AI (A): H=Handcraft, A=AI-assisted.
    explicit_ai_commit_count_for_persona = sum(
        1 for r in all_repos for c in r.get('commits', []) if c.get('ai_signal')
    )
    ai_tooling_commit_count_for_persona = sum(
        r['total_commits'] for r in all_repos if r.get('repo_ai_detected')
    )
    explicit_ai_ratio_for_persona = explicit_ai_commit_count_for_persona / max(total_commits, 1)
    ai_tooling_ratio_for_persona = ai_tooling_commit_count_for_persona / max(total_commits, 1)
    ai_detected = explicit_ai_commit_count_for_persona >= 3 or len(all_weak_ai_signals) >= 1
    # Commit-level evidence is the strong signal; workspace tooling is a
    # weak signal and contributes at most 15 points.
    ai_spectrum = round(min(explicit_ai_ratio_for_persona * 300 + min(ai_tooling_ratio_for_persona * 15, 15), 100))
    ai_type = 'A' if ai_spectrum > 50 else 'H'

    # Six-letter persona code.
    persona_code = time_type + rhythm_type + focus_type + style_type + eng_type + ai_type

    def generate_persona_name(code):
        """Build the persona name, icon and descriptions from the 6-letter code."""
        t, r, f, s, e, a = code[0], code[1], code[2], code[3], code[4], code[5]

        # Core persona (time x rhythm).
        CORE = {
            'NS': {'name': '深夜闪电侠', 'icon': '⚡', 'desc': '凌晨两点还在敲代码,提交速度飞快,是夜晚效率之王'},
            'NM': {'name': '午夜造物主', 'icon': '🌌', 'desc': '深夜独处时灵感爆发,从零开始构建一切,享受安静的创造'},
            'DS': {'name': '晨曦突击手', 'icon': '🚀', 'desc': '早上开工就是一顿猛冲,快速迭代是你的核心竞争力'},
            'DM': {'name': '日光打磨者', 'icon': '☀️', 'desc': '白天稳步推进,像匠人一样打磨每一行代码,稳健是你的代名词'},
        }
        core = CORE.get(t + r, {'name': '独特开发者', 'icon': '💻', 'desc': '你的开发风格独一无二'})

        # Style modifier (development-style dimension).
        if s == 'P':
            style_mod = '黑客' if t == 'N' else '创造者'
        else:
            style_mod = '工匠'

        # Engineering modifier: only added for the quality-oriented type.
        eng_mod = ''
        if e == 'Q':
            eng_mod = '全能' if f == 'D' else '质量派'

        name = core['name']
        if style_mod:
            name += ' · ' + style_mod
        if eng_mod:
            name += ' · ' + eng_mod

        icon = core['icon']

        # Per-dimension description, with spectrum percentages.
        desc_parts = []
        desc_parts.append(f'夜间 {time_spectrum}%' if t == 'N' else f'白天 {100 - time_spectrum}%')
        desc_parts.append(f'高频提交 {rhythm_spectrum}%' if r == 'S' else f'深度专注 {100 - rhythm_spectrum}%')
        desc_parts.append(f'专注核心 {focus_spectrum}%' if f == 'C' else f'多项目并行 {100 - focus_spectrum}%')
        desc_parts.append(f'推进新功能 {style_spectrum}%' if s == 'P' else f'维护系统 {100 - style_spectrum}%')
        desc_parts.append(f'注重质量 {eng_spectrum}%' if e == 'Q' else f'快速迭代 {100 - eng_spectrum}%')
        desc_parts.append('善用 AI' if a == 'A' else '纯手工开发')

        return {
            'name': name,
            'icon': icon,
            'desc': core['desc'],
            'detail': ','.join(desc_parts[:4])  # only the first 4 traits are shown
        }

    persona_info = generate_persona_name(persona_code)
    persona = {
        'code': persona_code,
        'name': persona_info['name'],
        'icon': persona_info['icon'],
        'desc': persona_info['desc'],
        'detail': persona_info['detail']
    }

    # Primary tag.
    developer_tags = [
        {
            'icon': persona['icon'],
            'name': persona['name'],
            'desc': persona['desc'],
            'detail': persona['detail'],
            'code': persona_code,
            'is_primary': True
        }
    ]

    # Supplementary tags. Weekday keys 5 and 6 are Saturday and Sunday.
    weekend_commits = total_weekly.get(5, 0) + total_weekly.get(6, 0)
    weekend_ratio = weekend_commits / max(total_commits, 1)

    if weekend_ratio >= 0.3:
        developer_tags.append({'icon': '📅', 'name': '周末战士', 'desc': f'周末提交占比 {weekend_ratio*100:.0f}%'})

    if ai_detected:
        developer_tags.append({'icon': '🤖', 'name': 'AI 协作者', 'desc': '使用 AI 工具辅助开发'})

    if test_ratio >= 0.15:
        developer_tags.append({'icon': '✅', 'name': '测试达人', 'desc': f'测试覆盖 {test_ratio*100:.0f}%'})
    elif test_ratio < 0.05:
        developer_tags.append({'icon': '⚠️', 'name': '测试待加强', 'desc': f'测试覆盖仅 {test_ratio*100:.0f}%'})

    if doc_ratio >= 0.10:
        developer_tags.append({'icon': '📚', 'name': '文档之星', 'desc': '文档维护优秀'})
    elif doc_ratio < 0.03:
        developer_tags.append({'icon': '📝', 'name': '文档债务', 'desc': '文档投入不足'})

    if total_projects >= 10:
        developer_tags.append({'icon': '🎪', 'name': '多面手', 'desc': f'同时维护 {total_projects} 个项目'})

    if night_commits > day_commits:
        developer_tags.append({'icon': '🌙', 'name': '夜猫子', 'desc': '夜间比白天更活跃'})

    if avg_commits_per_day >= 5:
        developer_tags.append({'icon': '🏃', 'name': '暴风提交', 'desc': f'日均提交 {avg_commits_per_day:.1f} 次'})
    elif avg_commits_per_day < 0.5:
        developer_tags.append({'icon': '🦥', 'name': '佛系开发', 'desc': f'日均提交仅 {avg_commits_per_day:.1f} 次'})

    if refactor_ratio >= 0.1:
        developer_tags.append({'icon': '🔧', 'name': '重构狂魔', 'desc': f'重构占比 {refactor_ratio*100:.0f}%'})

    low_info_ratio = total_low_info / max(total_commits, 1)
    if low_info_ratio < 0.05:
        developer_tags.append({'icon': '✍️', 'name': '精确提交', 'desc': 'Commit 信息质量高'})
    elif low_info_ratio > 0.3:
        developer_tags.append({'icon': '😶', 'name': '沉默提交', 'desc': f'{low_info_ratio*100:.0f}% 的 commit 缺少描述'})

    if feat_ratio >= 0.5:
        developer_tags.append({'icon': '🚀', 'name': '功能先锋', 'desc': f'功能开发占比 {feat_ratio*100:.0f}%'})

    if weekend_ratio >= 0.3 and night_commits > day_commits:
        developer_tags.append({'icon': '💀', 'name': '爆肝战士', 'desc': '周末 + 深夜双杀'})

    # Keep at most 6 tags (the primary one included).
    developer_tags = developer_tags[:6]

    # ============================================================
    # Project ranking + raw commit data
    # ============================================================
    project_ranking = []
    all_commits = []
    for r in sorted_repos:
        project_ranking.append({
            'name': r['name'],
            'commits': r['total_commits'],
            'language': r['main_language'],
            'active_days': r['active_days'],
            'first_commit': r['first_commit'],
            'last_commit': r['last_commit'],
            'hourly': r['hourly'],
            'monthly': r['monthly'],
            'raw_commits': r.get('commits', [])
        })
        for c in r.get('commits', []):
            all_commits.append({**c, 'project': r['name']})

    # ============================================================
    # AI usage statistics
    # ============================================================
    ai_commit_count = sum(1 for c in all_commits if c.get('ai_signal'))
    ai_tooling_commit_count = sum(1 for c in all_commits if c.get('repo_ai_signal'))
    ai_commit_ratio = ai_commit_count / max(total_commits, 1)
    ai_tooling_ratio = ai_tooling_commit_count / max(total_commits, 1)

    monthly_ai = defaultdict(int)
    for c in all_commits:
        month = c.get('month', '')
        if month and c.get('ai_signal'):
            monthly_ai[month] += 1

    # Attribute each signal string to the first tool name it mentions.
    tool_counts = {'Claude': 0, 'Cursor': 0, 'Codex': 0, 'Copilot': 0, 'Other': 0}
    for signal in all_ai_signals + all_weak_ai_signals:
        signal_lower = signal.lower()
        if 'claude' in signal_lower:
            tool_counts['Claude'] += 1
        elif 'cursor' in signal_lower:
            tool_counts['Cursor'] += 1
        elif 'codex' in signal_lower:
            tool_counts['Codex'] += 1
        elif 'copilot' in signal_lower:
            tool_counts['Copilot'] += 1
        else:
            tool_counts['Other'] += 1
    tool_counts = {k: v for k, v in tool_counts.items() if v > 0}

    # ============================================================
    # Time habits
    # ============================================================

    # Three busiest hours.
    peak_hours = sorted(range(24), key=lambda h: total_hourly[h], reverse=True)[:3]

    # Three busiest weekdays.
    weekday_names = ['周一', '周二', '周三', '周四', '周五', '周六', '周日']
    peak_weekdays = sorted(range(7), key=lambda d: total_weekly.get(d, 0), reverse=True)[:3]

    # ============================================================
    # Final payload
    # ============================================================

    analysis = {
        # The 'Z' suffix denotes UTC, so the timestamp is taken in UTC.
        'generated_at': datetime.now(timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ'),
        # NOTE: this is the module default, not necessarily the directory
        # actually scanned; main() records the real ones under 'filters'.
        'scan_dir': DEFAULT_SCAN_DIR,

        # Overview
        'summary': {
            'total_projects': total_projects,
            'total_commits': total_commits,
            'total_active_days': total_active_days,
            'avg_commits_per_day': round(avg_commits_per_day, 1)
        },

        # Developer Habit Score
        'habit_score': {
            'total': total_score,
            'granularity': granularity_score,
            'test_awareness': test_score,
            'doc_awareness': doc_score,
            'schedule': schedule_score,
            'focus': focus_score
        },

        # Developer persona (DevPersona)
        'persona': {
            'code': persona_code,
            'name': persona['name'],
            'icon': persona['icon'],
            'desc': persona['desc'],
            'detail': persona['detail'],
            'dimensions': {
                'time': {'code': time_type, 'spectrum': time_spectrum, 'left': '白天型', 'right': '夜猫型'},
                'rhythm': {'code': rhythm_type, 'spectrum': rhythm_spectrum, 'left': '马拉松型', 'right': '冲刺型'},
                'focus': {'code': focus_type, 'spectrum': focus_spectrum, 'left': '分散型', 'right': '专注型'},
                'style': {'code': style_type, 'spectrum': style_spectrum, 'left': '守护型', 'right': '先锋型'},
                'engineering': {'code': eng_type, 'spectrum': eng_spectrum, 'left': '快速迭代', 'right': '质量导向'},
                'ai': {'code': ai_type, 'spectrum': ai_spectrum, 'left': '手工型', 'right': 'AI 协作型'}
            }
        },

        # Developer tags
        'developer_tags': developer_tags,

        # Time distributions
        'hourly': total_hourly,
        'weekly': dict(total_weekly),
        'monthly': dict(total_monthly),
        'peak_hours': peak_hours,
        'peak_weekdays': [weekday_names[d] for d in peak_weekdays],

        # Project data
        'projects': project_ranking,

        # Commit types
        'commit_types': dict(total_types),

        # Engineering health (percentages)
        'engineering_health': {
            'test_ratio': round(test_ratio * 100, 1),
            'doc_ratio': round(doc_ratio * 100, 1),
            'feat_ratio': round(feat_ratio * 100, 1),
            'fix_ratio': round(fix_ratio * 100, 1),
            'refactor_ratio': round(refactor_ratio * 100, 1),
            'night_ratio': round(night_ratio * 100, 1),
            'weekend_ratio': round(weekend_ratio * 100, 1),
            'low_info_ratio': round(total_low_info / max(total_commits, 1) * 100, 1)
        },

        # AI signals
        'ai_signals': {
            'detected': ai_detected,
            'count': len(all_ai_signals) + len(all_weak_ai_signals),
            'examples': (all_ai_signals + all_weak_ai_signals)[:5],
            'strong_signal_count': len(all_ai_signals),
            'weak_signal_count': len(all_weak_ai_signals),
            'ai_commit_count': ai_commit_count,
            'ai_commit_ratio': round(ai_commit_ratio * 100, 1),
            'ai_tooling_commit_count': ai_tooling_commit_count,
            'ai_tooling_ratio': round(ai_tooling_ratio * 100, 1),
            'ai_influence_score': ai_spectrum,
            'monthly_ai': dict(monthly_ai),
            'tools': tool_counts
        },

        # Project focus index
        'focus_index': round(focus_index * 100, 1),

        # Night-owl index
        'night_owl_index': round(night_ratio * 100, 1),

        # Raw commit data (used for front-end filtering)
        'all_commits': all_commits
    }

    return analysis
741
+
742
+
743
+ # ============================================================
744
+ # 主函数
745
+ # ============================================================
746
def main(scan_dir=None, since=None, until=None, project=None, output_dir=None, max_depth=3):
    """Scan for repositories, analyse them and write the JSON data file.

    Args:
        scan_dir: A directory, or a list/tuple of directories, to scan;
            defaults to the current working directory.
        since: Optional lower date bound forwarded to ``collect_repo_data``.
        until: Optional upper date bound forwarded to ``collect_repo_data``.
        project: Optional case-insensitive substring; only repositories
            whose directory name contains it are analysed.
        output_dir: Directory for the output file (created if missing);
            defaults to the current working directory.
        max_depth: Scan depth forwarded to ``find_git_repos``.

    Returns:
        The analysis dict, or ``None`` when no repository or no data was
        found.
    """
    if scan_dir is None:
        scan_dir = os.getcwd()
    if isinstance(scan_dir, (list, tuple)):
        scan_dirs = scan_dir
    else:
        scan_dirs = [scan_dir]

    if output_dir is None:
        output_dir = os.getcwd()
    output_dir = os.path.abspath(os.path.expanduser(output_dir))
    os.makedirs(output_dir, exist_ok=True)

    # Banner describing the run.
    print("🔍 扫描目录:")
    for entry in scan_dirs:
        resolved = os.path.abspath(os.path.expanduser(entry))
        print(f" - {resolved}")
    print(f"🔎 扫描深度: {max_depth}")
    if since or until:
        print(f"📅 时间范围: {since or '起始'} ~ {until or '至今'}")
    if project:
        print(f"🎯 指定项目: {project}")
    print("=" * 50)

    # Step 1: discover repositories, de-duplicated by absolute path while
    # keeping discovery order.
    discovered = (
        os.path.abspath(found)
        for entry in scan_dirs
        for found in find_git_repos(entry, max_depth=max_depth)
    )
    repos = list(dict.fromkeys(discovered))

    # Optional fuzzy filter on the repository directory name.
    if project:
        needle = project.lower()
        repos = [path for path in repos if needle in os.path.basename(path).lower()]
        if not repos:
            print(f"❌ 未找到匹配 '{project}' 的项目")
            return None

    print(f"📁 发现 {len(repos)} 个 Git 仓库")

    if not repos:
        print("❌ 未发现任何 Git 仓库")
        return None

    # Step 2: collect data for each repository.
    all_repos = []
    repo_total = len(repos)
    for index, repo_path in enumerate(repos, 1):
        print(f"[{index}/{repo_total}] 分析: {os.path.basename(repo_path)}...", end=" ")

        data = collect_repo_data(repo_path, since=since, until=until)
        if not data:
            print("✗ (无数据)")
            continue
        all_repos.append(data)
        print(f"✓ ({data['total_commits']} commits)")

    if not all_repos:
        print("❌ 未收集到任何数据")
        return None

    # Step 3: analyse habits.
    print("\n" + "=" * 50)
    print("📊 分析开发习惯...")
    analysis = analyze_habits(all_repos)

    # Record the filter conditions alongside the results.
    analysis['filters'] = dict(
        since=since,
        until=until,
        project=project,
        scan_dir=scan_dirs,
        max_depth=max_depth,
    )

    # Step 4: persist the data.
    output_path = os.path.join(output_dir, OUTPUT_DATA)
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(analysis, f, ensure_ascii=False, indent=2)

    summary = analysis['summary']
    print(f"\n✅ 数据已保存到: {output_path}")
    print(f"📊 总计: {summary['total_projects']} 个项目, {summary['total_commits']} 次提交")
    print(f"🏆 Developer Habit Score: {analysis['habit_score']['total']}/100")

    return analysis
830
+
831
+
832
if __name__ == '__main__':
    import argparse

    # Command-line entry point: zero or more scan directories plus filters.
    cli = argparse.ArgumentParser()
    cli.add_argument('scan_dir', nargs='*', default=None)
    cli.add_argument('--since', help='起始日期')
    cli.add_argument('--until', help='截止日期')
    cli.add_argument('--project', help='指定项目')
    cli.add_argument('--output-dir', default=None, help='输出目录')
    cli.add_argument('--max-depth', type=int, default=3, help='扫描目录深度')
    opts = cli.parse_args()

    # An empty positional list becomes None so main() falls back to the cwd.
    main(
        scan_dir=opts.scan_dir or None,
        since=opts.since,
        until=opts.until,
        project=opts.project,
        output_dir=opts.output_dir,
        max_depth=opts.max_depth,
    )