gitinsight-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,225 @@
1
+ """
2
+ git_reader.py — 从 Git 仓库提取完整提交记录与文件变更统计。
3
+
4
+ 输出:
5
+ - commits_df: 每行一条提交 (hash, author, email, datetime_str, message, insertions, deletions)
6
+ - file_stats_df: 每行一个文件变更 (hash, filepath, insertions, deletions)
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import os
12
+ import re
13
+ import subprocess
14
+ from typing import Optional
15
+
16
+ from loguru import logger
17
+ from tqdm import tqdm
18
+
19
+ import pandas as pd
20
+
21
+ # 分隔符,用于区分每条提交
22
+ _COMMIT_SEP = "---COMMIT_BOUNDARY---"
23
+
24
+ _GIT_LOG_FORMAT = f"{_COMMIT_SEP}%n%H%n%an%n%ae%n%ad%n%s"
25
+
26
+
27
+ def _count_commits(git_dir: str) -> Optional[int]:
28
+ """快速获取仓库的总提交数(用于进度条)。"""
29
+ try:
30
+ result = subprocess.run(
31
+ ["git", "-C", git_dir, "rev-list", "--all", "--count"],
32
+ capture_output=True,
33
+ text=True,
34
+ encoding="utf-8",
35
+ errors="replace",
36
+ )
37
+ if result.returncode == 0:
38
+ return int(result.stdout.strip())
39
+ except (FileNotFoundError, ValueError):
40
+ pass
41
+ return None
42
+
43
+
44
def get_git_log(git_dir: str, *, cache_ttl: float = 86400.0) -> Optional[str]:
    """Run ``git log --all --numstat`` for *git_dir* and return its raw output.

    A progress bar is shown while the log is read. The output is cached under
    ``~/.cache/gitinsight`` in a file named after a hash of the resolved
    repository path; a cache file younger than *cache_ttl* seconds is returned
    without invoking git.

    Args:
        git_dir: Path to the repository (or to its ``.git`` directory).
        cache_ttl: Maximum age, in seconds, of a cache file that may be
            reused. A value ``<= 0`` skips the cache lookup; the fresh output
            is still written to the cache afterwards.

    Returns:
        The text printed by ``git log``, or ``None`` when the directory does
        not exist, git is not installed, or git exits with a non-zero status.
    """
    import hashlib
    import tempfile
    import time
    from pathlib import Path

    if not os.path.exists(git_dir):
        logger.error(f"❌ 错误:目录 '{git_dir}' 不存在。")
        return None

    # Key the cache on the working-tree root so that passing either the
    # repository root or its ".git" directory maps to the same cache file.
    git_path = Path(git_dir).resolve()
    if git_path.name == ".git":
        git_path = git_path.parent

    # Caching is best-effort: if the cache directory cannot be determined or
    # created, continue without a cache instead of failing the whole call.
    cache_path: Optional[Path] = None
    try:
        cache_dir = Path.home() / ".cache" / "gitinsight"
        cache_dir.mkdir(parents=True, exist_ok=True)
        # md5 is used only to derive a short file name, not for security;
        # usedforsecurity=False keeps it usable on FIPS-restricted builds.
        repo_hash = hashlib.md5(
            str(git_path).encode("utf-8"), usedforsecurity=False
        ).hexdigest()[:12]
        cache_path = cache_dir / f".git_log_{repo_hash}.cache"
    except (OSError, RuntimeError) as e:
        logger.warning(f"⚠️ 无法创建缓存目录: {e}")

    # Reuse the cached log when it is fresh enough.
    if cache_path is not None and cache_ttl > 0 and cache_path.exists():
        try:
            age = time.time() - cache_path.stat().st_mtime
            if age < cache_ttl:
                logger.info("✅ 发现有效的 Git 日志缓存,直接读取...")
                with open(cache_path, "r", encoding="utf-8") as f:
                    return f.read()
            logger.info("ℹ️ Git 日志缓存已过期,正在重新读取...")
        except (OSError, UnicodeDecodeError) as e:
            logger.warning(f"⚠️ 读取缓存失败: {e}")

    # Total commit count (may be None) sizes the progress bar.
    total_commits = _count_commits(git_dir)

    command = [
        "git",
        "-C",
        git_dir,
        "log",
        "--all",
        f"--pretty=format:{_GIT_LOG_FORMAT}",
        "--date=iso",
        "--numstat",
        "--no-color",
    ]

    lines: list[str] = []
    # stderr goes to a temporary file rather than a pipe: stdout is drained
    # to EOF first, and an unread stderr pipe could fill up and block git.
    with tempfile.TemporaryFile() as stderr_file:
        try:
            proc = subprocess.Popen(
                command,
                stdout=subprocess.PIPE,
                stderr=stderr_file,
                text=True,
                encoding="utf-8",
                errors="replace",
            )
        except FileNotFoundError:
            logger.error("❌ 错误:未找到 'git' 命令。请确保 Git 已安装并添加到 PATH。")
            return None

        # Leaving the Popen context closes the stdout pipe and waits for git.
        with proc:
            with tqdm(
                total=total_commits,
                desc="正在读取 Git 日志",
                unit="commit",
                leave=False,
            ) as pbar:
                for line in proc.stdout:  # type: ignore[union-attr]
                    lines.append(line)
                    # Count only real separator lines, not subjects that
                    # happen to contain the separator text.
                    if line.rstrip("\r\n") == _COMMIT_SEP:
                        pbar.update(1)

        stderr_file.seek(0)
        stderr_output = stderr_file.read().decode("utf-8", errors="replace")

    if proc.returncode != 0:
        logger.error(f"❌ Git 命令执行失败:{stderr_output}")
        logger.error(f" 仓库路径:{os.path.abspath(git_dir)}")
        logger.error(" 请确认这是一个有效的 Git 仓库。")
        return None

    result = "".join(lines)

    # Write the cache through a temporary file and rename it into place so an
    # interrupted write never leaves a truncated file that looks valid.
    if cache_path is not None:
        tmp_path = cache_path.with_name(f"{cache_path.name}.{os.getpid()}.tmp")
        try:
            with open(tmp_path, "w", encoding="utf-8") as f:
                f.write(result)
            os.replace(tmp_path, cache_path)
            logger.info(f"✅ Git 日志已缓存至: {cache_path}")
        except (OSError, UnicodeError) as e:
            logger.warning(f"⚠️ 写入缓存失败: {e}")
            try:
                tmp_path.unlink()
            except OSError:
                # Nothing to clean up (or cleanup impossible); the cache is
                # optional, so this is deliberately ignored.
                pass

    return result
136
+
137
+
138
+ # numstat 行的正则: <insertions>\t<deletions>\t<filepath>
139
+ # 二进制文件显示为 -\t-\tfilepath
140
+ _NUMSTAT_RE = re.compile(r"^(\d+|-)\t(\d+|-)\t(.+)$")
141
+
142
+
143
def parse_git_log(stdout: str) -> tuple[pd.DataFrame, pd.DataFrame]:
    """Parse the text produced by ``get_git_log`` into two DataFrames.

    Each commit block consists of a separator line followed by five header
    lines (hash, author, email, date, subject) and then zero or more numstat
    lines of the form ``<insertions>\\t<deletions>\\t<filepath>``. A count of
    ``-`` (reported by git for binary files) is treated as 0.

    Args:
        stdout: Raw ``git log`` output using the module's separator format.

    Returns:
        A ``(commits_df, file_stats_df)`` tuple:
            commits_df: one row per commit with columns hash, author, email,
                datetime_str, message, insertions, deletions (the last two
                are the per-commit totals).
            file_stats_df: one row per changed file with columns hash,
                filepath, insertions, deletions.
    """
    commits: list[dict] = []
    file_stats: list[dict] = []

    # Split only where the separator fills a whole line, so a commit subject
    # that merely contains the separator text does not cut a commit in two.
    boundary = re.compile(rf"^{re.escape(_COMMIT_SEP)}\r?$", re.MULTILINE)
    blocks = boundary.split(stdout)

    for block in tqdm(blocks, desc="正在解析提交记录", unit="commit", leave=False):
        block = block.strip()
        if not block:
            continue

        lines = block.split("\n")
        # Hash, author, email and date are always present. The subject line
        # may be missing here: when the subject is empty and the commit has
        # no file changes, strip() above has removed it. Such a commit is
        # kept with an empty message instead of being dropped.
        if len(lines) < 4:
            continue

        commit_hash = lines[0].strip()
        author = lines[1].strip()
        email = lines[2].strip()
        datetime_str = lines[3].strip()
        message = lines[4].strip() if len(lines) > 4 else ""

        total_ins = 0
        total_del = 0

        # Everything after the five header lines is numstat output, possibly
        # interleaved with blank lines.
        for raw_line in lines[5:]:
            line = raw_line.strip()
            if not line:
                continue
            m = _NUMSTAT_RE.match(line)
            if m is None:
                continue
            ins_str, del_str, filepath = m.groups()
            ins = int(ins_str) if ins_str != "-" else 0
            dels = int(del_str) if del_str != "-" else 0
            total_ins += ins
            total_del += dels
            file_stats.append(
                {
                    "hash": commit_hash,
                    "filepath": filepath,
                    "insertions": ins,
                    "deletions": dels,
                }
            )

        commits.append(
            {
                "hash": commit_hash,
                "author": author,
                "email": email,
                "datetime_str": datetime_str,
                "message": message,
                "insertions": total_ins,
                "deletions": total_del,
            }
        )

    # Explicit column lists keep the schema stable even when no rows exist.
    commits_df = pd.DataFrame(
        commits,
        columns=[
            "hash",
            "author",
            "email",
            "datetime_str",
            "message",
            "insertions",
            "deletions",
        ],
    )
    file_stats_df = pd.DataFrame(
        file_stats,
        columns=["hash", "filepath", "insertions", "deletions"],
    )

    return commits_df, file_stats_df
gitinsight/report.py ADDED
@@ -0,0 +1,26 @@
1
+ """
2
+ report.py — 控制台摘要输出。
3
+ """
4
+ from __future__ import annotations
5
+
6
+ from typing import Dict
7
+
8
+
9
def print_summary(
    metrics: Dict[str, object],
    filter_stats,
    outputs: Dict[str, str],
) -> None:
    """Print the analysis summary to standard output.

    Args:
        metrics: Summary values looked up by key (``total_commits``,
            ``total_authors``, ``active_authors_6m``, ``net_lines``,
            ``project_lifecycle_days``, ``date_range``); a missing key falls
            back to ``0`` (or ``'无'`` for ``date_range``).
        filter_stats: Object whose ``before`` and ``removed`` attributes are
            printed with thousands separators.
        outputs: Output file paths; only the ``html`` entry is printed.
    """
    heavy_rule = "=" * 50
    light_rule = "-" * 50
    lookup = metrics.get

    def _report_lines():
        # Lines are produced lazily so each one is formatted immediately
        # before it is printed, in the order they appear in the report.
        yield "\n" + heavy_rule
        yield " Git 项目人员分析报告摘要"
        yield heavy_rule
        yield f"总提交次数: {lookup('total_commits', 0):,} (过滤前 {filter_stats.before:,}, 过滤掉 {filter_stats.removed:,} 条自动化提交)"
        yield f"参与开发者: {lookup('total_authors', 0)} 人"
        yield f"活跃开发者(近半年): {lookup('active_authors_6m', 0)} 人"
        yield f"代码净增长: {lookup('net_lines', 0):+,} 行"
        yield f"项目生命周期: {lookup('project_lifecycle_days', 0):,} 天"
        yield f"数据范围: {lookup('date_range', '无')}"
        yield light_rule
        yield "[产出文件]"
        yield f" 可视化仪表板: {outputs.get('html', '')}"
        yield heavy_rule

    for text in _report_lines():
        print(text)
@@ -0,0 +1,152 @@
1
+ Metadata-Version: 2.4
2
+ Name: gitinsight-cli
3
+ Version: 0.1.0
4
+ Summary: A powerful collection of Git repository analysis tools and visualizations.
5
+ Project-URL: Homepage, https://github.com/Rain-kl/GitInsight
6
+ Project-URL: Bug Tracker, https://github.com/Rain-kl/GitInsight/issues
7
+ Author-email: Ryan <ryan@arctel.net>
8
+ License-Expression: MIT
9
+ Classifier: License :: OSI Approved :: MIT License
10
+ Classifier: Operating System :: OS Independent
11
+ Classifier: Programming Language :: Python :: 3
12
+ Requires-Python: >=3.12
13
+ Requires-Dist: loguru>=0.7.3
14
+ Requires-Dist: pandas>=3.0.0
15
+ Requires-Dist: pyecharts>=2.0.0
16
+ Requires-Dist: tqdm>=4.67.3
17
+ Description-Content-Type: text/markdown
18
+
19
+ # GitInsight
20
+
21
+ Git Repository Analysis and Visualization Tool / Git 仓库分析与可视化工具
22
+
23
+ <!-- PROJECT SHIELDS -->
24
+
25
+ [![MIT License][license-shield]][license-url]
26
+
27
+ [![Python Version][python-version]][python-version]
28
+
29
+ ---
30
+
31
+ ## 目录
32
+
33
+ - [GitInsight](#gitinsight)
34
+ - [目录](#目录)
35
+ - [上手指南](#上手指南)
36
+ - [开发前的配置要求](#开发前的配置要求)
37
+ - [**安装步骤**](#安装步骤)
38
+ - [文件目录说明](#文件目录说明)
39
+ - [开发的架构](#开发的架构)
40
+ - [部署](#部署)
41
+ - [使用到的框架](#使用到的框架)
42
+ - [贡献者](#贡献者)
43
+ - [如何参与开源项目](#如何参与开源项目)
44
+
45
+ ### 上手指南
46
+
47
+ ###### 开发前的配置要求
48
+
49
+ 1. Python >= 3.12
50
+ 2. uv (推荐使用 uv 进行包管理) 或 pip
51
+
52
+ ###### **安装步骤**
53
+
54
+ 1. Clone the repo
55
+
56
+ ```sh
57
+ git clone https://github.com/Rain-kl/GitInsight.git
58
+ cd GitInsight
59
+ ```
60
+
61
+ 2. Install as a command-line tool
62
+
63
+ ```sh
64
+ # Install from PyPI
65
+ uv tool install gitinsight-cli
66
+
67
+ # Install from the current checkout
68
+ uv tool install .
69
+
70
+ # Or install directly from GitHub
71
+ uv tool install git+https://github.com/Rain-kl/GitInsight.git
72
+ ```
73
+
74
+ 3. Run the tool
75
+
76
+ ```sh
77
+ gitinsight /path/to/git/repository
78
+ ```
79
+
80
+ Upgrade or uninstall the tool:
81
+
82
+ ```sh
83
+ uv tool upgrade gitinsight-cli
84
+ uv tool uninstall gitinsight-cli
85
+ ```
86
+
87
+ For local development:
88
+
89
+ ```sh
90
+ uv sync
91
+ uv run gitinsight /path/to/git/repository
92
+ ```
93
+
94
+ ### 文件目录说明
95
+
96
+ ```
97
+ GitInsight
98
+ ├── .github/ # GitHub 配置
99
+ ├── gitinsight/ # 源代码目录
100
+ │ ├── __init__.py
101
+ │ ├── __main__.py # 程序入口
102
+ │ ├── analysis.py # 分析逻辑 (Pandas)
103
+ │ ├── charts.py # 图表绘制 (Pyecharts)
104
+ │ ├── dashboard.py # 仪表盘展示
105
+ │ ├── git_reader.py # Git日志读取
106
+ │ └── report.py # 报告生成
107
+ ├── .gitignore
108
+ ├── pyproject.toml # 项目配置与依赖
109
+ ├── uv.lock # 依赖锁定文件
110
+ └── README.md # 项目说明
111
+ ```
112
+
113
+ ### 开发的架构
114
+
115
+ 本项目主要由数据读取、数据分析、图表生成和仪表盘展示四个部分组成。
116
+ - `git_reader.py`: 负责读取 Git 仓库的提交日志。
117
+ - `analysis.py`: 使用 Pandas 对日志数据进行清洗和统计分析。
118
+ - `charts.py`: 使用 Pyecharts 生成各类可视化图表。
119
+ - `dashboard.py`: 整合图表生成 HTML 仪表盘。
120
+
121
+ 请阅读架构文档(如有)以了解该项目的架构。
122
+
123
+ ### 部署
124
+
125
+ 本项目为 Python 命令行工具,可直接本地运行或打包发布到 PyPI。
126
+ 暂无特殊部署要求。
127
+
128
+ ### 使用到的框架
129
+
130
+ - [Pandas](https://pandas.pydata.org/) - Data structures and analysis tools
131
+ - [Pyecharts](https://github.com/pyecharts/pyecharts) - Python Echarts Plotting Library
132
+ - [Loguru](https://github.com/Delgan/loguru) - Python logging made (stupidly) simple
133
+ - [Hatchling](https://hatch.pypa.io/latest/) - Modern, extensible Python build backend
134
+
135
+ ### 贡献者
136
+
137
+ 请阅读**CONTRIBUTING.md** 查阅为该项目做出贡献的开发者。
138
+
139
+ #### 如何参与开源项目
140
+
141
+ 贡献使开源社区成为一个学习、激励和创造的绝佳场所。你所作的任何贡献都是**非常感谢**的。
142
+
143
+ 1. Fork the Project
144
+ 2. Create your Feature Branch (`git checkout -b feature/AmazingFeature`)
145
+ 3. Commit your Changes (`git commit -m 'Add some AmazingFeature'`)
146
+ 4. Push to the Branch (`git push origin feature/AmazingFeature`)
147
+ 5. Open a Pull Request
148
+
149
+ <!-- links -->
150
+ [license-shield]: https://img.shields.io/github/license/Rain-kl/GitInsight.svg?style=flat-square
151
+ [license-url]: https://github.com/Rain-kl/GitInsight/blob/main/LICENSE
152
+ [python-version]: https://img.shields.io/pypi/pyversions/gitinsight-cli
@@ -0,0 +1,11 @@
1
+ gitinsight/__init__.py,sha256=6AWu5_EtfXYnu5jSCbcTnIFO8o7LYUbYpudTiP72l6M,100
2
+ gitinsight/__main__.py,sha256=EjOIyLp410y4XDHLbAlNurcNWl4za2C-53xNnWnUTVk,2715
3
+ gitinsight/analysis.py,sha256=FDVRqfxeALcwwJ1rwIHG1At-iPHpXKXfJQJKS3o449I,17351
4
+ gitinsight/charts.py,sha256=DISuXRS-vwNDJ3a_B52lUl2oLcZZbm1hUzVNvBxmWWY,30485
5
+ gitinsight/dashboard.py,sha256=u0-ebljtj8G7U1cnuoBrVDQXBJLMkg3NAZEnOlc4wuk,18519
6
+ gitinsight/git_reader.py,sha256=h5YScSixNnNFXhRe0PP16nhj8w1wIpdwwqHWDWu0U_I,6857
7
+ gitinsight/report.py,sha256=v-WRm_slXs3ZRXPk0WmQpIZ4vQRpt3PsE1EeN_a-NO8,969
8
+ gitinsight_cli-0.1.0.dist-info/METADATA,sha256=TNggyRshYyWDaYSJ_sfvoyVHXE85iJvys92J99F9-7o,4444
9
+ gitinsight_cli-0.1.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
10
+ gitinsight_cli-0.1.0.dist-info/entry_points.txt,sha256=mUsNFsZ6tKoM-0W2GCGHVSTDJlYGVtgfdDPhOpe_Ru8,56
11
+ gitinsight_cli-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.30.1
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ gitinsight = gitinsight.__main__:main