gitflow-analytics 1.0.1__py3-none-any.whl → 1.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gitflow_analytics/__init__.py +11 -11
- gitflow_analytics/_version.py +2 -2
- gitflow_analytics/cli.py +612 -258
- gitflow_analytics/cli_rich.py +353 -0
- gitflow_analytics/config.py +251 -141
- gitflow_analytics/core/analyzer.py +140 -103
- gitflow_analytics/core/branch_mapper.py +132 -132
- gitflow_analytics/core/cache.py +240 -169
- gitflow_analytics/core/identity.py +210 -173
- gitflow_analytics/extractors/base.py +13 -11
- gitflow_analytics/extractors/story_points.py +70 -59
- gitflow_analytics/extractors/tickets.py +101 -87
- gitflow_analytics/integrations/github_integration.py +84 -77
- gitflow_analytics/integrations/jira_integration.py +116 -104
- gitflow_analytics/integrations/orchestrator.py +86 -85
- gitflow_analytics/metrics/dora.py +181 -177
- gitflow_analytics/models/database.py +190 -53
- gitflow_analytics/qualitative/__init__.py +30 -0
- gitflow_analytics/qualitative/classifiers/__init__.py +13 -0
- gitflow_analytics/qualitative/classifiers/change_type.py +468 -0
- gitflow_analytics/qualitative/classifiers/domain_classifier.py +399 -0
- gitflow_analytics/qualitative/classifiers/intent_analyzer.py +436 -0
- gitflow_analytics/qualitative/classifiers/risk_analyzer.py +412 -0
- gitflow_analytics/qualitative/core/__init__.py +13 -0
- gitflow_analytics/qualitative/core/llm_fallback.py +653 -0
- gitflow_analytics/qualitative/core/nlp_engine.py +373 -0
- gitflow_analytics/qualitative/core/pattern_cache.py +457 -0
- gitflow_analytics/qualitative/core/processor.py +540 -0
- gitflow_analytics/qualitative/models/__init__.py +25 -0
- gitflow_analytics/qualitative/models/schemas.py +272 -0
- gitflow_analytics/qualitative/utils/__init__.py +13 -0
- gitflow_analytics/qualitative/utils/batch_processor.py +326 -0
- gitflow_analytics/qualitative/utils/cost_tracker.py +343 -0
- gitflow_analytics/qualitative/utils/metrics.py +347 -0
- gitflow_analytics/qualitative/utils/text_processing.py +243 -0
- gitflow_analytics/reports/analytics_writer.py +11 -4
- gitflow_analytics/reports/csv_writer.py +51 -31
- gitflow_analytics/reports/narrative_writer.py +16 -14
- gitflow_analytics/tui/__init__.py +5 -0
- gitflow_analytics/tui/app.py +721 -0
- gitflow_analytics/tui/screens/__init__.py +8 -0
- gitflow_analytics/tui/screens/analysis_progress_screen.py +487 -0
- gitflow_analytics/tui/screens/configuration_screen.py +547 -0
- gitflow_analytics/tui/screens/loading_screen.py +358 -0
- gitflow_analytics/tui/screens/main_screen.py +304 -0
- gitflow_analytics/tui/screens/results_screen.py +698 -0
- gitflow_analytics/tui/widgets/__init__.py +7 -0
- gitflow_analytics/tui/widgets/data_table.py +257 -0
- gitflow_analytics/tui/widgets/export_modal.py +301 -0
- gitflow_analytics/tui/widgets/progress_widget.py +192 -0
- {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.0.3.dist-info}/METADATA +31 -4
- gitflow_analytics-1.0.3.dist-info/RECORD +62 -0
- gitflow_analytics-1.0.1.dist-info/RECORD +0 -31
- {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.0.3.dist-info}/WHEEL +0 -0
- {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.0.3.dist-info}/entry_points.txt +0 -0
- {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.0.3.dist-info}/licenses/LICENSE +0 -0
- {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.0.3.dist-info}/top_level.txt +0 -0
|
@@ -1,8 +1,10 @@
|
|
|
1
1
|
"""Git repository analyzer with batch processing support."""
|
|
2
|
+
|
|
2
3
|
import fnmatch
|
|
4
|
+
from collections.abc import Generator
|
|
3
5
|
from datetime import datetime
|
|
4
6
|
from pathlib import Path
|
|
5
|
-
from typing import Any,
|
|
7
|
+
from typing import Any, Optional
|
|
6
8
|
|
|
7
9
|
import git
|
|
8
10
|
from git import Repo
|
|
@@ -16,11 +18,15 @@ from .cache import GitAnalysisCache
|
|
|
16
18
|
|
|
17
19
|
class GitAnalyzer:
|
|
18
20
|
"""Analyze Git repositories with caching and batch processing."""
|
|
19
|
-
|
|
20
|
-
def __init__(
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
21
|
+
|
|
22
|
+
def __init__(
|
|
23
|
+
self,
|
|
24
|
+
cache: GitAnalysisCache,
|
|
25
|
+
batch_size: int = 1000,
|
|
26
|
+
branch_mapping_rules: Optional[dict[str, list[str]]] = None,
|
|
27
|
+
allowed_ticket_platforms: Optional[list[str]] = None,
|
|
28
|
+
exclude_paths: Optional[list[str]] = None,
|
|
29
|
+
):
|
|
24
30
|
"""Initialize analyzer with cache."""
|
|
25
31
|
self.cache = cache
|
|
26
32
|
self.batch_size = batch_size
|
|
@@ -28,39 +34,41 @@ class GitAnalyzer:
|
|
|
28
34
|
self.ticket_extractor = TicketExtractor(allowed_platforms=allowed_ticket_platforms)
|
|
29
35
|
self.branch_mapper = BranchToProjectMapper(branch_mapping_rules)
|
|
30
36
|
self.exclude_paths = exclude_paths or []
|
|
31
|
-
|
|
32
|
-
def analyze_repository(
|
|
33
|
-
|
|
37
|
+
|
|
38
|
+
def analyze_repository(
|
|
39
|
+
self, repo_path: Path, since: datetime, branch: Optional[str] = None
|
|
40
|
+
) -> list[dict[str, Any]]:
|
|
34
41
|
"""Analyze a Git repository with batch processing."""
|
|
35
42
|
try:
|
|
36
43
|
repo = Repo(repo_path)
|
|
37
44
|
except Exception as e:
|
|
38
45
|
raise ValueError(f"Failed to open repository at {repo_path}: {e}") from e
|
|
39
|
-
|
|
46
|
+
|
|
40
47
|
# Get commits to analyze
|
|
41
48
|
commits = self._get_commits(repo, since, branch)
|
|
42
49
|
total_commits = len(commits)
|
|
43
|
-
|
|
50
|
+
|
|
44
51
|
if total_commits == 0:
|
|
45
52
|
return []
|
|
46
|
-
|
|
53
|
+
|
|
47
54
|
analyzed_commits = []
|
|
48
|
-
|
|
55
|
+
|
|
49
56
|
# Process in batches with progress bar
|
|
50
57
|
with tqdm(total=total_commits, desc=f"Analyzing {repo_path.name}") as pbar:
|
|
51
58
|
for batch in self._batch_commits(commits, self.batch_size):
|
|
52
59
|
batch_results = self._process_batch(repo, repo_path, batch)
|
|
53
60
|
analyzed_commits.extend(batch_results)
|
|
54
|
-
|
|
61
|
+
|
|
55
62
|
# Cache the batch
|
|
56
63
|
self.cache.cache_commits_batch(str(repo_path), batch_results)
|
|
57
|
-
|
|
64
|
+
|
|
58
65
|
pbar.update(len(batch))
|
|
59
|
-
|
|
66
|
+
|
|
60
67
|
return analyzed_commits
|
|
61
|
-
|
|
62
|
-
def _get_commits(
|
|
63
|
-
|
|
68
|
+
|
|
69
|
+
def _get_commits(
|
|
70
|
+
self, repo: Repo, since: datetime, branch: Optional[str] = None
|
|
71
|
+
) -> list[git.Commit]:
|
|
64
72
|
"""Get commits from repository."""
|
|
65
73
|
if branch:
|
|
66
74
|
try:
|
|
@@ -72,14 +80,14 @@ class GitAnalyzer:
|
|
|
72
80
|
# Get commits from all branches
|
|
73
81
|
commits = []
|
|
74
82
|
for ref in repo.refs:
|
|
75
|
-
if ref.name.startswith(
|
|
83
|
+
if ref.name.startswith("origin/"):
|
|
76
84
|
continue # Skip remote branches
|
|
77
85
|
try:
|
|
78
86
|
branch_commits = list(repo.iter_commits(ref, since=since))
|
|
79
87
|
commits.extend(branch_commits)
|
|
80
88
|
except git.GitCommandError:
|
|
81
89
|
continue
|
|
82
|
-
|
|
90
|
+
|
|
83
91
|
# Remove duplicates while preserving order
|
|
84
92
|
seen = set()
|
|
85
93
|
unique_commits = []
|
|
@@ -87,99 +95,102 @@ class GitAnalyzer:
|
|
|
87
95
|
if commit.hexsha not in seen:
|
|
88
96
|
seen.add(commit.hexsha)
|
|
89
97
|
unique_commits.append(commit)
|
|
90
|
-
|
|
98
|
+
|
|
91
99
|
commits = unique_commits
|
|
92
|
-
|
|
100
|
+
|
|
93
101
|
# Sort by date
|
|
94
102
|
return sorted(commits, key=lambda c: c.committed_datetime)
|
|
95
|
-
|
|
96
|
-
def _batch_commits(
|
|
97
|
-
|
|
103
|
+
|
|
104
|
+
def _batch_commits(
|
|
105
|
+
self, commits: list[git.Commit], batch_size: int
|
|
106
|
+
) -> Generator[list[git.Commit], None, None]:
|
|
98
107
|
"""Yield batches of commits."""
|
|
99
108
|
for i in range(0, len(commits), batch_size):
|
|
100
|
-
yield commits[i:i + batch_size]
|
|
101
|
-
|
|
102
|
-
def _process_batch(
|
|
103
|
-
|
|
109
|
+
yield commits[i : i + batch_size]
|
|
110
|
+
|
|
111
|
+
def _process_batch(
|
|
112
|
+
self, repo: Repo, repo_path: Path, commits: list[git.Commit]
|
|
113
|
+
) -> list[dict[str, Any]]:
|
|
104
114
|
"""Process a batch of commits."""
|
|
105
115
|
results = []
|
|
106
|
-
|
|
116
|
+
|
|
107
117
|
for commit in commits:
|
|
108
118
|
# Check cache first
|
|
109
119
|
cached = self.cache.get_cached_commit(str(repo_path), commit.hexsha)
|
|
110
120
|
if cached:
|
|
111
121
|
results.append(cached)
|
|
112
122
|
continue
|
|
113
|
-
|
|
123
|
+
|
|
114
124
|
# Analyze commit
|
|
115
125
|
commit_data = self._analyze_commit(repo, commit, repo_path)
|
|
116
126
|
results.append(commit_data)
|
|
117
|
-
|
|
127
|
+
|
|
118
128
|
return results
|
|
119
|
-
|
|
120
|
-
def _analyze_commit(self, repo: Repo, commit: git.Commit, repo_path: Path) ->
|
|
129
|
+
|
|
130
|
+
def _analyze_commit(self, repo: Repo, commit: git.Commit, repo_path: Path) -> dict[str, Any]:
|
|
121
131
|
"""Analyze a single commit."""
|
|
122
132
|
# Basic commit data
|
|
123
133
|
commit_data = {
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
134
|
+
"hash": commit.hexsha,
|
|
135
|
+
"author_name": commit.author.name,
|
|
136
|
+
"author_email": commit.author.email,
|
|
137
|
+
"message": commit.message,
|
|
138
|
+
"timestamp": commit.committed_datetime,
|
|
139
|
+
"is_merge": len(commit.parents) > 1,
|
|
130
140
|
}
|
|
131
|
-
|
|
141
|
+
|
|
132
142
|
# Get branch name
|
|
133
|
-
commit_data[
|
|
134
|
-
|
|
143
|
+
commit_data["branch"] = self._get_commit_branch(repo, commit)
|
|
144
|
+
|
|
135
145
|
# Map branch to project
|
|
136
|
-
commit_data[
|
|
137
|
-
commit_data[
|
|
146
|
+
commit_data["inferred_project"] = self.branch_mapper.map_branch_to_project(
|
|
147
|
+
str(commit_data["branch"]), repo_path
|
|
138
148
|
)
|
|
139
|
-
|
|
149
|
+
|
|
140
150
|
# Calculate metrics - use raw stats for backward compatibility
|
|
141
151
|
stats = commit.stats.total
|
|
142
|
-
commit_data[
|
|
143
|
-
commit_data[
|
|
144
|
-
commit_data[
|
|
145
|
-
|
|
152
|
+
commit_data["files_changed"] = int(stats.get("files", 0)) if hasattr(stats, "get") else 0
|
|
153
|
+
commit_data["insertions"] = int(stats.get("insertions", 0)) if hasattr(stats, "get") else 0
|
|
154
|
+
commit_data["deletions"] = int(stats.get("deletions", 0)) if hasattr(stats, "get") else 0
|
|
155
|
+
|
|
146
156
|
# Calculate filtered metrics (excluding boilerplate/generated files)
|
|
147
157
|
filtered_stats = self._calculate_filtered_stats(commit)
|
|
148
|
-
commit_data[
|
|
149
|
-
commit_data[
|
|
150
|
-
commit_data[
|
|
151
|
-
|
|
158
|
+
commit_data["filtered_files_changed"] = filtered_stats["files"]
|
|
159
|
+
commit_data["filtered_insertions"] = filtered_stats["insertions"]
|
|
160
|
+
commit_data["filtered_deletions"] = filtered_stats["deletions"]
|
|
161
|
+
|
|
152
162
|
# Extract story points
|
|
153
|
-
|
|
163
|
+
message_str = (
|
|
154
164
|
commit.message
|
|
165
|
+
if isinstance(commit.message, str)
|
|
166
|
+
else commit.message.decode("utf-8", errors="ignore")
|
|
155
167
|
)
|
|
156
|
-
|
|
168
|
+
commit_data["story_points"] = self.story_point_extractor.extract_from_text(message_str)
|
|
169
|
+
|
|
157
170
|
# Extract ticket references
|
|
158
|
-
commit_data[
|
|
159
|
-
|
|
160
|
-
)
|
|
161
|
-
|
|
171
|
+
commit_data["ticket_references"] = self.ticket_extractor.extract_from_text(message_str)
|
|
172
|
+
|
|
162
173
|
# Calculate complexity delta
|
|
163
|
-
commit_data[
|
|
164
|
-
|
|
174
|
+
commit_data["complexity_delta"] = self._calculate_complexity_delta(commit)
|
|
175
|
+
|
|
165
176
|
return commit_data
|
|
166
|
-
|
|
177
|
+
|
|
167
178
|
def _get_commit_branch(self, repo: Repo, commit: git.Commit) -> str:
|
|
168
179
|
"""Get the branch name for a commit."""
|
|
169
180
|
# This is a simplified approach - getting the first branch that contains the commit
|
|
170
181
|
for branch in repo.branches:
|
|
171
182
|
if commit in repo.iter_commits(branch):
|
|
172
183
|
return branch.name
|
|
173
|
-
return
|
|
174
|
-
|
|
184
|
+
return "unknown"
|
|
185
|
+
|
|
175
186
|
def _calculate_complexity_delta(self, commit: git.Commit) -> float:
|
|
176
187
|
"""Calculate complexity change for a commit."""
|
|
177
188
|
total_delta = 0.0
|
|
178
|
-
|
|
189
|
+
|
|
179
190
|
for diff in commit.diff(commit.parents[0] if commit.parents else None):
|
|
180
|
-
if not self._is_code_file(diff.b_path or diff.a_path or
|
|
191
|
+
if not self._is_code_file(diff.b_path or diff.a_path or ""):
|
|
181
192
|
continue
|
|
182
|
-
|
|
193
|
+
|
|
183
194
|
# Simple complexity estimation based on diff size
|
|
184
195
|
# In a real implementation, you'd parse the code and calculate cyclomatic complexity
|
|
185
196
|
if diff.new_file:
|
|
@@ -188,68 +199,94 @@ class GitAnalyzer:
|
|
|
188
199
|
total_delta -= diff.a_blob.size / 100 if diff.a_blob else 0
|
|
189
200
|
else:
|
|
190
201
|
# Modified file - estimate based on change size
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
202
|
+
if diff.diff:
|
|
203
|
+
diff_content = (
|
|
204
|
+
diff.diff
|
|
205
|
+
if isinstance(diff.diff, str)
|
|
206
|
+
else diff.diff.decode("utf-8", errors="ignore")
|
|
207
|
+
)
|
|
208
|
+
added = len(diff_content.split("\n+"))
|
|
209
|
+
removed = len(diff_content.split("\n-"))
|
|
210
|
+
total_delta += (added - removed) / 10
|
|
211
|
+
|
|
195
212
|
return total_delta
|
|
196
|
-
|
|
213
|
+
|
|
197
214
|
def _is_code_file(self, filepath: str) -> bool:
|
|
198
215
|
"""Check if file is a code file."""
|
|
199
216
|
code_extensions = {
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
217
|
+
".py",
|
|
218
|
+
".js",
|
|
219
|
+
".ts",
|
|
220
|
+
".java",
|
|
221
|
+
".cpp",
|
|
222
|
+
".c",
|
|
223
|
+
".h",
|
|
224
|
+
".hpp",
|
|
225
|
+
".go",
|
|
226
|
+
".rs",
|
|
227
|
+
".rb",
|
|
228
|
+
".php",
|
|
229
|
+
".swift",
|
|
230
|
+
".kt",
|
|
231
|
+
".scala",
|
|
232
|
+
".cs",
|
|
233
|
+
".vb",
|
|
234
|
+
".r",
|
|
235
|
+
".m",
|
|
236
|
+
".mm",
|
|
237
|
+
".f90",
|
|
238
|
+
".f95",
|
|
239
|
+
".lua",
|
|
203
240
|
}
|
|
204
|
-
|
|
241
|
+
|
|
205
242
|
return any(filepath.endswith(ext) for ext in code_extensions)
|
|
206
|
-
|
|
243
|
+
|
|
207
244
|
def _should_exclude_file(self, filepath: str) -> bool:
|
|
208
245
|
"""Check if file should be excluded from line counting."""
|
|
209
246
|
if not filepath:
|
|
210
247
|
return False
|
|
211
|
-
|
|
248
|
+
|
|
212
249
|
# Normalize path separators for consistent matching
|
|
213
|
-
filepath = filepath.replace(
|
|
214
|
-
|
|
250
|
+
filepath = filepath.replace("\\", "/")
|
|
251
|
+
|
|
215
252
|
# Check against exclude patterns
|
|
216
253
|
return any(fnmatch.fnmatch(filepath, pattern) for pattern in self.exclude_paths)
|
|
217
|
-
|
|
218
|
-
def _calculate_filtered_stats(self, commit: git.Commit) ->
|
|
254
|
+
|
|
255
|
+
def _calculate_filtered_stats(self, commit: git.Commit) -> dict[str, int]:
|
|
219
256
|
"""Calculate commit statistics excluding boilerplate/generated files."""
|
|
220
|
-
filtered_stats = {
|
|
221
|
-
|
|
222
|
-
'insertions': 0,
|
|
223
|
-
'deletions': 0
|
|
224
|
-
}
|
|
225
|
-
|
|
257
|
+
filtered_stats = {"files": 0, "insertions": 0, "deletions": 0}
|
|
258
|
+
|
|
226
259
|
# For initial commits or commits without parents
|
|
227
260
|
parent = commit.parents[0] if commit.parents else None
|
|
228
|
-
|
|
261
|
+
|
|
229
262
|
try:
|
|
230
263
|
for diff in commit.diff(parent):
|
|
231
264
|
# Get file path
|
|
232
265
|
file_path = diff.b_path if diff.b_path else diff.a_path
|
|
233
266
|
if not file_path:
|
|
234
267
|
continue
|
|
235
|
-
|
|
268
|
+
|
|
236
269
|
# Skip excluded files
|
|
237
270
|
if self._should_exclude_file(file_path):
|
|
238
271
|
continue
|
|
239
|
-
|
|
272
|
+
|
|
240
273
|
# Count the file
|
|
241
|
-
filtered_stats[
|
|
242
|
-
|
|
274
|
+
filtered_stats["files"] += 1
|
|
275
|
+
|
|
243
276
|
# Count insertions and deletions
|
|
244
277
|
if diff.diff:
|
|
245
|
-
diff_text =
|
|
246
|
-
|
|
247
|
-
if
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
278
|
+
diff_text = (
|
|
279
|
+
diff.diff
|
|
280
|
+
if isinstance(diff.diff, str)
|
|
281
|
+
else diff.diff.decode("utf-8", errors="ignore")
|
|
282
|
+
)
|
|
283
|
+
for line in diff_text.split("\n"):
|
|
284
|
+
if line.startswith("+") and not line.startswith("+++"):
|
|
285
|
+
filtered_stats["insertions"] += 1
|
|
286
|
+
elif line.startswith("-") and not line.startswith("---"):
|
|
287
|
+
filtered_stats["deletions"] += 1
|
|
251
288
|
except Exception:
|
|
252
289
|
# If we can't calculate filtered stats, return zeros
|
|
253
290
|
pass
|
|
254
|
-
|
|
255
|
-
return filtered_stats
|
|
291
|
+
|
|
292
|
+
return filtered_stats
|