gitflow-analytics 1.0.0__py3-none-any.whl → 1.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gitflow_analytics/__init__.py +11 -9
- gitflow_analytics/_version.py +2 -2
- gitflow_analytics/cli.py +691 -243
- gitflow_analytics/cli_rich.py +353 -0
- gitflow_analytics/config.py +389 -96
- gitflow_analytics/core/analyzer.py +175 -78
- gitflow_analytics/core/branch_mapper.py +132 -132
- gitflow_analytics/core/cache.py +242 -173
- gitflow_analytics/core/identity.py +214 -178
- gitflow_analytics/extractors/base.py +13 -11
- gitflow_analytics/extractors/story_points.py +70 -59
- gitflow_analytics/extractors/tickets.py +111 -88
- gitflow_analytics/integrations/github_integration.py +91 -77
- gitflow_analytics/integrations/jira_integration.py +284 -0
- gitflow_analytics/integrations/orchestrator.py +99 -72
- gitflow_analytics/metrics/dora.py +183 -179
- gitflow_analytics/models/database.py +191 -54
- gitflow_analytics/qualitative/__init__.py +30 -0
- gitflow_analytics/qualitative/classifiers/__init__.py +13 -0
- gitflow_analytics/qualitative/classifiers/change_type.py +468 -0
- gitflow_analytics/qualitative/classifiers/domain_classifier.py +399 -0
- gitflow_analytics/qualitative/classifiers/intent_analyzer.py +436 -0
- gitflow_analytics/qualitative/classifiers/risk_analyzer.py +412 -0
- gitflow_analytics/qualitative/core/__init__.py +13 -0
- gitflow_analytics/qualitative/core/llm_fallback.py +653 -0
- gitflow_analytics/qualitative/core/nlp_engine.py +373 -0
- gitflow_analytics/qualitative/core/pattern_cache.py +457 -0
- gitflow_analytics/qualitative/core/processor.py +540 -0
- gitflow_analytics/qualitative/models/__init__.py +25 -0
- gitflow_analytics/qualitative/models/schemas.py +272 -0
- gitflow_analytics/qualitative/utils/__init__.py +13 -0
- gitflow_analytics/qualitative/utils/batch_processor.py +326 -0
- gitflow_analytics/qualitative/utils/cost_tracker.py +343 -0
- gitflow_analytics/qualitative/utils/metrics.py +347 -0
- gitflow_analytics/qualitative/utils/text_processing.py +243 -0
- gitflow_analytics/reports/analytics_writer.py +25 -8
- gitflow_analytics/reports/csv_writer.py +60 -32
- gitflow_analytics/reports/narrative_writer.py +21 -15
- gitflow_analytics/tui/__init__.py +5 -0
- gitflow_analytics/tui/app.py +721 -0
- gitflow_analytics/tui/screens/__init__.py +8 -0
- gitflow_analytics/tui/screens/analysis_progress_screen.py +487 -0
- gitflow_analytics/tui/screens/configuration_screen.py +547 -0
- gitflow_analytics/tui/screens/loading_screen.py +358 -0
- gitflow_analytics/tui/screens/main_screen.py +304 -0
- gitflow_analytics/tui/screens/results_screen.py +698 -0
- gitflow_analytics/tui/widgets/__init__.py +7 -0
- gitflow_analytics/tui/widgets/data_table.py +257 -0
- gitflow_analytics/tui/widgets/export_modal.py +301 -0
- gitflow_analytics/tui/widgets/progress_widget.py +192 -0
- gitflow_analytics-1.0.3.dist-info/METADATA +490 -0
- gitflow_analytics-1.0.3.dist-info/RECORD +62 -0
- gitflow_analytics-1.0.0.dist-info/METADATA +0 -201
- gitflow_analytics-1.0.0.dist-info/RECORD +0 -30
- {gitflow_analytics-1.0.0.dist-info → gitflow_analytics-1.0.3.dist-info}/WHEEL +0 -0
- {gitflow_analytics-1.0.0.dist-info → gitflow_analytics-1.0.3.dist-info}/entry_points.txt +0 -0
- {gitflow_analytics-1.0.0.dist-info → gitflow_analytics-1.0.3.dist-info}/licenses/LICENSE +0 -0
- {gitflow_analytics-1.0.0.dist-info → gitflow_analytics-1.0.3.dist-info}/top_level.txt +0 -0
|
@@ -1,62 +1,74 @@
|
|
|
1
1
|
"""Git repository analyzer with batch processing support."""
|
|
2
|
-
|
|
2
|
+
|
|
3
|
+
import fnmatch
|
|
4
|
+
from collections.abc import Generator
|
|
3
5
|
from datetime import datetime
|
|
4
|
-
from typing import List, Dict, Any, Optional, Tuple, Generator
|
|
5
6
|
from pathlib import Path
|
|
7
|
+
from typing import Any, Optional
|
|
8
|
+
|
|
6
9
|
import git
|
|
7
10
|
from git import Repo
|
|
8
11
|
from tqdm import tqdm
|
|
9
12
|
|
|
10
|
-
from .cache import GitAnalysisCache
|
|
11
13
|
from ..extractors.story_points import StoryPointExtractor
|
|
12
14
|
from ..extractors.tickets import TicketExtractor
|
|
13
15
|
from .branch_mapper import BranchToProjectMapper
|
|
16
|
+
from .cache import GitAnalysisCache
|
|
14
17
|
|
|
15
18
|
|
|
16
19
|
class GitAnalyzer:
|
|
17
20
|
"""Analyze Git repositories with caching and batch processing."""
|
|
18
|
-
|
|
19
|
-
def __init__(
    self,
    cache: GitAnalysisCache,
    batch_size: int = 1000,
    branch_mapping_rules: Optional[dict[str, list[str]]] = None,
    allowed_ticket_platforms: Optional[list[str]] = None,
    exclude_paths: Optional[list[str]] = None,
):
    """Initialize analyzer with cache.

    Args:
        cache: Persistent commit cache used to skip re-analysis of commits
            seen in earlier runs.
        batch_size: Number of commits processed (and cached) per batch.
        branch_mapping_rules: Optional branch-name-to-project rules passed
            through to ``BranchToProjectMapper``.
        allowed_ticket_platforms: Optional whitelist of ticket platforms
            passed through to ``TicketExtractor``.
        exclude_paths: Optional fnmatch-style path patterns excluded from
            the filtered line-count statistics.
    """
    self.cache = cache
    self.batch_size = batch_size
    self.story_point_extractor = StoryPointExtractor()
    self.ticket_extractor = TicketExtractor(allowed_platforms=allowed_ticket_platforms)
    self.branch_mapper = BranchToProjectMapper(branch_mapping_rules)
    # Normalize None to an empty pattern list so matching code can iterate freely.
    self.exclude_paths = exclude_paths or []
|
|
37
|
+
|
|
38
|
+
def analyze_repository(
    self, repo_path: Path, since: datetime, branch: Optional[str] = None
) -> list[dict[str, Any]]:
    """Analyze a Git repository with batch processing.

    Opens the repository, collects commits newer than ``since`` (optionally
    restricted to ``branch``), analyzes them batch by batch, and persists
    each batch to the cache as it completes.

    Args:
        repo_path: Filesystem path of the repository to open.
        since: Only commits after this timestamp are considered.
        branch: Optional branch name to restrict the walk to.

    Returns:
        One dict of extracted commit data per analyzed commit.

    Raises:
        ValueError: If ``repo_path`` cannot be opened as a Git repository.
    """
    try:
        repo = Repo(repo_path)
    except Exception as e:
        raise ValueError(f"Failed to open repository at {repo_path}: {e}") from e

    commits = self._get_commits(repo, since, branch)
    if not commits:
        return []

    results: list[dict[str, Any]] = []
    # Batch-wise processing keeps memory bounded and lets us persist
    # partial progress to the cache between batches.
    with tqdm(total=len(commits), desc=f"Analyzing {repo_path.name}") as progress:
        for chunk in self._batch_commits(commits, self.batch_size):
            chunk_results = self._process_batch(repo, repo_path, chunk)
            results.extend(chunk_results)
            self.cache.cache_commits_batch(str(repo_path), chunk_results)
            progress.update(len(chunk))

    return results
|
|
57
|
-
|
|
58
|
-
def _get_commits(
|
|
59
|
-
|
|
68
|
+
|
|
69
|
+
def _get_commits(
|
|
70
|
+
self, repo: Repo, since: datetime, branch: Optional[str] = None
|
|
71
|
+
) -> list[git.Commit]:
|
|
60
72
|
"""Get commits from repository."""
|
|
61
73
|
if branch:
|
|
62
74
|
try:
|
|
@@ -68,14 +80,14 @@ class GitAnalyzer:
|
|
|
68
80
|
# Get commits from all branches
|
|
69
81
|
commits = []
|
|
70
82
|
for ref in repo.refs:
|
|
71
|
-
if ref.name.startswith(
|
|
83
|
+
if ref.name.startswith("origin/"):
|
|
72
84
|
continue # Skip remote branches
|
|
73
85
|
try:
|
|
74
86
|
branch_commits = list(repo.iter_commits(ref, since=since))
|
|
75
87
|
commits.extend(branch_commits)
|
|
76
88
|
except git.GitCommandError:
|
|
77
89
|
continue
|
|
78
|
-
|
|
90
|
+
|
|
79
91
|
# Remove duplicates while preserving order
|
|
80
92
|
seen = set()
|
|
81
93
|
unique_commits = []
|
|
@@ -83,93 +95,102 @@ class GitAnalyzer:
|
|
|
83
95
|
if commit.hexsha not in seen:
|
|
84
96
|
seen.add(commit.hexsha)
|
|
85
97
|
unique_commits.append(commit)
|
|
86
|
-
|
|
98
|
+
|
|
87
99
|
commits = unique_commits
|
|
88
|
-
|
|
100
|
+
|
|
89
101
|
# Sort by date
|
|
90
102
|
return sorted(commits, key=lambda c: c.committed_datetime)
|
|
91
|
-
|
|
92
|
-
def _batch_commits(
|
|
93
|
-
|
|
103
|
+
|
|
104
|
+
def _batch_commits(
    self, commits: list[git.Commit], batch_size: int
) -> Generator[list[git.Commit], None, None]:
    """Yield successive ``batch_size``-sized slices of ``commits``.

    The final slice may be shorter; an empty input yields nothing.
    """
    start = 0
    total = len(commits)
    while start < total:
        yield commits[start : start + batch_size]
        start += batch_size
|
|
110
|
+
|
|
111
|
+
def _process_batch(
    self, repo: Repo, repo_path: Path, commits: list[git.Commit]
) -> list[dict[str, Any]]:
    """Process a batch of commits, serving cache hits where possible.

    Args:
        repo: Open repository handle.
        repo_path: Repository path (stringified as the cache key).
        commits: Commits to analyze in this batch.

    Returns:
        One analysis dict per commit, cached or freshly computed.
    """
    analyzed: list[dict[str, Any]] = []
    for commit in commits:
        # Cache hit check uses truthiness, matching the cache's contract
        # of returning a populated dict (or None) for known commits.
        hit = self.cache.get_cached_commit(str(repo_path), commit.hexsha)
        if hit:
            analyzed.append(hit)
        else:
            analyzed.append(self._analyze_commit(repo, commit, repo_path))
    return analyzed
|
|
115
|
-
|
|
116
|
-
def _analyze_commit(self, repo: Repo, commit: git.Commit, repo_path: Path) ->
|
|
129
|
+
|
|
130
|
+
def _analyze_commit(self, repo: Repo, commit: git.Commit, repo_path: Path) -> dict[str, Any]:
    """Analyze a single commit into a flat dict of metadata and metrics."""
    # Commit messages may come back as bytes; normalize once for the extractors.
    message_text = (
        commit.message
        if isinstance(commit.message, str)
        else commit.message.decode("utf-8", errors="ignore")
    )

    data: dict[str, Any] = {
        "hash": commit.hexsha,
        "author_name": commit.author.name,
        "author_email": commit.author.email,
        "message": commit.message,
        "timestamp": commit.committed_datetime,
        "is_merge": len(commit.parents) > 1,
    }

    # Branch containing the commit, then mapped onward to a project identifier.
    data["branch"] = self._get_commit_branch(repo, commit)
    data["inferred_project"] = self.branch_mapper.map_branch_to_project(
        str(data["branch"]), repo_path
    )

    # Raw line/file counts straight from git stats - kept for backward
    # compatibility with earlier report formats.
    stats = commit.stats.total
    has_get = hasattr(stats, "get")
    data["files_changed"] = int(stats.get("files", 0)) if has_get else 0
    data["insertions"] = int(stats.get("insertions", 0)) if has_get else 0
    data["deletions"] = int(stats.get("deletions", 0)) if has_get else 0

    # Parallel counts with boilerplate/generated paths excluded.
    filtered = self._calculate_filtered_stats(commit)
    data["filtered_files_changed"] = filtered["files"]
    data["filtered_insertions"] = filtered["insertions"]
    data["filtered_deletions"] = filtered["deletions"]

    # Pull story points and ticket references out of the commit message.
    data["story_points"] = self.story_point_extractor.extract_from_text(message_text)
    data["ticket_references"] = self.ticket_extractor.extract_from_text(message_text)

    data["complexity_delta"] = self._calculate_complexity_delta(commit)
    return data
|
|
156
|
-
|
|
177
|
+
|
|
157
178
|
def _get_commit_branch(self, repo: Repo, commit: git.Commit) -> str:
    """Return the name of the first local branch containing ``commit``.

    Simplified lookup: scans local branches in order and returns the first
    one whose history contains the commit, or "unknown" when none does.
    NOTE(review): this walks each branch's full history per commit, which
    is O(branches x history) - fine for small repos, slow for large ones.
    """
    for candidate in repo.branches:
        if commit in repo.iter_commits(candidate):
            return candidate.name
    return "unknown"
|
|
185
|
+
|
|
165
186
|
def _calculate_complexity_delta(self, commit: git.Commit) -> float:
|
|
166
187
|
"""Calculate complexity change for a commit."""
|
|
167
188
|
total_delta = 0.0
|
|
168
|
-
|
|
189
|
+
|
|
169
190
|
for diff in commit.diff(commit.parents[0] if commit.parents else None):
|
|
170
|
-
if not self._is_code_file(diff.b_path or diff.a_path or
|
|
191
|
+
if not self._is_code_file(diff.b_path or diff.a_path or ""):
|
|
171
192
|
continue
|
|
172
|
-
|
|
193
|
+
|
|
173
194
|
# Simple complexity estimation based on diff size
|
|
174
195
|
# In a real implementation, you'd parse the code and calculate cyclomatic complexity
|
|
175
196
|
if diff.new_file:
|
|
@@ -178,18 +199,94 @@ class GitAnalyzer:
|
|
|
178
199
|
total_delta -= diff.a_blob.size / 100 if diff.a_blob else 0
|
|
179
200
|
else:
|
|
180
201
|
# Modified file - estimate based on change size
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
202
|
+
if diff.diff:
|
|
203
|
+
diff_content = (
|
|
204
|
+
diff.diff
|
|
205
|
+
if isinstance(diff.diff, str)
|
|
206
|
+
else diff.diff.decode("utf-8", errors="ignore")
|
|
207
|
+
)
|
|
208
|
+
added = len(diff_content.split("\n+"))
|
|
209
|
+
removed = len(diff_content.split("\n-"))
|
|
210
|
+
total_delta += (added - removed) / 10
|
|
211
|
+
|
|
185
212
|
return total_delta
|
|
186
|
-
|
|
213
|
+
|
|
187
214
|
def _is_code_file(self, filepath: str) -> bool:
|
|
188
215
|
"""Check if file is a code file."""
|
|
189
216
|
code_extensions = {
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
217
|
+
".py",
|
|
218
|
+
".js",
|
|
219
|
+
".ts",
|
|
220
|
+
".java",
|
|
221
|
+
".cpp",
|
|
222
|
+
".c",
|
|
223
|
+
".h",
|
|
224
|
+
".hpp",
|
|
225
|
+
".go",
|
|
226
|
+
".rs",
|
|
227
|
+
".rb",
|
|
228
|
+
".php",
|
|
229
|
+
".swift",
|
|
230
|
+
".kt",
|
|
231
|
+
".scala",
|
|
232
|
+
".cs",
|
|
233
|
+
".vb",
|
|
234
|
+
".r",
|
|
235
|
+
".m",
|
|
236
|
+
".mm",
|
|
237
|
+
".f90",
|
|
238
|
+
".f95",
|
|
239
|
+
".lua",
|
|
193
240
|
}
|
|
194
|
-
|
|
195
|
-
return any(filepath.endswith(ext) for ext in code_extensions)
|
|
241
|
+
|
|
242
|
+
return any(filepath.endswith(ext) for ext in code_extensions)
|
|
243
|
+
|
|
244
|
+
def _should_exclude_file(self, filepath: str) -> bool:
|
|
245
|
+
"""Check if file should be excluded from line counting."""
|
|
246
|
+
if not filepath:
|
|
247
|
+
return False
|
|
248
|
+
|
|
249
|
+
# Normalize path separators for consistent matching
|
|
250
|
+
filepath = filepath.replace("\\", "/")
|
|
251
|
+
|
|
252
|
+
# Check against exclude patterns
|
|
253
|
+
return any(fnmatch.fnmatch(filepath, pattern) for pattern in self.exclude_paths)
|
|
254
|
+
|
|
255
|
+
def _calculate_filtered_stats(self, commit: git.Commit) -> dict[str, int]:
    """Calculate commit statistics excluding boilerplate/generated files.

    Walks the diff between ``commit`` and its first parent (or the working
    tree for a root commit, matching the original behavior), skips any file
    whose path matches one of ``self.exclude_paths``, and counts the
    remaining files plus added/removed lines from the textual patch.

    Args:
        commit: Commit to compute filtered statistics for.

    Returns:
        Dict with "files", "insertions" and "deletions" counts; all zeros
        when the diff cannot be computed.
    """
    filtered_stats = {"files": 0, "insertions": 0, "deletions": 0}

    # For initial commits or commits without parents.
    parent = commit.parents[0] if commit.parents else None

    try:
        # FIX: create_patch=True is required - without it GitPython leaves
        # diff.diff empty, so every insertion/deletion count was silently 0.
        # NOTE(review): commit.diff(parent) diffs commit -> parent, so the
        # +/- sense may be inverted relative to `git show`; confirm the
        # intended direction before relying on insertions vs deletions.
        for diff in commit.diff(parent, create_patch=True):
            # Prefer the post-change path; fall back to the pre-change one.
            file_path = diff.b_path if diff.b_path else diff.a_path
            if not file_path:
                continue

            # Skip excluded (boilerplate/generated) files.
            if self._should_exclude_file(file_path):
                continue

            filtered_stats["files"] += 1

            # Count added/removed lines from the patch text, ignoring the
            # "+++"/"---" file-header lines.
            if diff.diff:
                diff_text = (
                    diff.diff
                    if isinstance(diff.diff, str)
                    else diff.diff.decode("utf-8", errors="ignore")
                )
                for line in diff_text.split("\n"):
                    if line.startswith("+") and not line.startswith("+++"):
                        filtered_stats["insertions"] += 1
                    elif line.startswith("-") and not line.startswith("---"):
                        filtered_stats["deletions"] += 1
    except Exception:
        # Deliberate best-effort: if the diff cannot be computed, report zeros
        # rather than aborting the whole analysis run.
        pass

    return filtered_stats
|