gitflow-analytics 1.0.0__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58)
  1. gitflow_analytics/__init__.py +11 -9
  2. gitflow_analytics/_version.py +2 -2
  3. gitflow_analytics/cli.py +691 -243
  4. gitflow_analytics/cli_rich.py +353 -0
  5. gitflow_analytics/config.py +389 -96
  6. gitflow_analytics/core/analyzer.py +175 -78
  7. gitflow_analytics/core/branch_mapper.py +132 -132
  8. gitflow_analytics/core/cache.py +242 -173
  9. gitflow_analytics/core/identity.py +214 -178
  10. gitflow_analytics/extractors/base.py +13 -11
  11. gitflow_analytics/extractors/story_points.py +70 -59
  12. gitflow_analytics/extractors/tickets.py +111 -88
  13. gitflow_analytics/integrations/github_integration.py +91 -77
  14. gitflow_analytics/integrations/jira_integration.py +284 -0
  15. gitflow_analytics/integrations/orchestrator.py +99 -72
  16. gitflow_analytics/metrics/dora.py +183 -179
  17. gitflow_analytics/models/database.py +191 -54
  18. gitflow_analytics/qualitative/__init__.py +30 -0
  19. gitflow_analytics/qualitative/classifiers/__init__.py +13 -0
  20. gitflow_analytics/qualitative/classifiers/change_type.py +468 -0
  21. gitflow_analytics/qualitative/classifiers/domain_classifier.py +399 -0
  22. gitflow_analytics/qualitative/classifiers/intent_analyzer.py +436 -0
  23. gitflow_analytics/qualitative/classifiers/risk_analyzer.py +412 -0
  24. gitflow_analytics/qualitative/core/__init__.py +13 -0
  25. gitflow_analytics/qualitative/core/llm_fallback.py +653 -0
  26. gitflow_analytics/qualitative/core/nlp_engine.py +373 -0
  27. gitflow_analytics/qualitative/core/pattern_cache.py +457 -0
  28. gitflow_analytics/qualitative/core/processor.py +540 -0
  29. gitflow_analytics/qualitative/models/__init__.py +25 -0
  30. gitflow_analytics/qualitative/models/schemas.py +272 -0
  31. gitflow_analytics/qualitative/utils/__init__.py +13 -0
  32. gitflow_analytics/qualitative/utils/batch_processor.py +326 -0
  33. gitflow_analytics/qualitative/utils/cost_tracker.py +343 -0
  34. gitflow_analytics/qualitative/utils/metrics.py +347 -0
  35. gitflow_analytics/qualitative/utils/text_processing.py +243 -0
  36. gitflow_analytics/reports/analytics_writer.py +25 -8
  37. gitflow_analytics/reports/csv_writer.py +60 -32
  38. gitflow_analytics/reports/narrative_writer.py +21 -15
  39. gitflow_analytics/tui/__init__.py +5 -0
  40. gitflow_analytics/tui/app.py +721 -0
  41. gitflow_analytics/tui/screens/__init__.py +8 -0
  42. gitflow_analytics/tui/screens/analysis_progress_screen.py +487 -0
  43. gitflow_analytics/tui/screens/configuration_screen.py +547 -0
  44. gitflow_analytics/tui/screens/loading_screen.py +358 -0
  45. gitflow_analytics/tui/screens/main_screen.py +304 -0
  46. gitflow_analytics/tui/screens/results_screen.py +698 -0
  47. gitflow_analytics/tui/widgets/__init__.py +7 -0
  48. gitflow_analytics/tui/widgets/data_table.py +257 -0
  49. gitflow_analytics/tui/widgets/export_modal.py +301 -0
  50. gitflow_analytics/tui/widgets/progress_widget.py +192 -0
  51. gitflow_analytics-1.0.3.dist-info/METADATA +490 -0
  52. gitflow_analytics-1.0.3.dist-info/RECORD +62 -0
  53. gitflow_analytics-1.0.0.dist-info/METADATA +0 -201
  54. gitflow_analytics-1.0.0.dist-info/RECORD +0 -30
  55. {gitflow_analytics-1.0.0.dist-info → gitflow_analytics-1.0.3.dist-info}/WHEEL +0 -0
  56. {gitflow_analytics-1.0.0.dist-info → gitflow_analytics-1.0.3.dist-info}/entry_points.txt +0 -0
  57. {gitflow_analytics-1.0.0.dist-info → gitflow_analytics-1.0.3.dist-info}/licenses/LICENSE +0 -0
  58. {gitflow_analytics-1.0.0.dist-info → gitflow_analytics-1.0.3.dist-info}/top_level.txt +0 -0
@@ -1,62 +1,74 @@
1
1
  """Git repository analyzer with batch processing support."""
2
- import re
2
+
3
+ import fnmatch
4
+ from collections.abc import Generator
3
5
  from datetime import datetime
4
- from typing import List, Dict, Any, Optional, Tuple, Generator
5
6
  from pathlib import Path
7
+ from typing import Any, Optional
8
+
6
9
  import git
7
10
  from git import Repo
8
11
  from tqdm import tqdm
9
12
 
10
- from .cache import GitAnalysisCache
11
13
  from ..extractors.story_points import StoryPointExtractor
12
14
  from ..extractors.tickets import TicketExtractor
13
15
  from .branch_mapper import BranchToProjectMapper
16
+ from .cache import GitAnalysisCache
14
17
 
15
18
 
16
19
  class GitAnalyzer:
17
20
  """Analyze Git repositories with caching and batch processing."""
18
-
19
- def __init__(self, cache: GitAnalysisCache, batch_size: int = 1000,
20
- branch_mapping_rules: Optional[Dict[str, List[str]]] = None):
21
+
22
def __init__(
    self,
    cache: "GitAnalysisCache",
    batch_size: int = 1000,
    branch_mapping_rules: Optional[dict[str, list[str]]] = None,
    allowed_ticket_platforms: Optional[list[str]] = None,
    exclude_paths: Optional[list[str]] = None,
):
    """Initialize the analyzer and its helper components.

    Args:
        cache: Persistent cache used to avoid re-analyzing commits.
        batch_size: Number of commits processed (and cached) per batch.
        branch_mapping_rules: Optional rules forwarded to the
            branch-to-project mapper.
        allowed_ticket_platforms: Optional whitelist of ticket platforms
            recognized by the ticket extractor.
        exclude_paths: Optional fnmatch-style patterns for files excluded
            from the filtered line counts.
    """
    self.cache = cache
    self.batch_size = batch_size
    # Helper components; each is independent of the others.
    self.story_point_extractor = StoryPointExtractor()
    self.ticket_extractor = TicketExtractor(allowed_platforms=allowed_ticket_platforms)
    self.branch_mapper = BranchToProjectMapper(branch_mapping_rules)
    self.exclude_paths = exclude_paths or []
37
+
38
def analyze_repository(
    self, repo_path: Path, since: datetime, branch: Optional[str] = None
) -> list[dict[str, Any]]:
    """Analyze a Git repository and return one metrics dict per commit.

    Commits are processed in batches; every batch is written to the
    cache as soon as it is analyzed so partial runs are not wasted.

    Raises:
        ValueError: If the path cannot be opened as a Git repository.
    """
    try:
        repo = Repo(repo_path)
    except Exception as e:
        raise ValueError(f"Failed to open repository at {repo_path}: {e}") from e

    # Collect candidate commits first so the progress bar has a total.
    commit_list = self._get_commits(repo, since, branch)
    if not commit_list:
        return []

    collected: list[dict[str, Any]] = []
    with tqdm(total=len(commit_list), desc=f"Analyzing {repo_path.name}") as progress:
        for chunk in self._batch_commits(commit_list, self.batch_size):
            chunk_results = self._process_batch(repo, repo_path, chunk)
            collected.extend(chunk_results)
            # Persist each batch immediately.
            self.cache.cache_commits_batch(str(repo_path), chunk_results)
            progress.update(len(chunk))

    return collected
57
-
58
- def _get_commits(self, repo: Repo, since: datetime,
59
- branch: Optional[str] = None) -> List[git.Commit]:
68
+
69
+ def _get_commits(
70
+ self, repo: Repo, since: datetime, branch: Optional[str] = None
71
+ ) -> list[git.Commit]:
60
72
  """Get commits from repository."""
61
73
  if branch:
62
74
  try:
@@ -68,14 +80,14 @@ class GitAnalyzer:
68
80
  # Get commits from all branches
69
81
  commits = []
70
82
  for ref in repo.refs:
71
- if ref.name.startswith('origin/'):
83
+ if ref.name.startswith("origin/"):
72
84
  continue # Skip remote branches
73
85
  try:
74
86
  branch_commits = list(repo.iter_commits(ref, since=since))
75
87
  commits.extend(branch_commits)
76
88
  except git.GitCommandError:
77
89
  continue
78
-
90
+
79
91
  # Remove duplicates while preserving order
80
92
  seen = set()
81
93
  unique_commits = []
@@ -83,93 +95,102 @@ class GitAnalyzer:
83
95
  if commit.hexsha not in seen:
84
96
  seen.add(commit.hexsha)
85
97
  unique_commits.append(commit)
86
-
98
+
87
99
  commits = unique_commits
88
-
100
+
89
101
  # Sort by date
90
102
  return sorted(commits, key=lambda c: c.committed_datetime)
91
-
92
- def _batch_commits(self, commits: List[git.Commit],
93
- batch_size: int) -> Generator[List[git.Commit], None, None]:
103
+
104
+ def _batch_commits(
105
+ self, commits: list[git.Commit], batch_size: int
106
+ ) -> Generator[list[git.Commit], None, None]:
94
107
  """Yield batches of commits."""
95
108
  for i in range(0, len(commits), batch_size):
96
- yield commits[i:i + batch_size]
97
-
98
- def _process_batch(self, repo: Repo, repo_path: Path,
99
- commits: List[git.Commit]) -> List[Dict[str, Any]]:
109
+ yield commits[i : i + batch_size]
110
+
111
+ def _process_batch(
112
+ self, repo: Repo, repo_path: Path, commits: list[git.Commit]
113
+ ) -> list[dict[str, Any]]:
100
114
  """Process a batch of commits."""
101
115
  results = []
102
-
116
+
103
117
  for commit in commits:
104
118
  # Check cache first
105
119
  cached = self.cache.get_cached_commit(str(repo_path), commit.hexsha)
106
120
  if cached:
107
121
  results.append(cached)
108
122
  continue
109
-
123
+
110
124
  # Analyze commit
111
125
  commit_data = self._analyze_commit(repo, commit, repo_path)
112
126
  results.append(commit_data)
113
-
127
+
114
128
  return results
115
-
116
- def _analyze_commit(self, repo: Repo, commit: git.Commit, repo_path: Path) -> Dict[str, Any]:
129
+
130
+ def _analyze_commit(self, repo: Repo, commit: git.Commit, repo_path: Path) -> dict[str, Any]:
117
131
  """Analyze a single commit."""
118
132
  # Basic commit data
119
133
  commit_data = {
120
- 'hash': commit.hexsha,
121
- 'author_name': commit.author.name,
122
- 'author_email': commit.author.email,
123
- 'message': commit.message,
124
- 'timestamp': commit.committed_datetime,
125
- 'is_merge': len(commit.parents) > 1
134
+ "hash": commit.hexsha,
135
+ "author_name": commit.author.name,
136
+ "author_email": commit.author.email,
137
+ "message": commit.message,
138
+ "timestamp": commit.committed_datetime,
139
+ "is_merge": len(commit.parents) > 1,
126
140
  }
127
-
141
+
128
142
  # Get branch name
129
- commit_data['branch'] = self._get_commit_branch(repo, commit)
130
-
143
+ commit_data["branch"] = self._get_commit_branch(repo, commit)
144
+
131
145
  # Map branch to project
132
- commit_data['inferred_project'] = self.branch_mapper.map_branch_to_project(
133
- commit_data['branch'], repo_path
146
+ commit_data["inferred_project"] = self.branch_mapper.map_branch_to_project(
147
+ str(commit_data["branch"]), repo_path
134
148
  )
135
-
136
- # Calculate metrics
149
+
150
+ # Calculate metrics - use raw stats for backward compatibility
137
151
  stats = commit.stats.total
138
- commit_data['files_changed'] = stats.get('files', 0)
139
- commit_data['insertions'] = stats.get('insertions', 0)
140
- commit_data['deletions'] = stats.get('deletions', 0)
141
-
152
+ commit_data["files_changed"] = int(stats.get("files", 0)) if hasattr(stats, "get") else 0
153
+ commit_data["insertions"] = int(stats.get("insertions", 0)) if hasattr(stats, "get") else 0
154
+ commit_data["deletions"] = int(stats.get("deletions", 0)) if hasattr(stats, "get") else 0
155
+
156
+ # Calculate filtered metrics (excluding boilerplate/generated files)
157
+ filtered_stats = self._calculate_filtered_stats(commit)
158
+ commit_data["filtered_files_changed"] = filtered_stats["files"]
159
+ commit_data["filtered_insertions"] = filtered_stats["insertions"]
160
+ commit_data["filtered_deletions"] = filtered_stats["deletions"]
161
+
142
162
  # Extract story points
143
- commit_data['story_points'] = self.story_point_extractor.extract_from_text(
163
+ message_str = (
144
164
  commit.message
165
+ if isinstance(commit.message, str)
166
+ else commit.message.decode("utf-8", errors="ignore")
145
167
  )
146
-
168
+ commit_data["story_points"] = self.story_point_extractor.extract_from_text(message_str)
169
+
147
170
  # Extract ticket references
148
- commit_data['ticket_references'] = self.ticket_extractor.extract_from_text(
149
- commit.message
150
- )
151
-
171
+ commit_data["ticket_references"] = self.ticket_extractor.extract_from_text(message_str)
172
+
152
173
  # Calculate complexity delta
153
- commit_data['complexity_delta'] = self._calculate_complexity_delta(commit)
154
-
174
+ commit_data["complexity_delta"] = self._calculate_complexity_delta(commit)
175
+
155
176
  return commit_data
156
-
177
+
157
178
  def _get_commit_branch(self, repo: Repo, commit: git.Commit) -> str:
158
179
  """Get the branch name for a commit."""
159
180
  # This is a simplified approach - getting the first branch that contains the commit
160
181
  for branch in repo.branches:
161
182
  if commit in repo.iter_commits(branch):
162
183
  return branch.name
163
- return 'unknown'
164
-
184
+ return "unknown"
185
+
165
186
  def _calculate_complexity_delta(self, commit: git.Commit) -> float:
166
187
  """Calculate complexity change for a commit."""
167
188
  total_delta = 0.0
168
-
189
+
169
190
  for diff in commit.diff(commit.parents[0] if commit.parents else None):
170
- if not self._is_code_file(diff.b_path or diff.a_path or ''):
191
+ if not self._is_code_file(diff.b_path or diff.a_path or ""):
171
192
  continue
172
-
193
+
173
194
  # Simple complexity estimation based on diff size
174
195
  # In a real implementation, you'd parse the code and calculate cyclomatic complexity
175
196
  if diff.new_file:
@@ -178,18 +199,94 @@ class GitAnalyzer:
178
199
  total_delta -= diff.a_blob.size / 100 if diff.a_blob else 0
179
200
  else:
180
201
  # Modified file - estimate based on change size
181
- added = len(diff.diff.decode('utf-8', errors='ignore').split('\n+')) if diff.diff else 0
182
- removed = len(diff.diff.decode('utf-8', errors='ignore').split('\n-')) if diff.diff else 0
183
- total_delta += (added - removed) / 10
184
-
202
+ if diff.diff:
203
+ diff_content = (
204
+ diff.diff
205
+ if isinstance(diff.diff, str)
206
+ else diff.diff.decode("utf-8", errors="ignore")
207
+ )
208
+ added = len(diff_content.split("\n+"))
209
+ removed = len(diff_content.split("\n-"))
210
+ total_delta += (added - removed) / 10
211
+
185
212
  return total_delta
186
-
213
+
187
214
  def _is_code_file(self, filepath: str) -> bool:
188
215
  """Check if file is a code file."""
189
216
  code_extensions = {
190
- '.py', '.js', '.ts', '.java', '.cpp', '.c', '.h', '.hpp',
191
- '.go', '.rs', '.rb', '.php', '.swift', '.kt', '.scala',
192
- '.cs', '.vb', '.r', '.m', '.mm', '.f90', '.f95', '.lua'
217
+ ".py",
218
+ ".js",
219
+ ".ts",
220
+ ".java",
221
+ ".cpp",
222
+ ".c",
223
+ ".h",
224
+ ".hpp",
225
+ ".go",
226
+ ".rs",
227
+ ".rb",
228
+ ".php",
229
+ ".swift",
230
+ ".kt",
231
+ ".scala",
232
+ ".cs",
233
+ ".vb",
234
+ ".r",
235
+ ".m",
236
+ ".mm",
237
+ ".f90",
238
+ ".f95",
239
+ ".lua",
193
240
  }
194
-
195
- return any(filepath.endswith(ext) for ext in code_extensions)
241
+
242
+ return any(filepath.endswith(ext) for ext in code_extensions)
243
+
244
+ def _should_exclude_file(self, filepath: str) -> bool:
245
+ """Check if file should be excluded from line counting."""
246
+ if not filepath:
247
+ return False
248
+
249
+ # Normalize path separators for consistent matching
250
+ filepath = filepath.replace("\\", "/")
251
+
252
+ # Check against exclude patterns
253
+ return any(fnmatch.fnmatch(filepath, pattern) for pattern in self.exclude_paths)
254
+
255
+ def _calculate_filtered_stats(self, commit: git.Commit) -> dict[str, int]:
256
+ """Calculate commit statistics excluding boilerplate/generated files."""
257
+ filtered_stats = {"files": 0, "insertions": 0, "deletions": 0}
258
+
259
+ # For initial commits or commits without parents
260
+ parent = commit.parents[0] if commit.parents else None
261
+
262
+ try:
263
+ for diff in commit.diff(parent):
264
+ # Get file path
265
+ file_path = diff.b_path if diff.b_path else diff.a_path
266
+ if not file_path:
267
+ continue
268
+
269
+ # Skip excluded files
270
+ if self._should_exclude_file(file_path):
271
+ continue
272
+
273
+ # Count the file
274
+ filtered_stats["files"] += 1
275
+
276
+ # Count insertions and deletions
277
+ if diff.diff:
278
+ diff_text = (
279
+ diff.diff
280
+ if isinstance(diff.diff, str)
281
+ else diff.diff.decode("utf-8", errors="ignore")
282
+ )
283
+ for line in diff_text.split("\n"):
284
+ if line.startswith("+") and not line.startswith("+++"):
285
+ filtered_stats["insertions"] += 1
286
+ elif line.startswith("-") and not line.startswith("---"):
287
+ filtered_stats["deletions"] += 1
288
+ except Exception:
289
+ # If we can't calculate filtered stats, return zeros
290
+ pass
291
+
292
+ return filtered_stats