gitflow-analytics 1.0.1__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. gitflow_analytics/__init__.py +11 -11
  2. gitflow_analytics/_version.py +2 -2
  3. gitflow_analytics/cli.py +612 -258
  4. gitflow_analytics/cli_rich.py +353 -0
  5. gitflow_analytics/config.py +251 -141
  6. gitflow_analytics/core/analyzer.py +140 -103
  7. gitflow_analytics/core/branch_mapper.py +132 -132
  8. gitflow_analytics/core/cache.py +240 -169
  9. gitflow_analytics/core/identity.py +210 -173
  10. gitflow_analytics/extractors/base.py +13 -11
  11. gitflow_analytics/extractors/story_points.py +70 -59
  12. gitflow_analytics/extractors/tickets.py +101 -87
  13. gitflow_analytics/integrations/github_integration.py +84 -77
  14. gitflow_analytics/integrations/jira_integration.py +116 -104
  15. gitflow_analytics/integrations/orchestrator.py +86 -85
  16. gitflow_analytics/metrics/dora.py +181 -177
  17. gitflow_analytics/models/database.py +190 -53
  18. gitflow_analytics/qualitative/__init__.py +30 -0
  19. gitflow_analytics/qualitative/classifiers/__init__.py +13 -0
  20. gitflow_analytics/qualitative/classifiers/change_type.py +468 -0
  21. gitflow_analytics/qualitative/classifiers/domain_classifier.py +399 -0
  22. gitflow_analytics/qualitative/classifiers/intent_analyzer.py +436 -0
  23. gitflow_analytics/qualitative/classifiers/risk_analyzer.py +412 -0
  24. gitflow_analytics/qualitative/core/__init__.py +13 -0
  25. gitflow_analytics/qualitative/core/llm_fallback.py +653 -0
  26. gitflow_analytics/qualitative/core/nlp_engine.py +373 -0
  27. gitflow_analytics/qualitative/core/pattern_cache.py +457 -0
  28. gitflow_analytics/qualitative/core/processor.py +540 -0
  29. gitflow_analytics/qualitative/models/__init__.py +25 -0
  30. gitflow_analytics/qualitative/models/schemas.py +272 -0
  31. gitflow_analytics/qualitative/utils/__init__.py +13 -0
  32. gitflow_analytics/qualitative/utils/batch_processor.py +326 -0
  33. gitflow_analytics/qualitative/utils/cost_tracker.py +343 -0
  34. gitflow_analytics/qualitative/utils/metrics.py +347 -0
  35. gitflow_analytics/qualitative/utils/text_processing.py +243 -0
  36. gitflow_analytics/reports/analytics_writer.py +11 -4
  37. gitflow_analytics/reports/csv_writer.py +51 -31
  38. gitflow_analytics/reports/narrative_writer.py +16 -14
  39. gitflow_analytics/tui/__init__.py +5 -0
  40. gitflow_analytics/tui/app.py +721 -0
  41. gitflow_analytics/tui/screens/__init__.py +8 -0
  42. gitflow_analytics/tui/screens/analysis_progress_screen.py +487 -0
  43. gitflow_analytics/tui/screens/configuration_screen.py +547 -0
  44. gitflow_analytics/tui/screens/loading_screen.py +358 -0
  45. gitflow_analytics/tui/screens/main_screen.py +304 -0
  46. gitflow_analytics/tui/screens/results_screen.py +698 -0
  47. gitflow_analytics/tui/widgets/__init__.py +7 -0
  48. gitflow_analytics/tui/widgets/data_table.py +257 -0
  49. gitflow_analytics/tui/widgets/export_modal.py +301 -0
  50. gitflow_analytics/tui/widgets/progress_widget.py +192 -0
  51. {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.0.3.dist-info}/METADATA +31 -4
  52. gitflow_analytics-1.0.3.dist-info/RECORD +62 -0
  53. gitflow_analytics-1.0.1.dist-info/RECORD +0 -31
  54. {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.0.3.dist-info}/WHEEL +0 -0
  55. {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.0.3.dist-info}/entry_points.txt +0 -0
  56. {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.0.3.dist-info}/licenses/LICENSE +0 -0
  57. {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.0.3.dist-info}/top_level.txt +0 -0
@@ -1,8 +1,10 @@
1
1
  """Git repository analyzer with batch processing support."""
2
+
2
3
  import fnmatch
4
+ from collections.abc import Generator
3
5
  from datetime import datetime
4
6
  from pathlib import Path
5
- from typing import Any, Dict, Generator, List, Optional
7
+ from typing import Any, Optional
6
8
 
7
9
  import git
8
10
  from git import Repo
@@ -16,11 +18,15 @@ from .cache import GitAnalysisCache
16
18
 
17
19
  class GitAnalyzer:
18
20
  """Analyze Git repositories with caching and batch processing."""
19
-
20
- def __init__(self, cache: GitAnalysisCache, batch_size: int = 1000,
21
- branch_mapping_rules: Optional[Dict[str, List[str]]] = None,
22
- allowed_ticket_platforms: Optional[List[str]] = None,
23
- exclude_paths: Optional[List[str]] = None):
21
+
22
+ def __init__(
23
+ self,
24
+ cache: GitAnalysisCache,
25
+ batch_size: int = 1000,
26
+ branch_mapping_rules: Optional[dict[str, list[str]]] = None,
27
+ allowed_ticket_platforms: Optional[list[str]] = None,
28
+ exclude_paths: Optional[list[str]] = None,
29
+ ):
24
30
  """Initialize analyzer with cache."""
25
31
  self.cache = cache
26
32
  self.batch_size = batch_size
@@ -28,39 +34,41 @@ class GitAnalyzer:
28
34
  self.ticket_extractor = TicketExtractor(allowed_platforms=allowed_ticket_platforms)
29
35
  self.branch_mapper = BranchToProjectMapper(branch_mapping_rules)
30
36
  self.exclude_paths = exclude_paths or []
31
-
32
- def analyze_repository(self, repo_path: Path, since: datetime,
33
- branch: Optional[str] = None) -> List[Dict[str, Any]]:
37
+
38
+ def analyze_repository(
39
+ self, repo_path: Path, since: datetime, branch: Optional[str] = None
40
+ ) -> list[dict[str, Any]]:
34
41
  """Analyze a Git repository with batch processing."""
35
42
  try:
36
43
  repo = Repo(repo_path)
37
44
  except Exception as e:
38
45
  raise ValueError(f"Failed to open repository at {repo_path}: {e}") from e
39
-
46
+
40
47
  # Get commits to analyze
41
48
  commits = self._get_commits(repo, since, branch)
42
49
  total_commits = len(commits)
43
-
50
+
44
51
  if total_commits == 0:
45
52
  return []
46
-
53
+
47
54
  analyzed_commits = []
48
-
55
+
49
56
  # Process in batches with progress bar
50
57
  with tqdm(total=total_commits, desc=f"Analyzing {repo_path.name}") as pbar:
51
58
  for batch in self._batch_commits(commits, self.batch_size):
52
59
  batch_results = self._process_batch(repo, repo_path, batch)
53
60
  analyzed_commits.extend(batch_results)
54
-
61
+
55
62
  # Cache the batch
56
63
  self.cache.cache_commits_batch(str(repo_path), batch_results)
57
-
64
+
58
65
  pbar.update(len(batch))
59
-
66
+
60
67
  return analyzed_commits
61
-
62
- def _get_commits(self, repo: Repo, since: datetime,
63
- branch: Optional[str] = None) -> List[git.Commit]:
68
+
69
+ def _get_commits(
70
+ self, repo: Repo, since: datetime, branch: Optional[str] = None
71
+ ) -> list[git.Commit]:
64
72
  """Get commits from repository."""
65
73
  if branch:
66
74
  try:
@@ -72,14 +80,14 @@ class GitAnalyzer:
72
80
  # Get commits from all branches
73
81
  commits = []
74
82
  for ref in repo.refs:
75
- if ref.name.startswith('origin/'):
83
+ if ref.name.startswith("origin/"):
76
84
  continue # Skip remote branches
77
85
  try:
78
86
  branch_commits = list(repo.iter_commits(ref, since=since))
79
87
  commits.extend(branch_commits)
80
88
  except git.GitCommandError:
81
89
  continue
82
-
90
+
83
91
  # Remove duplicates while preserving order
84
92
  seen = set()
85
93
  unique_commits = []
@@ -87,99 +95,102 @@ class GitAnalyzer:
87
95
  if commit.hexsha not in seen:
88
96
  seen.add(commit.hexsha)
89
97
  unique_commits.append(commit)
90
-
98
+
91
99
  commits = unique_commits
92
-
100
+
93
101
  # Sort by date
94
102
  return sorted(commits, key=lambda c: c.committed_datetime)
95
-
96
- def _batch_commits(self, commits: List[git.Commit],
97
- batch_size: int) -> Generator[List[git.Commit], None, None]:
103
+
104
+ def _batch_commits(
105
+ self, commits: list[git.Commit], batch_size: int
106
+ ) -> Generator[list[git.Commit], None, None]:
98
107
  """Yield batches of commits."""
99
108
  for i in range(0, len(commits), batch_size):
100
- yield commits[i:i + batch_size]
101
-
102
- def _process_batch(self, repo: Repo, repo_path: Path,
103
- commits: List[git.Commit]) -> List[Dict[str, Any]]:
109
+ yield commits[i : i + batch_size]
110
+
111
+ def _process_batch(
112
+ self, repo: Repo, repo_path: Path, commits: list[git.Commit]
113
+ ) -> list[dict[str, Any]]:
104
114
  """Process a batch of commits."""
105
115
  results = []
106
-
116
+
107
117
  for commit in commits:
108
118
  # Check cache first
109
119
  cached = self.cache.get_cached_commit(str(repo_path), commit.hexsha)
110
120
  if cached:
111
121
  results.append(cached)
112
122
  continue
113
-
123
+
114
124
  # Analyze commit
115
125
  commit_data = self._analyze_commit(repo, commit, repo_path)
116
126
  results.append(commit_data)
117
-
127
+
118
128
  return results
119
-
120
- def _analyze_commit(self, repo: Repo, commit: git.Commit, repo_path: Path) -> Dict[str, Any]:
129
+
130
+ def _analyze_commit(self, repo: Repo, commit: git.Commit, repo_path: Path) -> dict[str, Any]:
121
131
  """Analyze a single commit."""
122
132
  # Basic commit data
123
133
  commit_data = {
124
- 'hash': commit.hexsha,
125
- 'author_name': commit.author.name,
126
- 'author_email': commit.author.email,
127
- 'message': commit.message,
128
- 'timestamp': commit.committed_datetime,
129
- 'is_merge': len(commit.parents) > 1
134
+ "hash": commit.hexsha,
135
+ "author_name": commit.author.name,
136
+ "author_email": commit.author.email,
137
+ "message": commit.message,
138
+ "timestamp": commit.committed_datetime,
139
+ "is_merge": len(commit.parents) > 1,
130
140
  }
131
-
141
+
132
142
  # Get branch name
133
- commit_data['branch'] = self._get_commit_branch(repo, commit)
134
-
143
+ commit_data["branch"] = self._get_commit_branch(repo, commit)
144
+
135
145
  # Map branch to project
136
- commit_data['inferred_project'] = self.branch_mapper.map_branch_to_project(
137
- commit_data['branch'], repo_path
146
+ commit_data["inferred_project"] = self.branch_mapper.map_branch_to_project(
147
+ str(commit_data["branch"]), repo_path
138
148
  )
139
-
149
+
140
150
  # Calculate metrics - use raw stats for backward compatibility
141
151
  stats = commit.stats.total
142
- commit_data['files_changed'] = stats.get('files', 0)
143
- commit_data['insertions'] = stats.get('insertions', 0)
144
- commit_data['deletions'] = stats.get('deletions', 0)
145
-
152
+ commit_data["files_changed"] = int(stats.get("files", 0)) if hasattr(stats, "get") else 0
153
+ commit_data["insertions"] = int(stats.get("insertions", 0)) if hasattr(stats, "get") else 0
154
+ commit_data["deletions"] = int(stats.get("deletions", 0)) if hasattr(stats, "get") else 0
155
+
146
156
  # Calculate filtered metrics (excluding boilerplate/generated files)
147
157
  filtered_stats = self._calculate_filtered_stats(commit)
148
- commit_data['filtered_files_changed'] = filtered_stats['files']
149
- commit_data['filtered_insertions'] = filtered_stats['insertions']
150
- commit_data['filtered_deletions'] = filtered_stats['deletions']
151
-
158
+ commit_data["filtered_files_changed"] = filtered_stats["files"]
159
+ commit_data["filtered_insertions"] = filtered_stats["insertions"]
160
+ commit_data["filtered_deletions"] = filtered_stats["deletions"]
161
+
152
162
  # Extract story points
153
- commit_data['story_points'] = self.story_point_extractor.extract_from_text(
163
+ message_str = (
154
164
  commit.message
165
+ if isinstance(commit.message, str)
166
+ else commit.message.decode("utf-8", errors="ignore")
155
167
  )
156
-
168
+ commit_data["story_points"] = self.story_point_extractor.extract_from_text(message_str)
169
+
157
170
  # Extract ticket references
158
- commit_data['ticket_references'] = self.ticket_extractor.extract_from_text(
159
- commit.message
160
- )
161
-
171
+ commit_data["ticket_references"] = self.ticket_extractor.extract_from_text(message_str)
172
+
162
173
  # Calculate complexity delta
163
- commit_data['complexity_delta'] = self._calculate_complexity_delta(commit)
164
-
174
+ commit_data["complexity_delta"] = self._calculate_complexity_delta(commit)
175
+
165
176
  return commit_data
166
-
177
+
167
178
  def _get_commit_branch(self, repo: Repo, commit: git.Commit) -> str:
168
179
  """Get the branch name for a commit."""
169
180
  # This is a simplified approach - getting the first branch that contains the commit
170
181
  for branch in repo.branches:
171
182
  if commit in repo.iter_commits(branch):
172
183
  return branch.name
173
- return 'unknown'
174
-
184
+ return "unknown"
185
+
175
186
  def _calculate_complexity_delta(self, commit: git.Commit) -> float:
176
187
  """Calculate complexity change for a commit."""
177
188
  total_delta = 0.0
178
-
189
+
179
190
  for diff in commit.diff(commit.parents[0] if commit.parents else None):
180
- if not self._is_code_file(diff.b_path or diff.a_path or ''):
191
+ if not self._is_code_file(diff.b_path or diff.a_path or ""):
181
192
  continue
182
-
193
+
183
194
  # Simple complexity estimation based on diff size
184
195
  # In a real implementation, you'd parse the code and calculate cyclomatic complexity
185
196
  if diff.new_file:
@@ -188,68 +199,94 @@ class GitAnalyzer:
188
199
  total_delta -= diff.a_blob.size / 100 if diff.a_blob else 0
189
200
  else:
190
201
  # Modified file - estimate based on change size
191
- added = len(diff.diff.decode('utf-8', errors='ignore').split('\n+')) if diff.diff else 0
192
- removed = len(diff.diff.decode('utf-8', errors='ignore').split('\n-')) if diff.diff else 0
193
- total_delta += (added - removed) / 10
194
-
202
+ if diff.diff:
203
+ diff_content = (
204
+ diff.diff
205
+ if isinstance(diff.diff, str)
206
+ else diff.diff.decode("utf-8", errors="ignore")
207
+ )
208
+ added = len(diff_content.split("\n+"))
209
+ removed = len(diff_content.split("\n-"))
210
+ total_delta += (added - removed) / 10
211
+
195
212
  return total_delta
196
-
213
+
197
214
  def _is_code_file(self, filepath: str) -> bool:
198
215
  """Check if file is a code file."""
199
216
  code_extensions = {
200
- '.py', '.js', '.ts', '.java', '.cpp', '.c', '.h', '.hpp',
201
- '.go', '.rs', '.rb', '.php', '.swift', '.kt', '.scala',
202
- '.cs', '.vb', '.r', '.m', '.mm', '.f90', '.f95', '.lua'
217
+ ".py",
218
+ ".js",
219
+ ".ts",
220
+ ".java",
221
+ ".cpp",
222
+ ".c",
223
+ ".h",
224
+ ".hpp",
225
+ ".go",
226
+ ".rs",
227
+ ".rb",
228
+ ".php",
229
+ ".swift",
230
+ ".kt",
231
+ ".scala",
232
+ ".cs",
233
+ ".vb",
234
+ ".r",
235
+ ".m",
236
+ ".mm",
237
+ ".f90",
238
+ ".f95",
239
+ ".lua",
203
240
  }
204
-
241
+
205
242
  return any(filepath.endswith(ext) for ext in code_extensions)
206
-
243
+
207
244
  def _should_exclude_file(self, filepath: str) -> bool:
208
245
  """Check if file should be excluded from line counting."""
209
246
  if not filepath:
210
247
  return False
211
-
248
+
212
249
  # Normalize path separators for consistent matching
213
- filepath = filepath.replace('\\', '/')
214
-
250
+ filepath = filepath.replace("\\", "/")
251
+
215
252
  # Check against exclude patterns
216
253
  return any(fnmatch.fnmatch(filepath, pattern) for pattern in self.exclude_paths)
217
-
218
- def _calculate_filtered_stats(self, commit: git.Commit) -> Dict[str, int]:
254
+
255
+ def _calculate_filtered_stats(self, commit: git.Commit) -> dict[str, int]:
219
256
  """Calculate commit statistics excluding boilerplate/generated files."""
220
- filtered_stats = {
221
- 'files': 0,
222
- 'insertions': 0,
223
- 'deletions': 0
224
- }
225
-
257
+ filtered_stats = {"files": 0, "insertions": 0, "deletions": 0}
258
+
226
259
  # For initial commits or commits without parents
227
260
  parent = commit.parents[0] if commit.parents else None
228
-
261
+
229
262
  try:
230
263
  for diff in commit.diff(parent):
231
264
  # Get file path
232
265
  file_path = diff.b_path if diff.b_path else diff.a_path
233
266
  if not file_path:
234
267
  continue
235
-
268
+
236
269
  # Skip excluded files
237
270
  if self._should_exclude_file(file_path):
238
271
  continue
239
-
272
+
240
273
  # Count the file
241
- filtered_stats['files'] += 1
242
-
274
+ filtered_stats["files"] += 1
275
+
243
276
  # Count insertions and deletions
244
277
  if diff.diff:
245
- diff_text = diff.diff.decode('utf-8', errors='ignore')
246
- for line in diff_text.split('\n'):
247
- if line.startswith('+') and not line.startswith('+++'):
248
- filtered_stats['insertions'] += 1
249
- elif line.startswith('-') and not line.startswith('---'):
250
- filtered_stats['deletions'] += 1
278
+ diff_text = (
279
+ diff.diff
280
+ if isinstance(diff.diff, str)
281
+ else diff.diff.decode("utf-8", errors="ignore")
282
+ )
283
+ for line in diff_text.split("\n"):
284
+ if line.startswith("+") and not line.startswith("+++"):
285
+ filtered_stats["insertions"] += 1
286
+ elif line.startswith("-") and not line.startswith("---"):
287
+ filtered_stats["deletions"] += 1
251
288
  except Exception:
252
289
  # If we can't calculate filtered stats, return zeros
253
290
  pass
254
-
255
- return filtered_stats
291
+
292
+ return filtered_stats