gitflow-analytics 1.0.3__py3-none-any.whl → 1.3.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116)
  1. gitflow_analytics/_version.py +1 -1
  2. gitflow_analytics/classification/__init__.py +31 -0
  3. gitflow_analytics/classification/batch_classifier.py +752 -0
  4. gitflow_analytics/classification/classifier.py +464 -0
  5. gitflow_analytics/classification/feature_extractor.py +725 -0
  6. gitflow_analytics/classification/linguist_analyzer.py +574 -0
  7. gitflow_analytics/classification/model.py +455 -0
  8. gitflow_analytics/cli.py +4108 -350
  9. gitflow_analytics/cli_rich.py +198 -48
  10. gitflow_analytics/config/__init__.py +43 -0
  11. gitflow_analytics/config/errors.py +261 -0
  12. gitflow_analytics/config/loader.py +904 -0
  13. gitflow_analytics/config/profiles.py +264 -0
  14. gitflow_analytics/config/repository.py +124 -0
  15. gitflow_analytics/config/schema.py +441 -0
  16. gitflow_analytics/config/validator.py +154 -0
  17. gitflow_analytics/config.py +44 -508
  18. gitflow_analytics/core/analyzer.py +1209 -98
  19. gitflow_analytics/core/cache.py +1337 -29
  20. gitflow_analytics/core/data_fetcher.py +1193 -0
  21. gitflow_analytics/core/identity.py +363 -14
  22. gitflow_analytics/core/metrics_storage.py +526 -0
  23. gitflow_analytics/core/progress.py +372 -0
  24. gitflow_analytics/core/schema_version.py +269 -0
  25. gitflow_analytics/extractors/ml_tickets.py +1100 -0
  26. gitflow_analytics/extractors/story_points.py +8 -1
  27. gitflow_analytics/extractors/tickets.py +749 -11
  28. gitflow_analytics/identity_llm/__init__.py +6 -0
  29. gitflow_analytics/identity_llm/analysis_pass.py +231 -0
  30. gitflow_analytics/identity_llm/analyzer.py +464 -0
  31. gitflow_analytics/identity_llm/models.py +76 -0
  32. gitflow_analytics/integrations/github_integration.py +175 -11
  33. gitflow_analytics/integrations/jira_integration.py +461 -24
  34. gitflow_analytics/integrations/orchestrator.py +124 -1
  35. gitflow_analytics/metrics/activity_scoring.py +322 -0
  36. gitflow_analytics/metrics/branch_health.py +470 -0
  37. gitflow_analytics/metrics/dora.py +379 -20
  38. gitflow_analytics/models/database.py +843 -53
  39. gitflow_analytics/pm_framework/__init__.py +115 -0
  40. gitflow_analytics/pm_framework/adapters/__init__.py +50 -0
  41. gitflow_analytics/pm_framework/adapters/jira_adapter.py +1845 -0
  42. gitflow_analytics/pm_framework/base.py +406 -0
  43. gitflow_analytics/pm_framework/models.py +211 -0
  44. gitflow_analytics/pm_framework/orchestrator.py +652 -0
  45. gitflow_analytics/pm_framework/registry.py +333 -0
  46. gitflow_analytics/qualitative/__init__.py +9 -10
  47. gitflow_analytics/qualitative/chatgpt_analyzer.py +259 -0
  48. gitflow_analytics/qualitative/classifiers/__init__.py +3 -3
  49. gitflow_analytics/qualitative/classifiers/change_type.py +518 -244
  50. gitflow_analytics/qualitative/classifiers/domain_classifier.py +272 -165
  51. gitflow_analytics/qualitative/classifiers/intent_analyzer.py +321 -222
  52. gitflow_analytics/qualitative/classifiers/llm/__init__.py +35 -0
  53. gitflow_analytics/qualitative/classifiers/llm/base.py +193 -0
  54. gitflow_analytics/qualitative/classifiers/llm/batch_processor.py +383 -0
  55. gitflow_analytics/qualitative/classifiers/llm/cache.py +479 -0
  56. gitflow_analytics/qualitative/classifiers/llm/cost_tracker.py +435 -0
  57. gitflow_analytics/qualitative/classifiers/llm/openai_client.py +403 -0
  58. gitflow_analytics/qualitative/classifiers/llm/prompts.py +373 -0
  59. gitflow_analytics/qualitative/classifiers/llm/response_parser.py +287 -0
  60. gitflow_analytics/qualitative/classifiers/llm_commit_classifier.py +607 -0
  61. gitflow_analytics/qualitative/classifiers/risk_analyzer.py +215 -189
  62. gitflow_analytics/qualitative/core/__init__.py +4 -4
  63. gitflow_analytics/qualitative/core/llm_fallback.py +239 -235
  64. gitflow_analytics/qualitative/core/nlp_engine.py +157 -148
  65. gitflow_analytics/qualitative/core/pattern_cache.py +214 -192
  66. gitflow_analytics/qualitative/core/processor.py +381 -248
  67. gitflow_analytics/qualitative/enhanced_analyzer.py +2236 -0
  68. gitflow_analytics/qualitative/example_enhanced_usage.py +420 -0
  69. gitflow_analytics/qualitative/models/__init__.py +7 -7
  70. gitflow_analytics/qualitative/models/schemas.py +155 -121
  71. gitflow_analytics/qualitative/utils/__init__.py +4 -4
  72. gitflow_analytics/qualitative/utils/batch_processor.py +136 -123
  73. gitflow_analytics/qualitative/utils/cost_tracker.py +142 -140
  74. gitflow_analytics/qualitative/utils/metrics.py +172 -158
  75. gitflow_analytics/qualitative/utils/text_processing.py +146 -104
  76. gitflow_analytics/reports/__init__.py +100 -0
  77. gitflow_analytics/reports/analytics_writer.py +539 -14
  78. gitflow_analytics/reports/base.py +648 -0
  79. gitflow_analytics/reports/branch_health_writer.py +322 -0
  80. gitflow_analytics/reports/classification_writer.py +924 -0
  81. gitflow_analytics/reports/cli_integration.py +427 -0
  82. gitflow_analytics/reports/csv_writer.py +1676 -212
  83. gitflow_analytics/reports/data_models.py +504 -0
  84. gitflow_analytics/reports/database_report_generator.py +427 -0
  85. gitflow_analytics/reports/example_usage.py +344 -0
  86. gitflow_analytics/reports/factory.py +499 -0
  87. gitflow_analytics/reports/formatters.py +698 -0
  88. gitflow_analytics/reports/html_generator.py +1116 -0
  89. gitflow_analytics/reports/interfaces.py +489 -0
  90. gitflow_analytics/reports/json_exporter.py +2770 -0
  91. gitflow_analytics/reports/narrative_writer.py +2287 -158
  92. gitflow_analytics/reports/story_point_correlation.py +1144 -0
  93. gitflow_analytics/reports/weekly_trends_writer.py +389 -0
  94. gitflow_analytics/training/__init__.py +5 -0
  95. gitflow_analytics/training/model_loader.py +377 -0
  96. gitflow_analytics/training/pipeline.py +550 -0
  97. gitflow_analytics/tui/__init__.py +1 -1
  98. gitflow_analytics/tui/app.py +129 -126
  99. gitflow_analytics/tui/screens/__init__.py +3 -3
  100. gitflow_analytics/tui/screens/analysis_progress_screen.py +188 -179
  101. gitflow_analytics/tui/screens/configuration_screen.py +154 -178
  102. gitflow_analytics/tui/screens/loading_screen.py +100 -110
  103. gitflow_analytics/tui/screens/main_screen.py +89 -72
  104. gitflow_analytics/tui/screens/results_screen.py +305 -281
  105. gitflow_analytics/tui/widgets/__init__.py +2 -2
  106. gitflow_analytics/tui/widgets/data_table.py +67 -69
  107. gitflow_analytics/tui/widgets/export_modal.py +76 -76
  108. gitflow_analytics/tui/widgets/progress_widget.py +41 -46
  109. gitflow_analytics-1.3.6.dist-info/METADATA +1015 -0
  110. gitflow_analytics-1.3.6.dist-info/RECORD +122 -0
  111. gitflow_analytics-1.0.3.dist-info/METADATA +0 -490
  112. gitflow_analytics-1.0.3.dist-info/RECORD +0 -62
  113. {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/WHEEL +0 -0
  114. {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/entry_points.txt +0 -0
  115. {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/licenses/LICENSE +0 -0
  116. {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/top_level.txt +0 -0
@@ -8,6 +8,7 @@ from github import Github
8
8
  from github.GithubException import RateLimitExceededException, UnknownObjectException
9
9
 
10
10
  from ..core.cache import GitAnalysisCache
11
+ from ..core.schema_version import create_schema_manager
11
12
 
12
13
 
13
14
  class GitHubIntegration:
@@ -28,30 +29,107 @@ class GitHubIntegration:
28
29
  self.backoff_factor = backoff_factor
29
30
  self.allowed_ticket_platforms = allowed_ticket_platforms
30
31
 
32
+ # Initialize schema version manager for incremental API data fetching
33
+ self.schema_manager = create_schema_manager(cache.cache_dir)
34
+
35
+ def _get_incremental_fetch_date(
36
+ self, component: str, requested_since: datetime, config: dict[str, Any]
37
+ ) -> datetime:
38
+ """Determine the actual fetch date based on schema versioning."""
39
+ # Ensure requested_since is timezone-aware
40
+ if requested_since.tzinfo is None:
41
+ requested_since = requested_since.replace(tzinfo=timezone.utc)
42
+
43
+ # Check if schema has changed
44
+ if self.schema_manager.has_schema_changed(component, config):
45
+ print(
46
+ f" 🔄 {component.title()} API schema changed, fetching all data since {requested_since}"
47
+ )
48
+ return requested_since
49
+
50
+ # Get last processed date
51
+ last_processed = self.schema_manager.get_last_processed_date(component)
52
+ if not last_processed:
53
+ print(f" 📥 First {component} API fetch, getting data since {requested_since}")
54
+ return requested_since
55
+
56
+ # Ensure last_processed is timezone-aware
57
+ if last_processed.tzinfo is None:
58
+ last_processed = last_processed.replace(tzinfo=timezone.utc)
59
+
60
+ # Use the later of the two dates (don't go backwards)
61
+ fetch_since = max(last_processed, requested_since)
62
+
63
+ if fetch_since > requested_since:
64
+ print(f" ⚡ {component.title()} incremental fetch since {fetch_since}")
65
+ else:
66
+ print(f" 📥 {component.title()} full fetch since {requested_since}")
67
+
68
+ return fetch_since
69
+
31
70
  def enrich_repository_with_prs(
32
71
  self, repo_name: str, commits: list[dict[str, Any]], since: datetime
33
72
  ) -> list[dict[str, Any]]:
34
- """Enrich repository commits with PR data."""
73
+ """Enrich repository commits with PR data using incremental fetching."""
35
74
  try:
36
75
  repo = self.github.get_repo(repo_name)
37
76
  except UnknownObjectException:
38
77
  print(f" ⚠️ GitHub repo not found: {repo_name}")
39
78
  return []
40
79
 
41
- # Get PRs for the time period
42
- prs = self._get_pull_requests(repo, since)
80
+ # Check if we need to fetch new PR data
81
+ github_config = {
82
+ "rate_limit_retries": self.rate_limit_retries,
83
+ "backoff_factor": self.backoff_factor,
84
+ "allowed_ticket_platforms": self.allowed_ticket_platforms,
85
+ }
43
86
 
44
- # Build commit to PR mapping
45
- commit_to_pr = {}
46
- for pr in prs:
87
+ # Determine the actual start date for fetching
88
+ fetch_since = self._get_incremental_fetch_date("github", since, github_config)
89
+
90
+ # Check cache first for existing PRs in this time period
91
+ cached_prs_data = self._get_cached_prs_bulk(repo_name, fetch_since)
92
+
93
+ # Get PRs for the time period (may be incremental)
94
+ prs = self._get_pull_requests(repo, fetch_since)
95
+
96
+ # Track cache performance
97
+ cached_pr_numbers = {pr["number"] for pr in cached_prs_data}
98
+ new_prs = [pr for pr in prs if pr.number not in cached_pr_numbers]
99
+ cache_hits = len(cached_prs_data)
100
+ cache_misses = len(new_prs)
101
+
102
+ if cache_hits > 0 or cache_misses > 0:
103
+ print(
104
+ f" 📊 GitHub PR cache: {cache_hits} hits, {cache_misses} misses ({cache_hits/(cache_hits+cache_misses)*100:.1f}% hit rate)"
105
+ if (cache_hits + cache_misses) > 0
106
+ else ""
107
+ )
108
+
109
+ # Update schema tracking after successful fetch
110
+ if prs:
111
+ self.schema_manager.mark_date_processed("github", since, github_config)
112
+
113
+ # Process new PRs and cache them
114
+ new_pr_data = []
115
+ for pr in new_prs:
47
116
  pr_data = self._extract_pr_data(pr)
117
+ new_pr_data.append(pr_data)
48
118
 
49
- # Cache PR data
50
- self.cache.cache_pr(repo_name, pr_data)
119
+ # Bulk cache new PR data
120
+ if new_pr_data:
121
+ self._cache_prs_bulk(repo_name, new_pr_data)
122
+ print(f" 💾 Cached {len(new_pr_data)} new GitHub PRs")
123
+
124
+ # Combine cached and new PR data
125
+ all_pr_data = cached_prs_data + new_pr_data
51
126
 
52
- # Map commits to this PR
53
- for commit in pr.get_commits():
54
- commit_to_pr[commit.sha] = pr_data
127
+ # Build commit to PR mapping
128
+ commit_to_pr = {}
129
+ for pr_data in all_pr_data:
130
+ # Map commits to this PR (need to get commit hashes from cached data)
131
+ for commit_hash in pr_data.get("commit_hashes", []):
132
+ commit_to_pr[commit_hash] = pr_data
55
133
 
56
134
  # Enrich commits with PR data
57
135
  enriched_prs = []
@@ -73,6 +151,92 @@ class GitHubIntegration:
73
151
 
74
152
  return enriched_prs
75
153
 
154
+ def _get_cached_prs_bulk(self, repo_name: str, since: datetime) -> list[dict[str, Any]]:
155
+ """Get cached PRs for a repository from the given date onwards.
156
+
157
+ WHY: Bulk PR cache lookups avoid redundant GitHub API calls and
158
+ significantly improve performance on repeated analysis runs.
159
+
160
+ Args:
161
+ repo_name: GitHub repository name (e.g., "owner/repo")
162
+ since: Only return PRs merged after this date
163
+
164
+ Returns:
165
+ List of cached PR data dictionaries
166
+ """
167
+ cached_prs = []
168
+ with self.cache.get_session() as session:
169
+ from ..models.database import PullRequestCache
170
+
171
+ # Ensure since is timezone-aware for comparison
172
+ if since.tzinfo is None:
173
+ since = since.replace(tzinfo=timezone.utc)
174
+
175
+ cached_results = (
176
+ session.query(PullRequestCache)
177
+ .filter(
178
+ PullRequestCache.repo_path == repo_name,
179
+ PullRequestCache.merged_at >= since.replace(tzinfo=None), # Store as naive UTC
180
+ )
181
+ .all()
182
+ )
183
+
184
+ for cached_pr in cached_results:
185
+ if not self._is_pr_stale(cached_pr.cached_at):
186
+ pr_data = {
187
+ "number": cached_pr.pr_number,
188
+ "title": cached_pr.title or "",
189
+ "description": cached_pr.description or "",
190
+ "author": cached_pr.author or "",
191
+ "created_at": cached_pr.created_at,
192
+ "merged_at": cached_pr.merged_at,
193
+ "story_points": cached_pr.story_points or 0,
194
+ "labels": cached_pr.labels or [],
195
+ "commit_hashes": cached_pr.commit_hashes or [],
196
+ "ticket_references": [], # Would need additional extraction
197
+ "review_comments": 0, # Not stored in current schema
198
+ "changed_files": 0, # Not stored in current schema
199
+ "additions": 0, # Not stored in current schema
200
+ "deletions": 0, # Not stored in current schema
201
+ }
202
+ cached_prs.append(pr_data)
203
+
204
+ return cached_prs
205
+
206
+ def _cache_prs_bulk(self, repo_name: str, prs: list[dict[str, Any]]) -> None:
207
+ """Cache multiple PRs in bulk for better performance.
208
+
209
+ WHY: Bulk caching is more efficient than individual cache operations,
210
+ reducing database overhead when caching many PRs from GitHub API.
211
+
212
+ Args:
213
+ repo_name: GitHub repository name
214
+ prs: List of PR data dictionaries to cache
215
+ """
216
+ if not prs:
217
+ return
218
+
219
+ for pr_data in prs:
220
+ # Use existing cache_pr method which handles upserts properly
221
+ self.cache.cache_pr(repo_name, pr_data)
222
+
223
+ def _is_pr_stale(self, cached_at: datetime) -> bool:
224
+ """Check if cached PR data is stale based on cache TTL.
225
+
226
+ Args:
227
+ cached_at: When the PR was cached
228
+
229
+ Returns:
230
+ True if stale and should be refreshed, False if still fresh
231
+ """
232
+ from datetime import timedelta
233
+
234
+ if self.cache.ttl_hours == 0: # No expiration
235
+ return False
236
+
237
+ stale_threshold = datetime.utcnow() - timedelta(hours=self.cache.ttl_hours)
238
+ return cached_at < stale_threshold
239
+
76
240
  def _get_pull_requests(self, repo, since: datetime) -> list[Any]:
77
241
  """Get pull requests with rate limit handling."""
78
242
  prs = []