gitflow-analytics 1.0.3__py3-none-any.whl → 1.3.6__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the packages exactly as they appear in their public registry.
- gitflow_analytics/_version.py +1 -1
- gitflow_analytics/classification/__init__.py +31 -0
- gitflow_analytics/classification/batch_classifier.py +752 -0
- gitflow_analytics/classification/classifier.py +464 -0
- gitflow_analytics/classification/feature_extractor.py +725 -0
- gitflow_analytics/classification/linguist_analyzer.py +574 -0
- gitflow_analytics/classification/model.py +455 -0
- gitflow_analytics/cli.py +4108 -350
- gitflow_analytics/cli_rich.py +198 -48
- gitflow_analytics/config/__init__.py +43 -0
- gitflow_analytics/config/errors.py +261 -0
- gitflow_analytics/config/loader.py +904 -0
- gitflow_analytics/config/profiles.py +264 -0
- gitflow_analytics/config/repository.py +124 -0
- gitflow_analytics/config/schema.py +441 -0
- gitflow_analytics/config/validator.py +154 -0
- gitflow_analytics/config.py +44 -508
- gitflow_analytics/core/analyzer.py +1209 -98
- gitflow_analytics/core/cache.py +1337 -29
- gitflow_analytics/core/data_fetcher.py +1193 -0
- gitflow_analytics/core/identity.py +363 -14
- gitflow_analytics/core/metrics_storage.py +526 -0
- gitflow_analytics/core/progress.py +372 -0
- gitflow_analytics/core/schema_version.py +269 -0
- gitflow_analytics/extractors/ml_tickets.py +1100 -0
- gitflow_analytics/extractors/story_points.py +8 -1
- gitflow_analytics/extractors/tickets.py +749 -11
- gitflow_analytics/identity_llm/__init__.py +6 -0
- gitflow_analytics/identity_llm/analysis_pass.py +231 -0
- gitflow_analytics/identity_llm/analyzer.py +464 -0
- gitflow_analytics/identity_llm/models.py +76 -0
- gitflow_analytics/integrations/github_integration.py +175 -11
- gitflow_analytics/integrations/jira_integration.py +461 -24
- gitflow_analytics/integrations/orchestrator.py +124 -1
- gitflow_analytics/metrics/activity_scoring.py +322 -0
- gitflow_analytics/metrics/branch_health.py +470 -0
- gitflow_analytics/metrics/dora.py +379 -20
- gitflow_analytics/models/database.py +843 -53
- gitflow_analytics/pm_framework/__init__.py +115 -0
- gitflow_analytics/pm_framework/adapters/__init__.py +50 -0
- gitflow_analytics/pm_framework/adapters/jira_adapter.py +1845 -0
- gitflow_analytics/pm_framework/base.py +406 -0
- gitflow_analytics/pm_framework/models.py +211 -0
- gitflow_analytics/pm_framework/orchestrator.py +652 -0
- gitflow_analytics/pm_framework/registry.py +333 -0
- gitflow_analytics/qualitative/__init__.py +9 -10
- gitflow_analytics/qualitative/chatgpt_analyzer.py +259 -0
- gitflow_analytics/qualitative/classifiers/__init__.py +3 -3
- gitflow_analytics/qualitative/classifiers/change_type.py +518 -244
- gitflow_analytics/qualitative/classifiers/domain_classifier.py +272 -165
- gitflow_analytics/qualitative/classifiers/intent_analyzer.py +321 -222
- gitflow_analytics/qualitative/classifiers/llm/__init__.py +35 -0
- gitflow_analytics/qualitative/classifiers/llm/base.py +193 -0
- gitflow_analytics/qualitative/classifiers/llm/batch_processor.py +383 -0
- gitflow_analytics/qualitative/classifiers/llm/cache.py +479 -0
- gitflow_analytics/qualitative/classifiers/llm/cost_tracker.py +435 -0
- gitflow_analytics/qualitative/classifiers/llm/openai_client.py +403 -0
- gitflow_analytics/qualitative/classifiers/llm/prompts.py +373 -0
- gitflow_analytics/qualitative/classifiers/llm/response_parser.py +287 -0
- gitflow_analytics/qualitative/classifiers/llm_commit_classifier.py +607 -0
- gitflow_analytics/qualitative/classifiers/risk_analyzer.py +215 -189
- gitflow_analytics/qualitative/core/__init__.py +4 -4
- gitflow_analytics/qualitative/core/llm_fallback.py +239 -235
- gitflow_analytics/qualitative/core/nlp_engine.py +157 -148
- gitflow_analytics/qualitative/core/pattern_cache.py +214 -192
- gitflow_analytics/qualitative/core/processor.py +381 -248
- gitflow_analytics/qualitative/enhanced_analyzer.py +2236 -0
- gitflow_analytics/qualitative/example_enhanced_usage.py +420 -0
- gitflow_analytics/qualitative/models/__init__.py +7 -7
- gitflow_analytics/qualitative/models/schemas.py +155 -121
- gitflow_analytics/qualitative/utils/__init__.py +4 -4
- gitflow_analytics/qualitative/utils/batch_processor.py +136 -123
- gitflow_analytics/qualitative/utils/cost_tracker.py +142 -140
- gitflow_analytics/qualitative/utils/metrics.py +172 -158
- gitflow_analytics/qualitative/utils/text_processing.py +146 -104
- gitflow_analytics/reports/__init__.py +100 -0
- gitflow_analytics/reports/analytics_writer.py +539 -14
- gitflow_analytics/reports/base.py +648 -0
- gitflow_analytics/reports/branch_health_writer.py +322 -0
- gitflow_analytics/reports/classification_writer.py +924 -0
- gitflow_analytics/reports/cli_integration.py +427 -0
- gitflow_analytics/reports/csv_writer.py +1676 -212
- gitflow_analytics/reports/data_models.py +504 -0
- gitflow_analytics/reports/database_report_generator.py +427 -0
- gitflow_analytics/reports/example_usage.py +344 -0
- gitflow_analytics/reports/factory.py +499 -0
- gitflow_analytics/reports/formatters.py +698 -0
- gitflow_analytics/reports/html_generator.py +1116 -0
- gitflow_analytics/reports/interfaces.py +489 -0
- gitflow_analytics/reports/json_exporter.py +2770 -0
- gitflow_analytics/reports/narrative_writer.py +2287 -158
- gitflow_analytics/reports/story_point_correlation.py +1144 -0
- gitflow_analytics/reports/weekly_trends_writer.py +389 -0
- gitflow_analytics/training/__init__.py +5 -0
- gitflow_analytics/training/model_loader.py +377 -0
- gitflow_analytics/training/pipeline.py +550 -0
- gitflow_analytics/tui/__init__.py +1 -1
- gitflow_analytics/tui/app.py +129 -126
- gitflow_analytics/tui/screens/__init__.py +3 -3
- gitflow_analytics/tui/screens/analysis_progress_screen.py +188 -179
- gitflow_analytics/tui/screens/configuration_screen.py +154 -178
- gitflow_analytics/tui/screens/loading_screen.py +100 -110
- gitflow_analytics/tui/screens/main_screen.py +89 -72
- gitflow_analytics/tui/screens/results_screen.py +305 -281
- gitflow_analytics/tui/widgets/__init__.py +2 -2
- gitflow_analytics/tui/widgets/data_table.py +67 -69
- gitflow_analytics/tui/widgets/export_modal.py +76 -76
- gitflow_analytics/tui/widgets/progress_widget.py +41 -46
- gitflow_analytics-1.3.6.dist-info/METADATA +1015 -0
- gitflow_analytics-1.3.6.dist-info/RECORD +122 -0
- gitflow_analytics-1.0.3.dist-info/METADATA +0 -490
- gitflow_analytics-1.0.3.dist-info/RECORD +0 -62
- {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/WHEEL +0 -0
- {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/entry_points.txt +0 -0
- {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/licenses/LICENSE +0 -0
- {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/top_level.txt +0 -0
gitflow_analytics/integrations/github_integration.py

```diff
@@ -8,6 +8,7 @@ from github import Github
 from github.GithubException import RateLimitExceededException, UnknownObjectException
 
 from ..core.cache import GitAnalysisCache
+from ..core.schema_version import create_schema_manager
 
 
 class GitHubIntegration:
@@ -28,30 +29,107 @@ class GitHubIntegration:
         self.backoff_factor = backoff_factor
         self.allowed_ticket_platforms = allowed_ticket_platforms
 
+        # Initialize schema version manager for incremental API data fetching
+        self.schema_manager = create_schema_manager(cache.cache_dir)
+
+    def _get_incremental_fetch_date(
+        self, component: str, requested_since: datetime, config: dict[str, Any]
+    ) -> datetime:
+        """Determine the actual fetch date based on schema versioning."""
+        # Ensure requested_since is timezone-aware
+        if requested_since.tzinfo is None:
+            requested_since = requested_since.replace(tzinfo=timezone.utc)
+
+        # Check if schema has changed
+        if self.schema_manager.has_schema_changed(component, config):
+            print(
+                f" 🔄 {component.title()} API schema changed, fetching all data since {requested_since}"
+            )
+            return requested_since
+
+        # Get last processed date
+        last_processed = self.schema_manager.get_last_processed_date(component)
+        if not last_processed:
+            print(f" 📥 First {component} API fetch, getting data since {requested_since}")
+            return requested_since
+
+        # Ensure last_processed is timezone-aware
+        if last_processed.tzinfo is None:
+            last_processed = last_processed.replace(tzinfo=timezone.utc)
+
+        # Use the later of the two dates (don't go backwards)
+        fetch_since = max(last_processed, requested_since)
+
+        if fetch_since > requested_since:
+            print(f" ⚡ {component.title()} incremental fetch since {fetch_since}")
+        else:
+            print(f" 📥 {component.title()} full fetch since {requested_since}")
+
+        return fetch_since
+
     def enrich_repository_with_prs(
         self, repo_name: str, commits: list[dict[str, Any]], since: datetime
     ) -> list[dict[str, Any]]:
-        """Enrich repository commits with PR data."""
+        """Enrich repository commits with PR data using incremental fetching."""
         try:
             repo = self.github.get_repo(repo_name)
         except UnknownObjectException:
             print(f" ⚠️ GitHub repo not found: {repo_name}")
             return []
 
-        #
-
+        # Check if we need to fetch new PR data
+        github_config = {
+            "rate_limit_retries": self.rate_limit_retries,
+            "backoff_factor": self.backoff_factor,
+            "allowed_ticket_platforms": self.allowed_ticket_platforms,
+        }
 
-        #
-
-
+        # Determine the actual start date for fetching
+        fetch_since = self._get_incremental_fetch_date("github", since, github_config)
+
+        # Check cache first for existing PRs in this time period
+        cached_prs_data = self._get_cached_prs_bulk(repo_name, fetch_since)
+
+        # Get PRs for the time period (may be incremental)
+        prs = self._get_pull_requests(repo, fetch_since)
+
+        # Track cache performance
+        cached_pr_numbers = {pr["number"] for pr in cached_prs_data}
+        new_prs = [pr for pr in prs if pr.number not in cached_pr_numbers]
+        cache_hits = len(cached_prs_data)
+        cache_misses = len(new_prs)
+
+        if cache_hits > 0 or cache_misses > 0:
+            print(
+                f" 📊 GitHub PR cache: {cache_hits} hits, {cache_misses} misses ({cache_hits/(cache_hits+cache_misses)*100:.1f}% hit rate)"
+                if (cache_hits + cache_misses) > 0
+                else ""
+            )
+
+        # Update schema tracking after successful fetch
+        if prs:
+            self.schema_manager.mark_date_processed("github", since, github_config)
+
+        # Process new PRs and cache them
+        new_pr_data = []
+        for pr in new_prs:
             pr_data = self._extract_pr_data(pr)
+            new_pr_data.append(pr_data)
 
-
-
+        # Bulk cache new PR data
+        if new_pr_data:
+            self._cache_prs_bulk(repo_name, new_pr_data)
+            print(f" 💾 Cached {len(new_pr_data)} new GitHub PRs")
+
+        # Combine cached and new PR data
+        all_pr_data = cached_prs_data + new_pr_data
 
-
-
-
+        # Build commit to PR mapping
+        commit_to_pr = {}
+        for pr_data in all_pr_data:
+            # Map commits to this PR (need to get commit hashes from cached data)
+            for commit_hash in pr_data.get("commit_hashes", []):
+                commit_to_pr[commit_hash] = pr_data
 
         # Enrich commits with PR data
         enriched_prs = []
```
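The `_get_incremental_fetch_date` helper added above reduces to three cases: refetch the full window when the API schema fingerprint changes, refetch the full window on the first run, and otherwise resume from the later of the last-processed date and the requested start date. A minimal standalone sketch of that decision logic, assuming a hypothetical `SchemaState` holder and `incremental_fetch_date` function in place of the package's real schema manager:

```python
from dataclasses import dataclass
from datetime import datetime, timezone
from typing import Optional

@dataclass
class SchemaState:
    """Hypothetical stand-in for the state kept by the package's schema_version manager."""
    fingerprint: Optional[str] = None          # hash of the config that produced the cached data
    last_processed: Optional[datetime] = None  # newest date already fetched and cached

def incremental_fetch_date(
    state: SchemaState, config_fingerprint: str, requested_since: datetime
) -> datetime:
    # Normalize to timezone-aware UTC, mirroring the tzinfo checks in the diff
    if requested_since.tzinfo is None:
        requested_since = requested_since.replace(tzinfo=timezone.utc)

    # Schema changed or first run: cached data can't be reused, refetch the full window
    if state.fingerprint != config_fingerprint or state.last_processed is None:
        return requested_since

    last = state.last_processed
    if last.tzinfo is None:
        last = last.replace(tzinfo=timezone.utc)

    # Resume from the later date so the fetch never re-covers data already cached
    return max(last, requested_since)

# A prior run covered data through 2024-03-01, so a request for "since 2024-01-01"
# only needs the slice from 2024-03-01 onwards.
state = SchemaState("abc123", datetime(2024, 3, 1, tzinfo=timezone.utc))
print(incremental_fetch_date(state, "abc123", datetime(2024, 1, 1, tzinfo=timezone.utc)))
# 2024-03-01 00:00:00+00:00
print(incremental_fetch_date(state, "changed", datetime(2024, 1, 1, tzinfo=timezone.utc)))
# 2024-01-01 00:00:00+00:00 (schema change forces a full refetch)
```

The `max()` at the end is the key design choice: the resume point never moves backwards, so repeated runs over overlapping windows only pay for the uncached tail.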
```diff
@@ -73,6 +151,92 @@ class GitHubIntegration:
 
         return enriched_prs
 
+    def _get_cached_prs_bulk(self, repo_name: str, since: datetime) -> list[dict[str, Any]]:
+        """Get cached PRs for a repository from the given date onwards.
+
+        WHY: Bulk PR cache lookups avoid redundant GitHub API calls and
+        significantly improve performance on repeated analysis runs.
+
+        Args:
+            repo_name: GitHub repository name (e.g., "owner/repo")
+            since: Only return PRs merged after this date
+
+        Returns:
+            List of cached PR data dictionaries
+        """
+        cached_prs = []
+        with self.cache.get_session() as session:
+            from ..models.database import PullRequestCache
+
+            # Ensure since is timezone-aware for comparison
+            if since.tzinfo is None:
+                since = since.replace(tzinfo=timezone.utc)
+
+            cached_results = (
+                session.query(PullRequestCache)
+                .filter(
+                    PullRequestCache.repo_path == repo_name,
+                    PullRequestCache.merged_at >= since.replace(tzinfo=None),  # Store as naive UTC
+                )
+                .all()
+            )
+
+            for cached_pr in cached_results:
+                if not self._is_pr_stale(cached_pr.cached_at):
+                    pr_data = {
+                        "number": cached_pr.pr_number,
+                        "title": cached_pr.title or "",
+                        "description": cached_pr.description or "",
+                        "author": cached_pr.author or "",
+                        "created_at": cached_pr.created_at,
+                        "merged_at": cached_pr.merged_at,
+                        "story_points": cached_pr.story_points or 0,
+                        "labels": cached_pr.labels or [],
+                        "commit_hashes": cached_pr.commit_hashes or [],
+                        "ticket_references": [],  # Would need additional extraction
+                        "review_comments": 0,  # Not stored in current schema
+                        "changed_files": 0,  # Not stored in current schema
+                        "additions": 0,  # Not stored in current schema
+                        "deletions": 0,  # Not stored in current schema
+                    }
+                    cached_prs.append(pr_data)
+
+        return cached_prs
+
+    def _cache_prs_bulk(self, repo_name: str, prs: list[dict[str, Any]]) -> None:
+        """Cache multiple PRs in bulk for better performance.
+
+        WHY: Bulk caching is more efficient than individual cache operations,
+        reducing database overhead when caching many PRs from GitHub API.
+
+        Args:
+            repo_name: GitHub repository name
+            prs: List of PR data dictionaries to cache
+        """
+        if not prs:
+            return
+
+        for pr_data in prs:
+            # Use existing cache_pr method which handles upserts properly
+            self.cache.cache_pr(repo_name, pr_data)
+
+    def _is_pr_stale(self, cached_at: datetime) -> bool:
+        """Check if cached PR data is stale based on cache TTL.
+
+        Args:
+            cached_at: When the PR was cached
+
+        Returns:
+            True if stale and should be refreshed, False if still fresh
+        """
+        from datetime import timedelta
+
+        if self.cache.ttl_hours == 0:  # No expiration
+            return False
+
+        stale_threshold = datetime.utcnow() - timedelta(hours=self.cache.ttl_hours)
+        return cached_at < stale_threshold
+
     def _get_pull_requests(self, repo, since: datetime) -> list[Any]:
         """Get pull requests with rate limit handling."""
         prs = []
```
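The `_is_pr_stale` check above is the only freshness gate on cached PRs: a TTL of 0 disables expiry entirely, and otherwise an entry is considered stale once its `cached_at` timestamp falls behind `utcnow()` minus the TTL. A standalone restatement of that rule (the free function `is_pr_stale` is illustrative, not part of the package):

```python
from datetime import datetime, timedelta

def is_pr_stale(cached_at: datetime, ttl_hours: int) -> bool:
    """Mirror of the TTL rule in _is_pr_stale: ttl_hours == 0 means entries never expire."""
    if ttl_hours == 0:
        return False
    # Naive-UTC comparison, matching how cached_at is stored in the diff above
    return cached_at < datetime.utcnow() - timedelta(hours=ttl_hours)

now = datetime.utcnow()
print(is_pr_stale(now - timedelta(hours=30), 24))  # True  -> refetched from the GitHub API
print(is_pr_stale(now - timedelta(hours=2), 24))   # False -> served from the PR cache
print(is_pr_stale(now - timedelta(days=365), 0))   # False -> TTL of 0 disables expiry
```

Note the naive-UTC comparison: it matches the diff's own use of `datetime.utcnow()` and naive `cached_at` timestamps, so passing a timezone-aware datetime into this sketch would raise a `TypeError`.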