gitflow-analytics 1.0.1__py3-none-any.whl → 1.3.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gitflow_analytics/__init__.py +11 -11
- gitflow_analytics/_version.py +2 -2
- gitflow_analytics/classification/__init__.py +31 -0
- gitflow_analytics/classification/batch_classifier.py +752 -0
- gitflow_analytics/classification/classifier.py +464 -0
- gitflow_analytics/classification/feature_extractor.py +725 -0
- gitflow_analytics/classification/linguist_analyzer.py +574 -0
- gitflow_analytics/classification/model.py +455 -0
- gitflow_analytics/cli.py +4490 -378
- gitflow_analytics/cli_rich.py +503 -0
- gitflow_analytics/config/__init__.py +43 -0
- gitflow_analytics/config/errors.py +261 -0
- gitflow_analytics/config/loader.py +904 -0
- gitflow_analytics/config/profiles.py +264 -0
- gitflow_analytics/config/repository.py +124 -0
- gitflow_analytics/config/schema.py +441 -0
- gitflow_analytics/config/validator.py +154 -0
- gitflow_analytics/config.py +44 -398
- gitflow_analytics/core/analyzer.py +1320 -172
- gitflow_analytics/core/branch_mapper.py +132 -132
- gitflow_analytics/core/cache.py +1554 -175
- gitflow_analytics/core/data_fetcher.py +1193 -0
- gitflow_analytics/core/identity.py +571 -185
- gitflow_analytics/core/metrics_storage.py +526 -0
- gitflow_analytics/core/progress.py +372 -0
- gitflow_analytics/core/schema_version.py +269 -0
- gitflow_analytics/extractors/base.py +13 -11
- gitflow_analytics/extractors/ml_tickets.py +1100 -0
- gitflow_analytics/extractors/story_points.py +77 -59
- gitflow_analytics/extractors/tickets.py +841 -89
- gitflow_analytics/identity_llm/__init__.py +6 -0
- gitflow_analytics/identity_llm/analysis_pass.py +231 -0
- gitflow_analytics/identity_llm/analyzer.py +464 -0
- gitflow_analytics/identity_llm/models.py +76 -0
- gitflow_analytics/integrations/github_integration.py +258 -87
- gitflow_analytics/integrations/jira_integration.py +572 -123
- gitflow_analytics/integrations/orchestrator.py +206 -82
- gitflow_analytics/metrics/activity_scoring.py +322 -0
- gitflow_analytics/metrics/branch_health.py +470 -0
- gitflow_analytics/metrics/dora.py +542 -179
- gitflow_analytics/models/database.py +986 -59
- gitflow_analytics/pm_framework/__init__.py +115 -0
- gitflow_analytics/pm_framework/adapters/__init__.py +50 -0
- gitflow_analytics/pm_framework/adapters/jira_adapter.py +1845 -0
- gitflow_analytics/pm_framework/base.py +406 -0
- gitflow_analytics/pm_framework/models.py +211 -0
- gitflow_analytics/pm_framework/orchestrator.py +652 -0
- gitflow_analytics/pm_framework/registry.py +333 -0
- gitflow_analytics/qualitative/__init__.py +29 -0
- gitflow_analytics/qualitative/chatgpt_analyzer.py +259 -0
- gitflow_analytics/qualitative/classifiers/__init__.py +13 -0
- gitflow_analytics/qualitative/classifiers/change_type.py +742 -0
- gitflow_analytics/qualitative/classifiers/domain_classifier.py +506 -0
- gitflow_analytics/qualitative/classifiers/intent_analyzer.py +535 -0
- gitflow_analytics/qualitative/classifiers/llm/__init__.py +35 -0
- gitflow_analytics/qualitative/classifiers/llm/base.py +193 -0
- gitflow_analytics/qualitative/classifiers/llm/batch_processor.py +383 -0
- gitflow_analytics/qualitative/classifiers/llm/cache.py +479 -0
- gitflow_analytics/qualitative/classifiers/llm/cost_tracker.py +435 -0
- gitflow_analytics/qualitative/classifiers/llm/openai_client.py +403 -0
- gitflow_analytics/qualitative/classifiers/llm/prompts.py +373 -0
- gitflow_analytics/qualitative/classifiers/llm/response_parser.py +287 -0
- gitflow_analytics/qualitative/classifiers/llm_commit_classifier.py +607 -0
- gitflow_analytics/qualitative/classifiers/risk_analyzer.py +438 -0
- gitflow_analytics/qualitative/core/__init__.py +13 -0
- gitflow_analytics/qualitative/core/llm_fallback.py +657 -0
- gitflow_analytics/qualitative/core/nlp_engine.py +382 -0
- gitflow_analytics/qualitative/core/pattern_cache.py +479 -0
- gitflow_analytics/qualitative/core/processor.py +673 -0
- gitflow_analytics/qualitative/enhanced_analyzer.py +2236 -0
- gitflow_analytics/qualitative/example_enhanced_usage.py +420 -0
- gitflow_analytics/qualitative/models/__init__.py +25 -0
- gitflow_analytics/qualitative/models/schemas.py +306 -0
- gitflow_analytics/qualitative/utils/__init__.py +13 -0
- gitflow_analytics/qualitative/utils/batch_processor.py +339 -0
- gitflow_analytics/qualitative/utils/cost_tracker.py +345 -0
- gitflow_analytics/qualitative/utils/metrics.py +361 -0
- gitflow_analytics/qualitative/utils/text_processing.py +285 -0
- gitflow_analytics/reports/__init__.py +100 -0
- gitflow_analytics/reports/analytics_writer.py +550 -18
- gitflow_analytics/reports/base.py +648 -0
- gitflow_analytics/reports/branch_health_writer.py +322 -0
- gitflow_analytics/reports/classification_writer.py +924 -0
- gitflow_analytics/reports/cli_integration.py +427 -0
- gitflow_analytics/reports/csv_writer.py +1700 -216
- gitflow_analytics/reports/data_models.py +504 -0
- gitflow_analytics/reports/database_report_generator.py +427 -0
- gitflow_analytics/reports/example_usage.py +344 -0
- gitflow_analytics/reports/factory.py +499 -0
- gitflow_analytics/reports/formatters.py +698 -0
- gitflow_analytics/reports/html_generator.py +1116 -0
- gitflow_analytics/reports/interfaces.py +489 -0
- gitflow_analytics/reports/json_exporter.py +2770 -0
- gitflow_analytics/reports/narrative_writer.py +2289 -158
- gitflow_analytics/reports/story_point_correlation.py +1144 -0
- gitflow_analytics/reports/weekly_trends_writer.py +389 -0
- gitflow_analytics/training/__init__.py +5 -0
- gitflow_analytics/training/model_loader.py +377 -0
- gitflow_analytics/training/pipeline.py +550 -0
- gitflow_analytics/tui/__init__.py +5 -0
- gitflow_analytics/tui/app.py +724 -0
- gitflow_analytics/tui/screens/__init__.py +8 -0
- gitflow_analytics/tui/screens/analysis_progress_screen.py +496 -0
- gitflow_analytics/tui/screens/configuration_screen.py +523 -0
- gitflow_analytics/tui/screens/loading_screen.py +348 -0
- gitflow_analytics/tui/screens/main_screen.py +321 -0
- gitflow_analytics/tui/screens/results_screen.py +722 -0
- gitflow_analytics/tui/widgets/__init__.py +7 -0
- gitflow_analytics/tui/widgets/data_table.py +255 -0
- gitflow_analytics/tui/widgets/export_modal.py +301 -0
- gitflow_analytics/tui/widgets/progress_widget.py +187 -0
- gitflow_analytics-1.3.6.dist-info/METADATA +1015 -0
- gitflow_analytics-1.3.6.dist-info/RECORD +122 -0
- gitflow_analytics-1.0.1.dist-info/METADATA +0 -463
- gitflow_analytics-1.0.1.dist-info/RECORD +0 -31
- {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.3.6.dist-info}/WHEEL +0 -0
- {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.3.6.dist-info}/entry_points.txt +0 -0
- {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.3.6.dist-info}/licenses/LICENSE +0 -0
- {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.3.6.dist-info}/top_level.txt +0 -0
gitflow_analytics/integrations/github_integration.py
@@ -1,167 +1,338 @@
 """GitHub API integration for PR and issue enrichment."""
+
 import time
 from datetime import datetime, timezone
-from typing import Any,
+from typing import Any, Optional
 
 from github import Github
 from github.GithubException import RateLimitExceededException, UnknownObjectException
 
 from ..core.cache import GitAnalysisCache
+from ..core.schema_version import create_schema_manager
 
 
 class GitHubIntegration:
     """Integrate with GitHub API for PR and issue data."""
-
-    def __init__(
-
-
+
+    def __init__(
+        self,
+        token: str,
+        cache: GitAnalysisCache,
+        rate_limit_retries: int = 3,
+        backoff_factor: int = 2,
+        allowed_ticket_platforms: Optional[list[str]] = None,
+    ):
         """Initialize GitHub integration."""
         self.github = Github(token)
         self.cache = cache
         self.rate_limit_retries = rate_limit_retries
         self.backoff_factor = backoff_factor
         self.allowed_ticket_platforms = allowed_ticket_platforms
-
-
-
-
+
+        # Initialize schema version manager for incremental API data fetching
+        self.schema_manager = create_schema_manager(cache.cache_dir)
+
+    def _get_incremental_fetch_date(
+        self, component: str, requested_since: datetime, config: dict[str, Any]
+    ) -> datetime:
+        """Determine the actual fetch date based on schema versioning."""
+        # Ensure requested_since is timezone-aware
+        if requested_since.tzinfo is None:
+            requested_since = requested_since.replace(tzinfo=timezone.utc)
+
+        # Check if schema has changed
+        if self.schema_manager.has_schema_changed(component, config):
+            print(
+                f" 🔄 {component.title()} API schema changed, fetching all data since {requested_since}"
+            )
+            return requested_since
+
+        # Get last processed date
+        last_processed = self.schema_manager.get_last_processed_date(component)
+        if not last_processed:
+            print(f" 📥 First {component} API fetch, getting data since {requested_since}")
+            return requested_since
+
+        # Ensure last_processed is timezone-aware
+        if last_processed.tzinfo is None:
+            last_processed = last_processed.replace(tzinfo=timezone.utc)
+
+        # Use the later of the two dates (don't go backwards)
+        fetch_since = max(last_processed, requested_since)
+
+        if fetch_since > requested_since:
+            print(f" ⚡ {component.title()} incremental fetch since {fetch_since}")
+        else:
+            print(f" 📥 {component.title()} full fetch since {requested_since}")
+
+        return fetch_since
+
+    def enrich_repository_with_prs(
+        self, repo_name: str, commits: list[dict[str, Any]], since: datetime
+    ) -> list[dict[str, Any]]:
+        """Enrich repository commits with PR data using incremental fetching."""
         try:
             repo = self.github.get_repo(repo_name)
         except UnknownObjectException:
             print(f" ⚠️ GitHub repo not found: {repo_name}")
             return []
-
-        #
-
-
+
+        # Check if we need to fetch new PR data
+        github_config = {
+            "rate_limit_retries": self.rate_limit_retries,
+            "backoff_factor": self.backoff_factor,
+            "allowed_ticket_platforms": self.allowed_ticket_platforms,
+        }
+
+        # Determine the actual start date for fetching
+        fetch_since = self._get_incremental_fetch_date("github", since, github_config)
+
+        # Check cache first for existing PRs in this time period
+        cached_prs_data = self._get_cached_prs_bulk(repo_name, fetch_since)
+
+        # Get PRs for the time period (may be incremental)
+        prs = self._get_pull_requests(repo, fetch_since)
+
+        # Track cache performance
+        cached_pr_numbers = {pr["number"] for pr in cached_prs_data}
+        new_prs = [pr for pr in prs if pr.number not in cached_pr_numbers]
+        cache_hits = len(cached_prs_data)
+        cache_misses = len(new_prs)
+
+        if cache_hits > 0 or cache_misses > 0:
+            print(
+                f" 📊 GitHub PR cache: {cache_hits} hits, {cache_misses} misses ({cache_hits/(cache_hits+cache_misses)*100:.1f}% hit rate)"
+                if (cache_hits + cache_misses) > 0
+                else ""
+            )
+
+        # Update schema tracking after successful fetch
+        if prs:
+            self.schema_manager.mark_date_processed("github", since, github_config)
+
+        # Process new PRs and cache them
+        new_pr_data = []
+        for pr in new_prs:
+            pr_data = self._extract_pr_data(pr)
+            new_pr_data.append(pr_data)
+
+        # Bulk cache new PR data
+        if new_pr_data:
+            self._cache_prs_bulk(repo_name, new_pr_data)
+            print(f" 💾 Cached {len(new_pr_data)} new GitHub PRs")
+
+        # Combine cached and new PR data
+        all_pr_data = cached_prs_data + new_pr_data
+
         # Build commit to PR mapping
         commit_to_pr = {}
-        for
-
-
-
-
-
-            # Map commits to this PR
-            for commit in pr.get_commits():
-                commit_to_pr[commit.sha] = pr_data
-
+        for pr_data in all_pr_data:
+            # Map commits to this PR (need to get commit hashes from cached data)
+            for commit_hash in pr_data.get("commit_hashes", []):
+                commit_to_pr[commit_hash] = pr_data
+
         # Enrich commits with PR data
         enriched_prs = []
         for commit in commits:
-            if commit[
-                pr_data = commit_to_pr[commit[
-
+            if commit["hash"] in commit_to_pr:
+                pr_data = commit_to_pr[commit["hash"]]
+
                 # Use PR story points if commit doesn't have them
-                if not commit.get(
-                    commit[
-
+                if not commit.get("story_points") and pr_data.get("story_points"):
+                    commit["story_points"] = pr_data["story_points"]
+
                 # Add PR reference
-                commit[
-                commit[
-
+                commit["pr_number"] = pr_data["number"]
+                commit["pr_title"] = pr_data["title"]
+
                 # Add to PR list if not already there
                 if pr_data not in enriched_prs:
                     enriched_prs.append(pr_data)
-
+
         return enriched_prs
-
-    def
+
+    def _get_cached_prs_bulk(self, repo_name: str, since: datetime) -> list[dict[str, Any]]:
+        """Get cached PRs for a repository from the given date onwards.
+
+        WHY: Bulk PR cache lookups avoid redundant GitHub API calls and
+        significantly improve performance on repeated analysis runs.
+
+        Args:
+            repo_name: GitHub repository name (e.g., "owner/repo")
+            since: Only return PRs merged after this date
+
+        Returns:
+            List of cached PR data dictionaries
+        """
+        cached_prs = []
+        with self.cache.get_session() as session:
+            from ..models.database import PullRequestCache
+
+            # Ensure since is timezone-aware for comparison
+            if since.tzinfo is None:
+                since = since.replace(tzinfo=timezone.utc)
+
+            cached_results = (
+                session.query(PullRequestCache)
+                .filter(
+                    PullRequestCache.repo_path == repo_name,
+                    PullRequestCache.merged_at >= since.replace(tzinfo=None), # Store as naive UTC
+                )
+                .all()
+            )
+
+            for cached_pr in cached_results:
+                if not self._is_pr_stale(cached_pr.cached_at):
+                    pr_data = {
+                        "number": cached_pr.pr_number,
+                        "title": cached_pr.title or "",
+                        "description": cached_pr.description or "",
+                        "author": cached_pr.author or "",
+                        "created_at": cached_pr.created_at,
+                        "merged_at": cached_pr.merged_at,
+                        "story_points": cached_pr.story_points or 0,
+                        "labels": cached_pr.labels or [],
+                        "commit_hashes": cached_pr.commit_hashes or [],
+                        "ticket_references": [], # Would need additional extraction
+                        "review_comments": 0, # Not stored in current schema
+                        "changed_files": 0, # Not stored in current schema
+                        "additions": 0, # Not stored in current schema
+                        "deletions": 0, # Not stored in current schema
+                    }
+                    cached_prs.append(pr_data)
+
+        return cached_prs
+
+    def _cache_prs_bulk(self, repo_name: str, prs: list[dict[str, Any]]) -> None:
+        """Cache multiple PRs in bulk for better performance.
+
+        WHY: Bulk caching is more efficient than individual cache operations,
+        reducing database overhead when caching many PRs from GitHub API.
+
+        Args:
+            repo_name: GitHub repository name
+            prs: List of PR data dictionaries to cache
+        """
+        if not prs:
+            return
+
+        for pr_data in prs:
+            # Use existing cache_pr method which handles upserts properly
+            self.cache.cache_pr(repo_name, pr_data)
+
+    def _is_pr_stale(self, cached_at: datetime) -> bool:
+        """Check if cached PR data is stale based on cache TTL.
+
+        Args:
+            cached_at: When the PR was cached
+
+        Returns:
+            True if stale and should be refreshed, False if still fresh
+        """
+        from datetime import timedelta
+
+        if self.cache.ttl_hours == 0: # No expiration
+            return False
+
+        stale_threshold = datetime.utcnow() - timedelta(hours=self.cache.ttl_hours)
+        return cached_at < stale_threshold
+
+    def _get_pull_requests(self, repo, since: datetime) -> list[Any]:
         """Get pull requests with rate limit handling."""
         prs = []
-
+
         # Ensure since is timezone-aware for comparison with GitHub's timezone-aware datetimes
         if since.tzinfo is None:
             since = since.replace(tzinfo=timezone.utc)
-
+
         for attempt in range(self.rate_limit_retries):
             try:
                 # Get all PRs updated since the date
-                for pr in repo.get_pulls(state=
+                for pr in repo.get_pulls(state="all", sort="updated", direction="desc"):
                     if pr.updated_at < since:
                         break
-
+
                     # Only include PRs that were merged in our time period
                     if pr.merged and pr.merged_at >= since:
                         prs.append(pr)
-
+
                 return prs
-
+
             except RateLimitExceededException:
                 if attempt < self.rate_limit_retries - 1:
-                    wait_time = self.backoff_factor
+                    wait_time = self.backoff_factor**attempt
                     print(f" ⏳ GitHub rate limit hit, waiting {wait_time}s...")
                     time.sleep(wait_time)
                 else:
                     print(" ❌ GitHub rate limit exceeded, skipping PR enrichment")
                     return []
-
+
         return prs
-
-    def _extract_pr_data(self, pr) ->
+
+    def _extract_pr_data(self, pr) -> dict[str, Any]:
         """Extract relevant data from a GitHub PR object."""
         from ..extractors.story_points import StoryPointExtractor
         from ..extractors.tickets import TicketExtractor
-
+
         sp_extractor = StoryPointExtractor()
         ticket_extractor = TicketExtractor(allowed_platforms=self.allowed_ticket_platforms)
-
+
         # Extract story points from PR title and body
         pr_text = f"{pr.title} {pr.body or ''}"
         story_points = sp_extractor.extract_from_text(pr_text)
-
+
         # Extract ticket references
         tickets = ticket_extractor.extract_from_text(pr_text)
-
+
         # Get commit SHAs
         commit_hashes = [c.sha for c in pr.get_commits()]
-
+
         return {
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+            "number": pr.number,
+            "title": pr.title,
+            "description": pr.body,
+            "author": pr.user.login,
+            "created_at": pr.created_at,
+            "merged_at": pr.merged_at,
+            "story_points": story_points,
+            "labels": [label.name for label in pr.labels],
+            "commit_hashes": commit_hashes,
+            "ticket_references": tickets,
+            "review_comments": pr.review_comments,
+            "changed_files": pr.changed_files,
+            "additions": pr.additions,
+            "deletions": pr.deletions,
         }
-
-    def calculate_pr_metrics(self, prs:
+
+    def calculate_pr_metrics(self, prs: list[dict[str, Any]]) -> dict[str, Any]:
         """Calculate PR-level metrics."""
         if not prs:
             return {
-
-
-
-
+                "avg_pr_size": 0,
+                "avg_pr_lifetime_hours": 0,
+                "avg_files_per_pr": 0,
+                "total_review_comments": 0,
             }
-
-        total_size = sum(pr[
-        total_files = sum(pr.get(
-        total_comments = sum(pr.get(
-
+
+        total_size = sum(pr["additions"] + pr["deletions"] for pr in prs)
+        total_files = sum(pr.get("changed_files", 0) for pr in prs)
+        total_comments = sum(pr.get("review_comments", 0) for pr in prs)
+
         # Calculate average PR lifetime
        lifetimes = []
         for pr in prs:
-            if pr.get(
-                lifetime = (pr[
+            if pr.get("merged_at") and pr.get("created_at"):
+                lifetime = (pr["merged_at"] - pr["created_at"]).total_seconds() / 3600
                 lifetimes.append(lifetime)
-
+
         avg_lifetime = sum(lifetimes) / len(lifetimes) if lifetimes else 0
-
+
         return {
-
-
-
-
-
-
-
-        }
+            "total_prs": len(prs),
+            "avg_pr_size": total_size / len(prs),
+            "avg_pr_lifetime_hours": avg_lifetime,
+            "avg_files_per_pr": total_files / len(prs),
+            "total_review_comments": total_comments,
+            "prs_with_story_points": sum(1 for pr in prs if pr.get("story_points")),
+            "story_point_coverage": sum(1 for pr in prs if pr.get("story_points")) / len(prs) * 100,
+        }