gitflow-analytics 1.3.6__py3-none-any.whl → 1.3.11__py3-none-any.whl
This diff shows the contents of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as published in that registry.
- gitflow_analytics/_version.py +1 -1
- gitflow_analytics/cli.py +103 -53
- gitflow_analytics/config/loader.py +1 -0
- gitflow_analytics/config/schema.py +3 -0
- gitflow_analytics/core/data_fetcher.py +246 -154
- {gitflow_analytics-1.3.6.dist-info → gitflow_analytics-1.3.11.dist-info}/METADATA +1 -1
- {gitflow_analytics-1.3.6.dist-info → gitflow_analytics-1.3.11.dist-info}/RECORD +11 -11
- {gitflow_analytics-1.3.6.dist-info → gitflow_analytics-1.3.11.dist-info}/WHEEL +0 -0
- {gitflow_analytics-1.3.6.dist-info → gitflow_analytics-1.3.11.dist-info}/entry_points.txt +0 -0
- {gitflow_analytics-1.3.6.dist-info → gitflow_analytics-1.3.11.dist-info}/licenses/LICENSE +0 -0
- {gitflow_analytics-1.3.6.dist-info → gitflow_analytics-1.3.11.dist-info}/top_level.txt +0 -0
gitflow_analytics/_version.py
CHANGED
gitflow_analytics/cli.py
CHANGED
@@ -1069,6 +1069,7 @@ def analyze(

 # Perform data fetch for repositories that need analysis
 from .core.data_fetcher import GitDataFetcher
+from .core.progress import get_progress_service

 data_fetcher = GitDataFetcher(
 cache=cache,
@@ -1083,74 +1084,105 @@ def analyze(
 orchestrator = IntegrationOrchestrator(cfg, cache)
 jira_integration = orchestrator.integrations.get("jira")

+# Get progress service for overall repository progress
+progress = get_progress_service()
+
 # Fetch data for repositories that need analysis
 total_commits = 0
 total_tickets = 0

-for
-
-
-
-
-
-def progress_callback(message: str):
-if display:
-display.print_status(f"   {message}", "info")
-
-# Fetch repository data
-result = data_fetcher.fetch_repository_data(
-repo_path=repo_path,
-project_key=project_key,
-weeks_back=weeks,
-branch_patterns=getattr(repo_config, "branch_patterns", None),
-jira_integration=jira_integration,
-progress_callback=progress_callback,
-start_date=start_date,
-end_date=end_date,
-)
+# Create top-level progress for all repositories
+with progress.progress(
+total=len(repos_needing_analysis),
+description="Processing repositories",
+unit="repos",
+) as repos_progress_ctx:

-
-
+for idx, repo_config in enumerate(repos_needing_analysis, 1):
+try:
+repo_path = Path(repo_config.path)
+project_key = repo_config.project_key or repo_path.name

-
-
-
-f"{
-"success",
+# Update overall progress description
+progress.set_description(
+repos_progress_ctx,
+f"Repository {idx}/{len(repos_needing_analysis)}: {project_key}",
 )

-
-
-
-
-project_key=project_key,
-analysis_start=start_date,
-analysis_end=end_date,
-weeks_analyzed=weeks,
-commit_count=result["stats"]["total_commits"],
-ticket_count=result["stats"]["unique_tickets"],
-config_hash=config_hash,
-)
+# Progress callback for fetch
+def progress_callback(message: str):
+if display:
+display.print_status(f"   {message}", "info")

-
-
-
-
+# Fetch repository data
+# For organization discovery, use branch patterns from analysis config
+# Default to ["*"] to analyze all branches when not specified
+branch_patterns = None
+if hasattr(cfg.analysis, "branch_patterns"):
+branch_patterns = cfg.analysis.branch_patterns
+elif cfg.github.organization:
+# For organization discovery, default to analyzing all branches
+branch_patterns = ["*"]
+
+result = data_fetcher.fetch_repository_data(
+repo_path=repo_path,
+project_key=project_key,
+weeks_back=weeks,
+branch_patterns=branch_patterns,
+jira_integration=jira_integration,
+progress_callback=progress_callback,
+start_date=start_date,
+end_date=end_date,
 )
-else:
-click.echo(f"   ❌ Error fetching {project_key}: {e}")

-
-
-
+total_commits += result["stats"]["total_commits"]
+total_tickets += result["stats"]["unique_tickets"]
+
+if display:
+display.print_status(
+f"   ✅ {project_key}: {result['stats']['total_commits']} commits, "
+f"{result['stats']['unique_tickets']} tickets",
+"success",
+)
+
+# Mark repository analysis as complete
+cache.mark_repository_analysis_complete(
 repo_path=str(repo_path),
 repo_name=repo_config.name,
+project_key=project_key,
 analysis_start=start_date,
 analysis_end=end_date,
-
+weeks_analyzed=weeks,
+commit_count=result["stats"]["total_commits"],
+ticket_count=result["stats"]["unique_tickets"],
 config_hash=config_hash,
 )
-
+
+# Update overall repository progress
+progress.update(repos_progress_ctx)
+
+except Exception as e:
+if display:
+display.print_status(
+f"   ❌ Error fetching {project_key}: {e}", "error"
+)
+else:
+click.echo(f"   ❌ Error fetching {project_key}: {e}")
+
+# Mark repository analysis as failed
+with contextlib.suppress(Exception):
+cache.mark_repository_analysis_failed(
+repo_path=str(repo_path),
+repo_name=repo_config.name,
+analysis_start=start_date,
+analysis_end=end_date,
+error_message=str(e),
+config_hash=config_hash,
+)
+
+# Update progress even on failure
+progress.update(repos_progress_ctx)
+continue

 if display:
 display.print_status(
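Note: the rewritten loop above routes all repository-level feedback through gitflow_analytics.core.progress.get_progress_service(). The diff only shows the call sites (progress(), set_description(), update()); the actual service lives in core/progress.py and is not part of this diff. As a rough mental model only, a minimal stand-in with the same call shape might look like the sketch below (everything in it is hypothetical, not the package's implementation):

    # Hypothetical stand-in that mimics the call shape used in the new cli.py code.
    from contextlib import contextmanager
    from dataclasses import dataclass


    @dataclass
    class _ProgressCtx:
        description: str
        total: int
        done: int = 0


    class _StubProgressService:
        @contextmanager
        def progress(self, total: int, description: str, unit: str = "", nested: bool = False):
            # Yield a context object that update()/set_description() mutate.
            ctx = _ProgressCtx(description=description, total=total)
            try:
                yield ctx
            finally:
                print(f"{ctx.description}: {ctx.done}/{ctx.total} {unit}")

        def set_description(self, ctx: _ProgressCtx, description: str) -> None:
            ctx.description = description

        def update(self, ctx: _ProgressCtx, advance: int = 1) -> None:
            ctx.done += advance


    def get_progress_service() -> _StubProgressService:
        return _StubProgressService()


    # Usage mirroring the repository loop above.
    progress = get_progress_service()
    repos = ["repo-a", "repo-b"]
    with progress.progress(total=len(repos), description="Processing repositories", unit="repos") as ctx:
        for idx, name in enumerate(repos, 1):
            progress.set_description(ctx, f"Repository {idx}/{len(repos)}: {name}")
            progress.update(ctx)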
@@ -1322,11 +1354,20 @@ def analyze(
 display.print_status(f"   {message}", "info")

 # Fetch repository data
+# For organization discovery, use branch patterns from analysis config
+# Default to ["*"] to analyze all branches when not specified
+branch_patterns = None
+if hasattr(cfg.analysis, "branch_patterns"):
+branch_patterns = cfg.analysis.branch_patterns
+elif cfg.github.organization:
+# For organization discovery, default to analyzing all branches
+branch_patterns = ["*"]
+
 result = data_fetcher.fetch_repository_data(
 repo_path=repo_path,
 project_key=project_key,
 weeks_back=weeks,
-branch_patterns=
+branch_patterns=branch_patterns,
 jira_integration=jira_integration,
 progress_callback=progress_callback,
 start_date=start_date,
@@ -3649,11 +3690,20 @@ def fetch(
 click.echo(f"   {message}")

 # Fetch repository data
+# For organization discovery, use branch patterns from analysis config
+# Default to ["*"] to analyze all branches when not specified
+branch_patterns = None
+if hasattr(cfg.analysis, "branch_patterns"):
+branch_patterns = cfg.analysis.branch_patterns
+elif cfg.github.organization:
+# For organization discovery, default to analyzing all branches
+branch_patterns = ["*"]
+
 result = data_fetcher.fetch_repository_data(
 repo_path=repo_path,
 project_key=project_key,
 weeks_back=weeks,
-branch_patterns=
+branch_patterns=branch_patterns,
 jira_integration=jira_integration,
 progress_callback=progress_callback,
 start_date=start_date,
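Note: the same branch-pattern resolution block now appears three times in cli.py (twice under analyze, once under fetch). One way to keep those call sites in sync would be a small helper along the following lines; _resolve_branch_patterns is purely illustrative and does not exist in the package:

    from typing import Optional


    def _resolve_branch_patterns(cfg) -> Optional[list[str]]:
        """Illustrative helper collecting the repeated block from cli.py in one place."""
        if hasattr(cfg.analysis, "branch_patterns"):
            # Use branch patterns from the analysis config when the field exists.
            return cfg.analysis.branch_patterns
        if getattr(cfg.github, "organization", None):
            # For organization discovery, default to analyzing all branches.
            return ["*"]
        return None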
gitflow_analytics/config/loader.py
CHANGED

@@ -555,6 +555,7 @@ class ConfigLoader:
 branch_mapping_rules=analysis_data.get("branch_mapping_rules", {}),
 ticket_platforms=analysis_data.get("ticket_platforms"),
 auto_identity_analysis=analysis_data.get("identity", {}).get("auto_analysis", True),
+branch_patterns=analysis_data.get("branch_patterns"),
 branch_analysis=branch_analysis_config,
 ml_categorization=ml_categorization_config,
 commit_classification=commit_classification_config,
gitflow_analytics/config/schema.py
CHANGED

@@ -305,6 +305,9 @@ class AnalysisConfig:
 branch_mapping_rules: dict[str, list[str]] = field(default_factory=dict)
 ticket_platforms: Optional[list[str]] = None
 auto_identity_analysis: bool = True  # Enable automatic identity analysis by default
+branch_patterns: Optional[list[str]] = (
+None  # Branch patterns to analyze (e.g., ["*"] for all branches)
+)
 branch_analysis: BranchAnalysisConfig = field(default_factory=BranchAnalysisConfig)
 ml_categorization: MLCategorization = field(default_factory=MLCategorization)
 commit_classification: CommitClassificationConfig = field(
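Note: taken together, the loader change (analysis_data.get("branch_patterns")) and the new AnalysisConfig.branch_patterns field mean a YAML analysis section can carry a branch_patterns list that ends up on the config object, defaulting to None when omitted. The snippet below traces that path with a trimmed-down stand-in dataclass; SimpleAnalysisConfig is illustrative only, and PyYAML is assumed to be available since the project reads YAML configuration:

    from dataclasses import dataclass
    from typing import Optional

    import yaml  # PyYAML, assumed available for reading the YAML config


    @dataclass
    class SimpleAnalysisConfig:  # trimmed stand-in for AnalysisConfig
        branch_patterns: Optional[list[str]] = None  # e.g. ["*"] analyzes all branches


    CONFIG_YAML = """
    analysis:
      branch_patterns:
        - main
        - "release/*"
    """

    analysis_data = yaml.safe_load(CONFIG_YAML)["analysis"]
    cfg = SimpleAnalysisConfig(branch_patterns=analysis_data.get("branch_patterns"))
    print(cfg.branch_patterns)  # ['main', 'release/*']; stays None when the key is absent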
gitflow_analytics/core/data_fetcher.py
CHANGED

@@ -5,11 +5,9 @@ focusing purely on data collection from Git repositories and ticket systems
 without performing any LLM-based classification.
 """

-import contextlib
 import logging
 import os
 import subprocess
-from collections import defaultdict
 from datetime import datetime, timedelta, timezone
 from pathlib import Path
 from typing import Any, Optional
@@ -117,37 +115,55 @@ class GitDataFetcher:
 f"🔍 DEBUG: Calculated date range from weeks_back: {start_date} to {end_date}"
 )

-#
+# Get progress service for top-level progress tracking
+progress = get_progress_service()
+
+# Step 1: Collect all commits organized by day with enhanced progress tracking
 logger.info("🔍 DEBUG: About to fetch commits by day")
 logger.info("Fetching commits organized by day...")
-
-
-
-
-
-
-
-
-
-
-
-
-logger.info(f"Fetching {len(ticket_ids)} unique tickets from JIRA...")
-self._fetch_detailed_tickets(
-ticket_ids, jira_integration, project_key, progress_callback
+
+# Create top-level progress for this repository
+with progress.progress(
+total=3,  # Three main steps: fetch commits, extract tickets, store data
+description=f"Processing {project_key}",
+unit="steps",
+) as repo_progress_ctx:
+
+# Step 1: Fetch commits
+progress.set_description(repo_progress_ctx, f"{project_key}: Fetching commits")
+daily_commits = self._fetch_commits_by_day(
+repo_path, project_key, start_date, end_date, branch_patterns, progress_callback
 )
+logger.info(f"🔍 DEBUG: Fetched {len(daily_commits)} days of commits")
+progress.update(repo_progress_ctx)
+
+# Step 2: Extract and fetch all referenced tickets
+progress.set_description(repo_progress_ctx, f"{project_key}: Processing tickets")
+logger.info("🔍 DEBUG: About to extract ticket references")
+logger.info("Extracting ticket references...")
+ticket_ids = self._extract_all_ticket_references(daily_commits)
+logger.info(f"🔍 DEBUG: Extracted {len(ticket_ids)} ticket IDs")
+
+if jira_integration and ticket_ids:
+logger.info(f"Fetching {len(ticket_ids)} unique tickets from JIRA...")
+self._fetch_detailed_tickets(
+ticket_ids, jira_integration, project_key, progress_callback
+)

-
-
-
+# Build commit-ticket correlations
+logger.info("Building commit-ticket correlations...")
+correlations_created = self._build_commit_ticket_correlations(daily_commits, repo_path)
+progress.update(repo_progress_ctx)

-
-
-
-
-
-
-
+# Step 3: Store daily commit batches
+progress.set_description(repo_progress_ctx, f"{project_key}: Storing data")
+logger.info(
+f"🔍 DEBUG: About to store daily batches. Daily commits has {len(daily_commits)} days"
+)
+logger.info("Storing daily commit batches...")
+batches_created = self._store_daily_batches(daily_commits, repo_path, project_key)
+logger.info(f"🔍 DEBUG: Storage complete. Batches created: {batches_created}")
+progress.update(repo_progress_ctx)

 # CRITICAL FIX: Verify actual storage before reporting success
 session = self.database.get_session()
@@ -219,8 +235,7 @@ class GitDataFetcher:
 logger.error(f"Failed to open repository at {repo_path}: {e}")
 return {}

-#
-all_commits = []
+# Get branches to analyze
 branches_to_analyze = self._get_branches_to_analyze(repo, branch_patterns)

 if not branches_to_analyze:
@@ -229,54 +244,98 @@ class GitDataFetcher:

 logger.info(f"Analyzing branches: {branches_to_analyze}")

-
-
-
-
-
+# Calculate days to process
+current_date = start_date.date()
+end_date_only = end_date.date()
+days_to_process = []
+while current_date <= end_date_only:
+days_to_process.append(current_date)
+current_date += timedelta(days=1)

-
-
-
+logger.info(
+f"Processing {len(days_to_process)} days from {start_date.date()} to {end_date.date()}"
+)
+
+# Get progress service for nested progress tracking
+progress = get_progress_service()
+
+# Dictionary to store commits by day
+daily_commits = {}
+all_commit_hashes = set()  # Track all hashes for deduplication
+
+# Create nested progress for day-by-day processing
+with progress.progress(
+total=len(days_to_process),
+description=f"Fetching commits for {project_key}",
+unit="days",
+nested=True,
+) as day_progress_ctx:
+
+for day_date in days_to_process:
+# Update description to show current day
+day_str = day_date.strftime("%Y-%m-%d")
+progress.set_description(day_progress_ctx, f"{project_key}: Processing {day_str}")
+
+# Calculate day boundaries
+day_start = datetime.combine(day_date, datetime.min.time(), tzinfo=timezone.utc)
+day_end = datetime.combine(day_date, datetime.max.time(), tzinfo=timezone.utc)

-
-
-# The original analyzer marks merge commits with is_merge=True but doesn't skip them
+day_commits = []
+commits_found_today = 0

-
-
-
+# Process each branch for this specific day
+for branch_name in branches_to_analyze:
+try:
+# Fetch commits for this specific day and branch
+branch_commits = list(
+repo.iter_commits(
+branch_name, since=day_start, until=day_end, reverse=False
+)
+)
+
+for commit in branch_commits:
+# Skip if we've already processed this commit
+if commit.hexsha in all_commit_hashes:
+continue
+
+# Extract commit data with full metadata
+commit_data = self._extract_commit_data(
+commit, branch_name, project_key, repo_path
+)
+if commit_data:
+day_commits.append(commit_data)
+all_commit_hashes.add(commit.hexsha)
+commits_found_today += 1
+
+except Exception as e:
+logger.warning(
+f"Error processing branch {branch_name} for day {day_str}: {e}"
+)
+continue
+
+# Store commits for this day if any were found
+if day_commits:
+# Sort commits by timestamp
+day_commits.sort(key=lambda c: c["timestamp"])
+daily_commits[day_str] = day_commits
+
+# Incremental caching - store commits for this day immediately
+self._store_day_commits_incremental(
+repo_path, day_str, day_commits, project_key
 )
-if commit_data:
-all_commits.append(commit_data)

-
-
-
-
-
-
-
-
-
-
-
-
-
-# Organize commits by day
-daily_commits = defaultdict(list)
-for commit_data in unique_commits:
-# Convert timestamp to date key
-commit_date = commit_data["timestamp"].date()
-date_key = commit_date.strftime("%Y-%m-%d")
-daily_commits[date_key].append(commit_data)
-
-# Sort commits within each day by timestamp
-for date_key in daily_commits:
-daily_commits[date_key].sort(key=lambda c: c["timestamp"])
-
-logger.info(f"Collected {len(unique_commits)} commits across {len(daily_commits)} days")
-return dict(daily_commits)
+logger.debug(f"Found {commits_found_today} commits on {day_str}")
+
+# Update progress callback if provided
+if progress_callback:
+progress_callback(f"Processed {day_str}: {commits_found_today} commits")
+
+# Update progress bar
+progress.update(day_progress_ctx)
+
+total_commits = sum(len(commits) for commits in daily_commits.values())
+logger.info(f"Collected {total_commits} unique commits across {len(daily_commits)} days")
+return daily_commits

 def _extract_commit_data(
 self, commit: Any, branch_name: str, project_key: str, repo_path: Path
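Note: the rewritten _fetch_commits_by_day walks the requested range one calendar day at a time, asks git for that day's commits on each branch, and deduplicates by commit hash so a commit reachable from several branches is only counted once. The standalone sketch below shows the same day-windowing idea using GitPython directly; the repository path, date range, and branch list are placeholder values:

    from datetime import datetime, timedelta, timezone

    from git import Repo  # GitPython

    repo = Repo("/path/to/repo")  # placeholder path
    start = datetime(2024, 1, 1, tzinfo=timezone.utc)
    end = datetime(2024, 1, 7, tzinfo=timezone.utc)
    branches = ["main"]  # placeholder branch list

    seen: set[str] = set()
    daily: dict[str, list[str]] = {}

    day = start.date()
    while day <= end.date():
        # Day boundaries, mirroring the datetime.combine(...) calls in the diff above.
        day_start = datetime.combine(day, datetime.min.time(), tzinfo=timezone.utc)
        day_end = datetime.combine(day, datetime.max.time(), tzinfo=timezone.utc)
        key = day.strftime("%Y-%m-%d")
        for branch in branches:
            # since/until are forwarded to `git rev-list --since/--until`.
            for commit in repo.iter_commits(branch, since=day_start, until=day_end):
                if commit.hexsha in seen:  # skip commits already seen on another branch
                    continue
                seen.add(commit.hexsha)
                daily.setdefault(key, []).append(commit.hexsha)
        day += timedelta(days=1)

    print({k: len(v) for k, v in daily.items()})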
@@ -373,114 +432,102 @@ class GitDataFetcher:
 """Get list of branches to analyze based on patterns.

 WHY: Robust branch detection that handles missing remotes, missing default branches,
-and provides good fallback behavior.
+and provides good fallback behavior. When no patterns specified, analyzes ALL branches
+to capture the complete development picture.

 DESIGN DECISION:
--
+- When no patterns: analyze ALL accessible branches (not just main)
+- When patterns specified: match against those patterns only
 - Handle missing remotes gracefully
 - Skip remote tracking branches to avoid duplicates
 - Use actual branch existence checking rather than assuming branches exist
 """
-
-
-available_branches = []
+# Collect all available branches (local branches preferred)
+available_branches = []

-
-
-
-
-
-
-
+# First, try local branches
+try:
+local_branches = [branch.name for branch in repo.branches]
+available_branches.extend(local_branches)
+logger.debug(f"Found local branches: {local_branches}")
+except Exception as e:
+logger.debug(f"Error getting local branches: {e}")

-
+# If we have remotes, also consider remote branches (but clean the names)
+try:
+if repo.remotes and hasattr(repo.remotes, "origin"):
+remote_branches = [
+ref.name.replace("origin/", "")
+for ref in repo.remotes.origin.refs
+if not ref.name.endswith("HEAD")  # Skip HEAD ref
+]
+# Only add remote branches that aren't already in local branches
+for branch in remote_branches:
+if branch not in available_branches:
+available_branches.append(branch)
+logger.debug(f"Found remote branches: {remote_branches}")
+except Exception as e:
+logger.debug(f"Error getting remote branches: {e}")
+
+# If no branches found, fallback to trying common names directly
+if not available_branches:
+logger.warning("No branches found via normal detection, falling back to common names")
+available_branches = ["main", "master", "develop", "dev"]
+
+# Filter branches based on patterns if provided
+if branch_patterns:
+import fnmatch
+
+matching_branches = []
+for pattern in branch_patterns:
+matching = [
+branch for branch in available_branches if fnmatch.fnmatch(branch, pattern)
+]
+matching_branches.extend(matching)
+# Remove duplicates while preserving order
+branches_to_test = list(dict.fromkeys(matching_branches))
+else:
+# No patterns specified - analyze ALL branches for complete coverage
+branches_to_test = available_branches
+logger.info(
+f"No branch patterns specified - will analyze all {len(branches_to_test)} branches"
+)
+
+# Test that branches are actually accessible
+accessible_branches = []
+for branch in branches_to_test:
 try:
-
-
-ref.name.replace("origin/", "")
-for ref in repo.remotes.origin.refs
-if not ref.name.endswith("HEAD")  # Skip HEAD ref
-]
-# Only add remote branches that aren't already in local branches
-for branch in remote_branches:
-if branch not in available_branches:
-available_branches.append(branch)
-logger.debug(f"Found remote branches: {remote_branches}")
+next(iter(repo.iter_commits(branch, max_count=1)), None)
+accessible_branches.append(branch)
 except Exception as e:
-logger.debug(f"
+logger.debug(f"Branch {branch} not accessible: {e}")

-
-
-
-"No branches found via normal detection, falling back to common names"
-)
-available_branches = ["main", "master", "develop", "dev"]
-
-# Try default main branches first, in order of preference
+if not accessible_branches:
+# Last resort: try to find ANY working branch
+logger.warning("No accessible branches found from patterns/default, trying fallback")
 main_branches = ["main", "master", "develop", "dev"]
 for branch in main_branches:
 if branch in available_branches:
-# Test that we can actually access this branch
 try:
-# Just try to get the commit object to verify branch exists and is accessible
 next(iter(repo.iter_commits(branch, max_count=1)), None)
-logger.info(f"Using main branch: {branch}")
+logger.info(f"Using fallback main branch: {branch}")
 return [branch]
-except Exception
-logger.debug(f"Branch {branch} exists but not accessible: {e}")
+except Exception:
 continue

-#
+# Try any available branch
 for branch in available_branches:
 try:
 next(iter(repo.iter_commits(branch, max_count=1)), None)
 logger.info(f"Using fallback branch: {branch}")
 return [branch]
-except Exception
-logger.debug(f"Branch {branch} not accessible: {e}")
+except Exception:
 continue

-# Last resort: return empty list (will be handled gracefully by caller)
 logger.warning("No accessible branches found")
 return []

-
-import fnmatch
-
-available_branches = []
-
-# Collect all branches (local and remote)
-with contextlib.suppress(Exception):
-available_branches.extend([branch.name for branch in repo.branches])
-
-try:
-if repo.remotes and hasattr(repo.remotes, "origin"):
-remote_branches = [
-ref.name.replace("origin/", "")
-for ref in repo.remotes.origin.refs
-if not ref.name.endswith("HEAD")
-]
-for branch in remote_branches:
-if branch not in available_branches:
-available_branches.append(branch)
-except Exception:
-pass
-
-# Match patterns against available branches
-matching_branches = []
-for pattern in branch_patterns:
-matching = [branch for branch in available_branches if fnmatch.fnmatch(branch, pattern)]
-matching_branches.extend(matching)
-
-# Test that matched branches are actually accessible
-accessible_branches = []
-for branch in list(set(matching_branches)):  # Remove duplicates
-try:
-next(iter(repo.iter_commits(branch, max_count=1)), None)
-accessible_branches.append(branch)
-except Exception as e:
-logger.debug(f"Matched branch {branch} not accessible: {e}")
-
+logger.info(f"Will analyze {len(accessible_branches)} branches: {accessible_branches}")
 return accessible_branches

 def _update_repository(self, repo) -> bool:
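Note: pattern matching in the rewritten _get_branches_to_analyze is plain fnmatch globbing over the collected branch names, with dict.fromkeys dropping duplicates while preserving the order in which patterns matched. In isolation the step behaves like this (the branch names are made up):

    import fnmatch

    available = ["main", "develop", "release/1.3", "release/1.4", "feature/login", "release/1.3"]
    patterns = ["main", "release/*"]

    matching: list[str] = []
    for pattern in patterns:
        matching.extend(b for b in available if fnmatch.fnmatch(b, pattern))

    # dict.fromkeys deduplicates while keeping first-seen order.
    branches_to_test = list(dict.fromkeys(matching))
    print(branches_to_test)  # ['main', 'release/1.3', 'release/1.4']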
@@ -516,7 +563,7 @@ class GitDataFetcher:
 # Run git fetch with timeout
 try:
 result = subprocess.run(
-["git", "fetch", "--all"
+["git", "fetch", "--all"],
 cwd=repo.working_dir,
 env=env,
 capture_output=True,
@@ -553,7 +600,7 @@ class GitDataFetcher:
 # Pull latest changes using subprocess
 try:
 result = subprocess.run(
-["git", "pull"
+["git", "pull"],
 cwd=repo.working_dir,
 env=env,
 capture_output=True,
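Note: both corrected call sites hand git an argument list and run it through subprocess.run with output captured, and the surrounding context mentions a fetch timeout. A self-contained version of that pattern looks roughly like the following, where the working directory, the GIT_TERMINAL_PROMPT setting, and the 300-second timeout are illustrative assumptions rather than the module's actual values:

    import os
    import subprocess

    env = {**os.environ, "GIT_TERMINAL_PROMPT": "0"}  # assumption: disable interactive prompts

    try:
        result = subprocess.run(
            ["git", "fetch", "--all"],
            cwd="/path/to/repo",   # placeholder working directory
            env=env,
            capture_output=True,
            text=True,
            timeout=300,           # illustrative timeout
        )
        if result.returncode != 0:
            print(f"git fetch failed: {result.stderr.strip()}")
    except subprocess.TimeoutExpired:
        print("git fetch timed out")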
@@ -1191,3 +1238,48 @@ class GitDataFetcher:
 logger.warning(f"Error calculating commit stats for {commit.hexsha[:8]}: {e}")

 return stats
+
+def _store_day_commits_incremental(
+self, repo_path: Path, date_str: str, commits: list[dict[str, Any]], project_key: str
+) -> None:
+"""Store commits for a single day incrementally to enable progress tracking.
+
+This method stores commits immediately after fetching them for a day,
+allowing for better progress tracking and recovery from interruptions.
+
+Args:
+repo_path: Path to the repository
+date_str: Date string in YYYY-MM-DD format
+commits: List of commit data for the day
+project_key: Project identifier
+"""
+try:
+# Transform commits to cache format
+cache_format_commits = []
+for commit in commits:
+cache_format_commit = {
+"hash": commit["commit_hash"],
+"author_name": commit.get("author_name", ""),
+"author_email": commit.get("author_email", ""),
+"message": commit.get("message", ""),
+"timestamp": commit["timestamp"],
+"branch": commit.get("branch", "main"),
+"is_merge": commit.get("is_merge", False),
+"files_changed_count": commit.get("files_changed_count", 0),
+"insertions": commit.get("lines_added", 0),
+"deletions": commit.get("lines_deleted", 0),
+"story_points": commit.get("story_points"),
+"ticket_references": commit.get("ticket_references", []),
+}
+cache_format_commits.append(cache_format_commit)
+
+# Use bulk store for efficiency
+if cache_format_commits:
+bulk_stats = self.cache.bulk_store_commits(str(repo_path), cache_format_commits)
+logger.debug(
+f"Incrementally stored {bulk_stats['inserted']} commits for {date_str} "
+f"({bulk_stats['skipped']} already cached)"
+)
+except Exception as e:
+# Log error but don't fail - commits will be stored again in batch at the end
+logger.warning(f"Failed to incrementally store commits for {date_str}: {e}")
{gitflow_analytics-1.3.6.dist-info → gitflow_analytics-1.3.11.dist-info}/RECORD
CHANGED

@@ -1,6 +1,6 @@
 gitflow_analytics/__init__.py,sha256=yN1dyAUu4l9qX-YNAGRItEf4RFFe-5GQiOntXPIfdxo,683
-gitflow_analytics/_version.py,sha256=
-gitflow_analytics/cli.py,sha256=
+gitflow_analytics/_version.py,sha256=bXQpvOWe_1iZd8zp8muFij7XZbsWHLHtQWQK6axVbUc,138
+gitflow_analytics/cli.py,sha256=TI1v_IqiR_DpMp6N7IGH9aZDNkk7qwzgEc4kLtWTM8k,210011
 gitflow_analytics/cli_rich.py,sha256=1Heeyadbqpn5d13jtI7jtcrpmbA0BmPY9lnMXrgSncI,19326
 gitflow_analytics/config.py,sha256=XRuxvzLWyn_ML7mDCcuZ9-YFNAEsnt33vIuWxQQ_jxg,1033
 gitflow_analytics/classification/__init__.py,sha256=p8shPUZpGaw7-ivhfAVrPDbSP2LrpvWC1WEsBJIg-PI,969
@@ -11,16 +11,16 @@ gitflow_analytics/classification/linguist_analyzer.py,sha256=HjLx9mM7hGXtrvMba6o
 gitflow_analytics/classification/model.py,sha256=2KbmFh9MpyvHMcNHbqwUTAAVLHHu3MiTfFIPyZSGa-8,16356
 gitflow_analytics/config/__init__.py,sha256=lzFOHsJGoeDHuu_NEgcSeUFwU0bgV3lnL9w0Pyc4FI0,1037
 gitflow_analytics/config/errors.py,sha256=IBKhAIwJ4gscZFnLDyE3jEp03wn2stPR7JQJXNSIfok,10386
-gitflow_analytics/config/loader.py,sha256=
+gitflow_analytics/config/loader.py,sha256=oG6D6jEoVuK69a0060Oo2I9BEsZPs0jiE9YY7tzOv3Q,34238
 gitflow_analytics/config/profiles.py,sha256=yUjFAWW6uzOUdi5qlPE-QV9681HigyrLiSJFpL8X9A0,7967
 gitflow_analytics/config/repository.py,sha256=maptMAdCKDsuMAfoTAaTrMPVfVd_tKNLRenvuPe1-t4,4350
-gitflow_analytics/config/schema.py,sha256=
+gitflow_analytics/config/schema.py,sha256=JOeu8VCgCvYs5B1oQuogQsRTO3UINdQlPNezyoGLoY4,14761
 gitflow_analytics/config/validator.py,sha256=l7AHjXYJ8wEmyA1rn2WiItZXtAiRb9YBLjFCDl53qKM,5907
 gitflow_analytics/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 gitflow_analytics/core/analyzer.py,sha256=59kGObzjziOb8geFyZMKCUvWmo3hcXE0eTgrjYEc1XA,58736
 gitflow_analytics/core/branch_mapper.py,sha256=1L1ctrhTEqMZ61eS1nZRkcyaarLipeQgotw4HdXcSmM,7407
 gitflow_analytics/core/cache.py,sha256=O3I_1Jbuj3GcnUo6CBe0nEJ_8fxKY2wcxeq9sff-OhY,67807
-gitflow_analytics/core/data_fetcher.py,sha256=
+gitflow_analytics/core/data_fetcher.py,sha256=eO06-3lQ5O4j5d9Df8Qs4wT-1bvlLcUKbsgNe7L1LTU,57341
 gitflow_analytics/core/identity.py,sha256=k7i-vcRJ2eiTU0_kYGY5QOhxcqnitibTTx7DVONW0kg,31237
 gitflow_analytics/core/metrics_storage.py,sha256=hNXVXjpAaPHYoBFUCj_qR-hs9g8PbQKux_5esyevNEQ,21199
 gitflow_analytics/core/progress.py,sha256=KUQU7ToX63JvPTm8RRy31OmnVeqzc8HfrdGpb2ZvtoY,12509
@@ -114,9 +114,9 @@ gitflow_analytics/tui/widgets/__init__.py,sha256=85l6vkJuRGJNvej-nUZZoNg562zl_1J
 gitflow_analytics/tui/widgets/data_table.py,sha256=8fGNG4m7H41vCid3QwCHJa7bd8qu_DKrDf22iCks3XA,8722
 gitflow_analytics/tui/widgets/export_modal.py,sha256=L-XKPOc6u-fow2TudPgDnC0kXZM1WZuGd_jahtV8lhg,10737
 gitflow_analytics/tui/widgets/progress_widget.py,sha256=Qny6Q1nU0Pr3aj4aHfXLaRjya9MH3rldR2HWYiaQyGE,6167
-gitflow_analytics-1.3.
-gitflow_analytics-1.3.
-gitflow_analytics-1.3.
-gitflow_analytics-1.3.
-gitflow_analytics-1.3.
-gitflow_analytics-1.3.
+gitflow_analytics-1.3.11.dist-info/licenses/LICENSE,sha256=xwvSwY1GYXpRpmbnFvvnbmMwpobnrdN9T821sGvjOY0,1066
+gitflow_analytics-1.3.11.dist-info/METADATA,sha256=jMKDbH-PaYIUhDMkUWO3NDVlOgE-dMoMu-pzEB9KpgI,34092
+gitflow_analytics-1.3.11.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+gitflow_analytics-1.3.11.dist-info/entry_points.txt,sha256=a3y8HnfLOvK1QVOgAkDY6VQXXm3o9ZSQRZrpiaS3hEM,65
+gitflow_analytics-1.3.11.dist-info/top_level.txt,sha256=CQyxZXjKvpSB1kgqqtuE0PCRqfRsXZJL8JrYpJKtkrk,18
+gitflow_analytics-1.3.11.dist-info/RECORD,,

File without changes
File without changes
File without changes
File without changes