gitflow-analytics 1.3.6__py3-none-any.whl → 1.3.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
gitflow_analytics/_version.py CHANGED
@@ -1,4 +1,4 @@
  """Version information for gitflow-analytics."""
 
- __version__ = "1.3.6"
+ __version__ = "1.3.11"
  __version_info__ = tuple(int(x) for x in __version__.split("."))
gitflow_analytics/cli.py CHANGED
@@ -1069,6 +1069,7 @@ def analyze(
 
  # Perform data fetch for repositories that need analysis
  from .core.data_fetcher import GitDataFetcher
+ from .core.progress import get_progress_service
 
  data_fetcher = GitDataFetcher(
  cache=cache,
@@ -1083,74 +1084,105 @@ def analyze(
  orchestrator = IntegrationOrchestrator(cfg, cache)
  jira_integration = orchestrator.integrations.get("jira")
 
+ # Get progress service for overall repository progress
+ progress = get_progress_service()
+
  # Fetch data for repositories that need analysis
  total_commits = 0
  total_tickets = 0
 
- for repo_config in repos_needing_analysis:
- try:
- repo_path = Path(repo_config.path)
- project_key = repo_config.project_key or repo_path.name
-
- # Progress callback for fetch
- def progress_callback(message: str):
- if display:
- display.print_status(f" {message}", "info")
-
- # Fetch repository data
- result = data_fetcher.fetch_repository_data(
- repo_path=repo_path,
- project_key=project_key,
- weeks_back=weeks,
- branch_patterns=getattr(repo_config, "branch_patterns", None),
- jira_integration=jira_integration,
- progress_callback=progress_callback,
- start_date=start_date,
- end_date=end_date,
- )
+ # Create top-level progress for all repositories
+ with progress.progress(
+ total=len(repos_needing_analysis),
+ description="Processing repositories",
+ unit="repos",
+ ) as repos_progress_ctx:
 
- total_commits += result["stats"]["total_commits"]
- total_tickets += result["stats"]["unique_tickets"]
+ for idx, repo_config in enumerate(repos_needing_analysis, 1):
+ try:
+ repo_path = Path(repo_config.path)
+ project_key = repo_config.project_key or repo_path.name
 
- if display:
- display.print_status(
- f" ✅ {project_key}: {result['stats']['total_commits']} commits, "
- f"{result['stats']['unique_tickets']} tickets",
- "success",
+ # Update overall progress description
+ progress.set_description(
+ repos_progress_ctx,
+ f"Repository {idx}/{len(repos_needing_analysis)}: {project_key}",
  )
 
- # Mark repository analysis as complete
- cache.mark_repository_analysis_complete(
- repo_path=str(repo_path),
- repo_name=repo_config.name,
- project_key=project_key,
- analysis_start=start_date,
- analysis_end=end_date,
- weeks_analyzed=weeks,
- commit_count=result["stats"]["total_commits"],
- ticket_count=result["stats"]["unique_tickets"],
- config_hash=config_hash,
- )
+ # Progress callback for fetch
+ def progress_callback(message: str):
+ if display:
+ display.print_status(f" {message}", "info")
 
- except Exception as e:
- if display:
- display.print_status(
- f" ❌ Error fetching {project_key}: {e}", "error"
+ # Fetch repository data
+ # For organization discovery, use branch patterns from analysis config
+ # Default to ["*"] to analyze all branches when not specified
+ branch_patterns = None
+ if hasattr(cfg.analysis, "branch_patterns"):
+ branch_patterns = cfg.analysis.branch_patterns
+ elif cfg.github.organization:
+ # For organization discovery, default to analyzing all branches
+ branch_patterns = ["*"]
+
+ result = data_fetcher.fetch_repository_data(
+ repo_path=repo_path,
+ project_key=project_key,
+ weeks_back=weeks,
+ branch_patterns=branch_patterns,
+ jira_integration=jira_integration,
+ progress_callback=progress_callback,
+ start_date=start_date,
+ end_date=end_date,
  )
- else:
- click.echo(f" ❌ Error fetching {project_key}: {e}")
 
- # Mark repository analysis as failed
- with contextlib.suppress(Exception):
- cache.mark_repository_analysis_failed(
+ total_commits += result["stats"]["total_commits"]
+ total_tickets += result["stats"]["unique_tickets"]
+
+ if display:
+ display.print_status(
+ f" ✅ {project_key}: {result['stats']['total_commits']} commits, "
+ f"{result['stats']['unique_tickets']} tickets",
+ "success",
+ )
+
+ # Mark repository analysis as complete
+ cache.mark_repository_analysis_complete(
  repo_path=str(repo_path),
  repo_name=repo_config.name,
+ project_key=project_key,
  analysis_start=start_date,
  analysis_end=end_date,
- error_message=str(e),
+ weeks_analyzed=weeks,
+ commit_count=result["stats"]["total_commits"],
+ ticket_count=result["stats"]["unique_tickets"],
  config_hash=config_hash,
  )
- continue
+
+ # Update overall repository progress
+ progress.update(repos_progress_ctx)
+
+ except Exception as e:
+ if display:
+ display.print_status(
+ f" ❌ Error fetching {project_key}: {e}", "error"
+ )
+ else:
+ click.echo(f" ❌ Error fetching {project_key}: {e}")
+
+ # Mark repository analysis as failed
+ with contextlib.suppress(Exception):
+ cache.mark_repository_analysis_failed(
+ repo_path=str(repo_path),
+ repo_name=repo_config.name,
+ analysis_start=start_date,
+ analysis_end=end_date,
+ error_message=str(e),
+ config_hash=config_hash,
+ )
+
+ # Update progress even on failure
+ progress.update(repos_progress_ctx)
+ continue
 
  if display:
  display.print_status(
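The repository loop above and the GitDataFetcher changes further down share one progress-service pattern: obtain the service, open a progress context sized to the work, update its description per item, and advance it once per unit whether the item succeeds or fails. A minimal standalone sketch of that pattern, assuming only the calls visible in this diff (the repository names and the work inside the loop are hypothetical):

    from gitflow_analytics.core.progress import get_progress_service

    progress = get_progress_service()
    repos = ["repo-a", "repo-b", "repo-c"]  # hypothetical work items

    with progress.progress(
        total=len(repos), description="Processing repositories", unit="repos"
    ) as ctx:
        for idx, name in enumerate(repos, 1):
            # Describe the current item, do the work, then advance the bar
            progress.set_description(ctx, f"Repository {idx}/{len(repos)}: {name}")
            # ... fetch and analyze the repository here ...
            progress.update(ctx)  # advanced on success and on failure alike in the CLI code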
@@ -1322,11 +1354,20 @@ def analyze(
  display.print_status(f" {message}", "info")
 
  # Fetch repository data
+ # For organization discovery, use branch patterns from analysis config
+ # Default to ["*"] to analyze all branches when not specified
+ branch_patterns = None
+ if hasattr(cfg.analysis, "branch_patterns"):
+ branch_patterns = cfg.analysis.branch_patterns
+ elif cfg.github.organization:
+ # For organization discovery, default to analyzing all branches
+ branch_patterns = ["*"]
+
  result = data_fetcher.fetch_repository_data(
  repo_path=repo_path,
  project_key=project_key,
  weeks_back=weeks,
- branch_patterns=getattr(repo_config, "branch_patterns", None),
+ branch_patterns=branch_patterns,
  jira_integration=jira_integration,
  progress_callback=progress_callback,
  start_date=start_date,
@@ -3649,11 +3690,20 @@ def fetch(
  click.echo(f" {message}")
 
  # Fetch repository data
+ # For organization discovery, use branch patterns from analysis config
+ # Default to ["*"] to analyze all branches when not specified
+ branch_patterns = None
+ if hasattr(cfg.analysis, "branch_patterns"):
+ branch_patterns = cfg.analysis.branch_patterns
+ elif cfg.github.organization:
+ # For organization discovery, default to analyzing all branches
+ branch_patterns = ["*"]
+
  result = data_fetcher.fetch_repository_data(
  repo_path=repo_path,
  project_key=project_key,
  weeks_back=weeks,
- branch_patterns=getattr(repo_config, "branch_patterns", None),
+ branch_patterns=branch_patterns,
  jira_integration=jira_integration,
  progress_callback=progress_callback,
  start_date=start_date,
gitflow_analytics/config/loader.py CHANGED
@@ -555,6 +555,7 @@ class ConfigLoader:
  branch_mapping_rules=analysis_data.get("branch_mapping_rules", {}),
  ticket_platforms=analysis_data.get("ticket_platforms"),
  auto_identity_analysis=analysis_data.get("identity", {}).get("auto_analysis", True),
+ branch_patterns=analysis_data.get("branch_patterns"),
  branch_analysis=branch_analysis_config,
  ml_categorization=ml_categorization_config,
  commit_classification=commit_classification_config,
gitflow_analytics/config/schema.py CHANGED
@@ -305,6 +305,9 @@ class AnalysisConfig:
  branch_mapping_rules: dict[str, list[str]] = field(default_factory=dict)
  ticket_platforms: Optional[list[str]] = None
  auto_identity_analysis: bool = True # Enable automatic identity analysis by default
+ branch_patterns: Optional[list[str]] = (
+ None # Branch patterns to analyze (e.g., ["*"] for all branches)
+ )
  branch_analysis: BranchAnalysisConfig = field(default_factory=BranchAnalysisConfig)
  ml_categorization: MLCategorization = field(default_factory=MLCategorization)
  commit_classification: CommitClassificationConfig = field(
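Together, the loader and schema hunks above add an optional analysis.branch_patterns setting: the loader reads it from the parsed analysis section and stores it on AnalysisConfig. A small sketch of that data flow, where analysis_data stands in for the parsed analysis section and every key other than branch_patterns is illustrative:

    # Hypothetical parsed "analysis" section of a configuration file.
    analysis_data = {
        "branch_patterns": ["main", "develop", "release/*"],  # globs; ["*"] means every branch
    }

    # Mirrors the loader change: a missing key yields None, which the cli.py hunks
    # above then turn into ["*"] for organization discovery.
    branch_patterns = analysis_data.get("branch_patterns")
    print(branch_patterns)  # ['main', 'develop', 'release/*']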
gitflow_analytics/core/data_fetcher.py CHANGED
@@ -5,11 +5,9 @@ focusing purely on data collection from Git repositories and ticket systems
  without performing any LLM-based classification.
  """
 
- import contextlib
  import logging
  import os
  import subprocess
- from collections import defaultdict
  from datetime import datetime, timedelta, timezone
  from pathlib import Path
  from typing import Any, Optional
@@ -117,37 +115,55 @@ class GitDataFetcher:
  f"🔍 DEBUG: Calculated date range from weeks_back: {start_date} to {end_date}"
  )
 
- # Step 1: Collect all commits organized by day
+ # Get progress service for top-level progress tracking
+ progress = get_progress_service()
+
+ # Step 1: Collect all commits organized by day with enhanced progress tracking
  logger.info("🔍 DEBUG: About to fetch commits by day")
  logger.info("Fetching commits organized by day...")
- daily_commits = self._fetch_commits_by_day(
- repo_path, project_key, start_date, end_date, branch_patterns, progress_callback
- )
- logger.info(f"🔍 DEBUG: Fetched {len(daily_commits)} days of commits")
-
- # Step 2: Extract and fetch all referenced tickets
- logger.info("🔍 DEBUG: About to extract ticket references")
- logger.info("Extracting ticket references...")
- ticket_ids = self._extract_all_ticket_references(daily_commits)
- logger.info(f"🔍 DEBUG: Extracted {len(ticket_ids)} ticket IDs")
-
- if jira_integration and ticket_ids:
- logger.info(f"Fetching {len(ticket_ids)} unique tickets from JIRA...")
- self._fetch_detailed_tickets(
- ticket_ids, jira_integration, project_key, progress_callback
+
+ # Create top-level progress for this repository
+ with progress.progress(
+ total=3, # Three main steps: fetch commits, extract tickets, store data
+ description=f"Processing {project_key}",
+ unit="steps",
+ ) as repo_progress_ctx:
+
+ # Step 1: Fetch commits
+ progress.set_description(repo_progress_ctx, f"{project_key}: Fetching commits")
+ daily_commits = self._fetch_commits_by_day(
+ repo_path, project_key, start_date, end_date, branch_patterns, progress_callback
  )
+ logger.info(f"🔍 DEBUG: Fetched {len(daily_commits)} days of commits")
+ progress.update(repo_progress_ctx)
+
+ # Step 2: Extract and fetch all referenced tickets
+ progress.set_description(repo_progress_ctx, f"{project_key}: Processing tickets")
+ logger.info("🔍 DEBUG: About to extract ticket references")
+ logger.info("Extracting ticket references...")
+ ticket_ids = self._extract_all_ticket_references(daily_commits)
+ logger.info(f"🔍 DEBUG: Extracted {len(ticket_ids)} ticket IDs")
+
+ if jira_integration and ticket_ids:
+ logger.info(f"Fetching {len(ticket_ids)} unique tickets from JIRA...")
+ self._fetch_detailed_tickets(
+ ticket_ids, jira_integration, project_key, progress_callback
+ )
 
- # Step 3: Store commit-ticket correlations
- logger.info("Building commit-ticket correlations...")
- correlations_created = self._build_commit_ticket_correlations(daily_commits, repo_path)
+ # Build commit-ticket correlations
+ logger.info("Building commit-ticket correlations...")
+ correlations_created = self._build_commit_ticket_correlations(daily_commits, repo_path)
+ progress.update(repo_progress_ctx)
 
- # Step 4: Store daily commit batches
- logger.info(
- f"🔍 DEBUG: About to store daily batches. Daily commits has {len(daily_commits)} days"
- )
- logger.info("Storing daily commit batches...")
- batches_created = self._store_daily_batches(daily_commits, repo_path, project_key)
- logger.info(f"🔍 DEBUG: Storage complete. Batches created: {batches_created}")
+ # Step 3: Store daily commit batches
+ progress.set_description(repo_progress_ctx, f"{project_key}: Storing data")
+ logger.info(
+ f"🔍 DEBUG: About to store daily batches. Daily commits has {len(daily_commits)} days"
+ )
+ logger.info("Storing daily commit batches...")
+ batches_created = self._store_daily_batches(daily_commits, repo_path, project_key)
+ logger.info(f"🔍 DEBUG: Storage complete. Batches created: {batches_created}")
+ progress.update(repo_progress_ctx)
 
  # CRITICAL FIX: Verify actual storage before reporting success
  session = self.database.get_session()
@@ -219,8 +235,7 @@ class GitDataFetcher:
  logger.error(f"Failed to open repository at {repo_path}: {e}")
  return {}
 
- # Collect commits from all relevant branches
- all_commits = []
+ # Get branches to analyze
  branches_to_analyze = self._get_branches_to_analyze(repo, branch_patterns)
 
  if not branches_to_analyze:
@@ -229,54 +244,98 @@ class GitDataFetcher:
 
  logger.info(f"Analyzing branches: {branches_to_analyze}")
 
- for branch_name in branches_to_analyze:
- try:
- branch_commits = list(
- repo.iter_commits(branch_name, since=start_date, until=end_date, reverse=False)
- )
+ # Calculate days to process
+ current_date = start_date.date()
+ end_date_only = end_date.date()
+ days_to_process = []
+ while current_date <= end_date_only:
+ days_to_process.append(current_date)
+ current_date += timedelta(days=1)
 
- logger.debug(
- f"Found {len(branch_commits)} commits in branch {branch_name} for date range"
- )
+ logger.info(
+ f"Processing {len(days_to_process)} days from {start_date.date()} to {end_date.date()}"
+ )
+
+ # Get progress service for nested progress tracking
+ progress = get_progress_service()
+
+ # Dictionary to store commits by day
+ daily_commits = {}
+ all_commit_hashes = set() # Track all hashes for deduplication
+
+ # Create nested progress for day-by-day processing
+ with progress.progress(
+ total=len(days_to_process),
+ description=f"Fetching commits for {project_key}",
+ unit="days",
+ nested=True,
+ ) as day_progress_ctx:
+
+ for day_date in days_to_process:
+ # Update description to show current day
+ day_str = day_date.strftime("%Y-%m-%d")
+ progress.set_description(day_progress_ctx, f"{project_key}: Processing {day_str}")
+
+ # Calculate day boundaries
+ day_start = datetime.combine(day_date, datetime.min.time(), tzinfo=timezone.utc)
+ day_end = datetime.combine(day_date, datetime.max.time(), tzinfo=timezone.utc)
 
- for commit in branch_commits:
- # Include merge commits like the original analyzer
- # The original analyzer marks merge commits with is_merge=True but doesn't skip them
+ day_commits = []
+ commits_found_today = 0
 
- # Extract commit data with full metadata
- commit_data = self._extract_commit_data(
- commit, branch_name, project_key, repo_path
+ # Process each branch for this specific day
+ for branch_name in branches_to_analyze:
+ try:
+ # Fetch commits for this specific day and branch
+ branch_commits = list(
+ repo.iter_commits(
+ branch_name, since=day_start, until=day_end, reverse=False
+ )
+ )
+
+ for commit in branch_commits:
+ # Skip if we've already processed this commit
+ if commit.hexsha in all_commit_hashes:
+ continue
+
+ # Extract commit data with full metadata
+ commit_data = self._extract_commit_data(
+ commit, branch_name, project_key, repo_path
+ )
+ if commit_data:
+ day_commits.append(commit_data)
+ all_commit_hashes.add(commit.hexsha)
+ commits_found_today += 1
+
+ except Exception as e:
+ logger.warning(
+ f"Error processing branch {branch_name} for day {day_str}: {e}"
+ )
+ continue
+
+ # Store commits for this day if any were found
+ if day_commits:
+ # Sort commits by timestamp
+ day_commits.sort(key=lambda c: c["timestamp"])
+ daily_commits[day_str] = day_commits
+
+ # Incremental caching - store commits for this day immediately
+ self._store_day_commits_incremental(
+ repo_path, day_str, day_commits, project_key
  )
- if commit_data:
- all_commits.append(commit_data)
 
- except Exception as e:
- logger.warning(f"Error processing branch {branch_name}: {e}")
- continue
-
- # Deduplicate commits (same commit may appear in multiple branches)
- seen_hashes = set()
- unique_commits = []
- for commit_data in all_commits:
- commit_hash = commit_data["commit_hash"]
- if commit_hash not in seen_hashes:
- seen_hashes.add(commit_hash)
- unique_commits.append(commit_data)
-
- # Organize commits by day
- daily_commits = defaultdict(list)
- for commit_data in unique_commits:
- # Convert timestamp to date key
- commit_date = commit_data["timestamp"].date()
- date_key = commit_date.strftime("%Y-%m-%d")
- daily_commits[date_key].append(commit_data)
-
- # Sort commits within each day by timestamp
- for date_key in daily_commits:
- daily_commits[date_key].sort(key=lambda c: c["timestamp"])
-
- logger.info(f"Collected {len(unique_commits)} commits across {len(daily_commits)} days")
- return dict(daily_commits)
+ logger.debug(f"Found {commits_found_today} commits on {day_str}")
+
+ # Update progress callback if provided
+ if progress_callback:
+ progress_callback(f"Processed {day_str}: {commits_found_today} commits")
+
+ # Update progress bar
+ progress.update(day_progress_ctx)
+
+ total_commits = sum(len(commits) for commits in daily_commits.values())
+ logger.info(f"Collected {total_commits} unique commits across {len(daily_commits)} days")
+ return daily_commits
 
  def _extract_commit_data(
  self, commit: Any, branch_name: str, project_key: str, repo_path: Path
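The rewritten _fetch_commits_by_day above walks the requested range one calendar day at a time, queries every branch with per-day since/until boundaries, and deduplicates by commit hash as it goes. A self-contained sketch of just the day-boundary computation, with illustrative dates (the repo.iter_commits call is indicated only as a comment):

    from datetime import datetime, timedelta, timezone

    start_date = datetime(2024, 1, 1, tzinfo=timezone.utc)  # illustrative range
    end_date = datetime(2024, 1, 3, tzinfo=timezone.utc)

    current = start_date.date()
    while current <= end_date.date():
        # Midnight through 23:59:59.999999 for one UTC day, as in the fetcher
        day_start = datetime.combine(current, datetime.min.time(), tzinfo=timezone.utc)
        day_end = datetime.combine(current, datetime.max.time(), tzinfo=timezone.utc)
        # repo.iter_commits(branch, since=day_start, until=day_end) would run here
        print(current, day_start.isoformat(), day_end.isoformat())
        current += timedelta(days=1)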
@@ -373,114 +432,102 @@ class GitDataFetcher:
  """Get list of branches to analyze based on patterns.
 
  WHY: Robust branch detection that handles missing remotes, missing default branches,
- and provides good fallback behavior. Based on the approach used in the existing analyzer.
+ and provides good fallback behavior. When no patterns specified, analyzes ALL branches
+ to capture the complete development picture.
 
  DESIGN DECISION:
- - Try default branches first, fall back to all available branches
+ - When no patterns: analyze ALL accessible branches (not just main)
+ - When patterns specified: match against those patterns only
  - Handle missing remotes gracefully
  - Skip remote tracking branches to avoid duplicates
  - Use actual branch existence checking rather than assuming branches exist
  """
- if not branch_patterns:
- # Get all available branches (local branches preferred)
- available_branches = []
+ # Collect all available branches (local branches preferred)
+ available_branches = []
 
- # First, try local branches
- try:
- local_branches = [branch.name for branch in repo.branches]
- available_branches.extend(local_branches)
- logger.debug(f"Found local branches: {local_branches}")
- except Exception as e:
- logger.debug(f"Error getting local branches: {e}")
+ # First, try local branches
+ try:
+ local_branches = [branch.name for branch in repo.branches]
+ available_branches.extend(local_branches)
+ logger.debug(f"Found local branches: {local_branches}")
+ except Exception as e:
+ logger.debug(f"Error getting local branches: {e}")
 
- # If we have remotes, also consider remote branches (but clean the names)
+ # If we have remotes, also consider remote branches (but clean the names)
+ try:
+ if repo.remotes and hasattr(repo.remotes, "origin"):
+ remote_branches = [
+ ref.name.replace("origin/", "")
+ for ref in repo.remotes.origin.refs
+ if not ref.name.endswith("HEAD") # Skip HEAD ref
+ ]
+ # Only add remote branches that aren't already in local branches
+ for branch in remote_branches:
+ if branch not in available_branches:
+ available_branches.append(branch)
+ logger.debug(f"Found remote branches: {remote_branches}")
+ except Exception as e:
+ logger.debug(f"Error getting remote branches: {e}")
+
+ # If no branches found, fallback to trying common names directly
+ if not available_branches:
+ logger.warning("No branches found via normal detection, falling back to common names")
+ available_branches = ["main", "master", "develop", "dev"]
+
+ # Filter branches based on patterns if provided
+ if branch_patterns:
+ import fnmatch
+
+ matching_branches = []
+ for pattern in branch_patterns:
+ matching = [
+ branch for branch in available_branches if fnmatch.fnmatch(branch, pattern)
+ ]
+ matching_branches.extend(matching)
+ # Remove duplicates while preserving order
+ branches_to_test = list(dict.fromkeys(matching_branches))
+ else:
+ # No patterns specified - analyze ALL branches for complete coverage
+ branches_to_test = available_branches
+ logger.info(
+ f"No branch patterns specified - will analyze all {len(branches_to_test)} branches"
+ )
+
+ # Test that branches are actually accessible
+ accessible_branches = []
+ for branch in branches_to_test:
  try:
- if repo.remotes and hasattr(repo.remotes, "origin"):
- remote_branches = [
- ref.name.replace("origin/", "")
- for ref in repo.remotes.origin.refs
- if not ref.name.endswith("HEAD") # Skip HEAD ref
- ]
- # Only add remote branches that aren't already in local branches
- for branch in remote_branches:
- if branch not in available_branches:
- available_branches.append(branch)
- logger.debug(f"Found remote branches: {remote_branches}")
+ next(iter(repo.iter_commits(branch, max_count=1)), None)
+ accessible_branches.append(branch)
  except Exception as e:
- logger.debug(f"Error getting remote branches: {e}")
+ logger.debug(f"Branch {branch} not accessible: {e}")
 
- # If no branches found, fallback to trying common names directly
- if not available_branches:
- logger.warning(
- "No branches found via normal detection, falling back to common names"
- )
- available_branches = ["main", "master", "develop", "dev"]
-
- # Try default main branches first, in order of preference
+ if not accessible_branches:
+ # Last resort: try to find ANY working branch
+ logger.warning("No accessible branches found from patterns/default, trying fallback")
  main_branches = ["main", "master", "develop", "dev"]
  for branch in main_branches:
  if branch in available_branches:
- # Test that we can actually access this branch
  try:
- # Just try to get the commit object to verify branch exists and is accessible
  next(iter(repo.iter_commits(branch, max_count=1)), None)
- logger.info(f"Using main branch: {branch}")
+ logger.info(f"Using fallback main branch: {branch}")
  return [branch]
- except Exception as e:
- logger.debug(f"Branch {branch} exists but not accessible: {e}")
+ except Exception:
  continue
 
- # If no main branches work, try the first available branch that actually works
+ # Try any available branch
  for branch in available_branches:
  try:
  next(iter(repo.iter_commits(branch, max_count=1)), None)
  logger.info(f"Using fallback branch: {branch}")
  return [branch]
- except Exception as e:
- logger.debug(f"Branch {branch} not accessible: {e}")
+ except Exception:
  continue
 
- # Last resort: return empty list (will be handled gracefully by caller)
  logger.warning("No accessible branches found")
  return []
 
- # Use specified patterns - match against all available branches
- import fnmatch
-
- available_branches = []
-
- # Collect all branches (local and remote)
- with contextlib.suppress(Exception):
- available_branches.extend([branch.name for branch in repo.branches])
-
- try:
- if repo.remotes and hasattr(repo.remotes, "origin"):
- remote_branches = [
- ref.name.replace("origin/", "")
- for ref in repo.remotes.origin.refs
- if not ref.name.endswith("HEAD")
- ]
- for branch in remote_branches:
- if branch not in available_branches:
- available_branches.append(branch)
- except Exception:
- pass
-
- # Match patterns against available branches
- matching_branches = []
- for pattern in branch_patterns:
- matching = [branch for branch in available_branches if fnmatch.fnmatch(branch, pattern)]
- matching_branches.extend(matching)
-
- # Test that matched branches are actually accessible
- accessible_branches = []
- for branch in list(set(matching_branches)): # Remove duplicates
- try:
- next(iter(repo.iter_commits(branch, max_count=1)), None)
- accessible_branches.append(branch)
- except Exception as e:
- logger.debug(f"Matched branch {branch} not accessible: {e}")
-
+ logger.info(f"Will analyze {len(accessible_branches)} branches: {accessible_branches}")
  return accessible_branches
 
  def _update_repository(self, repo) -> bool:
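When patterns are supplied, the branch selection above reduces to fnmatch globbing plus order-preserving deduplication before the accessibility checks. A self-contained sketch of that filtering step, with hypothetical branch names:

    import fnmatch

    available_branches = ["main", "develop", "release/1.4", "feature/progress-bars"]
    branch_patterns = ["main", "release/*"]  # ["*"] would match every branch

    matching_branches = []
    for pattern in branch_patterns:
        matching_branches.extend(
            branch for branch in available_branches if fnmatch.fnmatch(branch, pattern)
        )

    # dict.fromkeys drops duplicates while preserving order, as in the diff
    branches_to_test = list(dict.fromkeys(matching_branches))
    print(branches_to_test)  # ['main', 'release/1.4']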
@@ -516,7 +563,7 @@ class GitDataFetcher:
  # Run git fetch with timeout
  try:
  result = subprocess.run(
- ["git", "fetch", "--all", "--config", "credential.helper="],
+ ["git", "fetch", "--all"],
  cwd=repo.working_dir,
  env=env,
  capture_output=True,
@@ -553,7 +600,7 @@ class GitDataFetcher:
  # Pull latest changes using subprocess
  try:
  result = subprocess.run(
- ["git", "pull", "--config", "credential.helper="],
+ ["git", "pull"],
  cwd=repo.working_dir,
  env=env,
  capture_output=True,
@@ -1191,3 +1238,48 @@ class GitDataFetcher:
  logger.warning(f"Error calculating commit stats for {commit.hexsha[:8]}: {e}")
 
  return stats
+
+ def _store_day_commits_incremental(
+ self, repo_path: Path, date_str: str, commits: list[dict[str, Any]], project_key: str
+ ) -> None:
+ """Store commits for a single day incrementally to enable progress tracking.
+
+ This method stores commits immediately after fetching them for a day,
+ allowing for better progress tracking and recovery from interruptions.
+
+ Args:
+ repo_path: Path to the repository
+ date_str: Date string in YYYY-MM-DD format
+ commits: List of commit data for the day
+ project_key: Project identifier
+ """
+ try:
+ # Transform commits to cache format
+ cache_format_commits = []
+ for commit in commits:
+ cache_format_commit = {
+ "hash": commit["commit_hash"],
+ "author_name": commit.get("author_name", ""),
+ "author_email": commit.get("author_email", ""),
+ "message": commit.get("message", ""),
+ "timestamp": commit["timestamp"],
+ "branch": commit.get("branch", "main"),
+ "is_merge": commit.get("is_merge", False),
+ "files_changed_count": commit.get("files_changed_count", 0),
+ "insertions": commit.get("lines_added", 0),
+ "deletions": commit.get("lines_deleted", 0),
+ "story_points": commit.get("story_points"),
+ "ticket_references": commit.get("ticket_references", []),
+ }
+ cache_format_commits.append(cache_format_commit)
+
+ # Use bulk store for efficiency
+ if cache_format_commits:
+ bulk_stats = self.cache.bulk_store_commits(str(repo_path), cache_format_commits)
+ logger.debug(
+ f"Incrementally stored {bulk_stats['inserted']} commits for {date_str} "
+ f"({bulk_stats['skipped']} already cached)"
+ )
+ except Exception as e:
+ # Log error but don't fail - commits will be stored again in batch at the end
+ logger.warning(f"Failed to incrementally store commits for {date_str}: {e}")
gitflow_analytics-1.3.11.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: gitflow-analytics
- Version: 1.3.6
+ Version: 1.3.11
  Summary: Analyze Git repositories for developer productivity insights
  Author-email: Bob Matyas <bobmatnyc@gmail.com>
  License: MIT
gitflow_analytics-1.3.11.dist-info/RECORD CHANGED
@@ -1,6 +1,6 @@
  gitflow_analytics/__init__.py,sha256=yN1dyAUu4l9qX-YNAGRItEf4RFFe-5GQiOntXPIfdxo,683
- gitflow_analytics/_version.py,sha256=9PnCSmhRIvnAdYvd_L43lJY5JxZ9voHOQOlNDFzIYB8,137
- gitflow_analytics/cli.py,sha256=RtXDOZlE3fq-xV0sWIvU_F1Slwr9Ma23CJNIdSrO8s0,207102
+ gitflow_analytics/_version.py,sha256=bXQpvOWe_1iZd8zp8muFij7XZbsWHLHtQWQK6axVbUc,138
+ gitflow_analytics/cli.py,sha256=TI1v_IqiR_DpMp6N7IGH9aZDNkk7qwzgEc4kLtWTM8k,210011
  gitflow_analytics/cli_rich.py,sha256=1Heeyadbqpn5d13jtI7jtcrpmbA0BmPY9lnMXrgSncI,19326
  gitflow_analytics/config.py,sha256=XRuxvzLWyn_ML7mDCcuZ9-YFNAEsnt33vIuWxQQ_jxg,1033
  gitflow_analytics/classification/__init__.py,sha256=p8shPUZpGaw7-ivhfAVrPDbSP2LrpvWC1WEsBJIg-PI,969
@@ -11,16 +11,16 @@ gitflow_analytics/classification/linguist_analyzer.py,sha256=HjLx9mM7hGXtrvMba6o
  gitflow_analytics/classification/model.py,sha256=2KbmFh9MpyvHMcNHbqwUTAAVLHHu3MiTfFIPyZSGa-8,16356
  gitflow_analytics/config/__init__.py,sha256=lzFOHsJGoeDHuu_NEgcSeUFwU0bgV3lnL9w0Pyc4FI0,1037
  gitflow_analytics/config/errors.py,sha256=IBKhAIwJ4gscZFnLDyE3jEp03wn2stPR7JQJXNSIfok,10386
- gitflow_analytics/config/loader.py,sha256=afdr_uQN5BDhK0iEIbUL4O3XbDVtCSEWZ-by_zp8E58,34172
+ gitflow_analytics/config/loader.py,sha256=oG6D6jEoVuK69a0060Oo2I9BEsZPs0jiE9YY7tzOv3Q,34238
  gitflow_analytics/config/profiles.py,sha256=yUjFAWW6uzOUdi5qlPE-QV9681HigyrLiSJFpL8X9A0,7967
  gitflow_analytics/config/repository.py,sha256=maptMAdCKDsuMAfoTAaTrMPVfVd_tKNLRenvuPe1-t4,4350
- gitflow_analytics/config/schema.py,sha256=NQO2KmaVDaCiPH6kwmMvAOw7cwufSjp3-Gpwl8-ox2U,14636
+ gitflow_analytics/config/schema.py,sha256=JOeu8VCgCvYs5B1oQuogQsRTO3UINdQlPNezyoGLoY4,14761
  gitflow_analytics/config/validator.py,sha256=l7AHjXYJ8wEmyA1rn2WiItZXtAiRb9YBLjFCDl53qKM,5907
  gitflow_analytics/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  gitflow_analytics/core/analyzer.py,sha256=59kGObzjziOb8geFyZMKCUvWmo3hcXE0eTgrjYEc1XA,58736
  gitflow_analytics/core/branch_mapper.py,sha256=1L1ctrhTEqMZ61eS1nZRkcyaarLipeQgotw4HdXcSmM,7407
  gitflow_analytics/core/cache.py,sha256=O3I_1Jbuj3GcnUo6CBe0nEJ_8fxKY2wcxeq9sff-OhY,67807
- gitflow_analytics/core/data_fetcher.py,sha256=eroe7MKA6g9_3Uxs03DSXAYX65Rc6_JyLYlcRi11xPo,52987
+ gitflow_analytics/core/data_fetcher.py,sha256=eO06-3lQ5O4j5d9Df8Qs4wT-1bvlLcUKbsgNe7L1LTU,57341
  gitflow_analytics/core/identity.py,sha256=k7i-vcRJ2eiTU0_kYGY5QOhxcqnitibTTx7DVONW0kg,31237
  gitflow_analytics/core/metrics_storage.py,sha256=hNXVXjpAaPHYoBFUCj_qR-hs9g8PbQKux_5esyevNEQ,21199
  gitflow_analytics/core/progress.py,sha256=KUQU7ToX63JvPTm8RRy31OmnVeqzc8HfrdGpb2ZvtoY,12509
@@ -114,9 +114,9 @@ gitflow_analytics/tui/widgets/__init__.py,sha256=85l6vkJuRGJNvej-nUZZoNg562zl_1J
  gitflow_analytics/tui/widgets/data_table.py,sha256=8fGNG4m7H41vCid3QwCHJa7bd8qu_DKrDf22iCks3XA,8722
  gitflow_analytics/tui/widgets/export_modal.py,sha256=L-XKPOc6u-fow2TudPgDnC0kXZM1WZuGd_jahtV8lhg,10737
  gitflow_analytics/tui/widgets/progress_widget.py,sha256=Qny6Q1nU0Pr3aj4aHfXLaRjya9MH3rldR2HWYiaQyGE,6167
- gitflow_analytics-1.3.6.dist-info/licenses/LICENSE,sha256=xwvSwY1GYXpRpmbnFvvnbmMwpobnrdN9T821sGvjOY0,1066
- gitflow_analytics-1.3.6.dist-info/METADATA,sha256=lkK6bJEnq7bp50-CI5adwJtGQxP4ush1UeRdV9k_pak,34091
- gitflow_analytics-1.3.6.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- gitflow_analytics-1.3.6.dist-info/entry_points.txt,sha256=a3y8HnfLOvK1QVOgAkDY6VQXXm3o9ZSQRZrpiaS3hEM,65
- gitflow_analytics-1.3.6.dist-info/top_level.txt,sha256=CQyxZXjKvpSB1kgqqtuE0PCRqfRsXZJL8JrYpJKtkrk,18
- gitflow_analytics-1.3.6.dist-info/RECORD,,
+ gitflow_analytics-1.3.11.dist-info/licenses/LICENSE,sha256=xwvSwY1GYXpRpmbnFvvnbmMwpobnrdN9T821sGvjOY0,1066
+ gitflow_analytics-1.3.11.dist-info/METADATA,sha256=jMKDbH-PaYIUhDMkUWO3NDVlOgE-dMoMu-pzEB9KpgI,34092
+ gitflow_analytics-1.3.11.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ gitflow_analytics-1.3.11.dist-info/entry_points.txt,sha256=a3y8HnfLOvK1QVOgAkDY6VQXXm3o9ZSQRZrpiaS3hEM,65
+ gitflow_analytics-1.3.11.dist-info/top_level.txt,sha256=CQyxZXjKvpSB1kgqqtuE0PCRqfRsXZJL8JrYpJKtkrk,18
+ gitflow_analytics-1.3.11.dist-info/RECORD,,