gitflow-analytics 3.7.0__py3-none-any.whl → 3.7.4__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only and reflects the versions exactly as they appear in their public registry.
gitflow_analytics/_version.py CHANGED
@@ -1,4 +1,4 @@
  """Version information for gitflow-analytics."""

- __version__ = "3.7.0"
+ __version__ = "3.7.4"
  __version_info__ = tuple(int(x) for x in __version__.split("."))
gitflow_analytics/cli.py CHANGED
@@ -944,7 +944,7 @@ def analyze(
  data_fetcher = GitDataFetcher(
  cache=cache,
  branch_mapping_rules=cfg.analysis.branch_mapping_rules,
- allowed_ticket_platforms=cfg.analysis.ticket_platforms,
+ allowed_ticket_platforms=cfg.get_effective_ticket_platforms(),
  exclude_paths=cfg.analysis.exclude_paths,
  )

@@ -1135,9 +1135,7 @@ def analyze(
  analyzer = GitAnalyzer(
  cache,
  branch_mapping_rules=cfg.analysis.branch_mapping_rules,
- allowed_ticket_platforms=getattr(
- cfg.analysis, "ticket_platforms", ["jira", "github", "clickup", "linear"]
- ),
+ allowed_ticket_platforms=cfg.get_effective_ticket_platforms(),
  exclude_paths=cfg.analysis.exclude_paths,
  story_point_patterns=cfg.analysis.story_point_patterns,
  ml_categorization_config=ml_config,
@@ -1164,9 +1162,28 @@ def analyze(
  # Use a 'repos' directory in the config directory for cloned repositories
  config_dir = Path(config).parent if config else Path.cwd()
  repos_dir = config_dir / "repos"
- discovered_repos = cfg.discover_organization_repositories(clone_base_path=repos_dir)
+
+ # Progress callback for repository discovery
+ def discovery_progress(repo_name, count):
+ if display and display._live:
+ display.update_progress_task(
+ "main",
+ description=f"🔍 Discovering: {repo_name} ({count} repos checked)",
+ completed=15 + min(count % 5, 4), # Show some movement
+ )
+ else:
+ # Simple inline progress - just show count
+ click.echo(f"\r 📦 Checking repositories... {count}", nl=False)
+
+ discovered_repos = cfg.discover_organization_repositories(
+ clone_base_path=repos_dir, progress_callback=discovery_progress
+ )
  repositories_to_analyze = discovered_repos

+ # Clear the progress line
+ if not (display and display._live):
+ click.echo("\r" + " " * 60 + "\r", nl=False) # Clear line
+
  if display and display._live:
  # We're in full-screen mode, update progress and initialize repo list
  display.update_progress_task(
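Both call sites above degrade gracefully: when the Rich live display is not active, progress is written in place with a carriage return and later blanked out. A minimal, self-contained sketch of that inline-progress pattern (the names here are illustrative, not the package's API):

    import sys
    import time

    def inline_progress(count: int) -> None:
        # "\r" returns the cursor to column 0, so each write overdraws the last.
        sys.stdout.write(f"\r  Checking repositories... {count}")
        sys.stdout.flush()

    for n in range(1, 6):
        inline_progress(n)
        time.sleep(0.1)

    # Blank the line with spaces before the final message, mirroring the
    # '"\r" + " " * 60 + "\r"' trick used in the diff above.
    sys.stdout.write("\r" + " " * 40 + "\r")
    print("Found 5 repositories")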
@@ -1703,14 +1720,14 @@ def analyze(
  "📊 No commits or batches found for date range - proceeding with data fetch"
  )

- # PROCEED WITH EMERGENCY FETCH if validation didn't pass
+ # PROCEED WITH INITIAL FETCH if validation didn't pass
  if not validation_passed:
  if display:
  display.print_status(
- "Data validation failed - running emergency data fetch", "warning"
+ "Data validation failed - running initial data fetch", "warning"
  )
  else:
- click.echo("⚠️ Data validation failed - running emergency data fetch")
+ click.echo("⚠️ Data validation failed - running initial data fetch")

  # Force data fetch for all repositories since we have no batches
  repos_needing_analysis = repositories_to_analyze
@@ -1719,12 +1736,12 @@ def analyze(
  if repos_needing_analysis:
  if display:
  display.print_status(
- f"Emergency fetch: Fetching data for {len(repos_needing_analysis)} repositories...",
+ f"Initial fetch: Fetching data for {len(repos_needing_analysis)} repositories...",
  "info",
  )
  else:
  click.echo(
- f"🚨 Emergency fetch: Fetching data for {len(repos_needing_analysis)} repositories..."
+ f"🚨 Initial fetch: Fetching data for {len(repos_needing_analysis)} repositories..."
  )
  click.echo(
  " 📋 Reason: Need to ensure commits and batches exist for classification"
@@ -1760,89 +1777,163 @@ def analyze(
  # Check if repo exists, clone if needed (critical for organization mode)
  if not repo_path.exists():
  if repo_config.github_repo and cfg.github.organization:
- if display:
- display.print_status(
- f" 📥 Cloning {repo_config.github_repo} from GitHub...",
- "info",
- )
- else:
- click.echo(f" 📥 Cloning {repo_config.github_repo} from GitHub...")
- try:
- # Ensure parent directory exists
- repo_path.parent.mkdir(parents=True, exist_ok=True)
-
- # Build clone URL with authentication
- clone_url = f"https://github.com/{repo_config.github_repo}.git"
- if cfg.github.token:
- clone_url = f"https://{cfg.github.token}@github.com/{repo_config.github_repo}.git"
-
- # Clone using subprocess for better control
- env = os.environ.copy()
- env["GIT_TERMINAL_PROMPT"] = "0"
- env["GIT_ASKPASS"] = ""
- env["GCM_INTERACTIVE"] = "never"
-
- cmd = ["git", "clone", "--config", "credential.helper="]
- if repo_config.branch:
- cmd.extend(["-b", repo_config.branch])
- cmd.extend([clone_url, str(repo_path)])
-
- result = subprocess.run(
- cmd,
- env=env,
- capture_output=True,
- text=True,
- timeout=30,
- )
+ # Retry logic for cloning
+ max_retries = 2
+ retry_count = 0
+ clone_success = False
+
+ while retry_count <= max_retries and not clone_success:
+ if retry_count > 0:
+ if display:
+ display.print_status(
+ f" 🔄 Retry {retry_count}/{max_retries}: {repo_config.github_repo}",
+ "warning",
+ )
+ else:
+ click.echo(
+ f" 🔄 Retry {retry_count}/{max_retries}: {repo_config.github_repo}"
+ )
+ else:
+ if display:
+ display.print_status(
+ f" 📥 Cloning {repo_config.github_repo} from GitHub...",
+ "info",
+ )
+ else:
+ click.echo(
+ f" 📥 Cloning {repo_config.github_repo} from GitHub..."
+ )
+
+ try:
+ # Ensure parent directory exists
+ repo_path.parent.mkdir(parents=True, exist_ok=True)
+
+ # Build clone URL with authentication
+ clone_url = (
+ f"https://github.com/{repo_config.github_repo}.git"
+ )
+ if cfg.github.token:
+ clone_url = f"https://{cfg.github.token}@github.com/{repo_config.github_repo}.git"
+
+ # Clone using subprocess for better control
+ env = os.environ.copy()
+ env["GIT_TERMINAL_PROMPT"] = "0"
+ env["GIT_ASKPASS"] = ""
+ env["GCM_INTERACTIVE"] = "never"
+ env["GIT_PROGRESS"] = "1" # Force progress output
+
+ cmd = [
+ "git",
+ "clone",
+ "--progress",
+ "--config",
+ "credential.helper=",
+ ]
+ if repo_config.branch:
+ cmd.extend(["-b", repo_config.branch])
+ cmd.extend([clone_url, str(repo_path)])
+
+ # Track start time for timeout reporting
+ import time
+
+ start_time = time.time()
+ timeout_seconds = 300 # 5 minutes for large repos
+
+ # Run without capturing stderr to show git progress
+ result = subprocess.run(
+ cmd,
+ env=env,
+ stdout=subprocess.PIPE,
+ stderr=None, # Let stderr (progress) flow to terminal
+ text=True,
+ timeout=timeout_seconds,
+ )

- if result.returncode != 0:
- error_msg = result.stderr or result.stdout
- if any(
- x in error_msg.lower()
- for x in ["authentication", "permission denied", "401", "403"]
- ):
+ elapsed = time.time() - start_time
+
+ if result.returncode != 0:
+ error_msg = "Clone failed"
+ if any(
+ x in error_msg.lower()
+ for x in [
+ "authentication",
+ "permission denied",
+ "401",
+ "403",
+ ]
+ ):
+ if display:
+ display.print_status(
+ f" ❌ Authentication failed for {repo_config.github_repo}",
+ "error",
+ )
+ else:
+ click.echo(
+ f" ❌ Authentication failed for {repo_config.github_repo}"
+ )
+ break # Don't retry auth failures
+ else:
+ raise subprocess.CalledProcessError(
+ result.returncode,
+ cmd,
+ result.stdout,
+ result.stderr,
+ )
+ else:
+ clone_success = True
  if display:
  display.print_status(
- f" ❌ Authentication failed for {repo_config.github_repo}",
- "error",
+ f" ✅ Cloned {repo_config.github_repo} ({elapsed:.1f}s)",
+ "success",
  )
  else:
  click.echo(
- f" ❌ Authentication failed for {repo_config.github_repo}"
+ f" ✅ Cloned {repo_config.github_repo} ({elapsed:.1f}s)"
  )
- continue
+
+ except subprocess.TimeoutExpired:
+ retry_count += 1
+ if display:
+ display.print_status(
+ f" ⏱️ Clone timeout after {timeout_seconds}s: {repo_config.github_repo}",
+ "error",
+ )
  else:
- raise subprocess.CalledProcessError(
- result.returncode, cmd, result.stdout, result.stderr
+ click.echo(
+ f" ⏱️ Clone timeout after {timeout_seconds}s: {repo_config.github_repo}"
  )
+ # Clean up partial clone
+ if repo_path.exists():
+ import shutil

- if display:
- display.print_status(
- f" ✅ Cloned {repo_config.github_repo}", "success"
- )
- else:
- click.echo(f" ✅ Cloned {repo_config.github_repo}")
+ shutil.rmtree(repo_path, ignore_errors=True)
+ if retry_count > max_retries:
+ if display:
+ display.print_status(
+ f" ❌ Skipping {repo_config.github_repo} after {max_retries} timeouts",
+ "error",
+ )
+ else:
+ click.echo(
+ f" ❌ Skipping {repo_config.github_repo} after {max_retries} timeouts"
+ )
+ break
+ continue # Try again
+
+ except Exception as e:
+ retry_count += 1
+ if display:
+ display.print_status(
+ f" ❌ Clone error: {e}", "error"
+ )
+ else:
+ click.echo(f" ❌ Clone error: {e}")
+ if retry_count > max_retries:
+ break
+ continue # Try again

- except subprocess.TimeoutExpired:
- if display:
- display.print_status(
- f" ❌ Clone timeout for {repo_config.github_repo}",
- "error",
- )
- else:
- click.echo(f" ❌ Clone timeout for {repo_config.github_repo}")
- continue
- except Exception as e:
- if display:
- display.print_status(
- f" ❌ Failed to clone {repo_config.github_repo}: {e}",
- "error",
- )
- else:
- click.echo(
- f" ❌ Failed to clone {repo_config.github_repo}: {e}"
- )
- continue
+ if not clone_success:
+ continue # Skip this repo and move to next
  else:
  # No github_repo configured, can't clone
  if display:
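Stripped of the display branches, the new logic is a bounded retry loop: timeouts clean up the partial clone and retry up to max_retries times, while authentication failures break out immediately. A condensed sketch of that control flow (clone_with_retries is a hypothetical helper, not a function in the package):

    import shutil
    import subprocess
    from pathlib import Path

    def clone_with_retries(cmd: list[str], repo_path: Path,
                           max_retries: int = 2, timeout: int = 300) -> bool:
        attempt = 0
        while attempt <= max_retries:
            try:
                result = subprocess.run(cmd, stdout=subprocess.PIPE,
                                        stderr=None, text=True, timeout=timeout)
                if result.returncode == 0:
                    return True
                return False  # non-timeout failure (e.g. auth): do not retry
            except subprocess.TimeoutExpired:
                # Remove the partial clone so the next attempt starts clean.
                shutil.rmtree(repo_path, ignore_errors=True)
                attempt += 1
        return False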
@@ -1928,15 +2019,15 @@ def analyze(

  if display:
  display.print_status(
- f"Emergency fetch complete: {total_commits} commits, {total_tickets} tickets",
+ f"Initial fetch complete: {total_commits} commits, {total_tickets} tickets",
  "success",
  )
  else:
  click.echo(
- f"🚨 Emergency fetch complete: {total_commits} commits, {total_tickets} tickets"
+ f"🚨 Initial fetch complete: {total_commits} commits, {total_tickets} tickets"
  )

- # RE-VALIDATE after emergency fetch
+ # RE-VALIDATE after initial fetch
  with cache.get_session() as session:
  final_commits = (
  session.query(CachedCommit)
@@ -1961,7 +2052,7 @@ def analyze(
  )

  if final_commits == 0:
- error_msg = "❌ CRITICAL: Emergency fetch completed but still 0 commits stored in database"
+ error_msg = "❌ CRITICAL: Initial fetch completed but still 0 commits stored in database"
  if display:
  display.print_status(error_msg, "error")
  else:
@@ -1969,7 +2060,9 @@ def analyze(
  click.echo(
  f" 📅 Date range: {start_date.strftime('%Y-%m-%d')} to {end_date.strftime('%Y-%m-%d')}"
  )
- click.echo(f" 📊 Emergency stats: {total_commits} commits reported")
+ click.echo(
+ f" 📊 Initial fetch stats: {total_commits} commits reported"
+ )
  click.echo(
  f" 🗃️ Database result: {final_commits} commits, {final_batches} batches"
  )
@@ -1983,7 +2076,7 @@ def analyze(
  " - Repository has no commits in the specified time range"
  )
  raise click.ClickException(
- "Emergency fetch failed validation - no data available for classification"
+ "Initial fetch failed validation - no data available for classification"
  )

  if display:
@@ -2342,25 +2435,33 @@ def analyze(
  env["GIT_TERMINAL_PROMPT"] = "0"
  env["GIT_ASKPASS"] = ""
  env["GCM_INTERACTIVE"] = "never"
+ env["GIT_PROGRESS"] = "1" # Force progress output

  # Build git clone command
- cmd = ["git", "clone", "--config", "credential.helper="]
+ cmd = [
+ "git",
+ "clone",
+ "--progress",
+ "--config",
+ "credential.helper=",
+ ]
  if repo_config.branch:
  cmd.extend(["-b", repo_config.branch])
  cmd.extend([clone_url, str(repo_config.path)])

- # Run with timeout to prevent hanging
+ # Run with timeout to prevent hanging, let progress show on stderr
  result = subprocess.run(
  cmd,
  env=env,
- capture_output=True,
+ stdout=subprocess.PIPE,
+ stderr=None, # Let stderr (progress) flow to terminal
  text=True,
- timeout=30, # 30 second timeout
+ timeout=120, # Increase timeout for large repos
  )

  if result.returncode != 0:
  raise git.GitCommandError(
- cmd, result.returncode, stderr=result.stderr
+ cmd, result.returncode, stderr="Clone failed"
  )
  except subprocess.TimeoutExpired:
  if display:
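The subprocess change is the same in both clone paths: capture_output=True swallowed git's progress meter (git writes it to stderr), so 3.7.4 captures only stdout and lets stderr inherit the terminal. A minimal sketch of the two behaviors (url and dest are placeholders):

    import subprocess

    url, dest = "https://github.com/example/repo.git", "/tmp/repo"

    # 3.7.0 behavior: both streams captured, so --progress output is invisible.
    subprocess.run(["git", "clone", "--progress", url, dest],
                   capture_output=True, text=True, timeout=30)

    # 3.7.4 behavior: stderr=None inherits the parent's stderr, so clone
    # progress streams to the terminal while stdout is still captured.
    subprocess.run(["git", "clone", "--progress", url, dest],
                   stdout=subprocess.PIPE, stderr=None, text=True, timeout=120)

The trade-off is visible in the hunk above: with stderr no longer captured, the raised GitCommandError can only carry a generic "Clone failed" message instead of git's actual error text.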
@@ -2409,13 +2510,19 @@ def analyze(
  cmd = [
  "git",
  "clone",
+ "--progress",
  "--config",
  "credential.helper=",
  clone_url,
  str(repo_config.path),
  ]
  result = subprocess.run(
- cmd, env=env, capture_output=True, text=True, timeout=30
+ cmd,
+ env=env,
+ stdout=subprocess.PIPE,
+ stderr=None,
+ text=True,
+ timeout=120,
  )
  if result.returncode != 0:
  raise git.GitCommandError(
@@ -4192,9 +4299,7 @@ def fetch(
  data_fetcher = GitDataFetcher(
  cache=cache,
  branch_mapping_rules=getattr(cfg.analysis, "branch_mapping_rules", {}),
- allowed_ticket_platforms=getattr(
- cfg.analysis, "ticket_platforms", ["jira", "github", "clickup", "linear"]
- ),
+ allowed_ticket_platforms=cfg.get_effective_ticket_platforms(),
  exclude_paths=getattr(cfg.analysis, "exclude_paths", None),
  )

@@ -4217,9 +4322,23 @@ def fetch(
  # Use a 'repos' directory in the config directory for cloned repositories
  config_dir = Path(config).parent if config else Path.cwd()
  repos_dir = config_dir / "repos"
- discovered_repos = cfg.discover_organization_repositories(clone_base_path=repos_dir)
+
+ # Progress callback for repository discovery
+ def discovery_progress(repo_name, count):
+ if display:
+ display.print_status(f" 📦 Checking: {repo_name} ({count})", "info")
+ else:
+ click.echo(f"\r 📦 Checking repositories... {count}", nl=False)
+
+ discovered_repos = cfg.discover_organization_repositories(
+ clone_base_path=repos_dir, progress_callback=discovery_progress
+ )
  repositories_to_fetch = discovered_repos

+ # Clear the progress line
+ if not display:
+ click.echo("\r" + " " * 60 + "\r", nl=False) # Clear line
+
  if display:
  display.print_status(
  f"Found {len(discovered_repos)} repositories in organization", "success"
@@ -4400,6 +4519,8 @@ def cache_stats(config: Path) -> None:
  - Decide when to clear cache
  - Troubleshoot slow analyses
  """
+ from .core.cache import GitAnalysisCache
+
  try:
  cfg = ConfigLoader.load(config)
  cache = GitAnalysisCache(cfg.cache.directory)
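This and the following hunks move imports from module scope into the command bodies, a common Click optimization: the CLI no longer pays every subcommand's import cost just to show --help. A generic sketch of the pattern (the group and command here are illustrative):

    import click

    @click.group()
    def cli() -> None:
        """Top-level CLI; kept import-light so startup and --help stay fast."""

    @cli.command()
    def cache_stats() -> None:
        # Deferred import: only paid when this subcommand actually runs.
        from gitflow_analytics.core.cache import GitAnalysisCache
        click.echo(GitAnalysisCache.__name__)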
@@ -4454,6 +4575,8 @@ def merge_identity(config: Path, dev1: str, dev2: str) -> None:
  - Refreshes cached statistics
  - Updates identity mappings
  """
+ from .core.identity import DeveloperIdentityResolver
+
  try:
  cfg = ConfigLoader.load(config)
  identity_resolver = DeveloperIdentityResolver(cfg.cache.directory / "identities.db")
@@ -4719,6 +4842,7 @@ def discover_storypoint_fields(config: Path) -> None:
  return

  # Initialize PM integration (currently JIRA)
+ from .core.cache import GitAnalysisCache
  from .integrations.jira_integration import JIRAIntegration

  # Create minimal cache for integration
@@ -4802,6 +4926,9 @@ def identities(config: Path, weeks: int, apply: bool) -> None:
  Mappings are saved to 'analysis.identity.manual_mappings'
  Bot exclusions go to 'analysis.exclude.authors'
  """
+ from .core.analyzer import GitAnalyzer
+ from .core.cache import GitAnalysisCache
+
  try:
  cfg = ConfigLoader.load(config)
  cache = GitAnalysisCache(cfg.cache.directory)
@@ -4862,9 +4989,7 @@ def identities(config: Path, weeks: int, apply: bool) -> None:
  analyzer = GitAnalyzer(
  cache,
  branch_mapping_rules=cfg.analysis.branch_mapping_rules,
- allowed_ticket_platforms=getattr(
- cfg.analysis, "ticket_platforms", ["jira", "github", "clickup", "linear"]
- ),
+ allowed_ticket_platforms=cfg.get_effective_ticket_platforms(),
  exclude_paths=cfg.analysis.exclude_paths,
  story_point_patterns=cfg.analysis.story_point_patterns,
  ml_categorization_config=ml_config,
@@ -5032,6 +5157,8 @@ def aliases_command(
  """
  try:
  from .config.aliases import AliasesManager, DeveloperAlias
+ from .core.analyzer import GitAnalyzer
+ from .core.cache import GitAnalysisCache
  from .identity_llm.analyzer import LLMIdentityAnalyzer

  # Load configuration
@@ -5099,9 +5226,7 @@ def aliases_command(
  analyzer = GitAnalyzer(
  cache,
  branch_mapping_rules=cfg.analysis.branch_mapping_rules,
- allowed_ticket_platforms=getattr(
- cfg.analysis, "ticket_platforms", ["jira", "github", "clickup", "linear"]
- ),
+ allowed_ticket_platforms=cfg.get_effective_ticket_platforms(),
  exclude_paths=cfg.analysis.exclude_paths,
  story_point_patterns=cfg.analysis.story_point_patterns,
  ml_categorization_config=ml_config,
@@ -5335,6 +5460,8 @@ def list_developers(config: Path) -> None:
  - Finding developer email addresses
  - Checking contribution statistics
  """
+ from .core.identity import DeveloperIdentityResolver
+
  try:
  cfg = ConfigLoader.load(config)
  identity_resolver = DeveloperIdentityResolver(cfg.cache.directory / "identities.db")
@@ -5468,6 +5595,8 @@ def train(
  - scikit-learn and pandas dependencies
  - ~100MB disk space for model storage
  """
+ from .core.cache import GitAnalysisCache
+ from .integrations.orchestrator import IntegrationOrchestrator

  # Configure logging
  if log.upper() != "NONE":
@@ -5551,8 +5680,18 @@ def train(
  try:
  config_dir = Path(config).parent if config else Path.cwd()
  repos_dir = config_dir / "repos"
- discovered_repos = cfg.discover_organization_repositories(clone_base_path=repos_dir)
+
+ # Progress callback for repository discovery
+ def discovery_progress(repo_name, count):
+ click.echo(f"\r 📦 Checking repositories... {count}", nl=False)
+
+ discovered_repos = cfg.discover_organization_repositories(
+ clone_base_path=repos_dir, progress_callback=discovery_progress
+ )
  repositories_to_analyze = discovered_repos
+
+ # Clear the progress line and show result
+ click.echo("\r" + " " * 60 + "\r", nl=False)
  click.echo(f"✅ Found {len(discovered_repos)} repositories in organization")
  except Exception as e:
  click.echo(f"❌ Failed to discover repositories: {e}")
gitflow_analytics/config/repository.py CHANGED
@@ -20,12 +20,13 @@ class RepositoryManager:
  self.github_config = github_config

  def discover_organization_repositories(
- self, clone_base_path: Optional[Path] = None
+ self, clone_base_path: Optional[Path] = None, progress_callback=None
  ) -> list[RepositoryConfig]:
  """Discover repositories from GitHub organization.

  Args:
  clone_base_path: Base directory where repos should be cloned/found.
+ progress_callback: Optional callback function(repo_name, count) for progress updates.

  Returns:
  List of discovered repository configurations.
@@ -42,7 +43,14 @@ class RepositoryManager:
  if clone_base_path is None:
  raise ValueError("No base path available for repository cloning")

+ repo_count = 0
  for repo in org.get_repos():
+ repo_count += 1
+
+ # Call progress callback if provided
+ if progress_callback:
+ progress_callback(repo.name, repo_count)
+
  # Skip archived repositories
  if repo.archived:
  continue
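The callback keeps RepositoryManager UI-agnostic: it reports (repo_name, count) and lets the caller decide how to render it. A minimal sketch of the contract, assuming only the signature added above (discover here is a stand-in, not the real method):

    from typing import Callable, Optional

    ProgressCallback = Optional[Callable[[str, int], None]]

    def discover(names: list[str], progress_callback: ProgressCallback = None) -> list[str]:
        found = []
        for count, name in enumerate(names, start=1):
            if progress_callback:
                # Fires for every repository seen, including ones skipped later.
                progress_callback(name, count)
            found.append(name)
        return found

    discover(["api", "web"], progress_callback=lambda name, n: print(f"{n}: {name}"))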
gitflow_analytics/config/schema.py CHANGED
@@ -456,3 +456,42 @@ class Config:
  raise ValueError(
  f"Failed to discover repositories from organization {self.github.organization}: {e}"
  ) from e
+
+ def get_effective_ticket_platforms(self) -> list[str]:
+ """Get the effective list of ticket platforms to extract.
+
+ If ticket_platforms is explicitly configured in analysis config, use that.
+ Otherwise, infer from which PM platforms are actually configured.
+
+ Returns:
+ List of ticket platform names to extract (e.g., ['jira', 'github'])
+ """
+ # If explicitly configured, use that
+ if self.analysis.ticket_platforms is not None:
+ return self.analysis.ticket_platforms
+
+ # Otherwise, infer from configured PM platforms
+ platforms = []
+
+ # Check modern PM framework config
+ if self.pm:
+ if hasattr(self.pm, "jira") and self.pm.jira:
+ platforms.append("jira")
+ if hasattr(self.pm, "linear") and self.pm.linear:
+ platforms.append("linear")
+ if hasattr(self.pm, "clickup") and self.pm.clickup:
+ platforms.append("clickup")
+
+ # Check legacy JIRA config
+ if (self.jira or self.jira_integration) and "jira" not in platforms:
+ platforms.append("jira")
+
+ # Always include GitHub if we have GitHub configured (for issue tracking)
+ if self.github.token:
+ platforms.append("github")
+
+ # If nothing configured, fall back to common platforms
+ if not platforms:
+ platforms = ["jira", "github", "clickup", "linear"]
+
+ return platforms
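This helper is what replaces the scattered getattr(..., "ticket_platforms", [...]) defaults across cli.py. The precedence: an explicit analysis.ticket_platforms list always wins (even an empty one), otherwise platforms are inferred from configured PM integrations plus GitHub when a token exists, and only a fully unconfigured setup falls back to all four platforms. A reduced sketch of the same resolution over plain values (a stand-in function, not the real Config class):

    def effective_platforms(explicit, pm_jira=False, legacy_jira=False, github_token=None):
        if explicit is not None:
            return explicit  # explicit config wins, even an empty list
        platforms = []
        if pm_jira or legacy_jira:
            platforms.append("jira")
        if github_token:
            platforms.append("github")
        return platforms or ["jira", "github", "clickup", "linear"]

    assert effective_platforms(["jira"]) == ["jira"]
    assert effective_platforms(None, pm_jira=True, github_token="tok") == ["jira", "github"]
    assert effective_platforms(None) == ["jira", "github", "clickup", "linear"]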
gitflow_analytics/identity_llm/analysis_pass.py CHANGED
@@ -155,8 +155,13 @@ class IdentityAnalysisPass:
  existing_emails.add(email.lower())

  for new_mapping in new_mappings:
- # New mappings use primary_email
- canonical_email = new_mapping["primary_email"].lower()
+ # New mappings use primary_email, but support canonical_email for backward compat
+ canonical_email = (
+ new_mapping.get("primary_email") or new_mapping.get("canonical_email", "")
+ ).lower()
+ if not canonical_email:
+ logger.warning(f"Skipping mapping with no email: {new_mapping}")
+ continue
  if canonical_email not in existing_emails:
  existing_mappings.append(new_mapping)
  logger.info(f"Added identity mapping for: {canonical_email}")
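The fallback accepts either key and treats an empty string the same as a missing one, since `or` skips falsy values. The same pattern on plain dicts:

    mappings = [
        {"primary_email": "Dev@Example.com"},
        {"canonical_email": "old@example.com"},  # legacy key still accepted
        {"name": "no email at all"},             # skipped, as in the diff
    ]

    for m in mappings:
        email = (m.get("primary_email") or m.get("canonical_email", "")).lower()
        if not email:
            continue
        print(email)  # dev@example.com, old@example.com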
gitflow_analytics-3.7.4.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: gitflow-analytics
- Version: 3.7.0
+ Version: 3.7.4
  Summary: Analyze Git repositories for developer productivity insights
  Author-email: Bob Matyas <bobmatnyc@gmail.com>
  License: MIT
gitflow_analytics-3.7.4.dist-info/RECORD CHANGED
@@ -1,6 +1,6 @@
  gitflow_analytics/__init__.py,sha256=W3Jaey5wuT1nBPehVLTIRkVIyBa5jgYOlBKc_UFfh-4,773
- gitflow_analytics/_version.py,sha256=T-QbXBKclZrHBaB4-SQ98LRwBVn2vxV_-Cib1L8W30g,137
- gitflow_analytics/cli.py,sha256=11n4M0TCTM_Wsc1KaJQxizb0hlUCobCGl84kQDdlxfM,265577
+ gitflow_analytics/_version.py,sha256=xZ8QK9HyXEtX-sea8S5xFccBqQR_dn4tQAInEXd1CqY,137
+ gitflow_analytics/cli.py,sha256=pYW6V0b6SRa3-NyOmXGQhf5emcKHUHgOVL2PFOAS8LQ,273331
  gitflow_analytics/config.py,sha256=XRuxvzLWyn_ML7mDCcuZ9-YFNAEsnt33vIuWxQQ_jxg,1033
  gitflow_analytics/constants.py,sha256=GXEncUJS9ijOI5KWtQCTANwdqxPfXpw-4lNjhaWTKC4,2488
  gitflow_analytics/verify_activity.py,sha256=aRQnmypf5NDasXudf2iz_WdJnCWtwlbAiJ5go0DJLSU,27050
@@ -18,8 +18,8 @@ gitflow_analytics/config/aliases.py,sha256=z9F0X6qbbF544Tw7sHlOoBj5mpRSddMkCpoKL
  gitflow_analytics/config/errors.py,sha256=IBKhAIwJ4gscZFnLDyE3jEp03wn2stPR7JQJXNSIfok,10386
  gitflow_analytics/config/loader.py,sha256=EiksTB1Uqey63FxIvuud_kMdab3sNDfuICE_RwMLYFA,37290
  gitflow_analytics/config/profiles.py,sha256=yUjFAWW6uzOUdi5qlPE-QV9681HigyrLiSJFpL8X9A0,7967
- gitflow_analytics/config/repository.py,sha256=maptMAdCKDsuMAfoTAaTrMPVfVd_tKNLRenvuPe1-t4,4350
- gitflow_analytics/config/schema.py,sha256=lFN80-YcSqu33UwiJryFHn8F5_zX619AaJXSuJ3aht8,15271
+ gitflow_analytics/config/repository.py,sha256=u7JHcKvqmXOl3i7EmNUfJ6wtjzElxPMyXRkATnVyQ0I,4685
+ gitflow_analytics/config/schema.py,sha256=XP2VTpMzGIZ-dn0OcCNIgmTFe6_sIFyJarLcy-zGg2A,16753
  gitflow_analytics/config/validator.py,sha256=l7AHjXYJ8wEmyA1rn2WiItZXtAiRb9YBLjFCDl53qKM,5907
  gitflow_analytics/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  gitflow_analytics/core/analyzer.py,sha256=59kGObzjziOb8geFyZMKCUvWmo3hcXE0eTgrjYEc1XA,58736
@@ -39,7 +39,7 @@ gitflow_analytics/extractors/ml_tickets.py,sha256=js5OFmbZt9JHy5r_crehhuB1Mxrkdf
  gitflow_analytics/extractors/story_points.py,sha256=IggP-Ei832oV9aD08a3li08kmjF3BqyU9i8EgAZcpfs,5324
  gitflow_analytics/extractors/tickets.py,sha256=2s5Iu7eZXVi8yl9Yme5HKzrJo3mDjzsSOUr_iJGUeLM,43799
  gitflow_analytics/identity_llm/__init__.py,sha256=tpWDwapm6zIyb8LxLO8A6pHlE3wNorT_fBL-Yp9-XnU,250
- gitflow_analytics/identity_llm/analysis_pass.py,sha256=lYfjM6f82agXstTrUBsS0R9-ipfRnviIqe8ExkjKnvc,9459
+ gitflow_analytics/identity_llm/analysis_pass.py,sha256=FJF1BEGekHRY4i5jasgxxL_UWFGYP5kBkvn8hAtMorY,9728
  gitflow_analytics/identity_llm/analyzer.py,sha256=-a7lUJt_Dlgx9aNOH1YlFqPe7BSxtwY2RoGruIzwrzs,17932
  gitflow_analytics/identity_llm/models.py,sha256=F1RN6g8og9esj-m4TPY_928Ci9TA43G9NFNHYf4zHHQ,2677
  gitflow_analytics/integrations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -115,16 +115,14 @@ gitflow_analytics/security/extractors/__init__.py,sha256=K64IAhS0k47J5oUFNmm1NcL
  gitflow_analytics/security/extractors/dependency_checker.py,sha256=Tb-Rb4G7jJkH_hsvH3dmEV2SyqjN8ycTHjgItObnCHg,14601
  gitflow_analytics/security/extractors/secret_detector.py,sha256=aa2YiKeR6SurWHMHU5fr71Q83stSmIRKvwWFpp0IVFs,6857
  gitflow_analytics/security/extractors/vulnerability_scanner.py,sha256=TWK1fPMN5s_EM8AwTFpkxRjXEsHIMwxYUZQqM2l5dV0,12682
- gitflow_analytics/security/reports/__init__.py,sha256=C6a4tHi-xCtTE5sSkQU7bjhRtEGE9l-ApUC13DVZqVQ,125
- gitflow_analytics/security/reports/security_report.py,sha256=W0DXpR2ddej0Di6X6YqI6U05M08oTPjW8VWWHdNrGCg,15466
  gitflow_analytics/training/__init__.py,sha256=YT5p7Wm4U8trzLnbS5FASJBWPMKhqp3rlAThjpxWnxo,143
  gitflow_analytics/training/model_loader.py,sha256=xGZLSopGxDhC--2XN6ytRgi2CyjOKY4zS4fZ-ZlO6lM,13245
  gitflow_analytics/training/pipeline.py,sha256=PQegTk_-OsPexVyRDfiy-3Df-7pcs25C4vPASr-HT9E,19951
  gitflow_analytics/ui/__init__.py,sha256=UBhYhZMvwlSrCuGWjkIdoP2zNbiQxOHOli-I8mqIZUE,441
  gitflow_analytics/ui/progress_display.py,sha256=3xJnCOSs1DRVAfS-rTu37EsLfWDFW5-mbv-bPS9NMm4,59182
- gitflow_analytics-3.7.0.dist-info/licenses/LICENSE,sha256=xwvSwY1GYXpRpmbnFvvnbmMwpobnrdN9T821sGvjOY0,1066
- gitflow_analytics-3.7.0.dist-info/METADATA,sha256=lkG2AphxDMqNDGPU9jC6vDoMqlEcrCug3R8FwgFhuKY,34051
- gitflow_analytics-3.7.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- gitflow_analytics-3.7.0.dist-info/entry_points.txt,sha256=a3y8HnfLOvK1QVOgAkDY6VQXXm3o9ZSQRZrpiaS3hEM,65
- gitflow_analytics-3.7.0.dist-info/top_level.txt,sha256=CQyxZXjKvpSB1kgqqtuE0PCRqfRsXZJL8JrYpJKtkrk,18
- gitflow_analytics-3.7.0.dist-info/RECORD,,
+ gitflow_analytics-3.7.4.dist-info/licenses/LICENSE,sha256=xwvSwY1GYXpRpmbnFvvnbmMwpobnrdN9T821sGvjOY0,1066
+ gitflow_analytics-3.7.4.dist-info/METADATA,sha256=lfxt6pRpjIjcpNDvEpbZxur80jVaUDVoCS7taWt8BO8,34051
+ gitflow_analytics-3.7.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ gitflow_analytics-3.7.4.dist-info/entry_points.txt,sha256=a3y8HnfLOvK1QVOgAkDY6VQXXm3o9ZSQRZrpiaS3hEM,65
+ gitflow_analytics-3.7.4.dist-info/top_level.txt,sha256=CQyxZXjKvpSB1kgqqtuE0PCRqfRsXZJL8JrYpJKtkrk,18
+ gitflow_analytics-3.7.4.dist-info/RECORD,,
gitflow_analytics/security/reports/__init__.py DELETED
@@ -1,5 +0,0 @@
- """Security reporting module."""
-
- from .security_report import SecurityReportGenerator
-
- __all__ = ["SecurityReportGenerator"]
gitflow_analytics/security/reports/security_report.py DELETED
@@ -1,358 +0,0 @@
- """Generate security analysis reports."""
-
- import json
- import csv
- from typing import List, Dict, Any, Optional
- from pathlib import Path
- from datetime import datetime
- from ..security_analyzer import SecurityAnalysis
-
-
- class SecurityReportGenerator:
- """Generate various format reports for security findings."""
-
- def __init__(self, output_dir: Optional[Path] = None):
- """Initialize report generator.
-
- Args:
- output_dir: Directory for report output
- """
- self.output_dir = output_dir or Path("reports")
- self.output_dir.mkdir(parents=True, exist_ok=True)
-
- def generate_reports(self, analyses: List[SecurityAnalysis], summary: Dict[str, Any]) -> Dict[str, Path]:
- """Generate all report formats.
-
- Args:
- analyses: List of security analyses
- summary: Summary statistics
-
- Returns:
- Dictionary of report type to file path
- """
- timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
- reports = {}
-
- # Generate Markdown report
- md_path = self.output_dir / f"security_report_{timestamp}.md"
- self._generate_markdown_report(analyses, summary, md_path)
- reports["markdown"] = md_path
-
- # Generate JSON report
- json_path = self.output_dir / f"security_findings_{timestamp}.json"
- self._generate_json_report(analyses, summary, json_path)
- reports["json"] = json_path
-
- # Generate CSV report
- csv_path = self.output_dir / f"security_issues_{timestamp}.csv"
- self._generate_csv_report(analyses, csv_path)
- reports["csv"] = csv_path
-
- # Generate SARIF report if requested
- if any(a.total_findings > 0 for a in analyses):
- sarif_path = self.output_dir / f"security_sarif_{timestamp}.json"
- self._generate_sarif_report(analyses, sarif_path)
- reports["sarif"] = sarif_path
-
- return reports
-
- def _generate_markdown_report(self, analyses: List[SecurityAnalysis], summary: Dict, path: Path) -> None:
- """Generate comprehensive Markdown security report."""
- with open(path, 'w') as f:
- # Header
- f.write("# 🔒 Security Analysis Report\n\n")
- f.write(f"**Generated**: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n")
-
- # Executive Summary
- f.write("## 📊 Executive Summary\n\n")
- f.write(f"- **Commits Analyzed**: {summary['total_commits']}\n")
- f.write(f"- **Commits with Issues**: {summary['commits_with_issues']}\n")
- f.write(f"- **Total Findings**: {summary['total_findings']}\n")
- f.write(f"- **Risk Level**: **{summary['risk_level']}** (Score: {summary['average_risk_score']})\n\n")
-
- # Risk Assessment
- self._write_risk_assessment(f, summary)
-
- # Severity Distribution
- f.write("## 🎯 Severity Distribution\n\n")
- severity = summary['severity_distribution']
- if severity['critical'] > 0:
- f.write(f"- 🔴 **Critical**: {severity['critical']}\n")
- if severity['high'] > 0:
- f.write(f"- 🟠 **High**: {severity['high']}\n")
- if severity['medium'] > 0:
- f.write(f"- 🟡 **Medium**: {severity['medium']}\n")
- if severity['low'] > 0:
- f.write(f"- 🟢 **Low**: {severity['low']}\n")
- f.write("\n")
-
- # Top Issues
- if summary['top_issues']:
- f.write("## 🔝 Top Security Issues\n\n")
- f.write("| Issue Type | Severity | Occurrences | Affected Files |\n")
- f.write("|------------|----------|-------------|----------------|\n")
- for issue in summary['top_issues']:
- f.write(f"| {issue['type']} | {issue['severity'].upper()} | "
- f"{issue['occurrences']} | {issue['affected_files']} |\n")
- f.write("\n")
-
- # Detailed Findings by Category
- self._write_detailed_findings(f, analyses)
-
- # LLM Insights
- if 'llm_insights' in summary and summary['llm_insights']:
- f.write("## 🤖 AI Security Insights\n\n")
- f.write(summary['llm_insights'])
- f.write("\n\n")
-
- # Recommendations
- f.write("## 💡 Recommendations\n\n")
- for rec in summary['recommendations']:
- f.write(f"- {rec}\n")
- f.write("\n")
-
- # Appendix - All Findings
- f.write("## 📋 Detailed Findings\n\n")
- self._write_all_findings(f, analyses)
-
- def _write_risk_assessment(self, f, summary: Dict) -> None:
- """Write risk assessment section."""
- risk_level = summary['risk_level']
- score = summary['average_risk_score']
-
- f.write("## ⚠️ Risk Assessment\n\n")
-
- if risk_level == "CRITICAL":
- f.write("### 🚨 CRITICAL RISK DETECTED\n\n")
- f.write("Immediate action required. Critical security vulnerabilities have been identified "
- "that could lead to severe security breaches.\n\n")
- elif risk_level == "HIGH":
- f.write("### 🔴 High Risk\n\n")
- f.write("Significant security issues detected that should be addressed urgently.\n\n")
- elif risk_level == "MEDIUM":
- f.write("### 🟡 Medium Risk\n\n")
- f.write("Moderate security concerns identified that should be addressed in the near term.\n\n")
- else:
- f.write("### 🟢 Low Risk\n\n")
- f.write("Minor security issues detected. Continue with regular security practices.\n\n")
-
- # Risk score visualization
- f.write("**Risk Score Breakdown**:\n")
- f.write("```\n")
- bar_length = 50
- filled = int(score / 100 * bar_length)
- bar = "█" * filled + "░" * (bar_length - filled)
- f.write(f"[{bar}] {score:.1f}/100\n")
- f.write("```\n\n")
-
- def _write_detailed_findings(self, f, analyses: List[SecurityAnalysis]) -> None:
- """Write detailed findings by category."""
- # Aggregate findings
- all_secrets = []
- all_vulnerabilities = []
- all_dependencies = []
- all_llm = []
-
- for analysis in analyses:
- all_secrets.extend(analysis.secrets)
- all_vulnerabilities.extend(analysis.vulnerabilities)
- all_dependencies.extend(analysis.dependency_issues)
- all_llm.extend(analysis.llm_findings)
-
- # Secrets Section
- if all_secrets:
- f.write("## 🔑 Exposed Secrets\n\n")
- f.write(f"**Total**: {len(all_secrets)} potential secrets detected\n\n")
-
- # Group by secret type
- by_type = {}
- for secret in all_secrets:
- secret_type = secret.get('secret_type', 'unknown')
- if secret_type not in by_type:
- by_type[secret_type] = []
- by_type[secret_type].append(secret)
-
- for secret_type, secrets in sorted(by_type.items()):
- f.write(f"### {secret_type.replace('_', ' ').title()}\n")
- for s in secrets[:5]: # Show first 5 of each type
- f.write(f"- **File**: `{s.get('file', 'unknown')}`\n")
- f.write(f" - Line: {s.get('line', 'N/A')}\n")
- f.write(f" - Pattern: `{s.get('match', 'N/A')}`\n")
- if len(secrets) > 5:
- f.write(f" - *... and {len(secrets) - 5} more*\n")
- f.write("\n")
-
- # Vulnerabilities Section
- if all_vulnerabilities:
- f.write("## 🛡️ Code Vulnerabilities\n\n")
- f.write(f"**Total**: {len(all_vulnerabilities)} vulnerabilities detected\n\n")
-
- # Group by vulnerability type
- by_type = {}
- for vuln in all_vulnerabilities:
- vuln_type = vuln.get('vulnerability_type', 'unknown')
- if vuln_type not in by_type:
- by_type[vuln_type] = []
- by_type[vuln_type].append(vuln)
-
- for vuln_type, vulns in sorted(by_type.items()):
- f.write(f"### {vuln_type.replace('_', ' ').title()}\n")
- for v in vulns[:5]:
- f.write(f"- **File**: `{v.get('file', 'unknown')}:{v.get('line', 'N/A')}`\n")
- f.write(f" - Tool: {v.get('tool', 'N/A')}\n")
- f.write(f" - Message: {v.get('message', 'N/A')}\n")
- if len(vulns) > 5:
- f.write(f" - *... and {len(vulns) - 5} more*\n")
- f.write("\n")
-
- # Dependencies Section
- if all_dependencies:
- f.write("## 📦 Vulnerable Dependencies\n\n")
- f.write(f"**Total**: {len(all_dependencies)} vulnerable dependencies\n\n")
-
- for dep in all_dependencies[:10]:
- f.write(f"- **{dep.get('package', 'unknown')}** @ {dep.get('version', 'unknown')}\n")
- f.write(f" - File: `{dep.get('file', 'unknown')}`\n")
- if dep.get('cve'):
- f.write(f" - CVE: {dep['cve']}\n")
- f.write(f" - Message: {dep.get('message', 'N/A')}\n")
- if len(all_dependencies) > 10:
- f.write(f"\n*... and {len(all_dependencies) - 10} more vulnerable dependencies*\n")
- f.write("\n")
-
- def _write_all_findings(self, f, analyses: List[SecurityAnalysis]) -> None:
- """Write all findings in detail."""
- for analysis in analyses:
- if analysis.total_findings == 0:
- continue
-
- f.write(f"### Commit: `{analysis.commit_hash[:8]}`\n")
- f.write(f"**Time**: {analysis.timestamp.strftime('%Y-%m-%d %H:%M:%S')}\n")
- f.write(f"**Files Changed**: {len(analysis.files_changed)}\n")
- f.write(f"**Risk Score**: {analysis.risk_score:.1f}\n\n")
-
- if analysis.secrets:
- f.write("**Secrets**:\n")
- for s in analysis.secrets:
- f.write(f"- {s.get('secret_type', 'unknown')}: {s.get('file', 'N/A')}\n")
-
- if analysis.vulnerabilities:
- f.write("**Vulnerabilities**:\n")
- for v in analysis.vulnerabilities:
- f.write(f"- {v.get('vulnerability_type', 'unknown')}: {v.get('file', 'N/A')}\n")
-
- f.write("\n---\n\n")
-
- def _generate_json_report(self, analyses: List[SecurityAnalysis], summary: Dict, path: Path) -> None:
- """Generate JSON report with all findings."""
- report = {
- "metadata": {
- "generated": datetime.now().isoformat(),
- "version": "1.0.0"
- },
- "summary": summary,
- "analyses": []
- }
-
- for analysis in analyses:
- report["analyses"].append({
- "commit_hash": analysis.commit_hash,
- "timestamp": analysis.timestamp.isoformat(),
- "files_changed": analysis.files_changed,
- "risk_score": analysis.risk_score,
- "findings": {
- "secrets": analysis.secrets,
- "vulnerabilities": analysis.vulnerabilities,
- "dependency_issues": analysis.dependency_issues,
- "llm_findings": analysis.llm_findings
- },
- "metrics": {
- "total": analysis.total_findings,
- "critical": analysis.critical_count,
- "high": analysis.high_count,
- "medium": analysis.medium_count,
- "low": analysis.low_count
- }
- })
-
- with open(path, 'w') as f:
- json.dump(report, f, indent=2)
-
- def _generate_csv_report(self, analyses: List[SecurityAnalysis], path: Path) -> None:
- """Generate CSV report of all findings."""
- with open(path, 'w', newline='') as f:
- writer = csv.DictWriter(f, fieldnames=[
- 'commit_hash', 'timestamp', 'type', 'severity',
- 'category', 'file', 'line', 'message', 'tool', 'confidence'
- ])
- writer.writeheader()
-
- for analysis in analyses:
- # Write all findings
- for finding in (analysis.secrets + analysis.vulnerabilities +
- analysis.dependency_issues + analysis.llm_findings):
- writer.writerow({
- 'commit_hash': analysis.commit_hash[:8],
- 'timestamp': analysis.timestamp.isoformat(),
- 'type': finding.get('type', 'unknown'),
- 'severity': finding.get('severity', 'medium'),
- 'category': finding.get('vulnerability_type',
- finding.get('secret_type', 'unknown')),
- 'file': finding.get('file', ''),
- 'line': finding.get('line', ''),
- 'message': finding.get('message', ''),
- 'tool': finding.get('tool', finding.get('source', '')),
- 'confidence': finding.get('confidence', '')
- })
-
- def _generate_sarif_report(self, analyses: List[SecurityAnalysis], path: Path) -> None:
- """Generate SARIF format report for GitHub Security tab integration."""
- sarif = {
- "$schema": "https://raw.githubusercontent.com/oasis-tcs/sarif-spec/master/Schemata/sarif-schema-2.1.0.json",
- "version": "2.1.0",
- "runs": [{
- "tool": {
- "driver": {
- "name": "GitFlow Analytics Security",
- "version": "1.0.0",
- "informationUri": "https://github.com/yourusername/gitflow-analytics"
- }
- },
- "results": []
- }]
- }
-
- for analysis in analyses:
- for finding in (analysis.secrets + analysis.vulnerabilities):
- result = {
- "ruleId": finding.get('vulnerability_type',
- finding.get('secret_type', 'unknown')),
- "level": self._severity_to_sarif_level(finding.get('severity', 'medium')),
- "message": {
- "text": finding.get('message', 'Security issue detected')
- },
- "locations": [{
- "physicalLocation": {
- "artifactLocation": {
- "uri": finding.get('file', 'unknown')
- },
- "region": {
- "startLine": finding.get('line', 1)
- }
- }
- }]
- }
- sarif["runs"][0]["results"].append(result)
-
- with open(path, 'w') as f:
- json.dump(sarif, f, indent=2)
-
- def _severity_to_sarif_level(self, severity: str) -> str:
- """Convert severity to SARIF level."""
- mapping = {
- "critical": "error",
- "high": "error",
- "medium": "warning",
- "low": "note"
- }
- return mapping.get(severity.lower(), "warning")