gitflow-analytics 1.0.0-py3-none-any.whl → 1.0.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gitflow_analytics/__init__.py +2 -0
- gitflow_analytics/_version.py +1 -1
- gitflow_analytics/cli.py +113 -19
- gitflow_analytics/config.py +211 -28
- gitflow_analytics/core/analyzer.py +68 -8
- gitflow_analytics/core/branch_mapper.py +1 -1
- gitflow_analytics/core/cache.py +3 -5
- gitflow_analytics/core/identity.py +5 -6
- gitflow_analytics/extractors/base.py +1 -1
- gitflow_analytics/extractors/story_points.py +1 -1
- gitflow_analytics/extractors/tickets.py +13 -4
- gitflow_analytics/integrations/github_integration.py +11 -4
- gitflow_analytics/integrations/jira_integration.py +272 -0
- gitflow_analytics/integrations/orchestrator.py +33 -7
- gitflow_analytics/metrics/dora.py +3 -3
- gitflow_analytics/models/database.py +4 -4
- gitflow_analytics/reports/analytics_writer.py +14 -4
- gitflow_analytics/reports/csv_writer.py +12 -4
- gitflow_analytics/reports/narrative_writer.py +5 -1
- gitflow_analytics-1.0.1.dist-info/METADATA +463 -0
- gitflow_analytics-1.0.1.dist-info/RECORD +31 -0
- gitflow_analytics-1.0.0.dist-info/METADATA +0 -201
- gitflow_analytics-1.0.0.dist-info/RECORD +0 -30
- {gitflow_analytics-1.0.0.dist-info → gitflow_analytics-1.0.1.dist-info}/WHEEL +0 -0
- {gitflow_analytics-1.0.0.dist-info → gitflow_analytics-1.0.1.dist-info}/entry_points.txt +0 -0
- {gitflow_analytics-1.0.0.dist-info → gitflow_analytics-1.0.1.dist-info}/licenses/LICENSE +0 -0
- {gitflow_analytics-1.0.0.dist-info → gitflow_analytics-1.0.1.dist-info}/top_level.txt +0 -0
gitflow_analytics/__init__.py
CHANGED
gitflow_analytics/_version.py
CHANGED
gitflow_analytics/cli.py
CHANGED
```diff
@@ -1,23 +1,23 @@
 """Command-line interface for GitFlow Analytics."""
-import
-import yaml
-from pathlib import Path
+import sys
 from datetime import datetime, timedelta
+from pathlib import Path
 from typing import Optional
-
+
+import click
+import git
 import pandas as pd
 
-from .config import ConfigLoader
-from .core.cache import GitAnalysisCache
+from .config import ConfigLoader
 from .core.analyzer import GitAnalyzer
+from .core.cache import GitAnalysisCache
 from .core.identity import DeveloperIdentityResolver
-from .extractors.story_points import StoryPointExtractor
 from .extractors.tickets import TicketExtractor
-from .
+from .integrations.orchestrator import IntegrationOrchestrator
+from .metrics.dora import DORAMetricsCalculator
 from .reports.analytics_writer import AnalyticsReportGenerator
+from .reports.csv_writer import CSVReportGenerator
 from .reports.narrative_writer import NarrativeReportGenerator
-from .metrics.dora import DORAMetricsCalculator
-from .integrations.orchestrator import IntegrationOrchestrator
 
 
 @click.group()
```
```diff
@@ -101,14 +101,36 @@ def analyze(config: Path, weeks: int, output: Optional[Path], anonymize: bool,
         manual_mappings=cfg.analysis.manual_identity_mappings
     )
 
-    analyzer = GitAnalyzer(
+    analyzer = GitAnalyzer(
+        cache,
+        branch_mapping_rules=cfg.analysis.branch_mapping_rules,
+        allowed_ticket_platforms=getattr(cfg.analysis, 'ticket_platforms', None),
+        exclude_paths=cfg.analysis.exclude_paths
+    )
    orchestrator = IntegrationOrchestrator(cfg, cache)
 
+    # Discover organization repositories if needed
+    repositories_to_analyze = cfg.repositories
+    if cfg.github.organization and not repositories_to_analyze:
+        click.echo(f"🔍 Discovering repositories from organization: {cfg.github.organization}")
+        try:
+            # Use a 'repos' directory in the config directory for cloned repositories
+            config_dir = Path(config).parent if config else Path.cwd()
+            repos_dir = config_dir / "repos"
+            discovered_repos = cfg.discover_organization_repositories(clone_base_path=repos_dir)
+            repositories_to_analyze = discovered_repos
+            click.echo(f"   ✅ Found {len(discovered_repos)} repositories in organization")
+            for repo in discovered_repos:
+                click.echo(f"      - {repo.name} ({repo.github_repo})")
+        except Exception as e:
+            click.echo(f"   ❌ Failed to discover repositories: {e}")
+            return
+
     # Analysis period
     end_date = datetime.now()
     start_date = end_date - timedelta(weeks=weeks)
 
-    click.echo(f"\n🚀 Analyzing {len(
+    click.echo(f"\n🚀 Analyzing {len(repositories_to_analyze)} repositories...")
     click.echo(f"   Period: {start_date.strftime('%Y-%m-%d')} to {end_date.strftime('%Y-%m-%d')}")
 
     # Analyze repositories
```
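The practical upshot of this hunk: a configuration may now omit `repositories` entirely and rely on organization discovery. A minimal sketch of such a config (values are placeholders, not taken from this diff), parsed with the same `yaml.safe_load` the loader uses:

```python
# Minimal organization-only config sketch; "my-org" and the token
# placeholder are illustrative values, not from the package.
import yaml

config_text = """
github:
  token: "${GITHUB_TOKEN}"
  organization: "my-org"
cache:
  directory: .gitflow-cache
"""

data = yaml.safe_load(config_text)
assert "repositories" not in data          # discovery kicks in at runtime
assert data["github"]["organization"] == "my-org"
```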
```diff
@@ -116,13 +138,32 @@ def analyze(config: Path, weeks: int, output: Optional[Path], anonymize: bool,
     all_prs = []
     all_enrichments = {}
 
-    for repo_config in
+    for repo_config in repositories_to_analyze:
         click.echo(f"\n📁 Analyzing {repo_config.name}...")
 
-        # Check if repo exists
+        # Check if repo exists, clone if needed
         if not repo_config.path.exists():
-
-
+            # Try to clone if we have a github_repo configured
+            if repo_config.github_repo and cfg.github.organization:
+                click.echo("   📥 Cloning repository from GitHub...")
+                try:
+                    # Ensure parent directory exists
+                    repo_config.path.parent.mkdir(parents=True, exist_ok=True)
+
+                    # Clone the repository
+                    clone_url = f"https://github.com/{repo_config.github_repo}.git"
+                    if cfg.github.token:
+                        # Use token for authentication
+                        clone_url = f"https://{cfg.github.token}@github.com/{repo_config.github_repo}.git"
+
+                    git.Repo.clone_from(clone_url, repo_config.path, branch=repo_config.branch)
+                    click.echo(f"   ✅ Successfully cloned {repo_config.github_repo}")
+                except Exception as e:
+                    click.echo(f"   ❌ Failed to clone repository: {e}")
+                    continue
+            else:
+                click.echo(f"   ❌ Repository path not found: {repo_config.path}")
+                continue
 
         # Analyze repository
         try:
```
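Stripped of the CLI plumbing, the clone step reduces to the sketch below (helper name and arguments are illustrative). One property carries over from the shipped code: embedding the token in the clone URL persists it in the checkout's `.git/config` remote.

```python
# Sketch of the token-authenticated clone performed above; helper name
# and arguments are illustrative.
from pathlib import Path
from typing import Optional

import git

def clone_github_repo(repo_full_name: str, dest: Path,
                      token: Optional[str] = None,
                      branch: Optional[str] = None) -> git.Repo:
    clone_url = f"https://github.com/{repo_full_name}.git"
    if token:
        # Token-in-URL auth: simple, but the token is stored in the
        # clone's remote URL afterwards.
        clone_url = f"https://{token}@github.com/{repo_full_name}.git"
    dest.parent.mkdir(parents=True, exist_ok=True)
    kwargs = {"branch": branch} if branch else {}
    return git.Repo.clone_from(clone_url, dest, **kwargs)
```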
```diff
@@ -174,7 +215,7 @@ def analyze(config: Path, weeks: int, output: Optional[Path], anonymize: bool,
 
     # Analyze tickets
     click.echo("\n🎫 Analyzing ticket references...")
-    ticket_extractor = TicketExtractor()
+    ticket_extractor = TicketExtractor(allowed_platforms=getattr(cfg.analysis, 'ticket_platforms', None))
     ticket_analysis = ticket_extractor.analyze_ticket_coverage(all_commits, all_prs)
 
     for platform, count in ticket_analysis['ticket_summary'].items():
```
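The new `allowed_platforms` argument restricts which ticket systems are counted. `TicketExtractor`'s internals are not shown in this diff; the sketch below illustrates the filtering idea with common ticket-reference conventions, not the package's actual patterns.

```python
# Illustrative platform-filtered ticket matching; these regexes are common
# conventions, not TicketExtractor's actual implementation.
import re

PLATFORM_PATTERNS = {
    "jira": re.compile(r"\b[A-Z][A-Z0-9]+-\d+\b"),  # e.g. PROJ-123
    "github": re.compile(r"(?:^|\s)#(\d+)\b"),       # e.g. #45
}

def extract_tickets(message: str, allowed_platforms=None) -> dict:
    platforms = allowed_platforms or list(PLATFORM_PATTERNS)
    return {name: PLATFORM_PATTERNS[name].findall(message)
            for name in platforms if name in PLATFORM_PATTERNS}

print(extract_tickets("PROJ-123: fix login, closes #45", ["jira"]))
# {'jira': ['PROJ-123']}
```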
```diff
@@ -319,7 +360,7 @@ def analyze(config: Path, weeks: int, output: Optional[Path], anonymize: bool,
     click.echo(f"   - Total story points: {total_story_points}")
 
     if dora_metrics:
-        click.echo(
+        click.echo("\n🎯 DORA Metrics:")
         click.echo(f"   - Deployment frequency: {dora_metrics['deployment_frequency']['category']}")
         click.echo(f"   - Lead time: {dora_metrics['lead_time_hours']:.1f} hours")
         click.echo(f"   - Change failure rate: {dora_metrics['change_failure_rate']:.1f}%")
```
```diff
@@ -357,7 +398,7 @@ def cache_stats(config: Path):
     # Calculate cache size
     import os
     cache_size = 0
-    for root,
+    for root, _dirs, files in os.walk(cfg.cache.directory):
         for f in files:
             cache_size += os.path.getsize(os.path.join(root, f))
 
```
```diff
@@ -392,6 +433,59 @@ def merge_identity(config: Path, dev1: str, dev2: str):
         sys.exit(1)
 
 
+@cli.command()
+@click.option('--config', '-c',
+              type=click.Path(exists=True, path_type=Path),
+              required=True,
+              help='Path to YAML configuration file')
+def discover_jira_fields(config: Path):
+    """Discover available JIRA fields, particularly story point fields."""
+    try:
+        cfg = ConfigLoader.load(config)
+
+        # Check if JIRA is configured
+        if not cfg.jira or not cfg.jira.base_url:
+            click.echo("❌ JIRA is not configured in the configuration file")
+            return
+
+        # Initialize JIRA integration
+        from .integrations.jira_integration import JIRAIntegration
+
+        jira = JIRAIntegration(
+            cfg.jira.base_url,
+            cfg.jira.access_user,
+            cfg.jira.access_token,
+            None  # No cache needed for field discovery
+        )
+
+        # Validate connection
+        click.echo(f"🔗 Connecting to JIRA at {cfg.jira.base_url}...")
+        if not jira.validate_connection():
+            click.echo("❌ Failed to connect to JIRA. Check your credentials.")
+            return
+
+        click.echo("✅ Connected successfully!\n")
+        click.echo("🔍 Discovering fields with potential story point data...")
+
+        fields = jira.discover_fields()
+
+        if not fields:
+            click.echo("No potential story point fields found.")
+        else:
+            click.echo(f"\nFound {len(fields)} potential story point fields:")
+            click.echo("\nAdd these to your configuration under jira_integration.story_point_fields:")
+            click.echo("```yaml")
+            click.echo("jira_integration:")
+            click.echo("  story_point_fields:")
+            for field_id, field_info in fields.items():
+                click.echo(f'    - "{field_id}"  # {field_info["name"]}')
+            click.echo("```")
+
+    except Exception as e:
+        click.echo(f"❌ Error: {e}", err=True)
+        sys.exit(1)
+
+
 @cli.command()
 @click.option('--config', '-c',
               type=click.Path(exists=True, path_type=Path),
```
gitflow_analytics/config.py
CHANGED
```diff
@@ -1,9 +1,12 @@
 """Configuration management for GitFlow Analytics."""
 import os
-import yaml
-from pathlib import Path
-from typing import Dict, Any, Optional, List
 from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Any, Dict, List, Optional
+
+import yaml
+from dotenv import load_dotenv
+
 
 @dataclass
 class RepositoryConfig:
```
```diff
@@ -24,6 +27,7 @@ class GitHubConfig:
     """GitHub API configuration."""
     token: Optional[str] = None
     owner: Optional[str] = None
+    organization: Optional[str] = None
     base_url: str = "https://api.github.com"
     max_retries: int = 3
     backoff_factor: int = 2
```
```diff
@@ -42,10 +46,12 @@ class AnalysisConfig:
     story_point_patterns: List[str] = field(default_factory=list)
     exclude_authors: List[str] = field(default_factory=list)
     exclude_message_patterns: List[str] = field(default_factory=list)
+    exclude_paths: List[str] = field(default_factory=list)
     similarity_threshold: float = 0.85
     manual_identity_mappings: List[Dict[str, Any]] = field(default_factory=list)
     default_ticket_platform: Optional[str] = None
     branch_mapping_rules: Dict[str, List[str]] = field(default_factory=dict)
+    ticket_platforms: Optional[List[str]] = None
 
 @dataclass
 class OutputConfig:
```
```diff
@@ -65,6 +71,25 @@ class CacheConfig:
     ttl_hours: int = 168
     max_size_mb: int = 500
 
+@dataclass
+class JIRAConfig:
+    """JIRA configuration."""
+    access_user: str
+    access_token: str
+    base_url: Optional[str] = None
+
+@dataclass
+class JIRAIntegrationConfig:
+    """JIRA integration specific configuration."""
+    enabled: bool = True
+    fetch_story_points: bool = True
+    project_keys: List[str] = field(default_factory=list)
+    story_point_fields: List[str] = field(default_factory=lambda: [
+        "customfield_10016",
+        "customfield_10021",
+        "Story Points"
+    ])
+
 @dataclass
 class Config:
     """Main configuration container."""
```
```diff
@@ -73,14 +98,69 @@ class Config:
     analysis: AnalysisConfig
     output: OutputConfig
     cache: CacheConfig
+    jira: Optional[JIRAConfig] = None
+    jira_integration: Optional[JIRAIntegrationConfig] = None
+
+    def discover_organization_repositories(self, clone_base_path: Optional[Path] = None) -> List[RepositoryConfig]:
+        """Discover repositories from GitHub organization.
+
+        Args:
+            clone_base_path: Base directory where repos should be cloned/found.
+                If None, uses output directory.
+
+        Returns:
+            List of discovered repository configurations.
+        """
+        if not self.github.organization or not self.github.token:
+            return []
+
+        from github import Github
+
+        github_client = Github(self.github.token, base_url=self.github.base_url)
+
+        try:
+            org = github_client.get_organization(self.github.organization)
+            discovered_repos = []
+
+            base_path = clone_base_path or self.output.directory
+            if base_path is None:
+                raise ValueError("No base path available for repository cloning")
+
+            for repo in org.get_repos():
+                # Skip archived repositories
+                if repo.archived:
+                    continue
+
+                # Create repository configuration
+                repo_path = base_path / repo.name
+                repo_config = RepositoryConfig(
+                    name=repo.name,
+                    path=repo_path,
+                    github_repo=repo.full_name,
+                    project_key=repo.name.upper().replace('-', '_'),
+                    branch=repo.default_branch
+                )
+                discovered_repos.append(repo_config)
+
+            return discovered_repos
+
+        except Exception as e:
+            raise ValueError(f"Failed to discover repositories from organization {self.github.organization}: {e}") from e
 
 class ConfigLoader:
     """Load and validate configuration from YAML files."""
 
-    @staticmethod
-    def load(config_path: Path) -> Config:
+    @classmethod
+    def load(cls, config_path: Path) -> Config:
         """Load configuration from YAML file."""
-
+        # Load .env file from the same directory as the config file if it exists
+        config_dir = config_path.parent
+        env_file = config_dir / '.env'
+        if env_file.exists():
+            load_dotenv(env_file, override=True)
+            print(f"📋 Loaded environment variables from {env_file}")
+
+        with open(config_path) as f:
             data = yaml.safe_load(f)
 
         # Validate version
```
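Taken together with the CLI hunk above, calling the discovery hook directly from user code looks like this (the config path and clone directory are illustrative):

```python
# Usage sketch for the new discovery method; paths are illustrative.
from pathlib import Path

from gitflow_analytics.config import ConfigLoader

cfg = ConfigLoader.load(Path("config.yaml"))
repos = cfg.discover_organization_repositories(clone_base_path=Path("repos"))
for repo in repos:
    print(repo.name, repo.github_repo, repo.branch)
```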
```diff
@@ -90,9 +170,16 @@ class ConfigLoader:
 
         # Process GitHub config
         github_data = data.get('github', {})
+
+        # Resolve GitHub token
+        github_token = cls._resolve_env_var(github_data.get('token'))
+        if github_data.get('token') and not github_token:
+            raise ValueError("GitHub is configured but GITHUB_TOKEN environment variable is not set")
+
         github_config = GitHubConfig(
-            token=
-            owner=
+            token=github_token,
+            owner=cls._resolve_env_var(github_data.get('owner')),
+            organization=cls._resolve_env_var(github_data.get('organization')),
             base_url=github_data.get('base_url', 'https://api.github.com'),
             max_retries=github_data.get('rate_limit', {}).get('max_retries', 3),
             backoff_factor=github_data.get('rate_limit', {}).get('backoff_factor', 2)
```
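`cls._resolve_env_var` is referenced here but its body falls outside this diff (the trailing `@staticmethod` context further down hints at where it lives). A plausible sketch, assuming `${VAR}` placeholders are looked up in the process environment:

```python
# Assumed behavior of _resolve_env_var: "${VAR}" placeholders resolve
# from os.environ, anything else passes through unchanged.
import os
from typing import Optional

def _resolve_env_var(value: Optional[str]) -> Optional[str]:
    if value and value.startswith("${") and value.endswith("}"):
        return os.environ.get(value[2:-1])
    return value

os.environ["GITHUB_TOKEN"] = "ghp_example"          # illustrative value
assert _resolve_env_var("${GITHUB_TOKEN}") == "ghp_example"
assert _resolve_env_var("literal-owner") == "literal-owner"
```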
```diff
@@ -100,26 +187,70 @@ class ConfigLoader:
 
         # Process repositories
         repositories = []
-        for repo_data in data.get('repositories', []):
-            # Handle github_repo with owner fallback
-            github_repo = repo_data.get('github_repo')
-            if github_repo and github_config.owner and '/' not in github_repo:
-                github_repo = f"{github_config.owner}/{github_repo}"
-
-            repo_config = RepositoryConfig(
-                name=repo_data['name'],
-                path=repo_data['path'],
-                github_repo=github_repo,
-                project_key=repo_data.get('project_key'),
-                branch=repo_data.get('branch')
-            )
-            repositories.append(repo_config)
 
-
-
+        # Handle organization-based repository discovery
+        if github_config.organization and not data.get('repositories'):
+            # Organization specified but no explicit repositories - will be discovered at runtime
+            pass
+        else:
+            # Process explicitly defined repositories
+            for repo_data in data.get('repositories', []):
+                # Handle github_repo with owner/organization fallback
+                github_repo = repo_data.get('github_repo')
+                if github_repo and '/' not in github_repo:
+                    if github_config.organization:
+                        github_repo = f"{github_config.organization}/{github_repo}"
+                    elif github_config.owner:
+                        github_repo = f"{github_config.owner}/{github_repo}"
+
+                repo_config = RepositoryConfig(
+                    name=repo_data['name'],
+                    path=repo_data['path'],
+                    github_repo=github_repo,
+                    project_key=repo_data.get('project_key'),
+                    branch=repo_data.get('branch')
+                )
+                repositories.append(repo_config)
+
+        # Allow empty repositories list if organization is specified
+        if not repositories and not github_config.organization:
+            raise ValueError("No repositories defined and no organization specified for discovery")
 
         # Process analysis settings
         analysis_data = data.get('analysis', {})
+
+        # Default exclude paths for common boilerplate/generated files
+        default_exclude_paths = [
+            "**/node_modules/**",
+            "**/vendor/**",
+            "**/dist/**",
+            "**/build/**",
+            "**/.next/**",
+            "**/__pycache__/**",
+            "**/*.min.js",
+            "**/*.min.css",
+            "**/*.bundle.js",
+            "**/*.bundle.css",
+            "**/package-lock.json",
+            "**/yarn.lock",
+            "**/poetry.lock",
+            "**/Pipfile.lock",
+            "**/composer.lock",
+            "**/Gemfile.lock",
+            "**/Cargo.lock",
+            "**/go.sum",
+            "**/*.generated.*",
+            "**/generated/**",
+            "**/coverage/**",
+            "**/.coverage/**",
+            "**/htmlcov/**",
+            "**/*.map"
+        ]
+
+        # Use user-provided exclude paths if given, otherwise fall back to the defaults
+        user_exclude_paths = analysis_data.get('exclude', {}).get('paths', [])
+        exclude_paths = user_exclude_paths if user_exclude_paths else default_exclude_paths
+
         analysis_config = AnalysisConfig(
             story_point_patterns=analysis_data.get('story_point_patterns', [
                 r"(?:story\s*points?|sp|pts?)\s*[:=]\s*(\d+)",
```
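These globs are later matched with `fnmatch` in the analyzer (see `_should_exclude_file` further down). Because `fnmatch`'s `*` already matches across `/`, `**` behaves the same as `*`, with one observable consequence:

```python
# fnmatch semantics for the default exclude globs: "**/node_modules/**"
# requires a literal "/node_modules/", so a top-level node_modules path
# does not match that particular pattern.
import fnmatch

pattern = "**/node_modules/**"
print(fnmatch.fnmatch("web/node_modules/react/index.js", pattern))  # True
print(fnmatch.fnmatch("node_modules/react/index.js", pattern))      # False
```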
```diff
@@ -131,17 +262,26 @@ class ConfigLoader:
                 "renovate[bot]"
             ]),
             exclude_message_patterns=analysis_data.get('exclude', {}).get('message_patterns', []),
+            exclude_paths=exclude_paths,
             similarity_threshold=analysis_data.get('identity', {}).get('similarity_threshold', 0.85),
             manual_identity_mappings=analysis_data.get('identity', {}).get('manual_mappings', []),
             default_ticket_platform=analysis_data.get('default_ticket_platform'),
-            branch_mapping_rules=analysis_data.get('branch_mapping_rules', {})
+            branch_mapping_rules=analysis_data.get('branch_mapping_rules', {}),
+            ticket_platforms=analysis_data.get('ticket_platforms')
         )
 
         # Process output settings
         output_data = data.get('output', {})
         output_dir = output_data.get('directory')
         if output_dir:
-            output_dir = Path(output_dir).expanduser()
+            output_dir = Path(output_dir).expanduser()
+            # If relative path, make it relative to config file directory
+            if not output_dir.is_absolute():
+                output_dir = config_path.parent / output_dir
+            output_dir = output_dir.resolve()
+        else:
+            # Default to config file directory if not specified
+            output_dir = config_path.parent
 
         output_config = OutputConfig(
             directory=output_dir,
```
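The resolution rule in isolation: relative output (and, below, cache) directories are anchored at the config file's directory rather than the process working directory:

```python
# Relative paths resolve against the config file's parent directory.
from pathlib import Path

config_path = Path("/home/me/project/config.yaml")  # illustrative
output_dir = Path("reports")                        # value from YAML
if not output_dir.is_absolute():
    output_dir = config_path.parent / output_dir
print(output_dir.resolve())  # /home/me/project/reports
```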
```diff
@@ -155,18 +295,61 @@ class ConfigLoader:
 
         # Process cache settings
         cache_data = data.get('cache', {})
+        cache_dir = cache_data.get('directory', '.gitflow-cache')
+        cache_path = Path(cache_dir)
+        # If relative path, make it relative to config file directory
+        if not cache_path.is_absolute():
+            cache_path = config_path.parent / cache_path
+
         cache_config = CacheConfig(
-            directory=
+            directory=cache_path.resolve(),
             ttl_hours=cache_data.get('ttl_hours', 168),
             max_size_mb=cache_data.get('max_size_mb', 500)
         )
 
+        # Process JIRA settings
+        jira_config = None
+        jira_data = data.get('jira', {})
+        if jira_data:
+            access_user = cls._resolve_env_var(jira_data.get('access_user', ''))
+            access_token = cls._resolve_env_var(jira_data.get('access_token', ''))
+
+            # Validate JIRA credentials if JIRA is configured
+            if jira_data.get('access_user') and jira_data.get('access_token'):
+                if not access_user:
+                    raise ValueError("JIRA is configured but JIRA_ACCESS_USER environment variable is not set")
+                if not access_token:
+                    raise ValueError("JIRA is configured but JIRA_ACCESS_TOKEN environment variable is not set")
+
+            jira_config = JIRAConfig(
+                access_user=access_user,
+                access_token=access_token,
+                base_url=jira_data.get('base_url')
+            )
+
+        # Process JIRA integration settings
+        jira_integration_config = None
+        jira_integration_data = data.get('jira_integration', {})
+        if jira_integration_data:
+            jira_integration_config = JIRAIntegrationConfig(
+                enabled=jira_integration_data.get('enabled', True),
+                fetch_story_points=jira_integration_data.get('fetch_story_points', True),
+                project_keys=jira_integration_data.get('project_keys', []),
+                story_point_fields=jira_integration_data.get('story_point_fields', [
+                    "customfield_10016",
+                    "customfield_10021",
+                    "Story Points"
+                ])
+            )
+
         return Config(
             repositories=repositories,
             github=github_config,
             analysis=analysis_config,
             output=output_config,
-            cache=cache_config
+            cache=cache_config,
+            jira=jira_config,
+            jira_integration=jira_integration_config
         )
 
     @staticmethod
```
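For reference, a config exercising both new sections as this parser expects them (all values are placeholders):

```python
# Shape of the jira/jira_integration config handled above; values are
# placeholders, and the ${...} indirection mirrors the validation logic.
import yaml

cfg_text = """
jira:
  access_user: "${JIRA_ACCESS_USER}"
  access_token: "${JIRA_ACCESS_TOKEN}"
  base_url: "https://example.atlassian.net"
jira_integration:
  enabled: true
  fetch_story_points: true
  project_keys: ["PROJ"]
  story_point_fields: ["customfield_10016", "Story Points"]
"""

data = yaml.safe_load(cfg_text)
assert data["jira_integration"]["project_keys"] == ["PROJ"]
```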
gitflow_analytics/core/analyzer.py
CHANGED

```diff
@@ -1,29 +1,33 @@
 """Git repository analyzer with batch processing support."""
-import
+import fnmatch
 from datetime import datetime
-from typing import List, Dict, Any, Optional, Tuple, Generator
 from pathlib import Path
+from typing import Any, Dict, Generator, List, Optional
+
 import git
 from git import Repo
 from tqdm import tqdm
 
-from .cache import GitAnalysisCache
 from ..extractors.story_points import StoryPointExtractor
 from ..extractors.tickets import TicketExtractor
 from .branch_mapper import BranchToProjectMapper
+from .cache import GitAnalysisCache
 
 
 class GitAnalyzer:
     """Analyze Git repositories with caching and batch processing."""
 
     def __init__(self, cache: GitAnalysisCache, batch_size: int = 1000,
-                 branch_mapping_rules: Optional[Dict[str, List[str]]] = None
+                 branch_mapping_rules: Optional[Dict[str, List[str]]] = None,
+                 allowed_ticket_platforms: Optional[List[str]] = None,
+                 exclude_paths: Optional[List[str]] = None):
         """Initialize analyzer with cache."""
         self.cache = cache
         self.batch_size = batch_size
         self.story_point_extractor = StoryPointExtractor()
-        self.ticket_extractor = TicketExtractor()
+        self.ticket_extractor = TicketExtractor(allowed_platforms=allowed_ticket_platforms)
         self.branch_mapper = BranchToProjectMapper(branch_mapping_rules)
+        self.exclude_paths = exclude_paths or []
 
     def analyze_repository(self, repo_path: Path, since: datetime,
                            branch: Optional[str] = None) -> List[Dict[str, Any]]:
```
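Mirroring the cli.py change earlier, constructing the analyzer with the new options looks like this (the `GitAnalysisCache` constructor arguments are assumed, since its signature is not part of this diff):

```python
# Wiring sketch for the new constructor parameters; the cache constructor
# call is an assumption, not shown in this diff.
from pathlib import Path

from gitflow_analytics.core.analyzer import GitAnalyzer
from gitflow_analytics.core.cache import GitAnalysisCache

cache = GitAnalysisCache(Path(".gitflow-cache"))  # assumed signature
analyzer = GitAnalyzer(
    cache,
    branch_mapping_rules={},
    allowed_ticket_platforms=["jira"],
    exclude_paths=["**/node_modules/**", "**/*.min.js"],
)
```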
```diff
@@ -31,7 +35,7 @@ class GitAnalyzer:
         try:
             repo = Repo(repo_path)
         except Exception as e:
-            raise ValueError(f"Failed to open repository at {repo_path}: {e}")
+            raise ValueError(f"Failed to open repository at {repo_path}: {e}") from e
 
         # Get commits to analyze
         commits = self._get_commits(repo, since, branch)
```
```diff
@@ -133,12 +137,18 @@ class GitAnalyzer:
             commit_data['branch'], repo_path
         )
 
-        # Calculate metrics
+        # Calculate metrics - use raw stats for backward compatibility
         stats = commit.stats.total
         commit_data['files_changed'] = stats.get('files', 0)
         commit_data['insertions'] = stats.get('insertions', 0)
         commit_data['deletions'] = stats.get('deletions', 0)
 
+        # Calculate filtered metrics (excluding boilerplate/generated files)
+        filtered_stats = self._calculate_filtered_stats(commit)
+        commit_data['filtered_files_changed'] = filtered_stats['files']
+        commit_data['filtered_insertions'] = filtered_stats['insertions']
+        commit_data['filtered_deletions'] = filtered_stats['deletions']
+
         # Extract story points
         commit_data['story_points'] = self.story_point_extractor.extract_from_text(
             commit.message
```
```diff
@@ -192,4 +202,54 @@ class GitAnalyzer:
             '.cs', '.vb', '.r', '.m', '.mm', '.f90', '.f95', '.lua'
         }
 
-        return any(filepath.endswith(ext) for ext in code_extensions)
+        return any(filepath.endswith(ext) for ext in code_extensions)
+
+    def _should_exclude_file(self, filepath: str) -> bool:
+        """Check if file should be excluded from line counting."""
+        if not filepath:
+            return False
+
+        # Normalize path separators for consistent matching
+        filepath = filepath.replace('\\', '/')
+
+        # Check against exclude patterns
+        return any(fnmatch.fnmatch(filepath, pattern) for pattern in self.exclude_paths)
+
+    def _calculate_filtered_stats(self, commit: git.Commit) -> Dict[str, int]:
+        """Calculate commit statistics excluding boilerplate/generated files."""
+        filtered_stats = {
+            'files': 0,
+            'insertions': 0,
+            'deletions': 0
+        }
+
+        # For initial commits or commits without parents
+        parent = commit.parents[0] if commit.parents else None
+
+        try:
+            for diff in commit.diff(parent):
+                # Get file path
+                file_path = diff.b_path if diff.b_path else diff.a_path
+                if not file_path:
+                    continue
+
+                # Skip excluded files
+                if self._should_exclude_file(file_path):
+                    continue
+
+                # Count the file
+                filtered_stats['files'] += 1
+
+                # Count insertions and deletions
+                if diff.diff:
+                    diff_text = diff.diff.decode('utf-8', errors='ignore')
+                    for line in diff_text.split('\n'):
+                        if line.startswith('+') and not line.startswith('+++'):
+                            filtered_stats['insertions'] += 1
+                        elif line.startswith('-') and not line.startswith('---'):
+                            filtered_stats['deletions'] += 1
+        except Exception:
+            # If we can't calculate filtered stats, return zeros
+            pass
+
+        return filtered_stats
```
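A detail worth flagging when reading `_calculate_filtered_stats`: GitPython only fills `Diff.diff` with patch bytes when the diff is created with `create_patch=True`, which the loop above does not pass, and `commit.diff(parent)` orients the patch from the commit toward its parent. An illustrative variant that requests the patch explicitly (a sketch, not the package's code):

```python
# Variant of the counting loop above that requests patch text explicitly;
# without create_patch=True, GitPython leaves Diff.diff empty and the
# loop counts nothing. Note also that commit.diff(parent) diffs from the
# commit toward its parent, so '+' and '-' are swapped relative to
# `git show`; parent.diff(commit) gives the conventional orientation.
import git

def count_changed_lines(commit: git.Commit) -> dict:
    parent = commit.parents[0] if commit.parents else None
    stats = {"insertions": 0, "deletions": 0}
    for d in commit.diff(parent, create_patch=True):
        if not d.diff:
            continue
        for line in d.diff.decode("utf-8", errors="ignore").splitlines():
            if line.startswith("+") and not line.startswith("+++"):
                stats["insertions"] += 1
            elif line.startswith("-") and not line.startswith("---"):
                stats["deletions"] += 1
    return stats
```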