gitflow-analytics 1.0.0__py3-none-any.whl → 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -13,6 +13,8 @@ from .extractors.tickets import TicketExtractor
13
13
  from .reports.csv_writer import CSVReportGenerator
14
14
 
15
15
  __all__ = [
16
+ '__version__',
17
+ '__version_info__',
16
18
  'GitAnalyzer',
17
19
  'GitAnalysisCache',
18
20
  'DeveloperIdentityResolver',
@@ -1,4 +1,4 @@
1
1
  """Version information for gitflow-analytics."""
2
2
 
3
- __version__ = "1.0.0"
3
+ __version__ = "1.0.1"
4
4
  __version_info__ = tuple(int(x) for x in __version__.split("."))
gitflow_analytics/cli.py CHANGED
@@ -1,23 +1,23 @@
1
1
  """Command-line interface for GitFlow Analytics."""
2
- import click
3
- import yaml
4
- from pathlib import Path
2
+ import sys
5
3
  from datetime import datetime, timedelta
4
+ from pathlib import Path
6
5
  from typing import Optional
7
- import sys
6
+
7
+ import click
8
+ import git
8
9
  import pandas as pd
9
10
 
10
- from .config import ConfigLoader, Config
11
- from .core.cache import GitAnalysisCache
11
+ from .config import ConfigLoader
12
12
  from .core.analyzer import GitAnalyzer
13
+ from .core.cache import GitAnalysisCache
13
14
  from .core.identity import DeveloperIdentityResolver
14
- from .extractors.story_points import StoryPointExtractor
15
15
  from .extractors.tickets import TicketExtractor
16
- from .reports.csv_writer import CSVReportGenerator
16
+ from .integrations.orchestrator import IntegrationOrchestrator
17
+ from .metrics.dora import DORAMetricsCalculator
17
18
  from .reports.analytics_writer import AnalyticsReportGenerator
19
+ from .reports.csv_writer import CSVReportGenerator
18
20
  from .reports.narrative_writer import NarrativeReportGenerator
19
- from .metrics.dora import DORAMetricsCalculator
20
- from .integrations.orchestrator import IntegrationOrchestrator
21
21
 
22
22
 
23
23
  @click.group()
@@ -101,14 +101,36 @@ def analyze(config: Path, weeks: int, output: Optional[Path], anonymize: bool,
101
101
  manual_mappings=cfg.analysis.manual_identity_mappings
102
102
  )
103
103
 
104
- analyzer = GitAnalyzer(cache, branch_mapping_rules=cfg.analysis.branch_mapping_rules)
104
+ analyzer = GitAnalyzer(
105
+ cache,
106
+ branch_mapping_rules=cfg.analysis.branch_mapping_rules,
107
+ allowed_ticket_platforms=getattr(cfg.analysis, 'ticket_platforms', None),
108
+ exclude_paths=cfg.analysis.exclude_paths
109
+ )
105
110
  orchestrator = IntegrationOrchestrator(cfg, cache)
106
111
 
112
+ # Discover organization repositories if needed
113
+ repositories_to_analyze = cfg.repositories
114
+ if cfg.github.organization and not repositories_to_analyze:
115
+ click.echo(f"🔍 Discovering repositories from organization: {cfg.github.organization}")
116
+ try:
117
+ # Use a 'repos' directory in the config directory for cloned repositories
118
+ config_dir = Path(config).parent if config else Path.cwd()
119
+ repos_dir = config_dir / "repos"
120
+ discovered_repos = cfg.discover_organization_repositories(clone_base_path=repos_dir)
121
+ repositories_to_analyze = discovered_repos
122
+ click.echo(f" ✅ Found {len(discovered_repos)} repositories in organization")
123
+ for repo in discovered_repos:
124
+ click.echo(f" - {repo.name} ({repo.github_repo})")
125
+ except Exception as e:
126
+ click.echo(f" ❌ Failed to discover repositories: {e}")
127
+ return
128
+
107
129
  # Analysis period
108
130
  end_date = datetime.now()
109
131
  start_date = end_date - timedelta(weeks=weeks)
110
132
 
111
- click.echo(f"\n🚀 Analyzing {len(cfg.repositories)} repositories...")
133
+ click.echo(f"\n🚀 Analyzing {len(repositories_to_analyze)} repositories...")
112
134
  click.echo(f" Period: {start_date.strftime('%Y-%m-%d')} to {end_date.strftime('%Y-%m-%d')}")
113
135
 
114
136
  # Analyze repositories
@@ -116,13 +138,32 @@ def analyze(config: Path, weeks: int, output: Optional[Path], anonymize: bool,
116
138
  all_prs = []
117
139
  all_enrichments = {}
118
140
 
119
- for repo_config in cfg.repositories:
141
+ for repo_config in repositories_to_analyze:
120
142
  click.echo(f"\n📁 Analyzing {repo_config.name}...")
121
143
 
122
- # Check if repo exists
144
+ # Check if repo exists, clone if needed
123
145
  if not repo_config.path.exists():
124
- click.echo(f" ❌ Repository path not found: {repo_config.path}")
125
- continue
146
+ # Try to clone if we have a github_repo configured
147
+ if repo_config.github_repo and cfg.github.organization:
148
+ click.echo(" 📥 Cloning repository from GitHub...")
149
+ try:
150
+ # Ensure parent directory exists
151
+ repo_config.path.parent.mkdir(parents=True, exist_ok=True)
152
+
153
+ # Clone the repository
154
+ clone_url = f"https://github.com/{repo_config.github_repo}.git"
155
+ if cfg.github.token:
156
+ # Use token for authentication
157
+ clone_url = f"https://{cfg.github.token}@github.com/{repo_config.github_repo}.git"
158
+
159
+ git.Repo.clone_from(clone_url, repo_config.path, branch=repo_config.branch)
160
+ click.echo(f" ✅ Successfully cloned {repo_config.github_repo}")
161
+ except Exception as e:
162
+ click.echo(f" ❌ Failed to clone repository: {e}")
163
+ continue
164
+ else:
165
+ click.echo(f" ❌ Repository path not found: {repo_config.path}")
166
+ continue
126
167
 
127
168
  # Analyze repository
128
169
  try:
@@ -174,7 +215,7 @@ def analyze(config: Path, weeks: int, output: Optional[Path], anonymize: bool,
174
215
 
175
216
  # Analyze tickets
176
217
  click.echo("\n🎫 Analyzing ticket references...")
177
- ticket_extractor = TicketExtractor()
218
+ ticket_extractor = TicketExtractor(allowed_platforms=getattr(cfg.analysis, 'ticket_platforms', None))
178
219
  ticket_analysis = ticket_extractor.analyze_ticket_coverage(all_commits, all_prs)
179
220
 
180
221
  for platform, count in ticket_analysis['ticket_summary'].items():
@@ -319,7 +360,7 @@ def analyze(config: Path, weeks: int, output: Optional[Path], anonymize: bool,
319
360
  click.echo(f" - Total story points: {total_story_points}")
320
361
 
321
362
  if dora_metrics:
322
- click.echo(f"\n🎯 DORA Metrics:")
363
+ click.echo("\n🎯 DORA Metrics:")
323
364
  click.echo(f" - Deployment frequency: {dora_metrics['deployment_frequency']['category']}")
324
365
  click.echo(f" - Lead time: {dora_metrics['lead_time_hours']:.1f} hours")
325
366
  click.echo(f" - Change failure rate: {dora_metrics['change_failure_rate']:.1f}%")
@@ -357,7 +398,7 @@ def cache_stats(config: Path):
357
398
  # Calculate cache size
358
399
  import os
359
400
  cache_size = 0
360
- for root, dirs, files in os.walk(cfg.cache.directory):
401
+ for root, _dirs, files in os.walk(cfg.cache.directory):
361
402
  for f in files:
362
403
  cache_size += os.path.getsize(os.path.join(root, f))
363
404
 
@@ -392,6 +433,59 @@ def merge_identity(config: Path, dev1: str, dev2: str):
392
433
  sys.exit(1)
393
434
 
394
435
 
436
+ @cli.command()
437
+ @click.option('--config', '-c',
438
+ type=click.Path(exists=True, path_type=Path),
439
+ required=True,
440
+ help='Path to YAML configuration file')
441
+ def discover_jira_fields(config: Path):
442
+ """Discover available JIRA fields, particularly story point fields."""
443
+ try:
444
+ cfg = ConfigLoader.load(config)
445
+
446
+ # Check if JIRA is configured
447
+ if not cfg.jira or not cfg.jira.base_url:
448
+ click.echo("❌ JIRA is not configured in the configuration file")
449
+ return
450
+
451
+ # Initialize JIRA integration
452
+ from .integrations.jira_integration import JIRAIntegration
453
+
454
+ jira = JIRAIntegration(
455
+ cfg.jira.base_url,
456
+ cfg.jira.access_user,
457
+ cfg.jira.access_token,
458
+ None # No cache needed for field discovery
459
+ )
460
+
461
+ # Validate connection
462
+ click.echo(f"🔗 Connecting to JIRA at {cfg.jira.base_url}...")
463
+ if not jira.validate_connection():
464
+ click.echo("❌ Failed to connect to JIRA. Check your credentials.")
465
+ return
466
+
467
+ click.echo("✅ Connected successfully!\n")
468
+ click.echo("🔍 Discovering fields with potential story point data...")
469
+
470
+ fields = jira.discover_fields()
471
+
472
+ if not fields:
473
+ click.echo("No potential story point fields found.")
474
+ else:
475
+ click.echo(f"\nFound {len(fields)} potential story point fields:")
476
+ click.echo("\nAdd these to your configuration under jira_integration.story_point_fields:")
477
+ click.echo("```yaml")
478
+ click.echo("jira_integration:")
479
+ click.echo(" story_point_fields:")
480
+ for field_id, field_info in fields.items():
481
+ click.echo(f' - "{field_id}" # {field_info["name"]}')
482
+ click.echo("```")
483
+
484
+ except Exception as e:
485
+ click.echo(f"❌ Error: {e}", err=True)
486
+ sys.exit(1)
487
+
488
+
395
489
  @cli.command()
396
490
  @click.option('--config', '-c',
397
491
  type=click.Path(exists=True, path_type=Path),
@@ -1,9 +1,12 @@
1
1
  """Configuration management for GitFlow Analytics."""
2
2
  import os
3
- import yaml
4
- from pathlib import Path
5
- from typing import Dict, Any, Optional, List
6
3
  from dataclasses import dataclass, field
4
+ from pathlib import Path
5
+ from typing import Any, Dict, List, Optional
6
+
7
+ import yaml
8
+ from dotenv import load_dotenv
9
+
7
10
 
8
11
  @dataclass
9
12
  class RepositoryConfig:
@@ -24,6 +27,7 @@ class GitHubConfig:
24
27
  """GitHub API configuration."""
25
28
  token: Optional[str] = None
26
29
  owner: Optional[str] = None
30
+ organization: Optional[str] = None
27
31
  base_url: str = "https://api.github.com"
28
32
  max_retries: int = 3
29
33
  backoff_factor: int = 2
@@ -42,10 +46,12 @@ class AnalysisConfig:
42
46
  story_point_patterns: List[str] = field(default_factory=list)
43
47
  exclude_authors: List[str] = field(default_factory=list)
44
48
  exclude_message_patterns: List[str] = field(default_factory=list)
49
+ exclude_paths: List[str] = field(default_factory=list)
45
50
  similarity_threshold: float = 0.85
46
51
  manual_identity_mappings: List[Dict[str, Any]] = field(default_factory=list)
47
52
  default_ticket_platform: Optional[str] = None
48
53
  branch_mapping_rules: Dict[str, List[str]] = field(default_factory=dict)
54
+ ticket_platforms: Optional[List[str]] = None
49
55
 
50
56
  @dataclass
51
57
  class OutputConfig:
@@ -65,6 +71,25 @@ class CacheConfig:
65
71
  ttl_hours: int = 168
66
72
  max_size_mb: int = 500
67
73
 
74
+ @dataclass
75
+ class JIRAConfig:
76
+ """JIRA configuration."""
77
+ access_user: str
78
+ access_token: str
79
+ base_url: Optional[str] = None
80
+
81
+ @dataclass
82
+ class JIRAIntegrationConfig:
83
+ """JIRA integration specific configuration."""
84
+ enabled: bool = True
85
+ fetch_story_points: bool = True
86
+ project_keys: List[str] = field(default_factory=list)
87
+ story_point_fields: List[str] = field(default_factory=lambda: [
88
+ "customfield_10016",
89
+ "customfield_10021",
90
+ "Story Points"
91
+ ])
92
+
68
93
  @dataclass
69
94
  class Config:
70
95
  """Main configuration container."""
@@ -73,14 +98,69 @@ class Config:
73
98
  analysis: AnalysisConfig
74
99
  output: OutputConfig
75
100
  cache: CacheConfig
101
+ jira: Optional[JIRAConfig] = None
102
+ jira_integration: Optional[JIRAIntegrationConfig] = None
103
+
104
+ def discover_organization_repositories(self, clone_base_path: Optional[Path] = None) -> List[RepositoryConfig]:
105
+ """Discover repositories from GitHub organization.
106
+
107
+ Args:
108
+ clone_base_path: Base directory where repos should be cloned/found.
109
+ If None, uses output directory.
110
+
111
+ Returns:
112
+ List of discovered repository configurations.
113
+ """
114
+ if not self.github.organization or not self.github.token:
115
+ return []
116
+
117
+ from github import Github
118
+
119
+ github_client = Github(self.github.token, base_url=self.github.base_url)
120
+
121
+ try:
122
+ org = github_client.get_organization(self.github.organization)
123
+ discovered_repos = []
124
+
125
+ base_path = clone_base_path or self.output.directory
126
+ if base_path is None:
127
+ raise ValueError("No base path available for repository cloning")
128
+
129
+ for repo in org.get_repos():
130
+ # Skip archived repositories
131
+ if repo.archived:
132
+ continue
133
+
134
+ # Create repository configuration
135
+ repo_path = base_path / repo.name
136
+ repo_config = RepositoryConfig(
137
+ name=repo.name,
138
+ path=repo_path,
139
+ github_repo=repo.full_name,
140
+ project_key=repo.name.upper().replace('-', '_'),
141
+ branch=repo.default_branch
142
+ )
143
+ discovered_repos.append(repo_config)
144
+
145
+ return discovered_repos
146
+
147
+ except Exception as e:
148
+ raise ValueError(f"Failed to discover repositories from organization {self.github.organization}: {e}") from e
76
149
 
77
150
  class ConfigLoader:
78
151
  """Load and validate configuration from YAML files."""
79
152
 
80
- @staticmethod
81
- def load(config_path: Path) -> Config:
153
+ @classmethod
154
+ def load(cls, config_path: Path) -> Config:
82
155
  """Load configuration from YAML file."""
83
- with open(config_path, 'r') as f:
156
+ # Load .env file from the same directory as the config file if it exists
157
+ config_dir = config_path.parent
158
+ env_file = config_dir / '.env'
159
+ if env_file.exists():
160
+ load_dotenv(env_file, override=True)
161
+ print(f"📋 Loaded environment variables from {env_file}")
162
+
163
+ with open(config_path) as f:
84
164
  data = yaml.safe_load(f)
85
165
 
86
166
  # Validate version
@@ -90,9 +170,16 @@ class ConfigLoader:
90
170
 
91
171
  # Process GitHub config
92
172
  github_data = data.get('github', {})
173
+
174
+ # Resolve GitHub token
175
+ github_token = cls._resolve_env_var(github_data.get('token'))
176
+ if github_data.get('token') and not github_token:
177
+ raise ValueError("GitHub is configured but GITHUB_TOKEN environment variable is not set")
178
+
93
179
  github_config = GitHubConfig(
94
- token=ConfigLoader._resolve_env_var(github_data.get('token')),
95
- owner=ConfigLoader._resolve_env_var(github_data.get('owner')),
180
+ token=github_token,
181
+ owner=cls._resolve_env_var(github_data.get('owner')),
182
+ organization=cls._resolve_env_var(github_data.get('organization')),
96
183
  base_url=github_data.get('base_url', 'https://api.github.com'),
97
184
  max_retries=github_data.get('rate_limit', {}).get('max_retries', 3),
98
185
  backoff_factor=github_data.get('rate_limit', {}).get('backoff_factor', 2)
@@ -100,26 +187,70 @@ class ConfigLoader:
100
187
 
101
188
  # Process repositories
102
189
  repositories = []
103
- for repo_data in data.get('repositories', []):
104
- # Handle github_repo with owner fallback
105
- github_repo = repo_data.get('github_repo')
106
- if github_repo and github_config.owner and '/' not in github_repo:
107
- github_repo = f"{github_config.owner}/{github_repo}"
108
-
109
- repo_config = RepositoryConfig(
110
- name=repo_data['name'],
111
- path=repo_data['path'],
112
- github_repo=github_repo,
113
- project_key=repo_data.get('project_key'),
114
- branch=repo_data.get('branch')
115
- )
116
- repositories.append(repo_config)
117
190
 
118
- if not repositories:
119
- raise ValueError("No repositories defined in configuration")
191
+ # Handle organization-based repository discovery
192
+ if github_config.organization and not data.get('repositories'):
193
+ # Organization specified but no explicit repositories - will be discovered at runtime
194
+ pass
195
+ else:
196
+ # Process explicitly defined repositories
197
+ for repo_data in data.get('repositories', []):
198
+ # Handle github_repo with owner/organization fallback
199
+ github_repo = repo_data.get('github_repo')
200
+ if github_repo and '/' not in github_repo:
201
+ if github_config.organization:
202
+ github_repo = f"{github_config.organization}/{github_repo}"
203
+ elif github_config.owner:
204
+ github_repo = f"{github_config.owner}/{github_repo}"
205
+
206
+ repo_config = RepositoryConfig(
207
+ name=repo_data['name'],
208
+ path=repo_data['path'],
209
+ github_repo=github_repo,
210
+ project_key=repo_data.get('project_key'),
211
+ branch=repo_data.get('branch')
212
+ )
213
+ repositories.append(repo_config)
214
+
215
+ # Allow empty repositories list if organization is specified
216
+ if not repositories and not github_config.organization:
217
+ raise ValueError("No repositories defined and no organization specified for discovery")
120
218
 
121
219
  # Process analysis settings
122
220
  analysis_data = data.get('analysis', {})
221
+
222
+ # Default exclude paths for common boilerplate/generated files
223
+ default_exclude_paths = [
224
+ "**/node_modules/**",
225
+ "**/vendor/**",
226
+ "**/dist/**",
227
+ "**/build/**",
228
+ "**/.next/**",
229
+ "**/__pycache__/**",
230
+ "**/*.min.js",
231
+ "**/*.min.css",
232
+ "**/*.bundle.js",
233
+ "**/*.bundle.css",
234
+ "**/package-lock.json",
235
+ "**/yarn.lock",
236
+ "**/poetry.lock",
237
+ "**/Pipfile.lock",
238
+ "**/composer.lock",
239
+ "**/Gemfile.lock",
240
+ "**/Cargo.lock",
241
+ "**/go.sum",
242
+ "**/*.generated.*",
243
+ "**/generated/**",
244
+ "**/coverage/**",
245
+ "**/.coverage/**",
246
+ "**/htmlcov/**",
247
+ "**/*.map"
248
+ ]
249
+
250
+ # Merge user-provided paths with defaults (user paths take precedence)
251
+ user_exclude_paths = analysis_data.get('exclude', {}).get('paths', [])
252
+ exclude_paths = user_exclude_paths if user_exclude_paths else default_exclude_paths
253
+
123
254
  analysis_config = AnalysisConfig(
124
255
  story_point_patterns=analysis_data.get('story_point_patterns', [
125
256
  r"(?:story\s*points?|sp|pts?)\s*[:=]\s*(\d+)",
@@ -131,17 +262,26 @@ class ConfigLoader:
131
262
  "renovate[bot]"
132
263
  ]),
133
264
  exclude_message_patterns=analysis_data.get('exclude', {}).get('message_patterns', []),
265
+ exclude_paths=exclude_paths,
134
266
  similarity_threshold=analysis_data.get('identity', {}).get('similarity_threshold', 0.85),
135
267
  manual_identity_mappings=analysis_data.get('identity', {}).get('manual_mappings', []),
136
268
  default_ticket_platform=analysis_data.get('default_ticket_platform'),
137
- branch_mapping_rules=analysis_data.get('branch_mapping_rules', {})
269
+ branch_mapping_rules=analysis_data.get('branch_mapping_rules', {}),
270
+ ticket_platforms=analysis_data.get('ticket_platforms')
138
271
  )
139
272
 
140
273
  # Process output settings
141
274
  output_data = data.get('output', {})
142
275
  output_dir = output_data.get('directory')
143
276
  if output_dir:
144
- output_dir = Path(output_dir).expanduser().resolve()
277
+ output_dir = Path(output_dir).expanduser()
278
+ # If relative path, make it relative to config file directory
279
+ if not output_dir.is_absolute():
280
+ output_dir = config_path.parent / output_dir
281
+ output_dir = output_dir.resolve()
282
+ else:
283
+ # Default to config file directory if not specified
284
+ output_dir = config_path.parent
145
285
 
146
286
  output_config = OutputConfig(
147
287
  directory=output_dir,
@@ -155,18 +295,61 @@ class ConfigLoader:
155
295
 
156
296
  # Process cache settings
157
297
  cache_data = data.get('cache', {})
298
+ cache_dir = cache_data.get('directory', '.gitflow-cache')
299
+ cache_path = Path(cache_dir)
300
+ # If relative path, make it relative to config file directory
301
+ if not cache_path.is_absolute():
302
+ cache_path = config_path.parent / cache_path
303
+
158
304
  cache_config = CacheConfig(
159
- directory=Path(cache_data.get('directory', '.gitflow-cache')),
305
+ directory=cache_path.resolve(),
160
306
  ttl_hours=cache_data.get('ttl_hours', 168),
161
307
  max_size_mb=cache_data.get('max_size_mb', 500)
162
308
  )
163
309
 
310
+ # Process JIRA settings
311
+ jira_config = None
312
+ jira_data = data.get('jira', {})
313
+ if jira_data:
314
+ access_user = cls._resolve_env_var(jira_data.get('access_user', ''))
315
+ access_token = cls._resolve_env_var(jira_data.get('access_token', ''))
316
+
317
+ # Validate JIRA credentials if JIRA is configured
318
+ if jira_data.get('access_user') and jira_data.get('access_token'):
319
+ if not access_user:
320
+ raise ValueError("JIRA is configured but JIRA_ACCESS_USER environment variable is not set")
321
+ if not access_token:
322
+ raise ValueError("JIRA is configured but JIRA_ACCESS_TOKEN environment variable is not set")
323
+
324
+ jira_config = JIRAConfig(
325
+ access_user=access_user,
326
+ access_token=access_token,
327
+ base_url=jira_data.get('base_url')
328
+ )
329
+
330
+ # Process JIRA integration settings
331
+ jira_integration_config = None
332
+ jira_integration_data = data.get('jira_integration', {})
333
+ if jira_integration_data:
334
+ jira_integration_config = JIRAIntegrationConfig(
335
+ enabled=jira_integration_data.get('enabled', True),
336
+ fetch_story_points=jira_integration_data.get('fetch_story_points', True),
337
+ project_keys=jira_integration_data.get('project_keys', []),
338
+ story_point_fields=jira_integration_data.get('story_point_fields', [
339
+ "customfield_10016",
340
+ "customfield_10021",
341
+ "Story Points"
342
+ ])
343
+ )
344
+
164
345
  return Config(
165
346
  repositories=repositories,
166
347
  github=github_config,
167
348
  analysis=analysis_config,
168
349
  output=output_config,
169
- cache=cache_config
350
+ cache=cache_config,
351
+ jira=jira_config,
352
+ jira_integration=jira_integration_config
170
353
  )
171
354
 
172
355
  @staticmethod
@@ -1,29 +1,33 @@
1
1
  """Git repository analyzer with batch processing support."""
2
- import re
2
+ import fnmatch
3
3
  from datetime import datetime
4
- from typing import List, Dict, Any, Optional, Tuple, Generator
5
4
  from pathlib import Path
5
+ from typing import Any, Dict, Generator, List, Optional
6
+
6
7
  import git
7
8
  from git import Repo
8
9
  from tqdm import tqdm
9
10
 
10
- from .cache import GitAnalysisCache
11
11
  from ..extractors.story_points import StoryPointExtractor
12
12
  from ..extractors.tickets import TicketExtractor
13
13
  from .branch_mapper import BranchToProjectMapper
14
+ from .cache import GitAnalysisCache
14
15
 
15
16
 
16
17
  class GitAnalyzer:
17
18
  """Analyze Git repositories with caching and batch processing."""
18
19
 
19
20
  def __init__(self, cache: GitAnalysisCache, batch_size: int = 1000,
20
- branch_mapping_rules: Optional[Dict[str, List[str]]] = None):
21
+ branch_mapping_rules: Optional[Dict[str, List[str]]] = None,
22
+ allowed_ticket_platforms: Optional[List[str]] = None,
23
+ exclude_paths: Optional[List[str]] = None):
21
24
  """Initialize analyzer with cache."""
22
25
  self.cache = cache
23
26
  self.batch_size = batch_size
24
27
  self.story_point_extractor = StoryPointExtractor()
25
- self.ticket_extractor = TicketExtractor()
28
+ self.ticket_extractor = TicketExtractor(allowed_platforms=allowed_ticket_platforms)
26
29
  self.branch_mapper = BranchToProjectMapper(branch_mapping_rules)
30
+ self.exclude_paths = exclude_paths or []
27
31
 
28
32
  def analyze_repository(self, repo_path: Path, since: datetime,
29
33
  branch: Optional[str] = None) -> List[Dict[str, Any]]:
@@ -31,7 +35,7 @@ class GitAnalyzer:
31
35
  try:
32
36
  repo = Repo(repo_path)
33
37
  except Exception as e:
34
- raise ValueError(f"Failed to open repository at {repo_path}: {e}")
38
+ raise ValueError(f"Failed to open repository at {repo_path}: {e}") from e
35
39
 
36
40
  # Get commits to analyze
37
41
  commits = self._get_commits(repo, since, branch)
@@ -133,12 +137,18 @@ class GitAnalyzer:
133
137
  commit_data['branch'], repo_path
134
138
  )
135
139
 
136
- # Calculate metrics
140
+ # Calculate metrics - use raw stats for backward compatibility
137
141
  stats = commit.stats.total
138
142
  commit_data['files_changed'] = stats.get('files', 0)
139
143
  commit_data['insertions'] = stats.get('insertions', 0)
140
144
  commit_data['deletions'] = stats.get('deletions', 0)
141
145
 
146
+ # Calculate filtered metrics (excluding boilerplate/generated files)
147
+ filtered_stats = self._calculate_filtered_stats(commit)
148
+ commit_data['filtered_files_changed'] = filtered_stats['files']
149
+ commit_data['filtered_insertions'] = filtered_stats['insertions']
150
+ commit_data['filtered_deletions'] = filtered_stats['deletions']
151
+
142
152
  # Extract story points
143
153
  commit_data['story_points'] = self.story_point_extractor.extract_from_text(
144
154
  commit.message
@@ -192,4 +202,54 @@ class GitAnalyzer:
192
202
  '.cs', '.vb', '.r', '.m', '.mm', '.f90', '.f95', '.lua'
193
203
  }
194
204
 
195
- return any(filepath.endswith(ext) for ext in code_extensions)
205
+ return any(filepath.endswith(ext) for ext in code_extensions)
206
+
207
+ def _should_exclude_file(self, filepath: str) -> bool:
208
+ """Check if file should be excluded from line counting."""
209
+ if not filepath:
210
+ return False
211
+
212
+ # Normalize path separators for consistent matching
213
+ filepath = filepath.replace('\\', '/')
214
+
215
+ # Check against exclude patterns
216
+ return any(fnmatch.fnmatch(filepath, pattern) for pattern in self.exclude_paths)
217
+
218
+ def _calculate_filtered_stats(self, commit: git.Commit) -> Dict[str, int]:
219
+ """Calculate commit statistics excluding boilerplate/generated files."""
220
+ filtered_stats = {
221
+ 'files': 0,
222
+ 'insertions': 0,
223
+ 'deletions': 0
224
+ }
225
+
226
+ # For initial commits or commits without parents
227
+ parent = commit.parents[0] if commit.parents else None
228
+
229
+ try:
230
+ for diff in commit.diff(parent):
231
+ # Get file path
232
+ file_path = diff.b_path if diff.b_path else diff.a_path
233
+ if not file_path:
234
+ continue
235
+
236
+ # Skip excluded files
237
+ if self._should_exclude_file(file_path):
238
+ continue
239
+
240
+ # Count the file
241
+ filtered_stats['files'] += 1
242
+
243
+ # Count insertions and deletions
244
+ if diff.diff:
245
+ diff_text = diff.diff.decode('utf-8', errors='ignore')
246
+ for line in diff_text.split('\n'):
247
+ if line.startswith('+') and not line.startswith('+++'):
248
+ filtered_stats['insertions'] += 1
249
+ elif line.startswith('-') and not line.startswith('---'):
250
+ filtered_stats['deletions'] += 1
251
+ except Exception:
252
+ # If we can't calculate filtered stats, return zeros
253
+ pass
254
+
255
+ return filtered_stats
@@ -1,7 +1,7 @@
1
1
  """Map git branches to projects based on naming conventions."""
2
2
  import re
3
- from typing import Dict, List, Optional, Tuple
4
3
  from pathlib import Path
4
+ from typing import Dict, List, Optional
5
5
 
6
6
 
7
7
  class BranchToProjectMapper: