gitflow-analytics 1.0.0__py3-none-any.whl

This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
gitflow_analytics/__init__.py
@@ -0,0 +1,22 @@
+ """GitFlow Analytics - Git repository productivity analysis tool."""
+
+ from ._version import __version__, __version_info__
+
+ __author__ = 'Bob Matyas'
+ __email__ = 'bobmatnyc@gmail.com'
+
+ from .core.analyzer import GitAnalyzer
+ from .core.cache import GitAnalysisCache
+ from .core.identity import DeveloperIdentityResolver
+ from .extractors.story_points import StoryPointExtractor
+ from .extractors.tickets import TicketExtractor
+ from .reports.csv_writer import CSVReportGenerator
+
+ __all__ = [
+     'GitAnalyzer',
+     'GitAnalysisCache',
+     'DeveloperIdentityResolver',
+     'StoryPointExtractor',
+     'TicketExtractor',
+     'CSVReportGenerator',
+ ]
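The exported names above mirror how the cli.py module further down wires the pieces together. As a rough usage sketch of that public surface (constructor and method signatures are inferred from the analyze command in this diff, not from documentation; the repository path is a placeholder):

# Hypothetical usage sketch of the exported API, mirroring cli.py below.
# All signatures are inferred from this diff; treat them as assumptions.
from datetime import datetime, timedelta
from pathlib import Path

from gitflow_analytics import (
    GitAnalyzer,
    GitAnalysisCache,
    DeveloperIdentityResolver,
)

# Cache and identity store, as cli.py constructs them.
cache = GitAnalysisCache(Path(".gitflow-cache"), ttl_hours=168)
identities = DeveloperIdentityResolver(Path(".gitflow-cache") / "identities.db")
analyzer = GitAnalyzer(cache, branch_mapping_rules={})

# Analyze the last 12 weeks of one repository on its main branch.
since = datetime.now() - timedelta(weeks=12)
commits = analyzer.analyze_repository(
    Path("~/src/my-repo").expanduser(),  # placeholder path
    since,
    "main",
)

# Resolve each commit author to a canonical developer identity.
for commit in commits:
    commit["canonical_id"] = identities.resolve_developer(
        commit["author_name"], commit["author_email"]
    )

print(f"{len(commits)} commits analyzed")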
gitflow_analytics/_version.py
@@ -0,0 +1,4 @@
+ """Version information for gitflow-analytics."""
+
+ __version__ = "1.0.0"
+ __version_info__ = tuple(int(x) for x in __version__.split("."))
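For reference, the derived tuple supports ordered version comparisons without re-parsing the string at each call site. An illustrative check:

# Illustrative only: with __version__ = "1.0.0" as above,
# the derived tuple compares element by element.
__version__ = "1.0.0"
__version_info__ = tuple(int(x) for x in __version__.split("."))

assert __version_info__ == (1, 0, 0)
assert __version_info__ >= (1, 0)  # a longer tuple with an equal prefix compares greater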
gitflow_analytics/cli.py
@@ -0,0 +1,441 @@
+ """Command-line interface for GitFlow Analytics."""
+ import click
+ import yaml
+ from pathlib import Path
+ from datetime import datetime, timedelta
+ from typing import Optional
+ import sys
+ import pandas as pd
+
+ from .config import ConfigLoader, Config
+ from .core.cache import GitAnalysisCache
+ from .core.analyzer import GitAnalyzer
+ from .core.identity import DeveloperIdentityResolver
+ from .extractors.story_points import StoryPointExtractor
+ from .extractors.tickets import TicketExtractor
+ from .reports.csv_writer import CSVReportGenerator
+ from .reports.analytics_writer import AnalyticsReportGenerator
+ from .reports.narrative_writer import NarrativeReportGenerator
+ from .metrics.dora import DORAMetricsCalculator
+ from .integrations.orchestrator import IntegrationOrchestrator
+
+
+ @click.group()
+ @click.version_option(version='1.0.0', prog_name='GitFlow Analytics')
+ def cli():
+     """GitFlow Analytics - Analyze Git repositories for productivity insights."""
+     pass
+
+
+ @cli.command()
+ @click.option('--config', '-c',
+               type=click.Path(exists=True, path_type=Path),
+               required=True,
+               help='Path to YAML configuration file')
+ @click.option('--weeks', '-w',
+               type=int,
+               default=12,
+               help='Number of weeks to analyze (default: 12)')
+ @click.option('--output', '-o',
+               type=click.Path(path_type=Path),
+               default=None,
+               help='Output directory for reports (overrides config file)')
+ @click.option('--anonymize',
+               is_flag=True,
+               help='Anonymize developer information in reports')
+ @click.option('--no-cache',
+               is_flag=True,
+               help='Disable caching (slower but always fresh)')
+ @click.option('--validate-only',
+               is_flag=True,
+               help='Validate configuration without running analysis')
+ @click.option('--clear-cache',
+               is_flag=True,
+               help='Clear cache before running analysis')
+ def analyze(config: Path, weeks: int, output: Optional[Path], anonymize: bool,
+             no_cache: bool, validate_only: bool, clear_cache: bool):
+     """Analyze Git repositories using configuration file."""
+
+     try:
+         # Load configuration
+         click.echo(f"📋 Loading configuration from {config}...")
+         cfg = ConfigLoader.load(config)
+
+         # Validate configuration
+         warnings = ConfigLoader.validate_config(cfg)
+         if warnings:
+             click.echo("⚠️ Configuration warnings:")
+             for warning in warnings:
+                 click.echo(f" - {warning}")
+
+         if validate_only:
+             if not warnings:
+                 click.echo("✅ Configuration is valid!")
+             else:
+                 click.echo("❌ Configuration has issues that should be addressed.")
+             return
+
+         # Use output directory from CLI or config
+         if output is None:
+             output = cfg.output.directory if cfg.output.directory else Path('./reports')
+
+         # Setup output directory
+         output.mkdir(parents=True, exist_ok=True)
+
+         # Initialize components
+         cache_dir = cfg.cache.directory
+         if clear_cache:
+             click.echo("🗑️ Clearing cache...")
+             import shutil
+             if cache_dir.exists():
+                 shutil.rmtree(cache_dir)
+
+         cache = GitAnalysisCache(
+             cache_dir,
+             ttl_hours=0 if no_cache else cfg.cache.ttl_hours
+         )
+
+         identity_resolver = DeveloperIdentityResolver(
+             cache_dir / 'identities.db',
+             similarity_threshold=cfg.analysis.similarity_threshold,
+             manual_mappings=cfg.analysis.manual_identity_mappings
+         )
+
+         analyzer = GitAnalyzer(cache, branch_mapping_rules=cfg.analysis.branch_mapping_rules)
+         orchestrator = IntegrationOrchestrator(cfg, cache)
+
+         # Analysis period
+         end_date = datetime.now()
+         start_date = end_date - timedelta(weeks=weeks)
+
+         click.echo(f"\n🚀 Analyzing {len(cfg.repositories)} repositories...")
+         click.echo(f" Period: {start_date.strftime('%Y-%m-%d')} to {end_date.strftime('%Y-%m-%d')}")
+
+         # Analyze repositories
+         all_commits = []
+         all_prs = []
+         all_enrichments = {}
+
+         for repo_config in cfg.repositories:
+             click.echo(f"\n📁 Analyzing {repo_config.name}...")
+
+             # Check if repo exists
+             if not repo_config.path.exists():
+                 click.echo(f" ❌ Repository path not found: {repo_config.path}")
+                 continue
+
+             # Analyze repository
+             try:
+                 commits = analyzer.analyze_repository(
+                     repo_config.path,
+                     start_date,
+                     repo_config.branch
+                 )
+
+                 # Add project key and resolve developer identities
+                 for commit in commits:
+                     # Use configured project key or fall back to inferred project
+                     if repo_config.project_key and repo_config.project_key != 'UNKNOWN':
+                         commit['project_key'] = repo_config.project_key
+                     else:
+                         commit['project_key'] = commit.get('inferred_project', 'UNKNOWN')
+
+                     commit['canonical_id'] = identity_resolver.resolve_developer(
+                         commit['author_name'],
+                         commit['author_email']
+                     )
+
+                 all_commits.extend(commits)
+                 click.echo(f" ✅ Found {len(commits)} commits")
+
+                 # Enrich with integration data
+                 enrichment = orchestrator.enrich_repository_data(
+                     repo_config, commits, start_date
+                 )
+                 all_enrichments[repo_config.name] = enrichment
+
+                 if enrichment['prs']:
+                     all_prs.extend(enrichment['prs'])
+                     click.echo(f" ✅ Found {len(enrichment['prs'])} pull requests")
+
+             except Exception as e:
+                 click.echo(f" ❌ Error: {e}")
+                 continue
+
+         if not all_commits:
+             click.echo("\n❌ No commits found in the specified period!")
+             return
+
+         # Update developer statistics
+         click.echo("\n👥 Resolving developer identities...")
+         identity_resolver.update_commit_stats(all_commits)
+         developer_stats = identity_resolver.get_developer_stats()
+         click.echo(f" ✅ Identified {len(developer_stats)} unique developers")
+
+         # Analyze tickets
+         click.echo("\n🎫 Analyzing ticket references...")
+         ticket_extractor = TicketExtractor()
+         ticket_analysis = ticket_extractor.analyze_ticket_coverage(all_commits, all_prs)
+
+         for platform, count in ticket_analysis['ticket_summary'].items():
+             click.echo(f" - {platform.title()}: {count} unique tickets")
+
+         # Generate reports
+         click.echo("\n📊 Generating reports...")
+         report_gen = CSVReportGenerator(anonymize=anonymize or cfg.output.anonymize_enabled)
+         analytics_gen = AnalyticsReportGenerator(anonymize=anonymize or cfg.output.anonymize_enabled)
+
+         # Weekly metrics report
+         weekly_report = output / f'weekly_metrics_{datetime.now().strftime("%Y%m%d")}.csv'
+         report_gen.generate_weekly_report(
+             all_commits,
+             developer_stats,
+             weekly_report,
+             weeks
+         )
+         click.echo(f" ✅ Weekly metrics: {weekly_report}")
+
+         # Summary report
+         summary_report = output / f'summary_{datetime.now().strftime("%Y%m%d")}.csv'
+         report_gen.generate_summary_report(
+             all_commits,
+             all_prs,
+             developer_stats,
+             ticket_analysis,
+             summary_report
+         )
+         click.echo(f" ✅ Summary stats: {summary_report}")
+
+         # Developer report
+         developer_report = output / f'developers_{datetime.now().strftime("%Y%m%d")}.csv'
+         report_gen.generate_developer_report(
+             developer_stats,
+             developer_report
+         )
+         click.echo(f" ✅ Developer stats: {developer_report}")
+
+         # Activity distribution report
+         activity_report = output / f'activity_distribution_{datetime.now().strftime("%Y%m%d")}.csv'
+         analytics_gen.generate_activity_distribution_report(
+             all_commits,
+             developer_stats,
+             activity_report
+         )
+         click.echo(f" ✅ Activity distribution: {activity_report}")
+
+         # Developer focus report
+         focus_report = output / f'developer_focus_{datetime.now().strftime("%Y%m%d")}.csv'
+         analytics_gen.generate_developer_focus_report(
+             all_commits,
+             developer_stats,
+             focus_report,
+             weeks
+         )
+         click.echo(f" ✅ Developer focus: {focus_report}")
+
+         # Qualitative insights report
+         insights_report = output / f'qualitative_insights_{datetime.now().strftime("%Y%m%d")}.csv'
+         analytics_gen.generate_qualitative_insights_report(
+             all_commits,
+             developer_stats,
+             ticket_analysis,
+             insights_report
+         )
+         click.echo(f" ✅ Qualitative insights: {insights_report}")
+
+         # Calculate DORA metrics
+         dora_calculator = DORAMetricsCalculator()
+         dora_metrics = dora_calculator.calculate_dora_metrics(
+             all_commits, all_prs, start_date, end_date
+         )
+
+         # Aggregate PR metrics
+         pr_metrics = {}
+         for enrichment in all_enrichments.values():
+             if enrichment.get('pr_metrics'):
+                 # Combine metrics (simplified - in production would properly aggregate)
+                 pr_metrics = enrichment['pr_metrics']
+                 break
+
+         # Generate narrative report if markdown format is enabled
+         if 'markdown' in cfg.output.formats:
+             narrative_gen = NarrativeReportGenerator()
+
+             # Load activity distribution data
+             activity_df = pd.read_csv(activity_report)
+             activity_data = activity_df.to_dict('records')
+
+             # Load focus data
+             focus_df = pd.read_csv(focus_report)
+             focus_data = focus_df.to_dict('records')
+
+             # Load insights data
+             insights_df = pd.read_csv(insights_report)
+             insights_data = insights_df.to_dict('records')
+
+             narrative_report = output / f'narrative_report_{datetime.now().strftime("%Y%m%d")}.md'
+             narrative_gen.generate_narrative_report(
+                 all_commits,
+                 all_prs,
+                 developer_stats,
+                 activity_data,
+                 focus_data,
+                 insights_data,
+                 ticket_analysis,
+                 pr_metrics,
+                 narrative_report,
+                 weeks
+             )
+             click.echo(f" ✅ Narrative report: {narrative_report}")
+
+         # Generate JSON export if enabled
+         if 'json' in cfg.output.formats:
+             json_report = output / f'gitflow_export_{datetime.now().strftime("%Y%m%d")}.json'
+
+             project_metrics = {
+                 'ticket_analysis': ticket_analysis,
+                 'pr_metrics': pr_metrics,
+                 'enrichments': all_enrichments
+             }
+
+             orchestrator.export_to_json(
+                 all_commits,
+                 all_prs,
+                 developer_stats,
+                 project_metrics,
+                 dora_metrics,
+                 str(json_report)
+             )
+             click.echo(f" ✅ JSON export: {json_report}")
+
+         # Print summary
+         click.echo("\n📈 Analysis Summary:")
+         click.echo(f" - Total commits: {len(all_commits)}")
+         click.echo(f" - Total PRs: {len(all_prs)}")
+         click.echo(f" - Active developers: {len(developer_stats)}")
+         click.echo(f" - Ticket coverage: {ticket_analysis['commit_coverage_pct']:.1f}%")
+
+         total_story_points = sum(c.get('story_points', 0) or 0 for c in all_commits)
+         click.echo(f" - Total story points: {total_story_points}")
+
+         if dora_metrics:
+             click.echo("\n🎯 DORA Metrics:")
+             click.echo(f" - Deployment frequency: {dora_metrics['deployment_frequency']['category']}")
+             click.echo(f" - Lead time: {dora_metrics['lead_time_hours']:.1f} hours")
+             click.echo(f" - Change failure rate: {dora_metrics['change_failure_rate']:.1f}%")
+             click.echo(f" - MTTR: {dora_metrics['mttr_hours']:.1f} hours")
+             click.echo(f" - Performance level: {dora_metrics['performance_level']}")
+
+         click.echo(f"\n✅ Analysis complete! Reports saved to {output}")
+
+     except Exception as e:
+         click.echo(f"\n❌ Error: {e}", err=True)
+         if '--debug' in sys.argv:
+             raise
+         sys.exit(1)
+
+
+ @cli.command()
+ @click.option('--config', '-c',
+               type=click.Path(exists=True, path_type=Path),
+               required=True,
+               help='Path to YAML configuration file')
+ def cache_stats(config: Path):
+     """Show cache statistics."""
+     try:
+         cfg = ConfigLoader.load(config)
+         cache = GitAnalysisCache(cfg.cache.directory)
+
+         stats = cache.get_cache_stats()
+
+         click.echo("📊 Cache Statistics:")
+         click.echo(f" - Cached commits: {stats['cached_commits']}")
+         click.echo(f" - Cached PRs: {stats['cached_prs']}")
+         click.echo(f" - Cached issues: {stats['cached_issues']}")
+         click.echo(f" - Stale entries: {stats['stale_commits']}")
+
+         # Calculate cache size
+         import os
+         cache_size = 0
+         for root, dirs, files in os.walk(cfg.cache.directory):
+             for f in files:
+                 cache_size += os.path.getsize(os.path.join(root, f))
+
+         click.echo(f" - Cache size: {cache_size / 1024 / 1024:.1f} MB")
+
+     except Exception as e:
+         click.echo(f"❌ Error: {e}", err=True)
+         sys.exit(1)
+
+
+ @cli.command()
+ @click.option('--config', '-c',
+               type=click.Path(exists=True, path_type=Path),
+               required=True,
+               help='Path to YAML configuration file')
+ @click.argument('dev1')
+ @click.argument('dev2')
+ def merge_identity(config: Path, dev1: str, dev2: str):
+     """Merge two developer identities."""
+     try:
+         cfg = ConfigLoader.load(config)
+         identity_resolver = DeveloperIdentityResolver(
+             cfg.cache.directory / 'identities.db'
+         )
+
+         click.echo(f"🔄 Merging {dev2} into {dev1}...")
+         identity_resolver.merge_identities(dev1, dev2)
+         click.echo("✅ Identities merged successfully!")
+
+     except Exception as e:
+         click.echo(f"❌ Error: {e}", err=True)
+         sys.exit(1)
+
+
+ @cli.command()
+ @click.option('--config', '-c',
+               type=click.Path(exists=True, path_type=Path),
+               required=True,
+               help='Path to YAML configuration file')
+ def list_developers(config: Path):
+     """List all known developers."""
+     try:
+         cfg = ConfigLoader.load(config)
+         identity_resolver = DeveloperIdentityResolver(
+             cfg.cache.directory / 'identities.db'
+         )
+
+         developers = identity_resolver.get_developer_stats()
+
+         if not developers:
+             click.echo("No developers found. Run analysis first.")
+             return
+
+         click.echo("👥 Known Developers:")
+         click.echo(f"{'Name':<30} {'Email':<40} {'Commits':<10} {'Points':<10} {'Aliases'}")
+         click.echo("-" * 100)
+
+         for dev in developers[:20]:  # Show top 20
+             click.echo(
+                 f"{dev['primary_name']:<30} "
+                 f"{dev['primary_email']:<40} "
+                 f"{dev['total_commits']:<10} "
+                 f"{dev['total_story_points']:<10} "
+                 f"{dev['alias_count']}"
+             )
+
+         if len(developers) > 20:
+             click.echo(f"\n... and {len(developers) - 20} more developers")
+
+     except Exception as e:
+         click.echo(f"❌ Error: {e}", err=True)
+         sys.exit(1)
+
+
+ def main():
+     """Main entry point."""
+     cli()
+
+
+ if __name__ == '__main__':
+     main()
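Since the commands above are a standard Click group, they can be smoke-tested with Click's own test runner. A minimal sketch, assuming the module path gitflow_analytics.cli (inferred from this diff; --help and --version need no repository setup):

# Minimal smoke test of the CLI using click.testing.CliRunner.
from click.testing import CliRunner

from gitflow_analytics.cli import cli  # module path assumed from this diff

runner = CliRunner()

# version_option sets prog_name, so the output reads "GitFlow Analytics, version 1.0.0"
result = runner.invoke(cli, ["--version"])
assert result.exit_code == 0
assert "GitFlow Analytics" in result.output

# The analyze command's options should appear in its help text.
result = runner.invoke(cli, ["analyze", "--help"])
assert result.exit_code == 0
assert "--validate-only" in result.output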
gitflow_analytics/config.py
@@ -0,0 +1,215 @@
+ """Configuration management for GitFlow Analytics."""
+ import os
+ import yaml
+ from pathlib import Path
+ from typing import Dict, Any, Optional, List
+ from dataclasses import dataclass, field
+
+ @dataclass
+ class RepositoryConfig:
+     """Configuration for a single repository."""
+     name: str
+     path: Path
+     github_repo: Optional[str] = None
+     project_key: Optional[str] = None
+     branch: Optional[str] = None
+
+     def __post_init__(self):
+         self.path = Path(self.path).expanduser().resolve()
+         if not self.project_key:
+             self.project_key = self.name.upper().replace('-', '_')
+
+ @dataclass
+ class GitHubConfig:
+     """GitHub API configuration."""
+     token: Optional[str] = None
+     owner: Optional[str] = None
+     base_url: str = "https://api.github.com"
+     max_retries: int = 3
+     backoff_factor: int = 2
+
+     def get_repo_full_name(self, repo_name: str) -> str:
+         """Get full repository name including owner."""
+         if '/' in repo_name:
+             return repo_name
+         if self.owner:
+             return f"{self.owner}/{repo_name}"
+         raise ValueError(f"Repository {repo_name} needs owner specified")
+
+ @dataclass
+ class AnalysisConfig:
+     """Analysis-specific configuration."""
+     story_point_patterns: List[str] = field(default_factory=list)
+     exclude_authors: List[str] = field(default_factory=list)
+     exclude_message_patterns: List[str] = field(default_factory=list)
+     similarity_threshold: float = 0.85
+     manual_identity_mappings: List[Dict[str, Any]] = field(default_factory=list)
+     default_ticket_platform: Optional[str] = None
+     branch_mapping_rules: Dict[str, List[str]] = field(default_factory=dict)
+
+ @dataclass
+ class OutputConfig:
+     """Output configuration."""
+     directory: Optional[Path] = None
+     formats: List[str] = field(default_factory=lambda: ["csv", "markdown"])
+     csv_delimiter: str = ","
+     csv_encoding: str = "utf-8"
+     anonymize_enabled: bool = False
+     anonymize_fields: List[str] = field(default_factory=list)
+     anonymize_method: str = "hash"
+
+ @dataclass
+ class CacheConfig:
+     """Cache configuration."""
+     directory: Path = Path(".gitflow-cache")
+     ttl_hours: int = 168
+     max_size_mb: int = 500
+
+ @dataclass
+ class Config:
+     """Main configuration container."""
+     repositories: List[RepositoryConfig]
+     github: GitHubConfig
+     analysis: AnalysisConfig
+     output: OutputConfig
+     cache: CacheConfig
+
+ class ConfigLoader:
+     """Load and validate configuration from YAML files."""
+
+     @staticmethod
+     def load(config_path: Path) -> Config:
+         """Load configuration from YAML file."""
+         with open(config_path, 'r') as f:
+             data = yaml.safe_load(f)
+
+         # Validate version
+         version = data.get('version', '1.0')
+         if version not in ['1.0']:
+             raise ValueError(f"Unsupported config version: {version}")
+
+         # Process GitHub config
+         github_data = data.get('github', {})
+         github_config = GitHubConfig(
+             token=ConfigLoader._resolve_env_var(github_data.get('token')),
+             owner=ConfigLoader._resolve_env_var(github_data.get('owner')),
+             base_url=github_data.get('base_url', 'https://api.github.com'),
+             max_retries=github_data.get('rate_limit', {}).get('max_retries', 3),
+             backoff_factor=github_data.get('rate_limit', {}).get('backoff_factor', 2)
+         )
+
+         # Process repositories
+         repositories = []
+         for repo_data in data.get('repositories', []):
+             # Handle github_repo with owner fallback
+             github_repo = repo_data.get('github_repo')
+             if github_repo and github_config.owner and '/' not in github_repo:
+                 github_repo = f"{github_config.owner}/{github_repo}"
+
+             repo_config = RepositoryConfig(
+                 name=repo_data['name'],
+                 path=repo_data['path'],
+                 github_repo=github_repo,
+                 project_key=repo_data.get('project_key'),
+                 branch=repo_data.get('branch')
+             )
+             repositories.append(repo_config)
+
+         if not repositories:
+             raise ValueError("No repositories defined in configuration")
+
+         # Process analysis settings
+         analysis_data = data.get('analysis', {})
+         analysis_config = AnalysisConfig(
+             story_point_patterns=analysis_data.get('story_point_patterns', [
+                 r"(?:story\s*points?|sp|pts?)\s*[:=]\s*(\d+)",
+                 r"\[(\d+)\s*(?:sp|pts?)\]",
+                 r"#(\d+)sp"
+             ]),
+             exclude_authors=analysis_data.get('exclude', {}).get('authors', [
+                 "dependabot[bot]",
+                 "renovate[bot]"
+             ]),
+             exclude_message_patterns=analysis_data.get('exclude', {}).get('message_patterns', []),
+             similarity_threshold=analysis_data.get('identity', {}).get('similarity_threshold', 0.85),
+             manual_identity_mappings=analysis_data.get('identity', {}).get('manual_mappings', []),
+             default_ticket_platform=analysis_data.get('default_ticket_platform'),
+             branch_mapping_rules=analysis_data.get('branch_mapping_rules', {})
+         )
+
+         # Process output settings
+         output_data = data.get('output', {})
+         output_dir = output_data.get('directory')
+         if output_dir:
+             output_dir = Path(output_dir).expanduser().resolve()
+
+         output_config = OutputConfig(
+             directory=output_dir,
+             formats=output_data.get('formats', ['csv', 'markdown']),
+             csv_delimiter=output_data.get('csv', {}).get('delimiter', ','),
+             csv_encoding=output_data.get('csv', {}).get('encoding', 'utf-8'),
+             anonymize_enabled=output_data.get('anonymization', {}).get('enabled', False),
+             anonymize_fields=output_data.get('anonymization', {}).get('fields', []),
+             anonymize_method=output_data.get('anonymization', {}).get('method', 'hash')
+         )
+
+         # Process cache settings
+         cache_data = data.get('cache', {})
+         cache_config = CacheConfig(
+             directory=Path(cache_data.get('directory', '.gitflow-cache')),
+             ttl_hours=cache_data.get('ttl_hours', 168),
+             max_size_mb=cache_data.get('max_size_mb', 500)
+         )
+
+         return Config(
+             repositories=repositories,
+             github=github_config,
+             analysis=analysis_config,
+             output=output_config,
+             cache=cache_config
+         )
+
+     @staticmethod
+     def _resolve_env_var(value: Optional[str]) -> Optional[str]:
+         """Resolve environment variable references."""
+         if not value:
+             return None
+
+         if value.startswith('${') and value.endswith('}'):
+             env_var = value[2:-1]
+             resolved = os.environ.get(env_var)
+             if not resolved:
+                 raise ValueError(f"Environment variable {env_var} not set")
+             return resolved
+
+         return value
+
+     @staticmethod
+     def validate_config(config: Config) -> List[str]:
+         """Validate configuration and return list of warnings."""
+         warnings = []
+
+         # Check repository paths exist
+         for repo in config.repositories:
+             if not repo.path.exists():
+                 warnings.append(f"Repository path does not exist: {repo.path}")
+             elif not (repo.path / '.git').exists():
+                 warnings.append(f"Path is not a git repository: {repo.path}")
+
+         # Check GitHub token if GitHub repos are specified
+         has_github_repos = any(r.github_repo for r in config.repositories)
+         if has_github_repos and not config.github.token:
+             warnings.append("GitHub repositories specified but no GitHub token provided")
+
+         # Check if owner is needed
+         for repo in config.repositories:
+             if repo.github_repo and '/' not in repo.github_repo and not config.github.owner:
+                 warnings.append(f"Repository {repo.github_repo} needs owner specified")
+
+         # Check cache directory permissions
+         try:
+             config.cache.directory.mkdir(exist_ok=True, parents=True)
+         except PermissionError:
+             warnings.append(f"Cannot create cache directory: {config.cache.directory}")
+
+         return warnings
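For reference, a minimal configuration exercising ConfigLoader and its ${VAR} environment-variable resolution might look like the sketch below. Field names are taken from ConfigLoader.load above; the module path gitflow_analytics.config, the org/repo names, and the token value are illustrative assumptions. Note that the version key must be quoted in YAML so it parses as the string "1.0" rather than a float.

# Sketch: load a minimal config through ConfigLoader, including ${VAR}
# resolution for the GitHub token. All names here are placeholders.
import os
import tempfile
from pathlib import Path

from gitflow_analytics.config import ConfigLoader  # module path assumed

CONFIG_YAML = """
version: "1.0"
github:
  token: "${GITHUB_TOKEN}"
  owner: "example-org"
repositories:
  - name: "my-service"
    path: "~/src/my-service"
    branch: "main"
cache:
  directory: ".gitflow-cache"
output:
  formats: ["csv", "markdown"]
"""

os.environ.setdefault("GITHUB_TOKEN", "ghp_example")  # so ${GITHUB_TOKEN} resolves

with tempfile.NamedTemporaryFile("w", suffix=".yaml", delete=False) as f:
    f.write(CONFIG_YAML)
    config_path = Path(f.name)

cfg = ConfigLoader.load(config_path)
print(cfg.repositories[0].project_key)              # MY_SERVICE, derived from the name
print(cfg.github.get_repo_full_name("my-service"))  # example-org/my-service

# validate_config returns warnings rather than raising; here it will flag
# the nonexistent repository path.
for warning in ConfigLoader.validate_config(cfg):
    print("warning:", warning)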