greenmining 1.0.3__py3-none-any.whl → 1.0.4__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
Files changed (43)
  1. greenmining/__init__.py +11 -29
  2. greenmining/__main__.py +9 -3
  3. greenmining/__version__.py +2 -2
  4. greenmining/analyzers/__init__.py +3 -7
  5. greenmining/analyzers/code_diff_analyzer.py +151 -61
  6. greenmining/analyzers/qualitative_analyzer.py +15 -81
  7. greenmining/analyzers/statistical_analyzer.py +8 -69
  8. greenmining/analyzers/temporal_analyzer.py +16 -72
  9. greenmining/config.py +105 -58
  10. greenmining/controllers/__init__.py +1 -5
  11. greenmining/controllers/repository_controller.py +153 -94
  12. greenmining/energy/__init__.py +13 -0
  13. greenmining/energy/base.py +165 -0
  14. greenmining/energy/codecarbon_meter.py +146 -0
  15. greenmining/energy/rapl.py +157 -0
  16. greenmining/gsf_patterns.py +4 -26
  17. greenmining/models/__init__.py +1 -5
  18. greenmining/models/aggregated_stats.py +4 -4
  19. greenmining/models/analysis_result.py +4 -4
  20. greenmining/models/commit.py +5 -5
  21. greenmining/models/repository.py +5 -5
  22. greenmining/presenters/__init__.py +1 -5
  23. greenmining/presenters/console_presenter.py +24 -24
  24. greenmining/services/__init__.py +10 -6
  25. greenmining/services/commit_extractor.py +8 -152
  26. greenmining/services/data_aggregator.py +45 -175
  27. greenmining/services/data_analyzer.py +9 -202
  28. greenmining/services/github_fetcher.py +212 -323
  29. greenmining/services/github_graphql_fetcher.py +371 -0
  30. greenmining/services/local_repo_analyzer.py +387 -0
  31. greenmining/services/reports.py +33 -137
  32. greenmining/utils.py +21 -149
  33. {greenmining-1.0.3.dist-info → greenmining-1.0.4.dist-info}/METADATA +61 -151
  34. greenmining-1.0.4.dist-info/RECORD +37 -0
  35. {greenmining-1.0.3.dist-info → greenmining-1.0.4.dist-info}/WHEEL +1 -1
  36. greenmining/analyzers/ml_feature_extractor.py +0 -512
  37. greenmining/analyzers/nlp_analyzer.py +0 -365
  38. greenmining/cli.py +0 -471
  39. greenmining/main.py +0 -37
  40. greenmining-1.0.3.dist-info/RECORD +0 -36
  41. greenmining-1.0.3.dist-info/entry_points.txt +0 -2
  42. {greenmining-1.0.3.dist-info → greenmining-1.0.4.dist-info}/licenses/LICENSE +0 -0
  43. {greenmining-1.0.3.dist-info → greenmining-1.0.4.dist-info}/top_level.txt +0 -0
greenmining/services/commit_extractor.py

@@ -1,4 +1,4 @@
-"""Commit extractor for green microservices mining."""
+# Commit extractor for green microservices mining.
 
 from __future__ import annotations
 
@@ -7,7 +7,6 @@ from datetime import datetime, timedelta
 from pathlib import Path
 from typing import Any, Dict, List, Optional
 
-import click
 from github import Github
 from tqdm import tqdm
 
@@ -24,7 +23,7 @@ from greenmining.utils import (
 
 
 class CommitExtractor:
-    """Extracts commit data from repositories using GitHub API."""
+    # Extracts commit data from repositories using GitHub API.
 
     def __init__(
         self,
@@ -34,15 +33,7 @@ class CommitExtractor:
         github_token: str | None = None,
         timeout: int = 60,
     ):
-        """Initialize commit extractor.
-
-        Args:
-            max_commits: Maximum commits per repository
-            skip_merges: Skip merge commits
-            days_back: Only analyze commits from last N days
-            github_token: GitHub API token (optional)
-            timeout: Timeout in seconds per repository (default: 60)
-        """
+        # Initialize commit extractor.
         self.max_commits = max_commits
         self.skip_merges = skip_merges
         self.days_back = days_back
@@ -51,14 +42,7 @@ class CommitExtractor:
         self.timeout = timeout
 
     def extract_from_repositories(self, repositories: list[dict[str, Any] | Repository]) -> list[dict[str, Any]]:
-        """Extract commits from list of repositories.
-
-        Args:
-            repositories: List of repository metadata (dicts or Repository objects)
-
-        Returns:
-            List of commit data dictionaries
-        """
+        # Extract commits from list of repositories.
         all_commits = []
         failed_repos = []
 
@@ -114,14 +98,7 @@ class CommitExtractor:
 
     @retry_on_exception(max_retries=2, delay=5.0, exceptions=(Exception,))
     def _extract_repo_commits(self, repo: dict[str, Any]) -> list[dict[str, Any]]:
-        """Extract commits from a single repository using GitHub API.
-
-        Args:
-            repo: Repository metadata (dict or Repository object)
-
-        Returns:
-            List of commit dictionaries
-        """
+        # Extract commits from a single repository using GitHub API.
         commits = []
         # Handle both Repository objects and dicts
         repo_name = repo.full_name if isinstance(repo, Repository) else repo["full_name"]
@@ -163,15 +140,7 @@ class CommitExtractor:
         return commits
 
     def _extract_commit_metadata(self, commit, repo_name: str) -> dict[str, Any]:
-        """Extract metadata from commit object.
-
-        Args:
-            commit: PyDriller commit object
-            repo_name: Repository name
-
-        Returns:
-            Dictionary with commit metadata
-        """
+        # Extract metadata from commit object.
         # Get modified files
         files_changed = []
         lines_added = 0
@@ -205,15 +174,7 @@ class CommitExtractor:
         }
 
     def _extract_commit_metadata_from_github(self, commit, repo_name: str) -> dict[str, Any]:
-        """Extract metadata from GitHub API commit object.
-
-        Args:
-            commit: GitHub API commit object
-            repo_name: Repository name
-
-        Returns:
-            Dictionary with commit metadata
-        """
+        # Extract metadata from GitHub API commit object.
         # Get modified files and stats
         files_changed = []
         lines_added = 0
@@ -245,13 +206,7 @@ class CommitExtractor:
         }
 
     def save_results(self, commits: list[dict[str, Any]], output_file: Path, repos_count: int):
-        """Save extracted commits to JSON file.
-
-        Args:
-            commits: List of commit data
-            output_file: Output file path
-            repos_count: Number of repositories processed
-        """
+        # Save extracted commits to JSON file.
        data = {
            "metadata": {
                "extracted_at": format_timestamp(),
@@ -267,102 +222,3 @@ class CommitExtractor:
 
         save_json_file(data, output_file)
         colored_print(f"Saved {len(commits)} commits to {output_file}", "green")
-
-
-@click.command()
-@click.option("--max-commits", default=50, help="Maximum commits per repository")
-@click.option("--skip-merges/--include-merges", default=True, help="Skip merge commits")
-@click.option("--days-back", default=730, help="Only analyze commits from last N days")
-@click.option(
-    "--repos-file", default=None, help="Input repositories file (default: data/repositories.json)"
-)
-@click.option("--output", default=None, help="Output file path (default: data/commits.json)")
-@click.option("--config-file", default=".env", help="Path to .env configuration file")
-def extract(
-    max_commits: int,
-    skip_merges: bool,
-    days_back: int,
-    repos_file: Optional[str],
-    output: Optional[str],
-    config_file: str,
-):
-    """Extract commits from fetched repositories."""
-    print_banner("Commit Data Extractor")
-
-    try:
-        # Load configuration
-        config = get_config(config_file)
-
-        # Determine input/output files
-        input_file = Path(repos_file) if repos_file else config.REPOS_FILE
-        output_file = Path(output) if output else config.COMMITS_FILE
-
-        # Check if input file exists
-        if not input_file.exists():
-            colored_print(f"Input file not found: {input_file}", "red")
-            colored_print("Please run 'fetch' command first to fetch repositories", "yellow")
-            exit(1)
-
-        # Load repositories
-        colored_print(f"Loading repositories from {input_file}...", "blue")
-        data = load_json_file(input_file)
-        repositories = data.get("repositories", [])
-
-        if not repositories:
-            colored_print("No repositories found in input file", "yellow")
-            exit(1)
-
-        colored_print(f"Loaded {len(repositories)} repositories", "green")
-
-        # Initialize extractor
-        extractor = CommitExtractor(
-            max_commits=max_commits, skip_merges=skip_merges, days_back=days_back
-        )
-
-        # Extract commits
-        commits = extractor.extract_from_repositories(repositories)
-
-        if not commits:
-            colored_print("No commits extracted", "yellow")
-            exit(1)
-
-        # Save results
-        extractor.save_results(commits, output_file, len(repositories))
-
-        # Display summary
-        colored_print(f"\n✓ Successfully extracted {len(commits)} commits", "green")
-        colored_print(f"Output saved to: {output_file}", "green")
-
-        # Calculate statistics
-        avg_commits = len(commits) / len(repositories)
-        colored_print("\nStatistics:", "cyan")
-        colored_print(f" Total repositories: {len(repositories)}", "white")
-        colored_print(f" Total commits: {len(commits)}", "white")
-        colored_print(f" Average commits per repo: {avg_commits:.1f}", "white")
-
-        # Show language breakdown
-        from collections import Counter
-
-        repo_languages = [repo["language"] for repo in repositories if repo.get("language")]
-        language_counts = Counter(repo_languages)
-
-        colored_print("\nLanguage breakdown:", "cyan")
-        for lang, count in language_counts.most_common(5):
-            colored_print(f" {lang}: {count} repos", "white")
-
-    except FileNotFoundError as e:
-        colored_print(f"File not found: {e}", "red")
-        exit(1)
-    except json.JSONDecodeError:
-        colored_print(f"Invalid JSON in input file: {input_file}", "red")
-        exit(1)
-    except Exception as e:
-        colored_print(f"Error: {e}", "red")
-        import traceback
-
-        traceback.print_exc()
-        exit(1)
-
-
-if __name__ == "__main__":
-    extract()
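
With cli.py, entry_points.txt, and this module-level click command removed in 1.0.4, commit extraction is driven through the CommitExtractor class directly. A minimal sketch, using only the constructor arguments and method signatures visible in this diff; the repository entry is illustrative, and the keyword values and output path simply mirror the defaults of the removed CLI options:

    from pathlib import Path

    from greenmining.services.commit_extractor import CommitExtractor

    # Mirrors the removed CLI defaults: --max-commits 50, --skip-merges, --days-back 730.
    extractor = CommitExtractor(max_commits=50, skip_merges=True, days_back=730)

    # Repository metadata dicts need at least "full_name"; "language" is optional.
    repositories = [{"full_name": "octocat/Hello-World", "language": "Python"}]

    commits = extractor.extract_from_repositories(repositories)
    extractor.save_results(commits, Path("data/commits.json"), len(repositories))
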
greenmining/services/data_aggregator.py

@@ -1,4 +1,4 @@
-"""Data aggregator for green microservices analysis results."""
+# Data aggregator for green microservices analysis results.
 
 from __future__ import annotations
 
@@ -7,11 +7,10 @@ from collections import defaultdict
 from pathlib import Path
 from typing import Any, Dict, List, Optional
 
-import click
 import pandas as pd
 
 from greenmining.analyzers import (
-    EnhancedStatisticalAnalyzer,
+    StatisticalAnalyzer,
     TemporalAnalyzer,
     QualitativeAnalyzer,
 )
@@ -29,27 +28,21 @@ from greenmining.utils import (
 
 
 class DataAggregator:
-    """Aggregates analysis results and generates statistics."""
+    # Aggregates analysis results and generates statistics.
 
     def __init__(
         self,
-        enable_enhanced_stats: bool = False,
+        enable_stats: bool = False,
         enable_temporal: bool = False,
         temporal_granularity: str = "quarter",
     ):
-        """Initialize aggregator.
-
-        Args:
-            enable_enhanced_stats: Enable enhanced statistical analysis
-            enable_temporal: Enable temporal trend analysis
-            temporal_granularity: Granularity for temporal analysis (day/week/month/quarter/year)
-        """
-        self.enable_enhanced_stats = enable_enhanced_stats
+        # Initialize aggregator.
+        self.enable_stats = enable_stats
         self.enable_temporal = enable_temporal
 
-        if self.enable_enhanced_stats:
-            self.statistical_analyzer = EnhancedStatisticalAnalyzer()
-            colored_print("Enhanced statistical analysis enabled", "cyan")
+        if self.enable_stats:
+            self.statistical_analyzer = StatisticalAnalyzer()
+            colored_print("Statistical analysis enabled", "cyan")
         else:
             self.statistical_analyzer = None
 
@@ -64,15 +57,7 @@ class DataAggregator:
     def aggregate(
         self, analysis_results: list[dict[str, Any]], repositories: list[dict[str, Any]]
     ) -> dict[str, Any]:
-        """Aggregate analysis results into summary statistics.
-
-        Args:
-            analysis_results: List of commit analysis results
-            repositories: List of repository metadata
-
-        Returns:
-            Aggregated statistics dictionary
-        """
+        # Aggregate analysis results into summary statistics.
         colored_print("\nAggregating analysis results...", "cyan")
 
         # Summary statistics
@@ -90,15 +75,15 @@ class DataAggregator:
         # Per-language statistics
         per_language_stats = self._generate_language_stats(analysis_results, repositories)
 
-        # Enhanced statistical analysis (if enabled)
-        enhanced_stats = None
-        if self.enable_enhanced_stats and len(analysis_results) > 0:
+        # Statistical analysis (if enabled)
+        stats_analysis = None
+        if self.enable_stats and len(analysis_results) > 0:
             try:
-                enhanced_stats = self._generate_enhanced_statistics(analysis_results)
-                colored_print(" Enhanced statistical analysis complete", "green")
+                stats_analysis = self._generate_statistics(analysis_results)
+                colored_print(" Statistical analysis complete", "green")
             except Exception as e:
-                colored_print(f"⚠️ Enhanced statistics failed: {e}", "yellow")
-                enhanced_stats = {"error": str(e)}
+                colored_print(f" Statistics failed: {e}", "yellow")
+                stats_analysis = {"error": str(e)}
 
         # Temporal trend analysis (if enabled)
         temporal_analysis = None
@@ -116,9 +101,9 @@ class DataAggregator:
                 ]
 
                 temporal_analysis = self.temporal_analyzer.analyze_trends(commits, analysis_results)
-                colored_print(" Temporal trend analysis complete", "green")
+                colored_print(" Temporal trend analysis complete", "green")
             except Exception as e:
-                colored_print(f"⚠️ Temporal analysis failed: {e}", "yellow")
+                colored_print(f" Temporal analysis failed: {e}", "yellow")
                 temporal_analysis = {"error": str(e)}
 
         result = {
@@ -129,8 +114,8 @@ class DataAggregator:
             "per_language_stats": per_language_stats,
         }
 
-        if enhanced_stats:
-            result["enhanced_statistics"] = enhanced_stats
+        if stats_analysis:
+            result["statistics"] = stats_analysis
 
         if temporal_analysis:
             result["temporal_analysis"] = temporal_analysis
@@ -140,7 +125,7 @@ class DataAggregator:
     def _generate_summary(
         self, results: list[dict[str, Any]], repos: list[dict[str, Any]]
    ) -> dict[str, Any]:
-        """Generate overall summary statistics."""
+        # Generate overall summary statistics.
         total_commits = len(results)
         green_aware_count = sum(1 for r in results if r.get("green_aware", False))
 
@@ -158,7 +143,7 @@ class DataAggregator:
         }
 
     def _analyze_known_patterns(self, results: list[dict[str, Any]]) -> list[dict[str, Any]]:
-        """Analyze known green software patterns."""
+        # Analyze known green software patterns.
         pattern_data = defaultdict(
             lambda: {"count": 0, "HIGH": 0, "MEDIUM": 0, "LOW": 0, "example_commits": []}
         )
@@ -209,7 +194,7 @@ class DataAggregator:
         return patterns_list
 
     def _analyze_emergent_patterns(self, results: list[dict[str, Any]]) -> list[dict[str, Any]]:
-        """Analyze emergent patterns (placeholder for manual review)."""
+        # Analyze emergent patterns (placeholder for manual review).
         emergent = []
 
         for result in results:
@@ -228,7 +213,7 @@ class DataAggregator:
     def _generate_repo_stats(
         self, results: list[dict[str, Any]], repos: list[dict[str, Any]]
     ) -> list[dict[str, Any]]:
-        """Generate per-repository statistics."""
+        # Generate per-repository statistics.
         repo_commits = defaultdict(list)
 
         # Group commits by repository
@@ -270,7 +255,7 @@ class DataAggregator:
     def _generate_language_stats(
         self, results: list[dict[str, Any]], repos: list[dict[str, Any]]
     ) -> list[dict[str, Any]]:
-        """Generate per-language statistics."""
+        # Generate per-language statistics.
         # Create repo name to language mapping (handle both Repository objects and dicts)
         repo_language_map = {}
         for repo in repos:
@@ -306,23 +291,16 @@ class DataAggregator:
 
         return language_stats
 
-    def _generate_enhanced_statistics(self, results: list[dict[str, Any]]) -> dict[str, Any]:
-        """Generate enhanced statistical analysis.
-
-        Args:
-            results: List of commit analysis results
-
-        Returns:
-            Dictionary with enhanced statistical analysis
-        """
+    def _generate_statistics(self, results: list[dict[str, Any]]) -> dict[str, Any]:
+        # Generate statistical analysis.
         # Prepare DataFrame
         df = pd.DataFrame(results)
 
         # Ensure required columns exist
         if "date" not in df.columns or "green_aware" not in df.columns:
-            return {"error": "Missing required columns for enhanced statistics"}
+            return {"error": "Missing required columns for statistics"}
 
-        enhanced_stats = {}
+        stats_result = {}
 
         # 1. Temporal Trend Analysis
         if len(df) >= 8:  # Need at least 8 data points
@@ -330,7 +308,7 @@ class DataAggregator:
                 df_copy = df.copy()
                 df_copy["commit_hash"] = df_copy.get("commit_hash", df_copy.index)
                 trends = self.statistical_analyzer.temporal_trend_analysis(df_copy)
-                enhanced_stats["temporal_trends"] = {
+                stats_result["temporal_trends"] = {
                     "trend_direction": trends["trend"]["direction"],
                     "correlation": float(trends["trend"]["correlation"]),
                     "p_value": float(trends["trend"]["p_value"]),
@@ -338,20 +316,20 @@ class DataAggregator:
                     "monthly_data_points": len(trends.get("monthly_data", {})),
                 }
             except Exception as e:
-                enhanced_stats["temporal_trends"] = {"error": str(e)}
+                stats_result["temporal_trends"] = {"error": str(e)}
 
         # 2. Pattern Correlation Analysis (if pattern columns exist)
         pattern_cols = [col for col in df.columns if col.startswith("pattern_")]
         if pattern_cols and len(pattern_cols) >= 2:
             try:
                 correlations = self.statistical_analyzer.analyze_pattern_correlations(df)
-                enhanced_stats["pattern_correlations"] = {
+                stats_result["pattern_correlations"] = {
                     "significant_pairs_count": len(correlations["significant_pairs"]),
                     "significant_pairs": correlations["significant_pairs"][:5],  # Top 5
                     "interpretation": correlations["interpretation"],
                 }
             except Exception as e:
-                enhanced_stats["pattern_correlations"] = {"error": str(e)}
 +                stats_result["pattern_correlations"] = {"error": str(e)}
 
         # 3. Effect Size Analysis by Repository
         if "repository" in df.columns:
@@ -369,7 +347,7 @@ class DataAggregator:
                 effect = self.statistical_analyzer.effect_size_analysis(
                     list(group1), list(group2)
                 )
-                enhanced_stats["effect_size"] = {
+                stats_result["effect_size"] = {
                     "cohens_d": float(effect["cohens_d"]),
                     "magnitude": effect["magnitude"],
                     "mean_difference": float(effect["mean_difference"]),
@@ -377,10 +355,10 @@ class DataAggregator:
                     "comparison": "high_green_vs_low_green_repos",
                 }
             except Exception as e:
-                enhanced_stats["effect_size"] = {"error": str(e)}
+                stats_result["effect_size"] = {"error": str(e)}
 
         # 4. Basic descriptive statistics
-        enhanced_stats["descriptive"] = {
+        stats_result["descriptive"] = {
            "total_commits": len(df),
            "green_commits": int(df["green_aware"].sum()),
            "green_rate_mean": float(df["green_aware"].mean()),
@@ -390,7 +368,7 @@ class DataAggregator:
            ),
        }
 
-        return enhanced_stats
+        return stats_result
 
    def save_results(
        self,
@@ -399,14 +377,7 @@ class DataAggregator:
        csv_file: Path,
        analysis_results: list[dict[str, Any]],
    ):
-        """Save aggregated results to JSON and CSV files.
-
-        Args:
-            aggregated_data: Aggregated statistics
-            json_file: JSON output file path
-            csv_file: CSV output file path
-            analysis_results: Original analysis results for CSV
-        """
+        # Save aggregated results to JSON and CSV files.
        # Save JSON
        save_json_file(aggregated_data, json_file)
        colored_print(f"Saved aggregated statistics to {json_file}", "green")
@@ -434,17 +405,17 @@ class DataAggregator:
        colored_print(f"Saved detailed results to {csv_file}", "green")
 
    def print_summary(self, aggregated_data: dict[str, Any]):
-        """Print summary to console."""
+        # Print summary to console.
        from tabulate import tabulate
 
        summary = aggregated_data["summary"]
 
        colored_print("\n" + "=" * 60, "cyan")
-        colored_print("📊 AGGREGATED STATISTICS SUMMARY", "cyan")
+        colored_print(" AGGREGATED STATISTICS SUMMARY", "cyan")
        colored_print("=" * 60, "cyan")
 
        # Overall summary
-        colored_print("\n📈 Overall Statistics:", "blue")
+        colored_print("\n Overall Statistics:", "blue")
        summary_table = [
            ["Total Commits Analyzed", format_number(summary["total_commits"])],
            [
@@ -458,7 +429,7 @@ class DataAggregator:
 
        # Top patterns
        if aggregated_data["known_patterns"]:
-            colored_print("\n🎯 Top Green Patterns Detected:", "blue")
+            colored_print("\n Top Green Patterns Detected:", "blue")
            pattern_table = []
            for pattern in aggregated_data["known_patterns"][:10]:
                pattern_table.append(
@@ -479,7 +450,7 @@ class DataAggregator:
 
        # Top repositories
        if aggregated_data["per_repo_stats"]:
-            colored_print("\n🏆 Top 10 Greenest Repositories:", "blue")
+            colored_print("\n Top 10 Greenest Repositories:", "blue")
            repo_table = []
            for repo in aggregated_data["per_repo_stats"][:10]:
                repo_table.append(
@@ -498,7 +469,7 @@ class DataAggregator:
 
        # Language breakdown
        if aggregated_data["per_language_stats"]:
-            colored_print("\n💻 Language Breakdown:", "blue")
+            colored_print("\n Language Breakdown:", "blue")
            lang_table = []
            for lang in aggregated_data["per_language_stats"]:
                lang_table.append(
@@ -512,104 +483,3 @@ class DataAggregator:
            print(
                tabulate(lang_table, headers=["Language", "Total", "Green", "%"], tablefmt="simple")
            )
-
-
-@click.command()
-@click.option(
-    "--analysis-file",
-    default=None,
-    help="Input analysis file (default: data/analysis_results.json)",
-)
-@click.option(
-    "--repos-file", default=None, help="Input repositories file (default: data/repositories.json)"
-)
-@click.option(
-    "--output-json",
-    default=None,
-    help="Output JSON file (default: data/aggregated_statistics.json)",
-)
-@click.option(
-    "--output-csv", default=None, help="Output CSV file (default: data/green_analysis_results.csv)"
-)
-@click.option("--config-file", default=".env", help="Path to .env configuration file")
-def aggregate(
-    analysis_file: Optional[str],
-    repos_file: Optional[str],
-    output_json: Optional[str],
-    output_csv: Optional[str],
-    config_file: str,
-):
-    """Aggregate analysis results and generate statistics."""
-    print_banner("Data Aggregator")
-
-    try:
-        # Load configuration
-        config = get_config(config_file)
-
-        # Determine input/output files
-        analysis_input = Path(analysis_file) if analysis_file else config.ANALYSIS_FILE
-        repos_input = Path(repos_file) if repos_file else config.REPOS_FILE
-        json_output = Path(output_json) if output_json else config.AGGREGATED_FILE
-        csv_output = Path(output_csv) if output_csv else config.CSV_FILE
-
-        # Check if input files exist
-        if not analysis_input.exists():
-            colored_print(f"Analysis file not found: {analysis_input}", "red")
-            colored_print("Please run 'analyze' command first", "yellow")
-            exit(1)
-
-        if not repos_input.exists():
-            colored_print(f"Repositories file not found: {repos_input}", "red")
-            colored_print("Please run 'fetch' command first", "yellow")
-            exit(1)
-
-        # Load data
-        colored_print(f"Loading analysis results from {analysis_input}...", "blue")
-        analysis_data = load_json_file(analysis_input)
-        analysis_results = analysis_data.get("results", [])
-
-        colored_print(f"Loading repositories from {repos_input}...", "blue")
-        repos_data = load_json_file(repos_input)
-        repositories = repos_data.get("repositories", [])
-
-        if not analysis_results:
-            colored_print("No analysis results found", "yellow")
-            exit(1)
-
-        colored_print(
-            f"Loaded {len(analysis_results)} analysis results and {len(repositories)} repositories",
-            "green",
-        )
-
-        # Initialize aggregator
-        aggregator = DataAggregator()
-
-        # Aggregate data
-        aggregated_data = aggregator.aggregate(analysis_results, repositories)
-
-        # Save results
-        aggregator.save_results(aggregated_data, json_output, csv_output, analysis_results)
-
-        # Print summary
-        aggregator.print_summary(aggregated_data)
-
-        colored_print("\n✓ Aggregation complete!", "green")
-        colored_print(f"JSON output: {json_output}", "green")
-        colored_print(f"CSV output: {csv_output}", "green")
-
-    except FileNotFoundError as e:
-        colored_print(f"File not found: {e}", "red")
-        exit(1)
-    except json.JSONDecodeError as e:
-        colored_print(f"Invalid JSON: {e}", "red")
-        exit(1)
-    except Exception as e:
-        colored_print(f"Error: {e}", "red")
-        import traceback
-
-        traceback.print_exc()
-        exit(1)
-
-
-if __name__ == "__main__":
-    aggregate()
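
Likewise, with the click aggregate command gone, aggregation in 1.0.4 is invoked through DataAggregator; note that enable_stats replaces the enable_enhanced_stats flag from 1.0.3. A minimal sketch, assuming load_json_file is importable from greenmining.utils as the surrounding imports suggest; the file paths are the defaults documented in the removed CLI help text:

    from pathlib import Path

    from greenmining.services.data_aggregator import DataAggregator
    from greenmining.utils import load_json_file

    # Load the analysis results and repository metadata produced by earlier steps.
    analysis_results = load_json_file(Path("data/analysis_results.json")).get("results", [])
    repositories = load_json_file(Path("data/repositories.json")).get("repositories", [])

    aggregator = DataAggregator(enable_stats=True, enable_temporal=True)
    aggregated = aggregator.aggregate(analysis_results, repositories)

    aggregator.save_results(
        aggregated,
        Path("data/aggregated_statistics.json"),
        Path("data/green_analysis_results.csv"),
        analysis_results,
    )
    aggregator.print_summary(aggregated)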