greenmining 1.0.2__py3-none-any.whl → 1.0.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. greenmining/__init__.py +11 -29
  2. greenmining/__main__.py +9 -3
  3. greenmining/__version__.py +2 -2
  4. greenmining/analyzers/__init__.py +3 -7
  5. greenmining/analyzers/code_diff_analyzer.py +151 -61
  6. greenmining/analyzers/qualitative_analyzer.py +15 -81
  7. greenmining/analyzers/statistical_analyzer.py +8 -69
  8. greenmining/analyzers/temporal_analyzer.py +16 -72
  9. greenmining/config.py +105 -58
  10. greenmining/controllers/__init__.py +1 -5
  11. greenmining/controllers/repository_controller.py +153 -94
  12. greenmining/energy/__init__.py +13 -0
  13. greenmining/energy/base.py +165 -0
  14. greenmining/energy/codecarbon_meter.py +146 -0
  15. greenmining/energy/rapl.py +157 -0
  16. greenmining/gsf_patterns.py +4 -26
  17. greenmining/models/__init__.py +1 -5
  18. greenmining/models/aggregated_stats.py +4 -4
  19. greenmining/models/analysis_result.py +4 -4
  20. greenmining/models/commit.py +5 -5
  21. greenmining/models/repository.py +5 -5
  22. greenmining/presenters/__init__.py +1 -5
  23. greenmining/presenters/console_presenter.py +24 -24
  24. greenmining/services/__init__.py +10 -6
  25. greenmining/services/commit_extractor.py +8 -152
  26. greenmining/services/data_aggregator.py +45 -175
  27. greenmining/services/data_analyzer.py +9 -202
  28. greenmining/services/github_fetcher.py +212 -323
  29. greenmining/services/github_graphql_fetcher.py +371 -0
  30. greenmining/services/local_repo_analyzer.py +387 -0
  31. greenmining/services/reports.py +33 -137
  32. greenmining/utils.py +21 -149
  33. {greenmining-1.0.2.dist-info → greenmining-1.0.4.dist-info}/METADATA +169 -146
  34. greenmining-1.0.4.dist-info/RECORD +37 -0
  35. {greenmining-1.0.2.dist-info → greenmining-1.0.4.dist-info}/WHEEL +1 -1
  36. greenmining/analyzers/ml_feature_extractor.py +0 -512
  37. greenmining/analyzers/nlp_analyzer.py +0 -365
  38. greenmining/cli.py +0 -471
  39. greenmining/main.py +0 -37
  40. greenmining-1.0.2.dist-info/RECORD +0 -36
  41. greenmining-1.0.2.dist-info/entry_points.txt +0 -2
  42. {greenmining-1.0.2.dist-info → greenmining-1.0.4.dist-info}/licenses/LICENSE +0 -0
  43. {greenmining-1.0.2.dist-info → greenmining-1.0.4.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,157 @@
1
+ # Intel RAPL (Running Average Power Limit) energy measurement for Linux.
2
+
3
+ from __future__ import annotations
4
+
5
+ import os
6
+ import time
7
+ from datetime import datetime
8
+ from pathlib import Path
9
+ from typing import Dict, List, Optional
10
+
11
+ from .base import EnergyMeter, EnergyMetrics, EnergyBackend
12
+
13
+
14
class RAPLEnergyMeter(EnergyMeter):
    """Energy measurement using Intel RAPL (Running Average Power Limit) on Linux.

    Reads cumulative energy counters from the powercap sysfs interface at
    /sys/class/powercap/intel-rapl. The energy_uj files are often readable
    by root only, so is_available() verifies actual read access, not just
    that the sysfs tree exists.
    """

    RAPL_PATH = Path("/sys/class/powercap/intel-rapl")

    # Fallback wrap limit used when a domain does not expose
    # max_energy_range_uj (RAPL counters are typically 32-bit).
    _DEFAULT_MAX_ENERGY_UJ = 2**32

    def __init__(self):
        """Initialize the meter and discover available RAPL domains."""
        super().__init__(EnergyBackend.RAPL)
        self._domains: Dict[str, Path] = {}
        # Per-domain counter range (microjoules) for wrap-around correction.
        self._max_energy_uj: Dict[str, int] = {}
        self._start_energy: Dict[str, int] = {}
        self._start_time: Optional[float] = None
        self._power_samples: List[float] = []
        self._discover_domains()

    def _register_domain(self, path: Path) -> None:
        """Record one domain's energy_uj file and its advertised wrap limit."""
        energy_file = path / "energy_uj"
        if not energy_file.exists():
            return
        name_file = path / "name"
        # Prefer the human-readable name (package-0, core, dram, ...);
        # fall back to the directory name (intel-rapl:0 etc.).
        domain_name = name_file.read_text().strip() if name_file.exists() else path.name
        self._domains[domain_name] = energy_file
        # Real counter range varies per domain; fall back to 32-bit if the
        # file is missing or unreadable.
        try:
            self._max_energy_uj[domain_name] = int(
                (path / "max_energy_range_uj").read_text().strip()
            )
        except (OSError, ValueError):
            self._max_energy_uj[domain_name] = self._DEFAULT_MAX_ENERGY_UJ

    def _discover_domains(self) -> None:
        """Discover available RAPL domains and their sub-domains."""
        if not self.RAPL_PATH.exists():
            return
        # Top-level packages (intel-rapl:0, intel-rapl:1, ...)
        for domain_path in self.RAPL_PATH.glob("intel-rapl:*"):
            self._register_domain(domain_path)
            # Sub-domains (core, uncore, dram, ...) live in intel-rapl:N:M
            # directories nested under the package directory.
            for subdomain_path in domain_path.glob("intel-rapl:*:*"):
                self._register_domain(subdomain_path)

    def _read_energy(self, path: Path) -> int:
        """Read a cumulative energy counter in microjoules; 0 when unreadable."""
        try:
            return int(path.read_text().strip())
        except (PermissionError, FileNotFoundError, ValueError):
            return 0

    def is_available(self) -> bool:
        """Return True when RAPL is present and at least one counter is readable.

        Fix over the previous version: `_read_energy` swallows its own
        exceptions, so wrapping it in try/except could never detect a
        permission failure. Attempt a raw read instead.
        """
        if not self.RAPL_PATH.exists() or not self._domains:
            return False
        for path in self._domains.values():
            try:
                int(path.read_text().strip())
                return True
            except (OSError, ValueError):
                continue
        return False

    def start(self) -> None:
        """Begin a measurement window.

        Raises:
            RuntimeError: if a measurement is already in progress.
        """
        if self._is_measuring:
            raise RuntimeError("Already measuring energy")

        self._is_measuring = True
        self._start_time = time.time()
        self._power_samples = []
        # Snapshot every domain counter at the start of the window.
        self._start_energy = {
            name: self._read_energy(path) for name, path in self._domains.items()
        }

    def stop(self) -> EnergyMetrics:
        """End the measurement window and return aggregated metrics.

        Raises:
            RuntimeError: if start() was not called first.
        """
        if not self._is_measuring:
            raise RuntimeError("Not currently measuring energy")

        end_time = time.time()
        self._is_measuring = False
        duration = end_time - self._start_time

        end_energy = {
            name: self._read_energy(path) for name, path in self._domains.items()
        }

        # Per-domain energy in joules, corrected for counter wrap-around.
        domain_energy: Dict[str, float] = {}
        for name in self._domains:
            start = self._start_energy.get(name, 0)
            end = end_energy.get(name, 0)
            if end >= start:
                delta_uj = end - start
            else:
                # Counter wrapped; use the domain's advertised range rather
                # than assuming a 32-bit counter.
                wrap = self._max_energy_uj.get(name, self._DEFAULT_MAX_ENERGY_UJ)
                delta_uj = (wrap - start) + end
            domain_energy[name] = delta_uj / 1_000_000  # microjoules -> joules

        total_joules = sum(domain_energy.values())

        # Component-specific energy. Use an explicit membership test so a
        # legitimate 0.0 J "core" reading is not mistaken for a missing
        # domain (the old `get(...) or get(...)` chain conflated the two).
        if "core" in domain_energy:
            cpu_energy = domain_energy["core"]
        else:
            cpu_energy = domain_energy.get("package-0", total_joules)
        dram_energy = domain_energy.get("dram", 0)
        gpu_energy = domain_energy.get("uncore", None)  # integrated GPU, when present

        watts_avg = total_joules / duration if duration > 0 else 0

        return EnergyMetrics(
            joules=total_joules,
            watts_avg=watts_avg,
            watts_peak=watts_avg,  # RAPL exposes no instantaneous peak
            duration_seconds=duration,
            cpu_energy_joules=cpu_energy,
            dram_energy_joules=dram_energy,
            gpu_energy_joules=gpu_energy,
            carbon_grams=None,  # RAPL measures energy only, not carbon
            carbon_intensity=None,
            backend="rapl",
            start_time=datetime.fromtimestamp(self._start_time),
            end_time=datetime.fromtimestamp(end_time),
        )

    def get_available_domains(self) -> List[str]:
        """Return the names of all discovered RAPL domains."""
        return list(self._domains.keys())
@@ -1,10 +1,4 @@
1
- """
2
- Green Software Foundation Patterns
3
- Official patterns from https://patterns.greensoftware.foundation/
4
-
5
- Categories: Cloud (40+ patterns), Web (15+ patterns), AI/ML (10+ patterns)
6
- Total: 65+ official GSF patterns
7
- """
1
+ # Green Software Foundation Patterns
8
2
 
9
3
  GSF_PATTERNS = {
10
4
  # ==================== CLOUD PATTERNS (40+) ====================
@@ -1219,7 +1213,7 @@ GSF_PATTERNS = {
1219
1213
  },
1220
1214
  }
1221
1215
 
1222
- # Green software keywords (comprehensive list from all GSF patterns + VU Amsterdam research)
1216
+ # Green software keywords (comprehensive list from all GSF patterns)
1223
1217
  GREEN_KEYWORDS = [
1224
1218
  # Core sustainability terms
1225
1219
  "energy",
@@ -1565,15 +1559,7 @@ GREEN_KEYWORDS = [
1565
1559
 
1566
1560
 
1567
1561
  def get_pattern_by_keywords(commit_message: str) -> list:
1568
- """
1569
- Match commit message against GSF patterns.
1570
-
1571
- Args:
1572
- commit_message: The commit message to analyze
1573
-
1574
- Returns:
1575
- List of matched pattern names
1576
- """
1562
+ # Match commit message against GSF patterns.
1577
1563
  message_lower = commit_message.lower()
1578
1564
  matched_patterns = []
1579
1565
 
@@ -1587,14 +1573,6 @@ def get_pattern_by_keywords(commit_message: str) -> list:
1587
1573
 
1588
1574
 
1589
1575
def is_green_aware(commit_message: str) -> bool:
    """Return True when the commit message mentions any green-software keyword."""
    haystack = commit_message.lower()
    for keyword in GREEN_KEYWORDS:
        if keyword.lower() in haystack:
            return True
    return False
@@ -1,8 +1,4 @@
1
- """
2
- Models Package - Data models and entities for green microservices mining.
3
-
4
- This package contains all data structures and domain models following MCP architecture.
5
- """
1
+ # Models Package - Data models and entities for green microservices mining.
6
2
 
7
3
  from .aggregated_stats import AggregatedStats
8
4
  from .analysis_result import AnalysisResult
@@ -1,4 +1,4 @@
1
- """Aggregated Statistics Model - Represents aggregated analysis data."""
1
+ # Aggregated Statistics Model - Represents aggregated analysis data.
2
2
 
3
3
  from __future__ import annotations
4
4
 
@@ -8,7 +8,7 @@ from typing import Dict, List, Optional
8
8
 
9
9
  @dataclass
10
10
  class AggregatedStats:
11
- """Data model for aggregated statistics."""
11
+ # Data model for aggregated statistics.
12
12
 
13
13
  summary: dict = field(default_factory=dict)
14
14
  known_patterns: dict = field(default_factory=dict)
@@ -17,7 +17,7 @@ class AggregatedStats:
17
17
  timestamp: Optional[str] = None
18
18
 
19
19
  def to_dict(self) -> dict:
20
- """Convert to dictionary."""
20
+ # Convert to dictionary.
21
21
  return {
22
22
  "summary": self.summary,
23
23
  "known_patterns": self.known_patterns,
@@ -28,5 +28,5 @@ class AggregatedStats:
28
28
 
29
29
  @classmethod
30
30
  def from_dict(cls, data: dict) -> "AggregatedStats":
31
- """Create from dictionary."""
31
+ # Create from dictionary.
32
32
  return cls(**{k: v for k, v in data.items() if k in cls.__annotations__})
@@ -1,4 +1,4 @@
1
- """Analysis Result Model - Represents commit analysis output."""
1
+ # Analysis Result Model - Represents commit analysis output.
2
2
 
3
3
  from dataclasses import dataclass
4
4
  from typing import Optional
@@ -6,7 +6,7 @@ from typing import Optional
6
6
 
7
7
  @dataclass
8
8
  class AnalysisResult:
9
- """Data model for commit analysis results."""
9
+ # Data model for commit analysis results.
10
10
 
11
11
  commit_id: str
12
12
  repo_name: str
@@ -26,7 +26,7 @@ class AnalysisResult:
26
26
  self.files_changed = []
27
27
 
28
28
  def to_dict(self) -> dict:
29
- """Convert to dictionary."""
29
+ # Convert to dictionary.
30
30
  return {
31
31
  "commit_id": self.commit_id,
32
32
  "repo_name": self.repo_name,
@@ -44,5 +44,5 @@ class AnalysisResult:
44
44
 
45
45
  @classmethod
46
46
  def from_dict(cls, data: dict) -> "AnalysisResult":
47
- """Create from dictionary."""
47
+ # Create from dictionary.
48
48
  return cls(**{k: v for k, v in data.items() if k in cls.__annotations__})
@@ -1,4 +1,4 @@
1
- """Commit Model - Represents a Git commit."""
1
+ # Commit Model - Represents a Git commit.
2
2
 
3
3
  from __future__ import annotations
4
4
 
@@ -8,7 +8,7 @@ from typing import List
8
8
 
9
9
  @dataclass
10
10
  class Commit:
11
- """Data model for a Git commit."""
11
+ # Data model for a Git commit.
12
12
 
13
13
  commit_id: str
14
14
  repo_name: str
@@ -26,7 +26,7 @@ class Commit:
26
26
  in_main_branch: bool = True
27
27
 
28
28
  def to_dict(self) -> dict:
29
- """Convert to dictionary."""
29
+ # Convert to dictionary.
30
30
  return {
31
31
  "commit_id": self.commit_id,
32
32
  "repo_name": self.repo_name,
@@ -46,12 +46,12 @@ class Commit:
46
46
 
47
47
  @classmethod
48
48
  def from_dict(cls, data: dict) -> "Commit":
49
- """Create from dictionary."""
49
+ # Create from dictionary.
50
50
  return cls(**{k: v for k, v in data.items() if k in cls.__annotations__})
51
51
 
52
52
  @classmethod
53
53
  def from_pydriller_commit(cls, commit, repo_name: str) -> "Commit":
54
- """Create from PyDriller commit object."""
54
+ # Create from PyDriller commit object.
55
55
  return cls(
56
56
  commit_id=commit.hash,
57
57
  repo_name=repo_name,
@@ -1,4 +1,4 @@
1
- """Repository Model - Represents a GitHub repository."""
1
+ # Repository Model - Represents a GitHub repository.
2
2
 
3
3
  from __future__ import annotations
4
4
 
@@ -8,7 +8,7 @@ from typing import List, Optional
8
8
 
9
9
  @dataclass
10
10
  class Repository:
11
- """Data model for a GitHub repository."""
11
+ # Data model for a GitHub repository.
12
12
 
13
13
  repo_id: int
14
14
  name: str
@@ -33,7 +33,7 @@ class Repository:
33
33
  license: Optional[str] = None
34
34
 
35
35
  def to_dict(self) -> dict:
36
- """Convert to dictionary."""
36
+ # Convert to dictionary.
37
37
  return {
38
38
  "repo_id": self.repo_id,
39
39
  "name": self.name,
@@ -60,12 +60,12 @@ class Repository:
60
60
 
61
61
  @classmethod
62
62
  def from_dict(cls, data: dict) -> "Repository":
63
- """Create from dictionary."""
63
+ # Create from dictionary.
64
64
  return cls(**{k: v for k, v in data.items() if k in cls.__annotations__})
65
65
 
66
66
  @classmethod
67
67
  def from_github_repo(cls, repo, repo_id: int) -> "Repository":
68
- """Create from PyGithub repository object."""
68
+ # Create from PyGithub repository object.
69
69
  return cls(
70
70
  repo_id=repo_id,
71
71
  name=repo.name,
@@ -1,8 +1,4 @@
1
- """
2
- Presenters Package - UI/CLI presentation layer.
3
-
4
- Presenters handle output formatting and user interaction.
5
- """
1
+ # Presenters Package - UI/CLI presentation layer.
6
2
 
7
3
  from .console_presenter import ConsolePresenter
8
4
 
@@ -1,4 +1,4 @@
1
- """Console Presenter - Handles console output formatting."""
1
+ # Console Presenter - Handles console output formatting.
2
2
 
3
3
  from __future__ import annotations
4
4
 
@@ -10,26 +10,26 @@ from greenmining.utils import colored_print
10
10
 
11
11
 
12
12
  class ConsolePresenter:
13
- """Presenter for console/terminal output."""
13
+ # Presenter for console/terminal output.
14
14
 
15
15
  @staticmethod
16
16
  def show_banner():
17
- """Display application banner."""
17
+ # Display application banner.
18
18
  banner = """
19
- ╔══════════════════════════════════════════════════════════╗
20
- Green Microservices Mining
21
- ╚══════════════════════════════════════════════════════════╝
19
+
20
+ Green Microservices Mining
21
+
22
22
  """
23
23
  colored_print(banner, "green")
24
24
 
25
25
  @staticmethod
26
26
  def show_repositories(repositories: list[dict], limit: int = 10):
27
- """Display repository table."""
27
+ # Display repository table.
28
28
  if not repositories:
29
29
  colored_print("No repositories to display", "yellow")
30
30
  return
31
31
 
32
- colored_print(f"\n📊 Top {min(limit, len(repositories))} Repositories:\n", "cyan")
32
+ colored_print(f"\n Top {min(limit, len(repositories))} Repositories:\n", "cyan")
33
33
 
34
34
  table_data = []
35
35
  for repo in repositories[:limit]:
@@ -51,8 +51,8 @@ class ConsolePresenter:
51
51
 
52
52
  @staticmethod
53
53
  def show_commit_stats(stats: dict[str, Any]):
54
- """Display commit statistics."""
55
- colored_print("\n📈 Commit Statistics:\n", "cyan")
54
+ # Display commit statistics.
55
+ colored_print("\n Commit Statistics:\n", "cyan")
56
56
 
57
57
  table_data = [
58
58
  ["Total Commits", f"{stats.get('total_commits', 0):,}"],
@@ -65,8 +65,8 @@ class ConsolePresenter:
65
65
 
66
66
  @staticmethod
67
67
  def show_analysis_results(results: dict[str, Any]):
68
- """Display analysis results."""
69
- colored_print("\n🔬 Analysis Results:\n", "cyan")
68
+ # Display analysis results.
69
+ colored_print("\n Analysis Results:\n", "cyan")
70
70
 
71
71
  summary = results.get("summary", {})
72
72
  table_data = [
@@ -80,12 +80,12 @@ class ConsolePresenter:
80
80
 
81
81
  @staticmethod
82
82
  def show_pattern_distribution(patterns: dict[str, Any], limit: int = 10):
83
- """Display pattern distribution."""
83
+ # Display pattern distribution.
84
84
  if not patterns:
85
85
  colored_print("No patterns to display", "yellow")
86
86
  return
87
87
 
88
- colored_print(f"\n🎯 Top {limit} Green Patterns:\n", "cyan")
88
+ colored_print(f"\n Top {limit} Green Patterns:\n", "cyan")
89
89
 
90
90
  # Sort by count
91
91
  sorted_patterns = sorted(
@@ -108,12 +108,12 @@ class ConsolePresenter:
108
108
 
109
109
  @staticmethod
110
110
  def show_pipeline_status(status: dict[str, Any]):
111
- """Display pipeline status."""
112
- colored_print("\n⚙️ Pipeline Status:\n", "cyan")
111
+ # Display pipeline status.
112
+ colored_print("\n Pipeline Status:\n", "cyan")
113
113
 
114
114
  table_data = []
115
115
  for phase, info in status.items():
116
- status_icon = "" if info.get("completed") else "⏳"
116
+ status_icon = "" if info.get("completed") else "⏳"
117
117
  table_data.append(
118
118
  [status_icon, phase, info.get("file", "N/A"), info.get("size", "N/A")]
119
119
  )
@@ -123,21 +123,21 @@ class ConsolePresenter:
123
123
 
124
124
  @staticmethod
125
125
  def show_progress_message(phase: str, current: int, total: int):
126
- """Display progress message."""
126
+ # Display progress message.
127
127
  percentage = (current / total * 100) if total > 0 else 0
128
128
  colored_print(f"[{phase}] Progress: {current}/{total} ({percentage:.1f}%)", "cyan")
129
129
 
130
130
  @staticmethod
131
131
  def show_error(message: str):
132
- """Display error message."""
133
- colored_print(f" Error: {message}", "red")
132
+ # Display error message.
133
+ colored_print(f" Error: {message}", "red")
134
134
 
135
135
  @staticmethod
136
136
  def show_success(message: str):
137
- """Display success message."""
138
- colored_print(f" {message}", "green")
137
+ # Display success message.
138
+ colored_print(f" {message}", "green")
139
139
 
140
140
  @staticmethod
141
141
  def show_warning(message: str):
142
- """Display warning message."""
143
- colored_print(f"⚠️ Warning: {message}", "yellow")
142
+ # Display warning message.
143
+ colored_print(f" Warning: {message}", "yellow")
@@ -1,13 +1,17 @@
1
- """
2
- Services Package - Core business logic and data processing services.
3
-
4
- Services implement the actual mining, extraction, analysis operations.
5
- """
1
+ # Services Package - Core business logic and data processing services.
6
2
 
7
3
  from .commit_extractor import CommitExtractor
8
4
  from .data_aggregator import DataAggregator
9
5
  from .data_analyzer import DataAnalyzer
10
6
  from .github_fetcher import GitHubFetcher
7
+ from .local_repo_analyzer import LocalRepoAnalyzer
11
8
  from .reports import ReportGenerator
12
9
 
13
- __all__ = ["GitHubFetcher", "CommitExtractor", "DataAnalyzer", "DataAggregator", "ReportGenerator"]
10
+ __all__ = [
11
+ "GitHubFetcher",
12
+ "CommitExtractor",
13
+ "DataAnalyzer",
14
+ "DataAggregator",
15
+ "ReportGenerator",
16
+ "LocalRepoAnalyzer",
17
+ ]