greenmining 1.1.6__tar.gz → 1.1.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. {greenmining-1.1.6 → greenmining-1.1.8}/CHANGELOG.md +11 -1
  2. {greenmining-1.1.6/greenmining.egg-info → greenmining-1.1.8}/PKG-INFO +3 -17
  3. {greenmining-1.1.6 → greenmining-1.1.8}/README.md +2 -14
  4. {greenmining-1.1.6 → greenmining-1.1.8}/greenmining/__init__.py +1 -1
  5. {greenmining-1.1.6 → greenmining-1.1.8}/greenmining/analyzers/metrics_power_correlator.py +1 -1
  6. {greenmining-1.1.6 → greenmining-1.1.8}/greenmining/analyzers/power_regression.py +0 -1
  7. {greenmining-1.1.6 → greenmining-1.1.8}/greenmining/analyzers/qualitative_analyzer.py +1 -1
  8. {greenmining-1.1.6 → greenmining-1.1.8}/greenmining/analyzers/statistical_analyzer.py +0 -32
  9. greenmining-1.1.8/greenmining/config.py +91 -0
  10. {greenmining-1.1.6 → greenmining-1.1.8}/greenmining/controllers/repository_controller.py +0 -2
  11. {greenmining-1.1.6 → greenmining-1.1.8}/greenmining/energy/codecarbon_meter.py +0 -21
  12. {greenmining-1.1.6 → greenmining-1.1.8}/greenmining/energy/cpu_meter.py +1 -1
  13. {greenmining-1.1.6 → greenmining-1.1.8}/greenmining/gsf_patterns.py +41 -0
  14. {greenmining-1.1.6 → greenmining-1.1.8}/greenmining/models/aggregated_stats.py +1 -1
  15. {greenmining-1.1.6 → greenmining-1.1.8}/greenmining/models/commit.py +0 -1
  16. {greenmining-1.1.6 → greenmining-1.1.8}/greenmining/models/repository.py +1 -1
  17. {greenmining-1.1.6 → greenmining-1.1.8}/greenmining/services/commit_extractor.py +2 -41
  18. {greenmining-1.1.6 → greenmining-1.1.8}/greenmining/services/data_aggregator.py +1 -6
  19. {greenmining-1.1.6 → greenmining-1.1.8}/greenmining/services/data_analyzer.py +1 -57
  20. {greenmining-1.1.6 → greenmining-1.1.8}/greenmining/services/local_repo_analyzer.py +1 -2
  21. {greenmining-1.1.6 → greenmining-1.1.8}/greenmining/services/reports.py +1 -6
  22. {greenmining-1.1.6 → greenmining-1.1.8}/greenmining/utils.py +0 -87
  23. {greenmining-1.1.6 → greenmining-1.1.8/greenmining.egg-info}/PKG-INFO +3 -17
  24. {greenmining-1.1.6 → greenmining-1.1.8}/greenmining.egg-info/SOURCES.txt +0 -8
  25. {greenmining-1.1.6 → greenmining-1.1.8}/greenmining.egg-info/requires.txt +0 -3
  26. {greenmining-1.1.6 → greenmining-1.1.8}/pyproject.toml +1 -4
  27. greenmining-1.1.6/greenmining/__version__.py +0 -3
  28. greenmining-1.1.6/greenmining/config.py +0 -200
  29. greenmining-1.1.6/greenmining/dashboard/__init__.py +0 -5
  30. greenmining-1.1.6/greenmining/dashboard/app.py +0 -200
  31. greenmining-1.1.6/greenmining/services/github_fetcher.py +0 -2
  32. {greenmining-1.1.6 → greenmining-1.1.8}/LICENSE +0 -0
  33. {greenmining-1.1.6 → greenmining-1.1.8}/MANIFEST.in +0 -0
  34. {greenmining-1.1.6 → greenmining-1.1.8}/greenmining/__main__.py +0 -0
  35. {greenmining-1.1.6 → greenmining-1.1.8}/greenmining/analyzers/__init__.py +0 -0
  36. {greenmining-1.1.6 → greenmining-1.1.8}/greenmining/analyzers/code_diff_analyzer.py +0 -0
  37. {greenmining-1.1.6 → greenmining-1.1.8}/greenmining/analyzers/temporal_analyzer.py +0 -0
  38. {greenmining-1.1.6 → greenmining-1.1.8}/greenmining/analyzers/version_power_analyzer.py +0 -0
  39. {greenmining-1.1.6 → greenmining-1.1.8}/greenmining/controllers/__init__.py +0 -0
  40. {greenmining-1.1.6 → greenmining-1.1.8}/greenmining/energy/__init__.py +0 -0
  41. {greenmining-1.1.6 → greenmining-1.1.8}/greenmining/energy/base.py +0 -0
  42. {greenmining-1.1.6 → greenmining-1.1.8}/greenmining/energy/carbon_reporter.py +0 -0
  43. {greenmining-1.1.6 → greenmining-1.1.8}/greenmining/energy/rapl.py +0 -0
  44. {greenmining-1.1.6 → greenmining-1.1.8}/greenmining/models/__init__.py +0 -0
  45. {greenmining-1.1.6 → greenmining-1.1.8}/greenmining/models/analysis_result.py +0 -0
  46. {greenmining-1.1.6 → greenmining-1.1.8}/greenmining/presenters/__init__.py +0 -0
  47. {greenmining-1.1.6 → greenmining-1.1.8}/greenmining/presenters/console_presenter.py +0 -0
  48. {greenmining-1.1.6 → greenmining-1.1.8}/greenmining/services/__init__.py +0 -0
  49. {greenmining-1.1.6 → greenmining-1.1.8}/greenmining/services/github_graphql_fetcher.py +0 -0
  50. {greenmining-1.1.6 → greenmining-1.1.8}/greenmining.egg-info/dependency_links.txt +0 -0
  51. {greenmining-1.1.6 → greenmining-1.1.8}/greenmining.egg-info/top_level.txt +0 -0
  52. {greenmining-1.1.6 → greenmining-1.1.8}/setup.cfg +0 -0
  53. {greenmining-1.1.6 → greenmining-1.1.8}/setup.py +0 -0
@@ -1,5 +1,15 @@
1
1
  # Changelog
2
2
 
3
+ ## [1.1.8] - 2026-01-31
4
+
5
+ ### Removed
6
+ - Web dashboard module (`greenmining/dashboard/`) and Flask dependency
7
+ - Dashboard documentation page and all dashboard references
8
+
9
+ ### Fixed
10
+ - ReadTheDocs experiment page not rendering (trailing whitespace in mkdocs nav)
11
+ - Plotly rendering in notebook (nbformat dependency)
12
+
3
13
  ## [1.1.6] - 2026-01-31
4
14
 
5
15
  ### Fixed
@@ -11,7 +21,7 @@
11
21
  ### Added
12
22
  - `since_date` / `to_date` parameters for date-bounded commit analysis
13
23
  - `created_before` / `pushed_after` search filters
14
- - Dashboard, GraphQL API, and experiment documentation pages
24
+ - GraphQL API and experiment documentation pages
15
25
  - Full process metrics and method-level metrics documentation
16
26
 
17
27
  ### Changed
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: greenmining
3
- Version: 1.1.6
3
+ Version: 1.1.8
4
4
  Summary: An empirical Python library for Mining Software Repositories (MSR) in Green IT research
5
5
  Author-email: Adam Bouafia <a.bouafia@student.vu.nl>
6
6
  License: MIT
@@ -49,8 +49,6 @@ Requires-Dist: twine; extra == "dev"
49
49
  Provides-Extra: energy
50
50
  Requires-Dist: psutil; extra == "energy"
51
51
  Requires-Dist: codecarbon; extra == "energy"
52
- Provides-Extra: dashboard
53
- Requires-Dist: flask; extra == "dashboard"
54
52
  Provides-Extra: docs
55
53
  Requires-Dist: sphinx; extra == "docs"
56
54
  Requires-Dist: sphinx-rtd-theme; extra == "docs"
@@ -70,8 +68,8 @@ An empirical Python library for Mining Software Repositories (MSR) in Green IT r
70
68
 
71
69
  `greenmining` is a research-grade Python library designed for **empirical Mining Software Repositories (MSR)** studies in **Green IT**. It enables researchers and practitioners to:
72
70
 
73
- - **Mine repositories at scale** - Fetch and analyze GitHub repositories via GraphQL API with configurable filters
74
- - **Batch analysis with parallelism** - Analyze multiple repositories concurrently with configurable worker pools
71
+ - **Mine repositories at scale** - Search, Fetch and analyze GitHub repositories via GraphQL API with configurable filters
72
+
75
73
  - **Classify green commits** - Detect 122 sustainability patterns from the Green Software Foundation (GSF) catalog
76
74
  - **Analyze any repository by URL** - Direct Git-based analysis with support for private repositories
77
75
  - **Measure energy consumption** - RAPL, CodeCarbon, and CPU Energy Meter backends for power profiling
@@ -80,7 +78,6 @@ An empirical Python library for Mining Software Repositories (MSR) in Green IT r
80
78
  - **Method-level analysis** - Per-method complexity and metrics via Lizard integration
81
79
  - **Version power comparison** - Compare power consumption across software versions
82
80
  - **Generate research datasets** - Statistical analysis, temporal trends, and publication-ready reports
83
- - **Web dashboard** - Flask-based interactive visualization of analysis results
84
81
 
85
82
  Whether you're conducting MSR research, analyzing green software adoption, or measuring the energy footprint of codebases, GreenMining provides the empirical toolkit you need.
86
83
 
@@ -452,15 +449,6 @@ print(f"Spearman: {correlator.spearman}")
452
449
  print(f"Feature importance: {correlator.feature_importance}")
453
450
  ```
454
451
 
455
- #### Web Dashboard
456
-
457
- ```python
458
- from greenmining.dashboard import run_dashboard
459
-
460
- # Launch interactive dashboard (requires pip install greenmining[dashboard])
461
- run_dashboard(data_dir="./data", host="127.0.0.1", port=5000)
462
- ```
463
-
464
452
  #### Pipeline Batch Analysis
465
453
 
466
454
  ```python
@@ -698,7 +686,6 @@ config = Config(
698
686
  - **Full Process Metrics**: All 8 process metrics (ChangeSet, CodeChurn, CommitsCount, ContributorsCount, ContributorsExperience, HistoryComplexity, HunksCount, LinesCount)
699
687
  - **Statistical Analysis**: Correlations, effect sizes, and temporal trends
700
688
  - **Multi-format Output**: Markdown reports, CSV exports, JSON data
701
- - **Web Dashboard**: Flask-based interactive visualization (`pip install greenmining[dashboard]`)
702
689
  - **Docker Support**: Pre-built images for containerized analysis
703
690
 
704
691
  ### Energy Measurement
@@ -857,7 +844,6 @@ ruff check greenmining/ tests/
857
844
 
858
845
  ```bash
859
846
  pip install greenmining[energy] # psutil, codecarbon (energy measurement)
860
- pip install greenmining[dashboard] # flask (web dashboard)
861
847
  pip install greenmining[dev] # pytest, black, ruff, mypy (development)
862
848
  ```
863
849
 
@@ -11,8 +11,8 @@ An empirical Python library for Mining Software Repositories (MSR) in Green IT r
11
11
 
12
12
  `greenmining` is a research-grade Python library designed for **empirical Mining Software Repositories (MSR)** studies in **Green IT**. It enables researchers and practitioners to:
13
13
 
14
- - **Mine repositories at scale** - Fetch and analyze GitHub repositories via GraphQL API with configurable filters
15
- - **Batch analysis with parallelism** - Analyze multiple repositories concurrently with configurable worker pools
14
+ - **Mine repositories at scale** - Search, Fetch and analyze GitHub repositories via GraphQL API with configurable filters
15
+
16
16
  - **Classify green commits** - Detect 122 sustainability patterns from the Green Software Foundation (GSF) catalog
17
17
  - **Analyze any repository by URL** - Direct Git-based analysis with support for private repositories
18
18
  - **Measure energy consumption** - RAPL, CodeCarbon, and CPU Energy Meter backends for power profiling
@@ -21,7 +21,6 @@ An empirical Python library for Mining Software Repositories (MSR) in Green IT r
21
21
  - **Method-level analysis** - Per-method complexity and metrics via Lizard integration
22
22
  - **Version power comparison** - Compare power consumption across software versions
23
23
  - **Generate research datasets** - Statistical analysis, temporal trends, and publication-ready reports
24
- - **Web dashboard** - Flask-based interactive visualization of analysis results
25
24
 
26
25
  Whether you're conducting MSR research, analyzing green software adoption, or measuring the energy footprint of codebases, GreenMining provides the empirical toolkit you need.
27
26
 
@@ -393,15 +392,6 @@ print(f"Spearman: {correlator.spearman}")
393
392
  print(f"Feature importance: {correlator.feature_importance}")
394
393
  ```
395
394
 
396
- #### Web Dashboard
397
-
398
- ```python
399
- from greenmining.dashboard import run_dashboard
400
-
401
- # Launch interactive dashboard (requires pip install greenmining[dashboard])
402
- run_dashboard(data_dir="./data", host="127.0.0.1", port=5000)
403
- ```
404
-
405
395
  #### Pipeline Batch Analysis
406
396
 
407
397
  ```python
@@ -639,7 +629,6 @@ config = Config(
639
629
  - **Full Process Metrics**: All 8 process metrics (ChangeSet, CodeChurn, CommitsCount, ContributorsCount, ContributorsExperience, HistoryComplexity, HunksCount, LinesCount)
640
630
  - **Statistical Analysis**: Correlations, effect sizes, and temporal trends
641
631
  - **Multi-format Output**: Markdown reports, CSV exports, JSON data
642
- - **Web Dashboard**: Flask-based interactive visualization (`pip install greenmining[dashboard]`)
643
632
  - **Docker Support**: Pre-built images for containerized analysis
644
633
 
645
634
  ### Energy Measurement
@@ -798,7 +787,6 @@ ruff check greenmining/ tests/
798
787
 
799
788
  ```bash
800
789
  pip install greenmining[energy] # psutil, codecarbon (energy measurement)
801
- pip install greenmining[dashboard] # flask (web dashboard)
802
790
  pip install greenmining[dev] # pytest, black, ruff, mypy (development)
803
791
  ```
804
792
 
@@ -9,7 +9,7 @@ from greenmining.gsf_patterns import (
9
9
  is_green_aware,
10
10
  )
11
11
 
12
- __version__ = "1.1.6"
12
+ __version__ = "1.1.8"
13
13
 
14
14
 
15
15
  def fetch_repositories(
@@ -4,7 +4,7 @@
4
4
  from __future__ import annotations
5
5
 
6
6
  from dataclasses import dataclass, field
7
- from typing import Any, Dict, List, Optional, Tuple
7
+ from typing import Any, Dict, List, Optional
8
8
 
9
9
  import numpy as np
10
10
  from scipy import stats
@@ -4,7 +4,6 @@
4
4
  from __future__ import annotations
5
5
 
6
6
  import subprocess
7
- import time
8
7
  from dataclasses import dataclass, field
9
8
  from typing import Any, Dict, List, Optional
10
9
 
@@ -3,7 +3,7 @@
3
3
  from __future__ import annotations
4
4
 
5
5
  import random
6
- from typing import Dict, List, Optional, Set, Tuple
6
+ from typing import Dict, List, Optional
7
7
  from dataclasses import dataclass
8
8
  from collections import defaultdict
9
9
  import json
@@ -135,38 +135,6 @@ class StatisticalAnalyzer:
135
135
  "significant": bool(p_value < 0.05),
136
136
  }
137
137
 
138
- def pattern_adoption_rate_analysis(self, commits_df: pd.DataFrame) -> Dict[str, Any]:
139
- # Analyze pattern adoption rates over repository lifetime.
140
- results = {}
141
-
142
- for pattern in commits_df["pattern"].unique():
143
- pattern_commits = commits_df[commits_df["pattern"] == pattern].sort_values("date")
144
-
145
- if len(pattern_commits) == 0:
146
- continue
147
-
148
- # Time to first adoption
149
- first_adoption = pattern_commits.iloc[0]["date"]
150
- repo_start = commits_df["date"].min()
151
- ttfa_days = (first_adoption - repo_start).days
152
-
153
- # Adoption frequency over time
154
- monthly_adoption = pattern_commits.set_index("date").resample("ME").size()
155
-
156
- # Pattern stickiness (months with at least one adoption)
157
- total_months = len(commits_df.set_index("date").resample("ME").size())
158
- active_months = len(monthly_adoption[monthly_adoption > 0])
159
- stickiness = active_months / total_months if total_months > 0 else 0
160
-
161
- results[pattern] = {
162
- "ttfa_days": ttfa_days,
163
- "total_adoptions": len(pattern_commits),
164
- "stickiness": stickiness,
165
- "monthly_adoption_rate": monthly_adoption.mean(),
166
- }
167
-
168
- return results
169
-
170
138
  def _interpret_correlations(self, significant_pairs: List[Dict[str, Any]]) -> str:
171
139
  # Generate interpretation of correlation results.
172
140
  if not significant_pairs:
@@ -0,0 +1,91 @@
1
+ import os
2
+ from pathlib import Path
3
+ from typing import Any, Dict, List
4
+
5
+ from dotenv import load_dotenv
6
+
7
+
8
+ def _load_yaml_config(yaml_path: Path) -> Dict[str, Any]:
9
+ # Load configuration from YAML file if it exists.
10
+ if not yaml_path.exists():
11
+ return {}
12
+ try:
13
+ import yaml
14
+
15
+ with open(yaml_path, "r") as f:
16
+ return yaml.safe_load(f) or {}
17
+ except ImportError:
18
+ return {}
19
+ except Exception:
20
+ return {}
21
+
22
+
23
+ class Config:
24
+ # Configuration class for loading from env vars and YAML.
25
+
26
+ def __init__(self, env_file: str = ".env", yaml_file: str = "greenmining.yaml"):
27
+ # Initialize configuration from environment and YAML file.
28
+ env_path = Path(env_file)
29
+ if env_path.exists():
30
+ load_dotenv(env_path)
31
+ else:
32
+ load_dotenv()
33
+
34
+ # Load YAML config
35
+ yaml_path = Path(yaml_file)
36
+ self._yaml_config = _load_yaml_config(yaml_path)
37
+
38
+ # GitHub API Configuration
39
+ self.GITHUB_TOKEN = os.getenv("GITHUB_TOKEN")
40
+ if not self.GITHUB_TOKEN or self.GITHUB_TOKEN == "your_github_pat_here":
41
+ raise ValueError("GITHUB_TOKEN not set. Please set it in .env file or environment.")
42
+
43
+ # Search Configuration (YAML: sources.search.*)
44
+ yaml_search = self._yaml_config.get("sources", {}).get("search", {})
45
+
46
+ self.SUPPORTED_LANGUAGES: List[str] = yaml_search.get(
47
+ "languages",
48
+ [
49
+ "Python",
50
+ "JavaScript",
51
+ "TypeScript",
52
+ "Java",
53
+ "C++",
54
+ "C#",
55
+ "Go",
56
+ "Rust",
57
+ "PHP",
58
+ "Ruby",
59
+ "Swift",
60
+ "Kotlin",
61
+ "Scala",
62
+ "R",
63
+ "MATLAB",
64
+ "Dart",
65
+ "Lua",
66
+ "Perl",
67
+ "Haskell",
68
+ "Elixir",
69
+ ],
70
+ )
71
+
72
+ # Repository Limits
73
+ self.MIN_STARS = yaml_search.get("min_stars", int(os.getenv("MIN_STARS", "100")))
74
+ self.MAX_REPOS = int(os.getenv("MAX_REPOS", "100"))
75
+
76
+ # Output Configuration (YAML: output.directory)
77
+ yaml_output = self._yaml_config.get("output", {})
78
+ self.OUTPUT_DIR = Path(yaml_output.get("directory", os.getenv("OUTPUT_DIR", "./data")))
79
+ self.OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
80
+
81
+ # File Paths
82
+ self.REPOS_FILE = self.OUTPUT_DIR / "repositories.json"
83
+
84
+ def __repr__(self) -> str:
85
+ # String representation of configuration (hiding sensitive data).
86
+ return (
87
+ f"Config("
88
+ f"MAX_REPOS={self.MAX_REPOS}, "
89
+ f"OUTPUT_DIR={self.OUTPUT_DIR}"
90
+ f")"
91
+ )
@@ -1,7 +1,5 @@
1
1
  # Repository Controller - Handles repository fetching operations.
2
2
 
3
- from tqdm import tqdm
4
-
5
3
  from greenmining.config import Config
6
4
  from greenmining.models.repository import Repository
7
5
  from greenmining.services.github_graphql_fetcher import GitHubGraphQLFetcher
@@ -124,24 +124,3 @@ class CodeCarbonMeter(EnergyMeter):
124
124
  end_time=datetime.fromtimestamp(end_time),
125
125
  )
126
126
 
127
- def get_carbon_intensity(self) -> Optional[float]:
128
- # Get current carbon intensity for the configured region.
129
- if not self._codecarbon_available:
130
- return None
131
-
132
- try:
133
- from codecarbon import EmissionsTracker
134
-
135
- # Create temporary tracker to get carbon intensity
136
- tracker = EmissionsTracker(
137
- project_name="carbon_check",
138
- country_iso_code=self.country_iso_code,
139
- save_to_file=False,
140
- log_level="error",
141
- )
142
- tracker.start()
143
- tracker.stop()
144
-
145
- return getattr(tracker, "_carbon_intensity", None)
146
- except Exception:
147
- return None
@@ -6,7 +6,7 @@ from __future__ import annotations
6
6
  import time
7
7
  import platform
8
8
  from datetime import datetime
9
- from typing import Dict, List, Optional
9
+ from typing import List, Optional
10
10
 
11
11
  from .base import EnergyMeter, EnergyMetrics, EnergyBackend
12
12
 
@@ -254,6 +254,35 @@ GSF_PATTERNS = {
254
254
  "description": "Choose hardware optimized for energy efficiency",
255
255
  "sci_impact": "Direct reduction in energy consumption",
256
256
  },
257
+ "match_preconfigured_server": {
258
+ "name": "Match Utilization Requirements with Pre-configured Servers",
259
+ "category": "cloud",
260
+ "keywords": [
261
+ "pre-configured server",
262
+ "energy proportionality",
263
+ "server utilization",
264
+ "oversized server",
265
+ "underutilized server",
266
+ "server consolidation",
267
+ ],
268
+ "description": "Select pre-configured servers that match utilization needs; one highly utilized server is more energy-efficient than two underutilized ones",
269
+ "sci_impact": "Higher utilization improves energy proportionality; fewer servers reduces embodied carbon",
270
+ },
271
+ "optimize_customer_device_impact": {
272
+ "name": "Optimize Impact on Customer Devices and Equipment",
273
+ "category": "cloud",
274
+ "keywords": [
275
+ "customer device",
276
+ "backward compatible",
277
+ "backwards compatible",
278
+ "older hardware",
279
+ "device lifetime",
280
+ "older browser",
281
+ "end-of-life hardware",
282
+ ],
283
+ "description": "Design software to extend customer hardware lifetimes through backward compatibility with older devices, browsers, and operating systems",
284
+ "sci_impact": "Extending device lifetimes reduces embodied carbon; optimizing for older hardware may also reduce energy intensity",
285
+ },
257
286
  # ==================== WEB PATTERNS (15+) ====================
258
287
  "avoid_chaining_requests": {
259
288
  "name": "Avoid Chaining Critical Requests",
@@ -1555,6 +1584,18 @@ GREEN_KEYWORDS = [
1555
1584
  "workload",
1556
1585
  "overhead",
1557
1586
  "footprint",
1587
+ # Server utilization & customer device patterns
1588
+ "pre-configured server",
1589
+ "energy proportionality",
1590
+ "server consolidation",
1591
+ "underutilized server",
1592
+ "oversized server",
1593
+ "backward compatible",
1594
+ "backwards compatible",
1595
+ "customer device",
1596
+ "device lifetime",
1597
+ "older browser",
1598
+ "end-of-life hardware",
1558
1599
  ]
1559
1600
 
1560
1601
 
@@ -3,7 +3,7 @@
3
3
  from __future__ import annotations
4
4
 
5
5
  from dataclasses import dataclass, field
6
- from typing import Dict, List, Optional
6
+ from typing import Optional
7
7
 
8
8
 
9
9
  @dataclass
@@ -3,7 +3,6 @@
3
3
  from __future__ import annotations
4
4
 
5
5
  from dataclasses import dataclass, field
6
- from typing import List
7
6
 
8
7
 
9
8
  @dataclass
@@ -3,7 +3,7 @@
3
3
  from __future__ import annotations
4
4
 
5
5
  from dataclasses import dataclass, field
6
- from typing import List, Optional
6
+ from typing import Optional
7
7
 
8
8
 
9
9
  @dataclass
@@ -2,21 +2,17 @@
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
- import json
6
5
  from datetime import datetime, timedelta
7
6
  from pathlib import Path
8
- from typing import Any, Dict, List, Optional
7
+ from typing import Any
9
8
 
10
9
  from github import Github
11
10
  from tqdm import tqdm
12
11
 
13
- from greenmining.config import get_config
14
12
  from greenmining.models.repository import Repository
15
13
  from greenmining.utils import (
16
14
  colored_print,
17
15
  format_timestamp,
18
- load_json_file,
19
- print_banner,
20
16
  retry_on_exception,
21
17
  save_json_file,
22
18
  )
@@ -110,8 +106,7 @@ class CommitExtractor:
110
106
  try:
111
107
  # Get repository from GitHub API
112
108
  if not self.github:
113
- config = get_config()
114
- self.github = Github(config.GITHUB_TOKEN)
109
+ raise ValueError("github_token is required for commit extraction")
115
110
 
116
111
  gh_repo = self.github.get_repo(repo_name)
117
112
 
@@ -143,40 +138,6 @@ class CommitExtractor:
143
138
 
144
139
  return commits
145
140
 
146
- def _extract_commit_metadata(self, commit, repo_name: str) -> dict[str, Any]:
147
- # Extract metadata from commit object.
148
- # Get modified files
149
- files_changed = []
150
- lines_added = 0
151
- lines_deleted = 0
152
-
153
- try:
154
- for modified_file in commit.modified_files:
155
- files_changed.append(modified_file.filename)
156
- lines_added += modified_file.added_lines
157
- lines_deleted += modified_file.deleted_lines
158
- except Exception:
159
- pass
160
-
161
- return {
162
- "commit_id": commit.hash,
163
- "repo_name": repo_name,
164
- "date": commit.committer_date.isoformat(),
165
- "author": commit.author.name,
166
- "author_email": commit.author.email,
167
- "message": commit.msg.strip(),
168
- "files_changed": files_changed[:20], # Limit to 20 files
169
- "lines_added": lines_added,
170
- "lines_deleted": lines_deleted,
171
- "insertions": lines_added,
172
- "deletions": lines_deleted,
173
- "is_merge": commit.merge,
174
- "branches": (
175
- list(commit.branches) if hasattr(commit, "branches") and commit.branches else []
176
- ),
177
- "in_main_branch": commit.in_main_branch if hasattr(commit, "in_main_branch") else True,
178
- }
179
-
180
141
  def _extract_commit_metadata_from_github(self, commit, repo_name: str) -> dict[str, Any]:
181
142
  # Extract metadata from GitHub API commit object.
182
143
  # Get modified files and stats
@@ -2,26 +2,21 @@
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
- import json
6
5
  from collections import defaultdict
7
6
  from pathlib import Path
8
- from typing import Any, Dict, List, Optional
7
+ from typing import Any
9
8
 
10
9
  import pandas as pd
11
10
 
12
11
  from greenmining.analyzers import (
13
12
  StatisticalAnalyzer,
14
13
  TemporalAnalyzer,
15
- QualitativeAnalyzer,
16
14
  )
17
- from greenmining.config import get_config
18
15
  from greenmining.models.repository import Repository
19
16
  from greenmining.utils import (
20
17
  colored_print,
21
18
  format_number,
22
19
  format_percentage,
23
- load_json_file,
24
- print_banner,
25
20
  save_csv_file,
26
21
  save_json_file,
27
22
  )
@@ -2,18 +2,15 @@
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
- import json
6
- import re
7
5
  from collections import Counter
8
6
  from pathlib import Path
9
- from typing import Any, Dict, List, Optional, Tuple
7
+ from typing import Any
10
8
 
11
9
  from tqdm import tqdm
12
10
 
13
11
  from greenmining.analyzers import (
14
12
  CodeDiffAnalyzer,
15
13
  )
16
- from greenmining.config import get_config
17
14
  from greenmining.gsf_patterns import (
18
15
  GREEN_KEYWORDS,
19
16
  GSF_PATTERNS,
@@ -22,11 +19,7 @@ from greenmining.gsf_patterns import (
22
19
  )
23
20
  from greenmining.utils import (
24
21
  colored_print,
25
- create_checkpoint,
26
22
  format_timestamp,
27
- load_checkpoint,
28
- load_json_file,
29
- print_banner,
30
23
  save_json_file,
31
24
  )
32
25
 
@@ -156,55 +149,6 @@ class DataAnalyzer:
156
149
 
157
150
  return result
158
151
 
159
- def _check_green_awareness(self, message: str, files: list[str]) -> tuple[bool, Optional[str]]:
160
- # Check if commit explicitly mentions green/energy concerns.
161
- # Check message for green keywords
162
- for keyword in self.GREEN_KEYWORDS:
163
- if keyword in message:
164
- # Extract context around keyword
165
- pattern = rf".{{0,30}}{re.escape(keyword)}.{{0,30}}"
166
- match = re.search(pattern, message, re.IGNORECASE)
167
- if match:
168
- evidence = match.group(0).strip()
169
- return True, f"Keyword '{keyword}': {evidence}"
170
-
171
- # Check file names for patterns
172
- cache_files = [f for f in files if "cache" in f or "redis" in f]
173
- if cache_files:
174
- return True, f"Modified cache-related file: {cache_files[0]}"
175
-
176
- perf_files = [f for f in files if "performance" in f or "optimization" in f]
177
- if perf_files:
178
- return True, f"Modified performance file: {perf_files[0]}"
179
-
180
- return False, None
181
-
182
- def _detect_known_pattern(self, message: str, files: list[str]) -> tuple[Optional[str], str]:
183
- # Detect known green software pattern.
184
- matches = []
185
-
186
- # Check each pattern
187
- for pattern_name, keywords in self.GREEN_PATTERNS.items():
188
- for keyword in keywords:
189
- if keyword in message:
190
- # Calculate confidence based on specificity
191
- confidence = "HIGH" if len(keyword) > 10 else "MEDIUM"
192
- matches.append((pattern_name, confidence, len(keyword)))
193
-
194
- # Check file names for pattern hints
195
- all_files = " ".join(files)
196
- for pattern_name, keywords in self.GREEN_PATTERNS.items():
197
- for keyword in keywords:
198
- if keyword in all_files:
199
- matches.append((pattern_name, "MEDIUM", len(keyword)))
200
-
201
- if not matches:
202
- return "NONE DETECTED", "NONE"
203
-
204
- # Return most specific match (longest keyword)
205
- matches.sort(key=lambda x: x[2], reverse=True)
206
- return matches[0][0], matches[0][1]
207
-
208
152
  def save_results(self, results: list[dict[str, Any]], output_file: Path):
209
153
  # Save analysis results to JSON file.
210
154
  # Calculate summary statistics
@@ -5,13 +5,12 @@ from __future__ import annotations
5
5
  import os
6
6
  import re
7
7
  import shutil
8
- import subprocess
9
8
  import tempfile
10
9
  from concurrent.futures import ThreadPoolExecutor, as_completed
11
10
  from dataclasses import dataclass, field
12
11
  from datetime import datetime, timedelta
13
12
  from pathlib import Path
14
- from typing import Any, Dict, List, Optional, Generator
13
+ from typing import Any, Dict, List, Optional
15
14
 
16
15
  from pydriller import Repository
17
16
  from pydriller.metrics.process.change_set import ChangeSet
@@ -1,20 +1,15 @@
1
1
  # Report generation for green mining analysis.
2
- """Report generation module for GreenMining analysis results."""
3
2
 
4
3
  from __future__ import annotations
5
4
 
6
- import json
7
5
  from datetime import datetime
8
6
  from pathlib import Path
9
- from typing import Any, Dict, Optional
7
+ from typing import Any
10
8
 
11
- from greenmining.config import get_config
12
9
  from greenmining.utils import (
13
10
  colored_print,
14
11
  format_number,
15
12
  format_percentage,
16
- load_json_file,
17
- print_banner,
18
13
  )
19
14
 
20
15